Diffstat (limited to 'js/src/wasm/WasmBaselineCompile.cpp')
-rw-r--r-- | js/src/wasm/WasmBaselineCompile.cpp | 15908
1 files changed, 15908 insertions, 0 deletions
diff --git a/js/src/wasm/WasmBaselineCompile.cpp b/js/src/wasm/WasmBaselineCompile.cpp new file mode 100644 index 0000000000..a22a07b944 --- /dev/null +++ b/js/src/wasm/WasmBaselineCompile.cpp @@ -0,0 +1,15908 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * + * Copyright 2016 Mozilla Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * [SMDOC] WebAssembly baseline compiler (RabaldrMonkey) + * + * General assumptions for 32-bit vs 64-bit code: + * + * - A 32-bit register can be extended in-place to a 64-bit register on 64-bit + * systems. + * + * - Code that knows that Register64 has a '.reg' member on 64-bit systems and + * '.high' and '.low' members on 32-bit systems, or knows the implications + * thereof, is #ifdef JS_PUNBOX64. All other code is #if(n)?def JS_64BIT. + * + * + * Coding standards: + * + * - In "small" code generating functions (eg emitMultiplyF64, emitQuotientI32, + * and surrounding functions; most functions fall into this class) where the + * meaning is obvious: + * + * - if there is a single source + destination register, it is called 'r' + * - if there is one source and a different destination, they are called 'rs' + * and 'rd' + * - if there is one source + destination register and another source register + * they are called 'r' and 'rs' + * - if there are two source registers and a destination register they are + * called 'rs0', 'rs1', and 'rd'. + * + * - Generic temp registers are named /temp[0-9]?/ not /tmp[0-9]?/. + * + * - Registers can be named non-generically for their function ('rp' for the + * 'pointer' register and 'rv' for the 'value' register are typical) and those + * names may or may not have an 'r' prefix. + * + * - "Larger" code generating functions make their own rules. + * + * + * General status notes: + * + * "FIXME" indicates a known or suspected bug. Always has a bug#. + * + * "TODO" indicates an opportunity for a general improvement, with an additional + * tag to indicate the area of improvement. Usually has a bug#. + * + * There are lots of machine dependencies here but they are pretty well isolated + * to a segment of the compiler. Many dependencies will eventually be factored + * into the MacroAssembler layer and shared with other code generators. + * + * + * High-value compiler performance improvements: + * + * - (Bug 1316802) The specific-register allocator (the needI32(r), needI64(r) + * etc methods) can avoid syncing the value stack if the specific register is + * in use but there is a free register to shuffle the specific register into. + * (This will also improve the generated code.) The sync happens often enough + * here to show up in profiles, because it is triggered by integer multiply + * and divide. + * + * + * High-value code generation improvements: + * + * - (Bug 1316804) brTable pessimizes by always dispatching to code that pops + * the stack and then jumps to the code for the target case. 
If no cleanup is + * needed we could just branch conditionally to the target; if the same amount + * of cleanup is needed for all cases then the cleanup can be done before the + * dispatch. Both are highly likely. + * + * - (Bug 1316806) Register management around calls: At the moment we sync the + * value stack unconditionally (this is simple) but there are probably many + * common cases where we could instead save/restore live caller-saves + * registers and perform parallel assignment into argument registers. This + * may be important if we keep some locals in registers. + * + * - (Bug 1316808) Allocate some locals to registers on machines where there are + * enough registers. This is probably hard to do well in a one-pass compiler + * but it might be that just keeping register arguments and the first few + * locals in registers is a viable strategy; another (more general) strategy + * is caching locals in registers in straight-line code. Such caching could + * also track constant values in registers, if that is deemed valuable. A + * combination of techniques may be desirable: parameters and the first few + * locals could be cached on entry to the function but not statically assigned + * to registers throughout. + * + * (On a large corpus of code it should be possible to compute, for every + * signature comprising the types of parameters and locals, and using a static + * weight for loops, a list in priority order of which parameters and locals + * that should be assigned to registers. Or something like that. Wasm makes + * this simple. Static assignments are desirable because they are not flushed + * to memory by the pre-block sync() call.) + */ + +#include "wasm/WasmBaselineCompile.h" + +#include "mozilla/MathAlgorithms.h" +#include "mozilla/Maybe.h" + +#include <algorithm> +#include <utility> + +#include "jit/AtomicOp.h" +#include "jit/IonTypes.h" +#include "jit/JitAllocPolicy.h" +#include "jit/Label.h" +#include "jit/MIR.h" +#include "jit/RegisterAllocator.h" +#include "jit/Registers.h" +#include "jit/RegisterSets.h" +#if defined(JS_CODEGEN_ARM) +# include "jit/arm/Assembler-arm.h" +#endif +#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86) +# include "jit/x86-shared/Architecture-x86-shared.h" +# include "jit/x86-shared/Assembler-x86-shared.h" +#endif +#if defined(JS_CODEGEN_MIPS32) +# include "jit/mips-shared/Assembler-mips-shared.h" +# include "jit/mips32/Assembler-mips32.h" +#endif +#if defined(JS_CODEGEN_MIPS64) +# include "jit/mips-shared/Assembler-mips-shared.h" +# include "jit/mips64/Assembler-mips64.h" +#endif +#include "js/ScalarType.h" // js::Scalar::Type +#include "util/Memory.h" +#include "wasm/WasmGC.h" +#include "wasm/WasmGenerator.h" +#include "wasm/WasmInstance.h" +#include "wasm/WasmOpIter.h" +#include "wasm/WasmSignalHandlers.h" +#include "wasm/WasmStubs.h" +#include "wasm/WasmValidate.h" + +#include "jit/MacroAssembler-inl.h" + +using mozilla::DebugOnly; +using mozilla::FloorLog2; +using mozilla::IsPowerOfTwo; +using mozilla::Maybe; + +namespace js { +namespace wasm { + +using namespace js::jit; + +using HandleNaNSpecially = bool; +using InvertBranch = bool; +using IsKnownNotZero = bool; +using IsUnsigned = bool; +using NeedsBoundsCheck = bool; +using WantResult = bool; +using ZeroOnOverflow = bool; + +class BaseStackFrame; + +// Two flags, useABI and interModule, control how calls are made. +// +// UseABI::Wasm implies that the Tls/Heap/Global registers are nonvolatile, +// except when InterModule::True is also set, when they are volatile. 
+// +// UseABI::Builtin implies that the Tls/Heap/Global registers are volatile. +// In this case, we require InterModule::False. The calling convention +// is otherwise like UseABI::Wasm. +// +// UseABI::System implies that the Tls/Heap/Global registers are volatile. +// Additionally, the parameter passing mechanism may be slightly different from +// the UseABI::Wasm convention. +// +// When the Tls/Heap/Global registers are not volatile, the baseline compiler +// will restore the Tls register from its save slot before the call, since the +// baseline compiler uses the Tls register for other things. +// +// When those registers are volatile, the baseline compiler will reload them +// after the call (it will restore the Tls register from the save slot and load +// the other two from the Tls data). + +enum class UseABI { Wasm, Builtin, System }; +enum class InterModule { False = false, True = true }; +enum class RhsDestOp { True = true }; + +#if defined(JS_CODEGEN_NONE) +# define RABALDR_SCRATCH_I32 +# define RABALDR_SCRATCH_F32 +# define RABALDR_SCRATCH_F64 + +static constexpr Register RabaldrScratchI32 = Register::Invalid(); +static constexpr FloatRegister RabaldrScratchF32 = InvalidFloatReg; +static constexpr FloatRegister RabaldrScratchF64 = InvalidFloatReg; +#endif + +#ifdef JS_CODEGEN_ARM64 +# define RABALDR_CHUNKY_STACK +# define RABALDR_SIDEALLOC_V128 +# define RABALDR_SCRATCH_I32 +# define RABALDR_SCRATCH_F32 +# define RABALDR_SCRATCH_F64 +# define RABALDR_SCRATCH_V128 +# define RABALDR_SCRATCH_F32_ALIASES_F64 + +static constexpr Register RabaldrScratchI32{Registers::x15}; + +// Note, the float scratch regs cannot be registers that are used for parameter +// passing in any ABI we use. Argregs tend to be low-numbered; register 30 +// should be safe. + +static constexpr FloatRegister RabaldrScratchF32{FloatRegisters::s30, + FloatRegisters::Single}; +static constexpr FloatRegister RabaldrScratchF64{FloatRegisters::d30, + FloatRegisters::Double}; +# ifdef ENABLE_WASM_SIMD +static constexpr FloatRegister RabaldrScratchV128{FloatRegisters::d30, + FloatRegisters::Simd128}; +# endif + +static_assert(RabaldrScratchF32 != ScratchFloat32Reg, "Too busy"); +static_assert(RabaldrScratchF64 != ScratchDoubleReg, "Too busy"); +# ifdef ENABLE_WASM_SIMD +static_assert(RabaldrScratchV128 != ScratchSimd128Reg, "Too busy"); +# endif +#endif + +#ifdef JS_CODEGEN_X86 +// The selection of EBX here steps gingerly around: the need for EDX +// to be allocatable for multiply/divide; ECX to be allocatable for +// shift/rotate; EAX (= ReturnReg) to be allocatable as the result +// register; EBX not being one of the WasmTableCall registers; and +// needing a temp register for load/store that has a single-byte +// persona. +// +// The compiler assumes that RabaldrScratchI32 has a single-byte +// persona. Code for 8-byte atomic operations assumes that +// RabaldrScratchI32 is in fact ebx. + +# define RABALDR_SCRATCH_I32 +static constexpr Register RabaldrScratchI32 = ebx; + +# define RABALDR_INT_DIV_I64_CALLOUT +#endif + +#ifdef JS_CODEGEN_ARM +// We use our own scratch register, because the macro assembler uses +// the regular scratch register(s) pretty liberally. We could +// work around that in several cases but the mess does not seem +// worth it yet. CallTempReg2 seems safe. 
+ +# define RABALDR_SCRATCH_I32 +static constexpr Register RabaldrScratchI32 = CallTempReg2; + +# define RABALDR_INT_DIV_I64_CALLOUT +# define RABALDR_I64_TO_FLOAT_CALLOUT +# define RABALDR_FLOAT_TO_I64_CALLOUT +#endif + +#ifdef JS_CODEGEN_MIPS32 +# define RABALDR_SCRATCH_I32 +static constexpr Register RabaldrScratchI32 = CallTempReg2; + +# define RABALDR_INT_DIV_I64_CALLOUT +# define RABALDR_I64_TO_FLOAT_CALLOUT +# define RABALDR_FLOAT_TO_I64_CALLOUT +#endif + +#ifdef JS_CODEGEN_MIPS64 +# define RABALDR_SCRATCH_I32 +static constexpr Register RabaldrScratchI32 = CallTempReg2; +#endif + +#ifdef RABALDR_SCRATCH_F32_ALIASES_F64 +# if !defined(RABALDR_SCRATCH_F32) || !defined(RABALDR_SCRATCH_F64) +# error "Bad configuration" +# endif +#endif + +template <MIRType t> +struct RegTypeOf { +#ifdef ENABLE_WASM_SIMD + static_assert(t == MIRType::Float32 || t == MIRType::Double || + t == MIRType::Simd128, + "Float mask type"); +#else + static_assert(t == MIRType::Float32 || t == MIRType::Double, + "Float mask type"); +#endif +}; + +template <> +struct RegTypeOf<MIRType::Float32> { + static constexpr RegTypeName value = RegTypeName::Float32; +}; +template <> +struct RegTypeOf<MIRType::Double> { + static constexpr RegTypeName value = RegTypeName::Float64; +}; +#ifdef ENABLE_WASM_SIMD +template <> +struct RegTypeOf<MIRType::Simd128> { + static constexpr RegTypeName value = RegTypeName::Vector128; +}; +#endif + +// The strongly typed register wrappers are especially useful to distinguish +// float registers from double registers, but they also clearly distinguish +// 32-bit registers from 64-bit register pairs on 32-bit systems. + +struct RegI32 : public Register { + RegI32() : Register(Register::Invalid()) {} + explicit RegI32(Register reg) : Register(reg) { + MOZ_ASSERT(reg != Invalid()); + } + bool isInvalid() const { return *this == Invalid(); } + bool isValid() const { return !isInvalid(); } + static RegI32 Invalid() { return RegI32(); } +}; + +struct RegI64 : public Register64 { + RegI64() : Register64(Register64::Invalid()) {} + explicit RegI64(Register64 reg) : Register64(reg) { + MOZ_ASSERT(reg != Invalid()); + } + bool isInvalid() const { return *this == Invalid(); } + bool isValid() const { return !isInvalid(); } + static RegI64 Invalid() { return RegI64(); } +}; + +struct RegPtr : public Register { + RegPtr() : Register(Register::Invalid()) {} + explicit RegPtr(Register reg) : Register(reg) { + MOZ_ASSERT(reg != Invalid()); + } + bool isInvalid() const { return *this == Invalid(); } + bool isValid() const { return !isInvalid(); } + static RegPtr Invalid() { return RegPtr(); } +}; + +struct RegF32 : public FloatRegister { + RegF32() : FloatRegister() {} + explicit RegF32(FloatRegister reg) : FloatRegister(reg) { + MOZ_ASSERT(isSingle()); + } + bool isValid() const { return !isInvalid(); } + static RegF32 Invalid() { return RegF32(); } +}; + +struct RegF64 : public FloatRegister { + RegF64() : FloatRegister() {} + explicit RegF64(FloatRegister reg) : FloatRegister(reg) { + MOZ_ASSERT(isDouble()); + } + bool isValid() const { return !isInvalid(); } + static RegF64 Invalid() { return RegF64(); } +}; + +#ifdef ENABLE_WASM_SIMD +# ifdef RABALDR_SIDEALLOC_V128 +class RegV128 { + // fpr_ is either invalid or a double that aliases the simd register, see + // comments below at BaseRegAlloc. 
+ FloatRegister fpr_; + + public: + RegV128() : fpr_(FloatRegister()) {} + explicit RegV128(FloatRegister reg) + : fpr_(FloatRegister(reg.encoding(), FloatRegisters::Double)) { + MOZ_ASSERT(reg.isSimd128()); + } + static RegV128 fromDouble(FloatRegister reg) { + MOZ_ASSERT(reg.isDouble()); + return RegV128(FloatRegister(reg.encoding(), FloatRegisters::Simd128)); + } + FloatRegister asDouble() const { return fpr_; } + bool isInvalid() const { return fpr_.isInvalid(); } + bool isValid() const { return !isInvalid(); } + static RegV128 Invalid() { return RegV128(); } + + operator FloatRegister() const { + return FloatRegister(fpr_.encoding(), FloatRegisters::Simd128); + } + + bool operator==(const RegV128& that) const { + return asDouble() == that.asDouble(); + } + + bool operator!=(const RegV128& that) const { + return asDouble() != that.asDouble(); + } +}; +# else +struct RegV128 : public FloatRegister { + RegV128() : FloatRegister() {} + explicit RegV128(FloatRegister reg) : FloatRegister(reg) { + MOZ_ASSERT(isSimd128()); + } + bool isValid() const { return !isInvalid(); } + static RegV128 Invalid() { return RegV128(); } +}; +# endif +#endif + +struct AnyReg { + union { + RegI32 i32_; + RegI64 i64_; + RegPtr ref_; + RegF32 f32_; + RegF64 f64_; +#ifdef ENABLE_WASM_SIMD + RegV128 v128_; +#endif + }; + + enum { + I32, + I64, + REF, + F32, + F64, +#ifdef ENABLE_WASM_SIMD + V128 +#endif + } tag; + + explicit AnyReg(RegI32 r) { + tag = I32; + i32_ = r; + } + explicit AnyReg(RegI64 r) { + tag = I64; + i64_ = r; + } + explicit AnyReg(RegF32 r) { + tag = F32; + f32_ = r; + } + explicit AnyReg(RegF64 r) { + tag = F64; + f64_ = r; + } +#ifdef ENABLE_WASM_SIMD + explicit AnyReg(RegV128 r) { + tag = V128; + v128_ = r; + } +#endif + explicit AnyReg(RegPtr r) { + tag = REF; + ref_ = r; + } + + RegI32 i32() const { + MOZ_ASSERT(tag == I32); + return i32_; + } + RegI64 i64() const { + MOZ_ASSERT(tag == I64); + return i64_; + } + RegF32 f32() const { + MOZ_ASSERT(tag == F32); + return f32_; + } + RegF64 f64() const { + MOZ_ASSERT(tag == F64); + return f64_; + } +#ifdef ENABLE_WASM_SIMD + RegV128 v128() const { + MOZ_ASSERT(tag == V128); + return v128_; + } +#endif + RegPtr ref() const { + MOZ_ASSERT(tag == REF); + return ref_; + } + + AnyRegister any() const { + switch (tag) { + case F32: + return AnyRegister(f32_); + case F64: + return AnyRegister(f64_); +#ifdef ENABLE_WASM_SIMD + case V128: + return AnyRegister(v128_); +#endif + case I32: + return AnyRegister(i32_); + case I64: +#ifdef JS_PUNBOX64 + return AnyRegister(i64_.reg); +#else + // The compiler is written so that this is never needed: any() is + // called on arbitrary registers for asm.js but asm.js does not have + // 64-bit ints. For wasm, any() is called on arbitrary registers + // only on 64-bit platforms. + MOZ_CRASH("AnyReg::any() on 32-bit platform"); +#endif + case REF: + MOZ_CRASH("AnyReg::any() not implemented for ref types"); + default: + MOZ_CRASH(); + } + // Work around GCC 5 analysis/warning bug. + MOZ_CRASH("AnyReg::any(): impossible case"); + } +}; + +// Platform-specific registers. +// +// All platforms must define struct SpecificRegs. All 32-bit platforms must +// have an abiReturnRegI64 member in that struct. 
+ +#if defined(JS_CODEGEN_X64) +struct SpecificRegs { + RegI32 eax, ecx, edx, edi, esi; + RegI64 rax, rcx, rdx; + + SpecificRegs() + : eax(RegI32(js::jit::eax)), + ecx(RegI32(js::jit::ecx)), + edx(RegI32(js::jit::edx)), + edi(RegI32(js::jit::edi)), + esi(RegI32(js::jit::esi)), + rax(RegI64(Register64(js::jit::rax))), + rcx(RegI64(Register64(js::jit::rcx))), + rdx(RegI64(Register64(js::jit::rdx))) {} +}; +#elif defined(JS_CODEGEN_X86) +struct SpecificRegs { + RegI32 eax, ecx, edx, edi, esi; + RegI64 ecx_ebx, edx_eax, abiReturnRegI64; + + SpecificRegs() + : eax(RegI32(js::jit::eax)), + ecx(RegI32(js::jit::ecx)), + edx(RegI32(js::jit::edx)), + edi(RegI32(js::jit::edi)), + esi(RegI32(js::jit::esi)), + ecx_ebx(RegI64(Register64(js::jit::ecx, js::jit::ebx))), + edx_eax(RegI64(Register64(js::jit::edx, js::jit::eax))), + abiReturnRegI64(edx_eax) {} +}; +#elif defined(JS_CODEGEN_ARM) +struct SpecificRegs { + RegI64 abiReturnRegI64; + + SpecificRegs() : abiReturnRegI64(ReturnReg64) {} +}; +#elif defined(JS_CODEGEN_ARM64) +struct SpecificRegs {}; +#elif defined(JS_CODEGEN_MIPS32) +struct SpecificRegs { + RegI64 abiReturnRegI64; + + SpecificRegs() : abiReturnRegI64(ReturnReg64) {} +}; +#elif defined(JS_CODEGEN_MIPS64) +struct SpecificRegs {}; +#else +struct SpecificRegs { +# ifndef JS_64BIT + RegI64 abiReturnRegI64; +# endif + + SpecificRegs() { MOZ_CRASH("BaseCompiler porting interface: SpecificRegs"); } +}; +#endif + +class BaseCompilerInterface { + public: + // Spill all spillable registers. + // + // TODO / OPTIMIZE (Bug 1316802): It's possible to do better here by + // spilling only enough registers to satisfy current needs. + virtual void sync() = 0; + virtual void saveTempPtr(RegPtr r) = 0; + virtual void restoreTempPtr(RegPtr r) = 0; +}; + +// Register allocator. + +class BaseRegAlloc { + // Notes on float register allocation. + // + // The general rule in SpiderMonkey is that float registers can alias double + // registers, but there are predicates to handle exceptions to that rule: + // hasUnaliasedDouble() and hasMultiAlias(). The way aliasing actually + // works is platform dependent and exposed through the aliased(n, &r) + // predicate, etc. + // + // - hasUnaliasedDouble(): on ARM VFPv3-D32 there are double registers that + // cannot be treated as float. + // - hasMultiAlias(): on ARM and MIPS a double register aliases two float + // registers. + // + // On some platforms (x86, x64, ARM64) but not all (ARM) + // ScratchFloat32Register is the same as ScratchDoubleRegister. + // + // It's a basic invariant of the AllocatableRegisterSet that it deals + // properly with aliasing of registers: if s0 or s1 are allocated then d0 is + // not allocatable; if s0 and s1 are freed individually then d0 becomes + // allocatable. + // + // On platforms with RABALDR_SIDEALLOC_V128, the register set does not + // represent SIMD registers. Instead, we allocate and free these registers as + // doubles and change the kind to Simd128 while the register is exposed to + // masm. (This is the case on ARM64 for now, and is a consequence of needing + // more than 64 bits for FloatRegisters::SetType to represent SIMD registers. + // See lengty comment in Architecture-arm64.h.) + + BaseCompilerInterface* bc; + AllocatableGeneralRegisterSet availGPR; + AllocatableFloatRegisterSet availFPU; +#ifdef DEBUG + // The registers available after removing ScratchReg, HeapReg, etc. 
+ AllocatableGeneralRegisterSet allGPR; + AllocatableFloatRegisterSet allFPU; + uint32_t scratchTaken; +#endif +#ifdef JS_CODEGEN_X86 + AllocatableGeneralRegisterSet singleByteRegs; +#endif + + bool hasGPR() { return !availGPR.empty(); } + + bool hasGPR64() { +#ifdef JS_PUNBOX64 + return !availGPR.empty(); +#else + if (availGPR.empty()) { + return false; + } + Register r = allocGPR(); + bool available = !availGPR.empty(); + freeGPR(r); + return available; +#endif + } + + template <MIRType t> + bool hasFPU() { +#ifdef RABALDR_SIDEALLOC_V128 + // Workaround for GCC problem, bug 1677690 + if constexpr (t == MIRType::Simd128) { + MOZ_CRASH("Should not happen"); + } else +#endif + { + return availFPU.hasAny<RegTypeOf<t>::value>(); + } + } + + bool isAvailableGPR(Register r) { return availGPR.has(r); } + + bool isAvailableFPU(FloatRegister r) { +#ifdef RABALDR_SIDEALLOC_V128 + MOZ_ASSERT(!r.isSimd128()); +#endif + return availFPU.has(r); + } + + void allocGPR(Register r) { + MOZ_ASSERT(isAvailableGPR(r)); + availGPR.take(r); + } + + Register allocGPR() { + MOZ_ASSERT(hasGPR()); + return availGPR.takeAny(); + } + + void allocInt64(Register64 r) { +#ifdef JS_PUNBOX64 + allocGPR(r.reg); +#else + allocGPR(r.low); + allocGPR(r.high); +#endif + } + + Register64 allocInt64() { + MOZ_ASSERT(hasGPR64()); +#ifdef JS_PUNBOX64 + return Register64(availGPR.takeAny()); +#else + Register high = availGPR.takeAny(); + Register low = availGPR.takeAny(); + return Register64(high, low); +#endif + } + +#ifdef JS_CODEGEN_ARM + // r12 is normally the ScratchRegister and r13 is always the stack pointer, + // so the highest possible pair has r10 as the even-numbered register. + + static constexpr uint32_t PAIR_LIMIT = 10; + + bool hasGPRPair() { + for (uint32_t i = 0; i <= PAIR_LIMIT; i += 2) { + if (isAvailableGPR(Register::FromCode(i)) && + isAvailableGPR(Register::FromCode(i + 1))) { + return true; + } + } + return false; + } + + void allocGPRPair(Register* low, Register* high) { + MOZ_ASSERT(hasGPRPair()); + for (uint32_t i = 0; i <= PAIR_LIMIT; i += 2) { + if (isAvailableGPR(Register::FromCode(i)) && + isAvailableGPR(Register::FromCode(i + 1))) { + *low = Register::FromCode(i); + *high = Register::FromCode(i + 1); + allocGPR(*low); + allocGPR(*high); + return; + } + } + MOZ_CRASH("No pair"); + } +#endif + + void allocFPU(FloatRegister r) { +#ifdef RABALDR_SIDEALLOC_V128 + MOZ_ASSERT(!r.isSimd128()); +#endif + MOZ_ASSERT(isAvailableFPU(r)); + availFPU.take(r); + } + + template <MIRType t> + FloatRegister allocFPU() { +#ifdef RABALDR_SIDEALLOC_V128 + // Workaround for GCC problem, bug 1677690 + if constexpr (t == MIRType::Simd128) { + MOZ_CRASH("Should not happen"); + } else +#endif + { + return availFPU.takeAny<RegTypeOf<t>::value>(); + } + } + + void freeGPR(Register r) { availGPR.add(r); } + + void freeInt64(Register64 r) { +#ifdef JS_PUNBOX64 + freeGPR(r.reg); +#else + freeGPR(r.low); + freeGPR(r.high); +#endif + } + + void freeFPU(FloatRegister r) { +#ifdef RABALDR_SIDEALLOC_V128 + MOZ_ASSERT(!r.isSimd128()); +#endif + availFPU.add(r); + } + + public: + explicit BaseRegAlloc() + : bc(nullptr), + availGPR(GeneralRegisterSet::All()), + availFPU(FloatRegisterSet::All()) +#ifdef DEBUG + , + scratchTaken(0) +#endif +#ifdef JS_CODEGEN_X86 + , + singleByteRegs(GeneralRegisterSet(Registers::SingleByteRegs)) +#endif + { + RegisterAllocator::takeWasmRegisters(availGPR); + + // Allocate any private scratch registers. 
+#if defined(RABALDR_SCRATCH_I32) + if (RabaldrScratchI32 != RegI32::Invalid()) { + availGPR.take(RabaldrScratchI32); + } +#endif + +#ifdef RABALDR_SCRATCH_F32_ALIASES_F64 + static_assert(RabaldrScratchF32 != InvalidFloatReg, "Float reg definition"); + static_assert(RabaldrScratchF64 != InvalidFloatReg, "Float reg definition"); +#endif + +#if defined(RABALDR_SCRATCH_F32) && !defined(RABALDR_SCRATCH_F32_ALIASES_F64) + if (RabaldrScratchF32 != RegF32::Invalid()) { + availFPU.take(RabaldrScratchF32); + } +#endif + +#if defined(RABALDR_SCRATCH_F64) +# ifdef RABALDR_SCRATCH_F32_ALIASES_F64 + MOZ_ASSERT(availFPU.has(RabaldrScratchF32)); +# endif + if (RabaldrScratchF64 != RegF64::Invalid()) { + availFPU.take(RabaldrScratchF64); + } +# ifdef RABALDR_SCRATCH_F32_ALIASES_F64 + MOZ_ASSERT(!availFPU.has(RabaldrScratchF32)); +# endif +#endif + +#ifdef DEBUG + allGPR = availGPR; + allFPU = availFPU; +#endif + } + + void init(BaseCompilerInterface* bc) { this->bc = bc; } + + enum class ScratchKind { I32 = 1, F32 = 2, F64 = 4, V128 = 8 }; + +#ifdef DEBUG + bool isScratchRegisterTaken(ScratchKind s) const { + return (scratchTaken & uint32_t(s)) != 0; + } + + void setScratchRegisterTaken(ScratchKind s, bool state) { + if (state) { + scratchTaken |= uint32_t(s); + } else { + scratchTaken &= ~uint32_t(s); + } + } +#endif + +#ifdef JS_CODEGEN_X86 + bool isSingleByteI32(Register r) { return singleByteRegs.has(r); } +#endif + + bool isAvailableI32(RegI32 r) { return isAvailableGPR(r); } + + bool isAvailableI64(RegI64 r) { +#ifdef JS_PUNBOX64 + return isAvailableGPR(r.reg); +#else + return isAvailableGPR(r.low) && isAvailableGPR(r.high); +#endif + } + + bool isAvailablePtr(RegPtr r) { return isAvailableGPR(r); } + + bool isAvailableF32(RegF32 r) { return isAvailableFPU(r); } + + bool isAvailableF64(RegF64 r) { return isAvailableFPU(r); } + +#ifdef ENABLE_WASM_SIMD +# ifdef RABALDR_SIDEALLOC_V128 + bool isAvailableV128(RegV128 r) { return isAvailableFPU(r.asDouble()); } +# else + bool isAvailableV128(RegV128 r) { return isAvailableFPU(r); } +# endif +#endif + + // TODO / OPTIMIZE (Bug 1316802): Do not sync everything on allocation + // failure, only as much as we need. + + [[nodiscard]] RegI32 needI32() { + if (!hasGPR()) { + bc->sync(); + } + return RegI32(allocGPR()); + } + + void needI32(RegI32 specific) { + if (!isAvailableI32(specific)) { + bc->sync(); + } + allocGPR(specific); + } + + [[nodiscard]] RegI64 needI64() { + if (!hasGPR64()) { + bc->sync(); + } + return RegI64(allocInt64()); + } + + void needI64(RegI64 specific) { + if (!isAvailableI64(specific)) { + bc->sync(); + } + allocInt64(specific); + } + + [[nodiscard]] RegPtr needPtr() { + if (!hasGPR()) { + bc->sync(); + } + return RegPtr(allocGPR()); + } + + void needPtr(RegPtr specific) { + if (!isAvailablePtr(specific)) { + bc->sync(); + } + allocGPR(specific); + } + + // Use when you need a register for a short time but explicitly want to avoid + // a full sync(). 
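+ // A typical caller-side pattern for this temp-pointer path is sketched below
+ // (the fallback register and the work done with the temp are placeholders,
+ // not taken from any particular call site; `ra` is this allocator):
+ //
+ //   bool saved;
+ //   RegPtr temp = ra.needTempPtr(fallback, &saved);
+ //   ... use temp briefly ...
+ //   ra.freeTempPtr(temp, saved);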
+ [[nodiscard]] RegPtr needTempPtr(RegPtr fallback, bool* saved) { + if (hasGPR()) { + *saved = false; + return RegPtr(allocGPR()); + } + *saved = true; + bc->saveTempPtr(fallback); + MOZ_ASSERT(isAvailablePtr(fallback)); + allocGPR(fallback); + return RegPtr(fallback); + } + + [[nodiscard]] RegF32 needF32() { + if (!hasFPU<MIRType::Float32>()) { + bc->sync(); + } + return RegF32(allocFPU<MIRType::Float32>()); + } + + void needF32(RegF32 specific) { + if (!isAvailableF32(specific)) { + bc->sync(); + } + allocFPU(specific); + } + + [[nodiscard]] RegF64 needF64() { + if (!hasFPU<MIRType::Double>()) { + bc->sync(); + } + return RegF64(allocFPU<MIRType::Double>()); + } + + void needF64(RegF64 specific) { + if (!isAvailableF64(specific)) { + bc->sync(); + } + allocFPU(specific); + } + +#ifdef ENABLE_WASM_SIMD + [[nodiscard]] RegV128 needV128() { +# ifdef RABALDR_SIDEALLOC_V128 + if (!hasFPU<MIRType::Double>()) { + bc->sync(); + } + return RegV128::fromDouble(allocFPU<MIRType::Double>()); +# else + if (!hasFPU<MIRType::Simd128>()) { + bc->sync(); + } + return RegV128(allocFPU<MIRType::Simd128>()); +# endif + } + + void needV128(RegV128 specific) { +# ifdef RABALDR_SIDEALLOC_V128 + if (!isAvailableV128(specific)) { + bc->sync(); + } + allocFPU(specific.asDouble()); +# else + if (!isAvailableV128(specific)) { + bc->sync(); + } + allocFPU(specific); +# endif + } +#endif + + void freeI32(RegI32 r) { freeGPR(r); } + + void freeI64(RegI64 r) { freeInt64(r); } + + void freePtr(RegPtr r) { freeGPR(r); } + + void freeF64(RegF64 r) { freeFPU(r); } + + void freeF32(RegF32 r) { freeFPU(r); } + +#ifdef ENABLE_WASM_SIMD + void freeV128(RegV128 r) { +# ifdef RABALDR_SIDEALLOC_V128 + freeFPU(r.asDouble()); +# else + freeFPU(r); +# endif + } +#endif + + void freeTempPtr(RegPtr r, bool saved) { + freePtr(r); + if (saved) { + bc->restoreTempPtr(r); + MOZ_ASSERT(!isAvailablePtr(r)); + } + } + +#ifdef JS_CODEGEN_ARM + [[nodiscard]] RegI64 needI64Pair() { + if (!hasGPRPair()) { + bc->sync(); + } + Register low, high; + allocGPRPair(&low, &high); + return RegI64(Register64(high, low)); + } +#endif + +#ifdef DEBUG + friend class LeakCheck; + + class MOZ_RAII LeakCheck { + private: + const BaseRegAlloc& ra; + AllocatableGeneralRegisterSet knownGPR_; + AllocatableFloatRegisterSet knownFPU_; + + public: + explicit LeakCheck(const BaseRegAlloc& ra) : ra(ra) { + knownGPR_ = ra.availGPR; + knownFPU_ = ra.availFPU; + } + + ~LeakCheck() { + MOZ_ASSERT(knownGPR_.bits() == ra.allGPR.bits()); + MOZ_ASSERT(knownFPU_.bits() == ra.allFPU.bits()); + } + + void addKnownI32(RegI32 r) { knownGPR_.add(r); } + + void addKnownI64(RegI64 r) { +# ifdef JS_PUNBOX64 + knownGPR_.add(r.reg); +# else + knownGPR_.add(r.high); + knownGPR_.add(r.low); +# endif + } + + void addKnownF32(RegF32 r) { knownFPU_.add(r); } + + void addKnownF64(RegF64 r) { knownFPU_.add(r); } + +# ifdef ENABLE_WASM_SIMD + void addKnownV128(RegV128 r) { +# ifdef RABALDR_SIDEALLOC_V128 + knownFPU_.add(r.asDouble()); +# else + knownFPU_.add(r); +# endif + } +# endif + + void addKnownRef(RegPtr r) { knownGPR_.add(r); } + }; +#endif +}; + +// Scratch register abstractions. +// +// We define our own scratch registers when the platform doesn't provide what we +// need. A notable use case is that we will need a private scratch register +// when the platform masm uses its scratch register very frequently (eg, ARM). 
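+// A minimal usage sketch for these scopes (the value moved is a placeholder):
+// the register is claimed for the extent of the C++ scope and released by the
+// destructor; in the RABALDR_SCRATCH_I32 case, overlapping uses of the same
+// scratch kind assert in DEBUG builds.
+//
+//   {
+//     ScratchI32 scratch(ra);           // with RABALDR_SCRATCH_I32; otherwise
+//                                       // the constructor takes a MacroAssembler
+//     masm.move32(Imm32(0), scratch);
+//   }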
+ +class BaseScratchRegister { +#ifdef DEBUG + BaseRegAlloc& ra; + BaseRegAlloc::ScratchKind kind_; + + public: + explicit BaseScratchRegister(BaseRegAlloc& ra, BaseRegAlloc::ScratchKind kind) + : ra(ra), kind_(kind) { + MOZ_ASSERT(!ra.isScratchRegisterTaken(kind_)); + ra.setScratchRegisterTaken(kind_, true); + } + ~BaseScratchRegister() { + MOZ_ASSERT(ra.isScratchRegisterTaken(kind_)); + ra.setScratchRegisterTaken(kind_, false); + } +#else + public: + explicit BaseScratchRegister(BaseRegAlloc& ra, + BaseRegAlloc::ScratchKind kind) {} +#endif +}; + +#ifdef ENABLE_WASM_SIMD +# ifdef RABALDR_SCRATCH_V128 +class ScratchV128 : public BaseScratchRegister { + public: + explicit ScratchV128(BaseRegAlloc& ra) + : BaseScratchRegister(ra, BaseRegAlloc::ScratchKind::V128) {} + operator RegV128() const { return RegV128(RabaldrScratchV128); } +}; +# else +class ScratchV128 : public ScratchSimd128Scope { + public: + explicit ScratchV128(MacroAssembler& m) : ScratchSimd128Scope(m) {} + operator RegV128() const { return RegV128(FloatRegister(*this)); } +}; +# endif +#endif + +#ifdef RABALDR_SCRATCH_F64 +class ScratchF64 : public BaseScratchRegister { + public: + explicit ScratchF64(BaseRegAlloc& ra) + : BaseScratchRegister(ra, BaseRegAlloc::ScratchKind::F64) {} + operator RegF64() const { return RegF64(RabaldrScratchF64); } +}; +#else +class ScratchF64 : public ScratchDoubleScope { + public: + explicit ScratchF64(MacroAssembler& m) : ScratchDoubleScope(m) {} + operator RegF64() const { return RegF64(FloatRegister(*this)); } +}; +#endif + +#ifdef RABALDR_SCRATCH_F32 +class ScratchF32 : public BaseScratchRegister { + public: + explicit ScratchF32(BaseRegAlloc& ra) + : BaseScratchRegister(ra, BaseRegAlloc::ScratchKind::F32) {} + operator RegF32() const { return RegF32(RabaldrScratchF32); } +}; +#else +class ScratchF32 : public ScratchFloat32Scope { + public: + explicit ScratchF32(MacroAssembler& m) : ScratchFloat32Scope(m) {} + operator RegF32() const { return RegF32(FloatRegister(*this)); } +}; +#endif + +#ifdef RABALDR_SCRATCH_I32 +template <class RegType> +class ScratchGPR : public BaseScratchRegister { + public: + explicit ScratchGPR(BaseRegAlloc& ra) + : BaseScratchRegister(ra, BaseRegAlloc::ScratchKind::I32) {} + operator RegType() const { return RegType(RabaldrScratchI32); } +}; +#else +template <class RegType> +class ScratchGPR : public ScratchRegisterScope { + public: + explicit ScratchGPR(MacroAssembler& m) : ScratchRegisterScope(m) {} + operator RegType() const { return RegType(Register(*this)); } +}; +#endif + +using ScratchI32 = ScratchGPR<RegI32>; +using ScratchPtr = ScratchGPR<RegPtr>; + +#if defined(JS_CODEGEN_X86) +// ScratchEBX is a mnemonic device: For some atomic ops we really need EBX, +// no other register will do. And we would normally have to allocate that +// register using ScratchI32 since normally the scratch register is EBX. +// But the whole point of ScratchI32 is to hide that relationship. By using +// the ScratchEBX alias, we document that at that point we require the +// scratch register to be EBX. +using ScratchEBX = ScratchI32; + +// ScratchI8 is a mnemonic device: For some ops we need a register with a +// byte subregister. +using ScratchI8 = ScratchI32; +#endif + +// The stack frame. 
+// +// The stack frame has four parts ("below" means at lower addresses): +// +// - the Frame element; +// - the Local area, including the DebugFrame element and possibly a spilled +// pointer to stack results, if any; allocated below the header with various +// forms of alignment; +// - the Dynamic area, comprising the temporary storage the compiler uses for +// register spilling, allocated below the Local area; +// - the Arguments area, comprising memory allocated for outgoing calls, +// allocated below the Dynamic area. +// +// +==============================+ +// | Incoming stack arg | +// | ... | +// ------------- +==============================+ +// | Frame (fixed size) | +// ------------- +==============================+ <-------------------- FP +// ^ | DebugFrame (optional) | ^ ^ ^^ +// localSize | Register arg local | | | || +// | | ... | | | framePushed +// | | Register stack result ptr?| | | || +// | | Non-arg local | | | || +// | | ... | | | || +// | | (padding) | | | || +// | | Tls pointer | | | || +// | +------------------------------+ | | || +// v | (padding) | | v || +// ------------- +==============================+ currentStackHeight || +// ^ | Dynamic (variable size) | | || +// dynamicSize | ... | | || +// v | ... | v || +// ------------- | (free space, sometimes) | --------- v| +// +==============================+ <----- SP not-during calls +// | Arguments (sometimes) | | +// | ... | v +// +==============================+ <----- SP during calls +// +// The Frame is addressed off the stack pointer. masm.framePushed() is always +// correct, and masm.getStackPointer() + masm.framePushed() always addresses the +// Frame, with the DebugFrame optionally below it. +// +// The Local area (including the DebugFrame and, if needed, the spilled value of +// the stack results area pointer) is laid out by BaseLocalIter and is allocated +// and deallocated by standard prologue and epilogue functions that manipulate +// the stack pointer, but it is accessed via BaseStackFrame. +// +// The Dynamic area is maintained by and accessed via BaseStackFrame. On some +// systems (such as ARM64), the Dynamic memory may be allocated in chunks +// because the SP needs a specific alignment, and in this case there will +// normally be some free space directly above the SP. The stack height does not +// include the free space, it reflects the logically used space only. +// +// The Dynamic area is where space for stack results is allocated when calling +// functions that return results on the stack. If a function has stack results, +// a pointer to the low address of the stack result area is passed as an +// additional argument, according to the usual ABI. See +// ABIResultIter::HasStackResults. +// +// The Arguments area is allocated and deallocated via BaseStackFrame (see +// comments later) but is accessed directly off the stack pointer. + +// BaseLocalIter iterates over a vector of types of locals and provides offsets +// from the Frame address for those locals, and associated data. +// +// The implementation of BaseLocalIter is the property of the BaseStackFrame. +// But it is also exposed for eg the debugger to use. + +BaseLocalIter::BaseLocalIter(const ValTypeVector& locals, + const ArgTypeVector& args, bool debugEnabled) + : locals_(locals), + args_(args), + argsIter_(args_), + index_(0), + nextFrameSize_(debugEnabled ? 
DebugFrame::offsetOfFrame() : 0), + frameOffset_(INT32_MAX), + stackResultPointerOffset_(INT32_MAX), + mirType_(MIRType::Undefined), + done_(false) { + MOZ_ASSERT(args.lengthWithoutStackResults() <= locals.length()); + settle(); +} + +int32_t BaseLocalIter::pushLocal(size_t nbytes) { + MOZ_ASSERT(nbytes % 4 == 0 && nbytes <= 16); + nextFrameSize_ = AlignBytes(frameSize_, nbytes) + nbytes; + return nextFrameSize_; // Locals grow down so capture base address. +} + +void BaseLocalIter::settle() { + MOZ_ASSERT(!done_); + frameSize_ = nextFrameSize_; + + if (!argsIter_.done()) { + mirType_ = argsIter_.mirType(); + MIRType concreteType = mirType_; + switch (mirType_) { + case MIRType::StackResults: + // The pointer to stack results is handled like any other argument: + // either addressed in place if it is passed on the stack, or we spill + // it in the frame if it's in a register. + MOZ_ASSERT(args_.isSyntheticStackResultPointerArg(index_)); + concreteType = MIRType::Pointer; + [[fallthrough]]; + case MIRType::Int32: + case MIRType::Int64: + case MIRType::Double: + case MIRType::Float32: + case MIRType::RefOrNull: +#ifdef ENABLE_WASM_SIMD + case MIRType::Simd128: +#endif + if (argsIter_->argInRegister()) { + frameOffset_ = pushLocal(MIRTypeToSize(concreteType)); + } else { + frameOffset_ = -(argsIter_->offsetFromArgBase() + sizeof(Frame)); + } + break; + default: + MOZ_CRASH("Argument type"); + } + if (mirType_ == MIRType::StackResults) { + stackResultPointerOffset_ = frameOffset(); + // Advance past the synthetic stack result pointer argument and fall + // through to the next case. + argsIter_++; + frameSize_ = nextFrameSize_; + MOZ_ASSERT(argsIter_.done()); + } else { + return; + } + } + + if (index_ < locals_.length()) { + switch (locals_[index_].kind()) { + case ValType::I32: + case ValType::I64: + case ValType::F32: + case ValType::F64: +#ifdef ENABLE_WASM_SIMD + case ValType::V128: +#endif + case ValType::Ref: + // TODO/AnyRef-boxing: With boxed immediates and strings, the + // debugger must be made aware that AnyRef != Pointer. + ASSERT_ANYREF_IS_JSOBJECT; + mirType_ = ToMIRType(locals_[index_]); + frameOffset_ = pushLocal(MIRTypeToSize(mirType_)); + break; + default: + MOZ_CRASH("Compiler bug: Unexpected local type"); + } + return; + } + + done_ = true; +} + +void BaseLocalIter::operator++(int) { + MOZ_ASSERT(!done_); + index_++; + if (!argsIter_.done()) { + argsIter_++; + } + settle(); +} + +// Abstraction of the height of the stack frame, to avoid type confusion. + +class StackHeight { + friend class BaseStackFrameAllocator; + + uint32_t height; + + public: + explicit StackHeight(uint32_t h) : height(h) {} + static StackHeight Invalid() { return StackHeight(UINT32_MAX); } + bool isValid() const { return height != UINT32_MAX; } + bool operator==(StackHeight rhs) const { + MOZ_ASSERT(isValid() && rhs.isValid()); + return height == rhs.height; + } + bool operator!=(StackHeight rhs) const { return !(*this == rhs); } +}; + +// Abstraction for where multi-value results go on the machine stack. 
+ +class StackResultsLoc { + uint32_t bytes_; + size_t count_; + Maybe<uint32_t> height_; + + public: + StackResultsLoc() : bytes_(0), count_(0){}; + StackResultsLoc(uint32_t bytes, size_t count, uint32_t height) + : bytes_(bytes), count_(count), height_(Some(height)) { + MOZ_ASSERT(bytes != 0); + MOZ_ASSERT(count != 0); + MOZ_ASSERT(height != 0); + } + + uint32_t bytes() const { return bytes_; } + uint32_t count() const { return count_; } + uint32_t height() const { return height_.value(); } + + bool hasStackResults() const { return bytes() != 0; } + StackResults stackResults() const { + return hasStackResults() ? StackResults::HasStackResults + : StackResults::NoStackResults; + } +}; + +// Abstraction of the baseline compiler's stack frame (except for the Frame / +// DebugFrame parts). See comments above for more. Remember, "below" on the +// stack means at lower addresses. +// +// The abstraction is split into two parts: BaseStackFrameAllocator is +// responsible for allocating and deallocating space on the stack and for +// performing computations that are affected by how the allocation is performed; +// BaseStackFrame then provides a pleasant interface for stack frame management. + +class BaseStackFrameAllocator { + MacroAssembler& masm; + +#ifdef RABALDR_CHUNKY_STACK + // On platforms that require the stack pointer to be aligned on a boundary + // greater than the typical stack item (eg, ARM64 requires 16-byte alignment + // but items are 8 bytes), allocate stack memory in chunks, and use a + // separate stack height variable to track the effective stack pointer + // within the allocated area. Effectively, there's a variable amount of + // free space directly above the stack pointer. See diagram above. + + // The following must be true in order for the stack height to be + // predictable at control flow joins: + // + // - The Local area is always aligned according to WasmStackAlignment, ie, + // masm.framePushed() % WasmStackAlignment is zero after allocating + // locals. + // + // - ChunkSize is always a multiple of WasmStackAlignment. + // + // - Pushing and popping are always in units of ChunkSize (hence preserving + // alignment). + // + // - The free space on the stack (masm.framePushed() - currentStackHeight_) + // is a predictable (nonnegative) amount. + + // As an optimization, we pre-allocate some space on the stack, the size of + // this allocation is InitialChunk and it must be a multiple of ChunkSize. + // It is allocated as part of the function prologue and deallocated as part + // of the epilogue, along with the locals. + // + // If ChunkSize is too large then we risk overflowing the stack on simple + // recursions with few live values where stack overflow should not be a + // risk; if it is too small we spend too much time adjusting the stack + // pointer. + // + // Good values for ChunkSize are the subject of future empirical analysis; + // eight words is just an educated guess. + + static constexpr uint32_t ChunkSize = 8 * sizeof(void*); + static constexpr uint32_t InitialChunk = ChunkSize; + + // The current logical height of the frame is + // currentStackHeight_ = localSize_ + dynamicSize + // where dynamicSize is not accounted for explicitly and localSize_ also + // includes size for the DebugFrame. + // + // The allocated size of the frame, provided by masm.framePushed(), is usually + // larger than currentStackHeight_, notably at the beginning of execution when + // we've allocated InitialChunk extra space. 
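+ // A worked example with hypothetical numbers (64-bit target, so ChunkSize is
+ // 64): if the Local area is 48 bytes, the prologue allocates fixedAllocSize()
+ // = 48 + InitialChunk = 112 bytes and currentStackHeight_ starts at 48.
+ // Pushing eight 8-byte values consumes the 64 free bytes (currentStackHeight_
+ // reaches 112); the ninth push finds no free space and reserves one more
+ // chunk, growing masm.framePushed() to 176 while currentStackHeight_ becomes
+ // 120.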
+ + uint32_t currentStackHeight_; +#endif + + // Size of the Local area in bytes (stable after BaseCompiler::init() has + // called BaseStackFrame::setupLocals(), which in turn calls + // BaseStackFrameAllocator::setLocalSize()), always rounded to the proper + // stack alignment. The Local area is then allocated in beginFunction(), + // following the allocation of the Header. See onFixedStackAllocated() + // below. + + uint32_t localSize_; + + protected: + /////////////////////////////////////////////////////////////////////////// + // + // Initialization + + explicit BaseStackFrameAllocator(MacroAssembler& masm) + : masm(masm), +#ifdef RABALDR_CHUNKY_STACK + currentStackHeight_(0), +#endif + localSize_(UINT32_MAX) { + } + + protected: + ////////////////////////////////////////////////////////////////////// + // + // The Local area - the static part of the frame. + + // Record the size of the Local area, once it is known. + + void setLocalSize(uint32_t localSize) { + MOZ_ASSERT(localSize == AlignBytes(localSize, sizeof(void*)), + "localSize_ should be aligned to at least a pointer"); + MOZ_ASSERT(localSize_ == UINT32_MAX); + localSize_ = localSize; + } + + // Record the current stack height, after it has become stable in + // beginFunction(). See also BaseStackFrame::onFixedStackAllocated(). + + void onFixedStackAllocated() { + MOZ_ASSERT(localSize_ != UINT32_MAX); +#ifdef RABALDR_CHUNKY_STACK + currentStackHeight_ = localSize_; +#endif + } + + public: + // The fixed amount of memory, in bytes, allocated on the stack below the + // Header for purposes such as locals and other fixed values. Includes all + // necessary alignment, and on ARM64 also the initial chunk for the working + // stack memory. + + uint32_t fixedAllocSize() const { + MOZ_ASSERT(localSize_ != UINT32_MAX); +#ifdef RABALDR_CHUNKY_STACK + return localSize_ + InitialChunk; +#else + return localSize_; +#endif + } + +#ifdef RABALDR_CHUNKY_STACK + // The allocated frame size is frequently larger than the logical stack + // height; we round up to a chunk boundary, and special case the initial + // chunk. + uint32_t framePushedForHeight(uint32_t logicalHeight) { + if (logicalHeight <= fixedAllocSize()) { + return fixedAllocSize(); + } + return fixedAllocSize() + + AlignBytes(logicalHeight - fixedAllocSize(), ChunkSize); + } +#endif + + protected: + ////////////////////////////////////////////////////////////////////// + // + // The Dynamic area - the dynamic part of the frame, for spilling and saving + // intermediate values. + + // Offset off of sp_ for the slot at stack area location `offset`. 
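+ // For example (illustrative numbers only), with masm.framePushed() == 160 and
+ // offset == 48 the slot is addressed at sp_ + 112: `offset` is the slot's
+ // distance below the Frame, and subtracting it from framePushed() converts it
+ // to an SP-relative offset.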
+ + int32_t stackOffset(int32_t offset) { + MOZ_ASSERT(offset > 0); + return masm.framePushed() - offset; + } + + uint32_t computeHeightWithStackResults(StackHeight stackBase, + uint32_t stackResultBytes) { + MOZ_ASSERT(stackResultBytes); + MOZ_ASSERT(currentStackHeight() >= stackBase.height); + return stackBase.height + stackResultBytes; + } + +#ifdef RABALDR_CHUNKY_STACK + void pushChunkyBytes(uint32_t bytes) { + checkChunkyInvariants(); + uint32_t freeSpace = masm.framePushed() - currentStackHeight_; + if (freeSpace < bytes) { + uint32_t bytesToReserve = AlignBytes(bytes - freeSpace, ChunkSize); + MOZ_ASSERT(bytesToReserve + freeSpace >= bytes); + masm.reserveStack(bytesToReserve); + } + currentStackHeight_ += bytes; + checkChunkyInvariants(); + } + + void popChunkyBytes(uint32_t bytes) { + checkChunkyInvariants(); + currentStackHeight_ -= bytes; + // Sometimes, popChunkyBytes() is used to pop a larger area, as when we drop + // values consumed by a call, and we may need to drop several chunks. But + // never drop the initial chunk. Crucially, the amount we drop is always an + // integral number of chunks. + uint32_t freeSpace = masm.framePushed() - currentStackHeight_; + if (freeSpace >= ChunkSize) { + uint32_t targetAllocSize = framePushedForHeight(currentStackHeight_); + uint32_t amountToFree = masm.framePushed() - targetAllocSize; + MOZ_ASSERT(amountToFree % ChunkSize == 0); + if (amountToFree) { + masm.freeStack(amountToFree); + } + } + checkChunkyInvariants(); + } +#endif + + uint32_t currentStackHeight() const { +#ifdef RABALDR_CHUNKY_STACK + return currentStackHeight_; +#else + return masm.framePushed(); +#endif + } + + private: +#ifdef RABALDR_CHUNKY_STACK + void checkChunkyInvariants() { + MOZ_ASSERT(masm.framePushed() >= fixedAllocSize()); + MOZ_ASSERT(masm.framePushed() >= currentStackHeight_); + MOZ_ASSERT(masm.framePushed() == fixedAllocSize() || + masm.framePushed() - currentStackHeight_ < ChunkSize); + MOZ_ASSERT((masm.framePushed() - localSize_) % ChunkSize == 0); + } +#endif + + // For a given stack height, return the appropriate size of the allocated + // frame. + + uint32_t framePushedForHeight(StackHeight stackHeight) { +#ifdef RABALDR_CHUNKY_STACK + // A more complicated adjustment is needed. + return framePushedForHeight(stackHeight.height); +#else + // The allocated frame size equals the stack height. + return stackHeight.height; +#endif + } + + public: + // The current height of the stack area, not necessarily zero-based, in a + // type-safe way. + + StackHeight stackHeight() const { return StackHeight(currentStackHeight()); } + + // Set the frame height to a previously recorded value. + + void setStackHeight(StackHeight amount) { +#ifdef RABALDR_CHUNKY_STACK + currentStackHeight_ = amount.height; + masm.setFramePushed(framePushedForHeight(amount)); + checkChunkyInvariants(); +#else + masm.setFramePushed(amount.height); +#endif + } + + // The current height of the dynamic part of the stack area (ie, the backing + // store for the evaluation stack), zero-based. + + uint32_t dynamicHeight() const { return currentStackHeight() - localSize_; } + + // Before branching to an outer control label, pop the execution stack to + // the level expected by that region, but do not update masm.framePushed() + // as that will happen as compilation leaves the block. + // + // Note these operate directly on the stack pointer register. 
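+ // For instance (illustrative numbers, non-chunky stack): branching out to a
+ // region whose recorded stack height is 64 while carrying 16 bytes of stack
+ // results, with masm.framePushed() currently 160, pops 160 - (64 + 16) = 80
+ // bytes by adding 80 to the stack pointer.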
+ + void popStackBeforeBranch(StackHeight destStackHeight, + uint32_t stackResultBytes) { + uint32_t framePushedHere = masm.framePushed(); + StackHeight heightThere = + StackHeight(destStackHeight.height + stackResultBytes); + uint32_t framePushedThere = framePushedForHeight(heightThere); + if (framePushedHere > framePushedThere) { + masm.addToStackPtr(Imm32(framePushedHere - framePushedThere)); + } + } + + void popStackBeforeBranch(StackHeight destStackHeight, ResultType type) { + popStackBeforeBranch(destStackHeight, + ABIResultIter::MeasureStackBytes(type)); + } + + // Given that there are |stackParamSize| bytes on the dynamic stack + // corresponding to the stack results, return the stack height once these + // parameters are popped. + + StackHeight stackResultsBase(uint32_t stackParamSize) { + return StackHeight(currentStackHeight() - stackParamSize); + } + + // For most of WebAssembly, adjacent instructions have fallthrough control + // flow between them, which allows us to simply thread the current stack + // height through the compiler. There are two exceptions to this rule: when + // leaving a block via dead code, and when entering the "else" arm of an "if". + // In these cases, the stack height is the block entry height, plus any stack + // values (results in the block exit case, parameters in the else entry case). + + void resetStackHeight(StackHeight destStackHeight, ResultType type) { + uint32_t height = destStackHeight.height; + height += ABIResultIter::MeasureStackBytes(type); + setStackHeight(StackHeight(height)); + } + + // Return offset of stack result. + + uint32_t locateStackResult(const ABIResult& result, StackHeight stackBase, + uint32_t stackResultBytes) { + MOZ_ASSERT(result.onStack()); + MOZ_ASSERT(result.stackOffset() + result.size() <= stackResultBytes); + uint32_t end = computeHeightWithStackResults(stackBase, stackResultBytes); + return end - result.stackOffset(); + } + + public: + ////////////////////////////////////////////////////////////////////// + // + // The Argument area - for outgoing calls. + // + // We abstract these operations as an optimization: we can merge the freeing + // of the argument area and dropping values off the stack after a call. But + // they always amount to manipulating the real stack pointer by some amount. + // + // Note that we do not update currentStackHeight_ for this; the frame does + // not know about outgoing arguments. But we do update framePushed(), so we + // can still index into the frame below the outgoing arguments area. + + // This is always equivalent to a masm.reserveStack() call. + + void allocArgArea(size_t argSize) { + if (argSize) { + masm.reserveStack(argSize); + } + } + + // This frees the argument area allocated by allocArgArea(), and `argSize` + // must be equal to the `argSize` argument to allocArgArea(). In addition + // we drop some values from the frame, corresponding to the values that were + // consumed by the call. + + void freeArgAreaAndPopBytes(size_t argSize, size_t dropSize) { +#ifdef RABALDR_CHUNKY_STACK + // Freeing the outgoing arguments and freeing the consumed values have + // different semantics here, which is why the operation is split. + if (argSize) { + masm.freeStack(argSize); + } + popChunkyBytes(dropSize); +#else + if (argSize + dropSize) { + masm.freeStack(argSize + dropSize); + } +#endif + } +}; + +class BaseStackFrame final : public BaseStackFrameAllocator { + MacroAssembler& masm; + + // The largest observed value of masm.framePushed(), ie, the size of the + // stack frame. 
Read this for its true value only when code generation is + // finished. + uint32_t maxFramePushed_; + + // Patch point where we check for stack overflow. + CodeOffset stackAddOffset_; + + // Low byte offset of pointer to stack results, if any. + Maybe<int32_t> stackResultsPtrOffset_; + + // The offset of TLS pointer. + uint32_t tlsPointerOffset_; + + // Low byte offset of local area for true locals (not parameters). + uint32_t varLow_; + + // High byte offset + 1 of local area for true locals. + uint32_t varHigh_; + + // The stack pointer, cached for brevity. + RegisterOrSP sp_; + + public: + explicit BaseStackFrame(MacroAssembler& masm) + : BaseStackFrameAllocator(masm), + masm(masm), + maxFramePushed_(0), + stackAddOffset_(0), + tlsPointerOffset_(UINT32_MAX), + varLow_(UINT32_MAX), + varHigh_(UINT32_MAX), + sp_(masm.getStackPointer()) {} + + /////////////////////////////////////////////////////////////////////////// + // + // Stack management and overflow checking + + // This must be called once beginFunction has allocated space for the Header + // (the Frame and DebugFrame) and the Local area, and will record the current + // frame size for internal use by the stack abstractions. + + void onFixedStackAllocated() { + maxFramePushed_ = masm.framePushed(); + BaseStackFrameAllocator::onFixedStackAllocated(); + } + + // We won't know until after we've generated code how big the frame will be + // (we may need arbitrary spill slots and outgoing param slots) so emit a + // patchable add that is patched in endFunction(). + // + // Note the platform scratch register may be used by branchPtr(), so + // generally tmp must be something else. + + void checkStack(Register tmp, BytecodeOffset trapOffset) { + stackAddOffset_ = masm.sub32FromStackPtrWithPatch(tmp); + Label ok; + masm.branchPtr(Assembler::Below, + Address(WasmTlsReg, offsetof(wasm::TlsData, stackLimit)), + tmp, &ok); + masm.wasmTrap(Trap::StackOverflow, trapOffset); + masm.bind(&ok); + } + + void patchCheckStack() { + masm.patchSub32FromStackPtr(stackAddOffset_, + Imm32(int32_t(maxFramePushed_))); + } + + // Very large frames are implausible, probably an attack. + + bool checkStackHeight() { + // 512KiB should be enough, considering how Rabaldr uses the stack and + // what the standard limits are: + // + // - 1,000 parameters + // - 50,000 locals + // - 10,000 values on the eval stack (not an official limit) + // + // At sizeof(int64) bytes per slot this works out to about 480KiB. + return maxFramePushed_ <= 512 * 1024; + } + + /////////////////////////////////////////////////////////////////////////// + // + // Local area + + struct Local { + // Type of the value. + const MIRType type; + + // Byte offset from Frame "into" the locals, ie positive for true locals + // and negative for incoming args that read directly from the arg area. + // It assumes the stack is growing down and that locals are on the stack + // at lower addresses than Frame, and is the offset from Frame of the + // lowest-addressed byte of the local. + const int32_t offs; + + Local(MIRType type, int32_t offs) : type(type), offs(offs) {} + + bool isStackArgument() const { return offs < 0; } + }; + + // Profiling shows that the number of parameters and locals frequently + // touches or exceeds 8. So 16 seems like a reasonable starting point. + using LocalVector = Vector<Local, 16, SystemAllocPolicy>; + + // Initialize `localInfo` based on the types of `locals` and `args`. 
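+ // For instance (illustrative, non-debug frame, all arguments in registers):
+ // an i32 argument followed by an i64 local and an f32 local is laid out by
+ // BaseLocalIter at Frame-relative offsets 4, 16 and 20 (each offset is the
+ // distance from the Frame to the lowest-addressed byte of the local), giving
+ // varLow_ = 4 and varHigh_ = 20; one more pointer-sized slot is then reserved
+ // for the TLS pointer.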
+ MOZ_MUST_USE bool setupLocals(const ValTypeVector& locals, + const ArgTypeVector& args, bool debugEnabled, + LocalVector* localInfo) { + if (!localInfo->reserve(locals.length())) { + return false; + } + + DebugOnly<uint32_t> index = 0; + BaseLocalIter i(locals, args, debugEnabled); + for (; !i.done() && i.index() < args.lengthWithoutStackResults(); i++) { + MOZ_ASSERT(i.isArg()); + MOZ_ASSERT(i.index() == index); + localInfo->infallibleEmplaceBack(i.mirType(), i.frameOffset()); + index++; + } + + varLow_ = i.frameSize(); + for (; !i.done(); i++) { + MOZ_ASSERT(!i.isArg()); + MOZ_ASSERT(i.index() == index); + localInfo->infallibleEmplaceBack(i.mirType(), i.frameOffset()); + index++; + } + varHigh_ = i.frameSize(); + + // Reserve an additional stack slot for the TLS pointer. + const uint32_t pointerAlignedVarHigh = AlignBytes(varHigh_, sizeof(void*)); + const uint32_t localSize = pointerAlignedVarHigh + sizeof(void*); + tlsPointerOffset_ = localSize; + + setLocalSize(AlignBytes(localSize, WasmStackAlignment)); + + if (args.hasSyntheticStackResultPointerArg()) { + stackResultsPtrOffset_ = Some(i.stackResultPointerOffset()); + } + + return true; + } + + void zeroLocals(BaseRegAlloc* ra); + + Address addressOfLocal(const Local& local, uint32_t additionalOffset = 0) { + if (local.isStackArgument()) { + return Address(FramePointer, + stackArgumentOffsetFromFp(local) + additionalOffset); + } + return Address(sp_, localOffsetFromSp(local) + additionalOffset); + } + + void loadLocalI32(const Local& src, RegI32 dest) { + masm.load32(addressOfLocal(src), dest); + } + +#ifndef JS_PUNBOX64 + void loadLocalI64Low(const Local& src, RegI32 dest) { + masm.load32(addressOfLocal(src, INT64LOW_OFFSET), dest); + } + + void loadLocalI64High(const Local& src, RegI32 dest) { + masm.load32(addressOfLocal(src, INT64HIGH_OFFSET), dest); + } +#endif + + void loadLocalI64(const Local& src, RegI64 dest) { + masm.load64(addressOfLocal(src), dest); + } + + void loadLocalPtr(const Local& src, RegPtr dest) { + masm.loadPtr(addressOfLocal(src), dest); + } + + void loadLocalF64(const Local& src, RegF64 dest) { + masm.loadDouble(addressOfLocal(src), dest); + } + + void loadLocalF32(const Local& src, RegF32 dest) { + masm.loadFloat32(addressOfLocal(src), dest); + } + +#ifdef ENABLE_WASM_SIMD + void loadLocalV128(const Local& src, RegV128 dest) { + masm.loadUnalignedSimd128(addressOfLocal(src), dest); + } +#endif + + void storeLocalI32(RegI32 src, const Local& dest) { + masm.store32(src, addressOfLocal(dest)); + } + + void storeLocalI64(RegI64 src, const Local& dest) { + masm.store64(src, addressOfLocal(dest)); + } + + void storeLocalPtr(Register src, const Local& dest) { + masm.storePtr(src, addressOfLocal(dest)); + } + + void storeLocalF64(RegF64 src, const Local& dest) { + masm.storeDouble(src, addressOfLocal(dest)); + } + + void storeLocalF32(RegF32 src, const Local& dest) { + masm.storeFloat32(src, addressOfLocal(dest)); + } + +#ifdef ENABLE_WASM_SIMD + void storeLocalV128(RegV128 src, const Local& dest) { + masm.storeUnalignedSimd128(src, addressOfLocal(dest)); + } +#endif + + // Offset off of sp_ for `local`. + int32_t localOffsetFromSp(const Local& local) { + MOZ_ASSERT(!local.isStackArgument()); + return localOffset(local.offs); + } + + // Offset off of frame pointer for `stack argument`. + int32_t stackArgumentOffsetFromFp(const Local& local) { + MOZ_ASSERT(local.isStackArgument()); + return -local.offs; + } + + // The incoming stack result area pointer is for stack results of the function + // being compiled. 
+ void loadIncomingStackResultAreaPtr(RegPtr reg) { + const int32_t offset = stackResultsPtrOffset_.value(); + Address src = offset < 0 ? Address(FramePointer, -offset) + : Address(sp_, stackOffset(offset)); + masm.loadPtr(src, reg); + } + + void storeIncomingStackResultAreaPtr(RegPtr reg) { + // If we get here, that means the pointer to the stack results area was + // passed in as a register, and therefore it will be spilled below the + // frame, so the offset is a positive height. + MOZ_ASSERT(stackResultsPtrOffset_.value() > 0); + masm.storePtr(reg, + Address(sp_, stackOffset(stackResultsPtrOffset_.value()))); + } + + void loadTlsPtr(Register dst) { + masm.loadPtr(Address(sp_, stackOffset(tlsPointerOffset_)), dst); + } + + void storeTlsPtr(Register tls) { + masm.storePtr(tls, Address(sp_, stackOffset(tlsPointerOffset_))); + } + + int32_t getTlsPtrOffset() { return stackOffset(tlsPointerOffset_); } + + // An outgoing stack result area pointer is for stack results of callees of + // the function being compiled. + void computeOutgoingStackResultAreaPtr(const StackResultsLoc& results, + RegPtr dest) { + MOZ_ASSERT(results.height() <= masm.framePushed()); + uint32_t offsetFromSP = masm.framePushed() - results.height(); + masm.moveStackPtrTo(dest); + if (offsetFromSP) { + masm.addPtr(Imm32(offsetFromSP), dest); + } + } + + private: + // Offset off of sp_ for a local with offset `offset` from Frame. + int32_t localOffset(int32_t offset) { return masm.framePushed() - offset; } + + public: + /////////////////////////////////////////////////////////////////////////// + // + // Dynamic area + + static constexpr size_t StackSizeOfPtr = ABIResult::StackSizeOfPtr; + static constexpr size_t StackSizeOfInt64 = ABIResult::StackSizeOfInt64; + static constexpr size_t StackSizeOfFloat = ABIResult::StackSizeOfFloat; + static constexpr size_t StackSizeOfDouble = ABIResult::StackSizeOfDouble; +#ifdef ENABLE_WASM_SIMD + static constexpr size_t StackSizeOfV128 = ABIResult::StackSizeOfV128; +#endif + + uint32_t pushPtr(Register r) { + DebugOnly<uint32_t> stackBefore = currentStackHeight(); +#ifdef RABALDR_CHUNKY_STACK + pushChunkyBytes(StackSizeOfPtr); + masm.storePtr(r, Address(sp_, stackOffset(currentStackHeight()))); +#else + masm.Push(r); +#endif + maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed()); + MOZ_ASSERT(stackBefore + StackSizeOfPtr == currentStackHeight()); + return currentStackHeight(); + } + + uint32_t pushFloat32(FloatRegister r) { + DebugOnly<uint32_t> stackBefore = currentStackHeight(); +#ifdef RABALDR_CHUNKY_STACK + pushChunkyBytes(StackSizeOfFloat); + masm.storeFloat32(r, Address(sp_, stackOffset(currentStackHeight()))); +#else + masm.Push(r); +#endif + maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed()); + MOZ_ASSERT(stackBefore + StackSizeOfFloat == currentStackHeight()); + return currentStackHeight(); + } + +#ifdef ENABLE_WASM_SIMD + uint32_t pushV128(RegV128 r) { + DebugOnly<uint32_t> stackBefore = currentStackHeight(); +# ifdef RABALDR_CHUNKY_STACK + pushChunkyBytes(StackSizeOfV128); +# else + masm.adjustStack(-(int)StackSizeOfV128); +# endif + masm.storeUnalignedSimd128(r, + Address(sp_, stackOffset(currentStackHeight()))); + maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed()); + MOZ_ASSERT(stackBefore + StackSizeOfV128 == currentStackHeight()); + return currentStackHeight(); + } +#endif + + uint32_t pushDouble(FloatRegister r) { + DebugOnly<uint32_t> stackBefore = currentStackHeight(); +#ifdef RABALDR_CHUNKY_STACK + 
pushChunkyBytes(StackSizeOfDouble); + masm.storeDouble(r, Address(sp_, stackOffset(currentStackHeight()))); +#else + masm.Push(r); +#endif + maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed()); + MOZ_ASSERT(stackBefore + StackSizeOfDouble == currentStackHeight()); + return currentStackHeight(); + } + + void popPtr(Register r) { + DebugOnly<uint32_t> stackBefore = currentStackHeight(); +#ifdef RABALDR_CHUNKY_STACK + masm.loadPtr(Address(sp_, stackOffset(currentStackHeight())), r); + popChunkyBytes(StackSizeOfPtr); +#else + masm.Pop(r); +#endif + MOZ_ASSERT(stackBefore - StackSizeOfPtr == currentStackHeight()); + } + + void popFloat32(FloatRegister r) { + DebugOnly<uint32_t> stackBefore = currentStackHeight(); +#ifdef RABALDR_CHUNKY_STACK + masm.loadFloat32(Address(sp_, stackOffset(currentStackHeight())), r); + popChunkyBytes(StackSizeOfFloat); +#else + masm.Pop(r); +#endif + MOZ_ASSERT(stackBefore - StackSizeOfFloat == currentStackHeight()); + } + + void popDouble(FloatRegister r) { + DebugOnly<uint32_t> stackBefore = currentStackHeight(); +#ifdef RABALDR_CHUNKY_STACK + masm.loadDouble(Address(sp_, stackOffset(currentStackHeight())), r); + popChunkyBytes(StackSizeOfDouble); +#else + masm.Pop(r); +#endif + MOZ_ASSERT(stackBefore - StackSizeOfDouble == currentStackHeight()); + } + +#ifdef ENABLE_WASM_SIMD + void popV128(RegV128 r) { + DebugOnly<uint32_t> stackBefore = currentStackHeight(); + masm.loadUnalignedSimd128(Address(sp_, stackOffset(currentStackHeight())), + r); +# ifdef RABALDR_CHUNKY_STACK + popChunkyBytes(StackSizeOfV128); +# else + masm.adjustStack((int)StackSizeOfV128); +# endif + MOZ_ASSERT(stackBefore - StackSizeOfV128 == currentStackHeight()); + } +#endif + + void popBytes(size_t bytes) { + if (bytes > 0) { +#ifdef RABALDR_CHUNKY_STACK + popChunkyBytes(bytes); +#else + masm.freeStack(bytes); +#endif + } + } + + void loadStackI32(int32_t offset, RegI32 dest) { + masm.load32(Address(sp_, stackOffset(offset)), dest); + } + + void loadStackI64(int32_t offset, RegI64 dest) { + masm.load64(Address(sp_, stackOffset(offset)), dest); + } + +#ifndef JS_PUNBOX64 + void loadStackI64Low(int32_t offset, RegI32 dest) { + masm.load32(Address(sp_, stackOffset(offset - INT64LOW_OFFSET)), dest); + } + + void loadStackI64High(int32_t offset, RegI32 dest) { + masm.load32(Address(sp_, stackOffset(offset - INT64HIGH_OFFSET)), dest); + } +#endif + + // Disambiguation: this loads a "Ptr" value from the stack, it does not load + // the "StackPtr". 
+ + void loadStackPtr(int32_t offset, RegPtr dest) { + masm.loadPtr(Address(sp_, stackOffset(offset)), dest); + } + + void loadStackF64(int32_t offset, RegF64 dest) { + masm.loadDouble(Address(sp_, stackOffset(offset)), dest); + } + + void loadStackF32(int32_t offset, RegF32 dest) { + masm.loadFloat32(Address(sp_, stackOffset(offset)), dest); + } + +#ifdef ENABLE_WASM_SIMD + void loadStackV128(int32_t offset, RegV128 dest) { + masm.loadUnalignedSimd128(Address(sp_, stackOffset(offset)), dest); + } +#endif + + uint32_t prepareStackResultArea(StackHeight stackBase, + uint32_t stackResultBytes) { + uint32_t end = computeHeightWithStackResults(stackBase, stackResultBytes); + if (currentStackHeight() < end) { + uint32_t bytes = end - currentStackHeight(); +#ifdef RABALDR_CHUNKY_STACK + pushChunkyBytes(bytes); +#else + masm.reserveStack(bytes); +#endif + maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed()); + } + return end; + } + + void finishStackResultArea(StackHeight stackBase, uint32_t stackResultBytes) { + uint32_t end = computeHeightWithStackResults(stackBase, stackResultBytes); + MOZ_ASSERT(currentStackHeight() >= end); + popBytes(currentStackHeight() - end); + } + + // |srcHeight| and |destHeight| are stack heights *including* |bytes|. + void shuffleStackResultsTowardFP(uint32_t srcHeight, uint32_t destHeight, + uint32_t bytes, Register temp) { + MOZ_ASSERT(destHeight < srcHeight); + MOZ_ASSERT(bytes % sizeof(uint32_t) == 0); + uint32_t destOffset = stackOffset(destHeight) + bytes; + uint32_t srcOffset = stackOffset(srcHeight) + bytes; + while (bytes >= sizeof(intptr_t)) { + destOffset -= sizeof(intptr_t); + srcOffset -= sizeof(intptr_t); + bytes -= sizeof(intptr_t); + masm.loadPtr(Address(sp_, srcOffset), temp); + masm.storePtr(temp, Address(sp_, destOffset)); + } + if (bytes) { + MOZ_ASSERT(bytes == sizeof(uint32_t)); + destOffset -= sizeof(uint32_t); + srcOffset -= sizeof(uint32_t); + masm.load32(Address(sp_, srcOffset), temp); + masm.store32(temp, Address(sp_, destOffset)); + } + } + + // Unlike the overload that operates on raw heights, |srcHeight| and + // |destHeight| are stack heights *not including* |bytes|. + void shuffleStackResultsTowardFP(StackHeight srcHeight, + StackHeight destHeight, uint32_t bytes, + Register temp) { + MOZ_ASSERT(srcHeight.isValid()); + MOZ_ASSERT(destHeight.isValid()); + uint32_t src = computeHeightWithStackResults(srcHeight, bytes); + uint32_t dest = computeHeightWithStackResults(destHeight, bytes); + MOZ_ASSERT(src <= currentStackHeight()); + MOZ_ASSERT(dest <= currentStackHeight()); + shuffleStackResultsTowardFP(src, dest, bytes, temp); + } + + // |srcHeight| and |destHeight| are stack heights *including* |bytes|. + void shuffleStackResultsTowardSP(uint32_t srcHeight, uint32_t destHeight, + uint32_t bytes, Register temp) { + MOZ_ASSERT(destHeight > srcHeight); + MOZ_ASSERT(bytes % sizeof(uint32_t) == 0); + uint32_t destOffset = stackOffset(destHeight); + uint32_t srcOffset = stackOffset(srcHeight); + while (bytes >= sizeof(intptr_t)) { + masm.loadPtr(Address(sp_, srcOffset), temp); + masm.storePtr(temp, Address(sp_, destOffset)); + destOffset += sizeof(intptr_t); + srcOffset += sizeof(intptr_t); + bytes -= sizeof(intptr_t); + } + if (bytes) { + MOZ_ASSERT(bytes == sizeof(uint32_t)); + masm.load32(Address(sp_, srcOffset), temp); + masm.store32(temp, Address(sp_, destOffset)); + } + } + + // Copy results from the top of the current stack frame to an area of memory, + // and pop the stack accordingly. 
`dest` is the address of the low byte of + // that memory. + void popStackResultsToMemory(Register dest, uint32_t bytes, Register temp) { + MOZ_ASSERT(bytes <= currentStackHeight()); + MOZ_ASSERT(bytes % sizeof(uint32_t) == 0); + uint32_t bytesToPop = bytes; + uint32_t srcOffset = stackOffset(currentStackHeight()); + uint32_t destOffset = 0; + while (bytes >= sizeof(intptr_t)) { + masm.loadPtr(Address(sp_, srcOffset), temp); + masm.storePtr(temp, Address(dest, destOffset)); + destOffset += sizeof(intptr_t); + srcOffset += sizeof(intptr_t); + bytes -= sizeof(intptr_t); + } + if (bytes) { + MOZ_ASSERT(bytes == sizeof(uint32_t)); + masm.load32(Address(sp_, srcOffset), temp); + masm.store32(temp, Address(dest, destOffset)); + } + popBytes(bytesToPop); + } + + private: + void store32BitsToStack(int32_t imm, uint32_t destHeight, Register temp) { + masm.move32(Imm32(imm), temp); + masm.store32(temp, Address(sp_, stackOffset(destHeight))); + } + + void store64BitsToStack(int64_t imm, uint32_t destHeight, Register temp) { +#ifdef JS_PUNBOX64 + masm.move64(Imm64(imm), Register64(temp)); + masm.store64(Register64(temp), Address(sp_, stackOffset(destHeight))); +#else + union { + int64_t i64; + int32_t i32[2]; + } bits = {.i64 = imm}; + static_assert(sizeof(bits) == 8); + store32BitsToStack(bits.i32[0], destHeight, temp); + store32BitsToStack(bits.i32[1], destHeight - sizeof(int32_t), temp); +#endif + } + + public: + void storeImmediatePtrToStack(intptr_t imm, uint32_t destHeight, + Register temp) { +#ifdef JS_PUNBOX64 + static_assert(StackSizeOfPtr == 8); + store64BitsToStack(imm, destHeight, temp); +#else + static_assert(StackSizeOfPtr == 4); + store32BitsToStack(int32_t(imm), destHeight, temp); +#endif + } + + void storeImmediateI64ToStack(int64_t imm, uint32_t destHeight, + Register temp) { + store64BitsToStack(imm, destHeight, temp); + } + + void storeImmediateF32ToStack(float imm, uint32_t destHeight, Register temp) { + union { + int32_t i32; + float f32; + } bits = {.f32 = imm}; + static_assert(sizeof(bits) == 4); + // Do not store 4 bytes if StackSizeOfFloat == 8. It's probably OK to do + // so, but it costs little to store something predictable. + if (StackSizeOfFloat == 4) { + store32BitsToStack(bits.i32, destHeight, temp); + } else { + store64BitsToStack(uint32_t(bits.i32), destHeight, temp); + } + } + + void storeImmediateF64ToStack(double imm, uint32_t destHeight, + Register temp) { + union { + int64_t i64; + double f64; + } bits = {.f64 = imm}; + static_assert(sizeof(bits) == 8); + store64BitsToStack(bits.i64, destHeight, temp); + } + +#ifdef ENABLE_WASM_SIMD + void storeImmediateV128ToStack(V128 imm, uint32_t destHeight, Register temp) { + union { + int32_t i32[4]; + uint8_t bytes[16]; + } bits; + static_assert(sizeof(bits) == 16); + memcpy(bits.bytes, imm.bytes, 16); + for (unsigned i = 0; i < 4; i++) { + store32BitsToStack(bits.i32[i], destHeight - i * sizeof(int32_t), temp); + } + } +#endif +}; + +void BaseStackFrame::zeroLocals(BaseRegAlloc* ra) { + MOZ_ASSERT(varLow_ != UINT32_MAX); + + if (varLow_ == varHigh_) { + return; + } + + static const uint32_t wordSize = sizeof(void*); + + // The adjustments to 'low' by the size of the item being stored compensates + // for the fact that locals offsets are the offsets from Frame to the bytes + // directly "above" the locals in the locals area. See comment at Local. + + // On 64-bit systems we may have 32-bit alignment for the local area as it + // may be preceded by parameters and prologue/debug data. 
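+  //
+  // As a concrete (hypothetical) example of the fix-up just below: on a
+  // 64-bit target (wordSize == 8) with varLow_ == 12, the 32-bit store
+  // zeroes the four bytes at Frame offsets 12..16 and bumps `low` to 16,
+  // after which the remainder of the area can be zeroed a word at a time.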
+ + uint32_t low = varLow_; + if (low % wordSize) { + masm.store32(Imm32(0), Address(sp_, localOffset(low + 4))); + low += 4; + } + MOZ_ASSERT(low % wordSize == 0); + + const uint32_t high = AlignBytes(varHigh_, wordSize); + + // An UNROLL_LIMIT of 16 is chosen so that we only need an 8-bit signed + // immediate to represent the offset in the store instructions in the loop + // on x64. + + const uint32_t UNROLL_LIMIT = 16; + const uint32_t initWords = (high - low) / wordSize; + const uint32_t tailWords = initWords % UNROLL_LIMIT; + const uint32_t loopHigh = high - (tailWords * wordSize); + + // With only one word to initialize, just store an immediate zero. + + if (initWords == 1) { + masm.storePtr(ImmWord(0), Address(sp_, localOffset(low + wordSize))); + return; + } + + // For other cases, it's best to have a zero in a register. + // + // One can do more here with SIMD registers (store 16 bytes at a time) or + // with instructions like STRD on ARM (store 8 bytes at a time), but that's + // for another day. + + RegI32 zero = ra->needI32(); + masm.mov(ImmWord(0), zero); + + // For the general case we want to have a loop body of UNROLL_LIMIT stores + // and then a tail of less than UNROLL_LIMIT stores. When initWords is less + // than 2*UNROLL_LIMIT the loop trip count is at most 1 and there is no + // benefit to having the pointer calculations and the compare-and-branch. + // So we completely unroll when we have initWords < 2 * UNROLL_LIMIT. (In + // this case we'll end up using 32-bit offsets on x64 for up to half of the + // stores, though.) + + // Fully-unrolled case. + + if (initWords < 2 * UNROLL_LIMIT) { + for (uint32_t i = low; i < high; i += wordSize) { + masm.storePtr(zero, Address(sp_, localOffset(i + wordSize))); + } + ra->freeI32(zero); + return; + } + + // Unrolled loop with a tail. Stores will use negative offsets. That's OK + // for x86 and ARM, at least. + + // Compute pointer to the highest-addressed slot on the frame. + RegI32 p = ra->needI32(); + masm.computeEffectiveAddress(Address(sp_, localOffset(low + wordSize)), p); + + // Compute pointer to the lowest-addressed slot on the frame that will be + // initialized by the loop body. + RegI32 lim = ra->needI32(); + masm.computeEffectiveAddress(Address(sp_, localOffset(loopHigh + wordSize)), + lim); + + // The loop body. Eventually we'll have p == lim and exit the loop. + Label again; + masm.bind(&again); + for (uint32_t i = 0; i < UNROLL_LIMIT; ++i) { + masm.storePtr(zero, Address(p, -(wordSize * i))); + } + masm.subPtr(Imm32(UNROLL_LIMIT * wordSize), p); + masm.branchPtr(Assembler::LessThan, lim, p, &again); + + // The tail. + for (uint32_t i = 0; i < tailWords; ++i) { + masm.storePtr(zero, Address(p, -(wordSize * i))); + } + + ra->freeI32(p); + ra->freeI32(lim); + ra->freeI32(zero); +} + +// Value stack: stack elements + +struct Stk { + private: + Stk() : kind_(Unknown), i64val_(0) {} + + public: + enum Kind { + // The Mem opcodes are all clustered at the beginning to + // allow for a quick test within sync(). + MemI32, // 32-bit integer stack value ("offs") + MemI64, // 64-bit integer stack value ("offs") + MemF32, // 32-bit floating stack value ("offs") + MemF64, // 64-bit floating stack value ("offs") +#ifdef ENABLE_WASM_SIMD + MemV128, // 128-bit vector stack value ("offs") +#endif + MemRef, // reftype (pointer wide) stack value ("offs") + + // The Local opcodes follow the Mem opcodes for a similar + // quick test within hasLocal(). 
+ LocalI32, // Local int32 var ("slot") + LocalI64, // Local int64 var ("slot") + LocalF32, // Local float32 var ("slot") + LocalF64, // Local double var ("slot") +#ifdef ENABLE_WASM_SIMD + LocalV128, // Local v128 var ("slot") +#endif + LocalRef, // Local reftype (pointer wide) var ("slot") + + RegisterI32, // 32-bit integer register ("i32reg") + RegisterI64, // 64-bit integer register ("i64reg") + RegisterF32, // 32-bit floating register ("f32reg") + RegisterF64, // 64-bit floating register ("f64reg") +#ifdef ENABLE_WASM_SIMD + RegisterV128, // 128-bit vector register ("v128reg") +#endif + RegisterRef, // reftype (pointer wide) register ("refReg") + + ConstI32, // 32-bit integer constant ("i32val") + ConstI64, // 64-bit integer constant ("i64val") + ConstF32, // 32-bit floating constant ("f32val") + ConstF64, // 64-bit floating constant ("f64val") +#ifdef ENABLE_WASM_SIMD + ConstV128, // 128-bit vector constant ("v128val") +#endif + ConstRef, // reftype (pointer wide) constant ("refval") + + Unknown, + }; + + Kind kind_; + + static const Kind MemLast = MemRef; + static const Kind LocalLast = LocalRef; + + union { + RegI32 i32reg_; + RegI64 i64reg_; + RegPtr refReg_; + RegF32 f32reg_; + RegF64 f64reg_; +#ifdef ENABLE_WASM_SIMD + RegV128 v128reg_; +#endif + int32_t i32val_; + int64_t i64val_; + intptr_t refval_; + float f32val_; + double f64val_; +#ifdef ENABLE_WASM_SIMD + V128 v128val_; +#endif + uint32_t slot_; + uint32_t offs_; + }; + + explicit Stk(RegI32 r) : kind_(RegisterI32), i32reg_(r) {} + explicit Stk(RegI64 r) : kind_(RegisterI64), i64reg_(r) {} + explicit Stk(RegPtr r) : kind_(RegisterRef), refReg_(r) {} + explicit Stk(RegF32 r) : kind_(RegisterF32), f32reg_(r) {} + explicit Stk(RegF64 r) : kind_(RegisterF64), f64reg_(r) {} +#ifdef ENABLE_WASM_SIMD + explicit Stk(RegV128 r) : kind_(RegisterV128), v128reg_(r) {} +#endif + explicit Stk(int32_t v) : kind_(ConstI32), i32val_(v) {} + explicit Stk(int64_t v) : kind_(ConstI64), i64val_(v) {} + explicit Stk(float v) : kind_(ConstF32), f32val_(v) {} + explicit Stk(double v) : kind_(ConstF64), f64val_(v) {} +#ifdef ENABLE_WASM_SIMD + explicit Stk(V128 v) : kind_(ConstV128), v128val_(v) {} +#endif + explicit Stk(Kind k, uint32_t v) : kind_(k), slot_(v) { + MOZ_ASSERT(k > MemLast && k <= LocalLast); + } + static Stk StkRef(intptr_t v) { + Stk s; + s.kind_ = ConstRef; + s.refval_ = v; + return s; + } + static Stk StackResult(ValType type, uint32_t offs) { + Kind k; + switch (type.kind()) { + case ValType::I32: + k = Stk::MemI32; + break; + case ValType::I64: + k = Stk::MemI64; + break; + case ValType::V128: +#ifdef ENABLE_WASM_SIMD + k = Stk::MemV128; + break; +#else + MOZ_CRASH("No SIMD"); +#endif + case ValType::F32: + k = Stk::MemF32; + break; + case ValType::F64: + k = Stk::MemF64; + break; + case ValType::Ref: + k = Stk::MemRef; + break; + } + Stk s; + s.setOffs(k, offs); + return s; + } + + void setOffs(Kind k, uint32_t v) { + MOZ_ASSERT(k <= MemLast); + kind_ = k; + offs_ = v; + } + + Kind kind() const { return kind_; } + bool isMem() const { return kind_ <= MemLast; } + + RegI32 i32reg() const { + MOZ_ASSERT(kind_ == RegisterI32); + return i32reg_; + } + RegI64 i64reg() const { + MOZ_ASSERT(kind_ == RegisterI64); + return i64reg_; + } + RegPtr refReg() const { + MOZ_ASSERT(kind_ == RegisterRef); + return refReg_; + } + RegF32 f32reg() const { + MOZ_ASSERT(kind_ == RegisterF32); + return f32reg_; + } + RegF64 f64reg() const { + MOZ_ASSERT(kind_ == RegisterF64); + return f64reg_; + } +#ifdef ENABLE_WASM_SIMD + RegV128 v128reg() const 
{ + MOZ_ASSERT(kind_ == RegisterV128); + return v128reg_; + } +#endif + int32_t i32val() const { + MOZ_ASSERT(kind_ == ConstI32); + return i32val_; + } + int64_t i64val() const { + MOZ_ASSERT(kind_ == ConstI64); + return i64val_; + } + intptr_t refval() const { + MOZ_ASSERT(kind_ == ConstRef); + return refval_; + } + + // For these two, use an out-param instead of simply returning, to + // use the normal stack and not the x87 FP stack (which has effect on + // NaNs with the signaling bit set). + + void f32val(float* out) const { + MOZ_ASSERT(kind_ == ConstF32); + *out = f32val_; + } + void f64val(double* out) const { + MOZ_ASSERT(kind_ == ConstF64); + *out = f64val_; + } + +#ifdef ENABLE_WASM_SIMD + // For SIMD, do the same as for floats since we're using float registers to + // hold vectors; this is just conservative. + void v128val(V128* out) const { + MOZ_ASSERT(kind_ == ConstV128); + *out = v128val_; + } +#endif + + uint32_t slot() const { + MOZ_ASSERT(kind_ > MemLast && kind_ <= LocalLast); + return slot_; + } + uint32_t offs() const { + MOZ_ASSERT(isMem()); + return offs_; + } +}; + +typedef Vector<Stk, 0, SystemAllocPolicy> StkVector; + +// MachineStackTracker, used for stack-slot pointerness tracking. + +class MachineStackTracker { + // Simulates the machine's stack, with one bool per word. Index zero in + // this vector corresponds to the highest address in the machine stack. The + // last entry corresponds to what SP currently points at. This all assumes + // a grow-down stack. + // + // numPtrs_ contains the number of "true" values in vec_, and is therefore + // redundant. But it serves as a constant-time way to detect the common + // case where vec_ holds no "true" values. + size_t numPtrs_; + Vector<bool, 64, SystemAllocPolicy> vec_; + + public: + MachineStackTracker() : numPtrs_(0) {} + + ~MachineStackTracker() { +#ifdef DEBUG + size_t n = 0; + for (bool b : vec_) { + n += (b ? 1 : 0); + } + MOZ_ASSERT(n == numPtrs_); +#endif + } + + // Clone this MachineStackTracker, writing the result at |dst|. + [[nodiscard]] bool cloneTo(MachineStackTracker* dst) { + MOZ_ASSERT(dst->vec_.empty()); + if (!dst->vec_.appendAll(vec_)) { + return false; + } + dst->numPtrs_ = numPtrs_; + return true; + } + + // Notionally push |n| non-pointers on the stack. + [[nodiscard]] bool pushNonGCPointers(size_t n) { + return vec_.appendN(false, n); + } + + // Mark the stack slot |offsetFromSP| up from the bottom as holding a + // pointer. + void setGCPointer(size_t offsetFromSP) { + // offsetFromSP == 0 denotes the most recently pushed item, == 1 the + // second most recently pushed item, etc. + MOZ_ASSERT(offsetFromSP < vec_.length()); + + size_t offsetFromTop = vec_.length() - 1 - offsetFromSP; + numPtrs_ = numPtrs_ + 1 - (vec_[offsetFromTop] ? 1 : 0); + vec_[offsetFromTop] = true; + } + + // Query the pointerness of the slot |offsetFromSP| up from the bottom. + bool isGCPointer(size_t offsetFromSP) { + MOZ_ASSERT(offsetFromSP < vec_.length()); + + size_t offsetFromTop = vec_.length() - 1 - offsetFromSP; + return vec_[offsetFromTop]; + } + + // Return the number of words tracked by this MachineStackTracker. + size_t length() { return vec_.length(); } + + // Return the number of pointer-typed words tracked by this + // MachineStackTracker. + size_t numPtrs() { + MOZ_ASSERT(numPtrs_ <= length()); + return numPtrs_; + } + + // Discard all contents, but (per mozilla::Vector::clear semantics) don't + // free or reallocate any dynamic storage associated with |vec_|. 
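+  //
+  // (A worked example of the index flip used by setGCPointer() and
+  // isGCPointer() above, with a hypothetical tracker covering four words,
+  // ie vec_.length() == 4:
+  //
+  //   offsetFromSP == 0  -->  offsetFromTop == 4 - 1 - 0 == 3
+  //   offsetFromSP == 3  -->  offsetFromTop == 4 - 1 - 3 == 0
+  //
+  // so element zero of vec_ always describes the highest-addressed word,
+  // matching the comment on vec_ at the top of the class.)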
+  void clear() {
+    vec_.clear();
+    numPtrs_ = 0;
+  }
+};
+
+// StackMapGenerator, which carries all state needed to create stack maps.
+
+enum class HasDebugFrame { No, Yes };
+
+struct StackMapGenerator {
+ private:
+  // --- These are constant for the life of the function's compilation ---
+
+  // For generating stack maps, we'll need to know the offsets of registers
+  // as saved by the trap exit stub.
+  const MachineState& trapExitLayout_;
+  const size_t trapExitLayoutNumWords_;
+
+  // Completed stackmaps are added here
+  StackMaps* stackMaps_;
+
+  // So as to be able to get current offset when creating stack maps
+  const MacroAssembler& masm_;
+
+ public:
+  // --- These are constant once we've completed beginFunction() ---
+
+  // The number of words of arguments passed to this function in memory.
+  size_t numStackArgWords;
+
+  MachineStackTracker machineStackTracker;  // tracks machine stack pointerness
+
+  // This holds masm.framePushed at entry to the function's body. It is a
+  // Maybe because createStackMap needs to know whether or not we're still
+  // in the prologue. It makes a Nothing-to-Some transition just once per
+  // function.
+  Maybe<uint32_t> framePushedAtEntryToBody;
+
+  // --- These can change at any point ---
+
+  // This holds masm.framePushed as it would be for a function call
+  // instruction, but excluding the stack area used to pass arguments in
+  // memory. That is, for an upcoming function call, this will hold
+  //
+  //   masm.framePushed() at the call instruction -
+  //      StackArgAreaSizeUnaligned(argumentTypes)
+  //
+  // This value denotes the lowest-addressed stack word covered by the current
+  // function's stackmap. Words below this point form the highest-addressed
+  // area of the callee's stackmap. Note that all alignment padding above the
+  // arguments-in-memory themselves belongs to the caller's stack map, which
+  // is why this is defined in terms of StackArgAreaSizeUnaligned() rather than
+  // StackArgAreaSizeAligned().
+  //
+  // When not inside a function call setup/teardown sequence, it is Nothing.
+  // It can make Nothing-to/from-Some transitions arbitrarily as we progress
+  // through the function body.
+  Maybe<uint32_t> framePushedExcludingOutboundCallArgs;
+
+  // The number of memory-resident, ref-typed entries on the containing
+  // BaseCompiler::stk_.
+  size_t memRefsOnStk;
+
+  // This is a copy of machineStackTracker that is used only within individual
+  // calls to createStackMap. It is here only to avoid possible heap allocation
+  // costs resulting from making it local to createStackMap().
+  MachineStackTracker augmentedMst;
+
+  StackMapGenerator(StackMaps* stackMaps, const MachineState& trapExitLayout,
+                    const size_t trapExitLayoutNumWords,
+                    const MacroAssembler& masm)
+      : trapExitLayout_(trapExitLayout),
+        trapExitLayoutNumWords_(trapExitLayoutNumWords),
+        stackMaps_(stackMaps),
+        masm_(masm),
+        numStackArgWords(0),
+        memRefsOnStk(0) {}
+
+  // At the beginning of a function, we may have live roots in registers (as
+  // arguments) at the point where we perform a stack overflow check. This
+  // method generates the "extra" stackmap entries to describe that, in the
+  // case that the check fails and we wind up calling into the wasm exit
+  // stub, as generated by GenerateTrapExit().
+  //
+  // The resulting map must correspond precisely with the stack layout
+  // created for the integer registers as saved by (code generated by)
+  // GenerateTrapExit().
To do that we use trapExitLayout_ and + // trapExitLayoutNumWords_, which together comprise a description of the + // layout and are created by GenerateTrapExitMachineState(). + [[nodiscard]] bool generateStackmapEntriesForTrapExit( + const ArgTypeVector& args, ExitStubMapVector* extras) { + return GenerateStackmapEntriesForTrapExit(args, trapExitLayout_, + trapExitLayoutNumWords_, extras); + } + + // Creates a stackmap associated with the instruction denoted by + // |assemblerOffset|, incorporating pointers from the current operand + // stack |stk|, incorporating possible extra pointers in |extra| at the + // lower addressed end, and possibly with the associated frame having a + // ref-typed DebugFrame as indicated by |refDebugFrame|. + [[nodiscard]] bool createStackMap(const char* who, + const ExitStubMapVector& extras, + uint32_t assemblerOffset, + HasDebugFrame debugFrame, + const StkVector& stk) { + size_t countedPointers = machineStackTracker.numPtrs() + memRefsOnStk; +#ifndef DEBUG + // An important optimization. If there are obviously no pointers, as + // we expect in the majority of cases, exit quickly. + if (countedPointers == 0 && debugFrame == HasDebugFrame::No) { + // We can skip creating the map if there are no |true| elements in + // |extras|. + bool extrasHasRef = false; + for (bool b : extras) { + if (b) { + extrasHasRef = true; + break; + } + } + if (!extrasHasRef) { + return true; + } + } +#else + // In the debug case, create the stack map regardless, and cross-check + // the pointer-counting below. We expect the final map to have + // |countedPointers| in total. This doesn't include those in the + // DebugFrame, but they do not appear in the map's bitmap. Note that + // |countedPointers| is debug-only from this point onwards. + for (bool b : extras) { + countedPointers += (b ? 1 : 0); + } +#endif + + // Start with the frame-setup map, and add operand-stack information to + // that. augmentedMst holds live data only within individual calls to + // createStackMap. + augmentedMst.clear(); + if (!machineStackTracker.cloneTo(&augmentedMst)) { + return false; + } + + // At this point, augmentedMst only contains entries covering the + // incoming argument area (if any) and for the area allocated by this + // function's prologue. We now need to calculate how far the machine's + // stack pointer is below where it was at the start of the body. But we + // must take care not to include any words pushed as arguments to an + // upcoming function call, since those words "belong" to the stackmap of + // the callee, not to the stackmap of this function. Note however that + // any alignment padding pushed prior to pushing the args *does* belong to + // this function. + // + // That padding is taken into account at the point where + // framePushedExcludingOutboundCallArgs is set, viz, in startCallArgs(), + // and comprises two components: + // + // * call->frameAlignAdjustment + // * the padding applied to the stack arg area itself. That is: + // StackArgAreaSize(argTys) - StackArgAreaSizeUnpadded(argTys) + Maybe<uint32_t> framePushedExcludingArgs; + if (framePushedAtEntryToBody.isNothing()) { + // Still in the prologue. framePushedExcludingArgs remains Nothing. + MOZ_ASSERT(framePushedExcludingOutboundCallArgs.isNothing()); + } else { + // In the body. + MOZ_ASSERT(masm_.framePushed() >= framePushedAtEntryToBody.value()); + if (framePushedExcludingOutboundCallArgs.isSome()) { + // In the body, and we've potentially pushed some args onto the stack. 
+ // We must ignore them when sizing the stackmap. + MOZ_ASSERT(masm_.framePushed() >= + framePushedExcludingOutboundCallArgs.value()); + MOZ_ASSERT(framePushedExcludingOutboundCallArgs.value() >= + framePushedAtEntryToBody.value()); + framePushedExcludingArgs = + Some(framePushedExcludingOutboundCallArgs.value()); + } else { + // In the body, but not with call args on the stack. The stackmap + // must be sized so as to extend all the way "down" to + // masm_.framePushed(). + framePushedExcludingArgs = Some(masm_.framePushed()); + } + } + + if (framePushedExcludingArgs.isSome()) { + uint32_t bodyPushedBytes = + framePushedExcludingArgs.value() - framePushedAtEntryToBody.value(); + MOZ_ASSERT(0 == bodyPushedBytes % sizeof(void*)); + if (!augmentedMst.pushNonGCPointers(bodyPushedBytes / sizeof(void*))) { + return false; + } + } + + // Scan the operand stack, marking pointers in the just-added new + // section. + MOZ_ASSERT_IF(framePushedAtEntryToBody.isNothing(), stk.empty()); + MOZ_ASSERT_IF(framePushedExcludingArgs.isNothing(), stk.empty()); + + for (const Stk& v : stk) { +#ifndef DEBUG + // We don't track roots in registers, per rationale below, so if this + // doesn't hold, something is seriously wrong, and we're likely to get a + // GC-related crash. + MOZ_RELEASE_ASSERT(v.kind() != Stk::RegisterRef); + if (v.kind() != Stk::MemRef) { + continue; + } +#else + // Take the opportunity to check everything we reasonably can about + // operand stack elements. + switch (v.kind()) { + case Stk::MemI32: + case Stk::MemI64: + case Stk::MemF32: + case Stk::MemF64: + case Stk::ConstI32: + case Stk::ConstI64: + case Stk::ConstF32: + case Stk::ConstF64: +# ifdef ENABLE_WASM_SIMD + case Stk::MemV128: + case Stk::ConstV128: +# endif + // All of these have uninteresting type. + continue; + case Stk::LocalI32: + case Stk::LocalI64: + case Stk::LocalF32: + case Stk::LocalF64: +# ifdef ENABLE_WASM_SIMD + case Stk::LocalV128: +# endif + // These also have uninteresting type. Check that they live in the + // section of stack set up by beginFunction(). The unguarded use of + // |value()| here is safe due to the assertion above this loop. + MOZ_ASSERT(v.offs() <= framePushedAtEntryToBody.value()); + continue; + case Stk::RegisterI32: + case Stk::RegisterI64: + case Stk::RegisterF32: + case Stk::RegisterF64: +# ifdef ENABLE_WASM_SIMD + case Stk::RegisterV128: +# endif + // These also have uninteresting type, but more to the point: all + // registers holding live values should have been flushed to the + // machine stack immediately prior to the instruction to which this + // stackmap pertains. So these can't happen. + MOZ_CRASH("createStackMap: operand stack has Register-non-Ref"); + case Stk::MemRef: + // This is the only case we care about. We'll handle it after the + // switch. + break; + case Stk::LocalRef: + // We need the stackmap to mention this pointer, but it should + // already be in the machineStackTracker section created by + // beginFunction(). + MOZ_ASSERT(v.offs() <= framePushedAtEntryToBody.value()); + continue; + case Stk::ConstRef: + // This can currently only be a null pointer. + MOZ_ASSERT(v.refval() == 0); + continue; + case Stk::RegisterRef: + // This can't happen, per rationale above. + MOZ_CRASH("createStackMap: operand stack contains RegisterRef"); + default: + MOZ_CRASH("createStackMap: unknown operand stack element"); + } +#endif + // v.offs() holds masm.framePushed() at the point immediately after it + // was pushed on the stack. 
Since it's still on the stack, + // masm.framePushed() can't be less. + MOZ_ASSERT(v.offs() <= framePushedExcludingArgs.value()); + uint32_t offsFromMapLowest = framePushedExcludingArgs.value() - v.offs(); + MOZ_ASSERT(0 == offsFromMapLowest % sizeof(void*)); + augmentedMst.setGCPointer(offsFromMapLowest / sizeof(void*)); + } + + // Create the final StackMap. The initial map is zeroed out, so there's + // no need to write zero bits in it. + const uint32_t extraWords = extras.length(); + const uint32_t augmentedMstWords = augmentedMst.length(); + const uint32_t numMappedWords = extraWords + augmentedMstWords; + StackMap* stackMap = StackMap::create(numMappedWords); + if (!stackMap) { + return false; + } + + { + // First the exit stub extra words, if any. + uint32_t i = 0; + for (bool b : extras) { + if (b) { + stackMap->setBit(i); + } + i++; + } + } + // Followed by the "main" part of the map. + for (uint32_t i = 0; i < augmentedMstWords; i++) { + if (augmentedMst.isGCPointer(i)) { + stackMap->setBit(extraWords + i); + } + } + + stackMap->setExitStubWords(extraWords); + + // Record in the map, how far down from the highest address the Frame* is. + // Take the opportunity to check that we haven't marked any part of the + // Frame itself as a pointer. + stackMap->setFrameOffsetFromTop(numStackArgWords + + sizeof(Frame) / sizeof(void*)); +#ifdef DEBUG + for (uint32_t i = 0; i < sizeof(Frame) / sizeof(void*); i++) { + MOZ_ASSERT(stackMap->getBit(stackMap->numMappedWords - + stackMap->frameOffsetFromTop + i) == 0); + } +#endif + + // Note the presence of a ref-typed DebugFrame, if any. + if (debugFrame == HasDebugFrame::Yes) { + stackMap->setHasDebugFrame(); + } + + // Add the completed map to the running collection thereof. + if (!stackMaps_->add((uint8_t*)(uintptr_t)assemblerOffset, stackMap)) { + stackMap->destroy(); + return false; + } + +#ifdef DEBUG + { + // Crosscheck the map pointer counting. + uint32_t nw = stackMap->numMappedWords; + uint32_t np = 0; + for (uint32_t i = 0; i < nw; i++) { + np += stackMap->getBit(i); + } + MOZ_ASSERT(size_t(np) == countedPointers); + } +#endif + + return true; + } +}; + +// The baseline compiler proper. + +class BaseCompiler final : public BaseCompilerInterface { + using Local = BaseStackFrame::Local; + using LabelVector = Vector<NonAssertingLabel, 8, SystemAllocPolicy>; + + // Bit set used for simple bounds check elimination. Capping this at 64 + // locals makes sense; even 32 locals would probably be OK in practice. + // + // For more information about BCE, see the block comment above + // popMemoryAccess(), below. + + using BCESet = uint64_t; + + // Control node, representing labels and stack heights at join points. 
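+  //
+  // As a sketch of how the BCE bits below are meant to flow across a join
+  // (see the BCE block comment referenced above for the full protocol): a
+  // branch to a control item merges the compiler's current bceSafe_ into
+  // that item's bceSafeOnExit, roughly
+  //
+  //   target.bceSafeOnExit &= bceSafe_;
+  //
+  // so after the join, only locals proven bounds-checked on every incoming
+  // path remain marked safe.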
+ + struct Control { + NonAssertingLabel label; // The "exit" label + NonAssertingLabel otherLabel; // Used for the "else" branch of if-then-else + StackHeight stackHeight; // From BaseStackFrame + uint32_t stackSize; // Value stack height + BCESet bceSafeOnEntry; // Bounds check info flowing into the item + BCESet bceSafeOnExit; // Bounds check info flowing out of the item + bool deadOnArrival; // deadCode_ was set on entry to the region + bool deadThenBranch; // deadCode_ was set on exit from "then" + + Control() + : stackHeight(StackHeight::Invalid()), + stackSize(UINT32_MAX), + bceSafeOnEntry(0), + bceSafeOnExit(~BCESet(0)), + deadOnArrival(false), + deadThenBranch(false) {} + }; + + class NothingVector { + Nothing unused_; + + public: + bool resize(size_t length) { return true; } + Nothing& operator[](size_t) { return unused_; } + Nothing& back() { return unused_; } + }; + + struct BaseCompilePolicy { + // The baseline compiler tracks values on a stack of its own -- it + // needs to scan that stack for spilling -- and thus has no need + // for the values maintained by the iterator. + using Value = Nothing; + using ValueVector = NothingVector; + + // The baseline compiler uses the iterator's control stack, attaching + // its own control information. + using ControlItem = Control; + }; + + using BaseOpIter = OpIter<BaseCompilePolicy>; + + // The baseline compiler will use OOL code more sparingly than + // Baldr since our code is not high performance and frills like + // code density and branch prediction friendliness will be less + // important. + + class OutOfLineCode : public TempObject { + private: + NonAssertingLabel entry_; + NonAssertingLabel rejoin_; + StackHeight stackHeight_; + + public: + OutOfLineCode() : stackHeight_(StackHeight::Invalid()) {} + + Label* entry() { return &entry_; } + Label* rejoin() { return &rejoin_; } + + void setStackHeight(StackHeight stackHeight) { + MOZ_ASSERT(!stackHeight_.isValid()); + stackHeight_ = stackHeight; + } + + void bind(BaseStackFrame* fr, MacroAssembler* masm) { + MOZ_ASSERT(stackHeight_.isValid()); + masm->bind(&entry_); + fr->setStackHeight(stackHeight_); + } + + // The generate() method must be careful about register use + // because it will be invoked when there is a register + // assignment in the BaseCompiler that does not correspond + // to the available registers when the generated OOL code is + // executed. The register allocator *must not* be called. + // + // The best strategy is for the creator of the OOL object to + // allocate all temps that the OOL code will need. + // + // Input, output, and temp registers are embedded in the OOL + // object and are known to the code generator. + // + // Scratch registers are available to use in OOL code. + // + // All other registers must be explicitly saved and restored + // by the OOL code before being used. + + virtual void generate(MacroAssembler* masm) = 0; + }; + + enum class LatentOp { None, Compare, Eqz }; + + struct AccessCheck { + AccessCheck() + : omitBoundsCheck(false), + omitAlignmentCheck(false), + onlyPointerAlignment(false) {} + + // If `omitAlignmentCheck` is true then we need check neither the + // pointer nor the offset. Otherwise, if `onlyPointerAlignment` is true + // then we need check only the pointer. Otherwise, check the sum of + // pointer and offset. 
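+    //
+    // Equivalently, as a small decision sketch:
+    //
+    //   if (omitAlignmentCheck)         no alignment check at all
+    //   else if (onlyPointerAlignment)  check the pointer only
+    //   else                            check (pointer + offset)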
+ + bool omitBoundsCheck; + bool omitAlignmentCheck; + bool onlyPointerAlignment; + }; + + const ModuleEnvironment& moduleEnv_; + const CompilerEnvironment& compilerEnv_; + BaseOpIter iter_; + const FuncCompileInput& func_; + size_t lastReadCallSite_; + TempAllocator::Fallible alloc_; + const ValTypeVector& locals_; // Types of parameters and locals + bool deadCode_; // Flag indicating we should decode & discard the opcode + BCESet + bceSafe_; // Locals that have been bounds checked and not updated since + ValTypeVector SigD_; + ValTypeVector SigF_; + NonAssertingLabel returnLabel_; + + LatentOp latentOp_; // Latent operation for branch (seen next) + ValType latentType_; // Operand type, if latentOp_ is true + Assembler::Condition + latentIntCmp_; // Comparison operator, if latentOp_ == Compare, int types + Assembler::DoubleCondition + latentDoubleCmp_; // Comparison operator, if latentOp_ == Compare, float + // types + + FuncOffsets offsets_; + MacroAssembler& masm; // No '_' suffix - too tedious... + BaseRegAlloc ra; // Ditto + BaseStackFrame fr; + + StackMapGenerator stackMapGenerator_; + + BaseStackFrame::LocalVector localInfo_; + Vector<OutOfLineCode*, 8, SystemAllocPolicy> outOfLine_; + + // On specific platforms we sometimes need to use specific registers. + + SpecificRegs specific_; + + // There are more members scattered throughout. + + public: + BaseCompiler(const ModuleEnvironment& moduleEnv, + const CompilerEnvironment& compilerEnv, + const FuncCompileInput& input, const ValTypeVector& locals, + const MachineState& trapExitLayout, + size_t trapExitLayoutNumWords, Decoder& decoder, + StkVector& stkSource, TempAllocator* alloc, MacroAssembler* masm, + StackMaps* stackMaps); + ~BaseCompiler(); + + [[nodiscard]] bool init(); + + FuncOffsets finish(); + + [[nodiscard]] bool emitFunction(); + void emitInitStackLocals(); + + const FuncType& funcType() const { + return *moduleEnv_.funcs[func_.index].type; + } + + const TypeIdDesc& funcTypeId() const { + return *moduleEnv_.funcs[func_.index].typeId; + } + + // Used by some of the ScratchRegister implementations. + operator MacroAssembler&() const { return masm; } + operator BaseRegAlloc&() { return ra; } + + bool usesSharedMemory() const { return moduleEnv_.usesSharedMemory(); } + + private: + //////////////////////////////////////////////////////////// + // + // Out of line code management. + + [[nodiscard]] OutOfLineCode* addOutOfLineCode(OutOfLineCode* ool) { + if (!ool || !outOfLine_.append(ool)) { + return nullptr; + } + ool->setStackHeight(fr.stackHeight()); + return ool; + } + + [[nodiscard]] bool generateOutOfLineCode() { + for (uint32_t i = 0; i < outOfLine_.length(); i++) { + OutOfLineCode* ool = outOfLine_[i]; + ool->bind(&fr, &masm); + ool->generate(&masm); + } + + return !masm.oom(); + } + + // Utility. + + const Local& localFromSlot(uint32_t slot, MIRType type) { + MOZ_ASSERT(localInfo_[slot].type == type); + return localInfo_[slot]; + } + + //////////////////////////////////////////////////////////// + // + // High-level register management. 
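+  //
+  // As a usage sketch for the wrappers that follow (the names are
+  // hypothetical): a typical emitter allocates a temp, uses it, and frees it
+  // when the value is dead, eg
+  //
+  //   RegI32 temp = needI32();   // may sync() the value stack to free a reg
+  //   ... use temp ...
+  //   freeI32(temp);
+  //
+  // and uses the needI32(specific)/freeI32(specific) forms when an
+  // instruction requires a particular hardware register.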
+ + bool isAvailableI32(RegI32 r) { return ra.isAvailableI32(r); } + bool isAvailableI64(RegI64 r) { return ra.isAvailableI64(r); } + bool isAvailableRef(RegPtr r) { return ra.isAvailablePtr(r); } + bool isAvailableF32(RegF32 r) { return ra.isAvailableF32(r); } + bool isAvailableF64(RegF64 r) { return ra.isAvailableF64(r); } +#ifdef ENABLE_WASM_SIMD + bool isAvailableV128(RegV128 r) { return ra.isAvailableV128(r); } +#endif + + [[nodiscard]] RegI32 needI32() { return ra.needI32(); } + [[nodiscard]] RegI64 needI64() { return ra.needI64(); } + [[nodiscard]] RegPtr needRef() { return ra.needPtr(); } + [[nodiscard]] RegF32 needF32() { return ra.needF32(); } + [[nodiscard]] RegF64 needF64() { return ra.needF64(); } +#ifdef ENABLE_WASM_SIMD + [[nodiscard]] RegV128 needV128() { return ra.needV128(); } +#endif + + void needI32(RegI32 specific) { ra.needI32(specific); } + void needI64(RegI64 specific) { ra.needI64(specific); } + void needRef(RegPtr specific) { ra.needPtr(specific); } + void needF32(RegF32 specific) { ra.needF32(specific); } + void needF64(RegF64 specific) { ra.needF64(specific); } +#ifdef ENABLE_WASM_SIMD + void needV128(RegV128 specific) { ra.needV128(specific); } +#endif + +#if defined(JS_CODEGEN_ARM) + [[nodiscard]] RegI64 needI64Pair() { return ra.needI64Pair(); } +#endif + + void freeI32(RegI32 r) { ra.freeI32(r); } + void freeI64(RegI64 r) { ra.freeI64(r); } + void freeRef(RegPtr r) { ra.freePtr(r); } + void freeF32(RegF32 r) { ra.freeF32(r); } + void freeF64(RegF64 r) { ra.freeF64(r); } +#ifdef ENABLE_WASM_SIMD + void freeV128(RegV128 r) { ra.freeV128(r); } +#endif + + void freeI64Except(RegI64 r, RegI32 except) { +#ifdef JS_PUNBOX64 + MOZ_ASSERT(r.reg == except); +#else + MOZ_ASSERT(r.high == except || r.low == except); + freeI64(r); + needI32(except); +#endif + } + + void maybeFreeI32(RegI32 r) { + if (r.isValid()) { + freeI32(r); + } + } + + void maybeFreeI64(RegI64 r) { + if (r.isValid()) { + freeI64(r); + } + } + + void maybeFreeF64(RegF64 r) { + if (r.isValid()) { + freeF64(r); + } + } + + void needI32NoSync(RegI32 r) { + MOZ_ASSERT(isAvailableI32(r)); + needI32(r); + } + + // TODO / OPTIMIZE: need2xI32() can be optimized along with needI32() + // to avoid sync(). 
(Bug 1316802) + + void need2xI32(RegI32 r0, RegI32 r1) { + needI32(r0); + needI32(r1); + } + + void need2xI64(RegI64 r0, RegI64 r1) { + needI64(r0); + needI64(r1); + } + + RegI32 fromI64(RegI64 r) { return RegI32(lowPart(r)); } + +#ifdef JS_PUNBOX64 + RegI64 fromI32(RegI32 r) { return RegI64(Register64(r)); } +#endif + + RegI64 widenI32(RegI32 r) { + MOZ_ASSERT(!isAvailableI32(r)); +#ifdef JS_PUNBOX64 + return fromI32(r); +#else + RegI32 high = needI32(); + return RegI64(Register64(high, r)); +#endif + } + + RegI32 narrowI64(RegI64 r) { +#ifdef JS_PUNBOX64 + return RegI32(r.reg); +#else + freeI32(RegI32(r.high)); + return RegI32(r.low); +#endif + } + + RegI32 narrowPtr(RegPtr r) { return RegI32(r); } + + RegI32 lowPart(RegI64 r) { +#ifdef JS_PUNBOX64 + return RegI32(r.reg); +#else + return RegI32(r.low); +#endif + } + + RegI32 maybeHighPart(RegI64 r) { +#ifdef JS_PUNBOX64 + return RegI32::Invalid(); +#else + return RegI32(r.high); +#endif + } + + void maybeClearHighPart(RegI64 r) { +#if !defined(JS_PUNBOX64) + moveImm32(0, RegI32(r.high)); +#endif + } + + void moveI32(RegI32 src, RegI32 dest) { + if (src != dest) { + masm.move32(src, dest); + } + } + + void moveI64(RegI64 src, RegI64 dest) { + if (src != dest) { + masm.move64(src, dest); + } + } + + void moveRef(RegPtr src, RegPtr dest) { + if (src != dest) { + masm.movePtr(src, dest); + } + } + + void moveF64(RegF64 src, RegF64 dest) { + if (src != dest) { + masm.moveDouble(src, dest); + } + } + + void moveF32(RegF32 src, RegF32 dest) { + if (src != dest) { + masm.moveFloat32(src, dest); + } + } + +#ifdef ENABLE_WASM_SIMD + void moveV128(RegV128 src, RegV128 dest) { + if (src != dest) { + masm.moveSimd128(src, dest); + } + } +#endif + + //////////////////////////////////////////////////////////////////////////// + // + // Block parameters and results. + // + // Blocks may have multiple parameters and multiple results. Blocks can also + // be the target of branches: the entry for loops, and the exit for + // non-loops. + // + // Passing multiple values to a non-branch target (i.e., the entry of a + // "block") falls out naturally: any items on the value stack can flow + // directly from one block to another. + // + // However, for branch targets, we need to allocate well-known locations for + // the branch values. The approach taken in the baseline compiler is to + // allocate registers to the top N values (currently N=1), and then stack + // locations for the rest. + // + + enum class RegKind { All, OnlyGPRs }; + + inline void needResultRegisters(ResultType type, RegKind which) { + if (type.empty()) { + return; + } + + for (ABIResultIter iter(type); !iter.done(); iter.next()) { + ABIResult result = iter.cur(); + // Register results are visited first; when we see a stack result we're + // done. 
+ if (!result.inRegister()) { + return; + } + switch (result.type().kind()) { + case ValType::I32: + needI32(RegI32(result.gpr())); + break; + case ValType::I64: + needI64(RegI64(result.gpr64())); + break; + case ValType::V128: +#ifdef ENABLE_WASM_SIMD + if (which == RegKind::All) { + needV128(RegV128(result.fpr())); + } + break; +#else + MOZ_CRASH("No SIMD support"); +#endif + case ValType::F32: + if (which == RegKind::All) { + needF32(RegF32(result.fpr())); + } + break; + case ValType::F64: + if (which == RegKind::All) { + needF64(RegF64(result.fpr())); + } + break; + case ValType::Ref: + needRef(RegPtr(result.gpr())); + break; + } + } + } + +#ifdef JS_CODEGEN_X64 + inline void maskResultRegisters(ResultType type) { + MOZ_ASSERT(JitOptions.spectreIndexMasking); + + if (type.empty()) { + return; + } + + for (ABIResultIter iter(type); !iter.done(); iter.next()) { + ABIResult result = iter.cur(); + if (result.inRegister() && result.type().kind() == ValType::I32) { + masm.movl(result.gpr(), result.gpr()); + } + } + } +#endif + + inline void freeResultRegisters(ResultType type, RegKind which) { + if (type.empty()) { + return; + } + + for (ABIResultIter iter(type); !iter.done(); iter.next()) { + ABIResult result = iter.cur(); + // Register results are visited first; when we see a stack result we're + // done. + if (!result.inRegister()) { + return; + } + switch (result.type().kind()) { + case ValType::I32: + freeI32(RegI32(result.gpr())); + break; + case ValType::I64: + freeI64(RegI64(result.gpr64())); + break; + case ValType::V128: +#ifdef ENABLE_WASM_SIMD + if (which == RegKind::All) { + freeV128(RegV128(result.fpr())); + } + break; +#else + MOZ_CRASH("No SIMD support"); +#endif + case ValType::F32: + if (which == RegKind::All) { + freeF32(RegF32(result.fpr())); + } + break; + case ValType::F64: + if (which == RegKind::All) { + freeF64(RegF64(result.fpr())); + } + break; + case ValType::Ref: + freeRef(RegPtr(result.gpr())); + break; + } + } + } + + void needIntegerResultRegisters(ResultType type) { + needResultRegisters(type, RegKind::OnlyGPRs); + } + void freeIntegerResultRegisters(ResultType type) { + freeResultRegisters(type, RegKind::OnlyGPRs); + } + + void needResultRegisters(ResultType type) { + needResultRegisters(type, RegKind::All); + } + void freeResultRegisters(ResultType type) { + freeResultRegisters(type, RegKind::All); + } + + void assertResultRegistersAvailable(ResultType type) { +#ifdef DEBUG + for (ABIResultIter iter(type); !iter.done(); iter.next()) { + ABIResult result = iter.cur(); + if (!result.inRegister()) { + return; + } + switch (result.type().kind()) { + case ValType::I32: + MOZ_ASSERT(isAvailableI32(RegI32(result.gpr()))); + break; + case ValType::I64: + MOZ_ASSERT(isAvailableI64(RegI64(result.gpr64()))); + break; + case ValType::V128: +# ifdef ENABLE_WASM_SIMD + MOZ_ASSERT(isAvailableV128(RegV128(result.fpr()))); + break; +# else + MOZ_CRASH("No SIMD support"); +# endif + case ValType::F32: + MOZ_ASSERT(isAvailableF32(RegF32(result.fpr()))); + break; + case ValType::F64: + MOZ_ASSERT(isAvailableF64(RegF64(result.fpr()))); + break; + case ValType::Ref: + MOZ_ASSERT(isAvailableRef(RegPtr(result.gpr()))); + break; + } + } +#endif + } + + void captureResultRegisters(ResultType type) { + assertResultRegistersAvailable(type); + needResultRegisters(type); + } + + void captureCallResultRegisters(ResultType type) { + captureResultRegisters(type); +#ifdef JS_CODEGEN_X64 + if (JitOptions.spectreIndexMasking) { + maskResultRegisters(type); + } +#endif + } + + 
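+  // A note on maskResultRegisters() above (x64 only, and only when Spectre
+  // index masking is enabled): a 32-bit register-to-itself move such as
+  //
+  //   movl %eax, %eax        // register chosen purely for illustration
+  //
+  // rewrites the low 32 bits and implicitly zeroes bits 63..32, so an i32
+  // call result is known to be properly zero-extended before it can be used
+  // (for example as a memory index) even under speculation.
+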
//////////////////////////////////////////////////////////// + // + // Value stack and spilling. + // + // The value stack facilitates some on-the-fly register allocation + // and immediate-constant use. It tracks constants, latent + // references to locals, register contents, and values on the CPU + // stack. + // + // The stack can be flushed to memory using sync(). This is handy + // to avoid problems with control flow and messy register usage + // patterns. + + // This is the value stack actually used during compilation. It is a + // StkVector rather than a StkVector& since constantly dereferencing a + // StkVector& adds about 0.5% or more to the compiler's dynamic instruction + // count. + StkVector stk_; + + static constexpr size_t MaxPushesPerOpcode = 10; + + // BaselineCompileFunctions() "lends" us the StkVector to use in this + // BaseCompiler object, and that is installed in |stk_| in our constructor. + // This is so as to avoid having to malloc/free the vector's contents at + // each creation/destruction of a BaseCompiler object. It does however mean + // that we need to hold on to a reference to BaselineCompileFunctions()'s + // vector, so we can swap (give) its contents back when this BaseCompiler + // object is destroyed. This significantly reduces the heap turnover of the + // baseline compiler. See bug 1532592. + StkVector& stkSource_; + +#ifdef DEBUG + size_t countMemRefsOnStk() { + size_t nRefs = 0; + for (Stk& v : stk_) { + if (v.kind() == Stk::MemRef) { + nRefs++; + } + } + return nRefs; + } +#endif + + template <typename T> + void push(T item) { + // None of the single-arg Stk constructors create a Stk::MemRef, so + // there's no need to increment stackMapGenerator_.memRefsOnStk here. + stk_.infallibleEmplaceBack(Stk(item)); + } + + void pushConstRef(intptr_t v) { stk_.infallibleEmplaceBack(Stk::StkRef(v)); } + + void loadConstI32(const Stk& src, RegI32 dest) { + moveImm32(src.i32val(), dest); + } + + void loadMemI32(const Stk& src, RegI32 dest) { + fr.loadStackI32(src.offs(), dest); + } + + void loadLocalI32(const Stk& src, RegI32 dest) { + fr.loadLocalI32(localFromSlot(src.slot(), MIRType::Int32), dest); + } + + void loadRegisterI32(const Stk& src, RegI32 dest) { + moveI32(src.i32reg(), dest); + } + + void loadConstI64(const Stk& src, RegI64 dest) { + moveImm64(src.i64val(), dest); + } + + void loadMemI64(const Stk& src, RegI64 dest) { + fr.loadStackI64(src.offs(), dest); + } + + void loadLocalI64(const Stk& src, RegI64 dest) { + fr.loadLocalI64(localFromSlot(src.slot(), MIRType::Int64), dest); + } + + void loadRegisterI64(const Stk& src, RegI64 dest) { + moveI64(src.i64reg(), dest); + } + + void loadConstRef(const Stk& src, RegPtr dest) { + moveImmRef(src.refval(), dest); + } + + void loadMemRef(const Stk& src, RegPtr dest) { + fr.loadStackPtr(src.offs(), dest); + } + + void loadLocalRef(const Stk& src, RegPtr dest) { + fr.loadLocalPtr(localFromSlot(src.slot(), MIRType::RefOrNull), dest); + } + + void loadRegisterRef(const Stk& src, RegPtr dest) { + moveRef(src.refReg(), dest); + } + + void loadConstF64(const Stk& src, RegF64 dest) { + double d; + src.f64val(&d); + masm.loadConstantDouble(d, dest); + } + + void loadMemF64(const Stk& src, RegF64 dest) { + fr.loadStackF64(src.offs(), dest); + } + + void loadLocalF64(const Stk& src, RegF64 dest) { + fr.loadLocalF64(localFromSlot(src.slot(), MIRType::Double), dest); + } + + void loadRegisterF64(const Stk& src, RegF64 dest) { + moveF64(src.f64reg(), dest); + } + + void loadConstF32(const Stk& src, RegF32 dest) { + float 
f; + src.f32val(&f); + masm.loadConstantFloat32(f, dest); + } + + void loadMemF32(const Stk& src, RegF32 dest) { + fr.loadStackF32(src.offs(), dest); + } + + void loadLocalF32(const Stk& src, RegF32 dest) { + fr.loadLocalF32(localFromSlot(src.slot(), MIRType::Float32), dest); + } + + void loadRegisterF32(const Stk& src, RegF32 dest) { + moveF32(src.f32reg(), dest); + } + +#ifdef ENABLE_WASM_SIMD + void loadConstV128(const Stk& src, RegV128 dest) { + V128 f; + src.v128val(&f); + masm.loadConstantSimd128(SimdConstant::CreateX16((int8_t*)f.bytes), dest); + } + + void loadMemV128(const Stk& src, RegV128 dest) { + fr.loadStackV128(src.offs(), dest); + } + + void loadLocalV128(const Stk& src, RegV128 dest) { + fr.loadLocalV128(localFromSlot(src.slot(), MIRType::Simd128), dest); + } + + void loadRegisterV128(const Stk& src, RegV128 dest) { + moveV128(src.v128reg(), dest); + } +#endif + + void loadI32(const Stk& src, RegI32 dest) { + switch (src.kind()) { + case Stk::ConstI32: + loadConstI32(src, dest); + break; + case Stk::MemI32: + loadMemI32(src, dest); + break; + case Stk::LocalI32: + loadLocalI32(src, dest); + break; + case Stk::RegisterI32: + loadRegisterI32(src, dest); + break; + default: + MOZ_CRASH("Compiler bug: Expected I32 on stack"); + } + } + + void loadI64(const Stk& src, RegI64 dest) { + switch (src.kind()) { + case Stk::ConstI64: + loadConstI64(src, dest); + break; + case Stk::MemI64: + loadMemI64(src, dest); + break; + case Stk::LocalI64: + loadLocalI64(src, dest); + break; + case Stk::RegisterI64: + loadRegisterI64(src, dest); + break; + default: + MOZ_CRASH("Compiler bug: Expected I64 on stack"); + } + } + +#if !defined(JS_PUNBOX64) + void loadI64Low(const Stk& src, RegI32 dest) { + switch (src.kind()) { + case Stk::ConstI64: + moveImm32(int32_t(src.i64val()), dest); + break; + case Stk::MemI64: + fr.loadStackI64Low(src.offs(), dest); + break; + case Stk::LocalI64: + fr.loadLocalI64Low(localFromSlot(src.slot(), MIRType::Int64), dest); + break; + case Stk::RegisterI64: + moveI32(RegI32(src.i64reg().low), dest); + break; + default: + MOZ_CRASH("Compiler bug: Expected I64 on stack"); + } + } + + void loadI64High(const Stk& src, RegI32 dest) { + switch (src.kind()) { + case Stk::ConstI64: + moveImm32(int32_t(src.i64val() >> 32), dest); + break; + case Stk::MemI64: + fr.loadStackI64High(src.offs(), dest); + break; + case Stk::LocalI64: + fr.loadLocalI64High(localFromSlot(src.slot(), MIRType::Int64), dest); + break; + case Stk::RegisterI64: + moveI32(RegI32(src.i64reg().high), dest); + break; + default: + MOZ_CRASH("Compiler bug: Expected I64 on stack"); + } + } +#endif + + void loadF64(const Stk& src, RegF64 dest) { + switch (src.kind()) { + case Stk::ConstF64: + loadConstF64(src, dest); + break; + case Stk::MemF64: + loadMemF64(src, dest); + break; + case Stk::LocalF64: + loadLocalF64(src, dest); + break; + case Stk::RegisterF64: + loadRegisterF64(src, dest); + break; + default: + MOZ_CRASH("Compiler bug: expected F64 on stack"); + } + } + + void loadF32(const Stk& src, RegF32 dest) { + switch (src.kind()) { + case Stk::ConstF32: + loadConstF32(src, dest); + break; + case Stk::MemF32: + loadMemF32(src, dest); + break; + case Stk::LocalF32: + loadLocalF32(src, dest); + break; + case Stk::RegisterF32: + loadRegisterF32(src, dest); + break; + default: + MOZ_CRASH("Compiler bug: expected F32 on stack"); + } + } + +#ifdef ENABLE_WASM_SIMD + void loadV128(const Stk& src, RegV128 dest) { + switch (src.kind()) { + case Stk::ConstV128: + loadConstV128(src, dest); + break; + case 
Stk::MemV128: + loadMemV128(src, dest); + break; + case Stk::LocalV128: + loadLocalV128(src, dest); + break; + case Stk::RegisterV128: + loadRegisterV128(src, dest); + break; + default: + MOZ_CRASH("Compiler bug: expected V128 on stack"); + } + } +#endif + + void loadRef(const Stk& src, RegPtr dest) { + switch (src.kind()) { + case Stk::ConstRef: + loadConstRef(src, dest); + break; + case Stk::MemRef: + loadMemRef(src, dest); + break; + case Stk::LocalRef: + loadLocalRef(src, dest); + break; + case Stk::RegisterRef: + loadRegisterRef(src, dest); + break; + default: + MOZ_CRASH("Compiler bug: expected ref on stack"); + } + } + + // Flush all local and register value stack elements to memory. + // + // TODO / OPTIMIZE: As this is fairly expensive and causes worse + // code to be emitted subsequently, it is useful to avoid calling + // it. (Bug 1316802) + // + // Some optimization has been done already. Remaining + // opportunities: + // + // - It would be interesting to see if we can specialize it + // before calls with particularly simple signatures, or where + // we can do parallel assignment of register arguments, or + // similar. See notes in emitCall(). + // + // - Operations that need specific registers: multiply, quotient, + // remainder, will tend to sync because the registers we need + // will tend to be allocated. We may be able to avoid that by + // prioritizing registers differently (takeLast instead of + // takeFirst) but we may also be able to allocate an unused + // register on demand to free up one we need, thus avoiding the + // sync. That type of fix would go into needI32(). + + void sync() final { + size_t start = 0; + size_t lim = stk_.length(); + + for (size_t i = lim; i > 0; i--) { + // Memory opcodes are first in the enum, single check against MemLast is + // fine. 
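+ // Everything at or below the topmost Mem element is either already in + // memory or a constant (which needs no spilling), so the spill loop below + // only has to visit the elements above it.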
+ if (stk_[i - 1].kind() <= Stk::MemLast) { + start = i; + break; + } + } + + for (size_t i = start; i < lim; i++) { + Stk& v = stk_[i]; + switch (v.kind()) { + case Stk::LocalI32: { + ScratchI32 scratch(*this); + loadLocalI32(v, scratch); + uint32_t offs = fr.pushPtr(scratch); + v.setOffs(Stk::MemI32, offs); + break; + } + case Stk::RegisterI32: { + uint32_t offs = fr.pushPtr(v.i32reg()); + freeI32(v.i32reg()); + v.setOffs(Stk::MemI32, offs); + break; + } + case Stk::LocalI64: { + ScratchI32 scratch(*this); +#ifdef JS_PUNBOX64 + loadI64(v, fromI32(scratch)); + uint32_t offs = fr.pushPtr(scratch); +#else + fr.loadLocalI64High(localFromSlot(v.slot(), MIRType::Int64), scratch); + fr.pushPtr(scratch); + fr.loadLocalI64Low(localFromSlot(v.slot(), MIRType::Int64), scratch); + uint32_t offs = fr.pushPtr(scratch); +#endif + v.setOffs(Stk::MemI64, offs); + break; + } + case Stk::RegisterI64: { +#ifdef JS_PUNBOX64 + uint32_t offs = fr.pushPtr(v.i64reg().reg); + freeI64(v.i64reg()); +#else + fr.pushPtr(v.i64reg().high); + uint32_t offs = fr.pushPtr(v.i64reg().low); + freeI64(v.i64reg()); +#endif + v.setOffs(Stk::MemI64, offs); + break; + } + case Stk::LocalF64: { + ScratchF64 scratch(*this); + loadF64(v, scratch); + uint32_t offs = fr.pushDouble(scratch); + v.setOffs(Stk::MemF64, offs); + break; + } + case Stk::RegisterF64: { + uint32_t offs = fr.pushDouble(v.f64reg()); + freeF64(v.f64reg()); + v.setOffs(Stk::MemF64, offs); + break; + } + case Stk::LocalF32: { + ScratchF32 scratch(*this); + loadF32(v, scratch); + uint32_t offs = fr.pushFloat32(scratch); + v.setOffs(Stk::MemF32, offs); + break; + } + case Stk::RegisterF32: { + uint32_t offs = fr.pushFloat32(v.f32reg()); + freeF32(v.f32reg()); + v.setOffs(Stk::MemF32, offs); + break; + } +#ifdef ENABLE_WASM_SIMD + case Stk::LocalV128: { + ScratchV128 scratch(*this); + loadV128(v, scratch); + uint32_t offs = fr.pushV128(scratch); + v.setOffs(Stk::MemV128, offs); + break; + } + case Stk::RegisterV128: { + uint32_t offs = fr.pushV128(v.v128reg()); + freeV128(v.v128reg()); + v.setOffs(Stk::MemV128, offs); + break; + } +#endif + case Stk::LocalRef: { + ScratchPtr scratch(*this); + loadLocalRef(v, scratch); + uint32_t offs = fr.pushPtr(scratch); + v.setOffs(Stk::MemRef, offs); + stackMapGenerator_.memRefsOnStk++; + break; + } + case Stk::RegisterRef: { + uint32_t offs = fr.pushPtr(v.refReg()); + freeRef(v.refReg()); + v.setOffs(Stk::MemRef, offs); + stackMapGenerator_.memRefsOnStk++; + break; + } + default: { + break; + } + } + } + } + + void saveTempPtr(RegPtr r) final { + MOZ_ASSERT(!ra.isAvailablePtr(r)); + fr.pushPtr(r); + ra.freePtr(r); + MOZ_ASSERT(ra.isAvailablePtr(r)); + } + + void restoreTempPtr(RegPtr r) final { + MOZ_ASSERT(ra.isAvailablePtr(r)); + ra.needPtr(r); + fr.popPtr(r); + MOZ_ASSERT(!ra.isAvailablePtr(r)); + } + + // Various methods for creating a stack map. Stack maps are indexed by the + // lowest address of the instruction immediately *after* the instruction of + // interest. In practice that means either: the return point of a call, the + // instruction immediately after a trap instruction (the "resume" + // instruction), or the instruction immediately following a no-op (when + // debugging is enabled). + + // Create a vanilla stack map. + [[nodiscard]] bool createStackMap(const char* who) { + const ExitStubMapVector noExtras; + return createStackMap(who, noExtras, masm.currentOffset()); + } + + // Create a stack map as vanilla, but for a custom assembler offset. 
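+ // (Used when the offset of interest is not the current assembler offset, + // typically the return point of a call, whose CodeOffset is returned by + // the masm call routines.)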
+ [[nodiscard]] bool createStackMap(const char* who, + CodeOffset assemblerOffset) { + const ExitStubMapVector noExtras; + return createStackMap(who, noExtras, assemblerOffset.offset()); + } + + // The most general stack map construction. + [[nodiscard]] bool createStackMap(const char* who, + const ExitStubMapVector& extras, + uint32_t assemblerOffset) { + auto debugFrame = + compilerEnv_.debugEnabled() ? HasDebugFrame::Yes : HasDebugFrame::No; + return stackMapGenerator_.createStackMap(who, extras, assemblerOffset, + debugFrame, stk_); + } + + // This is an optimization used to avoid calling sync() for + // setLocal(): if the local does not exist unresolved on the stack + // then we can skip the sync. + + bool hasLocal(uint32_t slot) { + for (size_t i = stk_.length(); i > 0; i--) { + // Memory opcodes are first in the enum, single check against MemLast is + // fine. + Stk::Kind kind = stk_[i - 1].kind(); + if (kind <= Stk::MemLast) { + return false; + } + + // Local opcodes follow memory opcodes in the enum, single check against + // LocalLast is sufficient. + if (kind <= Stk::LocalLast && stk_[i - 1].slot() == slot) { + return true; + } + } + return false; + } + + void syncLocal(uint32_t slot) { + if (hasLocal(slot)) { + sync(); // TODO / OPTIMIZE: Improve this? (Bug 1316817) + } + } + + // Push the register r onto the stack. + + void pushI32(RegI32 r) { + MOZ_ASSERT(!isAvailableI32(r)); + push(Stk(r)); + } + + void pushI64(RegI64 r) { + MOZ_ASSERT(!isAvailableI64(r)); + push(Stk(r)); + } + + void pushRef(RegPtr r) { + MOZ_ASSERT(!isAvailableRef(r)); + push(Stk(r)); + } + + void pushF64(RegF64 r) { + MOZ_ASSERT(!isAvailableF64(r)); + push(Stk(r)); + } + + void pushF32(RegF32 r) { + MOZ_ASSERT(!isAvailableF32(r)); + push(Stk(r)); + } + +#ifdef ENABLE_WASM_SIMD + void pushV128(RegV128 r) { + MOZ_ASSERT(!isAvailableV128(r)); + push(Stk(r)); + } +#endif + + // Push the value onto the stack. + + void pushI32(int32_t v) { push(Stk(v)); } + + void pushI64(int64_t v) { push(Stk(v)); } + + void pushRef(intptr_t v) { pushConstRef(v); } + + void pushF64(double v) { push(Stk(v)); } + + void pushF32(float v) { push(Stk(v)); } + +#ifdef ENABLE_WASM_SIMD + void pushV128(V128 v) { push(Stk(v)); } +#endif + + // Push the local slot onto the stack. The slot will not be read + // here; it will be read when it is consumed, or when a side + // effect to the slot forces its value to be saved. + + void pushLocalI32(uint32_t slot) { + stk_.infallibleEmplaceBack(Stk(Stk::LocalI32, slot)); + } + + void pushLocalI64(uint32_t slot) { + stk_.infallibleEmplaceBack(Stk(Stk::LocalI64, slot)); + } + + void pushLocalRef(uint32_t slot) { + stk_.infallibleEmplaceBack(Stk(Stk::LocalRef, slot)); + } + + void pushLocalF64(uint32_t slot) { + stk_.infallibleEmplaceBack(Stk(Stk::LocalF64, slot)); + } + + void pushLocalF32(uint32_t slot) { + stk_.infallibleEmplaceBack(Stk(Stk::LocalF32, slot)); + } + +#ifdef ENABLE_WASM_SIMD + void pushLocalV128(uint32_t slot) { + stk_.infallibleEmplaceBack(Stk(Stk::LocalV128, slot)); + } +#endif + + // Call only from other popI32() variants. + // v must be the stack top. May pop the CPU stack. 
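+ // + // For context: the public popI32()/pushI32() family below is what the + // opcode emitters use. A minimal sketch, not actual emitter code (the real + // emitters for the individual opcodes appear much later in this file): + // + // RegI32 rs = popI32(); + // RegI32 rsd = popI32(); + // masm.add32(rs, rsd); // rsd := rsd + rs + // freeI32(rs); + // pushI32(rsd); + // + // The private helpers below materialize a given Stk element into a + // register; they are called only from those public pop variants.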
+ + void popI32(const Stk& v, RegI32 dest) { + MOZ_ASSERT(&v == &stk_.back()); + switch (v.kind()) { + case Stk::ConstI32: + loadConstI32(v, dest); + break; + case Stk::LocalI32: + loadLocalI32(v, dest); + break; + case Stk::MemI32: + fr.popPtr(dest); + break; + case Stk::RegisterI32: + loadRegisterI32(v, dest); + break; + default: + MOZ_CRASH("Compiler bug: expected int on stack"); + } + } + + [[nodiscard]] RegI32 popI32() { + Stk& v = stk_.back(); + RegI32 r; + if (v.kind() == Stk::RegisterI32) { + r = v.i32reg(); + } else { + popI32(v, (r = needI32())); + } + stk_.popBack(); + return r; + } + + RegI32 popI32(RegI32 specific) { + Stk& v = stk_.back(); + + if (!(v.kind() == Stk::RegisterI32 && v.i32reg() == specific)) { + needI32(specific); + popI32(v, specific); + if (v.kind() == Stk::RegisterI32) { + freeI32(v.i32reg()); + } + } + + stk_.popBack(); + return specific; + } + +#ifdef ENABLE_WASM_SIMD + // Call only from other popV128() variants. + // v must be the stack top. May pop the CPU stack. + + void popV128(const Stk& v, RegV128 dest) { + MOZ_ASSERT(&v == &stk_.back()); + switch (v.kind()) { + case Stk::ConstV128: + loadConstV128(v, dest); + break; + case Stk::LocalV128: + loadLocalV128(v, dest); + break; + case Stk::MemV128: + fr.popV128(dest); + break; + case Stk::RegisterV128: + loadRegisterV128(v, dest); + break; + default: + MOZ_CRASH("Compiler bug: expected int on stack"); + } + } + + [[nodiscard]] RegV128 popV128() { + Stk& v = stk_.back(); + RegV128 r; + if (v.kind() == Stk::RegisterV128) { + r = v.v128reg(); + } else { + popV128(v, (r = needV128())); + } + stk_.popBack(); + return r; + } + + RegV128 popV128(RegV128 specific) { + Stk& v = stk_.back(); + + if (!(v.kind() == Stk::RegisterV128 && v.v128reg() == specific)) { + needV128(specific); + popV128(v, specific); + if (v.kind() == Stk::RegisterV128) { + freeV128(v.v128reg()); + } + } + + stk_.popBack(); + return specific; + } +#endif + + // Call only from other popI64() variants. + // v must be the stack top. May pop the CPU stack. + + void popI64(const Stk& v, RegI64 dest) { + MOZ_ASSERT(&v == &stk_.back()); + switch (v.kind()) { + case Stk::ConstI64: + loadConstI64(v, dest); + break; + case Stk::LocalI64: + loadLocalI64(v, dest); + break; + case Stk::MemI64: +#ifdef JS_PUNBOX64 + fr.popPtr(dest.reg); +#else + fr.popPtr(dest.low); + fr.popPtr(dest.high); +#endif + break; + case Stk::RegisterI64: + loadRegisterI64(v, dest); + break; + default: + MOZ_CRASH("Compiler bug: expected long on stack"); + } + } + + [[nodiscard]] RegI64 popI64() { + Stk& v = stk_.back(); + RegI64 r; + if (v.kind() == Stk::RegisterI64) { + r = v.i64reg(); + } else { + popI64(v, (r = needI64())); + } + stk_.popBack(); + return r; + } + + // Note, the stack top can be in one half of "specific" on 32-bit + // systems. We can optimize, but for simplicity, if the register + // does not match exactly, then just force the stack top to memory + // and then read it back in. + + RegI64 popI64(RegI64 specific) { + Stk& v = stk_.back(); + + if (!(v.kind() == Stk::RegisterI64 && v.i64reg() == specific)) { + needI64(specific); + popI64(v, specific); + if (v.kind() == Stk::RegisterI64) { + freeI64(v.i64reg()); + } + } + + stk_.popBack(); + return specific; + } + + // Call only from other popRef() variants. + // v must be the stack top. May pop the CPU stack. 
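+ // + // Note that the public popRef() variants below also decrement + // stackMapGenerator_.memRefsOnStk when the element they pop is a MemRef, + // keeping the count of GC pointers spilled on the machine stack in step + // with the value stack.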
+ + void popRef(const Stk& v, RegPtr dest) { + MOZ_ASSERT(&v == &stk_.back()); + switch (v.kind()) { + case Stk::ConstRef: + loadConstRef(v, dest); + break; + case Stk::LocalRef: + loadLocalRef(v, dest); + break; + case Stk::MemRef: + fr.popPtr(dest); + break; + case Stk::RegisterRef: + loadRegisterRef(v, dest); + break; + default: + MOZ_CRASH("Compiler bug: expected ref on stack"); + } + } + + RegPtr popRef(RegPtr specific) { + Stk& v = stk_.back(); + + if (!(v.kind() == Stk::RegisterRef && v.refReg() == specific)) { + needRef(specific); + popRef(v, specific); + if (v.kind() == Stk::RegisterRef) { + freeRef(v.refReg()); + } + } + + stk_.popBack(); + if (v.kind() == Stk::MemRef) { + stackMapGenerator_.memRefsOnStk--; + } + return specific; + } + + [[nodiscard]] RegPtr popRef() { + Stk& v = stk_.back(); + RegPtr r; + if (v.kind() == Stk::RegisterRef) { + r = v.refReg(); + } else { + popRef(v, (r = needRef())); + } + stk_.popBack(); + if (v.kind() == Stk::MemRef) { + stackMapGenerator_.memRefsOnStk--; + } + return r; + } + + // Call only from other popF64() variants. + // v must be the stack top. May pop the CPU stack. + + void popF64(const Stk& v, RegF64 dest) { + MOZ_ASSERT(&v == &stk_.back()); + switch (v.kind()) { + case Stk::ConstF64: + loadConstF64(v, dest); + break; + case Stk::LocalF64: + loadLocalF64(v, dest); + break; + case Stk::MemF64: + fr.popDouble(dest); + break; + case Stk::RegisterF64: + loadRegisterF64(v, dest); + break; + default: + MOZ_CRASH("Compiler bug: expected double on stack"); + } + } + + [[nodiscard]] RegF64 popF64() { + Stk& v = stk_.back(); + RegF64 r; + if (v.kind() == Stk::RegisterF64) { + r = v.f64reg(); + } else { + popF64(v, (r = needF64())); + } + stk_.popBack(); + return r; + } + + RegF64 popF64(RegF64 specific) { + Stk& v = stk_.back(); + + if (!(v.kind() == Stk::RegisterF64 && v.f64reg() == specific)) { + needF64(specific); + popF64(v, specific); + if (v.kind() == Stk::RegisterF64) { + freeF64(v.f64reg()); + } + } + + stk_.popBack(); + return specific; + } + + // Call only from other popF32() variants. + // v must be the stack top. May pop the CPU stack. 
+ + void popF32(const Stk& v, RegF32 dest) { + MOZ_ASSERT(&v == &stk_.back()); + switch (v.kind()) { + case Stk::ConstF32: + loadConstF32(v, dest); + break; + case Stk::LocalF32: + loadLocalF32(v, dest); + break; + case Stk::MemF32: + fr.popFloat32(dest); + break; + case Stk::RegisterF32: + loadRegisterF32(v, dest); + break; + default: + MOZ_CRASH("Compiler bug: expected float on stack"); + } + } + + [[nodiscard]] RegF32 popF32() { + Stk& v = stk_.back(); + RegF32 r; + if (v.kind() == Stk::RegisterF32) { + r = v.f32reg(); + } else { + popF32(v, (r = needF32())); + } + stk_.popBack(); + return r; + } + + RegF32 popF32(RegF32 specific) { + Stk& v = stk_.back(); + + if (!(v.kind() == Stk::RegisterF32 && v.f32reg() == specific)) { + needF32(specific); + popF32(v, specific); + if (v.kind() == Stk::RegisterF32) { + freeF32(v.f32reg()); + } + } + + stk_.popBack(); + return specific; + } + + [[nodiscard]] bool popConstI32(int32_t* c) { + Stk& v = stk_.back(); + if (v.kind() != Stk::ConstI32) { + return false; + } + *c = v.i32val(); + stk_.popBack(); + return true; + } + + [[nodiscard]] bool popConstI64(int64_t* c) { + Stk& v = stk_.back(); + if (v.kind() != Stk::ConstI64) { + return false; + } + *c = v.i64val(); + stk_.popBack(); + return true; + } + + [[nodiscard]] bool peekConstI32(int32_t* c) { + Stk& v = stk_.back(); + if (v.kind() != Stk::ConstI32) { + return false; + } + *c = v.i32val(); + return true; + } + + [[nodiscard]] bool peekConstI64(int64_t* c) { + Stk& v = stk_.back(); + if (v.kind() != Stk::ConstI64) { + return false; + } + *c = v.i64val(); + return true; + } + + [[nodiscard]] bool peek2xI32(int32_t* c0, int32_t* c1) { + MOZ_ASSERT(stk_.length() >= 2); + const Stk& v0 = *(stk_.end() - 1); + const Stk& v1 = *(stk_.end() - 2); + if (v0.kind() != Stk::ConstI32 || v1.kind() != Stk::ConstI32) { + return false; + } + *c0 = v0.i32val(); + *c1 = v1.i32val(); + return true; + } + + [[nodiscard]] bool popConstPositivePowerOfTwoI32(int32_t* c, + uint_fast8_t* power, + int32_t cutoff) { + Stk& v = stk_.back(); + if (v.kind() != Stk::ConstI32) { + return false; + } + *c = v.i32val(); + if (*c <= cutoff || !IsPowerOfTwo(static_cast<uint32_t>(*c))) { + return false; + } + *power = FloorLog2(*c); + stk_.popBack(); + return true; + } + + [[nodiscard]] bool popConstPositivePowerOfTwoI64(int64_t* c, + uint_fast8_t* power, + int64_t cutoff) { + Stk& v = stk_.back(); + if (v.kind() != Stk::ConstI64) { + return false; + } + *c = v.i64val(); + if (*c <= cutoff || !IsPowerOfTwo(static_cast<uint64_t>(*c))) { + return false; + } + *power = FloorLog2(*c); + stk_.popBack(); + return true; + } + + [[nodiscard]] bool peekLocalI32(uint32_t* local) { + Stk& v = stk_.back(); + if (v.kind() != Stk::LocalI32) { + return false; + } + *local = v.slot(); + return true; + } + + // TODO / OPTIMIZE (Bug 1316818): At the moment we use the Wasm + // inter-procedure ABI for block returns, which allocates ReturnReg as the + // single block result register. It is possible other choices would lead to + // better register allocation, as ReturnReg is often first in the register set + // and will be heavily wanted by the register allocator that uses takeFirst(). + // + // Obvious options: + // - pick a register at the back of the register set + // - pick a random register per block (different blocks have + // different join regs) + + void popRegisterResults(ABIResultIter& iter) { + // Pop register results. Note that in the single-value case, popping to a + // register may cause a sync(); for multi-value we sync'd already. 
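+ // Register results precede stack results in the iteration order, so we can + // stop at the first stack result and leave the iterator positioned there + // for a subsequent popStackResults().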
+ for (; !iter.done(); iter.next()) { + const ABIResult& result = iter.cur(); + if (!result.inRegister()) { + // TODO / OPTIMIZE: We sync here to avoid solving the general parallel + // move problem in popStackResults. However we could avoid syncing the + // values that are going to registers anyway, if they are already in + // registers. + sync(); + break; + } + switch (result.type().kind()) { + case ValType::I32: + popI32(RegI32(result.gpr())); + break; + case ValType::I64: + popI64(RegI64(result.gpr64())); + break; + case ValType::F32: + popF32(RegF32(result.fpr())); + break; + case ValType::F64: + popF64(RegF64(result.fpr())); + break; + case ValType::Ref: + popRef(RegPtr(result.gpr())); + break; + case ValType::V128: +#ifdef ENABLE_WASM_SIMD + popV128(RegV128(result.fpr())); +#else + MOZ_CRASH("No SIMD support"); +#endif + } + } + } + + void popStackResults(ABIResultIter& iter, StackHeight stackBase) { + MOZ_ASSERT(!iter.done()); + + // The iterator should be advanced beyond register results, and register + // results should be popped already from the value stack. + uint32_t alreadyPopped = iter.index(); + + // At this point, only stack arguments are remaining. Iterate through them + // to measure how much stack space they will take up. + for (; !iter.done(); iter.next()) { + MOZ_ASSERT(iter.cur().onStack()); + } + + // Calculate the space needed to store stack results, in bytes. + uint32_t stackResultBytes = iter.stackBytesConsumedSoFar(); + MOZ_ASSERT(stackResultBytes); + + // Compute the stack height including the stack results. Note that it's + // possible that this call expands the stack, for example if some of the + // results are supplied by constants and so are not already on the machine + // stack. + uint32_t endHeight = fr.prepareStackResultArea(stackBase, stackResultBytes); + + // Find a free GPR to use when shuffling stack values. If none is + // available, push ReturnReg and restore it after we're done. + bool saved = false; + RegPtr temp = ra.needTempPtr(RegPtr(ReturnReg), &saved); + + // The sequence of Stk values is in the same order on the machine stack as + // the result locations, but there is a complication: constant values are + // not actually pushed on the machine stack. (At this point registers and + // locals have been spilled already.) So, moving the Stk values into place + // isn't simply a shuffle-down or shuffle-up operation. There is a part of + // the Stk sequence that shuffles toward the FP, a part that's already in + // place, and a part that shuffles toward the SP. After shuffling, we have + // to materialize the constants. + + // Shuffle mem values toward the frame pointer, copying deepest values + // first. Stop when we run out of results, get to a register result, or + // find a Stk value that is closer to the FP than the result. + for (iter.switchToPrev(); !iter.done(); iter.prev()) { + const ABIResult& result = iter.cur(); + if (!result.onStack()) { + break; + } + MOZ_ASSERT(result.stackOffset() < stackResultBytes); + uint32_t destHeight = endHeight - result.stackOffset(); + uint32_t stkBase = stk_.length() - (iter.count() - alreadyPopped); + Stk& v = stk_[stkBase + iter.index()]; + if (v.isMem()) { + uint32_t srcHeight = v.offs(); + if (srcHeight <= destHeight) { + break; + } + fr.shuffleStackResultsTowardFP(srcHeight, destHeight, result.size(), + temp); + } + } + + // Reset iterator and skip register results. 
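+ // (The iterator was walked backward for the FP-ward pass above; the + // SP-ward pass below walks it forward again, shallowest results first.)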
+ for (iter.reset(); !iter.done(); iter.next()) { + if (iter.cur().onStack()) { + break; + } + } + + // Revisit top stack values, shuffling mem values toward the stack pointer, + // copying shallowest values first. + for (; !iter.done(); iter.next()) { + const ABIResult& result = iter.cur(); + MOZ_ASSERT(result.onStack()); + MOZ_ASSERT(result.stackOffset() < stackResultBytes); + uint32_t destHeight = endHeight - result.stackOffset(); + Stk& v = stk_[stk_.length() - (iter.index() - alreadyPopped) - 1]; + if (v.isMem()) { + uint32_t srcHeight = v.offs(); + if (srcHeight >= destHeight) { + break; + } + fr.shuffleStackResultsTowardSP(srcHeight, destHeight, result.size(), + temp); + } + } + + // Reset iterator and skip register results, which are already popped off + // the value stack. + for (iter.reset(); !iter.done(); iter.next()) { + if (iter.cur().onStack()) { + break; + } + } + + // Materialize constants and pop the remaining items from the value stack. + for (; !iter.done(); iter.next()) { + const ABIResult& result = iter.cur(); + uint32_t resultHeight = endHeight - result.stackOffset(); + Stk& v = stk_.back(); + switch (v.kind()) { + case Stk::ConstI32: + fr.storeImmediatePtrToStack(uint32_t(v.i32val_), resultHeight, temp); + break; + case Stk::ConstF32: + fr.storeImmediateF32ToStack(v.f32val_, resultHeight, temp); + break; + case Stk::ConstI64: + fr.storeImmediateI64ToStack(v.i64val_, resultHeight, temp); + break; + case Stk::ConstF64: + fr.storeImmediateF64ToStack(v.f64val_, resultHeight, temp); + break; +#ifdef ENABLE_WASM_SIMD + case Stk::ConstV128: + fr.storeImmediateV128ToStack(v.v128val_, resultHeight, temp); + break; +#endif + case Stk::ConstRef: + fr.storeImmediatePtrToStack(v.refval_, resultHeight, temp); + break; + case Stk::MemRef: + // Update bookkeeping as we pop the Stk entry. + stackMapGenerator_.memRefsOnStk--; + break; + default: + MOZ_ASSERT(v.isMem()); + break; + } + stk_.popBack(); + } + + ra.freeTempPtr(temp, saved); + + // This will pop the stack if needed. + fr.finishStackResultArea(stackBase, stackResultBytes); + } + + enum class ContinuationKind { Fallthrough, Jump }; + + void popBlockResults(ResultType type, StackHeight stackBase, + ContinuationKind kind) { + if (!type.empty()) { + ABIResultIter iter(type); + popRegisterResults(iter); + if (!iter.done()) { + popStackResults(iter, stackBase); + // Because popStackResults might clobber the stack, it leaves the stack + // pointer already in the right place for the continuation, whether the + // continuation is a jump or fallthrough. + return; + } + } + // We get here if there are no stack results. For a fallthrough, the stack + // is already at the right height. For a jump, we may need to pop the stack + // pointer if the continuation's stack height is lower than the current + // stack height. 
+ if (kind == ContinuationKind::Jump) { + fr.popStackBeforeBranch(stackBase, type); + } + } + + Stk captureStackResult(const ABIResult& result, StackHeight resultsBase, + uint32_t stackResultBytes) { + MOZ_ASSERT(result.onStack()); + uint32_t offs = fr.locateStackResult(result, resultsBase, stackResultBytes); + return Stk::StackResult(result.type(), offs); + } + + MOZ_MUST_USE bool pushResults(ResultType type, StackHeight resultsBase) { + if (type.empty()) { + return true; + } + + if (type.length() > 1) { + if (!stk_.reserve(stk_.length() + type.length() + MaxPushesPerOpcode)) { + return false; + } + } + + // We need to push the results in reverse order, so first iterate through + // all results to determine the locations of stack result types. + ABIResultIter iter(type); + while (!iter.done()) { + iter.next(); + } + uint32_t stackResultBytes = iter.stackBytesConsumedSoFar(); + for (iter.switchToPrev(); !iter.done(); iter.prev()) { + const ABIResult& result = iter.cur(); + if (!result.onStack()) { + break; + } + Stk v = captureStackResult(result, resultsBase, stackResultBytes); + push(v); + if (v.kind() == Stk::MemRef) { + stackMapGenerator_.memRefsOnStk++; + } + } + + for (; !iter.done(); iter.prev()) { + const ABIResult& result = iter.cur(); + MOZ_ASSERT(result.inRegister()); + switch (result.type().kind()) { + case ValType::I32: + pushI32(RegI32(result.gpr())); + break; + case ValType::I64: + pushI64(RegI64(result.gpr64())); + break; + case ValType::V128: +#ifdef ENABLE_WASM_SIMD + pushV128(RegV128(result.fpr())); + break; +#else + MOZ_CRASH("No SIMD support"); +#endif + case ValType::F32: + pushF32(RegF32(result.fpr())); + break; + case ValType::F64: + pushF64(RegF64(result.fpr())); + break; + case ValType::Ref: + pushRef(RegPtr(result.gpr())); + break; + } + } + + return true; + } + + MOZ_MUST_USE bool pushBlockResults(ResultType type) { + return pushResults(type, controlItem().stackHeight); + } + + // A combination of popBlockResults + pushBlockResults, used when entering a + // block with a control-flow join (loops) or split (if) to shuffle the + // fallthrough block parameters into the locations expected by the + // continuation. + MOZ_MUST_USE bool topBlockParams(ResultType type) { + // This function should only be called when entering a block with a + // control-flow join at the entry, where there are no live temporaries in + // the current block. + StackHeight base = controlItem().stackHeight; + MOZ_ASSERT(fr.stackResultsBase(stackConsumed(type.length())) == base); + popBlockResults(type, base, ContinuationKind::Fallthrough); + return pushBlockResults(type); + } + + // A combination of popBlockResults + pushBlockResults, used before branches + // where we don't know the target (br_if / br_table). If and when the branch + // is taken, the stack results will be shuffled down into place. For br_if + // that has fallthrough, the parameters for the untaken branch flow through to + // the continuation. + MOZ_MUST_USE bool topBranchParams(ResultType type, StackHeight* height) { + if (type.empty()) { + *height = fr.stackHeight(); + return true; + } + // There may be temporary values that need spilling; delay computation of + // the stack results base until after the popRegisterResults(), which spills + // if needed. 
+ ABIResultIter iter(type); + popRegisterResults(iter); + StackHeight base = fr.stackResultsBase(stackConsumed(iter.remaining())); + if (!iter.done()) { + popStackResults(iter, base); + } + if (!pushResults(type, base)) { + return false; + } + *height = base; + return true; + } + + // Conditional branches with fallthrough are preceded by a topBranchParams, so + // we know that there are no stack results that need to be materialized. In + // that case, we can just shuffle the whole block down before popping the + // stack. + void shuffleStackResultsBeforeBranch(StackHeight srcHeight, + StackHeight destHeight, + ResultType type) { + uint32_t stackResultBytes = 0; + + if (ABIResultIter::HasStackResults(type)) { + MOZ_ASSERT(stk_.length() >= type.length()); + ABIResultIter iter(type); + for (; !iter.done(); iter.next()) { +#ifdef DEBUG + const ABIResult& result = iter.cur(); + const Stk& v = stk_[stk_.length() - iter.index() - 1]; + MOZ_ASSERT(v.isMem() == result.onStack()); +#endif + } + + stackResultBytes = iter.stackBytesConsumedSoFar(); + MOZ_ASSERT(stackResultBytes > 0); + + if (srcHeight != destHeight) { + // Find a free GPR to use when shuffling stack values. If none + // is available, push ReturnReg and restore it after we're done. + bool saved = false; + RegPtr temp = ra.needTempPtr(RegPtr(ReturnReg), &saved); + fr.shuffleStackResultsTowardFP(srcHeight, destHeight, stackResultBytes, + temp); + ra.freeTempPtr(temp, saved); + } + } + + fr.popStackBeforeBranch(destHeight, stackResultBytes); + } + + // Return the amount of execution stack consumed by the top numval + // values on the value stack. + + size_t stackConsumed(size_t numval) { + size_t size = 0; + MOZ_ASSERT(numval <= stk_.length()); + for (uint32_t i = stk_.length() - 1; numval > 0; numval--, i--) { + Stk& v = stk_[i]; + switch (v.kind()) { + case Stk::MemRef: + size += BaseStackFrame::StackSizeOfPtr; + break; + case Stk::MemI32: + size += BaseStackFrame::StackSizeOfPtr; + break; + case Stk::MemI64: + size += BaseStackFrame::StackSizeOfInt64; + break; + case Stk::MemF64: + size += BaseStackFrame::StackSizeOfDouble; + break; + case Stk::MemF32: + size += BaseStackFrame::StackSizeOfFloat; + break; +#ifdef ENABLE_WASM_SIMD + case Stk::MemV128: + size += BaseStackFrame::StackSizeOfV128; + break; +#endif + default: + break; + } + } + return size; + } + + void popValueStackTo(uint32_t stackSize) { + for (uint32_t i = stk_.length(); i > stackSize; i--) { + Stk& v = stk_[i - 1]; + switch (v.kind()) { + case Stk::RegisterI32: + freeI32(v.i32reg()); + break; + case Stk::RegisterI64: + freeI64(v.i64reg()); + break; + case Stk::RegisterF64: + freeF64(v.f64reg()); + break; + case Stk::RegisterF32: + freeF32(v.f32reg()); + break; +#ifdef ENABLE_WASM_SIMD + case Stk::RegisterV128: + freeV128(v.v128reg()); + break; +#endif + case Stk::RegisterRef: + freeRef(v.refReg()); + break; + case Stk::MemRef: + stackMapGenerator_.memRefsOnStk--; + break; + default: + break; + } + } + stk_.shrinkTo(stackSize); + } + + void popValueStackBy(uint32_t items) { + popValueStackTo(stk_.length() - items); + } + + void dropValue() { + if (peek(0).isMem()) { + fr.popBytes(stackConsumed(1)); + } + popValueStackBy(1); + } + + // Peek at the stack, for calls. + + Stk& peek(uint32_t relativeDepth) { + return stk_[stk_.length() - 1 - relativeDepth]; + } + +#ifdef DEBUG + // Check that we're not leaking registers by comparing the + // state of the stack + available registers with the set of + // all available registers. + + // Call this between opcodes. 
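+ // In this check, a register counts as leaked if the allocator still + // considers it taken even though no Register* element on the value stack + // refers to it.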
+ void performRegisterLeakCheck() { + BaseRegAlloc::LeakCheck check(ra); + for (size_t i = 0; i < stk_.length(); i++) { + Stk& item = stk_[i]; + switch (item.kind_) { + case Stk::RegisterI32: + check.addKnownI32(item.i32reg()); + break; + case Stk::RegisterI64: + check.addKnownI64(item.i64reg()); + break; + case Stk::RegisterF32: + check.addKnownF32(item.f32reg()); + break; + case Stk::RegisterF64: + check.addKnownF64(item.f64reg()); + break; +# ifdef ENABLE_WASM_SIMD + case Stk::RegisterV128: + check.addKnownV128(item.v128reg()); + break; +# endif + case Stk::RegisterRef: + check.addKnownRef(item.refReg()); + break; + default: + break; + } + } + } + + void assertStackInvariants() const { + if (deadCode_) { + // Nonlocal control flow can pass values in stack locations in a way that + // isn't accounted for by the value stack. In dead code, which occurs + // after unconditional non-local control flow, there is no invariant to + // assert. + return; + } + size_t size = 0; + for (const Stk& v : stk_) { + switch (v.kind()) { + case Stk::MemRef: + size += BaseStackFrame::StackSizeOfPtr; + break; + case Stk::MemI32: + size += BaseStackFrame::StackSizeOfPtr; + break; + case Stk::MemI64: + size += BaseStackFrame::StackSizeOfInt64; + break; + case Stk::MemF64: + size += BaseStackFrame::StackSizeOfDouble; + break; + case Stk::MemF32: + size += BaseStackFrame::StackSizeOfFloat; + break; +# ifdef ENABLE_WASM_SIMD + case Stk::MemV128: + size += BaseStackFrame::StackSizeOfV128; + break; +# endif + default: + MOZ_ASSERT(!v.isMem()); + break; + } + } + MOZ_ASSERT(size == fr.dynamicHeight()); + } + +#endif + + //////////////////////////////////////////////////////////// + // + // Control stack + + void initControl(Control& item, ResultType params) { + // Make sure the constructor was run properly + MOZ_ASSERT(!item.stackHeight.isValid() && item.stackSize == UINT32_MAX); + + uint32_t paramCount = deadCode_ ? 0 : params.length(); + uint32_t stackParamSize = stackConsumed(paramCount); + item.stackHeight = fr.stackResultsBase(stackParamSize); + item.stackSize = stk_.length() - paramCount; + item.deadOnArrival = deadCode_; + item.bceSafeOnEntry = bceSafe_; + } + + Control& controlItem() { return iter_.controlItem(); } + + Control& controlItem(uint32_t relativeDepth) { + return iter_.controlItem(relativeDepth); + } + + Control& controlOutermost() { return iter_.controlOutermost(); } + + //////////////////////////////////////////////////////////// + // + // Labels + + void insertBreakablePoint(CallSiteDesc::Kind kind) { + fr.loadTlsPtr(WasmTlsReg); + masm.nopPatchableToCall(CallSiteDesc(iter_.lastOpcodeOffset(), kind)); + } + + ////////////////////////////////////////////////////////////////////// + // + // Function prologue and epilogue. + + [[nodiscard]] bool beginFunction() { + JitSpew(JitSpew_Codegen, "# ========================================"); + JitSpew(JitSpew_Codegen, "# Emitting wasm baseline code"); + JitSpew(JitSpew_Codegen, + "# beginFunction: start of function prologue for index %d", + (int)func_.index); + + // Make a start on the stack map for this function. Inspect the args so + // as to determine which of them are both in-memory and pointer-typed, and + // add entries to machineStackTracker as appropriate. 
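+ // For example, on a 64-bit target a ref-typed argument passed in the + // incoming stack-arg area at byte offset 8 causes word 1 of the tracker to + // be marked as a GC pointer. Register-passed refs are not marked here; they + // are stored into stack-allocated locals further down and tracked there.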
+ + ArgTypeVector args(funcType()); + size_t inboundStackArgBytes = StackArgAreaSizeUnaligned(args); + MOZ_ASSERT(inboundStackArgBytes % sizeof(void*) == 0); + stackMapGenerator_.numStackArgWords = inboundStackArgBytes / sizeof(void*); + + MOZ_ASSERT(stackMapGenerator_.machineStackTracker.length() == 0); + if (!stackMapGenerator_.machineStackTracker.pushNonGCPointers( + stackMapGenerator_.numStackArgWords)) { + return false; + } + + // Identify GC-managed pointers passed on the stack. + for (WasmABIArgIter i(args); !i.done(); i++) { + ABIArg argLoc = *i; + if (argLoc.kind() == ABIArg::Stack && + args[i.index()] == MIRType::RefOrNull) { + uint32_t offset = argLoc.offsetFromArgBase(); + MOZ_ASSERT(offset < inboundStackArgBytes); + MOZ_ASSERT(offset % sizeof(void*) == 0); + stackMapGenerator_.machineStackTracker.setGCPointer(offset / + sizeof(void*)); + } + } + + GenerateFunctionPrologue(masm, *moduleEnv_.funcs[func_.index].typeId, + compilerEnv_.mode() == CompileMode::Tier1 + ? Some(func_.index) + : Nothing(), + &offsets_); + + // GenerateFunctionPrologue pushes exactly one wasm::Frame's worth of + // stuff, and none of the values are GC pointers. Hence: + if (!stackMapGenerator_.machineStackTracker.pushNonGCPointers( + sizeof(Frame) / sizeof(void*))) { + return false; + } + + // Initialize DebugFrame fields before the stack overflow trap so that + // we have the invariant that all observable Frames in a debugEnabled + // Module have valid DebugFrames. + if (compilerEnv_.debugEnabled()) { +#ifdef JS_CODEGEN_ARM64 + static_assert(DebugFrame::offsetOfFrame() % WasmStackAlignment == 0, + "aligned"); +#endif + masm.reserveStack(DebugFrame::offsetOfFrame()); + if (!stackMapGenerator_.machineStackTracker.pushNonGCPointers( + DebugFrame::offsetOfFrame() / sizeof(void*))) { + return false; + } + + masm.store32( + Imm32(func_.index), + Address(masm.getStackPointer(), DebugFrame::offsetOfFuncIndex())); + masm.store32(Imm32(0), Address(masm.getStackPointer(), + DebugFrame::offsetOfFlags())); + + // No need to initialize cachedReturnJSValue_ or any ref-typed spilled + // register results, as they are traced if and only if a corresponding + // flag (hasCachedReturnJSValue or hasSpilledRefRegisterResult) is set. + } + + // Generate a stack-overflow check and its associated stack map. + + fr.checkStack(ABINonArgReg0, BytecodeOffset(func_.lineOrBytecode)); + + ExitStubMapVector extras; + if (!stackMapGenerator_.generateStackmapEntriesForTrapExit(args, &extras)) { + return false; + } + if (!createStackMap("stack check", extras, masm.currentOffset())) { + return false; + } + + size_t reservedBytes = fr.fixedAllocSize() - masm.framePushed(); + MOZ_ASSERT(0 == (reservedBytes % sizeof(void*))); + + masm.reserveStack(reservedBytes); + fr.onFixedStackAllocated(); + if (!stackMapGenerator_.machineStackTracker.pushNonGCPointers( + reservedBytes / sizeof(void*))) { + return false; + } + + // Locals are stack allocated. Mark ref-typed ones in the stackmap + // accordingly. + for (const Local& l : localInfo_) { + // Locals that are stack arguments were already added to the stack map + // before pushing the frame. + if (l.type == MIRType::RefOrNull && !l.isStackArgument()) { + uint32_t offs = fr.localOffsetFromSp(l); + MOZ_ASSERT(0 == (offs % sizeof(void*))); + stackMapGenerator_.machineStackTracker.setGCPointer(offs / + sizeof(void*)); + } + } + + // Copy arguments from registers to stack. 
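+ // Arguments that arrived on the stack are left where the caller put them + // (locals flagged isStackArgument() refer to the incoming argument area); + // only register arguments need to be stored into their locals here.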
+ for (WasmABIArgIter i(args); !i.done(); i++) { + if (args.isSyntheticStackResultPointerArg(i.index())) { + // If there are stack results and the pointer to stack results + // was passed in a register, store it to the stack. + if (i->argInRegister()) { + fr.storeIncomingStackResultAreaPtr(RegPtr(i->gpr())); + } + // If we're in a debug frame, copy the stack result pointer arg + // to a well-known place. + if (compilerEnv_.debugEnabled()) { + Register target = ABINonArgReturnReg0; + fr.loadIncomingStackResultAreaPtr(RegPtr(target)); + size_t debugFrameOffset = + masm.framePushed() - DebugFrame::offsetOfFrame(); + size_t debugStackResultsPointerOffset = + debugFrameOffset + DebugFrame::offsetOfStackResultsPointer(); + masm.storePtr(target, Address(masm.getStackPointer(), + debugStackResultsPointerOffset)); + } + continue; + } + if (!i->argInRegister()) { + continue; + } + Local& l = localInfo_[args.naturalIndex(i.index())]; + switch (i.mirType()) { + case MIRType::Int32: + fr.storeLocalI32(RegI32(i->gpr()), l); + break; + case MIRType::Int64: + fr.storeLocalI64(RegI64(i->gpr64()), l); + break; + case MIRType::RefOrNull: { + DebugOnly<uint32_t> offs = fr.localOffsetFromSp(l); + MOZ_ASSERT(0 == (offs % sizeof(void*))); + fr.storeLocalPtr(RegPtr(i->gpr()), l); + // We should have just visited this local in the preceding loop. + MOZ_ASSERT(stackMapGenerator_.machineStackTracker.isGCPointer( + offs / sizeof(void*))); + break; + } + case MIRType::Double: + fr.storeLocalF64(RegF64(i->fpu()), l); + break; + case MIRType::Float32: + fr.storeLocalF32(RegF32(i->fpu()), l); + break; +#ifdef ENABLE_WASM_SIMD + case MIRType::Simd128: + fr.storeLocalV128(RegV128(i->fpu()), l); + break; +#endif + default: + MOZ_CRASH("Function argument type"); + } + } + + fr.zeroLocals(&ra); + fr.storeTlsPtr(WasmTlsReg); + + if (compilerEnv_.debugEnabled()) { + insertBreakablePoint(CallSiteDesc::EnterFrame); + if (!createStackMap("debug: breakable point")) { + return false; + } + } + + JitSpew(JitSpew_Codegen, + "# beginFunction: enter body with masm.framePushed = %u", + masm.framePushed()); + MOZ_ASSERT(stackMapGenerator_.framePushedAtEntryToBody.isNothing()); + stackMapGenerator_.framePushedAtEntryToBody.emplace(masm.framePushed()); + + return true; + } + + void popStackReturnValues(const ResultType& resultType) { + uint32_t bytes = ABIResultIter::MeasureStackBytes(resultType); + if (bytes == 0) { + return; + } + Register target = ABINonArgReturnReg0; + Register temp = ABINonArgReturnReg1; + fr.loadIncomingStackResultAreaPtr(RegPtr(target)); + fr.popStackResultsToMemory(target, bytes, temp); + } + + void saveRegisterReturnValues(const ResultType& resultType) { + MOZ_ASSERT(compilerEnv_.debugEnabled()); + size_t debugFrameOffset = masm.framePushed() - DebugFrame::offsetOfFrame(); + size_t registerResultIdx = 0; + for (ABIResultIter i(resultType); !i.done(); i.next()) { + const ABIResult result = i.cur(); + if (!result.inRegister()) { +#ifdef DEBUG + for (i.next(); !i.done(); i.next()) { + MOZ_ASSERT(!i.cur().inRegister()); + } +#endif + break; + } + + size_t resultOffset = + DebugFrame::offsetOfRegisterResult(registerResultIdx); + Address dest(masm.getStackPointer(), debugFrameOffset + resultOffset); + switch (result.type().kind()) { + case ValType::I32: + masm.store32(RegI32(result.gpr()), dest); + break; + case ValType::I64: + masm.store64(RegI64(result.gpr64()), dest); + break; + case ValType::F64: + masm.storeDouble(RegF64(result.fpr()), dest); + break; + case ValType::F32: + masm.storeFloat32(RegF32(result.fpr()), 
dest); + break; + case ValType::Ref: { + uint32_t flag = + DebugFrame::hasSpilledRegisterRefResultBitMask(registerResultIdx); + // Tell Instance::traceFrame that we have a pointer to trace. + masm.or32(Imm32(flag), + Address(masm.getStackPointer(), + debugFrameOffset + DebugFrame::offsetOfFlags())); + masm.storePtr(RegPtr(result.gpr()), dest); + break; + } + case ValType::V128: +#ifdef ENABLE_WASM_SIMD + masm.storeUnalignedSimd128(RegV128(result.fpr()), dest); + break; +#else + MOZ_CRASH("No SIMD support"); +#endif + } + registerResultIdx++; + } + } + + void restoreRegisterReturnValues(const ResultType& resultType) { + MOZ_ASSERT(compilerEnv_.debugEnabled()); + size_t debugFrameOffset = masm.framePushed() - DebugFrame::offsetOfFrame(); + size_t registerResultIdx = 0; + for (ABIResultIter i(resultType); !i.done(); i.next()) { + const ABIResult result = i.cur(); + if (!result.inRegister()) { +#ifdef DEBUG + for (i.next(); !i.done(); i.next()) { + MOZ_ASSERT(!i.cur().inRegister()); + } +#endif + break; + } + size_t resultOffset = + DebugFrame::offsetOfRegisterResult(registerResultIdx++); + Address src(masm.getStackPointer(), debugFrameOffset + resultOffset); + switch (result.type().kind()) { + case ValType::I32: + masm.load32(src, RegI32(result.gpr())); + break; + case ValType::I64: + masm.load64(src, RegI64(result.gpr64())); + break; + case ValType::F64: + masm.loadDouble(src, RegF64(result.fpr())); + break; + case ValType::F32: + masm.loadFloat32(src, RegF32(result.fpr())); + break; + case ValType::Ref: + masm.loadPtr(src, RegPtr(result.gpr())); + break; + case ValType::V128: +#ifdef ENABLE_WASM_SIMD + masm.loadUnalignedSimd128(src, RegV128(result.fpr())); + break; +#else + MOZ_CRASH("No SIMD support"); +#endif + } + } + } + + [[nodiscard]] bool endFunction() { + JitSpew(JitSpew_Codegen, "# endFunction: start of function epilogue"); + + // Always branch to returnLabel_. + masm.breakpoint(); + + // Patch the add in the prologue so that it checks against the correct + // frame size. Flush the constant pool in case it needs to be patched. + masm.flush(); + + // Precondition for patching. + if (masm.oom()) { + return false; + } + + fr.patchCheckStack(); + + masm.bind(&returnLabel_); + + ResultType resultType(ResultType::Vector(funcType().results())); + + popStackReturnValues(resultType); + + if (compilerEnv_.debugEnabled()) { + // Store and reload the return value from DebugFrame::return so that + // it can be clobbered, and/or modified by the debug trap. + saveRegisterReturnValues(resultType); + insertBreakablePoint(CallSiteDesc::Breakpoint); + if (!createStackMap("debug: breakpoint")) { + return false; + } + insertBreakablePoint(CallSiteDesc::LeaveFrame); + if (!createStackMap("debug: leave frame")) { + return false; + } + restoreRegisterReturnValues(resultType); + } + + // To satisy Tls extent invariant we need to reload WasmTlsReg because + // baseline can clobber it. + fr.loadTlsPtr(WasmTlsReg); + GenerateFunctionEpilogue(masm, fr.fixedAllocSize(), &offsets_); + +#if defined(JS_ION_PERF) + // FIXME - profiling code missing. No bug for this. + + // Note the end of the inline code and start of the OOL code. 
+ // gen->perfSpewer().noteEndInlineCode(masm); +#endif + + JitSpew(JitSpew_Codegen, "# endFunction: end of function epilogue"); + JitSpew(JitSpew_Codegen, "# endFunction: start of OOL code"); + if (!generateOutOfLineCode()) { + return false; + } + + offsets_.end = masm.currentOffset(); + + if (!fr.checkStackHeight()) { + return false; + } + + JitSpew(JitSpew_Codegen, "# endFunction: end of OOL code for index %d", + (int)func_.index); + return !masm.oom(); + } + + ////////////////////////////////////////////////////////////////////// + // + // Calls. + + struct FunctionCall { + explicit FunctionCall(uint32_t lineOrBytecode) + : lineOrBytecode(lineOrBytecode), + isInterModule(false), + usesSystemAbi(false), +#ifdef JS_CODEGEN_ARM + hardFP(true), +#endif + frameAlignAdjustment(0), + stackArgAreaSize(0) { + } + + uint32_t lineOrBytecode; + WasmABIArgGenerator abi; + bool isInterModule; + bool usesSystemAbi; +#ifdef JS_CODEGEN_ARM + bool hardFP; +#endif + size_t frameAlignAdjustment; + size_t stackArgAreaSize; + }; + + void beginCall(FunctionCall& call, UseABI useABI, InterModule interModule) { + MOZ_ASSERT_IF(useABI == UseABI::Builtin, interModule == InterModule::False); + + call.isInterModule = interModule == InterModule::True; + call.usesSystemAbi = useABI == UseABI::System; + + if (call.usesSystemAbi) { + // Call-outs need to use the appropriate system ABI. +#if defined(JS_CODEGEN_ARM) + call.hardFP = UseHardFpABI(); + call.abi.setUseHardFp(call.hardFP); +#elif defined(JS_CODEGEN_MIPS32) + call.abi.enforceO32ABI(); +#endif + } else { +#if defined(JS_CODEGEN_ARM) + MOZ_ASSERT(call.hardFP, + "All private ABIs pass FP arguments in registers"); +#endif + } + + // Use masm.framePushed() because the value we want here does not depend + // on the height of the frame's stack area, but the actual size of the + // allocated frame. + call.frameAlignAdjustment = ComputeByteAlignment( + masm.framePushed() + sizeof(Frame), JitStackAlignment); + } + + void endCall(FunctionCall& call, size_t stackSpace) { + size_t adjustment = call.stackArgAreaSize + call.frameAlignAdjustment; + fr.freeArgAreaAndPopBytes(adjustment, stackSpace); + + MOZ_ASSERT( + stackMapGenerator_.framePushedExcludingOutboundCallArgs.isSome()); + stackMapGenerator_.framePushedExcludingOutboundCallArgs.reset(); + + if (call.isInterModule) { + fr.loadTlsPtr(WasmTlsReg); + masm.loadWasmPinnedRegsFromTls(); + masm.switchToWasmTlsRealm(ABINonArgReturnReg0, ABINonArgReturnReg1); + } else if (call.usesSystemAbi) { + // On x86 there are no pinned registers, so don't waste time + // reloading the Tls. +#ifndef JS_CODEGEN_X86 + fr.loadTlsPtr(WasmTlsReg); + masm.loadWasmPinnedRegsFromTls(); +#endif + } + } + + void startCallArgs(size_t stackArgAreaSizeUnaligned, FunctionCall* call) { + size_t stackArgAreaSizeAligned = + AlignStackArgAreaSize(stackArgAreaSizeUnaligned); + MOZ_ASSERT(stackArgAreaSizeUnaligned <= stackArgAreaSizeAligned); + + // Record the masm.framePushed() value at this point, before we push args + // for the call, but including the alignment space placed above the args. + // This defines the lower limit of the stackmap that will be created for + // this call. 
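+ // Concretely, the value recorded below is the current framePushed() plus + // the two pieces of alignment padding that the upcoming allocArgArea() call + // will add, but not the outbound argument bytes themselves.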
+ MOZ_ASSERT( + stackMapGenerator_.framePushedExcludingOutboundCallArgs.isNothing()); + stackMapGenerator_.framePushedExcludingOutboundCallArgs.emplace( + // However much we've pushed so far + masm.framePushed() + + // Extra space we'll push to get the frame aligned + call->frameAlignAdjustment + + // Extra space we'll push to get the outbound arg area 16-aligned + (stackArgAreaSizeAligned - stackArgAreaSizeUnaligned)); + + call->stackArgAreaSize = stackArgAreaSizeAligned; + + size_t adjustment = call->stackArgAreaSize + call->frameAlignAdjustment; + fr.allocArgArea(adjustment); + } + + const ABIArg reservePointerArgument(FunctionCall* call) { + return call->abi.next(MIRType::Pointer); + } + + // TODO / OPTIMIZE (Bug 1316821): Note passArg is used only in one place. + // (Or it was, until Luke wandered through, but that can be fixed again.) + // I'm not saying we should manually inline it, but we could hoist the + // dispatch into the caller and have type-specific implementations of + // passArg: passArgI32(), etc. Then those might be inlined, at least in PGO + // builds. + // + // The bulk of the work here (60%) is in the next() call, though. + // + // Notably, since next() is so expensive, StackArgAreaSizeUnaligned() + // becomes expensive too. + // + // Somehow there could be a trick here where the sequence of argument types + // (read from the input stream) leads to a cached entry for + // StackArgAreaSizeUnaligned() and for how to pass arguments... + // + // But at least we could reduce the cost of StackArgAreaSizeUnaligned() by + // first reading the argument types into a (reusable) vector, then we have + // the outgoing size at low cost, and then we can pass args based on the + // info we read. + + void passArg(ValType type, const Stk& arg, FunctionCall* call) { + switch (type.kind()) { + case ValType::I32: { + ABIArg argLoc = call->abi.next(MIRType::Int32); + if (argLoc.kind() == ABIArg::Stack) { + ScratchI32 scratch(*this); + loadI32(arg, scratch); + masm.store32(scratch, Address(masm.getStackPointer(), + argLoc.offsetFromArgBase())); + } else { + loadI32(arg, RegI32(argLoc.gpr())); + } + break; + } + case ValType::I64: { + ABIArg argLoc = call->abi.next(MIRType::Int64); + if (argLoc.kind() == ABIArg::Stack) { + ScratchI32 scratch(*this); +#ifdef JS_PUNBOX64 + loadI64(arg, fromI32(scratch)); + masm.storePtr(scratch, Address(masm.getStackPointer(), + argLoc.offsetFromArgBase())); +#else + loadI64Low(arg, scratch); + masm.store32(scratch, LowWord(Address(masm.getStackPointer(), + argLoc.offsetFromArgBase()))); + loadI64High(arg, scratch); + masm.store32(scratch, HighWord(Address(masm.getStackPointer(), + argLoc.offsetFromArgBase()))); +#endif + } else { + loadI64(arg, RegI64(argLoc.gpr64())); + } + break; + } + case ValType::V128: { +#ifdef ENABLE_WASM_SIMD + ABIArg argLoc = call->abi.next(MIRType::Simd128); + switch (argLoc.kind()) { + case ABIArg::Stack: { + ScratchV128 scratch(*this); + loadV128(arg, scratch); + masm.storeUnalignedSimd128( + (RegV128)scratch, + Address(masm.getStackPointer(), argLoc.offsetFromArgBase())); + break; + } + case ABIArg::GPR: { + MOZ_CRASH("Unexpected parameter passing discipline"); + } + case ABIArg::FPU: { + loadV128(arg, RegV128(argLoc.fpu())); + break; + } +# if defined(JS_CODEGEN_REGISTER_PAIR) + case ABIArg::GPR_PAIR: { + MOZ_CRASH("Unexpected parameter passing discipline"); + } +# endif + case ABIArg::Uninitialized: + MOZ_CRASH("Uninitialized ABIArg kind"); + } + break; +#else + MOZ_CRASH("No SIMD support"); +#endif + } + case ValType::F64: { + 
ABIArg argLoc = call->abi.next(MIRType::Double); + switch (argLoc.kind()) { + case ABIArg::Stack: { + ScratchF64 scratch(*this); + loadF64(arg, scratch); + masm.storeDouble(scratch, Address(masm.getStackPointer(), + argLoc.offsetFromArgBase())); + break; + } +#if defined(JS_CODEGEN_REGISTER_PAIR) + case ABIArg::GPR_PAIR: { +# if defined(JS_CODEGEN_ARM) + ScratchF64 scratch(*this); + loadF64(arg, scratch); + masm.ma_vxfer(scratch, argLoc.evenGpr(), argLoc.oddGpr()); + break; +# elif defined(JS_CODEGEN_MIPS32) + ScratchF64 scratch(*this); + loadF64(arg, scratch); + MOZ_ASSERT(MOZ_LITTLE_ENDIAN()); + masm.moveFromDoubleLo(scratch, argLoc.evenGpr()); + masm.moveFromDoubleHi(scratch, argLoc.oddGpr()); + break; +# else + MOZ_CRASH("BaseCompiler platform hook: passArg F64 pair"); +# endif + } +#endif + case ABIArg::FPU: { + loadF64(arg, RegF64(argLoc.fpu())); + break; + } + case ABIArg::GPR: { + MOZ_CRASH("Unexpected parameter passing discipline"); + } + case ABIArg::Uninitialized: + MOZ_CRASH("Uninitialized ABIArg kind"); + } + break; + } + case ValType::F32: { + ABIArg argLoc = call->abi.next(MIRType::Float32); + switch (argLoc.kind()) { + case ABIArg::Stack: { + ScratchF32 scratch(*this); + loadF32(arg, scratch); + masm.storeFloat32(scratch, Address(masm.getStackPointer(), + argLoc.offsetFromArgBase())); + break; + } + case ABIArg::GPR: { + ScratchF32 scratch(*this); + loadF32(arg, scratch); + masm.moveFloat32ToGPR(scratch, argLoc.gpr()); + break; + } + case ABIArg::FPU: { + loadF32(arg, RegF32(argLoc.fpu())); + break; + } +#if defined(JS_CODEGEN_REGISTER_PAIR) + case ABIArg::GPR_PAIR: { + MOZ_CRASH("Unexpected parameter passing discipline"); + } +#endif + case ABIArg::Uninitialized: + MOZ_CRASH("Uninitialized ABIArg kind"); + } + break; + } + case ValType::Ref: { + ABIArg argLoc = call->abi.next(MIRType::RefOrNull); + if (argLoc.kind() == ABIArg::Stack) { + ScratchPtr scratch(*this); + loadRef(arg, scratch); + masm.storePtr(scratch, Address(masm.getStackPointer(), + argLoc.offsetFromArgBase())); + } else { + loadRef(arg, RegPtr(argLoc.gpr())); + } + break; + } + } + } + + CodeOffset callDefinition(uint32_t funcIndex, const FunctionCall& call) { + CallSiteDesc desc(call.lineOrBytecode, CallSiteDesc::Func); + return masm.call(desc, funcIndex); + } + + CodeOffset callSymbolic(SymbolicAddress callee, const FunctionCall& call) { + CallSiteDesc desc(call.lineOrBytecode, CallSiteDesc::Symbolic); + return masm.call(desc, callee); + } + + // Precondition: sync() + + CodeOffset callIndirect(uint32_t funcTypeIndex, uint32_t tableIndex, + const Stk& indexVal, const FunctionCall& call) { + const TypeIdDesc& funcTypeId = moduleEnv_.typeIds[funcTypeIndex]; + MOZ_ASSERT(funcTypeId.kind() != TypeIdDescKind::None); + + const TableDesc& table = moduleEnv_.tables[tableIndex]; + + loadI32(indexVal, RegI32(WasmTableCallIndexReg)); + + CallSiteDesc desc(call.lineOrBytecode, CallSiteDesc::Dynamic); + CalleeDesc callee = CalleeDesc::wasmTable(table, funcTypeId); + return masm.wasmCallIndirect(desc, callee, NeedsBoundsCheck(true)); + } + + // Precondition: sync() + + CodeOffset callImport(unsigned globalDataOffset, const FunctionCall& call) { + CallSiteDesc desc(call.lineOrBytecode, CallSiteDesc::Dynamic); + CalleeDesc callee = CalleeDesc::import(globalDataOffset); + return masm.wasmCallImport(desc, callee); + } + + CodeOffset builtinCall(SymbolicAddress builtin, const FunctionCall& call) { + return callSymbolic(builtin, call); + } + + CodeOffset builtinInstanceMethodCall(const SymbolicAddressSignature& builtin, + 
const ABIArg& instanceArg, + const FunctionCall& call) { + // Builtin method calls assume the TLS register has been set. + fr.loadTlsPtr(WasmTlsReg); + + CallSiteDesc desc(call.lineOrBytecode, CallSiteDesc::Symbolic); + return masm.wasmCallBuiltinInstanceMethod( + desc, instanceArg, builtin.identity, builtin.failureMode); + } + + MOZ_MUST_USE bool pushCallResults(const FunctionCall& call, ResultType type, + const StackResultsLoc& loc) { +#if defined(JS_CODEGEN_ARM) + // pushResults currently bypasses special case code in captureReturnedFxx() + // that converts GPR results to FPR results for systemABI+softFP. If we + // ever start using that combination for calls we need more code. This + // assert is stronger than we need - we only care about results in return + // registers - but that's OK. + MOZ_ASSERT(!call.usesSystemAbi || call.hardFP); +#endif + return pushResults(type, fr.stackResultsBase(loc.bytes())); + } + + ////////////////////////////////////////////////////////////////////// + // + // Sundry low-level code generators. + + // The compiler depends on moveImm32() clearing the high bits of a 64-bit + // register on 64-bit systems except MIPS64 where high bits are sign extended + // from lower bits. + + void moveImm32(int32_t v, RegI32 dest) { masm.move32(Imm32(v), dest); } + + void moveImm64(int64_t v, RegI64 dest) { masm.move64(Imm64(v), dest); } + + void moveImmRef(intptr_t v, RegPtr dest) { masm.movePtr(ImmWord(v), dest); } + + void moveImmF32(float f, RegF32 dest) { masm.loadConstantFloat32(f, dest); } + + void moveImmF64(double d, RegF64 dest) { masm.loadConstantDouble(d, dest); } + + [[nodiscard]] bool addInterruptCheck() { + ScratchI32 tmp(*this); + fr.loadTlsPtr(tmp); + masm.wasmInterruptCheck(tmp, bytecodeOffset()); + return createStackMap("addInterruptCheck"); + } + + void jumpTable(const LabelVector& labels, Label* theTable) { + // Flush constant pools to ensure that the table is never interrupted by + // constant pool entries. + masm.flush(); + +#if defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64) + // Prevent nop sequences to appear in the jump table. + AutoForbidNops afn(&masm); +#endif + masm.bind(theTable); + + for (uint32_t i = 0; i < labels.length(); i++) { + CodeLabel cl; + masm.writeCodePointer(&cl); + cl.target()->bind(labels[i].offset()); + masm.addCodeLabel(cl); + } + } + + void tableSwitch(Label* theTable, RegI32 switchValue, Label* dispatchCode) { + masm.bind(dispatchCode); + +#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86) + ScratchI32 scratch(*this); + CodeLabel tableCl; + + masm.mov(&tableCl, scratch); + + tableCl.target()->bind(theTable->offset()); + masm.addCodeLabel(tableCl); + + masm.jmp(Operand(scratch, switchValue, ScalePointer)); +#elif defined(JS_CODEGEN_ARM) + // Flush constant pools: offset must reflect the distance from the MOV + // to the start of the table; as the address of the MOV is given by the + // label, nothing must come between the bind() and the ma_mov(). + AutoForbidPoolsAndNops afp(&masm, + /* number of instructions in scope = */ 5); + + ScratchI32 scratch(*this); + + // Compute the offset from the ma_mov instruction to the jump table. + Label here; + masm.bind(&here); + uint32_t offset = here.offset() - theTable->offset(); + + // Read PC+8 + masm.ma_mov(pc, scratch); + + // ARM scratch register is required by ma_sub. + ScratchRegisterScope arm_scratch(*this); + + // Compute the absolute table base pointer into `scratch`, offset by 8 + // to account for the fact that ma_mov read PC+8. 
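+ // That is: scratch == here + 8 and theTable == here - offset, so + // subtracting (offset + 8) from scratch yields the table's address.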
+ masm.ma_sub(Imm32(offset + 8), scratch, arm_scratch); + + // Jump indirect via table element. + masm.ma_ldr(DTRAddr(scratch, DtrRegImmShift(switchValue, LSL, 2)), pc, + Offset, Assembler::Always); +#elif defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64) + ScratchI32 scratch(*this); + CodeLabel tableCl; + + masm.ma_li(scratch, &tableCl); + + tableCl.target()->bind(theTable->offset()); + masm.addCodeLabel(tableCl); + + masm.branchToComputedAddress(BaseIndex(scratch, switchValue, ScalePointer)); +#elif defined(JS_CODEGEN_ARM64) + AutoForbidPoolsAndNops afp(&masm, + /* number of instructions in scope = */ 4); + + ScratchI32 scratch(*this); + + ARMRegister s(scratch, 64); + ARMRegister v(switchValue, 64); + masm.Adr(s, theTable); + masm.Add(s, s, Operand(v, vixl::LSL, 3)); + masm.Ldr(s, MemOperand(s, 0)); + masm.Br(s); +#else + MOZ_CRASH("BaseCompiler platform hook: tableSwitch"); +#endif + } + + RegI32 captureReturnedI32() { + RegI32 r = RegI32(ReturnReg); + MOZ_ASSERT(isAvailableI32(r)); + needI32(r); +#if defined(JS_CODEGEN_X64) + if (JitOptions.spectreIndexMasking) { + masm.movl(r, r); + } +#endif + return r; + } + + RegI64 captureReturnedI64() { + RegI64 r = RegI64(ReturnReg64); + MOZ_ASSERT(isAvailableI64(r)); + needI64(r); + return r; + } + + RegF32 captureReturnedF32(const FunctionCall& call) { + RegF32 r = RegF32(ReturnFloat32Reg); + MOZ_ASSERT(isAvailableF32(r)); + needF32(r); +#if defined(JS_CODEGEN_ARM) + if (call.usesSystemAbi && !call.hardFP) { + masm.ma_vxfer(ReturnReg, r); + } +#endif + return r; + } + + RegF64 captureReturnedF64(const FunctionCall& call) { + RegF64 r = RegF64(ReturnDoubleReg); + MOZ_ASSERT(isAvailableF64(r)); + needF64(r); +#if defined(JS_CODEGEN_ARM) + if (call.usesSystemAbi && !call.hardFP) { + masm.ma_vxfer(ReturnReg64.low, ReturnReg64.high, r); + } +#endif + return r; + } + +#ifdef ENABLE_WASM_SIMD + RegV128 captureReturnedV128(const FunctionCall& call) { + RegV128 r = RegV128(ReturnSimd128Reg); + MOZ_ASSERT(isAvailableV128(r)); + needV128(r); + return r; + } +#endif + + RegPtr captureReturnedRef() { + RegPtr r = RegPtr(ReturnReg); + MOZ_ASSERT(isAvailableRef(r)); + needRef(r); + return r; + } + + void checkDivideByZeroI32(RegI32 rhs) { + Label nonZero; + masm.branchTest32(Assembler::NonZero, rhs, rhs, &nonZero); + trap(Trap::IntegerDivideByZero); + masm.bind(&nonZero); + } + + void checkDivideByZeroI64(RegI64 r) { + Label nonZero; + ScratchI32 scratch(*this); + masm.branchTest64(Assembler::NonZero, r, r, scratch, &nonZero); + trap(Trap::IntegerDivideByZero); + masm.bind(&nonZero); + } + + void checkDivideSignedOverflowI32(RegI32 rhs, RegI32 srcDest, Label* done, + bool zeroOnOverflow) { + Label notMin; + masm.branch32(Assembler::NotEqual, srcDest, Imm32(INT32_MIN), ¬Min); + if (zeroOnOverflow) { + masm.branch32(Assembler::NotEqual, rhs, Imm32(-1), ¬Min); + moveImm32(0, srcDest); + masm.jump(done); + } else { + masm.branch32(Assembler::NotEqual, rhs, Imm32(-1), ¬Min); + trap(Trap::IntegerOverflow); + } + masm.bind(¬Min); + } + + void checkDivideSignedOverflowI64(RegI64 rhs, RegI64 srcDest, Label* done, + bool zeroOnOverflow) { + Label notmin; + masm.branch64(Assembler::NotEqual, srcDest, Imm64(INT64_MIN), ¬min); + masm.branch64(Assembler::NotEqual, rhs, Imm64(-1), ¬min); + if (zeroOnOverflow) { + masm.xor64(srcDest, srcDest); + masm.jump(done); + } else { + trap(Trap::IntegerOverflow); + } + masm.bind(¬min); + } + +#ifndef RABALDR_INT_DIV_I64_CALLOUT + void quotientI64(RegI64 rhs, RegI64 srcDest, RegI64 reserved, + IsUnsigned isUnsigned, bool 
isConst, int64_t c) { + Label done; + + if (!isConst || c == 0) { + checkDivideByZeroI64(rhs); + } + + if (!isUnsigned && (!isConst || c == -1)) { + checkDivideSignedOverflowI64(rhs, srcDest, &done, ZeroOnOverflow(false)); + } + +# if defined(JS_CODEGEN_X64) + // The caller must set up the following situation. + MOZ_ASSERT(srcDest.reg == rax); + MOZ_ASSERT(reserved == specific_.rdx); + if (isUnsigned) { + masm.xorq(rdx, rdx); + masm.udivq(rhs.reg); + } else { + masm.cqo(); + masm.idivq(rhs.reg); + } +# elif defined(JS_CODEGEN_MIPS64) + if (isUnsigned) { + masm.as_ddivu(srcDest.reg, rhs.reg); + } else { + masm.as_ddiv(srcDest.reg, rhs.reg); + } + masm.as_mflo(srcDest.reg); +# elif defined(JS_CODEGEN_ARM64) + ARMRegister sd(srcDest.reg, 64); + ARMRegister r(rhs.reg, 64); + if (isUnsigned) { + masm.Udiv(sd, sd, r); + } else { + masm.Sdiv(sd, sd, r); + } +# else + MOZ_CRASH("BaseCompiler platform hook: quotientI64"); +# endif + masm.bind(&done); + } + + void remainderI64(RegI64 rhs, RegI64 srcDest, RegI64 reserved, + IsUnsigned isUnsigned, bool isConst, int64_t c) { + Label done; + + if (!isConst || c == 0) { + checkDivideByZeroI64(rhs); + } + + if (!isUnsigned && (!isConst || c == -1)) { + checkDivideSignedOverflowI64(rhs, srcDest, &done, ZeroOnOverflow(true)); + } + +# if defined(JS_CODEGEN_X64) + // The caller must set up the following situation. + MOZ_ASSERT(srcDest.reg == rax); + MOZ_ASSERT(reserved == specific_.rdx); + + if (isUnsigned) { + masm.xorq(rdx, rdx); + masm.udivq(rhs.reg); + } else { + masm.cqo(); + masm.idivq(rhs.reg); + } + masm.movq(rdx, rax); +# elif defined(JS_CODEGEN_MIPS64) + if (isUnsigned) { + masm.as_ddivu(srcDest.reg, rhs.reg); + } else { + masm.as_ddiv(srcDest.reg, rhs.reg); + } + masm.as_mfhi(srcDest.reg); +# elif defined(JS_CODEGEN_ARM64) + MOZ_ASSERT(reserved.isInvalid()); + ARMRegister sd(srcDest.reg, 64); + ARMRegister r(rhs.reg, 64); + ScratchI32 temp(*this); + ARMRegister t(temp, 64); + if (isUnsigned) { + masm.Udiv(t, sd, r); + } else { + masm.Sdiv(t, sd, r); + } + masm.Mul(t, t, r); + masm.Sub(sd, sd, t); +# else + MOZ_CRASH("BaseCompiler platform hook: remainderI64"); +# endif + masm.bind(&done); + } +#endif // RABALDR_INT_DIV_I64_CALLOUT + + RegI32 needRotate64Temp() { +#if defined(JS_CODEGEN_X86) + return needI32(); +#elif defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_ARM) || \ + defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_MIPS32) || \ + defined(JS_CODEGEN_MIPS64) + return RegI32::Invalid(); +#else + MOZ_CRASH("BaseCompiler platform hook: needRotate64Temp"); +#endif + } + + void maskShiftCount32(RegI32 r) { +#if defined(JS_CODEGEN_ARM) + masm.and32(Imm32(31), r); +#endif + } + + RegI32 needPopcnt32Temp() { +#if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) + return AssemblerX86Shared::HasPOPCNT() ? RegI32::Invalid() : needI32(); +#elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64) || \ + defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64) + return needI32(); +#else + MOZ_CRASH("BaseCompiler platform hook: needPopcnt32Temp"); +#endif + } + + RegI32 needPopcnt64Temp() { +#if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) + return AssemblerX86Shared::HasPOPCNT() ? 
RegI32::Invalid() : needI32(); +#elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64) || \ + defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64) + return needI32(); +#else + MOZ_CRASH("BaseCompiler platform hook: needPopcnt64Temp"); +#endif + } + + class OutOfLineTruncateCheckF32OrF64ToI32 : public OutOfLineCode { + AnyReg src; + RegI32 dest; + TruncFlags flags; + BytecodeOffset off; + + public: + OutOfLineTruncateCheckF32OrF64ToI32(AnyReg src, RegI32 dest, + TruncFlags flags, BytecodeOffset off) + : src(src), dest(dest), flags(flags), off(off) {} + + virtual void generate(MacroAssembler* masm) override { + if (src.tag == AnyReg::F32) { + masm->oolWasmTruncateCheckF32ToI32(src.f32(), dest, flags, off, + rejoin()); + } else if (src.tag == AnyReg::F64) { + masm->oolWasmTruncateCheckF64ToI32(src.f64(), dest, flags, off, + rejoin()); + } else { + MOZ_CRASH("unexpected type"); + } + } + }; + + [[nodiscard]] bool truncateF32ToI32(RegF32 src, RegI32 dest, + TruncFlags flags) { + BytecodeOffset off = bytecodeOffset(); + OutOfLineCode* ool = + addOutOfLineCode(new (alloc_) OutOfLineTruncateCheckF32OrF64ToI32( + AnyReg(src), dest, flags, off)); + if (!ool) { + return false; + } + bool isSaturating = flags & TRUNC_SATURATING; + if (flags & TRUNC_UNSIGNED) { + masm.wasmTruncateFloat32ToUInt32(src, dest, isSaturating, ool->entry()); + } else { + masm.wasmTruncateFloat32ToInt32(src, dest, isSaturating, ool->entry()); + } + masm.bind(ool->rejoin()); + return true; + } + + [[nodiscard]] bool truncateF64ToI32(RegF64 src, RegI32 dest, + TruncFlags flags) { + BytecodeOffset off = bytecodeOffset(); + OutOfLineCode* ool = + addOutOfLineCode(new (alloc_) OutOfLineTruncateCheckF32OrF64ToI32( + AnyReg(src), dest, flags, off)); + if (!ool) { + return false; + } + bool isSaturating = flags & TRUNC_SATURATING; + if (flags & TRUNC_UNSIGNED) { + masm.wasmTruncateDoubleToUInt32(src, dest, isSaturating, ool->entry()); + } else { + masm.wasmTruncateDoubleToInt32(src, dest, isSaturating, ool->entry()); + } + masm.bind(ool->rejoin()); + return true; + } + + class OutOfLineTruncateCheckF32OrF64ToI64 : public OutOfLineCode { + AnyReg src; + RegI64 dest; + TruncFlags flags; + BytecodeOffset off; + + public: + OutOfLineTruncateCheckF32OrF64ToI64(AnyReg src, RegI64 dest, + TruncFlags flags, BytecodeOffset off) + : src(src), dest(dest), flags(flags), off(off) {} + + virtual void generate(MacroAssembler* masm) override { + if (src.tag == AnyReg::F32) { + masm->oolWasmTruncateCheckF32ToI64(src.f32(), dest, flags, off, + rejoin()); + } else if (src.tag == AnyReg::F64) { + masm->oolWasmTruncateCheckF64ToI64(src.f64(), dest, flags, off, + rejoin()); + } else { + MOZ_CRASH("unexpected type"); + } + } + }; + +#ifndef RABALDR_FLOAT_TO_I64_CALLOUT + [[nodiscard]] RegF64 needTempForFloatingToI64(TruncFlags flags) { +# if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) + if (flags & TRUNC_UNSIGNED) { + return needF64(); + } +# endif + return RegF64::Invalid(); + } + + [[nodiscard]] bool truncateF32ToI64(RegF32 src, RegI64 dest, TruncFlags flags, + RegF64 temp) { + OutOfLineCode* ool = + addOutOfLineCode(new (alloc_) OutOfLineTruncateCheckF32OrF64ToI64( + AnyReg(src), dest, flags, bytecodeOffset())); + if (!ool) { + return false; + } + bool isSaturating = flags & TRUNC_SATURATING; + if (flags & TRUNC_UNSIGNED) { + masm.wasmTruncateFloat32ToUInt64(src, dest, isSaturating, ool->entry(), + ool->rejoin(), temp); + } else { + masm.wasmTruncateFloat32ToInt64(src, dest, isSaturating, ool->entry(), + ool->rejoin(), temp); + } + return 
true; + } + + [[nodiscard]] bool truncateF64ToI64(RegF64 src, RegI64 dest, TruncFlags flags, + RegF64 temp) { + OutOfLineCode* ool = + addOutOfLineCode(new (alloc_) OutOfLineTruncateCheckF32OrF64ToI64( + AnyReg(src), dest, flags, bytecodeOffset())); + if (!ool) { + return false; + } + bool isSaturating = flags & TRUNC_SATURATING; + if (flags & TRUNC_UNSIGNED) { + masm.wasmTruncateDoubleToUInt64(src, dest, isSaturating, ool->entry(), + ool->rejoin(), temp); + } else { + masm.wasmTruncateDoubleToInt64(src, dest, isSaturating, ool->entry(), + ool->rejoin(), temp); + } + return true; + } +#endif // RABALDR_FLOAT_TO_I64_CALLOUT + +#ifndef RABALDR_I64_TO_FLOAT_CALLOUT + RegI32 needConvertI64ToFloatTemp(ValType to, bool isUnsigned) { + bool needs = false; + if (to == ValType::F64) { + needs = isUnsigned && masm.convertUInt64ToDoubleNeedsTemp(); + } else { +# if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) + needs = true; +# endif + } + return needs ? needI32() : RegI32::Invalid(); + } + + void convertI64ToF32(RegI64 src, bool isUnsigned, RegF32 dest, RegI32 temp) { + if (isUnsigned) { + masm.convertUInt64ToFloat32(src, dest, temp); + } else { + masm.convertInt64ToFloat32(src, dest); + } + } + + void convertI64ToF64(RegI64 src, bool isUnsigned, RegF64 dest, RegI32 temp) { + if (isUnsigned) { + masm.convertUInt64ToDouble(src, dest, temp); + } else { + masm.convertInt64ToDouble(src, dest); + } + } +#endif // RABALDR_I64_TO_FLOAT_CALLOUT + + void cmp64Set(Assembler::Condition cond, RegI64 lhs, RegI64 rhs, + RegI32 dest) { +#if defined(JS_PUNBOX64) + masm.cmpPtrSet(cond, lhs.reg, rhs.reg, dest); +#elif defined(JS_CODEGEN_MIPS32) + masm.cmp64Set(cond, lhs, rhs, dest); +#else + // TODO / OPTIMIZE (Bug 1316822): This is pretty branchy, we should be + // able to do better. + Label done, condTrue; + masm.branch64(cond, lhs, rhs, &condTrue); + moveImm32(0, dest); + masm.jump(&done); + masm.bind(&condTrue); + moveImm32(1, dest); + masm.bind(&done); +#endif + } + + void eqz64(RegI64 src, RegI32 dest) { +#ifdef JS_PUNBOX64 + masm.cmpPtrSet(Assembler::Equal, src.reg, ImmWord(0), dest); +#else + masm.or32(src.high, src.low); + masm.cmp32Set(Assembler::Equal, src.low, Imm32(0), dest); +#endif + } + + [[nodiscard]] bool supportsRoundInstruction(RoundingMode mode) { + return Assembler::HasRoundInstruction(mode); + } + + void roundF32(RoundingMode roundingMode, RegF32 f0) { + masm.nearbyIntFloat32(roundingMode, f0, f0); + } + + void roundF64(RoundingMode roundingMode, RegF64 f0) { + masm.nearbyIntDouble(roundingMode, f0, f0); + } + + ////////////////////////////////////////////////////////////////////// + // + // Global variable access. + + Address addressOfGlobalVar(const GlobalDesc& global, RegI32 tmp) { + uint32_t globalToTlsOffset = + offsetof(TlsData, globalArea) + global.offset(); + fr.loadTlsPtr(tmp); + if (global.isIndirect()) { + masm.loadPtr(Address(tmp, globalToTlsOffset), tmp); + return Address(tmp, 0); + } + return Address(tmp, globalToTlsOffset); + } + + ////////////////////////////////////////////////////////////////////// + // + // Heap access. + + void bceCheckLocal(MemoryAccessDesc* access, AccessCheck* check, + uint32_t local) { + if (local >= sizeof(BCESet) * 8) { + return; + } + + uint32_t offsetGuardLimit = + GetMaxOffsetGuardLimit(moduleEnv_.hugeMemoryEnabled()); + + if ((bceSafe_ & (BCESet(1) << local)) && + access->offset() < offsetGuardLimit) { + check->omitBoundsCheck = true; + } + + // The local becomes safe even if the offset is beyond the guard limit. 
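+    // (The bit is cleared again by bceLocalIsUpdated() below whenever the
+    // local is assigned.)  For example, in
+    //
+    //   (i32.load (local.get 0))
+    //   (i32.load offset=4 (local.get 0))
+    //
+    // the second load may omit its bounds check, provided local 0 is not
+    // assigned between the two accesses and the offset is below the guard
+    // limit.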
+    bceSafe_ |= (BCESet(1) << local);
+  }
+
+  void bceLocalIsUpdated(uint32_t local) {
+    if (local >= sizeof(BCESet) * 8) {
+      return;
+    }
+
+    bceSafe_ &= ~(BCESet(1) << local);
+  }
+
+  void prepareMemoryAccess(MemoryAccessDesc* access, AccessCheck* check,
+                           RegI32 tls, RegI32 ptr) {
+    uint32_t offsetGuardLimit =
+        GetMaxOffsetGuardLimit(moduleEnv_.hugeMemoryEnabled());
+
+    // Fold the offset if necessary for further computations.
+    if (access->offset() >= offsetGuardLimit ||
+        (access->isAtomic() && !check->omitAlignmentCheck &&
+         !check->onlyPointerAlignment)) {
+      Label ok;
+      masm.branchAdd32(Assembler::CarryClear, Imm32(access->offset()), ptr,
+                       &ok);
+      masm.wasmTrap(Trap::OutOfBounds, bytecodeOffset());
+      masm.bind(&ok);
+      access->clearOffset();
+      check->onlyPointerAlignment = true;
+    }
+
+    // Alignment check if required.
+
+    if (access->isAtomic() && !check->omitAlignmentCheck) {
+      MOZ_ASSERT(check->onlyPointerAlignment);
+      // We only care about the low pointer bits here.
+      Label ok;
+      masm.branchTest32(Assembler::Zero, ptr, Imm32(access->byteSize() - 1),
+                        &ok);
+      masm.wasmTrap(Trap::UnalignedAccess, bytecodeOffset());
+      masm.bind(&ok);
+    }
+
+    // Ensure that there is no tls register if we don't need one.
+
+    if (moduleEnv_.hugeMemoryEnabled()) {
+      // We have HeapReg and no bounds checking and need to load neither
+      // memoryBase nor boundsCheckLimit from tls.
+      MOZ_ASSERT_IF(check->omitBoundsCheck, tls.isInvalid());
+    }
+#ifdef JS_CODEGEN_ARM
+    // We have HeapReg on ARM and don't need to load the memoryBase from tls.
+    MOZ_ASSERT_IF(check->omitBoundsCheck, tls.isInvalid());
+#endif
+
+    // Bounds check if required.
+
+    if (!moduleEnv_.hugeMemoryEnabled() && !check->omitBoundsCheck) {
+      Label ok;
+      masm.wasmBoundsCheck32(
+          Assembler::Below, ptr,
+          Address(tls, offsetof(TlsData, boundsCheckLimit32)), &ok);
+      masm.wasmTrap(Trap::OutOfBounds, bytecodeOffset());
+      masm.bind(&ok);
+    }
+  }
+
+#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_ARM) ||     \
+    defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_MIPS32) || \
+    defined(JS_CODEGEN_MIPS64)
+  BaseIndex prepareAtomicMemoryAccess(MemoryAccessDesc* access,
+                                      AccessCheck* check, RegI32 tls,
+                                      RegI32 ptr) {
+    MOZ_ASSERT(needTlsForAccess(*check) == tls.isValid());
+    prepareMemoryAccess(access, check, tls, ptr);
+    return BaseIndex(HeapReg, ptr, TimesOne, access->offset());
+  }
+#elif defined(JS_CODEGEN_X86)
+  // Some consumers depend on the address not retaining tls, as tls may be the
+  // scratch register.
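+  // On x86 the memory base is folded into the pointer register below, so the
+  // returned Address no longer refers to tls.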
+ + Address prepareAtomicMemoryAccess(MemoryAccessDesc* access, + AccessCheck* check, RegI32 tls, + RegI32 ptr) { + MOZ_ASSERT(needTlsForAccess(*check) == tls.isValid()); + prepareMemoryAccess(access, check, tls, ptr); + masm.addPtr(Address(tls, offsetof(TlsData, memoryBase)), ptr); + return Address(ptr, access->offset()); + } +#else + Address prepareAtomicMemoryAccess(MemoryAccessDesc* access, + AccessCheck* check, RegI32 tls, + RegI32 ptr) { + MOZ_CRASH("BaseCompiler platform hook: prepareAtomicMemoryAccess"); + } +#endif + + void computeEffectiveAddress(MemoryAccessDesc* access) { + if (access->offset()) { + Label ok; + RegI32 ptr = popI32(); + masm.branchAdd32(Assembler::CarryClear, Imm32(access->offset()), ptr, + &ok); + masm.wasmTrap(Trap::OutOfBounds, bytecodeOffset()); + masm.bind(&ok); + access->clearOffset(); + pushI32(ptr); + } + } + + void needLoadTemps(const MemoryAccessDesc& access, RegI32* temp1, + RegI32* temp2, RegI32* temp3) { +#if defined(JS_CODEGEN_ARM) + if (IsUnaligned(access)) { + switch (access.type()) { + case Scalar::Float64: + *temp3 = needI32(); + [[fallthrough]]; + case Scalar::Float32: + *temp2 = needI32(); + [[fallthrough]]; + default: + *temp1 = needI32(); + break; + } + } +#elif defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64) + *temp1 = needI32(); +#endif + } + + [[nodiscard]] bool needTlsForAccess(const AccessCheck& check) { +#if defined(JS_CODEGEN_X86) + // x86 requires Tls for memory base + return true; +#else + return !moduleEnv_.hugeMemoryEnabled() && !check.omitBoundsCheck; +#endif + } + + // ptr and dest may be the same iff dest is I32. + // This may destroy ptr even if ptr and dest are not the same. + [[nodiscard]] bool load(MemoryAccessDesc* access, AccessCheck* check, + RegI32 tls, RegI32 ptr, AnyReg dest, RegI32 temp1, + RegI32 temp2, RegI32 temp3) { + prepareMemoryAccess(access, check, tls, ptr); + +#if defined(JS_CODEGEN_X64) + Operand srcAddr(HeapReg, ptr, TimesOne, access->offset()); + + if (dest.tag == AnyReg::I64) { + masm.wasmLoadI64(*access, srcAddr, dest.i64()); + } else { + masm.wasmLoad(*access, srcAddr, dest.any()); + } +#elif defined(JS_CODEGEN_X86) + masm.addPtr(Address(tls, offsetof(TlsData, memoryBase)), ptr); + Operand srcAddr(ptr, access->offset()); + + if (dest.tag == AnyReg::I64) { + MOZ_ASSERT(dest.i64() == specific_.abiReturnRegI64); + masm.wasmLoadI64(*access, srcAddr, dest.i64()); + } else { + // For 8 bit loads, this will generate movsbl or movzbl, so + // there's no constraint on what the output register may be. 
+ masm.wasmLoad(*access, srcAddr, dest.any()); + } +#elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_MIPS32) || \ + defined(JS_CODEGEN_MIPS64) + if (IsUnaligned(*access)) { + switch (dest.tag) { + case AnyReg::I64: + masm.wasmUnalignedLoadI64(*access, HeapReg, ptr, ptr, dest.i64(), + temp1); + break; + case AnyReg::F32: + masm.wasmUnalignedLoadFP(*access, HeapReg, ptr, ptr, dest.f32(), + temp1, temp2, RegI32::Invalid()); + break; + case AnyReg::F64: + masm.wasmUnalignedLoadFP(*access, HeapReg, ptr, ptr, dest.f64(), + temp1, temp2, temp3); + break; + case AnyReg::I32: + masm.wasmUnalignedLoad(*access, HeapReg, ptr, ptr, dest.i32(), temp1); + break; + default: + MOZ_CRASH("Unexpected type"); + } + } else { + if (dest.tag == AnyReg::I64) { + masm.wasmLoadI64(*access, HeapReg, ptr, ptr, dest.i64()); + } else { + masm.wasmLoad(*access, HeapReg, ptr, ptr, dest.any()); + } + } +#elif defined(JS_CODEGEN_ARM64) + if (dest.tag == AnyReg::I64) { + masm.wasmLoadI64(*access, HeapReg, ptr, dest.i64()); + } else { + masm.wasmLoad(*access, HeapReg, ptr, dest.any()); + } +#else + MOZ_CRASH("BaseCompiler platform hook: load"); +#endif + + return true; + } + + RegI32 needStoreTemp(const MemoryAccessDesc& access, ValType srcType) { +#if defined(JS_CODEGEN_ARM) + if (IsUnaligned(access) && srcType != ValType::I32) { + return needI32(); + } +#elif defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64) + return needI32(); +#endif + return RegI32::Invalid(); + } + + // ptr and src must not be the same register. + // This may destroy ptr and src. + [[nodiscard]] bool store(MemoryAccessDesc* access, AccessCheck* check, + RegI32 tls, RegI32 ptr, AnyReg src, RegI32 temp) { + prepareMemoryAccess(access, check, tls, ptr); + + // Emit the store +#if defined(JS_CODEGEN_X64) + MOZ_ASSERT(temp.isInvalid()); + Operand dstAddr(HeapReg, ptr, TimesOne, access->offset()); + + masm.wasmStore(*access, src.any(), dstAddr); +#elif defined(JS_CODEGEN_X86) + MOZ_ASSERT(temp.isInvalid()); + masm.addPtr(Address(tls, offsetof(TlsData, memoryBase)), ptr); + Operand dstAddr(ptr, access->offset()); + + if (access->type() == Scalar::Int64) { + masm.wasmStoreI64(*access, src.i64(), dstAddr); + } else { + AnyRegister value; + ScratchI8 scratch(*this); + if (src.tag == AnyReg::I64) { + if (access->byteSize() == 1 && !ra.isSingleByteI32(src.i64().low)) { + masm.mov(src.i64().low, scratch); + value = AnyRegister(scratch); + } else { + value = AnyRegister(src.i64().low); + } + } else if (access->byteSize() == 1 && !ra.isSingleByteI32(src.i32())) { + masm.mov(src.i32(), scratch); + value = AnyRegister(scratch); + } else { + value = src.any(); + } + + masm.wasmStore(*access, value, dstAddr); + } +#elif defined(JS_CODEGEN_ARM) + if (IsUnaligned(*access)) { + switch (src.tag) { + case AnyReg::I64: + masm.wasmUnalignedStoreI64(*access, src.i64(), HeapReg, ptr, ptr, + temp); + break; + case AnyReg::F32: + masm.wasmUnalignedStoreFP(*access, src.f32(), HeapReg, ptr, ptr, + temp); + break; + case AnyReg::F64: + masm.wasmUnalignedStoreFP(*access, src.f64(), HeapReg, ptr, ptr, + temp); + break; + case AnyReg::I32: + MOZ_ASSERT(temp.isInvalid()); + masm.wasmUnalignedStore(*access, src.i32(), HeapReg, ptr, ptr, temp); + break; + default: + MOZ_CRASH("Unexpected type"); + } + } else { + MOZ_ASSERT(temp.isInvalid()); + if (access->type() == Scalar::Int64) { + masm.wasmStoreI64(*access, src.i64(), HeapReg, ptr, ptr); + } else if (src.tag == AnyReg::I64) { + masm.wasmStore(*access, AnyRegister(src.i64().low), HeapReg, ptr, ptr); + } else { + 
masm.wasmStore(*access, src.any(), HeapReg, ptr, ptr); + } + } +#elif defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64) + if (IsUnaligned(*access)) { + switch (src.tag) { + case AnyReg::I64: + masm.wasmUnalignedStoreI64(*access, src.i64(), HeapReg, ptr, ptr, + temp); + break; + case AnyReg::F32: + masm.wasmUnalignedStoreFP(*access, src.f32(), HeapReg, ptr, ptr, + temp); + break; + case AnyReg::F64: + masm.wasmUnalignedStoreFP(*access, src.f64(), HeapReg, ptr, ptr, + temp); + break; + case AnyReg::I32: + masm.wasmUnalignedStore(*access, src.i32(), HeapReg, ptr, ptr, temp); + break; + default: + MOZ_CRASH("Unexpected type"); + } + } else { + if (src.tag == AnyReg::I64) { + masm.wasmStoreI64(*access, src.i64(), HeapReg, ptr, ptr); + } else { + masm.wasmStore(*access, src.any(), HeapReg, ptr, ptr); + } + } +#elif defined(JS_CODEGEN_ARM64) + MOZ_ASSERT(temp.isInvalid()); + if (access->type() == Scalar::Int64) { + masm.wasmStoreI64(*access, src.i64(), HeapReg, ptr); + } else { + masm.wasmStore(*access, src.any(), HeapReg, ptr); + } +#else + MOZ_CRASH("BaseCompiler platform hook: store"); +#endif + + return true; + } + + template <size_t Count> + struct Atomic32Temps : mozilla::Array<RegI32, Count> { + // Allocate all temp registers if 'allocate' is not specified. + void allocate(BaseCompiler* bc, size_t allocate = Count) { + static_assert(Count != 0); + for (size_t i = 0; i < allocate; ++i) { + this->operator[](i) = bc->needI32(); + } + } + void maybeFree(BaseCompiler* bc) { + for (size_t i = 0; i < Count; ++i) { + bc->maybeFreeI32(this->operator[](i)); + } + } + }; + +#if defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64) + using AtomicRMW32Temps = Atomic32Temps<3>; +#else + using AtomicRMW32Temps = Atomic32Temps<1>; +#endif + + template <typename T> + void atomicRMW32(const MemoryAccessDesc& access, T srcAddr, AtomicOp op, + RegI32 rv, RegI32 rd, const AtomicRMW32Temps& temps) { + switch (access.type()) { + case Scalar::Uint8: +#ifdef JS_CODEGEN_X86 + { + RegI32 temp = temps[0]; + // The temp, if used, must be a byte register. + MOZ_ASSERT(temp.isInvalid()); + ScratchI8 scratch(*this); + if (op != AtomicFetchAddOp && op != AtomicFetchSubOp) { + temp = scratch; + } + masm.wasmAtomicFetchOp(access, op, rv, srcAddr, temp, rd); + break; + } +#endif + case Scalar::Uint16: + case Scalar::Int32: + case Scalar::Uint32: +#if defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64) + masm.wasmAtomicFetchOp(access, op, rv, srcAddr, temps[0], temps[1], + temps[2], rd); +#else + masm.wasmAtomicFetchOp(access, op, rv, srcAddr, temps[0], rd); +#endif + break; + default: { + MOZ_CRASH("Bad type for atomic operation"); + } + } + } + + // On x86, V is Address. On other platforms, it is Register64. + // T is BaseIndex or Address. 
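+  // The wrapper simply forwards to the MacroAssembler; the platform-specific
+  // register constraints on the value, temp, and result are established by
+  // PopAtomicRMW64Regs below.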
+ template <typename T, typename V> + void atomicRMW64(const MemoryAccessDesc& access, const T& srcAddr, + AtomicOp op, V value, Register64 temp, Register64 rd) { + masm.wasmAtomicFetchOp64(access, op, value, srcAddr, temp, rd); + } + +#if defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64) + using AtomicCmpXchg32Temps = Atomic32Temps<3>; +#else + using AtomicCmpXchg32Temps = Atomic32Temps<0>; +#endif + + template <typename T> + void atomicCmpXchg32(const MemoryAccessDesc& access, T srcAddr, + RegI32 rexpect, RegI32 rnew, RegI32 rd, + const AtomicCmpXchg32Temps& temps) { + switch (access.type()) { + case Scalar::Uint8: +#if defined(JS_CODEGEN_X86) + { + ScratchI8 scratch(*this); + MOZ_ASSERT(rd == specific_.eax); + if (!ra.isSingleByteI32(rnew)) { + // The replacement value must have a byte persona. + masm.movl(rnew, scratch); + rnew = scratch; + } + masm.wasmCompareExchange(access, srcAddr, rexpect, rnew, rd); + break; + } +#endif + case Scalar::Uint16: + case Scalar::Int32: + case Scalar::Uint32: +#if defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64) + masm.wasmCompareExchange(access, srcAddr, rexpect, rnew, temps[0], + temps[1], temps[2], rd); +#else + masm.wasmCompareExchange(access, srcAddr, rexpect, rnew, rd); +#endif + break; + default: + MOZ_CRASH("Bad type for atomic operation"); + } + } + +#if defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64) + using AtomicXchg32Temps = Atomic32Temps<3>; +#else + using AtomicXchg32Temps = Atomic32Temps<0>; +#endif + + template <typename T> + void atomicXchg32(const MemoryAccessDesc& access, T srcAddr, RegI32 rv, + RegI32 rd, const AtomicXchg32Temps& temps) { + switch (access.type()) { + case Scalar::Uint8: +#if defined(JS_CODEGEN_X86) + { + if (!ra.isSingleByteI32(rd)) { + ScratchI8 scratch(*this); + // The output register must have a byte persona. + masm.wasmAtomicExchange(access, srcAddr, rv, scratch); + masm.movl(scratch, rd); + } else { + masm.wasmAtomicExchange(access, srcAddr, rv, rd); + } + break; + } +#endif + case Scalar::Uint16: + case Scalar::Int32: + case Scalar::Uint32: +#if defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64) + masm.wasmAtomicExchange(access, srcAddr, rv, temps[0], temps[1], + temps[2], rd); +#else + masm.wasmAtomicExchange(access, srcAddr, rv, rd); +#endif + break; + default: + MOZ_CRASH("Bad type for atomic operation"); + } + } + + //////////////////////////////////////////////////////////// + // + // Generally speaking, ABOVE this point there should be no + // value stack manipulation (calls to popI32 etc). + // + //////////////////////////////////////////////////////////// + + //////////////////////////////////////////////////////////// + // + // Platform-specific popping and register targeting. + // + // These fall into two groups, popping methods for simple needs, and RAII + // wrappers for more complex behavior. + + // The simple popping methods pop values into targeted registers; the caller + // can free registers using standard functions. These are always called + // popXForY where X says something about types and Y something about the + // operation being targeted. + + void pop2xI32ForMulDivI32(RegI32* r0, RegI32* r1, RegI32* reserved) { +#if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) + // r0 must be eax, and edx will be clobbered. 
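+    // (The x86 div/idiv and single-operand mul/imul instructions implicitly
+    // use the edx:eax register pair.)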
+ need2xI32(specific_.eax, specific_.edx); + *r1 = popI32(); + *r0 = popI32ToSpecific(specific_.eax); + *reserved = specific_.edx; +#else + pop2xI32(r0, r1); +#endif + } + + void pop2xI64ForMulI64(RegI64* r0, RegI64* r1, RegI32* temp, + RegI64* reserved) { +#if defined(JS_CODEGEN_X64) + // r0 must be rax, and rdx will be clobbered. + need2xI64(specific_.rax, specific_.rdx); + *r1 = popI64(); + *r0 = popI64ToSpecific(specific_.rax); + *reserved = specific_.rdx; +#elif defined(JS_CODEGEN_X86) + // As for x64, though edx is part of r0. + need2xI32(specific_.eax, specific_.edx); + *r1 = popI64(); + *r0 = popI64ToSpecific(specific_.edx_eax); + *temp = needI32(); +#elif defined(JS_CODEGEN_MIPS64) + pop2xI64(r0, r1); +#elif defined(JS_CODEGEN_MIPS32) + pop2xI64(r0, r1); + *temp = needI32(); +#elif defined(JS_CODEGEN_ARM) + pop2xI64(r0, r1); + *temp = needI32(); +#elif defined(JS_CODEGEN_ARM64) + pop2xI64(r0, r1); +#else + MOZ_CRASH("BaseCompiler porting interface: pop2xI64ForMulI64"); +#endif + } + + void pop2xI64ForDivI64(RegI64* r0, RegI64* r1, RegI64* reserved) { +#if defined(JS_CODEGEN_X64) + // r0 must be rax, and rdx will be clobbered. + need2xI64(specific_.rax, specific_.rdx); + *r1 = popI64(); + *r0 = popI64ToSpecific(specific_.rax); + *reserved = specific_.rdx; +#else + pop2xI64(r0, r1); +#endif + } + + void pop2xI32ForShift(RegI32* r0, RegI32* r1) { +#if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) + // r1 must be ecx for a variable shift, unless BMI2 is available. + if (!Assembler::HasBMI2()) { + *r1 = popI32(specific_.ecx); + *r0 = popI32(); + return; + } +#endif + pop2xI32(r0, r1); + } + + void pop2xI64ForShift(RegI64* r0, RegI64* r1) { +#if defined(JS_CODEGEN_X86) + // r1 must be ecx for a variable shift. + needI32(specific_.ecx); + *r1 = popI64ToSpecific(widenI32(specific_.ecx)); + *r0 = popI64(); +#else +# if defined(JS_CODEGEN_X64) + // r1 must be rcx for a variable shift, unless BMI2 is available. + if (!Assembler::HasBMI2()) { + needI64(specific_.rcx); + *r1 = popI64ToSpecific(specific_.rcx); + *r0 = popI64(); + return; + } +# endif + pop2xI64(r0, r1); +#endif + } + + void pop2xI32ForRotate(RegI32* r0, RegI32* r1) { +#if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) + // r1 must be ecx for a variable rotate. + *r1 = popI32(specific_.ecx); + *r0 = popI32(); +#else + pop2xI32(r0, r1); +#endif + } + + void pop2xI64ForRotate(RegI64* r0, RegI64* r1) { +#if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) + // r1 must be ecx for a variable rotate. + needI32(specific_.ecx); + *r1 = popI64ToSpecific(widenI32(specific_.ecx)); + *r0 = popI64(); +#else + pop2xI64(r0, r1); +#endif + } + + void popI32ForSignExtendI64(RegI64* r0) { +#if defined(JS_CODEGEN_X86) + // r0 must be edx:eax for cdq + need2xI32(specific_.edx, specific_.eax); + *r0 = specific_.edx_eax; + popI32ToSpecific(specific_.eax); +#else + *r0 = widenI32(popI32()); +#endif + } + + void popI64ForSignExtendI64(RegI64* r0) { +#if defined(JS_CODEGEN_X86) + // r0 must be edx:eax for cdq + need2xI32(specific_.edx, specific_.eax); + // Low on top, high underneath + *r0 = popI64ToSpecific(specific_.edx_eax); +#else + *r0 = popI64(); +#endif + } + + // The RAII wrappers are used because we sometimes have to free partial + // registers, as when part of a register is the scratch register that has + // been temporarily used, or not free a register at all, as when the + // register is the same as the destination register (but only on some + // platforms, not on all). 
These are called PopX{32,64}Regs where X is the + // operation being targeted. + + // Utility struct that holds the BaseCompiler and the destination, and frees + // the destination if it has not been extracted. + + template <typename T> + class PopBase { + T rd_; + + void maybeFree(RegI32 r) { bc->maybeFreeI32(r); } + void maybeFree(RegI64 r) { bc->maybeFreeI64(r); } + + protected: + BaseCompiler* const bc; + + void setRd(T r) { + MOZ_ASSERT(rd_.isInvalid()); + rd_ = r; + } + T getRd() const { + MOZ_ASSERT(rd_.isValid()); + return rd_; + } + + public: + explicit PopBase(BaseCompiler* bc) : bc(bc) {} + ~PopBase() { maybeFree(rd_); } + + // Take and clear the Rd - use this when pushing Rd. + T takeRd() { + MOZ_ASSERT(rd_.isValid()); + T r = rd_; + rd_ = T::Invalid(); + return r; + } + }; + + friend class PopAtomicCmpXchg32Regs; + class PopAtomicCmpXchg32Regs : public PopBase<RegI32> { + using Base = PopBase<RegI32>; + RegI32 rexpect, rnew; + AtomicCmpXchg32Temps temps; + + public: +#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86) + explicit PopAtomicCmpXchg32Regs(BaseCompiler* bc, ValType type, + Scalar::Type viewType) + : Base(bc) { + // For cmpxchg, the expected value and the result are both in eax. + bc->needI32(bc->specific_.eax); + if (type == ValType::I64) { + rnew = bc->popI64ToI32(); + rexpect = bc->popI64ToSpecificI32(bc->specific_.eax); + } else { + rnew = bc->popI32(); + rexpect = bc->popI32ToSpecific(bc->specific_.eax); + } + setRd(rexpect); + } + ~PopAtomicCmpXchg32Regs() { bc->freeI32(rnew); } +#elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64) + explicit PopAtomicCmpXchg32Regs(BaseCompiler* bc, ValType type, + Scalar::Type viewType) + : Base(bc) { + if (type == ValType::I64) { + rnew = bc->popI64ToI32(); + rexpect = bc->popI64ToI32(); + } else { + rnew = bc->popI32(); + rexpect = bc->popI32(); + } + setRd(bc->needI32()); + } + ~PopAtomicCmpXchg32Regs() { + bc->freeI32(rnew); + bc->freeI32(rexpect); + } +#elif defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64) + explicit PopAtomicCmpXchg32Regs(BaseCompiler* bc, ValType type, + Scalar::Type viewType) + : Base(bc) { + if (type == ValType::I64) { + rnew = bc->popI64ToI32(); + rexpect = bc->popI64ToI32(); + } else { + rnew = bc->popI32(); + rexpect = bc->popI32(); + } + if (Scalar::byteSize(viewType) < 4) { + temps.allocate(bc); + } + setRd(bc->needI32()); + } + ~PopAtomicCmpXchg32Regs() { + bc->freeI32(rnew); + bc->freeI32(rexpect); + temps.maybeFree(bc); + } +#else + explicit PopAtomicCmpXchg32Regs(BaseCompiler* bc, ValType type, + Scalar::Type viewType) + : Base(bc) { + MOZ_CRASH("BaseCompiler porting interface: PopAtomicCmpXchg32Regs"); + } +#endif + + template <typename T> + void atomicCmpXchg32(const MemoryAccessDesc& access, T srcAddr) { + bc->atomicCmpXchg32(access, srcAddr, rexpect, rnew, getRd(), temps); + } + }; + + friend class PopAtomicCmpXchg64Regs; + class PopAtomicCmpXchg64Regs : public PopBase<RegI64> { + using Base = PopBase<RegI64>; + RegI64 rexpect, rnew; + + public: +#ifdef JS_CODEGEN_X64 + explicit PopAtomicCmpXchg64Regs(BaseCompiler* bc) : Base(bc) { + // For cmpxchg, the expected value and the result are both in rax. 
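+      // (cmpxchg compares rax against the memory operand and, on failure,
+      // loads the current memory value back into rax.)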
+ bc->needI64(bc->specific_.rax); + rnew = bc->popI64(); + rexpect = bc->popI64ToSpecific(bc->specific_.rax); + setRd(rexpect); + } + ~PopAtomicCmpXchg64Regs() { bc->freeI64(rnew); } +#elif defined(JS_CODEGEN_X86) + explicit PopAtomicCmpXchg64Regs(BaseCompiler* bc) : Base(bc) { + // For cmpxchg8b, the expected value and the result are both in + // edx:eax, and the replacement value is in ecx:ebx. But we can't + // allocate ebx here, so instead we allocate a temp to hold the low + // word of 'new'. + bc->needI64(bc->specific_.edx_eax); + bc->needI32(bc->specific_.ecx); + + rnew = bc->popI64ToSpecific( + RegI64(Register64(bc->specific_.ecx, bc->needI32()))); + rexpect = bc->popI64ToSpecific(bc->specific_.edx_eax); + setRd(rexpect); + } + ~PopAtomicCmpXchg64Regs() { bc->freeI64(rnew); } +#elif defined(JS_CODEGEN_ARM) + explicit PopAtomicCmpXchg64Regs(BaseCompiler* bc) : Base(bc) { + // The replacement value and the result must both be odd/even pairs. + rnew = bc->popI64Pair(); + rexpect = bc->popI64(); + setRd(bc->needI64Pair()); + } + ~PopAtomicCmpXchg64Regs() { + bc->freeI64(rexpect); + bc->freeI64(rnew); + } +#elif defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_MIPS32) || \ + defined(JS_CODEGEN_MIPS64) + explicit PopAtomicCmpXchg64Regs(BaseCompiler* bc) : Base(bc) { + rnew = bc->popI64(); + rexpect = bc->popI64(); + setRd(bc->needI64()); + } + ~PopAtomicCmpXchg64Regs() { + bc->freeI64(rexpect); + bc->freeI64(rnew); + } +#else + explicit PopAtomicCmpXchg64Regs(BaseCompiler* bc) : Base(bc) { + MOZ_CRASH("BaseCompiler porting interface: PopAtomicCmpXchg64Regs"); + } +#endif + +#ifdef JS_CODEGEN_X86 + template <typename T> + void atomicCmpXchg64(const MemoryAccessDesc& access, T srcAddr, + RegI32 ebx) { + MOZ_ASSERT(ebx == js::jit::ebx); + bc->masm.move32(rnew.low, ebx); + bc->masm.wasmCompareExchange64(access, srcAddr, rexpect, + bc->specific_.ecx_ebx, getRd()); + } +#else + template <typename T> + void atomicCmpXchg64(const MemoryAccessDesc& access, T srcAddr) { + bc->masm.wasmCompareExchange64(access, srcAddr, rexpect, rnew, getRd()); + } +#endif + }; + +#ifndef JS_64BIT + class PopAtomicLoad64Regs : public PopBase<RegI64> { + using Base = PopBase<RegI64>; + + public: +# if defined(JS_CODEGEN_X86) + explicit PopAtomicLoad64Regs(BaseCompiler* bc) : Base(bc) { + // The result is in edx:eax, and we need ecx:ebx as a temp. But we + // can't reserve ebx yet, so we'll accept it as an argument to the + // operation (below). 
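+      // (The 64-bit atomic load is performed with lock cmpxchg8b, which
+      // requires the edx:eax and ecx:ebx register pairs.)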
+ bc->needI32(bc->specific_.ecx); + bc->needI64(bc->specific_.edx_eax); + setRd(bc->specific_.edx_eax); + } + ~PopAtomicLoad64Regs() { bc->freeI32(bc->specific_.ecx); } +# elif defined(JS_CODEGEN_ARM) + explicit PopAtomicLoad64Regs(BaseCompiler* bc) : Base(bc) { + setRd(bc->needI64Pair()); + } +# elif defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64) + explicit PopAtomicLoad64Regs(BaseCompiler* bc) : Base(bc) { + setRd(bc->needI64()); + } +# else + explicit PopAtomicLoad64Regs(BaseCompiler* bc) : Base(bc) { + MOZ_CRASH("BaseCompiler porting interface: PopAtomicLoad64Regs"); + } +# endif + +# ifdef JS_CODEGEN_X86 + template <typename T> + void atomicLoad64(const MemoryAccessDesc& access, T srcAddr, RegI32 ebx) { + MOZ_ASSERT(ebx == js::jit::ebx); + bc->masm.wasmAtomicLoad64(access, srcAddr, bc->specific_.ecx_ebx, + getRd()); + } +# else // ARM, MIPS32 + template <typename T> + void atomicLoad64(const MemoryAccessDesc& access, T srcAddr) { + bc->masm.wasmAtomicLoad64(access, srcAddr, RegI64::Invalid(), getRd()); + } +# endif + }; +#endif // JS_64BIT + + friend class PopAtomicRMW32Regs; + class PopAtomicRMW32Regs : public PopBase<RegI32> { + using Base = PopBase<RegI32>; + RegI32 rv; + AtomicRMW32Temps temps; + + public: +#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86) + explicit PopAtomicRMW32Regs(BaseCompiler* bc, ValType type, + Scalar::Type viewType, AtomicOp op) + : Base(bc) { + bc->needI32(bc->specific_.eax); + if (op == AtomicFetchAddOp || op == AtomicFetchSubOp) { + // We use xadd, so source and destination are the same. Using + // eax here is overconstraining, but for byte operations on x86 + // we do need something with a byte register. + if (type == ValType::I64) { + rv = bc->popI64ToSpecificI32(bc->specific_.eax); + } else { + rv = bc->popI32ToSpecific(bc->specific_.eax); + } + setRd(rv); + } else { + // We use a cmpxchg loop. The output must be eax; the input + // must be in a separate register since it may be used several + // times. + if (type == ValType::I64) { + rv = bc->popI64ToI32(); + } else { + rv = bc->popI32(); + } + setRd(bc->specific_.eax); +# if defined(JS_CODEGEN_X86) + // Single-byte is a special case handled very locally with + // ScratchReg, see atomicRMW32 above. + if (Scalar::byteSize(viewType) > 1) { + temps.allocate(bc); + } +# else + temps.allocate(bc); +# endif + } + } + ~PopAtomicRMW32Regs() { + if (rv != bc->specific_.eax) { + bc->freeI32(rv); + } + temps.maybeFree(bc); + } +#elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64) + explicit PopAtomicRMW32Regs(BaseCompiler* bc, ValType type, + Scalar::Type viewType, AtomicOp op) + : Base(bc) { + rv = type == ValType::I64 ? bc->popI64ToI32() : bc->popI32(); + temps.allocate(bc); + setRd(bc->needI32()); + } + ~PopAtomicRMW32Regs() { + bc->freeI32(rv); + temps.maybeFree(bc); + } +#elif defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64) + explicit PopAtomicRMW32Regs(BaseCompiler* bc, ValType type, + Scalar::Type viewType, AtomicOp op) + : Base(bc) { + rv = type == ValType::I64 ? 
bc->popI64ToI32() : bc->popI32(); + if (Scalar::byteSize(viewType) < 4) { + temps.allocate(bc); + } + + setRd(bc->needI32()); + } + ~PopAtomicRMW32Regs() { + bc->freeI32(rv); + temps.maybeFree(bc); + } +#else + explicit PopAtomicRMW32Regs(BaseCompiler* bc, ValType type, + Scalar::Type viewType, AtomicOp op) + : Base(bc) { + MOZ_CRASH("BaseCompiler porting interface: PopAtomicRMW32Regs"); + } +#endif + + template <typename T> + void atomicRMW32(const MemoryAccessDesc& access, T srcAddr, AtomicOp op) { + bc->atomicRMW32(access, srcAddr, op, rv, getRd(), temps); + } + }; + + friend class PopAtomicRMW64Regs; + class PopAtomicRMW64Regs : public PopBase<RegI64> { + using Base = PopBase<RegI64>; +#if defined(JS_CODEGEN_X64) + AtomicOp op; +#endif + RegI64 rv, temp; + + public: +#if defined(JS_CODEGEN_X64) + explicit PopAtomicRMW64Regs(BaseCompiler* bc, AtomicOp op) + : Base(bc), op(op) { + if (op == AtomicFetchAddOp || op == AtomicFetchSubOp) { + // We use xaddq, so input and output must be the same register. + rv = bc->popI64(); + setRd(rv); + } else { + // We use a cmpxchgq loop, so the output must be rax. + bc->needI64(bc->specific_.rax); + rv = bc->popI64(); + temp = bc->needI64(); + setRd(bc->specific_.rax); + } + } + ~PopAtomicRMW64Regs() { + bc->maybeFreeI64(temp); + if (op != AtomicFetchAddOp && op != AtomicFetchSubOp) { + bc->freeI64(rv); + } + } +#elif defined(JS_CODEGEN_X86) + // We'll use cmpxchg8b, so rv must be in ecx:ebx, and rd must be + // edx:eax. But we can't reserve ebx here because we need it later, so + // use a separate temp and set up ebx when we perform the operation. + explicit PopAtomicRMW64Regs(BaseCompiler* bc, AtomicOp) : Base(bc) { + bc->needI32(bc->specific_.ecx); + bc->needI64(bc->specific_.edx_eax); + + temp = RegI64(Register64(bc->specific_.ecx, bc->needI32())); + bc->popI64ToSpecific(temp); + + setRd(bc->specific_.edx_eax); + } + ~PopAtomicRMW64Regs() { bc->freeI64(temp); } + RegI32 valueHigh() const { return RegI32(temp.high); } + RegI32 valueLow() const { return RegI32(temp.low); } +#elif defined(JS_CODEGEN_ARM) + explicit PopAtomicRMW64Regs(BaseCompiler* bc, AtomicOp) : Base(bc) { + // We use a ldrex/strexd loop so the temp and the output must be + // odd/even pairs. 
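+      // (ARM ldrexd/strexd require their transfer registers to be a
+      // consecutive even/odd pair.)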
+ rv = bc->popI64(); + temp = bc->needI64Pair(); + setRd(bc->needI64Pair()); + } + ~PopAtomicRMW64Regs() { + bc->freeI64(rv); + bc->freeI64(temp); + } +#elif defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_MIPS32) || \ + defined(JS_CODEGEN_MIPS64) + explicit PopAtomicRMW64Regs(BaseCompiler* bc, AtomicOp) : Base(bc) { + rv = bc->popI64(); + temp = bc->needI64(); + setRd(bc->needI64()); + } + ~PopAtomicRMW64Regs() { + bc->freeI64(rv); + bc->freeI64(temp); + } +#else + explicit PopAtomicRMW64Regs(BaseCompiler* bc, AtomicOp) : Base(bc) { + MOZ_CRASH("BaseCompiler porting interface: PopAtomicRMW64Regs"); + } +#endif + +#ifdef JS_CODEGEN_X86 + template <typename T, typename V> + void atomicRMW64(const MemoryAccessDesc& access, T srcAddr, AtomicOp op, + const V& value, RegI32 ebx) { + MOZ_ASSERT(ebx == js::jit::ebx); + bc->atomicRMW64(access, srcAddr, op, value, bc->specific_.ecx_ebx, + getRd()); + } +#else + template <typename T> + void atomicRMW64(const MemoryAccessDesc& access, T srcAddr, AtomicOp op) { + bc->atomicRMW64(access, srcAddr, op, rv, temp, getRd()); + } +#endif + }; + + friend class PopAtomicXchg32Regs; + class PopAtomicXchg32Regs : public PopBase<RegI32> { + using Base = PopBase<RegI32>; + RegI32 rv; + AtomicXchg32Temps temps; + + public: +#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86) + explicit PopAtomicXchg32Regs(BaseCompiler* bc, ValType type, + Scalar::Type viewType) + : Base(bc) { + // The xchg instruction reuses rv as rd. + rv = (type == ValType::I64) ? bc->popI64ToI32() : bc->popI32(); + setRd(rv); + } +#elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64) + explicit PopAtomicXchg32Regs(BaseCompiler* bc, ValType type, + Scalar::Type viewType) + : Base(bc) { + rv = (type == ValType::I64) ? bc->popI64ToI32() : bc->popI32(); + setRd(bc->needI32()); + } + ~PopAtomicXchg32Regs() { bc->freeI32(rv); } +#elif defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64) + explicit PopAtomicXchg32Regs(BaseCompiler* bc, ValType type, + Scalar::Type viewType) + : Base(bc) { + rv = (type == ValType::I64) ? bc->popI64ToI32() : bc->popI32(); + if (Scalar::byteSize(viewType) < 4) { + temps.allocate(bc); + } + setRd(bc->needI32()); + } + ~PopAtomicXchg32Regs() { + temps.maybeFree(bc); + bc->freeI32(rv); + } +#else + explicit PopAtomicXchg32Regs(BaseCompiler* bc, ValType type, + Scalar::Type viewType) + : Base(bc) { + MOZ_CRASH("BaseCompiler porting interface: PopAtomicXchg32Regs"); + } +#endif + + template <typename T> + void atomicXchg32(const MemoryAccessDesc& access, T srcAddr) { + bc->atomicXchg32(access, srcAddr, rv, getRd(), temps); + } + }; + + friend class PopAtomicXchg64Regs; + class PopAtomicXchg64Regs : public PopBase<RegI64> { + using Base = PopBase<RegI64>; + RegI64 rv; + + public: +#if defined(JS_CODEGEN_X64) + explicit PopAtomicXchg64Regs(BaseCompiler* bc) : Base(bc) { + rv = bc->popI64(); + setRd(rv); + } +#elif defined(JS_CODEGEN_ARM64) + explicit PopAtomicXchg64Regs(BaseCompiler* bc) : Base(bc) { + rv = bc->popI64(); + setRd(bc->needI64()); + } + ~PopAtomicXchg64Regs() { bc->freeI64(rv); } +#elif defined(JS_CODEGEN_X86) + // We'll use cmpxchg8b, so rv must be in ecx:ebx, and rd must be + // edx:eax. But we can't reserve ebx here because we need it later, so + // use a separate temp and set up ebx when we perform the operation. 
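+    // The low word of rv is moved into ebx inside atomicXchg64(), which
+    // receives ebx from the caller once it has been reserved.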
+ explicit PopAtomicXchg64Regs(BaseCompiler* bc) : Base(bc) { + bc->needI32(bc->specific_.ecx); + bc->needI64(bc->specific_.edx_eax); + + rv = RegI64(Register64(bc->specific_.ecx, bc->needI32())); + bc->popI64ToSpecific(rv); + + setRd(bc->specific_.edx_eax); + } + ~PopAtomicXchg64Regs() { bc->freeI64(rv); } +#elif defined(JS_CODEGEN_ARM) + // Both rv and rd must be odd/even pairs. + explicit PopAtomicXchg64Regs(BaseCompiler* bc) : Base(bc) { + rv = bc->popI64ToSpecific(bc->needI64Pair()); + setRd(bc->needI64Pair()); + } + ~PopAtomicXchg64Regs() { bc->freeI64(rv); } +#elif defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64) + explicit PopAtomicXchg64Regs(BaseCompiler* bc) : Base(bc) { + rv = bc->popI64ToSpecific(bc->needI64()); + setRd(bc->needI64()); + } + ~PopAtomicXchg64Regs() { bc->freeI64(rv); } +#else + explicit PopAtomicXchg64Regs(BaseCompiler* bc) : Base(bc) { + MOZ_CRASH("BaseCompiler porting interface: xchg64"); + } +#endif + +#ifdef JS_CODEGEN_X86 + template <typename T> + void atomicXchg64(const MemoryAccessDesc& access, T srcAddr, + RegI32 ebx) const { + MOZ_ASSERT(ebx == js::jit::ebx); + bc->masm.move32(rv.low, ebx); + bc->masm.wasmAtomicExchange64(access, srcAddr, bc->specific_.ecx_ebx, + getRd()); + } +#else + template <typename T> + void atomicXchg64(const MemoryAccessDesc& access, T srcAddr) const { + bc->masm.wasmAtomicExchange64(access, srcAddr, rv, getRd()); + } +#endif + }; + + //////////////////////////////////////////////////////////// + // + // Generally speaking, BELOW this point there should be no + // platform dependencies. We make very occasional exceptions + // when it doesn't become messy and further abstraction is + // not desirable. + // + //////////////////////////////////////////////////////////// + + //////////////////////////////////////////////////////////// + // + // Sundry wrappers. + + void pop2xI32(RegI32* r0, RegI32* r1) { + *r1 = popI32(); + *r0 = popI32(); + } + + RegI32 popI32ToSpecific(RegI32 specific) { + freeI32(specific); + return popI32(specific); + } + + void pop2xI64(RegI64* r0, RegI64* r1) { + *r1 = popI64(); + *r0 = popI64(); + } + + RegI64 popI64ToSpecific(RegI64 specific) { + freeI64(specific); + return popI64(specific); + } + +#ifdef JS_CODEGEN_ARM + RegI64 popI64Pair() { + RegI64 r = needI64Pair(); + popI64ToSpecific(r); + return r; + } +#endif + + void pop2xF32(RegF32* r0, RegF32* r1) { + *r1 = popF32(); + *r0 = popF32(); + } + + void pop2xF64(RegF64* r0, RegF64* r1) { + *r1 = popF64(); + *r0 = popF64(); + } + +#ifdef ENABLE_WASM_SIMD + void pop2xV128(RegV128* r0, RegV128* r1) { + *r1 = popV128(); + *r0 = popV128(); + } +#endif + + void pop2xRef(RegPtr* r0, RegPtr* r1) { + *r1 = popRef(); + *r0 = popRef(); + } + + RegI32 popI64ToI32() { + RegI64 r = popI64(); + return narrowI64(r); + } + + RegI32 popI64ToSpecificI32(RegI32 specific) { + RegI64 rd = widenI32(specific); + popI64ToSpecific(rd); + return narrowI64(rd); + } + + void pushU32AsI64(RegI32 rs) { + RegI64 rd = widenI32(rs); + masm.move32To64ZeroExtend(rs, rd); + pushI64(rd); + } + + RegI32 popMemoryAccess(MemoryAccessDesc* access, AccessCheck* check); + + void pushHeapBase(); + + template <typename RegType> + RegType pop(); + template <typename RegType> + RegType need(); + template <typename RegType> + void free(RegType r); + + //////////////////////////////////////////////////////////// + // + // Sundry helpers. 
+ + uint32_t readCallSiteLineOrBytecode() { + if (!func_.callSiteLineNums.empty()) { + return func_.callSiteLineNums[lastReadCallSite_++]; + } + return iter_.lastOpcodeOffset(); + } + + bool done() const { return iter_.done(); } + + BytecodeOffset bytecodeOffset() const { return iter_.bytecodeOffset(); } + + void trap(Trap t) const { masm.wasmTrap(t, bytecodeOffset()); } + + //////////////////////////////////////////////////////////// + // + // Object support. + + // This emits a GC pre-write barrier. The pre-barrier is needed when we + // replace a member field with a new value, and the previous field value + // might have no other referents, and incremental GC is ongoing. The field + // might belong to an object or be a stack slot or a register or a heap + // allocated value. + // + // let obj = { field: previousValue }; + // obj.field = newValue; // previousValue must be marked with a pre-barrier. + // + // The `valueAddr` is the address of the location that we are about to + // update. This function preserves that register. + + void emitPreBarrier(RegPtr valueAddr) { + Label skipBarrier; + ScratchPtr scratch(*this); + + fr.loadTlsPtr(scratch); + EmitWasmPreBarrierGuard(masm, scratch, scratch, valueAddr, &skipBarrier); + + fr.loadTlsPtr(scratch); +#ifdef JS_CODEGEN_ARM64 + // The prebarrier stub assumes the PseudoStackPointer is set up. It is OK + // to just move the sp to x28 here because x28 is not being used by the + // baseline compiler and need not be saved or restored. + MOZ_ASSERT(!GeneralRegisterSet::All().hasRegisterIndex(x28.asUnsized())); + masm.Mov(x28, sp); +#endif + EmitWasmPreBarrierCall(masm, scratch, scratch, valueAddr); + + masm.bind(&skipBarrier); + } + + // This frees the register `valueAddr`. + + [[nodiscard]] bool emitPostBarrierCall(RegPtr valueAddr) { + uint32_t bytecodeOffset = iter_.lastOpcodeOffset(); + + // The `valueAddr` is a raw pointer to the cell within some GC object or + // TLS area, and we guarantee that the GC will not run while the + // postbarrier call is active, so push a uintptr_t value. +#ifdef JS_64BIT + pushI64(RegI64(Register64(valueAddr))); +#else + pushI32(RegI32(valueAddr)); +#endif + if (!emitInstanceCall(bytecodeOffset, SASigPostBarrier, + /*pushReturnedValue=*/false)) { + return false; + } + return true; + } + + [[nodiscard]] bool emitBarrieredStore(const Maybe<RegPtr>& object, + RegPtr valueAddr, RegPtr value) { + // TODO/AnyRef-boxing: With boxed immediates and strings, the write + // barrier is going to have to be more complicated. + ASSERT_ANYREF_IS_JSOBJECT; + + emitPreBarrier(valueAddr); // Preserves valueAddr + masm.storePtr(value, Address(valueAddr, 0)); + + Label skipBarrier; + sync(); + + RegPtr otherScratch = needRef(); + EmitWasmPostBarrierGuard(masm, object, otherScratch, value, &skipBarrier); + freeRef(otherScratch); + + if (!emitPostBarrierCall(valueAddr)) { + return false; + } + masm.bind(&skipBarrier); + return true; + } + + //////////////////////////////////////////////////////////// + // + // Machinery for optimized conditional branches. + // + // To disable this optimization it is enough always to return false from + // sniffConditionalControl{Cmp,Eqz}. 
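+  //
+  // For example, for a body fragment like
+  //
+  //   (br_if $l (i32.lt_s (local.get 0) (i32.const 10)))
+  //
+  // the comparison is recorded as a latent compare rather than materialized
+  // as a 0/1 value, and emitBranchPerform() later emits a single
+  // compare-and-branch.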
+ + struct BranchState { + union { + struct { + RegI32 lhs; + RegI32 rhs; + int32_t imm; + bool rhsImm; + } i32; + struct { + RegI64 lhs; + RegI64 rhs; + int64_t imm; + bool rhsImm; + } i64; + struct { + RegF32 lhs; + RegF32 rhs; + } f32; + struct { + RegF64 lhs; + RegF64 rhs; + } f64; + }; + + Label* const label; // The target of the branch, never NULL + const StackHeight stackHeight; // The stack base above which to place + // stack-spilled block results, if + // hasBlockResults(). + const bool invertBranch; // If true, invert the sense of the branch + const ResultType resultType; // The result propagated along the edges + + explicit BranchState(Label* label) + : label(label), + stackHeight(StackHeight::Invalid()), + invertBranch(false), + resultType(ResultType::Empty()) {} + + BranchState(Label* label, bool invertBranch) + : label(label), + stackHeight(StackHeight::Invalid()), + invertBranch(invertBranch), + resultType(ResultType::Empty()) {} + + BranchState(Label* label, StackHeight stackHeight, bool invertBranch, + ResultType resultType) + : label(label), + stackHeight(stackHeight), + invertBranch(invertBranch), + resultType(resultType) {} + + bool hasBlockResults() const { return stackHeight.isValid(); } + }; + + void setLatentCompare(Assembler::Condition compareOp, ValType operandType) { + latentOp_ = LatentOp::Compare; + latentType_ = operandType; + latentIntCmp_ = compareOp; + } + + void setLatentCompare(Assembler::DoubleCondition compareOp, + ValType operandType) { + latentOp_ = LatentOp::Compare; + latentType_ = operandType; + latentDoubleCmp_ = compareOp; + } + + void setLatentEqz(ValType operandType) { + latentOp_ = LatentOp::Eqz; + latentType_ = operandType; + } + + bool hasLatentOp() const { return latentOp_ != LatentOp::None; } + + void resetLatentOp() { latentOp_ = LatentOp::None; } + + void branchTo(Assembler::DoubleCondition c, RegF64 lhs, RegF64 rhs, + Label* l) { + masm.branchDouble(c, lhs, rhs, l); + } + + void branchTo(Assembler::DoubleCondition c, RegF32 lhs, RegF32 rhs, + Label* l) { + masm.branchFloat(c, lhs, rhs, l); + } + + void branchTo(Assembler::Condition c, RegI32 lhs, RegI32 rhs, Label* l) { + masm.branch32(c, lhs, rhs, l); + } + + void branchTo(Assembler::Condition c, RegI32 lhs, Imm32 rhs, Label* l) { + masm.branch32(c, lhs, rhs, l); + } + + void branchTo(Assembler::Condition c, RegI64 lhs, RegI64 rhs, Label* l) { + masm.branch64(c, lhs, rhs, l); + } + + void branchTo(Assembler::Condition c, RegI64 lhs, Imm64 rhs, Label* l) { + masm.branch64(c, lhs, rhs, l); + } + + void branchTo(Assembler::Condition c, RegPtr lhs, ImmWord rhs, Label* l) { + masm.branchPtr(c, lhs, rhs, l); + } + + // Emit a conditional branch that optionally and optimally cleans up the CPU + // stack before we branch. + // + // Cond is either Assembler::Condition or Assembler::DoubleCondition. + // + // Lhs is RegI32, RegI64, or RegF32, RegF64, or RegPtr. + // + // Rhs is either the same as Lhs, or an immediate expression compatible with + // Lhs "when applicable". + + template <typename Cond, typename Lhs, typename Rhs> + MOZ_MUST_USE bool jumpConditionalWithResults(BranchState* b, Cond cond, + Lhs lhs, Rhs rhs) { + if (b->hasBlockResults()) { + StackHeight resultsBase(0); + if (!topBranchParams(b->resultType, &resultsBase)) { + return false; + } + if (b->stackHeight != resultsBase) { + Label notTaken; + branchTo(b->invertBranch ? cond : Assembler::InvertCondition(cond), lhs, + rhs, ¬Taken); + + // Shuffle stack args. 
+ shuffleStackResultsBeforeBranch(resultsBase, b->stackHeight, + b->resultType); + masm.jump(b->label); + masm.bind(¬Taken); + return true; + } + } + + branchTo(b->invertBranch ? Assembler::InvertCondition(cond) : cond, lhs, + rhs, b->label); + return true; + } + + // sniffConditionalControl{Cmp,Eqz} may modify the latentWhatever_ state in + // the BaseCompiler so that a subsequent conditional branch can be compiled + // optimally. emitBranchSetup() and emitBranchPerform() will consume that + // state. If the latter methods are not called because deadCode_ is true + // then the compiler MUST instead call resetLatentOp() to reset the state. + + template <typename Cond> + MOZ_MUST_USE bool sniffConditionalControlCmp(Cond compareOp, + ValType operandType); + MOZ_MUST_USE bool sniffConditionalControlEqz(ValType operandType); + void emitBranchSetup(BranchState* b); + MOZ_MUST_USE bool emitBranchPerform(BranchState* b); + + ////////////////////////////////////////////////////////////////////// + + [[nodiscard]] bool emitBody(); + [[nodiscard]] bool emitBlock(); + [[nodiscard]] bool emitLoop(); + [[nodiscard]] bool emitIf(); + [[nodiscard]] bool emitElse(); +#ifdef ENABLE_WASM_EXCEPTIONS + [[nodiscard]] bool emitTry(); + [[nodiscard]] bool emitCatch(); + [[nodiscard]] bool emitThrow(); +#endif + [[nodiscard]] bool emitEnd(); + [[nodiscard]] bool emitBr(); + [[nodiscard]] bool emitBrIf(); + [[nodiscard]] bool emitBrTable(); + [[nodiscard]] bool emitDrop(); + [[nodiscard]] bool emitReturn(); + + enum class CalleeOnStack { + // After the arguments to the call, there is a callee pushed onto value + // stack. This is only the case for callIndirect. To get the arguments to + // the call, emitCallArgs has to reach one element deeper into the value + // stack, to skip the callee. + True, + + // No callee on the stack. 
+ False + }; + + [[nodiscard]] bool emitCallArgs(const ValTypeVector& args, + const StackResultsLoc& results, + FunctionCall* baselineCall, + CalleeOnStack calleeOnStack); + + [[nodiscard]] bool emitCall(); + [[nodiscard]] bool emitCallIndirect(); + [[nodiscard]] bool emitUnaryMathBuiltinCall(SymbolicAddress callee, + ValType operandType); + [[nodiscard]] bool emitGetLocal(); + [[nodiscard]] bool emitSetLocal(); + [[nodiscard]] bool emitTeeLocal(); + [[nodiscard]] bool emitGetGlobal(); + [[nodiscard]] bool emitSetGlobal(); + [[nodiscard]] RegI32 maybeLoadTlsForAccess(const AccessCheck& check); + [[nodiscard]] RegI32 maybeLoadTlsForAccess(const AccessCheck& check, + RegI32 specific); + [[nodiscard]] bool emitLoad(ValType type, Scalar::Type viewType); + [[nodiscard]] bool loadCommon(MemoryAccessDesc* access, AccessCheck check, + ValType type); + [[nodiscard]] bool emitStore(ValType resultType, Scalar::Type viewType); + [[nodiscard]] bool storeCommon(MemoryAccessDesc* access, AccessCheck check, + ValType resultType); + [[nodiscard]] bool emitSelect(bool typed); + + template <bool isSetLocal> + [[nodiscard]] bool emitSetOrTeeLocal(uint32_t slot); + + MOZ_MUST_USE bool endBlock(ResultType type); + MOZ_MUST_USE bool endIfThen(ResultType type); + MOZ_MUST_USE bool endIfThenElse(ResultType type); + + void doReturn(ContinuationKind kind); + void pushReturnValueOfCall(const FunctionCall& call, MIRType type); + + MOZ_MUST_USE bool pushStackResultsForCall(const ResultType& type, RegPtr temp, + StackResultsLoc* loc); + void popStackResultsAfterCall(const StackResultsLoc& results, + uint32_t stackArgBytes); + + void emitCompareI32(Assembler::Condition compareOp, ValType compareType); + void emitCompareI64(Assembler::Condition compareOp, ValType compareType); + void emitCompareF32(Assembler::DoubleCondition compareOp, + ValType compareType); + void emitCompareF64(Assembler::DoubleCondition compareOp, + ValType compareType); + void emitCompareRef(Assembler::Condition compareOp, ValType compareType); + + void emitAddI32(); + void emitAddI64(); + void emitAddF64(); + void emitAddF32(); + void emitSubtractI32(); + void emitSubtractI64(); + void emitSubtractF32(); + void emitSubtractF64(); + void emitMultiplyI32(); + void emitMultiplyI64(); + void emitMultiplyF32(); + void emitMultiplyF64(); + void emitQuotientI32(); + void emitQuotientU32(); + void emitRemainderI32(); + void emitRemainderU32(); +#ifdef RABALDR_INT_DIV_I64_CALLOUT + [[nodiscard]] bool emitDivOrModI64BuiltinCall(SymbolicAddress callee, + ValType operandType); +#else + void emitQuotientI64(); + void emitQuotientU64(); + void emitRemainderI64(); + void emitRemainderU64(); +#endif + void emitDivideF32(); + void emitDivideF64(); + void emitMinF32(); + void emitMaxF32(); + void emitMinF64(); + void emitMaxF64(); + void emitCopysignF32(); + void emitCopysignF64(); + void emitOrI32(); + void emitOrI64(); + void emitAndI32(); + void emitAndI64(); + void emitXorI32(); + void emitXorI64(); + void emitShlI32(); + void emitShlI64(); + void emitShrI32(); + void emitShrI64(); + void emitShrU32(); + void emitShrU64(); + void emitRotrI32(); + void emitRotrI64(); + void emitRotlI32(); + void emitRotlI64(); + void emitEqzI32(); + void emitEqzI64(); + void emitClzI32(); + void emitClzI64(); + void emitCtzI32(); + void emitCtzI64(); + void emitPopcntI32(); + void emitPopcntI64(); + void emitAbsF32(); + void emitAbsF64(); + void emitNegateF32(); + void emitNegateF64(); + void emitSqrtF32(); + void emitSqrtF64(); + template <TruncFlags flags> + [[nodiscard]] bool 
emitTruncateF32ToI32(); + template <TruncFlags flags> + [[nodiscard]] bool emitTruncateF64ToI32(); +#ifdef RABALDR_FLOAT_TO_I64_CALLOUT + [[nodiscard]] bool emitConvertFloatingToInt64Callout(SymbolicAddress callee, + ValType operandType, + ValType resultType); +#else + template <TruncFlags flags> + [[nodiscard]] bool emitTruncateF32ToI64(); + template <TruncFlags flags> + [[nodiscard]] bool emitTruncateF64ToI64(); +#endif + void emitWrapI64ToI32(); + void emitExtendI32_8(); + void emitExtendI32_16(); + void emitExtendI64_8(); + void emitExtendI64_16(); + void emitExtendI64_32(); + void emitExtendI32ToI64(); + void emitExtendU32ToI64(); + void emitReinterpretF32AsI32(); + void emitReinterpretF64AsI64(); + void emitConvertF64ToF32(); + void emitConvertI32ToF32(); + void emitConvertU32ToF32(); + void emitConvertF32ToF64(); + void emitConvertI32ToF64(); + void emitConvertU32ToF64(); +#ifdef RABALDR_I64_TO_FLOAT_CALLOUT + [[nodiscard]] bool emitConvertInt64ToFloatingCallout(SymbolicAddress callee, + ValType operandType, + ValType resultType); +#else + void emitConvertI64ToF32(); + void emitConvertU64ToF32(); + void emitConvertI64ToF64(); + void emitConvertU64ToF64(); +#endif + void emitReinterpretI32AsF32(); + void emitReinterpretI64AsF64(); + void emitRound(RoundingMode roundingMode, ValType operandType); + [[nodiscard]] bool emitInstanceCall(uint32_t lineOrBytecode, + const SymbolicAddressSignature& builtin, + bool pushReturnedValue = true); + [[nodiscard]] bool emitMemoryGrow(); + [[nodiscard]] bool emitMemorySize(); + + [[nodiscard]] bool emitRefFunc(); + [[nodiscard]] bool emitRefNull(); + [[nodiscard]] bool emitRefIsNull(); +#ifdef ENABLE_WASM_FUNCTION_REFERENCES + [[nodiscard]] bool emitRefAsNonNull(); + [[nodiscard]] bool emitBrOnNull(); +#endif + + [[nodiscard]] bool emitAtomicCmpXchg(ValType type, Scalar::Type viewType); + [[nodiscard]] bool emitAtomicLoad(ValType type, Scalar::Type viewType); + [[nodiscard]] bool emitAtomicRMW(ValType type, Scalar::Type viewType, + AtomicOp op); + [[nodiscard]] bool emitAtomicStore(ValType type, Scalar::Type viewType); + [[nodiscard]] bool emitWait(ValType type, uint32_t byteSize); + [[nodiscard]] bool emitWake(); + [[nodiscard]] bool emitFence(); + [[nodiscard]] bool emitAtomicXchg(ValType type, Scalar::Type viewType); + void emitAtomicXchg64(MemoryAccessDesc* access, WantResult wantResult); + [[nodiscard]] bool emitMemCopy(); + [[nodiscard]] bool emitMemCopyCall(uint32_t lineOrBytecode); + [[nodiscard]] bool emitMemCopyInline(); + [[nodiscard]] bool emitTableCopy(); + [[nodiscard]] bool emitDataOrElemDrop(bool isData); + [[nodiscard]] bool emitMemFill(); + [[nodiscard]] bool emitMemFillCall(uint32_t lineOrBytecode); + [[nodiscard]] bool emitMemFillInline(); + [[nodiscard]] bool emitMemOrTableInit(bool isMem); +#ifdef ENABLE_WASM_REFTYPES + [[nodiscard]] bool emitTableFill(); + [[nodiscard]] bool emitTableGet(); + [[nodiscard]] bool emitTableGrow(); + [[nodiscard]] bool emitTableSet(); + [[nodiscard]] bool emitTableSize(); +#endif + [[nodiscard]] bool emitStructNew(); + [[nodiscard]] bool emitStructGet(); + [[nodiscard]] bool emitStructSet(); + [[nodiscard]] bool emitStructNarrow(); +#ifdef ENABLE_WASM_SIMD + template <typename SourceType, typename DestType> + void emitVectorUnop(void (*op)(MacroAssembler& masm, SourceType rs, + DestType rd)); + + template <typename SourceType, typename DestType, typename TempType> + void emitVectorUnop(void (*op)(MacroAssembler& masm, SourceType rs, + DestType rd, TempType temp)); + + template <typename SourceType, 
typename DestType, typename ImmType> + void emitVectorUnop(ImmType immediate, void (*op)(MacroAssembler&, ImmType, + SourceType, DestType)); + + template <typename RhsType, typename LhsDestType> + void emitVectorBinop(void (*op)(MacroAssembler& masm, RhsType src, + LhsDestType srcDest)); + + template <typename RhsDestType, typename LhsType> + void emitVectorBinop(void (*op)(MacroAssembler& masm, RhsDestType src, + LhsType srcDest, RhsDestOp)); + + template <typename RhsType, typename LhsDestType, typename TempType> + void emitVectorBinop(void (*)(MacroAssembler& masm, RhsType rs, + LhsDestType rsd, TempType temp)); + + template <typename RhsType, typename LhsDestType, typename TempType1, + typename TempType2> + void emitVectorBinop(void (*)(MacroAssembler& masm, RhsType rs, + LhsDestType rsd, TempType1 temp1, + TempType2 temp2)); + + template <typename RhsType, typename LhsDestType, typename ImmType> + void emitVectorBinop(ImmType immediate, void (*op)(MacroAssembler&, ImmType, + RhsType, LhsDestType)); + + template <typename RhsType, typename LhsDestType, typename ImmType, + typename TempType1, typename TempType2> + void emitVectorBinop(ImmType immediate, + void (*op)(MacroAssembler&, ImmType, RhsType, + LhsDestType, TempType1 temp1, + TempType2 temp2)); + + void emitVectorAndNot(); + + [[nodiscard]] bool emitLoadSplat(Scalar::Type viewType); + [[nodiscard]] bool emitLoadZero(Scalar::Type viewType); + [[nodiscard]] bool emitLoadExtend(Scalar::Type viewType); + [[nodiscard]] bool emitBitselect(); + [[nodiscard]] bool emitVectorShuffle(); + [[nodiscard]] bool emitVectorShiftRightI64x2(bool isUnsigned); + [[nodiscard]] bool emitVectorMulI64x2(); +#endif +}; + +// TODO: We want these to be inlined for sure; do we need an `inline` somewhere? + +template <> +RegI32 BaseCompiler::need<RegI32>() { + return needI32(); +} +template <> +RegI64 BaseCompiler::need<RegI64>() { + return needI64(); +} +template <> +RegF32 BaseCompiler::need<RegF32>() { + return needF32(); +} +template <> +RegF64 BaseCompiler::need<RegF64>() { + return needF64(); +} + +template <> +RegI32 BaseCompiler::pop<RegI32>() { + return popI32(); +} +template <> +RegI64 BaseCompiler::pop<RegI64>() { + return popI64(); +} +template <> +RegF32 BaseCompiler::pop<RegF32>() { + return popF32(); +} +template <> +RegF64 BaseCompiler::pop<RegF64>() { + return popF64(); +} + +template <> +void BaseCompiler::free<RegI32>(RegI32 r) { + freeI32(r); +} +template <> +void BaseCompiler::free<RegI64>(RegI64 r) { + freeI64(r); +} +template <> +void BaseCompiler::free<RegF32>(RegF32 r) { + freeF32(r); +} +template <> +void BaseCompiler::free<RegF64>(RegF64 r) { + freeF64(r); +} + +#ifdef ENABLE_WASM_SIMD +template <> +RegV128 BaseCompiler::need<RegV128>() { + return needV128(); +} +template <> +RegV128 BaseCompiler::pop<RegV128>() { + return popV128(); +} +template <> +void BaseCompiler::free<RegV128>(RegV128 r) { + freeV128(r); +} +#endif + +void BaseCompiler::emitAddI32() { + int32_t c; + if (popConstI32(&c)) { + RegI32 r = popI32(); + masm.add32(Imm32(c), r); + pushI32(r); + } else { + RegI32 r, rs; + pop2xI32(&r, &rs); + masm.add32(rs, r); + freeI32(rs); + pushI32(r); + } +} + +void BaseCompiler::emitAddI64() { + int64_t c; + if (popConstI64(&c)) { + RegI64 r = popI64(); + masm.add64(Imm64(c), r); + pushI64(r); + } else { + RegI64 r, rs; + pop2xI64(&r, &rs); + masm.add64(rs, r); + freeI64(rs); + pushI64(r); + } +} + +void BaseCompiler::emitAddF64() { + RegF64 r, rs; + pop2xF64(&r, &rs); + masm.addDouble(rs, r); + freeF64(rs); + pushF64(r); +} + 
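+// Note on the binary emitters here and below: when the top of the value stack
+// is a compile-time constant, popConstI32()/popConstI64() consume it and the
+// operation is emitted with an immediate operand; otherwise both operands are
+// popped into registers (pop2xI32/pop2xI64), the operation is emitted in place
+// into 'r', the source register 'rs' is freed, and 'r' is pushed as the
+// result.  The floating-point emitters have no immediate form and always take
+// the register path.
+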
+void BaseCompiler::emitAddF32() { + RegF32 r, rs; + pop2xF32(&r, &rs); + masm.addFloat32(rs, r); + freeF32(rs); + pushF32(r); +} + +void BaseCompiler::emitSubtractI32() { + int32_t c; + if (popConstI32(&c)) { + RegI32 r = popI32(); + masm.sub32(Imm32(c), r); + pushI32(r); + } else { + RegI32 r, rs; + pop2xI32(&r, &rs); + masm.sub32(rs, r); + freeI32(rs); + pushI32(r); + } +} + +void BaseCompiler::emitSubtractI64() { + int64_t c; + if (popConstI64(&c)) { + RegI64 r = popI64(); + masm.sub64(Imm64(c), r); + pushI64(r); + } else { + RegI64 r, rs; + pop2xI64(&r, &rs); + masm.sub64(rs, r); + freeI64(rs); + pushI64(r); + } +} + +void BaseCompiler::emitSubtractF32() { + RegF32 r, rs; + pop2xF32(&r, &rs); + masm.subFloat32(rs, r); + freeF32(rs); + pushF32(r); +} + +void BaseCompiler::emitSubtractF64() { + RegF64 r, rs; + pop2xF64(&r, &rs); + masm.subDouble(rs, r); + freeF64(rs); + pushF64(r); +} + +void BaseCompiler::emitMultiplyI32() { + RegI32 r, rs, reserved; + pop2xI32ForMulDivI32(&r, &rs, &reserved); + masm.mul32(rs, r); + maybeFreeI32(reserved); + freeI32(rs); + pushI32(r); +} + +void BaseCompiler::emitMultiplyI64() { + RegI64 r, rs, reserved; + RegI32 temp; + pop2xI64ForMulI64(&r, &rs, &temp, &reserved); + masm.mul64(rs, r, temp); + maybeFreeI64(reserved); + maybeFreeI32(temp); + freeI64(rs); + pushI64(r); +} + +void BaseCompiler::emitMultiplyF32() { + RegF32 r, rs; + pop2xF32(&r, &rs); + masm.mulFloat32(rs, r); + freeF32(rs); + pushF32(r); +} + +void BaseCompiler::emitMultiplyF64() { + RegF64 r, rs; + pop2xF64(&r, &rs); + masm.mulDouble(rs, r); + freeF64(rs); + pushF64(r); +} + +void BaseCompiler::emitQuotientI32() { + int32_t c; + uint_fast8_t power; + if (popConstPositivePowerOfTwoI32(&c, &power, 0)) { + if (power != 0) { + RegI32 r = popI32(); + Label positive; + masm.branchTest32(Assembler::NotSigned, r, r, &positive); + masm.add32(Imm32(c - 1), r); + masm.bind(&positive); + + masm.rshift32Arithmetic(Imm32(power & 31), r); + pushI32(r); + } + } else { + bool isConst = peekConstI32(&c); + RegI32 r, rs, reserved; + pop2xI32ForMulDivI32(&r, &rs, &reserved); + + if (!isConst || c == 0) { + checkDivideByZeroI32(rs); + } + + Label done; + if (!isConst || c == -1) { + checkDivideSignedOverflowI32(rs, r, &done, ZeroOnOverflow(false)); + } + masm.quotient32(rs, r, IsUnsigned(false)); + masm.bind(&done); + + maybeFreeI32(reserved); + freeI32(rs); + pushI32(r); + } +} + +void BaseCompiler::emitQuotientU32() { + int32_t c; + uint_fast8_t power; + if (popConstPositivePowerOfTwoI32(&c, &power, 0)) { + if (power != 0) { + RegI32 r = popI32(); + masm.rshift32(Imm32(power & 31), r); + pushI32(r); + } + } else { + bool isConst = peekConstI32(&c); + RegI32 r, rs, reserved; + pop2xI32ForMulDivI32(&r, &rs, &reserved); + + if (!isConst || c == 0) { + checkDivideByZeroI32(rs); + } + masm.quotient32(rs, r, IsUnsigned(true)); + + maybeFreeI32(reserved); + freeI32(rs); + pushI32(r); + } +} + +void BaseCompiler::emitRemainderI32() { + int32_t c; + uint_fast8_t power; + if (popConstPositivePowerOfTwoI32(&c, &power, 1)) { + RegI32 r = popI32(); + RegI32 temp = needI32(); + moveI32(r, temp); + + Label positive; + masm.branchTest32(Assembler::NotSigned, temp, temp, &positive); + masm.add32(Imm32(c - 1), temp); + masm.bind(&positive); + + masm.rshift32Arithmetic(Imm32(power & 31), temp); + masm.lshift32(Imm32(power & 31), temp); + masm.sub32(temp, r); + freeI32(temp); + + pushI32(r); + } else { + bool isConst = peekConstI32(&c); + RegI32 r, rs, reserved; + pop2xI32ForMulDivI32(&r, &rs, &reserved); + + if (!isConst || 
c == 0) { + checkDivideByZeroI32(rs); + } + + Label done; + if (!isConst || c == -1) { + checkDivideSignedOverflowI32(rs, r, &done, ZeroOnOverflow(true)); + } + masm.remainder32(rs, r, IsUnsigned(false)); + masm.bind(&done); + + maybeFreeI32(reserved); + freeI32(rs); + pushI32(r); + } +} + +void BaseCompiler::emitRemainderU32() { + int32_t c; + uint_fast8_t power; + if (popConstPositivePowerOfTwoI32(&c, &power, 1)) { + RegI32 r = popI32(); + masm.and32(Imm32(c - 1), r); + pushI32(r); + } else { + bool isConst = peekConstI32(&c); + RegI32 r, rs, reserved; + pop2xI32ForMulDivI32(&r, &rs, &reserved); + + if (!isConst || c == 0) { + checkDivideByZeroI32(rs); + } + masm.remainder32(rs, r, IsUnsigned(true)); + + maybeFreeI32(reserved); + freeI32(rs); + pushI32(r); + } +} + +#ifndef RABALDR_INT_DIV_I64_CALLOUT +void BaseCompiler::emitQuotientI64() { +# ifdef JS_64BIT + int64_t c; + uint_fast8_t power; + if (popConstPositivePowerOfTwoI64(&c, &power, 0)) { + if (power != 0) { + RegI64 r = popI64(); + Label positive; + masm.branchTest64(Assembler::NotSigned, r, r, RegI32::Invalid(), + &positive); + masm.add64(Imm64(c - 1), r); + masm.bind(&positive); + + masm.rshift64Arithmetic(Imm32(power & 63), r); + pushI64(r); + } + } else { + bool isConst = peekConstI64(&c); + RegI64 r, rs, reserved; + pop2xI64ForDivI64(&r, &rs, &reserved); + quotientI64(rs, r, reserved, IsUnsigned(false), isConst, c); + maybeFreeI64(reserved); + freeI64(rs); + pushI64(r); + } +# else + MOZ_CRASH("BaseCompiler platform hook: emitQuotientI64"); +# endif +} + +void BaseCompiler::emitQuotientU64() { +# ifdef JS_64BIT + int64_t c; + uint_fast8_t power; + if (popConstPositivePowerOfTwoI64(&c, &power, 0)) { + if (power != 0) { + RegI64 r = popI64(); + masm.rshift64(Imm32(power & 63), r); + pushI64(r); + } + } else { + bool isConst = peekConstI64(&c); + RegI64 r, rs, reserved; + pop2xI64ForDivI64(&r, &rs, &reserved); + quotientI64(rs, r, reserved, IsUnsigned(true), isConst, c); + maybeFreeI64(reserved); + freeI64(rs); + pushI64(r); + } +# else + MOZ_CRASH("BaseCompiler platform hook: emitQuotientU64"); +# endif +} + +void BaseCompiler::emitRemainderI64() { +# ifdef JS_64BIT + int64_t c; + uint_fast8_t power; + if (popConstPositivePowerOfTwoI64(&c, &power, 1)) { + RegI64 r = popI64(); + RegI64 temp = needI64(); + moveI64(r, temp); + + Label positive; + masm.branchTest64(Assembler::NotSigned, temp, temp, RegI32::Invalid(), + &positive); + masm.add64(Imm64(c - 1), temp); + masm.bind(&positive); + + masm.rshift64Arithmetic(Imm32(power & 63), temp); + masm.lshift64(Imm32(power & 63), temp); + masm.sub64(temp, r); + freeI64(temp); + + pushI64(r); + } else { + bool isConst = peekConstI64(&c); + RegI64 r, rs, reserved; + pop2xI64ForDivI64(&r, &rs, &reserved); + remainderI64(rs, r, reserved, IsUnsigned(false), isConst, c); + maybeFreeI64(reserved); + freeI64(rs); + pushI64(r); + } +# else + MOZ_CRASH("BaseCompiler platform hook: emitRemainderI64"); +# endif +} + +void BaseCompiler::emitRemainderU64() { +# ifdef JS_64BIT + int64_t c; + uint_fast8_t power; + if (popConstPositivePowerOfTwoI64(&c, &power, 1)) { + RegI64 r = popI64(); + masm.and64(Imm64(c - 1), r); + pushI64(r); + } else { + bool isConst = peekConstI64(&c); + RegI64 r, rs, reserved; + pop2xI64ForDivI64(&r, &rs, &reserved); + remainderI64(rs, r, reserved, IsUnsigned(true), isConst, c); + maybeFreeI64(reserved); + freeI64(rs); + pushI64(r); + } +# else + MOZ_CRASH("BaseCompiler platform hook: emitRemainderU64"); +# endif +} +#endif // RABALDR_INT_DIV_I64_CALLOUT + +void 
BaseCompiler::emitDivideF32() { + RegF32 r, rs; + pop2xF32(&r, &rs); + masm.divFloat32(rs, r); + freeF32(rs); + pushF32(r); +} + +void BaseCompiler::emitDivideF64() { + RegF64 r, rs; + pop2xF64(&r, &rs); + masm.divDouble(rs, r); + freeF64(rs); + pushF64(r); +} + +void BaseCompiler::emitMinF32() { + RegF32 r, rs; + pop2xF32(&r, &rs); + // Convert signaling NaN to quiet NaNs. + // + // TODO / OPTIMIZE (bug 1316824): Don't do this if one of the operands + // is known to be a constant. + ScratchF32 zero(*this); + moveImmF32(0.f, zero); + masm.subFloat32(zero, r); + masm.subFloat32(zero, rs); + masm.minFloat32(rs, r, HandleNaNSpecially(true)); + freeF32(rs); + pushF32(r); +} + +void BaseCompiler::emitMaxF32() { + RegF32 r, rs; + pop2xF32(&r, &rs); + // Convert signaling NaN to quiet NaNs. + // + // TODO / OPTIMIZE (bug 1316824): see comment in emitMinF32. + ScratchF32 zero(*this); + moveImmF32(0.f, zero); + masm.subFloat32(zero, r); + masm.subFloat32(zero, rs); + masm.maxFloat32(rs, r, HandleNaNSpecially(true)); + freeF32(rs); + pushF32(r); +} + +void BaseCompiler::emitMinF64() { + RegF64 r, rs; + pop2xF64(&r, &rs); + // Convert signaling NaN to quiet NaNs. + // + // TODO / OPTIMIZE (bug 1316824): see comment in emitMinF32. + ScratchF64 zero(*this); + moveImmF64(0, zero); + masm.subDouble(zero, r); + masm.subDouble(zero, rs); + masm.minDouble(rs, r, HandleNaNSpecially(true)); + freeF64(rs); + pushF64(r); +} + +void BaseCompiler::emitMaxF64() { + RegF64 r, rs; + pop2xF64(&r, &rs); + // Convert signaling NaN to quiet NaNs. + // + // TODO / OPTIMIZE (bug 1316824): see comment in emitMinF32. + ScratchF64 zero(*this); + moveImmF64(0, zero); + masm.subDouble(zero, r); + masm.subDouble(zero, rs); + masm.maxDouble(rs, r, HandleNaNSpecially(true)); + freeF64(rs); + pushF64(r); +} + +void BaseCompiler::emitCopysignF32() { + RegF32 r, rs; + pop2xF32(&r, &rs); + RegI32 temp0 = needI32(); + RegI32 temp1 = needI32(); + masm.moveFloat32ToGPR(r, temp0); + masm.moveFloat32ToGPR(rs, temp1); + masm.and32(Imm32(INT32_MAX), temp0); + masm.and32(Imm32(INT32_MIN), temp1); + masm.or32(temp1, temp0); + masm.moveGPRToFloat32(temp0, r); + freeI32(temp0); + freeI32(temp1); + freeF32(rs); + pushF32(r); +} + +void BaseCompiler::emitCopysignF64() { + RegF64 r, rs; + pop2xF64(&r, &rs); + RegI64 temp0 = needI64(); + RegI64 temp1 = needI64(); + masm.moveDoubleToGPR64(r, temp0); + masm.moveDoubleToGPR64(rs, temp1); + masm.and64(Imm64(INT64_MAX), temp0); + masm.and64(Imm64(INT64_MIN), temp1); + masm.or64(temp1, temp0); + masm.moveGPR64ToDouble(temp0, r); + freeI64(temp0); + freeI64(temp1); + freeF64(rs); + pushF64(r); +} + +void BaseCompiler::emitOrI32() { + int32_t c; + if (popConstI32(&c)) { + RegI32 r = popI32(); + masm.or32(Imm32(c), r); + pushI32(r); + } else { + RegI32 r, rs; + pop2xI32(&r, &rs); + masm.or32(rs, r); + freeI32(rs); + pushI32(r); + } +} + +void BaseCompiler::emitOrI64() { + int64_t c; + if (popConstI64(&c)) { + RegI64 r = popI64(); + masm.or64(Imm64(c), r); + pushI64(r); + } else { + RegI64 r, rs; + pop2xI64(&r, &rs); + masm.or64(rs, r); + freeI64(rs); + pushI64(r); + } +} + +void BaseCompiler::emitAndI32() { + int32_t c; + if (popConstI32(&c)) { + RegI32 r = popI32(); + masm.and32(Imm32(c), r); + pushI32(r); + } else { + RegI32 r, rs; + pop2xI32(&r, &rs); + masm.and32(rs, r); + freeI32(rs); + pushI32(r); + } +} + +void BaseCompiler::emitAndI64() { + int64_t c; + if (popConstI64(&c)) { + RegI64 r = popI64(); + masm.and64(Imm64(c), r); + pushI64(r); + } else { + RegI64 r, rs; + pop2xI64(&r, &rs); + 
masm.and64(rs, r); + freeI64(rs); + pushI64(r); + } +} + +void BaseCompiler::emitXorI32() { + int32_t c; + if (popConstI32(&c)) { + RegI32 r = popI32(); + masm.xor32(Imm32(c), r); + pushI32(r); + } else { + RegI32 r, rs; + pop2xI32(&r, &rs); + masm.xor32(rs, r); + freeI32(rs); + pushI32(r); + } +} + +void BaseCompiler::emitXorI64() { + int64_t c; + if (popConstI64(&c)) { + RegI64 r = popI64(); + masm.xor64(Imm64(c), r); + pushI64(r); + } else { + RegI64 r, rs; + pop2xI64(&r, &rs); + masm.xor64(rs, r); + freeI64(rs); + pushI64(r); + } +} + +void BaseCompiler::emitShlI32() { + int32_t c; + if (popConstI32(&c)) { + RegI32 r = popI32(); + masm.lshift32(Imm32(c & 31), r); + pushI32(r); + } else { + RegI32 r, rs; + pop2xI32ForShift(&r, &rs); + maskShiftCount32(rs); + masm.lshift32(rs, r); + freeI32(rs); + pushI32(r); + } +} + +void BaseCompiler::emitShlI64() { + int64_t c; + if (popConstI64(&c)) { + RegI64 r = popI64(); + masm.lshift64(Imm32(c & 63), r); + pushI64(r); + } else { + RegI64 r, rs; + pop2xI64ForShift(&r, &rs); + masm.lshift64(lowPart(rs), r); + freeI64(rs); + pushI64(r); + } +} + +void BaseCompiler::emitShrI32() { + int32_t c; + if (popConstI32(&c)) { + RegI32 r = popI32(); + masm.rshift32Arithmetic(Imm32(c & 31), r); + pushI32(r); + } else { + RegI32 r, rs; + pop2xI32ForShift(&r, &rs); + maskShiftCount32(rs); + masm.rshift32Arithmetic(rs, r); + freeI32(rs); + pushI32(r); + } +} + +void BaseCompiler::emitShrI64() { + int64_t c; + if (popConstI64(&c)) { + RegI64 r = popI64(); + masm.rshift64Arithmetic(Imm32(c & 63), r); + pushI64(r); + } else { + RegI64 r, rs; + pop2xI64ForShift(&r, &rs); + masm.rshift64Arithmetic(lowPart(rs), r); + freeI64(rs); + pushI64(r); + } +} + +void BaseCompiler::emitShrU32() { + int32_t c; + if (popConstI32(&c)) { + RegI32 r = popI32(); + masm.rshift32(Imm32(c & 31), r); + pushI32(r); + } else { + RegI32 r, rs; + pop2xI32ForShift(&r, &rs); + maskShiftCount32(rs); + masm.rshift32(rs, r); + freeI32(rs); + pushI32(r); + } +} + +void BaseCompiler::emitShrU64() { + int64_t c; + if (popConstI64(&c)) { + RegI64 r = popI64(); + masm.rshift64(Imm32(c & 63), r); + pushI64(r); + } else { + RegI64 r, rs; + pop2xI64ForShift(&r, &rs); + masm.rshift64(lowPart(rs), r); + freeI64(rs); + pushI64(r); + } +} + +void BaseCompiler::emitRotrI32() { + int32_t c; + if (popConstI32(&c)) { + RegI32 r = popI32(); + masm.rotateRight(Imm32(c & 31), r, r); + pushI32(r); + } else { + RegI32 r, rs; + pop2xI32ForRotate(&r, &rs); + masm.rotateRight(rs, r, r); + freeI32(rs); + pushI32(r); + } +} + +void BaseCompiler::emitRotrI64() { + int64_t c; + if (popConstI64(&c)) { + RegI64 r = popI64(); + RegI32 temp = needRotate64Temp(); + masm.rotateRight64(Imm32(c & 63), r, r, temp); + maybeFreeI32(temp); + pushI64(r); + } else { + RegI64 r, rs; + pop2xI64ForRotate(&r, &rs); + masm.rotateRight64(lowPart(rs), r, r, maybeHighPart(rs)); + freeI64(rs); + pushI64(r); + } +} + +void BaseCompiler::emitRotlI32() { + int32_t c; + if (popConstI32(&c)) { + RegI32 r = popI32(); + masm.rotateLeft(Imm32(c & 31), r, r); + pushI32(r); + } else { + RegI32 r, rs; + pop2xI32ForRotate(&r, &rs); + masm.rotateLeft(rs, r, r); + freeI32(rs); + pushI32(r); + } +} + +void BaseCompiler::emitRotlI64() { + int64_t c; + if (popConstI64(&c)) { + RegI64 r = popI64(); + RegI32 temp = needRotate64Temp(); + masm.rotateLeft64(Imm32(c & 63), r, r, temp); + maybeFreeI32(temp); + pushI64(r); + } else { + RegI64 r, rs; + pop2xI64ForRotate(&r, &rs); + masm.rotateLeft64(lowPart(rs), r, r, maybeHighPart(rs)); + freeI64(rs); + pushI64(r); + } 
+} + +void BaseCompiler::emitEqzI32() { + if (sniffConditionalControlEqz(ValType::I32)) { + return; + } + + RegI32 r = popI32(); + masm.cmp32Set(Assembler::Equal, r, Imm32(0), r); + pushI32(r); +} + +void BaseCompiler::emitEqzI64() { + if (sniffConditionalControlEqz(ValType::I64)) { + return; + } + + RegI64 rs = popI64(); + RegI32 rd = fromI64(rs); + eqz64(rs, rd); + freeI64Except(rs, rd); + pushI32(rd); +} + +void BaseCompiler::emitClzI32() { + RegI32 r = popI32(); + masm.clz32(r, r, IsKnownNotZero(false)); + pushI32(r); +} + +void BaseCompiler::emitClzI64() { + RegI64 r = popI64(); + masm.clz64(r, lowPart(r)); + maybeClearHighPart(r); + pushI64(r); +} + +void BaseCompiler::emitCtzI32() { + RegI32 r = popI32(); + masm.ctz32(r, r, IsKnownNotZero(false)); + pushI32(r); +} + +void BaseCompiler::emitCtzI64() { + RegI64 r = popI64(); + masm.ctz64(r, lowPart(r)); + maybeClearHighPart(r); + pushI64(r); +} + +void BaseCompiler::emitPopcntI32() { + RegI32 r = popI32(); + RegI32 temp = needPopcnt32Temp(); + masm.popcnt32(r, r, temp); + maybeFreeI32(temp); + pushI32(r); +} + +void BaseCompiler::emitPopcntI64() { + RegI64 r = popI64(); + RegI32 temp = needPopcnt64Temp(); + masm.popcnt64(r, r, temp); + maybeFreeI32(temp); + pushI64(r); +} + +void BaseCompiler::emitAbsF32() { + RegF32 r = popF32(); + masm.absFloat32(r, r); + pushF32(r); +} + +void BaseCompiler::emitAbsF64() { + RegF64 r = popF64(); + masm.absDouble(r, r); + pushF64(r); +} + +void BaseCompiler::emitNegateF32() { + RegF32 r = popF32(); + masm.negateFloat(r); + pushF32(r); +} + +void BaseCompiler::emitNegateF64() { + RegF64 r = popF64(); + masm.negateDouble(r); + pushF64(r); +} + +void BaseCompiler::emitSqrtF32() { + RegF32 r = popF32(); + masm.sqrtFloat32(r, r); + pushF32(r); +} + +void BaseCompiler::emitSqrtF64() { + RegF64 r = popF64(); + masm.sqrtDouble(r, r); + pushF64(r); +} + +template <TruncFlags flags> +bool BaseCompiler::emitTruncateF32ToI32() { + RegF32 rs = popF32(); + RegI32 rd = needI32(); + if (!truncateF32ToI32(rs, rd, flags)) { + return false; + } + freeF32(rs); + pushI32(rd); + return true; +} + +template <TruncFlags flags> +bool BaseCompiler::emitTruncateF64ToI32() { + RegF64 rs = popF64(); + RegI32 rd = needI32(); + if (!truncateF64ToI32(rs, rd, flags)) { + return false; + } + freeF64(rs); + pushI32(rd); + return true; +} + +#ifndef RABALDR_FLOAT_TO_I64_CALLOUT +template <TruncFlags flags> +bool BaseCompiler::emitTruncateF32ToI64() { + RegF32 rs = popF32(); + RegI64 rd = needI64(); + RegF64 temp = needTempForFloatingToI64(flags); + if (!truncateF32ToI64(rs, rd, flags, temp)) { + return false; + } + maybeFreeF64(temp); + freeF32(rs); + pushI64(rd); + return true; +} + +template <TruncFlags flags> +bool BaseCompiler::emitTruncateF64ToI64() { + RegF64 rs = popF64(); + RegI64 rd = needI64(); + RegF64 temp = needTempForFloatingToI64(flags); + if (!truncateF64ToI64(rs, rd, flags, temp)) { + return false; + } + maybeFreeF64(temp); + freeF64(rs); + pushI64(rd); + return true; +} +#endif // RABALDR_FLOAT_TO_I64_CALLOUT + +void BaseCompiler::emitWrapI64ToI32() { + RegI64 rs = popI64(); + RegI32 rd = fromI64(rs); + masm.move64To32(rs, rd); + freeI64Except(rs, rd); + pushI32(rd); +} + +void BaseCompiler::emitExtendI32_8() { + RegI32 r = popI32(); +#ifdef JS_CODEGEN_X86 + if (!ra.isSingleByteI32(r)) { + ScratchI8 scratch(*this); + moveI32(r, scratch); + masm.move8SignExtend(scratch, r); + pushI32(r); + return; + } +#endif + masm.move8SignExtend(r, r); + pushI32(r); +} + +void BaseCompiler::emitExtendI32_16() { + RegI32 r = 
popI32(); + masm.move16SignExtend(r, r); + pushI32(r); +} + +void BaseCompiler::emitExtendI64_8() { + RegI64 r; + popI64ForSignExtendI64(&r); + masm.move8To64SignExtend(lowPart(r), r); + pushI64(r); +} + +void BaseCompiler::emitExtendI64_16() { + RegI64 r; + popI64ForSignExtendI64(&r); + masm.move16To64SignExtend(lowPart(r), r); + pushI64(r); +} + +void BaseCompiler::emitExtendI64_32() { + RegI64 r; + popI64ForSignExtendI64(&r); + masm.move32To64SignExtend(lowPart(r), r); + pushI64(r); +} + +void BaseCompiler::emitExtendI32ToI64() { + RegI64 r; + popI32ForSignExtendI64(&r); + masm.move32To64SignExtend(lowPart(r), r); + pushI64(r); +} + +void BaseCompiler::emitExtendU32ToI64() { + RegI32 rs = popI32(); + RegI64 rd = widenI32(rs); + masm.move32To64ZeroExtend(rs, rd); + pushI64(rd); +} + +void BaseCompiler::emitReinterpretF32AsI32() { + RegF32 rs = popF32(); + RegI32 rd = needI32(); + masm.moveFloat32ToGPR(rs, rd); + freeF32(rs); + pushI32(rd); +} + +void BaseCompiler::emitReinterpretF64AsI64() { + RegF64 rs = popF64(); + RegI64 rd = needI64(); + masm.moveDoubleToGPR64(rs, rd); + freeF64(rs); + pushI64(rd); +} + +void BaseCompiler::emitConvertF64ToF32() { + RegF64 rs = popF64(); + RegF32 rd = needF32(); + masm.convertDoubleToFloat32(rs, rd); + freeF64(rs); + pushF32(rd); +} + +void BaseCompiler::emitConvertI32ToF32() { + RegI32 rs = popI32(); + RegF32 rd = needF32(); + masm.convertInt32ToFloat32(rs, rd); + freeI32(rs); + pushF32(rd); +} + +void BaseCompiler::emitConvertU32ToF32() { + RegI32 rs = popI32(); + RegF32 rd = needF32(); + masm.convertUInt32ToFloat32(rs, rd); + freeI32(rs); + pushF32(rd); +} + +#ifndef RABALDR_I64_TO_FLOAT_CALLOUT +void BaseCompiler::emitConvertI64ToF32() { + RegI64 rs = popI64(); + RegF32 rd = needF32(); + convertI64ToF32(rs, IsUnsigned(false), rd, RegI32()); + freeI64(rs); + pushF32(rd); +} + +void BaseCompiler::emitConvertU64ToF32() { + RegI64 rs = popI64(); + RegF32 rd = needF32(); + RegI32 temp = needConvertI64ToFloatTemp(ValType::F32, IsUnsigned(true)); + convertI64ToF32(rs, IsUnsigned(true), rd, temp); + maybeFreeI32(temp); + freeI64(rs); + pushF32(rd); +} +#endif + +void BaseCompiler::emitConvertF32ToF64() { + RegF32 rs = popF32(); + RegF64 rd = needF64(); + masm.convertFloat32ToDouble(rs, rd); + freeF32(rs); + pushF64(rd); +} + +void BaseCompiler::emitConvertI32ToF64() { + RegI32 rs = popI32(); + RegF64 rd = needF64(); + masm.convertInt32ToDouble(rs, rd); + freeI32(rs); + pushF64(rd); +} + +void BaseCompiler::emitConvertU32ToF64() { + RegI32 rs = popI32(); + RegF64 rd = needF64(); + masm.convertUInt32ToDouble(rs, rd); + freeI32(rs); + pushF64(rd); +} + +#ifndef RABALDR_I64_TO_FLOAT_CALLOUT +void BaseCompiler::emitConvertI64ToF64() { + RegI64 rs = popI64(); + RegF64 rd = needF64(); + convertI64ToF64(rs, IsUnsigned(false), rd, RegI32()); + freeI64(rs); + pushF64(rd); +} + +void BaseCompiler::emitConvertU64ToF64() { + RegI64 rs = popI64(); + RegF64 rd = needF64(); + RegI32 temp = needConvertI64ToFloatTemp(ValType::F64, IsUnsigned(true)); + convertI64ToF64(rs, IsUnsigned(true), rd, temp); + maybeFreeI32(temp); + freeI64(rs); + pushF64(rd); +} +#endif // RABALDR_I64_TO_FLOAT_CALLOUT + +void BaseCompiler::emitReinterpretI32AsF32() { + RegI32 rs = popI32(); + RegF32 rd = needF32(); + masm.moveGPRToFloat32(rs, rd); + freeI32(rs); + pushF32(rd); +} + +void BaseCompiler::emitReinterpretI64AsF64() { + RegI64 rs = popI64(); + RegF64 rd = needF64(); + masm.moveGPR64ToDouble(rs, rd); + freeI64(rs); + pushF64(rd); +} + +template <typename Cond> +bool 
BaseCompiler::sniffConditionalControlCmp(Cond compareOp, + ValType operandType) { + MOZ_ASSERT(latentOp_ == LatentOp::None, + "Latent comparison state not properly reset"); + +#ifdef JS_CODEGEN_X86 + // On x86, latent i64 binary comparisons use too many registers: the + // reserved join register and the lhs and rhs operands require six, but we + // only have five. + if (operandType == ValType::I64) { + return false; + } +#endif + + // No optimization for pointer compares yet. + if (operandType.isReference()) { + return false; + } + + OpBytes op; + iter_.peekOp(&op); + switch (op.b0) { + case uint16_t(Op::BrIf): + case uint16_t(Op::If): + case uint16_t(Op::SelectNumeric): + case uint16_t(Op::SelectTyped): + setLatentCompare(compareOp, operandType); + return true; + default: + return false; + } +} + +bool BaseCompiler::sniffConditionalControlEqz(ValType operandType) { + MOZ_ASSERT(latentOp_ == LatentOp::None, + "Latent comparison state not properly reset"); + + OpBytes op; + iter_.peekOp(&op); + switch (op.b0) { + case uint16_t(Op::BrIf): + case uint16_t(Op::SelectNumeric): + case uint16_t(Op::SelectTyped): + case uint16_t(Op::If): + setLatentEqz(operandType); + return true; + default: + return false; + } +} + +void BaseCompiler::emitBranchSetup(BranchState* b) { + // Avoid allocating operands to latentOp_ to result registers. + if (b->hasBlockResults()) { + needResultRegisters(b->resultType); + } + + // Set up fields so that emitBranchPerform() need not switch on latentOp_. + switch (latentOp_) { + case LatentOp::None: { + latentIntCmp_ = Assembler::NotEqual; + latentType_ = ValType::I32; + b->i32.lhs = popI32(); + b->i32.rhsImm = true; + b->i32.imm = 0; + break; + } + case LatentOp::Compare: { + switch (latentType_.kind()) { + case ValType::I32: { + if (popConstI32(&b->i32.imm)) { + b->i32.lhs = popI32(); + b->i32.rhsImm = true; + } else { + pop2xI32(&b->i32.lhs, &b->i32.rhs); + b->i32.rhsImm = false; + } + break; + } + case ValType::I64: { + pop2xI64(&b->i64.lhs, &b->i64.rhs); + b->i64.rhsImm = false; + break; + } + case ValType::F32: { + pop2xF32(&b->f32.lhs, &b->f32.rhs); + break; + } + case ValType::F64: { + pop2xF64(&b->f64.lhs, &b->f64.rhs); + break; + } + default: { + MOZ_CRASH("Unexpected type for LatentOp::Compare"); + } + } + break; + } + case LatentOp::Eqz: { + switch (latentType_.kind()) { + case ValType::I32: { + latentIntCmp_ = Assembler::Equal; + b->i32.lhs = popI32(); + b->i32.rhsImm = true; + b->i32.imm = 0; + break; + } + case ValType::I64: { + latentIntCmp_ = Assembler::Equal; + b->i64.lhs = popI64(); + b->i64.rhsImm = true; + b->i64.imm = 0; + break; + } + default: { + MOZ_CRASH("Unexpected type for LatentOp::Eqz"); + } + } + break; + } + } + + if (b->hasBlockResults()) { + freeResultRegisters(b->resultType); + } +} + +bool BaseCompiler::emitBranchPerform(BranchState* b) { + switch (latentType_.kind()) { + case ValType::I32: { + if (b->i32.rhsImm) { + if (!jumpConditionalWithResults(b, latentIntCmp_, b->i32.lhs, + Imm32(b->i32.imm))) { + return false; + } + } else { + if (!jumpConditionalWithResults(b, latentIntCmp_, b->i32.lhs, + b->i32.rhs)) { + return false; + } + freeI32(b->i32.rhs); + } + freeI32(b->i32.lhs); + break; + } + case ValType::I64: { + if (b->i64.rhsImm) { + if (!jumpConditionalWithResults(b, latentIntCmp_, b->i64.lhs, + Imm64(b->i64.imm))) { + return false; + } + } else { + if (!jumpConditionalWithResults(b, latentIntCmp_, b->i64.lhs, + b->i64.rhs)) { + return false; + } + freeI64(b->i64.rhs); + } + freeI64(b->i64.lhs); + break; + } + case ValType::F32: 
{
+      if (!jumpConditionalWithResults(b, latentDoubleCmp_, b->f32.lhs,
+                                      b->f32.rhs)) {
+        return false;
+      }
+      freeF32(b->f32.lhs);
+      freeF32(b->f32.rhs);
+      break;
+    }
+    case ValType::F64: {
+      if (!jumpConditionalWithResults(b, latentDoubleCmp_, b->f64.lhs,
+                                      b->f64.rhs)) {
+        return false;
+      }
+      freeF64(b->f64.lhs);
+      freeF64(b->f64.rhs);
+      break;
+    }
+    default: {
+      MOZ_CRASH("Unexpected type for LatentOp::Compare");
+    }
+  }
+  resetLatentOp();
+  return true;
+}
+
+// For blocks and loops and ifs:
+//
+//  - Sync the value stack before going into the block in order to simplify exit
+//    from the block: all exits from the block can assume that there are no
+//    live registers except the one carrying the exit value.
+//  - The block can accumulate a number of dead values on the stacks, so when
+//    branching out of the block or falling out at the end be sure to
+//    pop the appropriate stacks back to where they were on entry, while
+//    preserving the exit value.
+//  - A continue branch in a loop is much like an exit branch, but the branch
+//    value must not be preserved.
+//  - The exit value is always in a designated join register (type dependent).
+
+bool BaseCompiler::emitBlock() {
+  ResultType params;
+  if (!iter_.readBlock(&params)) {
+    return false;
+  }
+
+  if (!deadCode_) {
+    sync();  // Simplifies branching out from block
+  }
+
+  initControl(controlItem(), params);
+
+  return true;
+}
+
+bool BaseCompiler::endBlock(ResultType type) {
+  Control& block = controlItem();
+
+  if (deadCode_) {
+    // Block does not fall through; reset stack.
+    fr.resetStackHeight(block.stackHeight, type);
+    popValueStackTo(block.stackSize);
+  } else {
+    // If the block label is used, we have a control join, so we need to
+    // shuffle fallthrough values into place.  Otherwise if it's not a control
+    // join, we can leave the value stack alone.
+    MOZ_ASSERT(stk_.length() == block.stackSize + type.length());
+    if (block.label.used()) {
+      popBlockResults(type, block.stackHeight, ContinuationKind::Fallthrough);
+    }
+    block.bceSafeOnExit &= bceSafe_;
+  }
+
+  // Bind after cleanup: branches out will have popped the stack.
+  if (block.label.used()) {
+    masm.bind(&block.label);
+    if (deadCode_) {
+      captureResultRegisters(type);
+      deadCode_ = false;
+    }
+    if (!pushBlockResults(type)) {
+      return false;
+    }
+  }
+
+  bceSafe_ = block.bceSafeOnExit;
+
+  return true;
+}
+
+bool BaseCompiler::emitLoop() {
+  ResultType params;
+  if (!iter_.readLoop(&params)) {
+    return false;
+  }
+
+  if (!deadCode_) {
+    sync();  // Simplifies branching out from block
+  }
+
+  initControl(controlItem(), params);
+  bceSafe_ = 0;
+
+  if (!deadCode_) {
+    // Loop entry is a control join, so shuffle the entry parameters into the
+    // well-known locations.
+    if (!topBlockParams(params)) {
+      return false;
+    }
+    masm.nopAlign(CodeAlignment);
+    masm.bind(&controlItem(0).label);
+    // The interrupt check barfs if there are live registers.
+    sync();
+    if (!addInterruptCheck()) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+// The bodies of the "then" and "else" arms can be arbitrary sequences
+// of expressions, they push control and increment the nesting and can
+// even be targeted by jumps.  A branch to the "if" block branches to
+// the exit of the if, ie, it's like "break".  Consider:
+//
+//      (func (result i32)
+//       (if (i32.const 1)
+//           (begin (br 1) (unreachable))
+//           (begin (unreachable)))
+//       (i32.const 1))
+//
+// The branch causes neither of the unreachable expressions to be
+// evaluated.
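+//
+// Similarly (an illustrative sketch), for loops the backward branch is a
+// "continue" and carries no value, while a branch past the loop to an
+// enclosing block is an "exit" and carries the block's result:
+//
+//      (block (result i32)
+//       (loop
+//        (br_if 1 (i32.const 42) (i32.const 1))
+//        (br 0)))
+//
+// Here "br_if 1" exits the block with the value 42, whereas "br 0" re-enters
+// the loop and preserves nothing.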
+
+bool BaseCompiler::emitIf() {
+  ResultType params;
+  Nothing unused_cond;
+  if (!iter_.readIf(&params, &unused_cond)) {
+    return false;
+  }
+
+  BranchState b(&controlItem().otherLabel, InvertBranch(true));
+  if (!deadCode_) {
+    needResultRegisters(params);
+    emitBranchSetup(&b);
+    freeResultRegisters(params);
+    sync();
+  } else {
+    resetLatentOp();
+  }
+
+  initControl(controlItem(), params);
+
+  if (!deadCode_) {
+    // Because params can flow immediately to results in the case of an empty
+    // "then" or "else" block, and the result of an if/then is a join in
+    // general, we shuffle params eagerly to the result allocations.
+    if (!topBlockParams(params)) {
+      return false;
+    }
+    if (!emitBranchPerform(&b)) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+bool BaseCompiler::endIfThen(ResultType type) {
+  Control& ifThen = controlItem();
+
+  // The parameters to the "if" logically flow to both the "then" and "else"
+  // blocks, but the "else" block is empty.  Since we know that the "if"
+  // type-checks, that means that the "else" parameters are the "else" results,
+  // and that the "if"'s result type is the same as its parameter type.
+
+  if (deadCode_) {
+    // "then" arm does not fall through; reset stack.
+    fr.resetStackHeight(ifThen.stackHeight, type);
+    popValueStackTo(ifThen.stackSize);
+    if (!ifThen.deadOnArrival) {
+      captureResultRegisters(type);
+    }
+  } else {
+    MOZ_ASSERT(stk_.length() == ifThen.stackSize + type.length());
+    // Assume we have a control join, so place results in block result
+    // allocations.
+    popBlockResults(type, ifThen.stackHeight, ContinuationKind::Fallthrough);
+    MOZ_ASSERT(!ifThen.deadOnArrival);
+  }
+
+  if (ifThen.otherLabel.used()) {
+    masm.bind(&ifThen.otherLabel);
+  }
+
+  if (ifThen.label.used()) {
+    masm.bind(&ifThen.label);
+  }
+
+  if (!deadCode_) {
+    ifThen.bceSafeOnExit &= bceSafe_;
+  }
+
+  deadCode_ = ifThen.deadOnArrival;
+  if (!deadCode_) {
+    if (!pushBlockResults(type)) {
+      return false;
+    }
+  }
+
+  bceSafe_ = ifThen.bceSafeOnExit & ifThen.bceSafeOnEntry;
+
+  return true;
+}
+
+bool BaseCompiler::emitElse() {
+  ResultType params, results;
+  NothingVector unused_thenValues;
+
+  if (!iter_.readElse(&params, &results, &unused_thenValues)) {
+    return false;
+  }
+
+  Control& ifThenElse = controlItem(0);
+
+  // See comment in endIfThenElse, below.
+
+  // Exit the "then" branch.
+
+  ifThenElse.deadThenBranch = deadCode_;
+
+  if (deadCode_) {
+    fr.resetStackHeight(ifThenElse.stackHeight, results);
+    popValueStackTo(ifThenElse.stackSize);
+  } else {
+    MOZ_ASSERT(stk_.length() == ifThenElse.stackSize + results.length());
+    popBlockResults(results, ifThenElse.stackHeight, ContinuationKind::Jump);
+    freeResultRegisters(results);
+    MOZ_ASSERT(!ifThenElse.deadOnArrival);
+  }
+
+  if (!deadCode_) {
+    masm.jump(&ifThenElse.label);
+  }
+
+  if (ifThenElse.otherLabel.used()) {
+    masm.bind(&ifThenElse.otherLabel);
+  }
+
+  // Reset to the "else" branch.
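+  //
+  // The "else" arm starts from the same state the "then" arm started from:
+  // the dead-code and BCE state are restored from block entry, the frame's
+  // stack height is reset, and the block parameters are recaptured from their
+  // join registers so they can be pushed for the "else" body.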
+ + if (!deadCode_) { + ifThenElse.bceSafeOnExit &= bceSafe_; + } + + deadCode_ = ifThenElse.deadOnArrival; + bceSafe_ = ifThenElse.bceSafeOnEntry; + + fr.resetStackHeight(ifThenElse.stackHeight, params); + + if (!deadCode_) { + captureResultRegisters(params); + if (!pushBlockResults(params)) { + return false; + } + } + + return true; +} + +bool BaseCompiler::endIfThenElse(ResultType type) { + Control& ifThenElse = controlItem(); + + // The expression type is not a reliable guide to what we'll find + // on the stack, we could have (if E (i32.const 1) (unreachable)) + // in which case the "else" arm is AnyType but the type of the + // full expression is I32. So restore whatever's there, not what + // we want to find there. The "then" arm has the same constraint. + + if (deadCode_) { + // "then" arm does not fall through; reset stack. + fr.resetStackHeight(ifThenElse.stackHeight, type); + popValueStackTo(ifThenElse.stackSize); + } else { + MOZ_ASSERT(stk_.length() == ifThenElse.stackSize + type.length()); + // Assume we have a control join, so place results in block result + // allocations. + popBlockResults(type, ifThenElse.stackHeight, + ContinuationKind::Fallthrough); + ifThenElse.bceSafeOnExit &= bceSafe_; + MOZ_ASSERT(!ifThenElse.deadOnArrival); + } + + if (ifThenElse.label.used()) { + masm.bind(&ifThenElse.label); + } + + bool joinLive = + !ifThenElse.deadOnArrival && + (!ifThenElse.deadThenBranch || !deadCode_ || ifThenElse.label.bound()); + + if (joinLive) { + // No values were provided by the "then" path, but capture the values + // provided by the "else" path. + if (deadCode_) { + captureResultRegisters(type); + } + deadCode_ = false; + } + + bceSafe_ = ifThenElse.bceSafeOnExit; + + if (!deadCode_) { + if (!pushBlockResults(type)) { + return false; + } + } + + return true; +} + +bool BaseCompiler::emitEnd() { + LabelKind kind; + ResultType type; + NothingVector unused_values; + if (!iter_.readEnd(&kind, &type, &unused_values, &unused_values)) { + return false; + } + + switch (kind) { + case LabelKind::Body: + if (!endBlock(type)) { + return false; + } + doReturn(ContinuationKind::Fallthrough); + iter_.popEnd(); + MOZ_ASSERT(iter_.controlStackEmpty()); + return iter_.readFunctionEnd(iter_.end()); + case LabelKind::Block: + if (!endBlock(type)) { + return false; + } + break; + case LabelKind::Loop: + // The end of a loop isn't a branch target, so we can just leave its + // results on the expression stack to be consumed by the outer block. + break; + case LabelKind::Then: + if (!endIfThen(type)) { + return false; + } + break; + case LabelKind::Else: + if (!endIfThenElse(type)) { + return false; + } + break; +#ifdef ENABLE_WASM_EXCEPTIONS + case LabelKind::Try: + MOZ_CRASH("NYI"); + break; + case LabelKind::Catch: + MOZ_CRASH("NYI"); + break; +#endif + } + + iter_.popEnd(); + + return true; +} + +bool BaseCompiler::emitBr() { + uint32_t relativeDepth; + ResultType type; + NothingVector unused_values; + if (!iter_.readBr(&relativeDepth, &type, &unused_values)) { + return false; + } + + if (deadCode_) { + return true; + } + + Control& target = controlItem(relativeDepth); + target.bceSafeOnExit &= bceSafe_; + + // Save any values in the designated join registers, as if the target block + // returned normally. + + popBlockResults(type, target.stackHeight, ContinuationKind::Jump); + masm.jump(&target.label); + + // The registers holding the join values are free for the remainder of this + // block. 
+ + freeResultRegisters(type); + + deadCode_ = true; + + return true; +} + +bool BaseCompiler::emitBrIf() { + uint32_t relativeDepth; + ResultType type; + NothingVector unused_values; + Nothing unused_condition; + if (!iter_.readBrIf(&relativeDepth, &type, &unused_values, + &unused_condition)) { + return false; + } + + if (deadCode_) { + resetLatentOp(); + return true; + } + + Control& target = controlItem(relativeDepth); + target.bceSafeOnExit &= bceSafe_; + + BranchState b(&target.label, target.stackHeight, InvertBranch(false), type); + emitBranchSetup(&b); + return emitBranchPerform(&b); +} + +#ifdef ENABLE_WASM_FUNCTION_REFERENCES +bool BaseCompiler::emitBrOnNull() { + MOZ_ASSERT(!hasLatentOp()); + + uint32_t relativeDepth; + ResultType type; + NothingVector unused_values; + Nothing unused_condition; + if (!iter_.readBrOnNull(&relativeDepth, &type, &unused_values, + &unused_condition)) { + return false; + } + + if (deadCode_) { + return true; + } + + Control& target = controlItem(relativeDepth); + target.bceSafeOnExit &= bceSafe_; + + BranchState b(&target.label, target.stackHeight, InvertBranch(false), type); + if (b.hasBlockResults()) { + needResultRegisters(b.resultType); + } + RegPtr rp = popRef(); + if (b.hasBlockResults()) { + freeResultRegisters(b.resultType); + } + if (!jumpConditionalWithResults(&b, Assembler::Equal, rp, + ImmWord(NULLREF_VALUE))) { + return false; + } + pushRef(rp); + + return true; +} +#endif + +bool BaseCompiler::emitBrTable() { + Uint32Vector depths; + uint32_t defaultDepth; + ResultType branchParams; + NothingVector unused_values; + Nothing unused_index; + // N.B., `branchParams' gets set to the type of the default branch target. In + // the presence of subtyping, it could be that the different branch targets + // have different types. Here we rely on the assumption that the value + // representations (e.g. Stk value types) of all branch target types are the + // same, in the baseline compiler. Notably, this means that all Ref types + // should be represented the same. + if (!iter_.readBrTable(&depths, &defaultDepth, &branchParams, &unused_values, + &unused_index)) { + return false; + } + + if (deadCode_) { + return true; + } + + // Don't use param registers for rc + needIntegerResultRegisters(branchParams); + + // Table switch value always on top. + RegI32 rc = popI32(); + + freeIntegerResultRegisters(branchParams); + + StackHeight resultsBase(0); + if (!topBranchParams(branchParams, &resultsBase)) { + return false; + } + + Label dispatchCode; + masm.branch32(Assembler::Below, rc, Imm32(depths.length()), &dispatchCode); + + // This is the out-of-range stub. rc is dead here but we don't need it. + + shuffleStackResultsBeforeBranch( + resultsBase, controlItem(defaultDepth).stackHeight, branchParams); + controlItem(defaultDepth).bceSafeOnExit &= bceSafe_; + masm.jump(&controlItem(defaultDepth).label); + + // Emit stubs. rc is dead in all of these but we don't need it. + // + // The labels in the vector are in the TempAllocator and will + // be freed by and by. + // + // TODO / OPTIMIZE (Bug 1316804): Branch directly to the case code if we + // can, don't emit an intermediate stub. 
+
+  LabelVector stubs;
+  if (!stubs.reserve(depths.length())) {
+    return false;
+  }
+
+  for (uint32_t depth : depths) {
+    stubs.infallibleEmplaceBack(NonAssertingLabel());
+    masm.bind(&stubs.back());
+    shuffleStackResultsBeforeBranch(resultsBase, controlItem(depth).stackHeight,
+                                    branchParams);
+    controlItem(depth).bceSafeOnExit &= bceSafe_;
+    masm.jump(&controlItem(depth).label);
+  }
+
+  // Emit table.
+
+  Label theTable;
+  jumpTable(stubs, &theTable);
+
+  // Emit indirect jump.  rc is live here.
+
+  tableSwitch(&theTable, rc, &dispatchCode);
+
+  deadCode_ = true;
+
+  // Clean up.
+
+  freeI32(rc);
+  popValueStackBy(branchParams.length());
+
+  return true;
+}
+
+#ifdef ENABLE_WASM_EXCEPTIONS
+bool BaseCompiler::emitTry() {
+  ResultType params;
+  if (!iter_.readTry(&params)) {
+    return false;
+  }
+
+  if (deadCode_) {
+    return true;
+  }
+
+  MOZ_CRASH("NYI");
+}
+
+bool BaseCompiler::emitCatch() {
+  LabelKind kind;
+  uint32_t eventIndex;
+  ResultType paramType, resultType;
+  NothingVector unused_tryValues;
+
+  if (!iter_.readCatch(&kind, &eventIndex, &paramType, &resultType,
+                       &unused_tryValues)) {
+    return false;
+  }
+
+  if (deadCode_) {
+    return true;
+  }
+
+  MOZ_CRASH("NYI");
+}
+
+bool BaseCompiler::emitThrow() {
+  uint32_t exnIndex;
+  NothingVector unused_argValues;
+
+  if (!iter_.readThrow(&exnIndex, &unused_argValues)) {
+    return false;
+  }
+
+  if (deadCode_) {
+    return true;
+  }
+
+  MOZ_CRASH("NYI");
+}
+#endif
+
+bool BaseCompiler::emitDrop() {
+  if (!iter_.readDrop()) {
+    return false;
+  }
+
+  if (deadCode_) {
+    return true;
+  }
+
+  dropValue();
+  return true;
+}
+
+void BaseCompiler::doReturn(ContinuationKind kind) {
+  if (deadCode_) {
+    return;
+  }
+
+  StackHeight height = controlOutermost().stackHeight;
+  ResultType type = ResultType::Vector(funcType().results());
+  popBlockResults(type, height, kind);
+  masm.jump(&returnLabel_);
+  freeResultRegisters(type);
+}
+
+bool BaseCompiler::emitReturn() {
+  NothingVector unused_values;
+  if (!iter_.readReturn(&unused_values)) {
+    return false;
+  }
+
+  if (deadCode_) {
+    return true;
+  }
+
+  doReturn(ContinuationKind::Jump);
+  deadCode_ = true;
+
+  return true;
+}
+
+bool BaseCompiler::emitCallArgs(const ValTypeVector& argTypes,
+                                const StackResultsLoc& results,
+                                FunctionCall* baselineCall,
+                                CalleeOnStack calleeOnStack) {
+  MOZ_ASSERT(!deadCode_);
+
+  ArgTypeVector args(argTypes, results.stackResults());
+  uint32_t naturalArgCount = argTypes.length();
+  uint32_t abiArgCount = args.lengthWithStackResults();
+  startCallArgs(StackArgAreaSizeUnaligned(args), baselineCall);
+
+  // Args are deeper on the stack than the stack result area, if any.
+  size_t argsDepth = results.count();
+  // They're deeper than the callee too, for callIndirect.
+  if (calleeOnStack == CalleeOnStack::True) {
+    argsDepth++;
+  }
+
+  for (size_t i = 0; i < abiArgCount; ++i) {
+    if (args.isNaturalArg(i)) {
+      size_t naturalIndex = args.naturalIndex(i);
+      size_t stackIndex = naturalArgCount - 1 - naturalIndex + argsDepth;
+      passArg(argTypes[naturalIndex], peek(stackIndex), baselineCall);
+    } else {
+      // The synthetic stack result area pointer.
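+      // It occupies the ABI slot that ArgTypeVector reserved for it.  If that
+      // slot is in memory, the pointer is materialized through a scratch
+      // register and stored to the outgoing argument area; otherwise it is
+      // computed directly into the designated argument GPR.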
+ ABIArg argLoc = baselineCall->abi.next(MIRType::Pointer); + if (argLoc.kind() == ABIArg::Stack) { + ScratchPtr scratch(*this); + fr.computeOutgoingStackResultAreaPtr(results, scratch); + masm.storePtr(scratch, Address(masm.getStackPointer(), + argLoc.offsetFromArgBase())); + } else { + fr.computeOutgoingStackResultAreaPtr(results, RegPtr(argLoc.gpr())); + } + } + } + + fr.loadTlsPtr(WasmTlsReg); + return true; +} + +void BaseCompiler::pushReturnValueOfCall(const FunctionCall& call, + MIRType type) { + switch (type) { + case MIRType::Int32: { + RegI32 rv = captureReturnedI32(); + pushI32(rv); + break; + } + case MIRType::Int64: { + RegI64 rv = captureReturnedI64(); + pushI64(rv); + break; + } + case MIRType::Float32: { + RegF32 rv = captureReturnedF32(call); + pushF32(rv); + break; + } + case MIRType::Double: { + RegF64 rv = captureReturnedF64(call); + pushF64(rv); + break; + } +#ifdef ENABLE_WASM_SIMD + case MIRType::Simd128: { + RegV128 rv = captureReturnedV128(call); + pushV128(rv); + break; + } +#endif + case MIRType::RefOrNull: { + RegPtr rv = captureReturnedRef(); + pushRef(rv); + break; + } + default: + // In particular, passing |type| as MIRType::Void or MIRType::Pointer to + // this function is an error. + MOZ_CRASH("Function return type"); + } +} + +bool BaseCompiler::pushStackResultsForCall(const ResultType& type, RegPtr temp, + StackResultsLoc* loc) { + if (!ABIResultIter::HasStackResults(type)) { + return true; + } + + // This method is the only one in the class that can increase stk_.length() by + // an unbounded amount, so it's the only one that requires an allocation. + // (The general case is handled in emitBody.) + if (!stk_.reserve(stk_.length() + type.length())) { + return false; + } + + // Measure stack results. + ABIResultIter i(type); + size_t count = 0; + for (; !i.done(); i.next()) { + if (i.cur().onStack()) { + count++; + } + } + uint32_t bytes = i.stackBytesConsumedSoFar(); + + // Reserve space for the stack results. + StackHeight resultsBase = fr.stackHeight(); + uint32_t height = fr.prepareStackResultArea(resultsBase, bytes); + + // Push Stk values onto the value stack, and zero out Ref values. + for (i.switchToPrev(); !i.done(); i.prev()) { + const ABIResult& result = i.cur(); + if (result.onStack()) { + Stk v = captureStackResult(result, resultsBase, bytes); + push(v); + if (v.kind() == Stk::MemRef) { + stackMapGenerator_.memRefsOnStk++; + fr.storeImmediatePtrToStack(intptr_t(0), v.offs(), temp); + } + } + } + + *loc = StackResultsLoc(bytes, count, height); + + return true; +} + +// After a call, some results may be written to the stack result locations that +// are pushed on the machine stack after any stack args. If there are stack +// args and stack results, these results need to be shuffled down, as the args +// are "consumed" by the call. +void BaseCompiler::popStackResultsAfterCall(const StackResultsLoc& results, + uint32_t stackArgBytes) { + if (results.bytes() != 0) { + popValueStackBy(results.count()); + if (stackArgBytes != 0) { + uint32_t srcHeight = results.height(); + MOZ_ASSERT(srcHeight >= stackArgBytes + results.bytes()); + uint32_t destHeight = srcHeight - stackArgBytes; + + fr.shuffleStackResultsTowardFP(srcHeight, destHeight, results.bytes(), + ABINonArgReturnVolatileReg); + } + } +} + +// For now, always sync() at the beginning of the call to easily save live +// values. 
+// +// TODO / OPTIMIZE (Bug 1316806): We may be able to avoid a full sync(), since +// all we want is to save live registers that won't be saved by the callee or +// that we need for outgoing args - we don't need to sync the locals. We can +// just push the necessary registers, it'll be like a lightweight sync. +// +// Even some of the pushing may be unnecessary if the registers will be consumed +// by the call, because then what we want is parallel assignment to the argument +// registers or onto the stack for outgoing arguments. A sync() is just +// simpler. + +bool BaseCompiler::emitCall() { + uint32_t lineOrBytecode = readCallSiteLineOrBytecode(); + + uint32_t funcIndex; + NothingVector args_; + if (!iter_.readCall(&funcIndex, &args_)) { + return false; + } + + if (deadCode_) { + return true; + } + + sync(); + + const FuncType& funcType = *moduleEnv_.funcs[funcIndex].type; + bool import = moduleEnv_.funcIsImport(funcIndex); + + uint32_t numArgs = funcType.args().length(); + size_t stackArgBytes = stackConsumed(numArgs); + + ResultType resultType(ResultType::Vector(funcType.results())); + StackResultsLoc results; + if (!pushStackResultsForCall(resultType, RegPtr(ABINonArgReg0), &results)) { + return false; + } + + FunctionCall baselineCall(lineOrBytecode); + beginCall(baselineCall, UseABI::Wasm, + import ? InterModule::True : InterModule::False); + + if (!emitCallArgs(funcType.args(), results, &baselineCall, + CalleeOnStack::False)) { + return false; + } + + CodeOffset raOffset; + if (import) { + raOffset = callImport(moduleEnv_.funcImportGlobalDataOffsets[funcIndex], + baselineCall); + } else { + raOffset = callDefinition(funcIndex, baselineCall); + } + + if (!createStackMap("emitCall", raOffset)) { + return false; + } + + popStackResultsAfterCall(results, stackArgBytes); + + endCall(baselineCall, stackArgBytes); + + popValueStackBy(numArgs); + + captureCallResultRegisters(resultType); + return pushCallResults(baselineCall, resultType, results); +} + +bool BaseCompiler::emitCallIndirect() { + uint32_t lineOrBytecode = readCallSiteLineOrBytecode(); + + uint32_t funcTypeIndex; + uint32_t tableIndex; + Nothing callee_; + NothingVector args_; + if (!iter_.readCallIndirect(&funcTypeIndex, &tableIndex, &callee_, &args_)) { + return false; + } + + if (deadCode_) { + return true; + } + + sync(); + + const FuncType& funcType = moduleEnv_.types[funcTypeIndex].funcType(); + + // Stack: ... arg1 .. 
argn callee + + uint32_t numArgs = funcType.args().length() + 1; + size_t stackArgBytes = stackConsumed(numArgs); + + ResultType resultType(ResultType::Vector(funcType.results())); + StackResultsLoc results; + if (!pushStackResultsForCall(resultType, RegPtr(ABINonArgReg0), &results)) { + return false; + } + + FunctionCall baselineCall(lineOrBytecode); + beginCall(baselineCall, UseABI::Wasm, InterModule::True); + + if (!emitCallArgs(funcType.args(), results, &baselineCall, + CalleeOnStack::True)) { + return false; + } + + const Stk& callee = peek(results.count()); + CodeOffset raOffset = + callIndirect(funcTypeIndex, tableIndex, callee, baselineCall); + if (!createStackMap("emitCallIndirect", raOffset)) { + return false; + } + + popStackResultsAfterCall(results, stackArgBytes); + + endCall(baselineCall, stackArgBytes); + + popValueStackBy(numArgs); + + captureCallResultRegisters(resultType); + return pushCallResults(baselineCall, resultType, results); +} + +void BaseCompiler::emitRound(RoundingMode roundingMode, ValType operandType) { + if (operandType == ValType::F32) { + RegF32 f0 = popF32(); + roundF32(roundingMode, f0); + pushF32(f0); + } else if (operandType == ValType::F64) { + RegF64 f0 = popF64(); + roundF64(roundingMode, f0); + pushF64(f0); + } else { + MOZ_CRASH("unexpected type"); + } +} + +bool BaseCompiler::emitUnaryMathBuiltinCall(SymbolicAddress callee, + ValType operandType) { + uint32_t lineOrBytecode = readCallSiteLineOrBytecode(); + + Nothing operand_; + if (!iter_.readUnary(operandType, &operand_)) { + return false; + } + + if (deadCode_) { + return true; + } + + RoundingMode roundingMode; + if (IsRoundingFunction(callee, &roundingMode) && + supportsRoundInstruction(roundingMode)) { + emitRound(roundingMode, operandType); + return true; + } + + sync(); + + ValTypeVector& signature = operandType == ValType::F32 ? 
SigF_ : SigD_; + ValType retType = operandType; + uint32_t numArgs = signature.length(); + size_t stackSpace = stackConsumed(numArgs); + StackResultsLoc noStackResults; + + FunctionCall baselineCall(lineOrBytecode); + beginCall(baselineCall, UseABI::Builtin, InterModule::False); + + if (!emitCallArgs(signature, noStackResults, &baselineCall, + CalleeOnStack::False)) { + return false; + } + + CodeOffset raOffset = builtinCall(callee, baselineCall); + if (!createStackMap("emitUnaryMathBuiltin[..]", raOffset)) { + return false; + } + + endCall(baselineCall, stackSpace); + + popValueStackBy(numArgs); + + pushReturnValueOfCall(baselineCall, ToMIRType(retType)); + + return true; +} + +#ifdef RABALDR_INT_DIV_I64_CALLOUT +bool BaseCompiler::emitDivOrModI64BuiltinCall(SymbolicAddress callee, + ValType operandType) { + MOZ_ASSERT(operandType == ValType::I64); + MOZ_ASSERT(!deadCode_); + + sync(); + + needI64(specific_.abiReturnRegI64); + + RegI64 rhs = popI64(); + RegI64 srcDest = popI64ToSpecific(specific_.abiReturnRegI64); + + Label done; + + checkDivideByZeroI64(rhs); + + if (callee == SymbolicAddress::DivI64) { + checkDivideSignedOverflowI64(rhs, srcDest, &done, ZeroOnOverflow(false)); + } else if (callee == SymbolicAddress::ModI64) { + checkDivideSignedOverflowI64(rhs, srcDest, &done, ZeroOnOverflow(true)); + } + + masm.setupWasmABICall(); + masm.passABIArg(srcDest.high); + masm.passABIArg(srcDest.low); + masm.passABIArg(rhs.high); + masm.passABIArg(rhs.low); + CodeOffset raOffset = masm.callWithABI(bytecodeOffset(), callee, + mozilla::Some(fr.getTlsPtrOffset())); + if (!createStackMap("emitDivOrModI64Bui[..]", raOffset)) { + return false; + } + + masm.bind(&done); + + freeI64(rhs); + pushI64(srcDest); + return true; +} +#endif // RABALDR_INT_DIV_I64_CALLOUT + +#ifdef RABALDR_I64_TO_FLOAT_CALLOUT +bool BaseCompiler::emitConvertInt64ToFloatingCallout(SymbolicAddress callee, + ValType operandType, + ValType resultType) { + sync(); + + RegI64 input = popI64(); + + FunctionCall call(0); + + masm.setupWasmABICall(); +# ifdef JS_PUNBOX64 + MOZ_CRASH("BaseCompiler platform hook: emitConvertInt64ToFloatingCallout"); +# else + masm.passABIArg(input.high); + masm.passABIArg(input.low); +# endif + CodeOffset raOffset = masm.callWithABI( + bytecodeOffset(), callee, mozilla::Some(fr.getTlsPtrOffset()), + resultType == ValType::F32 ? MoveOp::FLOAT32 : MoveOp::DOUBLE); + if (!createStackMap("emitConvertInt64To[..]", raOffset)) { + return false; + } + + freeI64(input); + + if (resultType == ValType::F32) { + pushF32(captureReturnedF32(call)); + } else { + pushF64(captureReturnedF64(call)); + } + + return true; +} +#endif // RABALDR_I64_TO_FLOAT_CALLOUT + +#ifdef RABALDR_FLOAT_TO_I64_CALLOUT +// `Callee` always takes a double, so a float32 input must be converted. +bool BaseCompiler::emitConvertFloatingToInt64Callout(SymbolicAddress callee, + ValType operandType, + ValType resultType) { + RegF64 doubleInput; + if (operandType == ValType::F32) { + doubleInput = needF64(); + RegF32 input = popF32(); + masm.convertFloat32ToDouble(input, doubleInput); + freeF32(input); + } else { + doubleInput = popF64(); + } + + // We may need the value after the call for the ool check. 
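+  // (If the non-saturating callout returns the 0x8000000000000000 sentinel,
+  // the saved copy of the input lets the out-of-line path below distinguish a
+  // genuine result of that value from NaN or overflow, and trap only in the
+  // latter cases.)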
+ RegF64 otherReg = needF64(); + moveF64(doubleInput, otherReg); + pushF64(otherReg); + + sync(); + + FunctionCall call(0); + + masm.setupWasmABICall(); + masm.passABIArg(doubleInput, MoveOp::DOUBLE); + CodeOffset raOffset = masm.callWithABI(bytecodeOffset(), callee, + mozilla::Some(fr.getTlsPtrOffset())); + if (!createStackMap("emitConvertFloatin[..]", raOffset)) { + return false; + } + + freeF64(doubleInput); + + RegI64 rv = captureReturnedI64(); + + RegF64 inputVal = popF64(); + + TruncFlags flags = 0; + if (callee == SymbolicAddress::TruncateDoubleToUint64) { + flags |= TRUNC_UNSIGNED; + } + if (callee == SymbolicAddress::SaturatingTruncateDoubleToInt64 || + callee == SymbolicAddress::SaturatingTruncateDoubleToUint64) { + flags |= TRUNC_SATURATING; + } + + // If we're saturating, the callout will always produce the final result + // value. Otherwise, the callout value will return 0x8000000000000000 + // and we need to produce traps. + OutOfLineCode* ool = nullptr; + if (!(flags & TRUNC_SATURATING)) { + // The OOL check just succeeds or fails, it does not generate a value. + ool = addOutOfLineCode(new (alloc_) OutOfLineTruncateCheckF32OrF64ToI64( + AnyReg(inputVal), rv, flags, bytecodeOffset())); + if (!ool) { + return false; + } + + masm.branch64(Assembler::Equal, rv, Imm64(0x8000000000000000), + ool->entry()); + masm.bind(ool->rejoin()); + } + + pushI64(rv); + freeF64(inputVal); + + return true; +} +#endif // RABALDR_FLOAT_TO_I64_CALLOUT + +bool BaseCompiler::emitGetLocal() { + uint32_t slot; + if (!iter_.readGetLocal(locals_, &slot)) { + return false; + } + + if (deadCode_) { + return true; + } + + // Local loads are pushed unresolved, ie, they may be deferred + // until needed, until they may be affected by a store, or until a + // sync. This is intended to reduce register pressure. 
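+  //
+  // For example (illustrative only), in
+  //
+  //   (local.set 1 (i32.add (local.get 0) (local.get 0)))
+  //
+  // each local.get just pushes a latent reference to local 0, and no register
+  // is allocated until the add pops its operands.  A later write to local 0
+  // forces any still-latent references to it to be resolved first (see
+  // syncLocal in emitSetOrTeeLocal below) so that they observe the old value.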
+ + switch (locals_[slot].kind()) { + case ValType::I32: + pushLocalI32(slot); + break; + case ValType::I64: + pushLocalI64(slot); + break; + case ValType::V128: +#ifdef ENABLE_WASM_SIMD + pushLocalV128(slot); + break; +#else + MOZ_CRASH("No SIMD support"); +#endif + case ValType::F64: + pushLocalF64(slot); + break; + case ValType::F32: + pushLocalF32(slot); + break; + case ValType::Ref: + pushLocalRef(slot); + break; + } + + return true; +} + +template <bool isSetLocal> +bool BaseCompiler::emitSetOrTeeLocal(uint32_t slot) { + if (deadCode_) { + return true; + } + + bceLocalIsUpdated(slot); + switch (locals_[slot].kind()) { + case ValType::I32: { + RegI32 rv = popI32(); + syncLocal(slot); + fr.storeLocalI32(rv, localFromSlot(slot, MIRType::Int32)); + if (isSetLocal) { + freeI32(rv); + } else { + pushI32(rv); + } + break; + } + case ValType::I64: { + RegI64 rv = popI64(); + syncLocal(slot); + fr.storeLocalI64(rv, localFromSlot(slot, MIRType::Int64)); + if (isSetLocal) { + freeI64(rv); + } else { + pushI64(rv); + } + break; + } + case ValType::F64: { + RegF64 rv = popF64(); + syncLocal(slot); + fr.storeLocalF64(rv, localFromSlot(slot, MIRType::Double)); + if (isSetLocal) { + freeF64(rv); + } else { + pushF64(rv); + } + break; + } + case ValType::F32: { + RegF32 rv = popF32(); + syncLocal(slot); + fr.storeLocalF32(rv, localFromSlot(slot, MIRType::Float32)); + if (isSetLocal) { + freeF32(rv); + } else { + pushF32(rv); + } + break; + } + case ValType::V128: { +#ifdef ENABLE_WASM_SIMD + RegV128 rv = popV128(); + syncLocal(slot); + fr.storeLocalV128(rv, localFromSlot(slot, MIRType::Simd128)); + if (isSetLocal) { + freeV128(rv); + } else { + pushV128(rv); + } + break; +#else + MOZ_CRASH("No SIMD support"); +#endif + } + case ValType::Ref: { + RegPtr rv = popRef(); + syncLocal(slot); + fr.storeLocalPtr(rv, localFromSlot(slot, MIRType::RefOrNull)); + if (isSetLocal) { + freeRef(rv); + } else { + pushRef(rv); + } + break; + } + } + + return true; +} + +bool BaseCompiler::emitSetLocal() { + uint32_t slot; + Nothing unused_value; + if (!iter_.readSetLocal(locals_, &slot, &unused_value)) { + return false; + } + return emitSetOrTeeLocal<true>(slot); +} + +bool BaseCompiler::emitTeeLocal() { + uint32_t slot; + Nothing unused_value; + if (!iter_.readTeeLocal(locals_, &slot, &unused_value)) { + return false; + } + return emitSetOrTeeLocal<false>(slot); +} + +bool BaseCompiler::emitGetGlobal() { + uint32_t id; + if (!iter_.readGetGlobal(&id)) { + return false; + } + + if (deadCode_) { + return true; + } + + const GlobalDesc& global = moduleEnv_.globals[id]; + + if (global.isConstant()) { + LitVal value = global.constantValue(); + switch (value.type().kind()) { + case ValType::I32: + pushI32(value.i32()); + break; + case ValType::I64: + pushI64(value.i64()); + break; + case ValType::F32: + pushF32(value.f32()); + break; + case ValType::F64: + pushF64(value.f64()); + break; + case ValType::Ref: + pushRef(intptr_t(value.ref().forCompiledCode())); + break; +#ifdef ENABLE_WASM_SIMD + case ValType::V128: + pushV128(value.v128()); + break; +#endif + default: + MOZ_CRASH("Global constant type"); + } + return true; + } + + switch (global.type().kind()) { + case ValType::I32: { + RegI32 rv = needI32(); + ScratchI32 tmp(*this); + masm.load32(addressOfGlobalVar(global, tmp), rv); + pushI32(rv); + break; + } + case ValType::I64: { + RegI64 rv = needI64(); + ScratchI32 tmp(*this); + masm.load64(addressOfGlobalVar(global, tmp), rv); + pushI64(rv); + break; + } + case ValType::F32: { + RegF32 rv = needF32(); + ScratchI32 
tmp(*this); + masm.loadFloat32(addressOfGlobalVar(global, tmp), rv); + pushF32(rv); + break; + } + case ValType::F64: { + RegF64 rv = needF64(); + ScratchI32 tmp(*this); + masm.loadDouble(addressOfGlobalVar(global, tmp), rv); + pushF64(rv); + break; + } + case ValType::Ref: { + RegPtr rv = needRef(); + ScratchI32 tmp(*this); + masm.loadPtr(addressOfGlobalVar(global, tmp), rv); + pushRef(rv); + break; + } +#ifdef ENABLE_WASM_SIMD + case ValType::V128: { + RegV128 rv = needV128(); + ScratchI32 tmp(*this); + masm.loadUnalignedSimd128(addressOfGlobalVar(global, tmp), rv); + pushV128(rv); + break; + } +#endif + default: + MOZ_CRASH("Global variable type"); + break; + } + return true; +} + +bool BaseCompiler::emitSetGlobal() { + uint32_t id; + Nothing unused_value; + if (!iter_.readSetGlobal(&id, &unused_value)) { + return false; + } + + if (deadCode_) { + return true; + } + + const GlobalDesc& global = moduleEnv_.globals[id]; + + switch (global.type().kind()) { + case ValType::I32: { + RegI32 rv = popI32(); + ScratchI32 tmp(*this); + masm.store32(rv, addressOfGlobalVar(global, tmp)); + freeI32(rv); + break; + } + case ValType::I64: { + RegI64 rv = popI64(); + ScratchI32 tmp(*this); + masm.store64(rv, addressOfGlobalVar(global, tmp)); + freeI64(rv); + break; + } + case ValType::F32: { + RegF32 rv = popF32(); + ScratchI32 tmp(*this); + masm.storeFloat32(rv, addressOfGlobalVar(global, tmp)); + freeF32(rv); + break; + } + case ValType::F64: { + RegF64 rv = popF64(); + ScratchI32 tmp(*this); + masm.storeDouble(rv, addressOfGlobalVar(global, tmp)); + freeF64(rv); + break; + } + case ValType::Ref: { + RegPtr valueAddr(PreBarrierReg); + needRef(valueAddr); + { + ScratchI32 tmp(*this); + masm.computeEffectiveAddress(addressOfGlobalVar(global, tmp), + valueAddr); + } + RegPtr rv = popRef(); + // emitBarrieredStore consumes valueAddr + if (!emitBarrieredStore(Nothing(), valueAddr, rv)) { + return false; + } + freeRef(rv); + break; + } +#ifdef ENABLE_WASM_SIMD + case ValType::V128: { + RegV128 rv = popV128(); + ScratchI32 tmp(*this); + masm.storeUnalignedSimd128(rv, addressOfGlobalVar(global, tmp)); + freeV128(rv); + break; + } +#endif + default: + MOZ_CRASH("Global variable type"); + break; + } + return true; +} + +// Bounds check elimination. +// +// We perform BCE on two kinds of address expressions: on constant heap pointers +// that are known to be in the heap or will be handled by the out-of-bounds trap +// handler; and on local variables that have been checked in dominating code +// without being updated since. +// +// For an access through a constant heap pointer + an offset we can eliminate +// the bounds check if the sum of the address and offset is below the sum of the +// minimum memory length and the offset guard length. +// +// For an access through a local variable + an offset we can eliminate the +// bounds check if the local variable has already been checked and has not been +// updated since, and the offset is less than the guard limit. +// +// To track locals for which we can eliminate checks we use a bit vector +// bceSafe_ that has a bit set for those locals whose bounds have been checked +// and which have not subsequently been set. Initially this vector is zero. +// +// In straight-line code a bit is set when we perform a bounds check on an +// access via the local and is reset when the variable is updated. +// +// In control flow, the bit vector is manipulated as follows. 
Each ControlItem +// has a value bceSafeOnEntry, which is the value of bceSafe_ on entry to the +// item, and a value bceSafeOnExit, which is initially ~0. On a branch (br, +// brIf, brTable), we always AND the branch target's bceSafeOnExit with the +// value of bceSafe_ at the branch point. On exiting an item by falling out of +// it, provided we're not in dead code, we AND the current value of bceSafe_ +// into the item's bceSafeOnExit. Additional processing depends on the item +// type: +// +// - After a block, set bceSafe_ to the block's bceSafeOnExit. +// +// - On loop entry, after pushing the ControlItem, set bceSafe_ to zero; the +// back edges would otherwise require us to iterate to a fixedpoint. +// +// - After a loop, the bceSafe_ is left unchanged, because only fallthrough +// control flow will reach that point and the bceSafe_ value represents the +// correct state of the fallthrough path. +// +// - Set bceSafe_ to the ControlItem's bceSafeOnEntry at both the 'then' branch +// and the 'else' branch. +// +// - After an if-then-else, set bceSafe_ to the if-then-else's bceSafeOnExit. +// +// - After an if-then, set bceSafe_ to the if-then's bceSafeOnExit AND'ed with +// the if-then's bceSafeOnEntry. +// +// Finally, when the debugger allows locals to be mutated we must disable BCE +// for references via a local, by returning immediately from bceCheckLocal if +// compilerEnv_.debugEnabled() is true. +// +// +// Alignment check elimination. +// +// Alignment checks for atomic operations can be omitted if the pointer is a +// constant and the pointer + offset is aligned. Alignment checking that can't +// be omitted can still be simplified by checking only the pointer if the offset +// is aligned. +// +// (In addition, alignment checking of the pointer can be omitted if the pointer +// has been checked in dominating code, but we don't do that yet.) + +// TODO / OPTIMIZE (bug 1329576): There are opportunities to generate better +// code by not moving a constant address with a zero offset into a register. + +RegI32 BaseCompiler::popMemoryAccess(MemoryAccessDesc* access, + AccessCheck* check) { + check->onlyPointerAlignment = + (access->offset() & (access->byteSize() - 1)) == 0; + + int32_t addrTemp; + if (popConstI32(&addrTemp)) { + uint32_t addr = addrTemp; + + uint32_t offsetGuardLimit = + GetMaxOffsetGuardLimit(moduleEnv_.hugeMemoryEnabled()); + + uint64_t ea = uint64_t(addr) + uint64_t(access->offset()); + uint64_t limit = moduleEnv_.minMemoryLength + offsetGuardLimit; + + check->omitBoundsCheck = ea < limit; + check->omitAlignmentCheck = (ea & (access->byteSize() - 1)) == 0; + + // Fold the offset into the pointer if we can, as this is always + // beneficial. 
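+    //
+    // Worked example (illustrative): for addr == 0x10000 and
+    // access->offset() == 0x18, ea == 0x10018.  Since ea <= UINT32_MAX the
+    // constant pointer becomes 0x10018 and the access offset is cleared, so
+    // no add is needed when the address is formed; and if 0x10018 is below
+    // minMemoryLength + offsetGuardLimit the bounds check is omitted as well.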
+ + if (ea <= UINT32_MAX) { + addr = uint32_t(ea); + access->clearOffset(); + } + + RegI32 r = needI32(); + moveImm32(int32_t(addr), r); + return r; + } + + uint32_t local; + if (peekLocalI32(&local)) { + bceCheckLocal(access, check, local); + } + + return popI32(); +} + +void BaseCompiler::pushHeapBase() { +#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_ARM64) || \ + defined(JS_CODEGEN_MIPS64) + RegI64 heapBase = needI64(); + moveI64(RegI64(Register64(HeapReg)), heapBase); + pushI64(heapBase); +#elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_MIPS32) + RegI32 heapBase = needI32(); + moveI32(RegI32(HeapReg), heapBase); + pushI32(heapBase); +#elif defined(JS_CODEGEN_X86) + RegI32 heapBase = needI32(); + fr.loadTlsPtr(heapBase); + masm.loadPtr(Address(heapBase, offsetof(TlsData, memoryBase)), heapBase); + pushI32(heapBase); +#else + MOZ_CRASH("BaseCompiler platform hook: pushHeapBase"); +#endif +} + +RegI32 BaseCompiler::maybeLoadTlsForAccess(const AccessCheck& check) { + RegI32 tls; + if (needTlsForAccess(check)) { + tls = needI32(); + fr.loadTlsPtr(tls); + } + return tls; +} + +RegI32 BaseCompiler::maybeLoadTlsForAccess(const AccessCheck& check, + RegI32 specific) { + if (needTlsForAccess(check)) { + fr.loadTlsPtr(specific); + return specific; + } + return RegI32::Invalid(); +} + +bool BaseCompiler::loadCommon(MemoryAccessDesc* access, AccessCheck check, + ValType type) { + RegI32 tls, temp1, temp2, temp3; + needLoadTemps(*access, &temp1, &temp2, &temp3); + + switch (type.kind()) { + case ValType::I32: { + RegI32 rp = popMemoryAccess(access, &check); +#ifdef JS_CODEGEN_ARM + RegI32 rv = IsUnaligned(*access) ? needI32() : rp; +#else + RegI32 rv = rp; +#endif + tls = maybeLoadTlsForAccess(check); + if (!load(access, &check, tls, rp, AnyReg(rv), temp1, temp2, temp3)) { + return false; + } + pushI32(rv); + if (rp != rv) { + freeI32(rp); + } + break; + } + case ValType::I64: { + RegI64 rv; + RegI32 rp; +#ifdef JS_CODEGEN_X86 + rv = specific_.abiReturnRegI64; + needI64(rv); + rp = popMemoryAccess(access, &check); +#else + rp = popMemoryAccess(access, &check); + rv = needI64(); +#endif + tls = maybeLoadTlsForAccess(check); + if (!load(access, &check, tls, rp, AnyReg(rv), temp1, temp2, temp3)) { + return false; + } + pushI64(rv); + freeI32(rp); + break; + } + case ValType::F32: { + RegI32 rp = popMemoryAccess(access, &check); + RegF32 rv = needF32(); + tls = maybeLoadTlsForAccess(check); + if (!load(access, &check, tls, rp, AnyReg(rv), temp1, temp2, temp3)) { + return false; + } + pushF32(rv); + freeI32(rp); + break; + } + case ValType::F64: { + RegI32 rp = popMemoryAccess(access, &check); + RegF64 rv = needF64(); + tls = maybeLoadTlsForAccess(check); + if (!load(access, &check, tls, rp, AnyReg(rv), temp1, temp2, temp3)) { + return false; + } + pushF64(rv); + freeI32(rp); + break; + } +#ifdef ENABLE_WASM_SIMD + case ValType::V128: { + RegI32 rp = popMemoryAccess(access, &check); + RegV128 rv = needV128(); + tls = maybeLoadTlsForAccess(check); + if (!load(access, &check, tls, rp, AnyReg(rv), temp1, temp2, temp3)) { + return false; + } + pushV128(rv); + freeI32(rp); + break; + } +#endif + default: + MOZ_CRASH("load type"); + break; + } + + maybeFreeI32(tls); + maybeFreeI32(temp1); + maybeFreeI32(temp2); + maybeFreeI32(temp3); + + return true; +} + +bool BaseCompiler::emitLoad(ValType type, Scalar::Type viewType) { + LinearMemoryAddress<Nothing> addr; + if (!iter_.readLoad(type, Scalar::byteSize(viewType), &addr)) { + return false; + } + + if (deadCode_) { + return true; + } + + 
MemoryAccessDesc access(viewType, addr.align, addr.offset, bytecodeOffset()); + return loadCommon(&access, AccessCheck(), type); +} + +bool BaseCompiler::storeCommon(MemoryAccessDesc* access, AccessCheck check, + ValType resultType) { + RegI32 tls; + RegI32 temp = needStoreTemp(*access, resultType); + + switch (resultType.kind()) { + case ValType::I32: { + RegI32 rv = popI32(); + RegI32 rp = popMemoryAccess(access, &check); + tls = maybeLoadTlsForAccess(check); + if (!store(access, &check, tls, rp, AnyReg(rv), temp)) { + return false; + } + freeI32(rp); + freeI32(rv); + break; + } + case ValType::I64: { + RegI64 rv = popI64(); + RegI32 rp = popMemoryAccess(access, &check); + tls = maybeLoadTlsForAccess(check); + if (!store(access, &check, tls, rp, AnyReg(rv), temp)) { + return false; + } + freeI32(rp); + freeI64(rv); + break; + } + case ValType::F32: { + RegF32 rv = popF32(); + RegI32 rp = popMemoryAccess(access, &check); + tls = maybeLoadTlsForAccess(check); + if (!store(access, &check, tls, rp, AnyReg(rv), temp)) { + return false; + } + freeI32(rp); + freeF32(rv); + break; + } + case ValType::F64: { + RegF64 rv = popF64(); + RegI32 rp = popMemoryAccess(access, &check); + tls = maybeLoadTlsForAccess(check); + if (!store(access, &check, tls, rp, AnyReg(rv), temp)) { + return false; + } + freeI32(rp); + freeF64(rv); + break; + } +#ifdef ENABLE_WASM_SIMD + case ValType::V128: { + RegV128 rv = popV128(); + RegI32 rp = popMemoryAccess(access, &check); + tls = maybeLoadTlsForAccess(check); + if (!store(access, &check, tls, rp, AnyReg(rv), temp)) { + return false; + } + freeI32(rp); + freeV128(rv); + break; + } +#endif + default: + MOZ_CRASH("store type"); + break; + } + + maybeFreeI32(tls); + maybeFreeI32(temp); + + return true; +} + +bool BaseCompiler::emitStore(ValType resultType, Scalar::Type viewType) { + LinearMemoryAddress<Nothing> addr; + Nothing unused_value; + if (!iter_.readStore(resultType, Scalar::byteSize(viewType), &addr, + &unused_value)) { + return false; + } + + if (deadCode_) { + return true; + } + + MemoryAccessDesc access(viewType, addr.align, addr.offset, bytecodeOffset()); + return storeCommon(&access, AccessCheck(), resultType); +} + +bool BaseCompiler::emitSelect(bool typed) { + StackType type; + Nothing unused_trueValue; + Nothing unused_falseValue; + Nothing unused_condition; + if (!iter_.readSelect(typed, &type, &unused_trueValue, &unused_falseValue, + &unused_condition)) { + return false; + } + + if (deadCode_) { + resetLatentOp(); + return true; + } + + // I32 condition on top, then false, then true. + + Label done; + BranchState b(&done); + emitBranchSetup(&b); + + switch (type.valType().kind()) { + case ValType::I32: { + RegI32 r, rs; + pop2xI32(&r, &rs); + if (!emitBranchPerform(&b)) { + return false; + } + moveI32(rs, r); + masm.bind(&done); + freeI32(rs); + pushI32(r); + break; + } + case ValType::I64: { +#ifdef JS_CODEGEN_X86 + // There may be as many as four Int64 values in registers at a time: two + // for the latent branch operands, and two for the true/false values we + // normally pop before executing the branch. On x86 this is one value + // too many, so we need to generate more complicated code here, and for + // simplicity's sake we do so even if the branch operands are not Int64. + // However, the resulting control flow diamond is complicated since the + // arms of the diamond will have to stay synchronized with respect to + // their evaluation stack and regalloc state. 
To simplify further, we + // use a double branch and a temporary boolean value for now. + RegI32 temp = needI32(); + moveImm32(0, temp); + if (!emitBranchPerform(&b)) { + return false; + } + moveImm32(1, temp); + masm.bind(&done); + + Label trueValue; + RegI64 r, rs; + pop2xI64(&r, &rs); + masm.branch32(Assembler::Equal, temp, Imm32(0), &trueValue); + moveI64(rs, r); + masm.bind(&trueValue); + freeI32(temp); + freeI64(rs); + pushI64(r); +#else + RegI64 r, rs; + pop2xI64(&r, &rs); + if (!emitBranchPerform(&b)) { + return false; + } + moveI64(rs, r); + masm.bind(&done); + freeI64(rs); + pushI64(r); +#endif + break; + } + case ValType::F32: { + RegF32 r, rs; + pop2xF32(&r, &rs); + if (!emitBranchPerform(&b)) { + return false; + } + moveF32(rs, r); + masm.bind(&done); + freeF32(rs); + pushF32(r); + break; + } + case ValType::F64: { + RegF64 r, rs; + pop2xF64(&r, &rs); + if (!emitBranchPerform(&b)) { + return false; + } + moveF64(rs, r); + masm.bind(&done); + freeF64(rs); + pushF64(r); + break; + } +#ifdef ENABLE_WASM_SIMD + case ValType::V128: { + RegV128 r, rs; + pop2xV128(&r, &rs); + if (!emitBranchPerform(&b)) { + return false; + } + moveV128(rs, r); + masm.bind(&done); + freeV128(rs); + pushV128(r); + break; + } +#endif + case ValType::Ref: { + RegPtr r, rs; + pop2xRef(&r, &rs); + if (!emitBranchPerform(&b)) { + return false; + } + moveRef(rs, r); + masm.bind(&done); + freeRef(rs); + pushRef(r); + break; + } + default: { + MOZ_CRASH("select type"); + } + } + + return true; +} + +void BaseCompiler::emitCompareI32(Assembler::Condition compareOp, + ValType compareType) { + MOZ_ASSERT(compareType == ValType::I32); + + if (sniffConditionalControlCmp(compareOp, compareType)) { + return; + } + + int32_t c; + if (popConstI32(&c)) { + RegI32 r = popI32(); + masm.cmp32Set(compareOp, r, Imm32(c), r); + pushI32(r); + } else { + RegI32 r, rs; + pop2xI32(&r, &rs); + masm.cmp32Set(compareOp, r, rs, r); + freeI32(rs); + pushI32(r); + } +} + +void BaseCompiler::emitCompareI64(Assembler::Condition compareOp, + ValType compareType) { + MOZ_ASSERT(compareType == ValType::I64); + + if (sniffConditionalControlCmp(compareOp, compareType)) { + return; + } + + RegI64 rs0, rs1; + pop2xI64(&rs0, &rs1); + RegI32 rd(fromI64(rs0)); + cmp64Set(compareOp, rs0, rs1, rd); + freeI64(rs1); + freeI64Except(rs0, rd); + pushI32(rd); +} + +void BaseCompiler::emitCompareF32(Assembler::DoubleCondition compareOp, + ValType compareType) { + MOZ_ASSERT(compareType == ValType::F32); + + if (sniffConditionalControlCmp(compareOp, compareType)) { + return; + } + + Label across; + RegF32 rs0, rs1; + pop2xF32(&rs0, &rs1); + RegI32 rd = needI32(); + moveImm32(1, rd); + masm.branchFloat(compareOp, rs0, rs1, &across); + moveImm32(0, rd); + masm.bind(&across); + freeF32(rs0); + freeF32(rs1); + pushI32(rd); +} + +void BaseCompiler::emitCompareF64(Assembler::DoubleCondition compareOp, + ValType compareType) { + MOZ_ASSERT(compareType == ValType::F64); + + if (sniffConditionalControlCmp(compareOp, compareType)) { + return; + } + + Label across; + RegF64 rs0, rs1; + pop2xF64(&rs0, &rs1); + RegI32 rd = needI32(); + moveImm32(1, rd); + masm.branchDouble(compareOp, rs0, rs1, &across); + moveImm32(0, rd); + masm.bind(&across); + freeF64(rs0); + freeF64(rs1); + pushI32(rd); +} + +void BaseCompiler::emitCompareRef(Assembler::Condition compareOp, + ValType compareType) { + MOZ_ASSERT(!sniffConditionalControlCmp(compareOp, compareType)); + + RegPtr rs1, rs2; + pop2xRef(&rs1, &rs2); + RegI32 rd = needI32(); + masm.cmpPtrSet(compareOp, rs1, rs2, rd); + 
freeRef(rs1); + freeRef(rs2); + pushI32(rd); +} + +bool BaseCompiler::emitInstanceCall(uint32_t lineOrBytecode, + const SymbolicAddressSignature& builtin, + bool pushReturnedValue /*=true*/) { + const MIRType* argTypes = builtin.argTypes; + MOZ_ASSERT(argTypes[0] == MIRType::Pointer); + + sync(); + + uint32_t numNonInstanceArgs = builtin.numArgs - 1 /* instance */; + size_t stackSpace = stackConsumed(numNonInstanceArgs); + + FunctionCall baselineCall(lineOrBytecode); + beginCall(baselineCall, UseABI::System, InterModule::True); + + ABIArg instanceArg = reservePointerArgument(&baselineCall); + + startCallArgs(StackArgAreaSizeUnaligned(builtin), &baselineCall); + for (uint32_t i = 1; i < builtin.numArgs; i++) { + ValType t; + switch (argTypes[i]) { + case MIRType::Int32: + t = ValType::I32; + break; + case MIRType::Int64: + t = ValType::I64; + break; + case MIRType::RefOrNull: + t = RefType::extern_(); + break; + case MIRType::Pointer: + // Instance function args can now be uninterpreted pointers (eg, for + // the cases PostBarrier and PostBarrierFilter) so we simply treat + // them like the equivalently sized integer. + t = sizeof(void*) == 4 ? ValType::I32 : ValType::I64; + break; + default: + MOZ_CRASH("Unexpected type"); + } + passArg(t, peek(numNonInstanceArgs - i), &baselineCall); + } + CodeOffset raOffset = + builtinInstanceMethodCall(builtin, instanceArg, baselineCall); + if (!createStackMap("emitInstanceCall", raOffset)) { + return false; + } + + endCall(baselineCall, stackSpace); + + popValueStackBy(numNonInstanceArgs); + + // Note, many clients of emitInstanceCall currently assume that pushing the + // result here does not destroy ReturnReg. + // + // Furthermore, clients assume that if builtin.retType != MIRType::None, the + // callee will have returned a result and left it in ReturnReg for us to + // find, and that that register will not be destroyed here (or above). + + if (pushReturnedValue) { + // For the return type only, MIRType::None is used to indicate that the + // call doesn't return a result, that is, returns a C/C++ "void". 
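+    //
+    // For example (see the callers below), SASigMemoryGrow returns the old
+    // memory size as an i32 and its result is pushed here, while void
+    // builtins such as SASigMemCopy and SASigTableCopy are invoked with
+    // pushReturnedValue=false, so this path is skipped for them.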
+ MOZ_ASSERT(builtin.retType != MIRType::None); + pushReturnValueOfCall(baselineCall, builtin.retType); + } + return true; +} + +bool BaseCompiler::emitMemoryGrow() { + uint32_t lineOrBytecode = readCallSiteLineOrBytecode(); + + Nothing arg; + if (!iter_.readMemoryGrow(&arg)) { + return false; + } + + if (deadCode_) { + return true; + } + + return emitInstanceCall(lineOrBytecode, SASigMemoryGrow); +} + +bool BaseCompiler::emitMemorySize() { + uint32_t lineOrBytecode = readCallSiteLineOrBytecode(); + + if (!iter_.readMemorySize()) { + return false; + } + + if (deadCode_) { + return true; + } + + return emitInstanceCall(lineOrBytecode, SASigMemorySize); +} + +bool BaseCompiler::emitRefFunc() { + uint32_t lineOrBytecode = readCallSiteLineOrBytecode(); + uint32_t funcIndex; + if (!iter_.readRefFunc(&funcIndex)) { + return false; + } + if (deadCode_) { + return true; + } + + pushI32(funcIndex); + return emitInstanceCall(lineOrBytecode, SASigRefFunc); +} + +bool BaseCompiler::emitRefNull() { + if (!iter_.readRefNull()) { + return false; + } + + if (deadCode_) { + return true; + } + + pushRef(NULLREF_VALUE); + return true; +} + +bool BaseCompiler::emitRefIsNull() { + Nothing nothing; + if (!iter_.readRefIsNull(¬hing)) { + return false; + } + + if (deadCode_) { + return true; + } + + RegPtr r = popRef(); + RegI32 rd = narrowPtr(r); + + masm.cmpPtrSet(Assembler::Equal, r, ImmWord(NULLREF_VALUE), rd); + pushI32(rd); + return true; +} + +#ifdef ENABLE_WASM_FUNCTION_REFERENCES +bool BaseCompiler::emitRefAsNonNull() { + Nothing nothing; + if (!iter_.readRefAsNonNull(¬hing)) { + return false; + } + + if (deadCode_) { + return true; + } + + RegPtr rp = popRef(); + Label ok; + masm.branchTestPtr(Assembler::NonZero, rp, rp, &ok); + trap(Trap::NullPointerDereference); + masm.bind(&ok); + pushRef(rp); + + return true; +} +#endif + +bool BaseCompiler::emitAtomicCmpXchg(ValType type, Scalar::Type viewType) { + LinearMemoryAddress<Nothing> addr; + Nothing unused; + + if (!iter_.readAtomicCmpXchg(&addr, type, Scalar::byteSize(viewType), &unused, + &unused)) { + return false; + } + + if (deadCode_) { + return true; + } + + MemoryAccessDesc access(viewType, addr.align, addr.offset, bytecodeOffset(), + Synchronization::Full()); + + if (Scalar::byteSize(viewType) <= 4) { + PopAtomicCmpXchg32Regs regs(this, type, viewType); + + AccessCheck check; + RegI32 rp = popMemoryAccess(&access, &check); + RegI32 tls = maybeLoadTlsForAccess(check); + + auto memaddr = prepareAtomicMemoryAccess(&access, &check, tls, rp); + regs.atomicCmpXchg32(access, memaddr); + + maybeFreeI32(tls); + freeI32(rp); + + if (type == ValType::I64) { + pushU32AsI64(regs.takeRd()); + } else { + pushI32(regs.takeRd()); + } + + return true; + } + + MOZ_ASSERT(type == ValType::I64 && Scalar::byteSize(viewType) == 8); + + PopAtomicCmpXchg64Regs regs(this); + + AccessCheck check; + RegI32 rp = popMemoryAccess(&access, &check); + +#ifdef JS_CODEGEN_X86 + ScratchEBX ebx(*this); + RegI32 tls = maybeLoadTlsForAccess(check, ebx); + auto memaddr = prepareAtomicMemoryAccess(&access, &check, tls, rp); + regs.atomicCmpXchg64(access, memaddr, ebx); +#else + RegI32 tls = maybeLoadTlsForAccess(check); + auto memaddr = prepareAtomicMemoryAccess(&access, &check, tls, rp); + regs.atomicCmpXchg64(access, memaddr); + maybeFreeI32(tls); +#endif + + freeI32(rp); + + pushI64(regs.takeRd()); + return true; +} + +bool BaseCompiler::emitAtomicLoad(ValType type, Scalar::Type viewType) { + LinearMemoryAddress<Nothing> addr; + if (!iter_.readAtomicLoad(&addr, type, 
Scalar::byteSize(viewType))) { + return false; + } + + if (deadCode_) { + return true; + } + + MemoryAccessDesc access(viewType, addr.align, addr.offset, bytecodeOffset(), + Synchronization::Load()); + + if (Scalar::byteSize(viewType) <= sizeof(void*)) { + return loadCommon(&access, AccessCheck(), type); + } + + MOZ_ASSERT(type == ValType::I64 && Scalar::byteSize(viewType) == 8); + +#if defined(JS_64BIT) + MOZ_CRASH("Should not happen"); +#else + PopAtomicLoad64Regs regs(this); + + AccessCheck check; + RegI32 rp = popMemoryAccess(&access, &check); + +# ifdef JS_CODEGEN_X86 + ScratchEBX ebx(*this); + RegI32 tls = maybeLoadTlsForAccess(check, ebx); + auto memaddr = prepareAtomicMemoryAccess(&access, &check, tls, rp); + regs.atomicLoad64(access, memaddr, ebx); +# else + RegI32 tls = maybeLoadTlsForAccess(check); + auto memaddr = prepareAtomicMemoryAccess(&access, &check, tls, rp); + regs.atomicLoad64(access, memaddr); + maybeFreeI32(tls); +# endif + + freeI32(rp); + + pushI64(regs.takeRd()); + return true; +#endif // JS_64BIT +} + +bool BaseCompiler::emitAtomicRMW(ValType type, Scalar::Type viewType, + AtomicOp op) { + LinearMemoryAddress<Nothing> addr; + Nothing unused_value; + if (!iter_.readAtomicRMW(&addr, type, Scalar::byteSize(viewType), + &unused_value)) { + return false; + } + + if (deadCode_) { + return true; + } + + MemoryAccessDesc access(viewType, addr.align, addr.offset, bytecodeOffset(), + Synchronization::Full()); + + if (Scalar::byteSize(viewType) <= 4) { + PopAtomicRMW32Regs regs(this, type, viewType, op); + + AccessCheck check; + RegI32 rp = popMemoryAccess(&access, &check); + RegI32 tls = maybeLoadTlsForAccess(check); + + auto memaddr = prepareAtomicMemoryAccess(&access, &check, tls, rp); + regs.atomicRMW32(access, memaddr, op); + + maybeFreeI32(tls); + freeI32(rp); + + if (type == ValType::I64) { + pushU32AsI64(regs.takeRd()); + } else { + pushI32(regs.takeRd()); + } + return true; + } + + MOZ_ASSERT(type == ValType::I64 && Scalar::byteSize(viewType) == 8); + + PopAtomicRMW64Regs regs(this, op); + + AccessCheck check; + RegI32 rp = popMemoryAccess(&access, &check); + +#ifdef JS_CODEGEN_X86 + ScratchEBX ebx(*this); + RegI32 tls = maybeLoadTlsForAccess(check, ebx); + + fr.pushPtr(regs.valueHigh()); + fr.pushPtr(regs.valueLow()); + Address value(esp, 0); + + auto memaddr = prepareAtomicMemoryAccess(&access, &check, tls, rp); + regs.atomicRMW64(access, memaddr, op, value, ebx); + + fr.popBytes(8); +#else + RegI32 tls = maybeLoadTlsForAccess(check); + auto memaddr = prepareAtomicMemoryAccess(&access, &check, tls, rp); + regs.atomicRMW64(access, memaddr, op); + maybeFreeI32(tls); +#endif + + freeI32(rp); + + pushI64(regs.takeRd()); + return true; +} + +bool BaseCompiler::emitAtomicStore(ValType type, Scalar::Type viewType) { + LinearMemoryAddress<Nothing> addr; + Nothing unused_value; + if (!iter_.readAtomicStore(&addr, type, Scalar::byteSize(viewType), + &unused_value)) { + return false; + } + + if (deadCode_) { + return true; + } + + MemoryAccessDesc access(viewType, addr.align, addr.offset, bytecodeOffset(), + Synchronization::Store()); + + if (Scalar::byteSize(viewType) <= sizeof(void*)) { + return storeCommon(&access, AccessCheck(), type); + } + + MOZ_ASSERT(type == ValType::I64 && Scalar::byteSize(viewType) == 8); + +#ifdef JS_64BIT + MOZ_CRASH("Should not happen"); +#else + emitAtomicXchg64(&access, WantResult(false)); + return true; +#endif +} + +bool BaseCompiler::emitAtomicXchg(ValType type, Scalar::Type viewType) { + LinearMemoryAddress<Nothing> addr; + Nothing 
unused_value; + if (!iter_.readAtomicRMW(&addr, type, Scalar::byteSize(viewType), + &unused_value)) { + return false; + } + + if (deadCode_) { + return true; + } + + AccessCheck check; + MemoryAccessDesc access(viewType, addr.align, addr.offset, bytecodeOffset(), + Synchronization::Full()); + + if (Scalar::byteSize(viewType) <= 4) { + PopAtomicXchg32Regs regs(this, type, viewType); + RegI32 rp = popMemoryAccess(&access, &check); + RegI32 tls = maybeLoadTlsForAccess(check); + + auto memaddr = prepareAtomicMemoryAccess(&access, &check, tls, rp); + regs.atomicXchg32(access, memaddr); + + maybeFreeI32(tls); + freeI32(rp); + + if (type == ValType::I64) { + pushU32AsI64(regs.takeRd()); + } else { + pushI32(regs.takeRd()); + } + return true; + } + + MOZ_ASSERT(type == ValType::I64 && Scalar::byteSize(viewType) == 8); + + emitAtomicXchg64(&access, WantResult(true)); + return true; +} + +void BaseCompiler::emitAtomicXchg64(MemoryAccessDesc* access, + WantResult wantResult) { + PopAtomicXchg64Regs regs(this); + + AccessCheck check; + RegI32 rp = popMemoryAccess(access, &check); + +#ifdef JS_CODEGEN_X86 + ScratchEBX ebx(*this); + RegI32 tls = maybeLoadTlsForAccess(check, ebx); + auto memaddr = prepareAtomicMemoryAccess(access, &check, tls, rp); + regs.atomicXchg64(*access, memaddr, ebx); +#else + RegI32 tls = maybeLoadTlsForAccess(check); + auto memaddr = prepareAtomicMemoryAccess(access, &check, tls, rp); + regs.atomicXchg64(*access, memaddr); + maybeFreeI32(tls); +#endif + + freeI32(rp); + + if (wantResult) { + pushI64(regs.takeRd()); + } +} + +bool BaseCompiler::emitWait(ValType type, uint32_t byteSize) { + uint32_t lineOrBytecode = readCallSiteLineOrBytecode(); + + Nothing nothing; + LinearMemoryAddress<Nothing> addr; + if (!iter_.readWait(&addr, type, byteSize, ¬hing, ¬hing)) { + return false; + } + + if (deadCode_) { + return true; + } + + switch (type.kind()) { + case ValType::I32: { + RegI64 timeout = popI64(); + RegI32 val = popI32(); + + MemoryAccessDesc access(Scalar::Int32, addr.align, addr.offset, + bytecodeOffset()); + computeEffectiveAddress(&access); + + pushI32(val); + pushI64(timeout); + + if (!emitInstanceCall(lineOrBytecode, SASigWaitI32)) { + return false; + } + break; + } + case ValType::I64: { + RegI64 timeout = popI64(); + RegI64 val = popI64(); + + MemoryAccessDesc access(Scalar::Int64, addr.align, addr.offset, + bytecodeOffset()); + computeEffectiveAddress(&access); + + pushI64(val); + pushI64(timeout); + + if (!emitInstanceCall(lineOrBytecode, SASigWaitI64)) { + return false; + } + break; + } + default: + MOZ_CRASH(); + } + + return true; +} + +bool BaseCompiler::emitWake() { + uint32_t lineOrBytecode = readCallSiteLineOrBytecode(); + + Nothing nothing; + LinearMemoryAddress<Nothing> addr; + if (!iter_.readWake(&addr, ¬hing)) { + return false; + } + + if (deadCode_) { + return true; + } + + RegI32 count = popI32(); + + MemoryAccessDesc access(Scalar::Int32, addr.align, addr.offset, + bytecodeOffset()); + computeEffectiveAddress(&access); + + pushI32(count); + + return emitInstanceCall(lineOrBytecode, SASigWake); +} + +bool BaseCompiler::emitFence() { + if (!iter_.readFence()) { + return false; + } + if (deadCode_) { + return true; + } + + masm.memoryBarrier(MembarFull); + return true; +} + +bool BaseCompiler::emitMemCopy() { + uint32_t lineOrBytecode = readCallSiteLineOrBytecode(); + + uint32_t dstMemOrTableIndex = 0; + uint32_t srcMemOrTableIndex = 0; + Nothing nothing; + if (!iter_.readMemOrTableCopy(true, &dstMemOrTableIndex, ¬hing, + &srcMemOrTableIndex, ¬hing, ¬hing)) 
{ + return false; + } + + if (deadCode_) { + return true; + } + + int32_t signedLength; + if (MacroAssembler::SupportsFastUnalignedAccesses() && + peekConstI32(&signedLength) && signedLength != 0 && + uint32_t(signedLength) <= MaxInlineMemoryCopyLength) { + return emitMemCopyInline(); + } + + return emitMemCopyCall(lineOrBytecode); +} + +bool BaseCompiler::emitMemCopyCall(uint32_t lineOrBytecode) { + pushHeapBase(); + if (!emitInstanceCall(lineOrBytecode, + usesSharedMemory() ? SASigMemCopyShared : SASigMemCopy, + /*pushReturnedValue=*/false)) { + return false; + } + + return true; +} + +bool BaseCompiler::emitMemCopyInline() { + MOZ_ASSERT(MaxInlineMemoryCopyLength != 0); + + int32_t signedLength; + MOZ_ALWAYS_TRUE(popConstI32(&signedLength)); + uint32_t length = signedLength; + MOZ_ASSERT(length != 0 && length <= MaxInlineMemoryCopyLength); + + RegI32 src = popI32(); + RegI32 dest = popI32(); + + // Compute the number of copies of each width we will need to do + size_t remainder = length; +#ifdef JS_64BIT + size_t numCopies8 = remainder / sizeof(uint64_t); + remainder %= sizeof(uint64_t); +#endif + size_t numCopies4 = remainder / sizeof(uint32_t); + remainder %= sizeof(uint32_t); + size_t numCopies2 = remainder / sizeof(uint16_t); + remainder %= sizeof(uint16_t); + size_t numCopies1 = remainder; + + // Load all source bytes onto the value stack from low to high using the + // widest transfer width we can for the system. We will trap without writing + // anything if any source byte is out-of-bounds. + bool omitBoundsCheck = false; + size_t offset = 0; + +#ifdef JS_64BIT + for (uint32_t i = 0; i < numCopies8; i++) { + RegI32 temp = needI32(); + moveI32(src, temp); + pushI32(temp); + + MemoryAccessDesc access(Scalar::Int64, 1, offset, bytecodeOffset()); + AccessCheck check; + check.omitBoundsCheck = omitBoundsCheck; + if (!loadCommon(&access, check, ValType::I64)) { + return false; + } + + offset += sizeof(uint64_t); + omitBoundsCheck = true; + } +#endif + + for (uint32_t i = 0; i < numCopies4; i++) { + RegI32 temp = needI32(); + moveI32(src, temp); + pushI32(temp); + + MemoryAccessDesc access(Scalar::Uint32, 1, offset, bytecodeOffset()); + AccessCheck check; + check.omitBoundsCheck = omitBoundsCheck; + if (!loadCommon(&access, check, ValType::I32)) { + return false; + } + + offset += sizeof(uint32_t); + omitBoundsCheck = true; + } + + if (numCopies2) { + RegI32 temp = needI32(); + moveI32(src, temp); + pushI32(temp); + + MemoryAccessDesc access(Scalar::Uint16, 1, offset, bytecodeOffset()); + AccessCheck check; + check.omitBoundsCheck = omitBoundsCheck; + if (!loadCommon(&access, check, ValType::I32)) { + return false; + } + + offset += sizeof(uint16_t); + omitBoundsCheck = true; + } + + if (numCopies1) { + RegI32 temp = needI32(); + moveI32(src, temp); + pushI32(temp); + + MemoryAccessDesc access(Scalar::Uint8, 1, offset, bytecodeOffset()); + AccessCheck check; + check.omitBoundsCheck = omitBoundsCheck; + if (!loadCommon(&access, check, ValType::I32)) { + return false; + } + } + + // Store all source bytes from the value stack to the destination from + // high to low. We will trap without writing anything on the first store + // if any dest byte is out-of-bounds. 
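+  //
+  // Worked example (illustrative): on a 64-bit system, length == 15 splits
+  // into numCopies8 == 1, numCopies4 == 1, numCopies2 == 1, numCopies1 == 1.
+  // The loads above read src+0 (i64), src+8 (i32), src+12 (i16), src+14 (i8),
+  // in that order; the stores below then write dest+14, dest+12, dest+8, and
+  // dest+0, ie in the reverse order, consuming the loaded values off the
+  // value stack as they go.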
+ offset = length; + omitBoundsCheck = false; + + if (numCopies1) { + offset -= sizeof(uint8_t); + + RegI32 value = popI32(); + RegI32 temp = needI32(); + moveI32(dest, temp); + pushI32(temp); + pushI32(value); + + MemoryAccessDesc access(Scalar::Uint8, 1, offset, bytecodeOffset()); + AccessCheck check; + if (!storeCommon(&access, check, ValType::I32)) { + return false; + } + + omitBoundsCheck = true; + } + + if (numCopies2) { + offset -= sizeof(uint16_t); + + RegI32 value = popI32(); + RegI32 temp = needI32(); + moveI32(dest, temp); + pushI32(temp); + pushI32(value); + + MemoryAccessDesc access(Scalar::Uint16, 1, offset, bytecodeOffset()); + AccessCheck check; + check.omitBoundsCheck = omitBoundsCheck; + if (!storeCommon(&access, check, ValType::I32)) { + return false; + } + + omitBoundsCheck = true; + } + + for (uint32_t i = 0; i < numCopies4; i++) { + offset -= sizeof(uint32_t); + + RegI32 value = popI32(); + RegI32 temp = needI32(); + moveI32(dest, temp); + pushI32(temp); + pushI32(value); + + MemoryAccessDesc access(Scalar::Uint32, 1, offset, bytecodeOffset()); + AccessCheck check; + check.omitBoundsCheck = omitBoundsCheck; + if (!storeCommon(&access, check, ValType::I32)) { + return false; + } + + omitBoundsCheck = true; + } + +#ifdef JS_64BIT + for (uint32_t i = 0; i < numCopies8; i++) { + offset -= sizeof(uint64_t); + + RegI64 value = popI64(); + RegI32 temp = needI32(); + moveI32(dest, temp); + pushI32(temp); + pushI64(value); + + MemoryAccessDesc access(Scalar::Int64, 1, offset, bytecodeOffset()); + AccessCheck check; + check.omitBoundsCheck = omitBoundsCheck; + if (!storeCommon(&access, check, ValType::I64)) { + return false; + } + + omitBoundsCheck = true; + } +#endif + + freeI32(dest); + freeI32(src); + return true; +} + +bool BaseCompiler::emitTableCopy() { + uint32_t lineOrBytecode = readCallSiteLineOrBytecode(); + + uint32_t dstMemOrTableIndex = 0; + uint32_t srcMemOrTableIndex = 0; + Nothing nothing; + if (!iter_.readMemOrTableCopy(false, &dstMemOrTableIndex, ¬hing, + &srcMemOrTableIndex, ¬hing, ¬hing)) { + return false; + } + + if (deadCode_) { + return true; + } + + pushI32(dstMemOrTableIndex); + pushI32(srcMemOrTableIndex); + if (!emitInstanceCall(lineOrBytecode, SASigTableCopy, + /*pushReturnedValue=*/false)) { + return false; + } + + return true; +} + +bool BaseCompiler::emitDataOrElemDrop(bool isData) { + uint32_t lineOrBytecode = readCallSiteLineOrBytecode(); + + uint32_t segIndex = 0; + if (!iter_.readDataOrElemDrop(isData, &segIndex)) { + return false; + } + + if (deadCode_) { + return true; + } + + // Despite the cast to int32_t, the callee regards the value as unsigned. + pushI32(int32_t(segIndex)); + + return emitInstanceCall(lineOrBytecode, + isData ? SASigDataDrop : SASigElemDrop, + /*pushReturnedValue=*/false); +} + +bool BaseCompiler::emitMemFill() { + uint32_t lineOrBytecode = readCallSiteLineOrBytecode(); + + Nothing nothing; + if (!iter_.readMemFill(¬hing, ¬hing, ¬hing)) { + return false; + } + + if (deadCode_) { + return true; + } + + int32_t signedLength; + int32_t signedValue; + if (MacroAssembler::SupportsFastUnalignedAccesses() && + peek2xI32(&signedLength, &signedValue) && signedLength != 0 && + uint32_t(signedLength) <= MaxInlineMemoryFillLength) { + return emitMemFillInline(); + } + return emitMemFillCall(lineOrBytecode); +} + +bool BaseCompiler::emitMemFillCall(uint32_t lineOrBytecode) { + pushHeapBase(); + return emitInstanceCall( + lineOrBytecode, usesSharedMemory() ? 
SASigMemFillShared : SASigMemFill, + /*pushReturnedValue=*/false); +} + +bool BaseCompiler::emitMemFillInline() { + MOZ_ASSERT(MaxInlineMemoryFillLength != 0); + + int32_t signedLength; + int32_t signedValue; + MOZ_ALWAYS_TRUE(popConstI32(&signedLength)); + MOZ_ALWAYS_TRUE(popConstI32(&signedValue)); + uint32_t length = uint32_t(signedLength); + uint32_t value = uint32_t(signedValue); + MOZ_ASSERT(length != 0 && length <= MaxInlineMemoryFillLength); + + RegI32 dest = popI32(); + + // Compute the number of copies of each width we will need to do + size_t remainder = length; +#ifdef JS_64BIT + size_t numCopies8 = remainder / sizeof(uint64_t); + remainder %= sizeof(uint64_t); +#endif + size_t numCopies4 = remainder / sizeof(uint32_t); + remainder %= sizeof(uint32_t); + size_t numCopies2 = remainder / sizeof(uint16_t); + remainder %= sizeof(uint16_t); + size_t numCopies1 = remainder; + + MOZ_ASSERT(numCopies2 <= 1 && numCopies1 <= 1); + + // Generate splatted definitions for wider fills as needed +#ifdef JS_64BIT + uint64_t val8 = SplatByteToUInt<uint64_t>(value, 8); +#endif + uint32_t val4 = SplatByteToUInt<uint32_t>(value, 4); + uint32_t val2 = SplatByteToUInt<uint32_t>(value, 2); + uint32_t val1 = value; + + // Store the fill value to the destination from high to low. We will trap + // without writing anything on the first store if any dest byte is + // out-of-bounds. + size_t offset = length; + bool omitBoundsCheck = false; + + if (numCopies1) { + offset -= sizeof(uint8_t); + + RegI32 temp = needI32(); + moveI32(dest, temp); + pushI32(temp); + pushI32(val1); + + MemoryAccessDesc access(Scalar::Uint8, 1, offset, bytecodeOffset()); + AccessCheck check; + if (!storeCommon(&access, check, ValType::I32)) { + return false; + } + + omitBoundsCheck = true; + } + + if (numCopies2) { + offset -= sizeof(uint16_t); + + RegI32 temp = needI32(); + moveI32(dest, temp); + pushI32(temp); + pushI32(val2); + + MemoryAccessDesc access(Scalar::Uint16, 1, offset, bytecodeOffset()); + AccessCheck check; + check.omitBoundsCheck = omitBoundsCheck; + if (!storeCommon(&access, check, ValType::I32)) { + return false; + } + + omitBoundsCheck = true; + } + + for (uint32_t i = 0; i < numCopies4; i++) { + offset -= sizeof(uint32_t); + + RegI32 temp = needI32(); + moveI32(dest, temp); + pushI32(temp); + pushI32(val4); + + MemoryAccessDesc access(Scalar::Uint32, 1, offset, bytecodeOffset()); + AccessCheck check; + check.omitBoundsCheck = omitBoundsCheck; + if (!storeCommon(&access, check, ValType::I32)) { + return false; + } + + omitBoundsCheck = true; + } + +#ifdef JS_64BIT + for (uint32_t i = 0; i < numCopies8; i++) { + offset -= sizeof(uint64_t); + + RegI32 temp = needI32(); + moveI32(dest, temp); + pushI32(temp); + pushI64(val8); + + MemoryAccessDesc access(Scalar::Int64, 1, offset, bytecodeOffset()); + AccessCheck check; + check.omitBoundsCheck = omitBoundsCheck; + if (!storeCommon(&access, check, ValType::I64)) { + return false; + } + + omitBoundsCheck = true; + } +#endif + + freeI32(dest); + return true; +} + +bool BaseCompiler::emitMemOrTableInit(bool isMem) { + uint32_t lineOrBytecode = readCallSiteLineOrBytecode(); + + uint32_t segIndex = 0; + uint32_t dstTableIndex = 0; + Nothing nothing; + if (!iter_.readMemOrTableInit(isMem, &segIndex, &dstTableIndex, ¬hing, + ¬hing, ¬hing)) { + return false; + } + + if (deadCode_) { + return true; + } + + pushI32(int32_t(segIndex)); + if (isMem) { + if (!emitInstanceCall(lineOrBytecode, SASigMemInit, + /*pushReturnedValue=*/false)) { + return false; + } + } else { + 
pushI32(dstTableIndex); + if (!emitInstanceCall(lineOrBytecode, SASigTableInit, + /*pushReturnedValue=*/false)) { + return false; + } + } + + return true; +} + +#ifdef ENABLE_WASM_REFTYPES +[[nodiscard]] bool BaseCompiler::emitTableFill() { + uint32_t lineOrBytecode = readCallSiteLineOrBytecode(); + + Nothing nothing; + uint32_t tableIndex; + if (!iter_.readTableFill(&tableIndex, ¬hing, ¬hing, ¬hing)) { + return false; + } + + if (deadCode_) { + return true; + } + + // fill(start:u32, val:ref, len:u32, table:u32) -> u32 + pushI32(tableIndex); + return emitInstanceCall(lineOrBytecode, SASigTableFill, + /*pushReturnedValue=*/false); +} + +[[nodiscard]] bool BaseCompiler::emitTableGet() { + uint32_t lineOrBytecode = readCallSiteLineOrBytecode(); + Nothing index; + uint32_t tableIndex; + if (!iter_.readTableGet(&tableIndex, &index)) { + return false; + } + if (deadCode_) { + return true; + } + // get(index:u32, table:u32) -> uintptr_t(AnyRef) + pushI32(tableIndex); + if (!emitInstanceCall(lineOrBytecode, SASigTableGet, + /*pushReturnedValue=*/false)) { + return false; + } + + // Push the resulting anyref back on the eval stack. NOTE: needRef() must + // not kill the value in the register. + RegPtr r = RegPtr(ReturnReg); + needRef(r); + pushRef(r); + + return true; +} + +[[nodiscard]] bool BaseCompiler::emitTableGrow() { + uint32_t lineOrBytecode = readCallSiteLineOrBytecode(); + Nothing delta; + Nothing initValue; + uint32_t tableIndex; + if (!iter_.readTableGrow(&tableIndex, &initValue, &delta)) { + return false; + } + if (deadCode_) { + return true; + } + // grow(initValue:anyref, delta:u32, table:u32) -> u32 + pushI32(tableIndex); + return emitInstanceCall(lineOrBytecode, SASigTableGrow); +} + +[[nodiscard]] bool BaseCompiler::emitTableSet() { + uint32_t lineOrBytecode = readCallSiteLineOrBytecode(); + Nothing index, value; + uint32_t tableIndex; + if (!iter_.readTableSet(&tableIndex, &index, &value)) { + return false; + } + if (deadCode_) { + return true; + } + // set(index:u32, value:ref, table:u32) -> i32 + pushI32(tableIndex); + return emitInstanceCall(lineOrBytecode, SASigTableSet, + /*pushReturnedValue=*/false); +} + +[[nodiscard]] bool BaseCompiler::emitTableSize() { + uint32_t lineOrBytecode = readCallSiteLineOrBytecode(); + uint32_t tableIndex; + if (!iter_.readTableSize(&tableIndex)) { + return false; + } + if (deadCode_) { + return true; + } + // size(table:u32) -> u32 + pushI32(tableIndex); + return emitInstanceCall(lineOrBytecode, SASigTableSize); +} +#endif + +bool BaseCompiler::emitStructNew() { + uint32_t lineOrBytecode = readCallSiteLineOrBytecode(); + + uint32_t typeIndex; + NothingVector args; + if (!iter_.readStructNew(&typeIndex, &args)) { + return false; + } + + if (deadCode_) { + return true; + } + + // Allocate zeroed storage. The parameter to StructNew is an index into a + // descriptor table that the instance has. + // + // Returns null on OOM. + + const StructType& structType = moduleEnv_.types[typeIndex].structType(); + const TypeIdDesc& structTypeId = moduleEnv_.typeIds[typeIndex]; + RegPtr rst = needRef(); + fr.loadTlsPtr(WasmTlsReg); + masm.loadWasmGlobalPtr(structTypeId.globalDataOffset(), rst); + pushRef(rst); + + if (!emitInstanceCall(lineOrBytecode, SASigStructNew)) { + return false; + } + + // Optimization opportunity: Iterate backward to pop arguments off the + // stack. 
This will generate more instructions than we want, since we + // really only need to pop the stack once at the end, not for every element, + // but to do better we need a bit more machinery to load elements off the + // stack into registers. + + RegPtr rp = popRef(); + RegPtr rdata = rp; + + if (!structType.isInline_) { + rdata = needRef(); + masm.loadPtr(Address(rp, OutlineTypedObject::offsetOfData()), rdata); + } + + // Optimization opportunity: when the value being stored is a known + // zero/null we need store nothing. This case may be somewhat common + // because struct.new forces a value to be specified for every field. + + uint32_t fieldNo = structType.fields_.length(); + while (fieldNo-- > 0) { + uint32_t offs = structType.objectBaseFieldOffset(fieldNo); + switch (structType.fields_[fieldNo].type.kind()) { + case ValType::I32: { + RegI32 r = popI32(); + masm.store32(r, Address(rdata, offs)); + freeI32(r); + break; + } + case ValType::I64: { + RegI64 r = popI64(); + masm.store64(r, Address(rdata, offs)); + freeI64(r); + break; + } + case ValType::F32: { + RegF32 r = popF32(); + masm.storeFloat32(r, Address(rdata, offs)); + freeF32(r); + break; + } + case ValType::F64: { + RegF64 r = popF64(); + masm.storeDouble(r, Address(rdata, offs)); + freeF64(r); + break; + } + case ValType::Ref: { + RegPtr value = popRef(); + masm.storePtr(value, Address(rdata, offs)); + + // A write barrier is needed here for the extremely unlikely case + // that the object is allocated in the tenured area - a result of + // a GC artifact. + + Label skipBarrier; + + sync(); + + RegPtr rowner = rp; + if (!structType.isInline_) { + rowner = needRef(); + masm.loadPtr(Address(rp, OutlineTypedObject::offsetOfOwner()), + rowner); + } + + RegPtr otherScratch = needRef(); + EmitWasmPostBarrierGuard(masm, Some(rowner), otherScratch, value, + &skipBarrier); + freeRef(otherScratch); + + if (!structType.isInline_) { + freeRef(rowner); + } + + freeRef(value); + + // TODO/AnyRef-boxing: With boxed immediates and strings, the write + // barrier is going to have to be more complicated. 
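+        //
+        // Note that rp is saved on the value stack across the instance call
+        // below, and rdata, which is not preserved by the call, is reloaded
+        // from rp afterwards in the outline case.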
+ ASSERT_ANYREF_IS_JSOBJECT; + + pushRef(rp); // Save rp across the call + RegPtr valueAddr = needRef(); + masm.computeEffectiveAddress(Address(rdata, offs), valueAddr); + if (!emitPostBarrierCall(valueAddr)) { // Consumes valueAddr + return false; + } + popRef(rp); // Restore rp + if (!structType.isInline_) { + masm.loadPtr(Address(rp, OutlineTypedObject::offsetOfData()), rdata); + } + + masm.bind(&skipBarrier); + break; + } + default: { + MOZ_CRASH("Unexpected field type"); + } + } + } + + if (!structType.isInline_) { + freeRef(rdata); + } + + pushRef(rp); + + return true; +} + +bool BaseCompiler::emitStructGet() { + uint32_t typeIndex; + uint32_t fieldIndex; + Nothing nothing; + if (!iter_.readStructGet(&typeIndex, &fieldIndex, ¬hing)) { + return false; + } + + if (deadCode_) { + return true; + } + + const StructType& structType = moduleEnv_.types[typeIndex].structType(); + + RegPtr rp = popRef(); + + Label ok; + masm.branchTestPtr(Assembler::NonZero, rp, rp, &ok); + trap(Trap::NullPointerDereference); + masm.bind(&ok); + + if (!structType.isInline_) { + masm.loadPtr(Address(rp, OutlineTypedObject::offsetOfData()), rp); + } + + uint32_t offs = structType.objectBaseFieldOffset(fieldIndex); + switch (structType.fields_[fieldIndex].type.kind()) { + case ValType::I32: { + RegI32 r = needI32(); + masm.load32(Address(rp, offs), r); + pushI32(r); + break; + } + case ValType::I64: { + RegI64 r = needI64(); + masm.load64(Address(rp, offs), r); + pushI64(r); + break; + } + case ValType::F32: { + RegF32 r = needF32(); + masm.loadFloat32(Address(rp, offs), r); + pushF32(r); + break; + } + case ValType::F64: { + RegF64 r = needF64(); + masm.loadDouble(Address(rp, offs), r); + pushF64(r); + break; + } + case ValType::Ref: { + RegPtr r = needRef(); + masm.loadPtr(Address(rp, offs), r); + pushRef(r); + break; + } + default: { + MOZ_CRASH("Unexpected field type"); + } + } + + freeRef(rp); + + return true; +} + +bool BaseCompiler::emitStructSet() { + uint32_t typeIndex; + uint32_t fieldIndex; + Nothing nothing; + if (!iter_.readStructSet(&typeIndex, &fieldIndex, ¬hing, ¬hing)) { + return false; + } + + if (deadCode_) { + return true; + } + + const StructType& structType = moduleEnv_.types[typeIndex].structType(); + + RegI32 ri; + RegI64 rl; + RegF32 rf; + RegF64 rd; + RegPtr rr; + + // Reserve this register early if we will need it so that it is not taken by + // rr or rp. 
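+  //
+  // The barriered store below requires the field address to be in the fixed
+  // register PreBarrierReg, so that register is claimed now, before the pops
+  // below could hand it out for the value or the object pointer.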
+ RegPtr valueAddr; + if (structType.fields_[fieldIndex].type.isReference()) { + valueAddr = RegPtr(PreBarrierReg); + needRef(valueAddr); + } + + switch (structType.fields_[fieldIndex].type.kind()) { + case ValType::I32: + ri = popI32(); + break; + case ValType::I64: + rl = popI64(); + break; + case ValType::F32: + rf = popF32(); + break; + case ValType::F64: + rd = popF64(); + break; + case ValType::Ref: + rr = popRef(); + break; + default: + MOZ_CRASH("Unexpected field type"); + } + + RegPtr rp = popRef(); + + Label ok; + masm.branchTestPtr(Assembler::NonZero, rp, rp, &ok); + trap(Trap::NullPointerDereference); + masm.bind(&ok); + + if (!structType.isInline_) { + masm.loadPtr(Address(rp, OutlineTypedObject::offsetOfData()), rp); + } + + uint32_t offs = structType.objectBaseFieldOffset(fieldIndex); + switch (structType.fields_[fieldIndex].type.kind()) { + case ValType::I32: { + masm.store32(ri, Address(rp, offs)); + freeI32(ri); + break; + } + case ValType::I64: { + masm.store64(rl, Address(rp, offs)); + freeI64(rl); + break; + } + case ValType::F32: { + masm.storeFloat32(rf, Address(rp, offs)); + freeF32(rf); + break; + } + case ValType::F64: { + masm.storeDouble(rd, Address(rp, offs)); + freeF64(rd); + break; + } + case ValType::Ref: { + masm.computeEffectiveAddress(Address(rp, offs), valueAddr); + + // Bug 1617908. Ensure that if a TypedObject is not inline, then its + // underlying ArrayBuffer also is not inline, or the barrier logic fails. + static_assert(InlineTypedObject::MaxInlineBytes >= + ArrayBufferObject::MaxInlineBytes); + + // emitBarrieredStore consumes valueAddr + if (!emitBarrieredStore(structType.isInline_ ? Some(rp) : Nothing(), + valueAddr, rr)) { + return false; + } + freeRef(rr); + break; + } + default: { + MOZ_CRASH("Unexpected field type"); + } + } + + freeRef(rp); + + return true; +} + +bool BaseCompiler::emitStructNarrow() { + uint32_t lineOrBytecode = readCallSiteLineOrBytecode(); + + ValType inputType, outputType; + Nothing nothing; + if (!iter_.readStructNarrow(&inputType, &outputType, ¬hing)) { + return false; + } + + if (deadCode_) { + return true; + } + + // struct.narrow validation ensures that these hold. + + MOZ_ASSERT(inputType.isEqRef() || + moduleEnv_.types.isStructType(inputType.refType())); + MOZ_ASSERT(outputType.isEqRef() || + moduleEnv_.types.isStructType(outputType.refType())); + MOZ_ASSERT_IF(outputType.isEqRef(), inputType.isEqRef()); + + // EqRef -> EqRef is a no-op, just leave the value on the stack. + + if (inputType.isEqRef() && outputType.isEqRef()) { + return true; + } + + RegPtr rp = popRef(); + + // Dynamic downcast eqref|(optref T) -> (optref U), leaves rp or null + const TypeIdDesc& outputStructTypeId = + moduleEnv_.typeIds[outputType.refType().typeIndex()]; + RegPtr rst = needRef(); + fr.loadTlsPtr(WasmTlsReg); + masm.loadWasmGlobalPtr(outputStructTypeId.globalDataOffset(), rst); + pushRef(rst); + + pushRef(rp); + return emitInstanceCall(lineOrBytecode, SASigStructNarrow); +} + +#ifdef ENABLE_WASM_SIMD + +// Emitter trampolines used by abstracted SIMD operations. Naming here follows +// the SIMD spec pretty closely. 
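The trampolines that follow all have the same shape: a free function taking the MacroAssembler plus source/destination registers, so that the generic emitVectorUnop/emitVectorBinop templates further down in this patch can pop operands, call through a function pointer, and push the result without knowing which SIMD operation is involved. As a rough illustration (not part of the patch), here is a self-contained sketch of that dispatch pattern; MiniCompiler, the stubbed MacroAssembler and RegV128, and the printf body are invented stand-ins, and only the shape of the trampoline and of emitVectorBinop mirrors the real code:

#include <cstdio>
#include <vector>

struct RegV128 { int id; };   // stand-in for the real vector-register type

struct MacroAssembler {       // stand-in: the real one emits machine code
  void addInt32x4(RegV128 rs, RegV128 rsd) {
    std::printf("simd add: v%d += v%d\n", rsd.id, rs.id);
  }
};

// Same shape as the trampolines in the patch (AddI32x4 and friends).
static void AddI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
  masm.addInt32x4(rs, rsd);
}

// Minimal model of the value stack and register discipline used by
// BaseCompiler::emitVectorBinop(void (*)(MacroAssembler&, RhsType, LhsDestType)).
struct MiniCompiler {
  MacroAssembler masm;
  std::vector<RegV128> stack;

  RegV128 popV128() { RegV128 r = stack.back(); stack.pop_back(); return r; }
  void pushV128(RegV128 r) { stack.push_back(r); }
  void freeV128(RegV128) {}   // the real code returns the register to the allocator

  void emitVectorBinop(void (*op)(MacroAssembler&, RegV128, RegV128)) {
    RegV128 rs = popV128();   // rhs is on top of the stack
    RegV128 rsd = popV128();  // lhs doubles as the destination
    op(masm, rs, rsd);
    freeV128(rs);
    pushV128(rsd);
  }
};

int main() {
  MiniCompiler c;
  c.pushV128({0});            // lhs/destination operand
  c.pushV128({1});            // rhs operand
  c.emitVectorBinop(AddI32x4);
  return 0;
}

The useful property is that stack discipline and register management live in one place (the emitter templates), while each trampoline stays a one-line wrapper over the MacroAssembler.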
+ +static void AndV128(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.bitwiseAndSimd128(rs, rsd); +} + +static void OrV128(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.bitwiseOrSimd128(rs, rsd); +} + +static void XorV128(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.bitwiseXorSimd128(rs, rsd); +} + +static void AddI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.addInt8x16(rs, rsd); +} + +static void AddI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.addInt16x8(rs, rsd); +} + +static void AddI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.addInt32x4(rs, rsd); +} + +static void AddF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.addFloat32x4(rs, rsd); +} + +static void AddI64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.addInt64x2(rs, rsd); +} + +static void AddF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.addFloat64x2(rs, rsd); +} + +static void AddSatI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.addSatInt8x16(rs, rsd); +} + +static void AddSatUI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.unsignedAddSatInt8x16(rs, rsd); +} + +static void AddSatI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.addSatInt16x8(rs, rsd); +} + +static void AddSatUI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.unsignedAddSatInt16x8(rs, rsd); +} + +static void SubI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.subInt8x16(rs, rsd); +} + +static void SubI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.subInt16x8(rs, rsd); +} + +static void SubI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.subInt32x4(rs, rsd); +} + +static void SubF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.subFloat32x4(rs, rsd); +} + +static void SubI64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.subInt64x2(rs, rsd); +} + +static void SubF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.subFloat64x2(rs, rsd); +} + +static void SubSatI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.subSatInt8x16(rs, rsd); +} + +static void SubSatUI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.unsignedSubSatInt8x16(rs, rsd); +} + +static void SubSatI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.subSatInt16x8(rs, rsd); +} + +static void SubSatUI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.unsignedSubSatInt16x8(rs, rsd); +} + +static void MulI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.mulInt16x8(rs, rsd); +} + +static void MulI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.mulInt32x4(rs, rsd); +} + +static void MulF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.mulFloat32x4(rs, rsd); +} + +# if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) +static void MulI64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd, + RegV128 temp) { + masm.mulInt64x2(rs, rsd, temp); +} +# endif + +static void MulF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.mulFloat64x2(rs, rsd); +} + +static void DivF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.divFloat32x4(rs, rsd); +} + +static void DivF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.divFloat64x2(rs, rsd); +} + +# if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) +static void MinF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd, + RegV128 temp1, RegV128 temp2) { + masm.minFloat32x4(rs, rsd, temp1, temp2); +} + +static void 
MinF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd, + RegV128 temp1, RegV128 temp2) { + masm.minFloat64x2(rs, rsd, temp1, temp2); +} + +static void MaxF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd, + RegV128 temp1, RegV128 temp2) { + masm.maxFloat32x4(rs, rsd, temp1, temp2); +} + +static void MaxF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd, + RegV128 temp1, RegV128 temp2) { + masm.maxFloat64x2(rs, rsd, temp1, temp2); +} + +static void PMinF32x4(MacroAssembler& masm, RegV128 rsd, RegV128 rs, + RhsDestOp) { + masm.pseudoMinFloat32x4(rsd, rs); +} + +static void PMinF64x2(MacroAssembler& masm, RegV128 rsd, RegV128 rs, + RhsDestOp) { + masm.pseudoMinFloat64x2(rsd, rs); +} + +static void PMaxF32x4(MacroAssembler& masm, RegV128 rsd, RegV128 rs, + RhsDestOp) { + masm.pseudoMaxFloat32x4(rsd, rs); +} + +static void PMaxF64x2(MacroAssembler& masm, RegV128 rsd, RegV128 rs, + RhsDestOp) { + masm.pseudoMaxFloat64x2(rsd, rs); +} +# elif defined(JS_CODEGEN_ARM64) +static void MinF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.minFloat32x4(rs, rsd); +} + +static void MinF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.minFloat64x2(rs, rsd); +} + +static void MaxF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.maxFloat32x4(rs, rsd); +} + +static void MaxF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.maxFloat64x2(rs, rsd); +} + +static void PMinF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.pseudoMinFloat32x4(rs, rsd); +} + +static void PMinF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.pseudoMinFloat64x2(rs, rsd); +} + +static void PMaxF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.pseudoMaxFloat32x4(rs, rsd); +} + +static void PMaxF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.pseudoMaxFloat64x2(rs, rsd); +} +# endif + +static void DotI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.widenDotInt16x8(rs, rsd); +} + +static void CmpI8x16(MacroAssembler& masm, Assembler::Condition cond, + RegV128 rs, RegV128 rsd) { + masm.compareInt8x16(cond, rs, rsd); +} + +static void CmpI16x8(MacroAssembler& masm, Assembler::Condition cond, + RegV128 rs, RegV128 rsd) { + masm.compareInt16x8(cond, rs, rsd); +} + +static void CmpI32x4(MacroAssembler& masm, Assembler::Condition cond, + RegV128 rs, RegV128 rsd) { + masm.compareInt32x4(cond, rs, rsd); +} + +# if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) +static void CmpUI8x16(MacroAssembler& masm, Assembler::Condition cond, + RegV128 rs, RegV128 rsd, RegV128 temp1, RegV128 temp2) { + masm.unsignedCompareInt8x16(cond, rs, rsd, temp1, temp2); +} + +static void CmpUI16x8(MacroAssembler& masm, Assembler::Condition cond, + RegV128 rs, RegV128 rsd, RegV128 temp1, RegV128 temp2) { + masm.unsignedCompareInt16x8(cond, rs, rsd, temp1, temp2); +} + +static void CmpUI32x4(MacroAssembler& masm, Assembler::Condition cond, + RegV128 rs, RegV128 rsd, RegV128 temp1, RegV128 temp2) { + masm.unsignedCompareInt32x4(cond, rs, rsd, temp1, temp2); +} +# else +static void CmpUI8x16(MacroAssembler& masm, Assembler::Condition cond, + RegV128 rs, RegV128 rsd) { + masm.compareInt8x16(cond, rs, rsd); +} + +static void CmpUI16x8(MacroAssembler& masm, Assembler::Condition cond, + RegV128 rs, RegV128 rsd) { + masm.compareInt16x8(cond, rs, rsd); +} + +static void CmpUI32x4(MacroAssembler& masm, Assembler::Condition cond, + RegV128 rs, RegV128 rsd) { + masm.compareInt32x4(cond, rs, rsd); +} +# endif + +static void CmpF32x4(MacroAssembler& masm, 
Assembler::Condition cond, + RegV128 rs, RegV128 rsd) { + masm.compareFloat32x4(cond, rs, rsd); +} + +static void CmpF64x2(MacroAssembler& masm, Assembler::Condition cond, + RegV128 rs, RegV128 rsd) { + masm.compareFloat64x2(cond, rs, rsd); +} + +static void NegI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rd) { + masm.negInt8x16(rs, rd); +} + +static void NegI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rd) { + masm.negInt16x8(rs, rd); +} + +static void NegI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rd) { + masm.negInt32x4(rs, rd); +} + +static void NegI64x2(MacroAssembler& masm, RegV128 rs, RegV128 rd) { + masm.negInt64x2(rs, rd); +} + +static void NegF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rd) { + masm.negFloat32x4(rs, rd); +} + +static void NegF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rd) { + masm.negFloat64x2(rs, rd); +} + +static void AbsF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rd) { + masm.absFloat32x4(rs, rd); +} + +static void AbsF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rd) { + masm.absFloat64x2(rs, rd); +} + +static void SqrtF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rd) { + masm.sqrtFloat32x4(rs, rd); +} + +static void SqrtF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rd) { + masm.sqrtFloat64x2(rs, rd); +} + +static void CeilF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rd) { + masm.ceilFloat32x4(rs, rd); +} + +static void FloorF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rd) { + masm.floorFloat32x4(rs, rd); +} + +static void TruncF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rd) { + masm.truncFloat32x4(rs, rd); +} + +static void NearestF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rd) { + masm.nearestFloat32x4(rs, rd); +} + +static void CeilF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rd) { + masm.ceilFloat64x2(rs, rd); +} + +static void FloorF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rd) { + masm.floorFloat64x2(rs, rd); +} + +static void TruncF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rd) { + masm.truncFloat64x2(rs, rd); +} + +static void NearestF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rd) { + masm.nearestFloat64x2(rs, rd); +} + +static void NotV128(MacroAssembler& masm, RegV128 rs, RegV128 rd) { + masm.bitwiseNotSimd128(rs, rd); +} + +# if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) +static void ShiftLeftI8x16(MacroAssembler& masm, RegI32 rs, RegV128 rsd, + RegI32 temp1, RegV128 temp2) { + masm.leftShiftInt8x16(rs, rsd, temp1, temp2); +} + +static void ShiftLeftI16x8(MacroAssembler& masm, RegI32 rs, RegV128 rsd, + RegI32 temp) { + masm.leftShiftInt16x8(rs, rsd, temp); +} + +static void ShiftLeftI32x4(MacroAssembler& masm, RegI32 rs, RegV128 rsd, + RegI32 temp) { + masm.leftShiftInt32x4(rs, rsd, temp); +} + +static void ShiftLeftI64x2(MacroAssembler& masm, RegI32 rs, RegV128 rsd, + RegI32 temp) { + masm.leftShiftInt64x2(rs, rsd, temp); +} + +static void ShiftRightI8x16(MacroAssembler& masm, RegI32 rs, RegV128 rsd, + RegI32 temp1, RegV128 temp2) { + masm.rightShiftInt8x16(rs, rsd, temp1, temp2); +} + +static void ShiftRightUI8x16(MacroAssembler& masm, RegI32 rs, RegV128 rsd, + RegI32 temp1, RegV128 temp2) { + masm.unsignedRightShiftInt8x16(rs, rsd, temp1, temp2); +} + +static void ShiftRightI16x8(MacroAssembler& masm, RegI32 rs, RegV128 rsd, + RegI32 temp) { + masm.rightShiftInt16x8(rs, rsd, temp); +} + +static void ShiftRightUI16x8(MacroAssembler& masm, RegI32 rs, RegV128 rsd, + RegI32 temp) { + masm.unsignedRightShiftInt16x8(rs, rsd, temp); +} + +static void ShiftRightI32x4(MacroAssembler& masm, 
RegI32 rs, RegV128 rsd, + RegI32 temp) { + masm.rightShiftInt32x4(rs, rsd, temp); +} + +static void ShiftRightUI32x4(MacroAssembler& masm, RegI32 rs, RegV128 rsd, + RegI32 temp) { + masm.unsignedRightShiftInt32x4(rs, rsd, temp); +} + +static void ShiftRightUI64x2(MacroAssembler& masm, RegI32 rs, RegV128 rsd, + RegI32 temp) { + masm.unsignedRightShiftInt64x2(rs, rsd, temp); +} +# elif defined(JS_CODEGEN_ARM64) +static void ShiftLeftI8x16(MacroAssembler& masm, RegI32 rs, RegV128 rsd) { + masm.leftShiftInt8x16(rs, rsd); +} + +static void ShiftLeftI16x8(MacroAssembler& masm, RegI32 rs, RegV128 rsd) { + masm.leftShiftInt16x8(rs, rsd); +} + +static void ShiftLeftI32x4(MacroAssembler& masm, RegI32 rs, RegV128 rsd) { + masm.leftShiftInt32x4(rs, rsd); +} + +static void ShiftLeftI64x2(MacroAssembler& masm, RegI32 rs, RegV128 rsd) { + masm.leftShiftInt64x2(rs, rsd); +} + +static void ShiftRightI8x16(MacroAssembler& masm, RegI32 rs, RegV128 rsd, + RegV128 temp) { + masm.rightShiftInt8x16(rs, rsd, temp); +} + +static void ShiftRightUI8x16(MacroAssembler& masm, RegI32 rs, RegV128 rsd, + RegV128 temp) { + masm.unsignedRightShiftInt8x16(rs, rsd, temp); +} + +static void ShiftRightI16x8(MacroAssembler& masm, RegI32 rs, RegV128 rsd, + RegV128 temp) { + masm.rightShiftInt16x8(rs, rsd, temp); +} + +static void ShiftRightUI16x8(MacroAssembler& masm, RegI32 rs, RegV128 rsd, + RegV128 temp) { + masm.unsignedRightShiftInt16x8(rs, rsd, temp); +} + +static void ShiftRightI32x4(MacroAssembler& masm, RegI32 rs, RegV128 rsd, + RegV128 temp) { + masm.rightShiftInt32x4(rs, rsd, temp); +} + +static void ShiftRightUI32x4(MacroAssembler& masm, RegI32 rs, RegV128 rsd, + RegV128 temp) { + masm.unsignedRightShiftInt32x4(rs, rsd, temp); +} +# endif + +static void AverageUI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.unsignedAverageInt8x16(rs, rsd); +} + +static void AverageUI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.unsignedAverageInt16x8(rs, rsd); +} + +static void MinI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.minInt8x16(rs, rsd); +} + +static void MinUI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.unsignedMinInt8x16(rs, rsd); +} + +static void MaxI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.maxInt8x16(rs, rsd); +} + +static void MaxUI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.unsignedMaxInt8x16(rs, rsd); +} + +static void MinI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.minInt16x8(rs, rsd); +} + +static void MinUI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.unsignedMinInt16x8(rs, rsd); +} + +static void MaxI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.maxInt16x8(rs, rsd); +} + +static void MaxUI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.unsignedMaxInt16x8(rs, rsd); +} + +static void MinI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.minInt32x4(rs, rsd); +} + +static void MinUI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.unsignedMinInt32x4(rs, rsd); +} + +static void MaxI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.maxInt32x4(rs, rsd); +} + +static void MaxUI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.unsignedMaxInt32x4(rs, rsd); +} + +static void NarrowI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.narrowInt16x8(rs, rsd); +} + +static void NarrowUI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.unsignedNarrowInt16x8(rs, rsd); +} + +static void 
NarrowI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.narrowInt32x4(rs, rsd); +} + +static void NarrowUI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.unsignedNarrowInt32x4(rs, rsd); +} + +static void WidenLowI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rd) { + masm.widenLowInt8x16(rs, rd); +} + +static void WidenHighI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rd) { + masm.widenHighInt8x16(rs, rd); +} + +static void WidenLowUI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rd) { + masm.unsignedWidenLowInt8x16(rs, rd); +} + +static void WidenHighUI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rd) { + masm.unsignedWidenHighInt8x16(rs, rd); +} + +static void WidenLowI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rd) { + masm.widenLowInt16x8(rs, rd); +} + +static void WidenHighI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rd) { + masm.widenHighInt16x8(rs, rd); +} + +static void WidenLowUI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rd) { + masm.unsignedWidenLowInt16x8(rs, rd); +} + +static void WidenHighUI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rd) { + masm.unsignedWidenHighInt16x8(rs, rd); +} + +static void AbsI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rd) { + masm.absInt8x16(rs, rd); +} + +static void AbsI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rd) { + masm.absInt16x8(rs, rd); +} + +static void AbsI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rd) { + masm.absInt32x4(rs, rd); +} + +static void ExtractLaneI8x16(MacroAssembler& masm, uint32_t laneIndex, + RegV128 rs, RegI32 rd) { + masm.extractLaneInt8x16(laneIndex, rs, rd); +} + +static void ExtractLaneUI8x16(MacroAssembler& masm, uint32_t laneIndex, + RegV128 rs, RegI32 rd) { + masm.unsignedExtractLaneInt8x16(laneIndex, rs, rd); +} + +static void ExtractLaneI16x8(MacroAssembler& masm, uint32_t laneIndex, + RegV128 rs, RegI32 rd) { + masm.extractLaneInt16x8(laneIndex, rs, rd); +} + +static void ExtractLaneUI16x8(MacroAssembler& masm, uint32_t laneIndex, + RegV128 rs, RegI32 rd) { + masm.unsignedExtractLaneInt16x8(laneIndex, rs, rd); +} + +static void ExtractLaneI32x4(MacroAssembler& masm, uint32_t laneIndex, + RegV128 rs, RegI32 rd) { + masm.extractLaneInt32x4(laneIndex, rs, rd); +} + +static void ExtractLaneI64x2(MacroAssembler& masm, uint32_t laneIndex, + RegV128 rs, RegI64 rd) { + masm.extractLaneInt64x2(laneIndex, rs, rd); +} + +static void ExtractLaneF32x4(MacroAssembler& masm, uint32_t laneIndex, + RegV128 rs, RegF32 rd) { + masm.extractLaneFloat32x4(laneIndex, rs, rd); +} + +static void ExtractLaneF64x2(MacroAssembler& masm, uint32_t laneIndex, + RegV128 rs, RegF64 rd) { + masm.extractLaneFloat64x2(laneIndex, rs, rd); +} + +static void ReplaceLaneI8x16(MacroAssembler& masm, uint32_t laneIndex, + RegI32 rs, RegV128 rsd) { + masm.replaceLaneInt8x16(laneIndex, rs, rsd); +} + +static void ReplaceLaneI16x8(MacroAssembler& masm, uint32_t laneIndex, + RegI32 rs, RegV128 rsd) { + masm.replaceLaneInt16x8(laneIndex, rs, rsd); +} + +static void ReplaceLaneI32x4(MacroAssembler& masm, uint32_t laneIndex, + RegI32 rs, RegV128 rsd) { + masm.replaceLaneInt32x4(laneIndex, rs, rsd); +} + +static void ReplaceLaneI64x2(MacroAssembler& masm, uint32_t laneIndex, + RegI64 rs, RegV128 rsd) { + masm.replaceLaneInt64x2(laneIndex, rs, rsd); +} + +static void ReplaceLaneF32x4(MacroAssembler& masm, uint32_t laneIndex, + RegF32 rs, RegV128 rsd) { + masm.replaceLaneFloat32x4(laneIndex, rs, rsd); +} + +static void ReplaceLaneF64x2(MacroAssembler& masm, uint32_t laneIndex, + RegF64 rs, RegV128 rsd) { + 
masm.replaceLaneFloat64x2(laneIndex, rs, rsd); +} + +static void SplatI8x16(MacroAssembler& masm, RegI32 rs, RegV128 rd) { + masm.splatX16(rs, rd); +} + +static void SplatI16x8(MacroAssembler& masm, RegI32 rs, RegV128 rd) { + masm.splatX8(rs, rd); +} + +static void SplatI32x4(MacroAssembler& masm, RegI32 rs, RegV128 rd) { + masm.splatX4(rs, rd); +} + +static void SplatI64x2(MacroAssembler& masm, RegI64 rs, RegV128 rd) { + masm.splatX2(rs, rd); +} + +static void SplatF32x4(MacroAssembler& masm, RegF32 rs, RegV128 rd) { + masm.splatX4(rs, rd); +} + +static void SplatF64x2(MacroAssembler& masm, RegF64 rs, RegV128 rd) { + masm.splatX2(rs, rd); +} + +// This is the same op independent of lanes: it tests for any nonzero bit. +static void AnyTrue(MacroAssembler& masm, RegV128 rs, RegI32 rd) { + masm.anyTrueSimd128(rs, rd); +} + +static void AllTrueI8x16(MacroAssembler& masm, RegV128 rs, RegI32 rd) { + masm.allTrueInt8x16(rs, rd); +} + +static void AllTrueI16x8(MacroAssembler& masm, RegV128 rs, RegI32 rd) { + masm.allTrueInt16x8(rs, rd); +} + +static void AllTrueI32x4(MacroAssembler& masm, RegV128 rs, RegI32 rd) { + masm.allTrueInt32x4(rs, rd); +} + +# if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) +static void BitmaskI8x16(MacroAssembler& masm, RegV128 rs, RegI32 rd) { + masm.bitmaskInt8x16(rs, rd); +} + +static void BitmaskI16x8(MacroAssembler& masm, RegV128 rs, RegI32 rd) { + masm.bitmaskInt16x8(rs, rd); +} + +static void BitmaskI32x4(MacroAssembler& masm, RegV128 rs, RegI32 rd) { + masm.bitmaskInt32x4(rs, rd); +} + +static void Swizzle(MacroAssembler& masm, RegV128 rs, RegV128 rsd, + RegV128 temp) { + masm.swizzleInt8x16(rs, rsd, temp); +} +# elif defined(JS_CODEGEN_ARM64) +static void BitmaskI8x16(MacroAssembler& masm, RegV128 rs, RegI32 rd, + RegV128 temp) { + masm.bitmaskInt8x16(rs, rd, temp); +} + +static void BitmaskI16x8(MacroAssembler& masm, RegV128 rs, RegI32 rd, + RegV128 temp) { + masm.bitmaskInt16x8(rs, rd, temp); +} + +static void BitmaskI32x4(MacroAssembler& masm, RegV128 rs, RegI32 rd, + RegV128 temp) { + masm.bitmaskInt32x4(rs, rd, temp); +} + +static void Swizzle(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { + masm.swizzleInt8x16(rs, rsd); +} +# endif + +static void ConvertI32x4ToF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rd) { + masm.convertInt32x4ToFloat32x4(rs, rd); +} + +static void ConvertUI32x4ToF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rd) { + masm.unsignedConvertInt32x4ToFloat32x4(rs, rd); +} + +static void ConvertF32x4ToI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rd) { + masm.truncSatFloat32x4ToInt32x4(rs, rd); +} + +static void ConvertF32x4ToUI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rd, + RegV128 temp) { + masm.unsignedTruncSatFloat32x4ToInt32x4(rs, rd, temp); +} + +template <typename SourceType, typename DestType> +void BaseCompiler::emitVectorUnop(void (*op)(MacroAssembler& masm, + SourceType rs, DestType rd)) { + SourceType rs = pop<SourceType>(); + DestType rd = need<DestType>(); + op(masm, rs, rd); + free(rs); + push(rd); +} + +template <typename SourceType, typename DestType, typename TempType> +void BaseCompiler::emitVectorUnop(void (*op)(MacroAssembler& masm, + SourceType rs, DestType rd, + TempType temp)) { + SourceType rs = pop<SourceType>(); + DestType rd = need<DestType>(); + TempType temp = need<TempType>(); + op(masm, rs, rd, temp); + free(rs); + free(temp); + push(rd); +} + +template <typename SourceType, typename DestType, typename ImmType> +void BaseCompiler::emitVectorUnop(ImmType immediate, + void 
(*op)(MacroAssembler&, ImmType, + SourceType, DestType)) { + SourceType rs = pop<SourceType>(); + DestType rd = need<DestType>(); + op(masm, immediate, rs, rd); + free(rs); + push(rd); +} + +template <typename RhsType, typename LhsDestType> +void BaseCompiler::emitVectorBinop(void (*op)(MacroAssembler& masm, RhsType src, + LhsDestType srcDest)) { + RhsType rs = pop<RhsType>(); + LhsDestType rsd = pop<LhsDestType>(); + op(masm, rs, rsd); + free(rs); + push(rsd); +} + +template <typename RhsDestType, typename LhsType> +void BaseCompiler::emitVectorBinop(void (*op)(MacroAssembler& masm, + RhsDestType src, LhsType srcDest, + RhsDestOp)) { + RhsDestType rsd = pop<RhsDestType>(); + LhsType rs = pop<LhsType>(); + op(masm, rsd, rs, RhsDestOp::True); + free(rs); + push(rsd); +} + +template <typename RhsType, typename LhsDestType, typename TempType> +void BaseCompiler::emitVectorBinop(void (*op)(MacroAssembler& masm, RhsType rs, + LhsDestType rsd, TempType temp)) { + RhsType rs = pop<RhsType>(); + LhsDestType rsd = pop<LhsDestType>(); + TempType temp = need<TempType>(); + op(masm, rs, rsd, temp); + free(rs); + free(temp); + push(rsd); +} + +template <typename RhsType, typename LhsDestType, typename TempType1, + typename TempType2> +void BaseCompiler::emitVectorBinop(void (*op)(MacroAssembler& masm, RhsType rs, + LhsDestType rsd, TempType1 temp1, + TempType2 temp2)) { + RhsType rs = pop<RhsType>(); + LhsDestType rsd = pop<LhsDestType>(); + TempType1 temp1 = need<TempType1>(); + TempType2 temp2 = need<TempType2>(); + op(masm, rs, rsd, temp1, temp2); + free(rs); + free(temp1); + free(temp2); + push(rsd); +} + +template <typename RhsType, typename LhsDestType, typename ImmType> +void BaseCompiler::emitVectorBinop(ImmType immediate, + void (*op)(MacroAssembler&, ImmType, RhsType, + LhsDestType)) { + RhsType rs = pop<RhsType>(); + LhsDestType rsd = pop<LhsDestType>(); + op(masm, immediate, rs, rsd); + free(rs); + push(rsd); +} + +template <typename RhsType, typename LhsDestType, typename ImmType, + typename TempType1, typename TempType2> +void BaseCompiler::emitVectorBinop(ImmType immediate, + void (*op)(MacroAssembler&, ImmType, RhsType, + LhsDestType, TempType1 temp1, + TempType2 temp2)) { + RhsType rs = pop<RhsType>(); + LhsDestType rsd = pop<LhsDestType>(); + TempType1 temp1 = need<TempType1>(); + TempType2 temp2 = need<TempType2>(); + op(masm, immediate, rs, rsd, temp1, temp2); + free(rs); + free(temp1); + free(temp2); + push(rsd); +} + +void BaseCompiler::emitVectorAndNot() { + // We want x & ~y but the available operation is ~x & y, so reverse the + // operands. + RegV128 r, rs; + pop2xV128(&r, &rs); + masm.bitwiseNotAndSimd128(r, rs); + freeV128(r); + pushV128(rs); +} + +bool BaseCompiler::emitLoadSplat(Scalar::Type viewType) { + // We can implement loadSplat mostly as load + splat because the push of the + // result onto the value stack in loadCommon normally will not generate any + // code, it will leave the value in a register which we will consume. + + LinearMemoryAddress<Nothing> addr; + if (!iter_.readLoadSplat(Scalar::byteSize(viewType), &addr)) { + return false; + } + + if (deadCode_) { + return true; + } + + // We use uint types when we can on the general assumption that unsigned loads + // might be smaller/faster on some platforms, because no sign extension needs + // to be done after the sub-register load. 
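// (Annotation, not part of the patch.)  Concretely, for Scalar::Uint16 the
// code below performs an ordinary zero-extending 16-bit load via loadCommon,
// which leaves an I32 on the value stack, and SplatI16x8 then broadcasts
// that value to all eight lanes; the other cases follow the same pattern
// with wider loads and splats.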
+ + MemoryAccessDesc access(viewType, addr.align, addr.offset, bytecodeOffset()); + switch (viewType) { + case Scalar::Uint8: + if (!loadCommon(&access, AccessCheck(), ValType::I32)) { + return false; + } + emitVectorUnop(SplatI8x16); + break; + case Scalar::Uint16: + if (!loadCommon(&access, AccessCheck(), ValType::I32)) { + return false; + } + emitVectorUnop(SplatI16x8); + break; + case Scalar::Uint32: + if (!loadCommon(&access, AccessCheck(), ValType::I32)) { + return false; + } + emitVectorUnop(SplatI32x4); + break; + case Scalar::Int64: + if (!loadCommon(&access, AccessCheck(), ValType::I64)) { + return false; + } + emitVectorUnop(SplatI64x2); + break; + default: + MOZ_CRASH(); + } + return true; +} + +bool BaseCompiler::emitLoadZero(Scalar::Type viewType) { + // LoadZero has the structure of LoadSplat + LinearMemoryAddress<Nothing> addr; + if (!iter_.readLoadSplat(Scalar::byteSize(viewType), &addr)) { + return false; + } + + if (deadCode_) { + return true; + } + + MemoryAccessDesc access(viewType, addr.align, addr.offset, bytecodeOffset()); + access.setZeroExtendSimd128Load(); + return loadCommon(&access, AccessCheck(), ValType::V128); +} + +bool BaseCompiler::emitLoadExtend(Scalar::Type viewType) { + LinearMemoryAddress<Nothing> addr; + if (!iter_.readLoadExtend(&addr)) { + return false; + } + + if (deadCode_) { + return true; + } + + MemoryAccessDesc access(Scalar::Int64, addr.align, addr.offset, + bytecodeOffset()); + if (!loadCommon(&access, AccessCheck(), ValType::I64)) { + return false; + } + + RegI64 rs = popI64(); + RegV128 rd = needV128(); + masm.moveGPR64ToDouble(rs, rd); + switch (viewType) { + case Scalar::Int8: + masm.widenLowInt8x16(rd, rd); + break; + case Scalar::Uint8: + masm.unsignedWidenLowInt8x16(rd, rd); + break; + case Scalar::Int16: + masm.widenLowInt16x8(rd, rd); + break; + case Scalar::Uint16: + masm.unsignedWidenLowInt16x8(rd, rd); + break; + case Scalar::Int32: + masm.widenLowInt32x4(rd, rd); + break; + case Scalar::Uint32: + masm.unsignedWidenLowInt32x4(rd, rd); + break; + default: + MOZ_CRASH(); + } + freeI64(rs); + pushV128(rd); + + return true; +} + +bool BaseCompiler::emitBitselect() { + Nothing unused_a, unused_b, unused_c; + + if (!iter_.readVectorSelect(&unused_a, &unused_b, &unused_c)) { + return false; + } + + if (deadCode_) { + return true; + } + + RegV128 rs3 = popV128(); // Control + RegV128 rs2 = popV128(); // 'false' vector + RegV128 rs1 = popV128(); // 'true' vector + +# if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) + // On x86, certain register assignments will result in more compact code: we + // want output=rs1 and tmp=rs3. Attend to this after we see what other + // platforms want/need. + RegV128 tmp = needV128(); // Distinguished tmp, for now + masm.bitwiseSelectSimd128(rs3, rs1, rs2, rs1, tmp); + freeV128(rs2); + freeV128(rs3); + freeV128(tmp); + pushV128(rs1); +# elif defined(JS_CODEGEN_ARM64) + // Note register conventions differ significantly from x86. 
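// (Annotation, not part of the patch.)  Here the control mask register also
// receives the result, presumably because the underlying ARM64 BSL
// instruction selects into the register that initially holds the mask, so
// rs3 rather than rs1 is the register that gets pushed and no temp is
// needed.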
+ masm.bitwiseSelectSimd128(rs1, rs2, rs3); + freeV128(rs1); + freeV128(rs2); + pushV128(rs3); +# else + MOZ_CRASH("NYI"); +# endif + return true; +} + +bool BaseCompiler::emitVectorShuffle() { + Nothing unused_a, unused_b; + V128 shuffleMask; + + if (!iter_.readVectorShuffle(&unused_a, &unused_b, &shuffleMask)) { + return false; + } + + if (deadCode_) { + return true; + } + + RegV128 rd, rs; + pop2xV128(&rd, &rs); + masm.shuffleInt8x16(shuffleMask.bytes, rs, rd); + freeV128(rs); + pushV128(rd); + + return true; +} + +// Signed case must be scalarized on x86/x64 and requires CL. +// Signed and unsigned cases must be scalarized on ARM64. +bool BaseCompiler::emitVectorShiftRightI64x2(bool isUnsigned) { + Nothing unused_a, unused_b; + + if (!iter_.readVectorShift(&unused_a, &unused_b)) { + return false; + } + + if (deadCode_) { + return true; + } + +# if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) + if (isUnsigned) { + emitVectorBinop(ShiftRightUI64x2); + return true; + } +# endif + +# if defined(JS_CODEGEN_X86) + needI32(specific_.ecx); + RegI32 count = popI32ToSpecific(specific_.ecx); +# elif defined(JS_CODEGEN_X64) + RegI32 count; + if (Assembler::HasBMI2()) { + count = popI32(); + } else { + needI32(specific_.ecx); + count = popI32ToSpecific(specific_.ecx); + } +# elif defined(JS_CODEGEN_ARM64) + RegI32 count = popI32(); +# endif + RegV128 lhsDest = popV128(); + RegI64 tmp = needI64(); + masm.and32(Imm32(63), count); + masm.extractLaneInt64x2(0, lhsDest, tmp); + if (isUnsigned) { + masm.rshift64(count, tmp); + } else { + masm.rshift64Arithmetic(count, tmp); + } + masm.replaceLaneInt64x2(0, tmp, lhsDest); + masm.extractLaneInt64x2(1, lhsDest, tmp); + if (isUnsigned) { + masm.rshift64(count, tmp); + } else { + masm.rshift64Arithmetic(count, tmp); + } + masm.replaceLaneInt64x2(1, tmp, lhsDest); + freeI64(tmp); + freeI32(count); + pushV128(lhsDest); + + return true; +} + +// Must be scalarized on ARM64. 
+bool BaseCompiler::emitVectorMulI64x2() { + Nothing unused_a, unused_b; + + if (!iter_.readBinary(ValType::V128, &unused_a, &unused_b)) { + return false; + } + + if (deadCode_) { + return true; + } + +# if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) + emitVectorBinop(MulI64x2); +# elif defined(JS_CODEGEN_ARM64) + RegV128 r, rs; + pop2xV128(&r, &rs); + RegI64 temp1 = needI64(); + RegI64 temp2 = needI64(); + masm.extractLaneInt64x2(0, r, temp1); + masm.extractLaneInt64x2(0, rs, temp2); + masm.mul64(temp2, temp1, Register::Invalid()); + masm.replaceLaneInt64x2(0, temp1, r); + masm.extractLaneInt64x2(1, r, temp1); + masm.extractLaneInt64x2(1, rs, temp2); + masm.mul64(temp2, temp1, Register::Invalid()); + masm.replaceLaneInt64x2(1, temp1, r); + freeI64(temp1); + freeI64(temp2); + freeV128(rs); + pushV128(r); +# else + MOZ_CRASH("NYI"); +# endif + + return true; +} +#endif + +bool BaseCompiler::emitBody() { + MOZ_ASSERT(stackMapGenerator_.framePushedAtEntryToBody.isSome()); + + if (!iter_.readFunctionStart(func_.index)) { + return false; + } + + initControl(controlItem(), ResultType::Empty()); + + for (;;) { + Nothing unused_a, unused_b; + +#ifdef DEBUG + performRegisterLeakCheck(); + assertStackInvariants(); +#endif + +#define dispatchBinary(doEmit, type) \ + iter_.readBinary(type, &unused_a, &unused_b) && \ + (deadCode_ || (doEmit(), true)) + +#define dispatchUnary(doEmit, type) \ + iter_.readUnary(type, &unused_a) && (deadCode_ || (doEmit(), true)) + +#define dispatchComparison(doEmit, operandType, compareOp) \ + iter_.readComparison(operandType, &unused_a, &unused_b) && \ + (deadCode_ || (doEmit(compareOp, operandType), true)) + +#define dispatchConversion(doEmit, inType, outType) \ + iter_.readConversion(inType, outType, &unused_a) && \ + (deadCode_ || (doEmit(), true)) + +#define dispatchConversionOOM(doEmit, inType, outType) \ + iter_.readConversion(inType, outType, &unused_a) && (deadCode_ || doEmit()) + +#define dispatchCalloutConversionOOM(doEmit, symbol, inType, outType) \ + iter_.readConversion(inType, outType, &unused_a) && \ + (deadCode_ || doEmit(symbol, inType, outType)) + +#define dispatchIntDivCallout(doEmit, symbol, type) \ + iter_.readBinary(type, &unused_a, &unused_b) && \ + (deadCode_ || doEmit(symbol, type)) + +#define dispatchVectorBinary(op) \ + iter_.readBinary(ValType::V128, &unused_a, &unused_b) && \ + (deadCode_ || (emitVectorBinop(op), true)) + +#define dispatchVectorUnary(op) \ + iter_.readUnary(ValType::V128, &unused_a) && \ + (deadCode_ || (emitVectorUnop(op), true)) + +#define dispatchVectorComparison(op, compareOp) \ + iter_.readBinary(ValType::V128, &unused_a, &unused_b) && \ + (deadCode_ || (emitVectorBinop(compareOp, op), true)) + +#define dispatchVectorVariableShift(op) \ + iter_.readVectorShift(&unused_a, &unused_b) && \ + (deadCode_ || (emitVectorBinop(op), true)) + +#define dispatchExtractLane(op, outType, laneLimit) \ + iter_.readExtractLane(outType, laneLimit, &laneIndex, &unused_a) && \ + (deadCode_ || (emitVectorUnop(laneIndex, op), true)) + +#define dispatchReplaceLane(op, inType, laneLimit) \ + iter_.readReplaceLane(inType, laneLimit, &laneIndex, &unused_a, \ + &unused_b) && \ + (deadCode_ || (emitVectorBinop(laneIndex, op), true)) + +#define dispatchSplat(op, inType) \ + iter_.readConversion(inType, ValType::V128, &unused_a) && \ + (deadCode_ || (emitVectorUnop(op), true)) + +#define dispatchVectorReduction(op) \ + iter_.readConversion(ValType::V128, ValType::I32, &unused_a) && \ + (deadCode_ || (emitVectorUnop(op), true)) + +#ifdef DEBUG 
+ // Check that the number of ref-typed entries in the operand stack matches + // reality. +# define CHECK_POINTER_COUNT \ + do { \ + MOZ_ASSERT(countMemRefsOnStk() == stackMapGenerator_.memRefsOnStk); \ + } while (0) +#else +# define CHECK_POINTER_COUNT \ + do { \ + } while (0) +#endif + +#ifdef ENABLE_WASM_SIMD_EXPERIMENTAL +# define CHECK_SIMD_EXPERIMENTAL() (void)(0) +#else +# define CHECK_SIMD_EXPERIMENTAL() break +#endif + +#define CHECK(E) \ + if (!(E)) return false +#define NEXT() \ + { \ + CHECK_POINTER_COUNT; \ + continue; \ + } +#define CHECK_NEXT(E) \ + if (!(E)) return false; \ + { \ + CHECK_POINTER_COUNT; \ + continue; \ + } + + CHECK(stk_.reserve(stk_.length() + MaxPushesPerOpcode)); + + OpBytes op; + CHECK(iter_.readOp(&op)); + + // When compilerEnv_.debugEnabled(), every operator has breakpoint site but + // Op::End. + if (compilerEnv_.debugEnabled() && op.b0 != (uint16_t)Op::End) { + // TODO sync only registers that can be clobbered by the exit + // prologue/epilogue or disable these registers for use in + // baseline compiler when compilerEnv_.debugEnabled() is set. + sync(); + + insertBreakablePoint(CallSiteDesc::Breakpoint); + if (!createStackMap("debug: per insn")) { + return false; + } + } + + // Going below framePushedAtEntryToBody would imply that we've + // popped off the machine stack, part of the frame created by + // beginFunction(). + MOZ_ASSERT(masm.framePushed() >= + stackMapGenerator_.framePushedAtEntryToBody.value()); + + // At this point we're definitely not generating code for a function call. + MOZ_ASSERT( + stackMapGenerator_.framePushedExcludingOutboundCallArgs.isNothing()); + + switch (op.b0) { + case uint16_t(Op::End): + if (!emitEnd()) { + return false; + } + if (iter_.controlStackEmpty()) { + return true; + } + NEXT(); + + // Control opcodes + case uint16_t(Op::Nop): + CHECK_NEXT(iter_.readNop()); + case uint16_t(Op::Drop): + CHECK_NEXT(emitDrop()); + case uint16_t(Op::Block): + CHECK_NEXT(emitBlock()); + case uint16_t(Op::Loop): + CHECK_NEXT(emitLoop()); + case uint16_t(Op::If): + CHECK_NEXT(emitIf()); + case uint16_t(Op::Else): + CHECK_NEXT(emitElse()); +#ifdef ENABLE_WASM_EXCEPTIONS + case uint16_t(Op::Try): + if (!moduleEnv_.exceptionsEnabled()) { + return iter_.unrecognizedOpcode(&op); + } + CHECK_NEXT(emitTry()); + case uint16_t(Op::Catch): + if (!moduleEnv_.exceptionsEnabled()) { + return iter_.unrecognizedOpcode(&op); + } + CHECK_NEXT(emitCatch()); + case uint16_t(Op::Throw): + if (!moduleEnv_.exceptionsEnabled()) { + return iter_.unrecognizedOpcode(&op); + } + CHECK_NEXT(emitThrow()); +#endif + case uint16_t(Op::Br): + CHECK_NEXT(emitBr()); + case uint16_t(Op::BrIf): + CHECK_NEXT(emitBrIf()); + case uint16_t(Op::BrTable): + CHECK_NEXT(emitBrTable()); + case uint16_t(Op::Return): + CHECK_NEXT(emitReturn()); + case uint16_t(Op::Unreachable): + CHECK(iter_.readUnreachable()); + if (!deadCode_) { + trap(Trap::Unreachable); + deadCode_ = true; + } + NEXT(); + + // Calls + case uint16_t(Op::Call): + CHECK_NEXT(emitCall()); + case uint16_t(Op::CallIndirect): + CHECK_NEXT(emitCallIndirect()); + + // Locals and globals + case uint16_t(Op::GetLocal): + CHECK_NEXT(emitGetLocal()); + case uint16_t(Op::SetLocal): + CHECK_NEXT(emitSetLocal()); + case uint16_t(Op::TeeLocal): + CHECK_NEXT(emitTeeLocal()); + case uint16_t(Op::GetGlobal): + CHECK_NEXT(emitGetGlobal()); + case uint16_t(Op::SetGlobal): + CHECK_NEXT(emitSetGlobal()); +#ifdef ENABLE_WASM_REFTYPES + case uint16_t(Op::TableGet): + CHECK_NEXT(emitTableGet()); + case uint16_t(Op::TableSet): + 
CHECK_NEXT(emitTableSet()); +#endif + + // Select + case uint16_t(Op::SelectNumeric): + CHECK_NEXT(emitSelect(/*typed*/ false)); + case uint16_t(Op::SelectTyped): + if (!moduleEnv_.refTypesEnabled()) { + return iter_.unrecognizedOpcode(&op); + } + CHECK_NEXT(emitSelect(/*typed*/ true)); + + // I32 + case uint16_t(Op::I32Const): { + int32_t i32; + CHECK(iter_.readI32Const(&i32)); + if (!deadCode_) { + pushI32(i32); + } + NEXT(); + } + case uint16_t(Op::I32Add): + CHECK_NEXT(dispatchBinary(emitAddI32, ValType::I32)); + case uint16_t(Op::I32Sub): + CHECK_NEXT(dispatchBinary(emitSubtractI32, ValType::I32)); + case uint16_t(Op::I32Mul): + CHECK_NEXT(dispatchBinary(emitMultiplyI32, ValType::I32)); + case uint16_t(Op::I32DivS): + CHECK_NEXT(dispatchBinary(emitQuotientI32, ValType::I32)); + case uint16_t(Op::I32DivU): + CHECK_NEXT(dispatchBinary(emitQuotientU32, ValType::I32)); + case uint16_t(Op::I32RemS): + CHECK_NEXT(dispatchBinary(emitRemainderI32, ValType::I32)); + case uint16_t(Op::I32RemU): + CHECK_NEXT(dispatchBinary(emitRemainderU32, ValType::I32)); + case uint16_t(Op::I32Eqz): + CHECK_NEXT(dispatchConversion(emitEqzI32, ValType::I32, ValType::I32)); + case uint16_t(Op::I32TruncSF32): + CHECK_NEXT(dispatchConversionOOM(emitTruncateF32ToI32<0>, ValType::F32, + ValType::I32)); + case uint16_t(Op::I32TruncUF32): + CHECK_NEXT(dispatchConversionOOM(emitTruncateF32ToI32<TRUNC_UNSIGNED>, + ValType::F32, ValType::I32)); + case uint16_t(Op::I32TruncSF64): + CHECK_NEXT(dispatchConversionOOM(emitTruncateF64ToI32<0>, ValType::F64, + ValType::I32)); + case uint16_t(Op::I32TruncUF64): + CHECK_NEXT(dispatchConversionOOM(emitTruncateF64ToI32<TRUNC_UNSIGNED>, + ValType::F64, ValType::I32)); + case uint16_t(Op::I32WrapI64): + CHECK_NEXT( + dispatchConversion(emitWrapI64ToI32, ValType::I64, ValType::I32)); + case uint16_t(Op::I32ReinterpretF32): + CHECK_NEXT(dispatchConversion(emitReinterpretF32AsI32, ValType::F32, + ValType::I32)); + case uint16_t(Op::I32Clz): + CHECK_NEXT(dispatchUnary(emitClzI32, ValType::I32)); + case uint16_t(Op::I32Ctz): + CHECK_NEXT(dispatchUnary(emitCtzI32, ValType::I32)); + case uint16_t(Op::I32Popcnt): + CHECK_NEXT(dispatchUnary(emitPopcntI32, ValType::I32)); + case uint16_t(Op::I32Or): + CHECK_NEXT(dispatchBinary(emitOrI32, ValType::I32)); + case uint16_t(Op::I32And): + CHECK_NEXT(dispatchBinary(emitAndI32, ValType::I32)); + case uint16_t(Op::I32Xor): + CHECK_NEXT(dispatchBinary(emitXorI32, ValType::I32)); + case uint16_t(Op::I32Shl): + CHECK_NEXT(dispatchBinary(emitShlI32, ValType::I32)); + case uint16_t(Op::I32ShrS): + CHECK_NEXT(dispatchBinary(emitShrI32, ValType::I32)); + case uint16_t(Op::I32ShrU): + CHECK_NEXT(dispatchBinary(emitShrU32, ValType::I32)); + case uint16_t(Op::I32Load8S): + CHECK_NEXT(emitLoad(ValType::I32, Scalar::Int8)); + case uint16_t(Op::I32Load8U): + CHECK_NEXT(emitLoad(ValType::I32, Scalar::Uint8)); + case uint16_t(Op::I32Load16S): + CHECK_NEXT(emitLoad(ValType::I32, Scalar::Int16)); + case uint16_t(Op::I32Load16U): + CHECK_NEXT(emitLoad(ValType::I32, Scalar::Uint16)); + case uint16_t(Op::I32Load): + CHECK_NEXT(emitLoad(ValType::I32, Scalar::Int32)); + case uint16_t(Op::I32Store8): + CHECK_NEXT(emitStore(ValType::I32, Scalar::Int8)); + case uint16_t(Op::I32Store16): + CHECK_NEXT(emitStore(ValType::I32, Scalar::Int16)); + case uint16_t(Op::I32Store): + CHECK_NEXT(emitStore(ValType::I32, Scalar::Int32)); + case uint16_t(Op::I32Rotr): + CHECK_NEXT(dispatchBinary(emitRotrI32, ValType::I32)); + case uint16_t(Op::I32Rotl): + 
CHECK_NEXT(dispatchBinary(emitRotlI32, ValType::I32)); + + // I64 + case uint16_t(Op::I64Const): { + int64_t i64; + CHECK(iter_.readI64Const(&i64)); + if (!deadCode_) { + pushI64(i64); + } + NEXT(); + } + case uint16_t(Op::I64Add): + CHECK_NEXT(dispatchBinary(emitAddI64, ValType::I64)); + case uint16_t(Op::I64Sub): + CHECK_NEXT(dispatchBinary(emitSubtractI64, ValType::I64)); + case uint16_t(Op::I64Mul): + CHECK_NEXT(dispatchBinary(emitMultiplyI64, ValType::I64)); + case uint16_t(Op::I64DivS): +#ifdef RABALDR_INT_DIV_I64_CALLOUT + CHECK_NEXT(dispatchIntDivCallout( + emitDivOrModI64BuiltinCall, SymbolicAddress::DivI64, ValType::I64)); +#else + CHECK_NEXT(dispatchBinary(emitQuotientI64, ValType::I64)); +#endif + case uint16_t(Op::I64DivU): +#ifdef RABALDR_INT_DIV_I64_CALLOUT + CHECK_NEXT(dispatchIntDivCallout(emitDivOrModI64BuiltinCall, + SymbolicAddress::UDivI64, + ValType::I64)); +#else + CHECK_NEXT(dispatchBinary(emitQuotientU64, ValType::I64)); +#endif + case uint16_t(Op::I64RemS): +#ifdef RABALDR_INT_DIV_I64_CALLOUT + CHECK_NEXT(dispatchIntDivCallout( + emitDivOrModI64BuiltinCall, SymbolicAddress::ModI64, ValType::I64)); +#else + CHECK_NEXT(dispatchBinary(emitRemainderI64, ValType::I64)); +#endif + case uint16_t(Op::I64RemU): +#ifdef RABALDR_INT_DIV_I64_CALLOUT + CHECK_NEXT(dispatchIntDivCallout(emitDivOrModI64BuiltinCall, + SymbolicAddress::UModI64, + ValType::I64)); +#else + CHECK_NEXT(dispatchBinary(emitRemainderU64, ValType::I64)); +#endif + case uint16_t(Op::I64TruncSF32): +#ifdef RABALDR_FLOAT_TO_I64_CALLOUT + CHECK_NEXT( + dispatchCalloutConversionOOM(emitConvertFloatingToInt64Callout, + SymbolicAddress::TruncateDoubleToInt64, + ValType::F32, ValType::I64)); +#else + CHECK_NEXT(dispatchConversionOOM(emitTruncateF32ToI64<0>, ValType::F32, + ValType::I64)); +#endif + case uint16_t(Op::I64TruncUF32): +#ifdef RABALDR_FLOAT_TO_I64_CALLOUT + CHECK_NEXT(dispatchCalloutConversionOOM( + emitConvertFloatingToInt64Callout, + SymbolicAddress::TruncateDoubleToUint64, ValType::F32, + ValType::I64)); +#else + CHECK_NEXT(dispatchConversionOOM(emitTruncateF32ToI64<TRUNC_UNSIGNED>, + ValType::F32, ValType::I64)); +#endif + case uint16_t(Op::I64TruncSF64): +#ifdef RABALDR_FLOAT_TO_I64_CALLOUT + CHECK_NEXT( + dispatchCalloutConversionOOM(emitConvertFloatingToInt64Callout, + SymbolicAddress::TruncateDoubleToInt64, + ValType::F64, ValType::I64)); +#else + CHECK_NEXT(dispatchConversionOOM(emitTruncateF64ToI64<0>, ValType::F64, + ValType::I64)); +#endif + case uint16_t(Op::I64TruncUF64): +#ifdef RABALDR_FLOAT_TO_I64_CALLOUT + CHECK_NEXT(dispatchCalloutConversionOOM( + emitConvertFloatingToInt64Callout, + SymbolicAddress::TruncateDoubleToUint64, ValType::F64, + ValType::I64)); +#else + CHECK_NEXT(dispatchConversionOOM(emitTruncateF64ToI64<TRUNC_UNSIGNED>, + ValType::F64, ValType::I64)); +#endif + case uint16_t(Op::I64ExtendSI32): + CHECK_NEXT( + dispatchConversion(emitExtendI32ToI64, ValType::I32, ValType::I64)); + case uint16_t(Op::I64ExtendUI32): + CHECK_NEXT( + dispatchConversion(emitExtendU32ToI64, ValType::I32, ValType::I64)); + case uint16_t(Op::I64ReinterpretF64): + CHECK_NEXT(dispatchConversion(emitReinterpretF64AsI64, ValType::F64, + ValType::I64)); + case uint16_t(Op::I64Or): + CHECK_NEXT(dispatchBinary(emitOrI64, ValType::I64)); + case uint16_t(Op::I64And): + CHECK_NEXT(dispatchBinary(emitAndI64, ValType::I64)); + case uint16_t(Op::I64Xor): + CHECK_NEXT(dispatchBinary(emitXorI64, ValType::I64)); + case uint16_t(Op::I64Shl): + CHECK_NEXT(dispatchBinary(emitShlI64, ValType::I64)); + case 
uint16_t(Op::I64ShrS): + CHECK_NEXT(dispatchBinary(emitShrI64, ValType::I64)); + case uint16_t(Op::I64ShrU): + CHECK_NEXT(dispatchBinary(emitShrU64, ValType::I64)); + case uint16_t(Op::I64Rotr): + CHECK_NEXT(dispatchBinary(emitRotrI64, ValType::I64)); + case uint16_t(Op::I64Rotl): + CHECK_NEXT(dispatchBinary(emitRotlI64, ValType::I64)); + case uint16_t(Op::I64Clz): + CHECK_NEXT(dispatchUnary(emitClzI64, ValType::I64)); + case uint16_t(Op::I64Ctz): + CHECK_NEXT(dispatchUnary(emitCtzI64, ValType::I64)); + case uint16_t(Op::I64Popcnt): + CHECK_NEXT(dispatchUnary(emitPopcntI64, ValType::I64)); + case uint16_t(Op::I64Eqz): + CHECK_NEXT(dispatchConversion(emitEqzI64, ValType::I64, ValType::I32)); + case uint16_t(Op::I64Load8S): + CHECK_NEXT(emitLoad(ValType::I64, Scalar::Int8)); + case uint16_t(Op::I64Load16S): + CHECK_NEXT(emitLoad(ValType::I64, Scalar::Int16)); + case uint16_t(Op::I64Load32S): + CHECK_NEXT(emitLoad(ValType::I64, Scalar::Int32)); + case uint16_t(Op::I64Load8U): + CHECK_NEXT(emitLoad(ValType::I64, Scalar::Uint8)); + case uint16_t(Op::I64Load16U): + CHECK_NEXT(emitLoad(ValType::I64, Scalar::Uint16)); + case uint16_t(Op::I64Load32U): + CHECK_NEXT(emitLoad(ValType::I64, Scalar::Uint32)); + case uint16_t(Op::I64Load): + CHECK_NEXT(emitLoad(ValType::I64, Scalar::Int64)); + case uint16_t(Op::I64Store8): + CHECK_NEXT(emitStore(ValType::I64, Scalar::Int8)); + case uint16_t(Op::I64Store16): + CHECK_NEXT(emitStore(ValType::I64, Scalar::Int16)); + case uint16_t(Op::I64Store32): + CHECK_NEXT(emitStore(ValType::I64, Scalar::Int32)); + case uint16_t(Op::I64Store): + CHECK_NEXT(emitStore(ValType::I64, Scalar::Int64)); + + // F32 + case uint16_t(Op::F32Const): { + float f32; + CHECK(iter_.readF32Const(&f32)); + if (!deadCode_) { + pushF32(f32); + } + NEXT(); + } + case uint16_t(Op::F32Add): + CHECK_NEXT(dispatchBinary(emitAddF32, ValType::F32)); + case uint16_t(Op::F32Sub): + CHECK_NEXT(dispatchBinary(emitSubtractF32, ValType::F32)); + case uint16_t(Op::F32Mul): + CHECK_NEXT(dispatchBinary(emitMultiplyF32, ValType::F32)); + case uint16_t(Op::F32Div): + CHECK_NEXT(dispatchBinary(emitDivideF32, ValType::F32)); + case uint16_t(Op::F32Min): + CHECK_NEXT(dispatchBinary(emitMinF32, ValType::F32)); + case uint16_t(Op::F32Max): + CHECK_NEXT(dispatchBinary(emitMaxF32, ValType::F32)); + case uint16_t(Op::F32Neg): + CHECK_NEXT(dispatchUnary(emitNegateF32, ValType::F32)); + case uint16_t(Op::F32Abs): + CHECK_NEXT(dispatchUnary(emitAbsF32, ValType::F32)); + case uint16_t(Op::F32Sqrt): + CHECK_NEXT(dispatchUnary(emitSqrtF32, ValType::F32)); + case uint16_t(Op::F32Ceil): + CHECK_NEXT( + emitUnaryMathBuiltinCall(SymbolicAddress::CeilF, ValType::F32)); + case uint16_t(Op::F32Floor): + CHECK_NEXT( + emitUnaryMathBuiltinCall(SymbolicAddress::FloorF, ValType::F32)); + case uint16_t(Op::F32DemoteF64): + CHECK_NEXT(dispatchConversion(emitConvertF64ToF32, ValType::F64, + ValType::F32)); + case uint16_t(Op::F32ConvertSI32): + CHECK_NEXT(dispatchConversion(emitConvertI32ToF32, ValType::I32, + ValType::F32)); + case uint16_t(Op::F32ConvertUI32): + CHECK_NEXT(dispatchConversion(emitConvertU32ToF32, ValType::I32, + ValType::F32)); + case uint16_t(Op::F32ConvertSI64): +#ifdef RABALDR_I64_TO_FLOAT_CALLOUT + CHECK_NEXT(dispatchCalloutConversionOOM( + emitConvertInt64ToFloatingCallout, SymbolicAddress::Int64ToFloat32, + ValType::I64, ValType::F32)); +#else + CHECK_NEXT(dispatchConversion(emitConvertI64ToF32, ValType::I64, + ValType::F32)); +#endif + case uint16_t(Op::F32ConvertUI64): +#ifdef RABALDR_I64_TO_FLOAT_CALLOUT + 
CHECK_NEXT(dispatchCalloutConversionOOM( + emitConvertInt64ToFloatingCallout, SymbolicAddress::Uint64ToFloat32, + ValType::I64, ValType::F32)); +#else + CHECK_NEXT(dispatchConversion(emitConvertU64ToF32, ValType::I64, + ValType::F32)); +#endif + case uint16_t(Op::F32ReinterpretI32): + CHECK_NEXT(dispatchConversion(emitReinterpretI32AsF32, ValType::I32, + ValType::F32)); + case uint16_t(Op::F32Load): + CHECK_NEXT(emitLoad(ValType::F32, Scalar::Float32)); + case uint16_t(Op::F32Store): + CHECK_NEXT(emitStore(ValType::F32, Scalar::Float32)); + case uint16_t(Op::F32CopySign): + CHECK_NEXT(dispatchBinary(emitCopysignF32, ValType::F32)); + case uint16_t(Op::F32Nearest): + CHECK_NEXT(emitUnaryMathBuiltinCall(SymbolicAddress::NearbyIntF, + ValType::F32)); + case uint16_t(Op::F32Trunc): + CHECK_NEXT( + emitUnaryMathBuiltinCall(SymbolicAddress::TruncF, ValType::F32)); + + // F64 + case uint16_t(Op::F64Const): { + double f64; + CHECK(iter_.readF64Const(&f64)); + if (!deadCode_) { + pushF64(f64); + } + NEXT(); + } + case uint16_t(Op::F64Add): + CHECK_NEXT(dispatchBinary(emitAddF64, ValType::F64)); + case uint16_t(Op::F64Sub): + CHECK_NEXT(dispatchBinary(emitSubtractF64, ValType::F64)); + case uint16_t(Op::F64Mul): + CHECK_NEXT(dispatchBinary(emitMultiplyF64, ValType::F64)); + case uint16_t(Op::F64Div): + CHECK_NEXT(dispatchBinary(emitDivideF64, ValType::F64)); + case uint16_t(Op::F64Min): + CHECK_NEXT(dispatchBinary(emitMinF64, ValType::F64)); + case uint16_t(Op::F64Max): + CHECK_NEXT(dispatchBinary(emitMaxF64, ValType::F64)); + case uint16_t(Op::F64Neg): + CHECK_NEXT(dispatchUnary(emitNegateF64, ValType::F64)); + case uint16_t(Op::F64Abs): + CHECK_NEXT(dispatchUnary(emitAbsF64, ValType::F64)); + case uint16_t(Op::F64Sqrt): + CHECK_NEXT(dispatchUnary(emitSqrtF64, ValType::F64)); + case uint16_t(Op::F64Ceil): + CHECK_NEXT( + emitUnaryMathBuiltinCall(SymbolicAddress::CeilD, ValType::F64)); + case uint16_t(Op::F64Floor): + CHECK_NEXT( + emitUnaryMathBuiltinCall(SymbolicAddress::FloorD, ValType::F64)); + case uint16_t(Op::F64PromoteF32): + CHECK_NEXT(dispatchConversion(emitConvertF32ToF64, ValType::F32, + ValType::F64)); + case uint16_t(Op::F64ConvertSI32): + CHECK_NEXT(dispatchConversion(emitConvertI32ToF64, ValType::I32, + ValType::F64)); + case uint16_t(Op::F64ConvertUI32): + CHECK_NEXT(dispatchConversion(emitConvertU32ToF64, ValType::I32, + ValType::F64)); + case uint16_t(Op::F64ConvertSI64): +#ifdef RABALDR_I64_TO_FLOAT_CALLOUT + CHECK_NEXT(dispatchCalloutConversionOOM( + emitConvertInt64ToFloatingCallout, SymbolicAddress::Int64ToDouble, + ValType::I64, ValType::F64)); +#else + CHECK_NEXT(dispatchConversion(emitConvertI64ToF64, ValType::I64, + ValType::F64)); +#endif + case uint16_t(Op::F64ConvertUI64): +#ifdef RABALDR_I64_TO_FLOAT_CALLOUT + CHECK_NEXT(dispatchCalloutConversionOOM( + emitConvertInt64ToFloatingCallout, SymbolicAddress::Uint64ToDouble, + ValType::I64, ValType::F64)); +#else + CHECK_NEXT(dispatchConversion(emitConvertU64ToF64, ValType::I64, + ValType::F64)); +#endif + case uint16_t(Op::F64Load): + CHECK_NEXT(emitLoad(ValType::F64, Scalar::Float64)); + case uint16_t(Op::F64Store): + CHECK_NEXT(emitStore(ValType::F64, Scalar::Float64)); + case uint16_t(Op::F64ReinterpretI64): + CHECK_NEXT(dispatchConversion(emitReinterpretI64AsF64, ValType::I64, + ValType::F64)); + case uint16_t(Op::F64CopySign): + CHECK_NEXT(dispatchBinary(emitCopysignF64, ValType::F64)); + case uint16_t(Op::F64Nearest): + CHECK_NEXT(emitUnaryMathBuiltinCall(SymbolicAddress::NearbyIntD, + ValType::F64)); + case 
uint16_t(Op::F64Trunc): + CHECK_NEXT( + emitUnaryMathBuiltinCall(SymbolicAddress::TruncD, ValType::F64)); + + // Comparisons + case uint16_t(Op::I32Eq): + CHECK_NEXT( + dispatchComparison(emitCompareI32, ValType::I32, Assembler::Equal)); + case uint16_t(Op::I32Ne): + CHECK_NEXT(dispatchComparison(emitCompareI32, ValType::I32, + Assembler::NotEqual)); + case uint16_t(Op::I32LtS): + CHECK_NEXT(dispatchComparison(emitCompareI32, ValType::I32, + Assembler::LessThan)); + case uint16_t(Op::I32LeS): + CHECK_NEXT(dispatchComparison(emitCompareI32, ValType::I32, + Assembler::LessThanOrEqual)); + case uint16_t(Op::I32GtS): + CHECK_NEXT(dispatchComparison(emitCompareI32, ValType::I32, + Assembler::GreaterThan)); + case uint16_t(Op::I32GeS): + CHECK_NEXT(dispatchComparison(emitCompareI32, ValType::I32, + Assembler::GreaterThanOrEqual)); + case uint16_t(Op::I32LtU): + CHECK_NEXT( + dispatchComparison(emitCompareI32, ValType::I32, Assembler::Below)); + case uint16_t(Op::I32LeU): + CHECK_NEXT(dispatchComparison(emitCompareI32, ValType::I32, + Assembler::BelowOrEqual)); + case uint16_t(Op::I32GtU): + CHECK_NEXT( + dispatchComparison(emitCompareI32, ValType::I32, Assembler::Above)); + case uint16_t(Op::I32GeU): + CHECK_NEXT(dispatchComparison(emitCompareI32, ValType::I32, + Assembler::AboveOrEqual)); + case uint16_t(Op::I64Eq): + CHECK_NEXT( + dispatchComparison(emitCompareI64, ValType::I64, Assembler::Equal)); + case uint16_t(Op::I64Ne): + CHECK_NEXT(dispatchComparison(emitCompareI64, ValType::I64, + Assembler::NotEqual)); + case uint16_t(Op::I64LtS): + CHECK_NEXT(dispatchComparison(emitCompareI64, ValType::I64, + Assembler::LessThan)); + case uint16_t(Op::I64LeS): + CHECK_NEXT(dispatchComparison(emitCompareI64, ValType::I64, + Assembler::LessThanOrEqual)); + case uint16_t(Op::I64GtS): + CHECK_NEXT(dispatchComparison(emitCompareI64, ValType::I64, + Assembler::GreaterThan)); + case uint16_t(Op::I64GeS): + CHECK_NEXT(dispatchComparison(emitCompareI64, ValType::I64, + Assembler::GreaterThanOrEqual)); + case uint16_t(Op::I64LtU): + CHECK_NEXT( + dispatchComparison(emitCompareI64, ValType::I64, Assembler::Below)); + case uint16_t(Op::I64LeU): + CHECK_NEXT(dispatchComparison(emitCompareI64, ValType::I64, + Assembler::BelowOrEqual)); + case uint16_t(Op::I64GtU): + CHECK_NEXT( + dispatchComparison(emitCompareI64, ValType::I64, Assembler::Above)); + case uint16_t(Op::I64GeU): + CHECK_NEXT(dispatchComparison(emitCompareI64, ValType::I64, + Assembler::AboveOrEqual)); + case uint16_t(Op::F32Eq): + CHECK_NEXT(dispatchComparison(emitCompareF32, ValType::F32, + Assembler::DoubleEqual)); + case uint16_t(Op::F32Ne): + CHECK_NEXT(dispatchComparison(emitCompareF32, ValType::F32, + Assembler::DoubleNotEqualOrUnordered)); + case uint16_t(Op::F32Lt): + CHECK_NEXT(dispatchComparison(emitCompareF32, ValType::F32, + Assembler::DoubleLessThan)); + case uint16_t(Op::F32Le): + CHECK_NEXT(dispatchComparison(emitCompareF32, ValType::F32, + Assembler::DoubleLessThanOrEqual)); + case uint16_t(Op::F32Gt): + CHECK_NEXT(dispatchComparison(emitCompareF32, ValType::F32, + Assembler::DoubleGreaterThan)); + case uint16_t(Op::F32Ge): + CHECK_NEXT(dispatchComparison(emitCompareF32, ValType::F32, + Assembler::DoubleGreaterThanOrEqual)); + case uint16_t(Op::F64Eq): + CHECK_NEXT(dispatchComparison(emitCompareF64, ValType::F64, + Assembler::DoubleEqual)); + case uint16_t(Op::F64Ne): + CHECK_NEXT(dispatchComparison(emitCompareF64, ValType::F64, + Assembler::DoubleNotEqualOrUnordered)); + case uint16_t(Op::F64Lt): + 
CHECK_NEXT(dispatchComparison(emitCompareF64, ValType::F64, + Assembler::DoubleLessThan)); + case uint16_t(Op::F64Le): + CHECK_NEXT(dispatchComparison(emitCompareF64, ValType::F64, + Assembler::DoubleLessThanOrEqual)); + case uint16_t(Op::F64Gt): + CHECK_NEXT(dispatchComparison(emitCompareF64, ValType::F64, + Assembler::DoubleGreaterThan)); + case uint16_t(Op::F64Ge): + CHECK_NEXT(dispatchComparison(emitCompareF64, ValType::F64, + Assembler::DoubleGreaterThanOrEqual)); + + // Sign extensions + case uint16_t(Op::I32Extend8S): + CHECK_NEXT( + dispatchConversion(emitExtendI32_8, ValType::I32, ValType::I32)); + case uint16_t(Op::I32Extend16S): + CHECK_NEXT( + dispatchConversion(emitExtendI32_16, ValType::I32, ValType::I32)); + case uint16_t(Op::I64Extend8S): + CHECK_NEXT( + dispatchConversion(emitExtendI64_8, ValType::I64, ValType::I64)); + case uint16_t(Op::I64Extend16S): + CHECK_NEXT( + dispatchConversion(emitExtendI64_16, ValType::I64, ValType::I64)); + case uint16_t(Op::I64Extend32S): + CHECK_NEXT( + dispatchConversion(emitExtendI64_32, ValType::I64, ValType::I64)); + + // Memory Related + case uint16_t(Op::MemoryGrow): + CHECK_NEXT(emitMemoryGrow()); + case uint16_t(Op::MemorySize): + CHECK_NEXT(emitMemorySize()); + +#ifdef ENABLE_WASM_FUNCTION_REFERENCES + case uint16_t(Op::RefAsNonNull): + if (!moduleEnv_.functionReferencesEnabled()) { + return iter_.unrecognizedOpcode(&op); + } + CHECK_NEXT(emitRefAsNonNull()); + case uint16_t(Op::BrOnNull): + if (!moduleEnv_.functionReferencesEnabled()) { + return iter_.unrecognizedOpcode(&op); + } + CHECK_NEXT(emitBrOnNull()); +#endif +#ifdef ENABLE_WASM_GC + case uint16_t(Op::RefEq): + if (!moduleEnv_.gcTypesEnabled()) { + return iter_.unrecognizedOpcode(&op); + } + CHECK_NEXT(dispatchComparison(emitCompareRef, RefType::eq(), + Assembler::Equal)); +#endif +#ifdef ENABLE_WASM_REFTYPES + case uint16_t(Op::RefFunc): + CHECK_NEXT(emitRefFunc()); + break; + case uint16_t(Op::RefNull): + CHECK_NEXT(emitRefNull()); + break; + case uint16_t(Op::RefIsNull): + CHECK_NEXT(emitRefIsNull()); + break; +#endif + +#ifdef ENABLE_WASM_GC + // "GC" operations + case uint16_t(Op::GcPrefix): { + if (!moduleEnv_.gcTypesEnabled()) { + return iter_.unrecognizedOpcode(&op); + } + switch (op.b1) { + case uint32_t(GcOp::StructNew): + CHECK_NEXT(emitStructNew()); + case uint32_t(GcOp::StructGet): + CHECK_NEXT(emitStructGet()); + case uint32_t(GcOp::StructSet): + CHECK_NEXT(emitStructSet()); + case uint32_t(GcOp::StructNarrow): + CHECK_NEXT(emitStructNarrow()); + default: + break; + } // switch (op.b1) + return iter_.unrecognizedOpcode(&op); + } +#endif + +#ifdef ENABLE_WASM_SIMD + // SIMD operations + case uint16_t(Op::SimdPrefix): { + uint32_t laneIndex; + if (!moduleEnv_.v128Enabled()) { + return iter_.unrecognizedOpcode(&op); + } + switch (op.b1) { + case uint32_t(SimdOp::I8x16ExtractLaneS): + CHECK_NEXT(dispatchExtractLane(ExtractLaneI8x16, ValType::I32, 16)); + case uint32_t(SimdOp::I8x16ExtractLaneU): + CHECK_NEXT( + dispatchExtractLane(ExtractLaneUI8x16, ValType::I32, 16)); + case uint32_t(SimdOp::I16x8ExtractLaneS): + CHECK_NEXT(dispatchExtractLane(ExtractLaneI16x8, ValType::I32, 8)); + case uint32_t(SimdOp::I16x8ExtractLaneU): + CHECK_NEXT(dispatchExtractLane(ExtractLaneUI16x8, ValType::I32, 8)); + case uint32_t(SimdOp::I32x4ExtractLane): + CHECK_NEXT(dispatchExtractLane(ExtractLaneI32x4, ValType::I32, 4)); + case uint32_t(SimdOp::I64x2ExtractLane): + CHECK_NEXT(dispatchExtractLane(ExtractLaneI64x2, ValType::I64, 2)); + case uint32_t(SimdOp::F32x4ExtractLane): + 
CHECK_NEXT(dispatchExtractLane(ExtractLaneF32x4, ValType::F32, 4)); + case uint32_t(SimdOp::F64x2ExtractLane): + CHECK_NEXT(dispatchExtractLane(ExtractLaneF64x2, ValType::F64, 2)); + case uint32_t(SimdOp::I8x16Splat): + CHECK_NEXT(dispatchSplat(SplatI8x16, ValType::I32)); + case uint32_t(SimdOp::I16x8Splat): + CHECK_NEXT(dispatchSplat(SplatI16x8, ValType::I32)); + case uint32_t(SimdOp::I32x4Splat): + CHECK_NEXT(dispatchSplat(SplatI32x4, ValType::I32)); + case uint32_t(SimdOp::I64x2Splat): + CHECK_NEXT(dispatchSplat(SplatI64x2, ValType::I64)); + case uint32_t(SimdOp::F32x4Splat): + CHECK_NEXT(dispatchSplat(SplatF32x4, ValType::F32)); + case uint32_t(SimdOp::F64x2Splat): + CHECK_NEXT(dispatchSplat(SplatF64x2, ValType::F64)); + case uint32_t(SimdOp::I8x16AnyTrue): + case uint32_t(SimdOp::I16x8AnyTrue): + case uint32_t(SimdOp::I32x4AnyTrue): + CHECK_NEXT(dispatchVectorReduction(AnyTrue)); + case uint32_t(SimdOp::I8x16AllTrue): + CHECK_NEXT(dispatchVectorReduction(AllTrueI8x16)); + case uint32_t(SimdOp::I16x8AllTrue): + CHECK_NEXT(dispatchVectorReduction(AllTrueI16x8)); + case uint32_t(SimdOp::I32x4AllTrue): + CHECK_NEXT(dispatchVectorReduction(AllTrueI32x4)); + case uint32_t(SimdOp::I8x16Bitmask): + CHECK_NEXT(dispatchVectorReduction(BitmaskI8x16)); + case uint32_t(SimdOp::I16x8Bitmask): + CHECK_NEXT(dispatchVectorReduction(BitmaskI16x8)); + case uint32_t(SimdOp::I32x4Bitmask): + CHECK_NEXT(dispatchVectorReduction(BitmaskI32x4)); + case uint32_t(SimdOp::I8x16ReplaceLane): + CHECK_NEXT(dispatchReplaceLane(ReplaceLaneI8x16, ValType::I32, 16)); + case uint32_t(SimdOp::I16x8ReplaceLane): + CHECK_NEXT(dispatchReplaceLane(ReplaceLaneI16x8, ValType::I32, 8)); + case uint32_t(SimdOp::I32x4ReplaceLane): + CHECK_NEXT(dispatchReplaceLane(ReplaceLaneI32x4, ValType::I32, 4)); + case uint32_t(SimdOp::I64x2ReplaceLane): + CHECK_NEXT(dispatchReplaceLane(ReplaceLaneI64x2, ValType::I64, 2)); + case uint32_t(SimdOp::F32x4ReplaceLane): + CHECK_NEXT(dispatchReplaceLane(ReplaceLaneF32x4, ValType::F32, 4)); + case uint32_t(SimdOp::F64x2ReplaceLane): + CHECK_NEXT(dispatchReplaceLane(ReplaceLaneF64x2, ValType::F64, 2)); + case uint32_t(SimdOp::I8x16Eq): + CHECK_NEXT(dispatchVectorComparison(CmpI8x16, Assembler::Equal)); + case uint32_t(SimdOp::I8x16Ne): + CHECK_NEXT(dispatchVectorComparison(CmpI8x16, Assembler::NotEqual)); + case uint32_t(SimdOp::I8x16LtS): + CHECK_NEXT(dispatchVectorComparison(CmpI8x16, Assembler::LessThan)); + case uint32_t(SimdOp::I8x16LtU): + CHECK_NEXT(dispatchVectorComparison(CmpUI8x16, Assembler::Below)); + case uint32_t(SimdOp::I8x16GtS): + CHECK_NEXT( + dispatchVectorComparison(CmpI8x16, Assembler::GreaterThan)); + case uint32_t(SimdOp::I8x16GtU): + CHECK_NEXT(dispatchVectorComparison(CmpUI8x16, Assembler::Above)); + case uint32_t(SimdOp::I8x16LeS): + CHECK_NEXT( + dispatchVectorComparison(CmpI8x16, Assembler::LessThanOrEqual)); + case uint32_t(SimdOp::I8x16LeU): + CHECK_NEXT( + dispatchVectorComparison(CmpUI8x16, Assembler::BelowOrEqual)); + case uint32_t(SimdOp::I8x16GeS): + CHECK_NEXT(dispatchVectorComparison(CmpI8x16, + Assembler::GreaterThanOrEqual)); + case uint32_t(SimdOp::I8x16GeU): + CHECK_NEXT( + dispatchVectorComparison(CmpUI8x16, Assembler::AboveOrEqual)); + case uint32_t(SimdOp::I16x8Eq): + CHECK_NEXT(dispatchVectorComparison(CmpI16x8, Assembler::Equal)); + case uint32_t(SimdOp::I16x8Ne): + CHECK_NEXT(dispatchVectorComparison(CmpI16x8, Assembler::NotEqual)); + case uint32_t(SimdOp::I16x8LtS): + CHECK_NEXT(dispatchVectorComparison(CmpI16x8, Assembler::LessThan)); + case 
uint32_t(SimdOp::I16x8LtU): + CHECK_NEXT(dispatchVectorComparison(CmpUI16x8, Assembler::Below)); + case uint32_t(SimdOp::I16x8GtS): + CHECK_NEXT( + dispatchVectorComparison(CmpI16x8, Assembler::GreaterThan)); + case uint32_t(SimdOp::I16x8GtU): + CHECK_NEXT(dispatchVectorComparison(CmpUI16x8, Assembler::Above)); + case uint32_t(SimdOp::I16x8LeS): + CHECK_NEXT( + dispatchVectorComparison(CmpI16x8, Assembler::LessThanOrEqual)); + case uint32_t(SimdOp::I16x8LeU): + CHECK_NEXT( + dispatchVectorComparison(CmpUI16x8, Assembler::BelowOrEqual)); + case uint32_t(SimdOp::I16x8GeS): + CHECK_NEXT(dispatchVectorComparison(CmpI16x8, + Assembler::GreaterThanOrEqual)); + case uint32_t(SimdOp::I16x8GeU): + CHECK_NEXT( + dispatchVectorComparison(CmpUI16x8, Assembler::AboveOrEqual)); + case uint32_t(SimdOp::I32x4Eq): + CHECK_NEXT(dispatchVectorComparison(CmpI32x4, Assembler::Equal)); + case uint32_t(SimdOp::I32x4Ne): + CHECK_NEXT(dispatchVectorComparison(CmpI32x4, Assembler::NotEqual)); + case uint32_t(SimdOp::I32x4LtS): + CHECK_NEXT(dispatchVectorComparison(CmpI32x4, Assembler::LessThan)); + case uint32_t(SimdOp::I32x4LtU): + CHECK_NEXT(dispatchVectorComparison(CmpUI32x4, Assembler::Below)); + case uint32_t(SimdOp::I32x4GtS): + CHECK_NEXT( + dispatchVectorComparison(CmpI32x4, Assembler::GreaterThan)); + case uint32_t(SimdOp::I32x4GtU): + CHECK_NEXT(dispatchVectorComparison(CmpUI32x4, Assembler::Above)); + case uint32_t(SimdOp::I32x4LeS): + CHECK_NEXT( + dispatchVectorComparison(CmpI32x4, Assembler::LessThanOrEqual)); + case uint32_t(SimdOp::I32x4LeU): + CHECK_NEXT( + dispatchVectorComparison(CmpUI32x4, Assembler::BelowOrEqual)); + case uint32_t(SimdOp::I32x4GeS): + CHECK_NEXT(dispatchVectorComparison(CmpI32x4, + Assembler::GreaterThanOrEqual)); + case uint32_t(SimdOp::I32x4GeU): + CHECK_NEXT( + dispatchVectorComparison(CmpUI32x4, Assembler::AboveOrEqual)); + case uint32_t(SimdOp::F32x4Eq): + CHECK_NEXT(dispatchVectorComparison(CmpF32x4, Assembler::Equal)); + case uint32_t(SimdOp::F32x4Ne): + CHECK_NEXT(dispatchVectorComparison(CmpF32x4, Assembler::NotEqual)); + case uint32_t(SimdOp::F32x4Lt): + CHECK_NEXT(dispatchVectorComparison(CmpF32x4, Assembler::LessThan)); + case uint32_t(SimdOp::F32x4Gt): + CHECK_NEXT( + dispatchVectorComparison(CmpF32x4, Assembler::GreaterThan)); + case uint32_t(SimdOp::F32x4Le): + CHECK_NEXT( + dispatchVectorComparison(CmpF32x4, Assembler::LessThanOrEqual)); + case uint32_t(SimdOp::F32x4Ge): + CHECK_NEXT(dispatchVectorComparison(CmpF32x4, + Assembler::GreaterThanOrEqual)); + case uint32_t(SimdOp::F64x2Eq): + CHECK_NEXT(dispatchVectorComparison(CmpF64x2, Assembler::Equal)); + case uint32_t(SimdOp::F64x2Ne): + CHECK_NEXT(dispatchVectorComparison(CmpF64x2, Assembler::NotEqual)); + case uint32_t(SimdOp::F64x2Lt): + CHECK_NEXT(dispatchVectorComparison(CmpF64x2, Assembler::LessThan)); + case uint32_t(SimdOp::F64x2Gt): + CHECK_NEXT( + dispatchVectorComparison(CmpF64x2, Assembler::GreaterThan)); + case uint32_t(SimdOp::F64x2Le): + CHECK_NEXT( + dispatchVectorComparison(CmpF64x2, Assembler::LessThanOrEqual)); + case uint32_t(SimdOp::F64x2Ge): + CHECK_NEXT(dispatchVectorComparison(CmpF64x2, + Assembler::GreaterThanOrEqual)); + case uint32_t(SimdOp::V128And): + CHECK_NEXT(dispatchVectorBinary(AndV128)); + case uint32_t(SimdOp::V128Or): + CHECK_NEXT(dispatchVectorBinary(OrV128)); + case uint32_t(SimdOp::V128Xor): + CHECK_NEXT(dispatchVectorBinary(XorV128)); + case uint32_t(SimdOp::V128AndNot): + CHECK_NEXT(dispatchBinary(emitVectorAndNot, ValType::V128)); + case uint32_t(SimdOp::I8x16AvgrU): + 
CHECK_NEXT(dispatchVectorBinary(AverageUI8x16)); + case uint32_t(SimdOp::I16x8AvgrU): + CHECK_NEXT(dispatchVectorBinary(AverageUI16x8)); + case uint32_t(SimdOp::I8x16Add): + CHECK_NEXT(dispatchVectorBinary(AddI8x16)); + case uint32_t(SimdOp::I8x16AddSaturateS): + CHECK_NEXT(dispatchVectorBinary(AddSatI8x16)); + case uint32_t(SimdOp::I8x16AddSaturateU): + CHECK_NEXT(dispatchVectorBinary(AddSatUI8x16)); + case uint32_t(SimdOp::I8x16Sub): + CHECK_NEXT(dispatchVectorBinary(SubI8x16)); + case uint32_t(SimdOp::I8x16SubSaturateS): + CHECK_NEXT(dispatchVectorBinary(SubSatI8x16)); + case uint32_t(SimdOp::I8x16SubSaturateU): + CHECK_NEXT(dispatchVectorBinary(SubSatUI8x16)); + case uint32_t(SimdOp::I8x16MinS): + CHECK_NEXT(dispatchVectorBinary(MinI8x16)); + case uint32_t(SimdOp::I8x16MinU): + CHECK_NEXT(dispatchVectorBinary(MinUI8x16)); + case uint32_t(SimdOp::I8x16MaxS): + CHECK_NEXT(dispatchVectorBinary(MaxI8x16)); + case uint32_t(SimdOp::I8x16MaxU): + CHECK_NEXT(dispatchVectorBinary(MaxUI8x16)); + case uint32_t(SimdOp::I16x8Add): + CHECK_NEXT(dispatchVectorBinary(AddI16x8)); + case uint32_t(SimdOp::I16x8AddSaturateS): + CHECK_NEXT(dispatchVectorBinary(AddSatI16x8)); + case uint32_t(SimdOp::I16x8AddSaturateU): + CHECK_NEXT(dispatchVectorBinary(AddSatUI16x8)); + case uint32_t(SimdOp::I16x8Sub): + CHECK_NEXT(dispatchVectorBinary(SubI16x8)); + case uint32_t(SimdOp::I16x8SubSaturateS): + CHECK_NEXT(dispatchVectorBinary(SubSatI16x8)); + case uint32_t(SimdOp::I16x8SubSaturateU): + CHECK_NEXT(dispatchVectorBinary(SubSatUI16x8)); + case uint32_t(SimdOp::I16x8Mul): + CHECK_NEXT(dispatchVectorBinary(MulI16x8)); + case uint32_t(SimdOp::I16x8MinS): + CHECK_NEXT(dispatchVectorBinary(MinI16x8)); + case uint32_t(SimdOp::I16x8MinU): + CHECK_NEXT(dispatchVectorBinary(MinUI16x8)); + case uint32_t(SimdOp::I16x8MaxS): + CHECK_NEXT(dispatchVectorBinary(MaxI16x8)); + case uint32_t(SimdOp::I16x8MaxU): + CHECK_NEXT(dispatchVectorBinary(MaxUI16x8)); + case uint32_t(SimdOp::I32x4Add): + CHECK_NEXT(dispatchVectorBinary(AddI32x4)); + case uint32_t(SimdOp::I32x4Sub): + CHECK_NEXT(dispatchVectorBinary(SubI32x4)); + case uint32_t(SimdOp::I32x4Mul): + CHECK_NEXT(dispatchVectorBinary(MulI32x4)); + case uint32_t(SimdOp::I32x4MinS): + CHECK_NEXT(dispatchVectorBinary(MinI32x4)); + case uint32_t(SimdOp::I32x4MinU): + CHECK_NEXT(dispatchVectorBinary(MinUI32x4)); + case uint32_t(SimdOp::I32x4MaxS): + CHECK_NEXT(dispatchVectorBinary(MaxI32x4)); + case uint32_t(SimdOp::I32x4MaxU): + CHECK_NEXT(dispatchVectorBinary(MaxUI32x4)); + case uint32_t(SimdOp::I64x2Add): + CHECK_NEXT(dispatchVectorBinary(AddI64x2)); + case uint32_t(SimdOp::I64x2Sub): + CHECK_NEXT(dispatchVectorBinary(SubI64x2)); + case uint32_t(SimdOp::I64x2Mul): + CHECK_NEXT(emitVectorMulI64x2()); + case uint32_t(SimdOp::F32x4Add): + CHECK_NEXT(dispatchVectorBinary(AddF32x4)); + case uint32_t(SimdOp::F32x4Sub): + CHECK_NEXT(dispatchVectorBinary(SubF32x4)); + case uint32_t(SimdOp::F32x4Mul): + CHECK_NEXT(dispatchVectorBinary(MulF32x4)); + case uint32_t(SimdOp::F32x4Div): + CHECK_NEXT(dispatchVectorBinary(DivF32x4)); + case uint32_t(SimdOp::F32x4Min): + CHECK_NEXT(dispatchVectorBinary(MinF32x4)); + case uint32_t(SimdOp::F32x4Max): + CHECK_NEXT(dispatchVectorBinary(MaxF32x4)); + case uint32_t(SimdOp::F64x2Add): + CHECK_NEXT(dispatchVectorBinary(AddF64x2)); + case uint32_t(SimdOp::F64x2Sub): + CHECK_NEXT(dispatchVectorBinary(SubF64x2)); + case uint32_t(SimdOp::F64x2Mul): + CHECK_NEXT(dispatchVectorBinary(MulF64x2)); + case uint32_t(SimdOp::F64x2Div): + 
CHECK_NEXT(dispatchVectorBinary(DivF64x2)); + case uint32_t(SimdOp::F64x2Min): + CHECK_NEXT(dispatchVectorBinary(MinF64x2)); + case uint32_t(SimdOp::F64x2Max): + CHECK_NEXT(dispatchVectorBinary(MaxF64x2)); + case uint32_t(SimdOp::I8x16NarrowSI16x8): + CHECK_NEXT(dispatchVectorBinary(NarrowI16x8)); + case uint32_t(SimdOp::I8x16NarrowUI16x8): + CHECK_NEXT(dispatchVectorBinary(NarrowUI16x8)); + case uint32_t(SimdOp::I16x8NarrowSI32x4): + CHECK_NEXT(dispatchVectorBinary(NarrowI32x4)); + case uint32_t(SimdOp::I16x8NarrowUI32x4): + CHECK_NEXT(dispatchVectorBinary(NarrowUI32x4)); + case uint32_t(SimdOp::V8x16Swizzle): + CHECK_NEXT(dispatchVectorBinary(Swizzle)); + case uint32_t(SimdOp::F32x4PMax): + CHECK_NEXT(dispatchVectorBinary(PMaxF32x4)); + case uint32_t(SimdOp::F32x4PMin): + CHECK_NEXT(dispatchVectorBinary(PMinF32x4)); + case uint32_t(SimdOp::F64x2PMax): + CHECK_NEXT(dispatchVectorBinary(PMaxF64x2)); + case uint32_t(SimdOp::F64x2PMin): + CHECK_NEXT(dispatchVectorBinary(PMinF64x2)); + case uint32_t(SimdOp::I32x4DotSI16x8): + CHECK_NEXT(dispatchVectorBinary(DotI16x8)); + case uint32_t(SimdOp::I8x16Neg): + CHECK_NEXT(dispatchVectorUnary(NegI8x16)); + case uint32_t(SimdOp::I16x8Neg): + CHECK_NEXT(dispatchVectorUnary(NegI16x8)); + case uint32_t(SimdOp::I16x8WidenLowSI8x16): + CHECK_NEXT(dispatchVectorUnary(WidenLowI8x16)); + case uint32_t(SimdOp::I16x8WidenHighSI8x16): + CHECK_NEXT(dispatchVectorUnary(WidenHighI8x16)); + case uint32_t(SimdOp::I16x8WidenLowUI8x16): + CHECK_NEXT(dispatchVectorUnary(WidenLowUI8x16)); + case uint32_t(SimdOp::I16x8WidenHighUI8x16): + CHECK_NEXT(dispatchVectorUnary(WidenHighUI8x16)); + case uint32_t(SimdOp::I32x4Neg): + CHECK_NEXT(dispatchVectorUnary(NegI32x4)); + case uint32_t(SimdOp::I32x4WidenLowSI16x8): + CHECK_NEXT(dispatchVectorUnary(WidenLowI16x8)); + case uint32_t(SimdOp::I32x4WidenHighSI16x8): + CHECK_NEXT(dispatchVectorUnary(WidenHighI16x8)); + case uint32_t(SimdOp::I32x4WidenLowUI16x8): + CHECK_NEXT(dispatchVectorUnary(WidenLowUI16x8)); + case uint32_t(SimdOp::I32x4WidenHighUI16x8): + CHECK_NEXT(dispatchVectorUnary(WidenHighUI16x8)); + case uint32_t(SimdOp::I32x4TruncSSatF32x4): + CHECK_NEXT(dispatchVectorUnary(ConvertF32x4ToI32x4)); + case uint32_t(SimdOp::I32x4TruncUSatF32x4): + CHECK_NEXT(dispatchVectorUnary(ConvertF32x4ToUI32x4)); + case uint32_t(SimdOp::I64x2Neg): + CHECK_NEXT(dispatchVectorUnary(NegI64x2)); + case uint32_t(SimdOp::F32x4Abs): + CHECK_NEXT(dispatchVectorUnary(AbsF32x4)); + case uint32_t(SimdOp::F32x4Neg): + CHECK_NEXT(dispatchVectorUnary(NegF32x4)); + case uint32_t(SimdOp::F32x4Sqrt): + CHECK_NEXT(dispatchVectorUnary(SqrtF32x4)); + case uint32_t(SimdOp::F32x4ConvertSI32x4): + CHECK_NEXT(dispatchVectorUnary(ConvertI32x4ToF32x4)); + case uint32_t(SimdOp::F32x4ConvertUI32x4): + CHECK_NEXT(dispatchVectorUnary(ConvertUI32x4ToF32x4)); + case uint32_t(SimdOp::F64x2Abs): + CHECK_NEXT(dispatchVectorUnary(AbsF64x2)); + case uint32_t(SimdOp::F64x2Neg): + CHECK_NEXT(dispatchVectorUnary(NegF64x2)); + case uint32_t(SimdOp::F64x2Sqrt): + CHECK_NEXT(dispatchVectorUnary(SqrtF64x2)); + case uint32_t(SimdOp::V128Not): + CHECK_NEXT(dispatchVectorUnary(NotV128)); + case uint32_t(SimdOp::I8x16Abs): + CHECK_NEXT(dispatchVectorUnary(AbsI8x16)); + case uint32_t(SimdOp::I16x8Abs): + CHECK_NEXT(dispatchVectorUnary(AbsI16x8)); + case uint32_t(SimdOp::I32x4Abs): + CHECK_NEXT(dispatchVectorUnary(AbsI32x4)); + case uint32_t(SimdOp::F32x4Ceil): + CHECK_NEXT(dispatchVectorUnary(CeilF32x4)); + case uint32_t(SimdOp::F32x4Floor): + 
CHECK_NEXT(dispatchVectorUnary(FloorF32x4)); + case uint32_t(SimdOp::F32x4Trunc): + CHECK_NEXT(dispatchVectorUnary(TruncF32x4)); + case uint32_t(SimdOp::F32x4Nearest): + CHECK_NEXT(dispatchVectorUnary(NearestF32x4)); + case uint32_t(SimdOp::F64x2Ceil): + CHECK_NEXT(dispatchVectorUnary(CeilF64x2)); + case uint32_t(SimdOp::F64x2Floor): + CHECK_NEXT(dispatchVectorUnary(FloorF64x2)); + case uint32_t(SimdOp::F64x2Trunc): + CHECK_NEXT(dispatchVectorUnary(TruncF64x2)); + case uint32_t(SimdOp::F64x2Nearest): + CHECK_NEXT(dispatchVectorUnary(NearestF64x2)); + case uint32_t(SimdOp::I8x16Shl): + CHECK_NEXT(dispatchVectorVariableShift(ShiftLeftI8x16)); + case uint32_t(SimdOp::I8x16ShrS): + CHECK_NEXT(dispatchVectorVariableShift(ShiftRightI8x16)); + case uint32_t(SimdOp::I8x16ShrU): + CHECK_NEXT(dispatchVectorVariableShift(ShiftRightUI8x16)); + case uint32_t(SimdOp::I16x8Shl): + CHECK_NEXT(dispatchVectorVariableShift(ShiftLeftI16x8)); + case uint32_t(SimdOp::I16x8ShrS): + CHECK_NEXT(dispatchVectorVariableShift(ShiftRightI16x8)); + case uint32_t(SimdOp::I16x8ShrU): + CHECK_NEXT(dispatchVectorVariableShift(ShiftRightUI16x8)); + case uint32_t(SimdOp::I32x4Shl): + CHECK_NEXT(dispatchVectorVariableShift(ShiftLeftI32x4)); + case uint32_t(SimdOp::I32x4ShrS): + CHECK_NEXT(dispatchVectorVariableShift(ShiftRightI32x4)); + case uint32_t(SimdOp::I32x4ShrU): + CHECK_NEXT(dispatchVectorVariableShift(ShiftRightUI32x4)); + case uint32_t(SimdOp::I64x2Shl): + CHECK_NEXT(dispatchVectorVariableShift(ShiftLeftI64x2)); + case uint32_t(SimdOp::I64x2ShrS): + CHECK_NEXT(emitVectorShiftRightI64x2(/* isUnsigned */ false)); + case uint32_t(SimdOp::I64x2ShrU): + CHECK_NEXT(emitVectorShiftRightI64x2(/* isUnsigned */ true)); + case uint32_t(SimdOp::V128Bitselect): + CHECK_NEXT(emitBitselect()); + case uint32_t(SimdOp::V8x16Shuffle): + CHECK_NEXT(emitVectorShuffle()); + case uint32_t(SimdOp::V128Const): { + V128 v128; + CHECK(iter_.readV128Const(&v128)); + if (!deadCode_) { + pushV128(v128); + } + NEXT(); + } + case uint32_t(SimdOp::V128Load): + CHECK_NEXT(emitLoad(ValType::V128, Scalar::Simd128)); + case uint32_t(SimdOp::V8x16LoadSplat): + CHECK_NEXT(emitLoadSplat(Scalar::Uint8)); + case uint32_t(SimdOp::V16x8LoadSplat): + CHECK_NEXT(emitLoadSplat(Scalar::Uint16)); + case uint32_t(SimdOp::V32x4LoadSplat): + CHECK_NEXT(emitLoadSplat(Scalar::Uint32)); + case uint32_t(SimdOp::V64x2LoadSplat): + CHECK_NEXT(emitLoadSplat(Scalar::Int64)); + case uint32_t(SimdOp::I16x8LoadS8x8): + CHECK_NEXT(emitLoadExtend(Scalar::Int8)); + case uint32_t(SimdOp::I16x8LoadU8x8): + CHECK_NEXT(emitLoadExtend(Scalar::Uint8)); + case uint32_t(SimdOp::I32x4LoadS16x4): + CHECK_NEXT(emitLoadExtend(Scalar::Int16)); + case uint32_t(SimdOp::I32x4LoadU16x4): + CHECK_NEXT(emitLoadExtend(Scalar::Uint16)); + case uint32_t(SimdOp::I64x2LoadS32x2): + CHECK_NEXT(emitLoadExtend(Scalar::Int32)); + case uint32_t(SimdOp::I64x2LoadU32x2): + CHECK_NEXT(emitLoadExtend(Scalar::Uint32)); + case uint32_t(SimdOp::V128Load32Zero): + CHECK_NEXT(emitLoadZero(Scalar::Float32)); + case uint32_t(SimdOp::V128Load64Zero): + CHECK_NEXT(emitLoadZero(Scalar::Float64)); + case uint32_t(SimdOp::V128Store): + CHECK_NEXT(emitStore(ValType::V128, Scalar::Simd128)); + default: + break; + } // switch (op.b1) + return iter_.unrecognizedOpcode(&op); + } +#endif // ENABLE_WASM_SIMD + + // "Miscellaneous" operations + case uint16_t(Op::MiscPrefix): { + switch (op.b1) { + case uint32_t(MiscOp::I32TruncSSatF32): + CHECK_NEXT( + dispatchConversionOOM(emitTruncateF32ToI32<TRUNC_SATURATING>, + ValType::F32, 
ValType::I32)); + case uint32_t(MiscOp::I32TruncUSatF32): + CHECK_NEXT(dispatchConversionOOM( + emitTruncateF32ToI32<TRUNC_UNSIGNED | TRUNC_SATURATING>, + ValType::F32, ValType::I32)); + case uint32_t(MiscOp::I32TruncSSatF64): + CHECK_NEXT( + dispatchConversionOOM(emitTruncateF64ToI32<TRUNC_SATURATING>, + ValType::F64, ValType::I32)); + case uint32_t(MiscOp::I32TruncUSatF64): + CHECK_NEXT(dispatchConversionOOM( + emitTruncateF64ToI32<TRUNC_UNSIGNED | TRUNC_SATURATING>, + ValType::F64, ValType::I32)); + case uint32_t(MiscOp::I64TruncSSatF32): +#ifdef RABALDR_FLOAT_TO_I64_CALLOUT + CHECK_NEXT(dispatchCalloutConversionOOM( + emitConvertFloatingToInt64Callout, + SymbolicAddress::SaturatingTruncateDoubleToInt64, ValType::F32, + ValType::I64)); +#else + CHECK_NEXT( + dispatchConversionOOM(emitTruncateF32ToI64<TRUNC_SATURATING>, + ValType::F32, ValType::I64)); +#endif + case uint32_t(MiscOp::I64TruncUSatF32): +#ifdef RABALDR_FLOAT_TO_I64_CALLOUT + CHECK_NEXT(dispatchCalloutConversionOOM( + emitConvertFloatingToInt64Callout, + SymbolicAddress::SaturatingTruncateDoubleToUint64, ValType::F32, + ValType::I64)); +#else + CHECK_NEXT(dispatchConversionOOM( + emitTruncateF32ToI64<TRUNC_UNSIGNED | TRUNC_SATURATING>, + ValType::F32, ValType::I64)); +#endif + case uint32_t(MiscOp::I64TruncSSatF64): +#ifdef RABALDR_FLOAT_TO_I64_CALLOUT + CHECK_NEXT(dispatchCalloutConversionOOM( + emitConvertFloatingToInt64Callout, + SymbolicAddress::SaturatingTruncateDoubleToInt64, ValType::F64, + ValType::I64)); +#else + CHECK_NEXT( + dispatchConversionOOM(emitTruncateF64ToI64<TRUNC_SATURATING>, + ValType::F64, ValType::I64)); +#endif + case uint32_t(MiscOp::I64TruncUSatF64): +#ifdef RABALDR_FLOAT_TO_I64_CALLOUT + CHECK_NEXT(dispatchCalloutConversionOOM( + emitConvertFloatingToInt64Callout, + SymbolicAddress::SaturatingTruncateDoubleToUint64, ValType::F64, + ValType::I64)); +#else + CHECK_NEXT(dispatchConversionOOM( + emitTruncateF64ToI64<TRUNC_UNSIGNED | TRUNC_SATURATING>, + ValType::F64, ValType::I64)); +#endif + case uint32_t(MiscOp::MemCopy): + CHECK_NEXT(emitMemCopy()); + case uint32_t(MiscOp::DataDrop): + CHECK_NEXT(emitDataOrElemDrop(/*isData=*/true)); + case uint32_t(MiscOp::MemFill): + CHECK_NEXT(emitMemFill()); + case uint32_t(MiscOp::MemInit): + CHECK_NEXT(emitMemOrTableInit(/*isMem=*/true)); + case uint32_t(MiscOp::TableCopy): + CHECK_NEXT(emitTableCopy()); + case uint32_t(MiscOp::ElemDrop): + CHECK_NEXT(emitDataOrElemDrop(/*isData=*/false)); + case uint32_t(MiscOp::TableInit): + CHECK_NEXT(emitMemOrTableInit(/*isMem=*/false)); +#ifdef ENABLE_WASM_REFTYPES + case uint32_t(MiscOp::TableFill): + CHECK_NEXT(emitTableFill()); + case uint32_t(MiscOp::TableGrow): + CHECK_NEXT(emitTableGrow()); + case uint32_t(MiscOp::TableSize): + CHECK_NEXT(emitTableSize()); +#endif + default: + break; + } // switch (op.b1) + return iter_.unrecognizedOpcode(&op); + } + + // Thread operations + case uint16_t(Op::ThreadPrefix): { + if (moduleEnv_.sharedMemoryEnabled() == Shareable::False) { + return iter_.unrecognizedOpcode(&op); + } + switch (op.b1) { + case uint32_t(ThreadOp::Wake): + CHECK_NEXT(emitWake()); + + case uint32_t(ThreadOp::I32Wait): + CHECK_NEXT(emitWait(ValType::I32, 4)); + case uint32_t(ThreadOp::I64Wait): + CHECK_NEXT(emitWait(ValType::I64, 8)); + case uint32_t(ThreadOp::Fence): + CHECK_NEXT(emitFence()); + + case uint32_t(ThreadOp::I32AtomicLoad): + CHECK_NEXT(emitAtomicLoad(ValType::I32, Scalar::Int32)); + case uint32_t(ThreadOp::I64AtomicLoad): + CHECK_NEXT(emitAtomicLoad(ValType::I64, Scalar::Int64)); + case 
uint32_t(ThreadOp::I32AtomicLoad8U): + CHECK_NEXT(emitAtomicLoad(ValType::I32, Scalar::Uint8)); + case uint32_t(ThreadOp::I32AtomicLoad16U): + CHECK_NEXT(emitAtomicLoad(ValType::I32, Scalar::Uint16)); + case uint32_t(ThreadOp::I64AtomicLoad8U): + CHECK_NEXT(emitAtomicLoad(ValType::I64, Scalar::Uint8)); + case uint32_t(ThreadOp::I64AtomicLoad16U): + CHECK_NEXT(emitAtomicLoad(ValType::I64, Scalar::Uint16)); + case uint32_t(ThreadOp::I64AtomicLoad32U): + CHECK_NEXT(emitAtomicLoad(ValType::I64, Scalar::Uint32)); + + case uint32_t(ThreadOp::I32AtomicStore): + CHECK_NEXT(emitAtomicStore(ValType::I32, Scalar::Int32)); + case uint32_t(ThreadOp::I64AtomicStore): + CHECK_NEXT(emitAtomicStore(ValType::I64, Scalar::Int64)); + case uint32_t(ThreadOp::I32AtomicStore8U): + CHECK_NEXT(emitAtomicStore(ValType::I32, Scalar::Uint8)); + case uint32_t(ThreadOp::I32AtomicStore16U): + CHECK_NEXT(emitAtomicStore(ValType::I32, Scalar::Uint16)); + case uint32_t(ThreadOp::I64AtomicStore8U): + CHECK_NEXT(emitAtomicStore(ValType::I64, Scalar::Uint8)); + case uint32_t(ThreadOp::I64AtomicStore16U): + CHECK_NEXT(emitAtomicStore(ValType::I64, Scalar::Uint16)); + case uint32_t(ThreadOp::I64AtomicStore32U): + CHECK_NEXT(emitAtomicStore(ValType::I64, Scalar::Uint32)); + + case uint32_t(ThreadOp::I32AtomicAdd): + CHECK_NEXT( + emitAtomicRMW(ValType::I32, Scalar::Int32, AtomicFetchAddOp)); + case uint32_t(ThreadOp::I64AtomicAdd): + CHECK_NEXT( + emitAtomicRMW(ValType::I64, Scalar::Int64, AtomicFetchAddOp)); + case uint32_t(ThreadOp::I32AtomicAdd8U): + CHECK_NEXT( + emitAtomicRMW(ValType::I32, Scalar::Uint8, AtomicFetchAddOp)); + case uint32_t(ThreadOp::I32AtomicAdd16U): + CHECK_NEXT( + emitAtomicRMW(ValType::I32, Scalar::Uint16, AtomicFetchAddOp)); + case uint32_t(ThreadOp::I64AtomicAdd8U): + CHECK_NEXT( + emitAtomicRMW(ValType::I64, Scalar::Uint8, AtomicFetchAddOp)); + case uint32_t(ThreadOp::I64AtomicAdd16U): + CHECK_NEXT( + emitAtomicRMW(ValType::I64, Scalar::Uint16, AtomicFetchAddOp)); + case uint32_t(ThreadOp::I64AtomicAdd32U): + CHECK_NEXT( + emitAtomicRMW(ValType::I64, Scalar::Uint32, AtomicFetchAddOp)); + + case uint32_t(ThreadOp::I32AtomicSub): + CHECK_NEXT( + emitAtomicRMW(ValType::I32, Scalar::Int32, AtomicFetchSubOp)); + case uint32_t(ThreadOp::I64AtomicSub): + CHECK_NEXT( + emitAtomicRMW(ValType::I64, Scalar::Int64, AtomicFetchSubOp)); + case uint32_t(ThreadOp::I32AtomicSub8U): + CHECK_NEXT( + emitAtomicRMW(ValType::I32, Scalar::Uint8, AtomicFetchSubOp)); + case uint32_t(ThreadOp::I32AtomicSub16U): + CHECK_NEXT( + emitAtomicRMW(ValType::I32, Scalar::Uint16, AtomicFetchSubOp)); + case uint32_t(ThreadOp::I64AtomicSub8U): + CHECK_NEXT( + emitAtomicRMW(ValType::I64, Scalar::Uint8, AtomicFetchSubOp)); + case uint32_t(ThreadOp::I64AtomicSub16U): + CHECK_NEXT( + emitAtomicRMW(ValType::I64, Scalar::Uint16, AtomicFetchSubOp)); + case uint32_t(ThreadOp::I64AtomicSub32U): + CHECK_NEXT( + emitAtomicRMW(ValType::I64, Scalar::Uint32, AtomicFetchSubOp)); + + case uint32_t(ThreadOp::I32AtomicAnd): + CHECK_NEXT( + emitAtomicRMW(ValType::I32, Scalar::Int32, AtomicFetchAndOp)); + case uint32_t(ThreadOp::I64AtomicAnd): + CHECK_NEXT( + emitAtomicRMW(ValType::I64, Scalar::Int64, AtomicFetchAndOp)); + case uint32_t(ThreadOp::I32AtomicAnd8U): + CHECK_NEXT( + emitAtomicRMW(ValType::I32, Scalar::Uint8, AtomicFetchAndOp)); + case uint32_t(ThreadOp::I32AtomicAnd16U): + CHECK_NEXT( + emitAtomicRMW(ValType::I32, Scalar::Uint16, AtomicFetchAndOp)); + case uint32_t(ThreadOp::I64AtomicAnd8U): + CHECK_NEXT( + emitAtomicRMW(ValType::I64, 
Scalar::Uint8, AtomicFetchAndOp)); + case uint32_t(ThreadOp::I64AtomicAnd16U): + CHECK_NEXT( + emitAtomicRMW(ValType::I64, Scalar::Uint16, AtomicFetchAndOp)); + case uint32_t(ThreadOp::I64AtomicAnd32U): + CHECK_NEXT( + emitAtomicRMW(ValType::I64, Scalar::Uint32, AtomicFetchAndOp)); + + case uint32_t(ThreadOp::I32AtomicOr): + CHECK_NEXT( + emitAtomicRMW(ValType::I32, Scalar::Int32, AtomicFetchOrOp)); + case uint32_t(ThreadOp::I64AtomicOr): + CHECK_NEXT( + emitAtomicRMW(ValType::I64, Scalar::Int64, AtomicFetchOrOp)); + case uint32_t(ThreadOp::I32AtomicOr8U): + CHECK_NEXT( + emitAtomicRMW(ValType::I32, Scalar::Uint8, AtomicFetchOrOp)); + case uint32_t(ThreadOp::I32AtomicOr16U): + CHECK_NEXT( + emitAtomicRMW(ValType::I32, Scalar::Uint16, AtomicFetchOrOp)); + case uint32_t(ThreadOp::I64AtomicOr8U): + CHECK_NEXT( + emitAtomicRMW(ValType::I64, Scalar::Uint8, AtomicFetchOrOp)); + case uint32_t(ThreadOp::I64AtomicOr16U): + CHECK_NEXT( + emitAtomicRMW(ValType::I64, Scalar::Uint16, AtomicFetchOrOp)); + case uint32_t(ThreadOp::I64AtomicOr32U): + CHECK_NEXT( + emitAtomicRMW(ValType::I64, Scalar::Uint32, AtomicFetchOrOp)); + + case uint32_t(ThreadOp::I32AtomicXor): + CHECK_NEXT( + emitAtomicRMW(ValType::I32, Scalar::Int32, AtomicFetchXorOp)); + case uint32_t(ThreadOp::I64AtomicXor): + CHECK_NEXT( + emitAtomicRMW(ValType::I64, Scalar::Int64, AtomicFetchXorOp)); + case uint32_t(ThreadOp::I32AtomicXor8U): + CHECK_NEXT( + emitAtomicRMW(ValType::I32, Scalar::Uint8, AtomicFetchXorOp)); + case uint32_t(ThreadOp::I32AtomicXor16U): + CHECK_NEXT( + emitAtomicRMW(ValType::I32, Scalar::Uint16, AtomicFetchXorOp)); + case uint32_t(ThreadOp::I64AtomicXor8U): + CHECK_NEXT( + emitAtomicRMW(ValType::I64, Scalar::Uint8, AtomicFetchXorOp)); + case uint32_t(ThreadOp::I64AtomicXor16U): + CHECK_NEXT( + emitAtomicRMW(ValType::I64, Scalar::Uint16, AtomicFetchXorOp)); + case uint32_t(ThreadOp::I64AtomicXor32U): + CHECK_NEXT( + emitAtomicRMW(ValType::I64, Scalar::Uint32, AtomicFetchXorOp)); + + case uint32_t(ThreadOp::I32AtomicXchg): + CHECK_NEXT(emitAtomicXchg(ValType::I32, Scalar::Int32)); + case uint32_t(ThreadOp::I64AtomicXchg): + CHECK_NEXT(emitAtomicXchg(ValType::I64, Scalar::Int64)); + case uint32_t(ThreadOp::I32AtomicXchg8U): + CHECK_NEXT(emitAtomicXchg(ValType::I32, Scalar::Uint8)); + case uint32_t(ThreadOp::I32AtomicXchg16U): + CHECK_NEXT(emitAtomicXchg(ValType::I32, Scalar::Uint16)); + case uint32_t(ThreadOp::I64AtomicXchg8U): + CHECK_NEXT(emitAtomicXchg(ValType::I64, Scalar::Uint8)); + case uint32_t(ThreadOp::I64AtomicXchg16U): + CHECK_NEXT(emitAtomicXchg(ValType::I64, Scalar::Uint16)); + case uint32_t(ThreadOp::I64AtomicXchg32U): + CHECK_NEXT(emitAtomicXchg(ValType::I64, Scalar::Uint32)); + + case uint32_t(ThreadOp::I32AtomicCmpXchg): + CHECK_NEXT(emitAtomicCmpXchg(ValType::I32, Scalar::Int32)); + case uint32_t(ThreadOp::I64AtomicCmpXchg): + CHECK_NEXT(emitAtomicCmpXchg(ValType::I64, Scalar::Int64)); + case uint32_t(ThreadOp::I32AtomicCmpXchg8U): + CHECK_NEXT(emitAtomicCmpXchg(ValType::I32, Scalar::Uint8)); + case uint32_t(ThreadOp::I32AtomicCmpXchg16U): + CHECK_NEXT(emitAtomicCmpXchg(ValType::I32, Scalar::Uint16)); + case uint32_t(ThreadOp::I64AtomicCmpXchg8U): + CHECK_NEXT(emitAtomicCmpXchg(ValType::I64, Scalar::Uint8)); + case uint32_t(ThreadOp::I64AtomicCmpXchg16U): + CHECK_NEXT(emitAtomicCmpXchg(ValType::I64, Scalar::Uint16)); + case uint32_t(ThreadOp::I64AtomicCmpXchg32U): + CHECK_NEXT(emitAtomicCmpXchg(ValType::I64, Scalar::Uint32)); + + default: + return iter_.unrecognizedOpcode(&op); + } + break; + } + + // 
asm.js and other private operations + case uint16_t(Op::MozPrefix): + return iter_.unrecognizedOpcode(&op); + + default: + return iter_.unrecognizedOpcode(&op); + } + +#undef CHECK +#undef NEXT +#undef CHECK_NEXT +#undef CHECK_POINTER_COUNT +#undef CHECK_SIMD_EXPERIMENTAL +#undef dispatchBinary +#undef dispatchUnary +#undef dispatchComparison +#undef dispatchConversion +#undef dispatchConversionOOM +#undef dispatchCalloutConversionOOM +#undef dispatchIntDivCallout +#undef dispatchVectorBinary +#undef dispatchVectorUnary +#undef dispatchVectorComparison +#undef dispatchExtractLane +#undef dispatchReplaceLane +#undef dispatchSplat +#undef dispatchVectorReduction + + MOZ_CRASH("unreachable"); + } + + MOZ_CRASH("unreachable"); +} + +bool BaseCompiler::emitFunction() { + if (!beginFunction()) { + return false; + } + + if (!emitBody()) { + return false; + } + + if (!endFunction()) { + return false; + } + + return true; +} + +BaseCompiler::BaseCompiler(const ModuleEnvironment& moduleEnv, + const CompilerEnvironment& compilerEnv, + const FuncCompileInput& func, + const ValTypeVector& locals, + const MachineState& trapExitLayout, + size_t trapExitLayoutNumWords, Decoder& decoder, + StkVector& stkSource, TempAllocator* alloc, + MacroAssembler* masm, StackMaps* stackMaps) + : moduleEnv_(moduleEnv), + compilerEnv_(compilerEnv), + iter_(moduleEnv, decoder), + func_(func), + lastReadCallSite_(0), + alloc_(alloc->fallible()), + locals_(locals), + deadCode_(false), + bceSafe_(0), + latentOp_(LatentOp::None), + latentType_(ValType::I32), + latentIntCmp_(Assembler::Equal), + latentDoubleCmp_(Assembler::DoubleEqual), + masm(*masm), + fr(*masm), + stackMapGenerator_(stackMaps, trapExitLayout, trapExitLayoutNumWords, + *masm), + stkSource_(stkSource) { + // Our caller, BaselineCompileFunctions, will lend us the vector contents to + // use for the eval stack. To get hold of those contents, we'll temporarily + // install an empty one in its place. + MOZ_ASSERT(stk_.empty()); + stk_.swap(stkSource_); + + // Assuming that previously processed wasm functions are well formed, the + // eval stack should now be empty. But empty it anyway; any non-emptiness + // at this point will cause chaos. + stk_.clear(); +} + +BaseCompiler::~BaseCompiler() { + stk_.swap(stkSource_); + // We've returned the eval stack vector contents to our caller, + // BaselineCompileFunctions. We expect the vector we get in return to be + // empty since that's what we swapped for the stack vector in our + // constructor. + MOZ_ASSERT(stk_.empty()); +} + +bool BaseCompiler::init() { + ra.init(this); + + if (!SigD_.append(ValType::F64)) { + return false; + } + if (!SigF_.append(ValType::F32)) { + return false; + } + + ArgTypeVector args(funcType()); + if (!fr.setupLocals(locals_, args, compilerEnv_.debugEnabled(), + &localInfo_)) { + return false; + } + + return true; +} + +FuncOffsets BaseCompiler::finish() { + MOZ_ASSERT(done(), "all bytes must be consumed"); + MOZ_ASSERT(func_.callSiteLineNums.length() == lastReadCallSite_); + + MOZ_ASSERT(stk_.empty()); + MOZ_ASSERT(stackMapGenerator_.memRefsOnStk == 0); + + masm.flushBuffer(); + + return offsets_; +} + +} // namespace wasm +} // namespace js + +bool js::wasm::BaselinePlatformSupport() { +#if defined(JS_CODEGEN_ARM) + // Simplifying assumption: require SDIV and UDIV. + // + // I have no good data on ARM populations allowing me to say that + // X% of devices in the market implement SDIV and UDIV.
However, + // they are definitely implemented on the Cortex-A7 and Cortex-A15 + // and on all ARMv8 systems. + if (!HasIDIV()) { + return false; + } +#endif +#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86) || \ + defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64) || \ + defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64) + return true; +#else + return false; +#endif +} + +bool js::wasm::BaselineCompileFunctions(const ModuleEnvironment& moduleEnv, + const CompilerEnvironment& compilerEnv, + LifoAlloc& lifo, + const FuncCompileInputVector& inputs, + CompiledCode* code, + UniqueChars* error) { + MOZ_ASSERT(compilerEnv.tier() == Tier::Baseline); + MOZ_ASSERT(moduleEnv.kind == ModuleKind::Wasm); + + // The MacroAssembler will sometimes access the jitContext. + + TempAllocator alloc(&lifo); + JitContext jitContext(&alloc); + MOZ_ASSERT(IsCompilingWasm()); + WasmMacroAssembler masm(alloc, moduleEnv); + + // Swap in already-allocated empty vectors to avoid malloc/free. + MOZ_ASSERT(code->empty()); + if (!code->swap(masm)) { + return false; + } + + // Create a description of the stack layout created by GenerateTrapExit(). + MachineState trapExitLayout; + size_t trapExitLayoutNumWords; + GenerateTrapExitMachineState(&trapExitLayout, &trapExitLayoutNumWords); + + // The compiler's operand stack. We reuse it across all functions so as to + // avoid malloc/free. Presize it to 128 elements in the hope of avoiding + // reallocation later. + StkVector stk; + if (!stk.reserve(128)) { + return false; + } + + for (const FuncCompileInput& func : inputs) { + Decoder d(func.begin, func.end, func.lineOrBytecode, error); + + // Build the local types vector. + + ValTypeVector locals; + if (!locals.appendAll(moduleEnv.funcs[func.index].type->args())) { + return false; + } + if (!DecodeLocalEntries(d, moduleEnv.types, moduleEnv.features, &locals)) { + return false; + } + + // One-pass baseline compilation.
+ + BaseCompiler f(moduleEnv, compilerEnv, func, locals, trapExitLayout, + trapExitLayoutNumWords, d, stk, &alloc, &masm, + &code->stackMaps); + if (!f.init()) { + return false; + } + if (!f.emitFunction()) { + return false; + } + if (!code->codeRanges.emplaceBack(func.index, func.lineOrBytecode, + f.finish())) { + return false; + } + } + + masm.finish(); + if (masm.oom()) { + return false; + } + + return code->swap(masm); +} + +#ifdef DEBUG +bool js::wasm::IsValidStackMapKey(bool debugEnabled, const uint8_t* nextPC) { +# if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86) + const uint8_t* insn = nextPC; + return (insn[-2] == 0x0F && insn[-1] == 0x0B) || // ud2 + (insn[-2] == 0xFF && (insn[-1] & 0xF8) == 0xD0) || // call *%r_ + insn[-5] == 0xE8 || // call simm32 + (debugEnabled && insn[-5] == 0x0F && insn[-4] == 0x1F && + insn[-3] == 0x44 && insn[-2] == 0x00 && + insn[-1] == 0x00); // nop_five + +# elif defined(JS_CODEGEN_ARM) + const uint32_t* insn = (const uint32_t*)nextPC; + return ((uintptr_t(insn) & 3) == 0) && // must be ARM, not Thumb + (insn[-1] == 0xe7f000f0 || // udf + (insn[-1] & 0xfffffff0) == 0xe12fff30 || // blx reg (ARM, enc A1) + (insn[-1] & 0xff000000) == 0xeb000000 || // bl simm24 (ARM, enc A1) + (debugEnabled && insn[-1] == 0xe320f000)); // "as_nop" + +# elif defined(JS_CODEGEN_ARM64) + const uint32_t hltInsn = 0xd4a00000; + const uint32_t* insn = (const uint32_t*)nextPC; + return ((uintptr_t(insn) & 3) == 0) && + (insn[-1] == hltInsn || // hlt + (insn[-1] & 0xfffffc1f) == 0xd63f0000 || // blr reg + (insn[-1] & 0xfc000000) == 0x94000000 || // bl simm26 + (debugEnabled && insn[-1] == 0xd503201f)); // nop + +# else + MOZ_CRASH("IsValidStackMapKey: requires implementation on this platform"); +# endif +} +#endif + +#undef RABALDR_INT_DIV_I64_CALLOUT +#undef RABALDR_I64_TO_FLOAT_CALLOUT +#undef RABALDR_FLOAT_TO_I64_CALLOUT
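A note on the dispatch structure above: emitBody()'s big switch is built from the local CHECK/NEXT/CHECK_NEXT macros (all #undef'd at its end). Each case either reads an immediate and pushes a value when not in dead code, or hands off to a dispatch helper, and anything unrecognized falls through to iter_.unrecognizedOpcode(&op). The stand-alone sketch below shows only that decode-and-dispatch shape under simplified assumptions: MiniOp, MiniDecoder, and miniEmitBody are made-up names, the immediate encoding is a toy single byte rather than LEB128, and values are evaluated directly instead of being compiled to machine code.

#include <cstdint>
#include <vector>

enum class MiniOp : uint8_t { I32Const, I32Add, End };

struct MiniDecoder {
  const uint8_t* pc;
  const uint8_t* end;
  bool readOp(MiniOp* op) {
    if (pc == end) return false;
    *op = MiniOp(*pc++);
    return true;
  }
  bool readI32Const(int32_t* v) {
    if (pc == end) return false;
    *v = int8_t(*pc++);  // toy immediate encoding, not LEB128
    return true;
  }
};

bool miniEmitBody(MiniDecoder& d, std::vector<int32_t>& stk) {
  for (;;) {
    MiniOp op;
    if (!d.readOp(&op)) return false;  // cf. CHECK(iter_.readOp(&op))
    switch (op) {
      case MiniOp::I32Const: {
        int32_t v;
        if (!d.readI32Const(&v)) return false;
        stk.push_back(v);  // cf. pushI32(v) when !deadCode_
        continue;          // cf. NEXT()
      }
      case MiniOp::I32Add: {
        if (stk.size() < 2) return false;
        int32_t rhs = stk.back();
        stk.pop_back();
        stk.back() += rhs;  // cf. dispatchBinary(emitAddI32, ValType::I32)
        continue;
      }
      case MiniOp::End:
        return true;  // body fully consumed
    }
    return false;  // cf. iter_.unrecognizedOpcode(&op)
  }
}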
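The constructor and destructor comments above describe how BaselineCompileFunctions lends its presized StkVector to each BaseCompiler by swapping it in and out, so every function reuses a single allocation for the operand stack. Below is a stand-alone sketch of that borrow-by-swap pattern, assuming a plain std::vector; BorrowedStack and compileMany are hypothetical names, not part of the real code.

#include <cassert>
#include <cstddef>
#include <vector>

using Stk = int;  // stands in for the compiler's value-stack entry type

class BorrowedStack {
  std::vector<Stk> stk_;      // the per-function working stack
  std::vector<Stk>& source_;  // the caller's long-lived buffer

 public:
  explicit BorrowedStack(std::vector<Stk>& source) : source_(source) {
    stk_.swap(source_);  // take the caller's capacity without allocating
    stk_.clear();        // start empty regardless of prior contents
  }
  ~BorrowedStack() {
    // Return the (possibly capacity-grown) vector to the caller; what we get
    // back should be the empty vector we left in its place.
    stk_.swap(source_);
    assert(stk_.empty());
  }
  std::vector<Stk>& get() { return stk_; }
};

void compileMany(size_t numFuncs) {
  std::vector<Stk> stk;
  stk.reserve(128);  // presized once, reused for every function
  for (size_t i = 0; i < numFuncs; i++) {
    BorrowedStack borrowed(stk);
    assert(borrowed.get().empty());
    // ...per-function work pushes and pops on borrowed.get()...
  }
}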