From 26a029d407be480d791972afb5975cf62c9360a6 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 19 Apr 2024 02:47:55 +0200 Subject: Adding upstream version 124.0.1. Signed-off-by: Daniel Baumann --- js/src/wasm/WasmBCMemory.cpp | 2991 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 2991 insertions(+) create mode 100644 js/src/wasm/WasmBCMemory.cpp (limited to 'js/src/wasm/WasmBCMemory.cpp') diff --git a/js/src/wasm/WasmBCMemory.cpp b/js/src/wasm/WasmBCMemory.cpp new file mode 100644 index 0000000000..f4e19d95e8 --- /dev/null +++ b/js/src/wasm/WasmBCMemory.cpp @@ -0,0 +1,2991 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * + * Copyright 2016 Mozilla Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "wasm/WasmBCClass.h" +#include "wasm/WasmBCDefs.h" +#include "wasm/WasmBCRegDefs.h" + +#include "jit/MacroAssembler-inl.h" + +#include "wasm/WasmBCClass-inl.h" +#include "wasm/WasmBCCodegen-inl.h" +#include "wasm/WasmBCRegDefs-inl.h" +#include "wasm/WasmBCRegMgmt-inl.h" +#include "wasm/WasmBCStkMgmt-inl.h" + +namespace js { +namespace wasm { + +////////////////////////////////////////////////////////////////////////////// +// +// Heap access subroutines. + +// Bounds check elimination. +// +// We perform BCE on two kinds of address expressions: on constant heap pointers +// that are known to be in the heap or will be handled by the out-of-bounds trap +// handler; and on local variables that have been checked in dominating code +// without being updated since. +// +// For an access through a constant heap pointer + an offset we can eliminate +// the bounds check if the sum of the address and offset is below the sum of the +// minimum memory length and the offset guard length. +// +// For an access through a local variable + an offset we can eliminate the +// bounds check if the local variable has already been checked and has not been +// updated since, and the offset is less than the guard limit. +// +// To track locals for which we can eliminate checks we use a bit vector +// bceSafe_ that has a bit set for those locals whose bounds have been checked +// and which have not subsequently been set. Initially this vector is zero. +// +// In straight-line code a bit is set when we perform a bounds check on an +// access via the local and is reset when the variable is updated. +// +// In control flow, the bit vector is manipulated as follows. Each ControlItem +// has a value bceSafeOnEntry, which is the value of bceSafe_ on entry to the +// item, and a value bceSafeOnExit, which is initially ~0. On a branch (br, +// brIf, brTable), we always AND the branch target's bceSafeOnExit with the +// value of bceSafe_ at the branch point. On exiting an item by falling out of +// it, provided we're not in dead code, we AND the current value of bceSafe_ +// into the item's bceSafeOnExit. 
Additional processing depends on the item +// type: +// +// - After a block, set bceSafe_ to the block's bceSafeOnExit. +// +// - On loop entry, after pushing the ControlItem, set bceSafe_ to zero; the +// back edges would otherwise require us to iterate to a fixedpoint. +// +// - After a loop, the bceSafe_ is left unchanged, because only fallthrough +// control flow will reach that point and the bceSafe_ value represents the +// correct state of the fallthrough path. +// +// - Set bceSafe_ to the ControlItem's bceSafeOnEntry at both the 'then' branch +// and the 'else' branch. +// +// - After an if-then-else, set bceSafe_ to the if-then-else's bceSafeOnExit. +// +// - After an if-then, set bceSafe_ to the if-then's bceSafeOnExit AND'ed with +// the if-then's bceSafeOnEntry. +// +// Finally, when the debugger allows locals to be mutated we must disable BCE +// for references via a local, by returning immediately from bceCheckLocal if +// compilerEnv_.debugEnabled() is true. + +void BaseCompiler::bceCheckLocal(MemoryAccessDesc* access, AccessCheck* check, + uint32_t local) { + // We only eliminate bounds checks for memory 0 + if (access->memoryIndex() != 0) { + return; + } + + if (local >= sizeof(BCESet) * 8) { + return; + } + + uint32_t offsetGuardLimit = + GetMaxOffsetGuardLimit(moduleEnv_.hugeMemoryEnabled(0)); + + if ((bceSafe_ & (BCESet(1) << local)) && + access->offset64() < offsetGuardLimit) { + check->omitBoundsCheck = true; + } + + // The local becomes safe even if the offset is beyond the guard limit. + bceSafe_ |= (BCESet(1) << local); +} + +void BaseCompiler::bceLocalIsUpdated(uint32_t local) { + if (local >= sizeof(BCESet) * 8) { + return; + } + + bceSafe_ &= ~(BCESet(1) << local); +} + +// Alignment check elimination. +// +// Alignment checks for atomic operations can be omitted if the pointer is a +// constant and the pointer + offset is aligned. Alignment checking that can't +// be omitted can still be simplified by checking only the pointer if the offset +// is aligned. +// +// (In addition, alignment checking of the pointer can be omitted if the pointer +// has been checked in dominating code, but we don't do that yet.) + +template <> +RegI32 BaseCompiler::popConstMemoryAccess(MemoryAccessDesc* access, + AccessCheck* check) { + int32_t addrTemp; + MOZ_ALWAYS_TRUE(popConst(&addrTemp)); + uint32_t addr = addrTemp; + + uint32_t offsetGuardLimit = GetMaxOffsetGuardLimit( + moduleEnv_.hugeMemoryEnabled(access->memoryIndex())); + + uint64_t ea = uint64_t(addr) + uint64_t(access->offset()); + uint64_t limit = + moduleEnv_.memories[access->memoryIndex()].initialLength32() + + offsetGuardLimit; + + check->omitBoundsCheck = ea < limit; + check->omitAlignmentCheck = (ea & (access->byteSize() - 1)) == 0; + + // Fold the offset into the pointer if we can, as this is always + // beneficial. 
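+  // Worked example (illustrative numbers only): with addr=1000 and offset=16,
+  // ea=1016; against a one-page (65536-byte) initial memory this is below
+  // initialLength32() + offsetGuardLimit, so the bounds check is omitted, and
+  // for a 4-byte access 1016 & 3 == 0, so the alignment check is omitted too.
+  // Since ea fits in 32 bits, the access becomes addr=1016 with a zero offset.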
+ if (ea <= UINT32_MAX) { + addr = uint32_t(ea); + access->clearOffset(); + } + + RegI32 r = needI32(); + moveImm32(int32_t(addr), r); + return r; +} + +#ifdef ENABLE_WASM_MEMORY64 +template <> +RegI64 BaseCompiler::popConstMemoryAccess(MemoryAccessDesc* access, + AccessCheck* check) { + int64_t addrTemp; + MOZ_ALWAYS_TRUE(popConst(&addrTemp)); + uint64_t addr = addrTemp; + + uint32_t offsetGuardLimit = GetMaxOffsetGuardLimit( + moduleEnv_.hugeMemoryEnabled(access->memoryIndex())); + + uint64_t ea = addr + access->offset64(); + bool overflow = ea < addr; + uint64_t limit = + moduleEnv_.memories[access->memoryIndex()].initialLength64() + + offsetGuardLimit; + + if (!overflow) { + check->omitBoundsCheck = ea < limit; + check->omitAlignmentCheck = (ea & (access->byteSize() - 1)) == 0; + + // Fold the offset into the pointer if we can, as this is always + // beneficial. + addr = uint64_t(ea); + access->clearOffset(); + } + + RegI64 r = needI64(); + moveImm64(int64_t(addr), r); + return r; +} +#endif + +template +RegType BaseCompiler::popMemoryAccess(MemoryAccessDesc* access, + AccessCheck* check) { + check->onlyPointerAlignment = + (access->offset64() & (access->byteSize() - 1)) == 0; + + // If there's a constant it will have the correct type for RegType. + if (hasConst()) { + return popConstMemoryAccess(access, check); + } + + // If there's a local it will have the correct type for RegType. + uint32_t local; + if (peekLocal(&local)) { + bceCheckLocal(access, check, local); + } + + return pop(); +} + +#ifdef JS_64BIT +static inline RegI64 RegPtrToRegIntptr(RegPtr r) { + return RegI64(Register64(Register(r))); +} + +# ifndef WASM_HAS_HEAPREG +static inline RegPtr RegIntptrToRegPtr(RegI64 r) { + return RegPtr(Register64(r).reg); +} +# endif +#else +static inline RegI32 RegPtrToRegIntptr(RegPtr r) { return RegI32(Register(r)); } + +# ifndef WASM_HAS_HEAPREG +static inline RegPtr RegIntptrToRegPtr(RegI32 r) { return RegPtr(Register(r)); } +# endif +#endif + +void BaseCompiler::pushHeapBase(uint32_t memoryIndex) { + RegPtr heapBase = need(); + +#ifdef WASM_HAS_HEAPREG + if (memoryIndex == 0) { + move(RegPtr(HeapReg), heapBase); + push(RegPtrToRegIntptr(heapBase)); + return; + } +#endif + +#ifdef RABALDR_PIN_INSTANCE + movePtr(RegPtr(InstanceReg), heapBase); +#else + fr.loadInstancePtr(heapBase); +#endif + + uint32_t offset = instanceOffsetOfMemoryBase(memoryIndex); + masm.loadPtr(Address(heapBase, offset), heapBase); + push(RegPtrToRegIntptr(heapBase)); +} + +void BaseCompiler::branchAddNoOverflow(uint64_t offset, RegI32 ptr, Label* ok) { + // The invariant holds because ptr is RegI32 - this is m32. 
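+  // branchAdd32 with CarryClear folds the offset into ptr and jumps to |ok|
+  // when the 32-bit sum does not wrap; the callers (prepareMemoryAccess and
+  // computeEffectiveAddress) emit the OutOfBounds trap on the fall-through
+  // path.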
+ MOZ_ASSERT(offset <= UINT32_MAX); + masm.branchAdd32(Assembler::CarryClear, Imm32(uint32_t(offset)), ptr, ok); +} + +#ifdef ENABLE_WASM_MEMORY64 +void BaseCompiler::branchAddNoOverflow(uint64_t offset, RegI64 ptr, Label* ok) { +# if defined(JS_64BIT) + masm.branchAddPtr(Assembler::CarryClear, ImmWord(offset), Register64(ptr).reg, + ok); +# else + masm.branchAdd64(Assembler::CarryClear, Imm64(offset), ptr, ok); +# endif +} +#endif + +void BaseCompiler::branchTestLowZero(RegI32 ptr, Imm32 mask, Label* ok) { + masm.branchTest32(Assembler::Zero, ptr, mask, ok); +} + +#ifdef ENABLE_WASM_MEMORY64 +void BaseCompiler::branchTestLowZero(RegI64 ptr, Imm32 mask, Label* ok) { +# ifdef JS_64BIT + masm.branchTestPtr(Assembler::Zero, Register64(ptr).reg, mask, ok); +# else + masm.branchTestPtr(Assembler::Zero, ptr.low, mask, ok); +# endif +} +#endif + +void BaseCompiler::boundsCheck4GBOrLargerAccess(uint32_t memoryIndex, + RegPtr instance, RegI32 ptr, + Label* ok) { +#ifdef JS_64BIT + // Extend the value to 64 bits, check the 64-bit value against the 64-bit + // bound, then chop back to 32 bits. On most platform the extending and + // chopping are no-ops. It's important that the value we end up with has + // flowed through the Spectre mask + + // Note, ptr and ptr64 are the same register. + RegI64 ptr64 = fromI32(ptr); + + // In principle there may be non-zero bits in the upper bits of the + // register; clear them. +# ifdef RABALDR_ZERO_EXTENDS + masm.debugAssertCanonicalInt32(ptr); +# else + masm.move32To64ZeroExtend(ptr, ptr64); +# endif + + boundsCheck4GBOrLargerAccess(memoryIndex, instance, ptr64, ok); + + // Restore the value to the canonical form for a 32-bit value in a + // 64-bit register and/or the appropriate form for further use in the + // indexing instruction. +# ifdef RABALDR_ZERO_EXTENDS + // The canonical value is zero-extended; we already have that. +# else + masm.move64To32(ptr64, ptr); +# endif +#else + // No support needed, we have max 2GB heap on 32-bit + MOZ_CRASH("No 32-bit support"); +#endif +} + +void BaseCompiler::boundsCheckBelow4GBAccess(uint32_t memoryIndex, + RegPtr instance, RegI32 ptr, + Label* ok) { + // If the memory's max size is known to be smaller than 64K pages exactly, + // we can use a 32-bit check and avoid extension and wrapping. + masm.wasmBoundsCheck32( + Assembler::Below, ptr, + Address(instance, instanceOffsetOfBoundsCheckLimit(memoryIndex)), ok); +} + +void BaseCompiler::boundsCheck4GBOrLargerAccess(uint32_t memoryIndex, + RegPtr instance, RegI64 ptr, + Label* ok) { + // Any Spectre mitigation will appear to update the ptr64 register. + masm.wasmBoundsCheck64( + Assembler::Below, ptr, + Address(instance, instanceOffsetOfBoundsCheckLimit(memoryIndex)), ok); +} + +void BaseCompiler::boundsCheckBelow4GBAccess(uint32_t memoryIndex, + RegPtr instance, RegI64 ptr, + Label* ok) { + // The bounds check limit is valid to 64 bits, so there's no sense in doing + // anything complicated here. There may be optimization paths here in the + // future and they may differ on 32-bit and 64-bit. + boundsCheck4GBOrLargerAccess(memoryIndex, instance, ptr, ok); +} + +// Make sure the ptr could be used as an index register. +static inline void ToValidIndex(MacroAssembler& masm, RegI32 ptr) { +#if defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || \ + defined(JS_CODEGEN_RISCV64) + // When ptr is used as an index, it will be added to a 64-bit register. + // So we should explicitly promote ptr to 64-bit. 
Since now ptr holds a + // unsigned 32-bit value, we zero-extend it to 64-bit here. + masm.move32To64ZeroExtend(ptr, Register64(ptr)); +#endif +} + +#if defined(ENABLE_WASM_MEMORY64) +static inline void ToValidIndex(MacroAssembler& masm, RegI64 ptr) {} +#endif + +// RegIndexType is RegI32 for Memory32 and RegI64 for Memory64. +template +void BaseCompiler::prepareMemoryAccess(MemoryAccessDesc* access, + AccessCheck* check, RegPtr instance, + RegIndexType ptr) { + uint32_t offsetGuardLimit = GetMaxOffsetGuardLimit( + moduleEnv_.hugeMemoryEnabled(access->memoryIndex())); + + // Fold offset if necessary for further computations. + if (access->offset64() >= offsetGuardLimit || + access->offset64() > UINT32_MAX || + (access->isAtomic() && !check->omitAlignmentCheck && + !check->onlyPointerAlignment)) { + Label ok; + branchAddNoOverflow(access->offset64(), ptr, &ok); + masm.wasmTrap(Trap::OutOfBounds, bytecodeOffset()); + masm.bind(&ok); + access->clearOffset(); + check->onlyPointerAlignment = true; + } + + // Alignment check if required. + + if (access->isAtomic() && !check->omitAlignmentCheck) { + MOZ_ASSERT(check->onlyPointerAlignment); + // We only care about the low pointer bits here. + Label ok; + branchTestLowZero(ptr, Imm32(access->byteSize() - 1), &ok); + masm.wasmTrap(Trap::UnalignedAccess, bytecodeOffset()); + masm.bind(&ok); + } + + // Ensure no instance if we don't need it. + + if (moduleEnv_.hugeMemoryEnabled(access->memoryIndex()) && + access->memoryIndex() == 0) { + // We have HeapReg and no bounds checking and need load neither + // memoryBase nor boundsCheckLimit from instance. + MOZ_ASSERT_IF(check->omitBoundsCheck, instance.isInvalid()); + } +#ifdef WASM_HAS_HEAPREG + // We have HeapReg and don't need to load the memoryBase from instance. + MOZ_ASSERT_IF(check->omitBoundsCheck && access->memoryIndex() == 0, + instance.isInvalid()); +#endif + + // Bounds check if required. + + if (!moduleEnv_.hugeMemoryEnabled(access->memoryIndex()) && + !check->omitBoundsCheck) { + Label ok; +#ifdef JS_64BIT + // The checking depends on how many bits are in the pointer and how many + // bits are in the bound. + static_assert(0x100000000 % PageSize == 0); + if (!moduleEnv_.memories[access->memoryIndex()] + .boundsCheckLimitIs32Bits() && + MaxMemoryPages( + moduleEnv_.memories[access->memoryIndex()].indexType()) >= + Pages(0x100000000 / PageSize)) { + boundsCheck4GBOrLargerAccess(access->memoryIndex(), instance, ptr, &ok); + } else { + boundsCheckBelow4GBAccess(access->memoryIndex(), instance, ptr, &ok); + } +#else + boundsCheckBelow4GBAccess(access->memoryIndex(), instance, ptr, &ok); +#endif + masm.wasmTrap(Trap::OutOfBounds, bytecodeOffset()); + masm.bind(&ok); + } + + ToValidIndex(masm, ptr); +} + +template +void BaseCompiler::computeEffectiveAddress(MemoryAccessDesc* access) { + if (access->offset()) { + Label ok; + RegIndexType ptr = pop(); + branchAddNoOverflow(access->offset64(), ptr, &ok); + masm.wasmTrap(Trap::OutOfBounds, bytecodeOffset()); + masm.bind(&ok); + access->clearOffset(); + push(ptr); + } +} + +RegPtr BaseCompiler::maybeLoadMemoryBaseForAccess( + RegPtr instance, const MemoryAccessDesc* access) { +#ifdef JS_CODEGEN_X86 + // x86 adds the memory base to the wasm pointer directly using an addressing + // mode and doesn't require the memory base to be loaded to a register. 
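+  // Returning an invalid RegPtr is deliberate: executeLoad and executeStore
+  // assert that memoryBase is invalid on x86 and instead add the base from
+  // the instance's memoryBase slot directly into the pointer.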
+ return RegPtr(); +#endif + +#ifdef WASM_HAS_HEAPREG + if (access->memoryIndex() == 0) { + return RegPtr(HeapReg); + } +#endif + RegPtr memoryBase = needPtr(); + uint32_t offset = instanceOffsetOfMemoryBase(access->memoryIndex()); + masm.loadPtr(Address(instance, offset), memoryBase); + return memoryBase; +} + +bool BaseCompiler::needInstanceForAccess(const MemoryAccessDesc* access, + const AccessCheck& check) { +#ifndef WASM_HAS_HEAPREG + // Platform requires instance for memory base. + return true; +#else + if (access->memoryIndex() != 0) { + // Need instance to load the memory base + return true; + } + return !moduleEnv_.hugeMemoryEnabled(access->memoryIndex()) && + !check.omitBoundsCheck; +#endif +} + +RegPtr BaseCompiler::maybeLoadInstanceForAccess(const MemoryAccessDesc* access, + const AccessCheck& check) { + if (needInstanceForAccess(access, check)) { +#ifdef RABALDR_PIN_INSTANCE + // NOTE, returning InstanceReg here depends for correctness on *ALL* + // clients not attempting to free this register and not push it on the value + // stack. + // + // We have assertions in place to guard against that, so the risk of the + // leaky abstraction is acceptable. performRegisterLeakCheck() will ensure + // that after every bytecode, the union of available registers from the + // regalloc and used registers from the stack equals the set of allocatable + // registers at startup. Thus if the instance is freed incorrectly it will + // end up in that union via the regalloc, and if it is pushed incorrectly it + // will end up in the union via the stack. + return RegPtr(InstanceReg); +#else + RegPtr instance = need(); + fr.loadInstancePtr(instance); + return instance; +#endif + } + return RegPtr::Invalid(); +} + +RegPtr BaseCompiler::maybeLoadInstanceForAccess(const MemoryAccessDesc* access, + const AccessCheck& check, + RegPtr specific) { + if (needInstanceForAccess(access, check)) { +#ifdef RABALDR_PIN_INSTANCE + movePtr(RegPtr(InstanceReg), specific); +#else + fr.loadInstancePtr(specific); +#endif + return specific; + } + return RegPtr::Invalid(); +} + +////////////////////////////////////////////////////////////////////////////// +// +// Load and store. + +void BaseCompiler::executeLoad(MemoryAccessDesc* access, AccessCheck* check, + RegPtr instance, RegPtr memoryBase, RegI32 ptr, + AnyReg dest, RegI32 temp) { + // Emit the load. At this point, 64-bit offsets will have been resolved. +#if defined(JS_CODEGEN_X64) + MOZ_ASSERT(temp.isInvalid()); + Operand srcAddr(memoryBase, ptr, TimesOne, access->offset()); + + if (dest.tag == AnyReg::I64) { + masm.wasmLoadI64(*access, srcAddr, dest.i64()); + } else { + masm.wasmLoad(*access, srcAddr, dest.any()); + } +#elif defined(JS_CODEGEN_X86) + MOZ_ASSERT(memoryBase.isInvalid() && temp.isInvalid()); + masm.addPtr( + Address(instance, instanceOffsetOfMemoryBase(access->memoryIndex())), + ptr); + Operand srcAddr(ptr, access->offset()); + + if (dest.tag == AnyReg::I64) { + MOZ_ASSERT(dest.i64() == specific_.abiReturnRegI64); + masm.wasmLoadI64(*access, srcAddr, dest.i64()); + } else { + // For 8 bit loads, this will generate movsbl or movzbl, so + // there's no constraint on what the output register may be. 
+ masm.wasmLoad(*access, srcAddr, dest.any()); + } +#elif defined(JS_CODEGEN_MIPS64) + if (IsUnaligned(*access)) { + switch (dest.tag) { + case AnyReg::I64: + masm.wasmUnalignedLoadI64(*access, memoryBase, ptr, ptr, dest.i64(), + temp); + break; + case AnyReg::F32: + masm.wasmUnalignedLoadFP(*access, memoryBase, ptr, ptr, dest.f32(), + temp); + break; + case AnyReg::F64: + masm.wasmUnalignedLoadFP(*access, memoryBase, ptr, ptr, dest.f64(), + temp); + break; + case AnyReg::I32: + masm.wasmUnalignedLoad(*access, memoryBase, ptr, ptr, dest.i32(), temp); + break; + default: + MOZ_CRASH("Unexpected type"); + } + } else { + if (dest.tag == AnyReg::I64) { + masm.wasmLoadI64(*access, memoryBase, ptr, ptr, dest.i64()); + } else { + masm.wasmLoad(*access, memoryBase, ptr, ptr, dest.any()); + } + } +#elif defined(JS_CODEGEN_ARM) + MOZ_ASSERT(temp.isInvalid()); + if (dest.tag == AnyReg::I64) { + masm.wasmLoadI64(*access, memoryBase, ptr, ptr, dest.i64()); + } else { + masm.wasmLoad(*access, memoryBase, ptr, ptr, dest.any()); + } +#elif defined(JS_CODEGEN_ARM64) + MOZ_ASSERT(temp.isInvalid()); + if (dest.tag == AnyReg::I64) { + masm.wasmLoadI64(*access, memoryBase, ptr, dest.i64()); + } else { + masm.wasmLoad(*access, memoryBase, ptr, dest.any()); + } +#elif defined(JS_CODEGEN_LOONG64) + MOZ_ASSERT(temp.isInvalid()); + if (dest.tag == AnyReg::I64) { + masm.wasmLoadI64(*access, memoryBase, ptr, ptr, dest.i64()); + } else { + masm.wasmLoad(*access, memoryBase, ptr, ptr, dest.any()); + } +#elif defined(JS_CODEGEN_RISCV64) + MOZ_ASSERT(temp.isInvalid()); + if (dest.tag == AnyReg::I64) { + masm.wasmLoadI64(*access, memoryBase, ptr, ptr, dest.i64()); + } else { + masm.wasmLoad(*access, memoryBase, ptr, ptr, dest.any()); + } +#else + MOZ_CRASH("BaseCompiler platform hook: load"); +#endif +} + +// ptr and dest may be the same iff dest is I32. +// This may destroy ptr even if ptr and dest are not the same. +void BaseCompiler::load(MemoryAccessDesc* access, AccessCheck* check, + RegPtr instance, RegPtr memoryBase, RegI32 ptr, + AnyReg dest, RegI32 temp) { + prepareMemoryAccess(access, check, instance, ptr); + executeLoad(access, check, instance, memoryBase, ptr, dest, temp); +} + +#ifdef ENABLE_WASM_MEMORY64 +void BaseCompiler::load(MemoryAccessDesc* access, AccessCheck* check, + RegPtr instance, RegPtr memoryBase, RegI64 ptr, + AnyReg dest, RegI64 temp) { + prepareMemoryAccess(access, check, instance, ptr); + +# if !defined(JS_64BIT) + // On 32-bit systems we have a maximum 2GB heap and bounds checking has + // been applied to ensure that the 64-bit pointer is valid. + return executeLoad(access, check, instance, memoryBase, RegI32(ptr.low), dest, + maybeFromI64(temp)); +# elif defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_ARM64) + // On x64 and arm64 the 32-bit code simply assumes that the high bits of the + // 64-bit pointer register are zero and performs a 64-bit add. Thus the code + // generated is the same for the 64-bit and the 32-bit case. + return executeLoad(access, check, instance, memoryBase, RegI32(ptr.reg), dest, + maybeFromI64(temp)); +# elif defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) + // On mips64 and loongarch64, the 'prepareMemoryAccess' function will make + // sure that ptr holds a valid 64-bit index value. Thus the code generated in + // 'executeLoad' is the same for the 64-bit and the 32-bit case. 
+ return executeLoad(access, check, instance, memoryBase, RegI32(ptr.reg), dest, + maybeFromI64(temp)); +# elif defined(JS_CODEGEN_RISCV64) + // RISCV the 'prepareMemoryAccess' function will make + // sure that ptr holds a valid 64-bit index value. Thus the code generated in + // 'executeLoad' is the same for the 64-bit and the 32-bit case. + return executeLoad(access, check, instance, memoryBase, RegI32(ptr.reg), dest, + maybeFromI64(temp)); +# else + MOZ_CRASH("Missing platform hook"); +# endif +} +#endif + +void BaseCompiler::executeStore(MemoryAccessDesc* access, AccessCheck* check, + RegPtr instance, RegPtr memoryBase, RegI32 ptr, + AnyReg src, RegI32 temp) { + // Emit the store. At this point, 64-bit offsets will have been resolved. +#if defined(JS_CODEGEN_X64) + MOZ_ASSERT(temp.isInvalid()); + Operand dstAddr(memoryBase, ptr, TimesOne, access->offset()); + + masm.wasmStore(*access, src.any(), dstAddr); +#elif defined(JS_CODEGEN_X86) + MOZ_ASSERT(memoryBase.isInvalid() && temp.isInvalid()); + masm.addPtr( + Address(instance, instanceOffsetOfMemoryBase(access->memoryIndex())), + ptr); + Operand dstAddr(ptr, access->offset()); + + if (access->type() == Scalar::Int64) { + masm.wasmStoreI64(*access, src.i64(), dstAddr); + } else { + AnyRegister value; + ScratchI8 scratch(*this); + if (src.tag == AnyReg::I64) { + if (access->byteSize() == 1 && !ra.isSingleByteI32(src.i64().low)) { + masm.mov(src.i64().low, scratch); + value = AnyRegister(scratch); + } else { + value = AnyRegister(src.i64().low); + } + } else if (access->byteSize() == 1 && !ra.isSingleByteI32(src.i32())) { + masm.mov(src.i32(), scratch); + value = AnyRegister(scratch); + } else { + value = src.any(); + } + + masm.wasmStore(*access, value, dstAddr); + } +#elif defined(JS_CODEGEN_ARM) + MOZ_ASSERT(temp.isInvalid()); + if (access->type() == Scalar::Int64) { + masm.wasmStoreI64(*access, src.i64(), memoryBase, ptr, ptr); + } else if (src.tag == AnyReg::I64) { + masm.wasmStore(*access, AnyRegister(src.i64().low), memoryBase, ptr, ptr); + } else { + masm.wasmStore(*access, src.any(), memoryBase, ptr, ptr); + } +#elif defined(JS_CODEGEN_MIPS64) + if (IsUnaligned(*access)) { + switch (src.tag) { + case AnyReg::I64: + masm.wasmUnalignedStoreI64(*access, src.i64(), memoryBase, ptr, ptr, + temp); + break; + case AnyReg::F32: + masm.wasmUnalignedStoreFP(*access, src.f32(), memoryBase, ptr, ptr, + temp); + break; + case AnyReg::F64: + masm.wasmUnalignedStoreFP(*access, src.f64(), memoryBase, ptr, ptr, + temp); + break; + case AnyReg::I32: + masm.wasmUnalignedStore(*access, src.i32(), memoryBase, ptr, ptr, temp); + break; + default: + MOZ_CRASH("Unexpected type"); + } + } else { + if (src.tag == AnyReg::I64) { + masm.wasmStoreI64(*access, src.i64(), memoryBase, ptr, ptr); + } else { + masm.wasmStore(*access, src.any(), memoryBase, ptr, ptr); + } + } +#elif defined(JS_CODEGEN_ARM64) + MOZ_ASSERT(temp.isInvalid()); + if (access->type() == Scalar::Int64) { + masm.wasmStoreI64(*access, src.i64(), memoryBase, ptr); + } else { + masm.wasmStore(*access, src.any(), memoryBase, ptr); + } +#elif defined(JS_CODEGEN_LOONG64) + MOZ_ASSERT(temp.isInvalid()); + if (access->type() == Scalar::Int64) { + masm.wasmStoreI64(*access, src.i64(), memoryBase, ptr, ptr); + } else { + masm.wasmStore(*access, src.any(), memoryBase, ptr, ptr); + } +#elif defined(JS_CODEGEN_RISCV64) + MOZ_ASSERT(temp.isInvalid()); + if (access->type() == Scalar::Int64) { + masm.wasmStoreI64(*access, src.i64(), memoryBase, ptr, ptr); + } else { + masm.wasmStore(*access, src.any(), 
memoryBase, ptr, ptr); + } +#else + MOZ_CRASH("BaseCompiler platform hook: store"); +#endif +} + +// ptr and src must not be the same register. +// This may destroy ptr and src. +void BaseCompiler::store(MemoryAccessDesc* access, AccessCheck* check, + RegPtr instance, RegPtr memoryBase, RegI32 ptr, + AnyReg src, RegI32 temp) { + prepareMemoryAccess(access, check, instance, ptr); + executeStore(access, check, instance, memoryBase, ptr, src, temp); +} + +#ifdef ENABLE_WASM_MEMORY64 +void BaseCompiler::store(MemoryAccessDesc* access, AccessCheck* check, + RegPtr instance, RegPtr memoryBase, RegI64 ptr, + AnyReg src, RegI64 temp) { + prepareMemoryAccess(access, check, instance, ptr); + // See comments in load() +# if !defined(JS_64BIT) + return executeStore(access, check, instance, memoryBase, RegI32(ptr.low), src, + maybeFromI64(temp)); +# elif defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_ARM64) || \ + defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || \ + defined(JS_CODEGEN_RISCV64) + return executeStore(access, check, instance, memoryBase, RegI32(ptr.reg), src, + maybeFromI64(temp)); +# else + MOZ_CRASH("Missing platform hook"); +# endif +} +#endif + +template +void BaseCompiler::doLoadCommon(MemoryAccessDesc* access, AccessCheck check, + ValType type) { + RegPtr instance; + RegPtr memoryBase; + RegType temp; +#if defined(JS_CODEGEN_MIPS64) + temp = need(); +#endif + + switch (type.kind()) { + case ValType::I32: { + RegType rp = popMemoryAccess(access, &check); + RegI32 rv = needI32(); + instance = maybeLoadInstanceForAccess(access, check); + memoryBase = maybeLoadMemoryBaseForAccess(instance, access); + load(access, &check, instance, memoryBase, rp, AnyReg(rv), temp); + push(rv); + free(rp); + break; + } + case ValType::I64: { + RegI64 rv; + RegType rp; +#ifdef JS_CODEGEN_X86 + rv = specific_.abiReturnRegI64; + needI64(rv); + rp = popMemoryAccess(access, &check); +#else + rp = popMemoryAccess(access, &check); + rv = needI64(); +#endif + instance = maybeLoadInstanceForAccess(access, check); + memoryBase = maybeLoadMemoryBaseForAccess(instance, access); + load(access, &check, instance, memoryBase, rp, AnyReg(rv), temp); + push(rv); + free(rp); + break; + } + case ValType::F32: { + RegType rp = popMemoryAccess(access, &check); + RegF32 rv = needF32(); + instance = maybeLoadInstanceForAccess(access, check); + memoryBase = maybeLoadMemoryBaseForAccess(instance, access); + load(access, &check, instance, memoryBase, rp, AnyReg(rv), temp); + push(rv); + free(rp); + break; + } + case ValType::F64: { + RegType rp = popMemoryAccess(access, &check); + RegF64 rv = needF64(); + instance = maybeLoadInstanceForAccess(access, check); + memoryBase = maybeLoadMemoryBaseForAccess(instance, access); + load(access, &check, instance, memoryBase, rp, AnyReg(rv), temp); + push(rv); + free(rp); + break; + } +#ifdef ENABLE_WASM_SIMD + case ValType::V128: { + RegType rp = popMemoryAccess(access, &check); + RegV128 rv = needV128(); + instance = maybeLoadInstanceForAccess(access, check); + memoryBase = maybeLoadMemoryBaseForAccess(instance, access); + load(access, &check, instance, memoryBase, rp, AnyReg(rv), temp); + push(rv); + free(rp); + break; + } +#endif + default: + MOZ_CRASH("load type"); + break; + } + +#ifndef RABALDR_PIN_INSTANCE + maybeFree(instance); +#endif +#ifdef WASM_HAS_HEAPREG + if (memoryBase != HeapReg) { + maybeFree(memoryBase); + } +#else + maybeFree(memoryBase); +#endif + maybeFree(temp); +} + +void BaseCompiler::loadCommon(MemoryAccessDesc* access, AccessCheck check, + ValType type) 
{ + if (isMem32(access->memoryIndex())) { + doLoadCommon(access, check, type); + } else { +#ifdef ENABLE_WASM_MEMORY64 + doLoadCommon(access, check, type); +#else + MOZ_CRASH("Memory64 not enabled / supported on this platform"); +#endif + } +} + +template +void BaseCompiler::doStoreCommon(MemoryAccessDesc* access, AccessCheck check, + ValType resultType) { + RegPtr instance; + RegPtr memoryBase; + RegType temp; +#if defined(JS_CODEGEN_MIPS64) + temp = need(); +#endif + + switch (resultType.kind()) { + case ValType::I32: { + RegI32 rv = popI32(); + RegType rp = popMemoryAccess(access, &check); + instance = maybeLoadInstanceForAccess(access, check); + memoryBase = maybeLoadMemoryBaseForAccess(instance, access); + store(access, &check, instance, memoryBase, rp, AnyReg(rv), temp); + free(rp); + free(rv); + break; + } + case ValType::I64: { + RegI64 rv = popI64(); + RegType rp = popMemoryAccess(access, &check); + instance = maybeLoadInstanceForAccess(access, check); + memoryBase = maybeLoadMemoryBaseForAccess(instance, access); + store(access, &check, instance, memoryBase, rp, AnyReg(rv), temp); + free(rp); + free(rv); + break; + } + case ValType::F32: { + RegF32 rv = popF32(); + RegType rp = popMemoryAccess(access, &check); + instance = maybeLoadInstanceForAccess(access, check); + memoryBase = maybeLoadMemoryBaseForAccess(instance, access); + store(access, &check, instance, memoryBase, rp, AnyReg(rv), temp); + free(rp); + free(rv); + break; + } + case ValType::F64: { + RegF64 rv = popF64(); + RegType rp = popMemoryAccess(access, &check); + instance = maybeLoadInstanceForAccess(access, check); + memoryBase = maybeLoadMemoryBaseForAccess(instance, access); + store(access, &check, instance, memoryBase, rp, AnyReg(rv), temp); + free(rp); + free(rv); + break; + } +#ifdef ENABLE_WASM_SIMD + case ValType::V128: { + RegV128 rv = popV128(); + RegType rp = popMemoryAccess(access, &check); + instance = maybeLoadInstanceForAccess(access, check); + memoryBase = maybeLoadMemoryBaseForAccess(instance, access); + store(access, &check, instance, memoryBase, rp, AnyReg(rv), temp); + free(rp); + free(rv); + break; + } +#endif + default: + MOZ_CRASH("store type"); + break; + } + +#ifndef RABALDR_PIN_INSTANCE + maybeFree(instance); +#endif +#ifdef WASM_HAS_HEAPREG + if (memoryBase != HeapReg) { + maybeFree(memoryBase); + } +#else + maybeFree(memoryBase); +#endif + maybeFree(temp); +} + +void BaseCompiler::storeCommon(MemoryAccessDesc* access, AccessCheck check, + ValType type) { + if (isMem32(access->memoryIndex())) { + doStoreCommon(access, check, type); + } else { +#ifdef ENABLE_WASM_MEMORY64 + doStoreCommon(access, check, type); +#else + MOZ_CRASH("Memory64 not enabled / supported on this platform"); +#endif + } +} + +// Convert something that may contain a heap index into a Register that can be +// used in an access. + +static inline Register ToRegister(RegI32 r) { return Register(r); } +#ifdef ENABLE_WASM_MEMORY64 +# ifdef JS_PUNBOX64 +static inline Register ToRegister(RegI64 r) { return r.reg; } +# else +static inline Register ToRegister(RegI64 r) { return r.low; } +# endif +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// Atomic operations. +// +// The atomic operations have very diverse per-platform needs for register +// allocation and temps. 
To handle that, the implementations are structured as +// a per-operation framework method that calls into platform-specific helpers +// (usually called PopAndAllocate, Perform, and Deallocate) in a per-operation +// namespace. This structure results in a little duplication and boilerplate +// but is otherwise clean and flexible and keeps code and supporting definitions +// entirely co-located. + +// Some consumers depend on the returned Address not incorporating instance, as +// instance may be the scratch register. +// +// RegIndexType is RegI32 for Memory32 and RegI64 for Memory64. +template +Address BaseCompiler::prepareAtomicMemoryAccess(MemoryAccessDesc* access, + AccessCheck* check, + RegPtr instance, + RegIndexType ptr) { + MOZ_ASSERT(needInstanceForAccess(access, *check) == instance.isValid()); + prepareMemoryAccess(access, check, instance, ptr); + +#ifdef WASM_HAS_HEAPREG + if (access->memoryIndex() == 0) { + masm.addPtr(HeapReg, ToRegister(ptr)); + } else { + masm.addPtr( + Address(instance, instanceOffsetOfMemoryBase(access->memoryIndex())), + ToRegister(ptr)); + } +#else + masm.addPtr( + Address(instance, instanceOffsetOfMemoryBase(access->memoryIndex())), + ToRegister(ptr)); +#endif + + // At this point, 64-bit offsets will have been resolved. + return Address(ToRegister(ptr), access->offset()); +} + +#ifndef WASM_HAS_HEAPREG +# ifdef JS_CODEGEN_X86 +using ScratchAtomicNoHeapReg = ScratchEBX; +# else +# error "Unimplemented porting interface" +# endif +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// Atomic load and store. + +namespace atomic_load64 { + +#ifdef JS_CODEGEN_ARM + +static void Allocate(BaseCompiler* bc, RegI64* rd, RegI64*) { + *rd = bc->needI64Pair(); +} + +static void Deallocate(BaseCompiler* bc, RegI64) {} + +#elif defined JS_CODEGEN_X86 + +static void Allocate(BaseCompiler* bc, RegI64* rd, RegI64* temp) { + // The result is in edx:eax, and we need ecx:ebx as a temp. But ebx will also + // be used as a scratch, so don't manage that here. + bc->needI32(bc->specific_.ecx); + *temp = bc->specific_.ecx_ebx; + bc->needI64(bc->specific_.edx_eax); + *rd = bc->specific_.edx_eax; +} + +static void Deallocate(BaseCompiler* bc, RegI64 temp) { + // See comment above. 
+ MOZ_ASSERT(temp.high == js::jit::ecx); + bc->freeI32(bc->specific_.ecx); +} + +#elif defined(__wasi__) || (defined(JS_CODEGEN_NONE) && !defined(JS_64BIT)) + +static void Allocate(BaseCompiler*, RegI64*, RegI64*) {} +static void Deallocate(BaseCompiler*, RegI64) {} + +#endif + +} // namespace atomic_load64 + +#if !defined(JS_64BIT) +template +void BaseCompiler::atomicLoad64(MemoryAccessDesc* access) { + RegI64 rd, temp; + atomic_load64::Allocate(this, &rd, &temp); + + AccessCheck check; + RegIndexType rp = popMemoryAccess(access, &check); + +# ifdef WASM_HAS_HEAPREG + RegPtr instance = maybeLoadInstanceForAccess(access, check); + auto memaddr = prepareAtomicMemoryAccess(access, &check, instance, rp); + masm.wasmAtomicLoad64(*access, memaddr, temp, rd); +# ifndef RABALDR_PIN_INSTANCE + maybeFree(instance); +# endif +# else + ScratchAtomicNoHeapReg scratch(*this); + RegPtr instance = + maybeLoadInstanceForAccess(access, check, RegIntptrToRegPtr(scratch)); + auto memaddr = prepareAtomicMemoryAccess(access, &check, instance, rp); + masm.wasmAtomicLoad64(*access, memaddr, temp, rd); + MOZ_ASSERT(instance == scratch); +# endif + + free(rp); + atomic_load64::Deallocate(this, temp); + pushI64(rd); +} +#endif + +void BaseCompiler::atomicLoad(MemoryAccessDesc* access, ValType type) { + Scalar::Type viewType = access->type(); + if (Scalar::byteSize(viewType) <= sizeof(void*)) { + loadCommon(access, AccessCheck(), type); + return; + } + + MOZ_ASSERT(type == ValType::I64 && Scalar::byteSize(viewType) == 8); + +#if !defined(JS_64BIT) + if (isMem32(access->memoryIndex())) { + atomicLoad64(access); + } else { +# ifdef ENABLE_WASM_MEMORY64 + atomicLoad64(access); +# else + MOZ_CRASH("Memory64 not enabled / supported on this platform"); +# endif + } +#else + MOZ_CRASH("Should not happen"); +#endif +} + +void BaseCompiler::atomicStore(MemoryAccessDesc* access, ValType type) { + Scalar::Type viewType = access->type(); + + if (Scalar::byteSize(viewType) <= sizeof(void*)) { + storeCommon(access, AccessCheck(), type); + return; + } + + MOZ_ASSERT(type == ValType::I64 && Scalar::byteSize(viewType) == 8); + +#if !defined(JS_64BIT) + if (isMem32(access->memoryIndex())) { + atomicXchg64(access, WantResult(false)); + } else { +# ifdef ENABLE_WASM_MEMORY64 + atomicXchg64(access, WantResult(false)); +# else + MOZ_CRASH("Memory64 not enabled / supported on this platform"); +# endif + } +#else + MOZ_CRASH("Should not happen"); +#endif +} + +////////////////////////////////////////////////////////////////////////////// +// +// Atomic RMW op= operations. + +void BaseCompiler::atomicRMW(MemoryAccessDesc* access, ValType type, + AtomicOp op) { + Scalar::Type viewType = access->type(); + if (Scalar::byteSize(viewType) <= 4) { + if (isMem32(access->memoryIndex())) { + atomicRMW32(access, type, op); + } else { +#ifdef ENABLE_WASM_MEMORY64 + atomicRMW32(access, type, op); +#else + MOZ_CRASH("Memory64 not enabled / supported on this platform"); +#endif + } + } else { + MOZ_ASSERT(type == ValType::I64 && Scalar::byteSize(viewType) == 8); + if (isMem32(access->memoryIndex())) { + atomicRMW64(access, type, op); + } else { +#ifdef ENABLE_WASM_MEMORY64 + atomicRMW64(access, type, op); +#else + MOZ_CRASH("Memory64 not enabled / supported on this platform"); +#endif + } + } +} + +namespace atomic_rmw32 { + +#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86) + +struct Temps { + // On x86 we use the ScratchI32 for the temp, otherwise we'd run out of + // registers for 64-bit operations. 
+# if defined(JS_CODEGEN_X64) + RegI32 t0; +# endif +}; + +static void PopAndAllocate(BaseCompiler* bc, ValType type, + Scalar::Type viewType, AtomicOp op, RegI32* rd, + RegI32* rv, Temps* temps) { + bc->needI32(bc->specific_.eax); + if (op == AtomicFetchAddOp || op == AtomicFetchSubOp) { + // We use xadd, so source and destination are the same. Using + // eax here is overconstraining, but for byte operations on x86 + // we do need something with a byte register. + if (type == ValType::I64) { + *rv = bc->popI64ToSpecificI32(bc->specific_.eax); + } else { + *rv = bc->popI32ToSpecific(bc->specific_.eax); + } + *rd = *rv; + } else { + // We use a cmpxchg loop. The output must be eax; the input + // must be in a separate register since it may be used several + // times. + if (type == ValType::I64) { + *rv = bc->popI64ToI32(); + } else { + *rv = bc->popI32(); + } + *rd = bc->specific_.eax; +# ifdef JS_CODEGEN_X64 + temps->t0 = bc->needI32(); +# endif + } +} + +template +static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, T srcAddr, + AtomicOp op, RegI32 rv, RegI32 rd, const Temps& temps) { +# ifdef JS_CODEGEN_X64 + RegI32 temp = temps.t0; +# else + RegI32 temp; + ScratchI32 scratch(*bc); + if (op != AtomicFetchAddOp && op != AtomicFetchSubOp) { + temp = scratch; + } +# endif + bc->masm.wasmAtomicFetchOp(access, op, rv, srcAddr, temp, rd); +} + +static void Deallocate(BaseCompiler* bc, RegI32 rv, const Temps& temps) { + if (rv != bc->specific_.eax) { + bc->freeI32(rv); + } +# ifdef JS_CODEGEN_X64 + bc->maybeFree(temps.t0); +# endif +} + +#elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64) + +struct Temps { + RegI32 t0; +}; + +static void PopAndAllocate(BaseCompiler* bc, ValType type, + Scalar::Type viewType, AtomicOp op, RegI32* rd, + RegI32* rv, Temps* temps) { + *rv = type == ValType::I64 ? bc->popI64ToI32() : bc->popI32(); + temps->t0 = bc->needI32(); + *rd = bc->needI32(); +} + +static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, + Address srcAddr, AtomicOp op, RegI32 rv, RegI32 rd, + const Temps& temps) { + bc->masm.wasmAtomicFetchOp(access, op, rv, srcAddr, temps.t0, rd); +} + +static void Deallocate(BaseCompiler* bc, RegI32 rv, const Temps& temps) { + bc->freeI32(rv); + bc->freeI32(temps.t0); +} + +#elif defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) + +struct Temps { + RegI32 t0, t1, t2; +}; + +static void PopAndAllocate(BaseCompiler* bc, ValType type, + Scalar::Type viewType, AtomicOp op, RegI32* rd, + RegI32* rv, Temps* temps) { + *rv = type == ValType::I64 ? bc->popI64ToI32() : bc->popI32(); + if (Scalar::byteSize(viewType) < 4) { + temps->t0 = bc->needI32(); + temps->t1 = bc->needI32(); + temps->t2 = bc->needI32(); + } + *rd = bc->needI32(); +} + +static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, + Address srcAddr, AtomicOp op, RegI32 rv, RegI32 rd, + const Temps& temps) { + bc->masm.wasmAtomicFetchOp(access, op, rv, srcAddr, temps.t0, temps.t1, + temps.t2, rd); +} + +static void Deallocate(BaseCompiler* bc, RegI32 rv, const Temps& temps) { + bc->freeI32(rv); + bc->maybeFree(temps.t0); + bc->maybeFree(temps.t1); + bc->maybeFree(temps.t2); +} + +#elif defined(JS_CODEGEN_RISCV64) + +struct Temps { + RegI32 t0, t1, t2; +}; + +static void PopAndAllocate(BaseCompiler* bc, ValType type, + Scalar::Type viewType, AtomicOp op, RegI32* rd, + RegI32* rv, Temps* temps) { + *rv = type == ValType::I64 ? 
bc->popI64ToI32() : bc->popI32(); + if (Scalar::byteSize(viewType) < 4) { + temps->t0 = bc->needI32(); + temps->t1 = bc->needI32(); + temps->t2 = bc->needI32(); + } + *rd = bc->needI32(); +} + +static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, + Address srcAddr, AtomicOp op, RegI32 rv, RegI32 rd, + const Temps& temps) { + bc->masm.wasmAtomicFetchOp(access, op, rv, srcAddr, temps.t0, temps.t1, + temps.t2, rd); +} + +static void Deallocate(BaseCompiler* bc, RegI32 rv, const Temps& temps) { + bc->freeI32(rv); + bc->maybeFree(temps.t0); + bc->maybeFree(temps.t1); + bc->maybeFree(temps.t2); +} + +#elif defined(JS_CODEGEN_NONE) || defined(JS_CODEGEN_WASM32) + +using Temps = Nothing; + +static void PopAndAllocate(BaseCompiler*, ValType, Scalar::Type, AtomicOp, + RegI32*, RegI32*, Temps*) {} + +static void Perform(BaseCompiler*, const MemoryAccessDesc&, Address, AtomicOp, + RegI32, RegI32, const Temps&) {} + +static void Deallocate(BaseCompiler*, RegI32, const Temps&) {} + +#endif + +} // namespace atomic_rmw32 + +template +void BaseCompiler::atomicRMW32(MemoryAccessDesc* access, ValType type, + AtomicOp op) { + Scalar::Type viewType = access->type(); + RegI32 rd, rv; + atomic_rmw32::Temps temps; + atomic_rmw32::PopAndAllocate(this, type, viewType, op, &rd, &rv, &temps); + + AccessCheck check; + RegIndexType rp = popMemoryAccess(access, &check); + RegPtr instance = maybeLoadInstanceForAccess(access, check); + + auto memaddr = prepareAtomicMemoryAccess(access, &check, instance, rp); + atomic_rmw32::Perform(this, *access, memaddr, op, rv, rd, temps); + +#ifndef RABALDR_PIN_INSTANCE + maybeFree(instance); +#endif + atomic_rmw32::Deallocate(this, rv, temps); + free(rp); + + if (type == ValType::I64) { + pushU32AsI64(rd); + } else { + pushI32(rd); + } +} + +namespace atomic_rmw64 { + +#if defined(JS_CODEGEN_X64) + +static void PopAndAllocate(BaseCompiler* bc, AtomicOp op, RegI64* rd, + RegI64* rv, RegI64* temp) { + if (op == AtomicFetchAddOp || op == AtomicFetchSubOp) { + // We use xaddq, so input and output must be the same register. + *rv = bc->popI64(); + *rd = *rv; + } else { + // We use a cmpxchgq loop, so the output must be rax and we need a temp. + bc->needI64(bc->specific_.rax); + *rd = bc->specific_.rax; + *rv = bc->popI64(); + *temp = bc->needI64(); + } +} + +static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, + Address srcAddr, AtomicOp op, RegI64 rv, RegI64 temp, + RegI64 rd) { + bc->masm.wasmAtomicFetchOp64(access, op, rv, srcAddr, temp, rd); +} + +static void Deallocate(BaseCompiler* bc, AtomicOp op, RegI64 rv, RegI64 temp) { + bc->maybeFree(temp); + if (op != AtomicFetchAddOp && op != AtomicFetchSubOp) { + bc->freeI64(rv); + } +} + +#elif defined(JS_CODEGEN_X86) + +// Register allocation is tricky, see comments at atomic_xchg64 below. +// +// - Initially rv=ecx:edx and eax is reserved, rd=unallocated. +// - Then rp is popped into esi+edi because those are the only available. +// - The Setup operation makes rd=edx:eax. +// - Deallocation then frees only the ecx part of rv. +// +// The temp is unused here. 
+ +static void PopAndAllocate(BaseCompiler* bc, AtomicOp op, RegI64* rd, + RegI64* rv, RegI64*) { + bc->needI32(bc->specific_.eax); + bc->needI32(bc->specific_.ecx); + bc->needI32(bc->specific_.edx); + *rv = RegI64(Register64(bc->specific_.ecx, bc->specific_.edx)); + bc->popI64ToSpecific(*rv); +} + +static void Setup(BaseCompiler* bc, RegI64* rd) { *rd = bc->specific_.edx_eax; } + +static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, + Address srcAddr, AtomicOp op, RegI64 rv, RegI64, RegI64 rd, + const ScratchAtomicNoHeapReg& scratch) { + MOZ_ASSERT(rv.high == bc->specific_.ecx); + MOZ_ASSERT(Register(scratch) == js::jit::ebx); + + bc->fr.pushGPR(rv.high); + bc->fr.pushGPR(rv.low); + Address value(StackPointer, 0); + + bc->masm.wasmAtomicFetchOp64(access, op, value, srcAddr, + bc->specific_.ecx_ebx, rd); + + bc->fr.popBytes(8); +} + +static void Deallocate(BaseCompiler* bc, AtomicOp, RegI64, RegI64) { + bc->freeI32(bc->specific_.ecx); +} + +#elif defined(JS_CODEGEN_ARM) + +static void PopAndAllocate(BaseCompiler* bc, AtomicOp op, RegI64* rd, + RegI64* rv, RegI64* temp) { + // We use a ldrex/strexd loop so the temp and the output must be + // odd/even pairs. + *rv = bc->popI64(); + *temp = bc->needI64Pair(); + *rd = bc->needI64Pair(); +} + +static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, + Address srcAddr, AtomicOp op, RegI64 rv, RegI64 temp, + RegI64 rd) { + bc->masm.wasmAtomicFetchOp64(access, op, rv, srcAddr, temp, rd); +} + +static void Deallocate(BaseCompiler* bc, AtomicOp op, RegI64 rv, RegI64 temp) { + bc->freeI64(rv); + bc->freeI64(temp); +} + +#elif defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_MIPS64) || \ + defined(JS_CODEGEN_LOONG64) + +static void PopAndAllocate(BaseCompiler* bc, AtomicOp op, RegI64* rd, + RegI64* rv, RegI64* temp) { + *rv = bc->popI64(); + *temp = bc->needI64(); + *rd = bc->needI64(); +} + +static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, + Address srcAddr, AtomicOp op, RegI64 rv, RegI64 temp, + RegI64 rd) { + bc->masm.wasmAtomicFetchOp64(access, op, rv, srcAddr, temp, rd); +} + +static void Deallocate(BaseCompiler* bc, AtomicOp op, RegI64 rv, RegI64 temp) { + bc->freeI64(rv); + bc->freeI64(temp); +} +#elif defined(JS_CODEGEN_RISCV64) + +static void PopAndAllocate(BaseCompiler* bc, AtomicOp op, RegI64* rd, + RegI64* rv, RegI64* temp) { + *rv = bc->popI64(); + *temp = bc->needI64(); + *rd = bc->needI64(); +} + +static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, + Address srcAddr, AtomicOp op, RegI64 rv, RegI64 temp, + RegI64 rd) { + bc->masm.wasmAtomicFetchOp64(access, op, rv, srcAddr, temp, rd); +} + +static void Deallocate(BaseCompiler* bc, AtomicOp op, RegI64 rv, RegI64 temp) { + bc->freeI64(rv); + bc->freeI64(temp); +} + +#elif defined(JS_CODEGEN_NONE) || defined(JS_CODEGEN_WASM32) + +static void PopAndAllocate(BaseCompiler*, AtomicOp, RegI64*, RegI64*, RegI64*) { +} + +static void Perform(BaseCompiler*, const MemoryAccessDesc&, Address, + AtomicOp op, RegI64, RegI64, RegI64) {} + +static void Deallocate(BaseCompiler*, AtomicOp, RegI64, RegI64) {} + +#endif + +} // namespace atomic_rmw64 + +template +void BaseCompiler::atomicRMW64(MemoryAccessDesc* access, ValType type, + AtomicOp op) { + RegI64 rd, rv, temp; + atomic_rmw64::PopAndAllocate(this, op, &rd, &rv, &temp); + + AccessCheck check; + RegIndexType rp = popMemoryAccess(access, &check); + +#if defined(WASM_HAS_HEAPREG) + RegPtr instance = maybeLoadInstanceForAccess(access, check); + auto memaddr = 
prepareAtomicMemoryAccess(access, &check, instance, rp); + atomic_rmw64::Perform(this, *access, memaddr, op, rv, temp, rd); +# ifndef RABALDR_PIN_INSTANCE + maybeFree(instance); +# endif +#else + ScratchAtomicNoHeapReg scratch(*this); + RegPtr instance = + maybeLoadInstanceForAccess(access, check, RegIntptrToRegPtr(scratch)); + auto memaddr = prepareAtomicMemoryAccess(access, &check, instance, rp); + atomic_rmw64::Setup(this, &rd); + atomic_rmw64::Perform(this, *access, memaddr, op, rv, temp, rd, scratch); + MOZ_ASSERT(instance == scratch); +#endif + + free(rp); + atomic_rmw64::Deallocate(this, op, rv, temp); + + pushI64(rd); +} + +////////////////////////////////////////////////////////////////////////////// +// +// Atomic exchange (also used for atomic store in some cases). + +void BaseCompiler::atomicXchg(MemoryAccessDesc* access, ValType type) { + Scalar::Type viewType = access->type(); + if (Scalar::byteSize(viewType) <= 4) { + if (isMem32(access->memoryIndex())) { + atomicXchg32(access, type); + } else { +#ifdef ENABLE_WASM_MEMORY64 + atomicXchg32(access, type); +#else + MOZ_CRASH("Memory64 not enabled / supported on this platform"); +#endif + } + } else { + MOZ_ASSERT(type == ValType::I64 && Scalar::byteSize(viewType) == 8); + if (isMem32(access->memoryIndex())) { + atomicXchg64(access, WantResult(true)); + } else { +#ifdef ENABLE_WASM_MEMORY64 + atomicXchg64(access, WantResult(true)); +#else + MOZ_CRASH("Memory64 not enabled / supported on this platform"); +#endif + } + } +} + +namespace atomic_xchg32 { + +#if defined(JS_CODEGEN_X64) + +using Temps = Nothing; + +static void PopAndAllocate(BaseCompiler* bc, ValType type, + Scalar::Type viewType, RegI32* rd, RegI32* rv, + Temps*) { + // The xchg instruction reuses rv as rd. + *rv = (type == ValType::I64) ? bc->popI64ToI32() : bc->popI32(); + *rd = *rv; +} + +static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, + Address srcAddr, RegI32 rv, RegI32 rd, const Temps&) { + bc->masm.wasmAtomicExchange(access, srcAddr, rv, rd); +} + +static void Deallocate(BaseCompiler* bc, RegI32, const Temps&) {} + +#elif defined(JS_CODEGEN_X86) + +using Temps = Nothing; + +static void PopAndAllocate(BaseCompiler* bc, ValType type, + Scalar::Type viewType, RegI32* rd, RegI32* rv, + Temps*) { + // The xchg instruction reuses rv as rd. + *rv = (type == ValType::I64) ? bc->popI64ToI32() : bc->popI32(); + *rd = *rv; +} + +static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, + Address srcAddr, RegI32 rv, RegI32 rd, const Temps&) { + if (access.type() == Scalar::Uint8 && !bc->ra.isSingleByteI32(rd)) { + ScratchI8 scratch(*bc); + // The output register must have a byte persona. + bc->masm.wasmAtomicExchange(access, srcAddr, rv, scratch); + bc->masm.movl(scratch, rd); + } else { + bc->masm.wasmAtomicExchange(access, srcAddr, rv, rd); + } +} + +static void Deallocate(BaseCompiler* bc, RegI32, const Temps&) {} + +#elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64) + +using Temps = Nothing; + +static void PopAndAllocate(BaseCompiler* bc, ValType type, + Scalar::Type viewType, RegI32* rd, RegI32* rv, + Temps*) { + *rv = (type == ValType::I64) ? 
bc->popI64ToI32() : bc->popI32(); + *rd = bc->needI32(); +} + +static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, + Address srcAddr, RegI32 rv, RegI32 rd, const Temps&) { + bc->masm.wasmAtomicExchange(access, srcAddr, rv, rd); +} + +static void Deallocate(BaseCompiler* bc, RegI32 rv, const Temps&) { + bc->freeI32(rv); +} + +#elif defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) + +struct Temps { + RegI32 t0, t1, t2; +}; + +static void PopAndAllocate(BaseCompiler* bc, ValType type, + Scalar::Type viewType, RegI32* rd, RegI32* rv, + Temps* temps) { + *rv = (type == ValType::I64) ? bc->popI64ToI32() : bc->popI32(); + if (Scalar::byteSize(viewType) < 4) { + temps->t0 = bc->needI32(); + temps->t1 = bc->needI32(); + temps->t2 = bc->needI32(); + } + *rd = bc->needI32(); +} + +static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, + Address srcAddr, RegI32 rv, RegI32 rd, const Temps& temps) { + bc->masm.wasmAtomicExchange(access, srcAddr, rv, temps.t0, temps.t1, temps.t2, + rd); +} + +static void Deallocate(BaseCompiler* bc, RegI32 rv, const Temps& temps) { + bc->freeI32(rv); + bc->maybeFree(temps.t0); + bc->maybeFree(temps.t1); + bc->maybeFree(temps.t2); +} + +#elif defined(JS_CODEGEN_RISCV64) + +struct Temps { + RegI32 t0, t1, t2; +}; + +static void PopAndAllocate(BaseCompiler* bc, ValType type, + Scalar::Type viewType, RegI32* rd, RegI32* rv, + Temps* temps) { + *rv = (type == ValType::I64) ? bc->popI64ToI32() : bc->popI32(); + if (Scalar::byteSize(viewType) < 4) { + temps->t0 = bc->needI32(); + temps->t1 = bc->needI32(); + temps->t2 = bc->needI32(); + } + *rd = bc->needI32(); +} + +static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, + Address srcAddr, RegI32 rv, RegI32 rd, const Temps& temps) { + bc->masm.wasmAtomicExchange(access, srcAddr, rv, temps.t0, temps.t1, temps.t2, + rd); +} + +static void Deallocate(BaseCompiler* bc, RegI32 rv, const Temps& temps) { + bc->freeI32(rv); + bc->maybeFree(temps.t0); + bc->maybeFree(temps.t1); + bc->maybeFree(temps.t2); +} + +#elif defined(JS_CODEGEN_NONE) || defined(JS_CODEGEN_WASM32) + +using Temps = Nothing; + +static void PopAndAllocate(BaseCompiler*, ValType, Scalar::Type, RegI32*, + RegI32*, Temps*) {} +static void Perform(BaseCompiler*, const MemoryAccessDesc&, Address, RegI32, + RegI32, const Temps&) {} +static void Deallocate(BaseCompiler*, RegI32, const Temps&) {} + +#endif + +} // namespace atomic_xchg32 + +template +void BaseCompiler::atomicXchg32(MemoryAccessDesc* access, ValType type) { + Scalar::Type viewType = access->type(); + + RegI32 rd, rv; + atomic_xchg32::Temps temps; + atomic_xchg32::PopAndAllocate(this, type, viewType, &rd, &rv, &temps); + + AccessCheck check; + + RegIndexType rp = popMemoryAccess(access, &check); + RegPtr instance = maybeLoadInstanceForAccess(access, check); + + auto memaddr = prepareAtomicMemoryAccess(access, &check, instance, rp); + atomic_xchg32::Perform(this, *access, memaddr, rv, rd, temps); + +#ifndef RABALDR_PIN_INSTANCE + maybeFree(instance); +#endif + free(rp); + atomic_xchg32::Deallocate(this, rv, temps); + + if (type == ValType::I64) { + pushU32AsI64(rd); + } else { + pushI32(rd); + } +} + +namespace atomic_xchg64 { + +#if defined(JS_CODEGEN_X64) + +static void PopAndAllocate(BaseCompiler* bc, RegI64* rd, RegI64* rv) { + *rv = bc->popI64(); + *rd = *rv; +} + +static void Deallocate(BaseCompiler* bc, RegI64 rd, RegI64) { + bc->maybeFree(rd); +} + +#elif defined(JS_CODEGEN_X86) + +// Register allocation is tricky in several ways. 
+// +// - For a 64-bit access on memory64 we need six registers for rd, rv, and rp, +// but have only five (as the temp ebx is needed too), so we target all +// registers explicitly to make sure there's space. +// +// - We'll be using cmpxchg8b, and when we do the operation, rv must be in +// ecx:ebx, and rd must be edx:eax. We can't use ebx for rv initially because +// we need ebx for a scratch also, so use a separate temp and move the value +// to ebx just before the operation. +// +// In sum: +// +// - Initially rv=ecx:edx and eax is reserved, rd=unallocated. +// - Then rp is popped into esi+edi because those are the only available. +// - The Setup operation makes rv=ecx:ebx and rd=edx:eax and moves edx->ebx. +// - Deallocation then frees only the ecx part of rv. + +static void PopAndAllocate(BaseCompiler* bc, RegI64* rd, RegI64* rv) { + bc->needI32(bc->specific_.ecx); + bc->needI32(bc->specific_.edx); + bc->needI32(bc->specific_.eax); + *rv = RegI64(Register64(bc->specific_.ecx, bc->specific_.edx)); + bc->popI64ToSpecific(*rv); +} + +static void Setup(BaseCompiler* bc, RegI64* rv, RegI64* rd, + const ScratchAtomicNoHeapReg& scratch) { + MOZ_ASSERT(rv->high == bc->specific_.ecx); + MOZ_ASSERT(Register(scratch) == js::jit::ebx); + bc->masm.move32(rv->low, scratch); + *rv = bc->specific_.ecx_ebx; + *rd = bc->specific_.edx_eax; +} + +static void Deallocate(BaseCompiler* bc, RegI64 rd, RegI64 rv) { + MOZ_ASSERT(rd == bc->specific_.edx_eax || rd == RegI64::Invalid()); + bc->maybeFree(rd); + bc->freeI32(bc->specific_.ecx); +} + +#elif defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_MIPS64) || \ + defined(JS_CODEGEN_LOONG64) + +static void PopAndAllocate(BaseCompiler* bc, RegI64* rd, RegI64* rv) { + *rv = bc->popI64(); + *rd = bc->needI64(); +} + +static void Deallocate(BaseCompiler* bc, RegI64 rd, RegI64 rv) { + bc->freeI64(rv); + bc->maybeFree(rd); +} + +#elif defined(JS_CODEGEN_ARM) + +static void PopAndAllocate(BaseCompiler* bc, RegI64* rd, RegI64* rv) { + // Both rv and rd must be odd/even pairs. 
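+  // (ldrexd/strexd require a consecutive even/odd register pair;
+  // needI64Pair() is used here to satisfy that constraint.)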
+ *rv = bc->popI64ToSpecific(bc->needI64Pair()); + *rd = bc->needI64Pair(); +} + +static void Deallocate(BaseCompiler* bc, RegI64 rd, RegI64 rv) { + bc->freeI64(rv); + bc->maybeFree(rd); +} + +#elif defined(JS_CODEGEN_RISCV64) + +static void PopAndAllocate(BaseCompiler* bc, RegI64* rd, RegI64* rv) { + *rv = bc->popI64(); + *rd = bc->needI64(); +} + +static void Deallocate(BaseCompiler* bc, RegI64 rd, RegI64 rv) { + bc->freeI64(rv); + bc->maybeFree(rd); +} +#elif defined(JS_CODEGEN_NONE) || defined(JS_CODEGEN_WASM32) + +static void PopAndAllocate(BaseCompiler*, RegI64*, RegI64*) {} +static void Deallocate(BaseCompiler*, RegI64, RegI64) {} + +#endif + +} // namespace atomic_xchg64 + +template +void BaseCompiler::atomicXchg64(MemoryAccessDesc* access, + WantResult wantResult) { + RegI64 rd, rv; + atomic_xchg64::PopAndAllocate(this, &rd, &rv); + + AccessCheck check; + RegIndexType rp = popMemoryAccess(access, &check); + +#ifdef WASM_HAS_HEAPREG + RegPtr instance = maybeLoadInstanceForAccess(access, check); + auto memaddr = + prepareAtomicMemoryAccess(access, &check, instance, rp); + masm.wasmAtomicExchange64(*access, memaddr, rv, rd); +# ifndef RABALDR_PIN_INSTANCE + maybeFree(instance); +# endif +#else + ScratchAtomicNoHeapReg scratch(*this); + RegPtr instance = + maybeLoadInstanceForAccess(access, check, RegIntptrToRegPtr(scratch)); + Address memaddr = prepareAtomicMemoryAccess(access, &check, instance, rp); + atomic_xchg64::Setup(this, &rv, &rd, scratch); + masm.wasmAtomicExchange64(*access, memaddr, rv, rd); + MOZ_ASSERT(instance == scratch); +#endif + + free(rp); + if (wantResult) { + pushI64(rd); + rd = RegI64::Invalid(); + } + atomic_xchg64::Deallocate(this, rd, rv); +} + +////////////////////////////////////////////////////////////////////////////// +// +// Atomic compare-exchange. + +void BaseCompiler::atomicCmpXchg(MemoryAccessDesc* access, ValType type) { + Scalar::Type viewType = access->type(); + if (Scalar::byteSize(viewType) <= 4) { + if (isMem32(access->memoryIndex())) { + atomicCmpXchg32(access, type); + } else { +#ifdef ENABLE_WASM_MEMORY64 + atomicCmpXchg32(access, type); +#else + MOZ_CRASH("Memory64 not enabled / supported on this platform"); +#endif + } + } else { + MOZ_ASSERT(type == ValType::I64 && Scalar::byteSize(viewType) == 8); + if (isMem32(access->memoryIndex())) { + atomicCmpXchg64(access, type); + } else { +#ifdef ENABLE_WASM_MEMORY64 + atomicCmpXchg64(access, type); +#else + MOZ_CRASH("Memory64 not enabled / supported on this platform"); +#endif + } + } +} + +namespace atomic_cmpxchg32 { + +#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86) + +using Temps = Nothing; + +static void PopAndAllocate(BaseCompiler* bc, ValType type, + Scalar::Type viewType, RegI32* rexpect, RegI32* rnew, + RegI32* rd, Temps*) { + // For cmpxchg, the expected value and the result are both in eax. + bc->needI32(bc->specific_.eax); + if (type == ValType::I64) { + *rnew = bc->popI64ToI32(); + *rexpect = bc->popI64ToSpecificI32(bc->specific_.eax); + } else { + *rnew = bc->popI32(); + *rexpect = bc->popI32ToSpecific(bc->specific_.eax); + } + *rd = *rexpect; +} + +template +static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, T srcAddr, + RegI32 rexpect, RegI32 rnew, RegI32 rd, const Temps&) { +# if defined(JS_CODEGEN_X86) + ScratchI8 scratch(*bc); + if (access.type() == Scalar::Uint8) { + MOZ_ASSERT(rd == bc->specific_.eax); + if (!bc->ra.isSingleByteI32(rnew)) { + // The replacement value must have a byte persona. 
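+      // (On x86-32, only eax, ebx, ecx and edx have an 8-bit subregister,
+      // so a replacement value living in esi, edi or ebp must be moved to
+      // a byte-addressable scratch before a byte-sized cmpxchg.)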
+ bc->masm.movl(rnew, scratch); + rnew = scratch; + } + } +# endif + bc->masm.wasmCompareExchange(access, srcAddr, rexpect, rnew, rd); +} + +static void Deallocate(BaseCompiler* bc, RegI32, RegI32 rnew, const Temps&) { + bc->freeI32(rnew); +} + +#elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64) + +using Temps = Nothing; + +static void PopAndAllocate(BaseCompiler* bc, ValType type, + Scalar::Type viewType, RegI32* rexpect, RegI32* rnew, + RegI32* rd, Temps*) { + if (type == ValType::I64) { + *rnew = bc->popI64ToI32(); + *rexpect = bc->popI64ToI32(); + } else { + *rnew = bc->popI32(); + *rexpect = bc->popI32(); + } + *rd = bc->needI32(); +} + +static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, + Address srcAddr, RegI32 rexpect, RegI32 rnew, RegI32 rd, + const Temps&) { + bc->masm.wasmCompareExchange(access, srcAddr, rexpect, rnew, rd); +} + +static void Deallocate(BaseCompiler* bc, RegI32 rexpect, RegI32 rnew, + const Temps&) { + bc->freeI32(rnew); + bc->freeI32(rexpect); +} + +#elif defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) + +struct Temps { + RegI32 t0, t1, t2; +}; + +static void PopAndAllocate(BaseCompiler* bc, ValType type, + Scalar::Type viewType, RegI32* rexpect, RegI32* rnew, + RegI32* rd, Temps* temps) { + if (type == ValType::I64) { + *rnew = bc->popI64ToI32(); + *rexpect = bc->popI64ToI32(); + } else { + *rnew = bc->popI32(); + *rexpect = bc->popI32(); + } + if (Scalar::byteSize(viewType) < 4) { + temps->t0 = bc->needI32(); + temps->t1 = bc->needI32(); + temps->t2 = bc->needI32(); + } + *rd = bc->needI32(); +} + +static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, + Address srcAddr, RegI32 rexpect, RegI32 rnew, RegI32 rd, + const Temps& temps) { + bc->masm.wasmCompareExchange(access, srcAddr, rexpect, rnew, temps.t0, + temps.t1, temps.t2, rd); +} + +static void Deallocate(BaseCompiler* bc, RegI32 rexpect, RegI32 rnew, + const Temps& temps) { + bc->freeI32(rnew); + bc->freeI32(rexpect); + bc->maybeFree(temps.t0); + bc->maybeFree(temps.t1); + bc->maybeFree(temps.t2); +} + +#elif defined(JS_CODEGEN_RISCV64) + +struct Temps { + RegI32 t0, t1, t2; +}; + +static void PopAndAllocate(BaseCompiler* bc, ValType type, + Scalar::Type viewType, RegI32* rexpect, RegI32* rnew, + RegI32* rd, Temps* temps) { + if (type == ValType::I64) { + *rnew = bc->popI64ToI32(); + *rexpect = bc->popI64ToI32(); + } else { + *rnew = bc->popI32(); + *rexpect = bc->popI32(); + } + if (Scalar::byteSize(viewType) < 4) { + temps->t0 = bc->needI32(); + temps->t1 = bc->needI32(); + temps->t2 = bc->needI32(); + } + *rd = bc->needI32(); +} + +static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, + Address srcAddr, RegI32 rexpect, RegI32 rnew, RegI32 rd, + const Temps& temps) { + bc->masm.wasmCompareExchange(access, srcAddr, rexpect, rnew, temps.t0, + temps.t1, temps.t2, rd); +} + +static void Deallocate(BaseCompiler* bc, RegI32 rexpect, RegI32 rnew, + const Temps& temps) { + bc->freeI32(rnew); + bc->freeI32(rexpect); + bc->maybeFree(temps.t0); + bc->maybeFree(temps.t1); + bc->maybeFree(temps.t2); +} + +#elif defined(JS_CODEGEN_NONE) || defined(JS_CODEGEN_WASM32) + +using Temps = Nothing; + +static void PopAndAllocate(BaseCompiler*, ValType, Scalar::Type, RegI32*, + RegI32*, RegI32*, Temps*) {} + +static void Perform(BaseCompiler*, const MemoryAccessDesc&, Address, RegI32, + RegI32, RegI32, const Temps& temps) {} + +static void Deallocate(BaseCompiler*, RegI32, RegI32, const Temps&) {} + +#endif + +} // namespace atomic_cmpxchg32 + +template 
+void BaseCompiler::atomicCmpXchg32(MemoryAccessDesc* access, ValType type) { + Scalar::Type viewType = access->type(); + RegI32 rexpect, rnew, rd; + atomic_cmpxchg32::Temps temps; + atomic_cmpxchg32::PopAndAllocate(this, type, viewType, &rexpect, &rnew, &rd, + &temps); + + AccessCheck check; + RegIndexType rp = popMemoryAccess(access, &check); + RegPtr instance = maybeLoadInstanceForAccess(access, check); + + auto memaddr = prepareAtomicMemoryAccess(access, &check, instance, rp); + atomic_cmpxchg32::Perform(this, *access, memaddr, rexpect, rnew, rd, temps); + +#ifndef RABALDR_PIN_INSTANCE + maybeFree(instance); +#endif + free(rp); + atomic_cmpxchg32::Deallocate(this, rexpect, rnew, temps); + + if (type == ValType::I64) { + pushU32AsI64(rd); + } else { + pushI32(rd); + } +} + +namespace atomic_cmpxchg64 { + +// The templates are needed for x86 code generation, which needs complicated +// register allocation for memory64. + +template +static void PopAndAllocate(BaseCompiler* bc, RegI64* rexpect, RegI64* rnew, + RegI64* rd); + +template +static void Deallocate(BaseCompiler* bc, RegI64 rexpect, RegI64 rnew); + +#if defined(JS_CODEGEN_X64) + +template +static void PopAndAllocate(BaseCompiler* bc, RegI64* rexpect, RegI64* rnew, + RegI64* rd) { + // For cmpxchg, the expected value and the result are both in rax. + bc->needI64(bc->specific_.rax); + *rnew = bc->popI64(); + *rexpect = bc->popI64ToSpecific(bc->specific_.rax); + *rd = *rexpect; +} + +static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, + Address srcAddr, RegI64 rexpect, RegI64 rnew, RegI64 rd) { + bc->masm.wasmCompareExchange64(access, srcAddr, rexpect, rnew, rd); +} + +template +static void Deallocate(BaseCompiler* bc, RegI64 rexpect, RegI64 rnew) { + bc->freeI64(rnew); +} + +#elif defined(JS_CODEGEN_X86) + +template +static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, + Address srcAddr, RegI64 rexpect, RegI64 rnew, RegI64 rd, + ScratchAtomicNoHeapReg& scratch); + +// Memory32: For cmpxchg8b, the expected value and the result are both in +// edx:eax, and the replacement value is in ecx:ebx. But we can't allocate ebx +// initially because we need it later for a scratch, so instead we allocate a +// temp to hold the low word of 'new'. + +template <> +void PopAndAllocate(BaseCompiler* bc, RegI64* rexpect, RegI64* rnew, + RegI64* rd) { + bc->needI64(bc->specific_.edx_eax); + bc->needI32(bc->specific_.ecx); + RegI32 tmp = bc->needI32(); + *rnew = bc->popI64ToSpecific(RegI64(Register64(bc->specific_.ecx, tmp))); + *rexpect = bc->popI64ToSpecific(bc->specific_.edx_eax); + *rd = *rexpect; +} + +template <> +void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, + Address srcAddr, RegI64 rexpect, RegI64 rnew, RegI64 rd, + ScratchAtomicNoHeapReg& scratch) { + MOZ_ASSERT(Register(scratch) == js::jit::ebx); + MOZ_ASSERT(rnew.high == bc->specific_.ecx); + bc->masm.move32(rnew.low, ebx); + bc->masm.wasmCompareExchange64(access, srcAddr, rexpect, + bc->specific_.ecx_ebx, rd); +} + +template <> +void Deallocate(BaseCompiler* bc, RegI64 rexpect, RegI64 rnew) { + bc->freeI64(rnew); +} + +// Memory64: Register allocation is particularly hairy here. With memory64, we +// have up to seven live values: i64 expected-value, i64 new-value, i64 pointer, +// and instance. The instance can use the scratch but there's no avoiding that +// we'll run out of registers. +// +// Unlike for the rmw ops, we can't use edx as the rnew.low since it's used +// for the rexpect.high. 
And we can't push anything onto the stack while we're +// popping the memory address because the memory address may be on the stack. + +# ifdef ENABLE_WASM_MEMORY64 +template <> +void PopAndAllocate(BaseCompiler* bc, RegI64* rexpect, RegI64* rnew, + RegI64* rd) { + // We reserve these (and ebx). The 64-bit pointer will end up in esi+edi. + bc->needI32(bc->specific_.eax); + bc->needI32(bc->specific_.ecx); + bc->needI32(bc->specific_.edx); + + // Pop the 'new' value and stash it in the instance scratch area. Do not + // initialize *rnew to anything. + RegI64 tmp(Register64(bc->specific_.ecx, bc->specific_.edx)); + bc->popI64ToSpecific(tmp); + { + ScratchPtr instanceScratch(*bc); + bc->stashI64(instanceScratch, tmp); + } + + *rexpect = bc->popI64ToSpecific(bc->specific_.edx_eax); + *rd = *rexpect; +} + +template <> +void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, + Address srcAddr, RegI64 rexpect, RegI64 rnew, RegI64 rd, + ScratchAtomicNoHeapReg& scratch) { + MOZ_ASSERT(rnew.isInvalid()); + rnew = bc->specific_.ecx_ebx; + + bc->unstashI64(RegPtr(Register(bc->specific_.ecx)), rnew); + bc->masm.wasmCompareExchange64(access, srcAddr, rexpect, rnew, rd); +} + +template <> +void Deallocate(BaseCompiler* bc, RegI64 rexpect, RegI64 rnew) { + // edx:ebx have been pushed as the result, and the pointer was freed + // separately in the caller, so just free ecx. + bc->free(bc->specific_.ecx); +} +# endif + +#elif defined(JS_CODEGEN_ARM) + +template +static void PopAndAllocate(BaseCompiler* bc, RegI64* rexpect, RegI64* rnew, + RegI64* rd) { + // The replacement value and the result must both be odd/even pairs. + *rnew = bc->popI64Pair(); + *rexpect = bc->popI64(); + *rd = bc->needI64Pair(); +} + +static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, + Address srcAddr, RegI64 rexpect, RegI64 rnew, RegI64 rd) { + bc->masm.wasmCompareExchange64(access, srcAddr, rexpect, rnew, rd); +} + +template +static void Deallocate(BaseCompiler* bc, RegI64 rexpect, RegI64 rnew) { + bc->freeI64(rexpect); + bc->freeI64(rnew); +} + +#elif defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_MIPS64) || \ + defined(JS_CODEGEN_LOONG64) + +template +static void PopAndAllocate(BaseCompiler* bc, RegI64* rexpect, RegI64* rnew, + RegI64* rd) { + *rnew = bc->popI64(); + *rexpect = bc->popI64(); + *rd = bc->needI64(); +} + +static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, + Address srcAddr, RegI64 rexpect, RegI64 rnew, RegI64 rd) { + bc->masm.wasmCompareExchange64(access, srcAddr, rexpect, rnew, rd); +} + +template +static void Deallocate(BaseCompiler* bc, RegI64 rexpect, RegI64 rnew) { + bc->freeI64(rexpect); + bc->freeI64(rnew); +} + +#elif defined(JS_CODEGEN_RISCV64) + +template +static void PopAndAllocate(BaseCompiler* bc, RegI64* rexpect, RegI64* rnew, + RegI64* rd) { + *rnew = bc->popI64(); + *rexpect = bc->popI64(); + *rd = bc->needI64(); +} + +static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, + Address srcAddr, RegI64 rexpect, RegI64 rnew, RegI64 rd) { + bc->masm.wasmCompareExchange64(access, srcAddr, rexpect, rnew, rd); +} + +template +static void Deallocate(BaseCompiler* bc, RegI64 rexpect, RegI64 rnew) { + bc->freeI64(rexpect); + bc->freeI64(rnew); +} + +#elif defined(JS_CODEGEN_NONE) || defined(JS_CODEGEN_WASM32) + +template +static void PopAndAllocate(BaseCompiler* bc, RegI64* rexpect, RegI64* rnew, + RegI64* rd) {} +static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, + Address srcAddr, RegI64 rexpect, RegI64 rnew, RegI64 rd) {} 
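+// For reference only, an illustrative sketch added for exposition rather
+// than a function the baseline compiler defines or calls: ignoring
+// atomicity and memory ordering, the operation that the per-architecture
+// Perform functions above emit is the usual compare-exchange, whose old
+// value is what the wasm instruction returns.  Integer types are assumed
+// to be available via the existing includes.
+[[maybe_unused]] static inline uint64_t CompareExchange64Sketch(
+    uint64_t* addr, uint64_t expect, uint64_t replacement) {
+  uint64_t old = *addr;
+  if (old == expect) {
+    *addr = replacement;  // Store only on a match.
+  }
+  return old;  // Always return the previous value.
+}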
+template +static void Deallocate(BaseCompiler* bc, RegI64 rexpect, RegI64 rnew) {} + +#endif + +} // namespace atomic_cmpxchg64 + +template +void BaseCompiler::atomicCmpXchg64(MemoryAccessDesc* access, ValType type) { + RegI64 rexpect, rnew, rd; + atomic_cmpxchg64::PopAndAllocate(this, &rexpect, &rnew, &rd); + + AccessCheck check; + RegIndexType rp = popMemoryAccess(access, &check); + +#ifdef WASM_HAS_HEAPREG + RegPtr instance = maybeLoadInstanceForAccess(access, check); + auto memaddr = prepareAtomicMemoryAccess(access, &check, instance, rp); + atomic_cmpxchg64::Perform(this, *access, memaddr, rexpect, rnew, rd); +# ifndef RABALDR_PIN_INSTANCE + maybeFree(instance); +# endif +#else + ScratchAtomicNoHeapReg scratch(*this); + RegPtr instance = + maybeLoadInstanceForAccess(access, check, RegIntptrToRegPtr(scratch)); + Address memaddr = prepareAtomicMemoryAccess(access, &check, instance, rp); + atomic_cmpxchg64::Perform(this, *access, memaddr, rexpect, rnew, + rd, scratch); + MOZ_ASSERT(instance == scratch); +#endif + + free(rp); + atomic_cmpxchg64::Deallocate(this, rexpect, rnew); + + pushI64(rd); +} + +////////////////////////////////////////////////////////////////////////////// +// +// Synchronization. + +bool BaseCompiler::atomicWait(ValType type, MemoryAccessDesc* access) { + switch (type.kind()) { + case ValType::I32: { + RegI64 timeout = popI64(); + RegI32 val = popI32(); + + if (isMem32(access->memoryIndex())) { + computeEffectiveAddress(access); + } else { +#ifdef ENABLE_WASM_MEMORY64 + computeEffectiveAddress(access); +#else + MOZ_CRASH("Memory64 not enabled / supported on this platform"); +#endif + } + + pushI32(val); + pushI64(timeout); + pushI32(access->memoryIndex()); + + if (!emitInstanceCall(isMem32(access->memoryIndex()) ? SASigWaitI32M32 + : SASigWaitI32M64)) { + return false; + } + break; + } + case ValType::I64: { + RegI64 timeout = popI64(); + RegI64 val = popI64(); + + if (isMem32(access->memoryIndex())) { + computeEffectiveAddress(access); + } else { +#ifdef ENABLE_WASM_MEMORY64 +# ifdef JS_CODEGEN_X86 + { + ScratchPtr scratch(*this); + stashI64(scratch, val); + freeI64(val); + } +# endif + computeEffectiveAddress(access); +# ifdef JS_CODEGEN_X86 + { + ScratchPtr scratch(*this); + val = needI64(); + unstashI64(scratch, val); + } +# endif +#else + MOZ_CRASH("Memory64 not enabled / supported on this platform"); +#endif + } + + pushI64(val); + pushI64(timeout); + pushI32(access->memoryIndex()); + + if (!emitInstanceCall(isMem32(access->memoryIndex()) ? SASigWaitI64M32 + : SASigWaitI64M64)) { + return false; + } + break; + } + default: + MOZ_CRASH(); + } + + return true; +} + +bool BaseCompiler::atomicWake(MemoryAccessDesc* access) { + RegI32 count = popI32(); + + if (isMem32(access->memoryIndex())) { + computeEffectiveAddress(access); + RegI32 byteOffset = popI32(); + pushI32(byteOffset); + } else { +#ifdef ENABLE_WASM_MEMORY64 + computeEffectiveAddress(access); + RegI64 byteOffset = popI64(); + pushI64(byteOffset); +#else + MOZ_CRASH("Memory64 not enabled / supported on this platform"); +#endif + } + + pushI32(count); + pushI32(access->memoryIndex()); + return emitInstanceCall(isMem32(access->memoryIndex()) ? SASigWakeM32 + : SASigWakeM64); +} + +////////////////////////////////////////////////////////////////////////////// +// +// Bulk memory. 
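+// The sketch below is illustrative only and compiled out; it is not part
+// of the implementation and its names are invented.  It restates in plain
+// C++ how memCopyInlineM32 and memFillInlineM32 split an inline length
+// into 16/8/4/2/1-byte chunks (the 16-byte and 8-byte cases are used only
+// when SIMD and 64-bit chunks are available), and the byte splatting that
+// memFillInlineM32 obtains from the real SplatByteToUInt helper.
+#if 0
+// Example: a length of 27 yields one 16-byte, one 8-byte, zero 4-byte,
+// one 2-byte and one 1-byte access.
+static void DecomposeInlineLength(size_t length, size_t counts[5]) {
+  static const size_t widths[5] = {16, 8, 4, 2, 1};
+  for (size_t i = 0; i < 5; i++) {
+    counts[i] = length / widths[i];
+    length %= widths[i];
+  }
+}
+
+// Example: SplatByteSketch(0xAB, 4) == 0xABABABAB.
+static uint64_t SplatByteSketch(uint8_t v, unsigned numBytes) {
+  uint64_t r = 0;
+  for (unsigned i = 0; i < numBytes; i++) {
+    r = (r << 8) | v;
+  }
+  return r;
+}
+#endif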
+ +void BaseCompiler::memCopyInlineM32() { + MOZ_ASSERT(MaxInlineMemoryCopyLength != 0); + + // This function assumes a memory index of zero + uint32_t memoryIndex = 0; + int32_t signedLength; + MOZ_ALWAYS_TRUE(popConst(&signedLength)); + uint32_t length = signedLength; + MOZ_ASSERT(length != 0 && length <= MaxInlineMemoryCopyLength); + + RegI32 src = popI32(); + RegI32 dest = popI32(); + + // Compute the number of copies of each width we will need to do + size_t remainder = length; +#ifdef ENABLE_WASM_SIMD + size_t numCopies16 = 0; + if (MacroAssembler::SupportsFastUnalignedFPAccesses()) { + numCopies16 = remainder / sizeof(V128); + remainder %= sizeof(V128); + } +#endif +#ifdef JS_64BIT + size_t numCopies8 = remainder / sizeof(uint64_t); + remainder %= sizeof(uint64_t); +#endif + size_t numCopies4 = remainder / sizeof(uint32_t); + remainder %= sizeof(uint32_t); + size_t numCopies2 = remainder / sizeof(uint16_t); + remainder %= sizeof(uint16_t); + size_t numCopies1 = remainder; + + // Load all source bytes onto the value stack from low to high using the + // widest transfer width we can for the system. We will trap without writing + // anything if any source byte is out-of-bounds. + bool omitBoundsCheck = false; + size_t offset = 0; + +#ifdef ENABLE_WASM_SIMD + for (uint32_t i = 0; i < numCopies16; i++) { + RegI32 temp = needI32(); + moveI32(src, temp); + pushI32(temp); + + MemoryAccessDesc access(memoryIndex, Scalar::Simd128, 1, offset, + bytecodeOffset(), hugeMemoryEnabled(memoryIndex)); + AccessCheck check; + check.omitBoundsCheck = omitBoundsCheck; + loadCommon(&access, check, ValType::V128); + + offset += sizeof(V128); + omitBoundsCheck = true; + } +#endif + +#ifdef JS_64BIT + for (uint32_t i = 0; i < numCopies8; i++) { + RegI32 temp = needI32(); + moveI32(src, temp); + pushI32(temp); + + MemoryAccessDesc access(memoryIndex, Scalar::Int64, 1, offset, + bytecodeOffset(), hugeMemoryEnabled(memoryIndex)); + AccessCheck check; + check.omitBoundsCheck = omitBoundsCheck; + loadCommon(&access, check, ValType::I64); + + offset += sizeof(uint64_t); + omitBoundsCheck = true; + } +#endif + + for (uint32_t i = 0; i < numCopies4; i++) { + RegI32 temp = needI32(); + moveI32(src, temp); + pushI32(temp); + + MemoryAccessDesc access(memoryIndex, Scalar::Uint32, 1, offset, + bytecodeOffset(), hugeMemoryEnabled(memoryIndex)); + AccessCheck check; + check.omitBoundsCheck = omitBoundsCheck; + loadCommon(&access, check, ValType::I32); + + offset += sizeof(uint32_t); + omitBoundsCheck = true; + } + + if (numCopies2) { + RegI32 temp = needI32(); + moveI32(src, temp); + pushI32(temp); + + MemoryAccessDesc access(memoryIndex, Scalar::Uint16, 1, offset, + bytecodeOffset(), hugeMemoryEnabled(memoryIndex)); + AccessCheck check; + check.omitBoundsCheck = omitBoundsCheck; + loadCommon(&access, check, ValType::I32); + + offset += sizeof(uint16_t); + omitBoundsCheck = true; + } + + if (numCopies1) { + RegI32 temp = needI32(); + moveI32(src, temp); + pushI32(temp); + + MemoryAccessDesc access(memoryIndex, Scalar::Uint8, 1, offset, + bytecodeOffset(), hugeMemoryEnabled(memoryIndex)); + AccessCheck check; + check.omitBoundsCheck = omitBoundsCheck; + loadCommon(&access, check, ValType::I32); + } + + // Store all source bytes from the value stack to the destination from + // high to low. We will trap without writing anything on the first store + // if any dest byte is out-of-bounds. 
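+  // (For example, with length 7 the loads above touched source offsets 0
+  // (u32), 4 (u16) and 6 (u8); the stores below touch destination offsets
+  // 6 (u8), 4 (u16) and then 0 (u32), so the first store covers the last
+  // destination byte.)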
+ offset = length; + omitBoundsCheck = false; + + if (numCopies1) { + offset -= sizeof(uint8_t); + + RegI32 value = popI32(); + RegI32 temp = needI32(); + moveI32(dest, temp); + pushI32(temp); + pushI32(value); + + MemoryAccessDesc access(memoryIndex, Scalar::Uint8, 1, offset, + bytecodeOffset(), hugeMemoryEnabled(memoryIndex)); + AccessCheck check; + storeCommon(&access, check, ValType::I32); + + omitBoundsCheck = true; + } + + if (numCopies2) { + offset -= sizeof(uint16_t); + + RegI32 value = popI32(); + RegI32 temp = needI32(); + moveI32(dest, temp); + pushI32(temp); + pushI32(value); + + MemoryAccessDesc access(memoryIndex, Scalar::Uint16, 1, offset, + bytecodeOffset(), hugeMemoryEnabled(memoryIndex)); + AccessCheck check; + check.omitBoundsCheck = omitBoundsCheck; + storeCommon(&access, check, ValType::I32); + + omitBoundsCheck = true; + } + + for (uint32_t i = 0; i < numCopies4; i++) { + offset -= sizeof(uint32_t); + + RegI32 value = popI32(); + RegI32 temp = needI32(); + moveI32(dest, temp); + pushI32(temp); + pushI32(value); + + MemoryAccessDesc access(memoryIndex, Scalar::Uint32, 1, offset, + bytecodeOffset(), hugeMemoryEnabled(memoryIndex)); + AccessCheck check; + check.omitBoundsCheck = omitBoundsCheck; + storeCommon(&access, check, ValType::I32); + + omitBoundsCheck = true; + } + +#ifdef JS_64BIT + for (uint32_t i = 0; i < numCopies8; i++) { + offset -= sizeof(uint64_t); + + RegI64 value = popI64(); + RegI32 temp = needI32(); + moveI32(dest, temp); + pushI32(temp); + pushI64(value); + + MemoryAccessDesc access(memoryIndex, Scalar::Int64, 1, offset, + bytecodeOffset(), hugeMemoryEnabled(memoryIndex)); + AccessCheck check; + check.omitBoundsCheck = omitBoundsCheck; + storeCommon(&access, check, ValType::I64); + + omitBoundsCheck = true; + } +#endif + +#ifdef ENABLE_WASM_SIMD + for (uint32_t i = 0; i < numCopies16; i++) { + offset -= sizeof(V128); + + RegV128 value = popV128(); + RegI32 temp = needI32(); + moveI32(dest, temp); + pushI32(temp); + pushV128(value); + + MemoryAccessDesc access(memoryIndex, Scalar::Simd128, 1, offset, + bytecodeOffset(), hugeMemoryEnabled(memoryIndex)); + AccessCheck check; + check.omitBoundsCheck = omitBoundsCheck; + storeCommon(&access, check, ValType::V128); + + omitBoundsCheck = true; + } +#endif + + freeI32(dest); + freeI32(src); +} + +void BaseCompiler::memFillInlineM32() { + MOZ_ASSERT(MaxInlineMemoryFillLength != 0); + + // This function assumes a memory index of zero + uint32_t memoryIndex = 0; + int32_t signedLength; + int32_t signedValue; + MOZ_ALWAYS_TRUE(popConst(&signedLength)); + MOZ_ALWAYS_TRUE(popConst(&signedValue)); + uint32_t length = uint32_t(signedLength); + uint32_t value = uint32_t(signedValue); + MOZ_ASSERT(length != 0 && length <= MaxInlineMemoryFillLength); + + RegI32 dest = popI32(); + + // Compute the number of copies of each width we will need to do + size_t remainder = length; +#ifdef ENABLE_WASM_SIMD + size_t numCopies16 = 0; + if (MacroAssembler::SupportsFastUnalignedFPAccesses()) { + numCopies16 = remainder / sizeof(V128); + remainder %= sizeof(V128); + } +#endif +#ifdef JS_64BIT + size_t numCopies8 = remainder / sizeof(uint64_t); + remainder %= sizeof(uint64_t); +#endif + size_t numCopies4 = remainder / sizeof(uint32_t); + remainder %= sizeof(uint32_t); + size_t numCopies2 = remainder / sizeof(uint16_t); + remainder %= sizeof(uint16_t); + size_t numCopies1 = remainder; + + MOZ_ASSERT(numCopies2 <= 1 && numCopies1 <= 1); + + // Generate splatted definitions for wider fills as needed +#ifdef ENABLE_WASM_SIMD + V128 
val16(value); +#endif +#ifdef JS_64BIT + uint64_t val8 = SplatByteToUInt(value, 8); +#endif + uint32_t val4 = SplatByteToUInt(value, 4); + uint32_t val2 = SplatByteToUInt(value, 2); + uint32_t val1 = value; + + // Store the fill value to the destination from high to low. We will trap + // without writing anything on the first store if any dest byte is + // out-of-bounds. + size_t offset = length; + bool omitBoundsCheck = false; + + if (numCopies1) { + offset -= sizeof(uint8_t); + + RegI32 temp = needI32(); + moveI32(dest, temp); + pushI32(temp); + pushI32(val1); + + MemoryAccessDesc access(memoryIndex, Scalar::Uint8, 1, offset, + bytecodeOffset(), hugeMemoryEnabled(memoryIndex)); + AccessCheck check; + storeCommon(&access, check, ValType::I32); + + omitBoundsCheck = true; + } + + if (numCopies2) { + offset -= sizeof(uint16_t); + + RegI32 temp = needI32(); + moveI32(dest, temp); + pushI32(temp); + pushI32(val2); + + MemoryAccessDesc access(memoryIndex, Scalar::Uint16, 1, offset, + bytecodeOffset(), hugeMemoryEnabled(memoryIndex)); + AccessCheck check; + check.omitBoundsCheck = omitBoundsCheck; + storeCommon(&access, check, ValType::I32); + + omitBoundsCheck = true; + } + + for (uint32_t i = 0; i < numCopies4; i++) { + offset -= sizeof(uint32_t); + + RegI32 temp = needI32(); + moveI32(dest, temp); + pushI32(temp); + pushI32(val4); + + MemoryAccessDesc access(memoryIndex, Scalar::Uint32, 1, offset, + bytecodeOffset(), hugeMemoryEnabled(memoryIndex)); + AccessCheck check; + check.omitBoundsCheck = omitBoundsCheck; + storeCommon(&access, check, ValType::I32); + + omitBoundsCheck = true; + } + +#ifdef JS_64BIT + for (uint32_t i = 0; i < numCopies8; i++) { + offset -= sizeof(uint64_t); + + RegI32 temp = needI32(); + moveI32(dest, temp); + pushI32(temp); + pushI64(val8); + + MemoryAccessDesc access(memoryIndex, Scalar::Int64, 1, offset, + bytecodeOffset(), hugeMemoryEnabled(memoryIndex)); + AccessCheck check; + check.omitBoundsCheck = omitBoundsCheck; + storeCommon(&access, check, ValType::I64); + + omitBoundsCheck = true; + } +#endif + +#ifdef ENABLE_WASM_SIMD + for (uint32_t i = 0; i < numCopies16; i++) { + offset -= sizeof(V128); + + RegI32 temp = needI32(); + moveI32(dest, temp); + pushI32(temp); + pushV128(val16); + + MemoryAccessDesc access(memoryIndex, Scalar::Simd128, 1, offset, + bytecodeOffset(), hugeMemoryEnabled(memoryIndex)); + AccessCheck check; + check.omitBoundsCheck = omitBoundsCheck; + storeCommon(&access, check, ValType::V128); + + omitBoundsCheck = true; + } +#endif + + freeI32(dest); +} + +////////////////////////////////////////////////////////////////////////////// +// +// SIMD and Relaxed SIMD. + +#ifdef ENABLE_WASM_SIMD +void BaseCompiler::loadSplat(MemoryAccessDesc* access) { + // We can implement loadSplat mostly as load + splat because the push of the + // result onto the value stack in loadCommon normally will not generate any + // code, it will leave the value in a register which we will consume. + + // We use uint types when we can on the general assumption that unsigned loads + // might be smaller/faster on some platforms, because no sign extension needs + // to be done after the sub-register load. 
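+  // (For example, a 32-bit load of 0x11223344 followed by splatX4 leaves a
+  // v128 whose four lanes all hold 0x11223344.)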
+ RegV128 rd = needV128(); + switch (access->type()) { + case Scalar::Uint8: { + loadCommon(access, AccessCheck(), ValType::I32); + RegI32 rs = popI32(); + masm.splatX16(rs, rd); + free(rs); + break; + } + case Scalar::Uint16: { + loadCommon(access, AccessCheck(), ValType::I32); + RegI32 rs = popI32(); + masm.splatX8(rs, rd); + free(rs); + break; + } + case Scalar::Uint32: { + loadCommon(access, AccessCheck(), ValType::I32); + RegI32 rs = popI32(); + masm.splatX4(rs, rd); + free(rs); + break; + } + case Scalar::Int64: { + loadCommon(access, AccessCheck(), ValType::I64); + RegI64 rs = popI64(); + masm.splatX2(rs, rd); + free(rs); + break; + } + default: + MOZ_CRASH(); + } + pushV128(rd); +} + +void BaseCompiler::loadZero(MemoryAccessDesc* access) { + access->setZeroExtendSimd128Load(); + loadCommon(access, AccessCheck(), ValType::V128); +} + +void BaseCompiler::loadExtend(MemoryAccessDesc* access, Scalar::Type viewType) { + loadCommon(access, AccessCheck(), ValType::I64); + + RegI64 rs = popI64(); + RegV128 rd = needV128(); + masm.moveGPR64ToDouble(rs, rd); + switch (viewType) { + case Scalar::Int8: + masm.widenLowInt8x16(rd, rd); + break; + case Scalar::Uint8: + masm.unsignedWidenLowInt8x16(rd, rd); + break; + case Scalar::Int16: + masm.widenLowInt16x8(rd, rd); + break; + case Scalar::Uint16: + masm.unsignedWidenLowInt16x8(rd, rd); + break; + case Scalar::Int32: + masm.widenLowInt32x4(rd, rd); + break; + case Scalar::Uint32: + masm.unsignedWidenLowInt32x4(rd, rd); + break; + default: + MOZ_CRASH(); + } + freeI64(rs); + pushV128(rd); +} + +void BaseCompiler::loadLane(MemoryAccessDesc* access, uint32_t laneIndex) { + ValType type = access->type() == Scalar::Int64 ? ValType::I64 : ValType::I32; + + RegV128 rsd = popV128(); + loadCommon(access, AccessCheck(), type); + + if (type == ValType::I32) { + RegI32 rs = popI32(); + switch (access->type()) { + case Scalar::Uint8: + masm.replaceLaneInt8x16(laneIndex, rs, rsd); + break; + case Scalar::Uint16: + masm.replaceLaneInt16x8(laneIndex, rs, rsd); + break; + case Scalar::Int32: + masm.replaceLaneInt32x4(laneIndex, rs, rsd); + break; + default: + MOZ_CRASH("unsupported access type"); + } + freeI32(rs); + } else { + MOZ_ASSERT(type == ValType::I64); + RegI64 rs = popI64(); + masm.replaceLaneInt64x2(laneIndex, rs, rsd); + freeI64(rs); + } + + pushV128(rsd); +} + +void BaseCompiler::storeLane(MemoryAccessDesc* access, uint32_t laneIndex) { + ValType type = access->type() == Scalar::Int64 ? ValType::I64 : ValType::I32; + + RegV128 rs = popV128(); + if (type == ValType::I32) { + RegI32 tmp = needI32(); + switch (access->type()) { + case Scalar::Uint8: + masm.extractLaneInt8x16(laneIndex, rs, tmp); + break; + case Scalar::Uint16: + masm.extractLaneInt16x8(laneIndex, rs, tmp); + break; + case Scalar::Int32: + masm.extractLaneInt32x4(laneIndex, rs, tmp); + break; + default: + MOZ_CRASH("unsupported laneSize"); + } + pushI32(tmp); + } else { + MOZ_ASSERT(type == ValType::I64); + RegI64 tmp = needI64(); + masm.extractLaneInt64x2(laneIndex, rs, tmp); + pushI64(tmp); + } + freeV128(rs); + + storeCommon(access, AccessCheck(), type); +} +#endif // ENABLE_WASM_SIMD + +} // namespace wasm +} // namespace js -- cgit v1.2.3