path: root/js/src/wasm/WasmBaselineCompile.cpp
Diffstat (limited to 'js/src/wasm/WasmBaselineCompile.cpp')
-rw-r--r--  js/src/wasm/WasmBaselineCompile.cpp | 15908
1 file changed, 15908 insertions, 0 deletions
diff --git a/js/src/wasm/WasmBaselineCompile.cpp b/js/src/wasm/WasmBaselineCompile.cpp
new file mode 100644
index 0000000000..a22a07b944
--- /dev/null
+++ b/js/src/wasm/WasmBaselineCompile.cpp
@@ -0,0 +1,15908 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ *
+ * Copyright 2016 Mozilla Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * [SMDOC] WebAssembly baseline compiler (RabaldrMonkey)
+ *
+ * General assumptions for 32-bit vs 64-bit code:
+ *
+ * - A 32-bit register can be extended in-place to a 64-bit register on 64-bit
+ * systems.
+ *
+ * - Code that knows that Register64 has a '.reg' member on 64-bit systems and
+ * '.high' and '.low' members on 32-bit systems, or knows the implications
+ * thereof, is #ifdef JS_PUNBOX64. All other code is #if(n)?def JS_64BIT.
+ *
+ *
+ * Coding standards:
+ *
+ * - In "small" code generating functions (eg emitMultiplyF64, emitQuotientI32,
+ * and surrounding functions; most functions fall into this class) where the
+ * meaning is obvious:
+ *
+ * - if there is a single source + destination register, it is called 'r'
+ * - if there is one source and a different destination, they are called 'rs'
+ * and 'rd'
+ * - if there is one source + destination register and another source register
+ * they are called 'r' and 'rs'
+ * - if there are two source registers and a destination register they are
+ * called 'rs0', 'rs1', and 'rd'.
+ *
+ * - Generic temp registers are named /temp[0-9]?/ not /tmp[0-9]?/.
+ *
+ * - Registers can be named non-generically for their function ('rp' for the
+ * 'pointer' register and 'rv' for the 'value' register are typical) and those
+ * names may or may not have an 'r' prefix.
+ *
+ * - "Larger" code generating functions make their own rules.
+ *
+ *
+ * General status notes:
+ *
+ * "FIXME" indicates a known or suspected bug. Always has a bug#.
+ *
+ * "TODO" indicates an opportunity for a general improvement, with an additional
+ * tag to indicate the area of improvement. Usually has a bug#.
+ *
+ * There are lots of machine dependencies here but they are pretty well isolated
+ * to a segment of the compiler. Many dependencies will eventually be factored
+ * into the MacroAssembler layer and shared with other code generators.
+ *
+ *
+ * High-value compiler performance improvements:
+ *
+ * - (Bug 1316802) The specific-register allocator (the needI32(r), needI64(r)
+ * etc methods) can avoid syncing the value stack if the specific register is
+ * in use but there is a free register to shuffle the specific register into.
+ * (This will also improve the generated code.) The sync happens often enough
+ * here to show up in profiles, because it is triggered by integer multiply
+ * and divide.
+ *
+ *
+ * High-value code generation improvements:
+ *
+ * - (Bug 1316804) brTable pessimizes by always dispatching to code that pops
+ * the stack and then jumps to the code for the target case. If no cleanup is
+ * needed we could just branch conditionally to the target; if the same amount
+ * of cleanup is needed for all cases then the cleanup can be done before the
+ * dispatch. Both are highly likely.
+ *
+ * - (Bug 1316806) Register management around calls: At the moment we sync the
+ * value stack unconditionally (this is simple) but there are probably many
+ * common cases where we could instead save/restore live caller-saves
+ * registers and perform parallel assignment into argument registers. This
+ * may be important if we keep some locals in registers.
+ *
+ * - (Bug 1316808) Allocate some locals to registers on machines where there are
+ * enough registers. This is probably hard to do well in a one-pass compiler
+ * but it might be that just keeping register arguments and the first few
+ * locals in registers is a viable strategy; another (more general) strategy
+ * is caching locals in registers in straight-line code. Such caching could
+ * also track constant values in registers, if that is deemed valuable. A
+ * combination of techniques may be desirable: parameters and the first few
+ * locals could be cached on entry to the function but not statically assigned
+ * to registers throughout.
+ *
+ * (On a large corpus of code it should be possible to compute, for every
+ * signature comprising the types of parameters and locals, and using a static
+ * weight for loops, a list in priority order of which parameters and locals
+ * should be assigned to registers. Or something like that. Wasm makes
+ * this simple. Static assignments are desirable because they are not flushed
+ * to memory by the pre-block sync() call.)
+ */
+
+#include "wasm/WasmBaselineCompile.h"
+
+#include "mozilla/MathAlgorithms.h"
+#include "mozilla/Maybe.h"
+
+#include <algorithm>
+#include <utility>
+
+#include "jit/AtomicOp.h"
+#include "jit/IonTypes.h"
+#include "jit/JitAllocPolicy.h"
+#include "jit/Label.h"
+#include "jit/MIR.h"
+#include "jit/RegisterAllocator.h"
+#include "jit/Registers.h"
+#include "jit/RegisterSets.h"
+#if defined(JS_CODEGEN_ARM)
+# include "jit/arm/Assembler-arm.h"
+#endif
+#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86)
+# include "jit/x86-shared/Architecture-x86-shared.h"
+# include "jit/x86-shared/Assembler-x86-shared.h"
+#endif
+#if defined(JS_CODEGEN_MIPS32)
+# include "jit/mips-shared/Assembler-mips-shared.h"
+# include "jit/mips32/Assembler-mips32.h"
+#endif
+#if defined(JS_CODEGEN_MIPS64)
+# include "jit/mips-shared/Assembler-mips-shared.h"
+# include "jit/mips64/Assembler-mips64.h"
+#endif
+#include "js/ScalarType.h" // js::Scalar::Type
+#include "util/Memory.h"
+#include "wasm/WasmGC.h"
+#include "wasm/WasmGenerator.h"
+#include "wasm/WasmInstance.h"
+#include "wasm/WasmOpIter.h"
+#include "wasm/WasmSignalHandlers.h"
+#include "wasm/WasmStubs.h"
+#include "wasm/WasmValidate.h"
+
+#include "jit/MacroAssembler-inl.h"
+
+using mozilla::DebugOnly;
+using mozilla::FloorLog2;
+using mozilla::IsPowerOfTwo;
+using mozilla::Maybe;
+
+namespace js {
+namespace wasm {
+
+using namespace js::jit;
+
+using HandleNaNSpecially = bool;
+using InvertBranch = bool;
+using IsKnownNotZero = bool;
+using IsUnsigned = bool;
+using NeedsBoundsCheck = bool;
+using WantResult = bool;
+using ZeroOnOverflow = bool;
+
+class BaseStackFrame;
+
+// Two flags, useABI and interModule, control how calls are made.
+//
+// UseABI::Wasm implies that the Tls/Heap/Global registers are nonvolatile,
+// except when InterModule::True is also set, when they are volatile.
+//
+// UseABI::Builtin implies that the Tls/Heap/Global registers are volatile.
+// In this case, we require InterModule::False. The calling convention
+// is otherwise like UseABI::Wasm.
+//
+// UseABI::System implies that the Tls/Heap/Global registers are volatile.
+// Additionally, the parameter passing mechanism may be slightly different from
+// the UseABI::Wasm convention.
+//
+// When the Tls/Heap/Global registers are not volatile, the baseline compiler
+// will restore the Tls register from its save slot before the call, since the
+// baseline compiler uses the Tls register for other things.
+//
+// When those registers are volatile, the baseline compiler will reload them
+// after the call (it will restore the Tls register from the save slot and load
+// the other two from the Tls data).
+
+enum class UseABI { Wasm, Builtin, System };
+enum class InterModule { False = false, True = true };
+enum class RhsDestOp { True = true };
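+
+// Sketch of the intended combinations (illustration only, restating the
+// comment above; the real call sites appear later in this file):
+//
+//   UseABI::Wasm    + InterModule::False   intra-module wasm call; the callee
+//                                          preserves the Tls/Heap/Global
+//                                          registers
+//   UseABI::Wasm    + InterModule::True    possibly cross-module call; reload
+//                                          Tls/Heap/Global after the call
+//   UseABI::Builtin + InterModule::False   builtin thunk with a wasm-like ABI
+//   UseABI::System                         native ABI callout; the registers
+//                                          are treated as volatile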
+
+#if defined(JS_CODEGEN_NONE)
+# define RABALDR_SCRATCH_I32
+# define RABALDR_SCRATCH_F32
+# define RABALDR_SCRATCH_F64
+
+static constexpr Register RabaldrScratchI32 = Register::Invalid();
+static constexpr FloatRegister RabaldrScratchF32 = InvalidFloatReg;
+static constexpr FloatRegister RabaldrScratchF64 = InvalidFloatReg;
+#endif
+
+#ifdef JS_CODEGEN_ARM64
+# define RABALDR_CHUNKY_STACK
+# define RABALDR_SIDEALLOC_V128
+# define RABALDR_SCRATCH_I32
+# define RABALDR_SCRATCH_F32
+# define RABALDR_SCRATCH_F64
+# define RABALDR_SCRATCH_V128
+# define RABALDR_SCRATCH_F32_ALIASES_F64
+
+static constexpr Register RabaldrScratchI32{Registers::x15};
+
+// Note, the float scratch regs cannot be registers that are used for parameter
+// passing in any ABI we use. Argregs tend to be low-numbered; register 30
+// should be safe.
+
+static constexpr FloatRegister RabaldrScratchF32{FloatRegisters::s30,
+ FloatRegisters::Single};
+static constexpr FloatRegister RabaldrScratchF64{FloatRegisters::d30,
+ FloatRegisters::Double};
+# ifdef ENABLE_WASM_SIMD
+static constexpr FloatRegister RabaldrScratchV128{FloatRegisters::d30,
+ FloatRegisters::Simd128};
+# endif
+
+static_assert(RabaldrScratchF32 != ScratchFloat32Reg, "Too busy");
+static_assert(RabaldrScratchF64 != ScratchDoubleReg, "Too busy");
+# ifdef ENABLE_WASM_SIMD
+static_assert(RabaldrScratchV128 != ScratchSimd128Reg, "Too busy");
+# endif
+#endif
+
+#ifdef JS_CODEGEN_X86
+// The selection of EBX here steps gingerly around: the need for EDX
+// to be allocatable for multiply/divide; ECX to be allocatable for
+// shift/rotate; EAX (= ReturnReg) to be allocatable as the result
+// register; EBX not being one of the WasmTableCall registers; and
+// needing a temp register for load/store that has a single-byte
+// persona.
+//
+// The compiler assumes that RabaldrScratchI32 has a single-byte
+// persona. Code for 8-byte atomic operations assumes that
+// RabaldrScratchI32 is in fact ebx.
+
+# define RABALDR_SCRATCH_I32
+static constexpr Register RabaldrScratchI32 = ebx;
+
+# define RABALDR_INT_DIV_I64_CALLOUT
+#endif
+
+#ifdef JS_CODEGEN_ARM
+// We use our own scratch register, because the macro assembler uses
+// the regular scratch register(s) pretty liberally. We could
+// work around that in several cases but the mess does not seem
+// worth it yet. CallTempReg2 seems safe.
+
+# define RABALDR_SCRATCH_I32
+static constexpr Register RabaldrScratchI32 = CallTempReg2;
+
+# define RABALDR_INT_DIV_I64_CALLOUT
+# define RABALDR_I64_TO_FLOAT_CALLOUT
+# define RABALDR_FLOAT_TO_I64_CALLOUT
+#endif
+
+#ifdef JS_CODEGEN_MIPS32
+# define RABALDR_SCRATCH_I32
+static constexpr Register RabaldrScratchI32 = CallTempReg2;
+
+# define RABALDR_INT_DIV_I64_CALLOUT
+# define RABALDR_I64_TO_FLOAT_CALLOUT
+# define RABALDR_FLOAT_TO_I64_CALLOUT
+#endif
+
+#ifdef JS_CODEGEN_MIPS64
+# define RABALDR_SCRATCH_I32
+static constexpr Register RabaldrScratchI32 = CallTempReg2;
+#endif
+
+#ifdef RABALDR_SCRATCH_F32_ALIASES_F64
+# if !defined(RABALDR_SCRATCH_F32) || !defined(RABALDR_SCRATCH_F64)
+# error "Bad configuration"
+# endif
+#endif
+
+template <MIRType t>
+struct RegTypeOf {
+#ifdef ENABLE_WASM_SIMD
+ static_assert(t == MIRType::Float32 || t == MIRType::Double ||
+ t == MIRType::Simd128,
+ "Float mask type");
+#else
+ static_assert(t == MIRType::Float32 || t == MIRType::Double,
+ "Float mask type");
+#endif
+};
+
+template <>
+struct RegTypeOf<MIRType::Float32> {
+ static constexpr RegTypeName value = RegTypeName::Float32;
+};
+template <>
+struct RegTypeOf<MIRType::Double> {
+ static constexpr RegTypeName value = RegTypeName::Float64;
+};
+#ifdef ENABLE_WASM_SIMD
+template <>
+struct RegTypeOf<MIRType::Simd128> {
+ static constexpr RegTypeName value = RegTypeName::Vector128;
+};
+#endif
+
+// The strongly typed register wrappers are especially useful to distinguish
+// float registers from double registers, but they also clearly distinguish
+// 32-bit registers from 64-bit register pairs on 32-bit systems.
+
+struct RegI32 : public Register {
+ RegI32() : Register(Register::Invalid()) {}
+ explicit RegI32(Register reg) : Register(reg) {
+ MOZ_ASSERT(reg != Invalid());
+ }
+ bool isInvalid() const { return *this == Invalid(); }
+ bool isValid() const { return !isInvalid(); }
+ static RegI32 Invalid() { return RegI32(); }
+};
+
+struct RegI64 : public Register64 {
+ RegI64() : Register64(Register64::Invalid()) {}
+ explicit RegI64(Register64 reg) : Register64(reg) {
+ MOZ_ASSERT(reg != Invalid());
+ }
+ bool isInvalid() const { return *this == Invalid(); }
+ bool isValid() const { return !isInvalid(); }
+ static RegI64 Invalid() { return RegI64(); }
+};
+
+struct RegPtr : public Register {
+ RegPtr() : Register(Register::Invalid()) {}
+ explicit RegPtr(Register reg) : Register(reg) {
+ MOZ_ASSERT(reg != Invalid());
+ }
+ bool isInvalid() const { return *this == Invalid(); }
+ bool isValid() const { return !isInvalid(); }
+ static RegPtr Invalid() { return RegPtr(); }
+};
+
+struct RegF32 : public FloatRegister {
+ RegF32() : FloatRegister() {}
+ explicit RegF32(FloatRegister reg) : FloatRegister(reg) {
+ MOZ_ASSERT(isSingle());
+ }
+ bool isValid() const { return !isInvalid(); }
+ static RegF32 Invalid() { return RegF32(); }
+};
+
+struct RegF64 : public FloatRegister {
+ RegF64() : FloatRegister() {}
+ explicit RegF64(FloatRegister reg) : FloatRegister(reg) {
+ MOZ_ASSERT(isDouble());
+ }
+ bool isValid() const { return !isInvalid(); }
+ static RegF64 Invalid() { return RegF64(); }
+};
+
+#ifdef ENABLE_WASM_SIMD
+# ifdef RABALDR_SIDEALLOC_V128
+class RegV128 {
+  // fpr_ is either invalid or a double that aliases the simd register; see
+ // comments below at BaseRegAlloc.
+ FloatRegister fpr_;
+
+ public:
+ RegV128() : fpr_(FloatRegister()) {}
+ explicit RegV128(FloatRegister reg)
+ : fpr_(FloatRegister(reg.encoding(), FloatRegisters::Double)) {
+ MOZ_ASSERT(reg.isSimd128());
+ }
+ static RegV128 fromDouble(FloatRegister reg) {
+ MOZ_ASSERT(reg.isDouble());
+ return RegV128(FloatRegister(reg.encoding(), FloatRegisters::Simd128));
+ }
+ FloatRegister asDouble() const { return fpr_; }
+ bool isInvalid() const { return fpr_.isInvalid(); }
+ bool isValid() const { return !isInvalid(); }
+ static RegV128 Invalid() { return RegV128(); }
+
+ operator FloatRegister() const {
+ return FloatRegister(fpr_.encoding(), FloatRegisters::Simd128);
+ }
+
+ bool operator==(const RegV128& that) const {
+ return asDouble() == that.asDouble();
+ }
+
+ bool operator!=(const RegV128& that) const {
+ return asDouble() != that.asDouble();
+ }
+};
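+
+// For example (sketch): on ARM64, where the double and SIMD128 register banks
+// overlap, RegV128 wrapping v7 stores d7 in fpr_; asDouble() hands d7 to the
+// register allocator, while the implicit FloatRegister conversion restores
+// the Simd128 kind for use with masm.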
+# else
+struct RegV128 : public FloatRegister {
+ RegV128() : FloatRegister() {}
+ explicit RegV128(FloatRegister reg) : FloatRegister(reg) {
+ MOZ_ASSERT(isSimd128());
+ }
+ bool isValid() const { return !isInvalid(); }
+ static RegV128 Invalid() { return RegV128(); }
+};
+# endif
+#endif
+
+struct AnyReg {
+ union {
+ RegI32 i32_;
+ RegI64 i64_;
+ RegPtr ref_;
+ RegF32 f32_;
+ RegF64 f64_;
+#ifdef ENABLE_WASM_SIMD
+ RegV128 v128_;
+#endif
+ };
+
+ enum {
+ I32,
+ I64,
+ REF,
+ F32,
+ F64,
+#ifdef ENABLE_WASM_SIMD
+ V128
+#endif
+ } tag;
+
+ explicit AnyReg(RegI32 r) {
+ tag = I32;
+ i32_ = r;
+ }
+ explicit AnyReg(RegI64 r) {
+ tag = I64;
+ i64_ = r;
+ }
+ explicit AnyReg(RegF32 r) {
+ tag = F32;
+ f32_ = r;
+ }
+ explicit AnyReg(RegF64 r) {
+ tag = F64;
+ f64_ = r;
+ }
+#ifdef ENABLE_WASM_SIMD
+ explicit AnyReg(RegV128 r) {
+ tag = V128;
+ v128_ = r;
+ }
+#endif
+ explicit AnyReg(RegPtr r) {
+ tag = REF;
+ ref_ = r;
+ }
+
+ RegI32 i32() const {
+ MOZ_ASSERT(tag == I32);
+ return i32_;
+ }
+ RegI64 i64() const {
+ MOZ_ASSERT(tag == I64);
+ return i64_;
+ }
+ RegF32 f32() const {
+ MOZ_ASSERT(tag == F32);
+ return f32_;
+ }
+ RegF64 f64() const {
+ MOZ_ASSERT(tag == F64);
+ return f64_;
+ }
+#ifdef ENABLE_WASM_SIMD
+ RegV128 v128() const {
+ MOZ_ASSERT(tag == V128);
+ return v128_;
+ }
+#endif
+ RegPtr ref() const {
+ MOZ_ASSERT(tag == REF);
+ return ref_;
+ }
+
+ AnyRegister any() const {
+ switch (tag) {
+ case F32:
+ return AnyRegister(f32_);
+ case F64:
+ return AnyRegister(f64_);
+#ifdef ENABLE_WASM_SIMD
+ case V128:
+ return AnyRegister(v128_);
+#endif
+ case I32:
+ return AnyRegister(i32_);
+ case I64:
+#ifdef JS_PUNBOX64
+ return AnyRegister(i64_.reg);
+#else
+ // The compiler is written so that this is never needed: any() is
+ // called on arbitrary registers for asm.js but asm.js does not have
+ // 64-bit ints. For wasm, any() is called on arbitrary registers
+ // only on 64-bit platforms.
+ MOZ_CRASH("AnyReg::any() on 32-bit platform");
+#endif
+ case REF:
+ MOZ_CRASH("AnyReg::any() not implemented for ref types");
+ default:
+ MOZ_CRASH();
+ }
+ // Work around GCC 5 analysis/warning bug.
+ MOZ_CRASH("AnyReg::any(): impossible case");
+ }
+};
+
+// Platform-specific registers.
+//
+// All platforms must define struct SpecificRegs. All 32-bit platforms must
+// have an abiReturnRegI64 member in that struct.
+
+#if defined(JS_CODEGEN_X64)
+struct SpecificRegs {
+ RegI32 eax, ecx, edx, edi, esi;
+ RegI64 rax, rcx, rdx;
+
+ SpecificRegs()
+ : eax(RegI32(js::jit::eax)),
+ ecx(RegI32(js::jit::ecx)),
+ edx(RegI32(js::jit::edx)),
+ edi(RegI32(js::jit::edi)),
+ esi(RegI32(js::jit::esi)),
+ rax(RegI64(Register64(js::jit::rax))),
+ rcx(RegI64(Register64(js::jit::rcx))),
+ rdx(RegI64(Register64(js::jit::rdx))) {}
+};
+#elif defined(JS_CODEGEN_X86)
+struct SpecificRegs {
+ RegI32 eax, ecx, edx, edi, esi;
+ RegI64 ecx_ebx, edx_eax, abiReturnRegI64;
+
+ SpecificRegs()
+ : eax(RegI32(js::jit::eax)),
+ ecx(RegI32(js::jit::ecx)),
+ edx(RegI32(js::jit::edx)),
+ edi(RegI32(js::jit::edi)),
+ esi(RegI32(js::jit::esi)),
+ ecx_ebx(RegI64(Register64(js::jit::ecx, js::jit::ebx))),
+ edx_eax(RegI64(Register64(js::jit::edx, js::jit::eax))),
+ abiReturnRegI64(edx_eax) {}
+};
+#elif defined(JS_CODEGEN_ARM)
+struct SpecificRegs {
+ RegI64 abiReturnRegI64;
+
+ SpecificRegs() : abiReturnRegI64(ReturnReg64) {}
+};
+#elif defined(JS_CODEGEN_ARM64)
+struct SpecificRegs {};
+#elif defined(JS_CODEGEN_MIPS32)
+struct SpecificRegs {
+ RegI64 abiReturnRegI64;
+
+ SpecificRegs() : abiReturnRegI64(ReturnReg64) {}
+};
+#elif defined(JS_CODEGEN_MIPS64)
+struct SpecificRegs {};
+#else
+struct SpecificRegs {
+# ifndef JS_64BIT
+ RegI64 abiReturnRegI64;
+# endif
+
+ SpecificRegs() { MOZ_CRASH("BaseCompiler porting interface: SpecificRegs"); }
+};
+#endif
+
+class BaseCompilerInterface {
+ public:
+ // Spill all spillable registers.
+ //
+ // TODO / OPTIMIZE (Bug 1316802): It's possible to do better here by
+ // spilling only enough registers to satisfy current needs.
+ virtual void sync() = 0;
+ virtual void saveTempPtr(RegPtr r) = 0;
+ virtual void restoreTempPtr(RegPtr r) = 0;
+};
+
+// Register allocator.
+
+class BaseRegAlloc {
+ // Notes on float register allocation.
+ //
+ // The general rule in SpiderMonkey is that float registers can alias double
+ // registers, but there are predicates to handle exceptions to that rule:
+ // hasUnaliasedDouble() and hasMultiAlias(). The way aliasing actually
+ // works is platform dependent and exposed through the aliased(n, &r)
+ // predicate, etc.
+ //
+ // - hasUnaliasedDouble(): on ARM VFPv3-D32 there are double registers that
+ // cannot be treated as float.
+ // - hasMultiAlias(): on ARM and MIPS a double register aliases two float
+ // registers.
+ //
+ // On some platforms (x86, x64, ARM64) but not all (ARM)
+ // ScratchFloat32Register is the same as ScratchDoubleRegister.
+ //
+ // It's a basic invariant of the AllocatableRegisterSet that it deals
+ // properly with aliasing of registers: if s0 or s1 are allocated then d0 is
+ // not allocatable; if s0 and s1 are freed individually then d0 becomes
+ // allocatable.
+ //
+ // On platforms with RABALDR_SIDEALLOC_V128, the register set does not
+ // represent SIMD registers. Instead, we allocate and free these registers as
+ // doubles and change the kind to Simd128 while the register is exposed to
+ // masm. (This is the case on ARM64 for now, and is a consequence of needing
+ // more than 64 bits for FloatRegisters::SetType to represent SIMD registers.
+  // See lengthy comment in Architecture-arm64.h.)
+
+ BaseCompilerInterface* bc;
+ AllocatableGeneralRegisterSet availGPR;
+ AllocatableFloatRegisterSet availFPU;
+#ifdef DEBUG
+ // The registers available after removing ScratchReg, HeapReg, etc.
+ AllocatableGeneralRegisterSet allGPR;
+ AllocatableFloatRegisterSet allFPU;
+ uint32_t scratchTaken;
+#endif
+#ifdef JS_CODEGEN_X86
+ AllocatableGeneralRegisterSet singleByteRegs;
+#endif
+
+ bool hasGPR() { return !availGPR.empty(); }
+
+ bool hasGPR64() {
+#ifdef JS_PUNBOX64
+ return !availGPR.empty();
+#else
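+    // On 32-bit systems a 64-bit value needs two GPRs, so probe by taking one
+    // register temporarily and checking whether another is still available.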
+ if (availGPR.empty()) {
+ return false;
+ }
+ Register r = allocGPR();
+ bool available = !availGPR.empty();
+ freeGPR(r);
+ return available;
+#endif
+ }
+
+ template <MIRType t>
+ bool hasFPU() {
+#ifdef RABALDR_SIDEALLOC_V128
+ // Workaround for GCC problem, bug 1677690
+ if constexpr (t == MIRType::Simd128) {
+ MOZ_CRASH("Should not happen");
+ } else
+#endif
+ {
+ return availFPU.hasAny<RegTypeOf<t>::value>();
+ }
+ }
+
+ bool isAvailableGPR(Register r) { return availGPR.has(r); }
+
+ bool isAvailableFPU(FloatRegister r) {
+#ifdef RABALDR_SIDEALLOC_V128
+ MOZ_ASSERT(!r.isSimd128());
+#endif
+ return availFPU.has(r);
+ }
+
+ void allocGPR(Register r) {
+ MOZ_ASSERT(isAvailableGPR(r));
+ availGPR.take(r);
+ }
+
+ Register allocGPR() {
+ MOZ_ASSERT(hasGPR());
+ return availGPR.takeAny();
+ }
+
+ void allocInt64(Register64 r) {
+#ifdef JS_PUNBOX64
+ allocGPR(r.reg);
+#else
+ allocGPR(r.low);
+ allocGPR(r.high);
+#endif
+ }
+
+ Register64 allocInt64() {
+ MOZ_ASSERT(hasGPR64());
+#ifdef JS_PUNBOX64
+ return Register64(availGPR.takeAny());
+#else
+ Register high = availGPR.takeAny();
+ Register low = availGPR.takeAny();
+ return Register64(high, low);
+#endif
+ }
+
+#ifdef JS_CODEGEN_ARM
+ // r12 is normally the ScratchRegister and r13 is always the stack pointer,
+ // so the highest possible pair has r10 as the even-numbered register.
+
+ static constexpr uint32_t PAIR_LIMIT = 10;
+
+ bool hasGPRPair() {
+ for (uint32_t i = 0; i <= PAIR_LIMIT; i += 2) {
+ if (isAvailableGPR(Register::FromCode(i)) &&
+ isAvailableGPR(Register::FromCode(i + 1))) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ void allocGPRPair(Register* low, Register* high) {
+ MOZ_ASSERT(hasGPRPair());
+ for (uint32_t i = 0; i <= PAIR_LIMIT; i += 2) {
+ if (isAvailableGPR(Register::FromCode(i)) &&
+ isAvailableGPR(Register::FromCode(i + 1))) {
+ *low = Register::FromCode(i);
+ *high = Register::FromCode(i + 1);
+ allocGPR(*low);
+ allocGPR(*high);
+ return;
+ }
+ }
+ MOZ_CRASH("No pair");
+ }
+#endif
+
+ void allocFPU(FloatRegister r) {
+#ifdef RABALDR_SIDEALLOC_V128
+ MOZ_ASSERT(!r.isSimd128());
+#endif
+ MOZ_ASSERT(isAvailableFPU(r));
+ availFPU.take(r);
+ }
+
+ template <MIRType t>
+ FloatRegister allocFPU() {
+#ifdef RABALDR_SIDEALLOC_V128
+ // Workaround for GCC problem, bug 1677690
+ if constexpr (t == MIRType::Simd128) {
+ MOZ_CRASH("Should not happen");
+ } else
+#endif
+ {
+ return availFPU.takeAny<RegTypeOf<t>::value>();
+ }
+ }
+
+ void freeGPR(Register r) { availGPR.add(r); }
+
+ void freeInt64(Register64 r) {
+#ifdef JS_PUNBOX64
+ freeGPR(r.reg);
+#else
+ freeGPR(r.low);
+ freeGPR(r.high);
+#endif
+ }
+
+ void freeFPU(FloatRegister r) {
+#ifdef RABALDR_SIDEALLOC_V128
+ MOZ_ASSERT(!r.isSimd128());
+#endif
+ availFPU.add(r);
+ }
+
+ public:
+ explicit BaseRegAlloc()
+ : bc(nullptr),
+ availGPR(GeneralRegisterSet::All()),
+ availFPU(FloatRegisterSet::All())
+#ifdef DEBUG
+ ,
+ scratchTaken(0)
+#endif
+#ifdef JS_CODEGEN_X86
+ ,
+ singleByteRegs(GeneralRegisterSet(Registers::SingleByteRegs))
+#endif
+ {
+ RegisterAllocator::takeWasmRegisters(availGPR);
+
+ // Allocate any private scratch registers.
+#if defined(RABALDR_SCRATCH_I32)
+ if (RabaldrScratchI32 != RegI32::Invalid()) {
+ availGPR.take(RabaldrScratchI32);
+ }
+#endif
+
+#ifdef RABALDR_SCRATCH_F32_ALIASES_F64
+ static_assert(RabaldrScratchF32 != InvalidFloatReg, "Float reg definition");
+ static_assert(RabaldrScratchF64 != InvalidFloatReg, "Float reg definition");
+#endif
+
+#if defined(RABALDR_SCRATCH_F32) && !defined(RABALDR_SCRATCH_F32_ALIASES_F64)
+ if (RabaldrScratchF32 != RegF32::Invalid()) {
+ availFPU.take(RabaldrScratchF32);
+ }
+#endif
+
+#if defined(RABALDR_SCRATCH_F64)
+# ifdef RABALDR_SCRATCH_F32_ALIASES_F64
+ MOZ_ASSERT(availFPU.has(RabaldrScratchF32));
+# endif
+ if (RabaldrScratchF64 != RegF64::Invalid()) {
+ availFPU.take(RabaldrScratchF64);
+ }
+# ifdef RABALDR_SCRATCH_F32_ALIASES_F64
+ MOZ_ASSERT(!availFPU.has(RabaldrScratchF32));
+# endif
+#endif
+
+#ifdef DEBUG
+ allGPR = availGPR;
+ allFPU = availFPU;
+#endif
+ }
+
+ void init(BaseCompilerInterface* bc) { this->bc = bc; }
+
+ enum class ScratchKind { I32 = 1, F32 = 2, F64 = 4, V128 = 8 };
+
+#ifdef DEBUG
+ bool isScratchRegisterTaken(ScratchKind s) const {
+ return (scratchTaken & uint32_t(s)) != 0;
+ }
+
+ void setScratchRegisterTaken(ScratchKind s, bool state) {
+ if (state) {
+ scratchTaken |= uint32_t(s);
+ } else {
+ scratchTaken &= ~uint32_t(s);
+ }
+ }
+#endif
+
+#ifdef JS_CODEGEN_X86
+ bool isSingleByteI32(Register r) { return singleByteRegs.has(r); }
+#endif
+
+ bool isAvailableI32(RegI32 r) { return isAvailableGPR(r); }
+
+ bool isAvailableI64(RegI64 r) {
+#ifdef JS_PUNBOX64
+ return isAvailableGPR(r.reg);
+#else
+ return isAvailableGPR(r.low) && isAvailableGPR(r.high);
+#endif
+ }
+
+ bool isAvailablePtr(RegPtr r) { return isAvailableGPR(r); }
+
+ bool isAvailableF32(RegF32 r) { return isAvailableFPU(r); }
+
+ bool isAvailableF64(RegF64 r) { return isAvailableFPU(r); }
+
+#ifdef ENABLE_WASM_SIMD
+# ifdef RABALDR_SIDEALLOC_V128
+ bool isAvailableV128(RegV128 r) { return isAvailableFPU(r.asDouble()); }
+# else
+ bool isAvailableV128(RegV128 r) { return isAvailableFPU(r); }
+# endif
+#endif
+
+ // TODO / OPTIMIZE (Bug 1316802): Do not sync everything on allocation
+ // failure, only as much as we need.
+
+ [[nodiscard]] RegI32 needI32() {
+ if (!hasGPR()) {
+ bc->sync();
+ }
+ return RegI32(allocGPR());
+ }
+
+ void needI32(RegI32 specific) {
+ if (!isAvailableI32(specific)) {
+ bc->sync();
+ }
+ allocGPR(specific);
+ }
+
+ [[nodiscard]] RegI64 needI64() {
+ if (!hasGPR64()) {
+ bc->sync();
+ }
+ return RegI64(allocInt64());
+ }
+
+ void needI64(RegI64 specific) {
+ if (!isAvailableI64(specific)) {
+ bc->sync();
+ }
+ allocInt64(specific);
+ }
+
+ [[nodiscard]] RegPtr needPtr() {
+ if (!hasGPR()) {
+ bc->sync();
+ }
+ return RegPtr(allocGPR());
+ }
+
+ void needPtr(RegPtr specific) {
+ if (!isAvailablePtr(specific)) {
+ bc->sync();
+ }
+ allocGPR(specific);
+ }
+
+ // Use when you need a register for a short time but explicitly want to avoid
+ // a full sync().
+ [[nodiscard]] RegPtr needTempPtr(RegPtr fallback, bool* saved) {
+ if (hasGPR()) {
+ *saved = false;
+ return RegPtr(allocGPR());
+ }
+ *saved = true;
+ bc->saveTempPtr(fallback);
+ MOZ_ASSERT(isAvailablePtr(fallback));
+ allocGPR(fallback);
+ return RegPtr(fallback);
+ }
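+
+  // (Sketch of the intended pairing, for illustration only:
+  //
+  //    bool saved;
+  //    RegPtr tmp = needTempPtr(fallback, &saved);
+  //    ... use tmp ...
+  //    freeTempPtr(tmp, saved);
+  //
+  //  where `fallback` is a specific register the caller can afford to have
+  //  spilled and restored around the use.)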
+
+ [[nodiscard]] RegF32 needF32() {
+ if (!hasFPU<MIRType::Float32>()) {
+ bc->sync();
+ }
+ return RegF32(allocFPU<MIRType::Float32>());
+ }
+
+ void needF32(RegF32 specific) {
+ if (!isAvailableF32(specific)) {
+ bc->sync();
+ }
+ allocFPU(specific);
+ }
+
+ [[nodiscard]] RegF64 needF64() {
+ if (!hasFPU<MIRType::Double>()) {
+ bc->sync();
+ }
+ return RegF64(allocFPU<MIRType::Double>());
+ }
+
+ void needF64(RegF64 specific) {
+ if (!isAvailableF64(specific)) {
+ bc->sync();
+ }
+ allocFPU(specific);
+ }
+
+#ifdef ENABLE_WASM_SIMD
+ [[nodiscard]] RegV128 needV128() {
+# ifdef RABALDR_SIDEALLOC_V128
+ if (!hasFPU<MIRType::Double>()) {
+ bc->sync();
+ }
+ return RegV128::fromDouble(allocFPU<MIRType::Double>());
+# else
+ if (!hasFPU<MIRType::Simd128>()) {
+ bc->sync();
+ }
+ return RegV128(allocFPU<MIRType::Simd128>());
+# endif
+ }
+
+ void needV128(RegV128 specific) {
+# ifdef RABALDR_SIDEALLOC_V128
+ if (!isAvailableV128(specific)) {
+ bc->sync();
+ }
+ allocFPU(specific.asDouble());
+# else
+ if (!isAvailableV128(specific)) {
+ bc->sync();
+ }
+ allocFPU(specific);
+# endif
+ }
+#endif
+
+ void freeI32(RegI32 r) { freeGPR(r); }
+
+ void freeI64(RegI64 r) { freeInt64(r); }
+
+ void freePtr(RegPtr r) { freeGPR(r); }
+
+ void freeF64(RegF64 r) { freeFPU(r); }
+
+ void freeF32(RegF32 r) { freeFPU(r); }
+
+#ifdef ENABLE_WASM_SIMD
+ void freeV128(RegV128 r) {
+# ifdef RABALDR_SIDEALLOC_V128
+ freeFPU(r.asDouble());
+# else
+ freeFPU(r);
+# endif
+ }
+#endif
+
+ void freeTempPtr(RegPtr r, bool saved) {
+ freePtr(r);
+ if (saved) {
+ bc->restoreTempPtr(r);
+ MOZ_ASSERT(!isAvailablePtr(r));
+ }
+ }
+
+#ifdef JS_CODEGEN_ARM
+ [[nodiscard]] RegI64 needI64Pair() {
+ if (!hasGPRPair()) {
+ bc->sync();
+ }
+ Register low, high;
+ allocGPRPair(&low, &high);
+ return RegI64(Register64(high, low));
+ }
+#endif
+
+#ifdef DEBUG
+ friend class LeakCheck;
+
+ class MOZ_RAII LeakCheck {
+ private:
+ const BaseRegAlloc& ra;
+ AllocatableGeneralRegisterSet knownGPR_;
+ AllocatableFloatRegisterSet knownFPU_;
+
+ public:
+ explicit LeakCheck(const BaseRegAlloc& ra) : ra(ra) {
+ knownGPR_ = ra.availGPR;
+ knownFPU_ = ra.availFPU;
+ }
+
+ ~LeakCheck() {
+ MOZ_ASSERT(knownGPR_.bits() == ra.allGPR.bits());
+ MOZ_ASSERT(knownFPU_.bits() == ra.allFPU.bits());
+ }
+
+ void addKnownI32(RegI32 r) { knownGPR_.add(r); }
+
+ void addKnownI64(RegI64 r) {
+# ifdef JS_PUNBOX64
+ knownGPR_.add(r.reg);
+# else
+ knownGPR_.add(r.high);
+ knownGPR_.add(r.low);
+# endif
+ }
+
+ void addKnownF32(RegF32 r) { knownFPU_.add(r); }
+
+ void addKnownF64(RegF64 r) { knownFPU_.add(r); }
+
+# ifdef ENABLE_WASM_SIMD
+ void addKnownV128(RegV128 r) {
+# ifdef RABALDR_SIDEALLOC_V128
+ knownFPU_.add(r.asDouble());
+# else
+ knownFPU_.add(r);
+# endif
+ }
+# endif
+
+ void addKnownRef(RegPtr r) { knownGPR_.add(r); }
+ };
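+
+  // Usage sketch (illustration only): construct a LeakCheck at a point where
+  // every register must either be free or explicitly accounted for:
+  //
+  //   BaseRegAlloc::LeakCheck check(ra);
+  //   check.addKnownI32(liveReg);   // hypothetical legitimately-live value
+  //   // ~LeakCheck asserts that nothing else is still allocated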
+#endif
+};
+
+// Scratch register abstractions.
+//
+// We define our own scratch registers when the platform doesn't provide what we
+// need. A notable use case is that we will need a private scratch register
+// when the platform masm uses its scratch register very frequently (eg, ARM).
+
+class BaseScratchRegister {
+#ifdef DEBUG
+ BaseRegAlloc& ra;
+ BaseRegAlloc::ScratchKind kind_;
+
+ public:
+ explicit BaseScratchRegister(BaseRegAlloc& ra, BaseRegAlloc::ScratchKind kind)
+ : ra(ra), kind_(kind) {
+ MOZ_ASSERT(!ra.isScratchRegisterTaken(kind_));
+ ra.setScratchRegisterTaken(kind_, true);
+ }
+ ~BaseScratchRegister() {
+ MOZ_ASSERT(ra.isScratchRegisterTaken(kind_));
+ ra.setScratchRegisterTaken(kind_, false);
+ }
+#else
+ public:
+ explicit BaseScratchRegister(BaseRegAlloc& ra,
+ BaseRegAlloc::ScratchKind kind) {}
+#endif
+};
+
+#ifdef ENABLE_WASM_SIMD
+# ifdef RABALDR_SCRATCH_V128
+class ScratchV128 : public BaseScratchRegister {
+ public:
+ explicit ScratchV128(BaseRegAlloc& ra)
+ : BaseScratchRegister(ra, BaseRegAlloc::ScratchKind::V128) {}
+ operator RegV128() const { return RegV128(RabaldrScratchV128); }
+};
+# else
+class ScratchV128 : public ScratchSimd128Scope {
+ public:
+ explicit ScratchV128(MacroAssembler& m) : ScratchSimd128Scope(m) {}
+ operator RegV128() const { return RegV128(FloatRegister(*this)); }
+};
+# endif
+#endif
+
+#ifdef RABALDR_SCRATCH_F64
+class ScratchF64 : public BaseScratchRegister {
+ public:
+ explicit ScratchF64(BaseRegAlloc& ra)
+ : BaseScratchRegister(ra, BaseRegAlloc::ScratchKind::F64) {}
+ operator RegF64() const { return RegF64(RabaldrScratchF64); }
+};
+#else
+class ScratchF64 : public ScratchDoubleScope {
+ public:
+ explicit ScratchF64(MacroAssembler& m) : ScratchDoubleScope(m) {}
+ operator RegF64() const { return RegF64(FloatRegister(*this)); }
+};
+#endif
+
+#ifdef RABALDR_SCRATCH_F32
+class ScratchF32 : public BaseScratchRegister {
+ public:
+ explicit ScratchF32(BaseRegAlloc& ra)
+ : BaseScratchRegister(ra, BaseRegAlloc::ScratchKind::F32) {}
+ operator RegF32() const { return RegF32(RabaldrScratchF32); }
+};
+#else
+class ScratchF32 : public ScratchFloat32Scope {
+ public:
+ explicit ScratchF32(MacroAssembler& m) : ScratchFloat32Scope(m) {}
+ operator RegF32() const { return RegF32(FloatRegister(*this)); }
+};
+#endif
+
+#ifdef RABALDR_SCRATCH_I32
+template <class RegType>
+class ScratchGPR : public BaseScratchRegister {
+ public:
+ explicit ScratchGPR(BaseRegAlloc& ra)
+ : BaseScratchRegister(ra, BaseRegAlloc::ScratchKind::I32) {}
+ operator RegType() const { return RegType(RabaldrScratchI32); }
+};
+#else
+template <class RegType>
+class ScratchGPR : public ScratchRegisterScope {
+ public:
+ explicit ScratchGPR(MacroAssembler& m) : ScratchRegisterScope(m) {}
+ operator RegType() const { return RegType(Register(*this)); }
+};
+#endif
+
+using ScratchI32 = ScratchGPR<RegI32>;
+using ScratchPtr = ScratchGPR<RegPtr>;
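+
+// Usage sketch (illustration only): the scratch wrappers are RAII-scoped.  On
+// platforms with a private Rabaldr scratch register the constructor takes the
+// register allocator, otherwise it takes the macro assembler:
+//
+//   {
+//     ScratchI32 scratch(ra);          // or: ScratchI32 scratch(masm)
+//     masm.move32(Imm32(0), scratch);
+//   }                                  // scratch released here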
+
+#if defined(JS_CODEGEN_X86)
+// ScratchEBX is a mnemonic device: For some atomic ops we really need EBX,
+// no other register will do. And we would normally have to allocate that
+// register using ScratchI32 since normally the scratch register is EBX.
+// But the whole point of ScratchI32 is to hide that relationship. By using
+// the ScratchEBX alias, we document that at that point we require the
+// scratch register to be EBX.
+using ScratchEBX = ScratchI32;
+
+// ScratchI8 is a mnemonic device: For some ops we need a register with a
+// byte subregister.
+using ScratchI8 = ScratchI32;
+#endif
+
+// The stack frame.
+//
+// The stack frame has four parts ("below" means at lower addresses):
+//
+// - the Frame element;
+// - the Local area, including the DebugFrame element and possibly a spilled
+// pointer to stack results, if any; allocated below the header with various
+// forms of alignment;
+// - the Dynamic area, comprising the temporary storage the compiler uses for
+// register spilling, allocated below the Local area;
+// - the Arguments area, comprising memory allocated for outgoing calls,
+// allocated below the Dynamic area.
+//
+// +==============================+
+// | Incoming stack arg |
+// | ... |
+// ------------- +==============================+
+// | Frame (fixed size) |
+// ------------- +==============================+ <-------------------- FP
+// ^ | DebugFrame (optional) | ^ ^ ^^
+// localSize | Register arg local | | | ||
+// | | ... | | | framePushed
+// | | Register stack result ptr?| | | ||
+// | | Non-arg local | | | ||
+// | | ... | | | ||
+// | | (padding) | | | ||
+// | | Tls pointer | | | ||
+// | +------------------------------+ | | ||
+// v | (padding) | | v ||
+// ------------- +==============================+ currentStackHeight ||
+// ^ | Dynamic (variable size) | | ||
+// dynamicSize | ... | | ||
+// v | ... | v ||
+// ------------- | (free space, sometimes) | --------- v|
+// +==============================+ <----- SP not-during calls
+// | Arguments (sometimes) | |
+// | ... | v
+// +==============================+ <----- SP during calls
+//
+// The Frame is addressed off the stack pointer. masm.framePushed() is always
+// correct, and masm.getStackPointer() + masm.framePushed() always addresses the
+// Frame, with the DebugFrame optionally below it.
+//
+// The Local area (including the DebugFrame and, if needed, the spilled value of
+// the stack results area pointer) is laid out by BaseLocalIter and is allocated
+// and deallocated by standard prologue and epilogue functions that manipulate
+// the stack pointer, but it is accessed via BaseStackFrame.
+//
+// The Dynamic area is maintained by and accessed via BaseStackFrame. On some
+// systems (such as ARM64), the Dynamic memory may be allocated in chunks
+// because the SP needs a specific alignment, and in this case there will
+// normally be some free space directly above the SP. The stack height does not
+// include the free space, it reflects the logically used space only.
+//
+// The Dynamic area is where space for stack results is allocated when calling
+// functions that return results on the stack. If a function has stack results,
+// a pointer to the low address of the stack result area is passed as an
+// additional argument, according to the usual ABI. See
+// ABIResultIter::HasStackResults.
+//
+// The Arguments area is allocated and deallocated via BaseStackFrame (see
+// comments later) but is accessed directly off the stack pointer.
+
+// BaseLocalIter iterates over a vector of types of locals and provides offsets
+// from the Frame address for those locals, and associated data.
+//
+// The implementation of BaseLocalIter is the property of the BaseStackFrame.
+// But it is also exposed for eg the debugger to use.
+
+BaseLocalIter::BaseLocalIter(const ValTypeVector& locals,
+ const ArgTypeVector& args, bool debugEnabled)
+ : locals_(locals),
+ args_(args),
+ argsIter_(args_),
+ index_(0),
+ nextFrameSize_(debugEnabled ? DebugFrame::offsetOfFrame() : 0),
+ frameOffset_(INT32_MAX),
+ stackResultPointerOffset_(INT32_MAX),
+ mirType_(MIRType::Undefined),
+ done_(false) {
+ MOZ_ASSERT(args.lengthWithoutStackResults() <= locals.length());
+ settle();
+}
+
+int32_t BaseLocalIter::pushLocal(size_t nbytes) {
+ MOZ_ASSERT(nbytes % 4 == 0 && nbytes <= 16);
+ nextFrameSize_ = AlignBytes(frameSize_, nbytes) + nbytes;
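+  // For example (illustration only): with frameSize_ == 20 and nbytes == 8,
+  // the local is aligned up to offset 24 and nextFrameSize_ becomes 32; the
+  // returned offset 32 is that of the lowest-addressed byte of the local,
+  // 32 bytes below the Frame, since locals grow down.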
+ return nextFrameSize_; // Locals grow down so capture base address.
+}
+
+void BaseLocalIter::settle() {
+ MOZ_ASSERT(!done_);
+ frameSize_ = nextFrameSize_;
+
+ if (!argsIter_.done()) {
+ mirType_ = argsIter_.mirType();
+ MIRType concreteType = mirType_;
+ switch (mirType_) {
+ case MIRType::StackResults:
+ // The pointer to stack results is handled like any other argument:
+ // either addressed in place if it is passed on the stack, or we spill
+ // it in the frame if it's in a register.
+ MOZ_ASSERT(args_.isSyntheticStackResultPointerArg(index_));
+ concreteType = MIRType::Pointer;
+ [[fallthrough]];
+ case MIRType::Int32:
+ case MIRType::Int64:
+ case MIRType::Double:
+ case MIRType::Float32:
+ case MIRType::RefOrNull:
+#ifdef ENABLE_WASM_SIMD
+ case MIRType::Simd128:
+#endif
+ if (argsIter_->argInRegister()) {
+ frameOffset_ = pushLocal(MIRTypeToSize(concreteType));
+ } else {
+ frameOffset_ = -(argsIter_->offsetFromArgBase() + sizeof(Frame));
+ }
+ break;
+ default:
+ MOZ_CRASH("Argument type");
+ }
+ if (mirType_ == MIRType::StackResults) {
+ stackResultPointerOffset_ = frameOffset();
+ // Advance past the synthetic stack result pointer argument and fall
+ // through to the next case.
+ argsIter_++;
+ frameSize_ = nextFrameSize_;
+ MOZ_ASSERT(argsIter_.done());
+ } else {
+ return;
+ }
+ }
+
+ if (index_ < locals_.length()) {
+ switch (locals_[index_].kind()) {
+ case ValType::I32:
+ case ValType::I64:
+ case ValType::F32:
+ case ValType::F64:
+#ifdef ENABLE_WASM_SIMD
+ case ValType::V128:
+#endif
+ case ValType::Ref:
+ // TODO/AnyRef-boxing: With boxed immediates and strings, the
+ // debugger must be made aware that AnyRef != Pointer.
+ ASSERT_ANYREF_IS_JSOBJECT;
+ mirType_ = ToMIRType(locals_[index_]);
+ frameOffset_ = pushLocal(MIRTypeToSize(mirType_));
+ break;
+ default:
+ MOZ_CRASH("Compiler bug: Unexpected local type");
+ }
+ return;
+ }
+
+ done_ = true;
+}
+
+void BaseLocalIter::operator++(int) {
+ MOZ_ASSERT(!done_);
+ index_++;
+ if (!argsIter_.done()) {
+ argsIter_++;
+ }
+ settle();
+}
+
+// Abstraction of the height of the stack frame, to avoid type confusion.
+
+class StackHeight {
+ friend class BaseStackFrameAllocator;
+
+ uint32_t height;
+
+ public:
+ explicit StackHeight(uint32_t h) : height(h) {}
+ static StackHeight Invalid() { return StackHeight(UINT32_MAX); }
+ bool isValid() const { return height != UINT32_MAX; }
+ bool operator==(StackHeight rhs) const {
+ MOZ_ASSERT(isValid() && rhs.isValid());
+ return height == rhs.height;
+ }
+ bool operator!=(StackHeight rhs) const { return !(*this == rhs); }
+};
+
+// Abstraction for where multi-value results go on the machine stack.
+
+class StackResultsLoc {
+ uint32_t bytes_;
+ size_t count_;
+ Maybe<uint32_t> height_;
+
+ public:
+ StackResultsLoc() : bytes_(0), count_(0){};
+ StackResultsLoc(uint32_t bytes, size_t count, uint32_t height)
+ : bytes_(bytes), count_(count), height_(Some(height)) {
+ MOZ_ASSERT(bytes != 0);
+ MOZ_ASSERT(count != 0);
+ MOZ_ASSERT(height != 0);
+ }
+
+ uint32_t bytes() const { return bytes_; }
+ uint32_t count() const { return count_; }
+ uint32_t height() const { return height_.value(); }
+
+ bool hasStackResults() const { return bytes() != 0; }
+ StackResults stackResults() const {
+ return hasStackResults() ? StackResults::HasStackResults
+ : StackResults::NoStackResults;
+ }
+};
+
+// Abstraction of the baseline compiler's stack frame (except for the Frame /
+// DebugFrame parts). See comments above for more. Remember, "below" on the
+// stack means at lower addresses.
+//
+// The abstraction is split into two parts: BaseStackFrameAllocator is
+// responsible for allocating and deallocating space on the stack and for
+// performing computations that are affected by how the allocation is performed;
+// BaseStackFrame then provides a pleasant interface for stack frame management.
+
+class BaseStackFrameAllocator {
+ MacroAssembler& masm;
+
+#ifdef RABALDR_CHUNKY_STACK
+ // On platforms that require the stack pointer to be aligned on a boundary
+ // greater than the typical stack item (eg, ARM64 requires 16-byte alignment
+ // but items are 8 bytes), allocate stack memory in chunks, and use a
+ // separate stack height variable to track the effective stack pointer
+ // within the allocated area. Effectively, there's a variable amount of
+ // free space directly above the stack pointer. See diagram above.
+
+ // The following must be true in order for the stack height to be
+ // predictable at control flow joins:
+ //
+ // - The Local area is always aligned according to WasmStackAlignment, ie,
+ // masm.framePushed() % WasmStackAlignment is zero after allocating
+ // locals.
+ //
+ // - ChunkSize is always a multiple of WasmStackAlignment.
+ //
+ // - Pushing and popping are always in units of ChunkSize (hence preserving
+ // alignment).
+ //
+ // - The free space on the stack (masm.framePushed() - currentStackHeight_)
+ // is a predictable (nonnegative) amount.
+
+ // As an optimization, we pre-allocate some space on the stack, the size of
+ // this allocation is InitialChunk and it must be a multiple of ChunkSize.
+ // It is allocated as part of the function prologue and deallocated as part
+ // of the epilogue, along with the locals.
+ //
+ // If ChunkSize is too large then we risk overflowing the stack on simple
+ // recursions with few live values where stack overflow should not be a
+ // risk; if it is too small we spend too much time adjusting the stack
+ // pointer.
+ //
+ // Good values for ChunkSize are the subject of future empirical analysis;
+ // eight words is just an educated guess.
+
+ static constexpr uint32_t ChunkSize = 8 * sizeof(void*);
+ static constexpr uint32_t InitialChunk = ChunkSize;
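+
+  // Example (illustration only): with 8-byte pointers ChunkSize is 64.  If
+  // the logical stack height equals masm.framePushed(), pushing 8 bytes finds
+  // no free space, reserves one 64-byte chunk, and leaves 56 free bytes above
+  // the new logical height; see pushChunkyBytes() below.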
+
+ // The current logical height of the frame is
+ // currentStackHeight_ = localSize_ + dynamicSize
+ // where dynamicSize is not accounted for explicitly and localSize_ also
+ // includes size for the DebugFrame.
+ //
+ // The allocated size of the frame, provided by masm.framePushed(), is usually
+ // larger than currentStackHeight_, notably at the beginning of execution when
+ // we've allocated InitialChunk extra space.
+
+ uint32_t currentStackHeight_;
+#endif
+
+ // Size of the Local area in bytes (stable after BaseCompiler::init() has
+ // called BaseStackFrame::setupLocals(), which in turn calls
+ // BaseStackFrameAllocator::setLocalSize()), always rounded to the proper
+ // stack alignment. The Local area is then allocated in beginFunction(),
+ // following the allocation of the Header. See onFixedStackAllocated()
+ // below.
+
+ uint32_t localSize_;
+
+ protected:
+ ///////////////////////////////////////////////////////////////////////////
+ //
+ // Initialization
+
+ explicit BaseStackFrameAllocator(MacroAssembler& masm)
+ : masm(masm),
+#ifdef RABALDR_CHUNKY_STACK
+ currentStackHeight_(0),
+#endif
+ localSize_(UINT32_MAX) {
+ }
+
+ protected:
+ //////////////////////////////////////////////////////////////////////
+ //
+ // The Local area - the static part of the frame.
+
+ // Record the size of the Local area, once it is known.
+
+ void setLocalSize(uint32_t localSize) {
+ MOZ_ASSERT(localSize == AlignBytes(localSize, sizeof(void*)),
+ "localSize_ should be aligned to at least a pointer");
+ MOZ_ASSERT(localSize_ == UINT32_MAX);
+ localSize_ = localSize;
+ }
+
+ // Record the current stack height, after it has become stable in
+ // beginFunction(). See also BaseStackFrame::onFixedStackAllocated().
+
+ void onFixedStackAllocated() {
+ MOZ_ASSERT(localSize_ != UINT32_MAX);
+#ifdef RABALDR_CHUNKY_STACK
+ currentStackHeight_ = localSize_;
+#endif
+ }
+
+ public:
+ // The fixed amount of memory, in bytes, allocated on the stack below the
+ // Header for purposes such as locals and other fixed values. Includes all
+ // necessary alignment, and on ARM64 also the initial chunk for the working
+ // stack memory.
+
+ uint32_t fixedAllocSize() const {
+ MOZ_ASSERT(localSize_ != UINT32_MAX);
+#ifdef RABALDR_CHUNKY_STACK
+ return localSize_ + InitialChunk;
+#else
+ return localSize_;
+#endif
+ }
+
+#ifdef RABALDR_CHUNKY_STACK
+ // The allocated frame size is frequently larger than the logical stack
+ // height; we round up to a chunk boundary, and special case the initial
+ // chunk.
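+  //
+  // For example (illustration only): with fixedAllocSize() == 96 and
+  // ChunkSize == 64, a logical height of 100 rounds up to 96 + 64 == 160,
+  // while any height at or below 96 maps to 96.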
+ uint32_t framePushedForHeight(uint32_t logicalHeight) {
+ if (logicalHeight <= fixedAllocSize()) {
+ return fixedAllocSize();
+ }
+ return fixedAllocSize() +
+ AlignBytes(logicalHeight - fixedAllocSize(), ChunkSize);
+ }
+#endif
+
+ protected:
+ //////////////////////////////////////////////////////////////////////
+ //
+ // The Dynamic area - the dynamic part of the frame, for spilling and saving
+ // intermediate values.
+
+ // Offset off of sp_ for the slot at stack area location `offset`.
+
+ int32_t stackOffset(int32_t offset) {
+ MOZ_ASSERT(offset > 0);
+ return masm.framePushed() - offset;
+ }
+
+ uint32_t computeHeightWithStackResults(StackHeight stackBase,
+ uint32_t stackResultBytes) {
+ MOZ_ASSERT(stackResultBytes);
+ MOZ_ASSERT(currentStackHeight() >= stackBase.height);
+ return stackBase.height + stackResultBytes;
+ }
+
+#ifdef RABALDR_CHUNKY_STACK
+ void pushChunkyBytes(uint32_t bytes) {
+ checkChunkyInvariants();
+ uint32_t freeSpace = masm.framePushed() - currentStackHeight_;
+ if (freeSpace < bytes) {
+ uint32_t bytesToReserve = AlignBytes(bytes - freeSpace, ChunkSize);
+ MOZ_ASSERT(bytesToReserve + freeSpace >= bytes);
+ masm.reserveStack(bytesToReserve);
+ }
+ currentStackHeight_ += bytes;
+ checkChunkyInvariants();
+ }
+
+ void popChunkyBytes(uint32_t bytes) {
+ checkChunkyInvariants();
+ currentStackHeight_ -= bytes;
+ // Sometimes, popChunkyBytes() is used to pop a larger area, as when we drop
+ // values consumed by a call, and we may need to drop several chunks. But
+ // never drop the initial chunk. Crucially, the amount we drop is always an
+ // integral number of chunks.
+ uint32_t freeSpace = masm.framePushed() - currentStackHeight_;
+ if (freeSpace >= ChunkSize) {
+ uint32_t targetAllocSize = framePushedForHeight(currentStackHeight_);
+ uint32_t amountToFree = masm.framePushed() - targetAllocSize;
+ MOZ_ASSERT(amountToFree % ChunkSize == 0);
+ if (amountToFree) {
+ masm.freeStack(amountToFree);
+ }
+ }
+ checkChunkyInvariants();
+ }
+#endif
+
+ uint32_t currentStackHeight() const {
+#ifdef RABALDR_CHUNKY_STACK
+ return currentStackHeight_;
+#else
+ return masm.framePushed();
+#endif
+ }
+
+ private:
+#ifdef RABALDR_CHUNKY_STACK
+ void checkChunkyInvariants() {
+ MOZ_ASSERT(masm.framePushed() >= fixedAllocSize());
+ MOZ_ASSERT(masm.framePushed() >= currentStackHeight_);
+ MOZ_ASSERT(masm.framePushed() == fixedAllocSize() ||
+ masm.framePushed() - currentStackHeight_ < ChunkSize);
+ MOZ_ASSERT((masm.framePushed() - localSize_) % ChunkSize == 0);
+ }
+#endif
+
+ // For a given stack height, return the appropriate size of the allocated
+ // frame.
+
+ uint32_t framePushedForHeight(StackHeight stackHeight) {
+#ifdef RABALDR_CHUNKY_STACK
+ // A more complicated adjustment is needed.
+ return framePushedForHeight(stackHeight.height);
+#else
+ // The allocated frame size equals the stack height.
+ return stackHeight.height;
+#endif
+ }
+
+ public:
+ // The current height of the stack area, not necessarily zero-based, in a
+ // type-safe way.
+
+ StackHeight stackHeight() const { return StackHeight(currentStackHeight()); }
+
+ // Set the frame height to a previously recorded value.
+
+ void setStackHeight(StackHeight amount) {
+#ifdef RABALDR_CHUNKY_STACK
+ currentStackHeight_ = amount.height;
+ masm.setFramePushed(framePushedForHeight(amount));
+ checkChunkyInvariants();
+#else
+ masm.setFramePushed(amount.height);
+#endif
+ }
+
+ // The current height of the dynamic part of the stack area (ie, the backing
+ // store for the evaluation stack), zero-based.
+
+ uint32_t dynamicHeight() const { return currentStackHeight() - localSize_; }
+
+ // Before branching to an outer control label, pop the execution stack to
+ // the level expected by that region, but do not update masm.framePushed()
+ // as that will happen as compilation leaves the block.
+ //
+ // Note these operate directly on the stack pointer register.
+
+ void popStackBeforeBranch(StackHeight destStackHeight,
+ uint32_t stackResultBytes) {
+ uint32_t framePushedHere = masm.framePushed();
+ StackHeight heightThere =
+ StackHeight(destStackHeight.height + stackResultBytes);
+ uint32_t framePushedThere = framePushedForHeight(heightThere);
+ if (framePushedHere > framePushedThere) {
+ masm.addToStackPtr(Imm32(framePushedHere - framePushedThere));
+ }
+ }
+
+ void popStackBeforeBranch(StackHeight destStackHeight, ResultType type) {
+ popStackBeforeBranch(destStackHeight,
+ ABIResultIter::MeasureStackBytes(type));
+ }
+
+ // Given that there are |stackParamSize| bytes on the dynamic stack
+ // corresponding to the stack results, return the stack height once these
+ // parameters are popped.
+
+ StackHeight stackResultsBase(uint32_t stackParamSize) {
+ return StackHeight(currentStackHeight() - stackParamSize);
+ }
+
+ // For most of WebAssembly, adjacent instructions have fallthrough control
+ // flow between them, which allows us to simply thread the current stack
+ // height through the compiler. There are two exceptions to this rule: when
+ // leaving a block via dead code, and when entering the "else" arm of an "if".
+ // In these cases, the stack height is the block entry height, plus any stack
+ // values (results in the block exit case, parameters in the else entry case).
+
+ void resetStackHeight(StackHeight destStackHeight, ResultType type) {
+ uint32_t height = destStackHeight.height;
+ height += ABIResultIter::MeasureStackBytes(type);
+ setStackHeight(StackHeight(height));
+ }
+
+ // Return offset of stack result.
+
+ uint32_t locateStackResult(const ABIResult& result, StackHeight stackBase,
+ uint32_t stackResultBytes) {
+ MOZ_ASSERT(result.onStack());
+ MOZ_ASSERT(result.stackOffset() + result.size() <= stackResultBytes);
+ uint32_t end = computeHeightWithStackResults(stackBase, stackResultBytes);
+ return end - result.stackOffset();
+ }
+
+ public:
+ //////////////////////////////////////////////////////////////////////
+ //
+ // The Argument area - for outgoing calls.
+ //
+ // We abstract these operations as an optimization: we can merge the freeing
+ // of the argument area and dropping values off the stack after a call. But
+ // they always amount to manipulating the real stack pointer by some amount.
+ //
+ // Note that we do not update currentStackHeight_ for this; the frame does
+ // not know about outgoing arguments. But we do update framePushed(), so we
+ // can still index into the frame below the outgoing arguments area.
+
+ // This is always equivalent to a masm.reserveStack() call.
+
+ void allocArgArea(size_t argSize) {
+ if (argSize) {
+ masm.reserveStack(argSize);
+ }
+ }
+
+ // This frees the argument area allocated by allocArgArea(), and `argSize`
+ // must be equal to the `argSize` argument to allocArgArea(). In addition
+ // we drop some values from the frame, corresponding to the values that were
+ // consumed by the call.
+
+ void freeArgAreaAndPopBytes(size_t argSize, size_t dropSize) {
+#ifdef RABALDR_CHUNKY_STACK
+ // Freeing the outgoing arguments and freeing the consumed values have
+ // different semantics here, which is why the operation is split.
+ if (argSize) {
+ masm.freeStack(argSize);
+ }
+ popChunkyBytes(dropSize);
+#else
+ if (argSize + dropSize) {
+ masm.freeStack(argSize + dropSize);
+ }
+#endif
+ }
+};
+
+class BaseStackFrame final : public BaseStackFrameAllocator {
+ MacroAssembler& masm;
+
+ // The largest observed value of masm.framePushed(), ie, the size of the
+ // stack frame. Read this for its true value only when code generation is
+ // finished.
+ uint32_t maxFramePushed_;
+
+ // Patch point where we check for stack overflow.
+ CodeOffset stackAddOffset_;
+
+ // Low byte offset of pointer to stack results, if any.
+ Maybe<int32_t> stackResultsPtrOffset_;
+
+ // The offset of TLS pointer.
+ uint32_t tlsPointerOffset_;
+
+ // Low byte offset of local area for true locals (not parameters).
+ uint32_t varLow_;
+
+ // High byte offset + 1 of local area for true locals.
+ uint32_t varHigh_;
+
+ // The stack pointer, cached for brevity.
+ RegisterOrSP sp_;
+
+ public:
+ explicit BaseStackFrame(MacroAssembler& masm)
+ : BaseStackFrameAllocator(masm),
+ masm(masm),
+ maxFramePushed_(0),
+ stackAddOffset_(0),
+ tlsPointerOffset_(UINT32_MAX),
+ varLow_(UINT32_MAX),
+ varHigh_(UINT32_MAX),
+ sp_(masm.getStackPointer()) {}
+
+ ///////////////////////////////////////////////////////////////////////////
+ //
+ // Stack management and overflow checking
+
+ // This must be called once beginFunction has allocated space for the Header
+ // (the Frame and DebugFrame) and the Local area, and will record the current
+ // frame size for internal use by the stack abstractions.
+
+ void onFixedStackAllocated() {
+ maxFramePushed_ = masm.framePushed();
+ BaseStackFrameAllocator::onFixedStackAllocated();
+ }
+
+ // We won't know until after we've generated code how big the frame will be
+ // (we may need arbitrary spill slots and outgoing param slots) so emit a
+ // patchable add that is patched in endFunction().
+ //
+ // Note the platform scratch register may be used by branchPtr(), so
+ // generally tmp must be something else.
+
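+  // In effect (sketch): the emitted sequence computes
+  //   tmp = sp - <frame size, patched later to maxFramePushed_>
+  // and traps with Trap::StackOverflow unless tmp remains above the
+  // TlsData::stackLimit field read off the Tls register.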
+ void checkStack(Register tmp, BytecodeOffset trapOffset) {
+ stackAddOffset_ = masm.sub32FromStackPtrWithPatch(tmp);
+ Label ok;
+ masm.branchPtr(Assembler::Below,
+ Address(WasmTlsReg, offsetof(wasm::TlsData, stackLimit)),
+ tmp, &ok);
+ masm.wasmTrap(Trap::StackOverflow, trapOffset);
+ masm.bind(&ok);
+ }
+
+ void patchCheckStack() {
+ masm.patchSub32FromStackPtr(stackAddOffset_,
+ Imm32(int32_t(maxFramePushed_)));
+ }
+
+ // Very large frames are implausible, probably an attack.
+
+ bool checkStackHeight() {
+ // 512KiB should be enough, considering how Rabaldr uses the stack and
+ // what the standard limits are:
+ //
+ // - 1,000 parameters
+ // - 50,000 locals
+ // - 10,000 values on the eval stack (not an official limit)
+ //
+ // At sizeof(int64) bytes per slot this works out to about 480KiB.
+ return maxFramePushed_ <= 512 * 1024;
+ }
+
+ ///////////////////////////////////////////////////////////////////////////
+ //
+ // Local area
+
+ struct Local {
+ // Type of the value.
+ const MIRType type;
+
+ // Byte offset from Frame "into" the locals, ie positive for true locals
+ // and negative for incoming args that read directly from the arg area.
+ // It assumes the stack is growing down and that locals are on the stack
+ // at lower addresses than Frame, and is the offset from Frame of the
+ // lowest-addressed byte of the local.
+ const int32_t offs;
+
+ Local(MIRType type, int32_t offs) : type(type), offs(offs) {}
+
+ bool isStackArgument() const { return offs < 0; }
+ };
+
+ // Profiling shows that the number of parameters and locals frequently
+ // touches or exceeds 8. So 16 seems like a reasonable starting point.
+ using LocalVector = Vector<Local, 16, SystemAllocPolicy>;
+
+ // Initialize `localInfo` based on the types of `locals` and `args`.
+ MOZ_MUST_USE bool setupLocals(const ValTypeVector& locals,
+ const ArgTypeVector& args, bool debugEnabled,
+ LocalVector* localInfo) {
+ if (!localInfo->reserve(locals.length())) {
+ return false;
+ }
+
+ DebugOnly<uint32_t> index = 0;
+ BaseLocalIter i(locals, args, debugEnabled);
+ for (; !i.done() && i.index() < args.lengthWithoutStackResults(); i++) {
+ MOZ_ASSERT(i.isArg());
+ MOZ_ASSERT(i.index() == index);
+ localInfo->infallibleEmplaceBack(i.mirType(), i.frameOffset());
+ index++;
+ }
+
+ varLow_ = i.frameSize();
+ for (; !i.done(); i++) {
+ MOZ_ASSERT(!i.isArg());
+ MOZ_ASSERT(i.index() == index);
+ localInfo->infallibleEmplaceBack(i.mirType(), i.frameOffset());
+ index++;
+ }
+ varHigh_ = i.frameSize();
+
+ // Reserve an additional stack slot for the TLS pointer.
+ const uint32_t pointerAlignedVarHigh = AlignBytes(varHigh_, sizeof(void*));
+ const uint32_t localSize = pointerAlignedVarHigh + sizeof(void*);
+ tlsPointerOffset_ = localSize;
+
+ setLocalSize(AlignBytes(localSize, WasmStackAlignment));
+
+ if (args.hasSyntheticStackResultPointerArg()) {
+ stackResultsPtrOffset_ = Some(i.stackResultPointerOffset());
+ }
+
+ return true;
+ }
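+
+  // Worked example of the layout computed above (hypothetical numbers): if
+  // the true locals occupy frame offsets [varLow_, varHigh_) == [16, 44) on a
+  // 64-bit target, then varHigh_ is rounded up to 48, the pointer-sized TLS
+  // slot occupies the next 8 bytes so tlsPointerOffset_ == 56, and the fixed
+  // local size handed to setLocalSize() is 56 rounded up to
+  // WasmStackAlignment.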
+
+ void zeroLocals(BaseRegAlloc* ra);
+
+ Address addressOfLocal(const Local& local, uint32_t additionalOffset = 0) {
+ if (local.isStackArgument()) {
+ return Address(FramePointer,
+ stackArgumentOffsetFromFp(local) + additionalOffset);
+ }
+ return Address(sp_, localOffsetFromSp(local) + additionalOffset);
+ }
+
+ void loadLocalI32(const Local& src, RegI32 dest) {
+ masm.load32(addressOfLocal(src), dest);
+ }
+
+#ifndef JS_PUNBOX64
+ void loadLocalI64Low(const Local& src, RegI32 dest) {
+ masm.load32(addressOfLocal(src, INT64LOW_OFFSET), dest);
+ }
+
+ void loadLocalI64High(const Local& src, RegI32 dest) {
+ masm.load32(addressOfLocal(src, INT64HIGH_OFFSET), dest);
+ }
+#endif
+
+ void loadLocalI64(const Local& src, RegI64 dest) {
+ masm.load64(addressOfLocal(src), dest);
+ }
+
+ void loadLocalPtr(const Local& src, RegPtr dest) {
+ masm.loadPtr(addressOfLocal(src), dest);
+ }
+
+ void loadLocalF64(const Local& src, RegF64 dest) {
+ masm.loadDouble(addressOfLocal(src), dest);
+ }
+
+ void loadLocalF32(const Local& src, RegF32 dest) {
+ masm.loadFloat32(addressOfLocal(src), dest);
+ }
+
+#ifdef ENABLE_WASM_SIMD
+ void loadLocalV128(const Local& src, RegV128 dest) {
+ masm.loadUnalignedSimd128(addressOfLocal(src), dest);
+ }
+#endif
+
+ void storeLocalI32(RegI32 src, const Local& dest) {
+ masm.store32(src, addressOfLocal(dest));
+ }
+
+ void storeLocalI64(RegI64 src, const Local& dest) {
+ masm.store64(src, addressOfLocal(dest));
+ }
+
+ void storeLocalPtr(Register src, const Local& dest) {
+ masm.storePtr(src, addressOfLocal(dest));
+ }
+
+ void storeLocalF64(RegF64 src, const Local& dest) {
+ masm.storeDouble(src, addressOfLocal(dest));
+ }
+
+ void storeLocalF32(RegF32 src, const Local& dest) {
+ masm.storeFloat32(src, addressOfLocal(dest));
+ }
+
+#ifdef ENABLE_WASM_SIMD
+ void storeLocalV128(RegV128 src, const Local& dest) {
+ masm.storeUnalignedSimd128(src, addressOfLocal(dest));
+ }
+#endif
+
+ // Offset off of sp_ for `local`.
+ int32_t localOffsetFromSp(const Local& local) {
+ MOZ_ASSERT(!local.isStackArgument());
+ return localOffset(local.offs);
+ }
+
+ // Offset off of frame pointer for `stack argument`.
+ int32_t stackArgumentOffsetFromFp(const Local& local) {
+ MOZ_ASSERT(local.isStackArgument());
+ return -local.offs;
+ }
+
+ // The incoming stack result area pointer is for stack results of the function
+ // being compiled.
+ void loadIncomingStackResultAreaPtr(RegPtr reg) {
+ const int32_t offset = stackResultsPtrOffset_.value();
+ Address src = offset < 0 ? Address(FramePointer, -offset)
+ : Address(sp_, stackOffset(offset));
+ masm.loadPtr(src, reg);
+ }
+
+ void storeIncomingStackResultAreaPtr(RegPtr reg) {
+ // If we get here, that means the pointer to the stack results area was
+ // passed in as a register, and therefore it will be spilled below the
+ // frame, so the offset is a positive height.
+ MOZ_ASSERT(stackResultsPtrOffset_.value() > 0);
+ masm.storePtr(reg,
+ Address(sp_, stackOffset(stackResultsPtrOffset_.value())));
+ }
+
+ void loadTlsPtr(Register dst) {
+ masm.loadPtr(Address(sp_, stackOffset(tlsPointerOffset_)), dst);
+ }
+
+ void storeTlsPtr(Register tls) {
+ masm.storePtr(tls, Address(sp_, stackOffset(tlsPointerOffset_)));
+ }
+
+ int32_t getTlsPtrOffset() { return stackOffset(tlsPointerOffset_); }
+
+ // An outgoing stack result area pointer is for stack results of callees of
+ // the function being compiled.
+ void computeOutgoingStackResultAreaPtr(const StackResultsLoc& results,
+ RegPtr dest) {
+ MOZ_ASSERT(results.height() <= masm.framePushed());
+ uint32_t offsetFromSP = masm.framePushed() - results.height();
+ masm.moveStackPtrTo(dest);
+ if (offsetFromSP) {
+ masm.addPtr(Imm32(offsetFromSP), dest);
+ }
+ }
+
+ private:
+ // Offset off of sp_ for a local with offset `offset` from Frame.
+ int32_t localOffset(int32_t offset) { return masm.framePushed() - offset; }
+
+ public:
+ ///////////////////////////////////////////////////////////////////////////
+ //
+ // Dynamic area
+
+ static constexpr size_t StackSizeOfPtr = ABIResult::StackSizeOfPtr;
+ static constexpr size_t StackSizeOfInt64 = ABIResult::StackSizeOfInt64;
+ static constexpr size_t StackSizeOfFloat = ABIResult::StackSizeOfFloat;
+ static constexpr size_t StackSizeOfDouble = ABIResult::StackSizeOfDouble;
+#ifdef ENABLE_WASM_SIMD
+ static constexpr size_t StackSizeOfV128 = ABIResult::StackSizeOfV128;
+#endif
+
+ uint32_t pushPtr(Register r) {
+ DebugOnly<uint32_t> stackBefore = currentStackHeight();
+#ifdef RABALDR_CHUNKY_STACK
+ pushChunkyBytes(StackSizeOfPtr);
+ masm.storePtr(r, Address(sp_, stackOffset(currentStackHeight())));
+#else
+ masm.Push(r);
+#endif
+ maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
+ MOZ_ASSERT(stackBefore + StackSizeOfPtr == currentStackHeight());
+ return currentStackHeight();
+ }
+
+ uint32_t pushFloat32(FloatRegister r) {
+ DebugOnly<uint32_t> stackBefore = currentStackHeight();
+#ifdef RABALDR_CHUNKY_STACK
+ pushChunkyBytes(StackSizeOfFloat);
+ masm.storeFloat32(r, Address(sp_, stackOffset(currentStackHeight())));
+#else
+ masm.Push(r);
+#endif
+ maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
+ MOZ_ASSERT(stackBefore + StackSizeOfFloat == currentStackHeight());
+ return currentStackHeight();
+ }
+
+#ifdef ENABLE_WASM_SIMD
+ uint32_t pushV128(RegV128 r) {
+ DebugOnly<uint32_t> stackBefore = currentStackHeight();
+# ifdef RABALDR_CHUNKY_STACK
+ pushChunkyBytes(StackSizeOfV128);
+# else
+ masm.adjustStack(-(int)StackSizeOfV128);
+# endif
+ masm.storeUnalignedSimd128(r,
+ Address(sp_, stackOffset(currentStackHeight())));
+ maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
+ MOZ_ASSERT(stackBefore + StackSizeOfV128 == currentStackHeight());
+ return currentStackHeight();
+ }
+#endif
+
+ uint32_t pushDouble(FloatRegister r) {
+ DebugOnly<uint32_t> stackBefore = currentStackHeight();
+#ifdef RABALDR_CHUNKY_STACK
+ pushChunkyBytes(StackSizeOfDouble);
+ masm.storeDouble(r, Address(sp_, stackOffset(currentStackHeight())));
+#else
+ masm.Push(r);
+#endif
+ maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
+ MOZ_ASSERT(stackBefore + StackSizeOfDouble == currentStackHeight());
+ return currentStackHeight();
+ }
+
+ void popPtr(Register r) {
+ DebugOnly<uint32_t> stackBefore = currentStackHeight();
+#ifdef RABALDR_CHUNKY_STACK
+ masm.loadPtr(Address(sp_, stackOffset(currentStackHeight())), r);
+ popChunkyBytes(StackSizeOfPtr);
+#else
+ masm.Pop(r);
+#endif
+ MOZ_ASSERT(stackBefore - StackSizeOfPtr == currentStackHeight());
+ }
+
+ void popFloat32(FloatRegister r) {
+ DebugOnly<uint32_t> stackBefore = currentStackHeight();
+#ifdef RABALDR_CHUNKY_STACK
+ masm.loadFloat32(Address(sp_, stackOffset(currentStackHeight())), r);
+ popChunkyBytes(StackSizeOfFloat);
+#else
+ masm.Pop(r);
+#endif
+ MOZ_ASSERT(stackBefore - StackSizeOfFloat == currentStackHeight());
+ }
+
+ void popDouble(FloatRegister r) {
+ DebugOnly<uint32_t> stackBefore = currentStackHeight();
+#ifdef RABALDR_CHUNKY_STACK
+ masm.loadDouble(Address(sp_, stackOffset(currentStackHeight())), r);
+ popChunkyBytes(StackSizeOfDouble);
+#else
+ masm.Pop(r);
+#endif
+ MOZ_ASSERT(stackBefore - StackSizeOfDouble == currentStackHeight());
+ }
+
+#ifdef ENABLE_WASM_SIMD
+ void popV128(RegV128 r) {
+ DebugOnly<uint32_t> stackBefore = currentStackHeight();
+ masm.loadUnalignedSimd128(Address(sp_, stackOffset(currentStackHeight())),
+ r);
+# ifdef RABALDR_CHUNKY_STACK
+ popChunkyBytes(StackSizeOfV128);
+# else
+ masm.adjustStack((int)StackSizeOfV128);
+# endif
+ MOZ_ASSERT(stackBefore - StackSizeOfV128 == currentStackHeight());
+ }
+#endif
+
+ void popBytes(size_t bytes) {
+ if (bytes > 0) {
+#ifdef RABALDR_CHUNKY_STACK
+ popChunkyBytes(bytes);
+#else
+ masm.freeStack(bytes);
+#endif
+ }
+ }
+
+ void loadStackI32(int32_t offset, RegI32 dest) {
+ masm.load32(Address(sp_, stackOffset(offset)), dest);
+ }
+
+ void loadStackI64(int32_t offset, RegI64 dest) {
+ masm.load64(Address(sp_, stackOffset(offset)), dest);
+ }
+
+#ifndef JS_PUNBOX64
+ void loadStackI64Low(int32_t offset, RegI32 dest) {
+ masm.load32(Address(sp_, stackOffset(offset - INT64LOW_OFFSET)), dest);
+ }
+
+ void loadStackI64High(int32_t offset, RegI32 dest) {
+ masm.load32(Address(sp_, stackOffset(offset - INT64HIGH_OFFSET)), dest);
+ }
+#endif
+
+  // Disambiguation: this loads a "Ptr" value from the stack; it does not load
+ // the "StackPtr".
+
+ void loadStackPtr(int32_t offset, RegPtr dest) {
+ masm.loadPtr(Address(sp_, stackOffset(offset)), dest);
+ }
+
+ void loadStackF64(int32_t offset, RegF64 dest) {
+ masm.loadDouble(Address(sp_, stackOffset(offset)), dest);
+ }
+
+ void loadStackF32(int32_t offset, RegF32 dest) {
+ masm.loadFloat32(Address(sp_, stackOffset(offset)), dest);
+ }
+
+#ifdef ENABLE_WASM_SIMD
+ void loadStackV128(int32_t offset, RegV128 dest) {
+ masm.loadUnalignedSimd128(Address(sp_, stackOffset(offset)), dest);
+ }
+#endif
+
+ uint32_t prepareStackResultArea(StackHeight stackBase,
+ uint32_t stackResultBytes) {
+ uint32_t end = computeHeightWithStackResults(stackBase, stackResultBytes);
+ if (currentStackHeight() < end) {
+ uint32_t bytes = end - currentStackHeight();
+#ifdef RABALDR_CHUNKY_STACK
+ pushChunkyBytes(bytes);
+#else
+ masm.reserveStack(bytes);
+#endif
+ maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
+ }
+ return end;
+ }
+
+ void finishStackResultArea(StackHeight stackBase, uint32_t stackResultBytes) {
+ uint32_t end = computeHeightWithStackResults(stackBase, stackResultBytes);
+ MOZ_ASSERT(currentStackHeight() >= end);
+ popBytes(currentStackHeight() - end);
+ }
+
+ // |srcHeight| and |destHeight| are stack heights *including* |bytes|.
+ void shuffleStackResultsTowardFP(uint32_t srcHeight, uint32_t destHeight,
+ uint32_t bytes, Register temp) {
+ MOZ_ASSERT(destHeight < srcHeight);
+ MOZ_ASSERT(bytes % sizeof(uint32_t) == 0);
+ uint32_t destOffset = stackOffset(destHeight) + bytes;
+ uint32_t srcOffset = stackOffset(srcHeight) + bytes;
+ while (bytes >= sizeof(intptr_t)) {
+ destOffset -= sizeof(intptr_t);
+ srcOffset -= sizeof(intptr_t);
+ bytes -= sizeof(intptr_t);
+ masm.loadPtr(Address(sp_, srcOffset), temp);
+ masm.storePtr(temp, Address(sp_, destOffset));
+ }
+ if (bytes) {
+ MOZ_ASSERT(bytes == sizeof(uint32_t));
+ destOffset -= sizeof(uint32_t);
+ srcOffset -= sizeof(uint32_t);
+ masm.load32(Address(sp_, srcOffset), temp);
+ masm.store32(temp, Address(sp_, destOffset));
+ }
+ }
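+
+  // For example (hypothetical sizes): moving bytes == 12 on a 64-bit target
+  // performs one 8-byte copy followed by one 4-byte copy; the offsets are
+  // decremented before each copy, so the highest-addressed word moves first,
+  // which keeps overlapping regions safe when results move toward the frame
+  // pointer.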
+
+ // Unlike the overload that operates on raw heights, |srcHeight| and
+ // |destHeight| are stack heights *not including* |bytes|.
+ void shuffleStackResultsTowardFP(StackHeight srcHeight,
+ StackHeight destHeight, uint32_t bytes,
+ Register temp) {
+ MOZ_ASSERT(srcHeight.isValid());
+ MOZ_ASSERT(destHeight.isValid());
+ uint32_t src = computeHeightWithStackResults(srcHeight, bytes);
+ uint32_t dest = computeHeightWithStackResults(destHeight, bytes);
+ MOZ_ASSERT(src <= currentStackHeight());
+ MOZ_ASSERT(dest <= currentStackHeight());
+ shuffleStackResultsTowardFP(src, dest, bytes, temp);
+ }
+
+ // |srcHeight| and |destHeight| are stack heights *including* |bytes|.
+ void shuffleStackResultsTowardSP(uint32_t srcHeight, uint32_t destHeight,
+ uint32_t bytes, Register temp) {
+ MOZ_ASSERT(destHeight > srcHeight);
+ MOZ_ASSERT(bytes % sizeof(uint32_t) == 0);
+ uint32_t destOffset = stackOffset(destHeight);
+ uint32_t srcOffset = stackOffset(srcHeight);
+ while (bytes >= sizeof(intptr_t)) {
+ masm.loadPtr(Address(sp_, srcOffset), temp);
+ masm.storePtr(temp, Address(sp_, destOffset));
+ destOffset += sizeof(intptr_t);
+ srcOffset += sizeof(intptr_t);
+ bytes -= sizeof(intptr_t);
+ }
+ if (bytes) {
+ MOZ_ASSERT(bytes == sizeof(uint32_t));
+ masm.load32(Address(sp_, srcOffset), temp);
+ masm.store32(temp, Address(sp_, destOffset));
+ }
+ }
+
+ // Copy results from the top of the current stack frame to an area of memory,
+ // and pop the stack accordingly. `dest` is the address of the low byte of
+ // that memory.
+ void popStackResultsToMemory(Register dest, uint32_t bytes, Register temp) {
+ MOZ_ASSERT(bytes <= currentStackHeight());
+ MOZ_ASSERT(bytes % sizeof(uint32_t) == 0);
+ uint32_t bytesToPop = bytes;
+ uint32_t srcOffset = stackOffset(currentStackHeight());
+ uint32_t destOffset = 0;
+ while (bytes >= sizeof(intptr_t)) {
+ masm.loadPtr(Address(sp_, srcOffset), temp);
+ masm.storePtr(temp, Address(dest, destOffset));
+ destOffset += sizeof(intptr_t);
+ srcOffset += sizeof(intptr_t);
+ bytes -= sizeof(intptr_t);
+ }
+ if (bytes) {
+ MOZ_ASSERT(bytes == sizeof(uint32_t));
+ masm.load32(Address(sp_, srcOffset), temp);
+ masm.store32(temp, Address(dest, destOffset));
+ }
+ popBytes(bytesToPop);
+ }
+
+ private:
+ void store32BitsToStack(int32_t imm, uint32_t destHeight, Register temp) {
+ masm.move32(Imm32(imm), temp);
+ masm.store32(temp, Address(sp_, stackOffset(destHeight)));
+ }
+
+ void store64BitsToStack(int64_t imm, uint32_t destHeight, Register temp) {
+#ifdef JS_PUNBOX64
+ masm.move64(Imm64(imm), Register64(temp));
+ masm.store64(Register64(temp), Address(sp_, stackOffset(destHeight)));
+#else
+ union {
+ int64_t i64;
+ int32_t i32[2];
+ } bits = {.i64 = imm};
+ static_assert(sizeof(bits) == 8);
+ store32BitsToStack(bits.i32[0], destHeight, temp);
+ store32BitsToStack(bits.i32[1], destHeight - sizeof(int32_t), temp);
+#endif
+ }
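+
+  // Illustrative example for the 32-bit path (hypothetical value, assuming a
+  // little-endian target): imm == 0x1122334455667788 stores 0x55667788 (the
+  // low half, bits.i32[0]) at destHeight and 0x11223344 (the high half,
+  // bits.i32[1]) at destHeight - 4, mirroring the INT64LOW_OFFSET /
+  // INT64HIGH_OFFSET convention used by loadStackI64Low/High above.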
+
+ public:
+ void storeImmediatePtrToStack(intptr_t imm, uint32_t destHeight,
+ Register temp) {
+#ifdef JS_PUNBOX64
+ static_assert(StackSizeOfPtr == 8);
+ store64BitsToStack(imm, destHeight, temp);
+#else
+ static_assert(StackSizeOfPtr == 4);
+ store32BitsToStack(int32_t(imm), destHeight, temp);
+#endif
+ }
+
+ void storeImmediateI64ToStack(int64_t imm, uint32_t destHeight,
+ Register temp) {
+ store64BitsToStack(imm, destHeight, temp);
+ }
+
+ void storeImmediateF32ToStack(float imm, uint32_t destHeight, Register temp) {
+ union {
+ int32_t i32;
+ float f32;
+ } bits = {.f32 = imm};
+ static_assert(sizeof(bits) == 4);
+ // Do not store 4 bytes if StackSizeOfFloat == 8. It's probably OK to do
+ // so, but it costs little to store something predictable.
+ if (StackSizeOfFloat == 4) {
+ store32BitsToStack(bits.i32, destHeight, temp);
+ } else {
+ store64BitsToStack(uint32_t(bits.i32), destHeight, temp);
+ }
+ }
+
+ void storeImmediateF64ToStack(double imm, uint32_t destHeight,
+ Register temp) {
+ union {
+ int64_t i64;
+ double f64;
+ } bits = {.f64 = imm};
+ static_assert(sizeof(bits) == 8);
+ store64BitsToStack(bits.i64, destHeight, temp);
+ }
+
+#ifdef ENABLE_WASM_SIMD
+ void storeImmediateV128ToStack(V128 imm, uint32_t destHeight, Register temp) {
+ union {
+ int32_t i32[4];
+ uint8_t bytes[16];
+ } bits;
+ static_assert(sizeof(bits) == 16);
+ memcpy(bits.bytes, imm.bytes, 16);
+ for (unsigned i = 0; i < 4; i++) {
+ store32BitsToStack(bits.i32[i], destHeight - i * sizeof(int32_t), temp);
+ }
+ }
+#endif
+};
+
+void BaseStackFrame::zeroLocals(BaseRegAlloc* ra) {
+ MOZ_ASSERT(varLow_ != UINT32_MAX);
+
+ if (varLow_ == varHigh_) {
+ return;
+ }
+
+ static const uint32_t wordSize = sizeof(void*);
+
+  // The adjustments to 'low' by the size of the item being stored compensate
+ // for the fact that locals offsets are the offsets from Frame to the bytes
+ // directly "above" the locals in the locals area. See comment at Local.
+
+ // On 64-bit systems we may have 32-bit alignment for the local area as it
+ // may be preceded by parameters and prologue/debug data.
+
+ uint32_t low = varLow_;
+ if (low % wordSize) {
+ masm.store32(Imm32(0), Address(sp_, localOffset(low + 4)));
+ low += 4;
+ }
+ MOZ_ASSERT(low % wordSize == 0);
+
+ const uint32_t high = AlignBytes(varHigh_, wordSize);
+
+ // An UNROLL_LIMIT of 16 is chosen so that we only need an 8-bit signed
+ // immediate to represent the offset in the store instructions in the loop
+ // on x64.
+
+ const uint32_t UNROLL_LIMIT = 16;
+ const uint32_t initWords = (high - low) / wordSize;
+ const uint32_t tailWords = initWords % UNROLL_LIMIT;
+ const uint32_t loopHigh = high - (tailWords * wordSize);
+
+ // With only one word to initialize, just store an immediate zero.
+
+ if (initWords == 1) {
+ masm.storePtr(ImmWord(0), Address(sp_, localOffset(low + wordSize)));
+ return;
+ }
+
+ // For other cases, it's best to have a zero in a register.
+ //
+ // One can do more here with SIMD registers (store 16 bytes at a time) or
+ // with instructions like STRD on ARM (store 8 bytes at a time), but that's
+ // for another day.
+
+ RegI32 zero = ra->needI32();
+ masm.mov(ImmWord(0), zero);
+
+ // For the general case we want to have a loop body of UNROLL_LIMIT stores
+ // and then a tail of less than UNROLL_LIMIT stores. When initWords is less
+ // than 2*UNROLL_LIMIT the loop trip count is at most 1 and there is no
+ // benefit to having the pointer calculations and the compare-and-branch.
+ // So we completely unroll when we have initWords < 2 * UNROLL_LIMIT. (In
+ // this case we'll end up using 32-bit offsets on x64 for up to half of the
+ // stores, though.)
+
+ // Fully-unrolled case.
+
+ if (initWords < 2 * UNROLL_LIMIT) {
+ for (uint32_t i = low; i < high; i += wordSize) {
+ masm.storePtr(zero, Address(sp_, localOffset(i + wordSize)));
+ }
+ ra->freeI32(zero);
+ return;
+ }
+
+ // Unrolled loop with a tail. Stores will use negative offsets. That's OK
+ // for x86 and ARM, at least.
+
+ // Compute pointer to the highest-addressed slot on the frame.
+ RegI32 p = ra->needI32();
+ masm.computeEffectiveAddress(Address(sp_, localOffset(low + wordSize)), p);
+
+ // Compute pointer to the lowest-addressed slot on the frame that will be
+ // initialized by the loop body.
+ RegI32 lim = ra->needI32();
+ masm.computeEffectiveAddress(Address(sp_, localOffset(loopHigh + wordSize)),
+ lim);
+
+ // The loop body. Eventually we'll have p == lim and exit the loop.
+ Label again;
+ masm.bind(&again);
+ for (uint32_t i = 0; i < UNROLL_LIMIT; ++i) {
+ masm.storePtr(zero, Address(p, -(wordSize * i)));
+ }
+ masm.subPtr(Imm32(UNROLL_LIMIT * wordSize), p);
+ masm.branchPtr(Assembler::LessThan, lim, p, &again);
+
+ // The tail.
+ for (uint32_t i = 0; i < tailWords; ++i) {
+ masm.storePtr(zero, Address(p, -(wordSize * i)));
+ }
+
+ ra->freeI32(p);
+ ra->freeI32(lim);
+ ra->freeI32(zero);
+}
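+
+// Worked example of the unrolling decision above (hypothetical sizes): zeroing
+// 100 words with UNROLL_LIMIT == 16 gives tailWords == 4, so the loop runs
+// (100 - 4) / 16 == 6 iterations of 16 stores followed by 4 tail stores;
+// zeroing 20 words (< 2 * UNROLL_LIMIT) is instead fully unrolled into 20
+// stores.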
+
+// Value stack: stack elements
+
+struct Stk {
+ private:
+ Stk() : kind_(Unknown), i64val_(0) {}
+
+ public:
+ enum Kind {
+ // The Mem opcodes are all clustered at the beginning to
+ // allow for a quick test within sync().
+ MemI32, // 32-bit integer stack value ("offs")
+ MemI64, // 64-bit integer stack value ("offs")
+ MemF32, // 32-bit floating stack value ("offs")
+ MemF64, // 64-bit floating stack value ("offs")
+#ifdef ENABLE_WASM_SIMD
+ MemV128, // 128-bit vector stack value ("offs")
+#endif
+ MemRef, // reftype (pointer wide) stack value ("offs")
+
+ // The Local opcodes follow the Mem opcodes for a similar
+ // quick test within hasLocal().
+ LocalI32, // Local int32 var ("slot")
+ LocalI64, // Local int64 var ("slot")
+ LocalF32, // Local float32 var ("slot")
+ LocalF64, // Local double var ("slot")
+#ifdef ENABLE_WASM_SIMD
+ LocalV128, // Local v128 var ("slot")
+#endif
+ LocalRef, // Local reftype (pointer wide) var ("slot")
+
+ RegisterI32, // 32-bit integer register ("i32reg")
+ RegisterI64, // 64-bit integer register ("i64reg")
+ RegisterF32, // 32-bit floating register ("f32reg")
+ RegisterF64, // 64-bit floating register ("f64reg")
+#ifdef ENABLE_WASM_SIMD
+ RegisterV128, // 128-bit vector register ("v128reg")
+#endif
+ RegisterRef, // reftype (pointer wide) register ("refReg")
+
+ ConstI32, // 32-bit integer constant ("i32val")
+ ConstI64, // 64-bit integer constant ("i64val")
+ ConstF32, // 32-bit floating constant ("f32val")
+ ConstF64, // 64-bit floating constant ("f64val")
+#ifdef ENABLE_WASM_SIMD
+ ConstV128, // 128-bit vector constant ("v128val")
+#endif
+ ConstRef, // reftype (pointer wide) constant ("refval")
+
+ Unknown,
+ };
+
+ Kind kind_;
+
+ static const Kind MemLast = MemRef;
+ static const Kind LocalLast = LocalRef;
+
+ union {
+ RegI32 i32reg_;
+ RegI64 i64reg_;
+ RegPtr refReg_;
+ RegF32 f32reg_;
+ RegF64 f64reg_;
+#ifdef ENABLE_WASM_SIMD
+ RegV128 v128reg_;
+#endif
+ int32_t i32val_;
+ int64_t i64val_;
+ intptr_t refval_;
+ float f32val_;
+ double f64val_;
+#ifdef ENABLE_WASM_SIMD
+ V128 v128val_;
+#endif
+ uint32_t slot_;
+ uint32_t offs_;
+ };
+
+ explicit Stk(RegI32 r) : kind_(RegisterI32), i32reg_(r) {}
+ explicit Stk(RegI64 r) : kind_(RegisterI64), i64reg_(r) {}
+ explicit Stk(RegPtr r) : kind_(RegisterRef), refReg_(r) {}
+ explicit Stk(RegF32 r) : kind_(RegisterF32), f32reg_(r) {}
+ explicit Stk(RegF64 r) : kind_(RegisterF64), f64reg_(r) {}
+#ifdef ENABLE_WASM_SIMD
+ explicit Stk(RegV128 r) : kind_(RegisterV128), v128reg_(r) {}
+#endif
+ explicit Stk(int32_t v) : kind_(ConstI32), i32val_(v) {}
+ explicit Stk(int64_t v) : kind_(ConstI64), i64val_(v) {}
+ explicit Stk(float v) : kind_(ConstF32), f32val_(v) {}
+ explicit Stk(double v) : kind_(ConstF64), f64val_(v) {}
+#ifdef ENABLE_WASM_SIMD
+ explicit Stk(V128 v) : kind_(ConstV128), v128val_(v) {}
+#endif
+ explicit Stk(Kind k, uint32_t v) : kind_(k), slot_(v) {
+ MOZ_ASSERT(k > MemLast && k <= LocalLast);
+ }
+ static Stk StkRef(intptr_t v) {
+ Stk s;
+ s.kind_ = ConstRef;
+ s.refval_ = v;
+ return s;
+ }
+ static Stk StackResult(ValType type, uint32_t offs) {
+ Kind k;
+ switch (type.kind()) {
+ case ValType::I32:
+ k = Stk::MemI32;
+ break;
+ case ValType::I64:
+ k = Stk::MemI64;
+ break;
+ case ValType::V128:
+#ifdef ENABLE_WASM_SIMD
+ k = Stk::MemV128;
+ break;
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+ case ValType::F32:
+ k = Stk::MemF32;
+ break;
+ case ValType::F64:
+ k = Stk::MemF64;
+ break;
+ case ValType::Ref:
+ k = Stk::MemRef;
+ break;
+ }
+ Stk s;
+ s.setOffs(k, offs);
+ return s;
+ }
+
+ void setOffs(Kind k, uint32_t v) {
+ MOZ_ASSERT(k <= MemLast);
+ kind_ = k;
+ offs_ = v;
+ }
+
+ Kind kind() const { return kind_; }
+ bool isMem() const { return kind_ <= MemLast; }
+
+ RegI32 i32reg() const {
+ MOZ_ASSERT(kind_ == RegisterI32);
+ return i32reg_;
+ }
+ RegI64 i64reg() const {
+ MOZ_ASSERT(kind_ == RegisterI64);
+ return i64reg_;
+ }
+ RegPtr refReg() const {
+ MOZ_ASSERT(kind_ == RegisterRef);
+ return refReg_;
+ }
+ RegF32 f32reg() const {
+ MOZ_ASSERT(kind_ == RegisterF32);
+ return f32reg_;
+ }
+ RegF64 f64reg() const {
+ MOZ_ASSERT(kind_ == RegisterF64);
+ return f64reg_;
+ }
+#ifdef ENABLE_WASM_SIMD
+ RegV128 v128reg() const {
+ MOZ_ASSERT(kind_ == RegisterV128);
+ return v128reg_;
+ }
+#endif
+ int32_t i32val() const {
+ MOZ_ASSERT(kind_ == ConstI32);
+ return i32val_;
+ }
+ int64_t i64val() const {
+ MOZ_ASSERT(kind_ == ConstI64);
+ return i64val_;
+ }
+ intptr_t refval() const {
+ MOZ_ASSERT(kind_ == ConstRef);
+ return refval_;
+ }
+
+  // For these two, use an out-param instead of simply returning, to
+  // use the normal stack and not the x87 FP stack (which would alter
+  // NaNs that have the signaling bit set).
+
+ void f32val(float* out) const {
+ MOZ_ASSERT(kind_ == ConstF32);
+ *out = f32val_;
+ }
+ void f64val(double* out) const {
+ MOZ_ASSERT(kind_ == ConstF64);
+ *out = f64val_;
+ }
+
+#ifdef ENABLE_WASM_SIMD
+ // For SIMD, do the same as for floats since we're using float registers to
+ // hold vectors; this is just conservative.
+ void v128val(V128* out) const {
+ MOZ_ASSERT(kind_ == ConstV128);
+ *out = v128val_;
+ }
+#endif
+
+ uint32_t slot() const {
+ MOZ_ASSERT(kind_ > MemLast && kind_ <= LocalLast);
+ return slot_;
+ }
+ uint32_t offs() const {
+ MOZ_ASSERT(isMem());
+ return offs_;
+ }
+};
+
+using StkVector = Vector<Stk, 0, SystemAllocPolicy>;
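+
+// Illustrative sketch of the tagged-union encoding (hypothetical values):
+//
+//   Stk a(int32_t(7));          // ConstI32; a.i32val() == 7
+//   Stk b(Stk::LocalF64, 3);    // LocalF64 referring to local slot 3
+//   Stk c = Stk::StackResult(ValType::I32, offs);
+//                               // MemI32 spilled at frame height `offs`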
+
+// MachineStackTracker, used for stack-slot pointerness tracking.
+
+class MachineStackTracker {
+ // Simulates the machine's stack, with one bool per word. Index zero in
+ // this vector corresponds to the highest address in the machine stack. The
+ // last entry corresponds to what SP currently points at. This all assumes
+ // a grow-down stack.
+ //
+ // numPtrs_ contains the number of "true" values in vec_, and is therefore
+ // redundant. But it serves as a constant-time way to detect the common
+ // case where vec_ holds no "true" values.
+ size_t numPtrs_;
+ Vector<bool, 64, SystemAllocPolicy> vec_;
+
+ public:
+ MachineStackTracker() : numPtrs_(0) {}
+
+ ~MachineStackTracker() {
+#ifdef DEBUG
+ size_t n = 0;
+ for (bool b : vec_) {
+ n += (b ? 1 : 0);
+ }
+ MOZ_ASSERT(n == numPtrs_);
+#endif
+ }
+
+ // Clone this MachineStackTracker, writing the result at |dst|.
+ [[nodiscard]] bool cloneTo(MachineStackTracker* dst) {
+ MOZ_ASSERT(dst->vec_.empty());
+ if (!dst->vec_.appendAll(vec_)) {
+ return false;
+ }
+ dst->numPtrs_ = numPtrs_;
+ return true;
+ }
+
+ // Notionally push |n| non-pointers on the stack.
+ [[nodiscard]] bool pushNonGCPointers(size_t n) {
+ return vec_.appendN(false, n);
+ }
+
+ // Mark the stack slot |offsetFromSP| up from the bottom as holding a
+ // pointer.
+ void setGCPointer(size_t offsetFromSP) {
+ // offsetFromSP == 0 denotes the most recently pushed item, == 1 the
+ // second most recently pushed item, etc.
+ MOZ_ASSERT(offsetFromSP < vec_.length());
+
+ size_t offsetFromTop = vec_.length() - 1 - offsetFromSP;
+ numPtrs_ = numPtrs_ + 1 - (vec_[offsetFromTop] ? 1 : 0);
+ vec_[offsetFromTop] = true;
+ }
+
+ // Query the pointerness of the slot |offsetFromSP| up from the bottom.
+ bool isGCPointer(size_t offsetFromSP) {
+ MOZ_ASSERT(offsetFromSP < vec_.length());
+
+ size_t offsetFromTop = vec_.length() - 1 - offsetFromSP;
+ return vec_[offsetFromTop];
+ }
+
+ // Return the number of words tracked by this MachineStackTracker.
+ size_t length() { return vec_.length(); }
+
+ // Return the number of pointer-typed words tracked by this
+ // MachineStackTracker.
+ size_t numPtrs() {
+ MOZ_ASSERT(numPtrs_ <= length());
+ return numPtrs_;
+ }
+
+ // Discard all contents, but (per mozilla::Vector::clear semantics) don't
+ // free or reallocate any dynamic storage associated with |vec_|.
+ void clear() {
+ vec_.clear();
+ numPtrs_ = 0;
+ }
+};
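+
+// Illustrative sketch of the tracker's indexing (hypothetical usage):
+//
+//   MachineStackTracker mst;
+//   (void)mst.pushNonGCPointers(4);  // simulate pushing four machine words
+//   mst.setGCPointer(1);             // second-most-recently pushed word
+//   MOZ_ASSERT(mst.numPtrs() == 1);
+//   MOZ_ASSERT(mst.isGCPointer(1) && !mst.isGCPointer(0));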
+
+// StackMapGenerator, which carries all state needed to create stack maps.
+
+enum class HasDebugFrame { No, Yes };
+
+struct StackMapGenerator {
+ private:
+ // --- These are constant for the life of the function's compilation ---
+
+ // For generating stack maps, we'll need to know the offsets of registers
+ // as saved by the trap exit stub.
+ const MachineState& trapExitLayout_;
+ const size_t trapExitLayoutNumWords_;
+
+ // Completed stackmaps are added here
+ StackMaps* stackMaps_;
+
+ // So as to be able to get current offset when creating stack maps
+ const MacroAssembler& masm_;
+
+ public:
+ // --- These are constant once we've completed beginFunction() ---
+
+ // The number of words of arguments passed to this function in memory.
+ size_t numStackArgWords;
+
+ MachineStackTracker machineStackTracker; // tracks machine stack pointerness
+
+ // This holds masm.framePushed at entry to the function's body. It is a
+ // Maybe because createStackMap needs to know whether or not we're still
+ // in the prologue. It makes a Nothing-to-Some transition just once per
+ // function.
+ Maybe<uint32_t> framePushedAtEntryToBody;
+
+ // --- These can change at any point ---
+
+  // This holds masm.framePushed as it would be for a function call
+ // instruction, but excluding the stack area used to pass arguments in
+ // memory. That is, for an upcoming function call, this will hold
+ //
+ // masm.framePushed() at the call instruction -
+ // StackArgAreaSizeUnaligned(argumentTypes)
+ //
+ // This value denotes the lowest-addressed stack word covered by the current
+ // function's stackmap. Words below this point form the highest-addressed
+ // area of the callee's stackmap. Note that all alignment padding above the
+ // arguments-in-memory themselves belongs to the caller's stack map, which
+ // is why this is defined in terms of StackArgAreaSizeUnaligned() rather than
+ // StackArgAreaSizeAligned().
+ //
+ // When not inside a function call setup/teardown sequence, it is Nothing.
+ // It can make Nothing-to/from-Some transitions arbitrarily as we progress
+ // through the function body.
+ Maybe<uint32_t> framePushedExcludingOutboundCallArgs;
+
+ // The number of memory-resident, ref-typed entries on the containing
+ // BaseCompiler::stk_.
+ size_t memRefsOnStk;
+
+ // This is a copy of machineStackTracker that is used only within individual
+ // calls to createStackMap. It is here only to avoid possible heap allocation
+ // costs resulting from making it local to createStackMap().
+ MachineStackTracker augmentedMst;
+
+ StackMapGenerator(StackMaps* stackMaps, const MachineState& trapExitLayout,
+ const size_t trapExitLayoutNumWords,
+ const MacroAssembler& masm)
+ : trapExitLayout_(trapExitLayout),
+ trapExitLayoutNumWords_(trapExitLayoutNumWords),
+ stackMaps_(stackMaps),
+ masm_(masm),
+ numStackArgWords(0),
+ memRefsOnStk(0) {}
+
+ // At the beginning of a function, we may have live roots in registers (as
+ // arguments) at the point where we perform a stack overflow check. This
+ // method generates the "extra" stackmap entries to describe that, in the
+ // case that the check fails and we wind up calling into the wasm exit
+ // stub, as generated by GenerateTrapExit().
+ //
+ // The resulting map must correspond precisely with the stack layout
+ // created for the integer registers as saved by (code generated by)
+ // GenerateTrapExit(). To do that we use trapExitLayout_ and
+ // trapExitLayoutNumWords_, which together comprise a description of the
+ // layout and are created by GenerateTrapExitMachineState().
+ [[nodiscard]] bool generateStackmapEntriesForTrapExit(
+ const ArgTypeVector& args, ExitStubMapVector* extras) {
+ return GenerateStackmapEntriesForTrapExit(args, trapExitLayout_,
+ trapExitLayoutNumWords_, extras);
+ }
+
+ // Creates a stackmap associated with the instruction denoted by
+ // |assemblerOffset|, incorporating pointers from the current operand
+  // stack |stk|, incorporating possible extra pointers in |extras| at the
+  // lower addressed end, and possibly with the associated frame having a
+  // ref-typed DebugFrame as indicated by |debugFrame|.
+ [[nodiscard]] bool createStackMap(const char* who,
+ const ExitStubMapVector& extras,
+ uint32_t assemblerOffset,
+ HasDebugFrame debugFrame,
+ const StkVector& stk) {
+ size_t countedPointers = machineStackTracker.numPtrs() + memRefsOnStk;
+#ifndef DEBUG
+ // An important optimization. If there are obviously no pointers, as
+ // we expect in the majority of cases, exit quickly.
+ if (countedPointers == 0 && debugFrame == HasDebugFrame::No) {
+ // We can skip creating the map if there are no |true| elements in
+ // |extras|.
+ bool extrasHasRef = false;
+ for (bool b : extras) {
+ if (b) {
+ extrasHasRef = true;
+ break;
+ }
+ }
+ if (!extrasHasRef) {
+ return true;
+ }
+ }
+#else
+ // In the debug case, create the stack map regardless, and cross-check
+ // the pointer-counting below. We expect the final map to have
+ // |countedPointers| in total. This doesn't include those in the
+ // DebugFrame, but they do not appear in the map's bitmap. Note that
+ // |countedPointers| is debug-only from this point onwards.
+ for (bool b : extras) {
+ countedPointers += (b ? 1 : 0);
+ }
+#endif
+
+ // Start with the frame-setup map, and add operand-stack information to
+ // that. augmentedMst holds live data only within individual calls to
+ // createStackMap.
+ augmentedMst.clear();
+ if (!machineStackTracker.cloneTo(&augmentedMst)) {
+ return false;
+ }
+
+ // At this point, augmentedMst only contains entries covering the
+ // incoming argument area (if any) and for the area allocated by this
+ // function's prologue. We now need to calculate how far the machine's
+ // stack pointer is below where it was at the start of the body. But we
+ // must take care not to include any words pushed as arguments to an
+ // upcoming function call, since those words "belong" to the stackmap of
+ // the callee, not to the stackmap of this function. Note however that
+ // any alignment padding pushed prior to pushing the args *does* belong to
+ // this function.
+ //
+ // That padding is taken into account at the point where
+ // framePushedExcludingOutboundCallArgs is set, viz, in startCallArgs(),
+ // and comprises two components:
+ //
+ // * call->frameAlignAdjustment
+ // * the padding applied to the stack arg area itself. That is:
+    //     StackArgAreaSizeAligned(argTys) - StackArgAreaSizeUnaligned(argTys)
+ Maybe<uint32_t> framePushedExcludingArgs;
+ if (framePushedAtEntryToBody.isNothing()) {
+ // Still in the prologue. framePushedExcludingArgs remains Nothing.
+ MOZ_ASSERT(framePushedExcludingOutboundCallArgs.isNothing());
+ } else {
+ // In the body.
+ MOZ_ASSERT(masm_.framePushed() >= framePushedAtEntryToBody.value());
+ if (framePushedExcludingOutboundCallArgs.isSome()) {
+ // In the body, and we've potentially pushed some args onto the stack.
+ // We must ignore them when sizing the stackmap.
+ MOZ_ASSERT(masm_.framePushed() >=
+ framePushedExcludingOutboundCallArgs.value());
+ MOZ_ASSERT(framePushedExcludingOutboundCallArgs.value() >=
+ framePushedAtEntryToBody.value());
+ framePushedExcludingArgs =
+ Some(framePushedExcludingOutboundCallArgs.value());
+ } else {
+ // In the body, but not with call args on the stack. The stackmap
+ // must be sized so as to extend all the way "down" to
+ // masm_.framePushed().
+ framePushedExcludingArgs = Some(masm_.framePushed());
+ }
+ }
+
+ if (framePushedExcludingArgs.isSome()) {
+ uint32_t bodyPushedBytes =
+ framePushedExcludingArgs.value() - framePushedAtEntryToBody.value();
+ MOZ_ASSERT(0 == bodyPushedBytes % sizeof(void*));
+ if (!augmentedMst.pushNonGCPointers(bodyPushedBytes / sizeof(void*))) {
+ return false;
+ }
+ }
+
+ // Scan the operand stack, marking pointers in the just-added new
+ // section.
+ MOZ_ASSERT_IF(framePushedAtEntryToBody.isNothing(), stk.empty());
+ MOZ_ASSERT_IF(framePushedExcludingArgs.isNothing(), stk.empty());
+
+ for (const Stk& v : stk) {
+#ifndef DEBUG
+ // We don't track roots in registers, per rationale below, so if this
+ // doesn't hold, something is seriously wrong, and we're likely to get a
+ // GC-related crash.
+ MOZ_RELEASE_ASSERT(v.kind() != Stk::RegisterRef);
+ if (v.kind() != Stk::MemRef) {
+ continue;
+ }
+#else
+ // Take the opportunity to check everything we reasonably can about
+ // operand stack elements.
+ switch (v.kind()) {
+ case Stk::MemI32:
+ case Stk::MemI64:
+ case Stk::MemF32:
+ case Stk::MemF64:
+ case Stk::ConstI32:
+ case Stk::ConstI64:
+ case Stk::ConstF32:
+ case Stk::ConstF64:
+# ifdef ENABLE_WASM_SIMD
+ case Stk::MemV128:
+ case Stk::ConstV128:
+# endif
+ // All of these have uninteresting type.
+ continue;
+ case Stk::LocalI32:
+ case Stk::LocalI64:
+ case Stk::LocalF32:
+ case Stk::LocalF64:
+# ifdef ENABLE_WASM_SIMD
+ case Stk::LocalV128:
+# endif
+ // These also have uninteresting type. Check that they live in the
+ // section of stack set up by beginFunction(). The unguarded use of
+ // |value()| here is safe due to the assertion above this loop.
+ MOZ_ASSERT(v.offs() <= framePushedAtEntryToBody.value());
+ continue;
+ case Stk::RegisterI32:
+ case Stk::RegisterI64:
+ case Stk::RegisterF32:
+ case Stk::RegisterF64:
+# ifdef ENABLE_WASM_SIMD
+ case Stk::RegisterV128:
+# endif
+ // These also have uninteresting type, but more to the point: all
+ // registers holding live values should have been flushed to the
+ // machine stack immediately prior to the instruction to which this
+ // stackmap pertains. So these can't happen.
+ MOZ_CRASH("createStackMap: operand stack has Register-non-Ref");
+ case Stk::MemRef:
+ // This is the only case we care about. We'll handle it after the
+ // switch.
+ break;
+ case Stk::LocalRef:
+ // We need the stackmap to mention this pointer, but it should
+ // already be in the machineStackTracker section created by
+ // beginFunction().
+ MOZ_ASSERT(v.offs() <= framePushedAtEntryToBody.value());
+ continue;
+ case Stk::ConstRef:
+ // This can currently only be a null pointer.
+ MOZ_ASSERT(v.refval() == 0);
+ continue;
+ case Stk::RegisterRef:
+ // This can't happen, per rationale above.
+ MOZ_CRASH("createStackMap: operand stack contains RegisterRef");
+ default:
+ MOZ_CRASH("createStackMap: unknown operand stack element");
+ }
+#endif
+ // v.offs() holds masm.framePushed() at the point immediately after it
+ // was pushed on the stack. Since it's still on the stack,
+ // masm.framePushed() can't be less.
+ MOZ_ASSERT(v.offs() <= framePushedExcludingArgs.value());
+ uint32_t offsFromMapLowest = framePushedExcludingArgs.value() - v.offs();
+ MOZ_ASSERT(0 == offsFromMapLowest % sizeof(void*));
+ augmentedMst.setGCPointer(offsFromMapLowest / sizeof(void*));
+ }
+
+ // Create the final StackMap. The initial map is zeroed out, so there's
+ // no need to write zero bits in it.
+ const uint32_t extraWords = extras.length();
+ const uint32_t augmentedMstWords = augmentedMst.length();
+ const uint32_t numMappedWords = extraWords + augmentedMstWords;
+ StackMap* stackMap = StackMap::create(numMappedWords);
+ if (!stackMap) {
+ return false;
+ }
+
+ {
+ // First the exit stub extra words, if any.
+ uint32_t i = 0;
+ for (bool b : extras) {
+ if (b) {
+ stackMap->setBit(i);
+ }
+ i++;
+ }
+ }
+ // Followed by the "main" part of the map.
+ for (uint32_t i = 0; i < augmentedMstWords; i++) {
+ if (augmentedMst.isGCPointer(i)) {
+ stackMap->setBit(extraWords + i);
+ }
+ }
+
+ stackMap->setExitStubWords(extraWords);
+
+    // Record in the map how far down from the highest address the Frame* is.
+ // Take the opportunity to check that we haven't marked any part of the
+ // Frame itself as a pointer.
+ stackMap->setFrameOffsetFromTop(numStackArgWords +
+ sizeof(Frame) / sizeof(void*));
+#ifdef DEBUG
+ for (uint32_t i = 0; i < sizeof(Frame) / sizeof(void*); i++) {
+ MOZ_ASSERT(stackMap->getBit(stackMap->numMappedWords -
+ stackMap->frameOffsetFromTop + i) == 0);
+ }
+#endif
+
+ // Note the presence of a ref-typed DebugFrame, if any.
+ if (debugFrame == HasDebugFrame::Yes) {
+ stackMap->setHasDebugFrame();
+ }
+
+ // Add the completed map to the running collection thereof.
+ if (!stackMaps_->add((uint8_t*)(uintptr_t)assemblerOffset, stackMap)) {
+ stackMap->destroy();
+ return false;
+ }
+
+#ifdef DEBUG
+ {
+ // Crosscheck the map pointer counting.
+ uint32_t nw = stackMap->numMappedWords;
+ uint32_t np = 0;
+ for (uint32_t i = 0; i < nw; i++) {
+ np += stackMap->getBit(i);
+ }
+ MOZ_ASSERT(size_t(np) == countedPointers);
+ }
+#endif
+
+ return true;
+ }
+};
+
+// The baseline compiler proper.
+
+class BaseCompiler final : public BaseCompilerInterface {
+ using Local = BaseStackFrame::Local;
+ using LabelVector = Vector<NonAssertingLabel, 8, SystemAllocPolicy>;
+
+ // Bit set used for simple bounds check elimination. Capping this at 64
+ // locals makes sense; even 32 locals would probably be OK in practice.
+ //
+ // For more information about BCE, see the block comment above
+ // popMemoryAccess(), below.
+
+ using BCESet = uint64_t;
+
+ // Control node, representing labels and stack heights at join points.
+
+ struct Control {
+ NonAssertingLabel label; // The "exit" label
+ NonAssertingLabel otherLabel; // Used for the "else" branch of if-then-else
+ StackHeight stackHeight; // From BaseStackFrame
+ uint32_t stackSize; // Value stack height
+ BCESet bceSafeOnEntry; // Bounds check info flowing into the item
+ BCESet bceSafeOnExit; // Bounds check info flowing out of the item
+ bool deadOnArrival; // deadCode_ was set on entry to the region
+ bool deadThenBranch; // deadCode_ was set on exit from "then"
+
+ Control()
+ : stackHeight(StackHeight::Invalid()),
+ stackSize(UINT32_MAX),
+ bceSafeOnEntry(0),
+ bceSafeOnExit(~BCESet(0)),
+ deadOnArrival(false),
+ deadThenBranch(false) {}
+ };
+
+ class NothingVector {
+ Nothing unused_;
+
+ public:
+ bool resize(size_t length) { return true; }
+ Nothing& operator[](size_t) { return unused_; }
+ Nothing& back() { return unused_; }
+ };
+
+ struct BaseCompilePolicy {
+ // The baseline compiler tracks values on a stack of its own -- it
+ // needs to scan that stack for spilling -- and thus has no need
+ // for the values maintained by the iterator.
+ using Value = Nothing;
+ using ValueVector = NothingVector;
+
+ // The baseline compiler uses the iterator's control stack, attaching
+ // its own control information.
+ using ControlItem = Control;
+ };
+
+ using BaseOpIter = OpIter<BaseCompilePolicy>;
+
+  // The baseline compiler will use OOL code more sparingly than
+  // Baldr, since our code is not high-performance and frills like
+  // code density and branch-prediction friendliness are less
+  // important.
+
+ class OutOfLineCode : public TempObject {
+ private:
+ NonAssertingLabel entry_;
+ NonAssertingLabel rejoin_;
+ StackHeight stackHeight_;
+
+ public:
+ OutOfLineCode() : stackHeight_(StackHeight::Invalid()) {}
+
+ Label* entry() { return &entry_; }
+ Label* rejoin() { return &rejoin_; }
+
+ void setStackHeight(StackHeight stackHeight) {
+ MOZ_ASSERT(!stackHeight_.isValid());
+ stackHeight_ = stackHeight;
+ }
+
+ void bind(BaseStackFrame* fr, MacroAssembler* masm) {
+ MOZ_ASSERT(stackHeight_.isValid());
+ masm->bind(&entry_);
+ fr->setStackHeight(stackHeight_);
+ }
+
+ // The generate() method must be careful about register use
+ // because it will be invoked when there is a register
+ // assignment in the BaseCompiler that does not correspond
+ // to the available registers when the generated OOL code is
+ // executed. The register allocator *must not* be called.
+ //
+ // The best strategy is for the creator of the OOL object to
+ // allocate all temps that the OOL code will need.
+ //
+ // Input, output, and temp registers are embedded in the OOL
+ // object and are known to the code generator.
+ //
+ // Scratch registers are available to use in OOL code.
+ //
+ // All other registers must be explicitly saved and restored
+ // by the OOL code before being used.
+
+ virtual void generate(MacroAssembler* masm) = 0;
+ };
+
+ enum class LatentOp { None, Compare, Eqz };
+
+ struct AccessCheck {
+ AccessCheck()
+ : omitBoundsCheck(false),
+ omitAlignmentCheck(false),
+ onlyPointerAlignment(false) {}
+
+ // If `omitAlignmentCheck` is true then we need check neither the
+ // pointer nor the offset. Otherwise, if `onlyPointerAlignment` is true
+ // then we need check only the pointer. Otherwise, check the sum of
+ // pointer and offset.
+
+ bool omitBoundsCheck;
+ bool omitAlignmentCheck;
+ bool onlyPointerAlignment;
+ };
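+
+  // Summary of the alignment-check combinations described above:
+  //
+  //   omitAlignmentCheck  onlyPointerAlignment  alignment check performed
+  //   true                (ignored)             none
+  //   false               true                  pointer only
+  //   false               false                 sum of pointer and offset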
+
+ const ModuleEnvironment& moduleEnv_;
+ const CompilerEnvironment& compilerEnv_;
+ BaseOpIter iter_;
+ const FuncCompileInput& func_;
+ size_t lastReadCallSite_;
+ TempAllocator::Fallible alloc_;
+ const ValTypeVector& locals_; // Types of parameters and locals
+ bool deadCode_; // Flag indicating we should decode & discard the opcode
+ BCESet
+ bceSafe_; // Locals that have been bounds checked and not updated since
+ ValTypeVector SigD_;
+ ValTypeVector SigF_;
+ NonAssertingLabel returnLabel_;
+
+ LatentOp latentOp_; // Latent operation for branch (seen next)
+ ValType latentType_; // Operand type, if latentOp_ is true
+ Assembler::Condition
+ latentIntCmp_; // Comparison operator, if latentOp_ == Compare, int types
+ Assembler::DoubleCondition
+ latentDoubleCmp_; // Comparison operator, if latentOp_ == Compare, float
+ // types
+
+ FuncOffsets offsets_;
+ MacroAssembler& masm; // No '_' suffix - too tedious...
+ BaseRegAlloc ra; // Ditto
+ BaseStackFrame fr;
+
+ StackMapGenerator stackMapGenerator_;
+
+ BaseStackFrame::LocalVector localInfo_;
+ Vector<OutOfLineCode*, 8, SystemAllocPolicy> outOfLine_;
+
+ // On specific platforms we sometimes need to use specific registers.
+
+ SpecificRegs specific_;
+
+ // There are more members scattered throughout.
+
+ public:
+ BaseCompiler(const ModuleEnvironment& moduleEnv,
+ const CompilerEnvironment& compilerEnv,
+ const FuncCompileInput& input, const ValTypeVector& locals,
+ const MachineState& trapExitLayout,
+ size_t trapExitLayoutNumWords, Decoder& decoder,
+ StkVector& stkSource, TempAllocator* alloc, MacroAssembler* masm,
+ StackMaps* stackMaps);
+ ~BaseCompiler();
+
+ [[nodiscard]] bool init();
+
+ FuncOffsets finish();
+
+ [[nodiscard]] bool emitFunction();
+ void emitInitStackLocals();
+
+ const FuncType& funcType() const {
+ return *moduleEnv_.funcs[func_.index].type;
+ }
+
+ const TypeIdDesc& funcTypeId() const {
+ return *moduleEnv_.funcs[func_.index].typeId;
+ }
+
+ // Used by some of the ScratchRegister implementations.
+ operator MacroAssembler&() const { return masm; }
+ operator BaseRegAlloc&() { return ra; }
+
+ bool usesSharedMemory() const { return moduleEnv_.usesSharedMemory(); }
+
+ private:
+ ////////////////////////////////////////////////////////////
+ //
+ // Out of line code management.
+
+ [[nodiscard]] OutOfLineCode* addOutOfLineCode(OutOfLineCode* ool) {
+ if (!ool || !outOfLine_.append(ool)) {
+ return nullptr;
+ }
+ ool->setStackHeight(fr.stackHeight());
+ return ool;
+ }
+
+ [[nodiscard]] bool generateOutOfLineCode() {
+ for (uint32_t i = 0; i < outOfLine_.length(); i++) {
+ OutOfLineCode* ool = outOfLine_[i];
+ ool->bind(&fr, &masm);
+ ool->generate(&masm);
+ }
+
+ return !masm.oom();
+ }
+
+ // Utility.
+
+ const Local& localFromSlot(uint32_t slot, MIRType type) {
+ MOZ_ASSERT(localInfo_[slot].type == type);
+ return localInfo_[slot];
+ }
+
+ ////////////////////////////////////////////////////////////
+ //
+ // High-level register management.
+
+ bool isAvailableI32(RegI32 r) { return ra.isAvailableI32(r); }
+ bool isAvailableI64(RegI64 r) { return ra.isAvailableI64(r); }
+ bool isAvailableRef(RegPtr r) { return ra.isAvailablePtr(r); }
+ bool isAvailableF32(RegF32 r) { return ra.isAvailableF32(r); }
+ bool isAvailableF64(RegF64 r) { return ra.isAvailableF64(r); }
+#ifdef ENABLE_WASM_SIMD
+ bool isAvailableV128(RegV128 r) { return ra.isAvailableV128(r); }
+#endif
+
+ [[nodiscard]] RegI32 needI32() { return ra.needI32(); }
+ [[nodiscard]] RegI64 needI64() { return ra.needI64(); }
+ [[nodiscard]] RegPtr needRef() { return ra.needPtr(); }
+ [[nodiscard]] RegF32 needF32() { return ra.needF32(); }
+ [[nodiscard]] RegF64 needF64() { return ra.needF64(); }
+#ifdef ENABLE_WASM_SIMD
+ [[nodiscard]] RegV128 needV128() { return ra.needV128(); }
+#endif
+
+ void needI32(RegI32 specific) { ra.needI32(specific); }
+ void needI64(RegI64 specific) { ra.needI64(specific); }
+ void needRef(RegPtr specific) { ra.needPtr(specific); }
+ void needF32(RegF32 specific) { ra.needF32(specific); }
+ void needF64(RegF64 specific) { ra.needF64(specific); }
+#ifdef ENABLE_WASM_SIMD
+ void needV128(RegV128 specific) { ra.needV128(specific); }
+#endif
+
+#if defined(JS_CODEGEN_ARM)
+ [[nodiscard]] RegI64 needI64Pair() { return ra.needI64Pair(); }
+#endif
+
+ void freeI32(RegI32 r) { ra.freeI32(r); }
+ void freeI64(RegI64 r) { ra.freeI64(r); }
+ void freeRef(RegPtr r) { ra.freePtr(r); }
+ void freeF32(RegF32 r) { ra.freeF32(r); }
+ void freeF64(RegF64 r) { ra.freeF64(r); }
+#ifdef ENABLE_WASM_SIMD
+ void freeV128(RegV128 r) { ra.freeV128(r); }
+#endif
+
+ void freeI64Except(RegI64 r, RegI32 except) {
+#ifdef JS_PUNBOX64
+ MOZ_ASSERT(r.reg == except);
+#else
+ MOZ_ASSERT(r.high == except || r.low == except);
+ freeI64(r);
+ needI32(except);
+#endif
+ }
+
+ void maybeFreeI32(RegI32 r) {
+ if (r.isValid()) {
+ freeI32(r);
+ }
+ }
+
+ void maybeFreeI64(RegI64 r) {
+ if (r.isValid()) {
+ freeI64(r);
+ }
+ }
+
+ void maybeFreeF64(RegF64 r) {
+ if (r.isValid()) {
+ freeF64(r);
+ }
+ }
+
+ void needI32NoSync(RegI32 r) {
+ MOZ_ASSERT(isAvailableI32(r));
+ needI32(r);
+ }
+
+ // TODO / OPTIMIZE: need2xI32() can be optimized along with needI32()
+ // to avoid sync(). (Bug 1316802)
+
+ void need2xI32(RegI32 r0, RegI32 r1) {
+ needI32(r0);
+ needI32(r1);
+ }
+
+ void need2xI64(RegI64 r0, RegI64 r1) {
+ needI64(r0);
+ needI64(r1);
+ }
+
+ RegI32 fromI64(RegI64 r) { return RegI32(lowPart(r)); }
+
+#ifdef JS_PUNBOX64
+ RegI64 fromI32(RegI32 r) { return RegI64(Register64(r)); }
+#endif
+
+ RegI64 widenI32(RegI32 r) {
+ MOZ_ASSERT(!isAvailableI32(r));
+#ifdef JS_PUNBOX64
+ return fromI32(r);
+#else
+ RegI32 high = needI32();
+ return RegI64(Register64(high, r));
+#endif
+ }
+
+ RegI32 narrowI64(RegI64 r) {
+#ifdef JS_PUNBOX64
+ return RegI32(r.reg);
+#else
+ freeI32(RegI32(r.high));
+ return RegI32(r.low);
+#endif
+ }
+
+ RegI32 narrowPtr(RegPtr r) { return RegI32(r); }
+
+ RegI32 lowPart(RegI64 r) {
+#ifdef JS_PUNBOX64
+ return RegI32(r.reg);
+#else
+ return RegI32(r.low);
+#endif
+ }
+
+ RegI32 maybeHighPart(RegI64 r) {
+#ifdef JS_PUNBOX64
+ return RegI32::Invalid();
+#else
+ return RegI32(r.high);
+#endif
+ }
+
+ void maybeClearHighPart(RegI64 r) {
+#if !defined(JS_PUNBOX64)
+ moveImm32(0, RegI32(r.high));
+#endif
+ }
+
+ void moveI32(RegI32 src, RegI32 dest) {
+ if (src != dest) {
+ masm.move32(src, dest);
+ }
+ }
+
+ void moveI64(RegI64 src, RegI64 dest) {
+ if (src != dest) {
+ masm.move64(src, dest);
+ }
+ }
+
+ void moveRef(RegPtr src, RegPtr dest) {
+ if (src != dest) {
+ masm.movePtr(src, dest);
+ }
+ }
+
+ void moveF64(RegF64 src, RegF64 dest) {
+ if (src != dest) {
+ masm.moveDouble(src, dest);
+ }
+ }
+
+ void moveF32(RegF32 src, RegF32 dest) {
+ if (src != dest) {
+ masm.moveFloat32(src, dest);
+ }
+ }
+
+#ifdef ENABLE_WASM_SIMD
+ void moveV128(RegV128 src, RegV128 dest) {
+ if (src != dest) {
+ masm.moveSimd128(src, dest);
+ }
+ }
+#endif
+
+ ////////////////////////////////////////////////////////////////////////////
+ //
+ // Block parameters and results.
+ //
+ // Blocks may have multiple parameters and multiple results. Blocks can also
+ // be the target of branches: the entry for loops, and the exit for
+ // non-loops.
+ //
+ // Passing multiple values to a non-branch target (i.e., the entry of a
+ // "block") falls out naturally: any items on the value stack can flow
+ // directly from one block to another.
+ //
+ // However, for branch targets, we need to allocate well-known locations for
+ // the branch values. The approach taken in the baseline compiler is to
+ // allocate registers to the top N values (currently N=1), and then stack
+ // locations for the rest.
+ //
+
+ enum class RegKind { All, OnlyGPRs };
+
+ inline void needResultRegisters(ResultType type, RegKind which) {
+ if (type.empty()) {
+ return;
+ }
+
+ for (ABIResultIter iter(type); !iter.done(); iter.next()) {
+ ABIResult result = iter.cur();
+ // Register results are visited first; when we see a stack result we're
+ // done.
+ if (!result.inRegister()) {
+ return;
+ }
+ switch (result.type().kind()) {
+ case ValType::I32:
+ needI32(RegI32(result.gpr()));
+ break;
+ case ValType::I64:
+ needI64(RegI64(result.gpr64()));
+ break;
+ case ValType::V128:
+#ifdef ENABLE_WASM_SIMD
+ if (which == RegKind::All) {
+ needV128(RegV128(result.fpr()));
+ }
+ break;
+#else
+ MOZ_CRASH("No SIMD support");
+#endif
+ case ValType::F32:
+ if (which == RegKind::All) {
+ needF32(RegF32(result.fpr()));
+ }
+ break;
+ case ValType::F64:
+ if (which == RegKind::All) {
+ needF64(RegF64(result.fpr()));
+ }
+ break;
+ case ValType::Ref:
+ needRef(RegPtr(result.gpr()));
+ break;
+ }
+ }
+ }
+
+#ifdef JS_CODEGEN_X64
+ inline void maskResultRegisters(ResultType type) {
+ MOZ_ASSERT(JitOptions.spectreIndexMasking);
+
+ if (type.empty()) {
+ return;
+ }
+
+ for (ABIResultIter iter(type); !iter.done(); iter.next()) {
+ ABIResult result = iter.cur();
+ if (result.inRegister() && result.type().kind() == ValType::I32) {
+ masm.movl(result.gpr(), result.gpr());
+ }
+ }
+ }
+#endif
+
+ inline void freeResultRegisters(ResultType type, RegKind which) {
+ if (type.empty()) {
+ return;
+ }
+
+ for (ABIResultIter iter(type); !iter.done(); iter.next()) {
+ ABIResult result = iter.cur();
+ // Register results are visited first; when we see a stack result we're
+ // done.
+ if (!result.inRegister()) {
+ return;
+ }
+ switch (result.type().kind()) {
+ case ValType::I32:
+ freeI32(RegI32(result.gpr()));
+ break;
+ case ValType::I64:
+ freeI64(RegI64(result.gpr64()));
+ break;
+ case ValType::V128:
+#ifdef ENABLE_WASM_SIMD
+ if (which == RegKind::All) {
+ freeV128(RegV128(result.fpr()));
+ }
+ break;
+#else
+ MOZ_CRASH("No SIMD support");
+#endif
+ case ValType::F32:
+ if (which == RegKind::All) {
+ freeF32(RegF32(result.fpr()));
+ }
+ break;
+ case ValType::F64:
+ if (which == RegKind::All) {
+ freeF64(RegF64(result.fpr()));
+ }
+ break;
+ case ValType::Ref:
+ freeRef(RegPtr(result.gpr()));
+ break;
+ }
+ }
+ }
+
+ void needIntegerResultRegisters(ResultType type) {
+ needResultRegisters(type, RegKind::OnlyGPRs);
+ }
+ void freeIntegerResultRegisters(ResultType type) {
+ freeResultRegisters(type, RegKind::OnlyGPRs);
+ }
+
+ void needResultRegisters(ResultType type) {
+ needResultRegisters(type, RegKind::All);
+ }
+ void freeResultRegisters(ResultType type) {
+ freeResultRegisters(type, RegKind::All);
+ }
+
+ void assertResultRegistersAvailable(ResultType type) {
+#ifdef DEBUG
+ for (ABIResultIter iter(type); !iter.done(); iter.next()) {
+ ABIResult result = iter.cur();
+ if (!result.inRegister()) {
+ return;
+ }
+ switch (result.type().kind()) {
+ case ValType::I32:
+ MOZ_ASSERT(isAvailableI32(RegI32(result.gpr())));
+ break;
+ case ValType::I64:
+ MOZ_ASSERT(isAvailableI64(RegI64(result.gpr64())));
+ break;
+ case ValType::V128:
+# ifdef ENABLE_WASM_SIMD
+ MOZ_ASSERT(isAvailableV128(RegV128(result.fpr())));
+ break;
+# else
+ MOZ_CRASH("No SIMD support");
+# endif
+ case ValType::F32:
+ MOZ_ASSERT(isAvailableF32(RegF32(result.fpr())));
+ break;
+ case ValType::F64:
+ MOZ_ASSERT(isAvailableF64(RegF64(result.fpr())));
+ break;
+ case ValType::Ref:
+ MOZ_ASSERT(isAvailableRef(RegPtr(result.gpr())));
+ break;
+ }
+ }
+#endif
+ }
+
+ void captureResultRegisters(ResultType type) {
+ assertResultRegistersAvailable(type);
+ needResultRegisters(type);
+ }
+
+ void captureCallResultRegisters(ResultType type) {
+ captureResultRegisters(type);
+#ifdef JS_CODEGEN_X64
+ if (JitOptions.spectreIndexMasking) {
+ maskResultRegisters(type);
+ }
+#endif
+ }
+
+ ////////////////////////////////////////////////////////////
+ //
+ // Value stack and spilling.
+ //
+ // The value stack facilitates some on-the-fly register allocation
+ // and immediate-constant use. It tracks constants, latent
+ // references to locals, register contents, and values on the CPU
+ // stack.
+ //
+ // The stack can be flushed to memory using sync(). This is handy
+ // to avoid problems with control flow and messy register usage
+ // patterns.
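+  //
+  // For example (an illustrative, hypothetical sequence): compiling
+  // "i32.const 1; local.get 0; i32.add" pushes a ConstI32 and then a LocalI32
+  // element; no register is allocated and no code is emitted until the add
+  // pops its operands and materializes them.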
+
+ // This is the value stack actually used during compilation. It is a
+ // StkVector rather than a StkVector& since constantly dereferencing a
+ // StkVector& adds about 0.5% or more to the compiler's dynamic instruction
+ // count.
+ StkVector stk_;
+
+ static constexpr size_t MaxPushesPerOpcode = 10;
+
+ // BaselineCompileFunctions() "lends" us the StkVector to use in this
+ // BaseCompiler object, and that is installed in |stk_| in our constructor.
+ // This is so as to avoid having to malloc/free the vector's contents at
+ // each creation/destruction of a BaseCompiler object. It does however mean
+ // that we need to hold on to a reference to BaselineCompileFunctions()'s
+ // vector, so we can swap (give) its contents back when this BaseCompiler
+ // object is destroyed. This significantly reduces the heap turnover of the
+ // baseline compiler. See bug 1532592.
+ StkVector& stkSource_;
+
+#ifdef DEBUG
+ size_t countMemRefsOnStk() {
+ size_t nRefs = 0;
+ for (Stk& v : stk_) {
+ if (v.kind() == Stk::MemRef) {
+ nRefs++;
+ }
+ }
+ return nRefs;
+ }
+#endif
+
+ template <typename T>
+ void push(T item) {
+ // None of the single-arg Stk constructors create a Stk::MemRef, so
+ // there's no need to increment stackMapGenerator_.memRefsOnStk here.
+ stk_.infallibleEmplaceBack(Stk(item));
+ }
+
+ void pushConstRef(intptr_t v) { stk_.infallibleEmplaceBack(Stk::StkRef(v)); }
+
+ void loadConstI32(const Stk& src, RegI32 dest) {
+ moveImm32(src.i32val(), dest);
+ }
+
+ void loadMemI32(const Stk& src, RegI32 dest) {
+ fr.loadStackI32(src.offs(), dest);
+ }
+
+ void loadLocalI32(const Stk& src, RegI32 dest) {
+ fr.loadLocalI32(localFromSlot(src.slot(), MIRType::Int32), dest);
+ }
+
+ void loadRegisterI32(const Stk& src, RegI32 dest) {
+ moveI32(src.i32reg(), dest);
+ }
+
+ void loadConstI64(const Stk& src, RegI64 dest) {
+ moveImm64(src.i64val(), dest);
+ }
+
+ void loadMemI64(const Stk& src, RegI64 dest) {
+ fr.loadStackI64(src.offs(), dest);
+ }
+
+ void loadLocalI64(const Stk& src, RegI64 dest) {
+ fr.loadLocalI64(localFromSlot(src.slot(), MIRType::Int64), dest);
+ }
+
+ void loadRegisterI64(const Stk& src, RegI64 dest) {
+ moveI64(src.i64reg(), dest);
+ }
+
+ void loadConstRef(const Stk& src, RegPtr dest) {
+ moveImmRef(src.refval(), dest);
+ }
+
+ void loadMemRef(const Stk& src, RegPtr dest) {
+ fr.loadStackPtr(src.offs(), dest);
+ }
+
+ void loadLocalRef(const Stk& src, RegPtr dest) {
+ fr.loadLocalPtr(localFromSlot(src.slot(), MIRType::RefOrNull), dest);
+ }
+
+ void loadRegisterRef(const Stk& src, RegPtr dest) {
+ moveRef(src.refReg(), dest);
+ }
+
+ void loadConstF64(const Stk& src, RegF64 dest) {
+ double d;
+ src.f64val(&d);
+ masm.loadConstantDouble(d, dest);
+ }
+
+ void loadMemF64(const Stk& src, RegF64 dest) {
+ fr.loadStackF64(src.offs(), dest);
+ }
+
+ void loadLocalF64(const Stk& src, RegF64 dest) {
+ fr.loadLocalF64(localFromSlot(src.slot(), MIRType::Double), dest);
+ }
+
+ void loadRegisterF64(const Stk& src, RegF64 dest) {
+ moveF64(src.f64reg(), dest);
+ }
+
+ void loadConstF32(const Stk& src, RegF32 dest) {
+ float f;
+ src.f32val(&f);
+ masm.loadConstantFloat32(f, dest);
+ }
+
+ void loadMemF32(const Stk& src, RegF32 dest) {
+ fr.loadStackF32(src.offs(), dest);
+ }
+
+ void loadLocalF32(const Stk& src, RegF32 dest) {
+ fr.loadLocalF32(localFromSlot(src.slot(), MIRType::Float32), dest);
+ }
+
+ void loadRegisterF32(const Stk& src, RegF32 dest) {
+ moveF32(src.f32reg(), dest);
+ }
+
+#ifdef ENABLE_WASM_SIMD
+ void loadConstV128(const Stk& src, RegV128 dest) {
+ V128 f;
+ src.v128val(&f);
+ masm.loadConstantSimd128(SimdConstant::CreateX16((int8_t*)f.bytes), dest);
+ }
+
+ void loadMemV128(const Stk& src, RegV128 dest) {
+ fr.loadStackV128(src.offs(), dest);
+ }
+
+ void loadLocalV128(const Stk& src, RegV128 dest) {
+ fr.loadLocalV128(localFromSlot(src.slot(), MIRType::Simd128), dest);
+ }
+
+ void loadRegisterV128(const Stk& src, RegV128 dest) {
+ moveV128(src.v128reg(), dest);
+ }
+#endif
+
+ void loadI32(const Stk& src, RegI32 dest) {
+ switch (src.kind()) {
+ case Stk::ConstI32:
+ loadConstI32(src, dest);
+ break;
+ case Stk::MemI32:
+ loadMemI32(src, dest);
+ break;
+ case Stk::LocalI32:
+ loadLocalI32(src, dest);
+ break;
+ case Stk::RegisterI32:
+ loadRegisterI32(src, dest);
+ break;
+ default:
+ MOZ_CRASH("Compiler bug: Expected I32 on stack");
+ }
+ }
+
+ void loadI64(const Stk& src, RegI64 dest) {
+ switch (src.kind()) {
+ case Stk::ConstI64:
+ loadConstI64(src, dest);
+ break;
+ case Stk::MemI64:
+ loadMemI64(src, dest);
+ break;
+ case Stk::LocalI64:
+ loadLocalI64(src, dest);
+ break;
+ case Stk::RegisterI64:
+ loadRegisterI64(src, dest);
+ break;
+ default:
+ MOZ_CRASH("Compiler bug: Expected I64 on stack");
+ }
+ }
+
+#if !defined(JS_PUNBOX64)
+ void loadI64Low(const Stk& src, RegI32 dest) {
+ switch (src.kind()) {
+ case Stk::ConstI64:
+ moveImm32(int32_t(src.i64val()), dest);
+ break;
+ case Stk::MemI64:
+ fr.loadStackI64Low(src.offs(), dest);
+ break;
+ case Stk::LocalI64:
+ fr.loadLocalI64Low(localFromSlot(src.slot(), MIRType::Int64), dest);
+ break;
+ case Stk::RegisterI64:
+ moveI32(RegI32(src.i64reg().low), dest);
+ break;
+ default:
+ MOZ_CRASH("Compiler bug: Expected I64 on stack");
+ }
+ }
+
+ void loadI64High(const Stk& src, RegI32 dest) {
+ switch (src.kind()) {
+ case Stk::ConstI64:
+ moveImm32(int32_t(src.i64val() >> 32), dest);
+ break;
+ case Stk::MemI64:
+ fr.loadStackI64High(src.offs(), dest);
+ break;
+ case Stk::LocalI64:
+ fr.loadLocalI64High(localFromSlot(src.slot(), MIRType::Int64), dest);
+ break;
+ case Stk::RegisterI64:
+ moveI32(RegI32(src.i64reg().high), dest);
+ break;
+ default:
+ MOZ_CRASH("Compiler bug: Expected I64 on stack");
+ }
+ }
+#endif
+
+ void loadF64(const Stk& src, RegF64 dest) {
+ switch (src.kind()) {
+ case Stk::ConstF64:
+ loadConstF64(src, dest);
+ break;
+ case Stk::MemF64:
+ loadMemF64(src, dest);
+ break;
+ case Stk::LocalF64:
+ loadLocalF64(src, dest);
+ break;
+ case Stk::RegisterF64:
+ loadRegisterF64(src, dest);
+ break;
+ default:
+ MOZ_CRASH("Compiler bug: expected F64 on stack");
+ }
+ }
+
+ void loadF32(const Stk& src, RegF32 dest) {
+ switch (src.kind()) {
+ case Stk::ConstF32:
+ loadConstF32(src, dest);
+ break;
+ case Stk::MemF32:
+ loadMemF32(src, dest);
+ break;
+ case Stk::LocalF32:
+ loadLocalF32(src, dest);
+ break;
+ case Stk::RegisterF32:
+ loadRegisterF32(src, dest);
+ break;
+ default:
+ MOZ_CRASH("Compiler bug: expected F32 on stack");
+ }
+ }
+
+#ifdef ENABLE_WASM_SIMD
+ void loadV128(const Stk& src, RegV128 dest) {
+ switch (src.kind()) {
+ case Stk::ConstV128:
+ loadConstV128(src, dest);
+ break;
+ case Stk::MemV128:
+ loadMemV128(src, dest);
+ break;
+ case Stk::LocalV128:
+ loadLocalV128(src, dest);
+ break;
+ case Stk::RegisterV128:
+ loadRegisterV128(src, dest);
+ break;
+ default:
+ MOZ_CRASH("Compiler bug: expected V128 on stack");
+ }
+ }
+#endif
+
+ void loadRef(const Stk& src, RegPtr dest) {
+ switch (src.kind()) {
+ case Stk::ConstRef:
+ loadConstRef(src, dest);
+ break;
+ case Stk::MemRef:
+ loadMemRef(src, dest);
+ break;
+ case Stk::LocalRef:
+ loadLocalRef(src, dest);
+ break;
+ case Stk::RegisterRef:
+ loadRegisterRef(src, dest);
+ break;
+ default:
+ MOZ_CRASH("Compiler bug: expected ref on stack");
+ }
+ }
+
+ // Flush all local and register value stack elements to memory.
+ //
+ // TODO / OPTIMIZE: As this is fairly expensive and causes worse
+ // code to be emitted subsequently, it is useful to avoid calling
+ // it. (Bug 1316802)
+ //
+ // Some optimization has been done already. Remaining
+ // opportunities:
+ //
+ // - It would be interesting to see if we can specialize it
+ // before calls with particularly simple signatures, or where
+ // we can do parallel assignment of register arguments, or
+ // similar. See notes in emitCall().
+ //
+ // - Operations that need specific registers: multiply, quotient,
+ // remainder, will tend to sync because the registers we need
+ // will tend to be allocated. We may be able to avoid that by
+ // prioritizing registers differently (takeLast instead of
+ // takeFirst) but we may also be able to allocate an unused
+ // register on demand to free up one we need, thus avoiding the
+ // sync. That type of fix would go into needI32().
+
+ void sync() final {
+ size_t start = 0;
+ size_t lim = stk_.length();
+
+ for (size_t i = lim; i > 0; i--) {
+      // Memory opcodes are first in the enum, so a single check against
+      // MemLast suffices.
+ if (stk_[i - 1].kind() <= Stk::MemLast) {
+ start = i;
+ break;
+ }
+ }
+
+ for (size_t i = start; i < lim; i++) {
+ Stk& v = stk_[i];
+ switch (v.kind()) {
+ case Stk::LocalI32: {
+ ScratchI32 scratch(*this);
+ loadLocalI32(v, scratch);
+ uint32_t offs = fr.pushPtr(scratch);
+ v.setOffs(Stk::MemI32, offs);
+ break;
+ }
+ case Stk::RegisterI32: {
+ uint32_t offs = fr.pushPtr(v.i32reg());
+ freeI32(v.i32reg());
+ v.setOffs(Stk::MemI32, offs);
+ break;
+ }
+ case Stk::LocalI64: {
+ ScratchI32 scratch(*this);
+#ifdef JS_PUNBOX64
+ loadI64(v, fromI32(scratch));
+ uint32_t offs = fr.pushPtr(scratch);
+#else
+ fr.loadLocalI64High(localFromSlot(v.slot(), MIRType::Int64), scratch);
+ fr.pushPtr(scratch);
+ fr.loadLocalI64Low(localFromSlot(v.slot(), MIRType::Int64), scratch);
+ uint32_t offs = fr.pushPtr(scratch);
+#endif
+ v.setOffs(Stk::MemI64, offs);
+ break;
+ }
+ case Stk::RegisterI64: {
+#ifdef JS_PUNBOX64
+ uint32_t offs = fr.pushPtr(v.i64reg().reg);
+ freeI64(v.i64reg());
+#else
+ fr.pushPtr(v.i64reg().high);
+ uint32_t offs = fr.pushPtr(v.i64reg().low);
+ freeI64(v.i64reg());
+#endif
+ v.setOffs(Stk::MemI64, offs);
+ break;
+ }
+ case Stk::LocalF64: {
+ ScratchF64 scratch(*this);
+ loadF64(v, scratch);
+ uint32_t offs = fr.pushDouble(scratch);
+ v.setOffs(Stk::MemF64, offs);
+ break;
+ }
+ case Stk::RegisterF64: {
+ uint32_t offs = fr.pushDouble(v.f64reg());
+ freeF64(v.f64reg());
+ v.setOffs(Stk::MemF64, offs);
+ break;
+ }
+ case Stk::LocalF32: {
+ ScratchF32 scratch(*this);
+ loadF32(v, scratch);
+ uint32_t offs = fr.pushFloat32(scratch);
+ v.setOffs(Stk::MemF32, offs);
+ break;
+ }
+ case Stk::RegisterF32: {
+ uint32_t offs = fr.pushFloat32(v.f32reg());
+ freeF32(v.f32reg());
+ v.setOffs(Stk::MemF32, offs);
+ break;
+ }
+#ifdef ENABLE_WASM_SIMD
+ case Stk::LocalV128: {
+ ScratchV128 scratch(*this);
+ loadV128(v, scratch);
+ uint32_t offs = fr.pushV128(scratch);
+ v.setOffs(Stk::MemV128, offs);
+ break;
+ }
+ case Stk::RegisterV128: {
+ uint32_t offs = fr.pushV128(v.v128reg());
+ freeV128(v.v128reg());
+ v.setOffs(Stk::MemV128, offs);
+ break;
+ }
+#endif
+ case Stk::LocalRef: {
+ ScratchPtr scratch(*this);
+ loadLocalRef(v, scratch);
+ uint32_t offs = fr.pushPtr(scratch);
+ v.setOffs(Stk::MemRef, offs);
+ stackMapGenerator_.memRefsOnStk++;
+ break;
+ }
+ case Stk::RegisterRef: {
+ uint32_t offs = fr.pushPtr(v.refReg());
+ freeRef(v.refReg());
+ v.setOffs(Stk::MemRef, offs);
+ stackMapGenerator_.memRefsOnStk++;
+ break;
+ }
+ default: {
+ break;
+ }
+ }
+ }
+ }
+
+ void saveTempPtr(RegPtr r) final {
+ MOZ_ASSERT(!ra.isAvailablePtr(r));
+ fr.pushPtr(r);
+ ra.freePtr(r);
+ MOZ_ASSERT(ra.isAvailablePtr(r));
+ }
+
+ void restoreTempPtr(RegPtr r) final {
+ MOZ_ASSERT(ra.isAvailablePtr(r));
+ ra.needPtr(r);
+ fr.popPtr(r);
+ MOZ_ASSERT(!ra.isAvailablePtr(r));
+ }
+
+ // Various methods for creating a stack map. Stack maps are indexed by the
+ // lowest address of the instruction immediately *after* the instruction of
+ // interest. In practice that means either: the return point of a call, the
+ // instruction immediately after a trap instruction (the "resume"
+ // instruction), or the instruction immediately following a no-op (when
+ // debugging is enabled).
+
+ // Create a vanilla stack map.
+ [[nodiscard]] bool createStackMap(const char* who) {
+ const ExitStubMapVector noExtras;
+ return createStackMap(who, noExtras, masm.currentOffset());
+ }
+
+ // Create a stack map as vanilla, but for a custom assembler offset.
+ [[nodiscard]] bool createStackMap(const char* who,
+ CodeOffset assemblerOffset) {
+ const ExitStubMapVector noExtras;
+ return createStackMap(who, noExtras, assemblerOffset.offset());
+ }
+
+ // The most general stack map construction.
+ [[nodiscard]] bool createStackMap(const char* who,
+ const ExitStubMapVector& extras,
+ uint32_t assemblerOffset) {
+ auto debugFrame =
+ compilerEnv_.debugEnabled() ? HasDebugFrame::Yes : HasDebugFrame::No;
+ return stackMapGenerator_.createStackMap(who, extras, assemblerOffset,
+ debugFrame, stk_);
+ }
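+
+  // Illustrative use only (a sketch; |funcIndex| and |baselineCall| are
+  // assumed locals): after emitting a call, the map is keyed by the call's
+  // return point.
+  //
+  //   CodeOffset raOffset = callDefinition(funcIndex, baselineCall);
+  //   if (!createStackMap("direct call", raOffset)) {
+  //     return false;
+  //   }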
+
+ // This is an optimization used to avoid calling sync() for
+ // setLocal(): if the local does not exist unresolved on the stack
+ // then we can skip the sync.
+
+ bool hasLocal(uint32_t slot) {
+ for (size_t i = stk_.length(); i > 0; i--) {
+      // Memory opcodes are first in the enum, so a single check against
+      // MemLast suffices.
+ Stk::Kind kind = stk_[i - 1].kind();
+ if (kind <= Stk::MemLast) {
+ return false;
+ }
+
+      // Local opcodes follow memory opcodes in the enum, so a single check
+      // against LocalLast suffices.
+ if (kind <= Stk::LocalLast && stk_[i - 1].slot() == slot) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ void syncLocal(uint32_t slot) {
+ if (hasLocal(slot)) {
+ sync(); // TODO / OPTIMIZE: Improve this? (Bug 1316817)
+ }
+ }
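+
+  // For example (an illustrative sketch, not a verbatim call site): when
+  // compiling a local.set of slot 0, the popped value is stored only after
+  // any latent reference to slot 0 has been flushed:
+  //
+  //   RegI32 rv = popI32();
+  //   syncLocal(0);
+  //   fr.storeLocalI32(rv, localFromSlot(0, MIRType::Int32));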
+
+ // Push the register r onto the stack.
+
+ void pushI32(RegI32 r) {
+ MOZ_ASSERT(!isAvailableI32(r));
+ push(Stk(r));
+ }
+
+ void pushI64(RegI64 r) {
+ MOZ_ASSERT(!isAvailableI64(r));
+ push(Stk(r));
+ }
+
+ void pushRef(RegPtr r) {
+ MOZ_ASSERT(!isAvailableRef(r));
+ push(Stk(r));
+ }
+
+ void pushF64(RegF64 r) {
+ MOZ_ASSERT(!isAvailableF64(r));
+ push(Stk(r));
+ }
+
+ void pushF32(RegF32 r) {
+ MOZ_ASSERT(!isAvailableF32(r));
+ push(Stk(r));
+ }
+
+#ifdef ENABLE_WASM_SIMD
+ void pushV128(RegV128 r) {
+ MOZ_ASSERT(!isAvailableV128(r));
+ push(Stk(r));
+ }
+#endif
+
+ // Push the value onto the stack.
+
+ void pushI32(int32_t v) { push(Stk(v)); }
+
+ void pushI64(int64_t v) { push(Stk(v)); }
+
+ void pushRef(intptr_t v) { pushConstRef(v); }
+
+ void pushF64(double v) { push(Stk(v)); }
+
+ void pushF32(float v) { push(Stk(v)); }
+
+#ifdef ENABLE_WASM_SIMD
+ void pushV128(V128 v) { push(Stk(v)); }
+#endif
+
+ // Push the local slot onto the stack. The slot will not be read
+ // here; it will be read when it is consumed, or when a side
+ // effect to the slot forces its value to be saved.
+
+ void pushLocalI32(uint32_t slot) {
+ stk_.infallibleEmplaceBack(Stk(Stk::LocalI32, slot));
+ }
+
+ void pushLocalI64(uint32_t slot) {
+ stk_.infallibleEmplaceBack(Stk(Stk::LocalI64, slot));
+ }
+
+ void pushLocalRef(uint32_t slot) {
+ stk_.infallibleEmplaceBack(Stk(Stk::LocalRef, slot));
+ }
+
+ void pushLocalF64(uint32_t slot) {
+ stk_.infallibleEmplaceBack(Stk(Stk::LocalF64, slot));
+ }
+
+ void pushLocalF32(uint32_t slot) {
+ stk_.infallibleEmplaceBack(Stk(Stk::LocalF32, slot));
+ }
+
+#ifdef ENABLE_WASM_SIMD
+ void pushLocalV128(uint32_t slot) {
+ stk_.infallibleEmplaceBack(Stk(Stk::LocalV128, slot));
+ }
+#endif
+
+ // Call only from other popI32() variants.
+ // v must be the stack top. May pop the CPU stack.
+
+ void popI32(const Stk& v, RegI32 dest) {
+ MOZ_ASSERT(&v == &stk_.back());
+ switch (v.kind()) {
+ case Stk::ConstI32:
+ loadConstI32(v, dest);
+ break;
+ case Stk::LocalI32:
+ loadLocalI32(v, dest);
+ break;
+ case Stk::MemI32:
+ fr.popPtr(dest);
+ break;
+ case Stk::RegisterI32:
+ loadRegisterI32(v, dest);
+ break;
+ default:
+ MOZ_CRASH("Compiler bug: expected int on stack");
+ }
+ }
+
+ [[nodiscard]] RegI32 popI32() {
+ Stk& v = stk_.back();
+ RegI32 r;
+ if (v.kind() == Stk::RegisterI32) {
+ r = v.i32reg();
+ } else {
+ popI32(v, (r = needI32()));
+ }
+ stk_.popBack();
+ return r;
+ }
+
+ RegI32 popI32(RegI32 specific) {
+ Stk& v = stk_.back();
+
+ if (!(v.kind() == Stk::RegisterI32 && v.i32reg() == specific)) {
+ needI32(specific);
+ popI32(v, specific);
+ if (v.kind() == Stk::RegisterI32) {
+ freeI32(v.i32reg());
+ }
+ }
+
+ stk_.popBack();
+ return specific;
+ }
+
+#ifdef ENABLE_WASM_SIMD
+ // Call only from other popV128() variants.
+ // v must be the stack top. May pop the CPU stack.
+
+ void popV128(const Stk& v, RegV128 dest) {
+ MOZ_ASSERT(&v == &stk_.back());
+ switch (v.kind()) {
+ case Stk::ConstV128:
+ loadConstV128(v, dest);
+ break;
+ case Stk::LocalV128:
+ loadLocalV128(v, dest);
+ break;
+ case Stk::MemV128:
+ fr.popV128(dest);
+ break;
+ case Stk::RegisterV128:
+ loadRegisterV128(v, dest);
+ break;
+ default:
+        MOZ_CRASH("Compiler bug: expected V128 on stack");
+ }
+ }
+
+ [[nodiscard]] RegV128 popV128() {
+ Stk& v = stk_.back();
+ RegV128 r;
+ if (v.kind() == Stk::RegisterV128) {
+ r = v.v128reg();
+ } else {
+ popV128(v, (r = needV128()));
+ }
+ stk_.popBack();
+ return r;
+ }
+
+ RegV128 popV128(RegV128 specific) {
+ Stk& v = stk_.back();
+
+ if (!(v.kind() == Stk::RegisterV128 && v.v128reg() == specific)) {
+ needV128(specific);
+ popV128(v, specific);
+ if (v.kind() == Stk::RegisterV128) {
+ freeV128(v.v128reg());
+ }
+ }
+
+ stk_.popBack();
+ return specific;
+ }
+#endif
+
+ // Call only from other popI64() variants.
+ // v must be the stack top. May pop the CPU stack.
+
+ void popI64(const Stk& v, RegI64 dest) {
+ MOZ_ASSERT(&v == &stk_.back());
+ switch (v.kind()) {
+ case Stk::ConstI64:
+ loadConstI64(v, dest);
+ break;
+ case Stk::LocalI64:
+ loadLocalI64(v, dest);
+ break;
+ case Stk::MemI64:
+#ifdef JS_PUNBOX64
+ fr.popPtr(dest.reg);
+#else
+ fr.popPtr(dest.low);
+ fr.popPtr(dest.high);
+#endif
+ break;
+ case Stk::RegisterI64:
+ loadRegisterI64(v, dest);
+ break;
+ default:
+ MOZ_CRASH("Compiler bug: expected long on stack");
+ }
+ }
+
+ [[nodiscard]] RegI64 popI64() {
+ Stk& v = stk_.back();
+ RegI64 r;
+ if (v.kind() == Stk::RegisterI64) {
+ r = v.i64reg();
+ } else {
+ popI64(v, (r = needI64()));
+ }
+ stk_.popBack();
+ return r;
+ }
+
+  // Note that the stack top can be in one half of "specific" on 32-bit
+  // systems.  We could optimize for that case but, for simplicity, if the
+  // register does not match exactly, we just force the stack top to memory
+  // and then read it back in.
+
+ RegI64 popI64(RegI64 specific) {
+ Stk& v = stk_.back();
+
+ if (!(v.kind() == Stk::RegisterI64 && v.i64reg() == specific)) {
+ needI64(specific);
+ popI64(v, specific);
+ if (v.kind() == Stk::RegisterI64) {
+ freeI64(v.i64reg());
+ }
+ }
+
+ stk_.popBack();
+ return specific;
+ }
+
+ // Call only from other popRef() variants.
+ // v must be the stack top. May pop the CPU stack.
+
+ void popRef(const Stk& v, RegPtr dest) {
+ MOZ_ASSERT(&v == &stk_.back());
+ switch (v.kind()) {
+ case Stk::ConstRef:
+ loadConstRef(v, dest);
+ break;
+ case Stk::LocalRef:
+ loadLocalRef(v, dest);
+ break;
+ case Stk::MemRef:
+ fr.popPtr(dest);
+ break;
+ case Stk::RegisterRef:
+ loadRegisterRef(v, dest);
+ break;
+ default:
+ MOZ_CRASH("Compiler bug: expected ref on stack");
+ }
+ }
+
+ RegPtr popRef(RegPtr specific) {
+ Stk& v = stk_.back();
+
+ if (!(v.kind() == Stk::RegisterRef && v.refReg() == specific)) {
+ needRef(specific);
+ popRef(v, specific);
+ if (v.kind() == Stk::RegisterRef) {
+ freeRef(v.refReg());
+ }
+ }
+
+ stk_.popBack();
+ if (v.kind() == Stk::MemRef) {
+ stackMapGenerator_.memRefsOnStk--;
+ }
+ return specific;
+ }
+
+ [[nodiscard]] RegPtr popRef() {
+ Stk& v = stk_.back();
+ RegPtr r;
+ if (v.kind() == Stk::RegisterRef) {
+ r = v.refReg();
+ } else {
+ popRef(v, (r = needRef()));
+ }
+ stk_.popBack();
+ if (v.kind() == Stk::MemRef) {
+ stackMapGenerator_.memRefsOnStk--;
+ }
+ return r;
+ }
+
+ // Call only from other popF64() variants.
+ // v must be the stack top. May pop the CPU stack.
+
+ void popF64(const Stk& v, RegF64 dest) {
+ MOZ_ASSERT(&v == &stk_.back());
+ switch (v.kind()) {
+ case Stk::ConstF64:
+ loadConstF64(v, dest);
+ break;
+ case Stk::LocalF64:
+ loadLocalF64(v, dest);
+ break;
+ case Stk::MemF64:
+ fr.popDouble(dest);
+ break;
+ case Stk::RegisterF64:
+ loadRegisterF64(v, dest);
+ break;
+ default:
+ MOZ_CRASH("Compiler bug: expected double on stack");
+ }
+ }
+
+ [[nodiscard]] RegF64 popF64() {
+ Stk& v = stk_.back();
+ RegF64 r;
+ if (v.kind() == Stk::RegisterF64) {
+ r = v.f64reg();
+ } else {
+ popF64(v, (r = needF64()));
+ }
+ stk_.popBack();
+ return r;
+ }
+
+ RegF64 popF64(RegF64 specific) {
+ Stk& v = stk_.back();
+
+ if (!(v.kind() == Stk::RegisterF64 && v.f64reg() == specific)) {
+ needF64(specific);
+ popF64(v, specific);
+ if (v.kind() == Stk::RegisterF64) {
+ freeF64(v.f64reg());
+ }
+ }
+
+ stk_.popBack();
+ return specific;
+ }
+
+ // Call only from other popF32() variants.
+ // v must be the stack top. May pop the CPU stack.
+
+ void popF32(const Stk& v, RegF32 dest) {
+ MOZ_ASSERT(&v == &stk_.back());
+ switch (v.kind()) {
+ case Stk::ConstF32:
+ loadConstF32(v, dest);
+ break;
+ case Stk::LocalF32:
+ loadLocalF32(v, dest);
+ break;
+ case Stk::MemF32:
+ fr.popFloat32(dest);
+ break;
+ case Stk::RegisterF32:
+ loadRegisterF32(v, dest);
+ break;
+ default:
+ MOZ_CRASH("Compiler bug: expected float on stack");
+ }
+ }
+
+ [[nodiscard]] RegF32 popF32() {
+ Stk& v = stk_.back();
+ RegF32 r;
+ if (v.kind() == Stk::RegisterF32) {
+ r = v.f32reg();
+ } else {
+ popF32(v, (r = needF32()));
+ }
+ stk_.popBack();
+ return r;
+ }
+
+ RegF32 popF32(RegF32 specific) {
+ Stk& v = stk_.back();
+
+ if (!(v.kind() == Stk::RegisterF32 && v.f32reg() == specific)) {
+ needF32(specific);
+ popF32(v, specific);
+ if (v.kind() == Stk::RegisterF32) {
+ freeF32(v.f32reg());
+ }
+ }
+
+ stk_.popBack();
+ return specific;
+ }
+
+ [[nodiscard]] bool popConstI32(int32_t* c) {
+ Stk& v = stk_.back();
+ if (v.kind() != Stk::ConstI32) {
+ return false;
+ }
+ *c = v.i32val();
+ stk_.popBack();
+ return true;
+ }
+
+ [[nodiscard]] bool popConstI64(int64_t* c) {
+ Stk& v = stk_.back();
+ if (v.kind() != Stk::ConstI64) {
+ return false;
+ }
+ *c = v.i64val();
+ stk_.popBack();
+ return true;
+ }
+
+ [[nodiscard]] bool peekConstI32(int32_t* c) {
+ Stk& v = stk_.back();
+ if (v.kind() != Stk::ConstI32) {
+ return false;
+ }
+ *c = v.i32val();
+ return true;
+ }
+
+ [[nodiscard]] bool peekConstI64(int64_t* c) {
+ Stk& v = stk_.back();
+ if (v.kind() != Stk::ConstI64) {
+ return false;
+ }
+ *c = v.i64val();
+ return true;
+ }
+
+ [[nodiscard]] bool peek2xI32(int32_t* c0, int32_t* c1) {
+ MOZ_ASSERT(stk_.length() >= 2);
+ const Stk& v0 = *(stk_.end() - 1);
+ const Stk& v1 = *(stk_.end() - 2);
+ if (v0.kind() != Stk::ConstI32 || v1.kind() != Stk::ConstI32) {
+ return false;
+ }
+ *c0 = v0.i32val();
+ *c1 = v1.i32val();
+ return true;
+ }
+
+ [[nodiscard]] bool popConstPositivePowerOfTwoI32(int32_t* c,
+ uint_fast8_t* power,
+ int32_t cutoff) {
+ Stk& v = stk_.back();
+ if (v.kind() != Stk::ConstI32) {
+ return false;
+ }
+ *c = v.i32val();
+ if (*c <= cutoff || !IsPowerOfTwo(static_cast<uint32_t>(*c))) {
+ return false;
+ }
+ *power = FloorLog2(*c);
+ stk_.popBack();
+ return true;
+ }
+
+ [[nodiscard]] bool popConstPositivePowerOfTwoI64(int64_t* c,
+ uint_fast8_t* power,
+ int64_t cutoff) {
+ Stk& v = stk_.back();
+ if (v.kind() != Stk::ConstI64) {
+ return false;
+ }
+ *c = v.i64val();
+ if (*c <= cutoff || !IsPowerOfTwo(static_cast<uint64_t>(*c))) {
+ return false;
+ }
+ *power = FloorLog2(*c);
+ stk_.popBack();
+ return true;
+ }
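+
+  // Illustrative use only (a hedged sketch of the caller pattern): an
+  // unsigned i32 division by a constant power of two can be strength-reduced
+  // to a shift.
+  //
+  //   int32_t c;
+  //   uint_fast8_t power;
+  //   if (popConstPositivePowerOfTwoI32(&c, &power, 0)) {
+  //     RegI32 r = popI32();
+  //     masm.rshift32(Imm32(power), r);
+  //     pushI32(r);
+  //   }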
+
+ [[nodiscard]] bool peekLocalI32(uint32_t* local) {
+ Stk& v = stk_.back();
+ if (v.kind() != Stk::LocalI32) {
+ return false;
+ }
+ *local = v.slot();
+ return true;
+ }
+
+ // TODO / OPTIMIZE (Bug 1316818): At the moment we use the Wasm
+ // inter-procedure ABI for block returns, which allocates ReturnReg as the
+ // single block result register. It is possible other choices would lead to
+ // better register allocation, as ReturnReg is often first in the register set
+ // and will be heavily wanted by the register allocator that uses takeFirst().
+ //
+ // Obvious options:
+ // - pick a register at the back of the register set
+ // - pick a random register per block (different blocks have
+ // different join regs)
+
+ void popRegisterResults(ABIResultIter& iter) {
+ // Pop register results. Note that in the single-value case, popping to a
+ // register may cause a sync(); for multi-value we sync'd already.
+ for (; !iter.done(); iter.next()) {
+ const ABIResult& result = iter.cur();
+ if (!result.inRegister()) {
+ // TODO / OPTIMIZE: We sync here to avoid solving the general parallel
+ // move problem in popStackResults. However we could avoid syncing the
+ // values that are going to registers anyway, if they are already in
+ // registers.
+ sync();
+ break;
+ }
+ switch (result.type().kind()) {
+ case ValType::I32:
+ popI32(RegI32(result.gpr()));
+ break;
+ case ValType::I64:
+ popI64(RegI64(result.gpr64()));
+ break;
+ case ValType::F32:
+ popF32(RegF32(result.fpr()));
+ break;
+ case ValType::F64:
+ popF64(RegF64(result.fpr()));
+ break;
+ case ValType::Ref:
+ popRef(RegPtr(result.gpr()));
+ break;
+ case ValType::V128:
+#ifdef ENABLE_WASM_SIMD
+ popV128(RegV128(result.fpr()));
+#else
+ MOZ_CRASH("No SIMD support");
+#endif
+ }
+ }
+ }
+
+ void popStackResults(ABIResultIter& iter, StackHeight stackBase) {
+ MOZ_ASSERT(!iter.done());
+
+ // The iterator should be advanced beyond register results, and register
+ // results should be popped already from the value stack.
+ uint32_t alreadyPopped = iter.index();
+
+ // At this point, only stack arguments are remaining. Iterate through them
+ // to measure how much stack space they will take up.
+ for (; !iter.done(); iter.next()) {
+ MOZ_ASSERT(iter.cur().onStack());
+ }
+
+ // Calculate the space needed to store stack results, in bytes.
+ uint32_t stackResultBytes = iter.stackBytesConsumedSoFar();
+ MOZ_ASSERT(stackResultBytes);
+
+ // Compute the stack height including the stack results. Note that it's
+ // possible that this call expands the stack, for example if some of the
+ // results are supplied by constants and so are not already on the machine
+ // stack.
+ uint32_t endHeight = fr.prepareStackResultArea(stackBase, stackResultBytes);
+
+ // Find a free GPR to use when shuffling stack values. If none is
+ // available, push ReturnReg and restore it after we're done.
+ bool saved = false;
+ RegPtr temp = ra.needTempPtr(RegPtr(ReturnReg), &saved);
+
+ // The sequence of Stk values is in the same order on the machine stack as
+ // the result locations, but there is a complication: constant values are
+ // not actually pushed on the machine stack. (At this point registers and
+ // locals have been spilled already.) So, moving the Stk values into place
+ // isn't simply a shuffle-down or shuffle-up operation. There is a part of
+ // the Stk sequence that shuffles toward the FP, a part that's already in
+ // place, and a part that shuffles toward the SP. After shuffling, we have
+ // to materialize the constants.
+
+ // Shuffle mem values toward the frame pointer, copying deepest values
+ // first. Stop when we run out of results, get to a register result, or
+ // find a Stk value that is closer to the FP than the result.
+ for (iter.switchToPrev(); !iter.done(); iter.prev()) {
+ const ABIResult& result = iter.cur();
+ if (!result.onStack()) {
+ break;
+ }
+ MOZ_ASSERT(result.stackOffset() < stackResultBytes);
+ uint32_t destHeight = endHeight - result.stackOffset();
+ uint32_t stkBase = stk_.length() - (iter.count() - alreadyPopped);
+ Stk& v = stk_[stkBase + iter.index()];
+ if (v.isMem()) {
+ uint32_t srcHeight = v.offs();
+ if (srcHeight <= destHeight) {
+ break;
+ }
+ fr.shuffleStackResultsTowardFP(srcHeight, destHeight, result.size(),
+ temp);
+ }
+ }
+
+ // Reset iterator and skip register results.
+ for (iter.reset(); !iter.done(); iter.next()) {
+ if (iter.cur().onStack()) {
+ break;
+ }
+ }
+
+ // Revisit top stack values, shuffling mem values toward the stack pointer,
+ // copying shallowest values first.
+ for (; !iter.done(); iter.next()) {
+ const ABIResult& result = iter.cur();
+ MOZ_ASSERT(result.onStack());
+ MOZ_ASSERT(result.stackOffset() < stackResultBytes);
+ uint32_t destHeight = endHeight - result.stackOffset();
+ Stk& v = stk_[stk_.length() - (iter.index() - alreadyPopped) - 1];
+ if (v.isMem()) {
+ uint32_t srcHeight = v.offs();
+ if (srcHeight >= destHeight) {
+ break;
+ }
+ fr.shuffleStackResultsTowardSP(srcHeight, destHeight, result.size(),
+ temp);
+ }
+ }
+
+ // Reset iterator and skip register results, which are already popped off
+ // the value stack.
+ for (iter.reset(); !iter.done(); iter.next()) {
+ if (iter.cur().onStack()) {
+ break;
+ }
+ }
+
+ // Materialize constants and pop the remaining items from the value stack.
+ for (; !iter.done(); iter.next()) {
+ const ABIResult& result = iter.cur();
+ uint32_t resultHeight = endHeight - result.stackOffset();
+ Stk& v = stk_.back();
+ switch (v.kind()) {
+ case Stk::ConstI32:
+ fr.storeImmediatePtrToStack(uint32_t(v.i32val_), resultHeight, temp);
+ break;
+ case Stk::ConstF32:
+ fr.storeImmediateF32ToStack(v.f32val_, resultHeight, temp);
+ break;
+ case Stk::ConstI64:
+ fr.storeImmediateI64ToStack(v.i64val_, resultHeight, temp);
+ break;
+ case Stk::ConstF64:
+ fr.storeImmediateF64ToStack(v.f64val_, resultHeight, temp);
+ break;
+#ifdef ENABLE_WASM_SIMD
+ case Stk::ConstV128:
+ fr.storeImmediateV128ToStack(v.v128val_, resultHeight, temp);
+ break;
+#endif
+ case Stk::ConstRef:
+ fr.storeImmediatePtrToStack(v.refval_, resultHeight, temp);
+ break;
+ case Stk::MemRef:
+ // Update bookkeeping as we pop the Stk entry.
+ stackMapGenerator_.memRefsOnStk--;
+ break;
+ default:
+ MOZ_ASSERT(v.isMem());
+ break;
+ }
+ stk_.popBack();
+ }
+
+ ra.freeTempPtr(temp, saved);
+
+ // This will pop the stack if needed.
+ fr.finishStackResultArea(stackBase, stackResultBytes);
+ }
+
+ enum class ContinuationKind { Fallthrough, Jump };
+
+ void popBlockResults(ResultType type, StackHeight stackBase,
+ ContinuationKind kind) {
+ if (!type.empty()) {
+ ABIResultIter iter(type);
+ popRegisterResults(iter);
+ if (!iter.done()) {
+ popStackResults(iter, stackBase);
+ // Because popStackResults might clobber the stack, it leaves the stack
+ // pointer already in the right place for the continuation, whether the
+ // continuation is a jump or fallthrough.
+ return;
+ }
+ }
+ // We get here if there are no stack results. For a fallthrough, the stack
+ // is already at the right height. For a jump, we may need to pop the stack
+ // pointer if the continuation's stack height is lower than the current
+ // stack height.
+ if (kind == ContinuationKind::Jump) {
+ fr.popStackBeforeBranch(stackBase, type);
+ }
+ }
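+
+  // Illustrative branch-site use (a sketch; local names are assumed):
+  //
+  //   Control& target = controlItem(relativeDepth);
+  //   popBlockResults(resultType, target.stackHeight, ContinuationKind::Jump);
+  //   masm.jump(&target.label);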
+
+ Stk captureStackResult(const ABIResult& result, StackHeight resultsBase,
+ uint32_t stackResultBytes) {
+ MOZ_ASSERT(result.onStack());
+ uint32_t offs = fr.locateStackResult(result, resultsBase, stackResultBytes);
+ return Stk::StackResult(result.type(), offs);
+ }
+
+  [[nodiscard]] bool pushResults(ResultType type, StackHeight resultsBase) {
+ if (type.empty()) {
+ return true;
+ }
+
+ if (type.length() > 1) {
+ if (!stk_.reserve(stk_.length() + type.length() + MaxPushesPerOpcode)) {
+ return false;
+ }
+ }
+
+ // We need to push the results in reverse order, so first iterate through
+ // all results to determine the locations of stack result types.
+ ABIResultIter iter(type);
+ while (!iter.done()) {
+ iter.next();
+ }
+ uint32_t stackResultBytes = iter.stackBytesConsumedSoFar();
+ for (iter.switchToPrev(); !iter.done(); iter.prev()) {
+ const ABIResult& result = iter.cur();
+ if (!result.onStack()) {
+ break;
+ }
+ Stk v = captureStackResult(result, resultsBase, stackResultBytes);
+ push(v);
+ if (v.kind() == Stk::MemRef) {
+ stackMapGenerator_.memRefsOnStk++;
+ }
+ }
+
+ for (; !iter.done(); iter.prev()) {
+ const ABIResult& result = iter.cur();
+ MOZ_ASSERT(result.inRegister());
+ switch (result.type().kind()) {
+ case ValType::I32:
+ pushI32(RegI32(result.gpr()));
+ break;
+ case ValType::I64:
+ pushI64(RegI64(result.gpr64()));
+ break;
+ case ValType::V128:
+#ifdef ENABLE_WASM_SIMD
+ pushV128(RegV128(result.fpr()));
+ break;
+#else
+ MOZ_CRASH("No SIMD support");
+#endif
+ case ValType::F32:
+ pushF32(RegF32(result.fpr()));
+ break;
+ case ValType::F64:
+ pushF64(RegF64(result.fpr()));
+ break;
+ case ValType::Ref:
+ pushRef(RegPtr(result.gpr()));
+ break;
+ }
+ }
+
+ return true;
+ }
+
+  [[nodiscard]] bool pushBlockResults(ResultType type) {
+ return pushResults(type, controlItem().stackHeight);
+ }
+
+ // A combination of popBlockResults + pushBlockResults, used when entering a
+ // block with a control-flow join (loops) or split (if) to shuffle the
+ // fallthrough block parameters into the locations expected by the
+ // continuation.
+  [[nodiscard]] bool topBlockParams(ResultType type) {
+ // This function should only be called when entering a block with a
+ // control-flow join at the entry, where there are no live temporaries in
+ // the current block.
+ StackHeight base = controlItem().stackHeight;
+ MOZ_ASSERT(fr.stackResultsBase(stackConsumed(type.length())) == base);
+ popBlockResults(type, base, ContinuationKind::Fallthrough);
+ return pushBlockResults(type);
+ }
+
+ // A combination of popBlockResults + pushBlockResults, used before branches
+ // where we don't know the target (br_if / br_table). If and when the branch
+ // is taken, the stack results will be shuffled down into place. For br_if
+ // that has fallthrough, the parameters for the untaken branch flow through to
+ // the continuation.
+  [[nodiscard]] bool topBranchParams(ResultType type, StackHeight* height) {
+ if (type.empty()) {
+ *height = fr.stackHeight();
+ return true;
+ }
+ // There may be temporary values that need spilling; delay computation of
+ // the stack results base until after the popRegisterResults(), which spills
+ // if needed.
+ ABIResultIter iter(type);
+ popRegisterResults(iter);
+ StackHeight base = fr.stackResultsBase(stackConsumed(iter.remaining()));
+ if (!iter.done()) {
+ popStackResults(iter, base);
+ }
+ if (!pushResults(type, base)) {
+ return false;
+ }
+ *height = base;
+ return true;
+ }
+
+ // Conditional branches with fallthrough are preceded by a topBranchParams, so
+ // we know that there are no stack results that need to be materialized. In
+ // that case, we can just shuffle the whole block down before popping the
+ // stack.
+ void shuffleStackResultsBeforeBranch(StackHeight srcHeight,
+ StackHeight destHeight,
+ ResultType type) {
+ uint32_t stackResultBytes = 0;
+
+ if (ABIResultIter::HasStackResults(type)) {
+ MOZ_ASSERT(stk_.length() >= type.length());
+ ABIResultIter iter(type);
+ for (; !iter.done(); iter.next()) {
+#ifdef DEBUG
+ const ABIResult& result = iter.cur();
+ const Stk& v = stk_[stk_.length() - iter.index() - 1];
+ MOZ_ASSERT(v.isMem() == result.onStack());
+#endif
+ }
+
+ stackResultBytes = iter.stackBytesConsumedSoFar();
+ MOZ_ASSERT(stackResultBytes > 0);
+
+ if (srcHeight != destHeight) {
+ // Find a free GPR to use when shuffling stack values. If none
+ // is available, push ReturnReg and restore it after we're done.
+ bool saved = false;
+ RegPtr temp = ra.needTempPtr(RegPtr(ReturnReg), &saved);
+ fr.shuffleStackResultsTowardFP(srcHeight, destHeight, stackResultBytes,
+ temp);
+ ra.freeTempPtr(temp, saved);
+ }
+ }
+
+ fr.popStackBeforeBranch(destHeight, stackResultBytes);
+ }
+
+ // Return the amount of execution stack consumed by the top numval
+ // values on the value stack.
+
+ size_t stackConsumed(size_t numval) {
+ size_t size = 0;
+ MOZ_ASSERT(numval <= stk_.length());
+ for (uint32_t i = stk_.length() - 1; numval > 0; numval--, i--) {
+ Stk& v = stk_[i];
+ switch (v.kind()) {
+ case Stk::MemRef:
+ size += BaseStackFrame::StackSizeOfPtr;
+ break;
+ case Stk::MemI32:
+ size += BaseStackFrame::StackSizeOfPtr;
+ break;
+ case Stk::MemI64:
+ size += BaseStackFrame::StackSizeOfInt64;
+ break;
+ case Stk::MemF64:
+ size += BaseStackFrame::StackSizeOfDouble;
+ break;
+ case Stk::MemF32:
+ size += BaseStackFrame::StackSizeOfFloat;
+ break;
+#ifdef ENABLE_WASM_SIMD
+ case Stk::MemV128:
+ size += BaseStackFrame::StackSizeOfV128;
+ break;
+#endif
+ default:
+ break;
+ }
+ }
+ return size;
+ }
+
+ void popValueStackTo(uint32_t stackSize) {
+ for (uint32_t i = stk_.length(); i > stackSize; i--) {
+ Stk& v = stk_[i - 1];
+ switch (v.kind()) {
+ case Stk::RegisterI32:
+ freeI32(v.i32reg());
+ break;
+ case Stk::RegisterI64:
+ freeI64(v.i64reg());
+ break;
+ case Stk::RegisterF64:
+ freeF64(v.f64reg());
+ break;
+ case Stk::RegisterF32:
+ freeF32(v.f32reg());
+ break;
+#ifdef ENABLE_WASM_SIMD
+ case Stk::RegisterV128:
+ freeV128(v.v128reg());
+ break;
+#endif
+ case Stk::RegisterRef:
+ freeRef(v.refReg());
+ break;
+ case Stk::MemRef:
+ stackMapGenerator_.memRefsOnStk--;
+ break;
+ default:
+ break;
+ }
+ }
+ stk_.shrinkTo(stackSize);
+ }
+
+ void popValueStackBy(uint32_t items) {
+ popValueStackTo(stk_.length() - items);
+ }
+
+ void dropValue() {
+ if (peek(0).isMem()) {
+ fr.popBytes(stackConsumed(1));
+ }
+ popValueStackBy(1);
+ }
+
+ // Peek at the stack, for calls.
+
+ Stk& peek(uint32_t relativeDepth) {
+ return stk_[stk_.length() - 1 - relativeDepth];
+ }
+
+#ifdef DEBUG
+ // Check that we're not leaking registers by comparing the
+ // state of the stack + available registers with the set of
+ // all available registers.
+
+ // Call this between opcodes.
+ void performRegisterLeakCheck() {
+ BaseRegAlloc::LeakCheck check(ra);
+ for (size_t i = 0; i < stk_.length(); i++) {
+ Stk& item = stk_[i];
+ switch (item.kind_) {
+ case Stk::RegisterI32:
+ check.addKnownI32(item.i32reg());
+ break;
+ case Stk::RegisterI64:
+ check.addKnownI64(item.i64reg());
+ break;
+ case Stk::RegisterF32:
+ check.addKnownF32(item.f32reg());
+ break;
+ case Stk::RegisterF64:
+ check.addKnownF64(item.f64reg());
+ break;
+# ifdef ENABLE_WASM_SIMD
+ case Stk::RegisterV128:
+ check.addKnownV128(item.v128reg());
+ break;
+# endif
+ case Stk::RegisterRef:
+ check.addKnownRef(item.refReg());
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ void assertStackInvariants() const {
+ if (deadCode_) {
+ // Nonlocal control flow can pass values in stack locations in a way that
+ // isn't accounted for by the value stack. In dead code, which occurs
+ // after unconditional non-local control flow, there is no invariant to
+ // assert.
+ return;
+ }
+ size_t size = 0;
+ for (const Stk& v : stk_) {
+ switch (v.kind()) {
+ case Stk::MemRef:
+ size += BaseStackFrame::StackSizeOfPtr;
+ break;
+ case Stk::MemI32:
+ size += BaseStackFrame::StackSizeOfPtr;
+ break;
+ case Stk::MemI64:
+ size += BaseStackFrame::StackSizeOfInt64;
+ break;
+ case Stk::MemF64:
+ size += BaseStackFrame::StackSizeOfDouble;
+ break;
+ case Stk::MemF32:
+ size += BaseStackFrame::StackSizeOfFloat;
+ break;
+# ifdef ENABLE_WASM_SIMD
+ case Stk::MemV128:
+ size += BaseStackFrame::StackSizeOfV128;
+ break;
+# endif
+ default:
+ MOZ_ASSERT(!v.isMem());
+ break;
+ }
+ }
+ MOZ_ASSERT(size == fr.dynamicHeight());
+ }
+
+#endif
+
+ ////////////////////////////////////////////////////////////
+ //
+ // Control stack
+
+ void initControl(Control& item, ResultType params) {
+ // Make sure the constructor was run properly
+ MOZ_ASSERT(!item.stackHeight.isValid() && item.stackSize == UINT32_MAX);
+
+ uint32_t paramCount = deadCode_ ? 0 : params.length();
+ uint32_t stackParamSize = stackConsumed(paramCount);
+ item.stackHeight = fr.stackResultsBase(stackParamSize);
+ item.stackSize = stk_.length() - paramCount;
+ item.deadOnArrival = deadCode_;
+ item.bceSafeOnEntry = bceSafe_;
+ }
+
+ Control& controlItem() { return iter_.controlItem(); }
+
+ Control& controlItem(uint32_t relativeDepth) {
+ return iter_.controlItem(relativeDepth);
+ }
+
+ Control& controlOutermost() { return iter_.controlOutermost(); }
+
+ ////////////////////////////////////////////////////////////
+ //
+ // Labels
+
+ void insertBreakablePoint(CallSiteDesc::Kind kind) {
+ fr.loadTlsPtr(WasmTlsReg);
+ masm.nopPatchableToCall(CallSiteDesc(iter_.lastOpcodeOffset(), kind));
+ }
+
+ //////////////////////////////////////////////////////////////////////
+ //
+ // Function prologue and epilogue.
+
+ [[nodiscard]] bool beginFunction() {
+ JitSpew(JitSpew_Codegen, "# ========================================");
+ JitSpew(JitSpew_Codegen, "# Emitting wasm baseline code");
+ JitSpew(JitSpew_Codegen,
+ "# beginFunction: start of function prologue for index %d",
+ (int)func_.index);
+
+ // Make a start on the stack map for this function. Inspect the args so
+ // as to determine which of them are both in-memory and pointer-typed, and
+ // add entries to machineStackTracker as appropriate.
+
+ ArgTypeVector args(funcType());
+ size_t inboundStackArgBytes = StackArgAreaSizeUnaligned(args);
+ MOZ_ASSERT(inboundStackArgBytes % sizeof(void*) == 0);
+ stackMapGenerator_.numStackArgWords = inboundStackArgBytes / sizeof(void*);
+
+ MOZ_ASSERT(stackMapGenerator_.machineStackTracker.length() == 0);
+ if (!stackMapGenerator_.machineStackTracker.pushNonGCPointers(
+ stackMapGenerator_.numStackArgWords)) {
+ return false;
+ }
+
+ // Identify GC-managed pointers passed on the stack.
+ for (WasmABIArgIter i(args); !i.done(); i++) {
+ ABIArg argLoc = *i;
+ if (argLoc.kind() == ABIArg::Stack &&
+ args[i.index()] == MIRType::RefOrNull) {
+ uint32_t offset = argLoc.offsetFromArgBase();
+ MOZ_ASSERT(offset < inboundStackArgBytes);
+ MOZ_ASSERT(offset % sizeof(void*) == 0);
+ stackMapGenerator_.machineStackTracker.setGCPointer(offset /
+ sizeof(void*));
+ }
+ }
+
+ GenerateFunctionPrologue(masm, *moduleEnv_.funcs[func_.index].typeId,
+ compilerEnv_.mode() == CompileMode::Tier1
+ ? Some(func_.index)
+ : Nothing(),
+ &offsets_);
+
+ // GenerateFunctionPrologue pushes exactly one wasm::Frame's worth of
+ // stuff, and none of the values are GC pointers. Hence:
+ if (!stackMapGenerator_.machineStackTracker.pushNonGCPointers(
+ sizeof(Frame) / sizeof(void*))) {
+ return false;
+ }
+
+ // Initialize DebugFrame fields before the stack overflow trap so that
+ // we have the invariant that all observable Frames in a debugEnabled
+ // Module have valid DebugFrames.
+ if (compilerEnv_.debugEnabled()) {
+#ifdef JS_CODEGEN_ARM64
+ static_assert(DebugFrame::offsetOfFrame() % WasmStackAlignment == 0,
+ "aligned");
+#endif
+ masm.reserveStack(DebugFrame::offsetOfFrame());
+ if (!stackMapGenerator_.machineStackTracker.pushNonGCPointers(
+ DebugFrame::offsetOfFrame() / sizeof(void*))) {
+ return false;
+ }
+
+ masm.store32(
+ Imm32(func_.index),
+ Address(masm.getStackPointer(), DebugFrame::offsetOfFuncIndex()));
+ masm.store32(Imm32(0), Address(masm.getStackPointer(),
+ DebugFrame::offsetOfFlags()));
+
+ // No need to initialize cachedReturnJSValue_ or any ref-typed spilled
+ // register results, as they are traced if and only if a corresponding
+ // flag (hasCachedReturnJSValue or hasSpilledRefRegisterResult) is set.
+ }
+
+ // Generate a stack-overflow check and its associated stack map.
+
+ fr.checkStack(ABINonArgReg0, BytecodeOffset(func_.lineOrBytecode));
+
+ ExitStubMapVector extras;
+ if (!stackMapGenerator_.generateStackmapEntriesForTrapExit(args, &extras)) {
+ return false;
+ }
+ if (!createStackMap("stack check", extras, masm.currentOffset())) {
+ return false;
+ }
+
+ size_t reservedBytes = fr.fixedAllocSize() - masm.framePushed();
+ MOZ_ASSERT(0 == (reservedBytes % sizeof(void*)));
+
+ masm.reserveStack(reservedBytes);
+ fr.onFixedStackAllocated();
+ if (!stackMapGenerator_.machineStackTracker.pushNonGCPointers(
+ reservedBytes / sizeof(void*))) {
+ return false;
+ }
+
+ // Locals are stack allocated. Mark ref-typed ones in the stackmap
+ // accordingly.
+ for (const Local& l : localInfo_) {
+ // Locals that are stack arguments were already added to the stack map
+ // before pushing the frame.
+ if (l.type == MIRType::RefOrNull && !l.isStackArgument()) {
+ uint32_t offs = fr.localOffsetFromSp(l);
+ MOZ_ASSERT(0 == (offs % sizeof(void*)));
+ stackMapGenerator_.machineStackTracker.setGCPointer(offs /
+ sizeof(void*));
+ }
+ }
+
+ // Copy arguments from registers to stack.
+ for (WasmABIArgIter i(args); !i.done(); i++) {
+ if (args.isSyntheticStackResultPointerArg(i.index())) {
+ // If there are stack results and the pointer to stack results
+ // was passed in a register, store it to the stack.
+ if (i->argInRegister()) {
+ fr.storeIncomingStackResultAreaPtr(RegPtr(i->gpr()));
+ }
+ // If we're in a debug frame, copy the stack result pointer arg
+ // to a well-known place.
+ if (compilerEnv_.debugEnabled()) {
+ Register target = ABINonArgReturnReg0;
+ fr.loadIncomingStackResultAreaPtr(RegPtr(target));
+ size_t debugFrameOffset =
+ masm.framePushed() - DebugFrame::offsetOfFrame();
+ size_t debugStackResultsPointerOffset =
+ debugFrameOffset + DebugFrame::offsetOfStackResultsPointer();
+ masm.storePtr(target, Address(masm.getStackPointer(),
+ debugStackResultsPointerOffset));
+ }
+ continue;
+ }
+ if (!i->argInRegister()) {
+ continue;
+ }
+ Local& l = localInfo_[args.naturalIndex(i.index())];
+ switch (i.mirType()) {
+ case MIRType::Int32:
+ fr.storeLocalI32(RegI32(i->gpr()), l);
+ break;
+ case MIRType::Int64:
+ fr.storeLocalI64(RegI64(i->gpr64()), l);
+ break;
+ case MIRType::RefOrNull: {
+ DebugOnly<uint32_t> offs = fr.localOffsetFromSp(l);
+ MOZ_ASSERT(0 == (offs % sizeof(void*)));
+ fr.storeLocalPtr(RegPtr(i->gpr()), l);
+ // We should have just visited this local in the preceding loop.
+ MOZ_ASSERT(stackMapGenerator_.machineStackTracker.isGCPointer(
+ offs / sizeof(void*)));
+ break;
+ }
+ case MIRType::Double:
+ fr.storeLocalF64(RegF64(i->fpu()), l);
+ break;
+ case MIRType::Float32:
+ fr.storeLocalF32(RegF32(i->fpu()), l);
+ break;
+#ifdef ENABLE_WASM_SIMD
+ case MIRType::Simd128:
+ fr.storeLocalV128(RegV128(i->fpu()), l);
+ break;
+#endif
+ default:
+ MOZ_CRASH("Function argument type");
+ }
+ }
+
+ fr.zeroLocals(&ra);
+ fr.storeTlsPtr(WasmTlsReg);
+
+ if (compilerEnv_.debugEnabled()) {
+ insertBreakablePoint(CallSiteDesc::EnterFrame);
+ if (!createStackMap("debug: breakable point")) {
+ return false;
+ }
+ }
+
+ JitSpew(JitSpew_Codegen,
+ "# beginFunction: enter body with masm.framePushed = %u",
+ masm.framePushed());
+ MOZ_ASSERT(stackMapGenerator_.framePushedAtEntryToBody.isNothing());
+ stackMapGenerator_.framePushedAtEntryToBody.emplace(masm.framePushed());
+
+ return true;
+ }
+
+ void popStackReturnValues(const ResultType& resultType) {
+ uint32_t bytes = ABIResultIter::MeasureStackBytes(resultType);
+ if (bytes == 0) {
+ return;
+ }
+ Register target = ABINonArgReturnReg0;
+ Register temp = ABINonArgReturnReg1;
+ fr.loadIncomingStackResultAreaPtr(RegPtr(target));
+ fr.popStackResultsToMemory(target, bytes, temp);
+ }
+
+ void saveRegisterReturnValues(const ResultType& resultType) {
+ MOZ_ASSERT(compilerEnv_.debugEnabled());
+ size_t debugFrameOffset = masm.framePushed() - DebugFrame::offsetOfFrame();
+ size_t registerResultIdx = 0;
+ for (ABIResultIter i(resultType); !i.done(); i.next()) {
+ const ABIResult result = i.cur();
+ if (!result.inRegister()) {
+#ifdef DEBUG
+ for (i.next(); !i.done(); i.next()) {
+ MOZ_ASSERT(!i.cur().inRegister());
+ }
+#endif
+ break;
+ }
+
+ size_t resultOffset =
+ DebugFrame::offsetOfRegisterResult(registerResultIdx);
+ Address dest(masm.getStackPointer(), debugFrameOffset + resultOffset);
+ switch (result.type().kind()) {
+ case ValType::I32:
+ masm.store32(RegI32(result.gpr()), dest);
+ break;
+ case ValType::I64:
+ masm.store64(RegI64(result.gpr64()), dest);
+ break;
+ case ValType::F64:
+ masm.storeDouble(RegF64(result.fpr()), dest);
+ break;
+ case ValType::F32:
+ masm.storeFloat32(RegF32(result.fpr()), dest);
+ break;
+ case ValType::Ref: {
+ uint32_t flag =
+ DebugFrame::hasSpilledRegisterRefResultBitMask(registerResultIdx);
+ // Tell Instance::traceFrame that we have a pointer to trace.
+ masm.or32(Imm32(flag),
+ Address(masm.getStackPointer(),
+ debugFrameOffset + DebugFrame::offsetOfFlags()));
+ masm.storePtr(RegPtr(result.gpr()), dest);
+ break;
+ }
+ case ValType::V128:
+#ifdef ENABLE_WASM_SIMD
+ masm.storeUnalignedSimd128(RegV128(result.fpr()), dest);
+ break;
+#else
+ MOZ_CRASH("No SIMD support");
+#endif
+ }
+ registerResultIdx++;
+ }
+ }
+
+ void restoreRegisterReturnValues(const ResultType& resultType) {
+ MOZ_ASSERT(compilerEnv_.debugEnabled());
+ size_t debugFrameOffset = masm.framePushed() - DebugFrame::offsetOfFrame();
+ size_t registerResultIdx = 0;
+ for (ABIResultIter i(resultType); !i.done(); i.next()) {
+ const ABIResult result = i.cur();
+ if (!result.inRegister()) {
+#ifdef DEBUG
+ for (i.next(); !i.done(); i.next()) {
+ MOZ_ASSERT(!i.cur().inRegister());
+ }
+#endif
+ break;
+ }
+ size_t resultOffset =
+ DebugFrame::offsetOfRegisterResult(registerResultIdx++);
+ Address src(masm.getStackPointer(), debugFrameOffset + resultOffset);
+ switch (result.type().kind()) {
+ case ValType::I32:
+ masm.load32(src, RegI32(result.gpr()));
+ break;
+ case ValType::I64:
+ masm.load64(src, RegI64(result.gpr64()));
+ break;
+ case ValType::F64:
+ masm.loadDouble(src, RegF64(result.fpr()));
+ break;
+ case ValType::F32:
+ masm.loadFloat32(src, RegF32(result.fpr()));
+ break;
+ case ValType::Ref:
+ masm.loadPtr(src, RegPtr(result.gpr()));
+ break;
+ case ValType::V128:
+#ifdef ENABLE_WASM_SIMD
+ masm.loadUnalignedSimd128(src, RegV128(result.fpr()));
+ break;
+#else
+ MOZ_CRASH("No SIMD support");
+#endif
+ }
+ }
+ }
+
+ [[nodiscard]] bool endFunction() {
+ JitSpew(JitSpew_Codegen, "# endFunction: start of function epilogue");
+
+    // All returns branch to returnLabel_; the breakpoint catches any
+    // accidental fall-through into the epilogue.
+    masm.breakpoint();
+
+ // Patch the add in the prologue so that it checks against the correct
+ // frame size. Flush the constant pool in case it needs to be patched.
+ masm.flush();
+
+ // Precondition for patching.
+ if (masm.oom()) {
+ return false;
+ }
+
+ fr.patchCheckStack();
+
+ masm.bind(&returnLabel_);
+
+ ResultType resultType(ResultType::Vector(funcType().results()));
+
+ popStackReturnValues(resultType);
+
+ if (compilerEnv_.debugEnabled()) {
+      // Save the register return values into the DebugFrame so that the
+      // debug trap can observe and/or modify them, then reload them
+      // afterwards.
+ saveRegisterReturnValues(resultType);
+ insertBreakablePoint(CallSiteDesc::Breakpoint);
+ if (!createStackMap("debug: breakpoint")) {
+ return false;
+ }
+ insertBreakablePoint(CallSiteDesc::LeaveFrame);
+ if (!createStackMap("debug: leave frame")) {
+ return false;
+ }
+ restoreRegisterReturnValues(resultType);
+ }
+
+    // To satisfy the Tls extent invariant we need to reload WasmTlsReg
+    // because baseline code can clobber it.
+ fr.loadTlsPtr(WasmTlsReg);
+ GenerateFunctionEpilogue(masm, fr.fixedAllocSize(), &offsets_);
+
+#if defined(JS_ION_PERF)
+ // FIXME - profiling code missing. No bug for this.
+
+ // Note the end of the inline code and start of the OOL code.
+ // gen->perfSpewer().noteEndInlineCode(masm);
+#endif
+
+ JitSpew(JitSpew_Codegen, "# endFunction: end of function epilogue");
+ JitSpew(JitSpew_Codegen, "# endFunction: start of OOL code");
+ if (!generateOutOfLineCode()) {
+ return false;
+ }
+
+ offsets_.end = masm.currentOffset();
+
+ if (!fr.checkStackHeight()) {
+ return false;
+ }
+
+ JitSpew(JitSpew_Codegen, "# endFunction: end of OOL code for index %d",
+ (int)func_.index);
+ return !masm.oom();
+ }
+
+ //////////////////////////////////////////////////////////////////////
+ //
+ // Calls.
+
+ struct FunctionCall {
+ explicit FunctionCall(uint32_t lineOrBytecode)
+ : lineOrBytecode(lineOrBytecode),
+ isInterModule(false),
+ usesSystemAbi(false),
+#ifdef JS_CODEGEN_ARM
+ hardFP(true),
+#endif
+ frameAlignAdjustment(0),
+ stackArgAreaSize(0) {
+ }
+
+ uint32_t lineOrBytecode;
+ WasmABIArgGenerator abi;
+ bool isInterModule;
+ bool usesSystemAbi;
+#ifdef JS_CODEGEN_ARM
+ bool hardFP;
+#endif
+ size_t frameAlignAdjustment;
+ size_t stackArgAreaSize;
+ };
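+
+  // The expected call sequence, as an illustrative sketch only
+  // (|lineOrBytecode|, |args|, |funcIndex| and |stackSpace| are assumed
+  // locals; argument passing and result handling are elided):
+  //
+  //   FunctionCall call(lineOrBytecode);
+  //   beginCall(call, UseABI::Wasm, InterModule::False);
+  //   startCallArgs(StackArgAreaSizeUnaligned(args), &call);
+  //   // ... passArg() for each argument, in order ...
+  //   CodeOffset raOffset = callDefinition(funcIndex, call);
+  //   if (!createStackMap("direct call", raOffset)) {
+  //     return false;
+  //   }
+  //   endCall(call, stackSpace);
+  //   // ... capture and push the call results ...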
+
+ void beginCall(FunctionCall& call, UseABI useABI, InterModule interModule) {
+ MOZ_ASSERT_IF(useABI == UseABI::Builtin, interModule == InterModule::False);
+
+ call.isInterModule = interModule == InterModule::True;
+ call.usesSystemAbi = useABI == UseABI::System;
+
+ if (call.usesSystemAbi) {
+ // Call-outs need to use the appropriate system ABI.
+#if defined(JS_CODEGEN_ARM)
+ call.hardFP = UseHardFpABI();
+ call.abi.setUseHardFp(call.hardFP);
+#elif defined(JS_CODEGEN_MIPS32)
+ call.abi.enforceO32ABI();
+#endif
+ } else {
+#if defined(JS_CODEGEN_ARM)
+ MOZ_ASSERT(call.hardFP,
+ "All private ABIs pass FP arguments in registers");
+#endif
+ }
+
+ // Use masm.framePushed() because the value we want here does not depend
+    // on the height of the frame's stack area, but on the actual size of the
+    // allocated frame.
+ call.frameAlignAdjustment = ComputeByteAlignment(
+ masm.framePushed() + sizeof(Frame), JitStackAlignment);
+ }
+
+ void endCall(FunctionCall& call, size_t stackSpace) {
+ size_t adjustment = call.stackArgAreaSize + call.frameAlignAdjustment;
+ fr.freeArgAreaAndPopBytes(adjustment, stackSpace);
+
+ MOZ_ASSERT(
+ stackMapGenerator_.framePushedExcludingOutboundCallArgs.isSome());
+ stackMapGenerator_.framePushedExcludingOutboundCallArgs.reset();
+
+ if (call.isInterModule) {
+ fr.loadTlsPtr(WasmTlsReg);
+ masm.loadWasmPinnedRegsFromTls();
+ masm.switchToWasmTlsRealm(ABINonArgReturnReg0, ABINonArgReturnReg1);
+ } else if (call.usesSystemAbi) {
+ // On x86 there are no pinned registers, so don't waste time
+ // reloading the Tls.
+#ifndef JS_CODEGEN_X86
+ fr.loadTlsPtr(WasmTlsReg);
+ masm.loadWasmPinnedRegsFromTls();
+#endif
+ }
+ }
+
+ void startCallArgs(size_t stackArgAreaSizeUnaligned, FunctionCall* call) {
+ size_t stackArgAreaSizeAligned =
+ AlignStackArgAreaSize(stackArgAreaSizeUnaligned);
+ MOZ_ASSERT(stackArgAreaSizeUnaligned <= stackArgAreaSizeAligned);
+
+ // Record the masm.framePushed() value at this point, before we push args
+ // for the call, but including the alignment space placed above the args.
+ // This defines the lower limit of the stackmap that will be created for
+ // this call.
+ MOZ_ASSERT(
+ stackMapGenerator_.framePushedExcludingOutboundCallArgs.isNothing());
+ stackMapGenerator_.framePushedExcludingOutboundCallArgs.emplace(
+ // However much we've pushed so far
+ masm.framePushed() +
+ // Extra space we'll push to get the frame aligned
+ call->frameAlignAdjustment +
+ // Extra space we'll push to get the outbound arg area 16-aligned
+ (stackArgAreaSizeAligned - stackArgAreaSizeUnaligned));
+
+ call->stackArgAreaSize = stackArgAreaSizeAligned;
+
+ size_t adjustment = call->stackArgAreaSize + call->frameAlignAdjustment;
+ fr.allocArgArea(adjustment);
+ }
+
+ const ABIArg reservePointerArgument(FunctionCall* call) {
+ return call->abi.next(MIRType::Pointer);
+ }
+
+ // TODO / OPTIMIZE (Bug 1316821): Note passArg is used only in one place.
+ // (Or it was, until Luke wandered through, but that can be fixed again.)
+ // I'm not saying we should manually inline it, but we could hoist the
+ // dispatch into the caller and have type-specific implementations of
+ // passArg: passArgI32(), etc. Then those might be inlined, at least in PGO
+ // builds.
+ //
+ // The bulk of the work here (60%) is in the next() call, though.
+ //
+ // Notably, since next() is so expensive, StackArgAreaSizeUnaligned()
+ // becomes expensive too.
+ //
+ // Somehow there could be a trick here where the sequence of argument types
+ // (read from the input stream) leads to a cached entry for
+ // StackArgAreaSizeUnaligned() and for how to pass arguments...
+ //
+ // But at least we could reduce the cost of StackArgAreaSizeUnaligned() by
+ // first reading the argument types into a (reusable) vector, then we have
+ // the outgoing size at low cost, and then we can pass args based on the
+ // info we read.
+
+ void passArg(ValType type, const Stk& arg, FunctionCall* call) {
+ switch (type.kind()) {
+ case ValType::I32: {
+ ABIArg argLoc = call->abi.next(MIRType::Int32);
+ if (argLoc.kind() == ABIArg::Stack) {
+ ScratchI32 scratch(*this);
+ loadI32(arg, scratch);
+ masm.store32(scratch, Address(masm.getStackPointer(),
+ argLoc.offsetFromArgBase()));
+ } else {
+ loadI32(arg, RegI32(argLoc.gpr()));
+ }
+ break;
+ }
+ case ValType::I64: {
+ ABIArg argLoc = call->abi.next(MIRType::Int64);
+ if (argLoc.kind() == ABIArg::Stack) {
+ ScratchI32 scratch(*this);
+#ifdef JS_PUNBOX64
+ loadI64(arg, fromI32(scratch));
+ masm.storePtr(scratch, Address(masm.getStackPointer(),
+ argLoc.offsetFromArgBase()));
+#else
+ loadI64Low(arg, scratch);
+ masm.store32(scratch, LowWord(Address(masm.getStackPointer(),
+ argLoc.offsetFromArgBase())));
+ loadI64High(arg, scratch);
+ masm.store32(scratch, HighWord(Address(masm.getStackPointer(),
+ argLoc.offsetFromArgBase())));
+#endif
+ } else {
+ loadI64(arg, RegI64(argLoc.gpr64()));
+ }
+ break;
+ }
+ case ValType::V128: {
+#ifdef ENABLE_WASM_SIMD
+ ABIArg argLoc = call->abi.next(MIRType::Simd128);
+ switch (argLoc.kind()) {
+ case ABIArg::Stack: {
+ ScratchV128 scratch(*this);
+ loadV128(arg, scratch);
+ masm.storeUnalignedSimd128(
+ (RegV128)scratch,
+ Address(masm.getStackPointer(), argLoc.offsetFromArgBase()));
+ break;
+ }
+ case ABIArg::GPR: {
+ MOZ_CRASH("Unexpected parameter passing discipline");
+ }
+ case ABIArg::FPU: {
+ loadV128(arg, RegV128(argLoc.fpu()));
+ break;
+ }
+# if defined(JS_CODEGEN_REGISTER_PAIR)
+ case ABIArg::GPR_PAIR: {
+ MOZ_CRASH("Unexpected parameter passing discipline");
+ }
+# endif
+ case ABIArg::Uninitialized:
+ MOZ_CRASH("Uninitialized ABIArg kind");
+ }
+ break;
+#else
+ MOZ_CRASH("No SIMD support");
+#endif
+ }
+ case ValType::F64: {
+ ABIArg argLoc = call->abi.next(MIRType::Double);
+ switch (argLoc.kind()) {
+ case ABIArg::Stack: {
+ ScratchF64 scratch(*this);
+ loadF64(arg, scratch);
+ masm.storeDouble(scratch, Address(masm.getStackPointer(),
+ argLoc.offsetFromArgBase()));
+ break;
+ }
+#if defined(JS_CODEGEN_REGISTER_PAIR)
+ case ABIArg::GPR_PAIR: {
+# if defined(JS_CODEGEN_ARM)
+ ScratchF64 scratch(*this);
+ loadF64(arg, scratch);
+ masm.ma_vxfer(scratch, argLoc.evenGpr(), argLoc.oddGpr());
+ break;
+# elif defined(JS_CODEGEN_MIPS32)
+ ScratchF64 scratch(*this);
+ loadF64(arg, scratch);
+ MOZ_ASSERT(MOZ_LITTLE_ENDIAN());
+ masm.moveFromDoubleLo(scratch, argLoc.evenGpr());
+ masm.moveFromDoubleHi(scratch, argLoc.oddGpr());
+ break;
+# else
+ MOZ_CRASH("BaseCompiler platform hook: passArg F64 pair");
+# endif
+ }
+#endif
+ case ABIArg::FPU: {
+ loadF64(arg, RegF64(argLoc.fpu()));
+ break;
+ }
+ case ABIArg::GPR: {
+ MOZ_CRASH("Unexpected parameter passing discipline");
+ }
+ case ABIArg::Uninitialized:
+ MOZ_CRASH("Uninitialized ABIArg kind");
+ }
+ break;
+ }
+ case ValType::F32: {
+ ABIArg argLoc = call->abi.next(MIRType::Float32);
+ switch (argLoc.kind()) {
+ case ABIArg::Stack: {
+ ScratchF32 scratch(*this);
+ loadF32(arg, scratch);
+ masm.storeFloat32(scratch, Address(masm.getStackPointer(),
+ argLoc.offsetFromArgBase()));
+ break;
+ }
+ case ABIArg::GPR: {
+ ScratchF32 scratch(*this);
+ loadF32(arg, scratch);
+ masm.moveFloat32ToGPR(scratch, argLoc.gpr());
+ break;
+ }
+ case ABIArg::FPU: {
+ loadF32(arg, RegF32(argLoc.fpu()));
+ break;
+ }
+#if defined(JS_CODEGEN_REGISTER_PAIR)
+ case ABIArg::GPR_PAIR: {
+ MOZ_CRASH("Unexpected parameter passing discipline");
+ }
+#endif
+ case ABIArg::Uninitialized:
+ MOZ_CRASH("Uninitialized ABIArg kind");
+ }
+ break;
+ }
+ case ValType::Ref: {
+ ABIArg argLoc = call->abi.next(MIRType::RefOrNull);
+ if (argLoc.kind() == ABIArg::Stack) {
+ ScratchPtr scratch(*this);
+ loadRef(arg, scratch);
+ masm.storePtr(scratch, Address(masm.getStackPointer(),
+ argLoc.offsetFromArgBase()));
+ } else {
+ loadRef(arg, RegPtr(argLoc.gpr()));
+ }
+ break;
+ }
+ }
+ }
+
+ CodeOffset callDefinition(uint32_t funcIndex, const FunctionCall& call) {
+ CallSiteDesc desc(call.lineOrBytecode, CallSiteDesc::Func);
+ return masm.call(desc, funcIndex);
+ }
+
+ CodeOffset callSymbolic(SymbolicAddress callee, const FunctionCall& call) {
+ CallSiteDesc desc(call.lineOrBytecode, CallSiteDesc::Symbolic);
+ return masm.call(desc, callee);
+ }
+
+ // Precondition: sync()
+
+ CodeOffset callIndirect(uint32_t funcTypeIndex, uint32_t tableIndex,
+ const Stk& indexVal, const FunctionCall& call) {
+ const TypeIdDesc& funcTypeId = moduleEnv_.typeIds[funcTypeIndex];
+ MOZ_ASSERT(funcTypeId.kind() != TypeIdDescKind::None);
+
+ const TableDesc& table = moduleEnv_.tables[tableIndex];
+
+ loadI32(indexVal, RegI32(WasmTableCallIndexReg));
+
+ CallSiteDesc desc(call.lineOrBytecode, CallSiteDesc::Dynamic);
+ CalleeDesc callee = CalleeDesc::wasmTable(table, funcTypeId);
+ return masm.wasmCallIndirect(desc, callee, NeedsBoundsCheck(true));
+ }
+
+ // Precondition: sync()
+
+ CodeOffset callImport(unsigned globalDataOffset, const FunctionCall& call) {
+ CallSiteDesc desc(call.lineOrBytecode, CallSiteDesc::Dynamic);
+ CalleeDesc callee = CalleeDesc::import(globalDataOffset);
+ return masm.wasmCallImport(desc, callee);
+ }
+
+ CodeOffset builtinCall(SymbolicAddress builtin, const FunctionCall& call) {
+ return callSymbolic(builtin, call);
+ }
+
+ CodeOffset builtinInstanceMethodCall(const SymbolicAddressSignature& builtin,
+ const ABIArg& instanceArg,
+ const FunctionCall& call) {
+ // Builtin method calls assume the TLS register has been set.
+ fr.loadTlsPtr(WasmTlsReg);
+
+ CallSiteDesc desc(call.lineOrBytecode, CallSiteDesc::Symbolic);
+ return masm.wasmCallBuiltinInstanceMethod(
+ desc, instanceArg, builtin.identity, builtin.failureMode);
+ }
+
+ [[nodiscard]] bool pushCallResults(const FunctionCall& call, ResultType type,
+ const StackResultsLoc& loc) {
+#if defined(JS_CODEGEN_ARM)
+ // pushResults currently bypasses special case code in captureReturnedFxx()
+ // that converts GPR results to FPR results for systemABI+softFP. If we
+ // ever start using that combination for calls, we will need more code. This
+ // assert is stronger than we need (we only care about results in return
+ // registers), but that's OK.
+ MOZ_ASSERT(!call.usesSystemAbi || call.hardFP);
+#endif
+ return pushResults(type, fr.stackResultsBase(loc.bytes()));
+ }
+
+ //////////////////////////////////////////////////////////////////////
+ //
+ // Sundry low-level code generators.
+
+ // The compiler depends on moveImm32() clearing the high bits of a 64-bit
+ // register on 64-bit systems, except on MIPS64, where the high bits are
+ // sign-extended from the lower bits.
+
+ void moveImm32(int32_t v, RegI32 dest) { masm.move32(Imm32(v), dest); }
+
+ void moveImm64(int64_t v, RegI64 dest) { masm.move64(Imm64(v), dest); }
+
+ void moveImmRef(intptr_t v, RegPtr dest) { masm.movePtr(ImmWord(v), dest); }
+
+ void moveImmF32(float f, RegF32 dest) { masm.loadConstantFloat32(f, dest); }
+
+ void moveImmF64(double d, RegF64 dest) { masm.loadConstantDouble(d, dest); }
+
+ [[nodiscard]] bool addInterruptCheck() {
+ ScratchI32 tmp(*this);
+ fr.loadTlsPtr(tmp);
+ masm.wasmInterruptCheck(tmp, bytecodeOffset());
+ return createStackMap("addInterruptCheck");
+ }
+
+ void jumpTable(const LabelVector& labels, Label* theTable) {
+ // Flush constant pools to ensure that the table is never interrupted by
+ // constant pool entries.
+ masm.flush();
+
+#if defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64)
+ // Prevent nop sequences from appearing in the jump table.
+ AutoForbidNops afn(&masm);
+#endif
+ masm.bind(theTable);
+
+ for (uint32_t i = 0; i < labels.length(); i++) {
+ CodeLabel cl;
+ masm.writeCodePointer(&cl);
+ cl.target()->bind(labels[i].offset());
+ masm.addCodeLabel(cl);
+ }
+ }
+
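+ // tableSwitch() consumes the table laid down by jumpTable(): it computes
+ // the address of the entry selected by switchValue and jumps indirectly
+ // through it.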
+ void tableSwitch(Label* theTable, RegI32 switchValue, Label* dispatchCode) {
+ masm.bind(dispatchCode);
+
+#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86)
+ ScratchI32 scratch(*this);
+ CodeLabel tableCl;
+
+ masm.mov(&tableCl, scratch);
+
+ tableCl.target()->bind(theTable->offset());
+ masm.addCodeLabel(tableCl);
+
+ masm.jmp(Operand(scratch, switchValue, ScalePointer));
+#elif defined(JS_CODEGEN_ARM)
+ // Flush constant pools: offset must reflect the distance from the MOV
+ // to the start of the table; as the address of the MOV is given by the
+ // label, nothing must come between the bind() and the ma_mov().
+ AutoForbidPoolsAndNops afp(&masm,
+ /* number of instructions in scope = */ 5);
+
+ ScratchI32 scratch(*this);
+
+ // Compute the offset from the ma_mov instruction to the jump table.
+ Label here;
+ masm.bind(&here);
+ uint32_t offset = here.offset() - theTable->offset();
+
+ // Read PC+8
+ masm.ma_mov(pc, scratch);
+
+ // ARM scratch register is required by ma_sub.
+ ScratchRegisterScope arm_scratch(*this);
+
+ // Compute the absolute table base pointer into `scratch`, offset by 8
+ // to account for the fact that ma_mov read PC+8.
+ masm.ma_sub(Imm32(offset + 8), scratch, arm_scratch);
+
+ // Jump indirect via table element.
+ masm.ma_ldr(DTRAddr(scratch, DtrRegImmShift(switchValue, LSL, 2)), pc,
+ Offset, Assembler::Always);
+#elif defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
+ ScratchI32 scratch(*this);
+ CodeLabel tableCl;
+
+ masm.ma_li(scratch, &tableCl);
+
+ tableCl.target()->bind(theTable->offset());
+ masm.addCodeLabel(tableCl);
+
+ masm.branchToComputedAddress(BaseIndex(scratch, switchValue, ScalePointer));
+#elif defined(JS_CODEGEN_ARM64)
+ AutoForbidPoolsAndNops afp(&masm,
+ /* number of instructions in scope = */ 4);
+
+ ScratchI32 scratch(*this);
+
+ ARMRegister s(scratch, 64);
+ ARMRegister v(switchValue, 64);
+ masm.Adr(s, theTable);
+ masm.Add(s, s, Operand(v, vixl::LSL, 3));
+ masm.Ldr(s, MemOperand(s, 0));
+ masm.Br(s);
+#else
+ MOZ_CRASH("BaseCompiler platform hook: tableSwitch");
+#endif
+ }
+
+ RegI32 captureReturnedI32() {
+ RegI32 r = RegI32(ReturnReg);
+ MOZ_ASSERT(isAvailableI32(r));
+ needI32(r);
+#if defined(JS_CODEGEN_X64)
+ if (JitOptions.spectreIndexMasking) {
+ masm.movl(r, r);
+ }
+#endif
+ return r;
+ }
+
+ RegI64 captureReturnedI64() {
+ RegI64 r = RegI64(ReturnReg64);
+ MOZ_ASSERT(isAvailableI64(r));
+ needI64(r);
+ return r;
+ }
+
+ RegF32 captureReturnedF32(const FunctionCall& call) {
+ RegF32 r = RegF32(ReturnFloat32Reg);
+ MOZ_ASSERT(isAvailableF32(r));
+ needF32(r);
+#if defined(JS_CODEGEN_ARM)
+ if (call.usesSystemAbi && !call.hardFP) {
+ masm.ma_vxfer(ReturnReg, r);
+ }
+#endif
+ return r;
+ }
+
+ RegF64 captureReturnedF64(const FunctionCall& call) {
+ RegF64 r = RegF64(ReturnDoubleReg);
+ MOZ_ASSERT(isAvailableF64(r));
+ needF64(r);
+#if defined(JS_CODEGEN_ARM)
+ if (call.usesSystemAbi && !call.hardFP) {
+ masm.ma_vxfer(ReturnReg64.low, ReturnReg64.high, r);
+ }
+#endif
+ return r;
+ }
+
+#ifdef ENABLE_WASM_SIMD
+ RegV128 captureReturnedV128(const FunctionCall& call) {
+ RegV128 r = RegV128(ReturnSimd128Reg);
+ MOZ_ASSERT(isAvailableV128(r));
+ needV128(r);
+ return r;
+ }
+#endif
+
+ RegPtr captureReturnedRef() {
+ RegPtr r = RegPtr(ReturnReg);
+ MOZ_ASSERT(isAvailableRef(r));
+ needRef(r);
+ return r;
+ }
+
+ void checkDivideByZeroI32(RegI32 rhs) {
+ Label nonZero;
+ masm.branchTest32(Assembler::NonZero, rhs, rhs, &nonZero);
+ trap(Trap::IntegerDivideByZero);
+ masm.bind(&nonZero);
+ }
+
+ void checkDivideByZeroI64(RegI64 r) {
+ Label nonZero;
+ ScratchI32 scratch(*this);
+ masm.branchTest64(Assembler::NonZero, r, r, scratch, &nonZero);
+ trap(Trap::IntegerDivideByZero);
+ masm.bind(&nonZero);
+ }
+
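+ // Signed division overflows only for INT_MIN / -1, whose true quotient is
+ // not representable. For wasm div_s that case must trap with
+ // IntegerOverflow; for rem_s the result is defined to be zero, which is
+ // what the zeroOnOverflow flag selects.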
+ void checkDivideSignedOverflowI32(RegI32 rhs, RegI32 srcDest, Label* done,
+ bool zeroOnOverflow) {
+ Label notMin;
+ masm.branch32(Assembler::NotEqual, srcDest, Imm32(INT32_MIN), &notMin);
+ if (zeroOnOverflow) {
+ masm.branch32(Assembler::NotEqual, rhs, Imm32(-1), &notMin);
+ moveImm32(0, srcDest);
+ masm.jump(done);
+ } else {
+ masm.branch32(Assembler::NotEqual, rhs, Imm32(-1), &notMin);
+ trap(Trap::IntegerOverflow);
+ }
+ masm.bind(&notMin);
+ }
+
+ void checkDivideSignedOverflowI64(RegI64 rhs, RegI64 srcDest, Label* done,
+ bool zeroOnOverflow) {
+ Label notmin;
+ masm.branch64(Assembler::NotEqual, srcDest, Imm64(INT64_MIN), &notmin);
+ masm.branch64(Assembler::NotEqual, rhs, Imm64(-1), &notmin);
+ if (zeroOnOverflow) {
+ masm.xor64(srcDest, srcDest);
+ masm.jump(done);
+ } else {
+ trap(Trap::IntegerOverflow);
+ }
+ masm.bind(&notmin);
+ }
+
+#ifndef RABALDR_INT_DIV_I64_CALLOUT
+ void quotientI64(RegI64 rhs, RegI64 srcDest, RegI64 reserved,
+ IsUnsigned isUnsigned, bool isConst, int64_t c) {
+ Label done;
+
+ if (!isConst || c == 0) {
+ checkDivideByZeroI64(rhs);
+ }
+
+ if (!isUnsigned && (!isConst || c == -1)) {
+ checkDivideSignedOverflowI64(rhs, srcDest, &done, ZeroOnOverflow(false));
+ }
+
+# if defined(JS_CODEGEN_X64)
+ // The caller must set up the following situation.
+ MOZ_ASSERT(srcDest.reg == rax);
+ MOZ_ASSERT(reserved == specific_.rdx);
+ if (isUnsigned) {
+ masm.xorq(rdx, rdx);
+ masm.udivq(rhs.reg);
+ } else {
+ masm.cqo();
+ masm.idivq(rhs.reg);
+ }
+# elif defined(JS_CODEGEN_MIPS64)
+ if (isUnsigned) {
+ masm.as_ddivu(srcDest.reg, rhs.reg);
+ } else {
+ masm.as_ddiv(srcDest.reg, rhs.reg);
+ }
+ masm.as_mflo(srcDest.reg);
+# elif defined(JS_CODEGEN_ARM64)
+ ARMRegister sd(srcDest.reg, 64);
+ ARMRegister r(rhs.reg, 64);
+ if (isUnsigned) {
+ masm.Udiv(sd, sd, r);
+ } else {
+ masm.Sdiv(sd, sd, r);
+ }
+# else
+ MOZ_CRASH("BaseCompiler platform hook: quotientI64");
+# endif
+ masm.bind(&done);
+ }
+
+ void remainderI64(RegI64 rhs, RegI64 srcDest, RegI64 reserved,
+ IsUnsigned isUnsigned, bool isConst, int64_t c) {
+ Label done;
+
+ if (!isConst || c == 0) {
+ checkDivideByZeroI64(rhs);
+ }
+
+ if (!isUnsigned && (!isConst || c == -1)) {
+ checkDivideSignedOverflowI64(rhs, srcDest, &done, ZeroOnOverflow(true));
+ }
+
+# if defined(JS_CODEGEN_X64)
+ // The caller must set up the following situation.
+ MOZ_ASSERT(srcDest.reg == rax);
+ MOZ_ASSERT(reserved == specific_.rdx);
+
+ if (isUnsigned) {
+ masm.xorq(rdx, rdx);
+ masm.udivq(rhs.reg);
+ } else {
+ masm.cqo();
+ masm.idivq(rhs.reg);
+ }
+ masm.movq(rdx, rax);
+# elif defined(JS_CODEGEN_MIPS64)
+ if (isUnsigned) {
+ masm.as_ddivu(srcDest.reg, rhs.reg);
+ } else {
+ masm.as_ddiv(srcDest.reg, rhs.reg);
+ }
+ masm.as_mfhi(srcDest.reg);
+# elif defined(JS_CODEGEN_ARM64)
+ MOZ_ASSERT(reserved.isInvalid());
+ ARMRegister sd(srcDest.reg, 64);
+ ARMRegister r(rhs.reg, 64);
+ ScratchI32 temp(*this);
+ ARMRegister t(temp, 64);
+ if (isUnsigned) {
+ masm.Udiv(t, sd, r);
+ } else {
+ masm.Sdiv(t, sd, r);
+ }
+ masm.Mul(t, t, r);
+ masm.Sub(sd, sd, t);
+# else
+ MOZ_CRASH("BaseCompiler platform hook: remainderI64");
+# endif
+ masm.bind(&done);
+ }
+#endif // RABALDR_INT_DIV_I64_CALLOUT
+
+ RegI32 needRotate64Temp() {
+#if defined(JS_CODEGEN_X86)
+ return needI32();
+#elif defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_ARM) || \
+ defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_MIPS32) || \
+ defined(JS_CODEGEN_MIPS64)
+ return RegI32::Invalid();
+#else
+ MOZ_CRASH("BaseCompiler platform hook: needRotate64Temp");
+#endif
+ }
+
+ void maskShiftCount32(RegI32 r) {
+#if defined(JS_CODEGEN_ARM)
+ masm.and32(Imm32(31), r);
+#endif
+ }
+
+ RegI32 needPopcnt32Temp() {
+#if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
+ return AssemblerX86Shared::HasPOPCNT() ? RegI32::Invalid() : needI32();
+#elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64) || \
+ defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
+ return needI32();
+#else
+ MOZ_CRASH("BaseCompiler platform hook: needPopcnt32Temp");
+#endif
+ }
+
+ RegI32 needPopcnt64Temp() {
+#if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
+ return AssemblerX86Shared::HasPOPCNT() ? RegI32::Invalid() : needI32();
+#elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64) || \
+ defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
+ return needI32();
+#else
+ MOZ_CRASH("BaseCompiler platform hook: needPopcnt64Temp");
+#endif
+ }
+
+ class OutOfLineTruncateCheckF32OrF64ToI32 : public OutOfLineCode {
+ AnyReg src;
+ RegI32 dest;
+ TruncFlags flags;
+ BytecodeOffset off;
+
+ public:
+ OutOfLineTruncateCheckF32OrF64ToI32(AnyReg src, RegI32 dest,
+ TruncFlags flags, BytecodeOffset off)
+ : src(src), dest(dest), flags(flags), off(off) {}
+
+ virtual void generate(MacroAssembler* masm) override {
+ if (src.tag == AnyReg::F32) {
+ masm->oolWasmTruncateCheckF32ToI32(src.f32(), dest, flags, off,
+ rejoin());
+ } else if (src.tag == AnyReg::F64) {
+ masm->oolWasmTruncateCheckF64ToI32(src.f64(), dest, flags, off,
+ rejoin());
+ } else {
+ MOZ_CRASH("unexpected type");
+ }
+ }
+ };
+
+ [[nodiscard]] bool truncateF32ToI32(RegF32 src, RegI32 dest,
+ TruncFlags flags) {
+ BytecodeOffset off = bytecodeOffset();
+ OutOfLineCode* ool =
+ addOutOfLineCode(new (alloc_) OutOfLineTruncateCheckF32OrF64ToI32(
+ AnyReg(src), dest, flags, off));
+ if (!ool) {
+ return false;
+ }
+ bool isSaturating = flags & TRUNC_SATURATING;
+ if (flags & TRUNC_UNSIGNED) {
+ masm.wasmTruncateFloat32ToUInt32(src, dest, isSaturating, ool->entry());
+ } else {
+ masm.wasmTruncateFloat32ToInt32(src, dest, isSaturating, ool->entry());
+ }
+ masm.bind(ool->rejoin());
+ return true;
+ }
+
+ [[nodiscard]] bool truncateF64ToI32(RegF64 src, RegI32 dest,
+ TruncFlags flags) {
+ BytecodeOffset off = bytecodeOffset();
+ OutOfLineCode* ool =
+ addOutOfLineCode(new (alloc_) OutOfLineTruncateCheckF32OrF64ToI32(
+ AnyReg(src), dest, flags, off));
+ if (!ool) {
+ return false;
+ }
+ bool isSaturating = flags & TRUNC_SATURATING;
+ if (flags & TRUNC_UNSIGNED) {
+ masm.wasmTruncateDoubleToUInt32(src, dest, isSaturating, ool->entry());
+ } else {
+ masm.wasmTruncateDoubleToInt32(src, dest, isSaturating, ool->entry());
+ }
+ masm.bind(ool->rejoin());
+ return true;
+ }
+
+ class OutOfLineTruncateCheckF32OrF64ToI64 : public OutOfLineCode {
+ AnyReg src;
+ RegI64 dest;
+ TruncFlags flags;
+ BytecodeOffset off;
+
+ public:
+ OutOfLineTruncateCheckF32OrF64ToI64(AnyReg src, RegI64 dest,
+ TruncFlags flags, BytecodeOffset off)
+ : src(src), dest(dest), flags(flags), off(off) {}
+
+ virtual void generate(MacroAssembler* masm) override {
+ if (src.tag == AnyReg::F32) {
+ masm->oolWasmTruncateCheckF32ToI64(src.f32(), dest, flags, off,
+ rejoin());
+ } else if (src.tag == AnyReg::F64) {
+ masm->oolWasmTruncateCheckF64ToI64(src.f64(), dest, flags, off,
+ rejoin());
+ } else {
+ MOZ_CRASH("unexpected type");
+ }
+ }
+ };
+
+#ifndef RABALDR_FLOAT_TO_I64_CALLOUT
+ [[nodiscard]] RegF64 needTempForFloatingToI64(TruncFlags flags) {
+# if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
+ if (flags & TRUNC_UNSIGNED) {
+ return needF64();
+ }
+# endif
+ return RegF64::Invalid();
+ }
+
+ [[nodiscard]] bool truncateF32ToI64(RegF32 src, RegI64 dest, TruncFlags flags,
+ RegF64 temp) {
+ OutOfLineCode* ool =
+ addOutOfLineCode(new (alloc_) OutOfLineTruncateCheckF32OrF64ToI64(
+ AnyReg(src), dest, flags, bytecodeOffset()));
+ if (!ool) {
+ return false;
+ }
+ bool isSaturating = flags & TRUNC_SATURATING;
+ if (flags & TRUNC_UNSIGNED) {
+ masm.wasmTruncateFloat32ToUInt64(src, dest, isSaturating, ool->entry(),
+ ool->rejoin(), temp);
+ } else {
+ masm.wasmTruncateFloat32ToInt64(src, dest, isSaturating, ool->entry(),
+ ool->rejoin(), temp);
+ }
+ return true;
+ }
+
+ [[nodiscard]] bool truncateF64ToI64(RegF64 src, RegI64 dest, TruncFlags flags,
+ RegF64 temp) {
+ OutOfLineCode* ool =
+ addOutOfLineCode(new (alloc_) OutOfLineTruncateCheckF32OrF64ToI64(
+ AnyReg(src), dest, flags, bytecodeOffset()));
+ if (!ool) {
+ return false;
+ }
+ bool isSaturating = flags & TRUNC_SATURATING;
+ if (flags & TRUNC_UNSIGNED) {
+ masm.wasmTruncateDoubleToUInt64(src, dest, isSaturating, ool->entry(),
+ ool->rejoin(), temp);
+ } else {
+ masm.wasmTruncateDoubleToInt64(src, dest, isSaturating, ool->entry(),
+ ool->rejoin(), temp);
+ }
+ return true;
+ }
+#endif // RABALDR_FLOAT_TO_I64_CALLOUT
+
+#ifndef RABALDR_I64_TO_FLOAT_CALLOUT
+ RegI32 needConvertI64ToFloatTemp(ValType to, bool isUnsigned) {
+ bool needs = false;
+ if (to == ValType::F64) {
+ needs = isUnsigned && masm.convertUInt64ToDoubleNeedsTemp();
+ } else {
+# if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
+ needs = true;
+# endif
+ }
+ return needs ? needI32() : RegI32::Invalid();
+ }
+
+ void convertI64ToF32(RegI64 src, bool isUnsigned, RegF32 dest, RegI32 temp) {
+ if (isUnsigned) {
+ masm.convertUInt64ToFloat32(src, dest, temp);
+ } else {
+ masm.convertInt64ToFloat32(src, dest);
+ }
+ }
+
+ void convertI64ToF64(RegI64 src, bool isUnsigned, RegF64 dest, RegI32 temp) {
+ if (isUnsigned) {
+ masm.convertUInt64ToDouble(src, dest, temp);
+ } else {
+ masm.convertInt64ToDouble(src, dest);
+ }
+ }
+#endif // RABALDR_I64_TO_FLOAT_CALLOUT
+
+ void cmp64Set(Assembler::Condition cond, RegI64 lhs, RegI64 rhs,
+ RegI32 dest) {
+#if defined(JS_PUNBOX64)
+ masm.cmpPtrSet(cond, lhs.reg, rhs.reg, dest);
+#elif defined(JS_CODEGEN_MIPS32)
+ masm.cmp64Set(cond, lhs, rhs, dest);
+#else
+ // TODO / OPTIMIZE (Bug 1316822): This is pretty branchy; we should be
+ // able to do better.
+ Label done, condTrue;
+ masm.branch64(cond, lhs, rhs, &condTrue);
+ moveImm32(0, dest);
+ masm.jump(&done);
+ masm.bind(&condTrue);
+ moveImm32(1, dest);
+ masm.bind(&done);
+#endif
+ }
+
+ void eqz64(RegI64 src, RegI32 dest) {
+#ifdef JS_PUNBOX64
+ masm.cmpPtrSet(Assembler::Equal, src.reg, ImmWord(0), dest);
+#else
+ masm.or32(src.high, src.low);
+ masm.cmp32Set(Assembler::Equal, src.low, Imm32(0), dest);
+#endif
+ }
+
+ [[nodiscard]] bool supportsRoundInstruction(RoundingMode mode) {
+ return Assembler::HasRoundInstruction(mode);
+ }
+
+ void roundF32(RoundingMode roundingMode, RegF32 f0) {
+ masm.nearbyIntFloat32(roundingMode, f0, f0);
+ }
+
+ void roundF64(RoundingMode roundingMode, RegF64 f0) {
+ masm.nearbyIntDouble(roundingMode, f0, f0);
+ }
+
+ //////////////////////////////////////////////////////////////////////
+ //
+ // Global variable access.
+
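+ // Compute the address of a global's storage cell. A direct global lives
+ // inline in the TLS global area; an indirect global stores a pointer there
+ // instead, which we dereference to reach the actual cell.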
+ Address addressOfGlobalVar(const GlobalDesc& global, RegI32 tmp) {
+ uint32_t globalToTlsOffset =
+ offsetof(TlsData, globalArea) + global.offset();
+ fr.loadTlsPtr(tmp);
+ if (global.isIndirect()) {
+ masm.loadPtr(Address(tmp, globalToTlsOffset), tmp);
+ return Address(tmp, 0);
+ }
+ return Address(tmp, globalToTlsOffset);
+ }
+
+ //////////////////////////////////////////////////////////////////////
+ //
+ // Heap access.
+
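+ // Bounds-check elimination (BCE) for locals: bceSafe_ holds one bit per
+ // local (for the first 8*sizeof(BCESet) locals). A set bit means the local
+ // currently holds a pointer that has already been bounds checked on this
+ // path, so a later access through it with a sufficiently small offset may
+ // omit the check. bceCheckLocal() consults and sets the bit when a local
+ // is used as an address; bceLocalIsUpdated() clears it when the local is
+ // assigned.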
+ void bceCheckLocal(MemoryAccessDesc* access, AccessCheck* check,
+ uint32_t local) {
+ if (local >= sizeof(BCESet) * 8) {
+ return;
+ }
+
+ uint32_t offsetGuardLimit =
+ GetMaxOffsetGuardLimit(moduleEnv_.hugeMemoryEnabled());
+
+ if ((bceSafe_ & (BCESet(1) << local)) &&
+ access->offset() < offsetGuardLimit) {
+ check->omitBoundsCheck = true;
+ }
+
+ // The local becomes safe even if the offset is beyond the guard limit.
+ bceSafe_ |= (BCESet(1) << local);
+ }
+
+ void bceLocalIsUpdated(uint32_t local) {
+ if (local >= sizeof(BCESet) * 8) {
+ return;
+ }
+
+ bceSafe_ &= ~(BCESet(1) << local);
+ }
+
+ void prepareMemoryAccess(MemoryAccessDesc* access, AccessCheck* check,
+ RegI32 tls, RegI32 ptr) {
+ uint32_t offsetGuardLimit =
+ GetMaxOffsetGuardLimit(moduleEnv_.hugeMemoryEnabled());
+
+ // Fold offset if necessary for further computations.
+ if (access->offset() >= offsetGuardLimit ||
+ (access->isAtomic() && !check->omitAlignmentCheck &&
+ !check->onlyPointerAlignment)) {
+ Label ok;
+ masm.branchAdd32(Assembler::CarryClear, Imm32(access->offset()), ptr,
+ &ok);
+ masm.wasmTrap(Trap::OutOfBounds, bytecodeOffset());
+ masm.bind(&ok);
+ access->clearOffset();
+ check->onlyPointerAlignment = true;
+ }
+
+ // Alignment check if required.
+
+ if (access->isAtomic() && !check->omitAlignmentCheck) {
+ MOZ_ASSERT(check->onlyPointerAlignment);
+ // We only care about the low pointer bits here.
+ Label ok;
+ masm.branchTest32(Assembler::Zero, ptr, Imm32(access->byteSize() - 1),
+ &ok);
+ masm.wasmTrap(Trap::UnalignedAccess, bytecodeOffset());
+ masm.bind(&ok);
+ }
+
+ // Ensure that we have no tls register if we don't need one.
+
+ if (moduleEnv_.hugeMemoryEnabled()) {
+ // We have HeapReg and no bounds checking, and we need to load neither
+ // memoryBase nor boundsCheckLimit from tls.
+ MOZ_ASSERT_IF(check->omitBoundsCheck, tls.isInvalid());
+ }
+#ifdef JS_CODEGEN_ARM
+ // We have HeapReg on ARM and don't need to load the memoryBase from tls.
+ MOZ_ASSERT_IF(check->omitBoundsCheck, tls.isInvalid());
+#endif
+
+ // Bounds check if required.
+
+ if (!moduleEnv_.hugeMemoryEnabled() && !check->omitBoundsCheck) {
+ Label ok;
+ masm.wasmBoundsCheck32(
+ Assembler::Below, ptr,
+ Address(tls, offsetof(TlsData, boundsCheckLimit32)), &ok);
+ masm.wasmTrap(Trap::OutOfBounds, bytecodeOffset());
+ masm.bind(&ok);
+ }
+ }
+
+#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_ARM) || \
+ defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_MIPS32) || \
+ defined(JS_CODEGEN_MIPS64)
+ BaseIndex prepareAtomicMemoryAccess(MemoryAccessDesc* access,
+ AccessCheck* check, RegI32 tls,
+ RegI32 ptr) {
+ MOZ_ASSERT(needTlsForAccess(*check) == tls.isValid());
+ prepareMemoryAccess(access, check, tls, ptr);
+ return BaseIndex(HeapReg, ptr, TimesOne, access->offset());
+ }
+#elif defined(JS_CODEGEN_X86)
+ // Some consumers depend on the address not retaining tls, as tls may be the
+ // scratch register.
+
+ Address prepareAtomicMemoryAccess(MemoryAccessDesc* access,
+ AccessCheck* check, RegI32 tls,
+ RegI32 ptr) {
+ MOZ_ASSERT(needTlsForAccess(*check) == tls.isValid());
+ prepareMemoryAccess(access, check, tls, ptr);
+ masm.addPtr(Address(tls, offsetof(TlsData, memoryBase)), ptr);
+ return Address(ptr, access->offset());
+ }
+#else
+ Address prepareAtomicMemoryAccess(MemoryAccessDesc* access,
+ AccessCheck* check, RegI32 tls,
+ RegI32 ptr) {
+ MOZ_CRASH("BaseCompiler platform hook: prepareAtomicMemoryAccess");
+ }
+#endif
+
+ void computeEffectiveAddress(MemoryAccessDesc* access) {
+ if (access->offset()) {
+ Label ok;
+ RegI32 ptr = popI32();
+ masm.branchAdd32(Assembler::CarryClear, Imm32(access->offset()), ptr,
+ &ok);
+ masm.wasmTrap(Trap::OutOfBounds, bytecodeOffset());
+ masm.bind(&ok);
+ access->clearOffset();
+ pushI32(ptr);
+ }
+ }
+
+ void needLoadTemps(const MemoryAccessDesc& access, RegI32* temp1,
+ RegI32* temp2, RegI32* temp3) {
+#if defined(JS_CODEGEN_ARM)
+ if (IsUnaligned(access)) {
+ switch (access.type()) {
+ case Scalar::Float64:
+ *temp3 = needI32();
+ [[fallthrough]];
+ case Scalar::Float32:
+ *temp2 = needI32();
+ [[fallthrough]];
+ default:
+ *temp1 = needI32();
+ break;
+ }
+ }
+#elif defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
+ *temp1 = needI32();
+#endif
+ }
+
+ [[nodiscard]] bool needTlsForAccess(const AccessCheck& check) {
+#if defined(JS_CODEGEN_X86)
+ // x86 requires Tls for the memory base.
+ return true;
+#else
+ return !moduleEnv_.hugeMemoryEnabled() && !check.omitBoundsCheck;
+#endif
+ }
+
+ // ptr and dest may be the same iff dest is I32.
+ // This may destroy ptr even if ptr and dest are not the same.
+ [[nodiscard]] bool load(MemoryAccessDesc* access, AccessCheck* check,
+ RegI32 tls, RegI32 ptr, AnyReg dest, RegI32 temp1,
+ RegI32 temp2, RegI32 temp3) {
+ prepareMemoryAccess(access, check, tls, ptr);
+
+#if defined(JS_CODEGEN_X64)
+ Operand srcAddr(HeapReg, ptr, TimesOne, access->offset());
+
+ if (dest.tag == AnyReg::I64) {
+ masm.wasmLoadI64(*access, srcAddr, dest.i64());
+ } else {
+ masm.wasmLoad(*access, srcAddr, dest.any());
+ }
+#elif defined(JS_CODEGEN_X86)
+ masm.addPtr(Address(tls, offsetof(TlsData, memoryBase)), ptr);
+ Operand srcAddr(ptr, access->offset());
+
+ if (dest.tag == AnyReg::I64) {
+ MOZ_ASSERT(dest.i64() == specific_.abiReturnRegI64);
+ masm.wasmLoadI64(*access, srcAddr, dest.i64());
+ } else {
+ // For 8 bit loads, this will generate movsbl or movzbl, so
+ // there's no constraint on what the output register may be.
+ masm.wasmLoad(*access, srcAddr, dest.any());
+ }
+#elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_MIPS32) || \
+ defined(JS_CODEGEN_MIPS64)
+ if (IsUnaligned(*access)) {
+ switch (dest.tag) {
+ case AnyReg::I64:
+ masm.wasmUnalignedLoadI64(*access, HeapReg, ptr, ptr, dest.i64(),
+ temp1);
+ break;
+ case AnyReg::F32:
+ masm.wasmUnalignedLoadFP(*access, HeapReg, ptr, ptr, dest.f32(),
+ temp1, temp2, RegI32::Invalid());
+ break;
+ case AnyReg::F64:
+ masm.wasmUnalignedLoadFP(*access, HeapReg, ptr, ptr, dest.f64(),
+ temp1, temp2, temp3);
+ break;
+ case AnyReg::I32:
+ masm.wasmUnalignedLoad(*access, HeapReg, ptr, ptr, dest.i32(), temp1);
+ break;
+ default:
+ MOZ_CRASH("Unexpected type");
+ }
+ } else {
+ if (dest.tag == AnyReg::I64) {
+ masm.wasmLoadI64(*access, HeapReg, ptr, ptr, dest.i64());
+ } else {
+ masm.wasmLoad(*access, HeapReg, ptr, ptr, dest.any());
+ }
+ }
+#elif defined(JS_CODEGEN_ARM64)
+ if (dest.tag == AnyReg::I64) {
+ masm.wasmLoadI64(*access, HeapReg, ptr, dest.i64());
+ } else {
+ masm.wasmLoad(*access, HeapReg, ptr, dest.any());
+ }
+#else
+ MOZ_CRASH("BaseCompiler platform hook: load");
+#endif
+
+ return true;
+ }
+
+ RegI32 needStoreTemp(const MemoryAccessDesc& access, ValType srcType) {
+#if defined(JS_CODEGEN_ARM)
+ if (IsUnaligned(access) && srcType != ValType::I32) {
+ return needI32();
+ }
+#elif defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
+ return needI32();
+#endif
+ return RegI32::Invalid();
+ }
+
+ // ptr and src must not be the same register.
+ // This may destroy ptr and src.
+ [[nodiscard]] bool store(MemoryAccessDesc* access, AccessCheck* check,
+ RegI32 tls, RegI32 ptr, AnyReg src, RegI32 temp) {
+ prepareMemoryAccess(access, check, tls, ptr);
+
+ // Emit the store
+#if defined(JS_CODEGEN_X64)
+ MOZ_ASSERT(temp.isInvalid());
+ Operand dstAddr(HeapReg, ptr, TimesOne, access->offset());
+
+ masm.wasmStore(*access, src.any(), dstAddr);
+#elif defined(JS_CODEGEN_X86)
+ MOZ_ASSERT(temp.isInvalid());
+ masm.addPtr(Address(tls, offsetof(TlsData, memoryBase)), ptr);
+ Operand dstAddr(ptr, access->offset());
+
+ if (access->type() == Scalar::Int64) {
+ masm.wasmStoreI64(*access, src.i64(), dstAddr);
+ } else {
+ AnyRegister value;
+ ScratchI8 scratch(*this);
+ if (src.tag == AnyReg::I64) {
+ if (access->byteSize() == 1 && !ra.isSingleByteI32(src.i64().low)) {
+ masm.mov(src.i64().low, scratch);
+ value = AnyRegister(scratch);
+ } else {
+ value = AnyRegister(src.i64().low);
+ }
+ } else if (access->byteSize() == 1 && !ra.isSingleByteI32(src.i32())) {
+ masm.mov(src.i32(), scratch);
+ value = AnyRegister(scratch);
+ } else {
+ value = src.any();
+ }
+
+ masm.wasmStore(*access, value, dstAddr);
+ }
+#elif defined(JS_CODEGEN_ARM)
+ if (IsUnaligned(*access)) {
+ switch (src.tag) {
+ case AnyReg::I64:
+ masm.wasmUnalignedStoreI64(*access, src.i64(), HeapReg, ptr, ptr,
+ temp);
+ break;
+ case AnyReg::F32:
+ masm.wasmUnalignedStoreFP(*access, src.f32(), HeapReg, ptr, ptr,
+ temp);
+ break;
+ case AnyReg::F64:
+ masm.wasmUnalignedStoreFP(*access, src.f64(), HeapReg, ptr, ptr,
+ temp);
+ break;
+ case AnyReg::I32:
+ MOZ_ASSERT(temp.isInvalid());
+ masm.wasmUnalignedStore(*access, src.i32(), HeapReg, ptr, ptr, temp);
+ break;
+ default:
+ MOZ_CRASH("Unexpected type");
+ }
+ } else {
+ MOZ_ASSERT(temp.isInvalid());
+ if (access->type() == Scalar::Int64) {
+ masm.wasmStoreI64(*access, src.i64(), HeapReg, ptr, ptr);
+ } else if (src.tag == AnyReg::I64) {
+ masm.wasmStore(*access, AnyRegister(src.i64().low), HeapReg, ptr, ptr);
+ } else {
+ masm.wasmStore(*access, src.any(), HeapReg, ptr, ptr);
+ }
+ }
+#elif defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
+ if (IsUnaligned(*access)) {
+ switch (src.tag) {
+ case AnyReg::I64:
+ masm.wasmUnalignedStoreI64(*access, src.i64(), HeapReg, ptr, ptr,
+ temp);
+ break;
+ case AnyReg::F32:
+ masm.wasmUnalignedStoreFP(*access, src.f32(), HeapReg, ptr, ptr,
+ temp);
+ break;
+ case AnyReg::F64:
+ masm.wasmUnalignedStoreFP(*access, src.f64(), HeapReg, ptr, ptr,
+ temp);
+ break;
+ case AnyReg::I32:
+ masm.wasmUnalignedStore(*access, src.i32(), HeapReg, ptr, ptr, temp);
+ break;
+ default:
+ MOZ_CRASH("Unexpected type");
+ }
+ } else {
+ if (src.tag == AnyReg::I64) {
+ masm.wasmStoreI64(*access, src.i64(), HeapReg, ptr, ptr);
+ } else {
+ masm.wasmStore(*access, src.any(), HeapReg, ptr, ptr);
+ }
+ }
+#elif defined(JS_CODEGEN_ARM64)
+ MOZ_ASSERT(temp.isInvalid());
+ if (access->type() == Scalar::Int64) {
+ masm.wasmStoreI64(*access, src.i64(), HeapReg, ptr);
+ } else {
+ masm.wasmStore(*access, src.any(), HeapReg, ptr);
+ }
+#else
+ MOZ_CRASH("BaseCompiler platform hook: store");
+#endif
+
+ return true;
+ }
+
+ template <size_t Count>
+ struct Atomic32Temps : mozilla::Array<RegI32, Count> {
+ // Allocate all temp registers if 'allocate' is not specified.
+ void allocate(BaseCompiler* bc, size_t allocate = Count) {
+ static_assert(Count != 0);
+ for (size_t i = 0; i < allocate; ++i) {
+ this->operator[](i) = bc->needI32();
+ }
+ }
+ void maybeFree(BaseCompiler* bc) {
+ for (size_t i = 0; i < Count; ++i) {
+ bc->maybeFreeI32(this->operator[](i));
+ }
+ }
+ };
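+
+ // The Pop*Regs helpers below allocate the temps they need via allocate()
+ // in their constructors and release them via maybeFree() in their
+ // destructors; the atomic* emitters above consume them.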
+
+#if defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
+ using AtomicRMW32Temps = Atomic32Temps<3>;
+#else
+ using AtomicRMW32Temps = Atomic32Temps<1>;
+#endif
+
+ template <typename T>
+ void atomicRMW32(const MemoryAccessDesc& access, T srcAddr, AtomicOp op,
+ RegI32 rv, RegI32 rd, const AtomicRMW32Temps& temps) {
+ switch (access.type()) {
+ case Scalar::Uint8:
+#ifdef JS_CODEGEN_X86
+ {
+ RegI32 temp = temps[0];
+ // The temp, if used, must be a byte register.
+ MOZ_ASSERT(temp.isInvalid());
+ ScratchI8 scratch(*this);
+ if (op != AtomicFetchAddOp && op != AtomicFetchSubOp) {
+ temp = scratch;
+ }
+ masm.wasmAtomicFetchOp(access, op, rv, srcAddr, temp, rd);
+ break;
+ }
+#endif
+ case Scalar::Uint16:
+ case Scalar::Int32:
+ case Scalar::Uint32:
+#if defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
+ masm.wasmAtomicFetchOp(access, op, rv, srcAddr, temps[0], temps[1],
+ temps[2], rd);
+#else
+ masm.wasmAtomicFetchOp(access, op, rv, srcAddr, temps[0], rd);
+#endif
+ break;
+ default: {
+ MOZ_CRASH("Bad type for atomic operation");
+ }
+ }
+ }
+
+ // On x86, V is Address. On other platforms, it is Register64.
+ // T is BaseIndex or Address.
+ template <typename T, typename V>
+ void atomicRMW64(const MemoryAccessDesc& access, const T& srcAddr,
+ AtomicOp op, V value, Register64 temp, Register64 rd) {
+ masm.wasmAtomicFetchOp64(access, op, value, srcAddr, temp, rd);
+ }
+
+#if defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
+ using AtomicCmpXchg32Temps = Atomic32Temps<3>;
+#else
+ using AtomicCmpXchg32Temps = Atomic32Temps<0>;
+#endif
+
+ template <typename T>
+ void atomicCmpXchg32(const MemoryAccessDesc& access, T srcAddr,
+ RegI32 rexpect, RegI32 rnew, RegI32 rd,
+ const AtomicCmpXchg32Temps& temps) {
+ switch (access.type()) {
+ case Scalar::Uint8:
+#if defined(JS_CODEGEN_X86)
+ {
+ ScratchI8 scratch(*this);
+ MOZ_ASSERT(rd == specific_.eax);
+ if (!ra.isSingleByteI32(rnew)) {
+ // The replacement value must have a byte persona.
+ masm.movl(rnew, scratch);
+ rnew = scratch;
+ }
+ masm.wasmCompareExchange(access, srcAddr, rexpect, rnew, rd);
+ break;
+ }
+#endif
+ case Scalar::Uint16:
+ case Scalar::Int32:
+ case Scalar::Uint32:
+#if defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
+ masm.wasmCompareExchange(access, srcAddr, rexpect, rnew, temps[0],
+ temps[1], temps[2], rd);
+#else
+ masm.wasmCompareExchange(access, srcAddr, rexpect, rnew, rd);
+#endif
+ break;
+ default:
+ MOZ_CRASH("Bad type for atomic operation");
+ }
+ }
+
+#if defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
+ using AtomicXchg32Temps = Atomic32Temps<3>;
+#else
+ using AtomicXchg32Temps = Atomic32Temps<0>;
+#endif
+
+ template <typename T>
+ void atomicXchg32(const MemoryAccessDesc& access, T srcAddr, RegI32 rv,
+ RegI32 rd, const AtomicXchg32Temps& temps) {
+ switch (access.type()) {
+ case Scalar::Uint8:
+#if defined(JS_CODEGEN_X86)
+ {
+ if (!ra.isSingleByteI32(rd)) {
+ ScratchI8 scratch(*this);
+ // The output register must have a byte persona.
+ masm.wasmAtomicExchange(access, srcAddr, rv, scratch);
+ masm.movl(scratch, rd);
+ } else {
+ masm.wasmAtomicExchange(access, srcAddr, rv, rd);
+ }
+ break;
+ }
+#endif
+ case Scalar::Uint16:
+ case Scalar::Int32:
+ case Scalar::Uint32:
+#if defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
+ masm.wasmAtomicExchange(access, srcAddr, rv, temps[0], temps[1],
+ temps[2], rd);
+#else
+ masm.wasmAtomicExchange(access, srcAddr, rv, rd);
+#endif
+ break;
+ default:
+ MOZ_CRASH("Bad type for atomic operation");
+ }
+ }
+
+ ////////////////////////////////////////////////////////////
+ //
+ // Generally speaking, ABOVE this point there should be no
+ // value stack manipulation (calls to popI32 etc).
+ //
+ ////////////////////////////////////////////////////////////
+
+ ////////////////////////////////////////////////////////////
+ //
+ // Platform-specific popping and register targeting.
+ //
+ // These fall into two groups, popping methods for simple needs, and RAII
+ // wrappers for more complex behavior.
+
+ // The simple popping methods pop values into targeted registers; the caller
+ // can free registers using standard functions. These are always called
+ // popXForY where X says something about types and Y something about the
+ // operation being targeted.
+
+ void pop2xI32ForMulDivI32(RegI32* r0, RegI32* r1, RegI32* reserved) {
+#if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
+ // r0 must be eax, and edx will be clobbered.
+ need2xI32(specific_.eax, specific_.edx);
+ *r1 = popI32();
+ *r0 = popI32ToSpecific(specific_.eax);
+ *reserved = specific_.edx;
+#else
+ pop2xI32(r0, r1);
+#endif
+ }
+
+ void pop2xI64ForMulI64(RegI64* r0, RegI64* r1, RegI32* temp,
+ RegI64* reserved) {
+#if defined(JS_CODEGEN_X64)
+ // r0 must be rax, and rdx will be clobbered.
+ need2xI64(specific_.rax, specific_.rdx);
+ *r1 = popI64();
+ *r0 = popI64ToSpecific(specific_.rax);
+ *reserved = specific_.rdx;
+#elif defined(JS_CODEGEN_X86)
+ // As for x64, though edx is part of r0.
+ need2xI32(specific_.eax, specific_.edx);
+ *r1 = popI64();
+ *r0 = popI64ToSpecific(specific_.edx_eax);
+ *temp = needI32();
+#elif defined(JS_CODEGEN_MIPS64)
+ pop2xI64(r0, r1);
+#elif defined(JS_CODEGEN_MIPS32)
+ pop2xI64(r0, r1);
+ *temp = needI32();
+#elif defined(JS_CODEGEN_ARM)
+ pop2xI64(r0, r1);
+ *temp = needI32();
+#elif defined(JS_CODEGEN_ARM64)
+ pop2xI64(r0, r1);
+#else
+ MOZ_CRASH("BaseCompiler porting interface: pop2xI64ForMulI64");
+#endif
+ }
+
+ void pop2xI64ForDivI64(RegI64* r0, RegI64* r1, RegI64* reserved) {
+#if defined(JS_CODEGEN_X64)
+ // r0 must be rax, and rdx will be clobbered.
+ need2xI64(specific_.rax, specific_.rdx);
+ *r1 = popI64();
+ *r0 = popI64ToSpecific(specific_.rax);
+ *reserved = specific_.rdx;
+#else
+ pop2xI64(r0, r1);
+#endif
+ }
+
+ void pop2xI32ForShift(RegI32* r0, RegI32* r1) {
+#if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
+ // r1 must be ecx for a variable shift, unless BMI2 is available.
+ if (!Assembler::HasBMI2()) {
+ *r1 = popI32(specific_.ecx);
+ *r0 = popI32();
+ return;
+ }
+#endif
+ pop2xI32(r0, r1);
+ }
+
+ void pop2xI64ForShift(RegI64* r0, RegI64* r1) {
+#if defined(JS_CODEGEN_X86)
+ // r1 must be ecx for a variable shift.
+ needI32(specific_.ecx);
+ *r1 = popI64ToSpecific(widenI32(specific_.ecx));
+ *r0 = popI64();
+#else
+# if defined(JS_CODEGEN_X64)
+ // r1 must be rcx for a variable shift, unless BMI2 is available.
+ if (!Assembler::HasBMI2()) {
+ needI64(specific_.rcx);
+ *r1 = popI64ToSpecific(specific_.rcx);
+ *r0 = popI64();
+ return;
+ }
+# endif
+ pop2xI64(r0, r1);
+#endif
+ }
+
+ void pop2xI32ForRotate(RegI32* r0, RegI32* r1) {
+#if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
+ // r1 must be ecx for a variable rotate.
+ *r1 = popI32(specific_.ecx);
+ *r0 = popI32();
+#else
+ pop2xI32(r0, r1);
+#endif
+ }
+
+ void pop2xI64ForRotate(RegI64* r0, RegI64* r1) {
+#if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
+ // r1 must be ecx for a variable rotate.
+ needI32(specific_.ecx);
+ *r1 = popI64ToSpecific(widenI32(specific_.ecx));
+ *r0 = popI64();
+#else
+ pop2xI64(r0, r1);
+#endif
+ }
+
+ void popI32ForSignExtendI64(RegI64* r0) {
+#if defined(JS_CODEGEN_X86)
+ // r0 must be edx:eax for cdq
+ need2xI32(specific_.edx, specific_.eax);
+ *r0 = specific_.edx_eax;
+ popI32ToSpecific(specific_.eax);
+#else
+ *r0 = widenI32(popI32());
+#endif
+ }
+
+ void popI64ForSignExtendI64(RegI64* r0) {
+#if defined(JS_CODEGEN_X86)
+ // r0 must be edx:eax for cdq
+ need2xI32(specific_.edx, specific_.eax);
+ // Low on top, high underneath
+ *r0 = popI64ToSpecific(specific_.edx_eax);
+#else
+ *r0 = popI64();
+#endif
+ }
+
+ // The RAII wrappers are used because we sometimes have to free partial
+ // registers (as when part of a register is the scratch register that has
+ // been temporarily used), or must not free a register at all (as when the
+ // register is the same as the destination register, but only on some
+ // platforms, not on all). These are called PopX{32,64}Regs where X is the
+ // operation being targeted.
+
+ // Utility struct that holds the BaseCompiler and the destination, and frees
+ // the destination if it has not been extracted.
+
+ template <typename T>
+ class PopBase {
+ T rd_;
+
+ void maybeFree(RegI32 r) { bc->maybeFreeI32(r); }
+ void maybeFree(RegI64 r) { bc->maybeFreeI64(r); }
+
+ protected:
+ BaseCompiler* const bc;
+
+ void setRd(T r) {
+ MOZ_ASSERT(rd_.isInvalid());
+ rd_ = r;
+ }
+ T getRd() const {
+ MOZ_ASSERT(rd_.isValid());
+ return rd_;
+ }
+
+ public:
+ explicit PopBase(BaseCompiler* bc) : bc(bc) {}
+ ~PopBase() { maybeFree(rd_); }
+
+ // Take and clear the Rd - use this when pushing Rd.
+ T takeRd() {
+ MOZ_ASSERT(rd_.isValid());
+ T r = rd_;
+ rd_ = T::Invalid();
+ return r;
+ }
+ };
+
+ friend class PopAtomicCmpXchg32Regs;
+ class PopAtomicCmpXchg32Regs : public PopBase<RegI32> {
+ using Base = PopBase<RegI32>;
+ RegI32 rexpect, rnew;
+ AtomicCmpXchg32Temps temps;
+
+ public:
+#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86)
+ explicit PopAtomicCmpXchg32Regs(BaseCompiler* bc, ValType type,
+ Scalar::Type viewType)
+ : Base(bc) {
+ // For cmpxchg, the expected value and the result are both in eax.
+ bc->needI32(bc->specific_.eax);
+ if (type == ValType::I64) {
+ rnew = bc->popI64ToI32();
+ rexpect = bc->popI64ToSpecificI32(bc->specific_.eax);
+ } else {
+ rnew = bc->popI32();
+ rexpect = bc->popI32ToSpecific(bc->specific_.eax);
+ }
+ setRd(rexpect);
+ }
+ ~PopAtomicCmpXchg32Regs() { bc->freeI32(rnew); }
+#elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64)
+ explicit PopAtomicCmpXchg32Regs(BaseCompiler* bc, ValType type,
+ Scalar::Type viewType)
+ : Base(bc) {
+ if (type == ValType::I64) {
+ rnew = bc->popI64ToI32();
+ rexpect = bc->popI64ToI32();
+ } else {
+ rnew = bc->popI32();
+ rexpect = bc->popI32();
+ }
+ setRd(bc->needI32());
+ }
+ ~PopAtomicCmpXchg32Regs() {
+ bc->freeI32(rnew);
+ bc->freeI32(rexpect);
+ }
+#elif defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
+ explicit PopAtomicCmpXchg32Regs(BaseCompiler* bc, ValType type,
+ Scalar::Type viewType)
+ : Base(bc) {
+ if (type == ValType::I64) {
+ rnew = bc->popI64ToI32();
+ rexpect = bc->popI64ToI32();
+ } else {
+ rnew = bc->popI32();
+ rexpect = bc->popI32();
+ }
+ if (Scalar::byteSize(viewType) < 4) {
+ temps.allocate(bc);
+ }
+ setRd(bc->needI32());
+ }
+ ~PopAtomicCmpXchg32Regs() {
+ bc->freeI32(rnew);
+ bc->freeI32(rexpect);
+ temps.maybeFree(bc);
+ }
+#else
+ explicit PopAtomicCmpXchg32Regs(BaseCompiler* bc, ValType type,
+ Scalar::Type viewType)
+ : Base(bc) {
+ MOZ_CRASH("BaseCompiler porting interface: PopAtomicCmpXchg32Regs");
+ }
+#endif
+
+ template <typename T>
+ void atomicCmpXchg32(const MemoryAccessDesc& access, T srcAddr) {
+ bc->atomicCmpXchg32(access, srcAddr, rexpect, rnew, getRd(), temps);
+ }
+ };
+
+ friend class PopAtomicCmpXchg64Regs;
+ class PopAtomicCmpXchg64Regs : public PopBase<RegI64> {
+ using Base = PopBase<RegI64>;
+ RegI64 rexpect, rnew;
+
+ public:
+#ifdef JS_CODEGEN_X64
+ explicit PopAtomicCmpXchg64Regs(BaseCompiler* bc) : Base(bc) {
+ // For cmpxchg, the expected value and the result are both in rax.
+ bc->needI64(bc->specific_.rax);
+ rnew = bc->popI64();
+ rexpect = bc->popI64ToSpecific(bc->specific_.rax);
+ setRd(rexpect);
+ }
+ ~PopAtomicCmpXchg64Regs() { bc->freeI64(rnew); }
+#elif defined(JS_CODEGEN_X86)
+ explicit PopAtomicCmpXchg64Regs(BaseCompiler* bc) : Base(bc) {
+ // For cmpxchg8b, the expected value and the result are both in
+ // edx:eax, and the replacement value is in ecx:ebx. But we can't
+ // allocate ebx here, so instead we allocate a temp to hold the low
+ // word of 'new'.
+ bc->needI64(bc->specific_.edx_eax);
+ bc->needI32(bc->specific_.ecx);
+
+ rnew = bc->popI64ToSpecific(
+ RegI64(Register64(bc->specific_.ecx, bc->needI32())));
+ rexpect = bc->popI64ToSpecific(bc->specific_.edx_eax);
+ setRd(rexpect);
+ }
+ ~PopAtomicCmpXchg64Regs() { bc->freeI64(rnew); }
+#elif defined(JS_CODEGEN_ARM)
+ explicit PopAtomicCmpXchg64Regs(BaseCompiler* bc) : Base(bc) {
+ // The replacement value and the result must both be odd/even pairs.
+ rnew = bc->popI64Pair();
+ rexpect = bc->popI64();
+ setRd(bc->needI64Pair());
+ }
+ ~PopAtomicCmpXchg64Regs() {
+ bc->freeI64(rexpect);
+ bc->freeI64(rnew);
+ }
+#elif defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_MIPS32) || \
+ defined(JS_CODEGEN_MIPS64)
+ explicit PopAtomicCmpXchg64Regs(BaseCompiler* bc) : Base(bc) {
+ rnew = bc->popI64();
+ rexpect = bc->popI64();
+ setRd(bc->needI64());
+ }
+ ~PopAtomicCmpXchg64Regs() {
+ bc->freeI64(rexpect);
+ bc->freeI64(rnew);
+ }
+#else
+ explicit PopAtomicCmpXchg64Regs(BaseCompiler* bc) : Base(bc) {
+ MOZ_CRASH("BaseCompiler porting interface: PopAtomicCmpXchg64Regs");
+ }
+#endif
+
+#ifdef JS_CODEGEN_X86
+ template <typename T>
+ void atomicCmpXchg64(const MemoryAccessDesc& access, T srcAddr,
+ RegI32 ebx) {
+ MOZ_ASSERT(ebx == js::jit::ebx);
+ bc->masm.move32(rnew.low, ebx);
+ bc->masm.wasmCompareExchange64(access, srcAddr, rexpect,
+ bc->specific_.ecx_ebx, getRd());
+ }
+#else
+ template <typename T>
+ void atomicCmpXchg64(const MemoryAccessDesc& access, T srcAddr) {
+ bc->masm.wasmCompareExchange64(access, srcAddr, rexpect, rnew, getRd());
+ }
+#endif
+ };
+
+#ifndef JS_64BIT
+ class PopAtomicLoad64Regs : public PopBase<RegI64> {
+ using Base = PopBase<RegI64>;
+
+ public:
+# if defined(JS_CODEGEN_X86)
+ explicit PopAtomicLoad64Regs(BaseCompiler* bc) : Base(bc) {
+ // The result is in edx:eax, and we need ecx:ebx as a temp. But we
+ // can't reserve ebx yet, so we'll accept it as an argument to the
+ // operation (below).
+ bc->needI32(bc->specific_.ecx);
+ bc->needI64(bc->specific_.edx_eax);
+ setRd(bc->specific_.edx_eax);
+ }
+ ~PopAtomicLoad64Regs() { bc->freeI32(bc->specific_.ecx); }
+# elif defined(JS_CODEGEN_ARM)
+ explicit PopAtomicLoad64Regs(BaseCompiler* bc) : Base(bc) {
+ setRd(bc->needI64Pair());
+ }
+# elif defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
+ explicit PopAtomicLoad64Regs(BaseCompiler* bc) : Base(bc) {
+ setRd(bc->needI64());
+ }
+# else
+ explicit PopAtomicLoad64Regs(BaseCompiler* bc) : Base(bc) {
+ MOZ_CRASH("BaseCompiler porting interface: PopAtomicLoad64Regs");
+ }
+# endif
+
+# ifdef JS_CODEGEN_X86
+ template <typename T>
+ void atomicLoad64(const MemoryAccessDesc& access, T srcAddr, RegI32 ebx) {
+ MOZ_ASSERT(ebx == js::jit::ebx);
+ bc->masm.wasmAtomicLoad64(access, srcAddr, bc->specific_.ecx_ebx,
+ getRd());
+ }
+# else // ARM, MIPS32
+ template <typename T>
+ void atomicLoad64(const MemoryAccessDesc& access, T srcAddr) {
+ bc->masm.wasmAtomicLoad64(access, srcAddr, RegI64::Invalid(), getRd());
+ }
+# endif
+ };
+#endif // JS_64BIT
+
+ friend class PopAtomicRMW32Regs;
+ class PopAtomicRMW32Regs : public PopBase<RegI32> {
+ using Base = PopBase<RegI32>;
+ RegI32 rv;
+ AtomicRMW32Temps temps;
+
+ public:
+#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86)
+ explicit PopAtomicRMW32Regs(BaseCompiler* bc, ValType type,
+ Scalar::Type viewType, AtomicOp op)
+ : Base(bc) {
+ bc->needI32(bc->specific_.eax);
+ if (op == AtomicFetchAddOp || op == AtomicFetchSubOp) {
+ // We use xadd, so source and destination are the same. Using
+ // eax here is overconstraining, but for byte operations on x86
+ // we do need something with a byte register.
+ if (type == ValType::I64) {
+ rv = bc->popI64ToSpecificI32(bc->specific_.eax);
+ } else {
+ rv = bc->popI32ToSpecific(bc->specific_.eax);
+ }
+ setRd(rv);
+ } else {
+ // We use a cmpxchg loop. The output must be eax; the input
+ // must be in a separate register since it may be used several
+ // times.
+ if (type == ValType::I64) {
+ rv = bc->popI64ToI32();
+ } else {
+ rv = bc->popI32();
+ }
+ setRd(bc->specific_.eax);
+# if defined(JS_CODEGEN_X86)
+ // Single-byte is a special case handled very locally with
+ // ScratchReg; see atomicRMW32 above.
+ if (Scalar::byteSize(viewType) > 1) {
+ temps.allocate(bc);
+ }
+# else
+ temps.allocate(bc);
+# endif
+ }
+ }
+ ~PopAtomicRMW32Regs() {
+ if (rv != bc->specific_.eax) {
+ bc->freeI32(rv);
+ }
+ temps.maybeFree(bc);
+ }
+#elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64)
+ explicit PopAtomicRMW32Regs(BaseCompiler* bc, ValType type,
+ Scalar::Type viewType, AtomicOp op)
+ : Base(bc) {
+ rv = type == ValType::I64 ? bc->popI64ToI32() : bc->popI32();
+ temps.allocate(bc);
+ setRd(bc->needI32());
+ }
+ ~PopAtomicRMW32Regs() {
+ bc->freeI32(rv);
+ temps.maybeFree(bc);
+ }
+#elif defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
+ explicit PopAtomicRMW32Regs(BaseCompiler* bc, ValType type,
+ Scalar::Type viewType, AtomicOp op)
+ : Base(bc) {
+ rv = type == ValType::I64 ? bc->popI64ToI32() : bc->popI32();
+ if (Scalar::byteSize(viewType) < 4) {
+ temps.allocate(bc);
+ }
+
+ setRd(bc->needI32());
+ }
+ ~PopAtomicRMW32Regs() {
+ bc->freeI32(rv);
+ temps.maybeFree(bc);
+ }
+#else
+ explicit PopAtomicRMW32Regs(BaseCompiler* bc, ValType type,
+ Scalar::Type viewType, AtomicOp op)
+ : Base(bc) {
+ MOZ_CRASH("BaseCompiler porting interface: PopAtomicRMW32Regs");
+ }
+#endif
+
+ template <typename T>
+ void atomicRMW32(const MemoryAccessDesc& access, T srcAddr, AtomicOp op) {
+ bc->atomicRMW32(access, srcAddr, op, rv, getRd(), temps);
+ }
+ };
+
+ friend class PopAtomicRMW64Regs;
+ class PopAtomicRMW64Regs : public PopBase<RegI64> {
+ using Base = PopBase<RegI64>;
+#if defined(JS_CODEGEN_X64)
+ AtomicOp op;
+#endif
+ RegI64 rv, temp;
+
+ public:
+#if defined(JS_CODEGEN_X64)
+ explicit PopAtomicRMW64Regs(BaseCompiler* bc, AtomicOp op)
+ : Base(bc), op(op) {
+ if (op == AtomicFetchAddOp || op == AtomicFetchSubOp) {
+ // We use xaddq, so input and output must be the same register.
+ rv = bc->popI64();
+ setRd(rv);
+ } else {
+ // We use a cmpxchgq loop, so the output must be rax.
+ bc->needI64(bc->specific_.rax);
+ rv = bc->popI64();
+ temp = bc->needI64();
+ setRd(bc->specific_.rax);
+ }
+ }
+ ~PopAtomicRMW64Regs() {
+ bc->maybeFreeI64(temp);
+ if (op != AtomicFetchAddOp && op != AtomicFetchSubOp) {
+ bc->freeI64(rv);
+ }
+ }
+#elif defined(JS_CODEGEN_X86)
+ // We'll use cmpxchg8b, so rv must be in ecx:ebx, and rd must be
+ // edx:eax. But we can't reserve ebx here because we need it later, so
+ // use a separate temp and set up ebx when we perform the operation.
+ explicit PopAtomicRMW64Regs(BaseCompiler* bc, AtomicOp) : Base(bc) {
+ bc->needI32(bc->specific_.ecx);
+ bc->needI64(bc->specific_.edx_eax);
+
+ temp = RegI64(Register64(bc->specific_.ecx, bc->needI32()));
+ bc->popI64ToSpecific(temp);
+
+ setRd(bc->specific_.edx_eax);
+ }
+ ~PopAtomicRMW64Regs() { bc->freeI64(temp); }
+ RegI32 valueHigh() const { return RegI32(temp.high); }
+ RegI32 valueLow() const { return RegI32(temp.low); }
+#elif defined(JS_CODEGEN_ARM)
+ explicit PopAtomicRMW64Regs(BaseCompiler* bc, AtomicOp) : Base(bc) {
+ // We use an ldrexd/strexd loop, so the temp and the output must be
+ // odd/even pairs.
+ rv = bc->popI64();
+ temp = bc->needI64Pair();
+ setRd(bc->needI64Pair());
+ }
+ ~PopAtomicRMW64Regs() {
+ bc->freeI64(rv);
+ bc->freeI64(temp);
+ }
+#elif defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_MIPS32) || \
+ defined(JS_CODEGEN_MIPS64)
+ explicit PopAtomicRMW64Regs(BaseCompiler* bc, AtomicOp) : Base(bc) {
+ rv = bc->popI64();
+ temp = bc->needI64();
+ setRd(bc->needI64());
+ }
+ ~PopAtomicRMW64Regs() {
+ bc->freeI64(rv);
+ bc->freeI64(temp);
+ }
+#else
+ explicit PopAtomicRMW64Regs(BaseCompiler* bc, AtomicOp) : Base(bc) {
+ MOZ_CRASH("BaseCompiler porting interface: PopAtomicRMW64Regs");
+ }
+#endif
+
+#ifdef JS_CODEGEN_X86
+ template <typename T, typename V>
+ void atomicRMW64(const MemoryAccessDesc& access, T srcAddr, AtomicOp op,
+ const V& value, RegI32 ebx) {
+ MOZ_ASSERT(ebx == js::jit::ebx);
+ bc->atomicRMW64(access, srcAddr, op, value, bc->specific_.ecx_ebx,
+ getRd());
+ }
+#else
+ template <typename T>
+ void atomicRMW64(const MemoryAccessDesc& access, T srcAddr, AtomicOp op) {
+ bc->atomicRMW64(access, srcAddr, op, rv, temp, getRd());
+ }
+#endif
+ };
+
+ friend class PopAtomicXchg32Regs;
+ class PopAtomicXchg32Regs : public PopBase<RegI32> {
+ using Base = PopBase<RegI32>;
+ RegI32 rv;
+ AtomicXchg32Temps temps;
+
+ public:
+#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86)
+ explicit PopAtomicXchg32Regs(BaseCompiler* bc, ValType type,
+ Scalar::Type viewType)
+ : Base(bc) {
+ // The xchg instruction reuses rv as rd.
+ rv = (type == ValType::I64) ? bc->popI64ToI32() : bc->popI32();
+ setRd(rv);
+ }
+#elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64)
+ explicit PopAtomicXchg32Regs(BaseCompiler* bc, ValType type,
+ Scalar::Type viewType)
+ : Base(bc) {
+ rv = (type == ValType::I64) ? bc->popI64ToI32() : bc->popI32();
+ setRd(bc->needI32());
+ }
+ ~PopAtomicXchg32Regs() { bc->freeI32(rv); }
+#elif defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
+ explicit PopAtomicXchg32Regs(BaseCompiler* bc, ValType type,
+ Scalar::Type viewType)
+ : Base(bc) {
+ rv = (type == ValType::I64) ? bc->popI64ToI32() : bc->popI32();
+ if (Scalar::byteSize(viewType) < 4) {
+ temps.allocate(bc);
+ }
+ setRd(bc->needI32());
+ }
+ ~PopAtomicXchg32Regs() {
+ temps.maybeFree(bc);
+ bc->freeI32(rv);
+ }
+#else
+ explicit PopAtomicXchg32Regs(BaseCompiler* bc, ValType type,
+ Scalar::Type viewType)
+ : Base(bc) {
+ MOZ_CRASH("BaseCompiler porting interface: PopAtomicXchg32Regs");
+ }
+#endif
+
+ template <typename T>
+ void atomicXchg32(const MemoryAccessDesc& access, T srcAddr) {
+ bc->atomicXchg32(access, srcAddr, rv, getRd(), temps);
+ }
+ };
+
+ friend class PopAtomicXchg64Regs;
+ class PopAtomicXchg64Regs : public PopBase<RegI64> {
+ using Base = PopBase<RegI64>;
+ RegI64 rv;
+
+ public:
+#if defined(JS_CODEGEN_X64)
+ explicit PopAtomicXchg64Regs(BaseCompiler* bc) : Base(bc) {
+ rv = bc->popI64();
+ setRd(rv);
+ }
+#elif defined(JS_CODEGEN_ARM64)
+ explicit PopAtomicXchg64Regs(BaseCompiler* bc) : Base(bc) {
+ rv = bc->popI64();
+ setRd(bc->needI64());
+ }
+ ~PopAtomicXchg64Regs() { bc->freeI64(rv); }
+#elif defined(JS_CODEGEN_X86)
+ // We'll use cmpxchg8b, so rv must be in ecx:ebx, and rd must be
+ // edx:eax. But we can't reserve ebx here because we need it later, so
+ // use a separate temp and set up ebx when we perform the operation.
+ explicit PopAtomicXchg64Regs(BaseCompiler* bc) : Base(bc) {
+ bc->needI32(bc->specific_.ecx);
+ bc->needI64(bc->specific_.edx_eax);
+
+ rv = RegI64(Register64(bc->specific_.ecx, bc->needI32()));
+ bc->popI64ToSpecific(rv);
+
+ setRd(bc->specific_.edx_eax);
+ }
+ ~PopAtomicXchg64Regs() { bc->freeI64(rv); }
+#elif defined(JS_CODEGEN_ARM)
+ // Both rv and rd must be odd/even pairs.
+ explicit PopAtomicXchg64Regs(BaseCompiler* bc) : Base(bc) {
+ rv = bc->popI64ToSpecific(bc->needI64Pair());
+ setRd(bc->needI64Pair());
+ }
+ ~PopAtomicXchg64Regs() { bc->freeI64(rv); }
+#elif defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
+ explicit PopAtomicXchg64Regs(BaseCompiler* bc) : Base(bc) {
+ rv = bc->popI64ToSpecific(bc->needI64());
+ setRd(bc->needI64());
+ }
+ ~PopAtomicXchg64Regs() { bc->freeI64(rv); }
+#else
+ explicit PopAtomicXchg64Regs(BaseCompiler* bc) : Base(bc) {
+ MOZ_CRASH("BaseCompiler porting interface: xchg64");
+ }
+#endif
+
+#ifdef JS_CODEGEN_X86
+ template <typename T>
+ void atomicXchg64(const MemoryAccessDesc& access, T srcAddr,
+ RegI32 ebx) const {
+ MOZ_ASSERT(ebx == js::jit::ebx);
+ bc->masm.move32(rv.low, ebx);
+ bc->masm.wasmAtomicExchange64(access, srcAddr, bc->specific_.ecx_ebx,
+ getRd());
+ }
+#else
+ template <typename T>
+ void atomicXchg64(const MemoryAccessDesc& access, T srcAddr) const {
+ bc->masm.wasmAtomicExchange64(access, srcAddr, rv, getRd());
+ }
+#endif
+ };
+
+ ////////////////////////////////////////////////////////////
+ //
+ // Generally speaking, BELOW this point there should be no
+ // platform dependencies. We make very occasional exceptions
+ // when it doesn't become messy and further abstraction is
+ // not desirable.
+ //
+ ////////////////////////////////////////////////////////////
+
+ ////////////////////////////////////////////////////////////
+ //
+ // Sundry wrappers.
+
+ void pop2xI32(RegI32* r0, RegI32* r1) {
+ *r1 = popI32();
+ *r0 = popI32();
+ }
+
+ RegI32 popI32ToSpecific(RegI32 specific) {
+ freeI32(specific);
+ return popI32(specific);
+ }
+
+ void pop2xI64(RegI64* r0, RegI64* r1) {
+ *r1 = popI64();
+ *r0 = popI64();
+ }
+
+ RegI64 popI64ToSpecific(RegI64 specific) {
+ freeI64(specific);
+ return popI64(specific);
+ }
+
+#ifdef JS_CODEGEN_ARM
+ RegI64 popI64Pair() {
+ RegI64 r = needI64Pair();
+ popI64ToSpecific(r);
+ return r;
+ }
+#endif
+
+ void pop2xF32(RegF32* r0, RegF32* r1) {
+ *r1 = popF32();
+ *r0 = popF32();
+ }
+
+ void pop2xF64(RegF64* r0, RegF64* r1) {
+ *r1 = popF64();
+ *r0 = popF64();
+ }
+
+#ifdef ENABLE_WASM_SIMD
+ void pop2xV128(RegV128* r0, RegV128* r1) {
+ *r1 = popV128();
+ *r0 = popV128();
+ }
+#endif
+
+ void pop2xRef(RegPtr* r0, RegPtr* r1) {
+ *r1 = popRef();
+ *r0 = popRef();
+ }
+
+ RegI32 popI64ToI32() {
+ RegI64 r = popI64();
+ return narrowI64(r);
+ }
+
+ RegI32 popI64ToSpecificI32(RegI32 specific) {
+ RegI64 rd = widenI32(specific);
+ popI64ToSpecific(rd);
+ return narrowI64(rd);
+ }
+
+ void pushU32AsI64(RegI32 rs) {
+ RegI64 rd = widenI32(rs);
+ masm.move32To64ZeroExtend(rs, rd);
+ pushI64(rd);
+ }
+
+ RegI32 popMemoryAccess(MemoryAccessDesc* access, AccessCheck* check);
+
+ void pushHeapBase();
+
+ template <typename RegType>
+ RegType pop();
+ template <typename RegType>
+ RegType need();
+ template <typename RegType>
+ void free(RegType r);
+
+ ////////////////////////////////////////////////////////////
+ //
+ // Sundry helpers.
+
+ uint32_t readCallSiteLineOrBytecode() {
+ if (!func_.callSiteLineNums.empty()) {
+ return func_.callSiteLineNums[lastReadCallSite_++];
+ }
+ return iter_.lastOpcodeOffset();
+ }
+
+ bool done() const { return iter_.done(); }
+
+ BytecodeOffset bytecodeOffset() const { return iter_.bytecodeOffset(); }
+
+ void trap(Trap t) const { masm.wasmTrap(t, bytecodeOffset()); }
+
+ ////////////////////////////////////////////////////////////
+ //
+ // Object support.
+
+ // This emits a GC pre-write barrier. The pre-barrier is needed when we
+ // replace a member field with a new value, the previous field value might
+ // have no other referents, and an incremental GC is ongoing. The field
+ // might belong to an object, or be a stack slot, a register, or a
+ // heap-allocated value.
+ //
+ // let obj = { field: previousValue };
+ // obj.field = newValue; // previousValue must be marked with a pre-barrier.
+ //
+ // The `valueAddr` is the address of the location that we are about to
+ // update. This function preserves that register.
+
+ void emitPreBarrier(RegPtr valueAddr) {
+ Label skipBarrier;
+ ScratchPtr scratch(*this);
+
+ fr.loadTlsPtr(scratch);
+ EmitWasmPreBarrierGuard(masm, scratch, scratch, valueAddr, &skipBarrier);
+
+ fr.loadTlsPtr(scratch);
+#ifdef JS_CODEGEN_ARM64
+ // The prebarrier stub assumes the PseudoStackPointer is set up. It is OK
+ // to just move the sp to x28 here because x28 is not being used by the
+ // baseline compiler and need not be saved or restored.
+ MOZ_ASSERT(!GeneralRegisterSet::All().hasRegisterIndex(x28.asUnsized()));
+ masm.Mov(x28, sp);
+#endif
+ EmitWasmPreBarrierCall(masm, scratch, scratch, valueAddr);
+
+ masm.bind(&skipBarrier);
+ }
+
+ // This frees the register `valueAddr`.
+
+ [[nodiscard]] bool emitPostBarrierCall(RegPtr valueAddr) {
+ uint32_t bytecodeOffset = iter_.lastOpcodeOffset();
+
+ // The `valueAddr` is a raw pointer to the cell within some GC object or
+ // TLS area, and we guarantee that the GC will not run while the
+ // postbarrier call is active, so push a uintptr_t value.
+#ifdef JS_64BIT
+ pushI64(RegI64(Register64(valueAddr)));
+#else
+ pushI32(RegI32(valueAddr));
+#endif
+ if (!emitInstanceCall(bytecodeOffset, SASigPostBarrier,
+ /*pushReturnedValue=*/false)) {
+ return false;
+ }
+ return true;
+ }
+
+ [[nodiscard]] bool emitBarrieredStore(const Maybe<RegPtr>& object,
+ RegPtr valueAddr, RegPtr value) {
+ // TODO/AnyRef-boxing: With boxed immediates and strings, the write
+ // barrier is going to have to be more complicated.
+ ASSERT_ANYREF_IS_JSOBJECT;
+
+ emitPreBarrier(valueAddr); // Preserves valueAddr
+ masm.storePtr(value, Address(valueAddr, 0));
+
+ Label skipBarrier;
+ sync();
+
+ RegPtr otherScratch = needRef();
+ EmitWasmPostBarrierGuard(masm, object, otherScratch, value, &skipBarrier);
+ freeRef(otherScratch);
+
+ if (!emitPostBarrierCall(valueAddr)) {
+ return false;
+ }
+ masm.bind(&skipBarrier);
+ return true;
+ }
+
+ ////////////////////////////////////////////////////////////
+ //
+ // Machinery for optimized conditional branches.
+ //
+ // To disable this optimization it is enough always to return false from
+ // sniffConditionalControl{Cmp,Eqz}.
+
+ struct BranchState {
+ union {
+ struct {
+ RegI32 lhs;
+ RegI32 rhs;
+ int32_t imm;
+ bool rhsImm;
+ } i32;
+ struct {
+ RegI64 lhs;
+ RegI64 rhs;
+ int64_t imm;
+ bool rhsImm;
+ } i64;
+ struct {
+ RegF32 lhs;
+ RegF32 rhs;
+ } f32;
+ struct {
+ RegF64 lhs;
+ RegF64 rhs;
+ } f64;
+ };
+
+ Label* const label; // The target of the branch, never NULL
+ const StackHeight stackHeight; // The stack base above which to place
+ // stack-spilled block results, if
+ // hasBlockResults().
+ const bool invertBranch; // If true, invert the sense of the branch
+ const ResultType resultType; // The result propagated along the edges
+
+ explicit BranchState(Label* label)
+ : label(label),
+ stackHeight(StackHeight::Invalid()),
+ invertBranch(false),
+ resultType(ResultType::Empty()) {}
+
+ BranchState(Label* label, bool invertBranch)
+ : label(label),
+ stackHeight(StackHeight::Invalid()),
+ invertBranch(invertBranch),
+ resultType(ResultType::Empty()) {}
+
+ BranchState(Label* label, StackHeight stackHeight, bool invertBranch,
+ ResultType resultType)
+ : label(label),
+ stackHeight(stackHeight),
+ invertBranch(invertBranch),
+ resultType(resultType) {}
+
+ bool hasBlockResults() const { return stackHeight.isValid(); }
+ };
+
+ void setLatentCompare(Assembler::Condition compareOp, ValType operandType) {
+ latentOp_ = LatentOp::Compare;
+ latentType_ = operandType;
+ latentIntCmp_ = compareOp;
+ }
+
+ void setLatentCompare(Assembler::DoubleCondition compareOp,
+ ValType operandType) {
+ latentOp_ = LatentOp::Compare;
+ latentType_ = operandType;
+ latentDoubleCmp_ = compareOp;
+ }
+
+ void setLatentEqz(ValType operandType) {
+ latentOp_ = LatentOp::Eqz;
+ latentType_ = operandType;
+ }
+
+ bool hasLatentOp() const { return latentOp_ != LatentOp::None; }
+
+ void resetLatentOp() { latentOp_ = LatentOp::None; }
+
+ void branchTo(Assembler::DoubleCondition c, RegF64 lhs, RegF64 rhs,
+ Label* l) {
+ masm.branchDouble(c, lhs, rhs, l);
+ }
+
+ void branchTo(Assembler::DoubleCondition c, RegF32 lhs, RegF32 rhs,
+ Label* l) {
+ masm.branchFloat(c, lhs, rhs, l);
+ }
+
+ void branchTo(Assembler::Condition c, RegI32 lhs, RegI32 rhs, Label* l) {
+ masm.branch32(c, lhs, rhs, l);
+ }
+
+ void branchTo(Assembler::Condition c, RegI32 lhs, Imm32 rhs, Label* l) {
+ masm.branch32(c, lhs, rhs, l);
+ }
+
+ void branchTo(Assembler::Condition c, RegI64 lhs, RegI64 rhs, Label* l) {
+ masm.branch64(c, lhs, rhs, l);
+ }
+
+ void branchTo(Assembler::Condition c, RegI64 lhs, Imm64 rhs, Label* l) {
+ masm.branch64(c, lhs, rhs, l);
+ }
+
+ void branchTo(Assembler::Condition c, RegPtr lhs, ImmWord rhs, Label* l) {
+ masm.branchPtr(c, lhs, rhs, l);
+ }
+
+ // Emit a conditional branch that optionally and optimally cleans up the CPU
+ // stack before we branch.
+ //
+ // Cond is either Assembler::Condition or Assembler::DoubleCondition.
+ //
+  // Lhs is RegI32, RegI64, RegF32, RegF64, or RegPtr.
+ //
+ // Rhs is either the same as Lhs, or an immediate expression compatible with
+ // Lhs "when applicable".
+
+ template <typename Cond, typename Lhs, typename Rhs>
+  [[nodiscard]] bool jumpConditionalWithResults(BranchState* b, Cond cond,
+                                                Lhs lhs, Rhs rhs) {
+ if (b->hasBlockResults()) {
+ StackHeight resultsBase(0);
+ if (!topBranchParams(b->resultType, &resultsBase)) {
+ return false;
+ }
+ if (b->stackHeight != resultsBase) {
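+        // The block results must be shuffled into the target's stack
+        // locations before the jump, so branch over the shuffle + jump on
+        // the negated condition rather than branching directly.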
+ Label notTaken;
+ branchTo(b->invertBranch ? cond : Assembler::InvertCondition(cond), lhs,
+ rhs, &notTaken);
+
+ // Shuffle stack args.
+ shuffleStackResultsBeforeBranch(resultsBase, b->stackHeight,
+ b->resultType);
+ masm.jump(b->label);
+ masm.bind(&notTaken);
+ return true;
+ }
+ }
+
+ branchTo(b->invertBranch ? Assembler::InvertCondition(cond) : cond, lhs,
+ rhs, b->label);
+ return true;
+ }
+
+ // sniffConditionalControl{Cmp,Eqz} may modify the latentWhatever_ state in
+ // the BaseCompiler so that a subsequent conditional branch can be compiled
+ // optimally. emitBranchSetup() and emitBranchPerform() will consume that
+ // state. If the latter methods are not called because deadCode_ is true
+ // then the compiler MUST instead call resetLatentOp() to reset the state.
+
+ template <typename Cond>
+  [[nodiscard]] bool sniffConditionalControlCmp(Cond compareOp,
+                                                ValType operandType);
+  [[nodiscard]] bool sniffConditionalControlEqz(ValType operandType);
+  void emitBranchSetup(BranchState* b);
+  [[nodiscard]] bool emitBranchPerform(BranchState* b);
+
+ //////////////////////////////////////////////////////////////////////
+
+ [[nodiscard]] bool emitBody();
+ [[nodiscard]] bool emitBlock();
+ [[nodiscard]] bool emitLoop();
+ [[nodiscard]] bool emitIf();
+ [[nodiscard]] bool emitElse();
+#ifdef ENABLE_WASM_EXCEPTIONS
+ [[nodiscard]] bool emitTry();
+ [[nodiscard]] bool emitCatch();
+ [[nodiscard]] bool emitThrow();
+#endif
+ [[nodiscard]] bool emitEnd();
+ [[nodiscard]] bool emitBr();
+ [[nodiscard]] bool emitBrIf();
+ [[nodiscard]] bool emitBrTable();
+ [[nodiscard]] bool emitDrop();
+ [[nodiscard]] bool emitReturn();
+
+ enum class CalleeOnStack {
+    // After the arguments to the call, there is a callee pushed onto the
+    // value stack. This is only the case for callIndirect. To get the
+    // arguments to the call, emitCallArgs has to reach one element deeper
+    // into the value stack, to skip the callee.
+ True,
+
+ // No callee on the stack.
+ False
+ };
+
+ [[nodiscard]] bool emitCallArgs(const ValTypeVector& args,
+ const StackResultsLoc& results,
+ FunctionCall* baselineCall,
+ CalleeOnStack calleeOnStack);
+
+ [[nodiscard]] bool emitCall();
+ [[nodiscard]] bool emitCallIndirect();
+ [[nodiscard]] bool emitUnaryMathBuiltinCall(SymbolicAddress callee,
+ ValType operandType);
+ [[nodiscard]] bool emitGetLocal();
+ [[nodiscard]] bool emitSetLocal();
+ [[nodiscard]] bool emitTeeLocal();
+ [[nodiscard]] bool emitGetGlobal();
+ [[nodiscard]] bool emitSetGlobal();
+ [[nodiscard]] RegI32 maybeLoadTlsForAccess(const AccessCheck& check);
+ [[nodiscard]] RegI32 maybeLoadTlsForAccess(const AccessCheck& check,
+ RegI32 specific);
+ [[nodiscard]] bool emitLoad(ValType type, Scalar::Type viewType);
+ [[nodiscard]] bool loadCommon(MemoryAccessDesc* access, AccessCheck check,
+ ValType type);
+ [[nodiscard]] bool emitStore(ValType resultType, Scalar::Type viewType);
+ [[nodiscard]] bool storeCommon(MemoryAccessDesc* access, AccessCheck check,
+ ValType resultType);
+ [[nodiscard]] bool emitSelect(bool typed);
+
+ template <bool isSetLocal>
+ [[nodiscard]] bool emitSetOrTeeLocal(uint32_t slot);
+
+  [[nodiscard]] bool endBlock(ResultType type);
+  [[nodiscard]] bool endIfThen(ResultType type);
+  [[nodiscard]] bool endIfThenElse(ResultType type);
+
+ void doReturn(ContinuationKind kind);
+ void pushReturnValueOfCall(const FunctionCall& call, MIRType type);
+
+  [[nodiscard]] bool pushStackResultsForCall(const ResultType& type,
+                                             RegPtr temp,
+                                             StackResultsLoc* loc);
+ void popStackResultsAfterCall(const StackResultsLoc& results,
+ uint32_t stackArgBytes);
+
+ void emitCompareI32(Assembler::Condition compareOp, ValType compareType);
+ void emitCompareI64(Assembler::Condition compareOp, ValType compareType);
+ void emitCompareF32(Assembler::DoubleCondition compareOp,
+ ValType compareType);
+ void emitCompareF64(Assembler::DoubleCondition compareOp,
+ ValType compareType);
+ void emitCompareRef(Assembler::Condition compareOp, ValType compareType);
+
+ void emitAddI32();
+ void emitAddI64();
+ void emitAddF64();
+ void emitAddF32();
+ void emitSubtractI32();
+ void emitSubtractI64();
+ void emitSubtractF32();
+ void emitSubtractF64();
+ void emitMultiplyI32();
+ void emitMultiplyI64();
+ void emitMultiplyF32();
+ void emitMultiplyF64();
+ void emitQuotientI32();
+ void emitQuotientU32();
+ void emitRemainderI32();
+ void emitRemainderU32();
+#ifdef RABALDR_INT_DIV_I64_CALLOUT
+ [[nodiscard]] bool emitDivOrModI64BuiltinCall(SymbolicAddress callee,
+ ValType operandType);
+#else
+ void emitQuotientI64();
+ void emitQuotientU64();
+ void emitRemainderI64();
+ void emitRemainderU64();
+#endif
+ void emitDivideF32();
+ void emitDivideF64();
+ void emitMinF32();
+ void emitMaxF32();
+ void emitMinF64();
+ void emitMaxF64();
+ void emitCopysignF32();
+ void emitCopysignF64();
+ void emitOrI32();
+ void emitOrI64();
+ void emitAndI32();
+ void emitAndI64();
+ void emitXorI32();
+ void emitXorI64();
+ void emitShlI32();
+ void emitShlI64();
+ void emitShrI32();
+ void emitShrI64();
+ void emitShrU32();
+ void emitShrU64();
+ void emitRotrI32();
+ void emitRotrI64();
+ void emitRotlI32();
+ void emitRotlI64();
+ void emitEqzI32();
+ void emitEqzI64();
+ void emitClzI32();
+ void emitClzI64();
+ void emitCtzI32();
+ void emitCtzI64();
+ void emitPopcntI32();
+ void emitPopcntI64();
+ void emitAbsF32();
+ void emitAbsF64();
+ void emitNegateF32();
+ void emitNegateF64();
+ void emitSqrtF32();
+ void emitSqrtF64();
+ template <TruncFlags flags>
+ [[nodiscard]] bool emitTruncateF32ToI32();
+ template <TruncFlags flags>
+ [[nodiscard]] bool emitTruncateF64ToI32();
+#ifdef RABALDR_FLOAT_TO_I64_CALLOUT
+ [[nodiscard]] bool emitConvertFloatingToInt64Callout(SymbolicAddress callee,
+ ValType operandType,
+ ValType resultType);
+#else
+ template <TruncFlags flags>
+ [[nodiscard]] bool emitTruncateF32ToI64();
+ template <TruncFlags flags>
+ [[nodiscard]] bool emitTruncateF64ToI64();
+#endif
+ void emitWrapI64ToI32();
+ void emitExtendI32_8();
+ void emitExtendI32_16();
+ void emitExtendI64_8();
+ void emitExtendI64_16();
+ void emitExtendI64_32();
+ void emitExtendI32ToI64();
+ void emitExtendU32ToI64();
+ void emitReinterpretF32AsI32();
+ void emitReinterpretF64AsI64();
+ void emitConvertF64ToF32();
+ void emitConvertI32ToF32();
+ void emitConvertU32ToF32();
+ void emitConvertF32ToF64();
+ void emitConvertI32ToF64();
+ void emitConvertU32ToF64();
+#ifdef RABALDR_I64_TO_FLOAT_CALLOUT
+ [[nodiscard]] bool emitConvertInt64ToFloatingCallout(SymbolicAddress callee,
+ ValType operandType,
+ ValType resultType);
+#else
+ void emitConvertI64ToF32();
+ void emitConvertU64ToF32();
+ void emitConvertI64ToF64();
+ void emitConvertU64ToF64();
+#endif
+ void emitReinterpretI32AsF32();
+ void emitReinterpretI64AsF64();
+ void emitRound(RoundingMode roundingMode, ValType operandType);
+ [[nodiscard]] bool emitInstanceCall(uint32_t lineOrBytecode,
+ const SymbolicAddressSignature& builtin,
+ bool pushReturnedValue = true);
+ [[nodiscard]] bool emitMemoryGrow();
+ [[nodiscard]] bool emitMemorySize();
+
+ [[nodiscard]] bool emitRefFunc();
+ [[nodiscard]] bool emitRefNull();
+ [[nodiscard]] bool emitRefIsNull();
+#ifdef ENABLE_WASM_FUNCTION_REFERENCES
+ [[nodiscard]] bool emitRefAsNonNull();
+ [[nodiscard]] bool emitBrOnNull();
+#endif
+
+ [[nodiscard]] bool emitAtomicCmpXchg(ValType type, Scalar::Type viewType);
+ [[nodiscard]] bool emitAtomicLoad(ValType type, Scalar::Type viewType);
+ [[nodiscard]] bool emitAtomicRMW(ValType type, Scalar::Type viewType,
+ AtomicOp op);
+ [[nodiscard]] bool emitAtomicStore(ValType type, Scalar::Type viewType);
+ [[nodiscard]] bool emitWait(ValType type, uint32_t byteSize);
+ [[nodiscard]] bool emitWake();
+ [[nodiscard]] bool emitFence();
+ [[nodiscard]] bool emitAtomicXchg(ValType type, Scalar::Type viewType);
+ void emitAtomicXchg64(MemoryAccessDesc* access, WantResult wantResult);
+ [[nodiscard]] bool emitMemCopy();
+ [[nodiscard]] bool emitMemCopyCall(uint32_t lineOrBytecode);
+ [[nodiscard]] bool emitMemCopyInline();
+ [[nodiscard]] bool emitTableCopy();
+ [[nodiscard]] bool emitDataOrElemDrop(bool isData);
+ [[nodiscard]] bool emitMemFill();
+ [[nodiscard]] bool emitMemFillCall(uint32_t lineOrBytecode);
+ [[nodiscard]] bool emitMemFillInline();
+ [[nodiscard]] bool emitMemOrTableInit(bool isMem);
+#ifdef ENABLE_WASM_REFTYPES
+ [[nodiscard]] bool emitTableFill();
+ [[nodiscard]] bool emitTableGet();
+ [[nodiscard]] bool emitTableGrow();
+ [[nodiscard]] bool emitTableSet();
+ [[nodiscard]] bool emitTableSize();
+#endif
+ [[nodiscard]] bool emitStructNew();
+ [[nodiscard]] bool emitStructGet();
+ [[nodiscard]] bool emitStructSet();
+ [[nodiscard]] bool emitStructNarrow();
+#ifdef ENABLE_WASM_SIMD
+ template <typename SourceType, typename DestType>
+ void emitVectorUnop(void (*op)(MacroAssembler& masm, SourceType rs,
+ DestType rd));
+
+ template <typename SourceType, typename DestType, typename TempType>
+ void emitVectorUnop(void (*op)(MacroAssembler& masm, SourceType rs,
+ DestType rd, TempType temp));
+
+ template <typename SourceType, typename DestType, typename ImmType>
+ void emitVectorUnop(ImmType immediate, void (*op)(MacroAssembler&, ImmType,
+ SourceType, DestType));
+
+ template <typename RhsType, typename LhsDestType>
+ void emitVectorBinop(void (*op)(MacroAssembler& masm, RhsType src,
+ LhsDestType srcDest));
+
+ template <typename RhsDestType, typename LhsType>
+ void emitVectorBinop(void (*op)(MacroAssembler& masm, RhsDestType src,
+ LhsType srcDest, RhsDestOp));
+
+ template <typename RhsType, typename LhsDestType, typename TempType>
+ void emitVectorBinop(void (*)(MacroAssembler& masm, RhsType rs,
+ LhsDestType rsd, TempType temp));
+
+ template <typename RhsType, typename LhsDestType, typename TempType1,
+ typename TempType2>
+ void emitVectorBinop(void (*)(MacroAssembler& masm, RhsType rs,
+ LhsDestType rsd, TempType1 temp1,
+ TempType2 temp2));
+
+ template <typename RhsType, typename LhsDestType, typename ImmType>
+ void emitVectorBinop(ImmType immediate, void (*op)(MacroAssembler&, ImmType,
+ RhsType, LhsDestType));
+
+ template <typename RhsType, typename LhsDestType, typename ImmType,
+ typename TempType1, typename TempType2>
+ void emitVectorBinop(ImmType immediate,
+ void (*op)(MacroAssembler&, ImmType, RhsType,
+ LhsDestType, TempType1 temp1,
+ TempType2 temp2));
+
+ void emitVectorAndNot();
+
+ [[nodiscard]] bool emitLoadSplat(Scalar::Type viewType);
+ [[nodiscard]] bool emitLoadZero(Scalar::Type viewType);
+ [[nodiscard]] bool emitLoadExtend(Scalar::Type viewType);
+ [[nodiscard]] bool emitBitselect();
+ [[nodiscard]] bool emitVectorShuffle();
+ [[nodiscard]] bool emitVectorShiftRightI64x2(bool isUnsigned);
+ [[nodiscard]] bool emitVectorMulI64x2();
+#endif
+};
+
+// TODO: We want these to be inlined for sure; do we need an `inline` somewhere?
+
+template <>
+RegI32 BaseCompiler::need<RegI32>() {
+ return needI32();
+}
+template <>
+RegI64 BaseCompiler::need<RegI64>() {
+ return needI64();
+}
+template <>
+RegF32 BaseCompiler::need<RegF32>() {
+ return needF32();
+}
+template <>
+RegF64 BaseCompiler::need<RegF64>() {
+ return needF64();
+}
+
+template <>
+RegI32 BaseCompiler::pop<RegI32>() {
+ return popI32();
+}
+template <>
+RegI64 BaseCompiler::pop<RegI64>() {
+ return popI64();
+}
+template <>
+RegF32 BaseCompiler::pop<RegF32>() {
+ return popF32();
+}
+template <>
+RegF64 BaseCompiler::pop<RegF64>() {
+ return popF64();
+}
+
+template <>
+void BaseCompiler::free<RegI32>(RegI32 r) {
+ freeI32(r);
+}
+template <>
+void BaseCompiler::free<RegI64>(RegI64 r) {
+ freeI64(r);
+}
+template <>
+void BaseCompiler::free<RegF32>(RegF32 r) {
+ freeF32(r);
+}
+template <>
+void BaseCompiler::free<RegF64>(RegF64 r) {
+ freeF64(r);
+}
+
+#ifdef ENABLE_WASM_SIMD
+template <>
+RegV128 BaseCompiler::need<RegV128>() {
+ return needV128();
+}
+template <>
+RegV128 BaseCompiler::pop<RegV128>() {
+ return popV128();
+}
+template <>
+void BaseCompiler::free<RegV128>(RegV128 r) {
+ freeV128(r);
+}
+#endif
+
+void BaseCompiler::emitAddI32() {
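+  // If the rhs is a compile-time constant, pop it and use the immediate
+  // form of the operation; the same pattern recurs in the other integer
+  // binary operations below.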
+ int32_t c;
+ if (popConstI32(&c)) {
+ RegI32 r = popI32();
+ masm.add32(Imm32(c), r);
+ pushI32(r);
+ } else {
+ RegI32 r, rs;
+ pop2xI32(&r, &rs);
+ masm.add32(rs, r);
+ freeI32(rs);
+ pushI32(r);
+ }
+}
+
+void BaseCompiler::emitAddI64() {
+ int64_t c;
+ if (popConstI64(&c)) {
+ RegI64 r = popI64();
+ masm.add64(Imm64(c), r);
+ pushI64(r);
+ } else {
+ RegI64 r, rs;
+ pop2xI64(&r, &rs);
+ masm.add64(rs, r);
+ freeI64(rs);
+ pushI64(r);
+ }
+}
+
+void BaseCompiler::emitAddF64() {
+ RegF64 r, rs;
+ pop2xF64(&r, &rs);
+ masm.addDouble(rs, r);
+ freeF64(rs);
+ pushF64(r);
+}
+
+void BaseCompiler::emitAddF32() {
+ RegF32 r, rs;
+ pop2xF32(&r, &rs);
+ masm.addFloat32(rs, r);
+ freeF32(rs);
+ pushF32(r);
+}
+
+void BaseCompiler::emitSubtractI32() {
+ int32_t c;
+ if (popConstI32(&c)) {
+ RegI32 r = popI32();
+ masm.sub32(Imm32(c), r);
+ pushI32(r);
+ } else {
+ RegI32 r, rs;
+ pop2xI32(&r, &rs);
+ masm.sub32(rs, r);
+ freeI32(rs);
+ pushI32(r);
+ }
+}
+
+void BaseCompiler::emitSubtractI64() {
+ int64_t c;
+ if (popConstI64(&c)) {
+ RegI64 r = popI64();
+ masm.sub64(Imm64(c), r);
+ pushI64(r);
+ } else {
+ RegI64 r, rs;
+ pop2xI64(&r, &rs);
+ masm.sub64(rs, r);
+ freeI64(rs);
+ pushI64(r);
+ }
+}
+
+void BaseCompiler::emitSubtractF32() {
+ RegF32 r, rs;
+ pop2xF32(&r, &rs);
+ masm.subFloat32(rs, r);
+ freeF32(rs);
+ pushF32(r);
+}
+
+void BaseCompiler::emitSubtractF64() {
+ RegF64 r, rs;
+ pop2xF64(&r, &rs);
+ masm.subDouble(rs, r);
+ freeF64(rs);
+ pushF64(r);
+}
+
+void BaseCompiler::emitMultiplyI32() {
+ RegI32 r, rs, reserved;
+ pop2xI32ForMulDivI32(&r, &rs, &reserved);
+ masm.mul32(rs, r);
+ maybeFreeI32(reserved);
+ freeI32(rs);
+ pushI32(r);
+}
+
+void BaseCompiler::emitMultiplyI64() {
+ RegI64 r, rs, reserved;
+ RegI32 temp;
+ pop2xI64ForMulI64(&r, &rs, &temp, &reserved);
+ masm.mul64(rs, r, temp);
+ maybeFreeI64(reserved);
+ maybeFreeI32(temp);
+ freeI64(rs);
+ pushI64(r);
+}
+
+void BaseCompiler::emitMultiplyF32() {
+ RegF32 r, rs;
+ pop2xF32(&r, &rs);
+ masm.mulFloat32(rs, r);
+ freeF32(rs);
+ pushF32(r);
+}
+
+void BaseCompiler::emitMultiplyF64() {
+ RegF64 r, rs;
+ pop2xF64(&r, &rs);
+ masm.mulDouble(rs, r);
+ freeF64(rs);
+ pushF64(r);
+}
+
+void BaseCompiler::emitQuotientI32() {
+ int32_t c;
+ uint_fast8_t power;
+ if (popConstPositivePowerOfTwoI32(&c, &power, 0)) {
+ if (power != 0) {
+ RegI32 r = popI32();
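+      // Signed division truncates toward zero, but an arithmetic right
+      // shift rounds toward negative infinity, so bias a negative dividend
+      // by (2^power - 1) before shifting. E.g. for power=3: -1 >> 3 == -1,
+      // but trunc(-1/8) == 0, while (-1 + 7) >> 3 == 0.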
+ Label positive;
+ masm.branchTest32(Assembler::NotSigned, r, r, &positive);
+ masm.add32(Imm32(c - 1), r);
+ masm.bind(&positive);
+
+ masm.rshift32Arithmetic(Imm32(power & 31), r);
+ pushI32(r);
+ }
+ } else {
+ bool isConst = peekConstI32(&c);
+ RegI32 r, rs, reserved;
+ pop2xI32ForMulDivI32(&r, &rs, &reserved);
+
+ if (!isConst || c == 0) {
+ checkDivideByZeroI32(rs);
+ }
+
+ Label done;
+ if (!isConst || c == -1) {
+ checkDivideSignedOverflowI32(rs, r, &done, ZeroOnOverflow(false));
+ }
+ masm.quotient32(rs, r, IsUnsigned(false));
+ masm.bind(&done);
+
+ maybeFreeI32(reserved);
+ freeI32(rs);
+ pushI32(r);
+ }
+}
+
+void BaseCompiler::emitQuotientU32() {
+ int32_t c;
+ uint_fast8_t power;
+ if (popConstPositivePowerOfTwoI32(&c, &power, 0)) {
+ if (power != 0) {
+ RegI32 r = popI32();
+ masm.rshift32(Imm32(power & 31), r);
+ pushI32(r);
+ }
+ } else {
+ bool isConst = peekConstI32(&c);
+ RegI32 r, rs, reserved;
+ pop2xI32ForMulDivI32(&r, &rs, &reserved);
+
+ if (!isConst || c == 0) {
+ checkDivideByZeroI32(rs);
+ }
+ masm.quotient32(rs, r, IsUnsigned(true));
+
+ maybeFreeI32(reserved);
+ freeI32(rs);
+ pushI32(r);
+ }
+}
+
+void BaseCompiler::emitRemainderI32() {
+ int32_t c;
+ uint_fast8_t power;
+ if (popConstPositivePowerOfTwoI32(&c, &power, 1)) {
+ RegI32 r = popI32();
+ RegI32 temp = needI32();
+ moveI32(r, temp);
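+    // Compute trunc(r / 2^power) * 2^power in `temp` (bias, arithmetic
+    // right shift, then left shift); the remainder is r minus that product.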
+
+ Label positive;
+ masm.branchTest32(Assembler::NotSigned, temp, temp, &positive);
+ masm.add32(Imm32(c - 1), temp);
+ masm.bind(&positive);
+
+ masm.rshift32Arithmetic(Imm32(power & 31), temp);
+ masm.lshift32(Imm32(power & 31), temp);
+ masm.sub32(temp, r);
+ freeI32(temp);
+
+ pushI32(r);
+ } else {
+ bool isConst = peekConstI32(&c);
+ RegI32 r, rs, reserved;
+ pop2xI32ForMulDivI32(&r, &rs, &reserved);
+
+ if (!isConst || c == 0) {
+ checkDivideByZeroI32(rs);
+ }
+
+ Label done;
+ if (!isConst || c == -1) {
+ checkDivideSignedOverflowI32(rs, r, &done, ZeroOnOverflow(true));
+ }
+ masm.remainder32(rs, r, IsUnsigned(false));
+ masm.bind(&done);
+
+ maybeFreeI32(reserved);
+ freeI32(rs);
+ pushI32(r);
+ }
+}
+
+void BaseCompiler::emitRemainderU32() {
+ int32_t c;
+ uint_fast8_t power;
+ if (popConstPositivePowerOfTwoI32(&c, &power, 1)) {
+ RegI32 r = popI32();
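+    // The unsigned remainder by 2^power is just the low `power` bits.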
+ masm.and32(Imm32(c - 1), r);
+ pushI32(r);
+ } else {
+ bool isConst = peekConstI32(&c);
+ RegI32 r, rs, reserved;
+ pop2xI32ForMulDivI32(&r, &rs, &reserved);
+
+ if (!isConst || c == 0) {
+ checkDivideByZeroI32(rs);
+ }
+ masm.remainder32(rs, r, IsUnsigned(true));
+
+ maybeFreeI32(reserved);
+ freeI32(rs);
+ pushI32(r);
+ }
+}
+
+#ifndef RABALDR_INT_DIV_I64_CALLOUT
+void BaseCompiler::emitQuotientI64() {
+# ifdef JS_64BIT
+ int64_t c;
+ uint_fast8_t power;
+ if (popConstPositivePowerOfTwoI64(&c, &power, 0)) {
+ if (power != 0) {
+ RegI64 r = popI64();
+ Label positive;
+ masm.branchTest64(Assembler::NotSigned, r, r, RegI32::Invalid(),
+ &positive);
+ masm.add64(Imm64(c - 1), r);
+ masm.bind(&positive);
+
+ masm.rshift64Arithmetic(Imm32(power & 63), r);
+ pushI64(r);
+ }
+ } else {
+ bool isConst = peekConstI64(&c);
+ RegI64 r, rs, reserved;
+ pop2xI64ForDivI64(&r, &rs, &reserved);
+ quotientI64(rs, r, reserved, IsUnsigned(false), isConst, c);
+ maybeFreeI64(reserved);
+ freeI64(rs);
+ pushI64(r);
+ }
+# else
+ MOZ_CRASH("BaseCompiler platform hook: emitQuotientI64");
+# endif
+}
+
+void BaseCompiler::emitQuotientU64() {
+# ifdef JS_64BIT
+ int64_t c;
+ uint_fast8_t power;
+ if (popConstPositivePowerOfTwoI64(&c, &power, 0)) {
+ if (power != 0) {
+ RegI64 r = popI64();
+ masm.rshift64(Imm32(power & 63), r);
+ pushI64(r);
+ }
+ } else {
+ bool isConst = peekConstI64(&c);
+ RegI64 r, rs, reserved;
+ pop2xI64ForDivI64(&r, &rs, &reserved);
+ quotientI64(rs, r, reserved, IsUnsigned(true), isConst, c);
+ maybeFreeI64(reserved);
+ freeI64(rs);
+ pushI64(r);
+ }
+# else
+ MOZ_CRASH("BaseCompiler platform hook: emitQuotientU64");
+# endif
+}
+
+void BaseCompiler::emitRemainderI64() {
+# ifdef JS_64BIT
+ int64_t c;
+ uint_fast8_t power;
+ if (popConstPositivePowerOfTwoI64(&c, &power, 1)) {
+ RegI64 r = popI64();
+ RegI64 temp = needI64();
+ moveI64(r, temp);
+
+ Label positive;
+ masm.branchTest64(Assembler::NotSigned, temp, temp, RegI32::Invalid(),
+ &positive);
+ masm.add64(Imm64(c - 1), temp);
+ masm.bind(&positive);
+
+ masm.rshift64Arithmetic(Imm32(power & 63), temp);
+ masm.lshift64(Imm32(power & 63), temp);
+ masm.sub64(temp, r);
+ freeI64(temp);
+
+ pushI64(r);
+ } else {
+ bool isConst = peekConstI64(&c);
+ RegI64 r, rs, reserved;
+ pop2xI64ForDivI64(&r, &rs, &reserved);
+ remainderI64(rs, r, reserved, IsUnsigned(false), isConst, c);
+ maybeFreeI64(reserved);
+ freeI64(rs);
+ pushI64(r);
+ }
+# else
+ MOZ_CRASH("BaseCompiler platform hook: emitRemainderI64");
+# endif
+}
+
+void BaseCompiler::emitRemainderU64() {
+# ifdef JS_64BIT
+ int64_t c;
+ uint_fast8_t power;
+ if (popConstPositivePowerOfTwoI64(&c, &power, 1)) {
+ RegI64 r = popI64();
+ masm.and64(Imm64(c - 1), r);
+ pushI64(r);
+ } else {
+ bool isConst = peekConstI64(&c);
+ RegI64 r, rs, reserved;
+ pop2xI64ForDivI64(&r, &rs, &reserved);
+ remainderI64(rs, r, reserved, IsUnsigned(true), isConst, c);
+ maybeFreeI64(reserved);
+ freeI64(rs);
+ pushI64(r);
+ }
+# else
+ MOZ_CRASH("BaseCompiler platform hook: emitRemainderU64");
+# endif
+}
+#endif // RABALDR_INT_DIV_I64_CALLOUT
+
+void BaseCompiler::emitDivideF32() {
+ RegF32 r, rs;
+ pop2xF32(&r, &rs);
+ masm.divFloat32(rs, r);
+ freeF32(rs);
+ pushF32(r);
+}
+
+void BaseCompiler::emitDivideF64() {
+ RegF64 r, rs;
+ pop2xF64(&r, &rs);
+ masm.divDouble(rs, r);
+ freeF64(rs);
+ pushF64(r);
+}
+
+void BaseCompiler::emitMinF32() {
+ RegF32 r, rs;
+ pop2xF32(&r, &rs);
+  // Convert signaling NaNs to quiet NaNs.
+ //
+ // TODO / OPTIMIZE (bug 1316824): Don't do this if one of the operands
+ // is known to be a constant.
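+  //
+  // Subtracting zero quiets a signaling NaN and leaves every other value,
+  // including negative zero, unchanged.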
+ ScratchF32 zero(*this);
+ moveImmF32(0.f, zero);
+ masm.subFloat32(zero, r);
+ masm.subFloat32(zero, rs);
+ masm.minFloat32(rs, r, HandleNaNSpecially(true));
+ freeF32(rs);
+ pushF32(r);
+}
+
+void BaseCompiler::emitMaxF32() {
+ RegF32 r, rs;
+ pop2xF32(&r, &rs);
+  // Convert signaling NaNs to quiet NaNs.
+ //
+ // TODO / OPTIMIZE (bug 1316824): see comment in emitMinF32.
+ ScratchF32 zero(*this);
+ moveImmF32(0.f, zero);
+ masm.subFloat32(zero, r);
+ masm.subFloat32(zero, rs);
+ masm.maxFloat32(rs, r, HandleNaNSpecially(true));
+ freeF32(rs);
+ pushF32(r);
+}
+
+void BaseCompiler::emitMinF64() {
+ RegF64 r, rs;
+ pop2xF64(&r, &rs);
+  // Convert signaling NaNs to quiet NaNs.
+ //
+ // TODO / OPTIMIZE (bug 1316824): see comment in emitMinF32.
+ ScratchF64 zero(*this);
+ moveImmF64(0, zero);
+ masm.subDouble(zero, r);
+ masm.subDouble(zero, rs);
+ masm.minDouble(rs, r, HandleNaNSpecially(true));
+ freeF64(rs);
+ pushF64(r);
+}
+
+void BaseCompiler::emitMaxF64() {
+ RegF64 r, rs;
+ pop2xF64(&r, &rs);
+  // Convert signaling NaNs to quiet NaNs.
+ //
+ // TODO / OPTIMIZE (bug 1316824): see comment in emitMinF32.
+ ScratchF64 zero(*this);
+ moveImmF64(0, zero);
+ masm.subDouble(zero, r);
+ masm.subDouble(zero, rs);
+ masm.maxDouble(rs, r, HandleNaNSpecially(true));
+ freeF64(rs);
+ pushF64(r);
+}
+
+void BaseCompiler::emitCopysignF32() {
+ RegF32 r, rs;
+ pop2xF32(&r, &rs);
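+  // Combine the magnitude bits of r with the sign bit of rs by masking and
+  // OR-ing in integer registers.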
+ RegI32 temp0 = needI32();
+ RegI32 temp1 = needI32();
+ masm.moveFloat32ToGPR(r, temp0);
+ masm.moveFloat32ToGPR(rs, temp1);
+ masm.and32(Imm32(INT32_MAX), temp0);
+ masm.and32(Imm32(INT32_MIN), temp1);
+ masm.or32(temp1, temp0);
+ masm.moveGPRToFloat32(temp0, r);
+ freeI32(temp0);
+ freeI32(temp1);
+ freeF32(rs);
+ pushF32(r);
+}
+
+void BaseCompiler::emitCopysignF64() {
+ RegF64 r, rs;
+ pop2xF64(&r, &rs);
+ RegI64 temp0 = needI64();
+ RegI64 temp1 = needI64();
+ masm.moveDoubleToGPR64(r, temp0);
+ masm.moveDoubleToGPR64(rs, temp1);
+ masm.and64(Imm64(INT64_MAX), temp0);
+ masm.and64(Imm64(INT64_MIN), temp1);
+ masm.or64(temp1, temp0);
+ masm.moveGPR64ToDouble(temp0, r);
+ freeI64(temp0);
+ freeI64(temp1);
+ freeF64(rs);
+ pushF64(r);
+}
+
+void BaseCompiler::emitOrI32() {
+ int32_t c;
+ if (popConstI32(&c)) {
+ RegI32 r = popI32();
+ masm.or32(Imm32(c), r);
+ pushI32(r);
+ } else {
+ RegI32 r, rs;
+ pop2xI32(&r, &rs);
+ masm.or32(rs, r);
+ freeI32(rs);
+ pushI32(r);
+ }
+}
+
+void BaseCompiler::emitOrI64() {
+ int64_t c;
+ if (popConstI64(&c)) {
+ RegI64 r = popI64();
+ masm.or64(Imm64(c), r);
+ pushI64(r);
+ } else {
+ RegI64 r, rs;
+ pop2xI64(&r, &rs);
+ masm.or64(rs, r);
+ freeI64(rs);
+ pushI64(r);
+ }
+}
+
+void BaseCompiler::emitAndI32() {
+ int32_t c;
+ if (popConstI32(&c)) {
+ RegI32 r = popI32();
+ masm.and32(Imm32(c), r);
+ pushI32(r);
+ } else {
+ RegI32 r, rs;
+ pop2xI32(&r, &rs);
+ masm.and32(rs, r);
+ freeI32(rs);
+ pushI32(r);
+ }
+}
+
+void BaseCompiler::emitAndI64() {
+ int64_t c;
+ if (popConstI64(&c)) {
+ RegI64 r = popI64();
+ masm.and64(Imm64(c), r);
+ pushI64(r);
+ } else {
+ RegI64 r, rs;
+ pop2xI64(&r, &rs);
+ masm.and64(rs, r);
+ freeI64(rs);
+ pushI64(r);
+ }
+}
+
+void BaseCompiler::emitXorI32() {
+ int32_t c;
+ if (popConstI32(&c)) {
+ RegI32 r = popI32();
+ masm.xor32(Imm32(c), r);
+ pushI32(r);
+ } else {
+ RegI32 r, rs;
+ pop2xI32(&r, &rs);
+ masm.xor32(rs, r);
+ freeI32(rs);
+ pushI32(r);
+ }
+}
+
+void BaseCompiler::emitXorI64() {
+ int64_t c;
+ if (popConstI64(&c)) {
+ RegI64 r = popI64();
+ masm.xor64(Imm64(c), r);
+ pushI64(r);
+ } else {
+ RegI64 r, rs;
+ pop2xI64(&r, &rs);
+ masm.xor64(rs, r);
+ freeI64(rs);
+ pushI64(r);
+ }
+}
+
+void BaseCompiler::emitShlI32() {
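+  // Wasm takes shift counts modulo the operand width, hence the `& 31`
+  // mask on the immediate and maskShiftCount32() on the register count.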
+ int32_t c;
+ if (popConstI32(&c)) {
+ RegI32 r = popI32();
+ masm.lshift32(Imm32(c & 31), r);
+ pushI32(r);
+ } else {
+ RegI32 r, rs;
+ pop2xI32ForShift(&r, &rs);
+ maskShiftCount32(rs);
+ masm.lshift32(rs, r);
+ freeI32(rs);
+ pushI32(r);
+ }
+}
+
+void BaseCompiler::emitShlI64() {
+ int64_t c;
+ if (popConstI64(&c)) {
+ RegI64 r = popI64();
+ masm.lshift64(Imm32(c & 63), r);
+ pushI64(r);
+ } else {
+ RegI64 r, rs;
+ pop2xI64ForShift(&r, &rs);
+ masm.lshift64(lowPart(rs), r);
+ freeI64(rs);
+ pushI64(r);
+ }
+}
+
+void BaseCompiler::emitShrI32() {
+ int32_t c;
+ if (popConstI32(&c)) {
+ RegI32 r = popI32();
+ masm.rshift32Arithmetic(Imm32(c & 31), r);
+ pushI32(r);
+ } else {
+ RegI32 r, rs;
+ pop2xI32ForShift(&r, &rs);
+ maskShiftCount32(rs);
+ masm.rshift32Arithmetic(rs, r);
+ freeI32(rs);
+ pushI32(r);
+ }
+}
+
+void BaseCompiler::emitShrI64() {
+ int64_t c;
+ if (popConstI64(&c)) {
+ RegI64 r = popI64();
+ masm.rshift64Arithmetic(Imm32(c & 63), r);
+ pushI64(r);
+ } else {
+ RegI64 r, rs;
+ pop2xI64ForShift(&r, &rs);
+ masm.rshift64Arithmetic(lowPart(rs), r);
+ freeI64(rs);
+ pushI64(r);
+ }
+}
+
+void BaseCompiler::emitShrU32() {
+ int32_t c;
+ if (popConstI32(&c)) {
+ RegI32 r = popI32();
+ masm.rshift32(Imm32(c & 31), r);
+ pushI32(r);
+ } else {
+ RegI32 r, rs;
+ pop2xI32ForShift(&r, &rs);
+ maskShiftCount32(rs);
+ masm.rshift32(rs, r);
+ freeI32(rs);
+ pushI32(r);
+ }
+}
+
+void BaseCompiler::emitShrU64() {
+ int64_t c;
+ if (popConstI64(&c)) {
+ RegI64 r = popI64();
+ masm.rshift64(Imm32(c & 63), r);
+ pushI64(r);
+ } else {
+ RegI64 r, rs;
+ pop2xI64ForShift(&r, &rs);
+ masm.rshift64(lowPart(rs), r);
+ freeI64(rs);
+ pushI64(r);
+ }
+}
+
+void BaseCompiler::emitRotrI32() {
+ int32_t c;
+ if (popConstI32(&c)) {
+ RegI32 r = popI32();
+ masm.rotateRight(Imm32(c & 31), r, r);
+ pushI32(r);
+ } else {
+ RegI32 r, rs;
+ pop2xI32ForRotate(&r, &rs);
+ masm.rotateRight(rs, r, r);
+ freeI32(rs);
+ pushI32(r);
+ }
+}
+
+void BaseCompiler::emitRotrI64() {
+ int64_t c;
+ if (popConstI64(&c)) {
+ RegI64 r = popI64();
+ RegI32 temp = needRotate64Temp();
+ masm.rotateRight64(Imm32(c & 63), r, r, temp);
+ maybeFreeI32(temp);
+ pushI64(r);
+ } else {
+ RegI64 r, rs;
+ pop2xI64ForRotate(&r, &rs);
+ masm.rotateRight64(lowPart(rs), r, r, maybeHighPart(rs));
+ freeI64(rs);
+ pushI64(r);
+ }
+}
+
+void BaseCompiler::emitRotlI32() {
+ int32_t c;
+ if (popConstI32(&c)) {
+ RegI32 r = popI32();
+ masm.rotateLeft(Imm32(c & 31), r, r);
+ pushI32(r);
+ } else {
+ RegI32 r, rs;
+ pop2xI32ForRotate(&r, &rs);
+ masm.rotateLeft(rs, r, r);
+ freeI32(rs);
+ pushI32(r);
+ }
+}
+
+void BaseCompiler::emitRotlI64() {
+ int64_t c;
+ if (popConstI64(&c)) {
+ RegI64 r = popI64();
+ RegI32 temp = needRotate64Temp();
+ masm.rotateLeft64(Imm32(c & 63), r, r, temp);
+ maybeFreeI32(temp);
+ pushI64(r);
+ } else {
+ RegI64 r, rs;
+ pop2xI64ForRotate(&r, &rs);
+ masm.rotateLeft64(lowPart(rs), r, r, maybeHighPart(rs));
+ freeI64(rs);
+ pushI64(r);
+ }
+}
+
+void BaseCompiler::emitEqzI32() {
+ if (sniffConditionalControlEqz(ValType::I32)) {
+ return;
+ }
+
+ RegI32 r = popI32();
+ masm.cmp32Set(Assembler::Equal, r, Imm32(0), r);
+ pushI32(r);
+}
+
+void BaseCompiler::emitEqzI64() {
+ if (sniffConditionalControlEqz(ValType::I64)) {
+ return;
+ }
+
+ RegI64 rs = popI64();
+ RegI32 rd = fromI64(rs);
+ eqz64(rs, rd);
+ freeI64Except(rs, rd);
+ pushI32(rd);
+}
+
+void BaseCompiler::emitClzI32() {
+ RegI32 r = popI32();
+ masm.clz32(r, r, IsKnownNotZero(false));
+ pushI32(r);
+}
+
+void BaseCompiler::emitClzI64() {
+ RegI64 r = popI64();
+ masm.clz64(r, lowPart(r));
+ maybeClearHighPart(r);
+ pushI64(r);
+}
+
+void BaseCompiler::emitCtzI32() {
+ RegI32 r = popI32();
+ masm.ctz32(r, r, IsKnownNotZero(false));
+ pushI32(r);
+}
+
+void BaseCompiler::emitCtzI64() {
+ RegI64 r = popI64();
+ masm.ctz64(r, lowPart(r));
+ maybeClearHighPart(r);
+ pushI64(r);
+}
+
+void BaseCompiler::emitPopcntI32() {
+ RegI32 r = popI32();
+ RegI32 temp = needPopcnt32Temp();
+ masm.popcnt32(r, r, temp);
+ maybeFreeI32(temp);
+ pushI32(r);
+}
+
+void BaseCompiler::emitPopcntI64() {
+ RegI64 r = popI64();
+ RegI32 temp = needPopcnt64Temp();
+ masm.popcnt64(r, r, temp);
+ maybeFreeI32(temp);
+ pushI64(r);
+}
+
+void BaseCompiler::emitAbsF32() {
+ RegF32 r = popF32();
+ masm.absFloat32(r, r);
+ pushF32(r);
+}
+
+void BaseCompiler::emitAbsF64() {
+ RegF64 r = popF64();
+ masm.absDouble(r, r);
+ pushF64(r);
+}
+
+void BaseCompiler::emitNegateF32() {
+ RegF32 r = popF32();
+ masm.negateFloat(r);
+ pushF32(r);
+}
+
+void BaseCompiler::emitNegateF64() {
+ RegF64 r = popF64();
+ masm.negateDouble(r);
+ pushF64(r);
+}
+
+void BaseCompiler::emitSqrtF32() {
+ RegF32 r = popF32();
+ masm.sqrtFloat32(r, r);
+ pushF32(r);
+}
+
+void BaseCompiler::emitSqrtF64() {
+ RegF64 r = popF64();
+ masm.sqrtDouble(r, r);
+ pushF64(r);
+}
+
+template <TruncFlags flags>
+bool BaseCompiler::emitTruncateF32ToI32() {
+ RegF32 rs = popF32();
+ RegI32 rd = needI32();
+ if (!truncateF32ToI32(rs, rd, flags)) {
+ return false;
+ }
+ freeF32(rs);
+ pushI32(rd);
+ return true;
+}
+
+template <TruncFlags flags>
+bool BaseCompiler::emitTruncateF64ToI32() {
+ RegF64 rs = popF64();
+ RegI32 rd = needI32();
+ if (!truncateF64ToI32(rs, rd, flags)) {
+ return false;
+ }
+ freeF64(rs);
+ pushI32(rd);
+ return true;
+}
+
+#ifndef RABALDR_FLOAT_TO_I64_CALLOUT
+template <TruncFlags flags>
+bool BaseCompiler::emitTruncateF32ToI64() {
+ RegF32 rs = popF32();
+ RegI64 rd = needI64();
+ RegF64 temp = needTempForFloatingToI64(flags);
+ if (!truncateF32ToI64(rs, rd, flags, temp)) {
+ return false;
+ }
+ maybeFreeF64(temp);
+ freeF32(rs);
+ pushI64(rd);
+ return true;
+}
+
+template <TruncFlags flags>
+bool BaseCompiler::emitTruncateF64ToI64() {
+ RegF64 rs = popF64();
+ RegI64 rd = needI64();
+ RegF64 temp = needTempForFloatingToI64(flags);
+ if (!truncateF64ToI64(rs, rd, flags, temp)) {
+ return false;
+ }
+ maybeFreeF64(temp);
+ freeF64(rs);
+ pushI64(rd);
+ return true;
+}
+#endif // RABALDR_FLOAT_TO_I64_CALLOUT
+
+void BaseCompiler::emitWrapI64ToI32() {
+ RegI64 rs = popI64();
+ RegI32 rd = fromI64(rs);
+ masm.move64To32(rs, rd);
+ freeI64Except(rs, rd);
+ pushI32(rd);
+}
+
+void BaseCompiler::emitExtendI32_8() {
+ RegI32 r = popI32();
+#ifdef JS_CODEGEN_X86
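+  // On 32-bit x86 only some registers have byte-addressable forms, so
+  // shuffle through a byte-addressable scratch register when `r` is not
+  // one of them.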
+ if (!ra.isSingleByteI32(r)) {
+ ScratchI8 scratch(*this);
+ moveI32(r, scratch);
+ masm.move8SignExtend(scratch, r);
+ pushI32(r);
+ return;
+ }
+#endif
+ masm.move8SignExtend(r, r);
+ pushI32(r);
+}
+
+void BaseCompiler::emitExtendI32_16() {
+ RegI32 r = popI32();
+ masm.move16SignExtend(r, r);
+ pushI32(r);
+}
+
+void BaseCompiler::emitExtendI64_8() {
+ RegI64 r;
+ popI64ForSignExtendI64(&r);
+ masm.move8To64SignExtend(lowPart(r), r);
+ pushI64(r);
+}
+
+void BaseCompiler::emitExtendI64_16() {
+ RegI64 r;
+ popI64ForSignExtendI64(&r);
+ masm.move16To64SignExtend(lowPart(r), r);
+ pushI64(r);
+}
+
+void BaseCompiler::emitExtendI64_32() {
+ RegI64 r;
+ popI64ForSignExtendI64(&r);
+ masm.move32To64SignExtend(lowPart(r), r);
+ pushI64(r);
+}
+
+void BaseCompiler::emitExtendI32ToI64() {
+ RegI64 r;
+ popI32ForSignExtendI64(&r);
+ masm.move32To64SignExtend(lowPart(r), r);
+ pushI64(r);
+}
+
+void BaseCompiler::emitExtendU32ToI64() {
+ RegI32 rs = popI32();
+ RegI64 rd = widenI32(rs);
+ masm.move32To64ZeroExtend(rs, rd);
+ pushI64(rd);
+}
+
+void BaseCompiler::emitReinterpretF32AsI32() {
+ RegF32 rs = popF32();
+ RegI32 rd = needI32();
+ masm.moveFloat32ToGPR(rs, rd);
+ freeF32(rs);
+ pushI32(rd);
+}
+
+void BaseCompiler::emitReinterpretF64AsI64() {
+ RegF64 rs = popF64();
+ RegI64 rd = needI64();
+ masm.moveDoubleToGPR64(rs, rd);
+ freeF64(rs);
+ pushI64(rd);
+}
+
+void BaseCompiler::emitConvertF64ToF32() {
+ RegF64 rs = popF64();
+ RegF32 rd = needF32();
+ masm.convertDoubleToFloat32(rs, rd);
+ freeF64(rs);
+ pushF32(rd);
+}
+
+void BaseCompiler::emitConvertI32ToF32() {
+ RegI32 rs = popI32();
+ RegF32 rd = needF32();
+ masm.convertInt32ToFloat32(rs, rd);
+ freeI32(rs);
+ pushF32(rd);
+}
+
+void BaseCompiler::emitConvertU32ToF32() {
+ RegI32 rs = popI32();
+ RegF32 rd = needF32();
+ masm.convertUInt32ToFloat32(rs, rd);
+ freeI32(rs);
+ pushF32(rd);
+}
+
+#ifndef RABALDR_I64_TO_FLOAT_CALLOUT
+void BaseCompiler::emitConvertI64ToF32() {
+ RegI64 rs = popI64();
+ RegF32 rd = needF32();
+ convertI64ToF32(rs, IsUnsigned(false), rd, RegI32());
+ freeI64(rs);
+ pushF32(rd);
+}
+
+void BaseCompiler::emitConvertU64ToF32() {
+ RegI64 rs = popI64();
+ RegF32 rd = needF32();
+ RegI32 temp = needConvertI64ToFloatTemp(ValType::F32, IsUnsigned(true));
+ convertI64ToF32(rs, IsUnsigned(true), rd, temp);
+ maybeFreeI32(temp);
+ freeI64(rs);
+ pushF32(rd);
+}
+#endif
+
+void BaseCompiler::emitConvertF32ToF64() {
+ RegF32 rs = popF32();
+ RegF64 rd = needF64();
+ masm.convertFloat32ToDouble(rs, rd);
+ freeF32(rs);
+ pushF64(rd);
+}
+
+void BaseCompiler::emitConvertI32ToF64() {
+ RegI32 rs = popI32();
+ RegF64 rd = needF64();
+ masm.convertInt32ToDouble(rs, rd);
+ freeI32(rs);
+ pushF64(rd);
+}
+
+void BaseCompiler::emitConvertU32ToF64() {
+ RegI32 rs = popI32();
+ RegF64 rd = needF64();
+ masm.convertUInt32ToDouble(rs, rd);
+ freeI32(rs);
+ pushF64(rd);
+}
+
+#ifndef RABALDR_I64_TO_FLOAT_CALLOUT
+void BaseCompiler::emitConvertI64ToF64() {
+ RegI64 rs = popI64();
+ RegF64 rd = needF64();
+ convertI64ToF64(rs, IsUnsigned(false), rd, RegI32());
+ freeI64(rs);
+ pushF64(rd);
+}
+
+void BaseCompiler::emitConvertU64ToF64() {
+ RegI64 rs = popI64();
+ RegF64 rd = needF64();
+ RegI32 temp = needConvertI64ToFloatTemp(ValType::F64, IsUnsigned(true));
+ convertI64ToF64(rs, IsUnsigned(true), rd, temp);
+ maybeFreeI32(temp);
+ freeI64(rs);
+ pushF64(rd);
+}
+#endif // RABALDR_I64_TO_FLOAT_CALLOUT
+
+void BaseCompiler::emitReinterpretI32AsF32() {
+ RegI32 rs = popI32();
+ RegF32 rd = needF32();
+ masm.moveGPRToFloat32(rs, rd);
+ freeI32(rs);
+ pushF32(rd);
+}
+
+void BaseCompiler::emitReinterpretI64AsF64() {
+ RegI64 rs = popI64();
+ RegF64 rd = needF64();
+ masm.moveGPR64ToDouble(rs, rd);
+ freeI64(rs);
+ pushF64(rd);
+}
+
+template <typename Cond>
+bool BaseCompiler::sniffConditionalControlCmp(Cond compareOp,
+ ValType operandType) {
+ MOZ_ASSERT(latentOp_ == LatentOp::None,
+ "Latent comparison state not properly reset");
+
+#ifdef JS_CODEGEN_X86
+ // On x86, latent i64 binary comparisons use too many registers: the
+ // reserved join register and the lhs and rhs operands require six, but we
+ // only have five.
+ if (operandType == ValType::I64) {
+ return false;
+ }
+#endif
+
+ // No optimization for pointer compares yet.
+ if (operandType.isReference()) {
+ return false;
+ }
+
+ OpBytes op;
+ iter_.peekOp(&op);
+ switch (op.b0) {
+ case uint16_t(Op::BrIf):
+ case uint16_t(Op::If):
+ case uint16_t(Op::SelectNumeric):
+ case uint16_t(Op::SelectTyped):
+ setLatentCompare(compareOp, operandType);
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool BaseCompiler::sniffConditionalControlEqz(ValType operandType) {
+ MOZ_ASSERT(latentOp_ == LatentOp::None,
+ "Latent comparison state not properly reset");
+
+ OpBytes op;
+ iter_.peekOp(&op);
+ switch (op.b0) {
+ case uint16_t(Op::BrIf):
+ case uint16_t(Op::SelectNumeric):
+ case uint16_t(Op::SelectTyped):
+ case uint16_t(Op::If):
+ setLatentEqz(operandType);
+ return true;
+ default:
+ return false;
+ }
+}
+
+void BaseCompiler::emitBranchSetup(BranchState* b) {
+ // Avoid allocating operands to latentOp_ to result registers.
+ if (b->hasBlockResults()) {
+ needResultRegisters(b->resultType);
+ }
+
+ // Set up fields so that emitBranchPerform() need not switch on latentOp_.
+ switch (latentOp_) {
+ case LatentOp::None: {
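+      // No latent compare or eqz: the condition is an ordinary i32 value
+      // on the stack; branch if it is non-zero.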
+ latentIntCmp_ = Assembler::NotEqual;
+ latentType_ = ValType::I32;
+ b->i32.lhs = popI32();
+ b->i32.rhsImm = true;
+ b->i32.imm = 0;
+ break;
+ }
+ case LatentOp::Compare: {
+ switch (latentType_.kind()) {
+ case ValType::I32: {
+ if (popConstI32(&b->i32.imm)) {
+ b->i32.lhs = popI32();
+ b->i32.rhsImm = true;
+ } else {
+ pop2xI32(&b->i32.lhs, &b->i32.rhs);
+ b->i32.rhsImm = false;
+ }
+ break;
+ }
+ case ValType::I64: {
+ pop2xI64(&b->i64.lhs, &b->i64.rhs);
+ b->i64.rhsImm = false;
+ break;
+ }
+ case ValType::F32: {
+ pop2xF32(&b->f32.lhs, &b->f32.rhs);
+ break;
+ }
+ case ValType::F64: {
+ pop2xF64(&b->f64.lhs, &b->f64.rhs);
+ break;
+ }
+ default: {
+ MOZ_CRASH("Unexpected type for LatentOp::Compare");
+ }
+ }
+ break;
+ }
+ case LatentOp::Eqz: {
+ switch (latentType_.kind()) {
+ case ValType::I32: {
+ latentIntCmp_ = Assembler::Equal;
+ b->i32.lhs = popI32();
+ b->i32.rhsImm = true;
+ b->i32.imm = 0;
+ break;
+ }
+ case ValType::I64: {
+ latentIntCmp_ = Assembler::Equal;
+ b->i64.lhs = popI64();
+ b->i64.rhsImm = true;
+ b->i64.imm = 0;
+ break;
+ }
+ default: {
+ MOZ_CRASH("Unexpected type for LatentOp::Eqz");
+ }
+ }
+ break;
+ }
+ }
+
+ if (b->hasBlockResults()) {
+ freeResultRegisters(b->resultType);
+ }
+}
+
+bool BaseCompiler::emitBranchPerform(BranchState* b) {
+ switch (latentType_.kind()) {
+ case ValType::I32: {
+ if (b->i32.rhsImm) {
+ if (!jumpConditionalWithResults(b, latentIntCmp_, b->i32.lhs,
+ Imm32(b->i32.imm))) {
+ return false;
+ }
+ } else {
+ if (!jumpConditionalWithResults(b, latentIntCmp_, b->i32.lhs,
+ b->i32.rhs)) {
+ return false;
+ }
+ freeI32(b->i32.rhs);
+ }
+ freeI32(b->i32.lhs);
+ break;
+ }
+ case ValType::I64: {
+ if (b->i64.rhsImm) {
+ if (!jumpConditionalWithResults(b, latentIntCmp_, b->i64.lhs,
+ Imm64(b->i64.imm))) {
+ return false;
+ }
+ } else {
+ if (!jumpConditionalWithResults(b, latentIntCmp_, b->i64.lhs,
+ b->i64.rhs)) {
+ return false;
+ }
+ freeI64(b->i64.rhs);
+ }
+ freeI64(b->i64.lhs);
+ break;
+ }
+ case ValType::F32: {
+ if (!jumpConditionalWithResults(b, latentDoubleCmp_, b->f32.lhs,
+ b->f32.rhs)) {
+ return false;
+ }
+ freeF32(b->f32.lhs);
+ freeF32(b->f32.rhs);
+ break;
+ }
+ case ValType::F64: {
+ if (!jumpConditionalWithResults(b, latentDoubleCmp_, b->f64.lhs,
+ b->f64.rhs)) {
+ return false;
+ }
+ freeF64(b->f64.lhs);
+ freeF64(b->f64.rhs);
+ break;
+ }
+ default: {
+ MOZ_CRASH("Unexpected type for LatentOp::Compare");
+ }
+ }
+ resetLatentOp();
+ return true;
+}
+
+// For blocks and loops and ifs:
+//
+// - Sync the value stack before going into the block in order to simplify exit
+// from the block: all exits from the block can assume that there are no
+// live registers except the one carrying the exit value.
+// - The block can accumulate a number of dead values on the stacks, so when
+// branching out of the block or falling out at the end be sure to
+// pop the appropriate stacks back to where they were on entry, while
+// preserving the exit value.
+// - A continue branch in a loop is much like an exit branch, but the branch
+// value must not be preserved.
+// - The exit value is always in a designated join register (type dependent).
+
+bool BaseCompiler::emitBlock() {
+ ResultType params;
+ if (!iter_.readBlock(&params)) {
+ return false;
+ }
+
+ if (!deadCode_) {
+ sync(); // Simplifies branching out from block
+ }
+
+ initControl(controlItem(), params);
+
+ return true;
+}
+
+bool BaseCompiler::endBlock(ResultType type) {
+ Control& block = controlItem();
+
+ if (deadCode_) {
+ // Block does not fall through; reset stack.
+ fr.resetStackHeight(block.stackHeight, type);
+ popValueStackTo(block.stackSize);
+ } else {
+    // If the block label is used, we have a control join, so we need to
+    // shuffle fallthrough values into place. Otherwise we can leave the
+    // value stack alone.
+ MOZ_ASSERT(stk_.length() == block.stackSize + type.length());
+ if (block.label.used()) {
+ popBlockResults(type, block.stackHeight, ContinuationKind::Fallthrough);
+ }
+ block.bceSafeOnExit &= bceSafe_;
+ }
+
+ // Bind after cleanup: branches out will have popped the stack.
+ if (block.label.used()) {
+ masm.bind(&block.label);
+ if (deadCode_) {
+ captureResultRegisters(type);
+ deadCode_ = false;
+ }
+ if (!pushBlockResults(type)) {
+ return false;
+ }
+ }
+
+ bceSafe_ = block.bceSafeOnExit;
+
+ return true;
+}
+
+bool BaseCompiler::emitLoop() {
+ ResultType params;
+ if (!iter_.readLoop(&params)) {
+ return false;
+ }
+
+ if (!deadCode_) {
+ sync(); // Simplifies branching out from block
+ }
+
+ initControl(controlItem(), params);
+ bceSafe_ = 0;
+
+ if (!deadCode_) {
+ // Loop entry is a control join, so shuffle the entry parameters into the
+ // well-known locations.
+ if (!topBlockParams(params)) {
+ return false;
+ }
+ masm.nopAlign(CodeAlignment);
+ masm.bind(&controlItem(0).label);
+    // The interrupt check cannot tolerate live registers, so sync first.
+ sync();
+ if (!addInterruptCheck()) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+// The bodies of the "then" and "else" arms can be arbitrary sequences
+// of expressions, they push control and increment the nesting and can
+// even be targeted by jumps. A branch to the "if" block branches to
+// the exit of the if, i.e., it's like "break". Consider:
+//
+// (func (result i32)
+// (if (i32.const 1)
+// (begin (br 1) (unreachable))
+// (begin (unreachable)))
+// (i32.const 1))
+//
+// The branch causes neither of the unreachable expressions to be
+// evaluated.
+
+bool BaseCompiler::emitIf() {
+ ResultType params;
+ Nothing unused_cond;
+ if (!iter_.readIf(&params, &unused_cond)) {
+ return false;
+ }
+
+ BranchState b(&controlItem().otherLabel, InvertBranch(true));
+ if (!deadCode_) {
+ needResultRegisters(params);
+ emitBranchSetup(&b);
+ freeResultRegisters(params);
+ sync();
+ } else {
+ resetLatentOp();
+ }
+
+ initControl(controlItem(), params);
+
+ if (!deadCode_) {
+ // Because params can flow immediately to results in the case of an empty
+ // "then" or "else" block, and the result of an if/then is a join in
+ // general, we shuffle params eagerly to the result allocations.
+ if (!topBlockParams(params)) {
+ return false;
+ }
+ if (!emitBranchPerform(&b)) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool BaseCompiler::endIfThen(ResultType type) {
+ Control& ifThen = controlItem();
+
+ // The parameters to the "if" logically flow to both the "then" and "else"
+ // blocks, but the "else" block is empty. Since we know that the "if"
+ // type-checks, that means that the "else" parameters are the "else" results,
+ // and that the "if"'s result type is the same as its parameter type.
+
+ if (deadCode_) {
+ // "then" arm does not fall through; reset stack.
+ fr.resetStackHeight(ifThen.stackHeight, type);
+ popValueStackTo(ifThen.stackSize);
+ if (!ifThen.deadOnArrival) {
+ captureResultRegisters(type);
+ }
+ } else {
+ MOZ_ASSERT(stk_.length() == ifThen.stackSize + type.length());
+ // Assume we have a control join, so place results in block result
+ // allocations.
+ popBlockResults(type, ifThen.stackHeight, ContinuationKind::Fallthrough);
+ MOZ_ASSERT(!ifThen.deadOnArrival);
+ }
+
+ if (ifThen.otherLabel.used()) {
+ masm.bind(&ifThen.otherLabel);
+ }
+
+ if (ifThen.label.used()) {
+ masm.bind(&ifThen.label);
+ }
+
+ if (!deadCode_) {
+ ifThen.bceSafeOnExit &= bceSafe_;
+ }
+
+ deadCode_ = ifThen.deadOnArrival;
+ if (!deadCode_) {
+ if (!pushBlockResults(type)) {
+ return false;
+ }
+ }
+
+ bceSafe_ = ifThen.bceSafeOnExit & ifThen.bceSafeOnEntry;
+
+ return true;
+}
+
+bool BaseCompiler::emitElse() {
+ ResultType params, results;
+ NothingVector unused_thenValues;
+
+ if (!iter_.readElse(&params, &results, &unused_thenValues)) {
+ return false;
+ }
+
+ Control& ifThenElse = controlItem(0);
+
+ // See comment in endIfThenElse, below.
+
+ // Exit the "then" branch.
+
+ ifThenElse.deadThenBranch = deadCode_;
+
+ if (deadCode_) {
+ fr.resetStackHeight(ifThenElse.stackHeight, results);
+ popValueStackTo(ifThenElse.stackSize);
+ } else {
+ MOZ_ASSERT(stk_.length() == ifThenElse.stackSize + results.length());
+ popBlockResults(results, ifThenElse.stackHeight, ContinuationKind::Jump);
+ freeResultRegisters(results);
+ MOZ_ASSERT(!ifThenElse.deadOnArrival);
+ }
+
+ if (!deadCode_) {
+ masm.jump(&ifThenElse.label);
+ }
+
+ if (ifThenElse.otherLabel.used()) {
+ masm.bind(&ifThenElse.otherLabel);
+ }
+
+ // Reset to the "else" branch.
+
+ if (!deadCode_) {
+ ifThenElse.bceSafeOnExit &= bceSafe_;
+ }
+
+ deadCode_ = ifThenElse.deadOnArrival;
+ bceSafe_ = ifThenElse.bceSafeOnEntry;
+
+ fr.resetStackHeight(ifThenElse.stackHeight, params);
+
+ if (!deadCode_) {
+ captureResultRegisters(params);
+ if (!pushBlockResults(params)) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool BaseCompiler::endIfThenElse(ResultType type) {
+ Control& ifThenElse = controlItem();
+
+  // The expression type is not a reliable guide to what we'll find
+  // on the stack; we could have (if E (i32.const 1) (unreachable))
+ // in which case the "else" arm is AnyType but the type of the
+ // full expression is I32. So restore whatever's there, not what
+ // we want to find there. The "then" arm has the same constraint.
+
+ if (deadCode_) {
+ // "then" arm does not fall through; reset stack.
+ fr.resetStackHeight(ifThenElse.stackHeight, type);
+ popValueStackTo(ifThenElse.stackSize);
+ } else {
+ MOZ_ASSERT(stk_.length() == ifThenElse.stackSize + type.length());
+ // Assume we have a control join, so place results in block result
+ // allocations.
+ popBlockResults(type, ifThenElse.stackHeight,
+ ContinuationKind::Fallthrough);
+ ifThenElse.bceSafeOnExit &= bceSafe_;
+ MOZ_ASSERT(!ifThenElse.deadOnArrival);
+ }
+
+ if (ifThenElse.label.used()) {
+ masm.bind(&ifThenElse.label);
+ }
+
+ bool joinLive =
+ !ifThenElse.deadOnArrival &&
+ (!ifThenElse.deadThenBranch || !deadCode_ || ifThenElse.label.bound());
+
+ if (joinLive) {
+ // No values were provided by the "then" path, but capture the values
+ // provided by the "else" path.
+ if (deadCode_) {
+ captureResultRegisters(type);
+ }
+ deadCode_ = false;
+ }
+
+ bceSafe_ = ifThenElse.bceSafeOnExit;
+
+ if (!deadCode_) {
+ if (!pushBlockResults(type)) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool BaseCompiler::emitEnd() {
+ LabelKind kind;
+ ResultType type;
+ NothingVector unused_values;
+ if (!iter_.readEnd(&kind, &type, &unused_values, &unused_values)) {
+ return false;
+ }
+
+ switch (kind) {
+ case LabelKind::Body:
+ if (!endBlock(type)) {
+ return false;
+ }
+ doReturn(ContinuationKind::Fallthrough);
+ iter_.popEnd();
+ MOZ_ASSERT(iter_.controlStackEmpty());
+ return iter_.readFunctionEnd(iter_.end());
+ case LabelKind::Block:
+ if (!endBlock(type)) {
+ return false;
+ }
+ break;
+ case LabelKind::Loop:
+ // The end of a loop isn't a branch target, so we can just leave its
+ // results on the expression stack to be consumed by the outer block.
+ break;
+ case LabelKind::Then:
+ if (!endIfThen(type)) {
+ return false;
+ }
+ break;
+ case LabelKind::Else:
+ if (!endIfThenElse(type)) {
+ return false;
+ }
+ break;
+#ifdef ENABLE_WASM_EXCEPTIONS
+ case LabelKind::Try:
+ MOZ_CRASH("NYI");
+ break;
+ case LabelKind::Catch:
+ MOZ_CRASH("NYI");
+ break;
+#endif
+ }
+
+ iter_.popEnd();
+
+ return true;
+}
+
+bool BaseCompiler::emitBr() {
+ uint32_t relativeDepth;
+ ResultType type;
+ NothingVector unused_values;
+ if (!iter_.readBr(&relativeDepth, &type, &unused_values)) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+ Control& target = controlItem(relativeDepth);
+ target.bceSafeOnExit &= bceSafe_;
+
+ // Save any values in the designated join registers, as if the target block
+ // returned normally.
+
+ popBlockResults(type, target.stackHeight, ContinuationKind::Jump);
+ masm.jump(&target.label);
+
+ // The registers holding the join values are free for the remainder of this
+ // block.
+
+ freeResultRegisters(type);
+
+ deadCode_ = true;
+
+ return true;
+}
+
+bool BaseCompiler::emitBrIf() {
+ uint32_t relativeDepth;
+ ResultType type;
+ NothingVector unused_values;
+ Nothing unused_condition;
+ if (!iter_.readBrIf(&relativeDepth, &type, &unused_values,
+ &unused_condition)) {
+ return false;
+ }
+
+ if (deadCode_) {
+ resetLatentOp();
+ return true;
+ }
+
+ Control& target = controlItem(relativeDepth);
+ target.bceSafeOnExit &= bceSafe_;
+
+ BranchState b(&target.label, target.stackHeight, InvertBranch(false), type);
+ emitBranchSetup(&b);
+ return emitBranchPerform(&b);
+}
+
+#ifdef ENABLE_WASM_FUNCTION_REFERENCES
+bool BaseCompiler::emitBrOnNull() {
+ MOZ_ASSERT(!hasLatentOp());
+
+ uint32_t relativeDepth;
+ ResultType type;
+ NothingVector unused_values;
+ Nothing unused_condition;
+ if (!iter_.readBrOnNull(&relativeDepth, &type, &unused_values,
+ &unused_condition)) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+ Control& target = controlItem(relativeDepth);
+ target.bceSafeOnExit &= bceSafe_;
+
+ BranchState b(&target.label, target.stackHeight, InvertBranch(false), type);
+ if (b.hasBlockResults()) {
+ needResultRegisters(b.resultType);
+ }
+ RegPtr rp = popRef();
+ if (b.hasBlockResults()) {
+ freeResultRegisters(b.resultType);
+ }
+ if (!jumpConditionalWithResults(&b, Assembler::Equal, rp,
+ ImmWord(NULLREF_VALUE))) {
+ return false;
+ }
+ pushRef(rp);
+
+ return true;
+}
+#endif
+
+bool BaseCompiler::emitBrTable() {
+ Uint32Vector depths;
+ uint32_t defaultDepth;
+ ResultType branchParams;
+ NothingVector unused_values;
+ Nothing unused_index;
+ // N.B., `branchParams' gets set to the type of the default branch target. In
+ // the presence of subtyping, it could be that the different branch targets
+ // have different types. Here we rely on the assumption that the value
+ // representations (e.g. Stk value types) of all branch target types are the
+ // same, in the baseline compiler. Notably, this means that all Ref types
+ // should be represented the same.
+ if (!iter_.readBrTable(&depths, &defaultDepth, &branchParams, &unused_values,
+ &unused_index)) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+ // Don't use param registers for rc
+ needIntegerResultRegisters(branchParams);
+
+ // Table switch value always on top.
+ RegI32 rc = popI32();
+
+ freeIntegerResultRegisters(branchParams);
+
+ StackHeight resultsBase(0);
+ if (!topBranchParams(branchParams, &resultsBase)) {
+ return false;
+ }
+
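+  // Lowering shape: if the index is in range we branch to the dispatch
+  // code; otherwise we fall through to the default target's stub. Each
+  // in-range index jumps through a small stub that shuffles block results
+  // into the target's expected stack locations, and the dispatch itself is
+  // an indirect jump through an inline jump table.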
+ Label dispatchCode;
+ masm.branch32(Assembler::Below, rc, Imm32(depths.length()), &dispatchCode);
+
+ // This is the out-of-range stub. rc is dead here but we don't need it.
+
+ shuffleStackResultsBeforeBranch(
+ resultsBase, controlItem(defaultDepth).stackHeight, branchParams);
+ controlItem(defaultDepth).bceSafeOnExit &= bceSafe_;
+ masm.jump(&controlItem(defaultDepth).label);
+
+ // Emit stubs. rc is dead in all of these but we don't need it.
+ //
+ // The labels in the vector are in the TempAllocator and will
+ // be freed by and by.
+ //
+ // TODO / OPTIMIZE (Bug 1316804): Branch directly to the case code if we
+ // can, don't emit an intermediate stub.
+
+ LabelVector stubs;
+ if (!stubs.reserve(depths.length())) {
+ return false;
+ }
+
+ for (uint32_t depth : depths) {
+ stubs.infallibleEmplaceBack(NonAssertingLabel());
+ masm.bind(&stubs.back());
+ shuffleStackResultsBeforeBranch(resultsBase, controlItem(depth).stackHeight,
+ branchParams);
+ controlItem(depth).bceSafeOnExit &= bceSafe_;
+ masm.jump(&controlItem(depth).label);
+ }
+
+ // Emit table.
+
+ Label theTable;
+ jumpTable(stubs, &theTable);
+
+ // Emit indirect jump. rc is live here.
+
+ tableSwitch(&theTable, rc, &dispatchCode);
+
+ deadCode_ = true;
+
+ // Clean up.
+
+ freeI32(rc);
+ popValueStackBy(branchParams.length());
+
+ return true;
+}
+
+#ifdef ENABLE_WASM_EXCEPTIONS
+bool BaseCompiler::emitTry() {
+ ResultType params;
+ if (!iter_.readTry(&params)) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+ MOZ_CRASH("NYI");
+}
+
+bool BaseCompiler::emitCatch() {
+ LabelKind kind;
+ uint32_t eventIndex;
+ ResultType paramType, resultType;
+ NothingVector unused_tryValues;
+
+ if (!iter_.readCatch(&kind, &eventIndex, &paramType, &resultType,
+ &unused_tryValues)) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+ MOZ_CRASH("NYI");
+}
+
+bool BaseCompiler::emitThrow() {
+ uint32_t exnIndex;
+ NothingVector unused_argValues;
+
+ if (!iter_.readThrow(&exnIndex, &unused_argValues)) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+ MOZ_CRASH("NYI");
+}
+#endif
+
+bool BaseCompiler::emitDrop() {
+ if (!iter_.readDrop()) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+ dropValue();
+ return true;
+}
+
+void BaseCompiler::doReturn(ContinuationKind kind) {
+ if (deadCode_) {
+ return;
+ }
+
+ StackHeight height = controlOutermost().stackHeight;
+ ResultType type = ResultType::Vector(funcType().results());
+ popBlockResults(type, height, kind);
+ masm.jump(&returnLabel_);
+ freeResultRegisters(type);
+}
+
+bool BaseCompiler::emitReturn() {
+ NothingVector unused_values;
+ if (!iter_.readReturn(&unused_values)) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+ doReturn(ContinuationKind::Jump);
+ deadCode_ = true;
+
+ return true;
+}
+
+bool BaseCompiler::emitCallArgs(const ValTypeVector& argTypes,
+ const StackResultsLoc& results,
+ FunctionCall* baselineCall,
+ CalleeOnStack calleeOnStack) {
+ MOZ_ASSERT(!deadCode_);
+
+ ArgTypeVector args(argTypes, results.stackResults());
+ uint32_t naturalArgCount = argTypes.length();
+ uint32_t abiArgCount = args.lengthWithStackResults();
+ startCallArgs(StackArgAreaSizeUnaligned(args), baselineCall);
+
+ // Args are deeper on the stack than the stack result area, if any.
+ size_t argsDepth = results.count();
+ // They're deeper than the callee too, for callIndirect.
+ if (calleeOnStack == CalleeOnStack::True) {
+ argsDepth++;
+ }
+
+ for (size_t i = 0; i < abiArgCount; ++i) {
+ if (args.isNaturalArg(i)) {
+ size_t naturalIndex = args.naturalIndex(i);
+ size_t stackIndex = naturalArgCount - 1 - naturalIndex + argsDepth;
+ passArg(argTypes[naturalIndex], peek(stackIndex), baselineCall);
+ } else {
+ // The synthetic stack result area pointer.
+ ABIArg argLoc = baselineCall->abi.next(MIRType::Pointer);
+ if (argLoc.kind() == ABIArg::Stack) {
+ ScratchPtr scratch(*this);
+ fr.computeOutgoingStackResultAreaPtr(results, scratch);
+ masm.storePtr(scratch, Address(masm.getStackPointer(),
+ argLoc.offsetFromArgBase()));
+ } else {
+ fr.computeOutgoingStackResultAreaPtr(results, RegPtr(argLoc.gpr()));
+ }
+ }
+ }
+
+ fr.loadTlsPtr(WasmTlsReg);
+ return true;
+}
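+
+// A worked example of the peek indices above, with hypothetical counts: for a
+// call_indirect with two natural args, one stack-result Stk entry
+// (results.count() == 1), and the callee on the stack, argsDepth == 2 and the
+// value stack is, from the top:
+//
+//   peek(0)   stack-result area placeholder
+//   peek(1)   callee
+//   peek(2)   arg 1   (stackIndex = 2 - 1 - 1 + 2)
+//   peek(3)   arg 0   (stackIndex = 2 - 1 - 0 + 2)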
+
+void BaseCompiler::pushReturnValueOfCall(const FunctionCall& call,
+ MIRType type) {
+ switch (type) {
+ case MIRType::Int32: {
+ RegI32 rv = captureReturnedI32();
+ pushI32(rv);
+ break;
+ }
+ case MIRType::Int64: {
+ RegI64 rv = captureReturnedI64();
+ pushI64(rv);
+ break;
+ }
+ case MIRType::Float32: {
+ RegF32 rv = captureReturnedF32(call);
+ pushF32(rv);
+ break;
+ }
+ case MIRType::Double: {
+ RegF64 rv = captureReturnedF64(call);
+ pushF64(rv);
+ break;
+ }
+#ifdef ENABLE_WASM_SIMD
+ case MIRType::Simd128: {
+ RegV128 rv = captureReturnedV128(call);
+ pushV128(rv);
+ break;
+ }
+#endif
+ case MIRType::RefOrNull: {
+ RegPtr rv = captureReturnedRef();
+ pushRef(rv);
+ break;
+ }
+ default:
+ // In particular, passing |type| as MIRType::Void or MIRType::Pointer to
+ // this function is an error.
+ MOZ_CRASH("Function return type");
+ }
+}
+
+bool BaseCompiler::pushStackResultsForCall(const ResultType& type, RegPtr temp,
+ StackResultsLoc* loc) {
+ if (!ABIResultIter::HasStackResults(type)) {
+ return true;
+ }
+
+ // This method is the only one in the class that can increase stk_.length() by
+ // an unbounded amount, so it's the only one that requires an allocation.
+ // (The general case is handled in emitBody.)
+ if (!stk_.reserve(stk_.length() + type.length())) {
+ return false;
+ }
+
+ // Measure stack results.
+ ABIResultIter i(type);
+ size_t count = 0;
+ for (; !i.done(); i.next()) {
+ if (i.cur().onStack()) {
+ count++;
+ }
+ }
+ uint32_t bytes = i.stackBytesConsumedSoFar();
+
+ // Reserve space for the stack results.
+ StackHeight resultsBase = fr.stackHeight();
+ uint32_t height = fr.prepareStackResultArea(resultsBase, bytes);
+
+ // Push Stk values onto the value stack, and zero out Ref values.
+ for (i.switchToPrev(); !i.done(); i.prev()) {
+ const ABIResult& result = i.cur();
+ if (result.onStack()) {
+ Stk v = captureStackResult(result, resultsBase, bytes);
+ push(v);
+ if (v.kind() == Stk::MemRef) {
+ stackMapGenerator_.memRefsOnStk++;
+ fr.storeImmediatePtrToStack(intptr_t(0), v.offs(), temp);
+ }
+ }
+ }
+
+ *loc = StackResultsLoc(bytes, count, height);
+
+ return true;
+}
+
+// After a call, some results may be written to the stack result locations that
+// are pushed on the machine stack after any stack args. If there are stack
+// args and stack results, these results need to be shuffled down, as the args
+// are "consumed" by the call.
+void BaseCompiler::popStackResultsAfterCall(const StackResultsLoc& results,
+ uint32_t stackArgBytes) {
+ if (results.bytes() != 0) {
+ popValueStackBy(results.count());
+ if (stackArgBytes != 0) {
+ uint32_t srcHeight = results.height();
+ MOZ_ASSERT(srcHeight >= stackArgBytes + results.bytes());
+ uint32_t destHeight = srcHeight - stackArgBytes;
+
+ fr.shuffleStackResultsTowardFP(srcHeight, destHeight, results.bytes(),
+ ABINonArgReturnVolatileReg);
+ }
+ }
+}
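+
+// A worked example with hypothetical sizes: if the call consumed 32 bytes of
+// outgoing stack args (stackArgBytes == 32) and produced 16 bytes of stack
+// results (results.bytes() == 16) at results.height() == 48, then
+//
+//   uint32_t srcHeight = results.height();            // 48
+//   uint32_t destHeight = srcHeight - stackArgBytes;  // 16
+//
+// and the 16 result bytes are shuffled toward the FP by 32 bytes, reclaiming
+// the outgoing-argument area.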
+
+// For now, always sync() at the beginning of the call to easily save live
+// values.
+//
+// TODO / OPTIMIZE (Bug 1316806): We may be able to avoid a full sync(), since
+// all we want is to save live registers that won't be saved by the callee or
+// that we need for outgoing args - we don't need to sync the locals. We can
+// just push the necessary registers, it'll be like a lightweight sync.
+//
+// Even some of the pushing may be unnecessary if the registers will be consumed
+// by the call, because then what we want is parallel assignment to the argument
+// registers or onto the stack for outgoing arguments. A sync() is just
+// simpler.
+
+bool BaseCompiler::emitCall() {
+ uint32_t lineOrBytecode = readCallSiteLineOrBytecode();
+
+ uint32_t funcIndex;
+ NothingVector args_;
+ if (!iter_.readCall(&funcIndex, &args_)) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+ sync();
+
+ const FuncType& funcType = *moduleEnv_.funcs[funcIndex].type;
+ bool import = moduleEnv_.funcIsImport(funcIndex);
+
+ uint32_t numArgs = funcType.args().length();
+ size_t stackArgBytes = stackConsumed(numArgs);
+
+ ResultType resultType(ResultType::Vector(funcType.results()));
+ StackResultsLoc results;
+ if (!pushStackResultsForCall(resultType, RegPtr(ABINonArgReg0), &results)) {
+ return false;
+ }
+
+ FunctionCall baselineCall(lineOrBytecode);
+ beginCall(baselineCall, UseABI::Wasm,
+ import ? InterModule::True : InterModule::False);
+
+ if (!emitCallArgs(funcType.args(), results, &baselineCall,
+ CalleeOnStack::False)) {
+ return false;
+ }
+
+ CodeOffset raOffset;
+ if (import) {
+ raOffset = callImport(moduleEnv_.funcImportGlobalDataOffsets[funcIndex],
+ baselineCall);
+ } else {
+ raOffset = callDefinition(funcIndex, baselineCall);
+ }
+
+ if (!createStackMap("emitCall", raOffset)) {
+ return false;
+ }
+
+ popStackResultsAfterCall(results, stackArgBytes);
+
+ endCall(baselineCall, stackArgBytes);
+
+ popValueStackBy(numArgs);
+
+ captureCallResultRegisters(resultType);
+ return pushCallResults(baselineCall, resultType, results);
+}
+
+bool BaseCompiler::emitCallIndirect() {
+ uint32_t lineOrBytecode = readCallSiteLineOrBytecode();
+
+ uint32_t funcTypeIndex;
+ uint32_t tableIndex;
+ Nothing callee_;
+ NothingVector args_;
+ if (!iter_.readCallIndirect(&funcTypeIndex, &tableIndex, &callee_, &args_)) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+ sync();
+
+ const FuncType& funcType = moduleEnv_.types[funcTypeIndex].funcType();
+
+ // Stack: ... arg1 .. argn callee
+
+ uint32_t numArgs = funcType.args().length() + 1;
+ size_t stackArgBytes = stackConsumed(numArgs);
+
+ ResultType resultType(ResultType::Vector(funcType.results()));
+ StackResultsLoc results;
+ if (!pushStackResultsForCall(resultType, RegPtr(ABINonArgReg0), &results)) {
+ return false;
+ }
+
+ FunctionCall baselineCall(lineOrBytecode);
+ beginCall(baselineCall, UseABI::Wasm, InterModule::True);
+
+ if (!emitCallArgs(funcType.args(), results, &baselineCall,
+ CalleeOnStack::True)) {
+ return false;
+ }
+
+ const Stk& callee = peek(results.count());
+ CodeOffset raOffset =
+ callIndirect(funcTypeIndex, tableIndex, callee, baselineCall);
+ if (!createStackMap("emitCallIndirect", raOffset)) {
+ return false;
+ }
+
+ popStackResultsAfterCall(results, stackArgBytes);
+
+ endCall(baselineCall, stackArgBytes);
+
+ popValueStackBy(numArgs);
+
+ captureCallResultRegisters(resultType);
+ return pushCallResults(baselineCall, resultType, results);
+}
+
+void BaseCompiler::emitRound(RoundingMode roundingMode, ValType operandType) {
+ if (operandType == ValType::F32) {
+ RegF32 f0 = popF32();
+ roundF32(roundingMode, f0);
+ pushF32(f0);
+ } else if (operandType == ValType::F64) {
+ RegF64 f0 = popF64();
+ roundF64(roundingMode, f0);
+ pushF64(f0);
+ } else {
+ MOZ_CRASH("unexpected type");
+ }
+}
+
+bool BaseCompiler::emitUnaryMathBuiltinCall(SymbolicAddress callee,
+ ValType operandType) {
+ uint32_t lineOrBytecode = readCallSiteLineOrBytecode();
+
+ Nothing operand_;
+ if (!iter_.readUnary(operandType, &operand_)) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+ RoundingMode roundingMode;
+ if (IsRoundingFunction(callee, &roundingMode) &&
+ supportsRoundInstruction(roundingMode)) {
+ emitRound(roundingMode, operandType);
+ return true;
+ }
+
+ sync();
+
+ ValTypeVector& signature = operandType == ValType::F32 ? SigF_ : SigD_;
+ ValType retType = operandType;
+ uint32_t numArgs = signature.length();
+ size_t stackSpace = stackConsumed(numArgs);
+ StackResultsLoc noStackResults;
+
+ FunctionCall baselineCall(lineOrBytecode);
+ beginCall(baselineCall, UseABI::Builtin, InterModule::False);
+
+ if (!emitCallArgs(signature, noStackResults, &baselineCall,
+ CalleeOnStack::False)) {
+ return false;
+ }
+
+ CodeOffset raOffset = builtinCall(callee, baselineCall);
+ if (!createStackMap("emitUnaryMathBuiltin[..]", raOffset)) {
+ return false;
+ }
+
+ endCall(baselineCall, stackSpace);
+
+ popValueStackBy(numArgs);
+
+ pushReturnValueOfCall(baselineCall, ToMIRType(retType));
+
+ return true;
+}
+
+#ifdef RABALDR_INT_DIV_I64_CALLOUT
+bool BaseCompiler::emitDivOrModI64BuiltinCall(SymbolicAddress callee,
+ ValType operandType) {
+ MOZ_ASSERT(operandType == ValType::I64);
+ MOZ_ASSERT(!deadCode_);
+
+ sync();
+
+ needI64(specific_.abiReturnRegI64);
+
+ RegI64 rhs = popI64();
+ RegI64 srcDest = popI64ToSpecific(specific_.abiReturnRegI64);
+
+ Label done;
+
+ checkDivideByZeroI64(rhs);
+
+ if (callee == SymbolicAddress::DivI64) {
+ checkDivideSignedOverflowI64(rhs, srcDest, &done, ZeroOnOverflow(false));
+ } else if (callee == SymbolicAddress::ModI64) {
+ checkDivideSignedOverflowI64(rhs, srcDest, &done, ZeroOnOverflow(true));
+ }
+
+ masm.setupWasmABICall();
+ masm.passABIArg(srcDest.high);
+ masm.passABIArg(srcDest.low);
+ masm.passABIArg(rhs.high);
+ masm.passABIArg(rhs.low);
+ CodeOffset raOffset = masm.callWithABI(bytecodeOffset(), callee,
+ mozilla::Some(fr.getTlsPtrOffset()));
+ if (!createStackMap("emitDivOrModI64Bui[..]", raOffset)) {
+ return false;
+ }
+
+ masm.bind(&done);
+
+ freeI64(rhs);
+ pushI64(srcDest);
+ return true;
+}
+#endif // RABALDR_INT_DIV_I64_CALLOUT
+
+#ifdef RABALDR_I64_TO_FLOAT_CALLOUT
+bool BaseCompiler::emitConvertInt64ToFloatingCallout(SymbolicAddress callee,
+ ValType operandType,
+ ValType resultType) {
+ sync();
+
+ RegI64 input = popI64();
+
+ FunctionCall call(0);
+
+ masm.setupWasmABICall();
+# ifdef JS_PUNBOX64
+ MOZ_CRASH("BaseCompiler platform hook: emitConvertInt64ToFloatingCallout");
+# else
+ masm.passABIArg(input.high);
+ masm.passABIArg(input.low);
+# endif
+ CodeOffset raOffset = masm.callWithABI(
+ bytecodeOffset(), callee, mozilla::Some(fr.getTlsPtrOffset()),
+ resultType == ValType::F32 ? MoveOp::FLOAT32 : MoveOp::DOUBLE);
+ if (!createStackMap("emitConvertInt64To[..]", raOffset)) {
+ return false;
+ }
+
+ freeI64(input);
+
+ if (resultType == ValType::F32) {
+ pushF32(captureReturnedF32(call));
+ } else {
+ pushF64(captureReturnedF64(call));
+ }
+
+ return true;
+}
+#endif // RABALDR_I64_TO_FLOAT_CALLOUT
+
+#ifdef RABALDR_FLOAT_TO_I64_CALLOUT
+// `Callee` always takes a double, so a float32 input must be converted.
+bool BaseCompiler::emitConvertFloatingToInt64Callout(SymbolicAddress callee,
+ ValType operandType,
+ ValType resultType) {
+ RegF64 doubleInput;
+ if (operandType == ValType::F32) {
+ doubleInput = needF64();
+ RegF32 input = popF32();
+ masm.convertFloat32ToDouble(input, doubleInput);
+ freeF32(input);
+ } else {
+ doubleInput = popF64();
+ }
+
+  // We may need the value after the call for the OOL check.
+ RegF64 otherReg = needF64();
+ moveF64(doubleInput, otherReg);
+ pushF64(otherReg);
+
+ sync();
+
+ FunctionCall call(0);
+
+ masm.setupWasmABICall();
+ masm.passABIArg(doubleInput, MoveOp::DOUBLE);
+ CodeOffset raOffset = masm.callWithABI(bytecodeOffset(), callee,
+ mozilla::Some(fr.getTlsPtrOffset()));
+ if (!createStackMap("emitConvertFloatin[..]", raOffset)) {
+ return false;
+ }
+
+ freeF64(doubleInput);
+
+ RegI64 rv = captureReturnedI64();
+
+ RegF64 inputVal = popF64();
+
+ TruncFlags flags = 0;
+ if (callee == SymbolicAddress::TruncateDoubleToUint64) {
+ flags |= TRUNC_UNSIGNED;
+ }
+ if (callee == SymbolicAddress::SaturatingTruncateDoubleToInt64 ||
+ callee == SymbolicAddress::SaturatingTruncateDoubleToUint64) {
+ flags |= TRUNC_SATURATING;
+ }
+
+  // If we're saturating, the callout will always produce the final result
+  // value. Otherwise, the callout returns 0x8000000000000000 as a sentinel
+  // value, and we need to check for it and produce traps as appropriate.
+ OutOfLineCode* ool = nullptr;
+ if (!(flags & TRUNC_SATURATING)) {
+ // The OOL check just succeeds or fails, it does not generate a value.
+ ool = addOutOfLineCode(new (alloc_) OutOfLineTruncateCheckF32OrF64ToI64(
+ AnyReg(inputVal), rv, flags, bytecodeOffset()));
+ if (!ool) {
+ return false;
+ }
+
+ masm.branch64(Assembler::Equal, rv, Imm64(0x8000000000000000),
+ ool->entry());
+ masm.bind(ool->rejoin());
+ }
+
+ pushI64(rv);
+ freeF64(inputVal);
+
+ return true;
+}
+#endif // RABALDR_FLOAT_TO_I64_CALLOUT
+
+bool BaseCompiler::emitGetLocal() {
+ uint32_t slot;
+ if (!iter_.readGetLocal(locals_, &slot)) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+  // Local loads are pushed unresolved, i.e., they may be deferred
+  // until they are needed, until they may be affected by a store, or
+  // until a sync. This is intended to reduce register pressure.
+
+ switch (locals_[slot].kind()) {
+ case ValType::I32:
+ pushLocalI32(slot);
+ break;
+ case ValType::I64:
+ pushLocalI64(slot);
+ break;
+ case ValType::V128:
+#ifdef ENABLE_WASM_SIMD
+ pushLocalV128(slot);
+ break;
+#else
+ MOZ_CRASH("No SIMD support");
+#endif
+ case ValType::F64:
+ pushLocalF64(slot);
+ break;
+ case ValType::F32:
+ pushLocalF32(slot);
+ break;
+ case ValType::Ref:
+ pushLocalRef(slot);
+ break;
+ }
+
+ return true;
+}
+
+template <bool isSetLocal>
+bool BaseCompiler::emitSetOrTeeLocal(uint32_t slot) {
+ if (deadCode_) {
+ return true;
+ }
+
+ bceLocalIsUpdated(slot);
+ switch (locals_[slot].kind()) {
+ case ValType::I32: {
+ RegI32 rv = popI32();
+ syncLocal(slot);
+ fr.storeLocalI32(rv, localFromSlot(slot, MIRType::Int32));
+ if (isSetLocal) {
+ freeI32(rv);
+ } else {
+ pushI32(rv);
+ }
+ break;
+ }
+ case ValType::I64: {
+ RegI64 rv = popI64();
+ syncLocal(slot);
+ fr.storeLocalI64(rv, localFromSlot(slot, MIRType::Int64));
+ if (isSetLocal) {
+ freeI64(rv);
+ } else {
+ pushI64(rv);
+ }
+ break;
+ }
+ case ValType::F64: {
+ RegF64 rv = popF64();
+ syncLocal(slot);
+ fr.storeLocalF64(rv, localFromSlot(slot, MIRType::Double));
+ if (isSetLocal) {
+ freeF64(rv);
+ } else {
+ pushF64(rv);
+ }
+ break;
+ }
+ case ValType::F32: {
+ RegF32 rv = popF32();
+ syncLocal(slot);
+ fr.storeLocalF32(rv, localFromSlot(slot, MIRType::Float32));
+ if (isSetLocal) {
+ freeF32(rv);
+ } else {
+ pushF32(rv);
+ }
+ break;
+ }
+ case ValType::V128: {
+#ifdef ENABLE_WASM_SIMD
+ RegV128 rv = popV128();
+ syncLocal(slot);
+ fr.storeLocalV128(rv, localFromSlot(slot, MIRType::Simd128));
+ if (isSetLocal) {
+ freeV128(rv);
+ } else {
+ pushV128(rv);
+ }
+ break;
+#else
+ MOZ_CRASH("No SIMD support");
+#endif
+ }
+ case ValType::Ref: {
+ RegPtr rv = popRef();
+ syncLocal(slot);
+ fr.storeLocalPtr(rv, localFromSlot(slot, MIRType::RefOrNull));
+ if (isSetLocal) {
+ freeRef(rv);
+ } else {
+ pushRef(rv);
+ }
+ break;
+ }
+ }
+
+ return true;
+}
+
+bool BaseCompiler::emitSetLocal() {
+ uint32_t slot;
+ Nothing unused_value;
+ if (!iter_.readSetLocal(locals_, &slot, &unused_value)) {
+ return false;
+ }
+ return emitSetOrTeeLocal<true>(slot);
+}
+
+bool BaseCompiler::emitTeeLocal() {
+ uint32_t slot;
+ Nothing unused_value;
+ if (!iter_.readTeeLocal(locals_, &slot, &unused_value)) {
+ return false;
+ }
+ return emitSetOrTeeLocal<false>(slot);
+}
+
+bool BaseCompiler::emitGetGlobal() {
+ uint32_t id;
+ if (!iter_.readGetGlobal(&id)) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+ const GlobalDesc& global = moduleEnv_.globals[id];
+
+ if (global.isConstant()) {
+ LitVal value = global.constantValue();
+ switch (value.type().kind()) {
+ case ValType::I32:
+ pushI32(value.i32());
+ break;
+ case ValType::I64:
+ pushI64(value.i64());
+ break;
+ case ValType::F32:
+ pushF32(value.f32());
+ break;
+ case ValType::F64:
+ pushF64(value.f64());
+ break;
+ case ValType::Ref:
+ pushRef(intptr_t(value.ref().forCompiledCode()));
+ break;
+#ifdef ENABLE_WASM_SIMD
+ case ValType::V128:
+ pushV128(value.v128());
+ break;
+#endif
+ default:
+ MOZ_CRASH("Global constant type");
+ }
+ return true;
+ }
+
+ switch (global.type().kind()) {
+ case ValType::I32: {
+ RegI32 rv = needI32();
+ ScratchI32 tmp(*this);
+ masm.load32(addressOfGlobalVar(global, tmp), rv);
+ pushI32(rv);
+ break;
+ }
+ case ValType::I64: {
+ RegI64 rv = needI64();
+ ScratchI32 tmp(*this);
+ masm.load64(addressOfGlobalVar(global, tmp), rv);
+ pushI64(rv);
+ break;
+ }
+ case ValType::F32: {
+ RegF32 rv = needF32();
+ ScratchI32 tmp(*this);
+ masm.loadFloat32(addressOfGlobalVar(global, tmp), rv);
+ pushF32(rv);
+ break;
+ }
+ case ValType::F64: {
+ RegF64 rv = needF64();
+ ScratchI32 tmp(*this);
+ masm.loadDouble(addressOfGlobalVar(global, tmp), rv);
+ pushF64(rv);
+ break;
+ }
+ case ValType::Ref: {
+ RegPtr rv = needRef();
+ ScratchI32 tmp(*this);
+ masm.loadPtr(addressOfGlobalVar(global, tmp), rv);
+ pushRef(rv);
+ break;
+ }
+#ifdef ENABLE_WASM_SIMD
+ case ValType::V128: {
+ RegV128 rv = needV128();
+ ScratchI32 tmp(*this);
+ masm.loadUnalignedSimd128(addressOfGlobalVar(global, tmp), rv);
+ pushV128(rv);
+ break;
+ }
+#endif
+ default:
+ MOZ_CRASH("Global variable type");
+ break;
+ }
+ return true;
+}
+
+bool BaseCompiler::emitSetGlobal() {
+ uint32_t id;
+ Nothing unused_value;
+ if (!iter_.readSetGlobal(&id, &unused_value)) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+ const GlobalDesc& global = moduleEnv_.globals[id];
+
+ switch (global.type().kind()) {
+ case ValType::I32: {
+ RegI32 rv = popI32();
+ ScratchI32 tmp(*this);
+ masm.store32(rv, addressOfGlobalVar(global, tmp));
+ freeI32(rv);
+ break;
+ }
+ case ValType::I64: {
+ RegI64 rv = popI64();
+ ScratchI32 tmp(*this);
+ masm.store64(rv, addressOfGlobalVar(global, tmp));
+ freeI64(rv);
+ break;
+ }
+ case ValType::F32: {
+ RegF32 rv = popF32();
+ ScratchI32 tmp(*this);
+ masm.storeFloat32(rv, addressOfGlobalVar(global, tmp));
+ freeF32(rv);
+ break;
+ }
+ case ValType::F64: {
+ RegF64 rv = popF64();
+ ScratchI32 tmp(*this);
+ masm.storeDouble(rv, addressOfGlobalVar(global, tmp));
+ freeF64(rv);
+ break;
+ }
+ case ValType::Ref: {
+ RegPtr valueAddr(PreBarrierReg);
+ needRef(valueAddr);
+ {
+ ScratchI32 tmp(*this);
+ masm.computeEffectiveAddress(addressOfGlobalVar(global, tmp),
+ valueAddr);
+ }
+ RegPtr rv = popRef();
+ // emitBarrieredStore consumes valueAddr
+ if (!emitBarrieredStore(Nothing(), valueAddr, rv)) {
+ return false;
+ }
+ freeRef(rv);
+ break;
+ }
+#ifdef ENABLE_WASM_SIMD
+ case ValType::V128: {
+ RegV128 rv = popV128();
+ ScratchI32 tmp(*this);
+ masm.storeUnalignedSimd128(rv, addressOfGlobalVar(global, tmp));
+ freeV128(rv);
+ break;
+ }
+#endif
+ default:
+ MOZ_CRASH("Global variable type");
+ break;
+ }
+ return true;
+}
+
+// Bounds check elimination.
+//
+// We perform BCE on two kinds of address expressions: on constant heap pointers
+// that are known to be in the heap or will be handled by the out-of-bounds trap
+// handler; and on local variables that have been checked in dominating code
+// without being updated since.
+//
+// For an access through a constant heap pointer + an offset we can eliminate
+// the bounds check if the sum of the address and offset is below the sum of the
+// minimum memory length and the offset guard length.
+//
+// For an access through a local variable + an offset we can eliminate the
+// bounds check if the local variable has already been checked and has not been
+// updated since, and the offset is less than the guard limit.
+//
+// To track locals for which we can eliminate checks we use a bit vector
+// bceSafe_ that has a bit set for those locals whose bounds have been checked
+// and which have not subsequently been set. Initially this vector is zero.
+//
+// In straight-line code a bit is set when we perform a bounds check on an
+// access via the local and is reset when the variable is updated.
+//
+// In control flow, the bit vector is manipulated as follows. Each ControlItem
+// has a value bceSafeOnEntry, which is the value of bceSafe_ on entry to the
+// item, and a value bceSafeOnExit, which is initially ~0. On a branch (br,
+// brIf, brTable), we always AND the branch target's bceSafeOnExit with the
+// value of bceSafe_ at the branch point. On exiting an item by falling out of
+// it, provided we're not in dead code, we AND the current value of bceSafe_
+// into the item's bceSafeOnExit. Additional processing depends on the item
+// type:
+//
+// - After a block, set bceSafe_ to the block's bceSafeOnExit.
+//
+// - On loop entry, after pushing the ControlItem, set bceSafe_ to zero; the
+//   back edges would otherwise require us to iterate to a fixed point.
+//
+// - After a loop, the bceSafe_ is left unchanged, because only fallthrough
+// control flow will reach that point and the bceSafe_ value represents the
+// correct state of the fallthrough path.
+//
+// - Set bceSafe_ to the ControlItem's bceSafeOnEntry at both the 'then' branch
+// and the 'else' branch.
+//
+// - After an if-then-else, set bceSafe_ to the if-then-else's bceSafeOnExit.
+//
+// - After an if-then, set bceSafe_ to the if-then's bceSafeOnExit AND'ed with
+// the if-then's bceSafeOnEntry.
+//
+// Finally, when the debugger allows locals to be mutated we must disable BCE
+// for references via a local, by returning immediately from bceCheckLocal if
+// compilerEnv_.debugEnabled() is true.
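+//
+// Schematically, for local 3 (illustrative only; bceCheckLocal and
+// bceLocalIsUpdated do the real bookkeeping):
+//
+//   bceSafe_ |= (1 << 3);               // bounds check emitted for local 3
+//   ...
+//   target.bceSafeOnExit &= bceSafe_;   // at a branch to 'target'
+//   ...
+//   bceSafe_ &= ~(1 << 3);              // local 3 assigned; bit cleared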
+//
+//
+// Alignment check elimination.
+//
+// Alignment checks for atomic operations can be omitted if the pointer is a
+// constant and the pointer + offset is aligned. Alignment checking that can't
+// be omitted can still be simplified by checking only the pointer if the offset
+// is aligned.
+//
+// (In addition, alignment checking of the pointer can be omitted if the pointer
+// has been checked in dominating code, but we don't do that yet.)
+
+// TODO / OPTIMIZE (bug 1329576): There are opportunities to generate better
+// code by not moving a constant address with a zero offset into a register.
+
+RegI32 BaseCompiler::popMemoryAccess(MemoryAccessDesc* access,
+ AccessCheck* check) {
+ check->onlyPointerAlignment =
+ (access->offset() & (access->byteSize() - 1)) == 0;
+
+ int32_t addrTemp;
+ if (popConstI32(&addrTemp)) {
+ uint32_t addr = addrTemp;
+
+ uint32_t offsetGuardLimit =
+ GetMaxOffsetGuardLimit(moduleEnv_.hugeMemoryEnabled());
+
+ uint64_t ea = uint64_t(addr) + uint64_t(access->offset());
+ uint64_t limit = moduleEnv_.minMemoryLength + offsetGuardLimit;
+
+ check->omitBoundsCheck = ea < limit;
+ check->omitAlignmentCheck = (ea & (access->byteSize() - 1)) == 0;
+
+ // Fold the offset into the pointer if we can, as this is always
+ // beneficial.
+
+ if (ea <= UINT32_MAX) {
+ addr = uint32_t(ea);
+ access->clearOffset();
+ }
+
+ RegI32 r = needI32();
+ moveImm32(int32_t(addr), r);
+ return r;
+ }
+
+ uint32_t local;
+ if (peekLocalI32(&local)) {
+ bceCheckLocal(access, check, local);
+ }
+
+ return popI32();
+}
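+
+// A worked example of the folding above, with hypothetical numbers: for a
+// 4-byte access at constant address 0x1000 with offset 4, a 64 KB minimum
+// memory and an assumed 32 KB offset guard region:
+//
+//   uint64_t ea = 0x1000 + 4;                         // 0x1004
+//   bool omitBoundsCheck = ea < 65536 + 32768;        // true
+//   bool omitAlignmentCheck = (ea & (4 - 1)) == 0;    // true
+//
+// and since ea <= UINT32_MAX the offset is folded into the constant pointer,
+// leaving an access at address 0x1004 with offset 0.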
+
+void BaseCompiler::pushHeapBase() {
+#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_ARM64) || \
+ defined(JS_CODEGEN_MIPS64)
+ RegI64 heapBase = needI64();
+ moveI64(RegI64(Register64(HeapReg)), heapBase);
+ pushI64(heapBase);
+#elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_MIPS32)
+ RegI32 heapBase = needI32();
+ moveI32(RegI32(HeapReg), heapBase);
+ pushI32(heapBase);
+#elif defined(JS_CODEGEN_X86)
+ RegI32 heapBase = needI32();
+ fr.loadTlsPtr(heapBase);
+ masm.loadPtr(Address(heapBase, offsetof(TlsData, memoryBase)), heapBase);
+ pushI32(heapBase);
+#else
+ MOZ_CRASH("BaseCompiler platform hook: pushHeapBase");
+#endif
+}
+
+RegI32 BaseCompiler::maybeLoadTlsForAccess(const AccessCheck& check) {
+ RegI32 tls;
+ if (needTlsForAccess(check)) {
+ tls = needI32();
+ fr.loadTlsPtr(tls);
+ }
+ return tls;
+}
+
+RegI32 BaseCompiler::maybeLoadTlsForAccess(const AccessCheck& check,
+ RegI32 specific) {
+ if (needTlsForAccess(check)) {
+ fr.loadTlsPtr(specific);
+ return specific;
+ }
+ return RegI32::Invalid();
+}
+
+bool BaseCompiler::loadCommon(MemoryAccessDesc* access, AccessCheck check,
+ ValType type) {
+ RegI32 tls, temp1, temp2, temp3;
+ needLoadTemps(*access, &temp1, &temp2, &temp3);
+
+ switch (type.kind()) {
+ case ValType::I32: {
+ RegI32 rp = popMemoryAccess(access, &check);
+#ifdef JS_CODEGEN_ARM
+ RegI32 rv = IsUnaligned(*access) ? needI32() : rp;
+#else
+ RegI32 rv = rp;
+#endif
+ tls = maybeLoadTlsForAccess(check);
+ if (!load(access, &check, tls, rp, AnyReg(rv), temp1, temp2, temp3)) {
+ return false;
+ }
+ pushI32(rv);
+ if (rp != rv) {
+ freeI32(rp);
+ }
+ break;
+ }
+ case ValType::I64: {
+ RegI64 rv;
+ RegI32 rp;
+#ifdef JS_CODEGEN_X86
+ rv = specific_.abiReturnRegI64;
+ needI64(rv);
+ rp = popMemoryAccess(access, &check);
+#else
+ rp = popMemoryAccess(access, &check);
+ rv = needI64();
+#endif
+ tls = maybeLoadTlsForAccess(check);
+ if (!load(access, &check, tls, rp, AnyReg(rv), temp1, temp2, temp3)) {
+ return false;
+ }
+ pushI64(rv);
+ freeI32(rp);
+ break;
+ }
+ case ValType::F32: {
+ RegI32 rp = popMemoryAccess(access, &check);
+ RegF32 rv = needF32();
+ tls = maybeLoadTlsForAccess(check);
+ if (!load(access, &check, tls, rp, AnyReg(rv), temp1, temp2, temp3)) {
+ return false;
+ }
+ pushF32(rv);
+ freeI32(rp);
+ break;
+ }
+ case ValType::F64: {
+ RegI32 rp = popMemoryAccess(access, &check);
+ RegF64 rv = needF64();
+ tls = maybeLoadTlsForAccess(check);
+ if (!load(access, &check, tls, rp, AnyReg(rv), temp1, temp2, temp3)) {
+ return false;
+ }
+ pushF64(rv);
+ freeI32(rp);
+ break;
+ }
+#ifdef ENABLE_WASM_SIMD
+ case ValType::V128: {
+ RegI32 rp = popMemoryAccess(access, &check);
+ RegV128 rv = needV128();
+ tls = maybeLoadTlsForAccess(check);
+ if (!load(access, &check, tls, rp, AnyReg(rv), temp1, temp2, temp3)) {
+ return false;
+ }
+ pushV128(rv);
+ freeI32(rp);
+ break;
+ }
+#endif
+ default:
+ MOZ_CRASH("load type");
+ break;
+ }
+
+ maybeFreeI32(tls);
+ maybeFreeI32(temp1);
+ maybeFreeI32(temp2);
+ maybeFreeI32(temp3);
+
+ return true;
+}
+
+bool BaseCompiler::emitLoad(ValType type, Scalar::Type viewType) {
+ LinearMemoryAddress<Nothing> addr;
+ if (!iter_.readLoad(type, Scalar::byteSize(viewType), &addr)) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+ MemoryAccessDesc access(viewType, addr.align, addr.offset, bytecodeOffset());
+ return loadCommon(&access, AccessCheck(), type);
+}
+
+bool BaseCompiler::storeCommon(MemoryAccessDesc* access, AccessCheck check,
+ ValType resultType) {
+ RegI32 tls;
+ RegI32 temp = needStoreTemp(*access, resultType);
+
+ switch (resultType.kind()) {
+ case ValType::I32: {
+ RegI32 rv = popI32();
+ RegI32 rp = popMemoryAccess(access, &check);
+ tls = maybeLoadTlsForAccess(check);
+ if (!store(access, &check, tls, rp, AnyReg(rv), temp)) {
+ return false;
+ }
+ freeI32(rp);
+ freeI32(rv);
+ break;
+ }
+ case ValType::I64: {
+ RegI64 rv = popI64();
+ RegI32 rp = popMemoryAccess(access, &check);
+ tls = maybeLoadTlsForAccess(check);
+ if (!store(access, &check, tls, rp, AnyReg(rv), temp)) {
+ return false;
+ }
+ freeI32(rp);
+ freeI64(rv);
+ break;
+ }
+ case ValType::F32: {
+ RegF32 rv = popF32();
+ RegI32 rp = popMemoryAccess(access, &check);
+ tls = maybeLoadTlsForAccess(check);
+ if (!store(access, &check, tls, rp, AnyReg(rv), temp)) {
+ return false;
+ }
+ freeI32(rp);
+ freeF32(rv);
+ break;
+ }
+ case ValType::F64: {
+ RegF64 rv = popF64();
+ RegI32 rp = popMemoryAccess(access, &check);
+ tls = maybeLoadTlsForAccess(check);
+ if (!store(access, &check, tls, rp, AnyReg(rv), temp)) {
+ return false;
+ }
+ freeI32(rp);
+ freeF64(rv);
+ break;
+ }
+#ifdef ENABLE_WASM_SIMD
+ case ValType::V128: {
+ RegV128 rv = popV128();
+ RegI32 rp = popMemoryAccess(access, &check);
+ tls = maybeLoadTlsForAccess(check);
+ if (!store(access, &check, tls, rp, AnyReg(rv), temp)) {
+ return false;
+ }
+ freeI32(rp);
+ freeV128(rv);
+ break;
+ }
+#endif
+ default:
+ MOZ_CRASH("store type");
+ break;
+ }
+
+ maybeFreeI32(tls);
+ maybeFreeI32(temp);
+
+ return true;
+}
+
+bool BaseCompiler::emitStore(ValType resultType, Scalar::Type viewType) {
+ LinearMemoryAddress<Nothing> addr;
+ Nothing unused_value;
+ if (!iter_.readStore(resultType, Scalar::byteSize(viewType), &addr,
+ &unused_value)) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+ MemoryAccessDesc access(viewType, addr.align, addr.offset, bytecodeOffset());
+ return storeCommon(&access, AccessCheck(), resultType);
+}
+
+bool BaseCompiler::emitSelect(bool typed) {
+ StackType type;
+ Nothing unused_trueValue;
+ Nothing unused_falseValue;
+ Nothing unused_condition;
+ if (!iter_.readSelect(typed, &type, &unused_trueValue, &unused_falseValue,
+ &unused_condition)) {
+ return false;
+ }
+
+ if (deadCode_) {
+ resetLatentOp();
+ return true;
+ }
+
+ // I32 condition on top, then false, then true.
+
+ Label done;
+ BranchState b(&done);
+ emitBranchSetup(&b);
+
+ switch (type.valType().kind()) {
+ case ValType::I32: {
+ RegI32 r, rs;
+ pop2xI32(&r, &rs);
+ if (!emitBranchPerform(&b)) {
+ return false;
+ }
+ moveI32(rs, r);
+ masm.bind(&done);
+ freeI32(rs);
+ pushI32(r);
+ break;
+ }
+ case ValType::I64: {
+#ifdef JS_CODEGEN_X86
+ // There may be as many as four Int64 values in registers at a time: two
+ // for the latent branch operands, and two for the true/false values we
+ // normally pop before executing the branch. On x86 this is one value
+ // too many, so we need to generate more complicated code here, and for
+ // simplicity's sake we do so even if the branch operands are not Int64.
+ // However, the resulting control flow diamond is complicated since the
+ // arms of the diamond will have to stay synchronized with respect to
+ // their evaluation stack and regalloc state. To simplify further, we
+ // use a double branch and a temporary boolean value for now.
+ RegI32 temp = needI32();
+ moveImm32(0, temp);
+ if (!emitBranchPerform(&b)) {
+ return false;
+ }
+ moveImm32(1, temp);
+ masm.bind(&done);
+
+ Label trueValue;
+ RegI64 r, rs;
+ pop2xI64(&r, &rs);
+ masm.branch32(Assembler::Equal, temp, Imm32(0), &trueValue);
+ moveI64(rs, r);
+ masm.bind(&trueValue);
+ freeI32(temp);
+ freeI64(rs);
+ pushI64(r);
+#else
+ RegI64 r, rs;
+ pop2xI64(&r, &rs);
+ if (!emitBranchPerform(&b)) {
+ return false;
+ }
+ moveI64(rs, r);
+ masm.bind(&done);
+ freeI64(rs);
+ pushI64(r);
+#endif
+ break;
+ }
+ case ValType::F32: {
+ RegF32 r, rs;
+ pop2xF32(&r, &rs);
+ if (!emitBranchPerform(&b)) {
+ return false;
+ }
+ moveF32(rs, r);
+ masm.bind(&done);
+ freeF32(rs);
+ pushF32(r);
+ break;
+ }
+ case ValType::F64: {
+ RegF64 r, rs;
+ pop2xF64(&r, &rs);
+ if (!emitBranchPerform(&b)) {
+ return false;
+ }
+ moveF64(rs, r);
+ masm.bind(&done);
+ freeF64(rs);
+ pushF64(r);
+ break;
+ }
+#ifdef ENABLE_WASM_SIMD
+ case ValType::V128: {
+ RegV128 r, rs;
+ pop2xV128(&r, &rs);
+ if (!emitBranchPerform(&b)) {
+ return false;
+ }
+ moveV128(rs, r);
+ masm.bind(&done);
+ freeV128(rs);
+ pushV128(r);
+ break;
+ }
+#endif
+ case ValType::Ref: {
+ RegPtr r, rs;
+ pop2xRef(&r, &rs);
+ if (!emitBranchPerform(&b)) {
+ return false;
+ }
+ moveRef(rs, r);
+ masm.bind(&done);
+ freeRef(rs);
+ pushRef(r);
+ break;
+ }
+ default: {
+ MOZ_CRASH("select type");
+ }
+ }
+
+ return true;
+}
+
+void BaseCompiler::emitCompareI32(Assembler::Condition compareOp,
+ ValType compareType) {
+ MOZ_ASSERT(compareType == ValType::I32);
+
+ if (sniffConditionalControlCmp(compareOp, compareType)) {
+ return;
+ }
+
+ int32_t c;
+ if (popConstI32(&c)) {
+ RegI32 r = popI32();
+ masm.cmp32Set(compareOp, r, Imm32(c), r);
+ pushI32(r);
+ } else {
+ RegI32 r, rs;
+ pop2xI32(&r, &rs);
+ masm.cmp32Set(compareOp, r, rs, r);
+ freeI32(rs);
+ pushI32(r);
+ }
+}
+
+void BaseCompiler::emitCompareI64(Assembler::Condition compareOp,
+ ValType compareType) {
+ MOZ_ASSERT(compareType == ValType::I64);
+
+ if (sniffConditionalControlCmp(compareOp, compareType)) {
+ return;
+ }
+
+ RegI64 rs0, rs1;
+ pop2xI64(&rs0, &rs1);
+ RegI32 rd(fromI64(rs0));
+ cmp64Set(compareOp, rs0, rs1, rd);
+ freeI64(rs1);
+ freeI64Except(rs0, rd);
+ pushI32(rd);
+}
+
+void BaseCompiler::emitCompareF32(Assembler::DoubleCondition compareOp,
+ ValType compareType) {
+ MOZ_ASSERT(compareType == ValType::F32);
+
+ if (sniffConditionalControlCmp(compareOp, compareType)) {
+ return;
+ }
+
+ Label across;
+ RegF32 rs0, rs1;
+ pop2xF32(&rs0, &rs1);
+ RegI32 rd = needI32();
+ moveImm32(1, rd);
+ masm.branchFloat(compareOp, rs0, rs1, &across);
+ moveImm32(0, rd);
+ masm.bind(&across);
+ freeF32(rs0);
+ freeF32(rs1);
+ pushI32(rd);
+}
+
+void BaseCompiler::emitCompareF64(Assembler::DoubleCondition compareOp,
+ ValType compareType) {
+ MOZ_ASSERT(compareType == ValType::F64);
+
+ if (sniffConditionalControlCmp(compareOp, compareType)) {
+ return;
+ }
+
+ Label across;
+ RegF64 rs0, rs1;
+ pop2xF64(&rs0, &rs1);
+ RegI32 rd = needI32();
+ moveImm32(1, rd);
+ masm.branchDouble(compareOp, rs0, rs1, &across);
+ moveImm32(0, rd);
+ masm.bind(&across);
+ freeF64(rs0);
+ freeF64(rs1);
+ pushI32(rd);
+}
+
+void BaseCompiler::emitCompareRef(Assembler::Condition compareOp,
+ ValType compareType) {
+ MOZ_ASSERT(!sniffConditionalControlCmp(compareOp, compareType));
+
+ RegPtr rs1, rs2;
+ pop2xRef(&rs1, &rs2);
+ RegI32 rd = needI32();
+ masm.cmpPtrSet(compareOp, rs1, rs2, rd);
+ freeRef(rs1);
+ freeRef(rs2);
+ pushI32(rd);
+}
+
+bool BaseCompiler::emitInstanceCall(uint32_t lineOrBytecode,
+ const SymbolicAddressSignature& builtin,
+ bool pushReturnedValue /*=true*/) {
+ const MIRType* argTypes = builtin.argTypes;
+ MOZ_ASSERT(argTypes[0] == MIRType::Pointer);
+
+ sync();
+
+ uint32_t numNonInstanceArgs = builtin.numArgs - 1 /* instance */;
+ size_t stackSpace = stackConsumed(numNonInstanceArgs);
+
+ FunctionCall baselineCall(lineOrBytecode);
+ beginCall(baselineCall, UseABI::System, InterModule::True);
+
+ ABIArg instanceArg = reservePointerArgument(&baselineCall);
+
+ startCallArgs(StackArgAreaSizeUnaligned(builtin), &baselineCall);
+ for (uint32_t i = 1; i < builtin.numArgs; i++) {
+ ValType t;
+ switch (argTypes[i]) {
+ case MIRType::Int32:
+ t = ValType::I32;
+ break;
+ case MIRType::Int64:
+ t = ValType::I64;
+ break;
+ case MIRType::RefOrNull:
+ t = RefType::extern_();
+ break;
+ case MIRType::Pointer:
+        // Instance function args can now be uninterpreted pointers (e.g. for
+        // PostBarrier and PostBarrierFilter), so we simply treat them like
+        // an integer of the same size.
+ t = sizeof(void*) == 4 ? ValType::I32 : ValType::I64;
+ break;
+ default:
+ MOZ_CRASH("Unexpected type");
+ }
+ passArg(t, peek(numNonInstanceArgs - i), &baselineCall);
+ }
+ CodeOffset raOffset =
+ builtinInstanceMethodCall(builtin, instanceArg, baselineCall);
+ if (!createStackMap("emitInstanceCall", raOffset)) {
+ return false;
+ }
+
+ endCall(baselineCall, stackSpace);
+
+ popValueStackBy(numNonInstanceArgs);
+
+ // Note, many clients of emitInstanceCall currently assume that pushing the
+ // result here does not destroy ReturnReg.
+ //
+ // Furthermore, clients assume that if builtin.retType != MIRType::None, the
+ // callee will have returned a result and left it in ReturnReg for us to
+ // find, and that that register will not be destroyed here (or above).
+
+ if (pushReturnedValue) {
+ // For the return type only, MIRType::None is used to indicate that the
+ // call doesn't return a result, that is, returns a C/C++ "void".
+ MOZ_ASSERT(builtin.retType != MIRType::None);
+ pushReturnValueOfCall(baselineCall, builtin.retType);
+ }
+ return true;
+}
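+
+// A concrete reading of the argument loop above, for a hypothetical builtin
+// with numArgs == 4 (the instance pointer plus three wasm-level arguments),
+// so numNonInstanceArgs == 3:
+//
+//   i == 1:  passArg(..., peek(2), ...)   // deepest value, first wasm arg
+//   i == 2:  passArg(..., peek(1), ...)
+//   i == 3:  passArg(..., peek(0), ...)   // top of value stack, last arg
+//
+// MIRType::Pointer arguments are passed as I32 on 32-bit targets and as I64
+// on 64-bit targets, per the sizeof(void*) check above.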
+
+bool BaseCompiler::emitMemoryGrow() {
+ uint32_t lineOrBytecode = readCallSiteLineOrBytecode();
+
+ Nothing arg;
+ if (!iter_.readMemoryGrow(&arg)) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+ return emitInstanceCall(lineOrBytecode, SASigMemoryGrow);
+}
+
+bool BaseCompiler::emitMemorySize() {
+ uint32_t lineOrBytecode = readCallSiteLineOrBytecode();
+
+ if (!iter_.readMemorySize()) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+ return emitInstanceCall(lineOrBytecode, SASigMemorySize);
+}
+
+bool BaseCompiler::emitRefFunc() {
+ uint32_t lineOrBytecode = readCallSiteLineOrBytecode();
+ uint32_t funcIndex;
+ if (!iter_.readRefFunc(&funcIndex)) {
+ return false;
+ }
+ if (deadCode_) {
+ return true;
+ }
+
+ pushI32(funcIndex);
+ return emitInstanceCall(lineOrBytecode, SASigRefFunc);
+}
+
+bool BaseCompiler::emitRefNull() {
+ if (!iter_.readRefNull()) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+ pushRef(NULLREF_VALUE);
+ return true;
+}
+
+bool BaseCompiler::emitRefIsNull() {
+ Nothing nothing;
+ if (!iter_.readRefIsNull(&nothing)) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+ RegPtr r = popRef();
+ RegI32 rd = narrowPtr(r);
+
+ masm.cmpPtrSet(Assembler::Equal, r, ImmWord(NULLREF_VALUE), rd);
+ pushI32(rd);
+ return true;
+}
+
+#ifdef ENABLE_WASM_FUNCTION_REFERENCES
+bool BaseCompiler::emitRefAsNonNull() {
+ Nothing nothing;
+ if (!iter_.readRefAsNonNull(&nothing)) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+ RegPtr rp = popRef();
+ Label ok;
+ masm.branchTestPtr(Assembler::NonZero, rp, rp, &ok);
+ trap(Trap::NullPointerDereference);
+ masm.bind(&ok);
+ pushRef(rp);
+
+ return true;
+}
+#endif
+
+bool BaseCompiler::emitAtomicCmpXchg(ValType type, Scalar::Type viewType) {
+ LinearMemoryAddress<Nothing> addr;
+ Nothing unused;
+
+ if (!iter_.readAtomicCmpXchg(&addr, type, Scalar::byteSize(viewType), &unused,
+ &unused)) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+ MemoryAccessDesc access(viewType, addr.align, addr.offset, bytecodeOffset(),
+ Synchronization::Full());
+
+ if (Scalar::byteSize(viewType) <= 4) {
+ PopAtomicCmpXchg32Regs regs(this, type, viewType);
+
+ AccessCheck check;
+ RegI32 rp = popMemoryAccess(&access, &check);
+ RegI32 tls = maybeLoadTlsForAccess(check);
+
+ auto memaddr = prepareAtomicMemoryAccess(&access, &check, tls, rp);
+ regs.atomicCmpXchg32(access, memaddr);
+
+ maybeFreeI32(tls);
+ freeI32(rp);
+
+ if (type == ValType::I64) {
+ pushU32AsI64(regs.takeRd());
+ } else {
+ pushI32(regs.takeRd());
+ }
+
+ return true;
+ }
+
+ MOZ_ASSERT(type == ValType::I64 && Scalar::byteSize(viewType) == 8);
+
+ PopAtomicCmpXchg64Regs regs(this);
+
+ AccessCheck check;
+ RegI32 rp = popMemoryAccess(&access, &check);
+
+#ifdef JS_CODEGEN_X86
+ ScratchEBX ebx(*this);
+ RegI32 tls = maybeLoadTlsForAccess(check, ebx);
+ auto memaddr = prepareAtomicMemoryAccess(&access, &check, tls, rp);
+ regs.atomicCmpXchg64(access, memaddr, ebx);
+#else
+ RegI32 tls = maybeLoadTlsForAccess(check);
+ auto memaddr = prepareAtomicMemoryAccess(&access, &check, tls, rp);
+ regs.atomicCmpXchg64(access, memaddr);
+ maybeFreeI32(tls);
+#endif
+
+ freeI32(rp);
+
+ pushI64(regs.takeRd());
+ return true;
+}
+
+bool BaseCompiler::emitAtomicLoad(ValType type, Scalar::Type viewType) {
+ LinearMemoryAddress<Nothing> addr;
+ if (!iter_.readAtomicLoad(&addr, type, Scalar::byteSize(viewType))) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+ MemoryAccessDesc access(viewType, addr.align, addr.offset, bytecodeOffset(),
+ Synchronization::Load());
+
+ if (Scalar::byteSize(viewType) <= sizeof(void*)) {
+ return loadCommon(&access, AccessCheck(), type);
+ }
+
+ MOZ_ASSERT(type == ValType::I64 && Scalar::byteSize(viewType) == 8);
+
+#if defined(JS_64BIT)
+ MOZ_CRASH("Should not happen");
+#else
+ PopAtomicLoad64Regs regs(this);
+
+ AccessCheck check;
+ RegI32 rp = popMemoryAccess(&access, &check);
+
+# ifdef JS_CODEGEN_X86
+ ScratchEBX ebx(*this);
+ RegI32 tls = maybeLoadTlsForAccess(check, ebx);
+ auto memaddr = prepareAtomicMemoryAccess(&access, &check, tls, rp);
+ regs.atomicLoad64(access, memaddr, ebx);
+# else
+ RegI32 tls = maybeLoadTlsForAccess(check);
+ auto memaddr = prepareAtomicMemoryAccess(&access, &check, tls, rp);
+ regs.atomicLoad64(access, memaddr);
+ maybeFreeI32(tls);
+# endif
+
+ freeI32(rp);
+
+ pushI64(regs.takeRd());
+ return true;
+#endif // JS_64BIT
+}
+
+bool BaseCompiler::emitAtomicRMW(ValType type, Scalar::Type viewType,
+ AtomicOp op) {
+ LinearMemoryAddress<Nothing> addr;
+ Nothing unused_value;
+ if (!iter_.readAtomicRMW(&addr, type, Scalar::byteSize(viewType),
+ &unused_value)) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+ MemoryAccessDesc access(viewType, addr.align, addr.offset, bytecodeOffset(),
+ Synchronization::Full());
+
+ if (Scalar::byteSize(viewType) <= 4) {
+ PopAtomicRMW32Regs regs(this, type, viewType, op);
+
+ AccessCheck check;
+ RegI32 rp = popMemoryAccess(&access, &check);
+ RegI32 tls = maybeLoadTlsForAccess(check);
+
+ auto memaddr = prepareAtomicMemoryAccess(&access, &check, tls, rp);
+ regs.atomicRMW32(access, memaddr, op);
+
+ maybeFreeI32(tls);
+ freeI32(rp);
+
+ if (type == ValType::I64) {
+ pushU32AsI64(regs.takeRd());
+ } else {
+ pushI32(regs.takeRd());
+ }
+ return true;
+ }
+
+ MOZ_ASSERT(type == ValType::I64 && Scalar::byteSize(viewType) == 8);
+
+ PopAtomicRMW64Regs regs(this, op);
+
+ AccessCheck check;
+ RegI32 rp = popMemoryAccess(&access, &check);
+
+#ifdef JS_CODEGEN_X86
+ ScratchEBX ebx(*this);
+ RegI32 tls = maybeLoadTlsForAccess(check, ebx);
+
+ fr.pushPtr(regs.valueHigh());
+ fr.pushPtr(regs.valueLow());
+ Address value(esp, 0);
+
+ auto memaddr = prepareAtomicMemoryAccess(&access, &check, tls, rp);
+ regs.atomicRMW64(access, memaddr, op, value, ebx);
+
+ fr.popBytes(8);
+#else
+ RegI32 tls = maybeLoadTlsForAccess(check);
+ auto memaddr = prepareAtomicMemoryAccess(&access, &check, tls, rp);
+ regs.atomicRMW64(access, memaddr, op);
+ maybeFreeI32(tls);
+#endif
+
+ freeI32(rp);
+
+ pushI64(regs.takeRd());
+ return true;
+}
+
+bool BaseCompiler::emitAtomicStore(ValType type, Scalar::Type viewType) {
+ LinearMemoryAddress<Nothing> addr;
+ Nothing unused_value;
+ if (!iter_.readAtomicStore(&addr, type, Scalar::byteSize(viewType),
+ &unused_value)) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+ MemoryAccessDesc access(viewType, addr.align, addr.offset, bytecodeOffset(),
+ Synchronization::Store());
+
+ if (Scalar::byteSize(viewType) <= sizeof(void*)) {
+ return storeCommon(&access, AccessCheck(), type);
+ }
+
+ MOZ_ASSERT(type == ValType::I64 && Scalar::byteSize(viewType) == 8);
+
+#ifdef JS_64BIT
+ MOZ_CRASH("Should not happen");
+#else
+ emitAtomicXchg64(&access, WantResult(false));
+ return true;
+#endif
+}
+
+bool BaseCompiler::emitAtomicXchg(ValType type, Scalar::Type viewType) {
+ LinearMemoryAddress<Nothing> addr;
+ Nothing unused_value;
+ if (!iter_.readAtomicRMW(&addr, type, Scalar::byteSize(viewType),
+ &unused_value)) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+ AccessCheck check;
+ MemoryAccessDesc access(viewType, addr.align, addr.offset, bytecodeOffset(),
+ Synchronization::Full());
+
+ if (Scalar::byteSize(viewType) <= 4) {
+ PopAtomicXchg32Regs regs(this, type, viewType);
+ RegI32 rp = popMemoryAccess(&access, &check);
+ RegI32 tls = maybeLoadTlsForAccess(check);
+
+ auto memaddr = prepareAtomicMemoryAccess(&access, &check, tls, rp);
+ regs.atomicXchg32(access, memaddr);
+
+ maybeFreeI32(tls);
+ freeI32(rp);
+
+ if (type == ValType::I64) {
+ pushU32AsI64(regs.takeRd());
+ } else {
+ pushI32(regs.takeRd());
+ }
+ return true;
+ }
+
+ MOZ_ASSERT(type == ValType::I64 && Scalar::byteSize(viewType) == 8);
+
+ emitAtomicXchg64(&access, WantResult(true));
+ return true;
+}
+
+void BaseCompiler::emitAtomicXchg64(MemoryAccessDesc* access,
+ WantResult wantResult) {
+ PopAtomicXchg64Regs regs(this);
+
+ AccessCheck check;
+ RegI32 rp = popMemoryAccess(access, &check);
+
+#ifdef JS_CODEGEN_X86
+ ScratchEBX ebx(*this);
+ RegI32 tls = maybeLoadTlsForAccess(check, ebx);
+ auto memaddr = prepareAtomicMemoryAccess(access, &check, tls, rp);
+ regs.atomicXchg64(*access, memaddr, ebx);
+#else
+ RegI32 tls = maybeLoadTlsForAccess(check);
+ auto memaddr = prepareAtomicMemoryAccess(access, &check, tls, rp);
+ regs.atomicXchg64(*access, memaddr);
+ maybeFreeI32(tls);
+#endif
+
+ freeI32(rp);
+
+ if (wantResult) {
+ pushI64(regs.takeRd());
+ }
+}
+
+bool BaseCompiler::emitWait(ValType type, uint32_t byteSize) {
+ uint32_t lineOrBytecode = readCallSiteLineOrBytecode();
+
+ Nothing nothing;
+ LinearMemoryAddress<Nothing> addr;
+ if (!iter_.readWait(&addr, type, byteSize, &nothing, &nothing)) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+ switch (type.kind()) {
+ case ValType::I32: {
+ RegI64 timeout = popI64();
+ RegI32 val = popI32();
+
+ MemoryAccessDesc access(Scalar::Int32, addr.align, addr.offset,
+ bytecodeOffset());
+ computeEffectiveAddress(&access);
+
+ pushI32(val);
+ pushI64(timeout);
+
+ if (!emitInstanceCall(lineOrBytecode, SASigWaitI32)) {
+ return false;
+ }
+ break;
+ }
+ case ValType::I64: {
+ RegI64 timeout = popI64();
+ RegI64 val = popI64();
+
+ MemoryAccessDesc access(Scalar::Int64, addr.align, addr.offset,
+ bytecodeOffset());
+ computeEffectiveAddress(&access);
+
+ pushI64(val);
+ pushI64(timeout);
+
+ if (!emitInstanceCall(lineOrBytecode, SASigWaitI64)) {
+ return false;
+ }
+ break;
+ }
+ default:
+ MOZ_CRASH();
+ }
+
+ return true;
+}
+
+bool BaseCompiler::emitWake() {
+ uint32_t lineOrBytecode = readCallSiteLineOrBytecode();
+
+ Nothing nothing;
+ LinearMemoryAddress<Nothing> addr;
+ if (!iter_.readWake(&addr, &nothing)) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+ RegI32 count = popI32();
+
+ MemoryAccessDesc access(Scalar::Int32, addr.align, addr.offset,
+ bytecodeOffset());
+ computeEffectiveAddress(&access);
+
+ pushI32(count);
+
+ return emitInstanceCall(lineOrBytecode, SASigWake);
+}
+
+bool BaseCompiler::emitFence() {
+ if (!iter_.readFence()) {
+ return false;
+ }
+ if (deadCode_) {
+ return true;
+ }
+
+ masm.memoryBarrier(MembarFull);
+ return true;
+}
+
+bool BaseCompiler::emitMemCopy() {
+ uint32_t lineOrBytecode = readCallSiteLineOrBytecode();
+
+ uint32_t dstMemOrTableIndex = 0;
+ uint32_t srcMemOrTableIndex = 0;
+ Nothing nothing;
+ if (!iter_.readMemOrTableCopy(true, &dstMemOrTableIndex, &nothing,
+ &srcMemOrTableIndex, &nothing, &nothing)) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+ int32_t signedLength;
+ if (MacroAssembler::SupportsFastUnalignedAccesses() &&
+ peekConstI32(&signedLength) && signedLength != 0 &&
+ uint32_t(signedLength) <= MaxInlineMemoryCopyLength) {
+ return emitMemCopyInline();
+ }
+
+ return emitMemCopyCall(lineOrBytecode);
+}
+
+bool BaseCompiler::emitMemCopyCall(uint32_t lineOrBytecode) {
+ pushHeapBase();
+ if (!emitInstanceCall(lineOrBytecode,
+ usesSharedMemory() ? SASigMemCopyShared : SASigMemCopy,
+ /*pushReturnedValue=*/false)) {
+ return false;
+ }
+
+ return true;
+}
+
+bool BaseCompiler::emitMemCopyInline() {
+ MOZ_ASSERT(MaxInlineMemoryCopyLength != 0);
+
+ int32_t signedLength;
+ MOZ_ALWAYS_TRUE(popConstI32(&signedLength));
+ uint32_t length = signedLength;
+ MOZ_ASSERT(length != 0 && length <= MaxInlineMemoryCopyLength);
+
+ RegI32 src = popI32();
+ RegI32 dest = popI32();
+
+  // Compute the number of copies of each width that we will need.
+ size_t remainder = length;
+#ifdef JS_64BIT
+ size_t numCopies8 = remainder / sizeof(uint64_t);
+ remainder %= sizeof(uint64_t);
+#endif
+ size_t numCopies4 = remainder / sizeof(uint32_t);
+ remainder %= sizeof(uint32_t);
+ size_t numCopies2 = remainder / sizeof(uint16_t);
+ remainder %= sizeof(uint16_t);
+ size_t numCopies1 = remainder;
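+
+  // For example (hypothetical length): length == 15 decomposes on a 64-bit
+  // target as
+  //
+  //   numCopies8 == 1, numCopies4 == 1, numCopies2 == 1, numCopies1 == 1
+  //
+  // i.e. one 8-byte, one 4-byte, one 2-byte and one 1-byte transfer; on a
+  // 32-bit target the same length gives numCopies4 == 3, numCopies2 == 1,
+  // numCopies1 == 1.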
+
+ // Load all source bytes onto the value stack from low to high using the
+ // widest transfer width we can for the system. We will trap without writing
+ // anything if any source byte is out-of-bounds.
+ bool omitBoundsCheck = false;
+ size_t offset = 0;
+
+#ifdef JS_64BIT
+ for (uint32_t i = 0; i < numCopies8; i++) {
+ RegI32 temp = needI32();
+ moveI32(src, temp);
+ pushI32(temp);
+
+ MemoryAccessDesc access(Scalar::Int64, 1, offset, bytecodeOffset());
+ AccessCheck check;
+ check.omitBoundsCheck = omitBoundsCheck;
+ if (!loadCommon(&access, check, ValType::I64)) {
+ return false;
+ }
+
+ offset += sizeof(uint64_t);
+ omitBoundsCheck = true;
+ }
+#endif
+
+ for (uint32_t i = 0; i < numCopies4; i++) {
+ RegI32 temp = needI32();
+ moveI32(src, temp);
+ pushI32(temp);
+
+ MemoryAccessDesc access(Scalar::Uint32, 1, offset, bytecodeOffset());
+ AccessCheck check;
+ check.omitBoundsCheck = omitBoundsCheck;
+ if (!loadCommon(&access, check, ValType::I32)) {
+ return false;
+ }
+
+ offset += sizeof(uint32_t);
+ omitBoundsCheck = true;
+ }
+
+ if (numCopies2) {
+ RegI32 temp = needI32();
+ moveI32(src, temp);
+ pushI32(temp);
+
+ MemoryAccessDesc access(Scalar::Uint16, 1, offset, bytecodeOffset());
+ AccessCheck check;
+ check.omitBoundsCheck = omitBoundsCheck;
+ if (!loadCommon(&access, check, ValType::I32)) {
+ return false;
+ }
+
+ offset += sizeof(uint16_t);
+ omitBoundsCheck = true;
+ }
+
+ if (numCopies1) {
+ RegI32 temp = needI32();
+ moveI32(src, temp);
+ pushI32(temp);
+
+ MemoryAccessDesc access(Scalar::Uint8, 1, offset, bytecodeOffset());
+ AccessCheck check;
+ check.omitBoundsCheck = omitBoundsCheck;
+ if (!loadCommon(&access, check, ValType::I32)) {
+ return false;
+ }
+ }
+
+ // Store all source bytes from the value stack to the destination from
+ // high to low. We will trap without writing anything on the first store
+ // if any dest byte is out-of-bounds.
+ offset = length;
+ omitBoundsCheck = false;
+
+ if (numCopies1) {
+ offset -= sizeof(uint8_t);
+
+ RegI32 value = popI32();
+ RegI32 temp = needI32();
+ moveI32(dest, temp);
+ pushI32(temp);
+ pushI32(value);
+
+ MemoryAccessDesc access(Scalar::Uint8, 1, offset, bytecodeOffset());
+ AccessCheck check;
+ if (!storeCommon(&access, check, ValType::I32)) {
+ return false;
+ }
+
+ omitBoundsCheck = true;
+ }
+
+ if (numCopies2) {
+ offset -= sizeof(uint16_t);
+
+ RegI32 value = popI32();
+ RegI32 temp = needI32();
+ moveI32(dest, temp);
+ pushI32(temp);
+ pushI32(value);
+
+ MemoryAccessDesc access(Scalar::Uint16, 1, offset, bytecodeOffset());
+ AccessCheck check;
+ check.omitBoundsCheck = omitBoundsCheck;
+ if (!storeCommon(&access, check, ValType::I32)) {
+ return false;
+ }
+
+ omitBoundsCheck = true;
+ }
+
+ for (uint32_t i = 0; i < numCopies4; i++) {
+ offset -= sizeof(uint32_t);
+
+ RegI32 value = popI32();
+ RegI32 temp = needI32();
+ moveI32(dest, temp);
+ pushI32(temp);
+ pushI32(value);
+
+ MemoryAccessDesc access(Scalar::Uint32, 1, offset, bytecodeOffset());
+ AccessCheck check;
+ check.omitBoundsCheck = omitBoundsCheck;
+ if (!storeCommon(&access, check, ValType::I32)) {
+ return false;
+ }
+
+ omitBoundsCheck = true;
+ }
+
+#ifdef JS_64BIT
+ for (uint32_t i = 0; i < numCopies8; i++) {
+ offset -= sizeof(uint64_t);
+
+ RegI64 value = popI64();
+ RegI32 temp = needI32();
+ moveI32(dest, temp);
+ pushI32(temp);
+ pushI64(value);
+
+ MemoryAccessDesc access(Scalar::Int64, 1, offset, bytecodeOffset());
+ AccessCheck check;
+ check.omitBoundsCheck = omitBoundsCheck;
+ if (!storeCommon(&access, check, ValType::I64)) {
+ return false;
+ }
+
+ omitBoundsCheck = true;
+ }
+#endif
+
+ freeI32(dest);
+ freeI32(src);
+ return true;
+}
+
+bool BaseCompiler::emitTableCopy() {
+ uint32_t lineOrBytecode = readCallSiteLineOrBytecode();
+
+ uint32_t dstMemOrTableIndex = 0;
+ uint32_t srcMemOrTableIndex = 0;
+ Nothing nothing;
+ if (!iter_.readMemOrTableCopy(false, &dstMemOrTableIndex, &nothing,
+ &srcMemOrTableIndex, &nothing, &nothing)) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+ pushI32(dstMemOrTableIndex);
+ pushI32(srcMemOrTableIndex);
+ if (!emitInstanceCall(lineOrBytecode, SASigTableCopy,
+ /*pushReturnedValue=*/false)) {
+ return false;
+ }
+
+ return true;
+}
+
+bool BaseCompiler::emitDataOrElemDrop(bool isData) {
+ uint32_t lineOrBytecode = readCallSiteLineOrBytecode();
+
+ uint32_t segIndex = 0;
+ if (!iter_.readDataOrElemDrop(isData, &segIndex)) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+ // Despite the cast to int32_t, the callee regards the value as unsigned.
+ pushI32(int32_t(segIndex));
+
+ return emitInstanceCall(lineOrBytecode,
+ isData ? SASigDataDrop : SASigElemDrop,
+ /*pushReturnedValue=*/false);
+}
+
+bool BaseCompiler::emitMemFill() {
+ uint32_t lineOrBytecode = readCallSiteLineOrBytecode();
+
+ Nothing nothing;
+ if (!iter_.readMemFill(&nothing, &nothing, &nothing)) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+ int32_t signedLength;
+ int32_t signedValue;
+ if (MacroAssembler::SupportsFastUnalignedAccesses() &&
+ peek2xI32(&signedLength, &signedValue) && signedLength != 0 &&
+ uint32_t(signedLength) <= MaxInlineMemoryFillLength) {
+ return emitMemFillInline();
+ }
+ return emitMemFillCall(lineOrBytecode);
+}
+
+bool BaseCompiler::emitMemFillCall(uint32_t lineOrBytecode) {
+ pushHeapBase();
+ return emitInstanceCall(
+ lineOrBytecode, usesSharedMemory() ? SASigMemFillShared : SASigMemFill,
+ /*pushReturnedValue=*/false);
+}
+
+bool BaseCompiler::emitMemFillInline() {
+ MOZ_ASSERT(MaxInlineMemoryFillLength != 0);
+
+ int32_t signedLength;
+ int32_t signedValue;
+ MOZ_ALWAYS_TRUE(popConstI32(&signedLength));
+ MOZ_ALWAYS_TRUE(popConstI32(&signedValue));
+ uint32_t length = uint32_t(signedLength);
+ uint32_t value = uint32_t(signedValue);
+ MOZ_ASSERT(length != 0 && length <= MaxInlineMemoryFillLength);
+
+ RegI32 dest = popI32();
+
+  // Compute the number of copies of each width that we will need.
+ size_t remainder = length;
+#ifdef JS_64BIT
+ size_t numCopies8 = remainder / sizeof(uint64_t);
+ remainder %= sizeof(uint64_t);
+#endif
+ size_t numCopies4 = remainder / sizeof(uint32_t);
+ remainder %= sizeof(uint32_t);
+ size_t numCopies2 = remainder / sizeof(uint16_t);
+ remainder %= sizeof(uint16_t);
+ size_t numCopies1 = remainder;
+
+ MOZ_ASSERT(numCopies2 <= 1 && numCopies1 <= 1);
+
+ // Generate splatted definitions for wider fills as needed
+#ifdef JS_64BIT
+ uint64_t val8 = SplatByteToUInt<uint64_t>(value, 8);
+#endif
+ uint32_t val4 = SplatByteToUInt<uint32_t>(value, 4);
+ uint32_t val2 = SplatByteToUInt<uint32_t>(value, 2);
+ uint32_t val1 = value;
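+
+  // For example (hypothetical fill byte, and assuming SplatByteToUInt<T>(v, n)
+  // replicates the low byte of v across n bytes): value == 0xAB gives
+  //
+  //   val8 == 0xABABABABABABABAB   (64-bit targets only)
+  //   val4 == 0xABABABAB
+  //   val2 == 0xABAB
+  //   val1 == 0xAB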
+
+ // Store the fill value to the destination from high to low. We will trap
+ // without writing anything on the first store if any dest byte is
+ // out-of-bounds.
+ size_t offset = length;
+ bool omitBoundsCheck = false;
+
+ if (numCopies1) {
+ offset -= sizeof(uint8_t);
+
+ RegI32 temp = needI32();
+ moveI32(dest, temp);
+ pushI32(temp);
+ pushI32(val1);
+
+ MemoryAccessDesc access(Scalar::Uint8, 1, offset, bytecodeOffset());
+ AccessCheck check;
+ if (!storeCommon(&access, check, ValType::I32)) {
+ return false;
+ }
+
+ omitBoundsCheck = true;
+ }
+
+ if (numCopies2) {
+ offset -= sizeof(uint16_t);
+
+ RegI32 temp = needI32();
+ moveI32(dest, temp);
+ pushI32(temp);
+ pushI32(val2);
+
+ MemoryAccessDesc access(Scalar::Uint16, 1, offset, bytecodeOffset());
+ AccessCheck check;
+ check.omitBoundsCheck = omitBoundsCheck;
+ if (!storeCommon(&access, check, ValType::I32)) {
+ return false;
+ }
+
+ omitBoundsCheck = true;
+ }
+
+ for (uint32_t i = 0; i < numCopies4; i++) {
+ offset -= sizeof(uint32_t);
+
+ RegI32 temp = needI32();
+ moveI32(dest, temp);
+ pushI32(temp);
+ pushI32(val4);
+
+ MemoryAccessDesc access(Scalar::Uint32, 1, offset, bytecodeOffset());
+ AccessCheck check;
+ check.omitBoundsCheck = omitBoundsCheck;
+ if (!storeCommon(&access, check, ValType::I32)) {
+ return false;
+ }
+
+ omitBoundsCheck = true;
+ }
+
+#ifdef JS_64BIT
+ for (uint32_t i = 0; i < numCopies8; i++) {
+ offset -= sizeof(uint64_t);
+
+ RegI32 temp = needI32();
+ moveI32(dest, temp);
+ pushI32(temp);
+ pushI64(val8);
+
+ MemoryAccessDesc access(Scalar::Int64, 1, offset, bytecodeOffset());
+ AccessCheck check;
+ check.omitBoundsCheck = omitBoundsCheck;
+ if (!storeCommon(&access, check, ValType::I64)) {
+ return false;
+ }
+
+ omitBoundsCheck = true;
+ }
+#endif
+
+ freeI32(dest);
+ return true;
+}
+
+bool BaseCompiler::emitMemOrTableInit(bool isMem) {
+ uint32_t lineOrBytecode = readCallSiteLineOrBytecode();
+
+ uint32_t segIndex = 0;
+ uint32_t dstTableIndex = 0;
+ Nothing nothing;
+ if (!iter_.readMemOrTableInit(isMem, &segIndex, &dstTableIndex, &nothing,
+ &nothing, &nothing)) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
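+  // As with data/elem drop above, the segment index is pushed as a signed
+  // i32 but the callee regards it as unsigned.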
+ pushI32(int32_t(segIndex));
+ if (isMem) {
+ if (!emitInstanceCall(lineOrBytecode, SASigMemInit,
+ /*pushReturnedValue=*/false)) {
+ return false;
+ }
+ } else {
+ pushI32(dstTableIndex);
+ if (!emitInstanceCall(lineOrBytecode, SASigTableInit,
+ /*pushReturnedValue=*/false)) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+#ifdef ENABLE_WASM_REFTYPES
+[[nodiscard]] bool BaseCompiler::emitTableFill() {
+ uint32_t lineOrBytecode = readCallSiteLineOrBytecode();
+
+ Nothing nothing;
+ uint32_t tableIndex;
+ if (!iter_.readTableFill(&tableIndex, &nothing, &nothing, &nothing)) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+ // fill(start:u32, val:ref, len:u32, table:u32) -> u32
+ pushI32(tableIndex);
+ return emitInstanceCall(lineOrBytecode, SASigTableFill,
+ /*pushReturnedValue=*/false);
+}
+
+[[nodiscard]] bool BaseCompiler::emitTableGet() {
+ uint32_t lineOrBytecode = readCallSiteLineOrBytecode();
+ Nothing index;
+ uint32_t tableIndex;
+ if (!iter_.readTableGet(&tableIndex, &index)) {
+ return false;
+ }
+ if (deadCode_) {
+ return true;
+ }
+ // get(index:u32, table:u32) -> uintptr_t(AnyRef)
+ pushI32(tableIndex);
+ if (!emitInstanceCall(lineOrBytecode, SASigTableGet,
+ /*pushReturnedValue=*/false)) {
+ return false;
+ }
+
+ // Push the resulting anyref back on the eval stack. NOTE: needRef() must
+ // not kill the value in the register.
+ RegPtr r = RegPtr(ReturnReg);
+ needRef(r);
+ pushRef(r);
+
+ return true;
+}
+
+[[nodiscard]] bool BaseCompiler::emitTableGrow() {
+ uint32_t lineOrBytecode = readCallSiteLineOrBytecode();
+ Nothing delta;
+ Nothing initValue;
+ uint32_t tableIndex;
+ if (!iter_.readTableGrow(&tableIndex, &initValue, &delta)) {
+ return false;
+ }
+ if (deadCode_) {
+ return true;
+ }
+ // grow(initValue:anyref, delta:u32, table:u32) -> u32
+ pushI32(tableIndex);
+ return emitInstanceCall(lineOrBytecode, SASigTableGrow);
+}
+
+[[nodiscard]] bool BaseCompiler::emitTableSet() {
+ uint32_t lineOrBytecode = readCallSiteLineOrBytecode();
+ Nothing index, value;
+ uint32_t tableIndex;
+ if (!iter_.readTableSet(&tableIndex, &index, &value)) {
+ return false;
+ }
+ if (deadCode_) {
+ return true;
+ }
+ // set(index:u32, value:ref, table:u32) -> i32
+ pushI32(tableIndex);
+ return emitInstanceCall(lineOrBytecode, SASigTableSet,
+ /*pushReturnedValue=*/false);
+}
+
+[[nodiscard]] bool BaseCompiler::emitTableSize() {
+ uint32_t lineOrBytecode = readCallSiteLineOrBytecode();
+ uint32_t tableIndex;
+ if (!iter_.readTableSize(&tableIndex)) {
+ return false;
+ }
+ if (deadCode_) {
+ return true;
+ }
+ // size(table:u32) -> u32
+ pushI32(tableIndex);
+ return emitInstanceCall(lineOrBytecode, SASigTableSize);
+}
+#endif
+
+bool BaseCompiler::emitStructNew() {
+ uint32_t lineOrBytecode = readCallSiteLineOrBytecode();
+
+ uint32_t typeIndex;
+ NothingVector args;
+ if (!iter_.readStructNew(&typeIndex, &args)) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+  // Allocate zeroed storage. The argument to the StructNew instance call is
+  // the struct's type descriptor, loaded below from the instance's global
+  // data area at the type's globalDataOffset().
+  //
+  // Returns null on OOM.
+
+ const StructType& structType = moduleEnv_.types[typeIndex].structType();
+ const TypeIdDesc& structTypeId = moduleEnv_.typeIds[typeIndex];
+ RegPtr rst = needRef();
+ fr.loadTlsPtr(WasmTlsReg);
+ masm.loadWasmGlobalPtr(structTypeId.globalDataOffset(), rst);
+ pushRef(rst);
+
+ if (!emitInstanceCall(lineOrBytecode, SASigStructNew)) {
+ return false;
+ }
+
+  // Optimization opportunity: we iterate backward over the fields, popping
+  // each argument off the value stack in turn. This generates more
+  // instructions than we want, since we really only need to pop the stack
+  // once at the end, not for every element; doing better would require a bit
+  // more machinery to load elements off the stack into registers.
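+  // Note that struct.new's field values were pushed in field order, so
+  // popping yields them last-field-first; hence the backward loop over
+  // fieldNo below.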
+
+ RegPtr rp = popRef();
+ RegPtr rdata = rp;
+
+ if (!structType.isInline_) {
+ rdata = needRef();
+ masm.loadPtr(Address(rp, OutlineTypedObject::offsetOfData()), rdata);
+ }
+
+  // Optimization opportunity: when the value being stored is a known
+  // zero/null we need not store anything. This case may be somewhat common
+  // because struct.new forces a value to be specified for every field.
+
+ uint32_t fieldNo = structType.fields_.length();
+ while (fieldNo-- > 0) {
+ uint32_t offs = structType.objectBaseFieldOffset(fieldNo);
+ switch (structType.fields_[fieldNo].type.kind()) {
+ case ValType::I32: {
+ RegI32 r = popI32();
+ masm.store32(r, Address(rdata, offs));
+ freeI32(r);
+ break;
+ }
+ case ValType::I64: {
+ RegI64 r = popI64();
+ masm.store64(r, Address(rdata, offs));
+ freeI64(r);
+ break;
+ }
+ case ValType::F32: {
+ RegF32 r = popF32();
+ masm.storeFloat32(r, Address(rdata, offs));
+ freeF32(r);
+ break;
+ }
+ case ValType::F64: {
+ RegF64 r = popF64();
+ masm.storeDouble(r, Address(rdata, offs));
+ freeF64(r);
+ break;
+ }
+ case ValType::Ref: {
+ RegPtr value = popRef();
+ masm.storePtr(value, Address(rdata, offs));
+
+ // A write barrier is needed here for the extremely unlikely case
+ // that the object is allocated in the tenured area - a result of
+ // a GC artifact.
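+        // (EmitWasmPostBarrierGuard branches to skipBarrier when no
+        // post-barrier is required for this store; otherwise we call out via
+        // emitPostBarrierCall with the address of the field just written.)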
+
+ Label skipBarrier;
+
+ sync();
+
+ RegPtr rowner = rp;
+ if (!structType.isInline_) {
+ rowner = needRef();
+ masm.loadPtr(Address(rp, OutlineTypedObject::offsetOfOwner()),
+ rowner);
+ }
+
+ RegPtr otherScratch = needRef();
+ EmitWasmPostBarrierGuard(masm, Some(rowner), otherScratch, value,
+ &skipBarrier);
+ freeRef(otherScratch);
+
+ if (!structType.isInline_) {
+ freeRef(rowner);
+ }
+
+ freeRef(value);
+
+ // TODO/AnyRef-boxing: With boxed immediates and strings, the write
+ // barrier is going to have to be more complicated.
+ ASSERT_ANYREF_IS_JSOBJECT;
+
+ pushRef(rp); // Save rp across the call
+ RegPtr valueAddr = needRef();
+ masm.computeEffectiveAddress(Address(rdata, offs), valueAddr);
+ if (!emitPostBarrierCall(valueAddr)) { // Consumes valueAddr
+ return false;
+ }
+ popRef(rp); // Restore rp
+ if (!structType.isInline_) {
+ masm.loadPtr(Address(rp, OutlineTypedObject::offsetOfData()), rdata);
+ }
+
+ masm.bind(&skipBarrier);
+ break;
+ }
+ default: {
+ MOZ_CRASH("Unexpected field type");
+ }
+ }
+ }
+
+ if (!structType.isInline_) {
+ freeRef(rdata);
+ }
+
+ pushRef(rp);
+
+ return true;
+}
+
+bool BaseCompiler::emitStructGet() {
+ uint32_t typeIndex;
+ uint32_t fieldIndex;
+ Nothing nothing;
+ if (!iter_.readStructGet(&typeIndex, &fieldIndex, &nothing)) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+ const StructType& structType = moduleEnv_.types[typeIndex].structType();
+
+ RegPtr rp = popRef();
+
+ Label ok;
+ masm.branchTestPtr(Assembler::NonZero, rp, rp, &ok);
+ trap(Trap::NullPointerDereference);
+ masm.bind(&ok);
+
+ if (!structType.isInline_) {
+ masm.loadPtr(Address(rp, OutlineTypedObject::offsetOfData()), rp);
+ }
+
+ uint32_t offs = structType.objectBaseFieldOffset(fieldIndex);
+ switch (structType.fields_[fieldIndex].type.kind()) {
+ case ValType::I32: {
+ RegI32 r = needI32();
+ masm.load32(Address(rp, offs), r);
+ pushI32(r);
+ break;
+ }
+ case ValType::I64: {
+ RegI64 r = needI64();
+ masm.load64(Address(rp, offs), r);
+ pushI64(r);
+ break;
+ }
+ case ValType::F32: {
+ RegF32 r = needF32();
+ masm.loadFloat32(Address(rp, offs), r);
+ pushF32(r);
+ break;
+ }
+ case ValType::F64: {
+ RegF64 r = needF64();
+ masm.loadDouble(Address(rp, offs), r);
+ pushF64(r);
+ break;
+ }
+ case ValType::Ref: {
+ RegPtr r = needRef();
+ masm.loadPtr(Address(rp, offs), r);
+ pushRef(r);
+ break;
+ }
+ default: {
+ MOZ_CRASH("Unexpected field type");
+ }
+ }
+
+ freeRef(rp);
+
+ return true;
+}
+
+bool BaseCompiler::emitStructSet() {
+ uint32_t typeIndex;
+ uint32_t fieldIndex;
+ Nothing nothing;
+ if (!iter_.readStructSet(&typeIndex, &fieldIndex, &nothing, &nothing)) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+ const StructType& structType = moduleEnv_.types[typeIndex].structType();
+
+ RegI32 ri;
+ RegI64 rl;
+ RegF32 rf;
+ RegF64 rd;
+ RegPtr rr;
+
+ // Reserve this register early if we will need it so that it is not taken by
+ // rr or rp.
+ RegPtr valueAddr;
+ if (structType.fields_[fieldIndex].type.isReference()) {
+ valueAddr = RegPtr(PreBarrierReg);
+ needRef(valueAddr);
+ }
+
+ switch (structType.fields_[fieldIndex].type.kind()) {
+ case ValType::I32:
+ ri = popI32();
+ break;
+ case ValType::I64:
+ rl = popI64();
+ break;
+ case ValType::F32:
+ rf = popF32();
+ break;
+ case ValType::F64:
+ rd = popF64();
+ break;
+ case ValType::Ref:
+ rr = popRef();
+ break;
+ default:
+ MOZ_CRASH("Unexpected field type");
+ }
+
+ RegPtr rp = popRef();
+
+ Label ok;
+ masm.branchTestPtr(Assembler::NonZero, rp, rp, &ok);
+ trap(Trap::NullPointerDereference);
+ masm.bind(&ok);
+
+ if (!structType.isInline_) {
+ masm.loadPtr(Address(rp, OutlineTypedObject::offsetOfData()), rp);
+ }
+
+ uint32_t offs = structType.objectBaseFieldOffset(fieldIndex);
+ switch (structType.fields_[fieldIndex].type.kind()) {
+ case ValType::I32: {
+ masm.store32(ri, Address(rp, offs));
+ freeI32(ri);
+ break;
+ }
+ case ValType::I64: {
+ masm.store64(rl, Address(rp, offs));
+ freeI64(rl);
+ break;
+ }
+ case ValType::F32: {
+ masm.storeFloat32(rf, Address(rp, offs));
+ freeF32(rf);
+ break;
+ }
+ case ValType::F64: {
+ masm.storeDouble(rd, Address(rp, offs));
+ freeF64(rd);
+ break;
+ }
+ case ValType::Ref: {
+ masm.computeEffectiveAddress(Address(rp, offs), valueAddr);
+
+ // Bug 1617908. Ensure that if a TypedObject is not inline, then its
+ // underlying ArrayBuffer also is not inline, or the barrier logic fails.
+ static_assert(InlineTypedObject::MaxInlineBytes >=
+ ArrayBufferObject::MaxInlineBytes);
+
+ // emitBarrieredStore consumes valueAddr
+ if (!emitBarrieredStore(structType.isInline_ ? Some(rp) : Nothing(),
+ valueAddr, rr)) {
+ return false;
+ }
+ freeRef(rr);
+ break;
+ }
+ default: {
+ MOZ_CRASH("Unexpected field type");
+ }
+ }
+
+ freeRef(rp);
+
+ return true;
+}
+
+bool BaseCompiler::emitStructNarrow() {
+ uint32_t lineOrBytecode = readCallSiteLineOrBytecode();
+
+ ValType inputType, outputType;
+ Nothing nothing;
+ if (!iter_.readStructNarrow(&inputType, &outputType, &nothing)) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+ // struct.narrow validation ensures that these hold.
+
+ MOZ_ASSERT(inputType.isEqRef() ||
+ moduleEnv_.types.isStructType(inputType.refType()));
+ MOZ_ASSERT(outputType.isEqRef() ||
+ moduleEnv_.types.isStructType(outputType.refType()));
+ MOZ_ASSERT_IF(outputType.isEqRef(), inputType.isEqRef());
+
+  // EqRef -> EqRef is a no-op; just leave the value on the stack.
+
+ if (inputType.isEqRef() && outputType.isEqRef()) {
+ return true;
+ }
+
+ RegPtr rp = popRef();
+
+  // Dynamic downcast eqref|(optref T) -> (optref U); leaves rp or null.
+ const TypeIdDesc& outputStructTypeId =
+ moduleEnv_.typeIds[outputType.refType().typeIndex()];
+ RegPtr rst = needRef();
+ fr.loadTlsPtr(WasmTlsReg);
+ masm.loadWasmGlobalPtr(outputStructTypeId.globalDataOffset(), rst);
+ pushRef(rst);
+
+ pushRef(rp);
+ return emitInstanceCall(lineOrBytecode, SASigStructNarrow);
+}
+
+#ifdef ENABLE_WASM_SIMD
+
+// Emitter trampolines used by abstracted SIMD operations. Naming here follows
+// the SIMD spec pretty closely.
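+//
+// For example, AddI32x4 below simply forwards to masm.addInt32x4; these
+// trampolines are handed to emitVectorBinop / emitVectorUnop by the
+// dispatchVector* macros in emitBody().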
+
+static void AndV128(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.bitwiseAndSimd128(rs, rsd);
+}
+
+static void OrV128(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.bitwiseOrSimd128(rs, rsd);
+}
+
+static void XorV128(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.bitwiseXorSimd128(rs, rsd);
+}
+
+static void AddI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.addInt8x16(rs, rsd);
+}
+
+static void AddI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.addInt16x8(rs, rsd);
+}
+
+static void AddI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.addInt32x4(rs, rsd);
+}
+
+static void AddF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.addFloat32x4(rs, rsd);
+}
+
+static void AddI64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.addInt64x2(rs, rsd);
+}
+
+static void AddF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.addFloat64x2(rs, rsd);
+}
+
+static void AddSatI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.addSatInt8x16(rs, rsd);
+}
+
+static void AddSatUI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.unsignedAddSatInt8x16(rs, rsd);
+}
+
+static void AddSatI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.addSatInt16x8(rs, rsd);
+}
+
+static void AddSatUI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.unsignedAddSatInt16x8(rs, rsd);
+}
+
+static void SubI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.subInt8x16(rs, rsd);
+}
+
+static void SubI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.subInt16x8(rs, rsd);
+}
+
+static void SubI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.subInt32x4(rs, rsd);
+}
+
+static void SubF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.subFloat32x4(rs, rsd);
+}
+
+static void SubI64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.subInt64x2(rs, rsd);
+}
+
+static void SubF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.subFloat64x2(rs, rsd);
+}
+
+static void SubSatI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.subSatInt8x16(rs, rsd);
+}
+
+static void SubSatUI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.unsignedSubSatInt8x16(rs, rsd);
+}
+
+static void SubSatI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.subSatInt16x8(rs, rsd);
+}
+
+static void SubSatUI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.unsignedSubSatInt16x8(rs, rsd);
+}
+
+static void MulI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.mulInt16x8(rs, rsd);
+}
+
+static void MulI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.mulInt32x4(rs, rsd);
+}
+
+static void MulF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.mulFloat32x4(rs, rsd);
+}
+
+# if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
+static void MulI64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd,
+ RegV128 temp) {
+ masm.mulInt64x2(rs, rsd, temp);
+}
+# endif
+
+static void MulF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.mulFloat64x2(rs, rsd);
+}
+
+static void DivF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.divFloat32x4(rs, rsd);
+}
+
+static void DivF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.divFloat64x2(rs, rsd);
+}
+
+# if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
+static void MinF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd,
+ RegV128 temp1, RegV128 temp2) {
+ masm.minFloat32x4(rs, rsd, temp1, temp2);
+}
+
+static void MinF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd,
+ RegV128 temp1, RegV128 temp2) {
+ masm.minFloat64x2(rs, rsd, temp1, temp2);
+}
+
+static void MaxF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd,
+ RegV128 temp1, RegV128 temp2) {
+ masm.maxFloat32x4(rs, rsd, temp1, temp2);
+}
+
+static void MaxF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd,
+ RegV128 temp1, RegV128 temp2) {
+ masm.maxFloat64x2(rs, rsd, temp1, temp2);
+}
+
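+// The unnamed RhsDestOp parameter below is a tag type: it selects the
+// emitVectorBinop overload in which the rhs operand doubles as the
+// destination, matching the x86 pseudo-min/max operations.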
+static void PMinF32x4(MacroAssembler& masm, RegV128 rsd, RegV128 rs,
+ RhsDestOp) {
+ masm.pseudoMinFloat32x4(rsd, rs);
+}
+
+static void PMinF64x2(MacroAssembler& masm, RegV128 rsd, RegV128 rs,
+ RhsDestOp) {
+ masm.pseudoMinFloat64x2(rsd, rs);
+}
+
+static void PMaxF32x4(MacroAssembler& masm, RegV128 rsd, RegV128 rs,
+ RhsDestOp) {
+ masm.pseudoMaxFloat32x4(rsd, rs);
+}
+
+static void PMaxF64x2(MacroAssembler& masm, RegV128 rsd, RegV128 rs,
+ RhsDestOp) {
+ masm.pseudoMaxFloat64x2(rsd, rs);
+}
+# elif defined(JS_CODEGEN_ARM64)
+static void MinF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.minFloat32x4(rs, rsd);
+}
+
+static void MinF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.minFloat64x2(rs, rsd);
+}
+
+static void MaxF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.maxFloat32x4(rs, rsd);
+}
+
+static void MaxF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.maxFloat64x2(rs, rsd);
+}
+
+static void PMinF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.pseudoMinFloat32x4(rs, rsd);
+}
+
+static void PMinF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.pseudoMinFloat64x2(rs, rsd);
+}
+
+static void PMaxF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.pseudoMaxFloat32x4(rs, rsd);
+}
+
+static void PMaxF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.pseudoMaxFloat64x2(rs, rsd);
+}
+# endif
+
+static void DotI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.widenDotInt16x8(rs, rsd);
+}
+
+static void CmpI8x16(MacroAssembler& masm, Assembler::Condition cond,
+ RegV128 rs, RegV128 rsd) {
+ masm.compareInt8x16(cond, rs, rsd);
+}
+
+static void CmpI16x8(MacroAssembler& masm, Assembler::Condition cond,
+ RegV128 rs, RegV128 rsd) {
+ masm.compareInt16x8(cond, rs, rsd);
+}
+
+static void CmpI32x4(MacroAssembler& masm, Assembler::Condition cond,
+ RegV128 rs, RegV128 rsd) {
+ masm.compareInt32x4(cond, rs, rsd);
+}
+
+# if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
+static void CmpUI8x16(MacroAssembler& masm, Assembler::Condition cond,
+ RegV128 rs, RegV128 rsd, RegV128 temp1, RegV128 temp2) {
+ masm.unsignedCompareInt8x16(cond, rs, rsd, temp1, temp2);
+}
+
+static void CmpUI16x8(MacroAssembler& masm, Assembler::Condition cond,
+ RegV128 rs, RegV128 rsd, RegV128 temp1, RegV128 temp2) {
+ masm.unsignedCompareInt16x8(cond, rs, rsd, temp1, temp2);
+}
+
+static void CmpUI32x4(MacroAssembler& masm, Assembler::Condition cond,
+ RegV128 rs, RegV128 rsd, RegV128 temp1, RegV128 temp2) {
+ masm.unsignedCompareInt32x4(cond, rs, rsd, temp1, temp2);
+}
+# else
+static void CmpUI8x16(MacroAssembler& masm, Assembler::Condition cond,
+ RegV128 rs, RegV128 rsd) {
+ masm.compareInt8x16(cond, rs, rsd);
+}
+
+static void CmpUI16x8(MacroAssembler& masm, Assembler::Condition cond,
+ RegV128 rs, RegV128 rsd) {
+ masm.compareInt16x8(cond, rs, rsd);
+}
+
+static void CmpUI32x4(MacroAssembler& masm, Assembler::Condition cond,
+ RegV128 rs, RegV128 rsd) {
+ masm.compareInt32x4(cond, rs, rsd);
+}
+# endif
+
+static void CmpF32x4(MacroAssembler& masm, Assembler::Condition cond,
+ RegV128 rs, RegV128 rsd) {
+ masm.compareFloat32x4(cond, rs, rsd);
+}
+
+static void CmpF64x2(MacroAssembler& masm, Assembler::Condition cond,
+ RegV128 rs, RegV128 rsd) {
+ masm.compareFloat64x2(cond, rs, rsd);
+}
+
+static void NegI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rd) {
+ masm.negInt8x16(rs, rd);
+}
+
+static void NegI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rd) {
+ masm.negInt16x8(rs, rd);
+}
+
+static void NegI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rd) {
+ masm.negInt32x4(rs, rd);
+}
+
+static void NegI64x2(MacroAssembler& masm, RegV128 rs, RegV128 rd) {
+ masm.negInt64x2(rs, rd);
+}
+
+static void NegF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rd) {
+ masm.negFloat32x4(rs, rd);
+}
+
+static void NegF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rd) {
+ masm.negFloat64x2(rs, rd);
+}
+
+static void AbsF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rd) {
+ masm.absFloat32x4(rs, rd);
+}
+
+static void AbsF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rd) {
+ masm.absFloat64x2(rs, rd);
+}
+
+static void SqrtF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rd) {
+ masm.sqrtFloat32x4(rs, rd);
+}
+
+static void SqrtF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rd) {
+ masm.sqrtFloat64x2(rs, rd);
+}
+
+static void CeilF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rd) {
+ masm.ceilFloat32x4(rs, rd);
+}
+
+static void FloorF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rd) {
+ masm.floorFloat32x4(rs, rd);
+}
+
+static void TruncF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rd) {
+ masm.truncFloat32x4(rs, rd);
+}
+
+static void NearestF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rd) {
+ masm.nearestFloat32x4(rs, rd);
+}
+
+static void CeilF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rd) {
+ masm.ceilFloat64x2(rs, rd);
+}
+
+static void FloorF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rd) {
+ masm.floorFloat64x2(rs, rd);
+}
+
+static void TruncF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rd) {
+ masm.truncFloat64x2(rs, rd);
+}
+
+static void NearestF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rd) {
+ masm.nearestFloat64x2(rs, rd);
+}
+
+static void NotV128(MacroAssembler& masm, RegV128 rs, RegV128 rd) {
+ masm.bitwiseNotSimd128(rs, rd);
+}
+
+# if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
+static void ShiftLeftI8x16(MacroAssembler& masm, RegI32 rs, RegV128 rsd,
+ RegI32 temp1, RegV128 temp2) {
+ masm.leftShiftInt8x16(rs, rsd, temp1, temp2);
+}
+
+static void ShiftLeftI16x8(MacroAssembler& masm, RegI32 rs, RegV128 rsd,
+ RegI32 temp) {
+ masm.leftShiftInt16x8(rs, rsd, temp);
+}
+
+static void ShiftLeftI32x4(MacroAssembler& masm, RegI32 rs, RegV128 rsd,
+ RegI32 temp) {
+ masm.leftShiftInt32x4(rs, rsd, temp);
+}
+
+static void ShiftLeftI64x2(MacroAssembler& masm, RegI32 rs, RegV128 rsd,
+ RegI32 temp) {
+ masm.leftShiftInt64x2(rs, rsd, temp);
+}
+
+static void ShiftRightI8x16(MacroAssembler& masm, RegI32 rs, RegV128 rsd,
+ RegI32 temp1, RegV128 temp2) {
+ masm.rightShiftInt8x16(rs, rsd, temp1, temp2);
+}
+
+static void ShiftRightUI8x16(MacroAssembler& masm, RegI32 rs, RegV128 rsd,
+ RegI32 temp1, RegV128 temp2) {
+ masm.unsignedRightShiftInt8x16(rs, rsd, temp1, temp2);
+}
+
+static void ShiftRightI16x8(MacroAssembler& masm, RegI32 rs, RegV128 rsd,
+ RegI32 temp) {
+ masm.rightShiftInt16x8(rs, rsd, temp);
+}
+
+static void ShiftRightUI16x8(MacroAssembler& masm, RegI32 rs, RegV128 rsd,
+ RegI32 temp) {
+ masm.unsignedRightShiftInt16x8(rs, rsd, temp);
+}
+
+static void ShiftRightI32x4(MacroAssembler& masm, RegI32 rs, RegV128 rsd,
+ RegI32 temp) {
+ masm.rightShiftInt32x4(rs, rsd, temp);
+}
+
+static void ShiftRightUI32x4(MacroAssembler& masm, RegI32 rs, RegV128 rsd,
+ RegI32 temp) {
+ masm.unsignedRightShiftInt32x4(rs, rsd, temp);
+}
+
+static void ShiftRightUI64x2(MacroAssembler& masm, RegI32 rs, RegV128 rsd,
+ RegI32 temp) {
+ masm.unsignedRightShiftInt64x2(rs, rsd, temp);
+}
+# elif defined(JS_CODEGEN_ARM64)
+static void ShiftLeftI8x16(MacroAssembler& masm, RegI32 rs, RegV128 rsd) {
+ masm.leftShiftInt8x16(rs, rsd);
+}
+
+static void ShiftLeftI16x8(MacroAssembler& masm, RegI32 rs, RegV128 rsd) {
+ masm.leftShiftInt16x8(rs, rsd);
+}
+
+static void ShiftLeftI32x4(MacroAssembler& masm, RegI32 rs, RegV128 rsd) {
+ masm.leftShiftInt32x4(rs, rsd);
+}
+
+static void ShiftLeftI64x2(MacroAssembler& masm, RegI32 rs, RegV128 rsd) {
+ masm.leftShiftInt64x2(rs, rsd);
+}
+
+static void ShiftRightI8x16(MacroAssembler& masm, RegI32 rs, RegV128 rsd,
+ RegV128 temp) {
+ masm.rightShiftInt8x16(rs, rsd, temp);
+}
+
+static void ShiftRightUI8x16(MacroAssembler& masm, RegI32 rs, RegV128 rsd,
+ RegV128 temp) {
+ masm.unsignedRightShiftInt8x16(rs, rsd, temp);
+}
+
+static void ShiftRightI16x8(MacroAssembler& masm, RegI32 rs, RegV128 rsd,
+ RegV128 temp) {
+ masm.rightShiftInt16x8(rs, rsd, temp);
+}
+
+static void ShiftRightUI16x8(MacroAssembler& masm, RegI32 rs, RegV128 rsd,
+ RegV128 temp) {
+ masm.unsignedRightShiftInt16x8(rs, rsd, temp);
+}
+
+static void ShiftRightI32x4(MacroAssembler& masm, RegI32 rs, RegV128 rsd,
+ RegV128 temp) {
+ masm.rightShiftInt32x4(rs, rsd, temp);
+}
+
+static void ShiftRightUI32x4(MacroAssembler& masm, RegI32 rs, RegV128 rsd,
+ RegV128 temp) {
+ masm.unsignedRightShiftInt32x4(rs, rsd, temp);
+}
+# endif
+
+static void AverageUI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.unsignedAverageInt8x16(rs, rsd);
+}
+
+static void AverageUI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.unsignedAverageInt16x8(rs, rsd);
+}
+
+static void MinI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.minInt8x16(rs, rsd);
+}
+
+static void MinUI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.unsignedMinInt8x16(rs, rsd);
+}
+
+static void MaxI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.maxInt8x16(rs, rsd);
+}
+
+static void MaxUI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.unsignedMaxInt8x16(rs, rsd);
+}
+
+static void MinI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.minInt16x8(rs, rsd);
+}
+
+static void MinUI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.unsignedMinInt16x8(rs, rsd);
+}
+
+static void MaxI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.maxInt16x8(rs, rsd);
+}
+
+static void MaxUI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.unsignedMaxInt16x8(rs, rsd);
+}
+
+static void MinI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.minInt32x4(rs, rsd);
+}
+
+static void MinUI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.unsignedMinInt32x4(rs, rsd);
+}
+
+static void MaxI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.maxInt32x4(rs, rsd);
+}
+
+static void MaxUI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.unsignedMaxInt32x4(rs, rsd);
+}
+
+static void NarrowI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.narrowInt16x8(rs, rsd);
+}
+
+static void NarrowUI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.unsignedNarrowInt16x8(rs, rsd);
+}
+
+static void NarrowI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.narrowInt32x4(rs, rsd);
+}
+
+static void NarrowUI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.unsignedNarrowInt32x4(rs, rsd);
+}
+
+static void WidenLowI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rd) {
+ masm.widenLowInt8x16(rs, rd);
+}
+
+static void WidenHighI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rd) {
+ masm.widenHighInt8x16(rs, rd);
+}
+
+static void WidenLowUI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rd) {
+ masm.unsignedWidenLowInt8x16(rs, rd);
+}
+
+static void WidenHighUI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rd) {
+ masm.unsignedWidenHighInt8x16(rs, rd);
+}
+
+static void WidenLowI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rd) {
+ masm.widenLowInt16x8(rs, rd);
+}
+
+static void WidenHighI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rd) {
+ masm.widenHighInt16x8(rs, rd);
+}
+
+static void WidenLowUI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rd) {
+ masm.unsignedWidenLowInt16x8(rs, rd);
+}
+
+static void WidenHighUI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rd) {
+ masm.unsignedWidenHighInt16x8(rs, rd);
+}
+
+static void AbsI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rd) {
+ masm.absInt8x16(rs, rd);
+}
+
+static void AbsI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rd) {
+ masm.absInt16x8(rs, rd);
+}
+
+static void AbsI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rd) {
+ masm.absInt32x4(rs, rd);
+}
+
+static void ExtractLaneI8x16(MacroAssembler& masm, uint32_t laneIndex,
+ RegV128 rs, RegI32 rd) {
+ masm.extractLaneInt8x16(laneIndex, rs, rd);
+}
+
+static void ExtractLaneUI8x16(MacroAssembler& masm, uint32_t laneIndex,
+ RegV128 rs, RegI32 rd) {
+ masm.unsignedExtractLaneInt8x16(laneIndex, rs, rd);
+}
+
+static void ExtractLaneI16x8(MacroAssembler& masm, uint32_t laneIndex,
+ RegV128 rs, RegI32 rd) {
+ masm.extractLaneInt16x8(laneIndex, rs, rd);
+}
+
+static void ExtractLaneUI16x8(MacroAssembler& masm, uint32_t laneIndex,
+ RegV128 rs, RegI32 rd) {
+ masm.unsignedExtractLaneInt16x8(laneIndex, rs, rd);
+}
+
+static void ExtractLaneI32x4(MacroAssembler& masm, uint32_t laneIndex,
+ RegV128 rs, RegI32 rd) {
+ masm.extractLaneInt32x4(laneIndex, rs, rd);
+}
+
+static void ExtractLaneI64x2(MacroAssembler& masm, uint32_t laneIndex,
+ RegV128 rs, RegI64 rd) {
+ masm.extractLaneInt64x2(laneIndex, rs, rd);
+}
+
+static void ExtractLaneF32x4(MacroAssembler& masm, uint32_t laneIndex,
+ RegV128 rs, RegF32 rd) {
+ masm.extractLaneFloat32x4(laneIndex, rs, rd);
+}
+
+static void ExtractLaneF64x2(MacroAssembler& masm, uint32_t laneIndex,
+ RegV128 rs, RegF64 rd) {
+ masm.extractLaneFloat64x2(laneIndex, rs, rd);
+}
+
+static void ReplaceLaneI8x16(MacroAssembler& masm, uint32_t laneIndex,
+ RegI32 rs, RegV128 rsd) {
+ masm.replaceLaneInt8x16(laneIndex, rs, rsd);
+}
+
+static void ReplaceLaneI16x8(MacroAssembler& masm, uint32_t laneIndex,
+ RegI32 rs, RegV128 rsd) {
+ masm.replaceLaneInt16x8(laneIndex, rs, rsd);
+}
+
+static void ReplaceLaneI32x4(MacroAssembler& masm, uint32_t laneIndex,
+ RegI32 rs, RegV128 rsd) {
+ masm.replaceLaneInt32x4(laneIndex, rs, rsd);
+}
+
+static void ReplaceLaneI64x2(MacroAssembler& masm, uint32_t laneIndex,
+ RegI64 rs, RegV128 rsd) {
+ masm.replaceLaneInt64x2(laneIndex, rs, rsd);
+}
+
+static void ReplaceLaneF32x4(MacroAssembler& masm, uint32_t laneIndex,
+ RegF32 rs, RegV128 rsd) {
+ masm.replaceLaneFloat32x4(laneIndex, rs, rsd);
+}
+
+static void ReplaceLaneF64x2(MacroAssembler& masm, uint32_t laneIndex,
+ RegF64 rs, RegV128 rsd) {
+ masm.replaceLaneFloat64x2(laneIndex, rs, rsd);
+}
+
+static void SplatI8x16(MacroAssembler& masm, RegI32 rs, RegV128 rd) {
+ masm.splatX16(rs, rd);
+}
+
+static void SplatI16x8(MacroAssembler& masm, RegI32 rs, RegV128 rd) {
+ masm.splatX8(rs, rd);
+}
+
+static void SplatI32x4(MacroAssembler& masm, RegI32 rs, RegV128 rd) {
+ masm.splatX4(rs, rd);
+}
+
+static void SplatI64x2(MacroAssembler& masm, RegI64 rs, RegV128 rd) {
+ masm.splatX2(rs, rd);
+}
+
+static void SplatF32x4(MacroAssembler& masm, RegF32 rs, RegV128 rd) {
+ masm.splatX4(rs, rd);
+}
+
+static void SplatF64x2(MacroAssembler& masm, RegF64 rs, RegV128 rd) {
+ masm.splatX2(rs, rd);
+}
+
+// This is the same op independent of lanes: it tests for any nonzero bit.
+static void AnyTrue(MacroAssembler& masm, RegV128 rs, RegI32 rd) {
+ masm.anyTrueSimd128(rs, rd);
+}
+
+static void AllTrueI8x16(MacroAssembler& masm, RegV128 rs, RegI32 rd) {
+ masm.allTrueInt8x16(rs, rd);
+}
+
+static void AllTrueI16x8(MacroAssembler& masm, RegV128 rs, RegI32 rd) {
+ masm.allTrueInt16x8(rs, rd);
+}
+
+static void AllTrueI32x4(MacroAssembler& masm, RegV128 rs, RegI32 rd) {
+ masm.allTrueInt32x4(rs, rd);
+}
+
+# if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
+static void BitmaskI8x16(MacroAssembler& masm, RegV128 rs, RegI32 rd) {
+ masm.bitmaskInt8x16(rs, rd);
+}
+
+static void BitmaskI16x8(MacroAssembler& masm, RegV128 rs, RegI32 rd) {
+ masm.bitmaskInt16x8(rs, rd);
+}
+
+static void BitmaskI32x4(MacroAssembler& masm, RegV128 rs, RegI32 rd) {
+ masm.bitmaskInt32x4(rs, rd);
+}
+
+static void Swizzle(MacroAssembler& masm, RegV128 rs, RegV128 rsd,
+ RegV128 temp) {
+ masm.swizzleInt8x16(rs, rsd, temp);
+}
+# elif defined(JS_CODEGEN_ARM64)
+static void BitmaskI8x16(MacroAssembler& masm, RegV128 rs, RegI32 rd,
+ RegV128 temp) {
+ masm.bitmaskInt8x16(rs, rd, temp);
+}
+
+static void BitmaskI16x8(MacroAssembler& masm, RegV128 rs, RegI32 rd,
+ RegV128 temp) {
+ masm.bitmaskInt16x8(rs, rd, temp);
+}
+
+static void BitmaskI32x4(MacroAssembler& masm, RegV128 rs, RegI32 rd,
+ RegV128 temp) {
+ masm.bitmaskInt32x4(rs, rd, temp);
+}
+
+static void Swizzle(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+ masm.swizzleInt8x16(rs, rsd);
+}
+# endif
+
+static void ConvertI32x4ToF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rd) {
+ masm.convertInt32x4ToFloat32x4(rs, rd);
+}
+
+static void ConvertUI32x4ToF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rd) {
+ masm.unsignedConvertInt32x4ToFloat32x4(rs, rd);
+}
+
+static void ConvertF32x4ToI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rd) {
+ masm.truncSatFloat32x4ToInt32x4(rs, rd);
+}
+
+static void ConvertF32x4ToUI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rd,
+ RegV128 temp) {
+ masm.unsignedTruncSatFloat32x4ToInt32x4(rs, rd, temp);
+}
+
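+// The emitVectorUnop/emitVectorBinop templates below pop their operands,
+// allocate any temps needed, invoke one of the trampolines above, then free
+// the consumed registers and push the result. Overload resolution on the
+// trampoline's signature selects the variant with the right number of temps
+// or the immediate form.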
+template <typename SourceType, typename DestType>
+void BaseCompiler::emitVectorUnop(void (*op)(MacroAssembler& masm,
+ SourceType rs, DestType rd)) {
+ SourceType rs = pop<SourceType>();
+ DestType rd = need<DestType>();
+ op(masm, rs, rd);
+ free(rs);
+ push(rd);
+}
+
+template <typename SourceType, typename DestType, typename TempType>
+void BaseCompiler::emitVectorUnop(void (*op)(MacroAssembler& masm,
+ SourceType rs, DestType rd,
+ TempType temp)) {
+ SourceType rs = pop<SourceType>();
+ DestType rd = need<DestType>();
+ TempType temp = need<TempType>();
+ op(masm, rs, rd, temp);
+ free(rs);
+ free(temp);
+ push(rd);
+}
+
+template <typename SourceType, typename DestType, typename ImmType>
+void BaseCompiler::emitVectorUnop(ImmType immediate,
+ void (*op)(MacroAssembler&, ImmType,
+ SourceType, DestType)) {
+ SourceType rs = pop<SourceType>();
+ DestType rd = need<DestType>();
+ op(masm, immediate, rs, rd);
+ free(rs);
+ push(rd);
+}
+
+template <typename RhsType, typename LhsDestType>
+void BaseCompiler::emitVectorBinop(void (*op)(MacroAssembler& masm, RhsType src,
+ LhsDestType srcDest)) {
+ RhsType rs = pop<RhsType>();
+ LhsDestType rsd = pop<LhsDestType>();
+ op(masm, rs, rsd);
+ free(rs);
+ push(rsd);
+}
+
+template <typename RhsDestType, typename LhsType>
+void BaseCompiler::emitVectorBinop(void (*op)(MacroAssembler& masm,
+ RhsDestType src, LhsType srcDest,
+ RhsDestOp)) {
+ RhsDestType rsd = pop<RhsDestType>();
+ LhsType rs = pop<LhsType>();
+ op(masm, rsd, rs, RhsDestOp::True);
+ free(rs);
+ push(rsd);
+}
+
+template <typename RhsType, typename LhsDestType, typename TempType>
+void BaseCompiler::emitVectorBinop(void (*op)(MacroAssembler& masm, RhsType rs,
+ LhsDestType rsd, TempType temp)) {
+ RhsType rs = pop<RhsType>();
+ LhsDestType rsd = pop<LhsDestType>();
+ TempType temp = need<TempType>();
+ op(masm, rs, rsd, temp);
+ free(rs);
+ free(temp);
+ push(rsd);
+}
+
+template <typename RhsType, typename LhsDestType, typename TempType1,
+ typename TempType2>
+void BaseCompiler::emitVectorBinop(void (*op)(MacroAssembler& masm, RhsType rs,
+ LhsDestType rsd, TempType1 temp1,
+ TempType2 temp2)) {
+ RhsType rs = pop<RhsType>();
+ LhsDestType rsd = pop<LhsDestType>();
+ TempType1 temp1 = need<TempType1>();
+ TempType2 temp2 = need<TempType2>();
+ op(masm, rs, rsd, temp1, temp2);
+ free(rs);
+ free(temp1);
+ free(temp2);
+ push(rsd);
+}
+
+template <typename RhsType, typename LhsDestType, typename ImmType>
+void BaseCompiler::emitVectorBinop(ImmType immediate,
+ void (*op)(MacroAssembler&, ImmType, RhsType,
+ LhsDestType)) {
+ RhsType rs = pop<RhsType>();
+ LhsDestType rsd = pop<LhsDestType>();
+ op(masm, immediate, rs, rsd);
+ free(rs);
+ push(rsd);
+}
+
+template <typename RhsType, typename LhsDestType, typename ImmType,
+ typename TempType1, typename TempType2>
+void BaseCompiler::emitVectorBinop(ImmType immediate,
+ void (*op)(MacroAssembler&, ImmType, RhsType,
+ LhsDestType, TempType1 temp1,
+ TempType2 temp2)) {
+ RhsType rs = pop<RhsType>();
+ LhsDestType rsd = pop<LhsDestType>();
+ TempType1 temp1 = need<TempType1>();
+ TempType2 temp2 = need<TempType2>();
+ op(masm, immediate, rs, rsd, temp1, temp2);
+ free(rs);
+ free(temp1);
+ free(temp2);
+ push(rsd);
+}
+
+void BaseCompiler::emitVectorAndNot() {
+ // We want x & ~y but the available operation is ~x & y, so reverse the
+ // operands.
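+  // (Here x is the lhs and y the rhs of wasm's v128.andnot, which computes
+  // x & ~y.)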
+ RegV128 r, rs;
+ pop2xV128(&r, &rs);
+ masm.bitwiseNotAndSimd128(r, rs);
+ freeV128(r);
+ pushV128(rs);
+}
+
+bool BaseCompiler::emitLoadSplat(Scalar::Type viewType) {
+  // We can implement loadSplat mostly as load + splat because the push of the
+  // result onto the value stack in loadCommon normally will not generate any
+  // code; it will leave the value in a register, which we then consume.
+
+ LinearMemoryAddress<Nothing> addr;
+ if (!iter_.readLoadSplat(Scalar::byteSize(viewType), &addr)) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+  // We use uint types when we can, on the general assumption that unsigned
+  // loads might be smaller/faster on some platforms, because no sign
+  // extension needs to be done after the sub-register load.
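+  // For example, a 32-bit load-splat performs a Uint32 load via loadCommon
+  // and then broadcasts the loaded value into all four lanes with SplatI32x4.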
+
+ MemoryAccessDesc access(viewType, addr.align, addr.offset, bytecodeOffset());
+ switch (viewType) {
+ case Scalar::Uint8:
+ if (!loadCommon(&access, AccessCheck(), ValType::I32)) {
+ return false;
+ }
+ emitVectorUnop(SplatI8x16);
+ break;
+ case Scalar::Uint16:
+ if (!loadCommon(&access, AccessCheck(), ValType::I32)) {
+ return false;
+ }
+ emitVectorUnop(SplatI16x8);
+ break;
+ case Scalar::Uint32:
+ if (!loadCommon(&access, AccessCheck(), ValType::I32)) {
+ return false;
+ }
+ emitVectorUnop(SplatI32x4);
+ break;
+ case Scalar::Int64:
+ if (!loadCommon(&access, AccessCheck(), ValType::I64)) {
+ return false;
+ }
+ emitVectorUnop(SplatI64x2);
+ break;
+ default:
+ MOZ_CRASH();
+ }
+ return true;
+}
+
+bool BaseCompiler::emitLoadZero(Scalar::Type viewType) {
+  // LoadZero has the same structure as LoadSplat.
+ LinearMemoryAddress<Nothing> addr;
+ if (!iter_.readLoadSplat(Scalar::byteSize(viewType), &addr)) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+ MemoryAccessDesc access(viewType, addr.align, addr.offset, bytecodeOffset());
+ access.setZeroExtendSimd128Load();
+ return loadCommon(&access, AccessCheck(), ValType::V128);
+}
+
+bool BaseCompiler::emitLoadExtend(Scalar::Type viewType) {
+ LinearMemoryAddress<Nothing> addr;
+ if (!iter_.readLoadExtend(&addr)) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+ MemoryAccessDesc access(Scalar::Int64, addr.align, addr.offset,
+ bytecodeOffset());
+ if (!loadCommon(&access, AccessCheck(), ValType::I64)) {
+ return false;
+ }
+
+ RegI64 rs = popI64();
+ RegV128 rd = needV128();
+ masm.moveGPR64ToDouble(rs, rd);
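+  // The loaded 64 bits now sit in the low half of rd; widen the low lanes
+  // according to the view type (e.g. an Int16 view sign-extends four i16
+  // lanes into four i32 lanes).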
+ switch (viewType) {
+ case Scalar::Int8:
+ masm.widenLowInt8x16(rd, rd);
+ break;
+ case Scalar::Uint8:
+ masm.unsignedWidenLowInt8x16(rd, rd);
+ break;
+ case Scalar::Int16:
+ masm.widenLowInt16x8(rd, rd);
+ break;
+ case Scalar::Uint16:
+ masm.unsignedWidenLowInt16x8(rd, rd);
+ break;
+ case Scalar::Int32:
+ masm.widenLowInt32x4(rd, rd);
+ break;
+ case Scalar::Uint32:
+ masm.unsignedWidenLowInt32x4(rd, rd);
+ break;
+ default:
+ MOZ_CRASH();
+ }
+ freeI64(rs);
+ pushV128(rd);
+
+ return true;
+}
+
+bool BaseCompiler::emitBitselect() {
+ Nothing unused_a, unused_b, unused_c;
+
+ if (!iter_.readVectorSelect(&unused_a, &unused_b, &unused_c)) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+ RegV128 rs3 = popV128(); // Control
+ RegV128 rs2 = popV128(); // 'false' vector
+ RegV128 rs1 = popV128(); // 'true' vector
+
+# if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
+ // On x86, certain register assignments will result in more compact code: we
+ // want output=rs1 and tmp=rs3. Attend to this after we see what other
+ // platforms want/need.
+ RegV128 tmp = needV128(); // Distinguished tmp, for now
+ masm.bitwiseSelectSimd128(rs3, rs1, rs2, rs1, tmp);
+ freeV128(rs2);
+ freeV128(rs3);
+ freeV128(tmp);
+ pushV128(rs1);
+# elif defined(JS_CODEGEN_ARM64)
+ // Note register conventions differ significantly from x86.
+ masm.bitwiseSelectSimd128(rs1, rs2, rs3);
+ freeV128(rs1);
+ freeV128(rs2);
+ pushV128(rs3);
+# else
+ MOZ_CRASH("NYI");
+# endif
+ return true;
+}
+
+bool BaseCompiler::emitVectorShuffle() {
+ Nothing unused_a, unused_b;
+ V128 shuffleMask;
+
+ if (!iter_.readVectorShuffle(&unused_a, &unused_b, &shuffleMask)) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+ RegV128 rd, rs;
+ pop2xV128(&rd, &rs);
+ masm.shuffleInt8x16(shuffleMask.bytes, rs, rd);
+ freeV128(rs);
+ pushV128(rd);
+
+ return true;
+}
+
+// The signed case must be scalarized on x86/x64 and requires the shift count
+// in CL (unless BMI2 is available on x64). Both the signed and the unsigned
+// case must be scalarized on ARM64.
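+// Scalarizing here means extracting each 64-bit lane into a general-purpose
+// register, shifting it there, and reinserting it, as the code below does.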
+bool BaseCompiler::emitVectorShiftRightI64x2(bool isUnsigned) {
+ Nothing unused_a, unused_b;
+
+ if (!iter_.readVectorShift(&unused_a, &unused_b)) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+# if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
+ if (isUnsigned) {
+ emitVectorBinop(ShiftRightUI64x2);
+ return true;
+ }
+# endif
+
+# if defined(JS_CODEGEN_X86)
+ needI32(specific_.ecx);
+ RegI32 count = popI32ToSpecific(specific_.ecx);
+# elif defined(JS_CODEGEN_X64)
+ RegI32 count;
+ if (Assembler::HasBMI2()) {
+ count = popI32();
+ } else {
+ needI32(specific_.ecx);
+ count = popI32ToSpecific(specific_.ecx);
+ }
+# elif defined(JS_CODEGEN_ARM64)
+ RegI32 count = popI32();
+# endif
+ RegV128 lhsDest = popV128();
+ RegI64 tmp = needI64();
+ masm.and32(Imm32(63), count);
+ masm.extractLaneInt64x2(0, lhsDest, tmp);
+ if (isUnsigned) {
+ masm.rshift64(count, tmp);
+ } else {
+ masm.rshift64Arithmetic(count, tmp);
+ }
+ masm.replaceLaneInt64x2(0, tmp, lhsDest);
+ masm.extractLaneInt64x2(1, lhsDest, tmp);
+ if (isUnsigned) {
+ masm.rshift64(count, tmp);
+ } else {
+ masm.rshift64Arithmetic(count, tmp);
+ }
+ masm.replaceLaneInt64x2(1, tmp, lhsDest);
+ freeI64(tmp);
+ freeI32(count);
+ pushV128(lhsDest);
+
+ return true;
+}
+
+// i64x2.mul must be scalarized on ARM64, which lacks a 64-bit-lane vector
+// multiply.
+bool BaseCompiler::emitVectorMulI64x2() {
+ Nothing unused_a, unused_b;
+
+ if (!iter_.readBinary(ValType::V128, &unused_a, &unused_b)) {
+ return false;
+ }
+
+ if (deadCode_) {
+ return true;
+ }
+
+# if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
+ emitVectorBinop(MulI64x2);
+# elif defined(JS_CODEGEN_ARM64)
+ RegV128 r, rs;
+ pop2xV128(&r, &rs);
+ RegI64 temp1 = needI64();
+ RegI64 temp2 = needI64();
+ masm.extractLaneInt64x2(0, r, temp1);
+ masm.extractLaneInt64x2(0, rs, temp2);
+ masm.mul64(temp2, temp1, Register::Invalid());
+ masm.replaceLaneInt64x2(0, temp1, r);
+ masm.extractLaneInt64x2(1, r, temp1);
+ masm.extractLaneInt64x2(1, rs, temp2);
+ masm.mul64(temp2, temp1, Register::Invalid());
+ masm.replaceLaneInt64x2(1, temp1, r);
+ freeI64(temp1);
+ freeI64(temp2);
+ freeV128(rs);
+ pushV128(r);
+# else
+ MOZ_CRASH("NYI");
+# endif
+
+ return true;
+}
+#endif
+
+bool BaseCompiler::emitBody() {
+ MOZ_ASSERT(stackMapGenerator_.framePushedAtEntryToBody.isSome());
+
+ if (!iter_.readFunctionStart(func_.index)) {
+ return false;
+ }
+
+ initControl(controlItem(), ResultType::Empty());
+
+ for (;;) {
+ Nothing unused_a, unused_b;
+
+#ifdef DEBUG
+ performRegisterLeakCheck();
+ assertStackInvariants();
+#endif
+
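+// The dispatch* macros below read and validate an opcode's operands via
+// iter_ and then, when not in dead code, run the given emitter. Each expands
+// to a boolean expression that is false on validation failure, so it
+// composes with CHECK_NEXT below.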
+#define dispatchBinary(doEmit, type) \
+ iter_.readBinary(type, &unused_a, &unused_b) && \
+ (deadCode_ || (doEmit(), true))
+
+#define dispatchUnary(doEmit, type) \
+ iter_.readUnary(type, &unused_a) && (deadCode_ || (doEmit(), true))
+
+#define dispatchComparison(doEmit, operandType, compareOp) \
+ iter_.readComparison(operandType, &unused_a, &unused_b) && \
+ (deadCode_ || (doEmit(compareOp, operandType), true))
+
+#define dispatchConversion(doEmit, inType, outType) \
+ iter_.readConversion(inType, outType, &unused_a) && \
+ (deadCode_ || (doEmit(), true))
+
+#define dispatchConversionOOM(doEmit, inType, outType) \
+ iter_.readConversion(inType, outType, &unused_a) && (deadCode_ || doEmit())
+
+#define dispatchCalloutConversionOOM(doEmit, symbol, inType, outType) \
+ iter_.readConversion(inType, outType, &unused_a) && \
+ (deadCode_ || doEmit(symbol, inType, outType))
+
+#define dispatchIntDivCallout(doEmit, symbol, type) \
+ iter_.readBinary(type, &unused_a, &unused_b) && \
+ (deadCode_ || doEmit(symbol, type))
+
+#define dispatchVectorBinary(op) \
+ iter_.readBinary(ValType::V128, &unused_a, &unused_b) && \
+ (deadCode_ || (emitVectorBinop(op), true))
+
+#define dispatchVectorUnary(op) \
+ iter_.readUnary(ValType::V128, &unused_a) && \
+ (deadCode_ || (emitVectorUnop(op), true))
+
+#define dispatchVectorComparison(op, compareOp) \
+ iter_.readBinary(ValType::V128, &unused_a, &unused_b) && \
+ (deadCode_ || (emitVectorBinop(compareOp, op), true))
+
+#define dispatchVectorVariableShift(op) \
+ iter_.readVectorShift(&unused_a, &unused_b) && \
+ (deadCode_ || (emitVectorBinop(op), true))
+
+#define dispatchExtractLane(op, outType, laneLimit) \
+ iter_.readExtractLane(outType, laneLimit, &laneIndex, &unused_a) && \
+ (deadCode_ || (emitVectorUnop(laneIndex, op), true))
+
+#define dispatchReplaceLane(op, inType, laneLimit) \
+ iter_.readReplaceLane(inType, laneLimit, &laneIndex, &unused_a, \
+ &unused_b) && \
+ (deadCode_ || (emitVectorBinop(laneIndex, op), true))
+
+#define dispatchSplat(op, inType) \
+ iter_.readConversion(inType, ValType::V128, &unused_a) && \
+ (deadCode_ || (emitVectorUnop(op), true))
+
+#define dispatchVectorReduction(op) \
+ iter_.readConversion(ValType::V128, ValType::I32, &unused_a) && \
+ (deadCode_ || (emitVectorUnop(op), true))
+
+#ifdef DEBUG
+ // Check that the number of ref-typed entries in the operand stack matches
+ // reality.
+# define CHECK_POINTER_COUNT \
+ do { \
+ MOZ_ASSERT(countMemRefsOnStk() == stackMapGenerator_.memRefsOnStk); \
+ } while (0)
+#else
+# define CHECK_POINTER_COUNT \
+ do { \
+ } while (0)
+#endif
+
+#ifdef ENABLE_WASM_SIMD_EXPERIMENTAL
+# define CHECK_SIMD_EXPERIMENTAL() (void)(0)
+#else
+# define CHECK_SIMD_EXPERIMENTAL() break
+#endif
+
+#define CHECK(E) \
+ if (!(E)) return false
+#define NEXT() \
+ { \
+ CHECK_POINTER_COUNT; \
+ continue; \
+ }
+#define CHECK_NEXT(E) \
+ if (!(E)) return false; \
+ { \
+ CHECK_POINTER_COUNT; \
+ continue; \
+ }
+
+ CHECK(stk_.reserve(stk_.length() + MaxPushesPerOpcode));
+
+ OpBytes op;
+ CHECK(iter_.readOp(&op));
+
+    // When compilerEnv_.debugEnabled(), every operator except Op::End gets a
+    // breakpoint site.
+ if (compilerEnv_.debugEnabled() && op.b0 != (uint16_t)Op::End) {
+      // TODO: sync only the registers that can be clobbered by the exit
+      // prologue/epilogue, or disable those registers for use in the
+      // baseline compiler when compilerEnv_.debugEnabled() is set.
+ sync();
+
+ insertBreakablePoint(CallSiteDesc::Breakpoint);
+ if (!createStackMap("debug: per insn")) {
+ return false;
+ }
+ }
+
+    // Going below framePushedAtEntryToBody would imply that we've popped
+    // part of the frame created by beginFunction() off the machine stack.
+ MOZ_ASSERT(masm.framePushed() >=
+ stackMapGenerator_.framePushedAtEntryToBody.value());
+
+ // At this point we're definitely not generating code for a function call.
+ MOZ_ASSERT(
+ stackMapGenerator_.framePushedExcludingOutboundCallArgs.isNothing());
+
+ switch (op.b0) {
+ case uint16_t(Op::End):
+ if (!emitEnd()) {
+ return false;
+ }
+ if (iter_.controlStackEmpty()) {
+ return true;
+ }
+ NEXT();
+
+ // Control opcodes
+ case uint16_t(Op::Nop):
+ CHECK_NEXT(iter_.readNop());
+ case uint16_t(Op::Drop):
+ CHECK_NEXT(emitDrop());
+ case uint16_t(Op::Block):
+ CHECK_NEXT(emitBlock());
+ case uint16_t(Op::Loop):
+ CHECK_NEXT(emitLoop());
+ case uint16_t(Op::If):
+ CHECK_NEXT(emitIf());
+ case uint16_t(Op::Else):
+ CHECK_NEXT(emitElse());
+#ifdef ENABLE_WASM_EXCEPTIONS
+ case uint16_t(Op::Try):
+ if (!moduleEnv_.exceptionsEnabled()) {
+ return iter_.unrecognizedOpcode(&op);
+ }
+ CHECK_NEXT(emitTry());
+ case uint16_t(Op::Catch):
+ if (!moduleEnv_.exceptionsEnabled()) {
+ return iter_.unrecognizedOpcode(&op);
+ }
+ CHECK_NEXT(emitCatch());
+ case uint16_t(Op::Throw):
+ if (!moduleEnv_.exceptionsEnabled()) {
+ return iter_.unrecognizedOpcode(&op);
+ }
+ CHECK_NEXT(emitThrow());
+#endif
+ case uint16_t(Op::Br):
+ CHECK_NEXT(emitBr());
+ case uint16_t(Op::BrIf):
+ CHECK_NEXT(emitBrIf());
+ case uint16_t(Op::BrTable):
+ CHECK_NEXT(emitBrTable());
+ case uint16_t(Op::Return):
+ CHECK_NEXT(emitReturn());
+ case uint16_t(Op::Unreachable):
+ CHECK(iter_.readUnreachable());
+ if (!deadCode_) {
+ trap(Trap::Unreachable);
+ deadCode_ = true;
+ }
+ NEXT();
+
+ // Calls
+ case uint16_t(Op::Call):
+ CHECK_NEXT(emitCall());
+ case uint16_t(Op::CallIndirect):
+ CHECK_NEXT(emitCallIndirect());
+
+ // Locals and globals
+ case uint16_t(Op::GetLocal):
+ CHECK_NEXT(emitGetLocal());
+ case uint16_t(Op::SetLocal):
+ CHECK_NEXT(emitSetLocal());
+ case uint16_t(Op::TeeLocal):
+ CHECK_NEXT(emitTeeLocal());
+ case uint16_t(Op::GetGlobal):
+ CHECK_NEXT(emitGetGlobal());
+ case uint16_t(Op::SetGlobal):
+ CHECK_NEXT(emitSetGlobal());
+#ifdef ENABLE_WASM_REFTYPES
+ case uint16_t(Op::TableGet):
+ CHECK_NEXT(emitTableGet());
+ case uint16_t(Op::TableSet):
+ CHECK_NEXT(emitTableSet());
+#endif
+
+ // Select
+ case uint16_t(Op::SelectNumeric):
+ CHECK_NEXT(emitSelect(/*typed*/ false));
+ case uint16_t(Op::SelectTyped):
+ if (!moduleEnv_.refTypesEnabled()) {
+ return iter_.unrecognizedOpcode(&op);
+ }
+ CHECK_NEXT(emitSelect(/*typed*/ true));
+
+ // I32
+ case uint16_t(Op::I32Const): {
+ int32_t i32;
+ CHECK(iter_.readI32Const(&i32));
+ if (!deadCode_) {
+ pushI32(i32);
+ }
+ NEXT();
+ }
+ case uint16_t(Op::I32Add):
+ CHECK_NEXT(dispatchBinary(emitAddI32, ValType::I32));
+ case uint16_t(Op::I32Sub):
+ CHECK_NEXT(dispatchBinary(emitSubtractI32, ValType::I32));
+ case uint16_t(Op::I32Mul):
+ CHECK_NEXT(dispatchBinary(emitMultiplyI32, ValType::I32));
+ case uint16_t(Op::I32DivS):
+ CHECK_NEXT(dispatchBinary(emitQuotientI32, ValType::I32));
+ case uint16_t(Op::I32DivU):
+ CHECK_NEXT(dispatchBinary(emitQuotientU32, ValType::I32));
+ case uint16_t(Op::I32RemS):
+ CHECK_NEXT(dispatchBinary(emitRemainderI32, ValType::I32));
+ case uint16_t(Op::I32RemU):
+ CHECK_NEXT(dispatchBinary(emitRemainderU32, ValType::I32));
+ case uint16_t(Op::I32Eqz):
+ CHECK_NEXT(dispatchConversion(emitEqzI32, ValType::I32, ValType::I32));
+ case uint16_t(Op::I32TruncSF32):
+ CHECK_NEXT(dispatchConversionOOM(emitTruncateF32ToI32<0>, ValType::F32,
+ ValType::I32));
+ case uint16_t(Op::I32TruncUF32):
+ CHECK_NEXT(dispatchConversionOOM(emitTruncateF32ToI32<TRUNC_UNSIGNED>,
+ ValType::F32, ValType::I32));
+ case uint16_t(Op::I32TruncSF64):
+ CHECK_NEXT(dispatchConversionOOM(emitTruncateF64ToI32<0>, ValType::F64,
+ ValType::I32));
+ case uint16_t(Op::I32TruncUF64):
+ CHECK_NEXT(dispatchConversionOOM(emitTruncateF64ToI32<TRUNC_UNSIGNED>,
+ ValType::F64, ValType::I32));
+ case uint16_t(Op::I32WrapI64):
+ CHECK_NEXT(
+ dispatchConversion(emitWrapI64ToI32, ValType::I64, ValType::I32));
+ case uint16_t(Op::I32ReinterpretF32):
+ CHECK_NEXT(dispatchConversion(emitReinterpretF32AsI32, ValType::F32,
+ ValType::I32));
+ case uint16_t(Op::I32Clz):
+ CHECK_NEXT(dispatchUnary(emitClzI32, ValType::I32));
+ case uint16_t(Op::I32Ctz):
+ CHECK_NEXT(dispatchUnary(emitCtzI32, ValType::I32));
+ case uint16_t(Op::I32Popcnt):
+ CHECK_NEXT(dispatchUnary(emitPopcntI32, ValType::I32));
+ case uint16_t(Op::I32Or):
+ CHECK_NEXT(dispatchBinary(emitOrI32, ValType::I32));
+ case uint16_t(Op::I32And):
+ CHECK_NEXT(dispatchBinary(emitAndI32, ValType::I32));
+ case uint16_t(Op::I32Xor):
+ CHECK_NEXT(dispatchBinary(emitXorI32, ValType::I32));
+ case uint16_t(Op::I32Shl):
+ CHECK_NEXT(dispatchBinary(emitShlI32, ValType::I32));
+ case uint16_t(Op::I32ShrS):
+ CHECK_NEXT(dispatchBinary(emitShrI32, ValType::I32));
+ case uint16_t(Op::I32ShrU):
+ CHECK_NEXT(dispatchBinary(emitShrU32, ValType::I32));
+ case uint16_t(Op::I32Load8S):
+ CHECK_NEXT(emitLoad(ValType::I32, Scalar::Int8));
+ case uint16_t(Op::I32Load8U):
+ CHECK_NEXT(emitLoad(ValType::I32, Scalar::Uint8));
+ case uint16_t(Op::I32Load16S):
+ CHECK_NEXT(emitLoad(ValType::I32, Scalar::Int16));
+ case uint16_t(Op::I32Load16U):
+ CHECK_NEXT(emitLoad(ValType::I32, Scalar::Uint16));
+ case uint16_t(Op::I32Load):
+ CHECK_NEXT(emitLoad(ValType::I32, Scalar::Int32));
+ case uint16_t(Op::I32Store8):
+ CHECK_NEXT(emitStore(ValType::I32, Scalar::Int8));
+ case uint16_t(Op::I32Store16):
+ CHECK_NEXT(emitStore(ValType::I32, Scalar::Int16));
+ case uint16_t(Op::I32Store):
+ CHECK_NEXT(emitStore(ValType::I32, Scalar::Int32));
+ case uint16_t(Op::I32Rotr):
+ CHECK_NEXT(dispatchBinary(emitRotrI32, ValType::I32));
+ case uint16_t(Op::I32Rotl):
+ CHECK_NEXT(dispatchBinary(emitRotlI32, ValType::I32));
+
+ // I64
+ case uint16_t(Op::I64Const): {
+ int64_t i64;
+ CHECK(iter_.readI64Const(&i64));
+ if (!deadCode_) {
+ pushI64(i64);
+ }
+ NEXT();
+ }
+ case uint16_t(Op::I64Add):
+ CHECK_NEXT(dispatchBinary(emitAddI64, ValType::I64));
+ case uint16_t(Op::I64Sub):
+ CHECK_NEXT(dispatchBinary(emitSubtractI64, ValType::I64));
+ case uint16_t(Op::I64Mul):
+ CHECK_NEXT(dispatchBinary(emitMultiplyI64, ValType::I64));
+ case uint16_t(Op::I64DivS):
+#ifdef RABALDR_INT_DIV_I64_CALLOUT
+ CHECK_NEXT(dispatchIntDivCallout(
+ emitDivOrModI64BuiltinCall, SymbolicAddress::DivI64, ValType::I64));
+#else
+ CHECK_NEXT(dispatchBinary(emitQuotientI64, ValType::I64));
+#endif
+ case uint16_t(Op::I64DivU):
+#ifdef RABALDR_INT_DIV_I64_CALLOUT
+ CHECK_NEXT(dispatchIntDivCallout(emitDivOrModI64BuiltinCall,
+ SymbolicAddress::UDivI64,
+ ValType::I64));
+#else
+ CHECK_NEXT(dispatchBinary(emitQuotientU64, ValType::I64));
+#endif
+ case uint16_t(Op::I64RemS):
+#ifdef RABALDR_INT_DIV_I64_CALLOUT
+ CHECK_NEXT(dispatchIntDivCallout(
+ emitDivOrModI64BuiltinCall, SymbolicAddress::ModI64, ValType::I64));
+#else
+ CHECK_NEXT(dispatchBinary(emitRemainderI64, ValType::I64));
+#endif
+ case uint16_t(Op::I64RemU):
+#ifdef RABALDR_INT_DIV_I64_CALLOUT
+ CHECK_NEXT(dispatchIntDivCallout(emitDivOrModI64BuiltinCall,
+ SymbolicAddress::UModI64,
+ ValType::I64));
+#else
+ CHECK_NEXT(dispatchBinary(emitRemainderU64, ValType::I64));
+#endif
+ case uint16_t(Op::I64TruncSF32):
+#ifdef RABALDR_FLOAT_TO_I64_CALLOUT
+ CHECK_NEXT(
+ dispatchCalloutConversionOOM(emitConvertFloatingToInt64Callout,
+ SymbolicAddress::TruncateDoubleToInt64,
+ ValType::F32, ValType::I64));
+#else
+ CHECK_NEXT(dispatchConversionOOM(emitTruncateF32ToI64<0>, ValType::F32,
+ ValType::I64));
+#endif
+ case uint16_t(Op::I64TruncUF32):
+#ifdef RABALDR_FLOAT_TO_I64_CALLOUT
+ CHECK_NEXT(dispatchCalloutConversionOOM(
+ emitConvertFloatingToInt64Callout,
+ SymbolicAddress::TruncateDoubleToUint64, ValType::F32,
+ ValType::I64));
+#else
+ CHECK_NEXT(dispatchConversionOOM(emitTruncateF32ToI64<TRUNC_UNSIGNED>,
+ ValType::F32, ValType::I64));
+#endif
+ case uint16_t(Op::I64TruncSF64):
+#ifdef RABALDR_FLOAT_TO_I64_CALLOUT
+ CHECK_NEXT(
+ dispatchCalloutConversionOOM(emitConvertFloatingToInt64Callout,
+ SymbolicAddress::TruncateDoubleToInt64,
+ ValType::F64, ValType::I64));
+#else
+ CHECK_NEXT(dispatchConversionOOM(emitTruncateF64ToI64<0>, ValType::F64,
+ ValType::I64));
+#endif
+ case uint16_t(Op::I64TruncUF64):
+#ifdef RABALDR_FLOAT_TO_I64_CALLOUT
+ CHECK_NEXT(dispatchCalloutConversionOOM(
+ emitConvertFloatingToInt64Callout,
+ SymbolicAddress::TruncateDoubleToUint64, ValType::F64,
+ ValType::I64));
+#else
+ CHECK_NEXT(dispatchConversionOOM(emitTruncateF64ToI64<TRUNC_UNSIGNED>,
+ ValType::F64, ValType::I64));
+#endif
+ case uint16_t(Op::I64ExtendSI32):
+ CHECK_NEXT(
+ dispatchConversion(emitExtendI32ToI64, ValType::I32, ValType::I64));
+ case uint16_t(Op::I64ExtendUI32):
+ CHECK_NEXT(
+ dispatchConversion(emitExtendU32ToI64, ValType::I32, ValType::I64));
+ case uint16_t(Op::I64ReinterpretF64):
+ CHECK_NEXT(dispatchConversion(emitReinterpretF64AsI64, ValType::F64,
+ ValType::I64));
+ case uint16_t(Op::I64Or):
+ CHECK_NEXT(dispatchBinary(emitOrI64, ValType::I64));
+ case uint16_t(Op::I64And):
+ CHECK_NEXT(dispatchBinary(emitAndI64, ValType::I64));
+ case uint16_t(Op::I64Xor):
+ CHECK_NEXT(dispatchBinary(emitXorI64, ValType::I64));
+ case uint16_t(Op::I64Shl):
+ CHECK_NEXT(dispatchBinary(emitShlI64, ValType::I64));
+ case uint16_t(Op::I64ShrS):
+ CHECK_NEXT(dispatchBinary(emitShrI64, ValType::I64));
+ case uint16_t(Op::I64ShrU):
+ CHECK_NEXT(dispatchBinary(emitShrU64, ValType::I64));
+ case uint16_t(Op::I64Rotr):
+ CHECK_NEXT(dispatchBinary(emitRotrI64, ValType::I64));
+ case uint16_t(Op::I64Rotl):
+ CHECK_NEXT(dispatchBinary(emitRotlI64, ValType::I64));
+ case uint16_t(Op::I64Clz):
+ CHECK_NEXT(dispatchUnary(emitClzI64, ValType::I64));
+ case uint16_t(Op::I64Ctz):
+ CHECK_NEXT(dispatchUnary(emitCtzI64, ValType::I64));
+ case uint16_t(Op::I64Popcnt):
+ CHECK_NEXT(dispatchUnary(emitPopcntI64, ValType::I64));
+ case uint16_t(Op::I64Eqz):
+ CHECK_NEXT(dispatchConversion(emitEqzI64, ValType::I64, ValType::I32));
+ case uint16_t(Op::I64Load8S):
+ CHECK_NEXT(emitLoad(ValType::I64, Scalar::Int8));
+ case uint16_t(Op::I64Load16S):
+ CHECK_NEXT(emitLoad(ValType::I64, Scalar::Int16));
+ case uint16_t(Op::I64Load32S):
+ CHECK_NEXT(emitLoad(ValType::I64, Scalar::Int32));
+ case uint16_t(Op::I64Load8U):
+ CHECK_NEXT(emitLoad(ValType::I64, Scalar::Uint8));
+ case uint16_t(Op::I64Load16U):
+ CHECK_NEXT(emitLoad(ValType::I64, Scalar::Uint16));
+ case uint16_t(Op::I64Load32U):
+ CHECK_NEXT(emitLoad(ValType::I64, Scalar::Uint32));
+ case uint16_t(Op::I64Load):
+ CHECK_NEXT(emitLoad(ValType::I64, Scalar::Int64));
+ case uint16_t(Op::I64Store8):
+ CHECK_NEXT(emitStore(ValType::I64, Scalar::Int8));
+ case uint16_t(Op::I64Store16):
+ CHECK_NEXT(emitStore(ValType::I64, Scalar::Int16));
+ case uint16_t(Op::I64Store32):
+ CHECK_NEXT(emitStore(ValType::I64, Scalar::Int32));
+ case uint16_t(Op::I64Store):
+ CHECK_NEXT(emitStore(ValType::I64, Scalar::Int64));
+
+ // F32
+ case uint16_t(Op::F32Const): {
+ float f32;
+ CHECK(iter_.readF32Const(&f32));
+ if (!deadCode_) {
+ pushF32(f32);
+ }
+ NEXT();
+ }
+ case uint16_t(Op::F32Add):
+ CHECK_NEXT(dispatchBinary(emitAddF32, ValType::F32));
+ case uint16_t(Op::F32Sub):
+ CHECK_NEXT(dispatchBinary(emitSubtractF32, ValType::F32));
+ case uint16_t(Op::F32Mul):
+ CHECK_NEXT(dispatchBinary(emitMultiplyF32, ValType::F32));
+ case uint16_t(Op::F32Div):
+ CHECK_NEXT(dispatchBinary(emitDivideF32, ValType::F32));
+ case uint16_t(Op::F32Min):
+ CHECK_NEXT(dispatchBinary(emitMinF32, ValType::F32));
+ case uint16_t(Op::F32Max):
+ CHECK_NEXT(dispatchBinary(emitMaxF32, ValType::F32));
+ case uint16_t(Op::F32Neg):
+ CHECK_NEXT(dispatchUnary(emitNegateF32, ValType::F32));
+ case uint16_t(Op::F32Abs):
+ CHECK_NEXT(dispatchUnary(emitAbsF32, ValType::F32));
+ case uint16_t(Op::F32Sqrt):
+ CHECK_NEXT(dispatchUnary(emitSqrtF32, ValType::F32));
+ case uint16_t(Op::F32Ceil):
+ CHECK_NEXT(
+ emitUnaryMathBuiltinCall(SymbolicAddress::CeilF, ValType::F32));
+ case uint16_t(Op::F32Floor):
+ CHECK_NEXT(
+ emitUnaryMathBuiltinCall(SymbolicAddress::FloorF, ValType::F32));
+ case uint16_t(Op::F32DemoteF64):
+ CHECK_NEXT(dispatchConversion(emitConvertF64ToF32, ValType::F64,
+ ValType::F32));
+ case uint16_t(Op::F32ConvertSI32):
+ CHECK_NEXT(dispatchConversion(emitConvertI32ToF32, ValType::I32,
+ ValType::F32));
+ case uint16_t(Op::F32ConvertUI32):
+ CHECK_NEXT(dispatchConversion(emitConvertU32ToF32, ValType::I32,
+ ValType::F32));
+ case uint16_t(Op::F32ConvertSI64):
+#ifdef RABALDR_I64_TO_FLOAT_CALLOUT
+ CHECK_NEXT(dispatchCalloutConversionOOM(
+ emitConvertInt64ToFloatingCallout, SymbolicAddress::Int64ToFloat32,
+ ValType::I64, ValType::F32));
+#else
+ CHECK_NEXT(dispatchConversion(emitConvertI64ToF32, ValType::I64,
+ ValType::F32));
+#endif
+ case uint16_t(Op::F32ConvertUI64):
+#ifdef RABALDR_I64_TO_FLOAT_CALLOUT
+ CHECK_NEXT(dispatchCalloutConversionOOM(
+ emitConvertInt64ToFloatingCallout, SymbolicAddress::Uint64ToFloat32,
+ ValType::I64, ValType::F32));
+#else
+ CHECK_NEXT(dispatchConversion(emitConvertU64ToF32, ValType::I64,
+ ValType::F32));
+#endif
+ case uint16_t(Op::F32ReinterpretI32):
+ CHECK_NEXT(dispatchConversion(emitReinterpretI32AsF32, ValType::I32,
+ ValType::F32));
+ case uint16_t(Op::F32Load):
+ CHECK_NEXT(emitLoad(ValType::F32, Scalar::Float32));
+ case uint16_t(Op::F32Store):
+ CHECK_NEXT(emitStore(ValType::F32, Scalar::Float32));
+ case uint16_t(Op::F32CopySign):
+ CHECK_NEXT(dispatchBinary(emitCopysignF32, ValType::F32));
+ case uint16_t(Op::F32Nearest):
+ CHECK_NEXT(emitUnaryMathBuiltinCall(SymbolicAddress::NearbyIntF,
+ ValType::F32));
+ case uint16_t(Op::F32Trunc):
+ CHECK_NEXT(
+ emitUnaryMathBuiltinCall(SymbolicAddress::TruncF, ValType::F32));
+
+ // F64
+ case uint16_t(Op::F64Const): {
+ double f64;
+ CHECK(iter_.readF64Const(&f64));
+ if (!deadCode_) {
+ pushF64(f64);
+ }
+ NEXT();
+ }
+ case uint16_t(Op::F64Add):
+ CHECK_NEXT(dispatchBinary(emitAddF64, ValType::F64));
+ case uint16_t(Op::F64Sub):
+ CHECK_NEXT(dispatchBinary(emitSubtractF64, ValType::F64));
+ case uint16_t(Op::F64Mul):
+ CHECK_NEXT(dispatchBinary(emitMultiplyF64, ValType::F64));
+ case uint16_t(Op::F64Div):
+ CHECK_NEXT(dispatchBinary(emitDivideF64, ValType::F64));
+ case uint16_t(Op::F64Min):
+ CHECK_NEXT(dispatchBinary(emitMinF64, ValType::F64));
+ case uint16_t(Op::F64Max):
+ CHECK_NEXT(dispatchBinary(emitMaxF64, ValType::F64));
+ case uint16_t(Op::F64Neg):
+ CHECK_NEXT(dispatchUnary(emitNegateF64, ValType::F64));
+ case uint16_t(Op::F64Abs):
+ CHECK_NEXT(dispatchUnary(emitAbsF64, ValType::F64));
+ case uint16_t(Op::F64Sqrt):
+ CHECK_NEXT(dispatchUnary(emitSqrtF64, ValType::F64));
+ case uint16_t(Op::F64Ceil):
+ CHECK_NEXT(
+ emitUnaryMathBuiltinCall(SymbolicAddress::CeilD, ValType::F64));
+ case uint16_t(Op::F64Floor):
+ CHECK_NEXT(
+ emitUnaryMathBuiltinCall(SymbolicAddress::FloorD, ValType::F64));
+ case uint16_t(Op::F64PromoteF32):
+ CHECK_NEXT(dispatchConversion(emitConvertF32ToF64, ValType::F32,
+ ValType::F64));
+ case uint16_t(Op::F64ConvertSI32):
+ CHECK_NEXT(dispatchConversion(emitConvertI32ToF64, ValType::I32,
+ ValType::F64));
+ case uint16_t(Op::F64ConvertUI32):
+ CHECK_NEXT(dispatchConversion(emitConvertU32ToF64, ValType::I32,
+ ValType::F64));
+ case uint16_t(Op::F64ConvertSI64):
+#ifdef RABALDR_I64_TO_FLOAT_CALLOUT
+ CHECK_NEXT(dispatchCalloutConversionOOM(
+ emitConvertInt64ToFloatingCallout, SymbolicAddress::Int64ToDouble,
+ ValType::I64, ValType::F64));
+#else
+ CHECK_NEXT(dispatchConversion(emitConvertI64ToF64, ValType::I64,
+ ValType::F64));
+#endif
+ case uint16_t(Op::F64ConvertUI64):
+#ifdef RABALDR_I64_TO_FLOAT_CALLOUT
+ CHECK_NEXT(dispatchCalloutConversionOOM(
+ emitConvertInt64ToFloatingCallout, SymbolicAddress::Uint64ToDouble,
+ ValType::I64, ValType::F64));
+#else
+ CHECK_NEXT(dispatchConversion(emitConvertU64ToF64, ValType::I64,
+ ValType::F64));
+#endif
+ case uint16_t(Op::F64Load):
+ CHECK_NEXT(emitLoad(ValType::F64, Scalar::Float64));
+ case uint16_t(Op::F64Store):
+ CHECK_NEXT(emitStore(ValType::F64, Scalar::Float64));
+ case uint16_t(Op::F64ReinterpretI64):
+ CHECK_NEXT(dispatchConversion(emitReinterpretI64AsF64, ValType::I64,
+ ValType::F64));
+ case uint16_t(Op::F64CopySign):
+ CHECK_NEXT(dispatchBinary(emitCopysignF64, ValType::F64));
+ case uint16_t(Op::F64Nearest):
+ CHECK_NEXT(emitUnaryMathBuiltinCall(SymbolicAddress::NearbyIntD,
+ ValType::F64));
+ case uint16_t(Op::F64Trunc):
+ CHECK_NEXT(
+ emitUnaryMathBuiltinCall(SymbolicAddress::TruncD, ValType::F64));
+
+ // Comparisons
+ case uint16_t(Op::I32Eq):
+ CHECK_NEXT(
+ dispatchComparison(emitCompareI32, ValType::I32, Assembler::Equal));
+ case uint16_t(Op::I32Ne):
+ CHECK_NEXT(dispatchComparison(emitCompareI32, ValType::I32,
+ Assembler::NotEqual));
+ case uint16_t(Op::I32LtS):
+ CHECK_NEXT(dispatchComparison(emitCompareI32, ValType::I32,
+ Assembler::LessThan));
+ case uint16_t(Op::I32LeS):
+ CHECK_NEXT(dispatchComparison(emitCompareI32, ValType::I32,
+ Assembler::LessThanOrEqual));
+ case uint16_t(Op::I32GtS):
+ CHECK_NEXT(dispatchComparison(emitCompareI32, ValType::I32,
+ Assembler::GreaterThan));
+ case uint16_t(Op::I32GeS):
+ CHECK_NEXT(dispatchComparison(emitCompareI32, ValType::I32,
+ Assembler::GreaterThanOrEqual));
+ case uint16_t(Op::I32LtU):
+ CHECK_NEXT(
+ dispatchComparison(emitCompareI32, ValType::I32, Assembler::Below));
+ case uint16_t(Op::I32LeU):
+ CHECK_NEXT(dispatchComparison(emitCompareI32, ValType::I32,
+ Assembler::BelowOrEqual));
+ case uint16_t(Op::I32GtU):
+ CHECK_NEXT(
+ dispatchComparison(emitCompareI32, ValType::I32, Assembler::Above));
+ case uint16_t(Op::I32GeU):
+ CHECK_NEXT(dispatchComparison(emitCompareI32, ValType::I32,
+ Assembler::AboveOrEqual));
+ case uint16_t(Op::I64Eq):
+ CHECK_NEXT(
+ dispatchComparison(emitCompareI64, ValType::I64, Assembler::Equal));
+ case uint16_t(Op::I64Ne):
+ CHECK_NEXT(dispatchComparison(emitCompareI64, ValType::I64,
+ Assembler::NotEqual));
+ case uint16_t(Op::I64LtS):
+ CHECK_NEXT(dispatchComparison(emitCompareI64, ValType::I64,
+ Assembler::LessThan));
+ case uint16_t(Op::I64LeS):
+ CHECK_NEXT(dispatchComparison(emitCompareI64, ValType::I64,
+ Assembler::LessThanOrEqual));
+ case uint16_t(Op::I64GtS):
+ CHECK_NEXT(dispatchComparison(emitCompareI64, ValType::I64,
+ Assembler::GreaterThan));
+ case uint16_t(Op::I64GeS):
+ CHECK_NEXT(dispatchComparison(emitCompareI64, ValType::I64,
+ Assembler::GreaterThanOrEqual));
+ case uint16_t(Op::I64LtU):
+ CHECK_NEXT(
+ dispatchComparison(emitCompareI64, ValType::I64, Assembler::Below));
+ case uint16_t(Op::I64LeU):
+ CHECK_NEXT(dispatchComparison(emitCompareI64, ValType::I64,
+ Assembler::BelowOrEqual));
+ case uint16_t(Op::I64GtU):
+ CHECK_NEXT(
+ dispatchComparison(emitCompareI64, ValType::I64, Assembler::Above));
+ case uint16_t(Op::I64GeU):
+ CHECK_NEXT(dispatchComparison(emitCompareI64, ValType::I64,
+ Assembler::AboveOrEqual));
+ case uint16_t(Op::F32Eq):
+ CHECK_NEXT(dispatchComparison(emitCompareF32, ValType::F32,
+ Assembler::DoubleEqual));
+ case uint16_t(Op::F32Ne):
+ CHECK_NEXT(dispatchComparison(emitCompareF32, ValType::F32,
+ Assembler::DoubleNotEqualOrUnordered));
+ case uint16_t(Op::F32Lt):
+ CHECK_NEXT(dispatchComparison(emitCompareF32, ValType::F32,
+ Assembler::DoubleLessThan));
+ case uint16_t(Op::F32Le):
+ CHECK_NEXT(dispatchComparison(emitCompareF32, ValType::F32,
+ Assembler::DoubleLessThanOrEqual));
+ case uint16_t(Op::F32Gt):
+ CHECK_NEXT(dispatchComparison(emitCompareF32, ValType::F32,
+ Assembler::DoubleGreaterThan));
+ case uint16_t(Op::F32Ge):
+ CHECK_NEXT(dispatchComparison(emitCompareF32, ValType::F32,
+ Assembler::DoubleGreaterThanOrEqual));
+ case uint16_t(Op::F64Eq):
+ CHECK_NEXT(dispatchComparison(emitCompareF64, ValType::F64,
+ Assembler::DoubleEqual));
+ case uint16_t(Op::F64Ne):
+ CHECK_NEXT(dispatchComparison(emitCompareF64, ValType::F64,
+ Assembler::DoubleNotEqualOrUnordered));
+ case uint16_t(Op::F64Lt):
+ CHECK_NEXT(dispatchComparison(emitCompareF64, ValType::F64,
+ Assembler::DoubleLessThan));
+ case uint16_t(Op::F64Le):
+ CHECK_NEXT(dispatchComparison(emitCompareF64, ValType::F64,
+ Assembler::DoubleLessThanOrEqual));
+ case uint16_t(Op::F64Gt):
+ CHECK_NEXT(dispatchComparison(emitCompareF64, ValType::F64,
+ Assembler::DoubleGreaterThan));
+ case uint16_t(Op::F64Ge):
+ CHECK_NEXT(dispatchComparison(emitCompareF64, ValType::F64,
+ Assembler::DoubleGreaterThanOrEqual));
+
+ // Sign extensions
+ case uint16_t(Op::I32Extend8S):
+ CHECK_NEXT(
+ dispatchConversion(emitExtendI32_8, ValType::I32, ValType::I32));
+ case uint16_t(Op::I32Extend16S):
+ CHECK_NEXT(
+ dispatchConversion(emitExtendI32_16, ValType::I32, ValType::I32));
+ case uint16_t(Op::I64Extend8S):
+ CHECK_NEXT(
+ dispatchConversion(emitExtendI64_8, ValType::I64, ValType::I64));
+ case uint16_t(Op::I64Extend16S):
+ CHECK_NEXT(
+ dispatchConversion(emitExtendI64_16, ValType::I64, ValType::I64));
+ case uint16_t(Op::I64Extend32S):
+ CHECK_NEXT(
+ dispatchConversion(emitExtendI64_32, ValType::I64, ValType::I64));
+
+ // Memory Related
+ case uint16_t(Op::MemoryGrow):
+ CHECK_NEXT(emitMemoryGrow());
+ case uint16_t(Op::MemorySize):
+ CHECK_NEXT(emitMemorySize());
+
+#ifdef ENABLE_WASM_FUNCTION_REFERENCES
+ case uint16_t(Op::RefAsNonNull):
+ if (!moduleEnv_.functionReferencesEnabled()) {
+ return iter_.unrecognizedOpcode(&op);
+ }
+ CHECK_NEXT(emitRefAsNonNull());
+ case uint16_t(Op::BrOnNull):
+ if (!moduleEnv_.functionReferencesEnabled()) {
+ return iter_.unrecognizedOpcode(&op);
+ }
+ CHECK_NEXT(emitBrOnNull());
+#endif
+#ifdef ENABLE_WASM_GC
+ case uint16_t(Op::RefEq):
+ if (!moduleEnv_.gcTypesEnabled()) {
+ return iter_.unrecognizedOpcode(&op);
+ }
+ CHECK_NEXT(dispatchComparison(emitCompareRef, RefType::eq(),
+ Assembler::Equal));
+#endif
+#ifdef ENABLE_WASM_REFTYPES
+ case uint16_t(Op::RefFunc):
+ CHECK_NEXT(emitRefFunc());
+ break;
+ case uint16_t(Op::RefNull):
+ CHECK_NEXT(emitRefNull());
+ break;
+ case uint16_t(Op::RefIsNull):
+ CHECK_NEXT(emitRefIsNull());
+ break;
+#endif
+
+#ifdef ENABLE_WASM_GC
+ // "GC" operations
+ case uint16_t(Op::GcPrefix): {
+ if (!moduleEnv_.gcTypesEnabled()) {
+ return iter_.unrecognizedOpcode(&op);
+ }
+ switch (op.b1) {
+ case uint32_t(GcOp::StructNew):
+ CHECK_NEXT(emitStructNew());
+ case uint32_t(GcOp::StructGet):
+ CHECK_NEXT(emitStructGet());
+ case uint32_t(GcOp::StructSet):
+ CHECK_NEXT(emitStructSet());
+ case uint32_t(GcOp::StructNarrow):
+ CHECK_NEXT(emitStructNarrow());
+ default:
+ break;
+ } // switch (op.b1)
+ return iter_.unrecognizedOpcode(&op);
+ }
+#endif
+
+#ifdef ENABLE_WASM_SIMD
+ // SIMD operations
+ case uint16_t(Op::SimdPrefix): {
+ uint32_t laneIndex;
+ if (!moduleEnv_.v128Enabled()) {
+ return iter_.unrecognizedOpcode(&op);
+ }
+ switch (op.b1) {
+ case uint32_t(SimdOp::I8x16ExtractLaneS):
+ CHECK_NEXT(dispatchExtractLane(ExtractLaneI8x16, ValType::I32, 16));
+ case uint32_t(SimdOp::I8x16ExtractLaneU):
+ CHECK_NEXT(
+ dispatchExtractLane(ExtractLaneUI8x16, ValType::I32, 16));
+ case uint32_t(SimdOp::I16x8ExtractLaneS):
+ CHECK_NEXT(dispatchExtractLane(ExtractLaneI16x8, ValType::I32, 8));
+ case uint32_t(SimdOp::I16x8ExtractLaneU):
+ CHECK_NEXT(dispatchExtractLane(ExtractLaneUI16x8, ValType::I32, 8));
+ case uint32_t(SimdOp::I32x4ExtractLane):
+ CHECK_NEXT(dispatchExtractLane(ExtractLaneI32x4, ValType::I32, 4));
+ case uint32_t(SimdOp::I64x2ExtractLane):
+ CHECK_NEXT(dispatchExtractLane(ExtractLaneI64x2, ValType::I64, 2));
+ case uint32_t(SimdOp::F32x4ExtractLane):
+ CHECK_NEXT(dispatchExtractLane(ExtractLaneF32x4, ValType::F32, 4));
+ case uint32_t(SimdOp::F64x2ExtractLane):
+ CHECK_NEXT(dispatchExtractLane(ExtractLaneF64x2, ValType::F64, 2));
+ case uint32_t(SimdOp::I8x16Splat):
+ CHECK_NEXT(dispatchSplat(SplatI8x16, ValType::I32));
+ case uint32_t(SimdOp::I16x8Splat):
+ CHECK_NEXT(dispatchSplat(SplatI16x8, ValType::I32));
+ case uint32_t(SimdOp::I32x4Splat):
+ CHECK_NEXT(dispatchSplat(SplatI32x4, ValType::I32));
+ case uint32_t(SimdOp::I64x2Splat):
+ CHECK_NEXT(dispatchSplat(SplatI64x2, ValType::I64));
+ case uint32_t(SimdOp::F32x4Splat):
+ CHECK_NEXT(dispatchSplat(SplatF32x4, ValType::F32));
+ case uint32_t(SimdOp::F64x2Splat):
+ CHECK_NEXT(dispatchSplat(SplatF64x2, ValType::F64));
+ case uint32_t(SimdOp::I8x16AnyTrue):
+ case uint32_t(SimdOp::I16x8AnyTrue):
+ case uint32_t(SimdOp::I32x4AnyTrue):
+ CHECK_NEXT(dispatchVectorReduction(AnyTrue));
+ case uint32_t(SimdOp::I8x16AllTrue):
+ CHECK_NEXT(dispatchVectorReduction(AllTrueI8x16));
+ case uint32_t(SimdOp::I16x8AllTrue):
+ CHECK_NEXT(dispatchVectorReduction(AllTrueI16x8));
+ case uint32_t(SimdOp::I32x4AllTrue):
+ CHECK_NEXT(dispatchVectorReduction(AllTrueI32x4));
+ case uint32_t(SimdOp::I8x16Bitmask):
+ CHECK_NEXT(dispatchVectorReduction(BitmaskI8x16));
+ case uint32_t(SimdOp::I16x8Bitmask):
+ CHECK_NEXT(dispatchVectorReduction(BitmaskI16x8));
+ case uint32_t(SimdOp::I32x4Bitmask):
+ CHECK_NEXT(dispatchVectorReduction(BitmaskI32x4));
+ case uint32_t(SimdOp::I8x16ReplaceLane):
+ CHECK_NEXT(dispatchReplaceLane(ReplaceLaneI8x16, ValType::I32, 16));
+ case uint32_t(SimdOp::I16x8ReplaceLane):
+ CHECK_NEXT(dispatchReplaceLane(ReplaceLaneI16x8, ValType::I32, 8));
+ case uint32_t(SimdOp::I32x4ReplaceLane):
+ CHECK_NEXT(dispatchReplaceLane(ReplaceLaneI32x4, ValType::I32, 4));
+ case uint32_t(SimdOp::I64x2ReplaceLane):
+ CHECK_NEXT(dispatchReplaceLane(ReplaceLaneI64x2, ValType::I64, 2));
+ case uint32_t(SimdOp::F32x4ReplaceLane):
+ CHECK_NEXT(dispatchReplaceLane(ReplaceLaneF32x4, ValType::F32, 4));
+ case uint32_t(SimdOp::F64x2ReplaceLane):
+ CHECK_NEXT(dispatchReplaceLane(ReplaceLaneF64x2, ValType::F64, 2));
+ case uint32_t(SimdOp::I8x16Eq):
+ CHECK_NEXT(dispatchVectorComparison(CmpI8x16, Assembler::Equal));
+ case uint32_t(SimdOp::I8x16Ne):
+ CHECK_NEXT(dispatchVectorComparison(CmpI8x16, Assembler::NotEqual));
+ case uint32_t(SimdOp::I8x16LtS):
+ CHECK_NEXT(dispatchVectorComparison(CmpI8x16, Assembler::LessThan));
+ case uint32_t(SimdOp::I8x16LtU):
+ CHECK_NEXT(dispatchVectorComparison(CmpUI8x16, Assembler::Below));
+ case uint32_t(SimdOp::I8x16GtS):
+ CHECK_NEXT(
+ dispatchVectorComparison(CmpI8x16, Assembler::GreaterThan));
+ case uint32_t(SimdOp::I8x16GtU):
+ CHECK_NEXT(dispatchVectorComparison(CmpUI8x16, Assembler::Above));
+ case uint32_t(SimdOp::I8x16LeS):
+ CHECK_NEXT(
+ dispatchVectorComparison(CmpI8x16, Assembler::LessThanOrEqual));
+ case uint32_t(SimdOp::I8x16LeU):
+ CHECK_NEXT(
+ dispatchVectorComparison(CmpUI8x16, Assembler::BelowOrEqual));
+ case uint32_t(SimdOp::I8x16GeS):
+ CHECK_NEXT(dispatchVectorComparison(CmpI8x16,
+ Assembler::GreaterThanOrEqual));
+ case uint32_t(SimdOp::I8x16GeU):
+ CHECK_NEXT(
+ dispatchVectorComparison(CmpUI8x16, Assembler::AboveOrEqual));
+ case uint32_t(SimdOp::I16x8Eq):
+ CHECK_NEXT(dispatchVectorComparison(CmpI16x8, Assembler::Equal));
+ case uint32_t(SimdOp::I16x8Ne):
+ CHECK_NEXT(dispatchVectorComparison(CmpI16x8, Assembler::NotEqual));
+ case uint32_t(SimdOp::I16x8LtS):
+ CHECK_NEXT(dispatchVectorComparison(CmpI16x8, Assembler::LessThan));
+ case uint32_t(SimdOp::I16x8LtU):
+ CHECK_NEXT(dispatchVectorComparison(CmpUI16x8, Assembler::Below));
+ case uint32_t(SimdOp::I16x8GtS):
+ CHECK_NEXT(
+ dispatchVectorComparison(CmpI16x8, Assembler::GreaterThan));
+ case uint32_t(SimdOp::I16x8GtU):
+ CHECK_NEXT(dispatchVectorComparison(CmpUI16x8, Assembler::Above));
+ case uint32_t(SimdOp::I16x8LeS):
+ CHECK_NEXT(
+ dispatchVectorComparison(CmpI16x8, Assembler::LessThanOrEqual));
+ case uint32_t(SimdOp::I16x8LeU):
+ CHECK_NEXT(
+ dispatchVectorComparison(CmpUI16x8, Assembler::BelowOrEqual));
+ case uint32_t(SimdOp::I16x8GeS):
+ CHECK_NEXT(dispatchVectorComparison(CmpI16x8,
+ Assembler::GreaterThanOrEqual));
+ case uint32_t(SimdOp::I16x8GeU):
+ CHECK_NEXT(
+ dispatchVectorComparison(CmpUI16x8, Assembler::AboveOrEqual));
+ case uint32_t(SimdOp::I32x4Eq):
+ CHECK_NEXT(dispatchVectorComparison(CmpI32x4, Assembler::Equal));
+ case uint32_t(SimdOp::I32x4Ne):
+ CHECK_NEXT(dispatchVectorComparison(CmpI32x4, Assembler::NotEqual));
+ case uint32_t(SimdOp::I32x4LtS):
+ CHECK_NEXT(dispatchVectorComparison(CmpI32x4, Assembler::LessThan));
+ case uint32_t(SimdOp::I32x4LtU):
+ CHECK_NEXT(dispatchVectorComparison(CmpUI32x4, Assembler::Below));
+ case uint32_t(SimdOp::I32x4GtS):
+ CHECK_NEXT(
+ dispatchVectorComparison(CmpI32x4, Assembler::GreaterThan));
+ case uint32_t(SimdOp::I32x4GtU):
+ CHECK_NEXT(dispatchVectorComparison(CmpUI32x4, Assembler::Above));
+ case uint32_t(SimdOp::I32x4LeS):
+ CHECK_NEXT(
+ dispatchVectorComparison(CmpI32x4, Assembler::LessThanOrEqual));
+ case uint32_t(SimdOp::I32x4LeU):
+ CHECK_NEXT(
+ dispatchVectorComparison(CmpUI32x4, Assembler::BelowOrEqual));
+ case uint32_t(SimdOp::I32x4GeS):
+ CHECK_NEXT(dispatchVectorComparison(CmpI32x4,
+ Assembler::GreaterThanOrEqual));
+ case uint32_t(SimdOp::I32x4GeU):
+ CHECK_NEXT(
+ dispatchVectorComparison(CmpUI32x4, Assembler::AboveOrEqual));
+ case uint32_t(SimdOp::F32x4Eq):
+ CHECK_NEXT(dispatchVectorComparison(CmpF32x4, Assembler::Equal));
+ case uint32_t(SimdOp::F32x4Ne):
+ CHECK_NEXT(dispatchVectorComparison(CmpF32x4, Assembler::NotEqual));
+ case uint32_t(SimdOp::F32x4Lt):
+ CHECK_NEXT(dispatchVectorComparison(CmpF32x4, Assembler::LessThan));
+ case uint32_t(SimdOp::F32x4Gt):
+ CHECK_NEXT(
+ dispatchVectorComparison(CmpF32x4, Assembler::GreaterThan));
+ case uint32_t(SimdOp::F32x4Le):
+ CHECK_NEXT(
+ dispatchVectorComparison(CmpF32x4, Assembler::LessThanOrEqual));
+ case uint32_t(SimdOp::F32x4Ge):
+ CHECK_NEXT(dispatchVectorComparison(CmpF32x4,
+ Assembler::GreaterThanOrEqual));
+ case uint32_t(SimdOp::F64x2Eq):
+ CHECK_NEXT(dispatchVectorComparison(CmpF64x2, Assembler::Equal));
+ case uint32_t(SimdOp::F64x2Ne):
+ CHECK_NEXT(dispatchVectorComparison(CmpF64x2, Assembler::NotEqual));
+ case uint32_t(SimdOp::F64x2Lt):
+ CHECK_NEXT(dispatchVectorComparison(CmpF64x2, Assembler::LessThan));
+ case uint32_t(SimdOp::F64x2Gt):
+ CHECK_NEXT(
+ dispatchVectorComparison(CmpF64x2, Assembler::GreaterThan));
+ case uint32_t(SimdOp::F64x2Le):
+ CHECK_NEXT(
+ dispatchVectorComparison(CmpF64x2, Assembler::LessThanOrEqual));
+ case uint32_t(SimdOp::F64x2Ge):
+ CHECK_NEXT(dispatchVectorComparison(CmpF64x2,
+ Assembler::GreaterThanOrEqual));
+ case uint32_t(SimdOp::V128And):
+ CHECK_NEXT(dispatchVectorBinary(AndV128));
+ case uint32_t(SimdOp::V128Or):
+ CHECK_NEXT(dispatchVectorBinary(OrV128));
+ case uint32_t(SimdOp::V128Xor):
+ CHECK_NEXT(dispatchVectorBinary(XorV128));
+ case uint32_t(SimdOp::V128AndNot):
+ CHECK_NEXT(dispatchBinary(emitVectorAndNot, ValType::V128));
+ case uint32_t(SimdOp::I8x16AvgrU):
+ CHECK_NEXT(dispatchVectorBinary(AverageUI8x16));
+ case uint32_t(SimdOp::I16x8AvgrU):
+ CHECK_NEXT(dispatchVectorBinary(AverageUI16x8));
+ case uint32_t(SimdOp::I8x16Add):
+ CHECK_NEXT(dispatchVectorBinary(AddI8x16));
+ case uint32_t(SimdOp::I8x16AddSaturateS):
+ CHECK_NEXT(dispatchVectorBinary(AddSatI8x16));
+ case uint32_t(SimdOp::I8x16AddSaturateU):
+ CHECK_NEXT(dispatchVectorBinary(AddSatUI8x16));
+ case uint32_t(SimdOp::I8x16Sub):
+ CHECK_NEXT(dispatchVectorBinary(SubI8x16));
+ case uint32_t(SimdOp::I8x16SubSaturateS):
+ CHECK_NEXT(dispatchVectorBinary(SubSatI8x16));
+ case uint32_t(SimdOp::I8x16SubSaturateU):
+ CHECK_NEXT(dispatchVectorBinary(SubSatUI8x16));
+ case uint32_t(SimdOp::I8x16MinS):
+ CHECK_NEXT(dispatchVectorBinary(MinI8x16));
+ case uint32_t(SimdOp::I8x16MinU):
+ CHECK_NEXT(dispatchVectorBinary(MinUI8x16));
+ case uint32_t(SimdOp::I8x16MaxS):
+ CHECK_NEXT(dispatchVectorBinary(MaxI8x16));
+ case uint32_t(SimdOp::I8x16MaxU):
+ CHECK_NEXT(dispatchVectorBinary(MaxUI8x16));
+ case uint32_t(SimdOp::I16x8Add):
+ CHECK_NEXT(dispatchVectorBinary(AddI16x8));
+ case uint32_t(SimdOp::I16x8AddSaturateS):
+ CHECK_NEXT(dispatchVectorBinary(AddSatI16x8));
+ case uint32_t(SimdOp::I16x8AddSaturateU):
+ CHECK_NEXT(dispatchVectorBinary(AddSatUI16x8));
+ case uint32_t(SimdOp::I16x8Sub):
+ CHECK_NEXT(dispatchVectorBinary(SubI16x8));
+ case uint32_t(SimdOp::I16x8SubSaturateS):
+ CHECK_NEXT(dispatchVectorBinary(SubSatI16x8));
+ case uint32_t(SimdOp::I16x8SubSaturateU):
+ CHECK_NEXT(dispatchVectorBinary(SubSatUI16x8));
+ case uint32_t(SimdOp::I16x8Mul):
+ CHECK_NEXT(dispatchVectorBinary(MulI16x8));
+ case uint32_t(SimdOp::I16x8MinS):
+ CHECK_NEXT(dispatchVectorBinary(MinI16x8));
+ case uint32_t(SimdOp::I16x8MinU):
+ CHECK_NEXT(dispatchVectorBinary(MinUI16x8));
+ case uint32_t(SimdOp::I16x8MaxS):
+ CHECK_NEXT(dispatchVectorBinary(MaxI16x8));
+ case uint32_t(SimdOp::I16x8MaxU):
+ CHECK_NEXT(dispatchVectorBinary(MaxUI16x8));
+ case uint32_t(SimdOp::I32x4Add):
+ CHECK_NEXT(dispatchVectorBinary(AddI32x4));
+ case uint32_t(SimdOp::I32x4Sub):
+ CHECK_NEXT(dispatchVectorBinary(SubI32x4));
+ case uint32_t(SimdOp::I32x4Mul):
+ CHECK_NEXT(dispatchVectorBinary(MulI32x4));
+ case uint32_t(SimdOp::I32x4MinS):
+ CHECK_NEXT(dispatchVectorBinary(MinI32x4));
+ case uint32_t(SimdOp::I32x4MinU):
+ CHECK_NEXT(dispatchVectorBinary(MinUI32x4));
+ case uint32_t(SimdOp::I32x4MaxS):
+ CHECK_NEXT(dispatchVectorBinary(MaxI32x4));
+ case uint32_t(SimdOp::I32x4MaxU):
+ CHECK_NEXT(dispatchVectorBinary(MaxUI32x4));
+ case uint32_t(SimdOp::I64x2Add):
+ CHECK_NEXT(dispatchVectorBinary(AddI64x2));
+ case uint32_t(SimdOp::I64x2Sub):
+ CHECK_NEXT(dispatchVectorBinary(SubI64x2));
+ case uint32_t(SimdOp::I64x2Mul):
+ CHECK_NEXT(emitVectorMulI64x2());
+ case uint32_t(SimdOp::F32x4Add):
+ CHECK_NEXT(dispatchVectorBinary(AddF32x4));
+ case uint32_t(SimdOp::F32x4Sub):
+ CHECK_NEXT(dispatchVectorBinary(SubF32x4));
+ case uint32_t(SimdOp::F32x4Mul):
+ CHECK_NEXT(dispatchVectorBinary(MulF32x4));
+ case uint32_t(SimdOp::F32x4Div):
+ CHECK_NEXT(dispatchVectorBinary(DivF32x4));
+ case uint32_t(SimdOp::F32x4Min):
+ CHECK_NEXT(dispatchVectorBinary(MinF32x4));
+ case uint32_t(SimdOp::F32x4Max):
+ CHECK_NEXT(dispatchVectorBinary(MaxF32x4));
+ case uint32_t(SimdOp::F64x2Add):
+ CHECK_NEXT(dispatchVectorBinary(AddF64x2));
+ case uint32_t(SimdOp::F64x2Sub):
+ CHECK_NEXT(dispatchVectorBinary(SubF64x2));
+ case uint32_t(SimdOp::F64x2Mul):
+ CHECK_NEXT(dispatchVectorBinary(MulF64x2));
+ case uint32_t(SimdOp::F64x2Div):
+ CHECK_NEXT(dispatchVectorBinary(DivF64x2));
+ case uint32_t(SimdOp::F64x2Min):
+ CHECK_NEXT(dispatchVectorBinary(MinF64x2));
+ case uint32_t(SimdOp::F64x2Max):
+ CHECK_NEXT(dispatchVectorBinary(MaxF64x2));
+ case uint32_t(SimdOp::I8x16NarrowSI16x8):
+ CHECK_NEXT(dispatchVectorBinary(NarrowI16x8));
+ case uint32_t(SimdOp::I8x16NarrowUI16x8):
+ CHECK_NEXT(dispatchVectorBinary(NarrowUI16x8));
+ case uint32_t(SimdOp::I16x8NarrowSI32x4):
+ CHECK_NEXT(dispatchVectorBinary(NarrowI32x4));
+ case uint32_t(SimdOp::I16x8NarrowUI32x4):
+ CHECK_NEXT(dispatchVectorBinary(NarrowUI32x4));
+ case uint32_t(SimdOp::V8x16Swizzle):
+ CHECK_NEXT(dispatchVectorBinary(Swizzle));
+ case uint32_t(SimdOp::F32x4PMax):
+ CHECK_NEXT(dispatchVectorBinary(PMaxF32x4));
+ case uint32_t(SimdOp::F32x4PMin):
+ CHECK_NEXT(dispatchVectorBinary(PMinF32x4));
+ case uint32_t(SimdOp::F64x2PMax):
+ CHECK_NEXT(dispatchVectorBinary(PMaxF64x2));
+ case uint32_t(SimdOp::F64x2PMin):
+ CHECK_NEXT(dispatchVectorBinary(PMinF64x2));
+ case uint32_t(SimdOp::I32x4DotSI16x8):
+ CHECK_NEXT(dispatchVectorBinary(DotI16x8));
+ case uint32_t(SimdOp::I8x16Neg):
+ CHECK_NEXT(dispatchVectorUnary(NegI8x16));
+ case uint32_t(SimdOp::I16x8Neg):
+ CHECK_NEXT(dispatchVectorUnary(NegI16x8));
+ case uint32_t(SimdOp::I16x8WidenLowSI8x16):
+ CHECK_NEXT(dispatchVectorUnary(WidenLowI8x16));
+ case uint32_t(SimdOp::I16x8WidenHighSI8x16):
+ CHECK_NEXT(dispatchVectorUnary(WidenHighI8x16));
+ case uint32_t(SimdOp::I16x8WidenLowUI8x16):
+ CHECK_NEXT(dispatchVectorUnary(WidenLowUI8x16));
+ case uint32_t(SimdOp::I16x8WidenHighUI8x16):
+ CHECK_NEXT(dispatchVectorUnary(WidenHighUI8x16));
+ case uint32_t(SimdOp::I32x4Neg):
+ CHECK_NEXT(dispatchVectorUnary(NegI32x4));
+ case uint32_t(SimdOp::I32x4WidenLowSI16x8):
+ CHECK_NEXT(dispatchVectorUnary(WidenLowI16x8));
+ case uint32_t(SimdOp::I32x4WidenHighSI16x8):
+ CHECK_NEXT(dispatchVectorUnary(WidenHighI16x8));
+ case uint32_t(SimdOp::I32x4WidenLowUI16x8):
+ CHECK_NEXT(dispatchVectorUnary(WidenLowUI16x8));
+ case uint32_t(SimdOp::I32x4WidenHighUI16x8):
+ CHECK_NEXT(dispatchVectorUnary(WidenHighUI16x8));
+ case uint32_t(SimdOp::I32x4TruncSSatF32x4):
+ CHECK_NEXT(dispatchVectorUnary(ConvertF32x4ToI32x4));
+ case uint32_t(SimdOp::I32x4TruncUSatF32x4):
+ CHECK_NEXT(dispatchVectorUnary(ConvertF32x4ToUI32x4));
+ case uint32_t(SimdOp::I64x2Neg):
+ CHECK_NEXT(dispatchVectorUnary(NegI64x2));
+ case uint32_t(SimdOp::F32x4Abs):
+ CHECK_NEXT(dispatchVectorUnary(AbsF32x4));
+ case uint32_t(SimdOp::F32x4Neg):
+ CHECK_NEXT(dispatchVectorUnary(NegF32x4));
+ case uint32_t(SimdOp::F32x4Sqrt):
+ CHECK_NEXT(dispatchVectorUnary(SqrtF32x4));
+ case uint32_t(SimdOp::F32x4ConvertSI32x4):
+ CHECK_NEXT(dispatchVectorUnary(ConvertI32x4ToF32x4));
+ case uint32_t(SimdOp::F32x4ConvertUI32x4):
+ CHECK_NEXT(dispatchVectorUnary(ConvertUI32x4ToF32x4));
+ case uint32_t(SimdOp::F64x2Abs):
+ CHECK_NEXT(dispatchVectorUnary(AbsF64x2));
+ case uint32_t(SimdOp::F64x2Neg):
+ CHECK_NEXT(dispatchVectorUnary(NegF64x2));
+ case uint32_t(SimdOp::F64x2Sqrt):
+ CHECK_NEXT(dispatchVectorUnary(SqrtF64x2));
+ case uint32_t(SimdOp::V128Not):
+ CHECK_NEXT(dispatchVectorUnary(NotV128));
+ case uint32_t(SimdOp::I8x16Abs):
+ CHECK_NEXT(dispatchVectorUnary(AbsI8x16));
+ case uint32_t(SimdOp::I16x8Abs):
+ CHECK_NEXT(dispatchVectorUnary(AbsI16x8));
+ case uint32_t(SimdOp::I32x4Abs):
+ CHECK_NEXT(dispatchVectorUnary(AbsI32x4));
+ case uint32_t(SimdOp::F32x4Ceil):
+ CHECK_NEXT(dispatchVectorUnary(CeilF32x4));
+ case uint32_t(SimdOp::F32x4Floor):
+ CHECK_NEXT(dispatchVectorUnary(FloorF32x4));
+ case uint32_t(SimdOp::F32x4Trunc):
+ CHECK_NEXT(dispatchVectorUnary(TruncF32x4));
+ case uint32_t(SimdOp::F32x4Nearest):
+ CHECK_NEXT(dispatchVectorUnary(NearestF32x4));
+ case uint32_t(SimdOp::F64x2Ceil):
+ CHECK_NEXT(dispatchVectorUnary(CeilF64x2));
+ case uint32_t(SimdOp::F64x2Floor):
+ CHECK_NEXT(dispatchVectorUnary(FloorF64x2));
+ case uint32_t(SimdOp::F64x2Trunc):
+ CHECK_NEXT(dispatchVectorUnary(TruncF64x2));
+ case uint32_t(SimdOp::F64x2Nearest):
+ CHECK_NEXT(dispatchVectorUnary(NearestF64x2));
+ case uint32_t(SimdOp::I8x16Shl):
+ CHECK_NEXT(dispatchVectorVariableShift(ShiftLeftI8x16));
+ case uint32_t(SimdOp::I8x16ShrS):
+ CHECK_NEXT(dispatchVectorVariableShift(ShiftRightI8x16));
+ case uint32_t(SimdOp::I8x16ShrU):
+ CHECK_NEXT(dispatchVectorVariableShift(ShiftRightUI8x16));
+ case uint32_t(SimdOp::I16x8Shl):
+ CHECK_NEXT(dispatchVectorVariableShift(ShiftLeftI16x8));
+ case uint32_t(SimdOp::I16x8ShrS):
+ CHECK_NEXT(dispatchVectorVariableShift(ShiftRightI16x8));
+ case uint32_t(SimdOp::I16x8ShrU):
+ CHECK_NEXT(dispatchVectorVariableShift(ShiftRightUI16x8));
+ case uint32_t(SimdOp::I32x4Shl):
+ CHECK_NEXT(dispatchVectorVariableShift(ShiftLeftI32x4));
+ case uint32_t(SimdOp::I32x4ShrS):
+ CHECK_NEXT(dispatchVectorVariableShift(ShiftRightI32x4));
+ case uint32_t(SimdOp::I32x4ShrU):
+ CHECK_NEXT(dispatchVectorVariableShift(ShiftRightUI32x4));
+ case uint32_t(SimdOp::I64x2Shl):
+ CHECK_NEXT(dispatchVectorVariableShift(ShiftLeftI64x2));
+ case uint32_t(SimdOp::I64x2ShrS):
+ CHECK_NEXT(emitVectorShiftRightI64x2(/* isUnsigned */ false));
+ case uint32_t(SimdOp::I64x2ShrU):
+ CHECK_NEXT(emitVectorShiftRightI64x2(/* isUnsigned */ true));
+ case uint32_t(SimdOp::V128Bitselect):
+ CHECK_NEXT(emitBitselect());
+ case uint32_t(SimdOp::V8x16Shuffle):
+ CHECK_NEXT(emitVectorShuffle());
+ case uint32_t(SimdOp::V128Const): {
+ V128 v128;
+ CHECK(iter_.readV128Const(&v128));
+ if (!deadCode_) {
+ pushV128(v128);
+ }
+ NEXT();
+ }
+ case uint32_t(SimdOp::V128Load):
+ CHECK_NEXT(emitLoad(ValType::V128, Scalar::Simd128));
+ case uint32_t(SimdOp::V8x16LoadSplat):
+ CHECK_NEXT(emitLoadSplat(Scalar::Uint8));
+ case uint32_t(SimdOp::V16x8LoadSplat):
+ CHECK_NEXT(emitLoadSplat(Scalar::Uint16));
+ case uint32_t(SimdOp::V32x4LoadSplat):
+ CHECK_NEXT(emitLoadSplat(Scalar::Uint32));
+ case uint32_t(SimdOp::V64x2LoadSplat):
+ CHECK_NEXT(emitLoadSplat(Scalar::Int64));
+ case uint32_t(SimdOp::I16x8LoadS8x8):
+ CHECK_NEXT(emitLoadExtend(Scalar::Int8));
+ case uint32_t(SimdOp::I16x8LoadU8x8):
+ CHECK_NEXT(emitLoadExtend(Scalar::Uint8));
+ case uint32_t(SimdOp::I32x4LoadS16x4):
+ CHECK_NEXT(emitLoadExtend(Scalar::Int16));
+ case uint32_t(SimdOp::I32x4LoadU16x4):
+ CHECK_NEXT(emitLoadExtend(Scalar::Uint16));
+ case uint32_t(SimdOp::I64x2LoadS32x2):
+ CHECK_NEXT(emitLoadExtend(Scalar::Int32));
+ case uint32_t(SimdOp::I64x2LoadU32x2):
+ CHECK_NEXT(emitLoadExtend(Scalar::Uint32));
+ case uint32_t(SimdOp::V128Load32Zero):
+ CHECK_NEXT(emitLoadZero(Scalar::Float32));
+ case uint32_t(SimdOp::V128Load64Zero):
+ CHECK_NEXT(emitLoadZero(Scalar::Float64));
+ case uint32_t(SimdOp::V128Store):
+ CHECK_NEXT(emitStore(ValType::V128, Scalar::Simd128));
+ default:
+ break;
+ } // switch (op.b1)
+ return iter_.unrecognizedOpcode(&op);
+ }
+#endif // ENABLE_WASM_SIMD
+
+ // "Miscellaneous" operations
+ case uint16_t(Op::MiscPrefix): {
+ switch (op.b1) {
+ case uint32_t(MiscOp::I32TruncSSatF32):
+ CHECK_NEXT(
+ dispatchConversionOOM(emitTruncateF32ToI32<TRUNC_SATURATING>,
+ ValType::F32, ValType::I32));
+ case uint32_t(MiscOp::I32TruncUSatF32):
+ CHECK_NEXT(dispatchConversionOOM(
+ emitTruncateF32ToI32<TRUNC_UNSIGNED | TRUNC_SATURATING>,
+ ValType::F32, ValType::I32));
+ case uint32_t(MiscOp::I32TruncSSatF64):
+ CHECK_NEXT(
+ dispatchConversionOOM(emitTruncateF64ToI32<TRUNC_SATURATING>,
+ ValType::F64, ValType::I32));
+ case uint32_t(MiscOp::I32TruncUSatF64):
+ CHECK_NEXT(dispatchConversionOOM(
+ emitTruncateF64ToI32<TRUNC_UNSIGNED | TRUNC_SATURATING>,
+ ValType::F64, ValType::I32));
+ case uint32_t(MiscOp::I64TruncSSatF32):
+#ifdef RABALDR_FLOAT_TO_I64_CALLOUT
+ CHECK_NEXT(dispatchCalloutConversionOOM(
+ emitConvertFloatingToInt64Callout,
+ SymbolicAddress::SaturatingTruncateDoubleToInt64, ValType::F32,
+ ValType::I64));
+#else
+ CHECK_NEXT(
+ dispatchConversionOOM(emitTruncateF32ToI64<TRUNC_SATURATING>,
+ ValType::F32, ValType::I64));
+#endif
+ case uint32_t(MiscOp::I64TruncUSatF32):
+#ifdef RABALDR_FLOAT_TO_I64_CALLOUT
+ CHECK_NEXT(dispatchCalloutConversionOOM(
+ emitConvertFloatingToInt64Callout,
+ SymbolicAddress::SaturatingTruncateDoubleToUint64, ValType::F32,
+ ValType::I64));
+#else
+ CHECK_NEXT(dispatchConversionOOM(
+ emitTruncateF32ToI64<TRUNC_UNSIGNED | TRUNC_SATURATING>,
+ ValType::F32, ValType::I64));
+#endif
+ case uint32_t(MiscOp::I64TruncSSatF64):
+#ifdef RABALDR_FLOAT_TO_I64_CALLOUT
+ CHECK_NEXT(dispatchCalloutConversionOOM(
+ emitConvertFloatingToInt64Callout,
+ SymbolicAddress::SaturatingTruncateDoubleToInt64, ValType::F64,
+ ValType::I64));
+#else
+ CHECK_NEXT(
+ dispatchConversionOOM(emitTruncateF64ToI64<TRUNC_SATURATING>,
+ ValType::F64, ValType::I64));
+#endif
+ case uint32_t(MiscOp::I64TruncUSatF64):
+#ifdef RABALDR_FLOAT_TO_I64_CALLOUT
+ CHECK_NEXT(dispatchCalloutConversionOOM(
+ emitConvertFloatingToInt64Callout,
+ SymbolicAddress::SaturatingTruncateDoubleToUint64, ValType::F64,
+ ValType::I64));
+#else
+ CHECK_NEXT(dispatchConversionOOM(
+ emitTruncateF64ToI64<TRUNC_UNSIGNED | TRUNC_SATURATING>,
+ ValType::F64, ValType::I64));
+#endif
+ case uint32_t(MiscOp::MemCopy):
+ CHECK_NEXT(emitMemCopy());
+ case uint32_t(MiscOp::DataDrop):
+ CHECK_NEXT(emitDataOrElemDrop(/*isData=*/true));
+ case uint32_t(MiscOp::MemFill):
+ CHECK_NEXT(emitMemFill());
+ case uint32_t(MiscOp::MemInit):
+ CHECK_NEXT(emitMemOrTableInit(/*isMem=*/true));
+ case uint32_t(MiscOp::TableCopy):
+ CHECK_NEXT(emitTableCopy());
+ case uint32_t(MiscOp::ElemDrop):
+ CHECK_NEXT(emitDataOrElemDrop(/*isData=*/false));
+ case uint32_t(MiscOp::TableInit):
+ CHECK_NEXT(emitMemOrTableInit(/*isMem=*/false));
+#ifdef ENABLE_WASM_REFTYPES
+ case uint32_t(MiscOp::TableFill):
+ CHECK_NEXT(emitTableFill());
+ case uint32_t(MiscOp::TableGrow):
+ CHECK_NEXT(emitTableGrow());
+ case uint32_t(MiscOp::TableSize):
+ CHECK_NEXT(emitTableSize());
+#endif
+ default:
+ break;
+ } // switch (op.b1)
+ return iter_.unrecognizedOpcode(&op);
+ }
+
+ // Thread operations
+ case uint16_t(Op::ThreadPrefix): {
+ if (moduleEnv_.sharedMemoryEnabled() == Shareable::False) {
+ return iter_.unrecognizedOpcode(&op);
+ }
+ switch (op.b1) {
+ case uint32_t(ThreadOp::Wake):
+ CHECK_NEXT(emitWake());
+
+ case uint32_t(ThreadOp::I32Wait):
+ CHECK_NEXT(emitWait(ValType::I32, 4));
+ case uint32_t(ThreadOp::I64Wait):
+ CHECK_NEXT(emitWait(ValType::I64, 8));
+ case uint32_t(ThreadOp::Fence):
+ CHECK_NEXT(emitFence());
+
+ case uint32_t(ThreadOp::I32AtomicLoad):
+ CHECK_NEXT(emitAtomicLoad(ValType::I32, Scalar::Int32));
+ case uint32_t(ThreadOp::I64AtomicLoad):
+ CHECK_NEXT(emitAtomicLoad(ValType::I64, Scalar::Int64));
+ case uint32_t(ThreadOp::I32AtomicLoad8U):
+ CHECK_NEXT(emitAtomicLoad(ValType::I32, Scalar::Uint8));
+ case uint32_t(ThreadOp::I32AtomicLoad16U):
+ CHECK_NEXT(emitAtomicLoad(ValType::I32, Scalar::Uint16));
+ case uint32_t(ThreadOp::I64AtomicLoad8U):
+ CHECK_NEXT(emitAtomicLoad(ValType::I64, Scalar::Uint8));
+ case uint32_t(ThreadOp::I64AtomicLoad16U):
+ CHECK_NEXT(emitAtomicLoad(ValType::I64, Scalar::Uint16));
+ case uint32_t(ThreadOp::I64AtomicLoad32U):
+ CHECK_NEXT(emitAtomicLoad(ValType::I64, Scalar::Uint32));
+
+ case uint32_t(ThreadOp::I32AtomicStore):
+ CHECK_NEXT(emitAtomicStore(ValType::I32, Scalar::Int32));
+ case uint32_t(ThreadOp::I64AtomicStore):
+ CHECK_NEXT(emitAtomicStore(ValType::I64, Scalar::Int64));
+ case uint32_t(ThreadOp::I32AtomicStore8U):
+ CHECK_NEXT(emitAtomicStore(ValType::I32, Scalar::Uint8));
+ case uint32_t(ThreadOp::I32AtomicStore16U):
+ CHECK_NEXT(emitAtomicStore(ValType::I32, Scalar::Uint16));
+ case uint32_t(ThreadOp::I64AtomicStore8U):
+ CHECK_NEXT(emitAtomicStore(ValType::I64, Scalar::Uint8));
+ case uint32_t(ThreadOp::I64AtomicStore16U):
+ CHECK_NEXT(emitAtomicStore(ValType::I64, Scalar::Uint16));
+ case uint32_t(ThreadOp::I64AtomicStore32U):
+ CHECK_NEXT(emitAtomicStore(ValType::I64, Scalar::Uint32));
+
+ case uint32_t(ThreadOp::I32AtomicAdd):
+ CHECK_NEXT(
+ emitAtomicRMW(ValType::I32, Scalar::Int32, AtomicFetchAddOp));
+ case uint32_t(ThreadOp::I64AtomicAdd):
+ CHECK_NEXT(
+ emitAtomicRMW(ValType::I64, Scalar::Int64, AtomicFetchAddOp));
+ case uint32_t(ThreadOp::I32AtomicAdd8U):
+ CHECK_NEXT(
+ emitAtomicRMW(ValType::I32, Scalar::Uint8, AtomicFetchAddOp));
+ case uint32_t(ThreadOp::I32AtomicAdd16U):
+ CHECK_NEXT(
+ emitAtomicRMW(ValType::I32, Scalar::Uint16, AtomicFetchAddOp));
+ case uint32_t(ThreadOp::I64AtomicAdd8U):
+ CHECK_NEXT(
+ emitAtomicRMW(ValType::I64, Scalar::Uint8, AtomicFetchAddOp));
+ case uint32_t(ThreadOp::I64AtomicAdd16U):
+ CHECK_NEXT(
+ emitAtomicRMW(ValType::I64, Scalar::Uint16, AtomicFetchAddOp));
+ case uint32_t(ThreadOp::I64AtomicAdd32U):
+ CHECK_NEXT(
+ emitAtomicRMW(ValType::I64, Scalar::Uint32, AtomicFetchAddOp));
+
+ case uint32_t(ThreadOp::I32AtomicSub):
+ CHECK_NEXT(
+ emitAtomicRMW(ValType::I32, Scalar::Int32, AtomicFetchSubOp));
+ case uint32_t(ThreadOp::I64AtomicSub):
+ CHECK_NEXT(
+ emitAtomicRMW(ValType::I64, Scalar::Int64, AtomicFetchSubOp));
+ case uint32_t(ThreadOp::I32AtomicSub8U):
+ CHECK_NEXT(
+ emitAtomicRMW(ValType::I32, Scalar::Uint8, AtomicFetchSubOp));
+ case uint32_t(ThreadOp::I32AtomicSub16U):
+ CHECK_NEXT(
+ emitAtomicRMW(ValType::I32, Scalar::Uint16, AtomicFetchSubOp));
+ case uint32_t(ThreadOp::I64AtomicSub8U):
+ CHECK_NEXT(
+ emitAtomicRMW(ValType::I64, Scalar::Uint8, AtomicFetchSubOp));
+ case uint32_t(ThreadOp::I64AtomicSub16U):
+ CHECK_NEXT(
+ emitAtomicRMW(ValType::I64, Scalar::Uint16, AtomicFetchSubOp));
+ case uint32_t(ThreadOp::I64AtomicSub32U):
+ CHECK_NEXT(
+ emitAtomicRMW(ValType::I64, Scalar::Uint32, AtomicFetchSubOp));
+
+ case uint32_t(ThreadOp::I32AtomicAnd):
+ CHECK_NEXT(
+ emitAtomicRMW(ValType::I32, Scalar::Int32, AtomicFetchAndOp));
+ case uint32_t(ThreadOp::I64AtomicAnd):
+ CHECK_NEXT(
+ emitAtomicRMW(ValType::I64, Scalar::Int64, AtomicFetchAndOp));
+ case uint32_t(ThreadOp::I32AtomicAnd8U):
+ CHECK_NEXT(
+ emitAtomicRMW(ValType::I32, Scalar::Uint8, AtomicFetchAndOp));
+ case uint32_t(ThreadOp::I32AtomicAnd16U):
+ CHECK_NEXT(
+ emitAtomicRMW(ValType::I32, Scalar::Uint16, AtomicFetchAndOp));
+ case uint32_t(ThreadOp::I64AtomicAnd8U):
+ CHECK_NEXT(
+ emitAtomicRMW(ValType::I64, Scalar::Uint8, AtomicFetchAndOp));
+ case uint32_t(ThreadOp::I64AtomicAnd16U):
+ CHECK_NEXT(
+ emitAtomicRMW(ValType::I64, Scalar::Uint16, AtomicFetchAndOp));
+ case uint32_t(ThreadOp::I64AtomicAnd32U):
+ CHECK_NEXT(
+ emitAtomicRMW(ValType::I64, Scalar::Uint32, AtomicFetchAndOp));
+
+ case uint32_t(ThreadOp::I32AtomicOr):
+ CHECK_NEXT(
+ emitAtomicRMW(ValType::I32, Scalar::Int32, AtomicFetchOrOp));
+ case uint32_t(ThreadOp::I64AtomicOr):
+ CHECK_NEXT(
+ emitAtomicRMW(ValType::I64, Scalar::Int64, AtomicFetchOrOp));
+ case uint32_t(ThreadOp::I32AtomicOr8U):
+ CHECK_NEXT(
+ emitAtomicRMW(ValType::I32, Scalar::Uint8, AtomicFetchOrOp));
+ case uint32_t(ThreadOp::I32AtomicOr16U):
+ CHECK_NEXT(
+ emitAtomicRMW(ValType::I32, Scalar::Uint16, AtomicFetchOrOp));
+ case uint32_t(ThreadOp::I64AtomicOr8U):
+ CHECK_NEXT(
+ emitAtomicRMW(ValType::I64, Scalar::Uint8, AtomicFetchOrOp));
+ case uint32_t(ThreadOp::I64AtomicOr16U):
+ CHECK_NEXT(
+ emitAtomicRMW(ValType::I64, Scalar::Uint16, AtomicFetchOrOp));
+ case uint32_t(ThreadOp::I64AtomicOr32U):
+ CHECK_NEXT(
+ emitAtomicRMW(ValType::I64, Scalar::Uint32, AtomicFetchOrOp));
+
+ case uint32_t(ThreadOp::I32AtomicXor):
+ CHECK_NEXT(
+ emitAtomicRMW(ValType::I32, Scalar::Int32, AtomicFetchXorOp));
+ case uint32_t(ThreadOp::I64AtomicXor):
+ CHECK_NEXT(
+ emitAtomicRMW(ValType::I64, Scalar::Int64, AtomicFetchXorOp));
+ case uint32_t(ThreadOp::I32AtomicXor8U):
+ CHECK_NEXT(
+ emitAtomicRMW(ValType::I32, Scalar::Uint8, AtomicFetchXorOp));
+ case uint32_t(ThreadOp::I32AtomicXor16U):
+ CHECK_NEXT(
+ emitAtomicRMW(ValType::I32, Scalar::Uint16, AtomicFetchXorOp));
+ case uint32_t(ThreadOp::I64AtomicXor8U):
+ CHECK_NEXT(
+ emitAtomicRMW(ValType::I64, Scalar::Uint8, AtomicFetchXorOp));
+ case uint32_t(ThreadOp::I64AtomicXor16U):
+ CHECK_NEXT(
+ emitAtomicRMW(ValType::I64, Scalar::Uint16, AtomicFetchXorOp));
+ case uint32_t(ThreadOp::I64AtomicXor32U):
+ CHECK_NEXT(
+ emitAtomicRMW(ValType::I64, Scalar::Uint32, AtomicFetchXorOp));
+
+ case uint32_t(ThreadOp::I32AtomicXchg):
+ CHECK_NEXT(emitAtomicXchg(ValType::I32, Scalar::Int32));
+ case uint32_t(ThreadOp::I64AtomicXchg):
+ CHECK_NEXT(emitAtomicXchg(ValType::I64, Scalar::Int64));
+ case uint32_t(ThreadOp::I32AtomicXchg8U):
+ CHECK_NEXT(emitAtomicXchg(ValType::I32, Scalar::Uint8));
+ case uint32_t(ThreadOp::I32AtomicXchg16U):
+ CHECK_NEXT(emitAtomicXchg(ValType::I32, Scalar::Uint16));
+ case uint32_t(ThreadOp::I64AtomicXchg8U):
+ CHECK_NEXT(emitAtomicXchg(ValType::I64, Scalar::Uint8));
+ case uint32_t(ThreadOp::I64AtomicXchg16U):
+ CHECK_NEXT(emitAtomicXchg(ValType::I64, Scalar::Uint16));
+ case uint32_t(ThreadOp::I64AtomicXchg32U):
+ CHECK_NEXT(emitAtomicXchg(ValType::I64, Scalar::Uint32));
+
+ case uint32_t(ThreadOp::I32AtomicCmpXchg):
+ CHECK_NEXT(emitAtomicCmpXchg(ValType::I32, Scalar::Int32));
+ case uint32_t(ThreadOp::I64AtomicCmpXchg):
+ CHECK_NEXT(emitAtomicCmpXchg(ValType::I64, Scalar::Int64));
+ case uint32_t(ThreadOp::I32AtomicCmpXchg8U):
+ CHECK_NEXT(emitAtomicCmpXchg(ValType::I32, Scalar::Uint8));
+ case uint32_t(ThreadOp::I32AtomicCmpXchg16U):
+ CHECK_NEXT(emitAtomicCmpXchg(ValType::I32, Scalar::Uint16));
+ case uint32_t(ThreadOp::I64AtomicCmpXchg8U):
+ CHECK_NEXT(emitAtomicCmpXchg(ValType::I64, Scalar::Uint8));
+ case uint32_t(ThreadOp::I64AtomicCmpXchg16U):
+ CHECK_NEXT(emitAtomicCmpXchg(ValType::I64, Scalar::Uint16));
+ case uint32_t(ThreadOp::I64AtomicCmpXchg32U):
+ CHECK_NEXT(emitAtomicCmpXchg(ValType::I64, Scalar::Uint32));
+
+ default:
+ return iter_.unrecognizedOpcode(&op);
+ }
+ break;
+ }
+
+ // asm.js and other private operations
+ case uint16_t(Op::MozPrefix):
+ return iter_.unrecognizedOpcode(&op);
+
+ default:
+ return iter_.unrecognizedOpcode(&op);
+ }
+
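+ // The CHECK/NEXT/dispatch* names below are local helper macros for the
+ // opcode switch above; undefine them so they do not leak out of emitBody().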
+#undef CHECK
+#undef NEXT
+#undef CHECK_NEXT
+#undef CHECK_POINTER_COUNT
+#undef CHECK_SIMD_EXPERIMENTAL
+#undef dispatchBinary
+#undef dispatchUnary
+#undef dispatchComparison
+#undef dispatchConversion
+#undef dispatchConversionOOM
+#undef dispatchCalloutConversionOOM
+#undef dispatchIntDivCallout
+#undef dispatchVectorBinary
+#undef dispatchVectorUnary
+#undef dispatchVectorComparison
+#undef dispatchExtractLane
+#undef dispatchReplaceLane
+#undef dispatchSplat
+#undef dispatchVectorReduction
+
+ MOZ_CRASH("unreachable");
+ }
+
+ MOZ_CRASH("unreachable");
+}
+
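+// Compile a single function: beginFunction() emits the prologue, emitBody()
+// translates the bytecode one opcode at a time, and endFunction() emits the
+// epilogue.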
+bool BaseCompiler::emitFunction() {
+ if (!beginFunction()) {
+ return false;
+ }
+
+ if (!emitBody()) {
+ return false;
+ }
+
+ if (!endFunction()) {
+ return false;
+ }
+
+ return true;
+}
+
+BaseCompiler::BaseCompiler(const ModuleEnvironment& moduleEnv,
+ const CompilerEnvironment& compilerEnv,
+ const FuncCompileInput& func,
+ const ValTypeVector& locals,
+ const MachineState& trapExitLayout,
+ size_t trapExitLayoutNumWords, Decoder& decoder,
+ StkVector& stkSource, TempAllocator* alloc,
+ MacroAssembler* masm, StackMaps* stackMaps)
+ : moduleEnv_(moduleEnv),
+ compilerEnv_(compilerEnv),
+ iter_(moduleEnv, decoder),
+ func_(func),
+ lastReadCallSite_(0),
+ alloc_(alloc->fallible()),
+ locals_(locals),
+ deadCode_(false),
+ bceSafe_(0),
+ latentOp_(LatentOp::None),
+ latentType_(ValType::I32),
+ latentIntCmp_(Assembler::Equal),
+ latentDoubleCmp_(Assembler::DoubleEqual),
+ masm(*masm),
+ fr(*masm),
+ stackMapGenerator_(stackMaps, trapExitLayout, trapExitLayoutNumWords,
+ *masm),
+ stkSource_(stkSource) {
+ // Our caller, BaselineCompileFunctions, will lend us the vector contents to
+ // use for the eval stack. To get hold of those contents, we'll temporarily
+ // install an empty one in its place.
+ MOZ_ASSERT(stk_.empty());
+ stk_.swap(stkSource_);
+
+ // Assuming that previously processed wasm functions are well formed, the
+ // eval stack should now be empty. But empty it anyway; any non-emptiness
+ // at this point will cause chaos.
+ stk_.clear();
+}
+
+BaseCompiler::~BaseCompiler() {
+ stk_.swap(stkSource_);
+ // We've returned the eval stack vector contents to our caller,
+ // BaselineCompileFunctions. We expect the vector we get in return to be
+ // empty since that's what we swapped for the stack vector in our
+ // constructor.
+ MOZ_ASSERT(stk_.empty());
+}
+
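+// Per-function setup: initialize the register allocator, the one-argument
+// F64/F32 signatures (used when calling out to the unary math builtins), and
+// the frame layout of the arguments and locals.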
+bool BaseCompiler::init() {
+ ra.init(this);
+
+ if (!SigD_.append(ValType::F64)) {
+ return false;
+ }
+ if (!SigF_.append(ValType::F32)) {
+ return false;
+ }
+
+ ArgTypeVector args(funcType());
+ if (!fr.setupLocals(locals_, args, compilerEnv_.debugEnabled(),
+ &localInfo_)) {
+ return false;
+ }
+
+ return true;
+}
+
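+// Called once compilation of the function has succeeded: assert that all
+// bytecode was consumed and the operand-stack/stackmap bookkeeping balanced,
+// then flush the assembler buffer and return the function's code offsets.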
+FuncOffsets BaseCompiler::finish() {
+ MOZ_ASSERT(done(), "all bytes must be consumed");
+ MOZ_ASSERT(func_.callSiteLineNums.length() == lastReadCallSite_);
+
+ MOZ_ASSERT(stk_.empty());
+ MOZ_ASSERT(stackMapGenerator_.memRefsOnStk == 0);
+
+ masm.flushBuffer();
+
+ return offsets_;
+}
+
+} // namespace wasm
+} // namespace js
+
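+// Report whether the baseline compiler is usable on this platform; on ARM we
+// additionally require hardware integer division (SDIV/UDIV), see below.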
+bool js::wasm::BaselinePlatformSupport() {
+#if defined(JS_CODEGEN_ARM)
+ // Simplifying assumption: require SDIV and UDIV.
+ //
+ // I have no good data on ARM populations allowing me to say that
+ // X% of devices in the market implement SDIV and UDIV. However,
+ // they are definitely implemented on the Cortex-A7 and Cortex-A15
+ // and on all ARMv8 systems.
+ if (!HasIDIV()) {
+ return false;
+ }
+#endif
+#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86) || \
+ defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64) || \
+ defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
+ return true;
+#else
+ return false;
+#endif
+}
+
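+// Baseline-compile every function in `inputs`, accumulating the generated
+// machine code, code ranges, and stack maps into `code`. The assembler and
+// the operand-stack vector are shared across functions to avoid repeated
+// allocation.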
+bool js::wasm::BaselineCompileFunctions(const ModuleEnvironment& moduleEnv,
+ const CompilerEnvironment& compilerEnv,
+ LifoAlloc& lifo,
+ const FuncCompileInputVector& inputs,
+ CompiledCode* code,
+ UniqueChars* error) {
+ MOZ_ASSERT(compilerEnv.tier() == Tier::Baseline);
+ MOZ_ASSERT(moduleEnv.kind == ModuleKind::Wasm);
+
+ // The MacroAssembler will sometimes access the jitContext.
+
+ TempAllocator alloc(&lifo);
+ JitContext jitContext(&alloc);
+ MOZ_ASSERT(IsCompilingWasm());
+ WasmMacroAssembler masm(alloc, moduleEnv);
+
+ // Swap in already-allocated empty vectors to avoid malloc/free.
+ MOZ_ASSERT(code->empty());
+ if (!code->swap(masm)) {
+ return false;
+ }
+
+ // Create a description of the stack layout created by GenerateTrapExit().
+ MachineState trapExitLayout;
+ size_t trapExitLayoutNumWords;
+ GenerateTrapExitMachineState(&trapExitLayout, &trapExitLayoutNumWords);
+
+ // The compiler's operand stack. We reuse it across all functions so as to
+ // avoid malloc/free. Presize it to 128 elements in the hope of avoiding
+ // reallocation later.
+ StkVector stk;
+ if (!stk.reserve(128)) {
+ return false;
+ }
+
+ for (const FuncCompileInput& func : inputs) {
+ Decoder d(func.begin, func.end, func.lineOrBytecode, error);
+
+ // Build the local types vector.
+
+ ValTypeVector locals;
+ if (!locals.appendAll(moduleEnv.funcs[func.index].type->args())) {
+ return false;
+ }
+ if (!DecodeLocalEntries(d, moduleEnv.types, moduleEnv.features, &locals)) {
+ return false;
+ }
+
+ // One-pass baseline compilation.
+
+ BaseCompiler f(moduleEnv, compilerEnv, func, locals, trapExitLayout,
+ trapExitLayoutNumWords, d, stk, &alloc, &masm,
+ &code->stackMaps);
+ if (!f.init()) {
+ return false;
+ }
+ if (!f.emitFunction()) {
+ return false;
+ }
+ if (!code->codeRanges.emplaceBack(func.index, func.lineOrBytecode,
+ f.finish())) {
+ return false;
+ }
+ }
+
+ masm.finish();
+ if (masm.oom()) {
+ return false;
+ }
+
+ return code->swap(masm);
+}
+
+#ifdef DEBUG
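+// Debug-only sanity check: a stack map may only be keyed on the address just
+// past a trap instruction, a call (direct or register-indirect), or the nop
+// emitted for debugging; check the preceding instruction bytes for one of
+// those encodings.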
+bool js::wasm::IsValidStackMapKey(bool debugEnabled, const uint8_t* nextPC) {
+# if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86)
+ const uint8_t* insn = nextPC;
+ return (insn[-2] == 0x0F && insn[-1] == 0x0B) || // ud2
+ (insn[-2] == 0xFF && (insn[-1] & 0xF8) == 0xD0) || // call *%r_
+ insn[-5] == 0xE8 || // call simm32
+ (debugEnabled && insn[-5] == 0x0F && insn[-4] == 0x1F &&
+ insn[-3] == 0x44 && insn[-2] == 0x00 &&
+ insn[-1] == 0x00); // nop_five
+
+# elif defined(JS_CODEGEN_ARM)
+ const uint32_t* insn = (const uint32_t*)nextPC;
+ return ((uintptr_t(insn) & 3) == 0) && // must be ARM, not Thumb
+ (insn[-1] == 0xe7f000f0 || // udf
+ (insn[-1] & 0xfffffff0) == 0xe12fff30 || // blx reg (ARM, enc A1)
+ (insn[-1] & 0xff000000) == 0xeb000000 || // bl simm24 (ARM, enc A1)
+ (debugEnabled && insn[-1] == 0xe320f000)); // "as_nop"
+
+# elif defined(JS_CODEGEN_ARM64)
+ const uint32_t hltInsn = 0xd4a00000;
+ const uint32_t* insn = (const uint32_t*)nextPC;
+ return ((uintptr_t(insn) & 3) == 0) &&
+ (insn[-1] == hltInsn || // hlt
+ (insn[-1] & 0xfffffc1f) == 0xd63f0000 || // blr reg
+ (insn[-1] & 0xfc000000) == 0x94000000 || // bl simm26
+ (debugEnabled && insn[-1] == 0xd503201f)); // nop
+
+# else
+ MOZ_CRASH("IsValidStackMapKey: requires implementation on this platform");
+# endif
+}
+#endif
+
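+// The RABALDR_* callout macros are defined near the top of this file based on
+// the target architecture; they are no longer needed past this point.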
+#undef RABALDR_INT_DIV_I64_CALLOUT
+#undef RABALDR_I64_TO_FLOAT_CALLOUT
+#undef RABALDR_FLOAT_TO_I64_CALLOUT