/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 *
 * Copyright 2016 Mozilla Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * [SMDOC] WebAssembly baseline compiler (RabaldrMonkey)
 *
 * General assumptions for 32-bit vs 64-bit code:
 *
 * - A 32-bit register can be extended in-place to a 64-bit register on 64-bit
 *   systems.
 *
 * - Code that knows that Register64 has a '.reg' member on 64-bit systems and
 *   '.high' and '.low' members on 32-bit systems, or knows the implications
 *   thereof, is #ifdef JS_PUNBOX64. All other code is #if(n)?def JS_64BIT.
 *
 *
 * Coding standards:
 *
 * - In "small" code generating functions (eg emitMultiplyF64, emitQuotientI32,
 *   and surrounding functions; most functions fall into this class) where the
 *   meaning is obvious:
 *
 *   - if there is a single source + destination register, it is called 'r'
 *   - if there is one source and a different destination, they are called 'rs'
 *     and 'rd'
 *   - if there is one source + destination register and another source register
 *     they are called 'r' and 'rs'
 *   - if there are two source registers and a destination register they are
 *     called 'rs0', 'rs1', and 'rd'.
 *
 * - Generic temp registers are named /temp[0-9]?/ not /tmp[0-9]?/.
 *
 * - Registers can be named non-generically for their function ('rp' for the
 *   'pointer' register and 'rv' for the 'value' register are typical) and those
 *   names may or may not have an 'r' prefix.
 *
 * - "Larger" code generating functions make their own rules.
 *
 *
 * General status notes:
 *
 * "FIXME" indicates a known or suspected bug. Always has a bug#.
 *
 * "TODO" indicates an opportunity for a general improvement, with an additional
 * tag to indicate the area of improvement. Usually has a bug#.
 *
 * There are lots of machine dependencies here but they are pretty well isolated
 * to a segment of the compiler. Many dependencies will eventually be factored
 * into the MacroAssembler layer and shared with other code generators.
 *
 *
 * High-value compiler performance improvements:
 *
 * - (Bug 1316802) The specific-register allocator (the needI32(r), needI64(r)
 *   etc methods) can avoid syncing the value stack if the specific register is
 *   in use but there is a free register to shuffle the specific register into.
 *   (This will also improve the generated code.) The sync happens often enough
 *   here to show up in profiles, because it is triggered by integer multiply
 *   and divide.
 *
 *
 * High-value code generation improvements:
 *
 * - (Bug 1316804) brTable pessimizes by always dispatching to code that pops
 *   the stack and then jumps to the code for the target case. If no cleanup is
 *   needed we could just branch conditionally to the target; if the same amount
 *   of cleanup is needed for all cases then the cleanup can be done before the
 *   dispatch. Both are highly likely.
 *
 * - (Bug 1316806) Register management around calls: At the moment we sync the
 *   value stack unconditionally (this is simple) but there are probably many
 *   common cases where we could instead save/restore live caller-saves
 *   registers and perform parallel assignment into argument registers. This
 *   may be important if we keep some locals in registers.
 *
 * - (Bug 1316808) Allocate some locals to registers on machines where there are
 *   enough registers. This is probably hard to do well in a one-pass compiler
 *   but it might be that just keeping register arguments and the first few
 *   locals in registers is a viable strategy; another (more general) strategy
 *   is caching locals in registers in straight-line code. Such caching could
 *   also track constant values in registers, if that is deemed valuable. A
 *   combination of techniques may be desirable: parameters and the first few
 *   locals could be cached on entry to the function but not statically assigned
 *   to registers throughout.
 *
 *   (On a large corpus of code it should be possible to compute, for every
 *   signature comprising the types of parameters and locals, and using a static
 *   weight for loops, a list in priority order of which parameters and locals
 *   should be assigned to registers. Or something like that. Wasm makes
 *   this simple. Static assignments are desirable because they are not flushed
 *   to memory by the pre-block sync() call.)
 */

#include "wasm/WasmBaselineCompile.h"

#include "mozilla/MathAlgorithms.h"
#include "mozilla/Maybe.h"

#include <algorithm>
#include <utility>

#include "jit/AtomicOp.h"
#include "jit/IonTypes.h"
#include "jit/JitAllocPolicy.h"
#include "jit/Label.h"
#include "jit/MIR.h"
#include "jit/RegisterAllocator.h"
#include "jit/Registers.h"
#include "jit/RegisterSets.h"
#if defined(JS_CODEGEN_ARM)
# include "jit/arm/Assembler-arm.h"
#endif
#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86)
# include "jit/x86-shared/Architecture-x86-shared.h"
# include "jit/x86-shared/Assembler-x86-shared.h"
#endif
#if defined(JS_CODEGEN_MIPS32)
# include "jit/mips-shared/Assembler-mips-shared.h"
# include "jit/mips32/Assembler-mips32.h"
#endif
#if defined(JS_CODEGEN_MIPS64)
# include "jit/mips-shared/Assembler-mips-shared.h"
# include "jit/mips64/Assembler-mips64.h"
#endif
#include "js/ScalarType.h"  // js::Scalar::Type
#include "util/Memory.h"
#include "wasm/WasmGC.h"
#include "wasm/WasmGenerator.h"
#include "wasm/WasmInstance.h"
#include "wasm/WasmOpIter.h"
#include "wasm/WasmSignalHandlers.h"
#include "wasm/WasmStubs.h"
#include "wasm/WasmValidate.h"

#include "jit/MacroAssembler-inl.h"

using mozilla::DebugOnly;
using mozilla::FloorLog2;
using mozilla::IsPowerOfTwo;
using mozilla::Maybe;

namespace js {
namespace wasm {

using namespace js::jit;

using HandleNaNSpecially = bool;
using InvertBranch = bool;
using IsKnownNotZero = bool;
using IsUnsigned = bool;
using NeedsBoundsCheck = bool;
using WantResult = bool;
using ZeroOnOverflow = bool;

class BaseStackFrame;

// Two flags, useABI and interModule, control how calls are made.
//
// UseABI::Wasm implies that the Tls/Heap/Global registers are nonvolatile,
// except when InterModule::True is also set, when they are volatile.
//
// UseABI::Builtin implies that the Tls/Heap/Global registers are volatile.
// In this case, we require InterModule::False. The calling convention
// is otherwise like UseABI::Wasm.
//
// UseABI::System implies that the Tls/Heap/Global registers are volatile.
// Additionally, the parameter passing mechanism may be slightly different from
// the UseABI::Wasm convention.
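//
// (Illustrative examples only, not an exhaustive list: a call to a
// compiler-internal builtin would typically use UseABI::Builtin with
// InterModule::False, a call that may cross module/instance boundaries would
// use UseABI::Wasm with InterModule::True, and a callout that follows the
// native C++ calling convention would use UseABI::System.)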
// // When the Tls/Heap/Global registers are not volatile, the baseline compiler // will restore the Tls register from its save slot before the call, since the // baseline compiler uses the Tls register for other things. // // When those registers are volatile, the baseline compiler will reload them // after the call (it will restore the Tls register from the save slot and load // the other two from the Tls data). enum class UseABI { Wasm, Builtin, System }; enum class InterModule { False = false, True = true }; enum class RhsDestOp { True = true }; #if defined(JS_CODEGEN_NONE) # define RABALDR_SCRATCH_I32 # define RABALDR_SCRATCH_F32 # define RABALDR_SCRATCH_F64 static constexpr Register RabaldrScratchI32 = Register::Invalid(); static constexpr FloatRegister RabaldrScratchF32 = InvalidFloatReg; static constexpr FloatRegister RabaldrScratchF64 = InvalidFloatReg; #endif #ifdef JS_CODEGEN_ARM64 # define RABALDR_CHUNKY_STACK # define RABALDR_SIDEALLOC_V128 # define RABALDR_SCRATCH_I32 # define RABALDR_SCRATCH_F32 # define RABALDR_SCRATCH_F64 # define RABALDR_SCRATCH_V128 # define RABALDR_SCRATCH_F32_ALIASES_F64 static constexpr Register RabaldrScratchI32{Registers::x15}; // Note, the float scratch regs cannot be registers that are used for parameter // passing in any ABI we use. Argregs tend to be low-numbered; register 30 // should be safe. static constexpr FloatRegister RabaldrScratchF32{FloatRegisters::s30, FloatRegisters::Single}; static constexpr FloatRegister RabaldrScratchF64{FloatRegisters::d30, FloatRegisters::Double}; # ifdef ENABLE_WASM_SIMD static constexpr FloatRegister RabaldrScratchV128{FloatRegisters::d30, FloatRegisters::Simd128}; # endif static_assert(RabaldrScratchF32 != ScratchFloat32Reg, "Too busy"); static_assert(RabaldrScratchF64 != ScratchDoubleReg, "Too busy"); # ifdef ENABLE_WASM_SIMD static_assert(RabaldrScratchV128 != ScratchSimd128Reg, "Too busy"); # endif #endif #ifdef JS_CODEGEN_X86 // The selection of EBX here steps gingerly around: the need for EDX // to be allocatable for multiply/divide; ECX to be allocatable for // shift/rotate; EAX (= ReturnReg) to be allocatable as the result // register; EBX not being one of the WasmTableCall registers; and // needing a temp register for load/store that has a single-byte // persona. // // The compiler assumes that RabaldrScratchI32 has a single-byte // persona. Code for 8-byte atomic operations assumes that // RabaldrScratchI32 is in fact ebx. # define RABALDR_SCRATCH_I32 static constexpr Register RabaldrScratchI32 = ebx; # define RABALDR_INT_DIV_I64_CALLOUT #endif #ifdef JS_CODEGEN_ARM // We use our own scratch register, because the macro assembler uses // the regular scratch register(s) pretty liberally. We could // work around that in several cases but the mess does not seem // worth it yet. CallTempReg2 seems safe. 
# define RABALDR_SCRATCH_I32
static constexpr Register RabaldrScratchI32 = CallTempReg2;

# define RABALDR_INT_DIV_I64_CALLOUT
# define RABALDR_I64_TO_FLOAT_CALLOUT
# define RABALDR_FLOAT_TO_I64_CALLOUT
#endif

#ifdef JS_CODEGEN_MIPS32
# define RABALDR_SCRATCH_I32
static constexpr Register RabaldrScratchI32 = CallTempReg2;

# define RABALDR_INT_DIV_I64_CALLOUT
# define RABALDR_I64_TO_FLOAT_CALLOUT
# define RABALDR_FLOAT_TO_I64_CALLOUT
#endif

#ifdef JS_CODEGEN_MIPS64
# define RABALDR_SCRATCH_I32
static constexpr Register RabaldrScratchI32 = CallTempReg2;
#endif

#ifdef RABALDR_SCRATCH_F32_ALIASES_F64
# if !defined(RABALDR_SCRATCH_F32) || !defined(RABALDR_SCRATCH_F64)
#  error "Bad configuration"
# endif
#endif

template <MIRType t>
struct RegTypeOf {
#ifdef ENABLE_WASM_SIMD
  static_assert(t == MIRType::Float32 || t == MIRType::Double ||
                    t == MIRType::Simd128,
                "Float mask type");
#else
  static_assert(t == MIRType::Float32 || t == MIRType::Double,
                "Float mask type");
#endif
};

template <>
struct RegTypeOf<MIRType::Float32> {
  static constexpr RegTypeName value = RegTypeName::Float32;
};
template <>
struct RegTypeOf<MIRType::Double> {
  static constexpr RegTypeName value = RegTypeName::Float64;
};
#ifdef ENABLE_WASM_SIMD
template <>
struct RegTypeOf<MIRType::Simd128> {
  static constexpr RegTypeName value = RegTypeName::Vector128;
};
#endif

// The strongly typed register wrappers are especially useful to distinguish
// float registers from double registers, but they also clearly distinguish
// 32-bit registers from 64-bit register pairs on 32-bit systems.

struct RegI32 : public Register {
  RegI32() : Register(Register::Invalid()) {}
  explicit RegI32(Register reg) : Register(reg) {
    MOZ_ASSERT(reg != Invalid());
  }
  bool isInvalid() const { return *this == Invalid(); }
  bool isValid() const { return !isInvalid(); }
  static RegI32 Invalid() { return RegI32(); }
};

struct RegI64 : public Register64 {
  RegI64() : Register64(Register64::Invalid()) {}
  explicit RegI64(Register64 reg) : Register64(reg) {
    MOZ_ASSERT(reg != Invalid());
  }
  bool isInvalid() const { return *this == Invalid(); }
  bool isValid() const { return !isInvalid(); }
  static RegI64 Invalid() { return RegI64(); }
};

struct RegPtr : public Register {
  RegPtr() : Register(Register::Invalid()) {}
  explicit RegPtr(Register reg) : Register(reg) {
    MOZ_ASSERT(reg != Invalid());
  }
  bool isInvalid() const { return *this == Invalid(); }
  bool isValid() const { return !isInvalid(); }
  static RegPtr Invalid() { return RegPtr(); }
};

struct RegF32 : public FloatRegister {
  RegF32() : FloatRegister() {}
  explicit RegF32(FloatRegister reg) : FloatRegister(reg) {
    MOZ_ASSERT(isSingle());
  }
  bool isValid() const { return !isInvalid(); }
  static RegF32 Invalid() { return RegF32(); }
};

struct RegF64 : public FloatRegister {
  RegF64() : FloatRegister() {}
  explicit RegF64(FloatRegister reg) : FloatRegister(reg) {
    MOZ_ASSERT(isDouble());
  }
  bool isValid() const { return !isInvalid(); }
  static RegF64 Invalid() { return RegF64(); }
};

#ifdef ENABLE_WASM_SIMD
# ifdef RABALDR_SIDEALLOC_V128
class RegV128 {
  // fpr_ is either invalid or a double that aliases the simd register, see
  // comments below at BaseRegAlloc.
FloatRegister fpr_; public: RegV128() : fpr_(FloatRegister()) {} explicit RegV128(FloatRegister reg) : fpr_(FloatRegister(reg.encoding(), FloatRegisters::Double)) { MOZ_ASSERT(reg.isSimd128()); } static RegV128 fromDouble(FloatRegister reg) { MOZ_ASSERT(reg.isDouble()); return RegV128(FloatRegister(reg.encoding(), FloatRegisters::Simd128)); } FloatRegister asDouble() const { return fpr_; } bool isInvalid() const { return fpr_.isInvalid(); } bool isValid() const { return !isInvalid(); } static RegV128 Invalid() { return RegV128(); } operator FloatRegister() const { return FloatRegister(fpr_.encoding(), FloatRegisters::Simd128); } bool operator==(const RegV128& that) const { return asDouble() == that.asDouble(); } bool operator!=(const RegV128& that) const { return asDouble() != that.asDouble(); } }; # else struct RegV128 : public FloatRegister { RegV128() : FloatRegister() {} explicit RegV128(FloatRegister reg) : FloatRegister(reg) { MOZ_ASSERT(isSimd128()); } bool isValid() const { return !isInvalid(); } static RegV128 Invalid() { return RegV128(); } }; # endif #endif struct AnyReg { union { RegI32 i32_; RegI64 i64_; RegPtr ref_; RegF32 f32_; RegF64 f64_; #ifdef ENABLE_WASM_SIMD RegV128 v128_; #endif }; enum { I32, I64, REF, F32, F64, #ifdef ENABLE_WASM_SIMD V128 #endif } tag; explicit AnyReg(RegI32 r) { tag = I32; i32_ = r; } explicit AnyReg(RegI64 r) { tag = I64; i64_ = r; } explicit AnyReg(RegF32 r) { tag = F32; f32_ = r; } explicit AnyReg(RegF64 r) { tag = F64; f64_ = r; } #ifdef ENABLE_WASM_SIMD explicit AnyReg(RegV128 r) { tag = V128; v128_ = r; } #endif explicit AnyReg(RegPtr r) { tag = REF; ref_ = r; } RegI32 i32() const { MOZ_ASSERT(tag == I32); return i32_; } RegI64 i64() const { MOZ_ASSERT(tag == I64); return i64_; } RegF32 f32() const { MOZ_ASSERT(tag == F32); return f32_; } RegF64 f64() const { MOZ_ASSERT(tag == F64); return f64_; } #ifdef ENABLE_WASM_SIMD RegV128 v128() const { MOZ_ASSERT(tag == V128); return v128_; } #endif RegPtr ref() const { MOZ_ASSERT(tag == REF); return ref_; } AnyRegister any() const { switch (tag) { case F32: return AnyRegister(f32_); case F64: return AnyRegister(f64_); #ifdef ENABLE_WASM_SIMD case V128: return AnyRegister(v128_); #endif case I32: return AnyRegister(i32_); case I64: #ifdef JS_PUNBOX64 return AnyRegister(i64_.reg); #else // The compiler is written so that this is never needed: any() is // called on arbitrary registers for asm.js but asm.js does not have // 64-bit ints. For wasm, any() is called on arbitrary registers // only on 64-bit platforms. MOZ_CRASH("AnyReg::any() on 32-bit platform"); #endif case REF: MOZ_CRASH("AnyReg::any() not implemented for ref types"); default: MOZ_CRASH(); } // Work around GCC 5 analysis/warning bug. MOZ_CRASH("AnyReg::any(): impossible case"); } }; // Platform-specific registers. // // All platforms must define struct SpecificRegs. All 32-bit platforms must // have an abiReturnRegI64 member in that struct. 
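//
// (For example, the x86 definition below sets abiReturnRegI64 to the edx:eax
// pair, which is where the 32-bit x86 ABI returns 64-bit integer results;
// ARM and MIPS32 simply use ReturnReg64.)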
#if defined(JS_CODEGEN_X64) struct SpecificRegs { RegI32 eax, ecx, edx, edi, esi; RegI64 rax, rcx, rdx; SpecificRegs() : eax(RegI32(js::jit::eax)), ecx(RegI32(js::jit::ecx)), edx(RegI32(js::jit::edx)), edi(RegI32(js::jit::edi)), esi(RegI32(js::jit::esi)), rax(RegI64(Register64(js::jit::rax))), rcx(RegI64(Register64(js::jit::rcx))), rdx(RegI64(Register64(js::jit::rdx))) {} }; #elif defined(JS_CODEGEN_X86) struct SpecificRegs { RegI32 eax, ecx, edx, edi, esi; RegI64 ecx_ebx, edx_eax, abiReturnRegI64; SpecificRegs() : eax(RegI32(js::jit::eax)), ecx(RegI32(js::jit::ecx)), edx(RegI32(js::jit::edx)), edi(RegI32(js::jit::edi)), esi(RegI32(js::jit::esi)), ecx_ebx(RegI64(Register64(js::jit::ecx, js::jit::ebx))), edx_eax(RegI64(Register64(js::jit::edx, js::jit::eax))), abiReturnRegI64(edx_eax) {} }; #elif defined(JS_CODEGEN_ARM) struct SpecificRegs { RegI64 abiReturnRegI64; SpecificRegs() : abiReturnRegI64(ReturnReg64) {} }; #elif defined(JS_CODEGEN_ARM64) struct SpecificRegs {}; #elif defined(JS_CODEGEN_MIPS32) struct SpecificRegs { RegI64 abiReturnRegI64; SpecificRegs() : abiReturnRegI64(ReturnReg64) {} }; #elif defined(JS_CODEGEN_MIPS64) struct SpecificRegs {}; #else struct SpecificRegs { # ifndef JS_64BIT RegI64 abiReturnRegI64; # endif SpecificRegs() { MOZ_CRASH("BaseCompiler porting interface: SpecificRegs"); } }; #endif class BaseCompilerInterface { public: // Spill all spillable registers. // // TODO / OPTIMIZE (Bug 1316802): It's possible to do better here by // spilling only enough registers to satisfy current needs. virtual void sync() = 0; virtual void saveTempPtr(RegPtr r) = 0; virtual void restoreTempPtr(RegPtr r) = 0; }; // Register allocator. class BaseRegAlloc { // Notes on float register allocation. // // The general rule in SpiderMonkey is that float registers can alias double // registers, but there are predicates to handle exceptions to that rule: // hasUnaliasedDouble() and hasMultiAlias(). The way aliasing actually // works is platform dependent and exposed through the aliased(n, &r) // predicate, etc. // // - hasUnaliasedDouble(): on ARM VFPv3-D32 there are double registers that // cannot be treated as float. // - hasMultiAlias(): on ARM and MIPS a double register aliases two float // registers. // // On some platforms (x86, x64, ARM64) but not all (ARM) // ScratchFloat32Register is the same as ScratchDoubleRegister. // // It's a basic invariant of the AllocatableRegisterSet that it deals // properly with aliasing of registers: if s0 or s1 are allocated then d0 is // not allocatable; if s0 and s1 are freed individually then d0 becomes // allocatable. // // On platforms with RABALDR_SIDEALLOC_V128, the register set does not // represent SIMD registers. Instead, we allocate and free these registers as // doubles and change the kind to Simd128 while the register is exposed to // masm. (This is the case on ARM64 for now, and is a consequence of needing // more than 64 bits for FloatRegisters::SetType to represent SIMD registers. // See lengty comment in Architecture-arm64.h.) BaseCompilerInterface* bc; AllocatableGeneralRegisterSet availGPR; AllocatableFloatRegisterSet availFPU; #ifdef DEBUG // The registers available after removing ScratchReg, HeapReg, etc. 
  AllocatableGeneralRegisterSet allGPR;
  AllocatableFloatRegisterSet allFPU;
  uint32_t scratchTaken;
#endif
#ifdef JS_CODEGEN_X86
  AllocatableGeneralRegisterSet singleByteRegs;
#endif

  bool hasGPR() { return !availGPR.empty(); }

  bool hasGPR64() {
#ifdef JS_PUNBOX64
    return !availGPR.empty();
#else
    if (availGPR.empty()) {
      return false;
    }
    Register r = allocGPR();
    bool available = !availGPR.empty();
    freeGPR(r);
    return available;
#endif
  }

  template <MIRType t>
  bool hasFPU() {
#ifdef RABALDR_SIDEALLOC_V128
    // Workaround for GCC problem, bug 1677690
    if constexpr (t == MIRType::Simd128) {
      MOZ_CRASH("Should not happen");
    } else
#endif
    {
      return availFPU.hasAny<RegTypeOf<t>::value>();
    }
  }

  bool isAvailableGPR(Register r) { return availGPR.has(r); }

  bool isAvailableFPU(FloatRegister r) {
#ifdef RABALDR_SIDEALLOC_V128
    MOZ_ASSERT(!r.isSimd128());
#endif
    return availFPU.has(r);
  }

  void allocGPR(Register r) {
    MOZ_ASSERT(isAvailableGPR(r));
    availGPR.take(r);
  }

  Register allocGPR() {
    MOZ_ASSERT(hasGPR());
    return availGPR.takeAny();
  }

  void allocInt64(Register64 r) {
#ifdef JS_PUNBOX64
    allocGPR(r.reg);
#else
    allocGPR(r.low);
    allocGPR(r.high);
#endif
  }

  Register64 allocInt64() {
    MOZ_ASSERT(hasGPR64());
#ifdef JS_PUNBOX64
    return Register64(availGPR.takeAny());
#else
    Register high = availGPR.takeAny();
    Register low = availGPR.takeAny();
    return Register64(high, low);
#endif
  }

#ifdef JS_CODEGEN_ARM
  // r12 is normally the ScratchRegister and r13 is always the stack pointer,
  // so the highest possible pair has r10 as the even-numbered register.
  static constexpr uint32_t PAIR_LIMIT = 10;

  bool hasGPRPair() {
    for (uint32_t i = 0; i <= PAIR_LIMIT; i += 2) {
      if (isAvailableGPR(Register::FromCode(i)) &&
          isAvailableGPR(Register::FromCode(i + 1))) {
        return true;
      }
    }
    return false;
  }

  void allocGPRPair(Register* low, Register* high) {
    MOZ_ASSERT(hasGPRPair());
    for (uint32_t i = 0; i <= PAIR_LIMIT; i += 2) {
      if (isAvailableGPR(Register::FromCode(i)) &&
          isAvailableGPR(Register::FromCode(i + 1))) {
        *low = Register::FromCode(i);
        *high = Register::FromCode(i + 1);
        allocGPR(*low);
        allocGPR(*high);
        return;
      }
    }
    MOZ_CRASH("No pair");
  }
#endif

  void allocFPU(FloatRegister r) {
#ifdef RABALDR_SIDEALLOC_V128
    MOZ_ASSERT(!r.isSimd128());
#endif
    MOZ_ASSERT(isAvailableFPU(r));
    availFPU.take(r);
  }

  template <MIRType t>
  FloatRegister allocFPU() {
#ifdef RABALDR_SIDEALLOC_V128
    // Workaround for GCC problem, bug 1677690
    if constexpr (t == MIRType::Simd128) {
      MOZ_CRASH("Should not happen");
    } else
#endif
    {
      return availFPU.takeAny<RegTypeOf<t>::value>();
    }
  }

  void freeGPR(Register r) { availGPR.add(r); }

  void freeInt64(Register64 r) {
#ifdef JS_PUNBOX64
    freeGPR(r.reg);
#else
    freeGPR(r.low);
    freeGPR(r.high);
#endif
  }

  void freeFPU(FloatRegister r) {
#ifdef RABALDR_SIDEALLOC_V128
    MOZ_ASSERT(!r.isSimd128());
#endif
    availFPU.add(r);
  }

 public:
  explicit BaseRegAlloc()
      : bc(nullptr),
        availGPR(GeneralRegisterSet::All()),
        availFPU(FloatRegisterSet::All())
#ifdef DEBUG
        ,
        scratchTaken(0)
#endif
#ifdef JS_CODEGEN_X86
        ,
        singleByteRegs(GeneralRegisterSet(Registers::SingleByteRegs))
#endif
  {
    RegisterAllocator::takeWasmRegisters(availGPR);

    // Allocate any private scratch registers.
#if defined(RABALDR_SCRATCH_I32) if (RabaldrScratchI32 != RegI32::Invalid()) { availGPR.take(RabaldrScratchI32); } #endif #ifdef RABALDR_SCRATCH_F32_ALIASES_F64 static_assert(RabaldrScratchF32 != InvalidFloatReg, "Float reg definition"); static_assert(RabaldrScratchF64 != InvalidFloatReg, "Float reg definition"); #endif #if defined(RABALDR_SCRATCH_F32) && !defined(RABALDR_SCRATCH_F32_ALIASES_F64) if (RabaldrScratchF32 != RegF32::Invalid()) { availFPU.take(RabaldrScratchF32); } #endif #if defined(RABALDR_SCRATCH_F64) # ifdef RABALDR_SCRATCH_F32_ALIASES_F64 MOZ_ASSERT(availFPU.has(RabaldrScratchF32)); # endif if (RabaldrScratchF64 != RegF64::Invalid()) { availFPU.take(RabaldrScratchF64); } # ifdef RABALDR_SCRATCH_F32_ALIASES_F64 MOZ_ASSERT(!availFPU.has(RabaldrScratchF32)); # endif #endif #ifdef DEBUG allGPR = availGPR; allFPU = availFPU; #endif } void init(BaseCompilerInterface* bc) { this->bc = bc; } enum class ScratchKind { I32 = 1, F32 = 2, F64 = 4, V128 = 8 }; #ifdef DEBUG bool isScratchRegisterTaken(ScratchKind s) const { return (scratchTaken & uint32_t(s)) != 0; } void setScratchRegisterTaken(ScratchKind s, bool state) { if (state) { scratchTaken |= uint32_t(s); } else { scratchTaken &= ~uint32_t(s); } } #endif #ifdef JS_CODEGEN_X86 bool isSingleByteI32(Register r) { return singleByteRegs.has(r); } #endif bool isAvailableI32(RegI32 r) { return isAvailableGPR(r); } bool isAvailableI64(RegI64 r) { #ifdef JS_PUNBOX64 return isAvailableGPR(r.reg); #else return isAvailableGPR(r.low) && isAvailableGPR(r.high); #endif } bool isAvailablePtr(RegPtr r) { return isAvailableGPR(r); } bool isAvailableF32(RegF32 r) { return isAvailableFPU(r); } bool isAvailableF64(RegF64 r) { return isAvailableFPU(r); } #ifdef ENABLE_WASM_SIMD # ifdef RABALDR_SIDEALLOC_V128 bool isAvailableV128(RegV128 r) { return isAvailableFPU(r.asDouble()); } # else bool isAvailableV128(RegV128 r) { return isAvailableFPU(r); } # endif #endif // TODO / OPTIMIZE (Bug 1316802): Do not sync everything on allocation // failure, only as much as we need. [[nodiscard]] RegI32 needI32() { if (!hasGPR()) { bc->sync(); } return RegI32(allocGPR()); } void needI32(RegI32 specific) { if (!isAvailableI32(specific)) { bc->sync(); } allocGPR(specific); } [[nodiscard]] RegI64 needI64() { if (!hasGPR64()) { bc->sync(); } return RegI64(allocInt64()); } void needI64(RegI64 specific) { if (!isAvailableI64(specific)) { bc->sync(); } allocInt64(specific); } [[nodiscard]] RegPtr needPtr() { if (!hasGPR()) { bc->sync(); } return RegPtr(allocGPR()); } void needPtr(RegPtr specific) { if (!isAvailablePtr(specific)) { bc->sync(); } allocGPR(specific); } // Use when you need a register for a short time but explicitly want to avoid // a full sync(). 
[[nodiscard]] RegPtr needTempPtr(RegPtr fallback, bool* saved) { if (hasGPR()) { *saved = false; return RegPtr(allocGPR()); } *saved = true; bc->saveTempPtr(fallback); MOZ_ASSERT(isAvailablePtr(fallback)); allocGPR(fallback); return RegPtr(fallback); } [[nodiscard]] RegF32 needF32() { if (!hasFPU()) { bc->sync(); } return RegF32(allocFPU()); } void needF32(RegF32 specific) { if (!isAvailableF32(specific)) { bc->sync(); } allocFPU(specific); } [[nodiscard]] RegF64 needF64() { if (!hasFPU()) { bc->sync(); } return RegF64(allocFPU()); } void needF64(RegF64 specific) { if (!isAvailableF64(specific)) { bc->sync(); } allocFPU(specific); } #ifdef ENABLE_WASM_SIMD [[nodiscard]] RegV128 needV128() { # ifdef RABALDR_SIDEALLOC_V128 if (!hasFPU()) { bc->sync(); } return RegV128::fromDouble(allocFPU()); # else if (!hasFPU()) { bc->sync(); } return RegV128(allocFPU()); # endif } void needV128(RegV128 specific) { # ifdef RABALDR_SIDEALLOC_V128 if (!isAvailableV128(specific)) { bc->sync(); } allocFPU(specific.asDouble()); # else if (!isAvailableV128(specific)) { bc->sync(); } allocFPU(specific); # endif } #endif void freeI32(RegI32 r) { freeGPR(r); } void freeI64(RegI64 r) { freeInt64(r); } void freePtr(RegPtr r) { freeGPR(r); } void freeF64(RegF64 r) { freeFPU(r); } void freeF32(RegF32 r) { freeFPU(r); } #ifdef ENABLE_WASM_SIMD void freeV128(RegV128 r) { # ifdef RABALDR_SIDEALLOC_V128 freeFPU(r.asDouble()); # else freeFPU(r); # endif } #endif void freeTempPtr(RegPtr r, bool saved) { freePtr(r); if (saved) { bc->restoreTempPtr(r); MOZ_ASSERT(!isAvailablePtr(r)); } } #ifdef JS_CODEGEN_ARM [[nodiscard]] RegI64 needI64Pair() { if (!hasGPRPair()) { bc->sync(); } Register low, high; allocGPRPair(&low, &high); return RegI64(Register64(high, low)); } #endif #ifdef DEBUG friend class LeakCheck; class MOZ_RAII LeakCheck { private: const BaseRegAlloc& ra; AllocatableGeneralRegisterSet knownGPR_; AllocatableFloatRegisterSet knownFPU_; public: explicit LeakCheck(const BaseRegAlloc& ra) : ra(ra) { knownGPR_ = ra.availGPR; knownFPU_ = ra.availFPU; } ~LeakCheck() { MOZ_ASSERT(knownGPR_.bits() == ra.allGPR.bits()); MOZ_ASSERT(knownFPU_.bits() == ra.allFPU.bits()); } void addKnownI32(RegI32 r) { knownGPR_.add(r); } void addKnownI64(RegI64 r) { # ifdef JS_PUNBOX64 knownGPR_.add(r.reg); # else knownGPR_.add(r.high); knownGPR_.add(r.low); # endif } void addKnownF32(RegF32 r) { knownFPU_.add(r); } void addKnownF64(RegF64 r) { knownFPU_.add(r); } # ifdef ENABLE_WASM_SIMD void addKnownV128(RegV128 r) { # ifdef RABALDR_SIDEALLOC_V128 knownFPU_.add(r.asDouble()); # else knownFPU_.add(r); # endif } # endif void addKnownRef(RegPtr r) { knownGPR_.add(r); } }; #endif }; // Scratch register abstractions. // // We define our own scratch registers when the platform doesn't provide what we // need. A notable use case is that we will need a private scratch register // when the platform masm uses its scratch register very frequently (eg, ARM). 
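// Rough usage sketch (illustrative only, based on the constructors declared
// below): the scratch classes are RAII scopes, so a register is borrowed for
// the duration of a block and released when the scope ends, e.g.
//
//   {
//     ScratchI32 scratch(ra);    // or ScratchI32 scratch(masm), when the
//                                // platform defines no RABALDR_SCRATCH_I32
//     masm.move32(Imm32(0), scratch);
//   }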
class BaseScratchRegister {
#ifdef DEBUG
  BaseRegAlloc& ra;
  BaseRegAlloc::ScratchKind kind_;

 public:
  explicit BaseScratchRegister(BaseRegAlloc& ra,
                               BaseRegAlloc::ScratchKind kind)
      : ra(ra), kind_(kind) {
    MOZ_ASSERT(!ra.isScratchRegisterTaken(kind_));
    ra.setScratchRegisterTaken(kind_, true);
  }
  ~BaseScratchRegister() {
    MOZ_ASSERT(ra.isScratchRegisterTaken(kind_));
    ra.setScratchRegisterTaken(kind_, false);
  }
#else
 public:
  explicit BaseScratchRegister(BaseRegAlloc& ra,
                               BaseRegAlloc::ScratchKind kind) {}
#endif
};

#ifdef ENABLE_WASM_SIMD
# ifdef RABALDR_SCRATCH_V128
class ScratchV128 : public BaseScratchRegister {
 public:
  explicit ScratchV128(BaseRegAlloc& ra)
      : BaseScratchRegister(ra, BaseRegAlloc::ScratchKind::V128) {}
  operator RegV128() const { return RegV128(RabaldrScratchV128); }
};
# else
class ScratchV128 : public ScratchSimd128Scope {
 public:
  explicit ScratchV128(MacroAssembler& m) : ScratchSimd128Scope(m) {}
  operator RegV128() const { return RegV128(FloatRegister(*this)); }
};
# endif
#endif

#ifdef RABALDR_SCRATCH_F64
class ScratchF64 : public BaseScratchRegister {
 public:
  explicit ScratchF64(BaseRegAlloc& ra)
      : BaseScratchRegister(ra, BaseRegAlloc::ScratchKind::F64) {}
  operator RegF64() const { return RegF64(RabaldrScratchF64); }
};
#else
class ScratchF64 : public ScratchDoubleScope {
 public:
  explicit ScratchF64(MacroAssembler& m) : ScratchDoubleScope(m) {}
  operator RegF64() const { return RegF64(FloatRegister(*this)); }
};
#endif

#ifdef RABALDR_SCRATCH_F32
class ScratchF32 : public BaseScratchRegister {
 public:
  explicit ScratchF32(BaseRegAlloc& ra)
      : BaseScratchRegister(ra, BaseRegAlloc::ScratchKind::F32) {}
  operator RegF32() const { return RegF32(RabaldrScratchF32); }
};
#else
class ScratchF32 : public ScratchFloat32Scope {
 public:
  explicit ScratchF32(MacroAssembler& m) : ScratchFloat32Scope(m) {}
  operator RegF32() const { return RegF32(FloatRegister(*this)); }
};
#endif

#ifdef RABALDR_SCRATCH_I32
template <class RegType>
class ScratchGPR : public BaseScratchRegister {
 public:
  explicit ScratchGPR(BaseRegAlloc& ra)
      : BaseScratchRegister(ra, BaseRegAlloc::ScratchKind::I32) {}
  operator RegType() const { return RegType(RabaldrScratchI32); }
};
#else
template <class RegType>
class ScratchGPR : public ScratchRegisterScope {
 public:
  explicit ScratchGPR(MacroAssembler& m) : ScratchRegisterScope(m) {}
  operator RegType() const { return RegType(Register(*this)); }
};
#endif

using ScratchI32 = ScratchGPR<RegI32>;
using ScratchPtr = ScratchGPR<RegPtr>;

#if defined(JS_CODEGEN_X86)
// ScratchEBX is a mnemonic device: For some atomic ops we really need EBX,
// no other register will do. And we would normally have to allocate that
// register using ScratchI32 since normally the scratch register is EBX.
// But the whole point of ScratchI32 is to hide that relationship. By using
// the ScratchEBX alias, we document that at that point we require the
// scratch register to be EBX.
using ScratchEBX = ScratchI32;

// ScratchI8 is a mnemonic device: For some ops we need a register with a
// byte subregister.
using ScratchI8 = ScratchI32;
#endif

// The stack frame.
// // The stack frame has four parts ("below" means at lower addresses): // // - the Frame element; // - the Local area, including the DebugFrame element and possibly a spilled // pointer to stack results, if any; allocated below the header with various // forms of alignment; // - the Dynamic area, comprising the temporary storage the compiler uses for // register spilling, allocated below the Local area; // - the Arguments area, comprising memory allocated for outgoing calls, // allocated below the Dynamic area. // // +==============================+ // | Incoming stack arg | // | ... | // ------------- +==============================+ // | Frame (fixed size) | // ------------- +==============================+ <-------------------- FP // ^ | DebugFrame (optional) | ^ ^ ^^ // localSize | Register arg local | | | || // | | ... | | | framePushed // | | Register stack result ptr?| | | || // | | Non-arg local | | | || // | | ... | | | || // | | (padding) | | | || // | | Tls pointer | | | || // | +------------------------------+ | | || // v | (padding) | | v || // ------------- +==============================+ currentStackHeight || // ^ | Dynamic (variable size) | | || // dynamicSize | ... | | || // v | ... | v || // ------------- | (free space, sometimes) | --------- v| // +==============================+ <----- SP not-during calls // | Arguments (sometimes) | | // | ... | v // +==============================+ <----- SP during calls // // The Frame is addressed off the stack pointer. masm.framePushed() is always // correct, and masm.getStackPointer() + masm.framePushed() always addresses the // Frame, with the DebugFrame optionally below it. // // The Local area (including the DebugFrame and, if needed, the spilled value of // the stack results area pointer) is laid out by BaseLocalIter and is allocated // and deallocated by standard prologue and epilogue functions that manipulate // the stack pointer, but it is accessed via BaseStackFrame. // // The Dynamic area is maintained by and accessed via BaseStackFrame. On some // systems (such as ARM64), the Dynamic memory may be allocated in chunks // because the SP needs a specific alignment, and in this case there will // normally be some free space directly above the SP. The stack height does not // include the free space, it reflects the logically used space only. // // The Dynamic area is where space for stack results is allocated when calling // functions that return results on the stack. If a function has stack results, // a pointer to the low address of the stack result area is passed as an // additional argument, according to the usual ABI. See // ABIResultIter::HasStackResults. // // The Arguments area is allocated and deallocated via BaseStackFrame (see // comments later) but is accessed directly off the stack pointer. // BaseLocalIter iterates over a vector of types of locals and provides offsets // from the Frame address for those locals, and associated data. // // The implementation of BaseLocalIter is the property of the BaseStackFrame. // But it is also exposed for eg the debugger to use. BaseLocalIter::BaseLocalIter(const ValTypeVector& locals, const ArgTypeVector& args, bool debugEnabled) : locals_(locals), args_(args), argsIter_(args_), index_(0), nextFrameSize_(debugEnabled ? 
DebugFrame::offsetOfFrame() : 0), frameOffset_(INT32_MAX), stackResultPointerOffset_(INT32_MAX), mirType_(MIRType::Undefined), done_(false) { MOZ_ASSERT(args.lengthWithoutStackResults() <= locals.length()); settle(); } int32_t BaseLocalIter::pushLocal(size_t nbytes) { MOZ_ASSERT(nbytes % 4 == 0 && nbytes <= 16); nextFrameSize_ = AlignBytes(frameSize_, nbytes) + nbytes; return nextFrameSize_; // Locals grow down so capture base address. } void BaseLocalIter::settle() { MOZ_ASSERT(!done_); frameSize_ = nextFrameSize_; if (!argsIter_.done()) { mirType_ = argsIter_.mirType(); MIRType concreteType = mirType_; switch (mirType_) { case MIRType::StackResults: // The pointer to stack results is handled like any other argument: // either addressed in place if it is passed on the stack, or we spill // it in the frame if it's in a register. MOZ_ASSERT(args_.isSyntheticStackResultPointerArg(index_)); concreteType = MIRType::Pointer; [[fallthrough]]; case MIRType::Int32: case MIRType::Int64: case MIRType::Double: case MIRType::Float32: case MIRType::RefOrNull: #ifdef ENABLE_WASM_SIMD case MIRType::Simd128: #endif if (argsIter_->argInRegister()) { frameOffset_ = pushLocal(MIRTypeToSize(concreteType)); } else { frameOffset_ = -(argsIter_->offsetFromArgBase() + sizeof(Frame)); } break; default: MOZ_CRASH("Argument type"); } if (mirType_ == MIRType::StackResults) { stackResultPointerOffset_ = frameOffset(); // Advance past the synthetic stack result pointer argument and fall // through to the next case. argsIter_++; frameSize_ = nextFrameSize_; MOZ_ASSERT(argsIter_.done()); } else { return; } } if (index_ < locals_.length()) { switch (locals_[index_].kind()) { case ValType::I32: case ValType::I64: case ValType::F32: case ValType::F64: #ifdef ENABLE_WASM_SIMD case ValType::V128: #endif case ValType::Ref: // TODO/AnyRef-boxing: With boxed immediates and strings, the // debugger must be made aware that AnyRef != Pointer. ASSERT_ANYREF_IS_JSOBJECT; mirType_ = ToMIRType(locals_[index_]); frameOffset_ = pushLocal(MIRTypeToSize(mirType_)); break; default: MOZ_CRASH("Compiler bug: Unexpected local type"); } return; } done_ = true; } void BaseLocalIter::operator++(int) { MOZ_ASSERT(!done_); index_++; if (!argsIter_.done()) { argsIter_++; } settle(); } // Abstraction of the height of the stack frame, to avoid type confusion. class StackHeight { friend class BaseStackFrameAllocator; uint32_t height; public: explicit StackHeight(uint32_t h) : height(h) {} static StackHeight Invalid() { return StackHeight(UINT32_MAX); } bool isValid() const { return height != UINT32_MAX; } bool operator==(StackHeight rhs) const { MOZ_ASSERT(isValid() && rhs.isValid()); return height == rhs.height; } bool operator!=(StackHeight rhs) const { return !(*this == rhs); } }; // Abstraction for where multi-value results go on the machine stack. class StackResultsLoc { uint32_t bytes_; size_t count_; Maybe height_; public: StackResultsLoc() : bytes_(0), count_(0){}; StackResultsLoc(uint32_t bytes, size_t count, uint32_t height) : bytes_(bytes), count_(count), height_(Some(height)) { MOZ_ASSERT(bytes != 0); MOZ_ASSERT(count != 0); MOZ_ASSERT(height != 0); } uint32_t bytes() const { return bytes_; } uint32_t count() const { return count_; } uint32_t height() const { return height_.value(); } bool hasStackResults() const { return bytes() != 0; } StackResults stackResults() const { return hasStackResults() ? 
StackResults::HasStackResults : StackResults::NoStackResults; } }; // Abstraction of the baseline compiler's stack frame (except for the Frame / // DebugFrame parts). See comments above for more. Remember, "below" on the // stack means at lower addresses. // // The abstraction is split into two parts: BaseStackFrameAllocator is // responsible for allocating and deallocating space on the stack and for // performing computations that are affected by how the allocation is performed; // BaseStackFrame then provides a pleasant interface for stack frame management. class BaseStackFrameAllocator { MacroAssembler& masm; #ifdef RABALDR_CHUNKY_STACK // On platforms that require the stack pointer to be aligned on a boundary // greater than the typical stack item (eg, ARM64 requires 16-byte alignment // but items are 8 bytes), allocate stack memory in chunks, and use a // separate stack height variable to track the effective stack pointer // within the allocated area. Effectively, there's a variable amount of // free space directly above the stack pointer. See diagram above. // The following must be true in order for the stack height to be // predictable at control flow joins: // // - The Local area is always aligned according to WasmStackAlignment, ie, // masm.framePushed() % WasmStackAlignment is zero after allocating // locals. // // - ChunkSize is always a multiple of WasmStackAlignment. // // - Pushing and popping are always in units of ChunkSize (hence preserving // alignment). // // - The free space on the stack (masm.framePushed() - currentStackHeight_) // is a predictable (nonnegative) amount. // As an optimization, we pre-allocate some space on the stack, the size of // this allocation is InitialChunk and it must be a multiple of ChunkSize. // It is allocated as part of the function prologue and deallocated as part // of the epilogue, along with the locals. // // If ChunkSize is too large then we risk overflowing the stack on simple // recursions with few live values where stack overflow should not be a // risk; if it is too small we spend too much time adjusting the stack // pointer. // // Good values for ChunkSize are the subject of future empirical analysis; // eight words is just an educated guess. static constexpr uint32_t ChunkSize = 8 * sizeof(void*); static constexpr uint32_t InitialChunk = ChunkSize; // The current logical height of the frame is // currentStackHeight_ = localSize_ + dynamicSize // where dynamicSize is not accounted for explicitly and localSize_ also // includes size for the DebugFrame. // // The allocated size of the frame, provided by masm.framePushed(), is usually // larger than currentStackHeight_, notably at the beginning of execution when // we've allocated InitialChunk extra space. uint32_t currentStackHeight_; #endif // Size of the Local area in bytes (stable after BaseCompiler::init() has // called BaseStackFrame::setupLocals(), which in turn calls // BaseStackFrameAllocator::setLocalSize()), always rounded to the proper // stack alignment. The Local area is then allocated in beginFunction(), // following the allocation of the Header. See onFixedStackAllocated() // below. 
uint32_t localSize_; protected: /////////////////////////////////////////////////////////////////////////// // // Initialization explicit BaseStackFrameAllocator(MacroAssembler& masm) : masm(masm), #ifdef RABALDR_CHUNKY_STACK currentStackHeight_(0), #endif localSize_(UINT32_MAX) { } protected: ////////////////////////////////////////////////////////////////////// // // The Local area - the static part of the frame. // Record the size of the Local area, once it is known. void setLocalSize(uint32_t localSize) { MOZ_ASSERT(localSize == AlignBytes(localSize, sizeof(void*)), "localSize_ should be aligned to at least a pointer"); MOZ_ASSERT(localSize_ == UINT32_MAX); localSize_ = localSize; } // Record the current stack height, after it has become stable in // beginFunction(). See also BaseStackFrame::onFixedStackAllocated(). void onFixedStackAllocated() { MOZ_ASSERT(localSize_ != UINT32_MAX); #ifdef RABALDR_CHUNKY_STACK currentStackHeight_ = localSize_; #endif } public: // The fixed amount of memory, in bytes, allocated on the stack below the // Header for purposes such as locals and other fixed values. Includes all // necessary alignment, and on ARM64 also the initial chunk for the working // stack memory. uint32_t fixedAllocSize() const { MOZ_ASSERT(localSize_ != UINT32_MAX); #ifdef RABALDR_CHUNKY_STACK return localSize_ + InitialChunk; #else return localSize_; #endif } #ifdef RABALDR_CHUNKY_STACK // The allocated frame size is frequently larger than the logical stack // height; we round up to a chunk boundary, and special case the initial // chunk. uint32_t framePushedForHeight(uint32_t logicalHeight) { if (logicalHeight <= fixedAllocSize()) { return fixedAllocSize(); } return fixedAllocSize() + AlignBytes(logicalHeight - fixedAllocSize(), ChunkSize); } #endif protected: ////////////////////////////////////////////////////////////////////// // // The Dynamic area - the dynamic part of the frame, for spilling and saving // intermediate values. // Offset off of sp_ for the slot at stack area location `offset`. int32_t stackOffset(int32_t offset) { MOZ_ASSERT(offset > 0); return masm.framePushed() - offset; } uint32_t computeHeightWithStackResults(StackHeight stackBase, uint32_t stackResultBytes) { MOZ_ASSERT(stackResultBytes); MOZ_ASSERT(currentStackHeight() >= stackBase.height); return stackBase.height + stackResultBytes; } #ifdef RABALDR_CHUNKY_STACK void pushChunkyBytes(uint32_t bytes) { checkChunkyInvariants(); uint32_t freeSpace = masm.framePushed() - currentStackHeight_; if (freeSpace < bytes) { uint32_t bytesToReserve = AlignBytes(bytes - freeSpace, ChunkSize); MOZ_ASSERT(bytesToReserve + freeSpace >= bytes); masm.reserveStack(bytesToReserve); } currentStackHeight_ += bytes; checkChunkyInvariants(); } void popChunkyBytes(uint32_t bytes) { checkChunkyInvariants(); currentStackHeight_ -= bytes; // Sometimes, popChunkyBytes() is used to pop a larger area, as when we drop // values consumed by a call, and we may need to drop several chunks. But // never drop the initial chunk. Crucially, the amount we drop is always an // integral number of chunks. 
uint32_t freeSpace = masm.framePushed() - currentStackHeight_; if (freeSpace >= ChunkSize) { uint32_t targetAllocSize = framePushedForHeight(currentStackHeight_); uint32_t amountToFree = masm.framePushed() - targetAllocSize; MOZ_ASSERT(amountToFree % ChunkSize == 0); if (amountToFree) { masm.freeStack(amountToFree); } } checkChunkyInvariants(); } #endif uint32_t currentStackHeight() const { #ifdef RABALDR_CHUNKY_STACK return currentStackHeight_; #else return masm.framePushed(); #endif } private: #ifdef RABALDR_CHUNKY_STACK void checkChunkyInvariants() { MOZ_ASSERT(masm.framePushed() >= fixedAllocSize()); MOZ_ASSERT(masm.framePushed() >= currentStackHeight_); MOZ_ASSERT(masm.framePushed() == fixedAllocSize() || masm.framePushed() - currentStackHeight_ < ChunkSize); MOZ_ASSERT((masm.framePushed() - localSize_) % ChunkSize == 0); } #endif // For a given stack height, return the appropriate size of the allocated // frame. uint32_t framePushedForHeight(StackHeight stackHeight) { #ifdef RABALDR_CHUNKY_STACK // A more complicated adjustment is needed. return framePushedForHeight(stackHeight.height); #else // The allocated frame size equals the stack height. return stackHeight.height; #endif } public: // The current height of the stack area, not necessarily zero-based, in a // type-safe way. StackHeight stackHeight() const { return StackHeight(currentStackHeight()); } // Set the frame height to a previously recorded value. void setStackHeight(StackHeight amount) { #ifdef RABALDR_CHUNKY_STACK currentStackHeight_ = amount.height; masm.setFramePushed(framePushedForHeight(amount)); checkChunkyInvariants(); #else masm.setFramePushed(amount.height); #endif } // The current height of the dynamic part of the stack area (ie, the backing // store for the evaluation stack), zero-based. uint32_t dynamicHeight() const { return currentStackHeight() - localSize_; } // Before branching to an outer control label, pop the execution stack to // the level expected by that region, but do not update masm.framePushed() // as that will happen as compilation leaves the block. // // Note these operate directly on the stack pointer register. void popStackBeforeBranch(StackHeight destStackHeight, uint32_t stackResultBytes) { uint32_t framePushedHere = masm.framePushed(); StackHeight heightThere = StackHeight(destStackHeight.height + stackResultBytes); uint32_t framePushedThere = framePushedForHeight(heightThere); if (framePushedHere > framePushedThere) { masm.addToStackPtr(Imm32(framePushedHere - framePushedThere)); } } void popStackBeforeBranch(StackHeight destStackHeight, ResultType type) { popStackBeforeBranch(destStackHeight, ABIResultIter::MeasureStackBytes(type)); } // Given that there are |stackParamSize| bytes on the dynamic stack // corresponding to the stack results, return the stack height once these // parameters are popped. StackHeight stackResultsBase(uint32_t stackParamSize) { return StackHeight(currentStackHeight() - stackParamSize); } // For most of WebAssembly, adjacent instructions have fallthrough control // flow between them, which allows us to simply thread the current stack // height through the compiler. There are two exceptions to this rule: when // leaving a block via dead code, and when entering the "else" arm of an "if". // In these cases, the stack height is the block entry height, plus any stack // values (results in the block exit case, parameters in the else entry case). 
void resetStackHeight(StackHeight destStackHeight, ResultType type) { uint32_t height = destStackHeight.height; height += ABIResultIter::MeasureStackBytes(type); setStackHeight(StackHeight(height)); } // Return offset of stack result. uint32_t locateStackResult(const ABIResult& result, StackHeight stackBase, uint32_t stackResultBytes) { MOZ_ASSERT(result.onStack()); MOZ_ASSERT(result.stackOffset() + result.size() <= stackResultBytes); uint32_t end = computeHeightWithStackResults(stackBase, stackResultBytes); return end - result.stackOffset(); } public: ////////////////////////////////////////////////////////////////////// // // The Argument area - for outgoing calls. // // We abstract these operations as an optimization: we can merge the freeing // of the argument area and dropping values off the stack after a call. But // they always amount to manipulating the real stack pointer by some amount. // // Note that we do not update currentStackHeight_ for this; the frame does // not know about outgoing arguments. But we do update framePushed(), so we // can still index into the frame below the outgoing arguments area. // This is always equivalent to a masm.reserveStack() call. void allocArgArea(size_t argSize) { if (argSize) { masm.reserveStack(argSize); } } // This frees the argument area allocated by allocArgArea(), and `argSize` // must be equal to the `argSize` argument to allocArgArea(). In addition // we drop some values from the frame, corresponding to the values that were // consumed by the call. void freeArgAreaAndPopBytes(size_t argSize, size_t dropSize) { #ifdef RABALDR_CHUNKY_STACK // Freeing the outgoing arguments and freeing the consumed values have // different semantics here, which is why the operation is split. if (argSize) { masm.freeStack(argSize); } popChunkyBytes(dropSize); #else if (argSize + dropSize) { masm.freeStack(argSize + dropSize); } #endif } }; class BaseStackFrame final : public BaseStackFrameAllocator { MacroAssembler& masm; // The largest observed value of masm.framePushed(), ie, the size of the // stack frame. Read this for its true value only when code generation is // finished. uint32_t maxFramePushed_; // Patch point where we check for stack overflow. CodeOffset stackAddOffset_; // Low byte offset of pointer to stack results, if any. Maybe stackResultsPtrOffset_; // The offset of TLS pointer. uint32_t tlsPointerOffset_; // Low byte offset of local area for true locals (not parameters). uint32_t varLow_; // High byte offset + 1 of local area for true locals. uint32_t varHigh_; // The stack pointer, cached for brevity. RegisterOrSP sp_; public: explicit BaseStackFrame(MacroAssembler& masm) : BaseStackFrameAllocator(masm), masm(masm), maxFramePushed_(0), stackAddOffset_(0), tlsPointerOffset_(UINT32_MAX), varLow_(UINT32_MAX), varHigh_(UINT32_MAX), sp_(masm.getStackPointer()) {} /////////////////////////////////////////////////////////////////////////// // // Stack management and overflow checking // This must be called once beginFunction has allocated space for the Header // (the Frame and DebugFrame) and the Local area, and will record the current // frame size for internal use by the stack abstractions. void onFixedStackAllocated() { maxFramePushed_ = masm.framePushed(); BaseStackFrameAllocator::onFixedStackAllocated(); } // We won't know until after we've generated code how big the frame will be // (we may need arbitrary spill slots and outgoing param slots) so emit a // patchable add that is patched in endFunction(). 
// // Note the platform scratch register may be used by branchPtr(), so // generally tmp must be something else. void checkStack(Register tmp, BytecodeOffset trapOffset) { stackAddOffset_ = masm.sub32FromStackPtrWithPatch(tmp); Label ok; masm.branchPtr(Assembler::Below, Address(WasmTlsReg, offsetof(wasm::TlsData, stackLimit)), tmp, &ok); masm.wasmTrap(Trap::StackOverflow, trapOffset); masm.bind(&ok); } void patchCheckStack() { masm.patchSub32FromStackPtr(stackAddOffset_, Imm32(int32_t(maxFramePushed_))); } // Very large frames are implausible, probably an attack. bool checkStackHeight() { // 512KiB should be enough, considering how Rabaldr uses the stack and // what the standard limits are: // // - 1,000 parameters // - 50,000 locals // - 10,000 values on the eval stack (not an official limit) // // At sizeof(int64) bytes per slot this works out to about 480KiB. return maxFramePushed_ <= 512 * 1024; } /////////////////////////////////////////////////////////////////////////// // // Local area struct Local { // Type of the value. const MIRType type; // Byte offset from Frame "into" the locals, ie positive for true locals // and negative for incoming args that read directly from the arg area. // It assumes the stack is growing down and that locals are on the stack // at lower addresses than Frame, and is the offset from Frame of the // lowest-addressed byte of the local. const int32_t offs; Local(MIRType type, int32_t offs) : type(type), offs(offs) {} bool isStackArgument() const { return offs < 0; } }; // Profiling shows that the number of parameters and locals frequently // touches or exceeds 8. So 16 seems like a reasonable starting point. using LocalVector = Vector; // Initialize `localInfo` based on the types of `locals` and `args`. MOZ_MUST_USE bool setupLocals(const ValTypeVector& locals, const ArgTypeVector& args, bool debugEnabled, LocalVector* localInfo) { if (!localInfo->reserve(locals.length())) { return false; } DebugOnly index = 0; BaseLocalIter i(locals, args, debugEnabled); for (; !i.done() && i.index() < args.lengthWithoutStackResults(); i++) { MOZ_ASSERT(i.isArg()); MOZ_ASSERT(i.index() == index); localInfo->infallibleEmplaceBack(i.mirType(), i.frameOffset()); index++; } varLow_ = i.frameSize(); for (; !i.done(); i++) { MOZ_ASSERT(!i.isArg()); MOZ_ASSERT(i.index() == index); localInfo->infallibleEmplaceBack(i.mirType(), i.frameOffset()); index++; } varHigh_ = i.frameSize(); // Reserve an additional stack slot for the TLS pointer. 
const uint32_t pointerAlignedVarHigh = AlignBytes(varHigh_, sizeof(void*)); const uint32_t localSize = pointerAlignedVarHigh + sizeof(void*); tlsPointerOffset_ = localSize; setLocalSize(AlignBytes(localSize, WasmStackAlignment)); if (args.hasSyntheticStackResultPointerArg()) { stackResultsPtrOffset_ = Some(i.stackResultPointerOffset()); } return true; } void zeroLocals(BaseRegAlloc* ra); Address addressOfLocal(const Local& local, uint32_t additionalOffset = 0) { if (local.isStackArgument()) { return Address(FramePointer, stackArgumentOffsetFromFp(local) + additionalOffset); } return Address(sp_, localOffsetFromSp(local) + additionalOffset); } void loadLocalI32(const Local& src, RegI32 dest) { masm.load32(addressOfLocal(src), dest); } #ifndef JS_PUNBOX64 void loadLocalI64Low(const Local& src, RegI32 dest) { masm.load32(addressOfLocal(src, INT64LOW_OFFSET), dest); } void loadLocalI64High(const Local& src, RegI32 dest) { masm.load32(addressOfLocal(src, INT64HIGH_OFFSET), dest); } #endif void loadLocalI64(const Local& src, RegI64 dest) { masm.load64(addressOfLocal(src), dest); } void loadLocalPtr(const Local& src, RegPtr dest) { masm.loadPtr(addressOfLocal(src), dest); } void loadLocalF64(const Local& src, RegF64 dest) { masm.loadDouble(addressOfLocal(src), dest); } void loadLocalF32(const Local& src, RegF32 dest) { masm.loadFloat32(addressOfLocal(src), dest); } #ifdef ENABLE_WASM_SIMD void loadLocalV128(const Local& src, RegV128 dest) { masm.loadUnalignedSimd128(addressOfLocal(src), dest); } #endif void storeLocalI32(RegI32 src, const Local& dest) { masm.store32(src, addressOfLocal(dest)); } void storeLocalI64(RegI64 src, const Local& dest) { masm.store64(src, addressOfLocal(dest)); } void storeLocalPtr(Register src, const Local& dest) { masm.storePtr(src, addressOfLocal(dest)); } void storeLocalF64(RegF64 src, const Local& dest) { masm.storeDouble(src, addressOfLocal(dest)); } void storeLocalF32(RegF32 src, const Local& dest) { masm.storeFloat32(src, addressOfLocal(dest)); } #ifdef ENABLE_WASM_SIMD void storeLocalV128(RegV128 src, const Local& dest) { masm.storeUnalignedSimd128(src, addressOfLocal(dest)); } #endif // Offset off of sp_ for `local`. int32_t localOffsetFromSp(const Local& local) { MOZ_ASSERT(!local.isStackArgument()); return localOffset(local.offs); } // Offset off of frame pointer for `stack argument`. int32_t stackArgumentOffsetFromFp(const Local& local) { MOZ_ASSERT(local.isStackArgument()); return -local.offs; } // The incoming stack result area pointer is for stack results of the function // being compiled. void loadIncomingStackResultAreaPtr(RegPtr reg) { const int32_t offset = stackResultsPtrOffset_.value(); Address src = offset < 0 ? Address(FramePointer, -offset) : Address(sp_, stackOffset(offset)); masm.loadPtr(src, reg); } void storeIncomingStackResultAreaPtr(RegPtr reg) { // If we get here, that means the pointer to the stack results area was // passed in as a register, and therefore it will be spilled below the // frame, so the offset is a positive height. MOZ_ASSERT(stackResultsPtrOffset_.value() > 0); masm.storePtr(reg, Address(sp_, stackOffset(stackResultsPtrOffset_.value()))); } void loadTlsPtr(Register dst) { masm.loadPtr(Address(sp_, stackOffset(tlsPointerOffset_)), dst); } void storeTlsPtr(Register tls) { masm.storePtr(tls, Address(sp_, stackOffset(tlsPointerOffset_))); } int32_t getTlsPtrOffset() { return stackOffset(tlsPointerOffset_); } // An outgoing stack result area pointer is for stack results of callees of // the function being compiled. 
void computeOutgoingStackResultAreaPtr(const StackResultsLoc& results, RegPtr dest) { MOZ_ASSERT(results.height() <= masm.framePushed()); uint32_t offsetFromSP = masm.framePushed() - results.height(); masm.moveStackPtrTo(dest); if (offsetFromSP) { masm.addPtr(Imm32(offsetFromSP), dest); } } private: // Offset off of sp_ for a local with offset `offset` from Frame. int32_t localOffset(int32_t offset) { return masm.framePushed() - offset; } public: /////////////////////////////////////////////////////////////////////////// // // Dynamic area static constexpr size_t StackSizeOfPtr = ABIResult::StackSizeOfPtr; static constexpr size_t StackSizeOfInt64 = ABIResult::StackSizeOfInt64; static constexpr size_t StackSizeOfFloat = ABIResult::StackSizeOfFloat; static constexpr size_t StackSizeOfDouble = ABIResult::StackSizeOfDouble; #ifdef ENABLE_WASM_SIMD static constexpr size_t StackSizeOfV128 = ABIResult::StackSizeOfV128; #endif uint32_t pushPtr(Register r) { DebugOnly stackBefore = currentStackHeight(); #ifdef RABALDR_CHUNKY_STACK pushChunkyBytes(StackSizeOfPtr); masm.storePtr(r, Address(sp_, stackOffset(currentStackHeight()))); #else masm.Push(r); #endif maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed()); MOZ_ASSERT(stackBefore + StackSizeOfPtr == currentStackHeight()); return currentStackHeight(); } uint32_t pushFloat32(FloatRegister r) { DebugOnly stackBefore = currentStackHeight(); #ifdef RABALDR_CHUNKY_STACK pushChunkyBytes(StackSizeOfFloat); masm.storeFloat32(r, Address(sp_, stackOffset(currentStackHeight()))); #else masm.Push(r); #endif maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed()); MOZ_ASSERT(stackBefore + StackSizeOfFloat == currentStackHeight()); return currentStackHeight(); } #ifdef ENABLE_WASM_SIMD uint32_t pushV128(RegV128 r) { DebugOnly stackBefore = currentStackHeight(); # ifdef RABALDR_CHUNKY_STACK pushChunkyBytes(StackSizeOfV128); # else masm.adjustStack(-(int)StackSizeOfV128); # endif masm.storeUnalignedSimd128(r, Address(sp_, stackOffset(currentStackHeight()))); maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed()); MOZ_ASSERT(stackBefore + StackSizeOfV128 == currentStackHeight()); return currentStackHeight(); } #endif uint32_t pushDouble(FloatRegister r) { DebugOnly stackBefore = currentStackHeight(); #ifdef RABALDR_CHUNKY_STACK pushChunkyBytes(StackSizeOfDouble); masm.storeDouble(r, Address(sp_, stackOffset(currentStackHeight()))); #else masm.Push(r); #endif maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed()); MOZ_ASSERT(stackBefore + StackSizeOfDouble == currentStackHeight()); return currentStackHeight(); } void popPtr(Register r) { DebugOnly stackBefore = currentStackHeight(); #ifdef RABALDR_CHUNKY_STACK masm.loadPtr(Address(sp_, stackOffset(currentStackHeight())), r); popChunkyBytes(StackSizeOfPtr); #else masm.Pop(r); #endif MOZ_ASSERT(stackBefore - StackSizeOfPtr == currentStackHeight()); } void popFloat32(FloatRegister r) { DebugOnly stackBefore = currentStackHeight(); #ifdef RABALDR_CHUNKY_STACK masm.loadFloat32(Address(sp_, stackOffset(currentStackHeight())), r); popChunkyBytes(StackSizeOfFloat); #else masm.Pop(r); #endif MOZ_ASSERT(stackBefore - StackSizeOfFloat == currentStackHeight()); } void popDouble(FloatRegister r) { DebugOnly stackBefore = currentStackHeight(); #ifdef RABALDR_CHUNKY_STACK masm.loadDouble(Address(sp_, stackOffset(currentStackHeight())), r); popChunkyBytes(StackSizeOfDouble); #else masm.Pop(r); #endif MOZ_ASSERT(stackBefore - StackSizeOfDouble == currentStackHeight()); } #ifdef ENABLE_WASM_SIMD 
void popV128(RegV128 r) { DebugOnly stackBefore = currentStackHeight(); masm.loadUnalignedSimd128(Address(sp_, stackOffset(currentStackHeight())), r); # ifdef RABALDR_CHUNKY_STACK popChunkyBytes(StackSizeOfV128); # else masm.adjustStack((int)StackSizeOfV128); # endif MOZ_ASSERT(stackBefore - StackSizeOfV128 == currentStackHeight()); } #endif void popBytes(size_t bytes) { if (bytes > 0) { #ifdef RABALDR_CHUNKY_STACK popChunkyBytes(bytes); #else masm.freeStack(bytes); #endif } } void loadStackI32(int32_t offset, RegI32 dest) { masm.load32(Address(sp_, stackOffset(offset)), dest); } void loadStackI64(int32_t offset, RegI64 dest) { masm.load64(Address(sp_, stackOffset(offset)), dest); } #ifndef JS_PUNBOX64 void loadStackI64Low(int32_t offset, RegI32 dest) { masm.load32(Address(sp_, stackOffset(offset - INT64LOW_OFFSET)), dest); } void loadStackI64High(int32_t offset, RegI32 dest) { masm.load32(Address(sp_, stackOffset(offset - INT64HIGH_OFFSET)), dest); } #endif // Disambiguation: this loads a "Ptr" value from the stack, it does not load // the "StackPtr". void loadStackPtr(int32_t offset, RegPtr dest) { masm.loadPtr(Address(sp_, stackOffset(offset)), dest); } void loadStackF64(int32_t offset, RegF64 dest) { masm.loadDouble(Address(sp_, stackOffset(offset)), dest); } void loadStackF32(int32_t offset, RegF32 dest) { masm.loadFloat32(Address(sp_, stackOffset(offset)), dest); } #ifdef ENABLE_WASM_SIMD void loadStackV128(int32_t offset, RegV128 dest) { masm.loadUnalignedSimd128(Address(sp_, stackOffset(offset)), dest); } #endif uint32_t prepareStackResultArea(StackHeight stackBase, uint32_t stackResultBytes) { uint32_t end = computeHeightWithStackResults(stackBase, stackResultBytes); if (currentStackHeight() < end) { uint32_t bytes = end - currentStackHeight(); #ifdef RABALDR_CHUNKY_STACK pushChunkyBytes(bytes); #else masm.reserveStack(bytes); #endif maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed()); } return end; } void finishStackResultArea(StackHeight stackBase, uint32_t stackResultBytes) { uint32_t end = computeHeightWithStackResults(stackBase, stackResultBytes); MOZ_ASSERT(currentStackHeight() >= end); popBytes(currentStackHeight() - end); } // |srcHeight| and |destHeight| are stack heights *including* |bytes|. void shuffleStackResultsTowardFP(uint32_t srcHeight, uint32_t destHeight, uint32_t bytes, Register temp) { MOZ_ASSERT(destHeight < srcHeight); MOZ_ASSERT(bytes % sizeof(uint32_t) == 0); uint32_t destOffset = stackOffset(destHeight) + bytes; uint32_t srcOffset = stackOffset(srcHeight) + bytes; while (bytes >= sizeof(intptr_t)) { destOffset -= sizeof(intptr_t); srcOffset -= sizeof(intptr_t); bytes -= sizeof(intptr_t); masm.loadPtr(Address(sp_, srcOffset), temp); masm.storePtr(temp, Address(sp_, destOffset)); } if (bytes) { MOZ_ASSERT(bytes == sizeof(uint32_t)); destOffset -= sizeof(uint32_t); srcOffset -= sizeof(uint32_t); masm.load32(Address(sp_, srcOffset), temp); masm.store32(temp, Address(sp_, destOffset)); } } // Unlike the overload that operates on raw heights, |srcHeight| and // |destHeight| are stack heights *not including* |bytes|. 
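// Both shuffleStackResultsTowardFP overloads reduce to an overlap-safe,
// word-by-word copy to a higher address, walking offsets downward so that
// overlapping source words are read before they are clobbered. A standalone
// model of that copy (illustrative only; the real code works in SP-relative
// offsets rather than raw pointers):
//
//   #include <cstddef>
//   #include <cstdint>
//   #include <cstring>
//
//   void copyTowardHigherAddress(uint8_t* dst, const uint8_t* src,
//                                size_t bytes) {  // bytes % 4 == 0
//     while (bytes >= sizeof(uintptr_t)) {
//       bytes -= sizeof(uintptr_t);
//       uintptr_t word;
//       std::memcpy(&word, src + bytes, sizeof(word));
//       std::memcpy(dst + bytes, &word, sizeof(word));
//     }
//     if (bytes) {  // a trailing 32-bit word, possible on 64-bit targets
//       uint32_t word;
//       std::memcpy(&word, src, sizeof(word));
//       std::memcpy(dst, &word, sizeof(word));
//     }
//   }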
void shuffleStackResultsTowardFP(StackHeight srcHeight, StackHeight destHeight, uint32_t bytes, Register temp) { MOZ_ASSERT(srcHeight.isValid()); MOZ_ASSERT(destHeight.isValid()); uint32_t src = computeHeightWithStackResults(srcHeight, bytes); uint32_t dest = computeHeightWithStackResults(destHeight, bytes); MOZ_ASSERT(src <= currentStackHeight()); MOZ_ASSERT(dest <= currentStackHeight()); shuffleStackResultsTowardFP(src, dest, bytes, temp); } // |srcHeight| and |destHeight| are stack heights *including* |bytes|. void shuffleStackResultsTowardSP(uint32_t srcHeight, uint32_t destHeight, uint32_t bytes, Register temp) { MOZ_ASSERT(destHeight > srcHeight); MOZ_ASSERT(bytes % sizeof(uint32_t) == 0); uint32_t destOffset = stackOffset(destHeight); uint32_t srcOffset = stackOffset(srcHeight); while (bytes >= sizeof(intptr_t)) { masm.loadPtr(Address(sp_, srcOffset), temp); masm.storePtr(temp, Address(sp_, destOffset)); destOffset += sizeof(intptr_t); srcOffset += sizeof(intptr_t); bytes -= sizeof(intptr_t); } if (bytes) { MOZ_ASSERT(bytes == sizeof(uint32_t)); masm.load32(Address(sp_, srcOffset), temp); masm.store32(temp, Address(sp_, destOffset)); } } // Copy results from the top of the current stack frame to an area of memory, // and pop the stack accordingly. `dest` is the address of the low byte of // that memory. void popStackResultsToMemory(Register dest, uint32_t bytes, Register temp) { MOZ_ASSERT(bytes <= currentStackHeight()); MOZ_ASSERT(bytes % sizeof(uint32_t) == 0); uint32_t bytesToPop = bytes; uint32_t srcOffset = stackOffset(currentStackHeight()); uint32_t destOffset = 0; while (bytes >= sizeof(intptr_t)) { masm.loadPtr(Address(sp_, srcOffset), temp); masm.storePtr(temp, Address(dest, destOffset)); destOffset += sizeof(intptr_t); srcOffset += sizeof(intptr_t); bytes -= sizeof(intptr_t); } if (bytes) { MOZ_ASSERT(bytes == sizeof(uint32_t)); masm.load32(Address(sp_, srcOffset), temp); masm.store32(temp, Address(dest, destOffset)); } popBytes(bytesToPop); } private: void store32BitsToStack(int32_t imm, uint32_t destHeight, Register temp) { masm.move32(Imm32(imm), temp); masm.store32(temp, Address(sp_, stackOffset(destHeight))); } void store64BitsToStack(int64_t imm, uint32_t destHeight, Register temp) { #ifdef JS_PUNBOX64 masm.move64(Imm64(imm), Register64(temp)); masm.store64(Register64(temp), Address(sp_, stackOffset(destHeight))); #else union { int64_t i64; int32_t i32[2]; } bits = {.i64 = imm}; static_assert(sizeof(bits) == 8); store32BitsToStack(bits.i32[0], destHeight, temp); store32BitsToStack(bits.i32[1], destHeight - sizeof(int32_t), temp); #endif } public: void storeImmediatePtrToStack(intptr_t imm, uint32_t destHeight, Register temp) { #ifdef JS_PUNBOX64 static_assert(StackSizeOfPtr == 8); store64BitsToStack(imm, destHeight, temp); #else static_assert(StackSizeOfPtr == 4); store32BitsToStack(int32_t(imm), destHeight, temp); #endif } void storeImmediateI64ToStack(int64_t imm, uint32_t destHeight, Register temp) { store64BitsToStack(imm, destHeight, temp); } void storeImmediateF32ToStack(float imm, uint32_t destHeight, Register temp) { union { int32_t i32; float f32; } bits = {.f32 = imm}; static_assert(sizeof(bits) == 4); // Do not store 4 bytes if StackSizeOfFloat == 8. It's probably OK to do // so, but it costs little to store something predictable. 
if (StackSizeOfFloat == 4) { store32BitsToStack(bits.i32, destHeight, temp); } else { store64BitsToStack(uint32_t(bits.i32), destHeight, temp); } } void storeImmediateF64ToStack(double imm, uint32_t destHeight, Register temp) { union { int64_t i64; double f64; } bits = {.f64 = imm}; static_assert(sizeof(bits) == 8); store64BitsToStack(bits.i64, destHeight, temp); } #ifdef ENABLE_WASM_SIMD void storeImmediateV128ToStack(V128 imm, uint32_t destHeight, Register temp) { union { int32_t i32[4]; uint8_t bytes[16]; } bits; static_assert(sizeof(bits) == 16); memcpy(bits.bytes, imm.bytes, 16); for (unsigned i = 0; i < 4; i++) { store32BitsToStack(bits.i32[i], destHeight - i * sizeof(int32_t), temp); } } #endif }; void BaseStackFrame::zeroLocals(BaseRegAlloc* ra) { MOZ_ASSERT(varLow_ != UINT32_MAX); if (varLow_ == varHigh_) { return; } static const uint32_t wordSize = sizeof(void*); // The adjustments to 'low' by the size of the item being stored compensates // for the fact that locals offsets are the offsets from Frame to the bytes // directly "above" the locals in the locals area. See comment at Local. // On 64-bit systems we may have 32-bit alignment for the local area as it // may be preceded by parameters and prologue/debug data. uint32_t low = varLow_; if (low % wordSize) { masm.store32(Imm32(0), Address(sp_, localOffset(low + 4))); low += 4; } MOZ_ASSERT(low % wordSize == 0); const uint32_t high = AlignBytes(varHigh_, wordSize); // An UNROLL_LIMIT of 16 is chosen so that we only need an 8-bit signed // immediate to represent the offset in the store instructions in the loop // on x64. const uint32_t UNROLL_LIMIT = 16; const uint32_t initWords = (high - low) / wordSize; const uint32_t tailWords = initWords % UNROLL_LIMIT; const uint32_t loopHigh = high - (tailWords * wordSize); // With only one word to initialize, just store an immediate zero. if (initWords == 1) { masm.storePtr(ImmWord(0), Address(sp_, localOffset(low + wordSize))); return; } // For other cases, it's best to have a zero in a register. // // One can do more here with SIMD registers (store 16 bytes at a time) or // with instructions like STRD on ARM (store 8 bytes at a time), but that's // for another day. RegI32 zero = ra->needI32(); masm.mov(ImmWord(0), zero); // For the general case we want to have a loop body of UNROLL_LIMIT stores // and then a tail of less than UNROLL_LIMIT stores. When initWords is less // than 2*UNROLL_LIMIT the loop trip count is at most 1 and there is no // benefit to having the pointer calculations and the compare-and-branch. // So we completely unroll when we have initWords < 2 * UNROLL_LIMIT. (In // this case we'll end up using 32-bit offsets on x64 for up to half of the // stores, though.) // Fully-unrolled case. if (initWords < 2 * UNROLL_LIMIT) { for (uint32_t i = low; i < high; i += wordSize) { masm.storePtr(zero, Address(sp_, localOffset(i + wordSize))); } ra->freeI32(zero); return; } // Unrolled loop with a tail. Stores will use negative offsets. That's OK // for x86 and ARM, at least. // Compute pointer to the highest-addressed slot on the frame. RegI32 p = ra->needI32(); masm.computeEffectiveAddress(Address(sp_, localOffset(low + wordSize)), p); // Compute pointer to the lowest-addressed slot on the frame that will be // initialized by the loop body. RegI32 lim = ra->needI32(); masm.computeEffectiveAddress(Address(sp_, localOffset(loopHigh + wordSize)), lim); // The loop body. Eventually we'll have p == lim and exit the loop. 
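// The unrolling arithmetic above can be sanity-checked with a small
// standalone model (illustrative only; it assumes, as here, that `low` and
// `high` are both wordSize-aligned):
//
//   #include <cassert>
//   #include <cstdint>
//
//   void checkUnrollCounts(uint32_t low, uint32_t high, uint32_t wordSize,
//                          uint32_t unrollLimit) {
//     uint32_t initWords = (high - low) / wordSize;
//     uint32_t tailWords = initWords % unrollLimit;
//     uint32_t loopHigh = high - tailWords * wordSize;
//     // The loop stores unrollLimit words per iteration, so the words it
//     // covers plus the tail must add up to the whole area.
//     assert((loopHigh - low) % (unrollLimit * wordSize) == 0);
//     assert((loopHigh - low) / wordSize + tailWords == initWords);
//   }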
Label again; masm.bind(&again); for (uint32_t i = 0; i < UNROLL_LIMIT; ++i) { masm.storePtr(zero, Address(p, -(wordSize * i))); } masm.subPtr(Imm32(UNROLL_LIMIT * wordSize), p); masm.branchPtr(Assembler::LessThan, lim, p, &again); // The tail. for (uint32_t i = 0; i < tailWords; ++i) { masm.storePtr(zero, Address(p, -(wordSize * i))); } ra->freeI32(p); ra->freeI32(lim); ra->freeI32(zero); } // Value stack: stack elements struct Stk { private: Stk() : kind_(Unknown), i64val_(0) {} public: enum Kind { // The Mem opcodes are all clustered at the beginning to // allow for a quick test within sync(). MemI32, // 32-bit integer stack value ("offs") MemI64, // 64-bit integer stack value ("offs") MemF32, // 32-bit floating stack value ("offs") MemF64, // 64-bit floating stack value ("offs") #ifdef ENABLE_WASM_SIMD MemV128, // 128-bit vector stack value ("offs") #endif MemRef, // reftype (pointer wide) stack value ("offs") // The Local opcodes follow the Mem opcodes for a similar // quick test within hasLocal(). LocalI32, // Local int32 var ("slot") LocalI64, // Local int64 var ("slot") LocalF32, // Local float32 var ("slot") LocalF64, // Local double var ("slot") #ifdef ENABLE_WASM_SIMD LocalV128, // Local v128 var ("slot") #endif LocalRef, // Local reftype (pointer wide) var ("slot") RegisterI32, // 32-bit integer register ("i32reg") RegisterI64, // 64-bit integer register ("i64reg") RegisterF32, // 32-bit floating register ("f32reg") RegisterF64, // 64-bit floating register ("f64reg") #ifdef ENABLE_WASM_SIMD RegisterV128, // 128-bit vector register ("v128reg") #endif RegisterRef, // reftype (pointer wide) register ("refReg") ConstI32, // 32-bit integer constant ("i32val") ConstI64, // 64-bit integer constant ("i64val") ConstF32, // 32-bit floating constant ("f32val") ConstF64, // 64-bit floating constant ("f64val") #ifdef ENABLE_WASM_SIMD ConstV128, // 128-bit vector constant ("v128val") #endif ConstRef, // reftype (pointer wide) constant ("refval") Unknown, }; Kind kind_; static const Kind MemLast = MemRef; static const Kind LocalLast = LocalRef; union { RegI32 i32reg_; RegI64 i64reg_; RegPtr refReg_; RegF32 f32reg_; RegF64 f64reg_; #ifdef ENABLE_WASM_SIMD RegV128 v128reg_; #endif int32_t i32val_; int64_t i64val_; intptr_t refval_; float f32val_; double f64val_; #ifdef ENABLE_WASM_SIMD V128 v128val_; #endif uint32_t slot_; uint32_t offs_; }; explicit Stk(RegI32 r) : kind_(RegisterI32), i32reg_(r) {} explicit Stk(RegI64 r) : kind_(RegisterI64), i64reg_(r) {} explicit Stk(RegPtr r) : kind_(RegisterRef), refReg_(r) {} explicit Stk(RegF32 r) : kind_(RegisterF32), f32reg_(r) {} explicit Stk(RegF64 r) : kind_(RegisterF64), f64reg_(r) {} #ifdef ENABLE_WASM_SIMD explicit Stk(RegV128 r) : kind_(RegisterV128), v128reg_(r) {} #endif explicit Stk(int32_t v) : kind_(ConstI32), i32val_(v) {} explicit Stk(int64_t v) : kind_(ConstI64), i64val_(v) {} explicit Stk(float v) : kind_(ConstF32), f32val_(v) {} explicit Stk(double v) : kind_(ConstF64), f64val_(v) {} #ifdef ENABLE_WASM_SIMD explicit Stk(V128 v) : kind_(ConstV128), v128val_(v) {} #endif explicit Stk(Kind k, uint32_t v) : kind_(k), slot_(v) { MOZ_ASSERT(k > MemLast && k <= LocalLast); } static Stk StkRef(intptr_t v) { Stk s; s.kind_ = ConstRef; s.refval_ = v; return s; } static Stk StackResult(ValType type, uint32_t offs) { Kind k; switch (type.kind()) { case ValType::I32: k = Stk::MemI32; break; case ValType::I64: k = Stk::MemI64; break; case ValType::V128: #ifdef ENABLE_WASM_SIMD k = Stk::MemV128; break; #else MOZ_CRASH("No SIMD"); #endif case 
ValType::F32: k = Stk::MemF32; break; case ValType::F64: k = Stk::MemF64; break; case ValType::Ref: k = Stk::MemRef; break; } Stk s; s.setOffs(k, offs); return s; } void setOffs(Kind k, uint32_t v) { MOZ_ASSERT(k <= MemLast); kind_ = k; offs_ = v; } Kind kind() const { return kind_; } bool isMem() const { return kind_ <= MemLast; } RegI32 i32reg() const { MOZ_ASSERT(kind_ == RegisterI32); return i32reg_; } RegI64 i64reg() const { MOZ_ASSERT(kind_ == RegisterI64); return i64reg_; } RegPtr refReg() const { MOZ_ASSERT(kind_ == RegisterRef); return refReg_; } RegF32 f32reg() const { MOZ_ASSERT(kind_ == RegisterF32); return f32reg_; } RegF64 f64reg() const { MOZ_ASSERT(kind_ == RegisterF64); return f64reg_; } #ifdef ENABLE_WASM_SIMD RegV128 v128reg() const { MOZ_ASSERT(kind_ == RegisterV128); return v128reg_; } #endif int32_t i32val() const { MOZ_ASSERT(kind_ == ConstI32); return i32val_; } int64_t i64val() const { MOZ_ASSERT(kind_ == ConstI64); return i64val_; } intptr_t refval() const { MOZ_ASSERT(kind_ == ConstRef); return refval_; } // For these two, use an out-param instead of simply returning, to // use the normal stack and not the x87 FP stack (which has effect on // NaNs with the signaling bit set). void f32val(float* out) const { MOZ_ASSERT(kind_ == ConstF32); *out = f32val_; } void f64val(double* out) const { MOZ_ASSERT(kind_ == ConstF64); *out = f64val_; } #ifdef ENABLE_WASM_SIMD // For SIMD, do the same as for floats since we're using float registers to // hold vectors; this is just conservative. void v128val(V128* out) const { MOZ_ASSERT(kind_ == ConstV128); *out = v128val_; } #endif uint32_t slot() const { MOZ_ASSERT(kind_ > MemLast && kind_ <= LocalLast); return slot_; } uint32_t offs() const { MOZ_ASSERT(isMem()); return offs_; } }; typedef Vector StkVector; // MachineStackTracker, used for stack-slot pointerness tracking. class MachineStackTracker { // Simulates the machine's stack, with one bool per word. Index zero in // this vector corresponds to the highest address in the machine stack. The // last entry corresponds to what SP currently points at. This all assumes // a grow-down stack. // // numPtrs_ contains the number of "true" values in vec_, and is therefore // redundant. But it serves as a constant-time way to detect the common // case where vec_ holds no "true" values. size_t numPtrs_; Vector vec_; public: MachineStackTracker() : numPtrs_(0) {} ~MachineStackTracker() { #ifdef DEBUG size_t n = 0; for (bool b : vec_) { n += (b ? 1 : 0); } MOZ_ASSERT(n == numPtrs_); #endif } // Clone this MachineStackTracker, writing the result at |dst|. [[nodiscard]] bool cloneTo(MachineStackTracker* dst) { MOZ_ASSERT(dst->vec_.empty()); if (!dst->vec_.appendAll(vec_)) { return false; } dst->numPtrs_ = numPtrs_; return true; } // Notionally push |n| non-pointers on the stack. [[nodiscard]] bool pushNonGCPointers(size_t n) { return vec_.appendN(false, n); } // Mark the stack slot |offsetFromSP| up from the bottom as holding a // pointer. void setGCPointer(size_t offsetFromSP) { // offsetFromSP == 0 denotes the most recently pushed item, == 1 the // second most recently pushed item, etc. MOZ_ASSERT(offsetFromSP < vec_.length()); size_t offsetFromTop = vec_.length() - 1 - offsetFromSP; numPtrs_ = numPtrs_ + 1 - (vec_[offsetFromTop] ? 1 : 0); vec_[offsetFromTop] = true; } // Query the pointerness of the slot |offsetFromSP| up from the bottom. 
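// setGCPointer() above and isGCPointer() below share one index flip: slot 0
// is the most recently pushed (lowest-addressed) word, while vec_[0] tracks
// the highest address. A standalone model of the mapping (illustrative
// only):
//
//   #include <cassert>
//   #include <cstddef>
//   #include <vector>
//
//   struct StackWordModel {
//     std::vector<bool> isPtr;  // index 0 == highest address
//     void markPointer(size_t offsetFromSP) {
//       assert(offsetFromSP < isPtr.size());
//       isPtr[isPtr.size() - 1 - offsetFromSP] = true;
//     }
//     bool pointerAt(size_t offsetFromSP) const {
//       return isPtr.at(isPtr.size() - 1 - offsetFromSP);
//     }
//   };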
bool isGCPointer(size_t offsetFromSP) { MOZ_ASSERT(offsetFromSP < vec_.length()); size_t offsetFromTop = vec_.length() - 1 - offsetFromSP; return vec_[offsetFromTop]; } // Return the number of words tracked by this MachineStackTracker. size_t length() { return vec_.length(); } // Return the number of pointer-typed words tracked by this // MachineStackTracker. size_t numPtrs() { MOZ_ASSERT(numPtrs_ <= length()); return numPtrs_; } // Discard all contents, but (per mozilla::Vector::clear semantics) don't // free or reallocate any dynamic storage associated with |vec_|. void clear() { vec_.clear(); numPtrs_ = 0; } }; // StackMapGenerator, which carries all state needed to create stack maps. enum class HasDebugFrame { No, Yes }; struct StackMapGenerator { private: // --- These are constant for the life of the function's compilation --- // For generating stack maps, we'll need to know the offsets of registers // as saved by the trap exit stub. const MachineState& trapExitLayout_; const size_t trapExitLayoutNumWords_; // Completed stackmaps are added here StackMaps* stackMaps_; // So as to be able to get current offset when creating stack maps const MacroAssembler& masm_; public: // --- These are constant once we've completed beginFunction() --- // The number of words of arguments passed to this function in memory. size_t numStackArgWords; MachineStackTracker machineStackTracker; // tracks machine stack pointerness // This holds masm.framePushed at entry to the function's body. It is a // Maybe because createStackMap needs to know whether or not we're still // in the prologue. It makes a Nothing-to-Some transition just once per // function. Maybe framePushedAtEntryToBody; // --- These can change at any point --- // This holds masm.framePushed at it would be be for a function call // instruction, but excluding the stack area used to pass arguments in // memory. That is, for an upcoming function call, this will hold // // masm.framePushed() at the call instruction - // StackArgAreaSizeUnaligned(argumentTypes) // // This value denotes the lowest-addressed stack word covered by the current // function's stackmap. Words below this point form the highest-addressed // area of the callee's stackmap. Note that all alignment padding above the // arguments-in-memory themselves belongs to the caller's stack map, which // is why this is defined in terms of StackArgAreaSizeUnaligned() rather than // StackArgAreaSizeAligned(). // // When not inside a function call setup/teardown sequence, it is Nothing. // It can make Nothing-to/from-Some transitions arbitrarily as we progress // through the function body. Maybe framePushedExcludingOutboundCallArgs; // The number of memory-resident, ref-typed entries on the containing // BaseCompiler::stk_. size_t memRefsOnStk; // This is a copy of machineStackTracker that is used only within individual // calls to createStackMap. It is here only to avoid possible heap allocation // costs resulting from making it local to createStackMap(). MachineStackTracker augmentedMst; StackMapGenerator(StackMaps* stackMaps, const MachineState& trapExitLayout, const size_t trapExitLayoutNumWords, const MacroAssembler& masm) : trapExitLayout_(trapExitLayout), trapExitLayoutNumWords_(trapExitLayoutNumWords), stackMaps_(stackMaps), masm_(masm), numStackArgWords(0), memRefsOnStk(0) {} // At the beginning of a function, we may have live roots in registers (as // arguments) at the point where we perform a stack overflow check. 
This // method generates the "extra" stackmap entries to describe that, in the // case that the check fails and we wind up calling into the wasm exit // stub, as generated by GenerateTrapExit(). // // The resulting map must correspond precisely with the stack layout // created for the integer registers as saved by (code generated by) // GenerateTrapExit(). To do that we use trapExitLayout_ and // trapExitLayoutNumWords_, which together comprise a description of the // layout and are created by GenerateTrapExitMachineState(). [[nodiscard]] bool generateStackmapEntriesForTrapExit( const ArgTypeVector& args, ExitStubMapVector* extras) { return GenerateStackmapEntriesForTrapExit(args, trapExitLayout_, trapExitLayoutNumWords_, extras); } // Creates a stackmap associated with the instruction denoted by // |assemblerOffset|, incorporating pointers from the current operand // stack |stk|, incorporating possible extra pointers in |extra| at the // lower addressed end, and possibly with the associated frame having a // ref-typed DebugFrame as indicated by |refDebugFrame|. [[nodiscard]] bool createStackMap(const char* who, const ExitStubMapVector& extras, uint32_t assemblerOffset, HasDebugFrame debugFrame, const StkVector& stk) { size_t countedPointers = machineStackTracker.numPtrs() + memRefsOnStk; #ifndef DEBUG // An important optimization. If there are obviously no pointers, as // we expect in the majority of cases, exit quickly. if (countedPointers == 0 && debugFrame == HasDebugFrame::No) { // We can skip creating the map if there are no |true| elements in // |extras|. bool extrasHasRef = false; for (bool b : extras) { if (b) { extrasHasRef = true; break; } } if (!extrasHasRef) { return true; } } #else // In the debug case, create the stack map regardless, and cross-check // the pointer-counting below. We expect the final map to have // |countedPointers| in total. This doesn't include those in the // DebugFrame, but they do not appear in the map's bitmap. Note that // |countedPointers| is debug-only from this point onwards. for (bool b : extras) { countedPointers += (b ? 1 : 0); } #endif // Start with the frame-setup map, and add operand-stack information to // that. augmentedMst holds live data only within individual calls to // createStackMap. augmentedMst.clear(); if (!machineStackTracker.cloneTo(&augmentedMst)) { return false; } // At this point, augmentedMst only contains entries covering the // incoming argument area (if any) and for the area allocated by this // function's prologue. We now need to calculate how far the machine's // stack pointer is below where it was at the start of the body. But we // must take care not to include any words pushed as arguments to an // upcoming function call, since those words "belong" to the stackmap of // the callee, not to the stackmap of this function. Note however that // any alignment padding pushed prior to pushing the args *does* belong to // this function. // // That padding is taken into account at the point where // framePushedExcludingOutboundCallArgs is set, viz, in startCallArgs(), // and comprises two components: // // * call->frameAlignAdjustment // * the padding applied to the stack arg area itself. That is: // StackArgAreaSize(argTys) - StackArgAreaSizeUnpadded(argTys) Maybe framePushedExcludingArgs; if (framePushedAtEntryToBody.isNothing()) { // Still in the prologue. framePushedExcludingArgs remains Nothing. MOZ_ASSERT(framePushedExcludingOutboundCallArgs.isNothing()); } else { // In the body. 
MOZ_ASSERT(masm_.framePushed() >= framePushedAtEntryToBody.value()); if (framePushedExcludingOutboundCallArgs.isSome()) { // In the body, and we've potentially pushed some args onto the stack. // We must ignore them when sizing the stackmap. MOZ_ASSERT(masm_.framePushed() >= framePushedExcludingOutboundCallArgs.value()); MOZ_ASSERT(framePushedExcludingOutboundCallArgs.value() >= framePushedAtEntryToBody.value()); framePushedExcludingArgs = Some(framePushedExcludingOutboundCallArgs.value()); } else { // In the body, but not with call args on the stack. The stackmap // must be sized so as to extend all the way "down" to // masm_.framePushed(). framePushedExcludingArgs = Some(masm_.framePushed()); } } if (framePushedExcludingArgs.isSome()) { uint32_t bodyPushedBytes = framePushedExcludingArgs.value() - framePushedAtEntryToBody.value(); MOZ_ASSERT(0 == bodyPushedBytes % sizeof(void*)); if (!augmentedMst.pushNonGCPointers(bodyPushedBytes / sizeof(void*))) { return false; } } // Scan the operand stack, marking pointers in the just-added new // section. MOZ_ASSERT_IF(framePushedAtEntryToBody.isNothing(), stk.empty()); MOZ_ASSERT_IF(framePushedExcludingArgs.isNothing(), stk.empty()); for (const Stk& v : stk) { #ifndef DEBUG // We don't track roots in registers, per rationale below, so if this // doesn't hold, something is seriously wrong, and we're likely to get a // GC-related crash. MOZ_RELEASE_ASSERT(v.kind() != Stk::RegisterRef); if (v.kind() != Stk::MemRef) { continue; } #else // Take the opportunity to check everything we reasonably can about // operand stack elements. switch (v.kind()) { case Stk::MemI32: case Stk::MemI64: case Stk::MemF32: case Stk::MemF64: case Stk::ConstI32: case Stk::ConstI64: case Stk::ConstF32: case Stk::ConstF64: # ifdef ENABLE_WASM_SIMD case Stk::MemV128: case Stk::ConstV128: # endif // All of these have uninteresting type. continue; case Stk::LocalI32: case Stk::LocalI64: case Stk::LocalF32: case Stk::LocalF64: # ifdef ENABLE_WASM_SIMD case Stk::LocalV128: # endif // These also have uninteresting type. Check that they live in the // section of stack set up by beginFunction(). The unguarded use of // |value()| here is safe due to the assertion above this loop. MOZ_ASSERT(v.offs() <= framePushedAtEntryToBody.value()); continue; case Stk::RegisterI32: case Stk::RegisterI64: case Stk::RegisterF32: case Stk::RegisterF64: # ifdef ENABLE_WASM_SIMD case Stk::RegisterV128: # endif // These also have uninteresting type, but more to the point: all // registers holding live values should have been flushed to the // machine stack immediately prior to the instruction to which this // stackmap pertains. So these can't happen. MOZ_CRASH("createStackMap: operand stack has Register-non-Ref"); case Stk::MemRef: // This is the only case we care about. We'll handle it after the // switch. break; case Stk::LocalRef: // We need the stackmap to mention this pointer, but it should // already be in the machineStackTracker section created by // beginFunction(). MOZ_ASSERT(v.offs() <= framePushedAtEntryToBody.value()); continue; case Stk::ConstRef: // This can currently only be a null pointer. MOZ_ASSERT(v.refval() == 0); continue; case Stk::RegisterRef: // This can't happen, per rationale above. MOZ_CRASH("createStackMap: operand stack contains RegisterRef"); default: MOZ_CRASH("createStackMap: unknown operand stack element"); } #endif // v.offs() holds masm.framePushed() at the point immediately after it // was pushed on the stack. 
Since it's still on the stack, // masm.framePushed() can't be less. MOZ_ASSERT(v.offs() <= framePushedExcludingArgs.value()); uint32_t offsFromMapLowest = framePushedExcludingArgs.value() - v.offs(); MOZ_ASSERT(0 == offsFromMapLowest % sizeof(void*)); augmentedMst.setGCPointer(offsFromMapLowest / sizeof(void*)); } // Create the final StackMap. The initial map is zeroed out, so there's // no need to write zero bits in it. const uint32_t extraWords = extras.length(); const uint32_t augmentedMstWords = augmentedMst.length(); const uint32_t numMappedWords = extraWords + augmentedMstWords; StackMap* stackMap = StackMap::create(numMappedWords); if (!stackMap) { return false; } { // First the exit stub extra words, if any. uint32_t i = 0; for (bool b : extras) { if (b) { stackMap->setBit(i); } i++; } } // Followed by the "main" part of the map. for (uint32_t i = 0; i < augmentedMstWords; i++) { if (augmentedMst.isGCPointer(i)) { stackMap->setBit(extraWords + i); } } stackMap->setExitStubWords(extraWords); // Record in the map, how far down from the highest address the Frame* is. // Take the opportunity to check that we haven't marked any part of the // Frame itself as a pointer. stackMap->setFrameOffsetFromTop(numStackArgWords + sizeof(Frame) / sizeof(void*)); #ifdef DEBUG for (uint32_t i = 0; i < sizeof(Frame) / sizeof(void*); i++) { MOZ_ASSERT(stackMap->getBit(stackMap->numMappedWords - stackMap->frameOffsetFromTop + i) == 0); } #endif // Note the presence of a ref-typed DebugFrame, if any. if (debugFrame == HasDebugFrame::Yes) { stackMap->setHasDebugFrame(); } // Add the completed map to the running collection thereof. if (!stackMaps_->add((uint8_t*)(uintptr_t)assemblerOffset, stackMap)) { stackMap->destroy(); return false; } #ifdef DEBUG { // Crosscheck the map pointer counting. uint32_t nw = stackMap->numMappedWords; uint32_t np = 0; for (uint32_t i = 0; i < nw; i++) { np += stackMap->getBit(i); } MOZ_ASSERT(size_t(np) == countedPointers); } #endif return true; } }; // The baseline compiler proper. class BaseCompiler final : public BaseCompilerInterface { using Local = BaseStackFrame::Local; using LabelVector = Vector; // Bit set used for simple bounds check elimination. Capping this at 64 // locals makes sense; even 32 locals would probably be OK in practice. // // For more information about BCE, see the block comment above // popMemoryAccess(), below. using BCESet = uint64_t; // Control node, representing labels and stack heights at join points. struct Control { NonAssertingLabel label; // The "exit" label NonAssertingLabel otherLabel; // Used for the "else" branch of if-then-else StackHeight stackHeight; // From BaseStackFrame uint32_t stackSize; // Value stack height BCESet bceSafeOnEntry; // Bounds check info flowing into the item BCESet bceSafeOnExit; // Bounds check info flowing out of the item bool deadOnArrival; // deadCode_ was set on entry to the region bool deadThenBranch; // deadCode_ was set on exit from "then" Control() : stackHeight(StackHeight::Invalid()), stackSize(UINT32_MAX), bceSafeOnEntry(0), bceSafeOnExit(~BCESet(0)), deadOnArrival(false), deadThenBranch(false) {} }; class NothingVector { Nothing unused_; public: bool resize(size_t length) { return true; } Nothing& operator[](size_t) { return unused_; } Nothing& back() { return unused_; } }; struct BaseCompilePolicy { // The baseline compiler tracks values on a stack of its own -- it // needs to scan that stack for spilling -- and thus has no need // for the values maintained by the iterator. 
using Value = Nothing; using ValueVector = NothingVector; // The baseline compiler uses the iterator's control stack, attaching // its own control information. using ControlItem = Control; }; using BaseOpIter = OpIter; // The baseline compiler will use OOL code more sparingly than // Baldr since our code is not high performance and frills like // code density and branch prediction friendliness will be less // important. class OutOfLineCode : public TempObject { private: NonAssertingLabel entry_; NonAssertingLabel rejoin_; StackHeight stackHeight_; public: OutOfLineCode() : stackHeight_(StackHeight::Invalid()) {} Label* entry() { return &entry_; } Label* rejoin() { return &rejoin_; } void setStackHeight(StackHeight stackHeight) { MOZ_ASSERT(!stackHeight_.isValid()); stackHeight_ = stackHeight; } void bind(BaseStackFrame* fr, MacroAssembler* masm) { MOZ_ASSERT(stackHeight_.isValid()); masm->bind(&entry_); fr->setStackHeight(stackHeight_); } // The generate() method must be careful about register use // because it will be invoked when there is a register // assignment in the BaseCompiler that does not correspond // to the available registers when the generated OOL code is // executed. The register allocator *must not* be called. // // The best strategy is for the creator of the OOL object to // allocate all temps that the OOL code will need. // // Input, output, and temp registers are embedded in the OOL // object and are known to the code generator. // // Scratch registers are available to use in OOL code. // // All other registers must be explicitly saved and restored // by the OOL code before being used. virtual void generate(MacroAssembler* masm) = 0; }; enum class LatentOp { None, Compare, Eqz }; struct AccessCheck { AccessCheck() : omitBoundsCheck(false), omitAlignmentCheck(false), onlyPointerAlignment(false) {} // If `omitAlignmentCheck` is true then we need check neither the // pointer nor the offset. Otherwise, if `onlyPointerAlignment` is true // then we need check only the pointer. Otherwise, check the sum of // pointer and offset. bool omitBoundsCheck; bool omitAlignmentCheck; bool onlyPointerAlignment; }; const ModuleEnvironment& moduleEnv_; const CompilerEnvironment& compilerEnv_; BaseOpIter iter_; const FuncCompileInput& func_; size_t lastReadCallSite_; TempAllocator::Fallible alloc_; const ValTypeVector& locals_; // Types of parameters and locals bool deadCode_; // Flag indicating we should decode & discard the opcode BCESet bceSafe_; // Locals that have been bounds checked and not updated since ValTypeVector SigD_; ValTypeVector SigF_; NonAssertingLabel returnLabel_; LatentOp latentOp_; // Latent operation for branch (seen next) ValType latentType_; // Operand type, if latentOp_ is true Assembler::Condition latentIntCmp_; // Comparison operator, if latentOp_ == Compare, int types Assembler::DoubleCondition latentDoubleCmp_; // Comparison operator, if latentOp_ == Compare, float // types FuncOffsets offsets_; MacroAssembler& masm; // No '_' suffix - too tedious... BaseRegAlloc ra; // Ditto BaseStackFrame fr; StackMapGenerator stackMapGenerator_; BaseStackFrame::LocalVector localInfo_; Vector outOfLine_; // On specific platforms we sometimes need to use specific registers. SpecificRegs specific_; // There are more members scattered throughout. 
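// bceSafe_ above is a plain 64-bit bit set: bit i set means local i holds a
// value that has been bounds checked and not written since. A minimal
// standalone sketch of that bookkeeping (illustrative; the real updates are
// spread across the memory-access and local.set paths):
//
//   #include <cstdint>
//
//   using BCEBits = uint64_t;
//
//   void markBoundsChecked(BCEBits* safe, uint32_t slot) {
//     if (slot < 64) *safe |= BCEBits(1) << slot;
//   }
//   void invalidateOnWrite(BCEBits* safe, uint32_t slot) {
//     if (slot < 64) *safe &= ~(BCEBits(1) << slot);
//   }
//   bool canOmitBoundsCheck(BCEBits safe, uint32_t slot) {
//     return slot < 64 && (safe & (BCEBits(1) << slot)) != 0;
//   }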
public: BaseCompiler(const ModuleEnvironment& moduleEnv, const CompilerEnvironment& compilerEnv, const FuncCompileInput& input, const ValTypeVector& locals, const MachineState& trapExitLayout, size_t trapExitLayoutNumWords, Decoder& decoder, StkVector& stkSource, TempAllocator* alloc, MacroAssembler* masm, StackMaps* stackMaps); ~BaseCompiler(); [[nodiscard]] bool init(); FuncOffsets finish(); [[nodiscard]] bool emitFunction(); void emitInitStackLocals(); const FuncType& funcType() const { return *moduleEnv_.funcs[func_.index].type; } const TypeIdDesc& funcTypeId() const { return *moduleEnv_.funcs[func_.index].typeId; } // Used by some of the ScratchRegister implementations. operator MacroAssembler&() const { return masm; } operator BaseRegAlloc&() { return ra; } bool usesSharedMemory() const { return moduleEnv_.usesSharedMemory(); } private: //////////////////////////////////////////////////////////// // // Out of line code management. [[nodiscard]] OutOfLineCode* addOutOfLineCode(OutOfLineCode* ool) { if (!ool || !outOfLine_.append(ool)) { return nullptr; } ool->setStackHeight(fr.stackHeight()); return ool; } [[nodiscard]] bool generateOutOfLineCode() { for (uint32_t i = 0; i < outOfLine_.length(); i++) { OutOfLineCode* ool = outOfLine_[i]; ool->bind(&fr, &masm); ool->generate(&masm); } return !masm.oom(); } // Utility. const Local& localFromSlot(uint32_t slot, MIRType type) { MOZ_ASSERT(localInfo_[slot].type == type); return localInfo_[slot]; } //////////////////////////////////////////////////////////// // // High-level register management. bool isAvailableI32(RegI32 r) { return ra.isAvailableI32(r); } bool isAvailableI64(RegI64 r) { return ra.isAvailableI64(r); } bool isAvailableRef(RegPtr r) { return ra.isAvailablePtr(r); } bool isAvailableF32(RegF32 r) { return ra.isAvailableF32(r); } bool isAvailableF64(RegF64 r) { return ra.isAvailableF64(r); } #ifdef ENABLE_WASM_SIMD bool isAvailableV128(RegV128 r) { return ra.isAvailableV128(r); } #endif [[nodiscard]] RegI32 needI32() { return ra.needI32(); } [[nodiscard]] RegI64 needI64() { return ra.needI64(); } [[nodiscard]] RegPtr needRef() { return ra.needPtr(); } [[nodiscard]] RegF32 needF32() { return ra.needF32(); } [[nodiscard]] RegF64 needF64() { return ra.needF64(); } #ifdef ENABLE_WASM_SIMD [[nodiscard]] RegV128 needV128() { return ra.needV128(); } #endif void needI32(RegI32 specific) { ra.needI32(specific); } void needI64(RegI64 specific) { ra.needI64(specific); } void needRef(RegPtr specific) { ra.needPtr(specific); } void needF32(RegF32 specific) { ra.needF32(specific); } void needF64(RegF64 specific) { ra.needF64(specific); } #ifdef ENABLE_WASM_SIMD void needV128(RegV128 specific) { ra.needV128(specific); } #endif #if defined(JS_CODEGEN_ARM) [[nodiscard]] RegI64 needI64Pair() { return ra.needI64Pair(); } #endif void freeI32(RegI32 r) { ra.freeI32(r); } void freeI64(RegI64 r) { ra.freeI64(r); } void freeRef(RegPtr r) { ra.freePtr(r); } void freeF32(RegF32 r) { ra.freeF32(r); } void freeF64(RegF64 r) { ra.freeF64(r); } #ifdef ENABLE_WASM_SIMD void freeV128(RegV128 r) { ra.freeV128(r); } #endif void freeI64Except(RegI64 r, RegI32 except) { #ifdef JS_PUNBOX64 MOZ_ASSERT(r.reg == except); #else MOZ_ASSERT(r.high == except || r.low == except); freeI64(r); needI32(except); #endif } void maybeFreeI32(RegI32 r) { if (r.isValid()) { freeI32(r); } } void maybeFreeI64(RegI64 r) { if (r.isValid()) { freeI64(r); } } void maybeFreeF64(RegF64 r) { if (r.isValid()) { freeF64(r); } } void needI32NoSync(RegI32 r) { MOZ_ASSERT(isAvailableI32(r)); 
needI32(r); } // TODO / OPTIMIZE: need2xI32() can be optimized along with needI32() // to avoid sync(). (Bug 1316802) void need2xI32(RegI32 r0, RegI32 r1) { needI32(r0); needI32(r1); } void need2xI64(RegI64 r0, RegI64 r1) { needI64(r0); needI64(r1); } RegI32 fromI64(RegI64 r) { return RegI32(lowPart(r)); } #ifdef JS_PUNBOX64 RegI64 fromI32(RegI32 r) { return RegI64(Register64(r)); } #endif RegI64 widenI32(RegI32 r) { MOZ_ASSERT(!isAvailableI32(r)); #ifdef JS_PUNBOX64 return fromI32(r); #else RegI32 high = needI32(); return RegI64(Register64(high, r)); #endif } RegI32 narrowI64(RegI64 r) { #ifdef JS_PUNBOX64 return RegI32(r.reg); #else freeI32(RegI32(r.high)); return RegI32(r.low); #endif } RegI32 narrowPtr(RegPtr r) { return RegI32(r); } RegI32 lowPart(RegI64 r) { #ifdef JS_PUNBOX64 return RegI32(r.reg); #else return RegI32(r.low); #endif } RegI32 maybeHighPart(RegI64 r) { #ifdef JS_PUNBOX64 return RegI32::Invalid(); #else return RegI32(r.high); #endif } void maybeClearHighPart(RegI64 r) { #if !defined(JS_PUNBOX64) moveImm32(0, RegI32(r.high)); #endif } void moveI32(RegI32 src, RegI32 dest) { if (src != dest) { masm.move32(src, dest); } } void moveI64(RegI64 src, RegI64 dest) { if (src != dest) { masm.move64(src, dest); } } void moveRef(RegPtr src, RegPtr dest) { if (src != dest) { masm.movePtr(src, dest); } } void moveF64(RegF64 src, RegF64 dest) { if (src != dest) { masm.moveDouble(src, dest); } } void moveF32(RegF32 src, RegF32 dest) { if (src != dest) { masm.moveFloat32(src, dest); } } #ifdef ENABLE_WASM_SIMD void moveV128(RegV128 src, RegV128 dest) { if (src != dest) { masm.moveSimd128(src, dest); } } #endif //////////////////////////////////////////////////////////////////////////// // // Block parameters and results. // // Blocks may have multiple parameters and multiple results. Blocks can also // be the target of branches: the entry for loops, and the exit for // non-loops. // // Passing multiple values to a non-branch target (i.e., the entry of a // "block") falls out naturally: any items on the value stack can flow // directly from one block to another. // // However, for branch targets, we need to allocate well-known locations for // the branch values. The approach taken in the baseline compiler is to // allocate registers to the top N values (currently N=1), and then stack // locations for the rest. // enum class RegKind { All, OnlyGPRs }; inline void needResultRegisters(ResultType type, RegKind which) { if (type.empty()) { return; } for (ABIResultIter iter(type); !iter.done(); iter.next()) { ABIResult result = iter.cur(); // Register results are visited first; when we see a stack result we're // done. 
if (!result.inRegister()) { return; } switch (result.type().kind()) { case ValType::I32: needI32(RegI32(result.gpr())); break; case ValType::I64: needI64(RegI64(result.gpr64())); break; case ValType::V128: #ifdef ENABLE_WASM_SIMD if (which == RegKind::All) { needV128(RegV128(result.fpr())); } break; #else MOZ_CRASH("No SIMD support"); #endif case ValType::F32: if (which == RegKind::All) { needF32(RegF32(result.fpr())); } break; case ValType::F64: if (which == RegKind::All) { needF64(RegF64(result.fpr())); } break; case ValType::Ref: needRef(RegPtr(result.gpr())); break; } } } #ifdef JS_CODEGEN_X64 inline void maskResultRegisters(ResultType type) { MOZ_ASSERT(JitOptions.spectreIndexMasking); if (type.empty()) { return; } for (ABIResultIter iter(type); !iter.done(); iter.next()) { ABIResult result = iter.cur(); if (result.inRegister() && result.type().kind() == ValType::I32) { masm.movl(result.gpr(), result.gpr()); } } } #endif inline void freeResultRegisters(ResultType type, RegKind which) { if (type.empty()) { return; } for (ABIResultIter iter(type); !iter.done(); iter.next()) { ABIResult result = iter.cur(); // Register results are visited first; when we see a stack result we're // done. if (!result.inRegister()) { return; } switch (result.type().kind()) { case ValType::I32: freeI32(RegI32(result.gpr())); break; case ValType::I64: freeI64(RegI64(result.gpr64())); break; case ValType::V128: #ifdef ENABLE_WASM_SIMD if (which == RegKind::All) { freeV128(RegV128(result.fpr())); } break; #else MOZ_CRASH("No SIMD support"); #endif case ValType::F32: if (which == RegKind::All) { freeF32(RegF32(result.fpr())); } break; case ValType::F64: if (which == RegKind::All) { freeF64(RegF64(result.fpr())); } break; case ValType::Ref: freeRef(RegPtr(result.gpr())); break; } } } void needIntegerResultRegisters(ResultType type) { needResultRegisters(type, RegKind::OnlyGPRs); } void freeIntegerResultRegisters(ResultType type) { freeResultRegisters(type, RegKind::OnlyGPRs); } void needResultRegisters(ResultType type) { needResultRegisters(type, RegKind::All); } void freeResultRegisters(ResultType type) { freeResultRegisters(type, RegKind::All); } void assertResultRegistersAvailable(ResultType type) { #ifdef DEBUG for (ABIResultIter iter(type); !iter.done(); iter.next()) { ABIResult result = iter.cur(); if (!result.inRegister()) { return; } switch (result.type().kind()) { case ValType::I32: MOZ_ASSERT(isAvailableI32(RegI32(result.gpr()))); break; case ValType::I64: MOZ_ASSERT(isAvailableI64(RegI64(result.gpr64()))); break; case ValType::V128: # ifdef ENABLE_WASM_SIMD MOZ_ASSERT(isAvailableV128(RegV128(result.fpr()))); break; # else MOZ_CRASH("No SIMD support"); # endif case ValType::F32: MOZ_ASSERT(isAvailableF32(RegF32(result.fpr()))); break; case ValType::F64: MOZ_ASSERT(isAvailableF64(RegF64(result.fpr()))); break; case ValType::Ref: MOZ_ASSERT(isAvailableRef(RegPtr(result.gpr()))); break; } } #endif } void captureResultRegisters(ResultType type) { assertResultRegistersAvailable(type); needResultRegisters(type); } void captureCallResultRegisters(ResultType type) { captureResultRegisters(type); #ifdef JS_CODEGEN_X64 if (JitOptions.spectreIndexMasking) { maskResultRegisters(type); } #endif } //////////////////////////////////////////////////////////// // // Value stack and spilling. // // The value stack facilitates some on-the-fly register allocation // and immediate-constant use. It tracks constants, latent // references to locals, register contents, and values on the CPU // stack. 
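// For intuition, each element behaves like a small tagged union over those
// flavors; a simplified standalone model (illustrative only -- the real Stk
// type above distinguishes many more kinds and value types):
//
//   #include <cstdint>
//
//   struct ValueElem {
//     enum class Kind { Constant, Local, Register, Memory } kind;
//     union {
//       int32_t constVal;    // Kind::Constant: the immediate itself
//       uint32_t localSlot;  // Kind::Local: index of the local variable
//       uint32_t regCode;    // Kind::Register: which register holds it
//       uint32_t spillOffs;  // Kind::Memory: height of the spill slot
//     };
//   };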
// // The stack can be flushed to memory using sync(). This is handy // to avoid problems with control flow and messy register usage // patterns. // This is the value stack actually used during compilation. It is a // StkVector rather than a StkVector& since constantly dereferencing a // StkVector& adds about 0.5% or more to the compiler's dynamic instruction // count. StkVector stk_; static constexpr size_t MaxPushesPerOpcode = 10; // BaselineCompileFunctions() "lends" us the StkVector to use in this // BaseCompiler object, and that is installed in |stk_| in our constructor. // This is so as to avoid having to malloc/free the vector's contents at // each creation/destruction of a BaseCompiler object. It does however mean // that we need to hold on to a reference to BaselineCompileFunctions()'s // vector, so we can swap (give) its contents back when this BaseCompiler // object is destroyed. This significantly reduces the heap turnover of the // baseline compiler. See bug 1532592. StkVector& stkSource_; #ifdef DEBUG size_t countMemRefsOnStk() { size_t nRefs = 0; for (Stk& v : stk_) { if (v.kind() == Stk::MemRef) { nRefs++; } } return nRefs; } #endif template void push(T item) { // None of the single-arg Stk constructors create a Stk::MemRef, so // there's no need to increment stackMapGenerator_.memRefsOnStk here. stk_.infallibleEmplaceBack(Stk(item)); } void pushConstRef(intptr_t v) { stk_.infallibleEmplaceBack(Stk::StkRef(v)); } void loadConstI32(const Stk& src, RegI32 dest) { moveImm32(src.i32val(), dest); } void loadMemI32(const Stk& src, RegI32 dest) { fr.loadStackI32(src.offs(), dest); } void loadLocalI32(const Stk& src, RegI32 dest) { fr.loadLocalI32(localFromSlot(src.slot(), MIRType::Int32), dest); } void loadRegisterI32(const Stk& src, RegI32 dest) { moveI32(src.i32reg(), dest); } void loadConstI64(const Stk& src, RegI64 dest) { moveImm64(src.i64val(), dest); } void loadMemI64(const Stk& src, RegI64 dest) { fr.loadStackI64(src.offs(), dest); } void loadLocalI64(const Stk& src, RegI64 dest) { fr.loadLocalI64(localFromSlot(src.slot(), MIRType::Int64), dest); } void loadRegisterI64(const Stk& src, RegI64 dest) { moveI64(src.i64reg(), dest); } void loadConstRef(const Stk& src, RegPtr dest) { moveImmRef(src.refval(), dest); } void loadMemRef(const Stk& src, RegPtr dest) { fr.loadStackPtr(src.offs(), dest); } void loadLocalRef(const Stk& src, RegPtr dest) { fr.loadLocalPtr(localFromSlot(src.slot(), MIRType::RefOrNull), dest); } void loadRegisterRef(const Stk& src, RegPtr dest) { moveRef(src.refReg(), dest); } void loadConstF64(const Stk& src, RegF64 dest) { double d; src.f64val(&d); masm.loadConstantDouble(d, dest); } void loadMemF64(const Stk& src, RegF64 dest) { fr.loadStackF64(src.offs(), dest); } void loadLocalF64(const Stk& src, RegF64 dest) { fr.loadLocalF64(localFromSlot(src.slot(), MIRType::Double), dest); } void loadRegisterF64(const Stk& src, RegF64 dest) { moveF64(src.f64reg(), dest); } void loadConstF32(const Stk& src, RegF32 dest) { float f; src.f32val(&f); masm.loadConstantFloat32(f, dest); } void loadMemF32(const Stk& src, RegF32 dest) { fr.loadStackF32(src.offs(), dest); } void loadLocalF32(const Stk& src, RegF32 dest) { fr.loadLocalF32(localFromSlot(src.slot(), MIRType::Float32), dest); } void loadRegisterF32(const Stk& src, RegF32 dest) { moveF32(src.f32reg(), dest); } #ifdef ENABLE_WASM_SIMD void loadConstV128(const Stk& src, RegV128 dest) { V128 f; src.v128val(&f); masm.loadConstantSimd128(SimdConstant::CreateX16((int8_t*)f.bytes), dest); } void loadMemV128(const Stk& 
src, RegV128 dest) { fr.loadStackV128(src.offs(), dest); } void loadLocalV128(const Stk& src, RegV128 dest) { fr.loadLocalV128(localFromSlot(src.slot(), MIRType::Simd128), dest); } void loadRegisterV128(const Stk& src, RegV128 dest) { moveV128(src.v128reg(), dest); } #endif void loadI32(const Stk& src, RegI32 dest) { switch (src.kind()) { case Stk::ConstI32: loadConstI32(src, dest); break; case Stk::MemI32: loadMemI32(src, dest); break; case Stk::LocalI32: loadLocalI32(src, dest); break; case Stk::RegisterI32: loadRegisterI32(src, dest); break; default: MOZ_CRASH("Compiler bug: Expected I32 on stack"); } } void loadI64(const Stk& src, RegI64 dest) { switch (src.kind()) { case Stk::ConstI64: loadConstI64(src, dest); break; case Stk::MemI64: loadMemI64(src, dest); break; case Stk::LocalI64: loadLocalI64(src, dest); break; case Stk::RegisterI64: loadRegisterI64(src, dest); break; default: MOZ_CRASH("Compiler bug: Expected I64 on stack"); } } #if !defined(JS_PUNBOX64) void loadI64Low(const Stk& src, RegI32 dest) { switch (src.kind()) { case Stk::ConstI64: moveImm32(int32_t(src.i64val()), dest); break; case Stk::MemI64: fr.loadStackI64Low(src.offs(), dest); break; case Stk::LocalI64: fr.loadLocalI64Low(localFromSlot(src.slot(), MIRType::Int64), dest); break; case Stk::RegisterI64: moveI32(RegI32(src.i64reg().low), dest); break; default: MOZ_CRASH("Compiler bug: Expected I64 on stack"); } } void loadI64High(const Stk& src, RegI32 dest) { switch (src.kind()) { case Stk::ConstI64: moveImm32(int32_t(src.i64val() >> 32), dest); break; case Stk::MemI64: fr.loadStackI64High(src.offs(), dest); break; case Stk::LocalI64: fr.loadLocalI64High(localFromSlot(src.slot(), MIRType::Int64), dest); break; case Stk::RegisterI64: moveI32(RegI32(src.i64reg().high), dest); break; default: MOZ_CRASH("Compiler bug: Expected I64 on stack"); } } #endif void loadF64(const Stk& src, RegF64 dest) { switch (src.kind()) { case Stk::ConstF64: loadConstF64(src, dest); break; case Stk::MemF64: loadMemF64(src, dest); break; case Stk::LocalF64: loadLocalF64(src, dest); break; case Stk::RegisterF64: loadRegisterF64(src, dest); break; default: MOZ_CRASH("Compiler bug: expected F64 on stack"); } } void loadF32(const Stk& src, RegF32 dest) { switch (src.kind()) { case Stk::ConstF32: loadConstF32(src, dest); break; case Stk::MemF32: loadMemF32(src, dest); break; case Stk::LocalF32: loadLocalF32(src, dest); break; case Stk::RegisterF32: loadRegisterF32(src, dest); break; default: MOZ_CRASH("Compiler bug: expected F32 on stack"); } } #ifdef ENABLE_WASM_SIMD void loadV128(const Stk& src, RegV128 dest) { switch (src.kind()) { case Stk::ConstV128: loadConstV128(src, dest); break; case Stk::MemV128: loadMemV128(src, dest); break; case Stk::LocalV128: loadLocalV128(src, dest); break; case Stk::RegisterV128: loadRegisterV128(src, dest); break; default: MOZ_CRASH("Compiler bug: expected V128 on stack"); } } #endif void loadRef(const Stk& src, RegPtr dest) { switch (src.kind()) { case Stk::ConstRef: loadConstRef(src, dest); break; case Stk::MemRef: loadMemRef(src, dest); break; case Stk::LocalRef: loadLocalRef(src, dest); break; case Stk::RegisterRef: loadRegisterRef(src, dest); break; default: MOZ_CRASH("Compiler bug: expected ref on stack"); } } // Flush all local and register value stack elements to memory. // // TODO / OPTIMIZE: As this is fairly expensive and causes worse // code to be emitted subsequently, it is useful to avoid calling // it. (Bug 1316802) // // Some optimization has been done already. 
Remaining // opportunities: // // - It would be interesting to see if we can specialize it // before calls with particularly simple signatures, or where // we can do parallel assignment of register arguments, or // similar. See notes in emitCall(). // // - Operations that need specific registers: multiply, quotient, // remainder, will tend to sync because the registers we need // will tend to be allocated. We may be able to avoid that by // prioritizing registers differently (takeLast instead of // takeFirst) but we may also be able to allocate an unused // register on demand to free up one we need, thus avoiding the // sync. That type of fix would go into needI32(). void sync() final { size_t start = 0; size_t lim = stk_.length(); for (size_t i = lim; i > 0; i--) { // Memory opcodes are first in the enum, single check against MemLast is // fine. if (stk_[i - 1].kind() <= Stk::MemLast) { start = i; break; } } for (size_t i = start; i < lim; i++) { Stk& v = stk_[i]; switch (v.kind()) { case Stk::LocalI32: { ScratchI32 scratch(*this); loadLocalI32(v, scratch); uint32_t offs = fr.pushPtr(scratch); v.setOffs(Stk::MemI32, offs); break; } case Stk::RegisterI32: { uint32_t offs = fr.pushPtr(v.i32reg()); freeI32(v.i32reg()); v.setOffs(Stk::MemI32, offs); break; } case Stk::LocalI64: { ScratchI32 scratch(*this); #ifdef JS_PUNBOX64 loadI64(v, fromI32(scratch)); uint32_t offs = fr.pushPtr(scratch); #else fr.loadLocalI64High(localFromSlot(v.slot(), MIRType::Int64), scratch); fr.pushPtr(scratch); fr.loadLocalI64Low(localFromSlot(v.slot(), MIRType::Int64), scratch); uint32_t offs = fr.pushPtr(scratch); #endif v.setOffs(Stk::MemI64, offs); break; } case Stk::RegisterI64: { #ifdef JS_PUNBOX64 uint32_t offs = fr.pushPtr(v.i64reg().reg); freeI64(v.i64reg()); #else fr.pushPtr(v.i64reg().high); uint32_t offs = fr.pushPtr(v.i64reg().low); freeI64(v.i64reg()); #endif v.setOffs(Stk::MemI64, offs); break; } case Stk::LocalF64: { ScratchF64 scratch(*this); loadF64(v, scratch); uint32_t offs = fr.pushDouble(scratch); v.setOffs(Stk::MemF64, offs); break; } case Stk::RegisterF64: { uint32_t offs = fr.pushDouble(v.f64reg()); freeF64(v.f64reg()); v.setOffs(Stk::MemF64, offs); break; } case Stk::LocalF32: { ScratchF32 scratch(*this); loadF32(v, scratch); uint32_t offs = fr.pushFloat32(scratch); v.setOffs(Stk::MemF32, offs); break; } case Stk::RegisterF32: { uint32_t offs = fr.pushFloat32(v.f32reg()); freeF32(v.f32reg()); v.setOffs(Stk::MemF32, offs); break; } #ifdef ENABLE_WASM_SIMD case Stk::LocalV128: { ScratchV128 scratch(*this); loadV128(v, scratch); uint32_t offs = fr.pushV128(scratch); v.setOffs(Stk::MemV128, offs); break; } case Stk::RegisterV128: { uint32_t offs = fr.pushV128(v.v128reg()); freeV128(v.v128reg()); v.setOffs(Stk::MemV128, offs); break; } #endif case Stk::LocalRef: { ScratchPtr scratch(*this); loadLocalRef(v, scratch); uint32_t offs = fr.pushPtr(scratch); v.setOffs(Stk::MemRef, offs); stackMapGenerator_.memRefsOnStk++; break; } case Stk::RegisterRef: { uint32_t offs = fr.pushPtr(v.refReg()); freeRef(v.refReg()); v.setOffs(Stk::MemRef, offs); stackMapGenerator_.memRefsOnStk++; break; } default: { break; } } } } void saveTempPtr(RegPtr r) final { MOZ_ASSERT(!ra.isAvailablePtr(r)); fr.pushPtr(r); ra.freePtr(r); MOZ_ASSERT(ra.isAvailablePtr(r)); } void restoreTempPtr(RegPtr r) final { MOZ_ASSERT(ra.isAvailablePtr(r)); ra.needPtr(r); fr.popPtr(r); MOZ_ASSERT(!ra.isAvailablePtr(r)); } // Various methods for creating a stack map. 
Stack maps are indexed by the // lowest address of the instruction immediately *after* the instruction of // interest. In practice that means either: the return point of a call, the // instruction immediately after a trap instruction (the "resume" // instruction), or the instruction immediately following a no-op (when // debugging is enabled). // Create a vanilla stack map. [[nodiscard]] bool createStackMap(const char* who) { const ExitStubMapVector noExtras; return createStackMap(who, noExtras, masm.currentOffset()); } // Create a stack map as vanilla, but for a custom assembler offset. [[nodiscard]] bool createStackMap(const char* who, CodeOffset assemblerOffset) { const ExitStubMapVector noExtras; return createStackMap(who, noExtras, assemblerOffset.offset()); } // The most general stack map construction. [[nodiscard]] bool createStackMap(const char* who, const ExitStubMapVector& extras, uint32_t assemblerOffset) { auto debugFrame = compilerEnv_.debugEnabled() ? HasDebugFrame::Yes : HasDebugFrame::No; return stackMapGenerator_.createStackMap(who, extras, assemblerOffset, debugFrame, stk_); } // This is an optimization used to avoid calling sync() for // setLocal(): if the local does not exist unresolved on the stack // then we can skip the sync. bool hasLocal(uint32_t slot) { for (size_t i = stk_.length(); i > 0; i--) { // Memory opcodes are first in the enum, single check against MemLast is // fine. Stk::Kind kind = stk_[i - 1].kind(); if (kind <= Stk::MemLast) { return false; } // Local opcodes follow memory opcodes in the enum, single check against // LocalLast is sufficient. if (kind <= Stk::LocalLast && stk_[i - 1].slot() == slot) { return true; } } return false; } void syncLocal(uint32_t slot) { if (hasLocal(slot)) { sync(); // TODO / OPTIMIZE: Improve this? (Bug 1316817) } } // Push the register r onto the stack. void pushI32(RegI32 r) { MOZ_ASSERT(!isAvailableI32(r)); push(Stk(r)); } void pushI64(RegI64 r) { MOZ_ASSERT(!isAvailableI64(r)); push(Stk(r)); } void pushRef(RegPtr r) { MOZ_ASSERT(!isAvailableRef(r)); push(Stk(r)); } void pushF64(RegF64 r) { MOZ_ASSERT(!isAvailableF64(r)); push(Stk(r)); } void pushF32(RegF32 r) { MOZ_ASSERT(!isAvailableF32(r)); push(Stk(r)); } #ifdef ENABLE_WASM_SIMD void pushV128(RegV128 r) { MOZ_ASSERT(!isAvailableV128(r)); push(Stk(r)); } #endif // Push the value onto the stack. void pushI32(int32_t v) { push(Stk(v)); } void pushI64(int64_t v) { push(Stk(v)); } void pushRef(intptr_t v) { pushConstRef(v); } void pushF64(double v) { push(Stk(v)); } void pushF32(float v) { push(Stk(v)); } #ifdef ENABLE_WASM_SIMD void pushV128(V128 v) { push(Stk(v)); } #endif // Push the local slot onto the stack. The slot will not be read // here; it will be read when it is consumed, or when a side // effect to the slot forces its value to be saved. void pushLocalI32(uint32_t slot) { stk_.infallibleEmplaceBack(Stk(Stk::LocalI32, slot)); } void pushLocalI64(uint32_t slot) { stk_.infallibleEmplaceBack(Stk(Stk::LocalI64, slot)); } void pushLocalRef(uint32_t slot) { stk_.infallibleEmplaceBack(Stk(Stk::LocalRef, slot)); } void pushLocalF64(uint32_t slot) { stk_.infallibleEmplaceBack(Stk(Stk::LocalF64, slot)); } void pushLocalF32(uint32_t slot) { stk_.infallibleEmplaceBack(Stk(Stk::LocalF32, slot)); } #ifdef ENABLE_WASM_SIMD void pushLocalV128(uint32_t slot) { stk_.infallibleEmplaceBack(Stk(Stk::LocalV128, slot)); } #endif // Call only from other popI32() variants. // v must be the stack top. May pop the CPU stack. 
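// The popX(specific) family that follows shares one shape: if the stack top
// already sits in the requested register there is nothing to do; otherwise
// acquire the register (which may sync), materialize the value into it, and
// release whatever register the value previously occupied. A compact
// standalone model (illustrative only; Elem and Alloc are stand-ins for the
// value-stack element and register allocator):
//
//   struct Reg {
//     int code;
//     bool operator==(const Reg& other) const { return code == other.code; }
//   };
//
//   template <typename Elem, typename Alloc>
//   Reg popToSpecific(Elem& top, Reg specific, Alloc& ra) {
//     if (!(top.inRegister() && top.reg() == specific)) {
//       ra.need(specific);              // may spill/sync other users
//       top.materializeInto(specific);  // constant, local, memory, or reg
//       if (top.inRegister()) {
//         ra.free(top.reg());           // the old register is now dead
//       }
//     }
//     return specific;
//   }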
void popI32(const Stk& v, RegI32 dest) { MOZ_ASSERT(&v == &stk_.back()); switch (v.kind()) { case Stk::ConstI32: loadConstI32(v, dest); break; case Stk::LocalI32: loadLocalI32(v, dest); break; case Stk::MemI32: fr.popPtr(dest); break; case Stk::RegisterI32: loadRegisterI32(v, dest); break; default: MOZ_CRASH("Compiler bug: expected int on stack"); } } [[nodiscard]] RegI32 popI32() { Stk& v = stk_.back(); RegI32 r; if (v.kind() == Stk::RegisterI32) { r = v.i32reg(); } else { popI32(v, (r = needI32())); } stk_.popBack(); return r; } RegI32 popI32(RegI32 specific) { Stk& v = stk_.back(); if (!(v.kind() == Stk::RegisterI32 && v.i32reg() == specific)) { needI32(specific); popI32(v, specific); if (v.kind() == Stk::RegisterI32) { freeI32(v.i32reg()); } } stk_.popBack(); return specific; } #ifdef ENABLE_WASM_SIMD // Call only from other popV128() variants. // v must be the stack top. May pop the CPU stack. void popV128(const Stk& v, RegV128 dest) { MOZ_ASSERT(&v == &stk_.back()); switch (v.kind()) { case Stk::ConstV128: loadConstV128(v, dest); break; case Stk::LocalV128: loadLocalV128(v, dest); break; case Stk::MemV128: fr.popV128(dest); break; case Stk::RegisterV128: loadRegisterV128(v, dest); break; default: MOZ_CRASH("Compiler bug: expected int on stack"); } } [[nodiscard]] RegV128 popV128() { Stk& v = stk_.back(); RegV128 r; if (v.kind() == Stk::RegisterV128) { r = v.v128reg(); } else { popV128(v, (r = needV128())); } stk_.popBack(); return r; } RegV128 popV128(RegV128 specific) { Stk& v = stk_.back(); if (!(v.kind() == Stk::RegisterV128 && v.v128reg() == specific)) { needV128(specific); popV128(v, specific); if (v.kind() == Stk::RegisterV128) { freeV128(v.v128reg()); } } stk_.popBack(); return specific; } #endif // Call only from other popI64() variants. // v must be the stack top. May pop the CPU stack. void popI64(const Stk& v, RegI64 dest) { MOZ_ASSERT(&v == &stk_.back()); switch (v.kind()) { case Stk::ConstI64: loadConstI64(v, dest); break; case Stk::LocalI64: loadLocalI64(v, dest); break; case Stk::MemI64: #ifdef JS_PUNBOX64 fr.popPtr(dest.reg); #else fr.popPtr(dest.low); fr.popPtr(dest.high); #endif break; case Stk::RegisterI64: loadRegisterI64(v, dest); break; default: MOZ_CRASH("Compiler bug: expected long on stack"); } } [[nodiscard]] RegI64 popI64() { Stk& v = stk_.back(); RegI64 r; if (v.kind() == Stk::RegisterI64) { r = v.i64reg(); } else { popI64(v, (r = needI64())); } stk_.popBack(); return r; } // Note, the stack top can be in one half of "specific" on 32-bit // systems. We can optimize, but for simplicity, if the register // does not match exactly, then just force the stack top to memory // and then read it back in. RegI64 popI64(RegI64 specific) { Stk& v = stk_.back(); if (!(v.kind() == Stk::RegisterI64 && v.i64reg() == specific)) { needI64(specific); popI64(v, specific); if (v.kind() == Stk::RegisterI64) { freeI64(v.i64reg()); } } stk_.popBack(); return specific; } // Call only from other popRef() variants. // v must be the stack top. May pop the CPU stack. 
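  // The ref-typed cases below must also keep stackMapGenerator_.memRefsOnStk
  // in step with the value stack: it counts the Stk::MemRef entries currently
  // on stk_ (incremented by sync() and pushResults(), decremented by the
  // poppers), which the stack map machinery relies on.  Conceptually, as an
  // illustrative check rather than code that exists in the compiler:
  //
  //   size_t memRefs = 0;
  //   for (const Stk& v : stk_) {
  //     memRefs += (v.kind() == Stk::MemRef) ? 1 : 0;
  //   }
  //   MOZ_ASSERT(memRefs == stackMapGenerator_.memRefsOnStk);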
void popRef(const Stk& v, RegPtr dest) { MOZ_ASSERT(&v == &stk_.back()); switch (v.kind()) { case Stk::ConstRef: loadConstRef(v, dest); break; case Stk::LocalRef: loadLocalRef(v, dest); break; case Stk::MemRef: fr.popPtr(dest); break; case Stk::RegisterRef: loadRegisterRef(v, dest); break; default: MOZ_CRASH("Compiler bug: expected ref on stack"); } } RegPtr popRef(RegPtr specific) { Stk& v = stk_.back(); if (!(v.kind() == Stk::RegisterRef && v.refReg() == specific)) { needRef(specific); popRef(v, specific); if (v.kind() == Stk::RegisterRef) { freeRef(v.refReg()); } } stk_.popBack(); if (v.kind() == Stk::MemRef) { stackMapGenerator_.memRefsOnStk--; } return specific; } [[nodiscard]] RegPtr popRef() { Stk& v = stk_.back(); RegPtr r; if (v.kind() == Stk::RegisterRef) { r = v.refReg(); } else { popRef(v, (r = needRef())); } stk_.popBack(); if (v.kind() == Stk::MemRef) { stackMapGenerator_.memRefsOnStk--; } return r; } // Call only from other popF64() variants. // v must be the stack top. May pop the CPU stack. void popF64(const Stk& v, RegF64 dest) { MOZ_ASSERT(&v == &stk_.back()); switch (v.kind()) { case Stk::ConstF64: loadConstF64(v, dest); break; case Stk::LocalF64: loadLocalF64(v, dest); break; case Stk::MemF64: fr.popDouble(dest); break; case Stk::RegisterF64: loadRegisterF64(v, dest); break; default: MOZ_CRASH("Compiler bug: expected double on stack"); } } [[nodiscard]] RegF64 popF64() { Stk& v = stk_.back(); RegF64 r; if (v.kind() == Stk::RegisterF64) { r = v.f64reg(); } else { popF64(v, (r = needF64())); } stk_.popBack(); return r; } RegF64 popF64(RegF64 specific) { Stk& v = stk_.back(); if (!(v.kind() == Stk::RegisterF64 && v.f64reg() == specific)) { needF64(specific); popF64(v, specific); if (v.kind() == Stk::RegisterF64) { freeF64(v.f64reg()); } } stk_.popBack(); return specific; } // Call only from other popF32() variants. // v must be the stack top. May pop the CPU stack. 
  void popF32(const Stk& v, RegF32 dest) {
    MOZ_ASSERT(&v == &stk_.back());
    switch (v.kind()) {
      case Stk::ConstF32:
        loadConstF32(v, dest);
        break;
      case Stk::LocalF32:
        loadLocalF32(v, dest);
        break;
      case Stk::MemF32:
        fr.popFloat32(dest);
        break;
      case Stk::RegisterF32:
        loadRegisterF32(v, dest);
        break;
      default:
        MOZ_CRASH("Compiler bug: expected float on stack");
    }
  }

  [[nodiscard]] RegF32 popF32() {
    Stk& v = stk_.back();
    RegF32 r;
    if (v.kind() == Stk::RegisterF32) {
      r = v.f32reg();
    } else {
      popF32(v, (r = needF32()));
    }
    stk_.popBack();
    return r;
  }

  RegF32 popF32(RegF32 specific) {
    Stk& v = stk_.back();

    if (!(v.kind() == Stk::RegisterF32 && v.f32reg() == specific)) {
      needF32(specific);
      popF32(v, specific);
      if (v.kind() == Stk::RegisterF32) {
        freeF32(v.f32reg());
      }
    }

    stk_.popBack();
    return specific;
  }

  [[nodiscard]] bool popConstI32(int32_t* c) {
    Stk& v = stk_.back();
    if (v.kind() != Stk::ConstI32) {
      return false;
    }
    *c = v.i32val();
    stk_.popBack();
    return true;
  }

  [[nodiscard]] bool popConstI64(int64_t* c) {
    Stk& v = stk_.back();
    if (v.kind() != Stk::ConstI64) {
      return false;
    }
    *c = v.i64val();
    stk_.popBack();
    return true;
  }

  [[nodiscard]] bool peekConstI32(int32_t* c) {
    Stk& v = stk_.back();
    if (v.kind() != Stk::ConstI32) {
      return false;
    }
    *c = v.i32val();
    return true;
  }

  [[nodiscard]] bool peekConstI64(int64_t* c) {
    Stk& v = stk_.back();
    if (v.kind() != Stk::ConstI64) {
      return false;
    }
    *c = v.i64val();
    return true;
  }

  [[nodiscard]] bool peek2xI32(int32_t* c0, int32_t* c1) {
    MOZ_ASSERT(stk_.length() >= 2);
    const Stk& v0 = *(stk_.end() - 1);
    const Stk& v1 = *(stk_.end() - 2);
    if (v0.kind() != Stk::ConstI32 || v1.kind() != Stk::ConstI32) {
      return false;
    }
    *c0 = v0.i32val();
    *c1 = v1.i32val();
    return true;
  }

  [[nodiscard]] bool popConstPositivePowerOfTwoI32(int32_t* c,
                                                   uint_fast8_t* power,
                                                   int32_t cutoff) {
    Stk& v = stk_.back();
    if (v.kind() != Stk::ConstI32) {
      return false;
    }
    *c = v.i32val();
    if (*c <= cutoff || !IsPowerOfTwo(static_cast<uint32_t>(*c))) {
      return false;
    }
    *power = FloorLog2(*c);
    stk_.popBack();
    return true;
  }

  [[nodiscard]] bool popConstPositivePowerOfTwoI64(int64_t* c,
                                                   uint_fast8_t* power,
                                                   int64_t cutoff) {
    Stk& v = stk_.back();
    if (v.kind() != Stk::ConstI64) {
      return false;
    }
    *c = v.i64val();
    if (*c <= cutoff || !IsPowerOfTwo(static_cast<uint64_t>(*c))) {
      return false;
    }
    *power = FloorLog2(*c);
    stk_.popBack();
    return true;
  }

  [[nodiscard]] bool peekLocalI32(uint32_t* local) {
    Stk& v = stk_.back();
    if (v.kind() != Stk::LocalI32) {
      return false;
    }
    *local = v.slot();
    return true;
  }

  // TODO / OPTIMIZE (Bug 1316818): At the moment we use the Wasm
  // inter-procedure ABI for block returns, which allocates ReturnReg as the
  // single block result register. It is possible other choices would lead to
  // better register allocation, as ReturnReg is often first in the register
  // set and will be heavily wanted by the register allocator that uses
  // takeFirst().
  //
  // Obvious options:
  //  - pick a register at the back of the register set
  //  - pick a random register per block (different blocks have different
  //    join regs)

  void popRegisterResults(ABIResultIter& iter) {
    // Pop register results. Note that in the single-value case, popping to a
    // register may cause a sync(); for multi-value we sync'd already.
    for (; !iter.done(); iter.next()) {
      const ABIResult& result = iter.cur();
      if (!result.inRegister()) {
        // TODO / OPTIMIZE: We sync here to avoid solving the general parallel
        // move problem in popStackResults. However we could avoid syncing the
        // values that are going to registers anyway, if they are already in
        // registers.
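        //
        // ("The general parallel move problem": for example, if result a
        // currently sits in the register assigned to result b and vice versa,
        // moving either one first clobbers the other; syncing everything to
        // memory first sidesteps such cycles at the cost of extra stores and
        // loads.)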
sync(); break; } switch (result.type().kind()) { case ValType::I32: popI32(RegI32(result.gpr())); break; case ValType::I64: popI64(RegI64(result.gpr64())); break; case ValType::F32: popF32(RegF32(result.fpr())); break; case ValType::F64: popF64(RegF64(result.fpr())); break; case ValType::Ref: popRef(RegPtr(result.gpr())); break; case ValType::V128: #ifdef ENABLE_WASM_SIMD popV128(RegV128(result.fpr())); #else MOZ_CRASH("No SIMD support"); #endif } } } void popStackResults(ABIResultIter& iter, StackHeight stackBase) { MOZ_ASSERT(!iter.done()); // The iterator should be advanced beyond register results, and register // results should be popped already from the value stack. uint32_t alreadyPopped = iter.index(); // At this point, only stack arguments are remaining. Iterate through them // to measure how much stack space they will take up. for (; !iter.done(); iter.next()) { MOZ_ASSERT(iter.cur().onStack()); } // Calculate the space needed to store stack results, in bytes. uint32_t stackResultBytes = iter.stackBytesConsumedSoFar(); MOZ_ASSERT(stackResultBytes); // Compute the stack height including the stack results. Note that it's // possible that this call expands the stack, for example if some of the // results are supplied by constants and so are not already on the machine // stack. uint32_t endHeight = fr.prepareStackResultArea(stackBase, stackResultBytes); // Find a free GPR to use when shuffling stack values. If none is // available, push ReturnReg and restore it after we're done. bool saved = false; RegPtr temp = ra.needTempPtr(RegPtr(ReturnReg), &saved); // The sequence of Stk values is in the same order on the machine stack as // the result locations, but there is a complication: constant values are // not actually pushed on the machine stack. (At this point registers and // locals have been spilled already.) So, moving the Stk values into place // isn't simply a shuffle-down or shuffle-up operation. There is a part of // the Stk sequence that shuffles toward the FP, a part that's already in // place, and a part that shuffles toward the SP. After shuffling, we have // to materialize the constants. // Shuffle mem values toward the frame pointer, copying deepest values // first. Stop when we run out of results, get to a register result, or // find a Stk value that is closer to the FP than the result. for (iter.switchToPrev(); !iter.done(); iter.prev()) { const ABIResult& result = iter.cur(); if (!result.onStack()) { break; } MOZ_ASSERT(result.stackOffset() < stackResultBytes); uint32_t destHeight = endHeight - result.stackOffset(); uint32_t stkBase = stk_.length() - (iter.count() - alreadyPopped); Stk& v = stk_[stkBase + iter.index()]; if (v.isMem()) { uint32_t srcHeight = v.offs(); if (srcHeight <= destHeight) { break; } fr.shuffleStackResultsTowardFP(srcHeight, destHeight, result.size(), temp); } } // Reset iterator and skip register results. for (iter.reset(); !iter.done(); iter.next()) { if (iter.cur().onStack()) { break; } } // Revisit top stack values, shuffling mem values toward the stack pointer, // copying shallowest values first. 
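    // (Heights grow away from the frame pointer: the loop above ran while
    // srcHeight > destHeight, i.e. while the value sat farther from the FP
    // than its result slot; this loop handles the opposite case, srcHeight <
    // destHeight, copying values toward the SP.)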
for (; !iter.done(); iter.next()) { const ABIResult& result = iter.cur(); MOZ_ASSERT(result.onStack()); MOZ_ASSERT(result.stackOffset() < stackResultBytes); uint32_t destHeight = endHeight - result.stackOffset(); Stk& v = stk_[stk_.length() - (iter.index() - alreadyPopped) - 1]; if (v.isMem()) { uint32_t srcHeight = v.offs(); if (srcHeight >= destHeight) { break; } fr.shuffleStackResultsTowardSP(srcHeight, destHeight, result.size(), temp); } } // Reset iterator and skip register results, which are already popped off // the value stack. for (iter.reset(); !iter.done(); iter.next()) { if (iter.cur().onStack()) { break; } } // Materialize constants and pop the remaining items from the value stack. for (; !iter.done(); iter.next()) { const ABIResult& result = iter.cur(); uint32_t resultHeight = endHeight - result.stackOffset(); Stk& v = stk_.back(); switch (v.kind()) { case Stk::ConstI32: fr.storeImmediatePtrToStack(uint32_t(v.i32val_), resultHeight, temp); break; case Stk::ConstF32: fr.storeImmediateF32ToStack(v.f32val_, resultHeight, temp); break; case Stk::ConstI64: fr.storeImmediateI64ToStack(v.i64val_, resultHeight, temp); break; case Stk::ConstF64: fr.storeImmediateF64ToStack(v.f64val_, resultHeight, temp); break; #ifdef ENABLE_WASM_SIMD case Stk::ConstV128: fr.storeImmediateV128ToStack(v.v128val_, resultHeight, temp); break; #endif case Stk::ConstRef: fr.storeImmediatePtrToStack(v.refval_, resultHeight, temp); break; case Stk::MemRef: // Update bookkeeping as we pop the Stk entry. stackMapGenerator_.memRefsOnStk--; break; default: MOZ_ASSERT(v.isMem()); break; } stk_.popBack(); } ra.freeTempPtr(temp, saved); // This will pop the stack if needed. fr.finishStackResultArea(stackBase, stackResultBytes); } enum class ContinuationKind { Fallthrough, Jump }; void popBlockResults(ResultType type, StackHeight stackBase, ContinuationKind kind) { if (!type.empty()) { ABIResultIter iter(type); popRegisterResults(iter); if (!iter.done()) { popStackResults(iter, stackBase); // Because popStackResults might clobber the stack, it leaves the stack // pointer already in the right place for the continuation, whether the // continuation is a jump or fallthrough. return; } } // We get here if there are no stack results. For a fallthrough, the stack // is already at the right height. For a jump, we may need to pop the stack // pointer if the continuation's stack height is lower than the current // stack height. if (kind == ContinuationKind::Jump) { fr.popStackBeforeBranch(stackBase, type); } } Stk captureStackResult(const ABIResult& result, StackHeight resultsBase, uint32_t stackResultBytes) { MOZ_ASSERT(result.onStack()); uint32_t offs = fr.locateStackResult(result, resultsBase, stackResultBytes); return Stk::StackResult(result.type(), offs); } MOZ_MUST_USE bool pushResults(ResultType type, StackHeight resultsBase) { if (type.empty()) { return true; } if (type.length() > 1) { if (!stk_.reserve(stk_.length() + type.length() + MaxPushesPerOpcode)) { return false; } } // We need to push the results in reverse order, so first iterate through // all results to determine the locations of stack result types. 
ABIResultIter iter(type); while (!iter.done()) { iter.next(); } uint32_t stackResultBytes = iter.stackBytesConsumedSoFar(); for (iter.switchToPrev(); !iter.done(); iter.prev()) { const ABIResult& result = iter.cur(); if (!result.onStack()) { break; } Stk v = captureStackResult(result, resultsBase, stackResultBytes); push(v); if (v.kind() == Stk::MemRef) { stackMapGenerator_.memRefsOnStk++; } } for (; !iter.done(); iter.prev()) { const ABIResult& result = iter.cur(); MOZ_ASSERT(result.inRegister()); switch (result.type().kind()) { case ValType::I32: pushI32(RegI32(result.gpr())); break; case ValType::I64: pushI64(RegI64(result.gpr64())); break; case ValType::V128: #ifdef ENABLE_WASM_SIMD pushV128(RegV128(result.fpr())); break; #else MOZ_CRASH("No SIMD support"); #endif case ValType::F32: pushF32(RegF32(result.fpr())); break; case ValType::F64: pushF64(RegF64(result.fpr())); break; case ValType::Ref: pushRef(RegPtr(result.gpr())); break; } } return true; } MOZ_MUST_USE bool pushBlockResults(ResultType type) { return pushResults(type, controlItem().stackHeight); } // A combination of popBlockResults + pushBlockResults, used when entering a // block with a control-flow join (loops) or split (if) to shuffle the // fallthrough block parameters into the locations expected by the // continuation. MOZ_MUST_USE bool topBlockParams(ResultType type) { // This function should only be called when entering a block with a // control-flow join at the entry, where there are no live temporaries in // the current block. StackHeight base = controlItem().stackHeight; MOZ_ASSERT(fr.stackResultsBase(stackConsumed(type.length())) == base); popBlockResults(type, base, ContinuationKind::Fallthrough); return pushBlockResults(type); } // A combination of popBlockResults + pushBlockResults, used before branches // where we don't know the target (br_if / br_table). If and when the branch // is taken, the stack results will be shuffled down into place. For br_if // that has fallthrough, the parameters for the untaken branch flow through to // the continuation. MOZ_MUST_USE bool topBranchParams(ResultType type, StackHeight* height) { if (type.empty()) { *height = fr.stackHeight(); return true; } // There may be temporary values that need spilling; delay computation of // the stack results base until after the popRegisterResults(), which spills // if needed. ABIResultIter iter(type); popRegisterResults(iter); StackHeight base = fr.stackResultsBase(stackConsumed(iter.remaining())); if (!iter.done()) { popStackResults(iter, base); } if (!pushResults(type, base)) { return false; } *height = base; return true; } // Conditional branches with fallthrough are preceded by a topBranchParams, so // we know that there are no stack results that need to be materialized. In // that case, we can just shuffle the whole block down before popping the // stack. void shuffleStackResultsBeforeBranch(StackHeight srcHeight, StackHeight destHeight, ResultType type) { uint32_t stackResultBytes = 0; if (ABIResultIter::HasStackResults(type)) { MOZ_ASSERT(stk_.length() >= type.length()); ABIResultIter iter(type); for (; !iter.done(); iter.next()) { #ifdef DEBUG const ABIResult& result = iter.cur(); const Stk& v = stk_[stk_.length() - iter.index() - 1]; MOZ_ASSERT(v.isMem() == result.onStack()); #endif } stackResultBytes = iter.stackBytesConsumedSoFar(); MOZ_ASSERT(stackResultBytes > 0); if (srcHeight != destHeight) { // Find a free GPR to use when shuffling stack values. If none // is available, push ReturnReg and restore it after we're done. 
bool saved = false; RegPtr temp = ra.needTempPtr(RegPtr(ReturnReg), &saved); fr.shuffleStackResultsTowardFP(srcHeight, destHeight, stackResultBytes, temp); ra.freeTempPtr(temp, saved); } } fr.popStackBeforeBranch(destHeight, stackResultBytes); } // Return the amount of execution stack consumed by the top numval // values on the value stack. size_t stackConsumed(size_t numval) { size_t size = 0; MOZ_ASSERT(numval <= stk_.length()); for (uint32_t i = stk_.length() - 1; numval > 0; numval--, i--) { Stk& v = stk_[i]; switch (v.kind()) { case Stk::MemRef: size += BaseStackFrame::StackSizeOfPtr; break; case Stk::MemI32: size += BaseStackFrame::StackSizeOfPtr; break; case Stk::MemI64: size += BaseStackFrame::StackSizeOfInt64; break; case Stk::MemF64: size += BaseStackFrame::StackSizeOfDouble; break; case Stk::MemF32: size += BaseStackFrame::StackSizeOfFloat; break; #ifdef ENABLE_WASM_SIMD case Stk::MemV128: size += BaseStackFrame::StackSizeOfV128; break; #endif default: break; } } return size; } void popValueStackTo(uint32_t stackSize) { for (uint32_t i = stk_.length(); i > stackSize; i--) { Stk& v = stk_[i - 1]; switch (v.kind()) { case Stk::RegisterI32: freeI32(v.i32reg()); break; case Stk::RegisterI64: freeI64(v.i64reg()); break; case Stk::RegisterF64: freeF64(v.f64reg()); break; case Stk::RegisterF32: freeF32(v.f32reg()); break; #ifdef ENABLE_WASM_SIMD case Stk::RegisterV128: freeV128(v.v128reg()); break; #endif case Stk::RegisterRef: freeRef(v.refReg()); break; case Stk::MemRef: stackMapGenerator_.memRefsOnStk--; break; default: break; } } stk_.shrinkTo(stackSize); } void popValueStackBy(uint32_t items) { popValueStackTo(stk_.length() - items); } void dropValue() { if (peek(0).isMem()) { fr.popBytes(stackConsumed(1)); } popValueStackBy(1); } // Peek at the stack, for calls. Stk& peek(uint32_t relativeDepth) { return stk_[stk_.length() - 1 - relativeDepth]; } #ifdef DEBUG // Check that we're not leaking registers by comparing the // state of the stack + available registers with the set of // all available registers. // Call this between opcodes. void performRegisterLeakCheck() { BaseRegAlloc::LeakCheck check(ra); for (size_t i = 0; i < stk_.length(); i++) { Stk& item = stk_[i]; switch (item.kind_) { case Stk::RegisterI32: check.addKnownI32(item.i32reg()); break; case Stk::RegisterI64: check.addKnownI64(item.i64reg()); break; case Stk::RegisterF32: check.addKnownF32(item.f32reg()); break; case Stk::RegisterF64: check.addKnownF64(item.f64reg()); break; # ifdef ENABLE_WASM_SIMD case Stk::RegisterV128: check.addKnownV128(item.v128reg()); break; # endif case Stk::RegisterRef: check.addKnownRef(item.refReg()); break; default: break; } } } void assertStackInvariants() const { if (deadCode_) { // Nonlocal control flow can pass values in stack locations in a way that // isn't accounted for by the value stack. In dead code, which occurs // after unconditional non-local control flow, there is no invariant to // assert. 
return; } size_t size = 0; for (const Stk& v : stk_) { switch (v.kind()) { case Stk::MemRef: size += BaseStackFrame::StackSizeOfPtr; break; case Stk::MemI32: size += BaseStackFrame::StackSizeOfPtr; break; case Stk::MemI64: size += BaseStackFrame::StackSizeOfInt64; break; case Stk::MemF64: size += BaseStackFrame::StackSizeOfDouble; break; case Stk::MemF32: size += BaseStackFrame::StackSizeOfFloat; break; # ifdef ENABLE_WASM_SIMD case Stk::MemV128: size += BaseStackFrame::StackSizeOfV128; break; # endif default: MOZ_ASSERT(!v.isMem()); break; } } MOZ_ASSERT(size == fr.dynamicHeight()); } #endif //////////////////////////////////////////////////////////// // // Control stack void initControl(Control& item, ResultType params) { // Make sure the constructor was run properly MOZ_ASSERT(!item.stackHeight.isValid() && item.stackSize == UINT32_MAX); uint32_t paramCount = deadCode_ ? 0 : params.length(); uint32_t stackParamSize = stackConsumed(paramCount); item.stackHeight = fr.stackResultsBase(stackParamSize); item.stackSize = stk_.length() - paramCount; item.deadOnArrival = deadCode_; item.bceSafeOnEntry = bceSafe_; } Control& controlItem() { return iter_.controlItem(); } Control& controlItem(uint32_t relativeDepth) { return iter_.controlItem(relativeDepth); } Control& controlOutermost() { return iter_.controlOutermost(); } //////////////////////////////////////////////////////////// // // Labels void insertBreakablePoint(CallSiteDesc::Kind kind) { fr.loadTlsPtr(WasmTlsReg); masm.nopPatchableToCall(CallSiteDesc(iter_.lastOpcodeOffset(), kind)); } ////////////////////////////////////////////////////////////////////// // // Function prologue and epilogue. [[nodiscard]] bool beginFunction() { JitSpew(JitSpew_Codegen, "# ========================================"); JitSpew(JitSpew_Codegen, "# Emitting wasm baseline code"); JitSpew(JitSpew_Codegen, "# beginFunction: start of function prologue for index %d", (int)func_.index); // Make a start on the stack map for this function. Inspect the args so // as to determine which of them are both in-memory and pointer-typed, and // add entries to machineStackTracker as appropriate. ArgTypeVector args(funcType()); size_t inboundStackArgBytes = StackArgAreaSizeUnaligned(args); MOZ_ASSERT(inboundStackArgBytes % sizeof(void*) == 0); stackMapGenerator_.numStackArgWords = inboundStackArgBytes / sizeof(void*); MOZ_ASSERT(stackMapGenerator_.machineStackTracker.length() == 0); if (!stackMapGenerator_.machineStackTracker.pushNonGCPointers( stackMapGenerator_.numStackArgWords)) { return false; } // Identify GC-managed pointers passed on the stack. for (WasmABIArgIter i(args); !i.done(); i++) { ABIArg argLoc = *i; if (argLoc.kind() == ABIArg::Stack && args[i.index()] == MIRType::RefOrNull) { uint32_t offset = argLoc.offsetFromArgBase(); MOZ_ASSERT(offset < inboundStackArgBytes); MOZ_ASSERT(offset % sizeof(void*) == 0); stackMapGenerator_.machineStackTracker.setGCPointer(offset / sizeof(void*)); } } GenerateFunctionPrologue(masm, *moduleEnv_.funcs[func_.index].typeId, compilerEnv_.mode() == CompileMode::Tier1 ? Some(func_.index) : Nothing(), &offsets_); // GenerateFunctionPrologue pushes exactly one wasm::Frame's worth of // stuff, and none of the values are GC pointers. Hence: if (!stackMapGenerator_.machineStackTracker.pushNonGCPointers( sizeof(Frame) / sizeof(void*))) { return false; } // Initialize DebugFrame fields before the stack overflow trap so that // we have the invariant that all observable Frames in a debugEnabled // Module have valid DebugFrames. 
if (compilerEnv_.debugEnabled()) { #ifdef JS_CODEGEN_ARM64 static_assert(DebugFrame::offsetOfFrame() % WasmStackAlignment == 0, "aligned"); #endif masm.reserveStack(DebugFrame::offsetOfFrame()); if (!stackMapGenerator_.machineStackTracker.pushNonGCPointers( DebugFrame::offsetOfFrame() / sizeof(void*))) { return false; } masm.store32( Imm32(func_.index), Address(masm.getStackPointer(), DebugFrame::offsetOfFuncIndex())); masm.store32(Imm32(0), Address(masm.getStackPointer(), DebugFrame::offsetOfFlags())); // No need to initialize cachedReturnJSValue_ or any ref-typed spilled // register results, as they are traced if and only if a corresponding // flag (hasCachedReturnJSValue or hasSpilledRefRegisterResult) is set. } // Generate a stack-overflow check and its associated stack map. fr.checkStack(ABINonArgReg0, BytecodeOffset(func_.lineOrBytecode)); ExitStubMapVector extras; if (!stackMapGenerator_.generateStackmapEntriesForTrapExit(args, &extras)) { return false; } if (!createStackMap("stack check", extras, masm.currentOffset())) { return false; } size_t reservedBytes = fr.fixedAllocSize() - masm.framePushed(); MOZ_ASSERT(0 == (reservedBytes % sizeof(void*))); masm.reserveStack(reservedBytes); fr.onFixedStackAllocated(); if (!stackMapGenerator_.machineStackTracker.pushNonGCPointers( reservedBytes / sizeof(void*))) { return false; } // Locals are stack allocated. Mark ref-typed ones in the stackmap // accordingly. for (const Local& l : localInfo_) { // Locals that are stack arguments were already added to the stack map // before pushing the frame. if (l.type == MIRType::RefOrNull && !l.isStackArgument()) { uint32_t offs = fr.localOffsetFromSp(l); MOZ_ASSERT(0 == (offs % sizeof(void*))); stackMapGenerator_.machineStackTracker.setGCPointer(offs / sizeof(void*)); } } // Copy arguments from registers to stack. for (WasmABIArgIter i(args); !i.done(); i++) { if (args.isSyntheticStackResultPointerArg(i.index())) { // If there are stack results and the pointer to stack results // was passed in a register, store it to the stack. if (i->argInRegister()) { fr.storeIncomingStackResultAreaPtr(RegPtr(i->gpr())); } // If we're in a debug frame, copy the stack result pointer arg // to a well-known place. if (compilerEnv_.debugEnabled()) { Register target = ABINonArgReturnReg0; fr.loadIncomingStackResultAreaPtr(RegPtr(target)); size_t debugFrameOffset = masm.framePushed() - DebugFrame::offsetOfFrame(); size_t debugStackResultsPointerOffset = debugFrameOffset + DebugFrame::offsetOfStackResultsPointer(); masm.storePtr(target, Address(masm.getStackPointer(), debugStackResultsPointerOffset)); } continue; } if (!i->argInRegister()) { continue; } Local& l = localInfo_[args.naturalIndex(i.index())]; switch (i.mirType()) { case MIRType::Int32: fr.storeLocalI32(RegI32(i->gpr()), l); break; case MIRType::Int64: fr.storeLocalI64(RegI64(i->gpr64()), l); break; case MIRType::RefOrNull: { DebugOnly offs = fr.localOffsetFromSp(l); MOZ_ASSERT(0 == (offs % sizeof(void*))); fr.storeLocalPtr(RegPtr(i->gpr()), l); // We should have just visited this local in the preceding loop. 
MOZ_ASSERT(stackMapGenerator_.machineStackTracker.isGCPointer( offs / sizeof(void*))); break; } case MIRType::Double: fr.storeLocalF64(RegF64(i->fpu()), l); break; case MIRType::Float32: fr.storeLocalF32(RegF32(i->fpu()), l); break; #ifdef ENABLE_WASM_SIMD case MIRType::Simd128: fr.storeLocalV128(RegV128(i->fpu()), l); break; #endif default: MOZ_CRASH("Function argument type"); } } fr.zeroLocals(&ra); fr.storeTlsPtr(WasmTlsReg); if (compilerEnv_.debugEnabled()) { insertBreakablePoint(CallSiteDesc::EnterFrame); if (!createStackMap("debug: breakable point")) { return false; } } JitSpew(JitSpew_Codegen, "# beginFunction: enter body with masm.framePushed = %u", masm.framePushed()); MOZ_ASSERT(stackMapGenerator_.framePushedAtEntryToBody.isNothing()); stackMapGenerator_.framePushedAtEntryToBody.emplace(masm.framePushed()); return true; } void popStackReturnValues(const ResultType& resultType) { uint32_t bytes = ABIResultIter::MeasureStackBytes(resultType); if (bytes == 0) { return; } Register target = ABINonArgReturnReg0; Register temp = ABINonArgReturnReg1; fr.loadIncomingStackResultAreaPtr(RegPtr(target)); fr.popStackResultsToMemory(target, bytes, temp); } void saveRegisterReturnValues(const ResultType& resultType) { MOZ_ASSERT(compilerEnv_.debugEnabled()); size_t debugFrameOffset = masm.framePushed() - DebugFrame::offsetOfFrame(); size_t registerResultIdx = 0; for (ABIResultIter i(resultType); !i.done(); i.next()) { const ABIResult result = i.cur(); if (!result.inRegister()) { #ifdef DEBUG for (i.next(); !i.done(); i.next()) { MOZ_ASSERT(!i.cur().inRegister()); } #endif break; } size_t resultOffset = DebugFrame::offsetOfRegisterResult(registerResultIdx); Address dest(masm.getStackPointer(), debugFrameOffset + resultOffset); switch (result.type().kind()) { case ValType::I32: masm.store32(RegI32(result.gpr()), dest); break; case ValType::I64: masm.store64(RegI64(result.gpr64()), dest); break; case ValType::F64: masm.storeDouble(RegF64(result.fpr()), dest); break; case ValType::F32: masm.storeFloat32(RegF32(result.fpr()), dest); break; case ValType::Ref: { uint32_t flag = DebugFrame::hasSpilledRegisterRefResultBitMask(registerResultIdx); // Tell Instance::traceFrame that we have a pointer to trace. 
masm.or32(Imm32(flag), Address(masm.getStackPointer(), debugFrameOffset + DebugFrame::offsetOfFlags())); masm.storePtr(RegPtr(result.gpr()), dest); break; } case ValType::V128: #ifdef ENABLE_WASM_SIMD masm.storeUnalignedSimd128(RegV128(result.fpr()), dest); break; #else MOZ_CRASH("No SIMD support"); #endif } registerResultIdx++; } } void restoreRegisterReturnValues(const ResultType& resultType) { MOZ_ASSERT(compilerEnv_.debugEnabled()); size_t debugFrameOffset = masm.framePushed() - DebugFrame::offsetOfFrame(); size_t registerResultIdx = 0; for (ABIResultIter i(resultType); !i.done(); i.next()) { const ABIResult result = i.cur(); if (!result.inRegister()) { #ifdef DEBUG for (i.next(); !i.done(); i.next()) { MOZ_ASSERT(!i.cur().inRegister()); } #endif break; } size_t resultOffset = DebugFrame::offsetOfRegisterResult(registerResultIdx++); Address src(masm.getStackPointer(), debugFrameOffset + resultOffset); switch (result.type().kind()) { case ValType::I32: masm.load32(src, RegI32(result.gpr())); break; case ValType::I64: masm.load64(src, RegI64(result.gpr64())); break; case ValType::F64: masm.loadDouble(src, RegF64(result.fpr())); break; case ValType::F32: masm.loadFloat32(src, RegF32(result.fpr())); break; case ValType::Ref: masm.loadPtr(src, RegPtr(result.gpr())); break; case ValType::V128: #ifdef ENABLE_WASM_SIMD masm.loadUnalignedSimd128(src, RegV128(result.fpr())); break; #else MOZ_CRASH("No SIMD support"); #endif } } } [[nodiscard]] bool endFunction() { JitSpew(JitSpew_Codegen, "# endFunction: start of function epilogue"); // Always branch to returnLabel_. masm.breakpoint(); // Patch the add in the prologue so that it checks against the correct // frame size. Flush the constant pool in case it needs to be patched. masm.flush(); // Precondition for patching. if (masm.oom()) { return false; } fr.patchCheckStack(); masm.bind(&returnLabel_); ResultType resultType(ResultType::Vector(funcType().results())); popStackReturnValues(resultType); if (compilerEnv_.debugEnabled()) { // Store and reload the return value from DebugFrame::return so that // it can be clobbered, and/or modified by the debug trap. saveRegisterReturnValues(resultType); insertBreakablePoint(CallSiteDesc::Breakpoint); if (!createStackMap("debug: breakpoint")) { return false; } insertBreakablePoint(CallSiteDesc::LeaveFrame); if (!createStackMap("debug: leave frame")) { return false; } restoreRegisterReturnValues(resultType); } // To satisy Tls extent invariant we need to reload WasmTlsReg because // baseline can clobber it. fr.loadTlsPtr(WasmTlsReg); GenerateFunctionEpilogue(masm, fr.fixedAllocSize(), &offsets_); #if defined(JS_ION_PERF) // FIXME - profiling code missing. No bug for this. // Note the end of the inline code and start of the OOL code. // gen->perfSpewer().noteEndInlineCode(masm); #endif JitSpew(JitSpew_Codegen, "# endFunction: end of function epilogue"); JitSpew(JitSpew_Codegen, "# endFunction: start of OOL code"); if (!generateOutOfLineCode()) { return false; } offsets_.end = masm.currentOffset(); if (!fr.checkStackHeight()) { return false; } JitSpew(JitSpew_Codegen, "# endFunction: end of OOL code for index %d", (int)func_.index); return !masm.oom(); } ////////////////////////////////////////////////////////////////////// // // Calls. 
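  // For orientation only: the actual drivers (emitCall() and friends) appear
  // later in this file and handle many more cases, but a call is assembled
  // from the helpers below roughly in this order (identifiers such as
  // baselineCall, argType, and resultsLoc are placeholders):
  //
  //   sync();                                     // flush the value stack
  //   FunctionCall baselineCall(lineOrBytecode);
  //   beginCall(baselineCall, UseABI::Wasm, InterModule::False);
  //   startCallArgs(StackArgAreaSizeUnaligned(args), &baselineCall);
  //   passArg(argType, argValue, &baselineCall);  // once per argument
  //   CodeOffset raw = callDefinition(funcIndex, baselineCall);
  //   createStackMap("sketch: call", raw);        // map live refs at the return point
  //   endCall(baselineCall, stackArgBytes);
  //   pushCallResults(baselineCall, resultType, resultsLoc);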
struct FunctionCall { explicit FunctionCall(uint32_t lineOrBytecode) : lineOrBytecode(lineOrBytecode), isInterModule(false), usesSystemAbi(false), #ifdef JS_CODEGEN_ARM hardFP(true), #endif frameAlignAdjustment(0), stackArgAreaSize(0) { } uint32_t lineOrBytecode; WasmABIArgGenerator abi; bool isInterModule; bool usesSystemAbi; #ifdef JS_CODEGEN_ARM bool hardFP; #endif size_t frameAlignAdjustment; size_t stackArgAreaSize; }; void beginCall(FunctionCall& call, UseABI useABI, InterModule interModule) { MOZ_ASSERT_IF(useABI == UseABI::Builtin, interModule == InterModule::False); call.isInterModule = interModule == InterModule::True; call.usesSystemAbi = useABI == UseABI::System; if (call.usesSystemAbi) { // Call-outs need to use the appropriate system ABI. #if defined(JS_CODEGEN_ARM) call.hardFP = UseHardFpABI(); call.abi.setUseHardFp(call.hardFP); #elif defined(JS_CODEGEN_MIPS32) call.abi.enforceO32ABI(); #endif } else { #if defined(JS_CODEGEN_ARM) MOZ_ASSERT(call.hardFP, "All private ABIs pass FP arguments in registers"); #endif } // Use masm.framePushed() because the value we want here does not depend // on the height of the frame's stack area, but the actual size of the // allocated frame. call.frameAlignAdjustment = ComputeByteAlignment( masm.framePushed() + sizeof(Frame), JitStackAlignment); } void endCall(FunctionCall& call, size_t stackSpace) { size_t adjustment = call.stackArgAreaSize + call.frameAlignAdjustment; fr.freeArgAreaAndPopBytes(adjustment, stackSpace); MOZ_ASSERT( stackMapGenerator_.framePushedExcludingOutboundCallArgs.isSome()); stackMapGenerator_.framePushedExcludingOutboundCallArgs.reset(); if (call.isInterModule) { fr.loadTlsPtr(WasmTlsReg); masm.loadWasmPinnedRegsFromTls(); masm.switchToWasmTlsRealm(ABINonArgReturnReg0, ABINonArgReturnReg1); } else if (call.usesSystemAbi) { // On x86 there are no pinned registers, so don't waste time // reloading the Tls. #ifndef JS_CODEGEN_X86 fr.loadTlsPtr(WasmTlsReg); masm.loadWasmPinnedRegsFromTls(); #endif } } void startCallArgs(size_t stackArgAreaSizeUnaligned, FunctionCall* call) { size_t stackArgAreaSizeAligned = AlignStackArgAreaSize(stackArgAreaSizeUnaligned); MOZ_ASSERT(stackArgAreaSizeUnaligned <= stackArgAreaSizeAligned); // Record the masm.framePushed() value at this point, before we push args // for the call, but including the alignment space placed above the args. // This defines the lower limit of the stackmap that will be created for // this call. MOZ_ASSERT( stackMapGenerator_.framePushedExcludingOutboundCallArgs.isNothing()); stackMapGenerator_.framePushedExcludingOutboundCallArgs.emplace( // However much we've pushed so far masm.framePushed() + // Extra space we'll push to get the frame aligned call->frameAlignAdjustment + // Extra space we'll push to get the outbound arg area 16-aligned (stackArgAreaSizeAligned - stackArgAreaSizeUnaligned)); call->stackArgAreaSize = stackArgAreaSizeAligned; size_t adjustment = call->stackArgAreaSize + call->frameAlignAdjustment; fr.allocArgArea(adjustment); } const ABIArg reservePointerArgument(FunctionCall* call) { return call->abi.next(MIRType::Pointer); } // TODO / OPTIMIZE (Bug 1316821): Note passArg is used only in one place. // (Or it was, until Luke wandered through, but that can be fixed again.) // I'm not saying we should manually inline it, but we could hoist the // dispatch into the caller and have type-specific implementations of // passArg: passArgI32(), etc. Then those might be inlined, at least in PGO // builds. 
// // The bulk of the work here (60%) is in the next() call, though. // // Notably, since next() is so expensive, StackArgAreaSizeUnaligned() // becomes expensive too. // // Somehow there could be a trick here where the sequence of argument types // (read from the input stream) leads to a cached entry for // StackArgAreaSizeUnaligned() and for how to pass arguments... // // But at least we could reduce the cost of StackArgAreaSizeUnaligned() by // first reading the argument types into a (reusable) vector, then we have // the outgoing size at low cost, and then we can pass args based on the // info we read. void passArg(ValType type, const Stk& arg, FunctionCall* call) { switch (type.kind()) { case ValType::I32: { ABIArg argLoc = call->abi.next(MIRType::Int32); if (argLoc.kind() == ABIArg::Stack) { ScratchI32 scratch(*this); loadI32(arg, scratch); masm.store32(scratch, Address(masm.getStackPointer(), argLoc.offsetFromArgBase())); } else { loadI32(arg, RegI32(argLoc.gpr())); } break; } case ValType::I64: { ABIArg argLoc = call->abi.next(MIRType::Int64); if (argLoc.kind() == ABIArg::Stack) { ScratchI32 scratch(*this); #ifdef JS_PUNBOX64 loadI64(arg, fromI32(scratch)); masm.storePtr(scratch, Address(masm.getStackPointer(), argLoc.offsetFromArgBase())); #else loadI64Low(arg, scratch); masm.store32(scratch, LowWord(Address(masm.getStackPointer(), argLoc.offsetFromArgBase()))); loadI64High(arg, scratch); masm.store32(scratch, HighWord(Address(masm.getStackPointer(), argLoc.offsetFromArgBase()))); #endif } else { loadI64(arg, RegI64(argLoc.gpr64())); } break; } case ValType::V128: { #ifdef ENABLE_WASM_SIMD ABIArg argLoc = call->abi.next(MIRType::Simd128); switch (argLoc.kind()) { case ABIArg::Stack: { ScratchV128 scratch(*this); loadV128(arg, scratch); masm.storeUnalignedSimd128( (RegV128)scratch, Address(masm.getStackPointer(), argLoc.offsetFromArgBase())); break; } case ABIArg::GPR: { MOZ_CRASH("Unexpected parameter passing discipline"); } case ABIArg::FPU: { loadV128(arg, RegV128(argLoc.fpu())); break; } # if defined(JS_CODEGEN_REGISTER_PAIR) case ABIArg::GPR_PAIR: { MOZ_CRASH("Unexpected parameter passing discipline"); } # endif case ABIArg::Uninitialized: MOZ_CRASH("Uninitialized ABIArg kind"); } break; #else MOZ_CRASH("No SIMD support"); #endif } case ValType::F64: { ABIArg argLoc = call->abi.next(MIRType::Double); switch (argLoc.kind()) { case ABIArg::Stack: { ScratchF64 scratch(*this); loadF64(arg, scratch); masm.storeDouble(scratch, Address(masm.getStackPointer(), argLoc.offsetFromArgBase())); break; } #if defined(JS_CODEGEN_REGISTER_PAIR) case ABIArg::GPR_PAIR: { # if defined(JS_CODEGEN_ARM) ScratchF64 scratch(*this); loadF64(arg, scratch); masm.ma_vxfer(scratch, argLoc.evenGpr(), argLoc.oddGpr()); break; # elif defined(JS_CODEGEN_MIPS32) ScratchF64 scratch(*this); loadF64(arg, scratch); MOZ_ASSERT(MOZ_LITTLE_ENDIAN()); masm.moveFromDoubleLo(scratch, argLoc.evenGpr()); masm.moveFromDoubleHi(scratch, argLoc.oddGpr()); break; # else MOZ_CRASH("BaseCompiler platform hook: passArg F64 pair"); # endif } #endif case ABIArg::FPU: { loadF64(arg, RegF64(argLoc.fpu())); break; } case ABIArg::GPR: { MOZ_CRASH("Unexpected parameter passing discipline"); } case ABIArg::Uninitialized: MOZ_CRASH("Uninitialized ABIArg kind"); } break; } case ValType::F32: { ABIArg argLoc = call->abi.next(MIRType::Float32); switch (argLoc.kind()) { case ABIArg::Stack: { ScratchF32 scratch(*this); loadF32(arg, scratch); masm.storeFloat32(scratch, Address(masm.getStackPointer(), argLoc.offsetFromArgBase())); break; } case 
ABIArg::GPR: { ScratchF32 scratch(*this); loadF32(arg, scratch); masm.moveFloat32ToGPR(scratch, argLoc.gpr()); break; } case ABIArg::FPU: { loadF32(arg, RegF32(argLoc.fpu())); break; } #if defined(JS_CODEGEN_REGISTER_PAIR) case ABIArg::GPR_PAIR: { MOZ_CRASH("Unexpected parameter passing discipline"); } #endif case ABIArg::Uninitialized: MOZ_CRASH("Uninitialized ABIArg kind"); } break; } case ValType::Ref: { ABIArg argLoc = call->abi.next(MIRType::RefOrNull); if (argLoc.kind() == ABIArg::Stack) { ScratchPtr scratch(*this); loadRef(arg, scratch); masm.storePtr(scratch, Address(masm.getStackPointer(), argLoc.offsetFromArgBase())); } else { loadRef(arg, RegPtr(argLoc.gpr())); } break; } } } CodeOffset callDefinition(uint32_t funcIndex, const FunctionCall& call) { CallSiteDesc desc(call.lineOrBytecode, CallSiteDesc::Func); return masm.call(desc, funcIndex); } CodeOffset callSymbolic(SymbolicAddress callee, const FunctionCall& call) { CallSiteDesc desc(call.lineOrBytecode, CallSiteDesc::Symbolic); return masm.call(desc, callee); } // Precondition: sync() CodeOffset callIndirect(uint32_t funcTypeIndex, uint32_t tableIndex, const Stk& indexVal, const FunctionCall& call) { const TypeIdDesc& funcTypeId = moduleEnv_.typeIds[funcTypeIndex]; MOZ_ASSERT(funcTypeId.kind() != TypeIdDescKind::None); const TableDesc& table = moduleEnv_.tables[tableIndex]; loadI32(indexVal, RegI32(WasmTableCallIndexReg)); CallSiteDesc desc(call.lineOrBytecode, CallSiteDesc::Dynamic); CalleeDesc callee = CalleeDesc::wasmTable(table, funcTypeId); return masm.wasmCallIndirect(desc, callee, NeedsBoundsCheck(true)); } // Precondition: sync() CodeOffset callImport(unsigned globalDataOffset, const FunctionCall& call) { CallSiteDesc desc(call.lineOrBytecode, CallSiteDesc::Dynamic); CalleeDesc callee = CalleeDesc::import(globalDataOffset); return masm.wasmCallImport(desc, callee); } CodeOffset builtinCall(SymbolicAddress builtin, const FunctionCall& call) { return callSymbolic(builtin, call); } CodeOffset builtinInstanceMethodCall(const SymbolicAddressSignature& builtin, const ABIArg& instanceArg, const FunctionCall& call) { // Builtin method calls assume the TLS register has been set. fr.loadTlsPtr(WasmTlsReg); CallSiteDesc desc(call.lineOrBytecode, CallSiteDesc::Symbolic); return masm.wasmCallBuiltinInstanceMethod( desc, instanceArg, builtin.identity, builtin.failureMode); } MOZ_MUST_USE bool pushCallResults(const FunctionCall& call, ResultType type, const StackResultsLoc& loc) { #if defined(JS_CODEGEN_ARM) // pushResults currently bypasses special case code in captureReturnedFxx() // that converts GPR results to FPR results for systemABI+softFP. If we // ever start using that combination for calls we need more code. This // assert is stronger than we need - we only care about results in return // registers - but that's OK. MOZ_ASSERT(!call.usesSystemAbi || call.hardFP); #endif return pushResults(type, fr.stackResultsBase(loc.bytes())); } ////////////////////////////////////////////////////////////////////// // // Sundry low-level code generators. // The compiler depends on moveImm32() clearing the high bits of a 64-bit // register on 64-bit systems except MIPS64 where high bits are sign extended // from lower bits. 
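  // Concretely (an illustration, not compiler code), after moveImm32(v, r)
  // the full 64-bit register holds
  //
  //   uint64_t(uint32_t(v))   // x64, ARM64, etc.: high bits cleared
  //   uint64_t(int64_t(v))    // MIPS64: high bits are the sign extension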
void moveImm32(int32_t v, RegI32 dest) { masm.move32(Imm32(v), dest); } void moveImm64(int64_t v, RegI64 dest) { masm.move64(Imm64(v), dest); } void moveImmRef(intptr_t v, RegPtr dest) { masm.movePtr(ImmWord(v), dest); } void moveImmF32(float f, RegF32 dest) { masm.loadConstantFloat32(f, dest); } void moveImmF64(double d, RegF64 dest) { masm.loadConstantDouble(d, dest); } [[nodiscard]] bool addInterruptCheck() { ScratchI32 tmp(*this); fr.loadTlsPtr(tmp); masm.wasmInterruptCheck(tmp, bytecodeOffset()); return createStackMap("addInterruptCheck"); } void jumpTable(const LabelVector& labels, Label* theTable) { // Flush constant pools to ensure that the table is never interrupted by // constant pool entries. masm.flush(); #if defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64) // Prevent nop sequences to appear in the jump table. AutoForbidNops afn(&masm); #endif masm.bind(theTable); for (uint32_t i = 0; i < labels.length(); i++) { CodeLabel cl; masm.writeCodePointer(&cl); cl.target()->bind(labels[i].offset()); masm.addCodeLabel(cl); } } void tableSwitch(Label* theTable, RegI32 switchValue, Label* dispatchCode) { masm.bind(dispatchCode); #if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86) ScratchI32 scratch(*this); CodeLabel tableCl; masm.mov(&tableCl, scratch); tableCl.target()->bind(theTable->offset()); masm.addCodeLabel(tableCl); masm.jmp(Operand(scratch, switchValue, ScalePointer)); #elif defined(JS_CODEGEN_ARM) // Flush constant pools: offset must reflect the distance from the MOV // to the start of the table; as the address of the MOV is given by the // label, nothing must come between the bind() and the ma_mov(). AutoForbidPoolsAndNops afp(&masm, /* number of instructions in scope = */ 5); ScratchI32 scratch(*this); // Compute the offset from the ma_mov instruction to the jump table. Label here; masm.bind(&here); uint32_t offset = here.offset() - theTable->offset(); // Read PC+8 masm.ma_mov(pc, scratch); // ARM scratch register is required by ma_sub. ScratchRegisterScope arm_scratch(*this); // Compute the absolute table base pointer into `scratch`, offset by 8 // to account for the fact that ma_mov read PC+8. masm.ma_sub(Imm32(offset + 8), scratch, arm_scratch); // Jump indirect via table element. 
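    // (At this point scratch holds exactly the table address: the pc read
    // above yields the address of the ma_mov plus 8, and offset was computed
    // as the distance from the table to that instruction, so subtracting
    // offset + 8 leaves the table base.  The load below then fetches
    // table[switchValue * 4] straight into pc.)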
masm.ma_ldr(DTRAddr(scratch, DtrRegImmShift(switchValue, LSL, 2)), pc, Offset, Assembler::Always); #elif defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64) ScratchI32 scratch(*this); CodeLabel tableCl; masm.ma_li(scratch, &tableCl); tableCl.target()->bind(theTable->offset()); masm.addCodeLabel(tableCl); masm.branchToComputedAddress(BaseIndex(scratch, switchValue, ScalePointer)); #elif defined(JS_CODEGEN_ARM64) AutoForbidPoolsAndNops afp(&masm, /* number of instructions in scope = */ 4); ScratchI32 scratch(*this); ARMRegister s(scratch, 64); ARMRegister v(switchValue, 64); masm.Adr(s, theTable); masm.Add(s, s, Operand(v, vixl::LSL, 3)); masm.Ldr(s, MemOperand(s, 0)); masm.Br(s); #else MOZ_CRASH("BaseCompiler platform hook: tableSwitch"); #endif } RegI32 captureReturnedI32() { RegI32 r = RegI32(ReturnReg); MOZ_ASSERT(isAvailableI32(r)); needI32(r); #if defined(JS_CODEGEN_X64) if (JitOptions.spectreIndexMasking) { masm.movl(r, r); } #endif return r; } RegI64 captureReturnedI64() { RegI64 r = RegI64(ReturnReg64); MOZ_ASSERT(isAvailableI64(r)); needI64(r); return r; } RegF32 captureReturnedF32(const FunctionCall& call) { RegF32 r = RegF32(ReturnFloat32Reg); MOZ_ASSERT(isAvailableF32(r)); needF32(r); #if defined(JS_CODEGEN_ARM) if (call.usesSystemAbi && !call.hardFP) { masm.ma_vxfer(ReturnReg, r); } #endif return r; } RegF64 captureReturnedF64(const FunctionCall& call) { RegF64 r = RegF64(ReturnDoubleReg); MOZ_ASSERT(isAvailableF64(r)); needF64(r); #if defined(JS_CODEGEN_ARM) if (call.usesSystemAbi && !call.hardFP) { masm.ma_vxfer(ReturnReg64.low, ReturnReg64.high, r); } #endif return r; } #ifdef ENABLE_WASM_SIMD RegV128 captureReturnedV128(const FunctionCall& call) { RegV128 r = RegV128(ReturnSimd128Reg); MOZ_ASSERT(isAvailableV128(r)); needV128(r); return r; } #endif RegPtr captureReturnedRef() { RegPtr r = RegPtr(ReturnReg); MOZ_ASSERT(isAvailableRef(r)); needRef(r); return r; } void checkDivideByZeroI32(RegI32 rhs) { Label nonZero; masm.branchTest32(Assembler::NonZero, rhs, rhs, &nonZero); trap(Trap::IntegerDivideByZero); masm.bind(&nonZero); } void checkDivideByZeroI64(RegI64 r) { Label nonZero; ScratchI32 scratch(*this); masm.branchTest64(Assembler::NonZero, r, r, scratch, &nonZero); trap(Trap::IntegerDivideByZero); masm.bind(&nonZero); } void checkDivideSignedOverflowI32(RegI32 rhs, RegI32 srcDest, Label* done, bool zeroOnOverflow) { Label notMin; masm.branch32(Assembler::NotEqual, srcDest, Imm32(INT32_MIN), ¬Min); if (zeroOnOverflow) { masm.branch32(Assembler::NotEqual, rhs, Imm32(-1), ¬Min); moveImm32(0, srcDest); masm.jump(done); } else { masm.branch32(Assembler::NotEqual, rhs, Imm32(-1), ¬Min); trap(Trap::IntegerOverflow); } masm.bind(¬Min); } void checkDivideSignedOverflowI64(RegI64 rhs, RegI64 srcDest, Label* done, bool zeroOnOverflow) { Label notmin; masm.branch64(Assembler::NotEqual, srcDest, Imm64(INT64_MIN), ¬min); masm.branch64(Assembler::NotEqual, rhs, Imm64(-1), ¬min); if (zeroOnOverflow) { masm.xor64(srcDest, srcDest); masm.jump(done); } else { trap(Trap::IntegerOverflow); } masm.bind(¬min); } #ifndef RABALDR_INT_DIV_I64_CALLOUT void quotientI64(RegI64 rhs, RegI64 srcDest, RegI64 reserved, IsUnsigned isUnsigned, bool isConst, int64_t c) { Label done; if (!isConst || c == 0) { checkDivideByZeroI64(rhs); } if (!isUnsigned && (!isConst || c == -1)) { checkDivideSignedOverflowI64(rhs, srcDest, &done, ZeroOnOverflow(false)); } # if defined(JS_CODEGEN_X64) // The caller must set up the following situation. 
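    // That is: x64 divides rdx:rax by the operand, leaving the quotient in
    // rax and the remainder in rdx, roughly
    //
    //   rax = rdx:rax / rhs;   // udivq / idivq
    //   rdx = rdx:rax % rhs;
    //
    // so srcDest must already be in rax and rdx must be reserved.  The
    // unsigned path zeroes rdx (xorq) and the signed path sign-extends rax
    // into rdx (cqo) before dividing.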
MOZ_ASSERT(srcDest.reg == rax); MOZ_ASSERT(reserved == specific_.rdx); if (isUnsigned) { masm.xorq(rdx, rdx); masm.udivq(rhs.reg); } else { masm.cqo(); masm.idivq(rhs.reg); } # elif defined(JS_CODEGEN_MIPS64) if (isUnsigned) { masm.as_ddivu(srcDest.reg, rhs.reg); } else { masm.as_ddiv(srcDest.reg, rhs.reg); } masm.as_mflo(srcDest.reg); # elif defined(JS_CODEGEN_ARM64) ARMRegister sd(srcDest.reg, 64); ARMRegister r(rhs.reg, 64); if (isUnsigned) { masm.Udiv(sd, sd, r); } else { masm.Sdiv(sd, sd, r); } # else MOZ_CRASH("BaseCompiler platform hook: quotientI64"); # endif masm.bind(&done); } void remainderI64(RegI64 rhs, RegI64 srcDest, RegI64 reserved, IsUnsigned isUnsigned, bool isConst, int64_t c) { Label done; if (!isConst || c == 0) { checkDivideByZeroI64(rhs); } if (!isUnsigned && (!isConst || c == -1)) { checkDivideSignedOverflowI64(rhs, srcDest, &done, ZeroOnOverflow(true)); } # if defined(JS_CODEGEN_X64) // The caller must set up the following situation. MOZ_ASSERT(srcDest.reg == rax); MOZ_ASSERT(reserved == specific_.rdx); if (isUnsigned) { masm.xorq(rdx, rdx); masm.udivq(rhs.reg); } else { masm.cqo(); masm.idivq(rhs.reg); } masm.movq(rdx, rax); # elif defined(JS_CODEGEN_MIPS64) if (isUnsigned) { masm.as_ddivu(srcDest.reg, rhs.reg); } else { masm.as_ddiv(srcDest.reg, rhs.reg); } masm.as_mfhi(srcDest.reg); # elif defined(JS_CODEGEN_ARM64) MOZ_ASSERT(reserved.isInvalid()); ARMRegister sd(srcDest.reg, 64); ARMRegister r(rhs.reg, 64); ScratchI32 temp(*this); ARMRegister t(temp, 64); if (isUnsigned) { masm.Udiv(t, sd, r); } else { masm.Sdiv(t, sd, r); } masm.Mul(t, t, r); masm.Sub(sd, sd, t); # else MOZ_CRASH("BaseCompiler platform hook: remainderI64"); # endif masm.bind(&done); } #endif // RABALDR_INT_DIV_I64_CALLOUT RegI32 needRotate64Temp() { #if defined(JS_CODEGEN_X86) return needI32(); #elif defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_ARM) || \ defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_MIPS32) || \ defined(JS_CODEGEN_MIPS64) return RegI32::Invalid(); #else MOZ_CRASH("BaseCompiler platform hook: needRotate64Temp"); #endif } void maskShiftCount32(RegI32 r) { #if defined(JS_CODEGEN_ARM) masm.and32(Imm32(31), r); #endif } RegI32 needPopcnt32Temp() { #if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) return AssemblerX86Shared::HasPOPCNT() ? RegI32::Invalid() : needI32(); #elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64) || \ defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64) return needI32(); #else MOZ_CRASH("BaseCompiler platform hook: needPopcnt32Temp"); #endif } RegI32 needPopcnt64Temp() { #if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) return AssemblerX86Shared::HasPOPCNT() ? 
RegI32::Invalid() : needI32(); #elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64) || \ defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64) return needI32(); #else MOZ_CRASH("BaseCompiler platform hook: needPopcnt64Temp"); #endif } class OutOfLineTruncateCheckF32OrF64ToI32 : public OutOfLineCode { AnyReg src; RegI32 dest; TruncFlags flags; BytecodeOffset off; public: OutOfLineTruncateCheckF32OrF64ToI32(AnyReg src, RegI32 dest, TruncFlags flags, BytecodeOffset off) : src(src), dest(dest), flags(flags), off(off) {} virtual void generate(MacroAssembler* masm) override { if (src.tag == AnyReg::F32) { masm->oolWasmTruncateCheckF32ToI32(src.f32(), dest, flags, off, rejoin()); } else if (src.tag == AnyReg::F64) { masm->oolWasmTruncateCheckF64ToI32(src.f64(), dest, flags, off, rejoin()); } else { MOZ_CRASH("unexpected type"); } } }; [[nodiscard]] bool truncateF32ToI32(RegF32 src, RegI32 dest, TruncFlags flags) { BytecodeOffset off = bytecodeOffset(); OutOfLineCode* ool = addOutOfLineCode(new (alloc_) OutOfLineTruncateCheckF32OrF64ToI32( AnyReg(src), dest, flags, off)); if (!ool) { return false; } bool isSaturating = flags & TRUNC_SATURATING; if (flags & TRUNC_UNSIGNED) { masm.wasmTruncateFloat32ToUInt32(src, dest, isSaturating, ool->entry()); } else { masm.wasmTruncateFloat32ToInt32(src, dest, isSaturating, ool->entry()); } masm.bind(ool->rejoin()); return true; } [[nodiscard]] bool truncateF64ToI32(RegF64 src, RegI32 dest, TruncFlags flags) { BytecodeOffset off = bytecodeOffset(); OutOfLineCode* ool = addOutOfLineCode(new (alloc_) OutOfLineTruncateCheckF32OrF64ToI32( AnyReg(src), dest, flags, off)); if (!ool) { return false; } bool isSaturating = flags & TRUNC_SATURATING; if (flags & TRUNC_UNSIGNED) { masm.wasmTruncateDoubleToUInt32(src, dest, isSaturating, ool->entry()); } else { masm.wasmTruncateDoubleToInt32(src, dest, isSaturating, ool->entry()); } masm.bind(ool->rejoin()); return true; } class OutOfLineTruncateCheckF32OrF64ToI64 : public OutOfLineCode { AnyReg src; RegI64 dest; TruncFlags flags; BytecodeOffset off; public: OutOfLineTruncateCheckF32OrF64ToI64(AnyReg src, RegI64 dest, TruncFlags flags, BytecodeOffset off) : src(src), dest(dest), flags(flags), off(off) {} virtual void generate(MacroAssembler* masm) override { if (src.tag == AnyReg::F32) { masm->oolWasmTruncateCheckF32ToI64(src.f32(), dest, flags, off, rejoin()); } else if (src.tag == AnyReg::F64) { masm->oolWasmTruncateCheckF64ToI64(src.f64(), dest, flags, off, rejoin()); } else { MOZ_CRASH("unexpected type"); } } }; #ifndef RABALDR_FLOAT_TO_I64_CALLOUT [[nodiscard]] RegF64 needTempForFloatingToI64(TruncFlags flags) { # if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) if (flags & TRUNC_UNSIGNED) { return needF64(); } # endif return RegF64::Invalid(); } [[nodiscard]] bool truncateF32ToI64(RegF32 src, RegI64 dest, TruncFlags flags, RegF64 temp) { OutOfLineCode* ool = addOutOfLineCode(new (alloc_) OutOfLineTruncateCheckF32OrF64ToI64( AnyReg(src), dest, flags, bytecodeOffset())); if (!ool) { return false; } bool isSaturating = flags & TRUNC_SATURATING; if (flags & TRUNC_UNSIGNED) { masm.wasmTruncateFloat32ToUInt64(src, dest, isSaturating, ool->entry(), ool->rejoin(), temp); } else { masm.wasmTruncateFloat32ToInt64(src, dest, isSaturating, ool->entry(), ool->rejoin(), temp); } return true; } [[nodiscard]] bool truncateF64ToI64(RegF64 src, RegI64 dest, TruncFlags flags, RegF64 temp) { OutOfLineCode* ool = addOutOfLineCode(new (alloc_) OutOfLineTruncateCheckF32OrF64ToI64( AnyReg(src), dest, flags, bytecodeOffset())); 
if (!ool) { return false; } bool isSaturating = flags & TRUNC_SATURATING; if (flags & TRUNC_UNSIGNED) { masm.wasmTruncateDoubleToUInt64(src, dest, isSaturating, ool->entry(), ool->rejoin(), temp); } else { masm.wasmTruncateDoubleToInt64(src, dest, isSaturating, ool->entry(), ool->rejoin(), temp); } return true; } #endif // RABALDR_FLOAT_TO_I64_CALLOUT #ifndef RABALDR_I64_TO_FLOAT_CALLOUT RegI32 needConvertI64ToFloatTemp(ValType to, bool isUnsigned) { bool needs = false; if (to == ValType::F64) { needs = isUnsigned && masm.convertUInt64ToDoubleNeedsTemp(); } else { # if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) needs = true; # endif } return needs ? needI32() : RegI32::Invalid(); } void convertI64ToF32(RegI64 src, bool isUnsigned, RegF32 dest, RegI32 temp) { if (isUnsigned) { masm.convertUInt64ToFloat32(src, dest, temp); } else { masm.convertInt64ToFloat32(src, dest); } } void convertI64ToF64(RegI64 src, bool isUnsigned, RegF64 dest, RegI32 temp) { if (isUnsigned) { masm.convertUInt64ToDouble(src, dest, temp); } else { masm.convertInt64ToDouble(src, dest); } } #endif // RABALDR_I64_TO_FLOAT_CALLOUT void cmp64Set(Assembler::Condition cond, RegI64 lhs, RegI64 rhs, RegI32 dest) { #if defined(JS_PUNBOX64) masm.cmpPtrSet(cond, lhs.reg, rhs.reg, dest); #elif defined(JS_CODEGEN_MIPS32) masm.cmp64Set(cond, lhs, rhs, dest); #else // TODO / OPTIMIZE (Bug 1316822): This is pretty branchy, we should be // able to do better. Label done, condTrue; masm.branch64(cond, lhs, rhs, &condTrue); moveImm32(0, dest); masm.jump(&done); masm.bind(&condTrue); moveImm32(1, dest); masm.bind(&done); #endif } void eqz64(RegI64 src, RegI32 dest) { #ifdef JS_PUNBOX64 masm.cmpPtrSet(Assembler::Equal, src.reg, ImmWord(0), dest); #else masm.or32(src.high, src.low); masm.cmp32Set(Assembler::Equal, src.low, Imm32(0), dest); #endif } [[nodiscard]] bool supportsRoundInstruction(RoundingMode mode) { return Assembler::HasRoundInstruction(mode); } void roundF32(RoundingMode roundingMode, RegF32 f0) { masm.nearbyIntFloat32(roundingMode, f0, f0); } void roundF64(RoundingMode roundingMode, RegF64 f0) { masm.nearbyIntDouble(roundingMode, f0, f0); } ////////////////////////////////////////////////////////////////////// // // Global variable access. Address addressOfGlobalVar(const GlobalDesc& global, RegI32 tmp) { uint32_t globalToTlsOffset = offsetof(TlsData, globalArea) + global.offset(); fr.loadTlsPtr(tmp); if (global.isIndirect()) { masm.loadPtr(Address(tmp, globalToTlsOffset), tmp); return Address(tmp, 0); } return Address(tmp, globalToTlsOffset); } ////////////////////////////////////////////////////////////////////// // // Heap access. void bceCheckLocal(MemoryAccessDesc* access, AccessCheck* check, uint32_t local) { if (local >= sizeof(BCESet) * 8) { return; } uint32_t offsetGuardLimit = GetMaxOffsetGuardLimit(moduleEnv_.hugeMemoryEnabled()); if ((bceSafe_ & (BCESet(1) << local)) && access->offset() < offsetGuardLimit) { check->omitBoundsCheck = true; } // The local becomes safe even if the offset is beyond the guard limit. bceSafe_ |= (BCESet(1) << local); } void bceLocalIsUpdated(uint32_t local) { if (local >= sizeof(BCESet) * 8) { return; } bceSafe_ &= ~(BCESet(1) << local); } void prepareMemoryAccess(MemoryAccessDesc* access, AccessCheck* check, RegI32 tls, RegI32 ptr) { uint32_t offsetGuardLimit = GetMaxOffsetGuardLimit(moduleEnv_.hugeMemoryEnabled()); // Fold offset if necessary for further computations. 
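    // ("Folding" adds the static offset into the pointer register up front so
    // that the later alignment and bounds checks test the final effective
    // address.  The CarryClear branch below turns 32-bit wraparound of
    // ptr + offset into an OutOfBounds trap, and clearOffset() keeps the
    // offset from being applied a second time by the access itself.)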
if (access->offset() >= offsetGuardLimit || (access->isAtomic() && !check->omitAlignmentCheck && !check->onlyPointerAlignment)) { Label ok; masm.branchAdd32(Assembler::CarryClear, Imm32(access->offset()), ptr, &ok); masm.wasmTrap(Trap::OutOfBounds, bytecodeOffset()); masm.bind(&ok); access->clearOffset(); check->onlyPointerAlignment = true; } // Alignment check if required. if (access->isAtomic() && !check->omitAlignmentCheck) { MOZ_ASSERT(check->onlyPointerAlignment); // We only care about the low pointer bits here. Label ok; masm.branchTest32(Assembler::Zero, ptr, Imm32(access->byteSize() - 1), &ok); masm.wasmTrap(Trap::UnalignedAccess, bytecodeOffset()); masm.bind(&ok); } // Ensure no tls if we don't need it. if (moduleEnv_.hugeMemoryEnabled()) { // We have HeapReg and no bounds checking and need load neither // memoryBase nor boundsCheckLimit from tls. MOZ_ASSERT_IF(check->omitBoundsCheck, tls.isInvalid()); } #ifdef JS_CODEGEN_ARM // We have HeapReg on ARM and don't need to load the memoryBase from tls. MOZ_ASSERT_IF(check->omitBoundsCheck, tls.isInvalid()); #endif // Bounds check if required. if (!moduleEnv_.hugeMemoryEnabled() && !check->omitBoundsCheck) { Label ok; masm.wasmBoundsCheck32( Assembler::Below, ptr, Address(tls, offsetof(TlsData, boundsCheckLimit32)), &ok); masm.wasmTrap(Trap::OutOfBounds, bytecodeOffset()); masm.bind(&ok); } } #if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_ARM) || \ defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_MIPS32) || \ defined(JS_CODEGEN_MIPS64) BaseIndex prepareAtomicMemoryAccess(MemoryAccessDesc* access, AccessCheck* check, RegI32 tls, RegI32 ptr) { MOZ_ASSERT(needTlsForAccess(*check) == tls.isValid()); prepareMemoryAccess(access, check, tls, ptr); return BaseIndex(HeapReg, ptr, TimesOne, access->offset()); } #elif defined(JS_CODEGEN_X86) // Some consumers depend on the address not retaining tls, as tls may be the // scratch register. Address prepareAtomicMemoryAccess(MemoryAccessDesc* access, AccessCheck* check, RegI32 tls, RegI32 ptr) { MOZ_ASSERT(needTlsForAccess(*check) == tls.isValid()); prepareMemoryAccess(access, check, tls, ptr); masm.addPtr(Address(tls, offsetof(TlsData, memoryBase)), ptr); return Address(ptr, access->offset()); } #else Address prepareAtomicMemoryAccess(MemoryAccessDesc* access, AccessCheck* check, RegI32 tls, RegI32 ptr) { MOZ_CRASH("BaseCompiler platform hook: prepareAtomicMemoryAccess"); } #endif void computeEffectiveAddress(MemoryAccessDesc* access) { if (access->offset()) { Label ok; RegI32 ptr = popI32(); masm.branchAdd32(Assembler::CarryClear, Imm32(access->offset()), ptr, &ok); masm.wasmTrap(Trap::OutOfBounds, bytecodeOffset()); masm.bind(&ok); access->clearOffset(); pushI32(ptr); } } void needLoadTemps(const MemoryAccessDesc& access, RegI32* temp1, RegI32* temp2, RegI32* temp3) { #if defined(JS_CODEGEN_ARM) if (IsUnaligned(access)) { switch (access.type()) { case Scalar::Float64: *temp3 = needI32(); [[fallthrough]]; case Scalar::Float32: *temp2 = needI32(); [[fallthrough]]; default: *temp1 = needI32(); break; } } #elif defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64) *temp1 = needI32(); #endif } [[nodiscard]] bool needTlsForAccess(const AccessCheck& check) { #if defined(JS_CODEGEN_X86) // x86 requires Tls for memory base return true; #else return !moduleEnv_.hugeMemoryEnabled() && !check.omitBoundsCheck; #endif } // ptr and dest may be the same iff dest is I32. // This may destroy ptr even if ptr and dest are not the same. 
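  // Roughly, callers (see loadCommon, declared later in this class) pop the
  // pointer with popMemoryAccess(), load the tls pointer only if
  // needTlsForAccess() says it is needed, and then call load(), which runs
  // prepareMemoryAccess() above before the platform-specific wasmLoad.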
[[nodiscard]] bool load(MemoryAccessDesc* access, AccessCheck* check, RegI32 tls, RegI32 ptr, AnyReg dest, RegI32 temp1, RegI32 temp2, RegI32 temp3) { prepareMemoryAccess(access, check, tls, ptr); #if defined(JS_CODEGEN_X64) Operand srcAddr(HeapReg, ptr, TimesOne, access->offset()); if (dest.tag == AnyReg::I64) { masm.wasmLoadI64(*access, srcAddr, dest.i64()); } else { masm.wasmLoad(*access, srcAddr, dest.any()); } #elif defined(JS_CODEGEN_X86) masm.addPtr(Address(tls, offsetof(TlsData, memoryBase)), ptr); Operand srcAddr(ptr, access->offset()); if (dest.tag == AnyReg::I64) { MOZ_ASSERT(dest.i64() == specific_.abiReturnRegI64); masm.wasmLoadI64(*access, srcAddr, dest.i64()); } else { // For 8 bit loads, this will generate movsbl or movzbl, so // there's no constraint on what the output register may be. masm.wasmLoad(*access, srcAddr, dest.any()); } #elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_MIPS32) || \ defined(JS_CODEGEN_MIPS64) if (IsUnaligned(*access)) { switch (dest.tag) { case AnyReg::I64: masm.wasmUnalignedLoadI64(*access, HeapReg, ptr, ptr, dest.i64(), temp1); break; case AnyReg::F32: masm.wasmUnalignedLoadFP(*access, HeapReg, ptr, ptr, dest.f32(), temp1, temp2, RegI32::Invalid()); break; case AnyReg::F64: masm.wasmUnalignedLoadFP(*access, HeapReg, ptr, ptr, dest.f64(), temp1, temp2, temp3); break; case AnyReg::I32: masm.wasmUnalignedLoad(*access, HeapReg, ptr, ptr, dest.i32(), temp1); break; default: MOZ_CRASH("Unexpected type"); } } else { if (dest.tag == AnyReg::I64) { masm.wasmLoadI64(*access, HeapReg, ptr, ptr, dest.i64()); } else { masm.wasmLoad(*access, HeapReg, ptr, ptr, dest.any()); } } #elif defined(JS_CODEGEN_ARM64) if (dest.tag == AnyReg::I64) { masm.wasmLoadI64(*access, HeapReg, ptr, dest.i64()); } else { masm.wasmLoad(*access, HeapReg, ptr, dest.any()); } #else MOZ_CRASH("BaseCompiler platform hook: load"); #endif return true; } RegI32 needStoreTemp(const MemoryAccessDesc& access, ValType srcType) { #if defined(JS_CODEGEN_ARM) if (IsUnaligned(access) && srcType != ValType::I32) { return needI32(); } #elif defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64) return needI32(); #endif return RegI32::Invalid(); } // ptr and src must not be the same register. // This may destroy ptr and src. 
[[nodiscard]] bool store(MemoryAccessDesc* access, AccessCheck* check, RegI32 tls, RegI32 ptr, AnyReg src, RegI32 temp) { prepareMemoryAccess(access, check, tls, ptr); // Emit the store #if defined(JS_CODEGEN_X64) MOZ_ASSERT(temp.isInvalid()); Operand dstAddr(HeapReg, ptr, TimesOne, access->offset()); masm.wasmStore(*access, src.any(), dstAddr); #elif defined(JS_CODEGEN_X86) MOZ_ASSERT(temp.isInvalid()); masm.addPtr(Address(tls, offsetof(TlsData, memoryBase)), ptr); Operand dstAddr(ptr, access->offset()); if (access->type() == Scalar::Int64) { masm.wasmStoreI64(*access, src.i64(), dstAddr); } else { AnyRegister value; ScratchI8 scratch(*this); if (src.tag == AnyReg::I64) { if (access->byteSize() == 1 && !ra.isSingleByteI32(src.i64().low)) { masm.mov(src.i64().low, scratch); value = AnyRegister(scratch); } else { value = AnyRegister(src.i64().low); } } else if (access->byteSize() == 1 && !ra.isSingleByteI32(src.i32())) { masm.mov(src.i32(), scratch); value = AnyRegister(scratch); } else { value = src.any(); } masm.wasmStore(*access, value, dstAddr); } #elif defined(JS_CODEGEN_ARM) if (IsUnaligned(*access)) { switch (src.tag) { case AnyReg::I64: masm.wasmUnalignedStoreI64(*access, src.i64(), HeapReg, ptr, ptr, temp); break; case AnyReg::F32: masm.wasmUnalignedStoreFP(*access, src.f32(), HeapReg, ptr, ptr, temp); break; case AnyReg::F64: masm.wasmUnalignedStoreFP(*access, src.f64(), HeapReg, ptr, ptr, temp); break; case AnyReg::I32: MOZ_ASSERT(temp.isInvalid()); masm.wasmUnalignedStore(*access, src.i32(), HeapReg, ptr, ptr, temp); break; default: MOZ_CRASH("Unexpected type"); } } else { MOZ_ASSERT(temp.isInvalid()); if (access->type() == Scalar::Int64) { masm.wasmStoreI64(*access, src.i64(), HeapReg, ptr, ptr); } else if (src.tag == AnyReg::I64) { masm.wasmStore(*access, AnyRegister(src.i64().low), HeapReg, ptr, ptr); } else { masm.wasmStore(*access, src.any(), HeapReg, ptr, ptr); } } #elif defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64) if (IsUnaligned(*access)) { switch (src.tag) { case AnyReg::I64: masm.wasmUnalignedStoreI64(*access, src.i64(), HeapReg, ptr, ptr, temp); break; case AnyReg::F32: masm.wasmUnalignedStoreFP(*access, src.f32(), HeapReg, ptr, ptr, temp); break; case AnyReg::F64: masm.wasmUnalignedStoreFP(*access, src.f64(), HeapReg, ptr, ptr, temp); break; case AnyReg::I32: masm.wasmUnalignedStore(*access, src.i32(), HeapReg, ptr, ptr, temp); break; default: MOZ_CRASH("Unexpected type"); } } else { if (src.tag == AnyReg::I64) { masm.wasmStoreI64(*access, src.i64(), HeapReg, ptr, ptr); } else { masm.wasmStore(*access, src.any(), HeapReg, ptr, ptr); } } #elif defined(JS_CODEGEN_ARM64) MOZ_ASSERT(temp.isInvalid()); if (access->type() == Scalar::Int64) { masm.wasmStoreI64(*access, src.i64(), HeapReg, ptr); } else { masm.wasmStore(*access, src.any(), HeapReg, ptr); } #else MOZ_CRASH("BaseCompiler platform hook: store"); #endif return true; } template struct Atomic32Temps : mozilla::Array { // Allocate all temp registers if 'allocate' is not specified. 
  void allocate(BaseCompiler* bc, size_t allocate = Count) {
    static_assert(Count != 0);
    for (size_t i = 0; i < allocate; ++i) {
      this->operator[](i) = bc->needI32();
    }
  }
  void maybeFree(BaseCompiler* bc) {
    for (size_t i = 0; i < Count; ++i) {
      bc->maybeFreeI32(this->operator[](i));
    }
  }
};

#if defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
  using AtomicRMW32Temps = Atomic32Temps<3>;
#else
  using AtomicRMW32Temps = Atomic32Temps<1>;
#endif

  template <typename T>
  void atomicRMW32(const MemoryAccessDesc& access, T srcAddr, AtomicOp op,
                   RegI32 rv, RegI32 rd, const AtomicRMW32Temps& temps) {
    switch (access.type()) {
      case Scalar::Uint8:
#ifdef JS_CODEGEN_X86
      {
        RegI32 temp = temps[0];
        // The temp, if used, must be a byte register.
        MOZ_ASSERT(temp.isInvalid());
        ScratchI8 scratch(*this);
        if (op != AtomicFetchAddOp && op != AtomicFetchSubOp) {
          temp = scratch;
        }
        masm.wasmAtomicFetchOp(access, op, rv, srcAddr, temp, rd);
        break;
      }
#endif
      case Scalar::Uint16:
      case Scalar::Int32:
      case Scalar::Uint32:
#if defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
        masm.wasmAtomicFetchOp(access, op, rv, srcAddr, temps[0], temps[1],
                               temps[2], rd);
#else
        masm.wasmAtomicFetchOp(access, op, rv, srcAddr, temps[0], rd);
#endif
        break;
      default: {
        MOZ_CRASH("Bad type for atomic operation");
      }
    }
  }

  // On x86, V is Address. On other platforms, it is Register64.
  // T is BaseIndex or Address.
  template <typename T, typename V>
  void atomicRMW64(const MemoryAccessDesc& access, const T& srcAddr,
                   AtomicOp op, V value, Register64 temp, Register64 rd) {
    masm.wasmAtomicFetchOp64(access, op, value, srcAddr, temp, rd);
  }

#if defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
  using AtomicCmpXchg32Temps = Atomic32Temps<3>;
#else
  using AtomicCmpXchg32Temps = Atomic32Temps<0>;
#endif

  template <typename T>
  void atomicCmpXchg32(const MemoryAccessDesc& access, T srcAddr,
                       RegI32 rexpect, RegI32 rnew, RegI32 rd,
                       const AtomicCmpXchg32Temps& temps) {
    switch (access.type()) {
      case Scalar::Uint8:
#if defined(JS_CODEGEN_X86)
      {
        ScratchI8 scratch(*this);
        MOZ_ASSERT(rd == specific_.eax);
        if (!ra.isSingleByteI32(rnew)) {
          // The replacement value must have a byte persona.
          masm.movl(rnew, scratch);
          rnew = scratch;
        }
        masm.wasmCompareExchange(access, srcAddr, rexpect, rnew, rd);
        break;
      }
#endif
      case Scalar::Uint16:
      case Scalar::Int32:
      case Scalar::Uint32:
#if defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
        masm.wasmCompareExchange(access, srcAddr, rexpect, rnew, temps[0],
                                 temps[1], temps[2], rd);
#else
        masm.wasmCompareExchange(access, srcAddr, rexpect, rnew, rd);
#endif
        break;
      default:
        MOZ_CRASH("Bad type for atomic operation");
    }
  }

#if defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
  using AtomicXchg32Temps = Atomic32Temps<3>;
#else
  using AtomicXchg32Temps = Atomic32Temps<0>;
#endif

  template <typename T>
  void atomicXchg32(const MemoryAccessDesc& access, T srcAddr, RegI32 rv,
                    RegI32 rd, const AtomicXchg32Temps& temps) {
    switch (access.type()) {
      case Scalar::Uint8:
#if defined(JS_CODEGEN_X86)
      {
        if (!ra.isSingleByteI32(rd)) {
          ScratchI8 scratch(*this);
          // The output register must have a byte persona.
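          // Editorial note: on x86-32 only eax, ebx, ecx and edx have byte
          // subregisters (al/bl/cl/dl), which is what "byte persona" means
          // here. The exchange therefore goes through a ScratchI8 register
          // and the result is moved into rd afterwards.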
masm.wasmAtomicExchange(access, srcAddr, rv, scratch); masm.movl(scratch, rd); } else { masm.wasmAtomicExchange(access, srcAddr, rv, rd); } break; } #endif case Scalar::Uint16: case Scalar::Int32: case Scalar::Uint32: #if defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64) masm.wasmAtomicExchange(access, srcAddr, rv, temps[0], temps[1], temps[2], rd); #else masm.wasmAtomicExchange(access, srcAddr, rv, rd); #endif break; default: MOZ_CRASH("Bad type for atomic operation"); } } //////////////////////////////////////////////////////////// // // Generally speaking, ABOVE this point there should be no // value stack manipulation (calls to popI32 etc). // //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// // // Platform-specific popping and register targeting. // // These fall into two groups, popping methods for simple needs, and RAII // wrappers for more complex behavior. // The simple popping methods pop values into targeted registers; the caller // can free registers using standard functions. These are always called // popXForY where X says something about types and Y something about the // operation being targeted. void pop2xI32ForMulDivI32(RegI32* r0, RegI32* r1, RegI32* reserved) { #if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) // r0 must be eax, and edx will be clobbered. need2xI32(specific_.eax, specific_.edx); *r1 = popI32(); *r0 = popI32ToSpecific(specific_.eax); *reserved = specific_.edx; #else pop2xI32(r0, r1); #endif } void pop2xI64ForMulI64(RegI64* r0, RegI64* r1, RegI32* temp, RegI64* reserved) { #if defined(JS_CODEGEN_X64) // r0 must be rax, and rdx will be clobbered. need2xI64(specific_.rax, specific_.rdx); *r1 = popI64(); *r0 = popI64ToSpecific(specific_.rax); *reserved = specific_.rdx; #elif defined(JS_CODEGEN_X86) // As for x64, though edx is part of r0. need2xI32(specific_.eax, specific_.edx); *r1 = popI64(); *r0 = popI64ToSpecific(specific_.edx_eax); *temp = needI32(); #elif defined(JS_CODEGEN_MIPS64) pop2xI64(r0, r1); #elif defined(JS_CODEGEN_MIPS32) pop2xI64(r0, r1); *temp = needI32(); #elif defined(JS_CODEGEN_ARM) pop2xI64(r0, r1); *temp = needI32(); #elif defined(JS_CODEGEN_ARM64) pop2xI64(r0, r1); #else MOZ_CRASH("BaseCompiler porting interface: pop2xI64ForMulI64"); #endif } void pop2xI64ForDivI64(RegI64* r0, RegI64* r1, RegI64* reserved) { #if defined(JS_CODEGEN_X64) // r0 must be rax, and rdx will be clobbered. need2xI64(specific_.rax, specific_.rdx); *r1 = popI64(); *r0 = popI64ToSpecific(specific_.rax); *reserved = specific_.rdx; #else pop2xI64(r0, r1); #endif } void pop2xI32ForShift(RegI32* r0, RegI32* r1) { #if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) // r1 must be ecx for a variable shift, unless BMI2 is available. if (!Assembler::HasBMI2()) { *r1 = popI32(specific_.ecx); *r0 = popI32(); return; } #endif pop2xI32(r0, r1); } void pop2xI64ForShift(RegI64* r0, RegI64* r1) { #if defined(JS_CODEGEN_X86) // r1 must be ecx for a variable shift. needI32(specific_.ecx); *r1 = popI64ToSpecific(widenI32(specific_.ecx)); *r0 = popI64(); #else # if defined(JS_CODEGEN_X64) // r1 must be rcx for a variable shift, unless BMI2 is available. if (!Assembler::HasBMI2()) { needI64(specific_.rcx); *r1 = popI64ToSpecific(specific_.rcx); *r0 = popI64(); return; } # endif pop2xI64(r0, r1); #endif } void pop2xI32ForRotate(RegI32* r0, RegI32* r1) { #if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) // r1 must be ecx for a variable rotate. 
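    // (x86/x64 rol/ror take a variable count only in CL; unlike the shifts
    // above, there is no BMI2 form that accepts an arbitrary count register,
    // so ecx is required unconditionally here.)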
*r1 = popI32(specific_.ecx); *r0 = popI32(); #else pop2xI32(r0, r1); #endif } void pop2xI64ForRotate(RegI64* r0, RegI64* r1) { #if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) // r1 must be ecx for a variable rotate. needI32(specific_.ecx); *r1 = popI64ToSpecific(widenI32(specific_.ecx)); *r0 = popI64(); #else pop2xI64(r0, r1); #endif } void popI32ForSignExtendI64(RegI64* r0) { #if defined(JS_CODEGEN_X86) // r0 must be edx:eax for cdq need2xI32(specific_.edx, specific_.eax); *r0 = specific_.edx_eax; popI32ToSpecific(specific_.eax); #else *r0 = widenI32(popI32()); #endif } void popI64ForSignExtendI64(RegI64* r0) { #if defined(JS_CODEGEN_X86) // r0 must be edx:eax for cdq need2xI32(specific_.edx, specific_.eax); // Low on top, high underneath *r0 = popI64ToSpecific(specific_.edx_eax); #else *r0 = popI64(); #endif } // The RAII wrappers are used because we sometimes have to free partial // registers, as when part of a register is the scratch register that has // been temporarily used, or not free a register at all, as when the // register is the same as the destination register (but only on some // platforms, not on all). These are called PopX{32,64}Regs where X is the // operation being targeted. // Utility struct that holds the BaseCompiler and the destination, and frees // the destination if it has not been extracted. template class PopBase { T rd_; void maybeFree(RegI32 r) { bc->maybeFreeI32(r); } void maybeFree(RegI64 r) { bc->maybeFreeI64(r); } protected: BaseCompiler* const bc; void setRd(T r) { MOZ_ASSERT(rd_.isInvalid()); rd_ = r; } T getRd() const { MOZ_ASSERT(rd_.isValid()); return rd_; } public: explicit PopBase(BaseCompiler* bc) : bc(bc) {} ~PopBase() { maybeFree(rd_); } // Take and clear the Rd - use this when pushing Rd. T takeRd() { MOZ_ASSERT(rd_.isValid()); T r = rd_; rd_ = T::Invalid(); return r; } }; friend class PopAtomicCmpXchg32Regs; class PopAtomicCmpXchg32Regs : public PopBase { using Base = PopBase; RegI32 rexpect, rnew; AtomicCmpXchg32Temps temps; public: #if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86) explicit PopAtomicCmpXchg32Regs(BaseCompiler* bc, ValType type, Scalar::Type viewType) : Base(bc) { // For cmpxchg, the expected value and the result are both in eax. 
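    // (x86 CMPXCHG compares eax against the memory operand; on success the
    // new value is stored, on failure the current memory value is loaded
    // into eax. Hence the expected value is popped into eax below and the
    // same register also serves as the result, rd.)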
bc->needI32(bc->specific_.eax); if (type == ValType::I64) { rnew = bc->popI64ToI32(); rexpect = bc->popI64ToSpecificI32(bc->specific_.eax); } else { rnew = bc->popI32(); rexpect = bc->popI32ToSpecific(bc->specific_.eax); } setRd(rexpect); } ~PopAtomicCmpXchg32Regs() { bc->freeI32(rnew); } #elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64) explicit PopAtomicCmpXchg32Regs(BaseCompiler* bc, ValType type, Scalar::Type viewType) : Base(bc) { if (type == ValType::I64) { rnew = bc->popI64ToI32(); rexpect = bc->popI64ToI32(); } else { rnew = bc->popI32(); rexpect = bc->popI32(); } setRd(bc->needI32()); } ~PopAtomicCmpXchg32Regs() { bc->freeI32(rnew); bc->freeI32(rexpect); } #elif defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64) explicit PopAtomicCmpXchg32Regs(BaseCompiler* bc, ValType type, Scalar::Type viewType) : Base(bc) { if (type == ValType::I64) { rnew = bc->popI64ToI32(); rexpect = bc->popI64ToI32(); } else { rnew = bc->popI32(); rexpect = bc->popI32(); } if (Scalar::byteSize(viewType) < 4) { temps.allocate(bc); } setRd(bc->needI32()); } ~PopAtomicCmpXchg32Regs() { bc->freeI32(rnew); bc->freeI32(rexpect); temps.maybeFree(bc); } #else explicit PopAtomicCmpXchg32Regs(BaseCompiler* bc, ValType type, Scalar::Type viewType) : Base(bc) { MOZ_CRASH("BaseCompiler porting interface: PopAtomicCmpXchg32Regs"); } #endif template void atomicCmpXchg32(const MemoryAccessDesc& access, T srcAddr) { bc->atomicCmpXchg32(access, srcAddr, rexpect, rnew, getRd(), temps); } }; friend class PopAtomicCmpXchg64Regs; class PopAtomicCmpXchg64Regs : public PopBase { using Base = PopBase; RegI64 rexpect, rnew; public: #ifdef JS_CODEGEN_X64 explicit PopAtomicCmpXchg64Regs(BaseCompiler* bc) : Base(bc) { // For cmpxchg, the expected value and the result are both in rax. bc->needI64(bc->specific_.rax); rnew = bc->popI64(); rexpect = bc->popI64ToSpecific(bc->specific_.rax); setRd(rexpect); } ~PopAtomicCmpXchg64Regs() { bc->freeI64(rnew); } #elif defined(JS_CODEGEN_X86) explicit PopAtomicCmpXchg64Regs(BaseCompiler* bc) : Base(bc) { // For cmpxchg8b, the expected value and the result are both in // edx:eax, and the replacement value is in ecx:ebx. But we can't // allocate ebx here, so instead we allocate a temp to hold the low // word of 'new'. bc->needI64(bc->specific_.edx_eax); bc->needI32(bc->specific_.ecx); rnew = bc->popI64ToSpecific( RegI64(Register64(bc->specific_.ecx, bc->needI32()))); rexpect = bc->popI64ToSpecific(bc->specific_.edx_eax); setRd(rexpect); } ~PopAtomicCmpXchg64Regs() { bc->freeI64(rnew); } #elif defined(JS_CODEGEN_ARM) explicit PopAtomicCmpXchg64Regs(BaseCompiler* bc) : Base(bc) { // The replacement value and the result must both be odd/even pairs. 
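    // (ARM LDREXD/STREXD operate on a consecutive even/odd register pair,
    // e.g. r2/r3, which is why popI64Pair()/needI64Pair() are used below
    // rather than the ordinary popI64()/needI64().)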
rnew = bc->popI64Pair(); rexpect = bc->popI64(); setRd(bc->needI64Pair()); } ~PopAtomicCmpXchg64Regs() { bc->freeI64(rexpect); bc->freeI64(rnew); } #elif defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_MIPS32) || \ defined(JS_CODEGEN_MIPS64) explicit PopAtomicCmpXchg64Regs(BaseCompiler* bc) : Base(bc) { rnew = bc->popI64(); rexpect = bc->popI64(); setRd(bc->needI64()); } ~PopAtomicCmpXchg64Regs() { bc->freeI64(rexpect); bc->freeI64(rnew); } #else explicit PopAtomicCmpXchg64Regs(BaseCompiler* bc) : Base(bc) { MOZ_CRASH("BaseCompiler porting interface: PopAtomicCmpXchg64Regs"); } #endif #ifdef JS_CODEGEN_X86 template void atomicCmpXchg64(const MemoryAccessDesc& access, T srcAddr, RegI32 ebx) { MOZ_ASSERT(ebx == js::jit::ebx); bc->masm.move32(rnew.low, ebx); bc->masm.wasmCompareExchange64(access, srcAddr, rexpect, bc->specific_.ecx_ebx, getRd()); } #else template void atomicCmpXchg64(const MemoryAccessDesc& access, T srcAddr) { bc->masm.wasmCompareExchange64(access, srcAddr, rexpect, rnew, getRd()); } #endif }; #ifndef JS_64BIT class PopAtomicLoad64Regs : public PopBase { using Base = PopBase; public: # if defined(JS_CODEGEN_X86) explicit PopAtomicLoad64Regs(BaseCompiler* bc) : Base(bc) { // The result is in edx:eax, and we need ecx:ebx as a temp. But we // can't reserve ebx yet, so we'll accept it as an argument to the // operation (below). bc->needI32(bc->specific_.ecx); bc->needI64(bc->specific_.edx_eax); setRd(bc->specific_.edx_eax); } ~PopAtomicLoad64Regs() { bc->freeI32(bc->specific_.ecx); } # elif defined(JS_CODEGEN_ARM) explicit PopAtomicLoad64Regs(BaseCompiler* bc) : Base(bc) { setRd(bc->needI64Pair()); } # elif defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64) explicit PopAtomicLoad64Regs(BaseCompiler* bc) : Base(bc) { setRd(bc->needI64()); } # else explicit PopAtomicLoad64Regs(BaseCompiler* bc) : Base(bc) { MOZ_CRASH("BaseCompiler porting interface: PopAtomicLoad64Regs"); } # endif # ifdef JS_CODEGEN_X86 template void atomicLoad64(const MemoryAccessDesc& access, T srcAddr, RegI32 ebx) { MOZ_ASSERT(ebx == js::jit::ebx); bc->masm.wasmAtomicLoad64(access, srcAddr, bc->specific_.ecx_ebx, getRd()); } # else // ARM, MIPS32 template void atomicLoad64(const MemoryAccessDesc& access, T srcAddr) { bc->masm.wasmAtomicLoad64(access, srcAddr, RegI64::Invalid(), getRd()); } # endif }; #endif // JS_64BIT friend class PopAtomicRMW32Regs; class PopAtomicRMW32Regs : public PopBase { using Base = PopBase; RegI32 rv; AtomicRMW32Temps temps; public: #if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86) explicit PopAtomicRMW32Regs(BaseCompiler* bc, ValType type, Scalar::Type viewType, AtomicOp op) : Base(bc) { bc->needI32(bc->specific_.eax); if (op == AtomicFetchAddOp || op == AtomicFetchSubOp) { // We use xadd, so source and destination are the same. Using // eax here is overconstraining, but for byte operations on x86 // we do need something with a byte register. if (type == ValType::I64) { rv = bc->popI64ToSpecificI32(bc->specific_.eax); } else { rv = bc->popI32ToSpecific(bc->specific_.eax); } setRd(rv); } else { // We use a cmpxchg loop. The output must be eax; the input // must be in a separate register since it may be used several // times. if (type == ValType::I64) { rv = bc->popI64ToI32(); } else { rv = bc->popI32(); } setRd(bc->specific_.eax); # if defined(JS_CODEGEN_X86) // Single-byte is a special case handled very locally with // ScratchReg, see atomicRMW32 above. 
if (Scalar::byteSize(viewType) > 1) { temps.allocate(bc); } # else temps.allocate(bc); # endif } } ~PopAtomicRMW32Regs() { if (rv != bc->specific_.eax) { bc->freeI32(rv); } temps.maybeFree(bc); } #elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64) explicit PopAtomicRMW32Regs(BaseCompiler* bc, ValType type, Scalar::Type viewType, AtomicOp op) : Base(bc) { rv = type == ValType::I64 ? bc->popI64ToI32() : bc->popI32(); temps.allocate(bc); setRd(bc->needI32()); } ~PopAtomicRMW32Regs() { bc->freeI32(rv); temps.maybeFree(bc); } #elif defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64) explicit PopAtomicRMW32Regs(BaseCompiler* bc, ValType type, Scalar::Type viewType, AtomicOp op) : Base(bc) { rv = type == ValType::I64 ? bc->popI64ToI32() : bc->popI32(); if (Scalar::byteSize(viewType) < 4) { temps.allocate(bc); } setRd(bc->needI32()); } ~PopAtomicRMW32Regs() { bc->freeI32(rv); temps.maybeFree(bc); } #else explicit PopAtomicRMW32Regs(BaseCompiler* bc, ValType type, Scalar::Type viewType, AtomicOp op) : Base(bc) { MOZ_CRASH("BaseCompiler porting interface: PopAtomicRMW32Regs"); } #endif template void atomicRMW32(const MemoryAccessDesc& access, T srcAddr, AtomicOp op) { bc->atomicRMW32(access, srcAddr, op, rv, getRd(), temps); } }; friend class PopAtomicRMW64Regs; class PopAtomicRMW64Regs : public PopBase { using Base = PopBase; #if defined(JS_CODEGEN_X64) AtomicOp op; #endif RegI64 rv, temp; public: #if defined(JS_CODEGEN_X64) explicit PopAtomicRMW64Regs(BaseCompiler* bc, AtomicOp op) : Base(bc), op(op) { if (op == AtomicFetchAddOp || op == AtomicFetchSubOp) { // We use xaddq, so input and output must be the same register. rv = bc->popI64(); setRd(rv); } else { // We use a cmpxchgq loop, so the output must be rax. bc->needI64(bc->specific_.rax); rv = bc->popI64(); temp = bc->needI64(); setRd(bc->specific_.rax); } } ~PopAtomicRMW64Regs() { bc->maybeFreeI64(temp); if (op != AtomicFetchAddOp && op != AtomicFetchSubOp) { bc->freeI64(rv); } } #elif defined(JS_CODEGEN_X86) // We'll use cmpxchg8b, so rv must be in ecx:ebx, and rd must be // edx:eax. But we can't reserve ebx here because we need it later, so // use a separate temp and set up ebx when we perform the operation. explicit PopAtomicRMW64Regs(BaseCompiler* bc, AtomicOp) : Base(bc) { bc->needI32(bc->specific_.ecx); bc->needI64(bc->specific_.edx_eax); temp = RegI64(Register64(bc->specific_.ecx, bc->needI32())); bc->popI64ToSpecific(temp); setRd(bc->specific_.edx_eax); } ~PopAtomicRMW64Regs() { bc->freeI64(temp); } RegI32 valueHigh() const { return RegI32(temp.high); } RegI32 valueLow() const { return RegI32(temp.low); } #elif defined(JS_CODEGEN_ARM) explicit PopAtomicRMW64Regs(BaseCompiler* bc, AtomicOp) : Base(bc) { // We use a ldrex/strexd loop so the temp and the output must be // odd/even pairs. 
rv = bc->popI64(); temp = bc->needI64Pair(); setRd(bc->needI64Pair()); } ~PopAtomicRMW64Regs() { bc->freeI64(rv); bc->freeI64(temp); } #elif defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_MIPS32) || \ defined(JS_CODEGEN_MIPS64) explicit PopAtomicRMW64Regs(BaseCompiler* bc, AtomicOp) : Base(bc) { rv = bc->popI64(); temp = bc->needI64(); setRd(bc->needI64()); } ~PopAtomicRMW64Regs() { bc->freeI64(rv); bc->freeI64(temp); } #else explicit PopAtomicRMW64Regs(BaseCompiler* bc, AtomicOp) : Base(bc) { MOZ_CRASH("BaseCompiler porting interface: PopAtomicRMW64Regs"); } #endif #ifdef JS_CODEGEN_X86 template void atomicRMW64(const MemoryAccessDesc& access, T srcAddr, AtomicOp op, const V& value, RegI32 ebx) { MOZ_ASSERT(ebx == js::jit::ebx); bc->atomicRMW64(access, srcAddr, op, value, bc->specific_.ecx_ebx, getRd()); } #else template void atomicRMW64(const MemoryAccessDesc& access, T srcAddr, AtomicOp op) { bc->atomicRMW64(access, srcAddr, op, rv, temp, getRd()); } #endif }; friend class PopAtomicXchg32Regs; class PopAtomicXchg32Regs : public PopBase { using Base = PopBase; RegI32 rv; AtomicXchg32Temps temps; public: #if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86) explicit PopAtomicXchg32Regs(BaseCompiler* bc, ValType type, Scalar::Type viewType) : Base(bc) { // The xchg instruction reuses rv as rd. rv = (type == ValType::I64) ? bc->popI64ToI32() : bc->popI32(); setRd(rv); } #elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64) explicit PopAtomicXchg32Regs(BaseCompiler* bc, ValType type, Scalar::Type viewType) : Base(bc) { rv = (type == ValType::I64) ? bc->popI64ToI32() : bc->popI32(); setRd(bc->needI32()); } ~PopAtomicXchg32Regs() { bc->freeI32(rv); } #elif defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64) explicit PopAtomicXchg32Regs(BaseCompiler* bc, ValType type, Scalar::Type viewType) : Base(bc) { rv = (type == ValType::I64) ? bc->popI64ToI32() : bc->popI32(); if (Scalar::byteSize(viewType) < 4) { temps.allocate(bc); } setRd(bc->needI32()); } ~PopAtomicXchg32Regs() { temps.maybeFree(bc); bc->freeI32(rv); } #else explicit PopAtomicXchg32Regs(BaseCompiler* bc, ValType type, Scalar::Type viewType) : Base(bc) { MOZ_CRASH("BaseCompiler porting interface: PopAtomicXchg32Regs"); } #endif template void atomicXchg32(const MemoryAccessDesc& access, T srcAddr) { bc->atomicXchg32(access, srcAddr, rv, getRd(), temps); } }; friend class PopAtomicXchg64Regs; class PopAtomicXchg64Regs : public PopBase { using Base = PopBase; RegI64 rv; public: #if defined(JS_CODEGEN_X64) explicit PopAtomicXchg64Regs(BaseCompiler* bc) : Base(bc) { rv = bc->popI64(); setRd(rv); } #elif defined(JS_CODEGEN_ARM64) explicit PopAtomicXchg64Regs(BaseCompiler* bc) : Base(bc) { rv = bc->popI64(); setRd(bc->needI64()); } ~PopAtomicXchg64Regs() { bc->freeI64(rv); } #elif defined(JS_CODEGEN_X86) // We'll use cmpxchg8b, so rv must be in ecx:ebx, and rd must be // edx:eax. But we can't reserve ebx here because we need it later, so // use a separate temp and set up ebx when we perform the operation. explicit PopAtomicXchg64Regs(BaseCompiler* bc) : Base(bc) { bc->needI32(bc->specific_.ecx); bc->needI64(bc->specific_.edx_eax); rv = RegI64(Register64(bc->specific_.ecx, bc->needI32())); bc->popI64ToSpecific(rv); setRd(bc->specific_.edx_eax); } ~PopAtomicXchg64Regs() { bc->freeI64(rv); } #elif defined(JS_CODEGEN_ARM) // Both rv and rd must be odd/even pairs. 
explicit PopAtomicXchg64Regs(BaseCompiler* bc) : Base(bc) { rv = bc->popI64ToSpecific(bc->needI64Pair()); setRd(bc->needI64Pair()); } ~PopAtomicXchg64Regs() { bc->freeI64(rv); } #elif defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64) explicit PopAtomicXchg64Regs(BaseCompiler* bc) : Base(bc) { rv = bc->popI64ToSpecific(bc->needI64()); setRd(bc->needI64()); } ~PopAtomicXchg64Regs() { bc->freeI64(rv); } #else explicit PopAtomicXchg64Regs(BaseCompiler* bc) : Base(bc) { MOZ_CRASH("BaseCompiler porting interface: xchg64"); } #endif #ifdef JS_CODEGEN_X86 template void atomicXchg64(const MemoryAccessDesc& access, T srcAddr, RegI32 ebx) const { MOZ_ASSERT(ebx == js::jit::ebx); bc->masm.move32(rv.low, ebx); bc->masm.wasmAtomicExchange64(access, srcAddr, bc->specific_.ecx_ebx, getRd()); } #else template void atomicXchg64(const MemoryAccessDesc& access, T srcAddr) const { bc->masm.wasmAtomicExchange64(access, srcAddr, rv, getRd()); } #endif }; //////////////////////////////////////////////////////////// // // Generally speaking, BELOW this point there should be no // platform dependencies. We make very occasional exceptions // when it doesn't become messy and further abstraction is // not desirable. // //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// // // Sundry wrappers. void pop2xI32(RegI32* r0, RegI32* r1) { *r1 = popI32(); *r0 = popI32(); } RegI32 popI32ToSpecific(RegI32 specific) { freeI32(specific); return popI32(specific); } void pop2xI64(RegI64* r0, RegI64* r1) { *r1 = popI64(); *r0 = popI64(); } RegI64 popI64ToSpecific(RegI64 specific) { freeI64(specific); return popI64(specific); } #ifdef JS_CODEGEN_ARM RegI64 popI64Pair() { RegI64 r = needI64Pair(); popI64ToSpecific(r); return r; } #endif void pop2xF32(RegF32* r0, RegF32* r1) { *r1 = popF32(); *r0 = popF32(); } void pop2xF64(RegF64* r0, RegF64* r1) { *r1 = popF64(); *r0 = popF64(); } #ifdef ENABLE_WASM_SIMD void pop2xV128(RegV128* r0, RegV128* r1) { *r1 = popV128(); *r0 = popV128(); } #endif void pop2xRef(RegPtr* r0, RegPtr* r1) { *r1 = popRef(); *r0 = popRef(); } RegI32 popI64ToI32() { RegI64 r = popI64(); return narrowI64(r); } RegI32 popI64ToSpecificI32(RegI32 specific) { RegI64 rd = widenI32(specific); popI64ToSpecific(rd); return narrowI64(rd); } void pushU32AsI64(RegI32 rs) { RegI64 rd = widenI32(rs); masm.move32To64ZeroExtend(rs, rd); pushI64(rd); } RegI32 popMemoryAccess(MemoryAccessDesc* access, AccessCheck* check); void pushHeapBase(); template RegType pop(); template RegType need(); template void free(RegType r); //////////////////////////////////////////////////////////// // // Sundry helpers. uint32_t readCallSiteLineOrBytecode() { if (!func_.callSiteLineNums.empty()) { return func_.callSiteLineNums[lastReadCallSite_++]; } return iter_.lastOpcodeOffset(); } bool done() const { return iter_.done(); } BytecodeOffset bytecodeOffset() const { return iter_.bytecodeOffset(); } void trap(Trap t) const { masm.wasmTrap(t, bytecodeOffset()); } //////////////////////////////////////////////////////////// // // Object support. // This emits a GC pre-write barrier. The pre-barrier is needed when we // replace a member field with a new value, and the previous field value // might have no other referents, and incremental GC is ongoing. The field // might belong to an object or be a stack slot or a register or a heap // allocated value. // // let obj = { field: previousValue }; // obj.field = newValue; // previousValue must be marked with a pre-barrier. 
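  // (Editorial note: the barrier matters under incremental GC. If marking is
  // in progress when the store happens, previousValue might never be visited
  // once the field stops referring to it, so the pre-barrier marks it before
  // it is overwritten; see EmitWasmPreBarrierGuard/Call below.)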
// // The `valueAddr` is the address of the location that we are about to // update. This function preserves that register. void emitPreBarrier(RegPtr valueAddr) { Label skipBarrier; ScratchPtr scratch(*this); fr.loadTlsPtr(scratch); EmitWasmPreBarrierGuard(masm, scratch, scratch, valueAddr, &skipBarrier); fr.loadTlsPtr(scratch); #ifdef JS_CODEGEN_ARM64 // The prebarrier stub assumes the PseudoStackPointer is set up. It is OK // to just move the sp to x28 here because x28 is not being used by the // baseline compiler and need not be saved or restored. MOZ_ASSERT(!GeneralRegisterSet::All().hasRegisterIndex(x28.asUnsized())); masm.Mov(x28, sp); #endif EmitWasmPreBarrierCall(masm, scratch, scratch, valueAddr); masm.bind(&skipBarrier); } // This frees the register `valueAddr`. [[nodiscard]] bool emitPostBarrierCall(RegPtr valueAddr) { uint32_t bytecodeOffset = iter_.lastOpcodeOffset(); // The `valueAddr` is a raw pointer to the cell within some GC object or // TLS area, and we guarantee that the GC will not run while the // postbarrier call is active, so push a uintptr_t value. #ifdef JS_64BIT pushI64(RegI64(Register64(valueAddr))); #else pushI32(RegI32(valueAddr)); #endif if (!emitInstanceCall(bytecodeOffset, SASigPostBarrier, /*pushReturnedValue=*/false)) { return false; } return true; } [[nodiscard]] bool emitBarrieredStore(const Maybe& object, RegPtr valueAddr, RegPtr value) { // TODO/AnyRef-boxing: With boxed immediates and strings, the write // barrier is going to have to be more complicated. ASSERT_ANYREF_IS_JSOBJECT; emitPreBarrier(valueAddr); // Preserves valueAddr masm.storePtr(value, Address(valueAddr, 0)); Label skipBarrier; sync(); RegPtr otherScratch = needRef(); EmitWasmPostBarrierGuard(masm, object, otherScratch, value, &skipBarrier); freeRef(otherScratch); if (!emitPostBarrierCall(valueAddr)) { return false; } masm.bind(&skipBarrier); return true; } //////////////////////////////////////////////////////////// // // Machinery for optimized conditional branches. // // To disable this optimization it is enough always to return false from // sniffConditionalControl{Cmp,Eqz}. struct BranchState { union { struct { RegI32 lhs; RegI32 rhs; int32_t imm; bool rhsImm; } i32; struct { RegI64 lhs; RegI64 rhs; int64_t imm; bool rhsImm; } i64; struct { RegF32 lhs; RegF32 rhs; } f32; struct { RegF64 lhs; RegF64 rhs; } f64; }; Label* const label; // The target of the branch, never NULL const StackHeight stackHeight; // The stack base above which to place // stack-spilled block results, if // hasBlockResults(). 
const bool invertBranch; // If true, invert the sense of the branch const ResultType resultType; // The result propagated along the edges explicit BranchState(Label* label) : label(label), stackHeight(StackHeight::Invalid()), invertBranch(false), resultType(ResultType::Empty()) {} BranchState(Label* label, bool invertBranch) : label(label), stackHeight(StackHeight::Invalid()), invertBranch(invertBranch), resultType(ResultType::Empty()) {} BranchState(Label* label, StackHeight stackHeight, bool invertBranch, ResultType resultType) : label(label), stackHeight(stackHeight), invertBranch(invertBranch), resultType(resultType) {} bool hasBlockResults() const { return stackHeight.isValid(); } }; void setLatentCompare(Assembler::Condition compareOp, ValType operandType) { latentOp_ = LatentOp::Compare; latentType_ = operandType; latentIntCmp_ = compareOp; } void setLatentCompare(Assembler::DoubleCondition compareOp, ValType operandType) { latentOp_ = LatentOp::Compare; latentType_ = operandType; latentDoubleCmp_ = compareOp; } void setLatentEqz(ValType operandType) { latentOp_ = LatentOp::Eqz; latentType_ = operandType; } bool hasLatentOp() const { return latentOp_ != LatentOp::None; } void resetLatentOp() { latentOp_ = LatentOp::None; } void branchTo(Assembler::DoubleCondition c, RegF64 lhs, RegF64 rhs, Label* l) { masm.branchDouble(c, lhs, rhs, l); } void branchTo(Assembler::DoubleCondition c, RegF32 lhs, RegF32 rhs, Label* l) { masm.branchFloat(c, lhs, rhs, l); } void branchTo(Assembler::Condition c, RegI32 lhs, RegI32 rhs, Label* l) { masm.branch32(c, lhs, rhs, l); } void branchTo(Assembler::Condition c, RegI32 lhs, Imm32 rhs, Label* l) { masm.branch32(c, lhs, rhs, l); } void branchTo(Assembler::Condition c, RegI64 lhs, RegI64 rhs, Label* l) { masm.branch64(c, lhs, rhs, l); } void branchTo(Assembler::Condition c, RegI64 lhs, Imm64 rhs, Label* l) { masm.branch64(c, lhs, rhs, l); } void branchTo(Assembler::Condition c, RegPtr lhs, ImmWord rhs, Label* l) { masm.branchPtr(c, lhs, rhs, l); } // Emit a conditional branch that optionally and optimally cleans up the CPU // stack before we branch. // // Cond is either Assembler::Condition or Assembler::DoubleCondition. // // Lhs is RegI32, RegI64, or RegF32, RegF64, or RegPtr. // // Rhs is either the same as Lhs, or an immediate expression compatible with // Lhs "when applicable". template MOZ_MUST_USE bool jumpConditionalWithResults(BranchState* b, Cond cond, Lhs lhs, Rhs rhs) { if (b->hasBlockResults()) { StackHeight resultsBase(0); if (!topBranchParams(b->resultType, &resultsBase)) { return false; } if (b->stackHeight != resultsBase) { Label notTaken; branchTo(b->invertBranch ? cond : Assembler::InvertCondition(cond), lhs, rhs, ¬Taken); // Shuffle stack args. shuffleStackResultsBeforeBranch(resultsBase, b->stackHeight, b->resultType); masm.jump(b->label); masm.bind(¬Taken); return true; } } branchTo(b->invertBranch ? Assembler::InvertCondition(cond) : cond, lhs, rhs, b->label); return true; } // sniffConditionalControl{Cmp,Eqz} may modify the latentWhatever_ state in // the BaseCompiler so that a subsequent conditional branch can be compiled // optimally. emitBranchSetup() and emitBranchPerform() will consume that // state. If the latter methods are not called because deadCode_ is true // then the compiler MUST instead call resetLatentOp() to reset the state. 
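  //
  // Illustrative example (editorial): for a sequence like
  //   (br_if $out (i32.lt_s (local.get 0) (local.get 1)))
  // sniffConditionalControlCmp() records the comparison as a latent op
  // instead of materializing a 0/1 value, and emitBranchPerform() later
  // emits a single compare-and-branch to $out.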
template MOZ_MUST_USE bool sniffConditionalControlCmp(Cond compareOp, ValType operandType); MOZ_MUST_USE bool sniffConditionalControlEqz(ValType operandType); void emitBranchSetup(BranchState* b); MOZ_MUST_USE bool emitBranchPerform(BranchState* b); ////////////////////////////////////////////////////////////////////// [[nodiscard]] bool emitBody(); [[nodiscard]] bool emitBlock(); [[nodiscard]] bool emitLoop(); [[nodiscard]] bool emitIf(); [[nodiscard]] bool emitElse(); #ifdef ENABLE_WASM_EXCEPTIONS [[nodiscard]] bool emitTry(); [[nodiscard]] bool emitCatch(); [[nodiscard]] bool emitThrow(); #endif [[nodiscard]] bool emitEnd(); [[nodiscard]] bool emitBr(); [[nodiscard]] bool emitBrIf(); [[nodiscard]] bool emitBrTable(); [[nodiscard]] bool emitDrop(); [[nodiscard]] bool emitReturn(); enum class CalleeOnStack { // After the arguments to the call, there is a callee pushed onto value // stack. This is only the case for callIndirect. To get the arguments to // the call, emitCallArgs has to reach one element deeper into the value // stack, to skip the callee. True, // No callee on the stack. False }; [[nodiscard]] bool emitCallArgs(const ValTypeVector& args, const StackResultsLoc& results, FunctionCall* baselineCall, CalleeOnStack calleeOnStack); [[nodiscard]] bool emitCall(); [[nodiscard]] bool emitCallIndirect(); [[nodiscard]] bool emitUnaryMathBuiltinCall(SymbolicAddress callee, ValType operandType); [[nodiscard]] bool emitGetLocal(); [[nodiscard]] bool emitSetLocal(); [[nodiscard]] bool emitTeeLocal(); [[nodiscard]] bool emitGetGlobal(); [[nodiscard]] bool emitSetGlobal(); [[nodiscard]] RegI32 maybeLoadTlsForAccess(const AccessCheck& check); [[nodiscard]] RegI32 maybeLoadTlsForAccess(const AccessCheck& check, RegI32 specific); [[nodiscard]] bool emitLoad(ValType type, Scalar::Type viewType); [[nodiscard]] bool loadCommon(MemoryAccessDesc* access, AccessCheck check, ValType type); [[nodiscard]] bool emitStore(ValType resultType, Scalar::Type viewType); [[nodiscard]] bool storeCommon(MemoryAccessDesc* access, AccessCheck check, ValType resultType); [[nodiscard]] bool emitSelect(bool typed); template [[nodiscard]] bool emitSetOrTeeLocal(uint32_t slot); MOZ_MUST_USE bool endBlock(ResultType type); MOZ_MUST_USE bool endIfThen(ResultType type); MOZ_MUST_USE bool endIfThenElse(ResultType type); void doReturn(ContinuationKind kind); void pushReturnValueOfCall(const FunctionCall& call, MIRType type); MOZ_MUST_USE bool pushStackResultsForCall(const ResultType& type, RegPtr temp, StackResultsLoc* loc); void popStackResultsAfterCall(const StackResultsLoc& results, uint32_t stackArgBytes); void emitCompareI32(Assembler::Condition compareOp, ValType compareType); void emitCompareI64(Assembler::Condition compareOp, ValType compareType); void emitCompareF32(Assembler::DoubleCondition compareOp, ValType compareType); void emitCompareF64(Assembler::DoubleCondition compareOp, ValType compareType); void emitCompareRef(Assembler::Condition compareOp, ValType compareType); void emitAddI32(); void emitAddI64(); void emitAddF64(); void emitAddF32(); void emitSubtractI32(); void emitSubtractI64(); void emitSubtractF32(); void emitSubtractF64(); void emitMultiplyI32(); void emitMultiplyI64(); void emitMultiplyF32(); void emitMultiplyF64(); void emitQuotientI32(); void emitQuotientU32(); void emitRemainderI32(); void emitRemainderU32(); #ifdef RABALDR_INT_DIV_I64_CALLOUT [[nodiscard]] bool emitDivOrModI64BuiltinCall(SymbolicAddress callee, ValType operandType); #else void emitQuotientI64(); void emitQuotientU64(); 
void emitRemainderI64(); void emitRemainderU64(); #endif void emitDivideF32(); void emitDivideF64(); void emitMinF32(); void emitMaxF32(); void emitMinF64(); void emitMaxF64(); void emitCopysignF32(); void emitCopysignF64(); void emitOrI32(); void emitOrI64(); void emitAndI32(); void emitAndI64(); void emitXorI32(); void emitXorI64(); void emitShlI32(); void emitShlI64(); void emitShrI32(); void emitShrI64(); void emitShrU32(); void emitShrU64(); void emitRotrI32(); void emitRotrI64(); void emitRotlI32(); void emitRotlI64(); void emitEqzI32(); void emitEqzI64(); void emitClzI32(); void emitClzI64(); void emitCtzI32(); void emitCtzI64(); void emitPopcntI32(); void emitPopcntI64(); void emitAbsF32(); void emitAbsF64(); void emitNegateF32(); void emitNegateF64(); void emitSqrtF32(); void emitSqrtF64(); template [[nodiscard]] bool emitTruncateF32ToI32(); template [[nodiscard]] bool emitTruncateF64ToI32(); #ifdef RABALDR_FLOAT_TO_I64_CALLOUT [[nodiscard]] bool emitConvertFloatingToInt64Callout(SymbolicAddress callee, ValType operandType, ValType resultType); #else template [[nodiscard]] bool emitTruncateF32ToI64(); template [[nodiscard]] bool emitTruncateF64ToI64(); #endif void emitWrapI64ToI32(); void emitExtendI32_8(); void emitExtendI32_16(); void emitExtendI64_8(); void emitExtendI64_16(); void emitExtendI64_32(); void emitExtendI32ToI64(); void emitExtendU32ToI64(); void emitReinterpretF32AsI32(); void emitReinterpretF64AsI64(); void emitConvertF64ToF32(); void emitConvertI32ToF32(); void emitConvertU32ToF32(); void emitConvertF32ToF64(); void emitConvertI32ToF64(); void emitConvertU32ToF64(); #ifdef RABALDR_I64_TO_FLOAT_CALLOUT [[nodiscard]] bool emitConvertInt64ToFloatingCallout(SymbolicAddress callee, ValType operandType, ValType resultType); #else void emitConvertI64ToF32(); void emitConvertU64ToF32(); void emitConvertI64ToF64(); void emitConvertU64ToF64(); #endif void emitReinterpretI32AsF32(); void emitReinterpretI64AsF64(); void emitRound(RoundingMode roundingMode, ValType operandType); [[nodiscard]] bool emitInstanceCall(uint32_t lineOrBytecode, const SymbolicAddressSignature& builtin, bool pushReturnedValue = true); [[nodiscard]] bool emitMemoryGrow(); [[nodiscard]] bool emitMemorySize(); [[nodiscard]] bool emitRefFunc(); [[nodiscard]] bool emitRefNull(); [[nodiscard]] bool emitRefIsNull(); #ifdef ENABLE_WASM_FUNCTION_REFERENCES [[nodiscard]] bool emitRefAsNonNull(); [[nodiscard]] bool emitBrOnNull(); #endif [[nodiscard]] bool emitAtomicCmpXchg(ValType type, Scalar::Type viewType); [[nodiscard]] bool emitAtomicLoad(ValType type, Scalar::Type viewType); [[nodiscard]] bool emitAtomicRMW(ValType type, Scalar::Type viewType, AtomicOp op); [[nodiscard]] bool emitAtomicStore(ValType type, Scalar::Type viewType); [[nodiscard]] bool emitWait(ValType type, uint32_t byteSize); [[nodiscard]] bool emitWake(); [[nodiscard]] bool emitFence(); [[nodiscard]] bool emitAtomicXchg(ValType type, Scalar::Type viewType); void emitAtomicXchg64(MemoryAccessDesc* access, WantResult wantResult); [[nodiscard]] bool emitMemCopy(); [[nodiscard]] bool emitMemCopyCall(uint32_t lineOrBytecode); [[nodiscard]] bool emitMemCopyInline(); [[nodiscard]] bool emitTableCopy(); [[nodiscard]] bool emitDataOrElemDrop(bool isData); [[nodiscard]] bool emitMemFill(); [[nodiscard]] bool emitMemFillCall(uint32_t lineOrBytecode); [[nodiscard]] bool emitMemFillInline(); [[nodiscard]] bool emitMemOrTableInit(bool isMem); #ifdef ENABLE_WASM_REFTYPES [[nodiscard]] bool emitTableFill(); [[nodiscard]] bool emitTableGet(); 
[[nodiscard]] bool emitTableGrow(); [[nodiscard]] bool emitTableSet(); [[nodiscard]] bool emitTableSize(); #endif [[nodiscard]] bool emitStructNew(); [[nodiscard]] bool emitStructGet(); [[nodiscard]] bool emitStructSet(); [[nodiscard]] bool emitStructNarrow(); #ifdef ENABLE_WASM_SIMD template void emitVectorUnop(void (*op)(MacroAssembler& masm, SourceType rs, DestType rd)); template void emitVectorUnop(void (*op)(MacroAssembler& masm, SourceType rs, DestType rd, TempType temp)); template void emitVectorUnop(ImmType immediate, void (*op)(MacroAssembler&, ImmType, SourceType, DestType)); template void emitVectorBinop(void (*op)(MacroAssembler& masm, RhsType src, LhsDestType srcDest)); template void emitVectorBinop(void (*op)(MacroAssembler& masm, RhsDestType src, LhsType srcDest, RhsDestOp)); template void emitVectorBinop(void (*)(MacroAssembler& masm, RhsType rs, LhsDestType rsd, TempType temp)); template void emitVectorBinop(void (*)(MacroAssembler& masm, RhsType rs, LhsDestType rsd, TempType1 temp1, TempType2 temp2)); template void emitVectorBinop(ImmType immediate, void (*op)(MacroAssembler&, ImmType, RhsType, LhsDestType)); template void emitVectorBinop(ImmType immediate, void (*op)(MacroAssembler&, ImmType, RhsType, LhsDestType, TempType1 temp1, TempType2 temp2)); void emitVectorAndNot(); [[nodiscard]] bool emitLoadSplat(Scalar::Type viewType); [[nodiscard]] bool emitLoadZero(Scalar::Type viewType); [[nodiscard]] bool emitLoadExtend(Scalar::Type viewType); [[nodiscard]] bool emitBitselect(); [[nodiscard]] bool emitVectorShuffle(); [[nodiscard]] bool emitVectorShiftRightI64x2(bool isUnsigned); [[nodiscard]] bool emitVectorMulI64x2(); #endif }; // TODO: We want these to be inlined for sure; do we need an `inline` somewhere? template <> RegI32 BaseCompiler::need() { return needI32(); } template <> RegI64 BaseCompiler::need() { return needI64(); } template <> RegF32 BaseCompiler::need() { return needF32(); } template <> RegF64 BaseCompiler::need() { return needF64(); } template <> RegI32 BaseCompiler::pop() { return popI32(); } template <> RegI64 BaseCompiler::pop() { return popI64(); } template <> RegF32 BaseCompiler::pop() { return popF32(); } template <> RegF64 BaseCompiler::pop() { return popF64(); } template <> void BaseCompiler::free(RegI32 r) { freeI32(r); } template <> void BaseCompiler::free(RegI64 r) { freeI64(r); } template <> void BaseCompiler::free(RegF32 r) { freeF32(r); } template <> void BaseCompiler::free(RegF64 r) { freeF64(r); } #ifdef ENABLE_WASM_SIMD template <> RegV128 BaseCompiler::need() { return needV128(); } template <> RegV128 BaseCompiler::pop() { return popV128(); } template <> void BaseCompiler::free(RegV128 r) { freeV128(r); } #endif void BaseCompiler::emitAddI32() { int32_t c; if (popConstI32(&c)) { RegI32 r = popI32(); masm.add32(Imm32(c), r); pushI32(r); } else { RegI32 r, rs; pop2xI32(&r, &rs); masm.add32(rs, r); freeI32(rs); pushI32(r); } } void BaseCompiler::emitAddI64() { int64_t c; if (popConstI64(&c)) { RegI64 r = popI64(); masm.add64(Imm64(c), r); pushI64(r); } else { RegI64 r, rs; pop2xI64(&r, &rs); masm.add64(rs, r); freeI64(rs); pushI64(r); } } void BaseCompiler::emitAddF64() { RegF64 r, rs; pop2xF64(&r, &rs); masm.addDouble(rs, r); freeF64(rs); pushF64(r); } void BaseCompiler::emitAddF32() { RegF32 r, rs; pop2xF32(&r, &rs); masm.addFloat32(rs, r); freeF32(rs); pushF32(r); } void BaseCompiler::emitSubtractI32() { int32_t c; if (popConstI32(&c)) { RegI32 r = popI32(); masm.sub32(Imm32(c), r); pushI32(r); } else { RegI32 r, rs; pop2xI32(&r, &rs); 
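    // pop2xI32 popped the second operand into rs and the first into r, so
    // the sub32 below computes r -= rs, i.e. (first - second), as wasm
    // i32.sub requires.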
masm.sub32(rs, r); freeI32(rs); pushI32(r); } } void BaseCompiler::emitSubtractI64() { int64_t c; if (popConstI64(&c)) { RegI64 r = popI64(); masm.sub64(Imm64(c), r); pushI64(r); } else { RegI64 r, rs; pop2xI64(&r, &rs); masm.sub64(rs, r); freeI64(rs); pushI64(r); } } void BaseCompiler::emitSubtractF32() { RegF32 r, rs; pop2xF32(&r, &rs); masm.subFloat32(rs, r); freeF32(rs); pushF32(r); } void BaseCompiler::emitSubtractF64() { RegF64 r, rs; pop2xF64(&r, &rs); masm.subDouble(rs, r); freeF64(rs); pushF64(r); } void BaseCompiler::emitMultiplyI32() { RegI32 r, rs, reserved; pop2xI32ForMulDivI32(&r, &rs, &reserved); masm.mul32(rs, r); maybeFreeI32(reserved); freeI32(rs); pushI32(r); } void BaseCompiler::emitMultiplyI64() { RegI64 r, rs, reserved; RegI32 temp; pop2xI64ForMulI64(&r, &rs, &temp, &reserved); masm.mul64(rs, r, temp); maybeFreeI64(reserved); maybeFreeI32(temp); freeI64(rs); pushI64(r); } void BaseCompiler::emitMultiplyF32() { RegF32 r, rs; pop2xF32(&r, &rs); masm.mulFloat32(rs, r); freeF32(rs); pushF32(r); } void BaseCompiler::emitMultiplyF64() { RegF64 r, rs; pop2xF64(&r, &rs); masm.mulDouble(rs, r); freeF64(rs); pushF64(r); } void BaseCompiler::emitQuotientI32() { int32_t c; uint_fast8_t power; if (popConstPositivePowerOfTwoI32(&c, &power, 0)) { if (power != 0) { RegI32 r = popI32(); Label positive; masm.branchTest32(Assembler::NotSigned, r, r, &positive); masm.add32(Imm32(c - 1), r); masm.bind(&positive); masm.rshift32Arithmetic(Imm32(power & 31), r); pushI32(r); } } else { bool isConst = peekConstI32(&c); RegI32 r, rs, reserved; pop2xI32ForMulDivI32(&r, &rs, &reserved); if (!isConst || c == 0) { checkDivideByZeroI32(rs); } Label done; if (!isConst || c == -1) { checkDivideSignedOverflowI32(rs, r, &done, ZeroOnOverflow(false)); } masm.quotient32(rs, r, IsUnsigned(false)); masm.bind(&done); maybeFreeI32(reserved); freeI32(rs); pushI32(r); } } void BaseCompiler::emitQuotientU32() { int32_t c; uint_fast8_t power; if (popConstPositivePowerOfTwoI32(&c, &power, 0)) { if (power != 0) { RegI32 r = popI32(); masm.rshift32(Imm32(power & 31), r); pushI32(r); } } else { bool isConst = peekConstI32(&c); RegI32 r, rs, reserved; pop2xI32ForMulDivI32(&r, &rs, &reserved); if (!isConst || c == 0) { checkDivideByZeroI32(rs); } masm.quotient32(rs, r, IsUnsigned(true)); maybeFreeI32(reserved); freeI32(rs); pushI32(r); } } void BaseCompiler::emitRemainderI32() { int32_t c; uint_fast8_t power; if (popConstPositivePowerOfTwoI32(&c, &power, 1)) { RegI32 r = popI32(); RegI32 temp = needI32(); moveI32(r, temp); Label positive; masm.branchTest32(Assembler::NotSigned, temp, temp, &positive); masm.add32(Imm32(c - 1), temp); masm.bind(&positive); masm.rshift32Arithmetic(Imm32(power & 31), temp); masm.lshift32(Imm32(power & 31), temp); masm.sub32(temp, r); freeI32(temp); pushI32(r); } else { bool isConst = peekConstI32(&c); RegI32 r, rs, reserved; pop2xI32ForMulDivI32(&r, &rs, &reserved); if (!isConst || c == 0) { checkDivideByZeroI32(rs); } Label done; if (!isConst || c == -1) { checkDivideSignedOverflowI32(rs, r, &done, ZeroOnOverflow(true)); } masm.remainder32(rs, r, IsUnsigned(false)); masm.bind(&done); maybeFreeI32(reserved); freeI32(rs); pushI32(r); } } void BaseCompiler::emitRemainderU32() { int32_t c; uint_fast8_t power; if (popConstPositivePowerOfTwoI32(&c, &power, 1)) { RegI32 r = popI32(); masm.and32(Imm32(c - 1), r); pushI32(r); } else { bool isConst = peekConstI32(&c); RegI32 r, rs, reserved; pop2xI32ForMulDivI32(&r, &rs, &reserved); if (!isConst || c == 0) { checkDivideByZeroI32(rs); } 
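    // i32.rem_u traps on a zero divisor, hence the explicit check above when
    // the divisor is not a known non-zero constant; unlike the signed case
    // there is no INT32_MIN / -1 overflow to guard against here.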
masm.remainder32(rs, r, IsUnsigned(true)); maybeFreeI32(reserved); freeI32(rs); pushI32(r); } } #ifndef RABALDR_INT_DIV_I64_CALLOUT void BaseCompiler::emitQuotientI64() { # ifdef JS_64BIT int64_t c; uint_fast8_t power; if (popConstPositivePowerOfTwoI64(&c, &power, 0)) { if (power != 0) { RegI64 r = popI64(); Label positive; masm.branchTest64(Assembler::NotSigned, r, r, RegI32::Invalid(), &positive); masm.add64(Imm64(c - 1), r); masm.bind(&positive); masm.rshift64Arithmetic(Imm32(power & 63), r); pushI64(r); } } else { bool isConst = peekConstI64(&c); RegI64 r, rs, reserved; pop2xI64ForDivI64(&r, &rs, &reserved); quotientI64(rs, r, reserved, IsUnsigned(false), isConst, c); maybeFreeI64(reserved); freeI64(rs); pushI64(r); } # else MOZ_CRASH("BaseCompiler platform hook: emitQuotientI64"); # endif } void BaseCompiler::emitQuotientU64() { # ifdef JS_64BIT int64_t c; uint_fast8_t power; if (popConstPositivePowerOfTwoI64(&c, &power, 0)) { if (power != 0) { RegI64 r = popI64(); masm.rshift64(Imm32(power & 63), r); pushI64(r); } } else { bool isConst = peekConstI64(&c); RegI64 r, rs, reserved; pop2xI64ForDivI64(&r, &rs, &reserved); quotientI64(rs, r, reserved, IsUnsigned(true), isConst, c); maybeFreeI64(reserved); freeI64(rs); pushI64(r); } # else MOZ_CRASH("BaseCompiler platform hook: emitQuotientU64"); # endif } void BaseCompiler::emitRemainderI64() { # ifdef JS_64BIT int64_t c; uint_fast8_t power; if (popConstPositivePowerOfTwoI64(&c, &power, 1)) { RegI64 r = popI64(); RegI64 temp = needI64(); moveI64(r, temp); Label positive; masm.branchTest64(Assembler::NotSigned, temp, temp, RegI32::Invalid(), &positive); masm.add64(Imm64(c - 1), temp); masm.bind(&positive); masm.rshift64Arithmetic(Imm32(power & 63), temp); masm.lshift64(Imm32(power & 63), temp); masm.sub64(temp, r); freeI64(temp); pushI64(r); } else { bool isConst = peekConstI64(&c); RegI64 r, rs, reserved; pop2xI64ForDivI64(&r, &rs, &reserved); remainderI64(rs, r, reserved, IsUnsigned(false), isConst, c); maybeFreeI64(reserved); freeI64(rs); pushI64(r); } # else MOZ_CRASH("BaseCompiler platform hook: emitRemainderI64"); # endif } void BaseCompiler::emitRemainderU64() { # ifdef JS_64BIT int64_t c; uint_fast8_t power; if (popConstPositivePowerOfTwoI64(&c, &power, 1)) { RegI64 r = popI64(); masm.and64(Imm64(c - 1), r); pushI64(r); } else { bool isConst = peekConstI64(&c); RegI64 r, rs, reserved; pop2xI64ForDivI64(&r, &rs, &reserved); remainderI64(rs, r, reserved, IsUnsigned(true), isConst, c); maybeFreeI64(reserved); freeI64(rs); pushI64(r); } # else MOZ_CRASH("BaseCompiler platform hook: emitRemainderU64"); # endif } #endif // RABALDR_INT_DIV_I64_CALLOUT void BaseCompiler::emitDivideF32() { RegF32 r, rs; pop2xF32(&r, &rs); masm.divFloat32(rs, r); freeF32(rs); pushF32(r); } void BaseCompiler::emitDivideF64() { RegF64 r, rs; pop2xF64(&r, &rs); masm.divDouble(rs, r); freeF64(rs); pushF64(r); } void BaseCompiler::emitMinF32() { RegF32 r, rs; pop2xF32(&r, &rs); // Convert signaling NaN to quiet NaNs. // // TODO / OPTIMIZE (bug 1316824): Don't do this if one of the operands // is known to be a constant. ScratchF32 zero(*this); moveImmF32(0.f, zero); masm.subFloat32(zero, r); masm.subFloat32(zero, rs); masm.minFloat32(rs, r, HandleNaNSpecially(true)); freeF32(rs); pushF32(r); } void BaseCompiler::emitMaxF32() { RegF32 r, rs; pop2xF32(&r, &rs); // Convert signaling NaN to quiet NaNs. // // TODO / OPTIMIZE (bug 1316824): see comment in emitMinF32. 
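  // (Editorial note: subtracting +0.0 below is a quieting identity: it turns
  // a signaling NaN operand into a quiet NaN while leaving every other value,
  // including -0.0, unchanged, so maxFloat32 only has to handle quiet NaNs.)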
ScratchF32 zero(*this); moveImmF32(0.f, zero); masm.subFloat32(zero, r); masm.subFloat32(zero, rs); masm.maxFloat32(rs, r, HandleNaNSpecially(true)); freeF32(rs); pushF32(r); } void BaseCompiler::emitMinF64() { RegF64 r, rs; pop2xF64(&r, &rs); // Convert signaling NaN to quiet NaNs. // // TODO / OPTIMIZE (bug 1316824): see comment in emitMinF32. ScratchF64 zero(*this); moveImmF64(0, zero); masm.subDouble(zero, r); masm.subDouble(zero, rs); masm.minDouble(rs, r, HandleNaNSpecially(true)); freeF64(rs); pushF64(r); } void BaseCompiler::emitMaxF64() { RegF64 r, rs; pop2xF64(&r, &rs); // Convert signaling NaN to quiet NaNs. // // TODO / OPTIMIZE (bug 1316824): see comment in emitMinF32. ScratchF64 zero(*this); moveImmF64(0, zero); masm.subDouble(zero, r); masm.subDouble(zero, rs); masm.maxDouble(rs, r, HandleNaNSpecially(true)); freeF64(rs); pushF64(r); } void BaseCompiler::emitCopysignF32() { RegF32 r, rs; pop2xF32(&r, &rs); RegI32 temp0 = needI32(); RegI32 temp1 = needI32(); masm.moveFloat32ToGPR(r, temp0); masm.moveFloat32ToGPR(rs, temp1); masm.and32(Imm32(INT32_MAX), temp0); masm.and32(Imm32(INT32_MIN), temp1); masm.or32(temp1, temp0); masm.moveGPRToFloat32(temp0, r); freeI32(temp0); freeI32(temp1); freeF32(rs); pushF32(r); } void BaseCompiler::emitCopysignF64() { RegF64 r, rs; pop2xF64(&r, &rs); RegI64 temp0 = needI64(); RegI64 temp1 = needI64(); masm.moveDoubleToGPR64(r, temp0); masm.moveDoubleToGPR64(rs, temp1); masm.and64(Imm64(INT64_MAX), temp0); masm.and64(Imm64(INT64_MIN), temp1); masm.or64(temp1, temp0); masm.moveGPR64ToDouble(temp0, r); freeI64(temp0); freeI64(temp1); freeF64(rs); pushF64(r); } void BaseCompiler::emitOrI32() { int32_t c; if (popConstI32(&c)) { RegI32 r = popI32(); masm.or32(Imm32(c), r); pushI32(r); } else { RegI32 r, rs; pop2xI32(&r, &rs); masm.or32(rs, r); freeI32(rs); pushI32(r); } } void BaseCompiler::emitOrI64() { int64_t c; if (popConstI64(&c)) { RegI64 r = popI64(); masm.or64(Imm64(c), r); pushI64(r); } else { RegI64 r, rs; pop2xI64(&r, &rs); masm.or64(rs, r); freeI64(rs); pushI64(r); } } void BaseCompiler::emitAndI32() { int32_t c; if (popConstI32(&c)) { RegI32 r = popI32(); masm.and32(Imm32(c), r); pushI32(r); } else { RegI32 r, rs; pop2xI32(&r, &rs); masm.and32(rs, r); freeI32(rs); pushI32(r); } } void BaseCompiler::emitAndI64() { int64_t c; if (popConstI64(&c)) { RegI64 r = popI64(); masm.and64(Imm64(c), r); pushI64(r); } else { RegI64 r, rs; pop2xI64(&r, &rs); masm.and64(rs, r); freeI64(rs); pushI64(r); } } void BaseCompiler::emitXorI32() { int32_t c; if (popConstI32(&c)) { RegI32 r = popI32(); masm.xor32(Imm32(c), r); pushI32(r); } else { RegI32 r, rs; pop2xI32(&r, &rs); masm.xor32(rs, r); freeI32(rs); pushI32(r); } } void BaseCompiler::emitXorI64() { int64_t c; if (popConstI64(&c)) { RegI64 r = popI64(); masm.xor64(Imm64(c), r); pushI64(r); } else { RegI64 r, rs; pop2xI64(&r, &rs); masm.xor64(rs, r); freeI64(rs); pushI64(r); } } void BaseCompiler::emitShlI32() { int32_t c; if (popConstI32(&c)) { RegI32 r = popI32(); masm.lshift32(Imm32(c & 31), r); pushI32(r); } else { RegI32 r, rs; pop2xI32ForShift(&r, &rs); maskShiftCount32(rs); masm.lshift32(rs, r); freeI32(rs); pushI32(r); } } void BaseCompiler::emitShlI64() { int64_t c; if (popConstI64(&c)) { RegI64 r = popI64(); masm.lshift64(Imm32(c & 63), r); pushI64(r); } else { RegI64 r, rs; pop2xI64ForShift(&r, &rs); masm.lshift64(lowPart(rs), r); freeI64(rs); pushI64(r); } } void BaseCompiler::emitShrI32() { int32_t c; if (popConstI32(&c)) { RegI32 r = popI32(); masm.rshift32Arithmetic(Imm32(c & 31), r); 
pushI32(r); } else { RegI32 r, rs; pop2xI32ForShift(&r, &rs); maskShiftCount32(rs); masm.rshift32Arithmetic(rs, r); freeI32(rs); pushI32(r); } } void BaseCompiler::emitShrI64() { int64_t c; if (popConstI64(&c)) { RegI64 r = popI64(); masm.rshift64Arithmetic(Imm32(c & 63), r); pushI64(r); } else { RegI64 r, rs; pop2xI64ForShift(&r, &rs); masm.rshift64Arithmetic(lowPart(rs), r); freeI64(rs); pushI64(r); } } void BaseCompiler::emitShrU32() { int32_t c; if (popConstI32(&c)) { RegI32 r = popI32(); masm.rshift32(Imm32(c & 31), r); pushI32(r); } else { RegI32 r, rs; pop2xI32ForShift(&r, &rs); maskShiftCount32(rs); masm.rshift32(rs, r); freeI32(rs); pushI32(r); } } void BaseCompiler::emitShrU64() { int64_t c; if (popConstI64(&c)) { RegI64 r = popI64(); masm.rshift64(Imm32(c & 63), r); pushI64(r); } else { RegI64 r, rs; pop2xI64ForShift(&r, &rs); masm.rshift64(lowPart(rs), r); freeI64(rs); pushI64(r); } } void BaseCompiler::emitRotrI32() { int32_t c; if (popConstI32(&c)) { RegI32 r = popI32(); masm.rotateRight(Imm32(c & 31), r, r); pushI32(r); } else { RegI32 r, rs; pop2xI32ForRotate(&r, &rs); masm.rotateRight(rs, r, r); freeI32(rs); pushI32(r); } } void BaseCompiler::emitRotrI64() { int64_t c; if (popConstI64(&c)) { RegI64 r = popI64(); RegI32 temp = needRotate64Temp(); masm.rotateRight64(Imm32(c & 63), r, r, temp); maybeFreeI32(temp); pushI64(r); } else { RegI64 r, rs; pop2xI64ForRotate(&r, &rs); masm.rotateRight64(lowPart(rs), r, r, maybeHighPart(rs)); freeI64(rs); pushI64(r); } } void BaseCompiler::emitRotlI32() { int32_t c; if (popConstI32(&c)) { RegI32 r = popI32(); masm.rotateLeft(Imm32(c & 31), r, r); pushI32(r); } else { RegI32 r, rs; pop2xI32ForRotate(&r, &rs); masm.rotateLeft(rs, r, r); freeI32(rs); pushI32(r); } } void BaseCompiler::emitRotlI64() { int64_t c; if (popConstI64(&c)) { RegI64 r = popI64(); RegI32 temp = needRotate64Temp(); masm.rotateLeft64(Imm32(c & 63), r, r, temp); maybeFreeI32(temp); pushI64(r); } else { RegI64 r, rs; pop2xI64ForRotate(&r, &rs); masm.rotateLeft64(lowPart(rs), r, r, maybeHighPart(rs)); freeI64(rs); pushI64(r); } } void BaseCompiler::emitEqzI32() { if (sniffConditionalControlEqz(ValType::I32)) { return; } RegI32 r = popI32(); masm.cmp32Set(Assembler::Equal, r, Imm32(0), r); pushI32(r); } void BaseCompiler::emitEqzI64() { if (sniffConditionalControlEqz(ValType::I64)) { return; } RegI64 rs = popI64(); RegI32 rd = fromI64(rs); eqz64(rs, rd); freeI64Except(rs, rd); pushI32(rd); } void BaseCompiler::emitClzI32() { RegI32 r = popI32(); masm.clz32(r, r, IsKnownNotZero(false)); pushI32(r); } void BaseCompiler::emitClzI64() { RegI64 r = popI64(); masm.clz64(r, lowPart(r)); maybeClearHighPart(r); pushI64(r); } void BaseCompiler::emitCtzI32() { RegI32 r = popI32(); masm.ctz32(r, r, IsKnownNotZero(false)); pushI32(r); } void BaseCompiler::emitCtzI64() { RegI64 r = popI64(); masm.ctz64(r, lowPart(r)); maybeClearHighPart(r); pushI64(r); } void BaseCompiler::emitPopcntI32() { RegI32 r = popI32(); RegI32 temp = needPopcnt32Temp(); masm.popcnt32(r, r, temp); maybeFreeI32(temp); pushI32(r); } void BaseCompiler::emitPopcntI64() { RegI64 r = popI64(); RegI32 temp = needPopcnt64Temp(); masm.popcnt64(r, r, temp); maybeFreeI32(temp); pushI64(r); } void BaseCompiler::emitAbsF32() { RegF32 r = popF32(); masm.absFloat32(r, r); pushF32(r); } void BaseCompiler::emitAbsF64() { RegF64 r = popF64(); masm.absDouble(r, r); pushF64(r); } void BaseCompiler::emitNegateF32() { RegF32 r = popF32(); masm.negateFloat(r); pushF32(r); } void BaseCompiler::emitNegateF64() { RegF64 r = popF64(); 
masm.negateDouble(r); pushF64(r); } void BaseCompiler::emitSqrtF32() { RegF32 r = popF32(); masm.sqrtFloat32(r, r); pushF32(r); } void BaseCompiler::emitSqrtF64() { RegF64 r = popF64(); masm.sqrtDouble(r, r); pushF64(r); } template <TruncFlags flags> bool BaseCompiler::emitTruncateF32ToI32() { RegF32 rs = popF32(); RegI32 rd = needI32(); if (!truncateF32ToI32(rs, rd, flags)) { return false; } freeF32(rs); pushI32(rd); return true; } template <TruncFlags flags> bool BaseCompiler::emitTruncateF64ToI32() { RegF64 rs = popF64(); RegI32 rd = needI32(); if (!truncateF64ToI32(rs, rd, flags)) { return false; } freeF64(rs); pushI32(rd); return true; } #ifndef RABALDR_FLOAT_TO_I64_CALLOUT template <TruncFlags flags> bool BaseCompiler::emitTruncateF32ToI64() { RegF32 rs = popF32(); RegI64 rd = needI64(); RegF64 temp = needTempForFloatingToI64(flags); if (!truncateF32ToI64(rs, rd, flags, temp)) { return false; } maybeFreeF64(temp); freeF32(rs); pushI64(rd); return true; } template <TruncFlags flags> bool BaseCompiler::emitTruncateF64ToI64() { RegF64 rs = popF64(); RegI64 rd = needI64(); RegF64 temp = needTempForFloatingToI64(flags); if (!truncateF64ToI64(rs, rd, flags, temp)) { return false; } maybeFreeF64(temp); freeF64(rs); pushI64(rd); return true; } #endif // RABALDR_FLOAT_TO_I64_CALLOUT void BaseCompiler::emitWrapI64ToI32() { RegI64 rs = popI64(); RegI32 rd = fromI64(rs); masm.move64To32(rs, rd); freeI64Except(rs, rd); pushI32(rd); } void BaseCompiler::emitExtendI32_8() { RegI32 r = popI32(); #ifdef JS_CODEGEN_X86 if (!ra.isSingleByteI32(r)) { ScratchI8 scratch(*this); moveI32(r, scratch); masm.move8SignExtend(scratch, r); pushI32(r); return; } #endif masm.move8SignExtend(r, r); pushI32(r); } void BaseCompiler::emitExtendI32_16() { RegI32 r = popI32(); masm.move16SignExtend(r, r); pushI32(r); } void BaseCompiler::emitExtendI64_8() { RegI64 r; popI64ForSignExtendI64(&r); masm.move8To64SignExtend(lowPart(r), r); pushI64(r); } void BaseCompiler::emitExtendI64_16() { RegI64 r; popI64ForSignExtendI64(&r); masm.move16To64SignExtend(lowPart(r), r); pushI64(r); } void BaseCompiler::emitExtendI64_32() { RegI64 r; popI64ForSignExtendI64(&r); masm.move32To64SignExtend(lowPart(r), r); pushI64(r); } void BaseCompiler::emitExtendI32ToI64() { RegI64 r; popI32ForSignExtendI64(&r); masm.move32To64SignExtend(lowPart(r), r); pushI64(r); } void BaseCompiler::emitExtendU32ToI64() { RegI32 rs = popI32(); RegI64 rd = widenI32(rs); masm.move32To64ZeroExtend(rs, rd); pushI64(rd); } void BaseCompiler::emitReinterpretF32AsI32() { RegF32 rs = popF32(); RegI32 rd = needI32(); masm.moveFloat32ToGPR(rs, rd); freeF32(rs); pushI32(rd); } void BaseCompiler::emitReinterpretF64AsI64() { RegF64 rs = popF64(); RegI64 rd = needI64(); masm.moveDoubleToGPR64(rs, rd); freeF64(rs); pushI64(rd); } void BaseCompiler::emitConvertF64ToF32() { RegF64 rs = popF64(); RegF32 rd = needF32(); masm.convertDoubleToFloat32(rs, rd); freeF64(rs); pushF32(rd); } void BaseCompiler::emitConvertI32ToF32() { RegI32 rs = popI32(); RegF32 rd = needF32(); masm.convertInt32ToFloat32(rs, rd); freeI32(rs); pushF32(rd); } void BaseCompiler::emitConvertU32ToF32() { RegI32 rs = popI32(); RegF32 rd = needF32(); masm.convertUInt32ToFloat32(rs, rd); freeI32(rs); pushF32(rd); } #ifndef RABALDR_I64_TO_FLOAT_CALLOUT void BaseCompiler::emitConvertI64ToF32() { RegI64 rs = popI64(); RegF32 rd = needF32(); convertI64ToF32(rs, IsUnsigned(false), rd, RegI32()); freeI64(rs); pushF32(rd); } void BaseCompiler::emitConvertU64ToF32() { RegI64 rs = popI64(); RegF32 rd = needF32(); RegI32 temp = needConvertI64ToFloatTemp(ValType::F32, IsUnsigned(true));
convertI64ToF32(rs, IsUnsigned(true), rd, temp); maybeFreeI32(temp); freeI64(rs); pushF32(rd); } #endif void BaseCompiler::emitConvertF32ToF64() { RegF32 rs = popF32(); RegF64 rd = needF64(); masm.convertFloat32ToDouble(rs, rd); freeF32(rs); pushF64(rd); } void BaseCompiler::emitConvertI32ToF64() { RegI32 rs = popI32(); RegF64 rd = needF64(); masm.convertInt32ToDouble(rs, rd); freeI32(rs); pushF64(rd); } void BaseCompiler::emitConvertU32ToF64() { RegI32 rs = popI32(); RegF64 rd = needF64(); masm.convertUInt32ToDouble(rs, rd); freeI32(rs); pushF64(rd); } #ifndef RABALDR_I64_TO_FLOAT_CALLOUT void BaseCompiler::emitConvertI64ToF64() { RegI64 rs = popI64(); RegF64 rd = needF64(); convertI64ToF64(rs, IsUnsigned(false), rd, RegI32()); freeI64(rs); pushF64(rd); } void BaseCompiler::emitConvertU64ToF64() { RegI64 rs = popI64(); RegF64 rd = needF64(); RegI32 temp = needConvertI64ToFloatTemp(ValType::F64, IsUnsigned(true)); convertI64ToF64(rs, IsUnsigned(true), rd, temp); maybeFreeI32(temp); freeI64(rs); pushF64(rd); } #endif // RABALDR_I64_TO_FLOAT_CALLOUT void BaseCompiler::emitReinterpretI32AsF32() { RegI32 rs = popI32(); RegF32 rd = needF32(); masm.moveGPRToFloat32(rs, rd); freeI32(rs); pushF32(rd); } void BaseCompiler::emitReinterpretI64AsF64() { RegI64 rs = popI64(); RegF64 rd = needF64(); masm.moveGPR64ToDouble(rs, rd); freeI64(rs); pushF64(rd); } template <typename Cond> bool BaseCompiler::sniffConditionalControlCmp(Cond compareOp, ValType operandType) { MOZ_ASSERT(latentOp_ == LatentOp::None, "Latent comparison state not properly reset"); #ifdef JS_CODEGEN_X86 // On x86, latent i64 binary comparisons use too many registers: the // reserved join register and the lhs and rhs operands require six, but we // only have five. if (operandType == ValType::I64) { return false; } #endif // No optimization for pointer compares yet. if (operandType.isReference()) { return false; } OpBytes op; iter_.peekOp(&op); switch (op.b0) { case uint16_t(Op::BrIf): case uint16_t(Op::If): case uint16_t(Op::SelectNumeric): case uint16_t(Op::SelectTyped): setLatentCompare(compareOp, operandType); return true; default: return false; } } bool BaseCompiler::sniffConditionalControlEqz(ValType operandType) { MOZ_ASSERT(latentOp_ == LatentOp::None, "Latent comparison state not properly reset"); OpBytes op; iter_.peekOp(&op); switch (op.b0) { case uint16_t(Op::BrIf): case uint16_t(Op::SelectNumeric): case uint16_t(Op::SelectTyped): case uint16_t(Op::If): setLatentEqz(operandType); return true; default: return false; } } void BaseCompiler::emitBranchSetup(BranchState* b) { // Avoid allocating operands to latentOp_ to result registers. if (b->hasBlockResults()) { needResultRegisters(b->resultType); } // Set up fields so that emitBranchPerform() need not switch on latentOp_.
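  //
  // For example (illustrative): for
  //
  //   (br_if $l (i32.eq (local.get 0) (i32.const 7)))
  //
  // emitCompareI32 will have peeked at the br_if and recorded a latent Equal
  // compare instead of materializing a 0/1 value; the switch below then pops
  // the compare operands into *b, and emitBranchPerform() emits a single
  // compare-and-branch.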
switch (latentOp_) { case LatentOp::None: { latentIntCmp_ = Assembler::NotEqual; latentType_ = ValType::I32; b->i32.lhs = popI32(); b->i32.rhsImm = true; b->i32.imm = 0; break; } case LatentOp::Compare: { switch (latentType_.kind()) { case ValType::I32: { if (popConstI32(&b->i32.imm)) { b->i32.lhs = popI32(); b->i32.rhsImm = true; } else { pop2xI32(&b->i32.lhs, &b->i32.rhs); b->i32.rhsImm = false; } break; } case ValType::I64: { pop2xI64(&b->i64.lhs, &b->i64.rhs); b->i64.rhsImm = false; break; } case ValType::F32: { pop2xF32(&b->f32.lhs, &b->f32.rhs); break; } case ValType::F64: { pop2xF64(&b->f64.lhs, &b->f64.rhs); break; } default: { MOZ_CRASH("Unexpected type for LatentOp::Compare"); } } break; } case LatentOp::Eqz: { switch (latentType_.kind()) { case ValType::I32: { latentIntCmp_ = Assembler::Equal; b->i32.lhs = popI32(); b->i32.rhsImm = true; b->i32.imm = 0; break; } case ValType::I64: { latentIntCmp_ = Assembler::Equal; b->i64.lhs = popI64(); b->i64.rhsImm = true; b->i64.imm = 0; break; } default: { MOZ_CRASH("Unexpected type for LatentOp::Eqz"); } } break; } } if (b->hasBlockResults()) { freeResultRegisters(b->resultType); } } bool BaseCompiler::emitBranchPerform(BranchState* b) { switch (latentType_.kind()) { case ValType::I32: { if (b->i32.rhsImm) { if (!jumpConditionalWithResults(b, latentIntCmp_, b->i32.lhs, Imm32(b->i32.imm))) { return false; } } else { if (!jumpConditionalWithResults(b, latentIntCmp_, b->i32.lhs, b->i32.rhs)) { return false; } freeI32(b->i32.rhs); } freeI32(b->i32.lhs); break; } case ValType::I64: { if (b->i64.rhsImm) { if (!jumpConditionalWithResults(b, latentIntCmp_, b->i64.lhs, Imm64(b->i64.imm))) { return false; } } else { if (!jumpConditionalWithResults(b, latentIntCmp_, b->i64.lhs, b->i64.rhs)) { return false; } freeI64(b->i64.rhs); } freeI64(b->i64.lhs); break; } case ValType::F32: { if (!jumpConditionalWithResults(b, latentDoubleCmp_, b->f32.lhs, b->f32.rhs)) { return false; } freeF32(b->f32.lhs); freeF32(b->f32.rhs); break; } case ValType::F64: { if (!jumpConditionalWithResults(b, latentDoubleCmp_, b->f64.lhs, b->f64.rhs)) { return false; } freeF64(b->f64.lhs); freeF64(b->f64.rhs); break; } default: { MOZ_CRASH("Unexpected type for LatentOp::Compare"); } } resetLatentOp(); return true; } // For blocks and loops and ifs: // // - Sync the value stack before going into the block in order to simplify exit // from the block: all exits from the block can assume that there are no // live registers except the one carrying the exit value. // - The block can accumulate a number of dead values on the stacks, so when // branching out of the block or falling out at the end be sure to // pop the appropriate stacks back to where they were on entry, while // preserving the exit value. // - A continue branch in a loop is much like an exit branch, but the branch // value must not be preserved. // - The exit value is always in a designated join register (type dependent). bool BaseCompiler::emitBlock() { ResultType params; if (!iter_.readBlock(&params)) { return false; } if (!deadCode_) { sync(); // Simplifies branching out from block } initControl(controlItem(), params); return true; } bool BaseCompiler::endBlock(ResultType type) { Control& block = controlItem(); if (deadCode_) { // Block does not fall through; reset stack. fr.resetStackHeight(block.stackHeight, type); popValueStackTo(block.stackSize); } else { // If the block label is used, we have a control join, so we need to shuffle // fallthrough values into place.
Otherwise if it's not a control join, we // can leave the value stack alone. MOZ_ASSERT(stk_.length() == block.stackSize + type.length()); if (block.label.used()) { popBlockResults(type, block.stackHeight, ContinuationKind::Fallthrough); } block.bceSafeOnExit &= bceSafe_; } // Bind after cleanup: branches out will have popped the stack. if (block.label.used()) { masm.bind(&block.label); if (deadCode_) { captureResultRegisters(type); deadCode_ = false; } if (!pushBlockResults(type)) { return false; } } bceSafe_ = block.bceSafeOnExit; return true; } bool BaseCompiler::emitLoop() { ResultType params; if (!iter_.readLoop(&params)) { return false; } if (!deadCode_) { sync(); // Simplifies branching out from block } initControl(controlItem(), params); bceSafe_ = 0; if (!deadCode_) { // Loop entry is a control join, so shuffle the entry parameters into the // well-known locations. if (!topBlockParams(params)) { return false; } masm.nopAlign(CodeAlignment); masm.bind(&controlItem(0).label); // The interrupt check barfs if there are live registers. sync(); if (!addInterruptCheck()) { return false; } } return true; } // The bodies of the "then" and "else" arms can be arbitrary sequences // of expressions, they push control and increment the nesting and can // even be targeted by jumps. A branch to the "if" block branches to // the exit of the if, ie, it's like "break". Consider: // // (func (result i32) // (if (i32.const 1) // (begin (br 1) (unreachable)) // (begin (unreachable))) // (i32.const 1)) // // The branch causes neither of the unreachable expressions to be // evaluated. bool BaseCompiler::emitIf() { ResultType params; Nothing unused_cond; if (!iter_.readIf(&params, &unused_cond)) { return false; } BranchState b(&controlItem().otherLabel, InvertBranch(true)); if (!deadCode_) { needResultRegisters(params); emitBranchSetup(&b); freeResultRegisters(params); sync(); } else { resetLatentOp(); } initControl(controlItem(), params); if (!deadCode_) { // Because params can flow immediately to results in the case of an empty // "then" or "else" block, and the result of an if/then is a join in // general, we shuffle params eagerly to the result allocations. if (!topBlockParams(params)) { return false; } if (!emitBranchPerform(&b)) { return false; } } return true; } bool BaseCompiler::endIfThen(ResultType type) { Control& ifThen = controlItem(); // The parameters to the "if" logically flow to both the "then" and "else" // blocks, but the "else" block is empty. Since we know that the "if" // type-checks, that means that the "else" parameters are the "else" results, // and that the "if"'s result type is the same as its parameter type. if (deadCode_) { // "then" arm does not fall through; reset stack. fr.resetStackHeight(ifThen.stackHeight, type); popValueStackTo(ifThen.stackSize); if (!ifThen.deadOnArrival) { captureResultRegisters(type); } } else { MOZ_ASSERT(stk_.length() == ifThen.stackSize + type.length()); // Assume we have a control join, so place results in block result // allocations.
popBlockResults(type, ifThen.stackHeight, ContinuationKind::Fallthrough); MOZ_ASSERT(!ifThen.deadOnArrival); } if (ifThen.otherLabel.used()) { masm.bind(&ifThen.otherLabel); } if (ifThen.label.used()) { masm.bind(&ifThen.label); } if (!deadCode_) { ifThen.bceSafeOnExit &= bceSafe_; } deadCode_ = ifThen.deadOnArrival; if (!deadCode_) { if (!pushBlockResults(type)) { return false; } } bceSafe_ = ifThen.bceSafeOnExit & ifThen.bceSafeOnEntry; return true; } bool BaseCompiler::emitElse() { ResultType params, results; NothingVector unused_thenValues; if (!iter_.readElse(&params, &results, &unused_thenValues)) { return false; } Control& ifThenElse = controlItem(0); // See comment in endIfThenElse, below. // Exit the "then" branch. ifThenElse.deadThenBranch = deadCode_; if (deadCode_) { fr.resetStackHeight(ifThenElse.stackHeight, results); popValueStackTo(ifThenElse.stackSize); } else { MOZ_ASSERT(stk_.length() == ifThenElse.stackSize + results.length()); popBlockResults(results, ifThenElse.stackHeight, ContinuationKind::Jump); freeResultRegisters(results); MOZ_ASSERT(!ifThenElse.deadOnArrival); } if (!deadCode_) { masm.jump(&ifThenElse.label); } if (ifThenElse.otherLabel.used()) { masm.bind(&ifThenElse.otherLabel); } // Reset to the "else" branch. if (!deadCode_) { ifThenElse.bceSafeOnExit &= bceSafe_; } deadCode_ = ifThenElse.deadOnArrival; bceSafe_ = ifThenElse.bceSafeOnEntry; fr.resetStackHeight(ifThenElse.stackHeight, params); if (!deadCode_) { captureResultRegisters(params); if (!pushBlockResults(params)) { return false; } } return true; } bool BaseCompiler::endIfThenElse(ResultType type) { Control& ifThenElse = controlItem(); // The expression type is not a reliable guide to what we'll find // on the stack, we could have (if E (i32.const 1) (unreachable)) // in which case the "else" arm is AnyType but the type of the // full expression is I32. So restore whatever's there, not what // we want to find there. The "then" arm has the same constraint. if (deadCode_) { // "then" arm does not fall through; reset stack. fr.resetStackHeight(ifThenElse.stackHeight, type); popValueStackTo(ifThenElse.stackSize); } else { MOZ_ASSERT(stk_.length() == ifThenElse.stackSize + type.length()); // Assume we have a control join, so place results in block result // allocations. popBlockResults(type, ifThenElse.stackHeight, ContinuationKind::Fallthrough); ifThenElse.bceSafeOnExit &= bceSafe_; MOZ_ASSERT(!ifThenElse.deadOnArrival); } if (ifThenElse.label.used()) { masm.bind(&ifThenElse.label); } bool joinLive = !ifThenElse.deadOnArrival && (!ifThenElse.deadThenBranch || !deadCode_ || ifThenElse.label.bound()); if (joinLive) { // No values were provided by the "then" path, but capture the values // provided by the "else" path.
if (deadCode_) { captureResultRegisters(type); } deadCode_ = false; } bceSafe_ = ifThenElse.bceSafeOnExit; if (!deadCode_) { if (!pushBlockResults(type)) { return false; } } return true; } bool BaseCompiler::emitEnd() { LabelKind kind; ResultType type; NothingVector unused_values; if (!iter_.readEnd(&kind, &type, &unused_values, &unused_values)) { return false; } switch (kind) { case LabelKind::Body: if (!endBlock(type)) { return false; } doReturn(ContinuationKind::Fallthrough); iter_.popEnd(); MOZ_ASSERT(iter_.controlStackEmpty()); return iter_.readFunctionEnd(iter_.end()); case LabelKind::Block: if (!endBlock(type)) { return false; } break; case LabelKind::Loop: // The end of a loop isn't a branch target, so we can just leave its // results on the expression stack to be consumed by the outer block. break; case LabelKind::Then: if (!endIfThen(type)) { return false; } break; case LabelKind::Else: if (!endIfThenElse(type)) { return false; } break; #ifdef ENABLE_WASM_EXCEPTIONS case LabelKind::Try: MOZ_CRASH("NYI"); break; case LabelKind::Catch: MOZ_CRASH("NYI"); break; #endif } iter_.popEnd(); return true; } bool BaseCompiler::emitBr() { uint32_t relativeDepth; ResultType type; NothingVector unused_values; if (!iter_.readBr(&relativeDepth, &type, &unused_values)) { return false; } if (deadCode_) { return true; } Control& target = controlItem(relativeDepth); target.bceSafeOnExit &= bceSafe_; // Save any values in the designated join registers, as if the target block // returned normally. popBlockResults(type, target.stackHeight, ContinuationKind::Jump); masm.jump(&target.label); // The registers holding the join values are free for the remainder of this // block. freeResultRegisters(type); deadCode_ = true; return true; } bool BaseCompiler::emitBrIf() { uint32_t relativeDepth; ResultType type; NothingVector unused_values; Nothing unused_condition; if (!iter_.readBrIf(&relativeDepth, &type, &unused_values, &unused_condition)) { return false; } if (deadCode_) { resetLatentOp(); return true; } Control& target = controlItem(relativeDepth); target.bceSafeOnExit &= bceSafe_; BranchState b(&target.label, target.stackHeight, InvertBranch(false), type); emitBranchSetup(&b); return emitBranchPerform(&b); } #ifdef ENABLE_WASM_FUNCTION_REFERENCES bool BaseCompiler::emitBrOnNull() { MOZ_ASSERT(!hasLatentOp()); uint32_t relativeDepth; ResultType type; NothingVector unused_values; Nothing unused_condition; if (!iter_.readBrOnNull(&relativeDepth, &type, &unused_values, &unused_condition)) { return false; } if (deadCode_) { return true; } Control& target = controlItem(relativeDepth); target.bceSafeOnExit &= bceSafe_; BranchState b(&target.label, target.stackHeight, InvertBranch(false), type); if (b.hasBlockResults()) { needResultRegisters(b.resultType); } RegPtr rp = popRef(); if (b.hasBlockResults()) { freeResultRegisters(b.resultType); } if (!jumpConditionalWithResults(&b, Assembler::Equal, rp, ImmWord(NULLREF_VALUE))) { return false; } pushRef(rp); return true; } #endif bool BaseCompiler::emitBrTable() { Uint32Vector depths; uint32_t defaultDepth; ResultType branchParams; NothingVector unused_values; Nothing unused_index; // N.B., `branchParams' gets set to the type of the default branch target. In // the presence of subtyping, it could be that the different branch targets // have different types. Here we rely on the assumption that the value // representations (e.g. Stk value types) of all branch target types are the // same, in the baseline compiler. 
Notably, this means that all Ref types // should be represented the same. if (!iter_.readBrTable(&depths, &defaultDepth, &branchParams, &unused_values, &unused_index)) { return false; } if (deadCode_) { return true; } // Don't use param registers for rc needIntegerResultRegisters(branchParams); // Table switch value always on top. RegI32 rc = popI32(); freeIntegerResultRegisters(branchParams); StackHeight resultsBase(0); if (!topBranchParams(branchParams, &resultsBase)) { return false; } Label dispatchCode; masm.branch32(Assembler::Below, rc, Imm32(depths.length()), &dispatchCode); // This is the out-of-range stub. rc is dead here but we don't need it. shuffleStackResultsBeforeBranch( resultsBase, controlItem(defaultDepth).stackHeight, branchParams); controlItem(defaultDepth).bceSafeOnExit &= bceSafe_; masm.jump(&controlItem(defaultDepth).label); // Emit stubs. rc is dead in all of these but we don't need it. // // The labels in the vector are in the TempAllocator and will // be freed by and by. // // TODO / OPTIMIZE (Bug 1316804): Branch directly to the case code if we // can, don't emit an intermediate stub. LabelVector stubs; if (!stubs.reserve(depths.length())) { return false; } for (uint32_t depth : depths) { stubs.infallibleEmplaceBack(NonAssertingLabel()); masm.bind(&stubs.back()); shuffleStackResultsBeforeBranch(resultsBase, controlItem(depth).stackHeight, branchParams); controlItem(depth).bceSafeOnExit &= bceSafe_; masm.jump(&controlItem(depth).label); } // Emit table. Label theTable; jumpTable(stubs, &theTable); // Emit indirect jump. rc is live here. tableSwitch(&theTable, rc, &dispatchCode); deadCode_ = true; // Clean up. freeI32(rc); popValueStackBy(branchParams.length()); return true; } #ifdef ENABLE_WASM_EXCEPTIONS bool BaseCompiler::emitTry() { ResultType params; if (!iter_.readTry(&params)) { return false; } if (deadCode_) { return true; } MOZ_CRASH("NYI"); } bool BaseCompiler::emitCatch() { LabelKind kind; uint32_t eventIndex; ResultType paramType, resultType; NothingVector unused_tryValues; if (!iter_.readCatch(&kind, &eventIndex, &paramType, &resultType, &unused_tryValues)) { return false; } if (deadCode_) { return true; } MOZ_CRASH("NYI"); } bool BaseCompiler::emitThrow() { uint32_t exnIndex; NothingVector unused_argValues; if (!iter_.readThrow(&exnIndex, &unused_argValues)) { return false; } if (deadCode_) { return true; } MOZ_CRASH("NYI"); } #endif bool BaseCompiler::emitDrop() { if (!iter_.readDrop()) { return false; } if (deadCode_) { return true; } dropValue(); return true; } void BaseCompiler::doReturn(ContinuationKind kind) { if (deadCode_) { return; } StackHeight height = controlOutermost().stackHeight; ResultType type = ResultType::Vector(funcType().results()); popBlockResults(type, height, kind); masm.jump(&returnLabel_); freeResultRegisters(type); } bool BaseCompiler::emitReturn() { NothingVector unused_values; if (!iter_.readReturn(&unused_values)) { return false; } if (deadCode_) { return true; } doReturn(ContinuationKind::Jump); deadCode_ = true; return true; } bool BaseCompiler::emitCallArgs(const ValTypeVector& argTypes, const StackResultsLoc& results, FunctionCall* baselineCall, CalleeOnStack calleeOnStack) { MOZ_ASSERT(!deadCode_); ArgTypeVector args(argTypes, results.stackResults()); uint32_t naturalArgCount = argTypes.length(); uint32_t abiArgCount = args.lengthWithStackResults(); startCallArgs(StackArgAreaSizeUnaligned(args), baselineCall); // Args are deeper on the stack than the stack result area, if any.
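  //
  // Illustrative value-stack layout (top of stack leftmost) for a
  // call_indirect with two natural args and one synthetic stack-result
  // pointer, so argsDepth below becomes 2:
  //
  //   [stack-result Stk] [callee index] [arg1] [arg0] ...
  //
  // and natural arg i is found at peek(naturalArgCount - 1 - i + argsDepth).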
size_t argsDepth = results.count(); // They're deeper than the callee too, for callIndirect. if (calleeOnStack == CalleeOnStack::True) { argsDepth++; } for (size_t i = 0; i < abiArgCount; ++i) { if (args.isNaturalArg(i)) { size_t naturalIndex = args.naturalIndex(i); size_t stackIndex = naturalArgCount - 1 - naturalIndex + argsDepth; passArg(argTypes[naturalIndex], peek(stackIndex), baselineCall); } else { // The synthetic stack result area pointer. ABIArg argLoc = baselineCall->abi.next(MIRType::Pointer); if (argLoc.kind() == ABIArg::Stack) { ScratchPtr scratch(*this); fr.computeOutgoingStackResultAreaPtr(results, scratch); masm.storePtr(scratch, Address(masm.getStackPointer(), argLoc.offsetFromArgBase())); } else { fr.computeOutgoingStackResultAreaPtr(results, RegPtr(argLoc.gpr())); } } } fr.loadTlsPtr(WasmTlsReg); return true; } void BaseCompiler::pushReturnValueOfCall(const FunctionCall& call, MIRType type) { switch (type) { case MIRType::Int32: { RegI32 rv = captureReturnedI32(); pushI32(rv); break; } case MIRType::Int64: { RegI64 rv = captureReturnedI64(); pushI64(rv); break; } case MIRType::Float32: { RegF32 rv = captureReturnedF32(call); pushF32(rv); break; } case MIRType::Double: { RegF64 rv = captureReturnedF64(call); pushF64(rv); break; } #ifdef ENABLE_WASM_SIMD case MIRType::Simd128: { RegV128 rv = captureReturnedV128(call); pushV128(rv); break; } #endif case MIRType::RefOrNull: { RegPtr rv = captureReturnedRef(); pushRef(rv); break; } default: // In particular, passing |type| as MIRType::Void or MIRType::Pointer to // this function is an error. MOZ_CRASH("Function return type"); } } bool BaseCompiler::pushStackResultsForCall(const ResultType& type, RegPtr temp, StackResultsLoc* loc) { if (!ABIResultIter::HasStackResults(type)) { return true; } // This method is the only one in the class that can increase stk_.length() by // an unbounded amount, so it's the only one that requires an allocation. // (The general case is handled in emitBody.) if (!stk_.reserve(stk_.length() + type.length())) { return false; } // Measure stack results. ABIResultIter i(type); size_t count = 0; for (; !i.done(); i.next()) { if (i.cur().onStack()) { count++; } } uint32_t bytes = i.stackBytesConsumedSoFar(); // Reserve space for the stack results. StackHeight resultsBase = fr.stackHeight(); uint32_t height = fr.prepareStackResultArea(resultsBase, bytes); // Push Stk values onto the value stack, and zero out Ref values. for (i.switchToPrev(); !i.done(); i.prev()) { const ABIResult& result = i.cur(); if (result.onStack()) { Stk v = captureStackResult(result, resultsBase, bytes); push(v); if (v.kind() == Stk::MemRef) { stackMapGenerator_.memRefsOnStk++; fr.storeImmediatePtrToStack(intptr_t(0), v.offs(), temp); } } } *loc = StackResultsLoc(bytes, count, height); return true; } // After a call, some results may be written to the stack result locations that // are pushed on the machine stack after any stack args. If there are stack // args and stack results, these results need to be shuffled down, as the args // are "consumed" by the call. 
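//
// Illustrative heights (numbers invented for the example): if the stack result
// area was created at results.height() == 48 and the call consumed 16 bytes of
// stack args, the result bytes are shuffled from height 48 to
// destHeight = 48 - 16 = 32, reclaiming the outgoing-argument space.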
void BaseCompiler::popStackResultsAfterCall(const StackResultsLoc& results, uint32_t stackArgBytes) { if (results.bytes() != 0) { popValueStackBy(results.count()); if (stackArgBytes != 0) { uint32_t srcHeight = results.height(); MOZ_ASSERT(srcHeight >= stackArgBytes + results.bytes()); uint32_t destHeight = srcHeight - stackArgBytes; fr.shuffleStackResultsTowardFP(srcHeight, destHeight, results.bytes(), ABINonArgReturnVolatileReg); } } } // For now, always sync() at the beginning of the call to easily save live // values. // // TODO / OPTIMIZE (Bug 1316806): We may be able to avoid a full sync(), since // all we want is to save live registers that won't be saved by the callee or // that we need for outgoing args - we don't need to sync the locals. We can // just push the necessary registers, it'll be like a lightweight sync. // // Even some of the pushing may be unnecessary if the registers will be consumed // by the call, because then what we want is parallel assignment to the argument // registers or onto the stack for outgoing arguments. A sync() is just // simpler. bool BaseCompiler::emitCall() { uint32_t lineOrBytecode = readCallSiteLineOrBytecode(); uint32_t funcIndex; NothingVector args_; if (!iter_.readCall(&funcIndex, &args_)) { return false; } if (deadCode_) { return true; } sync(); const FuncType& funcType = *moduleEnv_.funcs[funcIndex].type; bool import = moduleEnv_.funcIsImport(funcIndex); uint32_t numArgs = funcType.args().length(); size_t stackArgBytes = stackConsumed(numArgs); ResultType resultType(ResultType::Vector(funcType.results())); StackResultsLoc results; if (!pushStackResultsForCall(resultType, RegPtr(ABINonArgReg0), &results)) { return false; } FunctionCall baselineCall(lineOrBytecode); beginCall(baselineCall, UseABI::Wasm, import ? InterModule::True : InterModule::False); if (!emitCallArgs(funcType.args(), results, &baselineCall, CalleeOnStack::False)) { return false; } CodeOffset raOffset; if (import) { raOffset = callImport(moduleEnv_.funcImportGlobalDataOffsets[funcIndex], baselineCall); } else { raOffset = callDefinition(funcIndex, baselineCall); } if (!createStackMap("emitCall", raOffset)) { return false; } popStackResultsAfterCall(results, stackArgBytes); endCall(baselineCall, stackArgBytes); popValueStackBy(numArgs); captureCallResultRegisters(resultType); return pushCallResults(baselineCall, resultType, results); } bool BaseCompiler::emitCallIndirect() { uint32_t lineOrBytecode = readCallSiteLineOrBytecode(); uint32_t funcTypeIndex; uint32_t tableIndex; Nothing callee_; NothingVector args_; if (!iter_.readCallIndirect(&funcTypeIndex, &tableIndex, &callee_, &args_)) { return false; } if (deadCode_) { return true; } sync(); const FuncType& funcType = moduleEnv_.types[funcTypeIndex].funcType(); // Stack: ... arg1 .. 
argn callee uint32_t numArgs = funcType.args().length() + 1; size_t stackArgBytes = stackConsumed(numArgs); ResultType resultType(ResultType::Vector(funcType.results())); StackResultsLoc results; if (!pushStackResultsForCall(resultType, RegPtr(ABINonArgReg0), &results)) { return false; } FunctionCall baselineCall(lineOrBytecode); beginCall(baselineCall, UseABI::Wasm, InterModule::True); if (!emitCallArgs(funcType.args(), results, &baselineCall, CalleeOnStack::True)) { return false; } const Stk& callee = peek(results.count()); CodeOffset raOffset = callIndirect(funcTypeIndex, tableIndex, callee, baselineCall); if (!createStackMap("emitCallIndirect", raOffset)) { return false; } popStackResultsAfterCall(results, stackArgBytes); endCall(baselineCall, stackArgBytes); popValueStackBy(numArgs); captureCallResultRegisters(resultType); return pushCallResults(baselineCall, resultType, results); } void BaseCompiler::emitRound(RoundingMode roundingMode, ValType operandType) { if (operandType == ValType::F32) { RegF32 f0 = popF32(); roundF32(roundingMode, f0); pushF32(f0); } else if (operandType == ValType::F64) { RegF64 f0 = popF64(); roundF64(roundingMode, f0); pushF64(f0); } else { MOZ_CRASH("unexpected type"); } } bool BaseCompiler::emitUnaryMathBuiltinCall(SymbolicAddress callee, ValType operandType) { uint32_t lineOrBytecode = readCallSiteLineOrBytecode(); Nothing operand_; if (!iter_.readUnary(operandType, &operand_)) { return false; } if (deadCode_) { return true; } RoundingMode roundingMode; if (IsRoundingFunction(callee, &roundingMode) && supportsRoundInstruction(roundingMode)) { emitRound(roundingMode, operandType); return true; } sync(); ValTypeVector& signature = operandType == ValType::F32 ? SigF_ : SigD_; ValType retType = operandType; uint32_t numArgs = signature.length(); size_t stackSpace = stackConsumed(numArgs); StackResultsLoc noStackResults; FunctionCall baselineCall(lineOrBytecode); beginCall(baselineCall, UseABI::Builtin, InterModule::False); if (!emitCallArgs(signature, noStackResults, &baselineCall, CalleeOnStack::False)) { return false; } CodeOffset raOffset = builtinCall(callee, baselineCall); if (!createStackMap("emitUnaryMathBuiltin[..]", raOffset)) { return false; } endCall(baselineCall, stackSpace); popValueStackBy(numArgs); pushReturnValueOfCall(baselineCall, ToMIRType(retType)); return true; } #ifdef RABALDR_INT_DIV_I64_CALLOUT bool BaseCompiler::emitDivOrModI64BuiltinCall(SymbolicAddress callee, ValType operandType) { MOZ_ASSERT(operandType == ValType::I64); MOZ_ASSERT(!deadCode_); sync(); needI64(specific_.abiReturnRegI64); RegI64 rhs = popI64(); RegI64 srcDest = popI64ToSpecific(specific_.abiReturnRegI64); Label done; checkDivideByZeroI64(rhs); if (callee == SymbolicAddress::DivI64) { checkDivideSignedOverflowI64(rhs, srcDest, &done, ZeroOnOverflow(false)); } else if (callee == SymbolicAddress::ModI64) { checkDivideSignedOverflowI64(rhs, srcDest, &done, ZeroOnOverflow(true)); } masm.setupWasmABICall(); masm.passABIArg(srcDest.high); masm.passABIArg(srcDest.low); masm.passABIArg(rhs.high); masm.passABIArg(rhs.low); CodeOffset raOffset = masm.callWithABI(bytecodeOffset(), callee, mozilla::Some(fr.getTlsPtrOffset())); if (!createStackMap("emitDivOrModI64Bui[..]", raOffset)) { return false; } masm.bind(&done); freeI64(rhs); pushI64(srcDest); return true; } #endif // RABALDR_INT_DIV_I64_CALLOUT #ifdef RABALDR_I64_TO_FLOAT_CALLOUT bool BaseCompiler::emitConvertInt64ToFloatingCallout(SymbolicAddress callee, ValType operandType, ValType resultType) { sync(); RegI64 input = 
popI64(); FunctionCall call(0); masm.setupWasmABICall(); # ifdef JS_PUNBOX64 MOZ_CRASH("BaseCompiler platform hook: emitConvertInt64ToFloatingCallout"); # else masm.passABIArg(input.high); masm.passABIArg(input.low); # endif CodeOffset raOffset = masm.callWithABI( bytecodeOffset(), callee, mozilla::Some(fr.getTlsPtrOffset()), resultType == ValType::F32 ? MoveOp::FLOAT32 : MoveOp::DOUBLE); if (!createStackMap("emitConvertInt64To[..]", raOffset)) { return false; } freeI64(input); if (resultType == ValType::F32) { pushF32(captureReturnedF32(call)); } else { pushF64(captureReturnedF64(call)); } return true; } #endif // RABALDR_I64_TO_FLOAT_CALLOUT #ifdef RABALDR_FLOAT_TO_I64_CALLOUT // `Callee` always takes a double, so a float32 input must be converted. bool BaseCompiler::emitConvertFloatingToInt64Callout(SymbolicAddress callee, ValType operandType, ValType resultType) { RegF64 doubleInput; if (operandType == ValType::F32) { doubleInput = needF64(); RegF32 input = popF32(); masm.convertFloat32ToDouble(input, doubleInput); freeF32(input); } else { doubleInput = popF64(); } // We may need the value after the call for the ool check. RegF64 otherReg = needF64(); moveF64(doubleInput, otherReg); pushF64(otherReg); sync(); FunctionCall call(0); masm.setupWasmABICall(); masm.passABIArg(doubleInput, MoveOp::DOUBLE); CodeOffset raOffset = masm.callWithABI(bytecodeOffset(), callee, mozilla::Some(fr.getTlsPtrOffset())); if (!createStackMap("emitConvertFloatin[..]", raOffset)) { return false; } freeF64(doubleInput); RegI64 rv = captureReturnedI64(); RegF64 inputVal = popF64(); TruncFlags flags = 0; if (callee == SymbolicAddress::TruncateDoubleToUint64) { flags |= TRUNC_UNSIGNED; } if (callee == SymbolicAddress::SaturatingTruncateDoubleToInt64 || callee == SymbolicAddress::SaturatingTruncateDoubleToUint64) { flags |= TRUNC_SATURATING; } // If we're saturating, the callout will always produce the final result // value. Otherwise, the callout value will return 0x8000000000000000 // and we need to produce traps. OutOfLineCode* ool = nullptr; if (!(flags & TRUNC_SATURATING)) { // The OOL check just succeeds or fails, it does not generate a value. ool = addOutOfLineCode(new (alloc_) OutOfLineTruncateCheckF32OrF64ToI64( AnyReg(inputVal), rv, flags, bytecodeOffset())); if (!ool) { return false; } masm.branch64(Assembler::Equal, rv, Imm64(0x8000000000000000), ool->entry()); masm.bind(ool->rejoin()); } pushI64(rv); freeF64(inputVal); return true; } #endif // RABALDR_FLOAT_TO_I64_CALLOUT bool BaseCompiler::emitGetLocal() { uint32_t slot; if (!iter_.readGetLocal(locals_, &slot)) { return false; } if (deadCode_) { return true; } // Local loads are pushed unresolved, ie, they may be deferred // until needed, until they may be affected by a store, or until a // sync. This is intended to reduce register pressure. 
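  //
  // For example (illustrative), in
  //
  //   (i32.add (local.get 0) (i32.const 1))
  //
  // the local.get only pushes a Stk entry naming the local; the value is
  // loaded into a register when the add pops its operands, unless an
  // intervening local.set/local.tee of slot 0 or a sync() forces it to be
  // resolved earlier.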
switch (locals_[slot].kind()) { case ValType::I32: pushLocalI32(slot); break; case ValType::I64: pushLocalI64(slot); break; case ValType::V128: #ifdef ENABLE_WASM_SIMD pushLocalV128(slot); break; #else MOZ_CRASH("No SIMD support"); #endif case ValType::F64: pushLocalF64(slot); break; case ValType::F32: pushLocalF32(slot); break; case ValType::Ref: pushLocalRef(slot); break; } return true; } template <bool isSetLocal> bool BaseCompiler::emitSetOrTeeLocal(uint32_t slot) { if (deadCode_) { return true; } bceLocalIsUpdated(slot); switch (locals_[slot].kind()) { case ValType::I32: { RegI32 rv = popI32(); syncLocal(slot); fr.storeLocalI32(rv, localFromSlot(slot, MIRType::Int32)); if (isSetLocal) { freeI32(rv); } else { pushI32(rv); } break; } case ValType::I64: { RegI64 rv = popI64(); syncLocal(slot); fr.storeLocalI64(rv, localFromSlot(slot, MIRType::Int64)); if (isSetLocal) { freeI64(rv); } else { pushI64(rv); } break; } case ValType::F64: { RegF64 rv = popF64(); syncLocal(slot); fr.storeLocalF64(rv, localFromSlot(slot, MIRType::Double)); if (isSetLocal) { freeF64(rv); } else { pushF64(rv); } break; } case ValType::F32: { RegF32 rv = popF32(); syncLocal(slot); fr.storeLocalF32(rv, localFromSlot(slot, MIRType::Float32)); if (isSetLocal) { freeF32(rv); } else { pushF32(rv); } break; } case ValType::V128: { #ifdef ENABLE_WASM_SIMD RegV128 rv = popV128(); syncLocal(slot); fr.storeLocalV128(rv, localFromSlot(slot, MIRType::Simd128)); if (isSetLocal) { freeV128(rv); } else { pushV128(rv); } break; #else MOZ_CRASH("No SIMD support"); #endif } case ValType::Ref: { RegPtr rv = popRef(); syncLocal(slot); fr.storeLocalPtr(rv, localFromSlot(slot, MIRType::RefOrNull)); if (isSetLocal) { freeRef(rv); } else { pushRef(rv); } break; } } return true; } bool BaseCompiler::emitSetLocal() { uint32_t slot; Nothing unused_value; if (!iter_.readSetLocal(locals_, &slot, &unused_value)) { return false; } return emitSetOrTeeLocal<true>(slot); } bool BaseCompiler::emitTeeLocal() { uint32_t slot; Nothing unused_value; if (!iter_.readTeeLocal(locals_, &slot, &unused_value)) { return false; } return emitSetOrTeeLocal<false>(slot); } bool BaseCompiler::emitGetGlobal() { uint32_t id; if (!iter_.readGetGlobal(&id)) { return false; } if (deadCode_) { return true; } const GlobalDesc& global = moduleEnv_.globals[id]; if (global.isConstant()) { LitVal value = global.constantValue(); switch (value.type().kind()) { case ValType::I32: pushI32(value.i32()); break; case ValType::I64: pushI64(value.i64()); break; case ValType::F32: pushF32(value.f32()); break; case ValType::F64: pushF64(value.f64()); break; case ValType::Ref: pushRef(intptr_t(value.ref().forCompiledCode())); break; #ifdef ENABLE_WASM_SIMD case ValType::V128: pushV128(value.v128()); break; #endif default: MOZ_CRASH("Global constant type"); } return true; } switch (global.type().kind()) { case ValType::I32: { RegI32 rv = needI32(); ScratchI32 tmp(*this); masm.load32(addressOfGlobalVar(global, tmp), rv); pushI32(rv); break; } case ValType::I64: { RegI64 rv = needI64(); ScratchI32 tmp(*this); masm.load64(addressOfGlobalVar(global, tmp), rv); pushI64(rv); break; } case ValType::F32: { RegF32 rv = needF32(); ScratchI32 tmp(*this); masm.loadFloat32(addressOfGlobalVar(global, tmp), rv); pushF32(rv); break; } case ValType::F64: { RegF64 rv = needF64(); ScratchI32 tmp(*this); masm.loadDouble(addressOfGlobalVar(global, tmp), rv); pushF64(rv); break; } case ValType::Ref: { RegPtr rv = needRef(); ScratchI32 tmp(*this); masm.loadPtr(addressOfGlobalVar(global, tmp), rv); pushRef(rv); break; } #ifdef
ENABLE_WASM_SIMD case ValType::V128: { RegV128 rv = needV128(); ScratchI32 tmp(*this); masm.loadUnalignedSimd128(addressOfGlobalVar(global, tmp), rv); pushV128(rv); break; } #endif default: MOZ_CRASH("Global variable type"); break; } return true; } bool BaseCompiler::emitSetGlobal() { uint32_t id; Nothing unused_value; if (!iter_.readSetGlobal(&id, &unused_value)) { return false; } if (deadCode_) { return true; } const GlobalDesc& global = moduleEnv_.globals[id]; switch (global.type().kind()) { case ValType::I32: { RegI32 rv = popI32(); ScratchI32 tmp(*this); masm.store32(rv, addressOfGlobalVar(global, tmp)); freeI32(rv); break; } case ValType::I64: { RegI64 rv = popI64(); ScratchI32 tmp(*this); masm.store64(rv, addressOfGlobalVar(global, tmp)); freeI64(rv); break; } case ValType::F32: { RegF32 rv = popF32(); ScratchI32 tmp(*this); masm.storeFloat32(rv, addressOfGlobalVar(global, tmp)); freeF32(rv); break; } case ValType::F64: { RegF64 rv = popF64(); ScratchI32 tmp(*this); masm.storeDouble(rv, addressOfGlobalVar(global, tmp)); freeF64(rv); break; } case ValType::Ref: { RegPtr valueAddr(PreBarrierReg); needRef(valueAddr); { ScratchI32 tmp(*this); masm.computeEffectiveAddress(addressOfGlobalVar(global, tmp), valueAddr); } RegPtr rv = popRef(); // emitBarrieredStore consumes valueAddr if (!emitBarrieredStore(Nothing(), valueAddr, rv)) { return false; } freeRef(rv); break; } #ifdef ENABLE_WASM_SIMD case ValType::V128: { RegV128 rv = popV128(); ScratchI32 tmp(*this); masm.storeUnalignedSimd128(rv, addressOfGlobalVar(global, tmp)); freeV128(rv); break; } #endif default: MOZ_CRASH("Global variable type"); break; } return true; } // Bounds check elimination. // // We perform BCE on two kinds of address expressions: on constant heap pointers // that are known to be in the heap or will be handled by the out-of-bounds trap // handler; and on local variables that have been checked in dominating code // without being updated since. // // For an access through a constant heap pointer + an offset we can eliminate // the bounds check if the sum of the address and offset is below the sum of the // minimum memory length and the offset guard length. // // For an access through a local variable + an offset we can eliminate the // bounds check if the local variable has already been checked and has not been // updated since, and the offset is less than the guard limit. // // To track locals for which we can eliminate checks we use a bit vector // bceSafe_ that has a bit set for those locals whose bounds have been checked // and which have not subsequently been set. Initially this vector is zero. // // In straight-line code a bit is set when we perform a bounds check on an // access via the local and is reset when the variable is updated. // // In control flow, the bit vector is manipulated as follows. Each ControlItem // has a value bceSafeOnEntry, which is the value of bceSafe_ on entry to the // item, and a value bceSafeOnExit, which is initially ~0. On a branch (br, // brIf, brTable), we always AND the branch target's bceSafeOnExit with the // value of bceSafe_ at the branch point. On exiting an item by falling out of // it, provided we're not in dead code, we AND the current value of bceSafe_ // into the item's bceSafeOnExit. Additional processing depends on the item // type: // // - After a block, set bceSafe_ to the block's bceSafeOnExit. // // - On loop entry, after pushing the ControlItem, set bceSafe_ to zero; the // back edges would otherwise require us to iterate to a fixedpoint. 
// // - After a loop, the bceSafe_ is left unchanged, because only fallthrough // control flow will reach that point and the bceSafe_ value represents the // correct state of the fallthrough path. // // - Set bceSafe_ to the ControlItem's bceSafeOnEntry at both the 'then' branch // and the 'else' branch. // // - After an if-then-else, set bceSafe_ to the if-then-else's bceSafeOnExit. // // - After an if-then, set bceSafe_ to the if-then's bceSafeOnExit AND'ed with // the if-then's bceSafeOnEntry. // // Finally, when the debugger allows locals to be mutated we must disable BCE // for references via a local, by returning immediately from bceCheckLocal if // compilerEnv_.debugEnabled() is true. // // // Alignment check elimination. // // Alignment checks for atomic operations can be omitted if the pointer is a // constant and the pointer + offset is aligned. Alignment checking that can't // be omitted can still be simplified by checking only the pointer if the offset // is aligned. // // (In addition, alignment checking of the pointer can be omitted if the pointer // has been checked in dominating code, but we don't do that yet.) // TODO / OPTIMIZE (bug 1329576): There are opportunities to generate better // code by not moving a constant address with a zero offset into a register. RegI32 BaseCompiler::popMemoryAccess(MemoryAccessDesc* access, AccessCheck* check) { check->onlyPointerAlignment = (access->offset() & (access->byteSize() - 1)) == 0; int32_t addrTemp; if (popConstI32(&addrTemp)) { uint32_t addr = addrTemp; uint32_t offsetGuardLimit = GetMaxOffsetGuardLimit(moduleEnv_.hugeMemoryEnabled()); uint64_t ea = uint64_t(addr) + uint64_t(access->offset()); uint64_t limit = moduleEnv_.minMemoryLength + offsetGuardLimit; check->omitBoundsCheck = ea < limit; check->omitAlignmentCheck = (ea & (access->byteSize() - 1)) == 0; // Fold the offset into the pointer if we can, as this is always // beneficial. if (ea <= UINT32_MAX) { addr = uint32_t(ea); access->clearOffset(); } RegI32 r = needI32(); moveImm32(int32_t(addr), r); return r; } uint32_t local; if (peekLocalI32(&local)) { bceCheckLocal(access, check, local); } return popI32(); } void BaseCompiler::pushHeapBase() { #if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_ARM64) || \ defined(JS_CODEGEN_MIPS64) RegI64 heapBase = needI64(); moveI64(RegI64(Register64(HeapReg)), heapBase); pushI64(heapBase); #elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_MIPS32) RegI32 heapBase = needI32(); moveI32(RegI32(HeapReg), heapBase); pushI32(heapBase); #elif defined(JS_CODEGEN_X86) RegI32 heapBase = needI32(); fr.loadTlsPtr(heapBase); masm.loadPtr(Address(heapBase, offsetof(TlsData, memoryBase)), heapBase); pushI32(heapBase); #else MOZ_CRASH("BaseCompiler platform hook: pushHeapBase"); #endif } RegI32 BaseCompiler::maybeLoadTlsForAccess(const AccessCheck& check) { RegI32 tls; if (needTlsForAccess(check)) { tls = needI32(); fr.loadTlsPtr(tls); } return tls; } RegI32 BaseCompiler::maybeLoadTlsForAccess(const AccessCheck& check, RegI32 specific) { if (needTlsForAccess(check)) { fr.loadTlsPtr(specific); return specific; } return RegI32::Invalid(); } bool BaseCompiler::loadCommon(MemoryAccessDesc* access, AccessCheck check, ValType type) { RegI32 tls, temp1, temp2, temp3; needLoadTemps(*access, &temp1, &temp2, &temp3); switch (type.kind()) { case ValType::I32: { RegI32 rp = popMemoryAccess(access, &check); #ifdef JS_CODEGEN_ARM RegI32 rv = IsUnaligned(*access) ? 
needI32() : rp; #else RegI32 rv = rp; #endif tls = maybeLoadTlsForAccess(check); if (!load(access, &check, tls, rp, AnyReg(rv), temp1, temp2, temp3)) { return false; } pushI32(rv); if (rp != rv) { freeI32(rp); } break; } case ValType::I64: { RegI64 rv; RegI32 rp; #ifdef JS_CODEGEN_X86 rv = specific_.abiReturnRegI64; needI64(rv); rp = popMemoryAccess(access, &check); #else rp = popMemoryAccess(access, &check); rv = needI64(); #endif tls = maybeLoadTlsForAccess(check); if (!load(access, &check, tls, rp, AnyReg(rv), temp1, temp2, temp3)) { return false; } pushI64(rv); freeI32(rp); break; } case ValType::F32: { RegI32 rp = popMemoryAccess(access, &check); RegF32 rv = needF32(); tls = maybeLoadTlsForAccess(check); if (!load(access, &check, tls, rp, AnyReg(rv), temp1, temp2, temp3)) { return false; } pushF32(rv); freeI32(rp); break; } case ValType::F64: { RegI32 rp = popMemoryAccess(access, &check); RegF64 rv = needF64(); tls = maybeLoadTlsForAccess(check); if (!load(access, &check, tls, rp, AnyReg(rv), temp1, temp2, temp3)) { return false; } pushF64(rv); freeI32(rp); break; } #ifdef ENABLE_WASM_SIMD case ValType::V128: { RegI32 rp = popMemoryAccess(access, &check); RegV128 rv = needV128(); tls = maybeLoadTlsForAccess(check); if (!load(access, &check, tls, rp, AnyReg(rv), temp1, temp2, temp3)) { return false; } pushV128(rv); freeI32(rp); break; } #endif default: MOZ_CRASH("load type"); break; } maybeFreeI32(tls); maybeFreeI32(temp1); maybeFreeI32(temp2); maybeFreeI32(temp3); return true; } bool BaseCompiler::emitLoad(ValType type, Scalar::Type viewType) { LinearMemoryAddress<Nothing> addr; if (!iter_.readLoad(type, Scalar::byteSize(viewType), &addr)) { return false; } if (deadCode_) { return true; } MemoryAccessDesc access(viewType, addr.align, addr.offset, bytecodeOffset()); return loadCommon(&access, AccessCheck(), type); } bool BaseCompiler::storeCommon(MemoryAccessDesc* access, AccessCheck check, ValType resultType) { RegI32 tls; RegI32 temp = needStoreTemp(*access, resultType); switch (resultType.kind()) { case ValType::I32: { RegI32 rv = popI32(); RegI32 rp = popMemoryAccess(access, &check); tls = maybeLoadTlsForAccess(check); if (!store(access, &check, tls, rp, AnyReg(rv), temp)) { return false; } freeI32(rp); freeI32(rv); break; } case ValType::I64: { RegI64 rv = popI64(); RegI32 rp = popMemoryAccess(access, &check); tls = maybeLoadTlsForAccess(check); if (!store(access, &check, tls, rp, AnyReg(rv), temp)) { return false; } freeI32(rp); freeI64(rv); break; } case ValType::F32: { RegF32 rv = popF32(); RegI32 rp = popMemoryAccess(access, &check); tls = maybeLoadTlsForAccess(check); if (!store(access, &check, tls, rp, AnyReg(rv), temp)) { return false; } freeI32(rp); freeF32(rv); break; } case ValType::F64: { RegF64 rv = popF64(); RegI32 rp = popMemoryAccess(access, &check); tls = maybeLoadTlsForAccess(check); if (!store(access, &check, tls, rp, AnyReg(rv), temp)) { return false; } freeI32(rp); freeF64(rv); break; } #ifdef ENABLE_WASM_SIMD case ValType::V128: { RegV128 rv = popV128(); RegI32 rp = popMemoryAccess(access, &check); tls = maybeLoadTlsForAccess(check); if (!store(access, &check, tls, rp, AnyReg(rv), temp)) { return false; } freeI32(rp); freeV128(rv); break; } #endif default: MOZ_CRASH("store type"); break; } maybeFreeI32(tls); maybeFreeI32(temp); return true; } bool BaseCompiler::emitStore(ValType resultType, Scalar::Type viewType) { LinearMemoryAddress<Nothing> addr; Nothing unused_value; if (!iter_.readStore(resultType, Scalar::byteSize(viewType), &addr, &unused_value)) { return false; } if
(deadCode_) { return true; } MemoryAccessDesc access(viewType, addr.align, addr.offset, bytecodeOffset()); return storeCommon(&access, AccessCheck(), resultType); } bool BaseCompiler::emitSelect(bool typed) { StackType type; Nothing unused_trueValue; Nothing unused_falseValue; Nothing unused_condition; if (!iter_.readSelect(typed, &type, &unused_trueValue, &unused_falseValue, &unused_condition)) { return false; } if (deadCode_) { resetLatentOp(); return true; } // I32 condition on top, then false, then true. Label done; BranchState b(&done); emitBranchSetup(&b); switch (type.valType().kind()) { case ValType::I32: { RegI32 r, rs; pop2xI32(&r, &rs); if (!emitBranchPerform(&b)) { return false; } moveI32(rs, r); masm.bind(&done); freeI32(rs); pushI32(r); break; } case ValType::I64: { #ifdef JS_CODEGEN_X86 // There may be as many as four Int64 values in registers at a time: two // for the latent branch operands, and two for the true/false values we // normally pop before executing the branch. On x86 this is one value // too many, so we need to generate more complicated code here, and for // simplicity's sake we do so even if the branch operands are not Int64. // However, the resulting control flow diamond is complicated since the // arms of the diamond will have to stay synchronized with respect to // their evaluation stack and regalloc state. To simplify further, we // use a double branch and a temporary boolean value for now. RegI32 temp = needI32(); moveImm32(0, temp); if (!emitBranchPerform(&b)) { return false; } moveImm32(1, temp); masm.bind(&done); Label trueValue; RegI64 r, rs; pop2xI64(&r, &rs); masm.branch32(Assembler::Equal, temp, Imm32(0), &trueValue); moveI64(rs, r); masm.bind(&trueValue); freeI32(temp); freeI64(rs); pushI64(r); #else RegI64 r, rs; pop2xI64(&r, &rs); if (!emitBranchPerform(&b)) { return false; } moveI64(rs, r); masm.bind(&done); freeI64(rs); pushI64(r); #endif break; } case ValType::F32: { RegF32 r, rs; pop2xF32(&r, &rs); if (!emitBranchPerform(&b)) { return false; } moveF32(rs, r); masm.bind(&done); freeF32(rs); pushF32(r); break; } case ValType::F64: { RegF64 r, rs; pop2xF64(&r, &rs); if (!emitBranchPerform(&b)) { return false; } moveF64(rs, r); masm.bind(&done); freeF64(rs); pushF64(r); break; } #ifdef ENABLE_WASM_SIMD case ValType::V128: { RegV128 r, rs; pop2xV128(&r, &rs); if (!emitBranchPerform(&b)) { return false; } moveV128(rs, r); masm.bind(&done); freeV128(rs); pushV128(r); break; } #endif case ValType::Ref: { RegPtr r, rs; pop2xRef(&r, &rs); if (!emitBranchPerform(&b)) { return false; } moveRef(rs, r); masm.bind(&done); freeRef(rs); pushRef(r); break; } default: { MOZ_CRASH("select type"); } } return true; } void BaseCompiler::emitCompareI32(Assembler::Condition compareOp, ValType compareType) { MOZ_ASSERT(compareType == ValType::I32); if (sniffConditionalControlCmp(compareOp, compareType)) { return; } int32_t c; if (popConstI32(&c)) { RegI32 r = popI32(); masm.cmp32Set(compareOp, r, Imm32(c), r); pushI32(r); } else { RegI32 r, rs; pop2xI32(&r, &rs); masm.cmp32Set(compareOp, r, rs, r); freeI32(rs); pushI32(r); } } void BaseCompiler::emitCompareI64(Assembler::Condition compareOp, ValType compareType) { MOZ_ASSERT(compareType == ValType::I64); if (sniffConditionalControlCmp(compareOp, compareType)) { return; } RegI64 rs0, rs1; pop2xI64(&rs0, &rs1); RegI32 rd(fromI64(rs0)); cmp64Set(compareOp, rs0, rs1, rd); freeI64(rs1); freeI64Except(rs0, rd); pushI32(rd); } void BaseCompiler::emitCompareF32(Assembler::DoubleCondition compareOp, ValType compareType) { 
MOZ_ASSERT(compareType == ValType::F32); if (sniffConditionalControlCmp(compareOp, compareType)) { return; } Label across; RegF32 rs0, rs1; pop2xF32(&rs0, &rs1); RegI32 rd = needI32(); moveImm32(1, rd); masm.branchFloat(compareOp, rs0, rs1, &across); moveImm32(0, rd); masm.bind(&across); freeF32(rs0); freeF32(rs1); pushI32(rd); } void BaseCompiler::emitCompareF64(Assembler::DoubleCondition compareOp, ValType compareType) { MOZ_ASSERT(compareType == ValType::F64); if (sniffConditionalControlCmp(compareOp, compareType)) { return; } Label across; RegF64 rs0, rs1; pop2xF64(&rs0, &rs1); RegI32 rd = needI32(); moveImm32(1, rd); masm.branchDouble(compareOp, rs0, rs1, &across); moveImm32(0, rd); masm.bind(&across); freeF64(rs0); freeF64(rs1); pushI32(rd); } void BaseCompiler::emitCompareRef(Assembler::Condition compareOp, ValType compareType) { MOZ_ASSERT(!sniffConditionalControlCmp(compareOp, compareType)); RegPtr rs1, rs2; pop2xRef(&rs1, &rs2); RegI32 rd = needI32(); masm.cmpPtrSet(compareOp, rs1, rs2, rd); freeRef(rs1); freeRef(rs2); pushI32(rd); } bool BaseCompiler::emitInstanceCall(uint32_t lineOrBytecode, const SymbolicAddressSignature& builtin, bool pushReturnedValue /*=true*/) { const MIRType* argTypes = builtin.argTypes; MOZ_ASSERT(argTypes[0] == MIRType::Pointer); sync(); uint32_t numNonInstanceArgs = builtin.numArgs - 1 /* instance */; size_t stackSpace = stackConsumed(numNonInstanceArgs); FunctionCall baselineCall(lineOrBytecode); beginCall(baselineCall, UseABI::System, InterModule::True); ABIArg instanceArg = reservePointerArgument(&baselineCall); startCallArgs(StackArgAreaSizeUnaligned(builtin), &baselineCall); for (uint32_t i = 1; i < builtin.numArgs; i++) { ValType t; switch (argTypes[i]) { case MIRType::Int32: t = ValType::I32; break; case MIRType::Int64: t = ValType::I64; break; case MIRType::RefOrNull: t = RefType::extern_(); break; case MIRType::Pointer: // Instance function args can now be uninterpreted pointers (eg, for // the cases PostBarrier and PostBarrierFilter) so we simply treat // them like the equivalently sized integer. t = sizeof(void*) == 4 ? ValType::I32 : ValType::I64; break; default: MOZ_CRASH("Unexpected type"); } passArg(t, peek(numNonInstanceArgs - i), &baselineCall); } CodeOffset raOffset = builtinInstanceMethodCall(builtin, instanceArg, baselineCall); if (!createStackMap("emitInstanceCall", raOffset)) { return false; } endCall(baselineCall, stackSpace); popValueStackBy(numNonInstanceArgs); // Note, many clients of emitInstanceCall currently assume that pushing the // result here does not destroy ReturnReg. // // Furthermore, clients assume that if builtin.retType != MIRType::None, the // callee will have returned a result and left it in ReturnReg for us to // find, and that that register will not be destroyed here (or above). if (pushReturnedValue) { // For the return type only, MIRType::None is used to indicate that the // call doesn't return a result, that is, returns a C/C++ "void". 
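    // (For example, emitMemoryGrow below relies on this path to push the
    // value returned by the SASigMemoryGrow call.)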
MOZ_ASSERT(builtin.retType != MIRType::None); pushReturnValueOfCall(baselineCall, builtin.retType); } return true; } bool BaseCompiler::emitMemoryGrow() { uint32_t lineOrBytecode = readCallSiteLineOrBytecode(); Nothing arg; if (!iter_.readMemoryGrow(&arg)) { return false; } if (deadCode_) { return true; } return emitInstanceCall(lineOrBytecode, SASigMemoryGrow); } bool BaseCompiler::emitMemorySize() { uint32_t lineOrBytecode = readCallSiteLineOrBytecode(); if (!iter_.readMemorySize()) { return false; } if (deadCode_) { return true; } return emitInstanceCall(lineOrBytecode, SASigMemorySize); } bool BaseCompiler::emitRefFunc() { uint32_t lineOrBytecode = readCallSiteLineOrBytecode(); uint32_t funcIndex; if (!iter_.readRefFunc(&funcIndex)) { return false; } if (deadCode_) { return true; } pushI32(funcIndex); return emitInstanceCall(lineOrBytecode, SASigRefFunc); } bool BaseCompiler::emitRefNull() { if (!iter_.readRefNull()) { return false; } if (deadCode_) { return true; } pushRef(NULLREF_VALUE); return true; } bool BaseCompiler::emitRefIsNull() { Nothing nothing; if (!iter_.readRefIsNull(&nothing)) { return false; } if (deadCode_) { return true; } RegPtr r = popRef(); RegI32 rd = narrowPtr(r); masm.cmpPtrSet(Assembler::Equal, r, ImmWord(NULLREF_VALUE), rd); pushI32(rd); return true; } #ifdef ENABLE_WASM_FUNCTION_REFERENCES bool BaseCompiler::emitRefAsNonNull() { Nothing nothing; if (!iter_.readRefAsNonNull(&nothing)) { return false; } if (deadCode_) { return true; } RegPtr rp = popRef(); Label ok; masm.branchTestPtr(Assembler::NonZero, rp, rp, &ok); trap(Trap::NullPointerDereference); masm.bind(&ok); pushRef(rp); return true; } #endif bool BaseCompiler::emitAtomicCmpXchg(ValType type, Scalar::Type viewType) { LinearMemoryAddress<Nothing> addr; Nothing unused; if (!iter_.readAtomicCmpXchg(&addr, type, Scalar::byteSize(viewType), &unused, &unused)) { return false; } if (deadCode_) { return true; } MemoryAccessDesc access(viewType, addr.align, addr.offset, bytecodeOffset(), Synchronization::Full()); if (Scalar::byteSize(viewType) <= 4) { PopAtomicCmpXchg32Regs regs(this, type, viewType); AccessCheck check; RegI32 rp = popMemoryAccess(&access, &check); RegI32 tls = maybeLoadTlsForAccess(check); auto memaddr = prepareAtomicMemoryAccess(&access, &check, tls, rp); regs.atomicCmpXchg32(access, memaddr); maybeFreeI32(tls); freeI32(rp); if (type == ValType::I64) { pushU32AsI64(regs.takeRd()); } else { pushI32(regs.takeRd()); } return true; } MOZ_ASSERT(type == ValType::I64 && Scalar::byteSize(viewType) == 8); PopAtomicCmpXchg64Regs regs(this); AccessCheck check; RegI32 rp = popMemoryAccess(&access, &check); #ifdef JS_CODEGEN_X86 ScratchEBX ebx(*this); RegI32 tls = maybeLoadTlsForAccess(check, ebx); auto memaddr = prepareAtomicMemoryAccess(&access, &check, tls, rp); regs.atomicCmpXchg64(access, memaddr, ebx); #else RegI32 tls = maybeLoadTlsForAccess(check); auto memaddr = prepareAtomicMemoryAccess(&access, &check, tls, rp); regs.atomicCmpXchg64(access, memaddr); maybeFreeI32(tls); #endif freeI32(rp); pushI64(regs.takeRd()); return true; } bool BaseCompiler::emitAtomicLoad(ValType type, Scalar::Type viewType) { LinearMemoryAddress<Nothing> addr; if (!iter_.readAtomicLoad(&addr, type, Scalar::byteSize(viewType))) { return false; } if (deadCode_) { return true; } MemoryAccessDesc access(viewType, addr.align, addr.offset, bytecodeOffset(), Synchronization::Load()); if (Scalar::byteSize(viewType) <= sizeof(void*)) { return loadCommon(&access, AccessCheck(), type); } MOZ_ASSERT(type == ValType::I64 && Scalar::byteSize(viewType) ==
8); #if defined(JS_64BIT) MOZ_CRASH("Should not happen"); #else PopAtomicLoad64Regs regs(this); AccessCheck check; RegI32 rp = popMemoryAccess(&access, &check); # ifdef JS_CODEGEN_X86 ScratchEBX ebx(*this); RegI32 tls = maybeLoadTlsForAccess(check, ebx); auto memaddr = prepareAtomicMemoryAccess(&access, &check, tls, rp); regs.atomicLoad64(access, memaddr, ebx); # else RegI32 tls = maybeLoadTlsForAccess(check); auto memaddr = prepareAtomicMemoryAccess(&access, &check, tls, rp); regs.atomicLoad64(access, memaddr); maybeFreeI32(tls); # endif freeI32(rp); pushI64(regs.takeRd()); return true; #endif // JS_64BIT } bool BaseCompiler::emitAtomicRMW(ValType type, Scalar::Type viewType, AtomicOp op) { LinearMemoryAddress addr; Nothing unused_value; if (!iter_.readAtomicRMW(&addr, type, Scalar::byteSize(viewType), &unused_value)) { return false; } if (deadCode_) { return true; } MemoryAccessDesc access(viewType, addr.align, addr.offset, bytecodeOffset(), Synchronization::Full()); if (Scalar::byteSize(viewType) <= 4) { PopAtomicRMW32Regs regs(this, type, viewType, op); AccessCheck check; RegI32 rp = popMemoryAccess(&access, &check); RegI32 tls = maybeLoadTlsForAccess(check); auto memaddr = prepareAtomicMemoryAccess(&access, &check, tls, rp); regs.atomicRMW32(access, memaddr, op); maybeFreeI32(tls); freeI32(rp); if (type == ValType::I64) { pushU32AsI64(regs.takeRd()); } else { pushI32(regs.takeRd()); } return true; } MOZ_ASSERT(type == ValType::I64 && Scalar::byteSize(viewType) == 8); PopAtomicRMW64Regs regs(this, op); AccessCheck check; RegI32 rp = popMemoryAccess(&access, &check); #ifdef JS_CODEGEN_X86 ScratchEBX ebx(*this); RegI32 tls = maybeLoadTlsForAccess(check, ebx); fr.pushPtr(regs.valueHigh()); fr.pushPtr(regs.valueLow()); Address value(esp, 0); auto memaddr = prepareAtomicMemoryAccess(&access, &check, tls, rp); regs.atomicRMW64(access, memaddr, op, value, ebx); fr.popBytes(8); #else RegI32 tls = maybeLoadTlsForAccess(check); auto memaddr = prepareAtomicMemoryAccess(&access, &check, tls, rp); regs.atomicRMW64(access, memaddr, op); maybeFreeI32(tls); #endif freeI32(rp); pushI64(regs.takeRd()); return true; } bool BaseCompiler::emitAtomicStore(ValType type, Scalar::Type viewType) { LinearMemoryAddress addr; Nothing unused_value; if (!iter_.readAtomicStore(&addr, type, Scalar::byteSize(viewType), &unused_value)) { return false; } if (deadCode_) { return true; } MemoryAccessDesc access(viewType, addr.align, addr.offset, bytecodeOffset(), Synchronization::Store()); if (Scalar::byteSize(viewType) <= sizeof(void*)) { return storeCommon(&access, AccessCheck(), type); } MOZ_ASSERT(type == ValType::I64 && Scalar::byteSize(viewType) == 8); #ifdef JS_64BIT MOZ_CRASH("Should not happen"); #else emitAtomicXchg64(&access, WantResult(false)); return true; #endif } bool BaseCompiler::emitAtomicXchg(ValType type, Scalar::Type viewType) { LinearMemoryAddress addr; Nothing unused_value; if (!iter_.readAtomicRMW(&addr, type, Scalar::byteSize(viewType), &unused_value)) { return false; } if (deadCode_) { return true; } AccessCheck check; MemoryAccessDesc access(viewType, addr.align, addr.offset, bytecodeOffset(), Synchronization::Full()); if (Scalar::byteSize(viewType) <= 4) { PopAtomicXchg32Regs regs(this, type, viewType); RegI32 rp = popMemoryAccess(&access, &check); RegI32 tls = maybeLoadTlsForAccess(check); auto memaddr = prepareAtomicMemoryAccess(&access, &check, tls, rp); regs.atomicXchg32(access, memaddr); maybeFreeI32(tls); freeI32(rp); if (type == ValType::I64) { pushU32AsI64(regs.takeRd()); } else { 
pushI32(regs.takeRd()); } return true; } MOZ_ASSERT(type == ValType::I64 && Scalar::byteSize(viewType) == 8); emitAtomicXchg64(&access, WantResult(true)); return true; } void BaseCompiler::emitAtomicXchg64(MemoryAccessDesc* access, WantResult wantResult) { PopAtomicXchg64Regs regs(this); AccessCheck check; RegI32 rp = popMemoryAccess(access, &check); #ifdef JS_CODEGEN_X86 ScratchEBX ebx(*this); RegI32 tls = maybeLoadTlsForAccess(check, ebx); auto memaddr = prepareAtomicMemoryAccess(access, &check, tls, rp); regs.atomicXchg64(*access, memaddr, ebx); #else RegI32 tls = maybeLoadTlsForAccess(check); auto memaddr = prepareAtomicMemoryAccess(access, &check, tls, rp); regs.atomicXchg64(*access, memaddr); maybeFreeI32(tls); #endif freeI32(rp); if (wantResult) { pushI64(regs.takeRd()); } } bool BaseCompiler::emitWait(ValType type, uint32_t byteSize) { uint32_t lineOrBytecode = readCallSiteLineOrBytecode(); Nothing nothing; LinearMemoryAddress<Nothing> addr; if (!iter_.readWait(&addr, type, byteSize, &nothing, &nothing)) { return false; } if (deadCode_) { return true; } switch (type.kind()) { case ValType::I32: { RegI64 timeout = popI64(); RegI32 val = popI32(); MemoryAccessDesc access(Scalar::Int32, addr.align, addr.offset, bytecodeOffset()); computeEffectiveAddress(&access); pushI32(val); pushI64(timeout); if (!emitInstanceCall(lineOrBytecode, SASigWaitI32)) { return false; } break; } case ValType::I64: { RegI64 timeout = popI64(); RegI64 val = popI64(); MemoryAccessDesc access(Scalar::Int64, addr.align, addr.offset, bytecodeOffset()); computeEffectiveAddress(&access); pushI64(val); pushI64(timeout); if (!emitInstanceCall(lineOrBytecode, SASigWaitI64)) { return false; } break; } default: MOZ_CRASH(); } return true; } bool BaseCompiler::emitWake() { uint32_t lineOrBytecode = readCallSiteLineOrBytecode(); Nothing nothing; LinearMemoryAddress<Nothing> addr; if (!iter_.readWake(&addr, &nothing)) { return false; } if (deadCode_) { return true; } RegI32 count = popI32(); MemoryAccessDesc access(Scalar::Int32, addr.align, addr.offset, bytecodeOffset()); computeEffectiveAddress(&access); pushI32(count); return emitInstanceCall(lineOrBytecode, SASigWake); } bool BaseCompiler::emitFence() { if (!iter_.readFence()) { return false; } if (deadCode_) { return true; } masm.memoryBarrier(MembarFull); return true; } bool BaseCompiler::emitMemCopy() { uint32_t lineOrBytecode = readCallSiteLineOrBytecode(); uint32_t dstMemOrTableIndex = 0; uint32_t srcMemOrTableIndex = 0; Nothing nothing; if (!iter_.readMemOrTableCopy(true, &dstMemOrTableIndex, &nothing, &srcMemOrTableIndex, &nothing, &nothing)) { return false; } if (deadCode_) { return true; } int32_t signedLength; if (MacroAssembler::SupportsFastUnalignedAccesses() && peekConstI32(&signedLength) && signedLength != 0 && uint32_t(signedLength) <= MaxInlineMemoryCopyLength) { return emitMemCopyInline(); } return emitMemCopyCall(lineOrBytecode); } bool BaseCompiler::emitMemCopyCall(uint32_t lineOrBytecode) { pushHeapBase(); if (!emitInstanceCall(lineOrBytecode, usesSharedMemory() ?
SASigMemCopyShared : SASigMemCopy, /*pushReturnedValue=*/false)) { return false; } return true; } bool BaseCompiler::emitMemCopyInline() { MOZ_ASSERT(MaxInlineMemoryCopyLength != 0); int32_t signedLength; MOZ_ALWAYS_TRUE(popConstI32(&signedLength)); uint32_t length = signedLength; MOZ_ASSERT(length != 0 && length <= MaxInlineMemoryCopyLength); RegI32 src = popI32(); RegI32 dest = popI32(); // Compute the number of copies of each width we will need to do size_t remainder = length; #ifdef JS_64BIT size_t numCopies8 = remainder / sizeof(uint64_t); remainder %= sizeof(uint64_t); #endif size_t numCopies4 = remainder / sizeof(uint32_t); remainder %= sizeof(uint32_t); size_t numCopies2 = remainder / sizeof(uint16_t); remainder %= sizeof(uint16_t); size_t numCopies1 = remainder; // Load all source bytes onto the value stack from low to high using the // widest transfer width we can for the system. We will trap without writing // anything if any source byte is out-of-bounds. bool omitBoundsCheck = false; size_t offset = 0; #ifdef JS_64BIT for (uint32_t i = 0; i < numCopies8; i++) { RegI32 temp = needI32(); moveI32(src, temp); pushI32(temp); MemoryAccessDesc access(Scalar::Int64, 1, offset, bytecodeOffset()); AccessCheck check; check.omitBoundsCheck = omitBoundsCheck; if (!loadCommon(&access, check, ValType::I64)) { return false; } offset += sizeof(uint64_t); omitBoundsCheck = true; } #endif for (uint32_t i = 0; i < numCopies4; i++) { RegI32 temp = needI32(); moveI32(src, temp); pushI32(temp); MemoryAccessDesc access(Scalar::Uint32, 1, offset, bytecodeOffset()); AccessCheck check; check.omitBoundsCheck = omitBoundsCheck; if (!loadCommon(&access, check, ValType::I32)) { return false; } offset += sizeof(uint32_t); omitBoundsCheck = true; } if (numCopies2) { RegI32 temp = needI32(); moveI32(src, temp); pushI32(temp); MemoryAccessDesc access(Scalar::Uint16, 1, offset, bytecodeOffset()); AccessCheck check; check.omitBoundsCheck = omitBoundsCheck; if (!loadCommon(&access, check, ValType::I32)) { return false; } offset += sizeof(uint16_t); omitBoundsCheck = true; } if (numCopies1) { RegI32 temp = needI32(); moveI32(src, temp); pushI32(temp); MemoryAccessDesc access(Scalar::Uint8, 1, offset, bytecodeOffset()); AccessCheck check; check.omitBoundsCheck = omitBoundsCheck; if (!loadCommon(&access, check, ValType::I32)) { return false; } } // Store all source bytes from the value stack to the destination from // high to low. We will trap without writing anything on the first store // if any dest byte is out-of-bounds. 
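  // The first store below is the one at the highest destination offset, so if
  // any destination byte is out of bounds then that first store is out of
  // bounds too; this is what makes the trap-before-writing guarantee above
  // hold.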
offset = length; omitBoundsCheck = false; if (numCopies1) { offset -= sizeof(uint8_t); RegI32 value = popI32(); RegI32 temp = needI32(); moveI32(dest, temp); pushI32(temp); pushI32(value); MemoryAccessDesc access(Scalar::Uint8, 1, offset, bytecodeOffset()); AccessCheck check; if (!storeCommon(&access, check, ValType::I32)) { return false; } omitBoundsCheck = true; } if (numCopies2) { offset -= sizeof(uint16_t); RegI32 value = popI32(); RegI32 temp = needI32(); moveI32(dest, temp); pushI32(temp); pushI32(value); MemoryAccessDesc access(Scalar::Uint16, 1, offset, bytecodeOffset()); AccessCheck check; check.omitBoundsCheck = omitBoundsCheck; if (!storeCommon(&access, check, ValType::I32)) { return false; } omitBoundsCheck = true; } for (uint32_t i = 0; i < numCopies4; i++) { offset -= sizeof(uint32_t); RegI32 value = popI32(); RegI32 temp = needI32(); moveI32(dest, temp); pushI32(temp); pushI32(value); MemoryAccessDesc access(Scalar::Uint32, 1, offset, bytecodeOffset()); AccessCheck check; check.omitBoundsCheck = omitBoundsCheck; if (!storeCommon(&access, check, ValType::I32)) { return false; } omitBoundsCheck = true; } #ifdef JS_64BIT for (uint32_t i = 0; i < numCopies8; i++) { offset -= sizeof(uint64_t); RegI64 value = popI64(); RegI32 temp = needI32(); moveI32(dest, temp); pushI32(temp); pushI64(value); MemoryAccessDesc access(Scalar::Int64, 1, offset, bytecodeOffset()); AccessCheck check; check.omitBoundsCheck = omitBoundsCheck; if (!storeCommon(&access, check, ValType::I64)) { return false; } omitBoundsCheck = true; } #endif freeI32(dest); freeI32(src); return true; } bool BaseCompiler::emitTableCopy() { uint32_t lineOrBytecode = readCallSiteLineOrBytecode(); uint32_t dstMemOrTableIndex = 0; uint32_t srcMemOrTableIndex = 0; Nothing nothing; if (!iter_.readMemOrTableCopy(false, &dstMemOrTableIndex, &nothing, &srcMemOrTableIndex, &nothing, &nothing)) { return false; } if (deadCode_) { return true; } pushI32(dstMemOrTableIndex); pushI32(srcMemOrTableIndex); if (!emitInstanceCall(lineOrBytecode, SASigTableCopy, /*pushReturnedValue=*/false)) { return false; } return true; } bool BaseCompiler::emitDataOrElemDrop(bool isData) { uint32_t lineOrBytecode = readCallSiteLineOrBytecode(); uint32_t segIndex = 0; if (!iter_.readDataOrElemDrop(isData, &segIndex)) { return false; } if (deadCode_) { return true; } // Despite the cast to int32_t, the callee regards the value as unsigned. pushI32(int32_t(segIndex)); return emitInstanceCall(lineOrBytecode, isData ? SASigDataDrop : SASigElemDrop, /*pushReturnedValue=*/false); } bool BaseCompiler::emitMemFill() { uint32_t lineOrBytecode = readCallSiteLineOrBytecode(); Nothing nothing; if (!iter_.readMemFill(&nothing, &nothing, &nothing)) { return false; } if (deadCode_) { return true; } int32_t signedLength; int32_t signedValue; if (MacroAssembler::SupportsFastUnalignedAccesses() && peek2xI32(&signedLength, &signedValue) && signedLength != 0 && uint32_t(signedLength) <= MaxInlineMemoryFillLength) { return emitMemFillInline(); } return emitMemFillCall(lineOrBytecode); } bool BaseCompiler::emitMemFillCall(uint32_t lineOrBytecode) { pushHeapBase(); return emitInstanceCall( lineOrBytecode, usesSharedMemory() ?
SASigMemFillShared : SASigMemFill, /*pushReturnedValue=*/false); } bool BaseCompiler::emitMemFillInline() { MOZ_ASSERT(MaxInlineMemoryFillLength != 0); int32_t signedLength; int32_t signedValue; MOZ_ALWAYS_TRUE(popConstI32(&signedLength)); MOZ_ALWAYS_TRUE(popConstI32(&signedValue)); uint32_t length = uint32_t(signedLength); uint32_t value = uint32_t(signedValue); MOZ_ASSERT(length != 0 && length <= MaxInlineMemoryFillLength); RegI32 dest = popI32(); // Compute the number of copies of each width we will need to do size_t remainder = length; #ifdef JS_64BIT size_t numCopies8 = remainder / sizeof(uint64_t); remainder %= sizeof(uint64_t); #endif size_t numCopies4 = remainder / sizeof(uint32_t); remainder %= sizeof(uint32_t); size_t numCopies2 = remainder / sizeof(uint16_t); remainder %= sizeof(uint16_t); size_t numCopies1 = remainder; MOZ_ASSERT(numCopies2 <= 1 && numCopies1 <= 1); // Generate splatted definitions for wider fills as needed #ifdef JS_64BIT uint64_t val8 = SplatByteToUInt<uint64_t>(value, 8); #endif uint32_t val4 = SplatByteToUInt<uint32_t>(value, 4); uint32_t val2 = SplatByteToUInt<uint32_t>(value, 2); uint32_t val1 = value; // Store the fill value to the destination from high to low. We will trap // without writing anything on the first store if any dest byte is // out-of-bounds. size_t offset = length; bool omitBoundsCheck = false; if (numCopies1) { offset -= sizeof(uint8_t); RegI32 temp = needI32(); moveI32(dest, temp); pushI32(temp); pushI32(val1); MemoryAccessDesc access(Scalar::Uint8, 1, offset, bytecodeOffset()); AccessCheck check; if (!storeCommon(&access, check, ValType::I32)) { return false; } omitBoundsCheck = true; } if (numCopies2) { offset -= sizeof(uint16_t); RegI32 temp = needI32(); moveI32(dest, temp); pushI32(temp); pushI32(val2); MemoryAccessDesc access(Scalar::Uint16, 1, offset, bytecodeOffset()); AccessCheck check; check.omitBoundsCheck = omitBoundsCheck; if (!storeCommon(&access, check, ValType::I32)) { return false; } omitBoundsCheck = true; } for (uint32_t i = 0; i < numCopies4; i++) { offset -= sizeof(uint32_t); RegI32 temp = needI32(); moveI32(dest, temp); pushI32(temp); pushI32(val4); MemoryAccessDesc access(Scalar::Uint32, 1, offset, bytecodeOffset()); AccessCheck check; check.omitBoundsCheck = omitBoundsCheck; if (!storeCommon(&access, check, ValType::I32)) { return false; } omitBoundsCheck = true; } #ifdef JS_64BIT for (uint32_t i = 0; i < numCopies8; i++) { offset -= sizeof(uint64_t); RegI32 temp = needI32(); moveI32(dest, temp); pushI32(temp); pushI64(val8); MemoryAccessDesc access(Scalar::Int64, 1, offset, bytecodeOffset()); AccessCheck check; check.omitBoundsCheck = omitBoundsCheck; if (!storeCommon(&access, check, ValType::I64)) { return false; } omitBoundsCheck = true; } #endif freeI32(dest); return true; } bool BaseCompiler::emitMemOrTableInit(bool isMem) { uint32_t lineOrBytecode = readCallSiteLineOrBytecode(); uint32_t segIndex = 0; uint32_t dstTableIndex = 0; Nothing nothing; if (!iter_.readMemOrTableInit(isMem, &segIndex, &dstTableIndex, &nothing, &nothing, &nothing)) { return false; } if (deadCode_) { return true; } pushI32(int32_t(segIndex)); if (isMem) { if (!emitInstanceCall(lineOrBytecode, SASigMemInit, /*pushReturnedValue=*/false)) { return false; } } else { pushI32(dstTableIndex); if (!emitInstanceCall(lineOrBytecode, SASigTableInit, /*pushReturnedValue=*/false)) { return false; } } return true; } #ifdef ENABLE_WASM_REFTYPES [[nodiscard]] bool BaseCompiler::emitTableFill() { uint32_t lineOrBytecode = readCallSiteLineOrBytecode(); Nothing nothing; uint32_t tableIndex;
if (!iter_.readTableFill(&tableIndex, &nothing, &nothing, &nothing)) { return false; } if (deadCode_) { return true; } // fill(start:u32, val:ref, len:u32, table:u32) -> u32 pushI32(tableIndex); return emitInstanceCall(lineOrBytecode, SASigTableFill, /*pushReturnedValue=*/false); } [[nodiscard]] bool BaseCompiler::emitTableGet() { uint32_t lineOrBytecode = readCallSiteLineOrBytecode(); Nothing index; uint32_t tableIndex; if (!iter_.readTableGet(&tableIndex, &index)) { return false; } if (deadCode_) { return true; } // get(index:u32, table:u32) -> uintptr_t(AnyRef) pushI32(tableIndex); if (!emitInstanceCall(lineOrBytecode, SASigTableGet, /*pushReturnedValue=*/false)) { return false; } // Push the resulting anyref back on the eval stack. NOTE: needRef() must // not kill the value in the register. RegPtr r = RegPtr(ReturnReg); needRef(r); pushRef(r); return true; } [[nodiscard]] bool BaseCompiler::emitTableGrow() { uint32_t lineOrBytecode = readCallSiteLineOrBytecode(); Nothing delta; Nothing initValue; uint32_t tableIndex; if (!iter_.readTableGrow(&tableIndex, &initValue, &delta)) { return false; } if (deadCode_) { return true; } // grow(initValue:anyref, delta:u32, table:u32) -> u32 pushI32(tableIndex); return emitInstanceCall(lineOrBytecode, SASigTableGrow); } [[nodiscard]] bool BaseCompiler::emitTableSet() { uint32_t lineOrBytecode = readCallSiteLineOrBytecode(); Nothing index, value; uint32_t tableIndex; if (!iter_.readTableSet(&tableIndex, &index, &value)) { return false; } if (deadCode_) { return true; } // set(index:u32, value:ref, table:u32) -> i32 pushI32(tableIndex); return emitInstanceCall(lineOrBytecode, SASigTableSet, /*pushReturnedValue=*/false); } [[nodiscard]] bool BaseCompiler::emitTableSize() { uint32_t lineOrBytecode = readCallSiteLineOrBytecode(); uint32_t tableIndex; if (!iter_.readTableSize(&tableIndex)) { return false; } if (deadCode_) { return true; } // size(table:u32) -> u32 pushI32(tableIndex); return emitInstanceCall(lineOrBytecode, SASigTableSize); } #endif bool BaseCompiler::emitStructNew() { uint32_t lineOrBytecode = readCallSiteLineOrBytecode(); uint32_t typeIndex; NothingVector args; if (!iter_.readStructNew(&typeIndex, &args)) { return false; } if (deadCode_) { return true; } // Allocate zeroed storage. The parameter to StructNew is an index into a // descriptor table that the instance has. // // Returns null on OOM. const StructType& structType = moduleEnv_.types[typeIndex].structType(); const TypeIdDesc& structTypeId = moduleEnv_.typeIds[typeIndex]; RegPtr rst = needRef(); fr.loadTlsPtr(WasmTlsReg); masm.loadWasmGlobalPtr(structTypeId.globalDataOffset(), rst); pushRef(rst); if (!emitInstanceCall(lineOrBytecode, SASigStructNew)) { return false; } // Optimization opportunity: Iterate backward to pop arguments off the // stack. This will generate more instructions than we want, since we // really only need to pop the stack once at the end, not for every element, // but to do better we need a bit more machinery to load elements off the // stack into registers. RegPtr rp = popRef(); RegPtr rdata = rp; if (!structType.isInline_) { rdata = needRef(); masm.loadPtr(Address(rp, OutlineTypedObject::offsetOfData()), rdata); } // Optimization opportunity: when the value being stored is a known // zero/null we need store nothing. This case may be somewhat common // because struct.new forces a value to be specified for every field.
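  // The operands of struct.new were pushed onto the value stack in field
  // order, so the loop below walks the fields in reverse and pops the last
  // field's value first.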
uint32_t fieldNo = structType.fields_.length(); while (fieldNo-- > 0) { uint32_t offs = structType.objectBaseFieldOffset(fieldNo); switch (structType.fields_[fieldNo].type.kind()) { case ValType::I32: { RegI32 r = popI32(); masm.store32(r, Address(rdata, offs)); freeI32(r); break; } case ValType::I64: { RegI64 r = popI64(); masm.store64(r, Address(rdata, offs)); freeI64(r); break; } case ValType::F32: { RegF32 r = popF32(); masm.storeFloat32(r, Address(rdata, offs)); freeF32(r); break; } case ValType::F64: { RegF64 r = popF64(); masm.storeDouble(r, Address(rdata, offs)); freeF64(r); break; } case ValType::Ref: { RegPtr value = popRef(); masm.storePtr(value, Address(rdata, offs)); // A write barrier is needed here for the extremely unlikely case // that the object is allocated in the tenured area - a result of // a GC artifact. Label skipBarrier; sync(); RegPtr rowner = rp; if (!structType.isInline_) { rowner = needRef(); masm.loadPtr(Address(rp, OutlineTypedObject::offsetOfOwner()), rowner); } RegPtr otherScratch = needRef(); EmitWasmPostBarrierGuard(masm, Some(rowner), otherScratch, value, &skipBarrier); freeRef(otherScratch); if (!structType.isInline_) { freeRef(rowner); } freeRef(value); // TODO/AnyRef-boxing: With boxed immediates and strings, the write // barrier is going to have to be more complicated. ASSERT_ANYREF_IS_JSOBJECT; pushRef(rp); // Save rp across the call RegPtr valueAddr = needRef(); masm.computeEffectiveAddress(Address(rdata, offs), valueAddr); if (!emitPostBarrierCall(valueAddr)) { // Consumes valueAddr return false; } popRef(rp); // Restore rp if (!structType.isInline_) { masm.loadPtr(Address(rp, OutlineTypedObject::offsetOfData()), rdata); } masm.bind(&skipBarrier); break; } default: { MOZ_CRASH("Unexpected field type"); } } } if (!structType.isInline_) { freeRef(rdata); } pushRef(rp); return true; } bool BaseCompiler::emitStructGet() { uint32_t typeIndex; uint32_t fieldIndex; Nothing nothing; if (!iter_.readStructGet(&typeIndex, &fieldIndex, &nothing)) { return false; } if (deadCode_) { return true; } const StructType& structType = moduleEnv_.types[typeIndex].structType(); RegPtr rp = popRef(); Label ok; masm.branchTestPtr(Assembler::NonZero, rp, rp, &ok); trap(Trap::NullPointerDereference); masm.bind(&ok); if (!structType.isInline_) { masm.loadPtr(Address(rp, OutlineTypedObject::offsetOfData()), rp); } uint32_t offs = structType.objectBaseFieldOffset(fieldIndex); switch (structType.fields_[fieldIndex].type.kind()) { case ValType::I32: { RegI32 r = needI32(); masm.load32(Address(rp, offs), r); pushI32(r); break; } case ValType::I64: { RegI64 r = needI64(); masm.load64(Address(rp, offs), r); pushI64(r); break; } case ValType::F32: { RegF32 r = needF32(); masm.loadFloat32(Address(rp, offs), r); pushF32(r); break; } case ValType::F64: { RegF64 r = needF64(); masm.loadDouble(Address(rp, offs), r); pushF64(r); break; } case ValType::Ref: { RegPtr r = needRef(); masm.loadPtr(Address(rp, offs), r); pushRef(r); break; } default: { MOZ_CRASH("Unexpected field type"); } } freeRef(rp); return true; } bool BaseCompiler::emitStructSet() { uint32_t typeIndex; uint32_t fieldIndex; Nothing nothing; if (!iter_.readStructSet(&typeIndex, &fieldIndex, &nothing, &nothing)) { return false; } if (deadCode_) { return true; } const StructType& structType = moduleEnv_.types[typeIndex].structType(); RegI32 ri; RegI64 rl; RegF32 rf; RegF64 rd; RegPtr rr; // Reserve this register early if we will need it so that it is not taken by // rr or rp.
RegPtr valueAddr; if (structType.fields_[fieldIndex].type.isReference()) { valueAddr = RegPtr(PreBarrierReg); needRef(valueAddr); } switch (structType.fields_[fieldIndex].type.kind()) { case ValType::I32: ri = popI32(); break; case ValType::I64: rl = popI64(); break; case ValType::F32: rf = popF32(); break; case ValType::F64: rd = popF64(); break; case ValType::Ref: rr = popRef(); break; default: MOZ_CRASH("Unexpected field type"); } RegPtr rp = popRef(); Label ok; masm.branchTestPtr(Assembler::NonZero, rp, rp, &ok); trap(Trap::NullPointerDereference); masm.bind(&ok); if (!structType.isInline_) { masm.loadPtr(Address(rp, OutlineTypedObject::offsetOfData()), rp); } uint32_t offs = structType.objectBaseFieldOffset(fieldIndex); switch (structType.fields_[fieldIndex].type.kind()) { case ValType::I32: { masm.store32(ri, Address(rp, offs)); freeI32(ri); break; } case ValType::I64: { masm.store64(rl, Address(rp, offs)); freeI64(rl); break; } case ValType::F32: { masm.storeFloat32(rf, Address(rp, offs)); freeF32(rf); break; } case ValType::F64: { masm.storeDouble(rd, Address(rp, offs)); freeF64(rd); break; } case ValType::Ref: { masm.computeEffectiveAddress(Address(rp, offs), valueAddr); // Bug 1617908. Ensure that if a TypedObject is not inline, then its // underlying ArrayBuffer also is not inline, or the barrier logic fails. static_assert(InlineTypedObject::MaxInlineBytes >= ArrayBufferObject::MaxInlineBytes); // emitBarrieredStore consumes valueAddr if (!emitBarrieredStore(structType.isInline_ ? Some(rp) : Nothing(), valueAddr, rr)) { return false; } freeRef(rr); break; } default: { MOZ_CRASH("Unexpected field type"); } } freeRef(rp); return true; } bool BaseCompiler::emitStructNarrow() { uint32_t lineOrBytecode = readCallSiteLineOrBytecode(); ValType inputType, outputType; Nothing nothing; if (!iter_.readStructNarrow(&inputType, &outputType, &nothing)) { return false; } if (deadCode_) { return true; } // struct.narrow validation ensures that these hold. MOZ_ASSERT(inputType.isEqRef() || moduleEnv_.types.isStructType(inputType.refType())); MOZ_ASSERT(outputType.isEqRef() || moduleEnv_.types.isStructType(outputType.refType())); MOZ_ASSERT_IF(outputType.isEqRef(), inputType.isEqRef()); // EqRef -> EqRef is a no-op, just leave the value on the stack. if (inputType.isEqRef() && outputType.isEqRef()) { return true; } RegPtr rp = popRef(); // Dynamic downcast eqref|(optref T) -> (optref U), leaves rp or null const TypeIdDesc& outputStructTypeId = moduleEnv_.typeIds[outputType.refType().typeIndex()]; RegPtr rst = needRef(); fr.loadTlsPtr(WasmTlsReg); masm.loadWasmGlobalPtr(outputStructTypeId.globalDataOffset(), rst); pushRef(rst); pushRef(rp); return emitInstanceCall(lineOrBytecode, SASigStructNarrow); } #ifdef ENABLE_WASM_SIMD // Emitter trampolines used by abstracted SIMD operations. Naming here follows // the SIMD spec pretty closely.
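// In the trampolines, 'rs' is a plain source, 'rd' is a plain destination,
// and 'rsd' is both a source and the destination, matching the two-address
// style of the underlying MacroAssembler operations.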
static void AndV128(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.bitwiseAndSimd128(rs, rsd); } static void OrV128(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.bitwiseOrSimd128(rs, rsd); } static void XorV128(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.bitwiseXorSimd128(rs, rsd); } static void AddI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.addInt8x16(rs, rsd); } static void AddI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.addInt16x8(rs, rsd); } static void AddI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.addInt32x4(rs, rsd); } static void AddF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.addFloat32x4(rs, rsd); } static void AddI64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.addInt64x2(rs, rsd); } static void AddF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.addFloat64x2(rs, rsd); } static void AddSatI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.addSatInt8x16(rs, rsd); } static void AddSatUI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.unsignedAddSatInt8x16(rs, rsd); } static void AddSatI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.addSatInt16x8(rs, rsd); } static void AddSatUI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.unsignedAddSatInt16x8(rs, rsd); } static void SubI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.subInt8x16(rs, rsd); } static void SubI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.subInt16x8(rs, rsd); } static void SubI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.subInt32x4(rs, rsd); } static void SubF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.subFloat32x4(rs, rsd); } static void SubI64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.subInt64x2(rs, rsd); } static void SubF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.subFloat64x2(rs, rsd); } static void SubSatI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.subSatInt8x16(rs, rsd); } static void SubSatUI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.unsignedSubSatInt8x16(rs, rsd); } static void SubSatI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.subSatInt16x8(rs, rsd); } static void SubSatUI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.unsignedSubSatInt16x8(rs, rsd); } static void MulI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.mulInt16x8(rs, rsd); } static void MulI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.mulInt32x4(rs, rsd); } static void MulF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.mulFloat32x4(rs, rsd); } # if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) static void MulI64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd, RegV128 temp) { masm.mulInt64x2(rs, rsd, temp); } # endif static void MulF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.mulFloat64x2(rs, rsd); } static void DivF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.divFloat32x4(rs, rsd); } static void DivF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.divFloat64x2(rs, rsd); } # if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) static void MinF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd, RegV128 temp1, RegV128 temp2) { masm.minFloat32x4(rs, rsd, temp1, temp2); } static void MinF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd, RegV128 temp1, RegV128 temp2) { masm.minFloat64x2(rs, rsd, temp1, temp2); } static void MaxF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd, 
RegV128 temp1, RegV128 temp2) { masm.maxFloat32x4(rs, rsd, temp1, temp2); } static void MaxF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd, RegV128 temp1, RegV128 temp2) { masm.maxFloat64x2(rs, rsd, temp1, temp2); } static void PMinF32x4(MacroAssembler& masm, RegV128 rsd, RegV128 rs, RhsDestOp) { masm.pseudoMinFloat32x4(rsd, rs); } static void PMinF64x2(MacroAssembler& masm, RegV128 rsd, RegV128 rs, RhsDestOp) { masm.pseudoMinFloat64x2(rsd, rs); } static void PMaxF32x4(MacroAssembler& masm, RegV128 rsd, RegV128 rs, RhsDestOp) { masm.pseudoMaxFloat32x4(rsd, rs); } static void PMaxF64x2(MacroAssembler& masm, RegV128 rsd, RegV128 rs, RhsDestOp) { masm.pseudoMaxFloat64x2(rsd, rs); } # elif defined(JS_CODEGEN_ARM64) static void MinF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.minFloat32x4(rs, rsd); } static void MinF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.minFloat64x2(rs, rsd); } static void MaxF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.maxFloat32x4(rs, rsd); } static void MaxF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.maxFloat64x2(rs, rsd); } static void PMinF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.pseudoMinFloat32x4(rs, rsd); } static void PMinF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.pseudoMinFloat64x2(rs, rsd); } static void PMaxF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.pseudoMaxFloat32x4(rs, rsd); } static void PMaxF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.pseudoMaxFloat64x2(rs, rsd); } # endif static void DotI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.widenDotInt16x8(rs, rsd); } static void CmpI8x16(MacroAssembler& masm, Assembler::Condition cond, RegV128 rs, RegV128 rsd) { masm.compareInt8x16(cond, rs, rsd); } static void CmpI16x8(MacroAssembler& masm, Assembler::Condition cond, RegV128 rs, RegV128 rsd) { masm.compareInt16x8(cond, rs, rsd); } static void CmpI32x4(MacroAssembler& masm, Assembler::Condition cond, RegV128 rs, RegV128 rsd) { masm.compareInt32x4(cond, rs, rsd); } # if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) static void CmpUI8x16(MacroAssembler& masm, Assembler::Condition cond, RegV128 rs, RegV128 rsd, RegV128 temp1, RegV128 temp2) { masm.unsignedCompareInt8x16(cond, rs, rsd, temp1, temp2); } static void CmpUI16x8(MacroAssembler& masm, Assembler::Condition cond, RegV128 rs, RegV128 rsd, RegV128 temp1, RegV128 temp2) { masm.unsignedCompareInt16x8(cond, rs, rsd, temp1, temp2); } static void CmpUI32x4(MacroAssembler& masm, Assembler::Condition cond, RegV128 rs, RegV128 rsd, RegV128 temp1, RegV128 temp2) { masm.unsignedCompareInt32x4(cond, rs, rsd, temp1, temp2); } # else static void CmpUI8x16(MacroAssembler& masm, Assembler::Condition cond, RegV128 rs, RegV128 rsd) { masm.compareInt8x16(cond, rs, rsd); } static void CmpUI16x8(MacroAssembler& masm, Assembler::Condition cond, RegV128 rs, RegV128 rsd) { masm.compareInt16x8(cond, rs, rsd); } static void CmpUI32x4(MacroAssembler& masm, Assembler::Condition cond, RegV128 rs, RegV128 rsd) { masm.compareInt32x4(cond, rs, rsd); } # endif static void CmpF32x4(MacroAssembler& masm, Assembler::Condition cond, RegV128 rs, RegV128 rsd) { masm.compareFloat32x4(cond, rs, rsd); } static void CmpF64x2(MacroAssembler& masm, Assembler::Condition cond, RegV128 rs, RegV128 rsd) { masm.compareFloat64x2(cond, rs, rsd); } static void NegI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rd) { masm.negInt8x16(rs, rd); } static void NegI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rd) { 
masm.negInt16x8(rs, rd); } static void NegI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rd) { masm.negInt32x4(rs, rd); } static void NegI64x2(MacroAssembler& masm, RegV128 rs, RegV128 rd) { masm.negInt64x2(rs, rd); } static void NegF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rd) { masm.negFloat32x4(rs, rd); } static void NegF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rd) { masm.negFloat64x2(rs, rd); } static void AbsF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rd) { masm.absFloat32x4(rs, rd); } static void AbsF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rd) { masm.absFloat64x2(rs, rd); } static void SqrtF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rd) { masm.sqrtFloat32x4(rs, rd); } static void SqrtF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rd) { masm.sqrtFloat64x2(rs, rd); } static void CeilF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rd) { masm.ceilFloat32x4(rs, rd); } static void FloorF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rd) { masm.floorFloat32x4(rs, rd); } static void TruncF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rd) { masm.truncFloat32x4(rs, rd); } static void NearestF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rd) { masm.nearestFloat32x4(rs, rd); } static void CeilF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rd) { masm.ceilFloat64x2(rs, rd); } static void FloorF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rd) { masm.floorFloat64x2(rs, rd); } static void TruncF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rd) { masm.truncFloat64x2(rs, rd); } static void NearestF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rd) { masm.nearestFloat64x2(rs, rd); } static void NotV128(MacroAssembler& masm, RegV128 rs, RegV128 rd) { masm.bitwiseNotSimd128(rs, rd); } # if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) static void ShiftLeftI8x16(MacroAssembler& masm, RegI32 rs, RegV128 rsd, RegI32 temp1, RegV128 temp2) { masm.leftShiftInt8x16(rs, rsd, temp1, temp2); } static void ShiftLeftI16x8(MacroAssembler& masm, RegI32 rs, RegV128 rsd, RegI32 temp) { masm.leftShiftInt16x8(rs, rsd, temp); } static void ShiftLeftI32x4(MacroAssembler& masm, RegI32 rs, RegV128 rsd, RegI32 temp) { masm.leftShiftInt32x4(rs, rsd, temp); } static void ShiftLeftI64x2(MacroAssembler& masm, RegI32 rs, RegV128 rsd, RegI32 temp) { masm.leftShiftInt64x2(rs, rsd, temp); } static void ShiftRightI8x16(MacroAssembler& masm, RegI32 rs, RegV128 rsd, RegI32 temp1, RegV128 temp2) { masm.rightShiftInt8x16(rs, rsd, temp1, temp2); } static void ShiftRightUI8x16(MacroAssembler& masm, RegI32 rs, RegV128 rsd, RegI32 temp1, RegV128 temp2) { masm.unsignedRightShiftInt8x16(rs, rsd, temp1, temp2); } static void ShiftRightI16x8(MacroAssembler& masm, RegI32 rs, RegV128 rsd, RegI32 temp) { masm.rightShiftInt16x8(rs, rsd, temp); } static void ShiftRightUI16x8(MacroAssembler& masm, RegI32 rs, RegV128 rsd, RegI32 temp) { masm.unsignedRightShiftInt16x8(rs, rsd, temp); } static void ShiftRightI32x4(MacroAssembler& masm, RegI32 rs, RegV128 rsd, RegI32 temp) { masm.rightShiftInt32x4(rs, rsd, temp); } static void ShiftRightUI32x4(MacroAssembler& masm, RegI32 rs, RegV128 rsd, RegI32 temp) { masm.unsignedRightShiftInt32x4(rs, rsd, temp); } static void ShiftRightUI64x2(MacroAssembler& masm, RegI32 rs, RegV128 rsd, RegI32 temp) { masm.unsignedRightShiftInt64x2(rs, rsd, temp); } # elif defined(JS_CODEGEN_ARM64) static void ShiftLeftI8x16(MacroAssembler& masm, RegI32 rs, RegV128 rsd) { masm.leftShiftInt8x16(rs, rsd); } static void ShiftLeftI16x8(MacroAssembler& masm, RegI32 rs, RegV128 rsd) { 
masm.leftShiftInt16x8(rs, rsd); } static void ShiftLeftI32x4(MacroAssembler& masm, RegI32 rs, RegV128 rsd) { masm.leftShiftInt32x4(rs, rsd); } static void ShiftLeftI64x2(MacroAssembler& masm, RegI32 rs, RegV128 rsd) { masm.leftShiftInt64x2(rs, rsd); } static void ShiftRightI8x16(MacroAssembler& masm, RegI32 rs, RegV128 rsd, RegV128 temp) { masm.rightShiftInt8x16(rs, rsd, temp); } static void ShiftRightUI8x16(MacroAssembler& masm, RegI32 rs, RegV128 rsd, RegV128 temp) { masm.unsignedRightShiftInt8x16(rs, rsd, temp); } static void ShiftRightI16x8(MacroAssembler& masm, RegI32 rs, RegV128 rsd, RegV128 temp) { masm.rightShiftInt16x8(rs, rsd, temp); } static void ShiftRightUI16x8(MacroAssembler& masm, RegI32 rs, RegV128 rsd, RegV128 temp) { masm.unsignedRightShiftInt16x8(rs, rsd, temp); } static void ShiftRightI32x4(MacroAssembler& masm, RegI32 rs, RegV128 rsd, RegV128 temp) { masm.rightShiftInt32x4(rs, rsd, temp); } static void ShiftRightUI32x4(MacroAssembler& masm, RegI32 rs, RegV128 rsd, RegV128 temp) { masm.unsignedRightShiftInt32x4(rs, rsd, temp); } # endif static void AverageUI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.unsignedAverageInt8x16(rs, rsd); } static void AverageUI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.unsignedAverageInt16x8(rs, rsd); } static void MinI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.minInt8x16(rs, rsd); } static void MinUI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.unsignedMinInt8x16(rs, rsd); } static void MaxI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.maxInt8x16(rs, rsd); } static void MaxUI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.unsignedMaxInt8x16(rs, rsd); } static void MinI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.minInt16x8(rs, rsd); } static void MinUI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.unsignedMinInt16x8(rs, rsd); } static void MaxI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.maxInt16x8(rs, rsd); } static void MaxUI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.unsignedMaxInt16x8(rs, rsd); } static void MinI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.minInt32x4(rs, rsd); } static void MinUI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.unsignedMinInt32x4(rs, rsd); } static void MaxI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.maxInt32x4(rs, rsd); } static void MaxUI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.unsignedMaxInt32x4(rs, rsd); } static void NarrowI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.narrowInt16x8(rs, rsd); } static void NarrowUI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.unsignedNarrowInt16x8(rs, rsd); } static void NarrowI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.narrowInt32x4(rs, rsd); } static void NarrowUI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.unsignedNarrowInt32x4(rs, rsd); } static void WidenLowI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rd) { masm.widenLowInt8x16(rs, rd); } static void WidenHighI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rd) { masm.widenHighInt8x16(rs, rd); } static void WidenLowUI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rd) { masm.unsignedWidenLowInt8x16(rs, rd); } static void WidenHighUI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rd) { masm.unsignedWidenHighInt8x16(rs, rd); } static void WidenLowI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rd) { masm.widenLowInt16x8(rs, rd); } static void 
WidenHighI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rd) { masm.widenHighInt16x8(rs, rd); } static void WidenLowUI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rd) { masm.unsignedWidenLowInt16x8(rs, rd); } static void WidenHighUI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rd) { masm.unsignedWidenHighInt16x8(rs, rd); } static void AbsI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rd) { masm.absInt8x16(rs, rd); } static void AbsI16x8(MacroAssembler& masm, RegV128 rs, RegV128 rd) { masm.absInt16x8(rs, rd); } static void AbsI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rd) { masm.absInt32x4(rs, rd); } static void ExtractLaneI8x16(MacroAssembler& masm, uint32_t laneIndex, RegV128 rs, RegI32 rd) { masm.extractLaneInt8x16(laneIndex, rs, rd); } static void ExtractLaneUI8x16(MacroAssembler& masm, uint32_t laneIndex, RegV128 rs, RegI32 rd) { masm.unsignedExtractLaneInt8x16(laneIndex, rs, rd); } static void ExtractLaneI16x8(MacroAssembler& masm, uint32_t laneIndex, RegV128 rs, RegI32 rd) { masm.extractLaneInt16x8(laneIndex, rs, rd); } static void ExtractLaneUI16x8(MacroAssembler& masm, uint32_t laneIndex, RegV128 rs, RegI32 rd) { masm.unsignedExtractLaneInt16x8(laneIndex, rs, rd); } static void ExtractLaneI32x4(MacroAssembler& masm, uint32_t laneIndex, RegV128 rs, RegI32 rd) { masm.extractLaneInt32x4(laneIndex, rs, rd); } static void ExtractLaneI64x2(MacroAssembler& masm, uint32_t laneIndex, RegV128 rs, RegI64 rd) { masm.extractLaneInt64x2(laneIndex, rs, rd); } static void ExtractLaneF32x4(MacroAssembler& masm, uint32_t laneIndex, RegV128 rs, RegF32 rd) { masm.extractLaneFloat32x4(laneIndex, rs, rd); } static void ExtractLaneF64x2(MacroAssembler& masm, uint32_t laneIndex, RegV128 rs, RegF64 rd) { masm.extractLaneFloat64x2(laneIndex, rs, rd); } static void ReplaceLaneI8x16(MacroAssembler& masm, uint32_t laneIndex, RegI32 rs, RegV128 rsd) { masm.replaceLaneInt8x16(laneIndex, rs, rsd); } static void ReplaceLaneI16x8(MacroAssembler& masm, uint32_t laneIndex, RegI32 rs, RegV128 rsd) { masm.replaceLaneInt16x8(laneIndex, rs, rsd); } static void ReplaceLaneI32x4(MacroAssembler& masm, uint32_t laneIndex, RegI32 rs, RegV128 rsd) { masm.replaceLaneInt32x4(laneIndex, rs, rsd); } static void ReplaceLaneI64x2(MacroAssembler& masm, uint32_t laneIndex, RegI64 rs, RegV128 rsd) { masm.replaceLaneInt64x2(laneIndex, rs, rsd); } static void ReplaceLaneF32x4(MacroAssembler& masm, uint32_t laneIndex, RegF32 rs, RegV128 rsd) { masm.replaceLaneFloat32x4(laneIndex, rs, rsd); } static void ReplaceLaneF64x2(MacroAssembler& masm, uint32_t laneIndex, RegF64 rs, RegV128 rsd) { masm.replaceLaneFloat64x2(laneIndex, rs, rsd); } static void SplatI8x16(MacroAssembler& masm, RegI32 rs, RegV128 rd) { masm.splatX16(rs, rd); } static void SplatI16x8(MacroAssembler& masm, RegI32 rs, RegV128 rd) { masm.splatX8(rs, rd); } static void SplatI32x4(MacroAssembler& masm, RegI32 rs, RegV128 rd) { masm.splatX4(rs, rd); } static void SplatI64x2(MacroAssembler& masm, RegI64 rs, RegV128 rd) { masm.splatX2(rs, rd); } static void SplatF32x4(MacroAssembler& masm, RegF32 rs, RegV128 rd) { masm.splatX4(rs, rd); } static void SplatF64x2(MacroAssembler& masm, RegF64 rs, RegV128 rd) { masm.splatX2(rs, rd); } // This is the same op independent of lanes: it tests for any nonzero bit. 
static void AnyTrue(MacroAssembler& masm, RegV128 rs, RegI32 rd) { masm.anyTrueSimd128(rs, rd); } static void AllTrueI8x16(MacroAssembler& masm, RegV128 rs, RegI32 rd) { masm.allTrueInt8x16(rs, rd); } static void AllTrueI16x8(MacroAssembler& masm, RegV128 rs, RegI32 rd) { masm.allTrueInt16x8(rs, rd); } static void AllTrueI32x4(MacroAssembler& masm, RegV128 rs, RegI32 rd) { masm.allTrueInt32x4(rs, rd); } # if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) static void BitmaskI8x16(MacroAssembler& masm, RegV128 rs, RegI32 rd) { masm.bitmaskInt8x16(rs, rd); } static void BitmaskI16x8(MacroAssembler& masm, RegV128 rs, RegI32 rd) { masm.bitmaskInt16x8(rs, rd); } static void BitmaskI32x4(MacroAssembler& masm, RegV128 rs, RegI32 rd) { masm.bitmaskInt32x4(rs, rd); } static void Swizzle(MacroAssembler& masm, RegV128 rs, RegV128 rsd, RegV128 temp) { masm.swizzleInt8x16(rs, rsd, temp); } # elif defined(JS_CODEGEN_ARM64) static void BitmaskI8x16(MacroAssembler& masm, RegV128 rs, RegI32 rd, RegV128 temp) { masm.bitmaskInt8x16(rs, rd, temp); } static void BitmaskI16x8(MacroAssembler& masm, RegV128 rs, RegI32 rd, RegV128 temp) { masm.bitmaskInt16x8(rs, rd, temp); } static void BitmaskI32x4(MacroAssembler& masm, RegV128 rs, RegI32 rd, RegV128 temp) { masm.bitmaskInt32x4(rs, rd, temp); } static void Swizzle(MacroAssembler& masm, RegV128 rs, RegV128 rsd) { masm.swizzleInt8x16(rs, rsd); } # endif static void ConvertI32x4ToF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rd) { masm.convertInt32x4ToFloat32x4(rs, rd); } static void ConvertUI32x4ToF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rd) { masm.unsignedConvertInt32x4ToFloat32x4(rs, rd); } static void ConvertF32x4ToI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rd) { masm.truncSatFloat32x4ToInt32x4(rs, rd); } static void ConvertF32x4ToUI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rd, RegV128 temp) { masm.unsignedTruncSatFloat32x4ToInt32x4(rs, rd, temp); } template void BaseCompiler::emitVectorUnop(void (*op)(MacroAssembler& masm, SourceType rs, DestType rd)) { SourceType rs = pop(); DestType rd = need(); op(masm, rs, rd); free(rs); push(rd); } template void BaseCompiler::emitVectorUnop(void (*op)(MacroAssembler& masm, SourceType rs, DestType rd, TempType temp)) { SourceType rs = pop(); DestType rd = need(); TempType temp = need(); op(masm, rs, rd, temp); free(rs); free(temp); push(rd); } template void BaseCompiler::emitVectorUnop(ImmType immediate, void (*op)(MacroAssembler&, ImmType, SourceType, DestType)) { SourceType rs = pop(); DestType rd = need(); op(masm, immediate, rs, rd); free(rs); push(rd); } template void BaseCompiler::emitVectorBinop(void (*op)(MacroAssembler& masm, RhsType src, LhsDestType srcDest)) { RhsType rs = pop(); LhsDestType rsd = pop(); op(masm, rs, rsd); free(rs); push(rsd); } template void BaseCompiler::emitVectorBinop(void (*op)(MacroAssembler& masm, RhsDestType src, LhsType srcDest, RhsDestOp)) { RhsDestType rsd = pop(); LhsType rs = pop(); op(masm, rsd, rs, RhsDestOp::True); free(rs); push(rsd); } template void BaseCompiler::emitVectorBinop(void (*op)(MacroAssembler& masm, RhsType rs, LhsDestType rsd, TempType temp)) { RhsType rs = pop(); LhsDestType rsd = pop(); TempType temp = need(); op(masm, rs, rsd, temp); free(rs); free(temp); push(rsd); } template void BaseCompiler::emitVectorBinop(void (*op)(MacroAssembler& masm, RhsType rs, LhsDestType rsd, TempType1 temp1, TempType2 temp2)) { RhsType rs = pop(); LhsDestType rsd = pop(); TempType1 temp1 = need(); TempType2 temp2 = need(); op(masm, rs, rsd, temp1, temp2); 
free(rs); free(temp1); free(temp2); push(rsd); } template void BaseCompiler::emitVectorBinop(ImmType immediate, void (*op)(MacroAssembler&, ImmType, RhsType, LhsDestType)) { RhsType rs = pop(); LhsDestType rsd = pop(); op(masm, immediate, rs, rsd); free(rs); push(rsd); } template void BaseCompiler::emitVectorBinop(ImmType immediate, void (*op)(MacroAssembler&, ImmType, RhsType, LhsDestType, TempType1 temp1, TempType2 temp2)) { RhsType rs = pop(); LhsDestType rsd = pop(); TempType1 temp1 = need(); TempType2 temp2 = need(); op(masm, immediate, rs, rsd, temp1, temp2); free(rs); free(temp1); free(temp2); push(rsd); } void BaseCompiler::emitVectorAndNot() { // We want x & ~y but the available operation is ~x & y, so reverse the // operands. RegV128 r, rs; pop2xV128(&r, &rs); masm.bitwiseNotAndSimd128(r, rs); freeV128(r); pushV128(rs); } bool BaseCompiler::emitLoadSplat(Scalar::Type viewType) { // We can implement loadSplat mostly as load + splat because the push of the // result onto the value stack in loadCommon normally will not generate any // code, it will leave the value in a register which we will consume. LinearMemoryAddress addr; if (!iter_.readLoadSplat(Scalar::byteSize(viewType), &addr)) { return false; } if (deadCode_) { return true; } // We use uint types when we can on the general assumption that unsigned loads // might be smaller/faster on some platforms, because no sign extension needs // to be done after the sub-register load. MemoryAccessDesc access(viewType, addr.align, addr.offset, bytecodeOffset()); switch (viewType) { case Scalar::Uint8: if (!loadCommon(&access, AccessCheck(), ValType::I32)) { return false; } emitVectorUnop(SplatI8x16); break; case Scalar::Uint16: if (!loadCommon(&access, AccessCheck(), ValType::I32)) { return false; } emitVectorUnop(SplatI16x8); break; case Scalar::Uint32: if (!loadCommon(&access, AccessCheck(), ValType::I32)) { return false; } emitVectorUnop(SplatI32x4); break; case Scalar::Int64: if (!loadCommon(&access, AccessCheck(), ValType::I64)) { return false; } emitVectorUnop(SplatI64x2); break; default: MOZ_CRASH(); } return true; } bool BaseCompiler::emitLoadZero(Scalar::Type viewType) { // LoadZero has the structure of LoadSplat LinearMemoryAddress addr; if (!iter_.readLoadSplat(Scalar::byteSize(viewType), &addr)) { return false; } if (deadCode_) { return true; } MemoryAccessDesc access(viewType, addr.align, addr.offset, bytecodeOffset()); access.setZeroExtendSimd128Load(); return loadCommon(&access, AccessCheck(), ValType::V128); } bool BaseCompiler::emitLoadExtend(Scalar::Type viewType) { LinearMemoryAddress addr; if (!iter_.readLoadExtend(&addr)) { return false; } if (deadCode_) { return true; } MemoryAccessDesc access(Scalar::Int64, addr.align, addr.offset, bytecodeOffset()); if (!loadCommon(&access, AccessCheck(), ValType::I64)) { return false; } RegI64 rs = popI64(); RegV128 rd = needV128(); masm.moveGPR64ToDouble(rs, rd); switch (viewType) { case Scalar::Int8: masm.widenLowInt8x16(rd, rd); break; case Scalar::Uint8: masm.unsignedWidenLowInt8x16(rd, rd); break; case Scalar::Int16: masm.widenLowInt16x8(rd, rd); break; case Scalar::Uint16: masm.unsignedWidenLowInt16x8(rd, rd); break; case Scalar::Int32: masm.widenLowInt32x4(rd, rd); break; case Scalar::Uint32: masm.unsignedWidenLowInt32x4(rd, rd); break; default: MOZ_CRASH(); } freeI64(rs); pushV128(rd); return true; } bool BaseCompiler::emitBitselect() { Nothing unused_a, unused_b, unused_c; if (!iter_.readVectorSelect(&unused_a, &unused_b, &unused_c)) { return false; } if (deadCode_) { 
return true; } RegV128 rs3 = popV128(); // Control RegV128 rs2 = popV128(); // 'false' vector RegV128 rs1 = popV128(); // 'true' vector # if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) // On x86, certain register assignments will result in more compact code: we // want output=rs1 and tmp=rs3. Attend to this after we see what other // platforms want/need. RegV128 tmp = needV128(); // Distinguished tmp, for now masm.bitwiseSelectSimd128(rs3, rs1, rs2, rs1, tmp); freeV128(rs2); freeV128(rs3); freeV128(tmp); pushV128(rs1); # elif defined(JS_CODEGEN_ARM64) // Note register conventions differ significantly from x86. masm.bitwiseSelectSimd128(rs1, rs2, rs3); freeV128(rs1); freeV128(rs2); pushV128(rs3); # else MOZ_CRASH("NYI"); # endif return true; } bool BaseCompiler::emitVectorShuffle() { Nothing unused_a, unused_b; V128 shuffleMask; if (!iter_.readVectorShuffle(&unused_a, &unused_b, &shuffleMask)) { return false; } if (deadCode_) { return true; } RegV128 rd, rs; pop2xV128(&rd, &rs); masm.shuffleInt8x16(shuffleMask.bytes, rs, rd); freeV128(rs); pushV128(rd); return true; } // Signed case must be scalarized on x86/x64 and requires CL. // Signed and unsigned cases must be scalarized on ARM64. bool BaseCompiler::emitVectorShiftRightI64x2(bool isUnsigned) { Nothing unused_a, unused_b; if (!iter_.readVectorShift(&unused_a, &unused_b)) { return false; } if (deadCode_) { return true; } # if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) if (isUnsigned) { emitVectorBinop(ShiftRightUI64x2); return true; } # endif # if defined(JS_CODEGEN_X86) needI32(specific_.ecx); RegI32 count = popI32ToSpecific(specific_.ecx); # elif defined(JS_CODEGEN_X64) RegI32 count; if (Assembler::HasBMI2()) { count = popI32(); } else { needI32(specific_.ecx); count = popI32ToSpecific(specific_.ecx); } # elif defined(JS_CODEGEN_ARM64) RegI32 count = popI32(); # endif RegV128 lhsDest = popV128(); RegI64 tmp = needI64(); masm.and32(Imm32(63), count); masm.extractLaneInt64x2(0, lhsDest, tmp); if (isUnsigned) { masm.rshift64(count, tmp); } else { masm.rshift64Arithmetic(count, tmp); } masm.replaceLaneInt64x2(0, tmp, lhsDest); masm.extractLaneInt64x2(1, lhsDest, tmp); if (isUnsigned) { masm.rshift64(count, tmp); } else { masm.rshift64Arithmetic(count, tmp); } masm.replaceLaneInt64x2(1, tmp, lhsDest); freeI64(tmp); freeI32(count); pushV128(lhsDest); return true; } // Must be scalarized on ARM64. 
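// On ARM64 each 64-bit lane of the two operands is extracted into GPRs,
// multiplied with mul64, and the product is written back into the
// corresponding lane of the destination.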
bool BaseCompiler::emitVectorMulI64x2() { Nothing unused_a, unused_b; if (!iter_.readBinary(ValType::V128, &unused_a, &unused_b)) { return false; } if (deadCode_) { return true; } # if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) emitVectorBinop(MulI64x2); # elif defined(JS_CODEGEN_ARM64) RegV128 r, rs; pop2xV128(&r, &rs); RegI64 temp1 = needI64(); RegI64 temp2 = needI64(); masm.extractLaneInt64x2(0, r, temp1); masm.extractLaneInt64x2(0, rs, temp2); masm.mul64(temp2, temp1, Register::Invalid()); masm.replaceLaneInt64x2(0, temp1, r); masm.extractLaneInt64x2(1, r, temp1); masm.extractLaneInt64x2(1, rs, temp2); masm.mul64(temp2, temp1, Register::Invalid()); masm.replaceLaneInt64x2(1, temp1, r); freeI64(temp1); freeI64(temp2); freeV128(rs); pushV128(r); # else MOZ_CRASH("NYI"); # endif return true; } #endif bool BaseCompiler::emitBody() { MOZ_ASSERT(stackMapGenerator_.framePushedAtEntryToBody.isSome()); if (!iter_.readFunctionStart(func_.index)) { return false; } initControl(controlItem(), ResultType::Empty()); for (;;) { Nothing unused_a, unused_b; #ifdef DEBUG performRegisterLeakCheck(); assertStackInvariants(); #endif #define dispatchBinary(doEmit, type) \ iter_.readBinary(type, &unused_a, &unused_b) && \ (deadCode_ || (doEmit(), true)) #define dispatchUnary(doEmit, type) \ iter_.readUnary(type, &unused_a) && (deadCode_ || (doEmit(), true)) #define dispatchComparison(doEmit, operandType, compareOp) \ iter_.readComparison(operandType, &unused_a, &unused_b) && \ (deadCode_ || (doEmit(compareOp, operandType), true)) #define dispatchConversion(doEmit, inType, outType) \ iter_.readConversion(inType, outType, &unused_a) && \ (deadCode_ || (doEmit(), true)) #define dispatchConversionOOM(doEmit, inType, outType) \ iter_.readConversion(inType, outType, &unused_a) && (deadCode_ || doEmit()) #define dispatchCalloutConversionOOM(doEmit, symbol, inType, outType) \ iter_.readConversion(inType, outType, &unused_a) && \ (deadCode_ || doEmit(symbol, inType, outType)) #define dispatchIntDivCallout(doEmit, symbol, type) \ iter_.readBinary(type, &unused_a, &unused_b) && \ (deadCode_ || doEmit(symbol, type)) #define dispatchVectorBinary(op) \ iter_.readBinary(ValType::V128, &unused_a, &unused_b) && \ (deadCode_ || (emitVectorBinop(op), true)) #define dispatchVectorUnary(op) \ iter_.readUnary(ValType::V128, &unused_a) && \ (deadCode_ || (emitVectorUnop(op), true)) #define dispatchVectorComparison(op, compareOp) \ iter_.readBinary(ValType::V128, &unused_a, &unused_b) && \ (deadCode_ || (emitVectorBinop(compareOp, op), true)) #define dispatchVectorVariableShift(op) \ iter_.readVectorShift(&unused_a, &unused_b) && \ (deadCode_ || (emitVectorBinop(op), true)) #define dispatchExtractLane(op, outType, laneLimit) \ iter_.readExtractLane(outType, laneLimit, &laneIndex, &unused_a) && \ (deadCode_ || (emitVectorUnop(laneIndex, op), true)) #define dispatchReplaceLane(op, inType, laneLimit) \ iter_.readReplaceLane(inType, laneLimit, &laneIndex, &unused_a, \ &unused_b) && \ (deadCode_ || (emitVectorBinop(laneIndex, op), true)) #define dispatchSplat(op, inType) \ iter_.readConversion(inType, ValType::V128, &unused_a) && \ (deadCode_ || (emitVectorUnop(op), true)) #define dispatchVectorReduction(op) \ iter_.readConversion(ValType::V128, ValType::I32, &unused_a) && \ (deadCode_ || (emitVectorUnop(op), true)) #ifdef DEBUG // Check that the number of ref-typed entries in the operand stack matches // reality. 
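  // That is, the from-scratch recount returned by countMemRefsOnStk() must
  // agree with the count that stackMapGenerator_.memRefsOnStk maintains
  // incrementally as values are pushed and popped.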
# define CHECK_POINTER_COUNT \ do { \ MOZ_ASSERT(countMemRefsOnStk() == stackMapGenerator_.memRefsOnStk); \ } while (0) #else # define CHECK_POINTER_COUNT \ do { \ } while (0) #endif #ifdef ENABLE_WASM_SIMD_EXPERIMENTAL # define CHECK_SIMD_EXPERIMENTAL() (void)(0) #else # define CHECK_SIMD_EXPERIMENTAL() break #endif #define CHECK(E) \ if (!(E)) return false #define NEXT() \ { \ CHECK_POINTER_COUNT; \ continue; \ } #define CHECK_NEXT(E) \ if (!(E)) return false; \ { \ CHECK_POINTER_COUNT; \ continue; \ } CHECK(stk_.reserve(stk_.length() + MaxPushesPerOpcode)); OpBytes op; CHECK(iter_.readOp(&op)); // When compilerEnv_.debugEnabled(), every operator has breakpoint site but // Op::End. if (compilerEnv_.debugEnabled() && op.b0 != (uint16_t)Op::End) { // TODO sync only registers that can be clobbered by the exit // prologue/epilogue or disable these registers for use in // baseline compiler when compilerEnv_.debugEnabled() is set. sync(); insertBreakablePoint(CallSiteDesc::Breakpoint); if (!createStackMap("debug: per insn")) { return false; } } // Going below framePushedAtEntryToBody would imply that we've // popped off the machine stack, part of the frame created by // beginFunction(). MOZ_ASSERT(masm.framePushed() >= stackMapGenerator_.framePushedAtEntryToBody.value()); // At this point we're definitely not generating code for a function call. MOZ_ASSERT( stackMapGenerator_.framePushedExcludingOutboundCallArgs.isNothing()); switch (op.b0) { case uint16_t(Op::End): if (!emitEnd()) { return false; } if (iter_.controlStackEmpty()) { return true; } NEXT(); // Control opcodes case uint16_t(Op::Nop): CHECK_NEXT(iter_.readNop()); case uint16_t(Op::Drop): CHECK_NEXT(emitDrop()); case uint16_t(Op::Block): CHECK_NEXT(emitBlock()); case uint16_t(Op::Loop): CHECK_NEXT(emitLoop()); case uint16_t(Op::If): CHECK_NEXT(emitIf()); case uint16_t(Op::Else): CHECK_NEXT(emitElse()); #ifdef ENABLE_WASM_EXCEPTIONS case uint16_t(Op::Try): if (!moduleEnv_.exceptionsEnabled()) { return iter_.unrecognizedOpcode(&op); } CHECK_NEXT(emitTry()); case uint16_t(Op::Catch): if (!moduleEnv_.exceptionsEnabled()) { return iter_.unrecognizedOpcode(&op); } CHECK_NEXT(emitCatch()); case uint16_t(Op::Throw): if (!moduleEnv_.exceptionsEnabled()) { return iter_.unrecognizedOpcode(&op); } CHECK_NEXT(emitThrow()); #endif case uint16_t(Op::Br): CHECK_NEXT(emitBr()); case uint16_t(Op::BrIf): CHECK_NEXT(emitBrIf()); case uint16_t(Op::BrTable): CHECK_NEXT(emitBrTable()); case uint16_t(Op::Return): CHECK_NEXT(emitReturn()); case uint16_t(Op::Unreachable): CHECK(iter_.readUnreachable()); if (!deadCode_) { trap(Trap::Unreachable); deadCode_ = true; } NEXT(); // Calls case uint16_t(Op::Call): CHECK_NEXT(emitCall()); case uint16_t(Op::CallIndirect): CHECK_NEXT(emitCallIndirect()); // Locals and globals case uint16_t(Op::GetLocal): CHECK_NEXT(emitGetLocal()); case uint16_t(Op::SetLocal): CHECK_NEXT(emitSetLocal()); case uint16_t(Op::TeeLocal): CHECK_NEXT(emitTeeLocal()); case uint16_t(Op::GetGlobal): CHECK_NEXT(emitGetGlobal()); case uint16_t(Op::SetGlobal): CHECK_NEXT(emitSetGlobal()); #ifdef ENABLE_WASM_REFTYPES case uint16_t(Op::TableGet): CHECK_NEXT(emitTableGet()); case uint16_t(Op::TableSet): CHECK_NEXT(emitTableSet()); #endif // Select case uint16_t(Op::SelectNumeric): CHECK_NEXT(emitSelect(/*typed*/ false)); case uint16_t(Op::SelectTyped): if (!moduleEnv_.refTypesEnabled()) { return iter_.unrecognizedOpcode(&op); } CHECK_NEXT(emitSelect(/*typed*/ true)); // I32 case uint16_t(Op::I32Const): { int32_t i32; CHECK(iter_.readI32Const(&i32)); 
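        // The constant is pushed onto the compiler's value stack, not
        // immediately into a register; a later consumer pops it and will
        // typically either materialize it or fold it in as an immediate
        // (e.g. an i32.add with a constant operand).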
if (!deadCode_) { pushI32(i32); } NEXT(); } case uint16_t(Op::I32Add): CHECK_NEXT(dispatchBinary(emitAddI32, ValType::I32)); case uint16_t(Op::I32Sub): CHECK_NEXT(dispatchBinary(emitSubtractI32, ValType::I32)); case uint16_t(Op::I32Mul): CHECK_NEXT(dispatchBinary(emitMultiplyI32, ValType::I32)); case uint16_t(Op::I32DivS): CHECK_NEXT(dispatchBinary(emitQuotientI32, ValType::I32)); case uint16_t(Op::I32DivU): CHECK_NEXT(dispatchBinary(emitQuotientU32, ValType::I32)); case uint16_t(Op::I32RemS): CHECK_NEXT(dispatchBinary(emitRemainderI32, ValType::I32)); case uint16_t(Op::I32RemU): CHECK_NEXT(dispatchBinary(emitRemainderU32, ValType::I32)); case uint16_t(Op::I32Eqz): CHECK_NEXT(dispatchConversion(emitEqzI32, ValType::I32, ValType::I32)); case uint16_t(Op::I32TruncSF32): CHECK_NEXT(dispatchConversionOOM(emitTruncateF32ToI32<0>, ValType::F32, ValType::I32)); case uint16_t(Op::I32TruncUF32): CHECK_NEXT(dispatchConversionOOM(emitTruncateF32ToI32, ValType::F32, ValType::I32)); case uint16_t(Op::I32TruncSF64): CHECK_NEXT(dispatchConversionOOM(emitTruncateF64ToI32<0>, ValType::F64, ValType::I32)); case uint16_t(Op::I32TruncUF64): CHECK_NEXT(dispatchConversionOOM(emitTruncateF64ToI32, ValType::F64, ValType::I32)); case uint16_t(Op::I32WrapI64): CHECK_NEXT( dispatchConversion(emitWrapI64ToI32, ValType::I64, ValType::I32)); case uint16_t(Op::I32ReinterpretF32): CHECK_NEXT(dispatchConversion(emitReinterpretF32AsI32, ValType::F32, ValType::I32)); case uint16_t(Op::I32Clz): CHECK_NEXT(dispatchUnary(emitClzI32, ValType::I32)); case uint16_t(Op::I32Ctz): CHECK_NEXT(dispatchUnary(emitCtzI32, ValType::I32)); case uint16_t(Op::I32Popcnt): CHECK_NEXT(dispatchUnary(emitPopcntI32, ValType::I32)); case uint16_t(Op::I32Or): CHECK_NEXT(dispatchBinary(emitOrI32, ValType::I32)); case uint16_t(Op::I32And): CHECK_NEXT(dispatchBinary(emitAndI32, ValType::I32)); case uint16_t(Op::I32Xor): CHECK_NEXT(dispatchBinary(emitXorI32, ValType::I32)); case uint16_t(Op::I32Shl): CHECK_NEXT(dispatchBinary(emitShlI32, ValType::I32)); case uint16_t(Op::I32ShrS): CHECK_NEXT(dispatchBinary(emitShrI32, ValType::I32)); case uint16_t(Op::I32ShrU): CHECK_NEXT(dispatchBinary(emitShrU32, ValType::I32)); case uint16_t(Op::I32Load8S): CHECK_NEXT(emitLoad(ValType::I32, Scalar::Int8)); case uint16_t(Op::I32Load8U): CHECK_NEXT(emitLoad(ValType::I32, Scalar::Uint8)); case uint16_t(Op::I32Load16S): CHECK_NEXT(emitLoad(ValType::I32, Scalar::Int16)); case uint16_t(Op::I32Load16U): CHECK_NEXT(emitLoad(ValType::I32, Scalar::Uint16)); case uint16_t(Op::I32Load): CHECK_NEXT(emitLoad(ValType::I32, Scalar::Int32)); case uint16_t(Op::I32Store8): CHECK_NEXT(emitStore(ValType::I32, Scalar::Int8)); case uint16_t(Op::I32Store16): CHECK_NEXT(emitStore(ValType::I32, Scalar::Int16)); case uint16_t(Op::I32Store): CHECK_NEXT(emitStore(ValType::I32, Scalar::Int32)); case uint16_t(Op::I32Rotr): CHECK_NEXT(dispatchBinary(emitRotrI32, ValType::I32)); case uint16_t(Op::I32Rotl): CHECK_NEXT(dispatchBinary(emitRotlI32, ValType::I32)); // I64 case uint16_t(Op::I64Const): { int64_t i64; CHECK(iter_.readI64Const(&i64)); if (!deadCode_) { pushI64(i64); } NEXT(); } case uint16_t(Op::I64Add): CHECK_NEXT(dispatchBinary(emitAddI64, ValType::I64)); case uint16_t(Op::I64Sub): CHECK_NEXT(dispatchBinary(emitSubtractI64, ValType::I64)); case uint16_t(Op::I64Mul): CHECK_NEXT(dispatchBinary(emitMultiplyI64, ValType::I64)); case uint16_t(Op::I64DivS): #ifdef RABALDR_INT_DIV_I64_CALLOUT CHECK_NEXT(dispatchIntDivCallout( emitDivOrModI64BuiltinCall, SymbolicAddress::DivI64, ValType::I64)); 
#else CHECK_NEXT(dispatchBinary(emitQuotientI64, ValType::I64)); #endif case uint16_t(Op::I64DivU): #ifdef RABALDR_INT_DIV_I64_CALLOUT CHECK_NEXT(dispatchIntDivCallout(emitDivOrModI64BuiltinCall, SymbolicAddress::UDivI64, ValType::I64)); #else CHECK_NEXT(dispatchBinary(emitQuotientU64, ValType::I64)); #endif case uint16_t(Op::I64RemS): #ifdef RABALDR_INT_DIV_I64_CALLOUT CHECK_NEXT(dispatchIntDivCallout( emitDivOrModI64BuiltinCall, SymbolicAddress::ModI64, ValType::I64)); #else CHECK_NEXT(dispatchBinary(emitRemainderI64, ValType::I64)); #endif case uint16_t(Op::I64RemU): #ifdef RABALDR_INT_DIV_I64_CALLOUT CHECK_NEXT(dispatchIntDivCallout(emitDivOrModI64BuiltinCall, SymbolicAddress::UModI64, ValType::I64)); #else CHECK_NEXT(dispatchBinary(emitRemainderU64, ValType::I64)); #endif case uint16_t(Op::I64TruncSF32): #ifdef RABALDR_FLOAT_TO_I64_CALLOUT CHECK_NEXT( dispatchCalloutConversionOOM(emitConvertFloatingToInt64Callout, SymbolicAddress::TruncateDoubleToInt64, ValType::F32, ValType::I64)); #else CHECK_NEXT(dispatchConversionOOM(emitTruncateF32ToI64<0>, ValType::F32, ValType::I64)); #endif case uint16_t(Op::I64TruncUF32): #ifdef RABALDR_FLOAT_TO_I64_CALLOUT CHECK_NEXT(dispatchCalloutConversionOOM( emitConvertFloatingToInt64Callout, SymbolicAddress::TruncateDoubleToUint64, ValType::F32, ValType::I64)); #else CHECK_NEXT(dispatchConversionOOM(emitTruncateF32ToI64, ValType::F32, ValType::I64)); #endif case uint16_t(Op::I64TruncSF64): #ifdef RABALDR_FLOAT_TO_I64_CALLOUT CHECK_NEXT( dispatchCalloutConversionOOM(emitConvertFloatingToInt64Callout, SymbolicAddress::TruncateDoubleToInt64, ValType::F64, ValType::I64)); #else CHECK_NEXT(dispatchConversionOOM(emitTruncateF64ToI64<0>, ValType::F64, ValType::I64)); #endif case uint16_t(Op::I64TruncUF64): #ifdef RABALDR_FLOAT_TO_I64_CALLOUT CHECK_NEXT(dispatchCalloutConversionOOM( emitConvertFloatingToInt64Callout, SymbolicAddress::TruncateDoubleToUint64, ValType::F64, ValType::I64)); #else CHECK_NEXT(dispatchConversionOOM(emitTruncateF64ToI64, ValType::F64, ValType::I64)); #endif case uint16_t(Op::I64ExtendSI32): CHECK_NEXT( dispatchConversion(emitExtendI32ToI64, ValType::I32, ValType::I64)); case uint16_t(Op::I64ExtendUI32): CHECK_NEXT( dispatchConversion(emitExtendU32ToI64, ValType::I32, ValType::I64)); case uint16_t(Op::I64ReinterpretF64): CHECK_NEXT(dispatchConversion(emitReinterpretF64AsI64, ValType::F64, ValType::I64)); case uint16_t(Op::I64Or): CHECK_NEXT(dispatchBinary(emitOrI64, ValType::I64)); case uint16_t(Op::I64And): CHECK_NEXT(dispatchBinary(emitAndI64, ValType::I64)); case uint16_t(Op::I64Xor): CHECK_NEXT(dispatchBinary(emitXorI64, ValType::I64)); case uint16_t(Op::I64Shl): CHECK_NEXT(dispatchBinary(emitShlI64, ValType::I64)); case uint16_t(Op::I64ShrS): CHECK_NEXT(dispatchBinary(emitShrI64, ValType::I64)); case uint16_t(Op::I64ShrU): CHECK_NEXT(dispatchBinary(emitShrU64, ValType::I64)); case uint16_t(Op::I64Rotr): CHECK_NEXT(dispatchBinary(emitRotrI64, ValType::I64)); case uint16_t(Op::I64Rotl): CHECK_NEXT(dispatchBinary(emitRotlI64, ValType::I64)); case uint16_t(Op::I64Clz): CHECK_NEXT(dispatchUnary(emitClzI64, ValType::I64)); case uint16_t(Op::I64Ctz): CHECK_NEXT(dispatchUnary(emitCtzI64, ValType::I64)); case uint16_t(Op::I64Popcnt): CHECK_NEXT(dispatchUnary(emitPopcntI64, ValType::I64)); case uint16_t(Op::I64Eqz): CHECK_NEXT(dispatchConversion(emitEqzI64, ValType::I64, ValType::I32)); case uint16_t(Op::I64Load8S): CHECK_NEXT(emitLoad(ValType::I64, Scalar::Int8)); case uint16_t(Op::I64Load16S): CHECK_NEXT(emitLoad(ValType::I64, 
Scalar::Int16)); case uint16_t(Op::I64Load32S): CHECK_NEXT(emitLoad(ValType::I64, Scalar::Int32)); case uint16_t(Op::I64Load8U): CHECK_NEXT(emitLoad(ValType::I64, Scalar::Uint8)); case uint16_t(Op::I64Load16U): CHECK_NEXT(emitLoad(ValType::I64, Scalar::Uint16)); case uint16_t(Op::I64Load32U): CHECK_NEXT(emitLoad(ValType::I64, Scalar::Uint32)); case uint16_t(Op::I64Load): CHECK_NEXT(emitLoad(ValType::I64, Scalar::Int64)); case uint16_t(Op::I64Store8): CHECK_NEXT(emitStore(ValType::I64, Scalar::Int8)); case uint16_t(Op::I64Store16): CHECK_NEXT(emitStore(ValType::I64, Scalar::Int16)); case uint16_t(Op::I64Store32): CHECK_NEXT(emitStore(ValType::I64, Scalar::Int32)); case uint16_t(Op::I64Store): CHECK_NEXT(emitStore(ValType::I64, Scalar::Int64)); // F32 case uint16_t(Op::F32Const): { float f32; CHECK(iter_.readF32Const(&f32)); if (!deadCode_) { pushF32(f32); } NEXT(); } case uint16_t(Op::F32Add): CHECK_NEXT(dispatchBinary(emitAddF32, ValType::F32)); case uint16_t(Op::F32Sub): CHECK_NEXT(dispatchBinary(emitSubtractF32, ValType::F32)); case uint16_t(Op::F32Mul): CHECK_NEXT(dispatchBinary(emitMultiplyF32, ValType::F32)); case uint16_t(Op::F32Div): CHECK_NEXT(dispatchBinary(emitDivideF32, ValType::F32)); case uint16_t(Op::F32Min): CHECK_NEXT(dispatchBinary(emitMinF32, ValType::F32)); case uint16_t(Op::F32Max): CHECK_NEXT(dispatchBinary(emitMaxF32, ValType::F32)); case uint16_t(Op::F32Neg): CHECK_NEXT(dispatchUnary(emitNegateF32, ValType::F32)); case uint16_t(Op::F32Abs): CHECK_NEXT(dispatchUnary(emitAbsF32, ValType::F32)); case uint16_t(Op::F32Sqrt): CHECK_NEXT(dispatchUnary(emitSqrtF32, ValType::F32)); case uint16_t(Op::F32Ceil): CHECK_NEXT( emitUnaryMathBuiltinCall(SymbolicAddress::CeilF, ValType::F32)); case uint16_t(Op::F32Floor): CHECK_NEXT( emitUnaryMathBuiltinCall(SymbolicAddress::FloorF, ValType::F32)); case uint16_t(Op::F32DemoteF64): CHECK_NEXT(dispatchConversion(emitConvertF64ToF32, ValType::F64, ValType::F32)); case uint16_t(Op::F32ConvertSI32): CHECK_NEXT(dispatchConversion(emitConvertI32ToF32, ValType::I32, ValType::F32)); case uint16_t(Op::F32ConvertUI32): CHECK_NEXT(dispatchConversion(emitConvertU32ToF32, ValType::I32, ValType::F32)); case uint16_t(Op::F32ConvertSI64): #ifdef RABALDR_I64_TO_FLOAT_CALLOUT CHECK_NEXT(dispatchCalloutConversionOOM( emitConvertInt64ToFloatingCallout, SymbolicAddress::Int64ToFloat32, ValType::I64, ValType::F32)); #else CHECK_NEXT(dispatchConversion(emitConvertI64ToF32, ValType::I64, ValType::F32)); #endif case uint16_t(Op::F32ConvertUI64): #ifdef RABALDR_I64_TO_FLOAT_CALLOUT CHECK_NEXT(dispatchCalloutConversionOOM( emitConvertInt64ToFloatingCallout, SymbolicAddress::Uint64ToFloat32, ValType::I64, ValType::F32)); #else CHECK_NEXT(dispatchConversion(emitConvertU64ToF32, ValType::I64, ValType::F32)); #endif case uint16_t(Op::F32ReinterpretI32): CHECK_NEXT(dispatchConversion(emitReinterpretI32AsF32, ValType::I32, ValType::F32)); case uint16_t(Op::F32Load): CHECK_NEXT(emitLoad(ValType::F32, Scalar::Float32)); case uint16_t(Op::F32Store): CHECK_NEXT(emitStore(ValType::F32, Scalar::Float32)); case uint16_t(Op::F32CopySign): CHECK_NEXT(dispatchBinary(emitCopysignF32, ValType::F32)); case uint16_t(Op::F32Nearest): CHECK_NEXT(emitUnaryMathBuiltinCall(SymbolicAddress::NearbyIntF, ValType::F32)); case uint16_t(Op::F32Trunc): CHECK_NEXT( emitUnaryMathBuiltinCall(SymbolicAddress::TruncF, ValType::F32)); // F64 case uint16_t(Op::F64Const): { double f64; CHECK(iter_.readF64Const(&f64)); if (!deadCode_) { pushF64(f64); } NEXT(); } case uint16_t(Op::F64Add): 
CHECK_NEXT(dispatchBinary(emitAddF64, ValType::F64)); case uint16_t(Op::F64Sub): CHECK_NEXT(dispatchBinary(emitSubtractF64, ValType::F64)); case uint16_t(Op::F64Mul): CHECK_NEXT(dispatchBinary(emitMultiplyF64, ValType::F64)); case uint16_t(Op::F64Div): CHECK_NEXT(dispatchBinary(emitDivideF64, ValType::F64)); case uint16_t(Op::F64Min): CHECK_NEXT(dispatchBinary(emitMinF64, ValType::F64)); case uint16_t(Op::F64Max): CHECK_NEXT(dispatchBinary(emitMaxF64, ValType::F64)); case uint16_t(Op::F64Neg): CHECK_NEXT(dispatchUnary(emitNegateF64, ValType::F64)); case uint16_t(Op::F64Abs): CHECK_NEXT(dispatchUnary(emitAbsF64, ValType::F64)); case uint16_t(Op::F64Sqrt): CHECK_NEXT(dispatchUnary(emitSqrtF64, ValType::F64)); case uint16_t(Op::F64Ceil): CHECK_NEXT( emitUnaryMathBuiltinCall(SymbolicAddress::CeilD, ValType::F64)); case uint16_t(Op::F64Floor): CHECK_NEXT( emitUnaryMathBuiltinCall(SymbolicAddress::FloorD, ValType::F64)); case uint16_t(Op::F64PromoteF32): CHECK_NEXT(dispatchConversion(emitConvertF32ToF64, ValType::F32, ValType::F64)); case uint16_t(Op::F64ConvertSI32): CHECK_NEXT(dispatchConversion(emitConvertI32ToF64, ValType::I32, ValType::F64)); case uint16_t(Op::F64ConvertUI32): CHECK_NEXT(dispatchConversion(emitConvertU32ToF64, ValType::I32, ValType::F64)); case uint16_t(Op::F64ConvertSI64): #ifdef RABALDR_I64_TO_FLOAT_CALLOUT CHECK_NEXT(dispatchCalloutConversionOOM( emitConvertInt64ToFloatingCallout, SymbolicAddress::Int64ToDouble, ValType::I64, ValType::F64)); #else CHECK_NEXT(dispatchConversion(emitConvertI64ToF64, ValType::I64, ValType::F64)); #endif case uint16_t(Op::F64ConvertUI64): #ifdef RABALDR_I64_TO_FLOAT_CALLOUT CHECK_NEXT(dispatchCalloutConversionOOM( emitConvertInt64ToFloatingCallout, SymbolicAddress::Uint64ToDouble, ValType::I64, ValType::F64)); #else CHECK_NEXT(dispatchConversion(emitConvertU64ToF64, ValType::I64, ValType::F64)); #endif case uint16_t(Op::F64Load): CHECK_NEXT(emitLoad(ValType::F64, Scalar::Float64)); case uint16_t(Op::F64Store): CHECK_NEXT(emitStore(ValType::F64, Scalar::Float64)); case uint16_t(Op::F64ReinterpretI64): CHECK_NEXT(dispatchConversion(emitReinterpretI64AsF64, ValType::I64, ValType::F64)); case uint16_t(Op::F64CopySign): CHECK_NEXT(dispatchBinary(emitCopysignF64, ValType::F64)); case uint16_t(Op::F64Nearest): CHECK_NEXT(emitUnaryMathBuiltinCall(SymbolicAddress::NearbyIntD, ValType::F64)); case uint16_t(Op::F64Trunc): CHECK_NEXT( emitUnaryMathBuiltinCall(SymbolicAddress::TruncD, ValType::F64)); // Comparisons case uint16_t(Op::I32Eq): CHECK_NEXT( dispatchComparison(emitCompareI32, ValType::I32, Assembler::Equal)); case uint16_t(Op::I32Ne): CHECK_NEXT(dispatchComparison(emitCompareI32, ValType::I32, Assembler::NotEqual)); case uint16_t(Op::I32LtS): CHECK_NEXT(dispatchComparison(emitCompareI32, ValType::I32, Assembler::LessThan)); case uint16_t(Op::I32LeS): CHECK_NEXT(dispatchComparison(emitCompareI32, ValType::I32, Assembler::LessThanOrEqual)); case uint16_t(Op::I32GtS): CHECK_NEXT(dispatchComparison(emitCompareI32, ValType::I32, Assembler::GreaterThan)); case uint16_t(Op::I32GeS): CHECK_NEXT(dispatchComparison(emitCompareI32, ValType::I32, Assembler::GreaterThanOrEqual)); case uint16_t(Op::I32LtU): CHECK_NEXT( dispatchComparison(emitCompareI32, ValType::I32, Assembler::Below)); case uint16_t(Op::I32LeU): CHECK_NEXT(dispatchComparison(emitCompareI32, ValType::I32, Assembler::BelowOrEqual)); case uint16_t(Op::I32GtU): CHECK_NEXT( dispatchComparison(emitCompareI32, ValType::I32, Assembler::Above)); case uint16_t(Op::I32GeU): 
CHECK_NEXT(dispatchComparison(emitCompareI32, ValType::I32, Assembler::AboveOrEqual)); case uint16_t(Op::I64Eq): CHECK_NEXT( dispatchComparison(emitCompareI64, ValType::I64, Assembler::Equal)); case uint16_t(Op::I64Ne): CHECK_NEXT(dispatchComparison(emitCompareI64, ValType::I64, Assembler::NotEqual)); case uint16_t(Op::I64LtS): CHECK_NEXT(dispatchComparison(emitCompareI64, ValType::I64, Assembler::LessThan)); case uint16_t(Op::I64LeS): CHECK_NEXT(dispatchComparison(emitCompareI64, ValType::I64, Assembler::LessThanOrEqual)); case uint16_t(Op::I64GtS): CHECK_NEXT(dispatchComparison(emitCompareI64, ValType::I64, Assembler::GreaterThan)); case uint16_t(Op::I64GeS): CHECK_NEXT(dispatchComparison(emitCompareI64, ValType::I64, Assembler::GreaterThanOrEqual)); case uint16_t(Op::I64LtU): CHECK_NEXT( dispatchComparison(emitCompareI64, ValType::I64, Assembler::Below)); case uint16_t(Op::I64LeU): CHECK_NEXT(dispatchComparison(emitCompareI64, ValType::I64, Assembler::BelowOrEqual)); case uint16_t(Op::I64GtU): CHECK_NEXT( dispatchComparison(emitCompareI64, ValType::I64, Assembler::Above)); case uint16_t(Op::I64GeU): CHECK_NEXT(dispatchComparison(emitCompareI64, ValType::I64, Assembler::AboveOrEqual)); case uint16_t(Op::F32Eq): CHECK_NEXT(dispatchComparison(emitCompareF32, ValType::F32, Assembler::DoubleEqual)); case uint16_t(Op::F32Ne): CHECK_NEXT(dispatchComparison(emitCompareF32, ValType::F32, Assembler::DoubleNotEqualOrUnordered)); case uint16_t(Op::F32Lt): CHECK_NEXT(dispatchComparison(emitCompareF32, ValType::F32, Assembler::DoubleLessThan)); case uint16_t(Op::F32Le): CHECK_NEXT(dispatchComparison(emitCompareF32, ValType::F32, Assembler::DoubleLessThanOrEqual)); case uint16_t(Op::F32Gt): CHECK_NEXT(dispatchComparison(emitCompareF32, ValType::F32, Assembler::DoubleGreaterThan)); case uint16_t(Op::F32Ge): CHECK_NEXT(dispatchComparison(emitCompareF32, ValType::F32, Assembler::DoubleGreaterThanOrEqual)); case uint16_t(Op::F64Eq): CHECK_NEXT(dispatchComparison(emitCompareF64, ValType::F64, Assembler::DoubleEqual)); case uint16_t(Op::F64Ne): CHECK_NEXT(dispatchComparison(emitCompareF64, ValType::F64, Assembler::DoubleNotEqualOrUnordered)); case uint16_t(Op::F64Lt): CHECK_NEXT(dispatchComparison(emitCompareF64, ValType::F64, Assembler::DoubleLessThan)); case uint16_t(Op::F64Le): CHECK_NEXT(dispatchComparison(emitCompareF64, ValType::F64, Assembler::DoubleLessThanOrEqual)); case uint16_t(Op::F64Gt): CHECK_NEXT(dispatchComparison(emitCompareF64, ValType::F64, Assembler::DoubleGreaterThan)); case uint16_t(Op::F64Ge): CHECK_NEXT(dispatchComparison(emitCompareF64, ValType::F64, Assembler::DoubleGreaterThanOrEqual)); // Sign extensions case uint16_t(Op::I32Extend8S): CHECK_NEXT( dispatchConversion(emitExtendI32_8, ValType::I32, ValType::I32)); case uint16_t(Op::I32Extend16S): CHECK_NEXT( dispatchConversion(emitExtendI32_16, ValType::I32, ValType::I32)); case uint16_t(Op::I64Extend8S): CHECK_NEXT( dispatchConversion(emitExtendI64_8, ValType::I64, ValType::I64)); case uint16_t(Op::I64Extend16S): CHECK_NEXT( dispatchConversion(emitExtendI64_16, ValType::I64, ValType::I64)); case uint16_t(Op::I64Extend32S): CHECK_NEXT( dispatchConversion(emitExtendI64_32, ValType::I64, ValType::I64)); // Memory Related case uint16_t(Op::MemoryGrow): CHECK_NEXT(emitMemoryGrow()); case uint16_t(Op::MemorySize): CHECK_NEXT(emitMemorySize()); #ifdef ENABLE_WASM_FUNCTION_REFERENCES case uint16_t(Op::RefAsNonNull): if (!moduleEnv_.functionReferencesEnabled()) { return iter_.unrecognizedOpcode(&op); } CHECK_NEXT(emitRefAsNonNull()); case 
uint16_t(Op::BrOnNull): if (!moduleEnv_.functionReferencesEnabled()) { return iter_.unrecognizedOpcode(&op); } CHECK_NEXT(emitBrOnNull()); #endif #ifdef ENABLE_WASM_GC case uint16_t(Op::RefEq): if (!moduleEnv_.gcTypesEnabled()) { return iter_.unrecognizedOpcode(&op); } CHECK_NEXT(dispatchComparison(emitCompareRef, RefType::eq(), Assembler::Equal)); #endif #ifdef ENABLE_WASM_REFTYPES case uint16_t(Op::RefFunc): CHECK_NEXT(emitRefFunc()); break; case uint16_t(Op::RefNull): CHECK_NEXT(emitRefNull()); break; case uint16_t(Op::RefIsNull): CHECK_NEXT(emitRefIsNull()); break; #endif #ifdef ENABLE_WASM_GC // "GC" operations case uint16_t(Op::GcPrefix): { if (!moduleEnv_.gcTypesEnabled()) { return iter_.unrecognizedOpcode(&op); } switch (op.b1) { case uint32_t(GcOp::StructNew): CHECK_NEXT(emitStructNew()); case uint32_t(GcOp::StructGet): CHECK_NEXT(emitStructGet()); case uint32_t(GcOp::StructSet): CHECK_NEXT(emitStructSet()); case uint32_t(GcOp::StructNarrow): CHECK_NEXT(emitStructNarrow()); default: break; } // switch (op.b1) return iter_.unrecognizedOpcode(&op); } #endif #ifdef ENABLE_WASM_SIMD // SIMD operations case uint16_t(Op::SimdPrefix): { uint32_t laneIndex; if (!moduleEnv_.v128Enabled()) { return iter_.unrecognizedOpcode(&op); } switch (op.b1) { case uint32_t(SimdOp::I8x16ExtractLaneS): CHECK_NEXT(dispatchExtractLane(ExtractLaneI8x16, ValType::I32, 16)); case uint32_t(SimdOp::I8x16ExtractLaneU): CHECK_NEXT( dispatchExtractLane(ExtractLaneUI8x16, ValType::I32, 16)); case uint32_t(SimdOp::I16x8ExtractLaneS): CHECK_NEXT(dispatchExtractLane(ExtractLaneI16x8, ValType::I32, 8)); case uint32_t(SimdOp::I16x8ExtractLaneU): CHECK_NEXT(dispatchExtractLane(ExtractLaneUI16x8, ValType::I32, 8)); case uint32_t(SimdOp::I32x4ExtractLane): CHECK_NEXT(dispatchExtractLane(ExtractLaneI32x4, ValType::I32, 4)); case uint32_t(SimdOp::I64x2ExtractLane): CHECK_NEXT(dispatchExtractLane(ExtractLaneI64x2, ValType::I64, 2)); case uint32_t(SimdOp::F32x4ExtractLane): CHECK_NEXT(dispatchExtractLane(ExtractLaneF32x4, ValType::F32, 4)); case uint32_t(SimdOp::F64x2ExtractLane): CHECK_NEXT(dispatchExtractLane(ExtractLaneF64x2, ValType::F64, 2)); case uint32_t(SimdOp::I8x16Splat): CHECK_NEXT(dispatchSplat(SplatI8x16, ValType::I32)); case uint32_t(SimdOp::I16x8Splat): CHECK_NEXT(dispatchSplat(SplatI16x8, ValType::I32)); case uint32_t(SimdOp::I32x4Splat): CHECK_NEXT(dispatchSplat(SplatI32x4, ValType::I32)); case uint32_t(SimdOp::I64x2Splat): CHECK_NEXT(dispatchSplat(SplatI64x2, ValType::I64)); case uint32_t(SimdOp::F32x4Splat): CHECK_NEXT(dispatchSplat(SplatF32x4, ValType::F32)); case uint32_t(SimdOp::F64x2Splat): CHECK_NEXT(dispatchSplat(SplatF64x2, ValType::F64)); case uint32_t(SimdOp::I8x16AnyTrue): case uint32_t(SimdOp::I16x8AnyTrue): case uint32_t(SimdOp::I32x4AnyTrue): CHECK_NEXT(dispatchVectorReduction(AnyTrue)); case uint32_t(SimdOp::I8x16AllTrue): CHECK_NEXT(dispatchVectorReduction(AllTrueI8x16)); case uint32_t(SimdOp::I16x8AllTrue): CHECK_NEXT(dispatchVectorReduction(AllTrueI16x8)); case uint32_t(SimdOp::I32x4AllTrue): CHECK_NEXT(dispatchVectorReduction(AllTrueI32x4)); case uint32_t(SimdOp::I8x16Bitmask): CHECK_NEXT(dispatchVectorReduction(BitmaskI8x16)); case uint32_t(SimdOp::I16x8Bitmask): CHECK_NEXT(dispatchVectorReduction(BitmaskI16x8)); case uint32_t(SimdOp::I32x4Bitmask): CHECK_NEXT(dispatchVectorReduction(BitmaskI32x4)); case uint32_t(SimdOp::I8x16ReplaceLane): CHECK_NEXT(dispatchReplaceLane(ReplaceLaneI8x16, ValType::I32, 16)); case uint32_t(SimdOp::I16x8ReplaceLane): 
CHECK_NEXT(dispatchReplaceLane(ReplaceLaneI16x8, ValType::I32, 8)); case uint32_t(SimdOp::I32x4ReplaceLane): CHECK_NEXT(dispatchReplaceLane(ReplaceLaneI32x4, ValType::I32, 4)); case uint32_t(SimdOp::I64x2ReplaceLane): CHECK_NEXT(dispatchReplaceLane(ReplaceLaneI64x2, ValType::I64, 2)); case uint32_t(SimdOp::F32x4ReplaceLane): CHECK_NEXT(dispatchReplaceLane(ReplaceLaneF32x4, ValType::F32, 4)); case uint32_t(SimdOp::F64x2ReplaceLane): CHECK_NEXT(dispatchReplaceLane(ReplaceLaneF64x2, ValType::F64, 2)); case uint32_t(SimdOp::I8x16Eq): CHECK_NEXT(dispatchVectorComparison(CmpI8x16, Assembler::Equal)); case uint32_t(SimdOp::I8x16Ne): CHECK_NEXT(dispatchVectorComparison(CmpI8x16, Assembler::NotEqual)); case uint32_t(SimdOp::I8x16LtS): CHECK_NEXT(dispatchVectorComparison(CmpI8x16, Assembler::LessThan)); case uint32_t(SimdOp::I8x16LtU): CHECK_NEXT(dispatchVectorComparison(CmpUI8x16, Assembler::Below)); case uint32_t(SimdOp::I8x16GtS): CHECK_NEXT( dispatchVectorComparison(CmpI8x16, Assembler::GreaterThan)); case uint32_t(SimdOp::I8x16GtU): CHECK_NEXT(dispatchVectorComparison(CmpUI8x16, Assembler::Above)); case uint32_t(SimdOp::I8x16LeS): CHECK_NEXT( dispatchVectorComparison(CmpI8x16, Assembler::LessThanOrEqual)); case uint32_t(SimdOp::I8x16LeU): CHECK_NEXT( dispatchVectorComparison(CmpUI8x16, Assembler::BelowOrEqual)); case uint32_t(SimdOp::I8x16GeS): CHECK_NEXT(dispatchVectorComparison(CmpI8x16, Assembler::GreaterThanOrEqual)); case uint32_t(SimdOp::I8x16GeU): CHECK_NEXT( dispatchVectorComparison(CmpUI8x16, Assembler::AboveOrEqual)); case uint32_t(SimdOp::I16x8Eq): CHECK_NEXT(dispatchVectorComparison(CmpI16x8, Assembler::Equal)); case uint32_t(SimdOp::I16x8Ne): CHECK_NEXT(dispatchVectorComparison(CmpI16x8, Assembler::NotEqual)); case uint32_t(SimdOp::I16x8LtS): CHECK_NEXT(dispatchVectorComparison(CmpI16x8, Assembler::LessThan)); case uint32_t(SimdOp::I16x8LtU): CHECK_NEXT(dispatchVectorComparison(CmpUI16x8, Assembler::Below)); case uint32_t(SimdOp::I16x8GtS): CHECK_NEXT( dispatchVectorComparison(CmpI16x8, Assembler::GreaterThan)); case uint32_t(SimdOp::I16x8GtU): CHECK_NEXT(dispatchVectorComparison(CmpUI16x8, Assembler::Above)); case uint32_t(SimdOp::I16x8LeS): CHECK_NEXT( dispatchVectorComparison(CmpI16x8, Assembler::LessThanOrEqual)); case uint32_t(SimdOp::I16x8LeU): CHECK_NEXT( dispatchVectorComparison(CmpUI16x8, Assembler::BelowOrEqual)); case uint32_t(SimdOp::I16x8GeS): CHECK_NEXT(dispatchVectorComparison(CmpI16x8, Assembler::GreaterThanOrEqual)); case uint32_t(SimdOp::I16x8GeU): CHECK_NEXT( dispatchVectorComparison(CmpUI16x8, Assembler::AboveOrEqual)); case uint32_t(SimdOp::I32x4Eq): CHECK_NEXT(dispatchVectorComparison(CmpI32x4, Assembler::Equal)); case uint32_t(SimdOp::I32x4Ne): CHECK_NEXT(dispatchVectorComparison(CmpI32x4, Assembler::NotEqual)); case uint32_t(SimdOp::I32x4LtS): CHECK_NEXT(dispatchVectorComparison(CmpI32x4, Assembler::LessThan)); case uint32_t(SimdOp::I32x4LtU): CHECK_NEXT(dispatchVectorComparison(CmpUI32x4, Assembler::Below)); case uint32_t(SimdOp::I32x4GtS): CHECK_NEXT( dispatchVectorComparison(CmpI32x4, Assembler::GreaterThan)); case uint32_t(SimdOp::I32x4GtU): CHECK_NEXT(dispatchVectorComparison(CmpUI32x4, Assembler::Above)); case uint32_t(SimdOp::I32x4LeS): CHECK_NEXT( dispatchVectorComparison(CmpI32x4, Assembler::LessThanOrEqual)); case uint32_t(SimdOp::I32x4LeU): CHECK_NEXT( dispatchVectorComparison(CmpUI32x4, Assembler::BelowOrEqual)); case uint32_t(SimdOp::I32x4GeS): CHECK_NEXT(dispatchVectorComparison(CmpI32x4, Assembler::GreaterThanOrEqual)); case 
uint32_t(SimdOp::I32x4GeU): CHECK_NEXT( dispatchVectorComparison(CmpUI32x4, Assembler::AboveOrEqual)); case uint32_t(SimdOp::F32x4Eq): CHECK_NEXT(dispatchVectorComparison(CmpF32x4, Assembler::Equal)); case uint32_t(SimdOp::F32x4Ne): CHECK_NEXT(dispatchVectorComparison(CmpF32x4, Assembler::NotEqual)); case uint32_t(SimdOp::F32x4Lt): CHECK_NEXT(dispatchVectorComparison(CmpF32x4, Assembler::LessThan)); case uint32_t(SimdOp::F32x4Gt): CHECK_NEXT( dispatchVectorComparison(CmpF32x4, Assembler::GreaterThan)); case uint32_t(SimdOp::F32x4Le): CHECK_NEXT( dispatchVectorComparison(CmpF32x4, Assembler::LessThanOrEqual)); case uint32_t(SimdOp::F32x4Ge): CHECK_NEXT(dispatchVectorComparison(CmpF32x4, Assembler::GreaterThanOrEqual)); case uint32_t(SimdOp::F64x2Eq): CHECK_NEXT(dispatchVectorComparison(CmpF64x2, Assembler::Equal)); case uint32_t(SimdOp::F64x2Ne): CHECK_NEXT(dispatchVectorComparison(CmpF64x2, Assembler::NotEqual)); case uint32_t(SimdOp::F64x2Lt): CHECK_NEXT(dispatchVectorComparison(CmpF64x2, Assembler::LessThan)); case uint32_t(SimdOp::F64x2Gt): CHECK_NEXT( dispatchVectorComparison(CmpF64x2, Assembler::GreaterThan)); case uint32_t(SimdOp::F64x2Le): CHECK_NEXT( dispatchVectorComparison(CmpF64x2, Assembler::LessThanOrEqual)); case uint32_t(SimdOp::F64x2Ge): CHECK_NEXT(dispatchVectorComparison(CmpF64x2, Assembler::GreaterThanOrEqual)); case uint32_t(SimdOp::V128And): CHECK_NEXT(dispatchVectorBinary(AndV128)); case uint32_t(SimdOp::V128Or): CHECK_NEXT(dispatchVectorBinary(OrV128)); case uint32_t(SimdOp::V128Xor): CHECK_NEXT(dispatchVectorBinary(XorV128)); case uint32_t(SimdOp::V128AndNot): CHECK_NEXT(dispatchBinary(emitVectorAndNot, ValType::V128)); case uint32_t(SimdOp::I8x16AvgrU): CHECK_NEXT(dispatchVectorBinary(AverageUI8x16)); case uint32_t(SimdOp::I16x8AvgrU): CHECK_NEXT(dispatchVectorBinary(AverageUI16x8)); case uint32_t(SimdOp::I8x16Add): CHECK_NEXT(dispatchVectorBinary(AddI8x16)); case uint32_t(SimdOp::I8x16AddSaturateS): CHECK_NEXT(dispatchVectorBinary(AddSatI8x16)); case uint32_t(SimdOp::I8x16AddSaturateU): CHECK_NEXT(dispatchVectorBinary(AddSatUI8x16)); case uint32_t(SimdOp::I8x16Sub): CHECK_NEXT(dispatchVectorBinary(SubI8x16)); case uint32_t(SimdOp::I8x16SubSaturateS): CHECK_NEXT(dispatchVectorBinary(SubSatI8x16)); case uint32_t(SimdOp::I8x16SubSaturateU): CHECK_NEXT(dispatchVectorBinary(SubSatUI8x16)); case uint32_t(SimdOp::I8x16MinS): CHECK_NEXT(dispatchVectorBinary(MinI8x16)); case uint32_t(SimdOp::I8x16MinU): CHECK_NEXT(dispatchVectorBinary(MinUI8x16)); case uint32_t(SimdOp::I8x16MaxS): CHECK_NEXT(dispatchVectorBinary(MaxI8x16)); case uint32_t(SimdOp::I8x16MaxU): CHECK_NEXT(dispatchVectorBinary(MaxUI8x16)); case uint32_t(SimdOp::I16x8Add): CHECK_NEXT(dispatchVectorBinary(AddI16x8)); case uint32_t(SimdOp::I16x8AddSaturateS): CHECK_NEXT(dispatchVectorBinary(AddSatI16x8)); case uint32_t(SimdOp::I16x8AddSaturateU): CHECK_NEXT(dispatchVectorBinary(AddSatUI16x8)); case uint32_t(SimdOp::I16x8Sub): CHECK_NEXT(dispatchVectorBinary(SubI16x8)); case uint32_t(SimdOp::I16x8SubSaturateS): CHECK_NEXT(dispatchVectorBinary(SubSatI16x8)); case uint32_t(SimdOp::I16x8SubSaturateU): CHECK_NEXT(dispatchVectorBinary(SubSatUI16x8)); case uint32_t(SimdOp::I16x8Mul): CHECK_NEXT(dispatchVectorBinary(MulI16x8)); case uint32_t(SimdOp::I16x8MinS): CHECK_NEXT(dispatchVectorBinary(MinI16x8)); case uint32_t(SimdOp::I16x8MinU): CHECK_NEXT(dispatchVectorBinary(MinUI16x8)); case uint32_t(SimdOp::I16x8MaxS): CHECK_NEXT(dispatchVectorBinary(MaxI16x8)); case uint32_t(SimdOp::I16x8MaxU): 
CHECK_NEXT(dispatchVectorBinary(MaxUI16x8)); case uint32_t(SimdOp::I32x4Add): CHECK_NEXT(dispatchVectorBinary(AddI32x4)); case uint32_t(SimdOp::I32x4Sub): CHECK_NEXT(dispatchVectorBinary(SubI32x4)); case uint32_t(SimdOp::I32x4Mul): CHECK_NEXT(dispatchVectorBinary(MulI32x4)); case uint32_t(SimdOp::I32x4MinS): CHECK_NEXT(dispatchVectorBinary(MinI32x4)); case uint32_t(SimdOp::I32x4MinU): CHECK_NEXT(dispatchVectorBinary(MinUI32x4)); case uint32_t(SimdOp::I32x4MaxS): CHECK_NEXT(dispatchVectorBinary(MaxI32x4)); case uint32_t(SimdOp::I32x4MaxU): CHECK_NEXT(dispatchVectorBinary(MaxUI32x4)); case uint32_t(SimdOp::I64x2Add): CHECK_NEXT(dispatchVectorBinary(AddI64x2)); case uint32_t(SimdOp::I64x2Sub): CHECK_NEXT(dispatchVectorBinary(SubI64x2)); case uint32_t(SimdOp::I64x2Mul): CHECK_NEXT(emitVectorMulI64x2()); case uint32_t(SimdOp::F32x4Add): CHECK_NEXT(dispatchVectorBinary(AddF32x4)); case uint32_t(SimdOp::F32x4Sub): CHECK_NEXT(dispatchVectorBinary(SubF32x4)); case uint32_t(SimdOp::F32x4Mul): CHECK_NEXT(dispatchVectorBinary(MulF32x4)); case uint32_t(SimdOp::F32x4Div): CHECK_NEXT(dispatchVectorBinary(DivF32x4)); case uint32_t(SimdOp::F32x4Min): CHECK_NEXT(dispatchVectorBinary(MinF32x4)); case uint32_t(SimdOp::F32x4Max): CHECK_NEXT(dispatchVectorBinary(MaxF32x4)); case uint32_t(SimdOp::F64x2Add): CHECK_NEXT(dispatchVectorBinary(AddF64x2)); case uint32_t(SimdOp::F64x2Sub): CHECK_NEXT(dispatchVectorBinary(SubF64x2)); case uint32_t(SimdOp::F64x2Mul): CHECK_NEXT(dispatchVectorBinary(MulF64x2)); case uint32_t(SimdOp::F64x2Div): CHECK_NEXT(dispatchVectorBinary(DivF64x2)); case uint32_t(SimdOp::F64x2Min): CHECK_NEXT(dispatchVectorBinary(MinF64x2)); case uint32_t(SimdOp::F64x2Max): CHECK_NEXT(dispatchVectorBinary(MaxF64x2)); case uint32_t(SimdOp::I8x16NarrowSI16x8): CHECK_NEXT(dispatchVectorBinary(NarrowI16x8)); case uint32_t(SimdOp::I8x16NarrowUI16x8): CHECK_NEXT(dispatchVectorBinary(NarrowUI16x8)); case uint32_t(SimdOp::I16x8NarrowSI32x4): CHECK_NEXT(dispatchVectorBinary(NarrowI32x4)); case uint32_t(SimdOp::I16x8NarrowUI32x4): CHECK_NEXT(dispatchVectorBinary(NarrowUI32x4)); case uint32_t(SimdOp::V8x16Swizzle): CHECK_NEXT(dispatchVectorBinary(Swizzle)); case uint32_t(SimdOp::F32x4PMax): CHECK_NEXT(dispatchVectorBinary(PMaxF32x4)); case uint32_t(SimdOp::F32x4PMin): CHECK_NEXT(dispatchVectorBinary(PMinF32x4)); case uint32_t(SimdOp::F64x2PMax): CHECK_NEXT(dispatchVectorBinary(PMaxF64x2)); case uint32_t(SimdOp::F64x2PMin): CHECK_NEXT(dispatchVectorBinary(PMinF64x2)); case uint32_t(SimdOp::I32x4DotSI16x8): CHECK_NEXT(dispatchVectorBinary(DotI16x8)); case uint32_t(SimdOp::I8x16Neg): CHECK_NEXT(dispatchVectorUnary(NegI8x16)); case uint32_t(SimdOp::I16x8Neg): CHECK_NEXT(dispatchVectorUnary(NegI16x8)); case uint32_t(SimdOp::I16x8WidenLowSI8x16): CHECK_NEXT(dispatchVectorUnary(WidenLowI8x16)); case uint32_t(SimdOp::I16x8WidenHighSI8x16): CHECK_NEXT(dispatchVectorUnary(WidenHighI8x16)); case uint32_t(SimdOp::I16x8WidenLowUI8x16): CHECK_NEXT(dispatchVectorUnary(WidenLowUI8x16)); case uint32_t(SimdOp::I16x8WidenHighUI8x16): CHECK_NEXT(dispatchVectorUnary(WidenHighUI8x16)); case uint32_t(SimdOp::I32x4Neg): CHECK_NEXT(dispatchVectorUnary(NegI32x4)); case uint32_t(SimdOp::I32x4WidenLowSI16x8): CHECK_NEXT(dispatchVectorUnary(WidenLowI16x8)); case uint32_t(SimdOp::I32x4WidenHighSI16x8): CHECK_NEXT(dispatchVectorUnary(WidenHighI16x8)); case uint32_t(SimdOp::I32x4WidenLowUI16x8): CHECK_NEXT(dispatchVectorUnary(WidenLowUI16x8)); case uint32_t(SimdOp::I32x4WidenHighUI16x8): CHECK_NEXT(dispatchVectorUnary(WidenHighUI16x8)); case 
uint32_t(SimdOp::I32x4TruncSSatF32x4): CHECK_NEXT(dispatchVectorUnary(ConvertF32x4ToI32x4)); case uint32_t(SimdOp::I32x4TruncUSatF32x4): CHECK_NEXT(dispatchVectorUnary(ConvertF32x4ToUI32x4)); case uint32_t(SimdOp::I64x2Neg): CHECK_NEXT(dispatchVectorUnary(NegI64x2)); case uint32_t(SimdOp::F32x4Abs): CHECK_NEXT(dispatchVectorUnary(AbsF32x4)); case uint32_t(SimdOp::F32x4Neg): CHECK_NEXT(dispatchVectorUnary(NegF32x4)); case uint32_t(SimdOp::F32x4Sqrt): CHECK_NEXT(dispatchVectorUnary(SqrtF32x4)); case uint32_t(SimdOp::F32x4ConvertSI32x4): CHECK_NEXT(dispatchVectorUnary(ConvertI32x4ToF32x4)); case uint32_t(SimdOp::F32x4ConvertUI32x4): CHECK_NEXT(dispatchVectorUnary(ConvertUI32x4ToF32x4)); case uint32_t(SimdOp::F64x2Abs): CHECK_NEXT(dispatchVectorUnary(AbsF64x2)); case uint32_t(SimdOp::F64x2Neg): CHECK_NEXT(dispatchVectorUnary(NegF64x2)); case uint32_t(SimdOp::F64x2Sqrt): CHECK_NEXT(dispatchVectorUnary(SqrtF64x2)); case uint32_t(SimdOp::V128Not): CHECK_NEXT(dispatchVectorUnary(NotV128)); case uint32_t(SimdOp::I8x16Abs): CHECK_NEXT(dispatchVectorUnary(AbsI8x16)); case uint32_t(SimdOp::I16x8Abs): CHECK_NEXT(dispatchVectorUnary(AbsI16x8)); case uint32_t(SimdOp::I32x4Abs): CHECK_NEXT(dispatchVectorUnary(AbsI32x4)); case uint32_t(SimdOp::F32x4Ceil): CHECK_NEXT(dispatchVectorUnary(CeilF32x4)); case uint32_t(SimdOp::F32x4Floor): CHECK_NEXT(dispatchVectorUnary(FloorF32x4)); case uint32_t(SimdOp::F32x4Trunc): CHECK_NEXT(dispatchVectorUnary(TruncF32x4)); case uint32_t(SimdOp::F32x4Nearest): CHECK_NEXT(dispatchVectorUnary(NearestF32x4)); case uint32_t(SimdOp::F64x2Ceil): CHECK_NEXT(dispatchVectorUnary(CeilF64x2)); case uint32_t(SimdOp::F64x2Floor): CHECK_NEXT(dispatchVectorUnary(FloorF64x2)); case uint32_t(SimdOp::F64x2Trunc): CHECK_NEXT(dispatchVectorUnary(TruncF64x2)); case uint32_t(SimdOp::F64x2Nearest): CHECK_NEXT(dispatchVectorUnary(NearestF64x2)); case uint32_t(SimdOp::I8x16Shl): CHECK_NEXT(dispatchVectorVariableShift(ShiftLeftI8x16)); case uint32_t(SimdOp::I8x16ShrS): CHECK_NEXT(dispatchVectorVariableShift(ShiftRightI8x16)); case uint32_t(SimdOp::I8x16ShrU): CHECK_NEXT(dispatchVectorVariableShift(ShiftRightUI8x16)); case uint32_t(SimdOp::I16x8Shl): CHECK_NEXT(dispatchVectorVariableShift(ShiftLeftI16x8)); case uint32_t(SimdOp::I16x8ShrS): CHECK_NEXT(dispatchVectorVariableShift(ShiftRightI16x8)); case uint32_t(SimdOp::I16x8ShrU): CHECK_NEXT(dispatchVectorVariableShift(ShiftRightUI16x8)); case uint32_t(SimdOp::I32x4Shl): CHECK_NEXT(dispatchVectorVariableShift(ShiftLeftI32x4)); case uint32_t(SimdOp::I32x4ShrS): CHECK_NEXT(dispatchVectorVariableShift(ShiftRightI32x4)); case uint32_t(SimdOp::I32x4ShrU): CHECK_NEXT(dispatchVectorVariableShift(ShiftRightUI32x4)); case uint32_t(SimdOp::I64x2Shl): CHECK_NEXT(dispatchVectorVariableShift(ShiftLeftI64x2)); case uint32_t(SimdOp::I64x2ShrS): CHECK_NEXT(emitVectorShiftRightI64x2(/* isUnsigned */ false)); case uint32_t(SimdOp::I64x2ShrU): CHECK_NEXT(emitVectorShiftRightI64x2(/* isUnsigned */ true)); case uint32_t(SimdOp::V128Bitselect): CHECK_NEXT(emitBitselect()); case uint32_t(SimdOp::V8x16Shuffle): CHECK_NEXT(emitVectorShuffle()); case uint32_t(SimdOp::V128Const): { V128 v128; CHECK(iter_.readV128Const(&v128)); if (!deadCode_) { pushV128(v128); } NEXT(); } case uint32_t(SimdOp::V128Load): CHECK_NEXT(emitLoad(ValType::V128, Scalar::Simd128)); case uint32_t(SimdOp::V8x16LoadSplat): CHECK_NEXT(emitLoadSplat(Scalar::Uint8)); case uint32_t(SimdOp::V16x8LoadSplat): CHECK_NEXT(emitLoadSplat(Scalar::Uint16)); case uint32_t(SimdOp::V32x4LoadSplat): 
            CHECK_NEXT(emitLoadSplat(Scalar::Uint32));
          case uint32_t(SimdOp::V64x2LoadSplat):
            CHECK_NEXT(emitLoadSplat(Scalar::Int64));
          case uint32_t(SimdOp::I16x8LoadS8x8):
            CHECK_NEXT(emitLoadExtend(Scalar::Int8));
          case uint32_t(SimdOp::I16x8LoadU8x8):
            CHECK_NEXT(emitLoadExtend(Scalar::Uint8));
          case uint32_t(SimdOp::I32x4LoadS16x4):
            CHECK_NEXT(emitLoadExtend(Scalar::Int16));
          case uint32_t(SimdOp::I32x4LoadU16x4):
            CHECK_NEXT(emitLoadExtend(Scalar::Uint16));
          case uint32_t(SimdOp::I64x2LoadS32x2):
            CHECK_NEXT(emitLoadExtend(Scalar::Int32));
          case uint32_t(SimdOp::I64x2LoadU32x2):
            CHECK_NEXT(emitLoadExtend(Scalar::Uint32));
          case uint32_t(SimdOp::V128Load32Zero):
            CHECK_NEXT(emitLoadZero(Scalar::Float32));
          case uint32_t(SimdOp::V128Load64Zero):
            CHECK_NEXT(emitLoadZero(Scalar::Float64));
          case uint32_t(SimdOp::V128Store):
            CHECK_NEXT(emitStore(ValType::V128, Scalar::Simd128));
          default:
            break;
        }  // switch (op.b1)
        return iter_.unrecognizedOpcode(&op);
      }
#endif  // ENABLE_WASM_SIMD

      // "Miscellaneous" operations
      case uint16_t(Op::MiscPrefix): {
        switch (op.b1) {
          case uint32_t(MiscOp::I32TruncSSatF32):
            CHECK_NEXT(dispatchConversionOOM(
                emitTruncateF32ToI32<TRUNC_SATURATING>, ValType::F32,
                ValType::I32));
          case uint32_t(MiscOp::I32TruncUSatF32):
            CHECK_NEXT(dispatchConversionOOM(
                emitTruncateF32ToI32<TRUNC_UNSIGNED | TRUNC_SATURATING>,
                ValType::F32, ValType::I32));
          case uint32_t(MiscOp::I32TruncSSatF64):
            CHECK_NEXT(dispatchConversionOOM(
                emitTruncateF64ToI32<TRUNC_SATURATING>, ValType::F64,
                ValType::I32));
          case uint32_t(MiscOp::I32TruncUSatF64):
            CHECK_NEXT(dispatchConversionOOM(
                emitTruncateF64ToI32<TRUNC_UNSIGNED | TRUNC_SATURATING>,
                ValType::F64, ValType::I32));
          case uint32_t(MiscOp::I64TruncSSatF32):
#ifdef RABALDR_FLOAT_TO_I64_CALLOUT
            CHECK_NEXT(dispatchCalloutConversionOOM(
                emitConvertFloatingToInt64Callout,
                SymbolicAddress::SaturatingTruncateDoubleToInt64, ValType::F32,
                ValType::I64));
#else
            CHECK_NEXT(dispatchConversionOOM(
                emitTruncateF32ToI64<TRUNC_SATURATING>, ValType::F32,
                ValType::I64));
#endif
          case uint32_t(MiscOp::I64TruncUSatF32):
#ifdef RABALDR_FLOAT_TO_I64_CALLOUT
            CHECK_NEXT(dispatchCalloutConversionOOM(
                emitConvertFloatingToInt64Callout,
                SymbolicAddress::SaturatingTruncateDoubleToUint64, ValType::F32,
                ValType::I64));
#else
            CHECK_NEXT(dispatchConversionOOM(
                emitTruncateF32ToI64<TRUNC_UNSIGNED | TRUNC_SATURATING>,
                ValType::F32, ValType::I64));
#endif
          case uint32_t(MiscOp::I64TruncSSatF64):
#ifdef RABALDR_FLOAT_TO_I64_CALLOUT
            CHECK_NEXT(dispatchCalloutConversionOOM(
                emitConvertFloatingToInt64Callout,
                SymbolicAddress::SaturatingTruncateDoubleToInt64, ValType::F64,
                ValType::I64));
#else
            CHECK_NEXT(dispatchConversionOOM(
                emitTruncateF64ToI64<TRUNC_SATURATING>, ValType::F64,
                ValType::I64));
#endif
          case uint32_t(MiscOp::I64TruncUSatF64):
#ifdef RABALDR_FLOAT_TO_I64_CALLOUT
            CHECK_NEXT(dispatchCalloutConversionOOM(
                emitConvertFloatingToInt64Callout,
                SymbolicAddress::SaturatingTruncateDoubleToUint64, ValType::F64,
                ValType::I64));
#else
            CHECK_NEXT(dispatchConversionOOM(
                emitTruncateF64ToI64<TRUNC_UNSIGNED | TRUNC_SATURATING>,
                ValType::F64, ValType::I64));
#endif
          case uint32_t(MiscOp::MemCopy):
            CHECK_NEXT(emitMemCopy());
          case uint32_t(MiscOp::DataDrop):
            CHECK_NEXT(emitDataOrElemDrop(/*isData=*/true));
          case uint32_t(MiscOp::MemFill):
            CHECK_NEXT(emitMemFill());
          case uint32_t(MiscOp::MemInit):
            CHECK_NEXT(emitMemOrTableInit(/*isMem=*/true));
          case uint32_t(MiscOp::TableCopy):
            CHECK_NEXT(emitTableCopy());
          case uint32_t(MiscOp::ElemDrop):
            CHECK_NEXT(emitDataOrElemDrop(/*isData=*/false));
          case uint32_t(MiscOp::TableInit):
            CHECK_NEXT(emitMemOrTableInit(/*isMem=*/false));
#ifdef ENABLE_WASM_REFTYPES
          case uint32_t(MiscOp::TableFill):
            CHECK_NEXT(emitTableFill());
          case uint32_t(MiscOp::TableGrow):
            CHECK_NEXT(emitTableGrow());
          case
uint32_t(MiscOp::TableSize): CHECK_NEXT(emitTableSize()); #endif default: break; } // switch (op.b1) return iter_.unrecognizedOpcode(&op); } // Thread operations case uint16_t(Op::ThreadPrefix): { if (moduleEnv_.sharedMemoryEnabled() == Shareable::False) { return iter_.unrecognizedOpcode(&op); } switch (op.b1) { case uint32_t(ThreadOp::Wake): CHECK_NEXT(emitWake()); case uint32_t(ThreadOp::I32Wait): CHECK_NEXT(emitWait(ValType::I32, 4)); case uint32_t(ThreadOp::I64Wait): CHECK_NEXT(emitWait(ValType::I64, 8)); case uint32_t(ThreadOp::Fence): CHECK_NEXT(emitFence()); case uint32_t(ThreadOp::I32AtomicLoad): CHECK_NEXT(emitAtomicLoad(ValType::I32, Scalar::Int32)); case uint32_t(ThreadOp::I64AtomicLoad): CHECK_NEXT(emitAtomicLoad(ValType::I64, Scalar::Int64)); case uint32_t(ThreadOp::I32AtomicLoad8U): CHECK_NEXT(emitAtomicLoad(ValType::I32, Scalar::Uint8)); case uint32_t(ThreadOp::I32AtomicLoad16U): CHECK_NEXT(emitAtomicLoad(ValType::I32, Scalar::Uint16)); case uint32_t(ThreadOp::I64AtomicLoad8U): CHECK_NEXT(emitAtomicLoad(ValType::I64, Scalar::Uint8)); case uint32_t(ThreadOp::I64AtomicLoad16U): CHECK_NEXT(emitAtomicLoad(ValType::I64, Scalar::Uint16)); case uint32_t(ThreadOp::I64AtomicLoad32U): CHECK_NEXT(emitAtomicLoad(ValType::I64, Scalar::Uint32)); case uint32_t(ThreadOp::I32AtomicStore): CHECK_NEXT(emitAtomicStore(ValType::I32, Scalar::Int32)); case uint32_t(ThreadOp::I64AtomicStore): CHECK_NEXT(emitAtomicStore(ValType::I64, Scalar::Int64)); case uint32_t(ThreadOp::I32AtomicStore8U): CHECK_NEXT(emitAtomicStore(ValType::I32, Scalar::Uint8)); case uint32_t(ThreadOp::I32AtomicStore16U): CHECK_NEXT(emitAtomicStore(ValType::I32, Scalar::Uint16)); case uint32_t(ThreadOp::I64AtomicStore8U): CHECK_NEXT(emitAtomicStore(ValType::I64, Scalar::Uint8)); case uint32_t(ThreadOp::I64AtomicStore16U): CHECK_NEXT(emitAtomicStore(ValType::I64, Scalar::Uint16)); case uint32_t(ThreadOp::I64AtomicStore32U): CHECK_NEXT(emitAtomicStore(ValType::I64, Scalar::Uint32)); case uint32_t(ThreadOp::I32AtomicAdd): CHECK_NEXT( emitAtomicRMW(ValType::I32, Scalar::Int32, AtomicFetchAddOp)); case uint32_t(ThreadOp::I64AtomicAdd): CHECK_NEXT( emitAtomicRMW(ValType::I64, Scalar::Int64, AtomicFetchAddOp)); case uint32_t(ThreadOp::I32AtomicAdd8U): CHECK_NEXT( emitAtomicRMW(ValType::I32, Scalar::Uint8, AtomicFetchAddOp)); case uint32_t(ThreadOp::I32AtomicAdd16U): CHECK_NEXT( emitAtomicRMW(ValType::I32, Scalar::Uint16, AtomicFetchAddOp)); case uint32_t(ThreadOp::I64AtomicAdd8U): CHECK_NEXT( emitAtomicRMW(ValType::I64, Scalar::Uint8, AtomicFetchAddOp)); case uint32_t(ThreadOp::I64AtomicAdd16U): CHECK_NEXT( emitAtomicRMW(ValType::I64, Scalar::Uint16, AtomicFetchAddOp)); case uint32_t(ThreadOp::I64AtomicAdd32U): CHECK_NEXT( emitAtomicRMW(ValType::I64, Scalar::Uint32, AtomicFetchAddOp)); case uint32_t(ThreadOp::I32AtomicSub): CHECK_NEXT( emitAtomicRMW(ValType::I32, Scalar::Int32, AtomicFetchSubOp)); case uint32_t(ThreadOp::I64AtomicSub): CHECK_NEXT( emitAtomicRMW(ValType::I64, Scalar::Int64, AtomicFetchSubOp)); case uint32_t(ThreadOp::I32AtomicSub8U): CHECK_NEXT( emitAtomicRMW(ValType::I32, Scalar::Uint8, AtomicFetchSubOp)); case uint32_t(ThreadOp::I32AtomicSub16U): CHECK_NEXT( emitAtomicRMW(ValType::I32, Scalar::Uint16, AtomicFetchSubOp)); case uint32_t(ThreadOp::I64AtomicSub8U): CHECK_NEXT( emitAtomicRMW(ValType::I64, Scalar::Uint8, AtomicFetchSubOp)); case uint32_t(ThreadOp::I64AtomicSub16U): CHECK_NEXT( emitAtomicRMW(ValType::I64, Scalar::Uint16, AtomicFetchSubOp)); case uint32_t(ThreadOp::I64AtomicSub32U): CHECK_NEXT( 
emitAtomicRMW(ValType::I64, Scalar::Uint32, AtomicFetchSubOp)); case uint32_t(ThreadOp::I32AtomicAnd): CHECK_NEXT( emitAtomicRMW(ValType::I32, Scalar::Int32, AtomicFetchAndOp)); case uint32_t(ThreadOp::I64AtomicAnd): CHECK_NEXT( emitAtomicRMW(ValType::I64, Scalar::Int64, AtomicFetchAndOp)); case uint32_t(ThreadOp::I32AtomicAnd8U): CHECK_NEXT( emitAtomicRMW(ValType::I32, Scalar::Uint8, AtomicFetchAndOp)); case uint32_t(ThreadOp::I32AtomicAnd16U): CHECK_NEXT( emitAtomicRMW(ValType::I32, Scalar::Uint16, AtomicFetchAndOp)); case uint32_t(ThreadOp::I64AtomicAnd8U): CHECK_NEXT( emitAtomicRMW(ValType::I64, Scalar::Uint8, AtomicFetchAndOp)); case uint32_t(ThreadOp::I64AtomicAnd16U): CHECK_NEXT( emitAtomicRMW(ValType::I64, Scalar::Uint16, AtomicFetchAndOp)); case uint32_t(ThreadOp::I64AtomicAnd32U): CHECK_NEXT( emitAtomicRMW(ValType::I64, Scalar::Uint32, AtomicFetchAndOp)); case uint32_t(ThreadOp::I32AtomicOr): CHECK_NEXT( emitAtomicRMW(ValType::I32, Scalar::Int32, AtomicFetchOrOp)); case uint32_t(ThreadOp::I64AtomicOr): CHECK_NEXT( emitAtomicRMW(ValType::I64, Scalar::Int64, AtomicFetchOrOp)); case uint32_t(ThreadOp::I32AtomicOr8U): CHECK_NEXT( emitAtomicRMW(ValType::I32, Scalar::Uint8, AtomicFetchOrOp)); case uint32_t(ThreadOp::I32AtomicOr16U): CHECK_NEXT( emitAtomicRMW(ValType::I32, Scalar::Uint16, AtomicFetchOrOp)); case uint32_t(ThreadOp::I64AtomicOr8U): CHECK_NEXT( emitAtomicRMW(ValType::I64, Scalar::Uint8, AtomicFetchOrOp)); case uint32_t(ThreadOp::I64AtomicOr16U): CHECK_NEXT( emitAtomicRMW(ValType::I64, Scalar::Uint16, AtomicFetchOrOp)); case uint32_t(ThreadOp::I64AtomicOr32U): CHECK_NEXT( emitAtomicRMW(ValType::I64, Scalar::Uint32, AtomicFetchOrOp)); case uint32_t(ThreadOp::I32AtomicXor): CHECK_NEXT( emitAtomicRMW(ValType::I32, Scalar::Int32, AtomicFetchXorOp)); case uint32_t(ThreadOp::I64AtomicXor): CHECK_NEXT( emitAtomicRMW(ValType::I64, Scalar::Int64, AtomicFetchXorOp)); case uint32_t(ThreadOp::I32AtomicXor8U): CHECK_NEXT( emitAtomicRMW(ValType::I32, Scalar::Uint8, AtomicFetchXorOp)); case uint32_t(ThreadOp::I32AtomicXor16U): CHECK_NEXT( emitAtomicRMW(ValType::I32, Scalar::Uint16, AtomicFetchXorOp)); case uint32_t(ThreadOp::I64AtomicXor8U): CHECK_NEXT( emitAtomicRMW(ValType::I64, Scalar::Uint8, AtomicFetchXorOp)); case uint32_t(ThreadOp::I64AtomicXor16U): CHECK_NEXT( emitAtomicRMW(ValType::I64, Scalar::Uint16, AtomicFetchXorOp)); case uint32_t(ThreadOp::I64AtomicXor32U): CHECK_NEXT( emitAtomicRMW(ValType::I64, Scalar::Uint32, AtomicFetchXorOp)); case uint32_t(ThreadOp::I32AtomicXchg): CHECK_NEXT(emitAtomicXchg(ValType::I32, Scalar::Int32)); case uint32_t(ThreadOp::I64AtomicXchg): CHECK_NEXT(emitAtomicXchg(ValType::I64, Scalar::Int64)); case uint32_t(ThreadOp::I32AtomicXchg8U): CHECK_NEXT(emitAtomicXchg(ValType::I32, Scalar::Uint8)); case uint32_t(ThreadOp::I32AtomicXchg16U): CHECK_NEXT(emitAtomicXchg(ValType::I32, Scalar::Uint16)); case uint32_t(ThreadOp::I64AtomicXchg8U): CHECK_NEXT(emitAtomicXchg(ValType::I64, Scalar::Uint8)); case uint32_t(ThreadOp::I64AtomicXchg16U): CHECK_NEXT(emitAtomicXchg(ValType::I64, Scalar::Uint16)); case uint32_t(ThreadOp::I64AtomicXchg32U): CHECK_NEXT(emitAtomicXchg(ValType::I64, Scalar::Uint32)); case uint32_t(ThreadOp::I32AtomicCmpXchg): CHECK_NEXT(emitAtomicCmpXchg(ValType::I32, Scalar::Int32)); case uint32_t(ThreadOp::I64AtomicCmpXchg): CHECK_NEXT(emitAtomicCmpXchg(ValType::I64, Scalar::Int64)); case uint32_t(ThreadOp::I32AtomicCmpXchg8U): CHECK_NEXT(emitAtomicCmpXchg(ValType::I32, Scalar::Uint8)); case uint32_t(ThreadOp::I32AtomicCmpXchg16U): 
CHECK_NEXT(emitAtomicCmpXchg(ValType::I32, Scalar::Uint16)); case uint32_t(ThreadOp::I64AtomicCmpXchg8U): CHECK_NEXT(emitAtomicCmpXchg(ValType::I64, Scalar::Uint8)); case uint32_t(ThreadOp::I64AtomicCmpXchg16U): CHECK_NEXT(emitAtomicCmpXchg(ValType::I64, Scalar::Uint16)); case uint32_t(ThreadOp::I64AtomicCmpXchg32U): CHECK_NEXT(emitAtomicCmpXchg(ValType::I64, Scalar::Uint32)); default: return iter_.unrecognizedOpcode(&op); } break; } // asm.js and other private operations case uint16_t(Op::MozPrefix): return iter_.unrecognizedOpcode(&op); default: return iter_.unrecognizedOpcode(&op); } #undef CHECK #undef NEXT #undef CHECK_NEXT #undef CHECK_POINTER_COUNT #undef CHECK_SIMD_EXPERIMENTAL #undef dispatchBinary #undef dispatchUnary #undef dispatchComparison #undef dispatchConversion #undef dispatchConversionOOM #undef dispatchCalloutConversionOOM #undef dispatchIntDivCallout #undef dispatchVectorBinary #undef dispatchVectorUnary #undef dispatchVectorComparison #undef dispatchExtractLane #undef dispatchReplaceLane #undef dispatchSplat #undef dispatchVectorReduction MOZ_CRASH("unreachable"); } MOZ_CRASH("unreachable"); } bool BaseCompiler::emitFunction() { if (!beginFunction()) { return false; } if (!emitBody()) { return false; } if (!endFunction()) { return false; } return true; } BaseCompiler::BaseCompiler(const ModuleEnvironment& moduleEnv, const CompilerEnvironment& compilerEnv, const FuncCompileInput& func, const ValTypeVector& locals, const MachineState& trapExitLayout, size_t trapExitLayoutNumWords, Decoder& decoder, StkVector& stkSource, TempAllocator* alloc, MacroAssembler* masm, StackMaps* stackMaps) : moduleEnv_(moduleEnv), compilerEnv_(compilerEnv), iter_(moduleEnv, decoder), func_(func), lastReadCallSite_(0), alloc_(alloc->fallible()), locals_(locals), deadCode_(false), bceSafe_(0), latentOp_(LatentOp::None), latentType_(ValType::I32), latentIntCmp_(Assembler::Equal), latentDoubleCmp_(Assembler::DoubleEqual), masm(*masm), fr(*masm), stackMapGenerator_(stackMaps, trapExitLayout, trapExitLayoutNumWords, *masm), stkSource_(stkSource) { // Our caller, BaselineCompileFunctions, will lend us the vector contents to // use for the eval stack. To get hold of those contents, we'll temporarily // installing an empty one in its place. MOZ_ASSERT(stk_.empty()); stk_.swap(stkSource_); // Assuming that previously processed wasm functions are well formed, the // eval stack should now be empty. But empty it anyway; any non-emptyness // at this point will cause chaos. stk_.clear(); } BaseCompiler::~BaseCompiler() { stk_.swap(stkSource_); // We've returned the eval stack vector contents to our caller, // BaselineCompileFunctions. We expect the vector we get in return to be // empty since that's what we swapped for the stack vector in our // constructor. MOZ_ASSERT(stk_.empty()); } bool BaseCompiler::init() { ra.init(this); if (!SigD_.append(ValType::F64)) { return false; } if (!SigF_.append(ValType::F32)) { return false; } ArgTypeVector args(funcType()); if (!fr.setupLocals(locals_, args, compilerEnv_.debugEnabled(), &localInfo_)) { return false; } return true; } FuncOffsets BaseCompiler::finish() { MOZ_ASSERT(done(), "all bytes must be consumed"); MOZ_ASSERT(func_.callSiteLineNums.length() == lastReadCallSite_); MOZ_ASSERT(stk_.empty()); MOZ_ASSERT(stackMapGenerator_.memRefsOnStk == 0); masm.flushBuffer(); return offsets_; } } // namespace wasm } // namespace js bool js::wasm::BaselinePlatformSupport() { #if defined(JS_CODEGEN_ARM) // Simplifying assumption: require SDIV and UDIV. 
// // I have no good data on ARM populations allowing me to say that // X% of devices in the market implement SDIV and UDIV. However, // they are definitely implemented on the Cortex-A7 and Cortex-A15 // and on all ARMv8 systems. if (!HasIDIV()) { return false; } #endif #if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86) || \ defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64) || \ defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64) return true; #else return false; #endif } bool js::wasm::BaselineCompileFunctions(const ModuleEnvironment& moduleEnv, const CompilerEnvironment& compilerEnv, LifoAlloc& lifo, const FuncCompileInputVector& inputs, CompiledCode* code, UniqueChars* error) { MOZ_ASSERT(compilerEnv.tier() == Tier::Baseline); MOZ_ASSERT(moduleEnv.kind == ModuleKind::Wasm); // The MacroAssembler will sometimes access the jitContext. TempAllocator alloc(&lifo); JitContext jitContext(&alloc); MOZ_ASSERT(IsCompilingWasm()); WasmMacroAssembler masm(alloc, moduleEnv); // Swap in already-allocated empty vectors to avoid malloc/free. MOZ_ASSERT(code->empty()); if (!code->swap(masm)) { return false; } // Create a description of the stack layout created by GenerateTrapExit(). MachineState trapExitLayout; size_t trapExitLayoutNumWords; GenerateTrapExitMachineState(&trapExitLayout, &trapExitLayoutNumWords); // The compiler's operand stack. We reuse it across all functions so as to // avoid malloc/free. Presize it to 128 elements in the hope of avoiding // reallocation later. StkVector stk; if (!stk.reserve(128)) { return false; } for (const FuncCompileInput& func : inputs) { Decoder d(func.begin, func.end, func.lineOrBytecode, error); // Build the local types vector. ValTypeVector locals; if (!locals.appendAll(moduleEnv.funcs[func.index].type->args())) { return false; } if (!DecodeLocalEntries(d, moduleEnv.types, moduleEnv.features, &locals)) { return false; } // One-pass baseline compilation. 
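    // Each BaseCompiler below is constructed afresh for its function; it
    // borrows the shared operand-stack vector 'stk' and emits into the shared
    // 'masm'.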
BaseCompiler f(moduleEnv, compilerEnv, func, locals, trapExitLayout, trapExitLayoutNumWords, d, stk, &alloc, &masm, &code->stackMaps); if (!f.init()) { return false; } if (!f.emitFunction()) { return false; } if (!code->codeRanges.emplaceBack(func.index, func.lineOrBytecode, f.finish())) { return false; } } masm.finish(); if (masm.oom()) { return false; } return code->swap(masm); } #ifdef DEBUG bool js::wasm::IsValidStackMapKey(bool debugEnabled, const uint8_t* nextPC) { # if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86) const uint8_t* insn = nextPC; return (insn[-2] == 0x0F && insn[-1] == 0x0B) || // ud2 (insn[-2] == 0xFF && (insn[-1] & 0xF8) == 0xD0) || // call *%r_ insn[-5] == 0xE8 || // call simm32 (debugEnabled && insn[-5] == 0x0F && insn[-4] == 0x1F && insn[-3] == 0x44 && insn[-2] == 0x00 && insn[-1] == 0x00); // nop_five # elif defined(JS_CODEGEN_ARM) const uint32_t* insn = (const uint32_t*)nextPC; return ((uintptr_t(insn) & 3) == 0) && // must be ARM, not Thumb (insn[-1] == 0xe7f000f0 || // udf (insn[-1] & 0xfffffff0) == 0xe12fff30 || // blx reg (ARM, enc A1) (insn[-1] & 0xff000000) == 0xeb000000 || // bl simm24 (ARM, enc A1) (debugEnabled && insn[-1] == 0xe320f000)); // "as_nop" # elif defined(JS_CODEGEN_ARM64) const uint32_t hltInsn = 0xd4a00000; const uint32_t* insn = (const uint32_t*)nextPC; return ((uintptr_t(insn) & 3) == 0) && (insn[-1] == hltInsn || // hlt (insn[-1] & 0xfffffc1f) == 0xd63f0000 || // blr reg (insn[-1] & 0xfc000000) == 0x94000000 || // bl simm26 (debugEnabled && insn[-1] == 0xd503201f)); // nop # else MOZ_CRASH("IsValidStackMapKey: requires implementation on this platform"); # endif } #endif #undef RABALDR_INT_DIV_I64_CALLOUT #undef RABALDR_I64_TO_FLOAT_CALLOUT #undef RABALDR_FLOAT_TO_I64_CALLOUT