/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 *
 * Copyright 2016 Mozilla Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "wasm/WasmBCClass.h"
#include "wasm/WasmBCDefs.h"
#include "wasm/WasmBCRegDefs.h"

#include "jit/MacroAssembler-inl.h"

#include "wasm/WasmBCClass-inl.h"
#include "wasm/WasmBCCodegen-inl.h"
#include "wasm/WasmBCRegDefs-inl.h"
#include "wasm/WasmBCRegMgmt-inl.h"
#include "wasm/WasmBCStkMgmt-inl.h"

namespace js {
namespace wasm {

//////////////////////////////////////////////////////////////////////////////
//
// Heap access subroutines.

// Bounds check elimination.
//
// We perform BCE on two kinds of address expressions: on constant heap pointers
// that are known to be in the heap or will be handled by the out-of-bounds trap
// handler; and on local variables that have been checked in dominating code
// without being updated since.
//
// For an access through a constant heap pointer + an offset we can eliminate
// the bounds check if the sum of the address and offset is below the sum of the
// minimum memory length and the offset guard length.
//
// For an access through a local variable + an offset we can eliminate the
// bounds check if the local variable has already been checked and has not been
// updated since, and the offset is less than the guard limit.
//
// To track locals for which we can eliminate checks we use a bit vector
// bceSafe_ that has a bit set for those locals whose bounds have been checked
// and which have not subsequently been set. Initially this vector is zero.
//
// In straight-line code a bit is set when we perform a bounds check on an
// access via the local and is reset when the variable is updated.
//
// In control flow, the bit vector is manipulated as follows. Each ControlItem
// has a value bceSafeOnEntry, which is the value of bceSafe_ on entry to the
// item, and a value bceSafeOnExit, which is initially ~0. On a branch (br,
// brIf, brTable), we always AND the branch target's bceSafeOnExit with the
// value of bceSafe_ at the branch point. On exiting an item by falling out of
// it, provided we're not in dead code, we AND the current value of bceSafe_
// into the item's bceSafeOnExit. Additional processing depends on the item
// type:
//
// - After a block, set bceSafe_ to the block's bceSafeOnExit.
//
// - On loop entry, after pushing the ControlItem, set bceSafe_ to zero; the
//   back edges would otherwise require us to iterate to a fixed point.
//
// - After a loop, the bceSafe_ is left unchanged, because only fallthrough
//   control flow will reach that point and the bceSafe_ value represents the
//   correct state of the fallthrough path.
//
// - Set bceSafe_ to the ControlItem's bceSafeOnEntry at both the 'then' branch
//   and the 'else' branch.
//
// - After an if-then-else, set bceSafe_ to the if-then-else's bceSafeOnExit.
//
// - After an if-then, set bceSafe_ to the if-then's bceSafeOnExit AND'ed with
//   the if-then's bceSafeOnEntry.
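//
// Illustrative sketch (added for clarity, not a normative example): in
// straight-line code accessing local 0,
//
//   (i32.load (local.get 0))           ;; check emitted; bit 0 of bceSafe_ set
//   (i32.load offset=4 (local.get 0))  ;; bit set, offset below guard: omitted
//   (local.set 0 ...)                  ;; local updated: bit 0 cleared
//   (i32.load (local.get 0))           ;; check emitted again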
//
// Finally, when the debugger allows locals to be mutated we must disable BCE
// for references via a local, by returning immediately from bceCheckLocal if
// compilerEnv_.debugEnabled() is true.

void BaseCompiler::bceCheckLocal(MemoryAccessDesc* access, AccessCheck* check,
                                 uint32_t local) {
  if (local >= sizeof(BCESet) * 8) {
    return;
  }

  uint32_t offsetGuardLimit =
      GetMaxOffsetGuardLimit(moduleEnv_.hugeMemoryEnabled());

  if ((bceSafe_ & (BCESet(1) << local)) &&
      access->offset64() < offsetGuardLimit) {
    check->omitBoundsCheck = true;
  }

  // The local becomes safe even if the offset is beyond the guard limit.
  bceSafe_ |= (BCESet(1) << local);
}

void BaseCompiler::bceLocalIsUpdated(uint32_t local) {
  if (local >= sizeof(BCESet) * 8) {
    return;
  }

  bceSafe_ &= ~(BCESet(1) << local);
}

// Alignment check elimination.
//
// Alignment checks for atomic operations can be omitted if the pointer is a
// constant and the pointer + offset is aligned. Alignment checking that can't
// be omitted can still be simplified by checking only the pointer if the offset
// is aligned.
//
// (In addition, alignment checking of the pointer can be omitted if the pointer
// has been checked in dominating code, but we don't do that yet.)

template <>
RegI32 BaseCompiler::popConstMemoryAccess<RegI32>(MemoryAccessDesc* access,
                                                  AccessCheck* check) {
  int32_t addrTemp;
  MOZ_ALWAYS_TRUE(popConst(&addrTemp));
  uint32_t addr = addrTemp;

  uint32_t offsetGuardLimit =
      GetMaxOffsetGuardLimit(moduleEnv_.hugeMemoryEnabled());

  uint64_t ea = uint64_t(addr) + uint64_t(access->offset());
  uint64_t limit = moduleEnv_.memory->initialLength32() + offsetGuardLimit;

  check->omitBoundsCheck = ea < limit;
  check->omitAlignmentCheck = (ea & (access->byteSize() - 1)) == 0;

  // Fold the offset into the pointer if we can, as this is always
  // beneficial.
  if (ea <= UINT32_MAX) {
    addr = uint32_t(ea);
    access->clearOffset();
  }

  RegI32 r = needI32();
  moveImm32(int32_t(addr), r);
  return r;
}

#ifdef ENABLE_WASM_MEMORY64
template <>
RegI64 BaseCompiler::popConstMemoryAccess<RegI64>(MemoryAccessDesc* access,
                                                  AccessCheck* check) {
  int64_t addrTemp;
  MOZ_ALWAYS_TRUE(popConst(&addrTemp));
  uint64_t addr = addrTemp;

  uint32_t offsetGuardLimit =
      GetMaxOffsetGuardLimit(moduleEnv_.hugeMemoryEnabled());

  uint64_t ea = addr + access->offset64();
  bool overflow = ea < addr;
  uint64_t limit = moduleEnv_.memory->initialLength64() + offsetGuardLimit;

  if (!overflow) {
    check->omitBoundsCheck = ea < limit;
    check->omitAlignmentCheck = (ea & (access->byteSize() - 1)) == 0;

    // Fold the offset into the pointer if we can, as this is always
    // beneficial.
    addr = uint64_t(ea);
    access->clearOffset();
  }

  RegI64 r = needI64();
  moveImm64(int64_t(addr), r);
  return r;
}
#endif

template <typename RegType>
RegType BaseCompiler::popMemoryAccess(MemoryAccessDesc* access,
                                      AccessCheck* check) {
  check->onlyPointerAlignment =
      (access->offset64() & (access->byteSize() - 1)) == 0;

  // If there's a constant it will have the correct type for RegType.
  if (hasConst()) {
    return popConstMemoryAccess<RegType>(access, check);
  }

  // If there's a local it will have the correct type for RegType.
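  // (Added note, my reading of the code just below: if the operand on top of
  // the value stack is still an unmodified reference to a local, peekLocal
  // reports its index, and bceCheckLocal may then both omit this access's
  // bounds check and record the local as checked in bceSafe_.)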
uint32_t local; if (peekLocal(&local)) { bceCheckLocal(access, check, local); } return pop(); } #ifdef JS_64BIT static inline RegI64 RegPtrToRegIntptr(RegPtr r) { return RegI64(Register64(Register(r))); } # ifndef WASM_HAS_HEAPREG static inline RegPtr RegIntptrToRegPtr(RegI64 r) { return RegPtr(Register64(r).reg); } # endif #else static inline RegI32 RegPtrToRegIntptr(RegPtr r) { return RegI32(Register(r)); } # ifndef WASM_HAS_HEAPREG static inline RegPtr RegIntptrToRegPtr(RegI32 r) { return RegPtr(Register(r)); } # endif #endif #ifdef WASM_HAS_HEAPREG void BaseCompiler::pushHeapBase() { RegPtr heapBase = need(); move(RegPtr(HeapReg), heapBase); push(RegPtrToRegIntptr(heapBase)); } #else void BaseCompiler::pushHeapBase() { RegPtr heapBase = need(); # ifdef RABALDR_PIN_INSTANCE movePtr(RegPtr(InstanceReg), heapBase); # else fr.loadInstancePtr(heapBase); # endif masm.loadPtr(Address(heapBase, Instance::offsetOfMemoryBase()), heapBase); push(RegPtrToRegIntptr(heapBase)); } #endif void BaseCompiler::branchAddNoOverflow(uint64_t offset, RegI32 ptr, Label* ok) { // The invariant holds because ptr is RegI32 - this is m32. MOZ_ASSERT(offset <= UINT32_MAX); masm.branchAdd32(Assembler::CarryClear, Imm32(uint32_t(offset)), ptr, ok); } #ifdef ENABLE_WASM_MEMORY64 void BaseCompiler::branchAddNoOverflow(uint64_t offset, RegI64 ptr, Label* ok) { # if defined(JS_64BIT) masm.branchAddPtr(Assembler::CarryClear, ImmWord(offset), Register64(ptr).reg, ok); # else masm.branchAdd64(Assembler::CarryClear, Imm64(offset), ptr, ok); # endif } #endif void BaseCompiler::branchTestLowZero(RegI32 ptr, Imm32 mask, Label* ok) { masm.branchTest32(Assembler::Zero, ptr, mask, ok); } #ifdef ENABLE_WASM_MEMORY64 void BaseCompiler::branchTestLowZero(RegI64 ptr, Imm32 mask, Label* ok) { # ifdef JS_64BIT masm.branchTestPtr(Assembler::Zero, Register64(ptr).reg, mask, ok); # else masm.branchTestPtr(Assembler::Zero, ptr.low, mask, ok); # endif } #endif void BaseCompiler::boundsCheck4GBOrLargerAccess(RegPtr instance, RegI32 ptr, Label* ok) { #ifdef JS_64BIT // Extend the value to 64 bits, check the 64-bit value against the 64-bit // bound, then chop back to 32 bits. On most platform the extending and // chopping are no-ops. It's important that the value we end up with has // flowed through the Spectre mask // Note, ptr and ptr64 are the same register. RegI64 ptr64 = fromI32(ptr); // In principle there may be non-zero bits in the upper bits of the // register; clear them. # ifdef RABALDR_ZERO_EXTENDS masm.debugAssertCanonicalInt32(ptr); # else masm.move32To64ZeroExtend(ptr, ptr64); # endif boundsCheck4GBOrLargerAccess(instance, ptr64, ok); // Restore the value to the canonical form for a 32-bit value in a // 64-bit register and/or the appropriate form for further use in the // indexing instruction. # ifdef RABALDR_ZERO_EXTENDS // The canonical value is zero-extended; we already have that. # else masm.move64To32(ptr64, ptr); # endif #else // No support needed, we have max 2GB heap on 32-bit MOZ_CRASH("No 32-bit support"); #endif } void BaseCompiler::boundsCheckBelow4GBAccess(RegPtr instance, RegI32 ptr, Label* ok) { // If the memory's max size is known to be smaller than 64K pages exactly, // we can use a 32-bit check and avoid extension and wrapping. masm.wasmBoundsCheck32( Assembler::Below, ptr, Address(instance, Instance::offsetOfBoundsCheckLimit()), ok); } void BaseCompiler::boundsCheck4GBOrLargerAccess(RegPtr instance, RegI64 ptr, Label* ok) { // Any Spectre mitigation will appear to update the ptr64 register. 
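  // (Added note: wasmBoundsCheck64 compares the full 64-bit index against the
  // bounds-check limit stored in the instance and branches to 'ok' when the
  // index is below it; the caller, prepareMemoryAccess, emits the OutOfBounds
  // trap on the fall-through path and then binds 'ok'.)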
masm.wasmBoundsCheck64( Assembler::Below, ptr, Address(instance, Instance::offsetOfBoundsCheckLimit()), ok); } void BaseCompiler::boundsCheckBelow4GBAccess(RegPtr instance, RegI64 ptr, Label* ok) { // The bounds check limit is valid to 64 bits, so there's no sense in doing // anything complicated here. There may be optimization paths here in the // future and they may differ on 32-bit and 64-bit. boundsCheck4GBOrLargerAccess(instance, ptr, ok); } // Make sure the ptr could be used as an index register. static inline void ToValidIndex(MacroAssembler& masm, RegI32 ptr) { #if defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || \ defined(JS_CODEGEN_RISCV64) // When ptr is used as an index, it will be added to a 64-bit register. // So we should explicitly promote ptr to 64-bit. Since now ptr holds a // unsigned 32-bit value, we zero-extend it to 64-bit here. masm.move32To64ZeroExtend(ptr, Register64(ptr)); #endif } #if defined(ENABLE_WASM_MEMORY64) static inline void ToValidIndex(MacroAssembler& masm, RegI64 ptr) {} #endif // RegIndexType is RegI32 for Memory32 and RegI64 for Memory64. template void BaseCompiler::prepareMemoryAccess(MemoryAccessDesc* access, AccessCheck* check, RegPtr instance, RegIndexType ptr) { uint32_t offsetGuardLimit = GetMaxOffsetGuardLimit(moduleEnv_.hugeMemoryEnabled()); // Fold offset if necessary for further computations. if (access->offset64() >= offsetGuardLimit || access->offset64() > UINT32_MAX || (access->isAtomic() && !check->omitAlignmentCheck && !check->onlyPointerAlignment)) { Label ok; branchAddNoOverflow(access->offset64(), ptr, &ok); masm.wasmTrap(Trap::OutOfBounds, bytecodeOffset()); masm.bind(&ok); access->clearOffset(); check->onlyPointerAlignment = true; } // Alignment check if required. if (access->isAtomic() && !check->omitAlignmentCheck) { MOZ_ASSERT(check->onlyPointerAlignment); // We only care about the low pointer bits here. Label ok; branchTestLowZero(ptr, Imm32(access->byteSize() - 1), &ok); masm.wasmTrap(Trap::UnalignedAccess, bytecodeOffset()); masm.bind(&ok); } // Ensure no instance if we don't need it. if (moduleEnv_.hugeMemoryEnabled()) { // We have HeapReg and no bounds checking and need load neither // memoryBase nor boundsCheckLimit from instance. MOZ_ASSERT_IF(check->omitBoundsCheck, instance.isInvalid()); } #ifdef WASM_HAS_HEAPREG // We have HeapReg and don't need to load the memoryBase from instance. MOZ_ASSERT_IF(check->omitBoundsCheck, instance.isInvalid()); #endif // Bounds check if required. if (!moduleEnv_.hugeMemoryEnabled() && !check->omitBoundsCheck) { Label ok; #ifdef JS_64BIT // The checking depends on how many bits are in the pointer and how many // bits are in the bound. 
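  // (Added explanation, my reading of the branch below: when the bounds-check
  // limit is not known to fit in 32 bits and the memory's maximum size can
  // reach 4 GiB or more, the 64-bit comparison path is taken; otherwise the
  // cheaper 32-bit comparison is sufficient.)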
static_assert(0x100000000 % PageSize == 0); if (!moduleEnv_.memory->boundsCheckLimitIs32Bits() && MaxMemoryPages(moduleEnv_.memory->indexType()) >= Pages(0x100000000 / PageSize)) { boundsCheck4GBOrLargerAccess(instance, ptr, &ok); } else { boundsCheckBelow4GBAccess(instance, ptr, &ok); } #else boundsCheckBelow4GBAccess(instance, ptr, &ok); #endif masm.wasmTrap(Trap::OutOfBounds, bytecodeOffset()); masm.bind(&ok); } ToValidIndex(masm, ptr); } template void BaseCompiler::computeEffectiveAddress(MemoryAccessDesc* access) { if (access->offset()) { Label ok; RegIndexType ptr = pop(); branchAddNoOverflow(access->offset64(), ptr, &ok); masm.wasmTrap(Trap::OutOfBounds, bytecodeOffset()); masm.bind(&ok); access->clearOffset(); push(ptr); } } bool BaseCompiler::needInstanceForAccess(const AccessCheck& check) { #ifndef WASM_HAS_HEAPREG // Platform requires instance for memory base. return true; #else return !moduleEnv_.hugeMemoryEnabled() && !check.omitBoundsCheck; #endif } RegPtr BaseCompiler::maybeLoadInstanceForAccess(const AccessCheck& check) { if (needInstanceForAccess(check)) { #ifdef RABALDR_PIN_INSTANCE // NOTE, returning InstanceReg here depends for correctness on *ALL* // clients not attempting to free this register and not push it on the value // stack. // // We have assertions in place to guard against that, so the risk of the // leaky abstraction is acceptable. performRegisterLeakCheck() will ensure // that after every bytecode, the union of available registers from the // regalloc and used registers from the stack equals the set of allocatable // registers at startup. Thus if the instance is freed incorrectly it will // end up in that union via the regalloc, and if it is pushed incorrectly it // will end up in the union via the stack. return RegPtr(InstanceReg); #else RegPtr instance = need(); fr.loadInstancePtr(instance); return instance; #endif } return RegPtr::Invalid(); } RegPtr BaseCompiler::maybeLoadInstanceForAccess(const AccessCheck& check, RegPtr specific) { if (needInstanceForAccess(check)) { #ifdef RABALDR_PIN_INSTANCE movePtr(RegPtr(InstanceReg), specific); #else fr.loadInstancePtr(specific); #endif return specific; } return RegPtr::Invalid(); } ////////////////////////////////////////////////////////////////////////////// // // Load and store. void BaseCompiler::executeLoad(MemoryAccessDesc* access, AccessCheck* check, RegPtr instance, RegI32 ptr, AnyReg dest, RegI32 temp) { // Emit the load. At this point, 64-bit offsets will have been resolved. #if defined(JS_CODEGEN_X64) MOZ_ASSERT(temp.isInvalid()); Operand srcAddr(HeapReg, ptr, TimesOne, access->offset()); if (dest.tag == AnyReg::I64) { masm.wasmLoadI64(*access, srcAddr, dest.i64()); } else { masm.wasmLoad(*access, srcAddr, dest.any()); } #elif defined(JS_CODEGEN_X86) MOZ_ASSERT(temp.isInvalid()); masm.addPtr(Address(instance, Instance::offsetOfMemoryBase()), ptr); Operand srcAddr(ptr, access->offset()); if (dest.tag == AnyReg::I64) { MOZ_ASSERT(dest.i64() == specific_.abiReturnRegI64); masm.wasmLoadI64(*access, srcAddr, dest.i64()); } else { // For 8 bit loads, this will generate movsbl or movzbl, so // there's no constraint on what the output register may be. 
masm.wasmLoad(*access, srcAddr, dest.any()); } #elif defined(JS_CODEGEN_MIPS64) if (IsUnaligned(*access)) { switch (dest.tag) { case AnyReg::I64: masm.wasmUnalignedLoadI64(*access, HeapReg, ptr, ptr, dest.i64(), temp); break; case AnyReg::F32: masm.wasmUnalignedLoadFP(*access, HeapReg, ptr, ptr, dest.f32(), temp); break; case AnyReg::F64: masm.wasmUnalignedLoadFP(*access, HeapReg, ptr, ptr, dest.f64(), temp); break; case AnyReg::I32: masm.wasmUnalignedLoad(*access, HeapReg, ptr, ptr, dest.i32(), temp); break; default: MOZ_CRASH("Unexpected type"); } } else { if (dest.tag == AnyReg::I64) { masm.wasmLoadI64(*access, HeapReg, ptr, ptr, dest.i64()); } else { masm.wasmLoad(*access, HeapReg, ptr, ptr, dest.any()); } } #elif defined(JS_CODEGEN_ARM) MOZ_ASSERT(temp.isInvalid()); if (dest.tag == AnyReg::I64) { masm.wasmLoadI64(*access, HeapReg, ptr, ptr, dest.i64()); } else { masm.wasmLoad(*access, HeapReg, ptr, ptr, dest.any()); } #elif defined(JS_CODEGEN_ARM64) MOZ_ASSERT(temp.isInvalid()); if (dest.tag == AnyReg::I64) { masm.wasmLoadI64(*access, HeapReg, ptr, dest.i64()); } else { masm.wasmLoad(*access, HeapReg, ptr, dest.any()); } #elif defined(JS_CODEGEN_LOONG64) MOZ_ASSERT(temp.isInvalid()); if (dest.tag == AnyReg::I64) { masm.wasmLoadI64(*access, HeapReg, ptr, ptr, dest.i64()); } else { masm.wasmLoad(*access, HeapReg, ptr, ptr, dest.any()); } #elif defined(JS_CODEGEN_RISCV64) MOZ_CRASH("UNIMPLEMENTED ON RISCV64"); #else MOZ_CRASH("BaseCompiler platform hook: load"); #endif } // ptr and dest may be the same iff dest is I32. // This may destroy ptr even if ptr and dest are not the same. void BaseCompiler::load(MemoryAccessDesc* access, AccessCheck* check, RegPtr instance, RegI32 ptr, AnyReg dest, RegI32 temp) { prepareMemoryAccess(access, check, instance, ptr); executeLoad(access, check, instance, ptr, dest, temp); } #ifdef ENABLE_WASM_MEMORY64 void BaseCompiler::load(MemoryAccessDesc* access, AccessCheck* check, RegPtr instance, RegI64 ptr, AnyReg dest, RegI64 temp) { prepareMemoryAccess(access, check, instance, ptr); # if !defined(JS_64BIT) // On 32-bit systems we have a maximum 2GB heap and bounds checking has // been applied to ensure that the 64-bit pointer is valid. return executeLoad(access, check, instance, RegI32(ptr.low), dest, maybeFromI64(temp)); # elif defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_ARM64) // On x64 and arm64 the 32-bit code simply assumes that the high bits of the // 64-bit pointer register are zero and performs a 64-bit add. Thus the code // generated is the same for the 64-bit and the 32-bit case. return executeLoad(access, check, instance, RegI32(ptr.reg), dest, maybeFromI64(temp)); # elif defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) // On mips64 and loongarch64, the 'prepareMemoryAccess' function will make // sure that ptr holds a valid 64-bit index value. Thus the code generated in // 'executeLoad' is the same for the 64-bit and the 32-bit case. return executeLoad(access, check, instance, RegI32(ptr.reg), dest, maybeFromI64(temp)); # else MOZ_CRASH("Missing platform hook"); # endif } #endif void BaseCompiler::executeStore(MemoryAccessDesc* access, AccessCheck* check, RegPtr instance, RegI32 ptr, AnyReg src, RegI32 temp) { // Emit the store. At this point, 64-bit offsets will have been resolved. 
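  // (Added orientation note: the per-platform cases below mirror executeLoad.
  // The effective address is HeapReg (or, on x86, the memory base loaded from
  // the instance and added into ptr) plus the checked index, plus any constant
  // offset that was not folded away earlier.)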
#if defined(JS_CODEGEN_X64) MOZ_ASSERT(temp.isInvalid()); Operand dstAddr(HeapReg, ptr, TimesOne, access->offset()); masm.wasmStore(*access, src.any(), dstAddr); #elif defined(JS_CODEGEN_X86) MOZ_ASSERT(temp.isInvalid()); masm.addPtr(Address(instance, Instance::offsetOfMemoryBase()), ptr); Operand dstAddr(ptr, access->offset()); if (access->type() == Scalar::Int64) { masm.wasmStoreI64(*access, src.i64(), dstAddr); } else { AnyRegister value; ScratchI8 scratch(*this); if (src.tag == AnyReg::I64) { if (access->byteSize() == 1 && !ra.isSingleByteI32(src.i64().low)) { masm.mov(src.i64().low, scratch); value = AnyRegister(scratch); } else { value = AnyRegister(src.i64().low); } } else if (access->byteSize() == 1 && !ra.isSingleByteI32(src.i32())) { masm.mov(src.i32(), scratch); value = AnyRegister(scratch); } else { value = src.any(); } masm.wasmStore(*access, value, dstAddr); } #elif defined(JS_CODEGEN_ARM) MOZ_ASSERT(temp.isInvalid()); if (access->type() == Scalar::Int64) { masm.wasmStoreI64(*access, src.i64(), HeapReg, ptr, ptr); } else if (src.tag == AnyReg::I64) { masm.wasmStore(*access, AnyRegister(src.i64().low), HeapReg, ptr, ptr); } else { masm.wasmStore(*access, src.any(), HeapReg, ptr, ptr); } #elif defined(JS_CODEGEN_MIPS64) if (IsUnaligned(*access)) { switch (src.tag) { case AnyReg::I64: masm.wasmUnalignedStoreI64(*access, src.i64(), HeapReg, ptr, ptr, temp); break; case AnyReg::F32: masm.wasmUnalignedStoreFP(*access, src.f32(), HeapReg, ptr, ptr, temp); break; case AnyReg::F64: masm.wasmUnalignedStoreFP(*access, src.f64(), HeapReg, ptr, ptr, temp); break; case AnyReg::I32: masm.wasmUnalignedStore(*access, src.i32(), HeapReg, ptr, ptr, temp); break; default: MOZ_CRASH("Unexpected type"); } } else { if (src.tag == AnyReg::I64) { masm.wasmStoreI64(*access, src.i64(), HeapReg, ptr, ptr); } else { masm.wasmStore(*access, src.any(), HeapReg, ptr, ptr); } } #elif defined(JS_CODEGEN_ARM64) MOZ_ASSERT(temp.isInvalid()); if (access->type() == Scalar::Int64) { masm.wasmStoreI64(*access, src.i64(), HeapReg, ptr); } else { masm.wasmStore(*access, src.any(), HeapReg, ptr); } #elif defined(JS_CODEGEN_LOONG64) MOZ_ASSERT(temp.isInvalid()); if (access->type() == Scalar::Int64) { masm.wasmStoreI64(*access, src.i64(), HeapReg, ptr, ptr); } else { masm.wasmStore(*access, src.any(), HeapReg, ptr, ptr); } #else MOZ_CRASH("BaseCompiler platform hook: store"); #endif } // ptr and src must not be the same register. // This may destroy ptr and src. 
void BaseCompiler::store(MemoryAccessDesc* access, AccessCheck* check, RegPtr instance, RegI32 ptr, AnyReg src, RegI32 temp) { prepareMemoryAccess(access, check, instance, ptr); executeStore(access, check, instance, ptr, src, temp); } #ifdef ENABLE_WASM_MEMORY64 void BaseCompiler::store(MemoryAccessDesc* access, AccessCheck* check, RegPtr instance, RegI64 ptr, AnyReg src, RegI64 temp) { prepareMemoryAccess(access, check, instance, ptr); // See comments in load() # if !defined(JS_64BIT) return executeStore(access, check, instance, RegI32(ptr.low), src, maybeFromI64(temp)); # elif defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_ARM64) || \ defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) return executeStore(access, check, instance, RegI32(ptr.reg), src, maybeFromI64(temp)); # else MOZ_CRASH("Missing platform hook"); # endif } #endif template void BaseCompiler::doLoadCommon(MemoryAccessDesc* access, AccessCheck check, ValType type) { RegPtr instance; RegType temp; #if defined(JS_CODEGEN_MIPS64) temp = need(); #endif switch (type.kind()) { case ValType::I32: { RegType rp = popMemoryAccess(access, &check); RegI32 rv = needI32(); instance = maybeLoadInstanceForAccess(check); load(access, &check, instance, rp, AnyReg(rv), temp); push(rv); free(rp); break; } case ValType::I64: { RegI64 rv; RegType rp; #ifdef JS_CODEGEN_X86 rv = specific_.abiReturnRegI64; needI64(rv); rp = popMemoryAccess(access, &check); #else rp = popMemoryAccess(access, &check); rv = needI64(); #endif instance = maybeLoadInstanceForAccess(check); load(access, &check, instance, rp, AnyReg(rv), temp); push(rv); free(rp); break; } case ValType::F32: { RegType rp = popMemoryAccess(access, &check); RegF32 rv = needF32(); instance = maybeLoadInstanceForAccess(check); load(access, &check, instance, rp, AnyReg(rv), temp); push(rv); free(rp); break; } case ValType::F64: { RegType rp = popMemoryAccess(access, &check); RegF64 rv = needF64(); instance = maybeLoadInstanceForAccess(check); load(access, &check, instance, rp, AnyReg(rv), temp); push(rv); free(rp); break; } #ifdef ENABLE_WASM_SIMD case ValType::V128: { RegType rp = popMemoryAccess(access, &check); RegV128 rv = needV128(); instance = maybeLoadInstanceForAccess(check); load(access, &check, instance, rp, AnyReg(rv), temp); push(rv); free(rp); break; } #endif default: MOZ_CRASH("load type"); break; } #ifndef RABALDR_PIN_INSTANCE maybeFree(instance); #endif maybeFree(temp); } void BaseCompiler::loadCommon(MemoryAccessDesc* access, AccessCheck check, ValType type) { if (isMem32()) { doLoadCommon(access, check, type); } else { #ifdef ENABLE_WASM_MEMORY64 doLoadCommon(access, check, type); #else MOZ_CRASH("Memory64 not enabled / supported on this platform"); #endif } } template void BaseCompiler::doStoreCommon(MemoryAccessDesc* access, AccessCheck check, ValType resultType) { RegPtr instance; RegType temp; #if defined(JS_CODEGEN_MIPS64) temp = need(); #endif switch (resultType.kind()) { case ValType::I32: { RegI32 rv = popI32(); RegType rp = popMemoryAccess(access, &check); instance = maybeLoadInstanceForAccess(check); store(access, &check, instance, rp, AnyReg(rv), temp); free(rp); free(rv); break; } case ValType::I64: { RegI64 rv = popI64(); RegType rp = popMemoryAccess(access, &check); instance = maybeLoadInstanceForAccess(check); store(access, &check, instance, rp, AnyReg(rv), temp); free(rp); free(rv); break; } case ValType::F32: { RegF32 rv = popF32(); RegType rp = popMemoryAccess(access, &check); instance = maybeLoadInstanceForAccess(check); store(access, &check, instance, 
rp, AnyReg(rv), temp); free(rp); free(rv); break; } case ValType::F64: { RegF64 rv = popF64(); RegType rp = popMemoryAccess(access, &check); instance = maybeLoadInstanceForAccess(check); store(access, &check, instance, rp, AnyReg(rv), temp); free(rp); free(rv); break; } #ifdef ENABLE_WASM_SIMD case ValType::V128: { RegV128 rv = popV128(); RegType rp = popMemoryAccess(access, &check); instance = maybeLoadInstanceForAccess(check); store(access, &check, instance, rp, AnyReg(rv), temp); free(rp); free(rv); break; } #endif default: MOZ_CRASH("store type"); break; } #ifndef RABALDR_PIN_INSTANCE maybeFree(instance); #endif maybeFree(temp); } void BaseCompiler::storeCommon(MemoryAccessDesc* access, AccessCheck check, ValType type) { if (isMem32()) { doStoreCommon(access, check, type); } else { #ifdef ENABLE_WASM_MEMORY64 doStoreCommon(access, check, type); #else MOZ_CRASH("Memory64 not enabled / supported on this platform"); #endif } } // Convert something that may contain a heap index into a Register that can be // used in an access. static inline Register ToRegister(RegI32 r) { return Register(r); } #ifdef ENABLE_WASM_MEMORY64 # ifdef JS_PUNBOX64 static inline Register ToRegister(RegI64 r) { return r.reg; } # else static inline Register ToRegister(RegI64 r) { return r.low; } # endif #endif ////////////////////////////////////////////////////////////////////////////// // // Atomic operations. // // The atomic operations have very diverse per-platform needs for register // allocation and temps. To handle that, the implementations are structured as // a per-operation framework method that calls into platform-specific helpers // (usually called PopAndAllocate, Perform, and Deallocate) in a per-operation // namespace. This structure results in a little duplication and boilerplate // but is otherwise clean and flexible and keeps code and supporting definitions // entirely co-located. #ifdef WASM_HAS_HEAPREG // RegIndexType is RegI32 for Memory32 and RegI64 for Memory64. template BaseIndex BaseCompiler::prepareAtomicMemoryAccess(MemoryAccessDesc* access, AccessCheck* check, RegPtr instance, RegIndexType ptr) { MOZ_ASSERT(needInstanceForAccess(*check) == instance.isValid()); prepareMemoryAccess(access, check, instance, ptr); // At this point, 64-bit offsets will have been resolved. return BaseIndex(HeapReg, ToRegister(ptr), TimesOne, access->offset()); } #else // Some consumers depend on the returned Address not incorporating instance, as // instance may be the scratch register. // // RegIndexType is RegI32 for Memory32 and RegI64 for Memory64. template Address BaseCompiler::prepareAtomicMemoryAccess(MemoryAccessDesc* access, AccessCheck* check, RegPtr instance, RegIndexType ptr) { MOZ_ASSERT(needInstanceForAccess(*check) == instance.isValid()); prepareMemoryAccess(access, check, instance, ptr); masm.addPtr(Address(instance, Instance::offsetOfMemoryBase()), ToRegister(ptr)); // At this point, 64-bit offsets will have been resolved. return Address(ToRegister(ptr), access->offset()); } #endif #ifndef WASM_HAS_HEAPREG # ifdef JS_CODEGEN_X86 using ScratchAtomicNoHeapReg = ScratchEBX; # else # error "Unimplemented porting interface" # endif #endif ////////////////////////////////////////////////////////////////////////////// // // Atomic load and store. 
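// (Added sketch of the per-operation framework described above; the helper
// names are the real ones used below, and the ordering shown is the common
// case:
//
//   op::PopAndAllocate(...)          // pop operands, pin any special registers
//   rp = popMemoryAccess(access, &check)
//   instance = maybeLoadInstanceForAccess(check)
//   memaddr = prepareAtomicMemoryAccess(access, &check, instance, rp)
//   op::Perform(...)                 // emit the atomic instruction(s)
//   op::Deallocate(...); free(rp)    // release registers, push the result)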
namespace atomic_load64 { #ifdef JS_CODEGEN_ARM static void Allocate(BaseCompiler* bc, RegI64* rd, RegI64*) { *rd = bc->needI64Pair(); } static void Deallocate(BaseCompiler* bc, RegI64) {} #elif defined JS_CODEGEN_X86 static void Allocate(BaseCompiler* bc, RegI64* rd, RegI64* temp) { // The result is in edx:eax, and we need ecx:ebx as a temp. But ebx will also // be used as a scratch, so don't manage that here. bc->needI32(bc->specific_.ecx); *temp = bc->specific_.ecx_ebx; bc->needI64(bc->specific_.edx_eax); *rd = bc->specific_.edx_eax; } static void Deallocate(BaseCompiler* bc, RegI64 temp) { // See comment above. MOZ_ASSERT(temp.high == js::jit::ecx); bc->freeI32(bc->specific_.ecx); } #elif defined(__wasi__) || (defined(JS_CODEGEN_NONE) && !defined(JS_64BIT)) static void Allocate(BaseCompiler*, RegI64*, RegI64*) {} static void Deallocate(BaseCompiler*, RegI64) {} #endif } // namespace atomic_load64 #if !defined(JS_64BIT) template void BaseCompiler::atomicLoad64(MemoryAccessDesc* access) { RegI64 rd, temp; atomic_load64::Allocate(this, &rd, &temp); AccessCheck check; RegIndexType rp = popMemoryAccess(access, &check); # ifdef WASM_HAS_HEAPREG RegPtr instance = maybeLoadInstanceForAccess(check); auto memaddr = prepareAtomicMemoryAccess(access, &check, instance, rp); masm.wasmAtomicLoad64(*access, memaddr, temp, rd); # ifndef RABALDR_PIN_INSTANCE maybeFree(instance); # endif # else ScratchAtomicNoHeapReg scratch(*this); RegPtr instance = maybeLoadInstanceForAccess(check, RegIntptrToRegPtr(scratch)); auto memaddr = prepareAtomicMemoryAccess(access, &check, instance, rp); masm.wasmAtomicLoad64(*access, memaddr, temp, rd); MOZ_ASSERT(instance == scratch); # endif free(rp); atomic_load64::Deallocate(this, temp); pushI64(rd); } #endif void BaseCompiler::atomicLoad(MemoryAccessDesc* access, ValType type) { Scalar::Type viewType = access->type(); if (Scalar::byteSize(viewType) <= sizeof(void*)) { loadCommon(access, AccessCheck(), type); return; } MOZ_ASSERT(type == ValType::I64 && Scalar::byteSize(viewType) == 8); #if !defined(JS_64BIT) if (isMem32()) { atomicLoad64(access); } else { # ifdef ENABLE_WASM_MEMORY64 atomicLoad64(access); # else MOZ_CRASH("Memory64 not enabled / supported on this platform"); # endif } #else MOZ_CRASH("Should not happen"); #endif } void BaseCompiler::atomicStore(MemoryAccessDesc* access, ValType type) { Scalar::Type viewType = access->type(); if (Scalar::byteSize(viewType) <= sizeof(void*)) { storeCommon(access, AccessCheck(), type); return; } MOZ_ASSERT(type == ValType::I64 && Scalar::byteSize(viewType) == 8); #if !defined(JS_64BIT) if (isMem32()) { atomicXchg64(access, WantResult(false)); } else { # ifdef ENABLE_WASM_MEMORY64 atomicXchg64(access, WantResult(false)); # else MOZ_CRASH("Memory64 not enabled / supported on this platform"); # endif } #else MOZ_CRASH("Should not happen"); #endif } ////////////////////////////////////////////////////////////////////////////// // // Atomic RMW op= operations. 
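// (Illustrative mapping, added for orientation: i32.atomic.rmw.add and
// i64.atomic.rmw8.add_u access at most 4 bytes and dispatch to atomicRMW32,
// while i64.atomic.rmw.add is an 8-byte access and dispatches to atomicRMW64.)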
void BaseCompiler::atomicRMW(MemoryAccessDesc* access, ValType type, AtomicOp op) { Scalar::Type viewType = access->type(); if (Scalar::byteSize(viewType) <= 4) { if (isMem32()) { atomicRMW32(access, type, op); } else { #ifdef ENABLE_WASM_MEMORY64 atomicRMW32(access, type, op); #else MOZ_CRASH("Memory64 not enabled / supported on this platform"); #endif } } else { MOZ_ASSERT(type == ValType::I64 && Scalar::byteSize(viewType) == 8); if (isMem32()) { atomicRMW64(access, type, op); } else { #ifdef ENABLE_WASM_MEMORY64 atomicRMW64(access, type, op); #else MOZ_CRASH("Memory64 not enabled / supported on this platform"); #endif } } } namespace atomic_rmw32 { #if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86) struct Temps { // On x86 we use the ScratchI32 for the temp, otherwise we'd run out of // registers for 64-bit operations. # if defined(JS_CODEGEN_X64) RegI32 t0; # endif }; static void PopAndAllocate(BaseCompiler* bc, ValType type, Scalar::Type viewType, AtomicOp op, RegI32* rd, RegI32* rv, Temps* temps) { bc->needI32(bc->specific_.eax); if (op == AtomicFetchAddOp || op == AtomicFetchSubOp) { // We use xadd, so source and destination are the same. Using // eax here is overconstraining, but for byte operations on x86 // we do need something with a byte register. if (type == ValType::I64) { *rv = bc->popI64ToSpecificI32(bc->specific_.eax); } else { *rv = bc->popI32ToSpecific(bc->specific_.eax); } *rd = *rv; } else { // We use a cmpxchg loop. The output must be eax; the input // must be in a separate register since it may be used several // times. if (type == ValType::I64) { *rv = bc->popI64ToI32(); } else { *rv = bc->popI32(); } *rd = bc->specific_.eax; # ifdef JS_CODEGEN_X64 temps->t0 = bc->needI32(); # endif } } template static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, T srcAddr, AtomicOp op, RegI32 rv, RegI32 rd, const Temps& temps) { # ifdef JS_CODEGEN_X64 RegI32 temp = temps.t0; # else RegI32 temp; ScratchI32 scratch(*bc); if (op != AtomicFetchAddOp && op != AtomicFetchSubOp) { temp = scratch; } # endif bc->masm.wasmAtomicFetchOp(access, op, rv, srcAddr, temp, rd); } static void Deallocate(BaseCompiler* bc, RegI32 rv, const Temps& temps) { if (rv != bc->specific_.eax) { bc->freeI32(rv); } # ifdef JS_CODEGEN_X64 bc->maybeFree(temps.t0); # endif } #elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64) struct Temps { RegI32 t0; }; static void PopAndAllocate(BaseCompiler* bc, ValType type, Scalar::Type viewType, AtomicOp op, RegI32* rd, RegI32* rv, Temps* temps) { *rv = type == ValType::I64 ? bc->popI64ToI32() : bc->popI32(); temps->t0 = bc->needI32(); *rd = bc->needI32(); } static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, BaseIndex srcAddr, AtomicOp op, RegI32 rv, RegI32 rd, const Temps& temps) { bc->masm.wasmAtomicFetchOp(access, op, rv, srcAddr, temps.t0, rd); } static void Deallocate(BaseCompiler* bc, RegI32 rv, const Temps& temps) { bc->freeI32(rv); bc->freeI32(temps.t0); } #elif defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) struct Temps { RegI32 t0, t1, t2; }; static void PopAndAllocate(BaseCompiler* bc, ValType type, Scalar::Type viewType, AtomicOp op, RegI32* rd, RegI32* rv, Temps* temps) { *rv = type == ValType::I64 ? 
bc->popI64ToI32() : bc->popI32(); if (Scalar::byteSize(viewType) < 4) { temps->t0 = bc->needI32(); temps->t1 = bc->needI32(); temps->t2 = bc->needI32(); } *rd = bc->needI32(); } static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, BaseIndex srcAddr, AtomicOp op, RegI32 rv, RegI32 rd, const Temps& temps) { bc->masm.wasmAtomicFetchOp(access, op, rv, srcAddr, temps.t0, temps.t1, temps.t2, rd); } static void Deallocate(BaseCompiler* bc, RegI32 rv, const Temps& temps) { bc->freeI32(rv); bc->maybeFree(temps.t0); bc->maybeFree(temps.t1); bc->maybeFree(temps.t2); } #elif defined(JS_CODEGEN_RISCV64) using Temps = Nothing; static void PopAndAllocate(BaseCompiler*, ValType, Scalar::Type, AtomicOp, RegI32*, RegI32*, Temps*) { MOZ_CRASH("UNIMPLEMENTED ON RISCV64"); } static void Perform(BaseCompiler*, const MemoryAccessDesc&, BaseIndex, AtomicOp, RegI32, RegI32, const Temps&) { MOZ_CRASH("UNIMPLEMENTED ON RISCV64"); } static void Deallocate(BaseCompiler*, RegI32, const Temps&) { MOZ_CRASH("UNIMPLEMENTED ON RISCV64"); } #elif defined(JS_CODEGEN_NONE) || defined(JS_CODEGEN_WASM32) using Temps = Nothing; static void PopAndAllocate(BaseCompiler*, ValType, Scalar::Type, AtomicOp, RegI32*, RegI32*, Temps*) {} static void Perform(BaseCompiler*, const MemoryAccessDesc&, BaseIndex, AtomicOp, RegI32, RegI32, const Temps&) {} static void Deallocate(BaseCompiler*, RegI32, const Temps&) {} #endif } // namespace atomic_rmw32 template void BaseCompiler::atomicRMW32(MemoryAccessDesc* access, ValType type, AtomicOp op) { Scalar::Type viewType = access->type(); RegI32 rd, rv; atomic_rmw32::Temps temps; atomic_rmw32::PopAndAllocate(this, type, viewType, op, &rd, &rv, &temps); AccessCheck check; RegIndexType rp = popMemoryAccess(access, &check); RegPtr instance = maybeLoadInstanceForAccess(check); auto memaddr = prepareAtomicMemoryAccess(access, &check, instance, rp); atomic_rmw32::Perform(this, *access, memaddr, op, rv, rd, temps); #ifndef RABALDR_PIN_INSTANCE maybeFree(instance); #endif atomic_rmw32::Deallocate(this, rv, temps); free(rp); if (type == ValType::I64) { pushU32AsI64(rd); } else { pushI32(rd); } } namespace atomic_rmw64 { #if defined(JS_CODEGEN_X64) static void PopAndAllocate(BaseCompiler* bc, AtomicOp op, RegI64* rd, RegI64* rv, RegI64* temp) { if (op == AtomicFetchAddOp || op == AtomicFetchSubOp) { // We use xaddq, so input and output must be the same register. *rv = bc->popI64(); *rd = *rv; } else { // We use a cmpxchgq loop, so the output must be rax and we need a temp. bc->needI64(bc->specific_.rax); *rd = bc->specific_.rax; *rv = bc->popI64(); *temp = bc->needI64(); } } static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, BaseIndex srcAddr, AtomicOp op, RegI64 rv, RegI64 temp, RegI64 rd) { bc->masm.wasmAtomicFetchOp64(access, op, rv, srcAddr, temp, rd); } static void Deallocate(BaseCompiler* bc, AtomicOp op, RegI64 rv, RegI64 temp) { bc->maybeFree(temp); if (op != AtomicFetchAddOp && op != AtomicFetchSubOp) { bc->freeI64(rv); } } #elif defined(JS_CODEGEN_X86) // Register allocation is tricky, see comments at atomic_xchg64 below. // // - Initially rv=ecx:edx and eax is reserved, rd=unallocated. // - Then rp is popped into esi+edi because those are the only available. // - The Setup operation makes rd=edx:eax. // - Deallocation then frees only the ecx part of rv. // // The temp is unused here. 
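// (Added background, standard x86 knowledge: the 64-bit RMW loop is built on
// LOCK CMPXCHG8B, which fixes the expected/old value in edx:eax and the
// replacement in ecx:ebx; ebx additionally serves as the scratch register in
// this configuration, hence the explicit register targeting below.)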
static void PopAndAllocate(BaseCompiler* bc, AtomicOp op, RegI64* rd, RegI64* rv, RegI64*) { bc->needI32(bc->specific_.eax); bc->needI32(bc->specific_.ecx); bc->needI32(bc->specific_.edx); *rv = RegI64(Register64(bc->specific_.ecx, bc->specific_.edx)); bc->popI64ToSpecific(*rv); } static void Setup(BaseCompiler* bc, RegI64* rd) { *rd = bc->specific_.edx_eax; } static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, Address srcAddr, AtomicOp op, RegI64 rv, RegI64, RegI64 rd, const ScratchAtomicNoHeapReg& scratch) { MOZ_ASSERT(rv.high == bc->specific_.ecx); MOZ_ASSERT(Register(scratch) == js::jit::ebx); bc->fr.pushGPR(rv.high); bc->fr.pushGPR(rv.low); Address value(StackPointer, 0); bc->masm.wasmAtomicFetchOp64(access, op, value, srcAddr, bc->specific_.ecx_ebx, rd); bc->fr.popBytes(8); } static void Deallocate(BaseCompiler* bc, AtomicOp, RegI64, RegI64) { bc->freeI32(bc->specific_.ecx); } #elif defined(JS_CODEGEN_ARM) static void PopAndAllocate(BaseCompiler* bc, AtomicOp op, RegI64* rd, RegI64* rv, RegI64* temp) { // We use a ldrex/strexd loop so the temp and the output must be // odd/even pairs. *rv = bc->popI64(); *temp = bc->needI64Pair(); *rd = bc->needI64Pair(); } static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, BaseIndex srcAddr, AtomicOp op, RegI64 rv, RegI64 temp, RegI64 rd) { bc->masm.wasmAtomicFetchOp64(access, op, rv, srcAddr, temp, rd); } static void Deallocate(BaseCompiler* bc, AtomicOp op, RegI64 rv, RegI64 temp) { bc->freeI64(rv); bc->freeI64(temp); } #elif defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_MIPS64) || \ defined(JS_CODEGEN_LOONG64) static void PopAndAllocate(BaseCompiler* bc, AtomicOp op, RegI64* rd, RegI64* rv, RegI64* temp) { *rv = bc->popI64(); *temp = bc->needI64(); *rd = bc->needI64(); } static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, BaseIndex srcAddr, AtomicOp op, RegI64 rv, RegI64 temp, RegI64 rd) { bc->masm.wasmAtomicFetchOp64(access, op, rv, srcAddr, temp, rd); } static void Deallocate(BaseCompiler* bc, AtomicOp op, RegI64 rv, RegI64 temp) { bc->freeI64(rv); bc->freeI64(temp); } #elif defined(JS_CODEGEN_RISCV64) static void PopAndAllocate(BaseCompiler*, AtomicOp, RegI64*, RegI64*, RegI64*) { MOZ_CRASH("UNIMPLEMENTED ON RISCV64"); } static void Perform(BaseCompiler*, const MemoryAccessDesc&, BaseIndex, AtomicOp op, RegI64, RegI64, RegI64) { MOZ_CRASH("UNIMPLEMENTED ON RISCV64"); } static void Deallocate(BaseCompiler*, AtomicOp, RegI64, RegI64) { MOZ_CRASH("UNIMPLEMENTED ON RISCV64"); } #elif defined(JS_CODEGEN_NONE) || defined(JS_CODEGEN_WASM32) static void PopAndAllocate(BaseCompiler*, AtomicOp, RegI64*, RegI64*, RegI64*) { } static void Perform(BaseCompiler*, const MemoryAccessDesc&, BaseIndex, AtomicOp op, RegI64, RegI64, RegI64) {} static void Deallocate(BaseCompiler*, AtomicOp, RegI64, RegI64) {} #endif } // namespace atomic_rmw64 template void BaseCompiler::atomicRMW64(MemoryAccessDesc* access, ValType type, AtomicOp op) { RegI64 rd, rv, temp; atomic_rmw64::PopAndAllocate(this, op, &rd, &rv, &temp); AccessCheck check; RegIndexType rp = popMemoryAccess(access, &check); #if defined(WASM_HAS_HEAPREG) RegPtr instance = maybeLoadInstanceForAccess(check); auto memaddr = prepareAtomicMemoryAccess(access, &check, instance, rp); atomic_rmw64::Perform(this, *access, memaddr, op, rv, temp, rd); # ifndef RABALDR_PIN_INSTANCE maybeFree(instance); # endif #else ScratchAtomicNoHeapReg scratch(*this); RegPtr instance = maybeLoadInstanceForAccess(check, RegIntptrToRegPtr(scratch)); auto memaddr = 
prepareAtomicMemoryAccess(access, &check, instance, rp); atomic_rmw64::Setup(this, &rd); atomic_rmw64::Perform(this, *access, memaddr, op, rv, temp, rd, scratch); MOZ_ASSERT(instance == scratch); #endif free(rp); atomic_rmw64::Deallocate(this, op, rv, temp); pushI64(rd); } ////////////////////////////////////////////////////////////////////////////// // // Atomic exchange (also used for atomic store in some cases). void BaseCompiler::atomicXchg(MemoryAccessDesc* access, ValType type) { Scalar::Type viewType = access->type(); if (Scalar::byteSize(viewType) <= 4) { if (isMem32()) { atomicXchg32(access, type); } else { #ifdef ENABLE_WASM_MEMORY64 atomicXchg32(access, type); #else MOZ_CRASH("Memory64 not enabled / supported on this platform"); #endif } } else { MOZ_ASSERT(type == ValType::I64 && Scalar::byteSize(viewType) == 8); if (isMem32()) { atomicXchg64(access, WantResult(true)); } else { #ifdef ENABLE_WASM_MEMORY64 atomicXchg64(access, WantResult(true)); #else MOZ_CRASH("Memory64 not enabled / supported on this platform"); #endif } } } namespace atomic_xchg32 { #if defined(JS_CODEGEN_X64) using Temps = Nothing; static void PopAndAllocate(BaseCompiler* bc, ValType type, Scalar::Type viewType, RegI32* rd, RegI32* rv, Temps*) { // The xchg instruction reuses rv as rd. *rv = (type == ValType::I64) ? bc->popI64ToI32() : bc->popI32(); *rd = *rv; } static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, BaseIndex srcAddr, RegI32 rv, RegI32 rd, const Temps&) { bc->masm.wasmAtomicExchange(access, srcAddr, rv, rd); } static void Deallocate(BaseCompiler* bc, RegI32, const Temps&) {} #elif defined(JS_CODEGEN_X86) using Temps = Nothing; static void PopAndAllocate(BaseCompiler* bc, ValType type, Scalar::Type viewType, RegI32* rd, RegI32* rv, Temps*) { // The xchg instruction reuses rv as rd. *rv = (type == ValType::I64) ? bc->popI64ToI32() : bc->popI32(); *rd = *rv; } static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, Address srcAddr, RegI32 rv, RegI32 rd, const Temps&) { if (access.type() == Scalar::Uint8 && !bc->ra.isSingleByteI32(rd)) { ScratchI8 scratch(*bc); // The output register must have a byte persona. bc->masm.wasmAtomicExchange(access, srcAddr, rv, scratch); bc->masm.movl(scratch, rd); } else { bc->masm.wasmAtomicExchange(access, srcAddr, rv, rd); } } static void Deallocate(BaseCompiler* bc, RegI32, const Temps&) {} #elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64) using Temps = Nothing; static void PopAndAllocate(BaseCompiler* bc, ValType type, Scalar::Type viewType, RegI32* rd, RegI32* rv, Temps*) { *rv = (type == ValType::I64) ? bc->popI64ToI32() : bc->popI32(); *rd = bc->needI32(); } static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, BaseIndex srcAddr, RegI32 rv, RegI32 rd, const Temps&) { bc->masm.wasmAtomicExchange(access, srcAddr, rv, rd); } static void Deallocate(BaseCompiler* bc, RegI32 rv, const Temps&) { bc->freeI32(rv); } #elif defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) struct Temps { RegI32 t0, t1, t2; }; static void PopAndAllocate(BaseCompiler* bc, ValType type, Scalar::Type viewType, RegI32* rd, RegI32* rv, Temps* temps) { *rv = (type == ValType::I64) ? 
bc->popI64ToI32() : bc->popI32(); if (Scalar::byteSize(viewType) < 4) { temps->t0 = bc->needI32(); temps->t1 = bc->needI32(); temps->t2 = bc->needI32(); } *rd = bc->needI32(); } static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, BaseIndex srcAddr, RegI32 rv, RegI32 rd, const Temps& temps) { bc->masm.wasmAtomicExchange(access, srcAddr, rv, temps.t0, temps.t1, temps.t2, rd); } static void Deallocate(BaseCompiler* bc, RegI32 rv, const Temps& temps) { bc->freeI32(rv); bc->maybeFree(temps.t0); bc->maybeFree(temps.t1); bc->maybeFree(temps.t2); } #elif defined(JS_CODEGEN_RISCV64) using Temps = Nothing; static void PopAndAllocate(BaseCompiler*, ValType, Scalar::Type, RegI32*, RegI32*, Temps*) { MOZ_CRASH("UNIMPLEMENTED ON RISCV64"); } static void Perform(BaseCompiler*, const MemoryAccessDesc&, BaseIndex, RegI32, RegI32, const Temps&) { MOZ_CRASH("UNIMPLEMENTED ON RISCV64"); } static void Deallocate(BaseCompiler*, RegI32, const Temps&) { MOZ_CRASH("UNIMPLEMENTED ON RISCV64"); } #elif defined(JS_CODEGEN_NONE) || defined(JS_CODEGEN_WASM32) using Temps = Nothing; static void PopAndAllocate(BaseCompiler*, ValType, Scalar::Type, RegI32*, RegI32*, Temps*) {} static void Perform(BaseCompiler*, const MemoryAccessDesc&, BaseIndex, RegI32, RegI32, const Temps&) {} static void Deallocate(BaseCompiler*, RegI32, const Temps&) {} #endif } // namespace atomic_xchg32 template void BaseCompiler::atomicXchg32(MemoryAccessDesc* access, ValType type) { Scalar::Type viewType = access->type(); RegI32 rd, rv; atomic_xchg32::Temps temps; atomic_xchg32::PopAndAllocate(this, type, viewType, &rd, &rv, &temps); AccessCheck check; RegIndexType rp = popMemoryAccess(access, &check); RegPtr instance = maybeLoadInstanceForAccess(check); auto memaddr = prepareAtomicMemoryAccess(access, &check, instance, rp); atomic_xchg32::Perform(this, *access, memaddr, rv, rd, temps); #ifndef RABALDR_PIN_INSTANCE maybeFree(instance); #endif free(rp); atomic_xchg32::Deallocate(this, rv, temps); if (type == ValType::I64) { pushU32AsI64(rd); } else { pushI32(rd); } } namespace atomic_xchg64 { #if defined(JS_CODEGEN_X64) static void PopAndAllocate(BaseCompiler* bc, RegI64* rd, RegI64* rv) { *rv = bc->popI64(); *rd = *rv; } static void Deallocate(BaseCompiler* bc, RegI64 rd, RegI64) { bc->maybeFree(rd); } #elif defined(JS_CODEGEN_X86) // Register allocation is tricky in several ways. // // - For a 64-bit access on memory64 we need six registers for rd, rv, and rp, // but have only five (as the temp ebx is needed too), so we target all // registers explicitly to make sure there's space. // // - We'll be using cmpxchg8b, and when we do the operation, rv must be in // ecx:ebx, and rd must be edx:eax. We can't use ebx for rv initially because // we need ebx for a scratch also, so use a separate temp and move the value // to ebx just before the operation. // // In sum: // // - Initially rv=ecx:edx and eax is reserved, rd=unallocated. // - Then rp is popped into esi+edi because those are the only available. // - The Setup operation makes rv=ecx:ebx and rd=edx:eax and moves edx->ebx. // - Deallocation then frees only the ecx part of rv. 
static void PopAndAllocate(BaseCompiler* bc, RegI64* rd, RegI64* rv) { bc->needI32(bc->specific_.ecx); bc->needI32(bc->specific_.edx); bc->needI32(bc->specific_.eax); *rv = RegI64(Register64(bc->specific_.ecx, bc->specific_.edx)); bc->popI64ToSpecific(*rv); } static void Setup(BaseCompiler* bc, RegI64* rv, RegI64* rd, const ScratchAtomicNoHeapReg& scratch) { MOZ_ASSERT(rv->high == bc->specific_.ecx); MOZ_ASSERT(Register(scratch) == js::jit::ebx); bc->masm.move32(rv->low, scratch); *rv = bc->specific_.ecx_ebx; *rd = bc->specific_.edx_eax; } static void Deallocate(BaseCompiler* bc, RegI64 rd, RegI64 rv) { MOZ_ASSERT(rd == bc->specific_.edx_eax || rd == RegI64::Invalid()); bc->maybeFree(rd); bc->freeI32(bc->specific_.ecx); } #elif defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_MIPS64) || \ defined(JS_CODEGEN_LOONG64) static void PopAndAllocate(BaseCompiler* bc, RegI64* rd, RegI64* rv) { *rv = bc->popI64(); *rd = bc->needI64(); } static void Deallocate(BaseCompiler* bc, RegI64 rd, RegI64 rv) { bc->freeI64(rv); bc->maybeFree(rd); } #elif defined(JS_CODEGEN_ARM) static void PopAndAllocate(BaseCompiler* bc, RegI64* rd, RegI64* rv) { // Both rv and rd must be odd/even pairs. *rv = bc->popI64ToSpecific(bc->needI64Pair()); *rd = bc->needI64Pair(); } static void Deallocate(BaseCompiler* bc, RegI64 rd, RegI64 rv) { bc->freeI64(rv); bc->maybeFree(rd); } #elif defined(JS_CODEGEN_RISCV64) static void PopAndAllocate(BaseCompiler*, RegI64*, RegI64*) { MOZ_CRASH("UNIMPLEMENTED ON RISCV64"); } static void Deallocate(BaseCompiler*, RegI64, RegI64) { MOZ_CRASH("UNIMPLEMENTED ON RISCV64"); } #elif defined(JS_CODEGEN_NONE) || defined(JS_CODEGEN_WASM32) static void PopAndAllocate(BaseCompiler*, RegI64*, RegI64*) {} static void Deallocate(BaseCompiler*, RegI64, RegI64) {} #endif } // namespace atomic_xchg64 template void BaseCompiler::atomicXchg64(MemoryAccessDesc* access, WantResult wantResult) { RegI64 rd, rv; atomic_xchg64::PopAndAllocate(this, &rd, &rv); AccessCheck check; RegIndexType rp = popMemoryAccess(access, &check); #ifdef WASM_HAS_HEAPREG RegPtr instance = maybeLoadInstanceForAccess(check); auto memaddr = prepareAtomicMemoryAccess(access, &check, instance, rp); masm.wasmAtomicExchange64(*access, memaddr, rv, rd); # ifndef RABALDR_PIN_INSTANCE maybeFree(instance); # endif #else ScratchAtomicNoHeapReg scratch(*this); RegPtr instance = maybeLoadInstanceForAccess(check, RegIntptrToRegPtr(scratch)); Address memaddr = prepareAtomicMemoryAccess(access, &check, instance, rp); atomic_xchg64::Setup(this, &rv, &rd, scratch); masm.wasmAtomicExchange64(*access, memaddr, rv, rd); MOZ_ASSERT(instance == scratch); #endif free(rp); if (wantResult) { pushI64(rd); rd = RegI64::Invalid(); } atomic_xchg64::Deallocate(this, rd, rv); } ////////////////////////////////////////////////////////////////////////////// // // Atomic compare-exchange. 
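// (Added orientation note: wasm's atomic rmw cmpxchg instructions pop the
// address, the expected value, and the replacement value, and push the value
// previously in memory; accesses of at most 4 bytes go through atomicCmpXchg32
// below, 8-byte accesses through atomicCmpXchg64.)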
void BaseCompiler::atomicCmpXchg(MemoryAccessDesc* access, ValType type) { Scalar::Type viewType = access->type(); if (Scalar::byteSize(viewType) <= 4) { if (isMem32()) { atomicCmpXchg32(access, type); } else { #ifdef ENABLE_WASM_MEMORY64 atomicCmpXchg32(access, type); #else MOZ_CRASH("Memory64 not enabled / supported on this platform"); #endif } } else { MOZ_ASSERT(type == ValType::I64 && Scalar::byteSize(viewType) == 8); if (isMem32()) { atomicCmpXchg64(access, type); } else { #ifdef ENABLE_WASM_MEMORY64 atomicCmpXchg64(access, type); #else MOZ_CRASH("Memory64 not enabled / supported on this platform"); #endif } } } namespace atomic_cmpxchg32 { #if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86) using Temps = Nothing; static void PopAndAllocate(BaseCompiler* bc, ValType type, Scalar::Type viewType, RegI32* rexpect, RegI32* rnew, RegI32* rd, Temps*) { // For cmpxchg, the expected value and the result are both in eax. bc->needI32(bc->specific_.eax); if (type == ValType::I64) { *rnew = bc->popI64ToI32(); *rexpect = bc->popI64ToSpecificI32(bc->specific_.eax); } else { *rnew = bc->popI32(); *rexpect = bc->popI32ToSpecific(bc->specific_.eax); } *rd = *rexpect; } template static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, T srcAddr, RegI32 rexpect, RegI32 rnew, RegI32 rd, const Temps&) { # if defined(JS_CODEGEN_X86) ScratchI8 scratch(*bc); if (access.type() == Scalar::Uint8) { MOZ_ASSERT(rd == bc->specific_.eax); if (!bc->ra.isSingleByteI32(rnew)) { // The replacement value must have a byte persona. bc->masm.movl(rnew, scratch); rnew = scratch; } } # endif bc->masm.wasmCompareExchange(access, srcAddr, rexpect, rnew, rd); } static void Deallocate(BaseCompiler* bc, RegI32, RegI32 rnew, const Temps&) { bc->freeI32(rnew); } #elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64) using Temps = Nothing; static void PopAndAllocate(BaseCompiler* bc, ValType type, Scalar::Type viewType, RegI32* rexpect, RegI32* rnew, RegI32* rd, Temps*) { if (type == ValType::I64) { *rnew = bc->popI64ToI32(); *rexpect = bc->popI64ToI32(); } else { *rnew = bc->popI32(); *rexpect = bc->popI32(); } *rd = bc->needI32(); } static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, BaseIndex srcAddr, RegI32 rexpect, RegI32 rnew, RegI32 rd, const Temps&) { bc->masm.wasmCompareExchange(access, srcAddr, rexpect, rnew, rd); } static void Deallocate(BaseCompiler* bc, RegI32 rexpect, RegI32 rnew, const Temps&) { bc->freeI32(rnew); bc->freeI32(rexpect); } #elif defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) struct Temps { RegI32 t0, t1, t2; }; static void PopAndAllocate(BaseCompiler* bc, ValType type, Scalar::Type viewType, RegI32* rexpect, RegI32* rnew, RegI32* rd, Temps* temps) { if (type == ValType::I64) { *rnew = bc->popI64ToI32(); *rexpect = bc->popI64ToI32(); } else { *rnew = bc->popI32(); *rexpect = bc->popI32(); } if (Scalar::byteSize(viewType) < 4) { temps->t0 = bc->needI32(); temps->t1 = bc->needI32(); temps->t2 = bc->needI32(); } *rd = bc->needI32(); } static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, BaseIndex srcAddr, RegI32 rexpect, RegI32 rnew, RegI32 rd, const Temps& temps) { bc->masm.wasmCompareExchange(access, srcAddr, rexpect, rnew, temps.t0, temps.t1, temps.t2, rd); } static void Deallocate(BaseCompiler* bc, RegI32 rexpect, RegI32 rnew, const Temps& temps) { bc->freeI32(rnew); bc->freeI32(rexpect); bc->maybeFree(temps.t0); bc->maybeFree(temps.t1); bc->maybeFree(temps.t2); } #elif defined(JS_CODEGEN_RISCV64) using Temps = Nothing; static void 
PopAndAllocate(BaseCompiler*, ValType, Scalar::Type, RegI32*, RegI32*, RegI32*, Temps*) { MOZ_CRASH("UNIMPLEMENTED ON RISCV64"); } static void Perform(BaseCompiler*, const MemoryAccessDesc&, BaseIndex, RegI32, RegI32, RegI32, const Temps& temps) { MOZ_CRASH("UNIMPLEMENTED ON RISCV64"); } static void Deallocate(BaseCompiler*, RegI32, RegI32, const Temps&) { MOZ_CRASH("UNIMPLEMENTED ON RISCV64"); } #elif defined(JS_CODEGEN_NONE) || defined(JS_CODEGEN_WASM32) using Temps = Nothing; static void PopAndAllocate(BaseCompiler*, ValType, Scalar::Type, RegI32*, RegI32*, RegI32*, Temps*) {} static void Perform(BaseCompiler*, const MemoryAccessDesc&, BaseIndex, RegI32, RegI32, RegI32, const Temps& temps) {} static void Deallocate(BaseCompiler*, RegI32, RegI32, const Temps&) {} #endif } // namespace atomic_cmpxchg32 template void BaseCompiler::atomicCmpXchg32(MemoryAccessDesc* access, ValType type) { Scalar::Type viewType = access->type(); RegI32 rexpect, rnew, rd; atomic_cmpxchg32::Temps temps; atomic_cmpxchg32::PopAndAllocate(this, type, viewType, &rexpect, &rnew, &rd, &temps); AccessCheck check; RegIndexType rp = popMemoryAccess(access, &check); RegPtr instance = maybeLoadInstanceForAccess(check); auto memaddr = prepareAtomicMemoryAccess(access, &check, instance, rp); atomic_cmpxchg32::Perform(this, *access, memaddr, rexpect, rnew, rd, temps); #ifndef RABALDR_PIN_INSTANCE maybeFree(instance); #endif free(rp); atomic_cmpxchg32::Deallocate(this, rexpect, rnew, temps); if (type == ValType::I64) { pushU32AsI64(rd); } else { pushI32(rd); } } namespace atomic_cmpxchg64 { // The templates are needed for x86 code generation, which needs complicated // register allocation for memory64. template static void PopAndAllocate(BaseCompiler* bc, RegI64* rexpect, RegI64* rnew, RegI64* rd); template static void Deallocate(BaseCompiler* bc, RegI64 rexpect, RegI64 rnew); #if defined(JS_CODEGEN_X64) template static void PopAndAllocate(BaseCompiler* bc, RegI64* rexpect, RegI64* rnew, RegI64* rd) { // For cmpxchg, the expected value and the result are both in rax. bc->needI64(bc->specific_.rax); *rnew = bc->popI64(); *rexpect = bc->popI64ToSpecific(bc->specific_.rax); *rd = *rexpect; } static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, BaseIndex srcAddr, RegI64 rexpect, RegI64 rnew, RegI64 rd) { bc->masm.wasmCompareExchange64(access, srcAddr, rexpect, rnew, rd); } template static void Deallocate(BaseCompiler* bc, RegI64 rexpect, RegI64 rnew) { bc->freeI64(rnew); } #elif defined(JS_CODEGEN_X86) template static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access, Address srcAddr, RegI64 rexpect, RegI64 rnew, RegI64 rd, ScratchAtomicNoHeapReg& scratch); // Memory32: For cmpxchg8b, the expected value and the result are both in // edx:eax, and the replacement value is in ecx:ebx. But we can't allocate ebx // initially because we need it later for a scratch, so instead we allocate a // temp to hold the low word of 'new'. 
template <>
void PopAndAllocate<RegI32>(BaseCompiler* bc, RegI64* rexpect, RegI64* rnew,
                            RegI64* rd) {
  bc->needI64(bc->specific_.edx_eax);
  bc->needI32(bc->specific_.ecx);

  RegI32 tmp = bc->needI32();
  *rnew = bc->popI64ToSpecific(RegI64(Register64(bc->specific_.ecx, tmp)));
  *rexpect = bc->popI64ToSpecific(bc->specific_.edx_eax);
  *rd = *rexpect;
}

template <>
void Perform<RegI32>(BaseCompiler* bc, const MemoryAccessDesc& access,
                     Address srcAddr, RegI64 rexpect, RegI64 rnew, RegI64 rd,
                     ScratchAtomicNoHeapReg& scratch) {
  MOZ_ASSERT(Register(scratch) == js::jit::ebx);
  MOZ_ASSERT(rnew.high == bc->specific_.ecx);
  bc->masm.move32(rnew.low, ebx);
  bc->masm.wasmCompareExchange64(access, srcAddr, rexpect,
                                 bc->specific_.ecx_ebx, rd);
}

template <>
void Deallocate<RegI32>(BaseCompiler* bc, RegI64 rexpect, RegI64 rnew) {
  bc->freeI64(rnew);
}

// Memory64: Register allocation is particularly hairy here.  With memory64,
// we have up to seven live values: i64 expected-value, i64 new-value, i64
// pointer, and instance.  The instance can use the scratch but there's no
// avoiding that we'll run out of registers.
//
// Unlike for the rmw ops, we can't use edx as the rnew.low since it's used
// for the rexpect.high.  And we can't push anything onto the stack while
// we're popping the memory address because the memory address may be on the
// stack.

#  ifdef ENABLE_WASM_MEMORY64
template <>
void PopAndAllocate<RegI64>(BaseCompiler* bc, RegI64* rexpect, RegI64* rnew,
                            RegI64* rd) {
  // We reserve these (and ebx).  The 64-bit pointer will end up in esi+edi.
  bc->needI32(bc->specific_.eax);
  bc->needI32(bc->specific_.ecx);
  bc->needI32(bc->specific_.edx);

  // Pop the 'new' value and stash it in the instance scratch area.  Do not
  // initialize *rnew to anything.
  RegI64 tmp(Register64(bc->specific_.ecx, bc->specific_.edx));
  bc->popI64ToSpecific(tmp);
  {
    ScratchPtr instanceScratch(*bc);
    bc->stashI64(instanceScratch, tmp);
  }

  *rexpect = bc->popI64ToSpecific(bc->specific_.edx_eax);
  *rd = *rexpect;
}

template <>
void Perform<RegI64>(BaseCompiler* bc, const MemoryAccessDesc& access,
                     Address srcAddr, RegI64 rexpect, RegI64 rnew, RegI64 rd,
                     ScratchAtomicNoHeapReg& scratch) {
  MOZ_ASSERT(rnew.isInvalid());
  rnew = bc->specific_.ecx_ebx;

  bc->unstashI64(RegPtr(Register(bc->specific_.ecx)), rnew);
  bc->masm.wasmCompareExchange64(access, srcAddr, rexpect, rnew, rd);
}

template <>
void Deallocate<RegI64>(BaseCompiler* bc, RegI64 rexpect, RegI64 rnew) {
  // edx:ebx have been pushed as the result, and the pointer was freed
  // separately in the caller, so just free ecx.
  bc->free(bc->specific_.ecx);
}
#  endif

#elif defined(JS_CODEGEN_ARM)

template <typename RegIndexType>
static void PopAndAllocate(BaseCompiler* bc, RegI64* rexpect, RegI64* rnew,
                           RegI64* rd) {
  // The replacement value and the result must both be odd/even pairs.
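  // popI64Pair() and needI64Pair() below provide register pairs that satisfy
  // this constraint.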
  *rnew = bc->popI64Pair();
  *rexpect = bc->popI64();
  *rd = bc->needI64Pair();
}

static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access,
                    BaseIndex srcAddr, RegI64 rexpect, RegI64 rnew,
                    RegI64 rd) {
  bc->masm.wasmCompareExchange64(access, srcAddr, rexpect, rnew, rd);
}

template <typename RegIndexType>
static void Deallocate(BaseCompiler* bc, RegI64 rexpect, RegI64 rnew) {
  bc->freeI64(rexpect);
  bc->freeI64(rnew);
}

#elif defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_MIPS64) || \
    defined(JS_CODEGEN_LOONG64)

template <typename RegIndexType>
static void PopAndAllocate(BaseCompiler* bc, RegI64* rexpect, RegI64* rnew,
                           RegI64* rd) {
  *rnew = bc->popI64();
  *rexpect = bc->popI64();
  *rd = bc->needI64();
}

static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access,
                    BaseIndex srcAddr, RegI64 rexpect, RegI64 rnew,
                    RegI64 rd) {
  bc->masm.wasmCompareExchange64(access, srcAddr, rexpect, rnew, rd);
}

template <typename RegIndexType>
static void Deallocate(BaseCompiler* bc, RegI64 rexpect, RegI64 rnew) {
  bc->freeI64(rexpect);
  bc->freeI64(rnew);
}

#elif defined(JS_CODEGEN_RISCV64)

template <typename RegIndexType>
static void PopAndAllocate(BaseCompiler* bc, RegI64* rexpect, RegI64* rnew,
                           RegI64* rd) {
  MOZ_CRASH("UNIMPLEMENTED ON RISCV64");
}

static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access,
                    BaseIndex srcAddr, RegI64 rexpect, RegI64 rnew,
                    RegI64 rd) {
  MOZ_CRASH("UNIMPLEMENTED ON RISCV64");
}

template <typename RegIndexType>
static void Deallocate(BaseCompiler* bc, RegI64 rexpect, RegI64 rnew) {
  MOZ_CRASH("UNIMPLEMENTED ON RISCV64");
}

#elif defined(JS_CODEGEN_NONE) || defined(JS_CODEGEN_WASM32)

template <typename RegIndexType>
static void PopAndAllocate(BaseCompiler* bc, RegI64* rexpect, RegI64* rnew,
                           RegI64* rd) {}
static void Perform(BaseCompiler* bc, const MemoryAccessDesc& access,
                    BaseIndex srcAddr, RegI64 rexpect, RegI64 rnew,
                    RegI64 rd) {}
template <typename RegIndexType>
static void Deallocate(BaseCompiler* bc, RegI64 rexpect, RegI64 rnew) {}

#endif

}  // namespace atomic_cmpxchg64

template <typename RegIndexType>
void BaseCompiler::atomicCmpXchg64(MemoryAccessDesc* access, ValType type) {
  RegI64 rexpect, rnew, rd;
  atomic_cmpxchg64::PopAndAllocate<RegIndexType>(this, &rexpect, &rnew, &rd);

  AccessCheck check;
  RegIndexType rp = popMemoryAccess<RegIndexType>(access, &check);

#ifdef WASM_HAS_HEAPREG
  RegPtr instance = maybeLoadInstanceForAccess(check);
  auto memaddr = prepareAtomicMemoryAccess(access, &check, instance, rp);
  atomic_cmpxchg64::Perform(this, *access, memaddr, rexpect, rnew, rd);
#  ifndef RABALDR_PIN_INSTANCE
  maybeFree(instance);
#  endif
#else
  ScratchAtomicNoHeapReg scratch(*this);
  RegPtr instance =
      maybeLoadInstanceForAccess(check, RegIntptrToRegPtr(scratch));
  Address memaddr = prepareAtomicMemoryAccess(access, &check, instance, rp);
  atomic_cmpxchg64::Perform<RegIndexType>(this, *access, memaddr, rexpect,
                                          rnew, rd, scratch);
  MOZ_ASSERT(instance == scratch);
#endif

  free(rp);
  atomic_cmpxchg64::Deallocate<RegIndexType>(this, rexpect, rnew);

  pushI64(rd);
}

//////////////////////////////////////////////////////////////////////////////
//
// Synchronization.

bool BaseCompiler::atomicWait(ValType type, MemoryAccessDesc* access) {
  switch (type.kind()) {
    case ValType::I32: {
      RegI64 timeout = popI64();
      RegI32 val = popI32();

      if (isMem32()) {
        computeEffectiveAddress<RegI32>(access);
      } else {
#ifdef ENABLE_WASM_MEMORY64
        computeEffectiveAddress<RegI64>(access);
#else
        MOZ_CRASH("Memory64 not enabled / supported on this platform");
#endif
      }

      pushI32(val);
      pushI64(timeout);

      if (!emitInstanceCall(isMem32() ? SASigWaitI32M32 : SASigWaitI32M64)) {
        return false;
      }
      break;
    }
    case ValType::I64: {
      RegI64 timeout = popI64();
      RegI64 val = popI64();

      if (isMem32()) {
        computeEffectiveAddress<RegI32>(access);
      } else {
#ifdef ENABLE_WASM_MEMORY64
#  ifdef JS_CODEGEN_X86
        {
          ScratchPtr scratch(*this);
          stashI64(scratch, val);
          freeI64(val);
        }
#  endif
        computeEffectiveAddress<RegI64>(access);
#  ifdef JS_CODEGEN_X86
        {
          ScratchPtr scratch(*this);
          val = needI64();
          unstashI64(scratch, val);
        }
#  endif
#else
        MOZ_CRASH("Memory64 not enabled / supported on this platform");
#endif
      }

      pushI64(val);
      pushI64(timeout);

      if (!emitInstanceCall(isMem32() ? SASigWaitI64M32 : SASigWaitI64M64)) {
        return false;
      }
      break;
    }
    default:
      MOZ_CRASH();
  }

  return true;
}

bool BaseCompiler::atomicWake(MemoryAccessDesc* access) {
  RegI32 count = popI32();

  if (isMem32()) {
    computeEffectiveAddress<RegI32>(access);
  } else {
#ifdef ENABLE_WASM_MEMORY64
    computeEffectiveAddress<RegI64>(access);
#else
    MOZ_CRASH("Memory64 not enabled / supported on this platform");
#endif
  }

  pushI32(count);
  return emitInstanceCall(isMem32() ? SASigWakeM32 : SASigWakeM64);
}

//////////////////////////////////////////////////////////////////////////////
//
// Bulk memory.

void BaseCompiler::memCopyInlineM32() {
  MOZ_ASSERT(MaxInlineMemoryCopyLength != 0);

  int32_t signedLength;
  MOZ_ALWAYS_TRUE(popConst(&signedLength));
  uint32_t length = signedLength;
  MOZ_ASSERT(length != 0 && length <= MaxInlineMemoryCopyLength);

  RegI32 src = popI32();
  RegI32 dest = popI32();

  // Compute the number of copies of each width we will need to do.
  size_t remainder = length;
#ifdef ENABLE_WASM_SIMD
  size_t numCopies16 = 0;
  if (MacroAssembler::SupportsFastUnalignedFPAccesses()) {
    numCopies16 = remainder / sizeof(V128);
    remainder %= sizeof(V128);
  }
#endif
#ifdef JS_64BIT
  size_t numCopies8 = remainder / sizeof(uint64_t);
  remainder %= sizeof(uint64_t);
#endif
  size_t numCopies4 = remainder / sizeof(uint32_t);
  remainder %= sizeof(uint32_t);
  size_t numCopies2 = remainder / sizeof(uint16_t);
  remainder %= sizeof(uint16_t);
  size_t numCopies1 = remainder;

  // Load all source bytes onto the value stack from low to high using the
  // widest transfer width we can for the system.  We will trap without
  // writing anything if any source byte is out-of-bounds.
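  // After the first (bounds-checked) access, the remaining accesses reuse the
  // same base pointer with small constant offsets, so they can rely on the
  // offset guard region and omit their explicit bounds checks.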
  bool omitBoundsCheck = false;
  size_t offset = 0;

#ifdef ENABLE_WASM_SIMD
  for (uint32_t i = 0; i < numCopies16; i++) {
    RegI32 temp = needI32();
    moveI32(src, temp);
    pushI32(temp);

    MemoryAccessDesc access(Scalar::Simd128, 1, offset, bytecodeOffset());
    AccessCheck check;
    check.omitBoundsCheck = omitBoundsCheck;
    loadCommon(&access, check, ValType::V128);

    offset += sizeof(V128);
    omitBoundsCheck = true;
  }
#endif

#ifdef JS_64BIT
  for (uint32_t i = 0; i < numCopies8; i++) {
    RegI32 temp = needI32();
    moveI32(src, temp);
    pushI32(temp);

    MemoryAccessDesc access(Scalar::Int64, 1, offset, bytecodeOffset());
    AccessCheck check;
    check.omitBoundsCheck = omitBoundsCheck;
    loadCommon(&access, check, ValType::I64);

    offset += sizeof(uint64_t);
    omitBoundsCheck = true;
  }
#endif

  for (uint32_t i = 0; i < numCopies4; i++) {
    RegI32 temp = needI32();
    moveI32(src, temp);
    pushI32(temp);

    MemoryAccessDesc access(Scalar::Uint32, 1, offset, bytecodeOffset());
    AccessCheck check;
    check.omitBoundsCheck = omitBoundsCheck;
    loadCommon(&access, check, ValType::I32);

    offset += sizeof(uint32_t);
    omitBoundsCheck = true;
  }

  if (numCopies2) {
    RegI32 temp = needI32();
    moveI32(src, temp);
    pushI32(temp);

    MemoryAccessDesc access(Scalar::Uint16, 1, offset, bytecodeOffset());
    AccessCheck check;
    check.omitBoundsCheck = omitBoundsCheck;
    loadCommon(&access, check, ValType::I32);

    offset += sizeof(uint16_t);
    omitBoundsCheck = true;
  }

  if (numCopies1) {
    RegI32 temp = needI32();
    moveI32(src, temp);
    pushI32(temp);

    MemoryAccessDesc access(Scalar::Uint8, 1, offset, bytecodeOffset());
    AccessCheck check;
    check.omitBoundsCheck = omitBoundsCheck;
    loadCommon(&access, check, ValType::I32);
  }

  // Store all source bytes from the value stack to the destination from
  // high to low.  We will trap without writing anything on the first store
  // if any dest byte is out-of-bounds.
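  // The first store is at the highest offset, so its bounds check covers all
  // of the lower-offset stores as well; and since it happens before anything
  // has been written, a trap on it leaves the destination unmodified.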
  offset = length;
  omitBoundsCheck = false;

  if (numCopies1) {
    offset -= sizeof(uint8_t);

    RegI32 value = popI32();
    RegI32 temp = needI32();
    moveI32(dest, temp);
    pushI32(temp);
    pushI32(value);

    MemoryAccessDesc access(Scalar::Uint8, 1, offset, bytecodeOffset());
    AccessCheck check;
    storeCommon(&access, check, ValType::I32);

    omitBoundsCheck = true;
  }

  if (numCopies2) {
    offset -= sizeof(uint16_t);

    RegI32 value = popI32();
    RegI32 temp = needI32();
    moveI32(dest, temp);
    pushI32(temp);
    pushI32(value);

    MemoryAccessDesc access(Scalar::Uint16, 1, offset, bytecodeOffset());
    AccessCheck check;
    check.omitBoundsCheck = omitBoundsCheck;
    storeCommon(&access, check, ValType::I32);

    omitBoundsCheck = true;
  }

  for (uint32_t i = 0; i < numCopies4; i++) {
    offset -= sizeof(uint32_t);

    RegI32 value = popI32();
    RegI32 temp = needI32();
    moveI32(dest, temp);
    pushI32(temp);
    pushI32(value);

    MemoryAccessDesc access(Scalar::Uint32, 1, offset, bytecodeOffset());
    AccessCheck check;
    check.omitBoundsCheck = omitBoundsCheck;
    storeCommon(&access, check, ValType::I32);

    omitBoundsCheck = true;
  }

#ifdef JS_64BIT
  for (uint32_t i = 0; i < numCopies8; i++) {
    offset -= sizeof(uint64_t);

    RegI64 value = popI64();
    RegI32 temp = needI32();
    moveI32(dest, temp);
    pushI32(temp);
    pushI64(value);

    MemoryAccessDesc access(Scalar::Int64, 1, offset, bytecodeOffset());
    AccessCheck check;
    check.omitBoundsCheck = omitBoundsCheck;
    storeCommon(&access, check, ValType::I64);

    omitBoundsCheck = true;
  }
#endif

#ifdef ENABLE_WASM_SIMD
  for (uint32_t i = 0; i < numCopies16; i++) {
    offset -= sizeof(V128);

    RegV128 value = popV128();
    RegI32 temp = needI32();
    moveI32(dest, temp);
    pushI32(temp);
    pushV128(value);

    MemoryAccessDesc access(Scalar::Simd128, 1, offset, bytecodeOffset());
    AccessCheck check;
    check.omitBoundsCheck = omitBoundsCheck;
    storeCommon(&access, check, ValType::V128);

    omitBoundsCheck = true;
  }
#endif

  freeI32(dest);
  freeI32(src);
}

void BaseCompiler::memFillInlineM32() {
  MOZ_ASSERT(MaxInlineMemoryFillLength != 0);

  int32_t signedLength;
  int32_t signedValue;
  MOZ_ALWAYS_TRUE(popConst(&signedLength));
  MOZ_ALWAYS_TRUE(popConst(&signedValue));
  uint32_t length = uint32_t(signedLength);
  uint32_t value = uint32_t(signedValue);
  MOZ_ASSERT(length != 0 && length <= MaxInlineMemoryFillLength);

  RegI32 dest = popI32();

  // Compute the number of copies of each width we will need to do.
  size_t remainder = length;
#ifdef ENABLE_WASM_SIMD
  size_t numCopies16 = 0;
  if (MacroAssembler::SupportsFastUnalignedFPAccesses()) {
    numCopies16 = remainder / sizeof(V128);
    remainder %= sizeof(V128);
  }
#endif
#ifdef JS_64BIT
  size_t numCopies8 = remainder / sizeof(uint64_t);
  remainder %= sizeof(uint64_t);
#endif
  size_t numCopies4 = remainder / sizeof(uint32_t);
  remainder %= sizeof(uint32_t);
  size_t numCopies2 = remainder / sizeof(uint16_t);
  remainder %= sizeof(uint16_t);
  size_t numCopies1 = remainder;

  MOZ_ASSERT(numCopies2 <= 1 && numCopies1 <= 1);

  // Generate splatted definitions for wider fills as needed.
#ifdef ENABLE_WASM_SIMD
  V128 val16(value);
#endif
#ifdef JS_64BIT
  uint64_t val8 = SplatByteToUInt<uint64_t>(value, 8);
#endif
  uint32_t val4 = SplatByteToUInt<uint32_t>(value, 4);
  uint32_t val2 = SplatByteToUInt<uint32_t>(value, 2);
  uint32_t val1 = value;

  // Store the fill value to the destination from high to low.  We will trap
  // without writing anything on the first store if any dest byte is
  // out-of-bounds.
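  // As in memCopyInlineM32, the store at the highest offset performs the
  // bounds check and stores at lower offsets can then omit it.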
  size_t offset = length;
  bool omitBoundsCheck = false;

  if (numCopies1) {
    offset -= sizeof(uint8_t);

    RegI32 temp = needI32();
    moveI32(dest, temp);
    pushI32(temp);
    pushI32(val1);

    MemoryAccessDesc access(Scalar::Uint8, 1, offset, bytecodeOffset());
    AccessCheck check;
    storeCommon(&access, check, ValType::I32);

    omitBoundsCheck = true;
  }

  if (numCopies2) {
    offset -= sizeof(uint16_t);

    RegI32 temp = needI32();
    moveI32(dest, temp);
    pushI32(temp);
    pushI32(val2);

    MemoryAccessDesc access(Scalar::Uint16, 1, offset, bytecodeOffset());
    AccessCheck check;
    check.omitBoundsCheck = omitBoundsCheck;
    storeCommon(&access, check, ValType::I32);

    omitBoundsCheck = true;
  }

  for (uint32_t i = 0; i < numCopies4; i++) {
    offset -= sizeof(uint32_t);

    RegI32 temp = needI32();
    moveI32(dest, temp);
    pushI32(temp);
    pushI32(val4);

    MemoryAccessDesc access(Scalar::Uint32, 1, offset, bytecodeOffset());
    AccessCheck check;
    check.omitBoundsCheck = omitBoundsCheck;
    storeCommon(&access, check, ValType::I32);

    omitBoundsCheck = true;
  }

#ifdef JS_64BIT
  for (uint32_t i = 0; i < numCopies8; i++) {
    offset -= sizeof(uint64_t);

    RegI32 temp = needI32();
    moveI32(dest, temp);
    pushI32(temp);
    pushI64(val8);

    MemoryAccessDesc access(Scalar::Int64, 1, offset, bytecodeOffset());
    AccessCheck check;
    check.omitBoundsCheck = omitBoundsCheck;
    storeCommon(&access, check, ValType::I64);

    omitBoundsCheck = true;
  }
#endif

#ifdef ENABLE_WASM_SIMD
  for (uint32_t i = 0; i < numCopies16; i++) {
    offset -= sizeof(V128);

    RegI32 temp = needI32();
    moveI32(dest, temp);
    pushI32(temp);
    pushV128(val16);

    MemoryAccessDesc access(Scalar::Simd128, 1, offset, bytecodeOffset());
    AccessCheck check;
    check.omitBoundsCheck = omitBoundsCheck;
    storeCommon(&access, check, ValType::V128);

    omitBoundsCheck = true;
  }
#endif

  freeI32(dest);
}

//////////////////////////////////////////////////////////////////////////////
//
// SIMD and Relaxed SIMD.

#ifdef ENABLE_WASM_SIMD
void BaseCompiler::loadSplat(MemoryAccessDesc* access) {
  // We can implement loadSplat mostly as load + splat because the push of the
  // result onto the value stack in loadCommon normally will not generate any
  // code, it will leave the value in a register which we will consume.

  // We use uint types when we can on the general assumption that unsigned
  // loads might be smaller/faster on some platforms, because no sign
  // extension needs to be done after the sub-register load.
  RegV128 rd = needV128();
  switch (access->type()) {
    case Scalar::Uint8: {
      loadCommon(access, AccessCheck(), ValType::I32);
      RegI32 rs = popI32();
      masm.splatX16(rs, rd);
      free(rs);
      break;
    }
    case Scalar::Uint16: {
      loadCommon(access, AccessCheck(), ValType::I32);
      RegI32 rs = popI32();
      masm.splatX8(rs, rd);
      free(rs);
      break;
    }
    case Scalar::Uint32: {
      loadCommon(access, AccessCheck(), ValType::I32);
      RegI32 rs = popI32();
      masm.splatX4(rs, rd);
      free(rs);
      break;
    }
    case Scalar::Int64: {
      loadCommon(access, AccessCheck(), ValType::I64);
      RegI64 rs = popI64();
      masm.splatX2(rs, rd);
      free(rs);
      break;
    }
    default:
      MOZ_CRASH();
  }
  pushV128(rd);
}

void BaseCompiler::loadZero(MemoryAccessDesc* access) {
  access->setZeroExtendSimd128Load();
  loadCommon(access, AccessCheck(), ValType::V128);
}

void BaseCompiler::loadExtend(MemoryAccessDesc* access, Scalar::Type viewType) {
  loadCommon(access, AccessCheck(), ValType::I64);

  RegI64 rs = popI64();
  RegV128 rd = needV128();
  masm.moveGPR64ToDouble(rs, rd);
  switch (viewType) {
    case Scalar::Int8:
      masm.widenLowInt8x16(rd, rd);
      break;
    case Scalar::Uint8:
      masm.unsignedWidenLowInt8x16(rd, rd);
      break;
    case Scalar::Int16:
      masm.widenLowInt16x8(rd, rd);
      break;
    case Scalar::Uint16:
      masm.unsignedWidenLowInt16x8(rd, rd);
      break;
    case Scalar::Int32:
      masm.widenLowInt32x4(rd, rd);
      break;
    case Scalar::Uint32:
      masm.unsignedWidenLowInt32x4(rd, rd);
      break;
    default:
      MOZ_CRASH();
  }
  freeI64(rs);
  pushV128(rd);
}

void BaseCompiler::loadLane(MemoryAccessDesc* access, uint32_t laneIndex) {
  ValType type = access->type() == Scalar::Int64 ? ValType::I64 : ValType::I32;

  RegV128 rsd = popV128();
  loadCommon(access, AccessCheck(), type);

  if (type == ValType::I32) {
    RegI32 rs = popI32();
    switch (access->type()) {
      case Scalar::Uint8:
        masm.replaceLaneInt8x16(laneIndex, rs, rsd);
        break;
      case Scalar::Uint16:
        masm.replaceLaneInt16x8(laneIndex, rs, rsd);
        break;
      case Scalar::Int32:
        masm.replaceLaneInt32x4(laneIndex, rs, rsd);
        break;
      default:
        MOZ_CRASH("unsupported access type");
    }
    freeI32(rs);
  } else {
    MOZ_ASSERT(type == ValType::I64);
    RegI64 rs = popI64();
    masm.replaceLaneInt64x2(laneIndex, rs, rsd);
    freeI64(rs);
  }

  pushV128(rsd);
}

void BaseCompiler::storeLane(MemoryAccessDesc* access, uint32_t laneIndex) {
  ValType type = access->type() == Scalar::Int64 ? ValType::I64 : ValType::I32;

  RegV128 rs = popV128();
  if (type == ValType::I32) {
    RegI32 tmp = needI32();
    switch (access->type()) {
      case Scalar::Uint8:
        masm.extractLaneInt8x16(laneIndex, rs, tmp);
        break;
      case Scalar::Uint16:
        masm.extractLaneInt16x8(laneIndex, rs, tmp);
        break;
      case Scalar::Int32:
        masm.extractLaneInt32x4(laneIndex, rs, tmp);
        break;
      default:
        MOZ_CRASH("unsupported laneSize");
    }
    pushI32(tmp);
  } else {
    MOZ_ASSERT(type == ValType::I64);
    RegI64 tmp = needI64();
    masm.extractLaneInt64x2(laneIndex, rs, tmp);
    pushI64(tmp);
  }
  freeV128(rs);

  storeCommon(access, AccessCheck(), type);
}
#endif  // ENABLE_WASM_SIMD

}  // namespace wasm
}  // namespace js