Diffstat
-rw-r--r-- | js/src/jit/x86-shared/MacroAssembler-x86-shared.cpp | 2161 |
1 file changed, 2161 insertions, 0 deletions
diff --git a/js/src/jit/x86-shared/MacroAssembler-x86-shared.cpp b/js/src/jit/x86-shared/MacroAssembler-x86-shared.cpp new file mode 100644 index 0000000000..e474f83530 --- /dev/null +++ b/js/src/jit/x86-shared/MacroAssembler-x86-shared.cpp @@ -0,0 +1,2161 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "jit/x86-shared/MacroAssembler-x86-shared.h" + +#include "mozilla/Casting.h" + +#include "jsmath.h" + +#include "jit/JitFrames.h" +#include "jit/MacroAssembler.h" +#include "js/ScalarType.h" // js::Scalar::Type + +#include "jit/MacroAssembler-inl.h" + +using namespace js; +using namespace js::jit; + +// Note: this function clobbers the input register. +void MacroAssembler::clampDoubleToUint8(FloatRegister input, Register output) { + ScratchDoubleScope scratch(*this); + MOZ_ASSERT(input != scratch); + Label positive, done; + + // <= 0 or NaN --> 0 + zeroDouble(scratch); + branchDouble(DoubleGreaterThan, input, scratch, &positive); + { + move32(Imm32(0), output); + jump(&done); + } + + bind(&positive); + + // Add 0.5 and truncate. + loadConstantDouble(0.5, scratch); + addDouble(scratch, input); + + Label outOfRange; + + // Truncate to int32 and ensure the result <= 255. This relies on the + // processor setting output to a value > 255 for doubles outside the int32 + // range (for instance 0x80000000). + vcvttsd2si(input, output); + branch32(Assembler::Above, output, Imm32(255), &outOfRange); + { + // Check if we had a tie. + convertInt32ToDouble(output, scratch); + branchDouble(DoubleNotEqual, input, scratch, &done); + + // It was a tie. Mask out the ones bit to get an even value. + // See also js_TypedArray_uint8_clamp_double. + and32(Imm32(~1), output); + jump(&done); + } + + // > 255 --> 255 + bind(&outOfRange); + { move32(Imm32(255), output); } + + bind(&done); +} + +bool MacroAssemblerX86Shared::buildOOLFakeExitFrame(void* fakeReturnAddr) { + asMasm().PushFrameDescriptor(FrameType::IonJS); + asMasm().Push(ImmPtr(fakeReturnAddr)); + asMasm().Push(FramePointer); + return true; +} + +void MacroAssemblerX86Shared::branchNegativeZero(FloatRegister reg, + Register scratch, Label* label, + bool maybeNonZero) { + // Determines whether the low double contained in the XMM register reg + // is equal to -0.0. + +#if defined(JS_CODEGEN_X86) + Label nonZero; + + // if not already compared to zero + if (maybeNonZero) { + ScratchDoubleScope scratchDouble(asMasm()); + + // Compare to zero. Lets through {0, -0}. + zeroDouble(scratchDouble); + + // If reg is non-zero, jump to nonZero. + asMasm().branchDouble(DoubleNotEqual, reg, scratchDouble, &nonZero); + } + // Input register is either zero or negative zero. Retrieve sign of input. + vmovmskpd(reg, scratch); + + // If reg is 1 or 3, input is negative zero. + // If reg is 0 or 2, input is a normal zero. 
+ asMasm().branchTest32(NonZero, scratch, Imm32(1), label); + + bind(&nonZero); +#elif defined(JS_CODEGEN_X64) + vmovq(reg, scratch); + cmpq(Imm32(1), scratch); + j(Overflow, label); +#endif +} + +void MacroAssemblerX86Shared::branchNegativeZeroFloat32(FloatRegister reg, + Register scratch, + Label* label) { + vmovd(reg, scratch); + cmp32(scratch, Imm32(1)); + j(Overflow, label); +} + +MacroAssembler& MacroAssemblerX86Shared::asMasm() { + return *static_cast<MacroAssembler*>(this); +} + +const MacroAssembler& MacroAssemblerX86Shared::asMasm() const { + return *static_cast<const MacroAssembler*>(this); +} + +template <class T, class Map> +T* MacroAssemblerX86Shared::getConstant(const typename T::Pod& value, Map& map, + Vector<T, 0, SystemAllocPolicy>& vec) { + using AddPtr = typename Map::AddPtr; + size_t index; + if (AddPtr p = map.lookupForAdd(value)) { + index = p->value(); + } else { + index = vec.length(); + enoughMemory_ &= vec.append(T(value)); + if (!enoughMemory_) { + return nullptr; + } + enoughMemory_ &= map.add(p, value, index); + if (!enoughMemory_) { + return nullptr; + } + } + return &vec[index]; +} + +MacroAssemblerX86Shared::Float* MacroAssemblerX86Shared::getFloat(float f) { + return getConstant<Float, FloatMap>(f, floatMap_, floats_); +} + +MacroAssemblerX86Shared::Double* MacroAssemblerX86Shared::getDouble(double d) { + return getConstant<Double, DoubleMap>(d, doubleMap_, doubles_); +} + +MacroAssemblerX86Shared::SimdData* MacroAssemblerX86Shared::getSimdData( + const SimdConstant& v) { + return getConstant<SimdData, SimdMap>(v, simdMap_, simds_); +} + +void MacroAssemblerX86Shared::binarySimd128( + const SimdConstant& rhs, FloatRegister lhsDest, + void (MacroAssembler::*regOp)(const Operand&, FloatRegister, FloatRegister), + void (MacroAssembler::*constOp)(const SimdConstant&, FloatRegister)) { + ScratchSimd128Scope scratch(asMasm()); + if (maybeInlineSimd128Int(rhs, scratch)) { + (asMasm().*regOp)(Operand(scratch), lhsDest, lhsDest); + } else { + (asMasm().*constOp)(rhs, lhsDest); + } +} + +void MacroAssemblerX86Shared::binarySimd128( + FloatRegister lhs, const SimdConstant& rhs, FloatRegister dest, + void (MacroAssembler::*regOp)(const Operand&, FloatRegister, FloatRegister), + void (MacroAssembler::*constOp)(const SimdConstant&, FloatRegister, + FloatRegister)) { + ScratchSimd128Scope scratch(asMasm()); + if (maybeInlineSimd128Int(rhs, scratch)) { + (asMasm().*regOp)(Operand(scratch), lhs, dest); + } else { + (asMasm().*constOp)(rhs, lhs, dest); + } +} + +void MacroAssemblerX86Shared::binarySimd128( + const SimdConstant& rhs, FloatRegister lhs, + void (MacroAssembler::*regOp)(const Operand&, FloatRegister), + void (MacroAssembler::*constOp)(const SimdConstant&, FloatRegister)) { + ScratchSimd128Scope scratch(asMasm()); + if (maybeInlineSimd128Int(rhs, scratch)) { + (asMasm().*regOp)(Operand(scratch), lhs); + } else { + (asMasm().*constOp)(rhs, lhs); + } +} + +void MacroAssemblerX86Shared::bitwiseTestSimd128(const SimdConstant& rhs, + FloatRegister lhs) { + ScratchSimd128Scope scratch(asMasm()); + if (maybeInlineSimd128Int(rhs, scratch)) { + vptest(scratch, lhs); + } else { + asMasm().vptestSimd128(rhs, lhs); + } +} + +void MacroAssemblerX86Shared::minMaxDouble(FloatRegister first, + FloatRegister second, bool canBeNaN, + bool isMax) { + Label done, nan, minMaxInst; + + // Do a vucomisd to catch equality and NaNs, which both require special + // handling. If the operands are ordered and inequal, we branch straight to + // the min/max instruction. 
If we wanted, we could also branch for less-than + // or greater-than here instead of using min/max, however these conditions + // will sometimes be hard on the branch predictor. + vucomisd(second, first); + j(Assembler::NotEqual, &minMaxInst); + if (canBeNaN) { + j(Assembler::Parity, &nan); + } + + // Ordered and equal. The operands are bit-identical unless they are zero + // and negative zero. These instructions merge the sign bits in that + // case, and are no-ops otherwise. + if (isMax) { + vandpd(second, first, first); + } else { + vorpd(second, first, first); + } + jump(&done); + + // x86's min/max are not symmetric; if either operand is a NaN, they return + // the read-only operand. We need to return a NaN if either operand is a + // NaN, so we explicitly check for a NaN in the read-write operand. + if (canBeNaN) { + bind(&nan); + vucomisd(first, first); + j(Assembler::Parity, &done); + } + + // When the values are inequal, or second is NaN, x86's min and max will + // return the value we need. + bind(&minMaxInst); + if (isMax) { + vmaxsd(second, first, first); + } else { + vminsd(second, first, first); + } + + bind(&done); +} + +void MacroAssemblerX86Shared::minMaxFloat32(FloatRegister first, + FloatRegister second, bool canBeNaN, + bool isMax) { + Label done, nan, minMaxInst; + + // Do a vucomiss to catch equality and NaNs, which both require special + // handling. If the operands are ordered and inequal, we branch straight to + // the min/max instruction. If we wanted, we could also branch for less-than + // or greater-than here instead of using min/max, however these conditions + // will sometimes be hard on the branch predictor. + vucomiss(second, first); + j(Assembler::NotEqual, &minMaxInst); + if (canBeNaN) { + j(Assembler::Parity, &nan); + } + + // Ordered and equal. The operands are bit-identical unless they are zero + // and negative zero. These instructions merge the sign bits in that + // case, and are no-ops otherwise. + if (isMax) { + vandps(second, first, first); + } else { + vorps(second, first, first); + } + jump(&done); + + // x86's min/max are not symmetric; if either operand is a NaN, they return + // the read-only operand. We need to return a NaN if either operand is a + // NaN, so we explicitly check for a NaN in the read-write operand. + if (canBeNaN) { + bind(&nan); + vucomiss(first, first); + j(Assembler::Parity, &done); + } + + // When the values are inequal, or second is NaN, x86's min and max will + // return the value we need. + bind(&minMaxInst); + if (isMax) { + vmaxss(second, first, first); + } else { + vminss(second, first, first); + } + + bind(&done); +} + +#ifdef ENABLE_WASM_SIMD +bool MacroAssembler::MustMaskShiftCountSimd128(wasm::SimdOp op, int32_t* mask) { + switch (op) { + case wasm::SimdOp::I8x16Shl: + case wasm::SimdOp::I8x16ShrU: + case wasm::SimdOp::I8x16ShrS: + *mask = 7; + break; + case wasm::SimdOp::I16x8Shl: + case wasm::SimdOp::I16x8ShrU: + case wasm::SimdOp::I16x8ShrS: + *mask = 15; + break; + case wasm::SimdOp::I32x4Shl: + case wasm::SimdOp::I32x4ShrU: + case wasm::SimdOp::I32x4ShrS: + *mask = 31; + break; + case wasm::SimdOp::I64x2Shl: + case wasm::SimdOp::I64x2ShrU: + case wasm::SimdOp::I64x2ShrS: + *mask = 63; + break; + default: + MOZ_CRASH("Unexpected shift operation"); + } + return true; +} +#endif + +//{{{ check_macroassembler_style +// =============================================================== +// MacroAssembler high-level usage. 
+ +void MacroAssembler::flush() {} + +void MacroAssembler::comment(const char* msg) { masm.comment(msg); } + +// This operation really consists of five phases, in order to enforce the +// restriction that on x86_shared, srcDest must be eax and edx will be +// clobbered. +// +// Input: { rhs, lhsOutput } +// +// [PUSH] Preserve registers +// [MOVE] Generate moves to specific registers +// +// [DIV] Input: { regForRhs, EAX } +// [DIV] extend EAX into EDX +// [DIV] x86 Division operator +// [DIV] Ouptut: { EAX, EDX } +// +// [MOVE] Move specific registers to outputs +// [POP] Restore registers +// +// Output: { lhsOutput, remainderOutput } +void MacroAssembler::flexibleDivMod32(Register rhs, Register lhsOutput, + Register remOutput, bool isUnsigned, + const LiveRegisterSet&) { + // Currently this helper can't handle this situation. + MOZ_ASSERT(lhsOutput != rhs); + MOZ_ASSERT(lhsOutput != remOutput); + + // Choose a register that is not edx, or eax to hold the rhs; + // ebx is chosen arbitrarily, and will be preserved if necessary. + Register regForRhs = (rhs == eax || rhs == edx) ? ebx : rhs; + + // Add registers we will be clobbering as live, but + // also remove the set we do not restore. + LiveRegisterSet preserve; + preserve.add(edx); + preserve.add(eax); + preserve.add(regForRhs); + + preserve.takeUnchecked(lhsOutput); + preserve.takeUnchecked(remOutput); + + PushRegsInMask(preserve); + + // Shuffle input into place. + moveRegPair(lhsOutput, rhs, eax, regForRhs); + + // Sign extend eax into edx to make (edx:eax): idiv/udiv are 64-bit. + if (isUnsigned) { + mov(ImmWord(0), edx); + udiv(regForRhs); + } else { + cdq(); + idiv(regForRhs); + } + + moveRegPair(eax, edx, lhsOutput, remOutput); + + PopRegsInMask(preserve); +} + +void MacroAssembler::flexibleQuotient32( + Register rhs, Register srcDest, bool isUnsigned, + const LiveRegisterSet& volatileLiveRegs) { + // Choose an arbitrary register that isn't eax, edx, rhs or srcDest; + AllocatableGeneralRegisterSet regs(GeneralRegisterSet::All()); + regs.takeUnchecked(eax); + regs.takeUnchecked(edx); + regs.takeUnchecked(rhs); + regs.takeUnchecked(srcDest); + + Register remOut = regs.takeAny(); + push(remOut); + flexibleDivMod32(rhs, srcDest, remOut, isUnsigned, volatileLiveRegs); + pop(remOut); +} + +void MacroAssembler::flexibleRemainder32( + Register rhs, Register srcDest, bool isUnsigned, + const LiveRegisterSet& volatileLiveRegs) { + // Choose an arbitrary register that isn't eax, edx, rhs or srcDest + AllocatableGeneralRegisterSet regs(GeneralRegisterSet::All()); + regs.takeUnchecked(eax); + regs.takeUnchecked(edx); + regs.takeUnchecked(rhs); + regs.takeUnchecked(srcDest); + + Register remOut = regs.takeAny(); + push(remOut); + flexibleDivMod32(rhs, srcDest, remOut, isUnsigned, volatileLiveRegs); + mov(remOut, srcDest); + pop(remOut); +} + +// =============================================================== +// Stack manipulation functions. 
+ +size_t MacroAssembler::PushRegsInMaskSizeInBytes(LiveRegisterSet set) { + FloatRegisterSet fpuSet(set.fpus().reduceSetForPush()); + return set.gprs().size() * sizeof(intptr_t) + fpuSet.getPushSizeInBytes(); +} + +void MacroAssembler::PushRegsInMask(LiveRegisterSet set) { + mozilla::DebugOnly<size_t> framePushedInitial = framePushed(); + + FloatRegisterSet fpuSet(set.fpus().reduceSetForPush()); + unsigned numFpu = fpuSet.size(); + int32_t diffF = fpuSet.getPushSizeInBytes(); + int32_t diffG = set.gprs().size() * sizeof(intptr_t); + + // On x86, always use push to push the integer registers, as it's fast + // on modern hardware and it's a small instruction. + for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more(); ++iter) { + diffG -= sizeof(intptr_t); + Push(*iter); + } + MOZ_ASSERT(diffG == 0); + (void)diffG; + + reserveStack(diffF); + for (FloatRegisterBackwardIterator iter(fpuSet); iter.more(); ++iter) { + FloatRegister reg = *iter; + diffF -= reg.size(); + numFpu -= 1; + Address spillAddress(StackPointer, diffF); + if (reg.isDouble()) { + storeDouble(reg, spillAddress); + } else if (reg.isSingle()) { + storeFloat32(reg, spillAddress); + } else if (reg.isSimd128()) { + storeUnalignedSimd128(reg, spillAddress); + } else { + MOZ_CRASH("Unknown register type."); + } + } + MOZ_ASSERT(numFpu == 0); + (void)numFpu; + + // x64 padding to keep the stack aligned on uintptr_t. Keep in sync with + // GetPushSizeInBytes. + size_t alignExtra = ((size_t)diffF) % sizeof(uintptr_t); + MOZ_ASSERT_IF(sizeof(uintptr_t) == 8, alignExtra == 0 || alignExtra == 4); + MOZ_ASSERT_IF(sizeof(uintptr_t) == 4, alignExtra == 0); + diffF -= alignExtra; + MOZ_ASSERT(diffF == 0); + + // The macroassembler will keep the stack sizeof(uintptr_t)-aligned, so + // we don't need to take into account `alignExtra` here. + MOZ_ASSERT(framePushed() - framePushedInitial == + PushRegsInMaskSizeInBytes(set)); +} + +void MacroAssembler::storeRegsInMask(LiveRegisterSet set, Address dest, + Register) { + mozilla::DebugOnly<size_t> offsetInitial = dest.offset; + + FloatRegisterSet fpuSet(set.fpus().reduceSetForPush()); + unsigned numFpu = fpuSet.size(); + int32_t diffF = fpuSet.getPushSizeInBytes(); + int32_t diffG = set.gprs().size() * sizeof(intptr_t); + + MOZ_ASSERT(dest.offset >= diffG + diffF); + + for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more(); ++iter) { + diffG -= sizeof(intptr_t); + dest.offset -= sizeof(intptr_t); + storePtr(*iter, dest); + } + MOZ_ASSERT(diffG == 0); + (void)diffG; + + for (FloatRegisterBackwardIterator iter(fpuSet); iter.more(); ++iter) { + FloatRegister reg = *iter; + diffF -= reg.size(); + numFpu -= 1; + dest.offset -= reg.size(); + if (reg.isDouble()) { + storeDouble(reg, dest); + } else if (reg.isSingle()) { + storeFloat32(reg, dest); + } else if (reg.isSimd128()) { + storeUnalignedSimd128(reg, dest); + } else { + MOZ_CRASH("Unknown register type."); + } + } + MOZ_ASSERT(numFpu == 0); + (void)numFpu; + + // x64 padding to keep the stack aligned on uintptr_t. Keep in sync with + // GetPushSizeInBytes. + size_t alignExtra = ((size_t)diffF) % sizeof(uintptr_t); + MOZ_ASSERT_IF(sizeof(uintptr_t) == 8, alignExtra == 0 || alignExtra == 4); + MOZ_ASSERT_IF(sizeof(uintptr_t) == 4, alignExtra == 0); + diffF -= alignExtra; + MOZ_ASSERT(diffF == 0); + + // What this means is: if `alignExtra` is nonzero, then the save area size + // actually used is `alignExtra` bytes smaller than what + // PushRegsInMaskSizeInBytes claims. Hence we need to compensate for that. 
+ MOZ_ASSERT(alignExtra + offsetInitial - dest.offset == + PushRegsInMaskSizeInBytes(set)); +} + +void MacroAssembler::PopRegsInMaskIgnore(LiveRegisterSet set, + LiveRegisterSet ignore) { + mozilla::DebugOnly<size_t> framePushedInitial = framePushed(); + + FloatRegisterSet fpuSet(set.fpus().reduceSetForPush()); + unsigned numFpu = fpuSet.size(); + int32_t diffG = set.gprs().size() * sizeof(intptr_t); + int32_t diffF = fpuSet.getPushSizeInBytes(); + const int32_t reservedG = diffG; + const int32_t reservedF = diffF; + + for (FloatRegisterBackwardIterator iter(fpuSet); iter.more(); ++iter) { + FloatRegister reg = *iter; + diffF -= reg.size(); + numFpu -= 1; + if (ignore.has(reg)) { + continue; + } + + Address spillAddress(StackPointer, diffF); + if (reg.isDouble()) { + loadDouble(spillAddress, reg); + } else if (reg.isSingle()) { + loadFloat32(spillAddress, reg); + } else if (reg.isSimd128()) { + loadUnalignedSimd128(spillAddress, reg); + } else { + MOZ_CRASH("Unknown register type."); + } + } + freeStack(reservedF); + MOZ_ASSERT(numFpu == 0); + (void)numFpu; + // x64 padding to keep the stack aligned on uintptr_t. Keep in sync with + // GetPushBytesInSize. + diffF -= diffF % sizeof(uintptr_t); + MOZ_ASSERT(diffF == 0); + + // On x86, use pop to pop the integer registers, if we're not going to + // ignore any slots, as it's fast on modern hardware and it's a small + // instruction. + if (ignore.emptyGeneral()) { + for (GeneralRegisterForwardIterator iter(set.gprs()); iter.more(); ++iter) { + diffG -= sizeof(intptr_t); + Pop(*iter); + } + } else { + for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more(); + ++iter) { + diffG -= sizeof(intptr_t); + if (!ignore.has(*iter)) { + loadPtr(Address(StackPointer, diffG), *iter); + } + } + freeStack(reservedG); + } + MOZ_ASSERT(diffG == 0); + + MOZ_ASSERT(framePushedInitial - framePushed() == + PushRegsInMaskSizeInBytes(set)); +} + +void MacroAssembler::Push(const Operand op) { + push(op); + adjustFrame(sizeof(intptr_t)); +} + +void MacroAssembler::Push(Register reg) { + push(reg); + adjustFrame(sizeof(intptr_t)); +} + +void MacroAssembler::Push(const Imm32 imm) { + push(imm); + adjustFrame(sizeof(intptr_t)); +} + +void MacroAssembler::Push(const ImmWord imm) { + push(imm); + adjustFrame(sizeof(intptr_t)); +} + +void MacroAssembler::Push(const ImmPtr imm) { + Push(ImmWord(uintptr_t(imm.value))); +} + +void MacroAssembler::Push(const ImmGCPtr ptr) { + push(ptr); + adjustFrame(sizeof(intptr_t)); +} + +void MacroAssembler::Push(FloatRegister t) { + push(t); + adjustFrame(sizeof(double)); +} + +void MacroAssembler::PushFlags() { + pushFlags(); + adjustFrame(sizeof(intptr_t)); +} + +void MacroAssembler::Pop(const Operand op) { + pop(op); + implicitPop(sizeof(intptr_t)); +} + +void MacroAssembler::Pop(Register reg) { + pop(reg); + implicitPop(sizeof(intptr_t)); +} + +void MacroAssembler::Pop(FloatRegister reg) { + pop(reg); + implicitPop(sizeof(double)); +} + +void MacroAssembler::Pop(const ValueOperand& val) { + popValue(val); + implicitPop(sizeof(Value)); +} + +void MacroAssembler::PopFlags() { + popFlags(); + implicitPop(sizeof(intptr_t)); +} + +void MacroAssembler::PopStackPtr() { Pop(StackPointer); } + +void MacroAssembler::freeStackTo(uint32_t framePushed) { + MOZ_ASSERT(framePushed <= framePushed_); + lea(Operand(FramePointer, -int32_t(framePushed)), StackPointer); + framePushed_ = framePushed; +} + +// =============================================================== +// Simple call functions. 
+ +CodeOffset MacroAssembler::call(Register reg) { return Assembler::call(reg); } + +CodeOffset MacroAssembler::call(Label* label) { return Assembler::call(label); } + +void MacroAssembler::call(const Address& addr) { + Assembler::call(Operand(addr.base, addr.offset)); +} + +CodeOffset MacroAssembler::call(wasm::SymbolicAddress target) { + mov(target, eax); + return Assembler::call(eax); +} + +void MacroAssembler::call(ImmWord target) { Assembler::call(target); } + +void MacroAssembler::call(ImmPtr target) { Assembler::call(target); } + +void MacroAssembler::call(JitCode* target) { Assembler::call(target); } + +CodeOffset MacroAssembler::callWithPatch() { + return Assembler::callWithPatch(); +} +void MacroAssembler::patchCall(uint32_t callerOffset, uint32_t calleeOffset) { + Assembler::patchCall(callerOffset, calleeOffset); +} + +void MacroAssembler::callAndPushReturnAddress(Register reg) { call(reg); } + +void MacroAssembler::callAndPushReturnAddress(Label* label) { call(label); } + +// =============================================================== +// Patchable near/far jumps. + +CodeOffset MacroAssembler::farJumpWithPatch() { + return Assembler::farJumpWithPatch(); +} + +void MacroAssembler::patchFarJump(CodeOffset farJump, uint32_t targetOffset) { + Assembler::patchFarJump(farJump, targetOffset); +} + +CodeOffset MacroAssembler::nopPatchableToCall() { + masm.nop_five(); + return CodeOffset(currentOffset()); +} + +void MacroAssembler::patchNopToCall(uint8_t* callsite, uint8_t* target) { + Assembler::patchFiveByteNopToCall(callsite, target); +} + +void MacroAssembler::patchCallToNop(uint8_t* callsite) { + Assembler::patchCallToFiveByteNop(callsite); +} + +// =============================================================== +// Jit Frames. + +uint32_t MacroAssembler::pushFakeReturnAddress(Register scratch) { + CodeLabel cl; + + mov(&cl, scratch); + Push(scratch); + bind(&cl); + uint32_t retAddr = currentOffset(); + + addCodeLabel(cl); + return retAddr; +} + +// =============================================================== +// WebAssembly + +FaultingCodeOffset MacroAssembler::wasmTrapInstruction() { + return FaultingCodeOffset(ud2().offset()); +} + +void MacroAssembler::wasmBoundsCheck32(Condition cond, Register index, + Register boundsCheckLimit, Label* ok) { + cmp32(index, boundsCheckLimit); + j(cond, ok); + if (JitOptions.spectreIndexMasking) { + cmovCCl(cond, Operand(boundsCheckLimit), index); + } +} + +void MacroAssembler::wasmBoundsCheck32(Condition cond, Register index, + Address boundsCheckLimit, Label* ok) { + cmp32(index, Operand(boundsCheckLimit)); + j(cond, ok); + if (JitOptions.spectreIndexMasking) { + cmovCCl(cond, Operand(boundsCheckLimit), index); + } +} + +// RAII class that generates the jumps to traps when it's destructed, to +// prevent some code duplication in the outOfLineWasmTruncateXtoY methods. +struct MOZ_RAII AutoHandleWasmTruncateToIntErrors { + MacroAssembler& masm; + Label inputIsNaN; + Label intOverflow; + wasm::BytecodeOffset off; + + explicit AutoHandleWasmTruncateToIntErrors(MacroAssembler& masm, + wasm::BytecodeOffset off) + : masm(masm), off(off) {} + + ~AutoHandleWasmTruncateToIntErrors() { + // Handle errors. These cases are not in arbitrary order: code will + // fall through to intOverflow. 
+ masm.bind(&intOverflow); + masm.wasmTrap(wasm::Trap::IntegerOverflow, off); + + masm.bind(&inputIsNaN); + masm.wasmTrap(wasm::Trap::InvalidConversionToInteger, off); + } +}; + +void MacroAssembler::wasmTruncateDoubleToInt32(FloatRegister input, + Register output, + bool isSaturating, + Label* oolEntry) { + vcvttsd2si(input, output); + cmp32(output, Imm32(1)); + j(Assembler::Overflow, oolEntry); +} + +void MacroAssembler::wasmTruncateFloat32ToInt32(FloatRegister input, + Register output, + bool isSaturating, + Label* oolEntry) { + vcvttss2si(input, output); + cmp32(output, Imm32(1)); + j(Assembler::Overflow, oolEntry); +} + +void MacroAssembler::oolWasmTruncateCheckF64ToI32(FloatRegister input, + Register output, + TruncFlags flags, + wasm::BytecodeOffset off, + Label* rejoin) { + bool isUnsigned = flags & TRUNC_UNSIGNED; + bool isSaturating = flags & TRUNC_SATURATING; + + if (isSaturating) { + if (isUnsigned) { + // Negative overflow and NaN both are converted to 0, and the only + // other case is positive overflow which is converted to + // UINT32_MAX. + Label nonNegative; + ScratchDoubleScope fpscratch(*this); + loadConstantDouble(0.0, fpscratch); + branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, + &nonNegative); + move32(Imm32(0), output); + jump(rejoin); + + bind(&nonNegative); + move32(Imm32(UINT32_MAX), output); + } else { + // Negative overflow is already saturated to INT32_MIN, so we only + // have to handle NaN and positive overflow here. + Label notNaN; + branchDouble(Assembler::DoubleOrdered, input, input, ¬NaN); + move32(Imm32(0), output); + jump(rejoin); + + bind(¬NaN); + ScratchDoubleScope fpscratch(*this); + loadConstantDouble(0.0, fpscratch); + branchDouble(Assembler::DoubleLessThan, input, fpscratch, rejoin); + sub32(Imm32(1), output); + } + jump(rejoin); + return; + } + + AutoHandleWasmTruncateToIntErrors traps(*this, off); + + // Eagerly take care of NaNs. + branchDouble(Assembler::DoubleUnordered, input, input, &traps.inputIsNaN); + + // For unsigned, fall through to intOverflow failure case. + if (isUnsigned) { + return; + } + + // Handle special values. + + // We've used vcvttsd2si. The only valid double values that can + // truncate to INT32_MIN are in ]INT32_MIN - 1; INT32_MIN]. + ScratchDoubleScope fpscratch(*this); + loadConstantDouble(double(INT32_MIN) - 1.0, fpscratch); + branchDouble(Assembler::DoubleLessThanOrEqual, input, fpscratch, + &traps.intOverflow); + + loadConstantDouble(0.0, fpscratch); + branchDouble(Assembler::DoubleGreaterThan, input, fpscratch, + &traps.intOverflow); + jump(rejoin); +} + +void MacroAssembler::oolWasmTruncateCheckF32ToI32(FloatRegister input, + Register output, + TruncFlags flags, + wasm::BytecodeOffset off, + Label* rejoin) { + bool isUnsigned = flags & TRUNC_UNSIGNED; + bool isSaturating = flags & TRUNC_SATURATING; + + if (isSaturating) { + if (isUnsigned) { + // Negative overflow and NaN both are converted to 0, and the only + // other case is positive overflow which is converted to + // UINT32_MAX. + Label nonNegative; + ScratchFloat32Scope fpscratch(*this); + loadConstantFloat32(0.0f, fpscratch); + branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, + &nonNegative); + move32(Imm32(0), output); + jump(rejoin); + + bind(&nonNegative); + move32(Imm32(UINT32_MAX), output); + } else { + // Negative overflow is already saturated to INT32_MIN, so we only + // have to handle NaN and positive overflow here. 
+ Label notNaN; + branchFloat(Assembler::DoubleOrdered, input, input, ¬NaN); + move32(Imm32(0), output); + jump(rejoin); + + bind(¬NaN); + ScratchFloat32Scope fpscratch(*this); + loadConstantFloat32(0.0f, fpscratch); + branchFloat(Assembler::DoubleLessThan, input, fpscratch, rejoin); + sub32(Imm32(1), output); + } + jump(rejoin); + return; + } + + AutoHandleWasmTruncateToIntErrors traps(*this, off); + + // Eagerly take care of NaNs. + branchFloat(Assembler::DoubleUnordered, input, input, &traps.inputIsNaN); + + // For unsigned, fall through to intOverflow failure case. + if (isUnsigned) { + return; + } + + // Handle special values. + + // We've used vcvttss2si. Check that the input wasn't + // float(INT32_MIN), which is the only legimitate input that + // would truncate to INT32_MIN. + ScratchFloat32Scope fpscratch(*this); + loadConstantFloat32(float(INT32_MIN), fpscratch); + branchFloat(Assembler::DoubleNotEqual, input, fpscratch, &traps.intOverflow); + jump(rejoin); +} + +void MacroAssembler::oolWasmTruncateCheckF64ToI64(FloatRegister input, + Register64 output, + TruncFlags flags, + wasm::BytecodeOffset off, + Label* rejoin) { + bool isUnsigned = flags & TRUNC_UNSIGNED; + bool isSaturating = flags & TRUNC_SATURATING; + + if (isSaturating) { + if (isUnsigned) { + // Negative overflow and NaN both are converted to 0, and the only + // other case is positive overflow which is converted to + // UINT64_MAX. + Label positive; + ScratchDoubleScope fpscratch(*this); + loadConstantDouble(0.0, fpscratch); + branchDouble(Assembler::DoubleGreaterThan, input, fpscratch, &positive); + move64(Imm64(0), output); + jump(rejoin); + + bind(&positive); + move64(Imm64(UINT64_MAX), output); + } else { + // Negative overflow is already saturated to INT64_MIN, so we only + // have to handle NaN and positive overflow here. + Label notNaN; + branchDouble(Assembler::DoubleOrdered, input, input, ¬NaN); + move64(Imm64(0), output); + jump(rejoin); + + bind(¬NaN); + ScratchDoubleScope fpscratch(*this); + loadConstantDouble(0.0, fpscratch); + branchDouble(Assembler::DoubleLessThan, input, fpscratch, rejoin); + sub64(Imm64(1), output); + } + jump(rejoin); + return; + } + + AutoHandleWasmTruncateToIntErrors traps(*this, off); + + // Eagerly take care of NaNs. + branchDouble(Assembler::DoubleUnordered, input, input, &traps.inputIsNaN); + + // Handle special values. + if (isUnsigned) { + ScratchDoubleScope fpscratch(*this); + loadConstantDouble(0.0, fpscratch); + branchDouble(Assembler::DoubleGreaterThan, input, fpscratch, + &traps.intOverflow); + loadConstantDouble(-1.0, fpscratch); + branchDouble(Assembler::DoubleLessThanOrEqual, input, fpscratch, + &traps.intOverflow); + jump(rejoin); + return; + } + + // We've used vcvtsd2sq. The only legit value whose i64 + // truncation is INT64_MIN is double(INT64_MIN): exponent is so + // high that the highest resolution around is much more than 1. + ScratchDoubleScope fpscratch(*this); + loadConstantDouble(double(int64_t(INT64_MIN)), fpscratch); + branchDouble(Assembler::DoubleNotEqual, input, fpscratch, &traps.intOverflow); + jump(rejoin); +} + +void MacroAssembler::oolWasmTruncateCheckF32ToI64(FloatRegister input, + Register64 output, + TruncFlags flags, + wasm::BytecodeOffset off, + Label* rejoin) { + bool isUnsigned = flags & TRUNC_UNSIGNED; + bool isSaturating = flags & TRUNC_SATURATING; + + if (isSaturating) { + if (isUnsigned) { + // Negative overflow and NaN both are converted to 0, and the only + // other case is positive overflow which is converted to + // UINT64_MAX. 
+ Label positive; + ScratchFloat32Scope fpscratch(*this); + loadConstantFloat32(0.0f, fpscratch); + branchFloat(Assembler::DoubleGreaterThan, input, fpscratch, &positive); + move64(Imm64(0), output); + jump(rejoin); + + bind(&positive); + move64(Imm64(UINT64_MAX), output); + } else { + // Negative overflow is already saturated to INT64_MIN, so we only + // have to handle NaN and positive overflow here. + Label notNaN; + branchFloat(Assembler::DoubleOrdered, input, input, ¬NaN); + move64(Imm64(0), output); + jump(rejoin); + + bind(¬NaN); + ScratchFloat32Scope fpscratch(*this); + loadConstantFloat32(0.0f, fpscratch); + branchFloat(Assembler::DoubleLessThan, input, fpscratch, rejoin); + sub64(Imm64(1), output); + } + jump(rejoin); + return; + } + + AutoHandleWasmTruncateToIntErrors traps(*this, off); + + // Eagerly take care of NaNs. + branchFloat(Assembler::DoubleUnordered, input, input, &traps.inputIsNaN); + + // Handle special values. + if (isUnsigned) { + ScratchFloat32Scope fpscratch(*this); + loadConstantFloat32(0.0f, fpscratch); + branchFloat(Assembler::DoubleGreaterThan, input, fpscratch, + &traps.intOverflow); + loadConstantFloat32(-1.0f, fpscratch); + branchFloat(Assembler::DoubleLessThanOrEqual, input, fpscratch, + &traps.intOverflow); + jump(rejoin); + return; + } + + // We've used vcvtss2sq. See comment in outOfLineWasmTruncateDoubleToInt64. + ScratchFloat32Scope fpscratch(*this); + loadConstantFloat32(float(int64_t(INT64_MIN)), fpscratch); + branchFloat(Assembler::DoubleNotEqual, input, fpscratch, &traps.intOverflow); + jump(rejoin); +} + +void MacroAssembler::enterFakeExitFrameForWasm(Register cxreg, Register scratch, + ExitFrameType type) { + enterFakeExitFrame(cxreg, scratch, type); +} + +// ======================================================================== +// Primitive atomic operations. 
+ +static void ExtendTo32(MacroAssembler& masm, Scalar::Type type, Register r) { + switch (Scalar::byteSize(type)) { + case 1: + if (Scalar::isSignedIntType(type)) { + masm.movsbl(r, r); + } else { + masm.movzbl(r, r); + } + break; + case 2: + if (Scalar::isSignedIntType(type)) { + masm.movswl(r, r); + } else { + masm.movzwl(r, r); + } + break; + default: + break; + } +} + +static inline void CheckBytereg(Register r) { +#ifdef DEBUG + AllocatableGeneralRegisterSet byteRegs(Registers::SingleByteRegs); + MOZ_ASSERT(byteRegs.has(r)); +#endif +} + +static inline void CheckBytereg(Imm32 r) { + // Nothing +} + +template <typename T> +static void CompareExchange(MacroAssembler& masm, + const wasm::MemoryAccessDesc* access, + Scalar::Type type, const T& mem, Register oldval, + Register newval, Register output) { + MOZ_ASSERT(output == eax); + + if (oldval != output) { + masm.movl(oldval, output); + } + + if (access) { + masm.append(*access, wasm::TrapMachineInsn::Atomic, + FaultingCodeOffset(masm.currentOffset())); + } + + // NOTE: the generated code must match the assembly code in gen_cmpxchg in + // GenerateAtomicOperations.py + switch (Scalar::byteSize(type)) { + case 1: + CheckBytereg(newval); + masm.lock_cmpxchgb(newval, Operand(mem)); + break; + case 2: + masm.lock_cmpxchgw(newval, Operand(mem)); + break; + case 4: + masm.lock_cmpxchgl(newval, Operand(mem)); + break; + default: + MOZ_CRASH("Invalid"); + } + + ExtendTo32(masm, type, output); +} + +void MacroAssembler::compareExchange(Scalar::Type type, const Synchronization&, + const Address& mem, Register oldval, + Register newval, Register output) { + CompareExchange(*this, nullptr, type, mem, oldval, newval, output); +} + +void MacroAssembler::compareExchange(Scalar::Type type, const Synchronization&, + const BaseIndex& mem, Register oldval, + Register newval, Register output) { + CompareExchange(*this, nullptr, type, mem, oldval, newval, output); +} + +void MacroAssembler::wasmCompareExchange(const wasm::MemoryAccessDesc& access, + const Address& mem, Register oldval, + Register newval, Register output) { + CompareExchange(*this, &access, access.type(), mem, oldval, newval, output); +} + +void MacroAssembler::wasmCompareExchange(const wasm::MemoryAccessDesc& access, + const BaseIndex& mem, Register oldval, + Register newval, Register output) { + CompareExchange(*this, &access, access.type(), mem, oldval, newval, output); +} + +template <typename T> +static void AtomicExchange(MacroAssembler& masm, + const wasm::MemoryAccessDesc* access, + Scalar::Type type, const T& mem, Register value, + Register output) +// NOTE: the generated code must match the assembly code in gen_exchange in +// GenerateAtomicOperations.py +{ + if (value != output) { + masm.movl(value, output); + } + + if (access) { + masm.append(*access, wasm::TrapMachineInsn::Atomic, + FaultingCodeOffset(masm.currentOffset())); + } + + switch (Scalar::byteSize(type)) { + case 1: + CheckBytereg(output); + masm.xchgb(output, Operand(mem)); + break; + case 2: + masm.xchgw(output, Operand(mem)); + break; + case 4: + masm.xchgl(output, Operand(mem)); + break; + default: + MOZ_CRASH("Invalid"); + } + ExtendTo32(masm, type, output); +} + +void MacroAssembler::atomicExchange(Scalar::Type type, const Synchronization&, + const Address& mem, Register value, + Register output) { + AtomicExchange(*this, nullptr, type, mem, value, output); +} + +void MacroAssembler::atomicExchange(Scalar::Type type, const Synchronization&, + const BaseIndex& mem, Register value, + Register output) { + 
AtomicExchange(*this, nullptr, type, mem, value, output); +} + +void MacroAssembler::wasmAtomicExchange(const wasm::MemoryAccessDesc& access, + const Address& mem, Register value, + Register output) { + AtomicExchange(*this, &access, access.type(), mem, value, output); +} + +void MacroAssembler::wasmAtomicExchange(const wasm::MemoryAccessDesc& access, + const BaseIndex& mem, Register value, + Register output) { + AtomicExchange(*this, &access, access.type(), mem, value, output); +} + +static void SetupValue(MacroAssembler& masm, AtomicOp op, Imm32 src, + Register output) { + if (op == AtomicFetchSubOp) { + masm.movl(Imm32(-src.value), output); + } else { + masm.movl(src, output); + } +} + +static void SetupValue(MacroAssembler& masm, AtomicOp op, Register src, + Register output) { + if (src != output) { + masm.movl(src, output); + } + if (op == AtomicFetchSubOp) { + masm.negl(output); + } +} + +template <typename T, typename V> +static void AtomicFetchOp(MacroAssembler& masm, + const wasm::MemoryAccessDesc* access, + Scalar::Type arrayType, AtomicOp op, V value, + const T& mem, Register temp, Register output) { + // Note value can be an Imm or a Register. + + // NOTE: the generated code must match the assembly code in gen_fetchop in + // GenerateAtomicOperations.py +#define ATOMIC_BITOP_BODY(LOAD, LOAD_DESCR, OP, LOCK_CMPXCHG) \ + do { \ + MOZ_ASSERT(output != temp); \ + MOZ_ASSERT(output == eax); \ + if (access) \ + masm.append(*access, LOAD_DESCR, \ + FaultingCodeOffset(masm.currentOffset())); \ + masm.LOAD(Operand(mem), eax); \ + Label again; \ + masm.bind(&again); \ + masm.movl(eax, temp); \ + masm.OP(value, temp); \ + masm.LOCK_CMPXCHG(temp, Operand(mem)); \ + masm.j(MacroAssembler::NonZero, &again); \ + } while (0) + + MOZ_ASSERT_IF(op == AtomicFetchAddOp || op == AtomicFetchSubOp, + temp == InvalidReg); + + switch (Scalar::byteSize(arrayType)) { + case 1: + CheckBytereg(output); + switch (op) { + case AtomicFetchAddOp: + case AtomicFetchSubOp: + CheckBytereg(value); // But not for the bitwise ops + SetupValue(masm, op, value, output); + if (access) { + masm.append(*access, wasm::TrapMachineInsn::Atomic, + FaultingCodeOffset(masm.currentOffset())); + } + masm.lock_xaddb(output, Operand(mem)); + break; + case AtomicFetchAndOp: + CheckBytereg(temp); + ATOMIC_BITOP_BODY(movb, wasm::TrapMachineInsn::Load8, andl, + lock_cmpxchgb); + break; + case AtomicFetchOrOp: + CheckBytereg(temp); + ATOMIC_BITOP_BODY(movb, wasm::TrapMachineInsn::Load8, orl, + lock_cmpxchgb); + break; + case AtomicFetchXorOp: + CheckBytereg(temp); + ATOMIC_BITOP_BODY(movb, wasm::TrapMachineInsn::Load8, xorl, + lock_cmpxchgb); + break; + default: + MOZ_CRASH(); + } + break; + case 2: + switch (op) { + case AtomicFetchAddOp: + case AtomicFetchSubOp: + SetupValue(masm, op, value, output); + if (access) { + masm.append(*access, wasm::TrapMachineInsn::Atomic, + FaultingCodeOffset(masm.currentOffset())); + } + masm.lock_xaddw(output, Operand(mem)); + break; + case AtomicFetchAndOp: + ATOMIC_BITOP_BODY(movw, wasm::TrapMachineInsn::Load16, andl, + lock_cmpxchgw); + break; + case AtomicFetchOrOp: + ATOMIC_BITOP_BODY(movw, wasm::TrapMachineInsn::Load16, orl, + lock_cmpxchgw); + break; + case AtomicFetchXorOp: + ATOMIC_BITOP_BODY(movw, wasm::TrapMachineInsn::Load16, xorl, + lock_cmpxchgw); + break; + default: + MOZ_CRASH(); + } + break; + case 4: + switch (op) { + case AtomicFetchAddOp: + case AtomicFetchSubOp: + SetupValue(masm, op, value, output); + if (access) { + masm.append(*access, wasm::TrapMachineInsn::Atomic, + 
FaultingCodeOffset(masm.currentOffset())); + } + masm.lock_xaddl(output, Operand(mem)); + break; + case AtomicFetchAndOp: + ATOMIC_BITOP_BODY(movl, wasm::TrapMachineInsn::Load32, andl, + lock_cmpxchgl); + break; + case AtomicFetchOrOp: + ATOMIC_BITOP_BODY(movl, wasm::TrapMachineInsn::Load32, orl, + lock_cmpxchgl); + break; + case AtomicFetchXorOp: + ATOMIC_BITOP_BODY(movl, wasm::TrapMachineInsn::Load32, xorl, + lock_cmpxchgl); + break; + default: + MOZ_CRASH(); + } + break; + default: + MOZ_CRASH("Invalid size"); + } + ExtendTo32(masm, arrayType, output); + +#undef ATOMIC_BITOP_BODY +} + +void MacroAssembler::atomicFetchOp(Scalar::Type arrayType, + const Synchronization&, AtomicOp op, + Register value, const BaseIndex& mem, + Register temp, Register output) { + AtomicFetchOp(*this, nullptr, arrayType, op, value, mem, temp, output); +} + +void MacroAssembler::atomicFetchOp(Scalar::Type arrayType, + const Synchronization&, AtomicOp op, + Register value, const Address& mem, + Register temp, Register output) { + AtomicFetchOp(*this, nullptr, arrayType, op, value, mem, temp, output); +} + +void MacroAssembler::atomicFetchOp(Scalar::Type arrayType, + const Synchronization&, AtomicOp op, + Imm32 value, const BaseIndex& mem, + Register temp, Register output) { + AtomicFetchOp(*this, nullptr, arrayType, op, value, mem, temp, output); +} + +void MacroAssembler::atomicFetchOp(Scalar::Type arrayType, + const Synchronization&, AtomicOp op, + Imm32 value, const Address& mem, + Register temp, Register output) { + AtomicFetchOp(*this, nullptr, arrayType, op, value, mem, temp, output); +} + +void MacroAssembler::wasmAtomicFetchOp(const wasm::MemoryAccessDesc& access, + AtomicOp op, Register value, + const Address& mem, Register temp, + Register output) { + AtomicFetchOp(*this, &access, access.type(), op, value, mem, temp, output); +} + +void MacroAssembler::wasmAtomicFetchOp(const wasm::MemoryAccessDesc& access, + AtomicOp op, Imm32 value, + const Address& mem, Register temp, + Register output) { + AtomicFetchOp(*this, &access, access.type(), op, value, mem, temp, output); +} + +void MacroAssembler::wasmAtomicFetchOp(const wasm::MemoryAccessDesc& access, + AtomicOp op, Register value, + const BaseIndex& mem, Register temp, + Register output) { + AtomicFetchOp(*this, &access, access.type(), op, value, mem, temp, output); +} + +void MacroAssembler::wasmAtomicFetchOp(const wasm::MemoryAccessDesc& access, + AtomicOp op, Imm32 value, + const BaseIndex& mem, Register temp, + Register output) { + AtomicFetchOp(*this, &access, access.type(), op, value, mem, temp, output); +} + +template <typename T, typename V> +static void AtomicEffectOp(MacroAssembler& masm, + const wasm::MemoryAccessDesc* access, + Scalar::Type arrayType, AtomicOp op, V value, + const T& mem) { + if (access) { + masm.append(*access, wasm::TrapMachineInsn::Atomic, + FaultingCodeOffset(masm.currentOffset())); + } + + switch (Scalar::byteSize(arrayType)) { + case 1: + switch (op) { + case AtomicFetchAddOp: + masm.lock_addb(value, Operand(mem)); + break; + case AtomicFetchSubOp: + masm.lock_subb(value, Operand(mem)); + break; + case AtomicFetchAndOp: + masm.lock_andb(value, Operand(mem)); + break; + case AtomicFetchOrOp: + masm.lock_orb(value, Operand(mem)); + break; + case AtomicFetchXorOp: + masm.lock_xorb(value, Operand(mem)); + break; + default: + MOZ_CRASH(); + } + break; + case 2: + switch (op) { + case AtomicFetchAddOp: + masm.lock_addw(value, Operand(mem)); + break; + case AtomicFetchSubOp: + masm.lock_subw(value, Operand(mem)); + break; + 
case AtomicFetchAndOp: + masm.lock_andw(value, Operand(mem)); + break; + case AtomicFetchOrOp: + masm.lock_orw(value, Operand(mem)); + break; + case AtomicFetchXorOp: + masm.lock_xorw(value, Operand(mem)); + break; + default: + MOZ_CRASH(); + } + break; + case 4: + switch (op) { + case AtomicFetchAddOp: + masm.lock_addl(value, Operand(mem)); + break; + case AtomicFetchSubOp: + masm.lock_subl(value, Operand(mem)); + break; + case AtomicFetchAndOp: + masm.lock_andl(value, Operand(mem)); + break; + case AtomicFetchOrOp: + masm.lock_orl(value, Operand(mem)); + break; + case AtomicFetchXorOp: + masm.lock_xorl(value, Operand(mem)); + break; + default: + MOZ_CRASH(); + } + break; + default: + MOZ_CRASH(); + } +} + +void MacroAssembler::wasmAtomicEffectOp(const wasm::MemoryAccessDesc& access, + AtomicOp op, Register value, + const Address& mem, Register temp) { + MOZ_ASSERT(temp == InvalidReg); + AtomicEffectOp(*this, &access, access.type(), op, value, mem); +} + +void MacroAssembler::wasmAtomicEffectOp(const wasm::MemoryAccessDesc& access, + AtomicOp op, Imm32 value, + const Address& mem, Register temp) { + MOZ_ASSERT(temp == InvalidReg); + AtomicEffectOp(*this, &access, access.type(), op, value, mem); +} + +void MacroAssembler::wasmAtomicEffectOp(const wasm::MemoryAccessDesc& access, + AtomicOp op, Register value, + const BaseIndex& mem, Register temp) { + MOZ_ASSERT(temp == InvalidReg); + AtomicEffectOp(*this, &access, access.type(), op, value, mem); +} + +void MacroAssembler::wasmAtomicEffectOp(const wasm::MemoryAccessDesc& access, + AtomicOp op, Imm32 value, + const BaseIndex& mem, Register temp) { + MOZ_ASSERT(temp == InvalidReg); + AtomicEffectOp(*this, &access, access.type(), op, value, mem); +} + +// ======================================================================== +// JS atomic operations. 
+ +template <typename T> +static void CompareExchangeJS(MacroAssembler& masm, Scalar::Type arrayType, + const Synchronization& sync, const T& mem, + Register oldval, Register newval, Register temp, + AnyRegister output) { + if (arrayType == Scalar::Uint32) { + masm.compareExchange(arrayType, sync, mem, oldval, newval, temp); + masm.convertUInt32ToDouble(temp, output.fpu()); + } else { + masm.compareExchange(arrayType, sync, mem, oldval, newval, output.gpr()); + } +} + +void MacroAssembler::compareExchangeJS(Scalar::Type arrayType, + const Synchronization& sync, + const Address& mem, Register oldval, + Register newval, Register temp, + AnyRegister output) { + CompareExchangeJS(*this, arrayType, sync, mem, oldval, newval, temp, output); +} + +void MacroAssembler::compareExchangeJS(Scalar::Type arrayType, + const Synchronization& sync, + const BaseIndex& mem, Register oldval, + Register newval, Register temp, + AnyRegister output) { + CompareExchangeJS(*this, arrayType, sync, mem, oldval, newval, temp, output); +} + +template <typename T> +static void AtomicExchangeJS(MacroAssembler& masm, Scalar::Type arrayType, + const Synchronization& sync, const T& mem, + Register value, Register temp, + AnyRegister output) { + if (arrayType == Scalar::Uint32) { + masm.atomicExchange(arrayType, sync, mem, value, temp); + masm.convertUInt32ToDouble(temp, output.fpu()); + } else { + masm.atomicExchange(arrayType, sync, mem, value, output.gpr()); + } +} + +void MacroAssembler::atomicExchangeJS(Scalar::Type arrayType, + const Synchronization& sync, + const Address& mem, Register value, + Register temp, AnyRegister output) { + AtomicExchangeJS(*this, arrayType, sync, mem, value, temp, output); +} + +void MacroAssembler::atomicExchangeJS(Scalar::Type arrayType, + const Synchronization& sync, + const BaseIndex& mem, Register value, + Register temp, AnyRegister output) { + AtomicExchangeJS(*this, arrayType, sync, mem, value, temp, output); +} + +template <typename T> +static void AtomicFetchOpJS(MacroAssembler& masm, Scalar::Type arrayType, + const Synchronization& sync, AtomicOp op, + Register value, const T& mem, Register temp1, + Register temp2, AnyRegister output) { + if (arrayType == Scalar::Uint32) { + masm.atomicFetchOp(arrayType, sync, op, value, mem, temp2, temp1); + masm.convertUInt32ToDouble(temp1, output.fpu()); + } else { + masm.atomicFetchOp(arrayType, sync, op, value, mem, temp1, output.gpr()); + } +} + +void MacroAssembler::atomicFetchOpJS(Scalar::Type arrayType, + const Synchronization& sync, AtomicOp op, + Register value, const Address& mem, + Register temp1, Register temp2, + AnyRegister output) { + AtomicFetchOpJS(*this, arrayType, sync, op, value, mem, temp1, temp2, output); +} + +void MacroAssembler::atomicFetchOpJS(Scalar::Type arrayType, + const Synchronization& sync, AtomicOp op, + Register value, const BaseIndex& mem, + Register temp1, Register temp2, + AnyRegister output) { + AtomicFetchOpJS(*this, arrayType, sync, op, value, mem, temp1, temp2, output); +} + +void MacroAssembler::atomicEffectOpJS(Scalar::Type arrayType, + const Synchronization&, AtomicOp op, + Register value, const BaseIndex& mem, + Register temp) { + MOZ_ASSERT(temp == InvalidReg); + AtomicEffectOp(*this, nullptr, arrayType, op, value, mem); +} + +void MacroAssembler::atomicEffectOpJS(Scalar::Type arrayType, + const Synchronization&, AtomicOp op, + Register value, const Address& mem, + Register temp) { + MOZ_ASSERT(temp == InvalidReg); + AtomicEffectOp(*this, nullptr, arrayType, op, value, mem); +} + +void 
MacroAssembler::atomicEffectOpJS(Scalar::Type arrayType, + const Synchronization&, AtomicOp op, + Imm32 value, const Address& mem, + Register temp) { + MOZ_ASSERT(temp == InvalidReg); + AtomicEffectOp(*this, nullptr, arrayType, op, value, mem); +} + +void MacroAssembler::atomicEffectOpJS(Scalar::Type arrayType, + const Synchronization& sync, AtomicOp op, + Imm32 value, const BaseIndex& mem, + Register temp) { + MOZ_ASSERT(temp == InvalidReg); + AtomicEffectOp(*this, nullptr, arrayType, op, value, mem); +} + +template <typename T> +static void AtomicFetchOpJS(MacroAssembler& masm, Scalar::Type arrayType, + const Synchronization& sync, AtomicOp op, + Imm32 value, const T& mem, Register temp1, + Register temp2, AnyRegister output) { + if (arrayType == Scalar::Uint32) { + masm.atomicFetchOp(arrayType, sync, op, value, mem, temp2, temp1); + masm.convertUInt32ToDouble(temp1, output.fpu()); + } else { + masm.atomicFetchOp(arrayType, sync, op, value, mem, temp1, output.gpr()); + } +} + +void MacroAssembler::atomicFetchOpJS(Scalar::Type arrayType, + const Synchronization& sync, AtomicOp op, + Imm32 value, const Address& mem, + Register temp1, Register temp2, + AnyRegister output) { + AtomicFetchOpJS(*this, arrayType, sync, op, value, mem, temp1, temp2, output); +} + +void MacroAssembler::atomicFetchOpJS(Scalar::Type arrayType, + const Synchronization& sync, AtomicOp op, + Imm32 value, const BaseIndex& mem, + Register temp1, Register temp2, + AnyRegister output) { + AtomicFetchOpJS(*this, arrayType, sync, op, value, mem, temp1, temp2, output); +} + +// ======================================================================== +// Spectre Mitigations. + +void MacroAssembler::speculationBarrier() { + // Spectre mitigation recommended by Intel and AMD suggest to use lfence as + // a way to force all speculative execution of instructions to end. + MOZ_ASSERT(HasSSE2()); + masm.lfence(); +} + +void MacroAssembler::floorFloat32ToInt32(FloatRegister src, Register dest, + Label* fail) { + if (HasSSE41()) { + // Fail on negative-zero. + branchNegativeZeroFloat32(src, dest, fail); + + // Round toward -Infinity. + { + ScratchFloat32Scope scratch(*this); + vroundss(X86Encoding::RoundDown, src, scratch); + truncateFloat32ToInt32(scratch, dest, fail); + } + } else { + Label negative, end; + + // Branch to a slow path for negative inputs. Doesn't catch NaN or -0. + { + ScratchFloat32Scope scratch(*this); + zeroFloat32(scratch); + branchFloat(Assembler::DoubleLessThan, src, scratch, &negative); + } + + // Fail on negative-zero. + branchNegativeZeroFloat32(src, dest, fail); + + // Input is non-negative, so truncation correctly rounds. + truncateFloat32ToInt32(src, dest, fail); + jump(&end); + + // Input is negative, but isn't -0. + // Negative values go on a comparatively expensive path, since no + // native rounding mode matches JS semantics. Still better than callVM. + bind(&negative); + { + // Truncate and round toward zero. + // This is off-by-one for everything but integer-valued inputs. + truncateFloat32ToInt32(src, dest, fail); + + // Test whether the input double was integer-valued. + { + ScratchFloat32Scope scratch(*this); + convertInt32ToFloat32(dest, scratch); + branchFloat(Assembler::DoubleEqualOrUnordered, src, scratch, &end); + } + + // Input is not integer-valued, so we rounded off-by-one in the + // wrong direction. Correct by subtraction. + subl(Imm32(1), dest); + // Cannot overflow: output was already checked against INT_MIN. 
+ } + + bind(&end); + } +} + +void MacroAssembler::floorDoubleToInt32(FloatRegister src, Register dest, + Label* fail) { + if (HasSSE41()) { + // Fail on negative-zero. + branchNegativeZero(src, dest, fail); + + // Round toward -Infinity. + { + ScratchDoubleScope scratch(*this); + vroundsd(X86Encoding::RoundDown, src, scratch); + truncateDoubleToInt32(scratch, dest, fail); + } + } else { + Label negative, end; + + // Branch to a slow path for negative inputs. Doesn't catch NaN or -0. + { + ScratchDoubleScope scratch(*this); + zeroDouble(scratch); + branchDouble(Assembler::DoubleLessThan, src, scratch, &negative); + } + + // Fail on negative-zero. + branchNegativeZero(src, dest, fail); + + // Input is non-negative, so truncation correctly rounds. + truncateDoubleToInt32(src, dest, fail); + jump(&end); + + // Input is negative, but isn't -0. + // Negative values go on a comparatively expensive path, since no + // native rounding mode matches JS semantics. Still better than callVM. + bind(&negative); + { + // Truncate and round toward zero. + // This is off-by-one for everything but integer-valued inputs. + truncateDoubleToInt32(src, dest, fail); + + // Test whether the input double was integer-valued. + { + ScratchDoubleScope scratch(*this); + convertInt32ToDouble(dest, scratch); + branchDouble(Assembler::DoubleEqualOrUnordered, src, scratch, &end); + } + + // Input is not integer-valued, so we rounded off-by-one in the + // wrong direction. Correct by subtraction. + subl(Imm32(1), dest); + // Cannot overflow: output was already checked against INT_MIN. + } + + bind(&end); + } +} + +void MacroAssembler::ceilFloat32ToInt32(FloatRegister src, Register dest, + Label* fail) { + ScratchFloat32Scope scratch(*this); + + Label lessThanOrEqualMinusOne; + + // If x is in ]-1,0], ceil(x) is -0, which cannot be represented as an int32. + // Fail if x > -1 and the sign bit is set. + loadConstantFloat32(-1.f, scratch); + branchFloat(Assembler::DoubleLessThanOrEqualOrUnordered, src, scratch, + &lessThanOrEqualMinusOne); + vmovmskps(src, dest); + branchTest32(Assembler::NonZero, dest, Imm32(1), fail); + + if (HasSSE41()) { + // x <= -1 or x > -0 + bind(&lessThanOrEqualMinusOne); + // Round toward +Infinity. + vroundss(X86Encoding::RoundUp, src, scratch); + truncateFloat32ToInt32(scratch, dest, fail); + return; + } + + // No SSE4.1 + Label end; + + // x >= 0 and x is not -0.0. We can truncate integer values, and truncate and + // add 1 to non-integer values. This will also work for values >= INT_MAX + 1, + // as the truncate operation will return INT_MIN and we'll fail. + truncateFloat32ToInt32(src, dest, fail); + convertInt32ToFloat32(dest, scratch); + branchFloat(Assembler::DoubleEqualOrUnordered, src, scratch, &end); + + // Input is not integer-valued, add 1 to obtain the ceiling value. + // If input > INT_MAX, output == INT_MAX so adding 1 will overflow. + branchAdd32(Assembler::Overflow, Imm32(1), dest, fail); + jump(&end); + + // x <= -1, truncation is the way to go. + bind(&lessThanOrEqualMinusOne); + truncateFloat32ToInt32(src, dest, fail); + + bind(&end); +} + +void MacroAssembler::ceilDoubleToInt32(FloatRegister src, Register dest, + Label* fail) { + ScratchDoubleScope scratch(*this); + + Label lessThanOrEqualMinusOne; + + // If x is in ]-1,0], ceil(x) is -0, which cannot be represented as an int32. + // Fail if x > -1 and the sign bit is set. 
+ loadConstantDouble(-1.0, scratch); + branchDouble(Assembler::DoubleLessThanOrEqualOrUnordered, src, scratch, + &lessThanOrEqualMinusOne); + vmovmskpd(src, dest); + branchTest32(Assembler::NonZero, dest, Imm32(1), fail); + + if (HasSSE41()) { + // x <= -1 or x > -0 + bind(&lessThanOrEqualMinusOne); + // Round toward +Infinity. + vroundsd(X86Encoding::RoundUp, src, scratch); + truncateDoubleToInt32(scratch, dest, fail); + return; + } + + // No SSE4.1 + Label end; + + // x >= 0 and x is not -0.0. We can truncate integer values, and truncate and + // add 1 to non-integer values. This will also work for values >= INT_MAX + 1, + // as the truncate operation will return INT_MIN and we'll fail. + truncateDoubleToInt32(src, dest, fail); + convertInt32ToDouble(dest, scratch); + branchDouble(Assembler::DoubleEqualOrUnordered, src, scratch, &end); + + // Input is not integer-valued, add 1 to obtain the ceiling value. + // If input > INT_MAX, output == INT_MAX so adding 1 will overflow. + branchAdd32(Assembler::Overflow, Imm32(1), dest, fail); + jump(&end); + + // x <= -1, truncation is the way to go. + bind(&lessThanOrEqualMinusOne); + truncateDoubleToInt32(src, dest, fail); + + bind(&end); +} + +void MacroAssembler::truncDoubleToInt32(FloatRegister src, Register dest, + Label* fail) { + Label lessThanOrEqualMinusOne; + + // Bail on ]-1; -0] range + { + ScratchDoubleScope scratch(*this); + loadConstantDouble(-1, scratch); + branchDouble(Assembler::DoubleLessThanOrEqualOrUnordered, src, scratch, + &lessThanOrEqualMinusOne); + } + + // Test for remaining values with the sign bit set, i.e. ]-1; -0] + vmovmskpd(src, dest); + branchTest32(Assembler::NonZero, dest, Imm32(1), fail); + + // x <= -1 or x >= +0, truncation is the way to go. + bind(&lessThanOrEqualMinusOne); + truncateDoubleToInt32(src, dest, fail); +} + +void MacroAssembler::truncFloat32ToInt32(FloatRegister src, Register dest, + Label* fail) { + Label lessThanOrEqualMinusOne; + + // Bail on ]-1; -0] range + { + ScratchFloat32Scope scratch(*this); + loadConstantFloat32(-1.f, scratch); + branchFloat(Assembler::DoubleLessThanOrEqualOrUnordered, src, scratch, + &lessThanOrEqualMinusOne); + } + + // Test for remaining values with the sign bit set, i.e. ]-1; -0] + vmovmskps(src, dest); + branchTest32(Assembler::NonZero, dest, Imm32(1), fail); + + // x <= -1 or x >= +0, truncation is the way to go. + bind(&lessThanOrEqualMinusOne); + truncateFloat32ToInt32(src, dest, fail); +} + +void MacroAssembler::roundFloat32ToInt32(FloatRegister src, Register dest, + FloatRegister temp, Label* fail) { + ScratchFloat32Scope scratch(*this); + + Label negativeOrZero, negative, end; + + // Branch to a slow path for non-positive inputs. Doesn't catch NaN. + zeroFloat32(scratch); + loadConstantFloat32(GetBiggestNumberLessThan(0.5f), temp); + branchFloat(Assembler::DoubleLessThanOrEqual, src, scratch, &negativeOrZero); + { + // Input is strictly positive or NaN. Add the biggest float less than 0.5 + // and truncate, rounding down (because if the input is the biggest float + // less than 0.5, adding 0.5 would undesirably round up to 1). Note that we + // have to add the input to the temp register because we're not allowed to + // modify the input register. + addFloat32(src, temp); + truncateFloat32ToInt32(temp, dest, fail); + jump(&end); + } + + // Input is negative, +0 or -0. + bind(&negativeOrZero); + { + // Branch on negative input. + j(Assembler::NotEqual, &negative); + + // Fail on negative-zero. + branchNegativeZeroFloat32(src, dest, fail); + + // Input is +0. 
+    xor32(dest, dest);
+    jump(&end);
+  }
+
+  // Input is negative.
+  bind(&negative);
+  {
+    // Inputs in [-0.5, 0) are rounded to -0. Fail.
+    loadConstantFloat32(-0.5f, scratch);
+    branchFloat(Assembler::DoubleGreaterThanOrEqual, src, scratch, fail);
+
+    // Other negative inputs need the biggest float less than 0.5 added.
+    //
+    // The result is stored in the temp register (currently contains the biggest
+    // float less than 0.5).
+    addFloat32(src, temp);
+
+    if (HasSSE41()) {
+      // Round toward -Infinity.
+      vroundss(X86Encoding::RoundDown, temp, scratch);
+
+      // Truncate.
+      truncateFloat32ToInt32(scratch, dest, fail);
+    } else {
+      // Round toward -Infinity without the benefit of ROUNDSS.
+
+      // Truncate and round toward zero.
+      // This is off-by-one for everything but integer-valued inputs.
+      truncateFloat32ToInt32(temp, dest, fail);
+
+      // Test whether the truncated float was integer-valued.
+      convertInt32ToFloat32(dest, scratch);
+      branchFloat(Assembler::DoubleEqualOrUnordered, temp, scratch, &end);
+
+      // Input is not integer-valued, so we rounded off-by-one in the
+      // wrong direction. Correct by subtraction.
+      subl(Imm32(1), dest);
+      // Cannot overflow: output was already checked against INT_MIN.
+    }
+  }
+
+  bind(&end);
+}
+
+void MacroAssembler::roundDoubleToInt32(FloatRegister src, Register dest,
+                                        FloatRegister temp, Label* fail) {
+  ScratchDoubleScope scratch(*this);
+
+  Label negativeOrZero, negative, end;
+
+  // Branch to a slow path for non-positive inputs. Doesn't catch NaN.
+  zeroDouble(scratch);
+  loadConstantDouble(GetBiggestNumberLessThan(0.5), temp);
+  branchDouble(Assembler::DoubleLessThanOrEqual, src, scratch, &negativeOrZero);
+  {
+    // Input is strictly positive or NaN. Add the biggest double less than 0.5
+    // and truncate, rounding down (because if the input is the biggest double
+    // less than 0.5, adding 0.5 would undesirably round up to 1). Note that we
+    // have to add the input to the temp register because we're not allowed to
+    // modify the input register.
+    addDouble(src, temp);
+    truncateDoubleToInt32(temp, dest, fail);
+    jump(&end);
+  }
+
+  // Input is negative, +0 or -0.
+  bind(&negativeOrZero);
+  {
+    // Branch on negative input.
+    j(Assembler::NotEqual, &negative);
+
+    // Fail on negative-zero.
+    branchNegativeZero(src, dest, fail, /* maybeNonZero = */ false);
+
+    // Input is +0
+    xor32(dest, dest);
+    jump(&end);
+  }
+
+  // Input is negative.
+  bind(&negative);
+  {
+    // Inputs in [-0.5, 0) are rounded to -0. Fail.
+    loadConstantDouble(-0.5, scratch);
+    branchDouble(Assembler::DoubleGreaterThanOrEqual, src, scratch, fail);
+
+    // Other negative inputs need the biggest double less than 0.5 added.
+    //
+    // The result is stored in the temp register (currently contains the biggest
+    // double less than 0.5).
+    addDouble(src, temp);
+
+    if (HasSSE41()) {
+      // Round toward -Infinity.
+      vroundsd(X86Encoding::RoundDown, temp, scratch);
+
+      // Truncate.
+      truncateDoubleToInt32(scratch, dest, fail);
+    } else {
+      // Round toward -Infinity without the benefit of ROUNDSD.
+
+      // Truncate and round toward zero.
+      // This is off-by-one for everything but integer-valued inputs.
+      truncateDoubleToInt32(temp, dest, fail);
+
+      // Test whether the truncated double was integer-valued.
+      convertInt32ToDouble(dest, scratch);
+      branchDouble(Assembler::DoubleEqualOrUnordered, temp, scratch, &end);
+
+      // Input is not integer-valued, so we rounded off-by-one in the
+      // wrong direction. Correct by subtraction.
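+      // For example, if src is -3.6, temp is roughly -3.1: truncation gives
+      // -3, but the correct rounded-down result is floor(temp) = -4, so the
+      // subtraction below fixes up the off-by-one.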
+      subl(Imm32(1), dest);
+      // Cannot overflow: output was already checked against INT_MIN.
+    }
+  }
+
+  bind(&end);
+}
+
+void MacroAssembler::nearbyIntDouble(RoundingMode mode, FloatRegister src,
+                                     FloatRegister dest) {
+  MOZ_ASSERT(HasRoundInstruction(mode));
+  vroundsd(Assembler::ToX86RoundingMode(mode), src, dest);
+}
+
+void MacroAssembler::nearbyIntFloat32(RoundingMode mode, FloatRegister src,
+                                      FloatRegister dest) {
+  MOZ_ASSERT(HasRoundInstruction(mode));
+  vroundss(Assembler::ToX86RoundingMode(mode), src, dest);
+}
+
+void MacroAssembler::copySignDouble(FloatRegister lhs, FloatRegister rhs,
+                                    FloatRegister output) {
+  ScratchDoubleScope scratch(*this);
+
+  // TODO Support AVX2
+  if (rhs == output) {
+    MOZ_ASSERT(lhs != rhs);
+    double keepSignMask = mozilla::BitwiseCast<double>(INT64_MIN);
+    loadConstantDouble(keepSignMask, scratch);
+    vandpd(scratch, rhs, output);
+
+    double clearSignMask = mozilla::BitwiseCast<double>(INT64_MAX);
+    loadConstantDouble(clearSignMask, scratch);
+    vandpd(lhs, scratch, scratch);
+  } else {
+    double clearSignMask = mozilla::BitwiseCast<double>(INT64_MAX);
+    loadConstantDouble(clearSignMask, scratch);
+    vandpd(scratch, lhs, output);
+
+    double keepSignMask = mozilla::BitwiseCast<double>(INT64_MIN);
+    loadConstantDouble(keepSignMask, scratch);
+    vandpd(rhs, scratch, scratch);
+  }
+
+  vorpd(scratch, output, output);
+}
+
+void MacroAssembler::copySignFloat32(FloatRegister lhs, FloatRegister rhs,
+                                     FloatRegister output) {
+  ScratchFloat32Scope scratch(*this);
+
+  // TODO Support AVX2
+  if (rhs == output) {
+    MOZ_ASSERT(lhs != rhs);
+    float keepSignMask = mozilla::BitwiseCast<float>(INT32_MIN);
+    loadConstantFloat32(keepSignMask, scratch);
+    vandps(scratch, output, output);
+
+    float clearSignMask = mozilla::BitwiseCast<float>(INT32_MAX);
+    loadConstantFloat32(clearSignMask, scratch);
+    vandps(lhs, scratch, scratch);
+  } else {
+    float clearSignMask = mozilla::BitwiseCast<float>(INT32_MAX);
+    loadConstantFloat32(clearSignMask, scratch);
+    vandps(scratch, lhs, output);
+
+    float keepSignMask = mozilla::BitwiseCast<float>(INT32_MIN);
+    loadConstantFloat32(keepSignMask, scratch);
+    vandps(rhs, scratch, scratch);
+  }
+
+  vorps(scratch, output, output);
+}
+
+void MacroAssembler::shiftIndex32AndAdd(Register indexTemp32, int shift,
+                                        Register pointer) {
+  if (IsShiftInScaleRange(shift)) {
+    computeEffectiveAddress(
+        BaseIndex(pointer, indexTemp32, ShiftToScale(shift)), pointer);
+    return;
+  }
+  lshift32(Imm32(shift), indexTemp32);
+  addPtr(indexTemp32, pointer);
+}
+
+//}}} check_macroassembler_style
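The copySignDouble and copySignFloat32 routines above implement copysign with two bit masks: the INT64_MAX/INT32_MAX pattern clears the sign bit of lhs, the INT64_MIN/INT32_MIN pattern isolates the sign bit of rhs, and the two halves are OR'ed together. As an illustrative sketch only (not part of this patch; the helper name is invented for exposition), the same mask-and-combine idea can be written in scalar C++:

#include <cstdint>
#include <cstring>

// Illustrative sketch only: scalar equivalent of the masked vandpd/vorpd
// sequence in copySignDouble above. CopySignViaMasks is a made-up name.
static double CopySignViaMasks(double lhs, double rhs) {
  uint64_t lhsBits;
  uint64_t rhsBits;
  std::memcpy(&lhsBits, &lhs, sizeof(lhs));
  std::memcpy(&rhsBits, &rhs, sizeof(rhs));

  // INT64_MAX keeps everything but the sign bit; INT64_MIN keeps only the
  // sign bit.
  const uint64_t clearSignMask = 0x7fffffffffffffffULL;  // INT64_MAX pattern
  const uint64_t keepSignMask = 0x8000000000000000ULL;   // INT64_MIN pattern

  uint64_t resultBits = (lhsBits & clearSignMask) | (rhsBits & keepSignMask);

  double result;
  std::memcpy(&result, &resultBits, sizeof(result));
  return result;  // e.g. CopySignViaMasks(3.0, -0.0) == -3.0
}

The assembly version performs the same three steps but has only one scratch XMM register available, which is why the rhs == output case applies the keep-sign mask to output in place before building the cleared lhs bits in scratch.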