/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "jit/x86-shared/MacroAssembler-x86-shared.h"

#include "mozilla/Casting.h"

#include "jsmath.h"

#include "jit/JitFrames.h"
#include "jit/MacroAssembler.h"
#include "js/ScalarType.h"  // js::Scalar::Type

#include "jit/MacroAssembler-inl.h"

using namespace js;
using namespace js::jit;

// Clamps the double in |input| to the range [0, 255], rounding ties to even,
// and writes the integer result to |output|.
//
// Note: this function clobbers the input register.
void MacroAssembler::clampDoubleToUint8(FloatRegister input, Register output) {
  ScratchDoubleScope scratch(*this);
  MOZ_ASSERT(input != scratch);
  Label positive, done;

  // <= 0 or NaN --> 0
  zeroDouble(scratch);
  branchDouble(DoubleGreaterThan, input, scratch, &positive);
  {
    move32(Imm32(0), output);
    jump(&done);
  }

  bind(&positive);

  // Add 0.5 and truncate.
  loadConstantDouble(0.5, scratch);
  addDouble(scratch, input);

  Label outOfRange;

  // Truncate to int32 and ensure the result <= 255. This relies on the
  // processor setting output to a value > 255 for doubles outside the int32
  // range (for instance 0x80000000).
  vcvttsd2si(input, output);
  branch32(Assembler::Above, output, Imm32(255), &outOfRange);
  {
    // Check if we had a tie.
    convertInt32ToDouble(output, scratch);
    branchDouble(DoubleNotEqual, input, scratch, &done);

    // It was a tie. Mask out the ones bit to get an even value.
    // See also js_TypedArray_uint8_clamp_double.
    and32(Imm32(~1), output);
    jump(&done);
  }

  // > 255 --> 255
  bind(&outOfRange);
  {
    move32(Imm32(255), output);
  }

  bind(&done);
}

// Pushes an IonJS frame descriptor, |fakeReturnAddr| and the frame pointer,
// in that order. Always returns true.
bool MacroAssemblerX86Shared::buildOOLFakeExitFrame(void* fakeReturnAddr) {
  asMasm().PushFrameDescriptor(FrameType::IonJS);
  asMasm().Push(ImmPtr(fakeReturnAddr));
  asMasm().Push(FramePointer);
  return true;
}

// Branches to |label| when the low double in |reg| is -0.0. |scratch| is
// clobbered. When |maybeNonZero| is false the caller has already established
// that |reg| holds +0.0 or -0.0 (x86 path only; the x64 path ignores it).
void MacroAssemblerX86Shared::branchNegativeZero(FloatRegister reg,
                                                 Register scratch, Label* label,
                                                 bool maybeNonZero) {
  // Determines whether the low double contained in the XMM register reg
  // is equal to -0.0.

#if defined(JS_CODEGEN_X86)
  Label nonZero;

  // if not already compared to zero
  if (maybeNonZero) {
    ScratchDoubleScope scratchDouble(asMasm());

    // Compare to zero. Lets through {0, -0}.
    zeroDouble(scratchDouble);

    // If reg is non-zero, jump to nonZero.
    asMasm().branchDouble(DoubleNotEqual, reg, scratchDouble, &nonZero);
  }
  // Input register is either zero or negative zero. Retrieve sign of input.
  vmovmskpd(reg, scratch);

  // If scratch is 1 or 3, input is negative zero.
  // If scratch is 0 or 2, input is a normal zero.
  asMasm().branchTest32(NonZero, scratch, Imm32(1), label);

  bind(&nonZero);
#elif defined(JS_CODEGEN_X64)
  // Subtracting 1 from the raw bits sets the overflow flag only for the
  // pattern 0x8000000000000000, i.e. -0.0.
  vmovq(reg, scratch);
  cmpq(Imm32(1), scratch);
  j(Overflow, label);
#endif
}

// Float32 variant: branches to |label| when |reg| holds -0.0f. |scratch| is
// clobbered.
void MacroAssemblerX86Shared::branchNegativeZeroFloat32(FloatRegister reg,
                                                        Register scratch,
                                                        Label* label) {
  // Subtracting 1 from the raw bits overflows only for 0x80000000 (-0.0f).
  vmovd(reg, scratch);
  cmp32(scratch, Imm32(1));
  j(Overflow, label);
}

// Downcast helpers: this object is always the base of a MacroAssembler.
MacroAssembler& MacroAssemblerX86Shared::asMasm() {
  return *static_cast<MacroAssembler*>(this);
}

const MacroAssembler& MacroAssemblerX86Shared::asMasm() const {
  return *static_cast<const MacroAssembler*>(this);
}

// Returns the pooled constant entry for |value|, creating it if |map| has no
// entry yet. On allocation failure, clears enoughMemory_ and returns nullptr.
//
// NOTE(review): the template parameter lists in this chunk were lost in
// extraction; they are restored here from usage. Confirm the parameter list
// and the Vector type against the declaration in the header.
template <class T, class Map>
T* MacroAssemblerX86Shared::getConstant(const typename T::Pod& value, Map& map,
                                        Vector<T, 0, SystemAllocPolicy>& vec) {
  using AddPtr = typename Map::AddPtr;
  size_t index;
  if (AddPtr p = map.lookupForAdd(value)) {
    index = p->value();
  } else {
    index = vec.length();
    enoughMemory_ &= vec.append(T(value));
    if (!enoughMemory_) {
      return nullptr;
    }
    enoughMemory_ &= map.add(p, value, index);
    if (!enoughMemory_) {
      return nullptr;
    }
  }
  return &vec[index];
}

// Pooled-constant accessors for float, double and SIMD constants.
MacroAssemblerX86Shared::Float* MacroAssemblerX86Shared::getFloat(float f) {
  return getConstant(f, floatMap_, floats_);
}

MacroAssemblerX86Shared::Double* MacroAssemblerX86Shared::getDouble(double d) {
  return getConstant(d, doubleMap_, doubles_);
}

MacroAssemblerX86Shared::SimdData* MacroAssemblerX86Shared::getSimdData(
    const SimdConstant& v) {
  return getConstant(v, simdMap_, simds_);
}

// The binarySimd128 overloads apply a SIMD operation with a constant right
// operand. If maybeInlineSimd128Int can place the constant in the scratch
// register, |regOp| is used with that register; otherwise |constOp| is used
// with the constant itself.

// Two-address form with a three-operand register op: lhsDest = lhsDest op rhs.
void MacroAssemblerX86Shared::binarySimd128(
    const SimdConstant& rhs, FloatRegister lhsDest,
    void (MacroAssembler::*regOp)(const Operand&, FloatRegister, FloatRegister),
    void (MacroAssembler::*constOp)(const SimdConstant&, FloatRegister)) {
  ScratchSimd128Scope scratch(asMasm());
  if (maybeInlineSimd128Int(rhs, scratch)) {
    (asMasm().*regOp)(Operand(scratch), lhsDest, lhsDest);
  } else {
    (asMasm().*constOp)(rhs, lhsDest);
  }
}

// Three-address form: dest = lhs op rhs.
void MacroAssemblerX86Shared::binarySimd128(
    FloatRegister lhs, const SimdConstant& rhs, FloatRegister dest,
    void (MacroAssembler::*regOp)(const Operand&, FloatRegister, FloatRegister),
    void (MacroAssembler::*constOp)(const SimdConstant&, FloatRegister,
                                    FloatRegister)) {
  ScratchSimd128Scope scratch(asMasm());
  if (maybeInlineSimd128Int(rhs, scratch)) {
    (asMasm().*regOp)(Operand(scratch), lhs, dest);
  } else {
    (asMasm().*constOp)(rhs, lhs, dest);
  }
}

// Two-address form with a two-operand register op.
void MacroAssemblerX86Shared::binarySimd128(
    const SimdConstant& rhs, FloatRegister lhs,
    void (MacroAssembler::*regOp)(const Operand&, FloatRegister),
    void (MacroAssembler::*constOp)(const SimdConstant&, FloatRegister)) {
  ScratchSimd128Scope scratch(asMasm());
  if (maybeInlineSimd128Int(rhs, scratch)) {
    (asMasm().*regOp)(Operand(scratch), lhs);
  } else {
    (asMasm().*constOp)(rhs, lhs);
  }
}

// Emits a vptest of |lhs| against the constant |rhs|, using the scratch
// register for the constant when it can be materialized inline.
void MacroAssemblerX86Shared::bitwiseTestSimd128(const SimdConstant& rhs,
                                                 FloatRegister lhs) {
  ScratchSimd128Scope scratch(asMasm());
  if (maybeInlineSimd128Int(rhs, scratch)) {
    vptest(scratch, lhs);
  } else {
    asMasm().vptestSimd128(rhs, lhs);
  }
}

// Computes min or max (per |isMax|) of |first| and |second| into |first|.
// |canBeNaN| controls whether the NaN handling paths are emitted.
void MacroAssemblerX86Shared::minMaxDouble(FloatRegister first,
                                           FloatRegister second, bool canBeNaN,
                                           bool isMax) {
  Label done, nan, minMaxInst;

  // Do a vucomisd to catch equality and NaNs, which both require special
  // handling. If the operands are ordered and unequal, we branch straight to
  // the min/max instruction. If we wanted, we could also branch for less-than
  // or greater-than here instead of using min/max, however these conditions
  // will sometimes be hard on the branch predictor.
  vucomisd(second, first);
  j(Assembler::NotEqual, &minMaxInst);
  if (canBeNaN) {
    j(Assembler::Parity, &nan);
  }

  // Ordered and equal. The operands are bit-identical unless they are zero
  // and negative zero. These instructions merge the sign bits in that
  // case, and are no-ops otherwise.
  if (isMax) {
    vandpd(second, first, first);
  } else {
    vorpd(second, first, first);
  }
  jump(&done);

  // x86's min/max are not symmetric; if either operand is a NaN, they return
  // the read-only operand. We need to return a NaN if either operand is a
  // NaN, so we explicitly check for a NaN in the read-write operand.
  if (canBeNaN) {
    bind(&nan);
    vucomisd(first, first);
    j(Assembler::Parity, &done);
  }

  // When the values are unequal, or second is NaN, x86's min and max will
  // return the value we need.
  bind(&minMaxInst);
  if (isMax) {
    vmaxsd(second, first, first);
  } else {
    vminsd(second, first, first);
  }

  bind(&done);
}

// Float32 counterpart of minMaxDouble; same structure with the
// single-precision instructions.
void MacroAssemblerX86Shared::minMaxFloat32(FloatRegister first,
                                            FloatRegister second, bool canBeNaN,
                                            bool isMax) {
  Label done, nan, minMaxInst;

  // Do a vucomiss to catch equality and NaNs, which both require special
  // handling. If the operands are ordered and unequal, we branch straight to
  // the min/max instruction. If we wanted, we could also branch for less-than
  // or greater-than here instead of using min/max, however these conditions
  // will sometimes be hard on the branch predictor.
  vucomiss(second, first);
  j(Assembler::NotEqual, &minMaxInst);
  if (canBeNaN) {
    j(Assembler::Parity, &nan);
  }

  // Ordered and equal. The operands are bit-identical unless they are zero
  // and negative zero. These instructions merge the sign bits in that
  // case, and are no-ops otherwise.
  if (isMax) {
    vandps(second, first, first);
  } else {
    vorps(second, first, first);
  }
  jump(&done);

  // x86's min/max are not symmetric; if either operand is a NaN, they return
  // the read-only operand. We need to return a NaN if either operand is a
  // NaN, so we explicitly check for a NaN in the read-write operand.
  if (canBeNaN) {
    bind(&nan);
    vucomiss(first, first);
    j(Assembler::Parity, &done);
  }

  // When the values are unequal, or second is NaN, x86's min and max will
  // return the value we need.
  bind(&minMaxInst);
  if (isMax) {
    vmaxss(second, first, first);
  } else {
    vminss(second, first, first);
  }

  bind(&done);
}

#ifdef ENABLE_WASM_SIMD
// Stores in |*mask| the shift-count mask for the SIMD shift |op| (lane width
// in bits minus one) and returns true. Crashes for any non-shift op.
bool MacroAssembler::MustMaskShiftCountSimd128(wasm::SimdOp op, int32_t* mask) {
  switch (op) {
    case wasm::SimdOp::I8x16Shl:
    case wasm::SimdOp::I8x16ShrU:
    case wasm::SimdOp::I8x16ShrS:
      *mask = 7;
      break;
    case wasm::SimdOp::I16x8Shl:
    case wasm::SimdOp::I16x8ShrU:
    case wasm::SimdOp::I16x8ShrS:
      *mask = 15;
      break;
    case wasm::SimdOp::I32x4Shl:
    case wasm::SimdOp::I32x4ShrU:
    case wasm::SimdOp::I32x4ShrS:
      *mask = 31;
      break;
    case wasm::SimdOp::I64x2Shl:
    case wasm::SimdOp::I64x2ShrU:
    case wasm::SimdOp::I64x2ShrS:
      *mask = 63;
      break;
    default:
      MOZ_CRASH("Unexpected shift operation");
  }
  return true;
}
#endif

//{{{ check_macroassembler_style
// ===============================================================
// MacroAssembler high-level usage.

// Nothing to flush on this platform.
void MacroAssembler::flush() {}

// Forwards |msg| to the underlying assembler's comment facility.
void MacroAssembler::comment(const char* msg) { masm.comment(msg); }

// This operation really consists of five phases, in order to enforce the
// restriction that on x86_shared, srcDest must be eax and edx will be
// clobbered.
//
//     Input: { rhs, lhsOutput }
//
//  [PUSH] Preserve registers
//  [MOVE] Generate moves to specific registers
//
//  [DIV] Input: { regForRhs, EAX }
//  [DIV] extend EAX into EDX
//  [DIV] x86 Division operator
//  [DIV] Output: { EAX, EDX }
//
//  [MOVE] Move specific registers to outputs
//  [POP] Restore registers
//
//    Output: { lhsOutput, remainderOutput }
void MacroAssembler::flexibleDivMod32(Register rhs, Register lhsOutput,
                                      Register remOutput, bool isUnsigned,
                                      const LiveRegisterSet&) {
  // Currently this helper can't handle this situation.
  MOZ_ASSERT(lhsOutput != rhs);
  MOZ_ASSERT(lhsOutput != remOutput);

  // Choose a register that is not edx, or eax to hold the rhs;
  // ebx is chosen arbitrarily, and will be preserved if necessary.
  Register regForRhs = (rhs == eax || rhs == edx) ? ebx : rhs;

  // Add registers we will be clobbering as live, but
  // also remove the set we do not restore.
  LiveRegisterSet preserve;
  preserve.add(edx);
  preserve.add(eax);
  preserve.add(regForRhs);

  preserve.takeUnchecked(lhsOutput);
  preserve.takeUnchecked(remOutput);

  PushRegsInMask(preserve);

  // Shuffle input into place.
  moveRegPair(lhsOutput, rhs, eax, regForRhs);
  if (oom()) {
    return;
  }

  // Extend eax into edx to make (edx:eax): idiv/udiv are 64-bit. The
  // unsigned case zeroes edx; the signed case sign-extends with cdq.
  if (isUnsigned) {
    mov(ImmWord(0), edx);
    udiv(regForRhs);
  } else {
    cdq();
    idiv(regForRhs);
  }

  moveRegPair(eax, edx, lhsOutput, remOutput);
  if (oom()) {
    return;
  }

  PopRegsInMask(preserve);
}

// Computes srcDest = srcDest / rhs. The remainder goes to a temporary
// register that is saved and restored around the division.
void MacroAssembler::flexibleQuotient32(
    Register rhs, Register srcDest, bool isUnsigned,
    const LiveRegisterSet& volatileLiveRegs) {
  // Choose an arbitrary register that isn't eax, edx, rhs or srcDest;
  AllocatableGeneralRegisterSet regs(GeneralRegisterSet::All());
  regs.takeUnchecked(eax);
  regs.takeUnchecked(edx);
  regs.takeUnchecked(rhs);
  regs.takeUnchecked(srcDest);

  Register remOut = regs.takeAny();
  push(remOut);
  flexibleDivMod32(rhs, srcDest, remOut, isUnsigned, volatileLiveRegs);
  pop(remOut);
}

// Computes srcDest = srcDest % rhs. The remainder is produced in a temporary
// register, copied into srcDest, and the temporary is then restored.
void MacroAssembler::flexibleRemainder32(
    Register rhs, Register srcDest, bool isUnsigned,
    const LiveRegisterSet& volatileLiveRegs) {
  // Choose an arbitrary register that isn't eax, edx, rhs or srcDest
  AllocatableGeneralRegisterSet regs(GeneralRegisterSet::All());
  regs.takeUnchecked(eax);
  regs.takeUnchecked(edx);
  regs.takeUnchecked(rhs);
  regs.takeUnchecked(srcDest);

  Register remOut = regs.takeAny();
  push(remOut);
  flexibleDivMod32(rhs, srcDest, remOut, isUnsigned, volatileLiveRegs);
  mov(remOut, srcDest);
  pop(remOut);
}

// ===============================================================
// Stack manipulation functions.

size_t MacroAssembler::PushRegsInMaskSizeInBytes(LiveRegisterSet set) { FloatRegisterSet fpuSet(set.fpus().reduceSetForPush()); return set.gprs().size() * sizeof(intptr_t) + fpuSet.getPushSizeInBytes(); } void MacroAssembler::PushRegsInMask(LiveRegisterSet set) { mozilla::DebugOnly framePushedInitial = framePushed(); FloatRegisterSet fpuSet(set.fpus().reduceSetForPush()); unsigned numFpu = fpuSet.size(); int32_t diffF = fpuSet.getPushSizeInBytes(); int32_t diffG = set.gprs().size() * sizeof(intptr_t); // On x86, always use push to push the integer registers, as it's fast // on modern hardware and it's a small instruction. for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more(); ++iter) { diffG -= sizeof(intptr_t); Push(*iter); } MOZ_ASSERT(diffG == 0); (void)diffG; reserveStack(diffF); for (FloatRegisterBackwardIterator iter(fpuSet); iter.more(); ++iter) { FloatRegister reg = *iter; diffF -= reg.size(); numFpu -= 1; Address spillAddress(StackPointer, diffF); if (reg.isDouble()) { storeDouble(reg, spillAddress); } else if (reg.isSingle()) { storeFloat32(reg, spillAddress); } else if (reg.isSimd128()) { storeUnalignedSimd128(reg, spillAddress); } else { MOZ_CRASH("Unknown register type."); } } MOZ_ASSERT(numFpu == 0); (void)numFpu; // x64 padding to keep the stack aligned on uintptr_t. Keep in sync with // GetPushSizeInBytes. size_t alignExtra = ((size_t)diffF) % sizeof(uintptr_t); MOZ_ASSERT_IF(sizeof(uintptr_t) == 8, alignExtra == 0 || alignExtra == 4); MOZ_ASSERT_IF(sizeof(uintptr_t) == 4, alignExtra == 0); diffF -= alignExtra; MOZ_ASSERT(diffF == 0); // The macroassembler will keep the stack sizeof(uintptr_t)-aligned, so // we don't need to take into account `alignExtra` here. 
MOZ_ASSERT(framePushed() - framePushedInitial == PushRegsInMaskSizeInBytes(set)); } void MacroAssembler::storeRegsInMask(LiveRegisterSet set, Address dest, Register) { mozilla::DebugOnly offsetInitial = dest.offset; FloatRegisterSet fpuSet(set.fpus().reduceSetForPush()); unsigned numFpu = fpuSet.size(); int32_t diffF = fpuSet.getPushSizeInBytes(); int32_t diffG = set.gprs().size() * sizeof(intptr_t); MOZ_ASSERT(dest.offset >= diffG + diffF); for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more(); ++iter) { diffG -= sizeof(intptr_t); dest.offset -= sizeof(intptr_t); storePtr(*iter, dest); } MOZ_ASSERT(diffG == 0); (void)diffG; for (FloatRegisterBackwardIterator iter(fpuSet); iter.more(); ++iter) { FloatRegister reg = *iter; diffF -= reg.size(); numFpu -= 1; dest.offset -= reg.size(); if (reg.isDouble()) { storeDouble(reg, dest); } else if (reg.isSingle()) { storeFloat32(reg, dest); } else if (reg.isSimd128()) { storeUnalignedSimd128(reg, dest); } else { MOZ_CRASH("Unknown register type."); } } MOZ_ASSERT(numFpu == 0); (void)numFpu; // x64 padding to keep the stack aligned on uintptr_t. Keep in sync with // GetPushSizeInBytes. size_t alignExtra = ((size_t)diffF) % sizeof(uintptr_t); MOZ_ASSERT_IF(sizeof(uintptr_t) == 8, alignExtra == 0 || alignExtra == 4); MOZ_ASSERT_IF(sizeof(uintptr_t) == 4, alignExtra == 0); diffF -= alignExtra; MOZ_ASSERT(diffF == 0); // What this means is: if `alignExtra` is nonzero, then the save area size // actually used is `alignExtra` bytes smaller than what // PushRegsInMaskSizeInBytes claims. Hence we need to compensate for that. 
MOZ_ASSERT(alignExtra + offsetInitial - dest.offset == PushRegsInMaskSizeInBytes(set)); } void MacroAssembler::PopRegsInMaskIgnore(LiveRegisterSet set, LiveRegisterSet ignore) { mozilla::DebugOnly framePushedInitial = framePushed(); FloatRegisterSet fpuSet(set.fpus().reduceSetForPush()); unsigned numFpu = fpuSet.size(); int32_t diffG = set.gprs().size() * sizeof(intptr_t); int32_t diffF = fpuSet.getPushSizeInBytes(); const int32_t reservedG = diffG; const int32_t reservedF = diffF; for (FloatRegisterBackwardIterator iter(fpuSet); iter.more(); ++iter) { FloatRegister reg = *iter; diffF -= reg.size(); numFpu -= 1; if (ignore.has(reg)) { continue; } Address spillAddress(StackPointer, diffF); if (reg.isDouble()) { loadDouble(spillAddress, reg); } else if (reg.isSingle()) { loadFloat32(spillAddress, reg); } else if (reg.isSimd128()) { loadUnalignedSimd128(spillAddress, reg); } else { MOZ_CRASH("Unknown register type."); } } freeStack(reservedF); MOZ_ASSERT(numFpu == 0); (void)numFpu; // x64 padding to keep the stack aligned on uintptr_t. Keep in sync with // GetPushBytesInSize. diffF -= diffF % sizeof(uintptr_t); MOZ_ASSERT(diffF == 0); // On x86, use pop to pop the integer registers, if we're not going to // ignore any slots, as it's fast on modern hardware and it's a small // instruction. 
if (ignore.emptyGeneral()) { for (GeneralRegisterForwardIterator iter(set.gprs()); iter.more(); ++iter) { diffG -= sizeof(intptr_t); Pop(*iter); } } else { for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more(); ++iter) { diffG -= sizeof(intptr_t); if (!ignore.has(*iter)) { loadPtr(Address(StackPointer, diffG), *iter); } } freeStack(reservedG); } MOZ_ASSERT(diffG == 0); MOZ_ASSERT(framePushedInitial - framePushed() == PushRegsInMaskSizeInBytes(set)); } void MacroAssembler::Push(const Operand op) { push(op); adjustFrame(sizeof(intptr_t)); } void MacroAssembler::Push(Register reg) { push(reg); adjustFrame(sizeof(intptr_t)); } void MacroAssembler::Push(const Imm32 imm) { push(imm); adjustFrame(sizeof(intptr_t)); } void MacroAssembler::Push(const ImmWord imm) { push(imm); adjustFrame(sizeof(intptr_t)); } void MacroAssembler::Push(const ImmPtr imm) { Push(ImmWord(uintptr_t(imm.value))); } void MacroAssembler::Push(const ImmGCPtr ptr) { push(ptr); adjustFrame(sizeof(intptr_t)); } void MacroAssembler::Push(FloatRegister t) { push(t); adjustFrame(sizeof(double)); } void MacroAssembler::PushFlags() { pushFlags(); adjustFrame(sizeof(intptr_t)); } void MacroAssembler::Pop(const Operand op) { pop(op); implicitPop(sizeof(intptr_t)); } void MacroAssembler::Pop(Register reg) { pop(reg); implicitPop(sizeof(intptr_t)); } void MacroAssembler::Pop(FloatRegister reg) { pop(reg); implicitPop(sizeof(double)); } void MacroAssembler::Pop(const ValueOperand& val) { popValue(val); implicitPop(sizeof(Value)); } void MacroAssembler::PopFlags() { popFlags(); implicitPop(sizeof(intptr_t)); } void MacroAssembler::PopStackPtr() { Pop(StackPointer); } // =============================================================== // Simple call functions. 
// The call overloads below forward to the underlying Assembler. The ones
// that return a CodeOffset pass through whatever Assembler::call returns.

// Indirect call through |reg|.
CodeOffset MacroAssembler::call(Register reg) { return Assembler::call(reg); }

// Direct call to |label|.
CodeOffset MacroAssembler::call(Label* label) { return Assembler::call(label); }

// Indirect call through the memory operand at |addr|.
void MacroAssembler::call(const Address& addr) {
  Assembler::call(Operand(addr.base, addr.offset));
}

// Call to a wasm symbolic address. The target is first moved into eax, so
// eax is clobbered.
CodeOffset MacroAssembler::call(wasm::SymbolicAddress target) {
  mov(target, eax);
  return Assembler::call(eax);
}

void MacroAssembler::call(ImmWord target) { Assembler::call(target); }

void MacroAssembler::call(ImmPtr target) { Assembler::call(target); }

void MacroAssembler::call(JitCode* target) { Assembler::call(target); }

// Emits a call whose target is filled in later by patchCall.
CodeOffset MacroAssembler::callWithPatch() {
  return Assembler::callWithPatch();
}

// Patches the call at |callerOffset| to target |calleeOffset|.
void MacroAssembler::patchCall(uint32_t callerOffset, uint32_t calleeOffset) {
  Assembler::patchCall(callerOffset, calleeOffset);
}

// On this platform these are plain calls; no separate push is emitted here.
void MacroAssembler::callAndPushReturnAddress(Register reg) { call(reg); }

void MacroAssembler::callAndPushReturnAddress(Label* label) { call(label); }

// ===============================================================
// Patchable near/far jumps.

// Emits a far jump whose target is filled in later by patchFarJump.
CodeOffset MacroAssembler::farJumpWithPatch() {
  return Assembler::farJumpWithPatch();
}

void MacroAssembler::patchFarJump(CodeOffset farJump, uint32_t targetOffset) {
  Assembler::patchFarJump(farJump, targetOffset);
}

// Emits a five-byte nop and returns the offset just past it (the offset is
// read after the nop has been emitted).
CodeOffset MacroAssembler::nopPatchableToCall() {
  masm.nop_five();
  return CodeOffset(currentOffset());
}

// Rewrites the five-byte nop at |callsite| into a call to |target|.
void MacroAssembler::patchNopToCall(uint8_t* callsite, uint8_t* target) {
  Assembler::patchFiveByteNopToCall(callsite, target);
}

// Rewrites the call at |callsite| back into a five-byte nop.
void MacroAssembler::patchCallToNop(uint8_t* callsite) {
  Assembler::patchCallToFiveByteNop(callsite);
}

// ===============================================================
// Jit Frames.

uint32_t MacroAssembler::pushFakeReturnAddress(Register scratch) { CodeLabel cl; mov(&cl, scratch); Push(scratch); bind(&cl); uint32_t retAddr = currentOffset(); addCodeLabel(cl); return retAddr; } // =============================================================== // WebAssembly CodeOffset MacroAssembler::wasmTrapInstruction() { return ud2(); } void MacroAssembler::wasmBoundsCheck32(Condition cond, Register index, Register boundsCheckLimit, Label* ok) { cmp32(index, boundsCheckLimit); j(cond, ok); if (JitOptions.spectreIndexMasking) { cmovCCl(cond, Operand(boundsCheckLimit), index); } } void MacroAssembler::wasmBoundsCheck32(Condition cond, Register index, Address boundsCheckLimit, Label* ok) { cmp32(index, Operand(boundsCheckLimit)); j(cond, ok); if (JitOptions.spectreIndexMasking) { cmovCCl(cond, Operand(boundsCheckLimit), index); } } // RAII class that generates the jumps to traps when it's destructed, to // prevent some code duplication in the outOfLineWasmTruncateXtoY methods. struct MOZ_RAII AutoHandleWasmTruncateToIntErrors { MacroAssembler& masm; Label inputIsNaN; Label intOverflow; wasm::BytecodeOffset off; explicit AutoHandleWasmTruncateToIntErrors(MacroAssembler& masm, wasm::BytecodeOffset off) : masm(masm), off(off) {} ~AutoHandleWasmTruncateToIntErrors() { // Handle errors. These cases are not in arbitrary order: code will // fall through to intOverflow. 
masm.bind(&intOverflow); masm.wasmTrap(wasm::Trap::IntegerOverflow, off); masm.bind(&inputIsNaN); masm.wasmTrap(wasm::Trap::InvalidConversionToInteger, off); } }; void MacroAssembler::wasmTruncateDoubleToInt32(FloatRegister input, Register output, bool isSaturating, Label* oolEntry) { vcvttsd2si(input, output); cmp32(output, Imm32(1)); j(Assembler::Overflow, oolEntry); } void MacroAssembler::wasmTruncateFloat32ToInt32(FloatRegister input, Register output, bool isSaturating, Label* oolEntry) { vcvttss2si(input, output); cmp32(output, Imm32(1)); j(Assembler::Overflow, oolEntry); } void MacroAssembler::oolWasmTruncateCheckF64ToI32(FloatRegister input, Register output, TruncFlags flags, wasm::BytecodeOffset off, Label* rejoin) { bool isUnsigned = flags & TRUNC_UNSIGNED; bool isSaturating = flags & TRUNC_SATURATING; if (isSaturating) { if (isUnsigned) { // Negative overflow and NaN both are converted to 0, and the only // other case is positive overflow which is converted to // UINT32_MAX. Label nonNegative; ScratchDoubleScope fpscratch(*this); loadConstantDouble(0.0, fpscratch); branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, &nonNegative); move32(Imm32(0), output); jump(rejoin); bind(&nonNegative); move32(Imm32(UINT32_MAX), output); } else { // Negative overflow is already saturated to INT32_MIN, so we only // have to handle NaN and positive overflow here. Label notNaN; branchDouble(Assembler::DoubleOrdered, input, input, ¬NaN); move32(Imm32(0), output); jump(rejoin); bind(¬NaN); ScratchDoubleScope fpscratch(*this); loadConstantDouble(0.0, fpscratch); branchDouble(Assembler::DoubleLessThan, input, fpscratch, rejoin); sub32(Imm32(1), output); } jump(rejoin); return; } AutoHandleWasmTruncateToIntErrors traps(*this, off); // Eagerly take care of NaNs. branchDouble(Assembler::DoubleUnordered, input, input, &traps.inputIsNaN); // For unsigned, fall through to intOverflow failure case. if (isUnsigned) { return; } // Handle special values. 
// We've used vcvttsd2si. The only valid double values that can // truncate to INT32_MIN are in ]INT32_MIN - 1; INT32_MIN]. ScratchDoubleScope fpscratch(*this); loadConstantDouble(double(INT32_MIN) - 1.0, fpscratch); branchDouble(Assembler::DoubleLessThanOrEqual, input, fpscratch, &traps.intOverflow); loadConstantDouble(0.0, fpscratch); branchDouble(Assembler::DoubleGreaterThan, input, fpscratch, &traps.intOverflow); jump(rejoin); } void MacroAssembler::oolWasmTruncateCheckF32ToI32(FloatRegister input, Register output, TruncFlags flags, wasm::BytecodeOffset off, Label* rejoin) { bool isUnsigned = flags & TRUNC_UNSIGNED; bool isSaturating = flags & TRUNC_SATURATING; if (isSaturating) { if (isUnsigned) { // Negative overflow and NaN both are converted to 0, and the only // other case is positive overflow which is converted to // UINT32_MAX. Label nonNegative; ScratchFloat32Scope fpscratch(*this); loadConstantFloat32(0.0f, fpscratch); branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, &nonNegative); move32(Imm32(0), output); jump(rejoin); bind(&nonNegative); move32(Imm32(UINT32_MAX), output); } else { // Negative overflow is already saturated to INT32_MIN, so we only // have to handle NaN and positive overflow here. Label notNaN; branchFloat(Assembler::DoubleOrdered, input, input, ¬NaN); move32(Imm32(0), output); jump(rejoin); bind(¬NaN); ScratchFloat32Scope fpscratch(*this); loadConstantFloat32(0.0f, fpscratch); branchFloat(Assembler::DoubleLessThan, input, fpscratch, rejoin); sub32(Imm32(1), output); } jump(rejoin); return; } AutoHandleWasmTruncateToIntErrors traps(*this, off); // Eagerly take care of NaNs. branchFloat(Assembler::DoubleUnordered, input, input, &traps.inputIsNaN); // For unsigned, fall through to intOverflow failure case. if (isUnsigned) { return; } // Handle special values. // We've used vcvttss2si. Check that the input wasn't // float(INT32_MIN), which is the only legimitate input that // would truncate to INT32_MIN. 
ScratchFloat32Scope fpscratch(*this); loadConstantFloat32(float(INT32_MIN), fpscratch); branchFloat(Assembler::DoubleNotEqual, input, fpscratch, &traps.intOverflow); jump(rejoin); } void MacroAssembler::oolWasmTruncateCheckF64ToI64(FloatRegister input, Register64 output, TruncFlags flags, wasm::BytecodeOffset off, Label* rejoin) { bool isUnsigned = flags & TRUNC_UNSIGNED; bool isSaturating = flags & TRUNC_SATURATING; if (isSaturating) { if (isUnsigned) { // Negative overflow and NaN both are converted to 0, and the only // other case is positive overflow which is converted to // UINT64_MAX. Label positive; ScratchDoubleScope fpscratch(*this); loadConstantDouble(0.0, fpscratch); branchDouble(Assembler::DoubleGreaterThan, input, fpscratch, &positive); move64(Imm64(0), output); jump(rejoin); bind(&positive); move64(Imm64(UINT64_MAX), output); } else { // Negative overflow is already saturated to INT64_MIN, so we only // have to handle NaN and positive overflow here. Label notNaN; branchDouble(Assembler::DoubleOrdered, input, input, ¬NaN); move64(Imm64(0), output); jump(rejoin); bind(¬NaN); ScratchDoubleScope fpscratch(*this); loadConstantDouble(0.0, fpscratch); branchDouble(Assembler::DoubleLessThan, input, fpscratch, rejoin); sub64(Imm64(1), output); } jump(rejoin); return; } AutoHandleWasmTruncateToIntErrors traps(*this, off); // Eagerly take care of NaNs. branchDouble(Assembler::DoubleUnordered, input, input, &traps.inputIsNaN); // Handle special values. if (isUnsigned) { ScratchDoubleScope fpscratch(*this); loadConstantDouble(0.0, fpscratch); branchDouble(Assembler::DoubleGreaterThan, input, fpscratch, &traps.intOverflow); loadConstantDouble(-1.0, fpscratch); branchDouble(Assembler::DoubleLessThanOrEqual, input, fpscratch, &traps.intOverflow); jump(rejoin); return; } // We've used vcvtsd2sq. The only legit value whose i64 // truncation is INT64_MIN is double(INT64_MIN): exponent is so // high that the highest resolution around is much more than 1. 
ScratchDoubleScope fpscratch(*this); loadConstantDouble(double(int64_t(INT64_MIN)), fpscratch); branchDouble(Assembler::DoubleNotEqual, input, fpscratch, &traps.intOverflow); jump(rejoin); } void MacroAssembler::oolWasmTruncateCheckF32ToI64(FloatRegister input, Register64 output, TruncFlags flags, wasm::BytecodeOffset off, Label* rejoin) { bool isUnsigned = flags & TRUNC_UNSIGNED; bool isSaturating = flags & TRUNC_SATURATING; if (isSaturating) { if (isUnsigned) { // Negative overflow and NaN both are converted to 0, and the only // other case is positive overflow which is converted to // UINT64_MAX. Label positive; ScratchFloat32Scope fpscratch(*this); loadConstantFloat32(0.0f, fpscratch); branchFloat(Assembler::DoubleGreaterThan, input, fpscratch, &positive); move64(Imm64(0), output); jump(rejoin); bind(&positive); move64(Imm64(UINT64_MAX), output); } else { // Negative overflow is already saturated to INT64_MIN, so we only // have to handle NaN and positive overflow here. Label notNaN; branchFloat(Assembler::DoubleOrdered, input, input, ¬NaN); move64(Imm64(0), output); jump(rejoin); bind(¬NaN); ScratchFloat32Scope fpscratch(*this); loadConstantFloat32(0.0f, fpscratch); branchFloat(Assembler::DoubleLessThan, input, fpscratch, rejoin); sub64(Imm64(1), output); } jump(rejoin); return; } AutoHandleWasmTruncateToIntErrors traps(*this, off); // Eagerly take care of NaNs. branchFloat(Assembler::DoubleUnordered, input, input, &traps.inputIsNaN); // Handle special values. if (isUnsigned) { ScratchFloat32Scope fpscratch(*this); loadConstantFloat32(0.0f, fpscratch); branchFloat(Assembler::DoubleGreaterThan, input, fpscratch, &traps.intOverflow); loadConstantFloat32(-1.0f, fpscratch); branchFloat(Assembler::DoubleLessThanOrEqual, input, fpscratch, &traps.intOverflow); jump(rejoin); return; } // We've used vcvtss2sq. See comment in outOfLineWasmTruncateDoubleToInt64. 
ScratchFloat32Scope fpscratch(*this); loadConstantFloat32(float(int64_t(INT64_MIN)), fpscratch); branchFloat(Assembler::DoubleNotEqual, input, fpscratch, &traps.intOverflow); jump(rejoin); } void MacroAssembler::enterFakeExitFrameForWasm(Register cxreg, Register scratch, ExitFrameType type) { enterFakeExitFrame(cxreg, scratch, type); } // ======================================================================== // Primitive atomic operations. static void ExtendTo32(MacroAssembler& masm, Scalar::Type type, Register r) { switch (Scalar::byteSize(type)) { case 1: if (Scalar::isSignedIntType(type)) { masm.movsbl(r, r); } else { masm.movzbl(r, r); } break; case 2: if (Scalar::isSignedIntType(type)) { masm.movswl(r, r); } else { masm.movzwl(r, r); } break; default: break; } } static inline void CheckBytereg(Register r) { #ifdef DEBUG AllocatableGeneralRegisterSet byteRegs(Registers::SingleByteRegs); MOZ_ASSERT(byteRegs.has(r)); #endif } static inline void CheckBytereg(Imm32 r) { // Nothing } template static void CompareExchange(MacroAssembler& masm, const wasm::MemoryAccessDesc* access, Scalar::Type type, const T& mem, Register oldval, Register newval, Register output) { MOZ_ASSERT(output == eax); if (oldval != output) { masm.movl(oldval, output); } if (access) { masm.append(*access, masm.size()); } // NOTE: the generated code must match the assembly code in gen_cmpxchg in // GenerateAtomicOperations.py switch (Scalar::byteSize(type)) { case 1: CheckBytereg(newval); masm.lock_cmpxchgb(newval, Operand(mem)); break; case 2: masm.lock_cmpxchgw(newval, Operand(mem)); break; case 4: masm.lock_cmpxchgl(newval, Operand(mem)); break; } ExtendTo32(masm, type, output); } void MacroAssembler::compareExchange(Scalar::Type type, const Synchronization&, const Address& mem, Register oldval, Register newval, Register output) { CompareExchange(*this, nullptr, type, mem, oldval, newval, output); } void MacroAssembler::compareExchange(Scalar::Type type, const Synchronization&, const 
BaseIndex& mem, Register oldval, Register newval, Register output) { CompareExchange(*this, nullptr, type, mem, oldval, newval, output); } void MacroAssembler::wasmCompareExchange(const wasm::MemoryAccessDesc& access, const Address& mem, Register oldval, Register newval, Register output) { CompareExchange(*this, &access, access.type(), mem, oldval, newval, output); } void MacroAssembler::wasmCompareExchange(const wasm::MemoryAccessDesc& access, const BaseIndex& mem, Register oldval, Register newval, Register output) { CompareExchange(*this, &access, access.type(), mem, oldval, newval, output); } template static void AtomicExchange(MacroAssembler& masm, const wasm::MemoryAccessDesc* access, Scalar::Type type, const T& mem, Register value, Register output) // NOTE: the generated code must match the assembly code in gen_exchange in // GenerateAtomicOperations.py { if (value != output) { masm.movl(value, output); } if (access) { masm.append(*access, masm.size()); } switch (Scalar::byteSize(type)) { case 1: CheckBytereg(output); masm.xchgb(output, Operand(mem)); break; case 2: masm.xchgw(output, Operand(mem)); break; case 4: masm.xchgl(output, Operand(mem)); break; default: MOZ_CRASH("Invalid"); } ExtendTo32(masm, type, output); } void MacroAssembler::atomicExchange(Scalar::Type type, const Synchronization&, const Address& mem, Register value, Register output) { AtomicExchange(*this, nullptr, type, mem, value, output); } void MacroAssembler::atomicExchange(Scalar::Type type, const Synchronization&, const BaseIndex& mem, Register value, Register output) { AtomicExchange(*this, nullptr, type, mem, value, output); } void MacroAssembler::wasmAtomicExchange(const wasm::MemoryAccessDesc& access, const Address& mem, Register value, Register output) { AtomicExchange(*this, &access, access.type(), mem, value, output); } void MacroAssembler::wasmAtomicExchange(const wasm::MemoryAccessDesc& access, const BaseIndex& mem, Register value, Register output) { AtomicExchange(*this, 
&access, access.type(), mem, value, output); } static void SetupValue(MacroAssembler& masm, AtomicOp op, Imm32 src, Register output) { if (op == AtomicFetchSubOp) { masm.movl(Imm32(-src.value), output); } else { masm.movl(src, output); } } static void SetupValue(MacroAssembler& masm, AtomicOp op, Register src, Register output) { if (src != output) { masm.movl(src, output); } if (op == AtomicFetchSubOp) { masm.negl(output); } } template static void AtomicFetchOp(MacroAssembler& masm, const wasm::MemoryAccessDesc* access, Scalar::Type arrayType, AtomicOp op, V value, const T& mem, Register temp, Register output) { // Note value can be an Imm or a Register. // NOTE: the generated code must match the assembly code in gen_fetchop in // GenerateAtomicOperations.py #define ATOMIC_BITOP_BODY(LOAD, OP, LOCK_CMPXCHG) \ do { \ MOZ_ASSERT(output != temp); \ MOZ_ASSERT(output == eax); \ if (access) masm.append(*access, masm.size()); \ masm.LOAD(Operand(mem), eax); \ Label again; \ masm.bind(&again); \ masm.movl(eax, temp); \ masm.OP(value, temp); \ masm.LOCK_CMPXCHG(temp, Operand(mem)); \ masm.j(MacroAssembler::NonZero, &again); \ } while (0) MOZ_ASSERT_IF(op == AtomicFetchAddOp || op == AtomicFetchSubOp, temp == InvalidReg); switch (Scalar::byteSize(arrayType)) { case 1: CheckBytereg(output); switch (op) { case AtomicFetchAddOp: case AtomicFetchSubOp: CheckBytereg(value); // But not for the bitwise ops SetupValue(masm, op, value, output); if (access) masm.append(*access, masm.size()); masm.lock_xaddb(output, Operand(mem)); break; case AtomicFetchAndOp: CheckBytereg(temp); ATOMIC_BITOP_BODY(movb, andl, lock_cmpxchgb); break; case AtomicFetchOrOp: CheckBytereg(temp); ATOMIC_BITOP_BODY(movb, orl, lock_cmpxchgb); break; case AtomicFetchXorOp: CheckBytereg(temp); ATOMIC_BITOP_BODY(movb, xorl, lock_cmpxchgb); break; default: MOZ_CRASH(); } break; case 2: switch (op) { case AtomicFetchAddOp: case AtomicFetchSubOp: SetupValue(masm, op, value, output); if (access) masm.append(*access, 
masm.size()); masm.lock_xaddw(output, Operand(mem)); break; case AtomicFetchAndOp: ATOMIC_BITOP_BODY(movw, andl, lock_cmpxchgw); break; case AtomicFetchOrOp: ATOMIC_BITOP_BODY(movw, orl, lock_cmpxchgw); break; case AtomicFetchXorOp: ATOMIC_BITOP_BODY(movw, xorl, lock_cmpxchgw); break; default: MOZ_CRASH(); } break; case 4: switch (op) { case AtomicFetchAddOp: case AtomicFetchSubOp: SetupValue(masm, op, value, output); if (access) masm.append(*access, masm.size()); masm.lock_xaddl(output, Operand(mem)); break; case AtomicFetchAndOp: ATOMIC_BITOP_BODY(movl, andl, lock_cmpxchgl); break; case AtomicFetchOrOp: ATOMIC_BITOP_BODY(movl, orl, lock_cmpxchgl); break; case AtomicFetchXorOp: ATOMIC_BITOP_BODY(movl, xorl, lock_cmpxchgl); break; default: MOZ_CRASH(); } break; } ExtendTo32(masm, arrayType, output); #undef ATOMIC_BITOP_BODY } void MacroAssembler::atomicFetchOp(Scalar::Type arrayType, const Synchronization&, AtomicOp op, Register value, const BaseIndex& mem, Register temp, Register output) { AtomicFetchOp(*this, nullptr, arrayType, op, value, mem, temp, output); } void MacroAssembler::atomicFetchOp(Scalar::Type arrayType, const Synchronization&, AtomicOp op, Register value, const Address& mem, Register temp, Register output) { AtomicFetchOp(*this, nullptr, arrayType, op, value, mem, temp, output); } void MacroAssembler::atomicFetchOp(Scalar::Type arrayType, const Synchronization&, AtomicOp op, Imm32 value, const BaseIndex& mem, Register temp, Register output) { AtomicFetchOp(*this, nullptr, arrayType, op, value, mem, temp, output); } void MacroAssembler::atomicFetchOp(Scalar::Type arrayType, const Synchronization&, AtomicOp op, Imm32 value, const Address& mem, Register temp, Register output) { AtomicFetchOp(*this, nullptr, arrayType, op, value, mem, temp, output); } void MacroAssembler::wasmAtomicFetchOp(const wasm::MemoryAccessDesc& access, AtomicOp op, Register value, const Address& mem, Register temp, Register output) { AtomicFetchOp(*this, &access, 
access.type(), op, value, mem, temp, output); } void MacroAssembler::wasmAtomicFetchOp(const wasm::MemoryAccessDesc& access, AtomicOp op, Imm32 value, const Address& mem, Register temp, Register output) { AtomicFetchOp(*this, &access, access.type(), op, value, mem, temp, output); } void MacroAssembler::wasmAtomicFetchOp(const wasm::MemoryAccessDesc& access, AtomicOp op, Register value, const BaseIndex& mem, Register temp, Register output) { AtomicFetchOp(*this, &access, access.type(), op, value, mem, temp, output); } void MacroAssembler::wasmAtomicFetchOp(const wasm::MemoryAccessDesc& access, AtomicOp op, Imm32 value, const BaseIndex& mem, Register temp, Register output) { AtomicFetchOp(*this, &access, access.type(), op, value, mem, temp, output); } template static void AtomicEffectOp(MacroAssembler& masm, const wasm::MemoryAccessDesc* access, Scalar::Type arrayType, AtomicOp op, V value, const T& mem) { if (access) { masm.append(*access, masm.size()); } switch (Scalar::byteSize(arrayType)) { case 1: switch (op) { case AtomicFetchAddOp: masm.lock_addb(value, Operand(mem)); break; case AtomicFetchSubOp: masm.lock_subb(value, Operand(mem)); break; case AtomicFetchAndOp: masm.lock_andb(value, Operand(mem)); break; case AtomicFetchOrOp: masm.lock_orb(value, Operand(mem)); break; case AtomicFetchXorOp: masm.lock_xorb(value, Operand(mem)); break; default: MOZ_CRASH(); } break; case 2: switch (op) { case AtomicFetchAddOp: masm.lock_addw(value, Operand(mem)); break; case AtomicFetchSubOp: masm.lock_subw(value, Operand(mem)); break; case AtomicFetchAndOp: masm.lock_andw(value, Operand(mem)); break; case AtomicFetchOrOp: masm.lock_orw(value, Operand(mem)); break; case AtomicFetchXorOp: masm.lock_xorw(value, Operand(mem)); break; default: MOZ_CRASH(); } break; case 4: switch (op) { case AtomicFetchAddOp: masm.lock_addl(value, Operand(mem)); break; case AtomicFetchSubOp: masm.lock_subl(value, Operand(mem)); break; case AtomicFetchAndOp: masm.lock_andl(value, Operand(mem)); 
break; case AtomicFetchOrOp: masm.lock_orl(value, Operand(mem)); break; case AtomicFetchXorOp: masm.lock_xorl(value, Operand(mem)); break; default: MOZ_CRASH(); } break; default: MOZ_CRASH(); } } void MacroAssembler::wasmAtomicEffectOp(const wasm::MemoryAccessDesc& access, AtomicOp op, Register value, const Address& mem, Register temp) { MOZ_ASSERT(temp == InvalidReg); AtomicEffectOp(*this, &access, access.type(), op, value, mem); } void MacroAssembler::wasmAtomicEffectOp(const wasm::MemoryAccessDesc& access, AtomicOp op, Imm32 value, const Address& mem, Register temp) { MOZ_ASSERT(temp == InvalidReg); AtomicEffectOp(*this, &access, access.type(), op, value, mem); } void MacroAssembler::wasmAtomicEffectOp(const wasm::MemoryAccessDesc& access, AtomicOp op, Register value, const BaseIndex& mem, Register temp) { MOZ_ASSERT(temp == InvalidReg); AtomicEffectOp(*this, &access, access.type(), op, value, mem); } void MacroAssembler::wasmAtomicEffectOp(const wasm::MemoryAccessDesc& access, AtomicOp op, Imm32 value, const BaseIndex& mem, Register temp) { MOZ_ASSERT(temp == InvalidReg); AtomicEffectOp(*this, &access, access.type(), op, value, mem); } // ======================================================================== // JS atomic operations. 
template static void CompareExchangeJS(MacroAssembler& masm, Scalar::Type arrayType, const Synchronization& sync, const T& mem, Register oldval, Register newval, Register temp, AnyRegister output) { if (arrayType == Scalar::Uint32) { masm.compareExchange(arrayType, sync, mem, oldval, newval, temp); masm.convertUInt32ToDouble(temp, output.fpu()); } else { masm.compareExchange(arrayType, sync, mem, oldval, newval, output.gpr()); } } void MacroAssembler::compareExchangeJS(Scalar::Type arrayType, const Synchronization& sync, const Address& mem, Register oldval, Register newval, Register temp, AnyRegister output) { CompareExchangeJS(*this, arrayType, sync, mem, oldval, newval, temp, output); } void MacroAssembler::compareExchangeJS(Scalar::Type arrayType, const Synchronization& sync, const BaseIndex& mem, Register oldval, Register newval, Register temp, AnyRegister output) { CompareExchangeJS(*this, arrayType, sync, mem, oldval, newval, temp, output); } template static void AtomicExchangeJS(MacroAssembler& masm, Scalar::Type arrayType, const Synchronization& sync, const T& mem, Register value, Register temp, AnyRegister output) { if (arrayType == Scalar::Uint32) { masm.atomicExchange(arrayType, sync, mem, value, temp); masm.convertUInt32ToDouble(temp, output.fpu()); } else { masm.atomicExchange(arrayType, sync, mem, value, output.gpr()); } } void MacroAssembler::atomicExchangeJS(Scalar::Type arrayType, const Synchronization& sync, const Address& mem, Register value, Register temp, AnyRegister output) { AtomicExchangeJS(*this, arrayType, sync, mem, value, temp, output); } void MacroAssembler::atomicExchangeJS(Scalar::Type arrayType, const Synchronization& sync, const BaseIndex& mem, Register value, Register temp, AnyRegister output) { AtomicExchangeJS(*this, arrayType, sync, mem, value, temp, output); } template static void AtomicFetchOpJS(MacroAssembler& masm, Scalar::Type arrayType, const Synchronization& sync, AtomicOp op, Register value, const T& mem, Register temp1, 
Register temp2, AnyRegister output) { if (arrayType == Scalar::Uint32) { masm.atomicFetchOp(arrayType, sync, op, value, mem, temp2, temp1); masm.convertUInt32ToDouble(temp1, output.fpu()); } else { masm.atomicFetchOp(arrayType, sync, op, value, mem, temp1, output.gpr()); } } void MacroAssembler::atomicFetchOpJS(Scalar::Type arrayType, const Synchronization& sync, AtomicOp op, Register value, const Address& mem, Register temp1, Register temp2, AnyRegister output) { AtomicFetchOpJS(*this, arrayType, sync, op, value, mem, temp1, temp2, output); } void MacroAssembler::atomicFetchOpJS(Scalar::Type arrayType, const Synchronization& sync, AtomicOp op, Register value, const BaseIndex& mem, Register temp1, Register temp2, AnyRegister output) { AtomicFetchOpJS(*this, arrayType, sync, op, value, mem, temp1, temp2, output); } void MacroAssembler::atomicEffectOpJS(Scalar::Type arrayType, const Synchronization&, AtomicOp op, Register value, const BaseIndex& mem, Register temp) { MOZ_ASSERT(temp == InvalidReg); AtomicEffectOp(*this, nullptr, arrayType, op, value, mem); } void MacroAssembler::atomicEffectOpJS(Scalar::Type arrayType, const Synchronization&, AtomicOp op, Register value, const Address& mem, Register temp) { MOZ_ASSERT(temp == InvalidReg); AtomicEffectOp(*this, nullptr, arrayType, op, value, mem); } void MacroAssembler::atomicEffectOpJS(Scalar::Type arrayType, const Synchronization&, AtomicOp op, Imm32 value, const Address& mem, Register temp) { MOZ_ASSERT(temp == InvalidReg); AtomicEffectOp(*this, nullptr, arrayType, op, value, mem); } void MacroAssembler::atomicEffectOpJS(Scalar::Type arrayType, const Synchronization& sync, AtomicOp op, Imm32 value, const BaseIndex& mem, Register temp) { MOZ_ASSERT(temp == InvalidReg); AtomicEffectOp(*this, nullptr, arrayType, op, value, mem); } template static void AtomicFetchOpJS(MacroAssembler& masm, Scalar::Type arrayType, const Synchronization& sync, AtomicOp op, Imm32 value, const T& mem, Register temp1, Register temp2, 
AnyRegister output) { if (arrayType == Scalar::Uint32) { masm.atomicFetchOp(arrayType, sync, op, value, mem, temp2, temp1); masm.convertUInt32ToDouble(temp1, output.fpu()); } else { masm.atomicFetchOp(arrayType, sync, op, value, mem, temp1, output.gpr()); } } void MacroAssembler::atomicFetchOpJS(Scalar::Type arrayType, const Synchronization& sync, AtomicOp op, Imm32 value, const Address& mem, Register temp1, Register temp2, AnyRegister output) { AtomicFetchOpJS(*this, arrayType, sync, op, value, mem, temp1, temp2, output); } void MacroAssembler::atomicFetchOpJS(Scalar::Type arrayType, const Synchronization& sync, AtomicOp op, Imm32 value, const BaseIndex& mem, Register temp1, Register temp2, AnyRegister output) { AtomicFetchOpJS(*this, arrayType, sync, op, value, mem, temp1, temp2, output); } // ======================================================================== // Spectre Mitigations. void MacroAssembler::speculationBarrier() { // Spectre mitigation recommended by Intel and AMD suggest to use lfence as // a way to force all speculative execution of instructions to end. MOZ_ASSERT(HasSSE2()); masm.lfence(); } void MacroAssembler::floorFloat32ToInt32(FloatRegister src, Register dest, Label* fail) { if (HasSSE41()) { // Fail on negative-zero. branchNegativeZeroFloat32(src, dest, fail); // Round toward -Infinity. { ScratchFloat32Scope scratch(*this); vroundss(X86Encoding::RoundDown, src, scratch); truncateFloat32ToInt32(scratch, dest, fail); } } else { Label negative, end; // Branch to a slow path for negative inputs. Doesn't catch NaN or -0. { ScratchFloat32Scope scratch(*this); zeroFloat32(scratch); branchFloat(Assembler::DoubleLessThan, src, scratch, &negative); } // Fail on negative-zero. branchNegativeZeroFloat32(src, dest, fail); // Input is non-negative, so truncation correctly rounds. truncateFloat32ToInt32(src, dest, fail); jump(&end); // Input is negative, but isn't -0. 
// Negative values go on a comparatively expensive path, since no // native rounding mode matches JS semantics. Still better than callVM. bind(&negative); { // Truncate and round toward zero. // This is off-by-one for everything but integer-valued inputs. truncateFloat32ToInt32(src, dest, fail); // Test whether the input double was integer-valued. { ScratchFloat32Scope scratch(*this); convertInt32ToFloat32(dest, scratch); branchFloat(Assembler::DoubleEqualOrUnordered, src, scratch, &end); } // Input is not integer-valued, so we rounded off-by-one in the // wrong direction. Correct by subtraction. subl(Imm32(1), dest); // Cannot overflow: output was already checked against INT_MIN. } bind(&end); } } void MacroAssembler::floorDoubleToInt32(FloatRegister src, Register dest, Label* fail) { if (HasSSE41()) { // Fail on negative-zero. branchNegativeZero(src, dest, fail); // Round toward -Infinity. { ScratchDoubleScope scratch(*this); vroundsd(X86Encoding::RoundDown, src, scratch); truncateDoubleToInt32(scratch, dest, fail); } } else { Label negative, end; // Branch to a slow path for negative inputs. Doesn't catch NaN or -0. { ScratchDoubleScope scratch(*this); zeroDouble(scratch); branchDouble(Assembler::DoubleLessThan, src, scratch, &negative); } // Fail on negative-zero. branchNegativeZero(src, dest, fail); // Input is non-negative, so truncation correctly rounds. truncateDoubleToInt32(src, dest, fail); jump(&end); // Input is negative, but isn't -0. // Negative values go on a comparatively expensive path, since no // native rounding mode matches JS semantics. Still better than callVM. bind(&negative); { // Truncate and round toward zero. // This is off-by-one for everything but integer-valued inputs. truncateDoubleToInt32(src, dest, fail); // Test whether the input double was integer-valued. 
{ ScratchDoubleScope scratch(*this); convertInt32ToDouble(dest, scratch); branchDouble(Assembler::DoubleEqualOrUnordered, src, scratch, &end); } // Input is not integer-valued, so we rounded off-by-one in the // wrong direction. Correct by subtraction. subl(Imm32(1), dest); // Cannot overflow: output was already checked against INT_MIN. } bind(&end); } } void MacroAssembler::ceilFloat32ToInt32(FloatRegister src, Register dest, Label* fail) { ScratchFloat32Scope scratch(*this); Label lessThanOrEqualMinusOne; // If x is in ]-1,0], ceil(x) is -0, which cannot be represented as an int32. // Fail if x > -1 and the sign bit is set. loadConstantFloat32(-1.f, scratch); branchFloat(Assembler::DoubleLessThanOrEqualOrUnordered, src, scratch, &lessThanOrEqualMinusOne); vmovmskps(src, dest); branchTest32(Assembler::NonZero, dest, Imm32(1), fail); if (HasSSE41()) { // x <= -1 or x > -0 bind(&lessThanOrEqualMinusOne); // Round toward +Infinity. vroundss(X86Encoding::RoundUp, src, scratch); truncateFloat32ToInt32(scratch, dest, fail); return; } // No SSE4.1 Label end; // x >= 0 and x is not -0.0. We can truncate integer values, and truncate and // add 1 to non-integer values. This will also work for values >= INT_MAX + 1, // as the truncate operation will return INT_MIN and we'll fail. truncateFloat32ToInt32(src, dest, fail); convertInt32ToFloat32(dest, scratch); branchFloat(Assembler::DoubleEqualOrUnordered, src, scratch, &end); // Input is not integer-valued, add 1 to obtain the ceiling value. // If input > INT_MAX, output == INT_MAX so adding 1 will overflow. branchAdd32(Assembler::Overflow, Imm32(1), dest, fail); jump(&end); // x <= -1, truncation is the way to go. 
bind(&lessThanOrEqualMinusOne); truncateFloat32ToInt32(src, dest, fail); bind(&end); } void MacroAssembler::ceilDoubleToInt32(FloatRegister src, Register dest, Label* fail) { ScratchDoubleScope scratch(*this); Label lessThanOrEqualMinusOne; // If x is in ]-1,0], ceil(x) is -0, which cannot be represented as an int32. // Fail if x > -1 and the sign bit is set. loadConstantDouble(-1.0, scratch); branchDouble(Assembler::DoubleLessThanOrEqualOrUnordered, src, scratch, &lessThanOrEqualMinusOne); vmovmskpd(src, dest); branchTest32(Assembler::NonZero, dest, Imm32(1), fail); if (HasSSE41()) { // x <= -1 or x > -0 bind(&lessThanOrEqualMinusOne); // Round toward +Infinity. vroundsd(X86Encoding::RoundUp, src, scratch); truncateDoubleToInt32(scratch, dest, fail); return; } // No SSE4.1 Label end; // x >= 0 and x is not -0.0. We can truncate integer values, and truncate and // add 1 to non-integer values. This will also work for values >= INT_MAX + 1, // as the truncate operation will return INT_MIN and we'll fail. truncateDoubleToInt32(src, dest, fail); convertInt32ToDouble(dest, scratch); branchDouble(Assembler::DoubleEqualOrUnordered, src, scratch, &end); // Input is not integer-valued, add 1 to obtain the ceiling value. // If input > INT_MAX, output == INT_MAX so adding 1 will overflow. branchAdd32(Assembler::Overflow, Imm32(1), dest, fail); jump(&end); // x <= -1, truncation is the way to go. bind(&lessThanOrEqualMinusOne); truncateDoubleToInt32(src, dest, fail); bind(&end); } void MacroAssembler::truncDoubleToInt32(FloatRegister src, Register dest, Label* fail) { Label lessThanOrEqualMinusOne; // Bail on ]-1; -0] range { ScratchDoubleScope scratch(*this); loadConstantDouble(-1, scratch); branchDouble(Assembler::DoubleLessThanOrEqualOrUnordered, src, scratch, &lessThanOrEqualMinusOne); } // Test for remaining values with the sign bit set, i.e. 
]-1; -0] vmovmskpd(src, dest); branchTest32(Assembler::NonZero, dest, Imm32(1), fail); // x <= -1 or x >= +0, truncation is the way to go. bind(&lessThanOrEqualMinusOne); truncateDoubleToInt32(src, dest, fail); } void MacroAssembler::truncFloat32ToInt32(FloatRegister src, Register dest, Label* fail) { Label lessThanOrEqualMinusOne; // Bail on ]-1; -0] range { ScratchFloat32Scope scratch(*this); loadConstantFloat32(-1.f, scratch); branchFloat(Assembler::DoubleLessThanOrEqualOrUnordered, src, scratch, &lessThanOrEqualMinusOne); } // Test for remaining values with the sign bit set, i.e. ]-1; -0] vmovmskps(src, dest); branchTest32(Assembler::NonZero, dest, Imm32(1), fail); // x <= -1 or x >= +0, truncation is the way to go. bind(&lessThanOrEqualMinusOne); truncateFloat32ToInt32(src, dest, fail); } void MacroAssembler::roundFloat32ToInt32(FloatRegister src, Register dest, FloatRegister temp, Label* fail) { ScratchFloat32Scope scratch(*this); Label negativeOrZero, negative, end; // Branch to a slow path for non-positive inputs. Doesn't catch NaN. zeroFloat32(scratch); loadConstantFloat32(GetBiggestNumberLessThan(0.5f), temp); branchFloat(Assembler::DoubleLessThanOrEqual, src, scratch, &negativeOrZero); { // Input is strictly positive or NaN. Add the biggest float less than 0.5 // and truncate, rounding down (because if the input is the biggest float // less than 0.5, adding 0.5 would undesirably round up to 1). Note that we // have to add the input to the temp register because we're not allowed to // modify the input register. addFloat32(src, temp); truncateFloat32ToInt32(temp, dest, fail); jump(&end); } // Input is negative, +0 or -0. bind(&negativeOrZero); { // Branch on negative input. j(Assembler::NotEqual, &negative); // Fail on negative-zero. branchNegativeZeroFloat32(src, dest, fail); // Input is +0. xor32(dest, dest); jump(&end); } // Input is negative. bind(&negative); { // Inputs in [-0.5, 0) are rounded to -0. Fail. 
loadConstantFloat32(-0.5f, scratch); branchFloat(Assembler::DoubleGreaterThanOrEqual, src, scratch, fail); // Other negative inputs need the biggest float less than 0.5 added. // // The result is stored in the temp register (currently contains the biggest // float less than 0.5). addFloat32(src, temp); if (HasSSE41()) { // Round toward -Infinity. vroundss(X86Encoding::RoundDown, temp, scratch); // Truncate. truncateFloat32ToInt32(scratch, dest, fail); } else { // Round toward -Infinity without the benefit of ROUNDSS. // Truncate and round toward zero. // This is off-by-one for everything but integer-valued inputs. truncateFloat32ToInt32(temp, dest, fail); // Test whether the truncated float was integer-valued. convertInt32ToFloat32(dest, scratch); branchFloat(Assembler::DoubleEqualOrUnordered, temp, scratch, &end); // Input is not integer-valued, so we rounded off-by-one in the // wrong direction. Correct by subtraction. subl(Imm32(1), dest); // Cannot overflow: output was already checked against INT_MIN. } } bind(&end); } void MacroAssembler::roundDoubleToInt32(FloatRegister src, Register dest, FloatRegister temp, Label* fail) { ScratchDoubleScope scratch(*this); Label negativeOrZero, negative, end; // Branch to a slow path for non-positive inputs. Doesn't catch NaN. zeroDouble(scratch); loadConstantDouble(GetBiggestNumberLessThan(0.5), temp); branchDouble(Assembler::DoubleLessThanOrEqual, src, scratch, &negativeOrZero); { // Input is strictly positive or NaN. Add the biggest double less than 0.5 // and truncate, rounding down (because if the input is the biggest double // less than 0.5, adding 0.5 would undesirably round up to 1). Note that we // have to add the input to the temp register because we're not allowed to // modify the input register. addDouble(src, temp); truncateDoubleToInt32(temp, dest, fail); jump(&end); } // Input is negative, +0 or -0. bind(&negativeOrZero); { // Branch on negative input. 
j(Assembler::NotEqual, &negative); // Fail on negative-zero. branchNegativeZero(src, dest, fail, /* maybeNonZero = */ false); // Input is +0 xor32(dest, dest); jump(&end); } // Input is negative. bind(&negative); { // Inputs in [-0.5, 0) are rounded to -0. Fail. loadConstantDouble(-0.5, scratch); branchDouble(Assembler::DoubleGreaterThanOrEqual, src, scratch, fail); // Other negative inputs need the biggest double less than 0.5 added. // // The result is stored in the temp register (currently contains the biggest // double less than 0.5). addDouble(src, temp); if (HasSSE41()) { // Round toward -Infinity. vroundsd(X86Encoding::RoundDown, temp, scratch); // Truncate. truncateDoubleToInt32(scratch, dest, fail); } else { // Round toward -Infinity without the benefit of ROUNDSD. // Truncate and round toward zero. // This is off-by-one for everything but integer-valued inputs. truncateDoubleToInt32(temp, dest, fail); // Test whether the truncated double was integer-valued. convertInt32ToDouble(dest, scratch); branchDouble(Assembler::DoubleEqualOrUnordered, temp, scratch, &end); // Input is not integer-valued, so we rounded off-by-one in the // wrong direction. Correct by subtraction. subl(Imm32(1), dest); // Cannot overflow: output was already checked against INT_MIN. 
} } bind(&end); } void MacroAssembler::nearbyIntDouble(RoundingMode mode, FloatRegister src, FloatRegister dest) { MOZ_ASSERT(HasRoundInstruction(mode)); vroundsd(Assembler::ToX86RoundingMode(mode), src, dest); } void MacroAssembler::nearbyIntFloat32(RoundingMode mode, FloatRegister src, FloatRegister dest) { MOZ_ASSERT(HasRoundInstruction(mode)); vroundss(Assembler::ToX86RoundingMode(mode), src, dest); } void MacroAssembler::copySignDouble(FloatRegister lhs, FloatRegister rhs, FloatRegister output) { ScratchDoubleScope scratch(*this); // TODO Support AVX2 if (rhs == output) { MOZ_ASSERT(lhs != rhs); double keepSignMask = mozilla::BitwiseCast(INT64_MIN); loadConstantDouble(keepSignMask, scratch); vandpd(scratch, rhs, output); double clearSignMask = mozilla::BitwiseCast(INT64_MAX); loadConstantDouble(clearSignMask, scratch); vandpd(lhs, scratch, scratch); } else { double clearSignMask = mozilla::BitwiseCast(INT64_MAX); loadConstantDouble(clearSignMask, scratch); vandpd(scratch, lhs, output); double keepSignMask = mozilla::BitwiseCast(INT64_MIN); loadConstantDouble(keepSignMask, scratch); vandpd(rhs, scratch, scratch); } vorpd(scratch, output, output); } void MacroAssembler::copySignFloat32(FloatRegister lhs, FloatRegister rhs, FloatRegister output) { ScratchFloat32Scope scratch(*this); // TODO Support AVX2 if (rhs == output) { MOZ_ASSERT(lhs != rhs); float keepSignMask = mozilla::BitwiseCast(INT32_MIN); loadConstantFloat32(keepSignMask, scratch); vandps(scratch, output, output); float clearSignMask = mozilla::BitwiseCast(INT32_MAX); loadConstantFloat32(clearSignMask, scratch); vandps(lhs, scratch, scratch); } else { float clearSignMask = mozilla::BitwiseCast(INT32_MAX); loadConstantFloat32(clearSignMask, scratch); vandps(scratch, lhs, output); float keepSignMask = mozilla::BitwiseCast(INT32_MIN); loadConstantFloat32(keepSignMask, scratch); vandps(rhs, scratch, scratch); } vorps(scratch, output, output); } void MacroAssembler::shiftIndex32AndAdd(Register indexTemp32, 
int shift, Register pointer) { if (IsShiftInScaleRange(shift)) { computeEffectiveAddress( BaseIndex(pointer, indexTemp32, ShiftToScale(shift)), pointer); return; } lshift32(Imm32(shift), indexTemp32); addPtr(indexTemp32, pointer); } //}}} check_macroassembler_style