/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "jit/arm64/MacroAssembler-arm64.h"

#include "mozilla/MathAlgorithms.h"
#include "mozilla/Maybe.h"

#include "jsmath.h"

#include "jit/arm64/MoveEmitter-arm64.h"
#include "jit/arm64/SharedICRegisters-arm64.h"
#include "jit/Bailouts.h"
#include "jit/BaselineFrame.h"
#include "jit/JitRuntime.h"
#include "jit/MacroAssembler.h"
#include "util/Memory.h"
#include "vm/BigIntType.h"
#include "vm/JitActivation.h"  // js::jit::JitActivation
#include "vm/JSContext.h"
#include "vm/StringType.h"

#include "jit/MacroAssembler-inl.h"

namespace js {
namespace jit {

enum class Width { _32 = 32, _64 = 64 };

static inline ARMRegister X(Register r) { return ARMRegister(r, 64); }

static inline ARMRegister X(MacroAssembler& masm, RegisterOrSP r) {
  return masm.toARMRegister(r, 64);
}

static inline ARMRegister W(Register r) { return ARMRegister(r, 32); }

static inline ARMRegister R(Register r, Width w) {
  return ARMRegister(r, unsigned(w));
}

void MacroAssemblerCompat::boxValue(JSValueType type, Register src,
                                    Register dest) {
#ifdef DEBUG
  if (type == JSVAL_TYPE_INT32 || type == JSVAL_TYPE_BOOLEAN) {
    Label upper32BitsZeroed;
    movePtr(ImmWord(UINT32_MAX), dest);
    asMasm().branchPtr(Assembler::BelowOrEqual, src, dest, &upper32BitsZeroed);
    breakpoint();
    bind(&upper32BitsZeroed);
  }
#endif
  Orr(ARMRegister(dest, 64), ARMRegister(src, 64),
      Operand(ImmShiftedTag(type).value));
}

#ifdef ENABLE_WASM_SIMD
bool MacroAssembler::MustMaskShiftCountSimd128(wasm::SimdOp op, int32_t* mask) {
  switch (op) {
    case wasm::SimdOp::I8x16Shl:
    case wasm::SimdOp::I8x16ShrU:
    case wasm::SimdOp::I8x16ShrS:
      *mask = 7;
      break;
    case wasm::SimdOp::I16x8Shl:
    case wasm::SimdOp::I16x8ShrU:
    case wasm::SimdOp::I16x8ShrS:
      *mask = 15;
      break;
    case wasm::SimdOp::I32x4Shl:
    case wasm::SimdOp::I32x4ShrU:
    case wasm::SimdOp::I32x4ShrS:
      *mask = 31;
      break;
    case wasm::SimdOp::I64x2Shl:
    case wasm::SimdOp::I64x2ShrU:
    case wasm::SimdOp::I64x2ShrS:
      *mask = 63;
      break;
    default:
      MOZ_CRASH("Unexpected shift operation");
  }
  return true;
}
#endif

void MacroAssembler::clampDoubleToUint8(FloatRegister input, Register output) {
  ARMRegister dest(output, 32);
  Fcvtns(dest, ARMFPRegister(input, 64));

  {
    vixl::UseScratchRegisterScope temps(this);
    const ARMRegister scratch32 = temps.AcquireW();

    Mov(scratch32, Operand(0xff));
    Cmp(dest, scratch32);
    Csel(dest, dest, scratch32, LessThan);
  }

  Cmp(dest, Operand(0));
  Csel(dest, dest, wzr, GreaterThan);
}

js::jit::MacroAssembler& MacroAssemblerCompat::asMasm() {
  return *static_cast<js::jit::MacroAssembler*>(this);
}

const js::jit::MacroAssembler& MacroAssemblerCompat::asMasm() const {
  return *static_cast<const js::jit::MacroAssembler*>(this);
}

vixl::MacroAssembler& MacroAssemblerCompat::asVIXL() {
  return *static_cast<vixl::MacroAssembler*>(this);
}

const vixl::MacroAssembler& MacroAssemblerCompat::asVIXL() const {
  return *static_cast<const vixl::MacroAssembler*>(this);
}

void MacroAssemblerCompat::mov(CodeLabel* label, Register dest) {
  BufferOffset bo = movePatchablePtr(ImmWord(/* placeholder */ 0), dest);
  label->patchAt()->bind(bo.getOffset());
  label->setLinkMode(CodeLabel::MoveImmediate);
}

BufferOffset MacroAssemblerCompat::movePatchablePtr(ImmPtr ptr, Register dest) {
  const size_t numInst = 1;           // Inserting one load instruction.
  const unsigned numPoolEntries = 2;  // Every pool entry is 4 bytes.
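  // Note: a 64-bit pointer occupies two of the 4-byte pool entries counted
  // above, hence numPoolEntries == 2.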
  uint8_t* literalAddr = (uint8_t*)(&ptr.value);  // TODO: Should be const.

  // Scratch space for generating the load instruction.
  //
  // allocLiteralLoadEntry() will use InsertIndexIntoTag() to store a temporary
  // index to the corresponding PoolEntry in the instruction itself.
  //
  // That index will be fixed up later when finishPool()
  // walks over all marked loads and calls PatchConstantPoolLoad().
  uint32_t instructionScratch = 0;

  // Emit the instruction mask in the scratch space.
  // The offset doesn't matter: it will be fixed up later.
  vixl::Assembler::ldr((Instruction*)&instructionScratch, ARMRegister(dest, 64),
                       0);

  // Add the entry to the pool, fix up the LDR imm19 offset,
  // and add the completed instruction to the buffer.
  return allocLiteralLoadEntry(numInst, numPoolEntries,
                               (uint8_t*)&instructionScratch, literalAddr);
}

BufferOffset MacroAssemblerCompat::movePatchablePtr(ImmWord ptr,
                                                    Register dest) {
  const size_t numInst = 1;           // Inserting one load instruction.
  const unsigned numPoolEntries = 2;  // Every pool entry is 4 bytes.
  uint8_t* literalAddr = (uint8_t*)(&ptr.value);

  // Scratch space for generating the load instruction.
  //
  // allocLiteralLoadEntry() will use InsertIndexIntoTag() to store a temporary
  // index to the corresponding PoolEntry in the instruction itself.
  //
  // That index will be fixed up later when finishPool()
  // walks over all marked loads and calls PatchConstantPoolLoad().
  uint32_t instructionScratch = 0;

  // Emit the instruction mask in the scratch space.
  // The offset doesn't matter: it will be fixed up later.
  vixl::Assembler::ldr((Instruction*)&instructionScratch, ARMRegister(dest, 64),
                       0);

  // Add the entry to the pool, fix up the LDR imm19 offset,
  // and add the completed instruction to the buffer.
  return allocLiteralLoadEntry(numInst, numPoolEntries,
                               (uint8_t*)&instructionScratch, literalAddr);
}

void MacroAssemblerCompat::loadPrivate(const Address& src, Register dest) {
  loadPtr(src, dest);
}

void MacroAssemblerCompat::handleFailureWithHandlerTail(Label* profilerExitTail,
                                                        Label* bailoutTail) {
  // Fail rather than silently create wrong code.
  MOZ_RELEASE_ASSERT(GetStackPointer64().Is(PseudoStackPointer64));

  // Reserve space for exception information.
  int64_t size = (sizeof(ResumeFromException) + 7) & ~7;
  Sub(PseudoStackPointer64, PseudoStackPointer64, Operand(size));
  syncStackPtr();

  MOZ_ASSERT(!x0.Is(PseudoStackPointer64));
  Mov(x0, PseudoStackPointer64);

  // Call the handler.
  using Fn = void (*)(ResumeFromException* rfe);
  asMasm().setupUnalignedABICall(r1);
  asMasm().passABIArg(r0);
  asMasm().callWithABI<Fn, HandleException>(
      ABIType::General, CheckUnsafeCallWithABI::DontCheckHasExitFrame);

  Label entryFrame;
  Label catch_;
  Label finally;
  Label returnBaseline;
  Label returnIon;
  Label bailout;
  Label wasm;
  Label wasmCatch;

  // Check the `asMasm` calls above didn't mess with the StackPointer identity.
MOZ_ASSERT(GetStackPointer64().Is(PseudoStackPointer64)); loadPtr(Address(PseudoStackPointer, ResumeFromException::offsetOfKind()), r0); asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::EntryFrame), &entryFrame); asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::Catch), &catch_); asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::Finally), &finally); asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::ForcedReturnBaseline), &returnBaseline); asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::ForcedReturnIon), &returnIon); asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::Bailout), &bailout); asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::Wasm), &wasm); asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::WasmCatch), &wasmCatch); breakpoint(); // Invalid kind. // No exception handler. Load the error value, restore state and return from // the entry frame. bind(&entryFrame); moveValue(MagicValue(JS_ION_ERROR), JSReturnOperand); loadPtr( Address(PseudoStackPointer, ResumeFromException::offsetOfFramePointer()), FramePointer); loadPtr( Address(PseudoStackPointer, ResumeFromException::offsetOfStackPointer()), PseudoStackPointer); // `retn` does indeed sync the stack pointer, but before doing that it reads // from the stack. Consequently, if we remove this call to syncStackPointer // then we take on the requirement to prove that the immediately preceding // loadPtr produces a value for PSP which maintains the SP <= PSP invariant. // That's a proof burden we don't want to take on. In general it would be // good to move (at some time in the future, not now) to a world where // *every* assignment to PSP or SP is followed immediately by a copy into // the other register. That would make all required correctness proofs // trivial in the sense that it requires only local inspection of code // immediately following (dominated by) any such assignment. syncStackPtr(); retn(Imm32(1 * sizeof(void*))); // Pop from stack and return. // If we found a catch handler, this must be a baseline frame. Restore state // and jump to the catch block. bind(&catch_); loadPtr(Address(PseudoStackPointer, ResumeFromException::offsetOfTarget()), r0); loadPtr( Address(PseudoStackPointer, ResumeFromException::offsetOfFramePointer()), FramePointer); loadPtr( Address(PseudoStackPointer, ResumeFromException::offsetOfStackPointer()), PseudoStackPointer); syncStackPtr(); Br(x0); // If we found a finally block, this must be a baseline frame. Push three // values expected by the finally block: the exception, the exception stack, // and BooleanValue(true). bind(&finally); ARMRegister exception = x1; Ldr(exception, MemOperand(PseudoStackPointer64, ResumeFromException::offsetOfException())); ARMRegister exceptionStack = x2; Ldr(exceptionStack, MemOperand(PseudoStackPointer64, ResumeFromException::offsetOfExceptionStack())); Ldr(x0, MemOperand(PseudoStackPointer64, ResumeFromException::offsetOfTarget())); Ldr(ARMRegister(FramePointer, 64), MemOperand(PseudoStackPointer64, ResumeFromException::offsetOfFramePointer())); Ldr(PseudoStackPointer64, MemOperand(PseudoStackPointer64, ResumeFromException::offsetOfStackPointer())); syncStackPtr(); push(exception); push(exceptionStack); pushValue(BooleanValue(true)); Br(x0); // Return BaselineFrame->returnValue() to the caller. // Used in debug mode and for GeneratorReturn. 
Label profilingInstrumentation; bind(&returnBaseline); loadPtr( Address(PseudoStackPointer, ResumeFromException::offsetOfFramePointer()), FramePointer); loadPtr( Address(PseudoStackPointer, ResumeFromException::offsetOfStackPointer()), PseudoStackPointer); // See comment further up beginning "`retn` does indeed sync the stack // pointer". That comment applies here too. syncStackPtr(); loadValue(Address(FramePointer, BaselineFrame::reverseOffsetOfReturnValue()), JSReturnOperand); jump(&profilingInstrumentation); // Return the given value to the caller. bind(&returnIon); loadValue( Address(PseudoStackPointer, ResumeFromException::offsetOfException()), JSReturnOperand); loadPtr( Address(PseudoStackPointer, offsetof(ResumeFromException, framePointer)), FramePointer); loadPtr( Address(PseudoStackPointer, offsetof(ResumeFromException, stackPointer)), PseudoStackPointer); syncStackPtr(); // If profiling is enabled, then update the lastProfilingFrame to refer to // caller frame before returning. This code is shared by ForcedReturnIon // and ForcedReturnBaseline. bind(&profilingInstrumentation); { Label skipProfilingInstrumentation; AbsoluteAddress addressOfEnabled( asMasm().runtime()->geckoProfiler().addressOfEnabled()); asMasm().branch32(Assembler::Equal, addressOfEnabled, Imm32(0), &skipProfilingInstrumentation); jump(profilerExitTail); bind(&skipProfilingInstrumentation); } movePtr(FramePointer, PseudoStackPointer); syncStackPtr(); vixl::MacroAssembler::Pop(ARMRegister(FramePointer, 64)); vixl::MacroAssembler::Pop(vixl::lr); syncStackPtr(); vixl::MacroAssembler::Ret(vixl::lr); // If we are bailing out to baseline to handle an exception, jump to the // bailout tail stub. Load 1 (true) in x0 (ReturnReg) to indicate success. bind(&bailout); Ldr(x2, MemOperand(PseudoStackPointer64, ResumeFromException::offsetOfBailoutInfo())); Ldr(PseudoStackPointer64, MemOperand(PseudoStackPointer64, ResumeFromException::offsetOfStackPointer())); syncStackPtr(); Mov(x0, 1); jump(bailoutTail); // If we are throwing and the innermost frame was a wasm frame, reset SP and // FP; SP is pointing to the unwound return address to the wasm entry, so // we can just ret(). bind(&wasm); Ldr(x29, MemOperand(PseudoStackPointer64, ResumeFromException::offsetOfFramePointer())); Ldr(PseudoStackPointer64, MemOperand(PseudoStackPointer64, ResumeFromException::offsetOfStackPointer())); syncStackPtr(); Mov(x23, int64_t(wasm::FailInstanceReg)); ret(); // Found a wasm catch handler, restore state and jump to it. 
bind(&wasmCatch); loadPtr(Address(PseudoStackPointer, ResumeFromException::offsetOfTarget()), r0); loadPtr( Address(PseudoStackPointer, ResumeFromException::offsetOfFramePointer()), r29); loadPtr( Address(PseudoStackPointer, ResumeFromException::offsetOfStackPointer()), PseudoStackPointer); syncStackPtr(); Br(x0); MOZ_ASSERT(GetStackPointer64().Is(PseudoStackPointer64)); } void MacroAssemblerCompat::profilerEnterFrame(Register framePtr, Register scratch) { asMasm().loadJSContext(scratch); loadPtr(Address(scratch, offsetof(JSContext, profilingActivation_)), scratch); storePtr(framePtr, Address(scratch, JitActivation::offsetOfLastProfilingFrame())); storePtr(ImmPtr(nullptr), Address(scratch, JitActivation::offsetOfLastProfilingCallSite())); } void MacroAssemblerCompat::profilerExitFrame() { jump(asMasm().runtime()->jitRuntime()->getProfilerExitFrameTail()); } Assembler::Condition MacroAssemblerCompat::testStringTruthy( bool truthy, const ValueOperand& value) { vixl::UseScratchRegisterScope temps(this); const Register scratch = temps.AcquireX().asUnsized(); const ARMRegister scratch32(scratch, 32); const ARMRegister scratch64(scratch, 64); MOZ_ASSERT(value.valueReg() != scratch); unboxString(value, scratch); Ldr(scratch32, MemOperand(scratch64, JSString::offsetOfLength())); Cmp(scratch32, Operand(0)); return truthy ? Condition::NonZero : Condition::Zero; } Assembler::Condition MacroAssemblerCompat::testBigIntTruthy( bool truthy, const ValueOperand& value) { vixl::UseScratchRegisterScope temps(this); const Register scratch = temps.AcquireX().asUnsized(); MOZ_ASSERT(value.valueReg() != scratch); unboxBigInt(value, scratch); load32(Address(scratch, BigInt::offsetOfDigitLength()), scratch); cmp32(scratch, Imm32(0)); return truthy ? Condition::NonZero : Condition::Zero; } void MacroAssemblerCompat::breakpoint() { // Note, other payloads are possible, but GDB is known to misinterpret them // sometimes and iloop on the breakpoint instead of stopping properly. Brk(0xf000); } // Either `any` is valid or `sixtyfour` is valid. Return a 32-bit ARMRegister // in the first case and an ARMRegister of the desired size in the latter case. static inline ARMRegister SelectGPReg(AnyRegister any, Register64 sixtyfour, unsigned size = 64) { MOZ_ASSERT(any.isValid() != (sixtyfour != Register64::Invalid())); if (sixtyfour == Register64::Invalid()) { return ARMRegister(any.gpr(), 32); } return ARMRegister(sixtyfour.reg, size); } // Assert that `sixtyfour` is invalid and then return an FP register from `any` // of the desired size. 
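// The `size` is given in bits (32, 64 or 128), as expected by ARMFPRegister.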
static inline ARMFPRegister SelectFPReg(AnyRegister any, Register64 sixtyfour, unsigned size) { MOZ_ASSERT(sixtyfour == Register64::Invalid()); return ARMFPRegister(any.fpu(), size); } void MacroAssemblerCompat::wasmLoadImpl(const wasm::MemoryAccessDesc& access, Register memoryBase_, Register ptr_, AnyRegister outany, Register64 out64) { access.assertOffsetInGuardPages(); uint32_t offset = access.offset(); MOZ_ASSERT(memoryBase_ != ptr_); ARMRegister memoryBase(memoryBase_, 64); ARMRegister ptr(ptr_, 64); if (offset) { vixl::UseScratchRegisterScope temps(this); ARMRegister scratch = temps.AcquireX(); Add(scratch, ptr, Operand(offset)); MemOperand srcAddr(memoryBase, scratch); wasmLoadImpl(access, srcAddr, outany, out64); } else { MemOperand srcAddr(memoryBase, ptr); wasmLoadImpl(access, srcAddr, outany, out64); } } void MacroAssemblerCompat::wasmLoadImpl(const wasm::MemoryAccessDesc& access, MemOperand srcAddr, AnyRegister outany, Register64 out64) { MOZ_ASSERT_IF(access.isSplatSimd128Load() || access.isWidenSimd128Load(), access.type() == Scalar::Float64); // NOTE: the generated code must match the assembly code in gen_load in // GenerateAtomicOperations.py asMasm().memoryBarrierBefore(access.sync()); FaultingCodeOffset fco; switch (access.type()) { case Scalar::Int8: fco = Ldrsb(SelectGPReg(outany, out64), srcAddr); break; case Scalar::Uint8: fco = Ldrb(SelectGPReg(outany, out64), srcAddr); break; case Scalar::Int16: fco = Ldrsh(SelectGPReg(outany, out64), srcAddr); break; case Scalar::Uint16: fco = Ldrh(SelectGPReg(outany, out64), srcAddr); break; case Scalar::Int32: if (out64 != Register64::Invalid()) { fco = Ldrsw(SelectGPReg(outany, out64), srcAddr); } else { fco = Ldr(SelectGPReg(outany, out64, 32), srcAddr); } break; case Scalar::Uint32: fco = Ldr(SelectGPReg(outany, out64, 32), srcAddr); break; case Scalar::Int64: fco = Ldr(SelectGPReg(outany, out64), srcAddr); break; case Scalar::Float32: // LDR does the right thing also for access.isZeroExtendSimd128Load() fco = Ldr(SelectFPReg(outany, out64, 32), srcAddr); break; case Scalar::Float64: if (access.isSplatSimd128Load() || access.isWidenSimd128Load()) { ScratchSimd128Scope scratch_(asMasm()); ARMFPRegister scratch = Simd1D(scratch_); fco = Ldr(scratch, srcAddr); if (access.isSplatSimd128Load()) { Dup(SelectFPReg(outany, out64, 128).V2D(), scratch, 0); } else { MOZ_ASSERT(access.isWidenSimd128Load()); switch (access.widenSimdOp()) { case wasm::SimdOp::V128Load8x8S: Sshll(SelectFPReg(outany, out64, 128).V8H(), scratch.V8B(), 0); break; case wasm::SimdOp::V128Load8x8U: Ushll(SelectFPReg(outany, out64, 128).V8H(), scratch.V8B(), 0); break; case wasm::SimdOp::V128Load16x4S: Sshll(SelectFPReg(outany, out64, 128).V4S(), scratch.V4H(), 0); break; case wasm::SimdOp::V128Load16x4U: Ushll(SelectFPReg(outany, out64, 128).V4S(), scratch.V4H(), 0); break; case wasm::SimdOp::V128Load32x2S: Sshll(SelectFPReg(outany, out64, 128).V2D(), scratch.V2S(), 0); break; case wasm::SimdOp::V128Load32x2U: Ushll(SelectFPReg(outany, out64, 128).V2D(), scratch.V2S(), 0); break; default: MOZ_CRASH("Unexpected widening op for wasmLoad"); } } } else { // LDR does the right thing also for access.isZeroExtendSimd128Load() fco = Ldr(SelectFPReg(outany, out64, 64), srcAddr); } break; case Scalar::Simd128: fco = Ldr(SelectFPReg(outany, out64, 128), srcAddr); break; case Scalar::Uint8Clamped: case Scalar::BigInt64: case Scalar::BigUint64: case Scalar::MaxTypedArrayViewType: MOZ_CRASH("unexpected array type"); } append(access, 
wasm::TrapMachineInsnForLoad(byteSize(access.type())), fco); asMasm().memoryBarrierAfter(access.sync()); } // Return true if `address` can be represented as an immediate (possibly scaled // by the access size) in an LDR/STR type instruction. // // For more about the logic here, see vixl::MacroAssembler::LoadStoreMacro(). static bool IsLSImmediateOffset(uint64_t address, size_t accessByteSize) { // The predicates below operate on signed values only. if (address > INT64_MAX) { return false; } // The access size is always a power of 2, so computing the log amounts to // counting trailing zeroes. unsigned logAccessSize = mozilla::CountTrailingZeroes32(accessByteSize); return (MacroAssemblerCompat::IsImmLSUnscaled(int64_t(address)) || MacroAssemblerCompat::IsImmLSScaled(int64_t(address), logAccessSize)); } void MacroAssemblerCompat::wasmLoadAbsolute( const wasm::MemoryAccessDesc& access, Register memoryBase, uint64_t address, AnyRegister output, Register64 out64) { if (!IsLSImmediateOffset(address, access.byteSize())) { // The access will require the constant to be loaded into a temp register. // Do so here, to keep the logic in wasmLoadImpl() tractable wrt emitting // trap information. // // Almost all constant addresses will in practice be handled by a single MOV // so do not worry about additional optimizations here. vixl::UseScratchRegisterScope temps(this); ARMRegister scratch = temps.AcquireX(); Mov(scratch, address); MemOperand srcAddr(X(memoryBase), scratch); wasmLoadImpl(access, srcAddr, output, out64); } else { MemOperand srcAddr(X(memoryBase), address); wasmLoadImpl(access, srcAddr, output, out64); } } void MacroAssemblerCompat::wasmStoreImpl(const wasm::MemoryAccessDesc& access, AnyRegister valany, Register64 val64, Register memoryBase_, Register ptr_) { access.assertOffsetInGuardPages(); uint32_t offset = access.offset(); ARMRegister memoryBase(memoryBase_, 64); ARMRegister ptr(ptr_, 64); if (offset) { vixl::UseScratchRegisterScope temps(this); ARMRegister scratch = temps.AcquireX(); Add(scratch, ptr, Operand(offset)); MemOperand destAddr(memoryBase, scratch); wasmStoreImpl(access, destAddr, valany, val64); } else { MemOperand destAddr(memoryBase, ptr); wasmStoreImpl(access, destAddr, valany, val64); } } void MacroAssemblerCompat::wasmStoreImpl(const wasm::MemoryAccessDesc& access, MemOperand dstAddr, AnyRegister valany, Register64 val64) { // NOTE: the generated code must match the assembly code in gen_store in // GenerateAtomicOperations.py asMasm().memoryBarrierBefore(access.sync()); FaultingCodeOffset fco; switch (access.type()) { case Scalar::Int8: case Scalar::Uint8: fco = Strb(SelectGPReg(valany, val64), dstAddr); break; case Scalar::Int16: case Scalar::Uint16: fco = Strh(SelectGPReg(valany, val64), dstAddr); break; case Scalar::Int32: case Scalar::Uint32: fco = Str(SelectGPReg(valany, val64), dstAddr); break; case Scalar::Int64: fco = Str(SelectGPReg(valany, val64), dstAddr); break; case Scalar::Float32: fco = Str(SelectFPReg(valany, val64, 32), dstAddr); break; case Scalar::Float64: fco = Str(SelectFPReg(valany, val64, 64), dstAddr); break; case Scalar::Simd128: fco = Str(SelectFPReg(valany, val64, 128), dstAddr); break; case Scalar::Uint8Clamped: case Scalar::BigInt64: case Scalar::BigUint64: case Scalar::MaxTypedArrayViewType: MOZ_CRASH("unexpected array type"); } append(access, wasm::TrapMachineInsnForStore(byteSize(access.type())), fco); asMasm().memoryBarrierAfter(access.sync()); } void MacroAssemblerCompat::wasmStoreAbsolute( const wasm::MemoryAccessDesc& access, 
AnyRegister value, Register64 value64, Register memoryBase, uint64_t address) { // See comments in wasmLoadAbsolute. unsigned logAccessSize = mozilla::CountTrailingZeroes32(access.byteSize()); if (address > INT64_MAX || !(IsImmLSScaled(int64_t(address), logAccessSize) || IsImmLSUnscaled(int64_t(address)))) { vixl::UseScratchRegisterScope temps(this); ARMRegister scratch = temps.AcquireX(); Mov(scratch, address); MemOperand destAddr(X(memoryBase), scratch); wasmStoreImpl(access, destAddr, value, value64); } else { MemOperand destAddr(X(memoryBase), address); wasmStoreImpl(access, destAddr, value, value64); } } void MacroAssemblerCompat::compareSimd128Int(Assembler::Condition cond, ARMFPRegister dest, ARMFPRegister lhs, ARMFPRegister rhs) { switch (cond) { case Assembler::Equal: Cmeq(dest, lhs, rhs); break; case Assembler::NotEqual: Cmeq(dest, lhs, rhs); Mvn(dest, dest); break; case Assembler::GreaterThan: Cmgt(dest, lhs, rhs); break; case Assembler::GreaterThanOrEqual: Cmge(dest, lhs, rhs); break; case Assembler::LessThan: Cmgt(dest, rhs, lhs); break; case Assembler::LessThanOrEqual: Cmge(dest, rhs, lhs); break; case Assembler::Above: Cmhi(dest, lhs, rhs); break; case Assembler::AboveOrEqual: Cmhs(dest, lhs, rhs); break; case Assembler::Below: Cmhi(dest, rhs, lhs); break; case Assembler::BelowOrEqual: Cmhs(dest, rhs, lhs); break; default: MOZ_CRASH("Unexpected SIMD integer condition"); } } void MacroAssemblerCompat::compareSimd128Float(Assembler::Condition cond, ARMFPRegister dest, ARMFPRegister lhs, ARMFPRegister rhs) { switch (cond) { case Assembler::Equal: Fcmeq(dest, lhs, rhs); break; case Assembler::NotEqual: Fcmeq(dest, lhs, rhs); Mvn(dest, dest); break; case Assembler::GreaterThan: Fcmgt(dest, lhs, rhs); break; case Assembler::GreaterThanOrEqual: Fcmge(dest, lhs, rhs); break; case Assembler::LessThan: Fcmgt(dest, rhs, lhs); break; case Assembler::LessThanOrEqual: Fcmge(dest, rhs, lhs); break; default: MOZ_CRASH("Unexpected SIMD integer condition"); } } void MacroAssemblerCompat::rightShiftInt8x16(FloatRegister lhs, Register rhs, FloatRegister dest, bool isUnsigned) { ScratchSimd128Scope scratch_(asMasm()); ARMFPRegister shift = Simd16B(scratch_); Dup(shift, ARMRegister(rhs, 32)); Neg(shift, shift); if (isUnsigned) { Ushl(Simd16B(dest), Simd16B(lhs), shift); } else { Sshl(Simd16B(dest), Simd16B(lhs), shift); } } void MacroAssemblerCompat::rightShiftInt16x8(FloatRegister lhs, Register rhs, FloatRegister dest, bool isUnsigned) { ScratchSimd128Scope scratch_(asMasm()); ARMFPRegister shift = Simd8H(scratch_); Dup(shift, ARMRegister(rhs, 32)); Neg(shift, shift); if (isUnsigned) { Ushl(Simd8H(dest), Simd8H(lhs), shift); } else { Sshl(Simd8H(dest), Simd8H(lhs), shift); } } void MacroAssemblerCompat::rightShiftInt32x4(FloatRegister lhs, Register rhs, FloatRegister dest, bool isUnsigned) { ScratchSimd128Scope scratch_(asMasm()); ARMFPRegister shift = Simd4S(scratch_); Dup(shift, ARMRegister(rhs, 32)); Neg(shift, shift); if (isUnsigned) { Ushl(Simd4S(dest), Simd4S(lhs), shift); } else { Sshl(Simd4S(dest), Simd4S(lhs), shift); } } void MacroAssemblerCompat::rightShiftInt64x2(FloatRegister lhs, Register rhs, FloatRegister dest, bool isUnsigned) { ScratchSimd128Scope scratch_(asMasm()); ARMFPRegister shift = Simd2D(scratch_); Dup(shift, ARMRegister(rhs, 64)); Neg(shift, shift); if (isUnsigned) { Ushl(Simd2D(dest), Simd2D(lhs), shift); } else { Sshl(Simd2D(dest), Simd2D(lhs), shift); } } void MacroAssembler::reserveStack(uint32_t amount) { // TODO: This bumps |sp| every time we reserve using a 
second register. // It would save some instructions if we had a fixed frame size. vixl::MacroAssembler::Claim(Operand(amount)); adjustFrame(amount); } void MacroAssembler::Push(RegisterOrSP reg) { if (IsHiddenSP(reg)) { push(sp); } else { push(AsRegister(reg)); } adjustFrame(sizeof(intptr_t)); } //{{{ check_macroassembler_style // =============================================================== // MacroAssembler high-level usage. void MacroAssembler::flush() { Assembler::flush(); } // =============================================================== // Stack manipulation functions. // Routines for saving/restoring registers on the stack. The format is: // // (highest address) // // integer (X) regs in any order size: 8 * # int regs // // if # int regs is odd, // then an 8 byte alignment hole size: 0 or 8 // // double (D) regs in any order size: 8 * # double regs // // if # double regs is odd, // then an 8 byte alignment hole size: 0 or 8 // // vector (Q) regs in any order size: 16 * # vector regs // // (lowest address) // // Hence the size of the save area is 0 % 16. And, provided that the base // (highest) address is 16-aligned, then the vector reg save/restore accesses // will also be 16-aligned, as will pairwise operations for the double regs. // // Implied by this is that the format of the double and vector dump area // corresponds with what FloatRegister::GetPushSizeInBytes computes. // See block comment in MacroAssembler.h for more details. size_t MacroAssembler::PushRegsInMaskSizeInBytes(LiveRegisterSet set) { size_t numIntRegs = set.gprs().size(); return ((numIntRegs + 1) & ~1) * sizeof(intptr_t) + FloatRegister::GetPushSizeInBytes(set.fpus()); } // Generate code to dump the values in `set`, either on the stack if `dest` is // `Nothing` or working backwards from the address denoted by `dest` if it is // `Some`. These two cases are combined so as to minimise the chance of // mistakenly generating different formats for the same `set`, given that the // `Some` `dest` case is used extremely rarely. static void PushOrStoreRegsInMask(MacroAssembler* masm, LiveRegisterSet set, mozilla::Maybe
<Address> dest) {
  static_assert(sizeof(FloatRegisters::RegisterContent) == 16);

  // If we're saving to arbitrary memory, check the destination is big enough.
  if (dest) {
    mozilla::DebugOnly<size_t> bytesRequired =
        MacroAssembler::PushRegsInMaskSizeInBytes(set);
    MOZ_ASSERT(dest->offset >= 0);
    MOZ_ASSERT(((size_t)dest->offset) >= bytesRequired);
  }

  // Note the high limit point; we'll check it again later.
  mozilla::DebugOnly<size_t> maxExtentInitial =
      dest ? dest->offset : masm->framePushed();

  // Gather up the integer registers in groups of four, and either push each
  // group as a single transfer so as to minimise the number of stack pointer
  // changes, or write them individually to memory. Take care to ensure the
  // space used remains 16-aligned.
  for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more();) {
    vixl::CPURegister src[4] = {vixl::NoCPUReg, vixl::NoCPUReg, vixl::NoCPUReg,
                                vixl::NoCPUReg};

    size_t i;
    for (i = 0; i < 4 && iter.more(); i++) {
      src[i] = ARMRegister(*iter, 64);
      ++iter;
    }
    MOZ_ASSERT(i > 0);

    if (i == 1 || i == 3) {
      // Ensure the stack remains 16-aligned
      MOZ_ASSERT(!iter.more());
      src[i] = vixl::xzr;
      i++;
    }
    MOZ_ASSERT(i == 2 || i == 4);

    if (dest) {
      for (size_t j = 0; j < i; j++) {
        Register ireg = Register::FromCode(src[j].IsZero() ? Registers::xzr
                                                           : src[j].code());
        dest->offset -= sizeof(intptr_t);
        masm->storePtr(ireg, *dest);
      }
    } else {
      masm->adjustFrame(i * 8);
      masm->vixl::MacroAssembler::Push(src[0], src[1], src[2], src[3]);
    }
  }

  // Now the same for the FP double registers. Note that because of how
  // ReduceSetForPush works, an underlying AArch64 SIMD/FP register can either
  // be present as a double register, or as a V128 register, but not both.
  // Firstly, round up the registers to be pushed.
  FloatRegisterSet fpuSet(set.fpus().reduceSetForPush());
  vixl::CPURegister allSrcs[FloatRegisters::TotalPhys];
  size_t numAllSrcs = 0;

  for (FloatRegisterBackwardIterator iter(fpuSet); iter.more(); ++iter) {
    FloatRegister reg = *iter;
    if (reg.isDouble()) {
      MOZ_RELEASE_ASSERT(numAllSrcs < FloatRegisters::TotalPhys);
      allSrcs[numAllSrcs] = ARMFPRegister(reg, 64);
      numAllSrcs++;
    } else {
      MOZ_ASSERT(reg.isSimd128());
    }
  }
  MOZ_RELEASE_ASSERT(numAllSrcs <= FloatRegisters::TotalPhys);

  if ((numAllSrcs & 1) == 1) {
    // We've got an odd number of doubles. In order to maintain 16-alignment,
    // push the last register twice. We'll skip over the duplicate in
    // PopRegsInMaskIgnore.
    allSrcs[numAllSrcs] = allSrcs[numAllSrcs - 1];
    numAllSrcs++;
  }
  MOZ_RELEASE_ASSERT(numAllSrcs <= FloatRegisters::TotalPhys);
  MOZ_RELEASE_ASSERT((numAllSrcs & 1) == 0);

  // And now generate the transfers.
  size_t i;
  if (dest) {
    for (i = 0; i < numAllSrcs; i++) {
      FloatRegister freg =
          FloatRegister(FloatRegisters::FPRegisterID(allSrcs[i].code()),
                        FloatRegisters::Kind::Double);
      dest->offset -= sizeof(double);
      masm->storeDouble(freg, *dest);
    }
  } else {
    i = 0;
    while (i < numAllSrcs) {
      vixl::CPURegister src[4] = {vixl::NoCPUReg, vixl::NoCPUReg,
                                  vixl::NoCPUReg, vixl::NoCPUReg};
      size_t j;
      for (j = 0; j < 4 && j + i < numAllSrcs; j++) {
        src[j] = allSrcs[j + i];
      }
      masm->adjustFrame(8 * j);
      masm->vixl::MacroAssembler::Push(src[0], src[1], src[2], src[3]);
      i += j;
    }
  }
  MOZ_ASSERT(i == numAllSrcs);

  // Finally, deal with the SIMD (V128) registers. This is a bit simpler
  // as there's no need for special-casing to maintain 16-alignment.
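  // (Each V128 slot is 16 bytes, so any number of them keeps the save area
  // 16-aligned without extra padding.)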
  numAllSrcs = 0;
  for (FloatRegisterBackwardIterator iter(fpuSet); iter.more(); ++iter) {
    FloatRegister reg = *iter;
    if (reg.isSimd128()) {
      MOZ_RELEASE_ASSERT(numAllSrcs < FloatRegisters::TotalPhys);
      allSrcs[numAllSrcs] = ARMFPRegister(reg, 128);
      numAllSrcs++;
    }
  }
  MOZ_RELEASE_ASSERT(numAllSrcs <= FloatRegisters::TotalPhys);

  // Generate the transfers.
  if (dest) {
    for (i = 0; i < numAllSrcs; i++) {
      FloatRegister freg =
          FloatRegister(FloatRegisters::FPRegisterID(allSrcs[i].code()),
                        FloatRegisters::Kind::Simd128);
      dest->offset -= FloatRegister::SizeOfSimd128;
      masm->storeUnalignedSimd128(freg, *dest);
    }
  } else {
    i = 0;
    while (i < numAllSrcs) {
      vixl::CPURegister src[4] = {vixl::NoCPUReg, vixl::NoCPUReg,
                                  vixl::NoCPUReg, vixl::NoCPUReg};
      size_t j;
      for (j = 0; j < 4 && j + i < numAllSrcs; j++) {
        src[j] = allSrcs[j + i];
      }
      masm->adjustFrame(16 * j);
      masm->vixl::MacroAssembler::Push(src[0], src[1], src[2], src[3]);
      i += j;
    }
  }
  MOZ_ASSERT(i == numAllSrcs);

  // Final overrun check.
  if (dest) {
    MOZ_ASSERT(maxExtentInitial - dest->offset ==
               MacroAssembler::PushRegsInMaskSizeInBytes(set));
  } else {
    MOZ_ASSERT(masm->framePushed() - maxExtentInitial ==
               MacroAssembler::PushRegsInMaskSizeInBytes(set));
  }
}

void MacroAssembler::PushRegsInMask(LiveRegisterSet set) {
  PushOrStoreRegsInMask(this, set, mozilla::Nothing());
}

void MacroAssembler::storeRegsInMask(LiveRegisterSet set, Address dest,
                                     Register scratch) {
  PushOrStoreRegsInMask(this, set, mozilla::Some(dest));
}

// This is a helper function for PopRegsInMaskIgnore below. It emits the
// loads described by dests[0] and [1] and offsets[0] and [1], generating a
// load-pair if it can.
static void GeneratePendingLoadsThenFlush(MacroAssembler* masm,
                                          vixl::CPURegister* dests,
                                          uint32_t* offsets,
                                          uint32_t transactionSize) {
  // Generate the loads ..
  if (!dests[0].IsNone()) {
    if (!dests[1].IsNone()) {
      // [0] and [1] both present.
      if (offsets[0] + transactionSize == offsets[1]) {
        masm->Ldp(dests[0], dests[1],
                  MemOperand(masm->GetStackPointer64(), offsets[0]));
      } else {
        // Theoretically we could check for a load-pair with the destinations
        // switched, but our callers will never generate that. Hence there's
        // no loss in giving up at this point and generating two loads.
        masm->Ldr(dests[0], MemOperand(masm->GetStackPointer64(), offsets[0]));
        masm->Ldr(dests[1], MemOperand(masm->GetStackPointer64(), offsets[1]));
      }
    } else {
      // [0] only.
      masm->Ldr(dests[0], MemOperand(masm->GetStackPointer64(), offsets[0]));
    }
  } else {
    if (!dests[1].IsNone()) {
      // [1] only. Can't happen because callers always fill [0] before [1].
      MOZ_CRASH("GenerateLoadsThenFlush");
    } else {
      // Neither entry valid. This can happen.
    }
  }

  // .. and flush.
  dests[0] = dests[1] = vixl::NoCPUReg;
  offsets[0] = offsets[1] = 0;
}

void MacroAssembler::PopRegsInMaskIgnore(LiveRegisterSet set,
                                         LiveRegisterSet ignore) {
  mozilla::DebugOnly<size_t> framePushedInitial = framePushed();

  // The offset of the data from the stack pointer.
  uint32_t offset = 0;

  // The set of FP/SIMD registers we need to restore.
  FloatRegisterSet fpuSet(set.fpus().reduceSetForPush());

  // The set of registers to ignore. BroadcastToAllSizes() is used to avoid
  // any ambiguities arising from (eg) `fpuSet` containing q17 but `ignore`
  // containing d17.
  FloatRegisterSet ignoreFpusBroadcasted(
      FloatRegister::BroadcastToAllSizes(ignore.fpus()));

  // First recover the SIMD (V128) registers. This is straightforward in that
  // we don't need to think about alignment holes.
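  // Restoration proceeds from the lowest address upwards, i.e. in the reverse
  // of the push layout documented above PushRegsInMaskSizeInBytes: V128 regs
  // first, then the doubles, then the integer regs.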
// These three form a two-entry queue that holds loads that we know we // need, but which we haven't yet emitted. vixl::CPURegister pendingDests[2] = {vixl::NoCPUReg, vixl::NoCPUReg}; uint32_t pendingOffsets[2] = {0, 0}; size_t nPending = 0; for (FloatRegisterIterator iter(fpuSet); iter.more(); ++iter) { FloatRegister reg = *iter; if (reg.isDouble()) { continue; } MOZ_RELEASE_ASSERT(reg.isSimd128()); uint32_t offsetForReg = offset; offset += FloatRegister::SizeOfSimd128; if (ignoreFpusBroadcasted.hasRegisterIndex(reg)) { continue; } MOZ_ASSERT(nPending <= 2); if (nPending == 2) { GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 16); nPending = 0; } pendingDests[nPending] = ARMFPRegister(reg, 128); pendingOffsets[nPending] = offsetForReg; nPending++; } GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 16); nPending = 0; MOZ_ASSERT((offset % 16) == 0); // Now recover the FP double registers. This is more tricky in that we need // to skip over the lowest-addressed of them if the number of them was odd. if ((((fpuSet.bits() & FloatRegisters::AllDoubleMask).size()) & 1) == 1) { offset += sizeof(double); } for (FloatRegisterIterator iter(fpuSet); iter.more(); ++iter) { FloatRegister reg = *iter; if (reg.isSimd128()) { continue; } /* true but redundant, per loop above: MOZ_RELEASE_ASSERT(reg.isDouble()) */ uint32_t offsetForReg = offset; offset += sizeof(double); if (ignoreFpusBroadcasted.hasRegisterIndex(reg)) { continue; } MOZ_ASSERT(nPending <= 2); if (nPending == 2) { GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 8); nPending = 0; } pendingDests[nPending] = ARMFPRegister(reg, 64); pendingOffsets[nPending] = offsetForReg; nPending++; } GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 8); nPending = 0; MOZ_ASSERT((offset % 16) == 0); MOZ_ASSERT(offset == set.fpus().getPushSizeInBytes()); // And finally recover the integer registers, again skipping an alignment // hole if it exists. 
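  // (The hole is the xzr slot that PushOrStoreRegsInMask pushes when the
  // number of integer registers is odd.)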
if ((set.gprs().size() & 1) == 1) { offset += sizeof(uint64_t); } for (GeneralRegisterIterator iter(set.gprs()); iter.more(); ++iter) { Register reg = *iter; uint32_t offsetForReg = offset; offset += sizeof(uint64_t); if (ignore.has(reg)) { continue; } MOZ_ASSERT(nPending <= 2); if (nPending == 2) { GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 8); nPending = 0; } pendingDests[nPending] = ARMRegister(reg, 64); pendingOffsets[nPending] = offsetForReg; nPending++; } GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 8); MOZ_ASSERT((offset % 16) == 0); size_t bytesPushed = PushRegsInMaskSizeInBytes(set); MOZ_ASSERT(offset == bytesPushed); freeStack(bytesPushed); } void MacroAssembler::Push(Register reg) { push(reg); adjustFrame(sizeof(intptr_t)); } void MacroAssembler::Push(Register reg1, Register reg2, Register reg3, Register reg4) { push(reg1, reg2, reg3, reg4); adjustFrame(4 * sizeof(intptr_t)); } void MacroAssembler::Push(const Imm32 imm) { push(imm); adjustFrame(sizeof(intptr_t)); } void MacroAssembler::Push(const ImmWord imm) { push(imm); adjustFrame(sizeof(intptr_t)); } void MacroAssembler::Push(const ImmPtr imm) { push(imm); adjustFrame(sizeof(intptr_t)); } void MacroAssembler::Push(const ImmGCPtr ptr) { push(ptr); adjustFrame(sizeof(intptr_t)); } void MacroAssembler::Push(FloatRegister f) { push(f); adjustFrame(sizeof(double)); } void MacroAssembler::PushBoxed(FloatRegister reg) { subFromStackPtr(Imm32(sizeof(double))); boxDouble(reg, Address(getStackPointer(), 0)); adjustFrame(sizeof(double)); } void MacroAssembler::Pop(Register reg) { pop(reg); adjustFrame(-1 * int64_t(sizeof(int64_t))); } void MacroAssembler::Pop(FloatRegister f) { loadDouble(Address(getStackPointer(), 0), f); freeStack(sizeof(double)); } void MacroAssembler::Pop(const ValueOperand& val) { pop(val); adjustFrame(-1 * int64_t(sizeof(int64_t))); } void MacroAssembler::freeStackTo(uint32_t framePushed) { MOZ_ASSERT(framePushed <= framePushed_); Sub(GetStackPointer64(), X(FramePointer), Operand(int32_t(framePushed))); syncStackPtr(); framePushed_ = framePushed; } // =============================================================== // Simple call functions. CodeOffset MacroAssembler::call(Register reg) { // This sync has been observed (and is expected) to be necessary. // eg testcase: tests/debug/bug1107525.js syncStackPtr(); Blr(ARMRegister(reg, 64)); return CodeOffset(currentOffset()); } CodeOffset MacroAssembler::call(Label* label) { // This sync has been observed (and is expected) to be necessary. // eg testcase: tests/basic/testBug504520Harder.js syncStackPtr(); Bl(label); return CodeOffset(currentOffset()); } void MacroAssembler::call(ImmPtr imm) { // This sync has been observed (and is expected) to be necessary. // eg testcase: asm.js/testTimeout5.js syncStackPtr(); vixl::UseScratchRegisterScope temps(this); MOZ_ASSERT(temps.IsAvailable(ScratchReg64)); // ip0 temps.Exclude(ScratchReg64); movePtr(imm, ScratchReg64.asUnsized()); Blr(ScratchReg64); } void MacroAssembler::call(ImmWord imm) { call(ImmPtr((void*)imm.value)); } CodeOffset MacroAssembler::call(wasm::SymbolicAddress imm) { vixl::UseScratchRegisterScope temps(this); const Register scratch = temps.AcquireX().asUnsized(); // This sync is believed to be necessary, although no case in jit-test/tests // has been observed to cause SP != PSP here. 
  syncStackPtr();
  movePtr(imm, scratch);
  Blr(ARMRegister(scratch, 64));
  return CodeOffset(currentOffset());
}

void MacroAssembler::call(const Address& addr) {
  vixl::UseScratchRegisterScope temps(this);
  const Register scratch = temps.AcquireX().asUnsized();
  // This sync has been observed (and is expected) to be necessary.
  // eg testcase: tests/backup-point-bug1315634.js
  syncStackPtr();
  loadPtr(addr, scratch);
  Blr(ARMRegister(scratch, 64));
}

void MacroAssembler::call(JitCode* c) {
  vixl::UseScratchRegisterScope temps(this);
  const ARMRegister scratch64 = temps.AcquireX();
  // This sync has been observed (and is expected) to be necessary.
  // eg testcase: arrays/new-array-undefined-undefined-more-args-2.js
  syncStackPtr();
  BufferOffset off = immPool64(scratch64, uint64_t(c->raw()));
  addPendingJump(off, ImmPtr(c->raw()), RelocationKind::JITCODE);
  blr(scratch64);
}

CodeOffset MacroAssembler::callWithPatch() {
  // This needs to sync. Wasm goes through this one for intramodule calls.
  //
  // In other cases, wasm goes through masm.wasmCallImport(),
  // masm.wasmCallBuiltinInstanceMethod, masm.wasmCallIndirect, all of which
  // sync.
  //
  // This sync is believed to be necessary, although no case in jit-test/tests
  // has been observed to cause SP != PSP here.
  syncStackPtr();
  bl(0, LabelDoc());
  return CodeOffset(currentOffset());
}

void MacroAssembler::patchCall(uint32_t callerOffset, uint32_t calleeOffset) {
  Instruction* inst = getInstructionAt(BufferOffset(callerOffset - 4));
  MOZ_ASSERT(inst->IsBL());
  ptrdiff_t relTarget = (int)calleeOffset - ((int)callerOffset - 4);
  ptrdiff_t relTarget00 = relTarget >> 2;
  MOZ_RELEASE_ASSERT((relTarget & 0x3) == 0);
  MOZ_RELEASE_ASSERT(vixl::IsInt26(relTarget00));
  bl(inst, relTarget00);
}

CodeOffset MacroAssembler::farJumpWithPatch() {
  vixl::UseScratchRegisterScope temps(this);
  const ARMRegister scratch = temps.AcquireX();
  const ARMRegister scratch2 = temps.AcquireX();

  AutoForbidPoolsAndNops afp(this,
                             /* max number of instructions in scope = */ 7);

  mozilla::DebugOnly<uint32_t> before = currentOffset();

  align(8);  // At most one nop

  Label branch;
  adr(scratch2, &branch);
  ldr(scratch, vixl::MemOperand(scratch2, 4));
  add(scratch2, scratch2, scratch);
  CodeOffset offs(currentOffset());
  bind(&branch);
  br(scratch2);
  Emit(UINT32_MAX);
  Emit(UINT32_MAX);

  mozilla::DebugOnly<uint32_t> after = currentOffset();

  MOZ_ASSERT(after - before == 24 || after - before == 28);

  return offs;
}

void MacroAssembler::patchFarJump(CodeOffset farJump, uint32_t targetOffset) {
  Instruction* inst1 = getInstructionAt(BufferOffset(farJump.offset() + 4));
  Instruction* inst2 = getInstructionAt(BufferOffset(farJump.offset() + 8));
  int64_t distance = (int64_t)targetOffset - (int64_t)farJump.offset();
  MOZ_ASSERT(inst1->InstructionBits() == UINT32_MAX);
  MOZ_ASSERT(inst2->InstructionBits() == UINT32_MAX);
  inst1->SetInstructionBits((uint32_t)distance);
  inst2->SetInstructionBits((uint32_t)(distance >> 32));
}

CodeOffset MacroAssembler::nopPatchableToCall() {
  AutoForbidPoolsAndNops afp(this,
                             /* max number of instructions in scope = */ 1);
  Nop();
  return CodeOffset(currentOffset());
}

void MacroAssembler::patchNopToCall(uint8_t* call, uint8_t* target) {
  uint8_t* inst = call - 4;
  Instruction* instr = reinterpret_cast<Instruction*>(inst);
  MOZ_ASSERT(instr->IsBL() || instr->IsNOP());
  bl(instr, (target - inst) >> 2);
}

void MacroAssembler::patchCallToNop(uint8_t* call) {
  uint8_t* inst = call - 4;
  Instruction* instr = reinterpret_cast<Instruction*>(inst);
  MOZ_ASSERT(instr->IsBL() || instr->IsNOP());
  nop(instr);
}

void MacroAssembler::pushReturnAddress() {
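  // Pushing a single 8-byte value is presumably only safe via the pseudo
  // stack pointer, since the real SP must stay 16-byte aligned for memory
  // accesses; hence the assertion below.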
MOZ_RELEASE_ASSERT(!sp.Is(GetStackPointer64()), "Not valid"); push(lr); } void MacroAssembler::popReturnAddress() { MOZ_RELEASE_ASSERT(!sp.Is(GetStackPointer64()), "Not valid"); pop(lr); } // =============================================================== // ABI function calls. void MacroAssembler::setupUnalignedABICall(Register scratch) { // Because wasm operates without the need for dynamic alignment of SP, it is // implied that this routine should never be called when generating wasm. MOZ_ASSERT(!IsCompilingWasm()); // The following won't work for SP -- needs slightly different logic. MOZ_RELEASE_ASSERT(GetStackPointer64().Is(PseudoStackPointer64)); setupNativeABICall(); dynamicAlignment_ = true; int64_t alignment = ~(int64_t(ABIStackAlignment) - 1); ARMRegister scratch64(scratch, 64); MOZ_ASSERT(!scratch64.Is(PseudoStackPointer64)); // Always save LR -- Baseline ICs assume that LR isn't modified. push(lr); // Remember the stack address on entry. This is reloaded in callWithABIPost // below. Mov(scratch64, PseudoStackPointer64); // Make alignment, including the effective push of the previous sp. Sub(PseudoStackPointer64, PseudoStackPointer64, Operand(8)); And(PseudoStackPointer64, PseudoStackPointer64, Operand(alignment)); syncStackPtr(); // Store previous sp to the top of the stack, aligned. This is also // reloaded in callWithABIPost. Str(scratch64, MemOperand(PseudoStackPointer64, 0)); } void MacroAssembler::callWithABIPre(uint32_t* stackAdjust, bool callFromWasm) { // wasm operates without the need for dynamic alignment of SP. MOZ_ASSERT(!(dynamicAlignment_ && callFromWasm)); MOZ_ASSERT(inCall_); uint32_t stackForCall = abiArgs_.stackBytesConsumedSoFar(); // ARM64 *really* wants SP to always be 16-aligned, so ensure this now. if (dynamicAlignment_) { stackForCall += ComputeByteAlignment(stackForCall, StackAlignment); } else { // This can happen when we attach out-of-line stubs for rare cases. For // example CodeGenerator::visitWasmTruncateToInt32 adds an out-of-line // chunk. uint32_t alignmentAtPrologue = callFromWasm ? sizeof(wasm::Frame) : 0; stackForCall += ComputeByteAlignment( stackForCall + framePushed() + alignmentAtPrologue, ABIStackAlignment); } *stackAdjust = stackForCall; reserveStack(*stackAdjust); { enoughMemory_ &= moveResolver_.resolve(); if (!enoughMemory_) { return; } MoveEmitter emitter(*this); emitter.emit(moveResolver_); emitter.finish(); } assertStackAlignment(ABIStackAlignment); } void MacroAssembler::callWithABIPost(uint32_t stackAdjust, ABIType result, bool callFromWasm) { // wasm operates without the need for dynamic alignment of SP. MOZ_ASSERT(!(dynamicAlignment_ && callFromWasm)); // Call boundaries communicate stack via SP, so we must resync PSP now. initPseudoStackPtr(); freeStack(stackAdjust); if (dynamicAlignment_) { // This then-clause makes more sense if you first read // setupUnalignedABICall above. // // Restore the stack pointer from entry. The stack pointer will have been // saved by setupUnalignedABICall. This is fragile in that it assumes // that uses of this routine (callWithABIPost) with `dynamicAlignment_ == // true` are preceded by matching calls to setupUnalignedABICall. But // there's nothing that enforce that mechanically. If we really want to // enforce this, we could add a debug-only CallWithABIState enum to the // MacroAssembler and assert that setupUnalignedABICall updates it before // we get here, then reset it to its initial state. Ldr(GetStackPointer64(), MemOperand(GetStackPointer64(), 0)); syncStackPtr(); // Restore LR. 
This restores LR to the value stored by // setupUnalignedABICall, which should have been called just before // callWithABIPre. This is, per the above comment, also fragile. pop(lr); // SP may be < PSP now. That is expected from the behaviour of `pop`. It // is not clear why the following `syncStackPtr` is necessary, but it is: // without it, the following test segfaults: // tests/backup-point-bug1315634.js syncStackPtr(); } // If the ABI's return regs are where ION is expecting them, then // no other work needs to be done. #ifdef DEBUG MOZ_ASSERT(inCall_); inCall_ = false; #endif } void MacroAssembler::callWithABINoProfiler(Register fun, ABIType result) { vixl::UseScratchRegisterScope temps(this); const Register scratch = temps.AcquireX().asUnsized(); movePtr(fun, scratch); uint32_t stackAdjust; callWithABIPre(&stackAdjust); call(scratch); callWithABIPost(stackAdjust, result); } void MacroAssembler::callWithABINoProfiler(const Address& fun, ABIType result) { vixl::UseScratchRegisterScope temps(this); const Register scratch = temps.AcquireX().asUnsized(); loadPtr(fun, scratch); uint32_t stackAdjust; callWithABIPre(&stackAdjust); call(scratch); callWithABIPost(stackAdjust, result); } // =============================================================== // Jit Frames. uint32_t MacroAssembler::pushFakeReturnAddress(Register scratch) { enterNoPool(3); Label fakeCallsite; Adr(ARMRegister(scratch, 64), &fakeCallsite); Push(scratch); bind(&fakeCallsite); uint32_t pseudoReturnOffset = currentOffset(); leaveNoPool(); return pseudoReturnOffset; } bool MacroAssemblerCompat::buildOOLFakeExitFrame(void* fakeReturnAddr) { asMasm().PushFrameDescriptor(FrameType::IonJS); asMasm().Push(ImmPtr(fakeReturnAddr)); asMasm().Push(FramePointer); return true; } // =============================================================== // Move instructions void MacroAssembler::moveValue(const TypedOrValueRegister& src, const ValueOperand& dest) { if (src.hasValue()) { moveValue(src.valueReg(), dest); return; } MIRType type = src.type(); AnyRegister reg = src.typedReg(); if (!IsFloatingPointType(type)) { boxNonDouble(ValueTypeFromMIRType(type), reg.gpr(), dest); return; } ScratchDoubleScope scratch(*this); FloatRegister freg = reg.fpu(); if (type == MIRType::Float32) { convertFloat32ToDouble(freg, scratch); freg = scratch; } boxDouble(freg, dest, scratch); } void MacroAssembler::moveValue(const ValueOperand& src, const ValueOperand& dest) { if (src == dest) { return; } movePtr(src.valueReg(), dest.valueReg()); } void MacroAssembler::moveValue(const Value& src, const ValueOperand& dest) { if (!src.isGCThing()) { movePtr(ImmWord(src.asRawBits()), dest.valueReg()); return; } BufferOffset load = movePatchablePtr(ImmPtr(src.bitsAsPunboxPointer()), dest.valueReg()); writeDataRelocation(src, load); } // =============================================================== // Branch functions void MacroAssembler::loadStoreBuffer(Register ptr, Register buffer) { And(ARMRegister(buffer, 64), ARMRegister(ptr, 64), Operand(int32_t(~gc::ChunkMask))); loadPtr(Address(buffer, gc::ChunkStoreBufferOffset), buffer); } void MacroAssembler::branchPtrInNurseryChunk(Condition cond, Register ptr, Register temp, Label* label) { MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual); MOZ_ASSERT(ptr != temp); MOZ_ASSERT(ptr != ScratchReg && ptr != ScratchReg2); // Both may be used internally. 
  MOZ_ASSERT(temp != ScratchReg && temp != ScratchReg2);  // Both may be used internally.

  And(ARMRegister(temp, 64), ARMRegister(ptr, 64),
      Operand(int32_t(~gc::ChunkMask)));
  branchPtr(InvertCondition(cond), Address(temp, gc::ChunkStoreBufferOffset),
            ImmWord(0), label);
}

void MacroAssembler::branchValueIsNurseryCell(Condition cond,
                                              const Address& address,
                                              Register temp, Label* label) {
  branchValueIsNurseryCellImpl(cond, address, temp, label);
}

void MacroAssembler::branchValueIsNurseryCell(Condition cond,
                                              ValueOperand value,
                                              Register temp, Label* label) {
  branchValueIsNurseryCellImpl(cond, value, temp, label);
}

template <typename T>
void MacroAssembler::branchValueIsNurseryCellImpl(Condition cond,
                                                  const T& value,
                                                  Register temp,
                                                  Label* label) {
  MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
  MOZ_ASSERT(temp != ScratchReg && temp != ScratchReg2);  // Both may be used internally.

  Label done;
  branchTestGCThing(Assembler::NotEqual, value,
                    cond == Assembler::Equal ? &done : label);

  getGCThingValueChunk(value, temp);
  branchPtr(InvertCondition(cond), Address(temp, gc::ChunkStoreBufferOffset),
            ImmWord(0), label);

  bind(&done);
}

void MacroAssembler::branchTestValue(Condition cond, const ValueOperand& lhs,
                                     const Value& rhs, Label* label) {
  MOZ_ASSERT(cond == Equal || cond == NotEqual);
  vixl::UseScratchRegisterScope temps(this);
  const ARMRegister scratch64 = temps.AcquireX();
  MOZ_ASSERT(scratch64.asUnsized() != lhs.valueReg());
  moveValue(rhs, ValueOperand(scratch64.asUnsized()));
  Cmp(ARMRegister(lhs.valueReg(), 64), scratch64);
  B(label, cond);
}

// ========================================================================
// Memory access primitives.

template <typename T>
void MacroAssembler::storeUnboxedValue(const ConstantOrRegister& value,
                                       MIRType valueType, const T& dest) {
  MOZ_ASSERT(valueType < MIRType::Value);
  if (valueType == MIRType::Double) {
    boxDouble(value.reg().typedReg().fpu(), dest);
    return;
  }

  if (value.constant()) {
    storeValue(value.value(), dest);
  } else {
    storeValue(ValueTypeFromMIRType(valueType), value.reg().typedReg().gpr(),
               dest);
  }
}

template void MacroAssembler::storeUnboxedValue(const ConstantOrRegister& value,
                                                MIRType valueType,
                                                const Address& dest);
template void MacroAssembler::storeUnboxedValue(
    const ConstantOrRegister& value, MIRType valueType,
    const BaseObjectElementIndex& dest);

void MacroAssembler::comment(const char* msg) { Assembler::comment(msg); }

// ========================================================================
// wasm support

FaultingCodeOffset MacroAssembler::wasmTrapInstruction() {
  AutoForbidPoolsAndNops afp(this,
                             /* max number of instructions in scope = */ 1);
  FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
  Unreachable();
  return fco;
}

void MacroAssembler::wasmBoundsCheck32(Condition cond, Register index,
                                       Register boundsCheckLimit, Label* ok) {
  branch32(cond, index, boundsCheckLimit, ok);
  if (JitOptions.spectreIndexMasking) {
    csel(ARMRegister(index, 32), vixl::wzr, ARMRegister(index, 32), cond);
  }
}

void MacroAssembler::wasmBoundsCheck32(Condition cond, Register index,
                                       Address boundsCheckLimit, Label* ok) {
  branch32(cond, index, boundsCheckLimit, ok);
  if (JitOptions.spectreIndexMasking) {
    csel(ARMRegister(index, 32), vixl::wzr, ARMRegister(index, 32), cond);
  }
}

void MacroAssembler::wasmBoundsCheck64(Condition cond, Register64 index,
                                       Register64 boundsCheckLimit, Label* ok) {
  branchPtr(cond, index.reg, boundsCheckLimit.reg, ok);
  if (JitOptions.spectreIndexMasking) {
    csel(ARMRegister(index.reg, 64), vixl::xzr, ARMRegister(index.reg, 64),
         cond);
  }
}

void
MacroAssembler::wasmBoundsCheck64(Condition cond, Register64 index, Address boundsCheckLimit, Label* ok) { branchPtr(InvertCondition(cond), boundsCheckLimit, index.reg, ok); if (JitOptions.spectreIndexMasking) { csel(ARMRegister(index.reg, 64), vixl::xzr, ARMRegister(index.reg, 64), cond); } } // FCVTZU behaves as follows: // // on NaN it produces zero // on too large it produces UINT_MAX (for appropriate type) // on too small it produces zero // // FCVTZS behaves as follows: // // on NaN it produces zero // on too large it produces INT_MAX (for appropriate type) // on too small it produces INT_MIN (ditto) void MacroAssembler::wasmTruncateDoubleToUInt32(FloatRegister input_, Register output_, bool isSaturating, Label* oolEntry) { ARMRegister output(output_, 32); ARMFPRegister input(input_, 64); Fcvtzu(output, input); if (!isSaturating) { Cmp(output, 0); Ccmp(output, -1, vixl::ZFlag, Assembler::NotEqual); B(oolEntry, Assembler::Equal); } } void MacroAssembler::wasmTruncateFloat32ToUInt32(FloatRegister input_, Register output_, bool isSaturating, Label* oolEntry) { ARMRegister output(output_, 32); ARMFPRegister input(input_, 32); Fcvtzu(output, input); if (!isSaturating) { Cmp(output, 0); Ccmp(output, -1, vixl::ZFlag, Assembler::NotEqual); B(oolEntry, Assembler::Equal); } } void MacroAssembler::wasmTruncateDoubleToInt32(FloatRegister input_, Register output_, bool isSaturating, Label* oolEntry) { ARMRegister output(output_, 32); ARMFPRegister input(input_, 64); Fcvtzs(output, input); if (!isSaturating) { Cmp(output, 0); Ccmp(output, INT32_MAX, vixl::ZFlag, Assembler::NotEqual); Ccmp(output, INT32_MIN, vixl::ZFlag, Assembler::NotEqual); B(oolEntry, Assembler::Equal); } } void MacroAssembler::wasmTruncateFloat32ToInt32(FloatRegister input_, Register output_, bool isSaturating, Label* oolEntry) { ARMRegister output(output_, 32); ARMFPRegister input(input_, 32); Fcvtzs(output, input); if (!isSaturating) { Cmp(output, 0); Ccmp(output, INT32_MAX, vixl::ZFlag, Assembler::NotEqual); Ccmp(output, INT32_MIN, vixl::ZFlag, Assembler::NotEqual); B(oolEntry, Assembler::Equal); } } void MacroAssembler::wasmTruncateDoubleToUInt64( FloatRegister input_, Register64 output_, bool isSaturating, Label* oolEntry, Label* oolRejoin, FloatRegister tempDouble) { MOZ_ASSERT(tempDouble.isInvalid()); ARMRegister output(output_.reg, 64); ARMFPRegister input(input_, 64); Fcvtzu(output, input); if (!isSaturating) { Cmp(output, 0); Ccmp(output, -1, vixl::ZFlag, Assembler::NotEqual); B(oolEntry, Assembler::Equal); bind(oolRejoin); } } void MacroAssembler::wasmTruncateFloat32ToUInt64( FloatRegister input_, Register64 output_, bool isSaturating, Label* oolEntry, Label* oolRejoin, FloatRegister tempDouble) { MOZ_ASSERT(tempDouble.isInvalid()); ARMRegister output(output_.reg, 64); ARMFPRegister input(input_, 32); Fcvtzu(output, input); if (!isSaturating) { Cmp(output, 0); Ccmp(output, -1, vixl::ZFlag, Assembler::NotEqual); B(oolEntry, Assembler::Equal); bind(oolRejoin); } } void MacroAssembler::wasmTruncateDoubleToInt64( FloatRegister input_, Register64 output_, bool isSaturating, Label* oolEntry, Label* oolRejoin, FloatRegister tempDouble) { MOZ_ASSERT(tempDouble.isInvalid()); ARMRegister output(output_.reg, 64); ARMFPRegister input(input_, 64); Fcvtzs(output, input); if (!isSaturating) { Cmp(output, 0); Ccmp(output, INT64_MAX, vixl::ZFlag, Assembler::NotEqual); Ccmp(output, INT64_MIN, vixl::ZFlag, Assembler::NotEqual); B(oolEntry, Assembler::Equal); bind(oolRejoin); } } void MacroAssembler::wasmTruncateFloat32ToInt64( 
    FloatRegister input_, Register64 output_, bool isSaturating,
    Label* oolEntry, Label* oolRejoin, FloatRegister tempDouble) {
  ARMRegister output(output_.reg, 64);
  ARMFPRegister input(input_, 32);
  Fcvtzs(output, input);
  if (!isSaturating) {
    Cmp(output, 0);
    Ccmp(output, INT64_MAX, vixl::ZFlag, Assembler::NotEqual);
    Ccmp(output, INT64_MIN, vixl::ZFlag, Assembler::NotEqual);
    B(oolEntry, Assembler::Equal);
    bind(oolRejoin);
  }
}

void MacroAssembler::oolWasmTruncateCheckF32ToI32(FloatRegister input,
                                                  Register output,
                                                  TruncFlags flags,
                                                  wasm::BytecodeOffset off,
                                                  Label* rejoin) {
  Label notNaN;
  branchFloat(Assembler::DoubleOrdered, input, input, &notNaN);
  wasmTrap(wasm::Trap::InvalidConversionToInteger, off);
  bind(&notNaN);

  Label isOverflow;
  const float two_31 = -float(INT32_MIN);
  ScratchFloat32Scope fpscratch(*this);
  if (flags & TRUNC_UNSIGNED) {
    loadConstantFloat32(two_31 * 2, fpscratch);
    branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
                &isOverflow);
    loadConstantFloat32(-1.0f, fpscratch);
    branchFloat(Assembler::DoubleGreaterThan, input, fpscratch, rejoin);
  } else {
    loadConstantFloat32(two_31, fpscratch);
    branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
                &isOverflow);
    loadConstantFloat32(-two_31, fpscratch);
    branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, rejoin);
  }
  bind(&isOverflow);
  wasmTrap(wasm::Trap::IntegerOverflow, off);
}

void MacroAssembler::oolWasmTruncateCheckF64ToI32(FloatRegister input,
                                                  Register output,
                                                  TruncFlags flags,
                                                  wasm::BytecodeOffset off,
                                                  Label* rejoin) {
  Label notNaN;
  branchDouble(Assembler::DoubleOrdered, input, input, &notNaN);
  wasmTrap(wasm::Trap::InvalidConversionToInteger, off);
  bind(&notNaN);

  Label isOverflow;
  const double two_31 = -double(INT32_MIN);
  ScratchDoubleScope fpscratch(*this);
  if (flags & TRUNC_UNSIGNED) {
    loadConstantDouble(two_31 * 2, fpscratch);
    branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
                 &isOverflow);
    loadConstantDouble(-1.0, fpscratch);
    branchDouble(Assembler::DoubleGreaterThan, input, fpscratch, rejoin);
  } else {
    loadConstantDouble(two_31, fpscratch);
    branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
                 &isOverflow);
    loadConstantDouble(-two_31 - 1, fpscratch);
    branchDouble(Assembler::DoubleGreaterThan, input, fpscratch, rejoin);
  }
  bind(&isOverflow);
  wasmTrap(wasm::Trap::IntegerOverflow, off);
}

void MacroAssembler::oolWasmTruncateCheckF32ToI64(FloatRegister input,
                                                  Register64 output,
                                                  TruncFlags flags,
                                                  wasm::BytecodeOffset off,
                                                  Label* rejoin) {
  Label notNaN;
  branchFloat(Assembler::DoubleOrdered, input, input, &notNaN);
  wasmTrap(wasm::Trap::InvalidConversionToInteger, off);
  bind(&notNaN);

  Label isOverflow;
  const float two_63 = -float(INT64_MIN);
  ScratchFloat32Scope fpscratch(*this);
  if (flags & TRUNC_UNSIGNED) {
    loadConstantFloat32(two_63 * 2, fpscratch);
    branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
                &isOverflow);
    loadConstantFloat32(-1.0f, fpscratch);
    branchFloat(Assembler::DoubleGreaterThan, input, fpscratch, rejoin);
  } else {
    loadConstantFloat32(two_63, fpscratch);
    branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
                &isOverflow);
    loadConstantFloat32(-two_63, fpscratch);
    branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, rejoin);
  }
  bind(&isOverflow);
  wasmTrap(wasm::Trap::IntegerOverflow, off);
}

void MacroAssembler::oolWasmTruncateCheckF64ToI64(FloatRegister input,
                                                  Register64 output,
                                                  TruncFlags flags,
                                                  wasm::BytecodeOffset off,
                                                  Label* rejoin) {
  Label notNaN;
  branchDouble(Assembler::DoubleOrdered, input, input, &notNaN);
  wasmTrap(wasm::Trap::InvalidConversionToInteger, off);
  bind(&notNaN);

  Label isOverflow;
  const double two_63 = -double(INT64_MIN);
  ScratchDoubleScope fpscratch(*this);
  if (flags & TRUNC_UNSIGNED) {
    loadConstantDouble(two_63 * 2, fpscratch);
    branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
                 &isOverflow);
    loadConstantDouble(-1.0, fpscratch);
    branchDouble(Assembler::DoubleGreaterThan, input, fpscratch, rejoin);
  } else {
    loadConstantDouble(two_63, fpscratch);
    branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
                 &isOverflow);
    loadConstantDouble(-two_63, fpscratch);
    branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, rejoin);
  }
  bind(&isOverflow);
  wasmTrap(wasm::Trap::IntegerOverflow, off);
}

void MacroAssembler::wasmLoad(const wasm::MemoryAccessDesc& access,
                              Register memoryBase, Register ptr,
                              AnyRegister output) {
  wasmLoadImpl(access, memoryBase, ptr, output, Register64::Invalid());
}

void MacroAssembler::wasmLoadI64(const wasm::MemoryAccessDesc& access,
                                 Register memoryBase, Register ptr,
                                 Register64 output) {
  wasmLoadImpl(access, memoryBase, ptr, AnyRegister(), output);
}

void MacroAssembler::wasmStore(const wasm::MemoryAccessDesc& access,
                               AnyRegister value, Register memoryBase,
                               Register ptr) {
  wasmStoreImpl(access, value, Register64::Invalid(), memoryBase, ptr);
}

void MacroAssembler::wasmStoreI64(const wasm::MemoryAccessDesc& access,
                                  Register64 value, Register memoryBase,
                                  Register ptr) {
  wasmStoreImpl(access, AnyRegister(), value, memoryBase, ptr);
}

void MacroAssembler::enterFakeExitFrameForWasm(Register cxreg,
                                               Register scratch,
                                               ExitFrameType type) {
  // Wasm stubs use the native SP, not the PSP.
  linkExitFrame(cxreg, scratch);

  MOZ_RELEASE_ASSERT(sp.Is(GetStackPointer64()));

  // SP has to be 16-byte aligned when we do a load/store, so push |type| twice
  // and then add 8 bytes to SP. This leaves SP unaligned.
  move32(Imm32(int32_t(type)), scratch);
  push(scratch, scratch);
  Add(sp, sp, 8);

  // Despite the above assertion, it is possible for control to flow from here
  // to the code generated by
  // MacroAssemblerCompat::handleFailureWithHandlerTail without any
  // intervening assignment to PSP. But handleFailureWithHandlerTail assumes
  // that PSP is the active stack pointer. Hence the following is necessary
  // for safety. Note we can't use initPseudoStackPtr here as that would
  // generate no instructions.
  Mov(PseudoStackPointer64, sp);
}

void MacroAssembler::widenInt32(Register r) {
  move32To64ZeroExtend(r, Register64(r));
}

// ========================================================================
// Convert floating point.
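// AArch64 has direct GPR-to-FP conversions: SCVTF for signed and UCVTF for
// unsigned sources, at both 32- and 64-bit widths. In particular, UCVTF
// handles an unsigned 64-bit source in a single instruction, so the
// conversions below need no integer temp (the temp parameter is asserted to
// be invalid). Illustrative only, for the uint64 -> double case:
//
//   ucvtf d0, x0   // d0 <- (double)(uint64_t)x0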
bool MacroAssembler::convertUInt64ToDoubleNeedsTemp() { return false; } void MacroAssembler::convertUInt64ToDouble(Register64 src, FloatRegister dest, Register temp) { MOZ_ASSERT(temp == Register::Invalid()); Ucvtf(ARMFPRegister(dest, 64), ARMRegister(src.reg, 64)); } void MacroAssembler::convertInt64ToDouble(Register64 src, FloatRegister dest) { Scvtf(ARMFPRegister(dest, 64), ARMRegister(src.reg, 64)); } void MacroAssembler::convertUInt64ToFloat32(Register64 src, FloatRegister dest, Register temp) { MOZ_ASSERT(temp == Register::Invalid()); Ucvtf(ARMFPRegister(dest, 32), ARMRegister(src.reg, 64)); } void MacroAssembler::convertInt64ToFloat32(Register64 src, FloatRegister dest) { Scvtf(ARMFPRegister(dest, 32), ARMRegister(src.reg, 64)); } void MacroAssembler::convertIntPtrToDouble(Register src, FloatRegister dest) { convertInt64ToDouble(Register64(src), dest); } // ======================================================================== // Primitive atomic operations. // The computed MemOperand must be Reg+0 because the load/store exclusive // instructions only take a single pointer register. static MemOperand ComputePointerForAtomic(MacroAssembler& masm, const Address& address, Register scratch) { if (address.offset == 0) { return MemOperand(X(masm, address.base), 0); } masm.Add(X(scratch), X(masm, address.base), address.offset); return MemOperand(X(scratch), 0); } static MemOperand ComputePointerForAtomic(MacroAssembler& masm, const BaseIndex& address, Register scratch) { masm.Add(X(scratch), X(masm, address.base), Operand(X(address.index), vixl::LSL, address.scale)); if (address.offset) { masm.Add(X(scratch), X(scratch), address.offset); } return MemOperand(X(scratch), 0); } // This sign extends to targetWidth and leaves any higher bits zero. static void SignOrZeroExtend(MacroAssembler& masm, Scalar::Type srcType, Width targetWidth, Register src, Register dest) { bool signExtend = Scalar::isSignedIntType(srcType); switch (Scalar::byteSize(srcType)) { case 1: if (signExtend) { masm.Sbfm(R(dest, targetWidth), R(src, targetWidth), 0, 7); } else { masm.Ubfm(R(dest, targetWidth), R(src, targetWidth), 0, 7); } break; case 2: if (signExtend) { masm.Sbfm(R(dest, targetWidth), R(src, targetWidth), 0, 15); } else { masm.Ubfm(R(dest, targetWidth), R(src, targetWidth), 0, 15); } break; case 4: if (targetWidth == Width::_64) { if (signExtend) { masm.Sbfm(X(dest), X(src), 0, 31); } else { masm.Ubfm(X(dest), X(src), 0, 31); } } else if (src != dest) { masm.Mov(R(dest, targetWidth), R(src, targetWidth)); } break; case 8: if (src != dest) { masm.Mov(R(dest, targetWidth), R(src, targetWidth)); } break; default: MOZ_CRASH(); } } // Exclusive-loads zero-extend their values to the full width of the X register. // // Note, we've promised to leave the high bits of the 64-bit register clear if // the targetWidth is 32. static void LoadExclusive(MacroAssembler& masm, const wasm::MemoryAccessDesc* access, Scalar::Type srcType, Width targetWidth, MemOperand ptr, Register dest) { bool signExtend = Scalar::isSignedIntType(srcType); // With this address form, a single native ldxr* will be emitted, and the // AutoForbidPoolsAndNops ensures that the metadata is emitted at the // address of the ldxr*. Note that the use of AutoForbidPoolsAndNops is now // a "second class" solution; the right way to do this would be to have the // masm. calls produce an FaultingCodeOffset, and hand that value to // `masm.append`. 
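  // Illustrative expansion (register numbers are examples only): a signed
  // byte load into w0 from [x16] with a 64-bit target width emits roughly
  //
  //   ldxrb w0, [x16]        // exclusive load, zero-extends into x0
  //   sbfm  x0, x0, #0, #7   // i.e. sxtb: sign-extend the low byte
  //
  // whereas unsigned loads simply rely on the zero-extension done by ldxr*.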
MOZ_ASSERT(ptr.IsImmediateOffset() && ptr.offset() == 0); switch (Scalar::byteSize(srcType)) { case 1: { { AutoForbidPoolsAndNops afp( &masm, /* max number of instructions in scope = */ 1); if (access) { masm.append(*access, wasm::TrapMachineInsn::Load8, FaultingCodeOffset(masm.currentOffset())); } masm.Ldxrb(W(dest), ptr); } if (signExtend) { masm.Sbfm(R(dest, targetWidth), R(dest, targetWidth), 0, 7); } break; } case 2: { { AutoForbidPoolsAndNops afp( &masm, /* max number of instructions in scope = */ 1); if (access) { masm.append(*access, wasm::TrapMachineInsn::Load16, FaultingCodeOffset(masm.currentOffset())); } masm.Ldxrh(W(dest), ptr); } if (signExtend) { masm.Sbfm(R(dest, targetWidth), R(dest, targetWidth), 0, 15); } break; } case 4: { { AutoForbidPoolsAndNops afp( &masm, /* max number of instructions in scope = */ 1); if (access) { masm.append(*access, wasm::TrapMachineInsn::Load32, FaultingCodeOffset(masm.currentOffset())); } masm.Ldxr(W(dest), ptr); } if (targetWidth == Width::_64 && signExtend) { masm.Sbfm(X(dest), X(dest), 0, 31); } break; } case 8: { { AutoForbidPoolsAndNops afp( &masm, /* max number of instructions in scope = */ 1); if (access) { masm.append(*access, wasm::TrapMachineInsn::Load64, FaultingCodeOffset(masm.currentOffset())); } masm.Ldxr(X(dest), ptr); } break; } default: { MOZ_CRASH(); } } } static void StoreExclusive(MacroAssembler& masm, Scalar::Type type, Register status, Register src, MemOperand ptr) { // Note, these are not decorated with a TrapSite only because they are // assumed to be preceded by a LoadExclusive to the same address, of the // same width, so that will always take the page fault if the address is bad. switch (Scalar::byteSize(type)) { case 1: masm.Stxrb(W(status), W(src), ptr); break; case 2: masm.Stxrh(W(status), W(src), ptr); break; case 4: masm.Stxr(W(status), W(src), ptr); break; case 8: masm.Stxr(W(status), X(src), ptr); break; } } static bool HasAtomicInstructions(MacroAssembler& masm) { return masm.asVIXL().GetCPUFeatures()->Has(vixl::CPUFeatures::kAtomics); } static inline bool SupportedAtomicInstructionOperands(Scalar::Type type, Width targetWidth) { if (targetWidth == Width::_32) { return byteSize(type) <= 4; } if (targetWidth == Width::_64) { return byteSize(type) == 8; } return false; } template static void CompareExchange(MacroAssembler& masm, const wasm::MemoryAccessDesc* access, Scalar::Type type, Width targetWidth, const Synchronization& sync, const T& mem, Register oldval, Register newval, Register output) { MOZ_ASSERT(oldval != output && newval != output); vixl::UseScratchRegisterScope temps(&masm); Register ptrScratch = temps.AcquireX().asUnsized(); MemOperand ptr = ComputePointerForAtomic(masm, mem, ptrScratch); MOZ_ASSERT(ptr.base().asUnsized() != output); if (HasAtomicInstructions(masm) && SupportedAtomicInstructionOperands(type, targetWidth)) { masm.Mov(X(output), X(oldval)); // Capal is using same atomic mechanism as Ldxr/Stxr, and // consider it is the same for "Inner Shareable" domain. // Not updated gen_cmpxchg in GenerateAtomicOperations.py. 
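    // Fast path: CASALB/CASALH/CASAL perform the compare-and-swap as a single
    // instruction with acquire-release semantics. These belong to the LSE
    // atomics (ARMv8.1, FEAT_LSE) that HasAtomicInstructions() checked above.
    // Per the CAS register convention, `output` holds the expected value on
    // entry and receives the memory's previous value on exit.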
masm.memoryBarrierBefore(sync); { AutoForbidPoolsAndNops afp(&masm, /* number of insns = */ 1); if (access) { masm.append(*access, wasm::TrapMachineInsn::Atomic, FaultingCodeOffset(masm.currentOffset())); } switch (byteSize(type)) { case 1: masm.Casalb(R(output, targetWidth), R(newval, targetWidth), ptr); break; case 2: masm.Casalh(R(output, targetWidth), R(newval, targetWidth), ptr); break; case 4: case 8: masm.Casal(R(output, targetWidth), R(newval, targetWidth), ptr); break; default: MOZ_CRASH("CompareExchange unsupported type"); } } masm.memoryBarrierAfter(sync); SignOrZeroExtend(masm, type, targetWidth, output, output); return; } // The target doesn't support atomics, so generate a LL-SC loop. This requires // only AArch64 v8.0. Label again; Label done; // NOTE: the generated code must match the assembly code in gen_cmpxchg in // GenerateAtomicOperations.py masm.memoryBarrierBefore(sync); Register scratch = temps.AcquireX().asUnsized(); masm.bind(&again); SignOrZeroExtend(masm, type, targetWidth, oldval, scratch); LoadExclusive(masm, access, type, targetWidth, ptr, output); masm.Cmp(R(output, targetWidth), R(scratch, targetWidth)); masm.B(&done, MacroAssembler::NotEqual); StoreExclusive(masm, type, scratch, newval, ptr); masm.Cbnz(W(scratch), &again); masm.bind(&done); masm.memoryBarrierAfter(sync); } template static void AtomicExchange(MacroAssembler& masm, const wasm::MemoryAccessDesc* access, Scalar::Type type, Width targetWidth, const Synchronization& sync, const T& mem, Register value, Register output) { MOZ_ASSERT(value != output); vixl::UseScratchRegisterScope temps(&masm); Register ptrScratch = temps.AcquireX().asUnsized(); MemOperand ptr = ComputePointerForAtomic(masm, mem, ptrScratch); if (HasAtomicInstructions(masm) && SupportedAtomicInstructionOperands(type, targetWidth)) { // Swpal is using same atomic mechanism as Ldxr/Stxr, and // consider it is the same for "Inner Shareable" domain. // Not updated gen_exchange in GenerateAtomicOperations.py. masm.memoryBarrierBefore(sync); { AutoForbidPoolsAndNops afp(&masm, /* number of insns = */ 1); if (access) { masm.append(*access, wasm::TrapMachineInsn::Atomic, FaultingCodeOffset(masm.currentOffset())); } switch (byteSize(type)) { case 1: masm.Swpalb(R(value, targetWidth), R(output, targetWidth), ptr); break; case 2: masm.Swpalh(R(value, targetWidth), R(output, targetWidth), ptr); break; case 4: case 8: masm.Swpal(R(value, targetWidth), R(output, targetWidth), ptr); break; default: MOZ_CRASH("AtomicExchange unsupported type"); } } masm.memoryBarrierAfter(sync); SignOrZeroExtend(masm, type, targetWidth, output, output); return; } // The target doesn't support atomics, so generate a LL-SC loop. This requires // only AArch64 v8.0. 
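  // The fallback loop below has the usual load-exclusive/store-exclusive
  // shape; the store reports failure in `scratch`, and we retry until it
  // succeeds:
  //
  //   again:
  //     ldxr  output, [ptr]           // (ldxrb/ldxrh for narrow types)
  //     stxr  w_scratch, value, [ptr]
  //     cbnz  w_scratch, again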
Label again; // NOTE: the generated code must match the assembly code in gen_exchange in // GenerateAtomicOperations.py masm.memoryBarrierBefore(sync); Register scratch = temps.AcquireX().asUnsized(); masm.bind(&again); LoadExclusive(masm, access, type, targetWidth, ptr, output); StoreExclusive(masm, type, scratch, value, ptr); masm.Cbnz(W(scratch), &again); masm.memoryBarrierAfter(sync); } template static void AtomicFetchOp(MacroAssembler& masm, const wasm::MemoryAccessDesc* access, Scalar::Type type, Width targetWidth, const Synchronization& sync, AtomicOp op, const T& mem, Register value, Register temp, Register output) { MOZ_ASSERT(value != output); MOZ_ASSERT(value != temp); MOZ_ASSERT_IF(wantResult, output != temp); vixl::UseScratchRegisterScope temps(&masm); Register ptrScratch = temps.AcquireX().asUnsized(); MemOperand ptr = ComputePointerForAtomic(masm, mem, ptrScratch); if (HasAtomicInstructions(masm) && SupportedAtomicInstructionOperands(type, targetWidth) && !isFloatingType(type)) { // LdXXXal/StXXXl is using same atomic mechanism as Ldxr/Stxr, and // consider it is the same for "Inner Shareable" domain. // Not updated gen_fetchop in GenerateAtomicOperations.py. masm.memoryBarrierBefore(sync); #define FETCH_OP_CASE(op, arg) \ { \ AutoForbidPoolsAndNops afp(&masm, /* num insns = */ 1); \ if (access) { \ masm.append(*access, wasm::TrapMachineInsn::Atomic, \ FaultingCodeOffset(masm.currentOffset())); \ } \ switch (byteSize(type)) { \ case 1: \ if (wantResult) { \ masm.Ld##op##alb(R(arg, targetWidth), R(output, targetWidth), ptr); \ } else { \ masm.St##op##lb(R(arg, targetWidth), ptr); \ } \ break; \ case 2: \ if (wantResult) { \ masm.Ld##op##alh(R(arg, targetWidth), R(output, targetWidth), ptr); \ } else { \ masm.St##op##lh(R(arg, targetWidth), ptr); \ } \ break; \ case 4: \ case 8: \ if (wantResult) { \ masm.Ld##op##al(R(arg, targetWidth), R(output, targetWidth), ptr); \ } else { \ masm.St##op##l(R(arg, targetWidth), ptr); \ } \ break; \ default: \ MOZ_CRASH("AtomicFetchOp unsupported type"); \ } \ } switch (op) { case AtomicFetchAddOp: FETCH_OP_CASE(add, value); break; case AtomicFetchSubOp: { Register scratch = temps.AcquireX().asUnsized(); masm.Neg(X(scratch), X(value)); FETCH_OP_CASE(add, scratch); break; } case AtomicFetchAndOp: { Register scratch = temps.AcquireX().asUnsized(); masm.Eor(X(scratch), X(value), Operand(~0)); FETCH_OP_CASE(clr, scratch); break; } case AtomicFetchOrOp: FETCH_OP_CASE(set, value); break; case AtomicFetchXorOp: FETCH_OP_CASE(eor, value); break; } masm.memoryBarrierAfter(sync); if (wantResult) { SignOrZeroExtend(masm, type, targetWidth, output, output); } return; } #undef FETCH_OP_CASE // The target doesn't support atomics, so generate a LL-SC loop. This requires // only AArch64 v8.0. 
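  // Note on the LSE path above: A64 has no LDSUB/LDAND, so a fetch-sub is
  // issued as LDADD of the negated operand and a fetch-and as LDCLR (bit
  // clear) of the complemented operand. The fallback below is the same
  // exclusive-load loop as for the other operations, with the ALU op applied
  // into `temp` before the store-exclusive:
  //
  //   again:
  //     ldxr  output, [ptr]
  //     <op>  temp, output, value     // add/sub/and/orr/eor
  //     stxr  w_scratch, temp, [ptr]
  //     cbnz  w_scratch, again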
Label again; // NOTE: the generated code must match the assembly code in gen_fetchop in // GenerateAtomicOperations.py masm.memoryBarrierBefore(sync); Register scratch = temps.AcquireX().asUnsized(); masm.bind(&again); LoadExclusive(masm, access, type, targetWidth, ptr, output); switch (op) { case AtomicFetchAddOp: masm.Add(X(temp), X(output), X(value)); break; case AtomicFetchSubOp: masm.Sub(X(temp), X(output), X(value)); break; case AtomicFetchAndOp: masm.And(X(temp), X(output), X(value)); break; case AtomicFetchOrOp: masm.Orr(X(temp), X(output), X(value)); break; case AtomicFetchXorOp: masm.Eor(X(temp), X(output), X(value)); break; } StoreExclusive(masm, type, scratch, temp, ptr); masm.Cbnz(W(scratch), &again); if (wantResult) { SignOrZeroExtend(masm, type, targetWidth, output, output); } masm.memoryBarrierAfter(sync); } void MacroAssembler::compareExchange(Scalar::Type type, const Synchronization& sync, const Address& mem, Register oldval, Register newval, Register output) { CompareExchange(*this, nullptr, type, Width::_32, sync, mem, oldval, newval, output); } void MacroAssembler::compareExchange(Scalar::Type type, const Synchronization& sync, const BaseIndex& mem, Register oldval, Register newval, Register output) { CompareExchange(*this, nullptr, type, Width::_32, sync, mem, oldval, newval, output); } void MacroAssembler::compareExchange64(const Synchronization& sync, const Address& mem, Register64 expect, Register64 replace, Register64 output) { CompareExchange(*this, nullptr, Scalar::Int64, Width::_64, sync, mem, expect.reg, replace.reg, output.reg); } void MacroAssembler::compareExchange64(const Synchronization& sync, const BaseIndex& mem, Register64 expect, Register64 replace, Register64 output) { CompareExchange(*this, nullptr, Scalar::Int64, Width::_64, sync, mem, expect.reg, replace.reg, output.reg); } void MacroAssembler::atomicExchange64(const Synchronization& sync, const Address& mem, Register64 value, Register64 output) { AtomicExchange(*this, nullptr, Scalar::Int64, Width::_64, sync, mem, value.reg, output.reg); } void MacroAssembler::atomicExchange64(const Synchronization& sync, const BaseIndex& mem, Register64 value, Register64 output) { AtomicExchange(*this, nullptr, Scalar::Int64, Width::_64, sync, mem, value.reg, output.reg); } void MacroAssembler::atomicFetchOp64(const Synchronization& sync, AtomicOp op, Register64 value, const Address& mem, Register64 temp, Register64 output) { AtomicFetchOp(*this, nullptr, Scalar::Int64, Width::_64, sync, op, mem, value.reg, temp.reg, output.reg); } void MacroAssembler::atomicFetchOp64(const Synchronization& sync, AtomicOp op, Register64 value, const BaseIndex& mem, Register64 temp, Register64 output) { AtomicFetchOp(*this, nullptr, Scalar::Int64, Width::_64, sync, op, mem, value.reg, temp.reg, output.reg); } void MacroAssembler::atomicEffectOp64(const Synchronization& sync, AtomicOp op, Register64 value, const Address& mem, Register64 temp) { AtomicFetchOp(*this, nullptr, Scalar::Int64, Width::_64, sync, op, mem, value.reg, temp.reg, temp.reg); } void MacroAssembler::atomicEffectOp64(const Synchronization& sync, AtomicOp op, Register64 value, const BaseIndex& mem, Register64 temp) { AtomicFetchOp(*this, nullptr, Scalar::Int64, Width::_64, sync, op, mem, value.reg, temp.reg, temp.reg); } void MacroAssembler::wasmCompareExchange(const wasm::MemoryAccessDesc& access, const Address& mem, Register oldval, Register newval, Register output) { CompareExchange(*this, &access, access.type(), Width::_32, access.sync(), mem, oldval, newval, 
output); } void MacroAssembler::wasmCompareExchange(const wasm::MemoryAccessDesc& access, const BaseIndex& mem, Register oldval, Register newval, Register output) { CompareExchange(*this, &access, access.type(), Width::_32, access.sync(), mem, oldval, newval, output); } void MacroAssembler::atomicExchange(Scalar::Type type, const Synchronization& sync, const Address& mem, Register value, Register output) { AtomicExchange(*this, nullptr, type, Width::_32, sync, mem, value, output); } void MacroAssembler::atomicExchange(Scalar::Type type, const Synchronization& sync, const BaseIndex& mem, Register value, Register output) { AtomicExchange(*this, nullptr, type, Width::_32, sync, mem, value, output); } void MacroAssembler::wasmAtomicExchange(const wasm::MemoryAccessDesc& access, const Address& mem, Register value, Register output) { AtomicExchange(*this, &access, access.type(), Width::_32, access.sync(), mem, value, output); } void MacroAssembler::wasmAtomicExchange(const wasm::MemoryAccessDesc& access, const BaseIndex& mem, Register value, Register output) { AtomicExchange(*this, &access, access.type(), Width::_32, access.sync(), mem, value, output); } void MacroAssembler::atomicFetchOp(Scalar::Type type, const Synchronization& sync, AtomicOp op, Register value, const Address& mem, Register temp, Register output) { AtomicFetchOp(*this, nullptr, type, Width::_32, sync, op, mem, value, temp, output); } void MacroAssembler::atomicFetchOp(Scalar::Type type, const Synchronization& sync, AtomicOp op, Register value, const BaseIndex& mem, Register temp, Register output) { AtomicFetchOp(*this, nullptr, type, Width::_32, sync, op, mem, value, temp, output); } void MacroAssembler::wasmAtomicFetchOp(const wasm::MemoryAccessDesc& access, AtomicOp op, Register value, const Address& mem, Register temp, Register output) { AtomicFetchOp(*this, &access, access.type(), Width::_32, access.sync(), op, mem, value, temp, output); } void MacroAssembler::wasmAtomicFetchOp(const wasm::MemoryAccessDesc& access, AtomicOp op, Register value, const BaseIndex& mem, Register temp, Register output) { AtomicFetchOp(*this, &access, access.type(), Width::_32, access.sync(), op, mem, value, temp, output); } void MacroAssembler::wasmAtomicEffectOp(const wasm::MemoryAccessDesc& access, AtomicOp op, Register value, const Address& mem, Register temp) { AtomicFetchOp(*this, &access, access.type(), Width::_32, access.sync(), op, mem, value, temp, temp); } void MacroAssembler::wasmAtomicEffectOp(const wasm::MemoryAccessDesc& access, AtomicOp op, Register value, const BaseIndex& mem, Register temp) { AtomicFetchOp(*this, &access, access.type(), Width::_32, access.sync(), op, mem, value, temp, temp); } void MacroAssembler::wasmCompareExchange64(const wasm::MemoryAccessDesc& access, const Address& mem, Register64 expect, Register64 replace, Register64 output) { CompareExchange(*this, &access, Scalar::Int64, Width::_64, access.sync(), mem, expect.reg, replace.reg, output.reg); } void MacroAssembler::wasmCompareExchange64(const wasm::MemoryAccessDesc& access, const BaseIndex& mem, Register64 expect, Register64 replace, Register64 output) { CompareExchange(*this, &access, Scalar::Int64, Width::_64, access.sync(), mem, expect.reg, replace.reg, output.reg); } void MacroAssembler::wasmAtomicExchange64(const wasm::MemoryAccessDesc& access, const Address& mem, Register64 value, Register64 output) { AtomicExchange(*this, &access, Scalar::Int64, Width::_64, access.sync(), mem, value.reg, output.reg); } void 
MacroAssembler::wasmAtomicExchange64(const wasm::MemoryAccessDesc& access, const BaseIndex& mem, Register64 value, Register64 output) { AtomicExchange(*this, &access, Scalar::Int64, Width::_64, access.sync(), mem, value.reg, output.reg); } void MacroAssembler::wasmAtomicFetchOp64(const wasm::MemoryAccessDesc& access, AtomicOp op, Register64 value, const Address& mem, Register64 temp, Register64 output) { AtomicFetchOp(*this, &access, Scalar::Int64, Width::_64, access.sync(), op, mem, value.reg, temp.reg, output.reg); } void MacroAssembler::wasmAtomicFetchOp64(const wasm::MemoryAccessDesc& access, AtomicOp op, Register64 value, const BaseIndex& mem, Register64 temp, Register64 output) { AtomicFetchOp(*this, &access, Scalar::Int64, Width::_64, access.sync(), op, mem, value.reg, temp.reg, output.reg); } void MacroAssembler::wasmAtomicEffectOp64(const wasm::MemoryAccessDesc& access, AtomicOp op, Register64 value, const BaseIndex& mem, Register64 temp) { AtomicFetchOp(*this, &access, Scalar::Int64, Width::_64, access.sync(), op, mem, value.reg, temp.reg, temp.reg); } // ======================================================================== // JS atomic operations. template static void CompareExchangeJS(MacroAssembler& masm, Scalar::Type arrayType, const Synchronization& sync, const T& mem, Register oldval, Register newval, Register temp, AnyRegister output) { if (arrayType == Scalar::Uint32) { masm.compareExchange(arrayType, sync, mem, oldval, newval, temp); masm.convertUInt32ToDouble(temp, output.fpu()); } else { masm.compareExchange(arrayType, sync, mem, oldval, newval, output.gpr()); } } void MacroAssembler::compareExchangeJS(Scalar::Type arrayType, const Synchronization& sync, const Address& mem, Register oldval, Register newval, Register temp, AnyRegister output) { CompareExchangeJS(*this, arrayType, sync, mem, oldval, newval, temp, output); } void MacroAssembler::compareExchangeJS(Scalar::Type arrayType, const Synchronization& sync, const BaseIndex& mem, Register oldval, Register newval, Register temp, AnyRegister output) { CompareExchangeJS(*this, arrayType, sync, mem, oldval, newval, temp, output); } template static void AtomicExchangeJS(MacroAssembler& masm, Scalar::Type arrayType, const Synchronization& sync, const T& mem, Register value, Register temp, AnyRegister output) { if (arrayType == Scalar::Uint32) { masm.atomicExchange(arrayType, sync, mem, value, temp); masm.convertUInt32ToDouble(temp, output.fpu()); } else { masm.atomicExchange(arrayType, sync, mem, value, output.gpr()); } } void MacroAssembler::atomicExchangeJS(Scalar::Type arrayType, const Synchronization& sync, const Address& mem, Register value, Register temp, AnyRegister output) { AtomicExchangeJS(*this, arrayType, sync, mem, value, temp, output); } void MacroAssembler::atomicExchangeJS(Scalar::Type arrayType, const Synchronization& sync, const BaseIndex& mem, Register value, Register temp, AnyRegister output) { AtomicExchangeJS(*this, arrayType, sync, mem, value, temp, output); } template static void AtomicFetchOpJS(MacroAssembler& masm, Scalar::Type arrayType, const Synchronization& sync, AtomicOp op, Register value, const T& mem, Register temp1, Register temp2, AnyRegister output) { if (arrayType == Scalar::Uint32) { masm.atomicFetchOp(arrayType, sync, op, value, mem, temp2, temp1); masm.convertUInt32ToDouble(temp1, output.fpu()); } else { masm.atomicFetchOp(arrayType, sync, op, value, mem, temp1, output.gpr()); } } void MacroAssembler::atomicFetchOpJS(Scalar::Type arrayType, const Synchronization& sync, AtomicOp 
op, Register value, const Address& mem, Register temp1, Register temp2, AnyRegister output) { AtomicFetchOpJS(*this, arrayType, sync, op, value, mem, temp1, temp2, output); } void MacroAssembler::atomicFetchOpJS(Scalar::Type arrayType, const Synchronization& sync, AtomicOp op, Register value, const BaseIndex& mem, Register temp1, Register temp2, AnyRegister output) { AtomicFetchOpJS(*this, arrayType, sync, op, value, mem, temp1, temp2, output); } void MacroAssembler::atomicEffectOpJS(Scalar::Type arrayType, const Synchronization& sync, AtomicOp op, Register value, const BaseIndex& mem, Register temp) { AtomicFetchOp(*this, nullptr, arrayType, Width::_32, sync, op, mem, value, temp, temp); } void MacroAssembler::atomicEffectOpJS(Scalar::Type arrayType, const Synchronization& sync, AtomicOp op, Register value, const Address& mem, Register temp) { AtomicFetchOp(*this, nullptr, arrayType, Width::_32, sync, op, mem, value, temp, temp); } void MacroAssembler::flexibleQuotient32(Register rhs, Register srcDest, bool isUnsigned, const LiveRegisterSet&) { quotient32(rhs, srcDest, isUnsigned); } void MacroAssembler::flexibleRemainder32(Register rhs, Register srcDest, bool isUnsigned, const LiveRegisterSet&) { remainder32(rhs, srcDest, isUnsigned); } void MacroAssembler::flexibleDivMod32(Register rhs, Register srcDest, Register remOutput, bool isUnsigned, const LiveRegisterSet&) { vixl::UseScratchRegisterScope temps(this); ARMRegister scratch = temps.AcquireW(); ARMRegister src = temps.AcquireW(); // Preserve src for remainder computation Mov(src, ARMRegister(srcDest, 32)); if (isUnsigned) { Udiv(ARMRegister(srcDest, 32), src, ARMRegister(rhs, 32)); } else { Sdiv(ARMRegister(srcDest, 32), src, ARMRegister(rhs, 32)); } // Compute remainder Mul(scratch, ARMRegister(srcDest, 32), ARMRegister(rhs, 32)); Sub(ARMRegister(remOutput, 32), src, scratch); } CodeOffset MacroAssembler::moveNearAddressWithPatch(Register dest) { AutoForbidPoolsAndNops afp(this, /* max number of instructions in scope = */ 1); CodeOffset offset(currentOffset()); adr(ARMRegister(dest, 64), 0, LabelDoc()); return offset; } void MacroAssembler::patchNearAddressMove(CodeLocationLabel loc, CodeLocationLabel target) { ptrdiff_t off = target - loc; MOZ_RELEASE_ASSERT(vixl::IsInt21(off)); Instruction* cur = reinterpret_cast(loc.raw()); MOZ_ASSERT(cur->IsADR()); vixl::Register rd = vixl::Register::XRegFromCode(cur->Rd()); adr(cur, rd, off); } // ======================================================================== // Spectre Mitigations. void MacroAssembler::speculationBarrier() { // Conditional speculation barrier. csdb(); } void MacroAssembler::floorFloat32ToInt32(FloatRegister src, Register dest, Label* fail) { ARMFPRegister iFlt(src, 32); ARMRegister o64(dest, 64); ARMRegister o32(dest, 32); Label handleZero; Label fin; // Handle ±0 and NaN first. Fcmp(iFlt, 0.0); B(Assembler::Equal, &handleZero); // NaN is always a bail condition, just bail directly. B(Assembler::Overflow, fail); // Round towards negative infinity. Fcvtms(o64, iFlt); // Sign extend lower 32 bits to test if the result isn't an Int32. Cmp(o64, Operand(o64, vixl::SXTW)); B(NotEqual, fail); // Clear upper 32 bits. Uxtw(o64, o64); B(&fin); bind(&handleZero); // Move the top word of the float into the output reg, if it is non-zero, // then the original value was -0.0. 
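  // (+0.0f has an all-zero bit pattern, while -0.0f is 0x8000'0000, so any
  // non-zero bits after the Fmov mean the input was negative zero.)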
Fmov(o32, iFlt); Cbnz(o32, fail); bind(&fin); } void MacroAssembler::floorDoubleToInt32(FloatRegister src, Register dest, Label* fail) { ARMFPRegister iDbl(src, 64); ARMRegister o64(dest, 64); ARMRegister o32(dest, 32); Label handleZero; Label fin; // Handle ±0 and NaN first. Fcmp(iDbl, 0.0); B(Assembler::Equal, &handleZero); // NaN is always a bail condition, just bail directly. B(Assembler::Overflow, fail); // Round towards negative infinity. Fcvtms(o64, iDbl); // Sign extend lower 32 bits to test if the result isn't an Int32. Cmp(o64, Operand(o64, vixl::SXTW)); B(NotEqual, fail); // Clear upper 32 bits. Uxtw(o64, o64); B(&fin); bind(&handleZero); // Move the top word of the double into the output reg, if it is non-zero, // then the original value was -0.0. Fmov(o64, iDbl); Cbnz(o64, fail); bind(&fin); } void MacroAssembler::ceilFloat32ToInt32(FloatRegister src, Register dest, Label* fail) { ARMFPRegister iFlt(src, 32); ARMRegister o64(dest, 64); ARMRegister o32(dest, 32); Label handleZero; Label fin; // Round towards positive infinity. Fcvtps(o64, iFlt); // Sign extend lower 32 bits to test if the result isn't an Int32. Cmp(o64, Operand(o64, vixl::SXTW)); B(NotEqual, fail); // We have to check for (-1, -0] and NaN when the result is zero. Cbz(o64, &handleZero); // Clear upper 32 bits. Uxtw(o64, o64); B(&fin); // Bail if the input is in (-1, -0] or NaN. bind(&handleZero); // Move the top word of the float into the output reg, if it is non-zero, // then the original value wasn't +0.0. Fmov(o32, iFlt); Cbnz(o32, fail); bind(&fin); } void MacroAssembler::ceilDoubleToInt32(FloatRegister src, Register dest, Label* fail) { ARMFPRegister iDbl(src, 64); ARMRegister o64(dest, 64); ARMRegister o32(dest, 32); Label handleZero; Label fin; // Round towards positive infinity. Fcvtps(o64, iDbl); // Sign extend lower 32 bits to test if the result isn't an Int32. Cmp(o64, Operand(o64, vixl::SXTW)); B(NotEqual, fail); // We have to check for (-1, -0] and NaN when the result is zero. Cbz(o64, &handleZero); // Clear upper 32 bits. Uxtw(o64, o64); B(&fin); // Bail if the input is in (-1, -0] or NaN. bind(&handleZero); // Move the top word of the double into the output reg, if it is non-zero, // then the original value wasn't +0.0. Fmov(o64, iDbl); Cbnz(o64, fail); bind(&fin); } void MacroAssembler::truncFloat32ToInt32(FloatRegister src, Register dest, Label* fail) { ARMFPRegister src32(src, 32); ARMRegister dest32(dest, 32); ARMRegister dest64(dest, 64); Label done, zeroCase; // Convert scalar to signed 64-bit fixed-point, rounding toward zero. // In the case of overflow, the output is saturated. // In the case of NaN and -0, the output is zero. Fcvtzs(dest64, src32); // If the output was zero, worry about special cases. Cbz(dest64, &zeroCase); // Sign extend lower 32 bits to test if the result isn't an Int32. Cmp(dest64, Operand(dest64, vixl::SXTW)); B(NotEqual, fail); // Clear upper 32 bits. Uxtw(dest64, dest64); // If the output was non-zero and wasn't saturated, just return it. B(&done); // Handle the case of a zero output: // 1. The input may have been NaN, requiring a failure. // 2. The input may have been in (-1,-0], requiring a failure. { bind(&zeroCase); // Combine test for negative and NaN values using a single bitwise // operation. 
// // | Decimal number | Bitwise representation | // |----------------|------------------------| // | -0 | 8000'0000 | // | +0 | 0000'0000 | // | +1 | 3f80'0000 | // | NaN (or +Inf) | 7fyx'xxxx, y >= 8 | // | -NaN (or -Inf) | ffyx'xxxx, y >= 8 | // // If any of two most significant bits is set, the number isn't in [0, 1). // (Recall that floating point numbers, except for NaN, are strictly ordered // when comparing their bitwise representation as signed integers.) Fmov(dest32, src32); Lsr(dest32, dest32, 30); Cbnz(dest32, fail); } bind(&done); } void MacroAssembler::truncDoubleToInt32(FloatRegister src, Register dest, Label* fail) { ARMFPRegister src64(src, 64); ARMRegister dest64(dest, 64); ARMRegister dest32(dest, 32); Label done, zeroCase; // Convert scalar to signed 64-bit fixed-point, rounding toward zero. // In the case of overflow, the output is saturated. // In the case of NaN and -0, the output is zero. Fcvtzs(dest64, src64); // If the output was zero, worry about special cases. Cbz(dest64, &zeroCase); // Sign extend lower 32 bits to test if the result isn't an Int32. Cmp(dest64, Operand(dest64, vixl::SXTW)); B(NotEqual, fail); // Clear upper 32 bits. Uxtw(dest64, dest64); // If the output was non-zero and wasn't saturated, just return it. B(&done); // Handle the case of a zero output: // 1. The input may have been NaN, requiring a failure. // 2. The input may have been in (-1,-0], requiring a failure. { bind(&zeroCase); // Combine test for negative and NaN values using a single bitwise // operation. // // | Decimal number | Bitwise representation | // |----------------|------------------------| // | -0 | 8000'0000'0000'0000 | // | +0 | 0000'0000'0000'0000 | // | +1 | 3ff0'0000'0000'0000 | // | NaN (or +Inf) | 7ffx'xxxx'xxxx'xxxx | // | -NaN (or -Inf) | fffx'xxxx'xxxx'xxxx | // // If any of two most significant bits is set, the number isn't in [0, 1). // (Recall that floating point numbers, except for NaN, are strictly ordered // when comparing their bitwise representation as signed integers.) Fmov(dest64, src64); Lsr(dest64, dest64, 62); Cbnz(dest64, fail); } bind(&done); } void MacroAssembler::roundFloat32ToInt32(FloatRegister src, Register dest, FloatRegister temp, Label* fail) { ARMFPRegister src32(src, 32); ARMRegister dest32(dest, 32); ARMRegister dest64(dest, 64); Label negative, saturated, done; // Branch to a slow path if input < 0.0 due to complicated rounding rules. // Note that Fcmp with NaN unsets the negative flag. Fcmp(src32, 0.0); B(&negative, Assembler::Condition::lo); // Handle the simple case of a positive input, and also -0 and NaN. // Rounding proceeds with consideration of the fractional part of the input: // 1. If > 0.5, round to integer with higher absolute value (so, up). // 2. If < 0.5, round to integer with lower absolute value (so, down). // 3. If = 0.5, round to +Infinity (so, up). { // Convert to signed 64-bit integer, rounding halfway cases away from zero. // In the case of overflow, the output is saturated. // In the case of NaN and -0, the output is zero. Fcvtas(dest64, src32); // In the case of zero, the input may have been NaN or -0, which must bail. Cbnz(dest64, &saturated); // Combine test for -0 and NaN values using a single bitwise operation. // See truncFloat32ToInt32 for an explanation. Fmov(dest32, src32); Lsr(dest32, dest32, 30); Cbnz(dest32, fail); B(&done); } // Handle the complicated case of a negative input. // Rounding proceeds with consideration of the fractional part of the input: // 1. 
If > 0.5, round to integer with higher absolute value (so, down). // 2. If < 0.5, round to integer with lower absolute value (so, up). // 3. If = 0.5, round to +Infinity (so, up). bind(&negative); { // Inputs in [-0.5, 0) are rounded to -0. Fail. loadConstantFloat32(-0.5f, temp); branchFloat(Assembler::DoubleGreaterThanOrEqual, src, temp, fail); // Other negative inputs need the biggest double less than 0.5 added. loadConstantFloat32(GetBiggestNumberLessThan(0.5f), temp); addFloat32(src, temp); // Round all values toward -Infinity. // In the case of overflow, the output is saturated. // NaN and -0 are already handled by the "positive number" path above. Fcvtms(dest64, temp); } bind(&saturated); // Sign extend lower 32 bits to test if the result isn't an Int32. Cmp(dest64, Operand(dest64, vixl::SXTW)); B(NotEqual, fail); // Clear upper 32 bits. Uxtw(dest64, dest64); bind(&done); } void MacroAssembler::roundDoubleToInt32(FloatRegister src, Register dest, FloatRegister temp, Label* fail) { ARMFPRegister src64(src, 64); ARMRegister dest64(dest, 64); ARMRegister dest32(dest, 32); Label negative, saturated, done; // Branch to a slow path if input < 0.0 due to complicated rounding rules. // Note that Fcmp with NaN unsets the negative flag. Fcmp(src64, 0.0); B(&negative, Assembler::Condition::lo); // Handle the simple case of a positive input, and also -0 and NaN. // Rounding proceeds with consideration of the fractional part of the input: // 1. If > 0.5, round to integer with higher absolute value (so, up). // 2. If < 0.5, round to integer with lower absolute value (so, down). // 3. If = 0.5, round to +Infinity (so, up). { // Convert to signed 64-bit integer, rounding halfway cases away from zero. // In the case of overflow, the output is saturated. // In the case of NaN and -0, the output is zero. Fcvtas(dest64, src64); // In the case of zero, the input may have been NaN or -0, which must bail. Cbnz(dest64, &saturated); // Combine test for -0 and NaN values using a single bitwise operation. // See truncDoubleToInt32 for an explanation. Fmov(dest64, src64); Lsr(dest64, dest64, 62); Cbnz(dest64, fail); B(&done); } // Handle the complicated case of a negative input. // Rounding proceeds with consideration of the fractional part of the input: // 1. If > 0.5, round to integer with higher absolute value (so, down). // 2. If < 0.5, round to integer with lower absolute value (so, up). // 3. If = 0.5, round to +Infinity (so, up). bind(&negative); { // Inputs in [-0.5, 0) are rounded to -0. Fail. loadConstantDouble(-0.5, temp); branchDouble(Assembler::DoubleGreaterThanOrEqual, src, temp, fail); // Other negative inputs need the biggest double less than 0.5 added. loadConstantDouble(GetBiggestNumberLessThan(0.5), temp); addDouble(src, temp); // Round all values toward -Infinity. // In the case of overflow, the output is saturated. // NaN and -0 are already handled by the "positive number" path above. Fcvtms(dest64, temp); } bind(&saturated); // Sign extend lower 32 bits to test if the result isn't an Int32. Cmp(dest64, Operand(dest64, vixl::SXTW)); B(NotEqual, fail); // Clear upper 32 bits. 
Uxtw(dest64, dest64); bind(&done); } void MacroAssembler::nearbyIntDouble(RoundingMode mode, FloatRegister src, FloatRegister dest) { switch (mode) { case RoundingMode::Up: frintp(ARMFPRegister(dest, 64), ARMFPRegister(src, 64)); return; case RoundingMode::Down: frintm(ARMFPRegister(dest, 64), ARMFPRegister(src, 64)); return; case RoundingMode::NearestTiesToEven: frintn(ARMFPRegister(dest, 64), ARMFPRegister(src, 64)); return; case RoundingMode::TowardsZero: frintz(ARMFPRegister(dest, 64), ARMFPRegister(src, 64)); return; } MOZ_CRASH("unexpected mode"); } void MacroAssembler::nearbyIntFloat32(RoundingMode mode, FloatRegister src, FloatRegister dest) { switch (mode) { case RoundingMode::Up: frintp(ARMFPRegister(dest, 32), ARMFPRegister(src, 32)); return; case RoundingMode::Down: frintm(ARMFPRegister(dest, 32), ARMFPRegister(src, 32)); return; case RoundingMode::NearestTiesToEven: frintn(ARMFPRegister(dest, 32), ARMFPRegister(src, 32)); return; case RoundingMode::TowardsZero: frintz(ARMFPRegister(dest, 32), ARMFPRegister(src, 32)); return; } MOZ_CRASH("unexpected mode"); } void MacroAssembler::copySignDouble(FloatRegister lhs, FloatRegister rhs, FloatRegister output) { ScratchDoubleScope scratch(*this); // Double with only the sign bit set loadConstantDouble(-0.0, scratch); if (lhs != output) { moveDouble(lhs, output); } bit(ARMFPRegister(output.encoding(), vixl::VectorFormat::kFormat8B), ARMFPRegister(rhs.encoding(), vixl::VectorFormat::kFormat8B), ARMFPRegister(scratch.encoding(), vixl::VectorFormat::kFormat8B)); } void MacroAssembler::copySignFloat32(FloatRegister lhs, FloatRegister rhs, FloatRegister output) { ScratchFloat32Scope scratch(*this); // Float with only the sign bit set loadConstantFloat32(-0.0f, scratch); if (lhs != output) { moveFloat32(lhs, output); } bit(ARMFPRegister(output.encoding(), vixl::VectorFormat::kFormat8B), ARMFPRegister(rhs.encoding(), vixl::VectorFormat::kFormat8B), ARMFPRegister(scratch.encoding(), vixl::VectorFormat::kFormat8B)); } void MacroAssembler::shiftIndex32AndAdd(Register indexTemp32, int shift, Register pointer) { Add(ARMRegister(pointer, 64), ARMRegister(pointer, 64), Operand(ARMRegister(indexTemp32, 64), vixl::LSL, shift)); } #ifdef ENABLE_WASM_TAIL_CALLS void MacroAssembler::wasmMarkSlowCall() { Mov(x28, x28); } const int32_t SlowCallMarker = 0xaa1c03fc; void MacroAssembler::wasmCheckSlowCallsite(Register ra, Label* notSlow, Register temp1, Register temp2) { MOZ_ASSERT(ra != temp2); Ldr(W(temp2), MemOperand(X(ra), 0)); Cmp(W(temp2), Operand(SlowCallMarker)); B(Assembler::NotEqual, notSlow); } #endif // ENABLE_WASM_TAIL_CALLS //}}} check_macroassembler_style } // namespace jit } // namespace js