Diffstat (limited to 'js/src/jit/arm64/MacroAssembler-arm64.cpp')
-rw-r--r-- | js/src/jit/arm64/MacroAssembler-arm64.cpp | 3140
1 file changed, 3140 insertions, 0 deletions
diff --git a/js/src/jit/arm64/MacroAssembler-arm64.cpp b/js/src/jit/arm64/MacroAssembler-arm64.cpp new file mode 100644 index 0000000000..a176a2974f --- /dev/null +++ b/js/src/jit/arm64/MacroAssembler-arm64.cpp @@ -0,0 +1,3140 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "jit/arm64/MacroAssembler-arm64.h" + +#include "mozilla/MathAlgorithms.h" +#include "mozilla/Maybe.h" + +#include "jsmath.h" + +#include "jit/arm64/MoveEmitter-arm64.h" +#include "jit/arm64/SharedICRegisters-arm64.h" +#include "jit/Bailouts.h" +#include "jit/BaselineFrame.h" +#include "jit/JitRuntime.h" +#include "jit/MacroAssembler.h" +#include "util/Memory.h" +#include "vm/BigIntType.h" +#include "vm/JitActivation.h" // js::jit::JitActivation +#include "vm/JSContext.h" +#include "vm/StringType.h" + +#include "jit/MacroAssembler-inl.h" + +namespace js { +namespace jit { + +enum class Width { _32 = 32, _64 = 64 }; + +static inline ARMRegister X(Register r) { return ARMRegister(r, 64); } + +static inline ARMRegister X(MacroAssembler& masm, RegisterOrSP r) { + return masm.toARMRegister(r, 64); +} + +static inline ARMRegister W(Register r) { return ARMRegister(r, 32); } + +static inline ARMRegister R(Register r, Width w) { + return ARMRegister(r, unsigned(w)); +} + +void MacroAssemblerCompat::boxValue(JSValueType type, Register src, + Register dest) { +#ifdef DEBUG + if (type == JSVAL_TYPE_INT32 || type == JSVAL_TYPE_BOOLEAN) { + Label upper32BitsZeroed; + movePtr(ImmWord(UINT32_MAX), dest); + asMasm().branchPtr(Assembler::BelowOrEqual, src, dest, &upper32BitsZeroed); + breakpoint(); + bind(&upper32BitsZeroed); + } +#endif + Orr(ARMRegister(dest, 64), ARMRegister(src, 64), + Operand(ImmShiftedTag(type).value)); +} + +#ifdef ENABLE_WASM_SIMD +bool MacroAssembler::MustMaskShiftCountSimd128(wasm::SimdOp op, int32_t* mask) { + switch (op) { + case wasm::SimdOp::I8x16Shl: + case wasm::SimdOp::I8x16ShrU: + case wasm::SimdOp::I8x16ShrS: + *mask = 7; + break; + case wasm::SimdOp::I16x8Shl: + case wasm::SimdOp::I16x8ShrU: + case wasm::SimdOp::I16x8ShrS: + *mask = 15; + break; + case wasm::SimdOp::I32x4Shl: + case wasm::SimdOp::I32x4ShrU: + case wasm::SimdOp::I32x4ShrS: + *mask = 31; + break; + case wasm::SimdOp::I64x2Shl: + case wasm::SimdOp::I64x2ShrU: + case wasm::SimdOp::I64x2ShrS: + *mask = 63; + break; + default: + MOZ_CRASH("Unexpected shift operation"); + } + return true; +} +#endif + +void MacroAssembler::clampDoubleToUint8(FloatRegister input, Register output) { + ARMRegister dest(output, 32); + Fcvtns(dest, ARMFPRegister(input, 64)); + + { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch32 = temps.AcquireW(); + + Mov(scratch32, Operand(0xff)); + Cmp(dest, scratch32); + Csel(dest, dest, scratch32, LessThan); + } + + Cmp(dest, Operand(0)); + Csel(dest, dest, wzr, GreaterThan); +} + +js::jit::MacroAssembler& MacroAssemblerCompat::asMasm() { + return *static_cast<js::jit::MacroAssembler*>(this); +} + +const js::jit::MacroAssembler& MacroAssemblerCompat::asMasm() const { + return *static_cast<const js::jit::MacroAssembler*>(this); +} + +vixl::MacroAssembler& MacroAssemblerCompat::asVIXL() { + return *static_cast<vixl::MacroAssembler*>(this); +} + +const vixl::MacroAssembler& MacroAssemblerCompat::asVIXL() const { + 
return *static_cast<const vixl::MacroAssembler*>(this); +} + +void MacroAssemblerCompat::mov(CodeLabel* label, Register dest) { + BufferOffset bo = movePatchablePtr(ImmWord(/* placeholder */ 0), dest); + label->patchAt()->bind(bo.getOffset()); + label->setLinkMode(CodeLabel::MoveImmediate); +} + +BufferOffset MacroAssemblerCompat::movePatchablePtr(ImmPtr ptr, Register dest) { + const size_t numInst = 1; // Inserting one load instruction. + const unsigned numPoolEntries = 2; // Every pool entry is 4 bytes. + uint8_t* literalAddr = (uint8_t*)(&ptr.value); // TODO: Should be const. + + // Scratch space for generating the load instruction. + // + // allocLiteralLoadEntry() will use InsertIndexIntoTag() to store a temporary + // index to the corresponding PoolEntry in the instruction itself. + // + // That index will be fixed up later when finishPool() + // walks over all marked loads and calls PatchConstantPoolLoad(). + uint32_t instructionScratch = 0; + + // Emit the instruction mask in the scratch space. + // The offset doesn't matter: it will be fixed up later. + vixl::Assembler::ldr((Instruction*)&instructionScratch, ARMRegister(dest, 64), + 0); + + // Add the entry to the pool, fix up the LDR imm19 offset, + // and add the completed instruction to the buffer. + return allocLiteralLoadEntry(numInst, numPoolEntries, + (uint8_t*)&instructionScratch, literalAddr); +} + +BufferOffset MacroAssemblerCompat::movePatchablePtr(ImmWord ptr, + Register dest) { + const size_t numInst = 1; // Inserting one load instruction. + const unsigned numPoolEntries = 2; // Every pool entry is 4 bytes. + uint8_t* literalAddr = (uint8_t*)(&ptr.value); + + // Scratch space for generating the load instruction. + // + // allocLiteralLoadEntry() will use InsertIndexIntoTag() to store a temporary + // index to the corresponding PoolEntry in the instruction itself. + // + // That index will be fixed up later when finishPool() + // walks over all marked loads and calls PatchConstantPoolLoad(). + uint32_t instructionScratch = 0; + + // Emit the instruction mask in the scratch space. + // The offset doesn't matter: it will be fixed up later. + vixl::Assembler::ldr((Instruction*)&instructionScratch, ARMRegister(dest, 64), + 0); + + // Add the entry to the pool, fix up the LDR imm19 offset, + // and add the completed instruction to the buffer. + return allocLiteralLoadEntry(numInst, numPoolEntries, + (uint8_t*)&instructionScratch, literalAddr); +} + +void MacroAssemblerCompat::loadPrivate(const Address& src, Register dest) { + loadPtr(src, dest); +} + +void MacroAssemblerCompat::handleFailureWithHandlerTail(Label* profilerExitTail, + Label* bailoutTail) { + // Fail rather than silently create wrong code. + MOZ_RELEASE_ASSERT(GetStackPointer64().Is(PseudoStackPointer64)); + + // Reserve space for exception information. + int64_t size = (sizeof(ResumeFromException) + 7) & ~7; + Sub(PseudoStackPointer64, PseudoStackPointer64, Operand(size)); + syncStackPtr(); + + MOZ_ASSERT(!x0.Is(PseudoStackPointer64)); + Mov(x0, PseudoStackPointer64); + + // Call the handler. + using Fn = void (*)(ResumeFromException * rfe); + asMasm().setupUnalignedABICall(r1); + asMasm().passABIArg(r0); + asMasm().callWithABI<Fn, HandleException>( + MoveOp::GENERAL, CheckUnsafeCallWithABI::DontCheckHasExitFrame); + + Label entryFrame; + Label catch_; + Label finally; + Label returnBaseline; + Label returnIon; + Label bailout; + Label wasm; + Label wasmCatch; + + // Check the `asMasm` calls above didn't mess with the StackPointer identity. 
+ MOZ_ASSERT(GetStackPointer64().Is(PseudoStackPointer64)); + + loadPtr(Address(PseudoStackPointer, ResumeFromException::offsetOfKind()), r0); + asMasm().branch32(Assembler::Equal, r0, + Imm32(ExceptionResumeKind::EntryFrame), &entryFrame); + asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::Catch), + &catch_); + asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::Finally), + &finally); + asMasm().branch32(Assembler::Equal, r0, + Imm32(ExceptionResumeKind::ForcedReturnBaseline), + &returnBaseline); + asMasm().branch32(Assembler::Equal, r0, + Imm32(ExceptionResumeKind::ForcedReturnIon), &returnIon); + asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::Bailout), + &bailout); + asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::Wasm), + &wasm); + asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::WasmCatch), + &wasmCatch); + + breakpoint(); // Invalid kind. + + // No exception handler. Load the error value, restore state and return from + // the entry frame. + bind(&entryFrame); + moveValue(MagicValue(JS_ION_ERROR), JSReturnOperand); + loadPtr( + Address(PseudoStackPointer, ResumeFromException::offsetOfFramePointer()), + FramePointer); + loadPtr( + Address(PseudoStackPointer, ResumeFromException::offsetOfStackPointer()), + PseudoStackPointer); + + // `retn` does indeed sync the stack pointer, but before doing that it reads + // from the stack. Consequently, if we remove this call to syncStackPointer + // then we take on the requirement to prove that the immediately preceding + // loadPtr produces a value for PSP which maintains the SP <= PSP invariant. + // That's a proof burden we don't want to take on. In general it would be + // good to move (at some time in the future, not now) to a world where + // *every* assignment to PSP or SP is followed immediately by a copy into + // the other register. That would make all required correctness proofs + // trivial in the sense that it requires only local inspection of code + // immediately following (dominated by) any such assignment. + syncStackPtr(); + retn(Imm32(1 * sizeof(void*))); // Pop from stack and return. + + // If we found a catch handler, this must be a baseline frame. Restore state + // and jump to the catch block. + bind(&catch_); + loadPtr(Address(PseudoStackPointer, ResumeFromException::offsetOfTarget()), + r0); + loadPtr( + Address(PseudoStackPointer, ResumeFromException::offsetOfFramePointer()), + FramePointer); + loadPtr( + Address(PseudoStackPointer, ResumeFromException::offsetOfStackPointer()), + PseudoStackPointer); + syncStackPtr(); + Br(x0); + + // If we found a finally block, this must be a baseline frame. Push two + // values expected by the finally block: the exception and BooleanValue(true). + bind(&finally); + ARMRegister exception = x1; + Ldr(exception, MemOperand(PseudoStackPointer64, + ResumeFromException::offsetOfException())); + Ldr(x0, + MemOperand(PseudoStackPointer64, ResumeFromException::offsetOfTarget())); + Ldr(ARMRegister(FramePointer, 64), + MemOperand(PseudoStackPointer64, + ResumeFromException::offsetOfFramePointer())); + Ldr(PseudoStackPointer64, + MemOperand(PseudoStackPointer64, + ResumeFromException::offsetOfStackPointer())); + syncStackPtr(); + push(exception); + pushValue(BooleanValue(true)); + Br(x0); + + // Return BaselineFrame->returnValue() to the caller. + // Used in debug mode and for GeneratorReturn. 
+ Label profilingInstrumentation; + bind(&returnBaseline); + loadPtr( + Address(PseudoStackPointer, ResumeFromException::offsetOfFramePointer()), + FramePointer); + loadPtr( + Address(PseudoStackPointer, ResumeFromException::offsetOfStackPointer()), + PseudoStackPointer); + // See comment further up beginning "`retn` does indeed sync the stack + // pointer". That comment applies here too. + syncStackPtr(); + loadValue(Address(FramePointer, BaselineFrame::reverseOffsetOfReturnValue()), + JSReturnOperand); + jump(&profilingInstrumentation); + + // Return the given value to the caller. + bind(&returnIon); + loadValue( + Address(PseudoStackPointer, ResumeFromException::offsetOfException()), + JSReturnOperand); + loadPtr( + Address(PseudoStackPointer, offsetof(ResumeFromException, framePointer)), + FramePointer); + loadPtr( + Address(PseudoStackPointer, offsetof(ResumeFromException, stackPointer)), + PseudoStackPointer); + syncStackPtr(); + + // If profiling is enabled, then update the lastProfilingFrame to refer to + // caller frame before returning. This code is shared by ForcedReturnIon + // and ForcedReturnBaseline. + bind(&profilingInstrumentation); + { + Label skipProfilingInstrumentation; + AbsoluteAddress addressOfEnabled( + asMasm().runtime()->geckoProfiler().addressOfEnabled()); + asMasm().branch32(Assembler::Equal, addressOfEnabled, Imm32(0), + &skipProfilingInstrumentation); + jump(profilerExitTail); + bind(&skipProfilingInstrumentation); + } + + movePtr(FramePointer, PseudoStackPointer); + syncStackPtr(); + vixl::MacroAssembler::Pop(ARMRegister(FramePointer, 64)); + + vixl::MacroAssembler::Pop(vixl::lr); + syncStackPtr(); + vixl::MacroAssembler::Ret(vixl::lr); + + // If we are bailing out to baseline to handle an exception, jump to the + // bailout tail stub. Load 1 (true) in x0 (ReturnReg) to indicate success. + bind(&bailout); + Ldr(x2, MemOperand(PseudoStackPointer64, + ResumeFromException::offsetOfBailoutInfo())); + Ldr(PseudoStackPointer64, + MemOperand(PseudoStackPointer64, + ResumeFromException::offsetOfStackPointer())); + syncStackPtr(); + Mov(x0, 1); + jump(bailoutTail); + + // If we are throwing and the innermost frame was a wasm frame, reset SP and + // FP; SP is pointing to the unwound return address to the wasm entry, so + // we can just ret(). + bind(&wasm); + Ldr(x29, MemOperand(PseudoStackPointer64, + ResumeFromException::offsetOfFramePointer())); + Ldr(PseudoStackPointer64, + MemOperand(PseudoStackPointer64, + ResumeFromException::offsetOfStackPointer())); + syncStackPtr(); + ret(); + + // Found a wasm catch handler, restore state and jump to it. 
+ bind(&wasmCatch); + loadPtr(Address(PseudoStackPointer, ResumeFromException::offsetOfTarget()), + r0); + loadPtr( + Address(PseudoStackPointer, ResumeFromException::offsetOfFramePointer()), + r29); + loadPtr( + Address(PseudoStackPointer, ResumeFromException::offsetOfStackPointer()), + PseudoStackPointer); + syncStackPtr(); + Br(x0); + + MOZ_ASSERT(GetStackPointer64().Is(PseudoStackPointer64)); +} + +void MacroAssemblerCompat::profilerEnterFrame(Register framePtr, + Register scratch) { + asMasm().loadJSContext(scratch); + loadPtr(Address(scratch, offsetof(JSContext, profilingActivation_)), scratch); + storePtr(framePtr, + Address(scratch, JitActivation::offsetOfLastProfilingFrame())); + storePtr(ImmPtr(nullptr), + Address(scratch, JitActivation::offsetOfLastProfilingCallSite())); +} + +void MacroAssemblerCompat::profilerExitFrame() { + jump(asMasm().runtime()->jitRuntime()->getProfilerExitFrameTail()); +} + +Assembler::Condition MacroAssemblerCompat::testStringTruthy( + bool truthy, const ValueOperand& value) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + const ARMRegister scratch32(scratch, 32); + const ARMRegister scratch64(scratch, 64); + + MOZ_ASSERT(value.valueReg() != scratch); + + unboxString(value, scratch); + Ldr(scratch32, MemOperand(scratch64, JSString::offsetOfLength())); + Cmp(scratch32, Operand(0)); + return truthy ? Condition::NonZero : Condition::Zero; +} + +Assembler::Condition MacroAssemblerCompat::testBigIntTruthy( + bool truthy, const ValueOperand& value) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + + MOZ_ASSERT(value.valueReg() != scratch); + + unboxBigInt(value, scratch); + load32(Address(scratch, BigInt::offsetOfDigitLength()), scratch); + cmp32(scratch, Imm32(0)); + return truthy ? Condition::NonZero : Condition::Zero; +} + +void MacroAssemblerCompat::breakpoint() { + // Note, other payloads are possible, but GDB is known to misinterpret them + // sometimes and iloop on the breakpoint instead of stopping properly. + Brk(0); +} + +// Either `any` is valid or `sixtyfour` is valid. Return a 32-bit ARMRegister +// in the first case and an ARMRegister of the desired size in the latter case. + +static inline ARMRegister SelectGPReg(AnyRegister any, Register64 sixtyfour, + unsigned size = 64) { + MOZ_ASSERT(any.isValid() != (sixtyfour != Register64::Invalid())); + + if (sixtyfour == Register64::Invalid()) { + return ARMRegister(any.gpr(), 32); + } + + return ARMRegister(sixtyfour.reg, size); +} + +// Assert that `sixtyfour` is invalid and then return an FP register from `any` +// of the desired size. 
+ +static inline ARMFPRegister SelectFPReg(AnyRegister any, Register64 sixtyfour, + unsigned size) { + MOZ_ASSERT(sixtyfour == Register64::Invalid()); + return ARMFPRegister(any.fpu(), size); +} + +void MacroAssemblerCompat::wasmLoadImpl(const wasm::MemoryAccessDesc& access, + Register memoryBase_, Register ptr_, + AnyRegister outany, Register64 out64) { + uint32_t offset = access.offset(); + MOZ_ASSERT(offset < asMasm().wasmMaxOffsetGuardLimit()); + + ARMRegister memoryBase(memoryBase_, 64); + ARMRegister ptr(ptr_, 64); + if (offset) { + vixl::UseScratchRegisterScope temps(this); + ARMRegister scratch = temps.AcquireX(); + Add(scratch, ptr, Operand(offset)); + MemOperand srcAddr(memoryBase, scratch); + wasmLoadImpl(access, srcAddr, outany, out64); + } else { + MemOperand srcAddr(memoryBase, ptr); + wasmLoadImpl(access, srcAddr, outany, out64); + } +} + +void MacroAssemblerCompat::wasmLoadImpl(const wasm::MemoryAccessDesc& access, + MemOperand srcAddr, AnyRegister outany, + Register64 out64) { + // Reg+Reg and Reg+SmallImm addressing is directly encodable in one Load + // instruction, hence we expect exactly one instruction to be emitted in the + // window. + int32_t instructionsExpected = 1; + + // Splat and widen however require an additional instruction to be emitted + // after the load, so allow one more instruction in the window. + if (access.isSplatSimd128Load() || access.isWidenSimd128Load()) { + MOZ_ASSERT(access.type() == Scalar::Float64); + instructionsExpected++; + } + + // NOTE: the generated code must match the assembly code in gen_load in + // GenerateAtomicOperations.py + asMasm().memoryBarrierBefore(access.sync()); + + { + // The AutoForbidPoolsAndNops asserts if we emit more than the expected + // number of instructions and thus ensures that the access metadata is + // emitted at the address of the Load. 
+ AutoForbidPoolsAndNops afp(this, instructionsExpected); + + append(access, asMasm().currentOffset()); + switch (access.type()) { + case Scalar::Int8: + Ldrsb(SelectGPReg(outany, out64), srcAddr); + break; + case Scalar::Uint8: + Ldrb(SelectGPReg(outany, out64), srcAddr); + break; + case Scalar::Int16: + Ldrsh(SelectGPReg(outany, out64), srcAddr); + break; + case Scalar::Uint16: + Ldrh(SelectGPReg(outany, out64), srcAddr); + break; + case Scalar::Int32: + if (out64 != Register64::Invalid()) { + Ldrsw(SelectGPReg(outany, out64), srcAddr); + } else { + Ldr(SelectGPReg(outany, out64, 32), srcAddr); + } + break; + case Scalar::Uint32: + Ldr(SelectGPReg(outany, out64, 32), srcAddr); + break; + case Scalar::Int64: + Ldr(SelectGPReg(outany, out64), srcAddr); + break; + case Scalar::Float32: + // LDR does the right thing also for access.isZeroExtendSimd128Load() + Ldr(SelectFPReg(outany, out64, 32), srcAddr); + break; + case Scalar::Float64: + if (access.isSplatSimd128Load() || access.isWidenSimd128Load()) { + ScratchSimd128Scope scratch_(asMasm()); + ARMFPRegister scratch = Simd1D(scratch_); + Ldr(scratch, srcAddr); + if (access.isSplatSimd128Load()) { + Dup(SelectFPReg(outany, out64, 128).V2D(), scratch, 0); + } else { + MOZ_ASSERT(access.isWidenSimd128Load()); + switch (access.widenSimdOp()) { + case wasm::SimdOp::V128Load8x8S: + Sshll(SelectFPReg(outany, out64, 128).V8H(), scratch.V8B(), 0); + break; + case wasm::SimdOp::V128Load8x8U: + Ushll(SelectFPReg(outany, out64, 128).V8H(), scratch.V8B(), 0); + break; + case wasm::SimdOp::V128Load16x4S: + Sshll(SelectFPReg(outany, out64, 128).V4S(), scratch.V4H(), 0); + break; + case wasm::SimdOp::V128Load16x4U: + Ushll(SelectFPReg(outany, out64, 128).V4S(), scratch.V4H(), 0); + break; + case wasm::SimdOp::V128Load32x2S: + Sshll(SelectFPReg(outany, out64, 128).V2D(), scratch.V2S(), 0); + break; + case wasm::SimdOp::V128Load32x2U: + Ushll(SelectFPReg(outany, out64, 128).V2D(), scratch.V2S(), 0); + break; + default: + MOZ_CRASH("Unexpected widening op for wasmLoad"); + } + } + } else { + // LDR does the right thing also for access.isZeroExtendSimd128Load() + Ldr(SelectFPReg(outany, out64, 64), srcAddr); + } + break; + case Scalar::Simd128: + Ldr(SelectFPReg(outany, out64, 128), srcAddr); + break; + case Scalar::Uint8Clamped: + case Scalar::BigInt64: + case Scalar::BigUint64: + case Scalar::MaxTypedArrayViewType: + MOZ_CRASH("unexpected array type"); + } + } + + asMasm().memoryBarrierAfter(access.sync()); +} + +// Return true if `address` can be represented as an immediate (possibly scaled +// by the access size) in an LDR/STR type instruction. +// +// For more about the logic here, see vixl::MacroAssembler::LoadStoreMacro(). +static bool IsLSImmediateOffset(uint64_t address, size_t accessByteSize) { + // The predicates below operate on signed values only. + if (address > INT64_MAX) { + return false; + } + + // The access size is always a power of 2, so computing the log amounts to + // counting trailing zeroes. + unsigned logAccessSize = mozilla::CountTrailingZeroes32(accessByteSize); + return (MacroAssemblerCompat::IsImmLSUnscaled(int64_t(address)) || + MacroAssemblerCompat::IsImmLSScaled(int64_t(address), logAccessSize)); +} + +void MacroAssemblerCompat::wasmLoadAbsolute( + const wasm::MemoryAccessDesc& access, Register memoryBase, uint64_t address, + AnyRegister output, Register64 out64) { + if (!IsLSImmediateOffset(address, access.byteSize())) { + // The access will require the constant to be loaded into a temp register. 
+ // Do so here, to keep the logic in wasmLoadImpl() tractable wrt emitting + // trap information. + // + // Almost all constant addresses will in practice be handled by a single MOV + // so do not worry about additional optimizations here. + vixl::UseScratchRegisterScope temps(this); + ARMRegister scratch = temps.AcquireX(); + Mov(scratch, address); + MemOperand srcAddr(X(memoryBase), scratch); + wasmLoadImpl(access, srcAddr, output, out64); + } else { + MemOperand srcAddr(X(memoryBase), address); + wasmLoadImpl(access, srcAddr, output, out64); + } +} + +void MacroAssemblerCompat::wasmStoreImpl(const wasm::MemoryAccessDesc& access, + AnyRegister valany, Register64 val64, + Register memoryBase_, Register ptr_) { + uint32_t offset = access.offset(); + MOZ_ASSERT(offset < asMasm().wasmMaxOffsetGuardLimit()); + + ARMRegister memoryBase(memoryBase_, 64); + ARMRegister ptr(ptr_, 64); + if (offset) { + vixl::UseScratchRegisterScope temps(this); + ARMRegister scratch = temps.AcquireX(); + Add(scratch, ptr, Operand(offset)); + MemOperand destAddr(memoryBase, scratch); + wasmStoreImpl(access, destAddr, valany, val64); + } else { + MemOperand destAddr(memoryBase, ptr); + wasmStoreImpl(access, destAddr, valany, val64); + } +} + +void MacroAssemblerCompat::wasmStoreImpl(const wasm::MemoryAccessDesc& access, + MemOperand dstAddr, AnyRegister valany, + Register64 val64) { + // NOTE: the generated code must match the assembly code in gen_store in + // GenerateAtomicOperations.py + asMasm().memoryBarrierBefore(access.sync()); + + { + // Reg+Reg addressing is directly encodable in one Store instruction, hence + // the AutoForbidPoolsAndNops will ensure that the access metadata is + // emitted at the address of the Store. The AutoForbidPoolsAndNops will + // assert if we emit more than one instruction. + + AutoForbidPoolsAndNops afp(this, + /* max number of instructions in scope = */ 1); + + append(access, asMasm().currentOffset()); + switch (access.type()) { + case Scalar::Int8: + case Scalar::Uint8: + Strb(SelectGPReg(valany, val64), dstAddr); + break; + case Scalar::Int16: + case Scalar::Uint16: + Strh(SelectGPReg(valany, val64), dstAddr); + break; + case Scalar::Int32: + case Scalar::Uint32: + Str(SelectGPReg(valany, val64), dstAddr); + break; + case Scalar::Int64: + Str(SelectGPReg(valany, val64), dstAddr); + break; + case Scalar::Float32: + Str(SelectFPReg(valany, val64, 32), dstAddr); + break; + case Scalar::Float64: + Str(SelectFPReg(valany, val64, 64), dstAddr); + break; + case Scalar::Simd128: + Str(SelectFPReg(valany, val64, 128), dstAddr); + break; + case Scalar::Uint8Clamped: + case Scalar::BigInt64: + case Scalar::BigUint64: + case Scalar::MaxTypedArrayViewType: + MOZ_CRASH("unexpected array type"); + } + } + + asMasm().memoryBarrierAfter(access.sync()); +} + +void MacroAssemblerCompat::wasmStoreAbsolute( + const wasm::MemoryAccessDesc& access, AnyRegister value, Register64 value64, + Register memoryBase, uint64_t address) { + // See comments in wasmLoadAbsolute. 
+ unsigned logAccessSize = mozilla::CountTrailingZeroes32(access.byteSize()); + if (address > INT64_MAX || !(IsImmLSScaled(int64_t(address), logAccessSize) || + IsImmLSUnscaled(int64_t(address)))) { + vixl::UseScratchRegisterScope temps(this); + ARMRegister scratch = temps.AcquireX(); + Mov(scratch, address); + MemOperand destAddr(X(memoryBase), scratch); + wasmStoreImpl(access, destAddr, value, value64); + } else { + MemOperand destAddr(X(memoryBase), address); + wasmStoreImpl(access, destAddr, value, value64); + } +} + +void MacroAssemblerCompat::compareSimd128Int(Assembler::Condition cond, + ARMFPRegister dest, + ARMFPRegister lhs, + ARMFPRegister rhs) { + switch (cond) { + case Assembler::Equal: + Cmeq(dest, lhs, rhs); + break; + case Assembler::NotEqual: + Cmeq(dest, lhs, rhs); + Mvn(dest, dest); + break; + case Assembler::GreaterThan: + Cmgt(dest, lhs, rhs); + break; + case Assembler::GreaterThanOrEqual: + Cmge(dest, lhs, rhs); + break; + case Assembler::LessThan: + Cmgt(dest, rhs, lhs); + break; + case Assembler::LessThanOrEqual: + Cmge(dest, rhs, lhs); + break; + case Assembler::Above: + Cmhi(dest, lhs, rhs); + break; + case Assembler::AboveOrEqual: + Cmhs(dest, lhs, rhs); + break; + case Assembler::Below: + Cmhi(dest, rhs, lhs); + break; + case Assembler::BelowOrEqual: + Cmhs(dest, rhs, lhs); + break; + default: + MOZ_CRASH("Unexpected SIMD integer condition"); + } +} + +void MacroAssemblerCompat::compareSimd128Float(Assembler::Condition cond, + ARMFPRegister dest, + ARMFPRegister lhs, + ARMFPRegister rhs) { + switch (cond) { + case Assembler::Equal: + Fcmeq(dest, lhs, rhs); + break; + case Assembler::NotEqual: + Fcmeq(dest, lhs, rhs); + Mvn(dest, dest); + break; + case Assembler::GreaterThan: + Fcmgt(dest, lhs, rhs); + break; + case Assembler::GreaterThanOrEqual: + Fcmge(dest, lhs, rhs); + break; + case Assembler::LessThan: + Fcmgt(dest, rhs, lhs); + break; + case Assembler::LessThanOrEqual: + Fcmge(dest, rhs, lhs); + break; + default: + MOZ_CRASH("Unexpected SIMD integer condition"); + } +} + +void MacroAssemblerCompat::rightShiftInt8x16(FloatRegister lhs, Register rhs, + FloatRegister dest, + bool isUnsigned) { + ScratchSimd128Scope scratch_(asMasm()); + ARMFPRegister shift = Simd16B(scratch_); + + Dup(shift, ARMRegister(rhs, 32)); + Neg(shift, shift); + + if (isUnsigned) { + Ushl(Simd16B(dest), Simd16B(lhs), shift); + } else { + Sshl(Simd16B(dest), Simd16B(lhs), shift); + } +} + +void MacroAssemblerCompat::rightShiftInt16x8(FloatRegister lhs, Register rhs, + FloatRegister dest, + bool isUnsigned) { + ScratchSimd128Scope scratch_(asMasm()); + ARMFPRegister shift = Simd8H(scratch_); + + Dup(shift, ARMRegister(rhs, 32)); + Neg(shift, shift); + + if (isUnsigned) { + Ushl(Simd8H(dest), Simd8H(lhs), shift); + } else { + Sshl(Simd8H(dest), Simd8H(lhs), shift); + } +} + +void MacroAssemblerCompat::rightShiftInt32x4(FloatRegister lhs, Register rhs, + FloatRegister dest, + bool isUnsigned) { + ScratchSimd128Scope scratch_(asMasm()); + ARMFPRegister shift = Simd4S(scratch_); + + Dup(shift, ARMRegister(rhs, 32)); + Neg(shift, shift); + + if (isUnsigned) { + Ushl(Simd4S(dest), Simd4S(lhs), shift); + } else { + Sshl(Simd4S(dest), Simd4S(lhs), shift); + } +} + +void MacroAssemblerCompat::rightShiftInt64x2(FloatRegister lhs, Register rhs, + FloatRegister dest, + bool isUnsigned) { + ScratchSimd128Scope scratch_(asMasm()); + ARMFPRegister shift = Simd2D(scratch_); + + Dup(shift, ARMRegister(rhs, 64)); + Neg(shift, shift); + + if (isUnsigned) { + Ushl(Simd2D(dest), Simd2D(lhs), shift); + } else 
{ + Sshl(Simd2D(dest), Simd2D(lhs), shift); + } +} + +void MacroAssembler::reserveStack(uint32_t amount) { + // TODO: This bumps |sp| every time we reserve using a second register. + // It would save some instructions if we had a fixed frame size. + vixl::MacroAssembler::Claim(Operand(amount)); + adjustFrame(amount); +} + +void MacroAssembler::Push(RegisterOrSP reg) { + if (IsHiddenSP(reg)) { + push(sp); + } else { + push(AsRegister(reg)); + } + adjustFrame(sizeof(intptr_t)); +} + +//{{{ check_macroassembler_style +// =============================================================== +// MacroAssembler high-level usage. + +void MacroAssembler::flush() { Assembler::flush(); } + +// =============================================================== +// Stack manipulation functions. + +// Routines for saving/restoring registers on the stack. The format is: +// +// (highest address) +// +// integer (X) regs in any order size: 8 * # int regs +// +// if # int regs is odd, +// then an 8 byte alignment hole size: 0 or 8 +// +// double (D) regs in any order size: 8 * # double regs +// +// if # double regs is odd, +// then an 8 byte alignment hole size: 0 or 8 +// +// vector (Q) regs in any order size: 16 * # vector regs +// +// (lowest address) +// +// Hence the size of the save area is 0 % 16. And, provided that the base +// (highest) address is 16-aligned, then the vector reg save/restore accesses +// will also be 16-aligned, as will pairwise operations for the double regs. +// +// Implied by this is that the format of the double and vector dump area +// corresponds with what FloatRegister::GetPushSizeInBytes computes. +// See block comment in MacroAssembler.h for more details. + +size_t MacroAssembler::PushRegsInMaskSizeInBytes(LiveRegisterSet set) { + size_t numIntRegs = set.gprs().size(); + return ((numIntRegs + 1) & ~1) * sizeof(intptr_t) + + FloatRegister::GetPushSizeInBytes(set.fpus()); +} + +// Generate code to dump the values in `set`, either on the stack if `dest` is +// `Nothing` or working backwards from the address denoted by `dest` if it is +// `Some`. These two cases are combined so as to minimise the chance of +// mistakenly generating different formats for the same `set`, given that the +// `Some` `dest` case is used extremely rarely. +static void PushOrStoreRegsInMask(MacroAssembler* masm, LiveRegisterSet set, + mozilla::Maybe<Address> dest) { + static_assert(sizeof(FloatRegisters::RegisterContent) == 16); + + // If we're saving to arbitrary memory, check the destination is big enough. + if (dest) { + mozilla::DebugOnly<size_t> bytesRequired = + masm->PushRegsInMaskSizeInBytes(set); + MOZ_ASSERT(dest->offset >= 0); + MOZ_ASSERT(((size_t)dest->offset) >= bytesRequired); + } + + // Note the high limit point; we'll check it again later. + mozilla::DebugOnly<size_t> maxExtentInitial = + dest ? dest->offset : masm->framePushed(); + + // Gather up the integer registers in groups of four, and either push each + // group as a single transfer so as to minimise the number of stack pointer + // changes, or write them individually to memory. Take care to ensure the + // space used remains 16-aligned. 
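+  // For example, if the final group holds only one or three registers, it is
+  // padded with xzr so that each Push still transfers a multiple of 16 bytes.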
+ for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more();) { + vixl::CPURegister src[4] = {vixl::NoCPUReg, vixl::NoCPUReg, vixl::NoCPUReg, + vixl::NoCPUReg}; + size_t i; + for (i = 0; i < 4 && iter.more(); i++) { + src[i] = ARMRegister(*iter, 64); + ++iter; + } + MOZ_ASSERT(i > 0); + + if (i == 1 || i == 3) { + // Ensure the stack remains 16-aligned + MOZ_ASSERT(!iter.more()); + src[i] = vixl::xzr; + i++; + } + MOZ_ASSERT(i == 2 || i == 4); + + if (dest) { + for (size_t j = 0; j < i; j++) { + Register ireg = Register::FromCode(src[j].IsZero() ? Registers::xzr + : src[j].code()); + dest->offset -= sizeof(intptr_t); + masm->storePtr(ireg, *dest); + } + } else { + masm->adjustFrame(i * 8); + masm->vixl::MacroAssembler::Push(src[0], src[1], src[2], src[3]); + } + } + + // Now the same for the FP double registers. Note that because of how + // ReduceSetForPush works, an underlying AArch64 SIMD/FP register can either + // be present as a double register, or as a V128 register, but not both. + // Firstly, round up the registers to be pushed. + + FloatRegisterSet fpuSet(set.fpus().reduceSetForPush()); + vixl::CPURegister allSrcs[FloatRegisters::TotalPhys]; + size_t numAllSrcs = 0; + + for (FloatRegisterBackwardIterator iter(fpuSet); iter.more(); ++iter) { + FloatRegister reg = *iter; + if (reg.isDouble()) { + MOZ_RELEASE_ASSERT(numAllSrcs < FloatRegisters::TotalPhys); + allSrcs[numAllSrcs] = ARMFPRegister(reg, 64); + numAllSrcs++; + } else { + MOZ_ASSERT(reg.isSimd128()); + } + } + MOZ_RELEASE_ASSERT(numAllSrcs <= FloatRegisters::TotalPhys); + + if ((numAllSrcs & 1) == 1) { + // We've got an odd number of doubles. In order to maintain 16-alignment, + // push the last register twice. We'll skip over the duplicate in + // PopRegsInMaskIgnore. + allSrcs[numAllSrcs] = allSrcs[numAllSrcs - 1]; + numAllSrcs++; + } + MOZ_RELEASE_ASSERT(numAllSrcs <= FloatRegisters::TotalPhys); + MOZ_RELEASE_ASSERT((numAllSrcs & 1) == 0); + + // And now generate the transfers. + size_t i; + if (dest) { + for (i = 0; i < numAllSrcs; i++) { + FloatRegister freg = + FloatRegister(FloatRegisters::FPRegisterID(allSrcs[i].code()), + FloatRegisters::Kind::Double); + dest->offset -= sizeof(double); + masm->storeDouble(freg, *dest); + } + } else { + i = 0; + while (i < numAllSrcs) { + vixl::CPURegister src[4] = {vixl::NoCPUReg, vixl::NoCPUReg, + vixl::NoCPUReg, vixl::NoCPUReg}; + size_t j; + for (j = 0; j < 4 && j + i < numAllSrcs; j++) { + src[j] = allSrcs[j + i]; + } + masm->adjustFrame(8 * j); + masm->vixl::MacroAssembler::Push(src[0], src[1], src[2], src[3]); + i += j; + } + } + MOZ_ASSERT(i == numAllSrcs); + + // Finally, deal with the SIMD (V128) registers. This is a bit simpler + // as there's no need for special-casing to maintain 16-alignment. + + numAllSrcs = 0; + for (FloatRegisterBackwardIterator iter(fpuSet); iter.more(); ++iter) { + FloatRegister reg = *iter; + if (reg.isSimd128()) { + MOZ_RELEASE_ASSERT(numAllSrcs < FloatRegisters::TotalPhys); + allSrcs[numAllSrcs] = ARMFPRegister(reg, 128); + numAllSrcs++; + } + } + MOZ_RELEASE_ASSERT(numAllSrcs <= FloatRegisters::TotalPhys); + + // Generate the transfers. 
+ if (dest) { + for (i = 0; i < numAllSrcs; i++) { + FloatRegister freg = + FloatRegister(FloatRegisters::FPRegisterID(allSrcs[i].code()), + FloatRegisters::Kind::Simd128); + dest->offset -= FloatRegister::SizeOfSimd128; + masm->storeUnalignedSimd128(freg, *dest); + } + } else { + i = 0; + while (i < numAllSrcs) { + vixl::CPURegister src[4] = {vixl::NoCPUReg, vixl::NoCPUReg, + vixl::NoCPUReg, vixl::NoCPUReg}; + size_t j; + for (j = 0; j < 4 && j + i < numAllSrcs; j++) { + src[j] = allSrcs[j + i]; + } + masm->adjustFrame(16 * j); + masm->vixl::MacroAssembler::Push(src[0], src[1], src[2], src[3]); + i += j; + } + } + MOZ_ASSERT(i == numAllSrcs); + + // Final overrun check. + if (dest) { + MOZ_ASSERT(maxExtentInitial - dest->offset == + masm->PushRegsInMaskSizeInBytes(set)); + } else { + MOZ_ASSERT(masm->framePushed() - maxExtentInitial == + masm->PushRegsInMaskSizeInBytes(set)); + } +} + +void MacroAssembler::PushRegsInMask(LiveRegisterSet set) { + PushOrStoreRegsInMask(this, set, mozilla::Nothing()); +} + +void MacroAssembler::storeRegsInMask(LiveRegisterSet set, Address dest, + Register scratch) { + PushOrStoreRegsInMask(this, set, mozilla::Some(dest)); +} + +// This is a helper function for PopRegsInMaskIgnore below. It emits the +// loads described by dests[0] and [1] and offsets[0] and [1], generating a +// load-pair if it can. +static void GeneratePendingLoadsThenFlush(MacroAssembler* masm, + vixl::CPURegister* dests, + uint32_t* offsets, + uint32_t transactionSize) { + // Generate the loads .. + if (!dests[0].IsNone()) { + if (!dests[1].IsNone()) { + // [0] and [1] both present. + if (offsets[0] + transactionSize == offsets[1]) { + masm->Ldp(dests[0], dests[1], + MemOperand(masm->GetStackPointer64(), offsets[0])); + } else { + // Theoretically we could check for a load-pair with the destinations + // switched, but our callers will never generate that. Hence there's + // no loss in giving up at this point and generating two loads. + masm->Ldr(dests[0], MemOperand(masm->GetStackPointer64(), offsets[0])); + masm->Ldr(dests[1], MemOperand(masm->GetStackPointer64(), offsets[1])); + } + } else { + // [0] only. + masm->Ldr(dests[0], MemOperand(masm->GetStackPointer64(), offsets[0])); + } + } else { + if (!dests[1].IsNone()) { + // [1] only. Can't happen because callers always fill [0] before [1]. + MOZ_CRASH("GenerateLoadsThenFlush"); + } else { + // Neither entry valid. This can happen. + } + } + + // .. and flush. + dests[0] = dests[1] = vixl::NoCPUReg; + offsets[0] = offsets[1] = 0; +} + +void MacroAssembler::PopRegsInMaskIgnore(LiveRegisterSet set, + LiveRegisterSet ignore) { + mozilla::DebugOnly<size_t> framePushedInitial = framePushed(); + + // The offset of the data from the stack pointer. + uint32_t offset = 0; + + // The set of FP/SIMD registers we need to restore. + FloatRegisterSet fpuSet(set.fpus().reduceSetForPush()); + + // The set of registers to ignore. BroadcastToAllSizes() is used to avoid + // any ambiguities arising from (eg) `fpuSet` containing q17 but `ignore` + // containing d17. + FloatRegisterSet ignoreFpusBroadcasted( + FloatRegister::BroadcastToAllSizes(ignore.fpus())); + + // First recover the SIMD (V128) registers. This is straightforward in that + // we don't need to think about alignment holes. + + // These three form a two-entry queue that holds loads that we know we + // need, but which we haven't yet emitted. 
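+  // Queueing the loads lets GeneratePendingLoadsThenFlush() fuse two loads at
+  // adjacent offsets into a single Ldp; otherwise they are emitted as
+  // individual Ldr instructions.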
+ vixl::CPURegister pendingDests[2] = {vixl::NoCPUReg, vixl::NoCPUReg}; + uint32_t pendingOffsets[2] = {0, 0}; + size_t nPending = 0; + + for (FloatRegisterIterator iter(fpuSet); iter.more(); ++iter) { + FloatRegister reg = *iter; + if (reg.isDouble()) { + continue; + } + MOZ_RELEASE_ASSERT(reg.isSimd128()); + + uint32_t offsetForReg = offset; + offset += FloatRegister::SizeOfSimd128; + + if (ignoreFpusBroadcasted.hasRegisterIndex(reg)) { + continue; + } + + MOZ_ASSERT(nPending <= 2); + if (nPending == 2) { + GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 16); + nPending = 0; + } + pendingDests[nPending] = ARMFPRegister(reg, 128); + pendingOffsets[nPending] = offsetForReg; + nPending++; + } + GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 16); + nPending = 0; + + MOZ_ASSERT((offset % 16) == 0); + + // Now recover the FP double registers. This is more tricky in that we need + // to skip over the lowest-addressed of them if the number of them was odd. + + if ((((fpuSet.bits() & FloatRegisters::AllDoubleMask).size()) & 1) == 1) { + offset += sizeof(double); + } + + for (FloatRegisterIterator iter(fpuSet); iter.more(); ++iter) { + FloatRegister reg = *iter; + if (reg.isSimd128()) { + continue; + } + /* true but redundant, per loop above: MOZ_RELEASE_ASSERT(reg.isDouble()) */ + + uint32_t offsetForReg = offset; + offset += sizeof(double); + + if (ignoreFpusBroadcasted.hasRegisterIndex(reg)) { + continue; + } + + MOZ_ASSERT(nPending <= 2); + if (nPending == 2) { + GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 8); + nPending = 0; + } + pendingDests[nPending] = ARMFPRegister(reg, 64); + pendingOffsets[nPending] = offsetForReg; + nPending++; + } + GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 8); + nPending = 0; + + MOZ_ASSERT((offset % 16) == 0); + MOZ_ASSERT(offset == set.fpus().getPushSizeInBytes()); + + // And finally recover the integer registers, again skipping an alignment + // hole if it exists. 
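+  // The hole exists whenever an odd number of GPRs was pushed:
+  // PushOrStoreRegsInMask padded the final group with xzr to keep the
+  // transfers 16-aligned.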
+ + if ((set.gprs().size() & 1) == 1) { + offset += sizeof(uint64_t); + } + + for (GeneralRegisterIterator iter(set.gprs()); iter.more(); ++iter) { + Register reg = *iter; + + uint32_t offsetForReg = offset; + offset += sizeof(uint64_t); + + if (ignore.has(reg)) { + continue; + } + + MOZ_ASSERT(nPending <= 2); + if (nPending == 2) { + GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 8); + nPending = 0; + } + pendingDests[nPending] = ARMRegister(reg, 64); + pendingOffsets[nPending] = offsetForReg; + nPending++; + } + GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 8); + + MOZ_ASSERT((offset % 16) == 0); + + size_t bytesPushed = PushRegsInMaskSizeInBytes(set); + MOZ_ASSERT(offset == bytesPushed); + freeStack(bytesPushed); +} + +void MacroAssembler::Push(Register reg) { + push(reg); + adjustFrame(sizeof(intptr_t)); +} + +void MacroAssembler::Push(Register reg1, Register reg2, Register reg3, + Register reg4) { + push(reg1, reg2, reg3, reg4); + adjustFrame(4 * sizeof(intptr_t)); +} + +void MacroAssembler::Push(const Imm32 imm) { + push(imm); + adjustFrame(sizeof(intptr_t)); +} + +void MacroAssembler::Push(const ImmWord imm) { + push(imm); + adjustFrame(sizeof(intptr_t)); +} + +void MacroAssembler::Push(const ImmPtr imm) { + push(imm); + adjustFrame(sizeof(intptr_t)); +} + +void MacroAssembler::Push(const ImmGCPtr ptr) { + push(ptr); + adjustFrame(sizeof(intptr_t)); +} + +void MacroAssembler::Push(FloatRegister f) { + push(f); + adjustFrame(sizeof(double)); +} + +void MacroAssembler::PushBoxed(FloatRegister reg) { + subFromStackPtr(Imm32(sizeof(double))); + boxDouble(reg, Address(getStackPointer(), 0)); + adjustFrame(sizeof(double)); +} + +void MacroAssembler::Pop(Register reg) { + pop(reg); + adjustFrame(-1 * int64_t(sizeof(int64_t))); +} + +void MacroAssembler::Pop(FloatRegister f) { + loadDouble(Address(getStackPointer(), 0), f); + freeStack(sizeof(double)); +} + +void MacroAssembler::Pop(const ValueOperand& val) { + pop(val); + adjustFrame(-1 * int64_t(sizeof(int64_t))); +} + +// =============================================================== +// Simple call functions. + +CodeOffset MacroAssembler::call(Register reg) { + // This sync has been observed (and is expected) to be necessary. + // eg testcase: tests/debug/bug1107525.js + syncStackPtr(); + Blr(ARMRegister(reg, 64)); + return CodeOffset(currentOffset()); +} + +CodeOffset MacroAssembler::call(Label* label) { + // This sync has been observed (and is expected) to be necessary. + // eg testcase: tests/basic/testBug504520Harder.js + syncStackPtr(); + Bl(label); + return CodeOffset(currentOffset()); +} + +void MacroAssembler::call(ImmPtr imm) { + // This sync has been observed (and is expected) to be necessary. + // eg testcase: asm.js/testTimeout5.js + syncStackPtr(); + vixl::UseScratchRegisterScope temps(this); + MOZ_ASSERT(temps.IsAvailable(ScratchReg64)); // ip0 + temps.Exclude(ScratchReg64); + movePtr(imm, ScratchReg64.asUnsized()); + Blr(ScratchReg64); +} + +void MacroAssembler::call(ImmWord imm) { call(ImmPtr((void*)imm.value)); } + +CodeOffset MacroAssembler::call(wasm::SymbolicAddress imm) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + // This sync is believed to be necessary, although no case in jit-test/tests + // has been observed to cause SP != PSP here. 
+ syncStackPtr(); + movePtr(imm, scratch); + Blr(ARMRegister(scratch, 64)); + return CodeOffset(currentOffset()); +} + +void MacroAssembler::call(const Address& addr) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + // This sync has been observed (and is expected) to be necessary. + // eg testcase: tests/backup-point-bug1315634.js + syncStackPtr(); + loadPtr(addr, scratch); + Blr(ARMRegister(scratch, 64)); +} + +void MacroAssembler::call(JitCode* c) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch64 = temps.AcquireX(); + // This sync has been observed (and is expected) to be necessary. + // eg testcase: arrays/new-array-undefined-undefined-more-args-2.js + syncStackPtr(); + BufferOffset off = immPool64(scratch64, uint64_t(c->raw())); + addPendingJump(off, ImmPtr(c->raw()), RelocationKind::JITCODE); + blr(scratch64); +} + +CodeOffset MacroAssembler::callWithPatch() { + // This needs to sync. Wasm goes through this one for intramodule calls. + // + // In other cases, wasm goes through masm.wasmCallImport(), + // masm.wasmCallBuiltinInstanceMethod, masm.wasmCallIndirect, all of which + // sync. + // + // This sync is believed to be necessary, although no case in jit-test/tests + // has been observed to cause SP != PSP here. + syncStackPtr(); + bl(0, LabelDoc()); + return CodeOffset(currentOffset()); +} +void MacroAssembler::patchCall(uint32_t callerOffset, uint32_t calleeOffset) { + Instruction* inst = getInstructionAt(BufferOffset(callerOffset - 4)); + MOZ_ASSERT(inst->IsBL()); + ptrdiff_t relTarget = (int)calleeOffset - ((int)callerOffset - 4); + ptrdiff_t relTarget00 = relTarget >> 2; + MOZ_RELEASE_ASSERT((relTarget & 0x3) == 0); + MOZ_RELEASE_ASSERT(vixl::IsInt26(relTarget00)); + bl(inst, relTarget00); +} + +CodeOffset MacroAssembler::farJumpWithPatch() { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch = temps.AcquireX(); + const ARMRegister scratch2 = temps.AcquireX(); + + AutoForbidPoolsAndNops afp(this, + /* max number of instructions in scope = */ 7); + + mozilla::DebugOnly<uint32_t> before = currentOffset(); + + align(8); // At most one nop + + Label branch; + adr(scratch2, &branch); + ldr(scratch, vixl::MemOperand(scratch2, 4)); + add(scratch2, scratch2, scratch); + CodeOffset offs(currentOffset()); + bind(&branch); + br(scratch2); + Emit(UINT32_MAX); + Emit(UINT32_MAX); + + mozilla::DebugOnly<uint32_t> after = currentOffset(); + + MOZ_ASSERT(after - before == 24 || after - before == 28); + + return offs; +} + +void MacroAssembler::patchFarJump(CodeOffset farJump, uint32_t targetOffset) { + Instruction* inst1 = getInstructionAt(BufferOffset(farJump.offset() + 4)); + Instruction* inst2 = getInstructionAt(BufferOffset(farJump.offset() + 8)); + + int64_t distance = (int64_t)targetOffset - (int64_t)farJump.offset(); + + MOZ_ASSERT(inst1->InstructionBits() == UINT32_MAX); + MOZ_ASSERT(inst2->InstructionBits() == UINT32_MAX); + + inst1->SetInstructionBits((uint32_t)distance); + inst2->SetInstructionBits((uint32_t)(distance >> 32)); +} + +CodeOffset MacroAssembler::nopPatchableToCall() { + AutoForbidPoolsAndNops afp(this, + /* max number of instructions in scope = */ 1); + Nop(); + return CodeOffset(currentOffset()); +} + +void MacroAssembler::patchNopToCall(uint8_t* call, uint8_t* target) { + uint8_t* inst = call - 4; + Instruction* instr = reinterpret_cast<Instruction*>(inst); + MOZ_ASSERT(instr->IsBL() || instr->IsNOP()); + bl(instr, (target - inst) >> 2); +} + +void 
MacroAssembler::patchCallToNop(uint8_t* call) { + uint8_t* inst = call - 4; + Instruction* instr = reinterpret_cast<Instruction*>(inst); + MOZ_ASSERT(instr->IsBL() || instr->IsNOP()); + nop(instr); +} + +void MacroAssembler::pushReturnAddress() { + MOZ_RELEASE_ASSERT(!sp.Is(GetStackPointer64()), "Not valid"); + push(lr); +} + +void MacroAssembler::popReturnAddress() { + MOZ_RELEASE_ASSERT(!sp.Is(GetStackPointer64()), "Not valid"); + pop(lr); +} + +// =============================================================== +// ABI function calls. + +void MacroAssembler::setupUnalignedABICall(Register scratch) { + // Because wasm operates without the need for dynamic alignment of SP, it is + // implied that this routine should never be called when generating wasm. + MOZ_ASSERT(!IsCompilingWasm()); + + // The following won't work for SP -- needs slightly different logic. + MOZ_RELEASE_ASSERT(GetStackPointer64().Is(PseudoStackPointer64)); + + setupNativeABICall(); + dynamicAlignment_ = true; + + int64_t alignment = ~(int64_t(ABIStackAlignment) - 1); + ARMRegister scratch64(scratch, 64); + MOZ_ASSERT(!scratch64.Is(PseudoStackPointer64)); + + // Always save LR -- Baseline ICs assume that LR isn't modified. + push(lr); + + // Remember the stack address on entry. This is reloaded in callWithABIPost + // below. + Mov(scratch64, PseudoStackPointer64); + + // Make alignment, including the effective push of the previous sp. + Sub(PseudoStackPointer64, PseudoStackPointer64, Operand(8)); + And(PseudoStackPointer64, PseudoStackPointer64, Operand(alignment)); + syncStackPtr(); + + // Store previous sp to the top of the stack, aligned. This is also + // reloaded in callWithABIPost. + Str(scratch64, MemOperand(PseudoStackPointer64, 0)); +} + +void MacroAssembler::callWithABIPre(uint32_t* stackAdjust, bool callFromWasm) { + // wasm operates without the need for dynamic alignment of SP. + MOZ_ASSERT(!(dynamicAlignment_ && callFromWasm)); + + MOZ_ASSERT(inCall_); + uint32_t stackForCall = abiArgs_.stackBytesConsumedSoFar(); + + // ARM64 *really* wants SP to always be 16-aligned, so ensure this now. + if (dynamicAlignment_) { + stackForCall += ComputeByteAlignment(stackForCall, StackAlignment); + } else { + // This can happen when we attach out-of-line stubs for rare cases. For + // example CodeGenerator::visitWasmTruncateToInt32 adds an out-of-line + // chunk. + uint32_t alignmentAtPrologue = callFromWasm ? sizeof(wasm::Frame) : 0; + stackForCall += ComputeByteAlignment( + stackForCall + framePushed() + alignmentAtPrologue, ABIStackAlignment); + } + + *stackAdjust = stackForCall; + reserveStack(*stackAdjust); + { + enoughMemory_ &= moveResolver_.resolve(); + if (!enoughMemory_) { + return; + } + MoveEmitter emitter(*this); + emitter.emit(moveResolver_); + emitter.finish(); + } + + // Call boundaries communicate stack via SP. + // (jseward, 2021Mar03) This sync may well be redundant, given that all of + // the MacroAssembler::call methods generate a sync before the call. + // Removing it does not cause any failures for all of jit-tests. + syncStackPtr(); + + assertStackAlignment(ABIStackAlignment); +} + +void MacroAssembler::callWithABIPost(uint32_t stackAdjust, MoveOp::Type result, + bool callFromWasm) { + // wasm operates without the need for dynamic alignment of SP. + MOZ_ASSERT(!(dynamicAlignment_ && callFromWasm)); + + // Call boundaries communicate stack via SP, so we must resync PSP now. 
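+  // initPseudoStackPtr() re-derives PSP from the real SP, restoring the
+  // SP == PSP relationship that the callee may not have preserved.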
+ initPseudoStackPtr(); + + freeStack(stackAdjust); + + if (dynamicAlignment_) { + // This then-clause makes more sense if you first read + // setupUnalignedABICall above. + // + // Restore the stack pointer from entry. The stack pointer will have been + // saved by setupUnalignedABICall. This is fragile in that it assumes + // that uses of this routine (callWithABIPost) with `dynamicAlignment_ == + // true` are preceded by matching calls to setupUnalignedABICall. But + // there's nothing that enforce that mechanically. If we really want to + // enforce this, we could add a debug-only CallWithABIState enum to the + // MacroAssembler and assert that setupUnalignedABICall updates it before + // we get here, then reset it to its initial state. + Ldr(GetStackPointer64(), MemOperand(GetStackPointer64(), 0)); + syncStackPtr(); + + // Restore LR. This restores LR to the value stored by + // setupUnalignedABICall, which should have been called just before + // callWithABIPre. This is, per the above comment, also fragile. + pop(lr); + + // SP may be < PSP now. That is expected from the behaviour of `pop`. It + // is not clear why the following `syncStackPtr` is necessary, but it is: + // without it, the following test segfaults: + // tests/backup-point-bug1315634.js + syncStackPtr(); + } + + // If the ABI's return regs are where ION is expecting them, then + // no other work needs to be done. + +#ifdef DEBUG + MOZ_ASSERT(inCall_); + inCall_ = false; +#endif +} + +void MacroAssembler::callWithABINoProfiler(Register fun, MoveOp::Type result) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + movePtr(fun, scratch); + + uint32_t stackAdjust; + callWithABIPre(&stackAdjust); + call(scratch); + callWithABIPost(stackAdjust, result); +} + +void MacroAssembler::callWithABINoProfiler(const Address& fun, + MoveOp::Type result) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + loadPtr(fun, scratch); + + uint32_t stackAdjust; + callWithABIPre(&stackAdjust); + call(scratch); + callWithABIPost(stackAdjust, result); +} + +// =============================================================== +// Jit Frames. 
+ +uint32_t MacroAssembler::pushFakeReturnAddress(Register scratch) { + enterNoPool(3); + Label fakeCallsite; + + Adr(ARMRegister(scratch, 64), &fakeCallsite); + Push(scratch); + bind(&fakeCallsite); + uint32_t pseudoReturnOffset = currentOffset(); + + leaveNoPool(); + return pseudoReturnOffset; +} + +bool MacroAssemblerCompat::buildOOLFakeExitFrame(void* fakeReturnAddr) { + asMasm().PushFrameDescriptor(FrameType::IonJS); + asMasm().Push(ImmPtr(fakeReturnAddr)); + asMasm().Push(FramePointer); + return true; +} + +// =============================================================== +// Move instructions + +void MacroAssembler::moveValue(const TypedOrValueRegister& src, + const ValueOperand& dest) { + if (src.hasValue()) { + moveValue(src.valueReg(), dest); + return; + } + + MIRType type = src.type(); + AnyRegister reg = src.typedReg(); + + if (!IsFloatingPointType(type)) { + boxNonDouble(ValueTypeFromMIRType(type), reg.gpr(), dest); + return; + } + + ScratchDoubleScope scratch(*this); + FloatRegister freg = reg.fpu(); + if (type == MIRType::Float32) { + convertFloat32ToDouble(freg, scratch); + freg = scratch; + } + boxDouble(freg, dest, scratch); +} + +void MacroAssembler::moveValue(const ValueOperand& src, + const ValueOperand& dest) { + if (src == dest) { + return; + } + movePtr(src.valueReg(), dest.valueReg()); +} + +void MacroAssembler::moveValue(const Value& src, const ValueOperand& dest) { + if (!src.isGCThing()) { + movePtr(ImmWord(src.asRawBits()), dest.valueReg()); + return; + } + + BufferOffset load = + movePatchablePtr(ImmPtr(src.bitsAsPunboxPointer()), dest.valueReg()); + writeDataRelocation(src, load); +} + +// =============================================================== +// Branch functions + +void MacroAssembler::loadStoreBuffer(Register ptr, Register buffer) { + And(ARMRegister(buffer, 64), ARMRegister(ptr, 64), + Operand(int32_t(~gc::ChunkMask))); + loadPtr(Address(buffer, gc::ChunkStoreBufferOffset), buffer); +} + +void MacroAssembler::branchPtrInNurseryChunk(Condition cond, Register ptr, + Register temp, Label* label) { + MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual); + MOZ_ASSERT(ptr != temp); + MOZ_ASSERT(ptr != ScratchReg && + ptr != ScratchReg2); // Both may be used internally. + MOZ_ASSERT(temp != ScratchReg && temp != ScratchReg2); + + And(ARMRegister(temp, 64), ARMRegister(ptr, 64), + Operand(int32_t(~gc::ChunkMask))); + branchPtr(InvertCondition(cond), Address(temp, gc::ChunkStoreBufferOffset), + ImmWord(0), label); +} + +void MacroAssembler::branchValueIsNurseryCell(Condition cond, + const Address& address, + Register temp, Label* label) { + branchValueIsNurseryCellImpl(cond, address, temp, label); +} + +void MacroAssembler::branchValueIsNurseryCell(Condition cond, + ValueOperand value, Register temp, + Label* label) { + branchValueIsNurseryCellImpl(cond, value, temp, label); +} + +template <typename T> +void MacroAssembler::branchValueIsNurseryCellImpl(Condition cond, + const T& value, Register temp, + Label* label) { + MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual); + MOZ_ASSERT(temp != ScratchReg && + temp != ScratchReg2); // Both may be used internally. + + Label done; + branchTestGCThing(Assembler::NotEqual, value, + cond == Assembler::Equal ? 
&done : label); + + getGCThingValueChunk(value, temp); + branchPtr(InvertCondition(cond), Address(temp, gc::ChunkStoreBufferOffset), + ImmWord(0), label); + + bind(&done); +} + +void MacroAssembler::branchTestValue(Condition cond, const ValueOperand& lhs, + const Value& rhs, Label* label) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch64 = temps.AcquireX(); + MOZ_ASSERT(scratch64.asUnsized() != lhs.valueReg()); + moveValue(rhs, ValueOperand(scratch64.asUnsized())); + Cmp(ARMRegister(lhs.valueReg(), 64), scratch64); + B(label, cond); +} + +// ======================================================================== +// Memory access primitives. +template <typename T> +void MacroAssembler::storeUnboxedValue(const ConstantOrRegister& value, + MIRType valueType, const T& dest) { + MOZ_ASSERT(valueType < MIRType::Value); + + if (valueType == MIRType::Double) { + boxDouble(value.reg().typedReg().fpu(), dest); + return; + } + + if (value.constant()) { + storeValue(value.value(), dest); + } else { + storeValue(ValueTypeFromMIRType(valueType), value.reg().typedReg().gpr(), + dest); + } +} + +template void MacroAssembler::storeUnboxedValue(const ConstantOrRegister& value, + MIRType valueType, + const Address& dest); +template void MacroAssembler::storeUnboxedValue( + const ConstantOrRegister& value, MIRType valueType, + const BaseObjectElementIndex& dest); + +void MacroAssembler::comment(const char* msg) { Assembler::comment(msg); } + +// ======================================================================== +// wasm support + +CodeOffset MacroAssembler::wasmTrapInstruction() { + AutoForbidPoolsAndNops afp(this, + /* max number of instructions in scope = */ 1); + CodeOffset offs(currentOffset()); + Unreachable(); + return offs; +} + +void MacroAssembler::wasmBoundsCheck32(Condition cond, Register index, + Register boundsCheckLimit, Label* ok) { + branch32(cond, index, boundsCheckLimit, ok); + if (JitOptions.spectreIndexMasking) { + csel(ARMRegister(index, 32), vixl::wzr, ARMRegister(index, 32), cond); + } +} + +void MacroAssembler::wasmBoundsCheck32(Condition cond, Register index, + Address boundsCheckLimit, Label* ok) { + branch32(cond, index, boundsCheckLimit, ok); + if (JitOptions.spectreIndexMasking) { + csel(ARMRegister(index, 32), vixl::wzr, ARMRegister(index, 32), cond); + } +} + +void MacroAssembler::wasmBoundsCheck64(Condition cond, Register64 index, + Register64 boundsCheckLimit, Label* ok) { + branchPtr(cond, index.reg, boundsCheckLimit.reg, ok); + if (JitOptions.spectreIndexMasking) { + csel(ARMRegister(index.reg, 64), vixl::xzr, ARMRegister(index.reg, 64), + cond); + } +} + +void MacroAssembler::wasmBoundsCheck64(Condition cond, Register64 index, + Address boundsCheckLimit, Label* ok) { + branchPtr(InvertCondition(cond), boundsCheckLimit, index.reg, ok); + if (JitOptions.spectreIndexMasking) { + csel(ARMRegister(index.reg, 64), vixl::xzr, ARMRegister(index.reg, 64), + cond); + } +} + +// FCVTZU behaves as follows: +// +// on NaN it produces zero +// on too large it produces UINT_MAX (for appropriate type) +// on too small it produces zero +// +// FCVTZS behaves as follows: +// +// on NaN it produces zero +// on too large it produces INT_MAX (for appropriate type) +// on too small it produces INT_MIN (ditto) + +void MacroAssembler::wasmTruncateDoubleToUInt32(FloatRegister input_, + Register output_, + bool isSaturating, + Label* oolEntry) { + ARMRegister output(output_, 32); + ARMFPRegister input(input_, 64); + 
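+  // Per the FCVTZU notes above, NaN and out-of-range inputs saturate to 0 or
+  // UINT32_MAX. The Cmp/Ccmp pair below sets Z exactly when the result is 0
+  // or UINT32_MAX (-1 as a 32-bit value), so the conditional branch diverts
+  // those possibly-out-of-range cases to the OOL entry.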
Fcvtzu(output, input); + if (!isSaturating) { + Cmp(output, 0); + Ccmp(output, -1, vixl::ZFlag, Assembler::NotEqual); + B(oolEntry, Assembler::Equal); + } +} + +void MacroAssembler::wasmTruncateFloat32ToUInt32(FloatRegister input_, + Register output_, + bool isSaturating, + Label* oolEntry) { + ARMRegister output(output_, 32); + ARMFPRegister input(input_, 32); + Fcvtzu(output, input); + if (!isSaturating) { + Cmp(output, 0); + Ccmp(output, -1, vixl::ZFlag, Assembler::NotEqual); + B(oolEntry, Assembler::Equal); + } +} + +void MacroAssembler::wasmTruncateDoubleToInt32(FloatRegister input_, + Register output_, + bool isSaturating, + Label* oolEntry) { + ARMRegister output(output_, 32); + ARMFPRegister input(input_, 64); + Fcvtzs(output, input); + if (!isSaturating) { + Cmp(output, 0); + Ccmp(output, INT32_MAX, vixl::ZFlag, Assembler::NotEqual); + Ccmp(output, INT32_MIN, vixl::ZFlag, Assembler::NotEqual); + B(oolEntry, Assembler::Equal); + } +} + +void MacroAssembler::wasmTruncateFloat32ToInt32(FloatRegister input_, + Register output_, + bool isSaturating, + Label* oolEntry) { + ARMRegister output(output_, 32); + ARMFPRegister input(input_, 32); + Fcvtzs(output, input); + if (!isSaturating) { + Cmp(output, 0); + Ccmp(output, INT32_MAX, vixl::ZFlag, Assembler::NotEqual); + Ccmp(output, INT32_MIN, vixl::ZFlag, Assembler::NotEqual); + B(oolEntry, Assembler::Equal); + } +} + +void MacroAssembler::wasmTruncateDoubleToUInt64( + FloatRegister input_, Register64 output_, bool isSaturating, + Label* oolEntry, Label* oolRejoin, FloatRegister tempDouble) { + MOZ_ASSERT(tempDouble.isInvalid()); + + ARMRegister output(output_.reg, 64); + ARMFPRegister input(input_, 64); + Fcvtzu(output, input); + if (!isSaturating) { + Cmp(output, 0); + Ccmp(output, -1, vixl::ZFlag, Assembler::NotEqual); + B(oolEntry, Assembler::Equal); + bind(oolRejoin); + } +} + +void MacroAssembler::wasmTruncateFloat32ToUInt64( + FloatRegister input_, Register64 output_, bool isSaturating, + Label* oolEntry, Label* oolRejoin, FloatRegister tempDouble) { + MOZ_ASSERT(tempDouble.isInvalid()); + + ARMRegister output(output_.reg, 64); + ARMFPRegister input(input_, 32); + Fcvtzu(output, input); + if (!isSaturating) { + Cmp(output, 0); + Ccmp(output, -1, vixl::ZFlag, Assembler::NotEqual); + B(oolEntry, Assembler::Equal); + bind(oolRejoin); + } +} + +void MacroAssembler::wasmTruncateDoubleToInt64( + FloatRegister input_, Register64 output_, bool isSaturating, + Label* oolEntry, Label* oolRejoin, FloatRegister tempDouble) { + MOZ_ASSERT(tempDouble.isInvalid()); + + ARMRegister output(output_.reg, 64); + ARMFPRegister input(input_, 64); + Fcvtzs(output, input); + if (!isSaturating) { + Cmp(output, 0); + Ccmp(output, INT64_MAX, vixl::ZFlag, Assembler::NotEqual); + Ccmp(output, INT64_MIN, vixl::ZFlag, Assembler::NotEqual); + B(oolEntry, Assembler::Equal); + bind(oolRejoin); + } +} + +void MacroAssembler::wasmTruncateFloat32ToInt64( + FloatRegister input_, Register64 output_, bool isSaturating, + Label* oolEntry, Label* oolRejoin, FloatRegister tempDouble) { + ARMRegister output(output_.reg, 64); + ARMFPRegister input(input_, 32); + Fcvtzs(output, input); + if (!isSaturating) { + Cmp(output, 0); + Ccmp(output, INT64_MAX, vixl::ZFlag, Assembler::NotEqual); + Ccmp(output, INT64_MIN, vixl::ZFlag, Assembler::NotEqual); + B(oolEntry, Assembler::Equal); + bind(oolRejoin); + } +} + +void MacroAssembler::oolWasmTruncateCheckF32ToI32(FloatRegister input, + Register output, + TruncFlags flags, + wasm::BytecodeOffset off, + Label* rejoin) { + Label notNaN; + 
+  branchFloat(Assembler::DoubleOrdered, input, input, &notNaN);
+  wasmTrap(wasm::Trap::InvalidConversionToInteger, off);
+  bind(&notNaN);
+
+  Label isOverflow;
+  const float two_31 = -float(INT32_MIN);
+  ScratchFloat32Scope fpscratch(*this);
+  if (flags & TRUNC_UNSIGNED) {
+    loadConstantFloat32(two_31 * 2, fpscratch);
+    branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
+                &isOverflow);
+    loadConstantFloat32(-1.0f, fpscratch);
+    branchFloat(Assembler::DoubleGreaterThan, input, fpscratch, rejoin);
+  } else {
+    loadConstantFloat32(two_31, fpscratch);
+    branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
+                &isOverflow);
+    loadConstantFloat32(-two_31, fpscratch);
+    branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, rejoin);
+  }
+  bind(&isOverflow);
+  wasmTrap(wasm::Trap::IntegerOverflow, off);
+}
+
+void MacroAssembler::oolWasmTruncateCheckF64ToI32(FloatRegister input,
+                                                  Register output,
+                                                  TruncFlags flags,
+                                                  wasm::BytecodeOffset off,
+                                                  Label* rejoin) {
+  Label notNaN;
+  branchDouble(Assembler::DoubleOrdered, input, input, &notNaN);
+  wasmTrap(wasm::Trap::InvalidConversionToInteger, off);
+  bind(&notNaN);
+
+  Label isOverflow;
+  const double two_31 = -double(INT32_MIN);
+  ScratchDoubleScope fpscratch(*this);
+  if (flags & TRUNC_UNSIGNED) {
+    loadConstantDouble(two_31 * 2, fpscratch);
+    branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
+                 &isOverflow);
+    loadConstantDouble(-1.0, fpscratch);
+    branchDouble(Assembler::DoubleGreaterThan, input, fpscratch, rejoin);
+  } else {
+    loadConstantDouble(two_31, fpscratch);
+    branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
+                 &isOverflow);
+    loadConstantDouble(-two_31 - 1, fpscratch);
+    branchDouble(Assembler::DoubleGreaterThan, input, fpscratch, rejoin);
+  }
+  bind(&isOverflow);
+  wasmTrap(wasm::Trap::IntegerOverflow, off);
+}
+
+void MacroAssembler::oolWasmTruncateCheckF32ToI64(FloatRegister input,
+                                                  Register64 output,
+                                                  TruncFlags flags,
+                                                  wasm::BytecodeOffset off,
+                                                  Label* rejoin) {
+  Label notNaN;
+  branchFloat(Assembler::DoubleOrdered, input, input, &notNaN);
+  wasmTrap(wasm::Trap::InvalidConversionToInteger, off);
+  bind(&notNaN);
+
+  Label isOverflow;
+  const float two_63 = -float(INT64_MIN);
+  ScratchFloat32Scope fpscratch(*this);
+  if (flags & TRUNC_UNSIGNED) {
+    loadConstantFloat32(two_63 * 2, fpscratch);
+    branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
+                &isOverflow);
+    loadConstantFloat32(-1.0f, fpscratch);
+    branchFloat(Assembler::DoubleGreaterThan, input, fpscratch, rejoin);
+  } else {
+    loadConstantFloat32(two_63, fpscratch);
+    branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
+                &isOverflow);
+    loadConstantFloat32(-two_63, fpscratch);
+    branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, rejoin);
+  }
+  bind(&isOverflow);
+  wasmTrap(wasm::Trap::IntegerOverflow, off);
+}
+
+void MacroAssembler::oolWasmTruncateCheckF64ToI64(FloatRegister input,
+                                                  Register64 output,
+                                                  TruncFlags flags,
+                                                  wasm::BytecodeOffset off,
+                                                  Label* rejoin) {
+  Label notNaN;
+  branchDouble(Assembler::DoubleOrdered, input, input, &notNaN);
+  wasmTrap(wasm::Trap::InvalidConversionToInteger, off);
+  bind(&notNaN);
+
+  Label isOverflow;
+  const double two_63 = -double(INT64_MIN);
+  ScratchDoubleScope fpscratch(*this);
+  if (flags & TRUNC_UNSIGNED) {
+    loadConstantDouble(two_63 * 2, fpscratch);
+    branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
+                 &isOverflow);
+    loadConstantDouble(-1.0, fpscratch);
+    branchDouble(Assembler::DoubleGreaterThan, input,
fpscratch, rejoin); + } else { + loadConstantDouble(two_63, fpscratch); + branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, + &isOverflow); + loadConstantDouble(-two_63, fpscratch); + branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, rejoin); + } + bind(&isOverflow); + wasmTrap(wasm::Trap::IntegerOverflow, off); +} + +void MacroAssembler::wasmLoad(const wasm::MemoryAccessDesc& access, + Register memoryBase, Register ptr, + AnyRegister output) { + wasmLoadImpl(access, memoryBase, ptr, output, Register64::Invalid()); +} + +void MacroAssembler::wasmLoadI64(const wasm::MemoryAccessDesc& access, + Register memoryBase, Register ptr, + Register64 output) { + wasmLoadImpl(access, memoryBase, ptr, AnyRegister(), output); +} + +void MacroAssembler::wasmStore(const wasm::MemoryAccessDesc& access, + AnyRegister value, Register memoryBase, + Register ptr) { + wasmStoreImpl(access, value, Register64::Invalid(), memoryBase, ptr); +} + +void MacroAssembler::wasmStoreI64(const wasm::MemoryAccessDesc& access, + Register64 value, Register memoryBase, + Register ptr) { + wasmStoreImpl(access, AnyRegister(), value, memoryBase, ptr); +} + +void MacroAssembler::enterFakeExitFrameForWasm(Register cxreg, Register scratch, + ExitFrameType type) { + // Wasm stubs use the native SP, not the PSP. + + linkExitFrame(cxreg, scratch); + + MOZ_RELEASE_ASSERT(sp.Is(GetStackPointer64())); + + // SP has to be 16-byte aligned when we do a load/store, so push |type| twice + // and then add 8 bytes to SP. This leaves SP unaligned. + move32(Imm32(int32_t(type)), scratch); + push(scratch, scratch); + Add(sp, sp, 8); + + // Despite the above assertion, it is possible for control to flow from here + // to the code generated by + // MacroAssemblerCompat::handleFailureWithHandlerTail without any + // intervening assignment to PSP. But handleFailureWithHandlerTail assumes + // that PSP is the active stack pointer. Hence the following is necessary + // for safety. Note we can't use initPseudoStackPtr here as that would + // generate no instructions. + Mov(PseudoStackPointer64, sp); +} + +void MacroAssembler::widenInt32(Register r) { + move32To64ZeroExtend(r, Register64(r)); +} + +// ======================================================================== +// Convert floating point. + +bool MacroAssembler::convertUInt64ToDoubleNeedsTemp() { return false; } + +void MacroAssembler::convertUInt64ToDouble(Register64 src, FloatRegister dest, + Register temp) { + MOZ_ASSERT(temp == Register::Invalid()); + Ucvtf(ARMFPRegister(dest, 64), ARMRegister(src.reg, 64)); +} + +void MacroAssembler::convertInt64ToDouble(Register64 src, FloatRegister dest) { + Scvtf(ARMFPRegister(dest, 64), ARMRegister(src.reg, 64)); +} + +void MacroAssembler::convertUInt64ToFloat32(Register64 src, FloatRegister dest, + Register temp) { + MOZ_ASSERT(temp == Register::Invalid()); + Ucvtf(ARMFPRegister(dest, 32), ARMRegister(src.reg, 64)); +} + +void MacroAssembler::convertInt64ToFloat32(Register64 src, FloatRegister dest) { + Scvtf(ARMFPRegister(dest, 32), ARMRegister(src.reg, 64)); +} + +void MacroAssembler::convertIntPtrToDouble(Register src, FloatRegister dest) { + convertInt64ToDouble(Register64(src), dest); +} + +// ======================================================================== +// Primitive atomic operations. + +// The computed MemOperand must be Reg+0 because the load/store exclusive +// instructions only take a single pointer register. 
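+// (ldxr/stxr and their byte/halfword variants accept only a base-register
+// [Xn] addressing mode, so any offset or index is first folded into a
+// scratch register by the two helpers below.)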
+ +static MemOperand ComputePointerForAtomic(MacroAssembler& masm, + const Address& address, + Register scratch) { + if (address.offset == 0) { + return MemOperand(X(masm, address.base), 0); + } + + masm.Add(X(scratch), X(masm, address.base), address.offset); + return MemOperand(X(scratch), 0); +} + +static MemOperand ComputePointerForAtomic(MacroAssembler& masm, + const BaseIndex& address, + Register scratch) { + masm.Add(X(scratch), X(masm, address.base), + Operand(X(address.index), vixl::LSL, address.scale)); + if (address.offset) { + masm.Add(X(scratch), X(scratch), address.offset); + } + return MemOperand(X(scratch), 0); +} + +// This sign extends to targetWidth and leaves any higher bits zero. + +static void SignOrZeroExtend(MacroAssembler& masm, Scalar::Type srcType, + Width targetWidth, Register src, Register dest) { + bool signExtend = Scalar::isSignedIntType(srcType); + + switch (Scalar::byteSize(srcType)) { + case 1: + if (signExtend) { + masm.Sbfm(R(dest, targetWidth), R(src, targetWidth), 0, 7); + } else { + masm.Ubfm(R(dest, targetWidth), R(src, targetWidth), 0, 7); + } + break; + case 2: + if (signExtend) { + masm.Sbfm(R(dest, targetWidth), R(src, targetWidth), 0, 15); + } else { + masm.Ubfm(R(dest, targetWidth), R(src, targetWidth), 0, 15); + } + break; + case 4: + if (targetWidth == Width::_64) { + if (signExtend) { + masm.Sbfm(X(dest), X(src), 0, 31); + } else { + masm.Ubfm(X(dest), X(src), 0, 31); + } + } else if (src != dest) { + masm.Mov(R(dest, targetWidth), R(src, targetWidth)); + } + break; + case 8: + if (src != dest) { + masm.Mov(R(dest, targetWidth), R(src, targetWidth)); + } + break; + default: + MOZ_CRASH(); + } +} + +// Exclusive-loads zero-extend their values to the full width of the X register. +// +// Note, we've promised to leave the high bits of the 64-bit register clear if +// the targetWidth is 32. + +static void LoadExclusive(MacroAssembler& masm, + const wasm::MemoryAccessDesc* access, + Scalar::Type srcType, Width targetWidth, + MemOperand ptr, Register dest) { + bool signExtend = Scalar::isSignedIntType(srcType); + + // With this address form, a single native ldxr* will be emitted, and the + // AutoForbidPoolsAndNops ensures that the metadata is emitted at the address + // of the ldxr*. 
+ MOZ_ASSERT(ptr.IsImmediateOffset() && ptr.offset() == 0); + + switch (Scalar::byteSize(srcType)) { + case 1: { + { + AutoForbidPoolsAndNops afp( + &masm, + /* max number of instructions in scope = */ 1); + if (access) { + masm.append(*access, masm.currentOffset()); + } + masm.Ldxrb(W(dest), ptr); + } + if (signExtend) { + masm.Sbfm(R(dest, targetWidth), R(dest, targetWidth), 0, 7); + } + break; + } + case 2: { + { + AutoForbidPoolsAndNops afp( + &masm, + /* max number of instructions in scope = */ 1); + if (access) { + masm.append(*access, masm.currentOffset()); + } + masm.Ldxrh(W(dest), ptr); + } + if (signExtend) { + masm.Sbfm(R(dest, targetWidth), R(dest, targetWidth), 0, 15); + } + break; + } + case 4: { + { + AutoForbidPoolsAndNops afp( + &masm, + /* max number of instructions in scope = */ 1); + if (access) { + masm.append(*access, masm.currentOffset()); + } + masm.Ldxr(W(dest), ptr); + } + if (targetWidth == Width::_64 && signExtend) { + masm.Sbfm(X(dest), X(dest), 0, 31); + } + break; + } + case 8: { + { + AutoForbidPoolsAndNops afp( + &masm, + /* max number of instructions in scope = */ 1); + if (access) { + masm.append(*access, masm.currentOffset()); + } + masm.Ldxr(X(dest), ptr); + } + break; + } + default: { + MOZ_CRASH(); + } + } +} + +static void StoreExclusive(MacroAssembler& masm, Scalar::Type type, + Register status, Register src, MemOperand ptr) { + switch (Scalar::byteSize(type)) { + case 1: + masm.Stxrb(W(status), W(src), ptr); + break; + case 2: + masm.Stxrh(W(status), W(src), ptr); + break; + case 4: + masm.Stxr(W(status), W(src), ptr); + break; + case 8: + masm.Stxr(W(status), X(src), ptr); + break; + } +} + +template <typename T> +static void CompareExchange(MacroAssembler& masm, + const wasm::MemoryAccessDesc* access, + Scalar::Type type, Width targetWidth, + const Synchronization& sync, const T& mem, + Register oldval, Register newval, Register output) { + MOZ_ASSERT(oldval != output && newval != output); + + Label again; + Label done; + + vixl::UseScratchRegisterScope temps(&masm); + + Register scratch2 = temps.AcquireX().asUnsized(); + MemOperand ptr = ComputePointerForAtomic(masm, mem, scratch2); + + MOZ_ASSERT(ptr.base().asUnsized() != output); + + // NOTE: the generated code must match the assembly code in gen_cmpxchg in + // GenerateAtomicOperations.py + masm.memoryBarrierBefore(sync); + + Register scratch = temps.AcquireX().asUnsized(); + + masm.bind(&again); + SignOrZeroExtend(masm, type, targetWidth, oldval, scratch); + LoadExclusive(masm, access, type, targetWidth, ptr, output); + masm.Cmp(R(output, targetWidth), R(scratch, targetWidth)); + masm.B(&done, MacroAssembler::NotEqual); + StoreExclusive(masm, type, scratch, newval, ptr); + masm.Cbnz(W(scratch), &again); + masm.bind(&done); + + masm.memoryBarrierAfter(sync); +} + +template <typename T> +static void AtomicExchange(MacroAssembler& masm, + const wasm::MemoryAccessDesc* access, + Scalar::Type type, Width targetWidth, + const Synchronization& sync, const T& mem, + Register value, Register output) { + MOZ_ASSERT(value != output); + + Label again; + + vixl::UseScratchRegisterScope temps(&masm); + + Register scratch2 = temps.AcquireX().asUnsized(); + MemOperand ptr = ComputePointerForAtomic(masm, mem, scratch2); + + // NOTE: the generated code must match the assembly code in gen_exchange in + // GenerateAtomicOperations.py + masm.memoryBarrierBefore(sync); + + Register scratch = temps.AcquireX().asUnsized(); + + masm.bind(&again); + LoadExclusive(masm, access, type, targetWidth, ptr, output); + 
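+  // Attempt the exclusive store of the new value. The store-exclusive writes
+  // 0 to the status register (scratch) on success and non-zero if the
+  // reservation was lost, in which case we loop and reload.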
StoreExclusive(masm, type, scratch, value, ptr); + masm.Cbnz(W(scratch), &again); + + masm.memoryBarrierAfter(sync); +} + +template <bool wantResult, typename T> +static void AtomicFetchOp(MacroAssembler& masm, + const wasm::MemoryAccessDesc* access, + Scalar::Type type, Width targetWidth, + const Synchronization& sync, AtomicOp op, + const T& mem, Register value, Register temp, + Register output) { + MOZ_ASSERT(value != output); + MOZ_ASSERT(value != temp); + MOZ_ASSERT_IF(wantResult, output != temp); + + Label again; + + vixl::UseScratchRegisterScope temps(&masm); + + Register scratch2 = temps.AcquireX().asUnsized(); + MemOperand ptr = ComputePointerForAtomic(masm, mem, scratch2); + + // NOTE: the generated code must match the assembly code in gen_fetchop in + // GenerateAtomicOperations.py + masm.memoryBarrierBefore(sync); + + Register scratch = temps.AcquireX().asUnsized(); + + masm.bind(&again); + LoadExclusive(masm, access, type, targetWidth, ptr, output); + switch (op) { + case AtomicFetchAddOp: + masm.Add(X(temp), X(output), X(value)); + break; + case AtomicFetchSubOp: + masm.Sub(X(temp), X(output), X(value)); + break; + case AtomicFetchAndOp: + masm.And(X(temp), X(output), X(value)); + break; + case AtomicFetchOrOp: + masm.Orr(X(temp), X(output), X(value)); + break; + case AtomicFetchXorOp: + masm.Eor(X(temp), X(output), X(value)); + break; + } + StoreExclusive(masm, type, scratch, temp, ptr); + masm.Cbnz(W(scratch), &again); + if (wantResult) { + SignOrZeroExtend(masm, type, targetWidth, output, output); + } + + masm.memoryBarrierAfter(sync); +} + +void MacroAssembler::compareExchange(Scalar::Type type, + const Synchronization& sync, + const Address& mem, Register oldval, + Register newval, Register output) { + CompareExchange(*this, nullptr, type, Width::_32, sync, mem, oldval, newval, + output); +} + +void MacroAssembler::compareExchange(Scalar::Type type, + const Synchronization& sync, + const BaseIndex& mem, Register oldval, + Register newval, Register output) { + CompareExchange(*this, nullptr, type, Width::_32, sync, mem, oldval, newval, + output); +} + +void MacroAssembler::compareExchange64(const Synchronization& sync, + const Address& mem, Register64 expect, + Register64 replace, Register64 output) { + CompareExchange(*this, nullptr, Scalar::Int64, Width::_64, sync, mem, + expect.reg, replace.reg, output.reg); +} + +void MacroAssembler::compareExchange64(const Synchronization& sync, + const BaseIndex& mem, Register64 expect, + Register64 replace, Register64 output) { + CompareExchange(*this, nullptr, Scalar::Int64, Width::_64, sync, mem, + expect.reg, replace.reg, output.reg); +} + +void MacroAssembler::atomicExchange64(const Synchronization& sync, + const Address& mem, Register64 value, + Register64 output) { + AtomicExchange(*this, nullptr, Scalar::Int64, Width::_64, sync, mem, + value.reg, output.reg); +} + +void MacroAssembler::atomicExchange64(const Synchronization& sync, + const BaseIndex& mem, Register64 value, + Register64 output) { + AtomicExchange(*this, nullptr, Scalar::Int64, Width::_64, sync, mem, + value.reg, output.reg); +} + +void MacroAssembler::atomicFetchOp64(const Synchronization& sync, AtomicOp op, + Register64 value, const Address& mem, + Register64 temp, Register64 output) { + AtomicFetchOp<true>(*this, nullptr, Scalar::Int64, Width::_64, sync, op, mem, + value.reg, temp.reg, output.reg); +} + +void MacroAssembler::atomicFetchOp64(const Synchronization& sync, AtomicOp op, + Register64 value, const BaseIndex& mem, + Register64 temp, Register64 
output) { + AtomicFetchOp<true>(*this, nullptr, Scalar::Int64, Width::_64, sync, op, mem, + value.reg, temp.reg, output.reg); +} + +void MacroAssembler::atomicEffectOp64(const Synchronization& sync, AtomicOp op, + Register64 value, const Address& mem, + Register64 temp) { + AtomicFetchOp<false>(*this, nullptr, Scalar::Int64, Width::_64, sync, op, mem, + value.reg, temp.reg, temp.reg); +} + +void MacroAssembler::atomicEffectOp64(const Synchronization& sync, AtomicOp op, + Register64 value, const BaseIndex& mem, + Register64 temp) { + AtomicFetchOp<false>(*this, nullptr, Scalar::Int64, Width::_64, sync, op, mem, + value.reg, temp.reg, temp.reg); +} + +void MacroAssembler::wasmCompareExchange(const wasm::MemoryAccessDesc& access, + const Address& mem, Register oldval, + Register newval, Register output) { + CompareExchange(*this, &access, access.type(), Width::_32, access.sync(), mem, + oldval, newval, output); +} + +void MacroAssembler::wasmCompareExchange(const wasm::MemoryAccessDesc& access, + const BaseIndex& mem, Register oldval, + Register newval, Register output) { + CompareExchange(*this, &access, access.type(), Width::_32, access.sync(), mem, + oldval, newval, output); +} + +void MacroAssembler::atomicExchange(Scalar::Type type, + const Synchronization& sync, + const Address& mem, Register value, + Register output) { + AtomicExchange(*this, nullptr, type, Width::_32, sync, mem, value, output); +} + +void MacroAssembler::atomicExchange(Scalar::Type type, + const Synchronization& sync, + const BaseIndex& mem, Register value, + Register output) { + AtomicExchange(*this, nullptr, type, Width::_32, sync, mem, value, output); +} + +void MacroAssembler::wasmAtomicExchange(const wasm::MemoryAccessDesc& access, + const Address& mem, Register value, + Register output) { + AtomicExchange(*this, &access, access.type(), Width::_32, access.sync(), mem, + value, output); +} + +void MacroAssembler::wasmAtomicExchange(const wasm::MemoryAccessDesc& access, + const BaseIndex& mem, Register value, + Register output) { + AtomicExchange(*this, &access, access.type(), Width::_32, access.sync(), mem, + value, output); +} + +void MacroAssembler::atomicFetchOp(Scalar::Type type, + const Synchronization& sync, AtomicOp op, + Register value, const Address& mem, + Register temp, Register output) { + AtomicFetchOp<true>(*this, nullptr, type, Width::_32, sync, op, mem, value, + temp, output); +} + +void MacroAssembler::atomicFetchOp(Scalar::Type type, + const Synchronization& sync, AtomicOp op, + Register value, const BaseIndex& mem, + Register temp, Register output) { + AtomicFetchOp<true>(*this, nullptr, type, Width::_32, sync, op, mem, value, + temp, output); +} + +void MacroAssembler::wasmAtomicFetchOp(const wasm::MemoryAccessDesc& access, + AtomicOp op, Register value, + const Address& mem, Register temp, + Register output) { + AtomicFetchOp<true>(*this, &access, access.type(), Width::_32, access.sync(), + op, mem, value, temp, output); +} + +void MacroAssembler::wasmAtomicFetchOp(const wasm::MemoryAccessDesc& access, + AtomicOp op, Register value, + const BaseIndex& mem, Register temp, + Register output) { + AtomicFetchOp<true>(*this, &access, access.type(), Width::_32, access.sync(), + op, mem, value, temp, output); +} + +void MacroAssembler::wasmAtomicEffectOp(const wasm::MemoryAccessDesc& access, + AtomicOp op, Register value, + const Address& mem, Register temp) { + AtomicFetchOp<false>(*this, &access, access.type(), Width::_32, access.sync(), + op, mem, value, temp, temp); +} + +void 
MacroAssembler::wasmAtomicEffectOp(const wasm::MemoryAccessDesc& access, + AtomicOp op, Register value, + const BaseIndex& mem, Register temp) { + AtomicFetchOp<false>(*this, &access, access.type(), Width::_32, access.sync(), + op, mem, value, temp, temp); +} + +void MacroAssembler::wasmCompareExchange64(const wasm::MemoryAccessDesc& access, + const Address& mem, + Register64 expect, + Register64 replace, + Register64 output) { + CompareExchange(*this, &access, Scalar::Int64, Width::_64, access.sync(), mem, + expect.reg, replace.reg, output.reg); +} + +void MacroAssembler::wasmCompareExchange64(const wasm::MemoryAccessDesc& access, + const BaseIndex& mem, + Register64 expect, + Register64 replace, + Register64 output) { + CompareExchange(*this, &access, Scalar::Int64, Width::_64, access.sync(), mem, + expect.reg, replace.reg, output.reg); +} + +void MacroAssembler::wasmAtomicExchange64(const wasm::MemoryAccessDesc& access, + const Address& mem, Register64 value, + Register64 output) { + AtomicExchange(*this, &access, Scalar::Int64, Width::_64, access.sync(), mem, + value.reg, output.reg); +} + +void MacroAssembler::wasmAtomicExchange64(const wasm::MemoryAccessDesc& access, + const BaseIndex& mem, + Register64 value, Register64 output) { + AtomicExchange(*this, &access, Scalar::Int64, Width::_64, access.sync(), mem, + value.reg, output.reg); +} + +void MacroAssembler::wasmAtomicFetchOp64(const wasm::MemoryAccessDesc& access, + AtomicOp op, Register64 value, + const Address& mem, Register64 temp, + Register64 output) { + AtomicFetchOp<true>(*this, &access, Scalar::Int64, Width::_64, access.sync(), + op, mem, value.reg, temp.reg, output.reg); +} + +void MacroAssembler::wasmAtomicFetchOp64(const wasm::MemoryAccessDesc& access, + AtomicOp op, Register64 value, + const BaseIndex& mem, Register64 temp, + Register64 output) { + AtomicFetchOp<true>(*this, &access, Scalar::Int64, Width::_64, access.sync(), + op, mem, value.reg, temp.reg, output.reg); +} + +void MacroAssembler::wasmAtomicEffectOp64(const wasm::MemoryAccessDesc& access, + AtomicOp op, Register64 value, + const BaseIndex& mem, + Register64 temp) { + AtomicFetchOp<false>(*this, &access, Scalar::Int64, Width::_64, access.sync(), + op, mem, value.reg, temp.reg, temp.reg); +} + +// ======================================================================== +// JS atomic operations. 
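+//
+// For Uint32 arrays the 32-bit result may not fit in an int32, so the
+// wrappers below route it through a temp register and return it to the
+// caller as a double.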
+ +template <typename T> +static void CompareExchangeJS(MacroAssembler& masm, Scalar::Type arrayType, + const Synchronization& sync, const T& mem, + Register oldval, Register newval, Register temp, + AnyRegister output) { + if (arrayType == Scalar::Uint32) { + masm.compareExchange(arrayType, sync, mem, oldval, newval, temp); + masm.convertUInt32ToDouble(temp, output.fpu()); + } else { + masm.compareExchange(arrayType, sync, mem, oldval, newval, output.gpr()); + } +} + +void MacroAssembler::compareExchangeJS(Scalar::Type arrayType, + const Synchronization& sync, + const Address& mem, Register oldval, + Register newval, Register temp, + AnyRegister output) { + CompareExchangeJS(*this, arrayType, sync, mem, oldval, newval, temp, output); +} + +void MacroAssembler::compareExchangeJS(Scalar::Type arrayType, + const Synchronization& sync, + const BaseIndex& mem, Register oldval, + Register newval, Register temp, + AnyRegister output) { + CompareExchangeJS(*this, arrayType, sync, mem, oldval, newval, temp, output); +} + +template <typename T> +static void AtomicExchangeJS(MacroAssembler& masm, Scalar::Type arrayType, + const Synchronization& sync, const T& mem, + Register value, Register temp, + AnyRegister output) { + if (arrayType == Scalar::Uint32) { + masm.atomicExchange(arrayType, sync, mem, value, temp); + masm.convertUInt32ToDouble(temp, output.fpu()); + } else { + masm.atomicExchange(arrayType, sync, mem, value, output.gpr()); + } +} + +void MacroAssembler::atomicExchangeJS(Scalar::Type arrayType, + const Synchronization& sync, + const Address& mem, Register value, + Register temp, AnyRegister output) { + AtomicExchangeJS(*this, arrayType, sync, mem, value, temp, output); +} + +void MacroAssembler::atomicExchangeJS(Scalar::Type arrayType, + const Synchronization& sync, + const BaseIndex& mem, Register value, + Register temp, AnyRegister output) { + AtomicExchangeJS(*this, arrayType, sync, mem, value, temp, output); +} + +template <typename T> +static void AtomicFetchOpJS(MacroAssembler& masm, Scalar::Type arrayType, + const Synchronization& sync, AtomicOp op, + Register value, const T& mem, Register temp1, + Register temp2, AnyRegister output) { + if (arrayType == Scalar::Uint32) { + masm.atomicFetchOp(arrayType, sync, op, value, mem, temp2, temp1); + masm.convertUInt32ToDouble(temp1, output.fpu()); + } else { + masm.atomicFetchOp(arrayType, sync, op, value, mem, temp1, output.gpr()); + } +} + +void MacroAssembler::atomicFetchOpJS(Scalar::Type arrayType, + const Synchronization& sync, AtomicOp op, + Register value, const Address& mem, + Register temp1, Register temp2, + AnyRegister output) { + AtomicFetchOpJS(*this, arrayType, sync, op, value, mem, temp1, temp2, output); +} + +void MacroAssembler::atomicFetchOpJS(Scalar::Type arrayType, + const Synchronization& sync, AtomicOp op, + Register value, const BaseIndex& mem, + Register temp1, Register temp2, + AnyRegister output) { + AtomicFetchOpJS(*this, arrayType, sync, op, value, mem, temp1, temp2, output); +} + +void MacroAssembler::atomicEffectOpJS(Scalar::Type arrayType, + const Synchronization& sync, AtomicOp op, + Register value, const BaseIndex& mem, + Register temp) { + AtomicFetchOp<false>(*this, nullptr, arrayType, Width::_32, sync, op, mem, + value, temp, temp); +} + +void MacroAssembler::atomicEffectOpJS(Scalar::Type arrayType, + const Synchronization& sync, AtomicOp op, + Register value, const Address& mem, + Register temp) { + AtomicFetchOp<false>(*this, nullptr, arrayType, Width::_32, sync, op, mem, + value, temp, temp); +} + 
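+// Unlike x86, AArch64 sdiv/udiv write only their destination register, so
+// the "flexible" division helpers below can ignore the LiveRegisterSet they
+// are passed.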
+void MacroAssembler::flexibleQuotient32(Register rhs, Register srcDest, + bool isUnsigned, + const LiveRegisterSet&) { + quotient32(rhs, srcDest, isUnsigned); +} + +void MacroAssembler::flexibleRemainder32(Register rhs, Register srcDest, + bool isUnsigned, + const LiveRegisterSet&) { + remainder32(rhs, srcDest, isUnsigned); +} + +void MacroAssembler::flexibleDivMod32(Register rhs, Register srcDest, + Register remOutput, bool isUnsigned, + const LiveRegisterSet&) { + vixl::UseScratchRegisterScope temps(this); + ARMRegister scratch = temps.AcquireW(); + ARMRegister src = temps.AcquireW(); + + // Preserve src for remainder computation + Mov(src, ARMRegister(srcDest, 32)); + + if (isUnsigned) { + Udiv(ARMRegister(srcDest, 32), src, ARMRegister(rhs, 32)); + } else { + Sdiv(ARMRegister(srcDest, 32), src, ARMRegister(rhs, 32)); + } + // Compute remainder + Mul(scratch, ARMRegister(srcDest, 32), ARMRegister(rhs, 32)); + Sub(ARMRegister(remOutput, 32), src, scratch); +} + +CodeOffset MacroAssembler::moveNearAddressWithPatch(Register dest) { + AutoForbidPoolsAndNops afp(this, + /* max number of instructions in scope = */ 1); + CodeOffset offset(currentOffset()); + adr(ARMRegister(dest, 64), 0, LabelDoc()); + return offset; +} + +void MacroAssembler::patchNearAddressMove(CodeLocationLabel loc, + CodeLocationLabel target) { + ptrdiff_t off = target - loc; + MOZ_RELEASE_ASSERT(vixl::IsInt21(off)); + + Instruction* cur = reinterpret_cast<Instruction*>(loc.raw()); + MOZ_ASSERT(cur->IsADR()); + + vixl::Register rd = vixl::Register::XRegFromCode(cur->Rd()); + adr(cur, rd, off); +} + +// ======================================================================== +// Spectre Mitigations. + +void MacroAssembler::speculationBarrier() { + // Conditional speculation barrier. + csdb(); +} + +void MacroAssembler::floorFloat32ToInt32(FloatRegister src, Register dest, + Label* fail) { + floorf(src, dest, fail); +} + +void MacroAssembler::floorDoubleToInt32(FloatRegister src, Register dest, + Label* fail) { + floor(src, dest, fail); +} + +void MacroAssembler::ceilFloat32ToInt32(FloatRegister src, Register dest, + Label* fail) { + ceilf(src, dest, fail); +} + +void MacroAssembler::ceilDoubleToInt32(FloatRegister src, Register dest, + Label* fail) { + ceil(src, dest, fail); +} + +void MacroAssembler::truncFloat32ToInt32(FloatRegister src, Register dest, + Label* fail) { + const ARMFPRegister src32(src, 32); + + Label done, zeroCase; + + // Convert scalar to signed 32-bit fixed-point, rounding toward zero. + // In the case of overflow, the output is saturated. + // In the case of NaN and -0, the output is zero. + Fcvtzs(ARMRegister(dest, 32), src32); + + // If the output was zero, worry about special cases. + branch32(Assembler::Equal, dest, Imm32(0), &zeroCase); + + // Fail on overflow cases. + branch32(Assembler::Equal, dest, Imm32(INT_MAX), fail); + branch32(Assembler::Equal, dest, Imm32(INT_MIN), fail); + + // If the output was non-zero and wasn't saturated, just return it. + jump(&done); + + // Handle the case of a zero output: + // 1. The input may have been NaN, requiring a failure. + // 2. The input may have been in (-1,-0], requiring a failure. + { + bind(&zeroCase); + + // If input is a negative number that truncated to zero, the real + // output should be the non-integer -0. + // The use of "lt" instead of "lo" also catches unordered NaN input. + Fcmp(src32, 0.0f); + B(fail, vixl::lt); + + // Check explicitly for -0, bitwise. 
+ Fmov(ARMRegister(dest, 32), src32); + branchTest32(Assembler::Signed, dest, dest, fail); + move32(Imm32(0), dest); + } + + bind(&done); +} + +void MacroAssembler::truncDoubleToInt32(FloatRegister src, Register dest, + Label* fail) { + const ARMFPRegister src64(src, 64); + + Label done, zeroCase; + + // Convert scalar to signed 32-bit fixed-point, rounding toward zero. + // In the case of overflow, the output is saturated. + // In the case of NaN and -0, the output is zero. + Fcvtzs(ARMRegister(dest, 32), src64); + + // If the output was zero, worry about special cases. + branch32(Assembler::Equal, dest, Imm32(0), &zeroCase); + + // Fail on overflow cases. + branch32(Assembler::Equal, dest, Imm32(INT_MAX), fail); + branch32(Assembler::Equal, dest, Imm32(INT_MIN), fail); + + // If the output was non-zero and wasn't saturated, just return it. + jump(&done); + + // Handle the case of a zero output: + // 1. The input may have been NaN, requiring a failure. + // 2. The input may have been in (-1,-0], requiring a failure. + { + bind(&zeroCase); + + // If input is a negative number that truncated to zero, the real + // output should be the non-integer -0. + // The use of "lt" instead of "lo" also catches unordered NaN input. + Fcmp(src64, 0.0); + B(fail, vixl::lt); + + // Check explicitly for -0, bitwise. + Fmov(ARMRegister(dest, 64), src64); + branchTestPtr(Assembler::Signed, dest, dest, fail); + movePtr(ImmPtr(0), dest); + } + + bind(&done); +} + +void MacroAssembler::roundFloat32ToInt32(FloatRegister src, Register dest, + FloatRegister temp, Label* fail) { + const ARMFPRegister src32(src, 32); + ScratchFloat32Scope scratch(*this); + + Label negative, done; + + // Branch to a slow path if input < 0.0 due to complicated rounding rules. + // Note that Fcmp with NaN unsets the negative flag. + Fcmp(src32, 0.0); + B(&negative, Assembler::Condition::lo); + + // Handle the simple case of a positive input, and also -0 and NaN. + // Rounding proceeds with consideration of the fractional part of the input: + // 1. If > 0.5, round to integer with higher absolute value (so, up). + // 2. If < 0.5, round to integer with lower absolute value (so, down). + // 3. If = 0.5, round to +Infinity (so, up). + { + // Convert to signed 32-bit integer, rounding halfway cases away from zero. + // In the case of overflow, the output is saturated. + // In the case of NaN and -0, the output is zero. + Fcvtas(ARMRegister(dest, 32), src32); + // If the output potentially saturated, fail. + branch32(Assembler::Equal, dest, Imm32(INT_MAX), fail); + + // If the result of the rounding was non-zero, return the output. + // In the case of zero, the input may have been NaN or -0, which must bail. + branch32(Assembler::NotEqual, dest, Imm32(0), &done); + { + // If input is NaN, comparisons set the C and V bits of the NZCV flags. + Fcmp(src32, 0.0f); + B(fail, Assembler::Overflow); + + // Move all 32 bits of the input into a scratch register to check for -0. + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratchGPR32 = temps.AcquireW(); + Fmov(scratchGPR32, src32); + Cmp(scratchGPR32, vixl::Operand(uint32_t(0x80000000))); + B(fail, Assembler::Equal); + } + + jump(&done); + } + + // Handle the complicated case of a negative input. + // Rounding proceeds with consideration of the fractional part of the input: + // 1. If > 0.5, round to integer with higher absolute value (so, down). + // 2. If < 0.5, round to integer with lower absolute value (so, up). + // 3. If = 0.5, round to +Infinity (so, up). 
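+  // (Hence the code below adds 0.5, or the largest float below 0.5, to the
+  //  input before rounding the sum toward -Infinity.)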
+ bind(&negative); + { + // Inputs in [-0.5, 0) need 0.5 added; other negative inputs need + // the biggest double less than 0.5. + Label join; + loadConstantFloat32(GetBiggestNumberLessThan(0.5f), temp); + loadConstantFloat32(-0.5f, scratch); + branchFloat(Assembler::DoubleLessThan, src, scratch, &join); + loadConstantFloat32(0.5f, temp); + bind(&join); + + addFloat32(src, temp); + // Round all values toward -Infinity. + // In the case of overflow, the output is saturated. + // NaN and -0 are already handled by the "positive number" path above. + Fcvtms(ARMRegister(dest, 32), temp); + // If the output potentially saturated, fail. + branch32(Assembler::Equal, dest, Imm32(INT_MIN), fail); + + // If output is zero, then the actual result is -0. Fail. + branchTest32(Assembler::Zero, dest, dest, fail); + } + + bind(&done); +} + +void MacroAssembler::roundDoubleToInt32(FloatRegister src, Register dest, + FloatRegister temp, Label* fail) { + const ARMFPRegister src64(src, 64); + ScratchDoubleScope scratch(*this); + + Label negative, done; + + // Branch to a slow path if input < 0.0 due to complicated rounding rules. + // Note that Fcmp with NaN unsets the negative flag. + Fcmp(src64, 0.0); + B(&negative, Assembler::Condition::lo); + + // Handle the simple case of a positive input, and also -0 and NaN. + // Rounding proceeds with consideration of the fractional part of the input: + // 1. If > 0.5, round to integer with higher absolute value (so, up). + // 2. If < 0.5, round to integer with lower absolute value (so, down). + // 3. If = 0.5, round to +Infinity (so, up). + { + // Convert to signed 32-bit integer, rounding halfway cases away from zero. + // In the case of overflow, the output is saturated. + // In the case of NaN and -0, the output is zero. + Fcvtas(ARMRegister(dest, 32), src64); + // If the output potentially saturated, fail. + branch32(Assembler::Equal, dest, Imm32(INT_MAX), fail); + + // If the result of the rounding was non-zero, return the output. + // In the case of zero, the input may have been NaN or -0, which must bail. + branch32(Assembler::NotEqual, dest, Imm32(0), &done); + { + // If input is NaN, comparisons set the C and V bits of the NZCV flags. + Fcmp(src64, 0.0); + B(fail, Assembler::Overflow); + + // Move all 64 bits of the input into a scratch register to check for -0. + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratchGPR64 = temps.AcquireX(); + Fmov(scratchGPR64, src64); + Cmp(scratchGPR64, vixl::Operand(uint64_t(0x8000000000000000))); + B(fail, Assembler::Equal); + } + + jump(&done); + } + + // Handle the complicated case of a negative input. + // Rounding proceeds with consideration of the fractional part of the input: + // 1. If > 0.5, round to integer with higher absolute value (so, down). + // 2. If < 0.5, round to integer with lower absolute value (so, up). + // 3. If = 0.5, round to +Infinity (so, up). + bind(&negative); + { + // Inputs in [-0.5, 0) need 0.5 added; other negative inputs need + // the biggest double less than 0.5. + Label join; + loadConstantDouble(GetBiggestNumberLessThan(0.5), temp); + loadConstantDouble(-0.5, scratch); + branchDouble(Assembler::DoubleLessThan, src, scratch, &join); + loadConstantDouble(0.5, temp); + bind(&join); + + addDouble(src, temp); + // Round all values toward -Infinity. + // In the case of overflow, the output is saturated. + // NaN and -0 are already handled by the "positive number" path above. + Fcvtms(ARMRegister(dest, 32), temp); + // If the output potentially saturated, fail. 
+ branch32(Assembler::Equal, dest, Imm32(INT_MIN), fail); + + // If output is zero, then the actual result is -0. Fail. + branchTest32(Assembler::Zero, dest, dest, fail); + } + + bind(&done); +} + +void MacroAssembler::nearbyIntDouble(RoundingMode mode, FloatRegister src, + FloatRegister dest) { + switch (mode) { + case RoundingMode::Up: + frintp(ARMFPRegister(dest, 64), ARMFPRegister(src, 64)); + return; + case RoundingMode::Down: + frintm(ARMFPRegister(dest, 64), ARMFPRegister(src, 64)); + return; + case RoundingMode::NearestTiesToEven: + frintn(ARMFPRegister(dest, 64), ARMFPRegister(src, 64)); + return; + case RoundingMode::TowardsZero: + frintz(ARMFPRegister(dest, 64), ARMFPRegister(src, 64)); + return; + } + MOZ_CRASH("unexpected mode"); +} + +void MacroAssembler::nearbyIntFloat32(RoundingMode mode, FloatRegister src, + FloatRegister dest) { + switch (mode) { + case RoundingMode::Up: + frintp(ARMFPRegister(dest, 32), ARMFPRegister(src, 32)); + return; + case RoundingMode::Down: + frintm(ARMFPRegister(dest, 32), ARMFPRegister(src, 32)); + return; + case RoundingMode::NearestTiesToEven: + frintn(ARMFPRegister(dest, 32), ARMFPRegister(src, 32)); + return; + case RoundingMode::TowardsZero: + frintz(ARMFPRegister(dest, 32), ARMFPRegister(src, 32)); + return; + } + MOZ_CRASH("unexpected mode"); +} + +void MacroAssembler::copySignDouble(FloatRegister lhs, FloatRegister rhs, + FloatRegister output) { + ScratchDoubleScope scratch(*this); + + // Double with only the sign bit set + loadConstantDouble(-0.0, scratch); + + if (lhs != output) { + moveDouble(lhs, output); + } + + bit(ARMFPRegister(output.encoding(), vixl::VectorFormat::kFormat8B), + ARMFPRegister(rhs.encoding(), vixl::VectorFormat::kFormat8B), + ARMFPRegister(scratch.encoding(), vixl::VectorFormat::kFormat8B)); +} + +void MacroAssembler::copySignFloat32(FloatRegister lhs, FloatRegister rhs, + FloatRegister output) { + ScratchFloat32Scope scratch(*this); + + // Float with only the sign bit set + loadConstantFloat32(-0.0f, scratch); + + if (lhs != output) { + moveFloat32(lhs, output); + } + + bit(ARMFPRegister(output.encoding(), vixl::VectorFormat::kFormat8B), + ARMFPRegister(rhs.encoding(), vixl::VectorFormat::kFormat8B), + ARMFPRegister(scratch.encoding(), vixl::VectorFormat::kFormat8B)); +} + +void MacroAssembler::shiftIndex32AndAdd(Register indexTemp32, int shift, + Register pointer) { + Add(ARMRegister(pointer, 64), ARMRegister(pointer, 64), + Operand(ARMRegister(indexTemp32, 64), vixl::LSL, shift)); +} + +//}}} check_macroassembler_style + +} // namespace jit +} // namespace js |