/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef A64_ASSEMBLER_A64_H_
#define A64_ASSEMBLER_A64_H_

#include <iterator>  // for std::size, used below for CallTempNonArgRegs

#include "jit/arm64/vixl/Assembler-vixl.h"
#include "jit/CompactBuffer.h"
#include "jit/shared/Disassembler-shared.h"
#include "wasm/WasmTypeDecls.h"

namespace js {
namespace jit {

// VIXL imports.
typedef vixl::Register ARMRegister;
typedef vixl::FPRegister ARMFPRegister;
using vixl::ARMBuffer;
using vixl::Instruction;

using LabelDoc = DisassemblerSpew::LabelDoc;
using LiteralDoc = DisassemblerSpew::LiteralDoc;

static const uint32_t AlignmentAtPrologue = 0;
static const uint32_t AlignmentMidPrologue = 8;
static const Scale ScalePointer = TimesEight;

// The MacroAssembler uses scratch registers extensively and unexpectedly.
// For safety, scratch registers should always be acquired using
// vixl::UseScratchRegisterScope.
static constexpr Register ScratchReg{Registers::ip0};
static constexpr ARMRegister ScratchReg64 = {ScratchReg, 64};

static constexpr Register ScratchReg2{Registers::ip1};
static constexpr ARMRegister ScratchReg2_64 = {ScratchReg2, 64};
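
// A minimal usage sketch of the scope mentioned above (illustrative only;
// `masm` stands for a MacroAssembler reference and is not declared here):
//
//   vixl::UseScratchRegisterScope temps(&masm);
//   const ARMRegister scratch64 = temps.AcquireX();
//   // ... use scratch64; it is handed back when `temps` goes out of scope.
//
// Going through the scope avoids clobbering a scratch register that an
// enclosing MacroAssembler operation may already be using.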

static constexpr FloatRegister ReturnDoubleReg = {FloatRegisters::d0,
                                                  FloatRegisters::Double};
static constexpr FloatRegister ScratchDoubleReg_ = {FloatRegisters::d31,
                                                    FloatRegisters::Double};
struct ScratchDoubleScope : public AutoFloatRegisterScope {
  explicit ScratchDoubleScope(MacroAssembler& masm)
      : AutoFloatRegisterScope(masm, ScratchDoubleReg_) {}
};

static constexpr FloatRegister ReturnFloat32Reg = {FloatRegisters::s0,
                                                   FloatRegisters::Single};
static constexpr FloatRegister ScratchFloat32Reg_ = {FloatRegisters::s31,
                                                     FloatRegisters::Single};
struct ScratchFloat32Scope : public AutoFloatRegisterScope {
  explicit ScratchFloat32Scope(MacroAssembler& masm)
      : AutoFloatRegisterScope(masm, ScratchFloat32Reg_) {}
};

#ifdef ENABLE_WASM_SIMD
static constexpr FloatRegister ReturnSimd128Reg = {FloatRegisters::v0,
                                                   FloatRegisters::Simd128};
static constexpr FloatRegister ScratchSimd128Reg = {FloatRegisters::v31,
                                                    FloatRegisters::Simd128};
struct ScratchSimd128Scope : public AutoFloatRegisterScope {
  explicit ScratchSimd128Scope(MacroAssembler& masm)
      : AutoFloatRegisterScope(masm, ScratchSimd128Reg) {}
};
#else
struct ScratchSimd128Scope : public AutoFloatRegisterScope {
  explicit ScratchSimd128Scope(MacroAssembler& masm)
      : AutoFloatRegisterScope(masm, ScratchDoubleReg_) {
    MOZ_CRASH("SIMD not enabled");
  }
};
#endif

static constexpr Register InvalidReg{Registers::Invalid};
static constexpr FloatRegister InvalidFloatReg = {};

static constexpr Register OsrFrameReg{Registers::x3};
static constexpr Register CallTempReg0{Registers::x9};
static constexpr Register CallTempReg1{Registers::x10};
static constexpr Register CallTempReg2{Registers::x11};
static constexpr Register CallTempReg3{Registers::x12};
static constexpr Register CallTempReg4{Registers::x13};
static constexpr Register CallTempReg5{Registers::x14};
static constexpr Register PreBarrierReg{Registers::x1};

static constexpr Register InterpreterPCReg{Registers::x9};

static constexpr Register ReturnReg{Registers::x0};
static constexpr Register64 ReturnReg64(ReturnReg);
static constexpr Register JSReturnReg{Registers::x2};
static constexpr Register FramePointer{Registers::fp};
static constexpr ARMRegister FramePointer64{FramePointer, 64};
static constexpr Register ZeroRegister{Registers::sp};
static constexpr ARMRegister ZeroRegister64{Registers::sp, 64};
static constexpr ARMRegister ZeroRegister32{Registers::sp, 32};

// [SMDOC] AArch64 Stack Pointer and Pseudo Stack Pointer conventions
//
// ================
//
// Stack pointer (SP), PseudoStackPointer (PSP), and RealStackPointer:
//
// The ARM64 real SP has a constraint: it must be 16-byte aligned whenever it
// is used as the base pointer for a memory access.  (SP+offset need not be
// 16-byte aligned, but the SP value itself must be.)  The SP register may
// take on unaligned values but may not be used for a memory access while it
// is unaligned.
//
// Stack-alignment checking can be enabled or disabled by a control register;
// however that register cannot be modified by user space.  We have to assume
// stack alignment checking is enabled, and that does usually appear to be the
// case.  See the ARM Architecture Reference Manual, "D1.8.2 SP alignment
// checking", for further details.
//
// A second constraint is forced upon us by the ARM64 ABI.  This requires that
// all accesses to the stack must be at or above SP.  Accesses below SP are
// strictly forbidden, presumably because the kernel might use that area of
// memory for its own purposes -- in particular, signal delivery -- and hence
// it may get trashed at any time.
//
// Note this doesn't mean that accesses to the stack must be based off
// register SP.  Only that the effective addresses must be >= SP, regardless
// of how the address is formed.
//
// In order to allow word-wise pushes and pops, some of our ARM64 jits
// (JS-Baseline, JS-Ion, and Wasm-Ion, but not Wasm-Baseline) dedicate x28 to
// be used as a PseudoStackPointer (PSP).
//
// Initially the PSP will have the same value as the SP.  Code can, if it
// wants, push a single word by subtracting 8 from the PSP, doing SP := PSP,
// then storing the value at PSP+0.  Given other constraints on the alignment
// of the SP at function call boundaries, this works out OK, at the cost of
// the two extra instructions per push / pop.
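//
// As a hedged illustration, pushing the contents of (say) x0 under this
// scheme comes down to a sequence along the lines of:
//
//   sub x28, x28, #8   ; PSP := PSP - 8
//   mov sp, x28        ; SP := PSP, so the slot being written is not below SP
//   str x0, [x28]      ; store via the PSP, so SP's alignment does not matter
//
// while the matching pop simply reads the word and moves the PSP back up,
// leaving SP alone (see invariant (2) below).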
//
// This is all a bit messy, and is probably not robustly adhered to.  However,
// the following appear to be the intended, and mostly implemented, current
// invariants:
//
// (1) PSP is "primary", SP is "secondary".  Most stack refs are
//     PSP-relative.  SP-relative is rare and (obviously) only done when we
//     know that SP is aligned.
//
// (2) At all times, the relationship SP <= PSP is maintained.  The fact that
//     SP may validly be less than PSP means that pushes on the stack force
//     the two values to become equal, by copying PSP into SP.  However, pops
//     behave differently: PSP moves back up and SP stays the same, since that
//     doesn't break the SP <= PSP invariant.
//
// (3) However, immediately before a call instruction, SP and PSP must be the
//     same.  To enforce this, PSP is copied into SP by the arm64-specific
//     MacroAssembler::call routines.
//
// (4) Also, after a function has returned, it is expected that SP holds the
//     "primary" value.  How exactly this is implemented remains not entirely
//     clear and merits further investigation.  The following points are
//     believed to be relevant:
//
//     - For calls to functions observing the system AArch64 ABI, PSP (x28) is
//       callee-saved.  That, combined with (3) above, implies SP == PSP
//       immediately after the call returns.
//
//     - JIT-generated routines return using MacroAssemblerCompat::retn, and
//       that copies PSP into SP (bizarrely; this would make more sense if it
//       copied SP into PSP); but in any case, the point is that they are the
//       same at the point that the return instruction executes.
//
//     - MacroAssembler::callWithABIPost copies PSP into SP after the return
//       of a call requiring dynamic alignment.
//
//     Given the above, it is unclear exactly where in the return sequence it
//     is expected that SP == PSP, and also whether it is the callee or caller
//     that is expected to enforce it.
//
// In general it would be nice to be able to move (at some time in the future,
// not now) to a world where *every* assignment to PSP or SP is followed
// immediately by a copy into the other register.  That would make all
// required correctness proofs trivial in the sense that it would require only
// local inspection of code immediately following (dominated by) any such
// assignment.  For the moment, however, this is a guideline, not a hard
// requirement.
//
// ================
//
// Mechanics of keeping the stack pointers in sync:
//
// The following two methods require that the masm's SP has been set to the
// PSP with MacroAssembler::SetStackPointer64(PseudoStackPointer64), or they
// will be no-ops.  The setup is performed manually by the jits after creating
// the masm.
//
// * MacroAssembler::syncStackPtr() performs SP := PSP, presumably after PSP
//   has been updated, so SP needs to move too.  This is used pretty liberally
//   throughout the code base.
//
// * MacroAssembler::initPseudoStackPtr() performs PSP := SP.  This can be
//   used after calls to non-ABI compliant code; it's not used much.
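//
// In instruction terms each of these is just a register move; a hedged sketch
// of what they emit (the exact encodings are produced by VIXL) is:
//
//   syncStackPtr()        ->   mov sp, x28    ; SP := PSP
//   initPseudoStackPtr()  ->   mov x28, sp    ; PSP := SP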
//
// In the ARM64 assembler there is a function Instruction::IsStackPtrSync()
// that recognizes the instruction emitted by syncStackPtr(), and this is used
// to skip that instruction in a few places, should it be present, in the JS
// JIT where code is generated to deal with toggled calls.
//
// In various places there are calls to MacroAssembler::syncStackPtr() which
// appear to be redundant.  Investigation shows that they often are redundant,
// but not always.  Finding and removing such redundancies would be quite some
// work, so we live for now with the occasional redundant update.  Perusal of
// the Cortex-A55 and -A72 optimization guides shows no evidence that such
// assignments are any more expensive than assignments between vanilla integer
// registers, so the costs of such redundant updates are assumed to be small.
//
// Invariants on the PSP at function call boundaries:
//
// It *appears* that the following invariants exist:
//
// * On entry to JIT code, PSP == SP, ie the stack pointer is transmitted via
//   both registers.
//
// * On entry to C++ code, PSP == SP.  Certainly it appears that all calls
//   created by the MacroAssembler::call(..) routines perform 'syncStackPtr'
//   immediately before the call, and all ABI calls are routed through the
//   MacroAssembler::call layer.
//
// * The stubs generated by WasmStubs.cpp assume that, on entry, SP is the
//   active stack pointer and that PSP is dead.
//
// * The PSP is non-volatile (callee-saved).  Along a normal return path from
//   JIT code, simply having PSP == SP on exit is correct, since the exit SP
//   is the same as the entry SP by the JIT ABI.
//
// * Call-outs to non-JIT C++ code do not need to set up the PSP (it won't be
//   used), and will not need to restore the PSP on return because x28 is
//   non-volatile in the ARM64 ABI.
//
// ================
//
// Future cleanups to the SP-vs-PSP machinery:
//
// Currently we have somewhat unclear invariants, which are not obviously
// always enforced, and which may require complex non-local reasoning.
// Auditing the code to ensure that the invariants always hold, whilst not
// generating duplicate syncs, is close to impossible.  A future rework to
// tidy this might be as follows.  (This suggestion pertains to the entire
// JIT complex: all of the JS compilers, wasm compilers, stub generators,
// regexp compilers, etc.)
//
// Currently we have that, in JIT-generated code, PSP is "primary" and SP is
// "secondary", meaning that PSP has the "real" stack pointer value and SP is
// updated whenever PSP acquires a lower value, so as to ensure that
// SP <= PSP.  An exception to this scheme is the stubs code generated by
// WasmStubs.cpp, which assumes that SP is "primary" and PSP is dead.
//
// It might give us an easier incremental path to eventually removing PSP
// entirely if we switched to having SP always be the primary.  That is:
//
// (1) SP is primary, PSP is secondary
// (2) After any assignment to SP, it is copied into PSP
// (3) All (non-frame-pointer-based) stack accesses are PSP-relative
//     (as at present)
//
// This would have the effect that:
//
// * It would reinstate the invariant that on all targets, the "real" SP value
//   is in the ABI-and-or-hardware-mandated stack pointer register.
//
// * It would give us a simple story about calls and returns:
//   - for calls to non-JIT generated code (viz, C++ etc), we need no extra
//     copies, because PSP (x28) is callee-saved
//   - for calls to JIT-generated code, we need no extra copies, because of
//     (2) above
//
// * We could incrementally migrate those parts of the code generator where we
//   know that SP is 16-aligned, to use SP- rather than PSP-relative accesses.
//
// * The consistent use of (2) would remove the requirement to have to perform
//   path-dependent reasoning (for paths in the generated code, not in the
//   compiler) when reading/understanding the code.
//
// * x28 would become free for use by stubs and the baseline compiler without
//   having to worry about interoperating with code that expects x28 to hold a
//   valid PSP.
//
// One might ask what mechanical checks we can add to ensure correctness,
// rather than having to verify these invariants by hand indefinitely.  Maybe
// some combination of:
//
// * In debug builds, compiling-in assert(SP == PSP) at critical places.  This
//   can be done using the existing `assertStackPtrsSynced` function.
//
// * In debug builds, scanning sections of generated code to ensure no
//   SP-relative stack accesses have been created -- or, for some sections at
//   least, that every assignment to SP is immediately followed by a copy to
//   x28.  This would also facilitate detection of duplicate syncs.
//
// ================
//
// Other investigative notes, for the code base at present:
//
// * Some disassembly dumps suggest that we sync the stack pointer too often.
//   This could be the result of various pieces of code working at cross
//   purposes when syncing the stack pointer, or of not paying attention to
//   the precise invariants.
//
// * As documented in RegExpNativeMacroAssembler.cpp, function
//   SMRegExpMacroAssembler::createStackFrame:
//
//     // ARM64 communicates stack address via SP, but uses a pseudo-sp (PSP)
//     // for addressing.  The register we use for PSP may however also be
//     // used by calling code, and it is nonvolatile, so save it.  Do this as
//     // a special case first because the generic save/restore code needs the
//     // PSP to be initialized already.
//
//   and also in function SMRegExpMacroAssembler::exitHandler:
//
//     // Restore the saved value of the PSP register, this value is whatever
//     // the caller had saved in it, not any actual SP value, and it must not
//     // be overwritten subsequently.
//
//   The original source for these comments was a patch for bug 1445907.
//
// * MacroAssembler-arm64.h has an interesting comment in the retn()
//   function:
//
//     syncStackPtr();  // SP is always used to transmit the stack between
//                      // calls.
//
//   Same comment at abiret() in that file, and in MacroAssembler-arm64.cpp,
//   at callWithABIPre and callWithABIPost.
//
// * In Trampoline-arm64.cpp function JitRuntime::generateVMWrapper we find
//
//     // SP is used to transfer stack across call boundaries.
//     masm.initPseudoStackPtr();
//
//   after the return point of a callWithVMWrapper.  The only reasonable
//   conclusion from all those (assuming they are right) is that SP == PSP.
//
// * Wasm-Baseline does not use the PSP, but as Wasm-Ion code requires
//   SP == PSP and tiered code can have Baseline->Ion calls, Baseline will set
//   PSP = SP before a call to wasm code.
//
// ================

// StackPointer is intentionally undefined on ARM64 to prevent misuse: using
// sp as a base register is only valid if sp % 16 == 0.
static constexpr Register RealStackPointer{Registers::sp};

static constexpr Register PseudoStackPointer{Registers::x28};
static constexpr ARMRegister PseudoStackPointer64 = {Registers::x28, 64};
static constexpr ARMRegister PseudoStackPointer32 = {Registers::x28, 32};

static constexpr Register IntArgReg0{Registers::x0};
static constexpr Register IntArgReg1{Registers::x1};
static constexpr Register IntArgReg2{Registers::x2};
static constexpr Register IntArgReg3{Registers::x3};
static constexpr Register IntArgReg4{Registers::x4};
static constexpr Register IntArgReg5{Registers::x5};
static constexpr Register IntArgReg6{Registers::x6};
static constexpr Register IntArgReg7{Registers::x7};
static constexpr Register HeapReg{Registers::x21};

// Define unsized Registers.
#define DEFINE_UNSIZED_REGISTERS(N) \
  static constexpr Register r##N{Registers::x##N};
REGISTER_CODE_LIST(DEFINE_UNSIZED_REGISTERS)
#undef DEFINE_UNSIZED_REGISTERS
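
// For example, for register code 0 the macro above expands to
//
//   static constexpr Register r0{Registers::x0};
//
// and likewise for every other code in REGISTER_CODE_LIST.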

static constexpr Register ip0{Registers::x16};
static constexpr Register ip1{Registers::x17};
static constexpr Register fp{Registers::x29};
static constexpr Register lr{Registers::x30};
static constexpr Register rzr{Registers::xzr};

// Import VIXL registers into the js::jit namespace.
#define IMPORT_VIXL_REGISTERS(N)                  \
  static constexpr ARMRegister w##N = vixl::w##N; \
  static constexpr ARMRegister x##N = vixl::x##N;
REGISTER_CODE_LIST(IMPORT_VIXL_REGISTERS)
#undef IMPORT_VIXL_REGISTERS
static constexpr ARMRegister wzr = vixl::wzr;
static constexpr ARMRegister xzr = vixl::xzr;
static constexpr ARMRegister wsp = vixl::wsp;
static constexpr ARMRegister sp = vixl::sp;

// Import VIXL VRegisters into the js::jit namespace.
#define IMPORT_VIXL_VREGISTERS(N)                   \
  static constexpr ARMFPRegister s##N = vixl::s##N; \
  static constexpr ARMFPRegister d##N = vixl::d##N;
REGISTER_CODE_LIST(IMPORT_VIXL_VREGISTERS)
#undef IMPORT_VIXL_VREGISTERS

static constexpr ValueOperand JSReturnOperand = ValueOperand(JSReturnReg);

// Registers used by RegExpMatcher and RegExpExecMatch stubs (do not use
// JSReturnOperand).
static constexpr Register RegExpMatcherRegExpReg = CallTempReg0;
static constexpr Register RegExpMatcherStringReg = CallTempReg1;
static constexpr Register RegExpMatcherLastIndexReg = CallTempReg2;

// Registers used by RegExpExecTest stub (do not use ReturnReg).
static constexpr Register RegExpExecTestRegExpReg = CallTempReg0;
static constexpr Register RegExpExecTestStringReg = CallTempReg1;

// Registers used by RegExpSearcher stub (do not use ReturnReg).
static constexpr Register RegExpSearcherRegExpReg = CallTempReg0;
static constexpr Register RegExpSearcherStringReg = CallTempReg1;
static constexpr Register RegExpSearcherLastIndexReg = CallTempReg2;

static constexpr Register JSReturnReg_Type = r3;
static constexpr Register JSReturnReg_Data = r2;

static constexpr FloatRegister NANReg = {FloatRegisters::d14,
                                         FloatRegisters::Single};

// N.B. r8 isn't listed as an aapcs temp register, but we can use it as such
// because we never use return-structs.
static constexpr Register CallTempNonArgRegs[] = {r8,  r9,  r10, r11,
                                                  r12, r13, r14, r15};
static const uint32_t NumCallTempNonArgRegs = std::size(CallTempNonArgRegs);

static constexpr uint32_t JitStackAlignment = 16;

static constexpr uint32_t JitStackValueAlignment =
    JitStackAlignment / sizeof(Value);
static_assert(JitStackAlignment % sizeof(Value) == 0 &&
                  JitStackValueAlignment >= 1,
              "Stack alignment should be a non-zero multiple of sizeof(Value)");
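
// With the 16-byte JitStackAlignment above and an 8-byte Value, the assertion
// holds and JitStackValueAlignment works out to 2.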

static constexpr uint32_t SimdMemoryAlignment = 16;

static_assert(CodeAlignment % SimdMemoryAlignment == 0,
              "Code alignment should be larger than any of the alignments "
              "which are used for the constant sections of the code buffer. "
              "Thus it should be larger than the alignment for SIMD "
              "constants.");

static const uint32_t WasmStackAlignment = SimdMemoryAlignment;
static const uint32_t WasmTrapInstructionLength = 4;

// See comments in wasm::GenerateFunctionPrologue. The difference between
// these is the size of the largest callable prologue on the platform.
static constexpr uint32_t WasmCheckedCallEntryOffset = 0u;

class Assembler : public vixl::Assembler {
 public:
  Assembler() : vixl::Assembler() {}

  typedef vixl::Condition Condition;

  void finish();
  bool appendRawCode(const uint8_t* code, size_t numBytes);
  bool reserve(size_t size);
  bool swapBuffer(wasm::Bytes& bytes);

  // Emit the jump table, returning the BufferOffset to the first entry in the
  // table.
  BufferOffset emitExtendedJumpTable();
  BufferOffset ExtendedJumpTable_;
  void executableCopy(uint8_t* buffer);

  BufferOffset immPool(ARMRegister dest, uint8_t* value, vixl::LoadLiteralOp op,
                       const LiteralDoc& doc,
                       ARMBuffer::PoolEntry* pe = nullptr);
  BufferOffset immPool64(ARMRegister dest, uint64_t value,
                         ARMBuffer::PoolEntry* pe = nullptr);
  BufferOffset fImmPool(ARMFPRegister dest, uint8_t* value,
                        vixl::LoadLiteralOp op, const LiteralDoc& doc);
  BufferOffset fImmPool64(ARMFPRegister dest, double value);
  BufferOffset fImmPool32(ARMFPRegister dest, float value);

  uint32_t currentOffset() const { return nextOffset().getOffset(); }

  void bind(Label* label) { bind(label, nextOffset()); }
  void bind(Label* label, BufferOffset boff);
  void bind(CodeLabel* label) { label->target()->bind(currentOffset()); }

  void setUnlimitedBuffer() { armbuffer_.setUnlimited(); }
  bool oom() const {
    return AssemblerShared::oom() || armbuffer_.oom() ||
           jumpRelocations_.oom() || dataRelocations_.oom();
  }

  void copyJumpRelocationTable(uint8_t* dest) const {
    if (jumpRelocations_.length()) {
      memcpy(dest, jumpRelocations_.buffer(), jumpRelocations_.length());
    }
  }
  void copyDataRelocationTable(uint8_t* dest) const {
    if (dataRelocations_.length()) {
      memcpy(dest, dataRelocations_.buffer(), dataRelocations_.length());
    }
  }

  size_t jumpRelocationTableBytes() const { return jumpRelocations_.length(); }
  size_t dataRelocationTableBytes() const { return dataRelocations_.length(); }
  size_t bytesNeeded() const {
    return SizeOfCodeGenerated() + jumpRelocationTableBytes() +
           dataRelocationTableBytes();
  }

  void processCodeLabels(uint8_t* rawCode) {
    for (const CodeLabel& label : codeLabels_) {
      Bind(rawCode, label);
    }
  }

  static void UpdateLoad64Value(Instruction* inst0, uint64_t value);

  static void Bind(uint8_t* rawCode, const CodeLabel& label) {
    auto mode = label.linkMode();
    size_t patchAtOffset = label.patchAt().offset();
    size_t targetOffset = label.target().offset();

    if (mode == CodeLabel::MoveImmediate) {
      Instruction* inst = (Instruction*)(rawCode + patchAtOffset);
      Assembler::UpdateLoad64Value(inst, (uint64_t)(rawCode + targetOffset));
    } else {
      *reinterpret_cast<const void**>(rawCode + patchAtOffset) =
          rawCode + targetOffset;
    }
  }

  void retarget(Label* cur, Label* next);

  // The buffer is about to be linked. Ensure any constant pools or
  // excess bookkeeping has been flushed to the instruction stream.
  void flush() { armbuffer_.flushPool(); }

  void comment(const char* msg) {
#ifdef JS_DISASM_ARM64
    spew_.spew("; %s", msg);
#endif
  }

  void setPrinter(Sprinter* sp) {
#ifdef JS_DISASM_ARM64
    spew_.setPrinter(sp);
#endif
  }

  static bool SupportsFloatingPoint() { return true; }
  static bool SupportsUnalignedAccesses() { return true; }
  static bool SupportsFastUnalignedFPAccesses() { return true; }
  static bool SupportsWasmSimd() { return true; }

  static bool HasRoundInstruction(RoundingMode mode) {
    switch (mode) {
      case RoundingMode::Up:
      case RoundingMode::Down:
      case RoundingMode::NearestTiesToEven:
      case RoundingMode::TowardsZero:
        return true;
    }
    MOZ_CRASH("unexpected mode");
  }

 protected:
  // Add a jump whose target is unknown until finalization.
  // The jump may not be patched at runtime.
  void addPendingJump(BufferOffset src, ImmPtr target, RelocationKind kind);

 public:
  static uint32_t PatchWrite_NearCallSize() { return 4; }
  static uint32_t NopSize() { return 4; }

  static void PatchWrite_NearCall(CodeLocationLabel start,
                                  CodeLocationLabel toCall);
  static void PatchDataWithValueCheck(CodeLocationLabel label,
                                      PatchedImmPtr newValue,
                                      PatchedImmPtr expected);
  static void PatchDataWithValueCheck(CodeLocationLabel label, ImmPtr newValue,
                                      ImmPtr expected);
  static void PatchWrite_Imm32(CodeLocationLabel label, Imm32 imm) {
    // `raw` is going to be the return address.
    uint32_t* raw = (uint32_t*)label.raw();
    // Overwrite the 4 bytes before the return address, which will end up
    // being the call instruction.
    *(raw - 1) = imm.value;
  }

  static uint32_t AlignDoubleArg(uint32_t offset) {
    MOZ_CRASH("AlignDoubleArg()");
  }

  static uintptr_t GetPointer(uint8_t* ptr) {
    Instruction* i = reinterpret_cast<Instruction*>(ptr);
    uint64_t ret = i->Literal64();
    return ret;
  }

  // Toggle a jmp or cmp emitted by toggledJump().
  static void ToggleToJmp(CodeLocationLabel inst_);
  static void ToggleToCmp(CodeLocationLabel inst_);
  static void ToggleCall(CodeLocationLabel inst_, bool enabled);

  static void TraceJumpRelocations(JSTracer* trc, JitCode* code,
                                   CompactBufferReader& reader);
  static void TraceDataRelocations(JSTracer* trc, JitCode* code,
                                   CompactBufferReader& reader);

  void assertNoGCThings() const {
#ifdef DEBUG
    MOZ_ASSERT(dataRelocations_.length() == 0);
    for (auto& j : pendingJumps_) {
      MOZ_ASSERT(j.kind == RelocationKind::HARDCODED);
    }
#endif
  }

 public:
  // A Jump table entry is 2 instructions, with 8 bytes of raw data.
  static const size_t SizeOfJumpTableEntry = 16;

  struct JumpTableEntry {
    uint32_t ldr;
    uint32_t br;
    void* data;

    Instruction* getLdr() { return reinterpret_cast<Instruction*>(&ldr); }
  };

  // Offset of the patchable target for the given entry.
  static const size_t OffsetOfJumpTableEntryPointer = 8;
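
  // Layout recap: the two 32-bit instruction slots (ldr, br) occupy bytes
  // [0, 8) of a JumpTableEntry and the raw pointer occupies bytes [8, 16),
  // which is why SizeOfJumpTableEntry is 16 and the patchable pointer lives
  // at offset 8.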

 public:
  void writeCodePointer(CodeLabel* label) {
    armbuffer_.assertNoPoolAndNoNops();
    uintptr_t x = uintptr_t(-1);
    BufferOffset off = EmitData(&x, sizeof(uintptr_t));
    label->patchAt()->bind(off.getOffset());
  }

  void verifyHeapAccessDisassembly(uint32_t begin, uint32_t end,
                                   const Disassembler::HeapAccess& heapAccess) {
    MOZ_CRASH("verifyHeapAccessDisassembly");
  }

 protected:
  // Structure for fixing up pc-relative loads/jumps when the machine
  // code gets moved (executable copy, gc, etc.).
  struct RelativePatch {
    BufferOffset offset;
    void* target;
    RelocationKind kind;

    RelativePatch(BufferOffset offset, void* target, RelocationKind kind)
        : offset(offset), target(target), kind(kind) {}
  };

  // List of jumps for which the target is either unknown until finalization,
  // or cannot be known due to GC. Each entry here requires a unique entry
  // in the extended jump table, and is patched at finalization.
  js::Vector<RelativePatch, 8, SystemAllocPolicy> pendingJumps_;

  // Final output formatters.
  CompactBufferWriter jumpRelocations_;
  CompactBufferWriter dataRelocations_;
};

static const uint32_t NumIntArgRegs = 8;
static const uint32_t NumFloatArgRegs = 8;

class ABIArgGenerator {
 public:
  ABIArgGenerator()
      : intRegIndex_(0), floatRegIndex_(0), stackOffset_(0), current_() {}

  ABIArg next(MIRType argType);
  ABIArg& current() { return current_; }
  uint32_t stackBytesConsumedSoFar() const { return stackOffset_; }
  void increaseStackOffset(uint32_t bytes) { stackOffset_ += bytes; }

 protected:
  unsigned intRegIndex_;
  unsigned floatRegIndex_;
  uint32_t stackOffset_;
  ABIArg current_;
};
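
// A hedged usage sketch (the particular MIRTypes and locals are illustrative,
// not taken from this file): callers walk a signature in order, asking for
// each argument's location in turn.
//
//   ABIArgGenerator abi;
//   ABIArg a0 = abi.next(MIRType::Pointer);  // first integer arg, in x0
//   ABIArg a1 = abi.next(MIRType::Double);   // first FP arg, in d0
//   uint32_t spill = abi.stackBytesConsumedSoFar();  // 0 while regs suffice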

// These registers may be volatile or nonvolatile.
static constexpr Register ABINonArgReg0 = r8;
static constexpr Register ABINonArgReg1 = r9;
static constexpr Register ABINonArgReg2 = r10;
static constexpr Register ABINonArgReg3 = r11;

// This register may be volatile or nonvolatile. Avoid d31 which is the
// ScratchDoubleReg_.
static constexpr FloatRegister ABINonArgDoubleReg = {FloatRegisters::s16,
                                                     FloatRegisters::Single};

// These registers may be volatile or nonvolatile.
// Note: these three registers are all guaranteed to be different.
static constexpr Register ABINonArgReturnReg0 = r8;
static constexpr Register ABINonArgReturnReg1 = r9;
static constexpr Register ABINonVolatileReg{Registers::x19};

// This register is guaranteed to be clobberable during the prologue and
// epilogue of an ABI call which must preserve the ABI argument, return, and
// non-volatile registers.
static constexpr Register ABINonArgReturnVolatileReg = lr;

// Instance pointer argument register for WebAssembly functions. This must not
// alias any other register used for passing function arguments or return
// values. Preserved by WebAssembly functions. Must be nonvolatile.
static constexpr Register InstanceReg{Registers::x23};

// Registers used for wasm table calls. These registers must be disjoint
// from the ABI argument registers, InstanceReg and each other.
static constexpr Register WasmTableCallScratchReg0 = ABINonArgReg0;
static constexpr Register WasmTableCallScratchReg1 = ABINonArgReg1;
static constexpr Register WasmTableCallSigReg = ABINonArgReg2;
static constexpr Register WasmTableCallIndexReg = ABINonArgReg3;

// Registers used for ref calls.
static constexpr Register WasmCallRefCallScratchReg0 = ABINonArgReg0;
static constexpr Register WasmCallRefCallScratchReg1 = ABINonArgReg1;
static constexpr Register WasmCallRefReg = ABINonArgReg3;

// Register used as a scratch along the return path in the fast js -> wasm
// stub code. This must not overlap ReturnReg, JSReturnOperand, or InstanceReg.
// It must be a volatile register.
static constexpr Register WasmJitEntryReturnScratch = r9;

static inline bool GetIntArgReg(uint32_t usedIntArgs, uint32_t usedFloatArgs,
                                Register* out) {
  if (usedIntArgs >= NumIntArgRegs) {
    return false;
  }
  *out = Register::FromCode(usedIntArgs);
  return true;
}

static inline bool GetFloatArgReg(uint32_t usedIntArgs, uint32_t usedFloatArgs,
                                  FloatRegister* out) {
  if (usedFloatArgs >= NumFloatArgRegs) {
    return false;
  }
  *out = FloatRegister::FromCode(usedFloatArgs);
  return true;
}

// Get a register in which we plan to put a quantity that will be used as an
// integer argument. This differs from GetIntArgReg in that if we have no more
// actual argument registers to use we will fall back on using whatever
// CallTempReg* don't overlap the argument registers, and only fail once those
// run out too.
static inline bool GetTempRegForIntArg(uint32_t usedIntArgs,
                                       uint32_t usedFloatArgs, Register* out) {
  if (GetIntArgReg(usedIntArgs, usedFloatArgs, out)) {
    return true;
  }
  // Unfortunately, we have to assume things about the point at which
  // GetIntArgReg returns false, because we need to know how many registers it
  // can allocate.
  usedIntArgs -= NumIntArgRegs;
  if (usedIntArgs >= NumCallTempNonArgRegs) {
    return false;
  }
  *out = CallTempNonArgRegs[usedIntArgs];
  return true;
}
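
// For example, with usedIntArgs == 9 the eight integer argument registers are
// exhausted, so the fallback above yields CallTempNonArgRegs[9 - 8], i.e. r9.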

inline Imm32 Imm64::firstHalf() const { return low(); }
inline Imm32 Imm64::secondHalf() const { return hi(); }

// Forbids nop filling for testing purposes. Not nestable.
class AutoForbidNops {
 protected:
  Assembler* asm_;

 public:
  explicit AutoForbidNops(Assembler* asm_) : asm_(asm_) {
    asm_->enterNoNops();
  }
  ~AutoForbidNops() { asm_->leaveNoNops(); }
};

// Forbids pool generation during a specified interval. Not nestable.
class AutoForbidPoolsAndNops : public AutoForbidNops {
 public:
  AutoForbidPoolsAndNops(Assembler* asm_, size_t maxInst)
      : AutoForbidNops(asm_) {
    asm_->enterNoPool(maxInst);
  }

  ~AutoForbidPoolsAndNops() { asm_->leaveNoPool(); }
};

}  // namespace jit
}  // namespace js

#endif  // A64_ASSEMBLER_A64_H_