diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
commit | 26a029d407be480d791972afb5975cf62c9360a6 (patch) | |
tree | f435a8308119effd964b339f76abb83a57c29483 /js/src/jit/arm64/Assembler-arm64.h | |
parent | Initial commit. (diff) | |
download | firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip |
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'js/src/jit/arm64/Assembler-arm64.h')
-rw-r--r-- | js/src/jit/arm64/Assembler-arm64.h | 801 |
1 files changed, 801 insertions, 0 deletions
diff --git a/js/src/jit/arm64/Assembler-arm64.h b/js/src/jit/arm64/Assembler-arm64.h new file mode 100644 index 0000000000..d025f2b63c --- /dev/null +++ b/js/src/jit/arm64/Assembler-arm64.h @@ -0,0 +1,801 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef A64_ASSEMBLER_A64_H_ +#define A64_ASSEMBLER_A64_H_ + +#include <iterator> + +#include "jit/arm64/vixl/Assembler-vixl.h" + +#include "jit/CompactBuffer.h" +#include "jit/shared/Disassembler-shared.h" +#include "wasm/WasmTypeDecls.h" + +namespace js { +namespace jit { + +// VIXL imports. +typedef vixl::Register ARMRegister; +typedef vixl::FPRegister ARMFPRegister; +using vixl::ARMBuffer; +using vixl::Instruction; + +using LabelDoc = DisassemblerSpew::LabelDoc; +using LiteralDoc = DisassemblerSpew::LiteralDoc; + +static const uint32_t AlignmentAtPrologue = 0; +static const uint32_t AlignmentMidPrologue = 8; +static const Scale ScalePointer = TimesEight; + +// The MacroAssembler uses scratch registers extensively and unexpectedly. +// For safety, scratch registers should always be acquired using +// vixl::UseScratchRegisterScope. 
// Fixed register assignments used by the ARM64 JIT: scratch registers, return
// registers, call temporaries and frame registers.

// Integer scratch registers (ip0 and ip1), each paired with a 64-bit vixl view.
static constexpr Register ScratchReg{Registers::ip0};
static constexpr ARMRegister ScratchReg64 = {ScratchReg, 64};

static constexpr Register ScratchReg2{Registers::ip1};
static constexpr ARMRegister ScratchReg2_64 = {ScratchReg2, 64};

// Double-precision return register (d0) and scratch register (d31).
static constexpr FloatRegister ReturnDoubleReg = {FloatRegisters::d0,
                                                  FloatRegisters::Double};
static constexpr FloatRegister ScratchDoubleReg_ = {FloatRegisters::d31,
                                                    FloatRegisters::Double};
// Scope object that hands ScratchDoubleReg_ to AutoFloatRegisterScope.
struct ScratchDoubleScope : public AutoFloatRegisterScope {
  explicit ScratchDoubleScope(MacroAssembler& masm)
      : AutoFloatRegisterScope(masm, ScratchDoubleReg_) {}
};

// Single-precision return register (s0) and scratch register (s31).
static constexpr FloatRegister ReturnFloat32Reg = {FloatRegisters::s0,
                                                   FloatRegisters::Single};
static constexpr FloatRegister ScratchFloat32Reg_ = {FloatRegisters::s31,
                                                     FloatRegisters::Single};
// Scope object that hands ScratchFloat32Reg_ to AutoFloatRegisterScope.
struct ScratchFloat32Scope : public AutoFloatRegisterScope {
  explicit ScratchFloat32Scope(MacroAssembler& masm)
      : AutoFloatRegisterScope(masm, ScratchFloat32Reg_) {}
};

#ifdef ENABLE_WASM_SIMD
// 128-bit SIMD return register (v0) and scratch register (v31).
static constexpr FloatRegister ReturnSimd128Reg = {FloatRegisters::v0,
                                                   FloatRegisters::Simd128};
static constexpr FloatRegister ScratchSimd128Reg = {FloatRegisters::v31,
                                                    FloatRegisters::Simd128};
// Scope object that hands ScratchSimd128Reg to AutoFloatRegisterScope.
struct ScratchSimd128Scope : public AutoFloatRegisterScope {
  explicit ScratchSimd128Scope(MacroAssembler& masm)
      : AutoFloatRegisterScope(masm, ScratchSimd128Reg) {}
};
#else
// Without ENABLE_WASM_SIMD the type is still declared, but constructing one
// reaches MOZ_CRASH.
struct ScratchSimd128Scope : public AutoFloatRegisterScope {
  explicit ScratchSimd128Scope(MacroAssembler& masm)
      : AutoFloatRegisterScope(masm, ScratchDoubleReg_) {
    MOZ_CRASH("SIMD not enabled");
  }
};
#endif

static constexpr Register InvalidReg{Registers::Invalid};
static constexpr FloatRegister InvalidFloatReg = {};

// Call temporaries occupy x9-x14. Note that InterpreterPCReg below is also x9,
// i.e. the same register as CallTempReg0.
static constexpr Register OsrFrameReg{Registers::x3};
static constexpr Register CallTempReg0{Registers::x9};
static constexpr Register CallTempReg1{Registers::x10};
static constexpr Register CallTempReg2{Registers::x11};
static constexpr Register CallTempReg3{Registers::x12};
static constexpr Register CallTempReg4{Registers::x13};
static constexpr Register CallTempReg5{Registers::x14};

static constexpr Register PreBarrierReg{Registers::x1};

static constexpr Register InterpreterPCReg{Registers::x9};

static constexpr Register ReturnReg{Registers::x0};
static constexpr Register64 ReturnReg64(ReturnReg);
static constexpr Register JSReturnReg{Registers::x2};
static constexpr Register FramePointer{Registers::fp};
static constexpr ARMRegister FramePointer64{FramePointer, 64};
// NOTE(review): the zero-register aliases are built from Registers::sp;
// presumably sp and the zero register share an encoding here -- confirm
// against the Registers definition.
static constexpr Register ZeroRegister{Registers::sp};
static constexpr ARMRegister ZeroRegister64{Registers::sp, 64};
static constexpr ARMRegister ZeroRegister32{Registers::sp, 32};

// [SMDOC] AArch64 Stack Pointer and Pseudo Stack Pointer conventions
//
//                               ================
//
// Stack pointer (SP), PseudoStackPointer (PSP), and RealStackPointer:
//
// The ARM64 real SP has a constraint: it must be 16-byte aligned whenever it
// is used as the base pointer for a memory access.  (SP+offset need not be
// 16-byte aligned, but the SP value itself must be.)  The SP register may
// take on unaligned values but may not be used for a memory access while it
// is unaligned.
//
// Stack-alignment checking can be enabled or disabled by a control register;
// however that register cannot be modified by user space.  We have to assume
// stack alignment checking is enabled, and that does usually appear to be the
// case.  See the ARM Architecture Reference Manual, "D1.8.2 SP alignment
// checking", for further details.
//
// A second constraint is forced upon us by the ARM64 ABI.  This requires that
// all accesses to the stack must be at or above SP.  Accesses below SP are
// strictly forbidden, presumably because the kernel might use that area of
// memory for its own purposes -- in particular, signal delivery -- and hence
// it may get trashed at any time.
+// +// Note this doesn't mean that accesses to the stack must be based off +// register SP. Only that the effective addresses must be >= SP, regardless +// of how the address is formed. +// +// In order to allow word-wise pushes and pops, some of our ARM64 jits +// (JS-Baseline, JS-Ion, and Wasm-Ion, but not Wasm-Baseline) dedicate x28 to +// be used as a PseudoStackPointer (PSP). +// +// Initially the PSP will have the same value as the SP. Code can, if it +// wants, push a single word by subtracting 8 from the PSP, doing SP := PSP, +// then storing the value at PSP+0. Given other constraints on the alignment +// of the SP at function call boundaries, this works out OK, at the cost of +// the two extra instructions per push / pop. +// +// This is all a bit messy, and is probably not robustly adhered to. However, +// the following appear to be the intended, and mostly implemented, current +// invariants: +// +// (1) PSP is "primary", SP is "secondary". Most stack refs are +// PSP-relative. SP-relative is rare and (obviously) only done when we +// know that SP is aligned. +// +// (2) At all times, the relationship SP <= PSP is maintained. The fact that +// SP may validly be less than PSP means that pushes on the stack force +// the two values to become equal, by copying PSP into SP. However, pops +// behave differently: PSP moves back up and SP stays the same, since that +// doesn't break the SP <= PSP invariant. +// +// (3) However, immediately before a call instruction, SP and PSP must be the +// same. To enforce this, PSP is copied into SP by the arm64-specific +// MacroAssembler::call routines. +// +// (4) Also, after a function has returned, it is expected that SP holds the +// "primary" value. How exactly this is implemented remains not entirely +// clear and merits further investigation. The following points are +// believed to be relevant: +// +// - For calls to functions observing the system AArch64 ABI, PSP (x28) is +// callee-saved. 
That, combined with (3) above, implies SP == PSP +// immediately after the call returns. +// +// - JIT-generated routines return using MacroAssemblerCompat::retn, and +// that copies PSP into SP (bizarrely; this would make more sense if it +// copied SP into PSP); but in any case, the point is that they are the +// same at the point that the return instruction executes. +// +// - MacroAssembler::callWithABIPost copies PSP into SP after the return +// of a call requiring dynamic alignment. +// +// Given the above, it is unclear exactly where in the return sequence it +// is expected that SP == PSP, and also whether it is the callee or caller +// that is expected to enforce it. +// +// In general it would be nice to be able to move (at some time in the future, +// not now) to a world where *every* assignment to PSP or SP is followed +// immediately by a copy into the other register. That would make all +// required correctness proofs trivial in the sense that it would require only +// local inspection of code immediately following (dominated by) any such +// assignment. For the moment, however, this is a guideline, not a hard +// requirement. +// +// ================ +// +// Mechanics of keeping the stack pointers in sync: +// +// The following two methods require that the masm's SP has been set to the PSP +// with MacroAssembler::SetStackPointer64(PseudoStackPointer64), or they will be +// no-ops. The setup is performed manually by the jits after creating the masm. +// +// * MacroAssembler::syncStackPtr() performs SP := PSP, presumably after PSP has +// been updated, so SP needs to move too. This is used pretty liberally +// throughout the code base. +// +// * MacroAssembler::initPseudoStackPtr() performs PSP := SP. This can be used +// after calls to non-ABI compliant code; it's not used much. 
+// +// In the ARM64 assembler there is a function Instruction::IsStackPtrSync() that +// recognizes the instruction emitted by syncStackPtr(), and this is used to +// skip that instruction a few places, should it be present, in the JS JIT where +// code is generated to deal with toggled calls. +// +// In various places there are calls to MacroAssembler::syncStackPtr() which +// appear to be redundant. Investigation shows that they often are redundant, +// but not always. Finding and removing such redundancies would be quite some +// work, so we live for now with the occasional redundant update. Perusal of +// the Cortex-A55 and -A72 optimization guides shows no evidence that such +// assignments are any more expensive than assignments between vanilla integer +// registers, so the costs of such redundant updates are assumed to be small. +// +// Invariants on the PSP at function call boundaries: +// +// It *appears* that the following invariants exist: +// +// * On entry to JIT code, PSP == SP, ie the stack pointer is transmitted via +// both registers. +// +// * On entry to C++ code, PSP == SP. Certainly it appears that all calls +// created by the MacroAssembler::call(..) routines perform 'syncStackPtr' +// immediately before the call, and all ABI calls are routed through the +// MacroAssembler::call layer. +// +// * The stubs generated by WasmStubs.cpp assume that, on entry, SP is the +// active stack pointer and that PSP is dead. +// +// * The PSP is non-volatile (callee-saved). Along a normal return path from +// JIT code, simply having PSP == SP on exit is correct, since the exit SP is +// the same as the entry SP by the JIT ABI. +// +// * Call-outs to non-JIT C++ code do not need to set up the PSP (it won't be +// used), and will not need to restore the PSP on return because x28 is +// non-volatile in the ARM64 ABI. 
+// +// ================ +// +// Future cleanups to the SP-vs-PSP machinery: +// +// Currently we have somewhat unclear invariants, which are not obviously +// always enforced, and which may require complex non-local reasoning. +// Auditing the code to ensure that the invariants always hold, whilst not +// generating duplicate syncs, is close to impossible. A future rework to +// tidy this might be as follows. (This suggestion pertains to the entire +// JIT complex: all of the JS compilers, wasm compilers, stub generators, +// regexp compilers, etc). +// +// Currently we have that, in JIT-generated code, PSP is "primary" and SP is +// "secondary", meaning that PSP has the "real" stack pointer value and SP is +// updated whenever PSP acquires a lower value, so as to ensure that SP <= PSP. +// An exception to this scheme is the stubs code generated by WasmStubs.cpp, +// which assumes that SP is "primary" and PSP is dead. +// +// It might give us an easier incremental path to eventually removing PSP +// entirely if we switched to having SP always be the primary. That is: +// +// (1) SP is primary, PSP is secondary +// (2) After any assignment to SP, it is copied into PSP +// (3) All (non-frame-pointer-based) stack accesses are PSP-relative +// (as at present) +// +// This would have the effect that: +// +// * It would reinstate the invariant that on all targets, the "real" SP value +// is in the ABI-and-or-hardware-mandated stack pointer register. 
+// +// * It would give us a simple story about calls and returns: +// - for calls to non-JIT generated code (viz, C++ etc), we need no extra +// copies, because PSP (x28) is callee-saved +// - for calls to JIT-generated code, we need no extra copies, because of (2) +// above +// +// * We could incrementally migrate those parts of the code generator where we +// know that SP is 16-aligned, to use SP- rather than PSP-relative accesses +// +// * The consistent use of (2) would remove the requirement to have to perform +// path-dependent reasoning (for paths in the generated code, not in the +// compiler) when reading/understanding the code. +// +// * x28 would become free for use by stubs and the baseline compiler without +// having to worry about interoperating with code that expects x28 to hold a +// valid PSP. +// +// One might ask what mechanical checks we can add to ensure correctness, rather +// than having to verify these invariants by hand indefinitely. Maybe some +// combination of: +// +// * In debug builds, compiling-in assert(SP == PSP) at critical places. This +// can be done using the existing `assertStackPtrsSynced` function. +// +// * In debug builds, scanning sections of generated code to ensure no +// SP-relative stack accesses have been created -- for some sections, at +// least every assignment to SP is immediately followed by a copy to x28. +// This would also facilitate detection of duplicate syncs. +// +// ================ +// +// Other investigative notes, for the code base at present: +// +// * Some disassembly dumps suggest that we sync the stack pointer too often. +// This could be the result of various pieces of code working at cross +// purposes when syncing the stack pointer, or of not paying attention to the +// precise invariants. 
+// +// * As documented in RegExpNativeMacroAssembler.cpp, function +// SMRegExpMacroAssembler::createStackFrame: +// +// // ARM64 communicates stack address via SP, but uses a pseudo-sp (PSP) for +// // addressing. The register we use for PSP may however also be used by +// // calling code, and it is nonvolatile, so save it. Do this as a special +// // case first because the generic save/restore code needs the PSP to be +// // initialized already. +// +// and also in function SMRegExpMacroAssembler::exitHandler: +// +// // Restore the saved value of the PSP register, this value is whatever the +// // caller had saved in it, not any actual SP value, and it must not be +// // overwritten subsequently. +// +// The original source for these comments was a patch for bug 1445907. +// +// * MacroAssembler-arm64.h has an interesting comment in the retn() +// function: +// +// syncStackPtr(); // SP is always used to transmit the stack between calls. +// +// Same comment at abiret() in that file, and in MacroAssembler-arm64.cpp, +// at callWithABIPre and callWithABIPost. +// +// * In Trampoline-arm64.cpp function JitRuntime::generateVMWrapper we find +// +// // SP is used to transfer stack across call boundaries. +// masm.initPseudoStackPtr(); +// +// after the return point of a callWithVMWrapper. The only reasonable +// conclusion from all those (assuming they are right) is that SP == PSP. +// +// * Wasm-Baseline does not use the PSP, but as Wasm-Ion code requires SP==PSP +// and tiered code can have Baseline->Ion calls, Baseline will set PSP=SP +// before a call to wasm code. +// +// ================ + +// StackPointer is intentionally undefined on ARM64 to prevent misuse: using +// sp as a base register is only valid if sp % 16 == 0. 
static constexpr Register RealStackPointer{Registers::sp};

// x28 serves as the pseudo stack pointer (PSP); its 64- and 32-bit vixl views
// follow.
static constexpr Register PseudoStackPointer{Registers::x28};
static constexpr ARMRegister PseudoStackPointer64 = {Registers::x28, 64};
static constexpr ARMRegister PseudoStackPointer32 = {Registers::x28, 32};

// Integer argument registers: x0 through x7.
static constexpr Register IntArgReg0{Registers::x0};
static constexpr Register IntArgReg1{Registers::x1};
static constexpr Register IntArgReg2{Registers::x2};
static constexpr Register IntArgReg3{Registers::x3};
static constexpr Register IntArgReg4{Registers::x4};
static constexpr Register IntArgReg5{Registers::x5};
static constexpr Register IntArgReg6{Registers::x6};
static constexpr Register IntArgReg7{Registers::x7};
static constexpr Register HeapReg{Registers::x21};

// Define unsized Registers.
// For a register code N the macro declares a Register named rN wrapping
// Registers::xN. REGISTER_CODE_LIST is defined elsewhere; presumably it
// invokes the macro once per general-purpose register code.
#define DEFINE_UNSIZED_REGISTERS(N) \
  static constexpr Register r##N{Registers::x##N};
REGISTER_CODE_LIST(DEFINE_UNSIZED_REGISTERS)
#undef DEFINE_UNSIZED_REGISTERS
static constexpr Register ip0{Registers::x16};
static constexpr Register ip1{Registers::x17};
static constexpr Register fp{Registers::x29};
static constexpr Register lr{Registers::x30};
static constexpr Register rzr{Registers::xzr};

// Import VIXL registers into the js::jit namespace.
// For a register code N this declares wN and xN as copies of vixl::wN and
// vixl::xN.
#define IMPORT_VIXL_REGISTERS(N)                  \
  static constexpr ARMRegister w##N = vixl::w##N; \
  static constexpr ARMRegister x##N = vixl::x##N;
REGISTER_CODE_LIST(IMPORT_VIXL_REGISTERS)
#undef IMPORT_VIXL_REGISTERS
static constexpr ARMRegister wzr = vixl::wzr;
static constexpr ARMRegister xzr = vixl::xzr;
static constexpr ARMRegister wsp = vixl::wsp;
static constexpr ARMRegister sp = vixl::sp;

// Import VIXL VRegisters into the js::jit namespace.
// For a register code N this declares sN and dN as copies of vixl::sN and
// vixl::dN.
#define IMPORT_VIXL_VREGISTERS(N)                   \
  static constexpr ARMFPRegister s##N = vixl::s##N; \
  static constexpr ARMFPRegister d##N = vixl::d##N;
REGISTER_CODE_LIST(IMPORT_VIXL_VREGISTERS)
#undef IMPORT_VIXL_VREGISTERS

static constexpr ValueOperand JSReturnOperand = ValueOperand(JSReturnReg);

// Registers used by RegExpMatcher and RegExpExecMatch stubs (do not use
// JSReturnOperand).
static constexpr Register RegExpMatcherRegExpReg = CallTempReg0;
static constexpr Register RegExpMatcherStringReg = CallTempReg1;
static constexpr Register RegExpMatcherLastIndexReg = CallTempReg2;

// Registers used by RegExpExecTest stub (do not use ReturnReg).
static constexpr Register RegExpExecTestRegExpReg = CallTempReg0;
static constexpr Register RegExpExecTestStringReg = CallTempReg1;

// Registers used by RegExpSearcher stub (do not use ReturnReg).
static constexpr Register RegExpSearcherRegExpReg = CallTempReg0;
static constexpr Register RegExpSearcherStringReg = CallTempReg1;
static constexpr Register RegExpSearcherLastIndexReg = CallTempReg2;

static constexpr Register JSReturnReg_Type = r3;
static constexpr Register JSReturnReg_Data = r2;

// NOTE(review): a d-register code (d14) is paired with the Single kind here --
// confirm that this combination is intended.
static constexpr FloatRegister NANReg = {FloatRegisters::d14,
                                         FloatRegisters::Single};
// N.B. r8 isn't listed as an aapcs temp register, but we can use it as such
// because we never use return-structs.
static constexpr Register CallTempNonArgRegs[] = {r8,  r9,  r10, r11,
                                                  r12, r13, r14, r15};
static const uint32_t NumCallTempNonArgRegs = std::size(CallTempNonArgRegs);

static constexpr uint32_t JitStackAlignment = 16;

// Number of Value-sized slots per JitStackAlignment unit.
static constexpr uint32_t JitStackValueAlignment =
    JitStackAlignment / sizeof(Value);
static_assert(JitStackAlignment % sizeof(Value) == 0 &&
                  JitStackValueAlignment >= 1,
              "Stack alignment should be a non-zero multiple of sizeof(Value)");

static constexpr uint32_t SimdMemoryAlignment = 16;

static_assert(CodeAlignment % SimdMemoryAlignment == 0,
              "Code alignment should be larger than any of the alignments "
              "which are used for "
              "the constant sections of the code buffer. Thus it should be "
              "larger than the "
              "alignment for SIMD constants.");

static const uint32_t WasmStackAlignment = SimdMemoryAlignment;
static const uint32_t WasmTrapInstructionLength = 4;

// See comments in wasm::GenerateFunctionPrologue.  The difference between these
// is the size of the largest callable prologue on the platform.
static constexpr uint32_t WasmCheckedCallEntryOffset = 0u;

// ARM64 assembler for the JIT, layered on vixl::Assembler. It adds literal
// pool helpers, label binding, relocation tables and code-patching entry
// points. Most members are only declared here and defined elsewhere.
class Assembler : public vixl::Assembler {
 public:
  Assembler() : vixl::Assembler() {}

  typedef vixl::Condition Condition;

  void finish();
  bool appendRawCode(const uint8_t* code, size_t numBytes);
  bool reserve(size_t size);
  bool swapBuffer(wasm::Bytes& bytes);

  // Emit the jump table, returning the BufferOffset to the first entry in the
  // table.
  BufferOffset emitExtendedJumpTable();
  BufferOffset ExtendedJumpTable_;
  void executableCopy(uint8_t* buffer);

  // Literal-pool load helpers for integer (immPool*) and floating-point
  // (fImmPool*) destinations.
  BufferOffset immPool(ARMRegister dest, uint8_t* value, vixl::LoadLiteralOp op,
                       const LiteralDoc& doc,
                       ARMBuffer::PoolEntry* pe = nullptr);
  BufferOffset immPool64(ARMRegister dest, uint64_t value,
                         ARMBuffer::PoolEntry* pe = nullptr);
  BufferOffset fImmPool(ARMFPRegister dest, uint8_t* value,
                        vixl::LoadLiteralOp op, const LiteralDoc& doc);
  BufferOffset fImmPool64(ARMFPRegister dest, double value);
  BufferOffset fImmPool32(ARMFPRegister dest, float value);

  // The numeric offset of nextOffset().
  uint32_t currentOffset() const { return nextOffset().getOffset(); }

  // Bind a label at the next offset, or at an explicit buffer offset.
  void bind(Label* label) { bind(label, nextOffset()); }
  void bind(Label* label, BufferOffset boff);
  void bind(CodeLabel* label) { label->target()->bind(currentOffset()); }

  void setUnlimitedBuffer() { armbuffer_.setUnlimited(); }
  // True if the shared assembler state, the buffer, or either relocation
  // writer reports an out-of-memory condition.
  bool oom() const {
    return AssemblerShared::oom() || armbuffer_.oom() ||
           jumpRelocations_.oom() || dataRelocations_.oom();
  }

  // Copy the relocation tables to |dest|; nothing is copied when the table is
  // empty.
  void copyJumpRelocationTable(uint8_t* dest) const {
    if (jumpRelocations_.length()) {
      memcpy(dest, jumpRelocations_.buffer(), jumpRelocations_.length());
    }
  }
  void copyDataRelocationTable(uint8_t* dest) const {
    if (dataRelocations_.length()) {
      memcpy(dest, dataRelocations_.buffer(), dataRelocations_.length());
    }
  }

  size_t jumpRelocationTableBytes() const { return jumpRelocations_.length(); }
  size_t dataRelocationTableBytes() const { return dataRelocations_.length(); }
  // Generated code size plus the sizes of both relocation tables.
  size_t bytesNeeded() const {
    return SizeOfCodeGenerated() + jumpRelocationTableBytes() +
           dataRelocationTableBytes();
  }

  // Apply Bind() to every recorded code label.
  void processCodeLabels(uint8_t* rawCode) {
    for (const CodeLabel& label : codeLabels_) {
      Bind(rawCode, label);
    }
  }

  static void UpdateLoad64Value(Instruction* inst0, uint64_t value);

  // Patch the location named by |label| so that it refers to the label's
  // target address inside |rawCode|.
  static void Bind(uint8_t* rawCode, const CodeLabel& label) {
    auto mode = label.linkMode();
    size_t patchAtOffset = label.patchAt().offset();
    size_t targetOffset = label.target().offset();

    if (mode == CodeLabel::MoveImmediate) {
      // The patch site is an instruction; update it via UpdateLoad64Value.
      Instruction* inst = (Instruction*)(rawCode + patchAtOffset);
      Assembler::UpdateLoad64Value(inst, (uint64_t)(rawCode + targetOffset));
    } else {
      // Any other mode: the patch site is a raw pointer-sized slot.
      *reinterpret_cast<const void**>(rawCode + patchAtOffset) =
          rawCode + targetOffset;
    }
  }

  void retarget(Label* cur, Label* next);

  // The buffer is about to be linked. Ensure any constant pools or
  // excess bookkeeping has been flushed to the instruction stream.
  void flush() { armbuffer_.flushPool(); }

  // Both of the following do nothing unless JS_DISASM_ARM64 is defined.
  void comment(const char* msg) {
#ifdef JS_DISASM_ARM64
    spew_.spew("; %s", msg);
#endif
  }

  void setPrinter(Sprinter* sp) {
#ifdef JS_DISASM_ARM64
    spew_.setPrinter(sp);
#endif
  }

  static bool SupportsFloatingPoint() { return true; }
  static bool SupportsUnalignedAccesses() { return true; }
  static bool SupportsFastUnalignedFPAccesses() { return true; }
  static bool SupportsWasmSimd() { return true; }

  // Returns true for each of the four listed rounding modes; any other value
  // reaches MOZ_CRASH.
  static bool HasRoundInstruction(RoundingMode mode) {
    switch (mode) {
      case RoundingMode::Up:
      case RoundingMode::Down:
      case RoundingMode::NearestTiesToEven:
      case RoundingMode::TowardsZero:
        return true;
    }
    MOZ_CRASH("unexpected mode");
  }

 protected:
  // Add a jump whose target is unknown until finalization.
  // The jump may not be patched at runtime.
  void addPendingJump(BufferOffset src, ImmPtr target, RelocationKind kind);

 public:
  static uint32_t PatchWrite_NearCallSize() { return 4; }

  static uint32_t NopSize() { return 4; }

  static void PatchWrite_NearCall(CodeLocationLabel start,
                                  CodeLocationLabel toCall);
  static void PatchDataWithValueCheck(CodeLocationLabel label,
                                      PatchedImmPtr newValue,
                                      PatchedImmPtr expected);

  static void PatchDataWithValueCheck(CodeLocationLabel label, ImmPtr newValue,
                                      ImmPtr expected);

  static void PatchWrite_Imm32(CodeLocationLabel label, Imm32 imm) {
    // Raw is going to be the return address.
    uint32_t* raw = (uint32_t*)label.raw();
    // Overwrite the 4 bytes before the return address, which will end up being
    // the call instruction.
    *(raw - 1) = imm.value;
  }
  // Always crashes in this implementation.
  static uint32_t AlignDoubleArg(uint32_t offset) {
    MOZ_CRASH("AlignDoubleArg()");
  }
  // Interpret |ptr| as an Instruction and return its Literal64() value.
  static uintptr_t GetPointer(uint8_t* ptr) {
    Instruction* i = reinterpret_cast<Instruction*>(ptr);
    uint64_t ret = i->Literal64();
    return ret;
  }

  // Toggle a jmp or cmp emitted by toggledJump().
  static void ToggleToJmp(CodeLocationLabel inst_);
  static void ToggleToCmp(CodeLocationLabel inst_);
  static void ToggleCall(CodeLocationLabel inst_, bool enabled);

  static void TraceJumpRelocations(JSTracer* trc, JitCode* code,
                                   CompactBufferReader& reader);
  static void TraceDataRelocations(JSTracer* trc, JitCode* code,
                                   CompactBufferReader& reader);

  // DEBUG-only check: no data relocations exist and every pending jump is of
  // kind HARDCODED. Compiles to nothing otherwise.
  void assertNoGCThings() const {
#ifdef DEBUG
    MOZ_ASSERT(dataRelocations_.length() == 0);
    for (auto& j : pendingJumps_) {
      MOZ_ASSERT(j.kind == RelocationKind::HARDCODED);
    }
#endif
  }

 public:
  // A Jump table entry is 2 instructions, with 8 bytes of raw data
  static const size_t SizeOfJumpTableEntry = 16;

  // Two 32-bit instruction words followed by the pointer payload.
  struct JumpTableEntry {
    uint32_t ldr;
    uint32_t br;
    void* data;

    Instruction* getLdr() { return reinterpret_cast<Instruction*>(&ldr); }
  };

  // Offset of the patchable target for the given entry.
  static const size_t OffsetOfJumpTableEntryPointer = 8;

 public:
  // Emit a pointer-sized placeholder (all bits set) into the buffer and bind
  // the label's patchAt() to the placeholder's offset.
  void writeCodePointer(CodeLabel* label) {
    armbuffer_.assertNoPoolAndNoNops();
    uintptr_t x = uintptr_t(-1);
    BufferOffset off = EmitData(&x, sizeof(uintptr_t));
    label->patchAt()->bind(off.getOffset());
  }

  // Always crashes in this implementation.
  void verifyHeapAccessDisassembly(uint32_t begin, uint32_t end,
                                   const Disassembler::HeapAccess& heapAccess) {
    MOZ_CRASH("verifyHeapAccessDisassembly");
  }

 protected:
  // Structure for fixing up pc-relative loads/jumps when the machine
  // code gets moved (executable copy, gc, etc.).
  struct RelativePatch {
    BufferOffset offset;
    void* target;
    RelocationKind kind;

    RelativePatch(BufferOffset offset, void* target, RelocationKind kind)
        : offset(offset), target(target), kind(kind) {}
  };

  // List of jumps for which the target is either unknown until finalization,
  // or cannot be known due to GC. Each entry here requires a unique entry
  // in the extended jump table, and is patched at finalization.
  js::Vector<RelativePatch, 8, SystemAllocPolicy> pendingJumps_;

  // Final output formatters.
  CompactBufferWriter jumpRelocations_;
  CompactBufferWriter dataRelocations_;
};

static const uint32_t NumIntArgRegs = 8;
static const uint32_t NumFloatArgRegs = 8;

// Tracks integer/float register indices and a running stack offset while
// assigning ABI argument locations. next() is defined elsewhere.
class ABIArgGenerator {
 public:
  ABIArgGenerator()
      : intRegIndex_(0), floatRegIndex_(0), stackOffset_(0), current_() {}

  ABIArg next(MIRType argType);
  ABIArg& current() { return current_; }
  uint32_t stackBytesConsumedSoFar() const { return stackOffset_; }
  void increaseStackOffset(uint32_t bytes) { stackOffset_ += bytes; }

 protected:
  unsigned intRegIndex_;
  unsigned floatRegIndex_;
  uint32_t stackOffset_;
  ABIArg current_;
};

// These registers may be volatile or nonvolatile.
// Integer registers outside the x0-x7 argument set: r8 through r11.
static constexpr Register ABINonArgReg0 = r8;
static constexpr Register ABINonArgReg1 = r9;
static constexpr Register ABINonArgReg2 = r10;
static constexpr Register ABINonArgReg3 = r11;

// This register may be volatile or nonvolatile. Avoid d31 which is the
// ScratchDoubleReg_.
// NOTE(review): despite the "Double" name this is declared as s16 with the
// Single kind -- confirm that this is intended.
static constexpr FloatRegister ABINonArgDoubleReg = {FloatRegisters::s16,
                                                     FloatRegisters::Single};

// These registers may be volatile or nonvolatile.
// Note: these three registers are all guaranteed to be different
static constexpr Register ABINonArgReturnReg0 = r8;
static constexpr Register ABINonArgReturnReg1 = r9;
static constexpr Register ABINonVolatileReg{Registers::x19};

// This register is guaranteed to be clobberable during the prologue and
// epilogue of an ABI call which must preserve both ABI argument, return
// and non-volatile registers.
static constexpr Register ABINonArgReturnVolatileReg = lr;

// Instance pointer argument register for WebAssembly functions. This must not
// alias any other register used for passing function arguments or return
// values. Preserved by WebAssembly functions. Must be nonvolatile.
static constexpr Register InstanceReg{Registers::x23};

// Registers used for wasm table calls. These registers must be disjoint
// from the ABI argument registers, InstanceReg and each other.
static constexpr Register WasmTableCallScratchReg0 = ABINonArgReg0;
static constexpr Register WasmTableCallScratchReg1 = ABINonArgReg1;
static constexpr Register WasmTableCallSigReg = ABINonArgReg2;
static constexpr Register WasmTableCallIndexReg = ABINonArgReg3;

// Registers used for ref calls.
static constexpr Register WasmCallRefCallScratchReg0 = ABINonArgReg0;
static constexpr Register WasmCallRefCallScratchReg1 = ABINonArgReg1;
static constexpr Register WasmCallRefReg = ABINonArgReg3;

// Registers used for wasm tail call operations.
+static constexpr Register WasmTailCallInstanceScratchReg = ABINonArgReg1; +static constexpr Register WasmTailCallRAScratchReg = lr; +static constexpr Register WasmTailCallFPScratchReg = ABINonArgReg3; + +// Register used as a scratch along the return path in the fast js -> wasm stub +// code. This must not overlap ReturnReg, JSReturnOperand, or InstanceReg. +// It must be a volatile register. +static constexpr Register WasmJitEntryReturnScratch = r9; + +static inline bool GetIntArgReg(uint32_t usedIntArgs, uint32_t usedFloatArgs, + Register* out) { + if (usedIntArgs >= NumIntArgRegs) { + return false; + } + *out = Register::FromCode(usedIntArgs); + return true; +} + +static inline bool GetFloatArgReg(uint32_t usedIntArgs, uint32_t usedFloatArgs, + FloatRegister* out) { + if (usedFloatArgs >= NumFloatArgRegs) { + return false; + } + *out = FloatRegister::FromCode(usedFloatArgs); + return true; +} + +// Get a register in which we plan to put a quantity that will be used as an +// integer argument. This differs from GetIntArgReg in that if we have no more +// actual argument registers to use we will fall back on using whatever +// CallTempReg* don't overlap the argument registers, and only fail once those +// run out too. +static inline bool GetTempRegForIntArg(uint32_t usedIntArgs, + uint32_t usedFloatArgs, Register* out) { + if (GetIntArgReg(usedIntArgs, usedFloatArgs, out)) { + return true; + } + // Unfortunately, we have to assume things about the point at which + // GetIntArgReg returns false, because we need to know how many registers it + // can allocate. + usedIntArgs -= NumIntArgRegs; + if (usedIntArgs >= NumCallTempNonArgRegs) { + return false; + } + *out = CallTempNonArgRegs[usedIntArgs]; + return true; +} + +inline Imm32 Imm64::firstHalf() const { return low(); } + +inline Imm32 Imm64::secondHalf() const { return hi(); } + +// Forbids nop filling for testing purposes. 
Nestable, but nested calls have +// no effect on the no-nops status; it is only the top level one that counts. +class AutoForbidNops { + protected: + Assembler* asm_; + + public: + explicit AutoForbidNops(Assembler* asm_) : asm_(asm_) { asm_->enterNoNops(); } + ~AutoForbidNops() { asm_->leaveNoNops(); } +}; + +// Forbids pool generation during a specified interval. Nestable, but nested +// calls must imply a no-pool area of the assembler buffer that is completely +// contained within the area implied by the outermost level call. +class AutoForbidPoolsAndNops : public AutoForbidNops { + public: + AutoForbidPoolsAndNops(Assembler* asm_, size_t maxInst) + : AutoForbidNops(asm_) { + asm_->enterNoPool(maxInst); + } + ~AutoForbidPoolsAndNops() { asm_->leaveNoPool(); } +}; + +} // namespace jit +} // namespace js + +#endif // A64_ASSEMBLER_A64_H_ |