summaryrefslogtreecommitdiffstats
path: root/js/src/jit/arm64/Assembler-arm64.h
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
commit26a029d407be480d791972afb5975cf62c9360a6 (patch)
treef435a8308119effd964b339f76abb83a57c29483 /js/src/jit/arm64/Assembler-arm64.h
parentInitial commit. (diff)
downloadfirefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz
firefox-26a029d407be480d791972afb5975cf62c9360a6.zip
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'js/src/jit/arm64/Assembler-arm64.h')
-rw-r--r--js/src/jit/arm64/Assembler-arm64.h801
1 files changed, 801 insertions, 0 deletions
diff --git a/js/src/jit/arm64/Assembler-arm64.h b/js/src/jit/arm64/Assembler-arm64.h
new file mode 100644
index 0000000000..d025f2b63c
--- /dev/null
+++ b/js/src/jit/arm64/Assembler-arm64.h
@@ -0,0 +1,801 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef A64_ASSEMBLER_A64_H_
+#define A64_ASSEMBLER_A64_H_
+
+#include <iterator>
+
+#include "jit/arm64/vixl/Assembler-vixl.h"
+
+#include "jit/CompactBuffer.h"
+#include "jit/shared/Disassembler-shared.h"
+#include "wasm/WasmTypeDecls.h"
+
+namespace js {
+namespace jit {
+
+// VIXL imports.
+typedef vixl::Register ARMRegister;
+typedef vixl::FPRegister ARMFPRegister;
+using vixl::ARMBuffer;
+using vixl::Instruction;
+
+using LabelDoc = DisassemblerSpew::LabelDoc;
+using LiteralDoc = DisassemblerSpew::LiteralDoc;
+
+static const uint32_t AlignmentAtPrologue = 0;
+static const uint32_t AlignmentMidPrologue = 8;
+static const Scale ScalePointer = TimesEight;
+
+// The MacroAssembler uses scratch registers extensively and unexpectedly.
+// For safety, scratch registers should always be acquired using
+// vixl::UseScratchRegisterScope.
+static constexpr Register ScratchReg{Registers::ip0};
+static constexpr ARMRegister ScratchReg64 = {ScratchReg, 64};
+
+static constexpr Register ScratchReg2{Registers::ip1};
+static constexpr ARMRegister ScratchReg2_64 = {ScratchReg2, 64};
+
+static constexpr FloatRegister ReturnDoubleReg = {FloatRegisters::d0,
+ FloatRegisters::Double};
+static constexpr FloatRegister ScratchDoubleReg_ = {FloatRegisters::d31,
+ FloatRegisters::Double};
+struct ScratchDoubleScope : public AutoFloatRegisterScope {
+ explicit ScratchDoubleScope(MacroAssembler& masm)
+ : AutoFloatRegisterScope(masm, ScratchDoubleReg_) {}
+};
+
+static constexpr FloatRegister ReturnFloat32Reg = {FloatRegisters::s0,
+ FloatRegisters::Single};
+static constexpr FloatRegister ScratchFloat32Reg_ = {FloatRegisters::s31,
+ FloatRegisters::Single};
+struct ScratchFloat32Scope : public AutoFloatRegisterScope {
+ explicit ScratchFloat32Scope(MacroAssembler& masm)
+ : AutoFloatRegisterScope(masm, ScratchFloat32Reg_) {}
+};
+
+#ifdef ENABLE_WASM_SIMD
+static constexpr FloatRegister ReturnSimd128Reg = {FloatRegisters::v0,
+ FloatRegisters::Simd128};
+static constexpr FloatRegister ScratchSimd128Reg = {FloatRegisters::v31,
+ FloatRegisters::Simd128};
+struct ScratchSimd128Scope : public AutoFloatRegisterScope {
+ explicit ScratchSimd128Scope(MacroAssembler& masm)
+ : AutoFloatRegisterScope(masm, ScratchSimd128Reg) {}
+};
+#else
+struct ScratchSimd128Scope : public AutoFloatRegisterScope {
+ explicit ScratchSimd128Scope(MacroAssembler& masm)
+ : AutoFloatRegisterScope(masm, ScratchDoubleReg_) {
+ MOZ_CRASH("SIMD not enabled");
+ }
+};
+#endif
+
+static constexpr Register InvalidReg{Registers::Invalid};
+static constexpr FloatRegister InvalidFloatReg = {};
+
+static constexpr Register OsrFrameReg{Registers::x3};
+static constexpr Register CallTempReg0{Registers::x9};
+static constexpr Register CallTempReg1{Registers::x10};
+static constexpr Register CallTempReg2{Registers::x11};
+static constexpr Register CallTempReg3{Registers::x12};
+static constexpr Register CallTempReg4{Registers::x13};
+static constexpr Register CallTempReg5{Registers::x14};
+
+static constexpr Register PreBarrierReg{Registers::x1};
+
+static constexpr Register InterpreterPCReg{Registers::x9};
+
+static constexpr Register ReturnReg{Registers::x0};
+static constexpr Register64 ReturnReg64(ReturnReg);
+static constexpr Register JSReturnReg{Registers::x2};
+static constexpr Register FramePointer{Registers::fp};
+static constexpr ARMRegister FramePointer64{FramePointer, 64};
+static constexpr Register ZeroRegister{Registers::sp};
+static constexpr ARMRegister ZeroRegister64{Registers::sp, 64};
+static constexpr ARMRegister ZeroRegister32{Registers::sp, 32};
+
+// [SMDOC] AArch64 Stack Pointer and Pseudo Stack Pointer conventions
+//
+// ================
+//
+// Stack pointer (SP), PseudoStackPointer (PSP), and RealStackPointer:
+//
+// The ARM64 real SP has a constraint: it must be 16-byte aligned whenever it
+// is used as the base pointer for a memory access. (SP+offset need not be
+// 16-byte aligned, but the SP value itself must be.) The SP register may
+// take on unaligned values but may not be used for a memory access while it
+// is unaligned.
+//
+// Stack-alignment checking can be enabled or disabled by a control register;
+// however that register cannot be modified by user space. We have to assume
+// stack alignment checking is enabled, and that does usually appear to be the
+// case. See the ARM Architecture Reference Manual, "D1.8.2 SP alignment
+// checking", for further details.
+//
+// A second constraint is forced upon us by the ARM64 ABI. This requires that
+// all accesses to the stack must be at or above SP. Accesses below SP are
+// strictly forbidden, presumably because the kernel might use that area of
+// memory for its own purposes -- in particular, signal delivery -- and hence
+// it may get trashed at any time.
+//
+// Note this doesn't mean that accesses to the stack must be based off
+// register SP. Only that the effective addresses must be >= SP, regardless
+// of how the address is formed.
+//
+// In order to allow word-wise pushes and pops, some of our ARM64 jits
+// (JS-Baseline, JS-Ion, and Wasm-Ion, but not Wasm-Baseline) dedicate x28 to
+// be used as a PseudoStackPointer (PSP).
+//
+// Initially the PSP will have the same value as the SP. Code can, if it
+// wants, push a single word by subtracting 8 from the PSP, doing SP := PSP,
+// then storing the value at PSP+0. Given other constraints on the alignment
+// of the SP at function call boundaries, this works out OK, at the cost of
+// the two extra instructions per push / pop.
+//
+// This is all a bit messy, and is probably not robustly adhered to. However,
+// the following appear to be the intended, and mostly implemented, current
+// invariants:
+//
+// (1) PSP is "primary", SP is "secondary". Most stack refs are
+// PSP-relative. SP-relative is rare and (obviously) only done when we
+// know that SP is aligned.
+//
+// (2) At all times, the relationship SP <= PSP is maintained. The fact that
+// SP may validly be less than PSP means that pushes on the stack force
+// the two values to become equal, by copying PSP into SP. However, pops
+// behave differently: PSP moves back up and SP stays the same, since that
+// doesn't break the SP <= PSP invariant.
+//
+// (3) However, immediately before a call instruction, SP and PSP must be the
+// same. To enforce this, PSP is copied into SP by the arm64-specific
+// MacroAssembler::call routines.
+//
+// (4) Also, after a function has returned, it is expected that SP holds the
+// "primary" value. How exactly this is implemented remains not entirely
+// clear and merits further investigation. The following points are
+// believed to be relevant:
+//
+// - For calls to functions observing the system AArch64 ABI, PSP (x28) is
+// callee-saved. That, combined with (3) above, implies SP == PSP
+// immediately after the call returns.
+//
+// - JIT-generated routines return using MacroAssemblerCompat::retn, and
+// that copies PSP into SP (bizarrely; this would make more sense if it
+// copied SP into PSP); but in any case, the point is that they are the
+// same at the point that the return instruction executes.
+//
+// - MacroAssembler::callWithABIPost copies PSP into SP after the return
+// of a call requiring dynamic alignment.
+//
+// Given the above, it is unclear exactly where in the return sequence it
+// is expected that SP == PSP, and also whether it is the callee or caller
+// that is expected to enforce it.
+//
+// In general it would be nice to be able to move (at some time in the future,
+// not now) to a world where *every* assignment to PSP or SP is followed
+// immediately by a copy into the other register. That would make all
+// required correctness proofs trivial in the sense that it would require only
+// local inspection of code immediately following (dominated by) any such
+// assignment. For the moment, however, this is a guideline, not a hard
+// requirement.
+//
+// ================
+//
+// Mechanics of keeping the stack pointers in sync:
+//
+// The following two methods require that the masm's SP has been set to the PSP
+// with MacroAssembler::SetStackPointer64(PseudoStackPointer64), or they will be
+// no-ops. The setup is performed manually by the jits after creating the masm.
+//
+// * MacroAssembler::syncStackPtr() performs SP := PSP, presumably after PSP has
+// been updated, so SP needs to move too. This is used pretty liberally
+// throughout the code base.
+//
+// * MacroAssembler::initPseudoStackPtr() performs PSP := SP. This can be used
+// after calls to non-ABI compliant code; it's not used much.
+//
+// In the ARM64 assembler there is a function Instruction::IsStackPtrSync() that
+// recognizes the instruction emitted by syncStackPtr(), and this is used to
+// skip that instruction a few places, should it be present, in the JS JIT where
+// code is generated to deal with toggled calls.
+//
+// In various places there are calls to MacroAssembler::syncStackPtr() which
+// appear to be redundant. Investigation shows that they often are redundant,
+// but not always. Finding and removing such redundancies would be quite some
+// work, so we live for now with the occasional redundant update. Perusal of
+// the Cortex-A55 and -A72 optimization guides shows no evidence that such
+// assignments are any more expensive than assignments between vanilla integer
+// registers, so the costs of such redundant updates are assumed to be small.
+//
+// Invariants on the PSP at function call boundaries:
+//
+// It *appears* that the following invariants exist:
+//
+// * On entry to JIT code, PSP == SP, ie the stack pointer is transmitted via
+// both registers.
+//
+// * On entry to C++ code, PSP == SP. Certainly it appears that all calls
+// created by the MacroAssembler::call(..) routines perform 'syncStackPtr'
+// immediately before the call, and all ABI calls are routed through the
+// MacroAssembler::call layer.
+//
+// * The stubs generated by WasmStubs.cpp assume that, on entry, SP is the
+// active stack pointer and that PSP is dead.
+//
+// * The PSP is non-volatile (callee-saved). Along a normal return path from
+// JIT code, simply having PSP == SP on exit is correct, since the exit SP is
+// the same as the entry SP by the JIT ABI.
+//
+// * Call-outs to non-JIT C++ code do not need to set up the PSP (it won't be
+// used), and will not need to restore the PSP on return because x28 is
+// non-volatile in the ARM64 ABI.
+//
+// ================
+//
+// Future cleanups to the SP-vs-PSP machinery:
+//
+// Currently we have somewhat unclear invariants, which are not obviously
+// always enforced, and which may require complex non-local reasoning.
+// Auditing the code to ensure that the invariants always hold, whilst not
+// generating duplicate syncs, is close to impossible. A future rework to
+// tidy this might be as follows. (This suggestion pertains the the entire
+// JIT complex: all of the JS compilers, wasm compilers, stub generators,
+// regexp compilers, etc).
+//
+// Currently we have that, in JIT-generated code, PSP is "primary" and SP is
+// "secondary", meaning that PSP has the "real" stack pointer value and SP is
+// updated whenever PSP acquires a lower value, so as to ensure that SP <= PSP.
+// An exception to this scheme is the stubs code generated by WasmStubs.cpp,
+// which assumes that SP is "primary" and PSP is dead.
+//
+// It might give us an easier incremental path to eventually removing PSP
+// entirely if we switched to having SP always be the primary. That is:
+//
+// (1) SP is primary, PSP is secondary
+// (2) After any assignment to SP, it is copied into PSP
+// (3) All (non-frame-pointer-based) stack accesses are PSP-relative
+// (as at present)
+//
+// This would have the effect that:
+//
+// * It would reinstate the invariant that on all targets, the "real" SP value
+// is in the ABI-and-or-hardware-mandated stack pointer register.
+//
+// * It would give us a simple story about calls and returns:
+// - for calls to non-JIT generated code (viz, C++ etc), we need no extra
+// copies, because PSP (x28) is callee-saved
+// - for calls to JIT-generated code, we need no extra copies, because of (2)
+// above
+//
+// * We could incrementally migrate those parts of the code generator where we
+// know that SP is 16-aligned, to use SP- rather than PSP-relative accesses
+//
+// * The consistent use of (2) would remove the requirement to have to perform
+// path-dependent reasoning (for paths in the generated code, not in the
+// compiler) when reading/understanding the code.
+//
+// * x28 would become free for use by stubs and the baseline compiler without
+// having to worry about interoperating with code that expects x28 to hold a
+// valid PSP.
+//
+// One might ask what mechanical checks we can add to ensure correctness, rather
+// than having to verify these invariants by hand indefinitely. Maybe some
+// combination of:
+//
+// * In debug builds, compiling-in assert(SP == PSP) at critical places. This
+// can be done using the existing `assertStackPtrsSynced` function.
+//
+// * In debug builds, scanning sections of generated code to ensure no
+// SP-relative stack accesses have been created -- for some sections, at
+// least every assignment to SP is immediately followed by a copy to x28.
+// This would also facilitate detection of duplicate syncs.
+//
+// ================
+//
+// Other investigative notes, for the code base at present:
+//
+// * Some disassembly dumps suggest that we sync the stack pointer too often.
+// This could be the result of various pieces of code working at cross
+// purposes when syncing the stack pointer, or of not paying attention to the
+// precise invariants.
+//
+// * As documented in RegExpNativeMacroAssembler.cpp, function
+// SMRegExpMacroAssembler::createStackFrame:
+//
+// // ARM64 communicates stack address via SP, but uses a pseudo-sp (PSP) for
+// // addressing. The register we use for PSP may however also be used by
+// // calling code, and it is nonvolatile, so save it. Do this as a special
+// // case first because the generic save/restore code needs the PSP to be
+// // initialized already.
+//
+// and also in function SMRegExpMacroAssembler::exitHandler:
+//
+// // Restore the saved value of the PSP register, this value is whatever the
+// // caller had saved in it, not any actual SP value, and it must not be
+// // overwritten subsequently.
+//
+// The original source for these comments was a patch for bug 1445907.
+//
+// * MacroAssembler-arm64.h has an interesting comment in the retn()
+// function:
+//
+// syncStackPtr(); // SP is always used to transmit the stack between calls.
+//
+// Same comment at abiret() in that file, and in MacroAssembler-arm64.cpp,
+// at callWithABIPre and callWithABIPost.
+//
+// * In Trampoline-arm64.cpp function JitRuntime::generateVMWrapper we find
+//
+// // SP is used to transfer stack across call boundaries.
+// masm.initPseudoStackPtr();
+//
+// after the return point of a callWithVMWrapper. The only reasonable
+// conclusion from all those (assuming they are right) is that SP == PSP.
+//
+// * Wasm-Baseline does not use the PSP, but as Wasm-Ion code requires SP==PSP
+// and tiered code can have Baseline->Ion calls, Baseline will set PSP=SP
+// before a call to wasm code.
+//
+// ================
+
+// StackPointer is intentionally undefined on ARM64 to prevent misuse: using
+// sp as a base register is only valid if sp % 16 == 0.
+static constexpr Register RealStackPointer{Registers::sp};
+
+static constexpr Register PseudoStackPointer{Registers::x28};
+static constexpr ARMRegister PseudoStackPointer64 = {Registers::x28, 64};
+static constexpr ARMRegister PseudoStackPointer32 = {Registers::x28, 32};
+
+static constexpr Register IntArgReg0{Registers::x0};
+static constexpr Register IntArgReg1{Registers::x1};
+static constexpr Register IntArgReg2{Registers::x2};
+static constexpr Register IntArgReg3{Registers::x3};
+static constexpr Register IntArgReg4{Registers::x4};
+static constexpr Register IntArgReg5{Registers::x5};
+static constexpr Register IntArgReg6{Registers::x6};
+static constexpr Register IntArgReg7{Registers::x7};
+static constexpr Register HeapReg{Registers::x21};
+
+// Define unsized Registers.
+#define DEFINE_UNSIZED_REGISTERS(N) \
+ static constexpr Register r##N{Registers::x##N};
+REGISTER_CODE_LIST(DEFINE_UNSIZED_REGISTERS)
+#undef DEFINE_UNSIZED_REGISTERS
+static constexpr Register ip0{Registers::x16};
+static constexpr Register ip1{Registers::x17};
+static constexpr Register fp{Registers::x29};
+static constexpr Register lr{Registers::x30};
+static constexpr Register rzr{Registers::xzr};
+
+// Import VIXL registers into the js::jit namespace.
+#define IMPORT_VIXL_REGISTERS(N) \
+ static constexpr ARMRegister w##N = vixl::w##N; \
+ static constexpr ARMRegister x##N = vixl::x##N;
+REGISTER_CODE_LIST(IMPORT_VIXL_REGISTERS)
+#undef IMPORT_VIXL_REGISTERS
+static constexpr ARMRegister wzr = vixl::wzr;
+static constexpr ARMRegister xzr = vixl::xzr;
+static constexpr ARMRegister wsp = vixl::wsp;
+static constexpr ARMRegister sp = vixl::sp;
+
+// Import VIXL VRegisters into the js::jit namespace.
+#define IMPORT_VIXL_VREGISTERS(N) \
+ static constexpr ARMFPRegister s##N = vixl::s##N; \
+ static constexpr ARMFPRegister d##N = vixl::d##N;
+REGISTER_CODE_LIST(IMPORT_VIXL_VREGISTERS)
+#undef IMPORT_VIXL_VREGISTERS
+
+static constexpr ValueOperand JSReturnOperand = ValueOperand(JSReturnReg);
+
+// Registers used by RegExpMatcher and RegExpExecMatch stubs (do not use
+// JSReturnOperand).
+static constexpr Register RegExpMatcherRegExpReg = CallTempReg0;
+static constexpr Register RegExpMatcherStringReg = CallTempReg1;
+static constexpr Register RegExpMatcherLastIndexReg = CallTempReg2;
+
+// Registers used by RegExpExecTest stub (do not use ReturnReg).
+static constexpr Register RegExpExecTestRegExpReg = CallTempReg0;
+static constexpr Register RegExpExecTestStringReg = CallTempReg1;
+
+// Registers used by RegExpSearcher stub (do not use ReturnReg).
+static constexpr Register RegExpSearcherRegExpReg = CallTempReg0;
+static constexpr Register RegExpSearcherStringReg = CallTempReg1;
+static constexpr Register RegExpSearcherLastIndexReg = CallTempReg2;
+
+static constexpr Register JSReturnReg_Type = r3;
+static constexpr Register JSReturnReg_Data = r2;
+
+static constexpr FloatRegister NANReg = {FloatRegisters::d14,
+ FloatRegisters::Single};
+// N.B. r8 isn't listed as an aapcs temp register, but we can use it as such
+// because we never use return-structs.
+static constexpr Register CallTempNonArgRegs[] = {r8, r9, r10, r11,
+ r12, r13, r14, r15};
+static const uint32_t NumCallTempNonArgRegs = std::size(CallTempNonArgRegs);
+
+static constexpr uint32_t JitStackAlignment = 16;
+
+static constexpr uint32_t JitStackValueAlignment =
+ JitStackAlignment / sizeof(Value);
+static_assert(JitStackAlignment % sizeof(Value) == 0 &&
+ JitStackValueAlignment >= 1,
+ "Stack alignment should be a non-zero multiple of sizeof(Value)");
+
+static constexpr uint32_t SimdMemoryAlignment = 16;
+
+static_assert(CodeAlignment % SimdMemoryAlignment == 0,
+ "Code alignment should be larger than any of the alignments "
+ "which are used for "
+ "the constant sections of the code buffer. Thus it should be "
+ "larger than the "
+ "alignment for SIMD constants.");
+
+static const uint32_t WasmStackAlignment = SimdMemoryAlignment;
+static const uint32_t WasmTrapInstructionLength = 4;
+
+// See comments in wasm::GenerateFunctionPrologue. The difference between these
+// is the size of the largest callable prologue on the platform.
+static constexpr uint32_t WasmCheckedCallEntryOffset = 0u;
+
+class Assembler : public vixl::Assembler {
+ public:
+ Assembler() : vixl::Assembler() {}
+
+ typedef vixl::Condition Condition;
+
+ void finish();
+ bool appendRawCode(const uint8_t* code, size_t numBytes);
+ bool reserve(size_t size);
+ bool swapBuffer(wasm::Bytes& bytes);
+
+ // Emit the jump table, returning the BufferOffset to the first entry in the
+ // table.
+ BufferOffset emitExtendedJumpTable();
+ BufferOffset ExtendedJumpTable_;
+ void executableCopy(uint8_t* buffer);
+
+ BufferOffset immPool(ARMRegister dest, uint8_t* value, vixl::LoadLiteralOp op,
+ const LiteralDoc& doc,
+ ARMBuffer::PoolEntry* pe = nullptr);
+ BufferOffset immPool64(ARMRegister dest, uint64_t value,
+ ARMBuffer::PoolEntry* pe = nullptr);
+ BufferOffset fImmPool(ARMFPRegister dest, uint8_t* value,
+ vixl::LoadLiteralOp op, const LiteralDoc& doc);
+ BufferOffset fImmPool64(ARMFPRegister dest, double value);
+ BufferOffset fImmPool32(ARMFPRegister dest, float value);
+
+ uint32_t currentOffset() const { return nextOffset().getOffset(); }
+
+ void bind(Label* label) { bind(label, nextOffset()); }
+ void bind(Label* label, BufferOffset boff);
+ void bind(CodeLabel* label) { label->target()->bind(currentOffset()); }
+
+ void setUnlimitedBuffer() { armbuffer_.setUnlimited(); }
+ bool oom() const {
+ return AssemblerShared::oom() || armbuffer_.oom() ||
+ jumpRelocations_.oom() || dataRelocations_.oom();
+ }
+
+ void copyJumpRelocationTable(uint8_t* dest) const {
+ if (jumpRelocations_.length()) {
+ memcpy(dest, jumpRelocations_.buffer(), jumpRelocations_.length());
+ }
+ }
+ void copyDataRelocationTable(uint8_t* dest) const {
+ if (dataRelocations_.length()) {
+ memcpy(dest, dataRelocations_.buffer(), dataRelocations_.length());
+ }
+ }
+
+ size_t jumpRelocationTableBytes() const { return jumpRelocations_.length(); }
+ size_t dataRelocationTableBytes() const { return dataRelocations_.length(); }
+ size_t bytesNeeded() const {
+ return SizeOfCodeGenerated() + jumpRelocationTableBytes() +
+ dataRelocationTableBytes();
+ }
+
+ void processCodeLabels(uint8_t* rawCode) {
+ for (const CodeLabel& label : codeLabels_) {
+ Bind(rawCode, label);
+ }
+ }
+
+ static void UpdateLoad64Value(Instruction* inst0, uint64_t value);
+
+ static void Bind(uint8_t* rawCode, const CodeLabel& label) {
+ auto mode = label.linkMode();
+ size_t patchAtOffset = label.patchAt().offset();
+ size_t targetOffset = label.target().offset();
+
+ if (mode == CodeLabel::MoveImmediate) {
+ Instruction* inst = (Instruction*)(rawCode + patchAtOffset);
+ Assembler::UpdateLoad64Value(inst, (uint64_t)(rawCode + targetOffset));
+ } else {
+ *reinterpret_cast<const void**>(rawCode + patchAtOffset) =
+ rawCode + targetOffset;
+ }
+ }
+
+ void retarget(Label* cur, Label* next);
+
+ // The buffer is about to be linked. Ensure any constant pools or
+ // excess bookkeeping has been flushed to the instruction stream.
+ void flush() { armbuffer_.flushPool(); }
+
+ void comment(const char* msg) {
+#ifdef JS_DISASM_ARM64
+ spew_.spew("; %s", msg);
+#endif
+ }
+
+ void setPrinter(Sprinter* sp) {
+#ifdef JS_DISASM_ARM64
+ spew_.setPrinter(sp);
+#endif
+ }
+
+ static bool SupportsFloatingPoint() { return true; }
+ static bool SupportsUnalignedAccesses() { return true; }
+ static bool SupportsFastUnalignedFPAccesses() { return true; }
+ static bool SupportsWasmSimd() { return true; }
+
+ static bool HasRoundInstruction(RoundingMode mode) {
+ switch (mode) {
+ case RoundingMode::Up:
+ case RoundingMode::Down:
+ case RoundingMode::NearestTiesToEven:
+ case RoundingMode::TowardsZero:
+ return true;
+ }
+ MOZ_CRASH("unexpected mode");
+ }
+
+ protected:
+ // Add a jump whose target is unknown until finalization.
+ // The jump may not be patched at runtime.
+ void addPendingJump(BufferOffset src, ImmPtr target, RelocationKind kind);
+
+ public:
+ static uint32_t PatchWrite_NearCallSize() { return 4; }
+
+ static uint32_t NopSize() { return 4; }
+
+ static void PatchWrite_NearCall(CodeLocationLabel start,
+ CodeLocationLabel toCall);
+ static void PatchDataWithValueCheck(CodeLocationLabel label,
+ PatchedImmPtr newValue,
+ PatchedImmPtr expected);
+
+ static void PatchDataWithValueCheck(CodeLocationLabel label, ImmPtr newValue,
+ ImmPtr expected);
+
+ static void PatchWrite_Imm32(CodeLocationLabel label, Imm32 imm) {
+ // Raw is going to be the return address.
+ uint32_t* raw = (uint32_t*)label.raw();
+ // Overwrite the 4 bytes before the return address, which will end up being
+ // the call instruction.
+ *(raw - 1) = imm.value;
+ }
+ static uint32_t AlignDoubleArg(uint32_t offset) {
+ MOZ_CRASH("AlignDoubleArg()");
+ }
+ static uintptr_t GetPointer(uint8_t* ptr) {
+ Instruction* i = reinterpret_cast<Instruction*>(ptr);
+ uint64_t ret = i->Literal64();
+ return ret;
+ }
+
+ // Toggle a jmp or cmp emitted by toggledJump().
+ static void ToggleToJmp(CodeLocationLabel inst_);
+ static void ToggleToCmp(CodeLocationLabel inst_);
+ static void ToggleCall(CodeLocationLabel inst_, bool enabled);
+
+ static void TraceJumpRelocations(JSTracer* trc, JitCode* code,
+ CompactBufferReader& reader);
+ static void TraceDataRelocations(JSTracer* trc, JitCode* code,
+ CompactBufferReader& reader);
+
+ void assertNoGCThings() const {
+#ifdef DEBUG
+ MOZ_ASSERT(dataRelocations_.length() == 0);
+ for (auto& j : pendingJumps_) {
+ MOZ_ASSERT(j.kind == RelocationKind::HARDCODED);
+ }
+#endif
+ }
+
+ public:
+ // A Jump table entry is 2 instructions, with 8 bytes of raw data
+ static const size_t SizeOfJumpTableEntry = 16;
+
+ struct JumpTableEntry {
+ uint32_t ldr;
+ uint32_t br;
+ void* data;
+
+ Instruction* getLdr() { return reinterpret_cast<Instruction*>(&ldr); }
+ };
+
+ // Offset of the patchable target for the given entry.
+ static const size_t OffsetOfJumpTableEntryPointer = 8;
+
+ public:
+ void writeCodePointer(CodeLabel* label) {
+ armbuffer_.assertNoPoolAndNoNops();
+ uintptr_t x = uintptr_t(-1);
+ BufferOffset off = EmitData(&x, sizeof(uintptr_t));
+ label->patchAt()->bind(off.getOffset());
+ }
+
+ void verifyHeapAccessDisassembly(uint32_t begin, uint32_t end,
+ const Disassembler::HeapAccess& heapAccess) {
+ MOZ_CRASH("verifyHeapAccessDisassembly");
+ }
+
+ protected:
+ // Structure for fixing up pc-relative loads/jumps when the machine
+ // code gets moved (executable copy, gc, etc.).
+ struct RelativePatch {
+ BufferOffset offset;
+ void* target;
+ RelocationKind kind;
+
+ RelativePatch(BufferOffset offset, void* target, RelocationKind kind)
+ : offset(offset), target(target), kind(kind) {}
+ };
+
+ // List of jumps for which the target is either unknown until finalization,
+ // or cannot be known due to GC. Each entry here requires a unique entry
+ // in the extended jump table, and is patched at finalization.
+ js::Vector<RelativePatch, 8, SystemAllocPolicy> pendingJumps_;
+
+ // Final output formatters.
+ CompactBufferWriter jumpRelocations_;
+ CompactBufferWriter dataRelocations_;
+};
+
+static const uint32_t NumIntArgRegs = 8;
+static const uint32_t NumFloatArgRegs = 8;
+
+class ABIArgGenerator {
+ public:
+ ABIArgGenerator()
+ : intRegIndex_(0), floatRegIndex_(0), stackOffset_(0), current_() {}
+
+ ABIArg next(MIRType argType);
+ ABIArg& current() { return current_; }
+ uint32_t stackBytesConsumedSoFar() const { return stackOffset_; }
+ void increaseStackOffset(uint32_t bytes) { stackOffset_ += bytes; }
+
+ protected:
+ unsigned intRegIndex_;
+ unsigned floatRegIndex_;
+ uint32_t stackOffset_;
+ ABIArg current_;
+};
+
+// These registers may be volatile or nonvolatile.
+static constexpr Register ABINonArgReg0 = r8;
+static constexpr Register ABINonArgReg1 = r9;
+static constexpr Register ABINonArgReg2 = r10;
+static constexpr Register ABINonArgReg3 = r11;
+
+// This register may be volatile or nonvolatile. Avoid d31 which is the
+// ScratchDoubleReg_.
+static constexpr FloatRegister ABINonArgDoubleReg = {FloatRegisters::s16,
+ FloatRegisters::Single};
+
+// These registers may be volatile or nonvolatile.
+// Note: these three registers are all guaranteed to be different
+static constexpr Register ABINonArgReturnReg0 = r8;
+static constexpr Register ABINonArgReturnReg1 = r9;
+static constexpr Register ABINonVolatileReg{Registers::x19};
+
+// This register is guaranteed to be clobberable during the prologue and
+// epilogue of an ABI call which must preserve both ABI argument, return
+// and non-volatile registers.
+static constexpr Register ABINonArgReturnVolatileReg = lr;
+
+// Instance pointer argument register for WebAssembly functions. This must not
+// alias any other register used for passing function arguments or return
+// values. Preserved by WebAssembly functions. Must be nonvolatile.
+static constexpr Register InstanceReg{Registers::x23};
+
+// Registers used for wasm table calls. These registers must be disjoint
+// from the ABI argument registers, InstanceReg and each other.
+static constexpr Register WasmTableCallScratchReg0 = ABINonArgReg0;
+static constexpr Register WasmTableCallScratchReg1 = ABINonArgReg1;
+static constexpr Register WasmTableCallSigReg = ABINonArgReg2;
+static constexpr Register WasmTableCallIndexReg = ABINonArgReg3;
+
+// Registers used for ref calls.
+static constexpr Register WasmCallRefCallScratchReg0 = ABINonArgReg0;
+static constexpr Register WasmCallRefCallScratchReg1 = ABINonArgReg1;
+static constexpr Register WasmCallRefReg = ABINonArgReg3;
+
+// Registers used for wasm tail calls operations.
+static constexpr Register WasmTailCallInstanceScratchReg = ABINonArgReg1;
+static constexpr Register WasmTailCallRAScratchReg = lr;
+static constexpr Register WasmTailCallFPScratchReg = ABINonArgReg3;
+
+// Register used as a scratch along the return path in the fast js -> wasm stub
+// code. This must not overlap ReturnReg, JSReturnOperand, or InstanceReg.
+// It must be a volatile register.
+static constexpr Register WasmJitEntryReturnScratch = r9;
+
+static inline bool GetIntArgReg(uint32_t usedIntArgs, uint32_t usedFloatArgs,
+ Register* out) {
+ if (usedIntArgs >= NumIntArgRegs) {
+ return false;
+ }
+ *out = Register::FromCode(usedIntArgs);
+ return true;
+}
+
+static inline bool GetFloatArgReg(uint32_t usedIntArgs, uint32_t usedFloatArgs,
+ FloatRegister* out) {
+ if (usedFloatArgs >= NumFloatArgRegs) {
+ return false;
+ }
+ *out = FloatRegister::FromCode(usedFloatArgs);
+ return true;
+}
+
+// Get a register in which we plan to put a quantity that will be used as an
+// integer argument. This differs from GetIntArgReg in that if we have no more
+// actual argument registers to use we will fall back on using whatever
+// CallTempReg* don't overlap the argument registers, and only fail once those
+// run out too.
+static inline bool GetTempRegForIntArg(uint32_t usedIntArgs,
+ uint32_t usedFloatArgs, Register* out) {
+ if (GetIntArgReg(usedIntArgs, usedFloatArgs, out)) {
+ return true;
+ }
+ // Unfortunately, we have to assume things about the point at which
+ // GetIntArgReg returns false, because we need to know how many registers it
+ // can allocate.
+ usedIntArgs -= NumIntArgRegs;
+ if (usedIntArgs >= NumCallTempNonArgRegs) {
+ return false;
+ }
+ *out = CallTempNonArgRegs[usedIntArgs];
+ return true;
+}
+
+inline Imm32 Imm64::firstHalf() const { return low(); }
+
+inline Imm32 Imm64::secondHalf() const { return hi(); }
+
+// Forbids nop filling for testing purposes. Nestable, but nested calls have
+// no effect on the no-nops status; it is only the top level one that counts.
+class AutoForbidNops {
+ protected:
+ Assembler* asm_;
+
+ public:
+ explicit AutoForbidNops(Assembler* asm_) : asm_(asm_) { asm_->enterNoNops(); }
+ ~AutoForbidNops() { asm_->leaveNoNops(); }
+};
+
+// Forbids pool generation during a specified interval. Nestable, but nested
+// calls must imply a no-pool area of the assembler buffer that is completely
+// contained within the area implied by the outermost level call.
+class AutoForbidPoolsAndNops : public AutoForbidNops {
+ public:
+ AutoForbidPoolsAndNops(Assembler* asm_, size_t maxInst)
+ : AutoForbidNops(asm_) {
+ asm_->enterNoPool(maxInst);
+ }
+ ~AutoForbidPoolsAndNops() { asm_->leaveNoPool(); }
+};
+
+} // namespace jit
+} // namespace js
+
+#endif // A64_ASSEMBLER_A64_H_