summaryrefslogtreecommitdiffstats
path: root/js/src/jit/arm64
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--js/src/jit/arm64/Architecture-arm64.cpp129
-rw-r--r--js/src/jit/arm64/Architecture-arm64.h773
-rw-r--r--js/src/jit/arm64/Assembler-arm64.cpp609
-rw-r--r--js/src/jit/arm64/Assembler-arm64.h793
-rw-r--r--js/src/jit/arm64/CodeGenerator-arm64.cpp4245
-rw-r--r--js/src/jit/arm64/CodeGenerator-arm64.h135
-rw-r--r--js/src/jit/arm64/LIR-arm64.h373
-rw-r--r--js/src/jit/arm64/Lowering-arm64.cpp1438
-rw-r--r--js/src/jit/arm64/Lowering-arm64.h135
-rw-r--r--js/src/jit/arm64/MacroAssembler-arm64-inl.h4079
-rw-r--r--js/src/jit/arm64/MacroAssembler-arm64.cpp3416
-rw-r--r--js/src/jit/arm64/MacroAssembler-arm64.h2206
-rw-r--r--js/src/jit/arm64/MoveEmitter-arm64.cpp329
-rw-r--r--js/src/jit/arm64/MoveEmitter-arm64.h99
-rw-r--r--js/src/jit/arm64/SharedICHelpers-arm64-inl.h79
-rw-r--r--js/src/jit/arm64/SharedICHelpers-arm64.h82
-rw-r--r--js/src/jit/arm64/SharedICRegisters-arm64.h51
-rw-r--r--js/src/jit/arm64/Trampoline-arm64.cpp840
-rw-r--r--js/src/jit/arm64/vixl/.clang-format4
-rw-r--r--js/src/jit/arm64/vixl/AUTHORS8
-rw-r--r--js/src/jit/arm64/vixl/Assembler-vixl.cpp5318
-rw-r--r--js/src/jit/arm64/vixl/Assembler-vixl.h4974
-rw-r--r--js/src/jit/arm64/vixl/CompilerIntrinsics-vixl.h179
-rw-r--r--js/src/jit/arm64/vixl/Constants-vixl.h2694
-rw-r--r--js/src/jit/arm64/vixl/Cpu-Features-vixl.cpp231
-rw-r--r--js/src/jit/arm64/vixl/Cpu-Features-vixl.h397
-rw-r--r--js/src/jit/arm64/vixl/Cpu-vixl.cpp256
-rw-r--r--js/src/jit/arm64/vixl/Cpu-vixl.h241
-rw-r--r--js/src/jit/arm64/vixl/Debugger-vixl.cpp1535
-rw-r--r--js/src/jit/arm64/vixl/Debugger-vixl.h117
-rw-r--r--js/src/jit/arm64/vixl/Decoder-vixl.cpp899
-rw-r--r--js/src/jit/arm64/vixl/Decoder-vixl.h276
-rw-r--r--js/src/jit/arm64/vixl/Disasm-vixl.cpp3741
-rw-r--r--js/src/jit/arm64/vixl/Disasm-vixl.h181
-rw-r--r--js/src/jit/arm64/vixl/Globals-vixl.h272
-rw-r--r--js/src/jit/arm64/vixl/Instructions-vixl.cpp627
-rw-r--r--js/src/jit/arm64/vixl/Instructions-vixl.h817
-rw-r--r--js/src/jit/arm64/vixl/Instrument-vixl.cpp850
-rw-r--r--js/src/jit/arm64/vixl/Instrument-vixl.h109
-rw-r--r--js/src/jit/arm64/vixl/Logic-vixl.cpp4738
-rw-r--r--js/src/jit/arm64/vixl/MacroAssembler-vixl.cpp2027
-rw-r--r--js/src/jit/arm64/vixl/MacroAssembler-vixl.h2622
-rw-r--r--js/src/jit/arm64/vixl/MozAssembler-vixl.cpp610
-rw-r--r--js/src/jit/arm64/vixl/MozBaseAssembler-vixl.h356
-rw-r--r--js/src/jit/arm64/vixl/MozCachingDecoder.h179
-rw-r--r--js/src/jit/arm64/vixl/MozCpu-vixl.cpp226
-rw-r--r--js/src/jit/arm64/vixl/MozInstructions-vixl.cpp211
-rw-r--r--js/src/jit/arm64/vixl/MozSimulator-vixl.cpp1258
-rw-r--r--js/src/jit/arm64/vixl/Platform-vixl.h39
-rw-r--r--js/src/jit/arm64/vixl/README.md7
-rw-r--r--js/src/jit/arm64/vixl/Simulator-Constants-vixl.h140
-rw-r--r--js/src/jit/arm64/vixl/Simulator-vixl.cpp4371
-rw-r--r--js/src/jit/arm64/vixl/Simulator-vixl.h2592
-rw-r--r--js/src/jit/arm64/vixl/Utils-vixl.cpp555
-rw-r--r--js/src/jit/arm64/vixl/Utils-vixl.h1283
55 files changed, 64751 insertions, 0 deletions
diff --git a/js/src/jit/arm64/Architecture-arm64.cpp b/js/src/jit/arm64/Architecture-arm64.cpp
new file mode 100644
index 0000000000..eb3dd67b1a
--- /dev/null
+++ b/js/src/jit/arm64/Architecture-arm64.cpp
@@ -0,0 +1,129 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "jit/arm64/Architecture-arm64.h"
+
+#include <cstring>
+
+#include "jit/arm64/vixl/Cpu-vixl.h"
+#include "jit/FlushICache.h" // js::jit::FlushICache
+#include "jit/RegisterSets.h"
+
+namespace js {
+namespace jit {
+
+Registers::Code Registers::FromName(const char* name) {
+ // Check for some register aliases first.
+ if (strcmp(name, "ip0") == 0) {
+ return ip0;
+ }
+ if (strcmp(name, "ip1") == 0) {
+ return ip1;
+ }
+ if (strcmp(name, "fp") == 0) {
+ return fp;
+ }
+
+ for (uint32_t i = 0; i < Total; i++) {
+ if (strcmp(GetName(i), name) == 0) {
+ return Code(i);
+ }
+ }
+
+ return Invalid;
+}
+
+FloatRegisters::Code FloatRegisters::FromName(const char* name) {
+ for (size_t i = 0; i < Total; i++) {
+ if (strcmp(GetName(i), name) == 0) {
+ return Code(i);
+ }
+ }
+
+ return Invalid;
+}
+
+// This must sync with GetPushSizeInBytes just below and also with
+// MacroAssembler::PushRegsInMask.
+FloatRegisterSet FloatRegister::ReduceSetForPush(const FloatRegisterSet& s) {
+ SetType all = s.bits();
+ SetType set128b =
+ (all & FloatRegisters::AllSimd128Mask) >> FloatRegisters::ShiftSimd128;
+ SetType doubleSet =
+ (all & FloatRegisters::AllDoubleMask) >> FloatRegisters::ShiftDouble;
+ SetType singleSet =
+ (all & FloatRegisters::AllSingleMask) >> FloatRegisters::ShiftSingle;
+
+ // See GetPushSizeInBytes.
+ SetType set64b = (singleSet | doubleSet) & ~set128b;
+
+ SetType reduced = (set128b << FloatRegisters::ShiftSimd128) |
+ (set64b << FloatRegisters::ShiftDouble);
+ return FloatRegisterSet(reduced);
+}
+
+// Compute the size of the dump area for |s.ReduceSetForPush()|, as defined by
+// MacroAssembler::PushRegsInMask for this target.
+uint32_t FloatRegister::GetPushSizeInBytes(const FloatRegisterSet& s) {
+ SetType all = s.bits();
+ SetType set128b =
+ (all & FloatRegisters::AllSimd128Mask) >> FloatRegisters::ShiftSimd128;
+ SetType doubleSet =
+ (all & FloatRegisters::AllDoubleMask) >> FloatRegisters::ShiftDouble;
+ SetType singleSet =
+ (all & FloatRegisters::AllSingleMask) >> FloatRegisters::ShiftSingle;
+
+ // PushRegsInMask pushes singles as if they were doubles. Also we need to
+ // remove singles or doubles which are also pushed as part of a vector
+ // register.
+ SetType set64b = (singleSet | doubleSet) & ~set128b;
+
+ // The "+ 1) & ~1" is to take into account the alignment hole below the
+ // double-reg dump area. See MacroAssembler::PushRegsInMaskSizeInBytes.
+ return ((set64b.size() + 1) & ~1) * sizeof(double) +
+ set128b.size() * SizeOfSimd128;
+}
+
+uint32_t FloatRegister::getRegisterDumpOffsetInBytes() {
+ // See block comment in MacroAssembler.h for further required invariants.
+ static_assert(sizeof(jit::FloatRegisters::RegisterContent) == 16);
+ return encoding() * sizeof(jit::FloatRegisters::RegisterContent);
+}
+
+// For N in 0..31, if any of sN, dN or qN is a member of `s`, the returned set
+// will contain all of sN, dN and qN.
+FloatRegisterSet FloatRegister::BroadcastToAllSizes(const FloatRegisterSet& s) {
+ SetType all = s.bits();
+ SetType set128b =
+ (all & FloatRegisters::AllSimd128Mask) >> FloatRegisters::ShiftSimd128;
+ SetType doubleSet =
+ (all & FloatRegisters::AllDoubleMask) >> FloatRegisters::ShiftDouble;
+ SetType singleSet =
+ (all & FloatRegisters::AllSingleMask) >> FloatRegisters::ShiftSingle;
+
+ SetType merged = set128b | doubleSet | singleSet;
+ SetType broadcasted = (merged << FloatRegisters::ShiftSimd128) |
+ (merged << FloatRegisters::ShiftDouble) |
+ (merged << FloatRegisters::ShiftSingle);
+
+ return FloatRegisterSet(broadcasted);
+}
+
+uint32_t GetARM64Flags() { return 0; }
+
+// CPU flags handling on ARM64 is currently different from other platforms:
+// the flags are computed and stored per-assembler and are thus "always
+// computed".
+bool CPUFlagsHaveBeenComputed() { return true; }
+
+void FlushICache(void* code, size_t size) {
+ vixl::CPU::EnsureIAndDCacheCoherency(code, size);
+}
+
+void FlushExecutionContext() { vixl::CPU::FlushExecutionContext(); }
+
+} // namespace jit
+} // namespace js
diff --git a/js/src/jit/arm64/Architecture-arm64.h b/js/src/jit/arm64/Architecture-arm64.h
new file mode 100644
index 0000000000..96bbc63848
--- /dev/null
+++ b/js/src/jit/arm64/Architecture-arm64.h
@@ -0,0 +1,773 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_arm64_Architecture_arm64_h
+#define jit_arm64_Architecture_arm64_h
+
+#include "mozilla/Assertions.h"
+#include "mozilla/MathAlgorithms.h"
+
+#include <algorithm>
+#include <iterator>
+
+#include "jit/arm64/vixl/Instructions-vixl.h"
+#include "jit/shared/Architecture-shared.h"
+
+#include "js/Utility.h"
+
+#define JS_HAS_HIDDEN_SP
+static const uint32_t HiddenSPEncoding = vixl::kSPRegInternalCode;
+
+namespace js {
+namespace jit {
+
+// AArch64 has 32 64-bit integer registers, x0 though x31.
+//
+// x31 (or, more accurately, the integer register with encoding 31, since
+// there is no x31 per se) is special and functions as both the stack pointer
+// and a zero register.
+//
+// The bottom 32 bits of each of the X registers is accessible as w0 through
+// w31. The program counter is not accessible as a register.
+//
+// SIMD and scalar floating-point registers share a register bank.
+// 32 bit float registers are s0 through s31.
+// 64 bit double registers are d0 through d31.
+// 128 bit SIMD registers are v0 through v31.
+// e.g., s0 is the bottom 32 bits of d0, which is the bottom 64 bits of v0.
+
+// AArch64 Calling Convention:
+// x0 - x7: arguments and return value
+// x8: indirect result (struct) location
+// x9 - x15: temporary registers
+// x16 - x17: intra-call-use registers (PLT, linker)
+// x18: platform specific use (TLS)
+// x19 - x28: callee-saved registers
+// x29: frame pointer
+// x30: link register
+
+// AArch64 Calling Convention for Floats:
+// d0 - d7: arguments and return value
+// d8 - d15: callee-saved registers
+// Bits 64:128 are not saved for v8-v15.
+// d16 - d31: temporary registers
+
+// AArch64 does not have soft float.
+
+class Registers {
+ public:
+ enum RegisterID {
+ w0 = 0,
+ x0 = 0,
+ w1 = 1,
+ x1 = 1,
+ w2 = 2,
+ x2 = 2,
+ w3 = 3,
+ x3 = 3,
+ w4 = 4,
+ x4 = 4,
+ w5 = 5,
+ x5 = 5,
+ w6 = 6,
+ x6 = 6,
+ w7 = 7,
+ x7 = 7,
+ w8 = 8,
+ x8 = 8,
+ w9 = 9,
+ x9 = 9,
+ w10 = 10,
+ x10 = 10,
+ w11 = 11,
+ x11 = 11,
+ w12 = 12,
+ x12 = 12,
+ w13 = 13,
+ x13 = 13,
+ w14 = 14,
+ x14 = 14,
+ w15 = 15,
+ x15 = 15,
+ w16 = 16,
+ x16 = 16,
+ ip0 = 16, // MacroAssembler scratch register 1.
+ w17 = 17,
+ x17 = 17,
+ ip1 = 17, // MacroAssembler scratch register 2.
+ w18 = 18,
+ x18 = 18,
+ tls = 18, // Platform-specific use (TLS).
+ w19 = 19,
+ x19 = 19,
+ w20 = 20,
+ x20 = 20,
+ w21 = 21,
+ x21 = 21,
+ w22 = 22,
+ x22 = 22,
+ w23 = 23,
+ x23 = 23,
+ w24 = 24,
+ x24 = 24,
+ w25 = 25,
+ x25 = 25,
+ w26 = 26,
+ x26 = 26,
+ w27 = 27,
+ x27 = 27,
+ w28 = 28,
+ x28 = 28,
+ w29 = 29,
+ x29 = 29,
+ fp = 29,
+ w30 = 30,
+ x30 = 30,
+ lr = 30,
+ w31 = 31,
+ x31 = 31,
+ wzr = 31,
+ xzr = 31,
+ sp = 31, // Special: both stack pointer and a zero register.
+ };
+ typedef uint8_t Code;
+ typedef uint32_t Encoding;
+ typedef uint32_t SetType;
+
+ static const Code Invalid = 0xFF;
+
+ union RegisterContent {
+ uintptr_t r;
+ };
+
+ static uint32_t SetSize(SetType x) {
+ static_assert(sizeof(SetType) == 4, "SetType must be 32 bits");
+ return mozilla::CountPopulation32(x);
+ }
+ static uint32_t FirstBit(SetType x) {
+ return mozilla::CountTrailingZeroes32(x);
+ }
+ static uint32_t LastBit(SetType x) {
+ return 31 - mozilla::CountLeadingZeroes32(x);
+ }
+
+ static const char* GetName(uint32_t code) {
+ static const char* const Names[] = {
+ "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
+ "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
+ "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
+ "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"};
+ static_assert(Total == std::size(Names), "Table is the correct size");
+ if (code >= Total) {
+ return "invalid";
+ }
+ return Names[code];
+ }
+
+ static Code FromName(const char* name);
+
+ static const uint32_t Total = 32;
+ static const uint32_t TotalPhys = 32;
+ static const uint32_t Allocatable =
+ 27; // No named special-function registers.
+
+ static const SetType AllMask = 0xFFFFFFFF;
+ static const SetType NoneMask = 0x0;
+
+ static const SetType ArgRegMask =
+ (1 << Registers::x0) | (1 << Registers::x1) | (1 << Registers::x2) |
+ (1 << Registers::x3) | (1 << Registers::x4) | (1 << Registers::x5) |
+ (1 << Registers::x6) | (1 << Registers::x7) | (1 << Registers::x8);
+
+ static const SetType VolatileMask =
+ (1 << Registers::x0) | (1 << Registers::x1) | (1 << Registers::x2) |
+ (1 << Registers::x3) | (1 << Registers::x4) | (1 << Registers::x5) |
+ (1 << Registers::x6) | (1 << Registers::x7) | (1 << Registers::x8) |
+ (1 << Registers::x9) | (1 << Registers::x10) | (1 << Registers::x11) |
+ (1 << Registers::x12) | (1 << Registers::x13) | (1 << Registers::x14) |
+ (1 << Registers::x15) | (1 << Registers::x16) | (1 << Registers::x17) |
+ (1 << Registers::x18);
+
+ static const SetType NonVolatileMask =
+ (1 << Registers::x19) | (1 << Registers::x20) | (1 << Registers::x21) |
+ (1 << Registers::x22) | (1 << Registers::x23) | (1 << Registers::x24) |
+ (1 << Registers::x25) | (1 << Registers::x26) | (1 << Registers::x27) |
+ (1 << Registers::x28) | (1 << Registers::x29) | (1 << Registers::x30);
+
+ static const SetType NonAllocatableMask =
+ (1 << Registers::x28) | // PseudoStackPointer.
+ (1 << Registers::ip0) | // First scratch register.
+ (1 << Registers::ip1) | // Second scratch register.
+ (1 << Registers::tls) | (1 << Registers::lr) | (1 << Registers::sp) |
+ (1 << Registers::fp);
+
+ static const SetType WrapperMask = VolatileMask;
+
+ // Registers returned from a JS -> JS call.
+ static const SetType JSCallMask = (1 << Registers::x2);
+
+ // Registers returned from a JS -> C call.
+ static const SetType CallMask = (1 << Registers::x0);
+
+ static const SetType AllocatableMask = AllMask & ~NonAllocatableMask;
+};
+
+// Smallest integer type that can hold a register bitmask.
+typedef uint32_t PackedRegisterMask;
+
+template <typename T>
+class TypedRegisterSet;
+
+// 128-bit bitset for FloatRegisters::SetType.
+
+class Bitset128 {
+ // The order (hi, lo) looks best in the debugger.
+ uint64_t hi, lo;
+
+ public:
+ MOZ_IMPLICIT constexpr Bitset128(uint64_t initial) : hi(0), lo(initial) {}
+ MOZ_IMPLICIT constexpr Bitset128(const Bitset128& that)
+ : hi(that.hi), lo(that.lo) {}
+
+ constexpr Bitset128(uint64_t hi, uint64_t lo) : hi(hi), lo(lo) {}
+
+ constexpr uint64_t high() const { return hi; }
+
+ constexpr uint64_t low() const { return lo; }
+
+ constexpr Bitset128 operator|(Bitset128 that) const {
+ return Bitset128(hi | that.hi, lo | that.lo);
+ }
+
+ constexpr Bitset128 operator&(Bitset128 that) const {
+ return Bitset128(hi & that.hi, lo & that.lo);
+ }
+
+ constexpr Bitset128 operator^(Bitset128 that) const {
+ return Bitset128(hi ^ that.hi, lo ^ that.lo);
+ }
+
+ constexpr Bitset128 operator~() const { return Bitset128(~hi, ~lo); }
+
+ // We must avoid shifting by the word width, which is complex. Inlining plus
+ // shift-by-constant will remove a lot of code in the normal case.
+
+ constexpr Bitset128 operator<<(size_t shift) const {
+ if (shift == 0) {
+ return *this;
+ }
+ if (shift < 64) {
+ return Bitset128((hi << shift) | (lo >> (64 - shift)), lo << shift);
+ }
+ if (shift == 64) {
+ return Bitset128(lo, 0);
+ }
+ return Bitset128(lo << (shift - 64), 0);
+ }
+
+ constexpr Bitset128 operator>>(size_t shift) const {
+ if (shift == 0) {
+ return *this;
+ }
+ if (shift < 64) {
+ return Bitset128(hi >> shift, (lo >> shift) | (hi << (64 - shift)));
+ }
+ if (shift == 64) {
+ return Bitset128(0, hi);
+ }
+ return Bitset128(0, hi >> (shift - 64));
+ }
+
+ constexpr bool operator==(Bitset128 that) const {
+ return lo == that.lo && hi == that.hi;
+ }
+
+ constexpr bool operator!=(Bitset128 that) const {
+ return lo != that.lo || hi != that.hi;
+ }
+
+ constexpr bool operator!() const { return (hi | lo) == 0; }
+
+ Bitset128& operator|=(const Bitset128& that) {
+ hi |= that.hi;
+ lo |= that.lo;
+ return *this;
+ }
+
+ Bitset128& operator&=(const Bitset128& that) {
+ hi &= that.hi;
+ lo &= that.lo;
+ return *this;
+ }
+
+ uint32_t size() const {
+ return mozilla::CountPopulation64(hi) + mozilla::CountPopulation64(lo);
+ }
+
+ uint32_t countTrailingZeroes() const {
+ if (lo) {
+ return mozilla::CountTrailingZeroes64(lo);
+ }
+ return mozilla::CountTrailingZeroes64(hi) + 64;
+ }
+
+ uint32_t countLeadingZeroes() const {
+ if (hi) {
+ return mozilla::CountLeadingZeroes64(hi);
+ }
+ return mozilla::CountLeadingZeroes64(lo) + 64;
+ }
+};
+
+class FloatRegisters {
+ public:
+ enum FPRegisterID {
+ s0 = 0,
+ d0 = 0,
+ v0 = 0,
+ s1 = 1,
+ d1 = 1,
+ v1 = 1,
+ s2 = 2,
+ d2 = 2,
+ v2 = 2,
+ s3 = 3,
+ d3 = 3,
+ v3 = 3,
+ s4 = 4,
+ d4 = 4,
+ v4 = 4,
+ s5 = 5,
+ d5 = 5,
+ v5 = 5,
+ s6 = 6,
+ d6 = 6,
+ v6 = 6,
+ s7 = 7,
+ d7 = 7,
+ v7 = 7,
+ s8 = 8,
+ d8 = 8,
+ v8 = 8,
+ s9 = 9,
+ d9 = 9,
+ v9 = 9,
+ s10 = 10,
+ d10 = 10,
+ v10 = 10,
+ s11 = 11,
+ d11 = 11,
+ v11 = 11,
+ s12 = 12,
+ d12 = 12,
+ v12 = 12,
+ s13 = 13,
+ d13 = 13,
+ v13 = 13,
+ s14 = 14,
+ d14 = 14,
+ v14 = 14,
+ s15 = 15,
+ d15 = 15,
+ v15 = 15,
+ s16 = 16,
+ d16 = 16,
+ v16 = 16,
+ s17 = 17,
+ d17 = 17,
+ v17 = 17,
+ s18 = 18,
+ d18 = 18,
+ v18 = 18,
+ s19 = 19,
+ d19 = 19,
+ v19 = 19,
+ s20 = 20,
+ d20 = 20,
+ v20 = 20,
+ s21 = 21,
+ d21 = 21,
+ v21 = 21,
+ s22 = 22,
+ d22 = 22,
+ v22 = 22,
+ s23 = 23,
+ d23 = 23,
+ v23 = 23,
+ s24 = 24,
+ d24 = 24,
+ v24 = 24,
+ s25 = 25,
+ d25 = 25,
+ v25 = 25,
+ s26 = 26,
+ d26 = 26,
+ v26 = 26,
+ s27 = 27,
+ d27 = 27,
+ v27 = 27,
+ s28 = 28,
+ d28 = 28,
+ v28 = 28,
+ s29 = 29,
+ d29 = 29,
+ v29 = 29,
+ s30 = 30,
+ d30 = 30,
+ v30 = 30,
+ s31 = 31,
+ d31 = 31,
+ v31 = 31, // Scratch register.
+ };
+
+ // Eight bits: (invalid << 7) | (kind << 5) | encoding
+ typedef uint8_t Code;
+ typedef FPRegisterID Encoding;
+ typedef Bitset128 SetType;
+
+ enum Kind : uint8_t { Single, Double, Simd128, NumTypes };
+
+ static constexpr Code Invalid = 0x80;
+
+ static const char* GetName(uint32_t code) {
+ // clang-format off
+ static const char* const Names[] = {
+ "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9",
+ "s10", "s11", "s12", "s13", "s14", "s15", "s16", "s17", "s18", "s19",
+ "s20", "s21", "s22", "s23", "s24", "s25", "s26", "s27", "s28", "s29",
+ "s30", "s31",
+
+ "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9",
+ "d10", "d11", "d12", "d13", "d14", "d15", "d16", "d17", "d18", "d19",
+ "d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27", "d28", "d29",
+ "d30", "d31",
+
+ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+ "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
+ "v30", "v31",
+ };
+ // clang-format on
+ static_assert(Total == std::size(Names), "Table is the correct size");
+ if (code >= Total) {
+ return "invalid";
+ }
+ return Names[code];
+ }
+
+ static Code FromName(const char* name);
+
+ static const uint32_t TotalPhys = 32;
+ static const uint32_t Total = TotalPhys * NumTypes;
+ static const uint32_t Allocatable = 31; // Without d31, the scratch register.
+
+ static_assert(sizeof(SetType) * 8 >= Total,
+ "SetType should be large enough to enumerate all registers.");
+
+ static constexpr unsigned ShiftSingle = uint32_t(Single) * TotalPhys;
+ static constexpr unsigned ShiftDouble = uint32_t(Double) * TotalPhys;
+ static constexpr unsigned ShiftSimd128 = uint32_t(Simd128) * TotalPhys;
+
+ static constexpr SetType NoneMask = SetType(0);
+ static constexpr SetType AllPhysMask = ~(~SetType(0) << TotalPhys);
+ static constexpr SetType AllSingleMask = AllPhysMask << ShiftSingle;
+ static constexpr SetType AllDoubleMask = AllPhysMask << ShiftDouble;
+ static constexpr SetType AllSimd128Mask = AllPhysMask << ShiftSimd128;
+ static constexpr SetType AllMask =
+ AllDoubleMask | AllSingleMask | AllSimd128Mask;
+ static constexpr SetType AliasMask = (SetType(1) << ShiftSingle) |
+ (SetType(1) << ShiftDouble) |
+ (SetType(1) << ShiftSimd128);
+
+ static_assert(ShiftSingle == 0,
+ "Or the NonVolatileMask must be computed differently");
+
+ // s31 is the ScratchFloatReg.
+ static constexpr SetType NonVolatileSingleMask =
+ SetType((1 << FloatRegisters::s8) | (1 << FloatRegisters::s9) |
+ (1 << FloatRegisters::s10) | (1 << FloatRegisters::s11) |
+ (1 << FloatRegisters::s12) | (1 << FloatRegisters::s13) |
+ (1 << FloatRegisters::s14) | (1 << FloatRegisters::s15) |
+ (1 << FloatRegisters::s16) | (1 << FloatRegisters::s17) |
+ (1 << FloatRegisters::s18) | (1 << FloatRegisters::s19) |
+ (1 << FloatRegisters::s20) | (1 << FloatRegisters::s21) |
+ (1 << FloatRegisters::s22) | (1 << FloatRegisters::s23) |
+ (1 << FloatRegisters::s24) | (1 << FloatRegisters::s25) |
+ (1 << FloatRegisters::s26) | (1 << FloatRegisters::s27) |
+ (1 << FloatRegisters::s28) | (1 << FloatRegisters::s29) |
+ (1 << FloatRegisters::s30));
+
+ static constexpr SetType NonVolatileMask =
+ (NonVolatileSingleMask << ShiftSingle) |
+ (NonVolatileSingleMask << ShiftDouble) |
+ (NonVolatileSingleMask << ShiftSimd128);
+
+ static constexpr SetType VolatileMask = AllMask & ~NonVolatileMask;
+
+ static constexpr SetType WrapperMask = VolatileMask;
+
+ static_assert(ShiftSingle == 0,
+ "Or the NonAllocatableMask must be computed differently");
+
+ // d31 is the ScratchFloatReg.
+ static constexpr SetType NonAllocatableSingleMask =
+ (SetType(1) << FloatRegisters::s31);
+
+ static constexpr SetType NonAllocatableMask =
+ NonAllocatableSingleMask | (NonAllocatableSingleMask << ShiftDouble) |
+ (NonAllocatableSingleMask << ShiftSimd128);
+
+ static constexpr SetType AllocatableMask = AllMask & ~NonAllocatableMask;
+
+ // Content spilled during bailouts.
+ union RegisterContent {
+ float s;
+ double d;
+ uint8_t v128[16];
+ };
+
+ static constexpr Encoding encoding(Code c) {
+ // assert() not available in constexpr function.
+ // assert(c < Total);
+ return Encoding(c & 31);
+ }
+
+ static constexpr Kind kind(Code c) {
+ // assert() not available in constexpr function.
+ // assert(c < Total && ((c >> 5) & 3) < NumTypes);
+ return Kind((c >> 5) & 3);
+ }
+
+ static constexpr Code fromParts(uint32_t encoding, uint32_t kind,
+ uint32_t invalid) {
+ return Code((invalid << 7) | (kind << 5) | encoding);
+ }
+};
+
+static const uint32_t SpillSlotSize =
+ std::max(sizeof(Registers::RegisterContent),
+ sizeof(FloatRegisters::RegisterContent));
+
+static const uint32_t ShadowStackSpace = 0;
+
+// When our only strategy for far jumps is to encode the offset directly, and
+// not insert any jump islands during assembly for even further jumps, then the
+// architecture restricts us to -2^27 .. 2^27-4, to fit into a signed 28-bit
+// value. We further reduce this range to allow the far-jump inserting code to
+// have some breathing room.
+static const uint32_t JumpImmediateRange = ((1 << 27) - (20 * 1024 * 1024));
+
+static const uint32_t ABIStackAlignment = 16;
+static const uint32_t CodeAlignment = 16;
+static const bool StackKeptAligned = false;
+
+// Although sp is only usable if 16-byte alignment is kept,
+// the Pseudo-StackPointer enables use of 8-byte alignment.
+static const uint32_t StackAlignment = 8;
+static const uint32_t NativeFrameSize = 8;
+
+struct FloatRegister {
+ typedef FloatRegisters Codes;
+ typedef Codes::Code Code;
+ typedef Codes::Encoding Encoding;
+ typedef Codes::SetType SetType;
+
+ static uint32_t SetSize(SetType x) {
+ static_assert(sizeof(SetType) == 16, "SetType must be 128 bits");
+ x |= x >> FloatRegisters::TotalPhys;
+ x |= x >> FloatRegisters::TotalPhys;
+ x &= FloatRegisters::AllPhysMask;
+ MOZ_ASSERT(x.high() == 0);
+ MOZ_ASSERT((x.low() >> 32) == 0);
+ return mozilla::CountPopulation32(x.low());
+ }
+
+ static uint32_t FirstBit(SetType x) {
+ static_assert(sizeof(SetType) == 16, "SetType");
+ return x.countTrailingZeroes();
+ }
+ static uint32_t LastBit(SetType x) {
+ static_assert(sizeof(SetType) == 16, "SetType");
+ return 127 - x.countLeadingZeroes();
+ }
+
+ static constexpr size_t SizeOfSimd128 = 16;
+
+ private:
+ // These fields only hold valid values: an invalid register is always
+ // represented as a valid encoding and kind with the invalid_ bit set.
+ uint8_t encoding_; // 32 encodings
+ uint8_t kind_; // Double, Single, Simd128
+ bool invalid_;
+
+ typedef Codes::Kind Kind;
+
+ public:
+ constexpr FloatRegister(Encoding encoding, Kind kind)
+ : encoding_(encoding), kind_(kind), invalid_(false) {
+ // assert(uint32_t(encoding) < Codes::TotalPhys);
+ }
+
+ constexpr FloatRegister()
+ : encoding_(0), kind_(FloatRegisters::Double), invalid_(true) {}
+
+ static FloatRegister FromCode(uint32_t i) {
+ MOZ_ASSERT(i < Codes::Total);
+ return FloatRegister(FloatRegisters::encoding(i), FloatRegisters::kind(i));
+ }
+
+ bool isSingle() const {
+ MOZ_ASSERT(!invalid_);
+ return kind_ == FloatRegisters::Single;
+ }
+ bool isDouble() const {
+ MOZ_ASSERT(!invalid_);
+ return kind_ == FloatRegisters::Double;
+ }
+ bool isSimd128() const {
+ MOZ_ASSERT(!invalid_);
+ return kind_ == FloatRegisters::Simd128;
+ }
+ bool isInvalid() const { return invalid_; }
+
+ FloatRegister asSingle() const {
+ MOZ_ASSERT(!invalid_);
+ return FloatRegister(Encoding(encoding_), FloatRegisters::Single);
+ }
+ FloatRegister asDouble() const {
+ MOZ_ASSERT(!invalid_);
+ return FloatRegister(Encoding(encoding_), FloatRegisters::Double);
+ }
+ FloatRegister asSimd128() const {
+ MOZ_ASSERT(!invalid_);
+ return FloatRegister(Encoding(encoding_), FloatRegisters::Simd128);
+ }
+
+ constexpr uint32_t size() const {
+ MOZ_ASSERT(!invalid_);
+ if (kind_ == FloatRegisters::Double) {
+ return sizeof(double);
+ }
+ if (kind_ == FloatRegisters::Single) {
+ return sizeof(float);
+ }
+ MOZ_ASSERT(kind_ == FloatRegisters::Simd128);
+ return SizeOfSimd128;
+ }
+
+ constexpr Code code() const {
+ // assert(!invalid_);
+ return Codes::fromParts(encoding_, kind_, invalid_);
+ }
+
+ constexpr Encoding encoding() const {
+ MOZ_ASSERT(!invalid_);
+ return Encoding(encoding_);
+ }
+
+ const char* name() const { return FloatRegisters::GetName(code()); }
+ bool volatile_() const {
+ MOZ_ASSERT(!invalid_);
+ return !!((SetType(1) << code()) & FloatRegisters::VolatileMask);
+ }
+ constexpr bool operator!=(FloatRegister other) const {
+ return code() != other.code();
+ }
+ constexpr bool operator==(FloatRegister other) const {
+ return code() == other.code();
+ }
+
+ bool aliases(FloatRegister other) const {
+ return other.encoding_ == encoding_;
+ }
+ // This function mostly exists for the ARM backend. It is to ensure that two
+ // floating point registers' types are equivalent. e.g. S0 is not equivalent
+ // to D16, since S0 holds a float32, and D16 holds a Double.
+ // Since all floating point registers on x86 and x64 are equivalent, it is
+ // reasonable for this function to do the same.
+ bool equiv(FloatRegister other) const {
+ MOZ_ASSERT(!invalid_);
+ return kind_ == other.kind_;
+ }
+
+ uint32_t numAliased() const { return Codes::NumTypes; }
+ uint32_t numAlignedAliased() { return numAliased(); }
+
+ FloatRegister aliased(uint32_t aliasIdx) {
+ MOZ_ASSERT(!invalid_);
+ MOZ_ASSERT(aliasIdx < numAliased());
+ return FloatRegister(Encoding(encoding_),
+ Kind((aliasIdx + kind_) % Codes::NumTypes));
+ }
+ FloatRegister alignedAliased(uint32_t aliasIdx) { return aliased(aliasIdx); }
+ SetType alignedOrDominatedAliasedSet() const {
+ return Codes::AliasMask << encoding_;
+ }
+
+ static constexpr RegTypeName DefaultType = RegTypeName::Float64;
+
+ template <RegTypeName Name = DefaultType>
+ static SetType LiveAsIndexableSet(SetType s) {
+ return SetType(0);
+ }
+
+ template <RegTypeName Name = DefaultType>
+ static SetType AllocatableAsIndexableSet(SetType s) {
+ static_assert(Name != RegTypeName::Any, "Allocatable set are not iterable");
+ return LiveAsIndexableSet<Name>(s);
+ }
+
+ static TypedRegisterSet<FloatRegister> ReduceSetForPush(
+ const TypedRegisterSet<FloatRegister>& s);
+ static uint32_t GetPushSizeInBytes(const TypedRegisterSet<FloatRegister>& s);
+ uint32_t getRegisterDumpOffsetInBytes();
+
+ // For N in 0..31, if any of sN, dN or qN is a member of `s`, the
+ // returned set will contain all of sN, dN and qN.
+ static TypedRegisterSet<FloatRegister> BroadcastToAllSizes(
+ const TypedRegisterSet<FloatRegister>& s);
+};
+
+template <>
+inline FloatRegister::SetType
+FloatRegister::LiveAsIndexableSet<RegTypeName::Float32>(SetType set) {
+ return set & FloatRegisters::AllSingleMask;
+}
+
+template <>
+inline FloatRegister::SetType
+FloatRegister::LiveAsIndexableSet<RegTypeName::Float64>(SetType set) {
+ return set & FloatRegisters::AllDoubleMask;
+}
+
+template <>
+inline FloatRegister::SetType
+FloatRegister::LiveAsIndexableSet<RegTypeName::Vector128>(SetType set) {
+ return set & FloatRegisters::AllSimd128Mask;
+}
+
+template <>
+inline FloatRegister::SetType
+FloatRegister::LiveAsIndexableSet<RegTypeName::Any>(SetType set) {
+ return set;
+}
+
+// ARM/D32 has double registers that cannot be treated as float32.
+// Luckily, ARMv8 doesn't have the same misfortune.
+inline bool hasUnaliasedDouble() { return false; }
+
+// ARM prior to ARMv8 also has doubles that alias multiple floats.
+// Again, ARMv8 is in the clear.
+inline bool hasMultiAlias() { return false; }
+
+uint32_t GetARM64Flags();
+
+bool CanFlushICacheFromBackgroundThreads();
+
+} // namespace jit
+} // namespace js
+
+#endif // jit_arm64_Architecture_arm64_h
diff --git a/js/src/jit/arm64/Assembler-arm64.cpp b/js/src/jit/arm64/Assembler-arm64.cpp
new file mode 100644
index 0000000000..1e441ae635
--- /dev/null
+++ b/js/src/jit/arm64/Assembler-arm64.cpp
@@ -0,0 +1,609 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "jit/arm64/Assembler-arm64.h"
+
+#include "mozilla/DebugOnly.h"
+#include "mozilla/MathAlgorithms.h"
+#include "mozilla/Maybe.h"
+
+#include "gc/Marking.h"
+#include "jit/arm64/Architecture-arm64.h"
+#include "jit/arm64/MacroAssembler-arm64.h"
+#include "jit/arm64/vixl/Disasm-vixl.h"
+#include "jit/AutoWritableJitCode.h"
+#include "jit/ExecutableAllocator.h"
+#include "vm/Realm.h"
+
+#include "gc/StoreBuffer-inl.h"
+
+using namespace js;
+using namespace js::jit;
+
+using mozilla::CountLeadingZeroes32;
+using mozilla::DebugOnly;
+
+// Note this is used for inter-wasm calls and may pass arguments and results
+// in floating point registers even if the system ABI does not.
+
+ABIArg ABIArgGenerator::next(MIRType type) {
+ switch (type) {
+ case MIRType::Int32:
+ case MIRType::Int64:
+ case MIRType::Pointer:
+ case MIRType::RefOrNull:
+ case MIRType::StackResults:
+ if (intRegIndex_ == NumIntArgRegs) {
+ current_ = ABIArg(stackOffset_);
+ stackOffset_ += sizeof(uintptr_t);
+ break;
+ }
+ current_ = ABIArg(Register::FromCode(intRegIndex_));
+ intRegIndex_++;
+ break;
+
+ case MIRType::Float32:
+ case MIRType::Double:
+ if (floatRegIndex_ == NumFloatArgRegs) {
+ current_ = ABIArg(stackOffset_);
+ stackOffset_ += sizeof(double);
+ break;
+ }
+ current_ = ABIArg(FloatRegister(FloatRegisters::Encoding(floatRegIndex_),
+ type == MIRType::Double
+ ? FloatRegisters::Double
+ : FloatRegisters::Single));
+ floatRegIndex_++;
+ break;
+
+#ifdef ENABLE_WASM_SIMD
+ case MIRType::Simd128:
+ if (floatRegIndex_ == NumFloatArgRegs) {
+ stackOffset_ = AlignBytes(stackOffset_, SimdMemoryAlignment);
+ current_ = ABIArg(stackOffset_);
+ stackOffset_ += FloatRegister::SizeOfSimd128;
+ break;
+ }
+ current_ = ABIArg(FloatRegister(FloatRegisters::Encoding(floatRegIndex_),
+ FloatRegisters::Simd128));
+ floatRegIndex_++;
+ break;
+#endif
+
+ default:
+ // Note that in Assembler-x64.cpp there's a special case for Win64 which
+ // does not allow passing SIMD by value. Since there's Win64 on ARM64 we
+ // may need to duplicate that logic here.
+ MOZ_CRASH("Unexpected argument type");
+ }
+ return current_;
+}
+
+namespace js {
+namespace jit {
+
+void Assembler::finish() {
+ armbuffer_.flushPool();
+
+ // The extended jump table is part of the code buffer.
+ ExtendedJumpTable_ = emitExtendedJumpTable();
+ Assembler::FinalizeCode();
+}
+
+bool Assembler::appendRawCode(const uint8_t* code, size_t numBytes) {
+ flush();
+ return armbuffer_.appendRawCode(code, numBytes);
+}
+
+bool Assembler::reserve(size_t size) {
+ // This buffer uses fixed-size chunks so there's no point in reserving
+ // now vs. on-demand.
+ return !oom();
+}
+
+bool Assembler::swapBuffer(wasm::Bytes& bytes) {
+ // For now, specialize to the one use case. As long as wasm::Bytes is a
+ // Vector, not a linked-list of chunks, there's not much we can do other
+ // than copy.
+ MOZ_ASSERT(bytes.empty());
+ if (!bytes.resize(bytesNeeded())) {
+ return false;
+ }
+ armbuffer_.executableCopy(bytes.begin());
+ return true;
+}
+
+BufferOffset Assembler::emitExtendedJumpTable() {
+ if (!pendingJumps_.length() || oom()) {
+ return BufferOffset();
+ }
+
+ armbuffer_.flushPool();
+ armbuffer_.align(SizeOfJumpTableEntry);
+
+ BufferOffset tableOffset = armbuffer_.nextOffset();
+
+ for (size_t i = 0; i < pendingJumps_.length(); i++) {
+ // Each JumpTableEntry is of the form:
+ // LDR ip0 [PC, 8]
+ // BR ip0
+ // [Patchable 8-byte constant low bits]
+ // [Patchable 8-byte constant high bits]
+ DebugOnly<size_t> preOffset = size_t(armbuffer_.nextOffset().getOffset());
+
+ // The unguarded use of ScratchReg64 here is OK:
+ //
+ // - The present function is called from code that does not claim any
+ // scratch registers, we're done compiling user code and are emitting jump
+ // tables. Hence the scratch registers are available when we enter.
+ //
+ // - The pendingJumps_ represent jumps to other code sections that are not
+ // known to this MacroAssembler instance, and we're generating code to
+ // jump there. It is safe to assume that any code using such a generated
+ // branch to an unknown location did not store any valuable value in any
+ // scratch register. Hence the scratch registers can definitely be
+ // clobbered here.
+ //
+ // - Scratch register usage is restricted to sequential control flow within
+ // MacroAssembler functions. Hence the scratch registers will not be
+ // clobbered by ldr and br as they are Assembler primitives, not
+ // MacroAssembler functions.
+
+ ldr(ScratchReg64, ptrdiff_t(8 / vixl::kInstructionSize));
+ br(ScratchReg64);
+
+ DebugOnly<size_t> prePointer = size_t(armbuffer_.nextOffset().getOffset());
+ MOZ_ASSERT_IF(!oom(),
+ prePointer - preOffset == OffsetOfJumpTableEntryPointer);
+
+ brk(0x0);
+ brk(0x0);
+
+ DebugOnly<size_t> postOffset = size_t(armbuffer_.nextOffset().getOffset());
+
+ MOZ_ASSERT_IF(!oom(), postOffset - preOffset == SizeOfJumpTableEntry);
+ }
+
+ if (oom()) {
+ return BufferOffset();
+ }
+
+ return tableOffset;
+}
+
+void Assembler::executableCopy(uint8_t* buffer) {
+ // Copy the code and all constant pools into the output buffer.
+ armbuffer_.executableCopy(buffer);
+
+ // Patch any relative jumps that target code outside the buffer.
+ // The extended jump table may be used for distant jumps.
+ for (size_t i = 0; i < pendingJumps_.length(); i++) {
+ RelativePatch& rp = pendingJumps_[i];
+ MOZ_ASSERT(rp.target);
+
+ Instruction* target = (Instruction*)rp.target;
+ Instruction* branch = (Instruction*)(buffer + rp.offset.getOffset());
+ JumpTableEntry* extendedJumpTable = reinterpret_cast<JumpTableEntry*>(
+ buffer + ExtendedJumpTable_.getOffset());
+ if (branch->BranchType() != vixl::UnknownBranchType) {
+ if (branch->IsTargetReachable(target)) {
+ branch->SetImmPCOffsetTarget(target);
+ } else {
+ JumpTableEntry* entry = &extendedJumpTable[i];
+ branch->SetImmPCOffsetTarget(entry->getLdr());
+ entry->data = target;
+ }
+ } else {
+ // Currently a two-instruction call, it should be possible to optimize
+ // this into a single instruction call + nop in some instances, but this
+ // will work.
+ }
+ }
+}
+
+BufferOffset Assembler::immPool(ARMRegister dest, uint8_t* value,
+ vixl::LoadLiteralOp op, const LiteralDoc& doc,
+ ARMBuffer::PoolEntry* pe) {
+ uint32_t inst = op | Rt(dest);
+ const size_t numInst = 1;
+ const unsigned sizeOfPoolEntryInBytes = 4;
+ const unsigned numPoolEntries = sizeof(value) / sizeOfPoolEntryInBytes;
+ return allocLiteralLoadEntry(numInst, numPoolEntries, (uint8_t*)&inst, value,
+ doc, pe);
+}
+
+BufferOffset Assembler::immPool64(ARMRegister dest, uint64_t value,
+ ARMBuffer::PoolEntry* pe) {
+ return immPool(dest, (uint8_t*)&value, vixl::LDR_x_lit, LiteralDoc(value),
+ pe);
+}
+
+BufferOffset Assembler::fImmPool(ARMFPRegister dest, uint8_t* value,
+ vixl::LoadLiteralOp op,
+ const LiteralDoc& doc) {
+ uint32_t inst = op | Rt(dest);
+ const size_t numInst = 1;
+ const unsigned sizeOfPoolEntryInBits = 32;
+ const unsigned numPoolEntries = dest.size() / sizeOfPoolEntryInBits;
+ return allocLiteralLoadEntry(numInst, numPoolEntries, (uint8_t*)&inst, value,
+ doc);
+}
+
+BufferOffset Assembler::fImmPool64(ARMFPRegister dest, double value) {
+ return fImmPool(dest, (uint8_t*)&value, vixl::LDR_d_lit, LiteralDoc(value));
+}
+
+BufferOffset Assembler::fImmPool32(ARMFPRegister dest, float value) {
+ return fImmPool(dest, (uint8_t*)&value, vixl::LDR_s_lit, LiteralDoc(value));
+}
+
+void Assembler::bind(Label* label, BufferOffset targetOffset) {
+#ifdef JS_DISASM_ARM64
+ spew_.spewBind(label);
+#endif
+ // Nothing has seen the label yet: just mark the location.
+ // If we've run out of memory, don't attempt to modify the buffer which may
+ // not be there. Just mark the label as bound to the (possibly bogus)
+ // targetOffset.
+ if (!label->used() || oom()) {
+ label->bind(targetOffset.getOffset());
+ return;
+ }
+
+ // Get the most recent instruction that used the label, as stored in the
+ // label. This instruction is the head of an implicit linked list of label
+ // uses.
+ BufferOffset branchOffset(label);
+
+ while (branchOffset.assigned()) {
+ // Before overwriting the offset in this instruction, get the offset of
+ // the next link in the implicit branch list.
+ BufferOffset nextOffset = NextLink(branchOffset);
+
+ // Linking against the actual (Instruction*) would be invalid,
+ // since that Instruction could be anywhere in memory.
+ // Instead, just link against the correct relative offset, assuming
+ // no constant pools, which will be taken into consideration
+ // during finalization.
+ ptrdiff_t relativeByteOffset =
+ targetOffset.getOffset() - branchOffset.getOffset();
+ Instruction* link = getInstructionAt(branchOffset);
+
+ // This branch may still be registered for callbacks. Stop tracking it.
+ vixl::ImmBranchType branchType = link->BranchType();
+ vixl::ImmBranchRangeType branchRange =
+ Instruction::ImmBranchTypeToRange(branchType);
+ if (branchRange < vixl::NumShortBranchRangeTypes) {
+ BufferOffset deadline(
+ branchOffset.getOffset() +
+ Instruction::ImmBranchMaxForwardOffset(branchRange));
+ armbuffer_.unregisterBranchDeadline(branchRange, deadline);
+ }
+
+ // Is link able to reach the label?
+ if (link->IsPCRelAddressing() ||
+ link->IsTargetReachable(link + relativeByteOffset)) {
+ // Write a new relative offset into the instruction.
+ link->SetImmPCOffsetTarget(link + relativeByteOffset);
+ } else {
+ // This is a short-range branch, and it can't reach the label directly.
+ // Verify that it branches to a veneer: an unconditional branch.
+ MOZ_ASSERT(getInstructionAt(nextOffset)->BranchType() ==
+ vixl::UncondBranchType);
+ }
+
+ branchOffset = nextOffset;
+ }
+
+ // Bind the label, so that future uses may encode the offset immediately.
+ label->bind(targetOffset.getOffset());
+}
+
+void Assembler::addPendingJump(BufferOffset src, ImmPtr target,
+ RelocationKind reloc) {
+ MOZ_ASSERT(target.value != nullptr);
+
+ if (reloc == RelocationKind::JITCODE) {
+ jumpRelocations_.writeUnsigned(src.getOffset());
+ }
+
+ // This jump is not patchable at runtime. Extended jump table entry
+ // requirements cannot be known until finalization, so to be safe, give each
+ // jump and entry. This also causes GC tracing of the target.
+ enoughMemory_ &=
+ pendingJumps_.append(RelativePatch(src, target.value, reloc));
+}
+
+void Assembler::PatchWrite_NearCall(CodeLocationLabel start,
+ CodeLocationLabel toCall) {
+ Instruction* dest = (Instruction*)start.raw();
+ ptrdiff_t relTarget = (Instruction*)toCall.raw() - dest;
+ ptrdiff_t relTarget00 = relTarget >> 2;
+ MOZ_RELEASE_ASSERT((relTarget & 0x3) == 0);
+ MOZ_RELEASE_ASSERT(vixl::IsInt26(relTarget00));
+
+ bl(dest, relTarget00);
+}
+
+void Assembler::PatchDataWithValueCheck(CodeLocationLabel label,
+ PatchedImmPtr newValue,
+ PatchedImmPtr expected) {
+ Instruction* i = (Instruction*)label.raw();
+ void** pValue = i->LiteralAddress<void**>();
+ MOZ_ASSERT(*pValue == expected.value);
+ *pValue = newValue.value;
+}
+
+void Assembler::PatchDataWithValueCheck(CodeLocationLabel label,
+ ImmPtr newValue, ImmPtr expected) {
+ PatchDataWithValueCheck(label, PatchedImmPtr(newValue.value),
+ PatchedImmPtr(expected.value));
+}
+
+void Assembler::ToggleToJmp(CodeLocationLabel inst_) {
+ Instruction* i = (Instruction*)inst_.raw();
+ MOZ_ASSERT(i->IsAddSubImmediate());
+
+ // Refer to instruction layout in ToggleToCmp().
+ int imm19 = (int)i->Bits(23, 5);
+ MOZ_ASSERT(vixl::IsInt19(imm19));
+
+ b(i, imm19, Always);
+}
+
+void Assembler::ToggleToCmp(CodeLocationLabel inst_) {
+ Instruction* i = (Instruction*)inst_.raw();
+ MOZ_ASSERT(i->IsCondB());
+
+ int imm19 = i->ImmCondBranch();
+ // bit 23 is reserved, and the simulator throws an assertion when this happens
+ // It'll be messy to decode, but we can steal bit 30 or bit 31.
+ MOZ_ASSERT(vixl::IsInt18(imm19));
+
+ // 31 - 64-bit if set, 32-bit if unset. (OK!)
+ // 30 - sub if set, add if unset. (OK!)
+ // 29 - SetFlagsBit. Must be set.
+ // 22:23 - ShiftAddSub. (OK!)
+ // 10:21 - ImmAddSub. (OK!)
+ // 5:9 - First source register (Rn). (OK!)
+ // 0:4 - Destination Register. Must be xzr.
+
+ // From the above, there is a safe 19-bit contiguous region from 5:23.
+ Emit(i, vixl::ThirtyTwoBits | vixl::AddSubImmediateFixed | vixl::SUB |
+ Flags(vixl::SetFlags) | Rd(vixl::xzr) |
+ (imm19 << vixl::Rn_offset));
+}
+
+void Assembler::ToggleCall(CodeLocationLabel inst_, bool enabled) {
+ const Instruction* first = reinterpret_cast<Instruction*>(inst_.raw());
+ Instruction* load;
+ Instruction* call;
+
+ // There might be a constant pool at the very first instruction.
+ first = first->skipPool();
+
+ // Skip the stack pointer restore instruction.
+ if (first->IsStackPtrSync()) {
+ first = first->InstructionAtOffset(vixl::kInstructionSize)->skipPool();
+ }
+
+ load = const_cast<Instruction*>(first);
+
+ // The call instruction follows the load, but there may be an injected
+ // constant pool.
+ call = const_cast<Instruction*>(
+ load->InstructionAtOffset(vixl::kInstructionSize)->skipPool());
+
+ if (call->IsBLR() == enabled) {
+ return;
+ }
+
+ if (call->IsBLR()) {
+ // If the second instruction is blr(), then we have:
+ // ldr x17, [pc, offset]
+ // blr x17
+ MOZ_ASSERT(load->IsLDR());
+ // We want to transform this to:
+ // adr xzr, [pc, offset]
+ // nop
+ int32_t offset = load->ImmLLiteral();
+ adr(load, xzr, int32_t(offset));
+ nop(call);
+ } else {
+ // We have:
+ // adr xzr, [pc, offset] (or ldr x17, [pc, offset])
+ // nop
+ MOZ_ASSERT(load->IsADR() || load->IsLDR());
+ MOZ_ASSERT(call->IsNOP());
+ // Transform this to:
+ // ldr x17, [pc, offset]
+ // blr x17
+ int32_t offset = (int)load->ImmPCRawOffset();
+ MOZ_ASSERT(vixl::IsInt19(offset));
+ ldr(load, ScratchReg2_64, int32_t(offset));
+ blr(call, ScratchReg2_64);
+ }
+}
+
+// Patches loads generated by MacroAssemblerCompat::mov(CodeLabel*, Register).
+// The loading code is implemented in movePatchablePtr().
+void Assembler::UpdateLoad64Value(Instruction* inst0, uint64_t value) {
+ MOZ_ASSERT(inst0->IsLDR());
+ uint64_t* literal = inst0->LiteralAddress<uint64_t*>();
+ *literal = value;
+}
+
+class RelocationIterator {
+ CompactBufferReader reader_;
+ uint32_t offset_ = 0;
+
+ public:
+ explicit RelocationIterator(CompactBufferReader& reader) : reader_(reader) {}
+
+ bool read() {
+ if (!reader_.more()) {
+ return false;
+ }
+ offset_ = reader_.readUnsigned();
+ return true;
+ }
+
+ uint32_t offset() const { return offset_; }
+};
+
+static JitCode* CodeFromJump(JitCode* code, uint8_t* jump) {
+ const Instruction* inst = (const Instruction*)jump;
+ uint8_t* target;
+
+ // We're expecting a call created by MacroAssembler::call(JitCode*).
+ // It looks like:
+ //
+ // ldr scratch, [pc, offset]
+ // blr scratch
+ //
+ // If the call has been toggled by ToggleCall(), it looks like:
+ //
+ // adr xzr, [pc, offset]
+ // nop
+ //
+ // There might be a constant pool at the very first instruction.
+ // See also ToggleCall().
+ inst = inst->skipPool();
+
+ // Skip the stack pointer restore instruction.
+ if (inst->IsStackPtrSync()) {
+ inst = inst->InstructionAtOffset(vixl::kInstructionSize)->skipPool();
+ }
+
+ if (inst->BranchType() != vixl::UnknownBranchType) {
+ // This is an immediate branch.
+ target = (uint8_t*)inst->ImmPCOffsetTarget();
+ } else if (inst->IsLDR()) {
+ // This is an ldr+blr call that is enabled. See ToggleCall().
+ mozilla::DebugOnly<const Instruction*> nextInst =
+ inst->InstructionAtOffset(vixl::kInstructionSize)->skipPool();
+ MOZ_ASSERT(nextInst->IsNOP() || nextInst->IsBLR());
+ target = (uint8_t*)inst->Literal64();
+ } else if (inst->IsADR()) {
+ // This is a disabled call: adr+nop. See ToggleCall().
+ mozilla::DebugOnly<const Instruction*> nextInst =
+ inst->InstructionAtOffset(vixl::kInstructionSize)->skipPool();
+ MOZ_ASSERT(nextInst->IsNOP());
+ ptrdiff_t offset = inst->ImmPCRawOffset() << vixl::kLiteralEntrySizeLog2;
+ // This is what Literal64 would do with the corresponding ldr.
+ memcpy(&target, inst + offset, sizeof(target));
+ } else {
+ MOZ_CRASH("Unrecognized jump instruction.");
+ }
+
+ // If the jump is within the code buffer, it uses the extended jump table.
+ if (target >= code->raw() &&
+ target < code->raw() + code->instructionsSize()) {
+ MOZ_ASSERT(target + Assembler::SizeOfJumpTableEntry <=
+ code->raw() + code->instructionsSize());
+
+ uint8_t** patchablePtr =
+ (uint8_t**)(target + Assembler::OffsetOfJumpTableEntryPointer);
+ target = *patchablePtr;
+ }
+
+ return JitCode::FromExecutable(target);
+}
+
+void Assembler::TraceJumpRelocations(JSTracer* trc, JitCode* code,
+ CompactBufferReader& reader) {
+ RelocationIterator iter(reader);
+ while (iter.read()) {
+ JitCode* child = CodeFromJump(code, code->raw() + iter.offset());
+ TraceManuallyBarrieredEdge(trc, &child, "rel32");
+ MOZ_ASSERT(child == CodeFromJump(code, code->raw() + iter.offset()));
+ }
+}
+
+/* static */
+void Assembler::TraceDataRelocations(JSTracer* trc, JitCode* code,
+ CompactBufferReader& reader) {
+ mozilla::Maybe<AutoWritableJitCode> awjc;
+
+ uint8_t* buffer = code->raw();
+
+ while (reader.more()) {
+ size_t offset = reader.readUnsigned();
+ Instruction* load = (Instruction*)&buffer[offset];
+
+ // The only valid traceable operation is a 64-bit load to an ARMRegister.
+ // Refer to movePatchablePtr() for generation.
+ MOZ_ASSERT(load->Mask(vixl::LoadLiteralMask) == vixl::LDR_x_lit);
+
+ uintptr_t* literalAddr = load->LiteralAddress<uintptr_t*>();
+ uintptr_t literal = *literalAddr;
+
+ // Data relocations can be for Values or for raw pointers. If a Value is
+ // zero-tagged, we can trace it as if it were a raw pointer. If a Value
+ // is not zero-tagged, we have to interpret it as a Value to ensure that the
+ // tag bits are masked off to recover the actual pointer.
+
+ if (literal >> JSVAL_TAG_SHIFT) {
+ // This relocation is a Value with a non-zero tag.
+ Value v = Value::fromRawBits(literal);
+ TraceManuallyBarrieredEdge(trc, &v, "jit-masm-value");
+ if (*literalAddr != v.asRawBits()) {
+ if (awjc.isNothing()) {
+ awjc.emplace(code);
+ }
+ *literalAddr = v.asRawBits();
+ }
+ continue;
+ }
+
+ // This relocation is a raw pointer or a Value with a zero tag.
+ // No barriers needed since the pointers are constants.
+ gc::Cell* cell = reinterpret_cast<gc::Cell*>(literal);
+ MOZ_ASSERT(gc::IsCellPointerValid(cell));
+ TraceManuallyBarrieredGenericPointerEdge(trc, &cell, "jit-masm-ptr");
+ if (uintptr_t(cell) != literal) {
+ if (awjc.isNothing()) {
+ awjc.emplace(code);
+ }
+ *literalAddr = uintptr_t(cell);
+ }
+ }
+}
+
+void Assembler::retarget(Label* label, Label* target) {
+#ifdef JS_DISASM_ARM64
+ spew_.spewRetarget(label, target);
+#endif
+ if (label->used()) {
+ if (target->bound()) {
+ bind(label, BufferOffset(target));
+ } else if (target->used()) {
+ // The target is not bound but used. Prepend label's branch list
+ // onto target's.
+ BufferOffset labelBranchOffset(label);
+
+ // Find the head of the use chain for label.
+ BufferOffset next = NextLink(labelBranchOffset);
+ while (next.assigned()) {
+ labelBranchOffset = next;
+ next = NextLink(next);
+ }
+
+ // Then patch the head of label's use chain to the tail of target's
+ // use chain, prepending the entire use chain of target.
+ SetNextLink(labelBranchOffset, BufferOffset(target));
+ target->use(label->offset());
+ } else {
+ // The target is unbound and unused. We can just take the head of
+ // the list hanging off of label, and dump that into target.
+ target->use(label->offset());
+ }
+ }
+ label->reset();
+}
+
+} // namespace jit
+} // namespace js
diff --git a/js/src/jit/arm64/Assembler-arm64.h b/js/src/jit/arm64/Assembler-arm64.h
new file mode 100644
index 0000000000..9745e9d262
--- /dev/null
+++ b/js/src/jit/arm64/Assembler-arm64.h
@@ -0,0 +1,793 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef A64_ASSEMBLER_A64_H_
+#define A64_ASSEMBLER_A64_H_
+
+#include <iterator>
+
+#include "jit/arm64/vixl/Assembler-vixl.h"
+
+#include "jit/CompactBuffer.h"
+#include "jit/shared/Disassembler-shared.h"
+#include "wasm/WasmTypeDecls.h"
+
+namespace js {
+namespace jit {
+
+// VIXL imports.
+typedef vixl::Register ARMRegister;
+typedef vixl::FPRegister ARMFPRegister;
+using vixl::ARMBuffer;
+using vixl::Instruction;
+
+using LabelDoc = DisassemblerSpew::LabelDoc;
+using LiteralDoc = DisassemblerSpew::LiteralDoc;
+
+static const uint32_t AlignmentAtPrologue = 0;
+static const uint32_t AlignmentMidPrologue = 8;
+static const Scale ScalePointer = TimesEight;
+
+// The MacroAssembler uses scratch registers extensively and unexpectedly.
+// For safety, scratch registers should always be acquired using
+// vixl::UseScratchRegisterScope.
+static constexpr Register ScratchReg{Registers::ip0};
+static constexpr ARMRegister ScratchReg64 = {ScratchReg, 64};
+
+static constexpr Register ScratchReg2{Registers::ip1};
+static constexpr ARMRegister ScratchReg2_64 = {ScratchReg2, 64};
+
+static constexpr FloatRegister ReturnDoubleReg = {FloatRegisters::d0,
+ FloatRegisters::Double};
+static constexpr FloatRegister ScratchDoubleReg_ = {FloatRegisters::d31,
+ FloatRegisters::Double};
+struct ScratchDoubleScope : public AutoFloatRegisterScope {
+ explicit ScratchDoubleScope(MacroAssembler& masm)
+ : AutoFloatRegisterScope(masm, ScratchDoubleReg_) {}
+};
+
+static constexpr FloatRegister ReturnFloat32Reg = {FloatRegisters::s0,
+ FloatRegisters::Single};
+static constexpr FloatRegister ScratchFloat32Reg_ = {FloatRegisters::s31,
+ FloatRegisters::Single};
+struct ScratchFloat32Scope : public AutoFloatRegisterScope {
+ explicit ScratchFloat32Scope(MacroAssembler& masm)
+ : AutoFloatRegisterScope(masm, ScratchFloat32Reg_) {}
+};
+
+#ifdef ENABLE_WASM_SIMD
+static constexpr FloatRegister ReturnSimd128Reg = {FloatRegisters::v0,
+ FloatRegisters::Simd128};
+static constexpr FloatRegister ScratchSimd128Reg = {FloatRegisters::v31,
+ FloatRegisters::Simd128};
+struct ScratchSimd128Scope : public AutoFloatRegisterScope {
+ explicit ScratchSimd128Scope(MacroAssembler& masm)
+ : AutoFloatRegisterScope(masm, ScratchSimd128Reg) {}
+};
+#else
+struct ScratchSimd128Scope : public AutoFloatRegisterScope {
+ explicit ScratchSimd128Scope(MacroAssembler& masm)
+ : AutoFloatRegisterScope(masm, ScratchDoubleReg_) {
+ MOZ_CRASH("SIMD not enabled");
+ }
+};
+#endif
+
+static constexpr Register InvalidReg{Registers::Invalid};
+static constexpr FloatRegister InvalidFloatReg = {};
+
+static constexpr Register OsrFrameReg{Registers::x3};
+static constexpr Register CallTempReg0{Registers::x9};
+static constexpr Register CallTempReg1{Registers::x10};
+static constexpr Register CallTempReg2{Registers::x11};
+static constexpr Register CallTempReg3{Registers::x12};
+static constexpr Register CallTempReg4{Registers::x13};
+static constexpr Register CallTempReg5{Registers::x14};
+
+static constexpr Register PreBarrierReg{Registers::x1};
+
+static constexpr Register InterpreterPCReg{Registers::x9};
+
+static constexpr Register ReturnReg{Registers::x0};
+static constexpr Register64 ReturnReg64(ReturnReg);
+static constexpr Register JSReturnReg{Registers::x2};
+static constexpr Register FramePointer{Registers::fp};
+static constexpr ARMRegister FramePointer64{FramePointer, 64};
+static constexpr Register ZeroRegister{Registers::sp};
+static constexpr ARMRegister ZeroRegister64{Registers::sp, 64};
+static constexpr ARMRegister ZeroRegister32{Registers::sp, 32};
+
+// [SMDOC] AArch64 Stack Pointer and Pseudo Stack Pointer conventions
+//
+// ================
+//
+// Stack pointer (SP), PseudoStackPointer (PSP), and RealStackPointer:
+//
+// The ARM64 real SP has a constraint: it must be 16-byte aligned whenever it
+// is used as the base pointer for a memory access. (SP+offset need not be
+// 16-byte aligned, but the SP value itself must be.) The SP register may
+// take on unaligned values but may not be used for a memory access while it
+// is unaligned.
+//
+// Stack-alignment checking can be enabled or disabled by a control register;
+// however that register cannot be modified by user space. We have to assume
+// stack alignment checking is enabled, and that does usually appear to be the
+// case. See the ARM Architecture Reference Manual, "D1.8.2 SP alignment
+// checking", for further details.
+//
+// A second constraint is forced upon us by the ARM64 ABI. This requires that
+// all accesses to the stack must be at or above SP. Accesses below SP are
+// strictly forbidden, presumably because the kernel might use that area of
+// memory for its own purposes -- in particular, signal delivery -- and hence
+// it may get trashed at any time.
+//
+// Note this doesn't mean that accesses to the stack must be based off
+// register SP. Only that the effective addresses must be >= SP, regardless
+// of how the address is formed.
+//
+// In order to allow word-wise pushes and pops, some of our ARM64 jits
+// (JS-Baseline, JS-Ion, and Wasm-Ion, but not Wasm-Baseline) dedicate x28 to
+// be used as a PseudoStackPointer (PSP).
+//
+// Initially the PSP will have the same value as the SP. Code can, if it
+// wants, push a single word by subtracting 8 from the PSP, doing SP := PSP,
+// then storing the value at PSP+0. Given other constraints on the alignment
+// of the SP at function call boundaries, this works out OK, at the cost of
+// the two extra instructions per push / pop.
+//
+// This is all a bit messy, and is probably not robustly adhered to. However,
+// the following appear to be the intended, and mostly implemented, current
+// invariants:
+//
+// (1) PSP is "primary", SP is "secondary". Most stack refs are
+// PSP-relative. SP-relative is rare and (obviously) only done when we
+// know that SP is aligned.
+//
+// (2) At all times, the relationship SP <= PSP is maintained. The fact that
+// SP may validly be less than PSP means that pushes on the stack force
+// the two values to become equal, by copying PSP into SP. However, pops
+// behave differently: PSP moves back up and SP stays the same, since that
+// doesn't break the SP <= PSP invariant.
+//
+// (3) However, immediately before a call instruction, SP and PSP must be the
+// same. To enforce this, PSP is copied into SP by the arm64-specific
+// MacroAssembler::call routines.
+//
+// (4) Also, after a function has returned, it is expected that SP holds the
+// "primary" value. How exactly this is implemented remains not entirely
+// clear and merits further investigation. The following points are
+// believed to be relevant:
+//
+// - For calls to functions observing the system AArch64 ABI, PSP (x28) is
+// callee-saved. That, combined with (3) above, implies SP == PSP
+// immediately after the call returns.
+//
+// - JIT-generated routines return using MacroAssemblerCompat::retn, and
+// that copies PSP into SP (bizarrely; this would make more sense if it
+// copied SP into PSP); but in any case, the point is that they are the
+// same at the point that the return instruction executes.
+//
+// - MacroAssembler::callWithABIPost copies PSP into SP after the return
+// of a call requiring dynamic alignment.
+//
+// Given the above, it is unclear exactly where in the return sequence it
+// is expected that SP == PSP, and also whether it is the callee or caller
+// that is expected to enforce it.
+//
+// In general it would be nice to be able to move (at some time in the future,
+// not now) to a world where *every* assignment to PSP or SP is followed
+// immediately by a copy into the other register. That would make all
+// required correctness proofs trivial in the sense that it would require only
+// local inspection of code immediately following (dominated by) any such
+// assignment. For the moment, however, this is a guideline, not a hard
+// requirement.
+//
+// ================
+//
+// Mechanics of keeping the stack pointers in sync:
+//
+// The following two methods require that the masm's SP has been set to the PSP
+// with MacroAssembler::SetStackPointer64(PseudoStackPointer64), or they will be
+// no-ops. The setup is performed manually by the jits after creating the masm.
+//
+// * MacroAssembler::syncStackPtr() performs SP := PSP, presumably after PSP has
+// been updated, so SP needs to move too. This is used pretty liberally
+// throughout the code base.
+//
+// * MacroAssembler::initPseudoStackPtr() performs PSP := SP. This can be used
+// after calls to non-ABI compliant code; it's not used much.
+//
+// In the ARM64 assembler there is a function Instruction::IsStackPtrSync() that
+// recognizes the instruction emitted by syncStackPtr(), and this is used to
+// skip that instruction a few places, should it be present, in the JS JIT where
+// code is generated to deal with toggled calls.
+//
+// In various places there are calls to MacroAssembler::syncStackPtr() which
+// appear to be redundant. Investigation shows that they often are redundant,
+// but not always. Finding and removing such redundancies would be quite some
+// work, so we live for now with the occasional redundant update. Perusal of
+// the Cortex-A55 and -A72 optimization guides shows no evidence that such
+// assignments are any more expensive than assignments between vanilla integer
+// registers, so the costs of such redundant updates are assumed to be small.
+//
+// Invariants on the PSP at function call boundaries:
+//
+// It *appears* that the following invariants exist:
+//
+// * On entry to JIT code, PSP == SP, ie the stack pointer is transmitted via
+// both registers.
+//
+// * On entry to C++ code, PSP == SP. Certainly it appears that all calls
+// created by the MacroAssembler::call(..) routines perform 'syncStackPtr'
+// immediately before the call, and all ABI calls are routed through the
+// MacroAssembler::call layer.
+//
+// * The stubs generated by WasmStubs.cpp assume that, on entry, SP is the
+// active stack pointer and that PSP is dead.
+//
+// * The PSP is non-volatile (callee-saved). Along a normal return path from
+// JIT code, simply having PSP == SP on exit is correct, since the exit SP is
+// the same as the entry SP by the JIT ABI.
+//
+// * Call-outs to non-JIT C++ code do not need to set up the PSP (it won't be
+// used), and will not need to restore the PSP on return because x28 is
+// non-volatile in the ARM64 ABI.
+//
+// ================
+//
+// Future cleanups to the SP-vs-PSP machinery:
+//
+// Currently we have somewhat unclear invariants, which are not obviously
+// always enforced, and which may require complex non-local reasoning.
+// Auditing the code to ensure that the invariants always hold, whilst not
+// generating duplicate syncs, is close to impossible. A future rework to
+// tidy this might be as follows. (This suggestion pertains the the entire
+// JIT complex: all of the JS compilers, wasm compilers, stub generators,
+// regexp compilers, etc).
+//
+// Currently we have that, in JIT-generated code, PSP is "primary" and SP is
+// "secondary", meaning that PSP has the "real" stack pointer value and SP is
+// updated whenever PSP acquires a lower value, so as to ensure that SP <= PSP.
+// An exception to this scheme is the stubs code generated by WasmStubs.cpp,
+// which assumes that SP is "primary" and PSP is dead.
+//
+// It might give us an easier incremental path to eventually removing PSP
+// entirely if we switched to having SP always be the primary. That is:
+//
+// (1) SP is primary, PSP is secondary
+// (2) After any assignment to SP, it is copied into PSP
+// (3) All (non-frame-pointer-based) stack accesses are PSP-relative
+// (as at present)
+//
+// This would have the effect that:
+//
+// * It would reinstate the invariant that on all targets, the "real" SP value
+// is in the ABI-and-or-hardware-mandated stack pointer register.
+//
+// * It would give us a simple story about calls and returns:
+// - for calls to non-JIT generated code (viz, C++ etc), we need no extra
+// copies, because PSP (x28) is callee-saved
+// - for calls to JIT-generated code, we need no extra copies, because of (2)
+// above
+//
+// * We could incrementally migrate those parts of the code generator where we
+// know that SP is 16-aligned, to use SP- rather than PSP-relative accesses
+//
+// * The consistent use of (2) would remove the requirement to have to perform
+// path-dependent reasoning (for paths in the generated code, not in the
+// compiler) when reading/understanding the code.
+//
+// * x28 would become free for use by stubs and the baseline compiler without
+// having to worry about interoperating with code that expects x28 to hold a
+// valid PSP.
+//
+// One might ask what mechanical checks we can add to ensure correctness, rather
+// than having to verify these invariants by hand indefinitely. Maybe some
+// combination of:
+//
+// * In debug builds, compiling-in assert(SP == PSP) at critical places. This
+// can be done using the existing `assertStackPtrsSynced` function.
+//
+// * In debug builds, scanning sections of generated code to ensure no
+// SP-relative stack accesses have been created -- for some sections, at
+// least every assignment to SP is immediately followed by a copy to x28.
+// This would also facilitate detection of duplicate syncs.
+//
+// ================
+//
+// Other investigative notes, for the code base at present:
+//
+// * Some disassembly dumps suggest that we sync the stack pointer too often.
+// This could be the result of various pieces of code working at cross
+// purposes when syncing the stack pointer, or of not paying attention to the
+// precise invariants.
+//
+// * As documented in RegExpNativeMacroAssembler.cpp, function
+// SMRegExpMacroAssembler::createStackFrame:
+//
+// // ARM64 communicates stack address via SP, but uses a pseudo-sp (PSP) for
+// // addressing. The register we use for PSP may however also be used by
+// // calling code, and it is nonvolatile, so save it. Do this as a special
+// // case first because the generic save/restore code needs the PSP to be
+// // initialized already.
+//
+// and also in function SMRegExpMacroAssembler::exitHandler:
+//
+// // Restore the saved value of the PSP register, this value is whatever the
+// // caller had saved in it, not any actual SP value, and it must not be
+// // overwritten subsequently.
+//
+// The original source for these comments was a patch for bug 1445907.
+//
+// * MacroAssembler-arm64.h has an interesting comment in the retn()
+// function:
+//
+// syncStackPtr(); // SP is always used to transmit the stack between calls.
+//
+// Same comment at abiret() in that file, and in MacroAssembler-arm64.cpp,
+// at callWithABIPre and callWithABIPost.
+//
+// * In Trampoline-arm64.cpp function JitRuntime::generateVMWrapper we find
+//
+// // SP is used to transfer stack across call boundaries.
+// masm.initPseudoStackPtr();
+//
+// after the return point of a callWithVMWrapper. The only reasonable
+// conclusion from all those (assuming they are right) is that SP == PSP.
+//
+// * Wasm-Baseline does not use the PSP, but as Wasm-Ion code requires SP==PSP
+// and tiered code can have Baseline->Ion calls, Baseline will set PSP=SP
+// before a call to wasm code.
+//
+// ================
+
+// StackPointer is intentionally undefined on ARM64 to prevent misuse: using
+// sp as a base register is only valid if sp % 16 == 0.
+static constexpr Register RealStackPointer{Registers::sp};
+
+static constexpr Register PseudoStackPointer{Registers::x28};
+static constexpr ARMRegister PseudoStackPointer64 = {Registers::x28, 64};
+static constexpr ARMRegister PseudoStackPointer32 = {Registers::x28, 32};
+
+static constexpr Register IntArgReg0{Registers::x0};
+static constexpr Register IntArgReg1{Registers::x1};
+static constexpr Register IntArgReg2{Registers::x2};
+static constexpr Register IntArgReg3{Registers::x3};
+static constexpr Register IntArgReg4{Registers::x4};
+static constexpr Register IntArgReg5{Registers::x5};
+static constexpr Register IntArgReg6{Registers::x6};
+static constexpr Register IntArgReg7{Registers::x7};
+static constexpr Register HeapReg{Registers::x21};
+
+// Define unsized Registers.
+#define DEFINE_UNSIZED_REGISTERS(N) \
+ static constexpr Register r##N{Registers::x##N};
+REGISTER_CODE_LIST(DEFINE_UNSIZED_REGISTERS)
+#undef DEFINE_UNSIZED_REGISTERS
+static constexpr Register ip0{Registers::x16};
+static constexpr Register ip1{Registers::x17};
+static constexpr Register fp{Registers::x29};
+static constexpr Register lr{Registers::x30};
+static constexpr Register rzr{Registers::xzr};
+
+// Import VIXL registers into the js::jit namespace.
+#define IMPORT_VIXL_REGISTERS(N) \
+ static constexpr ARMRegister w##N = vixl::w##N; \
+ static constexpr ARMRegister x##N = vixl::x##N;
+REGISTER_CODE_LIST(IMPORT_VIXL_REGISTERS)
+#undef IMPORT_VIXL_REGISTERS
+static constexpr ARMRegister wzr = vixl::wzr;
+static constexpr ARMRegister xzr = vixl::xzr;
+static constexpr ARMRegister wsp = vixl::wsp;
+static constexpr ARMRegister sp = vixl::sp;
+
+// Import VIXL VRegisters into the js::jit namespace.
+#define IMPORT_VIXL_VREGISTERS(N) \
+ static constexpr ARMFPRegister s##N = vixl::s##N; \
+ static constexpr ARMFPRegister d##N = vixl::d##N;
+REGISTER_CODE_LIST(IMPORT_VIXL_VREGISTERS)
+#undef IMPORT_VIXL_VREGISTERS
+
+static constexpr ValueOperand JSReturnOperand = ValueOperand(JSReturnReg);
+
+// Registers used by RegExpMatcher and RegExpExecMatch stubs (do not use
+// JSReturnOperand).
+static constexpr Register RegExpMatcherRegExpReg = CallTempReg0;
+static constexpr Register RegExpMatcherStringReg = CallTempReg1;
+static constexpr Register RegExpMatcherLastIndexReg = CallTempReg2;
+
+// Registers used by RegExpExecTest stub (do not use ReturnReg).
+static constexpr Register RegExpExecTestRegExpReg = CallTempReg0;
+static constexpr Register RegExpExecTestStringReg = CallTempReg1;
+
+// Registers used by RegExpSearcher stub (do not use ReturnReg).
+static constexpr Register RegExpSearcherRegExpReg = CallTempReg0;
+static constexpr Register RegExpSearcherStringReg = CallTempReg1;
+static constexpr Register RegExpSearcherLastIndexReg = CallTempReg2;
+
+static constexpr Register JSReturnReg_Type = r3;
+static constexpr Register JSReturnReg_Data = r2;
+
+static constexpr FloatRegister NANReg = {FloatRegisters::d14,
+ FloatRegisters::Single};
+// N.B. r8 isn't listed as an aapcs temp register, but we can use it as such
+// because we never use return-structs.
+static constexpr Register CallTempNonArgRegs[] = {r8, r9, r10, r11,
+ r12, r13, r14, r15};
+static const uint32_t NumCallTempNonArgRegs = std::size(CallTempNonArgRegs);
+
+static constexpr uint32_t JitStackAlignment = 16;
+
+static constexpr uint32_t JitStackValueAlignment =
+ JitStackAlignment / sizeof(Value);
+static_assert(JitStackAlignment % sizeof(Value) == 0 &&
+ JitStackValueAlignment >= 1,
+ "Stack alignment should be a non-zero multiple of sizeof(Value)");
+
+static constexpr uint32_t SimdMemoryAlignment = 16;
+
+static_assert(CodeAlignment % SimdMemoryAlignment == 0,
+ "Code alignment should be larger than any of the alignments "
+ "which are used for "
+ "the constant sections of the code buffer. Thus it should be "
+ "larger than the "
+ "alignment for SIMD constants.");
+
+static const uint32_t WasmStackAlignment = SimdMemoryAlignment;
+static const uint32_t WasmTrapInstructionLength = 4;
+
+// See comments in wasm::GenerateFunctionPrologue. The difference between these
+// is the size of the largest callable prologue on the platform.
+static constexpr uint32_t WasmCheckedCallEntryOffset = 0u;
+
+class Assembler : public vixl::Assembler {
+ public:
+ Assembler() : vixl::Assembler() {}
+
+ typedef vixl::Condition Condition;
+
+ void finish();
+ bool appendRawCode(const uint8_t* code, size_t numBytes);
+ bool reserve(size_t size);
+ bool swapBuffer(wasm::Bytes& bytes);
+
+ // Emit the jump table, returning the BufferOffset to the first entry in the
+ // table.
+ BufferOffset emitExtendedJumpTable();
+ BufferOffset ExtendedJumpTable_;
+ void executableCopy(uint8_t* buffer);
+
+ BufferOffset immPool(ARMRegister dest, uint8_t* value, vixl::LoadLiteralOp op,
+ const LiteralDoc& doc,
+ ARMBuffer::PoolEntry* pe = nullptr);
+ BufferOffset immPool64(ARMRegister dest, uint64_t value,
+ ARMBuffer::PoolEntry* pe = nullptr);
+ BufferOffset fImmPool(ARMFPRegister dest, uint8_t* value,
+ vixl::LoadLiteralOp op, const LiteralDoc& doc);
+ BufferOffset fImmPool64(ARMFPRegister dest, double value);
+ BufferOffset fImmPool32(ARMFPRegister dest, float value);
+
+ uint32_t currentOffset() const { return nextOffset().getOffset(); }
+
+ void bind(Label* label) { bind(label, nextOffset()); }
+ void bind(Label* label, BufferOffset boff);
+ void bind(CodeLabel* label) { label->target()->bind(currentOffset()); }
+
+ void setUnlimitedBuffer() { armbuffer_.setUnlimited(); }
+ bool oom() const {
+ return AssemblerShared::oom() || armbuffer_.oom() ||
+ jumpRelocations_.oom() || dataRelocations_.oom();
+ }
+
+ void copyJumpRelocationTable(uint8_t* dest) const {
+ if (jumpRelocations_.length()) {
+ memcpy(dest, jumpRelocations_.buffer(), jumpRelocations_.length());
+ }
+ }
+ void copyDataRelocationTable(uint8_t* dest) const {
+ if (dataRelocations_.length()) {
+ memcpy(dest, dataRelocations_.buffer(), dataRelocations_.length());
+ }
+ }
+
+ size_t jumpRelocationTableBytes() const { return jumpRelocations_.length(); }
+ size_t dataRelocationTableBytes() const { return dataRelocations_.length(); }
+ size_t bytesNeeded() const {
+ return SizeOfCodeGenerated() + jumpRelocationTableBytes() +
+ dataRelocationTableBytes();
+ }
+
+ void processCodeLabels(uint8_t* rawCode) {
+ for (const CodeLabel& label : codeLabels_) {
+ Bind(rawCode, label);
+ }
+ }
+
+ static void UpdateLoad64Value(Instruction* inst0, uint64_t value);
+
+ static void Bind(uint8_t* rawCode, const CodeLabel& label) {
+ auto mode = label.linkMode();
+ size_t patchAtOffset = label.patchAt().offset();
+ size_t targetOffset = label.target().offset();
+
+ if (mode == CodeLabel::MoveImmediate) {
+ Instruction* inst = (Instruction*)(rawCode + patchAtOffset);
+ Assembler::UpdateLoad64Value(inst, (uint64_t)(rawCode + targetOffset));
+ } else {
+ *reinterpret_cast<const void**>(rawCode + patchAtOffset) =
+ rawCode + targetOffset;
+ }
+ }
+
+ void retarget(Label* cur, Label* next);
+
+ // The buffer is about to be linked. Ensure any constant pools or
+ // excess bookkeeping has been flushed to the instruction stream.
+ void flush() { armbuffer_.flushPool(); }
+
+ void comment(const char* msg) {
+#ifdef JS_DISASM_ARM64
+ spew_.spew("; %s", msg);
+#endif
+ }
+
+ void setPrinter(Sprinter* sp) {
+#ifdef JS_DISASM_ARM64
+ spew_.setPrinter(sp);
+#endif
+ }
+
+ static bool SupportsFloatingPoint() { return true; }
+ static bool SupportsUnalignedAccesses() { return true; }
+ static bool SupportsFastUnalignedFPAccesses() { return true; }
+ static bool SupportsWasmSimd() { return true; }
+
+ static bool HasRoundInstruction(RoundingMode mode) {
+ switch (mode) {
+ case RoundingMode::Up:
+ case RoundingMode::Down:
+ case RoundingMode::NearestTiesToEven:
+ case RoundingMode::TowardsZero:
+ return true;
+ }
+ MOZ_CRASH("unexpected mode");
+ }
+
+ protected:
+ // Add a jump whose target is unknown until finalization.
+ // The jump may not be patched at runtime.
+ void addPendingJump(BufferOffset src, ImmPtr target, RelocationKind kind);
+
+ public:
+ static uint32_t PatchWrite_NearCallSize() { return 4; }
+
+ static uint32_t NopSize() { return 4; }
+
+ static void PatchWrite_NearCall(CodeLocationLabel start,
+ CodeLocationLabel toCall);
+ static void PatchDataWithValueCheck(CodeLocationLabel label,
+ PatchedImmPtr newValue,
+ PatchedImmPtr expected);
+
+ static void PatchDataWithValueCheck(CodeLocationLabel label, ImmPtr newValue,
+ ImmPtr expected);
+
+ static void PatchWrite_Imm32(CodeLocationLabel label, Imm32 imm) {
+ // Raw is going to be the return address.
+ uint32_t* raw = (uint32_t*)label.raw();
+ // Overwrite the 4 bytes before the return address, which will end up being
+ // the call instruction.
+ *(raw - 1) = imm.value;
+ }
+ static uint32_t AlignDoubleArg(uint32_t offset) {
+ MOZ_CRASH("AlignDoubleArg()");
+ }
+ static uintptr_t GetPointer(uint8_t* ptr) {
+ Instruction* i = reinterpret_cast<Instruction*>(ptr);
+ uint64_t ret = i->Literal64();
+ return ret;
+ }
+
+ // Toggle a jmp or cmp emitted by toggledJump().
+ static void ToggleToJmp(CodeLocationLabel inst_);
+ static void ToggleToCmp(CodeLocationLabel inst_);
+ static void ToggleCall(CodeLocationLabel inst_, bool enabled);
+
+ static void TraceJumpRelocations(JSTracer* trc, JitCode* code,
+ CompactBufferReader& reader);
+ static void TraceDataRelocations(JSTracer* trc, JitCode* code,
+ CompactBufferReader& reader);
+
+ void assertNoGCThings() const {
+#ifdef DEBUG
+ MOZ_ASSERT(dataRelocations_.length() == 0);
+ for (auto& j : pendingJumps_) {
+ MOZ_ASSERT(j.kind == RelocationKind::HARDCODED);
+ }
+#endif
+ }
+
+ public:
+ // A Jump table entry is 2 instructions, with 8 bytes of raw data
+ static const size_t SizeOfJumpTableEntry = 16;
+
+ struct JumpTableEntry {
+ uint32_t ldr;
+ uint32_t br;
+ void* data;
+
+ Instruction* getLdr() { return reinterpret_cast<Instruction*>(&ldr); }
+ };
+
+ // Offset of the patchable target for the given entry.
+ static const size_t OffsetOfJumpTableEntryPointer = 8;
+
+ public:
+ void writeCodePointer(CodeLabel* label) {
+ armbuffer_.assertNoPoolAndNoNops();
+ uintptr_t x = uintptr_t(-1);
+ BufferOffset off = EmitData(&x, sizeof(uintptr_t));
+ label->patchAt()->bind(off.getOffset());
+ }
+
+ void verifyHeapAccessDisassembly(uint32_t begin, uint32_t end,
+ const Disassembler::HeapAccess& heapAccess) {
+ MOZ_CRASH("verifyHeapAccessDisassembly");
+ }
+
+ protected:
+ // Structure for fixing up pc-relative loads/jumps when the machine
+ // code gets moved (executable copy, gc, etc.).
+ struct RelativePatch {
+ BufferOffset offset;
+ void* target;
+ RelocationKind kind;
+
+ RelativePatch(BufferOffset offset, void* target, RelocationKind kind)
+ : offset(offset), target(target), kind(kind) {}
+ };
+
+ // List of jumps for which the target is either unknown until finalization,
+ // or cannot be known due to GC. Each entry here requires a unique entry
+ // in the extended jump table, and is patched at finalization.
+ js::Vector<RelativePatch, 8, SystemAllocPolicy> pendingJumps_;
+
+ // Final output formatters.
+ CompactBufferWriter jumpRelocations_;
+ CompactBufferWriter dataRelocations_;
+};
+
+static const uint32_t NumIntArgRegs = 8;
+static const uint32_t NumFloatArgRegs = 8;
+
+class ABIArgGenerator {
+ public:
+ ABIArgGenerator()
+ : intRegIndex_(0), floatRegIndex_(0), stackOffset_(0), current_() {}
+
+ ABIArg next(MIRType argType);
+ ABIArg& current() { return current_; }
+ uint32_t stackBytesConsumedSoFar() const { return stackOffset_; }
+ void increaseStackOffset(uint32_t bytes) { stackOffset_ += bytes; }
+
+ protected:
+ unsigned intRegIndex_;
+ unsigned floatRegIndex_;
+ uint32_t stackOffset_;
+ ABIArg current_;
+};
+
+// These registers may be volatile or nonvolatile.
+static constexpr Register ABINonArgReg0 = r8;
+static constexpr Register ABINonArgReg1 = r9;
+static constexpr Register ABINonArgReg2 = r10;
+static constexpr Register ABINonArgReg3 = r11;
+
+// This register may be volatile or nonvolatile. Avoid d31 which is the
+// ScratchDoubleReg_.
+static constexpr FloatRegister ABINonArgDoubleReg = {FloatRegisters::s16,
+ FloatRegisters::Single};
+
+// These registers may be volatile or nonvolatile.
+// Note: these three registers are all guaranteed to be different
+static constexpr Register ABINonArgReturnReg0 = r8;
+static constexpr Register ABINonArgReturnReg1 = r9;
+static constexpr Register ABINonVolatileReg{Registers::x19};
+
+// This register is guaranteed to be clobberable during the prologue and
+// epilogue of an ABI call which must preserve both ABI argument, return
+// and non-volatile registers.
+static constexpr Register ABINonArgReturnVolatileReg = lr;
+
+// Instance pointer argument register for WebAssembly functions. This must not
+// alias any other register used for passing function arguments or return
+// values. Preserved by WebAssembly functions. Must be nonvolatile.
+static constexpr Register InstanceReg{Registers::x23};
+
+// Registers used for wasm table calls. These registers must be disjoint
+// from the ABI argument registers, InstanceReg and each other.
+static constexpr Register WasmTableCallScratchReg0 = ABINonArgReg0;
+static constexpr Register WasmTableCallScratchReg1 = ABINonArgReg1;
+static constexpr Register WasmTableCallSigReg = ABINonArgReg2;
+static constexpr Register WasmTableCallIndexReg = ABINonArgReg3;
+
+// Registers used for ref calls.
+static constexpr Register WasmCallRefCallScratchReg0 = ABINonArgReg0;
+static constexpr Register WasmCallRefCallScratchReg1 = ABINonArgReg1;
+static constexpr Register WasmCallRefReg = ABINonArgReg3;
+
+// Register used as a scratch along the return path in the fast js -> wasm stub
+// code. This must not overlap ReturnReg, JSReturnOperand, or InstanceReg.
+// It must be a volatile register.
+static constexpr Register WasmJitEntryReturnScratch = r9;
+
+static inline bool GetIntArgReg(uint32_t usedIntArgs, uint32_t usedFloatArgs,
+ Register* out) {
+ if (usedIntArgs >= NumIntArgRegs) {
+ return false;
+ }
+ *out = Register::FromCode(usedIntArgs);
+ return true;
+}
+
+static inline bool GetFloatArgReg(uint32_t usedIntArgs, uint32_t usedFloatArgs,
+ FloatRegister* out) {
+ if (usedFloatArgs >= NumFloatArgRegs) {
+ return false;
+ }
+ *out = FloatRegister::FromCode(usedFloatArgs);
+ return true;
+}
+
+// Get a register in which we plan to put a quantity that will be used as an
+// integer argument. This differs from GetIntArgReg in that if we have no more
+// actual argument registers to use we will fall back on using whatever
+// CallTempReg* don't overlap the argument registers, and only fail once those
+// run out too.
+static inline bool GetTempRegForIntArg(uint32_t usedIntArgs,
+ uint32_t usedFloatArgs, Register* out) {
+ if (GetIntArgReg(usedIntArgs, usedFloatArgs, out)) {
+ return true;
+ }
+ // Unfortunately, we have to assume things about the point at which
+ // GetIntArgReg returns false, because we need to know how many registers it
+ // can allocate.
+ usedIntArgs -= NumIntArgRegs;
+ if (usedIntArgs >= NumCallTempNonArgRegs) {
+ return false;
+ }
+ *out = CallTempNonArgRegs[usedIntArgs];
+ return true;
+}
+
+inline Imm32 Imm64::firstHalf() const { return low(); }
+
+inline Imm32 Imm64::secondHalf() const { return hi(); }
+
+// Forbids nop filling for testing purposes. Not nestable.
+class AutoForbidNops {
+ protected:
+ Assembler* asm_;
+
+ public:
+ explicit AutoForbidNops(Assembler* asm_) : asm_(asm_) { asm_->enterNoNops(); }
+ ~AutoForbidNops() { asm_->leaveNoNops(); }
+};
+
+// Forbids pool generation during a specified interval. Not nestable.
+class AutoForbidPoolsAndNops : public AutoForbidNops {
+ public:
+ AutoForbidPoolsAndNops(Assembler* asm_, size_t maxInst)
+ : AutoForbidNops(asm_) {
+ asm_->enterNoPool(maxInst);
+ }
+ ~AutoForbidPoolsAndNops() { asm_->leaveNoPool(); }
+};
+
+} // namespace jit
+} // namespace js
+
+#endif // A64_ASSEMBLER_A64_H_
diff --git a/js/src/jit/arm64/CodeGenerator-arm64.cpp b/js/src/jit/arm64/CodeGenerator-arm64.cpp
new file mode 100644
index 0000000000..d738ea548e
--- /dev/null
+++ b/js/src/jit/arm64/CodeGenerator-arm64.cpp
@@ -0,0 +1,4245 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "jit/arm64/CodeGenerator-arm64.h"
+
+#include "mozilla/DebugOnly.h"
+#include "mozilla/MathAlgorithms.h"
+
+#include "jsnum.h"
+
+#include "jit/CodeGenerator.h"
+#include "jit/InlineScriptTree.h"
+#include "jit/JitRuntime.h"
+#include "jit/MIR.h"
+#include "jit/MIRGraph.h"
+#include "jit/ReciprocalMulConstants.h"
+#include "vm/JSContext.h"
+#include "vm/Realm.h"
+#include "vm/Shape.h"
+
+#include "jit/shared/CodeGenerator-shared-inl.h"
+#include "vm/JSScript-inl.h"
+
+using namespace js;
+using namespace js::jit;
+
+using JS::GenericNaN;
+using mozilla::FloorLog2;
+using mozilla::Maybe;
+using mozilla::NegativeInfinity;
+using mozilla::Nothing;
+using mozilla::Some;
+
+// shared
+CodeGeneratorARM64::CodeGeneratorARM64(MIRGenerator* gen, LIRGraph* graph,
+ MacroAssembler* masm)
+ : CodeGeneratorShared(gen, graph, masm) {}
+
+bool CodeGeneratorARM64::generateOutOfLineCode() {
+ AutoCreatedBy acb(masm, "CodeGeneratorARM64::generateOutOfLineCode");
+
+ if (!CodeGeneratorShared::generateOutOfLineCode()) {
+ return false;
+ }
+
+ if (deoptLabel_.used()) {
+ // All non-table-based bailouts will go here.
+ masm.bind(&deoptLabel_);
+
+ // Store the frame size, so the handler can recover the IonScript.
+ masm.push(Imm32(frameSize()));
+
+ TrampolinePtr handler = gen->jitRuntime()->getGenericBailoutHandler();
+ masm.jump(handler);
+ }
+
+ return !masm.oom();
+}
+
+void CodeGeneratorARM64::emitBranch(Assembler::Condition cond,
+ MBasicBlock* mirTrue,
+ MBasicBlock* mirFalse) {
+ if (isNextBlock(mirFalse->lir())) {
+ jumpToBlock(mirTrue, cond);
+ } else {
+ jumpToBlock(mirFalse, Assembler::InvertCondition(cond));
+ jumpToBlock(mirTrue);
+ }
+}
+
+void OutOfLineBailout::accept(CodeGeneratorARM64* codegen) {
+ codegen->visitOutOfLineBailout(this);
+}
+
+void CodeGenerator::visitTestIAndBranch(LTestIAndBranch* test) {
+ Register input = ToRegister(test->input());
+ MBasicBlock* mirTrue = test->ifTrue();
+ MBasicBlock* mirFalse = test->ifFalse();
+
+ // Jump to the True block if NonZero.
+ // Jump to the False block if Zero.
+ if (isNextBlock(mirFalse->lir())) {
+ masm.branch32(Assembler::NonZero, input, Imm32(0),
+ getJumpLabelForBranch(mirTrue));
+ } else {
+ masm.branch32(Assembler::Zero, input, Imm32(0),
+ getJumpLabelForBranch(mirFalse));
+ if (!isNextBlock(mirTrue->lir())) {
+ jumpToBlock(mirTrue);
+ }
+ }
+}
+
+void CodeGenerator::visitCompare(LCompare* comp) {
+ const MCompare* mir = comp->mir();
+ const MCompare::CompareType type = mir->compareType();
+ const Assembler::Condition cond = JSOpToCondition(type, comp->jsop());
+ const Register leftreg = ToRegister(comp->getOperand(0));
+ const LAllocation* right = comp->getOperand(1);
+ const Register defreg = ToRegister(comp->getDef(0));
+
+ if (type == MCompare::Compare_Object || type == MCompare::Compare_Symbol ||
+ type == MCompare::Compare_UIntPtr ||
+ type == MCompare::Compare_RefOrNull) {
+ if (right->isConstant()) {
+ MOZ_ASSERT(type == MCompare::Compare_UIntPtr);
+ masm.cmpPtrSet(cond, leftreg, Imm32(ToInt32(right)), defreg);
+ } else {
+ masm.cmpPtrSet(cond, leftreg, ToRegister(right), defreg);
+ }
+ return;
+ }
+
+ if (right->isConstant()) {
+ masm.cmp32Set(cond, leftreg, Imm32(ToInt32(right)), defreg);
+ } else {
+ masm.cmp32Set(cond, leftreg, ToRegister(right), defreg);
+ }
+}
+
+void CodeGenerator::visitCompareAndBranch(LCompareAndBranch* comp) {
+ const MCompare* mir = comp->cmpMir();
+ const MCompare::CompareType type = mir->compareType();
+ const LAllocation* left = comp->left();
+ const LAllocation* right = comp->right();
+
+ if (type == MCompare::Compare_Object || type == MCompare::Compare_Symbol ||
+ type == MCompare::Compare_UIntPtr ||
+ type == MCompare::Compare_RefOrNull) {
+ if (right->isConstant()) {
+ MOZ_ASSERT(type == MCompare::Compare_UIntPtr);
+ masm.cmpPtr(ToRegister(left), Imm32(ToInt32(right)));
+ } else {
+ masm.cmpPtr(ToRegister(left), ToRegister(right));
+ }
+ } else if (right->isConstant()) {
+ masm.cmp32(ToRegister(left), Imm32(ToInt32(right)));
+ } else {
+ masm.cmp32(ToRegister(left), ToRegister(right));
+ }
+
+ Assembler::Condition cond = JSOpToCondition(type, comp->jsop());
+ emitBranch(cond, comp->ifTrue(), comp->ifFalse());
+}
+
+void CodeGeneratorARM64::bailoutIf(Assembler::Condition condition,
+ LSnapshot* snapshot) {
+ encode(snapshot);
+
+ InlineScriptTree* tree = snapshot->mir()->block()->trackedTree();
+ OutOfLineBailout* ool = new (alloc()) OutOfLineBailout(snapshot);
+ addOutOfLineCode(ool,
+ new (alloc()) BytecodeSite(tree, tree->script()->code()));
+
+ masm.B(ool->entry(), condition);
+}
+
+void CodeGeneratorARM64::bailoutFrom(Label* label, LSnapshot* snapshot) {
+ MOZ_ASSERT_IF(!masm.oom(), label->used());
+ MOZ_ASSERT_IF(!masm.oom(), !label->bound());
+
+ encode(snapshot);
+
+ InlineScriptTree* tree = snapshot->mir()->block()->trackedTree();
+ OutOfLineBailout* ool = new (alloc()) OutOfLineBailout(snapshot);
+ addOutOfLineCode(ool,
+ new (alloc()) BytecodeSite(tree, tree->script()->code()));
+
+ masm.retarget(label, ool->entry());
+}
+
+void CodeGeneratorARM64::bailout(LSnapshot* snapshot) {
+ Label label;
+ masm.b(&label);
+ bailoutFrom(&label, snapshot);
+}
+
+void CodeGeneratorARM64::visitOutOfLineBailout(OutOfLineBailout* ool) {
+ masm.push(Imm32(ool->snapshot()->snapshotOffset()));
+ masm.B(&deoptLabel_);
+}
+
+void CodeGenerator::visitMinMaxD(LMinMaxD* ins) {
+ ARMFPRegister lhs(ToFloatRegister(ins->first()), 64);
+ ARMFPRegister rhs(ToFloatRegister(ins->second()), 64);
+ ARMFPRegister output(ToFloatRegister(ins->output()), 64);
+ if (ins->mir()->isMax()) {
+ masm.Fmax(output, lhs, rhs);
+ } else {
+ masm.Fmin(output, lhs, rhs);
+ }
+}
+
+void CodeGenerator::visitMinMaxF(LMinMaxF* ins) {
+ ARMFPRegister lhs(ToFloatRegister(ins->first()), 32);
+ ARMFPRegister rhs(ToFloatRegister(ins->second()), 32);
+ ARMFPRegister output(ToFloatRegister(ins->output()), 32);
+ if (ins->mir()->isMax()) {
+ masm.Fmax(output, lhs, rhs);
+ } else {
+ masm.Fmin(output, lhs, rhs);
+ }
+}
+
+template <typename T>
+static ARMRegister toWRegister(const T* a) {
+ return ARMRegister(ToRegister(a), 32);
+}
+
+template <typename T>
+static ARMRegister toXRegister(const T* a) {
+ return ARMRegister(ToRegister(a), 64);
+}
+
+Operand toWOperand(const LAllocation* a) {
+ if (a->isConstant()) {
+ return Operand(ToInt32(a));
+ }
+ return Operand(toWRegister(a));
+}
+
+vixl::CPURegister ToCPURegister(const LAllocation* a, Scalar::Type type) {
+ if (a->isFloatReg() && type == Scalar::Float64) {
+ return ARMFPRegister(ToFloatRegister(a), 64);
+ }
+ if (a->isFloatReg() && type == Scalar::Float32) {
+ return ARMFPRegister(ToFloatRegister(a), 32);
+ }
+ if (a->isGeneralReg()) {
+ return ARMRegister(ToRegister(a), 32);
+ }
+ MOZ_CRASH("Unknown LAllocation");
+}
+
+vixl::CPURegister ToCPURegister(const LDefinition* d, Scalar::Type type) {
+ return ToCPURegister(d->output(), type);
+}
+
+// Let |cond| be an ARM64 condition code that we could reasonably use in a
+// conditional branch or select following a comparison instruction. This
+// function returns the condition to use in the case where we swap the two
+// operands of the comparison instruction.
+Assembler::Condition GetCondForSwappedOperands(Assembler::Condition cond) {
+ // EQ and NE map to themselves
+ // Of the remaining 14 cases, 4 other pairings can meaningfully swap:
+ // HS -- LS
+ // LO -- HI
+ // GE -- LE
+ // GT -- LT
+ switch (cond) {
+ case vixl::eq:
+ case vixl::ne:
+ return cond;
+ case vixl::hs:
+ return vixl::ls;
+ case vixl::ls:
+ return vixl::hs;
+ case vixl::lo:
+ return vixl::hi;
+ case vixl::hi:
+ return vixl::lo;
+ case vixl::ge:
+ return vixl::le;
+ case vixl::le:
+ return vixl::ge;
+ case vixl::gt:
+ return vixl::lt;
+ case vixl::lt:
+ return vixl::gt;
+ default:
+ MOZ_CRASH("no meaningful swapped-operand condition");
+ }
+}
+
+void CodeGenerator::visitAddI(LAddI* ins) {
+ const LAllocation* lhs = ins->getOperand(0);
+ const LAllocation* rhs = ins->getOperand(1);
+ const LDefinition* dest = ins->getDef(0);
+
+ // Platforms with three-operand arithmetic ops don't need recovery.
+ MOZ_ASSERT(!ins->recoversInput());
+
+ if (ins->snapshot()) {
+ masm.Adds(toWRegister(dest), toWRegister(lhs), toWOperand(rhs));
+ bailoutIf(Assembler::Overflow, ins->snapshot());
+ } else {
+ masm.Add(toWRegister(dest), toWRegister(lhs), toWOperand(rhs));
+ }
+}
+
+void CodeGenerator::visitSubI(LSubI* ins) {
+ const LAllocation* lhs = ins->getOperand(0);
+ const LAllocation* rhs = ins->getOperand(1);
+ const LDefinition* dest = ins->getDef(0);
+
+ // Platforms with three-operand arithmetic ops don't need recovery.
+ MOZ_ASSERT(!ins->recoversInput());
+
+ if (ins->snapshot()) {
+ masm.Subs(toWRegister(dest), toWRegister(lhs), toWOperand(rhs));
+ bailoutIf(Assembler::Overflow, ins->snapshot());
+ } else {
+ masm.Sub(toWRegister(dest), toWRegister(lhs), toWOperand(rhs));
+ }
+}
+
+void CodeGenerator::visitMulI(LMulI* ins) {
+ const LAllocation* lhs = ins->getOperand(0);
+ const LAllocation* rhs = ins->getOperand(1);
+ const LDefinition* dest = ins->getDef(0);
+ MMul* mul = ins->mir();
+ MOZ_ASSERT_IF(mul->mode() == MMul::Integer,
+ !mul->canBeNegativeZero() && !mul->canOverflow());
+
+ Register lhsreg = ToRegister(lhs);
+ const ARMRegister lhsreg32 = ARMRegister(lhsreg, 32);
+ Register destreg = ToRegister(dest);
+ const ARMRegister destreg32 = ARMRegister(destreg, 32);
+
+ if (rhs->isConstant()) {
+ // Bailout on -0.0.
+ int32_t constant = ToInt32(rhs);
+ if (mul->canBeNegativeZero() && constant <= 0) {
+ Assembler::Condition bailoutCond =
+ (constant == 0) ? Assembler::LessThan : Assembler::Equal;
+ masm.Cmp(toWRegister(lhs), Operand(0));
+ bailoutIf(bailoutCond, ins->snapshot());
+ }
+
+ switch (constant) {
+ case -1:
+ masm.Negs(destreg32, Operand(lhsreg32));
+ break; // Go to overflow check.
+ case 0:
+ masm.Mov(destreg32, wzr);
+ return; // Avoid overflow check.
+ case 1:
+ if (destreg != lhsreg) {
+ masm.Mov(destreg32, lhsreg32);
+ }
+ return; // Avoid overflow check.
+ case 2:
+ if (!mul->canOverflow()) {
+ masm.Add(destreg32, lhsreg32, Operand(lhsreg32));
+ return; // Avoid overflow check.
+ }
+ masm.Adds(destreg32, lhsreg32, Operand(lhsreg32));
+ break; // Go to overflow check.
+ default:
+ // Use shift if cannot overflow and constant is a power of 2
+ if (!mul->canOverflow() && constant > 0) {
+ int32_t shift = FloorLog2(constant);
+ if ((1 << shift) == constant) {
+ masm.Lsl(destreg32, lhsreg32, shift);
+ return;
+ }
+ }
+
+ // Otherwise, just multiply. We have to check for overflow.
+ // Negative zero was handled above.
+ Label bailout;
+ Label* onOverflow = mul->canOverflow() ? &bailout : nullptr;
+
+ vixl::UseScratchRegisterScope temps(&masm.asVIXL());
+ const Register scratch = temps.AcquireW().asUnsized();
+
+ masm.move32(Imm32(constant), scratch);
+ masm.mul32(lhsreg, scratch, destreg, onOverflow);
+
+ if (onOverflow) {
+ MOZ_ASSERT(lhsreg != destreg);
+ bailoutFrom(&bailout, ins->snapshot());
+ }
+ return;
+ }
+
+ // Overflow check.
+ if (mul->canOverflow()) {
+ bailoutIf(Assembler::Overflow, ins->snapshot());
+ }
+ } else {
+ Register rhsreg = ToRegister(rhs);
+ const ARMRegister rhsreg32 = ARMRegister(rhsreg, 32);
+
+ Label bailout;
+ Label* onOverflow = mul->canOverflow() ? &bailout : nullptr;
+
+ if (mul->canBeNegativeZero()) {
+ // The product of two integer operands is negative zero iff one
+ // operand is zero, and the other is negative. Therefore, the
+ // sum of the two operands will also be negative (specifically,
+ // it will be the non-zero operand). If the result of the
+ // multiplication is 0, we can check the sign of the sum to
+ // determine whether we should bail out.
+
+ // This code can bailout, so lowering guarantees that the input
+ // operands are not overwritten.
+ MOZ_ASSERT(destreg != lhsreg);
+ MOZ_ASSERT(destreg != rhsreg);
+
+ // Do the multiplication.
+ masm.mul32(lhsreg, rhsreg, destreg, onOverflow);
+
+ // Set Zero flag if destreg is 0.
+ masm.test32(destreg, destreg);
+
+ // ccmn is 'conditional compare negative'.
+ // If the Zero flag is set:
+ // perform a compare negative (compute lhs+rhs and set flags)
+ // else:
+ // clear flags
+ masm.Ccmn(lhsreg32, rhsreg32, vixl::NoFlag, Assembler::Zero);
+
+ // Bails out if (lhs * rhs == 0) && (lhs + rhs < 0):
+ bailoutIf(Assembler::LessThan, ins->snapshot());
+
+ } else {
+ masm.mul32(lhsreg, rhsreg, destreg, onOverflow);
+ }
+ if (onOverflow) {
+ bailoutFrom(&bailout, ins->snapshot());
+ }
+ }
+}
+
+void CodeGenerator::visitDivI(LDivI* ins) {
+ const Register lhs = ToRegister(ins->lhs());
+ const Register rhs = ToRegister(ins->rhs());
+ const Register output = ToRegister(ins->output());
+
+ const ARMRegister lhs32 = toWRegister(ins->lhs());
+ const ARMRegister rhs32 = toWRegister(ins->rhs());
+ const ARMRegister temp32 = toWRegister(ins->getTemp(0));
+ const ARMRegister output32 = toWRegister(ins->output());
+
+ MDiv* mir = ins->mir();
+
+ Label done;
+
+ // Handle division by zero.
+ if (mir->canBeDivideByZero()) {
+ masm.test32(rhs, rhs);
+ if (mir->trapOnError()) {
+ Label nonZero;
+ masm.j(Assembler::NonZero, &nonZero);
+ masm.wasmTrap(wasm::Trap::IntegerDivideByZero, mir->bytecodeOffset());
+ masm.bind(&nonZero);
+ } else if (mir->canTruncateInfinities()) {
+ // Truncated division by zero is zero: (Infinity|0 = 0).
+ Label nonZero;
+ masm.j(Assembler::NonZero, &nonZero);
+ masm.Mov(output32, wzr);
+ masm.jump(&done);
+ masm.bind(&nonZero);
+ } else {
+ MOZ_ASSERT(mir->fallible());
+ bailoutIf(Assembler::Zero, ins->snapshot());
+ }
+ }
+
+ // Handle an integer overflow from (INT32_MIN / -1).
+ // The integer division gives INT32_MIN, but should be -(double)INT32_MIN.
+ if (mir->canBeNegativeOverflow()) {
+ Label notOverflow;
+
+ // Branch to handle the non-overflow cases.
+ masm.branch32(Assembler::NotEqual, lhs, Imm32(INT32_MIN), &notOverflow);
+ masm.branch32(Assembler::NotEqual, rhs, Imm32(-1), &notOverflow);
+
+ // Handle overflow.
+ if (mir->trapOnError()) {
+ masm.wasmTrap(wasm::Trap::IntegerOverflow, mir->bytecodeOffset());
+ } else if (mir->canTruncateOverflow()) {
+ // (-INT32_MIN)|0 == INT32_MIN, which is already in lhs.
+ masm.move32(lhs, output);
+ masm.jump(&done);
+ } else {
+ MOZ_ASSERT(mir->fallible());
+ bailout(ins->snapshot());
+ }
+ masm.bind(&notOverflow);
+ }
+
+ // Handle negative zero: lhs == 0 && rhs < 0.
+ if (!mir->canTruncateNegativeZero() && mir->canBeNegativeZero()) {
+ Label nonZero;
+ masm.branch32(Assembler::NotEqual, lhs, Imm32(0), &nonZero);
+ masm.cmp32(rhs, Imm32(0));
+ bailoutIf(Assembler::LessThan, ins->snapshot());
+ masm.bind(&nonZero);
+ }
+
+ // Perform integer division.
+ if (mir->canTruncateRemainder()) {
+ masm.Sdiv(output32, lhs32, rhs32);
+ } else {
+ vixl::UseScratchRegisterScope temps(&masm.asVIXL());
+ ARMRegister scratch32 = temps.AcquireW();
+
+ // ARM does not automatically calculate the remainder.
+ // The ISR suggests multiplication to determine whether a remainder exists.
+ masm.Sdiv(scratch32, lhs32, rhs32);
+ masm.Mul(temp32, scratch32, rhs32);
+ masm.Cmp(lhs32, temp32);
+ bailoutIf(Assembler::NotEqual, ins->snapshot());
+ masm.Mov(output32, scratch32);
+ }
+
+ masm.bind(&done);
+}
+
+void CodeGenerator::visitDivPowTwoI(LDivPowTwoI* ins) {
+ const Register numerator = ToRegister(ins->numerator());
+ const ARMRegister numerator32 = toWRegister(ins->numerator());
+ const ARMRegister output32 = toWRegister(ins->output());
+
+ int32_t shift = ins->shift();
+ bool negativeDivisor = ins->negativeDivisor();
+ MDiv* mir = ins->mir();
+
+ if (!mir->isTruncated() && negativeDivisor) {
+ // 0 divided by a negative number returns a -0 double.
+ bailoutTest32(Assembler::Zero, numerator, numerator, ins->snapshot());
+ }
+
+ if (shift) {
+ if (!mir->isTruncated()) {
+ // If the remainder is != 0, bailout since this must be a double.
+ bailoutTest32(Assembler::NonZero, numerator,
+ Imm32(UINT32_MAX >> (32 - shift)), ins->snapshot());
+ }
+
+ if (mir->isUnsigned()) {
+ // shift right
+ masm.Lsr(output32, numerator32, shift);
+ } else {
+ ARMRegister temp32 = numerator32;
+ // Adjust the value so that shifting produces a correctly
+ // rounded result when the numerator is negative. See 10-1
+ // "Signed Division by a Known Power of 2" in Henry
+ // S. Warren, Jr.'s Hacker's Delight.
+ if (mir->canBeNegativeDividend() && mir->isTruncated()) {
+ if (shift > 1) {
+ // Copy the sign bit of the numerator. (= (2^32 - 1) or 0)
+ masm.Asr(output32, numerator32, 31);
+ temp32 = output32;
+ }
+ // Divide by 2^(32 - shift)
+ // i.e. (= (2^32 - 1) / 2^(32 - shift) or 0)
+ // i.e. (= (2^shift - 1) or 0)
+ masm.Lsr(output32, temp32, 32 - shift);
+ // If signed, make any 1 bit below the shifted bits to bubble up, such
+ // that once shifted the value would be rounded towards 0.
+ masm.Add(output32, output32, numerator32);
+ temp32 = output32;
+ }
+ masm.Asr(output32, temp32, shift);
+
+ if (negativeDivisor) {
+ masm.Neg(output32, output32);
+ }
+ }
+ return;
+ }
+
+ if (negativeDivisor) {
+ // INT32_MIN / -1 overflows.
+ if (!mir->isTruncated()) {
+ masm.Negs(output32, numerator32);
+ bailoutIf(Assembler::Overflow, ins->snapshot());
+ } else if (mir->trapOnError()) {
+ Label ok;
+ masm.Negs(output32, numerator32);
+ masm.branch(Assembler::NoOverflow, &ok);
+ masm.wasmTrap(wasm::Trap::IntegerOverflow, mir->bytecodeOffset());
+ masm.bind(&ok);
+ } else {
+ // Do not set condition flags.
+ masm.Neg(output32, numerator32);
+ }
+ } else {
+ if (mir->isUnsigned() && !mir->isTruncated()) {
+ // Copy and set flags.
+ masm.Adds(output32, numerator32, 0);
+ // Unsigned division by 1 can overflow if output is not truncated, as we
+ // do not have an Unsigned type for MIR instructions.
+ bailoutIf(Assembler::Signed, ins->snapshot());
+ } else {
+ // Copy the result.
+ masm.Mov(output32, numerator32);
+ }
+ }
+}
+
+void CodeGenerator::visitDivConstantI(LDivConstantI* ins) {
+ const ARMRegister lhs32 = toWRegister(ins->numerator());
+ const ARMRegister lhs64 = toXRegister(ins->numerator());
+ const ARMRegister const32 = toWRegister(ins->temp());
+ const ARMRegister output32 = toWRegister(ins->output());
+ const ARMRegister output64 = toXRegister(ins->output());
+ int32_t d = ins->denominator();
+
+ // The absolute value of the denominator isn't a power of 2.
+ using mozilla::Abs;
+ MOZ_ASSERT((Abs(d) & (Abs(d) - 1)) != 0);
+
+ // We will first divide by Abs(d), and negate the answer if d is negative.
+ // If desired, this can be avoided by generalizing computeDivisionConstants.
+ auto rmc = ReciprocalMulConstants::computeSignedDivisionConstants(Abs(d));
+
+ // We first compute (M * n) >> 32, where M = rmc.multiplier.
+ masm.Mov(const32, int32_t(rmc.multiplier));
+ if (rmc.multiplier > INT32_MAX) {
+ MOZ_ASSERT(rmc.multiplier < (int64_t(1) << 32));
+
+ // We actually compute (int32_t(M) * n) instead, without the upper bit.
+ // Thus, (M * n) = (int32_t(M) * n) + n << 32.
+ //
+ // ((int32_t(M) * n) + n << 32) can't overflow, as both operands have
+ // opposite signs because int32_t(M) is negative.
+ masm.Lsl(output64, lhs64, 32);
+
+ // Store (M * n) in output64.
+ masm.Smaddl(output64, const32, lhs32, output64);
+ } else {
+ // Store (M * n) in output64.
+ masm.Smull(output64, const32, lhs32);
+ }
+
+ // (M * n) >> (32 + shift) is the truncated division answer if n is
+ // non-negative, as proved in the comments of computeDivisionConstants. We
+ // must add 1 later if n is negative to get the right answer in all cases.
+ masm.Asr(output64, output64, 32 + rmc.shiftAmount);
+
+ // We'll subtract -1 instead of adding 1, because (n < 0 ? -1 : 0) can be
+ // computed with just a sign-extending shift of 31 bits.
+ if (ins->canBeNegativeDividend()) {
+ masm.Asr(const32, lhs32, 31);
+ masm.Sub(output32, output32, const32);
+ }
+
+ // After this, output32 contains the correct truncated division result.
+ if (d < 0) {
+ masm.Neg(output32, output32);
+ }
+
+ if (!ins->mir()->isTruncated()) {
+ // This is a division op. Multiply the obtained value by d to check if
+ // the correct answer is an integer. This cannot overflow, since |d| > 1.
+ masm.Mov(const32, d);
+ masm.Msub(const32, output32, const32, lhs32);
+ // bailout if (lhs - output * d != 0)
+ masm.Cmp(const32, wzr);
+ auto bailoutCond = Assembler::NonZero;
+
+ // If lhs is zero and the divisor is negative, the answer should have
+ // been -0.
+ if (d < 0) {
+ // or bailout if (lhs == 0).
+ // ^ ^
+ // | '-- masm.Ccmp(lhs32, lhs32, .., ..)
+ // '-- masm.Ccmp(.., .., vixl::ZFlag, ! bailoutCond)
+ masm.Ccmp(lhs32, wzr, vixl::ZFlag, Assembler::Zero);
+ bailoutCond = Assembler::Zero;
+ }
+
+ // bailout if (lhs - output * d != 0) or (d < 0 && lhs == 0)
+ bailoutIf(bailoutCond, ins->snapshot());
+ }
+}
+
+void CodeGenerator::visitUDivConstantI(LUDivConstantI* ins) {
+ const ARMRegister lhs32 = toWRegister(ins->numerator());
+ const ARMRegister lhs64 = toXRegister(ins->numerator());
+ const ARMRegister const32 = toWRegister(ins->temp());
+ const ARMRegister output32 = toWRegister(ins->output());
+ const ARMRegister output64 = toXRegister(ins->output());
+ uint32_t d = ins->denominator();
+
+ if (d == 0) {
+ if (ins->mir()->isTruncated()) {
+ if (ins->mir()->trapOnError()) {
+ masm.wasmTrap(wasm::Trap::IntegerDivideByZero,
+ ins->mir()->bytecodeOffset());
+ } else {
+ masm.Mov(output32, wzr);
+ }
+ } else {
+ bailout(ins->snapshot());
+ }
+ return;
+ }
+
+ // The denominator isn't a power of 2 (see LDivPowTwoI).
+ MOZ_ASSERT((d & (d - 1)) != 0);
+
+ auto rmc = ReciprocalMulConstants::computeUnsignedDivisionConstants(d);
+
+ // We first compute (M * n), where M = rmc.multiplier.
+ masm.Mov(const32, int32_t(rmc.multiplier));
+ masm.Umull(output64, const32, lhs32);
+ if (rmc.multiplier > UINT32_MAX) {
+ // M >= 2^32 and shift == 0 is impossible, as d >= 2 implies that
+ // ((M * n) >> (32 + shift)) >= n > floor(n/d) whenever n >= d,
+ // contradicting the proof of correctness in computeDivisionConstants.
+ MOZ_ASSERT(rmc.shiftAmount > 0);
+ MOZ_ASSERT(rmc.multiplier < (int64_t(1) << 33));
+
+ // We actually compute (uint32_t(M) * n) instead, without the upper bit.
+ // Thus, (M * n) = (uint32_t(M) * n) + n << 32.
+ //
+ // ((uint32_t(M) * n) + n << 32) can overflow. Hacker's Delight explains a
+ // trick to avoid this overflow case, but we can avoid it by computing the
+ // addition on 64 bits registers.
+ //
+ // Compute ((uint32_t(M) * n) >> 32 + n)
+ masm.Add(output64, lhs64, Operand(output64, vixl::LSR, 32));
+
+ // (M * n) >> (32 + shift) is the truncated division answer.
+ masm.Lsr(output64, output64, rmc.shiftAmount);
+ } else {
+ // (M * n) >> (32 + shift) is the truncated division answer.
+ masm.Lsr(output64, output64, 32 + rmc.shiftAmount);
+ }
+
+ // We now have the truncated division value. We are checking whether the
+ // division resulted in an integer, we multiply the obtained value by d and
+ // check the remainder of the division.
+ if (!ins->mir()->isTruncated()) {
+ masm.Mov(const32, d);
+ masm.Msub(const32, output32, const32, lhs32);
+ // bailout if (lhs - output * d != 0)
+ masm.Cmp(const32, const32);
+ bailoutIf(Assembler::NonZero, ins->snapshot());
+ }
+}
+
+void CodeGenerator::visitModI(LModI* ins) {
+ ARMRegister lhs = toWRegister(ins->lhs());
+ ARMRegister rhs = toWRegister(ins->rhs());
+ ARMRegister output = toWRegister(ins->output());
+ Label done;
+
+ MMod* mir = ins->mir();
+
+ // Prevent divide by zero.
+ if (mir->canBeDivideByZero()) {
+ if (mir->isTruncated()) {
+ if (mir->trapOnError()) {
+ Label nonZero;
+ masm.Cbnz(rhs, &nonZero);
+ masm.wasmTrap(wasm::Trap::IntegerDivideByZero, mir->bytecodeOffset());
+ masm.bind(&nonZero);
+ } else {
+ // Truncated division by zero yields integer zero.
+ masm.Mov(output, rhs);
+ masm.Cbz(rhs, &done);
+ }
+ } else {
+ // Non-truncated division by zero produces a non-integer.
+ MOZ_ASSERT(!gen->compilingWasm());
+ masm.Cmp(rhs, Operand(0));
+ bailoutIf(Assembler::Equal, ins->snapshot());
+ }
+ }
+
+ // Signed division.
+ masm.Sdiv(output, lhs, rhs);
+
+ // Compute the remainder: output = lhs - (output * rhs).
+ masm.Msub(output, output, rhs, lhs);
+
+ if (mir->canBeNegativeDividend() && !mir->isTruncated()) {
+ // If output == 0 and lhs < 0, then the result should be double -0.0.
+ // Note that this guard handles lhs == INT_MIN and rhs == -1:
+ // output = INT_MIN - (INT_MIN / -1) * -1
+ // = INT_MIN - INT_MIN
+ // = 0
+ masm.Cbnz(output, &done);
+ bailoutCmp32(Assembler::LessThan, lhs, Imm32(0), ins->snapshot());
+ }
+
+ if (done.used()) {
+ masm.bind(&done);
+ }
+}
+
+void CodeGenerator::visitModPowTwoI(LModPowTwoI* ins) {
+ Register lhs = ToRegister(ins->getOperand(0));
+ ARMRegister lhsw = toWRegister(ins->getOperand(0));
+ ARMRegister outw = toWRegister(ins->output());
+
+ int32_t shift = ins->shift();
+ bool canBeNegative =
+ !ins->mir()->isUnsigned() && ins->mir()->canBeNegativeDividend();
+
+ Label negative;
+ if (canBeNegative) {
+ // Switch based on sign of the lhs.
+ // Positive numbers are just a bitmask.
+ masm.branchTest32(Assembler::Signed, lhs, lhs, &negative);
+ }
+
+ masm.And(outw, lhsw, Operand((uint32_t(1) << shift) - 1));
+
+ if (canBeNegative) {
+ Label done;
+ masm.jump(&done);
+
+ // Negative numbers need a negate, bitmask, negate.
+ masm.bind(&negative);
+ masm.Neg(outw, Operand(lhsw));
+ masm.And(outw, outw, Operand((uint32_t(1) << shift) - 1));
+
+ // Since a%b has the same sign as b, and a is negative in this branch,
+ // an answer of 0 means the correct result is actually -0. Bail out.
+ if (!ins->mir()->isTruncated()) {
+ masm.Negs(outw, Operand(outw));
+ bailoutIf(Assembler::Zero, ins->snapshot());
+ } else {
+ masm.Neg(outw, Operand(outw));
+ }
+
+ masm.bind(&done);
+ }
+}
+
+void CodeGenerator::visitModMaskI(LModMaskI* ins) {
+ MMod* mir = ins->mir();
+ int32_t shift = ins->shift();
+
+ const Register src = ToRegister(ins->getOperand(0));
+ const Register dest = ToRegister(ins->getDef(0));
+ const Register hold = ToRegister(ins->getTemp(0));
+ const Register remain = ToRegister(ins->getTemp(1));
+
+ const ARMRegister src32 = ARMRegister(src, 32);
+ const ARMRegister dest32 = ARMRegister(dest, 32);
+ const ARMRegister remain32 = ARMRegister(remain, 32);
+
+ vixl::UseScratchRegisterScope temps(&masm.asVIXL());
+ const ARMRegister scratch32 = temps.AcquireW();
+ const Register scratch = scratch32.asUnsized();
+
+ // We wish to compute x % (1<<y) - 1 for a known constant, y.
+ //
+ // 1. Let b = (1<<y) and C = (1<<y)-1, then think of the 32 bit dividend as
+ // a number in base b, namely c_0*1 + c_1*b + c_2*b^2 ... c_n*b^n
+ //
+ // 2. Since both addition and multiplication commute with modulus:
+ // x % C == (c_0 + c_1*b + ... + c_n*b^n) % C ==
+ // (c_0 % C) + (c_1%C) * (b % C) + (c_2 % C) * (b^2 % C)...
+ //
+ // 3. Since b == C + 1, b % C == 1, and b^n % C == 1 the whole thing
+ // simplifies to: c_0 + c_1 + c_2 ... c_n % C
+ //
+ // Each c_n can easily be computed by a shift/bitextract, and the modulus
+ // can be maintained by simply subtracting by C whenever the number gets
+ // over C.
+ int32_t mask = (1 << shift) - 1;
+ Label loop;
+
+ // Register 'hold' holds -1 if the value was negative, 1 otherwise.
+ // The remain reg holds the remaining bits that have not been processed.
+ // The scratch reg serves as a temporary location to store extracted bits.
+ // The dest reg is the accumulator, becoming final result.
+ //
+ // Move the whole value into the remain.
+ masm.Mov(remain32, src32);
+ // Zero out the dest.
+ masm.Mov(dest32, wzr);
+ // Set the hold appropriately.
+ {
+ Label negative;
+ masm.branch32(Assembler::Signed, remain, Imm32(0), &negative);
+ masm.move32(Imm32(1), hold);
+ masm.jump(&loop);
+
+ masm.bind(&negative);
+ masm.move32(Imm32(-1), hold);
+ masm.neg32(remain);
+ }
+
+ // Begin the main loop.
+ masm.bind(&loop);
+ {
+ // Extract the bottom bits into scratch.
+ masm.And(scratch32, remain32, Operand(mask));
+ // Add those bits to the accumulator.
+ masm.Add(dest32, dest32, scratch32);
+ // Do a trial subtraction. This functions as a cmp but remembers the result.
+ masm.Subs(scratch32, dest32, Operand(mask));
+ // If (sum - C) > 0, store sum - C back into sum, thus performing a modulus.
+ {
+ Label sumSigned;
+ masm.branch32(Assembler::Signed, scratch, scratch, &sumSigned);
+ masm.Mov(dest32, scratch32);
+ masm.bind(&sumSigned);
+ }
+ // Get rid of the bits that we extracted before.
+ masm.Lsr(remain32, remain32, shift);
+ // If the shift produced zero, finish, otherwise, continue in the loop.
+ masm.branchTest32(Assembler::NonZero, remain, remain, &loop);
+ }
+
+ // Check the hold to see if we need to negate the result.
+ {
+ Label done;
+
+ // If the hold was non-zero, negate the result to match JS expectations.
+ masm.branchTest32(Assembler::NotSigned, hold, hold, &done);
+ if (mir->canBeNegativeDividend() && !mir->isTruncated()) {
+ // Bail in case of negative zero hold.
+ bailoutTest32(Assembler::Zero, hold, hold, ins->snapshot());
+ }
+
+ masm.neg32(dest);
+ masm.bind(&done);
+ }
+}
+
+void CodeGeneratorARM64::emitBigIntDiv(LBigIntDiv* ins, Register dividend,
+ Register divisor, Register output,
+ Label* fail) {
+ // Callers handle division by zero and integer overflow.
+
+ const ARMRegister dividend64(dividend, 64);
+ const ARMRegister divisor64(divisor, 64);
+
+ masm.Sdiv(/* result= */ dividend64, dividend64, divisor64);
+
+ // Create and return the result.
+ masm.newGCBigInt(output, divisor, initialBigIntHeap(), fail);
+ masm.initializeBigInt(output, dividend);
+}
+
+void CodeGeneratorARM64::emitBigIntMod(LBigIntMod* ins, Register dividend,
+ Register divisor, Register output,
+ Label* fail) {
+ // Callers handle division by zero and integer overflow.
+
+ const ARMRegister dividend64(dividend, 64);
+ const ARMRegister divisor64(divisor, 64);
+ const ARMRegister output64(output, 64);
+
+ // Signed division.
+ masm.Sdiv(output64, dividend64, divisor64);
+
+ // Compute the remainder: output = dividend - (output * divisor).
+ masm.Msub(/* result= */ dividend64, output64, divisor64, dividend64);
+
+ // Create and return the result.
+ masm.newGCBigInt(output, divisor, initialBigIntHeap(), fail);
+ masm.initializeBigInt(output, dividend);
+}
+
+void CodeGenerator::visitBitNotI(LBitNotI* ins) {
+ const LAllocation* input = ins->getOperand(0);
+ const LDefinition* output = ins->getDef(0);
+ masm.Mvn(toWRegister(output), toWOperand(input));
+}
+
+void CodeGenerator::visitBitNotI64(LBitNotI64* ins) {
+ Register input = ToRegister(ins->input());
+ Register output = ToRegister(ins->output());
+ masm.Mvn(vixl::Register(output, 64), vixl::Register(input, 64));
+}
+
+void CodeGenerator::visitBitOpI(LBitOpI* ins) {
+ const ARMRegister lhs = toWRegister(ins->getOperand(0));
+ const Operand rhs = toWOperand(ins->getOperand(1));
+ const ARMRegister dest = toWRegister(ins->getDef(0));
+
+ switch (ins->bitop()) {
+ case JSOp::BitOr:
+ masm.Orr(dest, lhs, rhs);
+ break;
+ case JSOp::BitXor:
+ masm.Eor(dest, lhs, rhs);
+ break;
+ case JSOp::BitAnd:
+ masm.And(dest, lhs, rhs);
+ break;
+ default:
+ MOZ_CRASH("unexpected binary opcode");
+ }
+}
+
+void CodeGenerator::visitShiftI(LShiftI* ins) {
+ const ARMRegister lhs = toWRegister(ins->lhs());
+ const LAllocation* rhs = ins->rhs();
+ const ARMRegister dest = toWRegister(ins->output());
+
+ if (rhs->isConstant()) {
+ int32_t shift = ToInt32(rhs) & 0x1F;
+ switch (ins->bitop()) {
+ case JSOp::Lsh:
+ masm.Lsl(dest, lhs, shift);
+ break;
+ case JSOp::Rsh:
+ masm.Asr(dest, lhs, shift);
+ break;
+ case JSOp::Ursh:
+ if (shift) {
+ masm.Lsr(dest, lhs, shift);
+ } else if (ins->mir()->toUrsh()->fallible()) {
+ // x >>> 0 can overflow.
+ masm.Ands(dest, lhs, Operand(0xFFFFFFFF));
+ bailoutIf(Assembler::Signed, ins->snapshot());
+ } else {
+ masm.Mov(dest, lhs);
+ }
+ break;
+ default:
+ MOZ_CRASH("Unexpected shift op");
+ }
+ } else {
+ const ARMRegister rhsreg = toWRegister(rhs);
+ switch (ins->bitop()) {
+ case JSOp::Lsh:
+ masm.Lsl(dest, lhs, rhsreg);
+ break;
+ case JSOp::Rsh:
+ masm.Asr(dest, lhs, rhsreg);
+ break;
+ case JSOp::Ursh:
+ masm.Lsr(dest, lhs, rhsreg);
+ if (ins->mir()->toUrsh()->fallible()) {
+ /// x >>> 0 can overflow.
+ masm.Cmp(dest, Operand(0));
+ bailoutIf(Assembler::LessThan, ins->snapshot());
+ }
+ break;
+ default:
+ MOZ_CRASH("Unexpected shift op");
+ }
+ }
+}
+
+void CodeGenerator::visitUrshD(LUrshD* ins) {
+ const ARMRegister lhs = toWRegister(ins->lhs());
+ const LAllocation* rhs = ins->rhs();
+ const FloatRegister out = ToFloatRegister(ins->output());
+
+ const Register temp = ToRegister(ins->temp());
+ const ARMRegister temp32 = toWRegister(ins->temp());
+
+ if (rhs->isConstant()) {
+ int32_t shift = ToInt32(rhs) & 0x1F;
+ if (shift) {
+ masm.Lsr(temp32, lhs, shift);
+ masm.convertUInt32ToDouble(temp, out);
+ } else {
+ masm.convertUInt32ToDouble(ToRegister(ins->lhs()), out);
+ }
+ } else {
+ masm.And(temp32, toWRegister(rhs), Operand(0x1F));
+ masm.Lsr(temp32, lhs, temp32);
+ masm.convertUInt32ToDouble(temp, out);
+ }
+}
+
+void CodeGenerator::visitPowHalfD(LPowHalfD* ins) {
+ FloatRegister input = ToFloatRegister(ins->input());
+ FloatRegister output = ToFloatRegister(ins->output());
+
+ ScratchDoubleScope scratch(masm);
+
+ Label done, sqrt;
+
+ if (!ins->mir()->operandIsNeverNegativeInfinity()) {
+ // Branch if not -Infinity.
+ masm.loadConstantDouble(NegativeInfinity<double>(), scratch);
+
+ Assembler::DoubleCondition cond = Assembler::DoubleNotEqualOrUnordered;
+ if (ins->mir()->operandIsNeverNaN()) {
+ cond = Assembler::DoubleNotEqual;
+ }
+ masm.branchDouble(cond, input, scratch, &sqrt);
+
+ // Math.pow(-Infinity, 0.5) == Infinity.
+ masm.zeroDouble(output);
+ masm.subDouble(scratch, output);
+ masm.jump(&done);
+
+ masm.bind(&sqrt);
+ }
+
+ if (!ins->mir()->operandIsNeverNegativeZero()) {
+ // Math.pow(-0, 0.5) == 0 == Math.pow(0, 0.5).
+ // Adding 0 converts any -0 to 0.
+ masm.zeroDouble(scratch);
+ masm.addDouble(input, scratch);
+ masm.sqrtDouble(scratch, output);
+ } else {
+ masm.sqrtDouble(input, output);
+ }
+
+ masm.bind(&done);
+}
+
+MoveOperand CodeGeneratorARM64::toMoveOperand(const LAllocation a) const {
+ if (a.isGeneralReg()) {
+ return MoveOperand(ToRegister(a));
+ }
+ if (a.isFloatReg()) {
+ return MoveOperand(ToFloatRegister(a));
+ }
+ MoveOperand::Kind kind = a.isStackArea() ? MoveOperand::Kind::EffectiveAddress
+ : MoveOperand::Kind::Memory;
+ return MoveOperand(ToAddress(a), kind);
+}
+
+class js::jit::OutOfLineTableSwitch
+ : public OutOfLineCodeBase<CodeGeneratorARM64> {
+ MTableSwitch* mir_;
+ CodeLabel jumpLabel_;
+
+ void accept(CodeGeneratorARM64* codegen) override {
+ codegen->visitOutOfLineTableSwitch(this);
+ }
+
+ public:
+ explicit OutOfLineTableSwitch(MTableSwitch* mir) : mir_(mir) {}
+
+ MTableSwitch* mir() const { return mir_; }
+
+ CodeLabel* jumpLabel() { return &jumpLabel_; }
+};
+
+void CodeGeneratorARM64::visitOutOfLineTableSwitch(OutOfLineTableSwitch* ool) {
+ MTableSwitch* mir = ool->mir();
+
+ // Prevent nop and pools sequences to appear in the jump table.
+ AutoForbidPoolsAndNops afp(
+ &masm, (mir->numCases() + 1) * (sizeof(void*) / vixl::kInstructionSize));
+ masm.haltingAlign(sizeof(void*));
+ masm.bind(ool->jumpLabel());
+ masm.addCodeLabel(*ool->jumpLabel());
+
+ for (size_t i = 0; i < mir->numCases(); i++) {
+ LBlock* caseblock = skipTrivialBlocks(mir->getCase(i))->lir();
+ Label* caseheader = caseblock->label();
+ uint32_t caseoffset = caseheader->offset();
+
+ // The entries of the jump table need to be absolute addresses,
+ // and thus must be patched after codegen is finished.
+ CodeLabel cl;
+ masm.writeCodePointer(&cl);
+ cl.target()->bind(caseoffset);
+ masm.addCodeLabel(cl);
+ }
+}
+
+void CodeGeneratorARM64::emitTableSwitchDispatch(MTableSwitch* mir,
+ Register index,
+ Register base) {
+ Label* defaultcase = skipTrivialBlocks(mir->getDefault())->lir()->label();
+
+ // Let the lowest table entry be indexed at 0.
+ if (mir->low() != 0) {
+ masm.sub32(Imm32(mir->low()), index);
+ }
+
+ // Jump to the default case if input is out of range.
+ int32_t cases = mir->numCases();
+ masm.branch32(Assembler::AboveOrEqual, index, Imm32(cases), defaultcase);
+
+ // Because the target code has not yet been generated, we cannot know the
+ // instruction offsets for use as jump targets. Therefore we construct
+ // an OutOfLineTableSwitch that winds up holding the jump table.
+ //
+ // Because the jump table is generated as part of out-of-line code,
+ // it is generated after all the regular codegen, so the jump targets
+ // are guaranteed to exist when generating the jump table.
+ OutOfLineTableSwitch* ool = new (alloc()) OutOfLineTableSwitch(mir);
+ addOutOfLineCode(ool, mir);
+
+ // Use the index to get the address of the jump target from the table.
+ masm.mov(ool->jumpLabel(), base);
+ BaseIndex pointer(base, index, ScalePointer);
+
+ // Load the target from the jump table and branch to it.
+ masm.branchToComputedAddress(pointer);
+}
+
+void CodeGenerator::visitMathD(LMathD* math) {
+ ARMFPRegister lhs(ToFloatRegister(math->lhs()), 64);
+ ARMFPRegister rhs(ToFloatRegister(math->rhs()), 64);
+ ARMFPRegister output(ToFloatRegister(math->output()), 64);
+
+ switch (math->jsop()) {
+ case JSOp::Add:
+ masm.Fadd(output, lhs, rhs);
+ break;
+ case JSOp::Sub:
+ masm.Fsub(output, lhs, rhs);
+ break;
+ case JSOp::Mul:
+ masm.Fmul(output, lhs, rhs);
+ break;
+ case JSOp::Div:
+ masm.Fdiv(output, lhs, rhs);
+ break;
+ default:
+ MOZ_CRASH("unexpected opcode");
+ }
+}
+
+void CodeGenerator::visitMathF(LMathF* math) {
+ ARMFPRegister lhs(ToFloatRegister(math->lhs()), 32);
+ ARMFPRegister rhs(ToFloatRegister(math->rhs()), 32);
+ ARMFPRegister output(ToFloatRegister(math->output()), 32);
+
+ switch (math->jsop()) {
+ case JSOp::Add:
+ masm.Fadd(output, lhs, rhs);
+ break;
+ case JSOp::Sub:
+ masm.Fsub(output, lhs, rhs);
+ break;
+ case JSOp::Mul:
+ masm.Fmul(output, lhs, rhs);
+ break;
+ case JSOp::Div:
+ masm.Fdiv(output, lhs, rhs);
+ break;
+ default:
+ MOZ_CRASH("unexpected opcode");
+ }
+}
+
+void CodeGenerator::visitClzI(LClzI* lir) {
+ ARMRegister input = toWRegister(lir->input());
+ ARMRegister output = toWRegister(lir->output());
+ masm.Clz(output, input);
+}
+
+void CodeGenerator::visitCtzI(LCtzI* lir) {
+ Register input = ToRegister(lir->input());
+ Register output = ToRegister(lir->output());
+ masm.ctz32(input, output, /* knownNotZero = */ false);
+}
+
+void CodeGenerator::visitTruncateDToInt32(LTruncateDToInt32* ins) {
+ emitTruncateDouble(ToFloatRegister(ins->input()), ToRegister(ins->output()),
+ ins->mir());
+}
+
+void CodeGenerator::visitNearbyInt(LNearbyInt* lir) {
+ FloatRegister input = ToFloatRegister(lir->input());
+ FloatRegister output = ToFloatRegister(lir->output());
+
+ RoundingMode roundingMode = lir->mir()->roundingMode();
+ masm.nearbyIntDouble(roundingMode, input, output);
+}
+
+void CodeGenerator::visitNearbyIntF(LNearbyIntF* lir) {
+ FloatRegister input = ToFloatRegister(lir->input());
+ FloatRegister output = ToFloatRegister(lir->output());
+
+ RoundingMode roundingMode = lir->mir()->roundingMode();
+ masm.nearbyIntFloat32(roundingMode, input, output);
+}
+
+void CodeGenerator::visitWasmBuiltinTruncateDToInt32(
+ LWasmBuiltinTruncateDToInt32* lir) {
+ emitTruncateDouble(ToFloatRegister(lir->getOperand(0)),
+ ToRegister(lir->getDef(0)), lir->mir());
+}
+
+void CodeGenerator::visitTruncateFToInt32(LTruncateFToInt32* ins) {
+ emitTruncateFloat32(ToFloatRegister(ins->input()), ToRegister(ins->output()),
+ ins->mir());
+}
+
+void CodeGenerator::visitWasmBuiltinTruncateFToInt32(
+ LWasmBuiltinTruncateFToInt32* lir) {
+ emitTruncateFloat32(ToFloatRegister(lir->getOperand(0)),
+ ToRegister(lir->getDef(0)), lir->mir());
+}
+
+ValueOperand CodeGeneratorARM64::ToValue(LInstruction* ins, size_t pos) {
+ return ValueOperand(ToRegister(ins->getOperand(pos)));
+}
+
+ValueOperand CodeGeneratorARM64::ToTempValue(LInstruction* ins, size_t pos) {
+ MOZ_CRASH("CodeGeneratorARM64::ToTempValue");
+}
+
+void CodeGenerator::visitValue(LValue* value) {
+ ValueOperand result = ToOutValue(value);
+ masm.moveValue(value->value(), result);
+}
+
+void CodeGenerator::visitBox(LBox* box) {
+ const LAllocation* in = box->getOperand(0);
+ ValueOperand result = ToOutValue(box);
+
+ masm.moveValue(TypedOrValueRegister(box->type(), ToAnyRegister(in)), result);
+}
+
+void CodeGenerator::visitUnbox(LUnbox* unbox) {
+ MUnbox* mir = unbox->mir();
+
+ Register result = ToRegister(unbox->output());
+
+ if (mir->fallible()) {
+ const ValueOperand value = ToValue(unbox, LUnbox::Input);
+ Label bail;
+ switch (mir->type()) {
+ case MIRType::Int32:
+ masm.fallibleUnboxInt32(value, result, &bail);
+ break;
+ case MIRType::Boolean:
+ masm.fallibleUnboxBoolean(value, result, &bail);
+ break;
+ case MIRType::Object:
+ masm.fallibleUnboxObject(value, result, &bail);
+ break;
+ case MIRType::String:
+ masm.fallibleUnboxString(value, result, &bail);
+ break;
+ case MIRType::Symbol:
+ masm.fallibleUnboxSymbol(value, result, &bail);
+ break;
+ case MIRType::BigInt:
+ masm.fallibleUnboxBigInt(value, result, &bail);
+ break;
+ default:
+ MOZ_CRASH("Given MIRType cannot be unboxed.");
+ }
+ bailoutFrom(&bail, unbox->snapshot());
+ return;
+ }
+
+ // Infallible unbox.
+
+ ValueOperand input = ToValue(unbox, LUnbox::Input);
+
+#ifdef DEBUG
+ // Assert the types match.
+ JSValueTag tag = MIRTypeToTag(mir->type());
+ Label ok;
+ {
+ ScratchTagScope scratch(masm, input);
+ masm.splitTagForTest(input, scratch);
+ masm.cmpTag(scratch, ImmTag(tag));
+ }
+ masm.B(&ok, Assembler::Condition::Equal);
+ masm.assumeUnreachable("Infallible unbox type mismatch");
+ masm.bind(&ok);
+#endif
+
+ switch (mir->type()) {
+ case MIRType::Int32:
+ masm.unboxInt32(input, result);
+ break;
+ case MIRType::Boolean:
+ masm.unboxBoolean(input, result);
+ break;
+ case MIRType::Object:
+ masm.unboxObject(input, result);
+ break;
+ case MIRType::String:
+ masm.unboxString(input, result);
+ break;
+ case MIRType::Symbol:
+ masm.unboxSymbol(input, result);
+ break;
+ case MIRType::BigInt:
+ masm.unboxBigInt(input, result);
+ break;
+ default:
+ MOZ_CRASH("Given MIRType cannot be unboxed.");
+ }
+}
+
+void CodeGenerator::visitDouble(LDouble* ins) {
+ const LDefinition* out = ins->getDef(0);
+ masm.loadConstantDouble(ins->value(), ToFloatRegister(out));
+}
+
+void CodeGenerator::visitFloat32(LFloat32* ins) {
+ const LDefinition* out = ins->getDef(0);
+ masm.loadConstantFloat32(ins->value(), ToFloatRegister(out));
+}
+
+void CodeGenerator::visitTestDAndBranch(LTestDAndBranch* test) {
+ const LAllocation* opd = test->input();
+ MBasicBlock* ifTrue = test->ifTrue();
+ MBasicBlock* ifFalse = test->ifFalse();
+
+ masm.Fcmp(ARMFPRegister(ToFloatRegister(opd), 64), 0.0);
+
+ // If the compare set the 0 bit, then the result is definitely false.
+ jumpToBlock(ifFalse, Assembler::Zero);
+
+ // Overflow means one of the operands was NaN, which is also false.
+ jumpToBlock(ifFalse, Assembler::Overflow);
+ jumpToBlock(ifTrue);
+}
+
+void CodeGenerator::visitTestFAndBranch(LTestFAndBranch* test) {
+ const LAllocation* opd = test->input();
+ MBasicBlock* ifTrue = test->ifTrue();
+ MBasicBlock* ifFalse = test->ifFalse();
+
+ masm.Fcmp(ARMFPRegister(ToFloatRegister(opd), 32), 0.0);
+
+ // If the compare set the 0 bit, then the result is definitely false.
+ jumpToBlock(ifFalse, Assembler::Zero);
+
+ // Overflow means one of the operands was NaN, which is also false.
+ jumpToBlock(ifFalse, Assembler::Overflow);
+ jumpToBlock(ifTrue);
+}
+
+void CodeGenerator::visitCompareD(LCompareD* comp) {
+ const FloatRegister left = ToFloatRegister(comp->left());
+ const FloatRegister right = ToFloatRegister(comp->right());
+ ARMRegister output = toWRegister(comp->output());
+ Assembler::DoubleCondition cond = JSOpToDoubleCondition(comp->mir()->jsop());
+
+ masm.compareDouble(cond, left, right);
+ masm.cset(output, Assembler::ConditionFromDoubleCondition(cond));
+}
+
+void CodeGenerator::visitCompareF(LCompareF* comp) {
+ const FloatRegister left = ToFloatRegister(comp->left());
+ const FloatRegister right = ToFloatRegister(comp->right());
+ ARMRegister output = toWRegister(comp->output());
+ Assembler::DoubleCondition cond = JSOpToDoubleCondition(comp->mir()->jsop());
+
+ masm.compareFloat(cond, left, right);
+ masm.cset(output, Assembler::ConditionFromDoubleCondition(cond));
+}
+
+void CodeGenerator::visitCompareDAndBranch(LCompareDAndBranch* comp) {
+ const FloatRegister left = ToFloatRegister(comp->left());
+ const FloatRegister right = ToFloatRegister(comp->right());
+ Assembler::DoubleCondition doubleCond =
+ JSOpToDoubleCondition(comp->cmpMir()->jsop());
+ Assembler::Condition cond =
+ Assembler::ConditionFromDoubleCondition(doubleCond);
+
+ masm.compareDouble(doubleCond, left, right);
+ emitBranch(cond, comp->ifTrue(), comp->ifFalse());
+}
+
+void CodeGenerator::visitCompareFAndBranch(LCompareFAndBranch* comp) {
+ const FloatRegister left = ToFloatRegister(comp->left());
+ const FloatRegister right = ToFloatRegister(comp->right());
+ Assembler::DoubleCondition doubleCond =
+ JSOpToDoubleCondition(comp->cmpMir()->jsop());
+ Assembler::Condition cond =
+ Assembler::ConditionFromDoubleCondition(doubleCond);
+
+ masm.compareFloat(doubleCond, left, right);
+ emitBranch(cond, comp->ifTrue(), comp->ifFalse());
+}
+
+void CodeGenerator::visitBitAndAndBranch(LBitAndAndBranch* baab) {
+ if (baab->is64()) {
+ ARMRegister regL = toXRegister(baab->left());
+ if (baab->right()->isConstant()) {
+ masm.Tst(regL, Operand(ToInt64(baab->right())));
+ } else {
+ masm.Tst(regL, toXRegister(baab->right()));
+ }
+ } else {
+ ARMRegister regL = toWRegister(baab->left());
+ if (baab->right()->isConstant()) {
+ masm.Tst(regL, Operand(ToInt32(baab->right())));
+ } else {
+ masm.Tst(regL, toWRegister(baab->right()));
+ }
+ }
+ emitBranch(baab->cond(), baab->ifTrue(), baab->ifFalse());
+}
+
+void CodeGenerator::visitWasmUint32ToDouble(LWasmUint32ToDouble* lir) {
+ masm.convertUInt32ToDouble(ToRegister(lir->input()),
+ ToFloatRegister(lir->output()));
+}
+
+void CodeGenerator::visitWasmUint32ToFloat32(LWasmUint32ToFloat32* lir) {
+ masm.convertUInt32ToFloat32(ToRegister(lir->input()),
+ ToFloatRegister(lir->output()));
+}
+
+void CodeGenerator::visitNotI(LNotI* ins) {
+ ARMRegister input = toWRegister(ins->input());
+ ARMRegister output = toWRegister(ins->output());
+
+ masm.Cmp(input, ZeroRegister32);
+ masm.Cset(output, Assembler::Zero);
+}
+
+// NZCV
+// NAN -> 0011
+// == -> 0110
+// < -> 1000
+// > -> 0010
+void CodeGenerator::visitNotD(LNotD* ins) {
+ ARMFPRegister input(ToFloatRegister(ins->input()), 64);
+ ARMRegister output = toWRegister(ins->output());
+
+ // Set output to 1 if input compares equal to 0.0, else 0.
+ masm.Fcmp(input, 0.0);
+ masm.Cset(output, Assembler::Equal);
+
+ // Comparison with NaN sets V in the NZCV register.
+ // If the input was NaN, output must now be zero, so it can be incremented.
+ // The instruction is read: "output = if NoOverflow then output else 0+1".
+ masm.Csinc(output, output, ZeroRegister32, Assembler::NoOverflow);
+}
+
+void CodeGenerator::visitNotF(LNotF* ins) {
+ ARMFPRegister input(ToFloatRegister(ins->input()), 32);
+ ARMRegister output = toWRegister(ins->output());
+
+ // Set output to 1 input compares equal to 0.0, else 0.
+ masm.Fcmp(input, 0.0);
+ masm.Cset(output, Assembler::Equal);
+
+ // Comparison with NaN sets V in the NZCV register.
+ // If the input was NaN, output must now be zero, so it can be incremented.
+ // The instruction is read: "output = if NoOverflow then output else 0+1".
+ masm.Csinc(output, output, ZeroRegister32, Assembler::NoOverflow);
+}
+
+void CodeGeneratorARM64::generateInvalidateEpilogue() {
+ // Ensure that there is enough space in the buffer for the OsiPoint patching
+ // to occur. Otherwise, we could overwrite the invalidation epilogue.
+ for (size_t i = 0; i < sizeof(void*); i += Assembler::NopSize()) {
+ masm.nop();
+ }
+
+ masm.bind(&invalidate_);
+
+ // Push the return address of the point that we bailout out onto the stack.
+ masm.push(lr);
+
+ // Push the Ion script onto the stack (when we determine what that pointer
+ // is).
+ invalidateEpilogueData_ = masm.pushWithPatch(ImmWord(uintptr_t(-1)));
+
+ // Jump to the invalidator which will replace the current frame.
+ TrampolinePtr thunk = gen->jitRuntime()->getInvalidationThunk();
+ masm.jump(thunk);
+}
+
+template <class U>
+Register getBase(U* mir) {
+ switch (mir->base()) {
+ case U::Heap:
+ return HeapReg;
+ }
+ return InvalidReg;
+}
+
+void CodeGenerator::visitAsmJSLoadHeap(LAsmJSLoadHeap* ins) {
+ const MAsmJSLoadHeap* mir = ins->mir();
+ MOZ_ASSERT(!mir->hasMemoryBase());
+
+ const LAllocation* ptr = ins->ptr();
+ const LAllocation* boundsCheckLimit = ins->boundsCheckLimit();
+
+ Register ptrReg = ToRegister(ptr);
+ Scalar::Type accessType = mir->accessType();
+ bool isFloat = accessType == Scalar::Float32 || accessType == Scalar::Float64;
+ Label done;
+
+ if (mir->needsBoundsCheck()) {
+ Label boundsCheckPassed;
+ Register boundsCheckLimitReg = ToRegister(boundsCheckLimit);
+ masm.wasmBoundsCheck32(Assembler::Below, ptrReg, boundsCheckLimitReg,
+ &boundsCheckPassed);
+ // Return a default value in case of a bounds-check failure.
+ if (isFloat) {
+ if (accessType == Scalar::Float32) {
+ masm.loadConstantFloat32(GenericNaN(), ToFloatRegister(ins->output()));
+ } else {
+ masm.loadConstantDouble(GenericNaN(), ToFloatRegister(ins->output()));
+ }
+ } else {
+ masm.Mov(ARMRegister(ToRegister(ins->output()), 64), 0);
+ }
+ masm.jump(&done);
+ masm.bind(&boundsCheckPassed);
+ }
+
+ MemOperand addr(ARMRegister(HeapReg, 64), ARMRegister(ptrReg, 64));
+ switch (accessType) {
+ case Scalar::Int8:
+ masm.Ldrb(toWRegister(ins->output()), addr);
+ masm.Sxtb(toWRegister(ins->output()), toWRegister(ins->output()));
+ break;
+ case Scalar::Uint8:
+ masm.Ldrb(toWRegister(ins->output()), addr);
+ break;
+ case Scalar::Int16:
+ masm.Ldrh(toWRegister(ins->output()), addr);
+ masm.Sxth(toWRegister(ins->output()), toWRegister(ins->output()));
+ break;
+ case Scalar::Uint16:
+ masm.Ldrh(toWRegister(ins->output()), addr);
+ break;
+ case Scalar::Int32:
+ case Scalar::Uint32:
+ masm.Ldr(toWRegister(ins->output()), addr);
+ break;
+ case Scalar::Float64:
+ masm.Ldr(ARMFPRegister(ToFloatRegister(ins->output()), 64), addr);
+ break;
+ case Scalar::Float32:
+ masm.Ldr(ARMFPRegister(ToFloatRegister(ins->output()), 32), addr);
+ break;
+ default:
+ MOZ_CRASH("unexpected array type");
+ }
+ if (done.used()) {
+ masm.bind(&done);
+ }
+}
+
+void CodeGenerator::visitAsmJSStoreHeap(LAsmJSStoreHeap* ins) {
+ const MAsmJSStoreHeap* mir = ins->mir();
+ MOZ_ASSERT(!mir->hasMemoryBase());
+
+ const LAllocation* ptr = ins->ptr();
+ const LAllocation* boundsCheckLimit = ins->boundsCheckLimit();
+
+ Register ptrReg = ToRegister(ptr);
+
+ Label done;
+ if (mir->needsBoundsCheck()) {
+ Register boundsCheckLimitReg = ToRegister(boundsCheckLimit);
+ masm.wasmBoundsCheck32(Assembler::AboveOrEqual, ptrReg, boundsCheckLimitReg,
+ &done);
+ }
+
+ MemOperand addr(ARMRegister(HeapReg, 64), ARMRegister(ptrReg, 64));
+ switch (mir->accessType()) {
+ case Scalar::Int8:
+ case Scalar::Uint8:
+ masm.Strb(toWRegister(ins->value()), addr);
+ break;
+ case Scalar::Int16:
+ case Scalar::Uint16:
+ masm.Strh(toWRegister(ins->value()), addr);
+ break;
+ case Scalar::Int32:
+ case Scalar::Uint32:
+ masm.Str(toWRegister(ins->value()), addr);
+ break;
+ case Scalar::Float64:
+ masm.Str(ARMFPRegister(ToFloatRegister(ins->value()), 64), addr);
+ break;
+ case Scalar::Float32:
+ masm.Str(ARMFPRegister(ToFloatRegister(ins->value()), 32), addr);
+ break;
+ default:
+ MOZ_CRASH("unexpected array type");
+ }
+ if (done.used()) {
+ masm.bind(&done);
+ }
+}
+
+void CodeGenerator::visitWasmCompareExchangeHeap(
+ LWasmCompareExchangeHeap* ins) {
+ MWasmCompareExchangeHeap* mir = ins->mir();
+
+ Register ptr = ToRegister(ins->ptr());
+ Register oldval = ToRegister(ins->oldValue());
+ Register newval = ToRegister(ins->newValue());
+ Register out = ToRegister(ins->output());
+ MOZ_ASSERT(ins->addrTemp()->isBogusTemp());
+
+ BaseIndex srcAddr(HeapReg, ptr, TimesOne, mir->access().offset());
+
+ if (mir->access().type() == Scalar::Int64) {
+ masm.wasmCompareExchange64(mir->access(), srcAddr, Register64(oldval),
+ Register64(newval), Register64(out));
+ } else {
+ masm.wasmCompareExchange(mir->access(), srcAddr, oldval, newval, out);
+ }
+}
+
+void CodeGenerator::visitWasmAtomicExchangeHeap(LWasmAtomicExchangeHeap* ins) {
+ MWasmAtomicExchangeHeap* mir = ins->mir();
+
+ Register ptr = ToRegister(ins->ptr());
+ Register oldval = ToRegister(ins->value());
+ Register out = ToRegister(ins->output());
+ MOZ_ASSERT(ins->addrTemp()->isBogusTemp());
+
+ BaseIndex srcAddr(HeapReg, ptr, TimesOne, mir->access().offset());
+
+ if (mir->access().type() == Scalar::Int64) {
+ masm.wasmAtomicExchange64(mir->access(), srcAddr, Register64(oldval),
+ Register64(out));
+ } else {
+ masm.wasmAtomicExchange(mir->access(), srcAddr, oldval, out);
+ }
+}
+
+void CodeGenerator::visitWasmAtomicBinopHeap(LWasmAtomicBinopHeap* ins) {
+ MWasmAtomicBinopHeap* mir = ins->mir();
+
+ MOZ_ASSERT(mir->hasUses());
+
+ Register ptr = ToRegister(ins->ptr());
+ Register value = ToRegister(ins->value());
+ Register flagTemp = ToRegister(ins->flagTemp());
+ Register out = ToRegister(ins->output());
+ MOZ_ASSERT(ins->temp()->isBogusTemp());
+ MOZ_ASSERT(ins->addrTemp()->isBogusTemp());
+
+ BaseIndex srcAddr(HeapReg, ptr, TimesOne, mir->access().offset());
+ AtomicOp op = mir->operation();
+
+ if (mir->access().type() == Scalar::Int64) {
+ masm.wasmAtomicFetchOp64(mir->access(), op, Register64(value), srcAddr,
+ Register64(flagTemp), Register64(out));
+ } else {
+ masm.wasmAtomicFetchOp(mir->access(), op, value, srcAddr, flagTemp, out);
+ }
+}
+
+void CodeGenerator::visitWasmAtomicBinopHeapForEffect(
+ LWasmAtomicBinopHeapForEffect* ins) {
+ MWasmAtomicBinopHeap* mir = ins->mir();
+
+ MOZ_ASSERT(!mir->hasUses());
+
+ Register ptr = ToRegister(ins->ptr());
+ Register value = ToRegister(ins->value());
+ Register flagTemp = ToRegister(ins->flagTemp());
+ MOZ_ASSERT(ins->addrTemp()->isBogusTemp());
+
+ BaseIndex srcAddr(HeapReg, ptr, TimesOne, mir->access().offset());
+ AtomicOp op = mir->operation();
+
+ if (mir->access().type() == Scalar::Int64) {
+ masm.wasmAtomicEffectOp64(mir->access(), op, Register64(value), srcAddr,
+ Register64(flagTemp));
+ } else {
+ masm.wasmAtomicEffectOp(mir->access(), op, value, srcAddr, flagTemp);
+ }
+}
+
+void CodeGenerator::visitWasmStackArg(LWasmStackArg* ins) {
+ const MWasmStackArg* mir = ins->mir();
+ Address dst(masm.getStackPointer(), mir->spOffset());
+ if (ins->arg()->isConstant()) {
+ masm.storePtr(ImmWord(ToInt32(ins->arg())), dst);
+ } else if (ins->arg()->isGeneralReg()) {
+ masm.storePtr(ToRegister(ins->arg()), dst);
+ } else {
+ switch (mir->input()->type()) {
+ case MIRType::Double:
+ masm.storeDouble(ToFloatRegister(ins->arg()), dst);
+ return;
+ case MIRType::Float32:
+ masm.storeFloat32(ToFloatRegister(ins->arg()), dst);
+ return;
+#ifdef ENABLE_WASM_SIMD
+ case MIRType::Simd128:
+ masm.storeUnalignedSimd128(ToFloatRegister(ins->arg()), dst);
+ return;
+#endif
+ default:
+ break;
+ }
+ MOZ_MAKE_COMPILER_ASSUME_IS_UNREACHABLE(
+ "unexpected mir type in WasmStackArg");
+ }
+}
+
+void CodeGenerator::visitUDiv(LUDiv* ins) {
+ MDiv* mir = ins->mir();
+ Register lhs = ToRegister(ins->lhs());
+ Register rhs = ToRegister(ins->rhs());
+ Register output = ToRegister(ins->output());
+ ARMRegister lhs32 = ARMRegister(lhs, 32);
+ ARMRegister rhs32 = ARMRegister(rhs, 32);
+ ARMRegister output32 = ARMRegister(output, 32);
+
+ // Prevent divide by zero.
+ if (mir->canBeDivideByZero()) {
+ if (mir->isTruncated()) {
+ if (mir->trapOnError()) {
+ Label nonZero;
+ masm.branchTest32(Assembler::NonZero, rhs, rhs, &nonZero);
+ masm.wasmTrap(wasm::Trap::IntegerDivideByZero, mir->bytecodeOffset());
+ masm.bind(&nonZero);
+ } else {
+ // ARM64 UDIV instruction will return 0 when divided by 0.
+ // No need for extra tests.
+ }
+ } else {
+ bailoutTest32(Assembler::Zero, rhs, rhs, ins->snapshot());
+ }
+ }
+
+ // Unsigned division.
+ masm.Udiv(output32, lhs32, rhs32);
+
+ // If the remainder is > 0, bailout since this must be a double.
+ if (!mir->canTruncateRemainder()) {
+ Register remainder = ToRegister(ins->remainder());
+ ARMRegister remainder32 = ARMRegister(remainder, 32);
+
+ // Compute the remainder: remainder = lhs - (output * rhs).
+ masm.Msub(remainder32, output32, rhs32, lhs32);
+
+ bailoutTest32(Assembler::NonZero, remainder, remainder, ins->snapshot());
+ }
+
+ // Unsigned div can return a value that's not a signed int32.
+ // If our users aren't expecting that, bail.
+ if (!mir->isTruncated()) {
+ bailoutTest32(Assembler::Signed, output, output, ins->snapshot());
+ }
+}
+
+void CodeGenerator::visitUMod(LUMod* ins) {
+ MMod* mir = ins->mir();
+ ARMRegister lhs = toWRegister(ins->lhs());
+ ARMRegister rhs = toWRegister(ins->rhs());
+ ARMRegister output = toWRegister(ins->output());
+ Label done;
+
+ if (mir->canBeDivideByZero()) {
+ if (mir->isTruncated()) {
+ if (mir->trapOnError()) {
+ Label nonZero;
+ masm.Cbnz(rhs, &nonZero);
+ masm.wasmTrap(wasm::Trap::IntegerDivideByZero, mir->bytecodeOffset());
+ masm.bind(&nonZero);
+ } else {
+ // Truncated division by zero yields integer zero.
+ masm.Mov(output, rhs);
+ masm.Cbz(rhs, &done);
+ }
+ } else {
+ // Non-truncated division by zero produces a non-integer.
+ masm.Cmp(rhs, Operand(0));
+ bailoutIf(Assembler::Equal, ins->snapshot());
+ }
+ }
+
+ // Unsigned division.
+ masm.Udiv(output, lhs, rhs);
+
+ // Compute the remainder: output = lhs - (output * rhs).
+ masm.Msub(output, output, rhs, lhs);
+
+ if (!mir->isTruncated()) {
+ // Bail if the output would be negative.
+ //
+ // LUMod inputs may be Uint32, so care is taken to ensure the result
+ // is not unexpectedly signed.
+ bailoutCmp32(Assembler::LessThan, output, Imm32(0), ins->snapshot());
+ }
+
+ if (done.used()) {
+ masm.bind(&done);
+ }
+}
+
+void CodeGenerator::visitEffectiveAddress(LEffectiveAddress* ins) {
+ const MEffectiveAddress* mir = ins->mir();
+ const ARMRegister base = toWRegister(ins->base());
+ const ARMRegister index = toWRegister(ins->index());
+ const ARMRegister output = toWRegister(ins->output());
+
+ masm.Add(output, base, Operand(index, vixl::LSL, mir->scale()));
+ masm.Add(output, output, Operand(mir->displacement()));
+}
+
+void CodeGenerator::visitNegI(LNegI* ins) {
+ const ARMRegister input = toWRegister(ins->input());
+ const ARMRegister output = toWRegister(ins->output());
+ masm.Neg(output, input);
+}
+
+void CodeGenerator::visitNegI64(LNegI64* ins) {
+ const ARMRegister input = toXRegister(ins->input());
+ const ARMRegister output = toXRegister(ins->output());
+ masm.Neg(output, input);
+}
+
+void CodeGenerator::visitNegD(LNegD* ins) {
+ const ARMFPRegister input(ToFloatRegister(ins->input()), 64);
+ const ARMFPRegister output(ToFloatRegister(ins->output()), 64);
+ masm.Fneg(output, input);
+}
+
+void CodeGenerator::visitNegF(LNegF* ins) {
+ const ARMFPRegister input(ToFloatRegister(ins->input()), 32);
+ const ARMFPRegister output(ToFloatRegister(ins->output()), 32);
+ masm.Fneg(output, input);
+}
+
+void CodeGenerator::visitCompareExchangeTypedArrayElement(
+ LCompareExchangeTypedArrayElement* lir) {
+ Register elements = ToRegister(lir->elements());
+ AnyRegister output = ToAnyRegister(lir->output());
+ Register temp =
+ lir->temp()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp());
+
+ Register oldval = ToRegister(lir->oldval());
+ Register newval = ToRegister(lir->newval());
+
+ Scalar::Type arrayType = lir->mir()->arrayType();
+
+ if (lir->index()->isConstant()) {
+ Address dest = ToAddress(elements, lir->index(), arrayType);
+ masm.compareExchangeJS(arrayType, Synchronization::Full(), dest, oldval,
+ newval, temp, output);
+ } else {
+ BaseIndex dest(elements, ToRegister(lir->index()),
+ ScaleFromScalarType(arrayType));
+ masm.compareExchangeJS(arrayType, Synchronization::Full(), dest, oldval,
+ newval, temp, output);
+ }
+}
+
+void CodeGenerator::visitAtomicExchangeTypedArrayElement(
+ LAtomicExchangeTypedArrayElement* lir) {
+ Register elements = ToRegister(lir->elements());
+ AnyRegister output = ToAnyRegister(lir->output());
+ Register temp =
+ lir->temp()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp());
+
+ Register value = ToRegister(lir->value());
+
+ Scalar::Type arrayType = lir->mir()->arrayType();
+
+ if (lir->index()->isConstant()) {
+ Address dest = ToAddress(elements, lir->index(), arrayType);
+ masm.atomicExchangeJS(arrayType, Synchronization::Full(), dest, value, temp,
+ output);
+ } else {
+ BaseIndex dest(elements, ToRegister(lir->index()),
+ ScaleFromScalarType(arrayType));
+ masm.atomicExchangeJS(arrayType, Synchronization::Full(), dest, value, temp,
+ output);
+ }
+}
+
+void CodeGenerator::visitAtomicLoad64(LAtomicLoad64* lir) {
+ Register elements = ToRegister(lir->elements());
+ Register temp = ToRegister(lir->temp());
+ Register64 temp64 = ToRegister64(lir->temp64());
+ Register out = ToRegister(lir->output());
+
+ const MLoadUnboxedScalar* mir = lir->mir();
+
+ Scalar::Type storageType = mir->storageType();
+
+ // NOTE: the generated code must match the assembly code in gen_load in
+ // GenerateAtomicOperations.py
+ auto sync = Synchronization::Load();
+
+ masm.memoryBarrierBefore(sync);
+ if (lir->index()->isConstant()) {
+ Address source =
+ ToAddress(elements, lir->index(), storageType, mir->offsetAdjustment());
+ masm.load64(source, temp64);
+ } else {
+ BaseIndex source(elements, ToRegister(lir->index()),
+ ScaleFromScalarType(storageType), mir->offsetAdjustment());
+ masm.load64(source, temp64);
+ }
+ masm.memoryBarrierAfter(sync);
+
+ emitCreateBigInt(lir, storageType, temp64, out, temp);
+}
+
+void CodeGenerator::visitAtomicStore64(LAtomicStore64* lir) {
+ Register elements = ToRegister(lir->elements());
+ Register value = ToRegister(lir->value());
+ Register64 temp1 = ToRegister64(lir->temp1());
+
+ Scalar::Type writeType = lir->mir()->writeType();
+
+ masm.loadBigInt64(value, temp1);
+
+ // NOTE: the generated code must match the assembly code in gen_store in
+ // GenerateAtomicOperations.py
+ auto sync = Synchronization::Store();
+
+ masm.memoryBarrierBefore(sync);
+ if (lir->index()->isConstant()) {
+ Address dest = ToAddress(elements, lir->index(), writeType);
+ masm.store64(temp1, dest);
+ } else {
+ BaseIndex dest(elements, ToRegister(lir->index()),
+ ScaleFromScalarType(writeType));
+ masm.store64(temp1, dest);
+ }
+ masm.memoryBarrierAfter(sync);
+}
+
+void CodeGenerator::visitCompareExchangeTypedArrayElement64(
+ LCompareExchangeTypedArrayElement64* lir) {
+ Register elements = ToRegister(lir->elements());
+ Register oldval = ToRegister(lir->oldval());
+ Register newval = ToRegister(lir->newval());
+ Register64 temp1 = ToRegister64(lir->temp1());
+ Register64 temp2 = ToRegister64(lir->temp2());
+ Register out = ToRegister(lir->output());
+ Register64 tempOut(out);
+
+ Scalar::Type arrayType = lir->mir()->arrayType();
+
+ masm.loadBigInt64(oldval, temp1);
+ masm.loadBigInt64(newval, tempOut);
+
+ if (lir->index()->isConstant()) {
+ Address dest = ToAddress(elements, lir->index(), arrayType);
+ masm.compareExchange64(Synchronization::Full(), dest, temp1, tempOut,
+ temp2);
+ } else {
+ BaseIndex dest(elements, ToRegister(lir->index()),
+ ScaleFromScalarType(arrayType));
+ masm.compareExchange64(Synchronization::Full(), dest, temp1, tempOut,
+ temp2);
+ }
+
+ emitCreateBigInt(lir, arrayType, temp2, out, temp1.scratchReg());
+}
+
+void CodeGenerator::visitAtomicExchangeTypedArrayElement64(
+ LAtomicExchangeTypedArrayElement64* lir) {
+ Register elements = ToRegister(lir->elements());
+ Register value = ToRegister(lir->value());
+ Register64 temp1 = ToRegister64(lir->temp1());
+ Register64 temp2 = Register64(ToRegister(lir->temp2()));
+ Register out = ToRegister(lir->output());
+
+ Scalar::Type arrayType = lir->mir()->arrayType();
+
+ masm.loadBigInt64(value, temp1);
+
+ if (lir->index()->isConstant()) {
+ Address dest = ToAddress(elements, lir->index(), arrayType);
+ masm.atomicExchange64(Synchronization::Full(), dest, temp1, temp2);
+ } else {
+ BaseIndex dest(elements, ToRegister(lir->index()),
+ ScaleFromScalarType(arrayType));
+ masm.atomicExchange64(Synchronization::Full(), dest, temp1, temp2);
+ }
+
+ emitCreateBigInt(lir, arrayType, temp2, out, temp1.scratchReg());
+}
+
+void CodeGenerator::visitAtomicTypedArrayElementBinop64(
+ LAtomicTypedArrayElementBinop64* lir) {
+ MOZ_ASSERT(!lir->mir()->isForEffect());
+
+ Register elements = ToRegister(lir->elements());
+ Register value = ToRegister(lir->value());
+ Register64 temp1 = ToRegister64(lir->temp1());
+ Register64 temp2 = ToRegister64(lir->temp2());
+ Register out = ToRegister(lir->output());
+ Register64 tempOut = Register64(out);
+
+ Scalar::Type arrayType = lir->mir()->arrayType();
+ AtomicOp atomicOp = lir->mir()->operation();
+
+ masm.loadBigInt64(value, temp1);
+
+ if (lir->index()->isConstant()) {
+ Address dest = ToAddress(elements, lir->index(), arrayType);
+ masm.atomicFetchOp64(Synchronization::Full(), atomicOp, temp1, dest,
+ tempOut, temp2);
+ } else {
+ BaseIndex dest(elements, ToRegister(lir->index()),
+ ScaleFromScalarType(arrayType));
+ masm.atomicFetchOp64(Synchronization::Full(), atomicOp, temp1, dest,
+ tempOut, temp2);
+ }
+
+ emitCreateBigInt(lir, arrayType, temp2, out, temp1.scratchReg());
+}
+
+void CodeGenerator::visitAtomicTypedArrayElementBinopForEffect64(
+ LAtomicTypedArrayElementBinopForEffect64* lir) {
+ MOZ_ASSERT(lir->mir()->isForEffect());
+
+ Register elements = ToRegister(lir->elements());
+ Register value = ToRegister(lir->value());
+ Register64 temp1 = ToRegister64(lir->temp1());
+ Register64 temp2 = ToRegister64(lir->temp2());
+
+ Scalar::Type arrayType = lir->mir()->arrayType();
+ AtomicOp atomicOp = lir->mir()->operation();
+
+ masm.loadBigInt64(value, temp1);
+
+ if (lir->index()->isConstant()) {
+ Address dest = ToAddress(elements, lir->index(), arrayType);
+ masm.atomicEffectOp64(Synchronization::Full(), atomicOp, temp1, dest,
+ temp2);
+ } else {
+ BaseIndex dest(elements, ToRegister(lir->index()),
+ ScaleFromScalarType(arrayType));
+ masm.atomicEffectOp64(Synchronization::Full(), atomicOp, temp1, dest,
+ temp2);
+ }
+}
+
+void CodeGeneratorARM64::emitSimpleBinaryI64(
+ LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>* lir, JSOp op) {
+ const ARMRegister dest = ARMRegister(ToOutRegister64(lir).reg, 64);
+ const ARMRegister lhs =
+ ARMRegister(ToRegister64(lir->getInt64Operand(0)).reg, 64);
+ const LInt64Allocation rhsAlloc = lir->getInt64Operand(INT64_PIECES);
+ Operand rhs;
+
+ if (IsConstant(rhsAlloc)) {
+ rhs = Operand(ToInt64(rhsAlloc));
+ } else {
+ rhs = Operand(ARMRegister(ToRegister64(rhsAlloc).reg, 64));
+ }
+ switch (op) {
+ case JSOp::Add:
+ masm.Add(dest, lhs, rhs);
+ break;
+ case JSOp::Sub:
+ masm.Sub(dest, lhs, rhs);
+ break;
+ case JSOp::BitOr:
+ masm.Orr(dest, lhs, rhs);
+ break;
+ case JSOp::BitXor:
+ masm.Eor(dest, lhs, rhs);
+ break;
+ case JSOp::BitAnd:
+ masm.And(dest, lhs, rhs);
+ break;
+ default:
+ MOZ_CRASH("unexpected binary opcode");
+ }
+}
+
+void CodeGenerator::visitAddI64(LAddI64* lir) {
+ emitSimpleBinaryI64(lir, JSOp::Add);
+}
+
+void CodeGenerator::visitClzI64(LClzI64* ins) {
+ masm.clz64(ToRegister64(ins->getInt64Operand(0)), ToRegister(ins->output()));
+}
+
+void CodeGenerator::visitCtzI64(LCtzI64* ins) {
+ masm.ctz64(ToRegister64(ins->getInt64Operand(0)), ToRegister(ins->output()));
+}
+
+void CodeGenerator::visitMulI64(LMulI64* lir) {
+ const LInt64Allocation lhs = lir->getInt64Operand(LMulI64::Lhs);
+ const LInt64Allocation rhs = lir->getInt64Operand(LMulI64::Rhs);
+ const Register64 output = ToOutRegister64(lir);
+
+ if (IsConstant(rhs)) {
+ int64_t constant = ToInt64(rhs);
+ // Ad-hoc strength reduction, cf the x64 code as well as the 32-bit code
+ // higher up in this file. Bug 1712298 will lift this code to the MIR
+ // constant folding pass, or to lowering.
+ //
+ // This is for wasm integers only, so no input guards or overflow checking
+ // are needed.
+ switch (constant) {
+ case -1:
+ masm.Neg(ARMRegister(output.reg, 64),
+ ARMRegister(ToRegister64(lhs).reg, 64));
+ break;
+ case 0:
+ masm.Mov(ARMRegister(output.reg, 64), xzr);
+ break;
+ case 1:
+ if (ToRegister64(lhs) != output) {
+ masm.move64(ToRegister64(lhs), output);
+ }
+ break;
+ case 2:
+ masm.Add(ARMRegister(output.reg, 64),
+ ARMRegister(ToRegister64(lhs).reg, 64),
+ ARMRegister(ToRegister64(lhs).reg, 64));
+ break;
+ default:
+ // Use shift if constant is nonnegative power of 2.
+ if (constant > 0) {
+ int32_t shift = mozilla::FloorLog2(constant);
+ if (int64_t(1) << shift == constant) {
+ masm.Lsl(ARMRegister(output.reg, 64),
+ ARMRegister(ToRegister64(lhs).reg, 64), shift);
+ break;
+ }
+ }
+ masm.mul64(Imm64(constant), ToRegister64(lhs), output);
+ break;
+ }
+ } else {
+ masm.mul64(ToRegister64(lhs), ToRegister64(rhs), output);
+ }
+}
+
+void CodeGenerator::visitNotI64(LNotI64* lir) {
+ const Register64 input = ToRegister64(lir->getInt64Operand(0));
+ const Register64 output = ToOutRegister64(lir);
+ masm.Cmp(ARMRegister(input.reg, 64), ZeroRegister64);
+ masm.Cset(ARMRegister(output.reg, 64), Assembler::Zero);
+}
+
+void CodeGenerator::visitSubI64(LSubI64* lir) {
+ emitSimpleBinaryI64(lir, JSOp::Sub);
+}
+
+void CodeGenerator::visitPopcntI(LPopcntI* ins) {
+ Register input = ToRegister(ins->input());
+ Register output = ToRegister(ins->output());
+ Register temp = ToRegister(ins->temp0());
+ masm.popcnt32(input, output, temp);
+}
+
+void CodeGenerator::visitBitOpI64(LBitOpI64* lir) {
+ emitSimpleBinaryI64(lir, lir->bitop());
+}
+
+void CodeGenerator::visitShiftI64(LShiftI64* lir) {
+ ARMRegister lhs(ToRegister64(lir->getInt64Operand(LShiftI64::Lhs)).reg, 64);
+ LAllocation* rhsAlloc = lir->getOperand(LShiftI64::Rhs);
+ ARMRegister dest(ToOutRegister64(lir).reg, 64);
+
+ if (rhsAlloc->isConstant()) {
+ int32_t shift = int32_t(rhsAlloc->toConstant()->toInt64() & 0x3F);
+ if (shift == 0) {
+ if (lhs.code() != dest.code()) {
+ masm.Mov(dest, lhs);
+ }
+ } else {
+ switch (lir->bitop()) {
+ case JSOp::Lsh:
+ masm.Lsl(dest, lhs, shift);
+ break;
+ case JSOp::Rsh:
+ masm.Asr(dest, lhs, shift);
+ break;
+ case JSOp::Ursh:
+ masm.Lsr(dest, lhs, shift);
+ break;
+ default:
+ MOZ_CRASH("Unexpected shift op");
+ }
+ }
+ } else {
+ ARMRegister rhs(ToRegister(rhsAlloc), 64);
+ switch (lir->bitop()) {
+ case JSOp::Lsh:
+ masm.Lsl(dest, lhs, rhs);
+ break;
+ case JSOp::Rsh:
+ masm.Asr(dest, lhs, rhs);
+ break;
+ case JSOp::Ursh:
+ masm.Lsr(dest, lhs, rhs);
+ break;
+ default:
+ MOZ_CRASH("Unexpected shift op");
+ }
+ }
+}
+
+void CodeGenerator::visitWasmHeapBase(LWasmHeapBase* ins) {
+ MOZ_ASSERT(ins->instance()->isBogus());
+ masm.movePtr(HeapReg, ToRegister(ins->output()));
+}
+
+// If we have a constant base ptr, try to add the offset to it, to generate
+// better code when the full address is known. The addition may overflow past
+// 32 bits because the front end does nothing special if the base is a large
+// constant and base+offset overflows; sidestep this by performing the addition
+// anyway, overflowing to 64-bit.
+
+static Maybe<uint64_t> IsAbsoluteAddress(const LAllocation* ptr,
+ const wasm::MemoryAccessDesc& access) {
+ if (ptr->isConstantValue()) {
+ const MConstant* c = ptr->toConstant();
+ uint64_t base_address = c->type() == MIRType::Int32
+ ? uint64_t(uint32_t(c->toInt32()))
+ : uint64_t(c->toInt64());
+ uint64_t offset = access.offset();
+ return Some(base_address + offset);
+ }
+ return Nothing();
+}
+
+void CodeGenerator::visitWasmLoad(LWasmLoad* lir) {
+ const MWasmLoad* mir = lir->mir();
+
+ if (Maybe<uint64_t> absAddr = IsAbsoluteAddress(lir->ptr(), mir->access())) {
+ masm.wasmLoadAbsolute(mir->access(), HeapReg, absAddr.value(),
+ ToAnyRegister(lir->output()), Register64::Invalid());
+ return;
+ }
+
+ // ptr is a GPR and is either a 32-bit value zero-extended to 64-bit, or a
+ // true 64-bit value.
+ masm.wasmLoad(mir->access(), HeapReg, ToRegister(lir->ptr()),
+ ToAnyRegister(lir->output()));
+}
+
+void CodeGenerator::visitCopySignD(LCopySignD* ins) {
+ MOZ_ASSERT(ins->getTemp(0)->isBogusTemp());
+ MOZ_ASSERT(ins->getTemp(1)->isBogusTemp());
+ masm.copySignDouble(ToFloatRegister(ins->getOperand(0)),
+ ToFloatRegister(ins->getOperand(1)),
+ ToFloatRegister(ins->getDef(0)));
+}
+
+void CodeGenerator::visitCopySignF(LCopySignF* ins) {
+ MOZ_ASSERT(ins->getTemp(0)->isBogusTemp());
+ MOZ_ASSERT(ins->getTemp(1)->isBogusTemp());
+ masm.copySignFloat32(ToFloatRegister(ins->getOperand(0)),
+ ToFloatRegister(ins->getOperand(1)),
+ ToFloatRegister(ins->getDef(0)));
+}
+
+void CodeGenerator::visitPopcntI64(LPopcntI64* lir) {
+ Register64 input = ToRegister64(lir->getInt64Operand(0));
+ Register64 output = ToOutRegister64(lir);
+ Register temp = ToRegister(lir->getTemp(0));
+ masm.popcnt64(input, output, temp);
+}
+
+void CodeGenerator::visitRotateI64(LRotateI64* lir) {
+ bool rotateLeft = lir->mir()->isLeftRotate();
+ Register64 input = ToRegister64(lir->input());
+ Register64 output = ToOutRegister64(lir);
+ const LAllocation* count = lir->count();
+
+ if (count->isConstant()) {
+ int32_t c = int32_t(count->toConstant()->toInt64() & 0x3F);
+ if (c == 0) {
+ if (input != output) {
+ masm.move64(input, output);
+ return;
+ }
+ }
+ if (rotateLeft) {
+ masm.rotateLeft64(Imm32(c), input, output, InvalidReg);
+ } else {
+ masm.rotateRight64(Imm32(c), input, output, InvalidReg);
+ }
+ } else {
+ Register c = ToRegister(count);
+ if (rotateLeft) {
+ masm.rotateLeft64(c, input, output, InvalidReg);
+ } else {
+ masm.rotateRight64(c, input, output, InvalidReg);
+ }
+ }
+}
+
+void CodeGenerator::visitWasmStore(LWasmStore* lir) {
+ const MWasmStore* mir = lir->mir();
+
+ if (Maybe<uint64_t> absAddr = IsAbsoluteAddress(lir->ptr(), mir->access())) {
+ masm.wasmStoreAbsolute(mir->access(), ToAnyRegister(lir->value()),
+ Register64::Invalid(), HeapReg, absAddr.value());
+ return;
+ }
+
+ masm.wasmStore(mir->access(), ToAnyRegister(lir->value()), HeapReg,
+ ToRegister(lir->ptr()));
+}
+
+void CodeGenerator::visitCompareI64(LCompareI64* lir) {
+ MCompare* mir = lir->mir();
+ MOZ_ASSERT(mir->compareType() == MCompare::Compare_Int64 ||
+ mir->compareType() == MCompare::Compare_UInt64);
+
+ const LInt64Allocation lhs = lir->getInt64Operand(LCompareI64::Lhs);
+ const LInt64Allocation rhs = lir->getInt64Operand(LCompareI64::Rhs);
+ Register lhsReg = ToRegister64(lhs).reg;
+ Register output = ToRegister(lir->output());
+ bool isSigned = mir->compareType() == MCompare::Compare_Int64;
+
+ if (IsConstant(rhs)) {
+ masm.cmpPtrSet(JSOpToCondition(lir->jsop(), isSigned), lhsReg,
+ ImmWord(ToInt64(rhs)), output);
+ } else if (rhs.value().isGeneralReg()) {
+ masm.cmpPtrSet(JSOpToCondition(lir->jsop(), isSigned), lhsReg,
+ ToRegister64(rhs).reg, output);
+ } else {
+ masm.cmpPtrSet(
+ GetCondForSwappedOperands(JSOpToCondition(lir->jsop(), isSigned)),
+ ToAddress(rhs.value()), lhsReg, output);
+ }
+}
+
+void CodeGenerator::visitWasmSelect(LWasmSelect* lir) {
+ MIRType mirType = lir->mir()->type();
+ Register condReg = ToRegister(lir->condExpr());
+
+ masm.test32(condReg, condReg);
+
+ switch (mirType) {
+ case MIRType::Int32:
+ case MIRType::RefOrNull: {
+ Register outReg = ToRegister(lir->output());
+ Register trueReg = ToRegister(lir->trueExpr());
+ Register falseReg = ToRegister(lir->falseExpr());
+
+ if (mirType == MIRType::Int32) {
+ masm.Csel(ARMRegister(outReg, 32), ARMRegister(trueReg, 32),
+ ARMRegister(falseReg, 32), Assembler::NonZero);
+ } else {
+ masm.Csel(ARMRegister(outReg, 64), ARMRegister(trueReg, 64),
+ ARMRegister(falseReg, 64), Assembler::NonZero);
+ }
+ break;
+ }
+
+ case MIRType::Float32:
+ case MIRType::Double:
+ case MIRType::Simd128: {
+ FloatRegister outReg = ToFloatRegister(lir->output());
+ FloatRegister trueReg = ToFloatRegister(lir->trueExpr());
+ FloatRegister falseReg = ToFloatRegister(lir->falseExpr());
+
+ switch (mirType) {
+ case MIRType::Float32:
+ masm.Fcsel(ARMFPRegister(outReg, 32), ARMFPRegister(trueReg, 32),
+ ARMFPRegister(falseReg, 32), Assembler::NonZero);
+ break;
+ case MIRType::Double:
+ masm.Fcsel(ARMFPRegister(outReg, 64), ARMFPRegister(trueReg, 64),
+ ARMFPRegister(falseReg, 64), Assembler::NonZero);
+ break;
+#ifdef ENABLE_WASM_SIMD
+ case MIRType::Simd128: {
+ MOZ_ASSERT(outReg == trueReg);
+ Label done;
+ masm.j(Assembler::NonZero, &done);
+ masm.moveSimd128(falseReg, outReg);
+ masm.bind(&done);
+ break;
+ }
+#endif
+ default:
+ MOZ_CRASH();
+ }
+ break;
+ }
+
+ default: {
+ MOZ_CRASH("unhandled type in visitWasmSelect!");
+ }
+ }
+}
+
+// We expect to handle the cases: compare is {{U,}Int32, {U,}Int64}, Float32,
+// Double}, and select is {{U,}Int32, {U,}Int64}, Float32, Double},
+// independently.
+void CodeGenerator::visitWasmCompareAndSelect(LWasmCompareAndSelect* ins) {
+ MCompare::CompareType compTy = ins->compareType();
+
+ // Set flag.
+ if (compTy == MCompare::Compare_Int32 || compTy == MCompare::Compare_UInt32) {
+ Register lhs = ToRegister(ins->leftExpr());
+ if (ins->rightExpr()->isConstant()) {
+ masm.cmp32(lhs, Imm32(ins->rightExpr()->toConstant()->toInt32()));
+ } else {
+ masm.cmp32(lhs, ToRegister(ins->rightExpr()));
+ }
+ } else if (compTy == MCompare::Compare_Int64 ||
+ compTy == MCompare::Compare_UInt64) {
+ Register lhs = ToRegister(ins->leftExpr());
+ if (ins->rightExpr()->isConstant()) {
+ masm.cmpPtr(lhs, Imm64(ins->rightExpr()->toConstant()->toInt64()));
+ } else {
+ masm.cmpPtr(lhs, ToRegister(ins->rightExpr()));
+ }
+ } else if (compTy == MCompare::Compare_Float32) {
+ masm.compareFloat(JSOpToDoubleCondition(ins->jsop()),
+ ToFloatRegister(ins->leftExpr()),
+ ToFloatRegister(ins->rightExpr()));
+ } else if (compTy == MCompare::Compare_Double) {
+ masm.compareDouble(JSOpToDoubleCondition(ins->jsop()),
+ ToFloatRegister(ins->leftExpr()),
+ ToFloatRegister(ins->rightExpr()));
+ } else {
+ // Ref types not supported yet; v128 is not yet observed to be worth
+ // optimizing.
+ MOZ_CRASH("CodeGenerator::visitWasmCompareAndSelect: unexpected type (1)");
+ }
+
+ // Act on flag.
+ Assembler::Condition cond;
+ if (compTy == MCompare::Compare_Float32 ||
+ compTy == MCompare::Compare_Double) {
+ cond = Assembler::ConditionFromDoubleCondition(
+ JSOpToDoubleCondition(ins->jsop()));
+ } else {
+ cond = JSOpToCondition(compTy, ins->jsop());
+ }
+ MIRType insTy = ins->mir()->type();
+ if (insTy == MIRType::Int32 || insTy == MIRType::Int64) {
+ Register destReg = ToRegister(ins->output());
+ Register trueReg = ToRegister(ins->ifTrueExpr());
+ Register falseReg = ToRegister(ins->ifFalseExpr());
+ size_t size = insTy == MIRType::Int32 ? 32 : 64;
+ masm.Csel(ARMRegister(destReg, size), ARMRegister(trueReg, size),
+ ARMRegister(falseReg, size), cond);
+ } else if (insTy == MIRType::Float32 || insTy == MIRType::Double) {
+ FloatRegister destReg = ToFloatRegister(ins->output());
+ FloatRegister trueReg = ToFloatRegister(ins->ifTrueExpr());
+ FloatRegister falseReg = ToFloatRegister(ins->ifFalseExpr());
+ size_t size = MIRTypeToSize(insTy) * 8;
+ masm.Fcsel(ARMFPRegister(destReg, size), ARMFPRegister(trueReg, size),
+ ARMFPRegister(falseReg, size), cond);
+ } else {
+ // See above.
+ MOZ_CRASH("CodeGenerator::visitWasmCompareAndSelect: unexpected type (2)");
+ }
+}
+
+void CodeGenerator::visitWasmLoadI64(LWasmLoadI64* lir) {
+ const MWasmLoad* mir = lir->mir();
+
+ if (Maybe<uint64_t> absAddr = IsAbsoluteAddress(lir->ptr(), mir->access())) {
+ masm.wasmLoadAbsolute(mir->access(), HeapReg, absAddr.value(),
+ AnyRegister(), ToOutRegister64(lir));
+ return;
+ }
+
+ masm.wasmLoadI64(mir->access(), HeapReg, ToRegister(lir->ptr()),
+ ToOutRegister64(lir));
+}
+
+void CodeGenerator::visitWasmStoreI64(LWasmStoreI64* lir) {
+ const MWasmStore* mir = lir->mir();
+
+ if (Maybe<uint64_t> absAddr = IsAbsoluteAddress(lir->ptr(), mir->access())) {
+ masm.wasmStoreAbsolute(mir->access(), AnyRegister(),
+ ToRegister64(lir->value()), HeapReg,
+ absAddr.value());
+ return;
+ }
+
+ masm.wasmStoreI64(mir->access(), ToRegister64(lir->value()), HeapReg,
+ ToRegister(lir->ptr()));
+}
+
+void CodeGenerator::visitMemoryBarrier(LMemoryBarrier* ins) {
+ masm.memoryBarrier(ins->type());
+}
+
+void CodeGenerator::visitWasmAddOffset(LWasmAddOffset* lir) {
+ MWasmAddOffset* mir = lir->mir();
+ Register base = ToRegister(lir->base());
+ Register out = ToRegister(lir->output());
+
+ masm.Adds(ARMRegister(out, 32), ARMRegister(base, 32),
+ Operand(mir->offset()));
+ OutOfLineAbortingWasmTrap* ool = new (alloc())
+ OutOfLineAbortingWasmTrap(mir->bytecodeOffset(), wasm::Trap::OutOfBounds);
+ addOutOfLineCode(ool, mir);
+ masm.j(Assembler::CarrySet, ool->entry());
+}
+
+void CodeGenerator::visitWasmAddOffset64(LWasmAddOffset64* lir) {
+ MWasmAddOffset* mir = lir->mir();
+ Register64 base = ToRegister64(lir->base());
+ Register64 out = ToOutRegister64(lir);
+
+ masm.Adds(ARMRegister(out.reg, 64), ARMRegister(base.reg, 64),
+ Operand(mir->offset()));
+ OutOfLineAbortingWasmTrap* ool = new (alloc())
+ OutOfLineAbortingWasmTrap(mir->bytecodeOffset(), wasm::Trap::OutOfBounds);
+ addOutOfLineCode(ool, mir);
+ masm.j(Assembler::CarrySet, ool->entry());
+}
+
+void CodeGenerator::visitWasmSelectI64(LWasmSelectI64* lir) {
+ MOZ_ASSERT(lir->mir()->type() == MIRType::Int64);
+ Register condReg = ToRegister(lir->condExpr());
+ Register64 trueReg = ToRegister64(lir->trueExpr());
+ Register64 falseReg = ToRegister64(lir->falseExpr());
+ Register64 outReg = ToOutRegister64(lir);
+
+ masm.test32(condReg, condReg);
+ masm.Csel(ARMRegister(outReg.reg, 64), ARMRegister(trueReg.reg, 64),
+ ARMRegister(falseReg.reg, 64), Assembler::NonZero);
+}
+
+void CodeGenerator::visitSignExtendInt64(LSignExtendInt64* ins) {
+ Register64 input = ToRegister64(ins->getInt64Operand(0));
+ Register64 output = ToOutRegister64(ins);
+ switch (ins->mode()) {
+ case MSignExtendInt64::Byte:
+ masm.move8To64SignExtend(input.reg, output);
+ break;
+ case MSignExtendInt64::Half:
+ masm.move16To64SignExtend(input.reg, output);
+ break;
+ case MSignExtendInt64::Word:
+ masm.move32To64SignExtend(input.reg, output);
+ break;
+ }
+}
+
+void CodeGenerator::visitWasmReinterpret(LWasmReinterpret* lir) {
+ MOZ_ASSERT(gen->compilingWasm());
+ MWasmReinterpret* ins = lir->mir();
+
+ MIRType to = ins->type();
+ mozilla::DebugOnly<MIRType> from = ins->input()->type();
+
+ switch (to) {
+ case MIRType::Int32:
+ MOZ_ASSERT(from == MIRType::Float32);
+ masm.moveFloat32ToGPR(ToFloatRegister(lir->input()),
+ ToRegister(lir->output()));
+ break;
+ case MIRType::Float32:
+ MOZ_ASSERT(from == MIRType::Int32);
+ masm.moveGPRToFloat32(ToRegister(lir->input()),
+ ToFloatRegister(lir->output()));
+ break;
+ case MIRType::Double:
+ case MIRType::Int64:
+ MOZ_CRASH("not handled by this LIR opcode");
+ default:
+ MOZ_CRASH("unexpected WasmReinterpret");
+ }
+}
+
+void CodeGenerator::visitWasmStackArgI64(LWasmStackArgI64* ins) {
+ const MWasmStackArg* mir = ins->mir();
+ Address dst(masm.getStackPointer(), mir->spOffset());
+ if (IsConstant(ins->arg())) {
+ masm.store64(Imm64(ToInt64(ins->arg())), dst);
+ } else {
+ masm.store64(ToRegister64(ins->arg()), dst);
+ }
+}
+
+void CodeGenerator::visitTestI64AndBranch(LTestI64AndBranch* lir) {
+ Register64 input = ToRegister64(lir->getInt64Operand(0));
+ MBasicBlock* mirTrue = lir->ifTrue();
+ MBasicBlock* mirFalse = lir->ifFalse();
+
+ // Jump to the True block if NonZero.
+ // Jump to the False block if Zero.
+ if (isNextBlock(mirFalse->lir())) {
+ masm.Cbnz(ARMRegister(input.reg, 64), getJumpLabelForBranch(mirTrue));
+ } else {
+ masm.Cbz(ARMRegister(input.reg, 64), getJumpLabelForBranch(mirFalse));
+ if (!isNextBlock(mirTrue->lir())) {
+ jumpToBlock(mirTrue);
+ }
+ }
+}
+
+void CodeGenerator::visitWrapInt64ToInt32(LWrapInt64ToInt32* lir) {
+ const LAllocation* input = lir->getOperand(0);
+ Register output = ToRegister(lir->output());
+
+ if (lir->mir()->bottomHalf()) {
+ if (input->isMemory()) {
+ masm.load32(ToAddress(input), output);
+ } else {
+ // Really this is a 64-bit input register and we could use move64To32.
+ masm.Mov(ARMRegister(output, 32), ARMRegister(ToRegister(input), 32));
+ }
+ } else {
+ MOZ_CRASH("Not implemented.");
+ }
+}
+
+void CodeGenerator::visitExtendInt32ToInt64(LExtendInt32ToInt64* lir) {
+ Register input = ToRegister(lir->getOperand(0));
+ Register64 output = ToOutRegister64(lir);
+
+ if (lir->mir()->isUnsigned()) {
+ masm.move32To64ZeroExtend(input, output);
+ } else {
+ masm.move32To64SignExtend(input, output);
+ }
+}
+
+void CodeGenerator::visitWasmExtendU32Index(LWasmExtendU32Index* lir) {
+ // Generates no code on this platform because the input is assumed to have
+ // canonical form.
+ Register output = ToRegister(lir->output());
+ MOZ_ASSERT(ToRegister(lir->input()) == output);
+ masm.debugAssertCanonicalInt32(output);
+}
+
+void CodeGenerator::visitWasmWrapU32Index(LWasmWrapU32Index* lir) {
+ // Generates no code on this platform because the input is assumed to have
+ // canonical form.
+ Register output = ToRegister(lir->output());
+ MOZ_ASSERT(ToRegister(lir->input()) == output);
+ masm.debugAssertCanonicalInt32(output);
+}
+
+void CodeGenerator::visitCompareI64AndBranch(LCompareI64AndBranch* comp) {
+ const MCompare* mir = comp->cmpMir();
+ const mozilla::DebugOnly<MCompare::CompareType> type = mir->compareType();
+ const LInt64Allocation left =
+ comp->getInt64Operand(LCompareI64AndBranch::Lhs);
+ const LInt64Allocation right =
+ comp->getInt64Operand(LCompareI64AndBranch::Rhs);
+
+ MOZ_ASSERT(type == MCompare::Compare_Int64 ||
+ type == MCompare::Compare_UInt64);
+ if (IsConstant(right)) {
+ masm.Cmp(ARMRegister(ToRegister64(left).reg, 64), ToInt64(right));
+ } else {
+ masm.Cmp(ARMRegister(ToRegister64(left).reg, 64),
+ ARMRegister(ToRegister64(right).reg, 64));
+ }
+
+ bool isSigned = mir->compareType() == MCompare::Compare_Int64;
+ Assembler::Condition cond = JSOpToCondition(comp->jsop(), isSigned);
+ emitBranch(cond, comp->ifTrue(), comp->ifFalse());
+}
+
+void CodeGenerator::visitWasmTruncateToInt32(LWasmTruncateToInt32* lir) {
+ auto input = ToFloatRegister(lir->input());
+ auto output = ToRegister(lir->output());
+
+ MWasmTruncateToInt32* mir = lir->mir();
+ MIRType fromType = mir->input()->type();
+
+ MOZ_ASSERT(fromType == MIRType::Double || fromType == MIRType::Float32);
+
+ auto* ool = new (alloc()) OutOfLineWasmTruncateCheck(mir, input, output);
+ addOutOfLineCode(ool, mir);
+
+ Label* oolEntry = ool->entry();
+ if (mir->isUnsigned()) {
+ if (fromType == MIRType::Double) {
+ masm.wasmTruncateDoubleToUInt32(input, output, mir->isSaturating(),
+ oolEntry);
+ } else if (fromType == MIRType::Float32) {
+ masm.wasmTruncateFloat32ToUInt32(input, output, mir->isSaturating(),
+ oolEntry);
+ } else {
+ MOZ_CRASH("unexpected type");
+ }
+
+ masm.bind(ool->rejoin());
+ return;
+ }
+
+ if (fromType == MIRType::Double) {
+ masm.wasmTruncateDoubleToInt32(input, output, mir->isSaturating(),
+ oolEntry);
+ } else if (fromType == MIRType::Float32) {
+ masm.wasmTruncateFloat32ToInt32(input, output, mir->isSaturating(),
+ oolEntry);
+ } else {
+ MOZ_CRASH("unexpected type");
+ }
+
+ masm.bind(ool->rejoin());
+}
+
+void CodeGenerator::visitWasmTruncateToInt64(LWasmTruncateToInt64* lir) {
+ FloatRegister input = ToFloatRegister(lir->input());
+ Register64 output = ToOutRegister64(lir);
+
+ MWasmTruncateToInt64* mir = lir->mir();
+ MIRType fromType = mir->input()->type();
+
+ MOZ_ASSERT(fromType == MIRType::Double || fromType == MIRType::Float32);
+
+ auto* ool = new (alloc()) OutOfLineWasmTruncateCheck(mir, input, output);
+ addOutOfLineCode(ool, mir);
+
+ Label* oolEntry = ool->entry();
+ Label* oolRejoin = ool->rejoin();
+ bool isSaturating = mir->isSaturating();
+
+ if (fromType == MIRType::Double) {
+ if (mir->isUnsigned()) {
+ masm.wasmTruncateDoubleToUInt64(input, output, isSaturating, oolEntry,
+ oolRejoin, InvalidFloatReg);
+ } else {
+ masm.wasmTruncateDoubleToInt64(input, output, isSaturating, oolEntry,
+ oolRejoin, InvalidFloatReg);
+ }
+ } else {
+ if (mir->isUnsigned()) {
+ masm.wasmTruncateFloat32ToUInt64(input, output, isSaturating, oolEntry,
+ oolRejoin, InvalidFloatReg);
+ } else {
+ masm.wasmTruncateFloat32ToInt64(input, output, isSaturating, oolEntry,
+ oolRejoin, InvalidFloatReg);
+ }
+ }
+}
+
+void CodeGeneratorARM64::visitOutOfLineWasmTruncateCheck(
+ OutOfLineWasmTruncateCheck* ool) {
+ FloatRegister input = ool->input();
+ Register output = ool->output();
+ Register64 output64 = ool->output64();
+ MIRType fromType = ool->fromType();
+ MIRType toType = ool->toType();
+ Label* oolRejoin = ool->rejoin();
+ TruncFlags flags = ool->flags();
+ wasm::BytecodeOffset off = ool->bytecodeOffset();
+
+ if (fromType == MIRType::Float32) {
+ if (toType == MIRType::Int32) {
+ masm.oolWasmTruncateCheckF32ToI32(input, output, flags, off, oolRejoin);
+ } else if (toType == MIRType::Int64) {
+ masm.oolWasmTruncateCheckF32ToI64(input, output64, flags, off, oolRejoin);
+ } else {
+ MOZ_CRASH("unexpected type");
+ }
+ } else if (fromType == MIRType::Double) {
+ if (toType == MIRType::Int32) {
+ masm.oolWasmTruncateCheckF64ToI32(input, output, flags, off, oolRejoin);
+ } else if (toType == MIRType::Int64) {
+ masm.oolWasmTruncateCheckF64ToI64(input, output64, flags, off, oolRejoin);
+ } else {
+ MOZ_CRASH("unexpected type");
+ }
+ } else {
+ MOZ_CRASH("unexpected type");
+ }
+}
+
+void CodeGenerator::visitWasmReinterpretToI64(LWasmReinterpretToI64* lir) {
+ MOZ_ASSERT(lir->mir()->type() == MIRType::Int64);
+ MOZ_ASSERT(lir->mir()->input()->type() == MIRType::Double);
+ masm.moveDoubleToGPR64(ToFloatRegister(lir->input()), ToOutRegister64(lir));
+}
+
+void CodeGenerator::visitWasmReinterpretFromI64(LWasmReinterpretFromI64* lir) {
+ MOZ_ASSERT(lir->mir()->type() == MIRType::Double);
+ MOZ_ASSERT(lir->mir()->input()->type() == MIRType::Int64);
+ masm.moveGPR64ToDouble(
+ ToRegister64(lir->getInt64Operand(LWasmReinterpretFromI64::Input)),
+ ToFloatRegister(lir->output()));
+}
+
+void CodeGenerator::visitAtomicTypedArrayElementBinop(
+ LAtomicTypedArrayElementBinop* lir) {
+ MOZ_ASSERT(!lir->mir()->isForEffect());
+
+ AnyRegister output = ToAnyRegister(lir->output());
+ Register elements = ToRegister(lir->elements());
+ Register flagTemp = ToRegister(lir->temp1());
+ Register outTemp =
+ lir->temp2()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp2());
+ Register value = ToRegister(lir->value());
+
+ Scalar::Type arrayType = lir->mir()->arrayType();
+
+ if (lir->index()->isConstant()) {
+ Address mem = ToAddress(elements, lir->index(), arrayType);
+ masm.atomicFetchOpJS(arrayType, Synchronization::Full(),
+ lir->mir()->operation(), value, mem, flagTemp, outTemp,
+ output);
+ } else {
+ BaseIndex mem(elements, ToRegister(lir->index()),
+ ScaleFromScalarType(arrayType));
+ masm.atomicFetchOpJS(arrayType, Synchronization::Full(),
+ lir->mir()->operation(), value, mem, flagTemp, outTemp,
+ output);
+ }
+}
+
+void CodeGenerator::visitAtomicTypedArrayElementBinopForEffect(
+ LAtomicTypedArrayElementBinopForEffect* lir) {
+ MOZ_ASSERT(lir->mir()->isForEffect());
+
+ Register elements = ToRegister(lir->elements());
+ Register flagTemp = ToRegister(lir->flagTemp());
+ Register value = ToRegister(lir->value());
+
+ Scalar::Type arrayType = lir->mir()->arrayType();
+
+ if (lir->index()->isConstant()) {
+ Address mem = ToAddress(elements, lir->index(), arrayType);
+ masm.atomicEffectOpJS(arrayType, Synchronization::Full(),
+ lir->mir()->operation(), value, mem, flagTemp);
+ } else {
+ BaseIndex mem(elements, ToRegister(lir->index()),
+ ScaleFromScalarType(arrayType));
+ masm.atomicEffectOpJS(arrayType, Synchronization::Full(),
+ lir->mir()->operation(), value, mem, flagTemp);
+ }
+}
+
+void CodeGenerator::visitInt64ToFloatingPoint(LInt64ToFloatingPoint* lir) {
+ Register64 input = ToRegister64(lir->getInt64Operand(0));
+ FloatRegister output = ToFloatRegister(lir->output());
+
+ MIRType outputType = lir->mir()->type();
+ MOZ_ASSERT(outputType == MIRType::Double || outputType == MIRType::Float32);
+
+ if (outputType == MIRType::Double) {
+ if (lir->mir()->isUnsigned()) {
+ masm.convertUInt64ToDouble(input, output, Register::Invalid());
+ } else {
+ masm.convertInt64ToDouble(input, output);
+ }
+ } else {
+ if (lir->mir()->isUnsigned()) {
+ masm.convertUInt64ToFloat32(input, output, Register::Invalid());
+ } else {
+ masm.convertInt64ToFloat32(input, output);
+ }
+ }
+}
+
+void CodeGenerator::visitDivOrModI64(LDivOrModI64* lir) {
+ Register lhs = ToRegister(lir->lhs());
+ Register rhs = ToRegister(lir->rhs());
+ Register output = ToRegister(lir->output());
+
+ Label done;
+
+ // Handle divide by zero.
+ if (lir->canBeDivideByZero()) {
+ Label isNotDivByZero;
+ masm.Cbnz(ARMRegister(rhs, 64), &isNotDivByZero);
+ masm.wasmTrap(wasm::Trap::IntegerDivideByZero, lir->bytecodeOffset());
+ masm.bind(&isNotDivByZero);
+ }
+
+ // Handle an integer overflow exception from INT64_MIN / -1.
+ if (lir->canBeNegativeOverflow()) {
+ Label noOverflow;
+ masm.branchPtr(Assembler::NotEqual, lhs, ImmWord(INT64_MIN), &noOverflow);
+ masm.branchPtr(Assembler::NotEqual, rhs, ImmWord(-1), &noOverflow);
+ if (lir->mir()->isMod()) {
+ masm.movePtr(ImmWord(0), output);
+ } else {
+ masm.wasmTrap(wasm::Trap::IntegerOverflow, lir->bytecodeOffset());
+ }
+ masm.jump(&done);
+ masm.bind(&noOverflow);
+ }
+
+ masm.Sdiv(ARMRegister(output, 64), ARMRegister(lhs, 64),
+ ARMRegister(rhs, 64));
+ if (lir->mir()->isMod()) {
+ masm.Msub(ARMRegister(output, 64), ARMRegister(output, 64),
+ ARMRegister(rhs, 64), ARMRegister(lhs, 64));
+ }
+ masm.bind(&done);
+}
+
+void CodeGenerator::visitUDivOrModI64(LUDivOrModI64* lir) {
+ Register lhs = ToRegister(lir->lhs());
+ Register rhs = ToRegister(lir->rhs());
+ Register output = ToRegister(lir->output());
+
+ Label done;
+
+ // Handle divide by zero.
+ if (lir->canBeDivideByZero()) {
+ Label isNotDivByZero;
+ masm.Cbnz(ARMRegister(rhs, 64), &isNotDivByZero);
+ masm.wasmTrap(wasm::Trap::IntegerDivideByZero, lir->bytecodeOffset());
+ masm.bind(&isNotDivByZero);
+ }
+
+ masm.Udiv(ARMRegister(output, 64), ARMRegister(lhs, 64),
+ ARMRegister(rhs, 64));
+ if (lir->mir()->isMod()) {
+ masm.Msub(ARMRegister(output, 64), ARMRegister(output, 64),
+ ARMRegister(rhs, 64), ARMRegister(lhs, 64));
+ }
+ masm.bind(&done);
+}
+
+void CodeGenerator::visitSimd128(LSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ const LDefinition* out = ins->getDef(0);
+ masm.loadConstantSimd128(ins->simd128(), ToFloatRegister(out));
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void CodeGenerator::visitWasmTernarySimd128(LWasmTernarySimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ switch (ins->simdOp()) {
+ case wasm::SimdOp::V128Bitselect: {
+ FloatRegister lhs = ToFloatRegister(ins->v0());
+ FloatRegister rhs = ToFloatRegister(ins->v1());
+ FloatRegister controlDest = ToFloatRegister(ins->v2());
+ masm.bitwiseSelectSimd128(lhs, rhs, controlDest);
+ break;
+ }
+ case wasm::SimdOp::F32x4RelaxedFma:
+ masm.fmaFloat32x4(ToFloatRegister(ins->v0()), ToFloatRegister(ins->v1()),
+ ToFloatRegister(ins->v2()));
+ break;
+ case wasm::SimdOp::F32x4RelaxedFnma:
+ masm.fnmaFloat32x4(ToFloatRegister(ins->v0()), ToFloatRegister(ins->v1()),
+ ToFloatRegister(ins->v2()));
+ break;
+ case wasm::SimdOp::F64x2RelaxedFma:
+ masm.fmaFloat64x2(ToFloatRegister(ins->v0()), ToFloatRegister(ins->v1()),
+ ToFloatRegister(ins->v2()));
+ break;
+ case wasm::SimdOp::F64x2RelaxedFnma:
+ masm.fnmaFloat64x2(ToFloatRegister(ins->v0()), ToFloatRegister(ins->v1()),
+ ToFloatRegister(ins->v2()));
+ break;
+ case wasm::SimdOp::I8x16RelaxedLaneSelect:
+ case wasm::SimdOp::I16x8RelaxedLaneSelect:
+ case wasm::SimdOp::I32x4RelaxedLaneSelect:
+ case wasm::SimdOp::I64x2RelaxedLaneSelect: {
+ FloatRegister lhs = ToFloatRegister(ins->v0());
+ FloatRegister rhs = ToFloatRegister(ins->v1());
+ FloatRegister maskDest = ToFloatRegister(ins->v2());
+ masm.laneSelectSimd128(maskDest, lhs, rhs, maskDest);
+ break;
+ }
+ case wasm::SimdOp::I32x4DotI8x16I7x16AddS:
+ masm.dotInt8x16Int7x16ThenAdd(
+ ToFloatRegister(ins->v0()), ToFloatRegister(ins->v1()),
+ ToFloatRegister(ins->v2()), ToFloatRegister(ins->temp()));
+ break;
+ default:
+ MOZ_CRASH("NYI");
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void CodeGenerator::visitWasmBinarySimd128(LWasmBinarySimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ FloatRegister lhs = ToFloatRegister(ins->lhs());
+ FloatRegister rhs = ToFloatRegister(ins->rhs());
+ FloatRegister dest = ToFloatRegister(ins->output());
+
+ switch (ins->simdOp()) {
+ case wasm::SimdOp::V128And:
+ masm.bitwiseAndSimd128(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::V128Or:
+ masm.bitwiseOrSimd128(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::V128Xor:
+ masm.bitwiseXorSimd128(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::V128AndNot:
+ masm.bitwiseAndNotSimd128(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16AvgrU:
+ masm.unsignedAverageInt8x16(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8AvgrU:
+ masm.unsignedAverageInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16Add:
+ masm.addInt8x16(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16AddSatS:
+ masm.addSatInt8x16(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16AddSatU:
+ masm.unsignedAddSatInt8x16(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16Sub:
+ masm.subInt8x16(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16SubSatS:
+ masm.subSatInt8x16(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16SubSatU:
+ masm.unsignedSubSatInt8x16(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16MinS:
+ masm.minInt8x16(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16MinU:
+ masm.unsignedMinInt8x16(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16MaxS:
+ masm.maxInt8x16(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16MaxU:
+ masm.unsignedMaxInt8x16(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8Add:
+ masm.addInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8AddSatS:
+ masm.addSatInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8AddSatU:
+ masm.unsignedAddSatInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8Sub:
+ masm.subInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8SubSatS:
+ masm.subSatInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8SubSatU:
+ masm.unsignedSubSatInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8Mul:
+ masm.mulInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8MinS:
+ masm.minInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8MinU:
+ masm.unsignedMinInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8MaxS:
+ masm.maxInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8MaxU:
+ masm.unsignedMaxInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4Add:
+ masm.addInt32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4Sub:
+ masm.subInt32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4Mul:
+ masm.mulInt32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4MinS:
+ masm.minInt32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4MinU:
+ masm.unsignedMinInt32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4MaxS:
+ masm.maxInt32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4MaxU:
+ masm.unsignedMaxInt32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I64x2Add:
+ masm.addInt64x2(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I64x2Sub:
+ masm.subInt64x2(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I64x2Mul: {
+ auto temp1 = ToFloatRegister(ins->getTemp(0));
+ auto temp2 = ToFloatRegister(ins->getTemp(1));
+ masm.mulInt64x2(lhs, rhs, dest, temp1, temp2);
+ break;
+ }
+ case wasm::SimdOp::F32x4Add:
+ masm.addFloat32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F32x4Sub:
+ masm.subFloat32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F32x4Mul:
+ masm.mulFloat32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F32x4Div:
+ masm.divFloat32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F32x4Min:
+ masm.minFloat32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F32x4Max:
+ masm.maxFloat32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F64x2Add:
+ masm.addFloat64x2(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F64x2Sub:
+ masm.subFloat64x2(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F64x2Mul:
+ masm.mulFloat64x2(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F64x2Div:
+ masm.divFloat64x2(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F64x2Min:
+ masm.minFloat64x2(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F64x2Max:
+ masm.maxFloat64x2(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16Swizzle:
+ masm.swizzleInt8x16(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16RelaxedSwizzle:
+ masm.swizzleInt8x16Relaxed(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16NarrowI16x8S:
+ masm.narrowInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16NarrowI16x8U:
+ masm.unsignedNarrowInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8NarrowI32x4S:
+ masm.narrowInt32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8NarrowI32x4U:
+ masm.unsignedNarrowInt32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16Eq:
+ masm.compareInt8x16(Assembler::Equal, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16Ne:
+ masm.compareInt8x16(Assembler::NotEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16LtS:
+ masm.compareInt8x16(Assembler::LessThan, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16GtS:
+ masm.compareInt8x16(Assembler::GreaterThan, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16LeS:
+ masm.compareInt8x16(Assembler::LessThanOrEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16GeS:
+ masm.compareInt8x16(Assembler::GreaterThanOrEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16LtU:
+ masm.compareInt8x16(Assembler::Below, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16GtU:
+ masm.compareInt8x16(Assembler::Above, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16LeU:
+ masm.compareInt8x16(Assembler::BelowOrEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16GeU:
+ masm.compareInt8x16(Assembler::AboveOrEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8Eq:
+ masm.compareInt16x8(Assembler::Equal, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8Ne:
+ masm.compareInt16x8(Assembler::NotEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8LtS:
+ masm.compareInt16x8(Assembler::LessThan, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8GtS:
+ masm.compareInt16x8(Assembler::GreaterThan, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8LeS:
+ masm.compareInt16x8(Assembler::LessThanOrEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8GeS:
+ masm.compareInt16x8(Assembler::GreaterThanOrEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8LtU:
+ masm.compareInt16x8(Assembler::Below, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8GtU:
+ masm.compareInt16x8(Assembler::Above, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8LeU:
+ masm.compareInt16x8(Assembler::BelowOrEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8GeU:
+ masm.compareInt16x8(Assembler::AboveOrEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4Eq:
+ masm.compareInt32x4(Assembler::Equal, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4Ne:
+ masm.compareInt32x4(Assembler::NotEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4LtS:
+ masm.compareInt32x4(Assembler::LessThan, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4GtS:
+ masm.compareInt32x4(Assembler::GreaterThan, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4LeS:
+ masm.compareInt32x4(Assembler::LessThanOrEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4GeS:
+ masm.compareInt32x4(Assembler::GreaterThanOrEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4LtU:
+ masm.compareInt32x4(Assembler::Below, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4GtU:
+ masm.compareInt32x4(Assembler::Above, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4LeU:
+ masm.compareInt32x4(Assembler::BelowOrEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4GeU:
+ masm.compareInt32x4(Assembler::AboveOrEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I64x2Eq:
+ masm.compareInt64x2(Assembler::Equal, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I64x2LtS:
+ masm.compareInt64x2(Assembler::LessThan, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I64x2GtS:
+ masm.compareInt64x2(Assembler::GreaterThan, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I64x2LeS:
+ masm.compareInt64x2(Assembler::LessThanOrEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I64x2GeS:
+ masm.compareInt64x2(Assembler::GreaterThanOrEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I64x2Ne:
+ masm.compareInt64x2(Assembler::NotEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F32x4Eq:
+ masm.compareFloat32x4(Assembler::Equal, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F32x4Ne:
+ masm.compareFloat32x4(Assembler::NotEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F32x4Lt:
+ masm.compareFloat32x4(Assembler::LessThan, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F32x4Gt:
+ masm.compareFloat32x4(Assembler::GreaterThan, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F32x4Le:
+ masm.compareFloat32x4(Assembler::LessThanOrEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F32x4Ge:
+ masm.compareFloat32x4(Assembler::GreaterThanOrEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F64x2Eq:
+ masm.compareFloat64x2(Assembler::Equal, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F64x2Ne:
+ masm.compareFloat64x2(Assembler::NotEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F64x2Lt:
+ masm.compareFloat64x2(Assembler::LessThan, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F64x2Gt:
+ masm.compareFloat64x2(Assembler::GreaterThan, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F64x2Le:
+ masm.compareFloat64x2(Assembler::LessThanOrEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F64x2Ge:
+ masm.compareFloat64x2(Assembler::GreaterThanOrEqual, lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F32x4PMax:
+ masm.pseudoMaxFloat32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F32x4PMin:
+ masm.pseudoMinFloat32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F64x2PMax:
+ masm.pseudoMaxFloat64x2(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F64x2PMin:
+ masm.pseudoMinFloat64x2(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4DotI16x8S:
+ masm.widenDotInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8ExtmulLowI8x16S:
+ masm.extMulLowInt8x16(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8ExtmulHighI8x16S:
+ masm.extMulHighInt8x16(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8ExtmulLowI8x16U:
+ masm.unsignedExtMulLowInt8x16(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8ExtmulHighI8x16U:
+ masm.unsignedExtMulHighInt8x16(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4ExtmulLowI16x8S:
+ masm.extMulLowInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4ExtmulHighI16x8S:
+ masm.extMulHighInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4ExtmulLowI16x8U:
+ masm.unsignedExtMulLowInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4ExtmulHighI16x8U:
+ masm.unsignedExtMulHighInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I64x2ExtmulLowI32x4S:
+ masm.extMulLowInt32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I64x2ExtmulHighI32x4S:
+ masm.extMulHighInt32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I64x2ExtmulLowI32x4U:
+ masm.unsignedExtMulLowInt32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I64x2ExtmulHighI32x4U:
+ masm.unsignedExtMulHighInt32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8Q15MulrSatS:
+ masm.q15MulrSatInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F32x4RelaxedMin:
+ masm.minFloat32x4Relaxed(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F32x4RelaxedMax:
+ masm.maxFloat32x4Relaxed(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F64x2RelaxedMin:
+ masm.minFloat64x2Relaxed(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::F64x2RelaxedMax:
+ masm.maxFloat64x2Relaxed(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8RelaxedQ15MulrS:
+ masm.q15MulrInt16x8Relaxed(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8DotI8x16I7x16S:
+ masm.dotInt8x16Int7x16(lhs, rhs, dest);
+ break;
+ default:
+ MOZ_CRASH("Binary SimdOp not implemented");
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void CodeGenerator::visitWasmBinarySimd128WithConstant(
+ LWasmBinarySimd128WithConstant* ins) {
+ MOZ_CRASH("No SIMD");
+}
+
+void CodeGenerator::visitWasmVariableShiftSimd128(
+ LWasmVariableShiftSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ FloatRegister lhs = ToFloatRegister(ins->lhs());
+ Register rhs = ToRegister(ins->rhs());
+ FloatRegister dest = ToFloatRegister(ins->output());
+
+ switch (ins->simdOp()) {
+ case wasm::SimdOp::I8x16Shl:
+ masm.leftShiftInt8x16(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16ShrS:
+ masm.rightShiftInt8x16(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I8x16ShrU:
+ masm.unsignedRightShiftInt8x16(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8Shl:
+ masm.leftShiftInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8ShrS:
+ masm.rightShiftInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I16x8ShrU:
+ masm.unsignedRightShiftInt16x8(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4Shl:
+ masm.leftShiftInt32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4ShrS:
+ masm.rightShiftInt32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I32x4ShrU:
+ masm.unsignedRightShiftInt32x4(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I64x2Shl:
+ masm.leftShiftInt64x2(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I64x2ShrS:
+ masm.rightShiftInt64x2(lhs, rhs, dest);
+ break;
+ case wasm::SimdOp::I64x2ShrU:
+ masm.unsignedRightShiftInt64x2(lhs, rhs, dest);
+ break;
+ default:
+ MOZ_CRASH("Shift SimdOp not implemented");
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void CodeGenerator::visitWasmConstantShiftSimd128(
+ LWasmConstantShiftSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ FloatRegister src = ToFloatRegister(ins->src());
+ FloatRegister dest = ToFloatRegister(ins->output());
+ int32_t shift = ins->shift();
+
+ if (shift == 0) {
+ if (src != dest) {
+ masm.moveSimd128(src, dest);
+ }
+ return;
+ }
+
+ switch (ins->simdOp()) {
+ case wasm::SimdOp::I8x16Shl:
+ masm.leftShiftInt8x16(Imm32(shift), src, dest);
+ break;
+ case wasm::SimdOp::I8x16ShrS:
+ masm.rightShiftInt8x16(Imm32(shift), src, dest);
+ break;
+ case wasm::SimdOp::I8x16ShrU:
+ masm.unsignedRightShiftInt8x16(Imm32(shift), src, dest);
+ break;
+ case wasm::SimdOp::I16x8Shl:
+ masm.leftShiftInt16x8(Imm32(shift), src, dest);
+ break;
+ case wasm::SimdOp::I16x8ShrS:
+ masm.rightShiftInt16x8(Imm32(shift), src, dest);
+ break;
+ case wasm::SimdOp::I16x8ShrU:
+ masm.unsignedRightShiftInt16x8(Imm32(shift), src, dest);
+ break;
+ case wasm::SimdOp::I32x4Shl:
+ masm.leftShiftInt32x4(Imm32(shift), src, dest);
+ break;
+ case wasm::SimdOp::I32x4ShrS:
+ masm.rightShiftInt32x4(Imm32(shift), src, dest);
+ break;
+ case wasm::SimdOp::I32x4ShrU:
+ masm.unsignedRightShiftInt32x4(Imm32(shift), src, dest);
+ break;
+ case wasm::SimdOp::I64x2Shl:
+ masm.leftShiftInt64x2(Imm32(shift), src, dest);
+ break;
+ case wasm::SimdOp::I64x2ShrS:
+ masm.rightShiftInt64x2(Imm32(shift), src, dest);
+ break;
+ case wasm::SimdOp::I64x2ShrU:
+ masm.unsignedRightShiftInt64x2(Imm32(shift), src, dest);
+ break;
+ default:
+ MOZ_CRASH("Shift SimdOp not implemented");
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void CodeGenerator::visitWasmSignReplicationSimd128(
+ LWasmSignReplicationSimd128* ins) {
+ MOZ_CRASH("No SIMD");
+}
+
+void CodeGenerator::visitWasmShuffleSimd128(LWasmShuffleSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ FloatRegister lhs = ToFloatRegister(ins->lhs());
+ FloatRegister rhs = ToFloatRegister(ins->rhs());
+ FloatRegister dest = ToFloatRegister(ins->output());
+ MOZ_ASSERT(ins->temp()->isBogusTemp());
+ SimdConstant control = ins->control();
+ switch (ins->op()) {
+ case SimdShuffleOp::BLEND_8x16: {
+ masm.blendInt8x16(reinterpret_cast<const uint8_t*>(control.asInt8x16()),
+ lhs, rhs, dest);
+ break;
+ }
+ case SimdShuffleOp::BLEND_16x8: {
+ masm.blendInt16x8(reinterpret_cast<const uint16_t*>(control.asInt16x8()),
+ lhs, rhs, dest);
+ break;
+ }
+ case SimdShuffleOp::CONCAT_RIGHT_SHIFT_8x16: {
+ int8_t count = 16 - control.asInt8x16()[0];
+ MOZ_ASSERT(count > 0, "Should have been a MOVE operation");
+ masm.concatAndRightShiftSimd128(lhs, rhs, dest, count);
+ break;
+ }
+ case SimdShuffleOp::INTERLEAVE_HIGH_8x16: {
+ masm.interleaveHighInt8x16(lhs, rhs, dest);
+ break;
+ }
+ case SimdShuffleOp::INTERLEAVE_HIGH_16x8: {
+ masm.interleaveHighInt16x8(lhs, rhs, dest);
+ break;
+ }
+ case SimdShuffleOp::INTERLEAVE_HIGH_32x4: {
+ masm.interleaveHighInt32x4(lhs, rhs, dest);
+ break;
+ }
+ case SimdShuffleOp::INTERLEAVE_HIGH_64x2: {
+ masm.interleaveHighInt64x2(lhs, rhs, dest);
+ break;
+ }
+ case SimdShuffleOp::INTERLEAVE_LOW_8x16: {
+ masm.interleaveLowInt8x16(lhs, rhs, dest);
+ break;
+ }
+ case SimdShuffleOp::INTERLEAVE_LOW_16x8: {
+ masm.interleaveLowInt16x8(lhs, rhs, dest);
+ break;
+ }
+ case SimdShuffleOp::INTERLEAVE_LOW_32x4: {
+ masm.interleaveLowInt32x4(lhs, rhs, dest);
+ break;
+ }
+ case SimdShuffleOp::INTERLEAVE_LOW_64x2: {
+ masm.interleaveLowInt64x2(lhs, rhs, dest);
+ break;
+ }
+ case SimdShuffleOp::SHUFFLE_BLEND_8x16: {
+ masm.shuffleInt8x16(reinterpret_cast<const uint8_t*>(control.asInt8x16()),
+ lhs, rhs, dest);
+ break;
+ }
+ default: {
+ MOZ_CRASH("Unsupported SIMD shuffle operation");
+ }
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void CodeGenerator::visitWasmPermuteSimd128(LWasmPermuteSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ FloatRegister src = ToFloatRegister(ins->src());
+ FloatRegister dest = ToFloatRegister(ins->output());
+ SimdConstant control = ins->control();
+ switch (ins->op()) {
+ case SimdPermuteOp::BROADCAST_8x16: {
+ const SimdConstant::I8x16& mask = control.asInt8x16();
+ int8_t source = mask[0];
+ masm.splatX16(source, src, dest);
+ break;
+ }
+ case SimdPermuteOp::BROADCAST_16x8: {
+ const SimdConstant::I16x8& mask = control.asInt16x8();
+ int16_t source = mask[0];
+ masm.splatX8(source, src, dest);
+ break;
+ }
+ case SimdPermuteOp::MOVE: {
+ masm.moveSimd128(src, dest);
+ break;
+ }
+ case SimdPermuteOp::PERMUTE_8x16: {
+ const SimdConstant::I8x16& mask = control.asInt8x16();
+# ifdef DEBUG
+ mozilla::DebugOnly<int> i;
+ for (i = 0; i < 16 && mask[i] == i; i++) {
+ }
+ MOZ_ASSERT(i < 16, "Should have been a MOVE operation");
+# endif
+ masm.permuteInt8x16(reinterpret_cast<const uint8_t*>(mask), src, dest);
+ break;
+ }
+ case SimdPermuteOp::PERMUTE_16x8: {
+ const SimdConstant::I16x8& mask = control.asInt16x8();
+# ifdef DEBUG
+ mozilla::DebugOnly<int> i;
+ for (i = 0; i < 8 && mask[i] == i; i++) {
+ }
+ MOZ_ASSERT(i < 8, "Should have been a MOVE operation");
+# endif
+ masm.permuteInt16x8(reinterpret_cast<const uint16_t*>(mask), src, dest);
+ break;
+ }
+ case SimdPermuteOp::PERMUTE_32x4: {
+ const SimdConstant::I32x4& mask = control.asInt32x4();
+# ifdef DEBUG
+ mozilla::DebugOnly<int> i;
+ for (i = 0; i < 4 && mask[i] == i; i++) {
+ }
+ MOZ_ASSERT(i < 4, "Should have been a MOVE operation");
+# endif
+ masm.permuteInt32x4(reinterpret_cast<const uint32_t*>(mask), src, dest);
+ break;
+ }
+ case SimdPermuteOp::ROTATE_RIGHT_8x16: {
+ int8_t count = control.asInt8x16()[0];
+ MOZ_ASSERT(count > 0, "Should have been a MOVE operation");
+ masm.rotateRightSimd128(src, dest, count);
+ break;
+ }
+ case SimdPermuteOp::SHIFT_LEFT_8x16: {
+ int8_t count = control.asInt8x16()[0];
+ MOZ_ASSERT(count > 0, "Should have been a MOVE operation");
+ masm.leftShiftSimd128(Imm32(count), src, dest);
+ break;
+ }
+ case SimdPermuteOp::SHIFT_RIGHT_8x16: {
+ int8_t count = control.asInt8x16()[0];
+ MOZ_ASSERT(count > 0, "Should have been a MOVE operation");
+ masm.rightShiftSimd128(Imm32(count), src, dest);
+ break;
+ }
+ case SimdPermuteOp::REVERSE_16x8:
+ masm.reverseInt16x8(src, dest);
+ break;
+ case SimdPermuteOp::REVERSE_32x4:
+ masm.reverseInt32x4(src, dest);
+ break;
+ case SimdPermuteOp::REVERSE_64x2:
+ masm.reverseInt64x2(src, dest);
+ break;
+ default: {
+ MOZ_CRASH("Unsupported SIMD permutation operation");
+ }
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void CodeGenerator::visitWasmReplaceLaneSimd128(LWasmReplaceLaneSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ MOZ_ASSERT(ToFloatRegister(ins->lhs()) == ToFloatRegister(ins->output()));
+ FloatRegister lhsDest = ToFloatRegister(ins->lhs());
+ const LAllocation* rhs = ins->rhs();
+ uint32_t laneIndex = ins->laneIndex();
+
+ switch (ins->simdOp()) {
+ case wasm::SimdOp::I8x16ReplaceLane:
+ masm.replaceLaneInt8x16(laneIndex, ToRegister(rhs), lhsDest);
+ break;
+ case wasm::SimdOp::I16x8ReplaceLane:
+ masm.replaceLaneInt16x8(laneIndex, ToRegister(rhs), lhsDest);
+ break;
+ case wasm::SimdOp::I32x4ReplaceLane:
+ masm.replaceLaneInt32x4(laneIndex, ToRegister(rhs), lhsDest);
+ break;
+ case wasm::SimdOp::F32x4ReplaceLane:
+ masm.replaceLaneFloat32x4(laneIndex, ToFloatRegister(rhs), lhsDest);
+ break;
+ case wasm::SimdOp::F64x2ReplaceLane:
+ masm.replaceLaneFloat64x2(laneIndex, ToFloatRegister(rhs), lhsDest);
+ break;
+ default:
+ MOZ_CRASH("ReplaceLane SimdOp not implemented");
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void CodeGenerator::visitWasmReplaceInt64LaneSimd128(
+ LWasmReplaceInt64LaneSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ MOZ_RELEASE_ASSERT(ins->simdOp() == wasm::SimdOp::I64x2ReplaceLane);
+ MOZ_ASSERT(ToFloatRegister(ins->lhs()) == ToFloatRegister(ins->output()));
+ masm.replaceLaneInt64x2(ins->laneIndex(), ToRegister64(ins->rhs()),
+ ToFloatRegister(ins->lhs()));
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void CodeGenerator::visitWasmScalarToSimd128(LWasmScalarToSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ FloatRegister dest = ToFloatRegister(ins->output());
+
+ switch (ins->simdOp()) {
+ case wasm::SimdOp::I8x16Splat:
+ masm.splatX16(ToRegister(ins->src()), dest);
+ break;
+ case wasm::SimdOp::I16x8Splat:
+ masm.splatX8(ToRegister(ins->src()), dest);
+ break;
+ case wasm::SimdOp::I32x4Splat:
+ masm.splatX4(ToRegister(ins->src()), dest);
+ break;
+ case wasm::SimdOp::F32x4Splat:
+ masm.splatX4(ToFloatRegister(ins->src()), dest);
+ break;
+ case wasm::SimdOp::F64x2Splat:
+ masm.splatX2(ToFloatRegister(ins->src()), dest);
+ break;
+ default:
+ MOZ_CRASH("ScalarToSimd128 SimdOp not implemented");
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void CodeGenerator::visitWasmInt64ToSimd128(LWasmInt64ToSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ Register64 src = ToRegister64(ins->src());
+ FloatRegister dest = ToFloatRegister(ins->output());
+
+ switch (ins->simdOp()) {
+ case wasm::SimdOp::I64x2Splat:
+ masm.splatX2(src, dest);
+ break;
+ case wasm::SimdOp::V128Load8x8S:
+ masm.moveGPR64ToDouble(src, dest);
+ masm.widenLowInt8x16(dest, dest);
+ break;
+ case wasm::SimdOp::V128Load8x8U:
+ masm.moveGPR64ToDouble(src, dest);
+ masm.unsignedWidenLowInt8x16(dest, dest);
+ break;
+ case wasm::SimdOp::V128Load16x4S:
+ masm.moveGPR64ToDouble(src, dest);
+ masm.widenLowInt16x8(dest, dest);
+ break;
+ case wasm::SimdOp::V128Load16x4U:
+ masm.moveGPR64ToDouble(src, dest);
+ masm.unsignedWidenLowInt16x8(dest, dest);
+ break;
+ case wasm::SimdOp::V128Load32x2S:
+ masm.moveGPR64ToDouble(src, dest);
+ masm.widenLowInt32x4(dest, dest);
+ break;
+ case wasm::SimdOp::V128Load32x2U:
+ masm.moveGPR64ToDouble(src, dest);
+ masm.unsignedWidenLowInt32x4(dest, dest);
+ break;
+ default:
+ MOZ_CRASH("Int64ToSimd128 SimdOp not implemented");
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void CodeGenerator::visitWasmUnarySimd128(LWasmUnarySimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ FloatRegister src = ToFloatRegister(ins->src());
+ FloatRegister dest = ToFloatRegister(ins->output());
+
+ switch (ins->simdOp()) {
+ case wasm::SimdOp::I8x16Neg:
+ masm.negInt8x16(src, dest);
+ break;
+ case wasm::SimdOp::I16x8Neg:
+ masm.negInt16x8(src, dest);
+ break;
+ case wasm::SimdOp::I16x8ExtendLowI8x16S:
+ masm.widenLowInt8x16(src, dest);
+ break;
+ case wasm::SimdOp::I16x8ExtendHighI8x16S:
+ masm.widenHighInt8x16(src, dest);
+ break;
+ case wasm::SimdOp::I16x8ExtendLowI8x16U:
+ masm.unsignedWidenLowInt8x16(src, dest);
+ break;
+ case wasm::SimdOp::I16x8ExtendHighI8x16U:
+ masm.unsignedWidenHighInt8x16(src, dest);
+ break;
+ case wasm::SimdOp::I32x4Neg:
+ masm.negInt32x4(src, dest);
+ break;
+ case wasm::SimdOp::I32x4ExtendLowI16x8S:
+ masm.widenLowInt16x8(src, dest);
+ break;
+ case wasm::SimdOp::I32x4ExtendHighI16x8S:
+ masm.widenHighInt16x8(src, dest);
+ break;
+ case wasm::SimdOp::I32x4ExtendLowI16x8U:
+ masm.unsignedWidenLowInt16x8(src, dest);
+ break;
+ case wasm::SimdOp::I32x4ExtendHighI16x8U:
+ masm.unsignedWidenHighInt16x8(src, dest);
+ break;
+ case wasm::SimdOp::I32x4TruncSatF32x4S:
+ masm.truncSatFloat32x4ToInt32x4(src, dest);
+ break;
+ case wasm::SimdOp::I32x4TruncSatF32x4U:
+ masm.unsignedTruncSatFloat32x4ToInt32x4(src, dest);
+ break;
+ case wasm::SimdOp::I64x2Neg:
+ masm.negInt64x2(src, dest);
+ break;
+ case wasm::SimdOp::I64x2ExtendLowI32x4S:
+ masm.widenLowInt32x4(src, dest);
+ break;
+ case wasm::SimdOp::I64x2ExtendHighI32x4S:
+ masm.widenHighInt32x4(src, dest);
+ break;
+ case wasm::SimdOp::I64x2ExtendLowI32x4U:
+ masm.unsignedWidenLowInt32x4(src, dest);
+ break;
+ case wasm::SimdOp::I64x2ExtendHighI32x4U:
+ masm.unsignedWidenHighInt32x4(src, dest);
+ break;
+ case wasm::SimdOp::F32x4Abs:
+ masm.absFloat32x4(src, dest);
+ break;
+ case wasm::SimdOp::F32x4Neg:
+ masm.negFloat32x4(src, dest);
+ break;
+ case wasm::SimdOp::F32x4Sqrt:
+ masm.sqrtFloat32x4(src, dest);
+ break;
+ case wasm::SimdOp::F32x4ConvertI32x4S:
+ masm.convertInt32x4ToFloat32x4(src, dest);
+ break;
+ case wasm::SimdOp::F32x4ConvertI32x4U:
+ masm.unsignedConvertInt32x4ToFloat32x4(src, dest);
+ break;
+ case wasm::SimdOp::F64x2Abs:
+ masm.absFloat64x2(src, dest);
+ break;
+ case wasm::SimdOp::F64x2Neg:
+ masm.negFloat64x2(src, dest);
+ break;
+ case wasm::SimdOp::F64x2Sqrt:
+ masm.sqrtFloat64x2(src, dest);
+ break;
+ case wasm::SimdOp::V128Not:
+ masm.bitwiseNotSimd128(src, dest);
+ break;
+ case wasm::SimdOp::I8x16Abs:
+ masm.absInt8x16(src, dest);
+ break;
+ case wasm::SimdOp::I16x8Abs:
+ masm.absInt16x8(src, dest);
+ break;
+ case wasm::SimdOp::I32x4Abs:
+ masm.absInt32x4(src, dest);
+ break;
+ case wasm::SimdOp::I64x2Abs:
+ masm.absInt64x2(src, dest);
+ break;
+ case wasm::SimdOp::F32x4Ceil:
+ masm.ceilFloat32x4(src, dest);
+ break;
+ case wasm::SimdOp::F32x4Floor:
+ masm.floorFloat32x4(src, dest);
+ break;
+ case wasm::SimdOp::F32x4Trunc:
+ masm.truncFloat32x4(src, dest);
+ break;
+ case wasm::SimdOp::F32x4Nearest:
+ masm.nearestFloat32x4(src, dest);
+ break;
+ case wasm::SimdOp::F64x2Ceil:
+ masm.ceilFloat64x2(src, dest);
+ break;
+ case wasm::SimdOp::F64x2Floor:
+ masm.floorFloat64x2(src, dest);
+ break;
+ case wasm::SimdOp::F64x2Trunc:
+ masm.truncFloat64x2(src, dest);
+ break;
+ case wasm::SimdOp::F64x2Nearest:
+ masm.nearestFloat64x2(src, dest);
+ break;
+ case wasm::SimdOp::F32x4DemoteF64x2Zero:
+ masm.convertFloat64x2ToFloat32x4(src, dest);
+ break;
+ case wasm::SimdOp::F64x2PromoteLowF32x4:
+ masm.convertFloat32x4ToFloat64x2(src, dest);
+ break;
+ case wasm::SimdOp::F64x2ConvertLowI32x4S:
+ masm.convertInt32x4ToFloat64x2(src, dest);
+ break;
+ case wasm::SimdOp::F64x2ConvertLowI32x4U:
+ masm.unsignedConvertInt32x4ToFloat64x2(src, dest);
+ break;
+ case wasm::SimdOp::I32x4TruncSatF64x2SZero:
+ masm.truncSatFloat64x2ToInt32x4(src, dest, ToFloatRegister(ins->temp()));
+ break;
+ case wasm::SimdOp::I32x4TruncSatF64x2UZero:
+ masm.unsignedTruncSatFloat64x2ToInt32x4(src, dest,
+ ToFloatRegister(ins->temp()));
+ break;
+ case wasm::SimdOp::I16x8ExtaddPairwiseI8x16S:
+ masm.extAddPairwiseInt8x16(src, dest);
+ break;
+ case wasm::SimdOp::I16x8ExtaddPairwiseI8x16U:
+ masm.unsignedExtAddPairwiseInt8x16(src, dest);
+ break;
+ case wasm::SimdOp::I32x4ExtaddPairwiseI16x8S:
+ masm.extAddPairwiseInt16x8(src, dest);
+ break;
+ case wasm::SimdOp::I32x4ExtaddPairwiseI16x8U:
+ masm.unsignedExtAddPairwiseInt16x8(src, dest);
+ break;
+ case wasm::SimdOp::I8x16Popcnt:
+ masm.popcntInt8x16(src, dest);
+ break;
+ case wasm::SimdOp::I32x4RelaxedTruncF32x4S:
+ masm.truncFloat32x4ToInt32x4Relaxed(src, dest);
+ break;
+ case wasm::SimdOp::I32x4RelaxedTruncF32x4U:
+ masm.unsignedTruncFloat32x4ToInt32x4Relaxed(src, dest);
+ break;
+ case wasm::SimdOp::I32x4RelaxedTruncF64x2SZero:
+ masm.truncFloat64x2ToInt32x4Relaxed(src, dest);
+ break;
+ case wasm::SimdOp::I32x4RelaxedTruncF64x2UZero:
+ masm.unsignedTruncFloat64x2ToInt32x4Relaxed(src, dest);
+ break;
+ default:
+ MOZ_CRASH("Unary SimdOp not implemented");
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void CodeGenerator::visitWasmReduceSimd128(LWasmReduceSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ FloatRegister src = ToFloatRegister(ins->src());
+ const LDefinition* dest = ins->output();
+ uint32_t imm = ins->imm();
+ FloatRegister temp = ToTempFloatRegisterOrInvalid(ins->getTemp(0));
+
+ switch (ins->simdOp()) {
+ case wasm::SimdOp::V128AnyTrue:
+ masm.anyTrueSimd128(src, ToRegister(dest));
+ break;
+ case wasm::SimdOp::I8x16AllTrue:
+ masm.allTrueInt8x16(src, ToRegister(dest));
+ break;
+ case wasm::SimdOp::I16x8AllTrue:
+ masm.allTrueInt16x8(src, ToRegister(dest));
+ break;
+ case wasm::SimdOp::I32x4AllTrue:
+ masm.allTrueInt32x4(src, ToRegister(dest));
+ break;
+ case wasm::SimdOp::I64x2AllTrue:
+ masm.allTrueInt64x2(src, ToRegister(dest));
+ break;
+ case wasm::SimdOp::I8x16Bitmask:
+ masm.bitmaskInt8x16(src, ToRegister(dest), temp);
+ break;
+ case wasm::SimdOp::I16x8Bitmask:
+ masm.bitmaskInt16x8(src, ToRegister(dest), temp);
+ break;
+ case wasm::SimdOp::I32x4Bitmask:
+ masm.bitmaskInt32x4(src, ToRegister(dest), temp);
+ break;
+ case wasm::SimdOp::I64x2Bitmask:
+ masm.bitmaskInt64x2(src, ToRegister(dest), temp);
+ break;
+ case wasm::SimdOp::I8x16ExtractLaneS:
+ masm.extractLaneInt8x16(imm, src, ToRegister(dest));
+ break;
+ case wasm::SimdOp::I8x16ExtractLaneU:
+ masm.unsignedExtractLaneInt8x16(imm, src, ToRegister(dest));
+ break;
+ case wasm::SimdOp::I16x8ExtractLaneS:
+ masm.extractLaneInt16x8(imm, src, ToRegister(dest));
+ break;
+ case wasm::SimdOp::I16x8ExtractLaneU:
+ masm.unsignedExtractLaneInt16x8(imm, src, ToRegister(dest));
+ break;
+ case wasm::SimdOp::I32x4ExtractLane:
+ masm.extractLaneInt32x4(imm, src, ToRegister(dest));
+ break;
+ case wasm::SimdOp::F32x4ExtractLane:
+ masm.extractLaneFloat32x4(imm, src, ToFloatRegister(dest));
+ break;
+ case wasm::SimdOp::F64x2ExtractLane:
+ masm.extractLaneFloat64x2(imm, src, ToFloatRegister(dest));
+ break;
+ default:
+ MOZ_CRASH("Reduce SimdOp not implemented");
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void CodeGenerator::visitWasmReduceAndBranchSimd128(
+ LWasmReduceAndBranchSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ FloatRegister src = ToFloatRegister(ins->src());
+
+ ScratchSimd128Scope scratch(masm);
+ vixl::UseScratchRegisterScope temps(&masm.asVIXL());
+ const Register test = temps.AcquireX().asUnsized();
+
+ switch (ins->simdOp()) {
+ case wasm::SimdOp::V128AnyTrue:
+ masm.Addp(Simd1D(scratch), Simd2D(src));
+ masm.Umov(ARMRegister(test, 64), Simd1D(scratch), 0);
+ masm.branch64(Assembler::Equal, Register64(test), Imm64(0),
+ getJumpLabelForBranch(ins->ifFalse()));
+ jumpToBlock(ins->ifTrue());
+ break;
+ case wasm::SimdOp::I8x16AllTrue:
+ case wasm::SimdOp::I16x8AllTrue:
+ case wasm::SimdOp::I32x4AllTrue:
+ case wasm::SimdOp::I64x2AllTrue: {
+ // Compare all lanes to zero.
+ switch (ins->simdOp()) {
+ case wasm::SimdOp::I8x16AllTrue:
+ masm.Cmeq(Simd16B(scratch), Simd16B(src), 0);
+ break;
+ case wasm::SimdOp::I16x8AllTrue:
+ masm.Cmeq(Simd8H(scratch), Simd8H(src), 0);
+ break;
+ case wasm::SimdOp::I32x4AllTrue:
+ masm.Cmeq(Simd4S(scratch), Simd4S(src), 0);
+ break;
+ case wasm::SimdOp::I64x2AllTrue:
+ masm.Cmeq(Simd2D(scratch), Simd2D(src), 0);
+ break;
+ default:
+ MOZ_CRASH();
+ }
+ masm.Addp(Simd1D(scratch), Simd2D(scratch));
+ masm.Umov(ARMRegister(test, 64), Simd1D(scratch), 0);
+ masm.branch64(Assembler::NotEqual, Register64(test), Imm64(0),
+ getJumpLabelForBranch(ins->ifFalse()));
+ jumpToBlock(ins->ifTrue());
+ break;
+ }
+ default:
+ MOZ_CRASH("Reduce-and-branch SimdOp not implemented");
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void CodeGenerator::visitWasmReduceSimd128ToInt64(
+ LWasmReduceSimd128ToInt64* ins) {
+#ifdef ENABLE_WASM_SIMD
+ FloatRegister src = ToFloatRegister(ins->src());
+ Register64 dest = ToOutRegister64(ins);
+ uint32_t imm = ins->imm();
+
+ switch (ins->simdOp()) {
+ case wasm::SimdOp::I64x2ExtractLane:
+ masm.extractLaneInt64x2(imm, src, dest);
+ break;
+ default:
+ MOZ_CRASH("Reduce SimdOp not implemented");
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+static inline wasm::MemoryAccessDesc DeriveMemoryAccessDesc(
+ const wasm::MemoryAccessDesc& access, Scalar::Type type) {
+ return wasm::MemoryAccessDesc(type, access.align(), access.offset(),
+ access.trapOffset());
+}
+
+void CodeGenerator::visitWasmLoadLaneSimd128(LWasmLoadLaneSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ // Forward loading to wasmLoad, and use replaceLane after that.
+ const MWasmLoadLaneSimd128* mir = ins->mir();
+ Register temp = ToRegister(ins->temp());
+ FloatRegister src = ToFloatRegister(ins->src());
+ FloatRegister dest = ToFloatRegister(ins->output());
+ // replaceLane takes an lhsDest argument.
+ masm.moveSimd128(src, dest);
+ switch (ins->laneSize()) {
+ case 1: {
+ masm.wasmLoad(DeriveMemoryAccessDesc(mir->access(), Scalar::Int8),
+ HeapReg, ToRegister(ins->ptr()), AnyRegister(temp));
+ masm.replaceLaneInt8x16(ins->laneIndex(), temp, dest);
+ break;
+ }
+ case 2: {
+ masm.wasmLoad(DeriveMemoryAccessDesc(mir->access(), Scalar::Int16),
+ HeapReg, ToRegister(ins->ptr()), AnyRegister(temp));
+ masm.replaceLaneInt16x8(ins->laneIndex(), temp, dest);
+ break;
+ }
+ case 4: {
+ masm.wasmLoad(DeriveMemoryAccessDesc(mir->access(), Scalar::Int32),
+ HeapReg, ToRegister(ins->ptr()), AnyRegister(temp));
+ masm.replaceLaneInt32x4(ins->laneIndex(), temp, dest);
+ break;
+ }
+ case 8: {
+ masm.wasmLoadI64(DeriveMemoryAccessDesc(mir->access(), Scalar::Int64),
+ HeapReg, ToRegister(ins->ptr()), Register64(temp));
+ masm.replaceLaneInt64x2(ins->laneIndex(), Register64(temp), dest);
+ break;
+ }
+ default:
+ MOZ_CRASH("Unsupported load lane size");
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void CodeGenerator::visitWasmStoreLaneSimd128(LWasmStoreLaneSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ // Forward storing to wasmStore for the result of extractLane.
+ const MWasmStoreLaneSimd128* mir = ins->mir();
+ Register temp = ToRegister(ins->temp());
+ FloatRegister src = ToFloatRegister(ins->src());
+ switch (ins->laneSize()) {
+ case 1: {
+ masm.extractLaneInt8x16(ins->laneIndex(), src, temp);
+ masm.wasmStore(DeriveMemoryAccessDesc(mir->access(), Scalar::Int8),
+ AnyRegister(temp), HeapReg, ToRegister(ins->ptr()));
+ break;
+ }
+ case 2: {
+ masm.extractLaneInt16x8(ins->laneIndex(), src, temp);
+ masm.wasmStore(DeriveMemoryAccessDesc(mir->access(), Scalar::Int16),
+ AnyRegister(temp), HeapReg, ToRegister(ins->ptr()));
+ break;
+ }
+ case 4: {
+ masm.extractLaneInt32x4(ins->laneIndex(), src, temp);
+ masm.wasmStore(DeriveMemoryAccessDesc(mir->access(), Scalar::Int32),
+ AnyRegister(temp), HeapReg, ToRegister(ins->ptr()));
+ break;
+ }
+ case 8: {
+ masm.extractLaneInt64x2(ins->laneIndex(), src, Register64(temp));
+ masm.wasmStoreI64(DeriveMemoryAccessDesc(mir->access(), Scalar::Int64),
+ Register64(temp), HeapReg, ToRegister(ins->ptr()));
+ break;
+ }
+ default:
+ MOZ_CRASH("Unsupported store lane size");
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
diff --git a/js/src/jit/arm64/CodeGenerator-arm64.h b/js/src/jit/arm64/CodeGenerator-arm64.h
new file mode 100644
index 0000000000..43cd24fddf
--- /dev/null
+++ b/js/src/jit/arm64/CodeGenerator-arm64.h
@@ -0,0 +1,135 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_arm64_CodeGenerator_arm64_h
+#define jit_arm64_CodeGenerator_arm64_h
+
+#include "jit/arm64/Assembler-arm64.h"
+#include "jit/shared/CodeGenerator-shared.h"
+
+namespace js {
+namespace jit {
+
+class CodeGeneratorARM64;
+class OutOfLineBailout;
+class OutOfLineTableSwitch;
+
+using OutOfLineWasmTruncateCheck =
+ OutOfLineWasmTruncateCheckBase<CodeGeneratorARM64>;
+
+class CodeGeneratorARM64 : public CodeGeneratorShared {
+ friend class MoveResolverARM64;
+
+ protected:
+ CodeGeneratorARM64(MIRGenerator* gen, LIRGraph* graph, MacroAssembler* masm);
+
+ NonAssertingLabel deoptLabel_;
+
+ MoveOperand toMoveOperand(const LAllocation a) const;
+
+ void bailoutIf(Assembler::Condition condition, LSnapshot* snapshot);
+ void bailoutFrom(Label* label, LSnapshot* snapshot);
+ void bailout(LSnapshot* snapshot);
+
+ template <typename T1, typename T2>
+ void bailoutCmpPtr(Assembler::Condition c, T1 lhs, T2 rhs,
+ LSnapshot* snapshot) {
+ masm.cmpPtr(lhs, rhs);
+ return bailoutIf(c, snapshot);
+ }
+ void bailoutTestPtr(Assembler::Condition c, Register lhs, Register rhs,
+ LSnapshot* snapshot) {
+ masm.testPtr(lhs, rhs);
+ return bailoutIf(c, snapshot);
+ }
+ template <typename T1, typename T2>
+ void bailoutCmp32(Assembler::Condition c, T1 lhs, T2 rhs,
+ LSnapshot* snapshot) {
+ masm.cmp32(lhs, rhs);
+ return bailoutIf(c, snapshot);
+ }
+ template <typename T1, typename T2>
+ void bailoutTest32(Assembler::Condition c, T1 lhs, T2 rhs,
+ LSnapshot* snapshot) {
+ masm.test32(lhs, rhs);
+ return bailoutIf(c, snapshot);
+ }
+ void bailoutIfFalseBool(Register reg, LSnapshot* snapshot) {
+ masm.test32(reg, Imm32(0xFF));
+ return bailoutIf(Assembler::Zero, snapshot);
+ }
+
+ bool generateOutOfLineCode();
+
+ // Emits a branch that directs control flow to the true block if |cond| is
+ // true, and the false block if |cond| is false.
+ void emitBranch(Assembler::Condition cond, MBasicBlock* ifTrue,
+ MBasicBlock* ifFalse);
+
+ void testNullEmitBranch(Assembler::Condition cond, const ValueOperand& value,
+ MBasicBlock* ifTrue, MBasicBlock* ifFalse) {
+ cond = masm.testNull(cond, value);
+ emitBranch(cond, ifTrue, ifFalse);
+ }
+ void testUndefinedEmitBranch(Assembler::Condition cond,
+ const ValueOperand& value, MBasicBlock* ifTrue,
+ MBasicBlock* ifFalse) {
+ cond = masm.testUndefined(cond, value);
+ emitBranch(cond, ifTrue, ifFalse);
+ }
+ void testObjectEmitBranch(Assembler::Condition cond,
+ const ValueOperand& value, MBasicBlock* ifTrue,
+ MBasicBlock* ifFalse) {
+ cond = masm.testObject(cond, value);
+ emitBranch(cond, ifTrue, ifFalse);
+ }
+ void testZeroEmitBranch(Assembler::Condition cond, Register reg,
+ MBasicBlock* ifTrue, MBasicBlock* ifFalse) {
+ MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
+ masm.cmpPtr(reg, ImmWord(0));
+ emitBranch(cond, ifTrue, ifFalse);
+ }
+
+ void emitTableSwitchDispatch(MTableSwitch* mir, Register index,
+ Register base);
+
+ void emitBigIntDiv(LBigIntDiv* ins, Register dividend, Register divisor,
+ Register output, Label* fail);
+ void emitBigIntMod(LBigIntMod* ins, Register dividend, Register divisor,
+ Register output, Label* fail);
+ void emitSimpleBinaryI64(
+ LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>* lir, JSOp op);
+
+ ValueOperand ToValue(LInstruction* ins, size_t pos);
+ ValueOperand ToTempValue(LInstruction* ins, size_t pos);
+
+ void generateInvalidateEpilogue();
+
+ public:
+ void visitOutOfLineBailout(OutOfLineBailout* ool);
+ void visitOutOfLineTableSwitch(OutOfLineTableSwitch* ool);
+ void visitOutOfLineWasmTruncateCheck(OutOfLineWasmTruncateCheck* ool);
+};
+
+typedef CodeGeneratorARM64 CodeGeneratorSpecific;
+
+// An out-of-line bailout thunk.
+class OutOfLineBailout : public OutOfLineCodeBase<CodeGeneratorARM64> {
+ protected: // Silence Clang warning.
+ LSnapshot* snapshot_;
+
+ public:
+ explicit OutOfLineBailout(LSnapshot* snapshot) : snapshot_(snapshot) {}
+
+ void accept(CodeGeneratorARM64* codegen) override;
+
+ LSnapshot* snapshot() const { return snapshot_; }
+};
+
+} // namespace jit
+} // namespace js
+
+#endif /* jit_arm64_CodeGenerator_arm64_h */
diff --git a/js/src/jit/arm64/LIR-arm64.h b/js/src/jit/arm64/LIR-arm64.h
new file mode 100644
index 0000000000..d825209b1e
--- /dev/null
+++ b/js/src/jit/arm64/LIR-arm64.h
@@ -0,0 +1,373 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_arm64_LIR_arm64_h
+#define jit_arm64_LIR_arm64_h
+
+namespace js {
+namespace jit {
+
+class LUnboxBase : public LInstructionHelper<1, 1, 0> {
+ public:
+ LUnboxBase(LNode::Opcode opcode, const LAllocation& input)
+ : LInstructionHelper(opcode) {
+ setOperand(0, input);
+ }
+
+ static const size_t Input = 0;
+
+ MUnbox* mir() const { return mir_->toUnbox(); }
+};
+
+class LUnbox : public LUnboxBase {
+ public:
+ LIR_HEADER(Unbox);
+
+ explicit LUnbox(const LAllocation& input) : LUnboxBase(classOpcode, input) {}
+
+ const char* extraName() const { return StringFromMIRType(mir()->type()); }
+};
+
+class LUnboxFloatingPoint : public LUnboxBase {
+ MIRType type_;
+
+ public:
+ LIR_HEADER(UnboxFloatingPoint);
+
+ LUnboxFloatingPoint(const LAllocation& input, MIRType type)
+ : LUnboxBase(classOpcode, input), type_(type) {}
+
+ MIRType type() const { return type_; }
+ const char* extraName() const { return StringFromMIRType(type_); }
+};
+
+// Convert a 32-bit unsigned integer to a double.
+class LWasmUint32ToDouble : public LInstructionHelper<1, 1, 0> {
+ public:
+ LIR_HEADER(WasmUint32ToDouble)
+
+ explicit LWasmUint32ToDouble(const LAllocation& input)
+ : LInstructionHelper(classOpcode) {
+ setOperand(0, input);
+ }
+};
+
+// Convert a 32-bit unsigned integer to a float32.
+class LWasmUint32ToFloat32 : public LInstructionHelper<1, 1, 0> {
+ public:
+ LIR_HEADER(WasmUint32ToFloat32)
+
+ explicit LWasmUint32ToFloat32(const LAllocation& input)
+ : LInstructionHelper(classOpcode) {
+ setOperand(0, input);
+ }
+};
+
+class LDivI : public LBinaryMath<1> {
+ public:
+ LIR_HEADER(DivI);
+
+ LDivI(const LAllocation& lhs, const LAllocation& rhs, const LDefinition& temp)
+ : LBinaryMath(classOpcode) {
+ setOperand(0, lhs);
+ setOperand(1, rhs);
+ setTemp(0, temp);
+ }
+
+ MDiv* mir() const { return mir_->toDiv(); }
+};
+
+class LDivPowTwoI : public LInstructionHelper<1, 1, 0> {
+ const int32_t shift_;
+ const bool negativeDivisor_;
+
+ public:
+ LIR_HEADER(DivPowTwoI)
+
+ LDivPowTwoI(const LAllocation& lhs, int32_t shift, bool negativeDivisor)
+ : LInstructionHelper(classOpcode),
+ shift_(shift),
+ negativeDivisor_(negativeDivisor) {
+ setOperand(0, lhs);
+ }
+
+ const LAllocation* numerator() { return getOperand(0); }
+
+ int32_t shift() { return shift_; }
+ bool negativeDivisor() { return negativeDivisor_; }
+
+ MDiv* mir() const { return mir_->toDiv(); }
+};
+
+class LDivConstantI : public LInstructionHelper<1, 1, 1> {
+ const int32_t denominator_;
+
+ public:
+ LIR_HEADER(DivConstantI)
+
+ LDivConstantI(const LAllocation& lhs, int32_t denominator,
+ const LDefinition& temp)
+ : LInstructionHelper(classOpcode), denominator_(denominator) {
+ setOperand(0, lhs);
+ setTemp(0, temp);
+ }
+
+ const LAllocation* numerator() { return getOperand(0); }
+ const LDefinition* temp() { return getTemp(0); }
+ int32_t denominator() const { return denominator_; }
+ MDiv* mir() const { return mir_->toDiv(); }
+ bool canBeNegativeDividend() const { return mir()->canBeNegativeDividend(); }
+};
+
+class LUDivConstantI : public LInstructionHelper<1, 1, 1> {
+ const int32_t denominator_;
+
+ public:
+ LIR_HEADER(UDivConstantI)
+
+ LUDivConstantI(const LAllocation& lhs, int32_t denominator,
+ const LDefinition& temp)
+ : LInstructionHelper(classOpcode), denominator_(denominator) {
+ setOperand(0, lhs);
+ setTemp(0, temp);
+ }
+
+ const LAllocation* numerator() { return getOperand(0); }
+ const LDefinition* temp() { return getTemp(0); }
+ int32_t denominator() const { return denominator_; }
+ MDiv* mir() const { return mir_->toDiv(); }
+};
+
+class LModI : public LBinaryMath<0> {
+ public:
+ LIR_HEADER(ModI);
+
+ LModI(const LAllocation& lhs, const LAllocation& rhs)
+ : LBinaryMath(classOpcode) {
+ setOperand(0, lhs);
+ setOperand(1, rhs);
+ }
+
+ MMod* mir() const { return mir_->toMod(); }
+};
+
+class LModPowTwoI : public LInstructionHelper<1, 1, 0> {
+ const int32_t shift_;
+
+ public:
+ LIR_HEADER(ModPowTwoI);
+ int32_t shift() { return shift_; }
+
+ LModPowTwoI(const LAllocation& lhs, int32_t shift)
+ : LInstructionHelper(classOpcode), shift_(shift) {
+ setOperand(0, lhs);
+ }
+
+ MMod* mir() const { return mir_->toMod(); }
+};
+
+class LModMaskI : public LInstructionHelper<1, 1, 2> {
+ const int32_t shift_;
+
+ public:
+ LIR_HEADER(ModMaskI);
+
+ LModMaskI(const LAllocation& lhs, const LDefinition& temp1,
+ const LDefinition& temp2, int32_t shift)
+ : LInstructionHelper(classOpcode), shift_(shift) {
+ setOperand(0, lhs);
+ setTemp(0, temp1);
+ setTemp(1, temp2);
+ }
+
+ int32_t shift() const { return shift_; }
+
+ MMod* mir() const { return mir_->toMod(); }
+};
+
+// Takes a tableswitch with an integer to decide
+class LTableSwitch : public LInstructionHelper<0, 1, 2> {
+ public:
+ LIR_HEADER(TableSwitch);
+
+ LTableSwitch(const LAllocation& in, const LDefinition& inputCopy,
+ const LDefinition& jumpTablePointer, MTableSwitch* ins)
+ : LInstructionHelper(classOpcode) {
+ setOperand(0, in);
+ setTemp(0, inputCopy);
+ setTemp(1, jumpTablePointer);
+ setMir(ins);
+ }
+
+ MTableSwitch* mir() const { return mir_->toTableSwitch(); }
+
+ const LAllocation* index() { return getOperand(0); }
+ const LDefinition* tempInt() { return getTemp(0); }
+ // This is added to share the same CodeGenerator prefixes.
+ const LDefinition* tempPointer() { return getTemp(1); }
+};
+
+// Takes a tableswitch with an integer to decide
+class LTableSwitchV : public LInstructionHelper<0, BOX_PIECES, 3> {
+ public:
+ LIR_HEADER(TableSwitchV);
+
+ LTableSwitchV(const LBoxAllocation& input, const LDefinition& inputCopy,
+ const LDefinition& floatCopy,
+ const LDefinition& jumpTablePointer, MTableSwitch* ins)
+ : LInstructionHelper(classOpcode) {
+ setBoxOperand(InputValue, input);
+ setTemp(0, inputCopy);
+ setTemp(1, floatCopy);
+ setTemp(2, jumpTablePointer);
+ setMir(ins);
+ }
+
+ MTableSwitch* mir() const { return mir_->toTableSwitch(); }
+
+ static const size_t InputValue = 0;
+
+ const LDefinition* tempInt() { return getTemp(0); }
+ const LDefinition* tempFloat() { return getTemp(1); }
+ const LDefinition* tempPointer() { return getTemp(2); }
+};
+
+class LMulI : public LBinaryMath<0> {
+ public:
+ LIR_HEADER(MulI);
+
+ LMulI() : LBinaryMath(classOpcode) {}
+
+ MMul* mir() { return mir_->toMul(); }
+};
+
+class LUDiv : public LBinaryMath<1> {
+ public:
+ LIR_HEADER(UDiv);
+
+ LUDiv(const LAllocation& lhs, const LAllocation& rhs,
+ const LDefinition& remainder)
+ : LBinaryMath(classOpcode) {
+ setOperand(0, lhs);
+ setOperand(1, rhs);
+ setTemp(0, remainder);
+ }
+
+ const LDefinition* remainder() { return getTemp(0); }
+
+ MDiv* mir() { return mir_->toDiv(); }
+};
+
+class LUMod : public LBinaryMath<0> {
+ public:
+ LIR_HEADER(UMod);
+
+ LUMod(const LAllocation& lhs, const LAllocation& rhs)
+ : LBinaryMath(classOpcode) {
+ setOperand(0, lhs);
+ setOperand(1, rhs);
+ }
+
+ MMod* mir() { return mir_->toMod(); }
+};
+
+class LInt64ToFloatingPoint : public LInstructionHelper<1, 1, 0> {
+ public:
+ LIR_HEADER(Int64ToFloatingPoint);
+
+ explicit LInt64ToFloatingPoint(const LInt64Allocation& in)
+ : LInstructionHelper(classOpcode) {
+ setInt64Operand(0, in);
+ }
+
+ MInt64ToFloatingPoint* mir() const { return mir_->toInt64ToFloatingPoint(); }
+};
+
+class LWasmTruncateToInt64 : public LInstructionHelper<1, 1, 0> {
+ public:
+ LIR_HEADER(WasmTruncateToInt64);
+
+ explicit LWasmTruncateToInt64(const LAllocation& in)
+ : LInstructionHelper(classOpcode) {
+ setOperand(0, in);
+ }
+
+ MWasmTruncateToInt64* mir() const { return mir_->toWasmTruncateToInt64(); }
+};
+
+class LDivOrModI64 : public LBinaryMath<0> {
+ public:
+ LIR_HEADER(DivOrModI64)
+
+ LDivOrModI64(const LAllocation& lhs, const LAllocation& rhs)
+ : LBinaryMath(classOpcode) {
+ setOperand(0, lhs);
+ setOperand(1, rhs);
+ }
+
+ MBinaryArithInstruction* mir() const {
+ MOZ_ASSERT(mir_->isDiv() || mir_->isMod());
+ return static_cast<MBinaryArithInstruction*>(mir_);
+ }
+
+ bool canBeDivideByZero() const {
+ if (mir_->isMod()) {
+ return mir_->toMod()->canBeDivideByZero();
+ }
+ return mir_->toDiv()->canBeDivideByZero();
+ }
+ bool canBeNegativeOverflow() const {
+ if (mir_->isMod()) {
+ return mir_->toMod()->canBeNegativeDividend();
+ }
+ return mir_->toDiv()->canBeNegativeOverflow();
+ }
+ wasm::BytecodeOffset bytecodeOffset() const {
+ MOZ_ASSERT(mir_->isDiv() || mir_->isMod());
+ if (mir_->isMod()) {
+ return mir_->toMod()->bytecodeOffset();
+ }
+ return mir_->toDiv()->bytecodeOffset();
+ }
+};
+
+class LUDivOrModI64 : public LBinaryMath<0> {
+ public:
+ LIR_HEADER(UDivOrModI64);
+
+ LUDivOrModI64(const LAllocation& lhs, const LAllocation& rhs)
+ : LBinaryMath(classOpcode) {
+ setOperand(0, lhs);
+ setOperand(1, rhs);
+ }
+
+ const char* extraName() const {
+ return mir()->isTruncated() ? "Truncated" : nullptr;
+ }
+
+ MBinaryArithInstruction* mir() const {
+ MOZ_ASSERT(mir_->isDiv() || mir_->isMod());
+ return static_cast<MBinaryArithInstruction*>(mir_);
+ }
+ bool canBeDivideByZero() const {
+ if (mir_->isMod()) {
+ return mir_->toMod()->canBeDivideByZero();
+ }
+ return mir_->toDiv()->canBeDivideByZero();
+ }
+ wasm::BytecodeOffset bytecodeOffset() const {
+ MOZ_ASSERT(mir_->isDiv() || mir_->isMod());
+ if (mir_->isMod()) {
+ return mir_->toMod()->bytecodeOffset();
+ }
+ return mir_->toDiv()->bytecodeOffset();
+ }
+};
+
+} // namespace jit
+} // namespace js
+
+#endif /* jit_arm64_LIR_arm64_h */
diff --git a/js/src/jit/arm64/Lowering-arm64.cpp b/js/src/jit/arm64/Lowering-arm64.cpp
new file mode 100644
index 0000000000..d71f22089d
--- /dev/null
+++ b/js/src/jit/arm64/Lowering-arm64.cpp
@@ -0,0 +1,1438 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "jit/arm64/Lowering-arm64.h"
+
+#include "mozilla/MathAlgorithms.h"
+
+#include "jit/arm64/Assembler-arm64.h"
+#include "jit/Lowering.h"
+#include "jit/MIR.h"
+#include "jit/shared/Lowering-shared-inl.h"
+
+using namespace js;
+using namespace js::jit;
+
+using mozilla::FloorLog2;
+
+LBoxAllocation LIRGeneratorARM64::useBoxFixed(MDefinition* mir, Register reg1,
+ Register, bool useAtStart) {
+ MOZ_ASSERT(mir->type() == MIRType::Value);
+
+ ensureDefined(mir);
+ return LBoxAllocation(LUse(reg1, mir->virtualRegister(), useAtStart));
+}
+
+LAllocation LIRGeneratorARM64::useByteOpRegister(MDefinition* mir) {
+ return useRegister(mir);
+}
+
+LAllocation LIRGeneratorARM64::useByteOpRegisterAtStart(MDefinition* mir) {
+ return useRegisterAtStart(mir);
+}
+
+LAllocation LIRGeneratorARM64::useByteOpRegisterOrNonDoubleConstant(
+ MDefinition* mir) {
+ return useRegisterOrNonDoubleConstant(mir);
+}
+
+LDefinition LIRGeneratorARM64::tempByteOpRegister() { return temp(); }
+
+LDefinition LIRGeneratorARM64::tempToUnbox() { return temp(); }
+
+void LIRGenerator::visitBox(MBox* box) {
+ MDefinition* opd = box->getOperand(0);
+
+ // If the operand is a constant, emit near its uses.
+ if (opd->isConstant() && box->canEmitAtUses()) {
+ emitAtUses(box);
+ return;
+ }
+
+ if (opd->isConstant()) {
+ define(new (alloc()) LValue(opd->toConstant()->toJSValue()), box,
+ LDefinition(LDefinition::BOX));
+ } else {
+ LBox* ins = new (alloc()) LBox(useRegister(opd), opd->type());
+ define(ins, box, LDefinition(LDefinition::BOX));
+ }
+}
+
+void LIRGenerator::visitUnbox(MUnbox* unbox) {
+ MDefinition* box = unbox->getOperand(0);
+ MOZ_ASSERT(box->type() == MIRType::Value);
+
+ LUnboxBase* lir;
+ if (IsFloatingPointType(unbox->type())) {
+ lir = new (alloc())
+ LUnboxFloatingPoint(useRegisterAtStart(box), unbox->type());
+ } else if (unbox->fallible()) {
+ // If the unbox is fallible, load the Value in a register first to
+ // avoid multiple loads.
+ lir = new (alloc()) LUnbox(useRegisterAtStart(box));
+ } else {
+ // FIXME: It should be possible to useAtStart() here, but the DEBUG
+ // code in CodeGenerator::visitUnbox() needs to handle non-Register
+ // cases. ARM64 doesn't have an Operand type.
+ lir = new (alloc()) LUnbox(useRegisterAtStart(box));
+ }
+
+ if (unbox->fallible()) {
+ assignSnapshot(lir, unbox->bailoutKind());
+ }
+
+ define(lir, unbox);
+}
+
+void LIRGenerator::visitReturnImpl(MDefinition* opd, bool isGenerator) {
+ MOZ_ASSERT(opd->type() == MIRType::Value);
+
+ LReturn* ins = new (alloc()) LReturn(isGenerator);
+ ins->setOperand(0, useFixed(opd, JSReturnReg));
+ add(ins);
+}
+
+// x = !y
+void LIRGeneratorARM64::lowerForALU(LInstructionHelper<1, 1, 0>* ins,
+ MDefinition* mir, MDefinition* input) {
+ ins->setOperand(
+ 0, ins->snapshot() ? useRegister(input) : useRegisterAtStart(input));
+ define(
+ ins, mir,
+ LDefinition(LDefinition::TypeFrom(mir->type()), LDefinition::REGISTER));
+}
+
+// z = x+y
+void LIRGeneratorARM64::lowerForALU(LInstructionHelper<1, 2, 0>* ins,
+ MDefinition* mir, MDefinition* lhs,
+ MDefinition* rhs) {
+ ins->setOperand(0,
+ ins->snapshot() ? useRegister(lhs) : useRegisterAtStart(lhs));
+ ins->setOperand(1, ins->snapshot() ? useRegisterOrConstant(rhs)
+ : useRegisterOrConstantAtStart(rhs));
+ define(
+ ins, mir,
+ LDefinition(LDefinition::TypeFrom(mir->type()), LDefinition::REGISTER));
+}
+
+void LIRGeneratorARM64::lowerForFPU(LInstructionHelper<1, 1, 0>* ins,
+ MDefinition* mir, MDefinition* input) {
+ ins->setOperand(0, useRegisterAtStart(input));
+ define(
+ ins, mir,
+ LDefinition(LDefinition::TypeFrom(mir->type()), LDefinition::REGISTER));
+}
+
+template <size_t Temps>
+void LIRGeneratorARM64::lowerForFPU(LInstructionHelper<1, 2, Temps>* ins,
+ MDefinition* mir, MDefinition* lhs,
+ MDefinition* rhs) {
+ ins->setOperand(0, useRegisterAtStart(lhs));
+ ins->setOperand(1, useRegisterAtStart(rhs));
+ define(
+ ins, mir,
+ LDefinition(LDefinition::TypeFrom(mir->type()), LDefinition::REGISTER));
+}
+
+template void LIRGeneratorARM64::lowerForFPU(LInstructionHelper<1, 2, 0>* ins,
+ MDefinition* mir, MDefinition* lhs,
+ MDefinition* rhs);
+template void LIRGeneratorARM64::lowerForFPU(LInstructionHelper<1, 2, 1>* ins,
+ MDefinition* mir, MDefinition* lhs,
+ MDefinition* rhs);
+
+void LIRGeneratorARM64::lowerForALUInt64(
+ LInstructionHelper<INT64_PIECES, INT64_PIECES, 0>* ins, MDefinition* mir,
+ MDefinition* input) {
+ ins->setInt64Operand(0, useInt64RegisterAtStart(input));
+ defineInt64(ins, mir);
+}
+
+// These all currently have codegen that depends on reuse but only because the
+// masm API depends on that. We need new three-address masm APIs, for both
+// constant and variable rhs.
+//
+// MAdd => LAddI64
+// MSub => LSubI64
+// MBitAnd, MBitOr, MBitXor => LBitOpI64
+void LIRGeneratorARM64::lowerForALUInt64(
+ LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>* ins,
+ MDefinition* mir, MDefinition* lhs, MDefinition* rhs) {
+ ins->setInt64Operand(0, useInt64RegisterAtStart(lhs));
+ ins->setInt64Operand(INT64_PIECES, useInt64RegisterOrConstantAtStart(rhs));
+ defineInt64(ins, mir);
+}
+
+void LIRGeneratorARM64::lowerForMulInt64(LMulI64* ins, MMul* mir,
+ MDefinition* lhs, MDefinition* rhs) {
+ ins->setInt64Operand(LMulI64::Lhs, useInt64RegisterAtStart(lhs));
+ ins->setInt64Operand(LMulI64::Rhs, useInt64RegisterOrConstantAtStart(rhs));
+ defineInt64(ins, mir);
+}
+
+template <size_t Temps>
+void LIRGeneratorARM64::lowerForShiftInt64(
+ LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, Temps>* ins,
+ MDefinition* mir, MDefinition* lhs, MDefinition* rhs) {
+ ins->setInt64Operand(0, useInt64RegisterAtStart(lhs));
+
+ static_assert(LShiftI64::Rhs == INT64_PIECES,
+ "Assume Rhs is located at INT64_PIECES.");
+ static_assert(LRotateI64::Count == INT64_PIECES,
+ "Assume Count is located at INT64_PIECES.");
+
+ ins->setOperand(INT64_PIECES, useRegisterOrConstantAtStart(rhs));
+ defineInt64(ins, mir);
+}
+
+template void LIRGeneratorARM64::lowerForShiftInt64(
+ LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, 0>* ins,
+ MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
+template void LIRGeneratorARM64::lowerForShiftInt64(
+ LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, 1>* ins,
+ MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
+
+void LIRGeneratorARM64::lowerForCompareI64AndBranch(MTest* mir, MCompare* comp,
+ JSOp op, MDefinition* left,
+ MDefinition* right,
+ MBasicBlock* ifTrue,
+ MBasicBlock* ifFalse) {
+ auto* lir = new (alloc())
+ LCompareI64AndBranch(comp, op, useInt64Register(left),
+ useInt64RegisterOrConstant(right), ifTrue, ifFalse);
+ add(lir, mir);
+}
+
+void LIRGeneratorARM64::lowerForBitAndAndBranch(LBitAndAndBranch* baab,
+ MInstruction* mir,
+ MDefinition* lhs,
+ MDefinition* rhs) {
+ baab->setOperand(0, useRegisterAtStart(lhs));
+ baab->setOperand(1, useRegisterOrConstantAtStart(rhs));
+ add(baab, mir);
+}
+
+void LIRGeneratorARM64::lowerWasmBuiltinTruncateToInt32(
+ MWasmBuiltinTruncateToInt32* ins) {
+ MDefinition* opd = ins->input();
+ MOZ_ASSERT(opd->type() == MIRType::Double || opd->type() == MIRType::Float32);
+
+ if (opd->type() == MIRType::Double) {
+ define(new (alloc()) LWasmBuiltinTruncateDToInt32(
+ useRegister(opd), useFixed(ins->instance(), InstanceReg),
+ LDefinition::BogusTemp()),
+ ins);
+ return;
+ }
+
+ define(new (alloc()) LWasmBuiltinTruncateFToInt32(
+ useRegister(opd), useFixed(ins->instance(), InstanceReg),
+ LDefinition::BogusTemp()),
+ ins);
+}
+
+void LIRGeneratorARM64::lowerUntypedPhiInput(MPhi* phi, uint32_t inputPosition,
+ LBlock* block, size_t lirIndex) {
+ lowerTypedPhiInput(phi, inputPosition, block, lirIndex);
+}
+
+void LIRGeneratorARM64::lowerForShift(LInstructionHelper<1, 2, 0>* ins,
+ MDefinition* mir, MDefinition* lhs,
+ MDefinition* rhs) {
+ ins->setOperand(0, useRegister(lhs));
+ ins->setOperand(1, useRegisterOrConstant(rhs));
+ define(ins, mir);
+}
+
+void LIRGeneratorARM64::lowerDivI(MDiv* div) {
+ if (div->isUnsigned()) {
+ lowerUDiv(div);
+ return;
+ }
+
+ if (div->rhs()->isConstant()) {
+ LAllocation lhs = useRegister(div->lhs());
+ int32_t rhs = div->rhs()->toConstant()->toInt32();
+ int32_t shift = mozilla::FloorLog2(mozilla::Abs(rhs));
+
+ if (rhs != 0 && uint32_t(1) << shift == mozilla::Abs(rhs)) {
+ LDivPowTwoI* lir = new (alloc()) LDivPowTwoI(lhs, shift, rhs < 0);
+ if (div->fallible()) {
+ assignSnapshot(lir, div->bailoutKind());
+ }
+ define(lir, div);
+ return;
+ }
+ if (rhs != 0) {
+ LDivConstantI* lir = new (alloc()) LDivConstantI(lhs, rhs, temp());
+ if (div->fallible()) {
+ assignSnapshot(lir, div->bailoutKind());
+ }
+ define(lir, div);
+ return;
+ }
+ }
+
+ LDivI* lir = new (alloc())
+ LDivI(useRegister(div->lhs()), useRegister(div->rhs()), temp());
+ if (div->fallible()) {
+ assignSnapshot(lir, div->bailoutKind());
+ }
+ define(lir, div);
+}
+
+void LIRGeneratorARM64::lowerNegI(MInstruction* ins, MDefinition* input) {
+ define(new (alloc()) LNegI(useRegisterAtStart(input)), ins);
+}
+
+void LIRGeneratorARM64::lowerNegI64(MInstruction* ins, MDefinition* input) {
+ defineInt64(new (alloc()) LNegI64(useInt64RegisterAtStart(input)), ins);
+}
+
+void LIRGeneratorARM64::lowerMulI(MMul* mul, MDefinition* lhs,
+ MDefinition* rhs) {
+ LMulI* lir = new (alloc()) LMulI;
+ if (mul->fallible()) {
+ assignSnapshot(lir, mul->bailoutKind());
+ }
+ lowerForALU(lir, mul, lhs, rhs);
+}
+
+void LIRGeneratorARM64::lowerModI(MMod* mod) {
+ if (mod->isUnsigned()) {
+ lowerUMod(mod);
+ return;
+ }
+
+ if (mod->rhs()->isConstant()) {
+ int32_t rhs = mod->rhs()->toConstant()->toInt32();
+ int32_t shift = FloorLog2(rhs);
+ if (rhs > 0 && 1 << shift == rhs) {
+ LModPowTwoI* lir =
+ new (alloc()) LModPowTwoI(useRegister(mod->lhs()), shift);
+ if (mod->fallible()) {
+ assignSnapshot(lir, mod->bailoutKind());
+ }
+ define(lir, mod);
+ return;
+ } else if (shift < 31 && (1 << (shift + 1)) - 1 == rhs) {
+ LModMaskI* lir = new (alloc())
+ LModMaskI(useRegister(mod->lhs()), temp(), temp(), shift + 1);
+ if (mod->fallible()) {
+ assignSnapshot(lir, mod->bailoutKind());
+ }
+ define(lir, mod);
+ }
+ }
+
+ LModI* lir =
+ new (alloc()) LModI(useRegister(mod->lhs()), useRegister(mod->rhs()));
+ if (mod->fallible()) {
+ assignSnapshot(lir, mod->bailoutKind());
+ }
+ define(lir, mod);
+}
+
+void LIRGeneratorARM64::lowerDivI64(MDiv* div) {
+ if (div->isUnsigned()) {
+ lowerUDivI64(div);
+ return;
+ }
+
+ LDivOrModI64* lir = new (alloc())
+ LDivOrModI64(useRegister(div->lhs()), useRegister(div->rhs()));
+ defineInt64(lir, div);
+}
+
+void LIRGeneratorARM64::lowerUDivI64(MDiv* div) {
+ LUDivOrModI64* lir = new (alloc())
+ LUDivOrModI64(useRegister(div->lhs()), useRegister(div->rhs()));
+ defineInt64(lir, div);
+}
+
+void LIRGeneratorARM64::lowerUModI64(MMod* mod) {
+ LUDivOrModI64* lir = new (alloc())
+ LUDivOrModI64(useRegister(mod->lhs()), useRegister(mod->rhs()));
+ defineInt64(lir, mod);
+}
+
+void LIRGeneratorARM64::lowerWasmBuiltinDivI64(MWasmBuiltinDivI64* div) {
+ MOZ_CRASH("We don't use runtime div for this architecture");
+}
+
+void LIRGeneratorARM64::lowerModI64(MMod* mod) {
+ if (mod->isUnsigned()) {
+ lowerUModI64(mod);
+ return;
+ }
+
+ LDivOrModI64* lir = new (alloc())
+ LDivOrModI64(useRegister(mod->lhs()), useRegister(mod->rhs()));
+ defineInt64(lir, mod);
+}
+
+void LIRGeneratorARM64::lowerWasmBuiltinModI64(MWasmBuiltinModI64* mod) {
+ MOZ_CRASH("We don't use runtime mod for this architecture");
+}
+
+void LIRGenerator::visitPowHalf(MPowHalf* ins) {
+ MDefinition* input = ins->input();
+ MOZ_ASSERT(input->type() == MIRType::Double);
+ LPowHalfD* lir = new (alloc()) LPowHalfD(useRegister(input));
+ define(lir, ins);
+}
+
+void LIRGeneratorARM64::lowerWasmSelectI(MWasmSelect* select) {
+ if (select->type() == MIRType::Simd128) {
+ LAllocation t = useRegisterAtStart(select->trueExpr());
+ LAllocation f = useRegister(select->falseExpr());
+ LAllocation c = useRegister(select->condExpr());
+ auto* lir = new (alloc()) LWasmSelect(t, f, c);
+ defineReuseInput(lir, select, LWasmSelect::TrueExprIndex);
+ } else {
+ LAllocation t = useRegisterAtStart(select->trueExpr());
+ LAllocation f = useRegisterAtStart(select->falseExpr());
+ LAllocation c = useRegisterAtStart(select->condExpr());
+ define(new (alloc()) LWasmSelect(t, f, c), select);
+ }
+}
+
+void LIRGeneratorARM64::lowerWasmSelectI64(MWasmSelect* select) {
+ LInt64Allocation t = useInt64RegisterAtStart(select->trueExpr());
+ LInt64Allocation f = useInt64RegisterAtStart(select->falseExpr());
+ LAllocation c = useRegisterAtStart(select->condExpr());
+ defineInt64(new (alloc()) LWasmSelectI64(t, f, c), select);
+}
+
+// On arm64 we specialize the cases: compare is {{U,}Int32, {U,}Int64},
+// Float32, Double}, and select is {{U,}Int32, {U,}Int64}, Float32, Double},
+// independently.
+bool LIRGeneratorARM64::canSpecializeWasmCompareAndSelect(
+ MCompare::CompareType compTy, MIRType insTy) {
+ return (insTy == MIRType::Int32 || insTy == MIRType::Int64 ||
+ insTy == MIRType::Float32 || insTy == MIRType::Double) &&
+ (compTy == MCompare::Compare_Int32 ||
+ compTy == MCompare::Compare_UInt32 ||
+ compTy == MCompare::Compare_Int64 ||
+ compTy == MCompare::Compare_UInt64 ||
+ compTy == MCompare::Compare_Float32 ||
+ compTy == MCompare::Compare_Double);
+}
+
+void LIRGeneratorARM64::lowerWasmCompareAndSelect(MWasmSelect* ins,
+ MDefinition* lhs,
+ MDefinition* rhs,
+ MCompare::CompareType compTy,
+ JSOp jsop) {
+ MOZ_ASSERT(canSpecializeWasmCompareAndSelect(compTy, ins->type()));
+ LAllocation rhsAlloc;
+ if (compTy == MCompare::Compare_Float32 ||
+ compTy == MCompare::Compare_Double) {
+ rhsAlloc = useRegisterAtStart(rhs);
+ } else if (compTy == MCompare::Compare_Int32 ||
+ compTy == MCompare::Compare_UInt32 ||
+ compTy == MCompare::Compare_Int64 ||
+ compTy == MCompare::Compare_UInt64) {
+ rhsAlloc = useRegisterOrConstantAtStart(rhs);
+ } else {
+ MOZ_CRASH("Unexpected type");
+ }
+ auto* lir = new (alloc())
+ LWasmCompareAndSelect(useRegisterAtStart(lhs), rhsAlloc, compTy, jsop,
+ useRegisterAtStart(ins->trueExpr()),
+ useRegisterAtStart(ins->falseExpr()));
+ define(lir, ins);
+}
+
+void LIRGenerator::visitAbs(MAbs* ins) {
+ define(allocateAbs(ins, useRegisterAtStart(ins->input())), ins);
+}
+
+LTableSwitch* LIRGeneratorARM64::newLTableSwitch(const LAllocation& in,
+ const LDefinition& inputCopy,
+ MTableSwitch* tableswitch) {
+ return new (alloc()) LTableSwitch(in, inputCopy, temp(), tableswitch);
+}
+
+LTableSwitchV* LIRGeneratorARM64::newLTableSwitchV(MTableSwitch* tableswitch) {
+ return new (alloc()) LTableSwitchV(useBox(tableswitch->getOperand(0)), temp(),
+ tempDouble(), temp(), tableswitch);
+}
+
+void LIRGeneratorARM64::lowerUrshD(MUrsh* mir) {
+ MDefinition* lhs = mir->lhs();
+ MDefinition* rhs = mir->rhs();
+
+ MOZ_ASSERT(lhs->type() == MIRType::Int32);
+ MOZ_ASSERT(rhs->type() == MIRType::Int32);
+
+ LUrshD* lir = new (alloc())
+ LUrshD(useRegister(lhs), useRegisterOrConstant(rhs), temp());
+ define(lir, mir);
+}
+
+void LIRGeneratorARM64::lowerPowOfTwoI(MPow* mir) {
+ int32_t base = mir->input()->toConstant()->toInt32();
+ MDefinition* power = mir->power();
+
+ auto* lir = new (alloc()) LPowOfTwoI(useRegister(power), base);
+ assignSnapshot(lir, mir->bailoutKind());
+ define(lir, mir);
+}
+
+void LIRGeneratorARM64::lowerBigIntLsh(MBigIntLsh* ins) {
+ auto* lir = new (alloc()) LBigIntLsh(
+ useRegister(ins->lhs()), useRegister(ins->rhs()), temp(), temp(), temp());
+ define(lir, ins);
+ assignSafepoint(lir, ins);
+}
+
+void LIRGeneratorARM64::lowerBigIntRsh(MBigIntRsh* ins) {
+ auto* lir = new (alloc()) LBigIntRsh(
+ useRegister(ins->lhs()), useRegister(ins->rhs()), temp(), temp(), temp());
+ define(lir, ins);
+ assignSafepoint(lir, ins);
+}
+
+void LIRGeneratorARM64::lowerBigIntDiv(MBigIntDiv* ins) {
+ auto* lir = new (alloc()) LBigIntDiv(useRegister(ins->lhs()),
+ useRegister(ins->rhs()), temp(), temp());
+ define(lir, ins);
+ assignSafepoint(lir, ins);
+}
+
+void LIRGeneratorARM64::lowerBigIntMod(MBigIntMod* ins) {
+ auto* lir = new (alloc()) LBigIntMod(useRegister(ins->lhs()),
+ useRegister(ins->rhs()), temp(), temp());
+ define(lir, ins);
+ assignSafepoint(lir, ins);
+}
+
+#ifdef ENABLE_WASM_SIMD
+
+bool LIRGeneratorARM64::canFoldReduceSimd128AndBranch(wasm::SimdOp op) {
+ switch (op) {
+ case wasm::SimdOp::V128AnyTrue:
+ case wasm::SimdOp::I8x16AllTrue:
+ case wasm::SimdOp::I16x8AllTrue:
+ case wasm::SimdOp::I32x4AllTrue:
+ case wasm::SimdOp::I64x2AllTrue:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool LIRGeneratorARM64::canEmitWasmReduceSimd128AtUses(
+ MWasmReduceSimd128* ins) {
+ if (!ins->canEmitAtUses()) {
+ return false;
+ }
+ // Only specific ops generating int32.
+ if (ins->type() != MIRType::Int32) {
+ return false;
+ }
+ if (!canFoldReduceSimd128AndBranch(ins->simdOp())) {
+ return false;
+ }
+ // If never used then defer (it will be removed).
+ MUseIterator iter(ins->usesBegin());
+ if (iter == ins->usesEnd()) {
+ return true;
+ }
+ // We require an MTest consumer.
+ MNode* node = iter->consumer();
+ if (!node->isDefinition() || !node->toDefinition()->isTest()) {
+ return false;
+ }
+ // Defer only if there's only one use.
+ iter++;
+ return iter == ins->usesEnd();
+}
+
+#endif
+
+void LIRGenerator::visitWasmNeg(MWasmNeg* ins) {
+ switch (ins->type()) {
+ case MIRType::Int32:
+ define(new (alloc()) LNegI(useRegisterAtStart(ins->input())), ins);
+ break;
+ case MIRType::Float32:
+ define(new (alloc()) LNegF(useRegisterAtStart(ins->input())), ins);
+ break;
+ case MIRType::Double:
+ define(new (alloc()) LNegD(useRegisterAtStart(ins->input())), ins);
+ break;
+ default:
+ MOZ_CRASH("unexpected type");
+ }
+}
+
+void LIRGeneratorARM64::lowerUDiv(MDiv* div) {
+ LAllocation lhs = useRegister(div->lhs());
+ if (div->rhs()->isConstant()) {
+ // NOTE: the result of toInt32 is coerced to uint32_t.
+ uint32_t rhs = div->rhs()->toConstant()->toInt32();
+ int32_t shift = mozilla::FloorLog2(rhs);
+
+ if (rhs != 0 && uint32_t(1) << shift == rhs) {
+ LDivPowTwoI* lir = new (alloc()) LDivPowTwoI(lhs, shift, false);
+ if (div->fallible()) {
+ assignSnapshot(lir, div->bailoutKind());
+ }
+ define(lir, div);
+ return;
+ }
+
+ LUDivConstantI* lir = new (alloc()) LUDivConstantI(lhs, rhs, temp());
+ if (div->fallible()) {
+ assignSnapshot(lir, div->bailoutKind());
+ }
+ define(lir, div);
+ return;
+ }
+
+ // Generate UDiv
+ LAllocation rhs = useRegister(div->rhs());
+ LDefinition remainder = LDefinition::BogusTemp();
+ if (!div->canTruncateRemainder()) {
+ remainder = temp();
+ }
+
+ LUDiv* lir = new (alloc()) LUDiv(lhs, rhs, remainder);
+ if (div->fallible()) {
+ assignSnapshot(lir, div->bailoutKind());
+ }
+ define(lir, div);
+}
+
+void LIRGeneratorARM64::lowerUMod(MMod* mod) {
+ LUMod* lir = new (alloc())
+ LUMod(useRegister(mod->getOperand(0)), useRegister(mod->getOperand(1)));
+ if (mod->fallible()) {
+ assignSnapshot(lir, mod->bailoutKind());
+ }
+ define(lir, mod);
+}
+
+void LIRGenerator::visitWasmUnsignedToDouble(MWasmUnsignedToDouble* ins) {
+ MOZ_ASSERT(ins->input()->type() == MIRType::Int32);
+ LWasmUint32ToDouble* lir =
+ new (alloc()) LWasmUint32ToDouble(useRegisterAtStart(ins->input()));
+ define(lir, ins);
+}
+
+void LIRGenerator::visitWasmUnsignedToFloat32(MWasmUnsignedToFloat32* ins) {
+ MOZ_ASSERT(ins->input()->type() == MIRType::Int32);
+ LWasmUint32ToFloat32* lir =
+ new (alloc()) LWasmUint32ToFloat32(useRegisterAtStart(ins->input()));
+ define(lir, ins);
+}
+
+void LIRGenerator::visitAsmJSLoadHeap(MAsmJSLoadHeap* ins) {
+ MDefinition* base = ins->base();
+ MOZ_ASSERT(base->type() == MIRType::Int32);
+
+ MDefinition* boundsCheckLimit = ins->boundsCheckLimit();
+ MOZ_ASSERT_IF(ins->needsBoundsCheck(),
+ boundsCheckLimit->type() == MIRType::Int32);
+
+ LAllocation baseAlloc = useRegisterAtStart(base);
+
+ LAllocation limitAlloc = ins->needsBoundsCheck()
+ ? useRegisterAtStart(boundsCheckLimit)
+ : LAllocation();
+
+ // We have no memory-base value, meaning that HeapReg is to be used as the
+ // memory base. This follows from the definition of
+ // FunctionCompiler::maybeLoadMemoryBase() in WasmIonCompile.cpp.
+ MOZ_ASSERT(!ins->hasMemoryBase());
+ auto* lir =
+ new (alloc()) LAsmJSLoadHeap(baseAlloc, limitAlloc, LAllocation());
+ define(lir, ins);
+}
+
+void LIRGenerator::visitAsmJSStoreHeap(MAsmJSStoreHeap* ins) {
+ MDefinition* base = ins->base();
+ MOZ_ASSERT(base->type() == MIRType::Int32);
+
+ MDefinition* boundsCheckLimit = ins->boundsCheckLimit();
+ MOZ_ASSERT_IF(ins->needsBoundsCheck(),
+ boundsCheckLimit->type() == MIRType::Int32);
+
+ LAllocation baseAlloc = useRegisterAtStart(base);
+
+ LAllocation limitAlloc = ins->needsBoundsCheck()
+ ? useRegisterAtStart(boundsCheckLimit)
+ : LAllocation();
+
+ // See comment in LIRGenerator::visitAsmJSStoreHeap just above.
+ MOZ_ASSERT(!ins->hasMemoryBase());
+ add(new (alloc()) LAsmJSStoreHeap(baseAlloc, useRegisterAtStart(ins->value()),
+ limitAlloc, LAllocation()),
+ ins);
+}
+
+void LIRGenerator::visitWasmCompareExchangeHeap(MWasmCompareExchangeHeap* ins) {
+ MDefinition* base = ins->base();
+ // See comment in visitWasmLoad re the type of 'base'.
+ MOZ_ASSERT(base->type() == MIRType::Int32 || base->type() == MIRType::Int64);
+
+ // Note, the access type may be Int64 here.
+
+ LWasmCompareExchangeHeap* lir = new (alloc())
+ LWasmCompareExchangeHeap(useRegister(base), useRegister(ins->oldValue()),
+ useRegister(ins->newValue()));
+
+ define(lir, ins);
+}
+
+void LIRGenerator::visitWasmAtomicExchangeHeap(MWasmAtomicExchangeHeap* ins) {
+ MDefinition* base = ins->base();
+ // See comment in visitWasmLoad re the type of 'base'.
+ MOZ_ASSERT(base->type() == MIRType::Int32 || base->type() == MIRType::Int64);
+
+ // Note, the access type may be Int64 here.
+
+ LWasmAtomicExchangeHeap* lir = new (alloc())
+ LWasmAtomicExchangeHeap(useRegister(base), useRegister(ins->value()));
+ define(lir, ins);
+}
+
+void LIRGenerator::visitWasmAtomicBinopHeap(MWasmAtomicBinopHeap* ins) {
+ MDefinition* base = ins->base();
+ // See comment in visitWasmLoad re the type of 'base'.
+ MOZ_ASSERT(base->type() == MIRType::Int32 || base->type() == MIRType::Int64);
+
+ // Note, the access type may be Int64 here.
+
+ if (!ins->hasUses()) {
+ LWasmAtomicBinopHeapForEffect* lir =
+ new (alloc()) LWasmAtomicBinopHeapForEffect(useRegister(base),
+ useRegister(ins->value()),
+ /* flagTemp= */ temp());
+ add(lir, ins);
+ return;
+ }
+
+ LWasmAtomicBinopHeap* lir = new (alloc())
+ LWasmAtomicBinopHeap(useRegister(base), useRegister(ins->value()),
+ /* temp= */ LDefinition::BogusTemp(),
+ /* flagTemp= */ temp());
+ define(lir, ins);
+}
+
+void LIRGeneratorARM64::lowerTruncateDToInt32(MTruncateToInt32* ins) {
+ MDefinition* opd = ins->input();
+ MOZ_ASSERT(opd->type() == MIRType::Double);
+ define(new (alloc())
+ LTruncateDToInt32(useRegister(opd), LDefinition::BogusTemp()),
+ ins);
+}
+
+void LIRGeneratorARM64::lowerTruncateFToInt32(MTruncateToInt32* ins) {
+ MDefinition* opd = ins->input();
+ MOZ_ASSERT(opd->type() == MIRType::Float32);
+ define(new (alloc())
+ LTruncateFToInt32(useRegister(opd), LDefinition::BogusTemp()),
+ ins);
+}
+
+void LIRGenerator::visitAtomicTypedArrayElementBinop(
+ MAtomicTypedArrayElementBinop* ins) {
+ MOZ_ASSERT(ins->arrayType() != Scalar::Uint8Clamped);
+ MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
+ MOZ_ASSERT(ins->arrayType() != Scalar::Float64);
+
+ MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
+ MOZ_ASSERT(ins->index()->type() == MIRType::IntPtr);
+
+ const LUse elements = useRegister(ins->elements());
+ const LAllocation index =
+ useRegisterOrIndexConstant(ins->index(), ins->arrayType());
+
+ LAllocation value = useRegister(ins->value());
+
+ if (Scalar::isBigIntType(ins->arrayType())) {
+ LInt64Definition temp1 = tempInt64();
+ LInt64Definition temp2 = tempInt64();
+
+ // Case 1: the result of the operation is not used.
+ //
+ // We can omit allocating the result BigInt.
+
+ if (ins->isForEffect()) {
+ auto* lir = new (alloc()) LAtomicTypedArrayElementBinopForEffect64(
+ elements, index, value, temp1, temp2);
+ add(lir, ins);
+ return;
+ }
+
+ // Case 2: the result of the operation is used.
+
+ auto* lir = new (alloc())
+ LAtomicTypedArrayElementBinop64(elements, index, value, temp1, temp2);
+ define(lir, ins);
+ assignSafepoint(lir, ins);
+ return;
+ }
+
+ if (ins->isForEffect()) {
+ auto* lir = new (alloc())
+ LAtomicTypedArrayElementBinopForEffect(elements, index, value, temp());
+ add(lir, ins);
+ return;
+ }
+
+ LDefinition tempDef1 = temp();
+ LDefinition tempDef2 = LDefinition::BogusTemp();
+ if (ins->arrayType() == Scalar::Uint32) {
+ tempDef2 = temp();
+ }
+
+ LAtomicTypedArrayElementBinop* lir = new (alloc())
+ LAtomicTypedArrayElementBinop(elements, index, value, tempDef1, tempDef2);
+
+ define(lir, ins);
+}
+
+void LIRGenerator::visitCompareExchangeTypedArrayElement(
+ MCompareExchangeTypedArrayElement* ins) {
+ MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
+ MOZ_ASSERT(ins->arrayType() != Scalar::Float64);
+
+ MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
+ MOZ_ASSERT(ins->index()->type() == MIRType::IntPtr);
+
+ const LUse elements = useRegister(ins->elements());
+ const LAllocation index =
+ useRegisterOrIndexConstant(ins->index(), ins->arrayType());
+
+ const LAllocation newval = useRegister(ins->newval());
+ const LAllocation oldval = useRegister(ins->oldval());
+
+ if (Scalar::isBigIntType(ins->arrayType())) {
+ LInt64Definition temp1 = tempInt64();
+ LInt64Definition temp2 = tempInt64();
+
+ auto* lir = new (alloc()) LCompareExchangeTypedArrayElement64(
+ elements, index, oldval, newval, temp1, temp2);
+ define(lir, ins);
+ assignSafepoint(lir, ins);
+ return;
+ }
+
+ // If the target is an FPReg then we need a temporary at the CodeGenerator
+ // level for creating the result.
+
+ LDefinition outTemp = LDefinition::BogusTemp();
+ if (ins->arrayType() == Scalar::Uint32) {
+ outTemp = temp();
+ }
+
+ LCompareExchangeTypedArrayElement* lir =
+ new (alloc()) LCompareExchangeTypedArrayElement(elements, index, oldval,
+ newval, outTemp);
+
+ define(lir, ins);
+}
+
+void LIRGenerator::visitAtomicExchangeTypedArrayElement(
+ MAtomicExchangeTypedArrayElement* ins) {
+ MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
+ MOZ_ASSERT(ins->index()->type() == MIRType::IntPtr);
+
+ const LUse elements = useRegister(ins->elements());
+ const LAllocation index =
+ useRegisterOrIndexConstant(ins->index(), ins->arrayType());
+
+ const LAllocation value = useRegister(ins->value());
+
+ if (Scalar::isBigIntType(ins->arrayType())) {
+ LInt64Definition temp1 = tempInt64();
+ LDefinition temp2 = temp();
+
+ auto* lir = new (alloc()) LAtomicExchangeTypedArrayElement64(
+ elements, index, value, temp1, temp2);
+ define(lir, ins);
+ assignSafepoint(lir, ins);
+ return;
+ }
+
+ MOZ_ASSERT(ins->arrayType() <= Scalar::Uint32);
+
+ LDefinition tempDef = LDefinition::BogusTemp();
+ if (ins->arrayType() == Scalar::Uint32) {
+ tempDef = temp();
+ }
+
+ LAtomicExchangeTypedArrayElement* lir = new (alloc())
+ LAtomicExchangeTypedArrayElement(elements, index, value, tempDef);
+
+ define(lir, ins);
+}
+
+void LIRGeneratorARM64::lowerAtomicLoad64(MLoadUnboxedScalar* ins) {
+ const LUse elements = useRegister(ins->elements());
+ const LAllocation index =
+ useRegisterOrIndexConstant(ins->index(), ins->storageType());
+
+ auto* lir = new (alloc()) LAtomicLoad64(elements, index, temp(), tempInt64());
+ define(lir, ins);
+ assignSafepoint(lir, ins);
+}
+
+void LIRGeneratorARM64::lowerAtomicStore64(MStoreUnboxedScalar* ins) {
+ LUse elements = useRegister(ins->elements());
+ LAllocation index =
+ useRegisterOrIndexConstant(ins->index(), ins->writeType());
+ LAllocation value = useRegister(ins->value());
+
+ add(new (alloc()) LAtomicStore64(elements, index, value, tempInt64()), ins);
+}
+
+void LIRGenerator::visitSubstr(MSubstr* ins) {
+ LSubstr* lir = new (alloc())
+ LSubstr(useRegister(ins->string()), useRegister(ins->begin()),
+ useRegister(ins->length()), temp(), temp(), temp());
+ define(lir, ins);
+ assignSafepoint(lir, ins);
+}
+
+void LIRGenerator::visitWasmTruncateToInt64(MWasmTruncateToInt64* ins) {
+ MDefinition* opd = ins->input();
+ MOZ_ASSERT(opd->type() == MIRType::Double || opd->type() == MIRType::Float32);
+
+ defineInt64(new (alloc()) LWasmTruncateToInt64(useRegister(opd)), ins);
+}
+
+void LIRGeneratorARM64::lowerWasmBuiltinTruncateToInt64(
+ MWasmBuiltinTruncateToInt64* ins) {
+ MOZ_CRASH("We don't use WasmBuiltinTruncateToInt64 for arm64");
+}
+
+void LIRGeneratorARM64::lowerBuiltinInt64ToFloatingPoint(
+ MBuiltinInt64ToFloatingPoint* ins) {
+ MOZ_CRASH("We don't use it for this architecture");
+}
+
+void LIRGenerator::visitWasmHeapBase(MWasmHeapBase* ins) {
+ auto* lir = new (alloc()) LWasmHeapBase(LAllocation());
+ define(lir, ins);
+}
+
+void LIRGenerator::visitWasmLoad(MWasmLoad* ins) {
+ MDefinition* base = ins->base();
+ // 'base' is a GPR but may be of either type. If it is 32-bit it is
+ // zero-extended and can act as 64-bit.
+ MOZ_ASSERT(base->type() == MIRType::Int32 || base->type() == MIRType::Int64);
+
+ LAllocation ptr = useRegisterOrConstantAtStart(base);
+
+ if (ins->type() == MIRType::Int64) {
+ auto* lir = new (alloc()) LWasmLoadI64(ptr);
+ defineInt64(lir, ins);
+ } else {
+ auto* lir = new (alloc()) LWasmLoad(ptr);
+ define(lir, ins);
+ }
+}
+
+void LIRGenerator::visitWasmStore(MWasmStore* ins) {
+ MDefinition* base = ins->base();
+ // See comment in visitWasmLoad re the type of 'base'.
+ MOZ_ASSERT(base->type() == MIRType::Int32 || base->type() == MIRType::Int64);
+
+ MDefinition* value = ins->value();
+
+ if (ins->access().type() == Scalar::Int64) {
+ LAllocation baseAlloc = useRegisterOrConstantAtStart(base);
+ LInt64Allocation valueAlloc = useInt64RegisterAtStart(value);
+ auto* lir = new (alloc()) LWasmStoreI64(baseAlloc, valueAlloc);
+ add(lir, ins);
+ return;
+ }
+
+ LAllocation baseAlloc = useRegisterOrConstantAtStart(base);
+ LAllocation valueAlloc = useRegisterAtStart(value);
+ auto* lir = new (alloc()) LWasmStore(baseAlloc, valueAlloc);
+ add(lir, ins);
+}
+
+void LIRGenerator::visitInt64ToFloatingPoint(MInt64ToFloatingPoint* ins) {
+ MDefinition* opd = ins->input();
+ MOZ_ASSERT(opd->type() == MIRType::Int64);
+ MOZ_ASSERT(IsFloatingPointType(ins->type()));
+
+ define(new (alloc()) LInt64ToFloatingPoint(useInt64Register(opd)), ins);
+}
+
+void LIRGenerator::visitCopySign(MCopySign* ins) {
+ MDefinition* lhs = ins->lhs();
+ MDefinition* rhs = ins->rhs();
+
+ MOZ_ASSERT(IsFloatingPointType(lhs->type()));
+ MOZ_ASSERT(lhs->type() == rhs->type());
+ MOZ_ASSERT(lhs->type() == ins->type());
+
+ LInstructionHelper<1, 2, 2>* lir;
+ if (lhs->type() == MIRType::Double) {
+ lir = new (alloc()) LCopySignD();
+ } else {
+ lir = new (alloc()) LCopySignF();
+ }
+
+ lir->setOperand(0, useRegisterAtStart(lhs));
+ lir->setOperand(1, willHaveDifferentLIRNodes(lhs, rhs)
+ ? useRegister(rhs)
+ : useRegisterAtStart(rhs));
+ // The copySignDouble and copySignFloat32 are optimized for lhs == output.
+ // It also prevents rhs == output when lhs != output, avoids clobbering.
+ defineReuseInput(lir, ins, 0);
+}
+
+void LIRGenerator::visitExtendInt32ToInt64(MExtendInt32ToInt64* ins) {
+ defineInt64(
+ new (alloc()) LExtendInt32ToInt64(useRegisterAtStart(ins->input())), ins);
+}
+
+void LIRGenerator::visitSignExtendInt64(MSignExtendInt64* ins) {
+ defineInt64(new (alloc())
+ LSignExtendInt64(useInt64RegisterAtStart(ins->input())),
+ ins);
+}
+
+void LIRGenerator::visitWasmTernarySimd128(MWasmTernarySimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ MOZ_ASSERT(ins->v0()->type() == MIRType::Simd128);
+ MOZ_ASSERT(ins->v1()->type() == MIRType::Simd128);
+ MOZ_ASSERT(ins->v2()->type() == MIRType::Simd128);
+ MOZ_ASSERT(ins->type() == MIRType::Simd128);
+
+ switch (ins->simdOp()) {
+ case wasm::SimdOp::V128Bitselect: {
+ auto* lir = new (alloc()) LWasmTernarySimd128(
+ ins->simdOp(), useRegister(ins->v0()), useRegister(ins->v1()),
+ useRegisterAtStart(ins->v2()));
+ // On ARM64, control register is used as output at machine instruction.
+ defineReuseInput(lir, ins, LWasmTernarySimd128::V2);
+ break;
+ }
+ case wasm::SimdOp::F32x4RelaxedFma:
+ case wasm::SimdOp::F32x4RelaxedFnma:
+ case wasm::SimdOp::F64x2RelaxedFma:
+ case wasm::SimdOp::F64x2RelaxedFnma: {
+ auto* lir = new (alloc()) LWasmTernarySimd128(
+ ins->simdOp(), useRegister(ins->v0()), useRegister(ins->v1()),
+ useRegisterAtStart(ins->v2()));
+ defineReuseInput(lir, ins, LWasmTernarySimd128::V2);
+ break;
+ }
+ case wasm::SimdOp::I32x4DotI8x16I7x16AddS: {
+ auto* lir = new (alloc()) LWasmTernarySimd128(
+ ins->simdOp(), useRegister(ins->v0()), useRegister(ins->v1()),
+ useRegisterAtStart(ins->v2()), tempSimd128());
+ defineReuseInput(lir, ins, LWasmTernarySimd128::V2);
+ break;
+ }
+ case wasm::SimdOp::I8x16RelaxedLaneSelect:
+ case wasm::SimdOp::I16x8RelaxedLaneSelect:
+ case wasm::SimdOp::I32x4RelaxedLaneSelect:
+ case wasm::SimdOp::I64x2RelaxedLaneSelect: {
+ auto* lir = new (alloc()) LWasmTernarySimd128(
+ ins->simdOp(), useRegister(ins->v0()), useRegister(ins->v1()),
+ useRegisterAtStart(ins->v2()));
+ defineReuseInput(lir, ins, LWasmTernarySimd128::V2);
+ break;
+ }
+ default:
+ MOZ_CRASH("NYI");
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void LIRGenerator::visitWasmBinarySimd128(MWasmBinarySimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ MDefinition* lhs = ins->lhs();
+ MDefinition* rhs = ins->rhs();
+ wasm::SimdOp op = ins->simdOp();
+
+ MOZ_ASSERT(lhs->type() == MIRType::Simd128);
+ MOZ_ASSERT(rhs->type() == MIRType::Simd128);
+ MOZ_ASSERT(ins->type() == MIRType::Simd128);
+
+ LAllocation lhsAlloc = useRegisterAtStart(lhs);
+ LAllocation rhsAlloc = useRegisterAtStart(rhs);
+ LDefinition tempReg0 = LDefinition::BogusTemp();
+ LDefinition tempReg1 = LDefinition::BogusTemp();
+ if (op == wasm::SimdOp::I64x2Mul) {
+ tempReg0 = tempSimd128();
+ tempReg1 = tempSimd128();
+ }
+ auto* lir = new (alloc())
+ LWasmBinarySimd128(op, lhsAlloc, rhsAlloc, tempReg0, tempReg1);
+ define(lir, ins);
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+#ifdef ENABLE_WASM_SIMD
+bool MWasmTernarySimd128::specializeBitselectConstantMaskAsShuffle(
+ int8_t shuffle[16]) {
+ return false;
+}
+bool MWasmTernarySimd128::canRelaxBitselect() { return false; }
+
+bool MWasmBinarySimd128::canPmaddubsw() { return false; }
+#endif
+
+bool MWasmBinarySimd128::specializeForConstantRhs() {
+ // Probably many we want to do here
+ return false;
+}
+
+void LIRGenerator::visitWasmBinarySimd128WithConstant(
+ MWasmBinarySimd128WithConstant* ins) {
+ MOZ_CRASH("binary SIMD with constant NYI");
+}
+
+void LIRGenerator::visitWasmShiftSimd128(MWasmShiftSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ MDefinition* lhs = ins->lhs();
+ MDefinition* rhs = ins->rhs();
+
+ MOZ_ASSERT(lhs->type() == MIRType::Simd128);
+ MOZ_ASSERT(rhs->type() == MIRType::Int32);
+ MOZ_ASSERT(ins->type() == MIRType::Simd128);
+
+ if (rhs->isConstant()) {
+ int32_t shiftCount = rhs->toConstant()->toInt32();
+ switch (ins->simdOp()) {
+ case wasm::SimdOp::I8x16Shl:
+ case wasm::SimdOp::I8x16ShrU:
+ case wasm::SimdOp::I8x16ShrS:
+ shiftCount &= 7;
+ break;
+ case wasm::SimdOp::I16x8Shl:
+ case wasm::SimdOp::I16x8ShrU:
+ case wasm::SimdOp::I16x8ShrS:
+ shiftCount &= 15;
+ break;
+ case wasm::SimdOp::I32x4Shl:
+ case wasm::SimdOp::I32x4ShrU:
+ case wasm::SimdOp::I32x4ShrS:
+ shiftCount &= 31;
+ break;
+ case wasm::SimdOp::I64x2Shl:
+ case wasm::SimdOp::I64x2ShrU:
+ case wasm::SimdOp::I64x2ShrS:
+ shiftCount &= 63;
+ break;
+ default:
+ MOZ_CRASH("Unexpected shift operation");
+ }
+# ifdef DEBUG
+ js::wasm::ReportSimdAnalysis("shift -> constant shift");
+# endif
+ auto* lir = new (alloc())
+ LWasmConstantShiftSimd128(useRegisterAtStart(lhs), shiftCount);
+ define(lir, ins);
+ return;
+ }
+
+# ifdef DEBUG
+ js::wasm::ReportSimdAnalysis("shift -> variable shift");
+# endif
+
+ LAllocation lhsDestAlloc = useRegisterAtStart(lhs);
+ LAllocation rhsAlloc = useRegisterAtStart(rhs);
+ auto* lir = new (alloc()) LWasmVariableShiftSimd128(lhsDestAlloc, rhsAlloc,
+ LDefinition::BogusTemp());
+ define(lir, ins);
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void LIRGenerator::visitWasmShuffleSimd128(MWasmShuffleSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ MOZ_ASSERT(ins->lhs()->type() == MIRType::Simd128);
+ MOZ_ASSERT(ins->rhs()->type() == MIRType::Simd128);
+ MOZ_ASSERT(ins->type() == MIRType::Simd128);
+
+ SimdShuffle s = ins->shuffle();
+ switch (s.opd) {
+ case SimdShuffle::Operand::LEFT:
+ case SimdShuffle::Operand::RIGHT: {
+ LAllocation src;
+ switch (*s.permuteOp) {
+ case SimdPermuteOp::MOVE:
+ case SimdPermuteOp::BROADCAST_8x16:
+ case SimdPermuteOp::BROADCAST_16x8:
+ case SimdPermuteOp::PERMUTE_8x16:
+ case SimdPermuteOp::PERMUTE_16x8:
+ case SimdPermuteOp::PERMUTE_32x4:
+ case SimdPermuteOp::ROTATE_RIGHT_8x16:
+ case SimdPermuteOp::SHIFT_LEFT_8x16:
+ case SimdPermuteOp::SHIFT_RIGHT_8x16:
+ case SimdPermuteOp::REVERSE_16x8:
+ case SimdPermuteOp::REVERSE_32x4:
+ case SimdPermuteOp::REVERSE_64x2:
+ break;
+ default:
+ MOZ_CRASH("Unexpected operator");
+ }
+ if (s.opd == SimdShuffle::Operand::LEFT) {
+ src = useRegisterAtStart(ins->lhs());
+ } else {
+ src = useRegisterAtStart(ins->rhs());
+ }
+ auto* lir =
+ new (alloc()) LWasmPermuteSimd128(src, *s.permuteOp, s.control);
+ define(lir, ins);
+ break;
+ }
+ case SimdShuffle::Operand::BOTH:
+ case SimdShuffle::Operand::BOTH_SWAPPED: {
+ LDefinition temp = LDefinition::BogusTemp();
+ LAllocation lhs;
+ LAllocation rhs;
+ if (s.opd == SimdShuffle::Operand::BOTH) {
+ lhs = useRegisterAtStart(ins->lhs());
+ rhs = useRegisterAtStart(ins->rhs());
+ } else {
+ lhs = useRegisterAtStart(ins->rhs());
+ rhs = useRegisterAtStart(ins->lhs());
+ }
+ auto* lir = new (alloc())
+ LWasmShuffleSimd128(lhs, rhs, temp, *s.shuffleOp, s.control);
+ define(lir, ins);
+ break;
+ }
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void LIRGenerator::visitWasmReplaceLaneSimd128(MWasmReplaceLaneSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ MOZ_ASSERT(ins->lhs()->type() == MIRType::Simd128);
+ MOZ_ASSERT(ins->type() == MIRType::Simd128);
+
+ // Optimal code generation reuses the lhs register because the rhs scalar is
+ // merged into a vector lhs.
+ LAllocation lhs = useRegisterAtStart(ins->lhs());
+ if (ins->rhs()->type() == MIRType::Int64) {
+ auto* lir = new (alloc())
+ LWasmReplaceInt64LaneSimd128(lhs, useInt64Register(ins->rhs()));
+ defineReuseInput(lir, ins, 0);
+ } else {
+ auto* lir =
+ new (alloc()) LWasmReplaceLaneSimd128(lhs, useRegister(ins->rhs()));
+ defineReuseInput(lir, ins, 0);
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void LIRGenerator::visitWasmScalarToSimd128(MWasmScalarToSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ MOZ_ASSERT(ins->type() == MIRType::Simd128);
+
+ switch (ins->input()->type()) {
+ case MIRType::Int64: {
+ // 64-bit integer splats.
+ // Load-and-(sign|zero)extend.
+ auto* lir = new (alloc())
+ LWasmInt64ToSimd128(useInt64RegisterAtStart(ins->input()));
+ define(lir, ins);
+ break;
+ }
+ case MIRType::Float32:
+ case MIRType::Double: {
+ // Floating-point splats.
+ auto* lir =
+ new (alloc()) LWasmScalarToSimd128(useRegisterAtStart(ins->input()));
+ define(lir, ins);
+ break;
+ }
+ default: {
+ // 32-bit integer splats.
+ auto* lir =
+ new (alloc()) LWasmScalarToSimd128(useRegisterAtStart(ins->input()));
+ define(lir, ins);
+ break;
+ }
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void LIRGenerator::visitWasmUnarySimd128(MWasmUnarySimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ MOZ_ASSERT(ins->input()->type() == MIRType::Simd128);
+ MOZ_ASSERT(ins->type() == MIRType::Simd128);
+
+ LDefinition tempReg = LDefinition::BogusTemp();
+ switch (ins->simdOp()) {
+ case wasm::SimdOp::I8x16Neg:
+ case wasm::SimdOp::I16x8Neg:
+ case wasm::SimdOp::I32x4Neg:
+ case wasm::SimdOp::I64x2Neg:
+ case wasm::SimdOp::F32x4Neg:
+ case wasm::SimdOp::F64x2Neg:
+ case wasm::SimdOp::F32x4Abs:
+ case wasm::SimdOp::F64x2Abs:
+ case wasm::SimdOp::V128Not:
+ case wasm::SimdOp::F32x4Sqrt:
+ case wasm::SimdOp::F64x2Sqrt:
+ case wasm::SimdOp::I8x16Abs:
+ case wasm::SimdOp::I16x8Abs:
+ case wasm::SimdOp::I32x4Abs:
+ case wasm::SimdOp::I64x2Abs:
+ case wasm::SimdOp::I32x4TruncSatF32x4S:
+ case wasm::SimdOp::F32x4ConvertI32x4U:
+ case wasm::SimdOp::I32x4TruncSatF32x4U:
+ case wasm::SimdOp::I16x8ExtendLowI8x16S:
+ case wasm::SimdOp::I16x8ExtendHighI8x16S:
+ case wasm::SimdOp::I16x8ExtendLowI8x16U:
+ case wasm::SimdOp::I16x8ExtendHighI8x16U:
+ case wasm::SimdOp::I32x4ExtendLowI16x8S:
+ case wasm::SimdOp::I32x4ExtendHighI16x8S:
+ case wasm::SimdOp::I32x4ExtendLowI16x8U:
+ case wasm::SimdOp::I32x4ExtendHighI16x8U:
+ case wasm::SimdOp::I64x2ExtendLowI32x4S:
+ case wasm::SimdOp::I64x2ExtendHighI32x4S:
+ case wasm::SimdOp::I64x2ExtendLowI32x4U:
+ case wasm::SimdOp::I64x2ExtendHighI32x4U:
+ case wasm::SimdOp::F32x4ConvertI32x4S:
+ case wasm::SimdOp::F32x4Ceil:
+ case wasm::SimdOp::F32x4Floor:
+ case wasm::SimdOp::F32x4Trunc:
+ case wasm::SimdOp::F32x4Nearest:
+ case wasm::SimdOp::F64x2Ceil:
+ case wasm::SimdOp::F64x2Floor:
+ case wasm::SimdOp::F64x2Trunc:
+ case wasm::SimdOp::F64x2Nearest:
+ case wasm::SimdOp::F32x4DemoteF64x2Zero:
+ case wasm::SimdOp::F64x2PromoteLowF32x4:
+ case wasm::SimdOp::F64x2ConvertLowI32x4S:
+ case wasm::SimdOp::F64x2ConvertLowI32x4U:
+ case wasm::SimdOp::I16x8ExtaddPairwiseI8x16S:
+ case wasm::SimdOp::I16x8ExtaddPairwiseI8x16U:
+ case wasm::SimdOp::I32x4ExtaddPairwiseI16x8S:
+ case wasm::SimdOp::I32x4ExtaddPairwiseI16x8U:
+ case wasm::SimdOp::I8x16Popcnt:
+ case wasm::SimdOp::I32x4RelaxedTruncF32x4S:
+ case wasm::SimdOp::I32x4RelaxedTruncF32x4U:
+ case wasm::SimdOp::I32x4RelaxedTruncF64x2SZero:
+ case wasm::SimdOp::I32x4RelaxedTruncF64x2UZero:
+ break;
+ case wasm::SimdOp::I32x4TruncSatF64x2SZero:
+ case wasm::SimdOp::I32x4TruncSatF64x2UZero:
+ tempReg = tempSimd128();
+ break;
+ default:
+ MOZ_CRASH("Unary SimdOp not implemented");
+ }
+
+ LUse input = useRegisterAtStart(ins->input());
+ LWasmUnarySimd128* lir = new (alloc()) LWasmUnarySimd128(input, tempReg);
+ define(lir, ins);
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void LIRGenerator::visitWasmReduceSimd128(MWasmReduceSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ if (canEmitWasmReduceSimd128AtUses(ins)) {
+ emitAtUses(ins);
+ return;
+ }
+
+ // Reductions (any_true, all_true, bitmask, extract_lane) uniformly prefer
+ // useRegisterAtStart:
+ //
+ // - In most cases, the input type differs from the output type, so there's no
+ // conflict and it doesn't really matter.
+ //
+ // - For extract_lane(0) on F32x4 and F64x2, input == output results in zero
+ // code being generated.
+ //
+ // - For extract_lane(k > 0) on F32x4 and F64x2, allowing the input register
+ // to be targeted lowers register pressure if it's the last use of the
+ // input.
+
+ if (ins->type() == MIRType::Int64) {
+ auto* lir = new (alloc())
+ LWasmReduceSimd128ToInt64(useRegisterAtStart(ins->input()));
+ defineInt64(lir, ins);
+ } else {
+ LDefinition tempReg = LDefinition::BogusTemp();
+ switch (ins->simdOp()) {
+ case wasm::SimdOp::I8x16Bitmask:
+ case wasm::SimdOp::I16x8Bitmask:
+ case wasm::SimdOp::I32x4Bitmask:
+ case wasm::SimdOp::I64x2Bitmask:
+ tempReg = tempSimd128();
+ break;
+ default:
+ break;
+ }
+
+ // Ideally we would reuse the input register for floating extract_lane if
+ // the lane is zero, but constraints in the register allocator require the
+ // input and output register types to be the same.
+ auto* lir = new (alloc())
+ LWasmReduceSimd128(useRegisterAtStart(ins->input()), tempReg);
+ define(lir, ins);
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void LIRGenerator::visitWasmLoadLaneSimd128(MWasmLoadLaneSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ // On 64-bit systems, the base pointer can be 32 bits or 64 bits. Either way,
+ // it fits in a GPR so we can ignore the Register/Register64 distinction here.
+
+ // Optimal allocation here reuses the value input for the output register
+ // because codegen otherwise has to copy the input to the output; this is
+ // because load-lane is implemented as load + replace-lane. Bug 1706106 may
+ // change all of that, so leave it alone for now.
+ LUse base = useRegisterAtStart(ins->base());
+ LUse inputUse = useRegisterAtStart(ins->value());
+ MOZ_ASSERT(!ins->hasMemoryBase());
+ LWasmLoadLaneSimd128* lir =
+ new (alloc()) LWasmLoadLaneSimd128(base, inputUse, temp(), LAllocation());
+ define(lir, ins);
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void LIRGenerator::visitWasmStoreLaneSimd128(MWasmStoreLaneSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ // See comment above about the base pointer.
+
+ LUse base = useRegisterAtStart(ins->base());
+ LUse input = useRegisterAtStart(ins->value());
+ MOZ_ASSERT(!ins->hasMemoryBase());
+ LWasmStoreLaneSimd128* lir =
+ new (alloc()) LWasmStoreLaneSimd128(base, input, temp(), LAllocation());
+ add(lir, ins);
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
diff --git a/js/src/jit/arm64/Lowering-arm64.h b/js/src/jit/arm64/Lowering-arm64.h
new file mode 100644
index 0000000000..4ab52dd464
--- /dev/null
+++ b/js/src/jit/arm64/Lowering-arm64.h
@@ -0,0 +1,135 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_arm64_Lowering_arm64_h
+#define jit_arm64_Lowering_arm64_h
+
+#include "jit/shared/Lowering-shared.h"
+
+namespace js {
+namespace jit {
+
+class LIRGeneratorARM64 : public LIRGeneratorShared {
+ protected:
+ LIRGeneratorARM64(MIRGenerator* gen, MIRGraph& graph, LIRGraph& lirGraph)
+ : LIRGeneratorShared(gen, graph, lirGraph) {}
+
+ // Returns a box allocation. reg2 is ignored on 64-bit platforms.
+ LBoxAllocation useBoxFixed(MDefinition* mir, Register reg1, Register reg2,
+ bool useAtStart = false);
+
+ LAllocation useByteOpRegister(MDefinition* mir);
+ LAllocation useByteOpRegisterAtStart(MDefinition* mir);
+ LAllocation useByteOpRegisterOrNonDoubleConstant(MDefinition* mir);
+ LDefinition tempByteOpRegister();
+
+ LDefinition tempToUnbox();
+
+ bool needTempForPostBarrier() { return true; }
+
+ // ARM64 has a scratch register, so no need for another temp for dispatch ICs.
+ LDefinition tempForDispatchCache(MIRType outputType = MIRType::None) {
+ return LDefinition::BogusTemp();
+ }
+
+ void lowerUntypedPhiInput(MPhi* phi, uint32_t inputPosition, LBlock* block,
+ size_t lirIndex);
+ void lowerInt64PhiInput(MPhi* phi, uint32_t inputPosition, LBlock* block,
+ size_t lirIndex) {
+ lowerTypedPhiInput(phi, inputPosition, block, lirIndex);
+ }
+ void defineInt64Phi(MPhi* phi, size_t lirIndex) {
+ defineTypedPhi(phi, lirIndex);
+ }
+ void lowerForShift(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
+ MDefinition* lhs, MDefinition* rhs);
+ void lowerUrshD(MUrsh* mir);
+
+ void lowerPowOfTwoI(MPow* mir);
+
+ void lowerForALU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir,
+ MDefinition* input);
+ void lowerForALU(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
+ MDefinition* lhs, MDefinition* rhs);
+
+ void lowerForALUInt64(LInstructionHelper<INT64_PIECES, INT64_PIECES, 0>* ins,
+ MDefinition* mir, MDefinition* input);
+ void lowerForALUInt64(
+ LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>* ins,
+ MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
+ void lowerForMulInt64(LMulI64* ins, MMul* mir, MDefinition* lhs,
+ MDefinition* rhs);
+ template <size_t Temps>
+ void lowerForShiftInt64(
+ LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, Temps>* ins,
+ MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
+
+ void lowerForCompareI64AndBranch(MTest* mir, MCompare* comp, JSOp op,
+ MDefinition* left, MDefinition* right,
+ MBasicBlock* ifTrue, MBasicBlock* ifFalse);
+
+ void lowerForFPU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir,
+ MDefinition* input);
+
+ template <size_t Temps>
+ void lowerForFPU(LInstructionHelper<1, 2, Temps>* ins, MDefinition* mir,
+ MDefinition* lhs, MDefinition* rhs);
+
+ void lowerBuiltinInt64ToFloatingPoint(MBuiltinInt64ToFloatingPoint* ins);
+ void lowerWasmBuiltinTruncateToInt64(MWasmBuiltinTruncateToInt64* ins);
+ void lowerForBitAndAndBranch(LBitAndAndBranch* baab, MInstruction* mir,
+ MDefinition* lhs, MDefinition* rhs);
+ void lowerWasmBuiltinTruncateToInt32(MWasmBuiltinTruncateToInt32* ins);
+ void lowerTruncateDToInt32(MTruncateToInt32* ins);
+ void lowerTruncateFToInt32(MTruncateToInt32* ins);
+ void lowerDivI(MDiv* div);
+ void lowerModI(MMod* mod);
+ void lowerDivI64(MDiv* div);
+ void lowerWasmBuiltinDivI64(MWasmBuiltinDivI64* div);
+ void lowerModI64(MMod* mod);
+ void lowerWasmBuiltinModI64(MWasmBuiltinModI64* mod);
+ void lowerUDivI64(MDiv* div);
+ void lowerUModI64(MMod* mod);
+ void lowerNegI(MInstruction* ins, MDefinition* input);
+ void lowerNegI64(MInstruction* ins, MDefinition* input);
+ void lowerMulI(MMul* mul, MDefinition* lhs, MDefinition* rhs);
+ void lowerUDiv(MDiv* div);
+ void lowerUMod(MMod* mod);
+ void lowerWasmSelectI(MWasmSelect* select);
+ void lowerWasmSelectI64(MWasmSelect* select);
+ bool canSpecializeWasmCompareAndSelect(MCompare::CompareType compTy,
+ MIRType insTy);
+ void lowerWasmCompareAndSelect(MWasmSelect* ins, MDefinition* lhs,
+ MDefinition* rhs, MCompare::CompareType compTy,
+ JSOp jsop);
+
+ void lowerBigIntLsh(MBigIntLsh* ins);
+ void lowerBigIntRsh(MBigIntRsh* ins);
+ void lowerBigIntDiv(MBigIntDiv* ins);
+ void lowerBigIntMod(MBigIntMod* ins);
+
+ void lowerAtomicLoad64(MLoadUnboxedScalar* ins);
+ void lowerAtomicStore64(MStoreUnboxedScalar* ins);
+
+#ifdef ENABLE_WASM_SIMD
+ bool canFoldReduceSimd128AndBranch(wasm::SimdOp op);
+ bool canEmitWasmReduceSimd128AtUses(MWasmReduceSimd128* ins);
+#endif
+
+ LTableSwitchV* newLTableSwitchV(MTableSwitch* ins);
+ LTableSwitch* newLTableSwitch(const LAllocation& in,
+ const LDefinition& inputCopy,
+ MTableSwitch* ins);
+
+ void lowerPhi(MPhi* phi);
+};
+
+typedef LIRGeneratorARM64 LIRGeneratorSpecific;
+
+} // namespace jit
+} // namespace js
+
+#endif /* jit_arm64_Lowering_arm64_h */
diff --git a/js/src/jit/arm64/MacroAssembler-arm64-inl.h b/js/src/jit/arm64/MacroAssembler-arm64-inl.h
new file mode 100644
index 0000000000..283867a29a
--- /dev/null
+++ b/js/src/jit/arm64/MacroAssembler-arm64-inl.h
@@ -0,0 +1,4079 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_arm64_MacroAssembler_arm64_inl_h
+#define jit_arm64_MacroAssembler_arm64_inl_h
+
+#include "jit/arm64/MacroAssembler-arm64.h"
+
+namespace js {
+namespace jit {
+
+//{{{ check_macroassembler_style
+
+void MacroAssembler::move64(Register64 src, Register64 dest) {
+ Mov(ARMRegister(dest.reg, 64), ARMRegister(src.reg, 64));
+}
+
+void MacroAssembler::move64(Imm64 imm, Register64 dest) {
+ Mov(ARMRegister(dest.reg, 64), imm.value);
+}
+
+void MacroAssembler::moveFloat32ToGPR(FloatRegister src, Register dest) {
+ Fmov(ARMRegister(dest, 32), ARMFPRegister(src, 32));
+}
+
+void MacroAssembler::moveGPRToFloat32(Register src, FloatRegister dest) {
+ Fmov(ARMFPRegister(dest, 32), ARMRegister(src, 32));
+}
+
+void MacroAssembler::move8SignExtend(Register src, Register dest) {
+ Sxtb(ARMRegister(dest, 32), ARMRegister(src, 32));
+}
+
+void MacroAssembler::move16SignExtend(Register src, Register dest) {
+ Sxth(ARMRegister(dest, 32), ARMRegister(src, 32));
+}
+
+void MacroAssembler::moveDoubleToGPR64(FloatRegister src, Register64 dest) {
+ Fmov(ARMRegister(dest.reg, 64), ARMFPRegister(src, 64));
+}
+
+void MacroAssembler::moveGPR64ToDouble(Register64 src, FloatRegister dest) {
+ Fmov(ARMFPRegister(dest, 64), ARMRegister(src.reg, 64));
+}
+
+void MacroAssembler::move64To32(Register64 src, Register dest) {
+ Mov(ARMRegister(dest, 32), ARMRegister(src.reg, 32));
+}
+
+void MacroAssembler::move32To64ZeroExtend(Register src, Register64 dest) {
+ Uxtw(ARMRegister(dest.reg, 64), ARMRegister(src, 64));
+}
+
+void MacroAssembler::move8To64SignExtend(Register src, Register64 dest) {
+ Sxtb(ARMRegister(dest.reg, 64), ARMRegister(src, 32));
+}
+
+void MacroAssembler::move16To64SignExtend(Register src, Register64 dest) {
+ Sxth(ARMRegister(dest.reg, 64), ARMRegister(src, 32));
+}
+
+void MacroAssembler::move32To64SignExtend(Register src, Register64 dest) {
+ Sxtw(ARMRegister(dest.reg, 64), ARMRegister(src, 32));
+}
+
+void MacroAssembler::move32SignExtendToPtr(Register src, Register dest) {
+ Sxtw(ARMRegister(dest, 64), ARMRegister(src, 32));
+}
+
+void MacroAssembler::move32ZeroExtendToPtr(Register src, Register dest) {
+ Uxtw(ARMRegister(dest, 64), ARMRegister(src, 64));
+}
+
+// ===============================================================
+// Load instructions
+
+void MacroAssembler::load32SignExtendToPtr(const Address& src, Register dest) {
+ load32(src, dest);
+ move32To64SignExtend(dest, Register64(dest));
+}
+
+void MacroAssembler::loadAbiReturnAddress(Register dest) { movePtr(lr, dest); }
+
+// ===============================================================
+// Logical instructions
+
+void MacroAssembler::not32(Register reg) {
+ Orn(ARMRegister(reg, 32), vixl::wzr, ARMRegister(reg, 32));
+}
+
+void MacroAssembler::notPtr(Register reg) {
+ Orn(ARMRegister(reg, 64), vixl::xzr, ARMRegister(reg, 64));
+}
+
+void MacroAssembler::and32(Register src, Register dest) {
+ And(ARMRegister(dest, 32), ARMRegister(dest, 32),
+ Operand(ARMRegister(src, 32)));
+}
+
+void MacroAssembler::and32(Imm32 imm, Register dest) {
+ And(ARMRegister(dest, 32), ARMRegister(dest, 32), Operand(imm.value));
+}
+
+void MacroAssembler::and32(Imm32 imm, Register src, Register dest) {
+ And(ARMRegister(dest, 32), ARMRegister(src, 32), Operand(imm.value));
+}
+
+void MacroAssembler::and32(Imm32 imm, const Address& dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ MOZ_ASSERT(scratch32.asUnsized() != dest.base);
+ load32(dest, scratch32.asUnsized());
+ And(scratch32, scratch32, Operand(imm.value));
+ store32(scratch32.asUnsized(), dest);
+}
+
+void MacroAssembler::and32(const Address& src, Register dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ MOZ_ASSERT(scratch32.asUnsized() != src.base);
+ load32(src, scratch32.asUnsized());
+ And(ARMRegister(dest, 32), ARMRegister(dest, 32), Operand(scratch32));
+}
+
+void MacroAssembler::andPtr(Register src, Register dest) {
+ And(ARMRegister(dest, 64), ARMRegister(dest, 64),
+ Operand(ARMRegister(src, 64)));
+}
+
+void MacroAssembler::andPtr(Imm32 imm, Register dest) {
+ And(ARMRegister(dest, 64), ARMRegister(dest, 64), Operand(imm.value));
+}
+
+void MacroAssembler::and64(Imm64 imm, Register64 dest) {
+ And(ARMRegister(dest.reg, 64), ARMRegister(dest.reg, 64), Operand(imm.value));
+}
+
+void MacroAssembler::and64(Register64 src, Register64 dest) {
+ And(ARMRegister(dest.reg, 64), ARMRegister(dest.reg, 64),
+ ARMRegister(src.reg, 64));
+}
+
+void MacroAssembler::or64(Imm64 imm, Register64 dest) {
+ Orr(ARMRegister(dest.reg, 64), ARMRegister(dest.reg, 64), Operand(imm.value));
+}
+
+void MacroAssembler::or32(Imm32 imm, Register dest) {
+ Orr(ARMRegister(dest, 32), ARMRegister(dest, 32), Operand(imm.value));
+}
+
+void MacroAssembler::or32(Register src, Register dest) {
+ Orr(ARMRegister(dest, 32), ARMRegister(dest, 32),
+ Operand(ARMRegister(src, 32)));
+}
+
+void MacroAssembler::or32(Imm32 imm, const Address& dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ MOZ_ASSERT(scratch32.asUnsized() != dest.base);
+ load32(dest, scratch32.asUnsized());
+ Orr(scratch32, scratch32, Operand(imm.value));
+ store32(scratch32.asUnsized(), dest);
+}
+
+void MacroAssembler::orPtr(Register src, Register dest) {
+ Orr(ARMRegister(dest, 64), ARMRegister(dest, 64),
+ Operand(ARMRegister(src, 64)));
+}
+
+void MacroAssembler::orPtr(Imm32 imm, Register dest) {
+ Orr(ARMRegister(dest, 64), ARMRegister(dest, 64), Operand(imm.value));
+}
+
+void MacroAssembler::or64(Register64 src, Register64 dest) {
+ orPtr(src.reg, dest.reg);
+}
+
+void MacroAssembler::xor64(Register64 src, Register64 dest) {
+ xorPtr(src.reg, dest.reg);
+}
+
+void MacroAssembler::xor32(Register src, Register dest) {
+ Eor(ARMRegister(dest, 32), ARMRegister(dest, 32),
+ Operand(ARMRegister(src, 32)));
+}
+
+void MacroAssembler::xor32(Imm32 imm, Register dest) {
+ Eor(ARMRegister(dest, 32), ARMRegister(dest, 32), Operand(imm.value));
+}
+
+void MacroAssembler::xor32(Imm32 imm, const Address& dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ MOZ_ASSERT(scratch32.asUnsized() != dest.base);
+ load32(dest, scratch32.asUnsized());
+ Eor(scratch32, scratch32, Operand(imm.value));
+ store32(scratch32.asUnsized(), dest);
+}
+
+void MacroAssembler::xor32(const Address& src, Register dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ MOZ_ASSERT(scratch32.asUnsized() != src.base);
+ load32(src, scratch32.asUnsized());
+ Eor(ARMRegister(dest, 32), ARMRegister(dest, 32), Operand(scratch32));
+}
+
+void MacroAssembler::xorPtr(Register src, Register dest) {
+ Eor(ARMRegister(dest, 64), ARMRegister(dest, 64),
+ Operand(ARMRegister(src, 64)));
+}
+
+void MacroAssembler::xorPtr(Imm32 imm, Register dest) {
+ Eor(ARMRegister(dest, 64), ARMRegister(dest, 64), Operand(imm.value));
+}
+
+void MacroAssembler::xor64(Imm64 imm, Register64 dest) {
+ Eor(ARMRegister(dest.reg, 64), ARMRegister(dest.reg, 64), Operand(imm.value));
+}
+
+// ===============================================================
+// Swap instructions
+
+void MacroAssembler::byteSwap16SignExtend(Register reg) {
+ rev16(ARMRegister(reg, 32), ARMRegister(reg, 32));
+ sxth(ARMRegister(reg, 32), ARMRegister(reg, 32));
+}
+
+void MacroAssembler::byteSwap16ZeroExtend(Register reg) {
+ rev16(ARMRegister(reg, 32), ARMRegister(reg, 32));
+ uxth(ARMRegister(reg, 32), ARMRegister(reg, 32));
+}
+
+void MacroAssembler::byteSwap32(Register reg) {
+ rev(ARMRegister(reg, 32), ARMRegister(reg, 32));
+}
+
+void MacroAssembler::byteSwap64(Register64 reg) {
+ rev(ARMRegister(reg.reg, 64), ARMRegister(reg.reg, 64));
+}
+
+// ===============================================================
+// Arithmetic functions
+
+void MacroAssembler::add32(Register src, Register dest) {
+ Add(ARMRegister(dest, 32), ARMRegister(dest, 32),
+ Operand(ARMRegister(src, 32)));
+}
+
+void MacroAssembler::add32(Imm32 imm, Register dest) {
+ Add(ARMRegister(dest, 32), ARMRegister(dest, 32), Operand(imm.value));
+}
+
+void MacroAssembler::add32(Imm32 imm, const Address& dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ MOZ_ASSERT(scratch32.asUnsized() != dest.base);
+
+ Ldr(scratch32, toMemOperand(dest));
+ Add(scratch32, scratch32, Operand(imm.value));
+ Str(scratch32, toMemOperand(dest));
+}
+
+void MacroAssembler::addPtr(Register src, Register dest) {
+ addPtr(src, dest, dest);
+}
+
+void MacroAssembler::addPtr(Register src1, Register src2, Register dest) {
+ Add(ARMRegister(dest, 64), ARMRegister(src1, 64),
+ Operand(ARMRegister(src2, 64)));
+}
+
+void MacroAssembler::addPtr(Imm32 imm, Register dest) {
+ addPtr(imm, dest, dest);
+}
+
+void MacroAssembler::addPtr(Imm32 imm, Register src, Register dest) {
+ Add(ARMRegister(dest, 64), ARMRegister(src, 64), Operand(imm.value));
+}
+
+void MacroAssembler::addPtr(ImmWord imm, Register dest) {
+ Add(ARMRegister(dest, 64), ARMRegister(dest, 64), Operand(imm.value));
+}
+
+void MacroAssembler::addPtr(Imm32 imm, const Address& dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ MOZ_ASSERT(scratch64.asUnsized() != dest.base);
+
+ Ldr(scratch64, toMemOperand(dest));
+ Add(scratch64, scratch64, Operand(imm.value));
+ Str(scratch64, toMemOperand(dest));
+}
+
+void MacroAssembler::addPtr(const Address& src, Register dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ MOZ_ASSERT(scratch64.asUnsized() != src.base);
+
+ Ldr(scratch64, toMemOperand(src));
+ Add(ARMRegister(dest, 64), ARMRegister(dest, 64), Operand(scratch64));
+}
+
+void MacroAssembler::add64(Register64 src, Register64 dest) {
+ addPtr(src.reg, dest.reg);
+}
+
+void MacroAssembler::add64(Imm32 imm, Register64 dest) {
+ Add(ARMRegister(dest.reg, 64), ARMRegister(dest.reg, 64), Operand(imm.value));
+}
+
+void MacroAssembler::add64(Imm64 imm, Register64 dest) {
+ Add(ARMRegister(dest.reg, 64), ARMRegister(dest.reg, 64), Operand(imm.value));
+}
+
+CodeOffset MacroAssembler::sub32FromStackPtrWithPatch(Register dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch = temps.AcquireX();
+ AutoForbidPoolsAndNops afp(this,
+ /* max number of instructions in scope = */ 3);
+ CodeOffset offs = CodeOffset(currentOffset());
+ movz(scratch, 0, 0);
+ movk(scratch, 0, 16);
+ Sub(ARMRegister(dest, 64), sp, scratch);
+ return offs;
+}
+
+void MacroAssembler::patchSub32FromStackPtr(CodeOffset offset, Imm32 imm) {
+ Instruction* i1 = getInstructionAt(BufferOffset(offset.offset()));
+ MOZ_ASSERT(i1->IsMovz());
+ i1->SetInstructionBits(i1->InstructionBits() |
+ ImmMoveWide(uint16_t(imm.value)));
+
+ Instruction* i2 = getInstructionAt(BufferOffset(offset.offset() + 4));
+ MOZ_ASSERT(i2->IsMovk());
+ i2->SetInstructionBits(i2->InstructionBits() |
+ ImmMoveWide(uint16_t(imm.value >> 16)));
+}
+
+void MacroAssembler::addDouble(FloatRegister src, FloatRegister dest) {
+ fadd(ARMFPRegister(dest, 64), ARMFPRegister(dest, 64),
+ ARMFPRegister(src, 64));
+}
+
+void MacroAssembler::addFloat32(FloatRegister src, FloatRegister dest) {
+ fadd(ARMFPRegister(dest, 32), ARMFPRegister(dest, 32),
+ ARMFPRegister(src, 32));
+}
+
+void MacroAssembler::sub32(Imm32 imm, Register dest) {
+ Sub(ARMRegister(dest, 32), ARMRegister(dest, 32), Operand(imm.value));
+}
+
+void MacroAssembler::sub32(Register src, Register dest) {
+ Sub(ARMRegister(dest, 32), ARMRegister(dest, 32),
+ Operand(ARMRegister(src, 32)));
+}
+
+void MacroAssembler::sub32(const Address& src, Register dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ MOZ_ASSERT(scratch32.asUnsized() != src.base);
+ load32(src, scratch32.asUnsized());
+ Sub(ARMRegister(dest, 32), ARMRegister(dest, 32), Operand(scratch32));
+}
+
+void MacroAssembler::subPtr(Register src, Register dest) {
+ Sub(ARMRegister(dest, 64), ARMRegister(dest, 64),
+ Operand(ARMRegister(src, 64)));
+}
+
+void MacroAssembler::subPtr(Register src, const Address& dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ MOZ_ASSERT(scratch64.asUnsized() != dest.base);
+
+ Ldr(scratch64, toMemOperand(dest));
+ Sub(scratch64, scratch64, Operand(ARMRegister(src, 64)));
+ Str(scratch64, toMemOperand(dest));
+}
+
+void MacroAssembler::subPtr(Imm32 imm, Register dest) {
+ Sub(ARMRegister(dest, 64), ARMRegister(dest, 64), Operand(imm.value));
+}
+
+void MacroAssembler::subPtr(const Address& addr, Register dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ MOZ_ASSERT(scratch64.asUnsized() != addr.base);
+
+ Ldr(scratch64, toMemOperand(addr));
+ Sub(ARMRegister(dest, 64), ARMRegister(dest, 64), Operand(scratch64));
+}
+
+void MacroAssembler::sub64(Register64 src, Register64 dest) {
+ Sub(ARMRegister(dest.reg, 64), ARMRegister(dest.reg, 64),
+ ARMRegister(src.reg, 64));
+}
+
+void MacroAssembler::sub64(Imm64 imm, Register64 dest) {
+ Sub(ARMRegister(dest.reg, 64), ARMRegister(dest.reg, 64), Operand(imm.value));
+}
+
+void MacroAssembler::subDouble(FloatRegister src, FloatRegister dest) {
+ fsub(ARMFPRegister(dest, 64), ARMFPRegister(dest, 64),
+ ARMFPRegister(src, 64));
+}
+
+void MacroAssembler::subFloat32(FloatRegister src, FloatRegister dest) {
+ fsub(ARMFPRegister(dest, 32), ARMFPRegister(dest, 32),
+ ARMFPRegister(src, 32));
+}
+
+void MacroAssembler::mul32(Register rhs, Register srcDest) {
+ mul32(srcDest, rhs, srcDest, nullptr);
+}
+
+void MacroAssembler::mul32(Imm32 imm, Register srcDest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+
+ move32(imm, scratch32.asUnsized());
+ mul32(scratch32.asUnsized(), srcDest);
+}
+
+void MacroAssembler::mul32(Register src1, Register src2, Register dest,
+ Label* onOver) {
+ if (onOver) {
+ Smull(ARMRegister(dest, 64), ARMRegister(src1, 32), ARMRegister(src2, 32));
+ Cmp(ARMRegister(dest, 64), Operand(ARMRegister(dest, 32), vixl::SXTW));
+ B(onOver, NotEqual);
+
+ // Clear upper 32 bits.
+ Uxtw(ARMRegister(dest, 64), ARMRegister(dest, 64));
+ } else {
+ Mul(ARMRegister(dest, 32), ARMRegister(src1, 32), ARMRegister(src2, 32));
+ }
+}
+
+void MacroAssembler::mulHighUnsigned32(Imm32 imm, Register src, Register dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+
+ Mov(scratch32, int32_t(imm.value));
+ Umull(ARMRegister(dest, 64), scratch32, ARMRegister(src, 32));
+
+ Lsr(ARMRegister(dest, 64), ARMRegister(dest, 64), 32);
+}
+
+void MacroAssembler::mulPtr(Register rhs, Register srcDest) {
+ Mul(ARMRegister(srcDest, 64), ARMRegister(srcDest, 64), ARMRegister(rhs, 64));
+}
+
+void MacroAssembler::mul64(Imm64 imm, const Register64& dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ MOZ_ASSERT(dest.reg != scratch64.asUnsized());
+ mov(ImmWord(imm.value), scratch64.asUnsized());
+ Mul(ARMRegister(dest.reg, 64), ARMRegister(dest.reg, 64), scratch64);
+}
+
+void MacroAssembler::mul64(const Register64& src, const Register64& dest,
+ const Register temp) {
+ MOZ_ASSERT(temp == Register::Invalid());
+ Mul(ARMRegister(dest.reg, 64), ARMRegister(dest.reg, 64),
+ ARMRegister(src.reg, 64));
+}
+
+void MacroAssembler::mul64(const Register64& src1, const Register64& src2,
+ const Register64& dest) {
+ Mul(ARMRegister(dest.reg, 64), ARMRegister(src1.reg, 64),
+ ARMRegister(src2.reg, 64));
+}
+
+void MacroAssembler::mul64(Imm64 src1, const Register64& src2,
+ const Register64& dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ MOZ_ASSERT(dest.reg != scratch64.asUnsized());
+ mov(ImmWord(src1.value), scratch64.asUnsized());
+ Mul(ARMRegister(dest.reg, 64), ARMRegister(src2.reg, 64), scratch64);
+}
+
+void MacroAssembler::mulBy3(Register src, Register dest) {
+ ARMRegister xdest(dest, 64);
+ ARMRegister xsrc(src, 64);
+ Add(xdest, xsrc, Operand(xsrc, vixl::LSL, 1));
+}
+
+void MacroAssembler::mulFloat32(FloatRegister src, FloatRegister dest) {
+ fmul(ARMFPRegister(dest, 32), ARMFPRegister(dest, 32),
+ ARMFPRegister(src, 32));
+}
+
+void MacroAssembler::mulDouble(FloatRegister src, FloatRegister dest) {
+ fmul(ARMFPRegister(dest, 64), ARMFPRegister(dest, 64),
+ ARMFPRegister(src, 64));
+}
+
+void MacroAssembler::mulDoublePtr(ImmPtr imm, Register temp,
+ FloatRegister dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(temp != scratch);
+ movePtr(imm, scratch);
+ const ARMFPRegister scratchDouble = temps.AcquireD();
+ Ldr(scratchDouble, MemOperand(Address(scratch, 0)));
+ fmul(ARMFPRegister(dest, 64), ARMFPRegister(dest, 64), scratchDouble);
+}
+
+void MacroAssembler::quotient32(Register rhs, Register srcDest,
+ bool isUnsigned) {
+ if (isUnsigned) {
+ Udiv(ARMRegister(srcDest, 32), ARMRegister(srcDest, 32),
+ ARMRegister(rhs, 32));
+ } else {
+ Sdiv(ARMRegister(srcDest, 32), ARMRegister(srcDest, 32),
+ ARMRegister(rhs, 32));
+ }
+}
+
+// This does not deal with x % 0 or INT_MIN % -1, the caller needs to filter
+// those cases when they may occur.
+
+void MacroAssembler::remainder32(Register rhs, Register srcDest,
+ bool isUnsigned) {
+ vixl::UseScratchRegisterScope temps(this);
+ ARMRegister scratch = temps.AcquireW();
+ if (isUnsigned) {
+ Udiv(scratch, ARMRegister(srcDest, 32), ARMRegister(rhs, 32));
+ } else {
+ Sdiv(scratch, ARMRegister(srcDest, 32), ARMRegister(rhs, 32));
+ }
+ Mul(scratch, scratch, ARMRegister(rhs, 32));
+ Sub(ARMRegister(srcDest, 32), ARMRegister(srcDest, 32), scratch);
+}
+
+void MacroAssembler::divFloat32(FloatRegister src, FloatRegister dest) {
+ fdiv(ARMFPRegister(dest, 32), ARMFPRegister(dest, 32),
+ ARMFPRegister(src, 32));
+}
+
+void MacroAssembler::divDouble(FloatRegister src, FloatRegister dest) {
+ fdiv(ARMFPRegister(dest, 64), ARMFPRegister(dest, 64),
+ ARMFPRegister(src, 64));
+}
+
+void MacroAssembler::inc64(AbsoluteAddress dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratchAddr64 = temps.AcquireX();
+ const ARMRegister scratch64 = temps.AcquireX();
+
+ Mov(scratchAddr64, uint64_t(dest.addr));
+ Ldr(scratch64, MemOperand(scratchAddr64, 0));
+ Add(scratch64, scratch64, Operand(1));
+ Str(scratch64, MemOperand(scratchAddr64, 0));
+}
+
+void MacroAssembler::neg32(Register reg) {
+ Neg(ARMRegister(reg, 32), Operand(ARMRegister(reg, 32)));
+}
+
+void MacroAssembler::neg64(Register64 reg) { negPtr(reg.reg); }
+
+void MacroAssembler::negPtr(Register reg) {
+ Neg(ARMRegister(reg, 64), Operand(ARMRegister(reg, 64)));
+}
+
+void MacroAssembler::negateFloat(FloatRegister reg) {
+ fneg(ARMFPRegister(reg, 32), ARMFPRegister(reg, 32));
+}
+
+void MacroAssembler::negateDouble(FloatRegister reg) {
+ fneg(ARMFPRegister(reg, 64), ARMFPRegister(reg, 64));
+}
+
+void MacroAssembler::abs32(Register src, Register dest) {
+ Cmp(ARMRegister(src, 32), wzr);
+ Cneg(ARMRegister(dest, 32), ARMRegister(src, 32), Assembler::LessThan);
+}
+
+void MacroAssembler::absFloat32(FloatRegister src, FloatRegister dest) {
+ fabs(ARMFPRegister(dest, 32), ARMFPRegister(src, 32));
+}
+
+void MacroAssembler::absDouble(FloatRegister src, FloatRegister dest) {
+ fabs(ARMFPRegister(dest, 64), ARMFPRegister(src, 64));
+}
+
+void MacroAssembler::sqrtFloat32(FloatRegister src, FloatRegister dest) {
+ fsqrt(ARMFPRegister(dest, 32), ARMFPRegister(src, 32));
+}
+
+void MacroAssembler::sqrtDouble(FloatRegister src, FloatRegister dest) {
+ fsqrt(ARMFPRegister(dest, 64), ARMFPRegister(src, 64));
+}
+
+void MacroAssembler::minFloat32(FloatRegister other, FloatRegister srcDest,
+ bool handleNaN) {
+ MOZ_ASSERT(handleNaN); // Always true for wasm
+ fmin(ARMFPRegister(srcDest, 32), ARMFPRegister(srcDest, 32),
+ ARMFPRegister(other, 32));
+}
+
+void MacroAssembler::minDouble(FloatRegister other, FloatRegister srcDest,
+ bool handleNaN) {
+ MOZ_ASSERT(handleNaN); // Always true for wasm
+ fmin(ARMFPRegister(srcDest, 64), ARMFPRegister(srcDest, 64),
+ ARMFPRegister(other, 64));
+}
+
+void MacroAssembler::maxFloat32(FloatRegister other, FloatRegister srcDest,
+ bool handleNaN) {
+ MOZ_ASSERT(handleNaN); // Always true for wasm
+ fmax(ARMFPRegister(srcDest, 32), ARMFPRegister(srcDest, 32),
+ ARMFPRegister(other, 32));
+}
+
+void MacroAssembler::maxDouble(FloatRegister other, FloatRegister srcDest,
+ bool handleNaN) {
+ MOZ_ASSERT(handleNaN); // Always true for wasm
+ fmax(ARMFPRegister(srcDest, 64), ARMFPRegister(srcDest, 64),
+ ARMFPRegister(other, 64));
+}
+
+// ===============================================================
+// Shift functions
+
+void MacroAssembler::lshiftPtr(Imm32 imm, Register dest) {
+ MOZ_ASSERT(0 <= imm.value && imm.value < 64);
+ Lsl(ARMRegister(dest, 64), ARMRegister(dest, 64), imm.value);
+}
+
+void MacroAssembler::lshiftPtr(Register shift, Register dest) {
+ Lsl(ARMRegister(dest, 64), ARMRegister(dest, 64), ARMRegister(shift, 64));
+}
+
+void MacroAssembler::lshift64(Imm32 imm, Register64 dest) {
+ MOZ_ASSERT(0 <= imm.value && imm.value < 64);
+ lshiftPtr(imm, dest.reg);
+}
+
+void MacroAssembler::lshift64(Register shift, Register64 srcDest) {
+ Lsl(ARMRegister(srcDest.reg, 64), ARMRegister(srcDest.reg, 64),
+ ARMRegister(shift, 64));
+}
+
+void MacroAssembler::lshift32(Register shift, Register dest) {
+ Lsl(ARMRegister(dest, 32), ARMRegister(dest, 32), ARMRegister(shift, 32));
+}
+
+void MacroAssembler::flexibleLshift32(Register src, Register dest) {
+ lshift32(src, dest);
+}
+
+void MacroAssembler::lshift32(Imm32 imm, Register dest) {
+ MOZ_ASSERT(0 <= imm.value && imm.value < 32);
+ Lsl(ARMRegister(dest, 32), ARMRegister(dest, 32), imm.value);
+}
+
+void MacroAssembler::rshiftPtr(Imm32 imm, Register dest) {
+ MOZ_ASSERT(0 <= imm.value && imm.value < 64);
+ Lsr(ARMRegister(dest, 64), ARMRegister(dest, 64), imm.value);
+}
+
+void MacroAssembler::rshiftPtr(Imm32 imm, Register src, Register dest) {
+ MOZ_ASSERT(0 <= imm.value && imm.value < 64);
+ Lsr(ARMRegister(dest, 64), ARMRegister(src, 64), imm.value);
+}
+
+void MacroAssembler::rshiftPtr(Register shift, Register dest) {
+ Lsr(ARMRegister(dest, 64), ARMRegister(dest, 64), ARMRegister(shift, 64));
+}
+
+void MacroAssembler::rshift32(Register shift, Register dest) {
+ Lsr(ARMRegister(dest, 32), ARMRegister(dest, 32), ARMRegister(shift, 32));
+}
+
+void MacroAssembler::flexibleRshift32(Register src, Register dest) {
+ rshift32(src, dest);
+}
+
+void MacroAssembler::rshift32(Imm32 imm, Register dest) {
+ MOZ_ASSERT(0 <= imm.value && imm.value < 32);
+ Lsr(ARMRegister(dest, 32), ARMRegister(dest, 32), imm.value);
+}
+
+void MacroAssembler::rshiftPtrArithmetic(Imm32 imm, Register dest) {
+ MOZ_ASSERT(0 <= imm.value && imm.value < 64);
+ Asr(ARMRegister(dest, 64), ARMRegister(dest, 64), imm.value);
+}
+
+void MacroAssembler::rshift32Arithmetic(Register shift, Register dest) {
+ Asr(ARMRegister(dest, 32), ARMRegister(dest, 32), ARMRegister(shift, 32));
+}
+
+void MacroAssembler::rshift32Arithmetic(Imm32 imm, Register dest) {
+ MOZ_ASSERT(0 <= imm.value && imm.value < 32);
+ Asr(ARMRegister(dest, 32), ARMRegister(dest, 32), imm.value);
+}
+
+void MacroAssembler::flexibleRshift32Arithmetic(Register src, Register dest) {
+ rshift32Arithmetic(src, dest);
+}
+
+void MacroAssembler::rshift64(Imm32 imm, Register64 dest) {
+ MOZ_ASSERT(0 <= imm.value && imm.value < 64);
+ rshiftPtr(imm, dest.reg);
+}
+
+void MacroAssembler::rshift64(Register shift, Register64 srcDest) {
+ Lsr(ARMRegister(srcDest.reg, 64), ARMRegister(srcDest.reg, 64),
+ ARMRegister(shift, 64));
+}
+
+void MacroAssembler::rshift64Arithmetic(Imm32 imm, Register64 dest) {
+ Asr(ARMRegister(dest.reg, 64), ARMRegister(dest.reg, 64), imm.value);
+}
+
+void MacroAssembler::rshift64Arithmetic(Register shift, Register64 srcDest) {
+ Asr(ARMRegister(srcDest.reg, 64), ARMRegister(srcDest.reg, 64),
+ ARMRegister(shift, 64));
+}
+
+// ===============================================================
+// Condition functions
+
+void MacroAssembler::cmp8Set(Condition cond, Address lhs, Imm32 rhs,
+ Register dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != lhs.base);
+
+ switch (cond) {
+ case Assembler::Equal:
+ case Assembler::NotEqual:
+ case Assembler::Above:
+ case Assembler::AboveOrEqual:
+ case Assembler::Below:
+ case Assembler::BelowOrEqual:
+ load8ZeroExtend(lhs, scratch);
+ cmp32Set(cond, scratch, Imm32(uint8_t(rhs.value)), dest);
+ break;
+
+ case Assembler::GreaterThan:
+ case Assembler::GreaterThanOrEqual:
+ case Assembler::LessThan:
+ case Assembler::LessThanOrEqual:
+ load8SignExtend(lhs, scratch);
+ cmp32Set(cond, scratch, Imm32(int8_t(rhs.value)), dest);
+ break;
+
+ default:
+ MOZ_CRASH("unexpected condition");
+ }
+}
+
+void MacroAssembler::cmp16Set(Condition cond, Address lhs, Imm32 rhs,
+ Register dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != lhs.base);
+
+ switch (cond) {
+ case Assembler::Equal:
+ case Assembler::NotEqual:
+ case Assembler::Above:
+ case Assembler::AboveOrEqual:
+ case Assembler::Below:
+ case Assembler::BelowOrEqual:
+ load16ZeroExtend(lhs, scratch);
+ cmp32Set(cond, scratch, Imm32(uint16_t(rhs.value)), dest);
+ break;
+
+ case Assembler::GreaterThan:
+ case Assembler::GreaterThanOrEqual:
+ case Assembler::LessThan:
+ case Assembler::LessThanOrEqual:
+ load16SignExtend(lhs, scratch);
+ cmp32Set(cond, scratch, Imm32(int16_t(rhs.value)), dest);
+ break;
+
+ default:
+ MOZ_CRASH("unexpected condition");
+ }
+}
+
+template <typename T1, typename T2>
+void MacroAssembler::cmp32Set(Condition cond, T1 lhs, T2 rhs, Register dest) {
+ cmp32(lhs, rhs);
+ emitSet(cond, dest);
+}
+
+void MacroAssembler::cmp64Set(Condition cond, Address lhs, Imm64 rhs,
+ Register dest) {
+ cmpPtrSet(cond, lhs, ImmWord(static_cast<uintptr_t>(rhs.value)), dest);
+}
+
+template <typename T1, typename T2>
+void MacroAssembler::cmpPtrSet(Condition cond, T1 lhs, T2 rhs, Register dest) {
+ cmpPtr(lhs, rhs);
+ emitSet(cond, dest);
+}
+
+// ===============================================================
+// Rotation functions
+
+void MacroAssembler::rotateLeft(Imm32 count, Register input, Register dest) {
+ Ror(ARMRegister(dest, 32), ARMRegister(input, 32), (32 - count.value) & 31);
+}
+
+void MacroAssembler::rotateLeft(Register count, Register input, Register dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch = temps.AcquireW();
+ // Really 32 - count, but the upper bits of the result are ignored.
+ Neg(scratch, ARMRegister(count, 32));
+ Ror(ARMRegister(dest, 32), ARMRegister(input, 32), scratch);
+}
+
+void MacroAssembler::rotateRight(Imm32 count, Register input, Register dest) {
+ Ror(ARMRegister(dest, 32), ARMRegister(input, 32), count.value & 31);
+}
+
+void MacroAssembler::rotateRight(Register count, Register input,
+ Register dest) {
+ Ror(ARMRegister(dest, 32), ARMRegister(input, 32), ARMRegister(count, 32));
+}
+
+void MacroAssembler::rotateLeft64(Register count, Register64 input,
+ Register64 dest, Register temp) {
+ MOZ_ASSERT(temp == Register::Invalid());
+
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch = temps.AcquireX();
+ // Really 64 - count, but the upper bits of the result are ignored.
+ Neg(scratch, ARMRegister(count, 64));
+ Ror(ARMRegister(dest.reg, 64), ARMRegister(input.reg, 64), scratch);
+}
+
+void MacroAssembler::rotateLeft64(Imm32 count, Register64 input,
+ Register64 dest, Register temp) {
+ MOZ_ASSERT(temp == Register::Invalid());
+
+ Ror(ARMRegister(dest.reg, 64), ARMRegister(input.reg, 64),
+ (64 - count.value) & 63);
+}
+
+void MacroAssembler::rotateRight64(Register count, Register64 input,
+ Register64 dest, Register temp) {
+ MOZ_ASSERT(temp == Register::Invalid());
+
+ Ror(ARMRegister(dest.reg, 64), ARMRegister(input.reg, 64),
+ ARMRegister(count, 64));
+}
+
+void MacroAssembler::rotateRight64(Imm32 count, Register64 input,
+ Register64 dest, Register temp) {
+ MOZ_ASSERT(temp == Register::Invalid());
+
+ Ror(ARMRegister(dest.reg, 64), ARMRegister(input.reg, 64), count.value & 63);
+}
+
+// ===============================================================
+// Bit counting functions
+
+void MacroAssembler::clz32(Register src, Register dest, bool knownNotZero) {
+ Clz(ARMRegister(dest, 32), ARMRegister(src, 32));
+}
+
+void MacroAssembler::ctz32(Register src, Register dest, bool knownNotZero) {
+ Rbit(ARMRegister(dest, 32), ARMRegister(src, 32));
+ Clz(ARMRegister(dest, 32), ARMRegister(dest, 32));
+}
+
+void MacroAssembler::clz64(Register64 src, Register dest) {
+ Clz(ARMRegister(dest, 64), ARMRegister(src.reg, 64));
+}
+
+void MacroAssembler::ctz64(Register64 src, Register dest) {
+ Rbit(ARMRegister(dest, 64), ARMRegister(src.reg, 64));
+ Clz(ARMRegister(dest, 64), ARMRegister(dest, 64));
+}
+
+void MacroAssembler::popcnt32(Register src_, Register dest_, Register tmp_) {
+ MOZ_ASSERT(tmp_ != Register::Invalid());
+
+ // Equivalent to mozilla::CountPopulation32().
+
+ ARMRegister src(src_, 32);
+ ARMRegister dest(dest_, 32);
+ ARMRegister tmp(tmp_, 32);
+
+ Mov(tmp, src);
+ if (src_ != dest_) {
+ Mov(dest, src);
+ }
+ Lsr(dest, dest, 1);
+ And(dest, dest, 0x55555555);
+ Sub(dest, tmp, dest);
+ Lsr(tmp, dest, 2);
+ And(tmp, tmp, 0x33333333);
+ And(dest, dest, 0x33333333);
+ Add(dest, tmp, dest);
+ Add(dest, dest, Operand(dest, vixl::LSR, 4));
+ And(dest, dest, 0x0F0F0F0F);
+ Add(dest, dest, Operand(dest, vixl::LSL, 8));
+ Add(dest, dest, Operand(dest, vixl::LSL, 16));
+ Lsr(dest, dest, 24);
+}
+
+void MacroAssembler::popcnt64(Register64 src_, Register64 dest_,
+ Register tmp_) {
+ MOZ_ASSERT(tmp_ != Register::Invalid());
+
+ // Equivalent to mozilla::CountPopulation64(), though likely more efficient.
+
+ ARMRegister src(src_.reg, 64);
+ ARMRegister dest(dest_.reg, 64);
+ ARMRegister tmp(tmp_, 64);
+
+ Mov(tmp, src);
+ if (src_ != dest_) {
+ Mov(dest, src);
+ }
+ Lsr(dest, dest, 1);
+ And(dest, dest, 0x5555555555555555);
+ Sub(dest, tmp, dest);
+ Lsr(tmp, dest, 2);
+ And(tmp, tmp, 0x3333333333333333);
+ And(dest, dest, 0x3333333333333333);
+ Add(dest, tmp, dest);
+ Add(dest, dest, Operand(dest, vixl::LSR, 4));
+ And(dest, dest, 0x0F0F0F0F0F0F0F0F);
+ Add(dest, dest, Operand(dest, vixl::LSL, 8));
+ Add(dest, dest, Operand(dest, vixl::LSL, 16));
+ Add(dest, dest, Operand(dest, vixl::LSL, 32));
+ Lsr(dest, dest, 56);
+}
+
+// ===============================================================
+// Branch functions
+
+void MacroAssembler::branch8(Condition cond, const Address& lhs, Imm32 rhs,
+ Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != lhs.base);
+
+ switch (cond) {
+ case Assembler::Equal:
+ case Assembler::NotEqual:
+ case Assembler::Above:
+ case Assembler::AboveOrEqual:
+ case Assembler::Below:
+ case Assembler::BelowOrEqual:
+ load8ZeroExtend(lhs, scratch);
+ branch32(cond, scratch, Imm32(uint8_t(rhs.value)), label);
+ break;
+
+ case Assembler::GreaterThan:
+ case Assembler::GreaterThanOrEqual:
+ case Assembler::LessThan:
+ case Assembler::LessThanOrEqual:
+ load8SignExtend(lhs, scratch);
+ branch32(cond, scratch, Imm32(int8_t(rhs.value)), label);
+ break;
+
+ default:
+ MOZ_CRASH("unexpected condition");
+ }
+}
+
+void MacroAssembler::branch8(Condition cond, const BaseIndex& lhs, Register rhs,
+ Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != lhs.base);
+
+ switch (cond) {
+ case Assembler::Equal:
+ case Assembler::NotEqual:
+ case Assembler::Above:
+ case Assembler::AboveOrEqual:
+ case Assembler::Below:
+ case Assembler::BelowOrEqual:
+ load8ZeroExtend(lhs, scratch);
+ branch32(cond, scratch, rhs, label);
+ break;
+
+ case Assembler::GreaterThan:
+ case Assembler::GreaterThanOrEqual:
+ case Assembler::LessThan:
+ case Assembler::LessThanOrEqual:
+ load8SignExtend(lhs, scratch);
+ branch32(cond, scratch, rhs, label);
+ break;
+
+ default:
+ MOZ_CRASH("unexpected condition");
+ }
+}
+
+void MacroAssembler::branch16(Condition cond, const Address& lhs, Imm32 rhs,
+ Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != lhs.base);
+
+ switch (cond) {
+ case Assembler::Equal:
+ case Assembler::NotEqual:
+ case Assembler::Above:
+ case Assembler::AboveOrEqual:
+ case Assembler::Below:
+ case Assembler::BelowOrEqual:
+ load16ZeroExtend(lhs, scratch);
+ branch32(cond, scratch, Imm32(uint16_t(rhs.value)), label);
+ break;
+
+ case Assembler::GreaterThan:
+ case Assembler::GreaterThanOrEqual:
+ case Assembler::LessThan:
+ case Assembler::LessThanOrEqual:
+ load16SignExtend(lhs, scratch);
+ branch32(cond, scratch, Imm32(int16_t(rhs.value)), label);
+ break;
+
+ default:
+ MOZ_CRASH("unexpected condition");
+ }
+}
+
+template <class L>
+void MacroAssembler::branch32(Condition cond, Register lhs, Register rhs,
+ L label) {
+ cmp32(lhs, rhs);
+ B(label, cond);
+}
+
+template <class L>
+void MacroAssembler::branch32(Condition cond, Register lhs, Imm32 imm,
+ L label) {
+ if (imm.value == 0 && cond == Assembler::Equal) {
+ Cbz(ARMRegister(lhs, 32), label);
+ } else if (imm.value == 0 && cond == Assembler::NotEqual) {
+ Cbnz(ARMRegister(lhs, 32), label);
+ } else {
+ cmp32(lhs, imm);
+ B(label, cond);
+ }
+}
+
+void MacroAssembler::branch32(Condition cond, Register lhs, const Address& rhs,
+ Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != lhs);
+ MOZ_ASSERT(scratch != rhs.base);
+ load32(rhs, scratch);
+ branch32(cond, lhs, scratch, label);
+}
+
+void MacroAssembler::branch32(Condition cond, const Address& lhs, Register rhs,
+ Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != lhs.base);
+ MOZ_ASSERT(scratch != rhs);
+ load32(lhs, scratch);
+ branch32(cond, scratch, rhs, label);
+}
+
+void MacroAssembler::branch32(Condition cond, const Address& lhs, Imm32 imm,
+ Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != lhs.base);
+ load32(lhs, scratch);
+ branch32(cond, scratch, imm, label);
+}
+
+void MacroAssembler::branch32(Condition cond, const AbsoluteAddress& lhs,
+ Register rhs, Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ movePtr(ImmPtr(lhs.addr), scratch);
+ branch32(cond, Address(scratch, 0), rhs, label);
+}
+
+void MacroAssembler::branch32(Condition cond, const AbsoluteAddress& lhs,
+ Imm32 rhs, Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ load32(lhs, scratch);
+ branch32(cond, scratch, rhs, label);
+}
+
+void MacroAssembler::branch32(Condition cond, const BaseIndex& lhs, Imm32 rhs,
+ Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ MOZ_ASSERT(scratch32.asUnsized() != lhs.base);
+ MOZ_ASSERT(scratch32.asUnsized() != lhs.index);
+ doBaseIndex(scratch32, lhs, vixl::LDR_w);
+ branch32(cond, scratch32.asUnsized(), rhs, label);
+}
+
+void MacroAssembler::branch32(Condition cond, wasm::SymbolicAddress lhs,
+ Imm32 rhs, Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ movePtr(lhs, scratch);
+ branch32(cond, Address(scratch, 0), rhs, label);
+}
+
+void MacroAssembler::branch64(Condition cond, Register64 lhs, Imm64 val,
+ Label* success, Label* fail) {
+ if (val.value == 0 && cond == Assembler::Equal) {
+ Cbz(ARMRegister(lhs.reg, 64), success);
+ } else if (val.value == 0 && cond == Assembler::NotEqual) {
+ Cbnz(ARMRegister(lhs.reg, 64), success);
+ } else {
+ Cmp(ARMRegister(lhs.reg, 64), val.value);
+ B(success, cond);
+ }
+ if (fail) {
+ B(fail);
+ }
+}
+
+void MacroAssembler::branch64(Condition cond, Register64 lhs, Register64 rhs,
+ Label* success, Label* fail) {
+ Cmp(ARMRegister(lhs.reg, 64), ARMRegister(rhs.reg, 64));
+ B(success, cond);
+ if (fail) {
+ B(fail);
+ }
+}
+
+void MacroAssembler::branch64(Condition cond, const Address& lhs, Imm64 val,
+ Label* label) {
+ MOZ_ASSERT(cond == Assembler::NotEqual || cond == Assembler::Equal,
+ "other condition codes not supported");
+
+ branchPtr(cond, lhs, ImmWord(val.value), label);
+}
+
+void MacroAssembler::branch64(Condition cond, const Address& lhs,
+ Register64 rhs, Label* label) {
+ MOZ_ASSERT(cond == Assembler::NotEqual || cond == Assembler::Equal,
+ "other condition codes not supported");
+
+ branchPtr(cond, lhs, rhs.reg, label);
+}
+
+void MacroAssembler::branch64(Condition cond, const Address& lhs,
+ const Address& rhs, Register scratch,
+ Label* label) {
+ MOZ_ASSERT(cond == Assembler::NotEqual || cond == Assembler::Equal,
+ "other condition codes not supported");
+ MOZ_ASSERT(lhs.base != scratch);
+ MOZ_ASSERT(rhs.base != scratch);
+
+ loadPtr(rhs, scratch);
+ branchPtr(cond, lhs, scratch, label);
+}
+
+template <class L>
+void MacroAssembler::branchPtr(Condition cond, Register lhs, Register rhs,
+ L label) {
+ Cmp(ARMRegister(lhs, 64), ARMRegister(rhs, 64));
+ B(label, cond);
+}
+
+void MacroAssembler::branchPtr(Condition cond, Register lhs, Imm32 rhs,
+ Label* label) {
+ if (rhs.value == 0 && cond == Assembler::Equal) {
+ Cbz(ARMRegister(lhs, 64), label);
+ } else if (rhs.value == 0 && cond == Assembler::NotEqual) {
+ Cbnz(ARMRegister(lhs, 64), label);
+ } else {
+ cmpPtr(lhs, rhs);
+ B(label, cond);
+ }
+}
+
+void MacroAssembler::branchPtr(Condition cond, Register lhs, ImmPtr rhs,
+ Label* label) {
+ if (rhs.value == 0 && cond == Assembler::Equal) {
+ Cbz(ARMRegister(lhs, 64), label);
+ } else if (rhs.value == 0 && cond == Assembler::NotEqual) {
+ Cbnz(ARMRegister(lhs, 64), label);
+ } else {
+ cmpPtr(lhs, rhs);
+ B(label, cond);
+ }
+}
+
+void MacroAssembler::branchPtr(Condition cond, Register lhs, ImmGCPtr rhs,
+ Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != lhs);
+ movePtr(rhs, scratch);
+ branchPtr(cond, lhs, scratch, label);
+}
+
+void MacroAssembler::branchPtr(Condition cond, Register lhs, ImmWord rhs,
+ Label* label) {
+ if (rhs.value == 0 && cond == Assembler::Equal) {
+ Cbz(ARMRegister(lhs, 64), label);
+ } else if (rhs.value == 0 && cond == Assembler::NotEqual) {
+ Cbnz(ARMRegister(lhs, 64), label);
+ } else {
+ cmpPtr(lhs, rhs);
+ B(label, cond);
+ }
+}
+
+template <class L>
+void MacroAssembler::branchPtr(Condition cond, const Address& lhs, Register rhs,
+ L label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != lhs.base);
+ MOZ_ASSERT(scratch != rhs);
+ loadPtr(lhs, scratch);
+ branchPtr(cond, scratch, rhs, label);
+}
+
+void MacroAssembler::branchPtr(Condition cond, const Address& lhs, ImmPtr rhs,
+ Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != lhs.base);
+ loadPtr(lhs, scratch);
+ branchPtr(cond, scratch, rhs, label);
+}
+
+void MacroAssembler::branchPtr(Condition cond, const Address& lhs, ImmGCPtr rhs,
+ Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch1_64 = temps.AcquireX();
+ const ARMRegister scratch2_64 = temps.AcquireX();
+ MOZ_ASSERT(scratch1_64.asUnsized() != lhs.base);
+ MOZ_ASSERT(scratch2_64.asUnsized() != lhs.base);
+
+ movePtr(rhs, scratch1_64.asUnsized());
+ loadPtr(lhs, scratch2_64.asUnsized());
+ branchPtr(cond, scratch2_64.asUnsized(), scratch1_64.asUnsized(), label);
+}
+
+void MacroAssembler::branchPtr(Condition cond, const Address& lhs, ImmWord rhs,
+ Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != lhs.base);
+ loadPtr(lhs, scratch);
+ branchPtr(cond, scratch, rhs, label);
+}
+
+void MacroAssembler::branchPtr(Condition cond, const AbsoluteAddress& lhs,
+ Register rhs, Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != rhs);
+ loadPtr(lhs, scratch);
+ branchPtr(cond, scratch, rhs, label);
+}
+
+void MacroAssembler::branchPtr(Condition cond, const AbsoluteAddress& lhs,
+ ImmWord rhs, Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ loadPtr(lhs, scratch);
+ branchPtr(cond, scratch, rhs, label);
+}
+
+void MacroAssembler::branchPtr(Condition cond, wasm::SymbolicAddress lhs,
+ Register rhs, Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != rhs);
+ loadPtr(lhs, scratch);
+ branchPtr(cond, scratch, rhs, label);
+}
+
+void MacroAssembler::branchPtr(Condition cond, const BaseIndex& lhs,
+ ImmWord rhs, Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != lhs.base);
+ MOZ_ASSERT(scratch != lhs.index);
+ loadPtr(lhs, scratch);
+ branchPtr(cond, scratch, rhs, label);
+}
+
+void MacroAssembler::branchPtr(Condition cond, const BaseIndex& lhs,
+ Register rhs, Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != lhs.base);
+ MOZ_ASSERT(scratch != lhs.index);
+ loadPtr(lhs, scratch);
+ branchPtr(cond, scratch, rhs, label);
+}
+
+void MacroAssembler::branchPrivatePtr(Condition cond, const Address& lhs,
+ Register rhs, Label* label) {
+ branchPtr(cond, lhs, rhs, label);
+}
+
+void MacroAssembler::branchFloat(DoubleCondition cond, FloatRegister lhs,
+ FloatRegister rhs, Label* label) {
+ compareFloat(cond, lhs, rhs);
+ switch (cond) {
+ case DoubleNotEqual: {
+ Label unordered;
+ // not equal *and* ordered
+ branch(Overflow, &unordered);
+ branch(NotEqual, label);
+ bind(&unordered);
+ break;
+ }
+ case DoubleEqualOrUnordered:
+ branch(Overflow, label);
+ branch(Equal, label);
+ break;
+ default:
+ branch(Condition(cond), label);
+ }
+}
+
+void MacroAssembler::branchTruncateFloat32MaybeModUint32(FloatRegister src,
+ Register dest,
+ Label* fail) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+
+ ARMFPRegister src32(src, 32);
+ ARMRegister dest64(dest, 64);
+
+ MOZ_ASSERT(!scratch64.Is(dest64));
+
+ // Convert scalar to signed 64-bit fixed-point, rounding toward zero.
+ // In the case of overflow, the output is saturated.
+ // In the case of NaN and -0, the output is zero.
+ Fcvtzs(dest64, src32);
+
+ // Fail if the result is saturated, i.e. it's either INT64_MIN or INT64_MAX.
+ Add(scratch64, dest64, Operand(0x7fff'ffff'ffff'ffff));
+ Cmn(scratch64, 3);
+ B(fail, Assembler::Above);
+
+ // Clear upper 32 bits.
+ Uxtw(dest64, dest64);
+}
+
+void MacroAssembler::branchTruncateFloat32ToInt32(FloatRegister src,
+ Register dest, Label* fail) {
+ convertFloat32ToInt32(src, dest, fail, false);
+}
+
+void MacroAssembler::branchDouble(DoubleCondition cond, FloatRegister lhs,
+ FloatRegister rhs, Label* label) {
+ compareDouble(cond, lhs, rhs);
+ switch (cond) {
+ case DoubleNotEqual: {
+ Label unordered;
+ // not equal *and* ordered
+ branch(Overflow, &unordered);
+ branch(NotEqual, label);
+ bind(&unordered);
+ break;
+ }
+ case DoubleEqualOrUnordered:
+ branch(Overflow, label);
+ branch(Equal, label);
+ break;
+ default:
+ branch(Condition(cond), label);
+ }
+}
+
+void MacroAssembler::branchTruncateDoubleMaybeModUint32(FloatRegister src,
+ Register dest,
+ Label* fail) {
+ // ARMv8.3 chips support the FJCVTZS instruction, which handles exactly this
+ // logic. But the simulator does not implement it, and when the simulator runs
+ // on ARM64 hardware we want to override vixl's detection of it.
+#if defined(JS_SIMULATOR_ARM64) && (defined(__aarch64__) || defined(_M_ARM64))
+ const bool fjscvt = false;
+#else
+ const bool fjscvt = CPUHas(vixl::CPUFeatures::kFP, vixl::CPUFeatures::kJSCVT);
+#endif
+ if (fjscvt) {
+ Fjcvtzs(ARMRegister(dest, 32), ARMFPRegister(src, 64));
+ return;
+ }
+
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+
+ // An out of range integer will be saturated to the destination size.
+ ARMFPRegister src64(src, 64);
+ ARMRegister dest64(dest, 64);
+
+ MOZ_ASSERT(!scratch64.Is(dest64));
+
+ // Convert scalar to signed 64-bit fixed-point, rounding toward zero.
+ // In the case of overflow, the output is saturated.
+ // In the case of NaN and -0, the output is zero.
+ Fcvtzs(dest64, src64);
+
+ // Fail if the result is saturated, i.e. it's either INT64_MIN or INT64_MAX.
+ Add(scratch64, dest64, Operand(0x7fff'ffff'ffff'ffff));
+ Cmn(scratch64, 3);
+ B(fail, Assembler::Above);
+
+ // Clear upper 32 bits.
+ Uxtw(dest64, dest64);
+}
+
+void MacroAssembler::branchTruncateDoubleToInt32(FloatRegister src,
+ Register dest, Label* fail) {
+ ARMFPRegister src64(src, 64);
+ ARMRegister dest64(dest, 64);
+ ARMRegister dest32(dest, 32);
+
+ // Convert scalar to signed 64-bit fixed-point, rounding toward zero.
+ // In the case of overflow, the output is saturated.
+ // In the case of NaN and -0, the output is zero.
+ Fcvtzs(dest64, src64);
+
+ // Fail on overflow cases.
+ Cmp(dest64, Operand(dest32, vixl::SXTW));
+ B(fail, Assembler::NotEqual);
+
+ // Clear upper 32 bits.
+ Uxtw(dest64, dest64);
+}
+
+template <typename T>
+void MacroAssembler::branchAdd32(Condition cond, T src, Register dest,
+ Label* label) {
+ adds32(src, dest);
+ B(label, cond);
+}
+
+template <typename T>
+void MacroAssembler::branchSub32(Condition cond, T src, Register dest,
+ Label* label) {
+ subs32(src, dest);
+ branch(cond, label);
+}
+
+template <typename T>
+void MacroAssembler::branchMul32(Condition cond, T src, Register dest,
+ Label* label) {
+ MOZ_ASSERT(cond == Assembler::Overflow);
+ vixl::UseScratchRegisterScope temps(this);
+ mul32(src, dest, dest, label);
+}
+
+template <typename T>
+void MacroAssembler::branchRshift32(Condition cond, T src, Register dest,
+ Label* label) {
+ MOZ_ASSERT(cond == Zero || cond == NonZero);
+ rshift32(src, dest);
+ branch32(cond == Zero ? Equal : NotEqual, dest, Imm32(0), label);
+}
+
+void MacroAssembler::branchNeg32(Condition cond, Register reg, Label* label) {
+ MOZ_ASSERT(cond == Overflow);
+ negs32(reg);
+ B(label, cond);
+}
+
+template <typename T>
+void MacroAssembler::branchAddPtr(Condition cond, T src, Register dest,
+ Label* label) {
+ adds64(src, dest);
+ B(label, cond);
+}
+
+template <typename T>
+void MacroAssembler::branchSubPtr(Condition cond, T src, Register dest,
+ Label* label) {
+ subs64(src, dest);
+ B(label, cond);
+}
+
+void MacroAssembler::branchMulPtr(Condition cond, Register src, Register dest,
+ Label* label) {
+ MOZ_ASSERT(cond == Assembler::Overflow);
+
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ const ARMRegister src64(src, 64);
+ const ARMRegister dest64(dest, 64);
+
+ Smulh(scratch64, dest64, src64);
+ Mul(dest64, dest64, src64);
+ Cmp(scratch64, Operand(dest64, vixl::ASR, 63));
+ B(label, NotEqual);
+}
+
+void MacroAssembler::decBranchPtr(Condition cond, Register lhs, Imm32 rhs,
+ Label* label) {
+ Subs(ARMRegister(lhs, 64), ARMRegister(lhs, 64), Operand(rhs.value));
+ B(cond, label);
+}
+
+template <class L>
+void MacroAssembler::branchTest32(Condition cond, Register lhs, Register rhs,
+ L label) {
+ MOZ_ASSERT(cond == Zero || cond == NonZero || cond == Signed ||
+ cond == NotSigned);
+ // The x86-biased front end prefers |test foo, foo| to |cmp foo, #0|. We look
+ // for the former pattern and expand as Cbz/Cbnz when possible.
+ if (lhs == rhs && cond == Zero) {
+ Cbz(ARMRegister(lhs, 32), label);
+ } else if (lhs == rhs && cond == NonZero) {
+ Cbnz(ARMRegister(lhs, 32), label);
+ } else {
+ test32(lhs, rhs);
+ B(label, cond);
+ }
+}
+
+template <class L>
+void MacroAssembler::branchTest32(Condition cond, Register lhs, Imm32 rhs,
+ L label) {
+ MOZ_ASSERT(cond == Zero || cond == NonZero || cond == Signed ||
+ cond == NotSigned);
+ test32(lhs, rhs);
+ B(label, cond);
+}
+
+void MacroAssembler::branchTest32(Condition cond, const Address& lhs, Imm32 rhs,
+ Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != lhs.base);
+ load32(lhs, scratch);
+ branchTest32(cond, scratch, rhs, label);
+}
+
+void MacroAssembler::branchTest32(Condition cond, const AbsoluteAddress& lhs,
+ Imm32 rhs, Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ load32(lhs, scratch);
+ branchTest32(cond, scratch, rhs, label);
+}
+
+template <class L>
+void MacroAssembler::branchTestPtr(Condition cond, Register lhs, Register rhs,
+ L label) {
+ // See branchTest32.
+ MOZ_ASSERT(cond == Zero || cond == NonZero || cond == Signed ||
+ cond == NotSigned);
+ if (lhs == rhs && cond == Zero) {
+ Cbz(ARMRegister(lhs, 64), label);
+ } else if (lhs == rhs && cond == NonZero) {
+ Cbnz(ARMRegister(lhs, 64), label);
+ } else {
+ Tst(ARMRegister(lhs, 64), Operand(ARMRegister(rhs, 64)));
+ B(label, cond);
+ }
+}
+
+void MacroAssembler::branchTestPtr(Condition cond, Register lhs, Imm32 rhs,
+ Label* label) {
+ Tst(ARMRegister(lhs, 64), Operand(rhs.value));
+ B(label, cond);
+}
+
+void MacroAssembler::branchTestPtr(Condition cond, const Address& lhs,
+ Imm32 rhs, Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != lhs.base);
+ loadPtr(lhs, scratch);
+ branchTestPtr(cond, scratch, rhs, label);
+}
+
+template <class L>
+void MacroAssembler::branchTest64(Condition cond, Register64 lhs,
+ Register64 rhs, Register temp, L label) {
+ branchTestPtr(cond, lhs.reg, rhs.reg, label);
+}
+
+void MacroAssembler::branchTestUndefined(Condition cond, Register tag,
+ Label* label) {
+ branchTestUndefinedImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestUndefined(Condition cond, const Address& address,
+ Label* label) {
+ branchTestUndefinedImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestUndefined(Condition cond,
+ const BaseIndex& address,
+ Label* label) {
+ branchTestUndefinedImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestUndefined(Condition cond,
+ const ValueOperand& value,
+ Label* label) {
+ branchTestUndefinedImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestUndefinedImpl(Condition cond, const T& t,
+ Label* label) {
+ Condition c = testUndefined(cond, t);
+ B(label, c);
+}
+
+void MacroAssembler::branchTestInt32(Condition cond, Register tag,
+ Label* label) {
+ branchTestInt32Impl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestInt32(Condition cond, const Address& address,
+ Label* label) {
+ branchTestInt32Impl(cond, address, label);
+}
+
+void MacroAssembler::branchTestInt32(Condition cond, const BaseIndex& address,
+ Label* label) {
+ branchTestInt32Impl(cond, address, label);
+}
+
+void MacroAssembler::branchTestInt32(Condition cond, const ValueOperand& value,
+ Label* label) {
+ branchTestInt32Impl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestInt32Impl(Condition cond, const T& t,
+ Label* label) {
+ Condition c = testInt32(cond, t);
+ B(label, c);
+}
+
+void MacroAssembler::branchTestInt32Truthy(bool truthy,
+ const ValueOperand& value,
+ Label* label) {
+ Condition c = testInt32Truthy(truthy, value);
+ B(label, c);
+}
+
+void MacroAssembler::branchTestDouble(Condition cond, Register tag,
+ Label* label) {
+ branchTestDoubleImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestDouble(Condition cond, const Address& address,
+ Label* label) {
+ branchTestDoubleImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestDouble(Condition cond, const BaseIndex& address,
+ Label* label) {
+ branchTestDoubleImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestDouble(Condition cond, const ValueOperand& value,
+ Label* label) {
+ branchTestDoubleImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestDoubleImpl(Condition cond, const T& t,
+ Label* label) {
+ Condition c = testDouble(cond, t);
+ B(label, c);
+}
+
+void MacroAssembler::branchTestDoubleTruthy(bool truthy, FloatRegister reg,
+ Label* label) {
+ Fcmp(ARMFPRegister(reg, 64), 0.0);
+ if (!truthy) {
+ // falsy values are zero, and NaN.
+ branch(Zero, label);
+ branch(Overflow, label);
+ } else {
+ // truthy values are non-zero and not nan.
+ // If it is overflow
+ Label onFalse;
+ branch(Zero, &onFalse);
+ branch(Overflow, &onFalse);
+ B(label);
+ bind(&onFalse);
+ }
+}
+
+void MacroAssembler::branchTestNumber(Condition cond, Register tag,
+ Label* label) {
+ branchTestNumberImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestNumber(Condition cond, const ValueOperand& value,
+ Label* label) {
+ branchTestNumberImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestNumberImpl(Condition cond, const T& t,
+ Label* label) {
+ Condition c = testNumber(cond, t);
+ B(label, c);
+}
+
+void MacroAssembler::branchTestBoolean(Condition cond, Register tag,
+ Label* label) {
+ branchTestBooleanImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestBoolean(Condition cond, const Address& address,
+ Label* label) {
+ branchTestBooleanImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestBoolean(Condition cond, const BaseIndex& address,
+ Label* label) {
+ branchTestBooleanImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestBoolean(Condition cond,
+ const ValueOperand& value,
+ Label* label) {
+ branchTestBooleanImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestBooleanImpl(Condition cond, const T& tag,
+ Label* label) {
+ Condition c = testBoolean(cond, tag);
+ B(label, c);
+}
+
+void MacroAssembler::branchTestBooleanTruthy(bool truthy,
+ const ValueOperand& value,
+ Label* label) {
+ Condition c = testBooleanTruthy(truthy, value);
+ B(label, c);
+}
+
+void MacroAssembler::branchTestString(Condition cond, Register tag,
+ Label* label) {
+ branchTestStringImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestString(Condition cond, const Address& address,
+ Label* label) {
+ branchTestStringImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestString(Condition cond, const BaseIndex& address,
+ Label* label) {
+ branchTestStringImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestString(Condition cond, const ValueOperand& value,
+ Label* label) {
+ branchTestStringImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestStringImpl(Condition cond, const T& t,
+ Label* label) {
+ Condition c = testString(cond, t);
+ B(label, c);
+}
+
+void MacroAssembler::branchTestStringTruthy(bool truthy,
+ const ValueOperand& value,
+ Label* label) {
+ Condition c = testStringTruthy(truthy, value);
+ B(label, c);
+}
+
+void MacroAssembler::branchTestSymbol(Condition cond, Register tag,
+ Label* label) {
+ branchTestSymbolImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestSymbol(Condition cond, const Address& address,
+ Label* label) {
+ branchTestSymbolImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestSymbol(Condition cond, const BaseIndex& address,
+ Label* label) {
+ branchTestSymbolImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestSymbol(Condition cond, const ValueOperand& value,
+ Label* label) {
+ branchTestSymbolImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestSymbolImpl(Condition cond, const T& t,
+ Label* label) {
+ Condition c = testSymbol(cond, t);
+ B(label, c);
+}
+
+void MacroAssembler::branchTestBigInt(Condition cond, Register tag,
+ Label* label) {
+ branchTestBigIntImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestBigInt(Condition cond, const Address& address,
+ Label* label) {
+ branchTestBigIntImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestBigInt(Condition cond, const BaseIndex& address,
+ Label* label) {
+ branchTestBigIntImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestBigInt(Condition cond, const ValueOperand& value,
+ Label* label) {
+ branchTestBigIntImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestBigIntImpl(Condition cond, const T& t,
+ Label* label) {
+ Condition c = testBigInt(cond, t);
+ B(label, c);
+}
+
+void MacroAssembler::branchTestBigIntTruthy(bool truthy,
+ const ValueOperand& value,
+ Label* label) {
+ Condition c = testBigIntTruthy(truthy, value);
+ B(label, c);
+}
+
+void MacroAssembler::branchTestNull(Condition cond, Register tag,
+ Label* label) {
+ branchTestNullImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestNull(Condition cond, const Address& address,
+ Label* label) {
+ branchTestNullImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestNull(Condition cond, const BaseIndex& address,
+ Label* label) {
+ branchTestNullImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestNull(Condition cond, const ValueOperand& value,
+ Label* label) {
+ branchTestNullImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestNullImpl(Condition cond, const T& t,
+ Label* label) {
+ Condition c = testNull(cond, t);
+ B(label, c);
+}
+
+void MacroAssembler::branchTestObject(Condition cond, Register tag,
+ Label* label) {
+ branchTestObjectImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestObject(Condition cond, const Address& address,
+ Label* label) {
+ branchTestObjectImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestObject(Condition cond, const BaseIndex& address,
+ Label* label) {
+ branchTestObjectImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestObject(Condition cond, const ValueOperand& value,
+ Label* label) {
+ branchTestObjectImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestObjectImpl(Condition cond, const T& t,
+ Label* label) {
+ Condition c = testObject(cond, t);
+ B(label, c);
+}
+
+void MacroAssembler::branchTestGCThing(Condition cond, const Address& address,
+ Label* label) {
+ branchTestGCThingImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestGCThing(Condition cond, const BaseIndex& address,
+ Label* label) {
+ branchTestGCThingImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestGCThing(Condition cond,
+ const ValueOperand& value,
+ Label* label) {
+ branchTestGCThingImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestGCThingImpl(Condition cond, const T& src,
+ Label* label) {
+ Condition c = testGCThing(cond, src);
+ B(label, c);
+}
+
+void MacroAssembler::branchTestPrimitive(Condition cond, Register tag,
+ Label* label) {
+ branchTestPrimitiveImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestPrimitive(Condition cond,
+ const ValueOperand& value,
+ Label* label) {
+ branchTestPrimitiveImpl(cond, value, label);
+}
+
+template <typename T>
+void MacroAssembler::branchTestPrimitiveImpl(Condition cond, const T& t,
+ Label* label) {
+ Condition c = testPrimitive(cond, t);
+ B(label, c);
+}
+
+void MacroAssembler::branchTestMagic(Condition cond, Register tag,
+ Label* label) {
+ branchTestMagicImpl(cond, tag, label);
+}
+
+void MacroAssembler::branchTestMagic(Condition cond, const Address& address,
+ Label* label) {
+ branchTestMagicImpl(cond, address, label);
+}
+
+void MacroAssembler::branchTestMagic(Condition cond, const BaseIndex& address,
+ Label* label) {
+ branchTestMagicImpl(cond, address, label);
+}
+
+template <class L>
+void MacroAssembler::branchTestMagic(Condition cond, const ValueOperand& value,
+ L label) {
+ branchTestMagicImpl(cond, value, label);
+}
+
+template <typename T, class L>
+void MacroAssembler::branchTestMagicImpl(Condition cond, const T& t, L label) {
+ Condition c = testMagic(cond, t);
+ B(label, c);
+}
+
+void MacroAssembler::branchTestMagic(Condition cond, const Address& valaddr,
+ JSWhyMagic why, Label* label) {
+ uint64_t magic = MagicValue(why).asRawBits();
+ cmpPtr(valaddr, ImmWord(magic));
+ B(label, cond);
+}
+
+void MacroAssembler::branchTestValue(Condition cond, const BaseIndex& lhs,
+ const ValueOperand& rhs, Label* label) {
+ MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
+ branchPtr(cond, lhs, rhs.valueReg(), label);
+}
+
+template <typename T>
+void MacroAssembler::testNumberSet(Condition cond, const T& src,
+ Register dest) {
+ cond = testNumber(cond, src);
+ emitSet(cond, dest);
+}
+
+template <typename T>
+void MacroAssembler::testBooleanSet(Condition cond, const T& src,
+ Register dest) {
+ cond = testBoolean(cond, src);
+ emitSet(cond, dest);
+}
+
+template <typename T>
+void MacroAssembler::testStringSet(Condition cond, const T& src,
+ Register dest) {
+ cond = testString(cond, src);
+ emitSet(cond, dest);
+}
+
+template <typename T>
+void MacroAssembler::testSymbolSet(Condition cond, const T& src,
+ Register dest) {
+ cond = testSymbol(cond, src);
+ emitSet(cond, dest);
+}
+
+template <typename T>
+void MacroAssembler::testBigIntSet(Condition cond, const T& src,
+ Register dest) {
+ cond = testBigInt(cond, src);
+ emitSet(cond, dest);
+}
+
+void MacroAssembler::branchToComputedAddress(const BaseIndex& addr) {
+ vixl::UseScratchRegisterScope temps(&this->asVIXL());
+ const ARMRegister scratch64 = temps.AcquireX();
+ loadPtr(addr, scratch64.asUnsized());
+ Br(scratch64);
+}
+
+void MacroAssembler::cmp32Move32(Condition cond, Register lhs, Register rhs,
+ Register src, Register dest) {
+ cmp32(lhs, rhs);
+ Csel(ARMRegister(dest, 32), ARMRegister(src, 32), ARMRegister(dest, 32),
+ cond);
+}
+
+void MacroAssembler::cmp32Move32(Condition cond, Register lhs,
+ const Address& rhs, Register src,
+ Register dest) {
+ MOZ_CRASH("NYI");
+}
+
+void MacroAssembler::cmpPtrMovePtr(Condition cond, Register lhs, Register rhs,
+ Register src, Register dest) {
+ cmpPtr(lhs, rhs);
+ Csel(ARMRegister(dest, 64), ARMRegister(src, 64), ARMRegister(dest, 64),
+ cond);
+}
+
+void MacroAssembler::cmpPtrMovePtr(Condition cond, Register lhs,
+ const Address& rhs, Register src,
+ Register dest) {
+ MOZ_CRASH("NYI");
+}
+
+void MacroAssembler::cmp32Load32(Condition cond, Register lhs,
+ const Address& rhs, const Address& src,
+ Register dest) {
+ MOZ_CRASH("NYI");
+}
+
+void MacroAssembler::cmp32Load32(Condition cond, Register lhs, Register rhs,
+ const Address& src, Register dest) {
+ MOZ_CRASH("NYI");
+}
+
+void MacroAssembler::cmp32MovePtr(Condition cond, Register lhs, Imm32 rhs,
+ Register src, Register dest) {
+ cmp32(lhs, rhs);
+ Csel(ARMRegister(dest, 64), ARMRegister(src, 64), ARMRegister(dest, 64),
+ cond);
+}
+
+void MacroAssembler::cmp32LoadPtr(Condition cond, const Address& lhs, Imm32 rhs,
+ const Address& src, Register dest) {
+ // ARM64 does not support conditional loads, so we use a branch with a CSel
+ // (to prevent Spectre attacks).
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+
+ // Can't use branch32() here, because it may select Cbz/Cbnz which don't
+ // affect condition flags.
+ Label done;
+ cmp32(lhs, rhs);
+ B(&done, Assembler::InvertCondition(cond));
+
+ loadPtr(src, scratch64.asUnsized());
+ Csel(ARMRegister(dest, 64), scratch64, ARMRegister(dest, 64), cond);
+ bind(&done);
+}
+
+void MacroAssembler::test32LoadPtr(Condition cond, const Address& addr,
+ Imm32 mask, const Address& src,
+ Register dest) {
+ MOZ_ASSERT(cond == Assembler::Zero || cond == Assembler::NonZero);
+
+ // ARM64 does not support conditional loads, so we use a branch with a CSel
+ // (to prevent Spectre attacks).
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ Label done;
+ branchTest32(Assembler::InvertCondition(cond), addr, mask, &done);
+ loadPtr(src, scratch64.asUnsized());
+ Csel(ARMRegister(dest, 64), scratch64, ARMRegister(dest, 64), cond);
+ bind(&done);
+}
+
+void MacroAssembler::test32MovePtr(Condition cond, const Address& addr,
+ Imm32 mask, Register src, Register dest) {
+ MOZ_ASSERT(cond == Assembler::Zero || cond == Assembler::NonZero);
+ test32(addr, mask);
+ Csel(ARMRegister(dest, 64), ARMRegister(src, 64), ARMRegister(dest, 64),
+ cond);
+}
+
+void MacroAssembler::spectreMovePtr(Condition cond, Register src,
+ Register dest) {
+ Csel(ARMRegister(dest, 64), ARMRegister(src, 64), ARMRegister(dest, 64),
+ cond);
+}
+
+void MacroAssembler::spectreZeroRegister(Condition cond, Register,
+ Register dest) {
+ Csel(ARMRegister(dest, 64), ARMRegister(dest, 64), vixl::xzr,
+ Assembler::InvertCondition(cond));
+}
+
+void MacroAssembler::spectreBoundsCheck32(Register index, Register length,
+ Register maybeScratch,
+ Label* failure) {
+ MOZ_ASSERT(length != maybeScratch);
+ MOZ_ASSERT(index != maybeScratch);
+
+ branch32(Assembler::BelowOrEqual, length, index, failure);
+
+ if (JitOptions.spectreIndexMasking) {
+ Csel(ARMRegister(index, 32), ARMRegister(index, 32), vixl::wzr,
+ Assembler::Above);
+ }
+}
+
+void MacroAssembler::spectreBoundsCheck32(Register index, const Address& length,
+ Register maybeScratch,
+ Label* failure) {
+ MOZ_ASSERT(index != length.base);
+ MOZ_ASSERT(length.base != maybeScratch);
+ MOZ_ASSERT(index != maybeScratch);
+
+ branch32(Assembler::BelowOrEqual, length, index, failure);
+
+ if (JitOptions.spectreIndexMasking) {
+ Csel(ARMRegister(index, 32), ARMRegister(index, 32), vixl::wzr,
+ Assembler::Above);
+ }
+}
+
+void MacroAssembler::spectreBoundsCheckPtr(Register index, Register length,
+ Register maybeScratch,
+ Label* failure) {
+ MOZ_ASSERT(length != maybeScratch);
+ MOZ_ASSERT(index != maybeScratch);
+
+ branchPtr(Assembler::BelowOrEqual, length, index, failure);
+
+ if (JitOptions.spectreIndexMasking) {
+ Csel(ARMRegister(index, 64), ARMRegister(index, 64), vixl::xzr,
+ Assembler::Above);
+ }
+}
+
+void MacroAssembler::spectreBoundsCheckPtr(Register index,
+ const Address& length,
+ Register maybeScratch,
+ Label* failure) {
+ MOZ_ASSERT(index != length.base);
+ MOZ_ASSERT(length.base != maybeScratch);
+ MOZ_ASSERT(index != maybeScratch);
+
+ branchPtr(Assembler::BelowOrEqual, length, index, failure);
+
+ if (JitOptions.spectreIndexMasking) {
+ Csel(ARMRegister(index, 64), ARMRegister(index, 64), vixl::xzr,
+ Assembler::Above);
+ }
+}
+
+// ========================================================================
+// Memory access primitives.
+void MacroAssembler::storeUncanonicalizedDouble(FloatRegister src,
+ const Address& dest) {
+ Str(ARMFPRegister(src, 64), toMemOperand(dest));
+}
+void MacroAssembler::storeUncanonicalizedDouble(FloatRegister src,
+ const BaseIndex& dest) {
+ doBaseIndex(ARMFPRegister(src, 64), dest, vixl::STR_d);
+}
+
+void MacroAssembler::storeUncanonicalizedFloat32(FloatRegister src,
+ const Address& addr) {
+ Str(ARMFPRegister(src, 32), toMemOperand(addr));
+}
+void MacroAssembler::storeUncanonicalizedFloat32(FloatRegister src,
+ const BaseIndex& addr) {
+ doBaseIndex(ARMFPRegister(src, 32), addr, vixl::STR_s);
+}
+
+void MacroAssembler::memoryBarrier(MemoryBarrierBits barrier) {
+ // Bug 1715494: Discriminating barriers such as StoreStore are hard to reason
+ // about. Execute the full barrier for everything that requires a barrier.
+ if (barrier) {
+ Dmb(vixl::InnerShareable, vixl::BarrierAll);
+ }
+}
+
+// ===============================================================
+// Clamping functions.
+
+void MacroAssembler::clampIntToUint8(Register reg) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ const ARMRegister reg32(reg, 32);
+ MOZ_ASSERT(!scratch32.Is(reg32));
+
+ Cmp(reg32, Operand(reg32, vixl::UXTB));
+ Csel(reg32, reg32, vixl::wzr, Assembler::GreaterThanOrEqual);
+ Mov(scratch32, Operand(0xff));
+ Csel(reg32, reg32, scratch32, Assembler::LessThanOrEqual);
+}
+
+void MacroAssembler::fallibleUnboxPtr(const ValueOperand& src, Register dest,
+ JSValueType type, Label* fail) {
+ MOZ_ASSERT(type == JSVAL_TYPE_OBJECT || type == JSVAL_TYPE_STRING ||
+ type == JSVAL_TYPE_SYMBOL || type == JSVAL_TYPE_BIGINT);
+ // dest := src XOR mask
+ // fail if dest >> JSVAL_TAG_SHIFT != 0
+ const ARMRegister src64(src.valueReg(), 64);
+ const ARMRegister dest64(dest, 64);
+ Eor(dest64, src64, Operand(JSVAL_TYPE_TO_SHIFTED_TAG(type)));
+ Cmp(vixl::xzr, Operand(dest64, vixl::LSR, JSVAL_TAG_SHIFT));
+ j(Assembler::NotEqual, fail);
+}
+
+void MacroAssembler::fallibleUnboxPtr(const Address& src, Register dest,
+ JSValueType type, Label* fail) {
+ loadValue(src, ValueOperand(dest));
+ fallibleUnboxPtr(ValueOperand(dest), dest, type, fail);
+}
+
+void MacroAssembler::fallibleUnboxPtr(const BaseIndex& src, Register dest,
+ JSValueType type, Label* fail) {
+ loadValue(src, ValueOperand(dest));
+ fallibleUnboxPtr(ValueOperand(dest), dest, type, fail);
+}
+
+//}}} check_macroassembler_style
+
+// Wasm SIMD
+
+static inline ARMFPRegister SimdReg(FloatRegister r) {
+ MOZ_ASSERT(r.isSimd128());
+ return ARMFPRegister(r, 128);
+}
+
+static inline ARMFPRegister Simd16B(FloatRegister r) {
+ return SimdReg(r).V16B();
+}
+
+static inline ARMFPRegister Simd8B(FloatRegister r) { return SimdReg(r).V8B(); }
+
+static inline ARMFPRegister Simd8H(FloatRegister r) { return SimdReg(r).V8H(); }
+
+static inline ARMFPRegister Simd4H(FloatRegister r) { return SimdReg(r).V4H(); }
+
+static inline ARMFPRegister Simd4S(FloatRegister r) { return SimdReg(r).V4S(); }
+
+static inline ARMFPRegister Simd2S(FloatRegister r) { return SimdReg(r).V2S(); }
+
+static inline ARMFPRegister Simd2D(FloatRegister r) { return SimdReg(r).V2D(); }
+
+static inline ARMFPRegister Simd1D(FloatRegister r) { return SimdReg(r).V1D(); }
+
+static inline ARMFPRegister SimdQ(FloatRegister r) { return SimdReg(r).Q(); }
+
+//{{{ check_macroassembler_style
+
+// Moves
+
+void MacroAssembler::moveSimd128(FloatRegister src, FloatRegister dest) {
+ if (src != dest) {
+ Mov(SimdReg(dest), SimdReg(src));
+ }
+}
+
+void MacroAssembler::loadConstantSimd128(const SimdConstant& v,
+ FloatRegister dest) {
+ // Movi does not yet generate good code for many cases, bug 1664397.
+ SimdConstant c = SimdConstant::CreateX2((const int64_t*)v.bytes());
+ Movi(SimdReg(dest), c.asInt64x2()[1], c.asInt64x2()[0]);
+}
+
+// Splat
+
+void MacroAssembler::splatX16(Register src, FloatRegister dest) {
+ Dup(Simd16B(dest), ARMRegister(src, 32));
+}
+
+void MacroAssembler::splatX16(uint32_t srcLane, FloatRegister src,
+ FloatRegister dest) {
+ Dup(Simd16B(dest), Simd16B(src), srcLane);
+}
+
+void MacroAssembler::splatX8(Register src, FloatRegister dest) {
+ Dup(Simd8H(dest), ARMRegister(src, 32));
+}
+
+void MacroAssembler::splatX8(uint32_t srcLane, FloatRegister src,
+ FloatRegister dest) {
+ Dup(Simd8H(dest), Simd8H(src), srcLane);
+}
+
+void MacroAssembler::splatX4(Register src, FloatRegister dest) {
+ Dup(Simd4S(dest), ARMRegister(src, 32));
+}
+
+void MacroAssembler::splatX4(FloatRegister src, FloatRegister dest) {
+ Dup(Simd4S(dest), ARMFPRegister(src), 0);
+}
+
+void MacroAssembler::splatX2(Register64 src, FloatRegister dest) {
+ Dup(Simd2D(dest), ARMRegister(src.reg, 64));
+}
+
+void MacroAssembler::splatX2(FloatRegister src, FloatRegister dest) {
+ Dup(Simd2D(dest), ARMFPRegister(src), 0);
+}
+
+// Extract lane as scalar. Float extraction does not canonicalize the value.
+
+void MacroAssembler::extractLaneInt8x16(uint32_t lane, FloatRegister src,
+ Register dest_) {
+ MOZ_ASSERT(lane < 16);
+ ARMRegister dest(dest_, 32);
+ Umov(dest, Simd4S(src), lane / 4);
+ Sbfx(dest, dest, (lane % 4) * 8, 8);
+}
+
+void MacroAssembler::unsignedExtractLaneInt8x16(uint32_t lane,
+ FloatRegister src,
+ Register dest_) {
+ MOZ_ASSERT(lane < 16);
+ ARMRegister dest(dest_, 32);
+ Umov(dest, Simd4S(src), lane / 4);
+ Ubfx(dest, dest, (lane % 4) * 8, 8);
+}
+
+void MacroAssembler::extractLaneInt16x8(uint32_t lane, FloatRegister src,
+ Register dest_) {
+ MOZ_ASSERT(lane < 8);
+ ARMRegister dest(dest_, 32);
+ Umov(dest, Simd4S(src), lane / 2);
+ Sbfx(dest, dest, (lane % 2) * 16, 16);
+}
+
+void MacroAssembler::unsignedExtractLaneInt16x8(uint32_t lane,
+ FloatRegister src,
+ Register dest_) {
+ MOZ_ASSERT(lane < 8);
+ ARMRegister dest(dest_, 32);
+ Umov(dest, Simd4S(src), lane / 2);
+ Ubfx(dest, dest, (lane % 2) * 16, 16);
+}
+
+void MacroAssembler::extractLaneInt32x4(uint32_t lane, FloatRegister src,
+ Register dest_) {
+ MOZ_ASSERT(lane < 4);
+ ARMRegister dest(dest_, 32);
+ Umov(dest, Simd4S(src), lane);
+}
+
+void MacroAssembler::extractLaneInt64x2(uint32_t lane, FloatRegister src,
+ Register64 dest_) {
+ MOZ_ASSERT(lane < 2);
+ ARMRegister dest(dest_.reg, 64);
+ Umov(dest, Simd2D(src), lane);
+}
+
+void MacroAssembler::extractLaneFloat32x4(uint32_t lane, FloatRegister src,
+ FloatRegister dest) {
+ MOZ_ASSERT(lane < 4);
+ Mov(ARMFPRegister(dest).V4S(), 0, Simd4S(src), lane);
+}
+
+void MacroAssembler::extractLaneFloat64x2(uint32_t lane, FloatRegister src,
+ FloatRegister dest) {
+ MOZ_ASSERT(lane < 2);
+ Mov(ARMFPRegister(dest).V2D(), 0, Simd2D(src), lane);
+}
+
+// Replace lane value
+
+void MacroAssembler::replaceLaneInt8x16(unsigned lane, Register rhs,
+ FloatRegister lhsDest) {
+ MOZ_ASSERT(lane < 16);
+ Mov(Simd16B(lhsDest), lane, ARMRegister(rhs, 32));
+}
+
+void MacroAssembler::replaceLaneInt16x8(unsigned lane, Register rhs,
+ FloatRegister lhsDest) {
+ MOZ_ASSERT(lane < 8);
+ Mov(Simd8H(lhsDest), lane, ARMRegister(rhs, 32));
+}
+
+void MacroAssembler::replaceLaneInt32x4(unsigned lane, Register rhs,
+ FloatRegister lhsDest) {
+ MOZ_ASSERT(lane < 4);
+ Mov(Simd4S(lhsDest), lane, ARMRegister(rhs, 32));
+}
+
+void MacroAssembler::replaceLaneInt64x2(unsigned lane, Register64 rhs,
+ FloatRegister lhsDest) {
+ MOZ_ASSERT(lane < 2);
+ Mov(Simd2D(lhsDest), lane, ARMRegister(rhs.reg, 64));
+}
+
+void MacroAssembler::replaceLaneFloat32x4(unsigned lane, FloatRegister rhs,
+ FloatRegister lhsDest) {
+ MOZ_ASSERT(lane < 4);
+ Mov(Simd4S(lhsDest), lane, ARMFPRegister(rhs).V4S(), 0);
+}
+
+void MacroAssembler::replaceLaneFloat64x2(unsigned lane, FloatRegister rhs,
+ FloatRegister lhsDest) {
+ MOZ_ASSERT(lane < 2);
+ Mov(Simd2D(lhsDest), lane, ARMFPRegister(rhs).V2D(), 0);
+}
+
+// Shuffle - blend and permute with immediate indices, and its many
+// specializations. Lane values other than those mentioned are illegal.
+
+// lane values 0..31
+void MacroAssembler::shuffleInt8x16(const uint8_t lanes[16], FloatRegister lhs,
+ FloatRegister rhs, FloatRegister dest) {
+ // The general solution generates ho-hum code. Realistic programs will use
+ // patterns that can be specialized, and this will be much better. That will
+ // be handled by bug 1656834, so don't worry about it here.
+
+ // Set scratch to the lanevalue when it selects from lhs or ~lanevalue when it
+ // selects from rhs.
+ ScratchSimd128Scope scratch(*this);
+ int8_t idx[16];
+
+ if (lhs == rhs) {
+ for (unsigned i = 0; i < 16; i++) {
+ idx[i] = lanes[i] < 16 ? lanes[i] : (lanes[i] - 16);
+ }
+ loadConstantSimd128(SimdConstant::CreateX16(idx), scratch);
+ Tbl(Simd16B(dest), Simd16B(lhs), Simd16B(scratch));
+ return;
+ }
+
+ if (rhs != dest) {
+ for (unsigned i = 0; i < 16; i++) {
+ idx[i] = lanes[i] < 16 ? lanes[i] : ~(lanes[i] - 16);
+ }
+ } else {
+ MOZ_ASSERT(lhs != dest);
+ for (unsigned i = 0; i < 16; i++) {
+ idx[i] = lanes[i] < 16 ? ~lanes[i] : (lanes[i] - 16);
+ }
+ std::swap(lhs, rhs);
+ }
+ loadConstantSimd128(SimdConstant::CreateX16(idx), scratch);
+ Tbl(Simd16B(dest), Simd16B(lhs), Simd16B(scratch));
+ Not(Simd16B(scratch), Simd16B(scratch));
+ Tbx(Simd16B(dest), Simd16B(rhs), Simd16B(scratch));
+}
+
+void MacroAssembler::shuffleInt8x16(const uint8_t lanes[16], FloatRegister rhs,
+ FloatRegister lhsDest) {
+ shuffleInt8x16(lanes, lhsDest, rhs, lhsDest);
+}
+
+void MacroAssembler::blendInt8x16(const uint8_t lanes[16], FloatRegister lhs,
+ FloatRegister rhs, FloatRegister dest) {
+ ScratchSimd128Scope scratch(*this);
+ int8_t lanes_[16];
+
+ if (rhs == dest) {
+ for (unsigned i = 0; i < 16; i++) {
+ lanes_[i] = lanes[i] == 0 ? i : 16 + i;
+ }
+ loadConstantSimd128(SimdConstant::CreateX16(lanes_), scratch);
+ Tbx(Simd16B(dest), Simd16B(lhs), Simd16B(scratch));
+ return;
+ }
+
+ moveSimd128(lhs, dest);
+ for (unsigned i = 0; i < 16; i++) {
+ lanes_[i] = lanes[i] != 0 ? i : 16 + i;
+ }
+ loadConstantSimd128(SimdConstant::CreateX16(lanes_), scratch);
+ Tbx(Simd16B(dest), Simd16B(rhs), Simd16B(scratch));
+}
+
+void MacroAssembler::blendInt16x8(const uint16_t lanes[8], FloatRegister lhs,
+ FloatRegister rhs, FloatRegister dest) {
+ static_assert(sizeof(const uint16_t /*lanes*/[8]) == sizeof(uint8_t[16]));
+ blendInt8x16(reinterpret_cast<const uint8_t*>(lanes), lhs, rhs, dest);
+}
+
+void MacroAssembler::laneSelectSimd128(FloatRegister mask, FloatRegister lhs,
+ FloatRegister rhs, FloatRegister dest) {
+ MOZ_ASSERT(mask == dest);
+ Bsl(Simd16B(mask), Simd16B(lhs), Simd16B(rhs));
+}
+
+void MacroAssembler::interleaveHighInt16x8(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Zip2(Simd8H(dest), Simd8H(lhs), Simd8H(rhs));
+}
+
+void MacroAssembler::interleaveHighInt32x4(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Zip2(Simd4S(dest), Simd4S(lhs), Simd4S(rhs));
+}
+
+void MacroAssembler::interleaveHighInt64x2(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Zip2(Simd2D(dest), Simd2D(lhs), Simd2D(rhs));
+}
+
+void MacroAssembler::interleaveHighInt8x16(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Zip2(Simd16B(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+void MacroAssembler::interleaveLowInt16x8(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Zip1(Simd8H(dest), Simd8H(lhs), Simd8H(rhs));
+}
+
+void MacroAssembler::interleaveLowInt32x4(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Zip1(Simd4S(dest), Simd4S(lhs), Simd4S(rhs));
+}
+
+void MacroAssembler::interleaveLowInt64x2(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Zip1(Simd2D(dest), Simd2D(lhs), Simd2D(rhs));
+}
+
+void MacroAssembler::interleaveLowInt8x16(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Zip1(Simd16B(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+void MacroAssembler::permuteInt8x16(const uint8_t lanes[16], FloatRegister src,
+ FloatRegister dest) {
+ ScratchSimd128Scope scratch(*this);
+ loadConstantSimd128(SimdConstant::CreateX16((const int8_t*)lanes), scratch);
+ Tbl(Simd16B(dest), Simd16B(src), Simd16B(scratch));
+}
+
+void MacroAssembler::permuteInt16x8(const uint16_t lanes[8], FloatRegister src,
+ FloatRegister dest) {
+ MOZ_ASSERT(lanes[0] < 8 && lanes[1] < 8 && lanes[2] < 8 && lanes[3] < 8 &&
+ lanes[4] < 8 && lanes[5] < 8 && lanes[6] < 8 && lanes[7] < 8);
+ const int8_t lanes_[16] = {
+ (int8_t)(lanes[0] << 1), (int8_t)((lanes[0] << 1) + 1),
+ (int8_t)(lanes[1] << 1), (int8_t)((lanes[1] << 1) + 1),
+ (int8_t)(lanes[2] << 1), (int8_t)((lanes[2] << 1) + 1),
+ (int8_t)(lanes[3] << 1), (int8_t)((lanes[3] << 1) + 1),
+ (int8_t)(lanes[4] << 1), (int8_t)((lanes[4] << 1) + 1),
+ (int8_t)(lanes[5] << 1), (int8_t)((lanes[5] << 1) + 1),
+ (int8_t)(lanes[6] << 1), (int8_t)((lanes[6] << 1) + 1),
+ (int8_t)(lanes[7] << 1), (int8_t)((lanes[7] << 1) + 1),
+ };
+ ScratchSimd128Scope scratch(*this);
+ loadConstantSimd128(SimdConstant::CreateX16(lanes_), scratch);
+ Tbl(Simd16B(dest), Simd16B(src), Simd16B(scratch));
+}
+
+void MacroAssembler::permuteInt32x4(const uint32_t lanes[4], FloatRegister src,
+ FloatRegister dest) {
+ ScratchSimd128Scope scratch(*this);
+ const int8_t lanes_[16] = {
+ (int8_t)(lanes[0] << 2), (int8_t)((lanes[0] << 2) + 1),
+ (int8_t)((lanes[0] << 2) + 2), (int8_t)((lanes[0] << 2) + 3),
+ (int8_t)(lanes[1] << 2), (int8_t)((lanes[1] << 2) + 1),
+ (int8_t)((lanes[1] << 2) + 2), (int8_t)((lanes[1] << 2) + 3),
+ (int8_t)(lanes[2] << 2), (int8_t)((lanes[2] << 2) + 1),
+ (int8_t)((lanes[2] << 2) + 2), (int8_t)((lanes[2] << 2) + 3),
+ (int8_t)(lanes[3] << 2), (int8_t)((lanes[3] << 2) + 1),
+ (int8_t)((lanes[3] << 2) + 2), (int8_t)((lanes[3] << 2) + 3),
+ };
+ loadConstantSimd128(SimdConstant::CreateX16(lanes_), scratch);
+ Tbl(Simd16B(dest), Simd16B(src), Simd16B(scratch));
+}
+
+void MacroAssembler::rotateRightSimd128(FloatRegister src, FloatRegister dest,
+ uint32_t shift) {
+ Ext(Simd16B(dest), Simd16B(src), Simd16B(src), shift);
+}
+
+void MacroAssembler::leftShiftSimd128(Imm32 count, FloatRegister src,
+ FloatRegister dest) {
+ MOZ_ASSERT(count.value < 16);
+ ScratchSimd128Scope scratch(*this);
+ Movi(Simd16B(scratch), 0);
+ Ext(Simd16B(dest), Simd16B(scratch), Simd16B(src), 16 - count.value);
+}
+
+void MacroAssembler::rightShiftSimd128(Imm32 count, FloatRegister src,
+ FloatRegister dest) {
+ MOZ_ASSERT(count.value < 16);
+ ScratchSimd128Scope scratch(*this);
+ Movi(Simd16B(scratch), 0);
+ Ext(Simd16B(dest), Simd16B(src), Simd16B(scratch), count.value);
+}
+
+void MacroAssembler::concatAndRightShiftSimd128(FloatRegister lhs,
+ FloatRegister rhs,
+ FloatRegister dest,
+ uint32_t shift) {
+ MOZ_ASSERT(shift < 16);
+ Ext(Simd16B(dest), Simd16B(rhs), Simd16B(lhs), shift);
+}
+
+// Reverse bytes in lanes.
+
+void MacroAssembler::reverseInt16x8(FloatRegister src, FloatRegister dest) {
+ Rev16(Simd16B(dest), Simd16B(src));
+}
+
+void MacroAssembler::reverseInt32x4(FloatRegister src, FloatRegister dest) {
+ Rev32(Simd16B(dest), Simd16B(src));
+}
+
+void MacroAssembler::reverseInt64x2(FloatRegister src, FloatRegister dest) {
+ Rev64(Simd16B(dest), Simd16B(src));
+}
+
+// Swizzle - permute with variable indices. `rhs` holds the lanes parameter.
+
+void MacroAssembler::swizzleInt8x16(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Tbl(Simd16B(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+void MacroAssembler::swizzleInt8x16Relaxed(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Tbl(Simd16B(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+// Integer Add
+
+void MacroAssembler::addInt8x16(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Add(Simd16B(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+void MacroAssembler::addInt16x8(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Add(Simd8H(dest), Simd8H(lhs), Simd8H(rhs));
+}
+
+void MacroAssembler::addInt32x4(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Add(Simd4S(dest), Simd4S(lhs), Simd4S(rhs));
+}
+
+void MacroAssembler::addInt64x2(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Add(Simd2D(dest), Simd2D(lhs), Simd2D(rhs));
+}
+
+// Integer Subtract
+
+void MacroAssembler::subInt8x16(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Sub(Simd16B(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+void MacroAssembler::subInt16x8(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Sub(Simd8H(dest), Simd8H(lhs), Simd8H(rhs));
+}
+
+void MacroAssembler::subInt32x4(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Sub(Simd4S(dest), Simd4S(lhs), Simd4S(rhs));
+}
+
+void MacroAssembler::subInt64x2(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Sub(Simd2D(dest), Simd2D(lhs), Simd2D(rhs));
+}
+
+// Integer Multiply
+
+void MacroAssembler::mulInt16x8(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Mul(Simd8H(dest), Simd8H(lhs), Simd8H(rhs));
+}
+
+void MacroAssembler::mulInt32x4(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Mul(Simd4S(dest), Simd4S(lhs), Simd4S(rhs));
+}
+
+void MacroAssembler::mulInt64x2(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest, FloatRegister temp1,
+ FloatRegister temp2) {
+ // As documented at https://chromium-review.googlesource.com/c/v8/v8/+/1781696
+ // lhs = <D C> <B A>
+ // rhs = <H G> <F E>
+ // result = <(DG+CH)_low+CG_high CG_low> <(BE+AF)_low+AE_high AE_low>
+ ScratchSimd128Scope scratch(*this);
+ Rev64(Simd4S(temp2), Simd4S(lhs)); // temp2 = <C D> <A B>
+ Mul(Simd4S(temp2), Simd4S(temp2), Simd4S(rhs)); // temp2 = <CH DG> <AF BE>
+ Xtn(Simd2S(temp1), Simd2D(rhs)); // temp1 = <0 0> <G E>
+ Addp(Simd4S(temp2), Simd4S(temp2), Simd4S(temp2)); // temp2 = <CH+DG AF+BE>..
+ Xtn(Simd2S(scratch), Simd2D(lhs)); // scratch = <0 0> <C A>
+ Shll(Simd2D(dest), Simd2S(temp2), 32); // dest = <(DG+CH)_low 0>
+ // <(BE+AF)_low 0>
+ Umlal(Simd2D(dest), Simd2S(scratch), Simd2S(temp1));
+}
+
+void MacroAssembler::extMulLowInt8x16(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Smull(Simd8H(dest), Simd8B(lhs), Simd8B(rhs));
+}
+
+void MacroAssembler::extMulHighInt8x16(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Smull2(Simd8H(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+void MacroAssembler::unsignedExtMulLowInt8x16(FloatRegister lhs,
+ FloatRegister rhs,
+ FloatRegister dest) {
+ Umull(Simd8H(dest), Simd8B(lhs), Simd8B(rhs));
+}
+
+void MacroAssembler::unsignedExtMulHighInt8x16(FloatRegister lhs,
+ FloatRegister rhs,
+ FloatRegister dest) {
+ Umull2(Simd8H(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+void MacroAssembler::extMulLowInt16x8(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Smull(Simd4S(dest), Simd4H(lhs), Simd4H(rhs));
+}
+
+void MacroAssembler::extMulHighInt16x8(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Smull2(Simd4S(dest), Simd8H(lhs), Simd8H(rhs));
+}
+
+void MacroAssembler::unsignedExtMulLowInt16x8(FloatRegister lhs,
+ FloatRegister rhs,
+ FloatRegister dest) {
+ Umull(Simd4S(dest), Simd4H(lhs), Simd4H(rhs));
+}
+
+void MacroAssembler::unsignedExtMulHighInt16x8(FloatRegister lhs,
+ FloatRegister rhs,
+ FloatRegister dest) {
+ Umull2(Simd4S(dest), Simd8H(lhs), Simd8H(rhs));
+}
+
+void MacroAssembler::extMulLowInt32x4(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Smull(Simd2D(dest), Simd2S(lhs), Simd2S(rhs));
+}
+
+void MacroAssembler::extMulHighInt32x4(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Smull2(Simd2D(dest), Simd4S(lhs), Simd4S(rhs));
+}
+
+void MacroAssembler::unsignedExtMulLowInt32x4(FloatRegister lhs,
+ FloatRegister rhs,
+ FloatRegister dest) {
+ Umull(Simd2D(dest), Simd2S(lhs), Simd2S(rhs));
+}
+
+void MacroAssembler::unsignedExtMulHighInt32x4(FloatRegister lhs,
+ FloatRegister rhs,
+ FloatRegister dest) {
+ Umull2(Simd2D(dest), Simd4S(lhs), Simd4S(rhs));
+}
+
+void MacroAssembler::q15MulrSatInt16x8(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Sqrdmulh(Simd8H(dest), Simd8H(lhs), Simd8H(rhs));
+}
+
+void MacroAssembler::q15MulrInt16x8Relaxed(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Sqrdmulh(Simd8H(dest), Simd8H(lhs), Simd8H(rhs));
+}
+
+// Integer Negate
+
+void MacroAssembler::negInt8x16(FloatRegister src, FloatRegister dest) {
+ Neg(Simd16B(dest), Simd16B(src));
+}
+
+void MacroAssembler::negInt16x8(FloatRegister src, FloatRegister dest) {
+ Neg(Simd8H(dest), Simd8H(src));
+}
+
+void MacroAssembler::negInt32x4(FloatRegister src, FloatRegister dest) {
+ Neg(Simd4S(dest), Simd4S(src));
+}
+
+void MacroAssembler::negInt64x2(FloatRegister src, FloatRegister dest) {
+ Neg(Simd2D(dest), Simd2D(src));
+}
+
+// Saturating integer add
+
+void MacroAssembler::addSatInt8x16(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Sqadd(Simd16B(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+void MacroAssembler::unsignedAddSatInt8x16(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Uqadd(Simd16B(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+void MacroAssembler::addSatInt16x8(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Sqadd(Simd8H(dest), Simd8H(lhs), Simd8H(rhs));
+}
+
+void MacroAssembler::unsignedAddSatInt16x8(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Uqadd(Simd8H(dest), Simd8H(lhs), Simd8H(rhs));
+}
+
+// Saturating integer subtract
+
+void MacroAssembler::subSatInt8x16(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Sqsub(Simd16B(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+void MacroAssembler::unsignedSubSatInt8x16(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Uqsub(Simd16B(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+void MacroAssembler::subSatInt16x8(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Sqsub(Simd8H(dest), Simd8H(lhs), Simd8H(rhs));
+}
+
+void MacroAssembler::unsignedSubSatInt16x8(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Uqsub(Simd8H(dest), Simd8H(lhs), Simd8H(rhs));
+}
+
+// Lane-wise integer minimum
+
+void MacroAssembler::minInt8x16(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Smin(Simd16B(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+void MacroAssembler::unsignedMinInt8x16(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Umin(Simd16B(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+void MacroAssembler::minInt16x8(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Smin(Simd8H(dest), Simd8H(lhs), Simd8H(rhs));
+}
+
+void MacroAssembler::unsignedMinInt16x8(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Umin(Simd8H(dest), Simd8H(lhs), Simd8H(rhs));
+}
+
+void MacroAssembler::minInt32x4(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Smin(Simd4S(dest), Simd4S(lhs), Simd4S(rhs));
+}
+
+void MacroAssembler::unsignedMinInt32x4(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Umin(Simd4S(dest), Simd4S(lhs), Simd4S(rhs));
+}
+
+// Lane-wise integer maximum
+
+void MacroAssembler::maxInt8x16(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Smax(Simd16B(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+void MacroAssembler::unsignedMaxInt8x16(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Umax(Simd16B(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+void MacroAssembler::maxInt16x8(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Smax(Simd8H(dest), Simd8H(lhs), Simd8H(rhs));
+}
+
+void MacroAssembler::unsignedMaxInt16x8(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Umax(Simd8H(dest), Simd8H(lhs), Simd8H(rhs));
+}
+
+void MacroAssembler::maxInt32x4(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Smax(Simd4S(dest), Simd4S(lhs), Simd4S(rhs));
+}
+
+void MacroAssembler::unsignedMaxInt32x4(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Umax(Simd4S(dest), Simd4S(lhs), Simd4S(rhs));
+}
+
+// Lane-wise integer rounding average
+
+void MacroAssembler::unsignedAverageInt8x16(FloatRegister lhs,
+ FloatRegister rhs,
+ FloatRegister dest) {
+ Urhadd(Simd16B(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+void MacroAssembler::unsignedAverageInt16x8(FloatRegister lhs,
+ FloatRegister rhs,
+ FloatRegister dest) {
+ Urhadd(Simd8H(dest), Simd8H(lhs), Simd8H(rhs));
+}
+
+// Lane-wise integer absolute value
+
+void MacroAssembler::absInt8x16(FloatRegister src, FloatRegister dest) {
+ Abs(Simd16B(dest), Simd16B(src));
+}
+
+void MacroAssembler::absInt16x8(FloatRegister src, FloatRegister dest) {
+ Abs(Simd8H(dest), Simd8H(src));
+}
+
+void MacroAssembler::absInt32x4(FloatRegister src, FloatRegister dest) {
+ Abs(Simd4S(dest), Simd4S(src));
+}
+
+void MacroAssembler::absInt64x2(FloatRegister src, FloatRegister dest) {
+ Abs(Simd2D(dest), Simd2D(src));
+}
+
+// Left shift by variable scalar
+
+void MacroAssembler::leftShiftInt8x16(FloatRegister lhs, Register rhs,
+ FloatRegister dest) {
+ ScratchSimd128Scope vscratch(*this);
+ Dup(Simd16B(vscratch), ARMRegister(rhs, 32));
+ Sshl(Simd16B(dest), Simd16B(lhs), Simd16B(vscratch));
+}
+
+void MacroAssembler::leftShiftInt8x16(Imm32 count, FloatRegister src,
+ FloatRegister dest) {
+ Shl(Simd16B(dest), Simd16B(src), count.value);
+}
+
+void MacroAssembler::leftShiftInt16x8(FloatRegister lhs, Register rhs,
+ FloatRegister dest) {
+ ScratchSimd128Scope vscratch(*this);
+ Dup(Simd8H(vscratch), ARMRegister(rhs, 32));
+ Sshl(Simd8H(dest), Simd8H(lhs), Simd8H(vscratch));
+}
+
+void MacroAssembler::leftShiftInt16x8(Imm32 count, FloatRegister src,
+ FloatRegister dest) {
+ Shl(Simd8H(dest), Simd8H(src), count.value);
+}
+
+void MacroAssembler::leftShiftInt32x4(FloatRegister lhs, Register rhs,
+ FloatRegister dest) {
+ ScratchSimd128Scope vscratch(*this);
+ Dup(Simd4S(vscratch), ARMRegister(rhs, 32));
+ Sshl(Simd4S(dest), Simd4S(lhs), Simd4S(vscratch));
+}
+
+void MacroAssembler::leftShiftInt32x4(Imm32 count, FloatRegister src,
+ FloatRegister dest) {
+ Shl(Simd4S(dest), Simd4S(src), count.value);
+}
+
+void MacroAssembler::leftShiftInt64x2(FloatRegister lhs, Register rhs,
+ FloatRegister dest) {
+ ScratchSimd128Scope vscratch(*this);
+ Dup(Simd2D(vscratch), ARMRegister(rhs, 64));
+ Sshl(Simd2D(dest), Simd2D(lhs), Simd2D(vscratch));
+}
+
+void MacroAssembler::leftShiftInt64x2(Imm32 count, FloatRegister src,
+ FloatRegister dest) {
+ Shl(Simd2D(dest), Simd2D(src), count.value);
+}
+
+// Right shift by variable scalar
+
+void MacroAssembler::rightShiftInt8x16(FloatRegister lhs, Register rhs,
+ FloatRegister dest) {
+ MacroAssemblerCompat::rightShiftInt8x16(lhs, rhs, dest,
+ /* isUnsigned */ false);
+}
+
+void MacroAssembler::rightShiftInt8x16(Imm32 count, FloatRegister src,
+ FloatRegister dest) {
+ Sshr(Simd16B(dest), Simd16B(src), count.value);
+}
+
+void MacroAssembler::unsignedRightShiftInt8x16(FloatRegister lhs, Register rhs,
+ FloatRegister dest) {
+ MacroAssemblerCompat::rightShiftInt8x16(lhs, rhs, dest,
+ /* isUnsigned */ true);
+}
+
+void MacroAssembler::unsignedRightShiftInt8x16(Imm32 count, FloatRegister src,
+ FloatRegister dest) {
+ Ushr(Simd16B(dest), Simd16B(src), count.value);
+}
+
+void MacroAssembler::rightShiftInt16x8(FloatRegister lhs, Register rhs,
+ FloatRegister dest) {
+ MacroAssemblerCompat::rightShiftInt16x8(lhs, rhs, dest,
+ /* isUnsigned */ false);
+}
+
+void MacroAssembler::rightShiftInt16x8(Imm32 count, FloatRegister src,
+ FloatRegister dest) {
+ Sshr(Simd8H(dest), Simd8H(src), count.value);
+}
+
+void MacroAssembler::unsignedRightShiftInt16x8(FloatRegister lhs, Register rhs,
+ FloatRegister dest) {
+ MacroAssemblerCompat::rightShiftInt16x8(lhs, rhs, dest,
+ /* isUnsigned */ true);
+}
+
+void MacroAssembler::unsignedRightShiftInt16x8(Imm32 count, FloatRegister src,
+ FloatRegister dest) {
+ Ushr(Simd8H(dest), Simd8H(src), count.value);
+}
+
+void MacroAssembler::rightShiftInt32x4(FloatRegister lhs, Register rhs,
+ FloatRegister dest) {
+ MacroAssemblerCompat::rightShiftInt32x4(lhs, rhs, dest,
+ /* isUnsigned */ false);
+}
+
+void MacroAssembler::rightShiftInt32x4(Imm32 count, FloatRegister src,
+ FloatRegister dest) {
+ Sshr(Simd4S(dest), Simd4S(src), count.value);
+}
+
+void MacroAssembler::unsignedRightShiftInt32x4(FloatRegister lhs, Register rhs,
+ FloatRegister dest) {
+ MacroAssemblerCompat::rightShiftInt32x4(lhs, rhs, dest,
+ /* isUnsigned */ true);
+}
+
+void MacroAssembler::unsignedRightShiftInt32x4(Imm32 count, FloatRegister src,
+ FloatRegister dest) {
+ Ushr(Simd4S(dest), Simd4S(src), count.value);
+}
+
+void MacroAssembler::rightShiftInt64x2(FloatRegister lhs, Register rhs,
+ FloatRegister dest) {
+ MacroAssemblerCompat::rightShiftInt64x2(lhs, rhs, dest,
+ /* isUnsigned */ false);
+}
+
+void MacroAssembler::rightShiftInt64x2(Imm32 count, FloatRegister src,
+ FloatRegister dest) {
+ Sshr(Simd2D(dest), Simd2D(src), count.value);
+}
+
+void MacroAssembler::unsignedRightShiftInt64x2(FloatRegister lhs, Register rhs,
+ FloatRegister dest) {
+ MacroAssemblerCompat::rightShiftInt64x2(lhs, rhs, dest,
+ /* isUnsigned */ true);
+}
+
+void MacroAssembler::unsignedRightShiftInt64x2(Imm32 count, FloatRegister src,
+ FloatRegister dest) {
+ Ushr(Simd2D(dest), Simd2D(src), count.value);
+}
+
+// Bitwise and, or, xor, not
+
+void MacroAssembler::bitwiseAndSimd128(FloatRegister rhs,
+ FloatRegister lhsDest) {
+ And(Simd16B(lhsDest), Simd16B(lhsDest), Simd16B(rhs));
+}
+
+void MacroAssembler::bitwiseAndSimd128(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ And(Simd16B(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+void MacroAssembler::bitwiseOrSimd128(FloatRegister rhs,
+ FloatRegister lhsDest) {
+ Orr(Simd16B(lhsDest), Simd16B(lhsDest), Simd16B(rhs));
+}
+
+void MacroAssembler::bitwiseOrSimd128(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Orr(Simd16B(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+void MacroAssembler::bitwiseXorSimd128(FloatRegister rhs,
+ FloatRegister lhsDest) {
+ Eor(Simd16B(lhsDest), Simd16B(lhsDest), Simd16B(rhs));
+}
+
+void MacroAssembler::bitwiseXorSimd128(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Eor(Simd16B(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+void MacroAssembler::bitwiseNotSimd128(FloatRegister src, FloatRegister dest) {
+ Not(Simd16B(dest), Simd16B(src));
+}
+
+void MacroAssembler::bitwiseAndNotSimd128(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Bic(Simd16B(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+// Bitwise AND with complement: dest = ~lhs & rhs, note this is not what Wasm
+// wants but what the x86 hardware offers. Hence the name. Since arm64 has
+// dest = lhs & ~rhs we just swap operands.
+
+void MacroAssembler::bitwiseNotAndSimd128(FloatRegister rhs,
+ FloatRegister lhsDest) {
+ Bic(Simd16B(lhsDest), Simd16B(rhs), Simd16B(lhsDest));
+}
+
+// Bitwise select
+
+void MacroAssembler::bitwiseSelectSimd128(FloatRegister onTrue,
+ FloatRegister onFalse,
+ FloatRegister maskDest) {
+ Bsl(Simd16B(maskDest), Simd16B(onTrue), Simd16B(onFalse));
+}
+
+// Population count
+
+void MacroAssembler::popcntInt8x16(FloatRegister src, FloatRegister dest) {
+ Cnt(Simd16B(dest), Simd16B(src));
+}
+
+// Any lane true, ie, any bit set
+
+void MacroAssembler::anyTrueSimd128(FloatRegister src, Register dest_) {
+ ScratchSimd128Scope scratch_(*this);
+ ARMFPRegister scratch(Simd1D(scratch_));
+ ARMRegister dest(dest_, 64);
+ Addp(scratch, Simd2D(src));
+ Umov(dest, scratch, 0);
+ Cmp(dest, Operand(0));
+ Cset(dest, Assembler::NonZero);
+}
+
+// All lanes true
+
+void MacroAssembler::allTrueInt8x16(FloatRegister src, Register dest_) {
+ ScratchSimd128Scope scratch(*this);
+ ARMRegister dest(dest_, 64);
+ Cmeq(Simd16B(scratch), Simd16B(src), 0);
+ Addp(Simd1D(scratch), Simd2D(scratch));
+ Umov(dest, Simd1D(scratch), 0);
+ Cmp(dest, Operand(0));
+ Cset(dest, Assembler::Zero);
+}
+
+void MacroAssembler::allTrueInt16x8(FloatRegister src, Register dest_) {
+ ScratchSimd128Scope scratch(*this);
+ ARMRegister dest(dest_, 64);
+ Cmeq(Simd8H(scratch), Simd8H(src), 0);
+ Addp(Simd1D(scratch), Simd2D(scratch));
+ Umov(dest, Simd1D(scratch), 0);
+ Cmp(dest, Operand(0));
+ Cset(dest, Assembler::Zero);
+}
+
+void MacroAssembler::allTrueInt32x4(FloatRegister src, Register dest_) {
+ ScratchSimd128Scope scratch(*this);
+ ARMRegister dest(dest_, 64);
+ Cmeq(Simd4S(scratch), Simd4S(src), 0);
+ Addp(Simd1D(scratch), Simd2D(scratch));
+ Umov(dest, Simd1D(scratch), 0);
+ Cmp(dest, Operand(0));
+ Cset(dest, Assembler::Zero);
+}
+
+void MacroAssembler::allTrueInt64x2(FloatRegister src, Register dest_) {
+ ScratchSimd128Scope scratch(*this);
+ ARMRegister dest(dest_, 64);
+ Cmeq(Simd2D(scratch), Simd2D(src), 0);
+ Addp(Simd1D(scratch), Simd2D(scratch));
+ Umov(dest, Simd1D(scratch), 0);
+ Cmp(dest, Operand(0));
+ Cset(dest, Assembler::Zero);
+}
+
+// Bitmask, ie extract and compress high bits of all lanes
+//
+// There's no direct support for this on the chip. These implementations come
+// from the writeup that added the instruction to the SIMD instruction set.
+// Generally, shifting and masking is used to isolate the sign bit of each
+// element in the right position, then a horizontal add creates the result. For
+// 8-bit elements an intermediate step is needed to assemble the bits of the
+// upper and lower 8 bytes into 8 halfwords.
+
+void MacroAssembler::bitmaskInt8x16(FloatRegister src, Register dest,
+ FloatRegister temp) {
+ ScratchSimd128Scope scratch(*this);
+ int8_t values[] = {1, 2, 4, 8, 16, 32, 64, -128,
+ 1, 2, 4, 8, 16, 32, 64, -128};
+ loadConstantSimd128(SimdConstant::CreateX16(values), temp);
+ Sshr(Simd16B(scratch), Simd16B(src), 7);
+ And(Simd16B(scratch), Simd16B(scratch), Simd16B(temp));
+ Ext(Simd16B(temp), Simd16B(scratch), Simd16B(scratch), 8);
+ Zip1(Simd16B(temp), Simd16B(scratch), Simd16B(temp));
+ Addv(ARMFPRegister(temp, 16), Simd8H(temp));
+ Mov(ARMRegister(dest, 32), Simd8H(temp), 0);
+}
+
+void MacroAssembler::bitmaskInt16x8(FloatRegister src, Register dest,
+ FloatRegister temp) {
+ ScratchSimd128Scope scratch(*this);
+ int16_t values[] = {1, 2, 4, 8, 16, 32, 64, 128};
+ loadConstantSimd128(SimdConstant::CreateX8(values), temp);
+ Sshr(Simd8H(scratch), Simd8H(src), 15);
+ And(Simd16B(scratch), Simd16B(scratch), Simd16B(temp));
+ Addv(ARMFPRegister(scratch, 16), Simd8H(scratch));
+ Mov(ARMRegister(dest, 32), Simd8H(scratch), 0);
+}
+
+void MacroAssembler::bitmaskInt32x4(FloatRegister src, Register dest,
+ FloatRegister temp) {
+ ScratchSimd128Scope scratch(*this);
+ int32_t values[] = {1, 2, 4, 8};
+ loadConstantSimd128(SimdConstant::CreateX4(values), temp);
+ Sshr(Simd4S(scratch), Simd4S(src), 31);
+ And(Simd16B(scratch), Simd16B(scratch), Simd16B(temp));
+ Addv(ARMFPRegister(scratch, 32), Simd4S(scratch));
+ Mov(ARMRegister(dest, 32), Simd4S(scratch), 0);
+}
+
+void MacroAssembler::bitmaskInt64x2(FloatRegister src, Register dest,
+ FloatRegister temp) {
+ Sqxtn(Simd2S(temp), Simd2D(src));
+ Ushr(Simd2S(temp), Simd2S(temp), 31);
+ Usra(ARMFPRegister(temp, 64), ARMFPRegister(temp, 64), 31);
+ Fmov(ARMRegister(dest, 32), ARMFPRegister(temp, 32));
+}
+
+// Comparisons (integer and floating-point)
+
+void MacroAssembler::compareInt8x16(Assembler::Condition cond,
+ FloatRegister rhs, FloatRegister lhsDest) {
+ compareSimd128Int(cond, Simd16B(lhsDest), Simd16B(lhsDest), Simd16B(rhs));
+}
+
+void MacroAssembler::compareInt8x16(Assembler::Condition cond,
+ FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ compareSimd128Int(cond, Simd16B(dest), Simd16B(lhs), Simd16B(rhs));
+}
+
+void MacroAssembler::compareInt16x8(Assembler::Condition cond,
+ FloatRegister rhs, FloatRegister lhsDest) {
+ compareSimd128Int(cond, Simd8H(lhsDest), Simd8H(lhsDest), Simd8H(rhs));
+}
+
+void MacroAssembler::compareInt16x8(Assembler::Condition cond,
+ FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ compareSimd128Int(cond, Simd8H(dest), Simd8H(lhs), Simd8H(rhs));
+}
+
+void MacroAssembler::compareInt32x4(Assembler::Condition cond,
+ FloatRegister rhs, FloatRegister lhsDest) {
+ compareSimd128Int(cond, Simd4S(lhsDest), Simd4S(lhsDest), Simd4S(rhs));
+}
+
+void MacroAssembler::compareInt32x4(Assembler::Condition cond,
+ FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ compareSimd128Int(cond, Simd4S(dest), Simd4S(lhs), Simd4S(rhs));
+}
+
+void MacroAssembler::compareInt64x2(Assembler::Condition cond,
+ FloatRegister rhs, FloatRegister lhsDest) {
+ compareSimd128Int(cond, Simd2D(lhsDest), Simd2D(lhsDest), Simd2D(rhs));
+}
+
+void MacroAssembler::compareInt64x2(Assembler::Condition cond,
+ FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ compareSimd128Int(cond, Simd2D(dest), Simd2D(lhs), Simd2D(rhs));
+}
+
+void MacroAssembler::compareFloat32x4(Assembler::Condition cond,
+ FloatRegister rhs,
+ FloatRegister lhsDest) {
+ compareSimd128Float(cond, Simd4S(lhsDest), Simd4S(lhsDest), Simd4S(rhs));
+}
+
+void MacroAssembler::compareFloat32x4(Assembler::Condition cond,
+ FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ compareSimd128Float(cond, Simd4S(dest), Simd4S(lhs), Simd4S(rhs));
+}
+
+void MacroAssembler::compareFloat64x2(Assembler::Condition cond,
+ FloatRegister rhs,
+ FloatRegister lhsDest) {
+ compareSimd128Float(cond, Simd2D(lhsDest), Simd2D(lhsDest), Simd2D(rhs));
+}
+
+void MacroAssembler::compareFloat64x2(Assembler::Condition cond,
+ FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ compareSimd128Float(cond, Simd2D(dest), Simd2D(lhs), Simd2D(rhs));
+}
+
+// Load
+
+void MacroAssembler::loadUnalignedSimd128(const Address& src,
+ FloatRegister dest) {
+ Ldr(ARMFPRegister(dest, 128), toMemOperand(src));
+}
+
+void MacroAssembler::loadUnalignedSimd128(const BaseIndex& address,
+ FloatRegister dest) {
+ doBaseIndex(ARMFPRegister(dest, 128), address, vixl::LDR_q);
+}
+
+// Store
+
+void MacroAssembler::storeUnalignedSimd128(FloatRegister src,
+ const Address& dest) {
+ Str(ARMFPRegister(src, 128), toMemOperand(dest));
+}
+
+void MacroAssembler::storeUnalignedSimd128(FloatRegister src,
+ const BaseIndex& dest) {
+ doBaseIndex(ARMFPRegister(src, 128), dest, vixl::STR_q);
+}
+
+// Floating point negation
+
+void MacroAssembler::negFloat32x4(FloatRegister src, FloatRegister dest) {
+ Fneg(Simd4S(dest), Simd4S(src));
+}
+
+void MacroAssembler::negFloat64x2(FloatRegister src, FloatRegister dest) {
+ Fneg(Simd2D(dest), Simd2D(src));
+}
+
+// Floating point absolute value
+
+void MacroAssembler::absFloat32x4(FloatRegister src, FloatRegister dest) {
+ Fabs(Simd4S(dest), Simd4S(src));
+}
+
+void MacroAssembler::absFloat64x2(FloatRegister src, FloatRegister dest) {
+ Fabs(Simd2D(dest), Simd2D(src));
+}
+
+// NaN-propagating minimum
+
+void MacroAssembler::minFloat32x4(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Fmin(Simd4S(dest), Simd4S(lhs), Simd4S(rhs));
+}
+
+void MacroAssembler::minFloat32x4(FloatRegister rhs, FloatRegister lhsDest) {
+ Fmin(Simd4S(lhsDest), Simd4S(lhsDest), Simd4S(rhs));
+}
+
+void MacroAssembler::minFloat64x2(FloatRegister rhs, FloatRegister lhsDest) {
+ Fmin(Simd2D(lhsDest), Simd2D(lhsDest), Simd2D(rhs));
+}
+
+void MacroAssembler::minFloat64x2(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Fmin(Simd2D(dest), Simd2D(lhs), Simd2D(rhs));
+}
+
+// NaN-propagating maximum
+
+void MacroAssembler::maxFloat32x4(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Fmax(Simd4S(dest), Simd4S(lhs), Simd4S(rhs));
+}
+
+void MacroAssembler::maxFloat32x4(FloatRegister rhs, FloatRegister lhsDest) {
+ Fmax(Simd4S(lhsDest), Simd4S(lhsDest), Simd4S(rhs));
+}
+
+void MacroAssembler::maxFloat64x2(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Fmax(Simd2D(dest), Simd2D(lhs), Simd2D(rhs));
+}
+
+void MacroAssembler::maxFloat64x2(FloatRegister rhs, FloatRegister lhsDest) {
+ Fmax(Simd2D(lhsDest), Simd2D(lhsDest), Simd2D(rhs));
+}
+
+// Floating add
+
+void MacroAssembler::addFloat32x4(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Fadd(Simd4S(dest), Simd4S(lhs), Simd4S(rhs));
+}
+
+void MacroAssembler::addFloat64x2(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Fadd(Simd2D(dest), Simd2D(lhs), Simd2D(rhs));
+}
+
+// Floating subtract
+
+void MacroAssembler::subFloat32x4(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Fsub(Simd4S(dest), Simd4S(lhs), Simd4S(rhs));
+}
+
+void MacroAssembler::subFloat64x2(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Fsub(Simd2D(dest), Simd2D(lhs), Simd2D(rhs));
+}
+
+// Floating division
+
+void MacroAssembler::divFloat32x4(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Fdiv(Simd4S(dest), Simd4S(lhs), Simd4S(rhs));
+}
+
+void MacroAssembler::divFloat64x2(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Fdiv(Simd2D(dest), Simd2D(lhs), Simd2D(rhs));
+}
+
+// Floating Multiply
+
+void MacroAssembler::mulFloat32x4(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Fmul(Simd4S(dest), Simd4S(lhs), Simd4S(rhs));
+}
+
+void MacroAssembler::mulFloat64x2(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Fmul(Simd2D(dest), Simd2D(lhs), Simd2D(rhs));
+}
+
+// Pairwise add
+
+void MacroAssembler::extAddPairwiseInt8x16(FloatRegister src,
+ FloatRegister dest) {
+ Saddlp(Simd8H(dest), Simd16B(src));
+}
+
+void MacroAssembler::unsignedExtAddPairwiseInt8x16(FloatRegister src,
+ FloatRegister dest) {
+ Uaddlp(Simd8H(dest), Simd16B(src));
+}
+
+void MacroAssembler::extAddPairwiseInt16x8(FloatRegister src,
+ FloatRegister dest) {
+ Saddlp(Simd4S(dest), Simd8H(src));
+}
+
+void MacroAssembler::unsignedExtAddPairwiseInt16x8(FloatRegister src,
+ FloatRegister dest) {
+ Uaddlp(Simd4S(dest), Simd8H(src));
+}
+
+// Floating square root
+
+void MacroAssembler::sqrtFloat32x4(FloatRegister src, FloatRegister dest) {
+ Fsqrt(Simd4S(dest), Simd4S(src));
+}
+
+void MacroAssembler::sqrtFloat64x2(FloatRegister src, FloatRegister dest) {
+ Fsqrt(Simd2D(dest), Simd2D(src));
+}
+
+// Integer to floating point with rounding
+
+void MacroAssembler::convertInt32x4ToFloat32x4(FloatRegister src,
+ FloatRegister dest) {
+ Scvtf(Simd4S(dest), Simd4S(src));
+}
+
+void MacroAssembler::unsignedConvertInt32x4ToFloat32x4(FloatRegister src,
+ FloatRegister dest) {
+ Ucvtf(Simd4S(dest), Simd4S(src));
+}
+
+void MacroAssembler::convertInt32x4ToFloat64x2(FloatRegister src,
+ FloatRegister dest) {
+ Sshll(Simd2D(dest), Simd2S(src), 0);
+ Scvtf(Simd2D(dest), Simd2D(dest));
+}
+
+void MacroAssembler::unsignedConvertInt32x4ToFloat64x2(FloatRegister src,
+ FloatRegister dest) {
+ Ushll(Simd2D(dest), Simd2S(src), 0);
+ Ucvtf(Simd2D(dest), Simd2D(dest));
+}
+
+// Floating point to integer with saturation
+
+void MacroAssembler::truncSatFloat32x4ToInt32x4(FloatRegister src,
+ FloatRegister dest) {
+ Fcvtzs(Simd4S(dest), Simd4S(src));
+}
+
+void MacroAssembler::unsignedTruncSatFloat32x4ToInt32x4(FloatRegister src,
+ FloatRegister dest) {
+ Fcvtzu(Simd4S(dest), Simd4S(src));
+}
+
+void MacroAssembler::truncSatFloat64x2ToInt32x4(FloatRegister src,
+ FloatRegister dest,
+ FloatRegister temp) {
+ Fcvtzs(Simd2D(dest), Simd2D(src));
+ Sqxtn(Simd2S(dest), Simd2D(dest));
+}
+
+void MacroAssembler::unsignedTruncSatFloat64x2ToInt32x4(FloatRegister src,
+ FloatRegister dest,
+ FloatRegister temp) {
+ Fcvtzu(Simd2D(dest), Simd2D(src));
+ Uqxtn(Simd2S(dest), Simd2D(dest));
+}
+
+void MacroAssembler::truncFloat32x4ToInt32x4Relaxed(FloatRegister src,
+ FloatRegister dest) {
+ Fcvtzs(Simd4S(dest), Simd4S(src));
+}
+
+void MacroAssembler::unsignedTruncFloat32x4ToInt32x4Relaxed(
+ FloatRegister src, FloatRegister dest) {
+ Fcvtzu(Simd4S(dest), Simd4S(src));
+}
+
+void MacroAssembler::truncFloat64x2ToInt32x4Relaxed(FloatRegister src,
+ FloatRegister dest) {
+ Fcvtzs(Simd2D(dest), Simd2D(src));
+ Sqxtn(Simd2S(dest), Simd2D(dest));
+}
+
+void MacroAssembler::unsignedTruncFloat64x2ToInt32x4Relaxed(
+ FloatRegister src, FloatRegister dest) {
+ Fcvtzu(Simd2D(dest), Simd2D(src));
+ Uqxtn(Simd2S(dest), Simd2D(dest));
+}
+
+// Floating point narrowing
+
+void MacroAssembler::convertFloat64x2ToFloat32x4(FloatRegister src,
+ FloatRegister dest) {
+ Fcvtn(Simd2S(dest), Simd2D(src));
+}
+
+// Floating point widening
+
+void MacroAssembler::convertFloat32x4ToFloat64x2(FloatRegister src,
+ FloatRegister dest) {
+ Fcvtl(Simd2D(dest), Simd2S(src));
+}
+
+// Integer to integer narrowing
+
+void MacroAssembler::narrowInt16x8(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ ScratchSimd128Scope scratch(*this);
+ if (rhs == dest) {
+ Mov(scratch, SimdReg(rhs));
+ rhs = scratch;
+ }
+ Sqxtn(Simd8B(dest), Simd8H(lhs));
+ Sqxtn2(Simd16B(dest), Simd8H(rhs));
+}
+
+void MacroAssembler::unsignedNarrowInt16x8(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ ScratchSimd128Scope scratch(*this);
+ if (rhs == dest) {
+ Mov(scratch, SimdReg(rhs));
+ rhs = scratch;
+ }
+ Sqxtun(Simd8B(dest), Simd8H(lhs));
+ Sqxtun2(Simd16B(dest), Simd8H(rhs));
+}
+
+void MacroAssembler::narrowInt32x4(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ ScratchSimd128Scope scratch(*this);
+ if (rhs == dest) {
+ Mov(scratch, SimdReg(rhs));
+ rhs = scratch;
+ }
+ Sqxtn(Simd4H(dest), Simd4S(lhs));
+ Sqxtn2(Simd8H(dest), Simd4S(rhs));
+}
+
+void MacroAssembler::unsignedNarrowInt32x4(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ ScratchSimd128Scope scratch(*this);
+ if (rhs == dest) {
+ Mov(scratch, SimdReg(rhs));
+ rhs = scratch;
+ }
+ Sqxtun(Simd4H(dest), Simd4S(lhs));
+ Sqxtun2(Simd8H(dest), Simd4S(rhs));
+}
+
+// Integer to integer widening
+
+void MacroAssembler::widenLowInt8x16(FloatRegister src, FloatRegister dest) {
+ Sshll(Simd8H(dest), Simd8B(src), 0);
+}
+
+void MacroAssembler::widenHighInt8x16(FloatRegister src, FloatRegister dest) {
+ Sshll2(Simd8H(dest), Simd16B(src), 0);
+}
+
+void MacroAssembler::unsignedWidenLowInt8x16(FloatRegister src,
+ FloatRegister dest) {
+ Ushll(Simd8H(dest), Simd8B(src), 0);
+}
+
+void MacroAssembler::unsignedWidenHighInt8x16(FloatRegister src,
+ FloatRegister dest) {
+ Ushll2(Simd8H(dest), Simd16B(src), 0);
+}
+
+void MacroAssembler::widenLowInt16x8(FloatRegister src, FloatRegister dest) {
+ Sshll(Simd4S(dest), Simd4H(src), 0);
+}
+
+void MacroAssembler::widenHighInt16x8(FloatRegister src, FloatRegister dest) {
+ Sshll2(Simd4S(dest), Simd8H(src), 0);
+}
+
+void MacroAssembler::unsignedWidenLowInt16x8(FloatRegister src,
+ FloatRegister dest) {
+ Ushll(Simd4S(dest), Simd4H(src), 0);
+}
+
+void MacroAssembler::unsignedWidenHighInt16x8(FloatRegister src,
+ FloatRegister dest) {
+ Ushll2(Simd4S(dest), Simd8H(src), 0);
+}
+
+void MacroAssembler::widenLowInt32x4(FloatRegister src, FloatRegister dest) {
+ Sshll(Simd2D(dest), Simd2S(src), 0);
+}
+
+void MacroAssembler::unsignedWidenLowInt32x4(FloatRegister src,
+ FloatRegister dest) {
+ Ushll(Simd2D(dest), Simd2S(src), 0);
+}
+
+void MacroAssembler::widenHighInt32x4(FloatRegister src, FloatRegister dest) {
+ Sshll2(Simd2D(dest), Simd4S(src), 0);
+}
+
+void MacroAssembler::unsignedWidenHighInt32x4(FloatRegister src,
+ FloatRegister dest) {
+ Ushll2(Simd2D(dest), Simd4S(src), 0);
+}
+
+// Compare-based minimum/maximum (experimental as of August, 2020)
+// https://github.com/WebAssembly/simd/pull/122
+
+void MacroAssembler::pseudoMinFloat32x4(FloatRegister rhsOrRhsDest,
+ FloatRegister lhsOrLhsDest) {
+ // Shut up the linter by using the same names as in the declaration, then
+ // aliasing here.
+ FloatRegister rhs = rhsOrRhsDest;
+ FloatRegister lhsDest = lhsOrLhsDest;
+ ScratchSimd128Scope scratch(*this);
+ Fcmgt(Simd4S(scratch), Simd4S(lhsDest), Simd4S(rhs));
+ Bsl(Simd16B(scratch), Simd16B(rhs), Simd16B(lhsDest));
+ Mov(SimdReg(lhsDest), scratch);
+}
+
+void MacroAssembler::pseudoMinFloat32x4(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ ScratchSimd128Scope scratch(*this);
+ Fcmgt(Simd4S(scratch), Simd4S(lhs), Simd4S(rhs));
+ Bsl(Simd16B(scratch), Simd16B(rhs), Simd16B(lhs));
+ Mov(SimdReg(dest), scratch);
+}
+
+void MacroAssembler::pseudoMinFloat64x2(FloatRegister rhsOrRhsDest,
+ FloatRegister lhsOrLhsDest) {
+ FloatRegister rhs = rhsOrRhsDest;
+ FloatRegister lhsDest = lhsOrLhsDest;
+ ScratchSimd128Scope scratch(*this);
+ Fcmgt(Simd2D(scratch), Simd2D(lhsDest), Simd2D(rhs));
+ Bsl(Simd16B(scratch), Simd16B(rhs), Simd16B(lhsDest));
+ Mov(SimdReg(lhsDest), scratch);
+}
+
+void MacroAssembler::pseudoMinFloat64x2(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ ScratchSimd128Scope scratch(*this);
+ Fcmgt(Simd2D(scratch), Simd2D(lhs), Simd2D(rhs));
+ Bsl(Simd16B(scratch), Simd16B(rhs), Simd16B(lhs));
+ Mov(SimdReg(dest), scratch);
+}
+
+void MacroAssembler::pseudoMaxFloat32x4(FloatRegister rhsOrRhsDest,
+ FloatRegister lhsOrLhsDest) {
+ FloatRegister rhs = rhsOrRhsDest;
+ FloatRegister lhsDest = lhsOrLhsDest;
+ ScratchSimd128Scope scratch(*this);
+ Fcmgt(Simd4S(scratch), Simd4S(rhs), Simd4S(lhsDest));
+ Bsl(Simd16B(scratch), Simd16B(rhs), Simd16B(lhsDest));
+ Mov(SimdReg(lhsDest), scratch);
+}
+
+void MacroAssembler::pseudoMaxFloat32x4(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ ScratchSimd128Scope scratch(*this);
+ Fcmgt(Simd4S(scratch), Simd4S(rhs), Simd4S(lhs));
+ Bsl(Simd16B(scratch), Simd16B(rhs), Simd16B(lhs));
+ Mov(SimdReg(dest), scratch);
+}
+
+void MacroAssembler::pseudoMaxFloat64x2(FloatRegister rhsOrRhsDest,
+ FloatRegister lhsOrLhsDest) {
+ FloatRegister rhs = rhsOrRhsDest;
+ FloatRegister lhsDest = lhsOrLhsDest;
+ ScratchSimd128Scope scratch(*this);
+ Fcmgt(Simd2D(scratch), Simd2D(rhs), Simd2D(lhsDest));
+ Bsl(Simd16B(scratch), Simd16B(rhs), Simd16B(lhsDest));
+ Mov(SimdReg(lhsDest), scratch);
+}
+
+void MacroAssembler::pseudoMaxFloat64x2(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ ScratchSimd128Scope scratch(*this);
+ Fcmgt(Simd2D(scratch), Simd2D(rhs), Simd2D(lhs));
+ Bsl(Simd16B(scratch), Simd16B(rhs), Simd16B(lhs));
+ Mov(SimdReg(dest), scratch);
+}
+
+// Widening/pairwise integer dot product (experimental as of August, 2020)
+// https://github.com/WebAssembly/simd/pull/127
+
+void MacroAssembler::widenDotInt16x8(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ ScratchSimd128Scope scratch(*this);
+ Smull(Simd4S(scratch), Simd4H(lhs), Simd4H(rhs));
+ Smull2(Simd4S(dest), Simd8H(lhs), Simd8H(rhs));
+ Addp(Simd4S(dest), Simd4S(scratch), Simd4S(dest));
+}
+
+void MacroAssembler::dotInt8x16Int7x16(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ ScratchSimd128Scope scratch(*this);
+ Smull(Simd8H(scratch), Simd8B(lhs), Simd8B(rhs));
+ Smull2(Simd8H(dest), Simd16B(lhs), Simd16B(rhs));
+ Addp(Simd8H(dest), Simd8H(scratch), Simd8H(dest));
+}
+
+void MacroAssembler::dotInt8x16Int7x16ThenAdd(FloatRegister lhs,
+ FloatRegister rhs,
+ FloatRegister dest,
+ FloatRegister temp) {
+ ScratchSimd128Scope scratch(*this);
+ Smull(Simd8H(scratch), Simd8B(lhs), Simd8B(rhs));
+ Smull2(Simd8H(temp), Simd16B(lhs), Simd16B(rhs));
+ Addp(Simd8H(temp), Simd8H(scratch), Simd8H(temp));
+ Sadalp(Simd4S(dest), Simd8H(temp));
+}
+
+// Floating point rounding (experimental as of August, 2020)
+// https://github.com/WebAssembly/simd/pull/232
+
+void MacroAssembler::ceilFloat32x4(FloatRegister src, FloatRegister dest) {
+ Frintp(Simd4S(dest), Simd4S(src));
+}
+
+void MacroAssembler::ceilFloat64x2(FloatRegister src, FloatRegister dest) {
+ Frintp(Simd2D(dest), Simd2D(src));
+}
+
+void MacroAssembler::floorFloat32x4(FloatRegister src, FloatRegister dest) {
+ Frintm(Simd4S(dest), Simd4S(src));
+}
+
+void MacroAssembler::floorFloat64x2(FloatRegister src, FloatRegister dest) {
+ Frintm(Simd2D(dest), Simd2D(src));
+}
+
+void MacroAssembler::truncFloat32x4(FloatRegister src, FloatRegister dest) {
+ Frintz(Simd4S(dest), Simd4S(src));
+}
+
+void MacroAssembler::truncFloat64x2(FloatRegister src, FloatRegister dest) {
+ Frintz(Simd2D(dest), Simd2D(src));
+}
+
+void MacroAssembler::nearestFloat32x4(FloatRegister src, FloatRegister dest) {
+ Frintn(Simd4S(dest), Simd4S(src));
+}
+
+void MacroAssembler::nearestFloat64x2(FloatRegister src, FloatRegister dest) {
+ Frintn(Simd2D(dest), Simd2D(src));
+}
+
+// Floating multiply-accumulate: srcDest [+-]= src1 * src2
+
+void MacroAssembler::fmaFloat32x4(FloatRegister src1, FloatRegister src2,
+ FloatRegister srcDest) {
+ Fmla(Simd4S(srcDest), Simd4S(src1), Simd4S(src2));
+}
+
+void MacroAssembler::fnmaFloat32x4(FloatRegister src1, FloatRegister src2,
+ FloatRegister srcDest) {
+ Fmls(Simd4S(srcDest), Simd4S(src1), Simd4S(src2));
+}
+
+void MacroAssembler::fmaFloat64x2(FloatRegister src1, FloatRegister src2,
+ FloatRegister srcDest) {
+ Fmla(Simd2D(srcDest), Simd2D(src1), Simd2D(src2));
+}
+
+void MacroAssembler::fnmaFloat64x2(FloatRegister src1, FloatRegister src2,
+ FloatRegister srcDest) {
+ Fmls(Simd2D(srcDest), Simd2D(src1), Simd2D(src2));
+}
+
+void MacroAssembler::minFloat32x4Relaxed(FloatRegister src,
+ FloatRegister srcDest) {
+ Fmin(Simd4S(srcDest), Simd4S(src), Simd4S(srcDest));
+}
+
+void MacroAssembler::minFloat32x4Relaxed(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Fmin(Simd4S(dest), Simd4S(rhs), Simd4S(lhs));
+}
+
+void MacroAssembler::maxFloat32x4Relaxed(FloatRegister src,
+ FloatRegister srcDest) {
+ Fmax(Simd4S(srcDest), Simd4S(src), Simd4S(srcDest));
+}
+
+void MacroAssembler::maxFloat32x4Relaxed(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Fmax(Simd4S(dest), Simd4S(rhs), Simd4S(lhs));
+}
+
+void MacroAssembler::minFloat64x2Relaxed(FloatRegister src,
+ FloatRegister srcDest) {
+ Fmin(Simd2D(srcDest), Simd2D(src), Simd2D(srcDest));
+}
+
+void MacroAssembler::minFloat64x2Relaxed(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Fmin(Simd2D(dest), Simd2D(rhs), Simd2D(lhs));
+}
+
+void MacroAssembler::maxFloat64x2Relaxed(FloatRegister src,
+ FloatRegister srcDest) {
+ Fmax(Simd2D(srcDest), Simd2D(src), Simd2D(srcDest));
+}
+
+void MacroAssembler::maxFloat64x2Relaxed(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister dest) {
+ Fmax(Simd2D(dest), Simd2D(rhs), Simd2D(lhs));
+}
+
+//}}} check_macroassembler_style
+// ===============================================================
+
+void MacroAssemblerCompat::addToStackPtr(Register src) {
+ Add(GetStackPointer64(), GetStackPointer64(), ARMRegister(src, 64));
+ // Given that required invariant SP <= PSP, this is probably pointless,
+ // since it gives PSP a larger value.
+ syncStackPtr();
+}
+
+void MacroAssemblerCompat::addToStackPtr(Imm32 imm) {
+ Add(GetStackPointer64(), GetStackPointer64(), Operand(imm.value));
+ // As above, probably pointless.
+ syncStackPtr();
+}
+
+void MacroAssemblerCompat::addToStackPtr(const Address& src) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch = temps.AcquireX();
+ Ldr(scratch, toMemOperand(src));
+ Add(GetStackPointer64(), GetStackPointer64(), scratch);
+ // As above, probably pointless.
+ syncStackPtr();
+}
+
+void MacroAssemblerCompat::addStackPtrTo(Register dest) {
+ Add(ARMRegister(dest, 64), ARMRegister(dest, 64), GetStackPointer64());
+}
+
+void MacroAssemblerCompat::subFromStackPtr(Register src) {
+ Sub(GetStackPointer64(), GetStackPointer64(), ARMRegister(src, 64));
+ syncStackPtr();
+}
+
+void MacroAssemblerCompat::subFromStackPtr(Imm32 imm) {
+ Sub(GetStackPointer64(), GetStackPointer64(), Operand(imm.value));
+ syncStackPtr();
+}
+
+void MacroAssemblerCompat::subStackPtrFrom(Register dest) {
+ Sub(ARMRegister(dest, 64), ARMRegister(dest, 64), GetStackPointer64());
+}
+
+void MacroAssemblerCompat::andToStackPtr(Imm32 imm) {
+ if (sp.Is(GetStackPointer64())) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch = temps.AcquireX();
+ Mov(scratch, sp);
+ And(sp, scratch, Operand(imm.value));
+ // syncStackPtr() not needed since our SP is the real SP.
+ } else {
+ And(GetStackPointer64(), GetStackPointer64(), Operand(imm.value));
+ syncStackPtr();
+ }
+}
+
+void MacroAssemblerCompat::moveToStackPtr(Register src) {
+ Mov(GetStackPointer64(), ARMRegister(src, 64));
+ syncStackPtr();
+}
+
+void MacroAssemblerCompat::moveStackPtrTo(Register dest) {
+ Mov(ARMRegister(dest, 64), GetStackPointer64());
+}
+
+void MacroAssemblerCompat::loadStackPtr(const Address& src) {
+ if (sp.Is(GetStackPointer64())) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch = temps.AcquireX();
+ Ldr(scratch, toMemOperand(src));
+ Mov(sp, scratch);
+ // syncStackPtr() not needed since our SP is the real SP.
+ } else {
+ Ldr(GetStackPointer64(), toMemOperand(src));
+ syncStackPtr();
+ }
+}
+
+void MacroAssemblerCompat::storeStackPtr(const Address& dest) {
+ if (sp.Is(GetStackPointer64())) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch = temps.AcquireX();
+ Mov(scratch, sp);
+ Str(scratch, toMemOperand(dest));
+ } else {
+ Str(GetStackPointer64(), toMemOperand(dest));
+ }
+}
+
+void MacroAssemblerCompat::branchTestStackPtr(Condition cond, Imm32 rhs,
+ Label* label) {
+ if (sp.Is(GetStackPointer64())) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch = temps.AcquireX();
+ Mov(scratch, sp);
+ Tst(scratch, Operand(rhs.value));
+ } else {
+ Tst(GetStackPointer64(), Operand(rhs.value));
+ }
+ B(label, cond);
+}
+
+void MacroAssemblerCompat::branchStackPtr(Condition cond, Register rhs_,
+ Label* label) {
+ ARMRegister rhs(rhs_, 64);
+ if (sp.Is(GetStackPointer64())) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch = temps.AcquireX();
+ Mov(scratch, sp);
+ Cmp(scratch, rhs);
+ } else {
+ Cmp(GetStackPointer64(), rhs);
+ }
+ B(label, cond);
+}
+
+void MacroAssemblerCompat::branchStackPtrRhs(Condition cond, Address lhs,
+ Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch = temps.AcquireX();
+ Ldr(scratch, toMemOperand(lhs));
+ // Cmp disallows SP as the rhs, so flip the operands and invert the
+ // condition.
+ Cmp(GetStackPointer64(), scratch);
+ B(label, Assembler::InvertCondition(cond));
+}
+
+void MacroAssemblerCompat::branchStackPtrRhs(Condition cond,
+ AbsoluteAddress lhs,
+ Label* label) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch = temps.AcquireX();
+ loadPtr(lhs, scratch.asUnsized());
+ // Cmp disallows SP as the rhs, so flip the operands and invert the
+ // condition.
+ Cmp(GetStackPointer64(), scratch);
+ B(label, Assembler::InvertCondition(cond));
+}
+
+// If source is a double, load into dest.
+// If source is int32, convert to double and store in dest.
+// Else, branch to failure.
+void MacroAssemblerCompat::ensureDouble(const ValueOperand& source,
+ FloatRegister dest, Label* failure) {
+ Label isDouble, done;
+
+ {
+ ScratchTagScope tag(asMasm(), source);
+ splitTagForTest(source, tag);
+ asMasm().branchTestDouble(Assembler::Equal, tag, &isDouble);
+ asMasm().branchTestInt32(Assembler::NotEqual, tag, failure);
+ }
+
+ convertInt32ToDouble(source.valueReg(), dest);
+ jump(&done);
+
+ bind(&isDouble);
+ unboxDouble(source, dest);
+
+ bind(&done);
+}
+
+void MacroAssemblerCompat::unboxValue(const ValueOperand& src, AnyRegister dest,
+ JSValueType type) {
+ if (dest.isFloat()) {
+ Label notInt32, end;
+ asMasm().branchTestInt32(Assembler::NotEqual, src, &notInt32);
+ convertInt32ToDouble(src.valueReg(), dest.fpu());
+ jump(&end);
+ bind(&notInt32);
+ unboxDouble(src, dest.fpu());
+ bind(&end);
+ } else {
+ unboxNonDouble(src, dest.gpr(), type);
+ }
+}
+
+} // namespace jit
+} // namespace js
+
+#endif /* jit_arm64_MacroAssembler_arm64_inl_h */
diff --git a/js/src/jit/arm64/MacroAssembler-arm64.cpp b/js/src/jit/arm64/MacroAssembler-arm64.cpp
new file mode 100644
index 0000000000..a4aff730e6
--- /dev/null
+++ b/js/src/jit/arm64/MacroAssembler-arm64.cpp
@@ -0,0 +1,3416 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "jit/arm64/MacroAssembler-arm64.h"
+
+#include "mozilla/MathAlgorithms.h"
+#include "mozilla/Maybe.h"
+
+#include "jsmath.h"
+
+#include "jit/arm64/MoveEmitter-arm64.h"
+#include "jit/arm64/SharedICRegisters-arm64.h"
+#include "jit/Bailouts.h"
+#include "jit/BaselineFrame.h"
+#include "jit/JitRuntime.h"
+#include "jit/MacroAssembler.h"
+#include "util/Memory.h"
+#include "vm/BigIntType.h"
+#include "vm/JitActivation.h" // js::jit::JitActivation
+#include "vm/JSContext.h"
+#include "vm/StringType.h"
+
+#include "jit/MacroAssembler-inl.h"
+
+namespace js {
+namespace jit {
+
+enum class Width { _32 = 32, _64 = 64 };
+
+static inline ARMRegister X(Register r) { return ARMRegister(r, 64); }
+
+static inline ARMRegister X(MacroAssembler& masm, RegisterOrSP r) {
+ return masm.toARMRegister(r, 64);
+}
+
+static inline ARMRegister W(Register r) { return ARMRegister(r, 32); }
+
+static inline ARMRegister R(Register r, Width w) {
+ return ARMRegister(r, unsigned(w));
+}
+
+void MacroAssemblerCompat::boxValue(JSValueType type, Register src,
+ Register dest) {
+#ifdef DEBUG
+ if (type == JSVAL_TYPE_INT32 || type == JSVAL_TYPE_BOOLEAN) {
+ Label upper32BitsZeroed;
+ movePtr(ImmWord(UINT32_MAX), dest);
+ asMasm().branchPtr(Assembler::BelowOrEqual, src, dest, &upper32BitsZeroed);
+ breakpoint();
+ bind(&upper32BitsZeroed);
+ }
+#endif
+ Orr(ARMRegister(dest, 64), ARMRegister(src, 64),
+ Operand(ImmShiftedTag(type).value));
+}
+
+#ifdef ENABLE_WASM_SIMD
+bool MacroAssembler::MustMaskShiftCountSimd128(wasm::SimdOp op, int32_t* mask) {
+ switch (op) {
+ case wasm::SimdOp::I8x16Shl:
+ case wasm::SimdOp::I8x16ShrU:
+ case wasm::SimdOp::I8x16ShrS:
+ *mask = 7;
+ break;
+ case wasm::SimdOp::I16x8Shl:
+ case wasm::SimdOp::I16x8ShrU:
+ case wasm::SimdOp::I16x8ShrS:
+ *mask = 15;
+ break;
+ case wasm::SimdOp::I32x4Shl:
+ case wasm::SimdOp::I32x4ShrU:
+ case wasm::SimdOp::I32x4ShrS:
+ *mask = 31;
+ break;
+ case wasm::SimdOp::I64x2Shl:
+ case wasm::SimdOp::I64x2ShrU:
+ case wasm::SimdOp::I64x2ShrS:
+ *mask = 63;
+ break;
+ default:
+ MOZ_CRASH("Unexpected shift operation");
+ }
+ return true;
+}
+#endif
+
+void MacroAssembler::clampDoubleToUint8(FloatRegister input, Register output) {
+ ARMRegister dest(output, 32);
+ Fcvtns(dest, ARMFPRegister(input, 64));
+
+ {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+
+ Mov(scratch32, Operand(0xff));
+ Cmp(dest, scratch32);
+ Csel(dest, dest, scratch32, LessThan);
+ }
+
+ Cmp(dest, Operand(0));
+ Csel(dest, dest, wzr, GreaterThan);
+}
+
+js::jit::MacroAssembler& MacroAssemblerCompat::asMasm() {
+ return *static_cast<js::jit::MacroAssembler*>(this);
+}
+
+const js::jit::MacroAssembler& MacroAssemblerCompat::asMasm() const {
+ return *static_cast<const js::jit::MacroAssembler*>(this);
+}
+
+vixl::MacroAssembler& MacroAssemblerCompat::asVIXL() {
+ return *static_cast<vixl::MacroAssembler*>(this);
+}
+
+const vixl::MacroAssembler& MacroAssemblerCompat::asVIXL() const {
+ return *static_cast<const vixl::MacroAssembler*>(this);
+}
+
+void MacroAssemblerCompat::mov(CodeLabel* label, Register dest) {
+ BufferOffset bo = movePatchablePtr(ImmWord(/* placeholder */ 0), dest);
+ label->patchAt()->bind(bo.getOffset());
+ label->setLinkMode(CodeLabel::MoveImmediate);
+}
+
+BufferOffset MacroAssemblerCompat::movePatchablePtr(ImmPtr ptr, Register dest) {
+ const size_t numInst = 1; // Inserting one load instruction.
+ const unsigned numPoolEntries = 2; // Every pool entry is 4 bytes.
+ uint8_t* literalAddr = (uint8_t*)(&ptr.value); // TODO: Should be const.
+
+ // Scratch space for generating the load instruction.
+ //
+ // allocLiteralLoadEntry() will use InsertIndexIntoTag() to store a temporary
+ // index to the corresponding PoolEntry in the instruction itself.
+ //
+ // That index will be fixed up later when finishPool()
+ // walks over all marked loads and calls PatchConstantPoolLoad().
+ uint32_t instructionScratch = 0;
+
+ // Emit the instruction mask in the scratch space.
+ // The offset doesn't matter: it will be fixed up later.
+ vixl::Assembler::ldr((Instruction*)&instructionScratch, ARMRegister(dest, 64),
+ 0);
+
+ // Add the entry to the pool, fix up the LDR imm19 offset,
+ // and add the completed instruction to the buffer.
+ return allocLiteralLoadEntry(numInst, numPoolEntries,
+ (uint8_t*)&instructionScratch, literalAddr);
+}
+
+BufferOffset MacroAssemblerCompat::movePatchablePtr(ImmWord ptr,
+ Register dest) {
+ const size_t numInst = 1; // Inserting one load instruction.
+ const unsigned numPoolEntries = 2; // Every pool entry is 4 bytes.
+ uint8_t* literalAddr = (uint8_t*)(&ptr.value);
+
+ // Scratch space for generating the load instruction.
+ //
+ // allocLiteralLoadEntry() will use InsertIndexIntoTag() to store a temporary
+ // index to the corresponding PoolEntry in the instruction itself.
+ //
+ // That index will be fixed up later when finishPool()
+ // walks over all marked loads and calls PatchConstantPoolLoad().
+ uint32_t instructionScratch = 0;
+
+ // Emit the instruction mask in the scratch space.
+ // The offset doesn't matter: it will be fixed up later.
+ vixl::Assembler::ldr((Instruction*)&instructionScratch, ARMRegister(dest, 64),
+ 0);
+
+ // Add the entry to the pool, fix up the LDR imm19 offset,
+ // and add the completed instruction to the buffer.
+ return allocLiteralLoadEntry(numInst, numPoolEntries,
+ (uint8_t*)&instructionScratch, literalAddr);
+}
+
+void MacroAssemblerCompat::loadPrivate(const Address& src, Register dest) {
+ loadPtr(src, dest);
+}
+
+void MacroAssemblerCompat::handleFailureWithHandlerTail(Label* profilerExitTail,
+ Label* bailoutTail) {
+ // Fail rather than silently create wrong code.
+ MOZ_RELEASE_ASSERT(GetStackPointer64().Is(PseudoStackPointer64));
+
+ // Reserve space for exception information.
+ int64_t size = (sizeof(ResumeFromException) + 7) & ~7;
+ Sub(PseudoStackPointer64, PseudoStackPointer64, Operand(size));
+ syncStackPtr();
+
+ MOZ_ASSERT(!x0.Is(PseudoStackPointer64));
+ Mov(x0, PseudoStackPointer64);
+
+ // Call the handler.
+ using Fn = void (*)(ResumeFromException* rfe);
+ asMasm().setupUnalignedABICall(r1);
+ asMasm().passABIArg(r0);
+ asMasm().callWithABI<Fn, HandleException>(
+ MoveOp::GENERAL, CheckUnsafeCallWithABI::DontCheckHasExitFrame);
+
+ Label entryFrame;
+ Label catch_;
+ Label finally;
+ Label returnBaseline;
+ Label returnIon;
+ Label bailout;
+ Label wasm;
+ Label wasmCatch;
+
+ // Check the `asMasm` calls above didn't mess with the StackPointer identity.
+ MOZ_ASSERT(GetStackPointer64().Is(PseudoStackPointer64));
+
+ loadPtr(Address(PseudoStackPointer, ResumeFromException::offsetOfKind()), r0);
+ asMasm().branch32(Assembler::Equal, r0,
+ Imm32(ExceptionResumeKind::EntryFrame), &entryFrame);
+ asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::Catch),
+ &catch_);
+ asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::Finally),
+ &finally);
+ asMasm().branch32(Assembler::Equal, r0,
+ Imm32(ExceptionResumeKind::ForcedReturnBaseline),
+ &returnBaseline);
+ asMasm().branch32(Assembler::Equal, r0,
+ Imm32(ExceptionResumeKind::ForcedReturnIon), &returnIon);
+ asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::Bailout),
+ &bailout);
+ asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::Wasm),
+ &wasm);
+ asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::WasmCatch),
+ &wasmCatch);
+
+ breakpoint(); // Invalid kind.
+
+ // No exception handler. Load the error value, restore state and return from
+ // the entry frame.
+ bind(&entryFrame);
+ moveValue(MagicValue(JS_ION_ERROR), JSReturnOperand);
+ loadPtr(
+ Address(PseudoStackPointer, ResumeFromException::offsetOfFramePointer()),
+ FramePointer);
+ loadPtr(
+ Address(PseudoStackPointer, ResumeFromException::offsetOfStackPointer()),
+ PseudoStackPointer);
+
+ // `retn` does indeed sync the stack pointer, but before doing that it reads
+ // from the stack. Consequently, if we remove this call to syncStackPointer
+ // then we take on the requirement to prove that the immediately preceding
+ // loadPtr produces a value for PSP which maintains the SP <= PSP invariant.
+ // That's a proof burden we don't want to take on. In general it would be
+ // good to move (at some time in the future, not now) to a world where
+ // *every* assignment to PSP or SP is followed immediately by a copy into
+ // the other register. That would make all required correctness proofs
+ // trivial in the sense that it requires only local inspection of code
+ // immediately following (dominated by) any such assignment.
+ syncStackPtr();
+ retn(Imm32(1 * sizeof(void*))); // Pop from stack and return.
+
+ // If we found a catch handler, this must be a baseline frame. Restore state
+ // and jump to the catch block.
+ bind(&catch_);
+ loadPtr(Address(PseudoStackPointer, ResumeFromException::offsetOfTarget()),
+ r0);
+ loadPtr(
+ Address(PseudoStackPointer, ResumeFromException::offsetOfFramePointer()),
+ FramePointer);
+ loadPtr(
+ Address(PseudoStackPointer, ResumeFromException::offsetOfStackPointer()),
+ PseudoStackPointer);
+ syncStackPtr();
+ Br(x0);
+
+ // If we found a finally block, this must be a baseline frame. Push two
+ // values expected by the finally block: the exception and BooleanValue(true).
+ bind(&finally);
+ ARMRegister exception = x1;
+ Ldr(exception, MemOperand(PseudoStackPointer64,
+ ResumeFromException::offsetOfException()));
+ Ldr(x0,
+ MemOperand(PseudoStackPointer64, ResumeFromException::offsetOfTarget()));
+ Ldr(ARMRegister(FramePointer, 64),
+ MemOperand(PseudoStackPointer64,
+ ResumeFromException::offsetOfFramePointer()));
+ Ldr(PseudoStackPointer64,
+ MemOperand(PseudoStackPointer64,
+ ResumeFromException::offsetOfStackPointer()));
+ syncStackPtr();
+ push(exception);
+ pushValue(BooleanValue(true));
+ Br(x0);
+
+ // Return BaselineFrame->returnValue() to the caller.
+ // Used in debug mode and for GeneratorReturn.
+ Label profilingInstrumentation;
+ bind(&returnBaseline);
+ loadPtr(
+ Address(PseudoStackPointer, ResumeFromException::offsetOfFramePointer()),
+ FramePointer);
+ loadPtr(
+ Address(PseudoStackPointer, ResumeFromException::offsetOfStackPointer()),
+ PseudoStackPointer);
+ // See comment further up beginning "`retn` does indeed sync the stack
+ // pointer". That comment applies here too.
+ syncStackPtr();
+ loadValue(Address(FramePointer, BaselineFrame::reverseOffsetOfReturnValue()),
+ JSReturnOperand);
+ jump(&profilingInstrumentation);
+
+ // Return the given value to the caller.
+ bind(&returnIon);
+ loadValue(
+ Address(PseudoStackPointer, ResumeFromException::offsetOfException()),
+ JSReturnOperand);
+ loadPtr(
+ Address(PseudoStackPointer, offsetof(ResumeFromException, framePointer)),
+ FramePointer);
+ loadPtr(
+ Address(PseudoStackPointer, offsetof(ResumeFromException, stackPointer)),
+ PseudoStackPointer);
+ syncStackPtr();
+
+ // If profiling is enabled, then update the lastProfilingFrame to refer to
+ // caller frame before returning. This code is shared by ForcedReturnIon
+ // and ForcedReturnBaseline.
+ bind(&profilingInstrumentation);
+ {
+ Label skipProfilingInstrumentation;
+ AbsoluteAddress addressOfEnabled(
+ asMasm().runtime()->geckoProfiler().addressOfEnabled());
+ asMasm().branch32(Assembler::Equal, addressOfEnabled, Imm32(0),
+ &skipProfilingInstrumentation);
+ jump(profilerExitTail);
+ bind(&skipProfilingInstrumentation);
+ }
+
+ movePtr(FramePointer, PseudoStackPointer);
+ syncStackPtr();
+ vixl::MacroAssembler::Pop(ARMRegister(FramePointer, 64));
+
+ vixl::MacroAssembler::Pop(vixl::lr);
+ syncStackPtr();
+ vixl::MacroAssembler::Ret(vixl::lr);
+
+ // If we are bailing out to baseline to handle an exception, jump to the
+ // bailout tail stub. Load 1 (true) in x0 (ReturnReg) to indicate success.
+ bind(&bailout);
+ Ldr(x2, MemOperand(PseudoStackPointer64,
+ ResumeFromException::offsetOfBailoutInfo()));
+ Ldr(PseudoStackPointer64,
+ MemOperand(PseudoStackPointer64,
+ ResumeFromException::offsetOfStackPointer()));
+ syncStackPtr();
+ Mov(x0, 1);
+ jump(bailoutTail);
+
+ // If we are throwing and the innermost frame was a wasm frame, reset SP and
+ // FP; SP is pointing to the unwound return address to the wasm entry, so
+ // we can just ret().
+ bind(&wasm);
+ Ldr(x29, MemOperand(PseudoStackPointer64,
+ ResumeFromException::offsetOfFramePointer()));
+ Ldr(PseudoStackPointer64,
+ MemOperand(PseudoStackPointer64,
+ ResumeFromException::offsetOfStackPointer()));
+ syncStackPtr();
+ Mov(x23, int64_t(wasm::FailInstanceReg));
+ ret();
+
+ // Found a wasm catch handler, restore state and jump to it.
+ bind(&wasmCatch);
+ loadPtr(Address(PseudoStackPointer, ResumeFromException::offsetOfTarget()),
+ r0);
+ loadPtr(
+ Address(PseudoStackPointer, ResumeFromException::offsetOfFramePointer()),
+ r29);
+ loadPtr(
+ Address(PseudoStackPointer, ResumeFromException::offsetOfStackPointer()),
+ PseudoStackPointer);
+ syncStackPtr();
+ Br(x0);
+
+ MOZ_ASSERT(GetStackPointer64().Is(PseudoStackPointer64));
+}
+
+void MacroAssemblerCompat::profilerEnterFrame(Register framePtr,
+ Register scratch) {
+ asMasm().loadJSContext(scratch);
+ loadPtr(Address(scratch, offsetof(JSContext, profilingActivation_)), scratch);
+ storePtr(framePtr,
+ Address(scratch, JitActivation::offsetOfLastProfilingFrame()));
+ storePtr(ImmPtr(nullptr),
+ Address(scratch, JitActivation::offsetOfLastProfilingCallSite()));
+}
+
+void MacroAssemblerCompat::profilerExitFrame() {
+ jump(asMasm().runtime()->jitRuntime()->getProfilerExitFrameTail());
+}
+
+Assembler::Condition MacroAssemblerCompat::testStringTruthy(
+ bool truthy, const ValueOperand& value) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ const ARMRegister scratch32(scratch, 32);
+ const ARMRegister scratch64(scratch, 64);
+
+ MOZ_ASSERT(value.valueReg() != scratch);
+
+ unboxString(value, scratch);
+ Ldr(scratch32, MemOperand(scratch64, JSString::offsetOfLength()));
+ Cmp(scratch32, Operand(0));
+ return truthy ? Condition::NonZero : Condition::Zero;
+}
+
+Assembler::Condition MacroAssemblerCompat::testBigIntTruthy(
+ bool truthy, const ValueOperand& value) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+
+ MOZ_ASSERT(value.valueReg() != scratch);
+
+ unboxBigInt(value, scratch);
+ load32(Address(scratch, BigInt::offsetOfDigitLength()), scratch);
+ cmp32(scratch, Imm32(0));
+ return truthy ? Condition::NonZero : Condition::Zero;
+}
+
+void MacroAssemblerCompat::breakpoint() {
+ // Note, other payloads are possible, but GDB is known to misinterpret them
+ // sometimes and iloop on the breakpoint instead of stopping properly.
+ Brk(0);
+}
+
+// Either `any` is valid or `sixtyfour` is valid. Return a 32-bit ARMRegister
+// in the first case and an ARMRegister of the desired size in the latter case.
+
+static inline ARMRegister SelectGPReg(AnyRegister any, Register64 sixtyfour,
+ unsigned size = 64) {
+ MOZ_ASSERT(any.isValid() != (sixtyfour != Register64::Invalid()));
+
+ if (sixtyfour == Register64::Invalid()) {
+ return ARMRegister(any.gpr(), 32);
+ }
+
+ return ARMRegister(sixtyfour.reg, size);
+}
+
+// Assert that `sixtyfour` is invalid and then return an FP register from `any`
+// of the desired size.
+
+static inline ARMFPRegister SelectFPReg(AnyRegister any, Register64 sixtyfour,
+ unsigned size) {
+ MOZ_ASSERT(sixtyfour == Register64::Invalid());
+ return ARMFPRegister(any.fpu(), size);
+}
+
+void MacroAssemblerCompat::wasmLoadImpl(const wasm::MemoryAccessDesc& access,
+ Register memoryBase_, Register ptr_,
+ AnyRegister outany, Register64 out64) {
+ uint32_t offset = access.offset();
+ MOZ_ASSERT(offset < asMasm().wasmMaxOffsetGuardLimit());
+
+ ARMRegister memoryBase(memoryBase_, 64);
+ ARMRegister ptr(ptr_, 64);
+ if (offset) {
+ vixl::UseScratchRegisterScope temps(this);
+ ARMRegister scratch = temps.AcquireX();
+ Add(scratch, ptr, Operand(offset));
+ MemOperand srcAddr(memoryBase, scratch);
+ wasmLoadImpl(access, srcAddr, outany, out64);
+ } else {
+ MemOperand srcAddr(memoryBase, ptr);
+ wasmLoadImpl(access, srcAddr, outany, out64);
+ }
+}
+
+void MacroAssemblerCompat::wasmLoadImpl(const wasm::MemoryAccessDesc& access,
+ MemOperand srcAddr, AnyRegister outany,
+ Register64 out64) {
+ // Reg+Reg and Reg+SmallImm addressing is directly encodable in one Load
+ // instruction, hence we expect exactly one instruction to be emitted in the
+ // window.
+ int32_t instructionsExpected = 1;
+
+ // Splat and widen however require an additional instruction to be emitted
+ // after the load, so allow one more instruction in the window.
+ if (access.isSplatSimd128Load() || access.isWidenSimd128Load()) {
+ MOZ_ASSERT(access.type() == Scalar::Float64);
+ instructionsExpected++;
+ }
+
+ // NOTE: the generated code must match the assembly code in gen_load in
+ // GenerateAtomicOperations.py
+ asMasm().memoryBarrierBefore(access.sync());
+
+ {
+ // The AutoForbidPoolsAndNops asserts if we emit more than the expected
+ // number of instructions and thus ensures that the access metadata is
+ // emitted at the address of the Load.
+ AutoForbidPoolsAndNops afp(this, instructionsExpected);
+
+ append(access, asMasm().currentOffset());
+ switch (access.type()) {
+ case Scalar::Int8:
+ Ldrsb(SelectGPReg(outany, out64), srcAddr);
+ break;
+ case Scalar::Uint8:
+ Ldrb(SelectGPReg(outany, out64), srcAddr);
+ break;
+ case Scalar::Int16:
+ Ldrsh(SelectGPReg(outany, out64), srcAddr);
+ break;
+ case Scalar::Uint16:
+ Ldrh(SelectGPReg(outany, out64), srcAddr);
+ break;
+ case Scalar::Int32:
+ if (out64 != Register64::Invalid()) {
+ Ldrsw(SelectGPReg(outany, out64), srcAddr);
+ } else {
+ Ldr(SelectGPReg(outany, out64, 32), srcAddr);
+ }
+ break;
+ case Scalar::Uint32:
+ Ldr(SelectGPReg(outany, out64, 32), srcAddr);
+ break;
+ case Scalar::Int64:
+ Ldr(SelectGPReg(outany, out64), srcAddr);
+ break;
+ case Scalar::Float32:
+ // LDR does the right thing also for access.isZeroExtendSimd128Load()
+ Ldr(SelectFPReg(outany, out64, 32), srcAddr);
+ break;
+ case Scalar::Float64:
+ if (access.isSplatSimd128Load() || access.isWidenSimd128Load()) {
+ ScratchSimd128Scope scratch_(asMasm());
+ ARMFPRegister scratch = Simd1D(scratch_);
+ Ldr(scratch, srcAddr);
+ if (access.isSplatSimd128Load()) {
+ Dup(SelectFPReg(outany, out64, 128).V2D(), scratch, 0);
+ } else {
+ MOZ_ASSERT(access.isWidenSimd128Load());
+ switch (access.widenSimdOp()) {
+ case wasm::SimdOp::V128Load8x8S:
+ Sshll(SelectFPReg(outany, out64, 128).V8H(), scratch.V8B(), 0);
+ break;
+ case wasm::SimdOp::V128Load8x8U:
+ Ushll(SelectFPReg(outany, out64, 128).V8H(), scratch.V8B(), 0);
+ break;
+ case wasm::SimdOp::V128Load16x4S:
+ Sshll(SelectFPReg(outany, out64, 128).V4S(), scratch.V4H(), 0);
+ break;
+ case wasm::SimdOp::V128Load16x4U:
+ Ushll(SelectFPReg(outany, out64, 128).V4S(), scratch.V4H(), 0);
+ break;
+ case wasm::SimdOp::V128Load32x2S:
+ Sshll(SelectFPReg(outany, out64, 128).V2D(), scratch.V2S(), 0);
+ break;
+ case wasm::SimdOp::V128Load32x2U:
+ Ushll(SelectFPReg(outany, out64, 128).V2D(), scratch.V2S(), 0);
+ break;
+ default:
+ MOZ_CRASH("Unexpected widening op for wasmLoad");
+ }
+ }
+ } else {
+ // LDR does the right thing also for access.isZeroExtendSimd128Load()
+ Ldr(SelectFPReg(outany, out64, 64), srcAddr);
+ }
+ break;
+ case Scalar::Simd128:
+ Ldr(SelectFPReg(outany, out64, 128), srcAddr);
+ break;
+ case Scalar::Uint8Clamped:
+ case Scalar::BigInt64:
+ case Scalar::BigUint64:
+ case Scalar::MaxTypedArrayViewType:
+ MOZ_CRASH("unexpected array type");
+ }
+ }
+
+ asMasm().memoryBarrierAfter(access.sync());
+}
+
+// Return true if `address` can be represented as an immediate (possibly scaled
+// by the access size) in an LDR/STR type instruction.
+//
+// For more about the logic here, see vixl::MacroAssembler::LoadStoreMacro().
+static bool IsLSImmediateOffset(uint64_t address, size_t accessByteSize) {
+ // The predicates below operate on signed values only.
+ if (address > INT64_MAX) {
+ return false;
+ }
+
+ // The access size is always a power of 2, so computing the log amounts to
+ // counting trailing zeroes.
+ unsigned logAccessSize = mozilla::CountTrailingZeroes32(accessByteSize);
+ return (MacroAssemblerCompat::IsImmLSUnscaled(int64_t(address)) ||
+ MacroAssemblerCompat::IsImmLSScaled(int64_t(address), logAccessSize));
+}
+
+void MacroAssemblerCompat::wasmLoadAbsolute(
+ const wasm::MemoryAccessDesc& access, Register memoryBase, uint64_t address,
+ AnyRegister output, Register64 out64) {
+ if (!IsLSImmediateOffset(address, access.byteSize())) {
+ // The access will require the constant to be loaded into a temp register.
+ // Do so here, to keep the logic in wasmLoadImpl() tractable wrt emitting
+ // trap information.
+ //
+ // Almost all constant addresses will in practice be handled by a single MOV
+ // so do not worry about additional optimizations here.
+ vixl::UseScratchRegisterScope temps(this);
+ ARMRegister scratch = temps.AcquireX();
+ Mov(scratch, address);
+ MemOperand srcAddr(X(memoryBase), scratch);
+ wasmLoadImpl(access, srcAddr, output, out64);
+ } else {
+ MemOperand srcAddr(X(memoryBase), address);
+ wasmLoadImpl(access, srcAddr, output, out64);
+ }
+}
+
+void MacroAssemblerCompat::wasmStoreImpl(const wasm::MemoryAccessDesc& access,
+ AnyRegister valany, Register64 val64,
+ Register memoryBase_, Register ptr_) {
+ uint32_t offset = access.offset();
+ MOZ_ASSERT(offset < asMasm().wasmMaxOffsetGuardLimit());
+
+ ARMRegister memoryBase(memoryBase_, 64);
+ ARMRegister ptr(ptr_, 64);
+ if (offset) {
+ vixl::UseScratchRegisterScope temps(this);
+ ARMRegister scratch = temps.AcquireX();
+ Add(scratch, ptr, Operand(offset));
+ MemOperand destAddr(memoryBase, scratch);
+ wasmStoreImpl(access, destAddr, valany, val64);
+ } else {
+ MemOperand destAddr(memoryBase, ptr);
+ wasmStoreImpl(access, destAddr, valany, val64);
+ }
+}
+
+void MacroAssemblerCompat::wasmStoreImpl(const wasm::MemoryAccessDesc& access,
+ MemOperand dstAddr, AnyRegister valany,
+ Register64 val64) {
+ // NOTE: the generated code must match the assembly code in gen_store in
+ // GenerateAtomicOperations.py
+ asMasm().memoryBarrierBefore(access.sync());
+
+ {
+ // Reg+Reg addressing is directly encodable in one Store instruction, hence
+ // the AutoForbidPoolsAndNops will ensure that the access metadata is
+ // emitted at the address of the Store. The AutoForbidPoolsAndNops will
+ // assert if we emit more than one instruction.
+
+ AutoForbidPoolsAndNops afp(this,
+ /* max number of instructions in scope = */ 1);
+
+ append(access, asMasm().currentOffset());
+ switch (access.type()) {
+ case Scalar::Int8:
+ case Scalar::Uint8:
+ Strb(SelectGPReg(valany, val64), dstAddr);
+ break;
+ case Scalar::Int16:
+ case Scalar::Uint16:
+ Strh(SelectGPReg(valany, val64), dstAddr);
+ break;
+ case Scalar::Int32:
+ case Scalar::Uint32:
+ Str(SelectGPReg(valany, val64), dstAddr);
+ break;
+ case Scalar::Int64:
+ Str(SelectGPReg(valany, val64), dstAddr);
+ break;
+ case Scalar::Float32:
+ Str(SelectFPReg(valany, val64, 32), dstAddr);
+ break;
+ case Scalar::Float64:
+ Str(SelectFPReg(valany, val64, 64), dstAddr);
+ break;
+ case Scalar::Simd128:
+ Str(SelectFPReg(valany, val64, 128), dstAddr);
+ break;
+ case Scalar::Uint8Clamped:
+ case Scalar::BigInt64:
+ case Scalar::BigUint64:
+ case Scalar::MaxTypedArrayViewType:
+ MOZ_CRASH("unexpected array type");
+ }
+ }
+
+ asMasm().memoryBarrierAfter(access.sync());
+}
+
+void MacroAssemblerCompat::wasmStoreAbsolute(
+ const wasm::MemoryAccessDesc& access, AnyRegister value, Register64 value64,
+ Register memoryBase, uint64_t address) {
+ // See comments in wasmLoadAbsolute.
+ unsigned logAccessSize = mozilla::CountTrailingZeroes32(access.byteSize());
+ if (address > INT64_MAX || !(IsImmLSScaled(int64_t(address), logAccessSize) ||
+ IsImmLSUnscaled(int64_t(address)))) {
+ vixl::UseScratchRegisterScope temps(this);
+ ARMRegister scratch = temps.AcquireX();
+ Mov(scratch, address);
+ MemOperand destAddr(X(memoryBase), scratch);
+ wasmStoreImpl(access, destAddr, value, value64);
+ } else {
+ MemOperand destAddr(X(memoryBase), address);
+ wasmStoreImpl(access, destAddr, value, value64);
+ }
+}
+
+void MacroAssemblerCompat::compareSimd128Int(Assembler::Condition cond,
+ ARMFPRegister dest,
+ ARMFPRegister lhs,
+ ARMFPRegister rhs) {
+ switch (cond) {
+ case Assembler::Equal:
+ Cmeq(dest, lhs, rhs);
+ break;
+ case Assembler::NotEqual:
+ Cmeq(dest, lhs, rhs);
+ Mvn(dest, dest);
+ break;
+ case Assembler::GreaterThan:
+ Cmgt(dest, lhs, rhs);
+ break;
+ case Assembler::GreaterThanOrEqual:
+ Cmge(dest, lhs, rhs);
+ break;
+ case Assembler::LessThan:
+ Cmgt(dest, rhs, lhs);
+ break;
+ case Assembler::LessThanOrEqual:
+ Cmge(dest, rhs, lhs);
+ break;
+ case Assembler::Above:
+ Cmhi(dest, lhs, rhs);
+ break;
+ case Assembler::AboveOrEqual:
+ Cmhs(dest, lhs, rhs);
+ break;
+ case Assembler::Below:
+ Cmhi(dest, rhs, lhs);
+ break;
+ case Assembler::BelowOrEqual:
+ Cmhs(dest, rhs, lhs);
+ break;
+ default:
+ MOZ_CRASH("Unexpected SIMD integer condition");
+ }
+}
+
+void MacroAssemblerCompat::compareSimd128Float(Assembler::Condition cond,
+ ARMFPRegister dest,
+ ARMFPRegister lhs,
+ ARMFPRegister rhs) {
+ switch (cond) {
+ case Assembler::Equal:
+ Fcmeq(dest, lhs, rhs);
+ break;
+ case Assembler::NotEqual:
+ Fcmeq(dest, lhs, rhs);
+ Mvn(dest, dest);
+ break;
+ case Assembler::GreaterThan:
+ Fcmgt(dest, lhs, rhs);
+ break;
+ case Assembler::GreaterThanOrEqual:
+ Fcmge(dest, lhs, rhs);
+ break;
+ case Assembler::LessThan:
+ Fcmgt(dest, rhs, lhs);
+ break;
+ case Assembler::LessThanOrEqual:
+ Fcmge(dest, rhs, lhs);
+ break;
+ default:
+ MOZ_CRASH("Unexpected SIMD integer condition");
+ }
+}
+
+void MacroAssemblerCompat::rightShiftInt8x16(FloatRegister lhs, Register rhs,
+ FloatRegister dest,
+ bool isUnsigned) {
+ ScratchSimd128Scope scratch_(asMasm());
+ ARMFPRegister shift = Simd16B(scratch_);
+
+ Dup(shift, ARMRegister(rhs, 32));
+ Neg(shift, shift);
+
+ if (isUnsigned) {
+ Ushl(Simd16B(dest), Simd16B(lhs), shift);
+ } else {
+ Sshl(Simd16B(dest), Simd16B(lhs), shift);
+ }
+}
+
+void MacroAssemblerCompat::rightShiftInt16x8(FloatRegister lhs, Register rhs,
+ FloatRegister dest,
+ bool isUnsigned) {
+ ScratchSimd128Scope scratch_(asMasm());
+ ARMFPRegister shift = Simd8H(scratch_);
+
+ Dup(shift, ARMRegister(rhs, 32));
+ Neg(shift, shift);
+
+ if (isUnsigned) {
+ Ushl(Simd8H(dest), Simd8H(lhs), shift);
+ } else {
+ Sshl(Simd8H(dest), Simd8H(lhs), shift);
+ }
+}
+
+void MacroAssemblerCompat::rightShiftInt32x4(FloatRegister lhs, Register rhs,
+ FloatRegister dest,
+ bool isUnsigned) {
+ ScratchSimd128Scope scratch_(asMasm());
+ ARMFPRegister shift = Simd4S(scratch_);
+
+ Dup(shift, ARMRegister(rhs, 32));
+ Neg(shift, shift);
+
+ if (isUnsigned) {
+ Ushl(Simd4S(dest), Simd4S(lhs), shift);
+ } else {
+ Sshl(Simd4S(dest), Simd4S(lhs), shift);
+ }
+}
+
+void MacroAssemblerCompat::rightShiftInt64x2(FloatRegister lhs, Register rhs,
+ FloatRegister dest,
+ bool isUnsigned) {
+ ScratchSimd128Scope scratch_(asMasm());
+ ARMFPRegister shift = Simd2D(scratch_);
+
+ Dup(shift, ARMRegister(rhs, 64));
+ Neg(shift, shift);
+
+ if (isUnsigned) {
+ Ushl(Simd2D(dest), Simd2D(lhs), shift);
+ } else {
+ Sshl(Simd2D(dest), Simd2D(lhs), shift);
+ }
+}
+
+void MacroAssembler::reserveStack(uint32_t amount) {
+ // TODO: This bumps |sp| every time we reserve using a second register.
+ // It would save some instructions if we had a fixed frame size.
+ vixl::MacroAssembler::Claim(Operand(amount));
+ adjustFrame(amount);
+}
+
+void MacroAssembler::Push(RegisterOrSP reg) {
+ if (IsHiddenSP(reg)) {
+ push(sp);
+ } else {
+ push(AsRegister(reg));
+ }
+ adjustFrame(sizeof(intptr_t));
+}
+
+//{{{ check_macroassembler_style
+// ===============================================================
+// MacroAssembler high-level usage.
+
+void MacroAssembler::flush() { Assembler::flush(); }
+
+// ===============================================================
+// Stack manipulation functions.
+
+// Routines for saving/restoring registers on the stack. The format is:
+//
+// (highest address)
+//
+// integer (X) regs in any order size: 8 * # int regs
+//
+// if # int regs is odd,
+// then an 8 byte alignment hole size: 0 or 8
+//
+// double (D) regs in any order size: 8 * # double regs
+//
+// if # double regs is odd,
+// then an 8 byte alignment hole size: 0 or 8
+//
+// vector (Q) regs in any order size: 16 * # vector regs
+//
+// (lowest address)
+//
+// Hence the size of the save area is 0 % 16. And, provided that the base
+// (highest) address is 16-aligned, then the vector reg save/restore accesses
+// will also be 16-aligned, as will pairwise operations for the double regs.
+//
+// Implied by this is that the format of the double and vector dump area
+// corresponds with what FloatRegister::GetPushSizeInBytes computes.
+// See block comment in MacroAssembler.h for more details.
+
+size_t MacroAssembler::PushRegsInMaskSizeInBytes(LiveRegisterSet set) {
+ size_t numIntRegs = set.gprs().size();
+ return ((numIntRegs + 1) & ~1) * sizeof(intptr_t) +
+ FloatRegister::GetPushSizeInBytes(set.fpus());
+}
+
+// Generate code to dump the values in `set`, either on the stack if `dest` is
+// `Nothing` or working backwards from the address denoted by `dest` if it is
+// `Some`. These two cases are combined so as to minimise the chance of
+// mistakenly generating different formats for the same `set`, given that the
+// `Some` `dest` case is used extremely rarely.
+static void PushOrStoreRegsInMask(MacroAssembler* masm, LiveRegisterSet set,
+ mozilla::Maybe<Address> dest) {
+ static_assert(sizeof(FloatRegisters::RegisterContent) == 16);
+
+ // If we're saving to arbitrary memory, check the destination is big enough.
+ if (dest) {
+ mozilla::DebugOnly<size_t> bytesRequired =
+ masm->PushRegsInMaskSizeInBytes(set);
+ MOZ_ASSERT(dest->offset >= 0);
+ MOZ_ASSERT(((size_t)dest->offset) >= bytesRequired);
+ }
+
+ // Note the high limit point; we'll check it again later.
+ mozilla::DebugOnly<size_t> maxExtentInitial =
+ dest ? dest->offset : masm->framePushed();
+
+ // Gather up the integer registers in groups of four, and either push each
+ // group as a single transfer so as to minimise the number of stack pointer
+ // changes, or write them individually to memory. Take care to ensure the
+ // space used remains 16-aligned.
+ for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more();) {
+ vixl::CPURegister src[4] = {vixl::NoCPUReg, vixl::NoCPUReg, vixl::NoCPUReg,
+ vixl::NoCPUReg};
+ size_t i;
+ for (i = 0; i < 4 && iter.more(); i++) {
+ src[i] = ARMRegister(*iter, 64);
+ ++iter;
+ }
+ MOZ_ASSERT(i > 0);
+
+ if (i == 1 || i == 3) {
+ // Ensure the stack remains 16-aligned
+ MOZ_ASSERT(!iter.more());
+ src[i] = vixl::xzr;
+ i++;
+ }
+ MOZ_ASSERT(i == 2 || i == 4);
+
+ if (dest) {
+ for (size_t j = 0; j < i; j++) {
+ Register ireg = Register::FromCode(src[j].IsZero() ? Registers::xzr
+ : src[j].code());
+ dest->offset -= sizeof(intptr_t);
+ masm->storePtr(ireg, *dest);
+ }
+ } else {
+ masm->adjustFrame(i * 8);
+ masm->vixl::MacroAssembler::Push(src[0], src[1], src[2], src[3]);
+ }
+ }
+
+ // Now the same for the FP double registers. Note that because of how
+ // ReduceSetForPush works, an underlying AArch64 SIMD/FP register can either
+ // be present as a double register, or as a V128 register, but not both.
+ // Firstly, round up the registers to be pushed.
+
+ FloatRegisterSet fpuSet(set.fpus().reduceSetForPush());
+ vixl::CPURegister allSrcs[FloatRegisters::TotalPhys];
+ size_t numAllSrcs = 0;
+
+ for (FloatRegisterBackwardIterator iter(fpuSet); iter.more(); ++iter) {
+ FloatRegister reg = *iter;
+ if (reg.isDouble()) {
+ MOZ_RELEASE_ASSERT(numAllSrcs < FloatRegisters::TotalPhys);
+ allSrcs[numAllSrcs] = ARMFPRegister(reg, 64);
+ numAllSrcs++;
+ } else {
+ MOZ_ASSERT(reg.isSimd128());
+ }
+ }
+ MOZ_RELEASE_ASSERT(numAllSrcs <= FloatRegisters::TotalPhys);
+
+ if ((numAllSrcs & 1) == 1) {
+ // We've got an odd number of doubles. In order to maintain 16-alignment,
+ // push the last register twice. We'll skip over the duplicate in
+ // PopRegsInMaskIgnore.
+ allSrcs[numAllSrcs] = allSrcs[numAllSrcs - 1];
+ numAllSrcs++;
+ }
+ MOZ_RELEASE_ASSERT(numAllSrcs <= FloatRegisters::TotalPhys);
+ MOZ_RELEASE_ASSERT((numAllSrcs & 1) == 0);
+
+ // And now generate the transfers.
+ size_t i;
+ if (dest) {
+ for (i = 0; i < numAllSrcs; i++) {
+ FloatRegister freg =
+ FloatRegister(FloatRegisters::FPRegisterID(allSrcs[i].code()),
+ FloatRegisters::Kind::Double);
+ dest->offset -= sizeof(double);
+ masm->storeDouble(freg, *dest);
+ }
+ } else {
+ i = 0;
+ while (i < numAllSrcs) {
+ vixl::CPURegister src[4] = {vixl::NoCPUReg, vixl::NoCPUReg,
+ vixl::NoCPUReg, vixl::NoCPUReg};
+ size_t j;
+ for (j = 0; j < 4 && j + i < numAllSrcs; j++) {
+ src[j] = allSrcs[j + i];
+ }
+ masm->adjustFrame(8 * j);
+ masm->vixl::MacroAssembler::Push(src[0], src[1], src[2], src[3]);
+ i += j;
+ }
+ }
+ MOZ_ASSERT(i == numAllSrcs);
+
+ // Finally, deal with the SIMD (V128) registers. This is a bit simpler
+ // as there's no need for special-casing to maintain 16-alignment.
+
+ numAllSrcs = 0;
+ for (FloatRegisterBackwardIterator iter(fpuSet); iter.more(); ++iter) {
+ FloatRegister reg = *iter;
+ if (reg.isSimd128()) {
+ MOZ_RELEASE_ASSERT(numAllSrcs < FloatRegisters::TotalPhys);
+ allSrcs[numAllSrcs] = ARMFPRegister(reg, 128);
+ numAllSrcs++;
+ }
+ }
+ MOZ_RELEASE_ASSERT(numAllSrcs <= FloatRegisters::TotalPhys);
+
+ // Generate the transfers.
+ if (dest) {
+ for (i = 0; i < numAllSrcs; i++) {
+ FloatRegister freg =
+ FloatRegister(FloatRegisters::FPRegisterID(allSrcs[i].code()),
+ FloatRegisters::Kind::Simd128);
+ dest->offset -= FloatRegister::SizeOfSimd128;
+ masm->storeUnalignedSimd128(freg, *dest);
+ }
+ } else {
+ i = 0;
+ while (i < numAllSrcs) {
+ vixl::CPURegister src[4] = {vixl::NoCPUReg, vixl::NoCPUReg,
+ vixl::NoCPUReg, vixl::NoCPUReg};
+ size_t j;
+ for (j = 0; j < 4 && j + i < numAllSrcs; j++) {
+ src[j] = allSrcs[j + i];
+ }
+ masm->adjustFrame(16 * j);
+ masm->vixl::MacroAssembler::Push(src[0], src[1], src[2], src[3]);
+ i += j;
+ }
+ }
+ MOZ_ASSERT(i == numAllSrcs);
+
+ // Final overrun check.
+ if (dest) {
+ MOZ_ASSERT(maxExtentInitial - dest->offset ==
+ masm->PushRegsInMaskSizeInBytes(set));
+ } else {
+ MOZ_ASSERT(masm->framePushed() - maxExtentInitial ==
+ masm->PushRegsInMaskSizeInBytes(set));
+ }
+}
+
+void MacroAssembler::PushRegsInMask(LiveRegisterSet set) {
+ PushOrStoreRegsInMask(this, set, mozilla::Nothing());
+}
+
+void MacroAssembler::storeRegsInMask(LiveRegisterSet set, Address dest,
+ Register scratch) {
+ PushOrStoreRegsInMask(this, set, mozilla::Some(dest));
+}
+
+// This is a helper function for PopRegsInMaskIgnore below. It emits the
+// loads described by dests[0] and [1] and offsets[0] and [1], generating a
+// load-pair if it can.
+static void GeneratePendingLoadsThenFlush(MacroAssembler* masm,
+ vixl::CPURegister* dests,
+ uint32_t* offsets,
+ uint32_t transactionSize) {
+ // Generate the loads ..
+ if (!dests[0].IsNone()) {
+ if (!dests[1].IsNone()) {
+ // [0] and [1] both present.
+ if (offsets[0] + transactionSize == offsets[1]) {
+ masm->Ldp(dests[0], dests[1],
+ MemOperand(masm->GetStackPointer64(), offsets[0]));
+ } else {
+ // Theoretically we could check for a load-pair with the destinations
+ // switched, but our callers will never generate that. Hence there's
+ // no loss in giving up at this point and generating two loads.
+ masm->Ldr(dests[0], MemOperand(masm->GetStackPointer64(), offsets[0]));
+ masm->Ldr(dests[1], MemOperand(masm->GetStackPointer64(), offsets[1]));
+ }
+ } else {
+ // [0] only.
+ masm->Ldr(dests[0], MemOperand(masm->GetStackPointer64(), offsets[0]));
+ }
+ } else {
+ if (!dests[1].IsNone()) {
+ // [1] only. Can't happen because callers always fill [0] before [1].
+ MOZ_CRASH("GenerateLoadsThenFlush");
+ } else {
+ // Neither entry valid. This can happen.
+ }
+ }
+
+ // .. and flush.
+ dests[0] = dests[1] = vixl::NoCPUReg;
+ offsets[0] = offsets[1] = 0;
+}
+
+void MacroAssembler::PopRegsInMaskIgnore(LiveRegisterSet set,
+ LiveRegisterSet ignore) {
+ mozilla::DebugOnly<size_t> framePushedInitial = framePushed();
+
+ // The offset of the data from the stack pointer.
+ uint32_t offset = 0;
+
+ // The set of FP/SIMD registers we need to restore.
+ FloatRegisterSet fpuSet(set.fpus().reduceSetForPush());
+
+ // The set of registers to ignore. BroadcastToAllSizes() is used to avoid
+ // any ambiguities arising from (eg) `fpuSet` containing q17 but `ignore`
+ // containing d17.
+ FloatRegisterSet ignoreFpusBroadcasted(
+ FloatRegister::BroadcastToAllSizes(ignore.fpus()));
+
+ // First recover the SIMD (V128) registers. This is straightforward in that
+ // we don't need to think about alignment holes.
+
+ // These three form a two-entry queue that holds loads that we know we
+ // need, but which we haven't yet emitted.
+ vixl::CPURegister pendingDests[2] = {vixl::NoCPUReg, vixl::NoCPUReg};
+ uint32_t pendingOffsets[2] = {0, 0};
+ size_t nPending = 0;
+
+ for (FloatRegisterIterator iter(fpuSet); iter.more(); ++iter) {
+ FloatRegister reg = *iter;
+ if (reg.isDouble()) {
+ continue;
+ }
+ MOZ_RELEASE_ASSERT(reg.isSimd128());
+
+ uint32_t offsetForReg = offset;
+ offset += FloatRegister::SizeOfSimd128;
+
+ if (ignoreFpusBroadcasted.hasRegisterIndex(reg)) {
+ continue;
+ }
+
+ MOZ_ASSERT(nPending <= 2);
+ if (nPending == 2) {
+ GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 16);
+ nPending = 0;
+ }
+ pendingDests[nPending] = ARMFPRegister(reg, 128);
+ pendingOffsets[nPending] = offsetForReg;
+ nPending++;
+ }
+ GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 16);
+ nPending = 0;
+
+ MOZ_ASSERT((offset % 16) == 0);
+
+ // Now recover the FP double registers. This is more tricky in that we need
+ // to skip over the lowest-addressed of them if the number of them was odd.
+
+ if ((((fpuSet.bits() & FloatRegisters::AllDoubleMask).size()) & 1) == 1) {
+ offset += sizeof(double);
+ }
+
+ for (FloatRegisterIterator iter(fpuSet); iter.more(); ++iter) {
+ FloatRegister reg = *iter;
+ if (reg.isSimd128()) {
+ continue;
+ }
+ /* true but redundant, per loop above: MOZ_RELEASE_ASSERT(reg.isDouble()) */
+
+ uint32_t offsetForReg = offset;
+ offset += sizeof(double);
+
+ if (ignoreFpusBroadcasted.hasRegisterIndex(reg)) {
+ continue;
+ }
+
+ MOZ_ASSERT(nPending <= 2);
+ if (nPending == 2) {
+ GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 8);
+ nPending = 0;
+ }
+ pendingDests[nPending] = ARMFPRegister(reg, 64);
+ pendingOffsets[nPending] = offsetForReg;
+ nPending++;
+ }
+ GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 8);
+ nPending = 0;
+
+ MOZ_ASSERT((offset % 16) == 0);
+ MOZ_ASSERT(offset == set.fpus().getPushSizeInBytes());
+
+ // And finally recover the integer registers, again skipping an alignment
+ // hole if it exists.
+
+ if ((set.gprs().size() & 1) == 1) {
+ offset += sizeof(uint64_t);
+ }
+
+ for (GeneralRegisterIterator iter(set.gprs()); iter.more(); ++iter) {
+ Register reg = *iter;
+
+ uint32_t offsetForReg = offset;
+ offset += sizeof(uint64_t);
+
+ if (ignore.has(reg)) {
+ continue;
+ }
+
+ MOZ_ASSERT(nPending <= 2);
+ if (nPending == 2) {
+ GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 8);
+ nPending = 0;
+ }
+ pendingDests[nPending] = ARMRegister(reg, 64);
+ pendingOffsets[nPending] = offsetForReg;
+ nPending++;
+ }
+ GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 8);
+
+ MOZ_ASSERT((offset % 16) == 0);
+
+ size_t bytesPushed = PushRegsInMaskSizeInBytes(set);
+ MOZ_ASSERT(offset == bytesPushed);
+ freeStack(bytesPushed);
+}
+
+void MacroAssembler::Push(Register reg) {
+ push(reg);
+ adjustFrame(sizeof(intptr_t));
+}
+
+void MacroAssembler::Push(Register reg1, Register reg2, Register reg3,
+ Register reg4) {
+ push(reg1, reg2, reg3, reg4);
+ adjustFrame(4 * sizeof(intptr_t));
+}
+
+void MacroAssembler::Push(const Imm32 imm) {
+ push(imm);
+ adjustFrame(sizeof(intptr_t));
+}
+
+void MacroAssembler::Push(const ImmWord imm) {
+ push(imm);
+ adjustFrame(sizeof(intptr_t));
+}
+
+void MacroAssembler::Push(const ImmPtr imm) {
+ push(imm);
+ adjustFrame(sizeof(intptr_t));
+}
+
+void MacroAssembler::Push(const ImmGCPtr ptr) {
+ push(ptr);
+ adjustFrame(sizeof(intptr_t));
+}
+
+void MacroAssembler::Push(FloatRegister f) {
+ push(f);
+ adjustFrame(sizeof(double));
+}
+
+void MacroAssembler::PushBoxed(FloatRegister reg) {
+ subFromStackPtr(Imm32(sizeof(double)));
+ boxDouble(reg, Address(getStackPointer(), 0));
+ adjustFrame(sizeof(double));
+}
+
+void MacroAssembler::Pop(Register reg) {
+ pop(reg);
+ adjustFrame(-1 * int64_t(sizeof(int64_t)));
+}
+
+void MacroAssembler::Pop(FloatRegister f) {
+ loadDouble(Address(getStackPointer(), 0), f);
+ freeStack(sizeof(double));
+}
+
+void MacroAssembler::Pop(const ValueOperand& val) {
+ pop(val);
+ adjustFrame(-1 * int64_t(sizeof(int64_t)));
+}
+
+// ===============================================================
+// Simple call functions.
+
+CodeOffset MacroAssembler::call(Register reg) {
+ // This sync has been observed (and is expected) to be necessary.
+ // eg testcase: tests/debug/bug1107525.js
+ syncStackPtr();
+ Blr(ARMRegister(reg, 64));
+ return CodeOffset(currentOffset());
+}
+
+CodeOffset MacroAssembler::call(Label* label) {
+ // This sync has been observed (and is expected) to be necessary.
+ // eg testcase: tests/basic/testBug504520Harder.js
+ syncStackPtr();
+ Bl(label);
+ return CodeOffset(currentOffset());
+}
+
+void MacroAssembler::call(ImmPtr imm) {
+ // This sync has been observed (and is expected) to be necessary.
+ // eg testcase: asm.js/testTimeout5.js
+ syncStackPtr();
+ vixl::UseScratchRegisterScope temps(this);
+ MOZ_ASSERT(temps.IsAvailable(ScratchReg64)); // ip0
+ temps.Exclude(ScratchReg64);
+ movePtr(imm, ScratchReg64.asUnsized());
+ Blr(ScratchReg64);
+}
+
+void MacroAssembler::call(ImmWord imm) { call(ImmPtr((void*)imm.value)); }
+
+CodeOffset MacroAssembler::call(wasm::SymbolicAddress imm) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ // This sync is believed to be necessary, although no case in jit-test/tests
+ // has been observed to cause SP != PSP here.
+ syncStackPtr();
+ movePtr(imm, scratch);
+ Blr(ARMRegister(scratch, 64));
+ return CodeOffset(currentOffset());
+}
+
+void MacroAssembler::call(const Address& addr) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ // This sync has been observed (and is expected) to be necessary.
+ // eg testcase: tests/backup-point-bug1315634.js
+ syncStackPtr();
+ loadPtr(addr, scratch);
+ Blr(ARMRegister(scratch, 64));
+}
+
+void MacroAssembler::call(JitCode* c) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ // This sync has been observed (and is expected) to be necessary.
+ // eg testcase: arrays/new-array-undefined-undefined-more-args-2.js
+ syncStackPtr();
+ BufferOffset off = immPool64(scratch64, uint64_t(c->raw()));
+ addPendingJump(off, ImmPtr(c->raw()), RelocationKind::JITCODE);
+ blr(scratch64);
+}
+
+CodeOffset MacroAssembler::callWithPatch() {
+ // This needs to sync. Wasm goes through this one for intramodule calls.
+ //
+ // In other cases, wasm goes through masm.wasmCallImport(),
+ // masm.wasmCallBuiltinInstanceMethod, masm.wasmCallIndirect, all of which
+ // sync.
+ //
+ // This sync is believed to be necessary, although no case in jit-test/tests
+ // has been observed to cause SP != PSP here.
+ syncStackPtr();
+ bl(0, LabelDoc());
+ return CodeOffset(currentOffset());
+}
+void MacroAssembler::patchCall(uint32_t callerOffset, uint32_t calleeOffset) {
+ Instruction* inst = getInstructionAt(BufferOffset(callerOffset - 4));
+ MOZ_ASSERT(inst->IsBL());
+ ptrdiff_t relTarget = (int)calleeOffset - ((int)callerOffset - 4);
+ ptrdiff_t relTarget00 = relTarget >> 2;
+ MOZ_RELEASE_ASSERT((relTarget & 0x3) == 0);
+ MOZ_RELEASE_ASSERT(vixl::IsInt26(relTarget00));
+ bl(inst, relTarget00);
+}
+
+CodeOffset MacroAssembler::farJumpWithPatch() {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch = temps.AcquireX();
+ const ARMRegister scratch2 = temps.AcquireX();
+
+ AutoForbidPoolsAndNops afp(this,
+ /* max number of instructions in scope = */ 7);
+
+ mozilla::DebugOnly<uint32_t> before = currentOffset();
+
+ align(8); // At most one nop
+
+ Label branch;
+ adr(scratch2, &branch);
+ ldr(scratch, vixl::MemOperand(scratch2, 4));
+ add(scratch2, scratch2, scratch);
+ CodeOffset offs(currentOffset());
+ bind(&branch);
+ br(scratch2);
+ Emit(UINT32_MAX);
+ Emit(UINT32_MAX);
+
+ mozilla::DebugOnly<uint32_t> after = currentOffset();
+
+ MOZ_ASSERT(after - before == 24 || after - before == 28);
+
+ return offs;
+}
+
+void MacroAssembler::patchFarJump(CodeOffset farJump, uint32_t targetOffset) {
+ Instruction* inst1 = getInstructionAt(BufferOffset(farJump.offset() + 4));
+ Instruction* inst2 = getInstructionAt(BufferOffset(farJump.offset() + 8));
+
+ int64_t distance = (int64_t)targetOffset - (int64_t)farJump.offset();
+
+ MOZ_ASSERT(inst1->InstructionBits() == UINT32_MAX);
+ MOZ_ASSERT(inst2->InstructionBits() == UINT32_MAX);
+
+ inst1->SetInstructionBits((uint32_t)distance);
+ inst2->SetInstructionBits((uint32_t)(distance >> 32));
+}
+
+CodeOffset MacroAssembler::nopPatchableToCall() {
+ AutoForbidPoolsAndNops afp(this,
+ /* max number of instructions in scope = */ 1);
+ Nop();
+ return CodeOffset(currentOffset());
+}
+
+void MacroAssembler::patchNopToCall(uint8_t* call, uint8_t* target) {
+ uint8_t* inst = call - 4;
+ Instruction* instr = reinterpret_cast<Instruction*>(inst);
+ MOZ_ASSERT(instr->IsBL() || instr->IsNOP());
+ bl(instr, (target - inst) >> 2);
+}
+
+void MacroAssembler::patchCallToNop(uint8_t* call) {
+ uint8_t* inst = call - 4;
+ Instruction* instr = reinterpret_cast<Instruction*>(inst);
+ MOZ_ASSERT(instr->IsBL() || instr->IsNOP());
+ nop(instr);
+}
+
+void MacroAssembler::pushReturnAddress() {
+ MOZ_RELEASE_ASSERT(!sp.Is(GetStackPointer64()), "Not valid");
+ push(lr);
+}
+
+void MacroAssembler::popReturnAddress() {
+ MOZ_RELEASE_ASSERT(!sp.Is(GetStackPointer64()), "Not valid");
+ pop(lr);
+}
+
+// ===============================================================
+// ABI function calls.
+
+void MacroAssembler::setupUnalignedABICall(Register scratch) {
+ // Because wasm operates without the need for dynamic alignment of SP, it is
+ // implied that this routine should never be called when generating wasm.
+ MOZ_ASSERT(!IsCompilingWasm());
+
+ // The following won't work for SP -- needs slightly different logic.
+ MOZ_RELEASE_ASSERT(GetStackPointer64().Is(PseudoStackPointer64));
+
+ setupNativeABICall();
+ dynamicAlignment_ = true;
+
+ int64_t alignment = ~(int64_t(ABIStackAlignment) - 1);
+ ARMRegister scratch64(scratch, 64);
+ MOZ_ASSERT(!scratch64.Is(PseudoStackPointer64));
+
+ // Always save LR -- Baseline ICs assume that LR isn't modified.
+ push(lr);
+
+ // Remember the stack address on entry. This is reloaded in callWithABIPost
+ // below.
+ Mov(scratch64, PseudoStackPointer64);
+
+ // Make alignment, including the effective push of the previous sp.
+ Sub(PseudoStackPointer64, PseudoStackPointer64, Operand(8));
+ And(PseudoStackPointer64, PseudoStackPointer64, Operand(alignment));
+ syncStackPtr();
+
+ // Store previous sp to the top of the stack, aligned. This is also
+ // reloaded in callWithABIPost.
+ Str(scratch64, MemOperand(PseudoStackPointer64, 0));
+}
+
+void MacroAssembler::callWithABIPre(uint32_t* stackAdjust, bool callFromWasm) {
+ // wasm operates without the need for dynamic alignment of SP.
+ MOZ_ASSERT(!(dynamicAlignment_ && callFromWasm));
+
+ MOZ_ASSERT(inCall_);
+ uint32_t stackForCall = abiArgs_.stackBytesConsumedSoFar();
+
+ // ARM64 *really* wants SP to always be 16-aligned, so ensure this now.
+ if (dynamicAlignment_) {
+ stackForCall += ComputeByteAlignment(stackForCall, StackAlignment);
+ } else {
+ // This can happen when we attach out-of-line stubs for rare cases. For
+ // example CodeGenerator::visitWasmTruncateToInt32 adds an out-of-line
+ // chunk.
+ uint32_t alignmentAtPrologue = callFromWasm ? sizeof(wasm::Frame) : 0;
+ stackForCall += ComputeByteAlignment(
+ stackForCall + framePushed() + alignmentAtPrologue, ABIStackAlignment);
+ }
+
+ *stackAdjust = stackForCall;
+ reserveStack(*stackAdjust);
+ {
+ enoughMemory_ &= moveResolver_.resolve();
+ if (!enoughMemory_) {
+ return;
+ }
+ MoveEmitter emitter(*this);
+ emitter.emit(moveResolver_);
+ emitter.finish();
+ }
+
+ // Call boundaries communicate stack via SP.
+ // (jseward, 2021Mar03) This sync may well be redundant, given that all of
+ // the MacroAssembler::call methods generate a sync before the call.
+ // Removing it does not cause any failures for all of jit-tests.
+ syncStackPtr();
+
+ assertStackAlignment(ABIStackAlignment);
+}
+
+void MacroAssembler::callWithABIPost(uint32_t stackAdjust, MoveOp::Type result,
+ bool callFromWasm) {
+ // wasm operates without the need for dynamic alignment of SP.
+ MOZ_ASSERT(!(dynamicAlignment_ && callFromWasm));
+
+ // Call boundaries communicate stack via SP, so we must resync PSP now.
+ initPseudoStackPtr();
+
+ freeStack(stackAdjust);
+
+ if (dynamicAlignment_) {
+ // This then-clause makes more sense if you first read
+ // setupUnalignedABICall above.
+ //
+ // Restore the stack pointer from entry. The stack pointer will have been
+ // saved by setupUnalignedABICall. This is fragile in that it assumes
+ // that uses of this routine (callWithABIPost) with `dynamicAlignment_ ==
+ // true` are preceded by matching calls to setupUnalignedABICall. But
+ // there's nothing that enforce that mechanically. If we really want to
+ // enforce this, we could add a debug-only CallWithABIState enum to the
+ // MacroAssembler and assert that setupUnalignedABICall updates it before
+ // we get here, then reset it to its initial state.
+ Ldr(GetStackPointer64(), MemOperand(GetStackPointer64(), 0));
+ syncStackPtr();
+
+ // Restore LR. This restores LR to the value stored by
+ // setupUnalignedABICall, which should have been called just before
+ // callWithABIPre. This is, per the above comment, also fragile.
+ pop(lr);
+
+ // SP may be < PSP now. That is expected from the behaviour of `pop`. It
+ // is not clear why the following `syncStackPtr` is necessary, but it is:
+ // without it, the following test segfaults:
+ // tests/backup-point-bug1315634.js
+ syncStackPtr();
+ }
+
+ // If the ABI's return regs are where ION is expecting them, then
+ // no other work needs to be done.
+
+#ifdef DEBUG
+ MOZ_ASSERT(inCall_);
+ inCall_ = false;
+#endif
+}
+
+void MacroAssembler::callWithABINoProfiler(Register fun, MoveOp::Type result) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ movePtr(fun, scratch);
+
+ uint32_t stackAdjust;
+ callWithABIPre(&stackAdjust);
+ call(scratch);
+ callWithABIPost(stackAdjust, result);
+}
+
+void MacroAssembler::callWithABINoProfiler(const Address& fun,
+ MoveOp::Type result) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ loadPtr(fun, scratch);
+
+ uint32_t stackAdjust;
+ callWithABIPre(&stackAdjust);
+ call(scratch);
+ callWithABIPost(stackAdjust, result);
+}
+
+// ===============================================================
+// Jit Frames.
+
+uint32_t MacroAssembler::pushFakeReturnAddress(Register scratch) {
+ enterNoPool(3);
+ Label fakeCallsite;
+
+ Adr(ARMRegister(scratch, 64), &fakeCallsite);
+ Push(scratch);
+ bind(&fakeCallsite);
+ uint32_t pseudoReturnOffset = currentOffset();
+
+ leaveNoPool();
+ return pseudoReturnOffset;
+}
+
+bool MacroAssemblerCompat::buildOOLFakeExitFrame(void* fakeReturnAddr) {
+ asMasm().PushFrameDescriptor(FrameType::IonJS);
+ asMasm().Push(ImmPtr(fakeReturnAddr));
+ asMasm().Push(FramePointer);
+ return true;
+}
+
+// ===============================================================
+// Move instructions
+
+void MacroAssembler::moveValue(const TypedOrValueRegister& src,
+ const ValueOperand& dest) {
+ if (src.hasValue()) {
+ moveValue(src.valueReg(), dest);
+ return;
+ }
+
+ MIRType type = src.type();
+ AnyRegister reg = src.typedReg();
+
+ if (!IsFloatingPointType(type)) {
+ boxNonDouble(ValueTypeFromMIRType(type), reg.gpr(), dest);
+ return;
+ }
+
+ ScratchDoubleScope scratch(*this);
+ FloatRegister freg = reg.fpu();
+ if (type == MIRType::Float32) {
+ convertFloat32ToDouble(freg, scratch);
+ freg = scratch;
+ }
+ boxDouble(freg, dest, scratch);
+}
+
+void MacroAssembler::moveValue(const ValueOperand& src,
+ const ValueOperand& dest) {
+ if (src == dest) {
+ return;
+ }
+ movePtr(src.valueReg(), dest.valueReg());
+}
+
+void MacroAssembler::moveValue(const Value& src, const ValueOperand& dest) {
+ if (!src.isGCThing()) {
+ movePtr(ImmWord(src.asRawBits()), dest.valueReg());
+ return;
+ }
+
+ BufferOffset load =
+ movePatchablePtr(ImmPtr(src.bitsAsPunboxPointer()), dest.valueReg());
+ writeDataRelocation(src, load);
+}
+
+// ===============================================================
+// Branch functions
+
+void MacroAssembler::loadStoreBuffer(Register ptr, Register buffer) {
+ And(ARMRegister(buffer, 64), ARMRegister(ptr, 64),
+ Operand(int32_t(~gc::ChunkMask)));
+ loadPtr(Address(buffer, gc::ChunkStoreBufferOffset), buffer);
+}
+
+void MacroAssembler::branchPtrInNurseryChunk(Condition cond, Register ptr,
+ Register temp, Label* label) {
+ MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
+ MOZ_ASSERT(ptr != temp);
+ MOZ_ASSERT(ptr != ScratchReg &&
+ ptr != ScratchReg2); // Both may be used internally.
+ MOZ_ASSERT(temp != ScratchReg && temp != ScratchReg2);
+
+ And(ARMRegister(temp, 64), ARMRegister(ptr, 64),
+ Operand(int32_t(~gc::ChunkMask)));
+ branchPtr(InvertCondition(cond), Address(temp, gc::ChunkStoreBufferOffset),
+ ImmWord(0), label);
+}
+
+void MacroAssembler::branchValueIsNurseryCell(Condition cond,
+ const Address& address,
+ Register temp, Label* label) {
+ branchValueIsNurseryCellImpl(cond, address, temp, label);
+}
+
+void MacroAssembler::branchValueIsNurseryCell(Condition cond,
+ ValueOperand value, Register temp,
+ Label* label) {
+ branchValueIsNurseryCellImpl(cond, value, temp, label);
+}
+
+template <typename T>
+void MacroAssembler::branchValueIsNurseryCellImpl(Condition cond,
+ const T& value, Register temp,
+ Label* label) {
+ MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
+ MOZ_ASSERT(temp != ScratchReg &&
+ temp != ScratchReg2); // Both may be used internally.
+
+ Label done;
+ branchTestGCThing(Assembler::NotEqual, value,
+ cond == Assembler::Equal ? &done : label);
+
+ getGCThingValueChunk(value, temp);
+ branchPtr(InvertCondition(cond), Address(temp, gc::ChunkStoreBufferOffset),
+ ImmWord(0), label);
+
+ bind(&done);
+}
+
+void MacroAssembler::branchTestValue(Condition cond, const ValueOperand& lhs,
+ const Value& rhs, Label* label) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ MOZ_ASSERT(scratch64.asUnsized() != lhs.valueReg());
+ moveValue(rhs, ValueOperand(scratch64.asUnsized()));
+ Cmp(ARMRegister(lhs.valueReg(), 64), scratch64);
+ B(label, cond);
+}
+
+// ========================================================================
+// Memory access primitives.
+template <typename T>
+void MacroAssembler::storeUnboxedValue(const ConstantOrRegister& value,
+ MIRType valueType, const T& dest) {
+ MOZ_ASSERT(valueType < MIRType::Value);
+
+ if (valueType == MIRType::Double) {
+ boxDouble(value.reg().typedReg().fpu(), dest);
+ return;
+ }
+
+ if (value.constant()) {
+ storeValue(value.value(), dest);
+ } else {
+ storeValue(ValueTypeFromMIRType(valueType), value.reg().typedReg().gpr(),
+ dest);
+ }
+}
+
+template void MacroAssembler::storeUnboxedValue(const ConstantOrRegister& value,
+ MIRType valueType,
+ const Address& dest);
+template void MacroAssembler::storeUnboxedValue(
+ const ConstantOrRegister& value, MIRType valueType,
+ const BaseObjectElementIndex& dest);
+
+void MacroAssembler::comment(const char* msg) { Assembler::comment(msg); }
+
+// ========================================================================
+// wasm support
+
+CodeOffset MacroAssembler::wasmTrapInstruction() {
+ AutoForbidPoolsAndNops afp(this,
+ /* max number of instructions in scope = */ 1);
+ CodeOffset offs(currentOffset());
+ Unreachable();
+ return offs;
+}
+
+void MacroAssembler::wasmBoundsCheck32(Condition cond, Register index,
+ Register boundsCheckLimit, Label* ok) {
+ branch32(cond, index, boundsCheckLimit, ok);
+ if (JitOptions.spectreIndexMasking) {
+ csel(ARMRegister(index, 32), vixl::wzr, ARMRegister(index, 32), cond);
+ }
+}
+
+void MacroAssembler::wasmBoundsCheck32(Condition cond, Register index,
+ Address boundsCheckLimit, Label* ok) {
+ branch32(cond, index, boundsCheckLimit, ok);
+ if (JitOptions.spectreIndexMasking) {
+ csel(ARMRegister(index, 32), vixl::wzr, ARMRegister(index, 32), cond);
+ }
+}
+
+void MacroAssembler::wasmBoundsCheck64(Condition cond, Register64 index,
+ Register64 boundsCheckLimit, Label* ok) {
+ branchPtr(cond, index.reg, boundsCheckLimit.reg, ok);
+ if (JitOptions.spectreIndexMasking) {
+ csel(ARMRegister(index.reg, 64), vixl::xzr, ARMRegister(index.reg, 64),
+ cond);
+ }
+}
+
+void MacroAssembler::wasmBoundsCheck64(Condition cond, Register64 index,
+ Address boundsCheckLimit, Label* ok) {
+ branchPtr(InvertCondition(cond), boundsCheckLimit, index.reg, ok);
+ if (JitOptions.spectreIndexMasking) {
+ csel(ARMRegister(index.reg, 64), vixl::xzr, ARMRegister(index.reg, 64),
+ cond);
+ }
+}
+
+// FCVTZU behaves as follows:
+//
+// on NaN it produces zero
+// on too large it produces UINT_MAX (for appropriate type)
+// on too small it produces zero
+//
+// FCVTZS behaves as follows:
+//
+// on NaN it produces zero
+// on too large it produces INT_MAX (for appropriate type)
+// on too small it produces INT_MIN (ditto)
+
+void MacroAssembler::wasmTruncateDoubleToUInt32(FloatRegister input_,
+ Register output_,
+ bool isSaturating,
+ Label* oolEntry) {
+ ARMRegister output(output_, 32);
+ ARMFPRegister input(input_, 64);
+ Fcvtzu(output, input);
+ if (!isSaturating) {
+ Cmp(output, 0);
+ Ccmp(output, -1, vixl::ZFlag, Assembler::NotEqual);
+ B(oolEntry, Assembler::Equal);
+ }
+}
+
+void MacroAssembler::wasmTruncateFloat32ToUInt32(FloatRegister input_,
+ Register output_,
+ bool isSaturating,
+ Label* oolEntry) {
+ ARMRegister output(output_, 32);
+ ARMFPRegister input(input_, 32);
+ Fcvtzu(output, input);
+ if (!isSaturating) {
+ Cmp(output, 0);
+ Ccmp(output, -1, vixl::ZFlag, Assembler::NotEqual);
+ B(oolEntry, Assembler::Equal);
+ }
+}
+
+void MacroAssembler::wasmTruncateDoubleToInt32(FloatRegister input_,
+ Register output_,
+ bool isSaturating,
+ Label* oolEntry) {
+ ARMRegister output(output_, 32);
+ ARMFPRegister input(input_, 64);
+ Fcvtzs(output, input);
+ if (!isSaturating) {
+ Cmp(output, 0);
+ Ccmp(output, INT32_MAX, vixl::ZFlag, Assembler::NotEqual);
+ Ccmp(output, INT32_MIN, vixl::ZFlag, Assembler::NotEqual);
+ B(oolEntry, Assembler::Equal);
+ }
+}
+
+void MacroAssembler::wasmTruncateFloat32ToInt32(FloatRegister input_,
+ Register output_,
+ bool isSaturating,
+ Label* oolEntry) {
+ ARMRegister output(output_, 32);
+ ARMFPRegister input(input_, 32);
+ Fcvtzs(output, input);
+ if (!isSaturating) {
+ Cmp(output, 0);
+ Ccmp(output, INT32_MAX, vixl::ZFlag, Assembler::NotEqual);
+ Ccmp(output, INT32_MIN, vixl::ZFlag, Assembler::NotEqual);
+ B(oolEntry, Assembler::Equal);
+ }
+}
+
+void MacroAssembler::wasmTruncateDoubleToUInt64(
+ FloatRegister input_, Register64 output_, bool isSaturating,
+ Label* oolEntry, Label* oolRejoin, FloatRegister tempDouble) {
+ MOZ_ASSERT(tempDouble.isInvalid());
+
+ ARMRegister output(output_.reg, 64);
+ ARMFPRegister input(input_, 64);
+ Fcvtzu(output, input);
+ if (!isSaturating) {
+ Cmp(output, 0);
+ Ccmp(output, -1, vixl::ZFlag, Assembler::NotEqual);
+ B(oolEntry, Assembler::Equal);
+ bind(oolRejoin);
+ }
+}
+
+void MacroAssembler::wasmTruncateFloat32ToUInt64(
+ FloatRegister input_, Register64 output_, bool isSaturating,
+ Label* oolEntry, Label* oolRejoin, FloatRegister tempDouble) {
+ MOZ_ASSERT(tempDouble.isInvalid());
+
+ ARMRegister output(output_.reg, 64);
+ ARMFPRegister input(input_, 32);
+ Fcvtzu(output, input);
+ if (!isSaturating) {
+ Cmp(output, 0);
+ Ccmp(output, -1, vixl::ZFlag, Assembler::NotEqual);
+ B(oolEntry, Assembler::Equal);
+ bind(oolRejoin);
+ }
+}
+
+void MacroAssembler::wasmTruncateDoubleToInt64(
+ FloatRegister input_, Register64 output_, bool isSaturating,
+ Label* oolEntry, Label* oolRejoin, FloatRegister tempDouble) {
+ MOZ_ASSERT(tempDouble.isInvalid());
+
+ ARMRegister output(output_.reg, 64);
+ ARMFPRegister input(input_, 64);
+ Fcvtzs(output, input);
+ if (!isSaturating) {
+ Cmp(output, 0);
+ Ccmp(output, INT64_MAX, vixl::ZFlag, Assembler::NotEqual);
+ Ccmp(output, INT64_MIN, vixl::ZFlag, Assembler::NotEqual);
+ B(oolEntry, Assembler::Equal);
+ bind(oolRejoin);
+ }
+}
+
+void MacroAssembler::wasmTruncateFloat32ToInt64(
+ FloatRegister input_, Register64 output_, bool isSaturating,
+ Label* oolEntry, Label* oolRejoin, FloatRegister tempDouble) {
+ ARMRegister output(output_.reg, 64);
+ ARMFPRegister input(input_, 32);
+ Fcvtzs(output, input);
+ if (!isSaturating) {
+ Cmp(output, 0);
+ Ccmp(output, INT64_MAX, vixl::ZFlag, Assembler::NotEqual);
+ Ccmp(output, INT64_MIN, vixl::ZFlag, Assembler::NotEqual);
+ B(oolEntry, Assembler::Equal);
+ bind(oolRejoin);
+ }
+}
+
+void MacroAssembler::oolWasmTruncateCheckF32ToI32(FloatRegister input,
+ Register output,
+ TruncFlags flags,
+ wasm::BytecodeOffset off,
+ Label* rejoin) {
+ Label notNaN;
+ branchFloat(Assembler::DoubleOrdered, input, input, &notNaN);
+ wasmTrap(wasm::Trap::InvalidConversionToInteger, off);
+ bind(&notNaN);
+
+ Label isOverflow;
+ const float two_31 = -float(INT32_MIN);
+ ScratchFloat32Scope fpscratch(*this);
+ if (flags & TRUNC_UNSIGNED) {
+ loadConstantFloat32(two_31 * 2, fpscratch);
+ branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
+ &isOverflow);
+ loadConstantFloat32(-1.0f, fpscratch);
+ branchFloat(Assembler::DoubleGreaterThan, input, fpscratch, rejoin);
+ } else {
+ loadConstantFloat32(two_31, fpscratch);
+ branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
+ &isOverflow);
+ loadConstantFloat32(-two_31, fpscratch);
+ branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, rejoin);
+ }
+ bind(&isOverflow);
+ wasmTrap(wasm::Trap::IntegerOverflow, off);
+}
+
+void MacroAssembler::oolWasmTruncateCheckF64ToI32(FloatRegister input,
+ Register output,
+ TruncFlags flags,
+ wasm::BytecodeOffset off,
+ Label* rejoin) {
+ Label notNaN;
+ branchDouble(Assembler::DoubleOrdered, input, input, &notNaN);
+ wasmTrap(wasm::Trap::InvalidConversionToInteger, off);
+ bind(&notNaN);
+
+ Label isOverflow;
+ const double two_31 = -double(INT32_MIN);
+ ScratchDoubleScope fpscratch(*this);
+ if (flags & TRUNC_UNSIGNED) {
+ loadConstantDouble(two_31 * 2, fpscratch);
+ branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
+ &isOverflow);
+ loadConstantDouble(-1.0, fpscratch);
+ branchDouble(Assembler::DoubleGreaterThan, input, fpscratch, rejoin);
+ } else {
+ loadConstantDouble(two_31, fpscratch);
+ branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
+ &isOverflow);
+ loadConstantDouble(-two_31 - 1, fpscratch);
+ branchDouble(Assembler::DoubleGreaterThan, input, fpscratch, rejoin);
+ }
+ bind(&isOverflow);
+ wasmTrap(wasm::Trap::IntegerOverflow, off);
+}
+
+void MacroAssembler::oolWasmTruncateCheckF32ToI64(FloatRegister input,
+ Register64 output,
+ TruncFlags flags,
+ wasm::BytecodeOffset off,
+ Label* rejoin) {
+ Label notNaN;
+ branchFloat(Assembler::DoubleOrdered, input, input, &notNaN);
+ wasmTrap(wasm::Trap::InvalidConversionToInteger, off);
+ bind(&notNaN);
+
+ Label isOverflow;
+ const float two_63 = -float(INT64_MIN);
+ ScratchFloat32Scope fpscratch(*this);
+ if (flags & TRUNC_UNSIGNED) {
+ loadConstantFloat32(two_63 * 2, fpscratch);
+ branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
+ &isOverflow);
+ loadConstantFloat32(-1.0f, fpscratch);
+ branchFloat(Assembler::DoubleGreaterThan, input, fpscratch, rejoin);
+ } else {
+ loadConstantFloat32(two_63, fpscratch);
+ branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
+ &isOverflow);
+ loadConstantFloat32(-two_63, fpscratch);
+ branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, rejoin);
+ }
+ bind(&isOverflow);
+ wasmTrap(wasm::Trap::IntegerOverflow, off);
+}
+
+void MacroAssembler::oolWasmTruncateCheckF64ToI64(FloatRegister input,
+ Register64 output,
+ TruncFlags flags,
+ wasm::BytecodeOffset off,
+ Label* rejoin) {
+ Label notNaN;
+ branchDouble(Assembler::DoubleOrdered, input, input, &notNaN);
+ wasmTrap(wasm::Trap::InvalidConversionToInteger, off);
+ bind(&notNaN);
+
+ Label isOverflow;
+ const double two_63 = -double(INT64_MIN);
+ ScratchDoubleScope fpscratch(*this);
+ if (flags & TRUNC_UNSIGNED) {
+ loadConstantDouble(two_63 * 2, fpscratch);
+ branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
+ &isOverflow);
+ loadConstantDouble(-1.0, fpscratch);
+ branchDouble(Assembler::DoubleGreaterThan, input, fpscratch, rejoin);
+ } else {
+ loadConstantDouble(two_63, fpscratch);
+ branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
+ &isOverflow);
+ loadConstantDouble(-two_63, fpscratch);
+ branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, rejoin);
+ }
+ bind(&isOverflow);
+ wasmTrap(wasm::Trap::IntegerOverflow, off);
+}
+
+void MacroAssembler::wasmLoad(const wasm::MemoryAccessDesc& access,
+ Register memoryBase, Register ptr,
+ AnyRegister output) {
+ wasmLoadImpl(access, memoryBase, ptr, output, Register64::Invalid());
+}
+
+void MacroAssembler::wasmLoadI64(const wasm::MemoryAccessDesc& access,
+ Register memoryBase, Register ptr,
+ Register64 output) {
+ wasmLoadImpl(access, memoryBase, ptr, AnyRegister(), output);
+}
+
+void MacroAssembler::wasmStore(const wasm::MemoryAccessDesc& access,
+ AnyRegister value, Register memoryBase,
+ Register ptr) {
+ wasmStoreImpl(access, value, Register64::Invalid(), memoryBase, ptr);
+}
+
+void MacroAssembler::wasmStoreI64(const wasm::MemoryAccessDesc& access,
+ Register64 value, Register memoryBase,
+ Register ptr) {
+ wasmStoreImpl(access, AnyRegister(), value, memoryBase, ptr);
+}
+
+void MacroAssembler::enterFakeExitFrameForWasm(Register cxreg, Register scratch,
+ ExitFrameType type) {
+ // Wasm stubs use the native SP, not the PSP.
+
+ linkExitFrame(cxreg, scratch);
+
+ MOZ_RELEASE_ASSERT(sp.Is(GetStackPointer64()));
+
+ // SP has to be 16-byte aligned when we do a load/store, so push |type| twice
+ // and then add 8 bytes to SP. This leaves SP unaligned.
+ move32(Imm32(int32_t(type)), scratch);
+ push(scratch, scratch);
+ Add(sp, sp, 8);
+
+ // Despite the above assertion, it is possible for control to flow from here
+ // to the code generated by
+ // MacroAssemblerCompat::handleFailureWithHandlerTail without any
+ // intervening assignment to PSP. But handleFailureWithHandlerTail assumes
+ // that PSP is the active stack pointer. Hence the following is necessary
+ // for safety. Note we can't use initPseudoStackPtr here as that would
+ // generate no instructions.
+ Mov(PseudoStackPointer64, sp);
+}
+
+void MacroAssembler::widenInt32(Register r) {
+ move32To64ZeroExtend(r, Register64(r));
+}
+
+// ========================================================================
+// Convert floating point.
+
+bool MacroAssembler::convertUInt64ToDoubleNeedsTemp() { return false; }
+
+void MacroAssembler::convertUInt64ToDouble(Register64 src, FloatRegister dest,
+ Register temp) {
+ MOZ_ASSERT(temp == Register::Invalid());
+ Ucvtf(ARMFPRegister(dest, 64), ARMRegister(src.reg, 64));
+}
+
+void MacroAssembler::convertInt64ToDouble(Register64 src, FloatRegister dest) {
+ Scvtf(ARMFPRegister(dest, 64), ARMRegister(src.reg, 64));
+}
+
+void MacroAssembler::convertUInt64ToFloat32(Register64 src, FloatRegister dest,
+ Register temp) {
+ MOZ_ASSERT(temp == Register::Invalid());
+ Ucvtf(ARMFPRegister(dest, 32), ARMRegister(src.reg, 64));
+}
+
+void MacroAssembler::convertInt64ToFloat32(Register64 src, FloatRegister dest) {
+ Scvtf(ARMFPRegister(dest, 32), ARMRegister(src.reg, 64));
+}
+
+void MacroAssembler::convertIntPtrToDouble(Register src, FloatRegister dest) {
+ convertInt64ToDouble(Register64(src), dest);
+}
+
+// ========================================================================
+// Primitive atomic operations.
+
+// The computed MemOperand must be Reg+0 because the load/store exclusive
+// instructions only take a single pointer register.
+
+static MemOperand ComputePointerForAtomic(MacroAssembler& masm,
+ const Address& address,
+ Register scratch) {
+ if (address.offset == 0) {
+ return MemOperand(X(masm, address.base), 0);
+ }
+
+ masm.Add(X(scratch), X(masm, address.base), address.offset);
+ return MemOperand(X(scratch), 0);
+}
+
+static MemOperand ComputePointerForAtomic(MacroAssembler& masm,
+ const BaseIndex& address,
+ Register scratch) {
+ masm.Add(X(scratch), X(masm, address.base),
+ Operand(X(address.index), vixl::LSL, address.scale));
+ if (address.offset) {
+ masm.Add(X(scratch), X(scratch), address.offset);
+ }
+ return MemOperand(X(scratch), 0);
+}
+
+// This sign extends to targetWidth and leaves any higher bits zero.
+
+static void SignOrZeroExtend(MacroAssembler& masm, Scalar::Type srcType,
+ Width targetWidth, Register src, Register dest) {
+ bool signExtend = Scalar::isSignedIntType(srcType);
+
+ switch (Scalar::byteSize(srcType)) {
+ case 1:
+ if (signExtend) {
+ masm.Sbfm(R(dest, targetWidth), R(src, targetWidth), 0, 7);
+ } else {
+ masm.Ubfm(R(dest, targetWidth), R(src, targetWidth), 0, 7);
+ }
+ break;
+ case 2:
+ if (signExtend) {
+ masm.Sbfm(R(dest, targetWidth), R(src, targetWidth), 0, 15);
+ } else {
+ masm.Ubfm(R(dest, targetWidth), R(src, targetWidth), 0, 15);
+ }
+ break;
+ case 4:
+ if (targetWidth == Width::_64) {
+ if (signExtend) {
+ masm.Sbfm(X(dest), X(src), 0, 31);
+ } else {
+ masm.Ubfm(X(dest), X(src), 0, 31);
+ }
+ } else if (src != dest) {
+ masm.Mov(R(dest, targetWidth), R(src, targetWidth));
+ }
+ break;
+ case 8:
+ if (src != dest) {
+ masm.Mov(R(dest, targetWidth), R(src, targetWidth));
+ }
+ break;
+ default:
+ MOZ_CRASH();
+ }
+}
+
+// Exclusive-loads zero-extend their values to the full width of the X register.
+//
+// Note, we've promised to leave the high bits of the 64-bit register clear if
+// the targetWidth is 32.
+
+static void LoadExclusive(MacroAssembler& masm,
+ const wasm::MemoryAccessDesc* access,
+ Scalar::Type srcType, Width targetWidth,
+ MemOperand ptr, Register dest) {
+ bool signExtend = Scalar::isSignedIntType(srcType);
+
+ // With this address form, a single native ldxr* will be emitted, and the
+ // AutoForbidPoolsAndNops ensures that the metadata is emitted at the address
+ // of the ldxr*.
+ MOZ_ASSERT(ptr.IsImmediateOffset() && ptr.offset() == 0);
+
+ switch (Scalar::byteSize(srcType)) {
+ case 1: {
+ {
+ AutoForbidPoolsAndNops afp(
+ &masm,
+ /* max number of instructions in scope = */ 1);
+ if (access) {
+ masm.append(*access, masm.currentOffset());
+ }
+ masm.Ldxrb(W(dest), ptr);
+ }
+ if (signExtend) {
+ masm.Sbfm(R(dest, targetWidth), R(dest, targetWidth), 0, 7);
+ }
+ break;
+ }
+ case 2: {
+ {
+ AutoForbidPoolsAndNops afp(
+ &masm,
+ /* max number of instructions in scope = */ 1);
+ if (access) {
+ masm.append(*access, masm.currentOffset());
+ }
+ masm.Ldxrh(W(dest), ptr);
+ }
+ if (signExtend) {
+ masm.Sbfm(R(dest, targetWidth), R(dest, targetWidth), 0, 15);
+ }
+ break;
+ }
+ case 4: {
+ {
+ AutoForbidPoolsAndNops afp(
+ &masm,
+ /* max number of instructions in scope = */ 1);
+ if (access) {
+ masm.append(*access, masm.currentOffset());
+ }
+ masm.Ldxr(W(dest), ptr);
+ }
+ if (targetWidth == Width::_64 && signExtend) {
+ masm.Sbfm(X(dest), X(dest), 0, 31);
+ }
+ break;
+ }
+ case 8: {
+ {
+ AutoForbidPoolsAndNops afp(
+ &masm,
+ /* max number of instructions in scope = */ 1);
+ if (access) {
+ masm.append(*access, masm.currentOffset());
+ }
+ masm.Ldxr(X(dest), ptr);
+ }
+ break;
+ }
+ default: {
+ MOZ_CRASH();
+ }
+ }
+}
+
+static void StoreExclusive(MacroAssembler& masm, Scalar::Type type,
+ Register status, Register src, MemOperand ptr) {
+ switch (Scalar::byteSize(type)) {
+ case 1:
+ masm.Stxrb(W(status), W(src), ptr);
+ break;
+ case 2:
+ masm.Stxrh(W(status), W(src), ptr);
+ break;
+ case 4:
+ masm.Stxr(W(status), W(src), ptr);
+ break;
+ case 8:
+ masm.Stxr(W(status), X(src), ptr);
+ break;
+ }
+}
+
+static bool HasAtomicInstructions(MacroAssembler& masm) {
+ return masm.asVIXL().GetCPUFeatures()->Has(vixl::CPUFeatures::kAtomics);
+}
+
+static inline bool SupportedAtomicInstructionOperands(Scalar::Type type,
+ Width targetWidth) {
+ if (targetWidth == Width::_32) {
+ return byteSize(type) <= 4;
+ }
+ if (targetWidth == Width::_64) {
+ return byteSize(type) == 8;
+ }
+ return false;
+}
+
+template <typename T>
+static void CompareExchange(MacroAssembler& masm,
+ const wasm::MemoryAccessDesc* access,
+ Scalar::Type type, Width targetWidth,
+ const Synchronization& sync, const T& mem,
+ Register oldval, Register newval, Register output) {
+ MOZ_ASSERT(oldval != output && newval != output);
+
+ vixl::UseScratchRegisterScope temps(&masm);
+
+ Register ptrScratch = temps.AcquireX().asUnsized();
+ MemOperand ptr = ComputePointerForAtomic(masm, mem, ptrScratch);
+
+ MOZ_ASSERT(ptr.base().asUnsized() != output);
+
+ if (HasAtomicInstructions(masm) &&
+ SupportedAtomicInstructionOperands(type, targetWidth)) {
+ masm.Mov(X(output), X(oldval));
+ // Capal is using same atomic mechanism as Ldxr/Stxr, and
+ // consider it is the same for "Inner Shareable" domain.
+ // Not updated gen_cmpxchg in GenerateAtomicOperations.py.
+ masm.memoryBarrierBefore(sync);
+ if (access) {
+ masm.append(*access, masm.currentOffset());
+ }
+ switch (byteSize(type)) {
+ case 1:
+ masm.Casalb(R(output, targetWidth), R(newval, targetWidth), ptr);
+ break;
+ case 2:
+ masm.Casalh(R(output, targetWidth), R(newval, targetWidth), ptr);
+ break;
+ case 4:
+ case 8:
+ masm.Casal(R(output, targetWidth), R(newval, targetWidth), ptr);
+ break;
+ default:
+ MOZ_CRASH("CompareExchange unsupported type");
+ }
+ masm.memoryBarrierAfter(sync);
+ SignOrZeroExtend(masm, type, targetWidth, output, output);
+ return;
+ }
+
+ // The target doesn't support atomics, so generate a LL-SC loop. This requires
+ // only AArch64 v8.0.
+ Label again;
+ Label done;
+
+ // NOTE: the generated code must match the assembly code in gen_cmpxchg in
+ // GenerateAtomicOperations.py
+ masm.memoryBarrierBefore(sync);
+
+ Register scratch = temps.AcquireX().asUnsized();
+
+ masm.bind(&again);
+ SignOrZeroExtend(masm, type, targetWidth, oldval, scratch);
+ LoadExclusive(masm, access, type, targetWidth, ptr, output);
+ masm.Cmp(R(output, targetWidth), R(scratch, targetWidth));
+ masm.B(&done, MacroAssembler::NotEqual);
+ StoreExclusive(masm, type, scratch, newval, ptr);
+ masm.Cbnz(W(scratch), &again);
+ masm.bind(&done);
+
+ masm.memoryBarrierAfter(sync);
+}
+
+template <typename T>
+static void AtomicExchange(MacroAssembler& masm,
+ const wasm::MemoryAccessDesc* access,
+ Scalar::Type type, Width targetWidth,
+ const Synchronization& sync, const T& mem,
+ Register value, Register output) {
+ MOZ_ASSERT(value != output);
+
+ vixl::UseScratchRegisterScope temps(&masm);
+
+ Register ptrScratch = temps.AcquireX().asUnsized();
+ MemOperand ptr = ComputePointerForAtomic(masm, mem, ptrScratch);
+
+ if (HasAtomicInstructions(masm) &&
+ SupportedAtomicInstructionOperands(type, targetWidth)) {
+ // Swpal is using same atomic mechanism as Ldxr/Stxr, and
+ // consider it is the same for "Inner Shareable" domain.
+ // Not updated gen_exchange in GenerateAtomicOperations.py.
+ masm.memoryBarrierBefore(sync);
+ if (access) {
+ masm.append(*access, masm.currentOffset());
+ }
+ switch (byteSize(type)) {
+ case 1:
+ masm.Swpalb(R(value, targetWidth), R(output, targetWidth), ptr);
+ break;
+ case 2:
+ masm.Swpalh(R(value, targetWidth), R(output, targetWidth), ptr);
+ break;
+ case 4:
+ case 8:
+ masm.Swpal(R(value, targetWidth), R(output, targetWidth), ptr);
+ break;
+ default:
+ MOZ_CRASH("AtomicExchange unsupported type");
+ }
+ masm.memoryBarrierAfter(sync);
+ SignOrZeroExtend(masm, type, targetWidth, output, output);
+ return;
+ }
+
+ // The target doesn't support atomics, so generate a LL-SC loop. This requires
+ // only AArch64 v8.0.
+ Label again;
+
+ // NOTE: the generated code must match the assembly code in gen_exchange in
+ // GenerateAtomicOperations.py
+ masm.memoryBarrierBefore(sync);
+
+ Register scratch = temps.AcquireX().asUnsized();
+
+ masm.bind(&again);
+ LoadExclusive(masm, access, type, targetWidth, ptr, output);
+ StoreExclusive(masm, type, scratch, value, ptr);
+ masm.Cbnz(W(scratch), &again);
+
+ masm.memoryBarrierAfter(sync);
+}
+
+template <bool wantResult, typename T>
+static void AtomicFetchOp(MacroAssembler& masm,
+ const wasm::MemoryAccessDesc* access,
+ Scalar::Type type, Width targetWidth,
+ const Synchronization& sync, AtomicOp op,
+ const T& mem, Register value, Register temp,
+ Register output) {
+ MOZ_ASSERT(value != output);
+ MOZ_ASSERT(value != temp);
+ MOZ_ASSERT_IF(wantResult, output != temp);
+
+ vixl::UseScratchRegisterScope temps(&masm);
+
+ Register ptrScratch = temps.AcquireX().asUnsized();
+ MemOperand ptr = ComputePointerForAtomic(masm, mem, ptrScratch);
+
+ if (HasAtomicInstructions(masm) &&
+ SupportedAtomicInstructionOperands(type, targetWidth) &&
+ !isFloatingType(type)) {
+ // LdXXXal/StXXXl is using same atomic mechanism as Ldxr/Stxr, and
+ // consider it is the same for "Inner Shareable" domain.
+ // Not updated gen_fetchop in GenerateAtomicOperations.py.
+ masm.memoryBarrierBefore(sync);
+
+#define FETCH_OP_CASE(op, arg) \
+ if (access) { \
+ masm.append(*access, masm.currentOffset()); \
+ } \
+ switch (byteSize(type)) { \
+ case 1: \
+ if (wantResult) { \
+ masm.Ld##op##alb(R(arg, targetWidth), R(output, targetWidth), ptr); \
+ } else { \
+ masm.St##op##lb(R(arg, targetWidth), ptr); \
+ } \
+ break; \
+ case 2: \
+ if (wantResult) { \
+ masm.Ld##op##alh(R(arg, targetWidth), R(output, targetWidth), ptr); \
+ } else { \
+ masm.St##op##lh(R(arg, targetWidth), ptr); \
+ } \
+ break; \
+ case 4: \
+ case 8: \
+ if (wantResult) { \
+ masm.Ld##op##al(R(arg, targetWidth), R(output, targetWidth), ptr); \
+ } else { \
+ masm.St##op##l(R(arg, targetWidth), ptr); \
+ } \
+ break; \
+ default: \
+ MOZ_CRASH("AtomicFetchOp unsupported type"); \
+ }
+
+ switch (op) {
+ case AtomicFetchAddOp:
+ FETCH_OP_CASE(add, value);
+ break;
+ case AtomicFetchSubOp: {
+ Register scratch = temps.AcquireX().asUnsized();
+ masm.Neg(X(scratch), X(value));
+ FETCH_OP_CASE(add, scratch);
+ break;
+ }
+ case AtomicFetchAndOp: {
+ Register scratch = temps.AcquireX().asUnsized();
+ masm.Eor(X(scratch), X(value), Operand(~0));
+ FETCH_OP_CASE(clr, scratch);
+ break;
+ }
+ case AtomicFetchOrOp:
+ FETCH_OP_CASE(set, value);
+ break;
+ case AtomicFetchXorOp:
+ FETCH_OP_CASE(eor, value);
+ break;
+ }
+ masm.memoryBarrierAfter(sync);
+ if (wantResult) {
+ SignOrZeroExtend(masm, type, targetWidth, output, output);
+ }
+ return;
+ }
+
+#undef FETCH_OP_CASE
+
+ // The target doesn't support atomics, so generate a LL-SC loop. This requires
+ // only AArch64 v8.0.
+ Label again;
+
+ // NOTE: the generated code must match the assembly code in gen_fetchop in
+ // GenerateAtomicOperations.py
+ masm.memoryBarrierBefore(sync);
+
+ Register scratch = temps.AcquireX().asUnsized();
+
+ masm.bind(&again);
+ LoadExclusive(masm, access, type, targetWidth, ptr, output);
+ switch (op) {
+ case AtomicFetchAddOp:
+ masm.Add(X(temp), X(output), X(value));
+ break;
+ case AtomicFetchSubOp:
+ masm.Sub(X(temp), X(output), X(value));
+ break;
+ case AtomicFetchAndOp:
+ masm.And(X(temp), X(output), X(value));
+ break;
+ case AtomicFetchOrOp:
+ masm.Orr(X(temp), X(output), X(value));
+ break;
+ case AtomicFetchXorOp:
+ masm.Eor(X(temp), X(output), X(value));
+ break;
+ }
+ StoreExclusive(masm, type, scratch, temp, ptr);
+ masm.Cbnz(W(scratch), &again);
+ if (wantResult) {
+ SignOrZeroExtend(masm, type, targetWidth, output, output);
+ }
+
+ masm.memoryBarrierAfter(sync);
+}
+
+void MacroAssembler::compareExchange(Scalar::Type type,
+ const Synchronization& sync,
+ const Address& mem, Register oldval,
+ Register newval, Register output) {
+ CompareExchange(*this, nullptr, type, Width::_32, sync, mem, oldval, newval,
+ output);
+}
+
+void MacroAssembler::compareExchange(Scalar::Type type,
+ const Synchronization& sync,
+ const BaseIndex& mem, Register oldval,
+ Register newval, Register output) {
+ CompareExchange(*this, nullptr, type, Width::_32, sync, mem, oldval, newval,
+ output);
+}
+
+void MacroAssembler::compareExchange64(const Synchronization& sync,
+ const Address& mem, Register64 expect,
+ Register64 replace, Register64 output) {
+ CompareExchange(*this, nullptr, Scalar::Int64, Width::_64, sync, mem,
+ expect.reg, replace.reg, output.reg);
+}
+
+void MacroAssembler::compareExchange64(const Synchronization& sync,
+ const BaseIndex& mem, Register64 expect,
+ Register64 replace, Register64 output) {
+ CompareExchange(*this, nullptr, Scalar::Int64, Width::_64, sync, mem,
+ expect.reg, replace.reg, output.reg);
+}
+
+void MacroAssembler::atomicExchange64(const Synchronization& sync,
+ const Address& mem, Register64 value,
+ Register64 output) {
+ AtomicExchange(*this, nullptr, Scalar::Int64, Width::_64, sync, mem,
+ value.reg, output.reg);
+}
+
+void MacroAssembler::atomicExchange64(const Synchronization& sync,
+ const BaseIndex& mem, Register64 value,
+ Register64 output) {
+ AtomicExchange(*this, nullptr, Scalar::Int64, Width::_64, sync, mem,
+ value.reg, output.reg);
+}
+
+void MacroAssembler::atomicFetchOp64(const Synchronization& sync, AtomicOp op,
+ Register64 value, const Address& mem,
+ Register64 temp, Register64 output) {
+ AtomicFetchOp<true>(*this, nullptr, Scalar::Int64, Width::_64, sync, op, mem,
+ value.reg, temp.reg, output.reg);
+}
+
+void MacroAssembler::atomicFetchOp64(const Synchronization& sync, AtomicOp op,
+ Register64 value, const BaseIndex& mem,
+ Register64 temp, Register64 output) {
+ AtomicFetchOp<true>(*this, nullptr, Scalar::Int64, Width::_64, sync, op, mem,
+ value.reg, temp.reg, output.reg);
+}
+
+void MacroAssembler::atomicEffectOp64(const Synchronization& sync, AtomicOp op,
+ Register64 value, const Address& mem,
+ Register64 temp) {
+ AtomicFetchOp<false>(*this, nullptr, Scalar::Int64, Width::_64, sync, op, mem,
+ value.reg, temp.reg, temp.reg);
+}
+
+void MacroAssembler::atomicEffectOp64(const Synchronization& sync, AtomicOp op,
+ Register64 value, const BaseIndex& mem,
+ Register64 temp) {
+ AtomicFetchOp<false>(*this, nullptr, Scalar::Int64, Width::_64, sync, op, mem,
+ value.reg, temp.reg, temp.reg);
+}
+
+void MacroAssembler::wasmCompareExchange(const wasm::MemoryAccessDesc& access,
+ const Address& mem, Register oldval,
+ Register newval, Register output) {
+ CompareExchange(*this, &access, access.type(), Width::_32, access.sync(), mem,
+ oldval, newval, output);
+}
+
+void MacroAssembler::wasmCompareExchange(const wasm::MemoryAccessDesc& access,
+ const BaseIndex& mem, Register oldval,
+ Register newval, Register output) {
+ CompareExchange(*this, &access, access.type(), Width::_32, access.sync(), mem,
+ oldval, newval, output);
+}
+
+void MacroAssembler::atomicExchange(Scalar::Type type,
+ const Synchronization& sync,
+ const Address& mem, Register value,
+ Register output) {
+ AtomicExchange(*this, nullptr, type, Width::_32, sync, mem, value, output);
+}
+
+void MacroAssembler::atomicExchange(Scalar::Type type,
+ const Synchronization& sync,
+ const BaseIndex& mem, Register value,
+ Register output) {
+ AtomicExchange(*this, nullptr, type, Width::_32, sync, mem, value, output);
+}
+
+void MacroAssembler::wasmAtomicExchange(const wasm::MemoryAccessDesc& access,
+ const Address& mem, Register value,
+ Register output) {
+ AtomicExchange(*this, &access, access.type(), Width::_32, access.sync(), mem,
+ value, output);
+}
+
+void MacroAssembler::wasmAtomicExchange(const wasm::MemoryAccessDesc& access,
+ const BaseIndex& mem, Register value,
+ Register output) {
+ AtomicExchange(*this, &access, access.type(), Width::_32, access.sync(), mem,
+ value, output);
+}
+
+void MacroAssembler::atomicFetchOp(Scalar::Type type,
+ const Synchronization& sync, AtomicOp op,
+ Register value, const Address& mem,
+ Register temp, Register output) {
+ AtomicFetchOp<true>(*this, nullptr, type, Width::_32, sync, op, mem, value,
+ temp, output);
+}
+
+void MacroAssembler::atomicFetchOp(Scalar::Type type,
+ const Synchronization& sync, AtomicOp op,
+ Register value, const BaseIndex& mem,
+ Register temp, Register output) {
+ AtomicFetchOp<true>(*this, nullptr, type, Width::_32, sync, op, mem, value,
+ temp, output);
+}
+
+void MacroAssembler::wasmAtomicFetchOp(const wasm::MemoryAccessDesc& access,
+ AtomicOp op, Register value,
+ const Address& mem, Register temp,
+ Register output) {
+ AtomicFetchOp<true>(*this, &access, access.type(), Width::_32, access.sync(),
+ op, mem, value, temp, output);
+}
+
+void MacroAssembler::wasmAtomicFetchOp(const wasm::MemoryAccessDesc& access,
+ AtomicOp op, Register value,
+ const BaseIndex& mem, Register temp,
+ Register output) {
+ AtomicFetchOp<true>(*this, &access, access.type(), Width::_32, access.sync(),
+ op, mem, value, temp, output);
+}
+
+void MacroAssembler::wasmAtomicEffectOp(const wasm::MemoryAccessDesc& access,
+ AtomicOp op, Register value,
+ const Address& mem, Register temp) {
+ AtomicFetchOp<false>(*this, &access, access.type(), Width::_32, access.sync(),
+ op, mem, value, temp, temp);
+}
+
+void MacroAssembler::wasmAtomicEffectOp(const wasm::MemoryAccessDesc& access,
+ AtomicOp op, Register value,
+ const BaseIndex& mem, Register temp) {
+ AtomicFetchOp<false>(*this, &access, access.type(), Width::_32, access.sync(),
+ op, mem, value, temp, temp);
+}
+
+void MacroAssembler::wasmCompareExchange64(const wasm::MemoryAccessDesc& access,
+ const Address& mem,
+ Register64 expect,
+ Register64 replace,
+ Register64 output) {
+ CompareExchange(*this, &access, Scalar::Int64, Width::_64, access.sync(), mem,
+ expect.reg, replace.reg, output.reg);
+}
+
+void MacroAssembler::wasmCompareExchange64(const wasm::MemoryAccessDesc& access,
+ const BaseIndex& mem,
+ Register64 expect,
+ Register64 replace,
+ Register64 output) {
+ CompareExchange(*this, &access, Scalar::Int64, Width::_64, access.sync(), mem,
+ expect.reg, replace.reg, output.reg);
+}
+
+void MacroAssembler::wasmAtomicExchange64(const wasm::MemoryAccessDesc& access,
+ const Address& mem, Register64 value,
+ Register64 output) {
+ AtomicExchange(*this, &access, Scalar::Int64, Width::_64, access.sync(), mem,
+ value.reg, output.reg);
+}
+
+void MacroAssembler::wasmAtomicExchange64(const wasm::MemoryAccessDesc& access,
+ const BaseIndex& mem,
+ Register64 value, Register64 output) {
+ AtomicExchange(*this, &access, Scalar::Int64, Width::_64, access.sync(), mem,
+ value.reg, output.reg);
+}
+
+void MacroAssembler::wasmAtomicFetchOp64(const wasm::MemoryAccessDesc& access,
+ AtomicOp op, Register64 value,
+ const Address& mem, Register64 temp,
+ Register64 output) {
+ AtomicFetchOp<true>(*this, &access, Scalar::Int64, Width::_64, access.sync(),
+ op, mem, value.reg, temp.reg, output.reg);
+}
+
+void MacroAssembler::wasmAtomicFetchOp64(const wasm::MemoryAccessDesc& access,
+ AtomicOp op, Register64 value,
+ const BaseIndex& mem, Register64 temp,
+ Register64 output) {
+ AtomicFetchOp<true>(*this, &access, Scalar::Int64, Width::_64, access.sync(),
+ op, mem, value.reg, temp.reg, output.reg);
+}
+
+void MacroAssembler::wasmAtomicEffectOp64(const wasm::MemoryAccessDesc& access,
+ AtomicOp op, Register64 value,
+ const BaseIndex& mem,
+ Register64 temp) {
+ AtomicFetchOp<false>(*this, &access, Scalar::Int64, Width::_64, access.sync(),
+ op, mem, value.reg, temp.reg, temp.reg);
+}
+
+// ========================================================================
+// JS atomic operations.
+
+template <typename T>
+static void CompareExchangeJS(MacroAssembler& masm, Scalar::Type arrayType,
+ const Synchronization& sync, const T& mem,
+ Register oldval, Register newval, Register temp,
+ AnyRegister output) {
+ if (arrayType == Scalar::Uint32) {
+ masm.compareExchange(arrayType, sync, mem, oldval, newval, temp);
+ masm.convertUInt32ToDouble(temp, output.fpu());
+ } else {
+ masm.compareExchange(arrayType, sync, mem, oldval, newval, output.gpr());
+ }
+}
+
+void MacroAssembler::compareExchangeJS(Scalar::Type arrayType,
+ const Synchronization& sync,
+ const Address& mem, Register oldval,
+ Register newval, Register temp,
+ AnyRegister output) {
+ CompareExchangeJS(*this, arrayType, sync, mem, oldval, newval, temp, output);
+}
+
+void MacroAssembler::compareExchangeJS(Scalar::Type arrayType,
+ const Synchronization& sync,
+ const BaseIndex& mem, Register oldval,
+ Register newval, Register temp,
+ AnyRegister output) {
+ CompareExchangeJS(*this, arrayType, sync, mem, oldval, newval, temp, output);
+}
+
+template <typename T>
+static void AtomicExchangeJS(MacroAssembler& masm, Scalar::Type arrayType,
+ const Synchronization& sync, const T& mem,
+ Register value, Register temp,
+ AnyRegister output) {
+ if (arrayType == Scalar::Uint32) {
+ masm.atomicExchange(arrayType, sync, mem, value, temp);
+ masm.convertUInt32ToDouble(temp, output.fpu());
+ } else {
+ masm.atomicExchange(arrayType, sync, mem, value, output.gpr());
+ }
+}
+
+void MacroAssembler::atomicExchangeJS(Scalar::Type arrayType,
+ const Synchronization& sync,
+ const Address& mem, Register value,
+ Register temp, AnyRegister output) {
+ AtomicExchangeJS(*this, arrayType, sync, mem, value, temp, output);
+}
+
+void MacroAssembler::atomicExchangeJS(Scalar::Type arrayType,
+ const Synchronization& sync,
+ const BaseIndex& mem, Register value,
+ Register temp, AnyRegister output) {
+ AtomicExchangeJS(*this, arrayType, sync, mem, value, temp, output);
+}
+
+template <typename T>
+static void AtomicFetchOpJS(MacroAssembler& masm, Scalar::Type arrayType,
+ const Synchronization& sync, AtomicOp op,
+ Register value, const T& mem, Register temp1,
+ Register temp2, AnyRegister output) {
+ if (arrayType == Scalar::Uint32) {
+ masm.atomicFetchOp(arrayType, sync, op, value, mem, temp2, temp1);
+ masm.convertUInt32ToDouble(temp1, output.fpu());
+ } else {
+ masm.atomicFetchOp(arrayType, sync, op, value, mem, temp1, output.gpr());
+ }
+}
+
+void MacroAssembler::atomicFetchOpJS(Scalar::Type arrayType,
+ const Synchronization& sync, AtomicOp op,
+ Register value, const Address& mem,
+ Register temp1, Register temp2,
+ AnyRegister output) {
+ AtomicFetchOpJS(*this, arrayType, sync, op, value, mem, temp1, temp2, output);
+}
+
+void MacroAssembler::atomicFetchOpJS(Scalar::Type arrayType,
+ const Synchronization& sync, AtomicOp op,
+ Register value, const BaseIndex& mem,
+ Register temp1, Register temp2,
+ AnyRegister output) {
+ AtomicFetchOpJS(*this, arrayType, sync, op, value, mem, temp1, temp2, output);
+}
+
+void MacroAssembler::atomicEffectOpJS(Scalar::Type arrayType,
+ const Synchronization& sync, AtomicOp op,
+ Register value, const BaseIndex& mem,
+ Register temp) {
+ AtomicFetchOp<false>(*this, nullptr, arrayType, Width::_32, sync, op, mem,
+ value, temp, temp);
+}
+
+void MacroAssembler::atomicEffectOpJS(Scalar::Type arrayType,
+ const Synchronization& sync, AtomicOp op,
+ Register value, const Address& mem,
+ Register temp) {
+ AtomicFetchOp<false>(*this, nullptr, arrayType, Width::_32, sync, op, mem,
+ value, temp, temp);
+}
+
+void MacroAssembler::flexibleQuotient32(Register rhs, Register srcDest,
+ bool isUnsigned,
+ const LiveRegisterSet&) {
+ quotient32(rhs, srcDest, isUnsigned);
+}
+
+void MacroAssembler::flexibleRemainder32(Register rhs, Register srcDest,
+ bool isUnsigned,
+ const LiveRegisterSet&) {
+ remainder32(rhs, srcDest, isUnsigned);
+}
+
+void MacroAssembler::flexibleDivMod32(Register rhs, Register srcDest,
+ Register remOutput, bool isUnsigned,
+ const LiveRegisterSet&) {
+ vixl::UseScratchRegisterScope temps(this);
+ ARMRegister scratch = temps.AcquireW();
+ ARMRegister src = temps.AcquireW();
+
+ // Preserve src for remainder computation
+ Mov(src, ARMRegister(srcDest, 32));
+
+ if (isUnsigned) {
+ Udiv(ARMRegister(srcDest, 32), src, ARMRegister(rhs, 32));
+ } else {
+ Sdiv(ARMRegister(srcDest, 32), src, ARMRegister(rhs, 32));
+ }
+ // Compute remainder
+ Mul(scratch, ARMRegister(srcDest, 32), ARMRegister(rhs, 32));
+ Sub(ARMRegister(remOutput, 32), src, scratch);
+}
+
+CodeOffset MacroAssembler::moveNearAddressWithPatch(Register dest) {
+ AutoForbidPoolsAndNops afp(this,
+ /* max number of instructions in scope = */ 1);
+ CodeOffset offset(currentOffset());
+ adr(ARMRegister(dest, 64), 0, LabelDoc());
+ return offset;
+}
+
+void MacroAssembler::patchNearAddressMove(CodeLocationLabel loc,
+ CodeLocationLabel target) {
+ ptrdiff_t off = target - loc;
+ MOZ_RELEASE_ASSERT(vixl::IsInt21(off));
+
+ Instruction* cur = reinterpret_cast<Instruction*>(loc.raw());
+ MOZ_ASSERT(cur->IsADR());
+
+ vixl::Register rd = vixl::Register::XRegFromCode(cur->Rd());
+ adr(cur, rd, off);
+}
+
+// ========================================================================
+// Spectre Mitigations.
+
+void MacroAssembler::speculationBarrier() {
+ // Conditional speculation barrier.
+ csdb();
+}
+
+void MacroAssembler::floorFloat32ToInt32(FloatRegister src, Register dest,
+ Label* fail) {
+ ARMFPRegister iFlt(src, 32);
+ ARMRegister o64(dest, 64);
+ ARMRegister o32(dest, 32);
+
+ Label handleZero;
+ Label fin;
+
+ // Handle ±0 and NaN first.
+ Fcmp(iFlt, 0.0);
+ B(Assembler::Equal, &handleZero);
+ // NaN is always a bail condition, just bail directly.
+ B(Assembler::Overflow, fail);
+
+ // Round towards negative infinity.
+ Fcvtms(o64, iFlt);
+
+ // Sign extend lower 32 bits to test if the result isn't an Int32.
+ Cmp(o64, Operand(o64, vixl::SXTW));
+ B(NotEqual, fail);
+
+ // Clear upper 32 bits.
+ Uxtw(o64, o64);
+ B(&fin);
+
+ bind(&handleZero);
+ // Move the top word of the float into the output reg, if it is non-zero,
+ // then the original value was -0.0.
+ Fmov(o32, iFlt);
+ Cbnz(o32, fail);
+ bind(&fin);
+}
+
+void MacroAssembler::floorDoubleToInt32(FloatRegister src, Register dest,
+ Label* fail) {
+ ARMFPRegister iDbl(src, 64);
+ ARMRegister o64(dest, 64);
+ ARMRegister o32(dest, 32);
+
+ Label handleZero;
+ Label fin;
+
+ // Handle ±0 and NaN first.
+ Fcmp(iDbl, 0.0);
+ B(Assembler::Equal, &handleZero);
+ // NaN is always a bail condition, just bail directly.
+ B(Assembler::Overflow, fail);
+
+ // Round towards negative infinity.
+ Fcvtms(o64, iDbl);
+
+ // Sign extend lower 32 bits to test if the result isn't an Int32.
+ Cmp(o64, Operand(o64, vixl::SXTW));
+ B(NotEqual, fail);
+
+ // Clear upper 32 bits.
+ Uxtw(o64, o64);
+ B(&fin);
+
+ bind(&handleZero);
+ // Move the top word of the double into the output reg, if it is non-zero,
+ // then the original value was -0.0.
+ Fmov(o64, iDbl);
+ Cbnz(o64, fail);
+ bind(&fin);
+}
+
+void MacroAssembler::ceilFloat32ToInt32(FloatRegister src, Register dest,
+ Label* fail) {
+ ARMFPRegister iFlt(src, 32);
+ ARMRegister o64(dest, 64);
+ ARMRegister o32(dest, 32);
+
+ Label handleZero;
+ Label fin;
+
+ // Round towards positive infinity.
+ Fcvtps(o64, iFlt);
+
+ // Sign extend lower 32 bits to test if the result isn't an Int32.
+ Cmp(o64, Operand(o64, vixl::SXTW));
+ B(NotEqual, fail);
+
+ // We have to check for (-1, -0] and NaN when the result is zero.
+ Cbz(o64, &handleZero);
+
+ // Clear upper 32 bits.
+ Uxtw(o64, o64);
+ B(&fin);
+
+ // Bail if the input is in (-1, -0] or NaN.
+ bind(&handleZero);
+ // Move the top word of the float into the output reg, if it is non-zero,
+ // then the original value wasn't +0.0.
+ Fmov(o32, iFlt);
+ Cbnz(o32, fail);
+ bind(&fin);
+}
+
+void MacroAssembler::ceilDoubleToInt32(FloatRegister src, Register dest,
+ Label* fail) {
+ ARMFPRegister iDbl(src, 64);
+ ARMRegister o64(dest, 64);
+ ARMRegister o32(dest, 32);
+
+ Label handleZero;
+ Label fin;
+
+ // Round towards positive infinity.
+ Fcvtps(o64, iDbl);
+
+ // Sign extend lower 32 bits to test if the result isn't an Int32.
+ Cmp(o64, Operand(o64, vixl::SXTW));
+ B(NotEqual, fail);
+
+ // We have to check for (-1, -0] and NaN when the result is zero.
+ Cbz(o64, &handleZero);
+
+ // Clear upper 32 bits.
+ Uxtw(o64, o64);
+ B(&fin);
+
+ // Bail if the input is in (-1, -0] or NaN.
+ bind(&handleZero);
+ // Move the top word of the double into the output reg, if it is non-zero,
+ // then the original value wasn't +0.0.
+ Fmov(o64, iDbl);
+ Cbnz(o64, fail);
+ bind(&fin);
+}
+
+void MacroAssembler::truncFloat32ToInt32(FloatRegister src, Register dest,
+ Label* fail) {
+ ARMFPRegister src32(src, 32);
+ ARMRegister dest32(dest, 32);
+ ARMRegister dest64(dest, 64);
+
+ Label done, zeroCase;
+
+ // Convert scalar to signed 64-bit fixed-point, rounding toward zero.
+ // In the case of overflow, the output is saturated.
+ // In the case of NaN and -0, the output is zero.
+ Fcvtzs(dest64, src32);
+
+ // If the output was zero, worry about special cases.
+ Cbz(dest64, &zeroCase);
+
+ // Sign extend lower 32 bits to test if the result isn't an Int32.
+ Cmp(dest64, Operand(dest64, vixl::SXTW));
+ B(NotEqual, fail);
+
+ // Clear upper 32 bits.
+ Uxtw(dest64, dest64);
+
+ // If the output was non-zero and wasn't saturated, just return it.
+ B(&done);
+
+ // Handle the case of a zero output:
+ // 1. The input may have been NaN, requiring a failure.
+ // 2. The input may have been in (-1,-0], requiring a failure.
+ {
+ bind(&zeroCase);
+
+ // Combine test for negative and NaN values using a single bitwise
+ // operation.
+ //
+ // | Decimal number | Bitwise representation |
+ // |----------------|------------------------|
+ // | -0 | 8000'0000 |
+ // | +0 | 0000'0000 |
+ // | +1 | 3f80'0000 |
+ // | NaN (or +Inf) | 7fyx'xxxx, y >= 8 |
+ // | -NaN (or -Inf) | ffyx'xxxx, y >= 8 |
+ //
+ // If any of two most significant bits is set, the number isn't in [0, 1).
+ // (Recall that floating point numbers, except for NaN, are strictly ordered
+ // when comparing their bitwise representation as signed integers.)
+
+ Fmov(dest32, src32);
+ Lsr(dest32, dest32, 30);
+ Cbnz(dest32, fail);
+ }
+
+ bind(&done);
+}
+
+void MacroAssembler::truncDoubleToInt32(FloatRegister src, Register dest,
+ Label* fail) {
+ ARMFPRegister src64(src, 64);
+ ARMRegister dest64(dest, 64);
+ ARMRegister dest32(dest, 32);
+
+ Label done, zeroCase;
+
+ // Convert scalar to signed 64-bit fixed-point, rounding toward zero.
+ // In the case of overflow, the output is saturated.
+ // In the case of NaN and -0, the output is zero.
+ Fcvtzs(dest64, src64);
+
+ // If the output was zero, worry about special cases.
+ Cbz(dest64, &zeroCase);
+
+ // Sign extend lower 32 bits to test if the result isn't an Int32.
+ Cmp(dest64, Operand(dest64, vixl::SXTW));
+ B(NotEqual, fail);
+
+ // Clear upper 32 bits.
+ Uxtw(dest64, dest64);
+
+ // If the output was non-zero and wasn't saturated, just return it.
+ B(&done);
+
+ // Handle the case of a zero output:
+ // 1. The input may have been NaN, requiring a failure.
+ // 2. The input may have been in (-1,-0], requiring a failure.
+ {
+ bind(&zeroCase);
+
+ // Combine test for negative and NaN values using a single bitwise
+ // operation.
+ //
+ // | Decimal number | Bitwise representation |
+ // |----------------|------------------------|
+ // | -0 | 8000'0000'0000'0000 |
+ // | +0 | 0000'0000'0000'0000 |
+ // | +1 | 3ff0'0000'0000'0000 |
+ // | NaN (or +Inf) | 7ffx'xxxx'xxxx'xxxx |
+ // | -NaN (or -Inf) | fffx'xxxx'xxxx'xxxx |
+ //
+ // If any of two most significant bits is set, the number isn't in [0, 1).
+ // (Recall that floating point numbers, except for NaN, are strictly ordered
+ // when comparing their bitwise representation as signed integers.)
+
+ Fmov(dest64, src64);
+ Lsr(dest64, dest64, 62);
+ Cbnz(dest64, fail);
+ }
+
+ bind(&done);
+}
+
+void MacroAssembler::roundFloat32ToInt32(FloatRegister src, Register dest,
+ FloatRegister temp, Label* fail) {
+ ARMFPRegister src32(src, 32);
+ ARMRegister dest32(dest, 32);
+ ARMRegister dest64(dest, 64);
+
+ Label negative, saturated, done;
+
+ // Branch to a slow path if input < 0.0 due to complicated rounding rules.
+ // Note that Fcmp with NaN unsets the negative flag.
+ Fcmp(src32, 0.0);
+ B(&negative, Assembler::Condition::lo);
+
+ // Handle the simple case of a positive input, and also -0 and NaN.
+ // Rounding proceeds with consideration of the fractional part of the input:
+ // 1. If > 0.5, round to integer with higher absolute value (so, up).
+ // 2. If < 0.5, round to integer with lower absolute value (so, down).
+ // 3. If = 0.5, round to +Infinity (so, up).
+ {
+ // Convert to signed 64-bit integer, rounding halfway cases away from zero.
+ // In the case of overflow, the output is saturated.
+ // In the case of NaN and -0, the output is zero.
+ Fcvtas(dest64, src32);
+
+ // In the case of zero, the input may have been NaN or -0, which must bail.
+ Cbnz(dest64, &saturated);
+
+ // Combine test for -0 and NaN values using a single bitwise operation.
+ // See truncFloat32ToInt32 for an explanation.
+ Fmov(dest32, src32);
+ Lsr(dest32, dest32, 30);
+ Cbnz(dest32, fail);
+
+ B(&done);
+ }
+
+ // Handle the complicated case of a negative input.
+ // Rounding proceeds with consideration of the fractional part of the input:
+ // 1. If > 0.5, round to integer with higher absolute value (so, down).
+ // 2. If < 0.5, round to integer with lower absolute value (so, up).
+ // 3. If = 0.5, round to +Infinity (so, up).
+ bind(&negative);
+ {
+ // Inputs in [-0.5, 0) are rounded to -0. Fail.
+ loadConstantFloat32(-0.5f, temp);
+ branchFloat(Assembler::DoubleGreaterThanOrEqual, src, temp, fail);
+
+ // Other negative inputs need the biggest double less than 0.5 added.
+ loadConstantFloat32(GetBiggestNumberLessThan(0.5f), temp);
+ addFloat32(src, temp);
+
+ // Round all values toward -Infinity.
+ // In the case of overflow, the output is saturated.
+ // NaN and -0 are already handled by the "positive number" path above.
+ Fcvtms(dest64, temp);
+ }
+
+ bind(&saturated);
+
+ // Sign extend lower 32 bits to test if the result isn't an Int32.
+ Cmp(dest64, Operand(dest64, vixl::SXTW));
+ B(NotEqual, fail);
+
+ // Clear upper 32 bits.
+ Uxtw(dest64, dest64);
+
+ bind(&done);
+}
+
+void MacroAssembler::roundDoubleToInt32(FloatRegister src, Register dest,
+ FloatRegister temp, Label* fail) {
+ ARMFPRegister src64(src, 64);
+ ARMRegister dest64(dest, 64);
+ ARMRegister dest32(dest, 32);
+
+ Label negative, saturated, done;
+
+ // Branch to a slow path if input < 0.0 due to complicated rounding rules.
+ // Note that Fcmp with NaN unsets the negative flag.
+ Fcmp(src64, 0.0);
+ B(&negative, Assembler::Condition::lo);
+
+ // Handle the simple case of a positive input, and also -0 and NaN.
+ // Rounding proceeds with consideration of the fractional part of the input:
+ // 1. If > 0.5, round to integer with higher absolute value (so, up).
+ // 2. If < 0.5, round to integer with lower absolute value (so, down).
+ // 3. If = 0.5, round to +Infinity (so, up).
+ {
+ // Convert to signed 64-bit integer, rounding halfway cases away from zero.
+ // In the case of overflow, the output is saturated.
+ // In the case of NaN and -0, the output is zero.
+ Fcvtas(dest64, src64);
+
+ // In the case of zero, the input may have been NaN or -0, which must bail.
+ Cbnz(dest64, &saturated);
+
+ // Combine test for -0 and NaN values using a single bitwise operation.
+ // See truncDoubleToInt32 for an explanation.
+ Fmov(dest64, src64);
+ Lsr(dest64, dest64, 62);
+ Cbnz(dest64, fail);
+
+ B(&done);
+ }
+
+ // Handle the complicated case of a negative input.
+ // Rounding proceeds with consideration of the fractional part of the input:
+ // 1. If > 0.5, round to integer with higher absolute value (so, down).
+ // 2. If < 0.5, round to integer with lower absolute value (so, up).
+ // 3. If = 0.5, round to +Infinity (so, up).
+ bind(&negative);
+ {
+ // Inputs in [-0.5, 0) are rounded to -0. Fail.
+ loadConstantDouble(-0.5, temp);
+ branchDouble(Assembler::DoubleGreaterThanOrEqual, src, temp, fail);
+
+ // Other negative inputs need the biggest double less than 0.5 added.
+ loadConstantDouble(GetBiggestNumberLessThan(0.5), temp);
+ addDouble(src, temp);
+
+ // Round all values toward -Infinity.
+ // In the case of overflow, the output is saturated.
+ // NaN and -0 are already handled by the "positive number" path above.
+ Fcvtms(dest64, temp);
+ }
+
+ bind(&saturated);
+
+ // Sign extend lower 32 bits to test if the result isn't an Int32.
+ Cmp(dest64, Operand(dest64, vixl::SXTW));
+ B(NotEqual, fail);
+
+ // Clear upper 32 bits.
+ Uxtw(dest64, dest64);
+
+ bind(&done);
+}
+
+void MacroAssembler::nearbyIntDouble(RoundingMode mode, FloatRegister src,
+ FloatRegister dest) {
+ switch (mode) {
+ case RoundingMode::Up:
+ frintp(ARMFPRegister(dest, 64), ARMFPRegister(src, 64));
+ return;
+ case RoundingMode::Down:
+ frintm(ARMFPRegister(dest, 64), ARMFPRegister(src, 64));
+ return;
+ case RoundingMode::NearestTiesToEven:
+ frintn(ARMFPRegister(dest, 64), ARMFPRegister(src, 64));
+ return;
+ case RoundingMode::TowardsZero:
+ frintz(ARMFPRegister(dest, 64), ARMFPRegister(src, 64));
+ return;
+ }
+ MOZ_CRASH("unexpected mode");
+}
+
+void MacroAssembler::nearbyIntFloat32(RoundingMode mode, FloatRegister src,
+ FloatRegister dest) {
+ switch (mode) {
+ case RoundingMode::Up:
+ frintp(ARMFPRegister(dest, 32), ARMFPRegister(src, 32));
+ return;
+ case RoundingMode::Down:
+ frintm(ARMFPRegister(dest, 32), ARMFPRegister(src, 32));
+ return;
+ case RoundingMode::NearestTiesToEven:
+ frintn(ARMFPRegister(dest, 32), ARMFPRegister(src, 32));
+ return;
+ case RoundingMode::TowardsZero:
+ frintz(ARMFPRegister(dest, 32), ARMFPRegister(src, 32));
+ return;
+ }
+ MOZ_CRASH("unexpected mode");
+}
+
+void MacroAssembler::copySignDouble(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister output) {
+ ScratchDoubleScope scratch(*this);
+
+ // Double with only the sign bit set
+ loadConstantDouble(-0.0, scratch);
+
+ if (lhs != output) {
+ moveDouble(lhs, output);
+ }
+
+ bit(ARMFPRegister(output.encoding(), vixl::VectorFormat::kFormat8B),
+ ARMFPRegister(rhs.encoding(), vixl::VectorFormat::kFormat8B),
+ ARMFPRegister(scratch.encoding(), vixl::VectorFormat::kFormat8B));
+}
+
+void MacroAssembler::copySignFloat32(FloatRegister lhs, FloatRegister rhs,
+ FloatRegister output) {
+ ScratchFloat32Scope scratch(*this);
+
+ // Float with only the sign bit set
+ loadConstantFloat32(-0.0f, scratch);
+
+ if (lhs != output) {
+ moveFloat32(lhs, output);
+ }
+
+ bit(ARMFPRegister(output.encoding(), vixl::VectorFormat::kFormat8B),
+ ARMFPRegister(rhs.encoding(), vixl::VectorFormat::kFormat8B),
+ ARMFPRegister(scratch.encoding(), vixl::VectorFormat::kFormat8B));
+}
+
+void MacroAssembler::shiftIndex32AndAdd(Register indexTemp32, int shift,
+ Register pointer) {
+ Add(ARMRegister(pointer, 64), ARMRegister(pointer, 64),
+ Operand(ARMRegister(indexTemp32, 64), vixl::LSL, shift));
+}
+
+//}}} check_macroassembler_style
+
+} // namespace jit
+} // namespace js
diff --git a/js/src/jit/arm64/MacroAssembler-arm64.h b/js/src/jit/arm64/MacroAssembler-arm64.h
new file mode 100644
index 0000000000..edfd8c9d3e
--- /dev/null
+++ b/js/src/jit/arm64/MacroAssembler-arm64.h
@@ -0,0 +1,2206 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_arm64_MacroAssembler_arm64_h
+#define jit_arm64_MacroAssembler_arm64_h
+
+#include "jit/arm64/Assembler-arm64.h"
+#include "jit/arm64/vixl/Debugger-vixl.h"
+#include "jit/arm64/vixl/MacroAssembler-vixl.h"
+#include "jit/AtomicOp.h"
+#include "jit/MoveResolver.h"
+#include "vm/BigIntType.h" // JS::BigInt
+#include "wasm/WasmBuiltins.h"
+
+#ifdef _M_ARM64
+# ifdef move32
+# undef move32
+# endif
+# ifdef move64
+# undef move64
+# endif
+#endif
+
+namespace js {
+namespace jit {
+
+// Import VIXL operands directly into the jit namespace for shared code.
+using vixl::MemOperand;
+using vixl::Operand;
+
+struct ImmShiftedTag : public ImmWord {
+ explicit ImmShiftedTag(JSValueShiftedTag shtag) : ImmWord((uintptr_t)shtag) {}
+
+ explicit ImmShiftedTag(JSValueType type)
+ : ImmWord(uintptr_t(JSValueShiftedTag(JSVAL_TYPE_TO_SHIFTED_TAG(type)))) {
+ }
+};
+
+struct ImmTag : public Imm32 {
+ explicit ImmTag(JSValueTag tag) : Imm32(tag) {}
+};
+
+class ScratchTagScope;
+
+class MacroAssemblerCompat : public vixl::MacroAssembler {
+ public:
+ typedef vixl::Condition Condition;
+
+ private:
+ // Perform a downcast. Should be removed by Bug 996602.
+ js::jit::MacroAssembler& asMasm();
+ const js::jit::MacroAssembler& asMasm() const;
+
+ public:
+ // Restrict to only VIXL-internal functions.
+ vixl::MacroAssembler& asVIXL();
+ const MacroAssembler& asVIXL() const;
+
+ protected:
+ bool enoughMemory_;
+ uint32_t framePushed_;
+
+ MacroAssemblerCompat()
+ : vixl::MacroAssembler(), enoughMemory_(true), framePushed_(0) {}
+
+ protected:
+ MoveResolver moveResolver_;
+
+ public:
+ bool oom() const { return Assembler::oom() || !enoughMemory_; }
+ static ARMRegister toARMRegister(RegisterOrSP r, size_t size) {
+ if (IsHiddenSP(r)) {
+ MOZ_ASSERT(size == 64);
+ return sp;
+ }
+ return ARMRegister(AsRegister(r), size);
+ }
+ static MemOperand toMemOperand(const Address& a) {
+ return MemOperand(toARMRegister(a.base, 64), a.offset);
+ }
+ void doBaseIndex(const vixl::CPURegister& rt, const BaseIndex& addr,
+ vixl::LoadStoreOp op) {
+ const ARMRegister base = toARMRegister(addr.base, 64);
+ const ARMRegister index = ARMRegister(addr.index, 64);
+ const unsigned scale = addr.scale;
+
+ if (!addr.offset &&
+ (!scale || scale == static_cast<unsigned>(CalcLSDataSize(op)))) {
+ LoadStoreMacro(rt, MemOperand(base, index, vixl::LSL, scale), op);
+ return;
+ }
+
+ vixl::UseScratchRegisterScope temps(this);
+ ARMRegister scratch64 = temps.AcquireX();
+ MOZ_ASSERT(!scratch64.Is(rt));
+ MOZ_ASSERT(!scratch64.Is(base));
+ MOZ_ASSERT(!scratch64.Is(index));
+
+ Add(scratch64, base, Operand(index, vixl::LSL, scale));
+ LoadStoreMacro(rt, MemOperand(scratch64, addr.offset), op);
+ }
+ void Push(ARMRegister reg) {
+ push(reg);
+ adjustFrame(reg.size() / 8);
+ }
+ void Push(Register reg) {
+ vixl::MacroAssembler::Push(ARMRegister(reg, 64));
+ adjustFrame(8);
+ }
+ void Push(Imm32 imm) {
+ push(imm);
+ adjustFrame(8);
+ }
+ void Push(FloatRegister f) {
+ push(ARMFPRegister(f, 64));
+ adjustFrame(8);
+ }
+ void Push(ImmPtr imm) {
+ push(imm);
+ adjustFrame(sizeof(void*));
+ }
+ void push(FloatRegister f) {
+ vixl::MacroAssembler::Push(ARMFPRegister(f, 64));
+ }
+ void push(ARMFPRegister f) { vixl::MacroAssembler::Push(f); }
+ void push(Imm32 imm) {
+ if (imm.value == 0) {
+ vixl::MacroAssembler::Push(vixl::xzr);
+ } else {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ move32(imm, scratch64.asUnsized());
+ vixl::MacroAssembler::Push(scratch64);
+ }
+ }
+ void push(ImmWord imm) {
+ if (imm.value == 0) {
+ vixl::MacroAssembler::Push(vixl::xzr);
+ } else {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ Mov(scratch64, imm.value);
+ vixl::MacroAssembler::Push(scratch64);
+ }
+ }
+ void push(ImmPtr imm) {
+ if (imm.value == nullptr) {
+ vixl::MacroAssembler::Push(vixl::xzr);
+ } else {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ movePtr(imm, scratch64.asUnsized());
+ vixl::MacroAssembler::Push(scratch64);
+ }
+ }
+ void push(ImmGCPtr imm) {
+ if (imm.value == nullptr) {
+ vixl::MacroAssembler::Push(vixl::xzr);
+ } else {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ movePtr(imm, scratch64.asUnsized());
+ vixl::MacroAssembler::Push(scratch64);
+ }
+ }
+ void push(ARMRegister reg) { vixl::MacroAssembler::Push(reg); }
+ void push(Address a) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ MOZ_ASSERT(a.base != scratch64.asUnsized());
+ loadPtr(a, scratch64.asUnsized());
+ vixl::MacroAssembler::Push(scratch64);
+ }
+
+ // Push registers.
+ void push(Register reg) { vixl::MacroAssembler::Push(ARMRegister(reg, 64)); }
+ void push(RegisterOrSP reg) {
+ if (IsHiddenSP(reg)) {
+ vixl::MacroAssembler::Push(sp);
+ }
+ vixl::MacroAssembler::Push(toARMRegister(reg, 64));
+ }
+ void push(Register r0, Register r1) {
+ vixl::MacroAssembler::Push(ARMRegister(r0, 64), ARMRegister(r1, 64));
+ }
+ void push(Register r0, Register r1, Register r2) {
+ vixl::MacroAssembler::Push(ARMRegister(r0, 64), ARMRegister(r1, 64),
+ ARMRegister(r2, 64));
+ }
+ void push(Register r0, Register r1, Register r2, Register r3) {
+ vixl::MacroAssembler::Push(ARMRegister(r0, 64), ARMRegister(r1, 64),
+ ARMRegister(r2, 64), ARMRegister(r3, 64));
+ }
+ void push(ARMFPRegister r0, ARMFPRegister r1, ARMFPRegister r2,
+ ARMFPRegister r3) {
+ vixl::MacroAssembler::Push(r0, r1, r2, r3);
+ }
+
+ // Pop registers.
+ void pop(Register reg) { vixl::MacroAssembler::Pop(ARMRegister(reg, 64)); }
+ void pop(Register r0, Register r1) {
+ vixl::MacroAssembler::Pop(ARMRegister(r0, 64), ARMRegister(r1, 64));
+ }
+ void pop(Register r0, Register r1, Register r2) {
+ vixl::MacroAssembler::Pop(ARMRegister(r0, 64), ARMRegister(r1, 64),
+ ARMRegister(r2, 64));
+ }
+ void pop(Register r0, Register r1, Register r2, Register r3) {
+ vixl::MacroAssembler::Pop(ARMRegister(r0, 64), ARMRegister(r1, 64),
+ ARMRegister(r2, 64), ARMRegister(r3, 64));
+ }
+ void pop(ARMFPRegister r0, ARMFPRegister r1, ARMFPRegister r2,
+ ARMFPRegister r3) {
+ vixl::MacroAssembler::Pop(r0, r1, r2, r3);
+ }
+
+ void pop(const ValueOperand& v) { pop(v.valueReg()); }
+ void pop(const FloatRegister& f) {
+ vixl::MacroAssembler::Pop(ARMFPRegister(f, 64));
+ }
+
+ void implicitPop(uint32_t args) {
+ MOZ_ASSERT(args % sizeof(intptr_t) == 0);
+ adjustFrame(0 - args);
+ }
+ void Pop(ARMRegister r) {
+ vixl::MacroAssembler::Pop(r);
+ adjustFrame(0 - r.size() / 8);
+ }
+ // FIXME: This is the same on every arch.
+ // FIXME: If we can share framePushed_, we can share this.
+ // FIXME: Or just make it at the highest level.
+ CodeOffset PushWithPatch(ImmWord word) {
+ framePushed_ += sizeof(word.value);
+ return pushWithPatch(word);
+ }
+ CodeOffset PushWithPatch(ImmPtr ptr) {
+ return PushWithPatch(ImmWord(uintptr_t(ptr.value)));
+ }
+
+ uint32_t framePushed() const { return framePushed_; }
+ void adjustFrame(int32_t diff) { setFramePushed(framePushed_ + diff); }
+
+ void setFramePushed(uint32_t framePushed) { framePushed_ = framePushed; }
+
+ void freeStack(Register amount) {
+ vixl::MacroAssembler::Drop(Operand(ARMRegister(amount, 64)));
+ }
+
+ // Update sp with the value of the current active stack pointer, if necessary.
+ void syncStackPtr() {
+ if (!GetStackPointer64().Is(vixl::sp)) {
+ Mov(vixl::sp, GetStackPointer64());
+ }
+ }
+ void initPseudoStackPtr() {
+ if (!GetStackPointer64().Is(vixl::sp)) {
+ Mov(GetStackPointer64(), vixl::sp);
+ }
+ }
+ // In debug builds only, cause a trap if PSP is active and PSP != SP
+ void assertStackPtrsSynced(uint32_t id) {
+#ifdef DEBUG
+ // The add and sub instructions below will only take a 12-bit immediate.
+ MOZ_ASSERT(id <= 0xFFF);
+ if (!GetStackPointer64().Is(vixl::sp)) {
+ Label ok;
+ // Add a marker, so we can figure out who requested the check when
+ // inspecting the generated code. Note, a more concise way to encode
+ // the marker would be to use it as an immediate for the `brk`
+ // instruction as generated by `Unreachable()`, and removing the add/sub.
+ Add(GetStackPointer64(), GetStackPointer64(), Operand(id));
+ Sub(GetStackPointer64(), GetStackPointer64(), Operand(id));
+ Cmp(vixl::sp, GetStackPointer64());
+ B(Equal, &ok);
+ Unreachable();
+ bind(&ok);
+ }
+#endif
+ }
+ // In debug builds only, add a marker that doesn't change the machine's
+ // state. Note these markers are x16-based, as opposed to the x28-based
+ // ones made by `assertStackPtrsSynced`.
+ void addMarker(uint32_t id) {
+#ifdef DEBUG
+ // Only 12 bits of immediate are allowed.
+ MOZ_ASSERT(id <= 0xFFF);
+ ARMRegister x16 = ARMRegister(r16, 64);
+ Add(x16, x16, Operand(id));
+ Sub(x16, x16, Operand(id));
+#endif
+ }
+
+ void storeValue(ValueOperand val, const Address& dest) {
+ storePtr(val.valueReg(), dest);
+ }
+
+ template <typename T>
+ void storeValue(JSValueType type, Register reg, const T& dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != reg);
+ tagValue(type, reg, ValueOperand(scratch));
+ storeValue(ValueOperand(scratch), dest);
+ }
+ template <typename T>
+ void storeValue(const Value& val, const T& dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ moveValue(val, ValueOperand(scratch));
+ storeValue(ValueOperand(scratch), dest);
+ }
+ void storeValue(ValueOperand val, BaseIndex dest) {
+ storePtr(val.valueReg(), dest);
+ }
+ void storeValue(const Address& src, const Address& dest, Register temp) {
+ loadPtr(src, temp);
+ storePtr(temp, dest);
+ }
+
+ void storePrivateValue(Register src, const Address& dest) {
+ storePtr(src, dest);
+ }
+ void storePrivateValue(ImmGCPtr imm, const Address& dest) {
+ storePtr(imm, dest);
+ }
+
+ void loadValue(Address src, Register val) {
+ Ldr(ARMRegister(val, 64), MemOperand(src));
+ }
+ void loadValue(Address src, ValueOperand val) {
+ Ldr(ARMRegister(val.valueReg(), 64), MemOperand(src));
+ }
+ void loadValue(const BaseIndex& src, ValueOperand val) {
+ doBaseIndex(ARMRegister(val.valueReg(), 64), src, vixl::LDR_x);
+ }
+ void loadUnalignedValue(const Address& src, ValueOperand dest) {
+ loadValue(src, dest);
+ }
+ void tagValue(JSValueType type, Register payload, ValueOperand dest) {
+ // This could be cleverer, but the first attempt had bugs.
+ Orr(ARMRegister(dest.valueReg(), 64), ARMRegister(payload, 64),
+ Operand(ImmShiftedTag(type).value));
+ }
+ void pushValue(ValueOperand val) {
+ vixl::MacroAssembler::Push(ARMRegister(val.valueReg(), 64));
+ }
+ void popValue(ValueOperand val) {
+ vixl::MacroAssembler::Pop(ARMRegister(val.valueReg(), 64));
+ // SP may be < PSP now (that's OK).
+ // eg testcase: tests/backup-point-bug1315634.js
+ }
+ void pushValue(const Value& val) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ if (val.isGCThing()) {
+ BufferOffset load =
+ movePatchablePtr(ImmPtr(val.bitsAsPunboxPointer()), scratch);
+ writeDataRelocation(val, load);
+ push(scratch);
+ } else {
+ moveValue(val, scratch);
+ push(scratch);
+ }
+ }
+ void pushValue(JSValueType type, Register reg) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != reg);
+ tagValue(type, reg, ValueOperand(scratch));
+ push(scratch);
+ }
+ void pushValue(const Address& addr) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != addr.base);
+ loadValue(addr, scratch);
+ push(scratch);
+ }
+ void pushValue(const BaseIndex& addr, Register scratch) {
+ loadValue(addr, ValueOperand(scratch));
+ pushValue(ValueOperand(scratch));
+ }
+ template <typename T>
+ void storeUnboxedPayload(ValueOperand value, T address, size_t nbytes,
+ JSValueType type) {
+ switch (nbytes) {
+ case 8: {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ if (type == JSVAL_TYPE_OBJECT) {
+ unboxObjectOrNull(value, scratch);
+ } else {
+ unboxNonDouble(value, scratch, type);
+ }
+ storePtr(scratch, address);
+ return;
+ }
+ case 4:
+ store32(value.valueReg(), address);
+ return;
+ case 1:
+ store8(value.valueReg(), address);
+ return;
+ default:
+ MOZ_CRASH("Bad payload width");
+ }
+ }
+ void moveValue(const Value& val, Register dest) {
+ if (val.isGCThing()) {
+ BufferOffset load =
+ movePatchablePtr(ImmPtr(val.bitsAsPunboxPointer()), dest);
+ writeDataRelocation(val, load);
+ } else {
+ movePtr(ImmWord(val.asRawBits()), dest);
+ }
+ }
+ void moveValue(const Value& src, const ValueOperand& dest) {
+ moveValue(src, dest.valueReg());
+ }
+
+ CodeOffset pushWithPatch(ImmWord imm) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ CodeOffset label = movWithPatch(imm, scratch);
+ push(scratch);
+ return label;
+ }
+
+ CodeOffset movWithPatch(ImmWord imm, Register dest) {
+ BufferOffset off = immPool64(ARMRegister(dest, 64), imm.value);
+ return CodeOffset(off.getOffset());
+ }
+ CodeOffset movWithPatch(ImmPtr imm, Register dest) {
+ BufferOffset off = immPool64(ARMRegister(dest, 64), uint64_t(imm.value));
+ return CodeOffset(off.getOffset());
+ }
+
+ void boxValue(JSValueType type, Register src, Register dest);
+
+ void splitSignExtTag(Register src, Register dest) {
+ sbfx(ARMRegister(dest, 64), ARMRegister(src, 64), JSVAL_TAG_SHIFT,
+ (64 - JSVAL_TAG_SHIFT));
+ }
+ [[nodiscard]] Register extractTag(const Address& address, Register scratch) {
+ loadPtr(address, scratch);
+ splitSignExtTag(scratch, scratch);
+ return scratch;
+ }
+ [[nodiscard]] Register extractTag(const ValueOperand& value,
+ Register scratch) {
+ splitSignExtTag(value.valueReg(), scratch);
+ return scratch;
+ }
+ [[nodiscard]] Register extractObject(const Address& address,
+ Register scratch) {
+ loadPtr(address, scratch);
+ unboxObject(scratch, scratch);
+ return scratch;
+ }
+ [[nodiscard]] Register extractObject(const ValueOperand& value,
+ Register scratch) {
+ unboxObject(value, scratch);
+ return scratch;
+ }
+ [[nodiscard]] Register extractSymbol(const ValueOperand& value,
+ Register scratch) {
+ unboxSymbol(value, scratch);
+ return scratch;
+ }
+ [[nodiscard]] Register extractInt32(const ValueOperand& value,
+ Register scratch) {
+ unboxInt32(value, scratch);
+ return scratch;
+ }
+ [[nodiscard]] Register extractBoolean(const ValueOperand& value,
+ Register scratch) {
+ unboxBoolean(value, scratch);
+ return scratch;
+ }
+
+ inline void ensureDouble(const ValueOperand& source, FloatRegister dest,
+ Label* failure);
+
+ void emitSet(Condition cond, Register dest) {
+ Cset(ARMRegister(dest, 64), cond);
+ }
+
+ void testNullSet(Condition cond, const ValueOperand& value, Register dest) {
+ cond = testNull(cond, value);
+ emitSet(cond, dest);
+ }
+ void testObjectSet(Condition cond, const ValueOperand& value, Register dest) {
+ cond = testObject(cond, value);
+ emitSet(cond, dest);
+ }
+ void testUndefinedSet(Condition cond, const ValueOperand& value,
+ Register dest) {
+ cond = testUndefined(cond, value);
+ emitSet(cond, dest);
+ }
+
+ void convertBoolToInt32(Register source, Register dest) {
+ Uxtb(ARMRegister(dest, 64), ARMRegister(source, 64));
+ }
+
+ void convertInt32ToDouble(Register src, FloatRegister dest) {
+ Scvtf(ARMFPRegister(dest, 64),
+ ARMRegister(src, 32)); // Uses FPCR rounding mode.
+ }
+ void convertInt32ToDouble(const Address& src, FloatRegister dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != src.base);
+ load32(src, scratch);
+ convertInt32ToDouble(scratch, dest);
+ }
+ void convertInt32ToDouble(const BaseIndex& src, FloatRegister dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != src.base);
+ MOZ_ASSERT(scratch != src.index);
+ load32(src, scratch);
+ convertInt32ToDouble(scratch, dest);
+ }
+
+ void convertInt32ToFloat32(Register src, FloatRegister dest) {
+ Scvtf(ARMFPRegister(dest, 32),
+ ARMRegister(src, 32)); // Uses FPCR rounding mode.
+ }
+ void convertInt32ToFloat32(const Address& src, FloatRegister dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != src.base);
+ load32(src, scratch);
+ convertInt32ToFloat32(scratch, dest);
+ }
+
+ void convertUInt32ToDouble(Register src, FloatRegister dest) {
+ Ucvtf(ARMFPRegister(dest, 64),
+ ARMRegister(src, 32)); // Uses FPCR rounding mode.
+ }
+ void convertUInt32ToDouble(const Address& src, FloatRegister dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != src.base);
+ load32(src, scratch);
+ convertUInt32ToDouble(scratch, dest);
+ }
+
+ void convertUInt32ToFloat32(Register src, FloatRegister dest) {
+ Ucvtf(ARMFPRegister(dest, 32),
+ ARMRegister(src, 32)); // Uses FPCR rounding mode.
+ }
+ void convertUInt32ToFloat32(const Address& src, FloatRegister dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != src.base);
+ load32(src, scratch);
+ convertUInt32ToFloat32(scratch, dest);
+ }
+
+ void convertFloat32ToDouble(FloatRegister src, FloatRegister dest) {
+ Fcvt(ARMFPRegister(dest, 64), ARMFPRegister(src, 32));
+ }
+ void convertDoubleToFloat32(FloatRegister src, FloatRegister dest) {
+ Fcvt(ARMFPRegister(dest, 32), ARMFPRegister(src, 64));
+ }
+
+ using vixl::MacroAssembler::B;
+
+ void convertDoubleToInt32(FloatRegister src, Register dest, Label* fail,
+ bool negativeZeroCheck = true) {
+ ARMFPRegister fsrc64(src, 64);
+ ARMRegister dest32(dest, 32);
+
+ // ARMv8.3 chips support the FJCVTZS instruction, which handles exactly this
+ // logic. But the simulator does not implement it, and when the simulator
+ // runs on ARM64 hardware we want to override vixl's detection of it.
+#if defined(JS_SIMULATOR_ARM64) && (defined(__aarch64__) || defined(_M_ARM64))
+ const bool fjscvt = false;
+#else
+ const bool fjscvt =
+ CPUHas(vixl::CPUFeatures::kFP, vixl::CPUFeatures::kJSCVT);
+#endif
+ if (fjscvt) {
+ // Convert double to integer, rounding toward zero.
+ // The Z-flag is set iff the conversion is exact. -0 unsets the Z-flag.
+ Fjcvtzs(dest32, fsrc64);
+
+ if (negativeZeroCheck) {
+ B(fail, Assembler::NonZero);
+ } else {
+ Label done;
+ B(&done, Assembler::Zero); // If conversion was exact, go to end.
+
+ // The conversion was inexact, but the caller intends to allow -0.
+
+ // Compare fsrc64 to 0.
+ // If fsrc64 == 0 and FJCVTZS conversion was inexact, then fsrc64 is -0.
+ Fcmp(fsrc64, 0.0);
+ B(fail, Assembler::NotEqual); // Pass through -0; fail otherwise.
+
+ bind(&done);
+ }
+ } else {
+ // Older processors use a significantly slower path.
+ ARMRegister dest64(dest, 64);
+
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMFPRegister scratch64 = temps.AcquireD();
+ MOZ_ASSERT(!scratch64.Is(fsrc64));
+
+ Fcvtzs(dest32, fsrc64); // Convert, rounding toward zero.
+ Scvtf(scratch64, dest32); // Convert back, using FPCR rounding mode.
+ Fcmp(scratch64, fsrc64);
+ B(fail, Assembler::NotEqual);
+
+ if (negativeZeroCheck) {
+ Label nonzero;
+ Cbnz(dest32, &nonzero);
+ Fmov(dest64, fsrc64);
+ Cbnz(dest64, fail);
+ bind(&nonzero);
+ }
+ }
+ }
+ void convertFloat32ToInt32(FloatRegister src, Register dest, Label* fail,
+ bool negativeZeroCheck = true) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMFPRegister scratch32 = temps.AcquireS();
+
+ ARMFPRegister fsrc(src, 32);
+ ARMRegister dest32(dest, 32);
+ ARMRegister dest64(dest, 64);
+
+ MOZ_ASSERT(!scratch32.Is(fsrc));
+
+ Fcvtzs(dest64, fsrc); // Convert, rounding toward zero.
+ Scvtf(scratch32, dest32); // Convert back, using FPCR rounding mode.
+ Fcmp(scratch32, fsrc);
+ B(fail, Assembler::NotEqual);
+
+ if (negativeZeroCheck) {
+ Label nonzero;
+ Cbnz(dest32, &nonzero);
+ Fmov(dest32, fsrc);
+ Cbnz(dest32, fail);
+ bind(&nonzero);
+ }
+ Uxtw(dest64, dest64);
+ }
+
+ void convertDoubleToPtr(FloatRegister src, Register dest, Label* fail,
+ bool negativeZeroCheck = true) {
+ ARMFPRegister fsrc64(src, 64);
+ ARMRegister dest64(dest, 64);
+
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMFPRegister scratch64 = temps.AcquireD();
+ MOZ_ASSERT(!scratch64.Is(fsrc64));
+
+ // Note: we can't use the FJCVTZS instruction here because that only works
+ // for 32-bit values.
+
+ Fcvtzs(dest64, fsrc64); // Convert, rounding toward zero.
+ Scvtf(scratch64, dest64); // Convert back, using FPCR rounding mode.
+ Fcmp(scratch64, fsrc64);
+ B(fail, Assembler::NotEqual);
+
+ if (negativeZeroCheck) {
+ Label nonzero;
+ Cbnz(dest64, &nonzero);
+ Fmov(dest64, fsrc64);
+ Cbnz(dest64, fail);
+ bind(&nonzero);
+ }
+ }
+
+ void jump(Label* label) { B(label); }
+ void jump(JitCode* code) { branch(code); }
+ void jump(ImmPtr ptr) {
+ // It is unclear why this sync is necessary:
+ // * PSP and SP have been observed to be different in testcase
+ // tests/asm.js/testBug1046688.js.
+ // * Removing the sync causes no failures in all of jit-tests.
+ //
+ // Also see branch(JitCode*) below. This version of jump() is called only
+ // from jump(TrampolinePtr) which is called on various very slow paths,
+ // probably only in JS.
+ syncStackPtr();
+ BufferOffset loc =
+ b(-1,
+ LabelDoc()); // The jump target will be patched by executableCopy().
+ addPendingJump(loc, ptr, RelocationKind::HARDCODED);
+ }
+ void jump(TrampolinePtr code) { jump(ImmPtr(code.value)); }
+ void jump(Register reg) { Br(ARMRegister(reg, 64)); }
+ void jump(const Address& addr) {
+ vixl::UseScratchRegisterScope temps(this);
+ MOZ_ASSERT(temps.IsAvailable(ScratchReg64)); // ip0
+ temps.Exclude(ScratchReg64);
+ MOZ_ASSERT(addr.base != ScratchReg64.asUnsized());
+ loadPtr(addr, ScratchReg64.asUnsized());
+ br(ScratchReg64);
+ }
+
+ void align(int alignment) { armbuffer_.align(alignment); }
+
+ void haltingAlign(int alignment) {
+ armbuffer_.align(alignment, vixl::HLT | ImmException(0xBAAD));
+ }
+ void nopAlign(int alignment) { armbuffer_.align(alignment); }
+
+ void movePtr(Register src, Register dest) {
+ Mov(ARMRegister(dest, 64), ARMRegister(src, 64));
+ }
+ void movePtr(ImmWord imm, Register dest) {
+ Mov(ARMRegister(dest, 64), int64_t(imm.value));
+ }
+ void movePtr(ImmPtr imm, Register dest) {
+ Mov(ARMRegister(dest, 64), int64_t(imm.value));
+ }
+ void movePtr(wasm::SymbolicAddress imm, Register dest) {
+ BufferOffset off = movePatchablePtr(ImmWord(0xffffffffffffffffULL), dest);
+ append(wasm::SymbolicAccess(CodeOffset(off.getOffset()), imm));
+ }
+ void movePtr(ImmGCPtr imm, Register dest) {
+ BufferOffset load = movePatchablePtr(ImmPtr(imm.value), dest);
+ writeDataRelocation(imm, load);
+ }
+
+ void mov(ImmWord imm, Register dest) { movePtr(imm, dest); }
+ void mov(ImmPtr imm, Register dest) { movePtr(imm, dest); }
+ void mov(wasm::SymbolicAddress imm, Register dest) { movePtr(imm, dest); }
+ void mov(Register src, Register dest) { movePtr(src, dest); }
+ void mov(CodeLabel* label, Register dest);
+
+ void move32(Imm32 imm, Register dest) {
+ Mov(ARMRegister(dest, 32), (int64_t)imm.value);
+ }
+ void move32(Register src, Register dest) {
+ Mov(ARMRegister(dest, 32), ARMRegister(src, 32));
+ }
+
+ // Move a pointer using a literal pool, so that the pointer
+ // may be easily patched or traced.
+ // Returns the BufferOffset of the load instruction emitted.
+ BufferOffset movePatchablePtr(ImmWord ptr, Register dest);
+ BufferOffset movePatchablePtr(ImmPtr ptr, Register dest);
+
+ void loadPtr(wasm::SymbolicAddress address, Register dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch = temps.AcquireX();
+ movePtr(address, scratch.asUnsized());
+ Ldr(ARMRegister(dest, 64), MemOperand(scratch));
+ }
+ void loadPtr(AbsoluteAddress address, Register dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch = temps.AcquireX();
+ movePtr(ImmWord((uintptr_t)address.addr), scratch.asUnsized());
+ Ldr(ARMRegister(dest, 64), MemOperand(scratch));
+ }
+ void loadPtr(const Address& address, Register dest) {
+ Ldr(ARMRegister(dest, 64), MemOperand(address));
+ }
+ void loadPtr(const BaseIndex& src, Register dest) {
+ ARMRegister base = toARMRegister(src.base, 64);
+ uint32_t scale = Imm32::ShiftOf(src.scale).value;
+ ARMRegister dest64(dest, 64);
+ ARMRegister index64(src.index, 64);
+
+ if (src.offset) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch = temps.AcquireX();
+ MOZ_ASSERT(!scratch.Is(base));
+ MOZ_ASSERT(!scratch.Is(dest64));
+ MOZ_ASSERT(!scratch.Is(index64));
+
+ Add(scratch, base, Operand(int64_t(src.offset)));
+ Ldr(dest64, MemOperand(scratch, index64, vixl::LSL, scale));
+ return;
+ }
+
+ Ldr(dest64, MemOperand(base, index64, vixl::LSL, scale));
+ }
+ void loadPrivate(const Address& src, Register dest);
+
+ void store8(Register src, const Address& address) {
+ Strb(ARMRegister(src, 32), toMemOperand(address));
+ }
+ void store8(Imm32 imm, const Address& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ MOZ_ASSERT(scratch32.asUnsized() != address.base);
+ move32(imm, scratch32.asUnsized());
+ Strb(scratch32, toMemOperand(address));
+ }
+ void store8(Register src, const BaseIndex& address) {
+ doBaseIndex(ARMRegister(src, 32), address, vixl::STRB_w);
+ }
+ void store8(Imm32 imm, const BaseIndex& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ MOZ_ASSERT(scratch32.asUnsized() != address.base);
+ MOZ_ASSERT(scratch32.asUnsized() != address.index);
+ Mov(scratch32, Operand(imm.value));
+ doBaseIndex(scratch32, address, vixl::STRB_w);
+ }
+
+ void store16(Register src, const Address& address) {
+ Strh(ARMRegister(src, 32), toMemOperand(address));
+ }
+ void store16(Imm32 imm, const Address& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ MOZ_ASSERT(scratch32.asUnsized() != address.base);
+ move32(imm, scratch32.asUnsized());
+ Strh(scratch32, toMemOperand(address));
+ }
+ void store16(Register src, const BaseIndex& address) {
+ doBaseIndex(ARMRegister(src, 32), address, vixl::STRH_w);
+ }
+ void store16(Imm32 imm, const BaseIndex& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ MOZ_ASSERT(scratch32.asUnsized() != address.base);
+ MOZ_ASSERT(scratch32.asUnsized() != address.index);
+ Mov(scratch32, Operand(imm.value));
+ doBaseIndex(scratch32, address, vixl::STRH_w);
+ }
+ template <typename S, typename T>
+ void store16Unaligned(const S& src, const T& dest) {
+ store16(src, dest);
+ }
+
+ void storePtr(ImmWord imm, const Address& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != address.base);
+ movePtr(imm, scratch);
+ storePtr(scratch, address);
+ }
+ void storePtr(ImmPtr imm, const Address& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ MOZ_ASSERT(scratch64.asUnsized() != address.base);
+ Mov(scratch64, uint64_t(imm.value));
+ Str(scratch64, toMemOperand(address));
+ }
+ void storePtr(ImmGCPtr imm, const Address& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != address.base);
+ movePtr(imm, scratch);
+ storePtr(scratch, address);
+ }
+ void storePtr(Register src, const Address& address) {
+ Str(ARMRegister(src, 64), toMemOperand(address));
+ }
+
+ void storePtr(ImmWord imm, const BaseIndex& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ MOZ_ASSERT(scratch64.asUnsized() != address.base);
+ MOZ_ASSERT(scratch64.asUnsized() != address.index);
+ Mov(scratch64, Operand(imm.value));
+ doBaseIndex(scratch64, address, vixl::STR_x);
+ }
+ void storePtr(ImmGCPtr imm, const BaseIndex& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != address.base);
+ MOZ_ASSERT(scratch != address.index);
+ movePtr(imm, scratch);
+ doBaseIndex(ARMRegister(scratch, 64), address, vixl::STR_x);
+ }
+ void storePtr(Register src, const BaseIndex& address) {
+ doBaseIndex(ARMRegister(src, 64), address, vixl::STR_x);
+ }
+
+ void storePtr(Register src, AbsoluteAddress address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ Mov(scratch64, uint64_t(address.addr));
+ Str(ARMRegister(src, 64), MemOperand(scratch64));
+ }
+
+ void store32(Register src, AbsoluteAddress address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ Mov(scratch64, uint64_t(address.addr));
+ Str(ARMRegister(src, 32), MemOperand(scratch64));
+ }
+ void store32(Imm32 imm, const Address& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ MOZ_ASSERT(scratch32.asUnsized() != address.base);
+ Mov(scratch32, uint64_t(imm.value));
+ Str(scratch32, toMemOperand(address));
+ }
+ void store32(Register r, const Address& address) {
+ Str(ARMRegister(r, 32), toMemOperand(address));
+ }
+ void store32(Imm32 imm, const BaseIndex& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ MOZ_ASSERT(scratch32.asUnsized() != address.base);
+ MOZ_ASSERT(scratch32.asUnsized() != address.index);
+ Mov(scratch32, imm.value);
+ doBaseIndex(scratch32, address, vixl::STR_w);
+ }
+ void store32(Register r, const BaseIndex& address) {
+ doBaseIndex(ARMRegister(r, 32), address, vixl::STR_w);
+ }
+
+ template <typename S, typename T>
+ void store32Unaligned(const S& src, const T& dest) {
+ store32(src, dest);
+ }
+
+ void store64(Register64 src, Address address) { storePtr(src.reg, address); }
+
+ void store64(Register64 src, const BaseIndex& address) {
+ storePtr(src.reg, address);
+ }
+
+ void store64(Imm64 imm, const BaseIndex& address) {
+ storePtr(ImmWord(imm.value), address);
+ }
+
+ void store64(Imm64 imm, const Address& address) {
+ storePtr(ImmWord(imm.value), address);
+ }
+
+ template <typename S, typename T>
+ void store64Unaligned(const S& src, const T& dest) {
+ store64(src, dest);
+ }
+
+ // StackPointer manipulation.
+ inline void addToStackPtr(Register src);
+ inline void addToStackPtr(Imm32 imm);
+ inline void addToStackPtr(const Address& src);
+ inline void addStackPtrTo(Register dest);
+
+ inline void subFromStackPtr(Register src);
+ inline void subFromStackPtr(Imm32 imm);
+ inline void subStackPtrFrom(Register dest);
+
+ inline void andToStackPtr(Imm32 t);
+
+ inline void moveToStackPtr(Register src);
+ inline void moveStackPtrTo(Register dest);
+
+ inline void loadStackPtr(const Address& src);
+ inline void storeStackPtr(const Address& dest);
+
+ // StackPointer testing functions.
+ inline void branchTestStackPtr(Condition cond, Imm32 rhs, Label* label);
+ inline void branchStackPtr(Condition cond, Register rhs, Label* label);
+ inline void branchStackPtrRhs(Condition cond, Address lhs, Label* label);
+ inline void branchStackPtrRhs(Condition cond, AbsoluteAddress lhs,
+ Label* label);
+
+ void testPtr(Register lhs, Register rhs) {
+ Tst(ARMRegister(lhs, 64), Operand(ARMRegister(rhs, 64)));
+ }
+ void test32(Register lhs, Register rhs) {
+ Tst(ARMRegister(lhs, 32), Operand(ARMRegister(rhs, 32)));
+ }
+ void test32(const Address& addr, Imm32 imm) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ MOZ_ASSERT(scratch32.asUnsized() != addr.base);
+ load32(addr, scratch32.asUnsized());
+ Tst(scratch32, Operand(imm.value));
+ }
+ void test32(Register lhs, Imm32 rhs) {
+ Tst(ARMRegister(lhs, 32), Operand(rhs.value));
+ }
+ void cmp32(Register lhs, Imm32 rhs) {
+ Cmp(ARMRegister(lhs, 32), Operand(rhs.value));
+ }
+ void cmp32(Register a, Register b) {
+ Cmp(ARMRegister(a, 32), Operand(ARMRegister(b, 32)));
+ }
+ void cmp32(const Address& lhs, Imm32 rhs) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ MOZ_ASSERT(scratch32.asUnsized() != lhs.base);
+ Ldr(scratch32, toMemOperand(lhs));
+ Cmp(scratch32, Operand(rhs.value));
+ }
+ void cmp32(const Address& lhs, Register rhs) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ MOZ_ASSERT(scratch32.asUnsized() != lhs.base);
+ MOZ_ASSERT(scratch32.asUnsized() != rhs);
+ Ldr(scratch32, toMemOperand(lhs));
+ Cmp(scratch32, Operand(ARMRegister(rhs, 32)));
+ }
+ void cmp32(const vixl::Operand& lhs, Imm32 rhs) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ Mov(scratch32, lhs);
+ Cmp(scratch32, Operand(rhs.value));
+ }
+ void cmp32(const vixl::Operand& lhs, Register rhs) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ Mov(scratch32, lhs);
+ Cmp(scratch32, Operand(ARMRegister(rhs, 32)));
+ }
+
+ void cmn32(Register lhs, Imm32 rhs) {
+ Cmn(ARMRegister(lhs, 32), Operand(rhs.value));
+ }
+
+ void cmpPtr(Register lhs, Imm32 rhs) {
+ Cmp(ARMRegister(lhs, 64), Operand(rhs.value));
+ }
+ void cmpPtr(Register lhs, ImmWord rhs) {
+ Cmp(ARMRegister(lhs, 64), Operand(rhs.value));
+ }
+ void cmpPtr(Register lhs, ImmPtr rhs) {
+ Cmp(ARMRegister(lhs, 64), Operand(uint64_t(rhs.value)));
+ }
+ void cmpPtr(Register lhs, Imm64 rhs) {
+ Cmp(ARMRegister(lhs, 64), Operand(uint64_t(rhs.value)));
+ }
+ void cmpPtr(Register lhs, Register rhs) {
+ Cmp(ARMRegister(lhs, 64), ARMRegister(rhs, 64));
+ }
+ void cmpPtr(Register lhs, ImmGCPtr rhs) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != lhs);
+ movePtr(rhs, scratch);
+ cmpPtr(lhs, scratch);
+ }
+
+ void cmpPtr(const Address& lhs, Register rhs) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ MOZ_ASSERT(scratch64.asUnsized() != lhs.base);
+ MOZ_ASSERT(scratch64.asUnsized() != rhs);
+ Ldr(scratch64, toMemOperand(lhs));
+ Cmp(scratch64, Operand(ARMRegister(rhs, 64)));
+ }
+ void cmpPtr(const Address& lhs, ImmWord rhs) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ MOZ_ASSERT(scratch64.asUnsized() != lhs.base);
+ Ldr(scratch64, toMemOperand(lhs));
+ Cmp(scratch64, Operand(rhs.value));
+ }
+ void cmpPtr(const Address& lhs, ImmPtr rhs) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ MOZ_ASSERT(scratch64.asUnsized() != lhs.base);
+ Ldr(scratch64, toMemOperand(lhs));
+ Cmp(scratch64, Operand(uint64_t(rhs.value)));
+ }
+ void cmpPtr(const Address& lhs, ImmGCPtr rhs) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != lhs.base);
+ loadPtr(lhs, scratch);
+ cmpPtr(scratch, rhs);
+ }
+
+ void loadDouble(const Address& src, FloatRegister dest) {
+ Ldr(ARMFPRegister(dest, 64), MemOperand(src));
+ }
+ void loadDouble(const BaseIndex& src, FloatRegister dest) {
+ ARMRegister base = toARMRegister(src.base, 64);
+ ARMRegister index(src.index, 64);
+
+ if (src.offset == 0) {
+ Ldr(ARMFPRegister(dest, 64),
+ MemOperand(base, index, vixl::LSL, unsigned(src.scale)));
+ return;
+ }
+
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ MOZ_ASSERT(scratch64.asUnsized() != src.base);
+ MOZ_ASSERT(scratch64.asUnsized() != src.index);
+
+ Add(scratch64, base, Operand(index, vixl::LSL, unsigned(src.scale)));
+ Ldr(ARMFPRegister(dest, 64), MemOperand(scratch64, src.offset));
+ }
+ void loadFloatAsDouble(const Address& addr, FloatRegister dest) {
+ Ldr(ARMFPRegister(dest, 32), toMemOperand(addr));
+ fcvt(ARMFPRegister(dest, 64), ARMFPRegister(dest, 32));
+ }
+ void loadFloatAsDouble(const BaseIndex& src, FloatRegister dest) {
+ ARMRegister base = toARMRegister(src.base, 64);
+ ARMRegister index(src.index, 64);
+ if (src.offset == 0) {
+ Ldr(ARMFPRegister(dest, 32),
+ MemOperand(base, index, vixl::LSL, unsigned(src.scale)));
+ } else {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ MOZ_ASSERT(scratch64.asUnsized() != src.base);
+ MOZ_ASSERT(scratch64.asUnsized() != src.index);
+
+ Add(scratch64, base, Operand(index, vixl::LSL, unsigned(src.scale)));
+ Ldr(ARMFPRegister(dest, 32), MemOperand(scratch64, src.offset));
+ }
+ fcvt(ARMFPRegister(dest, 64), ARMFPRegister(dest, 32));
+ }
+
+ void loadFloat32(const Address& addr, FloatRegister dest) {
+ Ldr(ARMFPRegister(dest, 32), toMemOperand(addr));
+ }
+ void loadFloat32(const BaseIndex& src, FloatRegister dest) {
+ ARMRegister base = toARMRegister(src.base, 64);
+ ARMRegister index(src.index, 64);
+ if (src.offset == 0) {
+ Ldr(ARMFPRegister(dest, 32),
+ MemOperand(base, index, vixl::LSL, unsigned(src.scale)));
+ } else {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ MOZ_ASSERT(scratch64.asUnsized() != src.base);
+ MOZ_ASSERT(scratch64.asUnsized() != src.index);
+
+ Add(scratch64, base, Operand(index, vixl::LSL, unsigned(src.scale)));
+ Ldr(ARMFPRegister(dest, 32), MemOperand(scratch64, src.offset));
+ }
+ }
+
+ void moveDouble(FloatRegister src, FloatRegister dest) {
+ fmov(ARMFPRegister(dest, 64), ARMFPRegister(src, 64));
+ }
+ void zeroDouble(FloatRegister reg) {
+ fmov(ARMFPRegister(reg, 64), vixl::xzr);
+ }
+ void zeroFloat32(FloatRegister reg) {
+ fmov(ARMFPRegister(reg, 32), vixl::wzr);
+ }
+
+ void moveFloat32(FloatRegister src, FloatRegister dest) {
+ fmov(ARMFPRegister(dest, 32), ARMFPRegister(src, 32));
+ }
+ void moveFloatAsDouble(Register src, FloatRegister dest) {
+ MOZ_CRASH("moveFloatAsDouble");
+ }
+
+ void moveSimd128(FloatRegister src, FloatRegister dest) {
+ fmov(ARMFPRegister(dest, 128), ARMFPRegister(src, 128));
+ }
+
+ void splitSignExtTag(const ValueOperand& operand, Register dest) {
+ splitSignExtTag(operand.valueReg(), dest);
+ }
+ void splitSignExtTag(const Address& operand, Register dest) {
+ loadPtr(operand, dest);
+ splitSignExtTag(dest, dest);
+ }
+ void splitSignExtTag(const BaseIndex& operand, Register dest) {
+ loadPtr(operand, dest);
+ splitSignExtTag(dest, dest);
+ }
+
+ // Extracts the tag of a value and places it in tag
+ inline void splitTagForTest(const ValueOperand& value, ScratchTagScope& tag);
+ void cmpTag(const ValueOperand& operand, ImmTag tag) { MOZ_CRASH("cmpTag"); }
+
+ void load32(const Address& address, Register dest) {
+ Ldr(ARMRegister(dest, 32), toMemOperand(address));
+ }
+ void load32(const BaseIndex& src, Register dest) {
+ doBaseIndex(ARMRegister(dest, 32), src, vixl::LDR_w);
+ }
+ void load32(AbsoluteAddress address, Register dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ movePtr(ImmWord((uintptr_t)address.addr), scratch64.asUnsized());
+ ldr(ARMRegister(dest, 32), MemOperand(scratch64));
+ }
+ template <typename S>
+ void load32Unaligned(const S& src, Register dest) {
+ load32(src, dest);
+ }
+ void load64(const Address& address, Register64 dest) {
+ loadPtr(address, dest.reg);
+ }
+ void load64(const BaseIndex& address, Register64 dest) {
+ loadPtr(address, dest.reg);
+ }
+ template <typename S>
+ void load64Unaligned(const S& src, Register64 dest) {
+ load64(src, dest);
+ }
+
+ void load8SignExtend(const Address& address, Register dest) {
+ Ldrsb(ARMRegister(dest, 32), toMemOperand(address));
+ }
+ void load8SignExtend(const BaseIndex& src, Register dest) {
+ doBaseIndex(ARMRegister(dest, 32), src, vixl::LDRSB_w);
+ }
+
+ void load8ZeroExtend(const Address& address, Register dest) {
+ Ldrb(ARMRegister(dest, 32), toMemOperand(address));
+ }
+ void load8ZeroExtend(const BaseIndex& src, Register dest) {
+ doBaseIndex(ARMRegister(dest, 32), src, vixl::LDRB_w);
+ }
+
+ void load16SignExtend(const Address& address, Register dest) {
+ Ldrsh(ARMRegister(dest, 32), toMemOperand(address));
+ }
+ void load16SignExtend(const BaseIndex& src, Register dest) {
+ doBaseIndex(ARMRegister(dest, 32), src, vixl::LDRSH_w);
+ }
+ template <typename S>
+ void load16UnalignedSignExtend(const S& src, Register dest) {
+ load16SignExtend(src, dest);
+ }
+
+ void load16ZeroExtend(const Address& address, Register dest) {
+ Ldrh(ARMRegister(dest, 32), toMemOperand(address));
+ }
+ void load16ZeroExtend(const BaseIndex& src, Register dest) {
+ doBaseIndex(ARMRegister(dest, 32), src, vixl::LDRH_w);
+ }
+ template <typename S>
+ void load16UnalignedZeroExtend(const S& src, Register dest) {
+ load16ZeroExtend(src, dest);
+ }
+
+ void adds32(Register src, Register dest) {
+ Adds(ARMRegister(dest, 32), ARMRegister(dest, 32),
+ Operand(ARMRegister(src, 32)));
+ }
+ void adds32(Imm32 imm, Register dest) {
+ Adds(ARMRegister(dest, 32), ARMRegister(dest, 32), Operand(imm.value));
+ }
+ void adds32(Imm32 imm, const Address& dest) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ MOZ_ASSERT(scratch32.asUnsized() != dest.base);
+
+ Ldr(scratch32, toMemOperand(dest));
+ Adds(scratch32, scratch32, Operand(imm.value));
+ Str(scratch32, toMemOperand(dest));
+ }
+ void adds64(Imm32 imm, Register dest) {
+ Adds(ARMRegister(dest, 64), ARMRegister(dest, 64), Operand(imm.value));
+ }
+ void adds64(ImmWord imm, Register dest) {
+ Adds(ARMRegister(dest, 64), ARMRegister(dest, 64), Operand(imm.value));
+ }
+ void adds64(Register src, Register dest) {
+ Adds(ARMRegister(dest, 64), ARMRegister(dest, 64),
+ Operand(ARMRegister(src, 64)));
+ }
+
+ void subs32(Imm32 imm, Register dest) {
+ Subs(ARMRegister(dest, 32), ARMRegister(dest, 32), Operand(imm.value));
+ }
+ void subs32(Register src, Register dest) {
+ Subs(ARMRegister(dest, 32), ARMRegister(dest, 32),
+ Operand(ARMRegister(src, 32)));
+ }
+ void subs64(Imm32 imm, Register dest) {
+ Subs(ARMRegister(dest, 64), ARMRegister(dest, 64), Operand(imm.value));
+ }
+ void subs64(Register src, Register dest) {
+ Subs(ARMRegister(dest, 64), ARMRegister(dest, 64),
+ Operand(ARMRegister(src, 64)));
+ }
+
+ void negs32(Register reg) {
+ Negs(ARMRegister(reg, 32), Operand(ARMRegister(reg, 32)));
+ }
+
+ void ret() {
+ pop(lr);
+ abiret();
+ }
+
+ void retn(Imm32 n) {
+ vixl::UseScratchRegisterScope temps(this);
+ MOZ_ASSERT(temps.IsAvailable(ScratchReg64)); // ip0
+ temps.Exclude(ScratchReg64);
+ // ip0 <- [sp]; sp += n; ret ip0
+ Ldr(ScratchReg64,
+ MemOperand(GetStackPointer64(), ptrdiff_t(n.value), vixl::PostIndex));
+ syncStackPtr(); // SP is always used to transmit the stack between calls.
+ Ret(ScratchReg64);
+ }
+
+ void j(Condition cond, Label* dest) { B(dest, cond); }
+
+ void branch(Condition cond, Label* label) { B(label, cond); }
+ void branch(JitCode* target) {
+ // It is unclear why this sync is necessary:
+ // * PSP and SP have been observed to be different in testcase
+ // tests/async/debugger-reject-after-fulfill.js
+ // * Removing the sync causes no failures in all of jit-tests.
+ //
+ // Also see jump() above. This is used only to implement jump(JitCode*)
+ // and only for JS, it appears.
+ syncStackPtr();
+ BufferOffset loc =
+ b(-1,
+ LabelDoc()); // The jump target will be patched by executableCopy().
+ addPendingJump(loc, ImmPtr(target->raw()), RelocationKind::JITCODE);
+ }
+
+ void compareDouble(DoubleCondition cond, FloatRegister lhs,
+ FloatRegister rhs) {
+ Fcmp(ARMFPRegister(lhs, 64), ARMFPRegister(rhs, 64));
+ }
+
+ void compareFloat(DoubleCondition cond, FloatRegister lhs,
+ FloatRegister rhs) {
+ Fcmp(ARMFPRegister(lhs, 32), ARMFPRegister(rhs, 32));
+ }
+
+ void compareSimd128Int(Assembler::Condition cond, ARMFPRegister dest,
+ ARMFPRegister lhs, ARMFPRegister rhs);
+ void compareSimd128Float(Assembler::Condition cond, ARMFPRegister dest,
+ ARMFPRegister lhs, ARMFPRegister rhs);
+ void rightShiftInt8x16(FloatRegister lhs, Register rhs, FloatRegister dest,
+ bool isUnsigned);
+ void rightShiftInt16x8(FloatRegister lhs, Register rhs, FloatRegister dest,
+ bool isUnsigned);
+ void rightShiftInt32x4(FloatRegister lhs, Register rhs, FloatRegister dest,
+ bool isUnsigned);
+ void rightShiftInt64x2(FloatRegister lhs, Register rhs, FloatRegister dest,
+ bool isUnsigned);
+
+ void branchNegativeZero(FloatRegister reg, Register scratch, Label* label) {
+ MOZ_CRASH("branchNegativeZero");
+ }
+ void branchNegativeZeroFloat32(FloatRegister reg, Register scratch,
+ Label* label) {
+ MOZ_CRASH("branchNegativeZeroFloat32");
+ }
+
+ void boxDouble(FloatRegister src, const ValueOperand& dest, FloatRegister) {
+ Fmov(ARMRegister(dest.valueReg(), 64), ARMFPRegister(src, 64));
+ }
+ void boxNonDouble(JSValueType type, Register src, const ValueOperand& dest) {
+ boxValue(type, src, dest.valueReg());
+ }
+
+ // Note that the |dest| register here may be ScratchReg, so we shouldn't use
+ // it.
+ void unboxInt32(const ValueOperand& src, Register dest) {
+ move32(src.valueReg(), dest);
+ }
+ void unboxInt32(const Address& src, Register dest) { load32(src, dest); }
+ void unboxInt32(const BaseIndex& src, Register dest) { load32(src, dest); }
+
+ template <typename T>
+ void unboxDouble(const T& src, FloatRegister dest) {
+ loadDouble(src, dest);
+ }
+ void unboxDouble(const ValueOperand& src, FloatRegister dest) {
+ Fmov(ARMFPRegister(dest, 64), ARMRegister(src.valueReg(), 64));
+ }
+
+ void unboxArgObjMagic(const ValueOperand& src, Register dest) {
+ MOZ_CRASH("unboxArgObjMagic");
+ }
+ void unboxArgObjMagic(const Address& src, Register dest) {
+ MOZ_CRASH("unboxArgObjMagic");
+ }
+
+ void unboxBoolean(const ValueOperand& src, Register dest) {
+ move32(src.valueReg(), dest);
+ }
+ void unboxBoolean(const Address& src, Register dest) { load32(src, dest); }
+ void unboxBoolean(const BaseIndex& src, Register dest) { load32(src, dest); }
+
+ void unboxMagic(const ValueOperand& src, Register dest) {
+ move32(src.valueReg(), dest);
+ }
+ void unboxNonDouble(const ValueOperand& src, Register dest,
+ JSValueType type) {
+ unboxNonDouble(src.valueReg(), dest, type);
+ }
+
+ template <typename T>
+ void unboxNonDouble(T src, Register dest, JSValueType type) {
+ MOZ_ASSERT(type != JSVAL_TYPE_DOUBLE);
+ if (type == JSVAL_TYPE_INT32 || type == JSVAL_TYPE_BOOLEAN) {
+ load32(src, dest);
+ return;
+ }
+ loadPtr(src, dest);
+ unboxNonDouble(dest, dest, type);
+ }
+
+ void unboxNonDouble(Register src, Register dest, JSValueType type) {
+ MOZ_ASSERT(type != JSVAL_TYPE_DOUBLE);
+ if (type == JSVAL_TYPE_INT32 || type == JSVAL_TYPE_BOOLEAN) {
+ move32(src, dest);
+ return;
+ }
+ Eor(ARMRegister(dest, 64), ARMRegister(src, 64),
+ Operand(JSVAL_TYPE_TO_SHIFTED_TAG(type)));
+ }
+
+ void notBoolean(const ValueOperand& val) {
+ ARMRegister r(val.valueReg(), 64);
+ eor(r, r, Operand(1));
+ }
+ void unboxObject(const ValueOperand& src, Register dest) {
+ unboxNonDouble(src.valueReg(), dest, JSVAL_TYPE_OBJECT);
+ }
+ void unboxObject(Register src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_OBJECT);
+ }
+ void unboxObject(const Address& src, Register dest) {
+ loadPtr(src, dest);
+ unboxNonDouble(dest, dest, JSVAL_TYPE_OBJECT);
+ }
+ void unboxObject(const BaseIndex& src, Register dest) {
+ doBaseIndex(ARMRegister(dest, 64), src, vixl::LDR_x);
+ unboxNonDouble(dest, dest, JSVAL_TYPE_OBJECT);
+ }
+
+ template <typename T>
+ void unboxObjectOrNull(const T& src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_OBJECT);
+ And(ARMRegister(dest, 64), ARMRegister(dest, 64),
+ Operand(~JS::detail::ValueObjectOrNullBit));
+ }
+
+ // See comment in MacroAssembler-x64.h.
+ void unboxGCThingForGCBarrier(const Address& src, Register dest) {
+ loadPtr(src, dest);
+ And(ARMRegister(dest, 64), ARMRegister(dest, 64),
+ Operand(JS::detail::ValueGCThingPayloadMask));
+ }
+ void unboxGCThingForGCBarrier(const ValueOperand& src, Register dest) {
+ And(ARMRegister(dest, 64), ARMRegister(src.valueReg(), 64),
+ Operand(JS::detail::ValueGCThingPayloadMask));
+ }
+
+ // Like unboxGCThingForGCBarrier, but loads the GC thing's chunk base.
+ void getGCThingValueChunk(const Address& src, Register dest) {
+ loadPtr(src, dest);
+ And(ARMRegister(dest, 64), ARMRegister(dest, 64),
+ Operand(JS::detail::ValueGCThingPayloadChunkMask));
+ }
+ void getGCThingValueChunk(const ValueOperand& src, Register dest) {
+ And(ARMRegister(dest, 64), ARMRegister(src.valueReg(), 64),
+ Operand(JS::detail::ValueGCThingPayloadChunkMask));
+ }
+
+ inline void unboxValue(const ValueOperand& src, AnyRegister dest,
+ JSValueType type);
+
+ void unboxString(const ValueOperand& operand, Register dest) {
+ unboxNonDouble(operand, dest, JSVAL_TYPE_STRING);
+ }
+ void unboxString(const Address& src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_STRING);
+ }
+ void unboxSymbol(const ValueOperand& operand, Register dest) {
+ unboxNonDouble(operand, dest, JSVAL_TYPE_SYMBOL);
+ }
+ void unboxSymbol(const Address& src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_SYMBOL);
+ }
+ void unboxBigInt(const ValueOperand& operand, Register dest) {
+ unboxNonDouble(operand, dest, JSVAL_TYPE_BIGINT);
+ }
+ void unboxBigInt(const Address& src, Register dest) {
+ unboxNonDouble(src, dest, JSVAL_TYPE_BIGINT);
+ }
+ // These two functions use the low 32-bits of the full value register.
+ void boolValueToDouble(const ValueOperand& operand, FloatRegister dest) {
+ convertInt32ToDouble(operand.valueReg(), dest);
+ }
+ void int32ValueToDouble(const ValueOperand& operand, FloatRegister dest) {
+ convertInt32ToDouble(operand.valueReg(), dest);
+ }
+
+ void boolValueToFloat32(const ValueOperand& operand, FloatRegister dest) {
+ convertInt32ToFloat32(operand.valueReg(), dest);
+ }
+ void int32ValueToFloat32(const ValueOperand& operand, FloatRegister dest) {
+ convertInt32ToFloat32(operand.valueReg(), dest);
+ }
+
+ void loadConstantDouble(double d, FloatRegister dest) {
+ ARMFPRegister r(dest, 64);
+ if (d == 0.0) {
+ // Clang11 does movi for 0 and movi+fneg for -0, and this seems like a
+ // good implementation-independent strategy as it avoids any gpr->fpr
+ // moves or memory traffic.
+ Movi(r, 0);
+ if (std::signbit(d)) {
+ Fneg(r, r);
+ }
+ } else {
+ Fmov(r, d);
+ }
+ }
+ void loadConstantFloat32(float f, FloatRegister dest) {
+ ARMFPRegister r(dest, 32);
+ if (f == 0.0) {
+ // See comments above. There's not a movi variant for a single register,
+ // so clear the double.
+ Movi(ARMFPRegister(dest, 64), 0);
+ if (std::signbit(f)) {
+ Fneg(r, r);
+ }
+ } else {
+ Fmov(r, f);
+ }
+ }
+
+ void cmpTag(Register tag, ImmTag ref) {
+ // As opposed to other architecture, splitTag is replaced by splitSignExtTag
+ // which extract the tag with a sign extension. The reason being that cmp32
+ // with a tag value would be too large to fit as a 12 bits immediate value,
+ // and would require the VIXL macro assembler to add an extra instruction
+ // and require extra scratch register to load the Tag value.
+ //
+ // Instead, we compare with the negative value of the sign extended tag with
+ // the CMN instruction. The sign extended tag is expected to be a negative
+ // value. Therefore the negative of the sign extended tag is expected to be
+ // near 0 and fit on 12 bits.
+ //
+ // Ignoring the sign extension, the logic is the following:
+ //
+ // CMP32(Reg, Tag) = Reg - Tag
+ // = Reg + (-Tag)
+ // = CMN32(Reg, -Tag)
+ //
+ // Note: testGCThing, testPrimitive and testNumber which are checking for
+ // inequalities should use unsigned comparisons (as done by default) in
+ // order to keep the same relation order after the sign extension, i.e.
+ // using Above or Below which are based on the carry flag.
+ uint32_t hiShift = JSVAL_TAG_SHIFT - 32;
+ int32_t seTag = int32_t(ref.value);
+ seTag = (seTag << hiShift) >> hiShift;
+ MOZ_ASSERT(seTag < 0);
+ int32_t negTag = -seTag;
+ // Check thest negTag is encoded on a 12 bits immediate value.
+ MOZ_ASSERT((negTag & ~0xFFF) == 0);
+ cmn32(tag, Imm32(negTag));
+ }
+
+ // Register-based tests.
+ Condition testUndefined(Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ cmpTag(tag, ImmTag(JSVAL_TAG_UNDEFINED));
+ return cond;
+ }
+ Condition testInt32(Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ cmpTag(tag, ImmTag(JSVAL_TAG_INT32));
+ return cond;
+ }
+ Condition testBoolean(Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ cmpTag(tag, ImmTag(JSVAL_TAG_BOOLEAN));
+ return cond;
+ }
+ Condition testNull(Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ cmpTag(tag, ImmTag(JSVAL_TAG_NULL));
+ return cond;
+ }
+ Condition testString(Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ cmpTag(tag, ImmTag(JSVAL_TAG_STRING));
+ return cond;
+ }
+ Condition testSymbol(Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ cmpTag(tag, ImmTag(JSVAL_TAG_SYMBOL));
+ return cond;
+ }
+ Condition testBigInt(Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ cmpTag(tag, ImmTag(JSVAL_TAG_BIGINT));
+ return cond;
+ }
+ Condition testObject(Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ cmpTag(tag, ImmTag(JSVAL_TAG_OBJECT));
+ return cond;
+ }
+ Condition testDouble(Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ cmpTag(tag, ImmTag(JSVAL_TAG_MAX_DOUBLE));
+ // Requires unsigned comparison due to cmpTag internals.
+ return (cond == Equal) ? BelowOrEqual : Above;
+ }
+ Condition testNumber(Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ cmpTag(tag, ImmTag(JS::detail::ValueUpperInclNumberTag));
+ // Requires unsigned comparison due to cmpTag internals.
+ return (cond == Equal) ? BelowOrEqual : Above;
+ }
+ Condition testGCThing(Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ cmpTag(tag, ImmTag(JS::detail::ValueLowerInclGCThingTag));
+ // Requires unsigned comparison due to cmpTag internals.
+ return (cond == Equal) ? AboveOrEqual : Below;
+ }
+ Condition testMagic(Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ cmpTag(tag, ImmTag(JSVAL_TAG_MAGIC));
+ return cond;
+ }
+ Condition testPrimitive(Condition cond, Register tag) {
+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
+ cmpTag(tag, ImmTag(JS::detail::ValueUpperExclPrimitiveTag));
+ // Requires unsigned comparison due to cmpTag internals.
+ return (cond == Equal) ? Below : AboveOrEqual;
+ }
+ Condition testError(Condition cond, Register tag) {
+ return testMagic(cond, tag);
+ }
+
+ // ValueOperand-based tests.
+ Condition testInt32(Condition cond, const ValueOperand& value) {
+ // The incoming ValueOperand may use scratch registers.
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(scratch != value.valueReg());
+
+ splitSignExtTag(value, scratch);
+ return testInt32(cond, scratch);
+ }
+ Condition testBoolean(Condition cond, const ValueOperand& value) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(value.valueReg() != scratch);
+ splitSignExtTag(value, scratch);
+ return testBoolean(cond, scratch);
+ }
+ Condition testDouble(Condition cond, const ValueOperand& value) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(value.valueReg() != scratch);
+ splitSignExtTag(value, scratch);
+ return testDouble(cond, scratch);
+ }
+ Condition testNull(Condition cond, const ValueOperand& value) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(value.valueReg() != scratch);
+ splitSignExtTag(value, scratch);
+ return testNull(cond, scratch);
+ }
+ Condition testUndefined(Condition cond, const ValueOperand& value) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(value.valueReg() != scratch);
+ splitSignExtTag(value, scratch);
+ return testUndefined(cond, scratch);
+ }
+ Condition testString(Condition cond, const ValueOperand& value) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(value.valueReg() != scratch);
+ splitSignExtTag(value, scratch);
+ return testString(cond, scratch);
+ }
+ Condition testSymbol(Condition cond, const ValueOperand& value) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(value.valueReg() != scratch);
+ splitSignExtTag(value, scratch);
+ return testSymbol(cond, scratch);
+ }
+ Condition testBigInt(Condition cond, const ValueOperand& value) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(value.valueReg() != scratch);
+ splitSignExtTag(value, scratch);
+ return testBigInt(cond, scratch);
+ }
+ Condition testObject(Condition cond, const ValueOperand& value) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(value.valueReg() != scratch);
+ splitSignExtTag(value, scratch);
+ return testObject(cond, scratch);
+ }
+ Condition testNumber(Condition cond, const ValueOperand& value) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(value.valueReg() != scratch);
+ splitSignExtTag(value, scratch);
+ return testNumber(cond, scratch);
+ }
+ Condition testPrimitive(Condition cond, const ValueOperand& value) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(value.valueReg() != scratch);
+ splitSignExtTag(value, scratch);
+ return testPrimitive(cond, scratch);
+ }
+ Condition testMagic(Condition cond, const ValueOperand& value) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(value.valueReg() != scratch);
+ splitSignExtTag(value, scratch);
+ return testMagic(cond, scratch);
+ }
+ Condition testGCThing(Condition cond, const ValueOperand& value) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(value.valueReg() != scratch);
+ splitSignExtTag(value, scratch);
+ return testGCThing(cond, scratch);
+ }
+ Condition testError(Condition cond, const ValueOperand& value) {
+ return testMagic(cond, value);
+ }
+
+ // Address-based tests.
+ Condition testGCThing(Condition cond, const Address& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(address.base != scratch);
+ splitSignExtTag(address, scratch);
+ return testGCThing(cond, scratch);
+ }
+ Condition testMagic(Condition cond, const Address& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(address.base != scratch);
+ splitSignExtTag(address, scratch);
+ return testMagic(cond, scratch);
+ }
+ Condition testInt32(Condition cond, const Address& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(address.base != scratch);
+ splitSignExtTag(address, scratch);
+ return testInt32(cond, scratch);
+ }
+ Condition testDouble(Condition cond, const Address& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(address.base != scratch);
+ splitSignExtTag(address, scratch);
+ return testDouble(cond, scratch);
+ }
+ Condition testBoolean(Condition cond, const Address& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(address.base != scratch);
+ splitSignExtTag(address, scratch);
+ return testBoolean(cond, scratch);
+ }
+ Condition testNull(Condition cond, const Address& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(address.base != scratch);
+ splitSignExtTag(address, scratch);
+ return testNull(cond, scratch);
+ }
+ Condition testUndefined(Condition cond, const Address& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(address.base != scratch);
+ splitSignExtTag(address, scratch);
+ return testUndefined(cond, scratch);
+ }
+ Condition testString(Condition cond, const Address& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(address.base != scratch);
+ splitSignExtTag(address, scratch);
+ return testString(cond, scratch);
+ }
+ Condition testSymbol(Condition cond, const Address& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(address.base != scratch);
+ splitSignExtTag(address, scratch);
+ return testSymbol(cond, scratch);
+ }
+ Condition testBigInt(Condition cond, const Address& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(address.base != scratch);
+ splitSignExtTag(address, scratch);
+ return testBigInt(cond, scratch);
+ }
+ Condition testObject(Condition cond, const Address& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(address.base != scratch);
+ splitSignExtTag(address, scratch);
+ return testObject(cond, scratch);
+ }
+ Condition testNumber(Condition cond, const Address& address) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(address.base != scratch);
+ splitSignExtTag(address, scratch);
+ return testNumber(cond, scratch);
+ }
+
+ // BaseIndex-based tests.
+ Condition testUndefined(Condition cond, const BaseIndex& src) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(src.base != scratch);
+ MOZ_ASSERT(src.index != scratch);
+ splitSignExtTag(src, scratch);
+ return testUndefined(cond, scratch);
+ }
+ Condition testNull(Condition cond, const BaseIndex& src) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(src.base != scratch);
+ MOZ_ASSERT(src.index != scratch);
+ splitSignExtTag(src, scratch);
+ return testNull(cond, scratch);
+ }
+ Condition testBoolean(Condition cond, const BaseIndex& src) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(src.base != scratch);
+ MOZ_ASSERT(src.index != scratch);
+ splitSignExtTag(src, scratch);
+ return testBoolean(cond, scratch);
+ }
+ Condition testString(Condition cond, const BaseIndex& src) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(src.base != scratch);
+ MOZ_ASSERT(src.index != scratch);
+ splitSignExtTag(src, scratch);
+ return testString(cond, scratch);
+ }
+ Condition testSymbol(Condition cond, const BaseIndex& src) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(src.base != scratch);
+ MOZ_ASSERT(src.index != scratch);
+ splitSignExtTag(src, scratch);
+ return testSymbol(cond, scratch);
+ }
+ Condition testBigInt(Condition cond, const BaseIndex& src) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(src.base != scratch);
+ MOZ_ASSERT(src.index != scratch);
+ splitSignExtTag(src, scratch);
+ return testBigInt(cond, scratch);
+ }
+ Condition testInt32(Condition cond, const BaseIndex& src) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(src.base != scratch);
+ MOZ_ASSERT(src.index != scratch);
+ splitSignExtTag(src, scratch);
+ return testInt32(cond, scratch);
+ }
+ Condition testObject(Condition cond, const BaseIndex& src) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(src.base != scratch);
+ MOZ_ASSERT(src.index != scratch);
+ splitSignExtTag(src, scratch);
+ return testObject(cond, scratch);
+ }
+ Condition testDouble(Condition cond, const BaseIndex& src) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(src.base != scratch);
+ MOZ_ASSERT(src.index != scratch);
+ splitSignExtTag(src, scratch);
+ return testDouble(cond, scratch);
+ }
+ Condition testMagic(Condition cond, const BaseIndex& src) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(src.base != scratch);
+ MOZ_ASSERT(src.index != scratch);
+ splitSignExtTag(src, scratch);
+ return testMagic(cond, scratch);
+ }
+ Condition testGCThing(Condition cond, const BaseIndex& src) {
+ vixl::UseScratchRegisterScope temps(this);
+ const Register scratch = temps.AcquireX().asUnsized();
+ MOZ_ASSERT(src.base != scratch);
+ MOZ_ASSERT(src.index != scratch);
+ splitSignExtTag(src, scratch);
+ return testGCThing(cond, scratch);
+ }
+
+ Condition testInt32Truthy(bool truthy, const ValueOperand& operand) {
+ ARMRegister payload32(operand.valueReg(), 32);
+ Tst(payload32, payload32);
+ return truthy ? NonZero : Zero;
+ }
+
+ Condition testBooleanTruthy(bool truthy, const ValueOperand& operand) {
+ ARMRegister payload32(operand.valueReg(), 32);
+ Tst(payload32, payload32);
+ return truthy ? NonZero : Zero;
+ }
+
+ Condition testBigIntTruthy(bool truthy, const ValueOperand& value);
+ Condition testStringTruthy(bool truthy, const ValueOperand& value);
+
+ void int32OrDouble(Register src, ARMFPRegister dest) {
+ Label isInt32;
+ Label join;
+ testInt32(Equal, ValueOperand(src));
+ B(&isInt32, Equal);
+ // is double, move the bits as is
+ Fmov(dest, ARMRegister(src, 64));
+ B(&join);
+ bind(&isInt32);
+ // is int32, do a conversion while moving
+ Scvtf(dest, ARMRegister(src, 32));
+ bind(&join);
+ }
+ void loadUnboxedValue(Address address, MIRType type, AnyRegister dest) {
+ if (dest.isFloat()) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ MOZ_ASSERT(scratch64.asUnsized() != address.base);
+ Ldr(scratch64, toMemOperand(address));
+ int32OrDouble(scratch64.asUnsized(), ARMFPRegister(dest.fpu(), 64));
+ } else {
+ unboxNonDouble(address, dest.gpr(), ValueTypeFromMIRType(type));
+ }
+ }
+
+ void loadUnboxedValue(BaseIndex address, MIRType type, AnyRegister dest) {
+ if (dest.isFloat()) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ MOZ_ASSERT(scratch64.asUnsized() != address.base);
+ MOZ_ASSERT(scratch64.asUnsized() != address.index);
+ doBaseIndex(scratch64, address, vixl::LDR_x);
+ int32OrDouble(scratch64.asUnsized(), ARMFPRegister(dest.fpu(), 64));
+ } else {
+ unboxNonDouble(address, dest.gpr(), ValueTypeFromMIRType(type));
+ }
+ }
+
+ // Emit a B that can be toggled to a CMP. See ToggleToJmp(), ToggleToCmp().
+ CodeOffset toggledJump(Label* label) {
+ BufferOffset offset = b(label, Always);
+ CodeOffset ret(offset.getOffset());
+ return ret;
+ }
+
+ // load: offset to the load instruction obtained by movePatchablePtr().
+ void writeDataRelocation(ImmGCPtr ptr, BufferOffset load) {
+ // Raw GC pointer relocations and Value relocations both end up in
+ // Assembler::TraceDataRelocations.
+ if (ptr.value) {
+ if (gc::IsInsideNursery(ptr.value)) {
+ embedsNurseryPointers_ = true;
+ }
+ dataRelocations_.writeUnsigned(load.getOffset());
+ }
+ }
+ void writeDataRelocation(const Value& val, BufferOffset load) {
+ // Raw GC pointer relocations and Value relocations both end up in
+ // Assembler::TraceDataRelocations.
+ if (val.isGCThing()) {
+ gc::Cell* cell = val.toGCThing();
+ if (cell && gc::IsInsideNursery(cell)) {
+ embedsNurseryPointers_ = true;
+ }
+ dataRelocations_.writeUnsigned(load.getOffset());
+ }
+ }
+
+ void computeEffectiveAddress(const Address& address, Register dest) {
+ Add(ARMRegister(dest, 64), toARMRegister(address.base, 64),
+ Operand(address.offset));
+ }
+ void computeEffectiveAddress(const Address& address, RegisterOrSP dest) {
+ Add(toARMRegister(dest, 64), toARMRegister(address.base, 64),
+ Operand(address.offset));
+ }
+ void computeEffectiveAddress(const BaseIndex& address, Register dest) {
+ ARMRegister dest64(dest, 64);
+ ARMRegister base64 = toARMRegister(address.base, 64);
+ ARMRegister index64(address.index, 64);
+
+ Add(dest64, base64, Operand(index64, vixl::LSL, address.scale));
+ if (address.offset) {
+ Add(dest64, dest64, Operand(address.offset));
+ }
+ }
+
+ public:
+ void handleFailureWithHandlerTail(Label* profilerExitTail,
+ Label* bailoutTail);
+
+ void profilerEnterFrame(Register framePtr, Register scratch);
+ void profilerExitFrame();
+
+ void wasmLoadImpl(const wasm::MemoryAccessDesc& access, Register memoryBase,
+ Register ptr, AnyRegister outany, Register64 out64);
+ void wasmLoadImpl(const wasm::MemoryAccessDesc& access, MemOperand srcAddr,
+ AnyRegister outany, Register64 out64);
+ void wasmStoreImpl(const wasm::MemoryAccessDesc& access, AnyRegister valany,
+ Register64 val64, Register memoryBase, Register ptr);
+ void wasmStoreImpl(const wasm::MemoryAccessDesc& access, MemOperand destAddr,
+ AnyRegister valany, Register64 val64);
+ // The complete address is in `address`, and `access` is used for its type
+ // attributes only; its `offset` is ignored.
+ void wasmLoadAbsolute(const wasm::MemoryAccessDesc& access,
+ Register memoryBase, uint64_t address, AnyRegister out,
+ Register64 out64);
+ void wasmStoreAbsolute(const wasm::MemoryAccessDesc& access,
+ AnyRegister value, Register64 value64,
+ Register memoryBase, uint64_t address);
+
+ // Emit a BLR or NOP instruction. ToggleCall can be used to patch
+ // this instruction.
+ CodeOffset toggledCall(JitCode* target, bool enabled) {
+ // The returned offset must be to the first instruction generated,
+ // for the debugger to match offset with Baseline's pcMappingEntries_.
+ BufferOffset offset = nextOffset();
+
+ // It is unclear why this sync is necessary:
+ // * PSP and SP have been observed to be different in testcase
+ // tests/cacheir/bug1448136.js
+ // * Removing the sync causes no failures in all of jit-tests.
+ syncStackPtr();
+
+ BufferOffset loadOffset;
+ {
+ vixl::UseScratchRegisterScope temps(this);
+
+ // The register used for the load is hardcoded, so that ToggleCall
+ // can patch in the branch instruction easily. This could be changed,
+ // but then ToggleCall must read the target register from the load.
+ MOZ_ASSERT(temps.IsAvailable(ScratchReg2_64));
+ temps.Exclude(ScratchReg2_64);
+
+ loadOffset = immPool64(ScratchReg2_64, uint64_t(target->raw()));
+
+ if (enabled) {
+ blr(ScratchReg2_64);
+ } else {
+ nop();
+ }
+ }
+
+ addPendingJump(loadOffset, ImmPtr(target->raw()), RelocationKind::JITCODE);
+ CodeOffset ret(offset.getOffset());
+ return ret;
+ }
+
+ static size_t ToggledCallSize(uint8_t* code) {
+ // The call site is a sequence of two or three instructions:
+ //
+ // syncStack (optional)
+ // ldr/adr
+ // nop/blr
+ //
+ // Flushed constant pools can appear before any of the instructions.
+
+ const Instruction* cur = (const Instruction*)code;
+ cur = cur->skipPool();
+ if (cur->IsStackPtrSync()) cur = cur->NextInstruction();
+ cur = cur->skipPool();
+ cur = cur->NextInstruction(); // LDR/ADR
+ cur = cur->skipPool();
+ cur = cur->NextInstruction(); // NOP/BLR
+ return (uint8_t*)cur - code;
+ }
+
+ void checkARMRegAlignment(const ARMRegister& reg) {
+#ifdef DEBUG
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch64 = temps.AcquireX();
+ MOZ_ASSERT_IF(!reg.IsSP(), scratch64.asUnsized() != reg.asUnsized());
+ Label aligned;
+ Mov(scratch64, reg);
+ Tst(scratch64, Operand(StackAlignment - 1));
+ B(Zero, &aligned);
+ breakpoint();
+ bind(&aligned);
+ Mov(scratch64, vixl::xzr); // Clear the scratch register for sanity.
+#endif
+ }
+
+ void checkStackAlignment() {
+#ifdef DEBUG
+ checkARMRegAlignment(GetStackPointer64());
+
+ // If another register is being used to track pushes, check sp explicitly.
+ if (!GetStackPointer64().Is(vixl::sp)) {
+ checkARMRegAlignment(vixl::sp);
+ }
+#endif
+ }
+
+ void abiret() {
+ syncStackPtr(); // SP is always used to transmit the stack between calls.
+ vixl::MacroAssembler::Ret(vixl::lr);
+ }
+
+ void incrementInt32Value(const Address& addr) {
+ vixl::UseScratchRegisterScope temps(this);
+ const ARMRegister scratch32 = temps.AcquireW();
+ MOZ_ASSERT(scratch32.asUnsized() != addr.base);
+
+ load32(addr, scratch32.asUnsized());
+ Add(scratch32, scratch32, Operand(1));
+ store32(scratch32.asUnsized(), addr);
+ }
+
+ void breakpoint();
+
+ // Emits a simulator directive to save the current sp on an internal stack.
+ void simulatorMarkSP() {
+#ifdef JS_SIMULATOR_ARM64
+ svc(vixl::kMarkStackPointer);
+#endif
+ }
+
+ // Emits a simulator directive to pop from its internal stack
+ // and assert that the value is equal to the current sp.
+ void simulatorCheckSP() {
+#ifdef JS_SIMULATOR_ARM64
+ svc(vixl::kCheckStackPointer);
+#endif
+ }
+
+ protected:
+ bool buildOOLFakeExitFrame(void* fakeReturnAddr);
+};
+
+// See documentation for ScratchTagScope and ScratchTagScopeRelease in
+// MacroAssembler-x64.h.
+
+class ScratchTagScope {
+ vixl::UseScratchRegisterScope temps_;
+ ARMRegister scratch64_;
+ bool owned_;
+ mozilla::DebugOnly<bool> released_;
+
+ public:
+ ScratchTagScope(MacroAssemblerCompat& masm, const ValueOperand&)
+ : temps_(&masm), owned_(true), released_(false) {
+ scratch64_ = temps_.AcquireX();
+ }
+
+ operator Register() {
+ MOZ_ASSERT(!released_);
+ return scratch64_.asUnsized();
+ }
+
+ void release() {
+ MOZ_ASSERT(!released_);
+ released_ = true;
+ if (owned_) {
+ temps_.Release(scratch64_);
+ owned_ = false;
+ }
+ }
+
+ void reacquire() {
+ MOZ_ASSERT(released_);
+ released_ = false;
+ }
+};
+
+class ScratchTagScopeRelease {
+ ScratchTagScope* ts_;
+
+ public:
+ explicit ScratchTagScopeRelease(ScratchTagScope* ts) : ts_(ts) {
+ ts_->release();
+ }
+ ~ScratchTagScopeRelease() { ts_->reacquire(); }
+};
+
+inline void MacroAssemblerCompat::splitTagForTest(const ValueOperand& value,
+ ScratchTagScope& tag) {
+ splitSignExtTag(value, tag);
+}
+
+typedef MacroAssemblerCompat MacroAssemblerSpecific;
+
+} // namespace jit
+} // namespace js
+
+#endif // jit_arm64_MacroAssembler_arm64_h
diff --git a/js/src/jit/arm64/MoveEmitter-arm64.cpp b/js/src/jit/arm64/MoveEmitter-arm64.cpp
new file mode 100644
index 0000000000..fa1bb1209e
--- /dev/null
+++ b/js/src/jit/arm64/MoveEmitter-arm64.cpp
@@ -0,0 +1,329 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "jit/arm64/MoveEmitter-arm64.h"
+#include "jit/MacroAssembler-inl.h"
+
+using namespace js;
+using namespace js::jit;
+
+MemOperand MoveEmitterARM64::toMemOperand(const MoveOperand& operand) const {
+ MOZ_ASSERT(operand.isMemory());
+ ARMRegister base(operand.base(), 64);
+ if (operand.base() == masm.getStackPointer()) {
+ return MemOperand(base,
+ operand.disp() + (masm.framePushed() - pushedAtStart_));
+ }
+ return MemOperand(base, operand.disp());
+}
+
+void MoveEmitterARM64::emit(const MoveResolver& moves) {
+ vixl::UseScratchRegisterScope temps(&masm.asVIXL());
+ // We have two scratch general registers, so use one as temporary storage for
+ // breaking cycles and leave the other available for memory to memory moves.
+ //
+ // This register is used when breaking GENERAL, INT32, FLOAT32, and DOUBLE
+ // move cycles. For FLOAT32/DOUBLE, this involves a fmov between float and
+ // general registers. We could avoid this if we had an extra scratch float
+ // register, otherwise we need the scratch float register for memory to
+ // memory moves that may happen in the cycle. We cannot use the scratch
+ // general register for SIMD128 cycles as it is not large enough.
+ cycleGeneralReg_ = temps.AcquireX();
+
+ for (size_t i = 0; i < moves.numMoves(); i++) {
+ emitMove(moves.getMove(i));
+ }
+
+ cycleGeneralReg_ = ARMRegister();
+}
+
+void MoveEmitterARM64::finish() {
+ assertDone();
+ masm.freeStack(masm.framePushed() - pushedAtStart_);
+ MOZ_ASSERT(masm.framePushed() == pushedAtStart_);
+}
+
+void MoveEmitterARM64::emitMove(const MoveOp& move) {
+ const MoveOperand& from = move.from();
+ const MoveOperand& to = move.to();
+
+ if (move.isCycleBegin()) {
+ MOZ_ASSERT(!inCycle_ && !move.isCycleEnd());
+ breakCycle(from, to, move.endCycleType());
+ inCycle_ = true;
+ } else if (move.isCycleEnd()) {
+ MOZ_ASSERT(inCycle_);
+ completeCycle(from, to, move.type());
+ inCycle_ = false;
+ return;
+ }
+
+ switch (move.type()) {
+ case MoveOp::FLOAT32:
+ emitFloat32Move(from, to);
+ break;
+ case MoveOp::DOUBLE:
+ emitDoubleMove(from, to);
+ break;
+ case MoveOp::SIMD128:
+ emitSimd128Move(from, to);
+ break;
+ case MoveOp::INT32:
+ emitInt32Move(from, to);
+ break;
+ case MoveOp::GENERAL:
+ emitGeneralMove(from, to);
+ break;
+ default:
+ MOZ_CRASH("Unexpected move type");
+ }
+}
+
+void MoveEmitterARM64::emitFloat32Move(const MoveOperand& from,
+ const MoveOperand& to) {
+ if (from.isFloatReg()) {
+ if (to.isFloatReg()) {
+ masm.Fmov(toFPReg(to, MoveOp::FLOAT32), toFPReg(from, MoveOp::FLOAT32));
+ } else {
+ masm.Str(toFPReg(from, MoveOp::FLOAT32), toMemOperand(to));
+ }
+ return;
+ }
+
+ if (to.isFloatReg()) {
+ masm.Ldr(toFPReg(to, MoveOp::FLOAT32), toMemOperand(from));
+ return;
+ }
+
+ vixl::UseScratchRegisterScope temps(&masm.asVIXL());
+ const ARMFPRegister scratch32 = temps.AcquireS();
+ masm.Ldr(scratch32, toMemOperand(from));
+ masm.Str(scratch32, toMemOperand(to));
+}
+
+void MoveEmitterARM64::emitDoubleMove(const MoveOperand& from,
+ const MoveOperand& to) {
+ if (from.isFloatReg()) {
+ if (to.isFloatReg()) {
+ masm.Fmov(toFPReg(to, MoveOp::DOUBLE), toFPReg(from, MoveOp::DOUBLE));
+ } else {
+ masm.Str(toFPReg(from, MoveOp::DOUBLE), toMemOperand(to));
+ }
+ return;
+ }
+
+ if (to.isFloatReg()) {
+ masm.Ldr(toFPReg(to, MoveOp::DOUBLE), toMemOperand(from));
+ return;
+ }
+
+ vixl::UseScratchRegisterScope temps(&masm.asVIXL());
+ const ARMFPRegister scratch = temps.AcquireD();
+ masm.Ldr(scratch, toMemOperand(from));
+ masm.Str(scratch, toMemOperand(to));
+}
+
+void MoveEmitterARM64::emitSimd128Move(const MoveOperand& from,
+ const MoveOperand& to) {
+ if (from.isFloatReg()) {
+ if (to.isFloatReg()) {
+ masm.Mov(toFPReg(to, MoveOp::SIMD128), toFPReg(from, MoveOp::SIMD128));
+ } else {
+ masm.Str(toFPReg(from, MoveOp::SIMD128), toMemOperand(to));
+ }
+ return;
+ }
+
+ if (to.isFloatReg()) {
+ masm.Ldr(toFPReg(to, MoveOp::SIMD128), toMemOperand(from));
+ return;
+ }
+
+ vixl::UseScratchRegisterScope temps(&masm.asVIXL());
+ const ARMFPRegister scratch = temps.AcquireQ();
+ masm.Ldr(scratch, toMemOperand(from));
+ masm.Str(scratch, toMemOperand(to));
+}
+
+void MoveEmitterARM64::emitInt32Move(const MoveOperand& from,
+ const MoveOperand& to) {
+ if (from.isGeneralReg()) {
+ if (to.isGeneralReg()) {
+ masm.Mov(toARMReg32(to), toARMReg32(from));
+ } else {
+ masm.Str(toARMReg32(from), toMemOperand(to));
+ }
+ return;
+ }
+
+ if (to.isGeneralReg()) {
+ masm.Ldr(toARMReg32(to), toMemOperand(from));
+ return;
+ }
+
+ vixl::UseScratchRegisterScope temps(&masm.asVIXL());
+ const ARMRegister scratch32 = temps.AcquireW();
+ masm.Ldr(scratch32, toMemOperand(from));
+ masm.Str(scratch32, toMemOperand(to));
+}
+
+void MoveEmitterARM64::emitGeneralMove(const MoveOperand& from,
+ const MoveOperand& to) {
+ if (from.isGeneralReg()) {
+ MOZ_ASSERT(to.isGeneralReg() || to.isMemory());
+ if (to.isGeneralReg()) {
+ masm.Mov(toARMReg64(to), toARMReg64(from));
+ } else {
+ masm.Str(toARMReg64(from), toMemOperand(to));
+ }
+ return;
+ }
+
+ // {Memory OR EffectiveAddress} -> Register move.
+ if (to.isGeneralReg()) {
+ MOZ_ASSERT(from.isMemoryOrEffectiveAddress());
+ if (from.isMemory()) {
+ masm.Ldr(toARMReg64(to), toMemOperand(from));
+ } else {
+ masm.Add(toARMReg64(to), toARMReg64(from), Operand(from.disp()));
+ }
+ return;
+ }
+
+ vixl::UseScratchRegisterScope temps(&masm.asVIXL());
+ const ARMRegister scratch64 = temps.AcquireX();
+
+ // Memory -> Memory move.
+ if (from.isMemory()) {
+ MOZ_ASSERT(to.isMemory());
+ masm.Ldr(scratch64, toMemOperand(from));
+ masm.Str(scratch64, toMemOperand(to));
+ return;
+ }
+
+ // EffectiveAddress -> Memory move.
+ MOZ_ASSERT(from.isEffectiveAddress());
+ MOZ_ASSERT(to.isMemory());
+ masm.Add(scratch64, toARMReg64(from), Operand(from.disp()));
+ masm.Str(scratch64, toMemOperand(to));
+}
+
+MemOperand MoveEmitterARM64::cycleSlot() {
+ // Using SP as stack pointer requires alignment preservation below.
+ MOZ_ASSERT(!masm.GetStackPointer64().Is(sp));
+
+ // Allocate a slot for breaking cycles if we have not already
+ if (pushedAtCycle_ == -1) {
+ static_assert(SpillSlotSize == 16);
+ masm.reserveStack(SpillSlotSize);
+ pushedAtCycle_ = masm.framePushed();
+ }
+
+ return MemOperand(masm.GetStackPointer64(),
+ masm.framePushed() - pushedAtCycle_);
+}
+
+void MoveEmitterARM64::breakCycle(const MoveOperand& from,
+ const MoveOperand& to, MoveOp::Type type) {
+ switch (type) {
+ case MoveOp::FLOAT32:
+ if (to.isMemory()) {
+ masm.Ldr(cycleGeneralReg_.W(), toMemOperand(to));
+ } else {
+ masm.Fmov(cycleGeneralReg_.W(), toFPReg(to, type));
+ }
+ break;
+
+ case MoveOp::DOUBLE:
+ if (to.isMemory()) {
+ masm.Ldr(cycleGeneralReg_.X(), toMemOperand(to));
+ } else {
+ masm.Fmov(cycleGeneralReg_.X(), toFPReg(to, type));
+ }
+ break;
+
+ case MoveOp::SIMD128:
+ if (to.isMemory()) {
+ vixl::UseScratchRegisterScope temps(&masm.asVIXL());
+ const ARMFPRegister scratch128 = temps.AcquireQ();
+ masm.Ldr(scratch128, toMemOperand(to));
+ masm.Str(scratch128, cycleSlot());
+ } else {
+ masm.Str(toFPReg(to, type), cycleSlot());
+ }
+ break;
+
+ case MoveOp::INT32:
+ if (to.isMemory()) {
+ masm.Ldr(cycleGeneralReg_.W(), toMemOperand(to));
+ } else {
+ masm.Mov(cycleGeneralReg_.W(), toARMReg32(to));
+ }
+ break;
+
+ case MoveOp::GENERAL:
+ if (to.isMemory()) {
+ masm.Ldr(cycleGeneralReg_.X(), toMemOperand(to));
+ } else {
+ masm.Mov(cycleGeneralReg_.X(), toARMReg64(to));
+ }
+ break;
+
+ default:
+ MOZ_CRASH("Unexpected move type");
+ }
+}
+
+void MoveEmitterARM64::completeCycle(const MoveOperand& from,
+ const MoveOperand& to, MoveOp::Type type) {
+ switch (type) {
+ case MoveOp::FLOAT32:
+ if (to.isMemory()) {
+ masm.Str(cycleGeneralReg_.W(), toMemOperand(to));
+ } else {
+ masm.Fmov(toFPReg(to, type), cycleGeneralReg_.W());
+ }
+ break;
+
+ case MoveOp::DOUBLE:
+ if (to.isMemory()) {
+ masm.Str(cycleGeneralReg_.X(), toMemOperand(to));
+ } else {
+ masm.Fmov(toFPReg(to, type), cycleGeneralReg_.X());
+ }
+ break;
+
+ case MoveOp::SIMD128:
+ if (to.isMemory()) {
+ vixl::UseScratchRegisterScope temps(&masm.asVIXL());
+ const ARMFPRegister scratch = temps.AcquireQ();
+ masm.Ldr(scratch, cycleSlot());
+ masm.Str(scratch, toMemOperand(to));
+ } else {
+ masm.Ldr(toFPReg(to, type), cycleSlot());
+ }
+ break;
+
+ case MoveOp::INT32:
+ if (to.isMemory()) {
+ masm.Str(cycleGeneralReg_.W(), toMemOperand(to));
+ } else {
+ masm.Mov(toARMReg32(to), cycleGeneralReg_.W());
+ }
+ break;
+
+ case MoveOp::GENERAL:
+ if (to.isMemory()) {
+ masm.Str(cycleGeneralReg_.X(), toMemOperand(to));
+ } else {
+ masm.Mov(toARMReg64(to), cycleGeneralReg_.X());
+ }
+ break;
+
+ default:
+ MOZ_CRASH("Unexpected move type");
+ }
+}
diff --git a/js/src/jit/arm64/MoveEmitter-arm64.h b/js/src/jit/arm64/MoveEmitter-arm64.h
new file mode 100644
index 0000000000..fec2e3e012
--- /dev/null
+++ b/js/src/jit/arm64/MoveEmitter-arm64.h
@@ -0,0 +1,99 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_arm64_MoveEmitter_arm64_h
+#define jit_arm64_MoveEmitter_arm64_h
+
+#include "mozilla/Assertions.h"
+
+#include <stdint.h>
+
+#include "jit/arm64/Assembler-arm64.h"
+#include "jit/MacroAssembler.h"
+#include "jit/MoveResolver.h"
+#include "jit/Registers.h"
+
+namespace js {
+namespace jit {
+
+class CodeGenerator;
+
+class MoveEmitterARM64 {
+ bool inCycle_;
+ MacroAssembler& masm;
+
+ // A scratch general register used to break cycles.
+ ARMRegister cycleGeneralReg_;
+
+ // Original stack push value.
+ uint32_t pushedAtStart_;
+
+ // This stores a stack offset to a spill location, snapshotting
+ // codegen->framePushed_ at the time it was allocated. It is -1 if no
+ // stack space has been allocated for that particular spill.
+ int32_t pushedAtCycle_;
+
+ void assertDone() { MOZ_ASSERT(!inCycle_); }
+
+ MemOperand cycleSlot();
+ MemOperand toMemOperand(const MoveOperand& operand) const;
+ ARMRegister toARMReg32(const MoveOperand& operand) const {
+ MOZ_ASSERT(operand.isGeneralReg());
+ return ARMRegister(operand.reg(), 32);
+ }
+ ARMRegister toARMReg64(const MoveOperand& operand) const {
+ if (operand.isGeneralReg()) {
+ return ARMRegister(operand.reg(), 64);
+ } else {
+ return ARMRegister(operand.base(), 64);
+ }
+ }
+ ARMFPRegister toFPReg(const MoveOperand& operand, MoveOp::Type t) const {
+ MOZ_ASSERT(operand.isFloatReg());
+ switch (t) {
+ case MoveOp::FLOAT32:
+ return ARMFPRegister(operand.floatReg().encoding(), 32);
+ case MoveOp::DOUBLE:
+ return ARMFPRegister(operand.floatReg().encoding(), 64);
+ case MoveOp::SIMD128:
+ return ARMFPRegister(operand.floatReg().encoding(), 128);
+ default:
+ MOZ_MAKE_COMPILER_ASSUME_IS_UNREACHABLE("Bad register type");
+ }
+ }
+
+ void emitFloat32Move(const MoveOperand& from, const MoveOperand& to);
+ void emitDoubleMove(const MoveOperand& from, const MoveOperand& to);
+ void emitSimd128Move(const MoveOperand& from, const MoveOperand& to);
+ void emitInt32Move(const MoveOperand& from, const MoveOperand& to);
+ void emitGeneralMove(const MoveOperand& from, const MoveOperand& to);
+
+ void emitMove(const MoveOp& move);
+ void breakCycle(const MoveOperand& from, const MoveOperand& to,
+ MoveOp::Type type);
+ void completeCycle(const MoveOperand& from, const MoveOperand& to,
+ MoveOp::Type type);
+
+ public:
+ explicit MoveEmitterARM64(MacroAssembler& masm)
+ : inCycle_(false),
+ masm(masm),
+ pushedAtStart_(masm.framePushed()),
+ pushedAtCycle_(-1) {}
+
+ ~MoveEmitterARM64() { assertDone(); }
+
+ void emit(const MoveResolver& moves);
+ void finish();
+ void setScratchRegister(Register reg) {}
+};
+
+typedef MoveEmitterARM64 MoveEmitter;
+
+} // namespace jit
+} // namespace js
+
+#endif /* jit_arm64_MoveEmitter_arm64_h */
diff --git a/js/src/jit/arm64/SharedICHelpers-arm64-inl.h b/js/src/jit/arm64/SharedICHelpers-arm64-inl.h
new file mode 100644
index 0000000000..8261a8b94f
--- /dev/null
+++ b/js/src/jit/arm64/SharedICHelpers-arm64-inl.h
@@ -0,0 +1,79 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_arm64_SharedICHelpers_arm64_inl_h
+#define jit_arm64_SharedICHelpers_arm64_inl_h
+
+#include "jit/BaselineFrame.h"
+#include "jit/SharedICHelpers.h"
+
+#include "jit/MacroAssembler-inl.h"
+
+namespace js {
+namespace jit {
+
+inline void EmitBaselineTailCallVM(TrampolinePtr target, MacroAssembler& masm,
+ uint32_t argSize) {
+#ifdef DEBUG
+ // We assume that R0 has been pushed, and R2 is unused.
+ static_assert(R2 == ValueOperand(r0));
+
+ // Store frame size without VMFunction arguments for debug assertions.
+ masm.Sub(x0, FramePointer64, masm.GetStackPointer64());
+ masm.Sub(w0, w0, Operand(argSize));
+ Address frameSizeAddr(FramePointer,
+ BaselineFrame::reverseOffsetOfDebugFrameSize());
+ masm.store32(w0.asUnsized(), frameSizeAddr);
+#endif
+
+ // Push frame descriptor (minus the return address) and perform the tail call.
+ static_assert(ICTailCallReg == lr);
+ masm.pushFrameDescriptor(FrameType::BaselineJS);
+
+ // The return address will be pushed by the VM wrapper, for compatibility
+ // with direct calls. Refer to the top of generateVMWrapper().
+ // ICTailCallReg (lr) already contains the return address (as we keep
+ // it there through the stub calls).
+
+ masm.jump(target);
+}
+
+inline void EmitBaselineCallVM(TrampolinePtr target, MacroAssembler& masm) {
+ masm.pushFrameDescriptor(FrameType::BaselineStub);
+ masm.call(target);
+}
+
+inline void EmitBaselineEnterStubFrame(MacroAssembler& masm, Register scratch) {
+ MOZ_ASSERT(scratch != ICTailCallReg);
+
+#ifdef DEBUG
+ // Compute frame size.
+ masm.Sub(ARMRegister(scratch, 64), FramePointer64, masm.GetStackPointer64());
+
+ Address frameSizeAddr(FramePointer,
+ BaselineFrame::reverseOffsetOfDebugFrameSize());
+ masm.store32(scratch, frameSizeAddr);
+#endif
+
+ // Push frame descriptor and return address.
+ // Save old frame pointer, stack pointer, and stub reg.
+ masm.PushFrameDescriptor(FrameType::BaselineJS);
+ masm.Push(ICTailCallReg);
+ masm.Push(FramePointer);
+
+ // Update the frame register.
+ masm.Mov(FramePointer64, masm.GetStackPointer64());
+
+ masm.Push(ICStubReg);
+
+ // Stack should remain 16-byte aligned.
+ masm.checkStackAlignment();
+}
+
+} // namespace jit
+} // namespace js
+
+#endif // jit_arm64_SharedICHelpers_arm64_inl_h
diff --git a/js/src/jit/arm64/SharedICHelpers-arm64.h b/js/src/jit/arm64/SharedICHelpers-arm64.h
new file mode 100644
index 0000000000..2ea45c80fb
--- /dev/null
+++ b/js/src/jit/arm64/SharedICHelpers-arm64.h
@@ -0,0 +1,82 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_arm64_SharedICHelpers_arm64_h
+#define jit_arm64_SharedICHelpers_arm64_h
+
+#include "jit/BaselineIC.h"
+#include "jit/JitFrames.h"
+#include "jit/MacroAssembler.h"
+#include "jit/SharedICRegisters.h"
+
+namespace js {
+namespace jit {
+
+// Distance from sp to the top Value inside an IC stub (no return address on the
+// stack on ARM).
+static const size_t ICStackValueOffset = 0;
+
+inline void EmitRestoreTailCallReg(MacroAssembler& masm) {
+ // No-op on ARM because link register is always holding the return address.
+}
+
+inline void EmitRepushTailCallReg(MacroAssembler& masm) {
+ // No-op on ARM because link register is always holding the return address.
+}
+
+inline void EmitCallIC(MacroAssembler& masm, CodeOffset* callOffset) {
+ // The stub pointer must already be in ICStubReg.
+ // Load stubcode pointer from the ICStub.
+ // R2 won't be active when we call ICs, so we can use r0.
+ static_assert(R2 == ValueOperand(r0));
+ masm.loadPtr(Address(ICStubReg, ICStub::offsetOfStubCode()), r0);
+
+ // Call the stubcode via a direct branch-and-link.
+ masm.Blr(x0);
+ *callOffset = CodeOffset(masm.currentOffset());
+}
+
+inline void EmitReturnFromIC(MacroAssembler& masm) {
+ masm.abiret(); // Defaults to lr.
+}
+
+inline void EmitBaselineLeaveStubFrame(MacroAssembler& masm) {
+ vixl::UseScratchRegisterScope temps(&masm.asVIXL());
+ const ARMRegister scratch64 = temps.AcquireX();
+
+ Address stubAddr(FramePointer, BaselineStubFrameLayout::ICStubOffsetFromFP);
+ masm.loadPtr(stubAddr, ICStubReg);
+
+ masm.moveToStackPtr(FramePointer);
+
+ // Pop values, discarding the frame descriptor.
+ masm.pop(FramePointer, ICTailCallReg, scratch64.asUnsized());
+
+ // Stack should remain 16-byte aligned.
+ masm.checkStackAlignment();
+}
+
+template <typename AddrType>
+inline void EmitPreBarrier(MacroAssembler& masm, const AddrType& addr,
+ MIRType type) {
+ // On AArch64, lr is clobbered by guardedCallPreBarrier. Save it first.
+ masm.push(lr);
+ masm.guardedCallPreBarrier(addr, type);
+ masm.pop(lr);
+}
+
+inline void EmitStubGuardFailure(MacroAssembler& masm) {
+ // Load next stub into ICStubReg.
+ masm.loadPtr(Address(ICStubReg, ICCacheIRStub::offsetOfNext()), ICStubReg);
+
+ // Return address is already loaded, just jump to the next stubcode.
+ masm.jump(Address(ICStubReg, ICStub::offsetOfStubCode()));
+}
+
+} // namespace jit
+} // namespace js
+
+#endif // jit_arm64_SharedICHelpers_arm64_h
diff --git a/js/src/jit/arm64/SharedICRegisters-arm64.h b/js/src/jit/arm64/SharedICRegisters-arm64.h
new file mode 100644
index 0000000000..1aa49d651c
--- /dev/null
+++ b/js/src/jit/arm64/SharedICRegisters-arm64.h
@@ -0,0 +1,51 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_arm64_SharedICRegisters_arm64_h
+#define jit_arm64_SharedICRegisters_arm64_h
+
+#include "jit/arm64/Assembler-arm64.h"
+#include "jit/Registers.h"
+#include "jit/RegisterSets.h"
+
+namespace js {
+namespace jit {
+
+// ValueOperands R0, R1, and R2.
+// R0 == JSReturnReg, and R2 uses registers not preserved across calls.
+// R1 value should be preserved across calls.
+static constexpr Register R0_ = r2;
+static constexpr Register R1_ = r19;
+static constexpr Register R2_ = r0;
+
+static constexpr ValueOperand R0(R0_);
+static constexpr ValueOperand R1(R1_);
+static constexpr ValueOperand R2(R2_);
+
+// ICTailCallReg and ICStubReg use registers that are not preserved across
+// calls.
+static constexpr Register ICTailCallReg = r30;
+static constexpr Register ICStubReg = r9;
+
+// R7 - R9 are generally available for use within stubcode.
+
+// Note that BaselineTailCallReg is actually just the link
+// register. In ARM code emission, we do not clobber BaselineTailCallReg
+// since we keep the return address for calls there.
+
+static constexpr FloatRegister FloatReg0 = {FloatRegisters::d0,
+ FloatRegisters::Double};
+static constexpr FloatRegister FloatReg1 = {FloatRegisters::d1,
+ FloatRegisters::Double};
+static constexpr FloatRegister FloatReg2 = {FloatRegisters::d2,
+ FloatRegisters::Double};
+static constexpr FloatRegister FloatReg3 = {FloatRegisters::d3,
+ FloatRegisters::Double};
+
+} // namespace jit
+} // namespace js
+
+#endif // jit_arm64_SharedICRegisters_arm64_h
diff --git a/js/src/jit/arm64/Trampoline-arm64.cpp b/js/src/jit/arm64/Trampoline-arm64.cpp
new file mode 100644
index 0000000000..36f7f24d02
--- /dev/null
+++ b/js/src/jit/arm64/Trampoline-arm64.cpp
@@ -0,0 +1,840 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "jit/arm64/SharedICHelpers-arm64.h"
+#include "jit/Bailouts.h"
+#include "jit/BaselineFrame.h"
+#include "jit/CalleeToken.h"
+#include "jit/JitFrames.h"
+#include "jit/JitRuntime.h"
+#include "jit/PerfSpewer.h"
+#include "jit/VMFunctions.h"
+#include "vm/JitActivation.h" // js::jit::JitActivation
+#include "vm/JSContext.h"
+
+#include "jit/MacroAssembler-inl.h"
+
+using namespace js;
+using namespace js::jit;
+
+/* This method generates a trampoline on ARM64 for a c++ function with
+ * the following signature:
+ * bool blah(void* code, int argc, Value* argv,
+ * JSObject* scopeChain, Value* vp)
+ * ...using standard AArch64 calling convention
+ */
+void JitRuntime::generateEnterJIT(JSContext* cx, MacroAssembler& masm) {
+ AutoCreatedBy acb(masm, "JitRuntime::generateEnterJIT");
+
+ enterJITOffset_ = startTrampolineCode(masm);
+
+ const Register reg_code = IntArgReg0; // EnterJitData::jitcode.
+ const Register reg_argc = IntArgReg1; // EnterJitData::maxArgc.
+ const Register reg_argv = IntArgReg2; // EnterJitData::maxArgv.
+ const Register reg_osrFrame = IntArgReg3; // EnterJitData::osrFrame.
+ const Register reg_callee = IntArgReg4; // EnterJitData::calleeToken.
+ const Register reg_scope = IntArgReg5; // EnterJitData::scopeChain.
+ const Register reg_osrNStack =
+ IntArgReg6; // EnterJitData::osrNumStackValues.
+ const Register reg_vp = IntArgReg7; // Address of EnterJitData::result.
+
+ static_assert(OsrFrameReg == IntArgReg3);
+
+ // During the pushes below, use the normal stack pointer.
+ masm.SetStackPointer64(sp);
+
+ // Save return address and old frame pointer; set new frame pointer.
+ masm.push(r30, r29);
+ masm.moveStackPtrTo(r29);
+
+ // Save callee-save integer registers.
+ // Also save x7 (reg_vp) and x30 (lr), for use later.
+ masm.push(r19, r20, r21, r22);
+ masm.push(r23, r24, r25, r26);
+ masm.push(r27, r28, r7, r30);
+
+ // Save callee-save floating-point registers.
+ // AArch64 ABI specifies that only the lower 64 bits must be saved.
+ masm.push(d8, d9, d10, d11);
+ masm.push(d12, d13, d14, d15);
+
+#ifdef DEBUG
+ // Emit stack canaries.
+ masm.movePtr(ImmWord(0xdeadd00d), r23);
+ masm.movePtr(ImmWord(0xdeadd11d), r24);
+ masm.push(r23, r24);
+#endif
+
+ // Common code below attempts to push single registers at a time,
+ // which breaks the stack pointer's 16-byte alignment requirement.
+ // Note that movePtr() is invalid because StackPointer is treated as xzr.
+ //
+ // FIXME: After testing, this entire function should be rewritten to not
+ // use the PseudoStackPointer: since the amount of data pushed is
+ // precalculated, we can just allocate the whole frame header at once and
+ // index off sp. This will save a significant number of instructions where
+ // Push() updates sp.
+ masm.Mov(PseudoStackPointer64, sp);
+ masm.SetStackPointer64(PseudoStackPointer64);
+
+ // Remember stack depth without padding and arguments.
+ masm.moveStackPtrTo(r19);
+
+ // If constructing, include newTarget in argument vector.
+ {
+ Label noNewTarget;
+ Imm32 constructingToken(CalleeToken_FunctionConstructing);
+ masm.branchTest32(Assembler::Zero, reg_callee, constructingToken,
+ &noNewTarget);
+ masm.add32(Imm32(1), reg_argc);
+ masm.bind(&noNewTarget);
+ }
+
+ // JitFrameLayout is as follows (higher is higher in memory):
+ // N*8 - [ JS argument vector ] (base 16-byte aligned)
+ // 8 - calleeToken
+ // 8 - frameDescriptor (16-byte aligned)
+ // 8 - returnAddress
+ // 8 - frame pointer (16-byte aligned, pushed by callee)
+
+ // Touch frame incrementally (a requirement for Windows).
+ //
+ // Use already saved callee-save registers r20 and r21 as temps.
+ //
+ // This has to be done outside the ScratchRegisterScope, as the temps are
+ // under demand inside the touchFrameValues call.
+
+ // Give sp 16-byte alignment and sync stack pointers.
+ masm.andToStackPtr(Imm32(~0xf));
+ // We needn't worry about the Gecko Profiler mark because touchFrameValues
+ // touches in large increments.
+ masm.touchFrameValues(reg_argc, r20, r21);
+ // Restore stack pointer, preserved above.
+ masm.moveToStackPtr(r19);
+
+ // Push the argument vector onto the stack.
+ // WARNING: destructively modifies reg_argv
+ {
+ vixl::UseScratchRegisterScope temps(&masm.asVIXL());
+
+ const ARMRegister tmp_argc = temps.AcquireX();
+ const ARMRegister tmp_sp = temps.AcquireX();
+
+ Label noArguments;
+ Label loopHead;
+
+ masm.movePtr(reg_argc, tmp_argc.asUnsized());
+
+ // sp -= 8
+ // Since we're using PostIndex Str below, this is necessary to avoid
+ // overwriting the Gecko Profiler mark pushed above.
+ masm.subFromStackPtr(Imm32(8));
+
+ // sp -= 8 * argc
+ masm.Sub(PseudoStackPointer64, PseudoStackPointer64,
+ Operand(tmp_argc, vixl::SXTX, 3));
+
+ // Give sp 16-byte alignment and sync stack pointers.
+ masm.andToStackPtr(Imm32(~0xf));
+ masm.moveStackPtrTo(tmp_sp.asUnsized());
+
+ masm.branchTestPtr(Assembler::Zero, reg_argc, reg_argc, &noArguments);
+
+ // Begin argument-pushing loop.
+ // This could be optimized using Ldp and Stp.
+ {
+ masm.bind(&loopHead);
+
+ // Load an argument from argv, then increment argv by 8.
+ masm.Ldr(x24, MemOperand(ARMRegister(reg_argv, 64), Operand(8),
+ vixl::PostIndex));
+
+ // Store the argument to tmp_sp, then increment tmp_sp by 8.
+ masm.Str(x24, MemOperand(tmp_sp, Operand(8), vixl::PostIndex));
+
+ // Decrement tmp_argc and set the condition codes for the new value.
+ masm.Subs(tmp_argc, tmp_argc, Operand(1));
+
+ // Branch if arguments remain.
+ masm.B(&loopHead, vixl::Condition::NonZero);
+ }
+
+ masm.bind(&noArguments);
+ }
+ masm.checkStackAlignment();
+
+ // Push the calleeToken and the frame descriptor.
+ // The result address is used to store the actual number of arguments
+ // without adding an argument to EnterJIT.
+ {
+ vixl::UseScratchRegisterScope temps(&masm.asVIXL());
+ MOZ_ASSERT(temps.IsAvailable(ScratchReg64)); // ip0
+ temps.Exclude(ScratchReg64);
+ Register scratch = ScratchReg64.asUnsized();
+ masm.push(reg_callee);
+
+ // Push the descriptor.
+ masm.unboxInt32(Address(reg_vp, 0x0), scratch);
+ masm.PushFrameDescriptorForJitCall(FrameType::CppToJSJit, scratch, scratch);
+ }
+ masm.checkStackAlignment();
+
+ Label osrReturnPoint;
+ {
+ // Check for Interpreter -> Baseline OSR.
+
+ AllocatableGeneralRegisterSet regs(GeneralRegisterSet::All());
+ MOZ_ASSERT(!regs.has(FramePointer));
+ regs.take(OsrFrameReg);
+ regs.take(reg_code);
+ regs.take(reg_osrNStack);
+ MOZ_ASSERT(!regs.has(ReturnReg), "ReturnReg matches reg_code");
+
+ Label notOsr;
+ masm.branchTestPtr(Assembler::Zero, OsrFrameReg, OsrFrameReg, &notOsr);
+
+ Register scratch = regs.takeAny();
+
+ // Frame prologue.
+ masm.Adr(ARMRegister(scratch, 64), &osrReturnPoint);
+ masm.push(scratch, FramePointer);
+ masm.moveStackPtrTo(FramePointer);
+
+ // Reserve frame.
+ masm.subFromStackPtr(Imm32(BaselineFrame::Size()));
+
+ Register framePtrScratch = regs.takeAny();
+ masm.touchFrameValues(reg_osrNStack, scratch, framePtrScratch);
+ masm.moveStackPtrTo(framePtrScratch);
+
+ // Reserve space for locals and stack values.
+ // scratch = num_stack_values * sizeof(Value).
+ masm.Lsl(ARMRegister(scratch, 32), ARMRegister(reg_osrNStack, 32), 3);
+ masm.subFromStackPtr(scratch);
+
+ // Enter exit frame.
+ masm.pushFrameDescriptor(FrameType::BaselineJS);
+ masm.push(xzr); // Push xzr for a fake return address.
+ masm.push(FramePointer);
+ // No GC things to mark: push a bare token.
+ masm.loadJSContext(scratch);
+ masm.enterFakeExitFrame(scratch, scratch, ExitFrameType::Bare);
+
+ masm.push(reg_code);
+
+ // Initialize the frame, including filling in the slots.
+ using Fn = bool (*)(BaselineFrame * frame, InterpreterFrame * interpFrame,
+ uint32_t numStackValues);
+ masm.setupUnalignedABICall(r19);
+ masm.passABIArg(framePtrScratch); // BaselineFrame.
+ masm.passABIArg(reg_osrFrame); // InterpreterFrame.
+ masm.passABIArg(reg_osrNStack);
+ masm.callWithABI<Fn, jit::InitBaselineFrameForOsr>(
+ MoveOp::GENERAL, CheckUnsafeCallWithABI::DontCheckHasExitFrame);
+
+ masm.pop(scratch);
+ MOZ_ASSERT(scratch != ReturnReg);
+
+ masm.addToStackPtr(Imm32(ExitFrameLayout::SizeWithFooter()));
+
+ Label error;
+ masm.branchIfFalseBool(ReturnReg, &error);
+
+ // If OSR-ing, then emit instrumentation for setting lastProfilerFrame
+ // if profiler instrumentation is enabled.
+ {
+ Label skipProfilingInstrumentation;
+ AbsoluteAddress addressOfEnabled(
+ cx->runtime()->geckoProfiler().addressOfEnabled());
+ masm.branch32(Assembler::Equal, addressOfEnabled, Imm32(0),
+ &skipProfilingInstrumentation);
+ masm.profilerEnterFrame(FramePointer, regs.getAny());
+ masm.bind(&skipProfilingInstrumentation);
+ }
+
+ masm.jump(scratch);
+
+ // OOM: frame epilogue, load error value, discard return address and return.
+ masm.bind(&error);
+ masm.moveToStackPtr(FramePointer);
+ masm.pop(FramePointer);
+ masm.addToStackPtr(Imm32(sizeof(uintptr_t))); // Return address.
+ masm.syncStackPtr();
+ masm.moveValue(MagicValue(JS_ION_ERROR), JSReturnOperand);
+ masm.B(&osrReturnPoint);
+
+ masm.bind(&notOsr);
+ masm.movePtr(reg_scope, R1_);
+ }
+
+ // The callee will push the return address and frame pointer on the stack,
+ // thus we check that the stack would be aligned once the call is complete.
+ masm.assertStackAlignment(JitStackAlignment, 2 * sizeof(uintptr_t));
+
+ // Call function.
+ // Since AArch64 doesn't have the pc register available, the callee must push
+ // lr.
+ masm.callJitNoProfiler(reg_code);
+
+ // Interpreter -> Baseline OSR will return here.
+ masm.bind(&osrReturnPoint);
+
+ // Discard arguments and padding. Set sp to the address of the saved
+ // registers. In debug builds we have to include the two stack canaries
+ // checked below.
+#ifdef DEBUG
+ static constexpr size_t SavedRegSize = 22 * sizeof(void*);
+#else
+ static constexpr size_t SavedRegSize = 20 * sizeof(void*);
+#endif
+ masm.computeEffectiveAddress(Address(FramePointer, -int32_t(SavedRegSize)),
+ masm.getStackPointer());
+
+ masm.syncStackPtr();
+ masm.SetStackPointer64(sp);
+
+#ifdef DEBUG
+ // Check that canaries placed on function entry are still present.
+ masm.pop(r24, r23);
+ Label x23OK, x24OK;
+
+ masm.branchPtr(Assembler::Equal, r23, ImmWord(0xdeadd00d), &x23OK);
+ masm.breakpoint();
+ masm.bind(&x23OK);
+
+ masm.branchPtr(Assembler::Equal, r24, ImmWord(0xdeadd11d), &x24OK);
+ masm.breakpoint();
+ masm.bind(&x24OK);
+#endif
+
+ // Restore callee-save floating-point registers.
+ masm.pop(d15, d14, d13, d12);
+ masm.pop(d11, d10, d9, d8);
+
+ // Restore callee-save integer registers.
+ // Also restore x7 (reg_vp) and x30 (lr).
+ masm.pop(r30, r7, r28, r27);
+ masm.pop(r26, r25, r24, r23);
+ masm.pop(r22, r21, r20, r19);
+
+ // Store return value (in JSReturnReg = x2 to just-popped reg_vp).
+ masm.storeValue(JSReturnOperand, Address(reg_vp, 0));
+
+ // Restore old frame pointer.
+ masm.pop(r29, r30);
+
+ // Return using the value popped into x30.
+ masm.abiret();
+
+ // Reset stack pointer.
+ masm.SetStackPointer64(PseudoStackPointer64);
+}
+
+// static
+mozilla::Maybe<::JS::ProfilingFrameIterator::RegisterState>
+JitRuntime::getCppEntryRegisters(JitFrameLayout* frameStackAddress) {
+ // Not supported, or not implemented yet.
+ // TODO: Implement along with the corresponding stack-walker changes, in
+ // coordination with the Gecko Profiler, see bug 1635987 and follow-ups.
+ return mozilla::Nothing{};
+}
+
+static void PushRegisterDump(MacroAssembler& masm) {
+ const LiveRegisterSet First28GeneralRegisters = LiveRegisterSet(
+ GeneralRegisterSet(Registers::AllMask &
+ ~(1 << 31 | 1 << 30 | 1 << 29 | 1 << 28)),
+ FloatRegisterSet(FloatRegisters::NoneMask));
+
+ const LiveRegisterSet AllFloatRegisters =
+ LiveRegisterSet(GeneralRegisterSet(Registers::NoneMask),
+ FloatRegisterSet(FloatRegisters::AllMask));
+
+ // Push all general-purpose registers.
+ //
+ // The ARM64 ABI does not treat SP as a normal register that can
+ // be pushed. So pushing happens in two phases.
+ //
+ // Registers are pushed in reverse order of code.
+ //
+ // See block comment in MacroAssembler.h for further required invariants.
+
+ // First, push the last four registers, passing zero for sp.
+ // Zero is pushed for x28 and x31: the pseudo-SP and SP, respectively.
+ masm.asVIXL().Push(xzr, x30, x29, xzr);
+
+ // Second, push the first 28 registers that serve no special purpose.
+ masm.PushRegsInMask(First28GeneralRegisters);
+
+ // Finally, push all floating-point registers, completing the RegisterDump.
+ masm.PushRegsInMask(AllFloatRegisters);
+}
+
+void JitRuntime::generateInvalidator(MacroAssembler& masm, Label* bailoutTail) {
+ AutoCreatedBy acb(masm, "JitRuntime::generateInvalidator");
+
+ invalidatorOffset_ = startTrampolineCode(masm);
+
+ // The InvalidationBailoutStack saved in r0 must be:
+ // - osiPointReturnAddress_
+ // - ionScript_ (pushed by CodeGeneratorARM64::generateInvalidateEpilogue())
+ // - regs_ (pushed here)
+ // - fpregs_ (pushed here) [=r0]
+ PushRegisterDump(masm);
+ masm.moveStackPtrTo(r0);
+
+ // Reserve space for InvalidationBailout's bailoutInfo outparam.
+ masm.Sub(x1, masm.GetStackPointer64(), Operand(sizeof(void*)));
+ masm.moveToStackPtr(r1);
+
+ using Fn =
+ bool (*)(InvalidationBailoutStack * sp, BaselineBailoutInfo * *info);
+ masm.setupUnalignedABICall(r10);
+ masm.passABIArg(r0);
+ masm.passABIArg(r1);
+
+ masm.callWithABI<Fn, InvalidationBailout>(
+ MoveOp::GENERAL, CheckUnsafeCallWithABI::DontCheckOther);
+
+ masm.pop(r2); // Get the bailoutInfo outparam.
+
+ // Pop the machine state and the dead frame.
+ masm.moveToStackPtr(FramePointer);
+
+ // Jump to shared bailout tail. The BailoutInfo pointer has to be in r2.
+ masm.jump(bailoutTail);
+}
+
+void JitRuntime::generateArgumentsRectifier(MacroAssembler& masm,
+ ArgumentsRectifierKind kind) {
+ AutoCreatedBy acb(masm, "JitRuntime::generateArgumentsRectifier");
+
+ switch (kind) {
+ case ArgumentsRectifierKind::Normal:
+ argumentsRectifierOffset_ = startTrampolineCode(masm);
+ break;
+ case ArgumentsRectifierKind::TrialInlining:
+ trialInliningArgumentsRectifierOffset_ = startTrampolineCode(masm);
+ break;
+ }
+
+ // Save the return address for later.
+ masm.push(lr);
+
+ // Frame prologue.
+ //
+ // NOTE: if this changes, fix the Baseline bailout code too!
+ // See BaselineStackBuilder::calculatePrevFramePtr and
+ // BaselineStackBuilder::buildRectifierFrame (in BaselineBailouts.cpp).
+ masm.push(FramePointer);
+ masm.moveStackPtrTo(FramePointer);
+
+ // Load the information that the rectifier needs from the stack.
+ masm.loadNumActualArgs(FramePointer, r0);
+ masm.loadPtr(
+ Address(FramePointer, RectifierFrameLayout::offsetOfCalleeToken()), r1);
+
+ // Extract a JSFunction pointer from the callee token and keep the
+ // intermediary to avoid later recalculation.
+ masm.And(x5, x1, Operand(CalleeTokenMask));
+
+ // Get the arguments from the function object.
+ masm.loadFunctionArgCount(x5.asUnsized(), x6.asUnsized());
+
+ static_assert(CalleeToken_FunctionConstructing == 0x1,
+ "Constructing must be low-order bit");
+ masm.And(x4, x1, Operand(CalleeToken_FunctionConstructing));
+ masm.Add(x7, x6, x4);
+
+ // Copy the number of actual arguments into r8.
+ masm.mov(r0, r8);
+
+ // Calculate the position that our arguments are at before sp gets modified.
+ masm.Add(x3, masm.GetStackPointer64(), Operand(x8, vixl::LSL, 3));
+ masm.Add(x3, x3, Operand(sizeof(RectifierFrameLayout)));
+
+ // If the number of Values without |this| is even, push 8 padding bytes to
+ // ensure the stack is 16-byte aligned.
+ Label noPadding;
+ masm.Tbnz(x7, 0, &noPadding);
+ masm.asVIXL().Push(xzr);
+ masm.bind(&noPadding);
+
+ {
+ Label notConstructing;
+ masm.Cbz(x4, &notConstructing);
+
+ // new.target lives at the end of the pushed args
+ // NB: The arg vector holder starts at the beginning of the last arg,
+ // add a value to get to argv[argc]
+ masm.loadPtr(Address(r3, sizeof(Value)), r4);
+ masm.Push(r4);
+
+ masm.bind(&notConstructing);
+ }
+
+ // Calculate the number of undefineds that need to be pushed.
+ masm.Sub(w2, w6, w8);
+
+ // Put an undefined in a register so it can be pushed.
+ masm.moveValue(UndefinedValue(), ValueOperand(r4));
+
+ // Push undefined N times.
+ {
+ Label undefLoopTop;
+ masm.bind(&undefLoopTop);
+ masm.Push(r4);
+ masm.Subs(w2, w2, Operand(1));
+ masm.B(&undefLoopTop, Assembler::NonZero);
+ }
+
+ // Arguments copy loop. Copy for x8 >= 0 to include |this|.
+ {
+ Label copyLoopTop;
+ masm.bind(&copyLoopTop);
+ masm.Ldr(x4, MemOperand(x3, -sizeof(Value), vixl::PostIndex));
+ masm.Push(r4);
+ masm.Subs(x8, x8, Operand(1));
+ masm.B(&copyLoopTop, Assembler::NotSigned);
+ }
+
+ masm.push(r1); // Callee token.
+ masm.pushFrameDescriptorForJitCall(FrameType::Rectifier, r0, r0);
+
+ // Call the target function.
+ switch (kind) {
+ case ArgumentsRectifierKind::Normal:
+ masm.loadJitCodeRaw(r5, r3);
+ argumentsRectifierReturnOffset_ = masm.callJitNoProfiler(r3);
+ break;
+ case ArgumentsRectifierKind::TrialInlining:
+ Label noBaselineScript, done;
+ masm.loadBaselineJitCodeRaw(r5, r3, &noBaselineScript);
+ masm.callJitNoProfiler(r3);
+ masm.jump(&done);
+
+ // See BaselineCacheIRCompiler::emitCallInlinedFunction.
+ masm.bind(&noBaselineScript);
+ masm.loadJitCodeRaw(r5, r3);
+ masm.callJitNoProfiler(r3);
+ masm.bind(&done);
+ break;
+ }
+
+ masm.moveToStackPtr(FramePointer);
+ masm.pop(FramePointer);
+ masm.ret();
+}
+
+static void PushBailoutFrame(MacroAssembler& masm, Register spArg) {
+ // This assumes no SIMD registers, as JS does not support SIMD.
+
+ // The stack saved in spArg must be (higher entries have higher memory
+ // addresses):
+ // - snapshotOffset_
+ // - frameSize_
+ // - regs_
+ // - fpregs_ (spArg + 0)
+ PushRegisterDump(masm);
+ masm.moveStackPtrTo(spArg);
+}
+
+static void GenerateBailoutThunk(MacroAssembler& masm, Label* bailoutTail) {
+ PushBailoutFrame(masm, r0);
+
+ // SP % 8 == 4
+ // STEP 1c: Call the bailout function, giving a pointer to the
+ // structure we just blitted onto the stack.
+ // Make space for the BaselineBailoutInfo* outparam.
+ masm.reserveStack(sizeof(void*));
+ masm.moveStackPtrTo(r1);
+
+ using Fn = bool (*)(BailoutStack * sp, BaselineBailoutInfo * *info);
+ masm.setupUnalignedABICall(r2);
+ masm.passABIArg(r0);
+ masm.passABIArg(r1);
+ masm.callWithABI<Fn, Bailout>(MoveOp::GENERAL,
+ CheckUnsafeCallWithABI::DontCheckOther);
+
+ // Get the bailoutInfo outparam.
+ masm.pop(r2);
+
+ // Remove both the bailout frame and the topmost Ion frame's stack.
+ masm.moveToStackPtr(FramePointer);
+
+ // Jump to shared bailout tail. The BailoutInfo pointer has to be in r2.
+ masm.jump(bailoutTail);
+}
+
+void JitRuntime::generateBailoutHandler(MacroAssembler& masm,
+ Label* bailoutTail) {
+ AutoCreatedBy acb(masm, "JitRuntime::generateBailoutHandler");
+
+ bailoutHandlerOffset_ = startTrampolineCode(masm);
+
+ GenerateBailoutThunk(masm, bailoutTail);
+}
+
+bool JitRuntime::generateVMWrapper(JSContext* cx, MacroAssembler& masm,
+ const VMFunctionData& f, DynFn nativeFun,
+ uint32_t* wrapperOffset) {
+ AutoCreatedBy acb(masm, "JitRuntime::generateVMWrapper");
+
+ *wrapperOffset = startTrampolineCode(masm);
+
+ // Avoid conflicts with argument registers while discarding the result after
+ // the function call.
+ AllocatableGeneralRegisterSet regs(Register::Codes::WrapperMask);
+
+ static_assert(
+ (Register::Codes::VolatileMask & ~Register::Codes::WrapperMask) == 0,
+ "Wrapper register set must be a superset of the Volatile register set.");
+
+ // Unlike on other platforms, it is the responsibility of the VM *callee* to
+ // push the return address, while the caller must ensure that the address
+ // is stored in lr on entry. This allows the VM wrapper to work with both
+ // direct calls and tail calls.
+ masm.push(lr);
+
+ // First argument is the JSContext.
+ Register reg_cx = IntArgReg0;
+ regs.take(reg_cx);
+
+ // Stack is:
+ // ... frame ...
+ // +12 [args]
+ // +8 descriptor
+ // +0 returnAddress (pushed by this function, caller sets as lr)
+ //
+ // Push the frame pointer to finish the exit frame, then link it up.
+ masm.Push(FramePointer);
+ masm.moveStackPtrTo(FramePointer);
+ masm.loadJSContext(reg_cx);
+ masm.enterExitFrame(reg_cx, regs.getAny(), &f);
+
+ // Save the current stack pointer as the base for copying arguments.
+ Register argsBase = InvalidReg;
+ if (f.explicitArgs) {
+ // argsBase can't be an argument register. Bad things would happen if
+ // the MoveResolver didn't throw an assertion failure first.
+ argsBase = r8;
+ regs.take(argsBase);
+ masm.Add(ARMRegister(argsBase, 64), masm.GetStackPointer64(),
+ Operand(ExitFrameLayout::SizeWithFooter()));
+ }
+
+ // Reserve space for any outparameter.
+ Register outReg = InvalidReg;
+ switch (f.outParam) {
+ case Type_Value:
+ outReg = regs.takeAny();
+ masm.reserveStack(sizeof(Value));
+ masm.moveStackPtrTo(outReg);
+ break;
+
+ case Type_Handle:
+ outReg = regs.takeAny();
+ masm.PushEmptyRooted(f.outParamRootType);
+ masm.moveStackPtrTo(outReg);
+ break;
+
+ case Type_Int32:
+ case Type_Bool:
+ outReg = regs.takeAny();
+ masm.reserveStack(sizeof(int64_t));
+ masm.moveStackPtrTo(outReg);
+ break;
+
+ case Type_Double:
+ outReg = regs.takeAny();
+ masm.reserveStack(sizeof(double));
+ masm.moveStackPtrTo(outReg);
+ break;
+
+ case Type_Pointer:
+ outReg = regs.takeAny();
+ masm.reserveStack(sizeof(uintptr_t));
+ masm.moveStackPtrTo(outReg);
+ break;
+
+ default:
+ MOZ_ASSERT(f.outParam == Type_Void);
+ break;
+ }
+
+ masm.setupUnalignedABICall(regs.getAny());
+ masm.passABIArg(reg_cx);
+
+ size_t argDisp = 0;
+
+ // Copy arguments.
+ for (uint32_t explicitArg = 0; explicitArg < f.explicitArgs; explicitArg++) {
+ switch (f.argProperties(explicitArg)) {
+ case VMFunctionData::WordByValue:
+ masm.passABIArg(MoveOperand(argsBase, argDisp),
+ (f.argPassedInFloatReg(explicitArg) ? MoveOp::DOUBLE
+ : MoveOp::GENERAL));
+ argDisp += sizeof(void*);
+ break;
+
+ case VMFunctionData::WordByRef:
+ masm.passABIArg(
+ MoveOperand(argsBase, argDisp, MoveOperand::Kind::EffectiveAddress),
+ MoveOp::GENERAL);
+ argDisp += sizeof(void*);
+ break;
+
+ case VMFunctionData::DoubleByValue:
+ case VMFunctionData::DoubleByRef:
+ MOZ_CRASH("NYI: AArch64 callVM should not be used with 128bit values.");
+ }
+ }
+
+ // Copy the semi-implicit outparam, if any.
+ // It is not a C++-abi outparam, which would get passed in the
+ // outparam register, but a real parameter to the function, which
+ // was stack-allocated above.
+ if (outReg != InvalidReg) {
+ masm.passABIArg(outReg);
+ }
+
+ masm.callWithABI(nativeFun, MoveOp::GENERAL,
+ CheckUnsafeCallWithABI::DontCheckHasExitFrame);
+
+ // SP is used to transfer stack across call boundaries.
+ masm.initPseudoStackPtr();
+
+ // Test for failure.
+ switch (f.failType()) {
+ case Type_Cell:
+ masm.branchTestPtr(Assembler::Zero, r0, r0, masm.failureLabel());
+ break;
+ case Type_Bool:
+ masm.branchIfFalseBool(r0, masm.failureLabel());
+ break;
+ case Type_Void:
+ break;
+ default:
+ MOZ_CRASH("unknown failure kind");
+ }
+
+ // Load the outparam and free any allocated stack.
+ switch (f.outParam) {
+ case Type_Value:
+ masm.Ldr(ARMRegister(JSReturnReg, 64),
+ MemOperand(masm.GetStackPointer64()));
+ masm.freeStack(sizeof(Value));
+ break;
+
+ case Type_Handle:
+ masm.popRooted(f.outParamRootType, ReturnReg, JSReturnOperand);
+ break;
+
+ case Type_Int32:
+ masm.Ldr(ARMRegister(ReturnReg, 32),
+ MemOperand(masm.GetStackPointer64()));
+ masm.freeStack(sizeof(int64_t));
+ break;
+
+ case Type_Bool:
+ masm.Ldrb(ARMRegister(ReturnReg, 32),
+ MemOperand(masm.GetStackPointer64()));
+ masm.freeStack(sizeof(int64_t));
+ break;
+
+ case Type_Double:
+ masm.Ldr(ARMFPRegister(ReturnDoubleReg, 64),
+ MemOperand(masm.GetStackPointer64()));
+ masm.freeStack(sizeof(double));
+ break;
+
+ case Type_Pointer:
+ masm.Ldr(ARMRegister(ReturnReg, 64),
+ MemOperand(masm.GetStackPointer64()));
+ masm.freeStack(sizeof(uintptr_t));
+ break;
+
+ default:
+ MOZ_ASSERT(f.outParam == Type_Void);
+ break;
+ }
+
+ // Until C++ code is instrumented against Spectre, prevent speculative
+ // execution from returning any private data.
+ if (f.returnsData() && JitOptions.spectreJitToCxxCalls) {
+ masm.speculationBarrier();
+ }
+
+ // Pop ExitFooterFrame and the frame pointer.
+ masm.leaveExitFrame(0);
+ masm.pop(FramePointer);
+
+ // Return. Subtract sizeof(void*) for the frame pointer.
+ masm.retn(Imm32(sizeof(ExitFrameLayout) - sizeof(void*) +
+ f.explicitStackSlots() * sizeof(void*) +
+ f.extraValuesToPop * sizeof(Value)));
+
+ return true;
+}
+
+uint32_t JitRuntime::generatePreBarrier(JSContext* cx, MacroAssembler& masm,
+ MIRType type) {
+ AutoCreatedBy acb(masm, "JitRuntime::generatePreBarrier");
+
+ uint32_t offset = startTrampolineCode(masm);
+
+ static_assert(PreBarrierReg == r1);
+ Register temp1 = r2;
+ Register temp2 = r3;
+ Register temp3 = r4;
+ masm.push(temp1);
+ masm.push(temp2);
+ masm.push(temp3);
+
+ Label noBarrier;
+ masm.emitPreBarrierFastPath(cx->runtime(), type, temp1, temp2, temp3,
+ &noBarrier);
+
+ // Call into C++ to mark this GC thing.
+ masm.pop(temp3);
+ masm.pop(temp2);
+ masm.pop(temp1);
+
+ LiveRegisterSet regs =
+ LiveRegisterSet(GeneralRegisterSet(Registers::VolatileMask),
+ FloatRegisterSet(FloatRegisters::VolatileMask));
+
+ // Also preserve the return address.
+ regs.add(lr);
+
+ masm.PushRegsInMask(regs);
+
+ masm.movePtr(ImmPtr(cx->runtime()), r3);
+
+ masm.setupUnalignedABICall(r0);
+ masm.passABIArg(r3);
+ masm.passABIArg(PreBarrierReg);
+ masm.callWithABI(JitPreWriteBarrier(type));
+
+ // Pop the volatile regs and restore LR.
+ masm.PopRegsInMask(regs);
+ masm.abiret();
+
+ masm.bind(&noBarrier);
+ masm.pop(temp3);
+ masm.pop(temp2);
+ masm.pop(temp1);
+ masm.abiret();
+
+ return offset;
+}
+
+void JitRuntime::generateBailoutTailStub(MacroAssembler& masm,
+ Label* bailoutTail) {
+ AutoCreatedBy acb(masm, "JitRuntime::generateBailoutTailStub");
+
+ masm.bind(bailoutTail);
+ masm.generateBailoutTail(r1, r2);
+}
diff --git a/js/src/jit/arm64/vixl/.clang-format b/js/src/jit/arm64/vixl/.clang-format
new file mode 100644
index 0000000000..122a79540d
--- /dev/null
+++ b/js/src/jit/arm64/vixl/.clang-format
@@ -0,0 +1,4 @@
+BasedOnStyle: Chromium
+
+# Ignore all comments because they aren't reflowed properly.
+CommentPragmas: "^"
diff --git a/js/src/jit/arm64/vixl/AUTHORS b/js/src/jit/arm64/vixl/AUTHORS
new file mode 100644
index 0000000000..257ec9d32b
--- /dev/null
+++ b/js/src/jit/arm64/vixl/AUTHORS
@@ -0,0 +1,8 @@
+# Below is a list of people and organisations that have contributed to the VIXL
+# project. Entries should be added to the list as:
+#
+# Name/Organization <email address>
+
+ARM Ltd. <*@arm.com>
+Google Inc. <*@google.com>
+Linaro <*@linaro.org>
diff --git a/js/src/jit/arm64/vixl/Assembler-vixl.cpp b/js/src/jit/arm64/vixl/Assembler-vixl.cpp
new file mode 100644
index 0000000000..6ed31cef78
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Assembler-vixl.cpp
@@ -0,0 +1,5318 @@
+// Copyright 2015, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "jit/arm64/vixl/Assembler-vixl.h"
+
+#include <cmath>
+
+#include "jit/arm64/vixl/MacroAssembler-vixl.h"
+
+namespace vixl {
+
+// CPURegList utilities.
+CPURegister CPURegList::PopLowestIndex() {
+ if (IsEmpty()) {
+ return NoCPUReg;
+ }
+ int index = CountTrailingZeros(list_);
+ VIXL_ASSERT((1ULL << index) & list_);
+ Remove(index);
+ return CPURegister(index, size_, type_);
+}
+
+
+CPURegister CPURegList::PopHighestIndex() {
+ VIXL_ASSERT(IsValid());
+ if (IsEmpty()) {
+ return NoCPUReg;
+ }
+ int index = CountLeadingZeros(list_);
+ index = kRegListSizeInBits - 1 - index;
+ VIXL_ASSERT((1ULL << index) & list_);
+ Remove(index);
+ return CPURegister(index, size_, type_);
+}
+
+
+bool CPURegList::IsValid() const {
+ if ((type_ == CPURegister::kRegister) ||
+ (type_ == CPURegister::kVRegister)) {
+ bool is_valid = true;
+ // Try to create a CPURegister for each element in the list.
+ for (int i = 0; i < kRegListSizeInBits; i++) {
+ if (((list_ >> i) & 1) != 0) {
+ is_valid &= CPURegister(i, size_, type_).IsValid();
+ }
+ }
+ return is_valid;
+ } else if (type_ == CPURegister::kNoRegister) {
+ // We can't use IsEmpty here because that asserts IsValid().
+ return list_ == 0;
+ } else {
+ return false;
+ }
+}
+
+
+void CPURegList::RemoveCalleeSaved() {
+ if (type() == CPURegister::kRegister) {
+ Remove(GetCalleeSaved(RegisterSizeInBits()));
+ } else if (type() == CPURegister::kVRegister) {
+ Remove(GetCalleeSavedV(RegisterSizeInBits()));
+ } else {
+ VIXL_ASSERT(type() == CPURegister::kNoRegister);
+ VIXL_ASSERT(IsEmpty());
+ // The list must already be empty, so do nothing.
+ }
+}
+
+
+CPURegList CPURegList::Union(const CPURegList& list_1,
+ const CPURegList& list_2,
+ const CPURegList& list_3) {
+ return Union(list_1, Union(list_2, list_3));
+}
+
+
+CPURegList CPURegList::Union(const CPURegList& list_1,
+ const CPURegList& list_2,
+ const CPURegList& list_3,
+ const CPURegList& list_4) {
+ return Union(Union(list_1, list_2), Union(list_3, list_4));
+}
+
+
+CPURegList CPURegList::Intersection(const CPURegList& list_1,
+ const CPURegList& list_2,
+ const CPURegList& list_3) {
+ return Intersection(list_1, Intersection(list_2, list_3));
+}
+
+
+CPURegList CPURegList::Intersection(const CPURegList& list_1,
+ const CPURegList& list_2,
+ const CPURegList& list_3,
+ const CPURegList& list_4) {
+ return Intersection(Intersection(list_1, list_2),
+ Intersection(list_3, list_4));
+}
+
+
+CPURegList CPURegList::GetCalleeSaved(unsigned size) {
+ return CPURegList(CPURegister::kRegister, size, 19, 29);
+}
+
+
+CPURegList CPURegList::GetCalleeSavedV(unsigned size) {
+ return CPURegList(CPURegister::kVRegister, size, 8, 15);
+}
+
+
+CPURegList CPURegList::GetCallerSaved(unsigned size) {
+ // Registers x0-x18 and lr (x30) are caller-saved.
+ CPURegList list = CPURegList(CPURegister::kRegister, size, 0, 18);
+ // Do not use lr directly to avoid initialisation order fiasco bugs for users.
+ list.Combine(Register(30, kXRegSize));
+ return list;
+}
+
+
+CPURegList CPURegList::GetCallerSavedV(unsigned size) {
+ // Registers d0-d7 and d16-d31 are caller-saved.
+ CPURegList list = CPURegList(CPURegister::kVRegister, size, 0, 7);
+ list.Combine(CPURegList(CPURegister::kVRegister, size, 16, 31));
+ return list;
+}
+
+
+const CPURegList kCalleeSaved = CPURegList::GetCalleeSaved();
+const CPURegList kCalleeSavedV = CPURegList::GetCalleeSavedV();
+const CPURegList kCallerSaved = CPURegList::GetCallerSaved();
+const CPURegList kCallerSavedV = CPURegList::GetCallerSavedV();
+
+
+// Registers.
+#define WREG(n) w##n,
+const Register Register::wregisters[] = {
+REGISTER_CODE_LIST(WREG)
+};
+#undef WREG
+
+#define XREG(n) x##n,
+const Register Register::xregisters[] = {
+REGISTER_CODE_LIST(XREG)
+};
+#undef XREG
+
+#define BREG(n) b##n,
+const VRegister VRegister::bregisters[] = {
+REGISTER_CODE_LIST(BREG)
+};
+#undef BREG
+
+#define HREG(n) h##n,
+const VRegister VRegister::hregisters[] = {
+REGISTER_CODE_LIST(HREG)
+};
+#undef HREG
+
+#define SREG(n) s##n,
+const VRegister VRegister::sregisters[] = {
+REGISTER_CODE_LIST(SREG)
+};
+#undef SREG
+
+#define DREG(n) d##n,
+const VRegister VRegister::dregisters[] = {
+REGISTER_CODE_LIST(DREG)
+};
+#undef DREG
+
+#define QREG(n) q##n,
+const VRegister VRegister::qregisters[] = {
+REGISTER_CODE_LIST(QREG)
+};
+#undef QREG
+
+#define VREG(n) v##n,
+const VRegister VRegister::vregisters[] = {
+REGISTER_CODE_LIST(VREG)
+};
+#undef VREG
+
+
+const Register& Register::WRegFromCode(unsigned code) {
+ if (code == kSPRegInternalCode) {
+ return wsp;
+ } else {
+ VIXL_ASSERT(code < kNumberOfRegisters);
+ return wregisters[code];
+ }
+}
+
+
+const Register& Register::XRegFromCode(unsigned code) {
+ if (code == kSPRegInternalCode) {
+ return sp;
+ } else {
+ VIXL_ASSERT(code < kNumberOfRegisters);
+ return xregisters[code];
+ }
+}
+
+
+const VRegister& VRegister::BRegFromCode(unsigned code) {
+ VIXL_ASSERT(code < kNumberOfVRegisters);
+ return bregisters[code];
+}
+
+
+const VRegister& VRegister::HRegFromCode(unsigned code) {
+ VIXL_ASSERT(code < kNumberOfVRegisters);
+ return hregisters[code];
+}
+
+
+const VRegister& VRegister::SRegFromCode(unsigned code) {
+ VIXL_ASSERT(code < kNumberOfVRegisters);
+ return sregisters[code];
+}
+
+
+const VRegister& VRegister::DRegFromCode(unsigned code) {
+ VIXL_ASSERT(code < kNumberOfVRegisters);
+ return dregisters[code];
+}
+
+
+const VRegister& VRegister::QRegFromCode(unsigned code) {
+ VIXL_ASSERT(code < kNumberOfVRegisters);
+ return qregisters[code];
+}
+
+
+const VRegister& VRegister::VRegFromCode(unsigned code) {
+ VIXL_ASSERT(code < kNumberOfVRegisters);
+ return vregisters[code];
+}
+
+
+const Register& CPURegister::W() const {
+ VIXL_ASSERT(IsValidRegister());
+ return Register::WRegFromCode(code_);
+}
+
+
+const Register& CPURegister::X() const {
+ VIXL_ASSERT(IsValidRegister());
+ return Register::XRegFromCode(code_);
+}
+
+
+const VRegister& CPURegister::B() const {
+ VIXL_ASSERT(IsValidVRegister());
+ return VRegister::BRegFromCode(code_);
+}
+
+
+const VRegister& CPURegister::H() const {
+ VIXL_ASSERT(IsValidVRegister());
+ return VRegister::HRegFromCode(code_);
+}
+
+
+const VRegister& CPURegister::S() const {
+ VIXL_ASSERT(IsValidVRegister());
+ return VRegister::SRegFromCode(code_);
+}
+
+
+const VRegister& CPURegister::D() const {
+ VIXL_ASSERT(IsValidVRegister());
+ return VRegister::DRegFromCode(code_);
+}
+
+
+const VRegister& CPURegister::Q() const {
+ VIXL_ASSERT(IsValidVRegister());
+ return VRegister::QRegFromCode(code_);
+}
+
+
+const VRegister& CPURegister::V() const {
+ VIXL_ASSERT(IsValidVRegister());
+ return VRegister::VRegFromCode(code_);
+}
+
+
+// Operand.
+Operand::Operand(int64_t immediate)
+ : immediate_(immediate),
+ reg_(NoReg),
+ shift_(NO_SHIFT),
+ extend_(NO_EXTEND),
+ shift_amount_(0) {}
+
+
+Operand::Operand(Register reg, Shift shift, unsigned shift_amount)
+ : reg_(reg),
+ shift_(shift),
+ extend_(NO_EXTEND),
+ shift_amount_(shift_amount) {
+ VIXL_ASSERT(shift != MSL);
+ VIXL_ASSERT(reg.Is64Bits() || (shift_amount < kWRegSize));
+ VIXL_ASSERT(reg.Is32Bits() || (shift_amount < kXRegSize));
+ VIXL_ASSERT(!reg.IsSP());
+}
+
+
+Operand::Operand(Register reg, Extend extend, unsigned shift_amount)
+ : reg_(reg),
+ shift_(NO_SHIFT),
+ extend_(extend),
+ shift_amount_(shift_amount) {
+ VIXL_ASSERT(reg.IsValid());
+ VIXL_ASSERT(shift_amount <= 4);
+ VIXL_ASSERT(!reg.IsSP());
+
+ // Extend modes SXTX and UXTX require a 64-bit register.
+ VIXL_ASSERT(reg.Is64Bits() || ((extend != SXTX) && (extend != UXTX)));
+}
+
+
+bool Operand::IsImmediate() const {
+ return reg_.Is(NoReg);
+}
+
+
+bool Operand::IsShiftedRegister() const {
+ return reg_.IsValid() && (shift_ != NO_SHIFT);
+}
+
+
+bool Operand::IsExtendedRegister() const {
+ return reg_.IsValid() && (extend_ != NO_EXTEND);
+}
+
+
+bool Operand::IsZero() const {
+ if (IsImmediate()) {
+ return immediate() == 0;
+ } else {
+ return reg().IsZero();
+ }
+}
+
+
+Operand Operand::ToExtendedRegister() const {
+ VIXL_ASSERT(IsShiftedRegister());
+ VIXL_ASSERT((shift_ == LSL) && (shift_amount_ <= 4));
+ return Operand(reg_, reg_.Is64Bits() ? UXTX : UXTW, shift_amount_);
+}
+
+
+// MemOperand
+MemOperand::MemOperand(Register base, int64_t offset, AddrMode addrmode)
+ : base_(base), regoffset_(NoReg), offset_(offset), addrmode_(addrmode) {
+ VIXL_ASSERT(base.Is64Bits() && !base.IsZero());
+}
+
+
+MemOperand::MemOperand(Register base,
+ Register regoffset,
+ Extend extend,
+ unsigned shift_amount)
+ : base_(base), regoffset_(regoffset), offset_(0), addrmode_(Offset),
+ shift_(NO_SHIFT), extend_(extend), shift_amount_(shift_amount) {
+ VIXL_ASSERT(base.Is64Bits() && !base.IsZero());
+ VIXL_ASSERT(!regoffset.IsSP());
+ VIXL_ASSERT((extend == UXTW) || (extend == SXTW) || (extend == SXTX));
+
+ // SXTX extend mode requires a 64-bit offset register.
+ VIXL_ASSERT(regoffset.Is64Bits() || (extend != SXTX));
+}
+
+
+MemOperand::MemOperand(Register base,
+ Register regoffset,
+ Shift shift,
+ unsigned shift_amount)
+ : base_(base), regoffset_(regoffset), offset_(0), addrmode_(Offset),
+ shift_(shift), extend_(NO_EXTEND), shift_amount_(shift_amount) {
+ VIXL_ASSERT(base.Is64Bits() && !base.IsZero());
+ VIXL_ASSERT(regoffset.Is64Bits() && !regoffset.IsSP());
+ VIXL_ASSERT(shift == LSL);
+}
+
+
+MemOperand::MemOperand(Register base, const Operand& offset, AddrMode addrmode)
+ : base_(base), regoffset_(NoReg), addrmode_(addrmode) {
+ VIXL_ASSERT(base.Is64Bits() && !base.IsZero());
+
+ if (offset.IsImmediate()) {
+ offset_ = offset.immediate();
+ } else if (offset.IsShiftedRegister()) {
+ VIXL_ASSERT((addrmode == Offset) || (addrmode == PostIndex));
+
+ regoffset_ = offset.reg();
+ shift_ = offset.shift();
+ shift_amount_ = offset.shift_amount();
+
+ extend_ = NO_EXTEND;
+ offset_ = 0;
+
+ // These assertions match those in the shifted-register constructor.
+ VIXL_ASSERT(regoffset_.Is64Bits() && !regoffset_.IsSP());
+ VIXL_ASSERT(shift_ == LSL);
+ } else {
+ VIXL_ASSERT(offset.IsExtendedRegister());
+ VIXL_ASSERT(addrmode == Offset);
+
+ regoffset_ = offset.reg();
+ extend_ = offset.extend();
+ shift_amount_ = offset.shift_amount();
+
+ shift_ = NO_SHIFT;
+ offset_ = 0;
+
+ // These assertions match those in the extended-register constructor.
+ VIXL_ASSERT(!regoffset_.IsSP());
+ VIXL_ASSERT((extend_ == UXTW) || (extend_ == SXTW) || (extend_ == SXTX));
+ VIXL_ASSERT((regoffset_.Is64Bits() || (extend_ != SXTX)));
+ }
+}
+
+
+bool MemOperand::IsImmediateOffset() const {
+ return (addrmode_ == Offset) && regoffset_.Is(NoReg);
+}
+
+
+bool MemOperand::IsRegisterOffset() const {
+ return (addrmode_ == Offset) && !regoffset_.Is(NoReg);
+}
+
+
+bool MemOperand::IsPreIndex() const {
+ return addrmode_ == PreIndex;
+}
+
+
+bool MemOperand::IsPostIndex() const {
+ return addrmode_ == PostIndex;
+}
+
+
+void MemOperand::AddOffset(int64_t offset) {
+ VIXL_ASSERT(IsImmediateOffset());
+ offset_ += offset;
+}
+
+
+// Assembler
+Assembler::Assembler(PositionIndependentCodeOption pic)
+ : pic_(pic),
+ cpu_features_(CPUFeatures::AArch64LegacyBaseline())
+{
+ // Mozilla change: always use maximally-present features.
+ cpu_features_.Combine(CPUFeatures::InferFromOS());
+
+ // Mozilla change: Compile time hard-coded value from js-config.mozbuild.
+#ifndef MOZ_AARCH64_JSCVT
+# error "MOZ_AARCH64_JSCVT must be defined."
+#elif MOZ_AARCH64_JSCVT >= 1
+ // Note, vixl backend implements the JSCVT flag as a boolean despite having 3
+ // extra bits reserved for forward compatibility in the ARMv8 documentation.
+ cpu_features_.Combine(CPUFeatures::kJSCVT);
+#endif
+}
+
+
+// Code generation.
+void Assembler::br(const Register& xn) {
+ VIXL_ASSERT(xn.Is64Bits());
+ Emit(BR | Rn(xn));
+}
+
+
+void Assembler::blr(const Register& xn) {
+ VIXL_ASSERT(xn.Is64Bits());
+ Emit(BLR | Rn(xn));
+}
+
+
+void Assembler::ret(const Register& xn) {
+ VIXL_ASSERT(xn.Is64Bits());
+ Emit(RET | Rn(xn));
+}
+
+
+void Assembler::NEONTable(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ NEONTableOp op) {
+ VIXL_ASSERT(vd.Is16B() || vd.Is8B());
+ VIXL_ASSERT(vn.Is16B());
+ VIXL_ASSERT(AreSameFormat(vd, vm));
+ Emit(op | (vd.IsQ() ? NEON_Q : 0) | Rm(vm) | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::tbl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm) {
+ NEONTable(vd, vn, vm, NEON_TBL_1v);
+}
+
+
+void Assembler::tbl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vn2,
+ const VRegister& vm) {
+ USE(vn2);
+ VIXL_ASSERT(AreSameFormat(vn, vn2));
+ VIXL_ASSERT(vn2.code() == ((vn.code() + 1) % kNumberOfVRegisters));
+
+ NEONTable(vd, vn, vm, NEON_TBL_2v);
+}
+
+
+void Assembler::tbl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vn2,
+ const VRegister& vn3,
+ const VRegister& vm) {
+ USE(vn2, vn3);
+ VIXL_ASSERT(AreSameFormat(vn, vn2, vn3));
+ VIXL_ASSERT(vn2.code() == ((vn.code() + 1) % kNumberOfVRegisters));
+ VIXL_ASSERT(vn3.code() == ((vn.code() + 2) % kNumberOfVRegisters));
+
+ NEONTable(vd, vn, vm, NEON_TBL_3v);
+}
+
+
+void Assembler::tbl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vn2,
+ const VRegister& vn3,
+ const VRegister& vn4,
+ const VRegister& vm) {
+ USE(vn2, vn3, vn4);
+ VIXL_ASSERT(AreSameFormat(vn, vn2, vn3, vn4));
+ VIXL_ASSERT(vn2.code() == ((vn.code() + 1) % kNumberOfVRegisters));
+ VIXL_ASSERT(vn3.code() == ((vn.code() + 2) % kNumberOfVRegisters));
+ VIXL_ASSERT(vn4.code() == ((vn.code() + 3) % kNumberOfVRegisters));
+
+ NEONTable(vd, vn, vm, NEON_TBL_4v);
+}
+
+
+void Assembler::tbx(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm) {
+ NEONTable(vd, vn, vm, NEON_TBX_1v);
+}
+
+
+void Assembler::tbx(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vn2,
+ const VRegister& vm) {
+ USE(vn2);
+ VIXL_ASSERT(AreSameFormat(vn, vn2));
+ VIXL_ASSERT(vn2.code() == ((vn.code() + 1) % kNumberOfVRegisters));
+
+ NEONTable(vd, vn, vm, NEON_TBX_2v);
+}
+
+
+void Assembler::tbx(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vn2,
+ const VRegister& vn3,
+ const VRegister& vm) {
+ USE(vn2, vn3);
+ VIXL_ASSERT(AreSameFormat(vn, vn2, vn3));
+ VIXL_ASSERT(vn2.code() == ((vn.code() + 1) % kNumberOfVRegisters));
+ VIXL_ASSERT(vn3.code() == ((vn.code() + 2) % kNumberOfVRegisters));
+
+ NEONTable(vd, vn, vm, NEON_TBX_3v);
+}
+
+
+void Assembler::tbx(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vn2,
+ const VRegister& vn3,
+ const VRegister& vn4,
+ const VRegister& vm) {
+ USE(vn2, vn3, vn4);
+ VIXL_ASSERT(AreSameFormat(vn, vn2, vn3, vn4));
+ VIXL_ASSERT(vn2.code() == ((vn.code() + 1) % kNumberOfVRegisters));
+ VIXL_ASSERT(vn3.code() == ((vn.code() + 2) % kNumberOfVRegisters));
+ VIXL_ASSERT(vn4.code() == ((vn.code() + 3) % kNumberOfVRegisters));
+
+ NEONTable(vd, vn, vm, NEON_TBX_4v);
+}
+
+
+void Assembler::add(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ AddSub(rd, rn, operand, LeaveFlags, ADD);
+}
+
+
+void Assembler::adds(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ AddSub(rd, rn, operand, SetFlags, ADD);
+}
+
+
+void Assembler::cmn(const Register& rn,
+ const Operand& operand) {
+ Register zr = AppropriateZeroRegFor(rn);
+ adds(zr, rn, operand);
+}
+
+
+void Assembler::sub(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ AddSub(rd, rn, operand, LeaveFlags, SUB);
+}
+
+
+void Assembler::subs(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ AddSub(rd, rn, operand, SetFlags, SUB);
+}
+
+
+void Assembler::cmp(const Register& rn, const Operand& operand) {
+ Register zr = AppropriateZeroRegFor(rn);
+ subs(zr, rn, operand);
+}
+
+
+void Assembler::neg(const Register& rd, const Operand& operand) {
+ Register zr = AppropriateZeroRegFor(rd);
+ sub(rd, zr, operand);
+}
+
+
+void Assembler::negs(const Register& rd, const Operand& operand) {
+ Register zr = AppropriateZeroRegFor(rd);
+ subs(rd, zr, operand);
+}
+
+
+void Assembler::adc(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ AddSubWithCarry(rd, rn, operand, LeaveFlags, ADC);
+}
+
+
+void Assembler::adcs(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ AddSubWithCarry(rd, rn, operand, SetFlags, ADC);
+}
+
+
+void Assembler::sbc(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ AddSubWithCarry(rd, rn, operand, LeaveFlags, SBC);
+}
+
+
+void Assembler::sbcs(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ AddSubWithCarry(rd, rn, operand, SetFlags, SBC);
+}
+
+
+void Assembler::ngc(const Register& rd, const Operand& operand) {
+ Register zr = AppropriateZeroRegFor(rd);
+ sbc(rd, zr, operand);
+}
+
+
+void Assembler::ngcs(const Register& rd, const Operand& operand) {
+ Register zr = AppropriateZeroRegFor(rd);
+ sbcs(rd, zr, operand);
+}
+
+
+// Logical instructions.
+void Assembler::and_(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ Logical(rd, rn, operand, AND);
+}
+
+
+void Assembler::bic(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ Logical(rd, rn, operand, BIC);
+}
+
+
+void Assembler::bics(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ Logical(rd, rn, operand, BICS);
+}
+
+
+void Assembler::orr(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ Logical(rd, rn, operand, ORR);
+}
+
+
+void Assembler::orn(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ Logical(rd, rn, operand, ORN);
+}
+
+
+void Assembler::eor(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ Logical(rd, rn, operand, EOR);
+}
+
+
+void Assembler::eon(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ Logical(rd, rn, operand, EON);
+}
+
+
+void Assembler::lslv(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(rd.size() == rn.size());
+ VIXL_ASSERT(rd.size() == rm.size());
+ Emit(SF(rd) | LSLV | Rm(rm) | Rn(rn) | Rd(rd));
+}
+
+
+void Assembler::lsrv(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(rd.size() == rn.size());
+ VIXL_ASSERT(rd.size() == rm.size());
+ Emit(SF(rd) | LSRV | Rm(rm) | Rn(rn) | Rd(rd));
+}
+
+
+void Assembler::asrv(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(rd.size() == rn.size());
+ VIXL_ASSERT(rd.size() == rm.size());
+ Emit(SF(rd) | ASRV | Rm(rm) | Rn(rn) | Rd(rd));
+}
+
+
+void Assembler::rorv(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(rd.size() == rn.size());
+ VIXL_ASSERT(rd.size() == rm.size());
+ Emit(SF(rd) | RORV | Rm(rm) | Rn(rn) | Rd(rd));
+}
+
+
+// Bitfield operations.
+void Assembler::bfm(const Register& rd,
+ const Register& rn,
+ unsigned immr,
+ unsigned imms) {
+ VIXL_ASSERT(rd.size() == rn.size());
+ Instr N = SF(rd) >> (kSFOffset - kBitfieldNOffset);
+ Emit(SF(rd) | BFM | N |
+ ImmR(immr, rd.size()) | ImmS(imms, rn.size()) | Rn(rn) | Rd(rd));
+}
+
+
+void Assembler::sbfm(const Register& rd,
+ const Register& rn,
+ unsigned immr,
+ unsigned imms) {
+ VIXL_ASSERT(rd.Is64Bits() || rn.Is32Bits());
+ Instr N = SF(rd) >> (kSFOffset - kBitfieldNOffset);
+ Emit(SF(rd) | SBFM | N |
+ ImmR(immr, rd.size()) | ImmS(imms, rn.size()) | Rn(rn) | Rd(rd));
+}
+
+
+void Assembler::ubfm(const Register& rd,
+ const Register& rn,
+ unsigned immr,
+ unsigned imms) {
+ VIXL_ASSERT(rd.size() == rn.size());
+ Instr N = SF(rd) >> (kSFOffset - kBitfieldNOffset);
+ Emit(SF(rd) | UBFM | N |
+ ImmR(immr, rd.size()) | ImmS(imms, rn.size()) | Rn(rn) | Rd(rd));
+}
+
+
+void Assembler::extr(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ unsigned lsb) {
+ VIXL_ASSERT(rd.size() == rn.size());
+ VIXL_ASSERT(rd.size() == rm.size());
+ Instr N = SF(rd) >> (kSFOffset - kBitfieldNOffset);
+ Emit(SF(rd) | EXTR | N | Rm(rm) | ImmS(lsb, rn.size()) | Rn(rn) | Rd(rd));
+}
+
+
+void Assembler::csel(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ Condition cond) {
+ ConditionalSelect(rd, rn, rm, cond, CSEL);
+}
+
+
+void Assembler::csinc(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ Condition cond) {
+ ConditionalSelect(rd, rn, rm, cond, CSINC);
+}
+
+
+void Assembler::csinv(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ Condition cond) {
+ ConditionalSelect(rd, rn, rm, cond, CSINV);
+}
+
+
+void Assembler::csneg(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ Condition cond) {
+ ConditionalSelect(rd, rn, rm, cond, CSNEG);
+}
+
+
+void Assembler::cset(const Register &rd, Condition cond) {
+ VIXL_ASSERT((cond != al) && (cond != nv));
+ Register zr = AppropriateZeroRegFor(rd);
+ csinc(rd, zr, zr, InvertCondition(cond));
+}
+
+
+void Assembler::csetm(const Register &rd, Condition cond) {
+ VIXL_ASSERT((cond != al) && (cond != nv));
+ Register zr = AppropriateZeroRegFor(rd);
+ csinv(rd, zr, zr, InvertCondition(cond));
+}
+
+
+void Assembler::cinc(const Register &rd, const Register &rn, Condition cond) {
+ VIXL_ASSERT((cond != al) && (cond != nv));
+ csinc(rd, rn, rn, InvertCondition(cond));
+}
+
+
+void Assembler::cinv(const Register &rd, const Register &rn, Condition cond) {
+ VIXL_ASSERT((cond != al) && (cond != nv));
+ csinv(rd, rn, rn, InvertCondition(cond));
+}
+
+
+void Assembler::cneg(const Register &rd, const Register &rn, Condition cond) {
+ VIXL_ASSERT((cond != al) && (cond != nv));
+ csneg(rd, rn, rn, InvertCondition(cond));
+}
+
+
+void Assembler::ConditionalSelect(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ Condition cond,
+ ConditionalSelectOp op) {
+ VIXL_ASSERT(rd.size() == rn.size());
+ VIXL_ASSERT(rd.size() == rm.size());
+ Emit(SF(rd) | op | Rm(rm) | Cond(cond) | Rn(rn) | Rd(rd));
+}
+
+
+void Assembler::ccmn(const Register& rn,
+ const Operand& operand,
+ StatusFlags nzcv,
+ Condition cond) {
+ ConditionalCompare(rn, operand, nzcv, cond, CCMN);
+}
+
+
+void Assembler::ccmp(const Register& rn,
+ const Operand& operand,
+ StatusFlags nzcv,
+ Condition cond) {
+ ConditionalCompare(rn, operand, nzcv, cond, CCMP);
+}
+
+
+void Assembler::DataProcessing3Source(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ const Register& ra,
+ DataProcessing3SourceOp op) {
+ Emit(SF(rd) | op | Rm(rm) | Ra(ra) | Rn(rn) | Rd(rd));
+}
+
+
+void Assembler::crc32b(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(rd.Is32Bits() && rn.Is32Bits() && rm.Is32Bits());
+ Emit(SF(rm) | Rm(rm) | CRC32B | Rn(rn) | Rd(rd));
+}
+
+
+void Assembler::crc32h(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(rd.Is32Bits() && rn.Is32Bits() && rm.Is32Bits());
+ Emit(SF(rm) | Rm(rm) | CRC32H | Rn(rn) | Rd(rd));
+}
+
+
+void Assembler::crc32w(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(rd.Is32Bits() && rn.Is32Bits() && rm.Is32Bits());
+ Emit(SF(rm) | Rm(rm) | CRC32W | Rn(rn) | Rd(rd));
+}
+
+
+void Assembler::crc32x(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(rd.Is32Bits() && rn.Is32Bits() && rm.Is64Bits());
+ Emit(SF(rm) | Rm(rm) | CRC32X | Rn(rn) | Rd(rd));
+}
+
+
+void Assembler::crc32cb(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(rd.Is32Bits() && rn.Is32Bits() && rm.Is32Bits());
+ Emit(SF(rm) | Rm(rm) | CRC32CB | Rn(rn) | Rd(rd));
+}
+
+
+void Assembler::crc32ch(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(rd.Is32Bits() && rn.Is32Bits() && rm.Is32Bits());
+ Emit(SF(rm) | Rm(rm) | CRC32CH | Rn(rn) | Rd(rd));
+}
+
+
+void Assembler::crc32cw(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(rd.Is32Bits() && rn.Is32Bits() && rm.Is32Bits());
+ Emit(SF(rm) | Rm(rm) | CRC32CW | Rn(rn) | Rd(rd));
+}
+
+
+void Assembler::crc32cx(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(rd.Is32Bits() && rn.Is32Bits() && rm.Is64Bits());
+ Emit(SF(rm) | Rm(rm) | CRC32CX | Rn(rn) | Rd(rd));
+}
+
+
+void Assembler::mul(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(AreSameSizeAndType(rd, rn, rm));
+ DataProcessing3Source(rd, rn, rm, AppropriateZeroRegFor(rd), MADD);
+}
+
+
+void Assembler::madd(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ const Register& ra) {
+ DataProcessing3Source(rd, rn, rm, ra, MADD);
+}
+
+
+void Assembler::mneg(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(AreSameSizeAndType(rd, rn, rm));
+ DataProcessing3Source(rd, rn, rm, AppropriateZeroRegFor(rd), MSUB);
+}
+
+
+void Assembler::msub(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ const Register& ra) {
+ DataProcessing3Source(rd, rn, rm, ra, MSUB);
+}
+
+
+void Assembler::umaddl(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ const Register& ra) {
+ VIXL_ASSERT(rd.Is64Bits() && ra.Is64Bits());
+ VIXL_ASSERT(rn.Is32Bits() && rm.Is32Bits());
+ DataProcessing3Source(rd, rn, rm, ra, UMADDL_x);
+}
+
+
+void Assembler::smaddl(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ const Register& ra) {
+ VIXL_ASSERT(rd.Is64Bits() && ra.Is64Bits());
+ VIXL_ASSERT(rn.Is32Bits() && rm.Is32Bits());
+ DataProcessing3Source(rd, rn, rm, ra, SMADDL_x);
+}
+
+
+void Assembler::umsubl(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ const Register& ra) {
+ VIXL_ASSERT(rd.Is64Bits() && ra.Is64Bits());
+ VIXL_ASSERT(rn.Is32Bits() && rm.Is32Bits());
+ DataProcessing3Source(rd, rn, rm, ra, UMSUBL_x);
+}
+
+
+void Assembler::smsubl(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ const Register& ra) {
+ VIXL_ASSERT(rd.Is64Bits() && ra.Is64Bits());
+ VIXL_ASSERT(rn.Is32Bits() && rm.Is32Bits());
+ DataProcessing3Source(rd, rn, rm, ra, SMSUBL_x);
+}
+
+
+void Assembler::smull(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(rd.Is64Bits());
+ VIXL_ASSERT(rn.Is32Bits() && rm.Is32Bits());
+ DataProcessing3Source(rd, rn, rm, xzr, SMADDL_x);
+}
+
+
+void Assembler::sdiv(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(rd.size() == rn.size());
+ VIXL_ASSERT(rd.size() == rm.size());
+ Emit(SF(rd) | SDIV | Rm(rm) | Rn(rn) | Rd(rd));
+}
+
+
+void Assembler::smulh(const Register& xd,
+ const Register& xn,
+ const Register& xm) {
+ VIXL_ASSERT(xd.Is64Bits() && xn.Is64Bits() && xm.Is64Bits());
+ DataProcessing3Source(xd, xn, xm, xzr, SMULH_x);
+}
+
+
+void Assembler::umulh(const Register& xd,
+ const Register& xn,
+ const Register& xm) {
+ VIXL_ASSERT(xd.Is64Bits() && xn.Is64Bits() && xm.Is64Bits());
+ DataProcessing3Source(xd, xn, xm, xzr, UMULH_x);
+}
+
+
+void Assembler::udiv(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(rd.size() == rn.size());
+ VIXL_ASSERT(rd.size() == rm.size());
+ Emit(SF(rd) | UDIV | Rm(rm) | Rn(rn) | Rd(rd));
+}
+
+
+void Assembler::rbit(const Register& rd,
+ const Register& rn) {
+ DataProcessing1Source(rd, rn, RBIT);
+}
+
+
+void Assembler::rev16(const Register& rd,
+ const Register& rn) {
+ DataProcessing1Source(rd, rn, REV16);
+}
+
+
+void Assembler::rev32(const Register& rd,
+ const Register& rn) {
+ VIXL_ASSERT(rd.Is64Bits());
+ DataProcessing1Source(rd, rn, REV);
+}
+
+
+void Assembler::rev(const Register& rd,
+ const Register& rn) {
+ DataProcessing1Source(rd, rn, rd.Is64Bits() ? REV_x : REV_w);
+}
+
+
+void Assembler::clz(const Register& rd,
+ const Register& rn) {
+ DataProcessing1Source(rd, rn, CLZ);
+}
+
+
+void Assembler::cls(const Register& rd,
+ const Register& rn) {
+ DataProcessing1Source(rd, rn, CLS);
+}
+
+
+void Assembler::ldp(const CPURegister& rt,
+ const CPURegister& rt2,
+ const MemOperand& src) {
+ LoadStorePair(rt, rt2, src, LoadPairOpFor(rt, rt2));
+}
+
+
+void Assembler::stp(const CPURegister& rt,
+ const CPURegister& rt2,
+ const MemOperand& dst) {
+ LoadStorePair(rt, rt2, dst, StorePairOpFor(rt, rt2));
+}
+
+
+void Assembler::ldpsw(const Register& rt,
+ const Register& rt2,
+ const MemOperand& src) {
+ VIXL_ASSERT(rt.Is64Bits());
+ LoadStorePair(rt, rt2, src, LDPSW_x);
+}
+
+
+void Assembler::LoadStorePair(const CPURegister& rt,
+ const CPURegister& rt2,
+ const MemOperand& addr,
+ LoadStorePairOp op) {
+ // 'rt' and 'rt2' can only be aliased for stores.
+ VIXL_ASSERT(((op & LoadStorePairLBit) == 0) || !rt.Is(rt2));
+ VIXL_ASSERT(AreSameSizeAndType(rt, rt2));
+ VIXL_ASSERT(IsImmLSPair(addr.offset(), CalcLSPairDataSize(op)));
+
+ int offset = static_cast<int>(addr.offset());
+ Instr memop = op | Rt(rt) | Rt2(rt2) | RnSP(addr.base()) |
+ ImmLSPair(offset, CalcLSPairDataSize(op));
+
+ Instr addrmodeop;
+ if (addr.IsImmediateOffset()) {
+ addrmodeop = LoadStorePairOffsetFixed;
+ } else {
+ VIXL_ASSERT(addr.offset() != 0);
+ if (addr.IsPreIndex()) {
+ addrmodeop = LoadStorePairPreIndexFixed;
+ } else {
+ VIXL_ASSERT(addr.IsPostIndex());
+ addrmodeop = LoadStorePairPostIndexFixed;
+ }
+ }
+ Emit(addrmodeop | memop);
+}
+
+
+void Assembler::ldnp(const CPURegister& rt,
+ const CPURegister& rt2,
+ const MemOperand& src) {
+ LoadStorePairNonTemporal(rt, rt2, src,
+ LoadPairNonTemporalOpFor(rt, rt2));
+}
+
+
+void Assembler::stnp(const CPURegister& rt,
+ const CPURegister& rt2,
+ const MemOperand& dst) {
+ LoadStorePairNonTemporal(rt, rt2, dst,
+ StorePairNonTemporalOpFor(rt, rt2));
+}
+
+
+void Assembler::LoadStorePairNonTemporal(const CPURegister& rt,
+ const CPURegister& rt2,
+ const MemOperand& addr,
+ LoadStorePairNonTemporalOp op) {
+ VIXL_ASSERT(!rt.Is(rt2));
+ VIXL_ASSERT(AreSameSizeAndType(rt, rt2));
+ VIXL_ASSERT(addr.IsImmediateOffset());
+
+ unsigned size = CalcLSPairDataSize(
+ static_cast<LoadStorePairOp>(op & LoadStorePairMask));
+ VIXL_ASSERT(IsImmLSPair(addr.offset(), size));
+ int offset = static_cast<int>(addr.offset());
+ Emit(op | Rt(rt) | Rt2(rt2) | RnSP(addr.base()) | ImmLSPair(offset, size));
+}
+
+
+// Memory instructions.
+void Assembler::ldrb(const Register& rt, const MemOperand& src,
+ LoadStoreScalingOption option) {
+ VIXL_ASSERT(option != RequireUnscaledOffset);
+ VIXL_ASSERT(option != PreferUnscaledOffset);
+ LoadStore(rt, src, LDRB_w, option);
+}
+
+
+void Assembler::strb(const Register& rt, const MemOperand& dst,
+ LoadStoreScalingOption option) {
+ VIXL_ASSERT(option != RequireUnscaledOffset);
+ VIXL_ASSERT(option != PreferUnscaledOffset);
+ LoadStore(rt, dst, STRB_w, option);
+}
+
+
+void Assembler::ldrsb(const Register& rt, const MemOperand& src,
+ LoadStoreScalingOption option) {
+ VIXL_ASSERT(option != RequireUnscaledOffset);
+ VIXL_ASSERT(option != PreferUnscaledOffset);
+ LoadStore(rt, src, rt.Is64Bits() ? LDRSB_x : LDRSB_w, option);
+}
+
+
+void Assembler::ldrh(const Register& rt, const MemOperand& src,
+ LoadStoreScalingOption option) {
+ VIXL_ASSERT(option != RequireUnscaledOffset);
+ VIXL_ASSERT(option != PreferUnscaledOffset);
+ LoadStore(rt, src, LDRH_w, option);
+}
+
+
+void Assembler::strh(const Register& rt, const MemOperand& dst,
+ LoadStoreScalingOption option) {
+ VIXL_ASSERT(option != RequireUnscaledOffset);
+ VIXL_ASSERT(option != PreferUnscaledOffset);
+ LoadStore(rt, dst, STRH_w, option);
+}
+
+
+void Assembler::ldrsh(const Register& rt, const MemOperand& src,
+ LoadStoreScalingOption option) {
+ VIXL_ASSERT(option != RequireUnscaledOffset);
+ VIXL_ASSERT(option != PreferUnscaledOffset);
+ LoadStore(rt, src, rt.Is64Bits() ? LDRSH_x : LDRSH_w, option);
+}
+
+
+void Assembler::ldr(const CPURegister& rt, const MemOperand& src,
+ LoadStoreScalingOption option) {
+ VIXL_ASSERT(option != RequireUnscaledOffset);
+ VIXL_ASSERT(option != PreferUnscaledOffset);
+ LoadStore(rt, src, LoadOpFor(rt), option);
+}
+
+
+void Assembler::str(const CPURegister& rt, const MemOperand& dst,
+ LoadStoreScalingOption option) {
+ VIXL_ASSERT(option != RequireUnscaledOffset);
+ VIXL_ASSERT(option != PreferUnscaledOffset);
+ LoadStore(rt, dst, StoreOpFor(rt), option);
+}
+
+
+void Assembler::ldrsw(const Register& rt, const MemOperand& src,
+ LoadStoreScalingOption option) {
+ VIXL_ASSERT(rt.Is64Bits());
+ VIXL_ASSERT(option != RequireUnscaledOffset);
+ VIXL_ASSERT(option != PreferUnscaledOffset);
+ LoadStore(rt, src, LDRSW_x, option);
+}
+
+
+void Assembler::ldurb(const Register& rt, const MemOperand& src,
+ LoadStoreScalingOption option) {
+ VIXL_ASSERT(option != RequireScaledOffset);
+ VIXL_ASSERT(option != PreferScaledOffset);
+ LoadStore(rt, src, LDRB_w, option);
+}
+
+
+void Assembler::sturb(const Register& rt, const MemOperand& dst,
+ LoadStoreScalingOption option) {
+ VIXL_ASSERT(option != RequireScaledOffset);
+ VIXL_ASSERT(option != PreferScaledOffset);
+ LoadStore(rt, dst, STRB_w, option);
+}
+
+
+void Assembler::ldursb(const Register& rt, const MemOperand& src,
+ LoadStoreScalingOption option) {
+ VIXL_ASSERT(option != RequireScaledOffset);
+ VIXL_ASSERT(option != PreferScaledOffset);
+ LoadStore(rt, src, rt.Is64Bits() ? LDRSB_x : LDRSB_w, option);
+}
+
+
+void Assembler::ldurh(const Register& rt, const MemOperand& src,
+ LoadStoreScalingOption option) {
+ VIXL_ASSERT(option != RequireScaledOffset);
+ VIXL_ASSERT(option != PreferScaledOffset);
+ LoadStore(rt, src, LDRH_w, option);
+}
+
+
+void Assembler::sturh(const Register& rt, const MemOperand& dst,
+ LoadStoreScalingOption option) {
+ VIXL_ASSERT(option != RequireScaledOffset);
+ VIXL_ASSERT(option != PreferScaledOffset);
+ LoadStore(rt, dst, STRH_w, option);
+}
+
+
+void Assembler::ldursh(const Register& rt, const MemOperand& src,
+ LoadStoreScalingOption option) {
+ VIXL_ASSERT(option != RequireScaledOffset);
+ VIXL_ASSERT(option != PreferScaledOffset);
+ LoadStore(rt, src, rt.Is64Bits() ? LDRSH_x : LDRSH_w, option);
+}
+
+
+void Assembler::ldur(const CPURegister& rt, const MemOperand& src,
+ LoadStoreScalingOption option) {
+ VIXL_ASSERT(option != RequireScaledOffset);
+ VIXL_ASSERT(option != PreferScaledOffset);
+ LoadStore(rt, src, LoadOpFor(rt), option);
+}
+
+
+void Assembler::stur(const CPURegister& rt, const MemOperand& dst,
+ LoadStoreScalingOption option) {
+ VIXL_ASSERT(option != RequireScaledOffset);
+ VIXL_ASSERT(option != PreferScaledOffset);
+ LoadStore(rt, dst, StoreOpFor(rt), option);
+}
+
+
+void Assembler::ldursw(const Register& rt, const MemOperand& src,
+ LoadStoreScalingOption option) {
+ VIXL_ASSERT(rt.Is64Bits());
+ VIXL_ASSERT(option != RequireScaledOffset);
+ VIXL_ASSERT(option != PreferScaledOffset);
+ LoadStore(rt, src, LDRSW_x, option);
+}
+
+
+void Assembler::ldrsw(const Register& rt, int imm19) {
+ Emit(LDRSW_x_lit | ImmLLiteral(imm19) | Rt(rt));
+}
+
+
+void Assembler::ldr(const CPURegister& rt, int imm19) {
+ LoadLiteralOp op = LoadLiteralOpFor(rt);
+ Emit(op | ImmLLiteral(imm19) | Rt(rt));
+}
+
+// clang-format off
+#define COMPARE_AND_SWAP_W_X_LIST(V) \
+ V(cas, CAS) \
+ V(casa, CASA) \
+ V(casl, CASL) \
+ V(casal, CASAL)
+// clang-format on
+
+#define DEFINE_ASM_FUNC(FN, OP) \
+ void Assembler::FN(const Register& rs, const Register& rt, \
+ const MemOperand& src) { \
+ VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0)); \
+ LoadStoreExclusive op = rt.Is64Bits() ? OP##_x : OP##_w; \
+ Emit(op | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(src.base())); \
+ }
+COMPARE_AND_SWAP_W_X_LIST(DEFINE_ASM_FUNC)
+#undef DEFINE_ASM_FUNC
+
+// clang-format off
+#define COMPARE_AND_SWAP_W_LIST(V) \
+ V(casb, CASB) \
+ V(casab, CASAB) \
+ V(caslb, CASLB) \
+ V(casalb, CASALB) \
+ V(cash, CASH) \
+ V(casah, CASAH) \
+ V(caslh, CASLH) \
+ V(casalh, CASALH)
+// clang-format on
+
+#define DEFINE_ASM_FUNC(FN, OP) \
+ void Assembler::FN(const Register& rs, const Register& rt, \
+ const MemOperand& src) { \
+ VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0)); \
+ Emit(OP | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(src.base())); \
+ }
+COMPARE_AND_SWAP_W_LIST(DEFINE_ASM_FUNC)
+#undef DEFINE_ASM_FUNC
+
+// clang-format off
+#define COMPARE_AND_SWAP_PAIR_LIST(V) \
+ V(casp, CASP) \
+ V(caspa, CASPA) \
+ V(caspl, CASPL) \
+ V(caspal, CASPAL)
+// clang-format on
+
+#define DEFINE_ASM_FUNC(FN, OP) \
+ void Assembler::FN(const Register& rs, const Register& rs1, \
+ const Register& rt, const Register& rt1, \
+ const MemOperand& src) { \
+ USE(rs1, rt1); \
+ VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0)); \
+ VIXL_ASSERT(AreEven(rs, rt)); \
+ VIXL_ASSERT(AreConsecutive(rs, rs1)); \
+ VIXL_ASSERT(AreConsecutive(rt, rt1)); \
+ LoadStoreExclusive op = rt.Is64Bits() ? OP##_x : OP##_w; \
+ Emit(op | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(src.base())); \
+ }
+COMPARE_AND_SWAP_PAIR_LIST(DEFINE_ASM_FUNC)
+#undef DEFINE_ASM_FUNC
+
+void Assembler::prfm(PrefetchOperation op, int imm19) {
+ Emit(PRFM_lit | ImmPrefetchOperation(op) | ImmLLiteral(imm19));
+}
+
+
+// Exclusive-access instructions.
+void Assembler::stxrb(const Register& rs,
+ const Register& rt,
+ const MemOperand& dst) {
+ VIXL_ASSERT(dst.IsImmediateOffset() && (dst.offset() == 0));
+ Emit(STXRB_w | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(dst.base()));
+}
+
+
+void Assembler::stxrh(const Register& rs,
+ const Register& rt,
+ const MemOperand& dst) {
+ VIXL_ASSERT(dst.IsImmediateOffset() && (dst.offset() == 0));
+ Emit(STXRH_w | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(dst.base()));
+}
+
+
+void Assembler::stxr(const Register& rs,
+ const Register& rt,
+ const MemOperand& dst) {
+ VIXL_ASSERT(dst.IsImmediateOffset() && (dst.offset() == 0));
+ LoadStoreExclusive op = rt.Is64Bits() ? STXR_x : STXR_w;
+ Emit(op | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(dst.base()));
+}
+
+
+void Assembler::ldxrb(const Register& rt,
+ const MemOperand& src) {
+ VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0));
+ Emit(LDXRB_w | Rs_mask | Rt(rt) | Rt2_mask | RnSP(src.base()));
+}
+
+
+void Assembler::ldxrh(const Register& rt,
+ const MemOperand& src) {
+ VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0));
+ Emit(LDXRH_w | Rs_mask | Rt(rt) | Rt2_mask | RnSP(src.base()));
+}
+
+
+void Assembler::ldxr(const Register& rt,
+ const MemOperand& src) {
+ VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0));
+ LoadStoreExclusive op = rt.Is64Bits() ? LDXR_x : LDXR_w;
+ Emit(op | Rs_mask | Rt(rt) | Rt2_mask | RnSP(src.base()));
+}
+
+
+void Assembler::stxp(const Register& rs,
+ const Register& rt,
+ const Register& rt2,
+ const MemOperand& dst) {
+ VIXL_ASSERT(rt.size() == rt2.size());
+ VIXL_ASSERT(dst.IsImmediateOffset() && (dst.offset() == 0));
+ LoadStoreExclusive op = rt.Is64Bits() ? STXP_x : STXP_w;
+ Emit(op | Rs(rs) | Rt(rt) | Rt2(rt2) | RnSP(dst.base()));
+}
+
+
+void Assembler::ldxp(const Register& rt,
+ const Register& rt2,
+ const MemOperand& src) {
+ VIXL_ASSERT(rt.size() == rt2.size());
+ VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0));
+ LoadStoreExclusive op = rt.Is64Bits() ? LDXP_x : LDXP_w;
+ Emit(op | Rs_mask | Rt(rt) | Rt2(rt2) | RnSP(src.base()));
+}
+
+
+void Assembler::stlxrb(const Register& rs,
+ const Register& rt,
+ const MemOperand& dst) {
+ VIXL_ASSERT(dst.IsImmediateOffset() && (dst.offset() == 0));
+ Emit(STLXRB_w | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(dst.base()));
+}
+
+
+void Assembler::stlxrh(const Register& rs,
+ const Register& rt,
+ const MemOperand& dst) {
+ VIXL_ASSERT(dst.IsImmediateOffset() && (dst.offset() == 0));
+ Emit(STLXRH_w | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(dst.base()));
+}
+
+
+void Assembler::stlxr(const Register& rs,
+ const Register& rt,
+ const MemOperand& dst) {
+ VIXL_ASSERT(dst.IsImmediateOffset() && (dst.offset() == 0));
+ LoadStoreExclusive op = rt.Is64Bits() ? STLXR_x : STLXR_w;
+ Emit(op | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(dst.base()));
+}
+
+
+void Assembler::ldaxrb(const Register& rt,
+ const MemOperand& src) {
+ VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0));
+ Emit(LDAXRB_w | Rs_mask | Rt(rt) | Rt2_mask | RnSP(src.base()));
+}
+
+
+void Assembler::ldaxrh(const Register& rt,
+ const MemOperand& src) {
+ VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0));
+ Emit(LDAXRH_w | Rs_mask | Rt(rt) | Rt2_mask | RnSP(src.base()));
+}
+
+
+void Assembler::ldaxr(const Register& rt,
+ const MemOperand& src) {
+ VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0));
+ LoadStoreExclusive op = rt.Is64Bits() ? LDAXR_x : LDAXR_w;
+ Emit(op | Rs_mask | Rt(rt) | Rt2_mask | RnSP(src.base()));
+}
+
+
+void Assembler::stlxp(const Register& rs,
+ const Register& rt,
+ const Register& rt2,
+ const MemOperand& dst) {
+ VIXL_ASSERT(rt.size() == rt2.size());
+ VIXL_ASSERT(dst.IsImmediateOffset() && (dst.offset() == 0));
+ LoadStoreExclusive op = rt.Is64Bits() ? STLXP_x : STLXP_w;
+ Emit(op | Rs(rs) | Rt(rt) | Rt2(rt2) | RnSP(dst.base()));
+}
+
+
+void Assembler::ldaxp(const Register& rt,
+ const Register& rt2,
+ const MemOperand& src) {
+ VIXL_ASSERT(rt.size() == rt2.size());
+ VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0));
+ LoadStoreExclusive op = rt.Is64Bits() ? LDAXP_x : LDAXP_w;
+ Emit(op | Rs_mask | Rt(rt) | Rt2(rt2) | RnSP(src.base()));
+}
+
+
+void Assembler::stlrb(const Register& rt,
+ const MemOperand& dst) {
+ VIXL_ASSERT(dst.IsImmediateOffset() && (dst.offset() == 0));
+ Emit(STLRB_w | Rs_mask | Rt(rt) | Rt2_mask | RnSP(dst.base()));
+}
+
+
+void Assembler::stlrh(const Register& rt,
+ const MemOperand& dst) {
+ VIXL_ASSERT(dst.IsImmediateOffset() && (dst.offset() == 0));
+ Emit(STLRH_w | Rs_mask | Rt(rt) | Rt2_mask | RnSP(dst.base()));
+}
+
+
+void Assembler::stlr(const Register& rt,
+ const MemOperand& dst) {
+ VIXL_ASSERT(dst.IsImmediateOffset() && (dst.offset() == 0));
+ LoadStoreExclusive op = rt.Is64Bits() ? STLR_x : STLR_w;
+ Emit(op | Rs_mask | Rt(rt) | Rt2_mask | RnSP(dst.base()));
+}
+
+
+void Assembler::ldarb(const Register& rt,
+ const MemOperand& src) {
+ VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0));
+ Emit(LDARB_w | Rs_mask | Rt(rt) | Rt2_mask | RnSP(src.base()));
+}
+
+
+void Assembler::ldarh(const Register& rt,
+ const MemOperand& src) {
+ VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0));
+ Emit(LDARH_w | Rs_mask | Rt(rt) | Rt2_mask | RnSP(src.base()));
+}
+
+
+void Assembler::ldar(const Register& rt,
+ const MemOperand& src) {
+ VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0));
+ LoadStoreExclusive op = rt.Is64Bits() ? LDAR_x : LDAR_w;
+ Emit(op | Rs_mask | Rt(rt) | Rt2_mask | RnSP(src.base()));
+}
+
+// These macros generate all the variations of the atomic memory operations,
+// e.g. ldadd, ldadda, ldaddb, staddl, etc.
+// For a full list of the methods with comments, see the assembler header file.
+
+// clang-format off
+#define ATOMIC_MEMORY_SIMPLE_OPERATION_LIST(V, DEF) \
+ V(DEF, add, LDADD) \
+ V(DEF, clr, LDCLR) \
+ V(DEF, eor, LDEOR) \
+ V(DEF, set, LDSET) \
+ V(DEF, smax, LDSMAX) \
+ V(DEF, smin, LDSMIN) \
+ V(DEF, umax, LDUMAX) \
+ V(DEF, umin, LDUMIN)
+
+#define ATOMIC_MEMORY_STORE_MODES(V, NAME, OP) \
+ V(NAME, OP##_x, OP##_w) \
+ V(NAME##l, OP##L_x, OP##L_w) \
+ V(NAME##b, OP##B, OP##B) \
+ V(NAME##lb, OP##LB, OP##LB) \
+ V(NAME##h, OP##H, OP##H) \
+ V(NAME##lh, OP##LH, OP##LH)
+
+#define ATOMIC_MEMORY_LOAD_MODES(V, NAME, OP) \
+ ATOMIC_MEMORY_STORE_MODES(V, NAME, OP) \
+ V(NAME##a, OP##A_x, OP##A_w) \
+ V(NAME##al, OP##AL_x, OP##AL_w) \
+ V(NAME##ab, OP##AB, OP##AB) \
+ V(NAME##alb, OP##ALB, OP##ALB) \
+ V(NAME##ah, OP##AH, OP##AH) \
+ V(NAME##alh, OP##ALH, OP##ALH)
+// clang-format on
+
+#define DEFINE_ASM_LOAD_FUNC(FN, OP_X, OP_W) \
+ void Assembler::ld##FN(const Register& rs, const Register& rt, \
+ const MemOperand& src) { \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kAtomics)); \
+ VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0)); \
+ AtomicMemoryOp op = rt.Is64Bits() ? OP_X : OP_W; \
+ Emit(op | Rs(rs) | Rt(rt) | RnSP(src.base())); \
+ }
+#define DEFINE_ASM_STORE_FUNC(FN, OP_X, OP_W) \
+ void Assembler::st##FN(const Register& rs, const MemOperand& src) { \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kAtomics)); \
+ ld##FN(rs, AppropriateZeroRegFor(rs), src); \
+ }
+
+ATOMIC_MEMORY_SIMPLE_OPERATION_LIST(ATOMIC_MEMORY_LOAD_MODES,
+ DEFINE_ASM_LOAD_FUNC)
+ATOMIC_MEMORY_SIMPLE_OPERATION_LIST(ATOMIC_MEMORY_STORE_MODES,
+ DEFINE_ASM_STORE_FUNC)
+
+#define DEFINE_ASM_SWP_FUNC(FN, OP_X, OP_W) \
+ void Assembler::FN(const Register& rs, const Register& rt, \
+ const MemOperand& src) { \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kAtomics)); \
+ VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0)); \
+ AtomicMemoryOp op = rt.Is64Bits() ? OP_X : OP_W; \
+ Emit(op | Rs(rs) | Rt(rt) | RnSP(src.base())); \
+ }
+
+ATOMIC_MEMORY_LOAD_MODES(DEFINE_ASM_SWP_FUNC, swp, SWP)
+
+#undef DEFINE_ASM_LOAD_FUNC
+#undef DEFINE_ASM_STORE_FUNC
+#undef DEFINE_ASM_SWP_FUNC
+
+void Assembler::prfm(PrefetchOperation op, const MemOperand& address,
+ LoadStoreScalingOption option) {
+ VIXL_ASSERT(option != RequireUnscaledOffset);
+ VIXL_ASSERT(option != PreferUnscaledOffset);
+ Prefetch(op, address, option);
+}
+
+
+void Assembler::prfum(PrefetchOperation op, const MemOperand& address,
+ LoadStoreScalingOption option) {
+ VIXL_ASSERT(option != RequireScaledOffset);
+ VIXL_ASSERT(option != PreferScaledOffset);
+ Prefetch(op, address, option);
+}
+
+
+void Assembler::sys(int op1, int crn, int crm, int op2, const Register& rt) {
+ Emit(SYS | ImmSysOp1(op1) | CRn(crn) | CRm(crm) | ImmSysOp2(op2) | Rt(rt));
+}
+
+
+void Assembler::sys(int op, const Register& rt) {
+ Emit(SYS | SysOp(op) | Rt(rt));
+}
+
+
+void Assembler::dc(DataCacheOp op, const Register& rt) {
+ VIXL_ASSERT((op == CVAC) || (op == CVAU) || (op == CIVAC) || (op == ZVA));
+ sys(op, rt);
+}
+
+
+void Assembler::ic(InstructionCacheOp op, const Register& rt) {
+ VIXL_ASSERT(op == IVAU);
+ sys(op, rt);
+}
+
+
+// NEON structure loads and stores.
+Instr Assembler::LoadStoreStructAddrModeField(const MemOperand& addr) {
+ Instr addr_field = RnSP(addr.base());
+
+ if (addr.IsPostIndex()) {
+ VIXL_STATIC_ASSERT(NEONLoadStoreMultiStructPostIndex ==
+ static_cast<NEONLoadStoreMultiStructPostIndexOp>(
+ NEONLoadStoreSingleStructPostIndex));
+
+ addr_field |= NEONLoadStoreMultiStructPostIndex;
+ if (addr.offset() == 0) {
+ addr_field |= RmNot31(addr.regoffset());
+ } else {
+ // The immediate post index addressing mode is indicated by rm = 31.
+ // The immediate is implied by the number of vector registers used.
+ addr_field |= (0x1f << Rm_offset);
+ }
+ } else {
+ VIXL_ASSERT(addr.IsImmediateOffset() && (addr.offset() == 0));
+ }
+ return addr_field;
+}
+
+void Assembler::LoadStoreStructVerify(const VRegister& vt,
+ const MemOperand& addr,
+ Instr op) {
+#ifdef DEBUG
+ // Assert that addressing mode is either offset (with immediate 0), post
+ // index by immediate of the size of the register list, or post index by a
+ // value in a core register.
+ if (addr.IsImmediateOffset()) {
+ VIXL_ASSERT(addr.offset() == 0);
+ } else {
+ int offset = vt.SizeInBytes();
+ switch (op) {
+ case NEON_LD1_1v:
+ case NEON_ST1_1v:
+ offset *= 1; break;
+ case NEONLoadStoreSingleStructLoad1:
+ case NEONLoadStoreSingleStructStore1:
+ case NEON_LD1R:
+ offset = (offset / vt.lanes()) * 1; break;
+
+ case NEON_LD1_2v:
+ case NEON_ST1_2v:
+ case NEON_LD2:
+ case NEON_ST2:
+ offset *= 2;
+ break;
+ case NEONLoadStoreSingleStructLoad2:
+ case NEONLoadStoreSingleStructStore2:
+ case NEON_LD2R:
+ offset = (offset / vt.lanes()) * 2; break;
+
+ case NEON_LD1_3v:
+ case NEON_ST1_3v:
+ case NEON_LD3:
+ case NEON_ST3:
+ offset *= 3; break;
+ case NEONLoadStoreSingleStructLoad3:
+ case NEONLoadStoreSingleStructStore3:
+ case NEON_LD3R:
+ offset = (offset / vt.lanes()) * 3; break;
+
+ case NEON_LD1_4v:
+ case NEON_ST1_4v:
+ case NEON_LD4:
+ case NEON_ST4:
+ offset *= 4; break;
+ case NEONLoadStoreSingleStructLoad4:
+ case NEONLoadStoreSingleStructStore4:
+ case NEON_LD4R:
+ offset = (offset / vt.lanes()) * 4; break;
+ default:
+ VIXL_UNREACHABLE();
+ }
+ VIXL_ASSERT(!addr.regoffset().Is(NoReg) ||
+ addr.offset() == offset);
+ }
+#else
+ USE(vt, addr, op);
+#endif
+}
+
+void Assembler::LoadStoreStruct(const VRegister& vt,
+ const MemOperand& addr,
+ NEONLoadStoreMultiStructOp op) {
+ LoadStoreStructVerify(vt, addr, op);
+ VIXL_ASSERT(vt.IsVector() || vt.Is1D());
+ Emit(op | LoadStoreStructAddrModeField(addr) | LSVFormat(vt) | Rt(vt));
+}
+
+
+void Assembler::LoadStoreStructSingleAllLanes(const VRegister& vt,
+ const MemOperand& addr,
+ NEONLoadStoreSingleStructOp op) {
+ LoadStoreStructVerify(vt, addr, op);
+ Emit(op | LoadStoreStructAddrModeField(addr) | LSVFormat(vt) | Rt(vt));
+}
+
+
+void Assembler::ld1(const VRegister& vt,
+ const MemOperand& src) {
+ LoadStoreStruct(vt, src, NEON_LD1_1v);
+}
+
+
+void Assembler::ld1(const VRegister& vt,
+ const VRegister& vt2,
+ const MemOperand& src) {
+ USE(vt2);
+ VIXL_ASSERT(AreSameFormat(vt, vt2));
+ VIXL_ASSERT(AreConsecutive(vt, vt2));
+ LoadStoreStruct(vt, src, NEON_LD1_2v);
+}
+
+
+void Assembler::ld1(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const MemOperand& src) {
+ USE(vt2, vt3);
+ VIXL_ASSERT(AreSameFormat(vt, vt2, vt3));
+ VIXL_ASSERT(AreConsecutive(vt, vt2, vt3));
+ LoadStoreStruct(vt, src, NEON_LD1_3v);
+}
+
+
+void Assembler::ld1(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const VRegister& vt4,
+ const MemOperand& src) {
+ USE(vt2, vt3, vt4);
+ VIXL_ASSERT(AreSameFormat(vt, vt2, vt3, vt4));
+ VIXL_ASSERT(AreConsecutive(vt, vt2, vt3, vt4));
+ LoadStoreStruct(vt, src, NEON_LD1_4v);
+}
+
+
+void Assembler::ld2(const VRegister& vt,
+ const VRegister& vt2,
+ const MemOperand& src) {
+ USE(vt2);
+ VIXL_ASSERT(AreSameFormat(vt, vt2));
+ VIXL_ASSERT(AreConsecutive(vt, vt2));
+ LoadStoreStruct(vt, src, NEON_LD2);
+}
+
+
+void Assembler::ld2(const VRegister& vt,
+ const VRegister& vt2,
+ int lane,
+ const MemOperand& src) {
+ USE(vt2);
+ VIXL_ASSERT(AreSameFormat(vt, vt2));
+ VIXL_ASSERT(AreConsecutive(vt, vt2));
+ LoadStoreStructSingle(vt, lane, src, NEONLoadStoreSingleStructLoad2);
+}
+
+
+void Assembler::ld2r(const VRegister& vt,
+ const VRegister& vt2,
+ const MemOperand& src) {
+ USE(vt2);
+ VIXL_ASSERT(AreSameFormat(vt, vt2));
+ VIXL_ASSERT(AreConsecutive(vt, vt2));
+ LoadStoreStructSingleAllLanes(vt, src, NEON_LD2R);
+}
+
+
+void Assembler::ld3(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const MemOperand& src) {
+ USE(vt2, vt3);
+ VIXL_ASSERT(AreSameFormat(vt, vt2, vt3));
+ VIXL_ASSERT(AreConsecutive(vt, vt2, vt3));
+ LoadStoreStruct(vt, src, NEON_LD3);
+}
+
+
+void Assembler::ld3(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ int lane,
+ const MemOperand& src) {
+ USE(vt2, vt3);
+ VIXL_ASSERT(AreSameFormat(vt, vt2, vt3));
+ VIXL_ASSERT(AreConsecutive(vt, vt2, vt3));
+ LoadStoreStructSingle(vt, lane, src, NEONLoadStoreSingleStructLoad3);
+}
+
+
+void Assembler::ld3r(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const MemOperand& src) {
+ USE(vt2, vt3);
+ VIXL_ASSERT(AreSameFormat(vt, vt2, vt3));
+ VIXL_ASSERT(AreConsecutive(vt, vt2, vt3));
+ LoadStoreStructSingleAllLanes(vt, src, NEON_LD3R);
+}
+
+
+void Assembler::ld4(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const VRegister& vt4,
+ const MemOperand& src) {
+ USE(vt2, vt3, vt4);
+ VIXL_ASSERT(AreSameFormat(vt, vt2, vt3, vt4));
+ VIXL_ASSERT(AreConsecutive(vt, vt2, vt3, vt4));
+ LoadStoreStruct(vt, src, NEON_LD4);
+}
+
+
+void Assembler::ld4(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const VRegister& vt4,
+ int lane,
+ const MemOperand& src) {
+ USE(vt2, vt3, vt4);
+ VIXL_ASSERT(AreSameFormat(vt, vt2, vt3, vt4));
+ VIXL_ASSERT(AreConsecutive(vt, vt2, vt3, vt4));
+ LoadStoreStructSingle(vt, lane, src, NEONLoadStoreSingleStructLoad4);
+}
+
+
+void Assembler::ld4r(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const VRegister& vt4,
+ const MemOperand& src) {
+ USE(vt2, vt3, vt4);
+ VIXL_ASSERT(AreSameFormat(vt, vt2, vt3, vt4));
+ VIXL_ASSERT(AreConsecutive(vt, vt2, vt3, vt4));
+ LoadStoreStructSingleAllLanes(vt, src, NEON_LD4R);
+}
+
+
+void Assembler::st1(const VRegister& vt,
+ const MemOperand& src) {
+ LoadStoreStruct(vt, src, NEON_ST1_1v);
+}
+
+
+void Assembler::st1(const VRegister& vt,
+ const VRegister& vt2,
+ const MemOperand& src) {
+ USE(vt2);
+ VIXL_ASSERT(AreSameFormat(vt, vt2));
+ VIXL_ASSERT(AreConsecutive(vt, vt2));
+ LoadStoreStruct(vt, src, NEON_ST1_2v);
+}
+
+
+void Assembler::st1(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const MemOperand& src) {
+ USE(vt2, vt3);
+ VIXL_ASSERT(AreSameFormat(vt, vt2, vt3));
+ VIXL_ASSERT(AreConsecutive(vt, vt2, vt3));
+ LoadStoreStruct(vt, src, NEON_ST1_3v);
+}
+
+
+void Assembler::st1(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const VRegister& vt4,
+ const MemOperand& src) {
+ USE(vt2, vt3, vt4);
+ VIXL_ASSERT(AreSameFormat(vt, vt2, vt3, vt4));
+ VIXL_ASSERT(AreConsecutive(vt, vt2, vt3, vt4));
+ LoadStoreStruct(vt, src, NEON_ST1_4v);
+}
+
+
+void Assembler::st2(const VRegister& vt,
+ const VRegister& vt2,
+ const MemOperand& dst) {
+ USE(vt2);
+ VIXL_ASSERT(AreSameFormat(vt, vt2));
+ VIXL_ASSERT(AreConsecutive(vt, vt2));
+ LoadStoreStruct(vt, dst, NEON_ST2);
+}
+
+
+void Assembler::st2(const VRegister& vt,
+ const VRegister& vt2,
+ int lane,
+ const MemOperand& dst) {
+ USE(vt2);
+ VIXL_ASSERT(AreSameFormat(vt, vt2));
+ VIXL_ASSERT(AreConsecutive(vt, vt2));
+ LoadStoreStructSingle(vt, lane, dst, NEONLoadStoreSingleStructStore2);
+}
+
+
+void Assembler::st3(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const MemOperand& dst) {
+ USE(vt2, vt3);
+ VIXL_ASSERT(AreSameFormat(vt, vt2, vt3));
+ VIXL_ASSERT(AreConsecutive(vt, vt2, vt3));
+ LoadStoreStruct(vt, dst, NEON_ST3);
+}
+
+
+void Assembler::st3(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ int lane,
+ const MemOperand& dst) {
+ USE(vt2, vt3);
+ VIXL_ASSERT(AreSameFormat(vt, vt2, vt3));
+ VIXL_ASSERT(AreConsecutive(vt, vt2, vt3));
+ LoadStoreStructSingle(vt, lane, dst, NEONLoadStoreSingleStructStore3);
+}
+
+
+void Assembler::st4(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const VRegister& vt4,
+ const MemOperand& dst) {
+ USE(vt2, vt3, vt4);
+ VIXL_ASSERT(AreSameFormat(vt, vt2, vt3, vt4));
+ VIXL_ASSERT(AreConsecutive(vt, vt2, vt3, vt4));
+ LoadStoreStruct(vt, dst, NEON_ST4);
+}
+
+
+void Assembler::st4(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const VRegister& vt4,
+ int lane,
+ const MemOperand& dst) {
+ USE(vt2, vt3, vt4);
+ VIXL_ASSERT(AreSameFormat(vt, vt2, vt3, vt4));
+ VIXL_ASSERT(AreConsecutive(vt, vt2, vt3, vt4));
+ LoadStoreStructSingle(vt, lane, dst, NEONLoadStoreSingleStructStore4);
+}
+
+
+void Assembler::LoadStoreStructSingle(const VRegister& vt,
+ uint32_t lane,
+ const MemOperand& addr,
+ NEONLoadStoreSingleStructOp op) {
+ LoadStoreStructVerify(vt, addr, op);
+
+ // We support vt arguments of the form vt.VxT() or vt.T(), where x is the
+ // number of lanes, and T is b, h, s or d.
+ unsigned lane_size = vt.LaneSizeInBytes();
+ VIXL_ASSERT(lane < (kQRegSizeInBytes / lane_size));
+
+ // Lane size is encoded in the opcode field. Lane index is encoded in the Q,
+ // S and size fields.
+ lane *= lane_size;
+ if (lane_size == 8) lane++;
+
+ Instr size = (lane << NEONLSSize_offset) & NEONLSSize_mask;
+ Instr s = (lane << (NEONS_offset - 2)) & NEONS_mask;
+ Instr q = (lane << (NEONQ_offset - 3)) & NEONQ_mask;
+
+ Instr instr = op;
+ switch (lane_size) {
+ case 1: instr |= NEONLoadStoreSingle_b; break;
+ case 2: instr |= NEONLoadStoreSingle_h; break;
+ case 4: instr |= NEONLoadStoreSingle_s; break;
+ default:
+ VIXL_ASSERT(lane_size == 8);
+ instr |= NEONLoadStoreSingle_d;
+ }
+
+ Emit(instr | LoadStoreStructAddrModeField(addr) | q | size | s | Rt(vt));
+}
+
+
+void Assembler::ld1(const VRegister& vt,
+ int lane,
+ const MemOperand& src) {
+ LoadStoreStructSingle(vt, lane, src, NEONLoadStoreSingleStructLoad1);
+}
+
+
+void Assembler::ld1r(const VRegister& vt,
+ const MemOperand& src) {
+ LoadStoreStructSingleAllLanes(vt, src, NEON_LD1R);
+}
+
+
+void Assembler::st1(const VRegister& vt,
+ int lane,
+ const MemOperand& dst) {
+ LoadStoreStructSingle(vt, lane, dst, NEONLoadStoreSingleStructStore1);
+}
+
+
+void Assembler::NEON3DifferentL(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ NEON3DifferentOp vop) {
+ VIXL_ASSERT(AreSameFormat(vn, vm));
+ VIXL_ASSERT((vn.Is1H() && vd.Is1S()) ||
+ (vn.Is1S() && vd.Is1D()) ||
+ (vn.Is8B() && vd.Is8H()) ||
+ (vn.Is4H() && vd.Is4S()) ||
+ (vn.Is2S() && vd.Is2D()) ||
+ (vn.Is16B() && vd.Is8H())||
+ (vn.Is8H() && vd.Is4S()) ||
+ (vn.Is4S() && vd.Is2D()));
+ Instr format, op = vop;
+ if (vd.IsScalar()) {
+ op |= NEON_Q | NEONScalar;
+ format = SFormat(vn);
+ } else {
+ format = VFormat(vn);
+ }
+ Emit(format | op | Rm(vm) | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::NEON3DifferentW(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ NEON3DifferentOp vop) {
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ VIXL_ASSERT((vm.Is8B() && vd.Is8H()) ||
+ (vm.Is4H() && vd.Is4S()) ||
+ (vm.Is2S() && vd.Is2D()) ||
+ (vm.Is16B() && vd.Is8H())||
+ (vm.Is8H() && vd.Is4S()) ||
+ (vm.Is4S() && vd.Is2D()));
+ Emit(VFormat(vm) | vop | Rm(vm) | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::NEON3DifferentHN(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ NEON3DifferentOp vop) {
+ VIXL_ASSERT(AreSameFormat(vm, vn));
+ VIXL_ASSERT((vd.Is8B() && vn.Is8H()) ||
+ (vd.Is4H() && vn.Is4S()) ||
+ (vd.Is2S() && vn.Is2D()) ||
+ (vd.Is16B() && vn.Is8H())||
+ (vd.Is8H() && vn.Is4S()) ||
+ (vd.Is4S() && vn.Is2D()));
+ Emit(VFormat(vd) | vop | Rm(vm) | Rn(vn) | Rd(vd));
+}
+
+
+#define NEON_3DIFF_LONG_LIST(V) \
+ V(pmull, NEON_PMULL, vn.IsVector() && vn.Is8B()) \
+ V(pmull2, NEON_PMULL2, vn.IsVector() && vn.Is16B()) \
+ V(saddl, NEON_SADDL, vn.IsVector() && vn.IsD()) \
+ V(saddl2, NEON_SADDL2, vn.IsVector() && vn.IsQ()) \
+ V(sabal, NEON_SABAL, vn.IsVector() && vn.IsD()) \
+ V(sabal2, NEON_SABAL2, vn.IsVector() && vn.IsQ()) \
+ V(uabal, NEON_UABAL, vn.IsVector() && vn.IsD()) \
+ V(uabal2, NEON_UABAL2, vn.IsVector() && vn.IsQ()) \
+ V(sabdl, NEON_SABDL, vn.IsVector() && vn.IsD()) \
+ V(sabdl2, NEON_SABDL2, vn.IsVector() && vn.IsQ()) \
+ V(uabdl, NEON_UABDL, vn.IsVector() && vn.IsD()) \
+ V(uabdl2, NEON_UABDL2, vn.IsVector() && vn.IsQ()) \
+ V(smlal, NEON_SMLAL, vn.IsVector() && vn.IsD()) \
+ V(smlal2, NEON_SMLAL2, vn.IsVector() && vn.IsQ()) \
+ V(umlal, NEON_UMLAL, vn.IsVector() && vn.IsD()) \
+ V(umlal2, NEON_UMLAL2, vn.IsVector() && vn.IsQ()) \
+ V(smlsl, NEON_SMLSL, vn.IsVector() && vn.IsD()) \
+ V(smlsl2, NEON_SMLSL2, vn.IsVector() && vn.IsQ()) \
+ V(umlsl, NEON_UMLSL, vn.IsVector() && vn.IsD()) \
+ V(umlsl2, NEON_UMLSL2, vn.IsVector() && vn.IsQ()) \
+ V(smull, NEON_SMULL, vn.IsVector() && vn.IsD()) \
+ V(smull2, NEON_SMULL2, vn.IsVector() && vn.IsQ()) \
+ V(umull, NEON_UMULL, vn.IsVector() && vn.IsD()) \
+ V(umull2, NEON_UMULL2, vn.IsVector() && vn.IsQ()) \
+ V(ssubl, NEON_SSUBL, vn.IsVector() && vn.IsD()) \
+ V(ssubl2, NEON_SSUBL2, vn.IsVector() && vn.IsQ()) \
+ V(uaddl, NEON_UADDL, vn.IsVector() && vn.IsD()) \
+ V(uaddl2, NEON_UADDL2, vn.IsVector() && vn.IsQ()) \
+ V(usubl, NEON_USUBL, vn.IsVector() && vn.IsD()) \
+ V(usubl2, NEON_USUBL2, vn.IsVector() && vn.IsQ()) \
+ V(sqdmlal, NEON_SQDMLAL, vn.Is1H() || vn.Is1S() || vn.Is4H() || vn.Is2S()) \
+ V(sqdmlal2, NEON_SQDMLAL2, vn.Is1H() || vn.Is1S() || vn.Is8H() || vn.Is4S()) \
+ V(sqdmlsl, NEON_SQDMLSL, vn.Is1H() || vn.Is1S() || vn.Is4H() || vn.Is2S()) \
+ V(sqdmlsl2, NEON_SQDMLSL2, vn.Is1H() || vn.Is1S() || vn.Is8H() || vn.Is4S()) \
+ V(sqdmull, NEON_SQDMULL, vn.Is1H() || vn.Is1S() || vn.Is4H() || vn.Is2S()) \
+ V(sqdmull2, NEON_SQDMULL2, vn.Is1H() || vn.Is1S() || vn.Is8H() || vn.Is4S()) \
+
+
+#define DEFINE_ASM_FUNC(FN, OP, AS) \
+void Assembler::FN(const VRegister& vd, \
+ const VRegister& vn, \
+ const VRegister& vm) { \
+ VIXL_ASSERT(AS); \
+ NEON3DifferentL(vd, vn, vm, OP); \
+}
+NEON_3DIFF_LONG_LIST(DEFINE_ASM_FUNC)
+#undef DEFINE_ASM_FUNC
+
+#define NEON_3DIFF_HN_LIST(V) \
+ V(addhn, NEON_ADDHN, vd.IsD()) \
+ V(addhn2, NEON_ADDHN2, vd.IsQ()) \
+ V(raddhn, NEON_RADDHN, vd.IsD()) \
+ V(raddhn2, NEON_RADDHN2, vd.IsQ()) \
+ V(subhn, NEON_SUBHN, vd.IsD()) \
+ V(subhn2, NEON_SUBHN2, vd.IsQ()) \
+ V(rsubhn, NEON_RSUBHN, vd.IsD()) \
+ V(rsubhn2, NEON_RSUBHN2, vd.IsQ())
+
+#define DEFINE_ASM_FUNC(FN, OP, AS) \
+void Assembler::FN(const VRegister& vd, \
+ const VRegister& vn, \
+ const VRegister& vm) { \
+ VIXL_ASSERT(AS); \
+ NEON3DifferentHN(vd, vn, vm, OP); \
+}
+NEON_3DIFF_HN_LIST(DEFINE_ASM_FUNC)
+#undef DEFINE_ASM_FUNC
+
+void Assembler::uaddw(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm) {
+ VIXL_ASSERT(vm.IsD());
+ NEON3DifferentW(vd, vn, vm, NEON_UADDW);
+}
+
+
+void Assembler::uaddw2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm) {
+ VIXL_ASSERT(vm.IsQ());
+ NEON3DifferentW(vd, vn, vm, NEON_UADDW2);
+}
+
+
+void Assembler::saddw(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm) {
+ VIXL_ASSERT(vm.IsD());
+ NEON3DifferentW(vd, vn, vm, NEON_SADDW);
+}
+
+
+void Assembler::saddw2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm) {
+ VIXL_ASSERT(vm.IsQ());
+ NEON3DifferentW(vd, vn, vm, NEON_SADDW2);
+}
+
+
+void Assembler::usubw(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm) {
+ VIXL_ASSERT(vm.IsD());
+ NEON3DifferentW(vd, vn, vm, NEON_USUBW);
+}
+
+
+void Assembler::usubw2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm) {
+ VIXL_ASSERT(vm.IsQ());
+ NEON3DifferentW(vd, vn, vm, NEON_USUBW2);
+}
+
+
+void Assembler::ssubw(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm) {
+ VIXL_ASSERT(vm.IsD());
+ NEON3DifferentW(vd, vn, vm, NEON_SSUBW);
+}
+
+
+void Assembler::ssubw2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm) {
+ VIXL_ASSERT(vm.IsQ());
+ NEON3DifferentW(vd, vn, vm, NEON_SSUBW2);
+}
+
+
+void Assembler::mov(const Register& rd, const Register& rm) {
+ // Moves involving the stack pointer are encoded as add immediate with
+ // second operand of zero. Otherwise, orr with first operand zr is
+ // used.
+ if (rd.IsSP() || rm.IsSP()) {
+ add(rd, rm, 0);
+ } else {
+ orr(rd, AppropriateZeroRegFor(rd), rm);
+ }
+}
+
+
+void Assembler::mvn(const Register& rd, const Operand& operand) {
+ orn(rd, AppropriateZeroRegFor(rd), operand);
+}
+
+
+void Assembler::mrs(const Register& rt, SystemRegister sysreg) {
+ VIXL_ASSERT(rt.Is64Bits());
+ Emit(MRS | ImmSystemRegister(sysreg) | Rt(rt));
+}
+
+
+void Assembler::msr(SystemRegister sysreg, const Register& rt) {
+ VIXL_ASSERT(rt.Is64Bits());
+ Emit(MSR | Rt(rt) | ImmSystemRegister(sysreg));
+}
+
+
+void Assembler::clrex(int imm4) {
+ Emit(CLREX | CRm(imm4));
+}
+
+
+void Assembler::dmb(BarrierDomain domain, BarrierType type) {
+ Emit(DMB | ImmBarrierDomain(domain) | ImmBarrierType(type));
+}
+
+
+void Assembler::dsb(BarrierDomain domain, BarrierType type) {
+ Emit(DSB | ImmBarrierDomain(domain) | ImmBarrierType(type));
+}
+
+
+void Assembler::isb() {
+ Emit(ISB | ImmBarrierDomain(FullSystem) | ImmBarrierType(BarrierAll));
+}
+
+
+void Assembler::fmov(const VRegister& vd, double imm) {
+ if (vd.IsScalar()) {
+ VIXL_ASSERT(vd.Is1D());
+ Emit(FMOV_d_imm | Rd(vd) | ImmFP64(imm));
+ } else {
+ VIXL_ASSERT(vd.Is2D());
+ Instr op = NEONModifiedImmediate_MOVI | NEONModifiedImmediateOpBit;
+ Instr q = NEON_Q;
+ uint32_t encoded_imm = FP64ToImm8(imm);
+ Emit(q | op | ImmNEONabcdefgh(encoded_imm) | NEONCmode(0xf) | Rd(vd));
+ }
+}
+
+
+void Assembler::fmov(const VRegister& vd, float imm) {
+ if (vd.IsScalar()) {
+ VIXL_ASSERT(vd.Is1S());
+ Emit(FMOV_s_imm | Rd(vd) | ImmFP32(imm));
+ } else {
+ VIXL_ASSERT(vd.Is2S() || vd.Is4S());
+ Instr op = NEONModifiedImmediate_MOVI;
+ Instr q = vd.Is4S() ? NEON_Q : 0;
+ uint32_t encoded_imm = FP32ToImm8(imm);
+ Emit(q | op | ImmNEONabcdefgh(encoded_imm) | NEONCmode(0xf) | Rd(vd));
+ }
+}
+
+
+void Assembler::fmov(const Register& rd, const VRegister& vn) {
+ VIXL_ASSERT(vn.Is1S() || vn.Is1D());
+ VIXL_ASSERT(rd.size() == vn.size());
+ FPIntegerConvertOp op = rd.Is32Bits() ? FMOV_ws : FMOV_xd;
+ Emit(op | Rd(rd) | Rn(vn));
+}
+
+
+void Assembler::fmov(const VRegister& vd, const Register& rn) {
+ VIXL_ASSERT(vd.Is1S() || vd.Is1D());
+ VIXL_ASSERT(vd.size() == rn.size());
+ FPIntegerConvertOp op = vd.Is32Bits() ? FMOV_sw : FMOV_dx;
+ Emit(op | Rd(vd) | Rn(rn));
+}
+
+
+void Assembler::fmov(const VRegister& vd, const VRegister& vn) {
+ VIXL_ASSERT(vd.Is1S() || vd.Is1D());
+ VIXL_ASSERT(vd.IsSameFormat(vn));
+ Emit(FPType(vd) | FMOV | Rd(vd) | Rn(vn));
+}
+
+
+void Assembler::fmov(const VRegister& vd, int index, const Register& rn) {
+ VIXL_ASSERT((index == 1) && vd.Is1D() && rn.IsX());
+ USE(index);
+ Emit(FMOV_d1_x | Rd(vd) | Rn(rn));
+}
+
+
+void Assembler::fmov(const Register& rd, const VRegister& vn, int index) {
+ VIXL_ASSERT((index == 1) && vn.Is1D() && rd.IsX());
+ USE(index);
+ Emit(FMOV_x_d1 | Rd(rd) | Rn(vn));
+}
+
+
+void Assembler::fmadd(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ const VRegister& va) {
+ FPDataProcessing3Source(vd, vn, vm, va, vd.Is1S() ? FMADD_s : FMADD_d);
+}
+
+
+void Assembler::fmsub(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ const VRegister& va) {
+ FPDataProcessing3Source(vd, vn, vm, va, vd.Is1S() ? FMSUB_s : FMSUB_d);
+}
+
+
+void Assembler::fnmadd(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ const VRegister& va) {
+ FPDataProcessing3Source(vd, vn, vm, va, vd.Is1S() ? FNMADD_s : FNMADD_d);
+}
+
+
+void Assembler::fnmsub(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ const VRegister& va) {
+ FPDataProcessing3Source(vd, vn, vm, va, vd.Is1S() ? FNMSUB_s : FNMSUB_d);
+}
+
+
+void Assembler::fnmul(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm) {
+ VIXL_ASSERT(AreSameSizeAndType(vd, vn, vm));
+ Instr op = vd.Is1S() ? FNMUL_s : FNMUL_d;
+ Emit(FPType(vd) | op | Rm(vm) | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::FPCompareMacro(const VRegister& vn,
+ double value,
+ FPTrapFlags trap) {
+ USE(value);
+ // Although the fcmp{e} instructions can strictly only take an immediate
+ // value of +0.0, we don't need to check for -0.0 because the sign of 0.0
+ // doesn't affect the result of the comparison.
+ VIXL_ASSERT(value == 0.0);
+ VIXL_ASSERT(vn.Is1S() || vn.Is1D());
+ Instr op = (trap == EnableTrap) ? FCMPE_zero : FCMP_zero;
+ Emit(FPType(vn) | op | Rn(vn));
+}
+
+
+void Assembler::FPCompareMacro(const VRegister& vn,
+ const VRegister& vm,
+ FPTrapFlags trap) {
+ VIXL_ASSERT(vn.Is1S() || vn.Is1D());
+ VIXL_ASSERT(vn.IsSameSizeAndType(vm));
+ Instr op = (trap == EnableTrap) ? FCMPE : FCMP;
+ Emit(FPType(vn) | op | Rm(vm) | Rn(vn));
+}
+
+
+void Assembler::fcmp(const VRegister& vn,
+ const VRegister& vm) {
+ FPCompareMacro(vn, vm, DisableTrap);
+}
+
+
+void Assembler::fcmpe(const VRegister& vn,
+ const VRegister& vm) {
+ FPCompareMacro(vn, vm, EnableTrap);
+}
+
+
+void Assembler::fcmp(const VRegister& vn,
+ double value) {
+ FPCompareMacro(vn, value, DisableTrap);
+}
+
+
+void Assembler::fcmpe(const VRegister& vn,
+ double value) {
+ FPCompareMacro(vn, value, EnableTrap);
+}
+
+
+void Assembler::FPCCompareMacro(const VRegister& vn,
+ const VRegister& vm,
+ StatusFlags nzcv,
+ Condition cond,
+ FPTrapFlags trap) {
+ VIXL_ASSERT(vn.Is1S() || vn.Is1D());
+ VIXL_ASSERT(vn.IsSameSizeAndType(vm));
+ Instr op = (trap == EnableTrap) ? FCCMPE : FCCMP;
+ Emit(FPType(vn) | op | Rm(vm) | Cond(cond) | Rn(vn) | Nzcv(nzcv));
+}
+
+void Assembler::fccmp(const VRegister& vn,
+ const VRegister& vm,
+ StatusFlags nzcv,
+ Condition cond) {
+ FPCCompareMacro(vn, vm, nzcv, cond, DisableTrap);
+}
+
+
+void Assembler::fccmpe(const VRegister& vn,
+ const VRegister& vm,
+ StatusFlags nzcv,
+ Condition cond) {
+ FPCCompareMacro(vn, vm, nzcv, cond, EnableTrap);
+}
+
+
+void Assembler::fcsel(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ Condition cond) {
+ VIXL_ASSERT(vd.Is1S() || vd.Is1D());
+ VIXL_ASSERT(AreSameFormat(vd, vn, vm));
+ Emit(FPType(vd) | FCSEL | Rm(vm) | Cond(cond) | Rn(vn) | Rd(vd));
+}
+
+void Assembler::fjcvtzs(const Register& rd, const VRegister& vn) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kJSCVT));
+ VIXL_ASSERT(rd.IsW() && vn.Is1D());
+ Emit(FJCVTZS | Rn(vn) | Rd(rd));
+}
+
+
+void Assembler::NEONFPConvertToInt(const Register& rd,
+ const VRegister& vn,
+ Instr op) {
+ Emit(SF(rd) | FPType(vn) | op | Rn(vn) | Rd(rd));
+}
+
+
+void Assembler::NEONFPConvertToInt(const VRegister& vd,
+ const VRegister& vn,
+ Instr op) {
+ if (vn.IsScalar()) {
+ VIXL_ASSERT((vd.Is1S() && vn.Is1S()) || (vd.Is1D() && vn.Is1D()));
+ op |= NEON_Q | NEONScalar;
+ }
+ Emit(FPFormat(vn) | op | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::fcvt(const VRegister& vd,
+ const VRegister& vn) {
+ FPDataProcessing1SourceOp op;
+ if (vd.Is1D()) {
+ VIXL_ASSERT(vn.Is1S() || vn.Is1H());
+ op = vn.Is1S() ? FCVT_ds : FCVT_dh;
+ } else if (vd.Is1S()) {
+ VIXL_ASSERT(vn.Is1D() || vn.Is1H());
+ op = vn.Is1D() ? FCVT_sd : FCVT_sh;
+ } else {
+ VIXL_ASSERT(vd.Is1H());
+ VIXL_ASSERT(vn.Is1D() || vn.Is1S());
+ op = vn.Is1D() ? FCVT_hd : FCVT_hs;
+ }
+ FPDataProcessing1Source(vd, vn, op);
+}
+
+
+void Assembler::fcvtl(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT((vd.Is4S() && vn.Is4H()) ||
+ (vd.Is2D() && vn.Is2S()));
+ Instr format = vd.Is2D() ? (1 << NEONSize_offset) : 0;
+ Emit(format | NEON_FCVTL | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::fcvtl2(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT((vd.Is4S() && vn.Is8H()) ||
+ (vd.Is2D() && vn.Is4S()));
+ Instr format = vd.Is2D() ? (1 << NEONSize_offset) : 0;
+ Emit(NEON_Q | format | NEON_FCVTL | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::fcvtn(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT((vn.Is4S() && vd.Is4H()) ||
+ (vn.Is2D() && vd.Is2S()));
+ Instr format = vn.Is2D() ? (1 << NEONSize_offset) : 0;
+ Emit(format | NEON_FCVTN | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::fcvtn2(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT((vn.Is4S() && vd.Is8H()) ||
+ (vn.Is2D() && vd.Is4S()));
+ Instr format = vn.Is2D() ? (1 << NEONSize_offset) : 0;
+ Emit(NEON_Q | format | NEON_FCVTN | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::fcvtxn(const VRegister& vd,
+ const VRegister& vn) {
+ Instr format = 1 << NEONSize_offset;
+ if (vd.IsScalar()) {
+ VIXL_ASSERT(vd.Is1S() && vn.Is1D());
+ Emit(format | NEON_FCVTXN_scalar | Rn(vn) | Rd(vd));
+ } else {
+ VIXL_ASSERT(vd.Is2S() && vn.Is2D());
+ Emit(format | NEON_FCVTXN | Rn(vn) | Rd(vd));
+ }
+}
+
+
+void Assembler::fcvtxn2(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(vd.Is4S() && vn.Is2D());
+ Instr format = 1 << NEONSize_offset;
+ Emit(NEON_Q | format | NEON_FCVTXN | Rn(vn) | Rd(vd));
+}
+
+
+#define NEON_FP2REGMISC_FCVT_LIST(V) \
+ V(fcvtnu, NEON_FCVTNU, FCVTNU) \
+ V(fcvtns, NEON_FCVTNS, FCVTNS) \
+ V(fcvtpu, NEON_FCVTPU, FCVTPU) \
+ V(fcvtps, NEON_FCVTPS, FCVTPS) \
+ V(fcvtmu, NEON_FCVTMU, FCVTMU) \
+ V(fcvtms, NEON_FCVTMS, FCVTMS) \
+ V(fcvtau, NEON_FCVTAU, FCVTAU) \
+ V(fcvtas, NEON_FCVTAS, FCVTAS)
+
+#define DEFINE_ASM_FUNCS(FN, VEC_OP, SCA_OP) \
+void Assembler::FN(const Register& rd, \
+ const VRegister& vn) { \
+ NEONFPConvertToInt(rd, vn, SCA_OP); \
+} \
+void Assembler::FN(const VRegister& vd, \
+ const VRegister& vn) { \
+ NEONFPConvertToInt(vd, vn, VEC_OP); \
+}
+NEON_FP2REGMISC_FCVT_LIST(DEFINE_ASM_FUNCS)
+#undef DEFINE_ASM_FUNCS
+
+
+void Assembler::fcvtzs(const Register& rd,
+ const VRegister& vn,
+ int fbits) {
+ VIXL_ASSERT(vn.Is1S() || vn.Is1D());
+ VIXL_ASSERT((fbits >= 0) && (fbits <= rd.SizeInBits()));
+ if (fbits == 0) {
+ Emit(SF(rd) | FPType(vn) | FCVTZS | Rn(vn) | Rd(rd));
+ } else {
+ Emit(SF(rd) | FPType(vn) | FCVTZS_fixed | FPScale(64 - fbits) | Rn(vn) |
+ Rd(rd));
+ }
+}
+
+
+void Assembler::fcvtzs(const VRegister& vd,
+ const VRegister& vn,
+ int fbits) {
+ VIXL_ASSERT(fbits >= 0);
+ if (fbits == 0) {
+ NEONFP2RegMisc(vd, vn, NEON_FCVTZS);
+ } else {
+ VIXL_ASSERT(vd.Is1D() || vd.Is1S() || vd.Is2D() || vd.Is2S() || vd.Is4S());
+ NEONShiftRightImmediate(vd, vn, fbits, NEON_FCVTZS_imm);
+ }
+}
+
+
+void Assembler::fcvtzu(const Register& rd,
+ const VRegister& vn,
+ int fbits) {
+ VIXL_ASSERT(vn.Is1S() || vn.Is1D());
+ VIXL_ASSERT((fbits >= 0) && (fbits <= rd.SizeInBits()));
+ if (fbits == 0) {
+ Emit(SF(rd) | FPType(vn) | FCVTZU | Rn(vn) | Rd(rd));
+ } else {
+ Emit(SF(rd) | FPType(vn) | FCVTZU_fixed | FPScale(64 - fbits) | Rn(vn) |
+ Rd(rd));
+ }
+}
+
+
+void Assembler::fcvtzu(const VRegister& vd,
+ const VRegister& vn,
+ int fbits) {
+ VIXL_ASSERT(fbits >= 0);
+ if (fbits == 0) {
+ NEONFP2RegMisc(vd, vn, NEON_FCVTZU);
+ } else {
+ VIXL_ASSERT(vd.Is1D() || vd.Is1S() || vd.Is2D() || vd.Is2S() || vd.Is4S());
+ NEONShiftRightImmediate(vd, vn, fbits, NEON_FCVTZU_imm);
+ }
+}
+
+void Assembler::ucvtf(const VRegister& vd,
+ const VRegister& vn,
+ int fbits) {
+ VIXL_ASSERT(fbits >= 0);
+ if (fbits == 0) {
+ NEONFP2RegMisc(vd, vn, NEON_UCVTF);
+ } else {
+ VIXL_ASSERT(vd.Is1D() || vd.Is1S() || vd.Is2D() || vd.Is2S() || vd.Is4S());
+ NEONShiftRightImmediate(vd, vn, fbits, NEON_UCVTF_imm);
+ }
+}
+
+void Assembler::scvtf(const VRegister& vd,
+ const VRegister& vn,
+ int fbits) {
+ VIXL_ASSERT(fbits >= 0);
+ if (fbits == 0) {
+ NEONFP2RegMisc(vd, vn, NEON_SCVTF);
+ } else {
+ VIXL_ASSERT(vd.Is1D() || vd.Is1S() || vd.Is2D() || vd.Is2S() || vd.Is4S());
+ NEONShiftRightImmediate(vd, vn, fbits, NEON_SCVTF_imm);
+ }
+}
+
+
+void Assembler::scvtf(const VRegister& vd,
+ const Register& rn,
+ int fbits) {
+ VIXL_ASSERT(vd.Is1S() || vd.Is1D());
+ VIXL_ASSERT(fbits >= 0);
+ if (fbits == 0) {
+ Emit(SF(rn) | FPType(vd) | SCVTF | Rn(rn) | Rd(vd));
+ } else {
+ Emit(SF(rn) | FPType(vd) | SCVTF_fixed | FPScale(64 - fbits) | Rn(rn) |
+ Rd(vd));
+ }
+}
+
+
+void Assembler::ucvtf(const VRegister& vd,
+ const Register& rn,
+ int fbits) {
+ VIXL_ASSERT(vd.Is1S() || vd.Is1D());
+ VIXL_ASSERT(fbits >= 0);
+ if (fbits == 0) {
+ Emit(SF(rn) | FPType(vd) | UCVTF | Rn(rn) | Rd(vd));
+ } else {
+ Emit(SF(rn) | FPType(vd) | UCVTF_fixed | FPScale(64 - fbits) | Rn(rn) |
+ Rd(vd));
+ }
+}
+
+
+void Assembler::NEON3Same(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ NEON3SameOp vop) {
+ VIXL_ASSERT(AreSameFormat(vd, vn, vm));
+ VIXL_ASSERT(vd.IsVector() || !vd.IsQ());
+
+ Instr format, op = vop;
+ if (vd.IsScalar()) {
+ op |= NEON_Q | NEONScalar;
+ format = SFormat(vd);
+ } else {
+ format = VFormat(vd);
+ }
+
+ Emit(format | op | Rm(vm) | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::NEONFP3Same(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ Instr op) {
+ VIXL_ASSERT(AreSameFormat(vd, vn, vm));
+ Emit(FPFormat(vd) | op | Rm(vm) | Rn(vn) | Rd(vd));
+}
+
+
+#define NEON_FP2REGMISC_LIST(V) \
+ V(fabs, NEON_FABS, FABS) \
+ V(fneg, NEON_FNEG, FNEG) \
+ V(fsqrt, NEON_FSQRT, FSQRT) \
+ V(frintn, NEON_FRINTN, FRINTN) \
+ V(frinta, NEON_FRINTA, FRINTA) \
+ V(frintp, NEON_FRINTP, FRINTP) \
+ V(frintm, NEON_FRINTM, FRINTM) \
+ V(frintx, NEON_FRINTX, FRINTX) \
+ V(frintz, NEON_FRINTZ, FRINTZ) \
+ V(frinti, NEON_FRINTI, FRINTI) \
+ V(frsqrte, NEON_FRSQRTE, NEON_FRSQRTE_scalar) \
+ V(frecpe, NEON_FRECPE, NEON_FRECPE_scalar )
+
+
+#define DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP) \
+void Assembler::FN(const VRegister& vd, \
+ const VRegister& vn) { \
+ Instr op; \
+ if (vd.IsScalar()) { \
+ VIXL_ASSERT(vd.Is1S() || vd.Is1D()); \
+ op = SCA_OP; \
+ } else { \
+ VIXL_ASSERT(vd.Is2S() || vd.Is2D() || vd.Is4S()); \
+ op = VEC_OP; \
+ } \
+ NEONFP2RegMisc(vd, vn, op); \
+}
+NEON_FP2REGMISC_LIST(DEFINE_ASM_FUNC)
+#undef DEFINE_ASM_FUNC
+
+
+void Assembler::NEONFP2RegMisc(const VRegister& vd,
+ const VRegister& vn,
+ Instr op) {
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ Emit(FPFormat(vd) | op | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::NEON2RegMisc(const VRegister& vd,
+ const VRegister& vn,
+ NEON2RegMiscOp vop,
+ int value) {
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ VIXL_ASSERT(value == 0);
+ USE(value);
+
+ Instr format, op = vop;
+ if (vd.IsScalar()) {
+ op |= NEON_Q | NEONScalar;
+ format = SFormat(vd);
+ } else {
+ format = VFormat(vd);
+ }
+
+ Emit(format | op | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::cmeq(const VRegister& vd,
+ const VRegister& vn,
+ int value) {
+ VIXL_ASSERT(vd.IsVector() || vd.Is1D());
+ NEON2RegMisc(vd, vn, NEON_CMEQ_zero, value);
+}
+
+
+void Assembler::cmge(const VRegister& vd,
+ const VRegister& vn,
+ int value) {
+ VIXL_ASSERT(vd.IsVector() || vd.Is1D());
+ NEON2RegMisc(vd, vn, NEON_CMGE_zero, value);
+}
+
+
+void Assembler::cmgt(const VRegister& vd,
+ const VRegister& vn,
+ int value) {
+ VIXL_ASSERT(vd.IsVector() || vd.Is1D());
+ NEON2RegMisc(vd, vn, NEON_CMGT_zero, value);
+}
+
+
+void Assembler::cmle(const VRegister& vd,
+ const VRegister& vn,
+ int value) {
+ VIXL_ASSERT(vd.IsVector() || vd.Is1D());
+ NEON2RegMisc(vd, vn, NEON_CMLE_zero, value);
+}
+
+
+void Assembler::cmlt(const VRegister& vd,
+ const VRegister& vn,
+ int value) {
+ VIXL_ASSERT(vd.IsVector() || vd.Is1D());
+ NEON2RegMisc(vd, vn, NEON_CMLT_zero, value);
+}
+
+
+void Assembler::shll(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT((vd.Is8H() && vn.Is8B() && shift == 8) ||
+ (vd.Is4S() && vn.Is4H() && shift == 16) ||
+ (vd.Is2D() && vn.Is2S() && shift == 32));
+ USE(shift);
+ Emit(VFormat(vn) | NEON_SHLL | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::shll2(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ USE(shift);
+ VIXL_ASSERT((vd.Is8H() && vn.Is16B() && shift == 8) ||
+ (vd.Is4S() && vn.Is8H() && shift == 16) ||
+ (vd.Is2D() && vn.Is4S() && shift == 32));
+ Emit(VFormat(vn) | NEON_SHLL | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::NEONFP2RegMisc(const VRegister& vd,
+ const VRegister& vn,
+ NEON2RegMiscOp vop,
+ double value) {
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ VIXL_ASSERT(value == 0.0);
+ USE(value);
+
+ Instr op = vop;
+ if (vd.IsScalar()) {
+ VIXL_ASSERT(vd.Is1S() || vd.Is1D());
+ op |= NEON_Q | NEONScalar;
+ } else {
+ VIXL_ASSERT(vd.Is2S() || vd.Is2D() || vd.Is4S());
+ }
+
+ Emit(FPFormat(vd) | op | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::fcmeq(const VRegister& vd,
+ const VRegister& vn,
+ double value) {
+ NEONFP2RegMisc(vd, vn, NEON_FCMEQ_zero, value);
+}
+
+
+void Assembler::fcmge(const VRegister& vd,
+ const VRegister& vn,
+ double value) {
+ NEONFP2RegMisc(vd, vn, NEON_FCMGE_zero, value);
+}
+
+
+void Assembler::fcmgt(const VRegister& vd,
+ const VRegister& vn,
+ double value) {
+ NEONFP2RegMisc(vd, vn, NEON_FCMGT_zero, value);
+}
+
+
+void Assembler::fcmle(const VRegister& vd,
+ const VRegister& vn,
+ double value) {
+ NEONFP2RegMisc(vd, vn, NEON_FCMLE_zero, value);
+}
+
+
+void Assembler::fcmlt(const VRegister& vd,
+ const VRegister& vn,
+ double value) {
+ NEONFP2RegMisc(vd, vn, NEON_FCMLT_zero, value);
+}
+
+
+void Assembler::frecpx(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(vd.IsScalar());
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ VIXL_ASSERT(vd.Is1S() || vd.Is1D());
+ Emit(FPFormat(vd) | NEON_FRECPX_scalar | Rn(vn) | Rd(vd));
+}
+
+
+#define NEON_3SAME_LIST(V) \
+ V(add, NEON_ADD, vd.IsVector() || vd.Is1D()) \
+ V(addp, NEON_ADDP, vd.IsVector() || vd.Is1D()) \
+ V(sub, NEON_SUB, vd.IsVector() || vd.Is1D()) \
+ V(cmeq, NEON_CMEQ, vd.IsVector() || vd.Is1D()) \
+ V(cmge, NEON_CMGE, vd.IsVector() || vd.Is1D()) \
+ V(cmgt, NEON_CMGT, vd.IsVector() || vd.Is1D()) \
+ V(cmhi, NEON_CMHI, vd.IsVector() || vd.Is1D()) \
+ V(cmhs, NEON_CMHS, vd.IsVector() || vd.Is1D()) \
+ V(cmtst, NEON_CMTST, vd.IsVector() || vd.Is1D()) \
+ V(sshl, NEON_SSHL, vd.IsVector() || vd.Is1D()) \
+ V(ushl, NEON_USHL, vd.IsVector() || vd.Is1D()) \
+ V(srshl, NEON_SRSHL, vd.IsVector() || vd.Is1D()) \
+ V(urshl, NEON_URSHL, vd.IsVector() || vd.Is1D()) \
+ V(sqdmulh, NEON_SQDMULH, vd.IsLaneSizeH() || vd.IsLaneSizeS()) \
+ V(sqrdmulh, NEON_SQRDMULH, vd.IsLaneSizeH() || vd.IsLaneSizeS()) \
+ V(shadd, NEON_SHADD, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(uhadd, NEON_UHADD, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(srhadd, NEON_SRHADD, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(urhadd, NEON_URHADD, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(shsub, NEON_SHSUB, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(uhsub, NEON_UHSUB, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(smax, NEON_SMAX, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(smaxp, NEON_SMAXP, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(smin, NEON_SMIN, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(sminp, NEON_SMINP, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(umax, NEON_UMAX, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(umaxp, NEON_UMAXP, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(umin, NEON_UMIN, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(uminp, NEON_UMINP, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(saba, NEON_SABA, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(sabd, NEON_SABD, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(uaba, NEON_UABA, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(uabd, NEON_UABD, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(mla, NEON_MLA, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(mls, NEON_MLS, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(mul, NEON_MUL, vd.IsVector() && !vd.IsLaneSizeD()) \
+ V(and_, NEON_AND, vd.Is8B() || vd.Is16B()) \
+ V(orr, NEON_ORR, vd.Is8B() || vd.Is16B()) \
+ V(orn, NEON_ORN, vd.Is8B() || vd.Is16B()) \
+ V(eor, NEON_EOR, vd.Is8B() || vd.Is16B()) \
+ V(bic, NEON_BIC, vd.Is8B() || vd.Is16B()) \
+ V(bit, NEON_BIT, vd.Is8B() || vd.Is16B()) \
+ V(bif, NEON_BIF, vd.Is8B() || vd.Is16B()) \
+ V(bsl, NEON_BSL, vd.Is8B() || vd.Is16B()) \
+ V(pmul, NEON_PMUL, vd.Is8B() || vd.Is16B()) \
+ V(uqadd, NEON_UQADD, true) \
+ V(sqadd, NEON_SQADD, true) \
+ V(uqsub, NEON_UQSUB, true) \
+ V(sqsub, NEON_SQSUB, true) \
+ V(sqshl, NEON_SQSHL, true) \
+ V(uqshl, NEON_UQSHL, true) \
+ V(sqrshl, NEON_SQRSHL, true) \
+ V(uqrshl, NEON_UQRSHL, true)
+
+#define DEFINE_ASM_FUNC(FN, OP, AS) \
+void Assembler::FN(const VRegister& vd, \
+ const VRegister& vn, \
+ const VRegister& vm) { \
+ VIXL_ASSERT(AS); \
+ NEON3Same(vd, vn, vm, OP); \
+}
+NEON_3SAME_LIST(DEFINE_ASM_FUNC)
+#undef DEFINE_ASM_FUNC
+
+
+#define NEON_FP3SAME_OP_LIST(V) \
+ V(fadd, NEON_FADD, FADD) \
+ V(fsub, NEON_FSUB, FSUB) \
+ V(fmul, NEON_FMUL, FMUL) \
+ V(fdiv, NEON_FDIV, FDIV) \
+ V(fmax, NEON_FMAX, FMAX) \
+ V(fmaxnm, NEON_FMAXNM, FMAXNM) \
+ V(fmin, NEON_FMIN, FMIN) \
+ V(fminnm, NEON_FMINNM, FMINNM) \
+ V(fmulx, NEON_FMULX, NEON_FMULX_scalar) \
+ V(frecps, NEON_FRECPS, NEON_FRECPS_scalar) \
+ V(frsqrts, NEON_FRSQRTS, NEON_FRSQRTS_scalar) \
+ V(fabd, NEON_FABD, NEON_FABD_scalar) \
+ V(fmla, NEON_FMLA, 0) \
+ V(fmls, NEON_FMLS, 0) \
+ V(facge, NEON_FACGE, NEON_FACGE_scalar) \
+ V(facgt, NEON_FACGT, NEON_FACGT_scalar) \
+ V(fcmeq, NEON_FCMEQ, NEON_FCMEQ_scalar) \
+ V(fcmge, NEON_FCMGE, NEON_FCMGE_scalar) \
+ V(fcmgt, NEON_FCMGT, NEON_FCMGT_scalar) \
+ V(faddp, NEON_FADDP, 0) \
+ V(fmaxp, NEON_FMAXP, 0) \
+ V(fminp, NEON_FMINP, 0) \
+ V(fmaxnmp, NEON_FMAXNMP, 0) \
+ V(fminnmp, NEON_FMINNMP, 0)
+
+#define DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP) \
+void Assembler::FN(const VRegister& vd, \
+ const VRegister& vn, \
+ const VRegister& vm) { \
+ Instr op; \
+ if ((SCA_OP != 0) && vd.IsScalar()) { \
+ VIXL_ASSERT(vd.Is1S() || vd.Is1D()); \
+ op = SCA_OP; \
+ } else { \
+ VIXL_ASSERT(vd.IsVector()); \
+ VIXL_ASSERT(vd.Is2S() || vd.Is2D() || vd.Is4S()); \
+ op = VEC_OP; \
+ } \
+ NEONFP3Same(vd, vn, vm, op); \
+}
+NEON_FP3SAME_OP_LIST(DEFINE_ASM_FUNC)
+#undef DEFINE_ASM_FUNC
+
+
+void Assembler::addp(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT((vd.Is1D() && vn.Is2D()));
+ Emit(SFormat(vd) | NEON_ADDP_scalar | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::faddp(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT((vd.Is1S() && vn.Is2S()) ||
+ (vd.Is1D() && vn.Is2D()));
+ Emit(FPFormat(vd) | NEON_FADDP_scalar | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::fmaxp(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT((vd.Is1S() && vn.Is2S()) ||
+ (vd.Is1D() && vn.Is2D()));
+ Emit(FPFormat(vd) | NEON_FMAXP_scalar | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::fminp(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT((vd.Is1S() && vn.Is2S()) ||
+ (vd.Is1D() && vn.Is2D()));
+ Emit(FPFormat(vd) | NEON_FMINP_scalar | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::fmaxnmp(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT((vd.Is1S() && vn.Is2S()) ||
+ (vd.Is1D() && vn.Is2D()));
+ Emit(FPFormat(vd) | NEON_FMAXNMP_scalar | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::fminnmp(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT((vd.Is1S() && vn.Is2S()) ||
+ (vd.Is1D() && vn.Is2D()));
+ Emit(FPFormat(vd) | NEON_FMINNMP_scalar | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::orr(const VRegister& vd,
+ const int imm8,
+ const int left_shift) {
+ NEONModifiedImmShiftLsl(vd, imm8, left_shift,
+ NEONModifiedImmediate_ORR);
+}
+
+
+void Assembler::mov(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ if (vd.IsD()) {
+ orr(vd.V8B(), vn.V8B(), vn.V8B());
+ } else {
+ VIXL_ASSERT(vd.IsQ());
+ orr(vd.V16B(), vn.V16B(), vn.V16B());
+ }
+}
+
+
+void Assembler::bic(const VRegister& vd,
+ const int imm8,
+ const int left_shift) {
+ NEONModifiedImmShiftLsl(vd, imm8, left_shift,
+ NEONModifiedImmediate_BIC);
+}
+
+
+void Assembler::movi(const VRegister& vd,
+ const uint64_t imm,
+ Shift shift,
+ const int shift_amount) {
+ VIXL_ASSERT((shift == LSL) || (shift == MSL));
+ if (vd.Is2D() || vd.Is1D()) {
+ VIXL_ASSERT(shift_amount == 0);
+ int imm8 = 0;
+ for (int i = 0; i < 8; ++i) {
+ int byte = (imm >> (i * 8)) & 0xff;
+ VIXL_ASSERT((byte == 0) || (byte == 0xff));
+ if (byte == 0xff) {
+ imm8 |= (1 << i);
+ }
+ }
+ int q = vd.Is2D() ? NEON_Q : 0;
+ Emit(q | NEONModImmOp(1) | NEONModifiedImmediate_MOVI |
+ ImmNEONabcdefgh(imm8) | NEONCmode(0xe) | Rd(vd));
+ } else if (shift == LSL) {
+ VIXL_ASSERT(IsUint8(imm));
+ NEONModifiedImmShiftLsl(vd, static_cast<int>(imm), shift_amount,
+ NEONModifiedImmediate_MOVI);
+ } else {
+ VIXL_ASSERT(IsUint8(imm));
+ NEONModifiedImmShiftMsl(vd, static_cast<int>(imm), shift_amount,
+ NEONModifiedImmediate_MOVI);
+ }
+}
+
+
+void Assembler::mvn(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ if (vd.IsD()) {
+ not_(vd.V8B(), vn.V8B());
+ } else {
+ VIXL_ASSERT(vd.IsQ());
+ not_(vd.V16B(), vn.V16B());
+ }
+}
+
+
+void Assembler::mvni(const VRegister& vd,
+ const int imm8,
+ Shift shift,
+ const int shift_amount) {
+ VIXL_ASSERT((shift == LSL) || (shift == MSL));
+ if (shift == LSL) {
+ NEONModifiedImmShiftLsl(vd, imm8, shift_amount,
+ NEONModifiedImmediate_MVNI);
+ } else {
+ NEONModifiedImmShiftMsl(vd, imm8, shift_amount,
+ NEONModifiedImmediate_MVNI);
+ }
+}
+
+
+void Assembler::NEONFPByElement(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index,
+ NEONByIndexedElementOp vop) {
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ VIXL_ASSERT((vd.Is2S() && vm.Is1S()) ||
+ (vd.Is4S() && vm.Is1S()) ||
+ (vd.Is1S() && vm.Is1S()) ||
+ (vd.Is2D() && vm.Is1D()) ||
+ (vd.Is1D() && vm.Is1D()));
+ VIXL_ASSERT((vm.Is1S() && (vm_index < 4)) ||
+ (vm.Is1D() && (vm_index < 2)));
+
+ Instr op = vop;
+ int index_num_bits = vm.Is1S() ? 2 : 1;
+ if (vd.IsScalar()) {
+ op |= NEON_Q | NEONScalar;
+ }
+
+ Emit(FPFormat(vd) | op | ImmNEONHLM(vm_index, index_num_bits) |
+ Rm(vm) | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::NEONByElement(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index,
+ NEONByIndexedElementOp vop) {
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ VIXL_ASSERT((vd.Is4H() && vm.Is1H()) ||
+ (vd.Is8H() && vm.Is1H()) ||
+ (vd.Is1H() && vm.Is1H()) ||
+ (vd.Is2S() && vm.Is1S()) ||
+ (vd.Is4S() && vm.Is1S()) ||
+ (vd.Is1S() && vm.Is1S()));
+ VIXL_ASSERT((vm.Is1H() && (vm.code() < 16) && (vm_index < 8)) ||
+ (vm.Is1S() && (vm_index < 4)));
+
+ Instr format, op = vop;
+ int index_num_bits = vm.Is1H() ? 3 : 2;
+ if (vd.IsScalar()) {
+ op |= NEONScalar | NEON_Q;
+ format = SFormat(vn);
+ } else {
+ format = VFormat(vn);
+ }
+ Emit(format | op | ImmNEONHLM(vm_index, index_num_bits) |
+ Rm(vm) | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::NEONByElementL(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index,
+ NEONByIndexedElementOp vop) {
+ VIXL_ASSERT((vd.Is4S() && vn.Is4H() && vm.Is1H()) ||
+ (vd.Is4S() && vn.Is8H() && vm.Is1H()) ||
+ (vd.Is1S() && vn.Is1H() && vm.Is1H()) ||
+ (vd.Is2D() && vn.Is2S() && vm.Is1S()) ||
+ (vd.Is2D() && vn.Is4S() && vm.Is1S()) ||
+ (vd.Is1D() && vn.Is1S() && vm.Is1S()));
+
+ VIXL_ASSERT((vm.Is1H() && (vm.code() < 16) && (vm_index < 8)) ||
+ (vm.Is1S() && (vm_index < 4)));
+
+ Instr format, op = vop;
+ int index_num_bits = vm.Is1H() ? 3 : 2;
+ if (vd.IsScalar()) {
+ op |= NEONScalar | NEON_Q;
+ format = SFormat(vn);
+ } else {
+ format = VFormat(vn);
+ }
+ Emit(format | op | ImmNEONHLM(vm_index, index_num_bits) |
+ Rm(vm) | Rn(vn) | Rd(vd));
+}
+
+
+#define NEON_BYELEMENT_LIST(V) \
+ V(mul, NEON_MUL_byelement, vn.IsVector()) \
+ V(mla, NEON_MLA_byelement, vn.IsVector()) \
+ V(mls, NEON_MLS_byelement, vn.IsVector()) \
+ V(sqdmulh, NEON_SQDMULH_byelement, true) \
+ V(sqrdmulh, NEON_SQRDMULH_byelement, true)
+
+
+#define DEFINE_ASM_FUNC(FN, OP, AS) \
+void Assembler::FN(const VRegister& vd, \
+ const VRegister& vn, \
+ const VRegister& vm, \
+ int vm_index) { \
+ VIXL_ASSERT(AS); \
+ NEONByElement(vd, vn, vm, vm_index, OP); \
+}
+NEON_BYELEMENT_LIST(DEFINE_ASM_FUNC)
+#undef DEFINE_ASM_FUNC
+
+
+#define NEON_FPBYELEMENT_LIST(V) \
+ V(fmul, NEON_FMUL_byelement) \
+ V(fmla, NEON_FMLA_byelement) \
+ V(fmls, NEON_FMLS_byelement) \
+ V(fmulx, NEON_FMULX_byelement)
+
+
+#define DEFINE_ASM_FUNC(FN, OP) \
+void Assembler::FN(const VRegister& vd, \
+ const VRegister& vn, \
+ const VRegister& vm, \
+ int vm_index) { \
+ NEONFPByElement(vd, vn, vm, vm_index, OP); \
+}
+NEON_FPBYELEMENT_LIST(DEFINE_ASM_FUNC)
+#undef DEFINE_ASM_FUNC
+
+
+#define NEON_BYELEMENT_LONG_LIST(V) \
+ V(sqdmull, NEON_SQDMULL_byelement, vn.IsScalar() || vn.IsD()) \
+ V(sqdmull2, NEON_SQDMULL_byelement, vn.IsVector() && vn.IsQ()) \
+ V(sqdmlal, NEON_SQDMLAL_byelement, vn.IsScalar() || vn.IsD()) \
+ V(sqdmlal2, NEON_SQDMLAL_byelement, vn.IsVector() && vn.IsQ()) \
+ V(sqdmlsl, NEON_SQDMLSL_byelement, vn.IsScalar() || vn.IsD()) \
+ V(sqdmlsl2, NEON_SQDMLSL_byelement, vn.IsVector() && vn.IsQ()) \
+ V(smull, NEON_SMULL_byelement, vn.IsVector() && vn.IsD()) \
+ V(smull2, NEON_SMULL_byelement, vn.IsVector() && vn.IsQ()) \
+ V(umull, NEON_UMULL_byelement, vn.IsVector() && vn.IsD()) \
+ V(umull2, NEON_UMULL_byelement, vn.IsVector() && vn.IsQ()) \
+ V(smlal, NEON_SMLAL_byelement, vn.IsVector() && vn.IsD()) \
+ V(smlal2, NEON_SMLAL_byelement, vn.IsVector() && vn.IsQ()) \
+ V(umlal, NEON_UMLAL_byelement, vn.IsVector() && vn.IsD()) \
+ V(umlal2, NEON_UMLAL_byelement, vn.IsVector() && vn.IsQ()) \
+ V(smlsl, NEON_SMLSL_byelement, vn.IsVector() && vn.IsD()) \
+ V(smlsl2, NEON_SMLSL_byelement, vn.IsVector() && vn.IsQ()) \
+ V(umlsl, NEON_UMLSL_byelement, vn.IsVector() && vn.IsD()) \
+ V(umlsl2, NEON_UMLSL_byelement, vn.IsVector() && vn.IsQ())
+
+
+#define DEFINE_ASM_FUNC(FN, OP, AS) \
+void Assembler::FN(const VRegister& vd, \
+ const VRegister& vn, \
+ const VRegister& vm, \
+ int vm_index) { \
+ VIXL_ASSERT(AS); \
+ NEONByElementL(vd, vn, vm, vm_index, OP); \
+}
+NEON_BYELEMENT_LONG_LIST(DEFINE_ASM_FUNC)
+#undef DEFINE_ASM_FUNC
+
+
+void Assembler::suqadd(const VRegister& vd,
+ const VRegister& vn) {
+ NEON2RegMisc(vd, vn, NEON_SUQADD);
+}
+
+
+void Assembler::usqadd(const VRegister& vd,
+ const VRegister& vn) {
+ NEON2RegMisc(vd, vn, NEON_USQADD);
+}
+
+
+void Assembler::abs(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(vd.IsVector() || vd.Is1D());
+ NEON2RegMisc(vd, vn, NEON_ABS);
+}
+
+
+void Assembler::sqabs(const VRegister& vd,
+ const VRegister& vn) {
+ NEON2RegMisc(vd, vn, NEON_SQABS);
+}
+
+
+void Assembler::neg(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(vd.IsVector() || vd.Is1D());
+ NEON2RegMisc(vd, vn, NEON_NEG);
+}
+
+
+void Assembler::sqneg(const VRegister& vd,
+ const VRegister& vn) {
+ NEON2RegMisc(vd, vn, NEON_SQNEG);
+}
+
+
+void Assembler::NEONXtn(const VRegister& vd,
+ const VRegister& vn,
+ NEON2RegMiscOp vop) {
+ Instr format, op = vop;
+ if (vd.IsScalar()) {
+ VIXL_ASSERT((vd.Is1B() && vn.Is1H()) ||
+ (vd.Is1H() && vn.Is1S()) ||
+ (vd.Is1S() && vn.Is1D()));
+ op |= NEON_Q | NEONScalar;
+ format = SFormat(vd);
+ } else {
+ VIXL_ASSERT((vd.Is8B() && vn.Is8H()) ||
+ (vd.Is4H() && vn.Is4S()) ||
+ (vd.Is2S() && vn.Is2D()) ||
+ (vd.Is16B() && vn.Is8H()) ||
+ (vd.Is8H() && vn.Is4S()) ||
+ (vd.Is4S() && vn.Is2D()));
+ format = VFormat(vd);
+ }
+ Emit(format | op | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::xtn(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(vd.IsVector() && vd.IsD());
+ NEONXtn(vd, vn, NEON_XTN);
+}
+
+
+void Assembler::xtn2(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(vd.IsVector() && vd.IsQ());
+ NEONXtn(vd, vn, NEON_XTN);
+}
+
+
+void Assembler::sqxtn(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(vd.IsScalar() || vd.IsD());
+ NEONXtn(vd, vn, NEON_SQXTN);
+}
+
+
+void Assembler::sqxtn2(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(vd.IsVector() && vd.IsQ());
+ NEONXtn(vd, vn, NEON_SQXTN);
+}
+
+
+void Assembler::sqxtun(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(vd.IsScalar() || vd.IsD());
+ NEONXtn(vd, vn, NEON_SQXTUN);
+}
+
+
+void Assembler::sqxtun2(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(vd.IsVector() && vd.IsQ());
+ NEONXtn(vd, vn, NEON_SQXTUN);
+}
+
+
+void Assembler::uqxtn(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(vd.IsScalar() || vd.IsD());
+ NEONXtn(vd, vn, NEON_UQXTN);
+}
+
+
+void Assembler::uqxtn2(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(vd.IsVector() && vd.IsQ());
+ NEONXtn(vd, vn, NEON_UQXTN);
+}
+
+
+// NEON NOT and RBIT are distinguised by bit 22, the bottom bit of "size".
+void Assembler::not_(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ VIXL_ASSERT(vd.Is8B() || vd.Is16B());
+ Emit(VFormat(vd) | NEON_RBIT_NOT | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::rbit(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ VIXL_ASSERT(vd.Is8B() || vd.Is16B());
+ Emit(VFormat(vn) | (1 << NEONSize_offset) | NEON_RBIT_NOT | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::ext(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int index) {
+ VIXL_ASSERT(AreSameFormat(vd, vn, vm));
+ VIXL_ASSERT(vd.Is8B() || vd.Is16B());
+ VIXL_ASSERT((0 <= index) && (index < vd.lanes()));
+ Emit(VFormat(vd) | NEON_EXT | Rm(vm) | ImmNEONExt(index) | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::dup(const VRegister& vd,
+ const VRegister& vn,
+ int vn_index) {
+ Instr q, scalar;
+
+ // We support vn arguments of the form vn.VxT() or vn.T(), where x is the
+ // number of lanes, and T is b, h, s or d.
+ int lane_size = vn.LaneSizeInBytes();
+ NEONFormatField format;
+ switch (lane_size) {
+ case 1: format = NEON_16B; break;
+ case 2: format = NEON_8H; break;
+ case 4: format = NEON_4S; break;
+ default:
+ VIXL_ASSERT(lane_size == 8);
+ format = NEON_2D;
+ break;
+ }
+
+ if (vd.IsScalar()) {
+ q = NEON_Q;
+ scalar = NEONScalar;
+ } else {
+ VIXL_ASSERT(!vd.Is1D());
+ q = vd.IsD() ? 0 : NEON_Q;
+ scalar = 0;
+ }
+ Emit(q | scalar | NEON_DUP_ELEMENT |
+ ImmNEON5(format, vn_index) | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::mov(const VRegister& vd,
+ const VRegister& vn,
+ int vn_index) {
+ VIXL_ASSERT(vn.IsScalar());
+ dup(vd, vn, vn_index);
+}
+
+
+void Assembler::dup(const VRegister& vd, const Register& rn) {
+ VIXL_ASSERT(!vd.Is1D());
+ VIXL_ASSERT(vd.Is2D() == rn.IsX());
+ int q = vd.IsD() ? 0 : NEON_Q;
+ Emit(q | NEON_DUP_GENERAL | ImmNEON5(VFormat(vd), 0) | Rn(rn) | Rd(vd));
+}
+
+
+void Assembler::ins(const VRegister& vd,
+ int vd_index,
+ const VRegister& vn,
+ int vn_index) {
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ // We support vd arguments of the form vd.VxT() or vd.T(), where x is the
+ // number of lanes, and T is b, h, s or d.
+ int lane_size = vd.LaneSizeInBytes();
+ NEONFormatField format;
+ switch (lane_size) {
+ case 1: format = NEON_16B; break;
+ case 2: format = NEON_8H; break;
+ case 4: format = NEON_4S; break;
+ default:
+ VIXL_ASSERT(lane_size == 8);
+ format = NEON_2D;
+ break;
+ }
+
+ VIXL_ASSERT((0 <= vd_index) &&
+ (vd_index < LaneCountFromFormat(static_cast<VectorFormat>(format))));
+ VIXL_ASSERT((0 <= vn_index) &&
+ (vn_index < LaneCountFromFormat(static_cast<VectorFormat>(format))));
+ Emit(NEON_INS_ELEMENT | ImmNEON5(format, vd_index) |
+ ImmNEON4(format, vn_index) | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::mov(const VRegister& vd,
+ int vd_index,
+ const VRegister& vn,
+ int vn_index) {
+ ins(vd, vd_index, vn, vn_index);
+}
+
+
+void Assembler::ins(const VRegister& vd,
+ int vd_index,
+ const Register& rn) {
+ // We support vd arguments of the form vd.VxT() or vd.T(), where x is the
+ // number of lanes, and T is b, h, s or d.
+ int lane_size = vd.LaneSizeInBytes();
+ NEONFormatField format;
+ switch (lane_size) {
+ case 1: format = NEON_16B; VIXL_ASSERT(rn.IsW()); break;
+ case 2: format = NEON_8H; VIXL_ASSERT(rn.IsW()); break;
+ case 4: format = NEON_4S; VIXL_ASSERT(rn.IsW()); break;
+ default:
+ VIXL_ASSERT(lane_size == 8);
+ VIXL_ASSERT(rn.IsX());
+ format = NEON_2D;
+ break;
+ }
+
+ VIXL_ASSERT((0 <= vd_index) &&
+ (vd_index < LaneCountFromFormat(static_cast<VectorFormat>(format))));
+ Emit(NEON_INS_GENERAL | ImmNEON5(format, vd_index) | Rn(rn) | Rd(vd));
+}
+
+
+void Assembler::mov(const VRegister& vd,
+ int vd_index,
+ const Register& rn) {
+ ins(vd, vd_index, rn);
+}
+
+
+void Assembler::umov(const Register& rd,
+ const VRegister& vn,
+ int vn_index) {
+ // We support vd arguments of the form vd.VxT() or vd.T(), where x is the
+ // number of lanes, and T is b, h, s or d.
+ int lane_size = vn.LaneSizeInBytes();
+ NEONFormatField format;
+ Instr q = 0;
+ switch (lane_size) {
+ case 1: format = NEON_16B; VIXL_ASSERT(rd.IsW()); break;
+ case 2: format = NEON_8H; VIXL_ASSERT(rd.IsW()); break;
+ case 4: format = NEON_4S; VIXL_ASSERT(rd.IsW()); break;
+ default:
+ VIXL_ASSERT(lane_size == 8);
+ VIXL_ASSERT(rd.IsX());
+ format = NEON_2D;
+ q = NEON_Q;
+ break;
+ }
+
+ VIXL_ASSERT((0 <= vn_index) &&
+ (vn_index < LaneCountFromFormat(static_cast<VectorFormat>(format))));
+ Emit(q | NEON_UMOV | ImmNEON5(format, vn_index) | Rn(vn) | Rd(rd));
+}
+
+
+void Assembler::mov(const Register& rd,
+ const VRegister& vn,
+ int vn_index) {
+ VIXL_ASSERT(vn.SizeInBytes() >= 4);
+ umov(rd, vn, vn_index);
+}
+
+
+void Assembler::smov(const Register& rd,
+ const VRegister& vn,
+ int vn_index) {
+ // We support vd arguments of the form vd.VxT() or vd.T(), where x is the
+ // number of lanes, and T is b, h, s.
+ int lane_size = vn.LaneSizeInBytes();
+ NEONFormatField format;
+ Instr q = 0;
+ VIXL_ASSERT(lane_size != 8);
+ switch (lane_size) {
+ case 1: format = NEON_16B; break;
+ case 2: format = NEON_8H; break;
+ default:
+ VIXL_ASSERT(lane_size == 4);
+ VIXL_ASSERT(rd.IsX());
+ format = NEON_4S;
+ break;
+ }
+ q = rd.IsW() ? 0 : NEON_Q;
+ VIXL_ASSERT((0 <= vn_index) &&
+ (vn_index < LaneCountFromFormat(static_cast<VectorFormat>(format))));
+ Emit(q | NEON_SMOV | ImmNEON5(format, vn_index) | Rn(vn) | Rd(rd));
+}
+
+
+void Assembler::cls(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ VIXL_ASSERT(!vd.Is1D() && !vd.Is2D());
+ Emit(VFormat(vn) | NEON_CLS | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::clz(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ VIXL_ASSERT(!vd.Is1D() && !vd.Is2D());
+ Emit(VFormat(vn) | NEON_CLZ | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::cnt(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ VIXL_ASSERT(vd.Is8B() || vd.Is16B());
+ Emit(VFormat(vn) | NEON_CNT | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::rev16(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ VIXL_ASSERT(vd.Is8B() || vd.Is16B());
+ Emit(VFormat(vn) | NEON_REV16 | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::rev32(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ VIXL_ASSERT(vd.Is8B() || vd.Is16B() || vd.Is4H() || vd.Is8H());
+ Emit(VFormat(vn) | NEON_REV32 | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::rev64(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ VIXL_ASSERT(!vd.Is1D() && !vd.Is2D());
+ Emit(VFormat(vn) | NEON_REV64 | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::ursqrte(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ VIXL_ASSERT(vd.Is2S() || vd.Is4S());
+ Emit(VFormat(vn) | NEON_URSQRTE | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::urecpe(const VRegister& vd,
+ const VRegister& vn) {
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ VIXL_ASSERT(vd.Is2S() || vd.Is4S());
+ Emit(VFormat(vn) | NEON_URECPE | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::NEONAddlp(const VRegister& vd,
+ const VRegister& vn,
+ NEON2RegMiscOp op) {
+ VIXL_ASSERT((op == NEON_SADDLP) ||
+ (op == NEON_UADDLP) ||
+ (op == NEON_SADALP) ||
+ (op == NEON_UADALP));
+
+ VIXL_ASSERT((vn.Is8B() && vd.Is4H()) ||
+ (vn.Is4H() && vd.Is2S()) ||
+ (vn.Is2S() && vd.Is1D()) ||
+ (vn.Is16B() && vd.Is8H())||
+ (vn.Is8H() && vd.Is4S()) ||
+ (vn.Is4S() && vd.Is2D()));
+ Emit(VFormat(vn) | op | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::saddlp(const VRegister& vd,
+ const VRegister& vn) {
+ NEONAddlp(vd, vn, NEON_SADDLP);
+}
+
+
+void Assembler::uaddlp(const VRegister& vd,
+ const VRegister& vn) {
+ NEONAddlp(vd, vn, NEON_UADDLP);
+}
+
+
+void Assembler::sadalp(const VRegister& vd,
+ const VRegister& vn) {
+ NEONAddlp(vd, vn, NEON_SADALP);
+}
+
+
+void Assembler::uadalp(const VRegister& vd,
+ const VRegister& vn) {
+ NEONAddlp(vd, vn, NEON_UADALP);
+}
+
+
+void Assembler::NEONAcrossLanesL(const VRegister& vd,
+ const VRegister& vn,
+ NEONAcrossLanesOp op) {
+ VIXL_ASSERT((vn.Is8B() && vd.Is1H()) ||
+ (vn.Is16B() && vd.Is1H()) ||
+ (vn.Is4H() && vd.Is1S()) ||
+ (vn.Is8H() && vd.Is1S()) ||
+ (vn.Is4S() && vd.Is1D()));
+ Emit(VFormat(vn) | op | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::saddlv(const VRegister& vd,
+ const VRegister& vn) {
+ NEONAcrossLanesL(vd, vn, NEON_SADDLV);
+}
+
+
+void Assembler::uaddlv(const VRegister& vd,
+ const VRegister& vn) {
+ NEONAcrossLanesL(vd, vn, NEON_UADDLV);
+}
+
+
+void Assembler::NEONAcrossLanes(const VRegister& vd,
+ const VRegister& vn,
+ NEONAcrossLanesOp op) {
+ VIXL_ASSERT((vn.Is8B() && vd.Is1B()) ||
+ (vn.Is16B() && vd.Is1B()) ||
+ (vn.Is4H() && vd.Is1H()) ||
+ (vn.Is8H() && vd.Is1H()) ||
+ (vn.Is4S() && vd.Is1S()));
+ if ((op & NEONAcrossLanesFPFMask) == NEONAcrossLanesFPFixed) {
+ Emit(FPFormat(vn) | op | Rn(vn) | Rd(vd));
+ } else {
+ Emit(VFormat(vn) | op | Rn(vn) | Rd(vd));
+ }
+}
+
+
+#define NEON_ACROSSLANES_LIST(V) \
+ V(fmaxv, NEON_FMAXV, vd.Is1S()) \
+ V(fminv, NEON_FMINV, vd.Is1S()) \
+ V(fmaxnmv, NEON_FMAXNMV, vd.Is1S()) \
+ V(fminnmv, NEON_FMINNMV, vd.Is1S()) \
+ V(addv, NEON_ADDV, true) \
+ V(smaxv, NEON_SMAXV, true) \
+ V(sminv, NEON_SMINV, true) \
+ V(umaxv, NEON_UMAXV, true) \
+ V(uminv, NEON_UMINV, true)
+
+
+#define DEFINE_ASM_FUNC(FN, OP, AS) \
+void Assembler::FN(const VRegister& vd, \
+ const VRegister& vn) { \
+ VIXL_ASSERT(AS); \
+ NEONAcrossLanes(vd, vn, OP); \
+}
+NEON_ACROSSLANES_LIST(DEFINE_ASM_FUNC)
+#undef DEFINE_ASM_FUNC
+
+
+void Assembler::NEONPerm(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ NEONPermOp op) {
+ VIXL_ASSERT(AreSameFormat(vd, vn, vm));
+ VIXL_ASSERT(!vd.Is1D());
+ Emit(VFormat(vd) | op | Rm(vm) | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::trn1(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm) {
+ NEONPerm(vd, vn, vm, NEON_TRN1);
+}
+
+
+void Assembler::trn2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm) {
+ NEONPerm(vd, vn, vm, NEON_TRN2);
+}
+
+
+void Assembler::uzp1(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm) {
+ NEONPerm(vd, vn, vm, NEON_UZP1);
+}
+
+
+void Assembler::uzp2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm) {
+ NEONPerm(vd, vn, vm, NEON_UZP2);
+}
+
+
+void Assembler::zip1(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm) {
+ NEONPerm(vd, vn, vm, NEON_ZIP1);
+}
+
+
+void Assembler::zip2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm) {
+ NEONPerm(vd, vn, vm, NEON_ZIP2);
+}
+
+
+void Assembler::NEONShiftImmediate(const VRegister& vd,
+ const VRegister& vn,
+ NEONShiftImmediateOp op,
+ int immh_immb) {
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ Instr q, scalar;
+ if (vn.IsScalar()) {
+ q = NEON_Q;
+ scalar = NEONScalar;
+ } else {
+ q = vd.IsD() ? 0 : NEON_Q;
+ scalar = 0;
+ }
+ Emit(q | op | scalar | immh_immb | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::NEONShiftLeftImmediate(const VRegister& vd,
+ const VRegister& vn,
+ int shift,
+ NEONShiftImmediateOp op) {
+ int laneSizeInBits = vn.LaneSizeInBits();
+ VIXL_ASSERT((shift >= 0) && (shift < laneSizeInBits));
+ NEONShiftImmediate(vd, vn, op, (laneSizeInBits + shift) << 16);
+}
+
+
+void Assembler::NEONShiftRightImmediate(const VRegister& vd,
+ const VRegister& vn,
+ int shift,
+ NEONShiftImmediateOp op) {
+ int laneSizeInBits = vn.LaneSizeInBits();
+ VIXL_ASSERT((shift >= 1) && (shift <= laneSizeInBits));
+ NEONShiftImmediate(vd, vn, op, ((2 * laneSizeInBits) - shift) << 16);
+}
+
+
+void Assembler::NEONShiftImmediateL(const VRegister& vd,
+ const VRegister& vn,
+ int shift,
+ NEONShiftImmediateOp op) {
+ int laneSizeInBits = vn.LaneSizeInBits();
+ VIXL_ASSERT((shift >= 0) && (shift < laneSizeInBits));
+ int immh_immb = (laneSizeInBits + shift) << 16;
+
+ VIXL_ASSERT((vn.Is8B() && vd.Is8H()) ||
+ (vn.Is4H() && vd.Is4S()) ||
+ (vn.Is2S() && vd.Is2D()) ||
+ (vn.Is16B() && vd.Is8H())||
+ (vn.Is8H() && vd.Is4S()) ||
+ (vn.Is4S() && vd.Is2D()));
+ Instr q;
+ q = vn.IsD() ? 0 : NEON_Q;
+ Emit(q | op | immh_immb | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::NEONShiftImmediateN(const VRegister& vd,
+ const VRegister& vn,
+ int shift,
+ NEONShiftImmediateOp op) {
+ Instr q, scalar;
+ int laneSizeInBits = vd.LaneSizeInBits();
+ VIXL_ASSERT((shift >= 1) && (shift <= laneSizeInBits));
+ int immh_immb = (2 * laneSizeInBits - shift) << 16;
+
+ if (vn.IsScalar()) {
+ VIXL_ASSERT((vd.Is1B() && vn.Is1H()) ||
+ (vd.Is1H() && vn.Is1S()) ||
+ (vd.Is1S() && vn.Is1D()));
+ q = NEON_Q;
+ scalar = NEONScalar;
+ } else {
+ VIXL_ASSERT((vd.Is8B() && vn.Is8H()) ||
+ (vd.Is4H() && vn.Is4S()) ||
+ (vd.Is2S() && vn.Is2D()) ||
+ (vd.Is16B() && vn.Is8H())||
+ (vd.Is8H() && vn.Is4S()) ||
+ (vd.Is4S() && vn.Is2D()));
+ scalar = 0;
+ q = vd.IsD() ? 0 : NEON_Q;
+ }
+ Emit(q | op | scalar | immh_immb | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::shl(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vd.IsVector() || vd.Is1D());
+ NEONShiftLeftImmediate(vd, vn, shift, NEON_SHL);
+}
+
+
+void Assembler::sli(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vd.IsVector() || vd.Is1D());
+ NEONShiftLeftImmediate(vd, vn, shift, NEON_SLI);
+}
+
+
+void Assembler::sqshl(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ NEONShiftLeftImmediate(vd, vn, shift, NEON_SQSHL_imm);
+}
+
+
+void Assembler::sqshlu(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ NEONShiftLeftImmediate(vd, vn, shift, NEON_SQSHLU);
+}
+
+
+void Assembler::uqshl(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ NEONShiftLeftImmediate(vd, vn, shift, NEON_UQSHL_imm);
+}
+
+
+void Assembler::sshll(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vn.IsD());
+ NEONShiftImmediateL(vd, vn, shift, NEON_SSHLL);
+}
+
+
+void Assembler::sshll2(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vn.IsQ());
+ NEONShiftImmediateL(vd, vn, shift, NEON_SSHLL);
+}
+
+
+void Assembler::sxtl(const VRegister& vd,
+ const VRegister& vn) {
+ sshll(vd, vn, 0);
+}
+
+
+void Assembler::sxtl2(const VRegister& vd,
+ const VRegister& vn) {
+ sshll2(vd, vn, 0);
+}
+
+
+void Assembler::ushll(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vn.IsD());
+ NEONShiftImmediateL(vd, vn, shift, NEON_USHLL);
+}
+
+
+void Assembler::ushll2(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vn.IsQ());
+ NEONShiftImmediateL(vd, vn, shift, NEON_USHLL);
+}
+
+
+void Assembler::uxtl(const VRegister& vd,
+ const VRegister& vn) {
+ ushll(vd, vn, 0);
+}
+
+
+void Assembler::uxtl2(const VRegister& vd,
+ const VRegister& vn) {
+ ushll2(vd, vn, 0);
+}
+
+
+void Assembler::sri(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vd.IsVector() || vd.Is1D());
+ NEONShiftRightImmediate(vd, vn, shift, NEON_SRI);
+}
+
+
+void Assembler::sshr(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vd.IsVector() || vd.Is1D());
+ NEONShiftRightImmediate(vd, vn, shift, NEON_SSHR);
+}
+
+
+void Assembler::ushr(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vd.IsVector() || vd.Is1D());
+ NEONShiftRightImmediate(vd, vn, shift, NEON_USHR);
+}
+
+
+void Assembler::srshr(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vd.IsVector() || vd.Is1D());
+ NEONShiftRightImmediate(vd, vn, shift, NEON_SRSHR);
+}
+
+
+void Assembler::urshr(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vd.IsVector() || vd.Is1D());
+ NEONShiftRightImmediate(vd, vn, shift, NEON_URSHR);
+}
+
+
+void Assembler::ssra(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vd.IsVector() || vd.Is1D());
+ NEONShiftRightImmediate(vd, vn, shift, NEON_SSRA);
+}
+
+
+void Assembler::usra(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vd.IsVector() || vd.Is1D());
+ NEONShiftRightImmediate(vd, vn, shift, NEON_USRA);
+}
+
+
+void Assembler::srsra(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vd.IsVector() || vd.Is1D());
+ NEONShiftRightImmediate(vd, vn, shift, NEON_SRSRA);
+}
+
+
+void Assembler::ursra(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vd.IsVector() || vd.Is1D());
+ NEONShiftRightImmediate(vd, vn, shift, NEON_URSRA);
+}
+
+
+void Assembler::shrn(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vn.IsVector() && vd.IsD());
+ NEONShiftImmediateN(vd, vn, shift, NEON_SHRN);
+}
+
+
+void Assembler::shrn2(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vn.IsVector() && vd.IsQ());
+ NEONShiftImmediateN(vd, vn, shift, NEON_SHRN);
+}
+
+
+void Assembler::rshrn(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vn.IsVector() && vd.IsD());
+ NEONShiftImmediateN(vd, vn, shift, NEON_RSHRN);
+}
+
+
+void Assembler::rshrn2(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vn.IsVector() && vd.IsQ());
+ NEONShiftImmediateN(vd, vn, shift, NEON_RSHRN);
+}
+
+
+void Assembler::sqshrn(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vd.IsD() || (vn.IsScalar() && vd.IsScalar()));
+ NEONShiftImmediateN(vd, vn, shift, NEON_SQSHRN);
+}
+
+
+void Assembler::sqshrn2(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vn.IsVector() && vd.IsQ());
+ NEONShiftImmediateN(vd, vn, shift, NEON_SQSHRN);
+}
+
+
+void Assembler::sqrshrn(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vd.IsD() || (vn.IsScalar() && vd.IsScalar()));
+ NEONShiftImmediateN(vd, vn, shift, NEON_SQRSHRN);
+}
+
+
+void Assembler::sqrshrn2(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vn.IsVector() && vd.IsQ());
+ NEONShiftImmediateN(vd, vn, shift, NEON_SQRSHRN);
+}
+
+
+void Assembler::sqshrun(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vd.IsD() || (vn.IsScalar() && vd.IsScalar()));
+ NEONShiftImmediateN(vd, vn, shift, NEON_SQSHRUN);
+}
+
+
+void Assembler::sqshrun2(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vn.IsVector() && vd.IsQ());
+ NEONShiftImmediateN(vd, vn, shift, NEON_SQSHRUN);
+}
+
+
+void Assembler::sqrshrun(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vd.IsD() || (vn.IsScalar() && vd.IsScalar()));
+ NEONShiftImmediateN(vd, vn, shift, NEON_SQRSHRUN);
+}
+
+
+void Assembler::sqrshrun2(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vn.IsVector() && vd.IsQ());
+ NEONShiftImmediateN(vd, vn, shift, NEON_SQRSHRUN);
+}
+
+
+void Assembler::uqshrn(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vd.IsD() || (vn.IsScalar() && vd.IsScalar()));
+ NEONShiftImmediateN(vd, vn, shift, NEON_UQSHRN);
+}
+
+
+void Assembler::uqshrn2(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vn.IsVector() && vd.IsQ());
+ NEONShiftImmediateN(vd, vn, shift, NEON_UQSHRN);
+}
+
+
+void Assembler::uqrshrn(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vd.IsD() || (vn.IsScalar() && vd.IsScalar()));
+ NEONShiftImmediateN(vd, vn, shift, NEON_UQRSHRN);
+}
+
+
+void Assembler::uqrshrn2(const VRegister& vd,
+ const VRegister& vn,
+ int shift) {
+ VIXL_ASSERT(vn.IsVector() && vd.IsQ());
+ NEONShiftImmediateN(vd, vn, shift, NEON_UQRSHRN);
+}
+
+
+// Note:
+// Below, a difference in case for the same letter indicates a
+// negated bit.
+// If b is 1, then B is 0.
+uint32_t Assembler::FP32ToImm8(float imm) {
+ VIXL_ASSERT(IsImmFP32(imm));
+ // bits: aBbb.bbbc.defg.h000.0000.0000.0000.0000
+ uint32_t bits = FloatToRawbits(imm);
+ // bit7: a000.0000
+ uint32_t bit7 = ((bits >> 31) & 0x1) << 7;
+ // bit6: 0b00.0000
+ uint32_t bit6 = ((bits >> 29) & 0x1) << 6;
+ // bit5_to_0: 00cd.efgh
+ uint32_t bit5_to_0 = (bits >> 19) & 0x3f;
+
+ return bit7 | bit6 | bit5_to_0;
+}
+
+
+Instr Assembler::ImmFP32(float imm) {
+ return FP32ToImm8(imm) << ImmFP_offset;
+}
+
+
+uint32_t Assembler::FP64ToImm8(double imm) {
+ VIXL_ASSERT(IsImmFP64(imm));
+ // bits: aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
+ // 0000.0000.0000.0000.0000.0000.0000.0000
+ uint64_t bits = DoubleToRawbits(imm);
+ // bit7: a000.0000
+ uint64_t bit7 = ((bits >> 63) & 0x1) << 7;
+ // bit6: 0b00.0000
+ uint64_t bit6 = ((bits >> 61) & 0x1) << 6;
+ // bit5_to_0: 00cd.efgh
+ uint64_t bit5_to_0 = (bits >> 48) & 0x3f;
+
+ return static_cast<uint32_t>(bit7 | bit6 | bit5_to_0);
+}
+
+
+Instr Assembler::ImmFP64(double imm) {
+ return FP64ToImm8(imm) << ImmFP_offset;
+}
+
+
+// Code generation helpers.
+void Assembler::MoveWide(const Register& rd,
+ uint64_t imm,
+ int shift,
+ MoveWideImmediateOp mov_op) {
+ // Ignore the top 32 bits of an immediate if we're moving to a W register.
+ if (rd.Is32Bits()) {
+ // Check that the top 32 bits are zero (a positive 32-bit number) or top
+ // 33 bits are one (a negative 32-bit number, sign extended to 64 bits).
+ VIXL_ASSERT(((imm >> kWRegSize) == 0) ||
+ ((imm >> (kWRegSize - 1)) == 0x1ffffffff));
+ imm &= kWRegMask;
+ }
+
+ if (shift >= 0) {
+ // Explicit shift specified.
+ VIXL_ASSERT((shift == 0) || (shift == 16) ||
+ (shift == 32) || (shift == 48));
+ VIXL_ASSERT(rd.Is64Bits() || (shift == 0) || (shift == 16));
+ shift /= 16;
+ } else {
+ // Calculate a new immediate and shift combination to encode the immediate
+ // argument.
+ shift = 0;
+ if ((imm & 0xffffffffffff0000) == 0) {
+ // Nothing to do.
+ } else if ((imm & 0xffffffff0000ffff) == 0) {
+ imm >>= 16;
+ shift = 1;
+ } else if ((imm & 0xffff0000ffffffff) == 0) {
+ VIXL_ASSERT(rd.Is64Bits());
+ imm >>= 32;
+ shift = 2;
+ } else if ((imm & 0x0000ffffffffffff) == 0) {
+ VIXL_ASSERT(rd.Is64Bits());
+ imm >>= 48;
+ shift = 3;
+ }
+ }
+
+ VIXL_ASSERT(IsUint16(imm));
+
+ Emit(SF(rd) | MoveWideImmediateFixed | mov_op |
+ Rd(rd) | ImmMoveWide(imm) | ShiftMoveWide(shift));
+}
+
+
+void Assembler::AddSub(const Register& rd,
+ const Register& rn,
+ const Operand& operand,
+ FlagsUpdate S,
+ AddSubOp op) {
+ VIXL_ASSERT(rd.size() == rn.size());
+ if (operand.IsImmediate()) {
+ int64_t immediate = operand.immediate();
+ VIXL_ASSERT(IsImmAddSub(immediate));
+ Instr dest_reg = (S == SetFlags) ? Rd(rd) : RdSP(rd);
+ Emit(SF(rd) | AddSubImmediateFixed | op | Flags(S) |
+ ImmAddSub(static_cast<int>(immediate)) | dest_reg | RnSP(rn));
+ } else if (operand.IsShiftedRegister()) {
+ VIXL_ASSERT(operand.reg().size() == rd.size());
+ VIXL_ASSERT(operand.shift() != ROR);
+
+ // For instructions of the form:
+ // add/sub wsp, <Wn>, <Wm> [, LSL #0-3 ]
+ // add/sub <Wd>, wsp, <Wm> [, LSL #0-3 ]
+ // add/sub wsp, wsp, <Wm> [, LSL #0-3 ]
+ // adds/subs <Wd>, wsp, <Wm> [, LSL #0-3 ]
+ // or their 64-bit register equivalents, convert the operand from shifted to
+ // extended register mode, and emit an add/sub extended instruction.
+ if (rn.IsSP() || rd.IsSP()) {
+ VIXL_ASSERT(!(rd.IsSP() && (S == SetFlags)));
+ DataProcExtendedRegister(rd, rn, operand.ToExtendedRegister(), S,
+ AddSubExtendedFixed | op);
+ } else {
+ DataProcShiftedRegister(rd, rn, operand, S, AddSubShiftedFixed | op);
+ }
+ } else {
+ VIXL_ASSERT(operand.IsExtendedRegister());
+ DataProcExtendedRegister(rd, rn, operand, S, AddSubExtendedFixed | op);
+ }
+}
+
+
+void Assembler::AddSubWithCarry(const Register& rd,
+ const Register& rn,
+ const Operand& operand,
+ FlagsUpdate S,
+ AddSubWithCarryOp op) {
+ VIXL_ASSERT(rd.size() == rn.size());
+ VIXL_ASSERT(rd.size() == operand.reg().size());
+ VIXL_ASSERT(operand.IsShiftedRegister() && (operand.shift_amount() == 0));
+ Emit(SF(rd) | op | Flags(S) | Rm(operand.reg()) | Rn(rn) | Rd(rd));
+}
+
+
+void Assembler::hlt(int code) {
+ VIXL_ASSERT(IsUint16(code));
+ Emit(HLT | ImmException(code));
+}
+
+
+void Assembler::brk(int code) {
+ VIXL_ASSERT(IsUint16(code));
+ Emit(BRK | ImmException(code));
+}
+
+
+void Assembler::svc(int code) {
+ Emit(SVC | ImmException(code));
+}
+
+
+void Assembler::ConditionalCompare(const Register& rn,
+ const Operand& operand,
+ StatusFlags nzcv,
+ Condition cond,
+ ConditionalCompareOp op) {
+ Instr ccmpop;
+ if (operand.IsImmediate()) {
+ int64_t immediate = operand.immediate();
+ VIXL_ASSERT(IsImmConditionalCompare(immediate));
+ ccmpop = ConditionalCompareImmediateFixed | op |
+ ImmCondCmp(static_cast<unsigned>(immediate));
+ } else {
+ VIXL_ASSERT(operand.IsShiftedRegister() && (operand.shift_amount() == 0));
+ ccmpop = ConditionalCompareRegisterFixed | op | Rm(operand.reg());
+ }
+ Emit(SF(rn) | ccmpop | Cond(cond) | Rn(rn) | Nzcv(nzcv));
+}
+
+
+void Assembler::DataProcessing1Source(const Register& rd,
+ const Register& rn,
+ DataProcessing1SourceOp op) {
+ VIXL_ASSERT(rd.size() == rn.size());
+ Emit(SF(rn) | op | Rn(rn) | Rd(rd));
+}
+
+
+void Assembler::FPDataProcessing1Source(const VRegister& vd,
+ const VRegister& vn,
+ FPDataProcessing1SourceOp op) {
+ VIXL_ASSERT(vd.Is1H() || vd.Is1S() || vd.Is1D());
+ Emit(FPType(vn) | op | Rn(vn) | Rd(vd));
+}
+
+
+void Assembler::FPDataProcessing3Source(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ const VRegister& va,
+ FPDataProcessing3SourceOp op) {
+ VIXL_ASSERT(vd.Is1S() || vd.Is1D());
+ VIXL_ASSERT(AreSameSizeAndType(vd, vn, vm, va));
+ Emit(FPType(vd) | op | Rm(vm) | Rn(vn) | Rd(vd) | Ra(va));
+}
+
+
+void Assembler::NEONModifiedImmShiftLsl(const VRegister& vd,
+ const int imm8,
+ const int left_shift,
+ NEONModifiedImmediateOp op) {
+ VIXL_ASSERT(vd.Is8B() || vd.Is16B() || vd.Is4H() || vd.Is8H() ||
+ vd.Is2S() || vd.Is4S());
+ VIXL_ASSERT((left_shift == 0) || (left_shift == 8) ||
+ (left_shift == 16) || (left_shift == 24));
+ VIXL_ASSERT(IsUint8(imm8));
+
+ int cmode_1, cmode_2, cmode_3;
+ if (vd.Is8B() || vd.Is16B()) {
+ VIXL_ASSERT(op == NEONModifiedImmediate_MOVI);
+ cmode_1 = 1;
+ cmode_2 = 1;
+ cmode_3 = 1;
+ } else {
+ cmode_1 = (left_shift >> 3) & 1;
+ cmode_2 = left_shift >> 4;
+ cmode_3 = 0;
+ if (vd.Is4H() || vd.Is8H()) {
+ VIXL_ASSERT((left_shift == 0) || (left_shift == 8));
+ cmode_3 = 1;
+ }
+ }
+ int cmode = (cmode_3 << 3) | (cmode_2 << 2) | (cmode_1 << 1);
+
+ int q = vd.IsQ() ? NEON_Q : 0;
+
+ Emit(q | op | ImmNEONabcdefgh(imm8) | NEONCmode(cmode) | Rd(vd));
+}
+
+
+void Assembler::NEONModifiedImmShiftMsl(const VRegister& vd,
+ const int imm8,
+ const int shift_amount,
+ NEONModifiedImmediateOp op) {
+ VIXL_ASSERT(vd.Is2S() || vd.Is4S());
+ VIXL_ASSERT((shift_amount == 8) || (shift_amount == 16));
+ VIXL_ASSERT(IsUint8(imm8));
+
+ int cmode_0 = (shift_amount >> 4) & 1;
+ int cmode = 0xc | cmode_0;
+
+ int q = vd.IsQ() ? NEON_Q : 0;
+
+ Emit(q | op | ImmNEONabcdefgh(imm8) | NEONCmode(cmode) | Rd(vd));
+}
+
+
+void Assembler::EmitShift(const Register& rd,
+ const Register& rn,
+ Shift shift,
+ unsigned shift_amount) {
+ switch (shift) {
+ case LSL:
+ lsl(rd, rn, shift_amount);
+ break;
+ case LSR:
+ lsr(rd, rn, shift_amount);
+ break;
+ case ASR:
+ asr(rd, rn, shift_amount);
+ break;
+ case ROR:
+ ror(rd, rn, shift_amount);
+ break;
+ default:
+ VIXL_UNREACHABLE();
+ }
+}
+
+
+void Assembler::EmitExtendShift(const Register& rd,
+ const Register& rn,
+ Extend extend,
+ unsigned left_shift) {
+ VIXL_ASSERT(rd.size() >= rn.size());
+ unsigned reg_size = rd.size();
+ // Use the correct size of register.
+ Register rn_ = Register(rn.code(), rd.size());
+ // Bits extracted are high_bit:0.
+ unsigned high_bit = (8 << (extend & 0x3)) - 1;
+ // Number of bits left in the result that are not introduced by the shift.
+ unsigned non_shift_bits = (reg_size - left_shift) & (reg_size - 1);
+
+ if ((non_shift_bits > high_bit) || (non_shift_bits == 0)) {
+ switch (extend) {
+ case UXTB:
+ case UXTH:
+ case UXTW: ubfm(rd, rn_, non_shift_bits, high_bit); break;
+ case SXTB:
+ case SXTH:
+ case SXTW: sbfm(rd, rn_, non_shift_bits, high_bit); break;
+ case UXTX:
+ case SXTX: {
+ VIXL_ASSERT(rn.size() == kXRegSize);
+ // Nothing to extend. Just shift.
+ lsl(rd, rn_, left_shift);
+ break;
+ }
+ default: VIXL_UNREACHABLE();
+ }
+ } else {
+ // No need to extend as the extended bits would be shifted away.
+ lsl(rd, rn_, left_shift);
+ }
+}
+
+
+void Assembler::DataProcExtendedRegister(const Register& rd,
+ const Register& rn,
+ const Operand& operand,
+ FlagsUpdate S,
+ Instr op) {
+ Instr dest_reg = (S == SetFlags) ? Rd(rd) : RdSP(rd);
+ Emit(SF(rd) | op | Flags(S) | Rm(operand.reg()) |
+ ExtendMode(operand.extend()) | ImmExtendShift(operand.shift_amount()) |
+ dest_reg | RnSP(rn));
+}
+
+
+Instr Assembler::LoadStoreMemOperand(const MemOperand& addr,
+ unsigned access_size,
+ LoadStoreScalingOption option) {
+ Instr base = RnSP(addr.base());
+ int64_t offset = addr.offset();
+
+ if (addr.IsImmediateOffset()) {
+ bool prefer_unscaled = (option == PreferUnscaledOffset) ||
+ (option == RequireUnscaledOffset);
+ if (prefer_unscaled && IsImmLSUnscaled(offset)) {
+ // Use the unscaled addressing mode.
+ return base | LoadStoreUnscaledOffsetFixed |
+ ImmLS(static_cast<int>(offset));
+ }
+
+ if ((option != RequireUnscaledOffset) &&
+ IsImmLSScaled(offset, access_size)) {
+ // Use the scaled addressing mode.
+ return base | LoadStoreUnsignedOffsetFixed |
+ ImmLSUnsigned(static_cast<int>(offset) >> access_size);
+ }
+
+ if ((option != RequireScaledOffset) && IsImmLSUnscaled(offset)) {
+ // Use the unscaled addressing mode.
+ return base | LoadStoreUnscaledOffsetFixed |
+ ImmLS(static_cast<int>(offset));
+ }
+ }
+
+ // All remaining addressing modes are register-offset, pre-indexed or
+ // post-indexed modes.
+ VIXL_ASSERT((option != RequireUnscaledOffset) &&
+ (option != RequireScaledOffset));
+
+ if (addr.IsRegisterOffset()) {
+ Extend ext = addr.extend();
+ Shift shift = addr.shift();
+ unsigned shift_amount = addr.shift_amount();
+
+ // LSL is encoded in the option field as UXTX.
+ if (shift == LSL) {
+ ext = UXTX;
+ }
+
+ // Shifts are encoded in one bit, indicating a left shift by the memory
+ // access size.
+ VIXL_ASSERT((shift_amount == 0) || (shift_amount == access_size));
+ return base | LoadStoreRegisterOffsetFixed | Rm(addr.regoffset()) |
+ ExtendMode(ext) | ImmShiftLS((shift_amount > 0) ? 1 : 0);
+ }
+
+ if (addr.IsPreIndex() && IsImmLSUnscaled(offset)) {
+ return base | LoadStorePreIndexFixed | ImmLS(static_cast<int>(offset));
+ }
+
+ if (addr.IsPostIndex() && IsImmLSUnscaled(offset)) {
+ return base | LoadStorePostIndexFixed | ImmLS(static_cast<int>(offset));
+ }
+
+ // If this point is reached, the MemOperand (addr) cannot be encoded.
+ VIXL_UNREACHABLE();
+ return 0;
+}
+
+
+void Assembler::LoadStore(const CPURegister& rt,
+ const MemOperand& addr,
+ LoadStoreOp op,
+ LoadStoreScalingOption option) {
+ Emit(op | Rt(rt) | LoadStoreMemOperand(addr, CalcLSDataSize(op), option));
+}
+
+
+void Assembler::Prefetch(PrefetchOperation op,
+ const MemOperand& addr,
+ LoadStoreScalingOption option) {
+ VIXL_ASSERT(addr.IsRegisterOffset() || addr.IsImmediateOffset());
+
+ Instr prfop = ImmPrefetchOperation(op);
+ Emit(PRFM | prfop | LoadStoreMemOperand(addr, kXRegSizeInBytesLog2, option));
+}
+
+
+bool Assembler::IsImmAddSub(int64_t immediate) {
+ return IsUint12(immediate) ||
+ (IsUint12(immediate >> 12) && ((immediate & 0xfff) == 0));
+}
+
+
+bool Assembler::IsImmConditionalCompare(int64_t immediate) {
+ return IsUint5(immediate);
+}
+
+
+bool Assembler::IsImmFP32(float imm) {
+ // Valid values will have the form:
+ // aBbb.bbbc.defg.h000.0000.0000.0000.0000
+ uint32_t bits = FloatToRawbits(imm);
+ // bits[19..0] are cleared.
+ if ((bits & 0x7ffff) != 0) {
+ return false;
+ }
+
+ // bits[29..25] are all set or all cleared.
+ uint32_t b_pattern = (bits >> 16) & 0x3e00;
+ if (b_pattern != 0 && b_pattern != 0x3e00) {
+ return false;
+ }
+
+ // bit[30] and bit[29] are opposite.
+ if (((bits ^ (bits << 1)) & 0x40000000) == 0) {
+ return false;
+ }
+
+ return true;
+}
+
+
+bool Assembler::IsImmFP64(double imm) {
+ // Valid values will have the form:
+ // aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
+ // 0000.0000.0000.0000.0000.0000.0000.0000
+ uint64_t bits = DoubleToRawbits(imm);
+ // bits[47..0] are cleared.
+ if ((bits & 0x0000ffffffffffff) != 0) {
+ return false;
+ }
+
+ // bits[61..54] are all set or all cleared.
+ uint32_t b_pattern = (bits >> 48) & 0x3fc0;
+ if ((b_pattern != 0) && (b_pattern != 0x3fc0)) {
+ return false;
+ }
+
+ // bit[62] and bit[61] are opposite.
+ if (((bits ^ (bits << 1)) & (UINT64_C(1) << 62)) == 0) {
+ return false;
+ }
+
+ return true;
+}
+
+
+bool Assembler::IsImmLSPair(int64_t offset, unsigned access_size) {
+ VIXL_ASSERT(access_size <= kQRegSizeInBytesLog2);
+ bool offset_is_size_multiple =
+ (((offset >> access_size) << access_size) == offset);
+ return offset_is_size_multiple && IsInt7(offset >> access_size);
+}
+
+
+bool Assembler::IsImmLSScaled(int64_t offset, unsigned access_size) {
+ VIXL_ASSERT(access_size <= kQRegSizeInBytesLog2);
+ bool offset_is_size_multiple =
+ (((offset >> access_size) << access_size) == offset);
+ return offset_is_size_multiple && IsUint12(offset >> access_size);
+}
+
+
+bool Assembler::IsImmLSUnscaled(int64_t offset) {
+ return IsInt9(offset);
+}
+
+
+// The movn instruction can generate immediates containing an arbitrary 16-bit
+// value, with remaining bits set, eg. 0xffff1234, 0xffff1234ffffffff.
+bool Assembler::IsImmMovn(uint64_t imm, unsigned reg_size) {
+ return IsImmMovz(~imm, reg_size);
+}
+
+
+// The movz instruction can generate immediates containing an arbitrary 16-bit
+// value, with remaining bits clear, eg. 0x00001234, 0x0000123400000000.
+bool Assembler::IsImmMovz(uint64_t imm, unsigned reg_size) {
+ VIXL_ASSERT((reg_size == kXRegSize) || (reg_size == kWRegSize));
+ return CountClearHalfWords(imm, reg_size) >= ((reg_size / 16) - 1);
+}
+
+
+// Test if a given value can be encoded in the immediate field of a logical
+// instruction.
+// If it can be encoded, the function returns true, and values pointed to by n,
+// imm_s and imm_r are updated with immediates encoded in the format required
+// by the corresponding fields in the logical instruction.
+// If it can not be encoded, the function returns false, and the values pointed
+// to by n, imm_s and imm_r are undefined.
+bool Assembler::IsImmLogical(uint64_t value,
+ unsigned width,
+ unsigned* n,
+ unsigned* imm_s,
+ unsigned* imm_r) {
+ VIXL_ASSERT((width == kWRegSize) || (width == kXRegSize));
+
+ bool negate = false;
+
+ // Logical immediates are encoded using parameters n, imm_s and imm_r using
+ // the following table:
+ //
+ // N imms immr size S R
+ // 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr)
+ // 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr)
+ // 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr)
+ // 0 110sss xxxrrr 8 UInt(sss) UInt(rrr)
+ // 0 1110ss xxxxrr 4 UInt(ss) UInt(rr)
+ // 0 11110s xxxxxr 2 UInt(s) UInt(r)
+ // (s bits must not be all set)
+ //
+ // A pattern is constructed of size bits, where the least significant S+1 bits
+ // are set. The pattern is rotated right by R, and repeated across a 32 or
+ // 64-bit value, depending on destination register width.
+ //
+ // Put another way: the basic format of a logical immediate is a single
+ // contiguous stretch of 1 bits, repeated across the whole word at intervals
+ // given by a power of 2. To identify them quickly, we first locate the
+ // lowest stretch of 1 bits, then the next 1 bit above that; that combination
+ // is different for every logical immediate, so it gives us all the
+ // information we need to identify the only logical immediate that our input
+ // could be, and then we simply check if that's the value we actually have.
+ //
+ // (The rotation parameter does give the possibility of the stretch of 1 bits
+ // going 'round the end' of the word. To deal with that, we observe that in
+ // any situation where that happens the bitwise NOT of the value is also a
+ // valid logical immediate. So we simply invert the input whenever its low bit
+ // is set, and then we know that the rotated case can't arise.)
+
+ if (value & 1) {
+ // If the low bit is 1, negate the value, and set a flag to remember that we
+ // did (so that we can adjust the return values appropriately).
+ negate = true;
+ value = ~value;
+ }
+
+ if (width == kWRegSize) {
+ // To handle 32-bit logical immediates, the very easiest thing is to repeat
+ // the input value twice to make a 64-bit word. The correct encoding of that
+ // as a logical immediate will also be the correct encoding of the 32-bit
+ // value.
+
+ // Avoid making the assumption that the most-significant 32 bits are zero by
+ // shifting the value left and duplicating it.
+ value <<= kWRegSize;
+ value |= value >> kWRegSize;
+ }
+
+ // The basic analysis idea: imagine our input word looks like this.
+ //
+ // 0011111000111110001111100011111000111110001111100011111000111110
+ // c b a
+ // |<--d-->|
+ //
+ // We find the lowest set bit (as an actual power-of-2 value, not its index)
+ // and call it a. Then we add a to our original number, which wipes out the
+ // bottommost stretch of set bits and replaces it with a 1 carried into the
+ // next zero bit. Then we look for the new lowest set bit, which is in
+ // position b, and subtract it, so now our number is just like the original
+ // but with the lowest stretch of set bits completely gone. Now we find the
+ // lowest set bit again, which is position c in the diagram above. Then we'll
+ // measure the distance d between bit positions a and c (using CLZ), and that
+ // tells us that the only valid logical immediate that could possibly be equal
+ // to this number is the one in which a stretch of bits running from a to just
+ // below b is replicated every d bits.
+ uint64_t a = LowestSetBit(value);
+ uint64_t value_plus_a = value + a;
+ uint64_t b = LowestSetBit(value_plus_a);
+ uint64_t value_plus_a_minus_b = value_plus_a - b;
+ uint64_t c = LowestSetBit(value_plus_a_minus_b);
+
+ int d, clz_a, out_n;
+ uint64_t mask;
+
+ if (c != 0) {
+ // The general case, in which there is more than one stretch of set bits.
+ // Compute the repeat distance d, and set up a bitmask covering the basic
+ // unit of repetition (i.e. a word with the bottom d bits set). Also, in all
+ // of these cases the N bit of the output will be zero.
+ clz_a = CountLeadingZeros(a, kXRegSize);
+ int clz_c = CountLeadingZeros(c, kXRegSize);
+ d = clz_a - clz_c;
+ mask = ((UINT64_C(1) << d) - 1);
+ out_n = 0;
+ } else {
+ // Handle degenerate cases.
+ //
+ // If any of those 'find lowest set bit' operations didn't find a set bit at
+ // all, then the word will have been zero thereafter, so in particular the
+ // last lowest_set_bit operation will have returned zero. So we can test for
+ // all the special case conditions in one go by seeing if c is zero.
+ if (a == 0) {
+ // The input was zero (or all 1 bits, which will come to here too after we
+ // inverted it at the start of the function), for which we just return
+ // false.
+ return false;
+ } else {
+ // Otherwise, if c was zero but a was not, then there's just one stretch
+ // of set bits in our word, meaning that we have the trivial case of
+ // d == 64 and only one 'repetition'. Set up all the same variables as in
+ // the general case above, and set the N bit in the output.
+ clz_a = CountLeadingZeros(a, kXRegSize);
+ d = 64;
+ mask = ~UINT64_C(0);
+ out_n = 1;
+ }
+ }
+
+ // If the repeat period d is not a power of two, it can't be encoded.
+ if (!IsPowerOf2(d)) {
+ return false;
+ }
+
+ if (((b - a) & ~mask) != 0) {
+ // If the bit stretch (b - a) does not fit within the mask derived from the
+ // repeat period, then fail.
+ return false;
+ }
+
+ // The only possible option is b - a repeated every d bits. Now we're going to
+ // actually construct the valid logical immediate derived from that
+ // specification, and see if it equals our original input.
+ //
+ // To repeat a value every d bits, we multiply it by a number of the form
+ // (1 + 2^d + 2^(2d) + ...), i.e. 0x0001000100010001 or similar. These can
+ // be derived using a table lookup on CLZ(d).
+ static const uint64_t multipliers[] = {
+ 0x0000000000000001UL,
+ 0x0000000100000001UL,
+ 0x0001000100010001UL,
+ 0x0101010101010101UL,
+ 0x1111111111111111UL,
+ 0x5555555555555555UL,
+ };
+ uint64_t multiplier = multipliers[CountLeadingZeros(d, kXRegSize) - 57];
+ uint64_t candidate = (b - a) * multiplier;
+
+ if (value != candidate) {
+ // The candidate pattern doesn't match our input value, so fail.
+ return false;
+ }
+
+ // We have a match! This is a valid logical immediate, so now we have to
+ // construct the bits and pieces of the instruction encoding that generates
+ // it.
+
+ // Count the set bits in our basic stretch. The special case of clz(0) == -1
+ // makes the answer come out right for stretches that reach the very top of
+ // the word (e.g. numbers like 0xffffc00000000000).
+ int clz_b = (b == 0) ? -1 : CountLeadingZeros(b, kXRegSize);
+ int s = clz_a - clz_b;
+
+ // Decide how many bits to rotate right by, to put the low bit of that basic
+ // stretch in position a.
+ int r;
+ if (negate) {
+ // If we inverted the input right at the start of this function, here's
+ // where we compensate: the number of set bits becomes the number of clear
+ // bits, and the rotation count is based on position b rather than position
+ // a (since b is the location of the 'lowest' 1 bit after inversion).
+ s = d - s;
+ r = (clz_b + 1) & (d - 1);
+ } else {
+ r = (clz_a + 1) & (d - 1);
+ }
+
+ // Now we're done, except for having to encode the S output in such a way that
+ // it gives both the number of set bits and the length of the repeated
+ // segment. The s field is encoded like this:
+ //
+ // imms size S
+ // ssssss 64 UInt(ssssss)
+ // 0sssss 32 UInt(sssss)
+ // 10ssss 16 UInt(ssss)
+ // 110sss 8 UInt(sss)
+ // 1110ss 4 UInt(ss)
+ // 11110s 2 UInt(s)
+ //
+ // So we 'or' (-d << 1) with our computed s to form imms.
+ if ((n != NULL) || (imm_s != NULL) || (imm_r != NULL)) {
+ *n = out_n;
+ *imm_s = ((-d << 1) | (s - 1)) & 0x3f;
+ *imm_r = r;
+ }
+
+ return true;
+}
+
+
+LoadStoreOp Assembler::LoadOpFor(const CPURegister& rt) {
+ VIXL_ASSERT(rt.IsValid());
+ if (rt.IsRegister()) {
+ return rt.Is64Bits() ? LDR_x : LDR_w;
+ } else {
+ VIXL_ASSERT(rt.IsVRegister());
+ switch (rt.SizeInBits()) {
+ case kBRegSize: return LDR_b;
+ case kHRegSize: return LDR_h;
+ case kSRegSize: return LDR_s;
+ case kDRegSize: return LDR_d;
+ default:
+ VIXL_ASSERT(rt.IsQ());
+ return LDR_q;
+ }
+ }
+}
+
+
+LoadStoreOp Assembler::StoreOpFor(const CPURegister& rt) {
+ VIXL_ASSERT(rt.IsValid());
+ if (rt.IsRegister()) {
+ return rt.Is64Bits() ? STR_x : STR_w;
+ } else {
+ VIXL_ASSERT(rt.IsVRegister());
+ switch (rt.SizeInBits()) {
+ case kBRegSize: return STR_b;
+ case kHRegSize: return STR_h;
+ case kSRegSize: return STR_s;
+ case kDRegSize: return STR_d;
+ default:
+ VIXL_ASSERT(rt.IsQ());
+ return STR_q;
+ }
+ }
+}
+
+
+LoadStorePairOp Assembler::StorePairOpFor(const CPURegister& rt,
+ const CPURegister& rt2) {
+ VIXL_ASSERT(AreSameSizeAndType(rt, rt2));
+ USE(rt2);
+ if (rt.IsRegister()) {
+ return rt.Is64Bits() ? STP_x : STP_w;
+ } else {
+ VIXL_ASSERT(rt.IsVRegister());
+ switch (rt.SizeInBytes()) {
+ case kSRegSizeInBytes: return STP_s;
+ case kDRegSizeInBytes: return STP_d;
+ default:
+ VIXL_ASSERT(rt.IsQ());
+ return STP_q;
+ }
+ }
+}
+
+
+LoadStorePairOp Assembler::LoadPairOpFor(const CPURegister& rt,
+ const CPURegister& rt2) {
+ VIXL_ASSERT((STP_w | LoadStorePairLBit) == LDP_w);
+ return static_cast<LoadStorePairOp>(StorePairOpFor(rt, rt2) |
+ LoadStorePairLBit);
+}
+
+
+LoadStorePairNonTemporalOp Assembler::StorePairNonTemporalOpFor(
+ const CPURegister& rt, const CPURegister& rt2) {
+ VIXL_ASSERT(AreSameSizeAndType(rt, rt2));
+ USE(rt2);
+ if (rt.IsRegister()) {
+ return rt.Is64Bits() ? STNP_x : STNP_w;
+ } else {
+ VIXL_ASSERT(rt.IsVRegister());
+ switch (rt.SizeInBytes()) {
+ case kSRegSizeInBytes: return STNP_s;
+ case kDRegSizeInBytes: return STNP_d;
+ default:
+ VIXL_ASSERT(rt.IsQ());
+ return STNP_q;
+ }
+ }
+}
+
+
+LoadStorePairNonTemporalOp Assembler::LoadPairNonTemporalOpFor(
+ const CPURegister& rt, const CPURegister& rt2) {
+ VIXL_ASSERT((STNP_w | LoadStorePairNonTemporalLBit) == LDNP_w);
+ return static_cast<LoadStorePairNonTemporalOp>(
+ StorePairNonTemporalOpFor(rt, rt2) | LoadStorePairNonTemporalLBit);
+}
+
+
+LoadLiteralOp Assembler::LoadLiteralOpFor(const CPURegister& rt) {
+ if (rt.IsRegister()) {
+ return rt.IsX() ? LDR_x_lit : LDR_w_lit;
+ } else {
+ VIXL_ASSERT(rt.IsVRegister());
+ switch (rt.SizeInBytes()) {
+ case kSRegSizeInBytes: return LDR_s_lit;
+ case kDRegSizeInBytes: return LDR_d_lit;
+ default:
+ VIXL_ASSERT(rt.IsQ());
+ return LDR_q_lit;
+ }
+ }
+}
+
+
+bool Assembler::CPUHas(const CPURegister& rt) const {
+ // Core registers are available without any particular CPU features.
+ if (rt.IsRegister()) return true;
+ VIXL_ASSERT(rt.IsVRegister());
+ // The architecture does not allow FP and NEON to be implemented separately,
+ // but we can crudely categorise them based on register size, since FP only
+ // uses D, S and (occasionally) H registers.
+ if (rt.IsH() || rt.IsS() || rt.IsD()) {
+ return CPUHas(CPUFeatures::kFP) || CPUHas(CPUFeatures::kNEON);
+ }
+ VIXL_ASSERT(rt.IsB() || rt.IsQ());
+ return CPUHas(CPUFeatures::kNEON);
+}
+
+
+bool Assembler::CPUHas(const CPURegister& rt, const CPURegister& rt2) const {
+ // This is currently only used for loads and stores, where rt and rt2 must
+ // have the same size and type. We could extend this to cover other cases if
+ // necessary, but for now we can avoid checking both registers.
+ VIXL_ASSERT(AreSameSizeAndType(rt, rt2));
+ USE(rt2);
+ return CPUHas(rt);
+}
+
+
+bool Assembler::CPUHas(SystemRegister sysreg) const {
+ switch (sysreg) {
+ case RNDR:
+ case RNDRRS:
+ return CPUHas(CPUFeatures::kRNG);
+ case FPCR:
+ case NZCV:
+ break;
+ }
+ return true;
+}
+
+
+bool AreAliased(const CPURegister& reg1, const CPURegister& reg2,
+ const CPURegister& reg3, const CPURegister& reg4,
+ const CPURegister& reg5, const CPURegister& reg6,
+ const CPURegister& reg7, const CPURegister& reg8) {
+ int number_of_valid_regs = 0;
+ int number_of_valid_fpregs = 0;
+
+ RegList unique_regs = 0;
+ RegList unique_fpregs = 0;
+
+ const CPURegister regs[] = {reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8};
+
+ for (unsigned i = 0; i < sizeof(regs) / sizeof(regs[0]); i++) {
+ if (regs[i].IsRegister()) {
+ number_of_valid_regs++;
+ unique_regs |= regs[i].Bit();
+ } else if (regs[i].IsVRegister()) {
+ number_of_valid_fpregs++;
+ unique_fpregs |= regs[i].Bit();
+ } else {
+ VIXL_ASSERT(!regs[i].IsValid());
+ }
+ }
+
+ int number_of_unique_regs = CountSetBits(unique_regs);
+ int number_of_unique_fpregs = CountSetBits(unique_fpregs);
+
+ VIXL_ASSERT(number_of_valid_regs >= number_of_unique_regs);
+ VIXL_ASSERT(number_of_valid_fpregs >= number_of_unique_fpregs);
+
+ return (number_of_valid_regs != number_of_unique_regs) ||
+ (number_of_valid_fpregs != number_of_unique_fpregs);
+}
+
+
+bool AreSameSizeAndType(const CPURegister& reg1, const CPURegister& reg2,
+ const CPURegister& reg3, const CPURegister& reg4,
+ const CPURegister& reg5, const CPURegister& reg6,
+ const CPURegister& reg7, const CPURegister& reg8) {
+ VIXL_ASSERT(reg1.IsValid());
+ bool match = true;
+ match &= !reg2.IsValid() || reg2.IsSameSizeAndType(reg1);
+ match &= !reg3.IsValid() || reg3.IsSameSizeAndType(reg1);
+ match &= !reg4.IsValid() || reg4.IsSameSizeAndType(reg1);
+ match &= !reg5.IsValid() || reg5.IsSameSizeAndType(reg1);
+ match &= !reg6.IsValid() || reg6.IsSameSizeAndType(reg1);
+ match &= !reg7.IsValid() || reg7.IsSameSizeAndType(reg1);
+ match &= !reg8.IsValid() || reg8.IsSameSizeAndType(reg1);
+ return match;
+}
+
+bool AreEven(const CPURegister& reg1,
+ const CPURegister& reg2,
+ const CPURegister& reg3,
+ const CPURegister& reg4,
+ const CPURegister& reg5,
+ const CPURegister& reg6,
+ const CPURegister& reg7,
+ const CPURegister& reg8) {
+ VIXL_ASSERT(reg1.IsValid());
+ bool even = (reg1.code() % 2) == 0;
+ even &= !reg2.IsValid() || ((reg2.code() % 2) == 0);
+ even &= !reg3.IsValid() || ((reg3.code() % 2) == 0);
+ even &= !reg4.IsValid() || ((reg4.code() % 2) == 0);
+ even &= !reg5.IsValid() || ((reg5.code() % 2) == 0);
+ even &= !reg6.IsValid() || ((reg6.code() % 2) == 0);
+ even &= !reg7.IsValid() || ((reg7.code() % 2) == 0);
+ even &= !reg8.IsValid() || ((reg8.code() % 2) == 0);
+ return even;
+}
+
+bool AreConsecutive(const CPURegister& reg1,
+ const CPURegister& reg2,
+ const CPURegister& reg3,
+ const CPURegister& reg4) {
+ VIXL_ASSERT(reg1.IsValid());
+
+ if (!reg2.IsValid()) {
+ return true;
+ } else if (reg2.code() != ((reg1.code() + 1) % kNumberOfRegisters)) {
+ return false;
+ }
+
+ if (!reg3.IsValid()) {
+ return true;
+ } else if (reg3.code() != ((reg2.code() + 1) % kNumberOfRegisters)) {
+ return false;
+ }
+
+ if (!reg4.IsValid()) {
+ return true;
+ } else if (reg4.code() != ((reg3.code() + 1) % kNumberOfRegisters)) {
+ return false;
+ }
+
+ return true;
+}
+
+bool AreSameFormat(const VRegister& reg1, const VRegister& reg2,
+ const VRegister& reg3, const VRegister& reg4) {
+ VIXL_ASSERT(reg1.IsValid());
+ bool match = true;
+ match &= !reg2.IsValid() || reg2.IsSameFormat(reg1);
+ match &= !reg3.IsValid() || reg3.IsSameFormat(reg1);
+ match &= !reg4.IsValid() || reg4.IsSameFormat(reg1);
+ return match;
+}
+
+
+bool AreConsecutive(const VRegister& reg1, const VRegister& reg2,
+ const VRegister& reg3, const VRegister& reg4) {
+ VIXL_ASSERT(reg1.IsValid());
+ bool match = true;
+ match &= !reg2.IsValid() ||
+ (reg2.code() == ((reg1.code() + 1) % kNumberOfVRegisters));
+ match &= !reg3.IsValid() ||
+ (reg3.code() == ((reg1.code() + 2) % kNumberOfVRegisters));
+ match &= !reg4.IsValid() ||
+ (reg4.code() == ((reg1.code() + 3) % kNumberOfVRegisters));
+ return match;
+}
+} // namespace vixl
diff --git a/js/src/jit/arm64/vixl/Assembler-vixl.h b/js/src/jit/arm64/vixl/Assembler-vixl.h
new file mode 100644
index 0000000000..462b359eea
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Assembler-vixl.h
@@ -0,0 +1,4974 @@
+// Copyright 2015, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_A64_ASSEMBLER_A64_H_
+#define VIXL_A64_ASSEMBLER_A64_H_
+
+#include "jit/arm64/vixl/Cpu-vixl.h"
+#include "jit/arm64/vixl/Globals-vixl.h"
+#include "jit/arm64/vixl/Instructions-vixl.h"
+#include "jit/arm64/vixl/MozBaseAssembler-vixl.h"
+#include "jit/arm64/vixl/Utils-vixl.h"
+
+#include "jit/JitSpewer.h"
+
+#include "jit/shared/Assembler-shared.h"
+#include "jit/shared/Disassembler-shared.h"
+#include "jit/shared/IonAssemblerBufferWithConstantPools.h"
+
+#if defined(_M_ARM64)
+#ifdef mvn
+#undef mvn
+#endif
+#endif
+
+namespace vixl {
+
+using js::jit::BufferOffset;
+using js::jit::Label;
+using js::jit::Address;
+using js::jit::BaseIndex;
+using js::jit::DisassemblerSpew;
+
+using LabelDoc = DisassemblerSpew::LabelDoc;
+
+typedef uint64_t RegList;
+static const int kRegListSizeInBits = sizeof(RegList) * 8;
+
+
+// Registers.
+
+// Some CPURegister methods can return Register or VRegister types, so we need
+// to declare them in advance.
+class Register;
+class VRegister;
+
+class CPURegister {
+ public:
+ enum RegisterType {
+ // The kInvalid value is used to detect uninitialized static instances,
+ // which are always zero-initialized before any constructors are called.
+ kInvalid = 0,
+ kRegister,
+ kVRegister,
+ kFPRegister = kVRegister,
+ kNoRegister
+ };
+
+ constexpr CPURegister() : code_(0), size_(0), type_(kNoRegister) {
+ }
+
+ constexpr CPURegister(unsigned code, unsigned size, RegisterType type)
+ : code_(code), size_(size), type_(type) {
+ }
+
+ unsigned code() const {
+ VIXL_ASSERT(IsValid());
+ return code_;
+ }
+
+ RegisterType type() const {
+ VIXL_ASSERT(IsValidOrNone());
+ return type_;
+ }
+
+ RegList Bit() const {
+ VIXL_ASSERT(code_ < (sizeof(RegList) * 8));
+ return IsValid() ? (static_cast<RegList>(1) << code_) : 0;
+ }
+
+ unsigned size() const {
+ VIXL_ASSERT(IsValid());
+ return size_;
+ }
+
+ int SizeInBytes() const {
+ VIXL_ASSERT(IsValid());
+ VIXL_ASSERT(size() % 8 == 0);
+ return size_ / 8;
+ }
+
+ int SizeInBits() const {
+ VIXL_ASSERT(IsValid());
+ return size_;
+ }
+
+ bool Is8Bits() const {
+ VIXL_ASSERT(IsValid());
+ return size_ == 8;
+ }
+
+ bool Is16Bits() const {
+ VIXL_ASSERT(IsValid());
+ return size_ == 16;
+ }
+
+ bool Is32Bits() const {
+ VIXL_ASSERT(IsValid());
+ return size_ == 32;
+ }
+
+ bool Is64Bits() const {
+ VIXL_ASSERT(IsValid());
+ return size_ == 64;
+ }
+
+ bool Is128Bits() const {
+ VIXL_ASSERT(IsValid());
+ return size_ == 128;
+ }
+
+ bool IsValid() const {
+ if (IsValidRegister() || IsValidVRegister()) {
+ VIXL_ASSERT(!IsNone());
+ return true;
+ } else {
+ // This assert is hit when the register has not been properly initialized.
+ // One cause for this can be an initialisation order fiasco. See
+ // https://isocpp.org/wiki/faq/ctors#static-init-order for some details.
+ VIXL_ASSERT(IsNone());
+ return false;
+ }
+ }
+
+ bool IsValidRegister() const {
+ return IsRegister() &&
+ ((size_ == kWRegSize) || (size_ == kXRegSize)) &&
+ ((code_ < kNumberOfRegisters) || (code_ == kSPRegInternalCode));
+ }
+
+ bool IsValidVRegister() const {
+ return IsVRegister() &&
+ ((size_ == kBRegSize) || (size_ == kHRegSize) ||
+ (size_ == kSRegSize) || (size_ == kDRegSize) ||
+ (size_ == kQRegSize)) &&
+ (code_ < kNumberOfVRegisters);
+ }
+
+ bool IsValidFPRegister() const {
+ return IsFPRegister() && (code_ < kNumberOfVRegisters);
+ }
+
+ bool IsNone() const {
+ // kNoRegister types should always have size 0 and code 0.
+ VIXL_ASSERT((type_ != kNoRegister) || (code_ == 0));
+ VIXL_ASSERT((type_ != kNoRegister) || (size_ == 0));
+
+ return type_ == kNoRegister;
+ }
+
+ bool Aliases(const CPURegister& other) const {
+ VIXL_ASSERT(IsValidOrNone() && other.IsValidOrNone());
+ return (code_ == other.code_) && (type_ == other.type_);
+ }
+
+ bool Is(const CPURegister& other) const {
+ VIXL_ASSERT(IsValidOrNone() && other.IsValidOrNone());
+ return Aliases(other) && (size_ == other.size_);
+ }
+
+ bool IsZero() const {
+ VIXL_ASSERT(IsValid());
+ return IsRegister() && (code_ == kZeroRegCode);
+ }
+
+ bool IsSP() const {
+ VIXL_ASSERT(IsValid());
+ return IsRegister() && (code_ == kSPRegInternalCode);
+ }
+
+ bool IsRegister() const {
+ return type_ == kRegister;
+ }
+
+ bool IsVRegister() const {
+ return type_ == kVRegister;
+ }
+
+ bool IsFPRegister() const {
+ return IsS() || IsD();
+ }
+
+ bool IsW() const { return IsValidRegister() && Is32Bits(); }
+ bool IsX() const { return IsValidRegister() && Is64Bits(); }
+
+ // These assertions ensure that the size and type of the register are as
+ // described. They do not consider the number of lanes that make up a vector.
+ // So, for example, Is8B() implies IsD(), and Is1D() implies IsD, but IsD()
+ // does not imply Is1D() or Is8B().
+ // Check the number of lanes, ie. the format of the vector, using methods such
+ // as Is8B(), Is1D(), etc. in the VRegister class.
+ bool IsV() const { return IsVRegister(); }
+ bool IsB() const { return IsV() && Is8Bits(); }
+ bool IsH() const { return IsV() && Is16Bits(); }
+ bool IsS() const { return IsV() && Is32Bits(); }
+ bool IsD() const { return IsV() && Is64Bits(); }
+ bool IsQ() const { return IsV() && Is128Bits(); }
+
+ const Register& W() const;
+ const Register& X() const;
+ const VRegister& V() const;
+ const VRegister& B() const;
+ const VRegister& H() const;
+ const VRegister& S() const;
+ const VRegister& D() const;
+ const VRegister& Q() const;
+
+ bool IsSameSizeAndType(const CPURegister& other) const {
+ return (size_ == other.size_) && (type_ == other.type_);
+ }
+
+ protected:
+ unsigned code_;
+ unsigned size_;
+ RegisterType type_;
+
+ private:
+ bool IsValidOrNone() const {
+ return IsValid() || IsNone();
+ }
+};
+
+
+class Register : public CPURegister {
+ public:
+ Register() : CPURegister() {}
+ explicit Register(const CPURegister& other)
+ : CPURegister(other.code(), other.size(), other.type()) {
+ VIXL_ASSERT(IsValidRegister());
+ }
+ constexpr Register(unsigned code, unsigned size)
+ : CPURegister(code, size, kRegister) {}
+
+ constexpr Register(js::jit::Register r, unsigned size)
+ : CPURegister(r.code(), size, kRegister) {}
+
+ bool IsValid() const {
+ VIXL_ASSERT(IsRegister() || IsNone());
+ return IsValidRegister();
+ }
+
+ js::jit::Register asUnsized() const {
+ // asUnsized() is only ever used on temp registers or on registers that
+ // are known not to be SP, and there should be no risk of it being
+ // applied to SP. Check anyway.
+ VIXL_ASSERT(code_ != kSPRegInternalCode);
+ return js::jit::Register::FromCode((js::jit::Register::Code)code_);
+ }
+
+
+ static const Register& WRegFromCode(unsigned code);
+ static const Register& XRegFromCode(unsigned code);
+
+ private:
+ static const Register wregisters[];
+ static const Register xregisters[];
+};
+
+
+class VRegister : public CPURegister {
+ public:
+ VRegister() : CPURegister(), lanes_(1) {}
+ explicit VRegister(const CPURegister& other)
+ : CPURegister(other.code(), other.size(), other.type()), lanes_(1) {
+ VIXL_ASSERT(IsValidVRegister());
+ VIXL_ASSERT(IsPowerOf2(lanes_) && (lanes_ <= 16));
+ }
+ constexpr VRegister(unsigned code, unsigned size, unsigned lanes = 1)
+ : CPURegister(code, size, kVRegister), lanes_(lanes) {
+ // VIXL_ASSERT(IsPowerOf2(lanes_) && (lanes_ <= 16));
+ }
+ constexpr VRegister(js::jit::FloatRegister r)
+ : CPURegister(r.encoding(), r.size() * 8, kVRegister), lanes_(1) {
+ }
+ constexpr VRegister(js::jit::FloatRegister r, unsigned size)
+ : CPURegister(r.encoding(), size, kVRegister), lanes_(1) {
+ }
+ VRegister(unsigned code, VectorFormat format)
+ : CPURegister(code, RegisterSizeInBitsFromFormat(format), kVRegister),
+ lanes_(IsVectorFormat(format) ? LaneCountFromFormat(format) : 1) {
+ VIXL_ASSERT(IsPowerOf2(lanes_) && (lanes_ <= 16));
+ }
+
+ bool IsValid() const {
+ VIXL_ASSERT(IsVRegister() || IsNone());
+ return IsValidVRegister();
+ }
+
+ static const VRegister& BRegFromCode(unsigned code);
+ static const VRegister& HRegFromCode(unsigned code);
+ static const VRegister& SRegFromCode(unsigned code);
+ static const VRegister& DRegFromCode(unsigned code);
+ static const VRegister& QRegFromCode(unsigned code);
+ static const VRegister& VRegFromCode(unsigned code);
+
+ VRegister V8B() const { return VRegister(code_, kDRegSize, 8); }
+ VRegister V16B() const { return VRegister(code_, kQRegSize, 16); }
+ VRegister V4H() const { return VRegister(code_, kDRegSize, 4); }
+ VRegister V8H() const { return VRegister(code_, kQRegSize, 8); }
+ VRegister V2S() const { return VRegister(code_, kDRegSize, 2); }
+ VRegister V4S() const { return VRegister(code_, kQRegSize, 4); }
+ VRegister V2D() const { return VRegister(code_, kQRegSize, 2); }
+ VRegister V1D() const { return VRegister(code_, kDRegSize, 1); }
+
+ bool Is8B() const { return (Is64Bits() && (lanes_ == 8)); }
+ bool Is16B() const { return (Is128Bits() && (lanes_ == 16)); }
+ bool Is4H() const { return (Is64Bits() && (lanes_ == 4)); }
+ bool Is8H() const { return (Is128Bits() && (lanes_ == 8)); }
+ bool Is2S() const { return (Is64Bits() && (lanes_ == 2)); }
+ bool Is4S() const { return (Is128Bits() && (lanes_ == 4)); }
+ bool Is1D() const { return (Is64Bits() && (lanes_ == 1)); }
+ bool Is2D() const { return (Is128Bits() && (lanes_ == 2)); }
+
+ // For consistency, we assert the number of lanes of these scalar registers,
+ // even though there are no vectors of equivalent total size with which they
+ // could alias.
+ bool Is1B() const {
+ VIXL_ASSERT(!(Is8Bits() && IsVector()));
+ return Is8Bits();
+ }
+ bool Is1H() const {
+ VIXL_ASSERT(!(Is16Bits() && IsVector()));
+ return Is16Bits();
+ }
+ bool Is1S() const {
+ VIXL_ASSERT(!(Is32Bits() && IsVector()));
+ return Is32Bits();
+ }
+
+ bool IsLaneSizeB() const { return LaneSizeInBits() == kBRegSize; }
+ bool IsLaneSizeH() const { return LaneSizeInBits() == kHRegSize; }
+ bool IsLaneSizeS() const { return LaneSizeInBits() == kSRegSize; }
+ bool IsLaneSizeD() const { return LaneSizeInBits() == kDRegSize; }
+
+ int lanes() const {
+ return lanes_;
+ }
+
+ bool IsScalar() const {
+ return lanes_ == 1;
+ }
+
+ bool IsVector() const {
+ return lanes_ > 1;
+ }
+
+ bool IsSameFormat(const VRegister& other) const {
+ return (size_ == other.size_) && (lanes_ == other.lanes_);
+ }
+
+ unsigned LaneSizeInBytes() const {
+ return SizeInBytes() / lanes_;
+ }
+
+ unsigned LaneSizeInBits() const {
+ return LaneSizeInBytes() * 8;
+ }
+
+ private:
+ static const VRegister bregisters[];
+ static const VRegister hregisters[];
+ static const VRegister sregisters[];
+ static const VRegister dregisters[];
+ static const VRegister qregisters[];
+ static const VRegister vregisters[];
+ int lanes_;
+};
+
+
+// Backward compatibility for FPRegisters.
+typedef VRegister FPRegister;
+
+// No*Reg is used to indicate an unused argument, or an error case. Note that
+// these all compare equal (using the Is() method). The Register and VRegister
+// variants are provided for convenience.
+const Register NoReg;
+const VRegister NoVReg;
+const FPRegister NoFPReg; // For backward compatibility.
+const CPURegister NoCPUReg;
+
+
+#define DEFINE_REGISTERS(N) \
+constexpr Register w##N(N, kWRegSize); \
+constexpr Register x##N(N, kXRegSize);
+REGISTER_CODE_LIST(DEFINE_REGISTERS)
+#undef DEFINE_REGISTERS
+constexpr Register wsp(kSPRegInternalCode, kWRegSize);
+constexpr Register sp(kSPRegInternalCode, kXRegSize);
+
+
+#define DEFINE_VREGISTERS(N) \
+constexpr VRegister b##N(N, kBRegSize); \
+constexpr VRegister h##N(N, kHRegSize); \
+constexpr VRegister s##N(N, kSRegSize); \
+constexpr VRegister d##N(N, kDRegSize); \
+constexpr VRegister q##N(N, kQRegSize); \
+constexpr VRegister v##N(N, kQRegSize);
+REGISTER_CODE_LIST(DEFINE_VREGISTERS)
+#undef DEFINE_VREGISTERS
+
+
+// Registers aliases.
+constexpr Register ip0 = x16;
+constexpr Register ip1 = x17;
+constexpr Register lr = x30;
+constexpr Register xzr = x31;
+constexpr Register wzr = w31;
+
+
+// AreAliased returns true if any of the named registers overlap. Arguments
+// set to NoReg are ignored. The system stack pointer may be specified.
+bool AreAliased(const CPURegister& reg1,
+ const CPURegister& reg2,
+ const CPURegister& reg3 = NoReg,
+ const CPURegister& reg4 = NoReg,
+ const CPURegister& reg5 = NoReg,
+ const CPURegister& reg6 = NoReg,
+ const CPURegister& reg7 = NoReg,
+ const CPURegister& reg8 = NoReg);
+
+
+// AreSameSizeAndType returns true if all of the specified registers have the
+// same size, and are of the same type. The system stack pointer may be
+// specified. Arguments set to NoReg are ignored, as are any subsequent
+// arguments. At least one argument (reg1) must be valid (not NoCPUReg).
+bool AreSameSizeAndType(const CPURegister& reg1,
+ const CPURegister& reg2,
+ const CPURegister& reg3 = NoCPUReg,
+ const CPURegister& reg4 = NoCPUReg,
+ const CPURegister& reg5 = NoCPUReg,
+ const CPURegister& reg6 = NoCPUReg,
+ const CPURegister& reg7 = NoCPUReg,
+ const CPURegister& reg8 = NoCPUReg);
+
+// AreEven returns true if all of the specified registers have even register
+// indices. Arguments set to NoReg are ignored, as are any subsequent
+// arguments. At least one argument (reg1) must be valid (not NoCPUReg).
+bool AreEven(const CPURegister& reg1,
+ const CPURegister& reg2,
+ const CPURegister& reg3 = NoReg,
+ const CPURegister& reg4 = NoReg,
+ const CPURegister& reg5 = NoReg,
+ const CPURegister& reg6 = NoReg,
+ const CPURegister& reg7 = NoReg,
+ const CPURegister& reg8 = NoReg);
+
+// AreConsecutive returns true if all of the specified registers are
+// consecutive in the register file. Arguments set to NoReg are ignored, as are
+// any subsequent arguments. At least one argument (reg1) must be valid
+// (not NoCPUReg).
+bool AreConsecutive(const CPURegister& reg1,
+ const CPURegister& reg2,
+ const CPURegister& reg3 = NoCPUReg,
+ const CPURegister& reg4 = NoCPUReg);
+
+// AreSameFormat returns true if all of the specified VRegisters have the same
+// vector format. Arguments set to NoReg are ignored, as are any subsequent
+// arguments. At least one argument (reg1) must be valid (not NoVReg).
+bool AreSameFormat(const VRegister& reg1,
+ const VRegister& reg2,
+ const VRegister& reg3 = NoVReg,
+ const VRegister& reg4 = NoVReg);
+
+
+// AreConsecutive returns true if all of the specified VRegisters are
+// consecutive in the register file. Arguments set to NoReg are ignored, as are
+// any subsequent arguments. At least one argument (reg1) must be valid
+// (not NoVReg).
+bool AreConsecutive(const VRegister& reg1,
+ const VRegister& reg2,
+ const VRegister& reg3 = NoVReg,
+ const VRegister& reg4 = NoVReg);
+
+
+// Lists of registers.
+class CPURegList {
+ public:
+ explicit CPURegList(CPURegister reg1,
+ CPURegister reg2 = NoCPUReg,
+ CPURegister reg3 = NoCPUReg,
+ CPURegister reg4 = NoCPUReg)
+ : list_(reg1.Bit() | reg2.Bit() | reg3.Bit() | reg4.Bit()),
+ size_(reg1.size()), type_(reg1.type()) {
+ VIXL_ASSERT(AreSameSizeAndType(reg1, reg2, reg3, reg4));
+ VIXL_ASSERT(IsValid());
+ }
+
+ CPURegList(CPURegister::RegisterType type, unsigned size, RegList list)
+ : list_(list), size_(size), type_(type) {
+ VIXL_ASSERT(IsValid());
+ }
+
+ CPURegList(CPURegister::RegisterType type, unsigned size,
+ unsigned first_reg, unsigned last_reg)
+ : size_(size), type_(type) {
+ VIXL_ASSERT(((type == CPURegister::kRegister) &&
+ (last_reg < kNumberOfRegisters)) ||
+ ((type == CPURegister::kVRegister) &&
+ (last_reg < kNumberOfVRegisters)));
+ VIXL_ASSERT(last_reg >= first_reg);
+ list_ = (UINT64_C(1) << (last_reg + 1)) - 1;
+ list_ &= ~((UINT64_C(1) << first_reg) - 1);
+ VIXL_ASSERT(IsValid());
+ }
+
+ CPURegister::RegisterType type() const {
+ VIXL_ASSERT(IsValid());
+ return type_;
+ }
+
+ // Combine another CPURegList into this one. Registers that already exist in
+ // this list are left unchanged. The type and size of the registers in the
+ // 'other' list must match those in this list.
+ void Combine(const CPURegList& other) {
+ VIXL_ASSERT(IsValid());
+ VIXL_ASSERT(other.type() == type_);
+ VIXL_ASSERT(other.RegisterSizeInBits() == size_);
+ list_ |= other.list();
+ }
+
+ // Remove every register in the other CPURegList from this one. Registers that
+ // do not exist in this list are ignored. The type and size of the registers
+ // in the 'other' list must match those in this list.
+ void Remove(const CPURegList& other) {
+ VIXL_ASSERT(IsValid());
+ VIXL_ASSERT(other.type() == type_);
+ VIXL_ASSERT(other.RegisterSizeInBits() == size_);
+ list_ &= ~other.list();
+ }
+
+ // Variants of Combine and Remove which take a single register.
+ void Combine(const CPURegister& other) {
+ VIXL_ASSERT(other.type() == type_);
+ VIXL_ASSERT(other.size() == size_);
+ Combine(other.code());
+ }
+
+ void Remove(const CPURegister& other) {
+ VIXL_ASSERT(other.type() == type_);
+ VIXL_ASSERT(other.size() == size_);
+ Remove(other.code());
+ }
+
+ // Variants of Combine and Remove which take a single register by its code;
+ // the type and size of the register is inferred from this list.
+ void Combine(int code) {
+ VIXL_ASSERT(IsValid());
+ VIXL_ASSERT(CPURegister(code, size_, type_).IsValid());
+ list_ |= (UINT64_C(1) << code);
+ }
+
+ void Remove(int code) {
+ VIXL_ASSERT(IsValid());
+ VIXL_ASSERT(CPURegister(code, size_, type_).IsValid());
+ list_ &= ~(UINT64_C(1) << code);
+ }
+
+ static CPURegList Union(const CPURegList& list_1, const CPURegList& list_2) {
+ VIXL_ASSERT(list_1.type_ == list_2.type_);
+ VIXL_ASSERT(list_1.size_ == list_2.size_);
+ return CPURegList(list_1.type_, list_1.size_, list_1.list_ | list_2.list_);
+ }
+ static CPURegList Union(const CPURegList& list_1,
+ const CPURegList& list_2,
+ const CPURegList& list_3);
+ static CPURegList Union(const CPURegList& list_1,
+ const CPURegList& list_2,
+ const CPURegList& list_3,
+ const CPURegList& list_4);
+
+ static CPURegList Intersection(const CPURegList& list_1,
+ const CPURegList& list_2) {
+ VIXL_ASSERT(list_1.type_ == list_2.type_);
+ VIXL_ASSERT(list_1.size_ == list_2.size_);
+ return CPURegList(list_1.type_, list_1.size_, list_1.list_ & list_2.list_);
+ }
+ static CPURegList Intersection(const CPURegList& list_1,
+ const CPURegList& list_2,
+ const CPURegList& list_3);
+ static CPURegList Intersection(const CPURegList& list_1,
+ const CPURegList& list_2,
+ const CPURegList& list_3,
+ const CPURegList& list_4);
+
+ bool Overlaps(const CPURegList& other) const {
+ return (type_ == other.type_) && ((list_ & other.list_) != 0);
+ }
+
+ RegList list() const {
+ VIXL_ASSERT(IsValid());
+ return list_;
+ }
+
+ void set_list(RegList new_list) {
+ VIXL_ASSERT(IsValid());
+ list_ = new_list;
+ }
+
+ // Remove all callee-saved registers from the list. This can be useful when
+ // preparing registers for an AAPCS64 function call, for example.
+ void RemoveCalleeSaved();
+
+ CPURegister PopLowestIndex();
+ CPURegister PopHighestIndex();
+
+ // AAPCS64 callee-saved registers.
+ static CPURegList GetCalleeSaved(unsigned size = kXRegSize);
+ static CPURegList GetCalleeSavedV(unsigned size = kDRegSize);
+
+ // AAPCS64 caller-saved registers. Note that this includes lr.
+ // TODO(all): Determine how we handle d8-d15 being callee-saved, but the top
+ // 64-bits being caller-saved.
+ static CPURegList GetCallerSaved(unsigned size = kXRegSize);
+ static CPURegList GetCallerSavedV(unsigned size = kDRegSize);
+
+ bool IsEmpty() const {
+ VIXL_ASSERT(IsValid());
+ return list_ == 0;
+ }
+
+ bool IncludesAliasOf(const CPURegister& other) const {
+ VIXL_ASSERT(IsValid());
+ return (type_ == other.type()) && ((other.Bit() & list_) != 0);
+ }
+
+ bool IncludesAliasOf(int code) const {
+ VIXL_ASSERT(IsValid());
+ return ((code & list_) != 0);
+ }
+
+ int Count() const {
+ VIXL_ASSERT(IsValid());
+ return CountSetBits(list_);
+ }
+
+ unsigned RegisterSizeInBits() const {
+ VIXL_ASSERT(IsValid());
+ return size_;
+ }
+
+ unsigned RegisterSizeInBytes() const {
+ int size_in_bits = RegisterSizeInBits();
+ VIXL_ASSERT((size_in_bits % 8) == 0);
+ return size_in_bits / 8;
+ }
+
+ unsigned TotalSizeInBytes() const {
+ VIXL_ASSERT(IsValid());
+ return RegisterSizeInBytes() * Count();
+ }
+
+ private:
+ RegList list_;
+ unsigned size_;
+ CPURegister::RegisterType type_;
+
+ bool IsValid() const;
+};
+
+
+// AAPCS64 callee-saved registers.
+extern const CPURegList kCalleeSaved;
+extern const CPURegList kCalleeSavedV;
+
+
+// AAPCS64 caller-saved registers. Note that this includes lr.
+extern const CPURegList kCallerSaved;
+extern const CPURegList kCallerSavedV;
+
+
+// Operand.
+class Operand {
+ public:
+ // #<immediate>
+ // where <immediate> is int64_t.
+ // This is allowed to be an implicit constructor because Operand is
+ // a wrapper class that doesn't normally perform any type conversion.
+ Operand(int64_t immediate = 0); // NOLINT(runtime/explicit)
+
+ // rm, {<shift> #<shift_amount>}
+ // where <shift> is one of {LSL, LSR, ASR, ROR}.
+ // <shift_amount> is uint6_t.
+ // This is allowed to be an implicit constructor because Operand is
+ // a wrapper class that doesn't normally perform any type conversion.
+ Operand(Register reg,
+ Shift shift = LSL,
+ unsigned shift_amount = 0); // NOLINT(runtime/explicit)
+
+ // rm, {<extend> {#<shift_amount>}}
+ // where <extend> is one of {UXTB, UXTH, UXTW, UXTX, SXTB, SXTH, SXTW, SXTX}.
+ // <shift_amount> is uint2_t.
+ explicit Operand(Register reg, Extend extend, unsigned shift_amount = 0);
+
+ bool IsImmediate() const;
+ bool IsShiftedRegister() const;
+ bool IsExtendedRegister() const;
+ bool IsZero() const;
+
+ // This returns an LSL shift (<= 4) operand as an equivalent extend operand,
+ // which helps in the encoding of instructions that use the stack pointer.
+ Operand ToExtendedRegister() const;
+
+ int64_t immediate() const {
+ VIXL_ASSERT(IsImmediate());
+ return immediate_;
+ }
+
+ Register reg() const {
+ VIXL_ASSERT(IsShiftedRegister() || IsExtendedRegister());
+ return reg_;
+ }
+
+ CPURegister maybeReg() const {
+ if (IsShiftedRegister() || IsExtendedRegister())
+ return reg_;
+ return NoCPUReg;
+ }
+
+ Shift shift() const {
+ VIXL_ASSERT(IsShiftedRegister());
+ return shift_;
+ }
+
+ Extend extend() const {
+ VIXL_ASSERT(IsExtendedRegister());
+ return extend_;
+ }
+
+ unsigned shift_amount() const {
+ VIXL_ASSERT(IsShiftedRegister() || IsExtendedRegister());
+ return shift_amount_;
+ }
+
+ private:
+ int64_t immediate_;
+ Register reg_;
+ Shift shift_;
+ Extend extend_;
+ unsigned shift_amount_;
+};
+
+
+// MemOperand represents the addressing mode of a load or store instruction.
+class MemOperand {
+ public:
+ explicit MemOperand(Register base,
+ int64_t offset = 0,
+ AddrMode addrmode = Offset);
+ MemOperand(Register base,
+ Register regoffset,
+ Shift shift = LSL,
+ unsigned shift_amount = 0);
+ MemOperand(Register base,
+ Register regoffset,
+ Extend extend,
+ unsigned shift_amount = 0);
+ MemOperand(Register base,
+ const Operand& offset,
+ AddrMode addrmode = Offset);
+
+ // Adapter constructors using C++11 delegating.
+ // TODO: If sp == kSPRegInternalCode, the xzr check isn't necessary.
+ explicit MemOperand(js::jit::Address addr)
+ : MemOperand(IsHiddenSP(addr.base) ? sp : Register(AsRegister(addr.base), 64),
+ (ptrdiff_t)addr.offset) {
+ }
+
+ const Register& base() const { return base_; }
+ const Register& regoffset() const { return regoffset_; }
+ int64_t offset() const { return offset_; }
+ AddrMode addrmode() const { return addrmode_; }
+ Shift shift() const { return shift_; }
+ Extend extend() const { return extend_; }
+ unsigned shift_amount() const { return shift_amount_; }
+ bool IsImmediateOffset() const;
+ bool IsRegisterOffset() const;
+ bool IsPreIndex() const;
+ bool IsPostIndex() const;
+
+ void AddOffset(int64_t offset);
+
+ private:
+ Register base_;
+ Register regoffset_;
+ int64_t offset_;
+ AddrMode addrmode_;
+ Shift shift_;
+ Extend extend_;
+ unsigned shift_amount_;
+};
+
+
+// Control whether or not position-independent code should be emitted.
+enum PositionIndependentCodeOption {
+ // All code generated will be position-independent; all branches and
+ // references to labels generated with the Label class will use PC-relative
+ // addressing.
+ PositionIndependentCode,
+
+ // Allow VIXL to generate code that refers to absolute addresses. With this
+ // option, it will not be possible to copy the code buffer and run it from a
+ // different address; code must be generated in its final location.
+ PositionDependentCode,
+
+ // Allow VIXL to assume that the bottom 12 bits of the address will be
+ // constant, but that the top 48 bits may change. This allows `adrp` to
+ // function in systems which copy code between pages, but otherwise maintain
+ // 4KB page alignment.
+ PageOffsetDependentCode
+};
+
+
+// Control how scaled- and unscaled-offset loads and stores are generated.
+enum LoadStoreScalingOption {
+ // Prefer scaled-immediate-offset instructions, but emit unscaled-offset,
+ // register-offset, pre-index or post-index instructions if necessary.
+ PreferScaledOffset,
+
+ // Prefer unscaled-immediate-offset instructions, but emit scaled-offset,
+ // register-offset, pre-index or post-index instructions if necessary.
+ PreferUnscaledOffset,
+
+ // Require scaled-immediate-offset instructions.
+ RequireScaledOffset,
+
+ // Require unscaled-immediate-offset instructions.
+ RequireUnscaledOffset
+};
+
+
+// Assembler.
+class Assembler : public MozBaseAssembler {
+ public:
+ Assembler(PositionIndependentCodeOption pic = PositionIndependentCode);
+
+ // System functions.
+
+ // Finalize a code buffer of generated instructions. This function must be
+ // called before executing or copying code from the buffer.
+ void FinalizeCode();
+
+#define COPYENUM(v) static const Condition v = vixl::v
+#define COPYENUM_(v) static const Condition v = vixl::v##_
+ COPYENUM(Equal);
+ COPYENUM(Zero);
+ COPYENUM(NotEqual);
+ COPYENUM(NonZero);
+ COPYENUM(AboveOrEqual);
+ COPYENUM(CarrySet);
+ COPYENUM(Below);
+ COPYENUM(CarryClear);
+ COPYENUM(Signed);
+ COPYENUM(NotSigned);
+ COPYENUM(Overflow);
+ COPYENUM(NoOverflow);
+ COPYENUM(Above);
+ COPYENUM(BelowOrEqual);
+ COPYENUM_(GreaterThanOrEqual);
+ COPYENUM_(LessThan);
+ COPYENUM_(GreaterThan);
+ COPYENUM_(LessThanOrEqual);
+ COPYENUM(Always);
+ COPYENUM(Never);
+#undef COPYENUM
+#undef COPYENUM_
+
+ // Bit set when a DoubleCondition does not map to a single ARM condition.
+ // The MacroAssembler must special-case these conditions, or else
+ // ConditionFromDoubleCondition will complain.
+ static const int DoubleConditionBitSpecial = 0x100;
+
+ enum DoubleCondition {
+ DoubleOrdered = Condition::vc,
+ DoubleEqual = Condition::eq,
+ DoubleNotEqual = Condition::ne | DoubleConditionBitSpecial,
+ DoubleGreaterThan = Condition::gt,
+ DoubleGreaterThanOrEqual = Condition::ge,
+ DoubleLessThan = Condition::lo, // Could also use Condition::mi.
+ DoubleLessThanOrEqual = Condition::ls,
+
+ // If either operand is NaN, these conditions always evaluate to true.
+ DoubleUnordered = Condition::vs,
+ DoubleEqualOrUnordered = Condition::eq | DoubleConditionBitSpecial,
+ DoubleNotEqualOrUnordered = Condition::ne,
+ DoubleGreaterThanOrUnordered = Condition::hi,
+ DoubleGreaterThanOrEqualOrUnordered = Condition::hs,
+ DoubleLessThanOrUnordered = Condition::lt,
+ DoubleLessThanOrEqualOrUnordered = Condition::le
+ };
+
+ static inline Condition InvertCondition(Condition cond) {
+ // Conditions al and nv behave identically, as "always true". They can't be
+ // inverted, because there is no "always false" condition.
+ VIXL_ASSERT((cond != al) && (cond != nv));
+ return static_cast<Condition>(cond ^ 1);
+ }
+
+ // This is chaging the condition codes for cmp a, b to the same codes for cmp b, a.
+ static inline Condition InvertCmpCondition(Condition cond) {
+ // Conditions al and nv behave identically, as "always true". They can't be
+ // inverted, because there is no "always false" condition.
+ switch (cond) {
+ case eq:
+ case ne:
+ return cond;
+ case gt:
+ return le;
+ case le:
+ return gt;
+ case ge:
+ return lt;
+ case lt:
+ return ge;
+ case hi:
+ return lo;
+ case lo:
+ return hi;
+ case hs:
+ return ls;
+ case ls:
+ return hs;
+ case mi:
+ return pl;
+ case pl:
+ return mi;
+ default:
+ MOZ_CRASH("TODO: figure this case out.");
+ }
+ return static_cast<Condition>(cond ^ 1);
+ }
+
+ static inline DoubleCondition InvertCondition(DoubleCondition cond) {
+ switch (cond) {
+ case DoubleOrdered:
+ return DoubleUnordered;
+ case DoubleEqual:
+ return DoubleNotEqualOrUnordered;
+ case DoubleNotEqual:
+ return DoubleEqualOrUnordered;
+ case DoubleGreaterThan:
+ return DoubleLessThanOrEqualOrUnordered;
+ case DoubleGreaterThanOrEqual:
+ return DoubleLessThanOrUnordered;
+ case DoubleLessThan:
+ return DoubleGreaterThanOrEqualOrUnordered;
+ case DoubleLessThanOrEqual:
+ return DoubleGreaterThanOrUnordered;
+ case DoubleUnordered:
+ return DoubleOrdered;
+ case DoubleEqualOrUnordered:
+ return DoubleNotEqual;
+ case DoubleNotEqualOrUnordered:
+ return DoubleEqual;
+ case DoubleGreaterThanOrUnordered:
+ return DoubleLessThanOrEqual;
+ case DoubleGreaterThanOrEqualOrUnordered:
+ return DoubleLessThan;
+ case DoubleLessThanOrUnordered:
+ return DoubleGreaterThanOrEqual;
+ case DoubleLessThanOrEqualOrUnordered:
+ return DoubleGreaterThan;
+ default:
+ MOZ_CRASH("Bad condition");
+ }
+ }
+
+ static inline Condition ConditionFromDoubleCondition(DoubleCondition cond) {
+ VIXL_ASSERT(!(cond & DoubleConditionBitSpecial));
+ return static_cast<Condition>(cond);
+ }
+
+ // Instruction set functions.
+
+ // Branch / Jump instructions.
+ // Branch to register.
+ void br(const Register& xn);
+ static void br(Instruction* at, const Register& xn);
+
+ // Branch with link to register.
+ void blr(const Register& xn);
+ static void blr(Instruction* at, const Register& blr);
+
+ // Branch to register with return hint.
+ void ret(const Register& xn = lr);
+
+ // Unconditional branch to label.
+ BufferOffset b(Label* label);
+
+ // Conditional branch to label.
+ BufferOffset b(Label* label, Condition cond);
+
+ // Unconditional branch to PC offset.
+ BufferOffset b(int imm26, const LabelDoc& doc);
+ static void b(Instruction* at, int imm26);
+
+ // Conditional branch to PC offset.
+ BufferOffset b(int imm19, Condition cond, const LabelDoc& doc);
+ static void b(Instruction*at, int imm19, Condition cond);
+
+ // Branch with link to label.
+ void bl(Label* label);
+
+ // Branch with link to PC offset.
+ void bl(int imm26, const LabelDoc& doc);
+ static void bl(Instruction* at, int imm26);
+
+ // Compare and branch to label if zero.
+ void cbz(const Register& rt, Label* label);
+
+ // Compare and branch to PC offset if zero.
+ void cbz(const Register& rt, int imm19, const LabelDoc& doc);
+ static void cbz(Instruction* at, const Register& rt, int imm19);
+
+ // Compare and branch to label if not zero.
+ void cbnz(const Register& rt, Label* label);
+
+ // Compare and branch to PC offset if not zero.
+ void cbnz(const Register& rt, int imm19, const LabelDoc& doc);
+ static void cbnz(Instruction* at, const Register& rt, int imm19);
+
+ // Table lookup from one register.
+ void tbl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Table lookup from two registers.
+ void tbl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vn2,
+ const VRegister& vm);
+
+ // Table lookup from three registers.
+ void tbl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vn2,
+ const VRegister& vn3,
+ const VRegister& vm);
+
+ // Table lookup from four registers.
+ void tbl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vn2,
+ const VRegister& vn3,
+ const VRegister& vn4,
+ const VRegister& vm);
+
+ // Table lookup extension from one register.
+ void tbx(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Table lookup extension from two registers.
+ void tbx(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vn2,
+ const VRegister& vm);
+
+ // Table lookup extension from three registers.
+ void tbx(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vn2,
+ const VRegister& vn3,
+ const VRegister& vm);
+
+ // Table lookup extension from four registers.
+ void tbx(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vn2,
+ const VRegister& vn3,
+ const VRegister& vn4,
+ const VRegister& vm);
+
+ // Test bit and branch to label if zero.
+ void tbz(const Register& rt, unsigned bit_pos, Label* label);
+
+ // Test bit and branch to PC offset if zero.
+ void tbz(const Register& rt, unsigned bit_pos, int imm14, const LabelDoc& doc);
+ static void tbz(Instruction* at, const Register& rt, unsigned bit_pos, int imm14);
+
+ // Test bit and branch to label if not zero.
+ void tbnz(const Register& rt, unsigned bit_pos, Label* label);
+
+ // Test bit and branch to PC offset if not zero.
+ void tbnz(const Register& rt, unsigned bit_pos, int imm14, const LabelDoc& doc);
+ static void tbnz(Instruction* at, const Register& rt, unsigned bit_pos, int imm14);
+
+ // Address calculation instructions.
+ // Calculate a PC-relative address. Unlike for branches the offset in adr is
+ // unscaled (i.e. the result can be unaligned).
+
+ // Calculate the address of a label.
+ void adr(const Register& rd, Label* label);
+
+ // Calculate the address of a PC offset.
+ void adr(const Register& rd, int imm21, const LabelDoc& doc);
+ static void adr(Instruction* at, const Register& rd, int imm21);
+
+ // Calculate the page address of a label.
+ void adrp(const Register& rd, Label* label);
+
+ // Calculate the page address of a PC offset.
+ void adrp(const Register& rd, int imm21, const LabelDoc& doc);
+ static void adrp(Instruction* at, const Register& rd, int imm21);
+
+ // Data Processing instructions.
+ // Add.
+ void add(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+
+ // Add and update status flags.
+ void adds(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+
+ // Compare negative.
+ void cmn(const Register& rn, const Operand& operand);
+
+ // Subtract.
+ void sub(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+
+ // Subtract and update status flags.
+ void subs(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+
+ // Compare.
+ void cmp(const Register& rn, const Operand& operand);
+
+ // Negate.
+ void neg(const Register& rd,
+ const Operand& operand);
+
+ // Negate and update status flags.
+ void negs(const Register& rd,
+ const Operand& operand);
+
+ // Add with carry bit.
+ void adc(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+
+ // Add with carry bit and update status flags.
+ void adcs(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+
+ // Subtract with carry bit.
+ void sbc(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+
+ // Subtract with carry bit and update status flags.
+ void sbcs(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+
+ // Negate with carry bit.
+ void ngc(const Register& rd,
+ const Operand& operand);
+
+ // Negate with carry bit and update status flags.
+ void ngcs(const Register& rd,
+ const Operand& operand);
+
+ // Logical instructions.
+ // Bitwise and (A & B).
+ void and_(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+
+ // Bitwise and (A & B) and update status flags.
+ BufferOffset ands(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+
+ // Bit test and set flags.
+ BufferOffset tst(const Register& rn, const Operand& operand);
+
+ // Bit clear (A & ~B).
+ void bic(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+
+ // Bit clear (A & ~B) and update status flags.
+ void bics(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+
+ // Bitwise or (A | B).
+ void orr(const Register& rd, const Register& rn, const Operand& operand);
+
+ // Bitwise nor (A | ~B).
+ void orn(const Register& rd, const Register& rn, const Operand& operand);
+
+ // Bitwise eor/xor (A ^ B).
+ void eor(const Register& rd, const Register& rn, const Operand& operand);
+
+ // Bitwise enor/xnor (A ^ ~B).
+ void eon(const Register& rd, const Register& rn, const Operand& operand);
+
+ // Logical shift left by variable.
+ void lslv(const Register& rd, const Register& rn, const Register& rm);
+
+ // Logical shift right by variable.
+ void lsrv(const Register& rd, const Register& rn, const Register& rm);
+
+ // Arithmetic shift right by variable.
+ void asrv(const Register& rd, const Register& rn, const Register& rm);
+
+ // Rotate right by variable.
+ void rorv(const Register& rd, const Register& rn, const Register& rm);
+
+ // Bitfield instructions.
+ // Bitfield move.
+ void bfm(const Register& rd,
+ const Register& rn,
+ unsigned immr,
+ unsigned imms);
+
+ // Signed bitfield move.
+ void sbfm(const Register& rd,
+ const Register& rn,
+ unsigned immr,
+ unsigned imms);
+
+ // Unsigned bitfield move.
+ void ubfm(const Register& rd,
+ const Register& rn,
+ unsigned immr,
+ unsigned imms);
+
+ // Bfm aliases.
+ // Bitfield insert.
+ void bfi(const Register& rd,
+ const Register& rn,
+ unsigned lsb,
+ unsigned width) {
+ VIXL_ASSERT(width >= 1);
+ VIXL_ASSERT(lsb + width <= rn.size());
+ bfm(rd, rn, (rd.size() - lsb) & (rd.size() - 1), width - 1);
+ }
+
+ // Bitfield extract and insert low.
+ void bfxil(const Register& rd,
+ const Register& rn,
+ unsigned lsb,
+ unsigned width) {
+ VIXL_ASSERT(width >= 1);
+ VIXL_ASSERT(lsb + width <= rn.size());
+ bfm(rd, rn, lsb, lsb + width - 1);
+ }
+
+ // Sbfm aliases.
+ // Arithmetic shift right.
+ void asr(const Register& rd, const Register& rn, unsigned shift) {
+ VIXL_ASSERT(shift < rd.size());
+ sbfm(rd, rn, shift, rd.size() - 1);
+ }
+
+ // Signed bitfield insert with zero at right.
+ void sbfiz(const Register& rd,
+ const Register& rn,
+ unsigned lsb,
+ unsigned width) {
+ VIXL_ASSERT(width >= 1);
+ VIXL_ASSERT(lsb + width <= rn.size());
+ sbfm(rd, rn, (rd.size() - lsb) & (rd.size() - 1), width - 1);
+ }
+
+ // Signed bitfield extract.
+ void sbfx(const Register& rd,
+ const Register& rn,
+ unsigned lsb,
+ unsigned width) {
+ VIXL_ASSERT(width >= 1);
+ VIXL_ASSERT(lsb + width <= rn.size());
+ sbfm(rd, rn, lsb, lsb + width - 1);
+ }
+
+ // Signed extend byte.
+ void sxtb(const Register& rd, const Register& rn) {
+ sbfm(rd, rn, 0, 7);
+ }
+
+ // Signed extend halfword.
+ void sxth(const Register& rd, const Register& rn) {
+ sbfm(rd, rn, 0, 15);
+ }
+
+ // Signed extend word.
+ void sxtw(const Register& rd, const Register& rn) {
+ sbfm(rd, rn, 0, 31);
+ }
+
+ // Ubfm aliases.
+ // Logical shift left.
+ void lsl(const Register& rd, const Register& rn, unsigned shift) {
+ unsigned reg_size = rd.size();
+ VIXL_ASSERT(shift < reg_size);
+ ubfm(rd, rn, (reg_size - shift) % reg_size, reg_size - shift - 1);
+ }
+
+ // Logical shift right.
+ void lsr(const Register& rd, const Register& rn, unsigned shift) {
+ VIXL_ASSERT(shift < rd.size());
+ ubfm(rd, rn, shift, rd.size() - 1);
+ }
+
+ // Unsigned bitfield insert with zero at right.
+ void ubfiz(const Register& rd,
+ const Register& rn,
+ unsigned lsb,
+ unsigned width) {
+ VIXL_ASSERT(width >= 1);
+ VIXL_ASSERT(lsb + width <= rn.size());
+ ubfm(rd, rn, (rd.size() - lsb) & (rd.size() - 1), width - 1);
+ }
+
+ // Unsigned bitfield extract.
+ void ubfx(const Register& rd,
+ const Register& rn,
+ unsigned lsb,
+ unsigned width) {
+ VIXL_ASSERT(width >= 1);
+ VIXL_ASSERT(lsb + width <= rn.size());
+ ubfm(rd, rn, lsb, lsb + width - 1);
+ }
+
+ // Unsigned extend byte.
+ void uxtb(const Register& rd, const Register& rn) {
+ ubfm(rd, rn, 0, 7);
+ }
+
+ // Unsigned extend halfword.
+ void uxth(const Register& rd, const Register& rn) {
+ ubfm(rd, rn, 0, 15);
+ }
+
+ // Unsigned extend word.
+ void uxtw(const Register& rd, const Register& rn) {
+ ubfm(rd, rn, 0, 31);
+ }
+
+ // Extract.
+ void extr(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ unsigned lsb);
+
+ // Conditional select: rd = cond ? rn : rm.
+ void csel(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ Condition cond);
+
+ // Conditional select increment: rd = cond ? rn : rm + 1.
+ void csinc(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ Condition cond);
+
+ // Conditional select inversion: rd = cond ? rn : ~rm.
+ void csinv(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ Condition cond);
+
+ // Conditional select negation: rd = cond ? rn : -rm.
+ void csneg(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ Condition cond);
+
+ // Conditional set: rd = cond ? 1 : 0.
+ void cset(const Register& rd, Condition cond);
+
+ // Conditional set mask: rd = cond ? -1 : 0.
+ void csetm(const Register& rd, Condition cond);
+
+ // Conditional increment: rd = cond ? rn + 1 : rn.
+ void cinc(const Register& rd, const Register& rn, Condition cond);
+
+ // Conditional invert: rd = cond ? ~rn : rn.
+ void cinv(const Register& rd, const Register& rn, Condition cond);
+
+ // Conditional negate: rd = cond ? -rn : rn.
+ void cneg(const Register& rd, const Register& rn, Condition cond);
+
+ // Rotate right.
+ void ror(const Register& rd, const Register& rs, unsigned shift) {
+ extr(rd, rs, rs, shift);
+ }
+
+ // Conditional comparison.
+ // Conditional compare negative.
+ void ccmn(const Register& rn,
+ const Operand& operand,
+ StatusFlags nzcv,
+ Condition cond);
+
+ // Conditional compare.
+ void ccmp(const Register& rn,
+ const Operand& operand,
+ StatusFlags nzcv,
+ Condition cond);
+
+ // CRC-32 checksum from byte.
+ void crc32b(const Register& rd,
+ const Register& rn,
+ const Register& rm);
+
+ // CRC-32 checksum from half-word.
+ void crc32h(const Register& rd,
+ const Register& rn,
+ const Register& rm);
+
+ // CRC-32 checksum from word.
+ void crc32w(const Register& rd,
+ const Register& rn,
+ const Register& rm);
+
+ // CRC-32 checksum from double word.
+ void crc32x(const Register& rd,
+ const Register& rn,
+ const Register& rm);
+
+ // CRC-32 C checksum from byte.
+ void crc32cb(const Register& rd,
+ const Register& rn,
+ const Register& rm);
+
+ // CRC-32 C checksum from half-word.
+ void crc32ch(const Register& rd,
+ const Register& rn,
+ const Register& rm);
+
+ // CRC-32 C checksum from word.
+ void crc32cw(const Register& rd,
+ const Register& rn,
+ const Register& rm);
+
+ // CRC-32C checksum from double word.
+ void crc32cx(const Register& rd,
+ const Register& rn,
+ const Register& rm);
+
+ // Multiply.
+ void mul(const Register& rd, const Register& rn, const Register& rm);
+
+ // Negated multiply.
+ void mneg(const Register& rd, const Register& rn, const Register& rm);
+
+ // Signed long multiply: 32 x 32 -> 64-bit.
+ void smull(const Register& rd, const Register& rn, const Register& rm);
+
+ // Signed multiply high: 64 x 64 -> 64-bit <127:64>.
+ void smulh(const Register& xd, const Register& xn, const Register& xm);
+
+ // Multiply and accumulate.
+ void madd(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ const Register& ra);
+
+ // Multiply and subtract.
+ void msub(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ const Register& ra);
+
+ // Signed long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
+ void smaddl(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ const Register& ra);
+
+ // Unsigned long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
+ void umaddl(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ const Register& ra);
+
+ // Unsigned long multiply: 32 x 32 -> 64-bit.
+ void umull(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ umaddl(rd, rn, rm, xzr);
+ }
+
+ // Unsigned multiply high: 64 x 64 -> 64-bit <127:64>.
+ void umulh(const Register& xd,
+ const Register& xn,
+ const Register& xm);
+
+ // Signed long multiply and subtract: 64 - (32 x 32) -> 64-bit.
+ void smsubl(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ const Register& ra);
+
+ // Unsigned long multiply and subtract: 64 - (32 x 32) -> 64-bit.
+ void umsubl(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ const Register& ra);
+
+ // Signed integer divide.
+ void sdiv(const Register& rd, const Register& rn, const Register& rm);
+
+ // Unsigned integer divide.
+ void udiv(const Register& rd, const Register& rn, const Register& rm);
+
+ // Bit reverse.
+ void rbit(const Register& rd, const Register& rn);
+
+ // Reverse bytes in 16-bit half words.
+ void rev16(const Register& rd, const Register& rn);
+
+ // Reverse bytes in 32-bit words.
+ void rev32(const Register& rd, const Register& rn);
+
+ // Reverse bytes.
+ void rev(const Register& rd, const Register& rn);
+
+ // Count leading zeroes.
+ void clz(const Register& rd, const Register& rn);
+
+ // Count leading sign bits.
+ void cls(const Register& rd, const Register& rn);
+
+ // Memory instructions.
+ // Load integer or FP register.
+ void ldr(const CPURegister& rt, const MemOperand& src,
+ LoadStoreScalingOption option = PreferScaledOffset);
+
+ // Store integer or FP register.
+ void str(const CPURegister& rt, const MemOperand& dst,
+ LoadStoreScalingOption option = PreferScaledOffset);
+
+ // Load word with sign extension.
+ void ldrsw(const Register& rt, const MemOperand& src,
+ LoadStoreScalingOption option = PreferScaledOffset);
+
+ // Load byte.
+ void ldrb(const Register& rt, const MemOperand& src,
+ LoadStoreScalingOption option = PreferScaledOffset);
+
+ // Store byte.
+ void strb(const Register& rt, const MemOperand& dst,
+ LoadStoreScalingOption option = PreferScaledOffset);
+
+ // Load byte with sign extension.
+ void ldrsb(const Register& rt, const MemOperand& src,
+ LoadStoreScalingOption option = PreferScaledOffset);
+
+ // Load half-word.
+ void ldrh(const Register& rt, const MemOperand& src,
+ LoadStoreScalingOption option = PreferScaledOffset);
+
+ // Store half-word.
+ void strh(const Register& rt, const MemOperand& dst,
+ LoadStoreScalingOption option = PreferScaledOffset);
+
+ // Load half-word with sign extension.
+ void ldrsh(const Register& rt, const MemOperand& src,
+ LoadStoreScalingOption option = PreferScaledOffset);
+
+ // Load integer or FP register (with unscaled offset).
+ void ldur(const CPURegister& rt, const MemOperand& src,
+ LoadStoreScalingOption option = PreferUnscaledOffset);
+
+ // Store integer or FP register (with unscaled offset).
+ void stur(const CPURegister& rt, const MemOperand& src,
+ LoadStoreScalingOption option = PreferUnscaledOffset);
+
+ // Load word with sign extension.
+ void ldursw(const Register& rt, const MemOperand& src,
+ LoadStoreScalingOption option = PreferUnscaledOffset);
+
+ // Load byte (with unscaled offset).
+ void ldurb(const Register& rt, const MemOperand& src,
+ LoadStoreScalingOption option = PreferUnscaledOffset);
+
+ // Store byte (with unscaled offset).
+ void sturb(const Register& rt, const MemOperand& dst,
+ LoadStoreScalingOption option = PreferUnscaledOffset);
+
+ // Load byte with sign extension (and unscaled offset).
+ void ldursb(const Register& rt, const MemOperand& src,
+ LoadStoreScalingOption option = PreferUnscaledOffset);
+
+ // Load half-word (with unscaled offset).
+ void ldurh(const Register& rt, const MemOperand& src,
+ LoadStoreScalingOption option = PreferUnscaledOffset);
+
+ // Store half-word (with unscaled offset).
+ void sturh(const Register& rt, const MemOperand& dst,
+ LoadStoreScalingOption option = PreferUnscaledOffset);
+
+ // Load half-word with sign extension (and unscaled offset).
+ void ldursh(const Register& rt, const MemOperand& src,
+ LoadStoreScalingOption option = PreferUnscaledOffset);
+
+ // Load integer or FP register pair.
+ void ldp(const CPURegister& rt, const CPURegister& rt2,
+ const MemOperand& src);
+
+ // Store integer or FP register pair.
+ void stp(const CPURegister& rt, const CPURegister& rt2,
+ const MemOperand& dst);
+
+ // Load word pair with sign extension.
+ void ldpsw(const Register& rt, const Register& rt2, const MemOperand& src);
+
+ // Load integer or FP register pair, non-temporal.
+ void ldnp(const CPURegister& rt, const CPURegister& rt2,
+ const MemOperand& src);
+
+ // Store integer or FP register pair, non-temporal.
+ void stnp(const CPURegister& rt, const CPURegister& rt2,
+ const MemOperand& dst);
+
+ // Load integer or FP register from pc + imm19 << 2.
+ void ldr(const CPURegister& rt, int imm19);
+ static void ldr(Instruction* at, const CPURegister& rt, int imm19);
+
+ // Load word with sign extension from pc + imm19 << 2.
+ void ldrsw(const Register& rt, int imm19);
+
+ // Store exclusive byte.
+ void stxrb(const Register& rs, const Register& rt, const MemOperand& dst);
+
+ // Store exclusive half-word.
+ void stxrh(const Register& rs, const Register& rt, const MemOperand& dst);
+
+ // Store exclusive register.
+ void stxr(const Register& rs, const Register& rt, const MemOperand& dst);
+
+ // Load exclusive byte.
+ void ldxrb(const Register& rt, const MemOperand& src);
+
+ // Load exclusive half-word.
+ void ldxrh(const Register& rt, const MemOperand& src);
+
+ // Load exclusive register.
+ void ldxr(const Register& rt, const MemOperand& src);
+
+ // Store exclusive register pair.
+ void stxp(const Register& rs,
+ const Register& rt,
+ const Register& rt2,
+ const MemOperand& dst);
+
+ // Load exclusive register pair.
+ void ldxp(const Register& rt, const Register& rt2, const MemOperand& src);
+
+ // Store-release exclusive byte.
+ void stlxrb(const Register& rs, const Register& rt, const MemOperand& dst);
+
+ // Store-release exclusive half-word.
+ void stlxrh(const Register& rs, const Register& rt, const MemOperand& dst);
+
+ // Store-release exclusive register.
+ void stlxr(const Register& rs, const Register& rt, const MemOperand& dst);
+
+ // Load-acquire exclusive byte.
+ void ldaxrb(const Register& rt, const MemOperand& src);
+
+ // Load-acquire exclusive half-word.
+ void ldaxrh(const Register& rt, const MemOperand& src);
+
+ // Load-acquire exclusive register.
+ void ldaxr(const Register& rt, const MemOperand& src);
+
+ // Store-release exclusive register pair.
+ void stlxp(const Register& rs,
+ const Register& rt,
+ const Register& rt2,
+ const MemOperand& dst);
+
+ // Load-acquire exclusive register pair.
+ void ldaxp(const Register& rt, const Register& rt2, const MemOperand& src);
+
+ // Store-release byte.
+ void stlrb(const Register& rt, const MemOperand& dst);
+
+ // Store-release half-word.
+ void stlrh(const Register& rt, const MemOperand& dst);
+
+ // Store-release register.
+ void stlr(const Register& rt, const MemOperand& dst);
+
+ // Load-acquire byte.
+ void ldarb(const Register& rt, const MemOperand& src);
+
+ // Load-acquire half-word.
+ void ldarh(const Register& rt, const MemOperand& src);
+
+ // Load-acquire register.
+ void ldar(const Register& rt, const MemOperand& src);
+
+ // Compare and Swap word or doubleword in memory [Armv8.1].
+ void cas(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Compare and Swap word or doubleword in memory [Armv8.1].
+ void casa(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Compare and Swap word or doubleword in memory [Armv8.1].
+ void casl(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Compare and Swap word or doubleword in memory [Armv8.1].
+ void casal(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Compare and Swap byte in memory [Armv8.1].
+ void casb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Compare and Swap byte in memory [Armv8.1].
+ void casab(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Compare and Swap byte in memory [Armv8.1].
+ void caslb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Compare and Swap byte in memory [Armv8.1].
+ void casalb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Compare and Swap halfword in memory [Armv8.1].
+ void cash(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Compare and Swap halfword in memory [Armv8.1].
+ void casah(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Compare and Swap halfword in memory [Armv8.1].
+ void caslh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Compare and Swap halfword in memory [Armv8.1].
+ void casalh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Compare and Swap Pair of words or doublewords in memory [Armv8.1].
+ void casp(const Register& rs,
+ const Register& rs2,
+ const Register& rt,
+ const Register& rt2,
+ const MemOperand& src);
+
+ // Compare and Swap Pair of words or doublewords in memory [Armv8.1].
+ void caspa(const Register& rs,
+ const Register& rs2,
+ const Register& rt,
+ const Register& rt2,
+ const MemOperand& src);
+
+ // Compare and Swap Pair of words or doublewords in memory [Armv8.1].
+ void caspl(const Register& rs,
+ const Register& rs2,
+ const Register& rt,
+ const Register& rt2,
+ const MemOperand& src);
+
+ // Compare and Swap Pair of words or doublewords in memory [Armv8.1].
+ void caspal(const Register& rs,
+ const Register& rs2,
+ const Register& rt,
+ const Register& rt2,
+ const MemOperand& src);
+
+ // Atomic add on byte in memory [Armv8.1]
+ void ldaddb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic add on byte in memory, with Load-acquire semantics [Armv8.1]
+ void ldaddab(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic add on byte in memory, with Store-release semantics [Armv8.1]
+ void ldaddlb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic add on byte in memory, with Load-acquire and Store-release semantics
+ // [Armv8.1]
+ void ldaddalb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic add on halfword in memory [Armv8.1]
+ void ldaddh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic add on halfword in memory, with Load-acquire semantics [Armv8.1]
+ void ldaddah(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic add on halfword in memory, with Store-release semantics [Armv8.1]
+ void ldaddlh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic add on halfword in memory, with Load-acquire and Store-release
+ // semantics [Armv8.1]
+ void ldaddalh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic add on word or doubleword in memory [Armv8.1]
+ void ldadd(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic add on word or doubleword in memory, with Load-acquire semantics
+ // [Armv8.1]
+ void ldadda(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic add on word or doubleword in memory, with Store-release semantics
+ // [Armv8.1]
+ void ldaddl(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic add on word or doubleword in memory, with Load-acquire and
+ // Store-release semantics [Armv8.1]
+ void ldaddal(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit clear on byte in memory [Armv8.1]
+ void ldclrb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit clear on byte in memory, with Load-acquire semantics [Armv8.1]
+ void ldclrab(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit clear on byte in memory, with Store-release semantics [Armv8.1]
+ void ldclrlb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit clear on byte in memory, with Load-acquire and Store-release
+ // semantics [Armv8.1]
+ void ldclralb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit clear on halfword in memory [Armv8.1]
+ void ldclrh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit clear on halfword in memory, with Load-acquire semantics
+ // [Armv8.1]
+ void ldclrah(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit clear on halfword in memory, with Store-release semantics
+ // [Armv8.1]
+ void ldclrlh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit clear on halfword in memory, with Load-acquire and Store-release
+ // semantics [Armv8.1]
+ void ldclralh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit clear on word or doubleword in memory [Armv8.1]
+ void ldclr(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit clear on word or doubleword in memory, with Load-acquire
+ // semantics [Armv8.1]
+ void ldclra(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit clear on word or doubleword in memory, with Store-release
+ // semantics [Armv8.1]
+ void ldclrl(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit clear on word or doubleword in memory, with Load-acquire and
+ // Store-release semantics [Armv8.1]
+ void ldclral(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic exclusive OR on byte in memory [Armv8.1]
+ void ldeorb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic exclusive OR on byte in memory, with Load-acquire semantics
+ // [Armv8.1]
+ void ldeorab(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic exclusive OR on byte in memory, with Store-release semantics
+ // [Armv8.1]
+ void ldeorlb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic exclusive OR on byte in memory, with Load-acquire and Store-release
+ // semantics [Armv8.1]
+ void ldeoralb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic exclusive OR on halfword in memory [Armv8.1]
+ void ldeorh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic exclusive OR on halfword in memory, with Load-acquire semantics
+ // [Armv8.1]
+ void ldeorah(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic exclusive OR on halfword in memory, with Store-release semantics
+ // [Armv8.1]
+ void ldeorlh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic exclusive OR on halfword in memory, with Load-acquire and
+ // Store-release semantics [Armv8.1]
+ void ldeoralh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic exclusive OR on word or doubleword in memory [Armv8.1]
+ void ldeor(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic exclusive OR on word or doubleword in memory, with Load-acquire
+ // semantics [Armv8.1]
+ void ldeora(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic exclusive OR on word or doubleword in memory, with Store-release
+ // semantics [Armv8.1]
+ void ldeorl(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic exclusive OR on word or doubleword in memory, with Load-acquire and
+ // Store-release semantics [Armv8.1]
+ void ldeoral(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit set on byte in memory [Armv8.1]
+ void ldsetb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit set on byte in memory, with Load-acquire semantics [Armv8.1]
+ void ldsetab(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit set on byte in memory, with Store-release semantics [Armv8.1]
+ void ldsetlb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit set on byte in memory, with Load-acquire and Store-release
+ // semantics [Armv8.1]
+ void ldsetalb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit set on halfword in memory [Armv8.1]
+ void ldseth(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit set on halfword in memory, with Load-acquire semantics [Armv8.1]
+ void ldsetah(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit set on halfword in memory, with Store-release semantics
+ // [Armv8.1]
+ void ldsetlh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit set on halfword in memory, with Load-acquire and Store-release
+ // semantics [Armv8.1]
+ void ldsetalh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit set on word or doubleword in memory [Armv8.1]
+ void ldset(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit set on word or doubleword in memory, with Load-acquire semantics
+ // [Armv8.1]
+ void ldseta(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit set on word or doubleword in memory, with Store-release
+ // semantics [Armv8.1]
+ void ldsetl(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit set on word or doubleword in memory, with Load-acquire and
+ // Store-release semantics [Armv8.1]
+ void ldsetal(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed maximum on byte in memory [Armv8.1]
+ void ldsmaxb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed maximum on byte in memory, with Load-acquire semantics
+ // [Armv8.1]
+ void ldsmaxab(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed maximum on byte in memory, with Store-release semantics
+ // [Armv8.1]
+ void ldsmaxlb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed maximum on byte in memory, with Load-acquire and
+ // Store-release semantics [Armv8.1]
+ void ldsmaxalb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed maximum on halfword in memory [Armv8.1]
+ void ldsmaxh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed maximum on halfword in memory, with Load-acquire semantics
+ // [Armv8.1]
+ void ldsmaxah(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed maximum on halfword in memory, with Store-release semantics
+ // [Armv8.1]
+ void ldsmaxlh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed maximum on halfword in memory, with Load-acquire and
+ // Store-release semantics [Armv8.1]
+ void ldsmaxalh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed maximum on word or doubleword in memory [Armv8.1]
+ void ldsmax(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed maximum on word or doubleword in memory, with Load-acquire
+ // semantics [Armv8.1]
+ void ldsmaxa(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed maximum on word or doubleword in memory, with Store-release
+ // semantics [Armv8.1]
+ void ldsmaxl(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed maximum on word or doubleword in memory, with Load-acquire
+ // and Store-release semantics [Armv8.1]
+ void ldsmaxal(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed minimum on byte in memory [Armv8.1]
+ void ldsminb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed minimum on byte in memory, with Load-acquire semantics
+ // [Armv8.1]
+ void ldsminab(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed minimum on byte in memory, with Store-release semantics
+ // [Armv8.1]
+ void ldsminlb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed minimum on byte in memory, with Load-acquire and
+ // Store-release semantics [Armv8.1]
+ void ldsminalb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed minimum on halfword in memory [Armv8.1]
+ void ldsminh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed minimum on halfword in memory, with Load-acquire semantics
+ // [Armv8.1]
+ void ldsminah(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed minimum on halfword in memory, with Store-release semantics
+ // [Armv8.1]
+ void ldsminlh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed minimum on halfword in memory, with Load-acquire and
+ // Store-release semantics [Armv8.1]
+ void ldsminalh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed minimum on word or doubleword in memory [Armv8.1]
+ void ldsmin(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed minimum on word or doubleword in memory, with Load-acquire
+ // semantics [Armv8.1]
+ void ldsmina(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed minimum on word or doubleword in memory, with Store-release
+ // semantics [Armv8.1]
+ void ldsminl(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed minimum on word or doubleword in memory, with Load-acquire
+ // and Store-release semantics [Armv8.1]
+ void ldsminal(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned maximum on byte in memory [Armv8.1]
+ void ldumaxb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned maximum on byte in memory, with Load-acquire semantics
+ // [Armv8.1]
+ void ldumaxab(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned maximum on byte in memory, with Store-release semantics
+ // [Armv8.1]
+ void ldumaxlb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned maximum on byte in memory, with Load-acquire and
+ // Store-release semantics [Armv8.1]
+ void ldumaxalb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned maximum on halfword in memory [Armv8.1]
+ void ldumaxh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned maximum on halfword in memory, with Load-acquire semantics
+ // [Armv8.1]
+ void ldumaxah(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned maximum on halfword in memory, with Store-release semantics
+ // [Armv8.1]
+ void ldumaxlh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned maximum on halfword in memory, with Load-acquire and
+ // Store-release semantics [Armv8.1]
+ void ldumaxalh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned maximum on word or doubleword in memory [Armv8.1]
+ void ldumax(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned maximum on word or doubleword in memory, with Load-acquire
+ // semantics [Armv8.1]
+ void ldumaxa(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned maximum on word or doubleword in memory, with Store-release
+ // semantics [Armv8.1]
+ void ldumaxl(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned maximum on word or doubleword in memory, with Load-acquire
+ // and Store-release semantics [Armv8.1]
+ void ldumaxal(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned minimum on byte in memory [Armv8.1]
+ void lduminb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned minimum on byte in memory, with Load-acquire semantics
+ // [Armv8.1]
+ void lduminab(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned minimum on byte in memory, with Store-release semantics
+ // [Armv8.1]
+ void lduminlb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned minimum on byte in memory, with Load-acquire and
+ // Store-release semantics [Armv8.1]
+ void lduminalb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned minimum on halfword in memory [Armv8.1]
+ void lduminh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned minimum on halfword in memory, with Load-acquire semantics
+ // [Armv8.1]
+ void lduminah(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned minimum on halfword in memory, with Store-release semantics
+ // [Armv8.1]
+ void lduminlh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned minimum on halfword in memory, with Load-acquire and
+ // Store-release semantics [Armv8.1]
+ void lduminalh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned minimum on word or doubleword in memory [Armv8.1]
+ void ldumin(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned minimum on word or doubleword in memory, with Load-acquire
+ // semantics [Armv8.1]
+ void ldumina(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned minimum on word or doubleword in memory, with Store-release
+ // semantics [Armv8.1]
+ void lduminl(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned minimum on word or doubleword in memory, with Load-acquire
+ // and Store-release semantics [Armv8.1]
+ void lduminal(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic add on byte in memory, without return. [Armv8.1]
+ void staddb(const Register& rs, const MemOperand& src);
+
+ // Atomic add on byte in memory, with Store-release semantics and without
+ // return. [Armv8.1]
+ void staddlb(const Register& rs, const MemOperand& src);
+
+ // Atomic add on halfword in memory, without return. [Armv8.1]
+ void staddh(const Register& rs, const MemOperand& src);
+
+ // Atomic add on halfword in memory, with Store-release semantics and without
+ // return. [Armv8.1]
+ void staddlh(const Register& rs, const MemOperand& src);
+
+ // Atomic add on word or doubleword in memory, without return. [Armv8.1]
+ void stadd(const Register& rs, const MemOperand& src);
+
+ // Atomic add on word or doubleword in memory, with Store-release semantics
+ // and without return. [Armv8.1]
+ void staddl(const Register& rs, const MemOperand& src);
+
+ // Atomic bit clear on byte in memory, without return. [Armv8.1]
+ void stclrb(const Register& rs, const MemOperand& src);
+
+ // Atomic bit clear on byte in memory, with Store-release semantics and
+ // without return. [Armv8.1]
+ void stclrlb(const Register& rs, const MemOperand& src);
+
+ // Atomic bit clear on halfword in memory, without return. [Armv8.1]
+ void stclrh(const Register& rs, const MemOperand& src);
+
+ // Atomic bit clear on halfword in memory, with Store-release semantics and
+ // without return. [Armv8.1]
+ void stclrlh(const Register& rs, const MemOperand& src);
+
+ // Atomic bit clear on word or doubleword in memory, without return. [Armv8.1]
+ void stclr(const Register& rs, const MemOperand& src);
+
+ // Atomic bit clear on word or doubleword in memory, with Store-release
+ // semantics and without return. [Armv8.1]
+ void stclrl(const Register& rs, const MemOperand& src);
+
+ // Atomic exclusive OR on byte in memory, without return. [Armv8.1]
+ void steorb(const Register& rs, const MemOperand& src);
+
+ // Atomic exclusive OR on byte in memory, with Store-release semantics and
+ // without return. [Armv8.1]
+ void steorlb(const Register& rs, const MemOperand& src);
+
+ // Atomic exclusive OR on halfword in memory, without return. [Armv8.1]
+ void steorh(const Register& rs, const MemOperand& src);
+
+ // Atomic exclusive OR on halfword in memory, with Store-release semantics
+ // and without return. [Armv8.1]
+ void steorlh(const Register& rs, const MemOperand& src);
+
+ // Atomic exclusive OR on word or doubleword in memory, without return.
+ // [Armv8.1]
+ void steor(const Register& rs, const MemOperand& src);
+
+ // Atomic exclusive OR on word or doubleword in memory, with Store-release
+ // semantics and without return. [Armv8.1]
+ void steorl(const Register& rs, const MemOperand& src);
+
+ // Atomic bit set on byte in memory, without return. [Armv8.1]
+ void stsetb(const Register& rs, const MemOperand& src);
+
+ // Atomic bit set on byte in memory, with Store-release semantics and without
+ // return. [Armv8.1]
+ void stsetlb(const Register& rs, const MemOperand& src);
+
+ // Atomic bit set on halfword in memory, without return. [Armv8.1]
+ void stseth(const Register& rs, const MemOperand& src);
+
+ // Atomic bit set on halfword in memory, with Store-release semantics and
+ // without return. [Armv8.1]
+ void stsetlh(const Register& rs, const MemOperand& src);
+
+ // Atomic bit set on word or doubleword in memory, without return. [Armv8.1]
+ void stset(const Register& rs, const MemOperand& src);
+
+ // Atomic bit set on word or doubleword in memory, with Store-release
+ // semantics and without return. [Armv8.1]
+ void stsetl(const Register& rs, const MemOperand& src);
+
+ // Atomic signed maximum on byte in memory, without return. [Armv8.1]
+ void stsmaxb(const Register& rs, const MemOperand& src);
+
+ // Atomic signed maximum on byte in memory, with Store-release semantics and
+ // without return. [Armv8.1]
+ void stsmaxlb(const Register& rs, const MemOperand& src);
+
+ // Atomic signed maximum on halfword in memory, without return. [Armv8.1]
+ void stsmaxh(const Register& rs, const MemOperand& src);
+
+ // Atomic signed maximum on halfword in memory, with Store-release semantics
+ // and without return. [Armv8.1]
+ void stsmaxlh(const Register& rs, const MemOperand& src);
+
+ // Atomic signed maximum on word or doubleword in memory, without return.
+ // [Armv8.1]
+ void stsmax(const Register& rs, const MemOperand& src);
+
+ // Atomic signed maximum on word or doubleword in memory, with Store-release
+ // semantics and without return. [Armv8.1]
+ void stsmaxl(const Register& rs, const MemOperand& src);
+
+ // Atomic signed minimum on byte in memory, without return. [Armv8.1]
+ void stsminb(const Register& rs, const MemOperand& src);
+
+ // Atomic signed minimum on byte in memory, with Store-release semantics and
+ // without return. [Armv8.1]
+ void stsminlb(const Register& rs, const MemOperand& src);
+
+ // Atomic signed minimum on halfword in memory, without return. [Armv8.1]
+ void stsminh(const Register& rs, const MemOperand& src);
+
+ // Atomic signed minimum on halfword in memory, with Store-release semantics
+ // and without return. [Armv8.1]
+ void stsminlh(const Register& rs, const MemOperand& src);
+
+ // Atomic signed minimum on word or doubleword in memory, without return.
+ // [Armv8.1]
+ void stsmin(const Register& rs, const MemOperand& src);
+
+ // Atomic signed minimum on word or doubleword in memory, with Store-release
+ // semantics and without return. semantics [Armv8.1]
+ void stsminl(const Register& rs, const MemOperand& src);
+
+ // Atomic unsigned maximum on byte in memory, without return. [Armv8.1]
+ void stumaxb(const Register& rs, const MemOperand& src);
+
+ // Atomic unsigned maximum on byte in memory, with Store-release semantics and
+ // without return. [Armv8.1]
+ void stumaxlb(const Register& rs, const MemOperand& src);
+
+ // Atomic unsigned maximum on halfword in memory, without return. [Armv8.1]
+ void stumaxh(const Register& rs, const MemOperand& src);
+
+ // Atomic unsigned maximum on halfword in memory, with Store-release semantics
+ // and without return. [Armv8.1]
+ void stumaxlh(const Register& rs, const MemOperand& src);
+
+ // Atomic unsigned maximum on word or doubleword in memory, without return.
+ // [Armv8.1]
+ void stumax(const Register& rs, const MemOperand& src);
+
+ // Atomic unsigned maximum on word or doubleword in memory, with Store-release
+ // semantics and without return. [Armv8.1]
+ void stumaxl(const Register& rs, const MemOperand& src);
+
+ // Atomic unsigned minimum on byte in memory, without return. [Armv8.1]
+ void stuminb(const Register& rs, const MemOperand& src);
+
+ // Atomic unsigned minimum on byte in memory, with Store-release semantics and
+ // without return. [Armv8.1]
+ void stuminlb(const Register& rs, const MemOperand& src);
+
+ // Atomic unsigned minimum on halfword in memory, without return. [Armv8.1]
+ void stuminh(const Register& rs, const MemOperand& src);
+
+ // Atomic unsigned minimum on halfword in memory, with Store-release semantics
+ // and without return. [Armv8.1]
+ void stuminlh(const Register& rs, const MemOperand& src);
+
+ // Atomic unsigned minimum on word or doubleword in memory, without return.
+ // [Armv8.1]
+ void stumin(const Register& rs, const MemOperand& src);
+
+ // Atomic unsigned minimum on word or doubleword in memory, with Store-release
+ // semantics and without return. [Armv8.1]
+ void stuminl(const Register& rs, const MemOperand& src);
+
+ // Swap byte in memory [Armv8.1]
+ void swpb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Swap byte in memory, with Load-acquire semantics [Armv8.1]
+ void swpab(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Swap byte in memory, with Store-release semantics [Armv8.1]
+ void swplb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Swap byte in memory, with Load-acquire and Store-release semantics
+ // [Armv8.1]
+ void swpalb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Swap halfword in memory [Armv8.1]
+ void swph(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Swap halfword in memory, with Load-acquire semantics [Armv8.1]
+ void swpah(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Swap halfword in memory, with Store-release semantics [Armv8.1]
+ void swplh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Swap halfword in memory, with Load-acquire and Store-release semantics
+ // [Armv8.1]
+ void swpalh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Swap word or doubleword in memory [Armv8.1]
+ void swp(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Swap word or doubleword in memory, with Load-acquire semantics [Armv8.1]
+ void swpa(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Swap word or doubleword in memory, with Store-release semantics [Armv8.1]
+ void swpl(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Swap word or doubleword in memory, with Load-acquire and Store-release
+ // semantics [Armv8.1]
+ void swpal(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Prefetch memory.
+ void prfm(PrefetchOperation op, const MemOperand& addr,
+ LoadStoreScalingOption option = PreferScaledOffset);
+
+ // Prefetch memory (with unscaled offset).
+ void prfum(PrefetchOperation op, const MemOperand& addr,
+ LoadStoreScalingOption option = PreferUnscaledOffset);
+
+ // Prefetch from pc + imm19 << 2.
+ void prfm(PrefetchOperation op, int imm19);
+
+ // Move instructions. The default shift of -1 indicates that the move
+ // instruction will calculate an appropriate 16-bit immediate and left shift
+ // that is equal to the 64-bit immediate argument. If an explicit left shift
+ // is specified (0, 16, 32 or 48), the immediate must be a 16-bit value.
+ //
+ // For movk, an explicit shift can be used to indicate which half word should
+ // be overwritten, eg. movk(x0, 0, 0) will overwrite the least-significant
+ // half word with zero, whereas movk(x0, 0, 48) will overwrite the
+ // most-significant.
+
+ // Move immediate and keep.
+ void movk(const Register& rd, uint64_t imm, int shift = -1) {
+ MoveWide(rd, imm, shift, MOVK);
+ }
+
+ // Move inverted immediate.
+ void movn(const Register& rd, uint64_t imm, int shift = -1) {
+ MoveWide(rd, imm, shift, MOVN);
+ }
+
+ // Move immediate.
+ void movz(const Register& rd, uint64_t imm, int shift = -1) {
+ MoveWide(rd, imm, shift, MOVZ);
+ }
+
+ // Misc instructions.
+ // Monitor debug-mode breakpoint.
+ void brk(int code);
+
+ // Halting debug-mode breakpoint.
+ void hlt(int code);
+
+ // Generate exception targeting EL1.
+ void svc(int code);
+ static void svc(Instruction* at, int code);
+
+ // Move register to register.
+ void mov(const Register& rd, const Register& rn);
+
+ // Move inverted operand to register.
+ void mvn(const Register& rd, const Operand& operand);
+
+ // System instructions.
+ // Move to register from system register.
+ void mrs(const Register& rt, SystemRegister sysreg);
+
+ // Move from register to system register.
+ void msr(SystemRegister sysreg, const Register& rt);
+
+ // System instruction.
+ void sys(int op1, int crn, int crm, int op2, const Register& rt = xzr);
+
+ // System instruction with pre-encoded op (op1:crn:crm:op2).
+ void sys(int op, const Register& rt = xzr);
+
+ // System data cache operation.
+ void dc(DataCacheOp op, const Register& rt);
+
+ // System instruction cache operation.
+ void ic(InstructionCacheOp op, const Register& rt);
+
+ // System hint.
+ BufferOffset hint(SystemHint code);
+ static void hint(Instruction* at, SystemHint code);
+
+ // Clear exclusive monitor.
+ void clrex(int imm4 = 0xf);
+
+ // Data memory barrier.
+ void dmb(BarrierDomain domain, BarrierType type);
+
+ // Data synchronization barrier.
+ void dsb(BarrierDomain domain, BarrierType type);
+
+ // Instruction synchronization barrier.
+ void isb();
+
+ // Alias for system instructions.
+ // No-op.
+ BufferOffset nop() {
+ return hint(NOP);
+ }
+ static void nop(Instruction* at);
+
+ // Alias for system instructions.
+ // Conditional speculation barrier.
+ BufferOffset csdb() {
+ return hint(CSDB);
+ }
+ static void csdb(Instruction* at);
+
+ // FP and NEON instructions.
+ // Move double precision immediate to FP register.
+ void fmov(const VRegister& vd, double imm);
+
+ // Move single precision immediate to FP register.
+ void fmov(const VRegister& vd, float imm);
+
+ // Move FP register to register.
+ void fmov(const Register& rd, const VRegister& fn);
+
+ // Move register to FP register.
+ void fmov(const VRegister& vd, const Register& rn);
+
+ // Move FP register to FP register.
+ void fmov(const VRegister& vd, const VRegister& fn);
+
+ // Move 64-bit register to top half of 128-bit FP register.
+ void fmov(const VRegister& vd, int index, const Register& rn);
+
+ // Move top half of 128-bit FP register to 64-bit register.
+ void fmov(const Register& rd, const VRegister& vn, int index);
+
+ // FP add.
+ void fadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
+
+ // FP subtract.
+ void fsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
+
+ // FP multiply.
+ void fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
+
+ // FP fused multiply-add.
+ void fmadd(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ const VRegister& va);
+
+ // FP fused multiply-subtract.
+ void fmsub(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ const VRegister& va);
+
+ // FP fused multiply-add and negate.
+ void fnmadd(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ const VRegister& va);
+
+ // FP fused multiply-subtract and negate.
+ void fnmsub(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ const VRegister& va);
+
+ // FP multiply-negate scalar.
+ void fnmul(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // FP reciprocal exponent scalar.
+ void frecpx(const VRegister& vd,
+ const VRegister& vn);
+
+ // FP divide.
+ void fdiv(const VRegister& vd, const VRegister& fn, const VRegister& vm);
+
+ // FP maximum.
+ void fmax(const VRegister& vd, const VRegister& fn, const VRegister& vm);
+
+ // FP minimum.
+ void fmin(const VRegister& vd, const VRegister& fn, const VRegister& vm);
+
+ // FP maximum number.
+ void fmaxnm(const VRegister& vd, const VRegister& fn, const VRegister& vm);
+
+ // FP minimum number.
+ void fminnm(const VRegister& vd, const VRegister& fn, const VRegister& vm);
+
+ // FP absolute.
+ void fabs(const VRegister& vd, const VRegister& vn);
+
+ // FP negate.
+ void fneg(const VRegister& vd, const VRegister& vn);
+
+ // FP square root.
+ void fsqrt(const VRegister& vd, const VRegister& vn);
+
+ // FP round to integer, nearest with ties to away.
+ void frinta(const VRegister& vd, const VRegister& vn);
+
+ // FP round to integer, implicit rounding.
+ void frinti(const VRegister& vd, const VRegister& vn);
+
+ // FP round to integer, toward minus infinity.
+ void frintm(const VRegister& vd, const VRegister& vn);
+
+ // FP round to integer, nearest with ties to even.
+ void frintn(const VRegister& vd, const VRegister& vn);
+
+ // FP round to integer, toward plus infinity.
+ void frintp(const VRegister& vd, const VRegister& vn);
+
+ // FP round to integer, exact, implicit rounding.
+ void frintx(const VRegister& vd, const VRegister& vn);
+
+ // FP round to integer, towards zero.
+ void frintz(const VRegister& vd, const VRegister& vn);
+
+ void FPCompareMacro(const VRegister& vn,
+ double value,
+ FPTrapFlags trap);
+
+ void FPCompareMacro(const VRegister& vn,
+ const VRegister& vm,
+ FPTrapFlags trap);
+
+ // FP compare registers.
+ void fcmp(const VRegister& vn, const VRegister& vm);
+
+ // FP compare immediate.
+ void fcmp(const VRegister& vn, double value);
+
+ void FPCCompareMacro(const VRegister& vn,
+ const VRegister& vm,
+ StatusFlags nzcv,
+ Condition cond,
+ FPTrapFlags trap);
+
+ // FP conditional compare.
+ void fccmp(const VRegister& vn,
+ const VRegister& vm,
+ StatusFlags nzcv,
+ Condition cond);
+
+ // FP signaling compare registers.
+ void fcmpe(const VRegister& vn, const VRegister& vm);
+
+ // FP signaling compare immediate.
+ void fcmpe(const VRegister& vn, double value);
+
+ // FP conditional signaling compare.
+ void fccmpe(const VRegister& vn,
+ const VRegister& vm,
+ StatusFlags nzcv,
+ Condition cond);
+
+ // FP conditional select.
+ void fcsel(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ Condition cond);
+
+ // Common FP Convert functions.
+ void NEONFPConvertToInt(const Register& rd,
+ const VRegister& vn,
+ Instr op);
+ void NEONFPConvertToInt(const VRegister& vd,
+ const VRegister& vn,
+ Instr op);
+
+ // FP convert between precisions.
+ void fcvt(const VRegister& vd, const VRegister& vn);
+
+ // FP convert to higher precision.
+ void fcvtl(const VRegister& vd, const VRegister& vn);
+
+ // FP convert to higher precision (second part).
+ void fcvtl2(const VRegister& vd, const VRegister& vn);
+
+ // FP convert to lower precision.
+ void fcvtn(const VRegister& vd, const VRegister& vn);
+
+ // FP convert to lower prevision (second part).
+ void fcvtn2(const VRegister& vd, const VRegister& vn);
+
+ // FP convert to lower precision, rounding to odd.
+ void fcvtxn(const VRegister& vd, const VRegister& vn);
+
+ // FP convert to lower precision, rounding to odd (second part).
+ void fcvtxn2(const VRegister& vd, const VRegister& vn);
+
+ // FP convert to signed integer, nearest with ties to away.
+ void fcvtas(const Register& rd, const VRegister& vn);
+
+ // FP convert to unsigned integer, nearest with ties to away.
+ void fcvtau(const Register& rd, const VRegister& vn);
+
+ // FP convert to signed integer, nearest with ties to away.
+ void fcvtas(const VRegister& vd, const VRegister& vn);
+
+ // FP convert to unsigned integer, nearest with ties to away.
+ void fcvtau(const VRegister& vd, const VRegister& vn);
+
+ // FP convert to signed integer, round towards -infinity.
+ void fcvtms(const Register& rd, const VRegister& vn);
+
+ // FP convert to unsigned integer, round towards -infinity.
+ void fcvtmu(const Register& rd, const VRegister& vn);
+
+ // FP convert to signed integer, round towards -infinity.
+ void fcvtms(const VRegister& vd, const VRegister& vn);
+
+ // FP convert to unsigned integer, round towards -infinity.
+ void fcvtmu(const VRegister& vd, const VRegister& vn);
+
+ // FP convert to signed integer, nearest with ties to even.
+ void fcvtns(const Register& rd, const VRegister& vn);
+
+ // FP convert to unsigned integer, nearest with ties to even.
+ void fcvtnu(const Register& rd, const VRegister& vn);
+
+ // FP convert to signed integer, nearest with ties to even.
+ void fcvtns(const VRegister& rd, const VRegister& vn);
+
+ // FP JavaScript convert to signed integer, rounding toward zero [Armv8.3].
+ void fjcvtzs(const Register& rd, const VRegister& vn);
+
+ // FP convert to unsigned integer, nearest with ties to even.
+ void fcvtnu(const VRegister& rd, const VRegister& vn);
+
+ // FP convert to signed integer or fixed-point, round towards zero.
+ void fcvtzs(const Register& rd, const VRegister& vn, int fbits = 0);
+
+ // FP convert to unsigned integer or fixed-point, round towards zero.
+ void fcvtzu(const Register& rd, const VRegister& vn, int fbits = 0);
+
+ // FP convert to signed integer or fixed-point, round towards zero.
+ void fcvtzs(const VRegister& vd, const VRegister& vn, int fbits = 0);
+
+ // FP convert to unsigned integer or fixed-point, round towards zero.
+ void fcvtzu(const VRegister& vd, const VRegister& vn, int fbits = 0);
+
+ // FP convert to signed integer, round towards +infinity.
+ void fcvtps(const Register& rd, const VRegister& vn);
+
+ // FP convert to unsigned integer, round towards +infinity.
+ void fcvtpu(const Register& rd, const VRegister& vn);
+
+ // FP convert to signed integer, round towards +infinity.
+ void fcvtps(const VRegister& vd, const VRegister& vn);
+
+ // FP convert to unsigned integer, round towards +infinity.
+ void fcvtpu(const VRegister& vd, const VRegister& vn);
+
+ // Convert signed integer or fixed point to FP.
+ void scvtf(const VRegister& fd, const Register& rn, int fbits = 0);
+
+ // Convert unsigned integer or fixed point to FP.
+ void ucvtf(const VRegister& fd, const Register& rn, int fbits = 0);
+
+ // Convert signed integer or fixed-point to FP.
+ void scvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);
+
+ // Convert unsigned integer or fixed-point to FP.
+ void ucvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);
+
+ // Unsigned absolute difference.
+ void uabd(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed absolute difference.
+ void sabd(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned absolute difference and accumulate.
+ void uaba(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed absolute difference and accumulate.
+ void saba(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Add.
+ void add(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Subtract.
+ void sub(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned halving add.
+ void uhadd(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed halving add.
+ void shadd(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned rounding halving add.
+ void urhadd(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed rounding halving add.
+ void srhadd(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned halving sub.
+ void uhsub(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed halving sub.
+ void shsub(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned saturating add.
+ void uqadd(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed saturating add.
+ void sqadd(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned saturating subtract.
+ void uqsub(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed saturating subtract.
+ void sqsub(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Add pairwise.
+ void addp(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Add pair of elements scalar.
+ void addp(const VRegister& vd,
+ const VRegister& vn);
+
+ // Multiply-add to accumulator.
+ void mla(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Multiply-subtract to accumulator.
+ void mls(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Multiply.
+ void mul(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Multiply by scalar element.
+ void mul(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Multiply-add by scalar element.
+ void mla(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Multiply-subtract by scalar element.
+ void mls(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Signed long multiply-add by scalar element.
+ void smlal(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Signed long multiply-add by scalar element (second part).
+ void smlal2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Unsigned long multiply-add by scalar element.
+ void umlal(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Unsigned long multiply-add by scalar element (second part).
+ void umlal2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Signed long multiply-sub by scalar element.
+ void smlsl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Signed long multiply-sub by scalar element (second part).
+ void smlsl2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Unsigned long multiply-sub by scalar element.
+ void umlsl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Unsigned long multiply-sub by scalar element (second part).
+ void umlsl2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Signed long multiply by scalar element.
+ void smull(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Signed long multiply by scalar element (second part).
+ void smull2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Unsigned long multiply by scalar element.
+ void umull(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Unsigned long multiply by scalar element (second part).
+ void umull2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Signed saturating double long multiply by element.
+ void sqdmull(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Signed saturating double long multiply by element (second part).
+ void sqdmull2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Signed saturating doubling long multiply-add by element.
+ void sqdmlal(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Signed saturating doubling long multiply-add by element (second part).
+ void sqdmlal2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Signed saturating doubling long multiply-sub by element.
+ void sqdmlsl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Signed saturating doubling long multiply-sub by element (second part).
+ void sqdmlsl2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Compare equal.
+ void cmeq(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Compare signed greater than or equal.
+ void cmge(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Compare signed greater than.
+ void cmgt(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Compare unsigned higher.
+ void cmhi(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Compare unsigned higher or same.
+ void cmhs(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Compare bitwise test bits nonzero.
+ void cmtst(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Compare bitwise to zero.
+ void cmeq(const VRegister& vd,
+ const VRegister& vn,
+ int value);
+
+ // Compare signed greater than or equal to zero.
+ void cmge(const VRegister& vd,
+ const VRegister& vn,
+ int value);
+
+ // Compare signed greater than zero.
+ void cmgt(const VRegister& vd,
+ const VRegister& vn,
+ int value);
+
+ // Compare signed less than or equal to zero.
+ void cmle(const VRegister& vd,
+ const VRegister& vn,
+ int value);
+
+ // Compare signed less than zero.
+ void cmlt(const VRegister& vd,
+ const VRegister& vn,
+ int value);
+
+ // Signed shift left by register.
+ void sshl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned shift left by register.
+ void ushl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed saturating shift left by register.
+ void sqshl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned saturating shift left by register.
+ void uqshl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed rounding shift left by register.
+ void srshl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned rounding shift left by register.
+ void urshl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed saturating rounding shift left by register.
+ void sqrshl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned saturating rounding shift left by register.
+ void uqrshl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Bitwise and.
+ void and_(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Bitwise or.
+ void orr(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Bitwise or immediate.
+ void orr(const VRegister& vd,
+ const int imm8,
+ const int left_shift = 0);
+
+ // Move register to register.
+ void mov(const VRegister& vd,
+ const VRegister& vn);
+
+ // Bitwise orn.
+ void orn(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Bitwise eor.
+ void eor(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Bit clear immediate.
+ void bic(const VRegister& vd,
+ const int imm8,
+ const int left_shift = 0);
+
+ // Bit clear.
+ void bic(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Bitwise insert if false.
+ void bif(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Bitwise insert if true.
+ void bit(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Bitwise select.
+ void bsl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Polynomial multiply.
+ void pmul(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Vector move immediate.
+ void movi(const VRegister& vd,
+ const uint64_t imm,
+ Shift shift = LSL,
+ const int shift_amount = 0);
+
+ // Bitwise not.
+ void mvn(const VRegister& vd,
+ const VRegister& vn);
+
+ // Vector move inverted immediate.
+ void mvni(const VRegister& vd,
+ const int imm8,
+ Shift shift = LSL,
+ const int shift_amount = 0);
+
+ // Signed saturating accumulate of unsigned value.
+ void suqadd(const VRegister& vd,
+ const VRegister& vn);
+
+ // Unsigned saturating accumulate of signed value.
+ void usqadd(const VRegister& vd,
+ const VRegister& vn);
+
+ // Absolute value.
+ void abs(const VRegister& vd,
+ const VRegister& vn);
+
+ // Signed saturating absolute value.
+ void sqabs(const VRegister& vd,
+ const VRegister& vn);
+
+ // Negate.
+ void neg(const VRegister& vd,
+ const VRegister& vn);
+
+ // Signed saturating negate.
+ void sqneg(const VRegister& vd,
+ const VRegister& vn);
+
+ // Bitwise not.
+ void not_(const VRegister& vd,
+ const VRegister& vn);
+
+ // Extract narrow.
+ void xtn(const VRegister& vd,
+ const VRegister& vn);
+
+ // Extract narrow (second part).
+ void xtn2(const VRegister& vd,
+ const VRegister& vn);
+
+ // Signed saturating extract narrow.
+ void sqxtn(const VRegister& vd,
+ const VRegister& vn);
+
+ // Signed saturating extract narrow (second part).
+ void sqxtn2(const VRegister& vd,
+ const VRegister& vn);
+
+ // Unsigned saturating extract narrow.
+ void uqxtn(const VRegister& vd,
+ const VRegister& vn);
+
+ // Unsigned saturating extract narrow (second part).
+ void uqxtn2(const VRegister& vd,
+ const VRegister& vn);
+
+ // Signed saturating extract unsigned narrow.
+ void sqxtun(const VRegister& vd,
+ const VRegister& vn);
+
+ // Signed saturating extract unsigned narrow (second part).
+ void sqxtun2(const VRegister& vd,
+ const VRegister& vn);
+
+ // Extract vector from pair of vectors.
+ void ext(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int index);
+
+ // Duplicate vector element to vector or scalar.
+ void dup(const VRegister& vd,
+ const VRegister& vn,
+ int vn_index);
+
+ // Move vector element to scalar.
+ void mov(const VRegister& vd,
+ const VRegister& vn,
+ int vn_index);
+
+ // Duplicate general-purpose register to vector.
+ void dup(const VRegister& vd,
+ const Register& rn);
+
+ // Insert vector element from another vector element.
+ void ins(const VRegister& vd,
+ int vd_index,
+ const VRegister& vn,
+ int vn_index);
+
+ // Move vector element to another vector element.
+ void mov(const VRegister& vd,
+ int vd_index,
+ const VRegister& vn,
+ int vn_index);
+
+ // Insert vector element from general-purpose register.
+ void ins(const VRegister& vd,
+ int vd_index,
+ const Register& rn);
+
+ // Move general-purpose register to a vector element.
+ void mov(const VRegister& vd,
+ int vd_index,
+ const Register& rn);
+
+ // Unsigned move vector element to general-purpose register.
+ void umov(const Register& rd,
+ const VRegister& vn,
+ int vn_index);
+
+ // Move vector element to general-purpose register.
+ void mov(const Register& rd,
+ const VRegister& vn,
+ int vn_index);
+
+ // Signed move vector element to general-purpose register.
+ void smov(const Register& rd,
+ const VRegister& vn,
+ int vn_index);
+
+ // One-element structure load to one register.
+ void ld1(const VRegister& vt,
+ const MemOperand& src);
+
+ // One-element structure load to two registers.
+ void ld1(const VRegister& vt,
+ const VRegister& vt2,
+ const MemOperand& src);
+
+ // One-element structure load to three registers.
+ void ld1(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const MemOperand& src);
+
+ // One-element structure load to four registers.
+ void ld1(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const VRegister& vt4,
+ const MemOperand& src);
+
+ // One-element single structure load to one lane.
+ void ld1(const VRegister& vt,
+ int lane,
+ const MemOperand& src);
+
+ // One-element single structure load to all lanes.
+ void ld1r(const VRegister& vt,
+ const MemOperand& src);
+
+ // Two-element structure load.
+ void ld2(const VRegister& vt,
+ const VRegister& vt2,
+ const MemOperand& src);
+
+ // Two-element single structure load to one lane.
+ void ld2(const VRegister& vt,
+ const VRegister& vt2,
+ int lane,
+ const MemOperand& src);
+
+ // Two-element single structure load to all lanes.
+ void ld2r(const VRegister& vt,
+ const VRegister& vt2,
+ const MemOperand& src);
+
+ // Three-element structure load.
+ void ld3(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const MemOperand& src);
+
+ // Three-element single structure load to one lane.
+ void ld3(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ int lane,
+ const MemOperand& src);
+
+ // Three-element single structure load to all lanes.
+ void ld3r(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const MemOperand& src);
+
+ // Four-element structure load.
+ void ld4(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const VRegister& vt4,
+ const MemOperand& src);
+
+ // Four-element single structure load to one lane.
+ void ld4(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const VRegister& vt4,
+ int lane,
+ const MemOperand& src);
+
+ // Four-element single structure load to all lanes.
+ void ld4r(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const VRegister& vt4,
+ const MemOperand& src);
+
+ // Count leading sign bits.
+ void cls(const VRegister& vd,
+ const VRegister& vn);
+
+ // Count leading zero bits (vector).
+ void clz(const VRegister& vd,
+ const VRegister& vn);
+
+ // Population count per byte.
+ void cnt(const VRegister& vd,
+ const VRegister& vn);
+
+ // Reverse bit order.
+ void rbit(const VRegister& vd,
+ const VRegister& vn);
+
+ // Reverse elements in 16-bit halfwords.
+ void rev16(const VRegister& vd,
+ const VRegister& vn);
+
+ // Reverse elements in 32-bit words.
+ void rev32(const VRegister& vd,
+ const VRegister& vn);
+
+ // Reverse elements in 64-bit doublewords.
+ void rev64(const VRegister& vd,
+ const VRegister& vn);
+
+ // Unsigned reciprocal square root estimate.
+ void ursqrte(const VRegister& vd,
+ const VRegister& vn);
+
+ // Unsigned reciprocal estimate.
+ void urecpe(const VRegister& vd,
+ const VRegister& vn);
+
+ // Signed pairwise long add.
+ void saddlp(const VRegister& vd,
+ const VRegister& vn);
+
+ // Unsigned pairwise long add.
+ void uaddlp(const VRegister& vd,
+ const VRegister& vn);
+
+ // Signed pairwise long add and accumulate.
+ void sadalp(const VRegister& vd,
+ const VRegister& vn);
+
+ // Unsigned pairwise long add and accumulate.
+ void uadalp(const VRegister& vd,
+ const VRegister& vn);
+
+ // Shift left by immediate.
+ void shl(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Signed saturating shift left by immediate.
+ void sqshl(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Signed saturating shift left unsigned by immediate.
+ void sqshlu(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Unsigned saturating shift left by immediate.
+ void uqshl(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Signed shift left long by immediate.
+ void sshll(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Signed shift left long by immediate (second part).
+ void sshll2(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Signed extend long.
+ void sxtl(const VRegister& vd,
+ const VRegister& vn);
+
+ // Signed extend long (second part).
+ void sxtl2(const VRegister& vd,
+ const VRegister& vn);
+
+ // Unsigned shift left long by immediate.
+ void ushll(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Unsigned shift left long by immediate (second part).
+ void ushll2(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Shift left long by element size.
+ void shll(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Shift left long by element size (second part).
+ void shll2(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Unsigned extend long.
+ void uxtl(const VRegister& vd,
+ const VRegister& vn);
+
+ // Unsigned extend long (second part).
+ void uxtl2(const VRegister& vd,
+ const VRegister& vn);
+
+ // Shift left by immediate and insert.
+ void sli(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Shift right by immediate and insert.
+ void sri(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Signed maximum.
+ void smax(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed pairwise maximum.
+ void smaxp(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Add across vector.
+ void addv(const VRegister& vd,
+ const VRegister& vn);
+
+ // Signed add long across vector.
+ void saddlv(const VRegister& vd,
+ const VRegister& vn);
+
+ // Unsigned add long across vector.
+ void uaddlv(const VRegister& vd,
+ const VRegister& vn);
+
+ // FP maximum number across vector.
+ void fmaxnmv(const VRegister& vd,
+ const VRegister& vn);
+
+ // FP maximum across vector.
+ void fmaxv(const VRegister& vd,
+ const VRegister& vn);
+
+ // FP minimum number across vector.
+ void fminnmv(const VRegister& vd,
+ const VRegister& vn);
+
+ // FP minimum across vector.
+ void fminv(const VRegister& vd,
+ const VRegister& vn);
+
+ // Signed maximum across vector.
+ void smaxv(const VRegister& vd,
+ const VRegister& vn);
+
+ // Signed minimum.
+ void smin(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed minimum pairwise.
+ void sminp(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed minimum across vector.
+ void sminv(const VRegister& vd,
+ const VRegister& vn);
+
+ // One-element structure store from one register.
+ void st1(const VRegister& vt,
+ const MemOperand& src);
+
+ // One-element structure store from two registers.
+ void st1(const VRegister& vt,
+ const VRegister& vt2,
+ const MemOperand& src);
+
+ // One-element structure store from three registers.
+ void st1(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const MemOperand& src);
+
+ // One-element structure store from four registers.
+ void st1(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const VRegister& vt4,
+ const MemOperand& src);
+
+ // One-element single structure store from one lane.
+ void st1(const VRegister& vt,
+ int lane,
+ const MemOperand& src);
+
+ // Two-element structure store from two registers.
+ void st2(const VRegister& vt,
+ const VRegister& vt2,
+ const MemOperand& src);
+
+ // Two-element single structure store from two lanes.
+ void st2(const VRegister& vt,
+ const VRegister& vt2,
+ int lane,
+ const MemOperand& src);
+
+ // Three-element structure store from three registers.
+ void st3(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const MemOperand& src);
+
+ // Three-element single structure store from three lanes.
+ void st3(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ int lane,
+ const MemOperand& src);
+
+ // Four-element structure store from four registers.
+ void st4(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const VRegister& vt4,
+ const MemOperand& src);
+
+ // Four-element single structure store from four lanes.
+ void st4(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const VRegister& vt4,
+ int lane,
+ const MemOperand& src);
+
+ // Unsigned add long.
+ void uaddl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned add long (second part).
+ void uaddl2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned add wide.
+ void uaddw(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned add wide (second part).
+ void uaddw2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed add long.
+ void saddl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed add long (second part).
+ void saddl2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed add wide.
+ void saddw(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed add wide (second part).
+ void saddw2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned subtract long.
+ void usubl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned subtract long (second part).
+ void usubl2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned subtract wide.
+ void usubw(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned subtract wide (second part).
+ void usubw2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed subtract long.
+ void ssubl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed subtract long (second part).
+ void ssubl2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed integer subtract wide.
+ void ssubw(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed integer subtract wide (second part).
+ void ssubw2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned maximum.
+ void umax(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned pairwise maximum.
+ void umaxp(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned maximum across vector.
+ void umaxv(const VRegister& vd,
+ const VRegister& vn);
+
+ // Unsigned minimum.
+ void umin(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned pairwise minimum.
+ void uminp(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned minimum across vector.
+ void uminv(const VRegister& vd,
+ const VRegister& vn);
+
+ // Transpose vectors (primary).
+ void trn1(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Transpose vectors (secondary).
+ void trn2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unzip vectors (primary).
+ void uzp1(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unzip vectors (secondary).
+ void uzp2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Zip vectors (primary).
+ void zip1(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Zip vectors (secondary).
+ void zip2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed shift right by immediate.
+ void sshr(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Unsigned shift right by immediate.
+ void ushr(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Signed rounding shift right by immediate.
+ void srshr(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Unsigned rounding shift right by immediate.
+ void urshr(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Signed shift right by immediate and accumulate.
+ void ssra(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Unsigned shift right by immediate and accumulate.
+ void usra(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Signed rounding shift right by immediate and accumulate.
+ void srsra(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Unsigned rounding shift right by immediate and accumulate.
+ void ursra(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Shift right narrow by immediate.
+ void shrn(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Shift right narrow by immediate (second part).
+ void shrn2(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Rounding shift right narrow by immediate.
+ void rshrn(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Rounding shift right narrow by immediate (second part).
+ void rshrn2(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Unsigned saturating shift right narrow by immediate.
+ void uqshrn(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Unsigned saturating shift right narrow by immediate (second part).
+ void uqshrn2(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Unsigned saturating rounding shift right narrow by immediate.
+ void uqrshrn(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Unsigned saturating rounding shift right narrow by immediate (second part).
+ void uqrshrn2(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Signed saturating shift right narrow by immediate.
+ void sqshrn(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Signed saturating shift right narrow by immediate (second part).
+ void sqshrn2(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Signed saturating rounded shift right narrow by immediate.
+ void sqrshrn(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Signed saturating rounded shift right narrow by immediate (second part).
+ void sqrshrn2(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Signed saturating shift right unsigned narrow by immediate.
+ void sqshrun(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Signed saturating shift right unsigned narrow by immediate (second part).
+ void sqshrun2(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Signed sat rounded shift right unsigned narrow by immediate.
+ void sqrshrun(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // Signed sat rounded shift right unsigned narrow by immediate (second part).
+ void sqrshrun2(const VRegister& vd,
+ const VRegister& vn,
+ int shift);
+
+ // FP reciprocal step.
+ void frecps(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // FP reciprocal estimate.
+ void frecpe(const VRegister& vd,
+ const VRegister& vn);
+
+ // FP reciprocal square root estimate.
+ void frsqrte(const VRegister& vd,
+ const VRegister& vn);
+
+ // FP reciprocal square root step.
+ void frsqrts(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed absolute difference and accumulate long.
+ void sabal(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed absolute difference and accumulate long (second part).
+ void sabal2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned absolute difference and accumulate long.
+ void uabal(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned absolute difference and accumulate long (second part).
+ void uabal2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed absolute difference long.
+ void sabdl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed absolute difference long (second part).
+ void sabdl2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned absolute difference long.
+ void uabdl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned absolute difference long (second part).
+ void uabdl2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Polynomial multiply long.
+ void pmull(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Polynomial multiply long (second part).
+ void pmull2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed long multiply-add.
+ void smlal(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed long multiply-add (second part).
+ void smlal2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned long multiply-add.
+ void umlal(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned long multiply-add (second part).
+ void umlal2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed long multiply-sub.
+ void smlsl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed long multiply-sub (second part).
+ void smlsl2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned long multiply-sub.
+ void umlsl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned long multiply-sub (second part).
+ void umlsl2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed long multiply.
+ void smull(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed long multiply (second part).
+ void smull2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed saturating doubling long multiply-add.
+ void sqdmlal(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed saturating doubling long multiply-add (second part).
+ void sqdmlal2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed saturating doubling long multiply-subtract.
+ void sqdmlsl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed saturating doubling long multiply-subtract (second part).
+ void sqdmlsl2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed saturating doubling long multiply.
+ void sqdmull(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed saturating doubling long multiply (second part).
+ void sqdmull2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed saturating doubling multiply returning high half.
+ void sqdmulh(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed saturating rounding doubling multiply returning high half.
+ void sqrdmulh(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Signed saturating doubling multiply element returning high half.
+ void sqdmulh(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Signed saturating rounding doubling multiply element returning high half.
+ void sqrdmulh(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Unsigned long multiply long.
+ void umull(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Unsigned long multiply (second part).
+ void umull2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Add narrow returning high half.
+ void addhn(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Add narrow returning high half (second part).
+ void addhn2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Rounding add narrow returning high half.
+ void raddhn(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Rounding add narrow returning high half (second part).
+ void raddhn2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Subtract narrow returning high half.
+ void subhn(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Subtract narrow returning high half (second part).
+ void subhn2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Rounding subtract narrow returning high half.
+ void rsubhn(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // Rounding subtract narrow returning high half (second part).
+ void rsubhn2(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // FP vector multiply accumulate.
+ void fmla(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // FP vector multiply subtract.
+ void fmls(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // FP vector multiply extended.
+ void fmulx(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // FP absolute greater than or equal.
+ void facge(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // FP absolute greater than.
+ void facgt(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // FP multiply by element.
+ void fmul(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // FP fused multiply-add to accumulator by element.
+ void fmla(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // FP fused multiply-sub from accumulator by element.
+ void fmls(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // FP multiply extended by element.
+ void fmulx(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // FP compare equal.
+ void fcmeq(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // FP greater than.
+ void fcmgt(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // FP greater than or equal.
+ void fcmge(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // FP compare equal to zero.
+ void fcmeq(const VRegister& vd,
+ const VRegister& vn,
+ double imm);
+
+ // FP greater than zero.
+ void fcmgt(const VRegister& vd,
+ const VRegister& vn,
+ double imm);
+
+ // FP greater than or equal to zero.
+ void fcmge(const VRegister& vd,
+ const VRegister& vn,
+ double imm);
+
+ // FP less than or equal to zero.
+ void fcmle(const VRegister& vd,
+ const VRegister& vn,
+ double imm);
+
+ // FP less than to zero.
+ void fcmlt(const VRegister& vd,
+ const VRegister& vn,
+ double imm);
+
+ // FP absolute difference.
+ void fabd(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // FP pairwise add vector.
+ void faddp(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // FP pairwise add scalar.
+ void faddp(const VRegister& vd,
+ const VRegister& vn);
+
+ // FP pairwise maximum vector.
+ void fmaxp(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // FP pairwise maximum scalar.
+ void fmaxp(const VRegister& vd,
+ const VRegister& vn);
+
+ // FP pairwise minimum vector.
+ void fminp(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // FP pairwise minimum scalar.
+ void fminp(const VRegister& vd,
+ const VRegister& vn);
+
+ // FP pairwise maximum number vector.
+ void fmaxnmp(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // FP pairwise maximum number scalar.
+ void fmaxnmp(const VRegister& vd,
+ const VRegister& vn);
+
+ // FP pairwise minimum number vector.
+ void fminnmp(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm);
+
+ // FP pairwise minimum number scalar.
+ void fminnmp(const VRegister& vd,
+ const VRegister& vn);
+
+ // Emit generic instructions.
+ // Emit raw instructions into the instruction stream.
+ void dci(Instr raw_inst) { Emit(raw_inst); }
+
+ // Emit 32 bits of data into the instruction stream.
+ void dc32(uint32_t data) {
+ EmitData(&data, sizeof(data));
+ }
+
+ // Emit 64 bits of data into the instruction stream.
+ void dc64(uint64_t data) {
+ EmitData(&data, sizeof(data));
+ }
+
+ // Code generation helpers.
+
+ // Register encoding.
+ static Instr Rd(CPURegister rd) {
+ VIXL_ASSERT(rd.code() != kSPRegInternalCode);
+ return rd.code() << Rd_offset;
+ }
+
+ static Instr Rn(CPURegister rn) {
+ VIXL_ASSERT(rn.code() != kSPRegInternalCode);
+ return rn.code() << Rn_offset;
+ }
+
+ static Instr Rm(CPURegister rm) {
+ VIXL_ASSERT(rm.code() != kSPRegInternalCode);
+ return rm.code() << Rm_offset;
+ }
+
+ static Instr RmNot31(CPURegister rm) {
+ VIXL_ASSERT(rm.code() != kSPRegInternalCode);
+ VIXL_ASSERT(!rm.IsZero());
+ return Rm(rm);
+ }
+
+ static Instr Ra(CPURegister ra) {
+ VIXL_ASSERT(ra.code() != kSPRegInternalCode);
+ return ra.code() << Ra_offset;
+ }
+
+ static Instr Rt(CPURegister rt) {
+ VIXL_ASSERT(rt.code() != kSPRegInternalCode);
+ return rt.code() << Rt_offset;
+ }
+
+ static Instr Rt2(CPURegister rt2) {
+ VIXL_ASSERT(rt2.code() != kSPRegInternalCode);
+ return rt2.code() << Rt2_offset;
+ }
+
+ static Instr Rs(CPURegister rs) {
+ VIXL_ASSERT(rs.code() != kSPRegInternalCode);
+ return rs.code() << Rs_offset;
+ }
+
+ // These encoding functions allow the stack pointer to be encoded, and
+ // disallow the zero register.
+ static Instr RdSP(Register rd) {
+ VIXL_ASSERT(!rd.IsZero());
+ return (rd.code() & kRegCodeMask) << Rd_offset;
+ }
+
+ static Instr RnSP(Register rn) {
+ VIXL_ASSERT(!rn.IsZero());
+ return (rn.code() & kRegCodeMask) << Rn_offset;
+ }
+
+ // Flags encoding.
+ static Instr Flags(FlagsUpdate S) {
+ if (S == SetFlags) {
+ return 1 << FlagsUpdate_offset;
+ } else if (S == LeaveFlags) {
+ return 0 << FlagsUpdate_offset;
+ }
+ VIXL_UNREACHABLE();
+ return 0;
+ }
+
+ static Instr Cond(Condition cond) {
+ return cond << Condition_offset;
+ }
+
+ // PC-relative address encoding.
+ static Instr ImmPCRelAddress(int imm21) {
+ VIXL_ASSERT(IsInt21(imm21));
+ Instr imm = static_cast<Instr>(TruncateToUint21(imm21));
+ Instr immhi = (imm >> ImmPCRelLo_width) << ImmPCRelHi_offset;
+ Instr immlo = imm << ImmPCRelLo_offset;
+ return (immhi & ImmPCRelHi_mask) | (immlo & ImmPCRelLo_mask);
+ }
+
+ // Branch encoding.
+ static Instr ImmUncondBranch(int imm26) {
+ VIXL_ASSERT(IsInt26(imm26));
+ return TruncateToUint26(imm26) << ImmUncondBranch_offset;
+ }
+
+ static Instr ImmCondBranch(int imm19) {
+ VIXL_ASSERT(IsInt19(imm19));
+ return TruncateToUint19(imm19) << ImmCondBranch_offset;
+ }
+
+ static Instr ImmCmpBranch(int imm19) {
+ VIXL_ASSERT(IsInt19(imm19));
+ return TruncateToUint19(imm19) << ImmCmpBranch_offset;
+ }
+
+ static Instr ImmTestBranch(int imm14) {
+ VIXL_ASSERT(IsInt14(imm14));
+ return TruncateToUint14(imm14) << ImmTestBranch_offset;
+ }
+
+ static Instr ImmTestBranchBit(unsigned bit_pos) {
+ VIXL_ASSERT(IsUint6(bit_pos));
+ // Subtract five from the shift offset, as we need bit 5 from bit_pos.
+ unsigned b5 = bit_pos << (ImmTestBranchBit5_offset - 5);
+ unsigned b40 = bit_pos << ImmTestBranchBit40_offset;
+ b5 &= ImmTestBranchBit5_mask;
+ b40 &= ImmTestBranchBit40_mask;
+ return b5 | b40;
+ }
+
+ // Data Processing encoding.
+ static Instr SF(Register rd) {
+ return rd.Is64Bits() ? SixtyFourBits : ThirtyTwoBits;
+ }
+
+ static Instr ImmAddSub(int imm) {
+ VIXL_ASSERT(IsImmAddSub(imm));
+ if (IsUint12(imm)) { // No shift required.
+ imm <<= ImmAddSub_offset;
+ } else {
+ imm = ((imm >> 12) << ImmAddSub_offset) | (1 << ShiftAddSub_offset);
+ }
+ return imm;
+ }
+
+ static Instr ImmS(unsigned imms, unsigned reg_size) {
+ VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(imms)) ||
+ ((reg_size == kWRegSize) && IsUint5(imms)));
+ USE(reg_size);
+ return imms << ImmS_offset;
+ }
+
+ static Instr ImmR(unsigned immr, unsigned reg_size) {
+ VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(immr)) ||
+ ((reg_size == kWRegSize) && IsUint5(immr)));
+ USE(reg_size);
+ VIXL_ASSERT(IsUint6(immr));
+ return immr << ImmR_offset;
+ }
+
+ static Instr ImmSetBits(unsigned imms, unsigned reg_size) {
+ VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
+ VIXL_ASSERT(IsUint6(imms));
+ VIXL_ASSERT((reg_size == kXRegSize) || IsUint6(imms + 3));
+ USE(reg_size);
+ return imms << ImmSetBits_offset;
+ }
+
+ static Instr ImmRotate(unsigned immr, unsigned reg_size) {
+ VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
+ VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(immr)) ||
+ ((reg_size == kWRegSize) && IsUint5(immr)));
+ USE(reg_size);
+ return immr << ImmRotate_offset;
+ }
+
+ static Instr ImmLLiteral(int imm19) {
+ VIXL_ASSERT(IsInt19(imm19));
+ return TruncateToUint19(imm19) << ImmLLiteral_offset;
+ }
+
+ static Instr BitN(unsigned bitn, unsigned reg_size) {
+ VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
+ VIXL_ASSERT((reg_size == kXRegSize) || (bitn == 0));
+ USE(reg_size);
+ return bitn << BitN_offset;
+ }
+
+ static Instr ShiftDP(Shift shift) {
+ VIXL_ASSERT(shift == LSL || shift == LSR || shift == ASR || shift == ROR);
+ return shift << ShiftDP_offset;
+ }
+
+ static Instr ImmDPShift(unsigned amount) {
+ VIXL_ASSERT(IsUint6(amount));
+ return amount << ImmDPShift_offset;
+ }
+
+ static Instr ExtendMode(Extend extend) {
+ return extend << ExtendMode_offset;
+ }
+
+ static Instr ImmExtendShift(unsigned left_shift) {
+ VIXL_ASSERT(left_shift <= 4);
+ return left_shift << ImmExtendShift_offset;
+ }
+
+ static Instr ImmCondCmp(unsigned imm) {
+ VIXL_ASSERT(IsUint5(imm));
+ return imm << ImmCondCmp_offset;
+ }
+
+ static Instr Nzcv(StatusFlags nzcv) {
+ return ((nzcv >> Flags_offset) & 0xf) << Nzcv_offset;
+ }
+
+ // MemOperand offset encoding.
+ static Instr ImmLSUnsigned(int imm12) {
+ VIXL_ASSERT(IsUint12(imm12));
+ return imm12 << ImmLSUnsigned_offset;
+ }
+
+ static Instr ImmLS(int imm9) {
+ VIXL_ASSERT(IsInt9(imm9));
+ return TruncateToUint9(imm9) << ImmLS_offset;
+ }
+
+ static Instr ImmLSPair(int imm7, unsigned access_size) {
+ VIXL_ASSERT(((imm7 >> access_size) << access_size) == imm7);
+ int scaled_imm7 = imm7 >> access_size;
+ VIXL_ASSERT(IsInt7(scaled_imm7));
+ return TruncateToUint7(scaled_imm7) << ImmLSPair_offset;
+ }
+
+ static Instr ImmShiftLS(unsigned shift_amount) {
+ VIXL_ASSERT(IsUint1(shift_amount));
+ return shift_amount << ImmShiftLS_offset;
+ }
+
+ static Instr ImmPrefetchOperation(int imm5) {
+ VIXL_ASSERT(IsUint5(imm5));
+ return imm5 << ImmPrefetchOperation_offset;
+ }
+
+ static Instr ImmException(int imm16) {
+ VIXL_ASSERT(IsUint16(imm16));
+ return imm16 << ImmException_offset;
+ }
+
+ static Instr ImmSystemRegister(int imm15) {
+ VIXL_ASSERT(IsUint15(imm15));
+ return imm15 << ImmSystemRegister_offset;
+ }
+
+ static Instr ImmHint(int imm7) {
+ VIXL_ASSERT(IsUint7(imm7));
+ return imm7 << ImmHint_offset;
+ }
+
+ static Instr CRm(int imm4) {
+ VIXL_ASSERT(IsUint4(imm4));
+ return imm4 << CRm_offset;
+ }
+
+ static Instr CRn(int imm4) {
+ VIXL_ASSERT(IsUint4(imm4));
+ return imm4 << CRn_offset;
+ }
+
+ static Instr SysOp(int imm14) {
+ VIXL_ASSERT(IsUint14(imm14));
+ return imm14 << SysOp_offset;
+ }
+
+ static Instr ImmSysOp1(int imm3) {
+ VIXL_ASSERT(IsUint3(imm3));
+ return imm3 << SysOp1_offset;
+ }
+
+ static Instr ImmSysOp2(int imm3) {
+ VIXL_ASSERT(IsUint3(imm3));
+ return imm3 << SysOp2_offset;
+ }
+
+ static Instr ImmBarrierDomain(int imm2) {
+ VIXL_ASSERT(IsUint2(imm2));
+ return imm2 << ImmBarrierDomain_offset;
+ }
+
+ static Instr ImmBarrierType(int imm2) {
+ VIXL_ASSERT(IsUint2(imm2));
+ return imm2 << ImmBarrierType_offset;
+ }
+
+ // Move immediates encoding.
+ static Instr ImmMoveWide(uint64_t imm) {
+ VIXL_ASSERT(IsUint16(imm));
+ return static_cast<Instr>(imm << ImmMoveWide_offset);
+ }
+
+ static Instr ShiftMoveWide(int64_t shift) {
+ VIXL_ASSERT(IsUint2(shift));
+ return static_cast<Instr>(shift << ShiftMoveWide_offset);
+ }
+
+ // FP Immediates.
+ static Instr ImmFP32(float imm);
+ static Instr ImmFP64(double imm);
+
+ // FP register type.
+ static Instr FPType(FPRegister fd) {
+ return fd.Is64Bits() ? FP64 : FP32;
+ }
+
+ static Instr FPScale(unsigned scale) {
+ VIXL_ASSERT(IsUint6(scale));
+ return scale << FPScale_offset;
+ }
+
+ // Immediate field checking helpers.
+ static bool IsImmAddSub(int64_t immediate);
+ static bool IsImmConditionalCompare(int64_t immediate);
+ static bool IsImmFP32(float imm);
+ static bool IsImmFP64(double imm);
+ static bool IsImmLogical(uint64_t value,
+ unsigned width,
+ unsigned* n = NULL,
+ unsigned* imm_s = NULL,
+ unsigned* imm_r = NULL);
+ static bool IsImmLSPair(int64_t offset, unsigned access_size);
+ static bool IsImmLSScaled(int64_t offset, unsigned access_size);
+ static bool IsImmLSUnscaled(int64_t offset);
+ static bool IsImmMovn(uint64_t imm, unsigned reg_size);
+ static bool IsImmMovz(uint64_t imm, unsigned reg_size);
+
+ // Instruction bits for vector format in data processing operations.
+ static Instr VFormat(VRegister vd) {
+ if (vd.Is64Bits()) {
+ switch (vd.lanes()) {
+ case 2: return NEON_2S;
+ case 4: return NEON_4H;
+ case 8: return NEON_8B;
+ default: return 0xffffffff;
+ }
+ } else {
+ VIXL_ASSERT(vd.Is128Bits());
+ switch (vd.lanes()) {
+ case 2: return NEON_2D;
+ case 4: return NEON_4S;
+ case 8: return NEON_8H;
+ case 16: return NEON_16B;
+ default: return 0xffffffff;
+ }
+ }
+ }
+
+ // Instruction bits for vector format in floating point data processing
+ // operations.
+ static Instr FPFormat(VRegister vd) {
+ if (vd.lanes() == 1) {
+ // Floating point scalar formats.
+ VIXL_ASSERT(vd.Is32Bits() || vd.Is64Bits());
+ return vd.Is64Bits() ? FP64 : FP32;
+ }
+
+ // Two lane floating point vector formats.
+ if (vd.lanes() == 2) {
+ VIXL_ASSERT(vd.Is64Bits() || vd.Is128Bits());
+ return vd.Is128Bits() ? NEON_FP_2D : NEON_FP_2S;
+ }
+
+ // Four lane floating point vector format.
+ VIXL_ASSERT((vd.lanes() == 4) && vd.Is128Bits());
+ return NEON_FP_4S;
+ }
+
+ // Instruction bits for vector format in load and store operations.
+ static Instr LSVFormat(VRegister vd) {
+ if (vd.Is64Bits()) {
+ switch (vd.lanes()) {
+ case 1: return LS_NEON_1D;
+ case 2: return LS_NEON_2S;
+ case 4: return LS_NEON_4H;
+ case 8: return LS_NEON_8B;
+ default: return 0xffffffff;
+ }
+ } else {
+ VIXL_ASSERT(vd.Is128Bits());
+ switch (vd.lanes()) {
+ case 2: return LS_NEON_2D;
+ case 4: return LS_NEON_4S;
+ case 8: return LS_NEON_8H;
+ case 16: return LS_NEON_16B;
+ default: return 0xffffffff;
+ }
+ }
+ }
+
+ // Instruction bits for scalar format in data processing operations.
+ static Instr SFormat(VRegister vd) {
+ VIXL_ASSERT(vd.lanes() == 1);
+ switch (vd.SizeInBytes()) {
+ case 1: return NEON_B;
+ case 2: return NEON_H;
+ case 4: return NEON_S;
+ case 8: return NEON_D;
+ default: return 0xffffffff;
+ }
+ }
+
+ static Instr ImmNEONHLM(int index, int num_bits) {
+ int h, l, m;
+ if (num_bits == 3) {
+ VIXL_ASSERT(IsUint3(index));
+ h = (index >> 2) & 1;
+ l = (index >> 1) & 1;
+ m = (index >> 0) & 1;
+ } else if (num_bits == 2) {
+ VIXL_ASSERT(IsUint2(index));
+ h = (index >> 1) & 1;
+ l = (index >> 0) & 1;
+ m = 0;
+ } else {
+ VIXL_ASSERT(IsUint1(index) && (num_bits == 1));
+ h = (index >> 0) & 1;
+ l = 0;
+ m = 0;
+ }
+ return (h << NEONH_offset) | (l << NEONL_offset) | (m << NEONM_offset);
+ }
+
+ static Instr ImmNEONExt(int imm4) {
+ VIXL_ASSERT(IsUint4(imm4));
+ return imm4 << ImmNEONExt_offset;
+ }
+
+ static Instr ImmNEON5(Instr format, int index) {
+ VIXL_ASSERT(IsUint4(index));
+ int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
+ int imm5 = (index << (s + 1)) | (1 << s);
+ return imm5 << ImmNEON5_offset;
+ }
+
+ static Instr ImmNEON4(Instr format, int index) {
+ VIXL_ASSERT(IsUint4(index));
+ int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
+ int imm4 = index << s;
+ return imm4 << ImmNEON4_offset;
+ }
+
+ static Instr ImmNEONabcdefgh(int imm8) {
+ VIXL_ASSERT(IsUint8(imm8));
+ Instr instr;
+ instr = ((imm8 >> 5) & 7) << ImmNEONabc_offset;
+ instr |= (imm8 & 0x1f) << ImmNEONdefgh_offset;
+ return instr;
+ }
+
+ static Instr NEONCmode(int cmode) {
+ VIXL_ASSERT(IsUint4(cmode));
+ return cmode << NEONCmode_offset;
+ }
+
+ static Instr NEONModImmOp(int op) {
+ VIXL_ASSERT(IsUint1(op));
+ return op << NEONModImmOp_offset;
+ }
+
+ size_t size() const {
+ return SizeOfCodeGenerated();
+ }
+
+ size_t SizeOfCodeGenerated() const {
+ return armbuffer_.size();
+ }
+
+ PositionIndependentCodeOption pic() const {
+ return pic_;
+ }
+
+ CPUFeatures* GetCPUFeatures() { return &cpu_features_; }
+
+ void SetCPUFeatures(const CPUFeatures& cpu_features) {
+ cpu_features_ = cpu_features;
+ }
+
+ bool AllowPageOffsetDependentCode() const {
+ return (pic() == PageOffsetDependentCode) ||
+ (pic() == PositionDependentCode);
+ }
+
+ static const Register& AppropriateZeroRegFor(const CPURegister& reg) {
+ return reg.Is64Bits() ? xzr : wzr;
+ }
+
+
+ protected:
+ void LoadStore(const CPURegister& rt,
+ const MemOperand& addr,
+ LoadStoreOp op,
+ LoadStoreScalingOption option = PreferScaledOffset);
+
+ void LoadStorePair(const CPURegister& rt,
+ const CPURegister& rt2,
+ const MemOperand& addr,
+ LoadStorePairOp op);
+ void LoadStoreStruct(const VRegister& vt,
+ const MemOperand& addr,
+ NEONLoadStoreMultiStructOp op);
+ void LoadStoreStruct1(const VRegister& vt,
+ int reg_count,
+ const MemOperand& addr);
+ void LoadStoreStructSingle(const VRegister& vt,
+ uint32_t lane,
+ const MemOperand& addr,
+ NEONLoadStoreSingleStructOp op);
+ void LoadStoreStructSingleAllLanes(const VRegister& vt,
+ const MemOperand& addr,
+ NEONLoadStoreSingleStructOp op);
+ void LoadStoreStructVerify(const VRegister& vt,
+ const MemOperand& addr,
+ Instr op);
+
+ void Prefetch(PrefetchOperation op,
+ const MemOperand& addr,
+ LoadStoreScalingOption option = PreferScaledOffset);
+
+ BufferOffset Logical(const Register& rd,
+ const Register& rn,
+ const Operand& operand,
+ LogicalOp op);
+ BufferOffset LogicalImmediate(const Register& rd,
+ const Register& rn,
+ unsigned n,
+ unsigned imm_s,
+ unsigned imm_r,
+ LogicalOp op);
+
+ void ConditionalCompare(const Register& rn,
+ const Operand& operand,
+ StatusFlags nzcv,
+ Condition cond,
+ ConditionalCompareOp op);
+
+ void AddSubWithCarry(const Register& rd,
+ const Register& rn,
+ const Operand& operand,
+ FlagsUpdate S,
+ AddSubWithCarryOp op);
+
+
+ // Functions for emulating operands not directly supported by the instruction
+ // set.
+ void EmitShift(const Register& rd,
+ const Register& rn,
+ Shift shift,
+ unsigned amount);
+ void EmitExtendShift(const Register& rd,
+ const Register& rn,
+ Extend extend,
+ unsigned left_shift);
+
+ void AddSub(const Register& rd,
+ const Register& rn,
+ const Operand& operand,
+ FlagsUpdate S,
+ AddSubOp op);
+
+ void NEONTable(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ NEONTableOp op);
+
+ // Find an appropriate LoadStoreOp or LoadStorePairOp for the specified
+ // registers. Only simple loads are supported; sign- and zero-extension (such
+ // as in LDPSW_x or LDRB_w) are not supported.
+ static LoadStoreOp LoadOpFor(const CPURegister& rt);
+ static LoadStorePairOp LoadPairOpFor(const CPURegister& rt,
+ const CPURegister& rt2);
+ static LoadStoreOp StoreOpFor(const CPURegister& rt);
+ static LoadStorePairOp StorePairOpFor(const CPURegister& rt,
+ const CPURegister& rt2);
+ static LoadStorePairNonTemporalOp LoadPairNonTemporalOpFor(
+ const CPURegister& rt, const CPURegister& rt2);
+ static LoadStorePairNonTemporalOp StorePairNonTemporalOpFor(
+ const CPURegister& rt, const CPURegister& rt2);
+ static LoadLiteralOp LoadLiteralOpFor(const CPURegister& rt);
+
+ // Convenience pass-through for CPU feature checks.
+ bool CPUHas(CPUFeatures::Feature feature0,
+ CPUFeatures::Feature feature1 = CPUFeatures::kNone,
+ CPUFeatures::Feature feature2 = CPUFeatures::kNone,
+ CPUFeatures::Feature feature3 = CPUFeatures::kNone) const {
+ return cpu_features_.Has(feature0, feature1, feature2, feature3);
+ }
+
+ // Determine whether the target CPU has the specified registers, based on the
+ // currently-enabled CPU features. Presence of a register does not imply
+ // support for arbitrary operations on it. For example, CPUs with FP have H
+ // registers, but most half-precision operations require the FPHalf feature.
+ //
+ // These are used to check CPU features in loads and stores that have the same
+ // entry point for both integer and FP registers.
+ bool CPUHas(const CPURegister& rt) const;
+ bool CPUHas(const CPURegister& rt, const CPURegister& rt2) const;
+
+ bool CPUHas(SystemRegister sysreg) const;
+
+ private:
+ static uint32_t FP32ToImm8(float imm);
+ static uint32_t FP64ToImm8(double imm);
+
+ // Instruction helpers.
+ void MoveWide(const Register& rd,
+ uint64_t imm,
+ int shift,
+ MoveWideImmediateOp mov_op);
+ BufferOffset DataProcShiftedRegister(const Register& rd,
+ const Register& rn,
+ const Operand& operand,
+ FlagsUpdate S,
+ Instr op);
+ void DataProcExtendedRegister(const Register& rd,
+ const Register& rn,
+ const Operand& operand,
+ FlagsUpdate S,
+ Instr op);
+ void LoadStorePairNonTemporal(const CPURegister& rt,
+ const CPURegister& rt2,
+ const MemOperand& addr,
+ LoadStorePairNonTemporalOp op);
+ void LoadLiteral(const CPURegister& rt, uint64_t imm, LoadLiteralOp op);
+ void ConditionalSelect(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ Condition cond,
+ ConditionalSelectOp op);
+ void DataProcessing1Source(const Register& rd,
+ const Register& rn,
+ DataProcessing1SourceOp op);
+ void DataProcessing3Source(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ const Register& ra,
+ DataProcessing3SourceOp op);
+ void FPDataProcessing1Source(const VRegister& fd,
+ const VRegister& fn,
+ FPDataProcessing1SourceOp op);
+ void FPDataProcessing3Source(const VRegister& fd,
+ const VRegister& fn,
+ const VRegister& fm,
+ const VRegister& fa,
+ FPDataProcessing3SourceOp op);
+ void NEONAcrossLanesL(const VRegister& vd,
+ const VRegister& vn,
+ NEONAcrossLanesOp op);
+ void NEONAcrossLanes(const VRegister& vd,
+ const VRegister& vn,
+ NEONAcrossLanesOp op);
+ void NEONModifiedImmShiftLsl(const VRegister& vd,
+ const int imm8,
+ const int left_shift,
+ NEONModifiedImmediateOp op);
+ void NEONModifiedImmShiftMsl(const VRegister& vd,
+ const int imm8,
+ const int shift_amount,
+ NEONModifiedImmediateOp op);
+ void NEONFP2Same(const VRegister& vd,
+ const VRegister& vn,
+ Instr vop);
+ void NEON3Same(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ NEON3SameOp vop);
+ void NEONFP3Same(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ Instr op);
+ void NEON3DifferentL(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ NEON3DifferentOp vop);
+ void NEON3DifferentW(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ NEON3DifferentOp vop);
+ void NEON3DifferentHN(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ NEON3DifferentOp vop);
+ void NEONFP2RegMisc(const VRegister& vd,
+ const VRegister& vn,
+ NEON2RegMiscOp vop,
+ double value = 0.0);
+ void NEON2RegMisc(const VRegister& vd,
+ const VRegister& vn,
+ NEON2RegMiscOp vop,
+ int value = 0);
+ void NEONFP2RegMisc(const VRegister& vd,
+ const VRegister& vn,
+ Instr op);
+ void NEONAddlp(const VRegister& vd,
+ const VRegister& vn,
+ NEON2RegMiscOp op);
+ void NEONPerm(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ NEONPermOp op);
+ void NEONFPByElement(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index,
+ NEONByIndexedElementOp op);
+ void NEONByElement(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index,
+ NEONByIndexedElementOp op);
+ void NEONByElementL(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index,
+ NEONByIndexedElementOp op);
+ void NEONShiftImmediate(const VRegister& vd,
+ const VRegister& vn,
+ NEONShiftImmediateOp op,
+ int immh_immb);
+ void NEONShiftLeftImmediate(const VRegister& vd,
+ const VRegister& vn,
+ int shift,
+ NEONShiftImmediateOp op);
+ void NEONShiftRightImmediate(const VRegister& vd,
+ const VRegister& vn,
+ int shift,
+ NEONShiftImmediateOp op);
+ void NEONShiftImmediateL(const VRegister& vd,
+ const VRegister& vn,
+ int shift,
+ NEONShiftImmediateOp op);
+ void NEONShiftImmediateN(const VRegister& vd,
+ const VRegister& vn,
+ int shift,
+ NEONShiftImmediateOp op);
+ void NEONXtn(const VRegister& vd,
+ const VRegister& vn,
+ NEON2RegMiscOp vop);
+
+ Instr LoadStoreStructAddrModeField(const MemOperand& addr);
+
+ // Encode the specified MemOperand for the specified access size and scaling
+ // preference.
+ Instr LoadStoreMemOperand(const MemOperand& addr,
+ unsigned access_size,
+ LoadStoreScalingOption option);
+
+ protected:
+ // Prevent generation of a literal pool for the next |maxInst| instructions.
+ // Guarantees instruction linearity.
+ class AutoBlockLiteralPool {
+ ARMBuffer* armbuffer_;
+
+ public:
+ AutoBlockLiteralPool(Assembler* assembler, size_t maxInst)
+ : armbuffer_(&assembler->armbuffer_) {
+ armbuffer_->enterNoPool(maxInst);
+ }
+ ~AutoBlockLiteralPool() {
+ armbuffer_->leaveNoPool();
+ }
+ };
+
+ protected:
+ // Buffer where the code is emitted.
+ PositionIndependentCodeOption pic_;
+
+ CPUFeatures cpu_features_;
+
+#ifdef DEBUG
+ bool finalized_;
+#endif
+};
+
+} // namespace vixl
+
+#endif // VIXL_A64_ASSEMBLER_A64_H_
diff --git a/js/src/jit/arm64/vixl/CompilerIntrinsics-vixl.h b/js/src/jit/arm64/vixl/CompilerIntrinsics-vixl.h
new file mode 100644
index 0000000000..e13eef6135
--- /dev/null
+++ b/js/src/jit/arm64/vixl/CompilerIntrinsics-vixl.h
@@ -0,0 +1,179 @@
+// Copyright 2015, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+#ifndef VIXL_COMPILER_INTRINSICS_H
+#define VIXL_COMPILER_INTRINSICS_H
+
+#include "mozilla/MathAlgorithms.h"
+
+#include "jit/arm64/vixl/Globals-vixl.h"
+
+namespace vixl {
+
+// Helper to check whether the version of GCC used is greater than the specified
+// requirement.
+#define MAJOR 1000000
+#define MINOR 1000
+#if defined(__GNUC__) && defined(__GNUC_MINOR__) && defined(__GNUC_PATCHLEVEL__)
+#define GCC_VERSION_OR_NEWER(major, minor, patchlevel) \
+ ((__GNUC__ * MAJOR + __GNUC_MINOR__ * MINOR + __GNUC_PATCHLEVEL__) >= \
+ ((major) * MAJOR + (minor) * MINOR + (patchlevel)))
+#elif defined(__GNUC__) && defined(__GNUC_MINOR__)
+#define GCC_VERSION_OR_NEWER(major, minor, patchlevel) \
+ ((__GNUC__ * MAJOR + __GNUC_MINOR__ * MINOR) >= \
+ ((major) * MAJOR + (minor) * MINOR + (patchlevel)))
+#else
+#define GCC_VERSION_OR_NEWER(major, minor, patchlevel) 0
+#endif
+
+
+#if defined(__clang__) && !defined(VIXL_NO_COMPILER_BUILTINS)
+
+#define COMPILER_HAS_BUILTIN_CLRSB (__has_builtin(__builtin_clrsb))
+#define COMPILER_HAS_BUILTIN_CLZ (__has_builtin(__builtin_clz))
+#define COMPILER_HAS_BUILTIN_CTZ (__has_builtin(__builtin_ctz))
+#define COMPILER_HAS_BUILTIN_FFS (__has_builtin(__builtin_ffs))
+#define COMPILER_HAS_BUILTIN_POPCOUNT (__has_builtin(__builtin_popcount))
+
+#elif defined(__GNUC__) && !defined(VIXL_NO_COMPILER_BUILTINS)
+// The documentation for these builtins is available at:
+// https://gcc.gnu.org/onlinedocs/gcc-$MAJOR.$MINOR.$PATCHLEVEL/gcc//Other-Builtins.html
+
+# define COMPILER_HAS_BUILTIN_CLRSB (GCC_VERSION_OR_NEWER(4, 7, 0))
+# define COMPILER_HAS_BUILTIN_CLZ (GCC_VERSION_OR_NEWER(3, 4, 0))
+# define COMPILER_HAS_BUILTIN_CTZ (GCC_VERSION_OR_NEWER(3, 4, 0))
+# define COMPILER_HAS_BUILTIN_FFS (GCC_VERSION_OR_NEWER(3, 4, 0))
+# define COMPILER_HAS_BUILTIN_POPCOUNT (GCC_VERSION_OR_NEWER(3, 4, 0))
+
+#else
+// One can define VIXL_NO_COMPILER_BUILTINS to force using the manually
+// implemented C++ methods.
+
+#define COMPILER_HAS_BUILTIN_BSWAP false
+#define COMPILER_HAS_BUILTIN_CLRSB false
+#define COMPILER_HAS_BUILTIN_CLZ false
+#define COMPILER_HAS_BUILTIN_CTZ false
+#define COMPILER_HAS_BUILTIN_FFS false
+#define COMPILER_HAS_BUILTIN_POPCOUNT false
+
+#endif
+
+
+template<typename V>
+inline bool IsPowerOf2(V value) {
+ return (value != 0) && ((value & (value - 1)) == 0);
+}
+
+
+// Implementation of intrinsics functions.
+// TODO: The implementations could be improved for sizes different from 32bit
+// and 64bit: we could mask the values and call the appropriate builtin.
+
+
+template<typename V>
+inline int CountLeadingZeros(V value, int width = (sizeof(V) * 8)) {
+#if COMPILER_HAS_BUILTIN_CLZ
+ if (width == 32) {
+ return (value == 0) ? 32 : __builtin_clz(static_cast<unsigned>(value));
+ } else if (width == 64) {
+ return (value == 0) ? 64 : __builtin_clzll(value);
+ }
+ MOZ_CRASH("Unhandled width.");
+#else
+ if (width == 32) {
+ return mozilla::CountLeadingZeroes32(value);
+ } else if (width == 64) {
+ return mozilla::CountLeadingZeroes64(value);
+ }
+ MOZ_CRASH("Unhandled width.");
+#endif
+}
+
+
+template<typename V>
+inline int CountLeadingSignBits(V value, int width = (sizeof(V) * 8)) {
+#if COMPILER_HAS_BUILTIN_CLRSB
+ if (width == 32) {
+ return __builtin_clrsb(value);
+ } else if (width == 64) {
+ return __builtin_clrsbll(value);
+ }
+ MOZ_CRASH("Unhandled width.");
+#else
+ VIXL_ASSERT(IsPowerOf2(width) && (width <= 64));
+ if (value >= 0) {
+ return CountLeadingZeros(value, width) - 1;
+ } else {
+ return CountLeadingZeros(~value, width) - 1;
+ }
+#endif
+}
+
+
+template<typename V>
+inline int CountSetBits(V value, int width = (sizeof(V) * 8)) {
+#if COMPILER_HAS_BUILTIN_POPCOUNT
+ if (width == 32) {
+ return __builtin_popcount(static_cast<unsigned>(value));
+ } else if (width == 64) {
+ return __builtin_popcountll(value);
+ }
+ MOZ_CRASH("Unhandled width.");
+#else
+ if (width == 32) {
+ return mozilla::CountPopulation32(value);
+ } else if (width == 64) {
+ return mozilla::CountPopulation64(value);
+ }
+ MOZ_CRASH("Unhandled width.");
+#endif
+}
+
+
+template<typename V>
+inline int CountTrailingZeros(V value, int width = (sizeof(V) * 8)) {
+#if COMPILER_HAS_BUILTIN_CTZ
+ if (width == 32) {
+ return (value == 0) ? 32 : __builtin_ctz(static_cast<unsigned>(value));
+ } else if (width == 64) {
+ return (value == 0) ? 64 : __builtin_ctzll(value);
+ }
+ MOZ_CRASH("Unhandled width.");
+#else
+ if (width == 32) {
+ return mozilla::CountTrailingZeroes32(value);
+ } else if (width == 64) {
+ return mozilla::CountTrailingZeroes64(value);
+ }
+ MOZ_CRASH("Unhandled width.");
+#endif
+}
+
+} // namespace vixl
+
+#endif // VIXL_COMPILER_INTRINSICS_H
+
diff --git a/js/src/jit/arm64/vixl/Constants-vixl.h b/js/src/jit/arm64/vixl/Constants-vixl.h
new file mode 100644
index 0000000000..2c174e61a5
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Constants-vixl.h
@@ -0,0 +1,2694 @@
+// Copyright 2015, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_A64_CONSTANTS_A64_H_
+#define VIXL_A64_CONSTANTS_A64_H_
+
+#include <stdint.h>
+
+#include "jit/arm64/vixl/Globals-vixl.h"
+
+namespace vixl {
+
+// Supervisor Call (svc) specific support.
+//
+// The SVC instruction encodes an optional 16-bit immediate value.
+// The simulator understands the codes below.
+enum SVCSimulatorCodes {
+ kCallRtRedirected = 0x10, // Transition to x86_64 C code.
+ kMarkStackPointer = 0x11, // Push the current SP on a special Simulator stack.
+ kCheckStackPointer = 0x12 // Pop from the special Simulator stack and compare to SP.
+};
+
+const unsigned kNumberOfRegisters = 32;
+const unsigned kNumberOfVRegisters = 32;
+const unsigned kNumberOfFPRegisters = kNumberOfVRegisters;
+// Callee saved registers are x21-x30(lr).
+const int kNumberOfCalleeSavedRegisters = 10;
+const int kFirstCalleeSavedRegisterIndex = 21;
+// Callee saved FP registers are d8-d15. Note that the high parts of v8-v15 are
+// still caller-saved.
+const int kNumberOfCalleeSavedFPRegisters = 8;
+const int kFirstCalleeSavedFPRegisterIndex = 8;
+
+#define REGISTER_CODE_LIST(R) \
+R(0) R(1) R(2) R(3) R(4) R(5) R(6) R(7) \
+R(8) R(9) R(10) R(11) R(12) R(13) R(14) R(15) \
+R(16) R(17) R(18) R(19) R(20) R(21) R(22) R(23) \
+R(24) R(25) R(26) R(27) R(28) R(29) R(30) R(31)
+
+#define INSTRUCTION_FIELDS_LIST(V_) \
+/* Register fields */ \
+V_(Rd, 4, 0, Bits) /* Destination register. */ \
+V_(Rn, 9, 5, Bits) /* First source register. */ \
+V_(Rm, 20, 16, Bits) /* Second source register. */ \
+V_(Ra, 14, 10, Bits) /* Third source register. */ \
+V_(Rt, 4, 0, Bits) /* Load/store register. */ \
+V_(Rt2, 14, 10, Bits) /* Load/store second register. */ \
+V_(Rs, 20, 16, Bits) /* Exclusive access status. */ \
+ \
+/* Common bits */ \
+V_(SixtyFourBits, 31, 31, Bits) \
+V_(FlagsUpdate, 29, 29, Bits) \
+ \
+/* PC relative addressing */ \
+V_(ImmPCRelHi, 23, 5, SignedBits) \
+V_(ImmPCRelLo, 30, 29, Bits) \
+ \
+/* Add/subtract/logical shift register */ \
+V_(ShiftDP, 23, 22, Bits) \
+V_(ImmDPShift, 15, 10, Bits) \
+ \
+/* Add/subtract immediate */ \
+V_(ImmAddSub, 21, 10, Bits) \
+V_(ShiftAddSub, 23, 22, Bits) \
+ \
+/* Add/substract extend */ \
+V_(ImmExtendShift, 12, 10, Bits) \
+V_(ExtendMode, 15, 13, Bits) \
+ \
+/* Move wide */ \
+V_(ImmMoveWide, 20, 5, Bits) \
+V_(ShiftMoveWide, 22, 21, Bits) \
+ \
+/* Logical immediate, bitfield and extract */ \
+V_(BitN, 22, 22, Bits) \
+V_(ImmRotate, 21, 16, Bits) \
+V_(ImmSetBits, 15, 10, Bits) \
+V_(ImmR, 21, 16, Bits) \
+V_(ImmS, 15, 10, Bits) \
+ \
+/* Test and branch immediate */ \
+V_(ImmTestBranch, 18, 5, SignedBits) \
+V_(ImmTestBranchBit40, 23, 19, Bits) \
+V_(ImmTestBranchBit5, 31, 31, Bits) \
+ \
+/* Conditionals */ \
+V_(Condition, 15, 12, Bits) \
+V_(ConditionBranch, 3, 0, Bits) \
+V_(Nzcv, 3, 0, Bits) \
+V_(ImmCondCmp, 20, 16, Bits) \
+V_(ImmCondBranch, 23, 5, SignedBits) \
+ \
+/* Floating point */ \
+V_(FPType, 23, 22, Bits) \
+V_(ImmFP, 20, 13, Bits) \
+V_(FPScale, 15, 10, Bits) \
+ \
+/* Load Store */ \
+V_(ImmLS, 20, 12, SignedBits) \
+V_(ImmLSUnsigned, 21, 10, Bits) \
+V_(ImmLSPair, 21, 15, SignedBits) \
+V_(ImmShiftLS, 12, 12, Bits) \
+V_(LSOpc, 23, 22, Bits) \
+V_(LSVector, 26, 26, Bits) \
+V_(LSSize, 31, 30, Bits) \
+V_(ImmPrefetchOperation, 4, 0, Bits) \
+V_(PrefetchHint, 4, 3, Bits) \
+V_(PrefetchTarget, 2, 1, Bits) \
+V_(PrefetchStream, 0, 0, Bits) \
+ \
+/* Other immediates */ \
+V_(ImmUncondBranch, 25, 0, SignedBits) \
+V_(ImmCmpBranch, 23, 5, SignedBits) \
+V_(ImmLLiteral, 23, 5, SignedBits) \
+V_(ImmException, 20, 5, Bits) \
+V_(ImmHint, 11, 5, Bits) \
+V_(ImmBarrierDomain, 11, 10, Bits) \
+V_(ImmBarrierType, 9, 8, Bits) \
+ \
+/* System (MRS, MSR, SYS) */ \
+V_(ImmSystemRegister, 19, 5, Bits) \
+V_(SysO0, 19, 19, Bits) \
+V_(SysOp, 18, 5, Bits) \
+V_(SysOp1, 18, 16, Bits) \
+V_(SysOp2, 7, 5, Bits) \
+V_(CRn, 15, 12, Bits) \
+V_(CRm, 11, 8, Bits) \
+ \
+/* Load-/store-exclusive */ \
+V_(LdStXLoad, 22, 22, Bits) \
+V_(LdStXNotExclusive, 23, 23, Bits) \
+V_(LdStXAcquireRelease, 15, 15, Bits) \
+V_(LdStXSizeLog2, 31, 30, Bits) \
+V_(LdStXPair, 21, 21, Bits) \
+ \
+/* NEON generic fields */ \
+V_(NEONQ, 30, 30, Bits) \
+V_(NEONSize, 23, 22, Bits) \
+V_(NEONLSSize, 11, 10, Bits) \
+V_(NEONS, 12, 12, Bits) \
+V_(NEONL, 21, 21, Bits) \
+V_(NEONM, 20, 20, Bits) \
+V_(NEONH, 11, 11, Bits) \
+V_(ImmNEONExt, 14, 11, Bits) \
+V_(ImmNEON5, 20, 16, Bits) \
+V_(ImmNEON4, 14, 11, Bits) \
+ \
+/* NEON Modified Immediate fields */ \
+V_(ImmNEONabc, 18, 16, Bits) \
+V_(ImmNEONdefgh, 9, 5, Bits) \
+V_(NEONModImmOp, 29, 29, Bits) \
+V_(NEONCmode, 15, 12, Bits) \
+ \
+/* NEON Shift Immediate fields */ \
+V_(ImmNEONImmhImmb, 22, 16, Bits) \
+V_(ImmNEONImmh, 22, 19, Bits) \
+V_(ImmNEONImmb, 18, 16, Bits)
+
+#define SYSTEM_REGISTER_FIELDS_LIST(V_, M_) \
+/* NZCV */ \
+V_(Flags, 31, 28, Bits) \
+V_(N, 31, 31, Bits) \
+V_(Z, 30, 30, Bits) \
+V_(C, 29, 29, Bits) \
+V_(V, 28, 28, Bits) \
+M_(NZCV, Flags_mask) \
+/* FPCR */ \
+V_(AHP, 26, 26, Bits) \
+V_(DN, 25, 25, Bits) \
+V_(FZ, 24, 24, Bits) \
+V_(RMode, 23, 22, Bits) \
+M_(FPCR, AHP_mask | DN_mask | FZ_mask | RMode_mask)
+
+// Fields offsets.
+#define DECLARE_FIELDS_OFFSETS(Name, HighBit, LowBit, X) \
+const int Name##_offset = LowBit; \
+const int Name##_width = HighBit - LowBit + 1; \
+const uint32_t Name##_mask = ((1 << Name##_width) - 1) << LowBit;
+#define NOTHING(A, B)
+INSTRUCTION_FIELDS_LIST(DECLARE_FIELDS_OFFSETS)
+SYSTEM_REGISTER_FIELDS_LIST(DECLARE_FIELDS_OFFSETS, NOTHING)
+#undef NOTHING
+#undef DECLARE_FIELDS_BITS
+
+// ImmPCRel is a compound field (not present in INSTRUCTION_FIELDS_LIST), formed
+// from ImmPCRelLo and ImmPCRelHi.
+const int ImmPCRel_mask = ImmPCRelLo_mask | ImmPCRelHi_mask;
+
+// Condition codes.
+enum Condition {
+ eq = 0, // Z set Equal.
+ ne = 1, // Z clear Not equal.
+ cs = 2, // C set Carry set.
+ cc = 3, // C clear Carry clear.
+ mi = 4, // N set Negative.
+ pl = 5, // N clear Positive or zero.
+ vs = 6, // V set Overflow.
+ vc = 7, // V clear No overflow.
+ hi = 8, // C set, Z clear Unsigned higher.
+ ls = 9, // C clear or Z set Unsigned lower or same.
+ ge = 10, // N == V Greater or equal.
+ lt = 11, // N != V Less than.
+ gt = 12, // Z clear, N == V Greater than.
+ le = 13, // Z set or N != V Less then or equal
+ al = 14, // Always.
+ nv = 15, // Behaves as always/al.
+
+ // Aliases.
+ hs = cs, // C set Unsigned higher or same.
+ lo = cc, // C clear Unsigned lower.
+
+ // Mozilla expanded aliases.
+ Equal = 0, Zero = 0,
+ NotEqual = 1, NonZero = 1,
+ AboveOrEqual = 2, CarrySet = 2,
+ Below = 3, CarryClear = 3,
+ Signed = 4,
+ NotSigned = 5,
+ Overflow = 6,
+ NoOverflow = 7,
+ Above = 8,
+ BelowOrEqual = 9,
+ GreaterThanOrEqual_ = 10,
+ LessThan_ = 11,
+ GreaterThan_ = 12,
+ LessThanOrEqual_ = 13,
+ Always = 14,
+ Never = 15
+};
+
+inline Condition InvertCondition(Condition cond) {
+ // Conditions al and nv behave identically, as "always true". They can't be
+ // inverted, because there is no "always false" condition.
+ VIXL_ASSERT((cond != al) && (cond != nv));
+ return static_cast<Condition>(cond ^ 1);
+}
+
+enum FPTrapFlags {
+ EnableTrap = 1,
+ DisableTrap = 0
+};
+
+enum FlagsUpdate {
+ SetFlags = 1,
+ LeaveFlags = 0
+};
+
+enum StatusFlags {
+ NoFlag = 0,
+
+ // Derive the flag combinations from the system register bit descriptions.
+ NFlag = N_mask,
+ ZFlag = Z_mask,
+ CFlag = C_mask,
+ VFlag = V_mask,
+ NZFlag = NFlag | ZFlag,
+ NCFlag = NFlag | CFlag,
+ NVFlag = NFlag | VFlag,
+ ZCFlag = ZFlag | CFlag,
+ ZVFlag = ZFlag | VFlag,
+ CVFlag = CFlag | VFlag,
+ NZCFlag = NFlag | ZFlag | CFlag,
+ NZVFlag = NFlag | ZFlag | VFlag,
+ NCVFlag = NFlag | CFlag | VFlag,
+ ZCVFlag = ZFlag | CFlag | VFlag,
+ NZCVFlag = NFlag | ZFlag | CFlag | VFlag,
+
+ // Floating-point comparison results.
+ FPEqualFlag = ZCFlag,
+ FPLessThanFlag = NFlag,
+ FPGreaterThanFlag = CFlag,
+ FPUnorderedFlag = CVFlag
+};
+
+enum Shift {
+ NO_SHIFT = -1,
+ LSL = 0x0,
+ LSR = 0x1,
+ ASR = 0x2,
+ ROR = 0x3,
+ MSL = 0x4
+};
+
+enum Extend {
+ NO_EXTEND = -1,
+ UXTB = 0,
+ UXTH = 1,
+ UXTW = 2,
+ UXTX = 3,
+ SXTB = 4,
+ SXTH = 5,
+ SXTW = 6,
+ SXTX = 7
+};
+
+enum SystemHint {
+ NOP = 0,
+ YIELD = 1,
+ WFE = 2,
+ WFI = 3,
+ SEV = 4,
+ SEVL = 5,
+ ESB = 16,
+ CSDB = 20,
+ BTI = 32,
+ BTI_c = 34,
+ BTI_j = 36,
+ BTI_jc = 38
+};
+
+enum BranchTargetIdentifier {
+ EmitBTI_none = NOP,
+ EmitBTI = BTI,
+ EmitBTI_c = BTI_c,
+ EmitBTI_j = BTI_j,
+ EmitBTI_jc = BTI_jc,
+
+ // These correspond to the values of the CRm:op2 fields in the equivalent HINT
+ // instruction.
+ EmitPACIASP = 25,
+ EmitPACIBSP = 27
+};
+
+enum BarrierDomain {
+ OuterShareable = 0,
+ NonShareable = 1,
+ InnerShareable = 2,
+ FullSystem = 3
+};
+
+enum BarrierType {
+ BarrierOther = 0,
+ BarrierReads = 1,
+ BarrierWrites = 2,
+ BarrierAll = 3
+};
+
+enum PrefetchOperation {
+ PLDL1KEEP = 0x00,
+ PLDL1STRM = 0x01,
+ PLDL2KEEP = 0x02,
+ PLDL2STRM = 0x03,
+ PLDL3KEEP = 0x04,
+ PLDL3STRM = 0x05,
+
+ PLIL1KEEP = 0x08,
+ PLIL1STRM = 0x09,
+ PLIL2KEEP = 0x0a,
+ PLIL2STRM = 0x0b,
+ PLIL3KEEP = 0x0c,
+ PLIL3STRM = 0x0d,
+
+ PSTL1KEEP = 0x10,
+ PSTL1STRM = 0x11,
+ PSTL2KEEP = 0x12,
+ PSTL2STRM = 0x13,
+ PSTL3KEEP = 0x14,
+ PSTL3STRM = 0x15
+};
+
+enum BType {
+ // Set when executing any instruction on a guarded page, except those cases
+ // listed below.
+ DefaultBType = 0,
+
+ // Set when an indirect branch is taken from an unguarded page to a guarded
+ // page, or from a guarded page to ip0 or ip1 (x16 or x17), eg "br ip0".
+ BranchFromUnguardedOrToIP = 1,
+
+ // Set when an indirect branch and link (call) is taken, eg. "blr x0".
+ BranchAndLink = 2,
+
+ // Set when an indirect branch is taken from a guarded page to a register
+ // that is not ip0 or ip1 (x16 or x17), eg, "br x0".
+ BranchFromGuardedNotToIP = 3
+};
+
+template<int op0, int op1, int crn, int crm, int op2>
+class SystemRegisterEncoder {
+ public:
+ static const uint32_t value =
+ ((op0 << SysO0_offset) |
+ (op1 << SysOp1_offset) |
+ (crn << CRn_offset) |
+ (crm << CRm_offset) |
+ (op2 << SysOp2_offset)) >> ImmSystemRegister_offset;
+};
+
+// System/special register names.
+// This information is not encoded as one field but as the concatenation of
+// multiple fields (Op0, Op1, Crn, Crm, Op2).
+enum SystemRegister {
+ NZCV = SystemRegisterEncoder<3, 3, 4, 2, 0>::value,
+ FPCR = SystemRegisterEncoder<3, 3, 4, 4, 0>::value,
+ RNDR = SystemRegisterEncoder<3, 3, 2, 4, 0>::value, // Random number.
+ RNDRRS = SystemRegisterEncoder<3, 3, 2, 4, 1>::value // Reseeded random number.
+};
+
+template<int op1, int crn, int crm, int op2>
+class CacheOpEncoder {
+ public:
+ static const uint32_t value =
+ ((op1 << SysOp1_offset) |
+ (crn << CRn_offset) |
+ (crm << CRm_offset) |
+ (op2 << SysOp2_offset)) >> SysOp_offset;
+};
+
+enum InstructionCacheOp : uint32_t {
+ IVAU = CacheOpEncoder<3, 7, 5, 1>::value
+};
+
+enum DataCacheOp : uint32_t {
+ CVAC = CacheOpEncoder<3, 7, 10, 1>::value,
+ CVAU = CacheOpEncoder<3, 7, 11, 1>::value,
+ CVAP = CacheOpEncoder<3, 7, 12, 1>::value,
+ CVADP = CacheOpEncoder<3, 7, 13, 1>::value,
+ CIVAC = CacheOpEncoder<3, 7, 14, 1>::value,
+ ZVA = CacheOpEncoder<3, 7, 4, 1>::value
+};
+
+// Instruction enumerations.
+//
+// These are the masks that define a class of instructions, and the list of
+// instructions within each class. Each enumeration has a Fixed, FMask and
+// Mask value.
+//
+// Fixed: The fixed bits in this instruction class.
+// FMask: The mask used to extract the fixed bits in the class.
+// Mask: The mask used to identify the instructions within a class.
+//
+// The enumerations can be used like this:
+//
+// VIXL_ASSERT(instr->Mask(PCRelAddressingFMask) == PCRelAddressingFixed);
+// switch(instr->Mask(PCRelAddressingMask)) {
+// case ADR: Format("adr 'Xd, 'AddrPCRelByte"); break;
+// case ADRP: Format("adrp 'Xd, 'AddrPCRelPage"); break;
+// default: printf("Unknown instruction\n");
+// }
+
+
+// Generic fields.
+enum GenericInstrField : uint32_t {
+ SixtyFourBits = 0x80000000,
+ ThirtyTwoBits = 0x00000000,
+
+ FPTypeMask = 0x00C00000,
+ FP16 = 0x00C00000,
+ FP32 = 0x00000000,
+ FP64 = 0x00400000
+};
+
+enum NEONFormatField : uint32_t {
+ NEONFormatFieldMask = 0x40C00000,
+ NEON_Q = 0x40000000,
+ NEON_8B = 0x00000000,
+ NEON_16B = NEON_8B | NEON_Q,
+ NEON_4H = 0x00400000,
+ NEON_8H = NEON_4H | NEON_Q,
+ NEON_2S = 0x00800000,
+ NEON_4S = NEON_2S | NEON_Q,
+ NEON_1D = 0x00C00000,
+ NEON_2D = 0x00C00000 | NEON_Q
+};
+
+enum NEONFPFormatField : uint32_t {
+ NEONFPFormatFieldMask = 0x40400000,
+ NEON_FP_4H = FP16,
+ NEON_FP_2S = FP32,
+ NEON_FP_8H = FP16 | NEON_Q,
+ NEON_FP_4S = FP32 | NEON_Q,
+ NEON_FP_2D = FP64 | NEON_Q
+};
+
+enum NEONLSFormatField : uint32_t {
+ NEONLSFormatFieldMask = 0x40000C00,
+ LS_NEON_8B = 0x00000000,
+ LS_NEON_16B = LS_NEON_8B | NEON_Q,
+ LS_NEON_4H = 0x00000400,
+ LS_NEON_8H = LS_NEON_4H | NEON_Q,
+ LS_NEON_2S = 0x00000800,
+ LS_NEON_4S = LS_NEON_2S | NEON_Q,
+ LS_NEON_1D = 0x00000C00,
+ LS_NEON_2D = LS_NEON_1D | NEON_Q
+};
+
+enum NEONScalarFormatField : uint32_t {
+ NEONScalarFormatFieldMask = 0x00C00000,
+ NEONScalar = 0x10000000,
+ NEON_B = 0x00000000,
+ NEON_H = 0x00400000,
+ NEON_S = 0x00800000,
+ NEON_D = 0x00C00000
+};
+
+// PC relative addressing.
+enum PCRelAddressingOp : uint32_t {
+ PCRelAddressingFixed = 0x10000000,
+ PCRelAddressingFMask = 0x1F000000,
+ PCRelAddressingMask = 0x9F000000,
+ ADR = PCRelAddressingFixed | 0x00000000,
+ ADRP = PCRelAddressingFixed | 0x80000000
+};
+
+// Add/sub (immediate, shifted and extended.)
+const int kSFOffset = 31;
+enum AddSubOp : uint32_t {
+ AddSubOpMask = 0x60000000,
+ AddSubSetFlagsBit = 0x20000000,
+ ADD = 0x00000000,
+ ADDS = ADD | AddSubSetFlagsBit,
+ SUB = 0x40000000,
+ SUBS = SUB | AddSubSetFlagsBit
+};
+
+#define ADD_SUB_OP_LIST(V) \
+ V(ADD), \
+ V(ADDS), \
+ V(SUB), \
+ V(SUBS)
+
+enum AddSubImmediateOp : uint32_t {
+ AddSubImmediateFixed = 0x11000000,
+ AddSubImmediateFMask = 0x1F000000,
+ AddSubImmediateMask = 0xFF000000,
+ #define ADD_SUB_IMMEDIATE(A) \
+ A##_w_imm = AddSubImmediateFixed | A, \
+ A##_x_imm = AddSubImmediateFixed | A | SixtyFourBits
+ ADD_SUB_OP_LIST(ADD_SUB_IMMEDIATE)
+ #undef ADD_SUB_IMMEDIATE
+};
+
+enum AddSubShiftedOp : uint32_t {
+ AddSubShiftedFixed = 0x0B000000,
+ AddSubShiftedFMask = 0x1F200000,
+ AddSubShiftedMask = 0xFF200000,
+ #define ADD_SUB_SHIFTED(A) \
+ A##_w_shift = AddSubShiftedFixed | A, \
+ A##_x_shift = AddSubShiftedFixed | A | SixtyFourBits
+ ADD_SUB_OP_LIST(ADD_SUB_SHIFTED)
+ #undef ADD_SUB_SHIFTED
+};
+
+enum AddSubExtendedOp : uint32_t {
+ AddSubExtendedFixed = 0x0B200000,
+ AddSubExtendedFMask = 0x1F200000,
+ AddSubExtendedMask = 0xFFE00000,
+ #define ADD_SUB_EXTENDED(A) \
+ A##_w_ext = AddSubExtendedFixed | A, \
+ A##_x_ext = AddSubExtendedFixed | A | SixtyFourBits
+ ADD_SUB_OP_LIST(ADD_SUB_EXTENDED)
+ #undef ADD_SUB_EXTENDED
+};
+
+// Add/sub with carry.
+enum AddSubWithCarryOp : uint32_t {
+ AddSubWithCarryFixed = 0x1A000000,
+ AddSubWithCarryFMask = 0x1FE00000,
+ AddSubWithCarryMask = 0xFFE0FC00,
+ ADC_w = AddSubWithCarryFixed | ADD,
+ ADC_x = AddSubWithCarryFixed | ADD | SixtyFourBits,
+ ADC = ADC_w,
+ ADCS_w = AddSubWithCarryFixed | ADDS,
+ ADCS_x = AddSubWithCarryFixed | ADDS | SixtyFourBits,
+ SBC_w = AddSubWithCarryFixed | SUB,
+ SBC_x = AddSubWithCarryFixed | SUB | SixtyFourBits,
+ SBC = SBC_w,
+ SBCS_w = AddSubWithCarryFixed | SUBS,
+ SBCS_x = AddSubWithCarryFixed | SUBS | SixtyFourBits
+};
+
+// Rotate right into flags.
+enum RotateRightIntoFlagsOp : uint32_t {
+ RotateRightIntoFlagsFixed = 0x1A000400,
+ RotateRightIntoFlagsFMask = 0x1FE07C00,
+ RotateRightIntoFlagsMask = 0xFFE07C10,
+ RMIF = RotateRightIntoFlagsFixed | 0xA0000000
+};
+
+// Evaluate into flags.
+enum EvaluateIntoFlagsOp : uint32_t {
+ EvaluateIntoFlagsFixed = 0x1A000800,
+ EvaluateIntoFlagsFMask = 0x1FE03C00,
+ EvaluateIntoFlagsMask = 0xFFE07C1F,
+ SETF8 = EvaluateIntoFlagsFixed | 0x2000000D,
+ SETF16 = EvaluateIntoFlagsFixed | 0x2000400D
+};
+
+// Logical (immediate and shifted register).
+enum LogicalOp : uint32_t {
+ LogicalOpMask = 0x60200000,
+ NOT = 0x00200000,
+ AND = 0x00000000,
+ BIC = AND | NOT,
+ ORR = 0x20000000,
+ ORN = ORR | NOT,
+ EOR = 0x40000000,
+ EON = EOR | NOT,
+ ANDS = 0x60000000,
+ BICS = ANDS | NOT
+};
+
+// Logical immediate.
+enum LogicalImmediateOp : uint32_t {
+ LogicalImmediateFixed = 0x12000000,
+ LogicalImmediateFMask = 0x1F800000,
+ LogicalImmediateMask = 0xFF800000,
+ AND_w_imm = LogicalImmediateFixed | AND,
+ AND_x_imm = LogicalImmediateFixed | AND | SixtyFourBits,
+ ORR_w_imm = LogicalImmediateFixed | ORR,
+ ORR_x_imm = LogicalImmediateFixed | ORR | SixtyFourBits,
+ EOR_w_imm = LogicalImmediateFixed | EOR,
+ EOR_x_imm = LogicalImmediateFixed | EOR | SixtyFourBits,
+ ANDS_w_imm = LogicalImmediateFixed | ANDS,
+ ANDS_x_imm = LogicalImmediateFixed | ANDS | SixtyFourBits
+};
+
+// Logical shifted register.
+enum LogicalShiftedOp : uint32_t {
+ LogicalShiftedFixed = 0x0A000000,
+ LogicalShiftedFMask = 0x1F000000,
+ LogicalShiftedMask = 0xFF200000,
+ AND_w = LogicalShiftedFixed | AND,
+ AND_x = LogicalShiftedFixed | AND | SixtyFourBits,
+ AND_shift = AND_w,
+ BIC_w = LogicalShiftedFixed | BIC,
+ BIC_x = LogicalShiftedFixed | BIC | SixtyFourBits,
+ BIC_shift = BIC_w,
+ ORR_w = LogicalShiftedFixed | ORR,
+ ORR_x = LogicalShiftedFixed | ORR | SixtyFourBits,
+ ORR_shift = ORR_w,
+ ORN_w = LogicalShiftedFixed | ORN,
+ ORN_x = LogicalShiftedFixed | ORN | SixtyFourBits,
+ ORN_shift = ORN_w,
+ EOR_w = LogicalShiftedFixed | EOR,
+ EOR_x = LogicalShiftedFixed | EOR | SixtyFourBits,
+ EOR_shift = EOR_w,
+ EON_w = LogicalShiftedFixed | EON,
+ EON_x = LogicalShiftedFixed | EON | SixtyFourBits,
+ EON_shift = EON_w,
+ ANDS_w = LogicalShiftedFixed | ANDS,
+ ANDS_x = LogicalShiftedFixed | ANDS | SixtyFourBits,
+ ANDS_shift = ANDS_w,
+ BICS_w = LogicalShiftedFixed | BICS,
+ BICS_x = LogicalShiftedFixed | BICS | SixtyFourBits,
+ BICS_shift = BICS_w
+};
+
+// Move wide immediate.
+enum MoveWideImmediateOp : uint32_t {
+ MoveWideImmediateFixed = 0x12800000,
+ MoveWideImmediateFMask = 0x1F800000,
+ MoveWideImmediateMask = 0xFF800000,
+ MOVN = 0x00000000,
+ MOVZ = 0x40000000,
+ MOVK = 0x60000000,
+ MOVN_w = MoveWideImmediateFixed | MOVN,
+ MOVN_x = MoveWideImmediateFixed | MOVN | SixtyFourBits,
+ MOVZ_w = MoveWideImmediateFixed | MOVZ,
+ MOVZ_x = MoveWideImmediateFixed | MOVZ | SixtyFourBits,
+ MOVK_w = MoveWideImmediateFixed | MOVK,
+ MOVK_x = MoveWideImmediateFixed | MOVK | SixtyFourBits
+};
+
+// Bitfield.
+const int kBitfieldNOffset = 22;
+enum BitfieldOp : uint32_t {
+ BitfieldFixed = 0x13000000,
+ BitfieldFMask = 0x1F800000,
+ BitfieldMask = 0xFF800000,
+ SBFM_w = BitfieldFixed | 0x00000000,
+ SBFM_x = BitfieldFixed | 0x80000000,
+ SBFM = SBFM_w,
+ BFM_w = BitfieldFixed | 0x20000000,
+ BFM_x = BitfieldFixed | 0xA0000000,
+ BFM = BFM_w,
+ UBFM_w = BitfieldFixed | 0x40000000,
+ UBFM_x = BitfieldFixed | 0xC0000000,
+ UBFM = UBFM_w
+ // Bitfield N field.
+};
+
+// Extract.
+enum ExtractOp : uint32_t {
+ ExtractFixed = 0x13800000,
+ ExtractFMask = 0x1F800000,
+ ExtractMask = 0xFFA00000,
+ EXTR_w = ExtractFixed | 0x00000000,
+ EXTR_x = ExtractFixed | 0x80000000,
+ EXTR = EXTR_w
+};
+
+// Unconditional branch.
+enum UnconditionalBranchOp : uint32_t {
+ UnconditionalBranchFixed = 0x14000000,
+ UnconditionalBranchFMask = 0x7C000000,
+ UnconditionalBranchMask = 0xFC000000,
+ B = UnconditionalBranchFixed | 0x00000000,
+ BL = UnconditionalBranchFixed | 0x80000000
+};
+
+// Unconditional branch to register.
+enum UnconditionalBranchToRegisterOp : uint32_t {
+ UnconditionalBranchToRegisterFixed = 0xD6000000,
+ UnconditionalBranchToRegisterFMask = 0xFE000000,
+ UnconditionalBranchToRegisterMask = 0xFFFFFC00,
+ BR = UnconditionalBranchToRegisterFixed | 0x001F0000,
+ BLR = UnconditionalBranchToRegisterFixed | 0x003F0000,
+ RET = UnconditionalBranchToRegisterFixed | 0x005F0000,
+
+ BRAAZ = UnconditionalBranchToRegisterFixed | 0x001F0800,
+ BRABZ = UnconditionalBranchToRegisterFixed | 0x001F0C00,
+ BLRAAZ = UnconditionalBranchToRegisterFixed | 0x003F0800,
+ BLRABZ = UnconditionalBranchToRegisterFixed | 0x003F0C00,
+ RETAA = UnconditionalBranchToRegisterFixed | 0x005F0800,
+ RETAB = UnconditionalBranchToRegisterFixed | 0x005F0C00,
+ BRAA = UnconditionalBranchToRegisterFixed | 0x011F0800,
+ BRAB = UnconditionalBranchToRegisterFixed | 0x011F0C00,
+ BLRAA = UnconditionalBranchToRegisterFixed | 0x013F0800,
+ BLRAB = UnconditionalBranchToRegisterFixed | 0x013F0C00
+};
+
+// Compare and branch.
+enum CompareBranchOp : uint32_t {
+ CompareBranchFixed = 0x34000000,
+ CompareBranchFMask = 0x7E000000,
+ CompareBranchMask = 0xFF000000,
+ CBZ_w = CompareBranchFixed | 0x00000000,
+ CBZ_x = CompareBranchFixed | 0x80000000,
+ CBZ = CBZ_w,
+ CBNZ_w = CompareBranchFixed | 0x01000000,
+ CBNZ_x = CompareBranchFixed | 0x81000000,
+ CBNZ = CBNZ_w
+};
+
+// Test and branch.
+enum TestBranchOp : uint32_t {
+ TestBranchFixed = 0x36000000,
+ TestBranchFMask = 0x7E000000,
+ TestBranchMask = 0x7F000000,
+ TBZ = TestBranchFixed | 0x00000000,
+ TBNZ = TestBranchFixed | 0x01000000
+};
+
+// Conditional branch.
+enum ConditionalBranchOp : uint32_t {
+ ConditionalBranchFixed = 0x54000000,
+ ConditionalBranchFMask = 0xFE000000,
+ ConditionalBranchMask = 0xFF000010,
+ B_cond = ConditionalBranchFixed | 0x00000000
+};
+
+// System.
+// System instruction encoding is complicated because some instructions use op
+// and CR fields to encode parameters. To handle this cleanly, the system
+// instructions are split into more than one enum.
+
+enum SystemOp : uint32_t {
+ SystemFixed = 0xD5000000,
+ SystemFMask = 0xFFC00000
+};
+
+enum SystemSysRegOp : uint32_t {
+ SystemSysRegFixed = 0xD5100000,
+ SystemSysRegFMask = 0xFFD00000,
+ SystemSysRegMask = 0xFFF00000,
+ MRS = SystemSysRegFixed | 0x00200000,
+ MSR = SystemSysRegFixed | 0x00000000
+};
+
+enum SystemPStateOp : uint32_t {
+ SystemPStateFixed = 0xD5004000,
+ SystemPStateFMask = 0xFFF8F000,
+ SystemPStateMask = 0xFFFFF0FF,
+ CFINV = SystemPStateFixed | 0x0000001F,
+ XAFLAG = SystemPStateFixed | 0x0000003F,
+ AXFLAG = SystemPStateFixed | 0x0000005F
+};
+
+enum SystemHintOp : uint32_t {
+ SystemHintFixed = 0xD503201F,
+ SystemHintFMask = 0xFFFFF01F,
+ SystemHintMask = 0xFFFFF01F,
+ HINT = SystemHintFixed | 0x00000000
+};
+
+enum SystemSysOp : uint32_t {
+ SystemSysFixed = 0xD5080000,
+ SystemSysFMask = 0xFFF80000,
+ SystemSysMask = 0xFFF80000,
+ SYS = SystemSysFixed | 0x00000000
+};
+
+// Exception.
+enum ExceptionOp : uint32_t {
+ ExceptionFixed = 0xD4000000,
+ ExceptionFMask = 0xFF000000,
+ ExceptionMask = 0xFFE0001F,
+ HLT = ExceptionFixed | 0x00400000,
+ BRK = ExceptionFixed | 0x00200000,
+ SVC = ExceptionFixed | 0x00000001,
+ HVC = ExceptionFixed | 0x00000002,
+ SMC = ExceptionFixed | 0x00000003,
+ DCPS1 = ExceptionFixed | 0x00A00001,
+ DCPS2 = ExceptionFixed | 0x00A00002,
+ DCPS3 = ExceptionFixed | 0x00A00003
+};
+
+enum MemBarrierOp : uint32_t {
+ MemBarrierFixed = 0xD503309F,
+ MemBarrierFMask = 0xFFFFF09F,
+ MemBarrierMask = 0xFFFFF0FF,
+ DSB = MemBarrierFixed | 0x00000000,
+ DMB = MemBarrierFixed | 0x00000020,
+ ISB = MemBarrierFixed | 0x00000040
+};
+
+enum SystemExclusiveMonitorOp : uint32_t {
+ SystemExclusiveMonitorFixed = 0xD503305F,
+ SystemExclusiveMonitorFMask = 0xFFFFF0FF,
+ SystemExclusiveMonitorMask = 0xFFFFF0FF,
+ CLREX = SystemExclusiveMonitorFixed
+};
+
+enum SystemPAuthOp : uint32_t {
+ SystemPAuthFixed = 0xD503211F,
+ SystemPAuthFMask = 0xFFFFFD1F,
+ SystemPAuthMask = 0xFFFFFFFF,
+ PACIA1716 = SystemPAuthFixed | 0x00000100,
+ PACIB1716 = SystemPAuthFixed | 0x00000140,
+ AUTIA1716 = SystemPAuthFixed | 0x00000180,
+ AUTIB1716 = SystemPAuthFixed | 0x000001C0,
+ PACIAZ = SystemPAuthFixed | 0x00000300,
+ PACIASP = SystemPAuthFixed | 0x00000320,
+ PACIBZ = SystemPAuthFixed | 0x00000340,
+ PACIBSP = SystemPAuthFixed | 0x00000360,
+ AUTIAZ = SystemPAuthFixed | 0x00000380,
+ AUTIASP = SystemPAuthFixed | 0x000003A0,
+ AUTIBZ = SystemPAuthFixed | 0x000003C0,
+ AUTIBSP = SystemPAuthFixed | 0x000003E0,
+
+ // XPACLRI has the same fixed mask as System Hints and needs to be handled
+ // differently.
+ XPACLRI = 0xD50320FF
+};
+
+// Any load or store.
+enum LoadStoreAnyOp : uint32_t {
+ LoadStoreAnyFMask = 0x0a000000,
+ LoadStoreAnyFixed = 0x08000000
+};
+
+// Any load pair or store pair.
+enum LoadStorePairAnyOp : uint32_t {
+ LoadStorePairAnyFMask = 0x3a000000,
+ LoadStorePairAnyFixed = 0x28000000
+};
+
+#define LOAD_STORE_PAIR_OP_LIST(V) \
+ V(STP, w, 0x00000000), \
+ V(LDP, w, 0x00400000), \
+ V(LDPSW, x, 0x40400000), \
+ V(STP, x, 0x80000000), \
+ V(LDP, x, 0x80400000), \
+ V(STP, s, 0x04000000), \
+ V(LDP, s, 0x04400000), \
+ V(STP, d, 0x44000000), \
+ V(LDP, d, 0x44400000), \
+ V(STP, q, 0x84000000), \
+ V(LDP, q, 0x84400000)
+
+// Load/store pair (post, pre and offset.)
+enum LoadStorePairOp : uint32_t {
+ LoadStorePairMask = 0xC4400000,
+ LoadStorePairLBit = 1 << 22,
+ #define LOAD_STORE_PAIR(A, B, C) \
+ A##_##B = C
+ LOAD_STORE_PAIR_OP_LIST(LOAD_STORE_PAIR)
+ #undef LOAD_STORE_PAIR
+};
+
+enum LoadStorePairPostIndexOp : uint32_t {
+ LoadStorePairPostIndexFixed = 0x28800000,
+ LoadStorePairPostIndexFMask = 0x3B800000,
+ LoadStorePairPostIndexMask = 0xFFC00000,
+ #define LOAD_STORE_PAIR_POST_INDEX(A, B, C) \
+ A##_##B##_post = LoadStorePairPostIndexFixed | A##_##B
+ LOAD_STORE_PAIR_OP_LIST(LOAD_STORE_PAIR_POST_INDEX)
+ #undef LOAD_STORE_PAIR_POST_INDEX
+};
+
+enum LoadStorePairPreIndexOp : uint32_t {
+ LoadStorePairPreIndexFixed = 0x29800000,
+ LoadStorePairPreIndexFMask = 0x3B800000,
+ LoadStorePairPreIndexMask = 0xFFC00000,
+ #define LOAD_STORE_PAIR_PRE_INDEX(A, B, C) \
+ A##_##B##_pre = LoadStorePairPreIndexFixed | A##_##B
+ LOAD_STORE_PAIR_OP_LIST(LOAD_STORE_PAIR_PRE_INDEX)
+ #undef LOAD_STORE_PAIR_PRE_INDEX
+};
+
+enum LoadStorePairOffsetOp : uint32_t {
+ LoadStorePairOffsetFixed = 0x29000000,
+ LoadStorePairOffsetFMask = 0x3B800000,
+ LoadStorePairOffsetMask = 0xFFC00000,
+ #define LOAD_STORE_PAIR_OFFSET(A, B, C) \
+ A##_##B##_off = LoadStorePairOffsetFixed | A##_##B
+ LOAD_STORE_PAIR_OP_LIST(LOAD_STORE_PAIR_OFFSET)
+ #undef LOAD_STORE_PAIR_OFFSET
+};
+
+enum LoadStorePairNonTemporalOp : uint32_t {
+ LoadStorePairNonTemporalFixed = 0x28000000,
+ LoadStorePairNonTemporalFMask = 0x3B800000,
+ LoadStorePairNonTemporalMask = 0xFFC00000,
+ LoadStorePairNonTemporalLBit = 1 << 22,
+ STNP_w = LoadStorePairNonTemporalFixed | STP_w,
+ LDNP_w = LoadStorePairNonTemporalFixed | LDP_w,
+ STNP_x = LoadStorePairNonTemporalFixed | STP_x,
+ LDNP_x = LoadStorePairNonTemporalFixed | LDP_x,
+ STNP_s = LoadStorePairNonTemporalFixed | STP_s,
+ LDNP_s = LoadStorePairNonTemporalFixed | LDP_s,
+ STNP_d = LoadStorePairNonTemporalFixed | STP_d,
+ LDNP_d = LoadStorePairNonTemporalFixed | LDP_d,
+ STNP_q = LoadStorePairNonTemporalFixed | STP_q,
+ LDNP_q = LoadStorePairNonTemporalFixed | LDP_q
+};
+
+// Load with pointer authentication.
+enum LoadStorePACOp : uint32_t {
+ LoadStorePACFixed = 0xF8200400,
+ LoadStorePACFMask = 0xFF200400,
+ LoadStorePACMask = 0xFFA00C00,
+ LoadStorePACPreBit = 0x00000800,
+ LDRAA = LoadStorePACFixed | 0x00000000,
+ LDRAA_pre = LoadStorePACPreBit | LDRAA,
+ LDRAB = LoadStorePACFixed | 0x00800000,
+ LDRAB_pre = LoadStorePACPreBit | LDRAB
+};
+
+// Load literal.
+enum LoadLiteralOp : uint32_t {
+ LoadLiteralFixed = 0x18000000,
+ LoadLiteralFMask = 0x3B000000,
+ LoadLiteralMask = 0xFF000000,
+ LDR_w_lit = LoadLiteralFixed | 0x00000000,
+ LDR_x_lit = LoadLiteralFixed | 0x40000000,
+ LDRSW_x_lit = LoadLiteralFixed | 0x80000000,
+ PRFM_lit = LoadLiteralFixed | 0xC0000000,
+ LDR_s_lit = LoadLiteralFixed | 0x04000000,
+ LDR_d_lit = LoadLiteralFixed | 0x44000000,
+ LDR_q_lit = LoadLiteralFixed | 0x84000000
+};
+
+#define LOAD_STORE_OP_LIST(V) \
+ V(ST, RB, w, 0x00000000), \
+ V(ST, RH, w, 0x40000000), \
+ V(ST, R, w, 0x80000000), \
+ V(ST, R, x, 0xC0000000), \
+ V(LD, RB, w, 0x00400000), \
+ V(LD, RH, w, 0x40400000), \
+ V(LD, R, w, 0x80400000), \
+ V(LD, R, x, 0xC0400000), \
+ V(LD, RSB, x, 0x00800000), \
+ V(LD, RSH, x, 0x40800000), \
+ V(LD, RSW, x, 0x80800000), \
+ V(LD, RSB, w, 0x00C00000), \
+ V(LD, RSH, w, 0x40C00000), \
+ V(ST, R, b, 0x04000000), \
+ V(ST, R, h, 0x44000000), \
+ V(ST, R, s, 0x84000000), \
+ V(ST, R, d, 0xC4000000), \
+ V(ST, R, q, 0x04800000), \
+ V(LD, R, b, 0x04400000), \
+ V(LD, R, h, 0x44400000), \
+ V(LD, R, s, 0x84400000), \
+ V(LD, R, d, 0xC4400000), \
+ V(LD, R, q, 0x04C00000)
+
+// Load/store (post, pre, offset and unsigned.)
+enum LoadStoreOp : uint32_t {
+ LoadStoreMask = 0xC4C00000,
+ LoadStoreVMask = 0x04000000,
+ #define LOAD_STORE(A, B, C, D) \
+ A##B##_##C = D
+ LOAD_STORE_OP_LIST(LOAD_STORE),
+ #undef LOAD_STORE
+ PRFM = 0xC0800000
+};
+
+// Load/store unscaled offset.
+enum LoadStoreUnscaledOffsetOp : uint32_t {
+ LoadStoreUnscaledOffsetFixed = 0x38000000,
+ LoadStoreUnscaledOffsetFMask = 0x3B200C00,
+ LoadStoreUnscaledOffsetMask = 0xFFE00C00,
+ PRFUM = LoadStoreUnscaledOffsetFixed | PRFM,
+ #define LOAD_STORE_UNSCALED(A, B, C, D) \
+ A##U##B##_##C = LoadStoreUnscaledOffsetFixed | D
+ LOAD_STORE_OP_LIST(LOAD_STORE_UNSCALED)
+ #undef LOAD_STORE_UNSCALED
+};
+
+// Load/store post index.
+enum LoadStorePostIndex : uint32_t {
+ LoadStorePostIndexFixed = 0x38000400,
+ LoadStorePostIndexFMask = 0x3B200C00,
+ LoadStorePostIndexMask = 0xFFE00C00,
+ #define LOAD_STORE_POST_INDEX(A, B, C, D) \
+ A##B##_##C##_post = LoadStorePostIndexFixed | D
+ LOAD_STORE_OP_LIST(LOAD_STORE_POST_INDEX)
+ #undef LOAD_STORE_POST_INDEX
+};
+
+// Load/store pre index.
+enum LoadStorePreIndex : uint32_t {
+ LoadStorePreIndexFixed = 0x38000C00,
+ LoadStorePreIndexFMask = 0x3B200C00,
+ LoadStorePreIndexMask = 0xFFE00C00,
+ #define LOAD_STORE_PRE_INDEX(A, B, C, D) \
+ A##B##_##C##_pre = LoadStorePreIndexFixed | D
+ LOAD_STORE_OP_LIST(LOAD_STORE_PRE_INDEX)
+ #undef LOAD_STORE_PRE_INDEX
+};
+
+// Load/store unsigned offset.
+enum LoadStoreUnsignedOffset : uint32_t {
+ LoadStoreUnsignedOffsetFixed = 0x39000000,
+ LoadStoreUnsignedOffsetFMask = 0x3B000000,
+ LoadStoreUnsignedOffsetMask = 0xFFC00000,
+ PRFM_unsigned = LoadStoreUnsignedOffsetFixed | PRFM,
+ #define LOAD_STORE_UNSIGNED_OFFSET(A, B, C, D) \
+ A##B##_##C##_unsigned = LoadStoreUnsignedOffsetFixed | D
+ LOAD_STORE_OP_LIST(LOAD_STORE_UNSIGNED_OFFSET)
+ #undef LOAD_STORE_UNSIGNED_OFFSET
+};
+
+// Load/store register offset.
+enum LoadStoreRegisterOffset : uint32_t {
+ LoadStoreRegisterOffsetFixed = 0x38200800,
+ LoadStoreRegisterOffsetFMask = 0x3B200C00,
+ LoadStoreRegisterOffsetMask = 0xFFE00C00,
+ PRFM_reg = LoadStoreRegisterOffsetFixed | PRFM,
+ #define LOAD_STORE_REGISTER_OFFSET(A, B, C, D) \
+ A##B##_##C##_reg = LoadStoreRegisterOffsetFixed | D
+ LOAD_STORE_OP_LIST(LOAD_STORE_REGISTER_OFFSET)
+ #undef LOAD_STORE_REGISTER_OFFSET
+};
+
+enum LoadStoreExclusive : uint32_t {
+ LoadStoreExclusiveFixed = 0x08000000,
+ LoadStoreExclusiveFMask = 0x3F000000,
+ LoadStoreExclusiveMask = 0xFFE08000,
+ STXRB_w = LoadStoreExclusiveFixed | 0x00000000,
+ STXRH_w = LoadStoreExclusiveFixed | 0x40000000,
+ STXR_w = LoadStoreExclusiveFixed | 0x80000000,
+ STXR_x = LoadStoreExclusiveFixed | 0xC0000000,
+ LDXRB_w = LoadStoreExclusiveFixed | 0x00400000,
+ LDXRH_w = LoadStoreExclusiveFixed | 0x40400000,
+ LDXR_w = LoadStoreExclusiveFixed | 0x80400000,
+ LDXR_x = LoadStoreExclusiveFixed | 0xC0400000,
+ STXP_w = LoadStoreExclusiveFixed | 0x80200000,
+ STXP_x = LoadStoreExclusiveFixed | 0xC0200000,
+ LDXP_w = LoadStoreExclusiveFixed | 0x80600000,
+ LDXP_x = LoadStoreExclusiveFixed | 0xC0600000,
+ STLXRB_w = LoadStoreExclusiveFixed | 0x00008000,
+ STLXRH_w = LoadStoreExclusiveFixed | 0x40008000,
+ STLXR_w = LoadStoreExclusiveFixed | 0x80008000,
+ STLXR_x = LoadStoreExclusiveFixed | 0xC0008000,
+ LDAXRB_w = LoadStoreExclusiveFixed | 0x00408000,
+ LDAXRH_w = LoadStoreExclusiveFixed | 0x40408000,
+ LDAXR_w = LoadStoreExclusiveFixed | 0x80408000,
+ LDAXR_x = LoadStoreExclusiveFixed | 0xC0408000,
+ STLXP_w = LoadStoreExclusiveFixed | 0x80208000,
+ STLXP_x = LoadStoreExclusiveFixed | 0xC0208000,
+ LDAXP_w = LoadStoreExclusiveFixed | 0x80608000,
+ LDAXP_x = LoadStoreExclusiveFixed | 0xC0608000,
+ STLRB_w = LoadStoreExclusiveFixed | 0x00808000,
+ STLRH_w = LoadStoreExclusiveFixed | 0x40808000,
+ STLR_w = LoadStoreExclusiveFixed | 0x80808000,
+ STLR_x = LoadStoreExclusiveFixed | 0xC0808000,
+ LDARB_w = LoadStoreExclusiveFixed | 0x00C08000,
+ LDARH_w = LoadStoreExclusiveFixed | 0x40C08000,
+ LDAR_w = LoadStoreExclusiveFixed | 0x80C08000,
+ LDAR_x = LoadStoreExclusiveFixed | 0xC0C08000,
+
+ // v8.1 Load/store LORegion ops
+ STLLRB = LoadStoreExclusiveFixed | 0x00800000,
+ LDLARB = LoadStoreExclusiveFixed | 0x00C00000,
+ STLLRH = LoadStoreExclusiveFixed | 0x40800000,
+ LDLARH = LoadStoreExclusiveFixed | 0x40C00000,
+ STLLR_w = LoadStoreExclusiveFixed | 0x80800000,
+ LDLAR_w = LoadStoreExclusiveFixed | 0x80C00000,
+ STLLR_x = LoadStoreExclusiveFixed | 0xC0800000,
+ LDLAR_x = LoadStoreExclusiveFixed | 0xC0C00000,
+
+ // v8.1 Load/store exclusive ops
+ LSEBit_l = 0x00400000,
+ LSEBit_o0 = 0x00008000,
+ LSEBit_sz = 0x40000000,
+ CASFixed = LoadStoreExclusiveFixed | 0x80A00000,
+ CASBFixed = LoadStoreExclusiveFixed | 0x00A00000,
+ CASHFixed = LoadStoreExclusiveFixed | 0x40A00000,
+ CASPFixed = LoadStoreExclusiveFixed | 0x00200000,
+ CAS_w = CASFixed,
+ CAS_x = CASFixed | LSEBit_sz,
+ CASA_w = CASFixed | LSEBit_l,
+ CASA_x = CASFixed | LSEBit_l | LSEBit_sz,
+ CASL_w = CASFixed | LSEBit_o0,
+ CASL_x = CASFixed | LSEBit_o0 | LSEBit_sz,
+ CASAL_w = CASFixed | LSEBit_l | LSEBit_o0,
+ CASAL_x = CASFixed | LSEBit_l | LSEBit_o0 | LSEBit_sz,
+ CASB = CASBFixed,
+ CASAB = CASBFixed | LSEBit_l,
+ CASLB = CASBFixed | LSEBit_o0,
+ CASALB = CASBFixed | LSEBit_l | LSEBit_o0,
+ CASH = CASHFixed,
+ CASAH = CASHFixed | LSEBit_l,
+ CASLH = CASHFixed | LSEBit_o0,
+ CASALH = CASHFixed | LSEBit_l | LSEBit_o0,
+ CASP_w = CASPFixed,
+ CASP_x = CASPFixed | LSEBit_sz,
+ CASPA_w = CASPFixed | LSEBit_l,
+ CASPA_x = CASPFixed | LSEBit_l | LSEBit_sz,
+ CASPL_w = CASPFixed | LSEBit_o0,
+ CASPL_x = CASPFixed | LSEBit_o0 | LSEBit_sz,
+ CASPAL_w = CASPFixed | LSEBit_l | LSEBit_o0,
+ CASPAL_x = CASPFixed | LSEBit_l | LSEBit_o0 | LSEBit_sz
+};
+
+// Load/store RCpc unscaled offset.
+enum LoadStoreRCpcUnscaledOffsetOp : uint32_t {
+ LoadStoreRCpcUnscaledOffsetFixed = 0x19000000,
+ LoadStoreRCpcUnscaledOffsetFMask = 0x3F200C00,
+ LoadStoreRCpcUnscaledOffsetMask = 0xFFE00C00,
+ STLURB = LoadStoreRCpcUnscaledOffsetFixed | 0x00000000,
+ LDAPURB = LoadStoreRCpcUnscaledOffsetFixed | 0x00400000,
+ LDAPURSB_x = LoadStoreRCpcUnscaledOffsetFixed | 0x00800000,
+ LDAPURSB_w = LoadStoreRCpcUnscaledOffsetFixed | 0x00C00000,
+ STLURH = LoadStoreRCpcUnscaledOffsetFixed | 0x40000000,
+ LDAPURH = LoadStoreRCpcUnscaledOffsetFixed | 0x40400000,
+ LDAPURSH_x = LoadStoreRCpcUnscaledOffsetFixed | 0x40800000,
+ LDAPURSH_w = LoadStoreRCpcUnscaledOffsetFixed | 0x40C00000,
+ STLUR_w = LoadStoreRCpcUnscaledOffsetFixed | 0x80000000,
+ LDAPUR_w = LoadStoreRCpcUnscaledOffsetFixed | 0x80400000,
+ LDAPURSW = LoadStoreRCpcUnscaledOffsetFixed | 0x80800000,
+ STLUR_x = LoadStoreRCpcUnscaledOffsetFixed | 0xC0000000,
+ LDAPUR_x = LoadStoreRCpcUnscaledOffsetFixed | 0xC0400000
+};
+
+#define ATOMIC_MEMORY_SIMPLE_OPC_LIST(V) \
+ V(LDADD, 0x00000000), \
+ V(LDCLR, 0x00001000), \
+ V(LDEOR, 0x00002000), \
+ V(LDSET, 0x00003000), \
+ V(LDSMAX, 0x00004000), \
+ V(LDSMIN, 0x00005000), \
+ V(LDUMAX, 0x00006000), \
+ V(LDUMIN, 0x00007000)
+
+// Atomic memory.
+enum AtomicMemoryOp : uint32_t {
+ AtomicMemoryFixed = 0x38200000,
+ AtomicMemoryFMask = 0x3B200C00,
+ AtomicMemoryMask = 0xFFE0FC00,
+ SWPB = AtomicMemoryFixed | 0x00008000,
+ SWPAB = AtomicMemoryFixed | 0x00808000,
+ SWPLB = AtomicMemoryFixed | 0x00408000,
+ SWPALB = AtomicMemoryFixed | 0x00C08000,
+ SWPH = AtomicMemoryFixed | 0x40008000,
+ SWPAH = AtomicMemoryFixed | 0x40808000,
+ SWPLH = AtomicMemoryFixed | 0x40408000,
+ SWPALH = AtomicMemoryFixed | 0x40C08000,
+ SWP_w = AtomicMemoryFixed | 0x80008000,
+ SWPA_w = AtomicMemoryFixed | 0x80808000,
+ SWPL_w = AtomicMemoryFixed | 0x80408000,
+ SWPAL_w = AtomicMemoryFixed | 0x80C08000,
+ SWP_x = AtomicMemoryFixed | 0xC0008000,
+ SWPA_x = AtomicMemoryFixed | 0xC0808000,
+ SWPL_x = AtomicMemoryFixed | 0xC0408000,
+ SWPAL_x = AtomicMemoryFixed | 0xC0C08000,
+ LDAPRB = AtomicMemoryFixed | 0x0080C000,
+ LDAPRH = AtomicMemoryFixed | 0x4080C000,
+ LDAPR_w = AtomicMemoryFixed | 0x8080C000,
+ LDAPR_x = AtomicMemoryFixed | 0xC080C000,
+
+ AtomicMemorySimpleFMask = 0x3B208C00,
+ AtomicMemorySimpleOpMask = 0x00007000,
+#define ATOMIC_MEMORY_SIMPLE(N, OP) \
+ N##Op = OP, \
+ N##B = AtomicMemoryFixed | OP, \
+ N##AB = AtomicMemoryFixed | OP | 0x00800000, \
+ N##LB = AtomicMemoryFixed | OP | 0x00400000, \
+ N##ALB = AtomicMemoryFixed | OP | 0x00C00000, \
+ N##H = AtomicMemoryFixed | OP | 0x40000000, \
+ N##AH = AtomicMemoryFixed | OP | 0x40800000, \
+ N##LH = AtomicMemoryFixed | OP | 0x40400000, \
+ N##ALH = AtomicMemoryFixed | OP | 0x40C00000, \
+ N##_w = AtomicMemoryFixed | OP | 0x80000000, \
+ N##A_w = AtomicMemoryFixed | OP | 0x80800000, \
+ N##L_w = AtomicMemoryFixed | OP | 0x80400000, \
+ N##AL_w = AtomicMemoryFixed | OP | 0x80C00000, \
+ N##_x = AtomicMemoryFixed | OP | 0xC0000000, \
+ N##A_x = AtomicMemoryFixed | OP | 0xC0800000, \
+ N##L_x = AtomicMemoryFixed | OP | 0xC0400000, \
+ N##AL_x = AtomicMemoryFixed | OP | 0xC0C00000
+
+ ATOMIC_MEMORY_SIMPLE_OPC_LIST(ATOMIC_MEMORY_SIMPLE)
+#undef ATOMIC_MEMORY_SIMPLE
+};
+
+// Conditional compare.
+enum ConditionalCompareOp : uint32_t {
+ ConditionalCompareMask = 0x60000000,
+ CCMN = 0x20000000,
+ CCMP = 0x60000000
+};
+
+// Conditional compare register.
+enum ConditionalCompareRegisterOp : uint32_t {
+ ConditionalCompareRegisterFixed = 0x1A400000,
+ ConditionalCompareRegisterFMask = 0x1FE00800,
+ ConditionalCompareRegisterMask = 0xFFE00C10,
+ CCMN_w = ConditionalCompareRegisterFixed | CCMN,
+ CCMN_x = ConditionalCompareRegisterFixed | SixtyFourBits | CCMN,
+ CCMP_w = ConditionalCompareRegisterFixed | CCMP,
+ CCMP_x = ConditionalCompareRegisterFixed | SixtyFourBits | CCMP
+};
+
+// Conditional compare immediate.
+enum ConditionalCompareImmediateOp : uint32_t {
+ ConditionalCompareImmediateFixed = 0x1A400800,
+ ConditionalCompareImmediateFMask = 0x1FE00800,
+ ConditionalCompareImmediateMask = 0xFFE00C10,
+ CCMN_w_imm = ConditionalCompareImmediateFixed | CCMN,
+ CCMN_x_imm = ConditionalCompareImmediateFixed | SixtyFourBits | CCMN,
+ CCMP_w_imm = ConditionalCompareImmediateFixed | CCMP,
+ CCMP_x_imm = ConditionalCompareImmediateFixed | SixtyFourBits | CCMP
+};
+
+// Conditional select.
+enum ConditionalSelectOp : uint32_t {
+ ConditionalSelectFixed = 0x1A800000,
+ ConditionalSelectFMask = 0x1FE00000,
+ ConditionalSelectMask = 0xFFE00C00,
+ CSEL_w = ConditionalSelectFixed | 0x00000000,
+ CSEL_x = ConditionalSelectFixed | 0x80000000,
+ CSEL = CSEL_w,
+ CSINC_w = ConditionalSelectFixed | 0x00000400,
+ CSINC_x = ConditionalSelectFixed | 0x80000400,
+ CSINC = CSINC_w,
+ CSINV_w = ConditionalSelectFixed | 0x40000000,
+ CSINV_x = ConditionalSelectFixed | 0xC0000000,
+ CSINV = CSINV_w,
+ CSNEG_w = ConditionalSelectFixed | 0x40000400,
+ CSNEG_x = ConditionalSelectFixed | 0xC0000400,
+ CSNEG = CSNEG_w
+};
+
+// Data processing 1 source.
+enum DataProcessing1SourceOp : uint32_t {
+ DataProcessing1SourceFixed = 0x5AC00000,
+ DataProcessing1SourceFMask = 0x5FE00000,
+ DataProcessing1SourceMask = 0xFFFFFC00,
+ RBIT = DataProcessing1SourceFixed | 0x00000000,
+ RBIT_w = RBIT,
+ RBIT_x = RBIT | SixtyFourBits,
+ REV16 = DataProcessing1SourceFixed | 0x00000400,
+ REV16_w = REV16,
+ REV16_x = REV16 | SixtyFourBits,
+ REV = DataProcessing1SourceFixed | 0x00000800,
+ REV_w = REV,
+ REV32_x = REV | SixtyFourBits,
+ REV_x = DataProcessing1SourceFixed | SixtyFourBits | 0x00000C00,
+ CLZ = DataProcessing1SourceFixed | 0x00001000,
+ CLZ_w = CLZ,
+ CLZ_x = CLZ | SixtyFourBits,
+ CLS = DataProcessing1SourceFixed | 0x00001400,
+ CLS_w = CLS,
+ CLS_x = CLS | SixtyFourBits,
+
+ // Pointer authentication instructions in Armv8.3.
+ PACIA = DataProcessing1SourceFixed | 0x80010000,
+ PACIB = DataProcessing1SourceFixed | 0x80010400,
+ PACDA = DataProcessing1SourceFixed | 0x80010800,
+ PACDB = DataProcessing1SourceFixed | 0x80010C00,
+ AUTIA = DataProcessing1SourceFixed | 0x80011000,
+ AUTIB = DataProcessing1SourceFixed | 0x80011400,
+ AUTDA = DataProcessing1SourceFixed | 0x80011800,
+ AUTDB = DataProcessing1SourceFixed | 0x80011C00,
+ PACIZA = DataProcessing1SourceFixed | 0x80012000,
+ PACIZB = DataProcessing1SourceFixed | 0x80012400,
+ PACDZA = DataProcessing1SourceFixed | 0x80012800,
+ PACDZB = DataProcessing1SourceFixed | 0x80012C00,
+ AUTIZA = DataProcessing1SourceFixed | 0x80013000,
+ AUTIZB = DataProcessing1SourceFixed | 0x80013400,
+ AUTDZA = DataProcessing1SourceFixed | 0x80013800,
+ AUTDZB = DataProcessing1SourceFixed | 0x80013C00,
+ XPACI = DataProcessing1SourceFixed | 0x80014000,
+ XPACD = DataProcessing1SourceFixed | 0x80014400
+};
+
+// Data processing 2 source.
+enum DataProcessing2SourceOp : uint32_t {
+ DataProcessing2SourceFixed = 0x1AC00000,
+ DataProcessing2SourceFMask = 0x5FE00000,
+ DataProcessing2SourceMask = 0xFFE0FC00,
+ UDIV_w = DataProcessing2SourceFixed | 0x00000800,
+ UDIV_x = DataProcessing2SourceFixed | 0x80000800,
+ UDIV = UDIV_w,
+ SDIV_w = DataProcessing2SourceFixed | 0x00000C00,
+ SDIV_x = DataProcessing2SourceFixed | 0x80000C00,
+ SDIV = SDIV_w,
+ LSLV_w = DataProcessing2SourceFixed | 0x00002000,
+ LSLV_x = DataProcessing2SourceFixed | 0x80002000,
+ LSLV = LSLV_w,
+ LSRV_w = DataProcessing2SourceFixed | 0x00002400,
+ LSRV_x = DataProcessing2SourceFixed | 0x80002400,
+ LSRV = LSRV_w,
+ ASRV_w = DataProcessing2SourceFixed | 0x00002800,
+ ASRV_x = DataProcessing2SourceFixed | 0x80002800,
+ ASRV = ASRV_w,
+ RORV_w = DataProcessing2SourceFixed | 0x00002C00,
+ RORV_x = DataProcessing2SourceFixed | 0x80002C00,
+ RORV = RORV_w,
+ PACGA = DataProcessing2SourceFixed | SixtyFourBits | 0x00003000,
+ CRC32B = DataProcessing2SourceFixed | 0x00004000,
+ CRC32H = DataProcessing2SourceFixed | 0x00004400,
+ CRC32W = DataProcessing2SourceFixed | 0x00004800,
+ CRC32X = DataProcessing2SourceFixed | SixtyFourBits | 0x00004C00,
+ CRC32CB = DataProcessing2SourceFixed | 0x00005000,
+ CRC32CH = DataProcessing2SourceFixed | 0x00005400,
+ CRC32CW = DataProcessing2SourceFixed | 0x00005800,
+ CRC32CX = DataProcessing2SourceFixed | SixtyFourBits | 0x00005C00
+};
+
+// Data processing 3 source.
+enum DataProcessing3SourceOp : uint32_t {
+ DataProcessing3SourceFixed = 0x1B000000,
+ DataProcessing3SourceFMask = 0x1F000000,
+ DataProcessing3SourceMask = 0xFFE08000,
+ MADD_w = DataProcessing3SourceFixed | 0x00000000,
+ MADD_x = DataProcessing3SourceFixed | 0x80000000,
+ MADD = MADD_w,
+ MSUB_w = DataProcessing3SourceFixed | 0x00008000,
+ MSUB_x = DataProcessing3SourceFixed | 0x80008000,
+ MSUB = MSUB_w,
+ SMADDL_x = DataProcessing3SourceFixed | 0x80200000,
+ SMSUBL_x = DataProcessing3SourceFixed | 0x80208000,
+ SMULH_x = DataProcessing3SourceFixed | 0x80400000,
+ UMADDL_x = DataProcessing3SourceFixed | 0x80A00000,
+ UMSUBL_x = DataProcessing3SourceFixed | 0x80A08000,
+ UMULH_x = DataProcessing3SourceFixed | 0x80C00000
+};
+
+// Floating point compare.
+enum FPCompareOp : uint32_t {
+ FPCompareFixed = 0x1E202000,
+ FPCompareFMask = 0x5F203C00,
+ FPCompareMask = 0xFFE0FC1F,
+ FCMP_h = FPCompareFixed | FP16 | 0x00000000,
+ FCMP_s = FPCompareFixed | 0x00000000,
+ FCMP_d = FPCompareFixed | FP64 | 0x00000000,
+ FCMP = FCMP_s,
+ FCMP_h_zero = FPCompareFixed | FP16 | 0x00000008,
+ FCMP_s_zero = FPCompareFixed | 0x00000008,
+ FCMP_d_zero = FPCompareFixed | FP64 | 0x00000008,
+ FCMP_zero = FCMP_s_zero,
+ FCMPE_h = FPCompareFixed | FP16 | 0x00000010,
+ FCMPE_s = FPCompareFixed | 0x00000010,
+ FCMPE_d = FPCompareFixed | FP64 | 0x00000010,
+ FCMPE = FCMPE_s,
+ FCMPE_h_zero = FPCompareFixed | FP16 | 0x00000018,
+ FCMPE_s_zero = FPCompareFixed | 0x00000018,
+ FCMPE_d_zero = FPCompareFixed | FP64 | 0x00000018,
+ FCMPE_zero = FCMPE_s_zero
+};
+
+// Floating point conditional compare.
+enum FPConditionalCompareOp : uint32_t {
+ FPConditionalCompareFixed = 0x1E200400,
+ FPConditionalCompareFMask = 0x5F200C00,
+ FPConditionalCompareMask = 0xFFE00C10,
+ FCCMP_h = FPConditionalCompareFixed | FP16 | 0x00000000,
+ FCCMP_s = FPConditionalCompareFixed | 0x00000000,
+ FCCMP_d = FPConditionalCompareFixed | FP64 | 0x00000000,
+ FCCMP = FCCMP_s,
+ FCCMPE_h = FPConditionalCompareFixed | FP16 | 0x00000010,
+ FCCMPE_s = FPConditionalCompareFixed | 0x00000010,
+ FCCMPE_d = FPConditionalCompareFixed | FP64 | 0x00000010,
+ FCCMPE = FCCMPE_s
+};
+
+// Floating point conditional select.
+enum FPConditionalSelectOp : uint32_t {
+ FPConditionalSelectFixed = 0x1E200C00,
+ FPConditionalSelectFMask = 0x5F200C00,
+ FPConditionalSelectMask = 0xFFE00C00,
+ FCSEL_h = FPConditionalSelectFixed | FP16 | 0x00000000,
+ FCSEL_s = FPConditionalSelectFixed | 0x00000000,
+ FCSEL_d = FPConditionalSelectFixed | FP64 | 0x00000000,
+ FCSEL = FCSEL_s
+};
+
+// Floating point immediate.
+enum FPImmediateOp : uint32_t {
+ FPImmediateFixed = 0x1E201000,
+ FPImmediateFMask = 0x5F201C00,
+ FPImmediateMask = 0xFFE01C00,
+ FMOV_h_imm = FPImmediateFixed | FP16 | 0x00000000,
+ FMOV_s_imm = FPImmediateFixed | 0x00000000,
+ FMOV_d_imm = FPImmediateFixed | FP64 | 0x00000000
+};
+
+// Floating point data processing 1 source.
+enum FPDataProcessing1SourceOp : uint32_t {
+ FPDataProcessing1SourceFixed = 0x1E204000,
+ FPDataProcessing1SourceFMask = 0x5F207C00,
+ FPDataProcessing1SourceMask = 0xFFFFFC00,
+ FMOV_h = FPDataProcessing1SourceFixed | FP16 | 0x00000000,
+ FMOV_s = FPDataProcessing1SourceFixed | 0x00000000,
+ FMOV_d = FPDataProcessing1SourceFixed | FP64 | 0x00000000,
+ FMOV = FMOV_s,
+ FABS_h = FPDataProcessing1SourceFixed | FP16 | 0x00008000,
+ FABS_s = FPDataProcessing1SourceFixed | 0x00008000,
+ FABS_d = FPDataProcessing1SourceFixed | FP64 | 0x00008000,
+ FABS = FABS_s,
+ FNEG_h = FPDataProcessing1SourceFixed | FP16 | 0x00010000,
+ FNEG_s = FPDataProcessing1SourceFixed | 0x00010000,
+ FNEG_d = FPDataProcessing1SourceFixed | FP64 | 0x00010000,
+ FNEG = FNEG_s,
+ FSQRT_h = FPDataProcessing1SourceFixed | FP16 | 0x00018000,
+ FSQRT_s = FPDataProcessing1SourceFixed | 0x00018000,
+ FSQRT_d = FPDataProcessing1SourceFixed | FP64 | 0x00018000,
+ FSQRT = FSQRT_s,
+ FCVT_ds = FPDataProcessing1SourceFixed | 0x00028000,
+ FCVT_sd = FPDataProcessing1SourceFixed | FP64 | 0x00020000,
+ FCVT_hs = FPDataProcessing1SourceFixed | 0x00038000,
+ FCVT_hd = FPDataProcessing1SourceFixed | FP64 | 0x00038000,
+ FCVT_sh = FPDataProcessing1SourceFixed | 0x00C20000,
+ FCVT_dh = FPDataProcessing1SourceFixed | 0x00C28000,
+ FRINT32X_s = FPDataProcessing1SourceFixed | 0x00088000,
+ FRINT32X_d = FPDataProcessing1SourceFixed | FP64 | 0x00088000,
+ FRINT32X = FRINT32X_s,
+ FRINT32Z_s = FPDataProcessing1SourceFixed | 0x00080000,
+ FRINT32Z_d = FPDataProcessing1SourceFixed | FP64 | 0x00080000,
+ FRINT32Z = FRINT32Z_s,
+ FRINT64X_s = FPDataProcessing1SourceFixed | 0x00098000,
+ FRINT64X_d = FPDataProcessing1SourceFixed | FP64 | 0x00098000,
+ FRINT64X = FRINT64X_s,
+ FRINT64Z_s = FPDataProcessing1SourceFixed | 0x00090000,
+ FRINT64Z_d = FPDataProcessing1SourceFixed | FP64 | 0x00090000,
+ FRINT64Z = FRINT64Z_s,
+ FRINTN_h = FPDataProcessing1SourceFixed | FP16 | 0x00040000,
+ FRINTN_s = FPDataProcessing1SourceFixed | 0x00040000,
+ FRINTN_d = FPDataProcessing1SourceFixed | FP64 | 0x00040000,
+ FRINTN = FRINTN_s,
+ FRINTP_h = FPDataProcessing1SourceFixed | FP16 | 0x00048000,
+ FRINTP_s = FPDataProcessing1SourceFixed | 0x00048000,
+ FRINTP_d = FPDataProcessing1SourceFixed | FP64 | 0x00048000,
+ FRINTP = FRINTP_s,
+ FRINTM_h = FPDataProcessing1SourceFixed | FP16 | 0x00050000,
+ FRINTM_s = FPDataProcessing1SourceFixed | 0x00050000,
+ FRINTM_d = FPDataProcessing1SourceFixed | FP64 | 0x00050000,
+ FRINTM = FRINTM_s,
+ FRINTZ_h = FPDataProcessing1SourceFixed | FP16 | 0x00058000,
+ FRINTZ_s = FPDataProcessing1SourceFixed | 0x00058000,
+ FRINTZ_d = FPDataProcessing1SourceFixed | FP64 | 0x00058000,
+ FRINTZ = FRINTZ_s,
+ FRINTA_h = FPDataProcessing1SourceFixed | FP16 | 0x00060000,
+ FRINTA_s = FPDataProcessing1SourceFixed | 0x00060000,
+ FRINTA_d = FPDataProcessing1SourceFixed | FP64 | 0x00060000,
+ FRINTA = FRINTA_s,
+ FRINTX_h = FPDataProcessing1SourceFixed | FP16 | 0x00070000,
+ FRINTX_s = FPDataProcessing1SourceFixed | 0x00070000,
+ FRINTX_d = FPDataProcessing1SourceFixed | FP64 | 0x00070000,
+ FRINTX = FRINTX_s,
+ FRINTI_h = FPDataProcessing1SourceFixed | FP16 | 0x00078000,
+ FRINTI_s = FPDataProcessing1SourceFixed | 0x00078000,
+ FRINTI_d = FPDataProcessing1SourceFixed | FP64 | 0x00078000,
+ FRINTI = FRINTI_s
+};
+
+// Floating point data processing 2 source.
+enum FPDataProcessing2SourceOp : uint32_t {
+ FPDataProcessing2SourceFixed = 0x1E200800,
+ FPDataProcessing2SourceFMask = 0x5F200C00,
+ FPDataProcessing2SourceMask = 0xFFE0FC00,
+ FMUL = FPDataProcessing2SourceFixed | 0x00000000,
+ FMUL_h = FMUL | FP16,
+ FMUL_s = FMUL,
+ FMUL_d = FMUL | FP64,
+ FDIV = FPDataProcessing2SourceFixed | 0x00001000,
+ FDIV_h = FDIV | FP16,
+ FDIV_s = FDIV,
+ FDIV_d = FDIV | FP64,
+ FADD = FPDataProcessing2SourceFixed | 0x00002000,
+ FADD_h = FADD | FP16,
+ FADD_s = FADD,
+ FADD_d = FADD | FP64,
+ FSUB = FPDataProcessing2SourceFixed | 0x00003000,
+ FSUB_h = FSUB | FP16,
+ FSUB_s = FSUB,
+ FSUB_d = FSUB | FP64,
+ FMAX = FPDataProcessing2SourceFixed | 0x00004000,
+ FMAX_h = FMAX | FP16,
+ FMAX_s = FMAX,
+ FMAX_d = FMAX | FP64,
+ FMIN = FPDataProcessing2SourceFixed | 0x00005000,
+ FMIN_h = FMIN | FP16,
+ FMIN_s = FMIN,
+ FMIN_d = FMIN | FP64,
+ FMAXNM = FPDataProcessing2SourceFixed | 0x00006000,
+ FMAXNM_h = FMAXNM | FP16,
+ FMAXNM_s = FMAXNM,
+ FMAXNM_d = FMAXNM | FP64,
+ FMINNM = FPDataProcessing2SourceFixed | 0x00007000,
+ FMINNM_h = FMINNM | FP16,
+ FMINNM_s = FMINNM,
+ FMINNM_d = FMINNM | FP64,
+ FNMUL = FPDataProcessing2SourceFixed | 0x00008000,
+ FNMUL_h = FNMUL | FP16,
+ FNMUL_s = FNMUL,
+ FNMUL_d = FNMUL | FP64
+};
+
+// Floating point data processing 3 source.
+enum FPDataProcessing3SourceOp : uint32_t {
+ FPDataProcessing3SourceFixed = 0x1F000000,
+ FPDataProcessing3SourceFMask = 0x5F000000,
+ FPDataProcessing3SourceMask = 0xFFE08000,
+ FMADD_h = FPDataProcessing3SourceFixed | 0x00C00000,
+ FMSUB_h = FPDataProcessing3SourceFixed | 0x00C08000,
+ FNMADD_h = FPDataProcessing3SourceFixed | 0x00E00000,
+ FNMSUB_h = FPDataProcessing3SourceFixed | 0x00E08000,
+ FMADD_s = FPDataProcessing3SourceFixed | 0x00000000,
+ FMSUB_s = FPDataProcessing3SourceFixed | 0x00008000,
+ FNMADD_s = FPDataProcessing3SourceFixed | 0x00200000,
+ FNMSUB_s = FPDataProcessing3SourceFixed | 0x00208000,
+ FMADD_d = FPDataProcessing3SourceFixed | 0x00400000,
+ FMSUB_d = FPDataProcessing3SourceFixed | 0x00408000,
+ FNMADD_d = FPDataProcessing3SourceFixed | 0x00600000,
+ FNMSUB_d = FPDataProcessing3SourceFixed | 0x00608000
+};
+
+// Conversion between floating point and integer.
+enum FPIntegerConvertOp : uint32_t {
+ FPIntegerConvertFixed = 0x1E200000,
+ FPIntegerConvertFMask = 0x5F20FC00,
+ FPIntegerConvertMask = 0xFFFFFC00,
+ FCVTNS = FPIntegerConvertFixed | 0x00000000,
+ FCVTNS_wh = FCVTNS | FP16,
+ FCVTNS_xh = FCVTNS | SixtyFourBits | FP16,
+ FCVTNS_ws = FCVTNS,
+ FCVTNS_xs = FCVTNS | SixtyFourBits,
+ FCVTNS_wd = FCVTNS | FP64,
+ FCVTNS_xd = FCVTNS | SixtyFourBits | FP64,
+ FCVTNU = FPIntegerConvertFixed | 0x00010000,
+ FCVTNU_wh = FCVTNU | FP16,
+ FCVTNU_xh = FCVTNU | SixtyFourBits | FP16,
+ FCVTNU_ws = FCVTNU,
+ FCVTNU_xs = FCVTNU | SixtyFourBits,
+ FCVTNU_wd = FCVTNU | FP64,
+ FCVTNU_xd = FCVTNU | SixtyFourBits | FP64,
+ FCVTPS = FPIntegerConvertFixed | 0x00080000,
+ FCVTPS_wh = FCVTPS | FP16,
+ FCVTPS_xh = FCVTPS | SixtyFourBits | FP16,
+ FCVTPS_ws = FCVTPS,
+ FCVTPS_xs = FCVTPS | SixtyFourBits,
+ FCVTPS_wd = FCVTPS | FP64,
+ FCVTPS_xd = FCVTPS | SixtyFourBits | FP64,
+ FCVTPU = FPIntegerConvertFixed | 0x00090000,
+ FCVTPU_wh = FCVTPU | FP16,
+ FCVTPU_xh = FCVTPU | SixtyFourBits | FP16,
+ FCVTPU_ws = FCVTPU,
+ FCVTPU_xs = FCVTPU | SixtyFourBits,
+ FCVTPU_wd = FCVTPU | FP64,
+ FCVTPU_xd = FCVTPU | SixtyFourBits | FP64,
+ FCVTMS = FPIntegerConvertFixed | 0x00100000,
+ FCVTMS_wh = FCVTMS | FP16,
+ FCVTMS_xh = FCVTMS | SixtyFourBits | FP16,
+ FCVTMS_ws = FCVTMS,
+ FCVTMS_xs = FCVTMS | SixtyFourBits,
+ FCVTMS_wd = FCVTMS | FP64,
+ FCVTMS_xd = FCVTMS | SixtyFourBits | FP64,
+ FCVTMU = FPIntegerConvertFixed | 0x00110000,
+ FCVTMU_wh = FCVTMU | FP16,
+ FCVTMU_xh = FCVTMU | SixtyFourBits | FP16,
+ FCVTMU_ws = FCVTMU,
+ FCVTMU_xs = FCVTMU | SixtyFourBits,
+ FCVTMU_wd = FCVTMU | FP64,
+ FCVTMU_xd = FCVTMU | SixtyFourBits | FP64,
+ FCVTZS = FPIntegerConvertFixed | 0x00180000,
+ FCVTZS_wh = FCVTZS | FP16,
+ FCVTZS_xh = FCVTZS | SixtyFourBits | FP16,
+ FCVTZS_ws = FCVTZS,
+ FCVTZS_xs = FCVTZS | SixtyFourBits,
+ FCVTZS_wd = FCVTZS | FP64,
+ FCVTZS_xd = FCVTZS | SixtyFourBits | FP64,
+ FCVTZU = FPIntegerConvertFixed | 0x00190000,
+ FCVTZU_wh = FCVTZU | FP16,
+ FCVTZU_xh = FCVTZU | SixtyFourBits | FP16,
+ FCVTZU_ws = FCVTZU,
+ FCVTZU_xs = FCVTZU | SixtyFourBits,
+ FCVTZU_wd = FCVTZU | FP64,
+ FCVTZU_xd = FCVTZU | SixtyFourBits | FP64,
+ SCVTF = FPIntegerConvertFixed | 0x00020000,
+ SCVTF_hw = SCVTF | FP16,
+ SCVTF_hx = SCVTF | SixtyFourBits | FP16,
+ SCVTF_sw = SCVTF,
+ SCVTF_sx = SCVTF | SixtyFourBits,
+ SCVTF_dw = SCVTF | FP64,
+ SCVTF_dx = SCVTF | SixtyFourBits | FP64,
+ UCVTF = FPIntegerConvertFixed | 0x00030000,
+ UCVTF_hw = UCVTF | FP16,
+ UCVTF_hx = UCVTF | SixtyFourBits | FP16,
+ UCVTF_sw = UCVTF,
+ UCVTF_sx = UCVTF | SixtyFourBits,
+ UCVTF_dw = UCVTF | FP64,
+ UCVTF_dx = UCVTF | SixtyFourBits | FP64,
+ FCVTAS = FPIntegerConvertFixed | 0x00040000,
+ FCVTAS_wh = FCVTAS | FP16,
+ FCVTAS_xh = FCVTAS | SixtyFourBits | FP16,
+ FCVTAS_ws = FCVTAS,
+ FCVTAS_xs = FCVTAS | SixtyFourBits,
+ FCVTAS_wd = FCVTAS | FP64,
+ FCVTAS_xd = FCVTAS | SixtyFourBits | FP64,
+ FCVTAU = FPIntegerConvertFixed | 0x00050000,
+ FCVTAU_wh = FCVTAU | FP16,
+ FCVTAU_xh = FCVTAU | SixtyFourBits | FP16,
+ FCVTAU_ws = FCVTAU,
+ FCVTAU_xs = FCVTAU | SixtyFourBits,
+ FCVTAU_wd = FCVTAU | FP64,
+ FCVTAU_xd = FCVTAU | SixtyFourBits | FP64,
+ FMOV_wh = FPIntegerConvertFixed | 0x00060000 | FP16,
+ FMOV_hw = FPIntegerConvertFixed | 0x00070000 | FP16,
+ FMOV_xh = FMOV_wh | SixtyFourBits,
+ FMOV_hx = FMOV_hw | SixtyFourBits,
+ FMOV_ws = FPIntegerConvertFixed | 0x00060000,
+ FMOV_sw = FPIntegerConvertFixed | 0x00070000,
+ FMOV_xd = FMOV_ws | SixtyFourBits | FP64,
+ FMOV_dx = FMOV_sw | SixtyFourBits | FP64,
+ FMOV_d1_x = FPIntegerConvertFixed | SixtyFourBits | 0x008F0000,
+ FMOV_x_d1 = FPIntegerConvertFixed | SixtyFourBits | 0x008E0000,
+ FJCVTZS = FPIntegerConvertFixed | FP64 | 0x001E0000
+};
+
+// Conversion between fixed point and floating point.
+enum FPFixedPointConvertOp : uint32_t {
+ FPFixedPointConvertFixed = 0x1E000000,
+ FPFixedPointConvertFMask = 0x5F200000,
+ FPFixedPointConvertMask = 0xFFFF0000,
+ FCVTZS_fixed = FPFixedPointConvertFixed | 0x00180000,
+ FCVTZS_wh_fixed = FCVTZS_fixed | FP16,
+ FCVTZS_xh_fixed = FCVTZS_fixed | SixtyFourBits | FP16,
+ FCVTZS_ws_fixed = FCVTZS_fixed,
+ FCVTZS_xs_fixed = FCVTZS_fixed | SixtyFourBits,
+ FCVTZS_wd_fixed = FCVTZS_fixed | FP64,
+ FCVTZS_xd_fixed = FCVTZS_fixed | SixtyFourBits | FP64,
+ FCVTZU_fixed = FPFixedPointConvertFixed | 0x00190000,
+ FCVTZU_wh_fixed = FCVTZU_fixed | FP16,
+ FCVTZU_xh_fixed = FCVTZU_fixed | SixtyFourBits | FP16,
+ FCVTZU_ws_fixed = FCVTZU_fixed,
+ FCVTZU_xs_fixed = FCVTZU_fixed | SixtyFourBits,
+ FCVTZU_wd_fixed = FCVTZU_fixed | FP64,
+ FCVTZU_xd_fixed = FCVTZU_fixed | SixtyFourBits | FP64,
+ SCVTF_fixed = FPFixedPointConvertFixed | 0x00020000,
+ SCVTF_hw_fixed = SCVTF_fixed | FP16,
+ SCVTF_hx_fixed = SCVTF_fixed | SixtyFourBits | FP16,
+ SCVTF_sw_fixed = SCVTF_fixed,
+ SCVTF_sx_fixed = SCVTF_fixed | SixtyFourBits,
+ SCVTF_dw_fixed = SCVTF_fixed | FP64,
+ SCVTF_dx_fixed = SCVTF_fixed | SixtyFourBits | FP64,
+ UCVTF_fixed = FPFixedPointConvertFixed | 0x00030000,
+ UCVTF_hw_fixed = UCVTF_fixed | FP16,
+ UCVTF_hx_fixed = UCVTF_fixed | SixtyFourBits | FP16,
+ UCVTF_sw_fixed = UCVTF_fixed,
+ UCVTF_sx_fixed = UCVTF_fixed | SixtyFourBits,
+ UCVTF_dw_fixed = UCVTF_fixed | FP64,
+ UCVTF_dx_fixed = UCVTF_fixed | SixtyFourBits | FP64
+};
+
+// Crypto - two register SHA.
+enum Crypto2RegSHAOp : uint32_t {
+ Crypto2RegSHAFixed = 0x5E280800,
+ Crypto2RegSHAFMask = 0xFF3E0C00
+};
+
+// Crypto - three register SHA.
+enum Crypto3RegSHAOp : uint32_t {
+ Crypto3RegSHAFixed = 0x5E000000,
+ Crypto3RegSHAFMask = 0xFF208C00
+};
+
+// Crypto - AES.
+enum CryptoAESOp : uint32_t {
+ CryptoAESFixed = 0x4E280800,
+ CryptoAESFMask = 0xFF3E0C00
+};
+
+// NEON instructions with two register operands.
+enum NEON2RegMiscOp : uint32_t {
+ NEON2RegMiscFixed = 0x0E200800,
+ NEON2RegMiscFMask = 0x9F3E0C00,
+ NEON2RegMiscMask = 0xBF3FFC00,
+ NEON2RegMiscUBit = 0x20000000,
+ NEON_REV64 = NEON2RegMiscFixed | 0x00000000,
+ NEON_REV32 = NEON2RegMiscFixed | 0x20000000,
+ NEON_REV16 = NEON2RegMiscFixed | 0x00001000,
+ NEON_SADDLP = NEON2RegMiscFixed | 0x00002000,
+ NEON_UADDLP = NEON_SADDLP | NEON2RegMiscUBit,
+ NEON_SUQADD = NEON2RegMiscFixed | 0x00003000,
+ NEON_USQADD = NEON_SUQADD | NEON2RegMiscUBit,
+ NEON_CLS = NEON2RegMiscFixed | 0x00004000,
+ NEON_CLZ = NEON2RegMiscFixed | 0x20004000,
+ NEON_CNT = NEON2RegMiscFixed | 0x00005000,
+ NEON_RBIT_NOT = NEON2RegMiscFixed | 0x20005000,
+ NEON_SADALP = NEON2RegMiscFixed | 0x00006000,
+ NEON_UADALP = NEON_SADALP | NEON2RegMiscUBit,
+ NEON_SQABS = NEON2RegMiscFixed | 0x00007000,
+ NEON_SQNEG = NEON2RegMiscFixed | 0x20007000,
+ NEON_CMGT_zero = NEON2RegMiscFixed | 0x00008000,
+ NEON_CMGE_zero = NEON2RegMiscFixed | 0x20008000,
+ NEON_CMEQ_zero = NEON2RegMiscFixed | 0x00009000,
+ NEON_CMLE_zero = NEON2RegMiscFixed | 0x20009000,
+ NEON_CMLT_zero = NEON2RegMiscFixed | 0x0000A000,
+ NEON_ABS = NEON2RegMiscFixed | 0x0000B000,
+ NEON_NEG = NEON2RegMiscFixed | 0x2000B000,
+ NEON_XTN = NEON2RegMiscFixed | 0x00012000,
+ NEON_SQXTUN = NEON2RegMiscFixed | 0x20012000,
+ NEON_SHLL = NEON2RegMiscFixed | 0x20013000,
+ NEON_SQXTN = NEON2RegMiscFixed | 0x00014000,
+ NEON_UQXTN = NEON_SQXTN | NEON2RegMiscUBit,
+
+ NEON2RegMiscOpcode = 0x0001F000,
+ NEON_RBIT_NOT_opcode = NEON_RBIT_NOT & NEON2RegMiscOpcode,
+ NEON_NEG_opcode = NEON_NEG & NEON2RegMiscOpcode,
+ NEON_XTN_opcode = NEON_XTN & NEON2RegMiscOpcode,
+ NEON_UQXTN_opcode = NEON_UQXTN & NEON2RegMiscOpcode,
+
+ // These instructions use only one bit of the size field. The other bit is
+ // used to distinguish between instructions.
+ NEON2RegMiscFPMask = NEON2RegMiscMask | 0x00800000,
+ NEON_FABS = NEON2RegMiscFixed | 0x0080F000,
+ NEON_FNEG = NEON2RegMiscFixed | 0x2080F000,
+ NEON_FCVTN = NEON2RegMiscFixed | 0x00016000,
+ NEON_FCVTXN = NEON2RegMiscFixed | 0x20016000,
+ NEON_FCVTL = NEON2RegMiscFixed | 0x00017000,
+ NEON_FRINT32X = NEON2RegMiscFixed | 0x2001E000,
+ NEON_FRINT32Z = NEON2RegMiscFixed | 0x0001E000,
+ NEON_FRINT64X = NEON2RegMiscFixed | 0x2001F000,
+ NEON_FRINT64Z = NEON2RegMiscFixed | 0x0001F000,
+ NEON_FRINTN = NEON2RegMiscFixed | 0x00018000,
+ NEON_FRINTA = NEON2RegMiscFixed | 0x20018000,
+ NEON_FRINTP = NEON2RegMiscFixed | 0x00818000,
+ NEON_FRINTM = NEON2RegMiscFixed | 0x00019000,
+ NEON_FRINTX = NEON2RegMiscFixed | 0x20019000,
+ NEON_FRINTZ = NEON2RegMiscFixed | 0x00819000,
+ NEON_FRINTI = NEON2RegMiscFixed | 0x20819000,
+ NEON_FCVTNS = NEON2RegMiscFixed | 0x0001A000,
+ NEON_FCVTNU = NEON_FCVTNS | NEON2RegMiscUBit,
+ NEON_FCVTPS = NEON2RegMiscFixed | 0x0081A000,
+ NEON_FCVTPU = NEON_FCVTPS | NEON2RegMiscUBit,
+ NEON_FCVTMS = NEON2RegMiscFixed | 0x0001B000,
+ NEON_FCVTMU = NEON_FCVTMS | NEON2RegMiscUBit,
+ NEON_FCVTZS = NEON2RegMiscFixed | 0x0081B000,
+ NEON_FCVTZU = NEON_FCVTZS | NEON2RegMiscUBit,
+ NEON_FCVTAS = NEON2RegMiscFixed | 0x0001C000,
+ NEON_FCVTAU = NEON_FCVTAS | NEON2RegMiscUBit,
+ NEON_FSQRT = NEON2RegMiscFixed | 0x2081F000,
+ NEON_SCVTF = NEON2RegMiscFixed | 0x0001D000,
+ NEON_UCVTF = NEON_SCVTF | NEON2RegMiscUBit,
+ NEON_URSQRTE = NEON2RegMiscFixed | 0x2081C000,
+ NEON_URECPE = NEON2RegMiscFixed | 0x0081C000,
+ NEON_FRSQRTE = NEON2RegMiscFixed | 0x2081D000,
+ NEON_FRECPE = NEON2RegMiscFixed | 0x0081D000,
+ NEON_FCMGT_zero = NEON2RegMiscFixed | 0x0080C000,
+ NEON_FCMGE_zero = NEON2RegMiscFixed | 0x2080C000,
+ NEON_FCMEQ_zero = NEON2RegMiscFixed | 0x0080D000,
+ NEON_FCMLE_zero = NEON2RegMiscFixed | 0x2080D000,
+ NEON_FCMLT_zero = NEON2RegMiscFixed | 0x0080E000,
+
+ NEON_FCVTL_opcode = NEON_FCVTL & NEON2RegMiscOpcode,
+ NEON_FCVTN_opcode = NEON_FCVTN & NEON2RegMiscOpcode
+};
+
+// NEON instructions with two register operands (FP16).
+enum NEON2RegMiscFP16Op : uint32_t {
+ NEON2RegMiscFP16Fixed = 0x0E780800,
+ NEON2RegMiscFP16FMask = 0x9F7E0C00,
+ NEON2RegMiscFP16Mask = 0xBFFFFC00,
+ NEON_FRINTN_H = NEON2RegMiscFP16Fixed | 0x00018000,
+ NEON_FRINTM_H = NEON2RegMiscFP16Fixed | 0x00019000,
+ NEON_FCVTNS_H = NEON2RegMiscFP16Fixed | 0x0001A000,
+ NEON_FCVTMS_H = NEON2RegMiscFP16Fixed | 0x0001B000,
+ NEON_FCVTAS_H = NEON2RegMiscFP16Fixed | 0x0001C000,
+ NEON_SCVTF_H = NEON2RegMiscFP16Fixed | 0x0001D000,
+ NEON_FCMGT_H_zero = NEON2RegMiscFP16Fixed | 0x0080C000,
+ NEON_FCMEQ_H_zero = NEON2RegMiscFP16Fixed | 0x0080D000,
+ NEON_FCMLT_H_zero = NEON2RegMiscFP16Fixed | 0x0080E000,
+ NEON_FABS_H = NEON2RegMiscFP16Fixed | 0x0080F000,
+ NEON_FRINTP_H = NEON2RegMiscFP16Fixed | 0x00818000,
+ NEON_FRINTZ_H = NEON2RegMiscFP16Fixed | 0x00819000,
+ NEON_FCVTPS_H = NEON2RegMiscFP16Fixed | 0x0081A000,
+ NEON_FCVTZS_H = NEON2RegMiscFP16Fixed | 0x0081B000,
+ NEON_FRECPE_H = NEON2RegMiscFP16Fixed | 0x0081D000,
+ NEON_FRINTA_H = NEON2RegMiscFP16Fixed | 0x20018000,
+ NEON_FRINTX_H = NEON2RegMiscFP16Fixed | 0x20019000,
+ NEON_FCVTNU_H = NEON2RegMiscFP16Fixed | 0x2001A000,
+ NEON_FCVTMU_H = NEON2RegMiscFP16Fixed | 0x2001B000,
+ NEON_FCVTAU_H = NEON2RegMiscFP16Fixed | 0x2001C000,
+ NEON_UCVTF_H = NEON2RegMiscFP16Fixed | 0x2001D000,
+ NEON_FCMGE_H_zero = NEON2RegMiscFP16Fixed | 0x2080C000,
+ NEON_FCMLE_H_zero = NEON2RegMiscFP16Fixed | 0x2080D000,
+ NEON_FNEG_H = NEON2RegMiscFP16Fixed | 0x2080F000,
+ NEON_FRINTI_H = NEON2RegMiscFP16Fixed | 0x20819000,
+ NEON_FCVTPU_H = NEON2RegMiscFP16Fixed | 0x2081A000,
+ NEON_FCVTZU_H = NEON2RegMiscFP16Fixed | 0x2081B000,
+ NEON_FRSQRTE_H = NEON2RegMiscFP16Fixed | 0x2081D000,
+ NEON_FSQRT_H = NEON2RegMiscFP16Fixed | 0x2081F000
+};
+
+// NEON instructions with three same-type operands.
+enum NEON3SameOp : uint32_t {
+ NEON3SameFixed = 0x0E200400,
+ NEON3SameFMask = 0x9F200400,
+ NEON3SameMask = 0xBF20FC00,
+ NEON3SameUBit = 0x20000000,
+ NEON_ADD = NEON3SameFixed | 0x00008000,
+ NEON_ADDP = NEON3SameFixed | 0x0000B800,
+ NEON_SHADD = NEON3SameFixed | 0x00000000,
+ NEON_SHSUB = NEON3SameFixed | 0x00002000,
+ NEON_SRHADD = NEON3SameFixed | 0x00001000,
+ NEON_CMEQ = NEON3SameFixed | NEON3SameUBit | 0x00008800,
+ NEON_CMGE = NEON3SameFixed | 0x00003800,
+ NEON_CMGT = NEON3SameFixed | 0x00003000,
+ NEON_CMHI = NEON3SameFixed | NEON3SameUBit | NEON_CMGT,
+ NEON_CMHS = NEON3SameFixed | NEON3SameUBit | NEON_CMGE,
+ NEON_CMTST = NEON3SameFixed | 0x00008800,
+ NEON_MLA = NEON3SameFixed | 0x00009000,
+ NEON_MLS = NEON3SameFixed | 0x20009000,
+ NEON_MUL = NEON3SameFixed | 0x00009800,
+ NEON_PMUL = NEON3SameFixed | 0x20009800,
+ NEON_SRSHL = NEON3SameFixed | 0x00005000,
+ NEON_SQSHL = NEON3SameFixed | 0x00004800,
+ NEON_SQRSHL = NEON3SameFixed | 0x00005800,
+ NEON_SSHL = NEON3SameFixed | 0x00004000,
+ NEON_SMAX = NEON3SameFixed | 0x00006000,
+ NEON_SMAXP = NEON3SameFixed | 0x0000A000,
+ NEON_SMIN = NEON3SameFixed | 0x00006800,
+ NEON_SMINP = NEON3SameFixed | 0x0000A800,
+ NEON_SABD = NEON3SameFixed | 0x00007000,
+ NEON_SABA = NEON3SameFixed | 0x00007800,
+ NEON_UABD = NEON3SameFixed | NEON3SameUBit | NEON_SABD,
+ NEON_UABA = NEON3SameFixed | NEON3SameUBit | NEON_SABA,
+ NEON_SQADD = NEON3SameFixed | 0x00000800,
+ NEON_SQSUB = NEON3SameFixed | 0x00002800,
+ NEON_SUB = NEON3SameFixed | NEON3SameUBit | 0x00008000,
+ NEON_UHADD = NEON3SameFixed | NEON3SameUBit | NEON_SHADD,
+ NEON_UHSUB = NEON3SameFixed | NEON3SameUBit | NEON_SHSUB,
+ NEON_URHADD = NEON3SameFixed | NEON3SameUBit | NEON_SRHADD,
+ NEON_UMAX = NEON3SameFixed | NEON3SameUBit | NEON_SMAX,
+ NEON_UMAXP = NEON3SameFixed | NEON3SameUBit | NEON_SMAXP,
+ NEON_UMIN = NEON3SameFixed | NEON3SameUBit | NEON_SMIN,
+ NEON_UMINP = NEON3SameFixed | NEON3SameUBit | NEON_SMINP,
+ NEON_URSHL = NEON3SameFixed | NEON3SameUBit | NEON_SRSHL,
+ NEON_UQADD = NEON3SameFixed | NEON3SameUBit | NEON_SQADD,
+ NEON_UQRSHL = NEON3SameFixed | NEON3SameUBit | NEON_SQRSHL,
+ NEON_UQSHL = NEON3SameFixed | NEON3SameUBit | NEON_SQSHL,
+ NEON_UQSUB = NEON3SameFixed | NEON3SameUBit | NEON_SQSUB,
+ NEON_USHL = NEON3SameFixed | NEON3SameUBit | NEON_SSHL,
+ NEON_SQDMULH = NEON3SameFixed | 0x0000B000,
+ NEON_SQRDMULH = NEON3SameFixed | 0x2000B000,
+
+ // NEON floating point instructions with three same-type operands.
+ NEON3SameFPFixed = NEON3SameFixed | 0x0000C000,
+ NEON3SameFPFMask = NEON3SameFMask | 0x0000C000,
+ NEON3SameFPMask = NEON3SameMask | 0x00800000,
+ NEON_FADD = NEON3SameFixed | 0x0000D000,
+ NEON_FSUB = NEON3SameFixed | 0x0080D000,
+ NEON_FMUL = NEON3SameFixed | 0x2000D800,
+ NEON_FDIV = NEON3SameFixed | 0x2000F800,
+ NEON_FMAX = NEON3SameFixed | 0x0000F000,
+ NEON_FMAXNM = NEON3SameFixed | 0x0000C000,
+ NEON_FMAXP = NEON3SameFixed | 0x2000F000,
+ NEON_FMAXNMP = NEON3SameFixed | 0x2000C000,
+ NEON_FMIN = NEON3SameFixed | 0x0080F000,
+ NEON_FMINNM = NEON3SameFixed | 0x0080C000,
+ NEON_FMINP = NEON3SameFixed | 0x2080F000,
+ NEON_FMINNMP = NEON3SameFixed | 0x2080C000,
+ NEON_FMLA = NEON3SameFixed | 0x0000C800,
+ NEON_FMLS = NEON3SameFixed | 0x0080C800,
+ NEON_FMULX = NEON3SameFixed | 0x0000D800,
+ NEON_FRECPS = NEON3SameFixed | 0x0000F800,
+ NEON_FRSQRTS = NEON3SameFixed | 0x0080F800,
+ NEON_FABD = NEON3SameFixed | 0x2080D000,
+ NEON_FADDP = NEON3SameFixed | 0x2000D000,
+ NEON_FCMEQ = NEON3SameFixed | 0x0000E000,
+ NEON_FCMGE = NEON3SameFixed | 0x2000E000,
+ NEON_FCMGT = NEON3SameFixed | 0x2080E000,
+ NEON_FACGE = NEON3SameFixed | 0x2000E800,
+ NEON_FACGT = NEON3SameFixed | 0x2080E800,
+
+ // NEON logical instructions with three same-type operands.
+ NEON3SameLogicalFixed = NEON3SameFixed | 0x00001800,
+ NEON3SameLogicalFMask = NEON3SameFMask | 0x0000F800,
+ NEON3SameLogicalMask = 0xBFE0FC00,
+ NEON3SameLogicalFormatMask = NEON_Q,
+ NEON_AND = NEON3SameLogicalFixed | 0x00000000,
+ NEON_ORR = NEON3SameLogicalFixed | 0x00A00000,
+ NEON_ORN = NEON3SameLogicalFixed | 0x00C00000,
+ NEON_EOR = NEON3SameLogicalFixed | 0x20000000,
+ NEON_BIC = NEON3SameLogicalFixed | 0x00400000,
+ NEON_BIF = NEON3SameLogicalFixed | 0x20C00000,
+ NEON_BIT = NEON3SameLogicalFixed | 0x20800000,
+ NEON_BSL = NEON3SameLogicalFixed | 0x20400000,
+
+ // FHM (FMLAL-like) instructions have an oddball encoding scheme under 3Same.
+ NEON3SameFHMMask = 0xBFE0FC00, // U size opcode
+ NEON_FMLAL = NEON3SameFixed | 0x0000E800, // 0 00 11101
+ NEON_FMLAL2 = NEON3SameFixed | 0x2000C800, // 1 00 11001
+ NEON_FMLSL = NEON3SameFixed | 0x0080E800, // 0 10 11101
+ NEON_FMLSL2 = NEON3SameFixed | 0x2080C800 // 1 10 11001
+};
+
+enum NEON3SameFP16 : uint32_t {
+ NEON3SameFP16Fixed = 0x0E400400,
+ NEON3SameFP16FMask = 0x9F60C400,
+ NEON3SameFP16Mask = 0xBFE0FC00,
+ NEON_FMAXNM_H = NEON3SameFP16Fixed | 0x00000000,
+ NEON_FMLA_H = NEON3SameFP16Fixed | 0x00000800,
+ NEON_FADD_H = NEON3SameFP16Fixed | 0x00001000,
+ NEON_FMULX_H = NEON3SameFP16Fixed | 0x00001800,
+ NEON_FCMEQ_H = NEON3SameFP16Fixed | 0x00002000,
+ NEON_FMAX_H = NEON3SameFP16Fixed | 0x00003000,
+ NEON_FRECPS_H = NEON3SameFP16Fixed | 0x00003800,
+ NEON_FMINNM_H = NEON3SameFP16Fixed | 0x00800000,
+ NEON_FMLS_H = NEON3SameFP16Fixed | 0x00800800,
+ NEON_FSUB_H = NEON3SameFP16Fixed | 0x00801000,
+ NEON_FMIN_H = NEON3SameFP16Fixed | 0x00803000,
+ NEON_FRSQRTS_H = NEON3SameFP16Fixed | 0x00803800,
+ NEON_FMAXNMP_H = NEON3SameFP16Fixed | 0x20000000,
+ NEON_FADDP_H = NEON3SameFP16Fixed | 0x20001000,
+ NEON_FMUL_H = NEON3SameFP16Fixed | 0x20001800,
+ NEON_FCMGE_H = NEON3SameFP16Fixed | 0x20002000,
+ NEON_FACGE_H = NEON3SameFP16Fixed | 0x20002800,
+ NEON_FMAXP_H = NEON3SameFP16Fixed | 0x20003000,
+ NEON_FDIV_H = NEON3SameFP16Fixed | 0x20003800,
+ NEON_FMINNMP_H = NEON3SameFP16Fixed | 0x20800000,
+ NEON_FABD_H = NEON3SameFP16Fixed | 0x20801000,
+ NEON_FCMGT_H = NEON3SameFP16Fixed | 0x20802000,
+ NEON_FACGT_H = NEON3SameFP16Fixed | 0x20802800,
+ NEON_FMINP_H = NEON3SameFP16Fixed | 0x20803000
+};
+
+// 'Extra' NEON instructions with three same-type operands.
+enum NEON3SameExtraOp : uint32_t {
+ NEON3SameExtraFixed = 0x0E008400,
+ NEON3SameExtraUBit = 0x20000000,
+ NEON3SameExtraFMask = 0x9E208400,
+ NEON3SameExtraMask = 0xBE20FC00,
+ NEON_SQRDMLAH = NEON3SameExtraFixed | NEON3SameExtraUBit,
+ NEON_SQRDMLSH = NEON3SameExtraFixed | NEON3SameExtraUBit | 0x00000800,
+ NEON_SDOT = NEON3SameExtraFixed | 0x00001000,
+ NEON_UDOT = NEON3SameExtraFixed | NEON3SameExtraUBit | 0x00001000,
+
+ /* v8.3 Complex Numbers */
+ NEON3SameExtraFCFixed = 0x2E00C400,
+ NEON3SameExtraFCFMask = 0xBF20C400,
+ // FCMLA fixes opcode<3:2>, and uses opcode<1:0> to encode <rotate>.
+ NEON3SameExtraFCMLAMask = NEON3SameExtraFCFMask | 0x00006000,
+ NEON_FCMLA = NEON3SameExtraFCFixed,
+ // FCADD fixes opcode<3:2, 0>, and uses opcode<1> to encode <rotate>.
+ NEON3SameExtraFCADDMask = NEON3SameExtraFCFMask | 0x00006800,
+ NEON_FCADD = NEON3SameExtraFCFixed | 0x00002000
+ // Other encodings under NEON3SameExtraFCFMask are UNALLOCATED.
+};
+
+// NEON instructions with three different-type operands.
+enum NEON3DifferentOp : uint32_t {
+ NEON3DifferentFixed = 0x0E200000,
+ NEON3DifferentFMask = 0x9F200C00,
+ NEON3DifferentMask = 0xFF20FC00,
+ NEON_ADDHN = NEON3DifferentFixed | 0x00004000,
+ NEON_ADDHN2 = NEON_ADDHN | NEON_Q,
+ NEON_PMULL = NEON3DifferentFixed | 0x0000E000,
+ NEON_PMULL2 = NEON_PMULL | NEON_Q,
+ NEON_RADDHN = NEON3DifferentFixed | 0x20004000,
+ NEON_RADDHN2 = NEON_RADDHN | NEON_Q,
+ NEON_RSUBHN = NEON3DifferentFixed | 0x20006000,
+ NEON_RSUBHN2 = NEON_RSUBHN | NEON_Q,
+ NEON_SABAL = NEON3DifferentFixed | 0x00005000,
+ NEON_SABAL2 = NEON_SABAL | NEON_Q,
+ NEON_SABDL = NEON3DifferentFixed | 0x00007000,
+ NEON_SABDL2 = NEON_SABDL | NEON_Q,
+ NEON_SADDL = NEON3DifferentFixed | 0x00000000,
+ NEON_SADDL2 = NEON_SADDL | NEON_Q,
+ NEON_SADDW = NEON3DifferentFixed | 0x00001000,
+ NEON_SADDW2 = NEON_SADDW | NEON_Q,
+ NEON_SMLAL = NEON3DifferentFixed | 0x00008000,
+ NEON_SMLAL2 = NEON_SMLAL | NEON_Q,
+ NEON_SMLSL = NEON3DifferentFixed | 0x0000A000,
+ NEON_SMLSL2 = NEON_SMLSL | NEON_Q,
+ NEON_SMULL = NEON3DifferentFixed | 0x0000C000,
+ NEON_SMULL2 = NEON_SMULL | NEON_Q,
+ NEON_SSUBL = NEON3DifferentFixed | 0x00002000,
+ NEON_SSUBL2 = NEON_SSUBL | NEON_Q,
+ NEON_SSUBW = NEON3DifferentFixed | 0x00003000,
+ NEON_SSUBW2 = NEON_SSUBW | NEON_Q,
+ NEON_SQDMLAL = NEON3DifferentFixed | 0x00009000,
+ NEON_SQDMLAL2 = NEON_SQDMLAL | NEON_Q,
+ NEON_SQDMLSL = NEON3DifferentFixed | 0x0000B000,
+ NEON_SQDMLSL2 = NEON_SQDMLSL | NEON_Q,
+ NEON_SQDMULL = NEON3DifferentFixed | 0x0000D000,
+ NEON_SQDMULL2 = NEON_SQDMULL | NEON_Q,
+ NEON_SUBHN = NEON3DifferentFixed | 0x00006000,
+ NEON_SUBHN2 = NEON_SUBHN | NEON_Q,
+ NEON_UABAL = NEON_SABAL | NEON3SameUBit,
+ NEON_UABAL2 = NEON_UABAL | NEON_Q,
+ NEON_UABDL = NEON_SABDL | NEON3SameUBit,
+ NEON_UABDL2 = NEON_UABDL | NEON_Q,
+ NEON_UADDL = NEON_SADDL | NEON3SameUBit,
+ NEON_UADDL2 = NEON_UADDL | NEON_Q,
+ NEON_UADDW = NEON_SADDW | NEON3SameUBit,
+ NEON_UADDW2 = NEON_UADDW | NEON_Q,
+ NEON_UMLAL = NEON_SMLAL | NEON3SameUBit,
+ NEON_UMLAL2 = NEON_UMLAL | NEON_Q,
+ NEON_UMLSL = NEON_SMLSL | NEON3SameUBit,
+ NEON_UMLSL2 = NEON_UMLSL | NEON_Q,
+ NEON_UMULL = NEON_SMULL | NEON3SameUBit,
+ NEON_UMULL2 = NEON_UMULL | NEON_Q,
+ NEON_USUBL = NEON_SSUBL | NEON3SameUBit,
+ NEON_USUBL2 = NEON_USUBL | NEON_Q,
+ NEON_USUBW = NEON_SSUBW | NEON3SameUBit,
+ NEON_USUBW2 = NEON_USUBW | NEON_Q
+};
+
+// NEON instructions operating across vectors.
+enum NEONAcrossLanesOp : uint32_t {
+ NEONAcrossLanesFixed = 0x0E300800,
+ NEONAcrossLanesFMask = 0x9F3E0C00,
+ NEONAcrossLanesMask = 0xBF3FFC00,
+ NEON_ADDV = NEONAcrossLanesFixed | 0x0001B000,
+ NEON_SADDLV = NEONAcrossLanesFixed | 0x00003000,
+ NEON_UADDLV = NEONAcrossLanesFixed | 0x20003000,
+ NEON_SMAXV = NEONAcrossLanesFixed | 0x0000A000,
+ NEON_SMINV = NEONAcrossLanesFixed | 0x0001A000,
+ NEON_UMAXV = NEONAcrossLanesFixed | 0x2000A000,
+ NEON_UMINV = NEONAcrossLanesFixed | 0x2001A000,
+
+ NEONAcrossLanesFP16Fixed = NEONAcrossLanesFixed | 0x0000C000,
+ NEONAcrossLanesFP16FMask = NEONAcrossLanesFMask | 0x2000C000,
+ NEONAcrossLanesFP16Mask = NEONAcrossLanesMask | 0x20800000,
+ NEON_FMAXNMV_H = NEONAcrossLanesFP16Fixed | 0x00000000,
+ NEON_FMAXV_H = NEONAcrossLanesFP16Fixed | 0x00003000,
+ NEON_FMINNMV_H = NEONAcrossLanesFP16Fixed | 0x00800000,
+ NEON_FMINV_H = NEONAcrossLanesFP16Fixed | 0x00803000,
+
+ // NEON floating point across instructions.
+ NEONAcrossLanesFPFixed = NEONAcrossLanesFixed | 0x2000C000,
+ NEONAcrossLanesFPFMask = NEONAcrossLanesFMask | 0x2000C000,
+ NEONAcrossLanesFPMask = NEONAcrossLanesMask | 0x20800000,
+
+ NEON_FMAXV = NEONAcrossLanesFPFixed | 0x2000F000,
+ NEON_FMINV = NEONAcrossLanesFPFixed | 0x2080F000,
+ NEON_FMAXNMV = NEONAcrossLanesFPFixed | 0x2000C000,
+ NEON_FMINNMV = NEONAcrossLanesFPFixed | 0x2080C000
+};
+
+// NEON instructions with indexed element operand.
+enum NEONByIndexedElementOp : uint32_t {
+ NEONByIndexedElementFixed = 0x0F000000,
+ NEONByIndexedElementFMask = 0x9F000400,
+ NEONByIndexedElementMask = 0xBF00F400,
+ NEON_MUL_byelement = NEONByIndexedElementFixed | 0x00008000,
+ NEON_MLA_byelement = NEONByIndexedElementFixed | 0x20000000,
+ NEON_MLS_byelement = NEONByIndexedElementFixed | 0x20004000,
+ NEON_SMULL_byelement = NEONByIndexedElementFixed | 0x0000A000,
+ NEON_SMLAL_byelement = NEONByIndexedElementFixed | 0x00002000,
+ NEON_SMLSL_byelement = NEONByIndexedElementFixed | 0x00006000,
+ NEON_UMULL_byelement = NEONByIndexedElementFixed | 0x2000A000,
+ NEON_UMLAL_byelement = NEONByIndexedElementFixed | 0x20002000,
+ NEON_UMLSL_byelement = NEONByIndexedElementFixed | 0x20006000,
+ NEON_SQDMULL_byelement = NEONByIndexedElementFixed | 0x0000B000,
+ NEON_SQDMLAL_byelement = NEONByIndexedElementFixed | 0x00003000,
+ NEON_SQDMLSL_byelement = NEONByIndexedElementFixed | 0x00007000,
+ NEON_SQDMULH_byelement = NEONByIndexedElementFixed | 0x0000C000,
+ NEON_SQRDMULH_byelement = NEONByIndexedElementFixed | 0x0000D000,
+ NEON_SDOT_byelement = NEONByIndexedElementFixed | 0x0000E000,
+ NEON_SQRDMLAH_byelement = NEONByIndexedElementFixed | 0x2000D000,
+ NEON_UDOT_byelement = NEONByIndexedElementFixed | 0x2000E000,
+ NEON_SQRDMLSH_byelement = NEONByIndexedElementFixed | 0x2000F000,
+
+ NEON_FMLA_H_byelement = NEONByIndexedElementFixed | 0x00001000,
+ NEON_FMLS_H_byelement = NEONByIndexedElementFixed | 0x00005000,
+ NEON_FMUL_H_byelement = NEONByIndexedElementFixed | 0x00009000,
+ NEON_FMULX_H_byelement = NEONByIndexedElementFixed | 0x20009000,
+
+ // Floating point instructions.
+ NEONByIndexedElementFPFixed = NEONByIndexedElementFixed | 0x00800000,
+ NEONByIndexedElementFPMask = NEONByIndexedElementMask | 0x00800000,
+ NEON_FMLA_byelement = NEONByIndexedElementFPFixed | 0x00001000,
+ NEON_FMLS_byelement = NEONByIndexedElementFPFixed | 0x00005000,
+ NEON_FMUL_byelement = NEONByIndexedElementFPFixed | 0x00009000,
+ NEON_FMULX_byelement = NEONByIndexedElementFPFixed | 0x20009000,
+
+ // FMLAL-like instructions.
+ // For all cases: U = x, size = 10, opcode = xx00
+ NEONByIndexedElementFPLongFixed = NEONByIndexedElementFixed | 0x00800000,
+ NEONByIndexedElementFPLongFMask = NEONByIndexedElementFMask | 0x00C03000,
+ NEONByIndexedElementFPLongMask = 0xBFC0F400,
+ NEON_FMLAL_H_byelement = NEONByIndexedElementFixed | 0x00800000,
+ NEON_FMLAL2_H_byelement = NEONByIndexedElementFixed | 0x20808000,
+ NEON_FMLSL_H_byelement = NEONByIndexedElementFixed | 0x00804000,
+ NEON_FMLSL2_H_byelement = NEONByIndexedElementFixed | 0x2080C000,
+
+ // Complex instruction(s).
+ // This is necessary because the 'rot' encoding moves into the
+ // NEONByIndex..Mask space.
+ NEONByIndexedElementFPComplexMask = 0xBF009400,
+ NEON_FCMLA_byelement = NEONByIndexedElementFixed | 0x20001000
+};
+
+// NEON register copy.
+enum NEONCopyOp : uint32_t {
+ NEONCopyFixed = 0x0E000400,
+ NEONCopyFMask = 0x9FE08400,
+ NEONCopyMask = 0x3FE08400,
+ NEONCopyInsElementMask = NEONCopyMask | 0x40000000,
+ NEONCopyInsGeneralMask = NEONCopyMask | 0x40007800,
+ NEONCopyDupElementMask = NEONCopyMask | 0x20007800,
+ NEONCopyDupGeneralMask = NEONCopyDupElementMask,
+ NEONCopyUmovMask = NEONCopyMask | 0x20007800,
+ NEONCopySmovMask = NEONCopyMask | 0x20007800,
+ NEON_INS_ELEMENT = NEONCopyFixed | 0x60000000,
+ NEON_INS_GENERAL = NEONCopyFixed | 0x40001800,
+ NEON_DUP_ELEMENT = NEONCopyFixed | 0x00000000,
+ NEON_DUP_GENERAL = NEONCopyFixed | 0x00000800,
+ NEON_SMOV = NEONCopyFixed | 0x00002800,
+ NEON_UMOV = NEONCopyFixed | 0x00003800
+};
+
+// NEON extract.
+enum NEONExtractOp : uint32_t {
+ NEONExtractFixed = 0x2E000000,
+ NEONExtractFMask = 0xBF208400,
+ NEONExtractMask = 0xBFE08400,
+ NEON_EXT = NEONExtractFixed | 0x00000000
+};
+
+enum NEONLoadStoreMultiOp : uint32_t {
+ NEONLoadStoreMultiL = 0x00400000,
+ NEONLoadStoreMulti1_1v = 0x00007000,
+ NEONLoadStoreMulti1_2v = 0x0000A000,
+ NEONLoadStoreMulti1_3v = 0x00006000,
+ NEONLoadStoreMulti1_4v = 0x00002000,
+ NEONLoadStoreMulti2 = 0x00008000,
+ NEONLoadStoreMulti3 = 0x00004000,
+ NEONLoadStoreMulti4 = 0x00000000
+};
+
+// NEON load/store multiple structures.
+enum NEONLoadStoreMultiStructOp : uint32_t {
+ NEONLoadStoreMultiStructFixed = 0x0C000000,
+ NEONLoadStoreMultiStructFMask = 0xBFBF0000,
+ NEONLoadStoreMultiStructMask = 0xBFFFF000,
+ NEONLoadStoreMultiStructStore = NEONLoadStoreMultiStructFixed,
+ NEONLoadStoreMultiStructLoad = NEONLoadStoreMultiStructFixed |
+ NEONLoadStoreMultiL,
+ NEON_LD1_1v = NEONLoadStoreMultiStructLoad | NEONLoadStoreMulti1_1v,
+ NEON_LD1_2v = NEONLoadStoreMultiStructLoad | NEONLoadStoreMulti1_2v,
+ NEON_LD1_3v = NEONLoadStoreMultiStructLoad | NEONLoadStoreMulti1_3v,
+ NEON_LD1_4v = NEONLoadStoreMultiStructLoad | NEONLoadStoreMulti1_4v,
+ NEON_LD2 = NEONLoadStoreMultiStructLoad | NEONLoadStoreMulti2,
+ NEON_LD3 = NEONLoadStoreMultiStructLoad | NEONLoadStoreMulti3,
+ NEON_LD4 = NEONLoadStoreMultiStructLoad | NEONLoadStoreMulti4,
+ NEON_ST1_1v = NEONLoadStoreMultiStructStore | NEONLoadStoreMulti1_1v,
+ NEON_ST1_2v = NEONLoadStoreMultiStructStore | NEONLoadStoreMulti1_2v,
+ NEON_ST1_3v = NEONLoadStoreMultiStructStore | NEONLoadStoreMulti1_3v,
+ NEON_ST1_4v = NEONLoadStoreMultiStructStore | NEONLoadStoreMulti1_4v,
+ NEON_ST2 = NEONLoadStoreMultiStructStore | NEONLoadStoreMulti2,
+ NEON_ST3 = NEONLoadStoreMultiStructStore | NEONLoadStoreMulti3,
+ NEON_ST4 = NEONLoadStoreMultiStructStore | NEONLoadStoreMulti4
+};
+
+// NEON load/store multiple structures with post-index addressing.
+enum NEONLoadStoreMultiStructPostIndexOp : uint32_t {
+ NEONLoadStoreMultiStructPostIndexFixed = 0x0C800000,
+ NEONLoadStoreMultiStructPostIndexFMask = 0xBFA00000,
+ NEONLoadStoreMultiStructPostIndexMask = 0xBFE0F000,
+ NEONLoadStoreMultiStructPostIndex = 0x00800000,
+ NEON_LD1_1v_post = NEON_LD1_1v | NEONLoadStoreMultiStructPostIndex,
+ NEON_LD1_2v_post = NEON_LD1_2v | NEONLoadStoreMultiStructPostIndex,
+ NEON_LD1_3v_post = NEON_LD1_3v | NEONLoadStoreMultiStructPostIndex,
+ NEON_LD1_4v_post = NEON_LD1_4v | NEONLoadStoreMultiStructPostIndex,
+ NEON_LD2_post = NEON_LD2 | NEONLoadStoreMultiStructPostIndex,
+ NEON_LD3_post = NEON_LD3 | NEONLoadStoreMultiStructPostIndex,
+ NEON_LD4_post = NEON_LD4 | NEONLoadStoreMultiStructPostIndex,
+ NEON_ST1_1v_post = NEON_ST1_1v | NEONLoadStoreMultiStructPostIndex,
+ NEON_ST1_2v_post = NEON_ST1_2v | NEONLoadStoreMultiStructPostIndex,
+ NEON_ST1_3v_post = NEON_ST1_3v | NEONLoadStoreMultiStructPostIndex,
+ NEON_ST1_4v_post = NEON_ST1_4v | NEONLoadStoreMultiStructPostIndex,
+ NEON_ST2_post = NEON_ST2 | NEONLoadStoreMultiStructPostIndex,
+ NEON_ST3_post = NEON_ST3 | NEONLoadStoreMultiStructPostIndex,
+ NEON_ST4_post = NEON_ST4 | NEONLoadStoreMultiStructPostIndex
+};
+
+enum NEONLoadStoreSingleOp : uint32_t {
+ NEONLoadStoreSingle1 = 0x00000000,
+ NEONLoadStoreSingle2 = 0x00200000,
+ NEONLoadStoreSingle3 = 0x00002000,
+ NEONLoadStoreSingle4 = 0x00202000,
+ NEONLoadStoreSingleL = 0x00400000,
+ NEONLoadStoreSingle_b = 0x00000000,
+ NEONLoadStoreSingle_h = 0x00004000,
+ NEONLoadStoreSingle_s = 0x00008000,
+ NEONLoadStoreSingle_d = 0x00008400,
+ NEONLoadStoreSingleAllLanes = 0x0000C000,
+ NEONLoadStoreSingleLenMask = 0x00202000
+};
+
+// NEON load/store single structure.
+enum NEONLoadStoreSingleStructOp : uint32_t {
+ NEONLoadStoreSingleStructFixed = 0x0D000000,
+ NEONLoadStoreSingleStructFMask = 0xBF9F0000,
+ NEONLoadStoreSingleStructMask = 0xBFFFE000,
+ NEONLoadStoreSingleStructStore = NEONLoadStoreSingleStructFixed,
+ NEONLoadStoreSingleStructLoad = NEONLoadStoreSingleStructFixed |
+ NEONLoadStoreSingleL,
+ NEONLoadStoreSingleStructLoad1 = NEONLoadStoreSingle1 |
+ NEONLoadStoreSingleStructLoad,
+ NEONLoadStoreSingleStructLoad2 = NEONLoadStoreSingle2 |
+ NEONLoadStoreSingleStructLoad,
+ NEONLoadStoreSingleStructLoad3 = NEONLoadStoreSingle3 |
+ NEONLoadStoreSingleStructLoad,
+ NEONLoadStoreSingleStructLoad4 = NEONLoadStoreSingle4 |
+ NEONLoadStoreSingleStructLoad,
+ NEONLoadStoreSingleStructStore1 = NEONLoadStoreSingle1 |
+ NEONLoadStoreSingleStructFixed,
+ NEONLoadStoreSingleStructStore2 = NEONLoadStoreSingle2 |
+ NEONLoadStoreSingleStructFixed,
+ NEONLoadStoreSingleStructStore3 = NEONLoadStoreSingle3 |
+ NEONLoadStoreSingleStructFixed,
+ NEONLoadStoreSingleStructStore4 = NEONLoadStoreSingle4 |
+ NEONLoadStoreSingleStructFixed,
+ NEON_LD1_b = NEONLoadStoreSingleStructLoad1 | NEONLoadStoreSingle_b,
+ NEON_LD1_h = NEONLoadStoreSingleStructLoad1 | NEONLoadStoreSingle_h,
+ NEON_LD1_s = NEONLoadStoreSingleStructLoad1 | NEONLoadStoreSingle_s,
+ NEON_LD1_d = NEONLoadStoreSingleStructLoad1 | NEONLoadStoreSingle_d,
+ NEON_LD1R = NEONLoadStoreSingleStructLoad1 | NEONLoadStoreSingleAllLanes,
+ NEON_ST1_b = NEONLoadStoreSingleStructStore1 | NEONLoadStoreSingle_b,
+ NEON_ST1_h = NEONLoadStoreSingleStructStore1 | NEONLoadStoreSingle_h,
+ NEON_ST1_s = NEONLoadStoreSingleStructStore1 | NEONLoadStoreSingle_s,
+ NEON_ST1_d = NEONLoadStoreSingleStructStore1 | NEONLoadStoreSingle_d,
+
+ NEON_LD2_b = NEONLoadStoreSingleStructLoad2 | NEONLoadStoreSingle_b,
+ NEON_LD2_h = NEONLoadStoreSingleStructLoad2 | NEONLoadStoreSingle_h,
+ NEON_LD2_s = NEONLoadStoreSingleStructLoad2 | NEONLoadStoreSingle_s,
+ NEON_LD2_d = NEONLoadStoreSingleStructLoad2 | NEONLoadStoreSingle_d,
+ NEON_LD2R = NEONLoadStoreSingleStructLoad2 | NEONLoadStoreSingleAllLanes,
+ NEON_ST2_b = NEONLoadStoreSingleStructStore2 | NEONLoadStoreSingle_b,
+ NEON_ST2_h = NEONLoadStoreSingleStructStore2 | NEONLoadStoreSingle_h,
+ NEON_ST2_s = NEONLoadStoreSingleStructStore2 | NEONLoadStoreSingle_s,
+ NEON_ST2_d = NEONLoadStoreSingleStructStore2 | NEONLoadStoreSingle_d,
+
+ NEON_LD3_b = NEONLoadStoreSingleStructLoad3 | NEONLoadStoreSingle_b,
+ NEON_LD3_h = NEONLoadStoreSingleStructLoad3 | NEONLoadStoreSingle_h,
+ NEON_LD3_s = NEONLoadStoreSingleStructLoad3 | NEONLoadStoreSingle_s,
+ NEON_LD3_d = NEONLoadStoreSingleStructLoad3 | NEONLoadStoreSingle_d,
+ NEON_LD3R = NEONLoadStoreSingleStructLoad3 | NEONLoadStoreSingleAllLanes,
+ NEON_ST3_b = NEONLoadStoreSingleStructStore3 | NEONLoadStoreSingle_b,
+ NEON_ST3_h = NEONLoadStoreSingleStructStore3 | NEONLoadStoreSingle_h,
+ NEON_ST3_s = NEONLoadStoreSingleStructStore3 | NEONLoadStoreSingle_s,
+ NEON_ST3_d = NEONLoadStoreSingleStructStore3 | NEONLoadStoreSingle_d,
+
+ NEON_LD4_b = NEONLoadStoreSingleStructLoad4 | NEONLoadStoreSingle_b,
+ NEON_LD4_h = NEONLoadStoreSingleStructLoad4 | NEONLoadStoreSingle_h,
+ NEON_LD4_s = NEONLoadStoreSingleStructLoad4 | NEONLoadStoreSingle_s,
+ NEON_LD4_d = NEONLoadStoreSingleStructLoad4 | NEONLoadStoreSingle_d,
+ NEON_LD4R = NEONLoadStoreSingleStructLoad4 | NEONLoadStoreSingleAllLanes,
+ NEON_ST4_b = NEONLoadStoreSingleStructStore4 | NEONLoadStoreSingle_b,
+ NEON_ST4_h = NEONLoadStoreSingleStructStore4 | NEONLoadStoreSingle_h,
+ NEON_ST4_s = NEONLoadStoreSingleStructStore4 | NEONLoadStoreSingle_s,
+ NEON_ST4_d = NEONLoadStoreSingleStructStore4 | NEONLoadStoreSingle_d
+};
+
+// NEON load/store single structure with post-index addressing.
+enum NEONLoadStoreSingleStructPostIndexOp : uint32_t {
+ NEONLoadStoreSingleStructPostIndexFixed = 0x0D800000,
+ NEONLoadStoreSingleStructPostIndexFMask = 0xBF800000,
+ NEONLoadStoreSingleStructPostIndexMask = 0xBFE0E000,
+ NEONLoadStoreSingleStructPostIndex = 0x00800000,
+ NEON_LD1_b_post = NEON_LD1_b | NEONLoadStoreSingleStructPostIndex,
+ NEON_LD1_h_post = NEON_LD1_h | NEONLoadStoreSingleStructPostIndex,
+ NEON_LD1_s_post = NEON_LD1_s | NEONLoadStoreSingleStructPostIndex,
+ NEON_LD1_d_post = NEON_LD1_d | NEONLoadStoreSingleStructPostIndex,
+ NEON_LD1R_post = NEON_LD1R | NEONLoadStoreSingleStructPostIndex,
+ NEON_ST1_b_post = NEON_ST1_b | NEONLoadStoreSingleStructPostIndex,
+ NEON_ST1_h_post = NEON_ST1_h | NEONLoadStoreSingleStructPostIndex,
+ NEON_ST1_s_post = NEON_ST1_s | NEONLoadStoreSingleStructPostIndex,
+ NEON_ST1_d_post = NEON_ST1_d | NEONLoadStoreSingleStructPostIndex,
+
+ NEON_LD2_b_post = NEON_LD2_b | NEONLoadStoreSingleStructPostIndex,
+ NEON_LD2_h_post = NEON_LD2_h | NEONLoadStoreSingleStructPostIndex,
+ NEON_LD2_s_post = NEON_LD2_s | NEONLoadStoreSingleStructPostIndex,
+ NEON_LD2_d_post = NEON_LD2_d | NEONLoadStoreSingleStructPostIndex,
+ NEON_LD2R_post = NEON_LD2R | NEONLoadStoreSingleStructPostIndex,
+ NEON_ST2_b_post = NEON_ST2_b | NEONLoadStoreSingleStructPostIndex,
+ NEON_ST2_h_post = NEON_ST2_h | NEONLoadStoreSingleStructPostIndex,
+ NEON_ST2_s_post = NEON_ST2_s | NEONLoadStoreSingleStructPostIndex,
+ NEON_ST2_d_post = NEON_ST2_d | NEONLoadStoreSingleStructPostIndex,
+
+ NEON_LD3_b_post = NEON_LD3_b | NEONLoadStoreSingleStructPostIndex,
+ NEON_LD3_h_post = NEON_LD3_h | NEONLoadStoreSingleStructPostIndex,
+ NEON_LD3_s_post = NEON_LD3_s | NEONLoadStoreSingleStructPostIndex,
+ NEON_LD3_d_post = NEON_LD3_d | NEONLoadStoreSingleStructPostIndex,
+ NEON_LD3R_post = NEON_LD3R | NEONLoadStoreSingleStructPostIndex,
+ NEON_ST3_b_post = NEON_ST3_b | NEONLoadStoreSingleStructPostIndex,
+ NEON_ST3_h_post = NEON_ST3_h | NEONLoadStoreSingleStructPostIndex,
+ NEON_ST3_s_post = NEON_ST3_s | NEONLoadStoreSingleStructPostIndex,
+ NEON_ST3_d_post = NEON_ST3_d | NEONLoadStoreSingleStructPostIndex,
+
+ NEON_LD4_b_post = NEON_LD4_b | NEONLoadStoreSingleStructPostIndex,
+ NEON_LD4_h_post = NEON_LD4_h | NEONLoadStoreSingleStructPostIndex,
+ NEON_LD4_s_post = NEON_LD4_s | NEONLoadStoreSingleStructPostIndex,
+ NEON_LD4_d_post = NEON_LD4_d | NEONLoadStoreSingleStructPostIndex,
+ NEON_LD4R_post = NEON_LD4R | NEONLoadStoreSingleStructPostIndex,
+ NEON_ST4_b_post = NEON_ST4_b | NEONLoadStoreSingleStructPostIndex,
+ NEON_ST4_h_post = NEON_ST4_h | NEONLoadStoreSingleStructPostIndex,
+ NEON_ST4_s_post = NEON_ST4_s | NEONLoadStoreSingleStructPostIndex,
+ NEON_ST4_d_post = NEON_ST4_d | NEONLoadStoreSingleStructPostIndex
+};
+
+// NEON modified immediate.
+enum NEONModifiedImmediateOp : uint32_t {
+ NEONModifiedImmediateFixed = 0x0F000400,
+ NEONModifiedImmediateFMask = 0x9FF80400,
+ NEONModifiedImmediateOpBit = 0x20000000,
+ NEONModifiedImmediate_FMOV = NEONModifiedImmediateFixed | 0x00000800,
+ NEONModifiedImmediate_MOVI = NEONModifiedImmediateFixed | 0x00000000,
+ NEONModifiedImmediate_MVNI = NEONModifiedImmediateFixed | 0x20000000,
+ NEONModifiedImmediate_ORR = NEONModifiedImmediateFixed | 0x00001000,
+ NEONModifiedImmediate_BIC = NEONModifiedImmediateFixed | 0x20001000
+};
+
+// NEON shift immediate.
+enum NEONShiftImmediateOp : uint32_t {
+ NEONShiftImmediateFixed = 0x0F000400,
+ NEONShiftImmediateFMask = 0x9F800400,
+ NEONShiftImmediateMask = 0xBF80FC00,
+ NEONShiftImmediateUBit = 0x20000000,
+ NEON_SHL = NEONShiftImmediateFixed | 0x00005000,
+ NEON_SSHLL = NEONShiftImmediateFixed | 0x0000A000,
+ NEON_USHLL = NEONShiftImmediateFixed | 0x2000A000,
+ NEON_SLI = NEONShiftImmediateFixed | 0x20005000,
+ NEON_SRI = NEONShiftImmediateFixed | 0x20004000,
+ NEON_SHRN = NEONShiftImmediateFixed | 0x00008000,
+ NEON_RSHRN = NEONShiftImmediateFixed | 0x00008800,
+ NEON_UQSHRN = NEONShiftImmediateFixed | 0x20009000,
+ NEON_UQRSHRN = NEONShiftImmediateFixed | 0x20009800,
+ NEON_SQSHRN = NEONShiftImmediateFixed | 0x00009000,
+ NEON_SQRSHRN = NEONShiftImmediateFixed | 0x00009800,
+ NEON_SQSHRUN = NEONShiftImmediateFixed | 0x20008000,
+ NEON_SQRSHRUN = NEONShiftImmediateFixed | 0x20008800,
+ NEON_SSHR = NEONShiftImmediateFixed | 0x00000000,
+ NEON_SRSHR = NEONShiftImmediateFixed | 0x00002000,
+ NEON_USHR = NEONShiftImmediateFixed | 0x20000000,
+ NEON_URSHR = NEONShiftImmediateFixed | 0x20002000,
+ NEON_SSRA = NEONShiftImmediateFixed | 0x00001000,
+ NEON_SRSRA = NEONShiftImmediateFixed | 0x00003000,
+ NEON_USRA = NEONShiftImmediateFixed | 0x20001000,
+ NEON_URSRA = NEONShiftImmediateFixed | 0x20003000,
+ NEON_SQSHLU = NEONShiftImmediateFixed | 0x20006000,
+ NEON_SCVTF_imm = NEONShiftImmediateFixed | 0x0000E000,
+ NEON_UCVTF_imm = NEONShiftImmediateFixed | 0x2000E000,
+ NEON_FCVTZS_imm = NEONShiftImmediateFixed | 0x0000F800,
+ NEON_FCVTZU_imm = NEONShiftImmediateFixed | 0x2000F800,
+ NEON_SQSHL_imm = NEONShiftImmediateFixed | 0x00007000,
+ NEON_UQSHL_imm = NEONShiftImmediateFixed | 0x20007000
+};
+
+// NEON table.
+enum NEONTableOp : uint32_t {
+ NEONTableFixed = 0x0E000000,
+ NEONTableFMask = 0xBF208C00,
+ NEONTableExt = 0x00001000,
+ NEONTableMask = 0xBF20FC00,
+ NEON_TBL_1v = NEONTableFixed | 0x00000000,
+ NEON_TBL_2v = NEONTableFixed | 0x00002000,
+ NEON_TBL_3v = NEONTableFixed | 0x00004000,
+ NEON_TBL_4v = NEONTableFixed | 0x00006000,
+ NEON_TBX_1v = NEON_TBL_1v | NEONTableExt,
+ NEON_TBX_2v = NEON_TBL_2v | NEONTableExt,
+ NEON_TBX_3v = NEON_TBL_3v | NEONTableExt,
+ NEON_TBX_4v = NEON_TBL_4v | NEONTableExt
+};
+
+// NEON perm.
+enum NEONPermOp : uint32_t {
+ NEONPermFixed = 0x0E000800,
+ NEONPermFMask = 0xBF208C00,
+ NEONPermMask = 0x3F20FC00,
+ NEON_UZP1 = NEONPermFixed | 0x00001000,
+ NEON_TRN1 = NEONPermFixed | 0x00002000,
+ NEON_ZIP1 = NEONPermFixed | 0x00003000,
+ NEON_UZP2 = NEONPermFixed | 0x00005000,
+ NEON_TRN2 = NEONPermFixed | 0x00006000,
+ NEON_ZIP2 = NEONPermFixed | 0x00007000
+};
+
+// NEON scalar instructions with two register operands.
+enum NEONScalar2RegMiscOp : uint32_t {
+ NEONScalar2RegMiscFixed = 0x5E200800,
+ NEONScalar2RegMiscFMask = 0xDF3E0C00,
+ NEONScalar2RegMiscMask = NEON_Q | NEONScalar | NEON2RegMiscMask,
+ NEON_CMGT_zero_scalar = NEON_Q | NEONScalar | NEON_CMGT_zero,
+ NEON_CMEQ_zero_scalar = NEON_Q | NEONScalar | NEON_CMEQ_zero,
+ NEON_CMLT_zero_scalar = NEON_Q | NEONScalar | NEON_CMLT_zero,
+ NEON_CMGE_zero_scalar = NEON_Q | NEONScalar | NEON_CMGE_zero,
+ NEON_CMLE_zero_scalar = NEON_Q | NEONScalar | NEON_CMLE_zero,
+ NEON_ABS_scalar = NEON_Q | NEONScalar | NEON_ABS,
+ NEON_SQABS_scalar = NEON_Q | NEONScalar | NEON_SQABS,
+ NEON_NEG_scalar = NEON_Q | NEONScalar | NEON_NEG,
+ NEON_SQNEG_scalar = NEON_Q | NEONScalar | NEON_SQNEG,
+ NEON_SQXTN_scalar = NEON_Q | NEONScalar | NEON_SQXTN,
+ NEON_UQXTN_scalar = NEON_Q | NEONScalar | NEON_UQXTN,
+ NEON_SQXTUN_scalar = NEON_Q | NEONScalar | NEON_SQXTUN,
+ NEON_SUQADD_scalar = NEON_Q | NEONScalar | NEON_SUQADD,
+ NEON_USQADD_scalar = NEON_Q | NEONScalar | NEON_USQADD,
+
+ NEONScalar2RegMiscOpcode = NEON2RegMiscOpcode,
+ NEON_NEG_scalar_opcode = NEON_NEG_scalar & NEONScalar2RegMiscOpcode,
+
+ NEONScalar2RegMiscFPMask = NEONScalar2RegMiscMask | 0x00800000,
+ NEON_FRSQRTE_scalar = NEON_Q | NEONScalar | NEON_FRSQRTE,
+ NEON_FRECPE_scalar = NEON_Q | NEONScalar | NEON_FRECPE,
+ NEON_SCVTF_scalar = NEON_Q | NEONScalar | NEON_SCVTF,
+ NEON_UCVTF_scalar = NEON_Q | NEONScalar | NEON_UCVTF,
+ NEON_FCMGT_zero_scalar = NEON_Q | NEONScalar | NEON_FCMGT_zero,
+ NEON_FCMEQ_zero_scalar = NEON_Q | NEONScalar | NEON_FCMEQ_zero,
+ NEON_FCMLT_zero_scalar = NEON_Q | NEONScalar | NEON_FCMLT_zero,
+ NEON_FCMGE_zero_scalar = NEON_Q | NEONScalar | NEON_FCMGE_zero,
+ NEON_FCMLE_zero_scalar = NEON_Q | NEONScalar | NEON_FCMLE_zero,
+ NEON_FRECPX_scalar = NEONScalar2RegMiscFixed | 0x0081F000,
+ NEON_FCVTNS_scalar = NEON_Q | NEONScalar | NEON_FCVTNS,
+ NEON_FCVTNU_scalar = NEON_Q | NEONScalar | NEON_FCVTNU,
+ NEON_FCVTPS_scalar = NEON_Q | NEONScalar | NEON_FCVTPS,
+ NEON_FCVTPU_scalar = NEON_Q | NEONScalar | NEON_FCVTPU,
+ NEON_FCVTMS_scalar = NEON_Q | NEONScalar | NEON_FCVTMS,
+ NEON_FCVTMU_scalar = NEON_Q | NEONScalar | NEON_FCVTMU,
+ NEON_FCVTZS_scalar = NEON_Q | NEONScalar | NEON_FCVTZS,
+ NEON_FCVTZU_scalar = NEON_Q | NEONScalar | NEON_FCVTZU,
+ NEON_FCVTAS_scalar = NEON_Q | NEONScalar | NEON_FCVTAS,
+ NEON_FCVTAU_scalar = NEON_Q | NEONScalar | NEON_FCVTAU,
+ NEON_FCVTXN_scalar = NEON_Q | NEONScalar | NEON_FCVTXN
+};
+
+// NEON instructions with two register operands (FP16).
+enum NEONScalar2RegMiscFP16Op : uint32_t {
+ NEONScalar2RegMiscFP16Fixed = 0x5E780800,
+ NEONScalar2RegMiscFP16FMask = 0xDF7E0C00,
+ NEONScalar2RegMiscFP16Mask = 0xFFFFFC00,
+ NEON_FCVTNS_H_scalar = NEON_Q | NEONScalar | NEON_FCVTNS_H,
+ NEON_FCVTMS_H_scalar = NEON_Q | NEONScalar | NEON_FCVTMS_H,
+ NEON_FCVTAS_H_scalar = NEON_Q | NEONScalar | NEON_FCVTAS_H,
+ NEON_SCVTF_H_scalar = NEON_Q | NEONScalar | NEON_SCVTF_H,
+ NEON_FCMGT_H_zero_scalar = NEON_Q | NEONScalar | NEON_FCMGT_H_zero,
+ NEON_FCMEQ_H_zero_scalar = NEON_Q | NEONScalar | NEON_FCMEQ_H_zero,
+ NEON_FCMLT_H_zero_scalar = NEON_Q | NEONScalar | NEON_FCMLT_H_zero,
+ NEON_FCVTPS_H_scalar = NEON_Q | NEONScalar | NEON_FCVTPS_H,
+ NEON_FCVTZS_H_scalar = NEON_Q | NEONScalar | NEON_FCVTZS_H,
+ NEON_FRECPE_H_scalar = NEON_Q | NEONScalar | NEON_FRECPE_H,
+ NEON_FRECPX_H_scalar = NEONScalar2RegMiscFP16Fixed | 0x0081F000,
+ NEON_FCVTNU_H_scalar = NEON_Q | NEONScalar | NEON_FCVTNU_H,
+ NEON_FCVTMU_H_scalar = NEON_Q | NEONScalar | NEON_FCVTMU_H,
+ NEON_FCVTAU_H_scalar = NEON_Q | NEONScalar | NEON_FCVTAU_H,
+ NEON_UCVTF_H_scalar = NEON_Q | NEONScalar | NEON_UCVTF_H,
+ NEON_FCMGE_H_zero_scalar = NEON_Q | NEONScalar | NEON_FCMGE_H_zero,
+ NEON_FCMLE_H_zero_scalar = NEON_Q | NEONScalar | NEON_FCMLE_H_zero,
+ NEON_FCVTPU_H_scalar = NEON_Q | NEONScalar | NEON_FCVTPU_H,
+ NEON_FCVTZU_H_scalar = NEON_Q | NEONScalar | NEON_FCVTZU_H,
+ NEON_FRSQRTE_H_scalar = NEON_Q | NEONScalar | NEON_FRSQRTE_H
+};
+
+// NEON scalar instructions with three same-type operands.
+enum NEONScalar3SameOp : uint32_t {
+ NEONScalar3SameFixed = 0x5E200400,
+ NEONScalar3SameFMask = 0xDF200400,
+ NEONScalar3SameMask = 0xFF20FC00,
+ NEON_ADD_scalar = NEON_Q | NEONScalar | NEON_ADD,
+ NEON_CMEQ_scalar = NEON_Q | NEONScalar | NEON_CMEQ,
+ NEON_CMGE_scalar = NEON_Q | NEONScalar | NEON_CMGE,
+ NEON_CMGT_scalar = NEON_Q | NEONScalar | NEON_CMGT,
+ NEON_CMHI_scalar = NEON_Q | NEONScalar | NEON_CMHI,
+ NEON_CMHS_scalar = NEON_Q | NEONScalar | NEON_CMHS,
+ NEON_CMTST_scalar = NEON_Q | NEONScalar | NEON_CMTST,
+ NEON_SUB_scalar = NEON_Q | NEONScalar | NEON_SUB,
+ NEON_UQADD_scalar = NEON_Q | NEONScalar | NEON_UQADD,
+ NEON_SQADD_scalar = NEON_Q | NEONScalar | NEON_SQADD,
+ NEON_UQSUB_scalar = NEON_Q | NEONScalar | NEON_UQSUB,
+ NEON_SQSUB_scalar = NEON_Q | NEONScalar | NEON_SQSUB,
+ NEON_USHL_scalar = NEON_Q | NEONScalar | NEON_USHL,
+ NEON_SSHL_scalar = NEON_Q | NEONScalar | NEON_SSHL,
+ NEON_UQSHL_scalar = NEON_Q | NEONScalar | NEON_UQSHL,
+ NEON_SQSHL_scalar = NEON_Q | NEONScalar | NEON_SQSHL,
+ NEON_URSHL_scalar = NEON_Q | NEONScalar | NEON_URSHL,
+ NEON_SRSHL_scalar = NEON_Q | NEONScalar | NEON_SRSHL,
+ NEON_UQRSHL_scalar = NEON_Q | NEONScalar | NEON_UQRSHL,
+ NEON_SQRSHL_scalar = NEON_Q | NEONScalar | NEON_SQRSHL,
+ NEON_SQDMULH_scalar = NEON_Q | NEONScalar | NEON_SQDMULH,
+ NEON_SQRDMULH_scalar = NEON_Q | NEONScalar | NEON_SQRDMULH,
+
+ // NEON floating point scalar instructions with three same-type operands.
+ NEONScalar3SameFPFixed = NEONScalar3SameFixed | 0x0000C000,
+ NEONScalar3SameFPFMask = NEONScalar3SameFMask | 0x0000C000,
+ NEONScalar3SameFPMask = NEONScalar3SameMask | 0x00800000,
+ NEON_FACGE_scalar = NEON_Q | NEONScalar | NEON_FACGE,
+ NEON_FACGT_scalar = NEON_Q | NEONScalar | NEON_FACGT,
+ NEON_FCMEQ_scalar = NEON_Q | NEONScalar | NEON_FCMEQ,
+ NEON_FCMGE_scalar = NEON_Q | NEONScalar | NEON_FCMGE,
+ NEON_FCMGT_scalar = NEON_Q | NEONScalar | NEON_FCMGT,
+ NEON_FMULX_scalar = NEON_Q | NEONScalar | NEON_FMULX,
+ NEON_FRECPS_scalar = NEON_Q | NEONScalar | NEON_FRECPS,
+ NEON_FRSQRTS_scalar = NEON_Q | NEONScalar | NEON_FRSQRTS,
+ NEON_FABD_scalar = NEON_Q | NEONScalar | NEON_FABD
+};
+
+// NEON scalar instructions with three different-type operands.
+enum NEONScalar3DiffOp : uint32_t {
+ NEONScalar3DiffFixed = 0x5E200000,
+ NEONScalar3DiffFMask = 0xDF200C00,
+ NEONScalar3DiffMask = NEON_Q | NEONScalar | NEON3DifferentMask,
+ NEON_SQDMLAL_scalar = NEON_Q | NEONScalar | NEON_SQDMLAL,
+ NEON_SQDMLSL_scalar = NEON_Q | NEONScalar | NEON_SQDMLSL,
+ NEON_SQDMULL_scalar = NEON_Q | NEONScalar | NEON_SQDMULL
+};
+
+// NEON scalar instructions with indexed element operand.
+enum NEONScalarByIndexedElementOp : uint32_t {
+ NEONScalarByIndexedElementFixed = 0x5F000000,
+ NEONScalarByIndexedElementFMask = 0xDF000400,
+ NEONScalarByIndexedElementMask = 0xFF00F400,
+ NEON_SQDMLAL_byelement_scalar = NEON_Q | NEONScalar | NEON_SQDMLAL_byelement,
+ NEON_SQDMLSL_byelement_scalar = NEON_Q | NEONScalar | NEON_SQDMLSL_byelement,
+ NEON_SQDMULL_byelement_scalar = NEON_Q | NEONScalar | NEON_SQDMULL_byelement,
+ NEON_SQDMULH_byelement_scalar = NEON_Q | NEONScalar | NEON_SQDMULH_byelement,
+ NEON_SQRDMULH_byelement_scalar
+ = NEON_Q | NEONScalar | NEON_SQRDMULH_byelement,
+ NEON_SQRDMLAH_byelement_scalar
+ = NEON_Q | NEONScalar | NEON_SQRDMLAH_byelement,
+ NEON_SQRDMLSH_byelement_scalar
+ = NEON_Q | NEONScalar | NEON_SQRDMLSH_byelement,
+ NEON_FMLA_H_byelement_scalar = NEON_Q | NEONScalar | NEON_FMLA_H_byelement,
+ NEON_FMLS_H_byelement_scalar = NEON_Q | NEONScalar | NEON_FMLS_H_byelement,
+ NEON_FMUL_H_byelement_scalar = NEON_Q | NEONScalar | NEON_FMUL_H_byelement,
+ NEON_FMULX_H_byelement_scalar = NEON_Q | NEONScalar | NEON_FMULX_H_byelement,
+
+ // Floating point instructions.
+ NEONScalarByIndexedElementFPFixed
+ = NEONScalarByIndexedElementFixed | 0x00800000,
+ NEONScalarByIndexedElementFPMask
+ = NEONScalarByIndexedElementMask | 0x00800000,
+ NEON_FMLA_byelement_scalar = NEON_Q | NEONScalar | NEON_FMLA_byelement,
+ NEON_FMLS_byelement_scalar = NEON_Q | NEONScalar | NEON_FMLS_byelement,
+ NEON_FMUL_byelement_scalar = NEON_Q | NEONScalar | NEON_FMUL_byelement,
+ NEON_FMULX_byelement_scalar = NEON_Q | NEONScalar | NEON_FMULX_byelement
+};
+
+// NEON scalar register copy.
+enum NEONScalarCopyOp : uint32_t {
+ NEONScalarCopyFixed = 0x5E000400,
+ NEONScalarCopyFMask = 0xDFE08400,
+ NEONScalarCopyMask = 0xFFE0FC00,
+ NEON_DUP_ELEMENT_scalar = NEON_Q | NEONScalar | NEON_DUP_ELEMENT
+};
+
+// NEON scalar pairwise instructions.
+enum NEONScalarPairwiseOp : uint32_t {
+ NEONScalarPairwiseFixed = 0x5E300800,
+ NEONScalarPairwiseFMask = 0xDF3E0C00,
+ NEONScalarPairwiseMask = 0xFFB1F800,
+ NEON_ADDP_scalar = NEONScalarPairwiseFixed | 0x0081B000,
+ NEON_FMAXNMP_h_scalar = NEONScalarPairwiseFixed | 0x0000C000,
+ NEON_FADDP_h_scalar = NEONScalarPairwiseFixed | 0x0000D000,
+ NEON_FMAXP_h_scalar = NEONScalarPairwiseFixed | 0x0000F000,
+ NEON_FMINNMP_h_scalar = NEONScalarPairwiseFixed | 0x0080C000,
+ NEON_FMINP_h_scalar = NEONScalarPairwiseFixed | 0x0080F000,
+ NEON_FMAXNMP_scalar = NEONScalarPairwiseFixed | 0x2000C000,
+ NEON_FMINNMP_scalar = NEONScalarPairwiseFixed | 0x2080C000,
+ NEON_FADDP_scalar = NEONScalarPairwiseFixed | 0x2000D000,
+ NEON_FMAXP_scalar = NEONScalarPairwiseFixed | 0x2000F000,
+ NEON_FMINP_scalar = NEONScalarPairwiseFixed | 0x2080F000
+};
+
+// NEON scalar shift immediate.
+enum NEONScalarShiftImmediateOp : uint32_t {
+ NEONScalarShiftImmediateFixed = 0x5F000400,
+ NEONScalarShiftImmediateFMask = 0xDF800400,
+ NEONScalarShiftImmediateMask = 0xFF80FC00,
+ NEON_SHL_scalar = NEON_Q | NEONScalar | NEON_SHL,
+ NEON_SLI_scalar = NEON_Q | NEONScalar | NEON_SLI,
+ NEON_SRI_scalar = NEON_Q | NEONScalar | NEON_SRI,
+ NEON_SSHR_scalar = NEON_Q | NEONScalar | NEON_SSHR,
+ NEON_USHR_scalar = NEON_Q | NEONScalar | NEON_USHR,
+ NEON_SRSHR_scalar = NEON_Q | NEONScalar | NEON_SRSHR,
+ NEON_URSHR_scalar = NEON_Q | NEONScalar | NEON_URSHR,
+ NEON_SSRA_scalar = NEON_Q | NEONScalar | NEON_SSRA,
+ NEON_USRA_scalar = NEON_Q | NEONScalar | NEON_USRA,
+ NEON_SRSRA_scalar = NEON_Q | NEONScalar | NEON_SRSRA,
+ NEON_URSRA_scalar = NEON_Q | NEONScalar | NEON_URSRA,
+ NEON_UQSHRN_scalar = NEON_Q | NEONScalar | NEON_UQSHRN,
+ NEON_UQRSHRN_scalar = NEON_Q | NEONScalar | NEON_UQRSHRN,
+ NEON_SQSHRN_scalar = NEON_Q | NEONScalar | NEON_SQSHRN,
+ NEON_SQRSHRN_scalar = NEON_Q | NEONScalar | NEON_SQRSHRN,
+ NEON_SQSHRUN_scalar = NEON_Q | NEONScalar | NEON_SQSHRUN,
+ NEON_SQRSHRUN_scalar = NEON_Q | NEONScalar | NEON_SQRSHRUN,
+ NEON_SQSHLU_scalar = NEON_Q | NEONScalar | NEON_SQSHLU,
+ NEON_SQSHL_imm_scalar = NEON_Q | NEONScalar | NEON_SQSHL_imm,
+ NEON_UQSHL_imm_scalar = NEON_Q | NEONScalar | NEON_UQSHL_imm,
+ NEON_SCVTF_imm_scalar = NEON_Q | NEONScalar | NEON_SCVTF_imm,
+ NEON_UCVTF_imm_scalar = NEON_Q | NEONScalar | NEON_UCVTF_imm,
+ NEON_FCVTZS_imm_scalar = NEON_Q | NEONScalar | NEON_FCVTZS_imm,
+ NEON_FCVTZU_imm_scalar = NEON_Q | NEONScalar | NEON_FCVTZU_imm
+};
+
+enum ReservedOp : uint32_t {
+ ReservedFixed = 0x00000000,
+ ReservedFMask = 0x1E000000,
+ ReservedMask = 0xFFFF0000,
+
+ UDF = ReservedFixed | 0x00000000
+};
+
+// Unimplemented and unallocated instructions. These are defined to make fixed
+// bit assertion easier.
+enum UnimplementedOp : uint32_t {
+ UnimplementedFixed = 0x00000000,
+ UnimplementedFMask = 0x00000000
+};
+
+enum UnallocatedOp : uint32_t {
+ UnallocatedFixed = 0x00000000,
+ UnallocatedFMask = 0x00000000
+};
+
+// Instruction bit pattern for an undefined instruction, that will trigger a
+// SIGILL at runtime.
+//
+// A couple of strategies we can use here. There are no unencoded
+// instructions in the instruction set that are guaranteed to remain that
+// way. However there are some currently (as of 2018) unencoded
+// instructions that are good candidates.
+//
+// Ideally, unencoded instructions should be non-destructive to the register
+// state, and should be unencoded at all exception levels.
+//
+// At the trap the pc will hold the address of the offending instruction.
+//
+// Some candidates for unencoded instructions:
+//
+// 0xd4a00000 (essentially dcps0, a good one since it is nonsensical and may
+// remain unencoded in the future for that reason)
+// 0x33000000 (bfm variant)
+// 0xd67f0000 (br variant)
+// 0x5ac00c00 (rbit variant)
+//
+// This instruction is "dcps0", also has 16-bit payload if needed.
+static constexpr uint32_t UNDEFINED_INST_PATTERN = 0xd4a00000;
+
+} // namespace vixl
+
+#endif // VIXL_A64_CONSTANTS_A64_H_
diff --git a/js/src/jit/arm64/vixl/Cpu-Features-vixl.cpp b/js/src/jit/arm64/vixl/Cpu-Features-vixl.cpp
new file mode 100644
index 0000000000..f31c22fbf5
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Cpu-Features-vixl.cpp
@@ -0,0 +1,231 @@
+// Copyright 2018, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+#include "jit/arm64/vixl/Cpu-Features-vixl.h"
+
+#include <ostream>
+
+#include "jit/arm64/vixl/Cpu-vixl.h"
+#include "jit/arm64/vixl/Globals-vixl.h"
+#include "jit/arm64/vixl/Utils-vixl.h"
+
+#define VIXL_USE_AARCH64_CPU_HELPERS
+
+namespace vixl {
+
+static uint64_t MakeFeatureMask(CPUFeatures::Feature feature) {
+ if (feature == CPUFeatures::kNone) {
+ return 0;
+ } else {
+ // Check that the shift is well-defined, and that the feature is valid.
+ VIXL_STATIC_ASSERT(CPUFeatures::kNumberOfFeatures <=
+ (sizeof(uint64_t) * 8));
+ VIXL_ASSERT(feature < CPUFeatures::kNumberOfFeatures);
+ return UINT64_C(1) << feature;
+ }
+}
+
+CPUFeatures::CPUFeatures(Feature feature0,
+ Feature feature1,
+ Feature feature2,
+ Feature feature3)
+ : features_(0) {
+ Combine(feature0, feature1, feature2, feature3);
+}
+
+CPUFeatures CPUFeatures::All() {
+ CPUFeatures all;
+ // Check that the shift is well-defined.
+ VIXL_STATIC_ASSERT(CPUFeatures::kNumberOfFeatures < (sizeof(uint64_t) * 8));
+ all.features_ = (UINT64_C(1) << kNumberOfFeatures) - 1;
+ return all;
+}
+
+CPUFeatures CPUFeatures::InferFromIDRegisters() {
+ // This function assumes that kIDRegisterEmulation is available.
+ CPUFeatures features(CPUFeatures::kIDRegisterEmulation);
+#ifdef VIXL_USE_AARCH64_CPU_HELPERS
+ // Note that the Linux kernel filters these values during emulation, so the
+ // results may not exactly match the expected hardware support.
+ features.Combine(CPU::InferCPUFeaturesFromIDRegisters());
+#endif
+ return features;
+}
+
+CPUFeatures CPUFeatures::InferFromOS(QueryIDRegistersOption option) {
+#ifdef VIXL_USE_AARCH64_CPU_HELPERS
+ return CPU::InferCPUFeaturesFromOS(option);
+#else
+ USE(option);
+ return CPUFeatures();
+#endif
+}
+
+void CPUFeatures::Combine(const CPUFeatures& other) {
+ features_ |= other.features_;
+}
+
+void CPUFeatures::Combine(Feature feature0,
+ Feature feature1,
+ Feature feature2,
+ Feature feature3) {
+ features_ |= MakeFeatureMask(feature0);
+ features_ |= MakeFeatureMask(feature1);
+ features_ |= MakeFeatureMask(feature2);
+ features_ |= MakeFeatureMask(feature3);
+}
+
+void CPUFeatures::Remove(const CPUFeatures& other) {
+ features_ &= ~other.features_;
+}
+
+void CPUFeatures::Remove(Feature feature0,
+ Feature feature1,
+ Feature feature2,
+ Feature feature3) {
+ features_ &= ~MakeFeatureMask(feature0);
+ features_ &= ~MakeFeatureMask(feature1);
+ features_ &= ~MakeFeatureMask(feature2);
+ features_ &= ~MakeFeatureMask(feature3);
+}
+
+CPUFeatures CPUFeatures::With(const CPUFeatures& other) const {
+ CPUFeatures f(*this);
+ f.Combine(other);
+ return f;
+}
+
+CPUFeatures CPUFeatures::With(Feature feature0,
+ Feature feature1,
+ Feature feature2,
+ Feature feature3) const {
+ CPUFeatures f(*this);
+ f.Combine(feature0, feature1, feature2, feature3);
+ return f;
+}
+
+CPUFeatures CPUFeatures::Without(const CPUFeatures& other) const {
+ CPUFeatures f(*this);
+ f.Remove(other);
+ return f;
+}
+
+CPUFeatures CPUFeatures::Without(Feature feature0,
+ Feature feature1,
+ Feature feature2,
+ Feature feature3) const {
+ CPUFeatures f(*this);
+ f.Remove(feature0, feature1, feature2, feature3);
+ return f;
+}
+
+bool CPUFeatures::Has(const CPUFeatures& other) const {
+ return (features_ & other.features_) == other.features_;
+}
+
+bool CPUFeatures::Has(Feature feature0,
+ Feature feature1,
+ Feature feature2,
+ Feature feature3) const {
+ uint64_t mask = MakeFeatureMask(feature0) | MakeFeatureMask(feature1) |
+ MakeFeatureMask(feature2) | MakeFeatureMask(feature3);
+ return (features_ & mask) == mask;
+}
+
+size_t CPUFeatures::Count() const { return CountSetBits(features_); }
+
+std::ostream& operator<<(std::ostream& os, CPUFeatures::Feature feature) {
+ // clang-format off
+ switch (feature) {
+#define VIXL_FORMAT_FEATURE(SYMBOL, NAME, CPUINFO) \
+ case CPUFeatures::SYMBOL: \
+ return os << NAME;
+VIXL_CPU_FEATURE_LIST(VIXL_FORMAT_FEATURE)
+#undef VIXL_FORMAT_FEATURE
+ case CPUFeatures::kNone:
+ return os << "none";
+ case CPUFeatures::kNumberOfFeatures:
+ VIXL_UNREACHABLE();
+ }
+ // clang-format on
+ VIXL_UNREACHABLE();
+ return os;
+}
+
+CPUFeatures::const_iterator CPUFeatures::begin() const {
+ if (features_ == 0) return const_iterator(this, kNone);
+
+ int feature_number = CountTrailingZeros(features_);
+ vixl::CPUFeatures::Feature feature =
+ static_cast<CPUFeatures::Feature>(feature_number);
+ return const_iterator(this, feature);
+}
+
+CPUFeatures::const_iterator CPUFeatures::end() const {
+ return const_iterator(this, kNone);
+}
+
+std::ostream& operator<<(std::ostream& os, const CPUFeatures& features) {
+ CPUFeatures::const_iterator it = features.begin();
+ while (it != features.end()) {
+ os << *it;
+ ++it;
+ if (it != features.end()) os << ", ";
+ }
+ return os;
+}
+
+bool CPUFeaturesConstIterator::operator==(
+ const CPUFeaturesConstIterator& other) const {
+ VIXL_ASSERT(IsValid());
+ return (cpu_features_ == other.cpu_features_) && (feature_ == other.feature_);
+}
+
+CPUFeatures::Feature CPUFeaturesConstIterator::operator++() { // Prefix
+ VIXL_ASSERT(IsValid());
+ do {
+ // Find the next feature. The order is unspecified.
+ feature_ = static_cast<CPUFeatures::Feature>(feature_ + 1);
+ if (feature_ == CPUFeatures::kNumberOfFeatures) {
+ feature_ = CPUFeatures::kNone;
+ VIXL_STATIC_ASSERT(CPUFeatures::kNone == -1);
+ }
+ VIXL_ASSERT(CPUFeatures::kNone <= feature_);
+ VIXL_ASSERT(feature_ < CPUFeatures::kNumberOfFeatures);
+ // cpu_features_->Has(kNone) is always true, so this will terminate even if
+ // the features list is empty.
+ } while (!cpu_features_->Has(feature_));
+ return feature_;
+}
+
+CPUFeatures::Feature CPUFeaturesConstIterator::operator++(int) { // Postfix
+ CPUFeatures::Feature result = feature_;
+ ++(*this);
+ return result;
+}
+
+} // namespace vixl
diff --git a/js/src/jit/arm64/vixl/Cpu-Features-vixl.h b/js/src/jit/arm64/vixl/Cpu-Features-vixl.h
new file mode 100644
index 0000000000..b980233bf2
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Cpu-Features-vixl.h
@@ -0,0 +1,397 @@
+// Copyright 2018, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_CPU_FEATURES_H
+#define VIXL_CPU_FEATURES_H
+
+#include <ostream>
+
+#include "jit/arm64/vixl/Globals-vixl.h"
+
+
+namespace vixl {
+
+
+// clang-format off
+#define VIXL_CPU_FEATURE_LIST(V) \
+ /* If set, the OS traps and emulates MRS accesses to relevant (EL1) ID_* */ \
+ /* registers, so that the detailed feature registers can be read */ \
+ /* directly. */ \
+ V(kIDRegisterEmulation, "ID register emulation", "cpuid") \
+ \
+ V(kFP, "FP", "fp") \
+ V(kNEON, "NEON", "asimd") \
+ V(kCRC32, "CRC32", "crc32") \
+ /* Cryptographic support instructions. */ \
+ V(kAES, "AES", "aes") \
+ V(kSHA1, "SHA1", "sha1") \
+ V(kSHA2, "SHA2", "sha2") \
+ /* A form of PMULL{2} with a 128-bit (1Q) result. */ \
+ V(kPmull1Q, "Pmull1Q", "pmull") \
+ /* Atomic operations on memory: CAS, LDADD, STADD, SWP, etc. */ \
+ V(kAtomics, "Atomics", "atomics") \
+ /* Limited ordering regions: LDLAR, STLLR and their variants. */ \
+ V(kLORegions, "LORegions", NULL) \
+ /* Rounding doubling multiply add/subtract: SQRDMLAH and SQRDMLSH. */ \
+ V(kRDM, "RDM", "asimdrdm") \
+ /* Scalable Vector Extension. */ \
+ V(kSVE, "SVE", "sve") \
+ /* SDOT and UDOT support (in NEON). */ \
+ V(kDotProduct, "DotProduct", "asimddp") \
+ /* Half-precision (FP16) support for FP and NEON, respectively. */ \
+ V(kFPHalf, "FPHalf", "fphp") \
+ V(kNEONHalf, "NEONHalf", "asimdhp") \
+ /* The RAS extension, including the ESB instruction. */ \
+ V(kRAS, "RAS", NULL) \
+ /* Data cache clean to the point of persistence: DC CVAP. */ \
+ V(kDCPoP, "DCPoP", "dcpop") \
+ /* Data cache clean to the point of deep persistence: DC CVADP. */ \
+ V(kDCCVADP, "DCCVADP", NULL) \
+ /* Cryptographic support instructions. */ \
+ V(kSHA3, "SHA3", "sha3") \
+ V(kSHA512, "SHA512", "sha512") \
+ V(kSM3, "SM3", "sm3") \
+ V(kSM4, "SM4", "sm4") \
+ /* Pointer authentication for addresses. */ \
+ V(kPAuth, "PAuth", NULL) \
+ /* Pointer authentication for addresses uses QARMA. */ \
+ V(kPAuthQARMA, "PAuthQARMA", NULL) \
+ /* Generic authentication (using the PACGA instruction). */ \
+ V(kPAuthGeneric, "PAuthGeneric", NULL) \
+ /* Generic authentication uses QARMA. */ \
+ V(kPAuthGenericQARMA, "PAuthGenericQARMA", NULL) \
+ /* JavaScript-style FP -> integer conversion instruction: FJCVTZS. */ \
+ V(kJSCVT, "JSCVT", "jscvt") \
+ /* Complex number support for NEON: FCMLA and FCADD. */ \
+ V(kFcma, "Fcma", "fcma") \
+ /* RCpc-based model (for weaker release consistency): LDAPR and variants. */ \
+ V(kRCpc, "RCpc", "lrcpc") \
+ V(kRCpcImm, "RCpc (imm)", "ilrcpc") \
+ /* Flag manipulation instructions: SETF{8,16}, CFINV, RMIF. */ \
+ V(kFlagM, "FlagM", "flagm") \
+ /* Unaligned single-copy atomicity. */ \
+ V(kUSCAT, "USCAT", "uscat") \
+ /* FP16 fused multiply-add or -subtract long: FMLAL{2}, FMLSL{2}. */ \
+ V(kFHM, "FHM", "asimdfhm") \
+ /* Data-independent timing (for selected instructions). */ \
+ V(kDIT, "DIT", "dit") \
+ /* Branch target identification. */ \
+ V(kBTI, "BTI", NULL) \
+ /* Flag manipulation instructions: {AX,XA}FLAG */ \
+ V(kAXFlag, "AXFlag", NULL) \
+ /* Random number generation extension, */ \
+ V(kRNG, "RNG", NULL) \
+ /* Floating-point round to {32,64}-bit integer. */ \
+ V(kFrintToFixedSizedInt,"Frint (bounded)", NULL)
+// clang-format on
+
+
+class CPUFeaturesConstIterator;
+
+// A representation of the set of features known to be supported by the target
+// device. Each feature is represented by a simple boolean flag.
+//
+// - When the Assembler is asked to assemble an instruction, it asserts (in
+// debug mode) that the necessary features are available.
+//
+// - TODO: The MacroAssembler relies on the Assembler's assertions, but in
+// some cases it may be useful for macros to generate a fall-back sequence
+// in case features are not available.
+//
+// - The Simulator assumes by default that all features are available, but it
+// is possible to configure it to fail if the simulated code uses features
+// that are not enabled.
+//
+// The Simulator also offers pseudo-instructions to allow features to be
+// enabled and disabled dynamically. This is useful when you want to ensure
+// that some features are constrained to certain areas of code.
+//
+// - The base Disassembler knows nothing about CPU features, but the
+// PrintDisassembler can be configured to annotate its output with warnings
+// about unavailable features. The Simulator uses this feature when
+// instruction trace is enabled.
+//
+// - The Decoder-based components -- the Simulator and PrintDisassembler --
+// rely on a CPUFeaturesAuditor visitor. This visitor keeps a list of
+// features actually encountered so that a large block of code can be
+// examined (either directly or through simulation), and the required
+// features analysed later.
+//
+// Expected usage:
+//
+// // By default, VIXL uses CPUFeatures::AArch64LegacyBaseline(), for
+// // compatibility with older version of VIXL.
+// MacroAssembler masm;
+//
+// // Generate code only for the current CPU.
+// masm.SetCPUFeatures(CPUFeatures::InferFromOS());
+//
+// // Turn off feature checking entirely.
+// masm.SetCPUFeatures(CPUFeatures::All());
+//
+// Feature set manipulation:
+//
+// CPUFeatures f; // The default constructor gives an empty set.
+// // Individual features can be added (or removed).
+// f.Combine(CPUFeatures::kFP, CPUFeatures::kNEON, CPUFeatures::AES);
+// f.Remove(CPUFeatures::kNEON);
+//
+// // Some helpers exist for extensions that provide several features.
+// f.Remove(CPUFeatures::All());
+// f.Combine(CPUFeatures::AArch64LegacyBaseline());
+//
+// // Chained construction is also possible.
+// CPUFeatures g =
+// f.With(CPUFeatures::kPmull1Q).Without(CPUFeatures::kCRC32);
+//
+// // Features can be queried. Where multiple features are given, they are
+// // combined with logical AND.
+// if (h.Has(CPUFeatures::kNEON)) { ... }
+// if (h.Has(CPUFeatures::kFP, CPUFeatures::kNEON)) { ... }
+// if (h.Has(g)) { ... }
+// // If the empty set is requested, the result is always 'true'.
+// VIXL_ASSERT(h.Has(CPUFeatures()));
+//
+// // For debug and reporting purposes, features can be enumerated (or
+// // printed directly):
+// std::cout << CPUFeatures::kNEON; // Prints something like "NEON".
+// std::cout << f; // Prints something like "FP, NEON, CRC32".
+class CPUFeatures {
+ public:
+ // clang-format off
+ // Individual features.
+ // These should be treated as opaque tokens. User code should not rely on
+ // specific numeric values or ordering.
+ enum Feature {
+ // Refer to VIXL_CPU_FEATURE_LIST (above) for the list of feature names that
+ // this class supports.
+
+ kNone = -1,
+#define VIXL_DECLARE_FEATURE(SYMBOL, NAME, CPUINFO) SYMBOL,
+ VIXL_CPU_FEATURE_LIST(VIXL_DECLARE_FEATURE)
+#undef VIXL_DECLARE_FEATURE
+ kNumberOfFeatures
+ };
+ // clang-format on
+
+ // By default, construct with no features enabled.
+ CPUFeatures() : features_(0) {}
+
+ // Construct with some features already enabled.
+ CPUFeatures(Feature feature0,
+ Feature feature1 = kNone,
+ Feature feature2 = kNone,
+ Feature feature3 = kNone);
+
+ // Construct with all features enabled. This can be used to disable feature
+ // checking: `Has(...)` returns true regardless of the argument.
+ static CPUFeatures All();
+
+ // Construct an empty CPUFeatures. This is equivalent to the default
+ // constructor, but is provided for symmetry and convenience.
+ static CPUFeatures None() { return CPUFeatures(); }
+
+ // The presence of these features was assumed by version of VIXL before this
+ // API was added, so using this set by default ensures API compatibility.
+ static CPUFeatures AArch64LegacyBaseline() {
+ return CPUFeatures(kFP, kNEON, kCRC32);
+ }
+
+ // Construct a new CPUFeatures object using ID registers. This assumes that
+ // kIDRegisterEmulation is present.
+ static CPUFeatures InferFromIDRegisters();
+
+ enum QueryIDRegistersOption {
+ kDontQueryIDRegisters,
+ kQueryIDRegistersIfAvailable
+ };
+
+ // Construct a new CPUFeatures object based on what the OS reports.
+ static CPUFeatures InferFromOS(
+ QueryIDRegistersOption option = kQueryIDRegistersIfAvailable);
+
+ // Combine another CPUFeatures object into this one. Features that already
+ // exist in this set are left unchanged.
+ void Combine(const CPUFeatures& other);
+
+ // Combine specific features into this set. Features that already exist in
+ // this set are left unchanged.
+ void Combine(Feature feature0,
+ Feature feature1 = kNone,
+ Feature feature2 = kNone,
+ Feature feature3 = kNone);
+
+ // Remove features in another CPUFeatures object from this one.
+ void Remove(const CPUFeatures& other);
+
+ // Remove specific features from this set.
+ void Remove(Feature feature0,
+ Feature feature1 = kNone,
+ Feature feature2 = kNone,
+ Feature feature3 = kNone);
+
+ // Chaining helpers for convenient construction.
+ CPUFeatures With(const CPUFeatures& other) const;
+ CPUFeatures With(Feature feature0,
+ Feature feature1 = kNone,
+ Feature feature2 = kNone,
+ Feature feature3 = kNone) const;
+ CPUFeatures Without(const CPUFeatures& other) const;
+ CPUFeatures Without(Feature feature0,
+ Feature feature1 = kNone,
+ Feature feature2 = kNone,
+ Feature feature3 = kNone) const;
+
+ // Query features.
+ // Note that an empty query (like `Has(kNone)`) always returns true.
+ bool Has(const CPUFeatures& other) const;
+ bool Has(Feature feature0,
+ Feature feature1 = kNone,
+ Feature feature2 = kNone,
+ Feature feature3 = kNone) const;
+
+ // Return the number of enabled features.
+ size_t Count() const;
+ bool HasNoFeatures() const { return Count() == 0; }
+
+ // Check for equivalence.
+ bool operator==(const CPUFeatures& other) const {
+ return Has(other) && other.Has(*this);
+ }
+ bool operator!=(const CPUFeatures& other) const { return !(*this == other); }
+
+ typedef CPUFeaturesConstIterator const_iterator;
+
+ const_iterator begin() const;
+ const_iterator end() const;
+
+ private:
+ // Each bit represents a feature. This field will be replaced as needed if
+ // features are added.
+ uint64_t features_;
+
+ friend std::ostream& operator<<(std::ostream& os,
+ const vixl::CPUFeatures& features);
+};
+
+std::ostream& operator<<(std::ostream& os, vixl::CPUFeatures::Feature feature);
+std::ostream& operator<<(std::ostream& os, const vixl::CPUFeatures& features);
+
+// This is not a proper C++ iterator type, but it simulates enough of
+// ForwardIterator that simple loops can be written.
+class CPUFeaturesConstIterator {
+ public:
+ CPUFeaturesConstIterator(const CPUFeatures* cpu_features = NULL,
+ CPUFeatures::Feature start = CPUFeatures::kNone)
+ : cpu_features_(cpu_features), feature_(start) {
+ VIXL_ASSERT(IsValid());
+ }
+
+ bool operator==(const CPUFeaturesConstIterator& other) const;
+ bool operator!=(const CPUFeaturesConstIterator& other) const {
+ return !(*this == other);
+ }
+ CPUFeatures::Feature operator++();
+ CPUFeatures::Feature operator++(int);
+
+ CPUFeatures::Feature operator*() const {
+ VIXL_ASSERT(IsValid());
+ return feature_;
+ }
+
+ // For proper support of C++'s simplest "Iterator" concept, this class would
+ // have to define member types (such as CPUFeaturesIterator::pointer) to make
+ // it appear as if it iterates over Feature objects in memory. That is, we'd
+ // need CPUFeatures::iterator to behave like std::vector<Feature>::iterator.
+ // This is at least partially possible -- the std::vector<bool> specialisation
+ // does something similar -- but it doesn't seem worthwhile for a
+ // special-purpose debug helper, so they are omitted here.
+ private:
+ const CPUFeatures* cpu_features_;
+ CPUFeatures::Feature feature_;
+
+ bool IsValid() const {
+ return ((cpu_features_ == NULL) && (feature_ == CPUFeatures::kNone)) ||
+ cpu_features_->Has(feature_);
+ }
+};
+
+// A convenience scope for temporarily modifying a CPU features object. This
+// allows features to be enabled for short sequences.
+//
+// Expected usage:
+//
+// {
+// CPUFeaturesScope cpu(&masm, CPUFeatures::kCRC32);
+// // This scope can now use CRC32, as well as anything else that was enabled
+// // before the scope.
+//
+// ...
+//
+// // At the end of the scope, the original CPU features are restored.
+// }
+class CPUFeaturesScope {
+ public:
+ // Start a CPUFeaturesScope on any object that implements
+ // `CPUFeatures* GetCPUFeatures()`.
+ template <typename T>
+ explicit CPUFeaturesScope(T* cpu_features_wrapper,
+ CPUFeatures::Feature feature0 = CPUFeatures::kNone,
+ CPUFeatures::Feature feature1 = CPUFeatures::kNone,
+ CPUFeatures::Feature feature2 = CPUFeatures::kNone,
+ CPUFeatures::Feature feature3 = CPUFeatures::kNone)
+ : cpu_features_(cpu_features_wrapper->GetCPUFeatures()),
+ old_features_(*cpu_features_) {
+ cpu_features_->Combine(feature0, feature1, feature2, feature3);
+ }
+
+ template <typename T>
+ CPUFeaturesScope(T* cpu_features_wrapper, const CPUFeatures& other)
+ : cpu_features_(cpu_features_wrapper->GetCPUFeatures()),
+ old_features_(*cpu_features_) {
+ cpu_features_->Combine(other);
+ }
+
+ ~CPUFeaturesScope() { *cpu_features_ = old_features_; }
+
+ // For advanced usage, the CPUFeatures object can be accessed directly.
+ // The scope will restore the original state when it ends.
+
+ CPUFeatures* GetCPUFeatures() const { return cpu_features_; }
+
+ void SetCPUFeatures(const CPUFeatures& cpu_features) {
+ *cpu_features_ = cpu_features;
+ }
+
+ private:
+ CPUFeatures* const cpu_features_;
+ const CPUFeatures old_features_;
+};
+
+
+} // namespace vixl
+
+#endif // VIXL_CPU_FEATURES_H
diff --git a/js/src/jit/arm64/vixl/Cpu-vixl.cpp b/js/src/jit/arm64/vixl/Cpu-vixl.cpp
new file mode 100644
index 0000000000..12244e73e4
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Cpu-vixl.cpp
@@ -0,0 +1,256 @@
+// Copyright 2015, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "jit/arm64/vixl/Cpu-vixl.h"
+
+#include "jstypes.h"
+
+#if defined(__aarch64__) && (defined(__ANDROID__) || defined(__linux__))
+#include <sys/auxv.h>
+#define VIXL_USE_LINUX_HWCAP 1
+#endif
+
+#include "jit/arm64/vixl/Utils-vixl.h"
+
+
+namespace vixl {
+
+
+const IDRegister::Field AA64PFR0::kFP(16, Field::kSigned);
+const IDRegister::Field AA64PFR0::kAdvSIMD(20, Field::kSigned);
+const IDRegister::Field AA64PFR0::kSVE(32);
+const IDRegister::Field AA64PFR0::kDIT(48);
+
+const IDRegister::Field AA64PFR1::kBT(0);
+
+const IDRegister::Field AA64ISAR0::kAES(4);
+const IDRegister::Field AA64ISAR0::kSHA1(8);
+const IDRegister::Field AA64ISAR0::kSHA2(12);
+const IDRegister::Field AA64ISAR0::kCRC32(16);
+const IDRegister::Field AA64ISAR0::kAtomic(20);
+const IDRegister::Field AA64ISAR0::kRDM(28);
+const IDRegister::Field AA64ISAR0::kSHA3(32);
+const IDRegister::Field AA64ISAR0::kSM3(36);
+const IDRegister::Field AA64ISAR0::kSM4(40);
+const IDRegister::Field AA64ISAR0::kDP(44);
+const IDRegister::Field AA64ISAR0::kFHM(48);
+const IDRegister::Field AA64ISAR0::kTS(52);
+
+const IDRegister::Field AA64ISAR1::kDPB(0);
+const IDRegister::Field AA64ISAR1::kAPA(4);
+const IDRegister::Field AA64ISAR1::kAPI(8);
+const IDRegister::Field AA64ISAR1::kJSCVT(12);
+const IDRegister::Field AA64ISAR1::kFCMA(16);
+const IDRegister::Field AA64ISAR1::kLRCPC(20);
+const IDRegister::Field AA64ISAR1::kGPA(24);
+const IDRegister::Field AA64ISAR1::kGPI(28);
+const IDRegister::Field AA64ISAR1::kFRINTTS(32);
+const IDRegister::Field AA64ISAR1::kSB(36);
+const IDRegister::Field AA64ISAR1::kSPECRES(40);
+
+const IDRegister::Field AA64MMFR1::kLO(16);
+
+CPUFeatures AA64PFR0::GetCPUFeatures() const {
+ CPUFeatures f;
+ if (Get(kFP) >= 0) f.Combine(CPUFeatures::kFP);
+ if (Get(kFP) >= 1) f.Combine(CPUFeatures::kFPHalf);
+ if (Get(kAdvSIMD) >= 0) f.Combine(CPUFeatures::kNEON);
+ if (Get(kAdvSIMD) >= 1) f.Combine(CPUFeatures::kNEONHalf);
+ if (Get(kSVE) >= 1) f.Combine(CPUFeatures::kSVE);
+ if (Get(kDIT) >= 1) f.Combine(CPUFeatures::kDIT);
+ return f;
+}
+
+CPUFeatures AA64PFR1::GetCPUFeatures() const {
+ CPUFeatures f;
+ if (Get(kBT) >= 1) f.Combine(CPUFeatures::kBTI);
+ return f;
+}
+
+CPUFeatures AA64ISAR0::GetCPUFeatures() const {
+ CPUFeatures f;
+ if (Get(kAES) >= 1) f.Combine(CPUFeatures::kAES);
+ if (Get(kAES) >= 2) f.Combine(CPUFeatures::kPmull1Q);
+ if (Get(kSHA1) >= 1) f.Combine(CPUFeatures::kSHA1);
+ if (Get(kSHA2) >= 1) f.Combine(CPUFeatures::kSHA2);
+ if (Get(kSHA2) >= 2) f.Combine(CPUFeatures::kSHA512);
+ if (Get(kCRC32) >= 1) f.Combine(CPUFeatures::kCRC32);
+ if (Get(kAtomic) >= 1) f.Combine(CPUFeatures::kAtomics);
+ if (Get(kRDM) >= 1) f.Combine(CPUFeatures::kRDM);
+ if (Get(kSHA3) >= 1) f.Combine(CPUFeatures::kSHA3);
+ if (Get(kSM3) >= 1) f.Combine(CPUFeatures::kSM3);
+ if (Get(kSM4) >= 1) f.Combine(CPUFeatures::kSM4);
+ if (Get(kDP) >= 1) f.Combine(CPUFeatures::kDotProduct);
+ if (Get(kFHM) >= 1) f.Combine(CPUFeatures::kFHM);
+ if (Get(kTS) >= 1) f.Combine(CPUFeatures::kFlagM);
+ if (Get(kTS) >= 2) f.Combine(CPUFeatures::kAXFlag);
+ return f;
+}
+
+CPUFeatures AA64ISAR1::GetCPUFeatures() const {
+ CPUFeatures f;
+ if (Get(kDPB) >= 1) f.Combine(CPUFeatures::kDCPoP);
+ if (Get(kJSCVT) >= 1) f.Combine(CPUFeatures::kJSCVT);
+ if (Get(kFCMA) >= 1) f.Combine(CPUFeatures::kFcma);
+ if (Get(kLRCPC) >= 1) f.Combine(CPUFeatures::kRCpc);
+ if (Get(kLRCPC) >= 2) f.Combine(CPUFeatures::kRCpcImm);
+ if (Get(kFRINTTS) >= 1) f.Combine(CPUFeatures::kFrintToFixedSizedInt);
+
+ if (Get(kAPI) >= 1) f.Combine(CPUFeatures::kPAuth);
+ if (Get(kAPA) >= 1) f.Combine(CPUFeatures::kPAuth, CPUFeatures::kPAuthQARMA);
+ if (Get(kGPI) >= 1) f.Combine(CPUFeatures::kPAuthGeneric);
+ if (Get(kGPA) >= 1) {
+ f.Combine(CPUFeatures::kPAuthGeneric, CPUFeatures::kPAuthGenericQARMA);
+ }
+ return f;
+}
+
+CPUFeatures AA64MMFR1::GetCPUFeatures() const {
+ CPUFeatures f;
+ if (Get(kLO) >= 1) f.Combine(CPUFeatures::kLORegions);
+ return f;
+}
+
+int IDRegister::Get(IDRegister::Field field) const {
+ int msb = field.GetMsb();
+ int lsb = field.GetLsb();
+ VIXL_STATIC_ASSERT(static_cast<size_t>(Field::kMaxWidthInBits) <
+ (sizeof(int) * kBitsPerByte));
+ switch (field.GetType()) {
+ case Field::kSigned:
+ return static_cast<int>(ExtractSignedBitfield64(msb, lsb, value_));
+ case Field::kUnsigned:
+ return static_cast<int>(ExtractUnsignedBitfield64(msb, lsb, value_));
+ }
+ VIXL_UNREACHABLE();
+ return 0;
+}
+
+CPUFeatures CPU::InferCPUFeaturesFromIDRegisters() {
+ CPUFeatures f;
+#define VIXL_COMBINE_ID_REG(NAME) f.Combine(Read##NAME().GetCPUFeatures());
+ VIXL_AARCH64_ID_REG_LIST(VIXL_COMBINE_ID_REG)
+#undef VIXL_COMBINE_ID_REG
+ return f;
+}
+
+CPUFeatures CPU::InferCPUFeaturesFromOS(
+ CPUFeatures::QueryIDRegistersOption option) {
+ CPUFeatures features;
+
+#if VIXL_USE_LINUX_HWCAP
+ // Map each set bit onto a feature. Ideally, we'd use HWCAP_* macros rather
+ // than explicit bits, but explicit bits allow us to identify features that
+ // the toolchain doesn't know about.
+ static const CPUFeatures::Feature kFeatureBits[] = {
+ // Bits 0-7
+ CPUFeatures::kFP,
+ CPUFeatures::kNEON,
+ CPUFeatures::kNone, // "EVTSTRM", which VIXL doesn't track.
+ CPUFeatures::kAES,
+ CPUFeatures::kPmull1Q,
+ CPUFeatures::kSHA1,
+ CPUFeatures::kSHA2,
+ CPUFeatures::kCRC32,
+ // Bits 8-15
+ CPUFeatures::kAtomics,
+ CPUFeatures::kFPHalf,
+ CPUFeatures::kNEONHalf,
+ CPUFeatures::kIDRegisterEmulation,
+ CPUFeatures::kRDM,
+ CPUFeatures::kJSCVT,
+ CPUFeatures::kFcma,
+ CPUFeatures::kRCpc,
+ // Bits 16-23
+ CPUFeatures::kDCPoP,
+ CPUFeatures::kSHA3,
+ CPUFeatures::kSM3,
+ CPUFeatures::kSM4,
+ CPUFeatures::kDotProduct,
+ CPUFeatures::kSHA512,
+ CPUFeatures::kSVE,
+ CPUFeatures::kFHM,
+ // Bits 24-27
+ CPUFeatures::kDIT,
+ CPUFeatures::kUSCAT,
+ CPUFeatures::kRCpcImm,
+ CPUFeatures::kFlagM
+ // Bits 28-31 are unassigned.
+ };
+ static const size_t kFeatureBitCount =
+ sizeof(kFeatureBits) / sizeof(kFeatureBits[0]);
+
+ // Mozilla change: Set the default for the simulator.
+#ifdef JS_SIMULATOR_ARM64
+ unsigned long auxv = ~(0UL); // Enable all features for the Simulator.
+#else
+ unsigned long auxv = getauxval(AT_HWCAP); // NOLINT(runtime/int)
+#endif
+
+ VIXL_STATIC_ASSERT(kFeatureBitCount < (sizeof(auxv) * kBitsPerByte));
+ for (size_t i = 0; i < kFeatureBitCount; i++) {
+ if (auxv & (1UL << i)) features.Combine(kFeatureBits[i]);
+ }
+#elif defined(XP_MACOSX)
+ // Apple processors have kJSCVT, kDotProduct, and kAtomics features.
+ features.Combine(CPUFeatures::kJSCVT, CPUFeatures::kDotProduct,
+ CPUFeatures::kAtomics);
+#endif // VIXL_USE_LINUX_HWCAP
+
+ if ((option == CPUFeatures::kQueryIDRegistersIfAvailable) &&
+ (features.Has(CPUFeatures::kIDRegisterEmulation))) {
+ features.Combine(InferCPUFeaturesFromIDRegisters());
+ }
+ return features;
+}
+
+
+#ifdef __aarch64__
+#define VIXL_READ_ID_REG(NAME) \
+ NAME CPU::Read##NAME() { \
+ uint64_t value = 0; \
+ __asm__("mrs %0, ID_" #NAME "_EL1" : "=r"(value)); \
+ return NAME(value); \
+ }
+#else // __aarch64__
+#define VIXL_READ_ID_REG(NAME) \
+ NAME CPU::Read##NAME() { \
+ /* TODO: Use VIXL_UNREACHABLE once it works in release builds. */ \
+ VIXL_ABORT(); \
+ }
+#endif // __aarch64__
+
+VIXL_AARCH64_ID_REG_LIST(VIXL_READ_ID_REG)
+
+#undef VIXL_READ_ID_REG
+
+
+// Initialise to smallest possible cache size.
+unsigned CPU::dcache_line_size_ = 1;
+unsigned CPU::icache_line_size_ = 1;
+
+
+} // namespace vixl
diff --git a/js/src/jit/arm64/vixl/Cpu-vixl.h b/js/src/jit/arm64/vixl/Cpu-vixl.h
new file mode 100644
index 0000000000..4db51aad6b
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Cpu-vixl.h
@@ -0,0 +1,241 @@
+// Copyright 2014, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_CPU_AARCH64_H
+#define VIXL_CPU_AARCH64_H
+
+#include "jit/arm64/vixl/Cpu-Features-vixl.h"
+#include "jit/arm64/vixl/Globals-vixl.h"
+
+#include "jit/arm64/vixl/Instructions-vixl.h"
+
+#ifndef VIXL_INCLUDE_TARGET_AARCH64
+// The supporting .cc file is only compiled when the A64 target is selected.
+// Throw an explicit error now to avoid a harder-to-debug linker error later.
+//
+// These helpers _could_ work on any AArch64 host, even when generating AArch32
+// code, but we don't support this because the available features may differ
+// between AArch32 and AArch64 on the same platform, so basing AArch32 code
+// generation on aarch64::CPU features is probably broken.
+#error cpu-aarch64.h requires VIXL_INCLUDE_TARGET_AARCH64 (scons target=a64).
+#endif
+
+namespace vixl {
+
+// A CPU ID register, for use with CPUFeatures::kIDRegisterEmulation. Fields
+// specific to each register are described in relevant subclasses.
+class IDRegister {
+ protected:
+ explicit IDRegister(uint64_t value = 0) : value_(value) {}
+
+ class Field {
+ public:
+ enum Type { kUnsigned, kSigned };
+
+ explicit Field(int lsb, Type type = kUnsigned) : lsb_(lsb), type_(type) {}
+
+ static const int kMaxWidthInBits = 4;
+
+ int GetWidthInBits() const {
+ // All current ID fields have four bits.
+ return kMaxWidthInBits;
+ }
+ int GetLsb() const { return lsb_; }
+ int GetMsb() const { return lsb_ + GetWidthInBits() - 1; }
+ Type GetType() const { return type_; }
+
+ private:
+ int lsb_;
+ Type type_;
+ };
+
+ public:
+ // Extract the specified field, performing sign-extension for signed fields.
+ // This allows us to implement the 'value >= number' detection mechanism
+ // recommended by the Arm ARM, for both signed and unsigned fields.
+ int Get(Field field) const;
+
+ private:
+ uint64_t value_;
+};
+
+class AA64PFR0 : public IDRegister {
+ public:
+ explicit AA64PFR0(uint64_t value) : IDRegister(value) {}
+
+ CPUFeatures GetCPUFeatures() const;
+
+ private:
+ static const Field kFP;
+ static const Field kAdvSIMD;
+ static const Field kSVE;
+ static const Field kDIT;
+};
+
+class AA64PFR1 : public IDRegister {
+ public:
+ explicit AA64PFR1(uint64_t value) : IDRegister(value) {}
+
+ CPUFeatures GetCPUFeatures() const;
+
+ private:
+ static const Field kBT;
+};
+
+class AA64ISAR0 : public IDRegister {
+ public:
+ explicit AA64ISAR0(uint64_t value) : IDRegister(value) {}
+
+ CPUFeatures GetCPUFeatures() const;
+
+ private:
+ static const Field kAES;
+ static const Field kSHA1;
+ static const Field kSHA2;
+ static const Field kCRC32;
+ static const Field kAtomic;
+ static const Field kRDM;
+ static const Field kSHA3;
+ static const Field kSM3;
+ static const Field kSM4;
+ static const Field kDP;
+ static const Field kFHM;
+ static const Field kTS;
+};
+
+class AA64ISAR1 : public IDRegister {
+ public:
+ explicit AA64ISAR1(uint64_t value) : IDRegister(value) {}
+
+ CPUFeatures GetCPUFeatures() const;
+
+ private:
+ static const Field kDPB;
+ static const Field kAPA;
+ static const Field kAPI;
+ static const Field kJSCVT;
+ static const Field kFCMA;
+ static const Field kLRCPC;
+ static const Field kGPA;
+ static const Field kGPI;
+ static const Field kFRINTTS;
+ static const Field kSB;
+ static const Field kSPECRES;
+};
+
+class AA64MMFR1 : public IDRegister {
+ public:
+ explicit AA64MMFR1(uint64_t value) : IDRegister(value) {}
+
+ CPUFeatures GetCPUFeatures() const;
+
+ private:
+ static const Field kLO;
+};
+
+class CPU {
+ public:
+ // Initialise CPU support.
+ static void SetUp();
+
+ // Ensures the data at a given address and with a given size is the same for
+ // the I and D caches. I and D caches are not automatically coherent on ARM
+ // so this operation is required before any dynamically generated code can
+ // safely run.
+ static void EnsureIAndDCacheCoherency(void* address, size_t length);
+
+ // Flush the local instruction pipeline, forcing a reload of any instructions
+ // beyond this barrier from the icache.
+ static void FlushExecutionContext();
+
+ // Read and interpret the ID registers. This requires
+ // CPUFeatures::kIDRegisterEmulation, and therefore cannot be called on
+ // non-AArch64 platforms.
+ static CPUFeatures InferCPUFeaturesFromIDRegisters();
+
+ // Read and interpret CPUFeatures reported by the OS. Failed queries (or
+ // unsupported platforms) return an empty list. Note that this is
+ // indistinguishable from a successful query on a platform that advertises no
+ // features.
+ //
+ // Non-AArch64 hosts are considered to be unsupported platforms, and this
+ // function returns an empty list.
+ static CPUFeatures InferCPUFeaturesFromOS(
+ CPUFeatures::QueryIDRegistersOption option =
+ CPUFeatures::kQueryIDRegistersIfAvailable);
+
+ // Handle tagged pointers.
+ template <typename T>
+ static T SetPointerTag(T pointer, uint64_t tag) {
+ VIXL_ASSERT(IsUintN(kAddressTagWidth, tag));
+
+ // Use C-style casts to get static_cast behaviour for integral types (T),
+ // and reinterpret_cast behaviour for other types.
+
+ uint64_t raw = (uint64_t)pointer;
+ VIXL_STATIC_ASSERT(sizeof(pointer) == sizeof(raw));
+
+ raw = (raw & ~kAddressTagMask) | (tag << kAddressTagOffset);
+ return (T)raw;
+ }
+
+ template <typename T>
+ static uint64_t GetPointerTag(T pointer) {
+ // Use C-style casts to get static_cast behaviour for integral types (T),
+ // and reinterpret_cast behaviour for other types.
+
+ uint64_t raw = (uint64_t)pointer;
+ VIXL_STATIC_ASSERT(sizeof(pointer) == sizeof(raw));
+
+ return (raw & kAddressTagMask) >> kAddressTagOffset;
+ }
+
+ private:
+#define VIXL_AARCH64_ID_REG_LIST(V) \
+ V(AA64PFR0) \
+ V(AA64PFR1) \
+ V(AA64ISAR0) \
+ V(AA64ISAR1) \
+ V(AA64MMFR1)
+
+#define VIXL_READ_ID_REG(NAME) static NAME Read##NAME();
+ // On native AArch64 platforms, read the named CPU ID registers. These require
+ // CPUFeatures::kIDRegisterEmulation, and should not be called on non-AArch64
+ // platforms.
+ VIXL_AARCH64_ID_REG_LIST(VIXL_READ_ID_REG)
+#undef VIXL_READ_ID_REG
+
+ // Return the content of the cache type register.
+ static uint32_t GetCacheType();
+
+ // I and D cache line size in bytes.
+ static unsigned icache_line_size_;
+ static unsigned dcache_line_size_;
+};
+
+} // namespace vixl
+
+#endif // VIXL_CPU_AARCH64_H
diff --git a/js/src/jit/arm64/vixl/Debugger-vixl.cpp b/js/src/jit/arm64/vixl/Debugger-vixl.cpp
new file mode 100644
index 0000000000..fa3e15601e
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Debugger-vixl.cpp
@@ -0,0 +1,1535 @@
+// Copyright 2014, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
+// OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+// EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "jstypes.h"
+
+#ifdef JS_SIMULATOR_ARM64
+
+#include "jit/arm64/vixl/Debugger-vixl.h"
+
+#include "mozilla/Vector.h"
+
+#include "js/AllocPolicy.h"
+
+namespace vixl {
+
+// List of commands supported by the debugger.
+#define DEBUG_COMMAND_LIST(C) \
+C(HelpCommand) \
+C(ContinueCommand) \
+C(StepCommand) \
+C(DisasmCommand) \
+C(PrintCommand) \
+C(ExamineCommand)
+
+// Debugger command lines are broken up in token of different type to make
+// processing easier later on.
+class Token {
+ public:
+ virtual ~Token() {}
+
+ // Token type.
+ virtual bool IsRegister() const { return false; }
+ virtual bool IsFPRegister() const { return false; }
+ virtual bool IsIdentifier() const { return false; }
+ virtual bool IsAddress() const { return false; }
+ virtual bool IsInteger() const { return false; }
+ virtual bool IsFormat() const { return false; }
+ virtual bool IsUnknown() const { return false; }
+ // Token properties.
+ virtual bool CanAddressMemory() const { return false; }
+ virtual uint8_t* ToAddress(Debugger* debugger) const = 0;
+ virtual void Print(FILE* out = stdout) const = 0;
+
+ static Token* Tokenize(const char* arg);
+};
+
+typedef mozilla::Vector<Token*, 0, js::SystemAllocPolicy> TokenVector;
+
+// Tokens often hold one value.
+template<typename T> class ValueToken : public Token {
+ public:
+ explicit ValueToken(T value) : value_(value) {}
+ ValueToken() {}
+
+ T value() const { return value_; }
+
+ virtual uint8_t* ToAddress(Debugger* debugger) const override {
+ USE(debugger);
+ VIXL_ABORT();
+ }
+
+ protected:
+ T value_;
+};
+
+// Integer registers (X or W) and their aliases.
+// Format: wn or xn with 0 <= n < 32 or a name in the aliases list.
+class RegisterToken : public ValueToken<const Register> {
+ public:
+ explicit RegisterToken(const Register reg)
+ : ValueToken<const Register>(reg) {}
+
+ virtual bool IsRegister() const override { return true; }
+ virtual bool CanAddressMemory() const override { return value().Is64Bits(); }
+ virtual uint8_t* ToAddress(Debugger* debugger) const override;
+ virtual void Print(FILE* out = stdout) const override;
+ const char* Name() const;
+
+ static Token* Tokenize(const char* arg);
+ static RegisterToken* Cast(Token* tok) {
+ VIXL_ASSERT(tok->IsRegister());
+ return reinterpret_cast<RegisterToken*>(tok);
+ }
+
+ private:
+ static const int kMaxAliasNumber = 4;
+ static const char* kXAliases[kNumberOfRegisters][kMaxAliasNumber];
+ static const char* kWAliases[kNumberOfRegisters][kMaxAliasNumber];
+};
+
+// Floating point registers (D or S).
+// Format: sn or dn with 0 <= n < 32.
+class FPRegisterToken : public ValueToken<const FPRegister> {
+ public:
+ explicit FPRegisterToken(const FPRegister fpreg)
+ : ValueToken<const FPRegister>(fpreg) {}
+
+ virtual bool IsFPRegister() const override { return true; }
+ virtual void Print(FILE* out = stdout) const override;
+
+ static Token* Tokenize(const char* arg);
+ static FPRegisterToken* Cast(Token* tok) {
+ VIXL_ASSERT(tok->IsFPRegister());
+ return reinterpret_cast<FPRegisterToken*>(tok);
+ }
+};
+
+
+// Non-register identifiers.
+// Format: Alphanumeric string starting with a letter.
+class IdentifierToken : public ValueToken<char*> {
+ public:
+ explicit IdentifierToken(const char* name) {
+ size_t size = strlen(name) + 1;
+ value_ = js_pod_malloc<char>(size);
+ strncpy(value_, name, size);
+ }
+ virtual ~IdentifierToken() { js_free(value_); }
+
+ virtual bool IsIdentifier() const override { return true; }
+ virtual bool CanAddressMemory() const override { return strcmp(value(), "pc") == 0; }
+ virtual uint8_t* ToAddress(Debugger* debugger) const override;
+ virtual void Print(FILE* out = stdout) const override;
+
+ static Token* Tokenize(const char* arg);
+ static IdentifierToken* Cast(Token* tok) {
+ VIXL_ASSERT(tok->IsIdentifier());
+ return reinterpret_cast<IdentifierToken*>(tok);
+ }
+};
+
+// 64-bit address literal.
+// Format: 0x... with up to 16 hexadecimal digits.
+class AddressToken : public ValueToken<uint8_t*> {
+ public:
+ explicit AddressToken(uint8_t* address) : ValueToken<uint8_t*>(address) {}
+
+ virtual bool IsAddress() const override { return true; }
+ virtual bool CanAddressMemory() const override { return true; }
+ virtual uint8_t* ToAddress(Debugger* debugger) const override;
+ virtual void Print(FILE* out = stdout) const override;
+
+ static Token* Tokenize(const char* arg);
+ static AddressToken* Cast(Token* tok) {
+ VIXL_ASSERT(tok->IsAddress());
+ return reinterpret_cast<AddressToken*>(tok);
+ }
+};
+
+
+// 64-bit decimal integer literal.
+// Format: n.
+class IntegerToken : public ValueToken<int64_t> {
+ public:
+ explicit IntegerToken(int64_t value) : ValueToken<int64_t>(value) {}
+
+ virtual bool IsInteger() const override { return true; }
+ virtual void Print(FILE* out = stdout) const override;
+
+ static Token* Tokenize(const char* arg);
+ static IntegerToken* Cast(Token* tok) {
+ VIXL_ASSERT(tok->IsInteger());
+ return reinterpret_cast<IntegerToken*>(tok);
+ }
+};
+
+// Literal describing how to print a chunk of data (up to 64 bits).
+// Format: .ln
+// where l (letter) is one of
+// * x: hexadecimal
+// * s: signed integer
+// * u: unsigned integer
+// * f: floating point
+// * i: instruction
+// and n (size) is one of 8, 16, 32 and 64. n should be omitted for
+// instructions.
+class FormatToken : public Token {
+ public:
+ FormatToken() {}
+
+ virtual bool IsFormat() const override { return true; }
+ virtual int SizeOf() const = 0;
+ virtual char type_code() const = 0;
+ virtual void PrintData(void* data, FILE* out = stdout) const = 0;
+ virtual void Print(FILE* out = stdout) const override = 0;
+
+ virtual uint8_t* ToAddress(Debugger* debugger) const override {
+ USE(debugger);
+ VIXL_ABORT();
+ }
+
+ static Token* Tokenize(const char* arg);
+ static FormatToken* Cast(Token* tok) {
+ VIXL_ASSERT(tok->IsFormat());
+ return reinterpret_cast<FormatToken*>(tok);
+ }
+};
+
+
+template<typename T> class Format : public FormatToken {
+ public:
+ Format(const char* fmt, char type_code) : fmt_(fmt), type_code_(type_code) {}
+
+ virtual int SizeOf() const override { return sizeof(T); }
+ virtual char type_code() const override { return type_code_; }
+ virtual void PrintData(void* data, FILE* out = stdout) const override {
+ T value;
+ memcpy(&value, data, sizeof(value));
+ fprintf(out, fmt_, value);
+ }
+ virtual void Print(FILE* out = stdout) const override;
+
+ private:
+ const char* fmt_;
+ char type_code_;
+};
+
+// Tokens which don't fit any of the above.
+class UnknownToken : public Token {
+ public:
+ explicit UnknownToken(const char* arg) {
+ size_t size = strlen(arg) + 1;
+ unknown_ = js_pod_malloc<char>(size);
+ strncpy(unknown_, arg, size);
+ }
+ virtual ~UnknownToken() { js_free(unknown_); }
+ virtual uint8_t* ToAddress(Debugger* debugger) const override {
+ USE(debugger);
+ VIXL_ABORT();
+ }
+
+ virtual bool IsUnknown() const override { return true; }
+ virtual void Print(FILE* out = stdout) const override;
+
+ private:
+ char* unknown_;
+};
+
+
+// All debugger commands must subclass DebugCommand and implement Run, Print
+// and Build. Commands must also define kHelp and kAliases.
+class DebugCommand {
+ public:
+ explicit DebugCommand(Token* name) : name_(IdentifierToken::Cast(name)) {}
+ DebugCommand() : name_(NULL) {}
+ virtual ~DebugCommand() { js_delete(name_); }
+
+ const char* name() { return name_->value(); }
+ // Run the command on the given debugger. The command returns true if
+ // execution should move to the next instruction.
+ virtual bool Run(Debugger * debugger) = 0;
+ virtual void Print(FILE* out = stdout);
+
+ static bool Match(const char* name, const char** aliases);
+ static DebugCommand* Parse(char* line);
+ static void PrintHelp(const char** aliases,
+ const char* args,
+ const char* help);
+
+ private:
+ IdentifierToken* name_;
+};
+
+// For all commands below see their respective kHelp and kAliases in
+// debugger-a64.cc
+class HelpCommand : public DebugCommand {
+ public:
+ explicit HelpCommand(Token* name) : DebugCommand(name) {}
+
+ virtual bool Run(Debugger* debugger) override;
+
+ static DebugCommand* Build(TokenVector&& args);
+
+ static const char* kHelp;
+ static const char* kAliases[];
+ static const char* kArguments;
+};
+
+
+class ContinueCommand : public DebugCommand {
+ public:
+ explicit ContinueCommand(Token* name) : DebugCommand(name) {}
+
+ virtual bool Run(Debugger* debugger) override;
+
+ static DebugCommand* Build(TokenVector&& args);
+
+ static const char* kHelp;
+ static const char* kAliases[];
+ static const char* kArguments;
+};
+
+
+class StepCommand : public DebugCommand {
+ public:
+ StepCommand(Token* name, IntegerToken* count)
+ : DebugCommand(name), count_(count) {}
+ virtual ~StepCommand() { js_delete(count_); }
+
+ int64_t count() { return count_->value(); }
+ virtual bool Run(Debugger* debugger) override;
+ virtual void Print(FILE* out = stdout) override;
+
+ static DebugCommand* Build(TokenVector&& args);
+
+ static const char* kHelp;
+ static const char* kAliases[];
+ static const char* kArguments;
+
+ private:
+ IntegerToken* count_;
+};
+
+class DisasmCommand : public DebugCommand {
+ public:
+ static DebugCommand* Build(TokenVector&& args);
+
+ static const char* kHelp;
+ static const char* kAliases[];
+ static const char* kArguments;
+};
+
+
+class PrintCommand : public DebugCommand {
+ public:
+ PrintCommand(Token* name, Token* target, FormatToken* format)
+ : DebugCommand(name), target_(target), format_(format) {}
+ virtual ~PrintCommand() {
+ js_delete(target_);
+ js_delete(format_);
+ }
+
+ Token* target() { return target_; }
+ FormatToken* format() { return format_; }
+ virtual bool Run(Debugger* debugger) override;
+ virtual void Print(FILE* out = stdout) override;
+
+ static DebugCommand* Build(TokenVector&& args);
+
+ static const char* kHelp;
+ static const char* kAliases[];
+ static const char* kArguments;
+
+ private:
+ Token* target_;
+ FormatToken* format_;
+};
+
+class ExamineCommand : public DebugCommand {
+ public:
+ ExamineCommand(Token* name,
+ Token* target,
+ FormatToken* format,
+ IntegerToken* count)
+ : DebugCommand(name), target_(target), format_(format), count_(count) {}
+ virtual ~ExamineCommand() {
+ js_delete(target_);
+ js_delete(format_);
+ js_delete(count_);
+ }
+
+ Token* target() { return target_; }
+ FormatToken* format() { return format_; }
+ IntegerToken* count() { return count_; }
+ virtual bool Run(Debugger* debugger) override;
+ virtual void Print(FILE* out = stdout) override;
+
+ static DebugCommand* Build(TokenVector&& args);
+
+ static const char* kHelp;
+ static const char* kAliases[];
+ static const char* kArguments;
+
+ private:
+ Token* target_;
+ FormatToken* format_;
+ IntegerToken* count_;
+};
+
+// Commands which name does not match any of the known commnand.
+class UnknownCommand : public DebugCommand {
+ public:
+ explicit UnknownCommand(TokenVector&& args) : args_(std::move(args)) {}
+ virtual ~UnknownCommand();
+
+ virtual bool Run(Debugger* debugger) override;
+
+ private:
+ TokenVector args_;
+};
+
+// Commands which name match a known command but the syntax is invalid.
+class InvalidCommand : public DebugCommand {
+ public:
+ InvalidCommand(TokenVector&& args, int index, const char* cause)
+ : args_(std::move(args)), index_(index), cause_(cause) {}
+ virtual ~InvalidCommand();
+
+ virtual bool Run(Debugger* debugger) override;
+
+ private:
+ TokenVector args_;
+ int index_;
+ const char* cause_;
+};
+
+const char* HelpCommand::kAliases[] = { "help", NULL };
+const char* HelpCommand::kArguments = NULL;
+const char* HelpCommand::kHelp = " Print this help.";
+
+const char* ContinueCommand::kAliases[] = { "continue", "c", NULL };
+const char* ContinueCommand::kArguments = NULL;
+const char* ContinueCommand::kHelp = " Resume execution.";
+
+const char* StepCommand::kAliases[] = { "stepi", "si", NULL };
+const char* StepCommand::kArguments = "[n = 1]";
+const char* StepCommand::kHelp = " Execute n next instruction(s).";
+
+const char* DisasmCommand::kAliases[] = { "disasm", "di", NULL };
+const char* DisasmCommand::kArguments = "[n = 10]";
+const char* DisasmCommand::kHelp =
+ " Disassemble n instruction(s) at pc.\n"
+ " This command is equivalent to x pc.i [n = 10]."
+;
+
+const char* PrintCommand::kAliases[] = { "print", "p", NULL };
+const char* PrintCommand::kArguments = "<entity>[.format]";
+const char* PrintCommand::kHelp =
+ " Print the given entity according to the given format.\n"
+ " The format parameter only affects individual registers; it is ignored\n"
+ " for other entities.\n"
+ " <entity> can be one of the following:\n"
+ " * A register name (such as x0, s1, ...).\n"
+ " * 'regs', to print all integer (W and X) registers.\n"
+ " * 'fpregs' to print all floating-point (S and D) registers.\n"
+ " * 'sysregs' to print all system registers (including NZCV).\n"
+ " * 'pc' to print the current program counter.\n"
+;
+
+const char* ExamineCommand::kAliases[] = { "m", "mem", "x", NULL };
+const char* ExamineCommand::kArguments = "<addr>[.format] [n = 10]";
+const char* ExamineCommand::kHelp =
+ " Examine memory. Print n items of memory at address <addr> according to\n"
+ " the given [.format].\n"
+ " Addr can be an immediate address, a register name or pc.\n"
+ " Format is made of a type letter: 'x' (hexadecimal), 's' (signed), 'u'\n"
+ " (unsigned), 'f' (floating point), i (instruction) and a size in bits\n"
+ " when appropriate (8, 16, 32, 64)\n"
+ " E.g 'x sp.x64' will print 10 64-bit words from the stack in\n"
+ " hexadecimal format."
+;
+
+const char* RegisterToken::kXAliases[kNumberOfRegisters][kMaxAliasNumber] = {
+ { "x0", NULL },
+ { "x1", NULL },
+ { "x2", NULL },
+ { "x3", NULL },
+ { "x4", NULL },
+ { "x5", NULL },
+ { "x6", NULL },
+ { "x7", NULL },
+ { "x8", NULL },
+ { "x9", NULL },
+ { "x10", NULL },
+ { "x11", NULL },
+ { "x12", NULL },
+ { "x13", NULL },
+ { "x14", NULL },
+ { "x15", NULL },
+ { "ip0", "x16", NULL },
+ { "ip1", "x17", NULL },
+ { "x18", "pr", NULL },
+ { "x19", NULL },
+ { "x20", NULL },
+ { "x21", NULL },
+ { "x22", NULL },
+ { "x23", NULL },
+ { "x24", NULL },
+ { "x25", NULL },
+ { "x26", NULL },
+ { "x27", NULL },
+ { "x28", NULL },
+ { "fp", "x29", NULL },
+ { "lr", "x30", NULL },
+ { "sp", NULL}
+};
+
+const char* RegisterToken::kWAliases[kNumberOfRegisters][kMaxAliasNumber] = {
+ { "w0", NULL },
+ { "w1", NULL },
+ { "w2", NULL },
+ { "w3", NULL },
+ { "w4", NULL },
+ { "w5", NULL },
+ { "w6", NULL },
+ { "w7", NULL },
+ { "w8", NULL },
+ { "w9", NULL },
+ { "w10", NULL },
+ { "w11", NULL },
+ { "w12", NULL },
+ { "w13", NULL },
+ { "w14", NULL },
+ { "w15", NULL },
+ { "w16", NULL },
+ { "w17", NULL },
+ { "w18", NULL },
+ { "w19", NULL },
+ { "w20", NULL },
+ { "w21", NULL },
+ { "w22", NULL },
+ { "w23", NULL },
+ { "w24", NULL },
+ { "w25", NULL },
+ { "w26", NULL },
+ { "w27", NULL },
+ { "w28", NULL },
+ { "w29", NULL },
+ { "w30", NULL },
+ { "wsp", NULL }
+};
+
+
+Debugger::Debugger(Decoder* decoder, FILE* stream)
+ : Simulator(decoder, stream),
+ debug_parameters_(DBG_INACTIVE),
+ pending_request_(false),
+ steps_(0),
+ last_command_(NULL) {
+ disasm_ = js_new<PrintDisassembler>(stdout);
+ printer_ = js_new<Decoder>();
+ printer_->AppendVisitor(disasm_);
+}
+
+
+Debugger::~Debugger() {
+ js_delete(disasm_);
+ js_delete(printer_);
+}
+
+
+void Debugger::Run() {
+ pc_modified_ = false;
+ while (pc_ != kEndOfSimAddress) {
+ if (pending_request()) RunDebuggerShell();
+ ExecuteInstruction();
+ LogAllWrittenRegisters();
+ }
+}
+
+
+void Debugger::PrintInstructions(const void* address, int64_t count) {
+ if (count == 0) {
+ return;
+ }
+
+ const Instruction* from = Instruction::CastConst(address);
+ if (count < 0) {
+ count = -count;
+ from -= (count - 1) * kInstructionSize;
+ }
+ const Instruction* to = from + count * kInstructionSize;
+
+ for (const Instruction* current = from;
+ current < to;
+ current = current->NextInstruction()) {
+ printer_->Decode(current);
+ }
+}
+
+
+void Debugger::PrintMemory(const uint8_t* address,
+ const FormatToken* format,
+ int64_t count) {
+ if (count == 0) {
+ return;
+ }
+
+ const uint8_t* from = address;
+ int size = format->SizeOf();
+ if (count < 0) {
+ count = -count;
+ from -= (count - 1) * size;
+ }
+ const uint8_t* to = from + count * size;
+
+ for (const uint8_t* current = from; current < to; current += size) {
+ if (((current - from) % 8) == 0) {
+ printf("\n%p: ", current);
+ }
+
+ uint64_t data = Memory::Read<uint64_t>(current);
+ format->PrintData(&data);
+ printf(" ");
+ }
+ printf("\n\n");
+}
+
+
+void Debugger::PrintRegister(const Register& target_reg,
+ const char* name,
+ const FormatToken* format) {
+ const uint64_t reg_size = target_reg.size();
+ const uint64_t format_size = format->SizeOf() * 8;
+ const uint64_t count = reg_size / format_size;
+ const uint64_t mask = 0xffffffffffffffff >> (64 - format_size);
+ const uint64_t reg_value = reg<uint64_t>(target_reg.code(),
+ Reg31IsStackPointer);
+ VIXL_ASSERT(count > 0);
+
+ printf("%s = ", name);
+ for (uint64_t i = 1; i <= count; i++) {
+ uint64_t data = reg_value >> (reg_size - (i * format_size));
+ data &= mask;
+ format->PrintData(&data);
+ printf(" ");
+ }
+ printf("\n");
+}
+
+
+// TODO(all): fix this for vector registers.
+void Debugger::PrintFPRegister(const FPRegister& target_fpreg,
+ const FormatToken* format) {
+ const unsigned fpreg_size = target_fpreg.size();
+ const uint64_t format_size = format->SizeOf() * 8;
+ const uint64_t count = fpreg_size / format_size;
+ const uint64_t mask = 0xffffffffffffffff >> (64 - format_size);
+ const uint64_t fpreg_value = vreg<uint64_t>(fpreg_size, target_fpreg.code());
+ VIXL_ASSERT(count > 0);
+
+ if (target_fpreg.Is32Bits()) {
+ printf("s%u = ", target_fpreg.code());
+ } else {
+ printf("d%u = ", target_fpreg.code());
+ }
+ for (uint64_t i = 1; i <= count; i++) {
+ uint64_t data = fpreg_value >> (fpreg_size - (i * format_size));
+ data &= mask;
+ format->PrintData(&data);
+ printf(" ");
+ }
+ printf("\n");
+}
+
+
+void Debugger::VisitException(const Instruction* instr) {
+ switch (instr->Mask(ExceptionMask)) {
+ case BRK:
+ DoBreakpoint(instr);
+ return;
+ case HLT:
+ VIXL_FALLTHROUGH();
+ default: Simulator::VisitException(instr);
+ }
+}
+
+
+// Read a command. A command will be at most kMaxDebugShellLine char long and
+// ends with '\n\0'.
+// TODO: Should this be a utility function?
+char* Debugger::ReadCommandLine(const char* prompt, char* buffer, int length) {
+ int fgets_calls = 0;
+ char* end = NULL;
+
+ printf("%s", prompt);
+ fflush(stdout);
+
+ do {
+ if (fgets(buffer, length, stdin) == NULL) {
+ printf(" ** Error while reading command. **\n");
+ return NULL;
+ }
+
+ fgets_calls++;
+ end = strchr(buffer, '\n');
+ } while (end == NULL);
+
+ if (fgets_calls != 1) {
+ printf(" ** Command too long. **\n");
+ return NULL;
+ }
+
+ // Remove the newline from the end of the command.
+ VIXL_ASSERT(end[1] == '\0');
+ VIXL_ASSERT((end - buffer) < (length - 1));
+ end[0] = '\0';
+
+ return buffer;
+}
+
+
+void Debugger::RunDebuggerShell() {
+ if (IsDebuggerRunning()) {
+ if (steps_ > 0) {
+ // Finish stepping first.
+ --steps_;
+ return;
+ }
+
+ printf("Next: ");
+ PrintInstructions(pc());
+ bool done = false;
+ while (!done) {
+ char buffer[kMaxDebugShellLine];
+ char* line = ReadCommandLine("vixl> ", buffer, kMaxDebugShellLine);
+
+ if (line == NULL) continue; // An error occurred.
+
+ DebugCommand* command = DebugCommand::Parse(line);
+ if (command != NULL) {
+ last_command_ = command;
+ }
+
+ if (last_command_ != NULL) {
+ done = last_command_->Run(this);
+ } else {
+ printf("No previous command to run!\n");
+ }
+ }
+
+ if ((debug_parameters_ & DBG_BREAK) != 0) {
+ // The break request has now been handled, move to next instruction.
+ debug_parameters_ &= ~DBG_BREAK;
+ increment_pc();
+ }
+ }
+}
+
+
+void Debugger::DoBreakpoint(const Instruction* instr) {
+ VIXL_ASSERT(instr->Mask(ExceptionMask) == BRK);
+
+ printf("Hit breakpoint at pc=%p.\n", reinterpret_cast<const void*>(instr));
+ set_debug_parameters(debug_parameters() | DBG_BREAK | DBG_ACTIVE);
+ // Make the shell point to the brk instruction.
+ set_pc(instr);
+}
+
+
+static bool StringToUInt64(uint64_t* value, const char* line, int base = 10) {
+ char* endptr = NULL;
+ errno = 0; // Reset errors.
+ uint64_t parsed = strtoul(line, &endptr, base);
+
+ if (errno == ERANGE) {
+ // Overflow.
+ return false;
+ }
+
+ if (endptr == line) {
+ // No digits were parsed.
+ return false;
+ }
+
+ if (*endptr != '\0') {
+ // Non-digit characters present at the end.
+ return false;
+ }
+
+ *value = parsed;
+ return true;
+}
+
+
+static bool StringToInt64(int64_t* value, const char* line, int base = 10) {
+ char* endptr = NULL;
+ errno = 0; // Reset errors.
+ int64_t parsed = strtol(line, &endptr, base);
+
+ if (errno == ERANGE) {
+ // Overflow, undeflow.
+ return false;
+ }
+
+ if (endptr == line) {
+ // No digits were parsed.
+ return false;
+ }
+
+ if (*endptr != '\0') {
+ // Non-digit characters present at the end.
+ return false;
+ }
+
+ *value = parsed;
+ return true;
+}
+
+
+Token* Token::Tokenize(const char* arg) {
+ if ((arg == NULL) || (*arg == '\0')) {
+ return NULL;
+ }
+
+ // The order is important. For example Identifier::Tokenize would consider
+ // any register to be a valid identifier.
+
+ Token* token = RegisterToken::Tokenize(arg);
+ if (token != NULL) {
+ return token;
+ }
+
+ token = FPRegisterToken::Tokenize(arg);
+ if (token != NULL) {
+ return token;
+ }
+
+ token = IdentifierToken::Tokenize(arg);
+ if (token != NULL) {
+ return token;
+ }
+
+ token = AddressToken::Tokenize(arg);
+ if (token != NULL) {
+ return token;
+ }
+
+ token = IntegerToken::Tokenize(arg);
+ if (token != NULL) {
+ return token;
+ }
+
+ return js_new<UnknownToken>(arg);
+}
+
+
+uint8_t* RegisterToken::ToAddress(Debugger* debugger) const {
+ VIXL_ASSERT(CanAddressMemory());
+ uint64_t reg_value = debugger->xreg(value().code(), Reg31IsStackPointer);
+ uint8_t* address = NULL;
+ memcpy(&address, &reg_value, sizeof(address));
+ return address;
+}
+
+
+void RegisterToken::Print(FILE* out) const {
+ VIXL_ASSERT(value().IsValid());
+ fprintf(out, "[Register %s]", Name());
+}
+
+
+const char* RegisterToken::Name() const {
+ if (value().Is32Bits()) {
+ return kWAliases[value().code()][0];
+ } else {
+ return kXAliases[value().code()][0];
+ }
+}
+
+
+Token* RegisterToken::Tokenize(const char* arg) {
+ for (unsigned i = 0; i < kNumberOfRegisters; i++) {
+ // Is it a X register or alias?
+ for (const char** current = kXAliases[i]; *current != NULL; current++) {
+ if (strcmp(arg, *current) == 0) {
+ return js_new<RegisterToken>(Register::XRegFromCode(i));
+ }
+ }
+
+ // Is it a W register or alias?
+ for (const char** current = kWAliases[i]; *current != NULL; current++) {
+ if (strcmp(arg, *current) == 0) {
+ return js_new<RegisterToken>(Register::WRegFromCode(i));
+ }
+ }
+ }
+
+ return NULL;
+}
+
+
+void FPRegisterToken::Print(FILE* out) const {
+ VIXL_ASSERT(value().IsValid());
+ char prefix = value().Is32Bits() ? 's' : 'd';
+ fprintf(out, "[FPRegister %c%" PRIu32 "]", prefix, value().code());
+}
+
+
+Token* FPRegisterToken::Tokenize(const char* arg) {
+ if (strlen(arg) < 2) {
+ return NULL;
+ }
+
+ switch (*arg) {
+ case 's':
+ case 'd':
+ const char* cursor = arg + 1;
+ uint64_t code = 0;
+ if (!StringToUInt64(&code, cursor)) {
+ return NULL;
+ }
+
+ if (code > kNumberOfFPRegisters) {
+ return NULL;
+ }
+
+ VRegister fpreg = NoVReg;
+ switch (*arg) {
+ case 's':
+ fpreg = VRegister::SRegFromCode(static_cast<unsigned>(code));
+ break;
+ case 'd':
+ fpreg = VRegister::DRegFromCode(static_cast<unsigned>(code));
+ break;
+ default: VIXL_UNREACHABLE();
+ }
+
+ return js_new<FPRegisterToken>(fpreg);
+ }
+
+ return NULL;
+}
+
+
+uint8_t* IdentifierToken::ToAddress(Debugger* debugger) const {
+ VIXL_ASSERT(CanAddressMemory());
+ const Instruction* pc_value = debugger->pc();
+ uint8_t* address = NULL;
+ memcpy(&address, &pc_value, sizeof(address));
+ return address;
+}
+
+void IdentifierToken::Print(FILE* out) const {
+ fprintf(out, "[Identifier %s]", value());
+}
+
+
+Token* IdentifierToken::Tokenize(const char* arg) {
+ if (!isalpha(arg[0])) {
+ return NULL;
+ }
+
+ const char* cursor = arg + 1;
+ while ((*cursor != '\0') && isalnum(*cursor)) {
+ ++cursor;
+ }
+
+ if (*cursor == '\0') {
+ return js_new<IdentifierToken>(arg);
+ }
+
+ return NULL;
+}
+
+
+uint8_t* AddressToken::ToAddress(Debugger* debugger) const {
+ USE(debugger);
+ return value();
+}
+
+
+void AddressToken::Print(FILE* out) const {
+ fprintf(out, "[Address %p]", value());
+}
+
+
+Token* AddressToken::Tokenize(const char* arg) {
+ if ((strlen(arg) < 3) || (arg[0] != '0') || (arg[1] != 'x')) {
+ return NULL;
+ }
+
+ uint64_t ptr = 0;
+ if (!StringToUInt64(&ptr, arg, 16)) {
+ return NULL;
+ }
+
+ uint8_t* address = reinterpret_cast<uint8_t*>(ptr);
+ return js_new<AddressToken>(address);
+}
+
+
+void IntegerToken::Print(FILE* out) const {
+ fprintf(out, "[Integer %" PRId64 "]", value());
+}
+
+
+Token* IntegerToken::Tokenize(const char* arg) {
+ int64_t value = 0;
+ if (!StringToInt64(&value, arg)) {
+ return NULL;
+ }
+
+ return js_new<IntegerToken>(value);
+}
+
+
+Token* FormatToken::Tokenize(const char* arg) {
+ size_t length = strlen(arg);
+ switch (arg[0]) {
+ case 'x':
+ case 's':
+ case 'u':
+ case 'f':
+ if (length == 1) return NULL;
+ break;
+ case 'i':
+ if (length == 1) return js_new<Format<uint32_t>>("%08" PRIx32, 'i');
+ VIXL_FALLTHROUGH();
+ default: return NULL;
+ }
+
+ char* endptr = NULL;
+ errno = 0; // Reset errors.
+ uint64_t count = strtoul(arg + 1, &endptr, 10);
+
+ if (errno != 0) {
+ // Overflow, etc.
+ return NULL;
+ }
+
+ if (endptr == arg) {
+ // No digits were parsed.
+ return NULL;
+ }
+
+ if (*endptr != '\0') {
+ // There are unexpected (non-digit) characters after the number.
+ return NULL;
+ }
+
+ switch (arg[0]) {
+ case 'x':
+ switch (count) {
+ case 8: return js_new<Format<uint8_t>>("%02" PRIx8, 'x');
+ case 16: return js_new<Format<uint16_t>>("%04" PRIx16, 'x');
+ case 32: return js_new<Format<uint32_t>>("%08" PRIx32, 'x');
+ case 64: return js_new<Format<uint64_t>>("%016" PRIx64, 'x');
+ default: return NULL;
+ }
+ case 's':
+ switch (count) {
+ case 8: return js_new<Format<int8_t>>("%4" PRId8, 's');
+ case 16: return js_new<Format<int16_t>>("%6" PRId16, 's');
+ case 32: return js_new<Format<int32_t>>("%11" PRId32, 's');
+ case 64: return js_new<Format<int64_t>>("%20" PRId64, 's');
+ default: return NULL;
+ }
+ case 'u':
+ switch (count) {
+ case 8: return js_new<Format<uint8_t>>("%3" PRIu8, 'u');
+ case 16: return js_new<Format<uint16_t>>("%5" PRIu16, 'u');
+ case 32: return js_new<Format<uint32_t>>("%10" PRIu32, 'u');
+ case 64: return js_new<Format<uint64_t>>("%20" PRIu64, 'u');
+ default: return NULL;
+ }
+ case 'f':
+ switch (count) {
+ case 32: return js_new<Format<float>>("%13g", 'f');
+ case 64: return js_new<Format<double>>("%13g", 'f');
+ default: return NULL;
+ }
+ default:
+ VIXL_UNREACHABLE();
+ return NULL;
+ }
+}
+
+
+template<typename T>
+void Format<T>::Print(FILE* out) const {
+ unsigned size = sizeof(T) * 8;
+ fprintf(out, "[Format %c%u - %s]", type_code_, size, fmt_);
+}
+
+
+void UnknownToken::Print(FILE* out) const {
+ fprintf(out, "[Unknown %s]", unknown_);
+}
+
+
+void DebugCommand::Print(FILE* out) {
+ fprintf(out, "%s", name());
+}
+
+
+bool DebugCommand::Match(const char* name, const char** aliases) {
+ for (const char** current = aliases; *current != NULL; current++) {
+ if (strcmp(name, *current) == 0) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+
+DebugCommand* DebugCommand::Parse(char* line) {
+ TokenVector args;
+
+ for (char* chunk = strtok(line, " \t");
+ chunk != NULL;
+ chunk = strtok(NULL, " \t")) {
+ char* dot = strchr(chunk, '.');
+ if (dot != NULL) {
+ // 'Token.format'.
+ Token* format = FormatToken::Tokenize(dot + 1);
+ if (format != NULL) {
+ *dot = '\0';
+ (void)args.append(Token::Tokenize(chunk));
+ (void)args.append(format);
+ } else {
+ // Error while parsing the format, push the UnknownToken so an error
+ // can be accurately reported.
+ (void)args.append(Token::Tokenize(chunk));
+ }
+ } else {
+ (void)args.append(Token::Tokenize(chunk));
+ }
+ }
+
+ if (args.empty()) {
+ return NULL;
+ }
+
+ if (!args[0]->IsIdentifier()) {
+ return js_new<InvalidCommand>(std::move(args), 0, "command name is not valid");
+ }
+
+ const char* name = IdentifierToken::Cast(args[0])->value();
+ #define RETURN_IF_MATCH(Command) \
+ if (Match(name, Command::kAliases)) { \
+ return Command::Build(std::move(args)); \
+ }
+ DEBUG_COMMAND_LIST(RETURN_IF_MATCH);
+ #undef RETURN_IF_MATCH
+
+ return js_new<UnknownCommand>(std::move(args));
+}
+
+
+void DebugCommand::PrintHelp(const char** aliases,
+ const char* args,
+ const char* help) {
+ VIXL_ASSERT(aliases[0] != NULL);
+ VIXL_ASSERT(help != NULL);
+
+ printf("\n----\n\n");
+ for (const char** current = aliases; *current != NULL; current++) {
+ if (args != NULL) {
+ printf("%s %s\n", *current, args);
+ } else {
+ printf("%s\n", *current);
+ }
+ }
+ printf("\n%s\n", help);
+}
+
+
+bool HelpCommand::Run(Debugger* debugger) {
+ VIXL_ASSERT(debugger->IsDebuggerRunning());
+ USE(debugger);
+
+ #define PRINT_HELP(Command) \
+ DebugCommand::PrintHelp(Command::kAliases, \
+ Command::kArguments, \
+ Command::kHelp);
+ DEBUG_COMMAND_LIST(PRINT_HELP);
+ #undef PRINT_HELP
+ printf("\n----\n\n");
+
+ return false;
+}
+
+
+DebugCommand* HelpCommand::Build(TokenVector&& args) {
+ if (args.length() != 1) {
+ return js_new<InvalidCommand>(std::move(args), -1, "too many arguments");
+ }
+
+ return js_new<HelpCommand>(args[0]);
+}
+
+
+bool ContinueCommand::Run(Debugger* debugger) {
+ VIXL_ASSERT(debugger->IsDebuggerRunning());
+
+ debugger->set_debug_parameters(debugger->debug_parameters() & ~DBG_ACTIVE);
+ return true;
+}
+
+
+DebugCommand* ContinueCommand::Build(TokenVector&& args) {
+ if (args.length() != 1) {
+ return js_new<InvalidCommand>(std::move(args), -1, "too many arguments");
+ }
+
+ return js_new<ContinueCommand>(args[0]);
+}
+
+
+bool StepCommand::Run(Debugger* debugger) {
+ VIXL_ASSERT(debugger->IsDebuggerRunning());
+
+ int64_t steps = count();
+ if (steps < 0) {
+ printf(" ** invalid value for steps: %" PRId64 " (<0) **\n", steps);
+ } else if (steps > 1) {
+ debugger->set_steps(steps - 1);
+ }
+
+ return true;
+}
+
+
+void StepCommand::Print(FILE* out) {
+ fprintf(out, "%s %" PRId64 "", name(), count());
+}
+
+
+DebugCommand* StepCommand::Build(TokenVector&& args) {
+ IntegerToken* count = NULL;
+ switch (args.length()) {
+ case 1: { // step [1]
+ count = js_new<IntegerToken>(1);
+ break;
+ }
+ case 2: { // step n
+ Token* first = args[1];
+ if (!first->IsInteger()) {
+ return js_new<InvalidCommand>(std::move(args), 1, "expects int");
+ }
+ count = IntegerToken::Cast(first);
+ break;
+ }
+ default:
+ return js_new<InvalidCommand>(std::move(args), -1, "too many arguments");
+ }
+
+ return js_new<StepCommand>(args[0], count);
+}
+
+
+DebugCommand* DisasmCommand::Build(TokenVector&& args) {
+ IntegerToken* count = NULL;
+ switch (args.length()) {
+ case 1: { // disasm [10]
+ count = js_new<IntegerToken>(10);
+ break;
+ }
+ case 2: { // disasm n
+ Token* first = args[1];
+ if (!first->IsInteger()) {
+ return js_new<InvalidCommand>(std::move(args), 1, "expects int");
+ }
+
+ count = IntegerToken::Cast(first);
+ break;
+ }
+ default:
+ return js_new<InvalidCommand>(std::move(args), -1, "too many arguments");
+ }
+
+ Token* target = js_new<IdentifierToken>("pc");
+ FormatToken* format = js_new<Format<uint32_t>>("%08" PRIx32, 'i');
+ return js_new<ExamineCommand>(args[0], target, format, count);
+}
+
+
+void PrintCommand::Print(FILE* out) {
+ fprintf(out, "%s ", name());
+ target()->Print(out);
+ if (format() != NULL) format()->Print(out);
+}
+
+
+bool PrintCommand::Run(Debugger* debugger) {
+ VIXL_ASSERT(debugger->IsDebuggerRunning());
+
+ Token* tok = target();
+ if (tok->IsIdentifier()) {
+ char* identifier = IdentifierToken::Cast(tok)->value();
+ if (strcmp(identifier, "regs") == 0) {
+ debugger->PrintRegisters();
+ } else if (strcmp(identifier, "fpregs") == 0) {
+ debugger->PrintVRegisters();
+ } else if (strcmp(identifier, "sysregs") == 0) {
+ debugger->PrintSystemRegisters();
+ } else if (strcmp(identifier, "pc") == 0) {
+ printf("pc = %16p\n", reinterpret_cast<const void*>(debugger->pc()));
+ } else {
+ printf(" ** Unknown identifier to print: %s **\n", identifier);
+ }
+
+ return false;
+ }
+
+ FormatToken* format_tok = format();
+ VIXL_ASSERT(format_tok != NULL);
+ if (format_tok->type_code() == 'i') {
+ // TODO(all): Add support for instruction disassembly.
+ printf(" ** unsupported format: instructions **\n");
+ return false;
+ }
+
+ if (tok->IsRegister()) {
+ RegisterToken* reg_tok = RegisterToken::Cast(tok);
+ Register reg = reg_tok->value();
+ debugger->PrintRegister(reg, reg_tok->Name(), format_tok);
+ return false;
+ }
+
+ if (tok->IsFPRegister()) {
+ FPRegister fpreg = FPRegisterToken::Cast(tok)->value();
+ debugger->PrintFPRegister(fpreg, format_tok);
+ return false;
+ }
+
+ VIXL_UNREACHABLE();
+ return false;
+}
+
+
+DebugCommand* PrintCommand::Build(TokenVector&& args) {
+ if (args.length() < 2) {
+ return js_new<InvalidCommand>(std::move(args), -1, "too few arguments");
+ }
+
+ Token* target = args[1];
+ if (!target->IsRegister() &&
+ !target->IsFPRegister() &&
+ !target->IsIdentifier()) {
+ return js_new<InvalidCommand>(std::move(args), 1, "expects reg or identifier");
+ }
+
+ FormatToken* format = NULL;
+ int target_size = 0;
+ if (target->IsRegister()) {
+ Register reg = RegisterToken::Cast(target)->value();
+ target_size = reg.SizeInBytes();
+ } else if (target->IsFPRegister()) {
+ FPRegister fpreg = FPRegisterToken::Cast(target)->value();
+ target_size = fpreg.SizeInBytes();
+ }
+ // If the target is an identifier there must be no format. This is checked
+ // in the switch statement below.
+
+ switch (args.length()) {
+ case 2: {
+ if (target->IsRegister()) {
+ switch (target_size) {
+ case 4: format = js_new<Format<uint32_t>>("%08" PRIx32, 'x'); break;
+ case 8: format = js_new<Format<uint64_t>>("%016" PRIx64, 'x'); break;
+ default: VIXL_UNREACHABLE();
+ }
+ } else if (target->IsFPRegister()) {
+ switch (target_size) {
+ case 4: format = js_new<Format<float>>("%8g", 'f'); break;
+ case 8: format = js_new<Format<double>>("%8g", 'f'); break;
+ default: VIXL_UNREACHABLE();
+ }
+ }
+ break;
+ }
+ case 3: {
+ if (target->IsIdentifier()) {
+ return js_new<InvalidCommand>(std::move(args), 2,
+ "format is only allowed with registers");
+ }
+
+ Token* second = args[2];
+ if (!second->IsFormat()) {
+ return js_new<InvalidCommand>(std::move(args), 2, "expects format");
+ }
+ format = FormatToken::Cast(second);
+
+ if (format->SizeOf() > target_size) {
+ return js_new<InvalidCommand>(std::move(args), 2, "format too wide");
+ }
+
+ break;
+ }
+ default:
+ return js_new<InvalidCommand>(std::move(args), -1, "too many arguments");
+ }
+
+ return js_new<PrintCommand>(args[0], target, format);
+}
+
+
+bool ExamineCommand::Run(Debugger* debugger) {
+ VIXL_ASSERT(debugger->IsDebuggerRunning());
+
+ uint8_t* address = target()->ToAddress(debugger);
+ int64_t amount = count()->value();
+ if (format()->type_code() == 'i') {
+ debugger->PrintInstructions(address, amount);
+ } else {
+ debugger->PrintMemory(address, format(), amount);
+ }
+
+ return false;
+}
+
+
+void ExamineCommand::Print(FILE* out) {
+ fprintf(out, "%s ", name());
+ format()->Print(out);
+ target()->Print(out);
+}
+
+
+DebugCommand* ExamineCommand::Build(TokenVector&& args) {
+ if (args.length() < 2) {
+ return js_new<InvalidCommand>(std::move(args), -1, "too few arguments");
+ }
+
+ Token* target = args[1];
+ if (!target->CanAddressMemory()) {
+ return js_new<InvalidCommand>(std::move(args), 1, "expects address");
+ }
+
+ FormatToken* format = NULL;
+ IntegerToken* count = NULL;
+
+ switch (args.length()) {
+ case 2: { // mem addr[.x64] [10]
+ format = js_new<Format<uint64_t>>("%016" PRIx64, 'x');
+ count = js_new<IntegerToken>(10);
+ break;
+ }
+ case 3: { // mem addr.format [10]
+ // mem addr[.x64] n
+ Token* second = args[2];
+ if (second->IsFormat()) {
+ format = FormatToken::Cast(second);
+ count = js_new<IntegerToken>(10);
+ break;
+ } else if (second->IsInteger()) {
+ format = js_new<Format<uint64_t>>("%016" PRIx64, 'x');
+ count = IntegerToken::Cast(second);
+ } else {
+ return js_new<InvalidCommand>(std::move(args), 2, "expects format or integer");
+ }
+ VIXL_UNREACHABLE();
+ break;
+ }
+ case 4: { // mem addr.format n
+ Token* second = args[2];
+ Token* third = args[3];
+ if (!second->IsFormat() || !third->IsInteger()) {
+ return js_new<InvalidCommand>(std::move(args), -1, "expects addr[.format] [n]");
+ }
+ format = FormatToken::Cast(second);
+ count = IntegerToken::Cast(third);
+ break;
+ }
+ default:
+ return js_new<InvalidCommand>(std::move(args), -1, "too many arguments");
+ }
+
+ return js_new<ExamineCommand>(args[0], target, format, count);
+}
+
+
+UnknownCommand::~UnknownCommand() {
+ const size_t size = args_.length();
+ for (size_t i = 0; i < size; ++i) {
+ js_delete(args_[i]);
+ }
+}
+
+
+bool UnknownCommand::Run(Debugger* debugger) {
+ VIXL_ASSERT(debugger->IsDebuggerRunning());
+ USE(debugger);
+
+ printf(" ** Unknown Command:");
+ const size_t size = args_.length();
+ for (size_t i = 0; i < size; ++i) {
+ printf(" ");
+ args_[i]->Print(stdout);
+ }
+ printf(" **\n");
+
+ return false;
+}
+
+
+InvalidCommand::~InvalidCommand() {
+ const size_t size = args_.length();
+ for (size_t i = 0; i < size; ++i) {
+ js_delete(args_[i]);
+ }
+}
+
+
+bool InvalidCommand::Run(Debugger* debugger) {
+ VIXL_ASSERT(debugger->IsDebuggerRunning());
+ USE(debugger);
+
+ printf(" ** Invalid Command:");
+ const size_t size = args_.length();
+ for (size_t i = 0; i < size; ++i) {
+ printf(" ");
+ if (i == static_cast<size_t>(index_)) {
+ printf(">>");
+ args_[i]->Print(stdout);
+ printf("<<");
+ } else {
+ args_[i]->Print(stdout);
+ }
+ }
+ printf(" **\n");
+ printf(" ** %s\n", cause_);
+
+ return false;
+}
+
+} // namespace vixl
+
+#endif // JS_SIMULATOR_ARM64
diff --git a/js/src/jit/arm64/vixl/Debugger-vixl.h b/js/src/jit/arm64/vixl/Debugger-vixl.h
new file mode 100644
index 0000000000..7236bf1e5e
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Debugger-vixl.h
@@ -0,0 +1,117 @@
+// Copyright 2014, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifdef JS_SIMULATOR_ARM64
+
+#ifndef VIXL_A64_DEBUGGER_A64_H_
+#define VIXL_A64_DEBUGGER_A64_H_
+
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+
+#include "jit/arm64/vixl/Constants-vixl.h"
+#include "jit/arm64/vixl/Globals-vixl.h"
+#include "jit/arm64/vixl/Simulator-vixl.h"
+#include "jit/arm64/vixl/Utils-vixl.h"
+
+namespace vixl {
+
+// Flags that represent the debugger state.
+enum DebugParameters {
+ DBG_INACTIVE = 0,
+ DBG_ACTIVE = 1 << 0, // The debugger is active.
+ DBG_BREAK = 1 << 1 // The debugger is at a breakpoint.
+};
+
+// Forward declarations.
+class DebugCommand;
+class Token;
+class FormatToken;
+
+class Debugger : public Simulator {
+ public:
+ explicit Debugger(Decoder* decoder, FILE* stream = stdout);
+ ~Debugger();
+
+ virtual void Run() override;
+ virtual void VisitException(const Instruction* instr) override;
+
+ int debug_parameters() const { return debug_parameters_; }
+ void set_debug_parameters(int parameters) {
+ debug_parameters_ = parameters;
+
+ update_pending_request();
+ }
+
+ // Numbers of instructions to execute before the debugger shell is given
+ // back control.
+ int64_t steps() const { return steps_; }
+ void set_steps(int64_t value) {
+ VIXL_ASSERT(value > 1);
+ steps_ = value;
+ }
+
+ bool IsDebuggerRunning() const {
+ return (debug_parameters_ & DBG_ACTIVE) != 0;
+ }
+
+ bool pending_request() const { return pending_request_; }
+ void update_pending_request() {
+ pending_request_ = IsDebuggerRunning();
+ }
+
+ void PrintInstructions(const void* address, int64_t count = 1);
+ void PrintMemory(const uint8_t* address,
+ const FormatToken* format,
+ int64_t count = 1);
+ void PrintRegister(const Register& target_reg,
+ const char* name,
+ const FormatToken* format);
+ void PrintFPRegister(const FPRegister& target_fpreg,
+ const FormatToken* format);
+
+ private:
+ char* ReadCommandLine(const char* prompt, char* buffer, int length);
+ void RunDebuggerShell();
+ void DoBreakpoint(const Instruction* instr);
+
+ int debug_parameters_;
+ bool pending_request_;
+ int64_t steps_;
+ DebugCommand* last_command_;
+ PrintDisassembler* disasm_;
+ Decoder* printer_;
+
+ // Length of the biggest command line accepted by the debugger shell.
+ static const int kMaxDebugShellLine = 256;
+};
+
+} // namespace vixl
+
+#endif // VIXL_A64_DEBUGGER_A64_H_
+
+#endif // JS_SIMULATOR_ARM64
diff --git a/js/src/jit/arm64/vixl/Decoder-vixl.cpp b/js/src/jit/arm64/vixl/Decoder-vixl.cpp
new file mode 100644
index 0000000000..884654ec8e
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Decoder-vixl.cpp
@@ -0,0 +1,899 @@
+// Copyright 2014, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "jit/arm64/vixl/Decoder-vixl.h"
+
+#include <algorithm>
+
+#include "jit/arm64/vixl/Globals-vixl.h"
+#include "jit/arm64/vixl/Utils-vixl.h"
+
+namespace vixl {
+
+void Decoder::DecodeInstruction(const Instruction *instr) {
+ if (instr->Bits(28, 27) == 0) {
+ VisitUnallocated(instr);
+ } else {
+ switch (instr->Bits(27, 24)) {
+ // 0: PC relative addressing.
+ case 0x0: DecodePCRelAddressing(instr); break;
+
+ // 1: Add/sub immediate.
+ case 0x1: DecodeAddSubImmediate(instr); break;
+
+ // A: Logical shifted register.
+ // Add/sub with carry.
+ // Conditional compare register.
+ // Conditional compare immediate.
+ // Conditional select.
+ // Data processing 1 source.
+ // Data processing 2 source.
+ // B: Add/sub shifted register.
+ // Add/sub extended register.
+ // Data processing 3 source.
+ case 0xA:
+ case 0xB: DecodeDataProcessing(instr); break;
+
+ // 2: Logical immediate.
+ // Move wide immediate.
+ case 0x2: DecodeLogical(instr); break;
+
+ // 3: Bitfield.
+ // Extract.
+ case 0x3: DecodeBitfieldExtract(instr); break;
+
+ // 4: Unconditional branch immediate.
+ // Exception generation.
+ // Compare and branch immediate.
+ // 5: Compare and branch immediate.
+ // Conditional branch.
+ // System.
+ // 6,7: Unconditional branch.
+ // Test and branch immediate.
+ case 0x4:
+ case 0x5:
+ case 0x6:
+ case 0x7: DecodeBranchSystemException(instr); break;
+
+ // 8,9: Load/store register pair post-index.
+ // Load register literal.
+ // Load/store register unscaled immediate.
+ // Load/store register immediate post-index.
+ // Load/store register immediate pre-index.
+ // Load/store register offset.
+ // Load/store exclusive.
+ // C,D: Load/store register pair offset.
+ // Load/store register pair pre-index.
+ // Load/store register unsigned immediate.
+ // Advanced SIMD.
+ case 0x8:
+ case 0x9:
+ case 0xC:
+ case 0xD: DecodeLoadStore(instr); break;
+
+ // E: FP fixed point conversion.
+ // FP integer conversion.
+ // FP data processing 1 source.
+ // FP compare.
+ // FP immediate.
+ // FP data processing 2 source.
+ // FP conditional compare.
+ // FP conditional select.
+ // Advanced SIMD.
+ // F: FP data processing 3 source.
+ // Advanced SIMD.
+ case 0xE:
+ case 0xF: DecodeFP(instr); break;
+ }
+ }
+}
+
+void Decoder::AppendVisitor(DecoderVisitor* new_visitor) {
+ MOZ_ALWAYS_TRUE(visitors_.append(new_visitor));
+}
+
+
+void Decoder::PrependVisitor(DecoderVisitor* new_visitor) {
+ MOZ_ALWAYS_TRUE(visitors_.insert(visitors_.begin(), new_visitor));
+}
+
+
+void Decoder::InsertVisitorBefore(DecoderVisitor* new_visitor,
+ DecoderVisitor* registered_visitor) {
+ for (auto it = visitors_.begin(); it != visitors_.end(); it++) {
+ if (*it == registered_visitor) {
+ MOZ_ALWAYS_TRUE(visitors_.insert(it, new_visitor));
+ return;
+ }
+ }
+ // We reached the end of the list without finding registered_visitor.
+ MOZ_ALWAYS_TRUE(visitors_.append(new_visitor));
+}
+
+
+void Decoder::InsertVisitorAfter(DecoderVisitor* new_visitor,
+ DecoderVisitor* registered_visitor) {
+ for (auto it = visitors_.begin(); it != visitors_.end(); it++) {
+ if (*it == registered_visitor) {
+ it++;
+ MOZ_ALWAYS_TRUE(visitors_.insert(it, new_visitor));
+ return;
+ }
+ }
+ // We reached the end of the list without finding registered_visitor.
+ MOZ_ALWAYS_TRUE(visitors_.append(new_visitor));
+}
+
+
+void Decoder::RemoveVisitor(DecoderVisitor* visitor) {
+ visitors_.erase(std::remove(visitors_.begin(), visitors_.end(), visitor),
+ visitors_.end());
+}
+
+
+void Decoder::DecodePCRelAddressing(const Instruction* instr) {
+ VIXL_ASSERT(instr->Bits(27, 24) == 0x0);
+ // We know bit 28 is set, as <b28:b27> = 0 is filtered out at the top level
+ // decode.
+ VIXL_ASSERT(instr->Bit(28) == 0x1);
+ VisitPCRelAddressing(instr);
+}
+
+
+void Decoder::DecodeBranchSystemException(const Instruction* instr) {
+ VIXL_ASSERT((instr->Bits(27, 24) == 0x4) ||
+ (instr->Bits(27, 24) == 0x5) ||
+ (instr->Bits(27, 24) == 0x6) ||
+ (instr->Bits(27, 24) == 0x7) );
+
+ switch (instr->Bits(31, 29)) {
+ case 0:
+ case 4: {
+ VisitUnconditionalBranch(instr);
+ break;
+ }
+ case 1:
+ case 5: {
+ if (instr->Bit(25) == 0) {
+ VisitCompareBranch(instr);
+ } else {
+ VisitTestBranch(instr);
+ }
+ break;
+ }
+ case 2: {
+ if (instr->Bit(25) == 0) {
+ if ((instr->Bit(24) == 0x1) ||
+ (instr->Mask(0x01000010) == 0x00000010)) {
+ VisitUnallocated(instr);
+ } else {
+ VisitConditionalBranch(instr);
+ }
+ } else {
+ VisitUnallocated(instr);
+ }
+ break;
+ }
+ case 6: {
+ if (instr->Bit(25) == 0) {
+ if (instr->Bit(24) == 0) {
+ if ((instr->Bits(4, 2) != 0) ||
+ (instr->Mask(0x00E0001D) == 0x00200001) ||
+ (instr->Mask(0x00E0001D) == 0x00400001) ||
+ (instr->Mask(0x00E0001E) == 0x00200002) ||
+ (instr->Mask(0x00E0001E) == 0x00400002) ||
+ (instr->Mask(0x00E0001C) == 0x00600000) ||
+ (instr->Mask(0x00E0001C) == 0x00800000) ||
+ (instr->Mask(0x00E0001F) == 0x00A00000) ||
+ (instr->Mask(0x00C0001C) == 0x00C00000)) {
+ if (instr->InstructionBits() == UNDEFINED_INST_PATTERN) {
+ VisitException(instr);
+ } else {
+ VisitUnallocated(instr);
+ }
+ } else {
+ VisitException(instr);
+ }
+ } else {
+ if (instr->Bits(23, 22) == 0) {
+ const Instr masked_003FF0E0 = instr->Mask(0x003FF0E0);
+ if ((instr->Bits(21, 19) == 0x4) ||
+ (masked_003FF0E0 == 0x00033000) ||
+ (masked_003FF0E0 == 0x003FF020) ||
+ (masked_003FF0E0 == 0x003FF060) ||
+ (masked_003FF0E0 == 0x003FF0E0) ||
+ (instr->Mask(0x00388000) == 0x00008000) ||
+ (instr->Mask(0x0038E000) == 0x00000000) ||
+ (instr->Mask(0x0039E000) == 0x00002000) ||
+ (instr->Mask(0x003AE000) == 0x00002000) ||
+ (instr->Mask(0x003CE000) == 0x00042000) ||
+ (instr->Mask(0x003FFFC0) == 0x000320C0) ||
+ (instr->Mask(0x003FF100) == 0x00032100) ||
+ // (instr->Mask(0x003FF200) == 0x00032200) || // match CSDB
+ (instr->Mask(0x003FF400) == 0x00032400) ||
+ (instr->Mask(0x003FF800) == 0x00032800) ||
+ (instr->Mask(0x0038F000) == 0x00005000) ||
+ (instr->Mask(0x0038E000) == 0x00006000)) {
+ VisitUnallocated(instr);
+ } else {
+ VisitSystem(instr);
+ }
+ } else {
+ VisitUnallocated(instr);
+ }
+ }
+ } else {
+ if ((instr->Bit(24) == 0x1) ||
+ (instr->Bits(20, 16) != 0x1F) ||
+ (instr->Bits(15, 10) != 0) ||
+ (instr->Bits(4, 0) != 0) ||
+ (instr->Bits(24, 21) == 0x3) ||
+ (instr->Bits(24, 22) == 0x3)) {
+ VisitUnallocated(instr);
+ } else {
+ VisitUnconditionalBranchToRegister(instr);
+ }
+ }
+ break;
+ }
+ case 3:
+ case 7: {
+ VisitUnallocated(instr);
+ break;
+ }
+ }
+}
+
+
+void Decoder::DecodeLoadStore(const Instruction* instr) {
+ VIXL_ASSERT((instr->Bits(27, 24) == 0x8) ||
+ (instr->Bits(27, 24) == 0x9) ||
+ (instr->Bits(27, 24) == 0xC) ||
+ (instr->Bits(27, 24) == 0xD) );
+ // TODO(all): rearrange the tree to integrate this branch.
+ if ((instr->Bit(28) == 0) && (instr->Bit(29) == 0) && (instr->Bit(26) == 1)) {
+ DecodeNEONLoadStore(instr);
+ return;
+ }
+
+ if (instr->Bit(24) == 0) {
+ if (instr->Bit(28) == 0) {
+ if (instr->Bit(29) == 0) {
+ if (instr->Bit(26) == 0) {
+ VisitLoadStoreExclusive(instr);
+ } else {
+ VIXL_UNREACHABLE();
+ }
+ } else {
+ if ((instr->Bits(31, 30) == 0x3) ||
+ (instr->Mask(0xC4400000) == 0x40000000)) {
+ VisitUnallocated(instr);
+ } else {
+ if (instr->Bit(23) == 0) {
+ if (instr->Mask(0xC4400000) == 0xC0400000) {
+ VisitUnallocated(instr);
+ } else {
+ VisitLoadStorePairNonTemporal(instr);
+ }
+ } else {
+ VisitLoadStorePairPostIndex(instr);
+ }
+ }
+ }
+ } else {
+ if (instr->Bit(29) == 0) {
+ if (instr->Mask(0xC4000000) == 0xC4000000) {
+ VisitUnallocated(instr);
+ } else {
+ VisitLoadLiteral(instr);
+ }
+ } else {
+ if ((instr->Mask(0x44800000) == 0x44800000) ||
+ (instr->Mask(0x84800000) == 0x84800000)) {
+ VisitUnallocated(instr);
+ } else {
+ if (instr->Bit(21) == 0) {
+ switch (instr->Bits(11, 10)) {
+ case 0: {
+ VisitLoadStoreUnscaledOffset(instr);
+ break;
+ }
+ case 1: {
+ if (instr->Mask(0xC4C00000) == 0xC0800000) {
+ VisitUnallocated(instr);
+ } else {
+ VisitLoadStorePostIndex(instr);
+ }
+ break;
+ }
+ case 2: {
+ // TODO: VisitLoadStoreRegisterOffsetUnpriv.
+ VisitUnimplemented(instr);
+ break;
+ }
+ case 3: {
+ if (instr->Mask(0xC4C00000) == 0xC0800000) {
+ VisitUnallocated(instr);
+ } else {
+ VisitLoadStorePreIndex(instr);
+ }
+ break;
+ }
+ }
+ } else {
+ if (instr->Bits(11, 10) == 0x2) {
+ if (instr->Bit(14) == 0) {
+ VisitUnallocated(instr);
+ } else {
+ VisitLoadStoreRegisterOffset(instr);
+ }
+ } else {
+ if (instr->Bits(11, 10) == 0x0) {
+ if (instr->Bit(25) == 0) {
+ if (instr->Bit(26) == 0) {
+ if ((instr->Bit(15) == 1) &&
+ ((instr->Bits(14, 12) == 0x1) ||
+ (instr->Bit(13) == 1) ||
+ (instr->Bits(14, 12) == 0x5) ||
+ ((instr->Bits(14, 12) == 0x4) &&
+ ((instr->Bit(23) == 0) ||
+ (instr->Bits(23, 22) == 0x3))))) {
+ VisitUnallocated(instr);
+ } else {
+ VisitAtomicMemory(instr);
+ }
+ } else {
+ VisitUnallocated(instr);
+ }
+ } else {
+ VisitUnallocated(instr);
+ }
+ } else {
+ VisitUnallocated(instr);
+ }
+ }
+ }
+ }
+ }
+ }
+ } else {
+ if (instr->Bit(28) == 0) {
+ if (instr->Bit(29) == 0) {
+ VisitUnallocated(instr);
+ } else {
+ if ((instr->Bits(31, 30) == 0x3) ||
+ (instr->Mask(0xC4400000) == 0x40000000)) {
+ VisitUnallocated(instr);
+ } else {
+ if (instr->Bit(23) == 0) {
+ VisitLoadStorePairOffset(instr);
+ } else {
+ VisitLoadStorePairPreIndex(instr);
+ }
+ }
+ }
+ } else {
+ if (instr->Bit(29) == 0) {
+ VisitUnallocated(instr);
+ } else {
+ if ((instr->Mask(0x84C00000) == 0x80C00000) ||
+ (instr->Mask(0x44800000) == 0x44800000) ||
+ (instr->Mask(0x84800000) == 0x84800000)) {
+ VisitUnallocated(instr);
+ } else {
+ VisitLoadStoreUnsignedOffset(instr);
+ }
+ }
+ }
+ }
+}
+
+
+void Decoder::DecodeLogical(const Instruction* instr) {
+ VIXL_ASSERT(instr->Bits(27, 24) == 0x2);
+
+ if (instr->Mask(0x80400000) == 0x00400000) {
+ VisitUnallocated(instr);
+ } else {
+ if (instr->Bit(23) == 0) {
+ VisitLogicalImmediate(instr);
+ } else {
+ if (instr->Bits(30, 29) == 0x1) {
+ VisitUnallocated(instr);
+ } else {
+ VisitMoveWideImmediate(instr);
+ }
+ }
+ }
+}
+
+
+void Decoder::DecodeBitfieldExtract(const Instruction* instr) {
+ VIXL_ASSERT(instr->Bits(27, 24) == 0x3);
+
+ if ((instr->Mask(0x80400000) == 0x80000000) ||
+ (instr->Mask(0x80400000) == 0x00400000) ||
+ (instr->Mask(0x80008000) == 0x00008000)) {
+ VisitUnallocated(instr);
+ } else if (instr->Bit(23) == 0) {
+ if ((instr->Mask(0x80200000) == 0x00200000) ||
+ (instr->Mask(0x60000000) == 0x60000000)) {
+ VisitUnallocated(instr);
+ } else {
+ VisitBitfield(instr);
+ }
+ } else {
+ if ((instr->Mask(0x60200000) == 0x00200000) ||
+ (instr->Mask(0x60000000) != 0x00000000)) {
+ VisitUnallocated(instr);
+ } else {
+ VisitExtract(instr);
+ }
+ }
+}
+
+
+void Decoder::DecodeAddSubImmediate(const Instruction* instr) {
+ VIXL_ASSERT(instr->Bits(27, 24) == 0x1);
+ if (instr->Bit(23) == 1) {
+ VisitUnallocated(instr);
+ } else {
+ VisitAddSubImmediate(instr);
+ }
+}
+
+
+void Decoder::DecodeDataProcessing(const Instruction* instr) {
+ VIXL_ASSERT((instr->Bits(27, 24) == 0xA) ||
+ (instr->Bits(27, 24) == 0xB));
+
+ if (instr->Bit(24) == 0) {
+ if (instr->Bit(28) == 0) {
+ if (instr->Mask(0x80008000) == 0x00008000) {
+ VisitUnallocated(instr);
+ } else {
+ VisitLogicalShifted(instr);
+ }
+ } else {
+ switch (instr->Bits(23, 21)) {
+ case 0: {
+ if (instr->Mask(0x0000FC00) != 0) {
+ VisitUnallocated(instr);
+ } else {
+ VisitAddSubWithCarry(instr);
+ }
+ break;
+ }
+ case 2: {
+ if ((instr->Bit(29) == 0) ||
+ (instr->Mask(0x00000410) != 0)) {
+ VisitUnallocated(instr);
+ } else {
+ if (instr->Bit(11) == 0) {
+ VisitConditionalCompareRegister(instr);
+ } else {
+ VisitConditionalCompareImmediate(instr);
+ }
+ }
+ break;
+ }
+ case 4: {
+ if (instr->Mask(0x20000800) != 0x00000000) {
+ VisitUnallocated(instr);
+ } else {
+ VisitConditionalSelect(instr);
+ }
+ break;
+ }
+ case 6: {
+ if (instr->Bit(29) == 0x1) {
+ VisitUnallocated(instr);
+ VIXL_FALLTHROUGH();
+ } else {
+ if (instr->Bit(30) == 0) {
+ if ((instr->Bit(15) == 0x1) ||
+ (instr->Bits(15, 11) == 0) ||
+ (instr->Bits(15, 12) == 0x1) ||
+ (instr->Bits(15, 12) == 0x3) ||
+ (instr->Bits(15, 13) == 0x3) ||
+ (instr->Mask(0x8000EC00) == 0x00004C00) ||
+ (instr->Mask(0x8000E800) == 0x80004000) ||
+ (instr->Mask(0x8000E400) == 0x80004000)) {
+ VisitUnallocated(instr);
+ } else {
+ VisitDataProcessing2Source(instr);
+ }
+ } else {
+ if ((instr->Bit(13) == 1) ||
+ (instr->Bits(20, 16) != 0) ||
+ (instr->Bits(15, 14) != 0) ||
+ (instr->Mask(0xA01FFC00) == 0x00000C00) ||
+ (instr->Mask(0x201FF800) == 0x00001800)) {
+ VisitUnallocated(instr);
+ } else {
+ VisitDataProcessing1Source(instr);
+ }
+ }
+ break;
+ }
+ }
+ case 1:
+ case 3:
+ case 5:
+ case 7: VisitUnallocated(instr); break;
+ }
+ }
+ } else {
+ if (instr->Bit(28) == 0) {
+ if (instr->Bit(21) == 0) {
+ if ((instr->Bits(23, 22) == 0x3) ||
+ (instr->Mask(0x80008000) == 0x00008000)) {
+ VisitUnallocated(instr);
+ } else {
+ VisitAddSubShifted(instr);
+ }
+ } else {
+ if ((instr->Mask(0x00C00000) != 0x00000000) ||
+ (instr->Mask(0x00001400) == 0x00001400) ||
+ (instr->Mask(0x00001800) == 0x00001800)) {
+ VisitUnallocated(instr);
+ } else {
+ VisitAddSubExtended(instr);
+ }
+ }
+ } else {
+ if ((instr->Bit(30) == 0x1) ||
+ (instr->Bits(30, 29) == 0x1) ||
+ (instr->Mask(0xE0600000) == 0x00200000) ||
+ (instr->Mask(0xE0608000) == 0x00400000) ||
+ (instr->Mask(0x60608000) == 0x00408000) ||
+ (instr->Mask(0x60E00000) == 0x00E00000) ||
+ (instr->Mask(0x60E00000) == 0x00800000) ||
+ (instr->Mask(0x60E00000) == 0x00600000)) {
+ VisitUnallocated(instr);
+ } else {
+ VisitDataProcessing3Source(instr);
+ }
+ }
+ }
+}
+
+
+void Decoder::DecodeFP(const Instruction* instr) {
+ VIXL_ASSERT((instr->Bits(27, 24) == 0xE) ||
+ (instr->Bits(27, 24) == 0xF));
+ if (instr->Bit(28) == 0) {
+ DecodeNEONVectorDataProcessing(instr);
+ } else {
+ if (instr->Bits(31, 30) == 0x3) {
+ VisitUnallocated(instr);
+ } else if (instr->Bits(31, 30) == 0x1) {
+ DecodeNEONScalarDataProcessing(instr);
+ } else {
+ if (instr->Bit(29) == 0) {
+ if (instr->Bit(24) == 0) {
+ if (instr->Bit(21) == 0) {
+ if ((instr->Bit(23) == 1) ||
+ (instr->Bit(18) == 1) ||
+ (instr->Mask(0x80008000) == 0x00000000) ||
+ (instr->Mask(0x000E0000) == 0x00000000) ||
+ (instr->Mask(0x000E0000) == 0x000A0000) ||
+ (instr->Mask(0x00160000) == 0x00000000) ||
+ (instr->Mask(0x00160000) == 0x00120000)) {
+ VisitUnallocated(instr);
+ } else {
+ VisitFPFixedPointConvert(instr);
+ }
+ } else {
+ if (instr->Bits(15, 10) == 32) {
+ VisitUnallocated(instr);
+ } else if (instr->Bits(15, 10) == 0) {
+ if ((instr->Bits(23, 22) == 0x3) ||
+ (instr->Mask(0x000E0000) == 0x000A0000) ||
+ (instr->Mask(0x000E0000) == 0x000C0000) ||
+ (instr->Mask(0x00160000) == 0x00120000) ||
+ (instr->Mask(0x00160000) == 0x00140000) ||
+ (instr->Mask(0x20C40000) == 0x00800000) ||
+ (instr->Mask(0x20C60000) == 0x00840000) ||
+ (instr->Mask(0xA0C60000) == 0x80060000) ||
+ (instr->Mask(0xA0C60000) == 0x00860000) ||
+ (instr->Mask(0xA0C60000) == 0x00460000) ||
+ (instr->Mask(0xA0CE0000) == 0x80860000) ||
+ (instr->Mask(0xA0CE0000) == 0x804E0000) ||
+ (instr->Mask(0xA0CE0000) == 0x000E0000) ||
+ (instr->Mask(0xA0D60000) == 0x00160000) ||
+ (instr->Mask(0xA0D60000) == 0x80560000) ||
+ (instr->Mask(0xA0D60000) == 0x80960000)) {
+ VisitUnallocated(instr);
+ } else {
+ VisitFPIntegerConvert(instr);
+ }
+ } else if (instr->Bits(14, 10) == 16) {
+ const Instr masked_A0DF8000 = instr->Mask(0xA0DF8000);
+ if ((instr->Mask(0x80180000) != 0) ||
+ (masked_A0DF8000 == 0x00020000) ||
+ (masked_A0DF8000 == 0x00030000) ||
+ (masked_A0DF8000 == 0x00068000) ||
+ (masked_A0DF8000 == 0x00428000) ||
+ (masked_A0DF8000 == 0x00430000) ||
+ (masked_A0DF8000 == 0x00468000) ||
+ (instr->Mask(0xA0D80000) == 0x00800000) ||
+ (instr->Mask(0xA0DE0000) == 0x00C00000) ||
+ (instr->Mask(0xA0DF0000) == 0x00C30000) ||
+ (instr->Mask(0xA0DC0000) == 0x00C40000)) {
+ VisitUnallocated(instr);
+ } else {
+ VisitFPDataProcessing1Source(instr);
+ }
+ } else if (instr->Bits(13, 10) == 8) {
+ if ((instr->Bits(15, 14) != 0) ||
+ (instr->Bits(2, 0) != 0) ||
+ (instr->Mask(0x80800000) != 0x00000000)) {
+ VisitUnallocated(instr);
+ } else {
+ VisitFPCompare(instr);
+ }
+ } else if (instr->Bits(12, 10) == 4) {
+ if ((instr->Bits(9, 5) != 0) ||
+ (instr->Mask(0x80800000) != 0x00000000)) {
+ VisitUnallocated(instr);
+ } else {
+ VisitFPImmediate(instr);
+ }
+ } else {
+ if (instr->Mask(0x80800000) != 0x00000000) {
+ VisitUnallocated(instr);
+ } else {
+ switch (instr->Bits(11, 10)) {
+ case 1: {
+ VisitFPConditionalCompare(instr);
+ break;
+ }
+ case 2: {
+ if ((instr->Bits(15, 14) == 0x3) ||
+ (instr->Mask(0x00009000) == 0x00009000) ||
+ (instr->Mask(0x0000A000) == 0x0000A000)) {
+ VisitUnallocated(instr);
+ } else {
+ VisitFPDataProcessing2Source(instr);
+ }
+ break;
+ }
+ case 3: {
+ VisitFPConditionalSelect(instr);
+ break;
+ }
+ default: VIXL_UNREACHABLE();
+ }
+ }
+ }
+ }
+ } else {
+ // Bit 30 == 1 has been handled earlier.
+ VIXL_ASSERT(instr->Bit(30) == 0);
+ if (instr->Mask(0xA0800000) != 0) {
+ VisitUnallocated(instr);
+ } else {
+ VisitFPDataProcessing3Source(instr);
+ }
+ }
+ } else {
+ VisitUnallocated(instr);
+ }
+ }
+ }
+}
+
+
+void Decoder::DecodeNEONLoadStore(const Instruction* instr) {
+ VIXL_ASSERT(instr->Bits(29, 25) == 0x6);
+ if (instr->Bit(31) == 0) {
+ if ((instr->Bit(24) == 0) && (instr->Bit(21) == 1)) {
+ VisitUnallocated(instr);
+ return;
+ }
+
+ if (instr->Bit(23) == 0) {
+ if (instr->Bits(20, 16) == 0) {
+ if (instr->Bit(24) == 0) {
+ VisitNEONLoadStoreMultiStruct(instr);
+ } else {
+ VisitNEONLoadStoreSingleStruct(instr);
+ }
+ } else {
+ VisitUnallocated(instr);
+ }
+ } else {
+ if (instr->Bit(24) == 0) {
+ VisitNEONLoadStoreMultiStructPostIndex(instr);
+ } else {
+ VisitNEONLoadStoreSingleStructPostIndex(instr);
+ }
+ }
+ } else {
+ VisitUnallocated(instr);
+ }
+}
+
+
+void Decoder::DecodeNEONVectorDataProcessing(const Instruction* instr) {
+ VIXL_ASSERT(instr->Bits(28, 25) == 0x7);
+ if (instr->Bit(31) == 0) {
+ if (instr->Bit(24) == 0) {
+ if (instr->Bit(21) == 0) {
+ if (instr->Bit(15) == 0) {
+ if (instr->Bit(10) == 0) {
+ if (instr->Bit(29) == 0) {
+ if (instr->Bit(11) == 0) {
+ VisitNEONTable(instr);
+ } else {
+ VisitNEONPerm(instr);
+ }
+ } else {
+ VisitNEONExtract(instr);
+ }
+ } else {
+ if (instr->Bits(23, 22) == 0) {
+ VisitNEONCopy(instr);
+ } else {
+ VisitUnallocated(instr);
+ }
+ }
+ } else {
+ VisitUnallocated(instr);
+ }
+ } else {
+ if (instr->Bit(10) == 0) {
+ if (instr->Bit(11) == 0) {
+ VisitNEON3Different(instr);
+ } else {
+ if (instr->Bits(18, 17) == 0) {
+ if (instr->Bit(20) == 0) {
+ if (instr->Bit(19) == 0) {
+ VisitNEON2RegMisc(instr);
+ } else {
+ if (instr->Bits(30, 29) == 0x2) {
+ VisitCryptoAES(instr);
+ } else {
+ VisitUnallocated(instr);
+ }
+ }
+ } else {
+ if (instr->Bit(19) == 0) {
+ VisitNEONAcrossLanes(instr);
+ } else {
+ VisitUnallocated(instr);
+ }
+ }
+ } else {
+ VisitUnallocated(instr);
+ }
+ }
+ } else {
+ VisitNEON3Same(instr);
+ }
+ }
+ } else {
+ if (instr->Bit(10) == 0) {
+ VisitNEONByIndexedElement(instr);
+ } else {
+ if (instr->Bit(23) == 0) {
+ if (instr->Bits(22, 19) == 0) {
+ VisitNEONModifiedImmediate(instr);
+ } else {
+ VisitNEONShiftImmediate(instr);
+ }
+ } else {
+ VisitUnallocated(instr);
+ }
+ }
+ }
+ } else {
+ VisitUnallocated(instr);
+ }
+}
+
+
+void Decoder::DecodeNEONScalarDataProcessing(const Instruction* instr) {
+ VIXL_ASSERT(instr->Bits(28, 25) == 0xF);
+ if (instr->Bit(24) == 0) {
+ if (instr->Bit(21) == 0) {
+ if (instr->Bit(15) == 0) {
+ if (instr->Bit(10) == 0) {
+ if (instr->Bit(29) == 0) {
+ if (instr->Bit(11) == 0) {
+ VisitCrypto3RegSHA(instr);
+ } else {
+ VisitUnallocated(instr);
+ }
+ } else {
+ VisitUnallocated(instr);
+ }
+ } else {
+ if (instr->Bits(23, 22) == 0) {
+ VisitNEONScalarCopy(instr);
+ } else {
+ VisitUnallocated(instr);
+ }
+ }
+ } else {
+ VisitUnallocated(instr);
+ }
+ } else {
+ if (instr->Bit(10) == 0) {
+ if (instr->Bit(11) == 0) {
+ VisitNEONScalar3Diff(instr);
+ } else {
+ if (instr->Bits(18, 17) == 0) {
+ if (instr->Bit(20) == 0) {
+ if (instr->Bit(19) == 0) {
+ VisitNEONScalar2RegMisc(instr);
+ } else {
+ if (instr->Bit(29) == 0) {
+ VisitCrypto2RegSHA(instr);
+ } else {
+ VisitUnallocated(instr);
+ }
+ }
+ } else {
+ if (instr->Bit(19) == 0) {
+ VisitNEONScalarPairwise(instr);
+ } else {
+ VisitUnallocated(instr);
+ }
+ }
+ } else {
+ VisitUnallocated(instr);
+ }
+ }
+ } else {
+ VisitNEONScalar3Same(instr);
+ }
+ }
+ } else {
+ if (instr->Bit(10) == 0) {
+ VisitNEONScalarByIndexedElement(instr);
+ } else {
+ if (instr->Bit(23) == 0) {
+ VisitNEONScalarShiftImmediate(instr);
+ } else {
+ VisitUnallocated(instr);
+ }
+ }
+ }
+}
+
+
+#define DEFINE_VISITOR_CALLERS(A) \
+ void Decoder::Visit##A(const Instruction *instr) { \
+ VIXL_ASSERT(instr->Mask(A##FMask) == A##Fixed); \
+ for (auto visitor : visitors_) { \
+ visitor->Visit##A(instr); \
+ } \
+ }
+VISITOR_LIST(DEFINE_VISITOR_CALLERS)
+#undef DEFINE_VISITOR_CALLERS
+} // namespace vixl
diff --git a/js/src/jit/arm64/vixl/Decoder-vixl.h b/js/src/jit/arm64/vixl/Decoder-vixl.h
new file mode 100644
index 0000000000..1b3cf172ac
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Decoder-vixl.h
@@ -0,0 +1,276 @@
+// Copyright 2014, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_A64_DECODER_A64_H_
+#define VIXL_A64_DECODER_A64_H_
+
+#include "mozilla/Vector.h"
+
+#include "jit/arm64/vixl/Globals-vixl.h"
+#include "jit/arm64/vixl/Instructions-vixl.h"
+#include "js/AllocPolicy.h"
+
+
+// List macro containing all visitors needed by the decoder class.
+
+#define VISITOR_LIST_THAT_RETURN(V) \
+ V(PCRelAddressing) \
+ V(AddSubImmediate) \
+ V(LogicalImmediate) \
+ V(MoveWideImmediate) \
+ V(AtomicMemory) \
+ V(Bitfield) \
+ V(Extract) \
+ V(UnconditionalBranch) \
+ V(UnconditionalBranchToRegister) \
+ V(CompareBranch) \
+ V(TestBranch) \
+ V(ConditionalBranch) \
+ V(System) \
+ V(Exception) \
+ V(LoadStorePairPostIndex) \
+ V(LoadStorePairOffset) \
+ V(LoadStorePairPreIndex) \
+ V(LoadStorePairNonTemporal) \
+ V(LoadLiteral) \
+ V(LoadStoreUnscaledOffset) \
+ V(LoadStorePostIndex) \
+ V(LoadStorePreIndex) \
+ V(LoadStoreRegisterOffset) \
+ V(LoadStoreUnsignedOffset) \
+ V(LoadStoreExclusive) \
+ V(LogicalShifted) \
+ V(AddSubShifted) \
+ V(AddSubExtended) \
+ V(AddSubWithCarry) \
+ V(ConditionalCompareRegister) \
+ V(ConditionalCompareImmediate) \
+ V(ConditionalSelect) \
+ V(DataProcessing1Source) \
+ V(DataProcessing2Source) \
+ V(DataProcessing3Source) \
+ V(FPCompare) \
+ V(FPConditionalCompare) \
+ V(FPConditionalSelect) \
+ V(FPImmediate) \
+ V(FPDataProcessing1Source) \
+ V(FPDataProcessing2Source) \
+ V(FPDataProcessing3Source) \
+ V(FPIntegerConvert) \
+ V(FPFixedPointConvert) \
+ V(Crypto2RegSHA) \
+ V(Crypto3RegSHA) \
+ V(CryptoAES) \
+ V(NEON2RegMisc) \
+ V(NEON3Different) \
+ V(NEON3Same) \
+ V(NEONAcrossLanes) \
+ V(NEONByIndexedElement) \
+ V(NEONCopy) \
+ V(NEONExtract) \
+ V(NEONLoadStoreMultiStruct) \
+ V(NEONLoadStoreMultiStructPostIndex) \
+ V(NEONLoadStoreSingleStruct) \
+ V(NEONLoadStoreSingleStructPostIndex) \
+ V(NEONModifiedImmediate) \
+ V(NEONScalar2RegMisc) \
+ V(NEONScalar3Diff) \
+ V(NEONScalar3Same) \
+ V(NEONScalarByIndexedElement) \
+ V(NEONScalarCopy) \
+ V(NEONScalarPairwise) \
+ V(NEONScalarShiftImmediate) \
+ V(NEONShiftImmediate) \
+ V(NEONTable) \
+ V(NEONPerm)
+
+#define VISITOR_LIST_THAT_DONT_RETURN(V) \
+ V(Unallocated) \
+ V(Unimplemented) \
+
+#define VISITOR_LIST(V) \
+ VISITOR_LIST_THAT_RETURN(V) \
+ VISITOR_LIST_THAT_DONT_RETURN(V) \
+
+namespace vixl {
+
+// The Visitor interface. Disassembler and simulator (and other tools)
+// must provide implementations for all of these functions.
+class DecoderVisitor {
+ public:
+ enum VisitorConstness {
+ kConstVisitor,
+ kNonConstVisitor
+ };
+ explicit DecoderVisitor(VisitorConstness constness = kConstVisitor)
+ : constness_(constness) {}
+
+ virtual ~DecoderVisitor() {}
+
+ #define DECLARE(A) virtual void Visit##A(const Instruction* instr) = 0;
+ VISITOR_LIST(DECLARE)
+ #undef DECLARE
+
+ bool IsConstVisitor() const { return constness_ == kConstVisitor; }
+ Instruction* MutableInstruction(const Instruction* instr) {
+ VIXL_ASSERT(!IsConstVisitor());
+ return const_cast<Instruction*>(instr);
+ }
+
+ private:
+ const VisitorConstness constness_;
+};
+
+
+class Decoder {
+ public:
+ Decoder() {}
+
+ // Top-level wrappers around the actual decoding function.
+ void Decode(const Instruction* instr) {
+#ifdef DEBUG
+ for (auto visitor : visitors_) {
+ VIXL_ASSERT(visitor->IsConstVisitor());
+ }
+#endif
+ DecodeInstruction(instr);
+ }
+ void Decode(Instruction* instr) {
+ DecodeInstruction(const_cast<const Instruction*>(instr));
+ }
+
+ // Register a new visitor class with the decoder.
+ // Decode() will call the corresponding visitor method from all registered
+ // visitor classes when decoding reaches the leaf node of the instruction
+ // decode tree.
+ // Visitors are called in order.
+ // A visitor can be registered multiple times.
+ //
+ // d.AppendVisitor(V1);
+ // d.AppendVisitor(V2);
+ // d.PrependVisitor(V2);
+ // d.AppendVisitor(V3);
+ //
+ // d.Decode(i);
+ //
+ // will call in order visitor methods in V2, V1, V2, V3.
+ void AppendVisitor(DecoderVisitor* visitor);
+ void PrependVisitor(DecoderVisitor* visitor);
+ // These helpers register `new_visitor` before or after the first instance of
+ // `registered_visiter` in the list.
+ // So if
+ // V1, V2, V1, V2
+ // are registered in this order in the decoder, calls to
+ // d.InsertVisitorAfter(V3, V1);
+ // d.InsertVisitorBefore(V4, V2);
+ // will yield the order
+ // V1, V3, V4, V2, V1, V2
+ //
+ // For more complex modifications of the order of registered visitors, one can
+ // directly access and modify the list of visitors via the `visitors()'
+ // accessor.
+ void InsertVisitorBefore(DecoderVisitor* new_visitor,
+ DecoderVisitor* registered_visitor);
+ void InsertVisitorAfter(DecoderVisitor* new_visitor,
+ DecoderVisitor* registered_visitor);
+
+ // Remove all instances of a previously registered visitor class from the list
+ // of visitors stored by the decoder.
+ void RemoveVisitor(DecoderVisitor* visitor);
+
+ #define DECLARE(A) void Visit##A(const Instruction* instr);
+ VISITOR_LIST(DECLARE)
+ #undef DECLARE
+
+
+ private:
+ // Decodes an instruction and calls the visitor functions registered with the
+ // Decoder class.
+ void DecodeInstruction(const Instruction* instr);
+
+ // Decode the PC relative addressing instruction, and call the corresponding
+ // visitors.
+ // On entry, instruction bits 27:24 = 0x0.
+ void DecodePCRelAddressing(const Instruction* instr);
+
+ // Decode the add/subtract immediate instruction, and call the correspoding
+ // visitors.
+ // On entry, instruction bits 27:24 = 0x1.
+ void DecodeAddSubImmediate(const Instruction* instr);
+
+ // Decode the branch, system command, and exception generation parts of
+ // the instruction tree, and call the corresponding visitors.
+ // On entry, instruction bits 27:24 = {0x4, 0x5, 0x6, 0x7}.
+ void DecodeBranchSystemException(const Instruction* instr);
+
+ // Decode the load and store parts of the instruction tree, and call
+ // the corresponding visitors.
+ // On entry, instruction bits 27:24 = {0x8, 0x9, 0xC, 0xD}.
+ void DecodeLoadStore(const Instruction* instr);
+
+ // Decode the logical immediate and move wide immediate parts of the
+ // instruction tree, and call the corresponding visitors.
+ // On entry, instruction bits 27:24 = 0x2.
+ void DecodeLogical(const Instruction* instr);
+
+ // Decode the bitfield and extraction parts of the instruction tree,
+ // and call the corresponding visitors.
+ // On entry, instruction bits 27:24 = 0x3.
+ void DecodeBitfieldExtract(const Instruction* instr);
+
+ // Decode the data processing parts of the instruction tree, and call the
+ // corresponding visitors.
+ // On entry, instruction bits 27:24 = {0x1, 0xA, 0xB}.
+ void DecodeDataProcessing(const Instruction* instr);
+
+ // Decode the floating point parts of the instruction tree, and call the
+ // corresponding visitors.
+ // On entry, instruction bits 27:24 = {0xE, 0xF}.
+ void DecodeFP(const Instruction* instr);
+
+ // Decode the Advanced SIMD (NEON) load/store part of the instruction tree,
+ // and call the corresponding visitors.
+ // On entry, instruction bits 29:25 = 0x6.
+ void DecodeNEONLoadStore(const Instruction* instr);
+
+ // Decode the Advanced SIMD (NEON) vector data processing part of the
+ // instruction tree, and call the corresponding visitors.
+ // On entry, instruction bits 28:25 = 0x7.
+ void DecodeNEONVectorDataProcessing(const Instruction* instr);
+
+ // Decode the Advanced SIMD (NEON) scalar data processing part of the
+ // instruction tree, and call the corresponding visitors.
+ // On entry, instruction bits 28:25 = 0xF.
+ void DecodeNEONScalarDataProcessing(const Instruction* instr);
+
+ private:
+ // Visitors are registered in a list.
+ mozilla::Vector<DecoderVisitor*, 8, js::SystemAllocPolicy> visitors_;
+};
+
+} // namespace vixl
+
+#endif // VIXL_A64_DECODER_A64_H_
diff --git a/js/src/jit/arm64/vixl/Disasm-vixl.cpp b/js/src/jit/arm64/vixl/Disasm-vixl.cpp
new file mode 100644
index 0000000000..1116ebb67b
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Disasm-vixl.cpp
@@ -0,0 +1,3741 @@
+// Copyright 2015, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "jit/arm64/vixl/Disasm-vixl.h"
+
+#include "mozilla/Sprintf.h"
+#include <cstdlib>
+
+namespace vixl {
+
+Disassembler::Disassembler() {
+ buffer_size_ = 256;
+ buffer_ = reinterpret_cast<char*>(malloc(buffer_size_));
+ buffer_pos_ = 0;
+ own_buffer_ = true;
+ code_address_offset_ = 0;
+}
+
+
+Disassembler::Disassembler(char* text_buffer, int buffer_size) {
+ buffer_size_ = buffer_size;
+ buffer_ = text_buffer;
+ buffer_pos_ = 0;
+ own_buffer_ = false;
+ code_address_offset_ = 0;
+}
+
+
+Disassembler::~Disassembler() {
+ if (own_buffer_) {
+ free(buffer_);
+ }
+}
+
+
+char* Disassembler::GetOutput() {
+ return buffer_;
+}
+
+
+void Disassembler::VisitAddSubImmediate(const Instruction* instr) {
+ bool rd_is_zr = RdIsZROrSP(instr);
+ bool stack_op = (rd_is_zr || RnIsZROrSP(instr)) &&
+ (instr->ImmAddSub() == 0) ? true : false;
+ const char *mnemonic = "";
+ const char *form = "'Rds, 'Rns, 'IAddSub";
+ const char *form_cmp = "'Rns, 'IAddSub";
+ const char *form_mov = "'Rds, 'Rns";
+
+ switch (instr->Mask(AddSubImmediateMask)) {
+ case ADD_w_imm:
+ case ADD_x_imm: {
+ mnemonic = "add";
+ if (stack_op) {
+ mnemonic = "mov";
+ form = form_mov;
+ }
+ break;
+ }
+ case ADDS_w_imm:
+ case ADDS_x_imm: {
+ mnemonic = "adds";
+ if (rd_is_zr) {
+ mnemonic = "cmn";
+ form = form_cmp;
+ }
+ break;
+ }
+ case SUB_w_imm:
+ case SUB_x_imm: mnemonic = "sub"; break;
+ case SUBS_w_imm:
+ case SUBS_x_imm: {
+ mnemonic = "subs";
+ if (rd_is_zr) {
+ mnemonic = "cmp";
+ form = form_cmp;
+ }
+ break;
+ }
+ default: VIXL_UNREACHABLE();
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitAddSubShifted(const Instruction* instr) {
+ bool rd_is_zr = RdIsZROrSP(instr);
+ bool rn_is_zr = RnIsZROrSP(instr);
+ const char *mnemonic = "";
+ const char *form = "'Rd, 'Rn, 'Rm'NDP";
+ const char *form_cmp = "'Rn, 'Rm'NDP";
+ const char *form_neg = "'Rd, 'Rm'NDP";
+
+ switch (instr->Mask(AddSubShiftedMask)) {
+ case ADD_w_shift:
+ case ADD_x_shift: mnemonic = "add"; break;
+ case ADDS_w_shift:
+ case ADDS_x_shift: {
+ mnemonic = "adds";
+ if (rd_is_zr) {
+ mnemonic = "cmn";
+ form = form_cmp;
+ }
+ break;
+ }
+ case SUB_w_shift:
+ case SUB_x_shift: {
+ mnemonic = "sub";
+ if (rn_is_zr) {
+ mnemonic = "neg";
+ form = form_neg;
+ }
+ break;
+ }
+ case SUBS_w_shift:
+ case SUBS_x_shift: {
+ mnemonic = "subs";
+ if (rd_is_zr) {
+ mnemonic = "cmp";
+ form = form_cmp;
+ } else if (rn_is_zr) {
+ mnemonic = "negs";
+ form = form_neg;
+ }
+ break;
+ }
+ default: VIXL_UNREACHABLE();
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitAddSubExtended(const Instruction* instr) {
+ bool rd_is_zr = RdIsZROrSP(instr);
+ const char *mnemonic = "";
+ Extend mode = static_cast<Extend>(instr->ExtendMode());
+ const char *form = ((mode == UXTX) || (mode == SXTX)) ?
+ "'Rds, 'Rns, 'Xm'Ext" : "'Rds, 'Rns, 'Wm'Ext";
+ const char *form_cmp = ((mode == UXTX) || (mode == SXTX)) ?
+ "'Rns, 'Xm'Ext" : "'Rns, 'Wm'Ext";
+
+ switch (instr->Mask(AddSubExtendedMask)) {
+ case ADD_w_ext:
+ case ADD_x_ext: mnemonic = "add"; break;
+ case ADDS_w_ext:
+ case ADDS_x_ext: {
+ mnemonic = "adds";
+ if (rd_is_zr) {
+ mnemonic = "cmn";
+ form = form_cmp;
+ }
+ break;
+ }
+ case SUB_w_ext:
+ case SUB_x_ext: mnemonic = "sub"; break;
+ case SUBS_w_ext:
+ case SUBS_x_ext: {
+ mnemonic = "subs";
+ if (rd_is_zr) {
+ mnemonic = "cmp";
+ form = form_cmp;
+ }
+ break;
+ }
+ default: VIXL_UNREACHABLE();
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitAddSubWithCarry(const Instruction* instr) {
+ bool rn_is_zr = RnIsZROrSP(instr);
+ const char *mnemonic = "";
+ const char *form = "'Rd, 'Rn, 'Rm";
+ const char *form_neg = "'Rd, 'Rm";
+
+ switch (instr->Mask(AddSubWithCarryMask)) {
+ case ADC_w:
+ case ADC_x: mnemonic = "adc"; break;
+ case ADCS_w:
+ case ADCS_x: mnemonic = "adcs"; break;
+ case SBC_w:
+ case SBC_x: {
+ mnemonic = "sbc";
+ if (rn_is_zr) {
+ mnemonic = "ngc";
+ form = form_neg;
+ }
+ break;
+ }
+ case SBCS_w:
+ case SBCS_x: {
+ mnemonic = "sbcs";
+ if (rn_is_zr) {
+ mnemonic = "ngcs";
+ form = form_neg;
+ }
+ break;
+ }
+ default: VIXL_UNREACHABLE();
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitLogicalImmediate(const Instruction* instr) {
+ bool rd_is_zr = RdIsZROrSP(instr);
+ bool rn_is_zr = RnIsZROrSP(instr);
+ const char *mnemonic = "";
+ const char *form = "'Rds, 'Rn, 'ITri";
+
+ if (instr->ImmLogical() == 0) {
+ // The immediate encoded in the instruction is not in the expected format.
+ Format(instr, "unallocated", "(LogicalImmediate)");
+ return;
+ }
+
+ switch (instr->Mask(LogicalImmediateMask)) {
+ case AND_w_imm:
+ case AND_x_imm: mnemonic = "and"; break;
+ case ORR_w_imm:
+ case ORR_x_imm: {
+ mnemonic = "orr";
+ unsigned reg_size = (instr->SixtyFourBits() == 1) ? kXRegSize
+ : kWRegSize;
+ if (rn_is_zr && !IsMovzMovnImm(reg_size, instr->ImmLogical())) {
+ mnemonic = "mov";
+ form = "'Rds, 'ITri";
+ }
+ break;
+ }
+ case EOR_w_imm:
+ case EOR_x_imm: mnemonic = "eor"; break;
+ case ANDS_w_imm:
+ case ANDS_x_imm: {
+ mnemonic = "ands";
+ if (rd_is_zr) {
+ mnemonic = "tst";
+ form = "'Rn, 'ITri";
+ }
+ break;
+ }
+ default: VIXL_UNREACHABLE();
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+bool Disassembler::IsMovzMovnImm(unsigned reg_size, uint64_t value) {
+ VIXL_ASSERT((reg_size == kXRegSize) ||
+ ((reg_size == kWRegSize) && (value <= 0xffffffff)));
+
+ // Test for movz: 16 bits set at positions 0, 16, 32 or 48.
+ if (((value & UINT64_C(0xffffffffffff0000)) == 0) ||
+ ((value & UINT64_C(0xffffffff0000ffff)) == 0) ||
+ ((value & UINT64_C(0xffff0000ffffffff)) == 0) ||
+ ((value & UINT64_C(0x0000ffffffffffff)) == 0)) {
+ return true;
+ }
+
+ // Test for movn: NOT(16 bits set at positions 0, 16, 32 or 48).
+ if ((reg_size == kXRegSize) &&
+ (((~value & UINT64_C(0xffffffffffff0000)) == 0) ||
+ ((~value & UINT64_C(0xffffffff0000ffff)) == 0) ||
+ ((~value & UINT64_C(0xffff0000ffffffff)) == 0) ||
+ ((~value & UINT64_C(0x0000ffffffffffff)) == 0))) {
+ return true;
+ }
+ if ((reg_size == kWRegSize) &&
+ (((value & 0xffff0000) == 0xffff0000) ||
+ ((value & 0x0000ffff) == 0x0000ffff))) {
+ return true;
+ }
+ return false;
+}
+
+
+void Disassembler::VisitLogicalShifted(const Instruction* instr) {
+ bool rd_is_zr = RdIsZROrSP(instr);
+ bool rn_is_zr = RnIsZROrSP(instr);
+ const char *mnemonic = "";
+ const char *form = "'Rd, 'Rn, 'Rm'NLo";
+
+ switch (instr->Mask(LogicalShiftedMask)) {
+ case AND_w:
+ case AND_x: mnemonic = "and"; break;
+ case BIC_w:
+ case BIC_x: mnemonic = "bic"; break;
+ case EOR_w:
+ case EOR_x: mnemonic = "eor"; break;
+ case EON_w:
+ case EON_x: mnemonic = "eon"; break;
+ case BICS_w:
+ case BICS_x: mnemonic = "bics"; break;
+ case ANDS_w:
+ case ANDS_x: {
+ mnemonic = "ands";
+ if (rd_is_zr) {
+ mnemonic = "tst";
+ form = "'Rn, 'Rm'NLo";
+ }
+ break;
+ }
+ case ORR_w:
+ case ORR_x: {
+ mnemonic = "orr";
+ if (rn_is_zr && (instr->ImmDPShift() == 0) && (instr->ShiftDP() == LSL)) {
+ mnemonic = "mov";
+ form = "'Rd, 'Rm";
+ }
+ break;
+ }
+ case ORN_w:
+ case ORN_x: {
+ mnemonic = "orn";
+ if (rn_is_zr) {
+ mnemonic = "mvn";
+ form = "'Rd, 'Rm'NLo";
+ }
+ break;
+ }
+ default: VIXL_UNREACHABLE();
+ }
+
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitConditionalCompareRegister(const Instruction* instr) {
+ const char *mnemonic = "";
+ const char *form = "'Rn, 'Rm, 'INzcv, 'Cond";
+
+ switch (instr->Mask(ConditionalCompareRegisterMask)) {
+ case CCMN_w:
+ case CCMN_x: mnemonic = "ccmn"; break;
+ case CCMP_w:
+ case CCMP_x: mnemonic = "ccmp"; break;
+ default: VIXL_UNREACHABLE();
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitConditionalCompareImmediate(const Instruction* instr) {
+ const char *mnemonic = "";
+ const char *form = "'Rn, 'IP, 'INzcv, 'Cond";
+
+ switch (instr->Mask(ConditionalCompareImmediateMask)) {
+ case CCMN_w_imm:
+ case CCMN_x_imm: mnemonic = "ccmn"; break;
+ case CCMP_w_imm:
+ case CCMP_x_imm: mnemonic = "ccmp"; break;
+ default: VIXL_UNREACHABLE();
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitConditionalSelect(const Instruction* instr) {
+ bool rnm_is_zr = (RnIsZROrSP(instr) && RmIsZROrSP(instr));
+ bool rn_is_rm = (instr->Rn() == instr->Rm());
+ const char *mnemonic = "";
+ const char *form = "'Rd, 'Rn, 'Rm, 'Cond";
+ const char *form_test = "'Rd, 'CInv";
+ const char *form_update = "'Rd, 'Rn, 'CInv";
+
+ Condition cond = static_cast<Condition>(instr->Condition());
+ bool invertible_cond = (cond != al) && (cond != nv);
+
+ switch (instr->Mask(ConditionalSelectMask)) {
+ case CSEL_w:
+ case CSEL_x: mnemonic = "csel"; break;
+ case CSINC_w:
+ case CSINC_x: {
+ mnemonic = "csinc";
+ if (rnm_is_zr && invertible_cond) {
+ mnemonic = "cset";
+ form = form_test;
+ } else if (rn_is_rm && invertible_cond) {
+ mnemonic = "cinc";
+ form = form_update;
+ }
+ break;
+ }
+ case CSINV_w:
+ case CSINV_x: {
+ mnemonic = "csinv";
+ if (rnm_is_zr && invertible_cond) {
+ mnemonic = "csetm";
+ form = form_test;
+ } else if (rn_is_rm && invertible_cond) {
+ mnemonic = "cinv";
+ form = form_update;
+ }
+ break;
+ }
+ case CSNEG_w:
+ case CSNEG_x: {
+ mnemonic = "csneg";
+ if (rn_is_rm && invertible_cond) {
+ mnemonic = "cneg";
+ form = form_update;
+ }
+ break;
+ }
+ default: VIXL_UNREACHABLE();
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitBitfield(const Instruction* instr) {
+ unsigned s = instr->ImmS();
+ unsigned r = instr->ImmR();
+ unsigned rd_size_minus_1 =
+ ((instr->SixtyFourBits() == 1) ? kXRegSize : kWRegSize) - 1;
+ const char *mnemonic = "";
+ const char *form = "";
+ const char *form_shift_right = "'Rd, 'Rn, 'IBr";
+ const char *form_extend = "'Rd, 'Wn";
+ const char *form_bfiz = "'Rd, 'Rn, 'IBZ-r, 'IBs+1";
+ const char *form_bfx = "'Rd, 'Rn, 'IBr, 'IBs-r+1";
+ const char *form_lsl = "'Rd, 'Rn, 'IBZ-r";
+
+ switch (instr->Mask(BitfieldMask)) {
+ case SBFM_w:
+ case SBFM_x: {
+ mnemonic = "sbfx";
+ form = form_bfx;
+ if (r == 0) {
+ form = form_extend;
+ if (s == 7) {
+ mnemonic = "sxtb";
+ } else if (s == 15) {
+ mnemonic = "sxth";
+ } else if ((s == 31) && (instr->SixtyFourBits() == 1)) {
+ mnemonic = "sxtw";
+ } else {
+ form = form_bfx;
+ }
+ } else if (s == rd_size_minus_1) {
+ mnemonic = "asr";
+ form = form_shift_right;
+ } else if (s < r) {
+ mnemonic = "sbfiz";
+ form = form_bfiz;
+ }
+ break;
+ }
+ case UBFM_w:
+ case UBFM_x: {
+ mnemonic = "ubfx";
+ form = form_bfx;
+ if (r == 0) {
+ form = form_extend;
+ if (s == 7) {
+ mnemonic = "uxtb";
+ } else if (s == 15) {
+ mnemonic = "uxth";
+ } else {
+ form = form_bfx;
+ }
+ }
+ if (s == rd_size_minus_1) {
+ mnemonic = "lsr";
+ form = form_shift_right;
+ } else if (r == s + 1) {
+ mnemonic = "lsl";
+ form = form_lsl;
+ } else if (s < r) {
+ mnemonic = "ubfiz";
+ form = form_bfiz;
+ }
+ break;
+ }
+ case BFM_w:
+ case BFM_x: {
+ mnemonic = "bfxil";
+ form = form_bfx;
+ if (s < r) {
+ mnemonic = "bfi";
+ form = form_bfiz;
+ }
+ }
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitExtract(const Instruction* instr) {
+ const char *mnemonic = "";
+ const char *form = "'Rd, 'Rn, 'Rm, 'IExtract";
+
+ switch (instr->Mask(ExtractMask)) {
+ case EXTR_w:
+ case EXTR_x: {
+ if (instr->Rn() == instr->Rm()) {
+ mnemonic = "ror";
+ form = "'Rd, 'Rn, 'IExtract";
+ } else {
+ mnemonic = "extr";
+ }
+ break;
+ }
+ default: VIXL_UNREACHABLE();
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitPCRelAddressing(const Instruction* instr) {
+ switch (instr->Mask(PCRelAddressingMask)) {
+ case ADR: Format(instr, "adr", "'Xd, 'AddrPCRelByte"); break;
+ case ADRP: Format(instr, "adrp", "'Xd, 'AddrPCRelPage"); break;
+ default: Format(instr, "unimplemented", "(PCRelAddressing)");
+ }
+}
+
+
+void Disassembler::VisitConditionalBranch(const Instruction* instr) {
+ switch (instr->Mask(ConditionalBranchMask)) {
+ case B_cond: Format(instr, "b.'CBrn", "'TImmCond"); break;
+ default: VIXL_UNREACHABLE();
+ }
+}
+
+
+void Disassembler::VisitUnconditionalBranchToRegister(
+ const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Xn";
+
+ switch (instr->Mask(UnconditionalBranchToRegisterMask)) {
+ case BR: mnemonic = "br"; break;
+ case BLR: mnemonic = "blr"; break;
+ case RET: {
+ mnemonic = "ret";
+ if (instr->Rn() == kLinkRegCode) {
+ form = NULL;
+ }
+ break;
+ }
+ default: form = "(UnconditionalBranchToRegister)";
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitUnconditionalBranch(const Instruction* instr) {
+ const char *mnemonic = "";
+ const char *form = "'TImmUncn";
+
+ switch (instr->Mask(UnconditionalBranchMask)) {
+ case B: mnemonic = "b"; break;
+ case BL: mnemonic = "bl"; break;
+ default: VIXL_UNREACHABLE();
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitDataProcessing1Source(const Instruction* instr) {
+ const char *mnemonic = "";
+ const char *form = "'Rd, 'Rn";
+
+ switch (instr->Mask(DataProcessing1SourceMask)) {
+ #define FORMAT(A, B) \
+ case A##_w: \
+ case A##_x: mnemonic = B; break;
+ FORMAT(RBIT, "rbit");
+ FORMAT(REV16, "rev16");
+ FORMAT(REV, "rev");
+ FORMAT(CLZ, "clz");
+ FORMAT(CLS, "cls");
+ #undef FORMAT
+ case REV32_x: mnemonic = "rev32"; break;
+ default: VIXL_UNREACHABLE();
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitDataProcessing2Source(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Rd, 'Rn, 'Rm";
+ const char *form_wwx = "'Wd, 'Wn, 'Xm";
+
+ switch (instr->Mask(DataProcessing2SourceMask)) {
+ #define FORMAT(A, B) \
+ case A##_w: \
+ case A##_x: mnemonic = B; break;
+ FORMAT(UDIV, "udiv");
+ FORMAT(SDIV, "sdiv");
+ FORMAT(LSLV, "lsl");
+ FORMAT(LSRV, "lsr");
+ FORMAT(ASRV, "asr");
+ FORMAT(RORV, "ror");
+ #undef FORMAT
+ case CRC32B: mnemonic = "crc32b"; break;
+ case CRC32H: mnemonic = "crc32h"; break;
+ case CRC32W: mnemonic = "crc32w"; break;
+ case CRC32X: mnemonic = "crc32x"; form = form_wwx; break;
+ case CRC32CB: mnemonic = "crc32cb"; break;
+ case CRC32CH: mnemonic = "crc32ch"; break;
+ case CRC32CW: mnemonic = "crc32cw"; break;
+ case CRC32CX: mnemonic = "crc32cx"; form = form_wwx; break;
+ default: form = "(DataProcessing2Source)";
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitDataProcessing3Source(const Instruction* instr) {
+ bool ra_is_zr = RaIsZROrSP(instr);
+ const char *mnemonic = "";
+ const char *form = "'Xd, 'Wn, 'Wm, 'Xa";
+ const char *form_rrr = "'Rd, 'Rn, 'Rm";
+ const char *form_rrrr = "'Rd, 'Rn, 'Rm, 'Ra";
+ const char *form_xww = "'Xd, 'Wn, 'Wm";
+ const char *form_xxx = "'Xd, 'Xn, 'Xm";
+
+ switch (instr->Mask(DataProcessing3SourceMask)) {
+ case MADD_w:
+ case MADD_x: {
+ mnemonic = "madd";
+ form = form_rrrr;
+ if (ra_is_zr) {
+ mnemonic = "mul";
+ form = form_rrr;
+ }
+ break;
+ }
+ case MSUB_w:
+ case MSUB_x: {
+ mnemonic = "msub";
+ form = form_rrrr;
+ if (ra_is_zr) {
+ mnemonic = "mneg";
+ form = form_rrr;
+ }
+ break;
+ }
+ case SMADDL_x: {
+ mnemonic = "smaddl";
+ if (ra_is_zr) {
+ mnemonic = "smull";
+ form = form_xww;
+ }
+ break;
+ }
+ case SMSUBL_x: {
+ mnemonic = "smsubl";
+ if (ra_is_zr) {
+ mnemonic = "smnegl";
+ form = form_xww;
+ }
+ break;
+ }
+ case UMADDL_x: {
+ mnemonic = "umaddl";
+ if (ra_is_zr) {
+ mnemonic = "umull";
+ form = form_xww;
+ }
+ break;
+ }
+ case UMSUBL_x: {
+ mnemonic = "umsubl";
+ if (ra_is_zr) {
+ mnemonic = "umnegl";
+ form = form_xww;
+ }
+ break;
+ }
+ case SMULH_x: {
+ mnemonic = "smulh";
+ form = form_xxx;
+ break;
+ }
+ case UMULH_x: {
+ mnemonic = "umulh";
+ form = form_xxx;
+ break;
+ }
+ default: VIXL_UNREACHABLE();
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitCompareBranch(const Instruction* instr) {
+ const char *mnemonic = "";
+ const char *form = "'Rt, 'TImmCmpa";
+
+ switch (instr->Mask(CompareBranchMask)) {
+ case CBZ_w:
+ case CBZ_x: mnemonic = "cbz"; break;
+ case CBNZ_w:
+ case CBNZ_x: mnemonic = "cbnz"; break;
+ default: VIXL_UNREACHABLE();
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitTestBranch(const Instruction* instr) {
+ const char *mnemonic = "";
+ // If the top bit of the immediate is clear, the tested register is
+ // disassembled as Wt, otherwise Xt. As the top bit of the immediate is
+ // encoded in bit 31 of the instruction, we can reuse the Rt form, which
+ // uses bit 31 (normally "sf") to choose the register size.
+ const char *form = "'Rt, 'IS, 'TImmTest";
+
+ switch (instr->Mask(TestBranchMask)) {
+ case TBZ: mnemonic = "tbz"; break;
+ case TBNZ: mnemonic = "tbnz"; break;
+ default: VIXL_UNREACHABLE();
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitMoveWideImmediate(const Instruction* instr) {
+ const char *mnemonic = "";
+ const char *form = "'Rd, 'IMoveImm";
+
+ // Print the shift separately for movk, to make it clear which half word will
+ // be overwritten. Movn and movz print the computed immediate, which includes
+ // shift calculation.
+ switch (instr->Mask(MoveWideImmediateMask)) {
+ case MOVN_w:
+ case MOVN_x:
+ if ((instr->ImmMoveWide()) || (instr->ShiftMoveWide() == 0)) {
+ if ((instr->SixtyFourBits() == 0) && (instr->ImmMoveWide() == 0xffff)) {
+ mnemonic = "movn";
+ } else {
+ mnemonic = "mov";
+ form = "'Rd, 'IMoveNeg";
+ }
+ } else {
+ mnemonic = "movn";
+ }
+ break;
+ case MOVZ_w:
+ case MOVZ_x:
+ if ((instr->ImmMoveWide()) || (instr->ShiftMoveWide() == 0))
+ mnemonic = "mov";
+ else
+ mnemonic = "movz";
+ break;
+ case MOVK_w:
+ case MOVK_x: mnemonic = "movk"; form = "'Rd, 'IMoveLSL"; break;
+ default: VIXL_UNREACHABLE();
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+#define LOAD_STORE_LIST(V) \
+ V(STRB_w, "strb", "'Wt") \
+ V(STRH_w, "strh", "'Wt") \
+ V(STR_w, "str", "'Wt") \
+ V(STR_x, "str", "'Xt") \
+ V(LDRB_w, "ldrb", "'Wt") \
+ V(LDRH_w, "ldrh", "'Wt") \
+ V(LDR_w, "ldr", "'Wt") \
+ V(LDR_x, "ldr", "'Xt") \
+ V(LDRSB_x, "ldrsb", "'Xt") \
+ V(LDRSH_x, "ldrsh", "'Xt") \
+ V(LDRSW_x, "ldrsw", "'Xt") \
+ V(LDRSB_w, "ldrsb", "'Wt") \
+ V(LDRSH_w, "ldrsh", "'Wt") \
+ V(STR_b, "str", "'Bt") \
+ V(STR_h, "str", "'Ht") \
+ V(STR_s, "str", "'St") \
+ V(STR_d, "str", "'Dt") \
+ V(LDR_b, "ldr", "'Bt") \
+ V(LDR_h, "ldr", "'Ht") \
+ V(LDR_s, "ldr", "'St") \
+ V(LDR_d, "ldr", "'Dt") \
+ V(STR_q, "str", "'Qt") \
+ V(LDR_q, "ldr", "'Qt")
+
+void Disassembler::VisitLoadStorePreIndex(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(LoadStorePreIndex)";
+
+ switch (instr->Mask(LoadStorePreIndexMask)) {
+ #define LS_PREINDEX(A, B, C) \
+ case A##_pre: mnemonic = B; form = C ", ['Xns'ILS]!"; break;
+ LOAD_STORE_LIST(LS_PREINDEX)
+ #undef LS_PREINDEX
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitLoadStorePostIndex(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(LoadStorePostIndex)";
+
+ switch (instr->Mask(LoadStorePostIndexMask)) {
+ #define LS_POSTINDEX(A, B, C) \
+ case A##_post: mnemonic = B; form = C ", ['Xns]'ILS"; break;
+ LOAD_STORE_LIST(LS_POSTINDEX)
+ #undef LS_POSTINDEX
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitLoadStoreUnsignedOffset(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(LoadStoreUnsignedOffset)";
+
+ switch (instr->Mask(LoadStoreUnsignedOffsetMask)) {
+ #define LS_UNSIGNEDOFFSET(A, B, C) \
+ case A##_unsigned: mnemonic = B; form = C ", ['Xns'ILU]"; break;
+ LOAD_STORE_LIST(LS_UNSIGNEDOFFSET)
+ #undef LS_UNSIGNEDOFFSET
+ case PRFM_unsigned: mnemonic = "prfm"; form = "'PrefOp, ['Xns'ILU]";
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitLoadStoreRegisterOffset(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(LoadStoreRegisterOffset)";
+
+ switch (instr->Mask(LoadStoreRegisterOffsetMask)) {
+ #define LS_REGISTEROFFSET(A, B, C) \
+ case A##_reg: mnemonic = B; form = C ", ['Xns, 'Offsetreg]"; break;
+ LOAD_STORE_LIST(LS_REGISTEROFFSET)
+ #undef LS_REGISTEROFFSET
+ case PRFM_reg: mnemonic = "prfm"; form = "'PrefOp, ['Xns, 'Offsetreg]";
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitLoadStoreUnscaledOffset(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Wt, ['Xns'ILS]";
+ const char *form_x = "'Xt, ['Xns'ILS]";
+ const char *form_b = "'Bt, ['Xns'ILS]";
+ const char *form_h = "'Ht, ['Xns'ILS]";
+ const char *form_s = "'St, ['Xns'ILS]";
+ const char *form_d = "'Dt, ['Xns'ILS]";
+ const char *form_q = "'Qt, ['Xns'ILS]";
+ const char *form_prefetch = "'PrefOp, ['Xns'ILS]";
+
+ switch (instr->Mask(LoadStoreUnscaledOffsetMask)) {
+ case STURB_w: mnemonic = "sturb"; break;
+ case STURH_w: mnemonic = "sturh"; break;
+ case STUR_w: mnemonic = "stur"; break;
+ case STUR_x: mnemonic = "stur"; form = form_x; break;
+ case STUR_b: mnemonic = "stur"; form = form_b; break;
+ case STUR_h: mnemonic = "stur"; form = form_h; break;
+ case STUR_s: mnemonic = "stur"; form = form_s; break;
+ case STUR_d: mnemonic = "stur"; form = form_d; break;
+ case STUR_q: mnemonic = "stur"; form = form_q; break;
+ case LDURB_w: mnemonic = "ldurb"; break;
+ case LDURH_w: mnemonic = "ldurh"; break;
+ case LDUR_w: mnemonic = "ldur"; break;
+ case LDUR_x: mnemonic = "ldur"; form = form_x; break;
+ case LDUR_b: mnemonic = "ldur"; form = form_b; break;
+ case LDUR_h: mnemonic = "ldur"; form = form_h; break;
+ case LDUR_s: mnemonic = "ldur"; form = form_s; break;
+ case LDUR_d: mnemonic = "ldur"; form = form_d; break;
+ case LDUR_q: mnemonic = "ldur"; form = form_q; break;
+ case LDURSB_x: form = form_x; VIXL_FALLTHROUGH();
+ case LDURSB_w: mnemonic = "ldursb"; break;
+ case LDURSH_x: form = form_x; VIXL_FALLTHROUGH();
+ case LDURSH_w: mnemonic = "ldursh"; break;
+ case LDURSW_x: mnemonic = "ldursw"; form = form_x; break;
+ case PRFUM: mnemonic = "prfum"; form = form_prefetch; break;
+ default: form = "(LoadStoreUnscaledOffset)";
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitLoadLiteral(const Instruction* instr) {
+ const char *mnemonic = "ldr";
+ const char *form = "(LoadLiteral)";
+
+ switch (instr->Mask(LoadLiteralMask)) {
+ case LDR_w_lit: form = "'Wt, 'ILLiteral 'LValue"; break;
+ case LDR_x_lit: form = "'Xt, 'ILLiteral 'LValue"; break;
+ case LDR_s_lit: form = "'St, 'ILLiteral 'LValue"; break;
+ case LDR_d_lit: form = "'Dt, 'ILLiteral 'LValue"; break;
+ case LDR_q_lit: form = "'Qt, 'ILLiteral 'LValue"; break;
+ case LDRSW_x_lit: {
+ mnemonic = "ldrsw";
+ form = "'Xt, 'ILLiteral 'LValue";
+ break;
+ }
+ case PRFM_lit: {
+ mnemonic = "prfm";
+ form = "'PrefOp, 'ILLiteral 'LValue";
+ break;
+ }
+ default: mnemonic = "unimplemented";
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+#define LOAD_STORE_PAIR_LIST(V) \
+ V(STP_w, "stp", "'Wt, 'Wt2", "2") \
+ V(LDP_w, "ldp", "'Wt, 'Wt2", "2") \
+ V(LDPSW_x, "ldpsw", "'Xt, 'Xt2", "2") \
+ V(STP_x, "stp", "'Xt, 'Xt2", "3") \
+ V(LDP_x, "ldp", "'Xt, 'Xt2", "3") \
+ V(STP_s, "stp", "'St, 'St2", "2") \
+ V(LDP_s, "ldp", "'St, 'St2", "2") \
+ V(STP_d, "stp", "'Dt, 'Dt2", "3") \
+ V(LDP_d, "ldp", "'Dt, 'Dt2", "3") \
+ V(LDP_q, "ldp", "'Qt, 'Qt2", "4") \
+ V(STP_q, "stp", "'Qt, 'Qt2", "4")
+
+void Disassembler::VisitLoadStorePairPostIndex(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(LoadStorePairPostIndex)";
+
+ switch (instr->Mask(LoadStorePairPostIndexMask)) {
+ #define LSP_POSTINDEX(A, B, C, D) \
+ case A##_post: mnemonic = B; form = C ", ['Xns]'ILP" D; break;
+ LOAD_STORE_PAIR_LIST(LSP_POSTINDEX)
+ #undef LSP_POSTINDEX
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitLoadStorePairPreIndex(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(LoadStorePairPreIndex)";
+
+ switch (instr->Mask(LoadStorePairPreIndexMask)) {
+ #define LSP_PREINDEX(A, B, C, D) \
+ case A##_pre: mnemonic = B; form = C ", ['Xns'ILP" D "]!"; break;
+ LOAD_STORE_PAIR_LIST(LSP_PREINDEX)
+ #undef LSP_PREINDEX
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitLoadStorePairOffset(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(LoadStorePairOffset)";
+
+ switch (instr->Mask(LoadStorePairOffsetMask)) {
+ #define LSP_OFFSET(A, B, C, D) \
+ case A##_off: mnemonic = B; form = C ", ['Xns'ILP" D "]"; break;
+ LOAD_STORE_PAIR_LIST(LSP_OFFSET)
+ #undef LSP_OFFSET
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitLoadStorePairNonTemporal(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form;
+
+ switch (instr->Mask(LoadStorePairNonTemporalMask)) {
+ case STNP_w: mnemonic = "stnp"; form = "'Wt, 'Wt2, ['Xns'ILP2]"; break;
+ case LDNP_w: mnemonic = "ldnp"; form = "'Wt, 'Wt2, ['Xns'ILP2]"; break;
+ case STNP_x: mnemonic = "stnp"; form = "'Xt, 'Xt2, ['Xns'ILP3]"; break;
+ case LDNP_x: mnemonic = "ldnp"; form = "'Xt, 'Xt2, ['Xns'ILP3]"; break;
+ case STNP_s: mnemonic = "stnp"; form = "'St, 'St2, ['Xns'ILP2]"; break;
+ case LDNP_s: mnemonic = "ldnp"; form = "'St, 'St2, ['Xns'ILP2]"; break;
+ case STNP_d: mnemonic = "stnp"; form = "'Dt, 'Dt2, ['Xns'ILP3]"; break;
+ case LDNP_d: mnemonic = "ldnp"; form = "'Dt, 'Dt2, ['Xns'ILP3]"; break;
+ case STNP_q: mnemonic = "stnp"; form = "'Qt, 'Qt2, ['Xns'ILP4]"; break;
+ case LDNP_q: mnemonic = "ldnp"; form = "'Qt, 'Qt2, ['Xns'ILP4]"; break;
+ default: form = "(LoadStorePairNonTemporal)";
+ }
+ Format(instr, mnemonic, form);
+}
+
+// clang-format off
+#define LOAD_STORE_EXCLUSIVE_LIST(V) \
+ V(STXRB_w, "stxrb", "'Ws, 'Wt") \
+ V(STXRH_w, "stxrh", "'Ws, 'Wt") \
+ V(STXR_w, "stxr", "'Ws, 'Wt") \
+ V(STXR_x, "stxr", "'Ws, 'Xt") \
+ V(LDXRB_w, "ldxrb", "'Wt") \
+ V(LDXRH_w, "ldxrh", "'Wt") \
+ V(LDXR_w, "ldxr", "'Wt") \
+ V(LDXR_x, "ldxr", "'Xt") \
+ V(STXP_w, "stxp", "'Ws, 'Wt, 'Wt2") \
+ V(STXP_x, "stxp", "'Ws, 'Xt, 'Xt2") \
+ V(LDXP_w, "ldxp", "'Wt, 'Wt2") \
+ V(LDXP_x, "ldxp", "'Xt, 'Xt2") \
+ V(STLXRB_w, "stlxrb", "'Ws, 'Wt") \
+ V(STLXRH_w, "stlxrh", "'Ws, 'Wt") \
+ V(STLXR_w, "stlxr", "'Ws, 'Wt") \
+ V(STLXR_x, "stlxr", "'Ws, 'Xt") \
+ V(LDAXRB_w, "ldaxrb", "'Wt") \
+ V(LDAXRH_w, "ldaxrh", "'Wt") \
+ V(LDAXR_w, "ldaxr", "'Wt") \
+ V(LDAXR_x, "ldaxr", "'Xt") \
+ V(STLXP_w, "stlxp", "'Ws, 'Wt, 'Wt2") \
+ V(STLXP_x, "stlxp", "'Ws, 'Xt, 'Xt2") \
+ V(LDAXP_w, "ldaxp", "'Wt, 'Wt2") \
+ V(LDAXP_x, "ldaxp", "'Xt, 'Xt2") \
+ V(STLRB_w, "stlrb", "'Wt") \
+ V(STLRH_w, "stlrh", "'Wt") \
+ V(STLR_w, "stlr", "'Wt") \
+ V(STLR_x, "stlr", "'Xt") \
+ V(LDARB_w, "ldarb", "'Wt") \
+ V(LDARH_w, "ldarh", "'Wt") \
+ V(LDAR_w, "ldar", "'Wt") \
+ V(LDAR_x, "ldar", "'Xt") \
+ V(CAS_w, "cas", "'Ws, 'Wt") \
+ V(CAS_x, "cas", "'Xs, 'Xt") \
+ V(CASA_w, "casa", "'Ws, 'Wt") \
+ V(CASA_x, "casa", "'Xs, 'Xt") \
+ V(CASL_w, "casl", "'Ws, 'Wt") \
+ V(CASL_x, "casl", "'Xs, 'Xt") \
+ V(CASAL_w, "casal", "'Ws, 'Wt") \
+ V(CASAL_x, "casal", "'Xs, 'Xt") \
+ V(CASB, "casb", "'Ws, 'Wt") \
+ V(CASAB, "casab", "'Ws, 'Wt") \
+ V(CASLB, "caslb", "'Ws, 'Wt") \
+ V(CASALB, "casalb", "'Ws, 'Wt") \
+ V(CASH, "cash", "'Ws, 'Wt") \
+ V(CASAH, "casah", "'Ws, 'Wt") \
+ V(CASLH, "caslh", "'Ws, 'Wt") \
+ V(CASALH, "casalh", "'Ws, 'Wt") \
+ V(CASP_w, "casp", "'Ws, 'W(s+1), 'Wt, 'W(t+1)") \
+ V(CASP_x, "casp", "'Xs, 'X(s+1), 'Xt, 'X(t+1)") \
+ V(CASPA_w, "caspa", "'Ws, 'W(s+1), 'Wt, 'W(t+1)") \
+ V(CASPA_x, "caspa", "'Xs, 'X(s+1), 'Xt, 'X(t+1)") \
+ V(CASPL_w, "caspl", "'Ws, 'W(s+1), 'Wt, 'W(t+1)") \
+ V(CASPL_x, "caspl", "'Xs, 'X(s+1), 'Xt, 'X(t+1)") \
+ V(CASPAL_w, "caspal", "'Ws, 'W(s+1), 'Wt, 'W(t+1)") \
+ V(CASPAL_x, "caspal", "'Xs, 'X(s+1), 'Xt, 'X(t+1)")
+// clang-format on
+
+void Disassembler::VisitLoadStoreExclusive(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form;
+
+ switch (instr->Mask(LoadStoreExclusiveMask)) {
+#define LSX(A, B, C) \
+ case A: \
+ mnemonic = B; \
+ form = C ", ['Xns]"; \
+ break;
+ LOAD_STORE_EXCLUSIVE_LIST(LSX)
+#undef LSX
+ default:
+ form = "(LoadStoreExclusive)";
+ }
+
+ switch (instr->Mask(LoadStoreExclusiveMask)) {
+ case CASP_w:
+ case CASP_x:
+ case CASPA_w:
+ case CASPA_x:
+ case CASPL_w:
+ case CASPL_x:
+ case CASPAL_w:
+ case CASPAL_x:
+ if ((instr->Rs() % 2 == 1) || (instr->Rt() % 2 == 1)) {
+ mnemonic = "unallocated";
+ form = "(LoadStoreExclusive)";
+ }
+ break;
+ }
+
+ Format(instr, mnemonic, form);
+}
+
+#define ATOMIC_MEMORY_SIMPLE_LIST(V) \
+ V(LDADD, "add") \
+ V(LDCLR, "clr") \
+ V(LDEOR, "eor") \
+ V(LDSET, "set") \
+ V(LDSMAX, "smax") \
+ V(LDSMIN, "smin") \
+ V(LDUMAX, "umax") \
+ V(LDUMIN, "umin")
+
+void Disassembler::VisitAtomicMemory(const Instruction* instr) {
+ const int kMaxAtomicOpMnemonicLength = 16;
+ const char* mnemonic;
+ const char* form = "'Ws, 'Wt, ['Xns]";
+
+ switch (instr->Mask(AtomicMemoryMask)) {
+#define AMS(A, MN) \
+ case A##B: \
+ mnemonic = MN "b"; \
+ break; \
+ case A##AB: \
+ mnemonic = MN "ab"; \
+ break; \
+ case A##LB: \
+ mnemonic = MN "lb"; \
+ break; \
+ case A##ALB: \
+ mnemonic = MN "alb"; \
+ break; \
+ case A##H: \
+ mnemonic = MN "h"; \
+ break; \
+ case A##AH: \
+ mnemonic = MN "ah"; \
+ break; \
+ case A##LH: \
+ mnemonic = MN "lh"; \
+ break; \
+ case A##ALH: \
+ mnemonic = MN "alh"; \
+ break; \
+ case A##_w: \
+ mnemonic = MN; \
+ break; \
+ case A##A_w: \
+ mnemonic = MN "a"; \
+ break; \
+ case A##L_w: \
+ mnemonic = MN "l"; \
+ break; \
+ case A##AL_w: \
+ mnemonic = MN "al"; \
+ break; \
+ case A##_x: \
+ mnemonic = MN; \
+ form = "'Xs, 'Xt, ['Xns]"; \
+ break; \
+ case A##A_x: \
+ mnemonic = MN "a"; \
+ form = "'Xs, 'Xt, ['Xns]"; \
+ break; \
+ case A##L_x: \
+ mnemonic = MN "l"; \
+ form = "'Xs, 'Xt, ['Xns]"; \
+ break; \
+ case A##AL_x: \
+ mnemonic = MN "al"; \
+ form = "'Xs, 'Xt, ['Xns]"; \
+ break;
+ ATOMIC_MEMORY_SIMPLE_LIST(AMS)
+
+ // SWP has the same semantics as ldadd etc but without the store aliases.
+ AMS(SWP, "swp")
+#undef AMS
+
+ case LDAPRB:
+ mnemonic = "ldaprb";
+ form = "'Wt, ['Xns]";
+ break;
+ case LDAPRH:
+ mnemonic = "ldaprh";
+ form = "'Wt, ['Xns]";
+ break;
+ case LDAPR_w:
+ mnemonic = "ldapr";
+ form = "'Wt, ['Xns]";
+ break;
+ case LDAPR_x:
+ mnemonic = "ldapr";
+ form = "'Xt, ['Xns]";
+ break;
+ default:
+ mnemonic = "unimplemented";
+ form = "(AtomicMemory)";
+ }
+
+ const char* prefix = "";
+ switch (instr->Mask(AtomicMemoryMask)) {
+#define AMS(A, MN) \
+ case A##AB: \
+ case A##ALB: \
+ case A##AH: \
+ case A##ALH: \
+ case A##A_w: \
+ case A##AL_w: \
+ case A##A_x: \
+ case A##AL_x: \
+ prefix = "ld"; \
+ break; \
+ case A##B: \
+ case A##LB: \
+ case A##H: \
+ case A##LH: \
+ case A##_w: \
+ case A##L_w: { \
+ prefix = "ld"; \
+ unsigned rt = instr->Rt(); \
+ if (Register(rt, 32).IsZero()) { \
+ prefix = "st"; \
+ form = "'Ws, ['Xns]"; \
+ } \
+ break; \
+ } \
+ case A##_x: \
+ case A##L_x: { \
+ prefix = "ld"; \
+ unsigned rt = instr->Rt(); \
+ if (Register(rt, 64).IsZero()) { \
+ prefix = "st"; \
+ form = "'Xs, ['Xns]"; \
+ } \
+ break; \
+ }
+ ATOMIC_MEMORY_SIMPLE_LIST(AMS)
+#undef AMS
+ }
+
+ char buffer[kMaxAtomicOpMnemonicLength];
+ if (strlen(prefix) > 0) {
+ snprintf(buffer, kMaxAtomicOpMnemonicLength, "%s%s", prefix, mnemonic);
+ mnemonic = buffer;
+ }
+
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitFPCompare(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Fn, 'Fm";
+ const char *form_zero = "'Fn, #0.0";
+
+ switch (instr->Mask(FPCompareMask)) {
+ case FCMP_s_zero:
+ case FCMP_d_zero: form = form_zero; VIXL_FALLTHROUGH();
+ case FCMP_s:
+ case FCMP_d: mnemonic = "fcmp"; break;
+ case FCMPE_s_zero:
+ case FCMPE_d_zero: form = form_zero; VIXL_FALLTHROUGH();
+ case FCMPE_s:
+ case FCMPE_d: mnemonic = "fcmpe"; break;
+ default: form = "(FPCompare)";
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitFPConditionalCompare(const Instruction* instr) {
+ const char *mnemonic = "unmplemented";
+ const char *form = "'Fn, 'Fm, 'INzcv, 'Cond";
+
+ switch (instr->Mask(FPConditionalCompareMask)) {
+ case FCCMP_s:
+ case FCCMP_d: mnemonic = "fccmp"; break;
+ case FCCMPE_s:
+ case FCCMPE_d: mnemonic = "fccmpe"; break;
+ default: form = "(FPConditionalCompare)";
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitFPConditionalSelect(const Instruction* instr) {
+ const char *mnemonic = "";
+ const char *form = "'Fd, 'Fn, 'Fm, 'Cond";
+
+ switch (instr->Mask(FPConditionalSelectMask)) {
+ case FCSEL_s:
+ case FCSEL_d: mnemonic = "fcsel"; break;
+ default: VIXL_UNREACHABLE();
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitFPDataProcessing1Source(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Fd, 'Fn";
+
+ switch (instr->Mask(FPDataProcessing1SourceMask)) {
+ #define FORMAT(A, B) \
+ case A##_s: \
+ case A##_d: mnemonic = B; break;
+ FORMAT(FMOV, "fmov");
+ FORMAT(FABS, "fabs");
+ FORMAT(FNEG, "fneg");
+ FORMAT(FSQRT, "fsqrt");
+ FORMAT(FRINTN, "frintn");
+ FORMAT(FRINTP, "frintp");
+ FORMAT(FRINTM, "frintm");
+ FORMAT(FRINTZ, "frintz");
+ FORMAT(FRINTA, "frinta");
+ FORMAT(FRINTX, "frintx");
+ FORMAT(FRINTI, "frinti");
+ #undef FORMAT
+ case FCVT_ds: mnemonic = "fcvt"; form = "'Dd, 'Sn"; break;
+ case FCVT_sd: mnemonic = "fcvt"; form = "'Sd, 'Dn"; break;
+ case FCVT_hs: mnemonic = "fcvt"; form = "'Hd, 'Sn"; break;
+ case FCVT_sh: mnemonic = "fcvt"; form = "'Sd, 'Hn"; break;
+ case FCVT_dh: mnemonic = "fcvt"; form = "'Dd, 'Hn"; break;
+ case FCVT_hd: mnemonic = "fcvt"; form = "'Hd, 'Dn"; break;
+ default: form = "(FPDataProcessing1Source)";
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitFPDataProcessing2Source(const Instruction* instr) {
+ const char *mnemonic = "";
+ const char *form = "'Fd, 'Fn, 'Fm";
+
+ switch (instr->Mask(FPDataProcessing2SourceMask)) {
+ #define FORMAT(A, B) \
+ case A##_s: \
+ case A##_d: mnemonic = B; break;
+ FORMAT(FMUL, "fmul");
+ FORMAT(FDIV, "fdiv");
+ FORMAT(FADD, "fadd");
+ FORMAT(FSUB, "fsub");
+ FORMAT(FMAX, "fmax");
+ FORMAT(FMIN, "fmin");
+ FORMAT(FMAXNM, "fmaxnm");
+ FORMAT(FMINNM, "fminnm");
+ FORMAT(FNMUL, "fnmul");
+ #undef FORMAT
+ default: VIXL_UNREACHABLE();
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitFPDataProcessing3Source(const Instruction* instr) {
+ const char *mnemonic = "";
+ const char *form = "'Fd, 'Fn, 'Fm, 'Fa";
+
+ switch (instr->Mask(FPDataProcessing3SourceMask)) {
+ #define FORMAT(A, B) \
+ case A##_s: \
+ case A##_d: mnemonic = B; break;
+ FORMAT(FMADD, "fmadd");
+ FORMAT(FMSUB, "fmsub");
+ FORMAT(FNMADD, "fnmadd");
+ FORMAT(FNMSUB, "fnmsub");
+ #undef FORMAT
+ default: VIXL_UNREACHABLE();
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitFPImmediate(const Instruction* instr) {
+ const char *mnemonic = "";
+ const char *form = "(FPImmediate)";
+
+ switch (instr->Mask(FPImmediateMask)) {
+ case FMOV_s_imm: mnemonic = "fmov"; form = "'Sd, 'IFPSingle"; break;
+ case FMOV_d_imm: mnemonic = "fmov"; form = "'Dd, 'IFPDouble"; break;
+ default: VIXL_UNREACHABLE();
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitFPIntegerConvert(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(FPIntegerConvert)";
+ const char *form_rf = "'Rd, 'Fn";
+ const char *form_fr = "'Fd, 'Rn";
+
+ switch (instr->Mask(FPIntegerConvertMask)) {
+ case FMOV_ws:
+ case FMOV_xd: mnemonic = "fmov"; form = form_rf; break;
+ case FMOV_sw:
+ case FMOV_dx: mnemonic = "fmov"; form = form_fr; break;
+ case FMOV_d1_x: mnemonic = "fmov"; form = "'Vd.D[1], 'Rn"; break;
+ case FMOV_x_d1: mnemonic = "fmov"; form = "'Rd, 'Vn.D[1]"; break;
+ case FCVTAS_ws:
+ case FCVTAS_xs:
+ case FCVTAS_wd:
+ case FCVTAS_xd: mnemonic = "fcvtas"; form = form_rf; break;
+ case FCVTAU_ws:
+ case FCVTAU_xs:
+ case FCVTAU_wd:
+ case FCVTAU_xd: mnemonic = "fcvtau"; form = form_rf; break;
+ case FCVTMS_ws:
+ case FCVTMS_xs:
+ case FCVTMS_wd:
+ case FCVTMS_xd: mnemonic = "fcvtms"; form = form_rf; break;
+ case FCVTMU_ws:
+ case FCVTMU_xs:
+ case FCVTMU_wd:
+ case FCVTMU_xd: mnemonic = "fcvtmu"; form = form_rf; break;
+ case FCVTNS_ws:
+ case FCVTNS_xs:
+ case FCVTNS_wd:
+ case FCVTNS_xd: mnemonic = "fcvtns"; form = form_rf; break;
+ case FCVTNU_ws:
+ case FCVTNU_xs:
+ case FCVTNU_wd:
+ case FCVTNU_xd: mnemonic = "fcvtnu"; form = form_rf; break;
+ case FCVTZU_xd:
+ case FCVTZU_ws:
+ case FCVTZU_wd:
+ case FCVTZU_xs: mnemonic = "fcvtzu"; form = form_rf; break;
+ case FCVTZS_xd:
+ case FCVTZS_wd:
+ case FCVTZS_xs:
+ case FCVTZS_ws: mnemonic = "fcvtzs"; form = form_rf; break;
+ case FCVTPU_xd:
+ case FCVTPU_ws:
+ case FCVTPU_wd:
+ case FCVTPU_xs: mnemonic = "fcvtpu"; form = form_rf; break;
+ case FCVTPS_xd:
+ case FCVTPS_wd:
+ case FCVTPS_xs:
+ case FCVTPS_ws: mnemonic = "fcvtps"; form = form_rf; break;
+ case SCVTF_sw:
+ case SCVTF_sx:
+ case SCVTF_dw:
+ case SCVTF_dx: mnemonic = "scvtf"; form = form_fr; break;
+ case UCVTF_sw:
+ case UCVTF_sx:
+ case UCVTF_dw:
+ case UCVTF_dx: mnemonic = "ucvtf"; form = form_fr; break;
+ case FJCVTZS: mnemonic = "fjcvtzs"; form = form_rf; break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitFPFixedPointConvert(const Instruction* instr) {
+ const char *mnemonic = "";
+ const char *form = "'Rd, 'Fn, 'IFPFBits";
+ const char *form_fr = "'Fd, 'Rn, 'IFPFBits";
+
+ switch (instr->Mask(FPFixedPointConvertMask)) {
+ case FCVTZS_ws_fixed:
+ case FCVTZS_xs_fixed:
+ case FCVTZS_wd_fixed:
+ case FCVTZS_xd_fixed: mnemonic = "fcvtzs"; break;
+ case FCVTZU_ws_fixed:
+ case FCVTZU_xs_fixed:
+ case FCVTZU_wd_fixed:
+ case FCVTZU_xd_fixed: mnemonic = "fcvtzu"; break;
+ case SCVTF_sw_fixed:
+ case SCVTF_sx_fixed:
+ case SCVTF_dw_fixed:
+ case SCVTF_dx_fixed: mnemonic = "scvtf"; form = form_fr; break;
+ case UCVTF_sw_fixed:
+ case UCVTF_sx_fixed:
+ case UCVTF_dw_fixed:
+ case UCVTF_dx_fixed: mnemonic = "ucvtf"; form = form_fr; break;
+ default: VIXL_UNREACHABLE();
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitSystem(const Instruction* instr) {
+ // Some system instructions hijack their Op and Cp fields to represent a
+ // range of immediates instead of indicating a different instruction. This
+ // makes the decoding tricky.
+ const char *mnemonic = "unimplemented";
+ const char *form = "(System)";
+
+ if (instr->Mask(SystemExclusiveMonitorFMask) == SystemExclusiveMonitorFixed) {
+ switch (instr->Mask(SystemExclusiveMonitorMask)) {
+ case CLREX: {
+ mnemonic = "clrex";
+ form = (instr->CRm() == 0xf) ? NULL : "'IX";
+ break;
+ }
+ }
+ } else if (instr->Mask(SystemSysRegFMask) == SystemSysRegFixed) {
+ switch (instr->Mask(SystemSysRegMask)) {
+ case MRS: {
+ mnemonic = "mrs";
+ switch (instr->ImmSystemRegister()) {
+ case NZCV: form = "'Xt, nzcv"; break;
+ case FPCR: form = "'Xt, fpcr"; break;
+ default: form = "'Xt, (unknown)"; break;
+ }
+ break;
+ }
+ case MSR: {
+ mnemonic = "msr";
+ switch (instr->ImmSystemRegister()) {
+ case NZCV: form = "nzcv, 'Xt"; break;
+ case FPCR: form = "fpcr, 'Xt"; break;
+ default: form = "(unknown), 'Xt"; break;
+ }
+ break;
+ }
+ }
+ } else if (instr->Mask(SystemHintFMask) == SystemHintFixed) {
+ switch (instr->ImmHint()) {
+ case NOP: {
+ mnemonic = "nop";
+ form = NULL;
+ break;
+ }
+ case CSDB: {
+ mnemonic = "csdb";
+ form = NULL;
+ break;
+ }
+ }
+ } else if (instr->Mask(MemBarrierFMask) == MemBarrierFixed) {
+ switch (instr->Mask(MemBarrierMask)) {
+ case DMB: {
+ mnemonic = "dmb";
+ form = "'M";
+ break;
+ }
+ case DSB: {
+ mnemonic = "dsb";
+ form = "'M";
+ break;
+ }
+ case ISB: {
+ mnemonic = "isb";
+ form = NULL;
+ break;
+ }
+ }
+ } else if (instr->Mask(SystemSysFMask) == SystemSysFixed) {
+ switch (instr->SysOp()) {
+ case IVAU:
+ mnemonic = "ic";
+ form = "ivau, 'Xt";
+ break;
+ case CVAC:
+ mnemonic = "dc";
+ form = "cvac, 'Xt";
+ break;
+ case CVAU:
+ mnemonic = "dc";
+ form = "cvau, 'Xt";
+ break;
+ case CIVAC:
+ mnemonic = "dc";
+ form = "civac, 'Xt";
+ break;
+ case ZVA:
+ mnemonic = "dc";
+ form = "zva, 'Xt";
+ break;
+ default:
+ mnemonic = "sys";
+ if (instr->Rt() == 31) {
+ form = "'G1, 'Kn, 'Km, 'G2";
+ } else {
+ form = "'G1, 'Kn, 'Km, 'G2, 'Xt";
+ }
+ break;
+ }
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitException(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'IDebug";
+
+ switch (instr->Mask(ExceptionMask)) {
+ case HLT: mnemonic = "hlt"; break;
+ case BRK: mnemonic = "brk"; break;
+ case SVC: mnemonic = "svc"; break;
+ case HVC: mnemonic = "hvc"; break;
+ case SMC: mnemonic = "smc"; break;
+ case DCPS1: mnemonic = "dcps1"; form = "{'IDebug}"; break;
+ case DCPS2: mnemonic = "dcps2"; form = "{'IDebug}"; break;
+ case DCPS3: mnemonic = "dcps3"; form = "{'IDebug}"; break;
+ default: form = "(Exception)";
+ }
+ Format(instr, mnemonic, form);
+}
+
+
+void Disassembler::VisitCrypto2RegSHA(const Instruction* instr) {
+ VisitUnimplemented(instr);
+}
+
+
+void Disassembler::VisitCrypto3RegSHA(const Instruction* instr) {
+ VisitUnimplemented(instr);
+}
+
+
+void Disassembler::VisitCryptoAES(const Instruction* instr) {
+ VisitUnimplemented(instr);
+}
+
+
+void Disassembler::VisitNEON2RegMisc(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Vd.%s, 'Vn.%s";
+ const char *form_cmp_zero = "'Vd.%s, 'Vn.%s, #0";
+ const char *form_fcmp_zero = "'Vd.%s, 'Vn.%s, #0.0";
+ NEONFormatDecoder nfd(instr);
+
+ static const NEONFormatMap map_lp_ta = {
+ {23, 22, 30}, {NF_4H, NF_8H, NF_2S, NF_4S, NF_1D, NF_2D}
+ };
+
+ static const NEONFormatMap map_cvt_ta = {
+ {22}, {NF_4S, NF_2D}
+ };
+
+ static const NEONFormatMap map_cvt_tb = {
+ {22, 30}, {NF_4H, NF_8H, NF_2S, NF_4S}
+ };
+
+ if (instr->Mask(NEON2RegMiscOpcode) <= NEON_NEG_opcode) {
+ // These instructions all use a two bit size field, except NOT and RBIT,
+ // which use the field to encode the operation.
+ switch (instr->Mask(NEON2RegMiscMask)) {
+ case NEON_REV64: mnemonic = "rev64"; break;
+ case NEON_REV32: mnemonic = "rev32"; break;
+ case NEON_REV16: mnemonic = "rev16"; break;
+ case NEON_SADDLP:
+ mnemonic = "saddlp";
+ nfd.SetFormatMap(0, &map_lp_ta);
+ break;
+ case NEON_UADDLP:
+ mnemonic = "uaddlp";
+ nfd.SetFormatMap(0, &map_lp_ta);
+ break;
+ case NEON_SUQADD: mnemonic = "suqadd"; break;
+ case NEON_USQADD: mnemonic = "usqadd"; break;
+ case NEON_CLS: mnemonic = "cls"; break;
+ case NEON_CLZ: mnemonic = "clz"; break;
+ case NEON_CNT: mnemonic = "cnt"; break;
+ case NEON_SADALP:
+ mnemonic = "sadalp";
+ nfd.SetFormatMap(0, &map_lp_ta);
+ break;
+ case NEON_UADALP:
+ mnemonic = "uadalp";
+ nfd.SetFormatMap(0, &map_lp_ta);
+ break;
+ case NEON_SQABS: mnemonic = "sqabs"; break;
+ case NEON_SQNEG: mnemonic = "sqneg"; break;
+ case NEON_CMGT_zero: mnemonic = "cmgt"; form = form_cmp_zero; break;
+ case NEON_CMGE_zero: mnemonic = "cmge"; form = form_cmp_zero; break;
+ case NEON_CMEQ_zero: mnemonic = "cmeq"; form = form_cmp_zero; break;
+ case NEON_CMLE_zero: mnemonic = "cmle"; form = form_cmp_zero; break;
+ case NEON_CMLT_zero: mnemonic = "cmlt"; form = form_cmp_zero; break;
+ case NEON_ABS: mnemonic = "abs"; break;
+ case NEON_NEG: mnemonic = "neg"; break;
+ case NEON_RBIT_NOT:
+ switch (instr->FPType()) {
+ case 0: mnemonic = "mvn"; break;
+ case 1: mnemonic = "rbit"; break;
+ default: form = "(NEON2RegMisc)";
+ }
+ nfd.SetFormatMaps(nfd.LogicalFormatMap());
+ break;
+ }
+ } else {
+ // These instructions all use a one bit size field, except XTN, SQXTUN,
+ // SHLL, SQXTN and UQXTN, which use a two bit size field.
+ nfd.SetFormatMaps(nfd.FPFormatMap());
+ switch (instr->Mask(NEON2RegMiscFPMask)) {
+ case NEON_FABS: mnemonic = "fabs"; break;
+ case NEON_FNEG: mnemonic = "fneg"; break;
+ case NEON_FCVTN:
+ mnemonic = instr->Mask(NEON_Q) ? "fcvtn2" : "fcvtn";
+ nfd.SetFormatMap(0, &map_cvt_tb);
+ nfd.SetFormatMap(1, &map_cvt_ta);
+ break;
+ case NEON_FCVTXN:
+ mnemonic = instr->Mask(NEON_Q) ? "fcvtxn2" : "fcvtxn";
+ nfd.SetFormatMap(0, &map_cvt_tb);
+ nfd.SetFormatMap(1, &map_cvt_ta);
+ break;
+ case NEON_FCVTL:
+ mnemonic = instr->Mask(NEON_Q) ? "fcvtl2" : "fcvtl";
+ nfd.SetFormatMap(0, &map_cvt_ta);
+ nfd.SetFormatMap(1, &map_cvt_tb);
+ break;
+ case NEON_FRINTN: mnemonic = "frintn"; break;
+ case NEON_FRINTA: mnemonic = "frinta"; break;
+ case NEON_FRINTP: mnemonic = "frintp"; break;
+ case NEON_FRINTM: mnemonic = "frintm"; break;
+ case NEON_FRINTX: mnemonic = "frintx"; break;
+ case NEON_FRINTZ: mnemonic = "frintz"; break;
+ case NEON_FRINTI: mnemonic = "frinti"; break;
+ case NEON_FCVTNS: mnemonic = "fcvtns"; break;
+ case NEON_FCVTNU: mnemonic = "fcvtnu"; break;
+ case NEON_FCVTPS: mnemonic = "fcvtps"; break;
+ case NEON_FCVTPU: mnemonic = "fcvtpu"; break;
+ case NEON_FCVTMS: mnemonic = "fcvtms"; break;
+ case NEON_FCVTMU: mnemonic = "fcvtmu"; break;
+ case NEON_FCVTZS: mnemonic = "fcvtzs"; break;
+ case NEON_FCVTZU: mnemonic = "fcvtzu"; break;
+ case NEON_FCVTAS: mnemonic = "fcvtas"; break;
+ case NEON_FCVTAU: mnemonic = "fcvtau"; break;
+ case NEON_FSQRT: mnemonic = "fsqrt"; break;
+ case NEON_SCVTF: mnemonic = "scvtf"; break;
+ case NEON_UCVTF: mnemonic = "ucvtf"; break;
+ case NEON_URSQRTE: mnemonic = "ursqrte"; break;
+ case NEON_URECPE: mnemonic = "urecpe"; break;
+ case NEON_FRSQRTE: mnemonic = "frsqrte"; break;
+ case NEON_FRECPE: mnemonic = "frecpe"; break;
+ case NEON_FCMGT_zero: mnemonic = "fcmgt"; form = form_fcmp_zero; break;
+ case NEON_FCMGE_zero: mnemonic = "fcmge"; form = form_fcmp_zero; break;
+ case NEON_FCMEQ_zero: mnemonic = "fcmeq"; form = form_fcmp_zero; break;
+ case NEON_FCMLE_zero: mnemonic = "fcmle"; form = form_fcmp_zero; break;
+ case NEON_FCMLT_zero: mnemonic = "fcmlt"; form = form_fcmp_zero; break;
+ default:
+ if ((NEON_XTN_opcode <= instr->Mask(NEON2RegMiscOpcode)) &&
+ (instr->Mask(NEON2RegMiscOpcode) <= NEON_UQXTN_opcode)) {
+ nfd.SetFormatMap(0, nfd.IntegerFormatMap());
+ nfd.SetFormatMap(1, nfd.LongIntegerFormatMap());
+
+ switch (instr->Mask(NEON2RegMiscMask)) {
+ case NEON_XTN: mnemonic = "xtn"; break;
+ case NEON_SQXTN: mnemonic = "sqxtn"; break;
+ case NEON_UQXTN: mnemonic = "uqxtn"; break;
+ case NEON_SQXTUN: mnemonic = "sqxtun"; break;
+ case NEON_SHLL:
+ mnemonic = "shll";
+ nfd.SetFormatMap(0, nfd.LongIntegerFormatMap());
+ nfd.SetFormatMap(1, nfd.IntegerFormatMap());
+ switch (instr->NEONSize()) {
+ case 0: form = "'Vd.%s, 'Vn.%s, #8"; break;
+ case 1: form = "'Vd.%s, 'Vn.%s, #16"; break;
+ case 2: form = "'Vd.%s, 'Vn.%s, #32"; break;
+ default: form = "(NEON2RegMisc)";
+ }
+ }
+ Format(instr, nfd.Mnemonic(mnemonic), nfd.Substitute(form));
+ return;
+ } else {
+ form = "(NEON2RegMisc)";
+ }
+ }
+ }
+ Format(instr, mnemonic, nfd.Substitute(form));
+}
+
+
+void Disassembler::VisitNEON3Same(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Vd.%s, 'Vn.%s, 'Vm.%s";
+ NEONFormatDecoder nfd(instr);
+
+ if (instr->Mask(NEON3SameLogicalFMask) == NEON3SameLogicalFixed) {
+ switch (instr->Mask(NEON3SameLogicalMask)) {
+ case NEON_AND: mnemonic = "and"; break;
+ case NEON_ORR:
+ mnemonic = "orr";
+ if (instr->Rm() == instr->Rn()) {
+ mnemonic = "mov";
+ form = "'Vd.%s, 'Vn.%s";
+ }
+ break;
+ case NEON_ORN: mnemonic = "orn"; break;
+ case NEON_EOR: mnemonic = "eor"; break;
+ case NEON_BIC: mnemonic = "bic"; break;
+ case NEON_BIF: mnemonic = "bif"; break;
+ case NEON_BIT: mnemonic = "bit"; break;
+ case NEON_BSL: mnemonic = "bsl"; break;
+ default: form = "(NEON3Same)";
+ }
+ nfd.SetFormatMaps(nfd.LogicalFormatMap());
+ } else {
+ static const char *mnemonics[] = {
+ "shadd", "uhadd", "shadd", "uhadd",
+ "sqadd", "uqadd", "sqadd", "uqadd",
+ "srhadd", "urhadd", "srhadd", "urhadd",
+ NULL, NULL, NULL, NULL, // Handled by logical cases above.
+ "shsub", "uhsub", "shsub", "uhsub",
+ "sqsub", "uqsub", "sqsub", "uqsub",
+ "cmgt", "cmhi", "cmgt", "cmhi",
+ "cmge", "cmhs", "cmge", "cmhs",
+ "sshl", "ushl", "sshl", "ushl",
+ "sqshl", "uqshl", "sqshl", "uqshl",
+ "srshl", "urshl", "srshl", "urshl",
+ "sqrshl", "uqrshl", "sqrshl", "uqrshl",
+ "smax", "umax", "smax", "umax",
+ "smin", "umin", "smin", "umin",
+ "sabd", "uabd", "sabd", "uabd",
+ "saba", "uaba", "saba", "uaba",
+ "add", "sub", "add", "sub",
+ "cmtst", "cmeq", "cmtst", "cmeq",
+ "mla", "mls", "mla", "mls",
+ "mul", "pmul", "mul", "pmul",
+ "smaxp", "umaxp", "smaxp", "umaxp",
+ "sminp", "uminp", "sminp", "uminp",
+ "sqdmulh", "sqrdmulh", "sqdmulh", "sqrdmulh",
+ "addp", "unallocated", "addp", "unallocated",
+ "fmaxnm", "fmaxnmp", "fminnm", "fminnmp",
+ "fmla", "unallocated", "fmls", "unallocated",
+ "fadd", "faddp", "fsub", "fabd",
+ "fmulx", "fmul", "unallocated", "unallocated",
+ "fcmeq", "fcmge", "unallocated", "fcmgt",
+ "unallocated", "facge", "unallocated", "facgt",
+ "fmax", "fmaxp", "fmin", "fminp",
+ "frecps", "fdiv", "frsqrts", "unallocated"};
+
+ // Operation is determined by the opcode bits (15-11), the top bit of
+ // size (23) and the U bit (29).
+ unsigned index = (instr->Bits(15, 11) << 2) | (instr->Bit(23) << 1) |
+ instr->Bit(29);
+ VIXL_ASSERT(index < (sizeof(mnemonics) / sizeof(mnemonics[0])));
+ mnemonic = mnemonics[index];
+ // Assert that index is not one of the previously handled logical
+ // instructions.
+ VIXL_ASSERT(mnemonic != NULL);
+
+ if (instr->Mask(NEON3SameFPFMask) == NEON3SameFPFixed) {
+ nfd.SetFormatMaps(nfd.FPFormatMap());
+ }
+ }
+ Format(instr, mnemonic, nfd.Substitute(form));
+}
+
+
+void Disassembler::VisitNEON3Different(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Vd.%s, 'Vn.%s, 'Vm.%s";
+
+ NEONFormatDecoder nfd(instr);
+ nfd.SetFormatMap(0, nfd.LongIntegerFormatMap());
+
+ // Ignore the Q bit. Appending a "2" suffix is handled later.
+ switch (instr->Mask(NEON3DifferentMask) & ~NEON_Q) {
+ case NEON_PMULL: mnemonic = "pmull"; break;
+ case NEON_SABAL: mnemonic = "sabal"; break;
+ case NEON_SABDL: mnemonic = "sabdl"; break;
+ case NEON_SADDL: mnemonic = "saddl"; break;
+ case NEON_SMLAL: mnemonic = "smlal"; break;
+ case NEON_SMLSL: mnemonic = "smlsl"; break;
+ case NEON_SMULL: mnemonic = "smull"; break;
+ case NEON_SSUBL: mnemonic = "ssubl"; break;
+ case NEON_SQDMLAL: mnemonic = "sqdmlal"; break;
+ case NEON_SQDMLSL: mnemonic = "sqdmlsl"; break;
+ case NEON_SQDMULL: mnemonic = "sqdmull"; break;
+ case NEON_UABAL: mnemonic = "uabal"; break;
+ case NEON_UABDL: mnemonic = "uabdl"; break;
+ case NEON_UADDL: mnemonic = "uaddl"; break;
+ case NEON_UMLAL: mnemonic = "umlal"; break;
+ case NEON_UMLSL: mnemonic = "umlsl"; break;
+ case NEON_UMULL: mnemonic = "umull"; break;
+ case NEON_USUBL: mnemonic = "usubl"; break;
+ case NEON_SADDW:
+ mnemonic = "saddw";
+ nfd.SetFormatMap(1, nfd.LongIntegerFormatMap());
+ break;
+ case NEON_SSUBW:
+ mnemonic = "ssubw";
+ nfd.SetFormatMap(1, nfd.LongIntegerFormatMap());
+ break;
+ case NEON_UADDW:
+ mnemonic = "uaddw";
+ nfd.SetFormatMap(1, nfd.LongIntegerFormatMap());
+ break;
+ case NEON_USUBW:
+ mnemonic = "usubw";
+ nfd.SetFormatMap(1, nfd.LongIntegerFormatMap());
+ break;
+ case NEON_ADDHN:
+ mnemonic = "addhn";
+ nfd.SetFormatMaps(nfd.LongIntegerFormatMap());
+ nfd.SetFormatMap(0, nfd.IntegerFormatMap());
+ break;
+ case NEON_RADDHN:
+ mnemonic = "raddhn";
+ nfd.SetFormatMaps(nfd.LongIntegerFormatMap());
+ nfd.SetFormatMap(0, nfd.IntegerFormatMap());
+ break;
+ case NEON_RSUBHN:
+ mnemonic = "rsubhn";
+ nfd.SetFormatMaps(nfd.LongIntegerFormatMap());
+ nfd.SetFormatMap(0, nfd.IntegerFormatMap());
+ break;
+ case NEON_SUBHN:
+ mnemonic = "subhn";
+ nfd.SetFormatMaps(nfd.LongIntegerFormatMap());
+ nfd.SetFormatMap(0, nfd.IntegerFormatMap());
+ break;
+ default: form = "(NEON3Different)";
+ }
+ Format(instr, nfd.Mnemonic(mnemonic), nfd.Substitute(form));
+}
+
+
+void Disassembler::VisitNEONAcrossLanes(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "%sd, 'Vn.%s";
+
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap(),
+ NEONFormatDecoder::IntegerFormatMap());
+
+ if (instr->Mask(NEONAcrossLanesFPFMask) == NEONAcrossLanesFPFixed) {
+ nfd.SetFormatMap(0, nfd.FPScalarFormatMap());
+ nfd.SetFormatMap(1, nfd.FPFormatMap());
+ switch (instr->Mask(NEONAcrossLanesFPMask)) {
+ case NEON_FMAXV: mnemonic = "fmaxv"; break;
+ case NEON_FMINV: mnemonic = "fminv"; break;
+ case NEON_FMAXNMV: mnemonic = "fmaxnmv"; break;
+ case NEON_FMINNMV: mnemonic = "fminnmv"; break;
+ default: form = "(NEONAcrossLanes)"; break;
+ }
+ } else if (instr->Mask(NEONAcrossLanesFMask) == NEONAcrossLanesFixed) {
+ switch (instr->Mask(NEONAcrossLanesMask)) {
+ case NEON_ADDV: mnemonic = "addv"; break;
+ case NEON_SMAXV: mnemonic = "smaxv"; break;
+ case NEON_SMINV: mnemonic = "sminv"; break;
+ case NEON_UMAXV: mnemonic = "umaxv"; break;
+ case NEON_UMINV: mnemonic = "uminv"; break;
+ case NEON_SADDLV:
+ mnemonic = "saddlv";
+ nfd.SetFormatMap(0, nfd.LongScalarFormatMap());
+ break;
+ case NEON_UADDLV:
+ mnemonic = "uaddlv";
+ nfd.SetFormatMap(0, nfd.LongScalarFormatMap());
+ break;
+ default: form = "(NEONAcrossLanes)"; break;
+ }
+ }
+ Format(instr, mnemonic, nfd.Substitute(form,
+ NEONFormatDecoder::kPlaceholder, NEONFormatDecoder::kFormat));
+}
+
+
+void Disassembler::VisitNEONByIndexedElement(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ bool l_instr = false;
+ bool fp_instr = false;
+
+ const char *form = "'Vd.%s, 'Vn.%s, 'Ve.%s['IVByElemIndex]";
+
+ static const NEONFormatMap map_ta = {
+ {23, 22}, {NF_UNDEF, NF_4S, NF_2D}
+ };
+ NEONFormatDecoder nfd(instr, &map_ta,
+ NEONFormatDecoder::IntegerFormatMap(),
+ NEONFormatDecoder::ScalarFormatMap());
+
+ switch (instr->Mask(NEONByIndexedElementMask)) {
+ case NEON_SMULL_byelement: mnemonic = "smull"; l_instr = true; break;
+ case NEON_UMULL_byelement: mnemonic = "umull"; l_instr = true; break;
+ case NEON_SMLAL_byelement: mnemonic = "smlal"; l_instr = true; break;
+ case NEON_UMLAL_byelement: mnemonic = "umlal"; l_instr = true; break;
+ case NEON_SMLSL_byelement: mnemonic = "smlsl"; l_instr = true; break;
+ case NEON_UMLSL_byelement: mnemonic = "umlsl"; l_instr = true; break;
+ case NEON_SQDMULL_byelement: mnemonic = "sqdmull"; l_instr = true; break;
+ case NEON_SQDMLAL_byelement: mnemonic = "sqdmlal"; l_instr = true; break;
+ case NEON_SQDMLSL_byelement: mnemonic = "sqdmlsl"; l_instr = true; break;
+ case NEON_MUL_byelement: mnemonic = "mul"; break;
+ case NEON_MLA_byelement: mnemonic = "mla"; break;
+ case NEON_MLS_byelement: mnemonic = "mls"; break;
+ case NEON_SQDMULH_byelement: mnemonic = "sqdmulh"; break;
+ case NEON_SQRDMULH_byelement: mnemonic = "sqrdmulh"; break;
+ default:
+ switch (instr->Mask(NEONByIndexedElementFPMask)) {
+ case NEON_FMUL_byelement: mnemonic = "fmul"; fp_instr = true; break;
+ case NEON_FMLA_byelement: mnemonic = "fmla"; fp_instr = true; break;
+ case NEON_FMLS_byelement: mnemonic = "fmls"; fp_instr = true; break;
+ case NEON_FMULX_byelement: mnemonic = "fmulx"; fp_instr = true; break;
+ }
+ }
+
+ if (l_instr) {
+ Format(instr, nfd.Mnemonic(mnemonic), nfd.Substitute(form));
+ } else if (fp_instr) {
+ nfd.SetFormatMap(0, nfd.FPFormatMap());
+ Format(instr, mnemonic, nfd.Substitute(form));
+ } else {
+ nfd.SetFormatMap(0, nfd.IntegerFormatMap());
+ Format(instr, mnemonic, nfd.Substitute(form));
+ }
+}
+
+
+void Disassembler::VisitNEONCopy(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(NEONCopy)";
+
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::TriangularFormatMap(),
+ NEONFormatDecoder::TriangularScalarFormatMap());
+
+ if (instr->Mask(NEONCopyInsElementMask) == NEON_INS_ELEMENT) {
+ mnemonic = "mov";
+ nfd.SetFormatMap(0, nfd.TriangularScalarFormatMap());
+ form = "'Vd.%s['IVInsIndex1], 'Vn.%s['IVInsIndex2]";
+ } else if (instr->Mask(NEONCopyInsGeneralMask) == NEON_INS_GENERAL) {
+ mnemonic = "mov";
+ nfd.SetFormatMap(0, nfd.TriangularScalarFormatMap());
+ if (nfd.GetVectorFormat() == kFormatD) {
+ form = "'Vd.%s['IVInsIndex1], 'Xn";
+ } else {
+ form = "'Vd.%s['IVInsIndex1], 'Wn";
+ }
+ } else if (instr->Mask(NEONCopyUmovMask) == NEON_UMOV) {
+ if (instr->Mask(NEON_Q) || ((instr->ImmNEON5() & 7) == 4)) {
+ mnemonic = "mov";
+ } else {
+ mnemonic = "umov";
+ }
+ nfd.SetFormatMap(0, nfd.TriangularScalarFormatMap());
+ if (nfd.GetVectorFormat() == kFormatD) {
+ form = "'Xd, 'Vn.%s['IVInsIndex1]";
+ } else {
+ form = "'Wd, 'Vn.%s['IVInsIndex1]";
+ }
+ } else if (instr->Mask(NEONCopySmovMask) == NEON_SMOV) {
+ mnemonic = "smov";
+ nfd.SetFormatMap(0, nfd.TriangularScalarFormatMap());
+ form = "'Rdq, 'Vn.%s['IVInsIndex1]";
+ } else if (instr->Mask(NEONCopyDupElementMask) == NEON_DUP_ELEMENT) {
+ mnemonic = "dup";
+ form = "'Vd.%s, 'Vn.%s['IVInsIndex1]";
+ } else if (instr->Mask(NEONCopyDupGeneralMask) == NEON_DUP_GENERAL) {
+ mnemonic = "dup";
+ if (nfd.GetVectorFormat() == kFormat2D) {
+ form = "'Vd.%s, 'Xn";
+ } else {
+ form = "'Vd.%s, 'Wn";
+ }
+ }
+ Format(instr, mnemonic, nfd.Substitute(form));
+}
+
+
+void Disassembler::VisitNEONExtract(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(NEONExtract)";
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::LogicalFormatMap());
+ if (instr->Mask(NEONExtractMask) == NEON_EXT) {
+ mnemonic = "ext";
+ form = "'Vd.%s, 'Vn.%s, 'Vm.%s, 'IVExtract";
+ }
+ Format(instr, mnemonic, nfd.Substitute(form));
+}
+
+
+void Disassembler::VisitNEONLoadStoreMultiStruct(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(NEONLoadStoreMultiStruct)";
+ const char *form_1v = "{'Vt.%1$s}, ['Xns]";
+ const char *form_2v = "{'Vt.%1$s, 'Vt2.%1$s}, ['Xns]";
+ const char *form_3v = "{'Vt.%1$s, 'Vt2.%1$s, 'Vt3.%1$s}, ['Xns]";
+ const char *form_4v = "{'Vt.%1$s, 'Vt2.%1$s, 'Vt3.%1$s, 'Vt4.%1$s}, ['Xns]";
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap());
+
+ switch (instr->Mask(NEONLoadStoreMultiStructMask)) {
+ case NEON_LD1_1v: mnemonic = "ld1"; form = form_1v; break;
+ case NEON_LD1_2v: mnemonic = "ld1"; form = form_2v; break;
+ case NEON_LD1_3v: mnemonic = "ld1"; form = form_3v; break;
+ case NEON_LD1_4v: mnemonic = "ld1"; form = form_4v; break;
+ case NEON_LD2: mnemonic = "ld2"; form = form_2v; break;
+ case NEON_LD3: mnemonic = "ld3"; form = form_3v; break;
+ case NEON_LD4: mnemonic = "ld4"; form = form_4v; break;
+ case NEON_ST1_1v: mnemonic = "st1"; form = form_1v; break;
+ case NEON_ST1_2v: mnemonic = "st1"; form = form_2v; break;
+ case NEON_ST1_3v: mnemonic = "st1"; form = form_3v; break;
+ case NEON_ST1_4v: mnemonic = "st1"; form = form_4v; break;
+ case NEON_ST2: mnemonic = "st2"; form = form_2v; break;
+ case NEON_ST3: mnemonic = "st3"; form = form_3v; break;
+ case NEON_ST4: mnemonic = "st4"; form = form_4v; break;
+ default: break;
+ }
+
+ Format(instr, mnemonic, nfd.Substitute(form));
+}
+
+
+void Disassembler::VisitNEONLoadStoreMultiStructPostIndex(
+ const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(NEONLoadStoreMultiStructPostIndex)";
+ const char *form_1v = "{'Vt.%1$s}, ['Xns], 'Xmr1";
+ const char *form_2v = "{'Vt.%1$s, 'Vt2.%1$s}, ['Xns], 'Xmr2";
+ const char *form_3v = "{'Vt.%1$s, 'Vt2.%1$s, 'Vt3.%1$s}, ['Xns], 'Xmr3";
+ const char *form_4v =
+ "{'Vt.%1$s, 'Vt2.%1$s, 'Vt3.%1$s, 'Vt4.%1$s}, ['Xns], 'Xmr4";
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap());
+
+ switch (instr->Mask(NEONLoadStoreMultiStructPostIndexMask)) {
+ case NEON_LD1_1v_post: mnemonic = "ld1"; form = form_1v; break;
+ case NEON_LD1_2v_post: mnemonic = "ld1"; form = form_2v; break;
+ case NEON_LD1_3v_post: mnemonic = "ld1"; form = form_3v; break;
+ case NEON_LD1_4v_post: mnemonic = "ld1"; form = form_4v; break;
+ case NEON_LD2_post: mnemonic = "ld2"; form = form_2v; break;
+ case NEON_LD3_post: mnemonic = "ld3"; form = form_3v; break;
+ case NEON_LD4_post: mnemonic = "ld4"; form = form_4v; break;
+ case NEON_ST1_1v_post: mnemonic = "st1"; form = form_1v; break;
+ case NEON_ST1_2v_post: mnemonic = "st1"; form = form_2v; break;
+ case NEON_ST1_3v_post: mnemonic = "st1"; form = form_3v; break;
+ case NEON_ST1_4v_post: mnemonic = "st1"; form = form_4v; break;
+ case NEON_ST2_post: mnemonic = "st2"; form = form_2v; break;
+ case NEON_ST3_post: mnemonic = "st3"; form = form_3v; break;
+ case NEON_ST4_post: mnemonic = "st4"; form = form_4v; break;
+ default: break;
+ }
+
+ Format(instr, mnemonic, nfd.Substitute(form));
+}
+
+
+void Disassembler::VisitNEONLoadStoreSingleStruct(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(NEONLoadStoreSingleStruct)";
+
+ const char *form_1b = "{'Vt.b}['IVLSLane0], ['Xns]";
+ const char *form_1h = "{'Vt.h}['IVLSLane1], ['Xns]";
+ const char *form_1s = "{'Vt.s}['IVLSLane2], ['Xns]";
+ const char *form_1d = "{'Vt.d}['IVLSLane3], ['Xns]";
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap());
+
+ switch (instr->Mask(NEONLoadStoreSingleStructMask)) {
+ case NEON_LD1_b: mnemonic = "ld1"; form = form_1b; break;
+ case NEON_LD1_h: mnemonic = "ld1"; form = form_1h; break;
+ case NEON_LD1_s:
+ mnemonic = "ld1";
+ VIXL_STATIC_ASSERT((NEON_LD1_s | (1 << NEONLSSize_offset)) == NEON_LD1_d);
+ form = ((instr->NEONLSSize() & 1) == 0) ? form_1s : form_1d;
+ break;
+ case NEON_ST1_b: mnemonic = "st1"; form = form_1b; break;
+ case NEON_ST1_h: mnemonic = "st1"; form = form_1h; break;
+ case NEON_ST1_s:
+ mnemonic = "st1";
+ VIXL_STATIC_ASSERT((NEON_ST1_s | (1 << NEONLSSize_offset)) == NEON_ST1_d);
+ form = ((instr->NEONLSSize() & 1) == 0) ? form_1s : form_1d;
+ break;
+ case NEON_LD1R:
+ mnemonic = "ld1r";
+ form = "{'Vt.%s}, ['Xns]";
+ break;
+ case NEON_LD2_b:
+ case NEON_ST2_b:
+ mnemonic = (instr->LdStXLoad() == 1) ? "ld2" : "st2";
+ form = "{'Vt.b, 'Vt2.b}['IVLSLane0], ['Xns]";
+ break;
+ case NEON_LD2_h:
+ case NEON_ST2_h:
+ mnemonic = (instr->LdStXLoad() == 1) ? "ld2" : "st2";
+ form = "{'Vt.h, 'Vt2.h}['IVLSLane1], ['Xns]";
+ break;
+ case NEON_LD2_s:
+ case NEON_ST2_s:
+ VIXL_STATIC_ASSERT((NEON_ST2_s | (1 << NEONLSSize_offset)) == NEON_ST2_d);
+ VIXL_STATIC_ASSERT((NEON_LD2_s | (1 << NEONLSSize_offset)) == NEON_LD2_d);
+ mnemonic = (instr->LdStXLoad() == 1) ? "ld2" : "st2";
+ if ((instr->NEONLSSize() & 1) == 0)
+ form = "{'Vt.s, 'Vt2.s}['IVLSLane2], ['Xns]";
+ else
+ form = "{'Vt.d, 'Vt2.d}['IVLSLane3], ['Xns]";
+ break;
+ case NEON_LD2R:
+ mnemonic = "ld2r";
+ form = "{'Vt.%s, 'Vt2.%s}, ['Xns]";
+ break;
+ case NEON_LD3_b:
+ case NEON_ST3_b:
+ mnemonic = (instr->LdStXLoad() == 1) ? "ld3" : "st3";
+ form = "{'Vt.b, 'Vt2.b, 'Vt3.b}['IVLSLane0], ['Xns]";
+ break;
+ case NEON_LD3_h:
+ case NEON_ST3_h:
+ mnemonic = (instr->LdStXLoad() == 1) ? "ld3" : "st3";
+ form = "{'Vt.h, 'Vt2.h, 'Vt3.h}['IVLSLane1], ['Xns]";
+ break;
+ case NEON_LD3_s:
+ case NEON_ST3_s:
+ mnemonic = (instr->LdStXLoad() == 1) ? "ld3" : "st3";
+ if ((instr->NEONLSSize() & 1) == 0)
+ form = "{'Vt.s, 'Vt2.s, 'Vt3.s}['IVLSLane2], ['Xns]";
+ else
+ form = "{'Vt.d, 'Vt2.d, 'Vt3.d}['IVLSLane3], ['Xns]";
+ break;
+ case NEON_LD3R:
+ mnemonic = "ld3r";
+ form = "{'Vt.%s, 'Vt2.%s, 'Vt3.%s}, ['Xns]";
+ break;
+ case NEON_LD4_b:
+ case NEON_ST4_b:
+ mnemonic = (instr->LdStXLoad() == 1) ? "ld4" : "st4";
+ form = "{'Vt.b, 'Vt2.b, 'Vt3.b, 'Vt4.b}['IVLSLane0], ['Xns]";
+ break;
+ case NEON_LD4_h:
+ case NEON_ST4_h:
+ mnemonic = (instr->LdStXLoad() == 1) ? "ld4" : "st4";
+ form = "{'Vt.h, 'Vt2.h, 'Vt3.h, 'Vt4.h}['IVLSLane1], ['Xns]";
+ break;
+ case NEON_LD4_s:
+ case NEON_ST4_s:
+ VIXL_STATIC_ASSERT((NEON_LD4_s | (1 << NEONLSSize_offset)) == NEON_LD4_d);
+ VIXL_STATIC_ASSERT((NEON_ST4_s | (1 << NEONLSSize_offset)) == NEON_ST4_d);
+ mnemonic = (instr->LdStXLoad() == 1) ? "ld4" : "st4";
+ if ((instr->NEONLSSize() & 1) == 0)
+ form = "{'Vt.s, 'Vt2.s, 'Vt3.s, 'Vt4.s}['IVLSLane2], ['Xns]";
+ else
+ form = "{'Vt.d, 'Vt2.d, 'Vt3.d, 'Vt4.d}['IVLSLane3], ['Xns]";
+ break;
+ case NEON_LD4R:
+ mnemonic = "ld4r";
+ form = "{'Vt.%1$s, 'Vt2.%1$s, 'Vt3.%1$s, 'Vt4.%1$s}, ['Xns]";
+ break;
+ default: break;
+ }
+
+ Format(instr, mnemonic, nfd.Substitute(form));
+}
+
+
+void Disassembler::VisitNEONLoadStoreSingleStructPostIndex(
+ const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(NEONLoadStoreSingleStructPostIndex)";
+
+ const char *form_1b = "{'Vt.b}['IVLSLane0], ['Xns], 'Xmb1";
+ const char *form_1h = "{'Vt.h}['IVLSLane1], ['Xns], 'Xmb2";
+ const char *form_1s = "{'Vt.s}['IVLSLane2], ['Xns], 'Xmb4";
+ const char *form_1d = "{'Vt.d}['IVLSLane3], ['Xns], 'Xmb8";
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap());
+
+ switch (instr->Mask(NEONLoadStoreSingleStructPostIndexMask)) {
+ case NEON_LD1_b_post: mnemonic = "ld1"; form = form_1b; break;
+ case NEON_LD1_h_post: mnemonic = "ld1"; form = form_1h; break;
+ case NEON_LD1_s_post:
+ mnemonic = "ld1";
+ VIXL_STATIC_ASSERT((NEON_LD1_s | (1 << NEONLSSize_offset)) == NEON_LD1_d);
+ form = ((instr->NEONLSSize() & 1) == 0) ? form_1s : form_1d;
+ break;
+ case NEON_ST1_b_post: mnemonic = "st1"; form = form_1b; break;
+ case NEON_ST1_h_post: mnemonic = "st1"; form = form_1h; break;
+ case NEON_ST1_s_post:
+ mnemonic = "st1";
+ VIXL_STATIC_ASSERT((NEON_ST1_s | (1 << NEONLSSize_offset)) == NEON_ST1_d);
+ form = ((instr->NEONLSSize() & 1) == 0) ? form_1s : form_1d;
+ break;
+ case NEON_LD1R_post:
+ mnemonic = "ld1r";
+ form = "{'Vt.%s}, ['Xns], 'Xmz1";
+ break;
+ case NEON_LD2_b_post:
+ case NEON_ST2_b_post:
+ mnemonic = (instr->LdStXLoad() == 1) ? "ld2" : "st2";
+ form = "{'Vt.b, 'Vt2.b}['IVLSLane0], ['Xns], 'Xmb2";
+ break;
+ case NEON_ST2_h_post:
+ case NEON_LD2_h_post:
+ mnemonic = (instr->LdStXLoad() == 1) ? "ld2" : "st2";
+ form = "{'Vt.h, 'Vt2.h}['IVLSLane1], ['Xns], 'Xmb4";
+ break;
+ case NEON_LD2_s_post:
+ case NEON_ST2_s_post:
+ mnemonic = (instr->LdStXLoad() == 1) ? "ld2" : "st2";
+ if ((instr->NEONLSSize() & 1) == 0)
+ form = "{'Vt.s, 'Vt2.s}['IVLSLane2], ['Xns], 'Xmb8";
+ else
+ form = "{'Vt.d, 'Vt2.d}['IVLSLane3], ['Xns], 'Xmb16";
+ break;
+ case NEON_LD2R_post:
+ mnemonic = "ld2r";
+ form = "{'Vt.%s, 'Vt2.%s}, ['Xns], 'Xmz2";
+ break;
+ case NEON_LD3_b_post:
+ case NEON_ST3_b_post:
+ mnemonic = (instr->LdStXLoad() == 1) ? "ld3" : "st3";
+ form = "{'Vt.b, 'Vt2.b, 'Vt3.b}['IVLSLane0], ['Xns], 'Xmb3";
+ break;
+ case NEON_LD3_h_post:
+ case NEON_ST3_h_post:
+ mnemonic = (instr->LdStXLoad() == 1) ? "ld3" : "st3";
+ form = "{'Vt.h, 'Vt2.h, 'Vt3.h}['IVLSLane1], ['Xns], 'Xmb6";
+ break;
+ case NEON_LD3_s_post:
+ case NEON_ST3_s_post:
+ mnemonic = (instr->LdStXLoad() == 1) ? "ld3" : "st3";
+ if ((instr->NEONLSSize() & 1) == 0)
+ form = "{'Vt.s, 'Vt2.s, 'Vt3.s}['IVLSLane2], ['Xns], 'Xmb12";
+ else
+ form = "{'Vt.d, 'Vt2.d, 'Vt3.d}['IVLSLane3], ['Xns], 'Xmr3";
+ break;
+ case NEON_LD3R_post:
+ mnemonic = "ld3r";
+ form = "{'Vt.%s, 'Vt2.%s, 'Vt3.%s}, ['Xns], 'Xmz3";
+ break;
+ case NEON_LD4_b_post:
+ case NEON_ST4_b_post:
+ mnemonic = (instr->LdStXLoad() == 1) ? "ld4" : "st4";
+ form = "{'Vt.b, 'Vt2.b, 'Vt3.b, 'Vt4.b}['IVLSLane0], ['Xns], 'Xmb4";
+ break;
+ case NEON_LD4_h_post:
+ case NEON_ST4_h_post:
+ mnemonic = (instr->LdStXLoad()) == 1 ? "ld4" : "st4";
+ form = "{'Vt.h, 'Vt2.h, 'Vt3.h, 'Vt4.h}['IVLSLane1], ['Xns], 'Xmb8";
+ break;
+ case NEON_LD4_s_post:
+ case NEON_ST4_s_post:
+ mnemonic = (instr->LdStXLoad() == 1) ? "ld4" : "st4";
+ if ((instr->NEONLSSize() & 1) == 0)
+ form = "{'Vt.s, 'Vt2.s, 'Vt3.s, 'Vt4.s}['IVLSLane2], ['Xns], 'Xmb16";
+ else
+ form = "{'Vt.d, 'Vt2.d, 'Vt3.d, 'Vt4.d}['IVLSLane3], ['Xns], 'Xmb32";
+ break;
+ case NEON_LD4R_post:
+ mnemonic = "ld4r";
+ form = "{'Vt.%1$s, 'Vt2.%1$s, 'Vt3.%1$s, 'Vt4.%1$s}, ['Xns], 'Xmz4";
+ break;
+ default: break;
+ }
+
+ Format(instr, mnemonic, nfd.Substitute(form));
+}
+
+
+void Disassembler::VisitNEONModifiedImmediate(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Vt.%s, 'IVMIImm8, lsl 'IVMIShiftAmt1";
+
+ int cmode = instr->NEONCmode();
+ int cmode_3 = (cmode >> 3) & 1;
+ int cmode_2 = (cmode >> 2) & 1;
+ int cmode_1 = (cmode >> 1) & 1;
+ int cmode_0 = cmode & 1;
+ int q = instr->NEONQ();
+ int op = instr->NEONModImmOp();
+
+ static const NEONFormatMap map_b = { {30}, {NF_8B, NF_16B} };
+ static const NEONFormatMap map_h = { {30}, {NF_4H, NF_8H} };
+ static const NEONFormatMap map_s = { {30}, {NF_2S, NF_4S} };
+ NEONFormatDecoder nfd(instr, &map_b);
+
+ if (cmode_3 == 0) {
+ if (cmode_0 == 0) {
+ mnemonic = (op == 1) ? "mvni" : "movi";
+ } else { // cmode<0> == '1'.
+ mnemonic = (op == 1) ? "bic" : "orr";
+ }
+ nfd.SetFormatMap(0, &map_s);
+ } else { // cmode<3> == '1'.
+ if (cmode_2 == 0) {
+ if (cmode_0 == 0) {
+ mnemonic = (op == 1) ? "mvni" : "movi";
+ } else { // cmode<0> == '1'.
+ mnemonic = (op == 1) ? "bic" : "orr";
+ }
+ nfd.SetFormatMap(0, &map_h);
+ } else { // cmode<2> == '1'.
+ if (cmode_1 == 0) {
+ mnemonic = (op == 1) ? "mvni" : "movi";
+ form = "'Vt.%s, 'IVMIImm8, msl 'IVMIShiftAmt2";
+ nfd.SetFormatMap(0, &map_s);
+ } else { // cmode<1> == '1'.
+ if (cmode_0 == 0) {
+ mnemonic = "movi";
+ if (op == 0) {
+ form = "'Vt.%s, 'IVMIImm8";
+ } else {
+ form = (q == 0) ? "'Dd, 'IVMIImm" : "'Vt.2d, 'IVMIImm";
+ }
+ } else { // cmode<0> == '1'
+ mnemonic = "fmov";
+ if (op == 0) {
+ form = "'Vt.%s, 'IVMIImmFPSingle";
+ nfd.SetFormatMap(0, &map_s);
+ } else {
+ if (q == 1) {
+ form = "'Vt.2d, 'IVMIImmFPDouble";
+ }
+ }
+ }
+ }
+ }
+ }
+ Format(instr, mnemonic, nfd.Substitute(form));
+}
+
+
+void Disassembler::VisitNEONScalar2RegMisc(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "%sd, %sn";
+ const char *form_0 = "%sd, %sn, #0";
+ const char *form_fp0 = "%sd, %sn, #0.0";
+
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap());
+
+ if (instr->Mask(NEON2RegMiscOpcode) <= NEON_NEG_scalar_opcode) {
+ // These instructions all use a two bit size field, except NOT and RBIT,
+ // which use the field to encode the operation.
+ switch (instr->Mask(NEONScalar2RegMiscMask)) {
+ case NEON_CMGT_zero_scalar: mnemonic = "cmgt"; form = form_0; break;
+ case NEON_CMGE_zero_scalar: mnemonic = "cmge"; form = form_0; break;
+ case NEON_CMLE_zero_scalar: mnemonic = "cmle"; form = form_0; break;
+ case NEON_CMLT_zero_scalar: mnemonic = "cmlt"; form = form_0; break;
+ case NEON_CMEQ_zero_scalar: mnemonic = "cmeq"; form = form_0; break;
+ case NEON_NEG_scalar: mnemonic = "neg"; break;
+ case NEON_SQNEG_scalar: mnemonic = "sqneg"; break;
+ case NEON_ABS_scalar: mnemonic = "abs"; break;
+ case NEON_SQABS_scalar: mnemonic = "sqabs"; break;
+ case NEON_SUQADD_scalar: mnemonic = "suqadd"; break;
+ case NEON_USQADD_scalar: mnemonic = "usqadd"; break;
+ default: form = "(NEONScalar2RegMisc)";
+ }
+ } else {
+ // These instructions all use a one bit size field, except SQXTUN, SQXTN
+ // and UQXTN, which use a two bit size field.
+ nfd.SetFormatMaps(nfd.FPScalarFormatMap());
+ switch (instr->Mask(NEONScalar2RegMiscFPMask)) {
+ case NEON_FRSQRTE_scalar: mnemonic = "frsqrte"; break;
+ case NEON_FRECPE_scalar: mnemonic = "frecpe"; break;
+ case NEON_SCVTF_scalar: mnemonic = "scvtf"; break;
+ case NEON_UCVTF_scalar: mnemonic = "ucvtf"; break;
+ case NEON_FCMGT_zero_scalar: mnemonic = "fcmgt"; form = form_fp0; break;
+ case NEON_FCMGE_zero_scalar: mnemonic = "fcmge"; form = form_fp0; break;
+ case NEON_FCMLE_zero_scalar: mnemonic = "fcmle"; form = form_fp0; break;
+ case NEON_FCMLT_zero_scalar: mnemonic = "fcmlt"; form = form_fp0; break;
+ case NEON_FCMEQ_zero_scalar: mnemonic = "fcmeq"; form = form_fp0; break;
+ case NEON_FRECPX_scalar: mnemonic = "frecpx"; break;
+ case NEON_FCVTNS_scalar: mnemonic = "fcvtns"; break;
+ case NEON_FCVTNU_scalar: mnemonic = "fcvtnu"; break;
+ case NEON_FCVTPS_scalar: mnemonic = "fcvtps"; break;
+ case NEON_FCVTPU_scalar: mnemonic = "fcvtpu"; break;
+ case NEON_FCVTMS_scalar: mnemonic = "fcvtms"; break;
+ case NEON_FCVTMU_scalar: mnemonic = "fcvtmu"; break;
+ case NEON_FCVTZS_scalar: mnemonic = "fcvtzs"; break;
+ case NEON_FCVTZU_scalar: mnemonic = "fcvtzu"; break;
+ case NEON_FCVTAS_scalar: mnemonic = "fcvtas"; break;
+ case NEON_FCVTAU_scalar: mnemonic = "fcvtau"; break;
+ case NEON_FCVTXN_scalar:
+ nfd.SetFormatMap(0, nfd.LongScalarFormatMap());
+ mnemonic = "fcvtxn";
+ break;
+ default:
+ nfd.SetFormatMap(0, nfd.ScalarFormatMap());
+ nfd.SetFormatMap(1, nfd.LongScalarFormatMap());
+ switch (instr->Mask(NEONScalar2RegMiscMask)) {
+ case NEON_SQXTN_scalar: mnemonic = "sqxtn"; break;
+ case NEON_UQXTN_scalar: mnemonic = "uqxtn"; break;
+ case NEON_SQXTUN_scalar: mnemonic = "sqxtun"; break;
+ default: form = "(NEONScalar2RegMisc)";
+ }
+ }
+ }
+ Format(instr, mnemonic, nfd.SubstitutePlaceholders(form));
+}
+
+
+void Disassembler::VisitNEONScalar3Diff(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "%sd, %sn, %sm";
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::LongScalarFormatMap(),
+ NEONFormatDecoder::ScalarFormatMap());
+
+ switch (instr->Mask(NEONScalar3DiffMask)) {
+ case NEON_SQDMLAL_scalar : mnemonic = "sqdmlal"; break;
+ case NEON_SQDMLSL_scalar : mnemonic = "sqdmlsl"; break;
+ case NEON_SQDMULL_scalar : mnemonic = "sqdmull"; break;
+ default: form = "(NEONScalar3Diff)";
+ }
+ Format(instr, mnemonic, nfd.SubstitutePlaceholders(form));
+}
+
+
+void Disassembler::VisitNEONScalar3Same(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "%sd, %sn, %sm";
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap());
+
+ if (instr->Mask(NEONScalar3SameFPFMask) == NEONScalar3SameFPFixed) {
+ nfd.SetFormatMaps(nfd.FPScalarFormatMap());
+ switch (instr->Mask(NEONScalar3SameFPMask)) {
+ case NEON_FACGE_scalar: mnemonic = "facge"; break;
+ case NEON_FACGT_scalar: mnemonic = "facgt"; break;
+ case NEON_FCMEQ_scalar: mnemonic = "fcmeq"; break;
+ case NEON_FCMGE_scalar: mnemonic = "fcmge"; break;
+ case NEON_FCMGT_scalar: mnemonic = "fcmgt"; break;
+ case NEON_FMULX_scalar: mnemonic = "fmulx"; break;
+ case NEON_FRECPS_scalar: mnemonic = "frecps"; break;
+ case NEON_FRSQRTS_scalar: mnemonic = "frsqrts"; break;
+ case NEON_FABD_scalar: mnemonic = "fabd"; break;
+ default: form = "(NEONScalar3Same)";
+ }
+ } else {
+ switch (instr->Mask(NEONScalar3SameMask)) {
+ case NEON_ADD_scalar: mnemonic = "add"; break;
+ case NEON_SUB_scalar: mnemonic = "sub"; break;
+ case NEON_CMEQ_scalar: mnemonic = "cmeq"; break;
+ case NEON_CMGE_scalar: mnemonic = "cmge"; break;
+ case NEON_CMGT_scalar: mnemonic = "cmgt"; break;
+ case NEON_CMHI_scalar: mnemonic = "cmhi"; break;
+ case NEON_CMHS_scalar: mnemonic = "cmhs"; break;
+ case NEON_CMTST_scalar: mnemonic = "cmtst"; break;
+ case NEON_UQADD_scalar: mnemonic = "uqadd"; break;
+ case NEON_SQADD_scalar: mnemonic = "sqadd"; break;
+ case NEON_UQSUB_scalar: mnemonic = "uqsub"; break;
+ case NEON_SQSUB_scalar: mnemonic = "sqsub"; break;
+ case NEON_USHL_scalar: mnemonic = "ushl"; break;
+ case NEON_SSHL_scalar: mnemonic = "sshl"; break;
+ case NEON_UQSHL_scalar: mnemonic = "uqshl"; break;
+ case NEON_SQSHL_scalar: mnemonic = "sqshl"; break;
+ case NEON_URSHL_scalar: mnemonic = "urshl"; break;
+ case NEON_SRSHL_scalar: mnemonic = "srshl"; break;
+ case NEON_UQRSHL_scalar: mnemonic = "uqrshl"; break;
+ case NEON_SQRSHL_scalar: mnemonic = "sqrshl"; break;
+ case NEON_SQDMULH_scalar: mnemonic = "sqdmulh"; break;
+ case NEON_SQRDMULH_scalar: mnemonic = "sqrdmulh"; break;
+ default: form = "(NEONScalar3Same)";
+ }
+ }
+ Format(instr, mnemonic, nfd.SubstitutePlaceholders(form));
+}
+
+
+void Disassembler::VisitNEONScalarByIndexedElement(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "%sd, %sn, 'Ve.%s['IVByElemIndex]";
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap());
+ bool long_instr = false;
+
+ switch (instr->Mask(NEONScalarByIndexedElementMask)) {
+ case NEON_SQDMULL_byelement_scalar:
+ mnemonic = "sqdmull";
+ long_instr = true;
+ break;
+ case NEON_SQDMLAL_byelement_scalar:
+ mnemonic = "sqdmlal";
+ long_instr = true;
+ break;
+ case NEON_SQDMLSL_byelement_scalar:
+ mnemonic = "sqdmlsl";
+ long_instr = true;
+ break;
+ case NEON_SQDMULH_byelement_scalar:
+ mnemonic = "sqdmulh";
+ break;
+ case NEON_SQRDMULH_byelement_scalar:
+ mnemonic = "sqrdmulh";
+ break;
+ default:
+ nfd.SetFormatMap(0, nfd.FPScalarFormatMap());
+ switch (instr->Mask(NEONScalarByIndexedElementFPMask)) {
+ case NEON_FMUL_byelement_scalar: mnemonic = "fmul"; break;
+ case NEON_FMLA_byelement_scalar: mnemonic = "fmla"; break;
+ case NEON_FMLS_byelement_scalar: mnemonic = "fmls"; break;
+ case NEON_FMULX_byelement_scalar: mnemonic = "fmulx"; break;
+ default: form = "(NEONScalarByIndexedElement)";
+ }
+ }
+
+ if (long_instr) {
+ nfd.SetFormatMap(0, nfd.LongScalarFormatMap());
+ }
+
+ Format(instr, mnemonic, nfd.Substitute(
+ form, nfd.kPlaceholder, nfd.kPlaceholder, nfd.kFormat));
+}
+
+
+void Disassembler::VisitNEONScalarCopy(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(NEONScalarCopy)";
+
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::TriangularScalarFormatMap());
+
+ if (instr->Mask(NEONScalarCopyMask) == NEON_DUP_ELEMENT_scalar) {
+ mnemonic = "mov";
+ form = "%sd, 'Vn.%s['IVInsIndex1]";
+ }
+
+ Format(instr, mnemonic, nfd.Substitute(form, nfd.kPlaceholder, nfd.kFormat));
+}
+
+
+void Disassembler::VisitNEONScalarPairwise(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "%sd, 'Vn.%s";
+ NEONFormatMap map = { {22}, {NF_2S, NF_2D} };
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::FPScalarFormatMap(), &map);
+
+ switch (instr->Mask(NEONScalarPairwiseMask)) {
+ case NEON_ADDP_scalar: mnemonic = "addp"; break;
+ case NEON_FADDP_scalar: mnemonic = "faddp"; break;
+ case NEON_FMAXP_scalar: mnemonic = "fmaxp"; break;
+ case NEON_FMAXNMP_scalar: mnemonic = "fmaxnmp"; break;
+ case NEON_FMINP_scalar: mnemonic = "fminp"; break;
+ case NEON_FMINNMP_scalar: mnemonic = "fminnmp"; break;
+ default: form = "(NEONScalarPairwise)";
+ }
+ Format(instr, mnemonic, nfd.Substitute(form,
+ NEONFormatDecoder::kPlaceholder, NEONFormatDecoder::kFormat));
+}
+
+
+void Disassembler::VisitNEONScalarShiftImmediate(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "%sd, %sn, 'Is1";
+ const char *form_2 = "%sd, %sn, 'Is2";
+
+ static const NEONFormatMap map_shift = {
+ {22, 21, 20, 19},
+ {NF_UNDEF, NF_B, NF_H, NF_H, NF_S, NF_S, NF_S, NF_S,
+ NF_D, NF_D, NF_D, NF_D, NF_D, NF_D, NF_D, NF_D}
+ };
+ static const NEONFormatMap map_shift_narrow = {
+ {21, 20, 19},
+ {NF_UNDEF, NF_H, NF_S, NF_S, NF_D, NF_D, NF_D, NF_D}
+ };
+ NEONFormatDecoder nfd(instr, &map_shift);
+
+ if (instr->ImmNEONImmh()) { // immh has to be non-zero.
+ switch (instr->Mask(NEONScalarShiftImmediateMask)) {
+ case NEON_FCVTZU_imm_scalar: mnemonic = "fcvtzu"; break;
+ case NEON_FCVTZS_imm_scalar: mnemonic = "fcvtzs"; break;
+ case NEON_SCVTF_imm_scalar: mnemonic = "scvtf"; break;
+ case NEON_UCVTF_imm_scalar: mnemonic = "ucvtf"; break;
+ case NEON_SRI_scalar: mnemonic = "sri"; break;
+ case NEON_SSHR_scalar: mnemonic = "sshr"; break;
+ case NEON_USHR_scalar: mnemonic = "ushr"; break;
+ case NEON_SRSHR_scalar: mnemonic = "srshr"; break;
+ case NEON_URSHR_scalar: mnemonic = "urshr"; break;
+ case NEON_SSRA_scalar: mnemonic = "ssra"; break;
+ case NEON_USRA_scalar: mnemonic = "usra"; break;
+ case NEON_SRSRA_scalar: mnemonic = "srsra"; break;
+ case NEON_URSRA_scalar: mnemonic = "ursra"; break;
+ case NEON_SHL_scalar: mnemonic = "shl"; form = form_2; break;
+ case NEON_SLI_scalar: mnemonic = "sli"; form = form_2; break;
+ case NEON_SQSHLU_scalar: mnemonic = "sqshlu"; form = form_2; break;
+ case NEON_SQSHL_imm_scalar: mnemonic = "sqshl"; form = form_2; break;
+ case NEON_UQSHL_imm_scalar: mnemonic = "uqshl"; form = form_2; break;
+ case NEON_UQSHRN_scalar:
+ mnemonic = "uqshrn";
+ nfd.SetFormatMap(1, &map_shift_narrow);
+ break;
+ case NEON_UQRSHRN_scalar:
+ mnemonic = "uqrshrn";
+ nfd.SetFormatMap(1, &map_shift_narrow);
+ break;
+ case NEON_SQSHRN_scalar:
+ mnemonic = "sqshrn";
+ nfd.SetFormatMap(1, &map_shift_narrow);
+ break;
+ case NEON_SQRSHRN_scalar:
+ mnemonic = "sqrshrn";
+ nfd.SetFormatMap(1, &map_shift_narrow);
+ break;
+ case NEON_SQSHRUN_scalar:
+ mnemonic = "sqshrun";
+ nfd.SetFormatMap(1, &map_shift_narrow);
+ break;
+ case NEON_SQRSHRUN_scalar:
+ mnemonic = "sqrshrun";
+ nfd.SetFormatMap(1, &map_shift_narrow);
+ break;
+ default:
+ form = "(NEONScalarShiftImmediate)";
+ }
+ } else {
+ form = "(NEONScalarShiftImmediate)";
+ }
+ Format(instr, mnemonic, nfd.SubstitutePlaceholders(form));
+}
+
+
+void Disassembler::VisitNEONShiftImmediate(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Vd.%s, 'Vn.%s, 'Is1";
+ const char *form_shift_2 = "'Vd.%s, 'Vn.%s, 'Is2";
+ const char *form_xtl = "'Vd.%s, 'Vn.%s";
+
+ // 0001->8H, 001x->4S, 01xx->2D, all others undefined.
+ static const NEONFormatMap map_shift_ta = {
+ {22, 21, 20, 19},
+ {NF_UNDEF, NF_8H, NF_4S, NF_4S, NF_2D, NF_2D, NF_2D, NF_2D}
+ };
+
+ // 00010->8B, 00011->16B, 001x0->4H, 001x1->8H,
+ // 01xx0->2S, 01xx1->4S, 1xxx1->2D, all others undefined.
+ static const NEONFormatMap map_shift_tb = {
+ {22, 21, 20, 19, 30},
+ {NF_UNDEF, NF_UNDEF, NF_8B, NF_16B, NF_4H, NF_8H, NF_4H, NF_8H,
+ NF_2S, NF_4S, NF_2S, NF_4S, NF_2S, NF_4S, NF_2S, NF_4S,
+ NF_UNDEF, NF_2D, NF_UNDEF, NF_2D, NF_UNDEF, NF_2D, NF_UNDEF, NF_2D,
+ NF_UNDEF, NF_2D, NF_UNDEF, NF_2D, NF_UNDEF, NF_2D, NF_UNDEF, NF_2D}
+ };
+
+ NEONFormatDecoder nfd(instr, &map_shift_tb);
+
+ if (instr->ImmNEONImmh()) { // immh has to be non-zero.
+ switch (instr->Mask(NEONShiftImmediateMask)) {
+ case NEON_SQSHLU: mnemonic = "sqshlu"; form = form_shift_2; break;
+ case NEON_SQSHL_imm: mnemonic = "sqshl"; form = form_shift_2; break;
+ case NEON_UQSHL_imm: mnemonic = "uqshl"; form = form_shift_2; break;
+ case NEON_SHL: mnemonic = "shl"; form = form_shift_2; break;
+ case NEON_SLI: mnemonic = "sli"; form = form_shift_2; break;
+ case NEON_SCVTF_imm: mnemonic = "scvtf"; break;
+ case NEON_UCVTF_imm: mnemonic = "ucvtf"; break;
+ case NEON_FCVTZU_imm: mnemonic = "fcvtzu"; break;
+ case NEON_FCVTZS_imm: mnemonic = "fcvtzs"; break;
+ case NEON_SRI: mnemonic = "sri"; break;
+ case NEON_SSHR: mnemonic = "sshr"; break;
+ case NEON_USHR: mnemonic = "ushr"; break;
+ case NEON_SRSHR: mnemonic = "srshr"; break;
+ case NEON_URSHR: mnemonic = "urshr"; break;
+ case NEON_SSRA: mnemonic = "ssra"; break;
+ case NEON_USRA: mnemonic = "usra"; break;
+ case NEON_SRSRA: mnemonic = "srsra"; break;
+ case NEON_URSRA: mnemonic = "ursra"; break;
+ case NEON_SHRN:
+ mnemonic = instr->Mask(NEON_Q) ? "shrn2" : "shrn";
+ nfd.SetFormatMap(1, &map_shift_ta);
+ break;
+ case NEON_RSHRN:
+ mnemonic = instr->Mask(NEON_Q) ? "rshrn2" : "rshrn";
+ nfd.SetFormatMap(1, &map_shift_ta);
+ break;
+ case NEON_UQSHRN:
+ mnemonic = instr->Mask(NEON_Q) ? "uqshrn2" : "uqshrn";
+ nfd.SetFormatMap(1, &map_shift_ta);
+ break;
+ case NEON_UQRSHRN:
+ mnemonic = instr->Mask(NEON_Q) ? "uqrshrn2" : "uqrshrn";
+ nfd.SetFormatMap(1, &map_shift_ta);
+ break;
+ case NEON_SQSHRN:
+ mnemonic = instr->Mask(NEON_Q) ? "sqshrn2" : "sqshrn";
+ nfd.SetFormatMap(1, &map_shift_ta);
+ break;
+ case NEON_SQRSHRN:
+ mnemonic = instr->Mask(NEON_Q) ? "sqrshrn2" : "sqrshrn";
+ nfd.SetFormatMap(1, &map_shift_ta);
+ break;
+ case NEON_SQSHRUN:
+ mnemonic = instr->Mask(NEON_Q) ? "sqshrun2" : "sqshrun";
+ nfd.SetFormatMap(1, &map_shift_ta);
+ break;
+ case NEON_SQRSHRUN:
+ mnemonic = instr->Mask(NEON_Q) ? "sqrshrun2" : "sqrshrun";
+ nfd.SetFormatMap(1, &map_shift_ta);
+ break;
+ case NEON_SSHLL:
+ nfd.SetFormatMap(0, &map_shift_ta);
+ if (instr->ImmNEONImmb() == 0 &&
+ CountSetBits(instr->ImmNEONImmh(), 32) == 1) { // sxtl variant.
+ form = form_xtl;
+ mnemonic = instr->Mask(NEON_Q) ? "sxtl2" : "sxtl";
+ } else { // sshll variant.
+ form = form_shift_2;
+ mnemonic = instr->Mask(NEON_Q) ? "sshll2" : "sshll";
+ }
+ break;
+ case NEON_USHLL:
+ nfd.SetFormatMap(0, &map_shift_ta);
+ if (instr->ImmNEONImmb() == 0 &&
+ CountSetBits(instr->ImmNEONImmh(), 32) == 1) { // uxtl variant.
+ form = form_xtl;
+ mnemonic = instr->Mask(NEON_Q) ? "uxtl2" : "uxtl";
+ } else { // ushll variant.
+ form = form_shift_2;
+ mnemonic = instr->Mask(NEON_Q) ? "ushll2" : "ushll";
+ }
+ break;
+ default: form = "(NEONShiftImmediate)";
+ }
+ } else {
+ form = "(NEONShiftImmediate)";
+ }
+ Format(instr, mnemonic, nfd.Substitute(form));
+}
+
+
+void Disassembler::VisitNEONTable(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(NEONTable)";
+ const char form_1v[] = "'Vd.%%s, {'Vn.16b}, 'Vm.%%s";
+ const char form_2v[] = "'Vd.%%s, {'Vn.16b, v%d.16b}, 'Vm.%%s";
+ const char form_3v[] = "'Vd.%%s, {'Vn.16b, v%d.16b, v%d.16b}, 'Vm.%%s";
+ const char form_4v[] =
+ "'Vd.%%s, {'Vn.16b, v%d.16b, v%d.16b, v%d.16b}, 'Vm.%%s";
+ static const NEONFormatMap map_b = { {30}, {NF_8B, NF_16B} };
+ NEONFormatDecoder nfd(instr, &map_b);
+
+ switch (instr->Mask(NEONTableMask)) {
+ case NEON_TBL_1v: mnemonic = "tbl"; form = form_1v; break;
+ case NEON_TBL_2v: mnemonic = "tbl"; form = form_2v; break;
+ case NEON_TBL_3v: mnemonic = "tbl"; form = form_3v; break;
+ case NEON_TBL_4v: mnemonic = "tbl"; form = form_4v; break;
+ case NEON_TBX_1v: mnemonic = "tbx"; form = form_1v; break;
+ case NEON_TBX_2v: mnemonic = "tbx"; form = form_2v; break;
+ case NEON_TBX_3v: mnemonic = "tbx"; form = form_3v; break;
+ case NEON_TBX_4v: mnemonic = "tbx"; form = form_4v; break;
+ default: break;
+ }
+
+ char re_form[sizeof(form_4v) + 6];
+ int reg_num = instr->Rn();
+ SprintfLiteral(re_form, form,
+ (reg_num + 1) % kNumberOfVRegisters,
+ (reg_num + 2) % kNumberOfVRegisters,
+ (reg_num + 3) % kNumberOfVRegisters);
+
+ Format(instr, mnemonic, nfd.Substitute(re_form));
+}
+
+
+void Disassembler::VisitNEONPerm(const Instruction* instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Vd.%s, 'Vn.%s, 'Vm.%s";
+ NEONFormatDecoder nfd(instr);
+
+ switch (instr->Mask(NEONPermMask)) {
+ case NEON_TRN1: mnemonic = "trn1"; break;
+ case NEON_TRN2: mnemonic = "trn2"; break;
+ case NEON_UZP1: mnemonic = "uzp1"; break;
+ case NEON_UZP2: mnemonic = "uzp2"; break;
+ case NEON_ZIP1: mnemonic = "zip1"; break;
+ case NEON_ZIP2: mnemonic = "zip2"; break;
+ default: form = "(NEONPerm)";
+ }
+ Format(instr, mnemonic, nfd.Substitute(form));
+}
+
+
+void Disassembler::VisitUnimplemented(const Instruction* instr) {
+ Format(instr, "unimplemented", "(Unimplemented)");
+}
+
+
+void Disassembler::VisitUnallocated(const Instruction* instr) {
+ Format(instr, "unallocated", "(Unallocated)");
+}
+
+
+void Disassembler::ProcessOutput(const Instruction* /*instr*/) {
+ // The base disasm does nothing more than disassembling into a buffer.
+}
+
+
+void Disassembler::AppendRegisterNameToOutput(const Instruction* instr,
+ const CPURegister& reg) {
+ USE(instr);
+ VIXL_ASSERT(reg.IsValid());
+ char reg_char;
+
+ if (reg.IsRegister()) {
+ reg_char = reg.Is64Bits() ? 'x' : 'w';
+ } else {
+ VIXL_ASSERT(reg.IsVRegister());
+ switch (reg.SizeInBits()) {
+ case kBRegSize: reg_char = 'b'; break;
+ case kHRegSize: reg_char = 'h'; break;
+ case kSRegSize: reg_char = 's'; break;
+ case kDRegSize: reg_char = 'd'; break;
+ default:
+ VIXL_ASSERT(reg.Is128Bits());
+ reg_char = 'q';
+ }
+ }
+
+ if (reg.IsVRegister() || !(reg.Aliases(sp) || reg.Aliases(xzr))) {
+ // A core or scalar/vector register: [wx]0 - 30, [bhsdq]0 - 31.
+ AppendToOutput("%c%d", reg_char, reg.code());
+ } else if (reg.Aliases(sp)) {
+ // Disassemble w31/x31 as stack pointer wsp/sp.
+ AppendToOutput("%s", reg.Is64Bits() ? "sp" : "wsp");
+ } else {
+ // Disassemble w31/x31 as zero register wzr/xzr.
+ AppendToOutput("%czr", reg_char);
+ }
+}
+
+
+void Disassembler::AppendPCRelativeOffsetToOutput(const Instruction* instr,
+ int64_t offset) {
+ USE(instr);
+ char sign = (offset < 0) ? '-' : '+';
+ AppendToOutput("#%c0x%" PRIx64, sign, std::abs(offset));
+}
+
+
+void Disassembler::AppendAddressToOutput(const Instruction* instr,
+ const void* addr) {
+ USE(instr);
+ AppendToOutput("(addr 0x%" PRIxPTR ")", reinterpret_cast<uintptr_t>(addr));
+}
+
+
+void Disassembler::AppendCodeAddressToOutput(const Instruction* instr,
+ const void* addr) {
+ AppendAddressToOutput(instr, addr);
+}
+
+
+void Disassembler::AppendDataAddressToOutput(const Instruction* instr,
+ const void* addr) {
+ AppendAddressToOutput(instr, addr);
+}
+
+
+void Disassembler::AppendCodeRelativeAddressToOutput(const Instruction* instr,
+ const void* addr) {
+ USE(instr);
+ int64_t rel_addr = CodeRelativeAddress(addr);
+ if (rel_addr >= 0) {
+ AppendToOutput("(addr 0x%" PRIx64 ")", rel_addr);
+ } else {
+ AppendToOutput("(addr -0x%" PRIx64 ")", -rel_addr);
+ }
+}
+
+
+void Disassembler::AppendCodeRelativeCodeAddressToOutput(
+ const Instruction* instr, const void* addr) {
+ AppendCodeRelativeAddressToOutput(instr, addr);
+}
+
+
+void Disassembler::AppendCodeRelativeDataAddressToOutput(
+ const Instruction* instr, const void* addr) {
+ AppendCodeRelativeAddressToOutput(instr, addr);
+}
+
+
+void Disassembler::MapCodeAddress(int64_t base_address,
+ const Instruction* instr_address) {
+ set_code_address_offset(
+ base_address - reinterpret_cast<intptr_t>(instr_address));
+}
+int64_t Disassembler::CodeRelativeAddress(const void* addr) {
+ return reinterpret_cast<intptr_t>(addr) + code_address_offset();
+}
+
+
+void Disassembler::Format(const Instruction* instr, const char* mnemonic,
+ const char* format) {
+ VIXL_ASSERT(mnemonic != NULL);
+ ResetOutput();
+ uint32_t pos = buffer_pos_;
+ Substitute(instr, mnemonic);
+ if (format != NULL) {
+ uint32_t spaces = buffer_pos_ - pos < 8 ? 8 - (buffer_pos_ - pos) : 1;
+ while (spaces--) {
+ VIXL_ASSERT(buffer_pos_ < buffer_size_);
+ buffer_[buffer_pos_++] = ' ';
+ }
+ Substitute(instr, format);
+ }
+ VIXL_ASSERT(buffer_pos_ < buffer_size_);
+ buffer_[buffer_pos_] = 0;
+ ProcessOutput(instr);
+}
+
+
+void Disassembler::Substitute(const Instruction* instr, const char* string) {
+ char chr = *string++;
+ while (chr != '\0') {
+ if (chr == '\'') {
+ string += SubstituteField(instr, string);
+ } else {
+ VIXL_ASSERT(buffer_pos_ < buffer_size_);
+ buffer_[buffer_pos_++] = chr;
+ }
+ chr = *string++;
+ }
+}
+
+
+int Disassembler::SubstituteField(const Instruction* instr,
+ const char* format) {
+ switch (format[0]) {
+ // NB. The remaining substitution prefix characters are: GJKUZ.
+ case 'R': // Register. X or W, selected by sf bit.
+ case 'F': // FP register. S or D, selected by type field.
+ case 'V': // Vector register, V, vector format.
+ case 'W':
+ case 'X':
+ case 'B':
+ case 'H':
+ case 'S':
+ case 'D':
+ case 'Q': return SubstituteRegisterField(instr, format);
+ case 'I': return SubstituteImmediateField(instr, format);
+ case 'L': return SubstituteLiteralField(instr, format);
+ case 'N': return SubstituteShiftField(instr, format);
+ case 'P': return SubstitutePrefetchField(instr, format);
+ case 'C': return SubstituteConditionField(instr, format);
+ case 'E': return SubstituteExtendField(instr, format);
+ case 'A': return SubstitutePCRelAddressField(instr, format);
+ case 'T': return SubstituteBranchTargetField(instr, format);
+ case 'O': return SubstituteLSRegOffsetField(instr, format);
+ case 'M': return SubstituteBarrierField(instr, format);
+ case 'K': return SubstituteCrField(instr, format);
+ case 'G': return SubstituteSysOpField(instr, format);
+ default: {
+ VIXL_UNREACHABLE();
+ return 1;
+ }
+ }
+}
+
+
+int Disassembler::SubstituteRegisterField(const Instruction* instr,
+ const char* format) {
+ char reg_prefix = format[0];
+ unsigned reg_num = 0;
+ unsigned field_len = 2;
+
+ switch (format[1]) {
+ case 'd':
+ reg_num = instr->Rd();
+ if (format[2] == 'q') {
+ reg_prefix = instr->NEONQ() ? 'X' : 'W';
+ field_len = 3;
+ }
+ break;
+ case 'n': reg_num = instr->Rn(); break;
+ case 'm':
+ reg_num = instr->Rm();
+ switch (format[2]) {
+ // Handle registers tagged with b (bytes), z (instruction), or
+ // r (registers), used for address updates in
+ // NEON load/store instructions.
+ case 'r':
+ case 'b':
+ case 'z': {
+ field_len = 3;
+ char* eimm;
+ int imm = static_cast<int>(strtol(&format[3], &eimm, 10));
+ field_len += eimm - &format[3];
+ if (reg_num == 31) {
+ switch (format[2]) {
+ case 'z':
+ imm *= (1 << instr->NEONLSSize());
+ break;
+ case 'r':
+ imm *= (instr->NEONQ() == 0) ? kDRegSizeInBytes
+ : kQRegSizeInBytes;
+ break;
+ case 'b':
+ break;
+ }
+ AppendToOutput("#%d", imm);
+ return field_len;
+ }
+ break;
+ }
+ }
+ break;
+ case 'e':
+ // This is register Rm, but using a 4-bit specifier. Used in NEON
+ // by-element instructions.
+ reg_num = (instr->Rm() & 0xf);
+ break;
+ case 'a': reg_num = instr->Ra(); break;
+ case 's': reg_num = instr->Rs(); break;
+ case 't':
+ reg_num = instr->Rt();
+ if (format[0] == 'V') {
+ if ((format[2] >= '2') && (format[2] <= '4')) {
+ // Handle consecutive vector register specifiers Vt2, Vt3 and Vt4.
+ reg_num = (reg_num + format[2] - '1') % 32;
+ field_len = 3;
+ }
+ } else {
+ if (format[2] == '2') {
+ // Handle register specifier Rt2.
+ reg_num = instr->Rt2();
+ field_len = 3;
+ }
+ }
+ break;
+ case '(': {
+ switch (format[2]) {
+ case 's':
+ reg_num = instr->Rs();
+ break;
+ case 't':
+ reg_num = instr->Rt();
+ break;
+ default:
+ VIXL_UNREACHABLE();
+ }
+
+ VIXL_ASSERT(format[3] == '+');
+ int i = 4;
+ int addition = 0;
+ while (format[i] != ')') {
+ VIXL_ASSERT((format[i] >= '0') && (format[i] <= '9'));
+ addition *= 10;
+ addition += format[i] - '0';
+ ++i;
+ }
+ reg_num += addition;
+ field_len = i + 1;
+ break;
+ }
+ default: VIXL_UNREACHABLE();
+ }
+
+ // Increase field length for registers tagged as stack.
+ if (format[1] != '(' && format[2] == 's') {
+ field_len = 3;
+ }
+
+ CPURegister::RegisterType reg_type = CPURegister::kRegister;
+ unsigned reg_size = kXRegSize;
+
+ if (reg_prefix == 'R') {
+ reg_prefix = instr->SixtyFourBits() ? 'X' : 'W';
+ } else if (reg_prefix == 'F') {
+ reg_prefix = ((instr->FPType() & 1) == 0) ? 'S' : 'D';
+ }
+
+ switch (reg_prefix) {
+ case 'W':
+ reg_type = CPURegister::kRegister; reg_size = kWRegSize; break;
+ case 'X':
+ reg_type = CPURegister::kRegister; reg_size = kXRegSize; break;
+ case 'B':
+ reg_type = CPURegister::kVRegister; reg_size = kBRegSize; break;
+ case 'H':
+ reg_type = CPURegister::kVRegister; reg_size = kHRegSize; break;
+ case 'S':
+ reg_type = CPURegister::kVRegister; reg_size = kSRegSize; break;
+ case 'D':
+ reg_type = CPURegister::kVRegister; reg_size = kDRegSize; break;
+ case 'Q':
+ reg_type = CPURegister::kVRegister; reg_size = kQRegSize; break;
+ case 'V':
+ AppendToOutput("v%d", reg_num);
+ return field_len;
+ default:
+ VIXL_UNREACHABLE();
+ }
+
+ if ((reg_type == CPURegister::kRegister) &&
+ (reg_num == kZeroRegCode) && (format[2] == 's')) {
+ reg_num = kSPRegInternalCode;
+ }
+
+ AppendRegisterNameToOutput(instr, CPURegister(reg_num, reg_size, reg_type));
+
+ return field_len;
+}
+
+
+int Disassembler::SubstituteImmediateField(const Instruction* instr,
+ const char* format) {
+ VIXL_ASSERT(format[0] == 'I');
+
+ switch (format[1]) {
+ case 'M': { // IMoveImm, IMoveNeg or IMoveLSL.
+ if (format[5] == 'L') {
+ AppendToOutput("#0x%" PRIx32, instr->ImmMoveWide());
+ if (instr->ShiftMoveWide() > 0) {
+ AppendToOutput(", lsl #%" PRId32, 16 * instr->ShiftMoveWide());
+ }
+ } else {
+ VIXL_ASSERT((format[5] == 'I') || (format[5] == 'N'));
+ uint64_t imm = static_cast<uint64_t>(instr->ImmMoveWide()) <<
+ (16 * instr->ShiftMoveWide());
+ if (format[5] == 'N')
+ imm = ~imm;
+ if (!instr->SixtyFourBits())
+ imm &= UINT64_C(0xffffffff);
+ AppendToOutput("#0x%" PRIx64, imm);
+ }
+ return 8;
+ }
+ case 'L': {
+ switch (format[2]) {
+ case 'L': { // ILLiteral - Immediate Load Literal.
+ AppendToOutput("pc%+" PRId32,
+ instr->ImmLLiteral() << kLiteralEntrySizeLog2);
+ return 9;
+ }
+ case 'S': { // ILS - Immediate Load/Store.
+ if (instr->ImmLS() != 0) {
+ AppendToOutput(", #%" PRId32, instr->ImmLS());
+ }
+ return 3;
+ }
+ case 'P': { // ILPx - Immediate Load/Store Pair, x = access size.
+ if (instr->ImmLSPair() != 0) {
+ // format[3] is the scale value. Convert to a number.
+ int scale = 1 << (format[3] - '0');
+ AppendToOutput(", #%" PRId32, instr->ImmLSPair() * scale);
+ }
+ return 4;
+ }
+ case 'U': { // ILU - Immediate Load/Store Unsigned.
+ if (instr->ImmLSUnsigned() != 0) {
+ int shift = instr->SizeLS();
+ AppendToOutput(", #%" PRId32, instr->ImmLSUnsigned() << shift);
+ }
+ return 3;
+ }
+ default: {
+ VIXL_UNIMPLEMENTED();
+ return 0;
+ }
+ }
+ }
+ case 'C': { // ICondB - Immediate Conditional Branch.
+ int64_t offset = instr->ImmCondBranch() << 2;
+ AppendPCRelativeOffsetToOutput(instr, offset);
+ return 6;
+ }
+ case 'A': { // IAddSub.
+ VIXL_ASSERT(instr->ShiftAddSub() <= 1);
+ int64_t imm = instr->ImmAddSub() << (12 * instr->ShiftAddSub());
+ AppendToOutput("#0x%" PRIx64 " (%" PRId64 ")", imm, imm);
+ return 7;
+ }
+ case 'F': { // IFPSingle, IFPDouble or IFPFBits.
+ if (format[3] == 'F') { // IFPFbits.
+ AppendToOutput("#%" PRId32, 64 - instr->FPScale());
+ return 8;
+ } else {
+ AppendToOutput("#0x%" PRIx32 " (%.4f)", instr->ImmFP(),
+ format[3] == 'S' ? instr->ImmFP32() : instr->ImmFP64());
+ return 9;
+ }
+ }
+ case 'T': { // ITri - Immediate Triangular Encoded.
+ AppendToOutput("#0x%" PRIx64, instr->ImmLogical());
+ return 4;
+ }
+ case 'N': { // INzcv.
+ int nzcv = (instr->Nzcv() << Flags_offset);
+ AppendToOutput("#%c%c%c%c", ((nzcv & NFlag) == 0) ? 'n' : 'N',
+ ((nzcv & ZFlag) == 0) ? 'z' : 'Z',
+ ((nzcv & CFlag) == 0) ? 'c' : 'C',
+ ((nzcv & VFlag) == 0) ? 'v' : 'V');
+ return 5;
+ }
+ case 'P': { // IP - Conditional compare.
+ AppendToOutput("#%" PRId32, instr->ImmCondCmp());
+ return 2;
+ }
+ case 'B': { // Bitfields.
+ return SubstituteBitfieldImmediateField(instr, format);
+ }
+ case 'E': { // IExtract.
+ AppendToOutput("#%" PRId32, instr->ImmS());
+ return 8;
+ }
+ case 'S': { // IS - Test and branch bit.
+ AppendToOutput("#%" PRId32, (instr->ImmTestBranchBit5() << 5) |
+ instr->ImmTestBranchBit40());
+ return 2;
+ }
+ case 's': { // Is - Shift (immediate).
+ switch (format[2]) {
+ case '1': { // Is1 - SSHR.
+ int shift = 16 << HighestSetBitPosition(instr->ImmNEONImmh());
+ shift -= instr->ImmNEONImmhImmb();
+ AppendToOutput("#%d", shift);
+ return 3;
+ }
+ case '2': { // Is2 - SLI.
+ int shift = instr->ImmNEONImmhImmb();
+ shift -= 8 << HighestSetBitPosition(instr->ImmNEONImmh());
+ AppendToOutput("#%d", shift);
+ return 3;
+ }
+ default: {
+ VIXL_UNIMPLEMENTED();
+ return 0;
+ }
+ }
+ }
+ case 'D': { // IDebug - HLT and BRK instructions.
+ AppendToOutput("#0x%" PRIx32, instr->ImmException());
+ return 6;
+ }
+ case 'V': { // Immediate Vector.
+ switch (format[2]) {
+ case 'E': { // IVExtract.
+ AppendToOutput("#%" PRId32, instr->ImmNEONExt());
+ return 9;
+ }
+ case 'B': { // IVByElemIndex.
+ int vm_index = (instr->NEONH() << 1) | instr->NEONL();
+ if (instr->NEONSize() == 1) {
+ vm_index = (vm_index << 1) | instr->NEONM();
+ }
+ AppendToOutput("%d", vm_index);
+ return strlen("IVByElemIndex");
+ }
+ case 'I': { // INS element.
+ if (strncmp(format, "IVInsIndex", strlen("IVInsIndex")) == 0) {
+ int rd_index, rn_index;
+ int imm5 = instr->ImmNEON5();
+ int imm4 = instr->ImmNEON4();
+ int tz = CountTrailingZeros(imm5, 32);
+ rd_index = imm5 >> (tz + 1);
+ rn_index = imm4 >> tz;
+ if (strncmp(format, "IVInsIndex1", strlen("IVInsIndex1")) == 0) {
+ AppendToOutput("%d", rd_index);
+ return strlen("IVInsIndex1");
+ } else if (strncmp(format, "IVInsIndex2",
+ strlen("IVInsIndex2")) == 0) {
+ AppendToOutput("%d", rn_index);
+ return strlen("IVInsIndex2");
+ } else {
+ VIXL_UNIMPLEMENTED();
+ return 0;
+ }
+ }
+ VIXL_FALLTHROUGH();
+ }
+ case 'L': { // IVLSLane[0123] - suffix indicates access size shift.
+ AppendToOutput("%d", instr->NEONLSIndex(format[8] - '0'));
+ return 9;
+ }
+ case 'M': { // Modified Immediate cases.
+ if (strncmp(format,
+ "IVMIImmFPSingle",
+ strlen("IVMIImmFPSingle")) == 0) {
+ AppendToOutput("#0x%" PRIx32 " (%.4f)", instr->ImmNEONabcdefgh(),
+ instr->ImmNEONFP32());
+ return strlen("IVMIImmFPSingle");
+ } else if (strncmp(format,
+ "IVMIImmFPDouble",
+ strlen("IVMIImmFPDouble")) == 0) {
+ AppendToOutput("#0x%" PRIx32 " (%.4f)", instr->ImmNEONabcdefgh(),
+ instr->ImmNEONFP64());
+ return strlen("IVMIImmFPDouble");
+ } else if (strncmp(format, "IVMIImm8", strlen("IVMIImm8")) == 0) {
+ uint64_t imm8 = instr->ImmNEONabcdefgh();
+ AppendToOutput("#0x%" PRIx64, imm8);
+ return strlen("IVMIImm8");
+ } else if (strncmp(format, "IVMIImm", strlen("IVMIImm")) == 0) {
+ uint64_t imm8 = instr->ImmNEONabcdefgh();
+ uint64_t imm = 0;
+ for (int i = 0; i < 8; ++i) {
+ if (imm8 & (1ULL << i)) {
+ imm |= (UINT64_C(0xff) << (8 * i));
+ }
+ }
+ AppendToOutput("#0x%" PRIx64, imm);
+ return strlen("IVMIImm");
+ } else if (strncmp(format, "IVMIShiftAmt1",
+ strlen("IVMIShiftAmt1")) == 0) {
+ int cmode = instr->NEONCmode();
+ int shift_amount = 8 * ((cmode >> 1) & 3);
+ AppendToOutput("#%d", shift_amount);
+ return strlen("IVMIShiftAmt1");
+ } else if (strncmp(format, "IVMIShiftAmt2",
+ strlen("IVMIShiftAmt2")) == 0) {
+ int cmode = instr->NEONCmode();
+ int shift_amount = 8 << (cmode & 1);
+ AppendToOutput("#%d", shift_amount);
+ return strlen("IVMIShiftAmt2");
+ } else {
+ VIXL_UNIMPLEMENTED();
+ return 0;
+ }
+ }
+ default: {
+ VIXL_UNIMPLEMENTED();
+ return 0;
+ }
+ }
+ }
+ case 'X': { // IX - CLREX instruction.
+ AppendToOutput("#0x%" PRIx32, instr->CRm());
+ return 2;
+ }
+ default: {
+ VIXL_UNIMPLEMENTED();
+ return 0;
+ }
+ }
+}
+
+
+int Disassembler::SubstituteBitfieldImmediateField(const Instruction* instr,
+ const char* format) {
+ VIXL_ASSERT((format[0] == 'I') && (format[1] == 'B'));
+ unsigned r = instr->ImmR();
+ unsigned s = instr->ImmS();
+
+ switch (format[2]) {
+ case 'r': { // IBr.
+ AppendToOutput("#%d", r);
+ return 3;
+ }
+ case 's': { // IBs+1 or IBs-r+1.
+ if (format[3] == '+') {
+ AppendToOutput("#%d", s + 1);
+ return 5;
+ } else {
+ VIXL_ASSERT(format[3] == '-');
+ AppendToOutput("#%d", s - r + 1);
+ return 7;
+ }
+ }
+ case 'Z': { // IBZ-r.
+ VIXL_ASSERT((format[3] == '-') && (format[4] == 'r'));
+ unsigned reg_size = (instr->SixtyFourBits() == 1) ? kXRegSize : kWRegSize;
+ AppendToOutput("#%d", reg_size - r);
+ return 5;
+ }
+ default: {
+ VIXL_UNREACHABLE();
+ return 0;
+ }
+ }
+}
+
+
+int Disassembler::SubstituteLiteralField(const Instruction* instr,
+ const char* format) {
+ VIXL_ASSERT(strncmp(format, "LValue", 6) == 0);
+ USE(format);
+
+ const void * address = instr->LiteralAddress<const void *>();
+ switch (instr->Mask(LoadLiteralMask)) {
+ case LDR_w_lit:
+ case LDR_x_lit:
+ case LDRSW_x_lit:
+ case LDR_s_lit:
+ case LDR_d_lit:
+ case LDR_q_lit:
+ AppendCodeRelativeDataAddressToOutput(instr, address);
+ break;
+ case PRFM_lit: {
+ // Use the prefetch hint to decide how to print the address.
+ switch (instr->PrefetchHint()) {
+ case 0x0: // PLD: prefetch for load.
+ case 0x2: // PST: prepare for store.
+ AppendCodeRelativeDataAddressToOutput(instr, address);
+ break;
+ case 0x1: // PLI: preload instructions.
+ AppendCodeRelativeCodeAddressToOutput(instr, address);
+ break;
+ case 0x3: // Unallocated hint.
+ AppendCodeRelativeAddressToOutput(instr, address);
+ break;
+ }
+ break;
+ }
+ default:
+ VIXL_UNREACHABLE();
+ }
+
+ return 6;
+}
+
+
+int Disassembler::SubstituteShiftField(const Instruction* instr,
+ const char* format) {
+ VIXL_ASSERT(format[0] == 'N');
+ VIXL_ASSERT(instr->ShiftDP() <= 0x3);
+
+ switch (format[1]) {
+ case 'D': { // HDP.
+ VIXL_ASSERT(instr->ShiftDP() != ROR);
+ VIXL_FALLTHROUGH();
+ }
+ case 'L': { // HLo.
+ if (instr->ImmDPShift() != 0) {
+ const char* shift_type[] = {"lsl", "lsr", "asr", "ror"};
+ AppendToOutput(", %s #%" PRId32, shift_type[instr->ShiftDP()],
+ instr->ImmDPShift());
+ }
+ return 3;
+ }
+ default:
+ VIXL_UNIMPLEMENTED();
+ return 0;
+ }
+}
+
+
+int Disassembler::SubstituteConditionField(const Instruction* instr,
+ const char* format) {
+ VIXL_ASSERT(format[0] == 'C');
+ const char* condition_code[] = { "eq", "ne", "hs", "lo",
+ "mi", "pl", "vs", "vc",
+ "hi", "ls", "ge", "lt",
+ "gt", "le", "al", "nv" };
+ int cond;
+ switch (format[1]) {
+ case 'B': cond = instr->ConditionBranch(); break;
+ case 'I': {
+ cond = InvertCondition(static_cast<Condition>(instr->Condition()));
+ break;
+ }
+ default: cond = instr->Condition();
+ }
+ AppendToOutput("%s", condition_code[cond]);
+ return 4;
+}
+
+
+int Disassembler::SubstitutePCRelAddressField(const Instruction* instr,
+ const char* format) {
+ VIXL_ASSERT((strcmp(format, "AddrPCRelByte") == 0) || // Used by `adr`.
+ (strcmp(format, "AddrPCRelPage") == 0)); // Used by `adrp`.
+
+ int64_t offset = instr->ImmPCRel();
+
+ // Compute the target address based on the effective address (after applying
+ // code_address_offset). This is required for correct behaviour of adrp.
+ const Instruction* base = instr + code_address_offset();
+ if (format[9] == 'P') {
+ offset *= kPageSize;
+ base = AlignDown(base, kPageSize);
+ }
+ // Strip code_address_offset before printing, so we can use the
+ // semantically-correct AppendCodeRelativeAddressToOutput.
+ const void* target =
+ reinterpret_cast<const void*>(base + offset - code_address_offset());
+
+ AppendPCRelativeOffsetToOutput(instr, offset);
+ AppendToOutput(" ");
+ AppendCodeRelativeAddressToOutput(instr, target);
+ return 13;
+}
+
+
+int Disassembler::SubstituteBranchTargetField(const Instruction* instr,
+ const char* format) {
+ VIXL_ASSERT(strncmp(format, "TImm", 4) == 0);
+
+ int64_t offset = 0;
+ switch (format[5]) {
+ // BImmUncn - unconditional branch immediate.
+ case 'n': offset = instr->ImmUncondBranch(); break;
+ // BImmCond - conditional branch immediate.
+ case 'o': offset = instr->ImmCondBranch(); break;
+ // BImmCmpa - compare and branch immediate.
+ case 'm': offset = instr->ImmCmpBranch(); break;
+ // BImmTest - test and branch immediate.
+ case 'e': offset = instr->ImmTestBranch(); break;
+ default: VIXL_UNIMPLEMENTED();
+ }
+ offset <<= kInstructionSizeLog2;
+ const void* target_address = reinterpret_cast<const void*>(instr + offset);
+ VIXL_STATIC_ASSERT(sizeof(*instr) == 1);
+
+ AppendPCRelativeOffsetToOutput(instr, offset);
+ AppendToOutput(" ");
+ AppendCodeRelativeCodeAddressToOutput(instr, target_address);
+
+ return 8;
+}
+
+
+int Disassembler::SubstituteExtendField(const Instruction* instr,
+ const char* format) {
+ VIXL_ASSERT(strncmp(format, "Ext", 3) == 0);
+ VIXL_ASSERT(instr->ExtendMode() <= 7);
+ USE(format);
+
+ const char* extend_mode[] = { "uxtb", "uxth", "uxtw", "uxtx",
+ "sxtb", "sxth", "sxtw", "sxtx" };
+
+ // If rd or rn is SP, uxtw on 32-bit registers and uxtx on 64-bit
+ // registers becomes lsl.
+ if (((instr->Rd() == kZeroRegCode) || (instr->Rn() == kZeroRegCode)) &&
+ (((instr->ExtendMode() == UXTW) && (instr->SixtyFourBits() == 0)) ||
+ (instr->ExtendMode() == UXTX))) {
+ if (instr->ImmExtendShift() > 0) {
+ AppendToOutput(", lsl #%" PRId32, instr->ImmExtendShift());
+ }
+ } else {
+ AppendToOutput(", %s", extend_mode[instr->ExtendMode()]);
+ if (instr->ImmExtendShift() > 0) {
+ AppendToOutput(" #%" PRId32, instr->ImmExtendShift());
+ }
+ }
+ return 3;
+}
+
+
+int Disassembler::SubstituteLSRegOffsetField(const Instruction* instr,
+ const char* format) {
+ VIXL_ASSERT(strncmp(format, "Offsetreg", 9) == 0);
+ const char* extend_mode[] = { "undefined", "undefined", "uxtw", "lsl",
+ "undefined", "undefined", "sxtw", "sxtx" };
+ USE(format);
+
+ unsigned shift = instr->ImmShiftLS();
+ Extend ext = static_cast<Extend>(instr->ExtendMode());
+ char reg_type = ((ext == UXTW) || (ext == SXTW)) ? 'w' : 'x';
+
+ unsigned rm = instr->Rm();
+ if (rm == kZeroRegCode) {
+ AppendToOutput("%czr", reg_type);
+ } else {
+ AppendToOutput("%c%d", reg_type, rm);
+ }
+
+ // Extend mode UXTX is an alias for shift mode LSL here.
+ if (!((ext == UXTX) && (shift == 0))) {
+ AppendToOutput(", %s", extend_mode[ext]);
+ if (shift != 0) {
+ AppendToOutput(" #%d", instr->SizeLS());
+ }
+ }
+ return 9;
+}
+
+
+int Disassembler::SubstitutePrefetchField(const Instruction* instr,
+ const char* format) {
+ VIXL_ASSERT(format[0] == 'P');
+ USE(format);
+
+ static const char* hints[] = {"ld", "li", "st"};
+ static const char* stream_options[] = {"keep", "strm"};
+
+ unsigned hint = instr->PrefetchHint();
+ unsigned target = instr->PrefetchTarget() + 1;
+ unsigned stream = instr->PrefetchStream();
+
+ if ((hint >= (sizeof(hints) / sizeof(hints[0]))) || (target > 3)) {
+ // Unallocated prefetch operations.
+ int prefetch_mode = instr->ImmPrefetchOperation();
+ AppendToOutput("#0b%c%c%c%c%c",
+ (prefetch_mode & (1 << 4)) ? '1' : '0',
+ (prefetch_mode & (1 << 3)) ? '1' : '0',
+ (prefetch_mode & (1 << 2)) ? '1' : '0',
+ (prefetch_mode & (1 << 1)) ? '1' : '0',
+ (prefetch_mode & (1 << 0)) ? '1' : '0');
+ } else {
+ VIXL_ASSERT(stream < (sizeof(stream_options) / sizeof(stream_options[0])));
+ AppendToOutput("p%sl%d%s", hints[hint], target, stream_options[stream]);
+ }
+ return 6;
+}
+
+int Disassembler::SubstituteBarrierField(const Instruction* instr,
+ const char* format) {
+ VIXL_ASSERT(format[0] == 'M');
+ USE(format);
+
+ static const char* options[4][4] = {
+ { "sy (0b0000)", "oshld", "oshst", "osh" },
+ { "sy (0b0100)", "nshld", "nshst", "nsh" },
+ { "sy (0b1000)", "ishld", "ishst", "ish" },
+ { "sy (0b1100)", "ld", "st", "sy" }
+ };
+ int domain = instr->ImmBarrierDomain();
+ int type = instr->ImmBarrierType();
+
+ AppendToOutput("%s", options[domain][type]);
+ return 1;
+}
+
+int Disassembler::SubstituteSysOpField(const Instruction* instr,
+ const char* format) {
+ VIXL_ASSERT(format[0] == 'G');
+ int op = -1;
+ switch (format[1]) {
+ case '1': op = instr->SysOp1(); break;
+ case '2': op = instr->SysOp2(); break;
+ default:
+ VIXL_UNREACHABLE();
+ }
+ AppendToOutput("#%d", op);
+ return 2;
+}
+
+int Disassembler::SubstituteCrField(const Instruction* instr,
+ const char* format) {
+ VIXL_ASSERT(format[0] == 'K');
+ int cr = -1;
+ switch (format[1]) {
+ case 'n': cr = instr->CRn(); break;
+ case 'm': cr = instr->CRm(); break;
+ default:
+ VIXL_UNREACHABLE();
+ }
+ AppendToOutput("C%d", cr);
+ return 2;
+}
+
+void Disassembler::ResetOutput() {
+ buffer_pos_ = 0;
+ buffer_[buffer_pos_] = 0;
+}
+
+
+void Disassembler::AppendToOutput(const char* format, ...) {
+ va_list args;
+ va_start(args, format);
+ buffer_pos_ += vsnprintf(&buffer_[buffer_pos_], buffer_size_ - buffer_pos_,
+ format, args);
+ va_end(args);
+}
+
+
+void PrintDisassembler::ProcessOutput(const Instruction* instr) {
+ fprintf(stream_, "0x%016" PRIx64 " %08" PRIx32 "\t\t%s\n",
+ reinterpret_cast<uint64_t>(instr),
+ instr->InstructionBits(),
+ GetOutput());
+}
+
+void DisassembleInstruction(char* buffer, size_t bufsize, const Instruction* instr)
+{
+ vixl::Disassembler disasm(buffer, bufsize-1);
+ vixl::Decoder decoder;
+ decoder.AppendVisitor(&disasm);
+ decoder.Decode(instr);
+ buffer[bufsize-1] = 0; // Just to be safe
+}
+
+char* GdbDisassembleInstruction(const Instruction* instr)
+{
+ static char buffer[1024];
+ DisassembleInstruction(buffer, sizeof(buffer), instr);
+ return buffer;
+}
+
+} // namespace vixl
diff --git a/js/src/jit/arm64/vixl/Disasm-vixl.h b/js/src/jit/arm64/vixl/Disasm-vixl.h
new file mode 100644
index 0000000000..e04730da83
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Disasm-vixl.h
@@ -0,0 +1,181 @@
+// Copyright 2015, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_A64_DISASM_A64_H
+#define VIXL_A64_DISASM_A64_H
+
+#include "jit/arm64/vixl/Assembler-vixl.h"
+#include "jit/arm64/vixl/Decoder-vixl.h"
+#include "jit/arm64/vixl/Globals-vixl.h"
+#include "jit/arm64/vixl/Instructions-vixl.h"
+#include "jit/arm64/vixl/Utils-vixl.h"
+
+namespace vixl {
+
+class Disassembler: public DecoderVisitor {
+ public:
+ Disassembler();
+ Disassembler(char* text_buffer, int buffer_size);
+ virtual ~Disassembler();
+ char* GetOutput();
+
+ // Declare all Visitor functions.
+ #define DECLARE(A) virtual void Visit##A(const Instruction* instr) override;
+ VISITOR_LIST(DECLARE)
+ #undef DECLARE
+
+ protected:
+ virtual void ProcessOutput(const Instruction* instr);
+
+ // Default output functions. The functions below implement a default way of
+ // printing elements in the disassembly. A sub-class can override these to
+ // customize the disassembly output.
+
+ // Prints the name of a register.
+ // TODO: This currently doesn't allow renaming of V registers.
+ virtual void AppendRegisterNameToOutput(const Instruction* instr,
+ const CPURegister& reg);
+
+ // Prints a PC-relative offset. This is used for example when disassembling
+ // branches to immediate offsets.
+ virtual void AppendPCRelativeOffsetToOutput(const Instruction* instr,
+ int64_t offset);
+
+ // Prints an address, in the general case. It can be code or data. This is
+ // used for example to print the target address of an ADR instruction.
+ virtual void AppendCodeRelativeAddressToOutput(const Instruction* instr,
+ const void* addr);
+
+ // Prints the address of some code.
+ // This is used for example to print the target address of a branch to an
+ // immediate offset.
+ // A sub-class can for example override this method to lookup the address and
+ // print an appropriate name.
+ virtual void AppendCodeRelativeCodeAddressToOutput(const Instruction* instr,
+ const void* addr);
+
+ // Prints the address of some data.
+ // This is used for example to print the source address of a load literal
+ // instruction.
+ virtual void AppendCodeRelativeDataAddressToOutput(const Instruction* instr,
+ const void* addr);
+
+ // Same as the above, but for addresses that are not relative to the code
+ // buffer. They are currently not used by VIXL.
+ virtual void AppendAddressToOutput(const Instruction* instr,
+ const void* addr);
+ virtual void AppendCodeAddressToOutput(const Instruction* instr,
+ const void* addr);
+ virtual void AppendDataAddressToOutput(const Instruction* instr,
+ const void* addr);
+
+ public:
+ // Get/Set the offset that should be added to code addresses when printing
+ // code-relative addresses in the AppendCodeRelative<Type>AddressToOutput()
+ // helpers.
+ // Below is an example of how a branch immediate instruction in memory at
+ // address 0xb010200 would disassemble with different offsets.
+ // Base address | Disassembly
+ // 0x0 | 0xb010200: b #+0xcc (addr 0xb0102cc)
+ // 0x10000 | 0xb000200: b #+0xcc (addr 0xb0002cc)
+ // 0xb010200 | 0x0: b #+0xcc (addr 0xcc)
+ void MapCodeAddress(int64_t base_address, const Instruction* instr_address);
+ int64_t CodeRelativeAddress(const void* instr);
+
+ private:
+ void Format(
+ const Instruction* instr, const char* mnemonic, const char* format);
+ void Substitute(const Instruction* instr, const char* string);
+ int SubstituteField(const Instruction* instr, const char* format);
+ int SubstituteRegisterField(const Instruction* instr, const char* format);
+ int SubstituteImmediateField(const Instruction* instr, const char* format);
+ int SubstituteLiteralField(const Instruction* instr, const char* format);
+ int SubstituteBitfieldImmediateField(
+ const Instruction* instr, const char* format);
+ int SubstituteShiftField(const Instruction* instr, const char* format);
+ int SubstituteExtendField(const Instruction* instr, const char* format);
+ int SubstituteConditionField(const Instruction* instr, const char* format);
+ int SubstitutePCRelAddressField(const Instruction* instr, const char* format);
+ int SubstituteBranchTargetField(const Instruction* instr, const char* format);
+ int SubstituteLSRegOffsetField(const Instruction* instr, const char* format);
+ int SubstitutePrefetchField(const Instruction* instr, const char* format);
+ int SubstituteBarrierField(const Instruction* instr, const char* format);
+ int SubstituteSysOpField(const Instruction* instr, const char* format);
+ int SubstituteCrField(const Instruction* instr, const char* format);
+ bool RdIsZROrSP(const Instruction* instr) const {
+ return (instr->Rd() == kZeroRegCode);
+ }
+
+ bool RnIsZROrSP(const Instruction* instr) const {
+ return (instr->Rn() == kZeroRegCode);
+ }
+
+ bool RmIsZROrSP(const Instruction* instr) const {
+ return (instr->Rm() == kZeroRegCode);
+ }
+
+ bool RaIsZROrSP(const Instruction* instr) const {
+ return (instr->Ra() == kZeroRegCode);
+ }
+
+ bool IsMovzMovnImm(unsigned reg_size, uint64_t value);
+
+ int64_t code_address_offset() const { return code_address_offset_; }
+
+ protected:
+ void ResetOutput();
+ void AppendToOutput(const char* string, ...) PRINTF_CHECK(2, 3);
+
+ void set_code_address_offset(int64_t code_address_offset) {
+ code_address_offset_ = code_address_offset;
+ }
+
+ char* buffer_;
+ uint32_t buffer_pos_;
+ uint32_t buffer_size_;
+ bool own_buffer_;
+
+ int64_t code_address_offset_;
+};
+
+
+class PrintDisassembler: public Disassembler {
+ public:
+ explicit PrintDisassembler(FILE* stream) : stream_(stream) { }
+
+ protected:
+ virtual void ProcessOutput(const Instruction* instr) override;
+
+ private:
+ FILE *stream_;
+};
+
+void DisassembleInstruction(char* buffer, size_t bufsize, const Instruction* instr);
+char* GdbDisassembleInstruction(const Instruction* instr);
+
+} // namespace vixl
+
+#endif // VIXL_A64_DISASM_A64_H
diff --git a/js/src/jit/arm64/vixl/Globals-vixl.h b/js/src/jit/arm64/vixl/Globals-vixl.h
new file mode 100644
index 0000000000..2c7d5703f1
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Globals-vixl.h
@@ -0,0 +1,272 @@
+// Copyright 2015, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_GLOBALS_H
+#define VIXL_GLOBALS_H
+
+// Get standard C99 macros for integer types.
+#ifndef __STDC_CONSTANT_MACROS
+#define __STDC_CONSTANT_MACROS
+#endif
+
+#ifndef __STDC_LIMIT_MACROS
+#define __STDC_LIMIT_MACROS
+#endif
+
+#ifndef __STDC_FORMAT_MACROS
+#define __STDC_FORMAT_MACROS
+#endif
+
+#include "mozilla/Assertions.h"
+
+#include <cstdarg>
+#include <cstddef>
+#include <cstdio>
+#include <cstdlib>
+
+extern "C" {
+#include <inttypes.h>
+#include <stdint.h>
+}
+
+#include "jstypes.h"
+
+#include "jit/arm64/vixl/Platform-vixl.h"
+#include "js/Utility.h"
+
+#ifdef VIXL_NEGATIVE_TESTING
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#endif
+
+namespace vixl {
+
+typedef uint8_t byte;
+
+const int KBytes = 1024;
+const int MBytes = 1024 * KBytes;
+
+const int kBitsPerByte = 8;
+
+template <int SizeInBits>
+struct Unsigned;
+
+template <>
+struct Unsigned<32> {
+ typedef uint32_t type;
+};
+
+template <>
+struct Unsigned<64> {
+ typedef uint64_t type;
+};
+
+} // namespace vixl
+
+// Detect the host's pointer size.
+#if (UINTPTR_MAX == UINT32_MAX)
+#define VIXL_HOST_POINTER_32
+#elif (UINTPTR_MAX == UINT64_MAX)
+#define VIXL_HOST_POINTER_64
+#else
+#error "Unsupported host pointer size."
+#endif
+
+#ifdef VIXL_NEGATIVE_TESTING
+#define VIXL_ABORT() \
+ do { \
+ std::ostringstream oss; \
+ oss << "Aborting in " << __FILE__ << ", line " << __LINE__ << std::endl; \
+ throw std::runtime_error(oss.str()); \
+ } while (false)
+#define VIXL_ABORT_WITH_MSG(msg) \
+ do { \
+ std::ostringstream oss; \
+ oss << (msg) << "in " << __FILE__ << ", line " << __LINE__ << std::endl; \
+ throw std::runtime_error(oss.str()); \
+ } while (false)
+#define VIXL_CHECK(condition) \
+ do { \
+ if (!(condition)) { \
+ std::ostringstream oss; \
+ oss << "Assertion failed (" #condition ")\nin "; \
+ oss << __FILE__ << ", line " << __LINE__ << std::endl; \
+ throw std::runtime_error(oss.str()); \
+ } \
+ } while (false)
+#else
+#define VIXL_ABORT() \
+ do { \
+ MOZ_CRASH(); \
+ } while (false)
+#define VIXL_ABORT_WITH_MSG(msg) \
+ do { \
+ MOZ_CRASH(msg); \
+ } while (false)
+#define VIXL_CHECK(condition) \
+ do { \
+ if (!(condition)) { \
+ MOZ_CRASH(); \
+ } \
+ } while (false)
+#endif
+#ifdef DEBUG
+#define VIXL_ASSERT(condition) MOZ_ASSERT(condition)
+#define VIXL_UNIMPLEMENTED() \
+ do { \
+ VIXL_ABORT_WITH_MSG("UNIMPLEMENTED "); \
+ } while (false)
+#define VIXL_UNREACHABLE() \
+ do { \
+ VIXL_ABORT_WITH_MSG("UNREACHABLE "); \
+ } while (false)
+#else
+#define VIXL_ASSERT(condition) ((void)0)
+#define VIXL_UNIMPLEMENTED() ((void)0)
+#define VIXL_UNREACHABLE() MOZ_CRASH("vixl unreachable")
+#endif
+// This is not as powerful as template based assertions, but it is simple.
+// It assumes that the descriptions are unique. If this starts being a problem,
+// we can switch to a different implemention.
+#define VIXL_CONCAT(a, b) a##b
+#if __cplusplus >= 201103L
+#define VIXL_STATIC_ASSERT_LINE(line_unused, condition, message) \
+ static_assert(condition, message)
+#else
+#define VIXL_STATIC_ASSERT_LINE(line, condition, message_unused) \
+ typedef char VIXL_CONCAT(STATIC_ASSERT_LINE_, line)[(condition) ? 1 : -1] \
+ __attribute__((unused))
+#endif
+#define VIXL_STATIC_ASSERT(condition) \
+ VIXL_STATIC_ASSERT_LINE(__LINE__, condition, "")
+#define VIXL_STATIC_ASSERT_MESSAGE(condition, message) \
+ VIXL_STATIC_ASSERT_LINE(__LINE__, condition, message)
+
+#define VIXL_WARNING(message) \
+ do { \
+ printf("WARNING in %s, line %i: %s", __FILE__, __LINE__, message); \
+ } while (false)
+
+template <typename T1>
+inline void USE(const T1&) {}
+
+template <typename T1, typename T2>
+inline void USE(const T1&, const T2&) {}
+
+template <typename T1, typename T2, typename T3>
+inline void USE(const T1&, const T2&, const T3&) {}
+
+template <typename T1, typename T2, typename T3, typename T4>
+inline void USE(const T1&, const T2&, const T3&, const T4&) {}
+
+#define VIXL_ALIGNMENT_EXCEPTION() \
+ do { \
+ VIXL_ABORT_WITH_MSG("ALIGNMENT EXCEPTION\t"); \
+ } while (0)
+
+// The clang::fallthrough attribute is used along with the Wimplicit-fallthrough
+// argument to annotate intentional fall-through between switch labels.
+// For more information please refer to:
+// http://clang.llvm.org/docs/AttributeReference.html#fallthrough-clang-fallthrough
+#ifndef __has_warning
+#define __has_warning(x) 0
+#endif
+
+// Fallthrough annotation for Clang and C++11(201103L).
+#if __has_warning("-Wimplicit-fallthrough") && __cplusplus >= 201103L
+#define VIXL_FALLTHROUGH() [[clang::fallthrough]]
+// Fallthrough annotation for GCC >= 7.
+#elif __GNUC__ >= 7
+#define VIXL_FALLTHROUGH() __attribute__((fallthrough))
+#else
+#define VIXL_FALLTHROUGH() \
+ do { \
+ } while (0)
+#endif
+
+#if __cplusplus >= 201103L
+#define VIXL_NO_RETURN [[noreturn]]
+#else
+#define VIXL_NO_RETURN __attribute__((noreturn))
+#endif
+#ifdef VIXL_DEBUG
+#define VIXL_NO_RETURN_IN_DEBUG_MODE VIXL_NO_RETURN
+#else
+#define VIXL_NO_RETURN_IN_DEBUG_MODE
+#endif
+
+#if __cplusplus >= 201103L
+#define VIXL_OVERRIDE override
+#else
+#define VIXL_OVERRIDE
+#endif
+
+#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
+#ifndef VIXL_AARCH64_GENERATE_SIMULATOR_CODE
+#define VIXL_AARCH64_GENERATE_SIMULATOR_CODE 1
+#endif
+#else
+#ifndef VIXL_AARCH64_GENERATE_SIMULATOR_CODE
+#define VIXL_AARCH64_GENERATE_SIMULATOR_CODE 0
+#endif
+#if VIXL_AARCH64_GENERATE_SIMULATOR_CODE
+#warning "Generating Simulator instructions without Simulator support."
+#endif
+#endif
+
+// We do not have a simulator for AArch32, although we can pretend we do so that
+// tests that require running natively can be skipped.
+#ifndef __arm__
+#define VIXL_INCLUDE_SIMULATOR_AARCH32
+#ifndef VIXL_AARCH32_GENERATE_SIMULATOR_CODE
+#define VIXL_AARCH32_GENERATE_SIMULATOR_CODE 1
+#endif
+#else
+#ifndef VIXL_AARCH32_GENERATE_SIMULATOR_CODE
+#define VIXL_AARCH32_GENERATE_SIMULATOR_CODE 0
+#endif
+#endif
+
+// Target Architecture/ISA
+
+// Hack: always include AArch64.
+#define VIXL_INCLUDE_TARGET_A64
+
+#ifdef VIXL_INCLUDE_TARGET_A64
+#define VIXL_INCLUDE_TARGET_AARCH64
+#endif
+
+#if defined(VIXL_INCLUDE_TARGET_A32) && defined(VIXL_INCLUDE_TARGET_T32)
+#define VIXL_INCLUDE_TARGET_AARCH32
+#elif defined(VIXL_INCLUDE_TARGET_A32)
+#define VIXL_INCLUDE_TARGET_A32_ONLY
+#else
+#define VIXL_INCLUDE_TARGET_T32_ONLY
+#endif
+
+
+#endif // VIXL_GLOBALS_H
diff --git a/js/src/jit/arm64/vixl/Instructions-vixl.cpp b/js/src/jit/arm64/vixl/Instructions-vixl.cpp
new file mode 100644
index 0000000000..dcc0fab05e
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Instructions-vixl.cpp
@@ -0,0 +1,627 @@
+// Copyright 2015, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "jit/arm64/vixl/Instructions-vixl.h"
+
+#include "jit/arm64/vixl/Assembler-vixl.h"
+
+namespace vixl {
+
+static uint64_t RepeatBitsAcrossReg(unsigned reg_size,
+ uint64_t value,
+ unsigned width) {
+ VIXL_ASSERT((width == 2) || (width == 4) || (width == 8) || (width == 16) ||
+ (width == 32));
+ VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
+ uint64_t result = value & ((UINT64_C(1) << width) - 1);
+ for (unsigned i = width; i < reg_size; i *= 2) {
+ result |= (result << i);
+ }
+ return result;
+}
+
+
+bool Instruction::IsLoad() const {
+ if (Mask(LoadStoreAnyFMask) != LoadStoreAnyFixed) {
+ return false;
+ }
+
+ if (Mask(LoadStorePairAnyFMask) == LoadStorePairAnyFixed) {
+ return Mask(LoadStorePairLBit) != 0;
+ } else {
+ LoadStoreOp op = static_cast<LoadStoreOp>(Mask(LoadStoreMask));
+ switch (op) {
+ case LDRB_w:
+ case LDRH_w:
+ case LDR_w:
+ case LDR_x:
+ case LDRSB_w:
+ case LDRSB_x:
+ case LDRSH_w:
+ case LDRSH_x:
+ case LDRSW_x:
+ case LDR_b:
+ case LDR_h:
+ case LDR_s:
+ case LDR_d:
+ case LDR_q: return true;
+ default: return false;
+ }
+ }
+}
+
+
+bool Instruction::IsStore() const {
+ if (Mask(LoadStoreAnyFMask) != LoadStoreAnyFixed) {
+ return false;
+ }
+
+ if (Mask(LoadStorePairAnyFMask) == LoadStorePairAnyFixed) {
+ return Mask(LoadStorePairLBit) == 0;
+ } else {
+ LoadStoreOp op = static_cast<LoadStoreOp>(Mask(LoadStoreMask));
+ switch (op) {
+ case STRB_w:
+ case STRH_w:
+ case STR_w:
+ case STR_x:
+ case STR_b:
+ case STR_h:
+ case STR_s:
+ case STR_d:
+ case STR_q: return true;
+ default: return false;
+ }
+ }
+}
+
+
+// Logical immediates can't encode zero, so a return value of zero is used to
+// indicate a failure case. Specifically, where the constraints on imm_s are
+// not met.
+uint64_t Instruction::ImmLogical() const {
+ unsigned reg_size = SixtyFourBits() ? kXRegSize : kWRegSize;
+ int32_t n = BitN();
+ int32_t imm_s = ImmSetBits();
+ int32_t imm_r = ImmRotate();
+
+ // An integer is constructed from the n, imm_s and imm_r bits according to
+ // the following table:
+ //
+ // N imms immr size S R
+ // 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr)
+ // 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr)
+ // 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr)
+ // 0 110sss xxxrrr 8 UInt(sss) UInt(rrr)
+ // 0 1110ss xxxxrr 4 UInt(ss) UInt(rr)
+ // 0 11110s xxxxxr 2 UInt(s) UInt(r)
+ // (s bits must not be all set)
+ //
+ // A pattern is constructed of size bits, where the least significant S+1
+ // bits are set. The pattern is rotated right by R, and repeated across a
+ // 32 or 64-bit value, depending on destination register width.
+ //
+
+ if (n == 1) {
+ if (imm_s == 0x3f) {
+ return 0;
+ }
+ uint64_t bits = (UINT64_C(1) << (imm_s + 1)) - 1;
+ return RotateRight(bits, imm_r, 64);
+ } else {
+ if ((imm_s >> 1) == 0x1f) {
+ return 0;
+ }
+ for (int width = 0x20; width >= 0x2; width >>= 1) {
+ if ((imm_s & width) == 0) {
+ int mask = width - 1;
+ if ((imm_s & mask) == mask) {
+ return 0;
+ }
+ uint64_t bits = (UINT64_C(1) << ((imm_s & mask) + 1)) - 1;
+ return RepeatBitsAcrossReg(reg_size,
+ RotateRight(bits, imm_r & mask, width),
+ width);
+ }
+ }
+ }
+ VIXL_UNREACHABLE();
+ return 0;
+}
+
+
+uint32_t Instruction::ImmNEONabcdefgh() const {
+ return ImmNEONabc() << 5 | ImmNEONdefgh();
+}
+
+
+float Instruction::Imm8ToFP32(uint32_t imm8) {
+ // Imm8: abcdefgh (8 bits)
+ // Single: aBbb.bbbc.defg.h000.0000.0000.0000.0000 (32 bits)
+ // where B is b ^ 1
+ uint32_t bits = imm8;
+ uint32_t bit7 = (bits >> 7) & 0x1;
+ uint32_t bit6 = (bits >> 6) & 0x1;
+ uint32_t bit5_to_0 = bits & 0x3f;
+ uint32_t result = (bit7 << 31) | ((32 - bit6) << 25) | (bit5_to_0 << 19);
+
+ return RawbitsToFloat(result);
+}
+
+
+float Instruction::ImmFP32() const {
+ return Imm8ToFP32(ImmFP());
+}
+
+
+double Instruction::Imm8ToFP64(uint32_t imm8) {
+ // Imm8: abcdefgh (8 bits)
+ // Double: aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
+ // 0000.0000.0000.0000.0000.0000.0000.0000 (64 bits)
+ // where B is b ^ 1
+ uint32_t bits = imm8;
+ uint64_t bit7 = (bits >> 7) & 0x1;
+ uint64_t bit6 = (bits >> 6) & 0x1;
+ uint64_t bit5_to_0 = bits & 0x3f;
+ uint64_t result = (bit7 << 63) | ((256 - bit6) << 54) | (bit5_to_0 << 48);
+
+ return RawbitsToDouble(result);
+}
+
+
+double Instruction::ImmFP64() const {
+ return Imm8ToFP64(ImmFP());
+}
+
+
+float Instruction::ImmNEONFP32() const {
+ return Imm8ToFP32(ImmNEONabcdefgh());
+}
+
+
+double Instruction::ImmNEONFP64() const {
+ return Imm8ToFP64(ImmNEONabcdefgh());
+}
+
+unsigned CalcLSPairDataSize(LoadStorePairOp op) {
+ VIXL_STATIC_ASSERT(kXRegSizeInBytes == kDRegSizeInBytes);
+ VIXL_STATIC_ASSERT(kWRegSizeInBytes == kSRegSizeInBytes);
+ switch (op) {
+ case STP_q:
+ case LDP_q: return kQRegSizeInBytesLog2;
+ case STP_x:
+ case LDP_x:
+ case STP_d:
+ case LDP_d: return kXRegSizeInBytesLog2;
+ default: return kWRegSizeInBytesLog2;
+ }
+}
+
+
+int Instruction::ImmBranchRangeBitwidth(ImmBranchType branch_type) {
+ switch (branch_type) {
+ case UncondBranchType:
+ return ImmUncondBranch_width;
+ case CondBranchType:
+ return ImmCondBranch_width;
+ case CompareBranchType:
+ return ImmCmpBranch_width;
+ case TestBranchType:
+ return ImmTestBranch_width;
+ default:
+ VIXL_UNREACHABLE();
+ return 0;
+ }
+}
+
+
+int32_t Instruction::ImmBranchForwardRange(ImmBranchType branch_type) {
+ int32_t encoded_max = 1 << (ImmBranchRangeBitwidth(branch_type) - 1);
+ return encoded_max * kInstructionSize;
+}
+
+
+bool Instruction::IsValidImmPCOffset(ImmBranchType branch_type,
+ int64_t offset) {
+ return IsIntN(ImmBranchRangeBitwidth(branch_type), offset);
+}
+
+ImmBranchRangeType Instruction::ImmBranchTypeToRange(ImmBranchType branch_type)
+{
+ switch (branch_type) {
+ case UncondBranchType:
+ return UncondBranchRangeType;
+ case CondBranchType:
+ case CompareBranchType:
+ return CondBranchRangeType;
+ case TestBranchType:
+ return TestBranchRangeType;
+ default:
+ return UnknownBranchRangeType;
+ }
+}
+
+int32_t Instruction::ImmBranchMaxForwardOffset(ImmBranchRangeType range_type)
+{
+ // Branches encode a pc-relative two's complement number of 32-bit
+ // instructions. Compute the number of bytes corresponding to the largest
+ // positive number of instructions that can be encoded.
+ switch(range_type) {
+ case TestBranchRangeType:
+ return ((1 << ImmTestBranch_width) - 1) / 2 * kInstructionSize;
+ case CondBranchRangeType:
+ return ((1 << ImmCondBranch_width) - 1) / 2 * kInstructionSize;
+ case UncondBranchRangeType:
+ return ((1 << ImmUncondBranch_width) - 1) / 2 * kInstructionSize;
+ default:
+ VIXL_UNREACHABLE();
+ return 0;
+ }
+}
+
+int32_t Instruction::ImmBranchMinBackwardOffset(ImmBranchRangeType range_type)
+{
+ switch(range_type) {
+ case TestBranchRangeType:
+ return -int32_t(1 << ImmTestBranch_width) / int32_t(2 * kInstructionSize);
+ case CondBranchRangeType:
+ return -int32_t(1 << ImmCondBranch_width) / int32_t(2 * kInstructionSize);
+ case UncondBranchRangeType:
+ return -int32_t(1 << ImmUncondBranch_width) / int32_t(2 * kInstructionSize);
+ default:
+ VIXL_UNREACHABLE();
+ return 0;
+ }
+}
+
+const Instruction* Instruction::ImmPCOffsetTarget() const {
+ const Instruction * base = this;
+ ptrdiff_t offset;
+ if (IsPCRelAddressing()) {
+ // ADR and ADRP.
+ offset = ImmPCRel();
+ if (Mask(PCRelAddressingMask) == ADRP) {
+ base = AlignDown(base, kPageSize);
+ offset *= kPageSize;
+ } else {
+ VIXL_ASSERT(Mask(PCRelAddressingMask) == ADR);
+ }
+ } else {
+ // All PC-relative branches.
+ VIXL_ASSERT(BranchType() != UnknownBranchType);
+ // Relative branch offsets are instruction-size-aligned.
+ offset = ImmBranch() << kInstructionSizeLog2;
+ }
+ return base + offset;
+}
+
+
+int Instruction::ImmBranch() const {
+ switch (BranchType()) {
+ case CondBranchType: return ImmCondBranch();
+ case UncondBranchType: return ImmUncondBranch();
+ case CompareBranchType: return ImmCmpBranch();
+ case TestBranchType: return ImmTestBranch();
+ default: VIXL_UNREACHABLE();
+ }
+ return 0;
+}
+
+
+void Instruction::SetImmPCOffsetTarget(const Instruction* target) {
+ if (IsPCRelAddressing()) {
+ SetPCRelImmTarget(target);
+ } else {
+ SetBranchImmTarget(target);
+ }
+}
+
+
+void Instruction::SetPCRelImmTarget(const Instruction* target) {
+ ptrdiff_t imm21;
+ if ((Mask(PCRelAddressingMask) == ADR)) {
+ imm21 = target - this;
+ } else {
+ VIXL_ASSERT(Mask(PCRelAddressingMask) == ADRP);
+ uintptr_t this_page = reinterpret_cast<uintptr_t>(this) / kPageSize;
+ uintptr_t target_page = reinterpret_cast<uintptr_t>(target) / kPageSize;
+ imm21 = target_page - this_page;
+ }
+ Instr imm = Assembler::ImmPCRelAddress(static_cast<int32_t>(imm21));
+
+ SetInstructionBits(Mask(~ImmPCRel_mask) | imm);
+}
+
+
+void Instruction::SetBranchImmTarget(const Instruction* target) {
+ VIXL_ASSERT(((target - this) & 3) == 0);
+ Instr branch_imm = 0;
+ uint32_t imm_mask = 0;
+ int offset = static_cast<int>((target - this) >> kInstructionSizeLog2);
+ switch (BranchType()) {
+ case CondBranchType: {
+ branch_imm = Assembler::ImmCondBranch(offset);
+ imm_mask = ImmCondBranch_mask;
+ break;
+ }
+ case UncondBranchType: {
+ branch_imm = Assembler::ImmUncondBranch(offset);
+ imm_mask = ImmUncondBranch_mask;
+ break;
+ }
+ case CompareBranchType: {
+ branch_imm = Assembler::ImmCmpBranch(offset);
+ imm_mask = ImmCmpBranch_mask;
+ break;
+ }
+ case TestBranchType: {
+ branch_imm = Assembler::ImmTestBranch(offset);
+ imm_mask = ImmTestBranch_mask;
+ break;
+ }
+ default: VIXL_UNREACHABLE();
+ }
+ SetInstructionBits(Mask(~imm_mask) | branch_imm);
+}
+
+
+void Instruction::SetImmLLiteral(const Instruction* source) {
+ VIXL_ASSERT(IsWordAligned(source));
+ ptrdiff_t offset = (source - this) >> kLiteralEntrySizeLog2;
+ Instr imm = Assembler::ImmLLiteral(static_cast<int>(offset));
+ Instr mask = ImmLLiteral_mask;
+
+ SetInstructionBits(Mask(~mask) | imm);
+}
+
+
+VectorFormat VectorFormatHalfWidth(const VectorFormat vform) {
+ VIXL_ASSERT(vform == kFormat8H || vform == kFormat4S || vform == kFormat2D ||
+ vform == kFormatH || vform == kFormatS || vform == kFormatD);
+ switch (vform) {
+ case kFormat8H: return kFormat8B;
+ case kFormat4S: return kFormat4H;
+ case kFormat2D: return kFormat2S;
+ case kFormatH: return kFormatB;
+ case kFormatS: return kFormatH;
+ case kFormatD: return kFormatS;
+ default: VIXL_UNREACHABLE(); return kFormatUndefined;
+ }
+}
+
+
+VectorFormat VectorFormatDoubleWidth(const VectorFormat vform) {
+ VIXL_ASSERT(vform == kFormat8B || vform == kFormat4H || vform == kFormat2S ||
+ vform == kFormatB || vform == kFormatH || vform == kFormatS);
+ switch (vform) {
+ case kFormat8B: return kFormat8H;
+ case kFormat4H: return kFormat4S;
+ case kFormat2S: return kFormat2D;
+ case kFormatB: return kFormatH;
+ case kFormatH: return kFormatS;
+ case kFormatS: return kFormatD;
+ default: VIXL_UNREACHABLE(); return kFormatUndefined;
+ }
+}
+
+
+VectorFormat VectorFormatFillQ(const VectorFormat vform) {
+ switch (vform) {
+ case kFormatB:
+ case kFormat8B:
+ case kFormat16B: return kFormat16B;
+ case kFormatH:
+ case kFormat4H:
+ case kFormat8H: return kFormat8H;
+ case kFormatS:
+ case kFormat2S:
+ case kFormat4S: return kFormat4S;
+ case kFormatD:
+ case kFormat1D:
+ case kFormat2D: return kFormat2D;
+ default: VIXL_UNREACHABLE(); return kFormatUndefined;
+ }
+}
+
+VectorFormat VectorFormatHalfWidthDoubleLanes(const VectorFormat vform) {
+ switch (vform) {
+ case kFormat4H: return kFormat8B;
+ case kFormat8H: return kFormat16B;
+ case kFormat2S: return kFormat4H;
+ case kFormat4S: return kFormat8H;
+ case kFormat1D: return kFormat2S;
+ case kFormat2D: return kFormat4S;
+ default: VIXL_UNREACHABLE(); return kFormatUndefined;
+ }
+}
+
+VectorFormat VectorFormatDoubleLanes(const VectorFormat vform) {
+ VIXL_ASSERT(vform == kFormat8B || vform == kFormat4H || vform == kFormat2S);
+ switch (vform) {
+ case kFormat8B: return kFormat16B;
+ case kFormat4H: return kFormat8H;
+ case kFormat2S: return kFormat4S;
+ default: VIXL_UNREACHABLE(); return kFormatUndefined;
+ }
+}
+
+
+VectorFormat VectorFormatHalfLanes(const VectorFormat vform) {
+ VIXL_ASSERT(vform == kFormat16B || vform == kFormat8H || vform == kFormat4S);
+ switch (vform) {
+ case kFormat16B: return kFormat8B;
+ case kFormat8H: return kFormat4H;
+ case kFormat4S: return kFormat2S;
+ default: VIXL_UNREACHABLE(); return kFormatUndefined;
+ }
+}
+
+
+VectorFormat ScalarFormatFromLaneSize(int laneSize) {
+ switch (laneSize) {
+ case 8: return kFormatB;
+ case 16: return kFormatH;
+ case 32: return kFormatS;
+ case 64: return kFormatD;
+ default: VIXL_UNREACHABLE(); return kFormatUndefined;
+ }
+}
+
+
+unsigned RegisterSizeInBitsFromFormat(VectorFormat vform) {
+ VIXL_ASSERT(vform != kFormatUndefined);
+ switch (vform) {
+ case kFormatB: return kBRegSize;
+ case kFormatH: return kHRegSize;
+ case kFormatS: return kSRegSize;
+ case kFormatD: return kDRegSize;
+ case kFormat8B:
+ case kFormat4H:
+ case kFormat2S:
+ case kFormat1D: return kDRegSize;
+ default: return kQRegSize;
+ }
+}
+
+
+unsigned RegisterSizeInBytesFromFormat(VectorFormat vform) {
+ return RegisterSizeInBitsFromFormat(vform) / 8;
+}
+
+
+unsigned LaneSizeInBitsFromFormat(VectorFormat vform) {
+ VIXL_ASSERT(vform != kFormatUndefined);
+ switch (vform) {
+ case kFormatB:
+ case kFormat8B:
+ case kFormat16B: return 8;
+ case kFormatH:
+ case kFormat4H:
+ case kFormat8H: return 16;
+ case kFormatS:
+ case kFormat2S:
+ case kFormat4S: return 32;
+ case kFormatD:
+ case kFormat1D:
+ case kFormat2D: return 64;
+ default: VIXL_UNREACHABLE(); return 0;
+ }
+}
+
+
+int LaneSizeInBytesFromFormat(VectorFormat vform) {
+ return LaneSizeInBitsFromFormat(vform) / 8;
+}
+
+
+int LaneSizeInBytesLog2FromFormat(VectorFormat vform) {
+ VIXL_ASSERT(vform != kFormatUndefined);
+ switch (vform) {
+ case kFormatB:
+ case kFormat8B:
+ case kFormat16B: return 0;
+ case kFormatH:
+ case kFormat4H:
+ case kFormat8H: return 1;
+ case kFormatS:
+ case kFormat2S:
+ case kFormat4S: return 2;
+ case kFormatD:
+ case kFormat1D:
+ case kFormat2D: return 3;
+ default: VIXL_UNREACHABLE(); return 0;
+ }
+}
+
+
+int LaneCountFromFormat(VectorFormat vform) {
+ VIXL_ASSERT(vform != kFormatUndefined);
+ switch (vform) {
+ case kFormat16B: return 16;
+ case kFormat8B:
+ case kFormat8H: return 8;
+ case kFormat4H:
+ case kFormat4S: return 4;
+ case kFormat2S:
+ case kFormat2D: return 2;
+ case kFormat1D:
+ case kFormatB:
+ case kFormatH:
+ case kFormatS:
+ case kFormatD: return 1;
+ default: VIXL_UNREACHABLE(); return 0;
+ }
+}
+
+
+int MaxLaneCountFromFormat(VectorFormat vform) {
+ VIXL_ASSERT(vform != kFormatUndefined);
+ switch (vform) {
+ case kFormatB:
+ case kFormat8B:
+ case kFormat16B: return 16;
+ case kFormatH:
+ case kFormat4H:
+ case kFormat8H: return 8;
+ case kFormatS:
+ case kFormat2S:
+ case kFormat4S: return 4;
+ case kFormatD:
+ case kFormat1D:
+ case kFormat2D: return 2;
+ default: VIXL_UNREACHABLE(); return 0;
+ }
+}
+
+
+// Does 'vform' indicate a vector format or a scalar format?
+bool IsVectorFormat(VectorFormat vform) {
+ VIXL_ASSERT(vform != kFormatUndefined);
+ switch (vform) {
+ case kFormatB:
+ case kFormatH:
+ case kFormatS:
+ case kFormatD: return false;
+ default: return true;
+ }
+}
+
+
+int64_t MaxIntFromFormat(VectorFormat vform) {
+ return INT64_MAX >> (64 - LaneSizeInBitsFromFormat(vform));
+}
+
+
+int64_t MinIntFromFormat(VectorFormat vform) {
+ return INT64_MIN >> (64 - LaneSizeInBitsFromFormat(vform));
+}
+
+
+uint64_t MaxUintFromFormat(VectorFormat vform) {
+ return UINT64_MAX >> (64 - LaneSizeInBitsFromFormat(vform));
+}
+} // namespace vixl
+
diff --git a/js/src/jit/arm64/vixl/Instructions-vixl.h b/js/src/jit/arm64/vixl/Instructions-vixl.h
new file mode 100644
index 0000000000..4bcddf642a
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Instructions-vixl.h
@@ -0,0 +1,817 @@
+// Copyright 2015, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_A64_INSTRUCTIONS_A64_H_
+#define VIXL_A64_INSTRUCTIONS_A64_H_
+
+#include "jit/arm64/vixl/Constants-vixl.h"
+#include "jit/arm64/vixl/Globals-vixl.h"
+#include "jit/arm64/vixl/Utils-vixl.h"
+
+namespace vixl {
+// ISA constants. --------------------------------------------------------------
+
+typedef uint32_t Instr;
+const unsigned kInstructionSize = 4;
+const unsigned kInstructionSizeLog2 = 2;
+const unsigned kLiteralEntrySize = 4;
+const unsigned kLiteralEntrySizeLog2 = 2;
+const unsigned kMaxLoadLiteralRange = 1 * MBytes;
+
+// This is the nominal page size (as used by the adrp instruction); the actual
+// size of the memory pages allocated by the kernel is likely to differ.
+const unsigned kPageSize = 4 * KBytes;
+const unsigned kPageSizeLog2 = 12;
+
+const unsigned kBRegSize = 8;
+const unsigned kBRegSizeLog2 = 3;
+const unsigned kBRegSizeInBytes = kBRegSize / 8;
+const unsigned kBRegSizeInBytesLog2 = kBRegSizeLog2 - 3;
+const unsigned kHRegSize = 16;
+const unsigned kHRegSizeLog2 = 4;
+const unsigned kHRegSizeInBytes = kHRegSize / 8;
+const unsigned kHRegSizeInBytesLog2 = kHRegSizeLog2 - 3;
+const unsigned kWRegSize = 32;
+const unsigned kWRegSizeLog2 = 5;
+const unsigned kWRegSizeInBytes = kWRegSize / 8;
+const unsigned kWRegSizeInBytesLog2 = kWRegSizeLog2 - 3;
+const unsigned kXRegSize = 64;
+const unsigned kXRegSizeLog2 = 6;
+const unsigned kXRegSizeInBytes = kXRegSize / 8;
+const unsigned kXRegSizeInBytesLog2 = kXRegSizeLog2 - 3;
+const unsigned kSRegSize = 32;
+const unsigned kSRegSizeLog2 = 5;
+const unsigned kSRegSizeInBytes = kSRegSize / 8;
+const unsigned kSRegSizeInBytesLog2 = kSRegSizeLog2 - 3;
+const unsigned kDRegSize = 64;
+const unsigned kDRegSizeLog2 = 6;
+const unsigned kDRegSizeInBytes = kDRegSize / 8;
+const unsigned kDRegSizeInBytesLog2 = kDRegSizeLog2 - 3;
+const unsigned kQRegSize = 128;
+const unsigned kQRegSizeLog2 = 7;
+const unsigned kQRegSizeInBytes = kQRegSize / 8;
+const unsigned kQRegSizeInBytesLog2 = kQRegSizeLog2 - 3;
+const uint64_t kWRegMask = UINT64_C(0xffffffff);
+const uint64_t kXRegMask = UINT64_C(0xffffffffffffffff);
+const uint64_t kSRegMask = UINT64_C(0xffffffff);
+const uint64_t kDRegMask = UINT64_C(0xffffffffffffffff);
+const uint64_t kSSignMask = UINT64_C(0x80000000);
+const uint64_t kDSignMask = UINT64_C(0x8000000000000000);
+const uint64_t kWSignMask = UINT64_C(0x80000000);
+const uint64_t kXSignMask = UINT64_C(0x8000000000000000);
+const uint64_t kByteMask = UINT64_C(0xff);
+const uint64_t kHalfWordMask = UINT64_C(0xffff);
+const uint64_t kWordMask = UINT64_C(0xffffffff);
+const uint64_t kXMaxUInt = UINT64_C(0xffffffffffffffff);
+const uint64_t kXMaxExactUInt = UINT64_C(0xfffffffffffff800);
+const uint64_t kWMaxUInt = UINT64_C(0xffffffff);
+const int64_t kXMaxInt = INT64_C(0x7fffffffffffffff);
+const int64_t kXMaxExactInt = UINT64_C(0x7ffffffffffffc00);
+const int64_t kXMinInt = INT64_C(0x8000000000000000);
+const int32_t kWMaxInt = INT32_C(0x7fffffff);
+const int32_t kWMinInt = INT32_C(0x80000000);
+const unsigned kLinkRegCode = 30;
+const unsigned kZeroRegCode = 31;
+const unsigned kSPRegInternalCode = 63;
+const unsigned kRegCodeMask = 0x1f;
+
+const unsigned kAddressTagOffset = 56;
+const unsigned kAddressTagWidth = 8;
+const uint64_t kAddressTagMask =
+ ((UINT64_C(1) << kAddressTagWidth) - 1) << kAddressTagOffset;
+VIXL_STATIC_ASSERT(kAddressTagMask == UINT64_C(0xff00000000000000));
+
+static inline unsigned CalcLSDataSize(LoadStoreOp op) {
+ VIXL_ASSERT((LSSize_offset + LSSize_width) == (kInstructionSize * 8));
+ unsigned size = static_cast<Instr>(op) >> LSSize_offset;
+ if ((op & LSVector_mask) != 0) {
+ // Vector register memory operations encode the access size in the "size"
+ // and "opc" fields.
+ if ((size == 0) && ((op & LSOpc_mask) >> LSOpc_offset) >= 2) {
+ size = kQRegSizeInBytesLog2;
+ }
+ }
+ return size;
+}
+
+unsigned CalcLSPairDataSize(LoadStorePairOp op);
+
+enum ImmBranchType {
+ UnknownBranchType = 0,
+ CondBranchType = 1,
+ UncondBranchType = 2,
+ CompareBranchType = 3,
+ TestBranchType = 4
+};
+
+// The classes of immediate branch ranges, in order of increasing range.
+// Note that CondBranchType and CompareBranchType have the same range.
+enum ImmBranchRangeType {
+ TestBranchRangeType, // tbz/tbnz: imm14 = +/- 32KB.
+ CondBranchRangeType, // b.cond/cbz/cbnz: imm19 = +/- 1MB.
+ UncondBranchRangeType, // b/bl: imm26 = +/- 128MB.
+ UnknownBranchRangeType,
+
+ // Number of 'short-range' branch range types.
+ // We don't consider unconditional branches 'short-range'.
+ NumShortBranchRangeTypes = UncondBranchRangeType
+};
+
+enum AddrMode {
+ Offset,
+ PreIndex,
+ PostIndex
+};
+
+enum Reg31Mode {
+ Reg31IsStackPointer,
+ Reg31IsZeroRegister
+};
+
+// Instructions. ---------------------------------------------------------------
+
+class Instruction {
+ public:
+ Instr InstructionBits() const {
+ return *(reinterpret_cast<const Instr*>(this));
+ }
+
+ void SetInstructionBits(Instr new_instr) {
+ *(reinterpret_cast<Instr*>(this)) = new_instr;
+ }
+
+ int Bit(int pos) const {
+ return (InstructionBits() >> pos) & 1;
+ }
+
+ uint32_t Bits(int msb, int lsb) const {
+ return ExtractUnsignedBitfield32(msb, lsb, InstructionBits());
+ }
+
+ int32_t SignedBits(int msb, int lsb) const {
+ int32_t bits = *(reinterpret_cast<const int32_t*>(this));
+ return ExtractSignedBitfield32(msb, lsb, bits);
+ }
+
+ Instr Mask(uint32_t mask) const {
+ return InstructionBits() & mask;
+ }
+
+ #define DEFINE_GETTER(Name, HighBit, LowBit, Func) \
+ int32_t Name() const { return Func(HighBit, LowBit); }
+ INSTRUCTION_FIELDS_LIST(DEFINE_GETTER)
+ #undef DEFINE_GETTER
+
+ #define DEFINE_SETTER(Name, HighBit, LowBit, Func) \
+ inline void Set##Name(unsigned n) { SetBits32(HighBit, LowBit, n); }
+ INSTRUCTION_FIELDS_LIST(DEFINE_SETTER)
+ #undef DEFINE_SETTER
+
+ // ImmPCRel is a compound field (not present in INSTRUCTION_FIELDS_LIST),
+ // formed from ImmPCRelLo and ImmPCRelHi.
+ int ImmPCRel() const {
+ int offset =
+ static_cast<int>((ImmPCRelHi() << ImmPCRelLo_width) | ImmPCRelLo());
+ int width = ImmPCRelLo_width + ImmPCRelHi_width;
+ return ExtractSignedBitfield32(width - 1, 0, offset);
+ }
+
+ uint64_t ImmLogical() const;
+ unsigned ImmNEONabcdefgh() const;
+ float ImmFP32() const;
+ double ImmFP64() const;
+ float ImmNEONFP32() const;
+ double ImmNEONFP64() const;
+
+ unsigned SizeLS() const {
+ return CalcLSDataSize(static_cast<LoadStoreOp>(Mask(LoadStoreMask)));
+ }
+
+ unsigned SizeLSPair() const {
+ return CalcLSPairDataSize(
+ static_cast<LoadStorePairOp>(Mask(LoadStorePairMask)));
+ }
+
+ int NEONLSIndex(int access_size_shift) const {
+ int64_t q = NEONQ();
+ int64_t s = NEONS();
+ int64_t size = NEONLSSize();
+ int64_t index = (q << 3) | (s << 2) | size;
+ return static_cast<int>(index >> access_size_shift);
+ }
+
+ // Helpers.
+ bool IsCondBranchImm() const {
+ return Mask(ConditionalBranchFMask) == ConditionalBranchFixed;
+ }
+
+ bool IsUncondBranchImm() const {
+ return Mask(UnconditionalBranchFMask) == UnconditionalBranchFixed;
+ }
+
+ bool IsCompareBranch() const {
+ return Mask(CompareBranchFMask) == CompareBranchFixed;
+ }
+
+ bool IsTestBranch() const {
+ return Mask(TestBranchFMask) == TestBranchFixed;
+ }
+
+ bool IsImmBranch() const {
+ return BranchType() != UnknownBranchType;
+ }
+
+ bool IsPCRelAddressing() const {
+ return Mask(PCRelAddressingFMask) == PCRelAddressingFixed;
+ }
+
+ bool IsLogicalImmediate() const {
+ return Mask(LogicalImmediateFMask) == LogicalImmediateFixed;
+ }
+
+ bool IsAddSubImmediate() const {
+ return Mask(AddSubImmediateFMask) == AddSubImmediateFixed;
+ }
+
+ bool IsAddSubExtended() const {
+ return Mask(AddSubExtendedFMask) == AddSubExtendedFixed;
+ }
+
+ bool IsLoadOrStore() const {
+ return Mask(LoadStoreAnyFMask) == LoadStoreAnyFixed;
+ }
+
+ bool IsLoad() const;
+ bool IsStore() const;
+
+ bool IsLoadLiteral() const {
+ // This includes PRFM_lit.
+ return Mask(LoadLiteralFMask) == LoadLiteralFixed;
+ }
+
+ bool IsMovn() const {
+ return (Mask(MoveWideImmediateMask) == MOVN_x) ||
+ (Mask(MoveWideImmediateMask) == MOVN_w);
+ }
+
+ // Mozilla modifications.
+ bool IsUncondB() const;
+ bool IsCondB() const;
+ bool IsBL() const;
+ bool IsBR() const;
+ bool IsBLR() const;
+ bool IsTBZ() const;
+ bool IsTBNZ() const;
+ bool IsCBZ() const;
+ bool IsCBNZ() const;
+ bool IsLDR() const;
+ bool IsNOP() const;
+ bool IsCSDB() const;
+ bool IsADR() const;
+ bool IsADRP() const;
+ bool IsMovz() const;
+ bool IsMovk() const;
+ bool IsBranchLinkImm() const;
+ bool IsTargetReachable(const Instruction* target) const;
+ ptrdiff_t ImmPCRawOffset() const;
+ void SetImmPCRawOffset(ptrdiff_t offset);
+ void SetBits32(int msb, int lsb, unsigned value);
+
+ // Is this a stack pointer synchronization instruction as inserted by
+ // MacroAssembler::syncStackPtr()?
+ bool IsStackPtrSync() const;
+
+ static int ImmBranchRangeBitwidth(ImmBranchType branch_type);
+ static int32_t ImmBranchForwardRange(ImmBranchType branch_type);
+
+ // Check if offset can be encoded as a RAW offset in a branch_type
+ // instruction. The offset must be encodeable directly as the immediate field
+ // in the instruction, it is not scaled by kInstructionSize first.
+ static bool IsValidImmPCOffset(ImmBranchType branch_type, int64_t offset);
+
+ // Get the range type corresponding to a branch type.
+ static ImmBranchRangeType ImmBranchTypeToRange(ImmBranchType);
+
+ // Get the maximum realizable forward PC offset (in bytes) for an immediate
+ // branch of the given range type.
+ // This is the largest positive multiple of kInstructionSize, offset, such
+ // that:
+ //
+ // IsValidImmPCOffset(xxx, offset / kInstructionSize)
+ //
+ // returns true for the same branch type.
+ static int32_t ImmBranchMaxForwardOffset(ImmBranchRangeType range_type);
+
+ // Get the minimuum realizable backward PC offset (in bytes) for an immediate
+ // branch of the given range type.
+ // This is the smallest (i.e., largest in magnitude) negative multiple of
+ // kInstructionSize, offset, such that:
+ //
+ // IsValidImmPCOffset(xxx, offset / kInstructionSize)
+ //
+ // returns true for the same branch type.
+ static int32_t ImmBranchMinBackwardOffset(ImmBranchRangeType range_type);
+
+ // Indicate whether Rd can be the stack pointer or the zero register. This
+ // does not check that the instruction actually has an Rd field.
+ Reg31Mode RdMode() const {
+ // The following instructions use sp or wsp as Rd:
+ // Add/sub (immediate) when not setting the flags.
+ // Add/sub (extended) when not setting the flags.
+ // Logical (immediate) when not setting the flags.
+ // Otherwise, r31 is the zero register.
+ if (IsAddSubImmediate() || IsAddSubExtended()) {
+ if (Mask(AddSubSetFlagsBit)) {
+ return Reg31IsZeroRegister;
+ } else {
+ return Reg31IsStackPointer;
+ }
+ }
+ if (IsLogicalImmediate()) {
+ // Of the logical (immediate) instructions, only ANDS (and its aliases)
+ // can set the flags. The others can all write into sp.
+ // Note that some logical operations are not available to
+ // immediate-operand instructions, so we have to combine two masks here.
+ if (Mask(LogicalImmediateMask & LogicalOpMask) == ANDS) {
+ return Reg31IsZeroRegister;
+ } else {
+ return Reg31IsStackPointer;
+ }
+ }
+ return Reg31IsZeroRegister;
+ }
+
+ // Indicate whether Rn can be the stack pointer or the zero register. This
+ // does not check that the instruction actually has an Rn field.
+ Reg31Mode RnMode() const {
+ // The following instructions use sp or wsp as Rn:
+ // All loads and stores.
+ // Add/sub (immediate).
+ // Add/sub (extended).
+ // Otherwise, r31 is the zero register.
+ if (IsLoadOrStore() || IsAddSubImmediate() || IsAddSubExtended()) {
+ return Reg31IsStackPointer;
+ }
+ return Reg31IsZeroRegister;
+ }
+
+ ImmBranchType BranchType() const {
+ if (IsCondBranchImm()) {
+ return CondBranchType;
+ } else if (IsUncondBranchImm()) {
+ return UncondBranchType;
+ } else if (IsCompareBranch()) {
+ return CompareBranchType;
+ } else if (IsTestBranch()) {
+ return TestBranchType;
+ } else {
+ return UnknownBranchType;
+ }
+ }
+
+ // Find the target of this instruction. 'this' may be a branch or a
+ // PC-relative addressing instruction.
+ const Instruction* ImmPCOffsetTarget() const;
+
+ // Patch a PC-relative offset to refer to 'target'. 'this' may be a branch or
+ // a PC-relative addressing instruction.
+ void SetImmPCOffsetTarget(const Instruction* target);
+ // Patch a literal load instruction to load from 'source'.
+ void SetImmLLiteral(const Instruction* source);
+
+ // The range of a load literal instruction, expressed as 'instr +- range'.
+ // The range is actually the 'positive' range; the branch instruction can
+ // target [instr - range - kInstructionSize, instr + range].
+ static const int kLoadLiteralImmBitwidth = 19;
+ static const int kLoadLiteralRange =
+ (1 << kLoadLiteralImmBitwidth) / 2 - kInstructionSize;
+
+ // Calculate the address of a literal referred to by a load-literal
+ // instruction, and return it as the specified type.
+ //
+ // The literal itself is safely mutable only if the backing buffer is safely
+ // mutable.
+ template <typename T>
+ T LiteralAddress() const {
+ uint64_t base_raw = reinterpret_cast<uint64_t>(this);
+ int64_t offset = ImmLLiteral() << kLiteralEntrySizeLog2;
+ uint64_t address_raw = base_raw + offset;
+
+ // Cast the address using a C-style cast. A reinterpret_cast would be
+ // appropriate, but it can't cast one integral type to another.
+ T address = (T)(address_raw);
+
+ // Assert that the address can be represented by the specified type.
+ VIXL_ASSERT((uint64_t)(address) == address_raw);
+
+ return address;
+ }
+
+ uint32_t Literal32() const {
+ uint32_t literal;
+ memcpy(&literal, LiteralAddress<const void*>(), sizeof(literal));
+ return literal;
+ }
+
+ uint64_t Literal64() const {
+ uint64_t literal;
+ memcpy(&literal, LiteralAddress<const void*>(), sizeof(literal));
+ return literal;
+ }
+
+ void SetLiteral64(uint64_t literal) const {
+ memcpy(LiteralAddress<void*>(), &literal, sizeof(literal));
+ }
+
+ float LiteralFP32() const {
+ return RawbitsToFloat(Literal32());
+ }
+
+ double LiteralFP64() const {
+ return RawbitsToDouble(Literal64());
+ }
+
+ const Instruction* NextInstruction() const {
+ return this + kInstructionSize;
+ }
+
+ // Skip any constant pools with artificial guards at this point.
+ // Return either |this| or the first instruction after the pool.
+ const Instruction* skipPool() const;
+
+ const Instruction* InstructionAtOffset(int64_t offset) const {
+ VIXL_ASSERT(IsWordAligned(this + offset));
+ return this + offset;
+ }
+
+ template<typename T> static Instruction* Cast(T src) {
+ return reinterpret_cast<Instruction*>(src);
+ }
+
+ template<typename T> static const Instruction* CastConst(T src) {
+ return reinterpret_cast<const Instruction*>(src);
+ }
+
+ private:
+ int ImmBranch() const;
+
+ static float Imm8ToFP32(uint32_t imm8);
+ static double Imm8ToFP64(uint32_t imm8);
+
+ void SetPCRelImmTarget(const Instruction* target);
+ void SetBranchImmTarget(const Instruction* target);
+};
+
+
+// Functions for handling NEON vector format information.
+enum VectorFormat {
+ kFormatUndefined = 0xffffffff,
+ kFormat8B = NEON_8B,
+ kFormat16B = NEON_16B,
+ kFormat4H = NEON_4H,
+ kFormat8H = NEON_8H,
+ kFormat2S = NEON_2S,
+ kFormat4S = NEON_4S,
+ kFormat1D = NEON_1D,
+ kFormat2D = NEON_2D,
+
+ // Scalar formats. We add the scalar bit to distinguish between scalar and
+ // vector enumerations; the bit is always set in the encoding of scalar ops
+ // and always clear for vector ops. Although kFormatD and kFormat1D appear
+ // to be the same, their meaning is subtly different. The first is a scalar
+ // operation, the second a vector operation that only affects one lane.
+ kFormatB = NEON_B | NEONScalar,
+ kFormatH = NEON_H | NEONScalar,
+ kFormatS = NEON_S | NEONScalar,
+ kFormatD = NEON_D | NEONScalar
+};
+
+VectorFormat VectorFormatHalfWidth(const VectorFormat vform);
+VectorFormat VectorFormatDoubleWidth(const VectorFormat vform);
+VectorFormat VectorFormatDoubleLanes(const VectorFormat vform);
+VectorFormat VectorFormatHalfLanes(const VectorFormat vform);
+VectorFormat ScalarFormatFromLaneSize(int lanesize);
+VectorFormat VectorFormatHalfWidthDoubleLanes(const VectorFormat vform);
+VectorFormat VectorFormatFillQ(const VectorFormat vform);
+unsigned RegisterSizeInBitsFromFormat(VectorFormat vform);
+unsigned RegisterSizeInBytesFromFormat(VectorFormat vform);
+// TODO: Make the return types of these functions consistent.
+unsigned LaneSizeInBitsFromFormat(VectorFormat vform);
+int LaneSizeInBytesFromFormat(VectorFormat vform);
+int LaneSizeInBytesLog2FromFormat(VectorFormat vform);
+int LaneCountFromFormat(VectorFormat vform);
+int MaxLaneCountFromFormat(VectorFormat vform);
+bool IsVectorFormat(VectorFormat vform);
+int64_t MaxIntFromFormat(VectorFormat vform);
+int64_t MinIntFromFormat(VectorFormat vform);
+uint64_t MaxUintFromFormat(VectorFormat vform);
+
+
+enum NEONFormat {
+ NF_UNDEF = 0,
+ NF_8B = 1,
+ NF_16B = 2,
+ NF_4H = 3,
+ NF_8H = 4,
+ NF_2S = 5,
+ NF_4S = 6,
+ NF_1D = 7,
+ NF_2D = 8,
+ NF_B = 9,
+ NF_H = 10,
+ NF_S = 11,
+ NF_D = 12
+};
+
+static const unsigned kNEONFormatMaxBits = 6;
+
+struct NEONFormatMap {
+ // The bit positions in the instruction to consider.
+ uint8_t bits[kNEONFormatMaxBits];
+
+ // Mapping from concatenated bits to format.
+ NEONFormat map[1 << kNEONFormatMaxBits];
+};
+
+class NEONFormatDecoder {
+ public:
+ enum SubstitutionMode {
+ kPlaceholder,
+ kFormat
+ };
+
+ // Construct a format decoder with increasingly specific format maps for each
+ // subsitution. If no format map is specified, the default is the integer
+ // format map.
+ explicit NEONFormatDecoder(const Instruction* instr) {
+ instrbits_ = instr->InstructionBits();
+ SetFormatMaps(IntegerFormatMap());
+ }
+ NEONFormatDecoder(const Instruction* instr,
+ const NEONFormatMap* format) {
+ instrbits_ = instr->InstructionBits();
+ SetFormatMaps(format);
+ }
+ NEONFormatDecoder(const Instruction* instr,
+ const NEONFormatMap* format0,
+ const NEONFormatMap* format1) {
+ instrbits_ = instr->InstructionBits();
+ SetFormatMaps(format0, format1);
+ }
+ NEONFormatDecoder(const Instruction* instr,
+ const NEONFormatMap* format0,
+ const NEONFormatMap* format1,
+ const NEONFormatMap* format2) {
+ instrbits_ = instr->InstructionBits();
+ SetFormatMaps(format0, format1, format2);
+ }
+
+ // Set the format mapping for all or individual substitutions.
+ void SetFormatMaps(const NEONFormatMap* format0,
+ const NEONFormatMap* format1 = NULL,
+ const NEONFormatMap* format2 = NULL) {
+ VIXL_ASSERT(format0 != NULL);
+ formats_[0] = format0;
+ formats_[1] = (format1 == NULL) ? formats_[0] : format1;
+ formats_[2] = (format2 == NULL) ? formats_[1] : format2;
+ }
+ void SetFormatMap(unsigned index, const NEONFormatMap* format) {
+ VIXL_ASSERT(index <= (sizeof(formats_) / sizeof(formats_[0])));
+ VIXL_ASSERT(format != NULL);
+ formats_[index] = format;
+ }
+
+ // Substitute %s in the input string with the placeholder string for each
+ // register, ie. "'B", "'H", etc.
+ const char* SubstitutePlaceholders(const char* string) {
+ return Substitute(string, kPlaceholder, kPlaceholder, kPlaceholder);
+ }
+
+ // Substitute %s in the input string with a new string based on the
+ // substitution mode.
+ const char* Substitute(const char* string,
+ SubstitutionMode mode0 = kFormat,
+ SubstitutionMode mode1 = kFormat,
+ SubstitutionMode mode2 = kFormat) {
+ snprintf(form_buffer_, sizeof(form_buffer_), string,
+ GetSubstitute(0, mode0),
+ GetSubstitute(1, mode1),
+ GetSubstitute(2, mode2));
+ return form_buffer_;
+ }
+
+ // Append a "2" to a mnemonic string based of the state of the Q bit.
+ const char* Mnemonic(const char* mnemonic) {
+ if ((instrbits_ & NEON_Q) != 0) {
+ snprintf(mne_buffer_, sizeof(mne_buffer_), "%s2", mnemonic);
+ return mne_buffer_;
+ }
+ return mnemonic;
+ }
+
+ VectorFormat GetVectorFormat(int format_index = 0) {
+ return GetVectorFormat(formats_[format_index]);
+ }
+
+ VectorFormat GetVectorFormat(const NEONFormatMap* format_map) {
+ static const VectorFormat vform[] = {
+ kFormatUndefined,
+ kFormat8B, kFormat16B, kFormat4H, kFormat8H,
+ kFormat2S, kFormat4S, kFormat1D, kFormat2D,
+ kFormatB, kFormatH, kFormatS, kFormatD
+ };
+ VIXL_ASSERT(GetNEONFormat(format_map) < (sizeof(vform) / sizeof(vform[0])));
+ return vform[GetNEONFormat(format_map)];
+ }
+
+ // Built in mappings for common cases.
+
+ // The integer format map uses three bits (Q, size<1:0>) to encode the
+ // "standard" set of NEON integer vector formats.
+ static const NEONFormatMap* IntegerFormatMap() {
+ static const NEONFormatMap map = {
+ {23, 22, 30},
+ {NF_8B, NF_16B, NF_4H, NF_8H, NF_2S, NF_4S, NF_UNDEF, NF_2D}
+ };
+ return &map;
+ }
+
+ // The long integer format map uses two bits (size<1:0>) to encode the
+ // long set of NEON integer vector formats. These are used in narrow, wide
+ // and long operations.
+ static const NEONFormatMap* LongIntegerFormatMap() {
+ static const NEONFormatMap map = {
+ {23, 22}, {NF_8H, NF_4S, NF_2D}
+ };
+ return &map;
+ }
+
+ // The FP format map uses two bits (Q, size<0>) to encode the NEON FP vector
+ // formats: NF_2S, NF_4S, NF_2D.
+ static const NEONFormatMap* FPFormatMap() {
+ // The FP format map assumes two bits (Q, size<0>) are used to encode the
+ // NEON FP vector formats: NF_2S, NF_4S, NF_2D.
+ static const NEONFormatMap map = {
+ {22, 30}, {NF_2S, NF_4S, NF_UNDEF, NF_2D}
+ };
+ return &map;
+ }
+
+ // The load/store format map uses three bits (Q, 11, 10) to encode the
+ // set of NEON vector formats.
+ static const NEONFormatMap* LoadStoreFormatMap() {
+ static const NEONFormatMap map = {
+ {11, 10, 30},
+ {NF_8B, NF_16B, NF_4H, NF_8H, NF_2S, NF_4S, NF_1D, NF_2D}
+ };
+ return &map;
+ }
+
+ // The logical format map uses one bit (Q) to encode the NEON vector format:
+ // NF_8B, NF_16B.
+ static const NEONFormatMap* LogicalFormatMap() {
+ static const NEONFormatMap map = {
+ {30}, {NF_8B, NF_16B}
+ };
+ return &map;
+ }
+
+ // The triangular format map uses between two and five bits to encode the NEON
+ // vector format:
+ // xxx10->8B, xxx11->16B, xx100->4H, xx101->8H
+ // x1000->2S, x1001->4S, 10001->2D, all others undefined.
+ static const NEONFormatMap* TriangularFormatMap() {
+ static const NEONFormatMap map = {
+ {19, 18, 17, 16, 30},
+ {NF_UNDEF, NF_UNDEF, NF_8B, NF_16B, NF_4H, NF_8H, NF_8B, NF_16B, NF_2S,
+ NF_4S, NF_8B, NF_16B, NF_4H, NF_8H, NF_8B, NF_16B, NF_UNDEF, NF_2D,
+ NF_8B, NF_16B, NF_4H, NF_8H, NF_8B, NF_16B, NF_2S, NF_4S, NF_8B, NF_16B,
+ NF_4H, NF_8H, NF_8B, NF_16B}
+ };
+ return &map;
+ }
+
+ // The scalar format map uses two bits (size<1:0>) to encode the NEON scalar
+ // formats: NF_B, NF_H, NF_S, NF_D.
+ static const NEONFormatMap* ScalarFormatMap() {
+ static const NEONFormatMap map = {
+ {23, 22}, {NF_B, NF_H, NF_S, NF_D}
+ };
+ return &map;
+ }
+
+ // The long scalar format map uses two bits (size<1:0>) to encode the longer
+ // NEON scalar formats: NF_H, NF_S, NF_D.
+ static const NEONFormatMap* LongScalarFormatMap() {
+ static const NEONFormatMap map = {
+ {23, 22}, {NF_H, NF_S, NF_D}
+ };
+ return &map;
+ }
+
+ // The FP scalar format map assumes one bit (size<0>) is used to encode the
+ // NEON FP scalar formats: NF_S, NF_D.
+ static const NEONFormatMap* FPScalarFormatMap() {
+ static const NEONFormatMap map = {
+ {22}, {NF_S, NF_D}
+ };
+ return &map;
+ }
+
+ // The triangular scalar format map uses between one and four bits to encode
+ // the NEON FP scalar formats:
+ // xxx1->B, xx10->H, x100->S, 1000->D, all others undefined.
+ static const NEONFormatMap* TriangularScalarFormatMap() {
+ static const NEONFormatMap map = {
+ {19, 18, 17, 16},
+ {NF_UNDEF, NF_B, NF_H, NF_B, NF_S, NF_B, NF_H, NF_B,
+ NF_D, NF_B, NF_H, NF_B, NF_S, NF_B, NF_H, NF_B}
+ };
+ return &map;
+ }
+
+ private:
+ // Get a pointer to a string that represents the format or placeholder for
+ // the specified substitution index, based on the format map and instruction.
+ const char* GetSubstitute(int index, SubstitutionMode mode) {
+ if (mode == kFormat) {
+ return NEONFormatAsString(GetNEONFormat(formats_[index]));
+ }
+ VIXL_ASSERT(mode == kPlaceholder);
+ return NEONFormatAsPlaceholder(GetNEONFormat(formats_[index]));
+ }
+
+ // Get the NEONFormat enumerated value for bits obtained from the
+ // instruction based on the specified format mapping.
+ NEONFormat GetNEONFormat(const NEONFormatMap* format_map) {
+ return format_map->map[PickBits(format_map->bits)];
+ }
+
+ // Convert a NEONFormat into a string.
+ static const char* NEONFormatAsString(NEONFormat format) {
+ static const char* formats[] = {
+ "undefined",
+ "8b", "16b", "4h", "8h", "2s", "4s", "1d", "2d",
+ "b", "h", "s", "d"
+ };
+ VIXL_ASSERT(format < (sizeof(formats) / sizeof(formats[0])));
+ return formats[format];
+ }
+
+ // Convert a NEONFormat into a register placeholder string.
+ static const char* NEONFormatAsPlaceholder(NEONFormat format) {
+ VIXL_ASSERT((format == NF_B) || (format == NF_H) ||
+ (format == NF_S) || (format == NF_D) ||
+ (format == NF_UNDEF));
+ static const char* formats[] = {
+ "undefined",
+ "undefined", "undefined", "undefined", "undefined",
+ "undefined", "undefined", "undefined", "undefined",
+ "'B", "'H", "'S", "'D"
+ };
+ return formats[format];
+ }
+
+ // Select bits from instrbits_ defined by the bits array, concatenate them,
+ // and return the value.
+ uint8_t PickBits(const uint8_t bits[]) {
+ uint8_t result = 0;
+ for (unsigned b = 0; b < kNEONFormatMaxBits; b++) {
+ if (bits[b] == 0) break;
+ result <<= 1;
+ result |= ((instrbits_ & (1 << bits[b])) == 0) ? 0 : 1;
+ }
+ return result;
+ }
+
+ Instr instrbits_;
+ const NEONFormatMap* formats_[3];
+ char form_buffer_[64];
+ char mne_buffer_[16];
+};
+} // namespace vixl
+
+#endif // VIXL_A64_INSTRUCTIONS_A64_H_
diff --git a/js/src/jit/arm64/vixl/Instrument-vixl.cpp b/js/src/jit/arm64/vixl/Instrument-vixl.cpp
new file mode 100644
index 0000000000..c07495c29d
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Instrument-vixl.cpp
@@ -0,0 +1,850 @@
+// Copyright 2014, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "jit/arm64/vixl/Instrument-vixl.h"
+
+namespace vixl {
+
+Counter::Counter(const char* name, CounterType type)
+ : count_(0), enabled_(false), type_(type) {
+ VIXL_ASSERT(name != NULL);
+ strncpy(name_, name, kCounterNameMaxLength);
+}
+
+
+void Counter::Enable() {
+ enabled_ = true;
+}
+
+
+void Counter::Disable() {
+ enabled_ = false;
+}
+
+
+bool Counter::IsEnabled() {
+ return enabled_;
+}
+
+
+void Counter::Increment() {
+ if (enabled_) {
+ count_++;
+ }
+}
+
+
+uint64_t Counter::count() {
+ uint64_t result = count_;
+ if (type_ == Gauge) {
+ // If the counter is a Gauge, reset the count after reading.
+ count_ = 0;
+ }
+ return result;
+}
+
+
+const char* Counter::name() {
+ return name_;
+}
+
+
+CounterType Counter::type() {
+ return type_;
+}
+
+
+struct CounterDescriptor {
+ const char* name;
+ CounterType type;
+};
+
+
+static const CounterDescriptor kCounterList[] = {
+ {"Instruction", Cumulative},
+
+ {"Move Immediate", Gauge},
+ {"Add/Sub DP", Gauge},
+ {"Logical DP", Gauge},
+ {"Other Int DP", Gauge},
+ {"FP DP", Gauge},
+
+ {"Conditional Select", Gauge},
+ {"Conditional Compare", Gauge},
+
+ {"Unconditional Branch", Gauge},
+ {"Compare and Branch", Gauge},
+ {"Test and Branch", Gauge},
+ {"Conditional Branch", Gauge},
+
+ {"Load Integer", Gauge},
+ {"Load FP", Gauge},
+ {"Load Pair", Gauge},
+ {"Load Literal", Gauge},
+
+ {"Store Integer", Gauge},
+ {"Store FP", Gauge},
+ {"Store Pair", Gauge},
+
+ {"PC Addressing", Gauge},
+ {"Other", Gauge},
+ {"NEON", Gauge},
+ {"Crypto", Gauge}
+};
+
+
+Instrument::Instrument(const char* datafile, uint64_t sample_period)
+ : output_stream_(stdout), sample_period_(sample_period) {
+
+ // Set up the output stream. If datafile is non-NULL, use that file. If it
+ // can't be opened, or datafile is NULL, use stdout.
+ if (datafile != NULL) {
+ output_stream_ = fopen(datafile, "w");
+ if (output_stream_ == NULL) {
+ printf("Can't open output file %s. Using stdout.\n", datafile);
+ output_stream_ = stdout;
+ }
+ }
+
+ static const int num_counters =
+ sizeof(kCounterList) / sizeof(CounterDescriptor);
+
+ // Dump an instrumentation description comment at the top of the file.
+ fprintf(output_stream_, "# counters=%d\n", num_counters);
+ fprintf(output_stream_, "# sample_period=%" PRIu64 "\n", sample_period_);
+
+ // Construct Counter objects from counter description array.
+ for (int i = 0; i < num_counters; i++) {
+ if (Counter* counter = js_new<Counter>(kCounterList[i].name, kCounterList[i].type))
+ (void)counters_.append(counter);
+ }
+
+ DumpCounterNames();
+}
+
+
+Instrument::~Instrument() {
+ // Dump any remaining instruction data to the output file.
+ DumpCounters();
+
+ // Free all the counter objects.
+ for (auto counter : counters_) {
+ js_delete(counter);
+ }
+
+ if (output_stream_ != stdout) {
+ fclose(output_stream_);
+ }
+}
+
+
+void Instrument::Update() {
+ // Increment the instruction counter, and dump all counters if a sample period
+ // has elapsed.
+ static Counter* counter = GetCounter("Instruction");
+ VIXL_ASSERT(counter->type() == Cumulative);
+ counter->Increment();
+
+ if (counter->IsEnabled() && (counter->count() % sample_period_) == 0) {
+ DumpCounters();
+ }
+}
+
+
+void Instrument::DumpCounters() {
+ // Iterate through the counter objects, dumping their values to the output
+ // stream.
+ for (auto counter : counters_) {
+ fprintf(output_stream_, "%" PRIu64 ",", counter->count());
+ }
+ fprintf(output_stream_, "\n");
+ fflush(output_stream_);
+}
+
+
+void Instrument::DumpCounterNames() {
+ // Iterate through the counter objects, dumping the counter names to the
+ // output stream.
+ for (auto counter : counters_) {
+ fprintf(output_stream_, "%s,", counter->name());
+ }
+ fprintf(output_stream_, "\n");
+ fflush(output_stream_);
+}
+
+
+void Instrument::HandleInstrumentationEvent(unsigned event) {
+ switch (event) {
+ case InstrumentStateEnable: Enable(); break;
+ case InstrumentStateDisable: Disable(); break;
+ default: DumpEventMarker(event);
+ }
+}
+
+
+void Instrument::DumpEventMarker(unsigned marker) {
+ // Dumpan event marker to the output stream as a specially formatted comment
+ // line.
+ static Counter* counter = GetCounter("Instruction");
+
+ fprintf(output_stream_, "# %c%c @ %" PRId64 "\n", marker & 0xff,
+ (marker >> 8) & 0xff, counter->count());
+}
+
+
+Counter* Instrument::GetCounter(const char* name) {
+ // Get a Counter object by name from the counter list.
+ for (auto counter : counters_) {
+ if (strcmp(counter->name(), name) == 0) {
+ return counter;
+ }
+ }
+
+ // A Counter by that name does not exist: print an error message to stderr
+ // and the output file, and exit.
+ static const char* error_message =
+ "# Error: Unknown counter \"%s\". Exiting.\n";
+ fprintf(stderr, error_message, name);
+ fprintf(output_stream_, error_message, name);
+ exit(1);
+}
+
+
+void Instrument::Enable() {
+ for (auto counter : counters_) {
+ counter->Enable();
+ }
+}
+
+
+void Instrument::Disable() {
+ for (auto counter : counters_) {
+ counter->Disable();
+ }
+}
+
+
+void Instrument::VisitPCRelAddressing(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("PC Addressing");
+ counter->Increment();
+}
+
+
+void Instrument::VisitAddSubImmediate(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Add/Sub DP");
+ counter->Increment();
+}
+
+
+void Instrument::VisitLogicalImmediate(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Logical DP");
+ counter->Increment();
+}
+
+
+void Instrument::VisitMoveWideImmediate(const Instruction* instr) {
+ Update();
+ static Counter* counter = GetCounter("Move Immediate");
+
+ if (instr->IsMovn() && (instr->Rd() == kZeroRegCode)) {
+ unsigned imm = instr->ImmMoveWide();
+ HandleInstrumentationEvent(imm);
+ } else {
+ counter->Increment();
+ }
+}
+
+
+void Instrument::VisitBitfield(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Other Int DP");
+ counter->Increment();
+}
+
+
+void Instrument::VisitExtract(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Other Int DP");
+ counter->Increment();
+}
+
+
+void Instrument::VisitUnconditionalBranch(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Unconditional Branch");
+ counter->Increment();
+}
+
+
+void Instrument::VisitUnconditionalBranchToRegister(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Unconditional Branch");
+ counter->Increment();
+}
+
+
+void Instrument::VisitCompareBranch(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Compare and Branch");
+ counter->Increment();
+}
+
+
+void Instrument::VisitTestBranch(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Test and Branch");
+ counter->Increment();
+}
+
+
+void Instrument::VisitConditionalBranch(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Conditional Branch");
+ counter->Increment();
+}
+
+
+void Instrument::VisitSystem(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Other");
+ counter->Increment();
+}
+
+
+void Instrument::VisitException(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Other");
+ counter->Increment();
+}
+
+
+void Instrument::InstrumentLoadStorePair(const Instruction* instr) {
+ static Counter* load_pair_counter = GetCounter("Load Pair");
+ static Counter* store_pair_counter = GetCounter("Store Pair");
+
+ if (instr->Mask(LoadStorePairLBit) != 0) {
+ load_pair_counter->Increment();
+ } else {
+ store_pair_counter->Increment();
+ }
+}
+
+
+void Instrument::VisitLoadStorePairPostIndex(const Instruction* instr) {
+ Update();
+ InstrumentLoadStorePair(instr);
+}
+
+
+void Instrument::VisitLoadStorePairOffset(const Instruction* instr) {
+ Update();
+ InstrumentLoadStorePair(instr);
+}
+
+
+void Instrument::VisitLoadStorePairPreIndex(const Instruction* instr) {
+ Update();
+ InstrumentLoadStorePair(instr);
+}
+
+
+void Instrument::VisitLoadStorePairNonTemporal(const Instruction* instr) {
+ Update();
+ InstrumentLoadStorePair(instr);
+}
+
+
+void Instrument::VisitLoadStoreExclusive(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Other");
+ counter->Increment();
+}
+
+void Instrument::VisitAtomicMemory(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Other");
+ counter->Increment();
+}
+
+void Instrument::VisitLoadLiteral(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Load Literal");
+ counter->Increment();
+}
+
+
+void Instrument::InstrumentLoadStore(const Instruction* instr) {
+ static Counter* load_int_counter = GetCounter("Load Integer");
+ static Counter* store_int_counter = GetCounter("Store Integer");
+ static Counter* load_fp_counter = GetCounter("Load FP");
+ static Counter* store_fp_counter = GetCounter("Store FP");
+
+ switch (instr->Mask(LoadStoreMask)) {
+ case STRB_w:
+ case STRH_w:
+ case STR_w:
+ VIXL_FALLTHROUGH();
+ case STR_x: store_int_counter->Increment(); break;
+ case STR_s:
+ VIXL_FALLTHROUGH();
+ case STR_d: store_fp_counter->Increment(); break;
+ case LDRB_w:
+ case LDRH_w:
+ case LDR_w:
+ case LDR_x:
+ case LDRSB_x:
+ case LDRSH_x:
+ case LDRSW_x:
+ case LDRSB_w:
+ VIXL_FALLTHROUGH();
+ case LDRSH_w: load_int_counter->Increment(); break;
+ case LDR_s:
+ VIXL_FALLTHROUGH();
+ case LDR_d: load_fp_counter->Increment(); break;
+ }
+}
+
+
+void Instrument::VisitLoadStoreUnscaledOffset(const Instruction* instr) {
+ Update();
+ InstrumentLoadStore(instr);
+}
+
+
+void Instrument::VisitLoadStorePostIndex(const Instruction* instr) {
+ USE(instr);
+ Update();
+ InstrumentLoadStore(instr);
+}
+
+
+void Instrument::VisitLoadStorePreIndex(const Instruction* instr) {
+ Update();
+ InstrumentLoadStore(instr);
+}
+
+
+void Instrument::VisitLoadStoreRegisterOffset(const Instruction* instr) {
+ Update();
+ InstrumentLoadStore(instr);
+}
+
+
+void Instrument::VisitLoadStoreUnsignedOffset(const Instruction* instr) {
+ Update();
+ InstrumentLoadStore(instr);
+}
+
+
+void Instrument::VisitLogicalShifted(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Logical DP");
+ counter->Increment();
+}
+
+
+void Instrument::VisitAddSubShifted(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Add/Sub DP");
+ counter->Increment();
+}
+
+
+void Instrument::VisitAddSubExtended(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Add/Sub DP");
+ counter->Increment();
+}
+
+
+void Instrument::VisitAddSubWithCarry(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Add/Sub DP");
+ counter->Increment();
+}
+
+
+void Instrument::VisitConditionalCompareRegister(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Conditional Compare");
+ counter->Increment();
+}
+
+
+void Instrument::VisitConditionalCompareImmediate(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Conditional Compare");
+ counter->Increment();
+}
+
+
+void Instrument::VisitConditionalSelect(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Conditional Select");
+ counter->Increment();
+}
+
+
+void Instrument::VisitDataProcessing1Source(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Other Int DP");
+ counter->Increment();
+}
+
+
+void Instrument::VisitDataProcessing2Source(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Other Int DP");
+ counter->Increment();
+}
+
+
+void Instrument::VisitDataProcessing3Source(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Other Int DP");
+ counter->Increment();
+}
+
+
+void Instrument::VisitFPCompare(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("FP DP");
+ counter->Increment();
+}
+
+
+void Instrument::VisitFPConditionalCompare(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Conditional Compare");
+ counter->Increment();
+}
+
+
+void Instrument::VisitFPConditionalSelect(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Conditional Select");
+ counter->Increment();
+}
+
+
+void Instrument::VisitFPImmediate(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("FP DP");
+ counter->Increment();
+}
+
+
+void Instrument::VisitFPDataProcessing1Source(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("FP DP");
+ counter->Increment();
+}
+
+
+void Instrument::VisitFPDataProcessing2Source(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("FP DP");
+ counter->Increment();
+}
+
+
+void Instrument::VisitFPDataProcessing3Source(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("FP DP");
+ counter->Increment();
+}
+
+
+void Instrument::VisitFPIntegerConvert(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("FP DP");
+ counter->Increment();
+}
+
+
+void Instrument::VisitFPFixedPointConvert(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("FP DP");
+ counter->Increment();
+}
+
+
+void Instrument::VisitCrypto2RegSHA(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Crypto");
+ counter->Increment();
+}
+
+
+void Instrument::VisitCrypto3RegSHA(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Crypto");
+ counter->Increment();
+}
+
+
+void Instrument::VisitCryptoAES(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Crypto");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEON2RegMisc(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEON3Same(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEON3Different(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEONAcrossLanes(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEONByIndexedElement(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEONCopy(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEONExtract(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEONLoadStoreMultiStruct(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEONLoadStoreMultiStructPostIndex(
+ const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEONLoadStoreSingleStruct(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEONLoadStoreSingleStructPostIndex(
+ const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEONModifiedImmediate(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEONScalar2RegMisc(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEONScalar3Diff(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEONScalar3Same(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEONScalarByIndexedElement(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEONScalarCopy(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEONScalarPairwise(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEONScalarShiftImmediate(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEONShiftImmediate(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEONTable(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitNEONPerm(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
+void Instrument::VisitUnallocated(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Other");
+ counter->Increment();
+}
+
+
+void Instrument::VisitUnimplemented(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Other");
+ counter->Increment();
+}
+
+
+} // namespace vixl
diff --git a/js/src/jit/arm64/vixl/Instrument-vixl.h b/js/src/jit/arm64/vixl/Instrument-vixl.h
new file mode 100644
index 0000000000..eca076d234
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Instrument-vixl.h
@@ -0,0 +1,109 @@
+// Copyright 2014, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_A64_INSTRUMENT_A64_H_
+#define VIXL_A64_INSTRUMENT_A64_H_
+
+#include "mozilla/Vector.h"
+
+#include "jit/arm64/vixl/Constants-vixl.h"
+#include "jit/arm64/vixl/Decoder-vixl.h"
+#include "jit/arm64/vixl/Globals-vixl.h"
+#include "jit/arm64/vixl/Utils-vixl.h"
+#include "js/AllocPolicy.h"
+
+namespace vixl {
+
+const int kCounterNameMaxLength = 256;
+const uint64_t kDefaultInstrumentationSamplingPeriod = 1 << 22;
+
+
+enum InstrumentState {
+ InstrumentStateDisable = 0,
+ InstrumentStateEnable = 1
+};
+
+
+enum CounterType {
+ Gauge = 0, // Gauge counters reset themselves after reading.
+ Cumulative = 1 // Cumulative counters keep their value after reading.
+};
+
+
+class Counter {
+ public:
+ explicit Counter(const char* name, CounterType type = Gauge);
+
+ void Increment();
+ void Enable();
+ void Disable();
+ bool IsEnabled();
+ uint64_t count();
+ const char* name();
+ CounterType type();
+
+ private:
+ char name_[kCounterNameMaxLength];
+ uint64_t count_;
+ bool enabled_;
+ CounterType type_;
+};
+
+
+class Instrument: public DecoderVisitor {
+ public:
+ explicit Instrument(const char* datafile = NULL,
+ uint64_t sample_period = kDefaultInstrumentationSamplingPeriod);
+ ~Instrument();
+
+ void Enable();
+ void Disable();
+
+ // Declare all Visitor functions.
+ #define DECLARE(A) void Visit##A(const Instruction* instr) override;
+ VISITOR_LIST(DECLARE)
+ #undef DECLARE
+
+ private:
+ void Update();
+ void DumpCounters();
+ void DumpCounterNames();
+ void DumpEventMarker(unsigned marker);
+ void HandleInstrumentationEvent(unsigned event);
+ Counter* GetCounter(const char* name);
+
+ void InstrumentLoadStore(const Instruction* instr);
+ void InstrumentLoadStorePair(const Instruction* instr);
+
+ mozilla::Vector<Counter*, 8, js::SystemAllocPolicy> counters_;
+
+ FILE *output_stream_;
+ uint64_t sample_period_;
+};
+
+} // namespace vixl
+
+#endif // VIXL_A64_INSTRUMENT_A64_H_
diff --git a/js/src/jit/arm64/vixl/Logic-vixl.cpp b/js/src/jit/arm64/vixl/Logic-vixl.cpp
new file mode 100644
index 0000000000..71821a333f
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Logic-vixl.cpp
@@ -0,0 +1,4738 @@
+// Copyright 2015, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifdef JS_SIMULATOR_ARM64
+
+#include <cmath>
+
+#include "jit/arm64/vixl/Simulator-vixl.h"
+
+namespace vixl {
+
+template<> double Simulator::FPDefaultNaN<double>() {
+ return kFP64DefaultNaN;
+}
+
+
+template<> float Simulator::FPDefaultNaN<float>() {
+ return kFP32DefaultNaN;
+}
+
+
+double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
+ if (src >= 0) {
+ return UFixedToDouble(src, fbits, round);
+ } else {
+ // This works for all negative values, including INT64_MIN.
+ return -UFixedToDouble(-src, fbits, round);
+ }
+}
+
+
+double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
+ // An input of 0 is a special case because the result is effectively
+ // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
+ if (src == 0) {
+ return 0.0;
+ }
+
+ // Calculate the exponent. The highest significant bit will have the value
+ // 2^exponent.
+ const int highest_significant_bit = 63 - CountLeadingZeros(src);
+ const int64_t exponent = highest_significant_bit - fbits;
+
+ return FPRoundToDouble(0, exponent, src, round);
+}
+
+
+float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
+ if (src >= 0) {
+ return UFixedToFloat(src, fbits, round);
+ } else {
+ // This works for all negative values, including INT64_MIN.
+ return -UFixedToFloat(-src, fbits, round);
+ }
+}
+
+
+float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
+ // An input of 0 is a special case because the result is effectively
+ // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
+ if (src == 0) {
+ return 0.0f;
+ }
+
+ // Calculate the exponent. The highest significant bit will have the value
+ // 2^exponent.
+ const int highest_significant_bit = 63 - CountLeadingZeros(src);
+ const int32_t exponent = highest_significant_bit - fbits;
+
+ return FPRoundToFloat(0, exponent, src, round);
+}
+
+
+void Simulator::ld1(VectorFormat vform,
+ LogicVRegister dst,
+ uint64_t addr) {
+ if (handle_wasm_seg_fault(addr, 16))
+ return;
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.ReadUintFromMem(vform, i, addr);
+ addr += LaneSizeInBytesFromFormat(vform);
+ }
+}
+
+
+void Simulator::ld1(VectorFormat vform,
+ LogicVRegister dst,
+ int index,
+ uint64_t addr) {
+ if (handle_wasm_seg_fault(addr, LaneSizeInBytesFromFormat(vform)))
+ return;
+ dst.ReadUintFromMem(vform, index, addr);
+}
+
+
+void Simulator::ld1r(VectorFormat vform,
+ LogicVRegister dst,
+ uint64_t addr) {
+ if (handle_wasm_seg_fault(addr, LaneSizeInBytesFromFormat(vform)))
+ return;
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.ReadUintFromMem(vform, i, addr);
+ }
+}
+
+
+void Simulator::ld2(VectorFormat vform,
+ LogicVRegister dst1,
+ LogicVRegister dst2,
+ uint64_t addr1) {
+ if (handle_wasm_seg_fault(addr1, 16*2))
+ return;
+ dst1.ClearForWrite(vform);
+ dst2.ClearForWrite(vform);
+ int esize = LaneSizeInBytesFromFormat(vform);
+ uint64_t addr2 = addr1 + esize;
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst1.ReadUintFromMem(vform, i, addr1);
+ dst2.ReadUintFromMem(vform, i, addr2);
+ addr1 += 2 * esize;
+ addr2 += 2 * esize;
+ }
+}
+
+
+void Simulator::ld2(VectorFormat vform,
+ LogicVRegister dst1,
+ LogicVRegister dst2,
+ int index,
+ uint64_t addr1) {
+ if (handle_wasm_seg_fault(addr1, LaneSizeInBytesFromFormat(vform)*2))
+ return;
+ dst1.ClearForWrite(vform);
+ dst2.ClearForWrite(vform);
+ uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
+ dst1.ReadUintFromMem(vform, index, addr1);
+ dst2.ReadUintFromMem(vform, index, addr2);
+}
+
+
+void Simulator::ld2r(VectorFormat vform,
+ LogicVRegister dst1,
+ LogicVRegister dst2,
+ uint64_t addr) {
+ if (handle_wasm_seg_fault(addr, LaneSizeInBytesFromFormat(vform)*2))
+ return;
+ dst1.ClearForWrite(vform);
+ dst2.ClearForWrite(vform);
+ uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst1.ReadUintFromMem(vform, i, addr);
+ dst2.ReadUintFromMem(vform, i, addr2);
+ }
+}
+
+
+void Simulator::ld3(VectorFormat vform,
+ LogicVRegister dst1,
+ LogicVRegister dst2,
+ LogicVRegister dst3,
+ uint64_t addr1) {
+ if (handle_wasm_seg_fault(addr1, 16*3))
+ return;
+ dst1.ClearForWrite(vform);
+ dst2.ClearForWrite(vform);
+ dst3.ClearForWrite(vform);
+ int esize = LaneSizeInBytesFromFormat(vform);
+ uint64_t addr2 = addr1 + esize;
+ uint64_t addr3 = addr2 + esize;
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst1.ReadUintFromMem(vform, i, addr1);
+ dst2.ReadUintFromMem(vform, i, addr2);
+ dst3.ReadUintFromMem(vform, i, addr3);
+ addr1 += 3 * esize;
+ addr2 += 3 * esize;
+ addr3 += 3 * esize;
+ }
+}
+
+
+void Simulator::ld3(VectorFormat vform,
+ LogicVRegister dst1,
+ LogicVRegister dst2,
+ LogicVRegister dst3,
+ int index,
+ uint64_t addr1) {
+ if (handle_wasm_seg_fault(addr1, LaneSizeInBytesFromFormat(vform)*3))
+ return;
+ dst1.ClearForWrite(vform);
+ dst2.ClearForWrite(vform);
+ dst3.ClearForWrite(vform);
+ uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
+ uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
+ dst1.ReadUintFromMem(vform, index, addr1);
+ dst2.ReadUintFromMem(vform, index, addr2);
+ dst3.ReadUintFromMem(vform, index, addr3);
+}
+
+
+void Simulator::ld3r(VectorFormat vform,
+ LogicVRegister dst1,
+ LogicVRegister dst2,
+ LogicVRegister dst3,
+ uint64_t addr) {
+ if (handle_wasm_seg_fault(addr, LaneSizeInBytesFromFormat(vform)*3))
+ return;
+ dst1.ClearForWrite(vform);
+ dst2.ClearForWrite(vform);
+ dst3.ClearForWrite(vform);
+ uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
+ uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst1.ReadUintFromMem(vform, i, addr);
+ dst2.ReadUintFromMem(vform, i, addr2);
+ dst3.ReadUintFromMem(vform, i, addr3);
+ }
+}
+
+
+void Simulator::ld4(VectorFormat vform,
+ LogicVRegister dst1,
+ LogicVRegister dst2,
+ LogicVRegister dst3,
+ LogicVRegister dst4,
+ uint64_t addr1) {
+ if (handle_wasm_seg_fault(addr1, 16*4))
+ return;
+ dst1.ClearForWrite(vform);
+ dst2.ClearForWrite(vform);
+ dst3.ClearForWrite(vform);
+ dst4.ClearForWrite(vform);
+ int esize = LaneSizeInBytesFromFormat(vform);
+ uint64_t addr2 = addr1 + esize;
+ uint64_t addr3 = addr2 + esize;
+ uint64_t addr4 = addr3 + esize;
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst1.ReadUintFromMem(vform, i, addr1);
+ dst2.ReadUintFromMem(vform, i, addr2);
+ dst3.ReadUintFromMem(vform, i, addr3);
+ dst4.ReadUintFromMem(vform, i, addr4);
+ addr1 += 4 * esize;
+ addr2 += 4 * esize;
+ addr3 += 4 * esize;
+ addr4 += 4 * esize;
+ }
+}
+
+
+void Simulator::ld4(VectorFormat vform,
+ LogicVRegister dst1,
+ LogicVRegister dst2,
+ LogicVRegister dst3,
+ LogicVRegister dst4,
+ int index,
+ uint64_t addr1) {
+ if (handle_wasm_seg_fault(addr1, LaneSizeInBytesFromFormat(vform)*4))
+ return;
+ dst1.ClearForWrite(vform);
+ dst2.ClearForWrite(vform);
+ dst3.ClearForWrite(vform);
+ dst4.ClearForWrite(vform);
+ uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
+ uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
+ uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
+ dst1.ReadUintFromMem(vform, index, addr1);
+ dst2.ReadUintFromMem(vform, index, addr2);
+ dst3.ReadUintFromMem(vform, index, addr3);
+ dst4.ReadUintFromMem(vform, index, addr4);
+}
+
+
+void Simulator::ld4r(VectorFormat vform,
+ LogicVRegister dst1,
+ LogicVRegister dst2,
+ LogicVRegister dst3,
+ LogicVRegister dst4,
+ uint64_t addr) {
+ if (handle_wasm_seg_fault(addr, LaneSizeInBytesFromFormat(vform)*4))
+ return;
+ dst1.ClearForWrite(vform);
+ dst2.ClearForWrite(vform);
+ dst3.ClearForWrite(vform);
+ dst4.ClearForWrite(vform);
+ uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
+ uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
+ uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst1.ReadUintFromMem(vform, i, addr);
+ dst2.ReadUintFromMem(vform, i, addr2);
+ dst3.ReadUintFromMem(vform, i, addr3);
+ dst4.ReadUintFromMem(vform, i, addr4);
+ }
+}
+
+
+void Simulator::st1(VectorFormat vform,
+ LogicVRegister src,
+ uint64_t addr) {
+ if (handle_wasm_seg_fault(addr, 16))
+ return;
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ src.WriteUintToMem(vform, i, addr);
+ addr += LaneSizeInBytesFromFormat(vform);
+ }
+}
+
+
+void Simulator::st1(VectorFormat vform,
+ LogicVRegister src,
+ int index,
+ uint64_t addr) {
+ if (handle_wasm_seg_fault(addr, LaneSizeInBytesFromFormat(vform)))
+ return;
+ src.WriteUintToMem(vform, index, addr);
+}
+
+
+void Simulator::st2(VectorFormat vform,
+ LogicVRegister dst,
+ LogicVRegister dst2,
+ uint64_t addr) {
+ if (handle_wasm_seg_fault(addr, 16*2))
+ return;
+ int esize = LaneSizeInBytesFromFormat(vform);
+ uint64_t addr2 = addr + esize;
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.WriteUintToMem(vform, i, addr);
+ dst2.WriteUintToMem(vform, i, addr2);
+ addr += 2 * esize;
+ addr2 += 2 * esize;
+ }
+}
+
+
+void Simulator::st2(VectorFormat vform,
+ LogicVRegister dst,
+ LogicVRegister dst2,
+ int index,
+ uint64_t addr) {
+ if (handle_wasm_seg_fault(addr, LaneSizeInBytesFromFormat(vform)*2))
+ return;
+ int esize = LaneSizeInBytesFromFormat(vform);
+ dst.WriteUintToMem(vform, index, addr);
+ dst2.WriteUintToMem(vform, index, addr + 1 * esize);
+}
+
+
+void Simulator::st3(VectorFormat vform,
+ LogicVRegister dst,
+ LogicVRegister dst2,
+ LogicVRegister dst3,
+ uint64_t addr) {
+ if (handle_wasm_seg_fault(addr, 16*3))
+ return;
+ int esize = LaneSizeInBytesFromFormat(vform);
+ uint64_t addr2 = addr + esize;
+ uint64_t addr3 = addr2 + esize;
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.WriteUintToMem(vform, i, addr);
+ dst2.WriteUintToMem(vform, i, addr2);
+ dst3.WriteUintToMem(vform, i, addr3);
+ addr += 3 * esize;
+ addr2 += 3 * esize;
+ addr3 += 3 * esize;
+ }
+}
+
+
+void Simulator::st3(VectorFormat vform,
+ LogicVRegister dst,
+ LogicVRegister dst2,
+ LogicVRegister dst3,
+ int index,
+ uint64_t addr) {
+ if (handle_wasm_seg_fault(addr, LaneSizeInBytesFromFormat(vform)*3))
+ return;
+ int esize = LaneSizeInBytesFromFormat(vform);
+ dst.WriteUintToMem(vform, index, addr);
+ dst2.WriteUintToMem(vform, index, addr + 1 * esize);
+ dst3.WriteUintToMem(vform, index, addr + 2 * esize);
+}
+
+
+void Simulator::st4(VectorFormat vform,
+ LogicVRegister dst,
+ LogicVRegister dst2,
+ LogicVRegister dst3,
+ LogicVRegister dst4,
+ uint64_t addr) {
+ if (handle_wasm_seg_fault(addr, 16*4))
+ return;
+ int esize = LaneSizeInBytesFromFormat(vform);
+ uint64_t addr2 = addr + esize;
+ uint64_t addr3 = addr2 + esize;
+ uint64_t addr4 = addr3 + esize;
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.WriteUintToMem(vform, i, addr);
+ dst2.WriteUintToMem(vform, i, addr2);
+ dst3.WriteUintToMem(vform, i, addr3);
+ dst4.WriteUintToMem(vform, i, addr4);
+ addr += 4 * esize;
+ addr2 += 4 * esize;
+ addr3 += 4 * esize;
+ addr4 += 4 * esize;
+ }
+}
+
+
+void Simulator::st4(VectorFormat vform,
+ LogicVRegister dst,
+ LogicVRegister dst2,
+ LogicVRegister dst3,
+ LogicVRegister dst4,
+ int index,
+ uint64_t addr) {
+ if (handle_wasm_seg_fault(addr, LaneSizeInBytesFromFormat(vform)*4))
+ return;
+ int esize = LaneSizeInBytesFromFormat(vform);
+ dst.WriteUintToMem(vform, index, addr);
+ dst2.WriteUintToMem(vform, index, addr + 1 * esize);
+ dst3.WriteUintToMem(vform, index, addr + 2 * esize);
+ dst4.WriteUintToMem(vform, index, addr + 3 * esize);
+}
+
+
+LogicVRegister Simulator::cmp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ Condition cond) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ int64_t sa = src1.Int(vform, i);
+ int64_t sb = src2.Int(vform, i);
+ uint64_t ua = src1.Uint(vform, i);
+ uint64_t ub = src2.Uint(vform, i);
+ bool result = false;
+ switch (cond) {
+ case eq: result = (ua == ub); break;
+ case ge: result = (sa >= sb); break;
+ case gt: result = (sa > sb) ; break;
+ case hi: result = (ua > ub) ; break;
+ case hs: result = (ua >= ub); break;
+ case lt: result = (sa < sb) ; break;
+ case le: result = (sa <= sb); break;
+ default: VIXL_UNREACHABLE(); break;
+ }
+ dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::cmp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ int imm,
+ Condition cond) {
+ SimVRegister temp;
+ LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
+ return cmp(vform, dst, src1, imm_reg, cond);
+}
+
+
+LogicVRegister Simulator::cmptst(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ uint64_t ua = src1.Uint(vform, i);
+ uint64_t ub = src2.Uint(vform, i);
+ dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::add(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ // TODO(all): consider assigning the result of LaneCountFromFormat to a local.
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ // Test for unsigned saturation.
+ uint64_t ua = src1.UintLeftJustified(vform, i);
+ uint64_t ub = src2.UintLeftJustified(vform, i);
+ uint64_t ur = ua + ub;
+ if (ur < ua) {
+ dst.SetUnsignedSat(i, true);
+ }
+
+ // Test for signed saturation.
+ int64_t sa = src1.IntLeftJustified(vform, i);
+ int64_t sb = src2.IntLeftJustified(vform, i);
+ int64_t sr = sa + sb;
+ // If the signs of the operands are the same, but different from the result,
+ // there was an overflow.
+ if (((sa >= 0) == (sb >= 0)) && ((sa >= 0) != (sr >= 0))) {
+ dst.SetSignedSat(i, sa >= 0);
+ }
+
+ dst.SetInt(vform, i, src1.Int(vform, i) + src2.Int(vform, i));
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::addp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ uzp1(vform, temp1, src1, src2);
+ uzp2(vform, temp2, src1, src2);
+ add(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::mla(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ mul(vform, temp, src1, src2);
+ add(vform, dst, dst, temp);
+ return dst;
+}
+
+
+LogicVRegister Simulator::mls(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ mul(vform, temp, src1, src2);
+ sub(vform, dst, dst, temp);
+ return dst;
+}
+
+
+LogicVRegister Simulator::mul(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::mul(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform = VectorFormatFillQ(vform);
+ return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+LogicVRegister Simulator::mla(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform = VectorFormatFillQ(vform);
+ return mla(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+LogicVRegister Simulator::mls(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform = VectorFormatFillQ(vform);
+ return mls(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+LogicVRegister Simulator::smull(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform =
+ VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
+ return smull(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+LogicVRegister Simulator::smull2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform =
+ VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
+ return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+LogicVRegister Simulator::umull(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform =
+ VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
+ return umull(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+LogicVRegister Simulator::umull2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform =
+ VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
+ return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+LogicVRegister Simulator::smlal(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform =
+ VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
+ return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+LogicVRegister Simulator::smlal2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform =
+ VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
+ return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+LogicVRegister Simulator::umlal(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform =
+ VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
+ return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+LogicVRegister Simulator::umlal2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform =
+ VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
+ return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+LogicVRegister Simulator::smlsl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform =
+ VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
+ return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+LogicVRegister Simulator::smlsl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform =
+ VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
+ return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+LogicVRegister Simulator::umlsl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform =
+ VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
+ return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+LogicVRegister Simulator::umlsl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform =
+ VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
+ return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+LogicVRegister Simulator::sqdmull(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform =
+ VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
+ return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+LogicVRegister Simulator::sqdmull2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform =
+ VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
+ return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+LogicVRegister Simulator::sqdmlal(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform =
+ VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
+ return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform =
+ VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
+ return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform =
+ VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
+ return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform =
+ VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
+ return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+LogicVRegister Simulator::sqdmulh(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform = VectorFormatFillQ(vform);
+ return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ SimVRegister temp;
+ VectorFormat indexform = VectorFormatFillQ(vform);
+ return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
+}
+
+
+uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) {
+ uint16_t result = 0;
+ uint16_t extended_op2 = op2;
+ for (int i = 0; i < 8; ++i) {
+ if ((op1 >> i) & 1) {
+ result = result ^ (extended_op2 << i);
+ }
+ }
+ return result;
+}
+
+
+LogicVRegister Simulator::pmul(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.SetUint(vform, i,
+ PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i)));
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::pmull(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ VectorFormat vform_src = VectorFormatHalfWidth(vform);
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.SetUint(vform, i, PolynomialMult(src1.Uint(vform_src, i),
+ src2.Uint(vform_src, i)));
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::pmull2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);
+ dst.ClearForWrite(vform);
+ int lane_count = LaneCountFromFormat(vform);
+ for (int i = 0; i < lane_count; i++) {
+ dst.SetUint(vform, i, PolynomialMult(src1.Uint(vform_src, lane_count + i),
+ src2.Uint(vform_src, lane_count + i)));
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::sub(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ // Test for unsigned saturation.
+ if (src2.Uint(vform, i) > src1.Uint(vform, i)) {
+ dst.SetUnsignedSat(i, false);
+ }
+
+ // Test for signed saturation.
+ int64_t sa = src1.IntLeftJustified(vform, i);
+ int64_t sb = src2.IntLeftJustified(vform, i);
+ int64_t sr = sa - sb;
+ // If the signs of the operands are different, and the sign of the first
+ // operand doesn't match the result, there was an overflow.
+ if (((sa >= 0) != (sb >= 0)) && ((sa >= 0) != (sr >= 0))) {
+ dst.SetSignedSat(i, sr < 0);
+ }
+
+ dst.SetInt(vform, i, src1.Int(vform, i) - src2.Int(vform, i));
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::and_(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::orr(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i));
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::orn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i));
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::eor(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::bic(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::bic(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ uint64_t imm) {
+ uint64_t result[16];
+ int laneCount = LaneCountFromFormat(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ result[i] = src.Uint(vform, i) & ~imm;
+ }
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ dst.SetUint(vform, i, result[i]);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::bif(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ uint64_t operand1 = dst.Uint(vform, i);
+ uint64_t operand2 = ~src2.Uint(vform, i);
+ uint64_t operand3 = src1.Uint(vform, i);
+ uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
+ dst.SetUint(vform, i, result);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::bit(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ uint64_t operand1 = dst.Uint(vform, i);
+ uint64_t operand2 = src2.Uint(vform, i);
+ uint64_t operand3 = src1.Uint(vform, i);
+ uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
+ dst.SetUint(vform, i, result);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::bsl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ uint64_t operand1 = src2.Uint(vform, i);
+ uint64_t operand2 = dst.Uint(vform, i);
+ uint64_t operand3 = src1.Uint(vform, i);
+ uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
+ dst.SetUint(vform, i, result);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::sminmax(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ bool max) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ int64_t src1_val = src1.Int(vform, i);
+ int64_t src2_val = src2.Int(vform, i);
+ int64_t dst_val;
+ if (max == true) {
+ dst_val = (src1_val > src2_val) ? src1_val : src2_val;
+ } else {
+ dst_val = (src1_val < src2_val) ? src1_val : src2_val;
+ }
+ dst.SetInt(vform, i, dst_val);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::smax(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ return sminmax(vform, dst, src1, src2, true);
+}
+
+
+LogicVRegister Simulator::smin(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ return sminmax(vform, dst, src1, src2, false);
+}
+
+
+LogicVRegister Simulator::sminmaxp(VectorFormat vform,
+ LogicVRegister dst,
+ int dst_index,
+ const LogicVRegister& src,
+ bool max) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
+ int64_t src1_val = src.Int(vform, i);
+ int64_t src2_val = src.Int(vform, i + 1);
+ int64_t dst_val;
+ if (max == true) {
+ dst_val = (src1_val > src2_val) ? src1_val : src2_val;
+ } else {
+ dst_val = (src1_val < src2_val) ? src1_val : src2_val;
+ }
+ dst.SetInt(vform, dst_index + (i >> 1), dst_val);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::smaxp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ sminmaxp(vform, dst, 0, src1, true);
+ sminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, true);
+ return dst;
+}
+
+
+LogicVRegister Simulator::sminp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ sminmaxp(vform, dst, 0, src1, false);
+ sminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, false);
+ return dst;
+}
+
+
+LogicVRegister Simulator::addp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ VIXL_ASSERT(vform == kFormatD);
+
+ int64_t dst_val = src.Int(kFormat2D, 0) + src.Int(kFormat2D, 1);
+ dst.ClearForWrite(vform);
+ dst.SetInt(vform, 0, dst_val);
+ return dst;
+}
+
+
+LogicVRegister Simulator::addv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ VectorFormat vform_dst
+ = ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
+
+
+ int64_t dst_val = 0;
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst_val += src.Int(vform, i);
+ }
+
+ dst.ClearForWrite(vform_dst);
+ dst.SetInt(vform_dst, 0, dst_val);
+ return dst;
+}
+
+
+LogicVRegister Simulator::saddlv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ VectorFormat vform_dst
+ = ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
+
+ int64_t dst_val = 0;
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst_val += src.Int(vform, i);
+ }
+
+ dst.ClearForWrite(vform_dst);
+ dst.SetInt(vform_dst, 0, dst_val);
+ return dst;
+}
+
+
+LogicVRegister Simulator::uaddlv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ VectorFormat vform_dst
+ = ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
+
+ uint64_t dst_val = 0;
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst_val += src.Uint(vform, i);
+ }
+
+ dst.ClearForWrite(vform_dst);
+ dst.SetUint(vform_dst, 0, dst_val);
+ return dst;
+}
+
+
+LogicVRegister Simulator::sminmaxv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ bool max) {
+ dst.ClearForWrite(vform);
+ int64_t dst_val = max ? INT64_MIN : INT64_MAX;
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ int64_t src_val = src.Int(vform, i);
+ if (max == true) {
+ dst_val = (src_val > dst_val) ? src_val : dst_val;
+ } else {
+ dst_val = (src_val < dst_val) ? src_val : dst_val;
+ }
+ }
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.SetInt(vform, i, 0);
+ }
+ dst.SetInt(vform, 0, dst_val);
+ return dst;
+}
+
+
+LogicVRegister Simulator::smaxv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ sminmaxv(vform, dst, src, true);
+ return dst;
+}
+
+
+LogicVRegister Simulator::sminv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ sminmaxv(vform, dst, src, false);
+ return dst;
+}
+
+
+LogicVRegister Simulator::uminmax(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ bool max) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ uint64_t src1_val = src1.Uint(vform, i);
+ uint64_t src2_val = src2.Uint(vform, i);
+ uint64_t dst_val;
+ if (max == true) {
+ dst_val = (src1_val > src2_val) ? src1_val : src2_val;
+ } else {
+ dst_val = (src1_val < src2_val) ? src1_val : src2_val;
+ }
+ dst.SetUint(vform, i, dst_val);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::umax(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ return uminmax(vform, dst, src1, src2, true);
+}
+
+
+LogicVRegister Simulator::umin(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ return uminmax(vform, dst, src1, src2, false);
+}
+
+
+LogicVRegister Simulator::uminmaxp(VectorFormat vform,
+ LogicVRegister dst,
+ int dst_index,
+ const LogicVRegister& src,
+ bool max) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
+ uint64_t src1_val = src.Uint(vform, i);
+ uint64_t src2_val = src.Uint(vform, i + 1);
+ uint64_t dst_val;
+ if (max == true) {
+ dst_val = (src1_val > src2_val) ? src1_val : src2_val;
+ } else {
+ dst_val = (src1_val < src2_val) ? src1_val : src2_val;
+ }
+ dst.SetUint(vform, dst_index + (i >> 1), dst_val);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::umaxp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ uminmaxp(vform, dst, 0, src1, true);
+ uminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, true);
+ return dst;
+}
+
+
+LogicVRegister Simulator::uminp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ uminmaxp(vform, dst, 0, src1, false);
+ uminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, false);
+ return dst;
+}
+
+
+LogicVRegister Simulator::uminmaxv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ bool max) {
+ dst.ClearForWrite(vform);
+ uint64_t dst_val = max ? 0 : UINT64_MAX;
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ uint64_t src_val = src.Uint(vform, i);
+ if (max == true) {
+ dst_val = (src_val > dst_val) ? src_val : dst_val;
+ } else {
+ dst_val = (src_val < dst_val) ? src_val : dst_val;
+ }
+ }
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.SetUint(vform, i, 0);
+ }
+ dst.SetUint(vform, 0, dst_val);
+ return dst;
+}
+
+
+LogicVRegister Simulator::umaxv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ uminmaxv(vform, dst, src, true);
+ return dst;
+}
+
+
+LogicVRegister Simulator::uminv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ uminmaxv(vform, dst, src, false);
+ return dst;
+}
+
+
+LogicVRegister Simulator::shl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ VIXL_ASSERT(shift >= 0);
+ SimVRegister temp;
+ LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
+ return ushl(vform, dst, src, shiftreg);
+}
+
+
+LogicVRegister Simulator::sshll(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ VIXL_ASSERT(shift >= 0);
+ SimVRegister temp1, temp2;
+ LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
+ LogicVRegister extendedreg = sxtl(vform, temp2, src);
+ return sshl(vform, dst, extendedreg, shiftreg);
+}
+
+
+LogicVRegister Simulator::sshll2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ VIXL_ASSERT(shift >= 0);
+ SimVRegister temp1, temp2;
+ LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
+ LogicVRegister extendedreg = sxtl2(vform, temp2, src);
+ return sshl(vform, dst, extendedreg, shiftreg);
+}
+
+
+LogicVRegister Simulator::shll(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ int shift = LaneSizeInBitsFromFormat(vform) / 2;
+ return sshll(vform, dst, src, shift);
+}
+
+
+LogicVRegister Simulator::shll2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ int shift = LaneSizeInBitsFromFormat(vform) / 2;
+ return sshll2(vform, dst, src, shift);
+}
+
+
+LogicVRegister Simulator::ushll(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ VIXL_ASSERT(shift >= 0);
+ SimVRegister temp1, temp2;
+ LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
+ LogicVRegister extendedreg = uxtl(vform, temp2, src);
+ return ushl(vform, dst, extendedreg, shiftreg);
+}
+
+
+LogicVRegister Simulator::ushll2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ VIXL_ASSERT(shift >= 0);
+ SimVRegister temp1, temp2;
+ LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
+ LogicVRegister extendedreg = uxtl2(vform, temp2, src);
+ return ushl(vform, dst, extendedreg, shiftreg);
+}
+
+
+LogicVRegister Simulator::sli(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ dst.ClearForWrite(vform);
+ int laneCount = LaneCountFromFormat(vform);
+ for (int i = 0; i < laneCount; i++) {
+ uint64_t src_lane = src.Uint(vform, i);
+ uint64_t dst_lane = dst.Uint(vform, i);
+ uint64_t shifted = src_lane << shift;
+ uint64_t mask = MaxUintFromFormat(vform) << shift;
+ dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::sqshl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ VIXL_ASSERT(shift >= 0);
+ SimVRegister temp;
+ LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
+ return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);
+}
+
+
+LogicVRegister Simulator::uqshl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ VIXL_ASSERT(shift >= 0);
+ SimVRegister temp;
+ LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
+ return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
+}
+
+
+LogicVRegister Simulator::sqshlu(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ VIXL_ASSERT(shift >= 0);
+ SimVRegister temp;
+ LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
+ return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
+}
+
+
+LogicVRegister Simulator::sri(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ dst.ClearForWrite(vform);
+ int laneCount = LaneCountFromFormat(vform);
+ VIXL_ASSERT((shift > 0) &&
+ (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));
+ for (int i = 0; i < laneCount; i++) {
+ uint64_t src_lane = src.Uint(vform, i);
+ uint64_t dst_lane = dst.Uint(vform, i);
+ uint64_t shifted;
+ uint64_t mask;
+ if (shift == 64) {
+ shifted = 0;
+ mask = 0;
+ } else {
+ shifted = src_lane >> shift;
+ mask = MaxUintFromFormat(vform) >> shift;
+ }
+ dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::ushr(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ VIXL_ASSERT(shift >= 0);
+ SimVRegister temp;
+ LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
+ return ushl(vform, dst, src, shiftreg);
+}
+
+
+LogicVRegister Simulator::sshr(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ VIXL_ASSERT(shift >= 0);
+ SimVRegister temp;
+ LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
+ return sshl(vform, dst, src, shiftreg);
+}
+
+
+LogicVRegister Simulator::ssra(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ SimVRegister temp;
+ LogicVRegister shifted_reg = sshr(vform, temp, src, shift);
+ return add(vform, dst, dst, shifted_reg);
+}
+
+
+LogicVRegister Simulator::usra(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ SimVRegister temp;
+ LogicVRegister shifted_reg = ushr(vform, temp, src, shift);
+ return add(vform, dst, dst, shifted_reg);
+}
+
+
+LogicVRegister Simulator::srsra(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ SimVRegister temp;
+ LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);
+ return add(vform, dst, dst, shifted_reg);
+}
+
+
+LogicVRegister Simulator::ursra(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ SimVRegister temp;
+ LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);
+ return add(vform, dst, dst, shifted_reg);
+}
+
+
+LogicVRegister Simulator::cls(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ uint64_t result[16];
+ int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
+ int laneCount = LaneCountFromFormat(vform);
+ for (int i = 0; i < laneCount; i++) {
+ result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits);
+ }
+
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ dst.SetUint(vform, i, result[i]);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::clz(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ uint64_t result[16];
+ int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
+ int laneCount = LaneCountFromFormat(vform);
+ for (int i = 0; i < laneCount; i++) {
+ result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits);
+ }
+
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ dst.SetUint(vform, i, result[i]);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::cnt(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ uint64_t result[16];
+ int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
+ int laneCount = LaneCountFromFormat(vform);
+ for (int i = 0; i < laneCount; i++) {
+ uint64_t value = src.Uint(vform, i);
+ result[i] = 0;
+ for (int j = 0; j < laneSizeInBits; j++) {
+ result[i] += (value & 1);
+ value >>= 1;
+ }
+ }
+
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ dst.SetUint(vform, i, result[i]);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::sshl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ int8_t shift_val = src2.Int(vform, i);
+ int64_t lj_src_val = src1.IntLeftJustified(vform, i);
+
+ // Set signed saturation state.
+ if ((shift_val > CountLeadingSignBits(lj_src_val)) &&
+ (lj_src_val != 0)) {
+ dst.SetSignedSat(i, lj_src_val >= 0);
+ }
+
+ // Set unsigned saturation state.
+ if (lj_src_val < 0) {
+ dst.SetUnsignedSat(i, false);
+ } else if ((shift_val > CountLeadingZeros(lj_src_val)) &&
+ (lj_src_val != 0)) {
+ dst.SetUnsignedSat(i, true);
+ }
+
+ int64_t src_val = src1.Int(vform, i);
+ if (shift_val > 63) {
+ dst.SetInt(vform, i, 0);
+ } else if (shift_val < -63) {
+ dst.SetRounding(i, src_val < 0);
+ dst.SetInt(vform, i, (src_val < 0) ? -1 : 0);
+ } else {
+ if (shift_val < 0) {
+ // Set rounding state. Rounding only needed on right shifts.
+ if (((src_val >> (-shift_val - 1)) & 1) == 1) {
+ dst.SetRounding(i, true);
+ }
+ src_val >>= -shift_val;
+ } else {
+ src_val <<= shift_val;
+ }
+ dst.SetInt(vform, i, src_val);
+ }
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::ushl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ int8_t shift_val = src2.Int(vform, i);
+ uint64_t lj_src_val = src1.UintLeftJustified(vform, i);
+
+ // Set saturation state.
+ if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) {
+ dst.SetUnsignedSat(i, true);
+ }
+
+ uint64_t src_val = src1.Uint(vform, i);
+ if ((shift_val > 63) || (shift_val < -64)) {
+ dst.SetUint(vform, i, 0);
+ } else {
+ if (shift_val < 0) {
+ // Set rounding state. Rounding only needed on right shifts.
+ if (((src_val >> (-shift_val - 1)) & 1) == 1) {
+ dst.SetRounding(i, true);
+ }
+
+ if (shift_val == -64) {
+ src_val = 0;
+ } else {
+ src_val >>= -shift_val;
+ }
+ } else {
+ src_val <<= shift_val;
+ }
+ dst.SetUint(vform, i, src_val);
+ }
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::neg(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ // Test for signed saturation.
+ int64_t sa = src.Int(vform, i);
+ if (sa == MinIntFromFormat(vform)) {
+ dst.SetSignedSat(i, true);
+ }
+ dst.SetInt(vform, i, -sa);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::suqadd(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ int64_t sa = dst.IntLeftJustified(vform, i);
+ uint64_t ub = src.UintLeftJustified(vform, i);
+ int64_t sr = sa + ub;
+
+ if (sr < sa) { // Test for signed positive saturation.
+ dst.SetInt(vform, i, MaxIntFromFormat(vform));
+ } else {
+ dst.SetInt(vform, i, dst.Int(vform, i) + src.Int(vform, i));
+ }
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::usqadd(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ uint64_t ua = dst.UintLeftJustified(vform, i);
+ int64_t sb = src.IntLeftJustified(vform, i);
+ uint64_t ur = ua + sb;
+
+ if ((sb > 0) && (ur <= ua)) {
+ dst.SetUint(vform, i, MaxUintFromFormat(vform)); // Positive saturation.
+ } else if ((sb < 0) && (ur >= ua)) {
+ dst.SetUint(vform, i, 0); // Negative saturation.
+ } else {
+ dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i));
+ }
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::abs(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ // Test for signed saturation.
+ int64_t sa = src.Int(vform, i);
+ if (sa == MinIntFromFormat(vform)) {
+ dst.SetSignedSat(i, true);
+ }
+ if (sa < 0) {
+ dst.SetInt(vform, i, -sa);
+ } else {
+ dst.SetInt(vform, i, sa);
+ }
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::extractnarrow(VectorFormat dstform,
+ LogicVRegister dst,
+ bool dstIsSigned,
+ const LogicVRegister& src,
+ bool srcIsSigned) {
+ bool upperhalf = false;
+ VectorFormat srcform = kFormatUndefined;
+ int64_t ssrc[8];
+ uint64_t usrc[8];
+
+ switch (dstform) {
+ case kFormat8B : upperhalf = false; srcform = kFormat8H; break;
+ case kFormat16B: upperhalf = true; srcform = kFormat8H; break;
+ case kFormat4H : upperhalf = false; srcform = kFormat4S; break;
+ case kFormat8H : upperhalf = true; srcform = kFormat4S; break;
+ case kFormat2S : upperhalf = false; srcform = kFormat2D; break;
+ case kFormat4S : upperhalf = true; srcform = kFormat2D; break;
+ case kFormatB : upperhalf = false; srcform = kFormatH; break;
+ case kFormatH : upperhalf = false; srcform = kFormatS; break;
+ case kFormatS : upperhalf = false; srcform = kFormatD; break;
+ default:VIXL_UNIMPLEMENTED();
+ }
+
+ for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
+ ssrc[i] = src.Int(srcform, i);
+ usrc[i] = src.Uint(srcform, i);
+ }
+
+ int offset;
+ if (upperhalf) {
+ offset = LaneCountFromFormat(dstform) / 2;
+ } else {
+ offset = 0;
+ dst.ClearForWrite(dstform);
+ }
+
+ for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
+ // Test for signed saturation
+ if (ssrc[i] > MaxIntFromFormat(dstform)) {
+ dst.SetSignedSat(offset + i, true);
+ } else if (ssrc[i] < MinIntFromFormat(dstform)) {
+ dst.SetSignedSat(offset + i, false);
+ }
+
+ // Test for unsigned saturation
+ if (srcIsSigned) {
+ if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
+ dst.SetUnsignedSat(offset + i, true);
+ } else if (ssrc[i] < 0) {
+ dst.SetUnsignedSat(offset + i, false);
+ }
+ } else {
+ if (usrc[i] > MaxUintFromFormat(dstform)) {
+ dst.SetUnsignedSat(offset + i, true);
+ }
+ }
+
+ int64_t result;
+ if (srcIsSigned) {
+ result = ssrc[i] & MaxUintFromFormat(dstform);
+ } else {
+ result = usrc[i] & MaxUintFromFormat(dstform);
+ }
+
+ if (dstIsSigned) {
+ dst.SetInt(dstform, offset + i, result);
+ } else {
+ dst.SetUint(dstform, offset + i, result);
+ }
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::xtn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ return extractnarrow(vform, dst, true, src, true);
+}
+
+
+LogicVRegister Simulator::sqxtn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ return extractnarrow(vform, dst, true, src, true).SignedSaturate(vform);
+}
+
+
+LogicVRegister Simulator::sqxtun(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ return extractnarrow(vform, dst, false, src, true).UnsignedSaturate(vform);
+}
+
+
+LogicVRegister Simulator::uqxtn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ return extractnarrow(vform, dst, false, src, false).UnsignedSaturate(vform);
+}
+
+
+LogicVRegister Simulator::absdiff(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ bool issigned) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ if (issigned) {
+ int64_t sr = src1.Int(vform, i) - src2.Int(vform, i);
+ sr = sr > 0 ? sr : -sr;
+ dst.SetInt(vform, i, sr);
+ } else {
+ int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i);
+ sr = sr > 0 ? sr : -sr;
+ dst.SetUint(vform, i, sr);
+ }
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::saba(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ dst.ClearForWrite(vform);
+ absdiff(vform, temp, src1, src2, true);
+ add(vform, dst, dst, temp);
+ return dst;
+}
+
+
+LogicVRegister Simulator::uaba(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ dst.ClearForWrite(vform);
+ absdiff(vform, temp, src1, src2, false);
+ add(vform, dst, dst, temp);
+ return dst;
+}
+
+
+LogicVRegister Simulator::not_(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.SetUint(vform, i, ~src.Uint(vform, i));
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::rbit(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ uint64_t result[16];
+ int laneCount = LaneCountFromFormat(vform);
+ int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
+ uint64_t reversed_value;
+ uint64_t value;
+ for (int i = 0; i < laneCount; i++) {
+ value = src.Uint(vform, i);
+ reversed_value = 0;
+ for (int j = 0; j < laneSizeInBits; j++) {
+ reversed_value = (reversed_value << 1) | (value & 1);
+ value >>= 1;
+ }
+ result[i] = reversed_value;
+ }
+
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ dst.SetUint(vform, i, result[i]);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::rev(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int revSize) {
+ uint64_t result[16];
+ int laneCount = LaneCountFromFormat(vform);
+ int laneSize = LaneSizeInBytesFromFormat(vform);
+ int lanesPerLoop = revSize / laneSize;
+ for (int i = 0; i < laneCount; i += lanesPerLoop) {
+ for (int j = 0; j < lanesPerLoop; j++) {
+ result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j);
+ }
+ }
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ dst.SetUint(vform, i, result[i]);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::rev16(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ return rev(vform, dst, src, 2);
+}
+
+
+LogicVRegister Simulator::rev32(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ return rev(vform, dst, src, 4);
+}
+
+
+LogicVRegister Simulator::rev64(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ return rev(vform, dst, src, 8);
+}
+
+
+LogicVRegister Simulator::addlp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ bool is_signed,
+ bool do_accumulate) {
+ VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);
+
+ int64_t sr[16];
+ uint64_t ur[16];
+
+ int laneCount = LaneCountFromFormat(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ if (is_signed) {
+ sr[i] = src.Int(vformsrc, 2 * i) + src.Int(vformsrc, 2 * i + 1);
+ } else {
+ ur[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);
+ }
+ }
+
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ if (do_accumulate) {
+ if (is_signed) {
+ dst.SetInt(vform, i, dst.Int(vform, i) + sr[i]);
+ } else {
+ dst.SetUint(vform, i, dst.Uint(vform, i) + ur[i]);
+ }
+ } else {
+ if (is_signed) {
+ dst.SetInt(vform, i, sr[i]);
+ } else {
+ dst.SetUint(vform, i, ur[i]);
+ }
+ }
+ }
+
+ return dst;
+}
+
+
+LogicVRegister Simulator::saddlp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ return addlp(vform, dst, src, true, false);
+}
+
+
+LogicVRegister Simulator::uaddlp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ return addlp(vform, dst, src, false, false);
+}
+
+
+LogicVRegister Simulator::sadalp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ return addlp(vform, dst, src, true, true);
+}
+
+
+LogicVRegister Simulator::uadalp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ return addlp(vform, dst, src, false, true);
+}
+
+
+LogicVRegister Simulator::ext(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ uint8_t result[16];
+ int laneCount = LaneCountFromFormat(vform);
+ for (int i = 0; i < laneCount - index; ++i) {
+ result[i] = src1.Uint(vform, i + index);
+ }
+ for (int i = 0; i < index; ++i) {
+ result[laneCount - index + i] = src2.Uint(vform, i);
+ }
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ dst.SetUint(vform, i, result[i]);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::dup_element(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int src_index) {
+ int laneCount = LaneCountFromFormat(vform);
+ uint64_t value = src.Uint(vform, src_index);
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ dst.SetUint(vform, i, value);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::dup_immediate(VectorFormat vform,
+ LogicVRegister dst,
+ uint64_t imm) {
+ int laneCount = LaneCountFromFormat(vform);
+ uint64_t value = imm & MaxUintFromFormat(vform);
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ dst.SetUint(vform, i, value);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::ins_element(VectorFormat vform,
+ LogicVRegister dst,
+ int dst_index,
+ const LogicVRegister& src,
+ int src_index) {
+ dst.SetUint(vform, dst_index, src.Uint(vform, src_index));
+ return dst;
+}
+
+
+LogicVRegister Simulator::ins_immediate(VectorFormat vform,
+ LogicVRegister dst,
+ int dst_index,
+ uint64_t imm) {
+ uint64_t value = imm & MaxUintFromFormat(vform);
+ dst.SetUint(vform, dst_index, value);
+ return dst;
+}
+
+
+LogicVRegister Simulator::mov(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ dst.ClearForWrite(vform);
+ for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
+ dst.SetUint(vform, lane, src.Uint(vform, lane));
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::movi(VectorFormat vform,
+ LogicVRegister dst,
+ uint64_t imm) {
+ int laneCount = LaneCountFromFormat(vform);
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ dst.SetUint(vform, i, imm);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::mvni(VectorFormat vform,
+ LogicVRegister dst,
+ uint64_t imm) {
+ int laneCount = LaneCountFromFormat(vform);
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ dst.SetUint(vform, i, ~imm);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::orr(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ uint64_t imm) {
+ uint64_t result[16];
+ int laneCount = LaneCountFromFormat(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ result[i] = src.Uint(vform, i) | imm;
+ }
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ dst.SetUint(vform, i, result[i]);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::uxtl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ VectorFormat vform_half = VectorFormatHalfWidth(vform);
+
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.SetUint(vform, i, src.Uint(vform_half, i));
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::sxtl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ VectorFormat vform_half = VectorFormatHalfWidth(vform);
+
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.SetInt(vform, i, src.Int(vform_half, i));
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::uxtl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ VectorFormat vform_half = VectorFormatHalfWidth(vform);
+ int lane_count = LaneCountFromFormat(vform);
+
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < lane_count; i++) {
+ dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i));
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::sxtl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ VectorFormat vform_half = VectorFormatHalfWidth(vform);
+ int lane_count = LaneCountFromFormat(vform);
+
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < lane_count; i++) {
+ dst.SetInt(vform, i, src.Int(vform_half, lane_count + i));
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::shrn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ SimVRegister temp;
+ VectorFormat vform_src = VectorFormatDoubleWidth(vform);
+ VectorFormat vform_dst = vform;
+ LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);
+ return extractnarrow(vform_dst, dst, false, shifted_src, false);
+}
+
+
+LogicVRegister Simulator::shrn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ SimVRegister temp;
+ VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
+ VectorFormat vformdst = vform;
+ LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);
+ return extractnarrow(vformdst, dst, false, shifted_src, false);
+}
+
+
+LogicVRegister Simulator::rshrn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ SimVRegister temp;
+ VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
+ VectorFormat vformdst = vform;
+ LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
+ return extractnarrow(vformdst, dst, false, shifted_src, false);
+}
+
+
+LogicVRegister Simulator::rshrn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ SimVRegister temp;
+ VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
+ VectorFormat vformdst = vform;
+ LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
+ return extractnarrow(vformdst, dst, false, shifted_src, false);
+}
+
+
+LogicVRegister Simulator::tbl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& tab,
+ const LogicVRegister& ind) {
+ SimVRegister result;
+ movi(vform, result, 0);
+ tbx(vform, result, tab, ind);
+ return orr(vform, dst, result, result);
+}
+
+
+LogicVRegister Simulator::tbl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& tab,
+ const LogicVRegister& tab2,
+ const LogicVRegister& ind) {
+ SimVRegister result;
+ movi(vform, result, 0);
+ tbx(vform, result, tab, tab2, ind);
+ return orr(vform, dst, result, result);
+}
+
+
+LogicVRegister Simulator::tbl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& tab,
+ const LogicVRegister& tab2,
+ const LogicVRegister& tab3,
+ const LogicVRegister& ind) {
+ SimVRegister result;
+ movi(vform, result, 0);
+ tbx(vform, result, tab, tab2, tab3, ind);
+ return orr(vform, dst, result, result);
+}
+
+
+LogicVRegister Simulator::tbl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& tab,
+ const LogicVRegister& tab2,
+ const LogicVRegister& tab3,
+ const LogicVRegister& tab4,
+ const LogicVRegister& ind) {
+ SimVRegister result;
+ movi(vform, result, 0);
+ tbx(vform, result, tab, tab2, tab3, tab4, ind);
+ return orr(vform, dst, result, result);
+}
+
+
+LogicVRegister Simulator::tbx(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& tab,
+ const LogicVRegister& ind) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ uint64_t j = ind.Uint(vform, i);
+ switch (j >> 4) {
+ case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break;
+ }
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::tbx(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& tab,
+ const LogicVRegister& tab2,
+ const LogicVRegister& ind) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ uint64_t j = ind.Uint(vform, i);
+ switch (j >> 4) {
+ case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break;
+ case 1: dst.SetUint(vform, i, tab2.Uint(kFormat16B, j & 15)); break;
+ }
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::tbx(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& tab,
+ const LogicVRegister& tab2,
+ const LogicVRegister& tab3,
+ const LogicVRegister& ind) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ uint64_t j = ind.Uint(vform, i);
+ switch (j >> 4) {
+ case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break;
+ case 1: dst.SetUint(vform, i, tab2.Uint(kFormat16B, j & 15)); break;
+ case 2: dst.SetUint(vform, i, tab3.Uint(kFormat16B, j & 15)); break;
+ }
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::tbx(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& tab,
+ const LogicVRegister& tab2,
+ const LogicVRegister& tab3,
+ const LogicVRegister& tab4,
+ const LogicVRegister& ind) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ uint64_t j = ind.Uint(vform, i);
+ switch (j >> 4) {
+ case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break;
+ case 1: dst.SetUint(vform, i, tab2.Uint(kFormat16B, j & 15)); break;
+ case 2: dst.SetUint(vform, i, tab3.Uint(kFormat16B, j & 15)); break;
+ case 3: dst.SetUint(vform, i, tab4.Uint(kFormat16B, j & 15)); break;
+ }
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::uqshrn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ return shrn(vform, dst, src, shift).UnsignedSaturate(vform);
+}
+
+
+LogicVRegister Simulator::uqshrn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);
+}
+
+
+LogicVRegister Simulator::uqrshrn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);
+}
+
+
+LogicVRegister Simulator::uqrshrn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);
+}
+
+
+LogicVRegister Simulator::sqshrn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ SimVRegister temp;
+ VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
+ VectorFormat vformdst = vform;
+ LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
+ return sqxtn(vformdst, dst, shifted_src);
+}
+
+
+LogicVRegister Simulator::sqshrn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ SimVRegister temp;
+ VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
+ VectorFormat vformdst = vform;
+ LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
+ return sqxtn(vformdst, dst, shifted_src);
+}
+
+
+LogicVRegister Simulator::sqrshrn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ SimVRegister temp;
+ VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
+ VectorFormat vformdst = vform;
+ LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
+ return sqxtn(vformdst, dst, shifted_src);
+}
+
+
+LogicVRegister Simulator::sqrshrn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ SimVRegister temp;
+ VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
+ VectorFormat vformdst = vform;
+ LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
+ return sqxtn(vformdst, dst, shifted_src);
+}
+
+
+LogicVRegister Simulator::sqshrun(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ SimVRegister temp;
+ VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
+ VectorFormat vformdst = vform;
+ LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
+ return sqxtun(vformdst, dst, shifted_src);
+}
+
+
+LogicVRegister Simulator::sqshrun2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ SimVRegister temp;
+ VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
+ VectorFormat vformdst = vform;
+ LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
+ return sqxtun(vformdst, dst, shifted_src);
+}
+
+
+LogicVRegister Simulator::sqrshrun(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ SimVRegister temp;
+ VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
+ VectorFormat vformdst = vform;
+ LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
+ return sqxtun(vformdst, dst, shifted_src);
+}
+
+
+LogicVRegister Simulator::sqrshrun2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift) {
+ SimVRegister temp;
+ VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
+ VectorFormat vformdst = vform;
+ LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
+ return sqxtun(vformdst, dst, shifted_src);
+}
+
+
+LogicVRegister Simulator::uaddl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ uxtl(vform, temp1, src1);
+ uxtl(vform, temp2, src2);
+ add(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::uaddl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ uxtl2(vform, temp1, src1);
+ uxtl2(vform, temp2, src2);
+ add(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::uaddw(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ uxtl(vform, temp, src2);
+ add(vform, dst, src1, temp);
+ return dst;
+}
+
+
+LogicVRegister Simulator::uaddw2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ uxtl2(vform, temp, src2);
+ add(vform, dst, src1, temp);
+ return dst;
+}
+
+
+LogicVRegister Simulator::saddl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ sxtl(vform, temp1, src1);
+ sxtl(vform, temp2, src2);
+ add(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::saddl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ sxtl2(vform, temp1, src1);
+ sxtl2(vform, temp2, src2);
+ add(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::saddw(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ sxtl(vform, temp, src2);
+ add(vform, dst, src1, temp);
+ return dst;
+}
+
+
+LogicVRegister Simulator::saddw2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ sxtl2(vform, temp, src2);
+ add(vform, dst, src1, temp);
+ return dst;
+}
+
+
+LogicVRegister Simulator::usubl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ uxtl(vform, temp1, src1);
+ uxtl(vform, temp2, src2);
+ sub(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::usubl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ uxtl2(vform, temp1, src1);
+ uxtl2(vform, temp2, src2);
+ sub(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::usubw(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ uxtl(vform, temp, src2);
+ sub(vform, dst, src1, temp);
+ return dst;
+}
+
+
+LogicVRegister Simulator::usubw2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ uxtl2(vform, temp, src2);
+ sub(vform, dst, src1, temp);
+ return dst;
+}
+
+
+LogicVRegister Simulator::ssubl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ sxtl(vform, temp1, src1);
+ sxtl(vform, temp2, src2);
+ sub(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::ssubl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ sxtl2(vform, temp1, src1);
+ sxtl2(vform, temp2, src2);
+ sub(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::ssubw(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ sxtl(vform, temp, src2);
+ sub(vform, dst, src1, temp);
+ return dst;
+}
+
+
+LogicVRegister Simulator::ssubw2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ sxtl2(vform, temp, src2);
+ sub(vform, dst, src1, temp);
+ return dst;
+}
+
+
+LogicVRegister Simulator::uabal(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ uxtl(vform, temp1, src1);
+ uxtl(vform, temp2, src2);
+ uaba(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::uabal2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ uxtl2(vform, temp1, src1);
+ uxtl2(vform, temp2, src2);
+ uaba(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::sabal(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ sxtl(vform, temp1, src1);
+ sxtl(vform, temp2, src2);
+ saba(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::sabal2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ sxtl2(vform, temp1, src1);
+ sxtl2(vform, temp2, src2);
+ saba(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::uabdl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ uxtl(vform, temp1, src1);
+ uxtl(vform, temp2, src2);
+ absdiff(vform, dst, temp1, temp2, false);
+ return dst;
+}
+
+
+LogicVRegister Simulator::uabdl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ uxtl2(vform, temp1, src1);
+ uxtl2(vform, temp2, src2);
+ absdiff(vform, dst, temp1, temp2, false);
+ return dst;
+}
+
+
+LogicVRegister Simulator::sabdl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ sxtl(vform, temp1, src1);
+ sxtl(vform, temp2, src2);
+ absdiff(vform, dst, temp1, temp2, true);
+ return dst;
+}
+
+
+LogicVRegister Simulator::sabdl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ sxtl2(vform, temp1, src1);
+ sxtl2(vform, temp2, src2);
+ absdiff(vform, dst, temp1, temp2, true);
+ return dst;
+}
+
+
+LogicVRegister Simulator::umull(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ uxtl(vform, temp1, src1);
+ uxtl(vform, temp2, src2);
+ mul(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::umull2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ uxtl2(vform, temp1, src1);
+ uxtl2(vform, temp2, src2);
+ mul(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::smull(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ sxtl(vform, temp1, src1);
+ sxtl(vform, temp2, src2);
+ mul(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::smull2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ sxtl2(vform, temp1, src1);
+ sxtl2(vform, temp2, src2);
+ mul(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::umlsl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ uxtl(vform, temp1, src1);
+ uxtl(vform, temp2, src2);
+ mls(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::umlsl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ uxtl2(vform, temp1, src1);
+ uxtl2(vform, temp2, src2);
+ mls(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::smlsl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ sxtl(vform, temp1, src1);
+ sxtl(vform, temp2, src2);
+ mls(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::smlsl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ sxtl2(vform, temp1, src1);
+ sxtl2(vform, temp2, src2);
+ mls(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::umlal(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ uxtl(vform, temp1, src1);
+ uxtl(vform, temp2, src2);
+ mla(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::umlal2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ uxtl2(vform, temp1, src1);
+ uxtl2(vform, temp2, src2);
+ mla(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::smlal(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ sxtl(vform, temp1, src1);
+ sxtl(vform, temp2, src2);
+ mla(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::smlal2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp1, temp2;
+ sxtl2(vform, temp1, src1);
+ sxtl2(vform, temp2, src2);
+ mla(vform, dst, temp1, temp2);
+ return dst;
+}
+
+
+LogicVRegister Simulator::sqdmlal(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ LogicVRegister product = sqdmull(vform, temp, src1, src2);
+ return add(vform, dst, dst, product).SignedSaturate(vform);
+}
+
+
+LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ LogicVRegister product = sqdmull2(vform, temp, src1, src2);
+ return add(vform, dst, dst, product).SignedSaturate(vform);
+}
+
+
+LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ LogicVRegister product = sqdmull(vform, temp, src1, src2);
+ return sub(vform, dst, dst, product).SignedSaturate(vform);
+}
+
+
+LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ LogicVRegister product = sqdmull2(vform, temp, src1, src2);
+ return sub(vform, dst, dst, product).SignedSaturate(vform);
+}
+
+
+LogicVRegister Simulator::sqdmull(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ LogicVRegister product = smull(vform, temp, src1, src2);
+ return add(vform, dst, product, product).SignedSaturate(vform);
+}
+
+
+LogicVRegister Simulator::sqdmull2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ LogicVRegister product = smull2(vform, temp, src1, src2);
+ return add(vform, dst, product, product).SignedSaturate(vform);
+}
+
+
+LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ bool round) {
+ // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
+ // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
+ // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize.
+
+ int esize = LaneSizeInBitsFromFormat(vform);
+ int round_const = round ? (1 << (esize - 2)) : 0;
+ int64_t product;
+
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ product = src1.Int(vform, i) * src2.Int(vform, i);
+ product += round_const;
+ product = product >> (esize - 1);
+
+ if (product > MaxIntFromFormat(vform)) {
+ product = MaxIntFromFormat(vform);
+ } else if (product < MinIntFromFormat(vform)) {
+ product = MinIntFromFormat(vform);
+ }
+ dst.SetInt(vform, i, product);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::sqdmulh(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ return sqrdmulh(vform, dst, src1, src2, false);
+}
+
+
+LogicVRegister Simulator::addhn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ add(VectorFormatDoubleWidth(vform), temp, src1, src2);
+ shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
+ return dst;
+}
+
+
+LogicVRegister Simulator::addhn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
+ shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
+ return dst;
+}
+
+
+LogicVRegister Simulator::raddhn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ add(VectorFormatDoubleWidth(vform), temp, src1, src2);
+ rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
+ return dst;
+}
+
+
+LogicVRegister Simulator::raddhn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
+ rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
+ return dst;
+}
+
+
+LogicVRegister Simulator::subhn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
+ shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
+ return dst;
+}
+
+
+LogicVRegister Simulator::subhn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
+ shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
+ return dst;
+}
+
+
+LogicVRegister Simulator::rsubhn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
+ rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
+ return dst;
+}
+
+
+LogicVRegister Simulator::rsubhn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
+ rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
+ return dst;
+}
+
+
+LogicVRegister Simulator::trn1(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ uint64_t result[16];
+ int laneCount = LaneCountFromFormat(vform);
+ int pairs = laneCount / 2;
+ for (int i = 0; i < pairs; ++i) {
+ result[2 * i] = src1.Uint(vform, 2 * i);
+ result[(2 * i) + 1] = src2.Uint(vform, 2 * i);
+ }
+
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ dst.SetUint(vform, i, result[i]);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::trn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ uint64_t result[16];
+ int laneCount = LaneCountFromFormat(vform);
+ int pairs = laneCount / 2;
+ for (int i = 0; i < pairs; ++i) {
+ result[2 * i] = src1.Uint(vform, (2 * i) + 1);
+ result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);
+ }
+
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ dst.SetUint(vform, i, result[i]);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::zip1(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ uint64_t result[16];
+ int laneCount = LaneCountFromFormat(vform);
+ int pairs = laneCount / 2;
+ for (int i = 0; i < pairs; ++i) {
+ result[2 * i] = src1.Uint(vform, i);
+ result[(2 * i) + 1] = src2.Uint(vform, i);
+ }
+
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ dst.SetUint(vform, i, result[i]);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::zip2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ uint64_t result[16];
+ int laneCount = LaneCountFromFormat(vform);
+ int pairs = laneCount / 2;
+ for (int i = 0; i < pairs; ++i) {
+ result[2 * i] = src1.Uint(vform, pairs + i);
+ result[(2 * i) + 1] = src2.Uint(vform, pairs + i);
+ }
+
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ dst.SetUint(vform, i, result[i]);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::uzp1(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ uint64_t result[32];
+ int laneCount = LaneCountFromFormat(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ result[i] = src1.Uint(vform, i);
+ result[laneCount + i] = src2.Uint(vform, i);
+ }
+
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ dst.SetUint(vform, i, result[2 * i]);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::uzp2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ uint64_t result[32];
+ int laneCount = LaneCountFromFormat(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ result[i] = src1.Uint(vform, i);
+ result[laneCount + i] = src2.Uint(vform, i);
+ }
+
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < laneCount; ++i) {
+ dst.SetUint(vform, i, result[ (2 * i) + 1]);
+ }
+ return dst;
+}
+
+
+template <typename T>
+T Simulator::FPAdd(T op1, T op2) {
+ T result = FPProcessNaNs(op1, op2);
+ if (std::isnan(result)) return result;
+
+ if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) {
+ // inf + -inf returns the default NaN.
+ FPProcessException();
+ return FPDefaultNaN<T>();
+ } else {
+ // Other cases should be handled by standard arithmetic.
+ return op1 + op2;
+ }
+}
+
+
+template <typename T>
+T Simulator::FPSub(T op1, T op2) {
+ // NaNs should be handled elsewhere.
+ VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
+
+ if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) {
+ // inf - inf returns the default NaN.
+ FPProcessException();
+ return FPDefaultNaN<T>();
+ } else {
+ // Other cases should be handled by standard arithmetic.
+ return op1 - op2;
+ }
+}
+
+
+template <typename T>
+T Simulator::FPMul(T op1, T op2) {
+ // NaNs should be handled elsewhere.
+ VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
+
+ if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
+ // inf * 0.0 returns the default NaN.
+ FPProcessException();
+ return FPDefaultNaN<T>();
+ } else {
+ // Other cases should be handled by standard arithmetic.
+ return op1 * op2;
+ }
+}
+
+
+template<typename T>
+T Simulator::FPMulx(T op1, T op2) {
+ if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
+ // inf * 0.0 returns +/-2.0.
+ T two = 2.0;
+ return copysign(1.0, op1) * copysign(1.0, op2) * two;
+ }
+ return FPMul(op1, op2);
+}
+
+
+template<typename T>
+T Simulator::FPMulAdd(T a, T op1, T op2) {
+ T result = FPProcessNaNs3(a, op1, op2);
+
+ T sign_a = copysign(1.0, a);
+ T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
+ bool isinf_prod = std::isinf(op1) || std::isinf(op2);
+ bool operation_generates_nan =
+ (std::isinf(op1) && (op2 == 0.0)) || // inf * 0.0
+ (std::isinf(op2) && (op1 == 0.0)) || // 0.0 * inf
+ (std::isinf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf
+
+ if (std::isnan(result)) {
+ // Generated NaNs override quiet NaNs propagated from a.
+ if (operation_generates_nan && IsQuietNaN(a)) {
+ FPProcessException();
+ return FPDefaultNaN<T>();
+ } else {
+ return result;
+ }
+ }
+
+ // If the operation would produce a NaN, return the default NaN.
+ if (operation_generates_nan) {
+ FPProcessException();
+ return FPDefaultNaN<T>();
+ }
+
+ // Work around broken fma implementations for exact zero results: The sign of
+ // exact 0.0 results is positive unless both a and op1 * op2 are negative.
+ if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
+ return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0;
+ }
+
+ result = FusedMultiplyAdd(op1, op2, a);
+ VIXL_ASSERT(!std::isnan(result));
+
+ // Work around broken fma implementations for rounded zero results: If a is
+ // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
+ if ((a == 0.0) && (result == 0.0)) {
+ return copysign(0.0, sign_prod);
+ }
+
+ return result;
+}
+
+
+template <typename T>
+T Simulator::FPDiv(T op1, T op2) {
+ // NaNs should be handled elsewhere.
+ VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
+
+ if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
+ // inf / inf and 0.0 / 0.0 return the default NaN.
+ FPProcessException();
+ return FPDefaultNaN<T>();
+ } else {
+ if (op2 == 0.0) FPProcessException();
+
+ // Other cases should be handled by standard arithmetic.
+ return op1 / op2;
+ }
+}
+
+
+template <typename T>
+T Simulator::FPSqrt(T op) {
+ if (std::isnan(op)) {
+ return FPProcessNaN(op);
+ } else if (op < 0.0) {
+ FPProcessException();
+ return FPDefaultNaN<T>();
+ } else {
+ return sqrt(op);
+ }
+}
+
+
+template <typename T>
+T Simulator::FPMax(T a, T b) {
+ T result = FPProcessNaNs(a, b);
+ if (std::isnan(result)) return result;
+
+ if ((a == 0.0) && (b == 0.0) &&
+ (copysign(1.0, a) != copysign(1.0, b))) {
+ // a and b are zero, and the sign differs: return +0.0.
+ return 0.0;
+ } else {
+ return (a > b) ? a : b;
+ }
+}
+
+
+template <typename T>
+T Simulator::FPMaxNM(T a, T b) {
+ if (IsQuietNaN(a) && !IsQuietNaN(b)) {
+ a = kFP64NegativeInfinity;
+ } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
+ b = kFP64NegativeInfinity;
+ }
+
+ T result = FPProcessNaNs(a, b);
+ return std::isnan(result) ? result : FPMax(a, b);
+}
+
+
+template <typename T>
+T Simulator::FPMin(T a, T b) {
+ T result = FPProcessNaNs(a, b);
+ if (std::isnan(result)) return result;
+
+ if ((a == 0.0) && (b == 0.0) &&
+ (copysign(1.0, a) != copysign(1.0, b))) {
+ // a and b are zero, and the sign differs: return -0.0.
+ return -0.0;
+ } else {
+ return (a < b) ? a : b;
+ }
+}
+
+
+template <typename T>
+T Simulator::FPMinNM(T a, T b) {
+ if (IsQuietNaN(a) && !IsQuietNaN(b)) {
+ a = kFP64PositiveInfinity;
+ } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
+ b = kFP64PositiveInfinity;
+ }
+
+ T result = FPProcessNaNs(a, b);
+ return std::isnan(result) ? result : FPMin(a, b);
+}
+
+
+template <typename T>
+T Simulator::FPRecipStepFused(T op1, T op2) {
+ const T two = 2.0;
+ if ((std::isinf(op1) && (op2 == 0.0))
+ || ((op1 == 0.0) && (std::isinf(op2)))) {
+ return two;
+ } else if (std::isinf(op1) || std::isinf(op2)) {
+ // Return +inf if signs match, otherwise -inf.
+ return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
+ : kFP64NegativeInfinity;
+ } else {
+ return FusedMultiplyAdd(op1, op2, two);
+ }
+}
+
+
+template <typename T>
+T Simulator::FPRSqrtStepFused(T op1, T op2) {
+ const T one_point_five = 1.5;
+ const T two = 2.0;
+
+ if ((std::isinf(op1) && (op2 == 0.0))
+ || ((op1 == 0.0) && (std::isinf(op2)))) {
+ return one_point_five;
+ } else if (std::isinf(op1) || std::isinf(op2)) {
+ // Return +inf if signs match, otherwise -inf.
+ return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
+ : kFP64NegativeInfinity;
+ } else {
+ // The multiply-add-halve operation must be fully fused, so avoid interim
+ // rounding by checking which operand can be losslessly divided by two
+ // before doing the multiply-add.
+ if (std::isnormal(op1 / two)) {
+ return FusedMultiplyAdd(op1 / two, op2, one_point_five);
+ } else if (std::isnormal(op2 / two)) {
+ return FusedMultiplyAdd(op1, op2 / two, one_point_five);
+ } else {
+ // Neither operand is normal after halving: the result is dominated by
+ // the addition term, so just return that.
+ return one_point_five;
+ }
+ }
+}
+
+int32_t Simulator::FPToFixedJS(double value) {
+ // The Z-flag is set when the conversion from double precision floating-point
+ // to 32-bit integer is exact. If the source value is +/-Infinity, -0.0, NaN,
+ // outside the bounds of a 32-bit integer, or isn't an exact integer then the
+ // Z-flag is unset.
+ int Z = 1;
+ int32_t result;
+
+ if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
+ (value == kFP64NegativeInfinity)) {
+ // +/- zero and infinity all return zero, however -0 and +/- Infinity also
+ // unset the Z-flag.
+ result = 0.0;
+ if ((value != 0.0) || std::signbit(value)) {
+ Z = 0;
+ }
+ } else if (std::isnan(value)) {
+ // NaN values unset the Z-flag and set the result to 0.
+ FPProcessNaN(value);
+ result = 0;
+ Z = 0;
+ } else {
+ // All other values are converted to an integer representation, rounded
+ // toward zero.
+ double int_result = std::floor(value);
+ double error = value - int_result;
+
+ if ((error != 0.0) && (int_result < 0.0)) {
+ int_result++;
+ }
+
+ // Constrain the value into the range [INT32_MIN, INT32_MAX]. We can almost
+ // write a one-liner with std::round, but the behaviour on ties is incorrect
+ // for our purposes.
+ double mod_const = static_cast<double>(UINT64_C(1) << 32);
+ double mod_error =
+ (int_result / mod_const) - std::floor(int_result / mod_const);
+ double constrained;
+ if (mod_error == 0.5) {
+ constrained = INT32_MIN;
+ } else {
+ constrained = int_result - mod_const * round(int_result / mod_const);
+ }
+
+ VIXL_ASSERT(std::floor(constrained) == constrained);
+ VIXL_ASSERT(constrained >= INT32_MIN);
+ VIXL_ASSERT(constrained <= INT32_MAX);
+
+ // Take the bottom 32 bits of the result as a 32-bit integer.
+ result = static_cast<int32_t>(constrained);
+
+ if ((int_result < INT32_MIN) || (int_result > INT32_MAX) ||
+ (error != 0.0)) {
+ // If the integer result is out of range or the conversion isn't exact,
+ // take exception and unset the Z-flag.
+ FPProcessException();
+ Z = 0;
+ }
+ }
+
+ ReadNzcv().SetN(0);
+ ReadNzcv().SetZ(Z);
+ ReadNzcv().SetC(0);
+ ReadNzcv().SetV(0);
+
+ return result;
+}
+
+
+double Simulator::FPRoundInt(double value, FPRounding round_mode) {
+ if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
+ (value == kFP64NegativeInfinity)) {
+ return value;
+ } else if (std::isnan(value)) {
+ return FPProcessNaN(value);
+ }
+
+ double int_result = std::floor(value);
+ double error = value - int_result;
+ switch (round_mode) {
+ case FPTieAway: {
+ // Take care of correctly handling the range ]-0.5, -0.0], which must
+ // yield -0.0.
+ if ((-0.5 < value) && (value < 0.0)) {
+ int_result = -0.0;
+
+ } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
+ // If the error is greater than 0.5, or is equal to 0.5 and the integer
+ // result is positive, round up.
+ int_result++;
+ }
+ break;
+ }
+ case FPTieEven: {
+ // Take care of correctly handling the range [-0.5, -0.0], which must
+ // yield -0.0.
+ if ((-0.5 <= value) && (value < 0.0)) {
+ int_result = -0.0;
+
+ // If the error is greater than 0.5, or is equal to 0.5 and the integer
+ // result is odd, round up.
+ } else if ((error > 0.5) ||
+ ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
+ int_result++;
+ }
+ break;
+ }
+ case FPZero: {
+ // If value>0 then we take floor(value)
+ // otherwise, ceil(value).
+ if (value < 0) {
+ int_result = ceil(value);
+ }
+ break;
+ }
+ case FPNegativeInfinity: {
+ // We always use floor(value).
+ break;
+ }
+ case FPPositiveInfinity: {
+ // Take care of correctly handling the range ]-1.0, -0.0], which must
+ // yield -0.0.
+ if ((-1.0 < value) && (value < 0.0)) {
+ int_result = -0.0;
+
+ // If the error is non-zero, round up.
+ } else if (error > 0.0) {
+ int_result++;
+ }
+ break;
+ }
+ default: VIXL_UNIMPLEMENTED();
+ }
+ return int_result;
+}
+
+
+int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
+ value = FPRoundInt(value, rmode);
+ if (value >= kWMaxInt) {
+ return kWMaxInt;
+ } else if (value < kWMinInt) {
+ return kWMinInt;
+ }
+ return std::isnan(value) ? 0 : static_cast<int32_t>(value);
+}
+
+
+int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
+ value = FPRoundInt(value, rmode);
+ // The compiler would have to round kXMaxInt, triggering a warning. Compare
+ // against the largest int64_t that is exactly representable as a double.
+ if (value > kXMaxExactInt) {
+ return kXMaxInt;
+ } else if (value < kXMinInt) {
+ return kXMinInt;
+ }
+ return std::isnan(value) ? 0 : static_cast<int64_t>(value);
+}
+
+
+uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
+ value = FPRoundInt(value, rmode);
+ if (value >= kWMaxUInt) {
+ return kWMaxUInt;
+ } else if (value < 0.0) {
+ return 0;
+ }
+ return std::isnan(value) ? 0 : static_cast<uint32_t>(value);
+}
+
+
+uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
+ value = FPRoundInt(value, rmode);
+ // The compiler would have to round kXMaxUInt, triggering a warning. Compare
+ // against the largest uint64_t that is exactly representable as a double.
+ if (value > kXMaxExactUInt) {
+ return kXMaxUInt;
+ } else if (value < 0.0) {
+ return 0;
+ }
+ return std::isnan(value) ? 0 : static_cast<uint64_t>(value);
+}
+
+
+#define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN) \
+template <typename T> \
+LogicVRegister Simulator::FN(VectorFormat vform, \
+ LogicVRegister dst, \
+ const LogicVRegister& src1, \
+ const LogicVRegister& src2) { \
+ dst.ClearForWrite(vform); \
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) { \
+ T op1 = src1.Float<T>(i); \
+ T op2 = src2.Float<T>(i); \
+ T result; \
+ if (PROCNAN) { \
+ result = FPProcessNaNs(op1, op2); \
+ if (!std::isnan(result)) { \
+ result = OP(op1, op2); \
+ } \
+ } else { \
+ result = OP(op1, op2); \
+ } \
+ dst.SetFloat(i, result); \
+ } \
+ return dst; \
+} \
+ \
+LogicVRegister Simulator::FN(VectorFormat vform, \
+ LogicVRegister dst, \
+ const LogicVRegister& src1, \
+ const LogicVRegister& src2) { \
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { \
+ FN<float>(vform, dst, src1, src2); \
+ } else { \
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); \
+ FN<double>(vform, dst, src1, src2); \
+ } \
+ return dst; \
+}
+NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
+#undef DEFINE_NEON_FP_VECTOR_OP
+
+
+LogicVRegister Simulator::fnmul(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ LogicVRegister product = fmul(vform, temp, src1, src2);
+ return fneg(vform, dst, product);
+}
+
+
+template <typename T>
+LogicVRegister Simulator::frecps(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ T op1 = -src1.Float<T>(i);
+ T op2 = src2.Float<T>(i);
+ T result = FPProcessNaNs(op1, op2);
+ dst.SetFloat(i, std::isnan(result) ? result : FPRecipStepFused(op1, op2));
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::frecps(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ frecps<float>(vform, dst, src1, src2);
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ frecps<double>(vform, dst, src1, src2);
+ }
+ return dst;
+}
+
+
+template <typename T>
+LogicVRegister Simulator::frsqrts(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ T op1 = -src1.Float<T>(i);
+ T op2 = src2.Float<T>(i);
+ T result = FPProcessNaNs(op1, op2);
+ dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2));
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::frsqrts(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ frsqrts<float>(vform, dst, src1, src2);
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ frsqrts<double>(vform, dst, src1, src2);
+ }
+ return dst;
+}
+
+
+template <typename T>
+LogicVRegister Simulator::fcmp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ Condition cond) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ bool result = false;
+ T op1 = src1.Float<T>(i);
+ T op2 = src2.Float<T>(i);
+ T nan_result = FPProcessNaNs(op1, op2);
+ if (!std::isnan(nan_result)) {
+ switch (cond) {
+ case eq: result = (op1 == op2); break;
+ case ge: result = (op1 >= op2); break;
+ case gt: result = (op1 > op2) ; break;
+ case le: result = (op1 <= op2); break;
+ case lt: result = (op1 < op2) ; break;
+ default: VIXL_UNREACHABLE(); break;
+ }
+ }
+ dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::fcmp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ Condition cond) {
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ fcmp<float>(vform, dst, src1, src2, cond);
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ fcmp<double>(vform, dst, src1, src2, cond);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::fcmp_zero(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ Condition cond) {
+ SimVRegister temp;
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ LogicVRegister zero_reg = dup_immediate(vform, temp, FloatToRawbits(0.0));
+ fcmp<float>(vform, dst, src, zero_reg, cond);
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ LogicVRegister zero_reg = dup_immediate(vform, temp,
+ DoubleToRawbits(0.0));
+ fcmp<double>(vform, dst, src, zero_reg, cond);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::fabscmp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ Condition cond) {
+ SimVRegister temp1, temp2;
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);
+ LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);
+ fcmp<float>(vform, dst, abs_src1, abs_src2, cond);
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);
+ LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);
+ fcmp<double>(vform, dst, abs_src1, abs_src2, cond);
+ }
+ return dst;
+}
+
+
+template <typename T>
+LogicVRegister Simulator::fmla(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ T op1 = src1.Float<T>(i);
+ T op2 = src2.Float<T>(i);
+ T acc = dst.Float<T>(i);
+ T result = FPMulAdd(acc, op1, op2);
+ dst.SetFloat(i, result);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::fmla(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ fmla<float>(vform, dst, src1, src2);
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ fmla<double>(vform, dst, src1, src2);
+ }
+ return dst;
+}
+
+
+template <typename T>
+LogicVRegister Simulator::fmls(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ T op1 = -src1.Float<T>(i);
+ T op2 = src2.Float<T>(i);
+ T acc = dst.Float<T>(i);
+ T result = FPMulAdd(acc, op1, op2);
+ dst.SetFloat(i, result);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::fmls(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ fmls<float>(vform, dst, src1, src2);
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ fmls<double>(vform, dst, src1, src2);
+ }
+ return dst;
+}
+
+
+template <typename T>
+LogicVRegister Simulator::fneg(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ T op = src.Float<T>(i);
+ op = -op;
+ dst.SetFloat(i, op);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::fneg(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ fneg<float>(vform, dst, src);
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ fneg<double>(vform, dst, src);
+ }
+ return dst;
+}
+
+
+template <typename T>
+LogicVRegister Simulator::fabs_(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ T op = src.Float<T>(i);
+ if (copysign(1.0, op) < 0.0) {
+ op = -op;
+ }
+ dst.SetFloat(i, op);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::fabs_(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ fabs_<float>(vform, dst, src);
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ fabs_<double>(vform, dst, src);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::fabd(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ fsub(vform, temp, src1, src2);
+ fabs_(vform, dst, temp);
+ return dst;
+}
+
+
+LogicVRegister Simulator::fsqrt(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ dst.ClearForWrite(vform);
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ float result = FPSqrt(src.Float<float>(i));
+ dst.SetFloat(i, result);
+ }
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ double result = FPSqrt(src.Float<double>(i));
+ dst.SetFloat(i, result);
+ }
+ }
+ return dst;
+}
+
+
+#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP) \
+LogicVRegister Simulator::FNP(VectorFormat vform, \
+ LogicVRegister dst, \
+ const LogicVRegister& src1, \
+ const LogicVRegister& src2) { \
+ SimVRegister temp1, temp2; \
+ uzp1(vform, temp1, src1, src2); \
+ uzp2(vform, temp2, src1, src2); \
+ FN(vform, dst, temp1, temp2); \
+ return dst; \
+} \
+ \
+LogicVRegister Simulator::FNP(VectorFormat vform, \
+ LogicVRegister dst, \
+ const LogicVRegister& src) { \
+ if (vform == kFormatS) { \
+ float result = OP(src.Float<float>(0), src.Float<float>(1)); \
+ dst.SetFloat(0, result); \
+ } else { \
+ VIXL_ASSERT(vform == kFormatD); \
+ double result = OP(src.Float<double>(0), src.Float<double>(1)); \
+ dst.SetFloat(0, result); \
+ } \
+ dst.ClearForWrite(vform); \
+ return dst; \
+}
+NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
+#undef DEFINE_NEON_FP_PAIR_OP
+
+
+LogicVRegister Simulator::fminmaxv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ FPMinMaxOp Op) {
+ VIXL_ASSERT(vform == kFormat4S);
+ USE(vform);
+ float result1 = (this->*Op)(src.Float<float>(0), src.Float<float>(1));
+ float result2 = (this->*Op)(src.Float<float>(2), src.Float<float>(3));
+ float result = (this->*Op)(result1, result2);
+ dst.ClearForWrite(kFormatS);
+ dst.SetFloat<float>(0, result);
+ return dst;
+}
+
+
+LogicVRegister Simulator::fmaxv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ return fminmaxv(vform, dst, src, &Simulator::FPMax);
+}
+
+
+LogicVRegister Simulator::fminv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ return fminmaxv(vform, dst, src, &Simulator::FPMin);
+}
+
+
+LogicVRegister Simulator::fmaxnmv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ return fminmaxv(vform, dst, src, &Simulator::FPMaxNM);
+}
+
+
+LogicVRegister Simulator::fminnmv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ return fminmaxv(vform, dst, src, &Simulator::FPMinNM);
+}
+
+
+LogicVRegister Simulator::fmul(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ dst.ClearForWrite(vform);
+ SimVRegister temp;
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
+ fmul<float>(vform, dst, src1, index_reg);
+
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
+ fmul<double>(vform, dst, src1, index_reg);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::fmla(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ dst.ClearForWrite(vform);
+ SimVRegister temp;
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
+ fmla<float>(vform, dst, src1, index_reg);
+
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
+ fmla<double>(vform, dst, src1, index_reg);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::fmls(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ dst.ClearForWrite(vform);
+ SimVRegister temp;
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
+ fmls<float>(vform, dst, src1, index_reg);
+
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
+ fmls<double>(vform, dst, src1, index_reg);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::fmulx(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index) {
+ dst.ClearForWrite(vform);
+ SimVRegister temp;
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
+ fmulx<float>(vform, dst, src1, index_reg);
+
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
+ fmulx<double>(vform, dst, src1, index_reg);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::frint(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ FPRounding rounding_mode,
+ bool inexact_exception) {
+ dst.ClearForWrite(vform);
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ float input = src.Float<float>(i);
+ float rounded = FPRoundInt(input, rounding_mode);
+ if (inexact_exception && !std::isnan(input) && (input != rounded)) {
+ FPProcessException();
+ }
+ dst.SetFloat<float>(i, rounded);
+ }
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ double input = src.Float<double>(i);
+ double rounded = FPRoundInt(input, rounding_mode);
+ if (inexact_exception && !std::isnan(input) && (input != rounded)) {
+ FPProcessException();
+ }
+ dst.SetFloat<double>(i, rounded);
+ }
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::fcvts(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ FPRounding rounding_mode,
+ int fbits) {
+ dst.ClearForWrite(vform);
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ float op = src.Float<float>(i) * std::pow(2.0f, fbits);
+ dst.SetInt(vform, i, FPToInt32(op, rounding_mode));
+ }
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ double op = src.Float<double>(i) * std::pow(2.0, fbits);
+ dst.SetInt(vform, i, FPToInt64(op, rounding_mode));
+ }
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::fcvtu(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ FPRounding rounding_mode,
+ int fbits) {
+ dst.ClearForWrite(vform);
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ float op = src.Float<float>(i) * std::pow(2.0f, fbits);
+ dst.SetUint(vform, i, FPToUInt32(op, rounding_mode));
+ }
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ double op = src.Float<double>(i) * std::pow(2.0, fbits);
+ dst.SetUint(vform, i, FPToUInt64(op, rounding_mode));
+ }
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::fcvtl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
+ // TODO: Full support for SimFloat16 in SimRegister(s).
+ dst.SetFloat(i,
+ FPToFloat(RawbitsToFloat16(src.Float<uint16_t>(i)),
+ ReadDN()));
+ }
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
+ dst.SetFloat(i, FPToDouble(src.Float<float>(i), ReadDN()));
+ }
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::fcvtl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ int lane_count = LaneCountFromFormat(vform);
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ for (int i = 0; i < lane_count; i++) {
+ // TODO: Full support for SimFloat16 in SimRegister(s).
+ dst.SetFloat(i,
+ FPToFloat(RawbitsToFloat16(
+ src.Float<uint16_t>(i + lane_count)),
+ ReadDN()));
+ }
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ for (int i = 0; i < lane_count; i++) {
+ dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count), ReadDN()));
+ }
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::fcvtn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ SimVRegister tmp;
+ LogicVRegister srctmp = mov(kFormat2D, tmp, src);
+ dst.ClearForWrite(vform);
+ if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.SetFloat(i,
+ Float16ToRawbits(FPToFloat16(srctmp.Float<float>(i),
+ FPTieEven,
+ ReadDN())));
+ }
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.SetFloat(i, FPToFloat(srctmp.Float<double>(i), FPTieEven, ReadDN()));
+ }
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::fcvtn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ int lane_count = LaneCountFromFormat(vform) / 2;
+ if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
+ for (int i = lane_count - 1; i >= 0; i--) {
+ dst.SetFloat(i + lane_count,
+ Float16ToRawbits(
+ FPToFloat16(src.Float<float>(i), FPTieEven, ReadDN())));
+ }
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
+ for (int i = lane_count - 1; i >= 0; i--) {
+ dst.SetFloat(i + lane_count,
+ FPToFloat(src.Float<double>(i), FPTieEven, ReadDN()));
+ }
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::fcvtxn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ SimVRegister tmp;
+ LogicVRegister srctmp = mov(kFormat2D, tmp, src);
+ dst.ClearForWrite(vform);
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.SetFloat(i, FPToFloat(srctmp.Float<double>(i), FPRoundOdd, ReadDN()));
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::fcvtxn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
+ int lane_count = LaneCountFromFormat(vform) / 2;
+ for (int i = lane_count - 1; i >= 0; i--) {
+ dst.SetFloat(i + lane_count,
+ FPToFloat(src.Float<double>(i), FPRoundOdd, ReadDN()));
+ }
+ return dst;
+}
+
+
+// Based on reference C function recip_sqrt_estimate from ARM ARM.
+double Simulator::recip_sqrt_estimate(double a) {
+ int q0, q1, s;
+ double r;
+ if (a < 0.5) {
+ q0 = static_cast<int>(a * 512.0);
+ r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);
+ } else {
+ q1 = static_cast<int>(a * 256.0);
+ r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);
+ }
+ s = static_cast<int>(256.0 * r + 0.5);
+ return static_cast<double>(s) / 256.0;
+}
+
+
+static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
+ return ExtractUnsignedBitfield64(start_bit, end_bit, val);
+}
+
+
+template <typename T>
+T Simulator::FPRecipSqrtEstimate(T op) {
+ if (std::isnan(op)) {
+ return FPProcessNaN(op);
+ } else if (op == 0.0) {
+ if (copysign(1.0, op) < 0.0) {
+ return kFP64NegativeInfinity;
+ } else {
+ return kFP64PositiveInfinity;
+ }
+ } else if (copysign(1.0, op) < 0.0) {
+ FPProcessException();
+ return FPDefaultNaN<T>();
+ } else if (std::isinf(op)) {
+ return 0.0;
+ } else {
+ uint64_t fraction;
+ int exp, result_exp;
+
+ if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
+ exp = FloatExp(op);
+ fraction = FloatMantissa(op);
+ fraction <<= 29;
+ } else {
+ exp = DoubleExp(op);
+ fraction = DoubleMantissa(op);
+ }
+
+ if (exp == 0) {
+ while (Bits(fraction, 51, 51) == 0) {
+ fraction = Bits(fraction, 50, 0) << 1;
+ exp -= 1;
+ }
+ fraction = Bits(fraction, 50, 0) << 1;
+ }
+
+ double scaled;
+ if (Bits(exp, 0, 0) == 0) {
+ scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
+ } else {
+ scaled = DoublePack(0, 1021, Bits(fraction, 51, 44) << 44);
+ }
+
+ if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
+ result_exp = (380 - exp) / 2;
+ } else {
+ result_exp = (3068 - exp) / 2;
+ }
+
+ uint64_t estimate = DoubleToRawbits(recip_sqrt_estimate(scaled));
+
+ if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
+ uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
+ uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
+ return FloatPack(0, exp_bits, est_bits);
+ } else {
+ return DoublePack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
+ }
+ }
+}
+
+
+LogicVRegister Simulator::frsqrte(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ dst.ClearForWrite(vform);
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ float input = src.Float<float>(i);
+ dst.SetFloat(i, FPRecipSqrtEstimate<float>(input));
+ }
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ double input = src.Float<double>(i);
+ dst.SetFloat(i, FPRecipSqrtEstimate<double>(input));
+ }
+ }
+ return dst;
+}
+
+template <typename T>
+T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
+ uint32_t sign;
+
+ if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
+ sign = FloatSign(op);
+ } else {
+ sign = DoubleSign(op);
+ }
+
+ if (std::isnan(op)) {
+ return FPProcessNaN(op);
+ } else if (std::isinf(op)) {
+ return (sign == 1) ? -0.0 : 0.0;
+ } else if (op == 0.0) {
+ FPProcessException(); // FPExc_DivideByZero exception.
+ return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
+ } else if (((sizeof(T) == sizeof(float)) && // NOLINT(runtime/sizeof)
+ (std::fabs(op) < std::pow(2.0, -128.0))) ||
+ ((sizeof(T) == sizeof(double)) && // NOLINT(runtime/sizeof)
+ (std::fabs(op) < std::pow(2.0, -1024.0)))) {
+ bool overflow_to_inf = false;
+ switch (rounding) {
+ case FPTieEven: overflow_to_inf = true; break;
+ case FPPositiveInfinity: overflow_to_inf = (sign == 0); break;
+ case FPNegativeInfinity: overflow_to_inf = (sign == 1); break;
+ case FPZero: overflow_to_inf = false; break;
+ default: break;
+ }
+ FPProcessException(); // FPExc_Overflow and FPExc_Inexact.
+ if (overflow_to_inf) {
+ return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
+ } else {
+ // Return FPMaxNormal(sign).
+ if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
+ return FloatPack(sign, 0xfe, 0x07fffff);
+ } else {
+ return DoublePack(sign, 0x7fe, 0x0fffffffffffffl);
+ }
+ }
+ } else {
+ uint64_t fraction;
+ int exp, result_exp;
+ uint32_t sign;
+
+ if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
+ sign = FloatSign(op);
+ exp = FloatExp(op);
+ fraction = FloatMantissa(op);
+ fraction <<= 29;
+ } else {
+ sign = DoubleSign(op);
+ exp = DoubleExp(op);
+ fraction = DoubleMantissa(op);
+ }
+
+ if (exp == 0) {
+ if (Bits(fraction, 51, 51) == 0) {
+ exp -= 1;
+ fraction = Bits(fraction, 49, 0) << 2;
+ } else {
+ fraction = Bits(fraction, 50, 0) << 1;
+ }
+ }
+
+ double scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
+
+ if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
+ result_exp = (253 - exp); // In range 253-254 = -1 to 253+1 = 254.
+ } else {
+ result_exp = (2045 - exp); // In range 2045-2046 = -1 to 2045+1 = 2046.
+ }
+
+ double estimate = recip_estimate(scaled);
+
+ fraction = DoubleMantissa(estimate);
+ if (result_exp == 0) {
+ fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
+ } else if (result_exp == -1) {
+ fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
+ result_exp = 0;
+ }
+ if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
+ uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
+ uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));
+ return FloatPack(sign, exp_bits, frac_bits);
+ } else {
+ return DoublePack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
+ }
+ }
+}
+
+
+LogicVRegister Simulator::frecpe(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ FPRounding round) {
+ dst.ClearForWrite(vform);
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ float input = src.Float<float>(i);
+ dst.SetFloat(i, FPRecipEstimate<float>(input, round));
+ }
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ double input = src.Float<double>(i);
+ dst.SetFloat(i, FPRecipEstimate<double>(input, round));
+ }
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::ursqrte(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ dst.ClearForWrite(vform);
+ uint64_t operand;
+ uint32_t result;
+ double dp_operand, dp_result;
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ operand = src.Uint(vform, i);
+ if (operand <= 0x3FFFFFFF) {
+ result = 0xFFFFFFFF;
+ } else {
+ dp_operand = operand * std::pow(2.0, -32);
+ dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
+ result = static_cast<uint32_t>(dp_result);
+ }
+ dst.SetUint(vform, i, result);
+ }
+ return dst;
+}
+
+
+// Based on reference C function recip_estimate from ARM ARM.
+double Simulator::recip_estimate(double a) {
+ int q, s;
+ double r;
+ q = static_cast<int>(a * 512.0);
+ r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
+ s = static_cast<int>(256.0 * r + 0.5);
+ return static_cast<double>(s) / 256.0;
+}
+
+
+LogicVRegister Simulator::urecpe(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ dst.ClearForWrite(vform);
+ uint64_t operand;
+ uint32_t result;
+ double dp_operand, dp_result;
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ operand = src.Uint(vform, i);
+ if (operand <= 0x7FFFFFFF) {
+ result = 0xFFFFFFFF;
+ } else {
+ dp_operand = operand * std::pow(2.0, -32);
+ dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);
+ result = static_cast<uint32_t>(dp_result);
+ }
+ dst.SetUint(vform, i, result);
+ }
+ return dst;
+}
+
+template <typename T>
+LogicVRegister Simulator::frecpx(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ T op = src.Float<T>(i);
+ T result;
+ if (std::isnan(op)) {
+ result = FPProcessNaN(op);
+ } else {
+ int exp;
+ uint32_t sign;
+ if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
+ sign = FloatSign(op);
+ exp = FloatExp(op);
+ exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));
+ result = FloatPack(sign, exp, 0);
+ } else {
+ sign = DoubleSign(op);
+ exp = DoubleExp(op);
+ exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));
+ result = DoublePack(sign, exp, 0);
+ }
+ }
+ dst.SetFloat(i, result);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::frecpx(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ frecpx<float>(vform, dst, src);
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ frecpx<double>(vform, dst, src);
+ }
+ return dst;
+}
+
+LogicVRegister Simulator::scvtf(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int fbits,
+ FPRounding round) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ float result = FixedToFloat(src.Int(kFormatS, i), fbits, round);
+ dst.SetFloat<float>(i, result);
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ double result = FixedToDouble(src.Int(kFormatD, i), fbits, round);
+ dst.SetFloat<double>(i, result);
+ }
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::ucvtf(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int fbits,
+ FPRounding round) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round);
+ dst.SetFloat<float>(i, result);
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round);
+ dst.SetFloat<double>(i, result);
+ }
+ }
+ return dst;
+}
+
+
+} // namespace vixl
+
+#endif // JS_SIMULATOR_ARM64
diff --git a/js/src/jit/arm64/vixl/MacroAssembler-vixl.cpp b/js/src/jit/arm64/vixl/MacroAssembler-vixl.cpp
new file mode 100644
index 0000000000..5c4a5ce145
--- /dev/null
+++ b/js/src/jit/arm64/vixl/MacroAssembler-vixl.cpp
@@ -0,0 +1,2027 @@
+// Copyright 2015, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "jit/arm64/vixl/MacroAssembler-vixl.h"
+
+#include <ctype.h>
+
+namespace vixl {
+
+MacroAssembler::MacroAssembler()
+ : js::jit::Assembler(),
+ sp_(x28),
+ tmp_list_(ip0, ip1),
+ fptmp_list_(d31)
+{
+}
+
+
+void MacroAssembler::FinalizeCode() {
+ Assembler::FinalizeCode();
+}
+
+
+int MacroAssembler::MoveImmediateHelper(MacroAssembler* masm,
+ const Register &rd,
+ uint64_t imm) {
+ bool emit_code = (masm != NULL);
+ VIXL_ASSERT(IsUint32(imm) || IsInt32(imm) || rd.Is64Bits());
+ // The worst case for size is mov 64-bit immediate to sp:
+ // * up to 4 instructions to materialise the constant
+ // * 1 instruction to move to sp
+ MacroEmissionCheckScope guard(masm);
+
+ // Immediates on Aarch64 can be produced using an initial value, and zero to
+ // three move keep operations.
+ //
+ // Initial values can be generated with:
+ // 1. 64-bit move zero (movz).
+ // 2. 32-bit move inverted (movn).
+ // 3. 64-bit move inverted.
+ // 4. 32-bit orr immediate.
+ // 5. 64-bit orr immediate.
+ // Move-keep may then be used to modify each of the 16-bit half words.
+ //
+ // The code below supports all five initial value generators, and
+ // applying move-keep operations to move-zero and move-inverted initial
+ // values.
+
+ // Try to move the immediate in one instruction, and if that fails, switch to
+ // using multiple instructions.
+ if (OneInstrMoveImmediateHelper(masm, rd, imm)) {
+ return 1;
+ } else {
+ int instruction_count = 0;
+ unsigned reg_size = rd.size();
+
+ // Generic immediate case. Imm will be represented by
+ // [imm3, imm2, imm1, imm0], where each imm is 16 bits.
+ // A move-zero or move-inverted is generated for the first non-zero or
+ // non-0xffff immX, and a move-keep for subsequent non-zero immX.
+
+ uint64_t ignored_halfword = 0;
+ bool invert_move = false;
+ // If the number of 0xffff halfwords is greater than the number of 0x0000
+ // halfwords, it's more efficient to use move-inverted.
+ if (CountClearHalfWords(~imm, reg_size) >
+ CountClearHalfWords(imm, reg_size)) {
+ ignored_halfword = 0xffff;
+ invert_move = true;
+ }
+
+ // Mov instructions can't move values into the stack pointer, so set up a
+ // temporary register, if needed.
+ UseScratchRegisterScope temps;
+ Register temp;
+ if (emit_code) {
+ temps.Open(masm);
+ temp = rd.IsSP() ? temps.AcquireSameSizeAs(rd) : rd;
+ }
+
+ // Iterate through the halfwords. Use movn/movz for the first non-ignored
+ // halfword, and movk for subsequent halfwords.
+ VIXL_ASSERT((reg_size % 16) == 0);
+ bool first_mov_done = false;
+ for (unsigned i = 0; i < (temp.size() / 16); i++) {
+ uint64_t imm16 = (imm >> (16 * i)) & 0xffff;
+ if (imm16 != ignored_halfword) {
+ if (!first_mov_done) {
+ if (invert_move) {
+ if (emit_code) masm->movn(temp, ~imm16 & 0xffff, 16 * i);
+ instruction_count++;
+ } else {
+ if (emit_code) masm->movz(temp, imm16, 16 * i);
+ instruction_count++;
+ }
+ first_mov_done = true;
+ } else {
+ // Construct a wider constant.
+ if (emit_code) masm->movk(temp, imm16, 16 * i);
+ instruction_count++;
+ }
+ }
+ }
+
+ VIXL_ASSERT(first_mov_done);
+
+ // Move the temporary if the original destination register was the stack
+ // pointer.
+ if (rd.IsSP()) {
+ if (emit_code) masm->mov(rd, temp);
+ instruction_count++;
+ }
+ return instruction_count;
+ }
+}
+
+
+bool MacroAssembler::OneInstrMoveImmediateHelper(MacroAssembler* masm,
+ const Register& dst,
+ int64_t imm) {
+ bool emit_code = masm != NULL;
+ unsigned n, imm_s, imm_r;
+ int reg_size = dst.size();
+
+ if (IsImmMovz(imm, reg_size) && !dst.IsSP()) {
+ // Immediate can be represented in a move zero instruction. Movz can't write
+ // to the stack pointer.
+ if (emit_code) {
+ masm->movz(dst, imm);
+ }
+ return true;
+ } else if (IsImmMovn(imm, reg_size) && !dst.IsSP()) {
+ // Immediate can be represented in a move negative instruction. Movn can't
+ // write to the stack pointer.
+ if (emit_code) {
+ masm->movn(dst, dst.Is64Bits() ? ~imm : (~imm & kWRegMask));
+ }
+ return true;
+ } else if (IsImmLogical(imm, reg_size, &n, &imm_s, &imm_r)) {
+ // Immediate can be represented in a logical orr instruction.
+ VIXL_ASSERT(!dst.IsZero());
+ if (emit_code) {
+ masm->LogicalImmediate(
+ dst, AppropriateZeroRegFor(dst), n, imm_s, imm_r, ORR);
+ }
+ return true;
+ }
+ return false;
+}
+
+
+void MacroAssembler::B(Label* label, BranchType type, Register reg, int bit) {
+ VIXL_ASSERT((reg.Is(NoReg) || (type >= kBranchTypeFirstUsingReg)) &&
+ ((bit == -1) || (type >= kBranchTypeFirstUsingBit)));
+ if (kBranchTypeFirstCondition <= type && type <= kBranchTypeLastCondition) {
+ B(static_cast<Condition>(type), label);
+ } else {
+ switch (type) {
+ case always: B(label); break;
+ case never: break;
+ case reg_zero: Cbz(reg, label); break;
+ case reg_not_zero: Cbnz(reg, label); break;
+ case reg_bit_clear: Tbz(reg, bit, label); break;
+ case reg_bit_set: Tbnz(reg, bit, label); break;
+ default:
+ VIXL_UNREACHABLE();
+ }
+ }
+}
+
+
+void MacroAssembler::B(Label* label) {
+ SingleEmissionCheckScope guard(this);
+ b(label);
+}
+
+
+void MacroAssembler::B(Label* label, Condition cond) {
+ VIXL_ASSERT((cond != al) && (cond != nv));
+ EmissionCheckScope guard(this, 2 * kInstructionSize);
+
+ if (label->bound() && LabelIsOutOfRange(label, CondBranchType)) {
+ Label done;
+ b(&done, InvertCondition(cond));
+ b(label);
+ bind(&done);
+ } else {
+ b(label, cond);
+ }
+}
+
+
+void MacroAssembler::Cbnz(const Register& rt, Label* label) {
+ VIXL_ASSERT(!rt.IsZero());
+ EmissionCheckScope guard(this, 2 * kInstructionSize);
+
+ if (label->bound() && LabelIsOutOfRange(label, CondBranchType)) {
+ Label done;
+ cbz(rt, &done);
+ b(label);
+ bind(&done);
+ } else {
+ cbnz(rt, label);
+ }
+}
+
+
+void MacroAssembler::Cbz(const Register& rt, Label* label) {
+ VIXL_ASSERT(!rt.IsZero());
+ EmissionCheckScope guard(this, 2 * kInstructionSize);
+
+ if (label->bound() && LabelIsOutOfRange(label, CondBranchType)) {
+ Label done;
+ cbnz(rt, &done);
+ b(label);
+ bind(&done);
+ } else {
+ cbz(rt, label);
+ }
+}
+
+
+void MacroAssembler::Tbnz(const Register& rt, unsigned bit_pos, Label* label) {
+ VIXL_ASSERT(!rt.IsZero());
+ EmissionCheckScope guard(this, 2 * kInstructionSize);
+
+ if (label->bound() && LabelIsOutOfRange(label, TestBranchType)) {
+ Label done;
+ tbz(rt, bit_pos, &done);
+ b(label);
+ bind(&done);
+ } else {
+ tbnz(rt, bit_pos, label);
+ }
+}
+
+
+void MacroAssembler::Tbz(const Register& rt, unsigned bit_pos, Label* label) {
+ VIXL_ASSERT(!rt.IsZero());
+ EmissionCheckScope guard(this, 2 * kInstructionSize);
+
+ if (label->bound() && LabelIsOutOfRange(label, TestBranchType)) {
+ Label done;
+ tbnz(rt, bit_pos, &done);
+ b(label);
+ bind(&done);
+ } else {
+ tbz(rt, bit_pos, label);
+ }
+}
+
+
+void MacroAssembler::And(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ LogicalMacro(rd, rn, operand, AND);
+}
+
+
+void MacroAssembler::Ands(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ LogicalMacro(rd, rn, operand, ANDS);
+}
+
+
+void MacroAssembler::Tst(const Register& rn,
+ const Operand& operand) {
+ Ands(AppropriateZeroRegFor(rn), rn, operand);
+}
+
+
+void MacroAssembler::Bic(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ LogicalMacro(rd, rn, operand, BIC);
+}
+
+
+void MacroAssembler::Bics(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ LogicalMacro(rd, rn, operand, BICS);
+}
+
+
+void MacroAssembler::Orr(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ LogicalMacro(rd, rn, operand, ORR);
+}
+
+
+void MacroAssembler::Orn(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ LogicalMacro(rd, rn, operand, ORN);
+}
+
+
+void MacroAssembler::Eor(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ LogicalMacro(rd, rn, operand, EOR);
+}
+
+
+void MacroAssembler::Eon(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ LogicalMacro(rd, rn, operand, EON);
+}
+
+
+void MacroAssembler::LogicalMacro(const Register& rd,
+ const Register& rn,
+ const Operand& operand,
+ LogicalOp op) {
+ // The worst case for size is logical immediate to sp:
+ // * up to 4 instructions to materialise the constant
+ // * 1 instruction to do the operation
+ // * 1 instruction to move to sp
+ MacroEmissionCheckScope guard(this);
+ UseScratchRegisterScope temps(this);
+
+ if (operand.IsImmediate()) {
+ int64_t immediate = operand.immediate();
+ unsigned reg_size = rd.size();
+
+ // If the operation is NOT, invert the operation and immediate.
+ if ((op & NOT) == NOT) {
+ op = static_cast<LogicalOp>(op & ~NOT);
+ immediate = ~immediate;
+ }
+
+ // Ignore the top 32 bits of an immediate if we're moving to a W register.
+ if (rd.Is32Bits()) {
+ // Check that the top 32 bits are consistent.
+ VIXL_ASSERT(((immediate >> kWRegSize) == 0) ||
+ ((immediate >> kWRegSize) == -1));
+ immediate &= kWRegMask;
+ }
+
+ VIXL_ASSERT(rd.Is64Bits() || IsUint32(immediate));
+
+ // Special cases for all set or all clear immediates.
+ if (immediate == 0) {
+ switch (op) {
+ case AND:
+ Mov(rd, 0);
+ return;
+ case ORR:
+ VIXL_FALLTHROUGH();
+ case EOR:
+ Mov(rd, rn);
+ return;
+ case ANDS:
+ VIXL_FALLTHROUGH();
+ case BICS:
+ break;
+ default:
+ VIXL_UNREACHABLE();
+ }
+ } else if ((rd.Is64Bits() && (immediate == -1)) ||
+ (rd.Is32Bits() && (immediate == 0xffffffff))) {
+ switch (op) {
+ case AND:
+ Mov(rd, rn);
+ return;
+ case ORR:
+ Mov(rd, immediate);
+ return;
+ case EOR:
+ Mvn(rd, rn);
+ return;
+ case ANDS:
+ VIXL_FALLTHROUGH();
+ case BICS:
+ break;
+ default:
+ VIXL_UNREACHABLE();
+ }
+ }
+
+ unsigned n, imm_s, imm_r;
+ if (IsImmLogical(immediate, reg_size, &n, &imm_s, &imm_r)) {
+ // Immediate can be encoded in the instruction.
+ LogicalImmediate(rd, rn, n, imm_s, imm_r, op);
+ } else {
+ // Immediate can't be encoded: synthesize using move immediate.
+ Register temp = temps.AcquireSameSizeAs(rn);
+
+ // If the left-hand input is the stack pointer, we can't pre-shift the
+ // immediate, as the encoding won't allow the subsequent post shift.
+ PreShiftImmMode mode = rn.IsSP() ? kNoShift : kAnyShift;
+ Operand imm_operand = MoveImmediateForShiftedOp(temp, immediate, mode);
+
+ // VIXL can acquire temp registers. Assert that the caller is aware.
+ VIXL_ASSERT(!temp.Is(rd) && !temp.Is(rn));
+ VIXL_ASSERT(!temp.Is(operand.maybeReg()));
+
+ if (rd.Is(sp)) {
+ // If rd is the stack pointer we cannot use it as the destination
+ // register so we use the temp register as an intermediate again.
+ Logical(temp, rn, imm_operand, op);
+ Mov(sp, temp);
+ } else {
+ Logical(rd, rn, imm_operand, op);
+ }
+ }
+ } else if (operand.IsExtendedRegister()) {
+ VIXL_ASSERT(operand.reg().size() <= rd.size());
+ // Add/sub extended supports shift <= 4. We want to support exactly the
+ // same modes here.
+ VIXL_ASSERT(operand.shift_amount() <= 4);
+ VIXL_ASSERT(operand.reg().Is64Bits() ||
+ ((operand.extend() != UXTX) && (operand.extend() != SXTX)));
+
+ temps.Exclude(operand.reg());
+ Register temp = temps.AcquireSameSizeAs(rn);
+
+ // VIXL can acquire temp registers. Assert that the caller is aware.
+ VIXL_ASSERT(!temp.Is(rd) && !temp.Is(rn));
+ VIXL_ASSERT(!temp.Is(operand.maybeReg()));
+
+ EmitExtendShift(temp, operand.reg(), operand.extend(),
+ operand.shift_amount());
+ Logical(rd, rn, Operand(temp), op);
+ } else {
+ // The operand can be encoded in the instruction.
+ VIXL_ASSERT(operand.IsShiftedRegister());
+ Logical(rd, rn, operand, op);
+ }
+}
+
+
+void MacroAssembler::Mov(const Register& rd,
+ const Operand& operand,
+ DiscardMoveMode discard_mode) {
+ // The worst case for size is mov immediate with up to 4 instructions.
+ MacroEmissionCheckScope guard(this);
+
+ if (operand.IsImmediate()) {
+ // Call the macro assembler for generic immediates.
+ Mov(rd, operand.immediate());
+ } else if (operand.IsShiftedRegister() && (operand.shift_amount() != 0)) {
+ // Emit a shift instruction if moving a shifted register. This operation
+ // could also be achieved using an orr instruction (like orn used by Mvn),
+ // but using a shift instruction makes the disassembly clearer.
+ EmitShift(rd, operand.reg(), operand.shift(), operand.shift_amount());
+ } else if (operand.IsExtendedRegister()) {
+ // Emit an extend instruction if moving an extended register. This handles
+ // extend with post-shift operations, too.
+ EmitExtendShift(rd, operand.reg(), operand.extend(),
+ operand.shift_amount());
+ } else {
+ // Otherwise, emit a register move only if the registers are distinct, or
+ // if they are not X registers.
+ //
+ // Note that mov(w0, w0) is not a no-op because it clears the top word of
+ // x0. A flag is provided (kDiscardForSameWReg) if a move between the same W
+ // registers is not required to clear the top word of the X register. In
+ // this case, the instruction is discarded.
+ //
+ // If the sp is an operand, add #0 is emitted, otherwise, orr #0.
+ if (!rd.Is(operand.reg()) || (rd.Is32Bits() &&
+ (discard_mode == kDontDiscardForSameWReg))) {
+ mov(rd, operand.reg());
+ }
+ }
+}
+
+
+void MacroAssembler::Movi16bitHelper(const VRegister& vd, uint64_t imm) {
+ VIXL_ASSERT(IsUint16(imm));
+ int byte1 = (imm & 0xff);
+ int byte2 = ((imm >> 8) & 0xff);
+ if (byte1 == byte2) {
+ movi(vd.Is64Bits() ? vd.V8B() : vd.V16B(), byte1);
+ } else if (byte1 == 0) {
+ movi(vd, byte2, LSL, 8);
+ } else if (byte2 == 0) {
+ movi(vd, byte1);
+ } else if (byte1 == 0xff) {
+ mvni(vd, ~byte2 & 0xff, LSL, 8);
+ } else if (byte2 == 0xff) {
+ mvni(vd, ~byte1 & 0xff);
+ } else {
+ UseScratchRegisterScope temps(this);
+ Register temp = temps.AcquireW();
+ movz(temp, imm);
+ dup(vd, temp);
+ }
+}
+
+
+void MacroAssembler::Movi32bitHelper(const VRegister& vd, uint64_t imm) {
+ VIXL_ASSERT(IsUint32(imm));
+
+ uint8_t bytes[sizeof(imm)];
+ memcpy(bytes, &imm, sizeof(imm));
+
+ // All bytes are either 0x00 or 0xff.
+ {
+ bool all0orff = true;
+ for (int i = 0; i < 4; ++i) {
+ if ((bytes[i] != 0) && (bytes[i] != 0xff)) {
+ all0orff = false;
+ break;
+ }
+ }
+
+ if (all0orff == true) {
+ movi(vd.Is64Bits() ? vd.V1D() : vd.V2D(), ((imm << 32) | imm));
+ return;
+ }
+ }
+
+ // Of the 4 bytes, only one byte is non-zero.
+ for (int i = 0; i < 4; i++) {
+ if ((imm & (0xff << (i * 8))) == imm) {
+ movi(vd, bytes[i], LSL, i * 8);
+ return;
+ }
+ }
+
+ // Of the 4 bytes, only one byte is not 0xff.
+ for (int i = 0; i < 4; i++) {
+ uint32_t mask = ~(0xff << (i * 8));
+ if ((imm & mask) == mask) {
+ mvni(vd, ~bytes[i] & 0xff, LSL, i * 8);
+ return;
+ }
+ }
+
+ // Immediate is of the form 0x00MMFFFF.
+ if ((imm & 0xff00ffff) == 0x0000ffff) {
+ movi(vd, bytes[2], MSL, 16);
+ return;
+ }
+
+ // Immediate is of the form 0x0000MMFF.
+ if ((imm & 0xffff00ff) == 0x000000ff) {
+ movi(vd, bytes[1], MSL, 8);
+ return;
+ }
+
+ // Immediate is of the form 0xFFMM0000.
+ if ((imm & 0xff00ffff) == 0xff000000) {
+ mvni(vd, ~bytes[2] & 0xff, MSL, 16);
+ return;
+ }
+ // Immediate is of the form 0xFFFFMM00.
+ if ((imm & 0xffff00ff) == 0xffff0000) {
+ mvni(vd, ~bytes[1] & 0xff, MSL, 8);
+ return;
+ }
+
+ // Top and bottom 16-bits are equal.
+ if (((imm >> 16) & 0xffff) == (imm & 0xffff)) {
+ Movi16bitHelper(vd.Is64Bits() ? vd.V4H() : vd.V8H(), imm & 0xffff);
+ return;
+ }
+
+ // Default case.
+ {
+ UseScratchRegisterScope temps(this);
+ Register temp = temps.AcquireW();
+ Mov(temp, imm);
+ dup(vd, temp);
+ }
+}
+
+
+void MacroAssembler::Movi64bitHelper(const VRegister& vd, uint64_t imm) {
+ // All bytes are either 0x00 or 0xff.
+ {
+ bool all0orff = true;
+ for (int i = 0; i < 8; ++i) {
+ int byteval = (imm >> (i * 8)) & 0xff;
+ if (byteval != 0 && byteval != 0xff) {
+ all0orff = false;
+ break;
+ }
+ }
+ if (all0orff == true) {
+ movi(vd, imm);
+ return;
+ }
+ }
+
+ // Top and bottom 32-bits are equal.
+ if (((imm >> 32) & 0xffffffff) == (imm & 0xffffffff)) {
+ Movi32bitHelper(vd.Is64Bits() ? vd.V2S() : vd.V4S(), imm & 0xffffffff);
+ return;
+ }
+
+ // Default case.
+ {
+ UseScratchRegisterScope temps(this);
+ Register temp = temps.AcquireX();
+ Mov(temp, imm);
+ if (vd.Is1D()) {
+ mov(vd.D(), 0, temp);
+ } else {
+ dup(vd.V2D(), temp);
+ }
+ }
+}
+
+
+void MacroAssembler::Movi(const VRegister& vd,
+ uint64_t imm,
+ Shift shift,
+ int shift_amount) {
+ MacroEmissionCheckScope guard(this);
+ if (shift_amount != 0 || shift != LSL) {
+ movi(vd, imm, shift, shift_amount);
+ } else if (vd.Is8B() || vd.Is16B()) {
+ // 8-bit immediate.
+ VIXL_ASSERT(IsUint8(imm));
+ movi(vd, imm);
+ } else if (vd.Is4H() || vd.Is8H()) {
+ // 16-bit immediate.
+ Movi16bitHelper(vd, imm);
+ } else if (vd.Is2S() || vd.Is4S()) {
+ // 32-bit immediate.
+ Movi32bitHelper(vd, imm);
+ } else {
+ // 64-bit immediate.
+ Movi64bitHelper(vd, imm);
+ }
+}
+
+
+void MacroAssembler::Movi(const VRegister& vd,
+ uint64_t hi,
+ uint64_t lo) {
+ VIXL_ASSERT(vd.Is128Bits());
+ UseScratchRegisterScope temps(this);
+
+ // When hi == lo, the following generates good code.
+ //
+ // In situations where the constants are complex and hi != lo, the following
+ // can turn into up to 10 instructions: 2*(mov + 3*movk + dup/insert). To do
+ // any better, we could try to estimate whether splatting the high value and
+ // updating the low value would generate fewer instructions than vice versa
+ // (what we do now).
+ //
+ // (A PC-relative load from memory to the vector register (ADR + LD2) is going
+ // to have fairly high latency but is fairly compact; not clear what the best
+ // tradeoff is.)
+
+ Movi(vd.V2D(), lo);
+ if (hi != lo) {
+ Register temp = temps.AcquireX();
+ Mov(temp, hi);
+ Ins(vd.V2D(), 1, temp);
+ }
+}
+
+
+void MacroAssembler::Mvn(const Register& rd, const Operand& operand) {
+ // The worst case for size is mvn immediate with up to 4 instructions.
+ MacroEmissionCheckScope guard(this);
+
+ if (operand.IsImmediate()) {
+ // Call the macro assembler for generic immediates.
+ Mvn(rd, operand.immediate());
+ } else if (operand.IsExtendedRegister()) {
+ UseScratchRegisterScope temps(this);
+ temps.Exclude(operand.reg());
+
+ // Emit two instructions for the extend case. This differs from Mov, as
+ // the extend and invert can't be achieved in one instruction.
+ Register temp = temps.AcquireSameSizeAs(rd);
+
+ // VIXL can acquire temp registers. Assert that the caller is aware.
+ VIXL_ASSERT(!temp.Is(rd) && !temp.Is(operand.maybeReg()));
+
+ EmitExtendShift(temp, operand.reg(), operand.extend(),
+ operand.shift_amount());
+ mvn(rd, Operand(temp));
+ } else {
+ // Otherwise, register and shifted register cases can be handled by the
+ // assembler directly, using orn.
+ mvn(rd, operand);
+ }
+}
+
+
+void MacroAssembler::Mov(const Register& rd, uint64_t imm) {
+ MoveImmediateHelper(this, rd, imm);
+}
+
+
+void MacroAssembler::Ccmp(const Register& rn,
+ const Operand& operand,
+ StatusFlags nzcv,
+ Condition cond) {
+ if (operand.IsImmediate() && (operand.immediate() < 0)) {
+ ConditionalCompareMacro(rn, -operand.immediate(), nzcv, cond, CCMN);
+ } else {
+ ConditionalCompareMacro(rn, operand, nzcv, cond, CCMP);
+ }
+}
+
+
+void MacroAssembler::Ccmn(const Register& rn,
+ const Operand& operand,
+ StatusFlags nzcv,
+ Condition cond) {
+ if (operand.IsImmediate() && (operand.immediate() < 0)) {
+ ConditionalCompareMacro(rn, -operand.immediate(), nzcv, cond, CCMP);
+ } else {
+ ConditionalCompareMacro(rn, operand, nzcv, cond, CCMN);
+ }
+}
+
+
+void MacroAssembler::ConditionalCompareMacro(const Register& rn,
+ const Operand& operand,
+ StatusFlags nzcv,
+ Condition cond,
+ ConditionalCompareOp op) {
+ VIXL_ASSERT((cond != al) && (cond != nv));
+ // The worst case for size is ccmp immediate:
+ // * up to 4 instructions to materialise the constant
+ // * 1 instruction for ccmp
+ MacroEmissionCheckScope guard(this);
+
+ if ((operand.IsShiftedRegister() && (operand.shift_amount() == 0)) ||
+ (operand.IsImmediate() && IsImmConditionalCompare(operand.immediate()))) {
+ // The immediate can be encoded in the instruction, or the operand is an
+ // unshifted register: call the assembler.
+ ConditionalCompare(rn, operand, nzcv, cond, op);
+ } else {
+ UseScratchRegisterScope temps(this);
+ // The operand isn't directly supported by the instruction: perform the
+ // operation on a temporary register.
+ Register temp = temps.AcquireSameSizeAs(rn);
+ VIXL_ASSERT(!temp.Is(rn) && !temp.Is(operand.maybeReg()));
+ Mov(temp, operand);
+ ConditionalCompare(rn, temp, nzcv, cond, op);
+ }
+}
+
+
+void MacroAssembler::Csel(const Register& rd,
+ const Register& rn,
+ const Operand& operand,
+ Condition cond) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ VIXL_ASSERT((cond != al) && (cond != nv));
+ // The worst case for size is csel immediate:
+ // * up to 4 instructions to materialise the constant
+ // * 1 instruction for csel
+ MacroEmissionCheckScope guard(this);
+
+ if (operand.IsImmediate()) {
+ // Immediate argument. Handle special cases of 0, 1 and -1 using zero
+ // register.
+ int64_t imm = operand.immediate();
+ Register zr = AppropriateZeroRegFor(rn);
+ if (imm == 0) {
+ csel(rd, rn, zr, cond);
+ } else if (imm == 1) {
+ csinc(rd, rn, zr, cond);
+ } else if (imm == -1) {
+ csinv(rd, rn, zr, cond);
+ } else {
+ UseScratchRegisterScope temps(this);
+ Register temp = temps.AcquireSameSizeAs(rn);
+ VIXL_ASSERT(!temp.Is(rd) && !temp.Is(rn));
+ VIXL_ASSERT(!temp.Is(operand.maybeReg()));
+ Mov(temp, operand.immediate());
+ csel(rd, rn, temp, cond);
+ }
+ } else if (operand.IsShiftedRegister() && (operand.shift_amount() == 0)) {
+ // Unshifted register argument.
+ csel(rd, rn, operand.reg(), cond);
+ } else {
+ // All other arguments.
+ UseScratchRegisterScope temps(this);
+ Register temp = temps.AcquireSameSizeAs(rn);
+ VIXL_ASSERT(!temp.Is(rd) && !temp.Is(rn));
+ VIXL_ASSERT(!temp.Is(operand.maybeReg()));
+ Mov(temp, operand);
+ csel(rd, rn, temp, cond);
+ }
+}
+
+
+void MacroAssembler::Add(const Register& rd,
+ const Register& rn,
+ const Operand& operand,
+ FlagsUpdate S) {
+ if (operand.IsImmediate() && (operand.immediate() < 0) &&
+ IsImmAddSub(-operand.immediate())) {
+ AddSubMacro(rd, rn, -operand.immediate(), S, SUB);
+ } else {
+ AddSubMacro(rd, rn, operand, S, ADD);
+ }
+}
+
+
+void MacroAssembler::Adds(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ Add(rd, rn, operand, SetFlags);
+}
+
+
+void MacroAssembler::Sub(const Register& rd,
+ const Register& rn,
+ const Operand& operand,
+ FlagsUpdate S) {
+ if (operand.IsImmediate() && (operand.immediate() < 0) &&
+ IsImmAddSub(-operand.immediate())) {
+ AddSubMacro(rd, rn, -operand.immediate(), S, ADD);
+ } else {
+ AddSubMacro(rd, rn, operand, S, SUB);
+ }
+}
+
+
+void MacroAssembler::Subs(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ Sub(rd, rn, operand, SetFlags);
+}
+
+
+void MacroAssembler::Cmn(const Register& rn, const Operand& operand) {
+ Adds(AppropriateZeroRegFor(rn), rn, operand);
+}
+
+
+void MacroAssembler::Cmp(const Register& rn, const Operand& operand) {
+ Subs(AppropriateZeroRegFor(rn), rn, operand);
+}
+
+
+void MacroAssembler::Fcmp(const FPRegister& fn, double value,
+ FPTrapFlags trap) {
+ // The worst case for size is:
+ // * 1 to materialise the constant, using literal pool if necessary
+ // * 1 instruction for fcmp{e}
+ MacroEmissionCheckScope guard(this);
+ if (value != 0.0) {
+ UseScratchRegisterScope temps(this);
+ FPRegister tmp = temps.AcquireSameSizeAs(fn);
+ VIXL_ASSERT(!tmp.Is(fn));
+ Fmov(tmp, value);
+ FPCompareMacro(fn, tmp, trap);
+ } else {
+ FPCompareMacro(fn, value, trap);
+ }
+}
+
+
+void MacroAssembler::Fcmpe(const FPRegister& fn, double value) {
+ Fcmp(fn, value, EnableTrap);
+}
+
+
+void MacroAssembler::Fmov(VRegister vd, double imm) {
+ // Floating point immediates are loaded through the literal pool.
+ MacroEmissionCheckScope guard(this);
+
+ if (vd.Is1S() || vd.Is2S() || vd.Is4S()) {
+ Fmov(vd, static_cast<float>(imm));
+ return;
+ }
+
+ VIXL_ASSERT(vd.Is1D() || vd.Is2D());
+ if (IsImmFP64(imm)) {
+ fmov(vd, imm);
+ } else {
+ uint64_t rawbits = DoubleToRawbits(imm);
+ if (vd.IsScalar()) {
+ if (rawbits == 0) {
+ fmov(vd, xzr);
+ } else {
+ Assembler::fImmPool64(vd, imm);
+ }
+ } else {
+ // TODO: consider NEON support for load literal.
+ Movi(vd, rawbits);
+ }
+ }
+}
+
+
+void MacroAssembler::Fmov(VRegister vd, float imm) {
+ // Floating point immediates are loaded through the literal pool.
+ MacroEmissionCheckScope guard(this);
+
+ if (vd.Is1D() || vd.Is2D()) {
+ Fmov(vd, static_cast<double>(imm));
+ return;
+ }
+
+ VIXL_ASSERT(vd.Is1S() || vd.Is2S() || vd.Is4S());
+ if (IsImmFP32(imm)) {
+ fmov(vd, imm);
+ } else {
+ uint32_t rawbits = FloatToRawbits(imm);
+ if (vd.IsScalar()) {
+ if (rawbits == 0) {
+ fmov(vd, wzr);
+ } else {
+ Assembler::fImmPool32(vd, imm);
+ }
+ } else {
+ // TODO: consider NEON support for load literal.
+ Movi(vd, rawbits);
+ }
+ }
+}
+
+
+
+void MacroAssembler::Neg(const Register& rd,
+ const Operand& operand) {
+ if (operand.IsImmediate()) {
+ Mov(rd, -operand.immediate());
+ } else {
+ Sub(rd, AppropriateZeroRegFor(rd), operand);
+ }
+}
+
+
+void MacroAssembler::Negs(const Register& rd,
+ const Operand& operand) {
+ Subs(rd, AppropriateZeroRegFor(rd), operand);
+}
+
+
+bool MacroAssembler::TryOneInstrMoveImmediate(const Register& dst,
+ int64_t imm) {
+ return OneInstrMoveImmediateHelper(this, dst, imm);
+}
+
+
+Operand MacroAssembler::MoveImmediateForShiftedOp(const Register& dst,
+ int64_t imm,
+ PreShiftImmMode mode) {
+ int reg_size = dst.size();
+
+ // Encode the immediate in a single move instruction, if possible.
+ if (TryOneInstrMoveImmediate(dst, imm)) {
+ // The move was successful; nothing to do here.
+ } else {
+ // Pre-shift the immediate to the least-significant bits of the register.
+ int shift_low = CountTrailingZeros(imm, reg_size);
+ if (mode == kLimitShiftForSP) {
+ // When applied to the stack pointer, the subsequent arithmetic operation
+ // can use the extend form to shift left by a maximum of four bits. Right
+ // shifts are not allowed, so we filter them out later before the new
+ // immediate is tested.
+ shift_low = std::min(shift_low, 4);
+ }
+
+ int64_t imm_low = imm >> shift_low;
+
+ // Pre-shift the immediate to the most-significant bits of the register,
+ // inserting set bits in the least-significant bits.
+ int shift_high = CountLeadingZeros(imm, reg_size);
+ int64_t imm_high = (imm << shift_high) | ((INT64_C(1) << shift_high) - 1);
+
+ if ((mode != kNoShift) && TryOneInstrMoveImmediate(dst, imm_low)) {
+ // The new immediate has been moved into the destination's low bits:
+ // return a new leftward-shifting operand.
+ return Operand(dst, LSL, shift_low);
+ } else if ((mode == kAnyShift) && TryOneInstrMoveImmediate(dst, imm_high)) {
+ // The new immediate has been moved into the destination's high bits:
+ // return a new rightward-shifting operand.
+ return Operand(dst, LSR, shift_high);
+ } else {
+ Mov(dst, imm);
+ }
+ }
+ return Operand(dst);
+}
+
+
+void MacroAssembler::ComputeAddress(const Register& dst,
+ const MemOperand& mem_op) {
+ // We cannot handle pre-indexing or post-indexing.
+ VIXL_ASSERT(mem_op.addrmode() == Offset);
+ Register base = mem_op.base();
+ if (mem_op.IsImmediateOffset()) {
+ Add(dst, base, mem_op.offset());
+ } else {
+ VIXL_ASSERT(mem_op.IsRegisterOffset());
+ Register reg_offset = mem_op.regoffset();
+ Shift shift = mem_op.shift();
+ Extend extend = mem_op.extend();
+ if (shift == NO_SHIFT) {
+ VIXL_ASSERT(extend != NO_EXTEND);
+ Add(dst, base, Operand(reg_offset, extend, mem_op.shift_amount()));
+ } else {
+ VIXL_ASSERT(extend == NO_EXTEND);
+ Add(dst, base, Operand(reg_offset, shift, mem_op.shift_amount()));
+ }
+ }
+}
+
+
+void MacroAssembler::AddSubMacro(const Register& rd,
+ const Register& rn,
+ const Operand& operand,
+ FlagsUpdate S,
+ AddSubOp op) {
+ // Worst case is add/sub immediate:
+ // * up to 4 instructions to materialise the constant
+ // * 1 instruction for add/sub
+ MacroEmissionCheckScope guard(this);
+
+ if (operand.IsZero() && rd.Is(rn) && rd.Is64Bits() && rn.Is64Bits() &&
+ (S == LeaveFlags)) {
+ // The instruction would be a nop. Avoid generating useless code.
+ return;
+ }
+
+ if ((operand.IsImmediate() && !IsImmAddSub(operand.immediate())) ||
+ (rn.IsZero() && !operand.IsShiftedRegister()) ||
+ (operand.IsShiftedRegister() && (operand.shift() == ROR))) {
+ UseScratchRegisterScope temps(this);
+ Register temp = temps.AcquireSameSizeAs(rn);
+ if (operand.IsImmediate()) {
+ PreShiftImmMode mode = kAnyShift;
+
+ // If the destination or source register is the stack pointer, we can
+ // only pre-shift the immediate right by values supported in the add/sub
+ // extend encoding.
+ if (rd.IsSP()) {
+ // If the destination is SP and flags will be set, we can't pre-shift
+ // the immediate at all.
+ mode = (S == SetFlags) ? kNoShift : kLimitShiftForSP;
+ } else if (rn.IsSP()) {
+ mode = kLimitShiftForSP;
+ }
+
+ Operand imm_operand =
+ MoveImmediateForShiftedOp(temp, operand.immediate(), mode);
+ AddSub(rd, rn, imm_operand, S, op);
+ } else {
+ Mov(temp, operand);
+ AddSub(rd, rn, temp, S, op);
+ }
+ } else {
+ AddSub(rd, rn, operand, S, op);
+ }
+}
+
+
+void MacroAssembler::Adc(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ AddSubWithCarryMacro(rd, rn, operand, LeaveFlags, ADC);
+}
+
+
+void MacroAssembler::Adcs(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ AddSubWithCarryMacro(rd, rn, operand, SetFlags, ADC);
+}
+
+
+void MacroAssembler::Sbc(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ AddSubWithCarryMacro(rd, rn, operand, LeaveFlags, SBC);
+}
+
+
+void MacroAssembler::Sbcs(const Register& rd,
+ const Register& rn,
+ const Operand& operand) {
+ AddSubWithCarryMacro(rd, rn, operand, SetFlags, SBC);
+}
+
+
+void MacroAssembler::Ngc(const Register& rd,
+ const Operand& operand) {
+ Register zr = AppropriateZeroRegFor(rd);
+ Sbc(rd, zr, operand);
+}
+
+
+void MacroAssembler::Ngcs(const Register& rd,
+ const Operand& operand) {
+ Register zr = AppropriateZeroRegFor(rd);
+ Sbcs(rd, zr, operand);
+}
+
+
+void MacroAssembler::AddSubWithCarryMacro(const Register& rd,
+ const Register& rn,
+ const Operand& operand,
+ FlagsUpdate S,
+ AddSubWithCarryOp op) {
+ VIXL_ASSERT(rd.size() == rn.size());
+ // Worst case is addc/subc immediate:
+ // * up to 4 instructions to materialise the constant
+ // * 1 instruction for add/sub
+ MacroEmissionCheckScope guard(this);
+ UseScratchRegisterScope temps(this);
+
+ if (operand.IsImmediate() ||
+ (operand.IsShiftedRegister() && (operand.shift() == ROR))) {
+ // Add/sub with carry (immediate or ROR shifted register.)
+ Register temp = temps.AcquireSameSizeAs(rn);
+ VIXL_ASSERT(!temp.Is(rd) && !temp.Is(rn) && !temp.Is(operand.maybeReg()));
+ Mov(temp, operand);
+ AddSubWithCarry(rd, rn, Operand(temp), S, op);
+ } else if (operand.IsShiftedRegister() && (operand.shift_amount() != 0)) {
+ // Add/sub with carry (shifted register).
+ VIXL_ASSERT(operand.reg().size() == rd.size());
+ VIXL_ASSERT(operand.shift() != ROR);
+ VIXL_ASSERT(IsUintN(rd.size() == kXRegSize ? kXRegSizeLog2 : kWRegSizeLog2,
+ operand.shift_amount()));
+ temps.Exclude(operand.reg());
+ Register temp = temps.AcquireSameSizeAs(rn);
+ VIXL_ASSERT(!temp.Is(rd) && !temp.Is(rn) && !temp.Is(operand.maybeReg()));
+ EmitShift(temp, operand.reg(), operand.shift(), operand.shift_amount());
+ AddSubWithCarry(rd, rn, Operand(temp), S, op);
+ } else if (operand.IsExtendedRegister()) {
+ // Add/sub with carry (extended register).
+ VIXL_ASSERT(operand.reg().size() <= rd.size());
+ // Add/sub extended supports a shift <= 4. We want to support exactly the
+ // same modes.
+ VIXL_ASSERT(operand.shift_amount() <= 4);
+ VIXL_ASSERT(operand.reg().Is64Bits() ||
+ ((operand.extend() != UXTX) && (operand.extend() != SXTX)));
+ temps.Exclude(operand.reg());
+ Register temp = temps.AcquireSameSizeAs(rn);
+ VIXL_ASSERT(!temp.Is(rd) && !temp.Is(rn) && !temp.Is(operand.maybeReg()));
+ EmitExtendShift(temp, operand.reg(), operand.extend(),
+ operand.shift_amount());
+ AddSubWithCarry(rd, rn, Operand(temp), S, op);
+ } else {
+ // The addressing mode is directly supported by the instruction.
+ AddSubWithCarry(rd, rn, operand, S, op);
+ }
+}
+
+
+#define DEFINE_FUNCTION(FN, REGTYPE, REG, OP) \
+void MacroAssembler::FN(const REGTYPE REG, const MemOperand& addr) { \
+ LoadStoreMacro(REG, addr, OP); \
+}
+LS_MACRO_LIST(DEFINE_FUNCTION)
+#undef DEFINE_FUNCTION
+
+
+void MacroAssembler::LoadStoreMacro(const CPURegister& rt,
+ const MemOperand& addr,
+ LoadStoreOp op) {
+ // Worst case is ldr/str pre/post index:
+ // * 1 instruction for ldr/str
+ // * up to 4 instructions to materialise the constant
+ // * 1 instruction to update the base
+ MacroEmissionCheckScope guard(this);
+
+ int64_t offset = addr.offset();
+ unsigned access_size = CalcLSDataSize(op);
+
+ // Check if an immediate offset fits in the immediate field of the
+ // appropriate instruction. If not, emit two instructions to perform
+ // the operation.
+ if (addr.IsImmediateOffset() && !IsImmLSScaled(offset, access_size) &&
+ !IsImmLSUnscaled(offset)) {
+ // Immediate offset that can't be encoded using unsigned or unscaled
+ // addressing modes.
+ UseScratchRegisterScope temps(this);
+ Register temp = temps.AcquireSameSizeAs(addr.base());
+ VIXL_ASSERT(!temp.Is(rt));
+ VIXL_ASSERT(!temp.Is(addr.base()) && !temp.Is(addr.regoffset()));
+ Mov(temp, addr.offset());
+ LoadStore(rt, MemOperand(addr.base(), temp), op);
+ } else if (addr.IsPostIndex() && !IsImmLSUnscaled(offset)) {
+ // Post-index beyond unscaled addressing range.
+ LoadStore(rt, MemOperand(addr.base()), op);
+ Add(addr.base(), addr.base(), Operand(offset));
+ } else if (addr.IsPreIndex() && !IsImmLSUnscaled(offset)) {
+ // Pre-index beyond unscaled addressing range.
+ Add(addr.base(), addr.base(), Operand(offset));
+ LoadStore(rt, MemOperand(addr.base()), op);
+ } else {
+ // Encodable in one load/store instruction.
+ LoadStore(rt, addr, op);
+ }
+}
+
+
+#define DEFINE_FUNCTION(FN, REGTYPE, REG, REG2, OP) \
+void MacroAssembler::FN(const REGTYPE REG, \
+ const REGTYPE REG2, \
+ const MemOperand& addr) { \
+ LoadStorePairMacro(REG, REG2, addr, OP); \
+}
+LSPAIR_MACRO_LIST(DEFINE_FUNCTION)
+#undef DEFINE_FUNCTION
+
+void MacroAssembler::LoadStorePairMacro(const CPURegister& rt,
+ const CPURegister& rt2,
+ const MemOperand& addr,
+ LoadStorePairOp op) {
+ // TODO(all): Should we support register offset for load-store-pair?
+ VIXL_ASSERT(!addr.IsRegisterOffset());
+ // Worst case is ldp/stp immediate:
+ // * 1 instruction for ldp/stp
+ // * up to 4 instructions to materialise the constant
+ // * 1 instruction to update the base
+ MacroEmissionCheckScope guard(this);
+
+ int64_t offset = addr.offset();
+ unsigned access_size = CalcLSPairDataSize(op);
+
+ // Check if the offset fits in the immediate field of the appropriate
+ // instruction. If not, emit two instructions to perform the operation.
+ if (IsImmLSPair(offset, access_size)) {
+ // Encodable in one load/store pair instruction.
+ LoadStorePair(rt, rt2, addr, op);
+ } else {
+ Register base = addr.base();
+ if (addr.IsImmediateOffset()) {
+ UseScratchRegisterScope temps(this);
+ Register temp = temps.AcquireSameSizeAs(base);
+ Add(temp, base, offset);
+ LoadStorePair(rt, rt2, MemOperand(temp), op);
+ } else if (addr.IsPostIndex()) {
+ LoadStorePair(rt, rt2, MemOperand(base), op);
+ Add(base, base, offset);
+ } else {
+ VIXL_ASSERT(addr.IsPreIndex());
+ Add(base, base, offset);
+ LoadStorePair(rt, rt2, MemOperand(base), op);
+ }
+ }
+}
+
+
+void MacroAssembler::Prfm(PrefetchOperation op, const MemOperand& addr) {
+ MacroEmissionCheckScope guard(this);
+
+ // There are no pre- or post-index modes for prfm.
+ VIXL_ASSERT(addr.IsImmediateOffset() || addr.IsRegisterOffset());
+
+ // The access size is implicitly 8 bytes for all prefetch operations.
+ unsigned size = kXRegSizeInBytesLog2;
+
+ // Check if an immediate offset fits in the immediate field of the
+ // appropriate instruction. If not, emit two instructions to perform
+ // the operation.
+ if (addr.IsImmediateOffset() && !IsImmLSScaled(addr.offset(), size) &&
+ !IsImmLSUnscaled(addr.offset())) {
+ // Immediate offset that can't be encoded using unsigned or unscaled
+ // addressing modes.
+ UseScratchRegisterScope temps(this);
+ Register temp = temps.AcquireSameSizeAs(addr.base());
+ Mov(temp, addr.offset());
+ Prefetch(op, MemOperand(addr.base(), temp));
+ } else {
+ // Simple register-offsets are encodable in one instruction.
+ Prefetch(op, addr);
+ }
+}
+
+
+void MacroAssembler::PushStackPointer() {
+ PrepareForPush(1, 8);
+
+ // Pushing a stack pointer leads to implementation-defined
+ // behavior, which may be surprising. In particular,
+ // str x28, [x28, #-8]!
+ // pre-decrements the stack pointer, storing the decremented value.
+ // Additionally, sp is read as xzr in this context, so it cannot be pushed.
+ // So we must use a scratch register.
+ UseScratchRegisterScope temps(this);
+ Register scratch = temps.AcquireX();
+
+ Mov(scratch, GetStackPointer64());
+ str(scratch, MemOperand(GetStackPointer64(), -8, PreIndex));
+}
+
+
+void MacroAssembler::Push(const CPURegister& src0, const CPURegister& src1,
+ const CPURegister& src2, const CPURegister& src3) {
+ VIXL_ASSERT(AreSameSizeAndType(src0, src1, src2, src3));
+ VIXL_ASSERT(src0.IsValid());
+
+ int count = 1 + src1.IsValid() + src2.IsValid() + src3.IsValid();
+ int size = src0.SizeInBytes();
+
+ if (src0.Is(GetStackPointer64())) {
+ VIXL_ASSERT(count == 1);
+ VIXL_ASSERT(size == 8);
+ PushStackPointer();
+ return;
+ }
+
+ PrepareForPush(count, size);
+ PushHelper(count, size, src0, src1, src2, src3);
+}
+
+
+void MacroAssembler::Pop(const CPURegister& dst0, const CPURegister& dst1,
+ const CPURegister& dst2, const CPURegister& dst3) {
+ // It is not valid to pop into the same register more than once in one
+ // instruction, not even into the zero register.
+ VIXL_ASSERT(!AreAliased(dst0, dst1, dst2, dst3));
+ VIXL_ASSERT(AreSameSizeAndType(dst0, dst1, dst2, dst3));
+ VIXL_ASSERT(dst0.IsValid());
+
+ int count = 1 + dst1.IsValid() + dst2.IsValid() + dst3.IsValid();
+ int size = dst0.SizeInBytes();
+
+ PrepareForPop(count, size);
+ PopHelper(count, size, dst0, dst1, dst2, dst3);
+}
+
+
+void MacroAssembler::PushCPURegList(CPURegList registers) {
+ VIXL_ASSERT(!registers.Overlaps(*TmpList()));
+ VIXL_ASSERT(!registers.Overlaps(*FPTmpList()));
+
+ int reg_size = registers.RegisterSizeInBytes();
+ PrepareForPush(registers.Count(), reg_size);
+
+ // Bump the stack pointer and store two registers at the bottom.
+ int size = registers.TotalSizeInBytes();
+ const CPURegister& bottom_0 = registers.PopLowestIndex();
+ const CPURegister& bottom_1 = registers.PopLowestIndex();
+ if (bottom_0.IsValid() && bottom_1.IsValid()) {
+ Stp(bottom_0, bottom_1, MemOperand(GetStackPointer64(), -size, PreIndex));
+ } else if (bottom_0.IsValid()) {
+ Str(bottom_0, MemOperand(GetStackPointer64(), -size, PreIndex));
+ }
+
+ int offset = 2 * reg_size;
+ while (!registers.IsEmpty()) {
+ const CPURegister& src0 = registers.PopLowestIndex();
+ const CPURegister& src1 = registers.PopLowestIndex();
+ if (src1.IsValid()) {
+ Stp(src0, src1, MemOperand(GetStackPointer64(), offset));
+ } else {
+ Str(src0, MemOperand(GetStackPointer64(), offset));
+ }
+ offset += 2 * reg_size;
+ }
+}
+
+
+void MacroAssembler::PopCPURegList(CPURegList registers) {
+ VIXL_ASSERT(!registers.Overlaps(*TmpList()));
+ VIXL_ASSERT(!registers.Overlaps(*FPTmpList()));
+
+ int reg_size = registers.RegisterSizeInBytes();
+ PrepareForPop(registers.Count(), reg_size);
+
+
+ int size = registers.TotalSizeInBytes();
+ const CPURegister& bottom_0 = registers.PopLowestIndex();
+ const CPURegister& bottom_1 = registers.PopLowestIndex();
+
+ int offset = 2 * reg_size;
+ while (!registers.IsEmpty()) {
+ const CPURegister& dst0 = registers.PopLowestIndex();
+ const CPURegister& dst1 = registers.PopLowestIndex();
+ if (dst1.IsValid()) {
+ Ldp(dst0, dst1, MemOperand(GetStackPointer64(), offset));
+ } else {
+ Ldr(dst0, MemOperand(GetStackPointer64(), offset));
+ }
+ offset += 2 * reg_size;
+ }
+
+ // Load the two registers at the bottom and drop the stack pointer.
+ if (bottom_0.IsValid() && bottom_1.IsValid()) {
+ Ldp(bottom_0, bottom_1, MemOperand(GetStackPointer64(), size, PostIndex));
+ } else if (bottom_0.IsValid()) {
+ Ldr(bottom_0, MemOperand(GetStackPointer64(), size, PostIndex));
+ }
+}
+
+
+void MacroAssembler::PushMultipleTimes(int count, Register src) {
+ int size = src.SizeInBytes();
+
+ PrepareForPush(count, size);
+ // Push up to four registers at a time if possible because if the current
+ // stack pointer is sp and the register size is 32, registers must be pushed
+ // in blocks of four in order to maintain the 16-byte alignment for sp.
+ while (count >= 4) {
+ PushHelper(4, size, src, src, src, src);
+ count -= 4;
+ }
+ if (count >= 2) {
+ PushHelper(2, size, src, src, NoReg, NoReg);
+ count -= 2;
+ }
+ if (count == 1) {
+ PushHelper(1, size, src, NoReg, NoReg, NoReg);
+ count -= 1;
+ }
+ VIXL_ASSERT(count == 0);
+}
+
+
+void MacroAssembler::PushHelper(int count, int size,
+ const CPURegister& src0,
+ const CPURegister& src1,
+ const CPURegister& src2,
+ const CPURegister& src3) {
+ // Ensure that we don't unintentionally modify scratch or debug registers.
+ // Worst case for size is 2 stp.
+ InstructionAccurateScope scope(this, 2,
+ InstructionAccurateScope::kMaximumSize);
+
+ VIXL_ASSERT(AreSameSizeAndType(src0, src1, src2, src3));
+ VIXL_ASSERT(size == src0.SizeInBytes());
+
+ // Pushing the stack pointer has unexpected behavior. See PushStackPointer().
+ VIXL_ASSERT(!src0.Is(GetStackPointer64()) && !src0.Is(sp));
+ VIXL_ASSERT(!src1.Is(GetStackPointer64()) && !src1.Is(sp));
+ VIXL_ASSERT(!src2.Is(GetStackPointer64()) && !src2.Is(sp));
+ VIXL_ASSERT(!src3.Is(GetStackPointer64()) && !src3.Is(sp));
+
+ // The JS engine should never push 4 bytes.
+ VIXL_ASSERT(size >= 8);
+
+ // When pushing multiple registers, the store order is chosen such that
+ // Push(a, b) is equivalent to Push(a) followed by Push(b).
+ switch (count) {
+ case 1:
+ VIXL_ASSERT(src1.IsNone() && src2.IsNone() && src3.IsNone());
+ str(src0, MemOperand(GetStackPointer64(), -1 * size, PreIndex));
+ break;
+ case 2:
+ VIXL_ASSERT(src2.IsNone() && src3.IsNone());
+ stp(src1, src0, MemOperand(GetStackPointer64(), -2 * size, PreIndex));
+ break;
+ case 3:
+ VIXL_ASSERT(src3.IsNone());
+ stp(src2, src1, MemOperand(GetStackPointer64(), -3 * size, PreIndex));
+ str(src0, MemOperand(GetStackPointer64(), 2 * size));
+ break;
+ case 4:
+ // Skip over 4 * size, then fill in the gap. This allows four W registers
+ // to be pushed using sp, whilst maintaining 16-byte alignment for sp at
+ // all times.
+ stp(src3, src2, MemOperand(GetStackPointer64(), -4 * size, PreIndex));
+ stp(src1, src0, MemOperand(GetStackPointer64(), 2 * size));
+ break;
+ default:
+ VIXL_UNREACHABLE();
+ }
+}
+
+
+void MacroAssembler::PopHelper(int count, int size,
+ const CPURegister& dst0,
+ const CPURegister& dst1,
+ const CPURegister& dst2,
+ const CPURegister& dst3) {
+ // Ensure that we don't unintentionally modify scratch or debug registers.
+ // Worst case for size is 2 ldp.
+ InstructionAccurateScope scope(this, 2,
+ InstructionAccurateScope::kMaximumSize);
+
+ VIXL_ASSERT(AreSameSizeAndType(dst0, dst1, dst2, dst3));
+ VIXL_ASSERT(size == dst0.SizeInBytes());
+
+ // When popping multiple registers, the load order is chosen such that
+ // Pop(a, b) is equivalent to Pop(a) followed by Pop(b).
+ switch (count) {
+ case 1:
+ VIXL_ASSERT(dst1.IsNone() && dst2.IsNone() && dst3.IsNone());
+ ldr(dst0, MemOperand(GetStackPointer64(), 1 * size, PostIndex));
+ break;
+ case 2:
+ VIXL_ASSERT(dst2.IsNone() && dst3.IsNone());
+ ldp(dst0, dst1, MemOperand(GetStackPointer64(), 2 * size, PostIndex));
+ break;
+ case 3:
+ VIXL_ASSERT(dst3.IsNone());
+ ldr(dst2, MemOperand(GetStackPointer64(), 2 * size));
+ ldp(dst0, dst1, MemOperand(GetStackPointer64(), 3 * size, PostIndex));
+ break;
+ case 4:
+ // Load the higher addresses first, then load the lower addresses and skip
+ // the whole block in the second instruction. This allows four W registers
+ // to be popped using sp, whilst maintaining 16-byte alignment for sp at
+ // all times.
+ ldp(dst2, dst3, MemOperand(GetStackPointer64(), 2 * size));
+ ldp(dst0, dst1, MemOperand(GetStackPointer64(), 4 * size, PostIndex));
+ break;
+ default:
+ VIXL_UNREACHABLE();
+ }
+}
+
+
+void MacroAssembler::PrepareForPush(int count, int size) {
+ if (sp.Is(GetStackPointer64())) {
+ // If the current stack pointer is sp, then it must be aligned to 16 bytes
+ // on entry and the total size of the specified registers must also be a
+ // multiple of 16 bytes.
+ VIXL_ASSERT((count * size) % 16 == 0);
+ } else {
+ // Even if the current stack pointer is not the system stack pointer (sp),
+ // the system stack pointer will still be modified in order to comply with
+ // ABI rules about accessing memory below the system stack pointer.
+ BumpSystemStackPointer(count * size);
+ }
+}
+
+
+void MacroAssembler::PrepareForPop(int count, int size) {
+ USE(count, size);
+ if (sp.Is(GetStackPointer64())) {
+ // If the current stack pointer is sp, then it must be aligned to 16 bytes
+ // on entry and the total size of the specified registers must also be a
+ // multiple of 16 bytes.
+ VIXL_ASSERT((count * size) % 16 == 0);
+ }
+}
+
+void MacroAssembler::Poke(const Register& src, const Operand& offset) {
+ if (offset.IsImmediate()) {
+ VIXL_ASSERT(offset.immediate() >= 0);
+ }
+
+ Str(src, MemOperand(GetStackPointer64(), offset));
+}
+
+
+void MacroAssembler::Peek(const Register& dst, const Operand& offset) {
+ if (offset.IsImmediate()) {
+ VIXL_ASSERT(offset.immediate() >= 0);
+ }
+
+ Ldr(dst, MemOperand(GetStackPointer64(), offset));
+}
+
+
+void MacroAssembler::Claim(const Operand& size) {
+
+ if (size.IsZero()) {
+ return;
+ }
+
+ if (size.IsImmediate()) {
+ VIXL_ASSERT(size.immediate() > 0);
+ if (sp.Is(GetStackPointer64())) {
+ VIXL_ASSERT((size.immediate() % 16) == 0);
+ }
+ }
+
+ Sub(GetStackPointer64(), GetStackPointer64(), size);
+
+ // Make sure the real stack pointer reflects the claimed stack space.
+ // We can't use stack memory below the stack pointer, it could be clobbered by
+ // interupts and signal handlers.
+ if (!sp.Is(GetStackPointer64())) {
+ Mov(sp, GetStackPointer64());
+ }
+}
+
+
+void MacroAssembler::Drop(const Operand& size) {
+
+ if (size.IsZero()) {
+ return;
+ }
+
+ if (size.IsImmediate()) {
+ VIXL_ASSERT(size.immediate() > 0);
+ if (sp.Is(GetStackPointer64())) {
+ VIXL_ASSERT((size.immediate() % 16) == 0);
+ }
+ }
+
+ Add(GetStackPointer64(), GetStackPointer64(), size);
+}
+
+
+void MacroAssembler::PushCalleeSavedRegisters() {
+ // Ensure that the macro-assembler doesn't use any scratch registers.
+ // 10 stp will be emitted.
+ // TODO(all): Should we use GetCalleeSaved and SavedFP.
+ InstructionAccurateScope scope(this, 10);
+
+ // This method must not be called unless the current stack pointer is sp.
+ VIXL_ASSERT(sp.Is(GetStackPointer64()));
+
+ MemOperand tos(sp, -2 * static_cast<int>(kXRegSizeInBytes), PreIndex);
+
+ stp(x29, x30, tos);
+ stp(x27, x28, tos);
+ stp(x25, x26, tos);
+ stp(x23, x24, tos);
+ stp(x21, x22, tos);
+ stp(x19, x20, tos);
+
+ stp(d14, d15, tos);
+ stp(d12, d13, tos);
+ stp(d10, d11, tos);
+ stp(d8, d9, tos);
+}
+
+
+void MacroAssembler::PopCalleeSavedRegisters() {
+ // Ensure that the macro-assembler doesn't use any scratch registers.
+ // 10 ldp will be emitted.
+ // TODO(all): Should we use GetCalleeSaved and SavedFP.
+ InstructionAccurateScope scope(this, 10);
+
+ // This method must not be called unless the current stack pointer is sp.
+ VIXL_ASSERT(sp.Is(GetStackPointer64()));
+
+ MemOperand tos(sp, 2 * kXRegSizeInBytes, PostIndex);
+
+ ldp(d8, d9, tos);
+ ldp(d10, d11, tos);
+ ldp(d12, d13, tos);
+ ldp(d14, d15, tos);
+
+ ldp(x19, x20, tos);
+ ldp(x21, x22, tos);
+ ldp(x23, x24, tos);
+ ldp(x25, x26, tos);
+ ldp(x27, x28, tos);
+ ldp(x29, x30, tos);
+}
+
+void MacroAssembler::LoadCPURegList(CPURegList registers,
+ const MemOperand& src) {
+ LoadStoreCPURegListHelper(kLoad, registers, src);
+}
+
+void MacroAssembler::StoreCPURegList(CPURegList registers,
+ const MemOperand& dst) {
+ LoadStoreCPURegListHelper(kStore, registers, dst);
+}
+
+
+void MacroAssembler::LoadStoreCPURegListHelper(LoadStoreCPURegListAction op,
+ CPURegList registers,
+ const MemOperand& mem) {
+ // We do not handle pre-indexing or post-indexing.
+ VIXL_ASSERT(!(mem.IsPreIndex() || mem.IsPostIndex()));
+ VIXL_ASSERT(!registers.Overlaps(tmp_list_));
+ VIXL_ASSERT(!registers.Overlaps(fptmp_list_));
+ VIXL_ASSERT(!registers.IncludesAliasOf(sp));
+
+ UseScratchRegisterScope temps(this);
+
+ MemOperand loc = BaseMemOperandForLoadStoreCPURegList(registers,
+ mem,
+ &temps);
+
+ while (registers.Count() >= 2) {
+ const CPURegister& dst0 = registers.PopLowestIndex();
+ const CPURegister& dst1 = registers.PopLowestIndex();
+ if (op == kStore) {
+ Stp(dst0, dst1, loc);
+ } else {
+ VIXL_ASSERT(op == kLoad);
+ Ldp(dst0, dst1, loc);
+ }
+ loc.AddOffset(2 * registers.RegisterSizeInBytes());
+ }
+ if (!registers.IsEmpty()) {
+ if (op == kStore) {
+ Str(registers.PopLowestIndex(), loc);
+ } else {
+ VIXL_ASSERT(op == kLoad);
+ Ldr(registers.PopLowestIndex(), loc);
+ }
+ }
+}
+
+MemOperand MacroAssembler::BaseMemOperandForLoadStoreCPURegList(
+ const CPURegList& registers,
+ const MemOperand& mem,
+ UseScratchRegisterScope* scratch_scope) {
+ // If necessary, pre-compute the base address for the accesses.
+ if (mem.IsRegisterOffset()) {
+ Register reg_base = scratch_scope->AcquireX();
+ ComputeAddress(reg_base, mem);
+ return MemOperand(reg_base);
+
+ } else if (mem.IsImmediateOffset()) {
+ int reg_size = registers.RegisterSizeInBytes();
+ int total_size = registers.TotalSizeInBytes();
+ int64_t min_offset = mem.offset();
+ int64_t max_offset = mem.offset() + std::max(0, total_size - 2 * reg_size);
+ if ((registers.Count() >= 2) &&
+ (!Assembler::IsImmLSPair(min_offset, WhichPowerOf2(reg_size)) ||
+ !Assembler::IsImmLSPair(max_offset, WhichPowerOf2(reg_size)))) {
+ Register reg_base = scratch_scope->AcquireX();
+ ComputeAddress(reg_base, mem);
+ return MemOperand(reg_base);
+ }
+ }
+
+ return mem;
+}
+
+void MacroAssembler::BumpSystemStackPointer(const Operand& space) {
+ VIXL_ASSERT(!sp.Is(GetStackPointer64()));
+ // TODO: Several callers rely on this not using scratch registers, so we use
+ // the assembler directly here. However, this means that large immediate
+ // values of 'space' cannot be handled.
+ InstructionAccurateScope scope(this, 1);
+ sub(sp, GetStackPointer64(), space);
+}
+
+
+void MacroAssembler::Trace(TraceParameters parameters, TraceCommand command) {
+
+#ifdef JS_SIMULATOR_ARM64
+ // The arguments to the trace pseudo instruction need to be contiguous in
+ // memory, so make sure we don't try to emit a literal pool.
+ InstructionAccurateScope scope(this, kTraceLength / kInstructionSize);
+
+ Label start;
+ bind(&start);
+
+ // Refer to simulator-a64.h for a description of the marker and its
+ // arguments.
+ hlt(kTraceOpcode);
+
+ // VIXL_ASSERT(SizeOfCodeGeneratedSince(&start) == kTraceParamsOffset);
+ dc32(parameters);
+
+ // VIXL_ASSERT(SizeOfCodeGeneratedSince(&start) == kTraceCommandOffset);
+ dc32(command);
+#else
+ // Emit nothing on real hardware.
+ USE(parameters, command);
+#endif
+}
+
+
+void MacroAssembler::Log(TraceParameters parameters) {
+
+#ifdef JS_SIMULATOR_ARM64
+ // The arguments to the log pseudo instruction need to be contiguous in
+ // memory, so make sure we don't try to emit a literal pool.
+ InstructionAccurateScope scope(this, kLogLength / kInstructionSize);
+
+ Label start;
+ bind(&start);
+
+ // Refer to simulator-a64.h for a description of the marker and its
+ // arguments.
+ hlt(kLogOpcode);
+
+ // VIXL_ASSERT(SizeOfCodeGeneratedSince(&start) == kLogParamsOffset);
+ dc32(parameters);
+#else
+ // Emit nothing on real hardware.
+ USE(parameters);
+#endif
+}
+
+
+void MacroAssembler::EnableInstrumentation() {
+ VIXL_ASSERT(!isprint(InstrumentStateEnable));
+ InstructionAccurateScope scope(this, 1);
+ movn(xzr, InstrumentStateEnable);
+}
+
+
+void MacroAssembler::DisableInstrumentation() {
+ VIXL_ASSERT(!isprint(InstrumentStateDisable));
+ InstructionAccurateScope scope(this, 1);
+ movn(xzr, InstrumentStateDisable);
+}
+
+
+void MacroAssembler::AnnotateInstrumentation(const char* marker_name) {
+ VIXL_ASSERT(strlen(marker_name) == 2);
+
+ // We allow only printable characters in the marker names. Unprintable
+ // characters are reserved for controlling features of the instrumentation.
+ VIXL_ASSERT(isprint(marker_name[0]) && isprint(marker_name[1]));
+
+ InstructionAccurateScope scope(this, 1);
+ movn(xzr, (marker_name[1] << 8) | marker_name[0]);
+}
+
+
+void UseScratchRegisterScope::Open(MacroAssembler* masm) {
+ VIXL_ASSERT(!initialised_);
+ available_ = masm->TmpList();
+ availablefp_ = masm->FPTmpList();
+ old_available_ = available_->list();
+ old_availablefp_ = availablefp_->list();
+ VIXL_ASSERT(available_->type() == CPURegister::kRegister);
+ VIXL_ASSERT(availablefp_->type() == CPURegister::kVRegister);
+#ifdef DEBUG
+ initialised_ = true;
+#endif
+}
+
+
+void UseScratchRegisterScope::Close() {
+ if (available_) {
+ available_->set_list(old_available_);
+ available_ = NULL;
+ }
+ if (availablefp_) {
+ availablefp_->set_list(old_availablefp_);
+ availablefp_ = NULL;
+ }
+#ifdef DEBUG
+ initialised_ = false;
+#endif
+}
+
+
+UseScratchRegisterScope::UseScratchRegisterScope(MacroAssembler* masm) {
+#ifdef DEBUG
+ initialised_ = false;
+#endif
+ Open(masm);
+}
+
+// This allows deferred (and optional) initialisation of the scope.
+UseScratchRegisterScope::UseScratchRegisterScope()
+ : available_(NULL), availablefp_(NULL),
+ old_available_(0), old_availablefp_(0) {
+#ifdef DEBUG
+ initialised_ = false;
+#endif
+}
+
+UseScratchRegisterScope::~UseScratchRegisterScope() {
+ Close();
+}
+
+
+bool UseScratchRegisterScope::IsAvailable(const CPURegister& reg) const {
+ return available_->IncludesAliasOf(reg) || availablefp_->IncludesAliasOf(reg);
+}
+
+
+Register UseScratchRegisterScope::AcquireSameSizeAs(const Register& reg) {
+ int code = AcquireNextAvailable(available_).code();
+ return Register(code, reg.size());
+}
+
+
+FPRegister UseScratchRegisterScope::AcquireSameSizeAs(const FPRegister& reg) {
+ int code = AcquireNextAvailable(availablefp_).code();
+ return FPRegister(code, reg.size());
+}
+
+
+void UseScratchRegisterScope::Release(const CPURegister& reg) {
+ VIXL_ASSERT(initialised_);
+ if (reg.IsRegister()) {
+ ReleaseByCode(available_, reg.code());
+ } else if (reg.IsFPRegister()) {
+ ReleaseByCode(availablefp_, reg.code());
+ } else {
+ VIXL_ASSERT(reg.IsNone());
+ }
+}
+
+
+void UseScratchRegisterScope::Include(const CPURegList& list) {
+ VIXL_ASSERT(initialised_);
+ if (list.type() == CPURegister::kRegister) {
+ // Make sure that neither sp nor xzr are included the list.
+ IncludeByRegList(available_, list.list() & ~(xzr.Bit() | sp.Bit()));
+ } else {
+ VIXL_ASSERT(list.type() == CPURegister::kVRegister);
+ IncludeByRegList(availablefp_, list.list());
+ }
+}
+
+
+void UseScratchRegisterScope::Include(const Register& reg1,
+ const Register& reg2,
+ const Register& reg3,
+ const Register& reg4) {
+ VIXL_ASSERT(initialised_);
+ RegList include = reg1.Bit() | reg2.Bit() | reg3.Bit() | reg4.Bit();
+ // Make sure that neither sp nor xzr are included the list.
+ include &= ~(xzr.Bit() | sp.Bit());
+
+ IncludeByRegList(available_, include);
+}
+
+
+void UseScratchRegisterScope::Include(const FPRegister& reg1,
+ const FPRegister& reg2,
+ const FPRegister& reg3,
+ const FPRegister& reg4) {
+ RegList include = reg1.Bit() | reg2.Bit() | reg3.Bit() | reg4.Bit();
+ IncludeByRegList(availablefp_, include);
+}
+
+
+void UseScratchRegisterScope::Exclude(const CPURegList& list) {
+ if (list.type() == CPURegister::kRegister) {
+ ExcludeByRegList(available_, list.list());
+ } else {
+ VIXL_ASSERT(list.type() == CPURegister::kVRegister);
+ ExcludeByRegList(availablefp_, list.list());
+ }
+}
+
+
+void UseScratchRegisterScope::Exclude(const Register& reg1,
+ const Register& reg2,
+ const Register& reg3,
+ const Register& reg4) {
+ RegList exclude = reg1.Bit() | reg2.Bit() | reg3.Bit() | reg4.Bit();
+ ExcludeByRegList(available_, exclude);
+}
+
+
+void UseScratchRegisterScope::Exclude(const FPRegister& reg1,
+ const FPRegister& reg2,
+ const FPRegister& reg3,
+ const FPRegister& reg4) {
+ RegList excludefp = reg1.Bit() | reg2.Bit() | reg3.Bit() | reg4.Bit();
+ ExcludeByRegList(availablefp_, excludefp);
+}
+
+
+void UseScratchRegisterScope::Exclude(const CPURegister& reg1,
+ const CPURegister& reg2,
+ const CPURegister& reg3,
+ const CPURegister& reg4) {
+ RegList exclude = 0;
+ RegList excludefp = 0;
+
+ const CPURegister regs[] = {reg1, reg2, reg3, reg4};
+
+ for (unsigned i = 0; i < (sizeof(regs) / sizeof(regs[0])); i++) {
+ if (regs[i].IsRegister()) {
+ exclude |= regs[i].Bit();
+ } else if (regs[i].IsFPRegister()) {
+ excludefp |= regs[i].Bit();
+ } else {
+ VIXL_ASSERT(regs[i].IsNone());
+ }
+ }
+
+ ExcludeByRegList(available_, exclude);
+ ExcludeByRegList(availablefp_, excludefp);
+}
+
+
+void UseScratchRegisterScope::ExcludeAll() {
+ ExcludeByRegList(available_, available_->list());
+ ExcludeByRegList(availablefp_, availablefp_->list());
+}
+
+
+CPURegister UseScratchRegisterScope::AcquireNextAvailable(
+ CPURegList* available) {
+ VIXL_CHECK(!available->IsEmpty());
+ CPURegister result = available->PopLowestIndex();
+ VIXL_ASSERT(!AreAliased(result, xzr, sp));
+ return result;
+}
+
+
+void UseScratchRegisterScope::ReleaseByCode(CPURegList* available, int code) {
+ ReleaseByRegList(available, static_cast<RegList>(1) << code);
+}
+
+
+void UseScratchRegisterScope::ReleaseByRegList(CPURegList* available,
+ RegList regs) {
+ available->set_list(available->list() | regs);
+}
+
+
+void UseScratchRegisterScope::IncludeByRegList(CPURegList* available,
+ RegList regs) {
+ available->set_list(available->list() | regs);
+}
+
+
+void UseScratchRegisterScope::ExcludeByRegList(CPURegList* available,
+ RegList exclude) {
+ available->set_list(available->list() & ~exclude);
+}
+
+} // namespace vixl
diff --git a/js/src/jit/arm64/vixl/MacroAssembler-vixl.h b/js/src/jit/arm64/vixl/MacroAssembler-vixl.h
new file mode 100644
index 0000000000..3c403a815f
--- /dev/null
+++ b/js/src/jit/arm64/vixl/MacroAssembler-vixl.h
@@ -0,0 +1,2622 @@
+// Copyright 2015, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_A64_MACRO_ASSEMBLER_A64_H_
+#define VIXL_A64_MACRO_ASSEMBLER_A64_H_
+
+#include <algorithm>
+#include <limits>
+
+#include "jit/arm64/Assembler-arm64.h"
+#include "jit/arm64/vixl/Debugger-vixl.h"
+#include "jit/arm64/vixl/Globals-vixl.h"
+#include "jit/arm64/vixl/Instrument-vixl.h"
+#include "jit/arm64/vixl/Simulator-Constants-vixl.h"
+
+#define LS_MACRO_LIST(V) \
+ V(Ldrb, Register&, rt, LDRB_w) \
+ V(Strb, Register&, rt, STRB_w) \
+ V(Ldrsb, Register&, rt, rt.Is64Bits() ? LDRSB_x : LDRSB_w) \
+ V(Ldrh, Register&, rt, LDRH_w) \
+ V(Strh, Register&, rt, STRH_w) \
+ V(Ldrsh, Register&, rt, rt.Is64Bits() ? LDRSH_x : LDRSH_w) \
+ V(Ldr, CPURegister&, rt, LoadOpFor(rt)) \
+ V(Str, CPURegister&, rt, StoreOpFor(rt)) \
+ V(Ldrsw, Register&, rt, LDRSW_x)
+
+
+#define LSPAIR_MACRO_LIST(V) \
+ V(Ldp, CPURegister&, rt, rt2, LoadPairOpFor(rt, rt2)) \
+ V(Stp, CPURegister&, rt, rt2, StorePairOpFor(rt, rt2)) \
+ V(Ldpsw, CPURegister&, rt, rt2, LDPSW_x)
+
+namespace vixl {
+
+// Forward declaration
+class MacroAssembler;
+class UseScratchRegisterScope;
+
+// This scope has the following purposes:
+// * Acquire/Release the underlying assembler's code buffer.
+// * This is mandatory before emitting.
+// * Emit the literal or veneer pools if necessary before emitting the
+// macro-instruction.
+// * Ensure there is enough space to emit the macro-instruction.
+class EmissionCheckScope {
+ public:
+ EmissionCheckScope(MacroAssembler* masm, size_t size)
+ : masm_(masm)
+ { }
+
+ protected:
+ MacroAssembler* masm_;
+#ifdef DEBUG
+ Label start_;
+ size_t size_;
+#endif
+};
+
+
+// Helper for common Emission checks.
+// The macro-instruction maps to a single instruction.
+class SingleEmissionCheckScope : public EmissionCheckScope {
+ public:
+ explicit SingleEmissionCheckScope(MacroAssembler* masm)
+ : EmissionCheckScope(masm, kInstructionSize) {}
+};
+
+
+// The macro instruction is a "typical" macro-instruction. Typical macro-
+// instruction only emit a few instructions, a few being defined as 8 here.
+class MacroEmissionCheckScope : public EmissionCheckScope {
+ public:
+ explicit MacroEmissionCheckScope(MacroAssembler* masm)
+ : EmissionCheckScope(masm, kTypicalMacroInstructionMaxSize) {}
+
+ private:
+ static const size_t kTypicalMacroInstructionMaxSize = 8 * kInstructionSize;
+};
+
+
+enum BranchType {
+ // Copies of architectural conditions.
+ // The associated conditions can be used in place of those, the code will
+ // take care of reinterpreting them with the correct type.
+ integer_eq = eq,
+ integer_ne = ne,
+ integer_hs = hs,
+ integer_lo = lo,
+ integer_mi = mi,
+ integer_pl = pl,
+ integer_vs = vs,
+ integer_vc = vc,
+ integer_hi = hi,
+ integer_ls = ls,
+ integer_ge = ge,
+ integer_lt = lt,
+ integer_gt = gt,
+ integer_le = le,
+ integer_al = al,
+ integer_nv = nv,
+
+ // These two are *different* from the architectural codes al and nv.
+ // 'always' is used to generate unconditional branches.
+ // 'never' is used to not generate a branch (generally as the inverse
+ // branch type of 'always).
+ always, never,
+ // cbz and cbnz
+ reg_zero, reg_not_zero,
+ // tbz and tbnz
+ reg_bit_clear, reg_bit_set,
+
+ // Aliases.
+ kBranchTypeFirstCondition = eq,
+ kBranchTypeLastCondition = nv,
+ kBranchTypeFirstUsingReg = reg_zero,
+ kBranchTypeFirstUsingBit = reg_bit_clear
+};
+
+
+enum DiscardMoveMode { kDontDiscardForSameWReg, kDiscardForSameWReg };
+
+// The macro assembler supports moving automatically pre-shifted immediates for
+// arithmetic and logical instructions, and then applying a post shift in the
+// instruction to undo the modification, in order to reduce the code emitted for
+// an operation. For example:
+//
+// Add(x0, x0, 0x1f7de) => movz x16, 0xfbef; add x0, x0, x16, lsl #1.
+//
+// This optimisation can be only partially applied when the stack pointer is an
+// operand or destination, so this enumeration is used to control the shift.
+enum PreShiftImmMode {
+ kNoShift, // Don't pre-shift.
+ kLimitShiftForSP, // Limit pre-shift for add/sub extend use.
+ kAnyShift // Allow any pre-shift.
+};
+
+
+class MacroAssembler : public js::jit::Assembler {
+ public:
+ MacroAssembler();
+
+ // Finalize a code buffer of generated instructions. This function must be
+ // called before executing or copying code from the buffer.
+ void FinalizeCode();
+
+
+ // Constant generation helpers.
+ // These functions return the number of instructions required to move the
+ // immediate into the destination register. Also, if the masm pointer is
+ // non-null, it generates the code to do so.
+ // The two features are implemented using one function to avoid duplication of
+ // the logic.
+ // The function can be used to evaluate the cost of synthesizing an
+ // instruction using 'mov immediate' instructions. A user might prefer loading
+ // a constant using the literal pool instead of using multiple 'mov immediate'
+ // instructions.
+ static int MoveImmediateHelper(MacroAssembler* masm,
+ const Register &rd,
+ uint64_t imm);
+ static bool OneInstrMoveImmediateHelper(MacroAssembler* masm,
+ const Register& dst,
+ int64_t imm);
+
+
+ // Logical macros.
+ void And(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+ void Ands(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+ void Bic(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+ void Bics(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+ void Orr(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+ void Orn(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+ void Eor(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+ void Eon(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+ void Tst(const Register& rn, const Operand& operand);
+ void LogicalMacro(const Register& rd,
+ const Register& rn,
+ const Operand& operand,
+ LogicalOp op);
+
+ // Add and sub macros.
+ void Add(const Register& rd,
+ const Register& rn,
+ const Operand& operand,
+ FlagsUpdate S = LeaveFlags);
+ void Adds(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+ void Sub(const Register& rd,
+ const Register& rn,
+ const Operand& operand,
+ FlagsUpdate S = LeaveFlags);
+ void Subs(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+ void Cmn(const Register& rn, const Operand& operand);
+ void Cmp(const Register& rn, const Operand& operand);
+ void Neg(const Register& rd,
+ const Operand& operand);
+ void Negs(const Register& rd,
+ const Operand& operand);
+
+ void AddSubMacro(const Register& rd,
+ const Register& rn,
+ const Operand& operand,
+ FlagsUpdate S,
+ AddSubOp op);
+
+ // Add/sub with carry macros.
+ void Adc(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+ void Adcs(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+ void Sbc(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+ void Sbcs(const Register& rd,
+ const Register& rn,
+ const Operand& operand);
+ void Ngc(const Register& rd,
+ const Operand& operand);
+ void Ngcs(const Register& rd,
+ const Operand& operand);
+ void AddSubWithCarryMacro(const Register& rd,
+ const Register& rn,
+ const Operand& operand,
+ FlagsUpdate S,
+ AddSubWithCarryOp op);
+
+ // Move macros.
+ void Mov(const Register& rd, uint64_t imm);
+ void Mov(const Register& rd,
+ const Operand& operand,
+ DiscardMoveMode discard_mode = kDontDiscardForSameWReg);
+ void Mvn(const Register& rd, uint64_t imm) {
+ Mov(rd, (rd.size() == kXRegSize) ? ~imm : (~imm & kWRegMask));
+ }
+ void Mvn(const Register& rd, const Operand& operand);
+
+ // Try to move an immediate into the destination register in a single
+ // instruction. Returns true for success, and updates the contents of dst.
+ // Returns false, otherwise.
+ bool TryOneInstrMoveImmediate(const Register& dst, int64_t imm);
+
+ // Move an immediate into register dst, and return an Operand object for
+ // use with a subsequent instruction that accepts a shift. The value moved
+ // into dst is not necessarily equal to imm; it may have had a shifting
+ // operation applied to it that will be subsequently undone by the shift
+ // applied in the Operand.
+ Operand MoveImmediateForShiftedOp(const Register& dst,
+ int64_t imm,
+ PreShiftImmMode mode);
+
+ // Synthesises the address represented by a MemOperand into a register.
+ void ComputeAddress(const Register& dst, const MemOperand& mem_op);
+
+ // Conditional macros.
+ void Ccmp(const Register& rn,
+ const Operand& operand,
+ StatusFlags nzcv,
+ Condition cond);
+ void Ccmn(const Register& rn,
+ const Operand& operand,
+ StatusFlags nzcv,
+ Condition cond);
+ void ConditionalCompareMacro(const Register& rn,
+ const Operand& operand,
+ StatusFlags nzcv,
+ Condition cond,
+ ConditionalCompareOp op);
+ void Csel(const Register& rd,
+ const Register& rn,
+ const Operand& operand,
+ Condition cond);
+
+ // Load/store macros.
+#define DECLARE_FUNCTION(FN, REGTYPE, REG, OP) \
+ void FN(const REGTYPE REG, const MemOperand& addr);
+ LS_MACRO_LIST(DECLARE_FUNCTION)
+#undef DECLARE_FUNCTION
+
+ void LoadStoreMacro(const CPURegister& rt,
+ const MemOperand& addr,
+ LoadStoreOp op);
+
+#define DECLARE_FUNCTION(FN, REGTYPE, REG, REG2, OP) \
+ void FN(const REGTYPE REG, const REGTYPE REG2, const MemOperand& addr);
+ LSPAIR_MACRO_LIST(DECLARE_FUNCTION)
+#undef DECLARE_FUNCTION
+
+ void LoadStorePairMacro(const CPURegister& rt,
+ const CPURegister& rt2,
+ const MemOperand& addr,
+ LoadStorePairOp op);
+
+ void Prfm(PrefetchOperation op, const MemOperand& addr);
+
+ // Push or pop up to 4 registers of the same width to or from the stack,
+ // using the current stack pointer as set by SetStackPointer.
+ //
+ // If an argument register is 'NoReg', all further arguments are also assumed
+ // to be 'NoReg', and are thus not pushed or popped.
+ //
+ // Arguments are ordered such that "Push(a, b);" is functionally equivalent
+ // to "Push(a); Push(b);".
+ //
+ // It is valid to push the same register more than once, and there is no
+ // restriction on the order in which registers are specified.
+ //
+ // It is not valid to pop into the same register more than once in one
+ // operation, not even into the zero register.
+ //
+ // If the current stack pointer (as set by SetStackPointer) is sp, then it
+ // must be aligned to 16 bytes on entry and the total size of the specified
+ // registers must also be a multiple of 16 bytes.
+ //
+ // Even if the current stack pointer is not the system stack pointer (sp),
+ // Push (and derived methods) will still modify the system stack pointer in
+ // order to comply with ABI rules about accessing memory below the system
+ // stack pointer.
+ //
+ // Other than the registers passed into Pop, the stack pointer and (possibly)
+ // the system stack pointer, these methods do not modify any other registers.
+ void Push(const CPURegister& src0, const CPURegister& src1 = NoReg,
+ const CPURegister& src2 = NoReg, const CPURegister& src3 = NoReg);
+ void Pop(const CPURegister& dst0, const CPURegister& dst1 = NoReg,
+ const CPURegister& dst2 = NoReg, const CPURegister& dst3 = NoReg);
+ void PushStackPointer();
+
+ // Alternative forms of Push and Pop, taking a RegList or CPURegList that
+ // specifies the registers that are to be pushed or popped. Higher-numbered
+ // registers are associated with higher memory addresses (as in the A32 push
+ // and pop instructions).
+ //
+ // (Push|Pop)SizeRegList allow you to specify the register size as a
+ // parameter. Only kXRegSize, kWRegSize, kDRegSize and kSRegSize are
+ // supported.
+ //
+ // Otherwise, (Push|Pop)(CPU|X|W|D|S)RegList is preferred.
+ void PushCPURegList(CPURegList registers);
+ void PopCPURegList(CPURegList registers);
+
+ void PushSizeRegList(RegList registers, unsigned reg_size,
+ CPURegister::RegisterType type = CPURegister::kRegister) {
+ PushCPURegList(CPURegList(type, reg_size, registers));
+ }
+ void PopSizeRegList(RegList registers, unsigned reg_size,
+ CPURegister::RegisterType type = CPURegister::kRegister) {
+ PopCPURegList(CPURegList(type, reg_size, registers));
+ }
+ void PushXRegList(RegList regs) {
+ PushSizeRegList(regs, kXRegSize);
+ }
+ void PopXRegList(RegList regs) {
+ PopSizeRegList(regs, kXRegSize);
+ }
+ void PushWRegList(RegList regs) {
+ PushSizeRegList(regs, kWRegSize);
+ }
+ void PopWRegList(RegList regs) {
+ PopSizeRegList(regs, kWRegSize);
+ }
+ void PushDRegList(RegList regs) {
+ PushSizeRegList(regs, kDRegSize, CPURegister::kVRegister);
+ }
+ void PopDRegList(RegList regs) {
+ PopSizeRegList(regs, kDRegSize, CPURegister::kVRegister);
+ }
+ void PushSRegList(RegList regs) {
+ PushSizeRegList(regs, kSRegSize, CPURegister::kVRegister);
+ }
+ void PopSRegList(RegList regs) {
+ PopSizeRegList(regs, kSRegSize, CPURegister::kVRegister);
+ }
+
+ // Push the specified register 'count' times.
+ void PushMultipleTimes(int count, Register src);
+
+ // Poke 'src' onto the stack. The offset is in bytes.
+ //
+ // If the current stack pointer (as set by SetStackPointer) is sp, then sp
+ // must be aligned to 16 bytes.
+ void Poke(const Register& src, const Operand& offset);
+
+ // Peek at a value on the stack, and put it in 'dst'. The offset is in bytes.
+ //
+ // If the current stack pointer (as set by SetStackPointer) is sp, then sp
+ // must be aligned to 16 bytes.
+ void Peek(const Register& dst, const Operand& offset);
+
+ // Alternative forms of Peek and Poke, taking a RegList or CPURegList that
+ // specifies the registers that are to be pushed or popped. Higher-numbered
+ // registers are associated with higher memory addresses.
+ //
+ // (Peek|Poke)SizeRegList allow you to specify the register size as a
+ // parameter. Only kXRegSize, kWRegSize, kDRegSize and kSRegSize are
+ // supported.
+ //
+ // Otherwise, (Peek|Poke)(CPU|X|W|D|S)RegList is preferred.
+ void PeekCPURegList(CPURegList registers, int64_t offset) {
+ LoadCPURegList(registers, MemOperand(StackPointer(), offset));
+ }
+ void PokeCPURegList(CPURegList registers, int64_t offset) {
+ StoreCPURegList(registers, MemOperand(StackPointer(), offset));
+ }
+
+ void PeekSizeRegList(RegList registers, int64_t offset, unsigned reg_size,
+ CPURegister::RegisterType type = CPURegister::kRegister) {
+ PeekCPURegList(CPURegList(type, reg_size, registers), offset);
+ }
+ void PokeSizeRegList(RegList registers, int64_t offset, unsigned reg_size,
+ CPURegister::RegisterType type = CPURegister::kRegister) {
+ PokeCPURegList(CPURegList(type, reg_size, registers), offset);
+ }
+ void PeekXRegList(RegList regs, int64_t offset) {
+ PeekSizeRegList(regs, offset, kXRegSize);
+ }
+ void PokeXRegList(RegList regs, int64_t offset) {
+ PokeSizeRegList(regs, offset, kXRegSize);
+ }
+ void PeekWRegList(RegList regs, int64_t offset) {
+ PeekSizeRegList(regs, offset, kWRegSize);
+ }
+ void PokeWRegList(RegList regs, int64_t offset) {
+ PokeSizeRegList(regs, offset, kWRegSize);
+ }
+ void PeekDRegList(RegList regs, int64_t offset) {
+ PeekSizeRegList(regs, offset, kDRegSize, CPURegister::kVRegister);
+ }
+ void PokeDRegList(RegList regs, int64_t offset) {
+ PokeSizeRegList(regs, offset, kDRegSize, CPURegister::kVRegister);
+ }
+ void PeekSRegList(RegList regs, int64_t offset) {
+ PeekSizeRegList(regs, offset, kSRegSize, CPURegister::kVRegister);
+ }
+ void PokeSRegList(RegList regs, int64_t offset) {
+ PokeSizeRegList(regs, offset, kSRegSize, CPURegister::kVRegister);
+ }
+
+
+ // Claim or drop stack space without actually accessing memory.
+ //
+ // If the current stack pointer (as set by SetStackPointer) is sp, then it
+ // must be aligned to 16 bytes and the size claimed or dropped must be a
+ // multiple of 16 bytes.
+ void Claim(const Operand& size);
+ void Drop(const Operand& size);
+
+ // Preserve the callee-saved registers (as defined by AAPCS64).
+ //
+ // Higher-numbered registers are pushed before lower-numbered registers, and
+ // thus get higher addresses.
+ // Floating-point registers are pushed before general-purpose registers, and
+ // thus get higher addresses.
+ //
+ // This method must not be called unless StackPointer() is sp, and it is
+ // aligned to 16 bytes.
+ void PushCalleeSavedRegisters();
+
+ // Restore the callee-saved registers (as defined by AAPCS64).
+ //
+ // Higher-numbered registers are popped after lower-numbered registers, and
+ // thus come from higher addresses.
+ // Floating-point registers are popped after general-purpose registers, and
+ // thus come from higher addresses.
+ //
+ // This method must not be called unless StackPointer() is sp, and it is
+ // aligned to 16 bytes.
+ void PopCalleeSavedRegisters();
+
+ void LoadCPURegList(CPURegList registers, const MemOperand& src);
+ void StoreCPURegList(CPURegList registers, const MemOperand& dst);
+
+ // Remaining instructions are simple pass-through calls to the assembler.
+ void Adr(const Register& rd, Label* label) {
+ VIXL_ASSERT(!rd.IsZero());
+ SingleEmissionCheckScope guard(this);
+ adr(rd, label);
+ }
+ void Adrp(const Register& rd, Label* label) {
+ VIXL_ASSERT(!rd.IsZero());
+ SingleEmissionCheckScope guard(this);
+ adrp(rd, label);
+ }
+ void Asr(const Register& rd, const Register& rn, unsigned shift) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ asr(rd, rn, shift);
+ }
+ void Asr(const Register& rd, const Register& rn, const Register& rm) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ VIXL_ASSERT(!rm.IsZero());
+ SingleEmissionCheckScope guard(this);
+ asrv(rd, rn, rm);
+ }
+
+ // Branch type inversion relies on these relations.
+ VIXL_STATIC_ASSERT((reg_zero == (reg_not_zero ^ 1)) &&
+ (reg_bit_clear == (reg_bit_set ^ 1)) &&
+ (always == (never ^ 1)));
+
+ BranchType InvertBranchType(BranchType type) {
+ if (kBranchTypeFirstCondition <= type && type <= kBranchTypeLastCondition) {
+ return static_cast<BranchType>(
+ InvertCondition(static_cast<Condition>(type)));
+ } else {
+ return static_cast<BranchType>(type ^ 1);
+ }
+ }
+
+ void B(Label* label, BranchType type, Register reg = NoReg, int bit = -1);
+
+ void B(Label* label);
+ void B(Label* label, Condition cond);
+ void B(Condition cond, Label* label) {
+ B(label, cond);
+ }
+ void Bfm(const Register& rd,
+ const Register& rn,
+ unsigned immr,
+ unsigned imms) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ bfm(rd, rn, immr, imms);
+ }
+ void Bfi(const Register& rd,
+ const Register& rn,
+ unsigned lsb,
+ unsigned width) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ bfi(rd, rn, lsb, width);
+ }
+ void Bfxil(const Register& rd,
+ const Register& rn,
+ unsigned lsb,
+ unsigned width) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ bfxil(rd, rn, lsb, width);
+ }
+ void Bind(Label* label);
+ // Bind a label to a specified offset from the start of the buffer.
+ void BindToOffset(Label* label, ptrdiff_t offset);
+ void Bl(Label* label) {
+ SingleEmissionCheckScope guard(this);
+ bl(label);
+ }
+ void Blr(const Register& xn) {
+ VIXL_ASSERT(!xn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ blr(xn);
+ }
+ void Br(const Register& xn) {
+ VIXL_ASSERT(!xn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ br(xn);
+ }
+ void Brk(int code = 0) {
+ SingleEmissionCheckScope guard(this);
+ brk(code);
+ }
+ void Cbnz(const Register& rt, Label* label);
+ void Cbz(const Register& rt, Label* label);
+ void Cinc(const Register& rd, const Register& rn, Condition cond) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ cinc(rd, rn, cond);
+ }
+ void Cinv(const Register& rd, const Register& rn, Condition cond) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ cinv(rd, rn, cond);
+ }
+ void Clrex() {
+ SingleEmissionCheckScope guard(this);
+ clrex();
+ }
+ void Cls(const Register& rd, const Register& rn) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ cls(rd, rn);
+ }
+ void Clz(const Register& rd, const Register& rn) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ clz(rd, rn);
+ }
+ void Cneg(const Register& rd, const Register& rn, Condition cond) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ cneg(rd, rn, cond);
+ }
+ void Cset(const Register& rd, Condition cond) {
+ VIXL_ASSERT(!rd.IsZero());
+ SingleEmissionCheckScope guard(this);
+ cset(rd, cond);
+ }
+ void Csetm(const Register& rd, Condition cond) {
+ VIXL_ASSERT(!rd.IsZero());
+ SingleEmissionCheckScope guard(this);
+ csetm(rd, cond);
+ }
+ void Csinc(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ Condition cond) {
+ VIXL_ASSERT(!rd.IsZero());
+ // The VIXL source code contains these assertions, but the AArch64 ISR
+ // explicitly permits the use of zero registers. CSET itself is defined
+ // in terms of CSINC with WZR/XZR.
+ //
+ // VIXL_ASSERT(!rn.IsZero());
+ // VIXL_ASSERT(!rm.IsZero());
+ VIXL_ASSERT((cond != al) && (cond != nv));
+ SingleEmissionCheckScope guard(this);
+ csinc(rd, rn, rm, cond);
+ }
+ void Csinv(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ Condition cond) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ VIXL_ASSERT(!rm.IsZero());
+ VIXL_ASSERT((cond != al) && (cond != nv));
+ SingleEmissionCheckScope guard(this);
+ csinv(rd, rn, rm, cond);
+ }
+ void Csneg(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ Condition cond) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ VIXL_ASSERT(!rm.IsZero());
+ VIXL_ASSERT((cond != al) && (cond != nv));
+ SingleEmissionCheckScope guard(this);
+ csneg(rd, rn, rm, cond);
+ }
+ void Dmb(BarrierDomain domain, BarrierType type) {
+ SingleEmissionCheckScope guard(this);
+ dmb(domain, type);
+ }
+ void Dsb(BarrierDomain domain, BarrierType type) {
+ SingleEmissionCheckScope guard(this);
+ dsb(domain, type);
+ }
+ void Extr(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ unsigned lsb) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ VIXL_ASSERT(!rm.IsZero());
+ SingleEmissionCheckScope guard(this);
+ extr(rd, rn, rm, lsb);
+ }
+ void Fadd(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
+ SingleEmissionCheckScope guard(this);
+ fadd(vd, vn, vm);
+ }
+ void Fccmp(const VRegister& vn,
+ const VRegister& vm,
+ StatusFlags nzcv,
+ Condition cond,
+ FPTrapFlags trap = DisableTrap) {
+ VIXL_ASSERT((cond != al) && (cond != nv));
+ SingleEmissionCheckScope guard(this);
+ FPCCompareMacro(vn, vm, nzcv, cond, trap);
+ }
+ void Fccmpe(const VRegister& vn,
+ const VRegister& vm,
+ StatusFlags nzcv,
+ Condition cond) {
+ Fccmp(vn, vm, nzcv, cond, EnableTrap);
+ }
+ void Fcmp(const VRegister& vn, const VRegister& vm,
+ FPTrapFlags trap = DisableTrap) {
+ SingleEmissionCheckScope guard(this);
+ FPCompareMacro(vn, vm, trap);
+ }
+ void Fcmp(const VRegister& vn, double value,
+ FPTrapFlags trap = DisableTrap);
+ void Fcmpe(const VRegister& vn, double value);
+ void Fcmpe(const VRegister& vn, const VRegister& vm) {
+ Fcmp(vn, vm, EnableTrap);
+ }
+ void Fcsel(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ Condition cond) {
+ VIXL_ASSERT((cond != al) && (cond != nv));
+ SingleEmissionCheckScope guard(this);
+ fcsel(vd, vn, vm, cond);
+ }
+ void Fcvt(const VRegister& vd, const VRegister& vn) {
+ SingleEmissionCheckScope guard(this);
+ fcvt(vd, vn);
+ }
+ void Fcvtl(const VRegister& vd, const VRegister& vn) {
+ SingleEmissionCheckScope guard(this);
+ fcvtl(vd, vn);
+ }
+ void Fcvtl2(const VRegister& vd, const VRegister& vn) {
+ SingleEmissionCheckScope guard(this);
+ fcvtl2(vd, vn);
+ }
+ void Fcvtn(const VRegister& vd, const VRegister& vn) {
+ SingleEmissionCheckScope guard(this);
+ fcvtn(vd, vn);
+ }
+ void Fcvtn2(const VRegister& vd, const VRegister& vn) {
+ SingleEmissionCheckScope guard(this);
+ fcvtn2(vd, vn);
+ }
+ void Fcvtxn(const VRegister& vd, const VRegister& vn) {
+ SingleEmissionCheckScope guard(this);
+ fcvtxn(vd, vn);
+ }
+ void Fcvtxn2(const VRegister& vd, const VRegister& vn) {
+ SingleEmissionCheckScope guard(this);
+ fcvtxn2(vd, vn);
+ }
+ void Fcvtas(const Register& rd, const VRegister& vn) {
+ VIXL_ASSERT(!rd.IsZero());
+ SingleEmissionCheckScope guard(this);
+ fcvtas(rd, vn);
+ }
+ void Fcvtau(const Register& rd, const VRegister& vn) {
+ VIXL_ASSERT(!rd.IsZero());
+ SingleEmissionCheckScope guard(this);
+ fcvtau(rd, vn);
+ }
+ void Fcvtms(const Register& rd, const VRegister& vn) {
+ VIXL_ASSERT(!rd.IsZero());
+ SingleEmissionCheckScope guard(this);
+ fcvtms(rd, vn);
+ }
+ void Fcvtmu(const Register& rd, const VRegister& vn) {
+ VIXL_ASSERT(!rd.IsZero());
+ SingleEmissionCheckScope guard(this);
+ fcvtmu(rd, vn);
+ }
+ void Fcvtns(const Register& rd, const VRegister& vn) {
+ VIXL_ASSERT(!rd.IsZero());
+ SingleEmissionCheckScope guard(this);
+ fcvtns(rd, vn);
+ }
+ void Fcvtnu(const Register& rd, const VRegister& vn) {
+ VIXL_ASSERT(!rd.IsZero());
+ SingleEmissionCheckScope guard(this);
+ fcvtnu(rd, vn);
+ }
+ void Fcvtps(const Register& rd, const VRegister& vn) {
+ VIXL_ASSERT(!rd.IsZero());
+ SingleEmissionCheckScope guard(this);
+ fcvtps(rd, vn);
+ }
+ void Fcvtpu(const Register& rd, const VRegister& vn) {
+ VIXL_ASSERT(!rd.IsZero());
+ SingleEmissionCheckScope guard(this);
+ fcvtpu(rd, vn);
+ }
+ void Fcvtzs(const Register& rd, const VRegister& vn, int fbits = 0) {
+ VIXL_ASSERT(!rd.IsZero());
+ SingleEmissionCheckScope guard(this);
+ fcvtzs(rd, vn, fbits);
+ }
+ void Fjcvtzs(const Register& rd, const VRegister& vn) {
+ VIXL_ASSERT(!rd.IsZero());
+ SingleEmissionCheckScope guard(this);
+ fjcvtzs(rd, vn);
+ }
+ void Fcvtzu(const Register& rd, const VRegister& vn, int fbits = 0) {
+ VIXL_ASSERT(!rd.IsZero());
+ SingleEmissionCheckScope guard(this);
+ fcvtzu(rd, vn, fbits);
+ }
+ void Fdiv(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
+ SingleEmissionCheckScope guard(this);
+ fdiv(vd, vn, vm);
+ }
+ void Fmax(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
+ SingleEmissionCheckScope guard(this);
+ fmax(vd, vn, vm);
+ }
+ void Fmaxnm(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm) {
+ SingleEmissionCheckScope guard(this);
+ fmaxnm(vd, vn, vm);
+ }
+ void Fmin(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
+ SingleEmissionCheckScope guard(this);
+ fmin(vd, vn, vm);
+ }
+ void Fminnm(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm) {
+ SingleEmissionCheckScope guard(this);
+ fminnm(vd, vn, vm);
+ }
+ void Fmov(VRegister vd, VRegister vn) {
+ SingleEmissionCheckScope guard(this);
+ // Only emit an instruction if vd and vn are different, and they are both D
+ // registers. fmov(s0, s0) is not a no-op because it clears the top word of
+ // d0. Technically, fmov(d0, d0) is not a no-op either because it clears
+ // the top of q0, but VRegister does not currently support Q registers.
+ if (!vd.Is(vn) || !vd.Is64Bits()) {
+ fmov(vd, vn);
+ }
+ }
+ void Fmov(VRegister vd, Register rn) {
+ SingleEmissionCheckScope guard(this);
+ fmov(vd, rn);
+ }
+ void Fmov(const VRegister& vd, int index, const Register& rn) {
+ SingleEmissionCheckScope guard(this);
+ fmov(vd, index, rn);
+ }
+ void Fmov(const Register& rd, const VRegister& vn, int index) {
+ SingleEmissionCheckScope guard(this);
+ fmov(rd, vn, index);
+ }
+
+ // Provide explicit double and float interfaces for FP immediate moves, rather
+ // than relying on implicit C++ casts. This allows signalling NaNs to be
+ // preserved when the immediate matches the format of vd. Most systems convert
+ // signalling NaNs to quiet NaNs when converting between float and double.
+ void Fmov(VRegister vd, double imm);
+ void Fmov(VRegister vd, float imm);
+ // Provide a template to allow other types to be converted automatically.
+ template<typename T>
+ void Fmov(VRegister vd, T imm) {
+ Fmov(vd, static_cast<double>(imm));
+ }
+ void Fmov(Register rd, VRegister vn) {
+ VIXL_ASSERT(!rd.IsZero());
+ SingleEmissionCheckScope guard(this);
+ fmov(rd, vn);
+ }
+ void Fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
+ SingleEmissionCheckScope guard(this);
+ fmul(vd, vn, vm);
+ }
+ void Fnmul(const VRegister& vd, const VRegister& vn,
+ const VRegister& vm) {
+ SingleEmissionCheckScope guard(this);
+ fnmul(vd, vn, vm);
+ }
+ void Fmadd(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ const VRegister& va) {
+ SingleEmissionCheckScope guard(this);
+ fmadd(vd, vn, vm, va);
+ }
+ void Fmsub(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ const VRegister& va) {
+ SingleEmissionCheckScope guard(this);
+ fmsub(vd, vn, vm, va);
+ }
+ void Fnmadd(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ const VRegister& va) {
+ SingleEmissionCheckScope guard(this);
+ fnmadd(vd, vn, vm, va);
+ }
+ void Fnmsub(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ const VRegister& va) {
+ SingleEmissionCheckScope guard(this);
+ fnmsub(vd, vn, vm, va);
+ }
+ void Fsub(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
+ SingleEmissionCheckScope guard(this);
+ fsub(vd, vn, vm);
+ }
+ void Hint(SystemHint code) {
+ SingleEmissionCheckScope guard(this);
+ hint(code);
+ }
+ void Hlt(int code) {
+ SingleEmissionCheckScope guard(this);
+ hlt(code);
+ }
+ void Isb() {
+ SingleEmissionCheckScope guard(this);
+ isb();
+ }
+ void Ldar(const Register& rt, const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ldar(rt, src);
+ }
+ void Ldarb(const Register& rt, const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ldarb(rt, src);
+ }
+ void Ldarh(const Register& rt, const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ldarh(rt, src);
+ }
+ void Ldaxp(const Register& rt, const Register& rt2, const MemOperand& src) {
+ VIXL_ASSERT(!rt.Aliases(rt2));
+ SingleEmissionCheckScope guard(this);
+ ldaxp(rt, rt2, src);
+ }
+ void Ldaxr(const Register& rt, const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ldaxr(rt, src);
+ }
+ void Ldaxrb(const Register& rt, const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ldaxrb(rt, src);
+ }
+ void Ldaxrh(const Register& rt, const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ldaxrh(rt, src);
+ }
+
+// clang-format off
+#define COMPARE_AND_SWAP_SINGLE_MACRO_LIST(V) \
+ V(cas, Cas) \
+ V(casa, Casa) \
+ V(casl, Casl) \
+ V(casal, Casal) \
+ V(casb, Casb) \
+ V(casab, Casab) \
+ V(caslb, Caslb) \
+ V(casalb, Casalb) \
+ V(cash, Cash) \
+ V(casah, Casah) \
+ V(caslh, Caslh) \
+ V(casalh, Casalh)
+ // clang-format on
+
+#define DEFINE_MACRO_ASM_FUNC(ASM, MASM) \
+ void MASM(const Register& rs, const Register& rt, const MemOperand& src) { \
+ SingleEmissionCheckScope guard(this); \
+ ASM(rs, rt, src); \
+ }
+ COMPARE_AND_SWAP_SINGLE_MACRO_LIST(DEFINE_MACRO_ASM_FUNC)
+#undef DEFINE_MACRO_ASM_FUNC
+
+// clang-format off
+#define COMPARE_AND_SWAP_PAIR_MACRO_LIST(V) \
+ V(casp, Casp) \
+ V(caspa, Caspa) \
+ V(caspl, Caspl) \
+ V(caspal, Caspal)
+ // clang-format on
+
+#define DEFINE_MACRO_ASM_FUNC(ASM, MASM) \
+ void MASM(const Register& rs, const Register& rs2, const Register& rt, \
+ const Register& rt2, const MemOperand& src) { \
+ SingleEmissionCheckScope guard(this); \
+ ASM(rs, rs2, rt, rt2, src); \
+ }
+ COMPARE_AND_SWAP_PAIR_MACRO_LIST(DEFINE_MACRO_ASM_FUNC)
+#undef DEFINE_MACRO_ASM_FUNC
+
+// These macros generate all the variations of the atomic memory operations,
+// e.g. ldadd, ldadda, ldaddb, staddl, etc.
+
+// clang-format off
+#define ATOMIC_MEMORY_SIMPLE_MACRO_LIST(V, DEF, MASM_PRE, ASM_PRE) \
+ V(DEF, MASM_PRE##add, ASM_PRE##add) \
+ V(DEF, MASM_PRE##clr, ASM_PRE##clr) \
+ V(DEF, MASM_PRE##eor, ASM_PRE##eor) \
+ V(DEF, MASM_PRE##set, ASM_PRE##set) \
+ V(DEF, MASM_PRE##smax, ASM_PRE##smax) \
+ V(DEF, MASM_PRE##smin, ASM_PRE##smin) \
+ V(DEF, MASM_PRE##umax, ASM_PRE##umax) \
+ V(DEF, MASM_PRE##umin, ASM_PRE##umin)
+
+#define ATOMIC_MEMORY_STORE_MACRO_MODES(V, MASM, ASM) \
+ V(MASM, ASM) \
+ V(MASM##l, ASM##l) \
+ V(MASM##b, ASM##b) \
+ V(MASM##lb, ASM##lb) \
+ V(MASM##h, ASM##h) \
+ V(MASM##lh, ASM##lh)
+
+#define ATOMIC_MEMORY_LOAD_MACRO_MODES(V, MASM, ASM) \
+ ATOMIC_MEMORY_STORE_MACRO_MODES(V, MASM, ASM) \
+ V(MASM##a, ASM##a) \
+ V(MASM##al, ASM##al) \
+ V(MASM##ab, ASM##ab) \
+ V(MASM##alb, ASM##alb) \
+ V(MASM##ah, ASM##ah) \
+ V(MASM##alh, ASM##alh)
+ // clang-format on
+
+#define DEFINE_MACRO_LOAD_ASM_FUNC(MASM, ASM) \
+ void MASM(const Register& rs, const Register& rt, const MemOperand& src) { \
+ SingleEmissionCheckScope guard(this); \
+ ASM(rs, rt, src); \
+ }
+#define DEFINE_MACRO_STORE_ASM_FUNC(MASM, ASM) \
+ void MASM(const Register& rs, const MemOperand& src) { \
+ SingleEmissionCheckScope guard(this); \
+ ASM(rs, src); \
+ }
+
+ ATOMIC_MEMORY_SIMPLE_MACRO_LIST(ATOMIC_MEMORY_LOAD_MACRO_MODES,
+ DEFINE_MACRO_LOAD_ASM_FUNC,
+ Ld,
+ ld)
+ ATOMIC_MEMORY_SIMPLE_MACRO_LIST(ATOMIC_MEMORY_STORE_MACRO_MODES,
+ DEFINE_MACRO_STORE_ASM_FUNC,
+ St,
+ st)
+
+#define DEFINE_MACRO_SWP_ASM_FUNC(MASM, ASM) \
+ void MASM(const Register& rs, const Register& rt, const MemOperand& src) { \
+ SingleEmissionCheckScope guard(this); \
+ ASM(rs, rt, src); \
+ }
+
+ ATOMIC_MEMORY_LOAD_MACRO_MODES(DEFINE_MACRO_SWP_ASM_FUNC, Swp, swp)
+
+#undef DEFINE_MACRO_LOAD_ASM_FUNC
+#undef DEFINE_MACRO_STORE_ASM_FUNC
+#undef DEFINE_MACRO_SWP_ASM_FUNC
+
+ void Ldnp(const CPURegister& rt,
+ const CPURegister& rt2,
+ const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ldnp(rt, rt2, src);
+ }
+ // Provide both double and float interfaces for FP immediate loads, rather
+ // than relying on implicit C++ casts. This allows signalling NaNs to be
+ // preserved when the immediate matches the format of fd. Most systems convert
+ // signalling NaNs to quiet NaNs when converting between float and double.
+ void Ldr(const VRegister& vt, double imm) {
+ SingleEmissionCheckScope guard(this);
+ if (vt.Is64Bits()) {
+ ldr(vt, imm);
+ } else {
+ ldr(vt, static_cast<float>(imm));
+ }
+ }
+ void Ldr(const VRegister& vt, float imm) {
+ SingleEmissionCheckScope guard(this);
+ if (vt.Is32Bits()) {
+ ldr(vt, imm);
+ } else {
+ ldr(vt, static_cast<double>(imm));
+ }
+ }
+ /*
+ void Ldr(const VRegister& vt, uint64_t high64, uint64_t low64) {
+ VIXL_ASSERT(vt.IsQ());
+ SingleEmissionCheckScope guard(this);
+ ldr(vt, new Literal<uint64_t>(high64, low64,
+ &literal_pool_,
+ RawLiteral::kDeletedOnPlacementByPool));
+ }
+ */
+ void Ldr(const Register& rt, uint64_t imm) {
+ VIXL_ASSERT(!rt.IsZero());
+ SingleEmissionCheckScope guard(this);
+ ldr(rt, imm);
+ }
+ void Ldrsw(const Register& rt, uint32_t imm) {
+ VIXL_ASSERT(!rt.IsZero());
+ SingleEmissionCheckScope guard(this);
+ ldrsw(rt, imm);
+ }
+ void Ldxp(const Register& rt, const Register& rt2, const MemOperand& src) {
+ VIXL_ASSERT(!rt.Aliases(rt2));
+ SingleEmissionCheckScope guard(this);
+ ldxp(rt, rt2, src);
+ }
+ void Ldxr(const Register& rt, const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ldxr(rt, src);
+ }
+ void Ldxrb(const Register& rt, const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ldxrb(rt, src);
+ }
+ void Ldxrh(const Register& rt, const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ldxrh(rt, src);
+ }
+ void Lsl(const Register& rd, const Register& rn, unsigned shift) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ lsl(rd, rn, shift);
+ }
+ void Lsl(const Register& rd, const Register& rn, const Register& rm) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ VIXL_ASSERT(!rm.IsZero());
+ SingleEmissionCheckScope guard(this);
+ lslv(rd, rn, rm);
+ }
+ void Lsr(const Register& rd, const Register& rn, unsigned shift) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ lsr(rd, rn, shift);
+ }
+ void Lsr(const Register& rd, const Register& rn, const Register& rm) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ VIXL_ASSERT(!rm.IsZero());
+ SingleEmissionCheckScope guard(this);
+ lsrv(rd, rn, rm);
+ }
+ void Madd(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ const Register& ra) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ VIXL_ASSERT(!rm.IsZero());
+ VIXL_ASSERT(!ra.IsZero());
+ SingleEmissionCheckScope guard(this);
+ madd(rd, rn, rm, ra);
+ }
+ void Mneg(const Register& rd, const Register& rn, const Register& rm) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ VIXL_ASSERT(!rm.IsZero());
+ SingleEmissionCheckScope guard(this);
+ mneg(rd, rn, rm);
+ }
+ void Mov(const Register& rd, const Register& rn) {
+ SingleEmissionCheckScope guard(this);
+ mov(rd, rn);
+ }
+ void Movk(const Register& rd, uint64_t imm, int shift = -1) {
+ VIXL_ASSERT(!rd.IsZero());
+ SingleEmissionCheckScope guard(this);
+ movk(rd, imm, shift);
+ }
+ void Mrs(const Register& rt, SystemRegister sysreg) {
+ VIXL_ASSERT(!rt.IsZero());
+ SingleEmissionCheckScope guard(this);
+ mrs(rt, sysreg);
+ }
+ void Msr(SystemRegister sysreg, const Register& rt) {
+ VIXL_ASSERT(!rt.IsZero());
+ SingleEmissionCheckScope guard(this);
+ msr(sysreg, rt);
+ }
+ void Sys(int op1, int crn, int crm, int op2, const Register& rt = xzr) {
+ SingleEmissionCheckScope guard(this);
+ sys(op1, crn, crm, op2, rt);
+ }
+ void Dc(DataCacheOp op, const Register& rt) {
+ SingleEmissionCheckScope guard(this);
+ dc(op, rt);
+ }
+ void Ic(InstructionCacheOp op, const Register& rt) {
+ SingleEmissionCheckScope guard(this);
+ ic(op, rt);
+ }
+ void Msub(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ const Register& ra) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ VIXL_ASSERT(!rm.IsZero());
+ VIXL_ASSERT(!ra.IsZero());
+ SingleEmissionCheckScope guard(this);
+ msub(rd, rn, rm, ra);
+ }
+ void Mul(const Register& rd, const Register& rn, const Register& rm) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ VIXL_ASSERT(!rm.IsZero());
+ SingleEmissionCheckScope guard(this);
+ mul(rd, rn, rm);
+ }
+ void Nop() {
+ SingleEmissionCheckScope guard(this);
+ nop();
+ }
+ void Csdb() {
+ SingleEmissionCheckScope guard(this);
+ csdb();
+ }
+ void Rbit(const Register& rd, const Register& rn) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ rbit(rd, rn);
+ }
+ void Ret(const Register& xn = lr) {
+ VIXL_ASSERT(!xn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ ret(xn);
+ }
+ void Rev(const Register& rd, const Register& rn) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ rev(rd, rn);
+ }
+ void Rev16(const Register& rd, const Register& rn) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ rev16(rd, rn);
+ }
+ void Rev32(const Register& rd, const Register& rn) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ rev32(rd, rn);
+ }
+ void Ror(const Register& rd, const Register& rs, unsigned shift) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rs.IsZero());
+ SingleEmissionCheckScope guard(this);
+ ror(rd, rs, shift);
+ }
+ void Ror(const Register& rd, const Register& rn, const Register& rm) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ VIXL_ASSERT(!rm.IsZero());
+ SingleEmissionCheckScope guard(this);
+ rorv(rd, rn, rm);
+ }
+ void Sbfiz(const Register& rd,
+ const Register& rn,
+ unsigned lsb,
+ unsigned width) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ sbfiz(rd, rn, lsb, width);
+ }
+ void Sbfm(const Register& rd,
+ const Register& rn,
+ unsigned immr,
+ unsigned imms) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ sbfm(rd, rn, immr, imms);
+ }
+ void Sbfx(const Register& rd,
+ const Register& rn,
+ unsigned lsb,
+ unsigned width) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ sbfx(rd, rn, lsb, width);
+ }
+ void Scvtf(const VRegister& vd, const Register& rn, int fbits = 0) {
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ scvtf(vd, rn, fbits);
+ }
+ void Sdiv(const Register& rd, const Register& rn, const Register& rm) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ VIXL_ASSERT(!rm.IsZero());
+ SingleEmissionCheckScope guard(this);
+ sdiv(rd, rn, rm);
+ }
+ void Smaddl(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ const Register& ra) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ VIXL_ASSERT(!rm.IsZero());
+ VIXL_ASSERT(!ra.IsZero());
+ SingleEmissionCheckScope guard(this);
+ smaddl(rd, rn, rm, ra);
+ }
+ void Smsubl(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ const Register& ra) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ VIXL_ASSERT(!rm.IsZero());
+ VIXL_ASSERT(!ra.IsZero());
+ SingleEmissionCheckScope guard(this);
+ smsubl(rd, rn, rm, ra);
+ }
+ void Smull(const Register& rd, const Register& rn, const Register& rm) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ VIXL_ASSERT(!rm.IsZero());
+ SingleEmissionCheckScope guard(this);
+ smull(rd, rn, rm);
+ }
+ void Smulh(const Register& xd, const Register& xn, const Register& xm) {
+ VIXL_ASSERT(!xd.IsZero());
+ VIXL_ASSERT(!xn.IsZero());
+ VIXL_ASSERT(!xm.IsZero());
+ SingleEmissionCheckScope guard(this);
+ smulh(xd, xn, xm);
+ }
+ void Stlr(const Register& rt, const MemOperand& dst) {
+ SingleEmissionCheckScope guard(this);
+ stlr(rt, dst);
+ }
+ void Stlrb(const Register& rt, const MemOperand& dst) {
+ SingleEmissionCheckScope guard(this);
+ stlrb(rt, dst);
+ }
+ void Stlrh(const Register& rt, const MemOperand& dst) {
+ SingleEmissionCheckScope guard(this);
+ stlrh(rt, dst);
+ }
+ void Stlxp(const Register& rs,
+ const Register& rt,
+ const Register& rt2,
+ const MemOperand& dst) {
+ VIXL_ASSERT(!rs.Aliases(dst.base()));
+ VIXL_ASSERT(!rs.Aliases(rt));
+ VIXL_ASSERT(!rs.Aliases(rt2));
+ SingleEmissionCheckScope guard(this);
+ stlxp(rs, rt, rt2, dst);
+ }
+ void Stlxr(const Register& rs, const Register& rt, const MemOperand& dst) {
+ VIXL_ASSERT(!rs.Aliases(dst.base()));
+ VIXL_ASSERT(!rs.Aliases(rt));
+ SingleEmissionCheckScope guard(this);
+ stlxr(rs, rt, dst);
+ }
+ void Stlxrb(const Register& rs, const Register& rt, const MemOperand& dst) {
+ VIXL_ASSERT(!rs.Aliases(dst.base()));
+ VIXL_ASSERT(!rs.Aliases(rt));
+ SingleEmissionCheckScope guard(this);
+ stlxrb(rs, rt, dst);
+ }
+ void Stlxrh(const Register& rs, const Register& rt, const MemOperand& dst) {
+ VIXL_ASSERT(!rs.Aliases(dst.base()));
+ VIXL_ASSERT(!rs.Aliases(rt));
+ SingleEmissionCheckScope guard(this);
+ stlxrh(rs, rt, dst);
+ }
+ void Stnp(const CPURegister& rt,
+ const CPURegister& rt2,
+ const MemOperand& dst) {
+ SingleEmissionCheckScope guard(this);
+ stnp(rt, rt2, dst);
+ }
+ void Stxp(const Register& rs,
+ const Register& rt,
+ const Register& rt2,
+ const MemOperand& dst) {
+ VIXL_ASSERT(!rs.Aliases(dst.base()));
+ VIXL_ASSERT(!rs.Aliases(rt));
+ VIXL_ASSERT(!rs.Aliases(rt2));
+ SingleEmissionCheckScope guard(this);
+ stxp(rs, rt, rt2, dst);
+ }
+ void Stxr(const Register& rs, const Register& rt, const MemOperand& dst) {
+ VIXL_ASSERT(!rs.Aliases(dst.base()));
+ VIXL_ASSERT(!rs.Aliases(rt));
+ SingleEmissionCheckScope guard(this);
+ stxr(rs, rt, dst);
+ }
+ void Stxrb(const Register& rs, const Register& rt, const MemOperand& dst) {
+ VIXL_ASSERT(!rs.Aliases(dst.base()));
+ VIXL_ASSERT(!rs.Aliases(rt));
+ SingleEmissionCheckScope guard(this);
+ stxrb(rs, rt, dst);
+ }
+ void Stxrh(const Register& rs, const Register& rt, const MemOperand& dst) {
+ VIXL_ASSERT(!rs.Aliases(dst.base()));
+ VIXL_ASSERT(!rs.Aliases(rt));
+ SingleEmissionCheckScope guard(this);
+ stxrh(rs, rt, dst);
+ }
+ void Svc(int code) {
+ SingleEmissionCheckScope guard(this);
+ svc(code);
+ }
+ void Sxtb(const Register& rd, const Register& rn) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ sxtb(rd, rn);
+ }
+ void Sxth(const Register& rd, const Register& rn) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ sxth(rd, rn);
+ }
+ void Sxtw(const Register& rd, const Register& rn) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ sxtw(rd, rn);
+ }
+ void Tbl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm) {
+ SingleEmissionCheckScope guard(this);
+ tbl(vd, vn, vm);
+ }
+ void Tbl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vn2,
+ const VRegister& vm) {
+ SingleEmissionCheckScope guard(this);
+ tbl(vd, vn, vn2, vm);
+ }
+ void Tbl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vn2,
+ const VRegister& vn3,
+ const VRegister& vm) {
+ SingleEmissionCheckScope guard(this);
+ tbl(vd, vn, vn2, vn3, vm);
+ }
+ void Tbl(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vn2,
+ const VRegister& vn3,
+ const VRegister& vn4,
+ const VRegister& vm) {
+ SingleEmissionCheckScope guard(this);
+ tbl(vd, vn, vn2, vn3, vn4, vm);
+ }
+ void Tbx(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm) {
+ SingleEmissionCheckScope guard(this);
+ tbx(vd, vn, vm);
+ }
+ void Tbx(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vn2,
+ const VRegister& vm) {
+ SingleEmissionCheckScope guard(this);
+ tbx(vd, vn, vn2, vm);
+ }
+ void Tbx(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vn2,
+ const VRegister& vn3,
+ const VRegister& vm) {
+ SingleEmissionCheckScope guard(this);
+ tbx(vd, vn, vn2, vn3, vm);
+ }
+ void Tbx(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vn2,
+ const VRegister& vn3,
+ const VRegister& vn4,
+ const VRegister& vm) {
+ SingleEmissionCheckScope guard(this);
+ tbx(vd, vn, vn2, vn3, vn4, vm);
+ }
+ void Tbnz(const Register& rt, unsigned bit_pos, Label* label);
+ void Tbz(const Register& rt, unsigned bit_pos, Label* label);
+ void Ubfiz(const Register& rd,
+ const Register& rn,
+ unsigned lsb,
+ unsigned width) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ ubfiz(rd, rn, lsb, width);
+ }
+ void Ubfm(const Register& rd,
+ const Register& rn,
+ unsigned immr,
+ unsigned imms) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ ubfm(rd, rn, immr, imms);
+ }
+ void Ubfx(const Register& rd,
+ const Register& rn,
+ unsigned lsb,
+ unsigned width) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ ubfx(rd, rn, lsb, width);
+ }
+ void Ucvtf(const VRegister& vd, const Register& rn, int fbits = 0) {
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ ucvtf(vd, rn, fbits);
+ }
+ void Udiv(const Register& rd, const Register& rn, const Register& rm) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ VIXL_ASSERT(!rm.IsZero());
+ SingleEmissionCheckScope guard(this);
+ udiv(rd, rn, rm);
+ }
+ void Umaddl(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ const Register& ra) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ VIXL_ASSERT(!rm.IsZero());
+ VIXL_ASSERT(!ra.IsZero());
+ SingleEmissionCheckScope guard(this);
+ umaddl(rd, rn, rm, ra);
+ }
+ void Umull(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ VIXL_ASSERT(!rm.IsZero());
+ SingleEmissionCheckScope guard(this);
+ umull(rd, rn, rm);
+ }
+ void Umulh(const Register& xd, const Register& xn, const Register& xm) {
+ VIXL_ASSERT(!xd.IsZero());
+ VIXL_ASSERT(!xn.IsZero());
+ VIXL_ASSERT(!xm.IsZero());
+ SingleEmissionCheckScope guard(this);
+ umulh(xd, xn, xm);
+ }
+ void Umsubl(const Register& rd,
+ const Register& rn,
+ const Register& rm,
+ const Register& ra) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ VIXL_ASSERT(!rm.IsZero());
+ VIXL_ASSERT(!ra.IsZero());
+ SingleEmissionCheckScope guard(this);
+ umsubl(rd, rn, rm, ra);
+ }
+
+ void Unreachable() {
+ SingleEmissionCheckScope guard(this);
+ Emit(UNDEFINED_INST_PATTERN);
+ }
+
+ void Uxtb(const Register& rd, const Register& rn) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ uxtb(rd, rn);
+ }
+ void Uxth(const Register& rd, const Register& rn) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ uxth(rd, rn);
+ }
+ void Uxtw(const Register& rd, const Register& rn) {
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ uxtw(rd, rn);
+ }
+
+ // NEON 3 vector register instructions.
+ #define NEON_3VREG_MACRO_LIST(V) \
+ V(add, Add) \
+ V(addhn, Addhn) \
+ V(addhn2, Addhn2) \
+ V(addp, Addp) \
+ V(and_, And) \
+ V(bic, Bic) \
+ V(bif, Bif) \
+ V(bit, Bit) \
+ V(bsl, Bsl) \
+ V(cmeq, Cmeq) \
+ V(cmge, Cmge) \
+ V(cmgt, Cmgt) \
+ V(cmhi, Cmhi) \
+ V(cmhs, Cmhs) \
+ V(cmtst, Cmtst) \
+ V(eor, Eor) \
+ V(fabd, Fabd) \
+ V(facge, Facge) \
+ V(facgt, Facgt) \
+ V(faddp, Faddp) \
+ V(fcmeq, Fcmeq) \
+ V(fcmge, Fcmge) \
+ V(fcmgt, Fcmgt) \
+ V(fmaxnmp, Fmaxnmp) \
+ V(fmaxp, Fmaxp) \
+ V(fminnmp, Fminnmp) \
+ V(fminp, Fminp) \
+ V(fmla, Fmla) \
+ V(fmls, Fmls) \
+ V(fmulx, Fmulx) \
+ V(frecps, Frecps) \
+ V(frsqrts, Frsqrts) \
+ V(mla, Mla) \
+ V(mls, Mls) \
+ V(mul, Mul) \
+ V(orn, Orn) \
+ V(orr, Orr) \
+ V(pmul, Pmul) \
+ V(pmull, Pmull) \
+ V(pmull2, Pmull2) \
+ V(raddhn, Raddhn) \
+ V(raddhn2, Raddhn2) \
+ V(rsubhn, Rsubhn) \
+ V(rsubhn2, Rsubhn2) \
+ V(saba, Saba) \
+ V(sabal, Sabal) \
+ V(sabal2, Sabal2) \
+ V(sabd, Sabd) \
+ V(sabdl, Sabdl) \
+ V(sabdl2, Sabdl2) \
+ V(saddl, Saddl) \
+ V(saddl2, Saddl2) \
+ V(saddw, Saddw) \
+ V(saddw2, Saddw2) \
+ V(shadd, Shadd) \
+ V(shsub, Shsub) \
+ V(smax, Smax) \
+ V(smaxp, Smaxp) \
+ V(smin, Smin) \
+ V(sminp, Sminp) \
+ V(smlal, Smlal) \
+ V(smlal2, Smlal2) \
+ V(smlsl, Smlsl) \
+ V(smlsl2, Smlsl2) \
+ V(smull, Smull) \
+ V(smull2, Smull2) \
+ V(sqadd, Sqadd) \
+ V(sqdmlal, Sqdmlal) \
+ V(sqdmlal2, Sqdmlal2) \
+ V(sqdmlsl, Sqdmlsl) \
+ V(sqdmlsl2, Sqdmlsl2) \
+ V(sqdmulh, Sqdmulh) \
+ V(sqdmull, Sqdmull) \
+ V(sqdmull2, Sqdmull2) \
+ V(sqrdmulh, Sqrdmulh) \
+ V(sqrshl, Sqrshl) \
+ V(sqshl, Sqshl) \
+ V(sqsub, Sqsub) \
+ V(srhadd, Srhadd) \
+ V(srshl, Srshl) \
+ V(sshl, Sshl) \
+ V(ssubl, Ssubl) \
+ V(ssubl2, Ssubl2) \
+ V(ssubw, Ssubw) \
+ V(ssubw2, Ssubw2) \
+ V(sub, Sub) \
+ V(subhn, Subhn) \
+ V(subhn2, Subhn2) \
+ V(trn1, Trn1) \
+ V(trn2, Trn2) \
+ V(uaba, Uaba) \
+ V(uabal, Uabal) \
+ V(uabal2, Uabal2) \
+ V(uabd, Uabd) \
+ V(uabdl, Uabdl) \
+ V(uabdl2, Uabdl2) \
+ V(uaddl, Uaddl) \
+ V(uaddl2, Uaddl2) \
+ V(uaddw, Uaddw) \
+ V(uaddw2, Uaddw2) \
+ V(uhadd, Uhadd) \
+ V(uhsub, Uhsub) \
+ V(umax, Umax) \
+ V(umaxp, Umaxp) \
+ V(umin, Umin) \
+ V(uminp, Uminp) \
+ V(umlal, Umlal) \
+ V(umlal2, Umlal2) \
+ V(umlsl, Umlsl) \
+ V(umlsl2, Umlsl2) \
+ V(umull, Umull) \
+ V(umull2, Umull2) \
+ V(uqadd, Uqadd) \
+ V(uqrshl, Uqrshl) \
+ V(uqshl, Uqshl) \
+ V(uqsub, Uqsub) \
+ V(urhadd, Urhadd) \
+ V(urshl, Urshl) \
+ V(ushl, Ushl) \
+ V(usubl, Usubl) \
+ V(usubl2, Usubl2) \
+ V(usubw, Usubw) \
+ V(usubw2, Usubw2) \
+ V(uzp1, Uzp1) \
+ V(uzp2, Uzp2) \
+ V(zip1, Zip1) \
+ V(zip2, Zip2)
+
+ #define DEFINE_MACRO_ASM_FUNC(ASM, MASM) \
+ void MASM(const VRegister& vd, \
+ const VRegister& vn, \
+ const VRegister& vm) { \
+ SingleEmissionCheckScope guard(this); \
+ ASM(vd, vn, vm); \
+ }
+ NEON_3VREG_MACRO_LIST(DEFINE_MACRO_ASM_FUNC)
+ #undef DEFINE_MACRO_ASM_FUNC
+
+ // NEON 2 vector register instructions.
+ #define NEON_2VREG_MACRO_LIST(V) \
+ V(abs, Abs) \
+ V(addp, Addp) \
+ V(addv, Addv) \
+ V(cls, Cls) \
+ V(clz, Clz) \
+ V(cnt, Cnt) \
+ V(fabs, Fabs) \
+ V(faddp, Faddp) \
+ V(fcvtas, Fcvtas) \
+ V(fcvtau, Fcvtau) \
+ V(fcvtms, Fcvtms) \
+ V(fcvtmu, Fcvtmu) \
+ V(fcvtns, Fcvtns) \
+ V(fcvtnu, Fcvtnu) \
+ V(fcvtps, Fcvtps) \
+ V(fcvtpu, Fcvtpu) \
+ V(fmaxnmp, Fmaxnmp) \
+ V(fmaxnmv, Fmaxnmv) \
+ V(fmaxp, Fmaxp) \
+ V(fmaxv, Fmaxv) \
+ V(fminnmp, Fminnmp) \
+ V(fminnmv, Fminnmv) \
+ V(fminp, Fminp) \
+ V(fminv, Fminv) \
+ V(fneg, Fneg) \
+ V(frecpe, Frecpe) \
+ V(frecpx, Frecpx) \
+ V(frinta, Frinta) \
+ V(frinti, Frinti) \
+ V(frintm, Frintm) \
+ V(frintn, Frintn) \
+ V(frintp, Frintp) \
+ V(frintx, Frintx) \
+ V(frintz, Frintz) \
+ V(frsqrte, Frsqrte) \
+ V(fsqrt, Fsqrt) \
+ V(mov, Mov) \
+ V(mvn, Mvn) \
+ V(neg, Neg) \
+ V(not_, Not) \
+ V(rbit, Rbit) \
+ V(rev16, Rev16) \
+ V(rev32, Rev32) \
+ V(rev64, Rev64) \
+ V(sadalp, Sadalp) \
+ V(saddlp, Saddlp) \
+ V(saddlv, Saddlv) \
+ V(smaxv, Smaxv) \
+ V(sminv, Sminv) \
+ V(sqabs, Sqabs) \
+ V(sqneg, Sqneg) \
+ V(sqxtn, Sqxtn) \
+ V(sqxtn2, Sqxtn2) \
+ V(sqxtun, Sqxtun) \
+ V(sqxtun2, Sqxtun2) \
+ V(suqadd, Suqadd) \
+ V(sxtl, Sxtl) \
+ V(sxtl2, Sxtl2) \
+ V(uadalp, Uadalp) \
+ V(uaddlp, Uaddlp) \
+ V(uaddlv, Uaddlv) \
+ V(umaxv, Umaxv) \
+ V(uminv, Uminv) \
+ V(uqxtn, Uqxtn) \
+ V(uqxtn2, Uqxtn2) \
+ V(urecpe, Urecpe) \
+ V(ursqrte, Ursqrte) \
+ V(usqadd, Usqadd) \
+ V(uxtl, Uxtl) \
+ V(uxtl2, Uxtl2) \
+ V(xtn, Xtn) \
+ V(xtn2, Xtn2)
+
+ #define DEFINE_MACRO_ASM_FUNC(ASM, MASM) \
+ void MASM(const VRegister& vd, \
+ const VRegister& vn) { \
+ SingleEmissionCheckScope guard(this); \
+ ASM(vd, vn); \
+ }
+ NEON_2VREG_MACRO_LIST(DEFINE_MACRO_ASM_FUNC)
+ #undef DEFINE_MACRO_ASM_FUNC
+
+ // NEON 2 vector register with immediate instructions.
+ #define NEON_2VREG_FPIMM_MACRO_LIST(V) \
+ V(fcmeq, Fcmeq) \
+ V(fcmge, Fcmge) \
+ V(fcmgt, Fcmgt) \
+ V(fcmle, Fcmle) \
+ V(fcmlt, Fcmlt)
+
+ #define DEFINE_MACRO_ASM_FUNC(ASM, MASM) \
+ void MASM(const VRegister& vd, \
+ const VRegister& vn, \
+ double imm) { \
+ SingleEmissionCheckScope guard(this); \
+ ASM(vd, vn, imm); \
+ }
+ NEON_2VREG_FPIMM_MACRO_LIST(DEFINE_MACRO_ASM_FUNC)
+ #undef DEFINE_MACRO_ASM_FUNC
+
+ // NEON by element instructions.
+ #define NEON_BYELEMENT_MACRO_LIST(V) \
+ V(fmul, Fmul) \
+ V(fmla, Fmla) \
+ V(fmls, Fmls) \
+ V(fmulx, Fmulx) \
+ V(mul, Mul) \
+ V(mla, Mla) \
+ V(mls, Mls) \
+ V(sqdmulh, Sqdmulh) \
+ V(sqrdmulh, Sqrdmulh) \
+ V(sqdmull, Sqdmull) \
+ V(sqdmull2, Sqdmull2) \
+ V(sqdmlal, Sqdmlal) \
+ V(sqdmlal2, Sqdmlal2) \
+ V(sqdmlsl, Sqdmlsl) \
+ V(sqdmlsl2, Sqdmlsl2) \
+ V(smull, Smull) \
+ V(smull2, Smull2) \
+ V(smlal, Smlal) \
+ V(smlal2, Smlal2) \
+ V(smlsl, Smlsl) \
+ V(smlsl2, Smlsl2) \
+ V(umull, Umull) \
+ V(umull2, Umull2) \
+ V(umlal, Umlal) \
+ V(umlal2, Umlal2) \
+ V(umlsl, Umlsl) \
+ V(umlsl2, Umlsl2)
+
+ #define DEFINE_MACRO_ASM_FUNC(ASM, MASM) \
+ void MASM(const VRegister& vd, \
+ const VRegister& vn, \
+ const VRegister& vm, \
+ int vm_index \
+ ) { \
+ SingleEmissionCheckScope guard(this); \
+ ASM(vd, vn, vm, vm_index); \
+ }
+ NEON_BYELEMENT_MACRO_LIST(DEFINE_MACRO_ASM_FUNC)
+ #undef DEFINE_MACRO_ASM_FUNC
+
+ #define NEON_2VREG_SHIFT_MACRO_LIST(V) \
+ V(rshrn, Rshrn) \
+ V(rshrn2, Rshrn2) \
+ V(shl, Shl) \
+ V(shll, Shll) \
+ V(shll2, Shll2) \
+ V(shrn, Shrn) \
+ V(shrn2, Shrn2) \
+ V(sli, Sli) \
+ V(sqrshrn, Sqrshrn) \
+ V(sqrshrn2, Sqrshrn2) \
+ V(sqrshrun, Sqrshrun) \
+ V(sqrshrun2, Sqrshrun2) \
+ V(sqshl, Sqshl) \
+ V(sqshlu, Sqshlu) \
+ V(sqshrn, Sqshrn) \
+ V(sqshrn2, Sqshrn2) \
+ V(sqshrun, Sqshrun) \
+ V(sqshrun2, Sqshrun2) \
+ V(sri, Sri) \
+ V(srshr, Srshr) \
+ V(srsra, Srsra) \
+ V(sshll, Sshll) \
+ V(sshll2, Sshll2) \
+ V(sshr, Sshr) \
+ V(ssra, Ssra) \
+ V(uqrshrn, Uqrshrn) \
+ V(uqrshrn2, Uqrshrn2) \
+ V(uqshl, Uqshl) \
+ V(uqshrn, Uqshrn) \
+ V(uqshrn2, Uqshrn2) \
+ V(urshr, Urshr) \
+ V(ursra, Ursra) \
+ V(ushll, Ushll) \
+ V(ushll2, Ushll2) \
+ V(ushr, Ushr) \
+ V(usra, Usra) \
+
+ #define DEFINE_MACRO_ASM_FUNC(ASM, MASM) \
+ void MASM(const VRegister& vd, \
+ const VRegister& vn, \
+ int shift) { \
+ SingleEmissionCheckScope guard(this); \
+ ASM(vd, vn, shift); \
+ }
+ NEON_2VREG_SHIFT_MACRO_LIST(DEFINE_MACRO_ASM_FUNC)
+ #undef DEFINE_MACRO_ASM_FUNC
+
+ void Bic(const VRegister& vd,
+ const int imm8,
+ const int left_shift = 0) {
+ SingleEmissionCheckScope guard(this);
+ bic(vd, imm8, left_shift);
+ }
+ void Cmeq(const VRegister& vd,
+ const VRegister& vn,
+ int imm) {
+ SingleEmissionCheckScope guard(this);
+ cmeq(vd, vn, imm);
+ }
+ void Cmge(const VRegister& vd,
+ const VRegister& vn,
+ int imm) {
+ SingleEmissionCheckScope guard(this);
+ cmge(vd, vn, imm);
+ }
+ void Cmgt(const VRegister& vd,
+ const VRegister& vn,
+ int imm) {
+ SingleEmissionCheckScope guard(this);
+ cmgt(vd, vn, imm);
+ }
+ void Cmle(const VRegister& vd,
+ const VRegister& vn,
+ int imm) {
+ SingleEmissionCheckScope guard(this);
+ cmle(vd, vn, imm);
+ }
+ void Cmlt(const VRegister& vd,
+ const VRegister& vn,
+ int imm) {
+ SingleEmissionCheckScope guard(this);
+ cmlt(vd, vn, imm);
+ }
+ void Dup(const VRegister& vd,
+ const VRegister& vn,
+ int index) {
+ SingleEmissionCheckScope guard(this);
+ dup(vd, vn, index);
+ }
+ void Dup(const VRegister& vd,
+ const Register& rn) {
+ SingleEmissionCheckScope guard(this);
+ dup(vd, rn);
+ }
+ void Ext(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int index) {
+ SingleEmissionCheckScope guard(this);
+ ext(vd, vn, vm, index);
+ }
+ void Ins(const VRegister& vd,
+ int vd_index,
+ const VRegister& vn,
+ int vn_index) {
+ SingleEmissionCheckScope guard(this);
+ ins(vd, vd_index, vn, vn_index);
+ }
+ void Ins(const VRegister& vd,
+ int vd_index,
+ const Register& rn) {
+ SingleEmissionCheckScope guard(this);
+ ins(vd, vd_index, rn);
+ }
+ void Ld1(const VRegister& vt,
+ const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ld1(vt, src);
+ }
+ void Ld1(const VRegister& vt,
+ const VRegister& vt2,
+ const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ld1(vt, vt2, src);
+ }
+ void Ld1(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ld1(vt, vt2, vt3, src);
+ }
+ void Ld1(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const VRegister& vt4,
+ const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ld1(vt, vt2, vt3, vt4, src);
+ }
+ void Ld1(const VRegister& vt,
+ int lane,
+ const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ld1(vt, lane, src);
+ }
+ void Ld1r(const VRegister& vt,
+ const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ld1r(vt, src);
+ }
+ void Ld2(const VRegister& vt,
+ const VRegister& vt2,
+ const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ld2(vt, vt2, src);
+ }
+ void Ld2(const VRegister& vt,
+ const VRegister& vt2,
+ int lane,
+ const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ld2(vt, vt2, lane, src);
+ }
+ void Ld2r(const VRegister& vt,
+ const VRegister& vt2,
+ const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ld2r(vt, vt2, src);
+ }
+ void Ld3(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ld3(vt, vt2, vt3, src);
+ }
+ void Ld3(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ int lane,
+ const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ld3(vt, vt2, vt3, lane, src);
+ }
+ void Ld3r(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ld3r(vt, vt2, vt3, src);
+ }
+ void Ld4(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const VRegister& vt4,
+ const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ld4(vt, vt2, vt3, vt4, src);
+ }
+ void Ld4(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const VRegister& vt4,
+ int lane,
+ const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ld4(vt, vt2, vt3, vt4, lane, src);
+ }
+ void Ld4r(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const VRegister& vt4,
+ const MemOperand& src) {
+ SingleEmissionCheckScope guard(this);
+ ld4r(vt, vt2, vt3, vt4, src);
+ }
+ void Mov(const VRegister& vd,
+ int vd_index,
+ const VRegister& vn,
+ int vn_index) {
+ SingleEmissionCheckScope guard(this);
+ mov(vd, vd_index, vn, vn_index);
+ }
+ void Mov(const VRegister& vd,
+ const VRegister& vn,
+ int index) {
+ SingleEmissionCheckScope guard(this);
+ mov(vd, vn, index);
+ }
+ void Mov(const VRegister& vd,
+ int vd_index,
+ const Register& rn) {
+ SingleEmissionCheckScope guard(this);
+ mov(vd, vd_index, rn);
+ }
+ void Mov(const Register& rd,
+ const VRegister& vn,
+ int vn_index) {
+ SingleEmissionCheckScope guard(this);
+ mov(rd, vn, vn_index);
+ }
+ void Movi(const VRegister& vd,
+ uint64_t imm,
+ Shift shift = LSL,
+ int shift_amount = 0);
+ void Movi(const VRegister& vd, uint64_t hi, uint64_t lo);
+ void Mvni(const VRegister& vd,
+ const int imm8,
+ Shift shift = LSL,
+ const int shift_amount = 0) {
+ SingleEmissionCheckScope guard(this);
+ mvni(vd, imm8, shift, shift_amount);
+ }
+ void Orr(const VRegister& vd,
+ const int imm8,
+ const int left_shift = 0) {
+ SingleEmissionCheckScope guard(this);
+ orr(vd, imm8, left_shift);
+ }
+ void Scvtf(const VRegister& vd,
+ const VRegister& vn,
+ int fbits = 0) {
+ SingleEmissionCheckScope guard(this);
+ scvtf(vd, vn, fbits);
+ }
+ void Ucvtf(const VRegister& vd,
+ const VRegister& vn,
+ int fbits = 0) {
+ SingleEmissionCheckScope guard(this);
+ ucvtf(vd, vn, fbits);
+ }
+ void Fcvtzs(const VRegister& vd,
+ const VRegister& vn,
+ int fbits = 0) {
+ SingleEmissionCheckScope guard(this);
+ fcvtzs(vd, vn, fbits);
+ }
+ void Fcvtzu(const VRegister& vd,
+ const VRegister& vn,
+ int fbits = 0) {
+ SingleEmissionCheckScope guard(this);
+ fcvtzu(vd, vn, fbits);
+ }
+ void St1(const VRegister& vt,
+ const MemOperand& dst) {
+ SingleEmissionCheckScope guard(this);
+ st1(vt, dst);
+ }
+ void St1(const VRegister& vt,
+ const VRegister& vt2,
+ const MemOperand& dst) {
+ SingleEmissionCheckScope guard(this);
+ st1(vt, vt2, dst);
+ }
+ void St1(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const MemOperand& dst) {
+ SingleEmissionCheckScope guard(this);
+ st1(vt, vt2, vt3, dst);
+ }
+ void St1(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const VRegister& vt4,
+ const MemOperand& dst) {
+ SingleEmissionCheckScope guard(this);
+ st1(vt, vt2, vt3, vt4, dst);
+ }
+ void St1(const VRegister& vt,
+ int lane,
+ const MemOperand& dst) {
+ SingleEmissionCheckScope guard(this);
+ st1(vt, lane, dst);
+ }
+ void St2(const VRegister& vt,
+ const VRegister& vt2,
+ const MemOperand& dst) {
+ SingleEmissionCheckScope guard(this);
+ st2(vt, vt2, dst);
+ }
+ void St3(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const MemOperand& dst) {
+ SingleEmissionCheckScope guard(this);
+ st3(vt, vt2, vt3, dst);
+ }
+ void St4(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const VRegister& vt4,
+ const MemOperand& dst) {
+ SingleEmissionCheckScope guard(this);
+ st4(vt, vt2, vt3, vt4, dst);
+ }
+ void St2(const VRegister& vt,
+ const VRegister& vt2,
+ int lane,
+ const MemOperand& dst) {
+ SingleEmissionCheckScope guard(this);
+ st2(vt, vt2, lane, dst);
+ }
+ void St3(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ int lane,
+ const MemOperand& dst) {
+ SingleEmissionCheckScope guard(this);
+ st3(vt, vt2, vt3, lane, dst);
+ }
+ void St4(const VRegister& vt,
+ const VRegister& vt2,
+ const VRegister& vt3,
+ const VRegister& vt4,
+ int lane,
+ const MemOperand& dst) {
+ SingleEmissionCheckScope guard(this);
+ st4(vt, vt2, vt3, vt4, lane, dst);
+ }
+ void Smov(const Register& rd,
+ const VRegister& vn,
+ int vn_index) {
+ SingleEmissionCheckScope guard(this);
+ smov(rd, vn, vn_index);
+ }
+ void Umov(const Register& rd,
+ const VRegister& vn,
+ int vn_index) {
+ SingleEmissionCheckScope guard(this);
+ umov(rd, vn, vn_index);
+ }
+ void Crc32b(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ SingleEmissionCheckScope guard(this);
+ crc32b(rd, rn, rm);
+ }
+ void Crc32h(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ SingleEmissionCheckScope guard(this);
+ crc32h(rd, rn, rm);
+ }
+ void Crc32w(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ SingleEmissionCheckScope guard(this);
+ crc32w(rd, rn, rm);
+ }
+ void Crc32x(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ SingleEmissionCheckScope guard(this);
+ crc32x(rd, rn, rm);
+ }
+ void Crc32cb(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ SingleEmissionCheckScope guard(this);
+ crc32cb(rd, rn, rm);
+ }
+ void Crc32ch(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ SingleEmissionCheckScope guard(this);
+ crc32ch(rd, rn, rm);
+ }
+ void Crc32cw(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ SingleEmissionCheckScope guard(this);
+ crc32cw(rd, rn, rm);
+ }
+ void Crc32cx(const Register& rd,
+ const Register& rn,
+ const Register& rm) {
+ SingleEmissionCheckScope guard(this);
+ crc32cx(rd, rn, rm);
+ }
+
+ // Push the system stack pointer (sp) down to allow the same to be done to
+ // the current stack pointer (according to StackPointer()). This must be
+ // called _before_ accessing the memory.
+ //
+ // This is necessary when pushing or otherwise adding things to the stack, to
+ // satisfy the AAPCS64 constraint that the memory below the system stack
+ // pointer is not accessed.
+ //
+ // This method asserts that StackPointer() is not sp, since the call does
+ // not make sense in that context.
+ //
+ // TODO: This method can only accept values of 'space' that can be encoded in
+ // one instruction. Refer to the implementation for details.
+ void BumpSystemStackPointer(const Operand& space);
+
+ // Set the current stack pointer, but don't generate any code.
+ void SetStackPointer64(const Register& stack_pointer) {
+ VIXL_ASSERT(!TmpList()->IncludesAliasOf(stack_pointer));
+ sp_ = stack_pointer;
+ }
+
+ // Return the current stack pointer, as set by SetStackPointer.
+ const Register& StackPointer() const {
+ return sp_;
+ }
+
+ const Register& GetStackPointer64() const {
+ return sp_;
+ }
+
+ js::jit::RegisterOrSP getStackPointer() const {
+ return js::jit::RegisterOrSP(sp_.code());
+ }
+
+ CPURegList* TmpList() { return &tmp_list_; }
+ CPURegList* FPTmpList() { return &fptmp_list_; }
+
+ // Trace control when running the debug simulator.
+ //
+ // For example:
+ //
+ // __ Trace(LOG_REGS, TRACE_ENABLE);
+ // Will add registers to the trace if it wasn't already the case.
+ //
+ // __ Trace(LOG_DISASM, TRACE_DISABLE);
+ // Will stop logging disassembly. It has no effect if the disassembly wasn't
+ // already being logged.
+ void Trace(TraceParameters parameters, TraceCommand command);
+
+ // Log the requested data independently of what is being traced.
+ //
+ // For example:
+ //
+ // __ Log(LOG_FLAGS)
+ // Will output the flags.
+ void Log(TraceParameters parameters);
+
+ // Enable or disable instrumentation when an Instrument visitor is attached to
+ // the simulator.
+ void EnableInstrumentation();
+ void DisableInstrumentation();
+
+ // Add a marker to the instrumentation data produced by an Instrument visitor.
+ // The name is a two character string that will be attached to the marker in
+ // the output data.
+ void AnnotateInstrumentation(const char* marker_name);
+
+ private:
+ // The actual Push and Pop implementations. These don't generate any code
+ // other than that required for the push or pop. This allows
+ // (Push|Pop)CPURegList to bundle together setup code for a large block of
+ // registers.
+ //
+ // Note that size is per register, and is specified in bytes.
+ void PushHelper(int count, int size,
+ const CPURegister& src0, const CPURegister& src1,
+ const CPURegister& src2, const CPURegister& src3);
+ void PopHelper(int count, int size,
+ const CPURegister& dst0, const CPURegister& dst1,
+ const CPURegister& dst2, const CPURegister& dst3);
+
+ void Movi16bitHelper(const VRegister& vd, uint64_t imm);
+ void Movi32bitHelper(const VRegister& vd, uint64_t imm);
+ void Movi64bitHelper(const VRegister& vd, uint64_t imm);
+
+ // Perform necessary maintenance operations before a push or pop.
+ //
+ // Note that size is per register, and is specified in bytes.
+ void PrepareForPush(int count, int size);
+ void PrepareForPop(int count, int size);
+
+ // The actual implementation of load and store operations for CPURegList.
+ enum LoadStoreCPURegListAction {
+ kLoad,
+ kStore
+ };
+ void LoadStoreCPURegListHelper(LoadStoreCPURegListAction operation,
+ CPURegList registers,
+ const MemOperand& mem);
+ // Returns a MemOperand suitable for loading or storing a CPURegList at `dst`.
+ // This helper may allocate registers from `scratch_scope` and generate code
+ // to compute an intermediate address. The resulting MemOperand is only valid
+ // as long as `scratch_scope` remains valid.
+ MemOperand BaseMemOperandForLoadStoreCPURegList(
+ const CPURegList& registers,
+ const MemOperand& mem,
+ UseScratchRegisterScope* scratch_scope);
+
+ bool LabelIsOutOfRange(Label* label, ImmBranchType branch_type) {
+ return !Instruction::IsValidImmPCOffset(branch_type, nextOffset().getOffset() - label->offset());
+ }
+
+ // The register to use as a stack pointer for stack operations.
+ Register sp_;
+
+ // Scratch registers available for use by the MacroAssembler.
+ CPURegList tmp_list_;
+ CPURegList fptmp_list_;
+
+ ptrdiff_t checkpoint_;
+ ptrdiff_t recommended_checkpoint_;
+};
+
+
+// All Assembler emits MUST acquire/release the underlying code buffer. The
+// helper scope below will do so and optionally ensure the buffer is big enough
+// to receive the emit. It is possible to request the scope not to perform any
+// checks (kNoCheck) if for example it is known in advance the buffer size is
+// adequate or there is some other size checking mechanism in place.
+class CodeBufferCheckScope {
+ public:
+ // Tell whether or not the scope needs to ensure the associated CodeBuffer
+ // has enough space for the requested size.
+ enum CheckPolicy {
+ kNoCheck,
+ kCheck
+ };
+
+ // Tell whether or not the scope should assert the amount of code emitted
+ // within the scope is consistent with the requested amount.
+ enum AssertPolicy {
+ kNoAssert, // No assert required.
+ kExactSize, // The code emitted must be exactly size bytes.
+ kMaximumSize // The code emitted must be at most size bytes.
+ };
+
+ CodeBufferCheckScope(Assembler* assm,
+ size_t size,
+ CheckPolicy check_policy = kCheck,
+ AssertPolicy assert_policy = kMaximumSize)
+ { }
+
+ // This is a shortcut for CodeBufferCheckScope(assm, 0, kNoCheck, kNoAssert).
+ explicit CodeBufferCheckScope(Assembler* assm) {}
+};
+
+
+// Use this scope when you need a one-to-one mapping between methods and
+// instructions. This scope prevents the MacroAssembler from being called and
+// literal pools from being emitted. It also asserts the number of instructions
+// emitted is what you specified when creating the scope.
+// FIXME: Because of the disabled calls below, this class asserts nothing.
+class InstructionAccurateScope : public CodeBufferCheckScope {
+ public:
+ InstructionAccurateScope(MacroAssembler* masm,
+ int64_t count,
+ AssertPolicy policy = kExactSize)
+ : CodeBufferCheckScope(masm,
+ (count * kInstructionSize),
+ kCheck,
+ policy) {
+ }
+};
+
+
+// This scope utility allows scratch registers to be managed safely. The
+// MacroAssembler's TmpList() (and FPTmpList()) is used as a pool of scratch
+// registers. These registers can be allocated on demand, and will be returned
+// at the end of the scope.
+//
+// When the scope ends, the MacroAssembler's lists will be restored to their
+// original state, even if the lists were modified by some other means.
+class UseScratchRegisterScope {
+ public:
+ // This constructor implicitly calls the `Open` function to initialise the
+ // scope, so it is ready to use immediately after it has been constructed.
+ explicit UseScratchRegisterScope(MacroAssembler* masm);
+ // This constructor allows deferred and optional initialisation of the scope.
+ // The user is required to explicitly call the `Open` function before using
+ // the scope.
+ UseScratchRegisterScope();
+ // This function performs the actual initialisation work.
+ void Open(MacroAssembler* masm);
+
+ // The destructor always implicitly calls the `Close` function.
+ ~UseScratchRegisterScope();
+ // This function performs the cleaning-up work. It must succeed even if the
+ // scope has not been opened. It is safe to call multiple times.
+ void Close();
+
+
+ bool IsAvailable(const CPURegister& reg) const;
+
+
+ // Take a register from the appropriate temps list. It will be returned
+ // automatically when the scope ends.
+ Register AcquireW() { return AcquireNextAvailable(available_).W(); }
+ Register AcquireX() { return AcquireNextAvailable(available_).X(); }
+ VRegister AcquireS() { return AcquireNextAvailable(availablefp_).S(); }
+ VRegister AcquireD() { return AcquireNextAvailable(availablefp_).D(); }
+ VRegister AcquireQ() { return AcquireNextAvailable(availablefp_).Q(); }
+
+
+ Register AcquireSameSizeAs(const Register& reg);
+ VRegister AcquireSameSizeAs(const VRegister& reg);
+
+
+ // Explicitly release an acquired (or excluded) register, putting it back in
+ // the appropriate temps list.
+ void Release(const CPURegister& reg);
+
+
+ // Make the specified registers available as scratch registers for the
+ // duration of this scope.
+ void Include(const CPURegList& list);
+ void Include(const Register& reg1,
+ const Register& reg2 = NoReg,
+ const Register& reg3 = NoReg,
+ const Register& reg4 = NoReg);
+ void Include(const VRegister& reg1,
+ const VRegister& reg2 = NoVReg,
+ const VRegister& reg3 = NoVReg,
+ const VRegister& reg4 = NoVReg);
+
+
+ // Make sure that the specified registers are not available in this scope.
+ // This can be used to prevent helper functions from using sensitive
+ // registers, for example.
+ void Exclude(const CPURegList& list);
+ void Exclude(const Register& reg1,
+ const Register& reg2 = NoReg,
+ const Register& reg3 = NoReg,
+ const Register& reg4 = NoReg);
+ void Exclude(const VRegister& reg1,
+ const VRegister& reg2 = NoVReg,
+ const VRegister& reg3 = NoVReg,
+ const VRegister& reg4 = NoVReg);
+ void Exclude(const CPURegister& reg1,
+ const CPURegister& reg2 = NoCPUReg,
+ const CPURegister& reg3 = NoCPUReg,
+ const CPURegister& reg4 = NoCPUReg);
+
+
+ // Prevent any scratch registers from being used in this scope.
+ void ExcludeAll();
+
+
+ private:
+ static CPURegister AcquireNextAvailable(CPURegList* available);
+
+ static void ReleaseByCode(CPURegList* available, int code);
+
+ static void ReleaseByRegList(CPURegList* available,
+ RegList regs);
+
+ static void IncludeByRegList(CPURegList* available,
+ RegList exclude);
+
+ static void ExcludeByRegList(CPURegList* available,
+ RegList exclude);
+
+ // Available scratch registers.
+ CPURegList* available_; // kRegister
+ CPURegList* availablefp_; // kVRegister
+
+ // The state of the available lists at the start of this scope.
+ RegList old_available_; // kRegister
+ RegList old_availablefp_; // kVRegister
+#ifdef DEBUG
+ bool initialised_;
+#endif
+
+ // Disallow copy constructor and operator=.
+ UseScratchRegisterScope(const UseScratchRegisterScope&) {
+ VIXL_UNREACHABLE();
+ }
+ void operator=(const UseScratchRegisterScope&) {
+ VIXL_UNREACHABLE();
+ }
+};
+
+
+} // namespace vixl
+
+#endif // VIXL_A64_MACRO_ASSEMBLER_A64_H_
diff --git a/js/src/jit/arm64/vixl/MozAssembler-vixl.cpp b/js/src/jit/arm64/vixl/MozAssembler-vixl.cpp
new file mode 100644
index 0000000000..b9189cc23b
--- /dev/null
+++ b/js/src/jit/arm64/vixl/MozAssembler-vixl.cpp
@@ -0,0 +1,610 @@
+// Copyright 2015, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "jit/arm64/vixl/Assembler-vixl.h"
+#include "jit/Label.h"
+
+namespace vixl {
+
+using LabelDoc = js::jit::DisassemblerSpew::LabelDoc;
+
+// Assembler
+void Assembler::FinalizeCode() {
+#ifdef DEBUG
+ finalized_ = true;
+#endif
+}
+
+// Unbound Label Representation.
+//
+// We can have multiple branches using the same label before it is bound.
+// Assembler::bind() must then be able to enumerate all the branches and patch
+// them to target the final label location.
+//
+// When a Label is unbound with uses, its offset is pointing to the tip of a
+// linked list of uses. The uses can be branches or adr/adrp instructions. In
+// the case of branches, the next member in the linked list is simply encoded
+// as the branch target. For adr/adrp, the relative pc offset is encoded in the
+// immediate field as a signed instruction offset.
+//
+// In both cases, the end of the list is encoded as a 0 pc offset, i.e. the
+// tail is pointing to itself.
+
+static const ptrdiff_t kEndOfLabelUseList = 0;
+
+BufferOffset
+MozBaseAssembler::NextLink(BufferOffset cur)
+{
+ Instruction* link = getInstructionAt(cur);
+ // Raw encoded offset.
+ ptrdiff_t offset = link->ImmPCRawOffset();
+ // End of the list is encoded as 0.
+ if (offset == kEndOfLabelUseList)
+ return BufferOffset();
+ // The encoded offset is the number of instructions to move.
+ return BufferOffset(cur.getOffset() + offset * kInstructionSize);
+}
+
+static ptrdiff_t
+EncodeOffset(BufferOffset cur, BufferOffset next)
+{
+ MOZ_ASSERT(next.assigned() && cur.assigned());
+ ptrdiff_t offset = next.getOffset() - cur.getOffset();
+ MOZ_ASSERT(offset % kInstructionSize == 0);
+ return offset / kInstructionSize;
+}
+
+void
+MozBaseAssembler::SetNextLink(BufferOffset cur, BufferOffset next)
+{
+ Instruction* link = getInstructionAt(cur);
+ link->SetImmPCRawOffset(EncodeOffset(cur, next));
+}
+
+// A common implementation for the LinkAndGet<Type>OffsetTo helpers.
+//
+// If the label is bound, returns the offset as a multiple of 1 << elementShift.
+// Otherwise, links the instruction to the label and returns the raw offset to
+// encode. (This will be an instruction count.)
+//
+// The offset is calculated by aligning the PC and label addresses down to a
+// multiple of 1 << elementShift, then calculating the (scaled) offset between
+// them. This matches the semantics of adrp, for example. (Assuming that the
+// assembler buffer is page-aligned, which it probably isn't.)
+//
+// For an unbound label, the returned offset will be encodable in the provided
+// branch range. If the label is already bound, the caller is expected to make
+// sure that it is in range, and emit the necessary branch instrutions if it
+// isn't.
+//
+ptrdiff_t
+MozBaseAssembler::LinkAndGetOffsetTo(BufferOffset branch, ImmBranchRangeType branchRange,
+ unsigned elementShift, Label* label)
+{
+ if (armbuffer_.oom())
+ return kEndOfLabelUseList;
+
+ if (label->bound()) {
+ // The label is bound: all uses are already linked.
+ ptrdiff_t branch_offset = ptrdiff_t(branch.getOffset() >> elementShift);
+ ptrdiff_t label_offset = ptrdiff_t(label->offset() >> elementShift);
+ return label_offset - branch_offset;
+ }
+
+ // Keep track of short-range branches targeting unbound labels. We may need
+ // to insert veneers in PatchShortRangeBranchToVeneer() below.
+ if (branchRange < NumShortBranchRangeTypes) {
+ // This is the last possible branch target.
+ BufferOffset deadline(branch.getOffset() +
+ Instruction::ImmBranchMaxForwardOffset(branchRange));
+ armbuffer_.registerBranchDeadline(branchRange, deadline);
+ }
+
+ // The label is unbound and previously unused: Store the offset in the label
+ // itself for patching by bind().
+ if (!label->used()) {
+ label->use(branch.getOffset());
+ return kEndOfLabelUseList;
+ }
+
+ // The label is unbound and has multiple users. Create a linked list between
+ // the branches, and update the linked list head in the label struct. This is
+ // not always trivial since the branches in the linked list have limited
+ // ranges.
+
+ // What is the earliest buffer offset that would be reachable by the branch
+ // we're about to add?
+ ptrdiff_t earliestReachable =
+ branch.getOffset() + Instruction::ImmBranchMinBackwardOffset(branchRange);
+
+ // If the existing instruction at the head of the list is within reach of the
+ // new branch, we can simply insert the new branch at the front of the list.
+ if (label->offset() >= earliestReachable) {
+ ptrdiff_t offset = EncodeOffset(branch, BufferOffset(label));
+ label->use(branch.getOffset());
+ MOZ_ASSERT(offset != kEndOfLabelUseList);
+ return offset;
+ }
+
+ // The label already has a linked list of uses, but we can't reach the head
+ // of the list with the allowed branch range. Insert this branch at a
+ // different position in the list.
+ //
+ // Find an existing branch, exbr, such that:
+ //
+ // 1. The new branch can be reached by exbr, and either
+ // 2a. The new branch can reach exbr's target, or
+ // 2b. The exbr branch is at the end of the list.
+ //
+ // Then the new branch can be inserted after exbr in the linked list.
+ //
+ // We know that it is always possible to find an exbr branch satisfying these
+ // conditions because of the PatchShortRangeBranchToVeneer() mechanism. All
+ // branches are guaranteed to either be able to reach the end of the
+ // assembler buffer, or they will be pointing to an unconditional branch that
+ // can.
+ //
+ // In particular, the end of the list is always a viable candidate, so we'll
+ // just get that.
+ BufferOffset next(label);
+ BufferOffset exbr;
+ do {
+ exbr = next;
+ next = NextLink(next);
+ } while (next.assigned());
+ SetNextLink(exbr, branch);
+
+ // This branch becomes the new end of the list.
+ return kEndOfLabelUseList;
+}
+
+ptrdiff_t MozBaseAssembler::LinkAndGetByteOffsetTo(BufferOffset branch, Label* label) {
+ return LinkAndGetOffsetTo(branch, UncondBranchRangeType, 0, label);
+}
+
+ptrdiff_t MozBaseAssembler::LinkAndGetInstructionOffsetTo(BufferOffset branch,
+ ImmBranchRangeType branchRange,
+ Label* label) {
+ return LinkAndGetOffsetTo(branch, branchRange, kInstructionSizeLog2, label);
+}
+
+ptrdiff_t MozBaseAssembler::LinkAndGetPageOffsetTo(BufferOffset branch, Label* label) {
+ return LinkAndGetOffsetTo(branch, UncondBranchRangeType, kPageSizeLog2, label);
+}
+
+BufferOffset Assembler::b(int imm26, const LabelDoc& doc) {
+ return EmitBranch(B | ImmUncondBranch(imm26), doc);
+}
+
+
+void Assembler::b(Instruction* at, int imm26) {
+ return EmitBranch(at, B | ImmUncondBranch(imm26));
+}
+
+
+BufferOffset Assembler::b(int imm19, Condition cond, const LabelDoc& doc) {
+ return EmitBranch(B_cond | ImmCondBranch(imm19) | cond, doc);
+}
+
+
+void Assembler::b(Instruction* at, int imm19, Condition cond) {
+ EmitBranch(at, B_cond | ImmCondBranch(imm19) | cond);
+}
+
+
+BufferOffset Assembler::b(Label* label) {
+ // Encode the relative offset from the inserted branch to the label.
+ LabelDoc doc = refLabel(label);
+ return b(LinkAndGetInstructionOffsetTo(nextInstrOffset(), UncondBranchRangeType, label), doc);
+}
+
+
+BufferOffset Assembler::b(Label* label, Condition cond) {
+ // Encode the relative offset from the inserted branch to the label.
+ LabelDoc doc = refLabel(label);
+ return b(LinkAndGetInstructionOffsetTo(nextInstrOffset(), CondBranchRangeType, label), cond, doc);
+}
+
+void Assembler::br(Instruction* at, const Register& xn) {
+ VIXL_ASSERT(xn.Is64Bits());
+ // No need for EmitBranch(): no immediate offset needs fixing.
+ Emit(at, BR | Rn(xn));
+}
+
+
+void Assembler::blr(Instruction* at, const Register& xn) {
+ VIXL_ASSERT(xn.Is64Bits());
+ // No need for EmitBranch(): no immediate offset needs fixing.
+ Emit(at, BLR | Rn(xn));
+}
+
+
+void Assembler::bl(int imm26, const LabelDoc& doc) {
+ EmitBranch(BL | ImmUncondBranch(imm26), doc);
+}
+
+
+void Assembler::bl(Instruction* at, int imm26) {
+ EmitBranch(at, BL | ImmUncondBranch(imm26));
+}
+
+
+void Assembler::bl(Label* label) {
+ // Encode the relative offset from the inserted branch to the label.
+ LabelDoc doc = refLabel(label);
+ return bl(LinkAndGetInstructionOffsetTo(nextInstrOffset(), UncondBranchRangeType, label), doc);
+}
+
+
+void Assembler::cbz(const Register& rt, int imm19, const LabelDoc& doc) {
+ EmitBranch(SF(rt) | CBZ | ImmCmpBranch(imm19) | Rt(rt), doc);
+}
+
+
+void Assembler::cbz(Instruction* at, const Register& rt, int imm19) {
+ EmitBranch(at, SF(rt) | CBZ | ImmCmpBranch(imm19) | Rt(rt));
+}
+
+
+void Assembler::cbz(const Register& rt, Label* label) {
+ // Encode the relative offset from the inserted branch to the label.
+ LabelDoc doc = refLabel(label);
+ return cbz(rt, LinkAndGetInstructionOffsetTo(nextInstrOffset(), CondBranchRangeType, label), doc);
+}
+
+
+void Assembler::cbnz(const Register& rt, int imm19, const LabelDoc& doc) {
+ EmitBranch(SF(rt) | CBNZ | ImmCmpBranch(imm19) | Rt(rt), doc);
+}
+
+
+void Assembler::cbnz(Instruction* at, const Register& rt, int imm19) {
+ EmitBranch(at, SF(rt) | CBNZ | ImmCmpBranch(imm19) | Rt(rt));
+}
+
+
+void Assembler::cbnz(const Register& rt, Label* label) {
+ // Encode the relative offset from the inserted branch to the label.
+ LabelDoc doc = refLabel(label);
+ return cbnz(rt, LinkAndGetInstructionOffsetTo(nextInstrOffset(), CondBranchRangeType, label), doc);
+}
+
+
+void Assembler::tbz(const Register& rt, unsigned bit_pos, int imm14, const LabelDoc& doc) {
+ VIXL_ASSERT(rt.Is64Bits() || (rt.Is32Bits() && (bit_pos < kWRegSize)));
+ EmitBranch(TBZ | ImmTestBranchBit(bit_pos) | ImmTestBranch(imm14) | Rt(rt), doc);
+}
+
+
+void Assembler::tbz(Instruction* at, const Register& rt, unsigned bit_pos, int imm14) {
+ VIXL_ASSERT(rt.Is64Bits() || (rt.Is32Bits() && (bit_pos < kWRegSize)));
+ EmitBranch(at, TBZ | ImmTestBranchBit(bit_pos) | ImmTestBranch(imm14) | Rt(rt));
+}
+
+
+void Assembler::tbz(const Register& rt, unsigned bit_pos, Label* label) {
+ // Encode the relative offset from the inserted branch to the label.
+ LabelDoc doc = refLabel(label);
+ return tbz(rt, bit_pos, LinkAndGetInstructionOffsetTo(nextInstrOffset(), TestBranchRangeType, label), doc);
+}
+
+
+void Assembler::tbnz(const Register& rt, unsigned bit_pos, int imm14, const LabelDoc& doc) {
+ VIXL_ASSERT(rt.Is64Bits() || (rt.Is32Bits() && (bit_pos < kWRegSize)));
+ EmitBranch(TBNZ | ImmTestBranchBit(bit_pos) | ImmTestBranch(imm14) | Rt(rt), doc);
+}
+
+
+void Assembler::tbnz(Instruction* at, const Register& rt, unsigned bit_pos, int imm14) {
+ VIXL_ASSERT(rt.Is64Bits() || (rt.Is32Bits() && (bit_pos < kWRegSize)));
+ EmitBranch(at, TBNZ | ImmTestBranchBit(bit_pos) | ImmTestBranch(imm14) | Rt(rt));
+}
+
+
+void Assembler::tbnz(const Register& rt, unsigned bit_pos, Label* label) {
+ // Encode the relative offset from the inserted branch to the label.
+ LabelDoc doc = refLabel(label);
+ return tbnz(rt, bit_pos, LinkAndGetInstructionOffsetTo(nextInstrOffset(), TestBranchRangeType, label), doc);
+}
+
+
+void Assembler::adr(const Register& rd, int imm21, const LabelDoc& doc) {
+ VIXL_ASSERT(rd.Is64Bits());
+ EmitBranch(ADR | ImmPCRelAddress(imm21) | Rd(rd), doc);
+}
+
+
+void Assembler::adr(Instruction* at, const Register& rd, int imm21) {
+ VIXL_ASSERT(rd.Is64Bits());
+ EmitBranch(at, ADR | ImmPCRelAddress(imm21) | Rd(rd));
+}
+
+
+void Assembler::adr(const Register& rd, Label* label) {
+ // Encode the relative offset from the inserted adr to the label.
+ LabelDoc doc = refLabel(label);
+ return adr(rd, LinkAndGetByteOffsetTo(nextInstrOffset(), label), doc);
+}
+
+
+void Assembler::adrp(const Register& rd, int imm21, const LabelDoc& doc) {
+ VIXL_ASSERT(rd.Is64Bits());
+ EmitBranch(ADRP | ImmPCRelAddress(imm21) | Rd(rd), doc);
+}
+
+
+void Assembler::adrp(Instruction* at, const Register& rd, int imm21) {
+ VIXL_ASSERT(rd.Is64Bits());
+ EmitBranch(at, ADRP | ImmPCRelAddress(imm21) | Rd(rd));
+}
+
+
+void Assembler::adrp(const Register& rd, Label* label) {
+ VIXL_ASSERT(AllowPageOffsetDependentCode());
+ // Encode the relative offset from the inserted adr to the label.
+ LabelDoc doc = refLabel(label);
+ return adrp(rd, LinkAndGetPageOffsetTo(nextInstrOffset(), label), doc);
+}
+
+
+BufferOffset Assembler::ands(const Register& rd, const Register& rn, const Operand& operand) {
+ return Logical(rd, rn, operand, ANDS);
+}
+
+
+BufferOffset Assembler::tst(const Register& rn, const Operand& operand) {
+ return ands(AppropriateZeroRegFor(rn), rn, operand);
+}
+
+
+void Assembler::ldr(Instruction* at, const CPURegister& rt, int imm19) {
+ LoadLiteralOp op = LoadLiteralOpFor(rt);
+ Emit(at, op | ImmLLiteral(imm19) | Rt(rt));
+}
+
+
+BufferOffset Assembler::hint(SystemHint code) {
+ return Emit(HINT | ImmHint(code));
+}
+
+
+void Assembler::hint(Instruction* at, SystemHint code) {
+ Emit(at, HINT | ImmHint(code));
+}
+
+
+void Assembler::svc(Instruction* at, int code) {
+ VIXL_ASSERT(IsUint16(code));
+ Emit(at, SVC | ImmException(code));
+}
+
+
+void Assembler::nop(Instruction* at) {
+ hint(at, NOP);
+}
+
+
+void Assembler::csdb(Instruction* at) {
+ hint(at, CSDB);
+}
+
+
+BufferOffset Assembler::Logical(const Register& rd, const Register& rn,
+ const Operand& operand, LogicalOp op)
+{
+ VIXL_ASSERT(rd.size() == rn.size());
+ if (operand.IsImmediate()) {
+ int64_t immediate = operand.immediate();
+ unsigned reg_size = rd.size();
+
+ VIXL_ASSERT(immediate != 0);
+ VIXL_ASSERT(immediate != -1);
+ VIXL_ASSERT(rd.Is64Bits() || IsUint32(immediate));
+
+ // If the operation is NOT, invert the operation and immediate.
+ if ((op & NOT) == NOT) {
+ op = static_cast<LogicalOp>(op & ~NOT);
+ immediate = rd.Is64Bits() ? ~immediate : (~immediate & kWRegMask);
+ }
+
+ unsigned n, imm_s, imm_r;
+ if (IsImmLogical(immediate, reg_size, &n, &imm_s, &imm_r)) {
+ // Immediate can be encoded in the instruction.
+ return LogicalImmediate(rd, rn, n, imm_s, imm_r, op);
+ } else {
+ // This case is handled in the macro assembler.
+ VIXL_UNREACHABLE();
+ }
+ } else {
+ VIXL_ASSERT(operand.IsShiftedRegister());
+ VIXL_ASSERT(operand.reg().size() == rd.size());
+ Instr dp_op = static_cast<Instr>(op | LogicalShiftedFixed);
+ return DataProcShiftedRegister(rd, rn, operand, LeaveFlags, dp_op);
+ }
+}
+
+
+BufferOffset Assembler::LogicalImmediate(const Register& rd, const Register& rn,
+ unsigned n, unsigned imm_s, unsigned imm_r, LogicalOp op)
+{
+ unsigned reg_size = rd.size();
+ Instr dest_reg = (op == ANDS) ? Rd(rd) : RdSP(rd);
+ return Emit(SF(rd) | LogicalImmediateFixed | op | BitN(n, reg_size) |
+ ImmSetBits(imm_s, reg_size) | ImmRotate(imm_r, reg_size) | dest_reg | Rn(rn));
+}
+
+
+BufferOffset Assembler::DataProcShiftedRegister(const Register& rd, const Register& rn,
+ const Operand& operand, FlagsUpdate S, Instr op)
+{
+ VIXL_ASSERT(operand.IsShiftedRegister());
+ VIXL_ASSERT(rn.Is64Bits() || (rn.Is32Bits() && IsUint5(operand.shift_amount())));
+ return Emit(SF(rd) | op | Flags(S) |
+ ShiftDP(operand.shift()) | ImmDPShift(operand.shift_amount()) |
+ Rm(operand.reg()) | Rn(rn) | Rd(rd));
+}
+
+
+void MozBaseAssembler::InsertIndexIntoTag(uint8_t* load, uint32_t index) {
+ // Store the js::jit::PoolEntry index into the instruction.
+ // finishPool() will walk over all literal load instructions
+ // and use PatchConstantPoolLoad() to patch to the final relative offset.
+ *((uint32_t*)load) |= Assembler::ImmLLiteral(index);
+}
+
+
+bool MozBaseAssembler::PatchConstantPoolLoad(void* loadAddr, void* constPoolAddr) {
+ Instruction* load = reinterpret_cast<Instruction*>(loadAddr);
+
+ // The load currently contains the js::jit::PoolEntry's index,
+ // as written by InsertIndexIntoTag().
+ uint32_t index = load->ImmLLiteral();
+
+ // Each entry in the literal pool is uint32_t-sized,
+ // but literals may use multiple entries.
+ uint32_t* constPool = reinterpret_cast<uint32_t*>(constPoolAddr);
+ Instruction* source = reinterpret_cast<Instruction*>(&constPool[index]);
+
+ load->SetImmLLiteral(source);
+ return false; // Nothing uses the return value.
+}
+
+void
+MozBaseAssembler::PatchShortRangeBranchToVeneer(ARMBuffer* buffer, unsigned rangeIdx,
+ BufferOffset deadline, BufferOffset veneer)
+{
+ // Reconstruct the position of the branch from (rangeIdx, deadline).
+ vixl::ImmBranchRangeType branchRange = static_cast<vixl::ImmBranchRangeType>(rangeIdx);
+ BufferOffset branch(deadline.getOffset() - Instruction::ImmBranchMaxForwardOffset(branchRange));
+ Instruction *branchInst = buffer->getInst(branch);
+ Instruction *veneerInst = buffer->getInst(veneer);
+
+ // Verify that the branch range matches what's encoded.
+ MOZ_ASSERT(Instruction::ImmBranchTypeToRange(branchInst->BranchType()) == branchRange);
+
+ // We want to insert veneer after branch in the linked list of instructions
+ // that use the same unbound label.
+ // The veneer should be an unconditional branch.
+ ptrdiff_t nextElemOffset = branchInst->ImmPCRawOffset();
+
+ // If offset is 0, this is the end of the linked list.
+ if (nextElemOffset != kEndOfLabelUseList) {
+ // Make the offset relative to veneer so it targets the same instruction
+ // as branchInst.
+ nextElemOffset *= kInstructionSize;
+ nextElemOffset += branch.getOffset() - veneer.getOffset();
+ nextElemOffset /= kInstructionSize;
+ }
+ Assembler::b(veneerInst, nextElemOffset);
+
+ // Now point branchInst at veneer. See also SetNextLink() above.
+ branchInst->SetImmPCRawOffset(EncodeOffset(branch, veneer));
+}
+
+struct PoolHeader {
+ uint32_t data;
+
+ struct Header {
+ // The size should take into account the pool header.
+ // The size is in units of Instruction (4bytes), not byte.
+ union {
+ struct {
+ uint32_t size : 15;
+
+ // "Natural" guards are part of the normal instruction stream,
+ // while "non-natural" guards are inserted for the sole purpose
+ // of skipping around a pool.
+ uint32_t isNatural : 1;
+ uint32_t ONES : 16;
+ };
+ uint32_t data;
+ };
+
+ Header(int size_, bool isNatural_)
+ : size(size_),
+ isNatural(isNatural_),
+ ONES(0xffff)
+ { }
+
+ Header(uint32_t data)
+ : data(data)
+ {
+ VIXL_STATIC_ASSERT(sizeof(Header) == sizeof(uint32_t));
+ VIXL_ASSERT(ONES == 0xffff);
+ }
+
+ uint32_t raw() const {
+ VIXL_STATIC_ASSERT(sizeof(Header) == sizeof(uint32_t));
+ return data;
+ }
+ };
+
+ PoolHeader(int size_, bool isNatural_)
+ : data(Header(size_, isNatural_).raw())
+ { }
+
+ uint32_t size() const {
+ Header tmp(data);
+ return tmp.size;
+ }
+
+ uint32_t isNatural() const {
+ Header tmp(data);
+ return tmp.isNatural;
+ }
+};
+
+
+void MozBaseAssembler::WritePoolHeader(uint8_t* start, js::jit::Pool* p, bool isNatural) {
+ static_assert(sizeof(PoolHeader) == 4);
+
+ // Get the total size of the pool.
+ const uintptr_t totalPoolSize = sizeof(PoolHeader) + p->getPoolSize();
+ const uintptr_t totalPoolInstructions = totalPoolSize / kInstructionSize;
+
+ VIXL_ASSERT((totalPoolSize & 0x3) == 0);
+ VIXL_ASSERT(totalPoolInstructions < (1 << 15));
+
+ PoolHeader header(totalPoolInstructions, isNatural);
+ *(PoolHeader*)start = header;
+}
+
+
+void MozBaseAssembler::WritePoolFooter(uint8_t* start, js::jit::Pool* p, bool isNatural) {
+ return;
+}
+
+
+void MozBaseAssembler::WritePoolGuard(BufferOffset branch, Instruction* inst, BufferOffset dest) {
+ int byteOffset = dest.getOffset() - branch.getOffset();
+ VIXL_ASSERT(byteOffset % kInstructionSize == 0);
+
+ int instOffset = byteOffset >> kInstructionSizeLog2;
+ Assembler::b(inst, instOffset);
+}
+
+
+} // namespace vixl
diff --git a/js/src/jit/arm64/vixl/MozBaseAssembler-vixl.h b/js/src/jit/arm64/vixl/MozBaseAssembler-vixl.h
new file mode 100644
index 0000000000..5d12f81bb1
--- /dev/null
+++ b/js/src/jit/arm64/vixl/MozBaseAssembler-vixl.h
@@ -0,0 +1,356 @@
+// Copyright 2013, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef jit_arm64_vixl_MozBaseAssembler_vixl_h
+#define jit_arm64_vixl_MozBaseAssembler_vixl_h
+
+
+#include "mozilla/Assertions.h" // MOZ_ASSERT
+#include "mozilla/Sprintf.h" // SprintfLiteral
+
+#include <stddef.h> // size_t
+#include <stdint.h> // uint8_t, uint32_t
+#include <string.h> // strstr
+
+#include "jit/arm64/vixl/Constants-vixl.h" // vixl::{HINT, NOP, ImmHint_offset}
+#include "jit/arm64/vixl/Globals-vixl.h" // VIXL_ASSERT
+#include "jit/arm64/vixl/Instructions-vixl.h" // vixl::{Instruction, NumShortBranchRangeTypes, Instr, ImmBranchRangeType}
+
+#include "jit/Label.h" // jit::Label
+#include "jit/shared/Assembler-shared.h" // jit::AssemblerShared
+#include "jit/shared/Disassembler-shared.h" // jit::DisassemblerSpew
+#include "jit/shared/IonAssemblerBuffer.h" // jit::BufferOffset
+#include "jit/shared/IonAssemblerBufferWithConstantPools.h" // jit::AssemblerBufferWithConstantPools
+
+namespace vixl {
+
+
+using js::jit::BufferOffset;
+using js::jit::DisassemblerSpew;
+using js::jit::Label;
+
+using LabelDoc = DisassemblerSpew::LabelDoc;
+using LiteralDoc = DisassemblerSpew::LiteralDoc;
+
+#ifdef JS_DISASM_ARM64
+void DisassembleInstruction(char* buffer, size_t bufsize, const Instruction* instr);
+#endif
+
+class MozBaseAssembler;
+typedef js::jit::AssemblerBufferWithConstantPools<1024, 4, Instruction, MozBaseAssembler,
+ NumShortBranchRangeTypes> ARMBuffer;
+
+// Base class for vixl::Assembler, for isolating Moz-specific changes to VIXL.
+class MozBaseAssembler : public js::jit::AssemblerShared {
+ // Buffer initialization constants.
+ static const unsigned BufferGuardSize = 1;
+ static const unsigned BufferHeaderSize = 1;
+ static const size_t BufferCodeAlignment = 8;
+ static const size_t BufferMaxPoolOffset = 1024;
+ static const unsigned BufferPCBias = 0;
+ static const uint32_t BufferAlignmentFillInstruction = HINT | (NOP << ImmHint_offset);
+ static const uint32_t BufferNopFillInstruction = HINT | (NOP << ImmHint_offset);
+ static const unsigned BufferNumDebugNopsToInsert = 0;
+
+#ifdef JS_DISASM_ARM64
+ static constexpr const char* const InstrIndent = " ";
+ static constexpr const char* const LabelIndent = " ";
+ static constexpr const char* const TargetIndent = " ";
+#endif
+
+ public:
+ MozBaseAssembler()
+ : armbuffer_(BufferGuardSize,
+ BufferHeaderSize,
+ BufferCodeAlignment,
+ BufferMaxPoolOffset,
+ BufferPCBias,
+ BufferAlignmentFillInstruction,
+ BufferNopFillInstruction,
+ BufferNumDebugNopsToInsert)
+ {
+#ifdef JS_DISASM_ARM64
+ spew_.setLabelIndent(LabelIndent);
+ spew_.setTargetIndent(TargetIndent);
+#endif
+}
+ ~MozBaseAssembler()
+ {
+#ifdef JS_DISASM_ARM64
+ spew_.spewOrphans();
+#endif
+ }
+
+ public:
+ // Return the Instruction at a given byte offset.
+ Instruction* getInstructionAt(BufferOffset offset) {
+ return armbuffer_.getInst(offset);
+ }
+
+ // Return the byte offset of a bound label.
+ template <typename T>
+ inline T GetLabelByteOffset(const js::jit::Label* label) {
+ VIXL_ASSERT(label->bound());
+ static_assert(sizeof(T) >= sizeof(uint32_t));
+ return reinterpret_cast<T>(label->offset());
+ }
+
+ protected:
+ // Get the buffer offset of the next inserted instruction. This may flush
+ // constant pools.
+ BufferOffset nextInstrOffset() {
+ return armbuffer_.nextInstrOffset();
+ }
+
+ // Get the next usable buffer offset. Note that a constant pool may be placed
+ // here before the next instruction is emitted.
+ BufferOffset nextOffset() const {
+ return armbuffer_.nextOffset();
+ }
+
+ // Allocate memory in the buffer by forwarding to armbuffer_.
+ // Propagate OOM errors.
+ BufferOffset allocLiteralLoadEntry(size_t numInst, unsigned numPoolEntries,
+ uint8_t* inst, uint8_t* data,
+ const LiteralDoc& doc = LiteralDoc(),
+ ARMBuffer::PoolEntry* pe = nullptr)
+ {
+ MOZ_ASSERT(inst);
+ MOZ_ASSERT(numInst == 1); /* If not, then fix disassembly */
+ BufferOffset offset = armbuffer_.allocEntry(numInst, numPoolEntries, inst,
+ data, pe);
+ propagateOOM(offset.assigned());
+#ifdef JS_DISASM_ARM64
+ Instruction* instruction = armbuffer_.getInstOrNull(offset);
+ if (instruction)
+ spewLiteralLoad(offset,
+ reinterpret_cast<vixl::Instruction*>(instruction), doc);
+#endif
+ return offset;
+ }
+
+#ifdef JS_DISASM_ARM64
+ DisassemblerSpew spew_;
+
+ void spew(BufferOffset offs, const vixl::Instruction* instr) {
+ if (spew_.isDisabled() || !instr)
+ return;
+
+ char buffer[2048];
+ DisassembleInstruction(buffer, sizeof(buffer), instr);
+ spew_.spew("%06" PRIx32 " %08" PRIx32 "%s%s",
+ (uint32_t)offs.getOffset(),
+ instr->InstructionBits(), InstrIndent, buffer);
+ }
+
+ void spewBranch(BufferOffset offs,
+ const vixl::Instruction* instr, const LabelDoc& target) {
+ if (spew_.isDisabled() || !instr)
+ return;
+
+ char buffer[2048];
+ DisassembleInstruction(buffer, sizeof(buffer), instr);
+
+ char labelBuf[128];
+ labelBuf[0] = 0;
+
+ bool hasTarget = target.valid;
+ if (!hasTarget)
+ SprintfLiteral(labelBuf, "-> (link-time target)");
+
+ if (instr->IsImmBranch() && hasTarget) {
+ // The target information in the instruction is likely garbage, so remove it.
+ // The target label will in any case be printed if we have it.
+ //
+ // The format of the instruction disassembly is /.*#.*/. Strip the # and later.
+ size_t i;
+ const size_t BUFLEN = sizeof(buffer)-1;
+ for ( i=0 ; i < BUFLEN && buffer[i] && buffer[i] != '#' ; i++ )
+ ;
+ buffer[i] = 0;
+
+ SprintfLiteral(labelBuf, "-> %d%s", target.doc, !target.bound ? "f" : "");
+ hasTarget = false;
+ }
+
+ spew_.spew("%06" PRIx32 " %08" PRIx32 "%s%s%s",
+ (uint32_t)offs.getOffset(),
+ instr->InstructionBits(), InstrIndent, buffer, labelBuf);
+
+ if (hasTarget)
+ spew_.spewRef(target);
+ }
+
+ void spewLiteralLoad(BufferOffset offs,
+ const vixl::Instruction* instr, const LiteralDoc& doc) {
+ if (spew_.isDisabled() || !instr)
+ return;
+
+ char buffer[2048];
+ DisassembleInstruction(buffer, sizeof(buffer), instr);
+
+ char litbuf[2048];
+ spew_.formatLiteral(doc, litbuf, sizeof(litbuf));
+
+ // The instruction will have the form /^.*pc\+0/ followed by junk that we
+ // don't need; try to strip it.
+
+ char *probe = strstr(buffer, "pc+0");
+ if (probe)
+ *(probe + 4) = 0;
+ spew_.spew("%06" PRIx32 " %08" PRIx32 "%s%s ; .const %s",
+ (uint32_t)offs.getOffset(),
+ instr->InstructionBits(), InstrIndent, buffer, litbuf);
+ }
+
+ LabelDoc refLabel(Label* label) {
+ if (spew_.isDisabled())
+ return LabelDoc();
+
+ return spew_.refLabel(label);
+ }
+#else
+ LabelDoc refLabel(js::jit::Label*) {
+ return LabelDoc();
+ }
+#endif
+
+ // Emit the instruction, returning its offset.
+ BufferOffset Emit(Instr instruction, bool isBranch = false) {
+ static_assert(sizeof(instruction) == kInstructionSize);
+ // TODO: isBranch is obsolete and should be removed.
+ (void)isBranch;
+ MOZ_ASSERT(hasCreator());
+ BufferOffset offs = armbuffer_.putInt(*(uint32_t*)(&instruction));
+#ifdef JS_DISASM_ARM64
+ if (!isBranch)
+ spew(offs, armbuffer_.getInstOrNull(offs));
+#endif
+ return offs;
+ }
+
+ BufferOffset EmitBranch(Instr instruction, const LabelDoc& doc) {
+ BufferOffset offs = Emit(instruction, true);
+#ifdef JS_DISASM_ARM64
+ spewBranch(offs, armbuffer_.getInstOrNull(offs), doc);
+#endif
+ return offs;
+ }
+
+ public:
+ // Emit the instruction at |at|.
+ static void Emit(Instruction* at, Instr instruction) {
+ static_assert(sizeof(instruction) == kInstructionSize);
+ memcpy(at, &instruction, sizeof(instruction));
+ }
+
+ static void EmitBranch(Instruction* at, Instr instruction) {
+ // TODO: Assert that the buffer already has the instruction marked as a branch.
+ Emit(at, instruction);
+ }
+
+ // Emit data inline in the instruction stream.
+ BufferOffset EmitData(void const * data, unsigned size) {
+ VIXL_ASSERT(size % 4 == 0);
+ MOZ_ASSERT(hasCreator());
+ return armbuffer_.allocEntry(size / sizeof(uint32_t), 0, (uint8_t*)(data), nullptr);
+ }
+
+ public:
+ // Size of the code generated in bytes, including pools.
+ size_t SizeOfCodeGenerated() const {
+ return armbuffer_.size();
+ }
+
+ // Move the pool into the instruction stream.
+ void flushBuffer() {
+ armbuffer_.flushPool();
+ }
+
+ // Inhibit pool flushing for the given number of instructions.
+ // Generating more than |maxInst| instructions in a no-pool region
+ // triggers an assertion within the ARMBuffer.
+ // Does not nest.
+ void enterNoPool(size_t maxInst) {
+ armbuffer_.enterNoPool(maxInst);
+ }
+
+ // Marks the end of a no-pool region.
+ void leaveNoPool() {
+ armbuffer_.leaveNoPool();
+ }
+
+ void enterNoNops() {
+ armbuffer_.enterNoNops();
+ }
+ void leaveNoNops() {
+ armbuffer_.leaveNoNops();
+ }
+
+ public:
+ // Static interface used by IonAssemblerBufferWithConstantPools.
+ static void InsertIndexIntoTag(uint8_t* load, uint32_t index);
+ static bool PatchConstantPoolLoad(void* loadAddr, void* constPoolAddr);
+ static void PatchShortRangeBranchToVeneer(ARMBuffer*, unsigned rangeIdx, BufferOffset deadline,
+ BufferOffset veneer);
+ static uint32_t PlaceConstantPoolBarrier(int offset);
+
+ static void WritePoolHeader(uint8_t* start, js::jit::Pool* p, bool isNatural);
+ static void WritePoolFooter(uint8_t* start, js::jit::Pool* p, bool isNatural);
+ static void WritePoolGuard(BufferOffset branch, Instruction* inst, BufferOffset dest);
+
+ protected:
+ // Functions for managing Labels and linked lists of Label uses.
+
+ // Get the next Label user in the linked list of Label uses.
+ // Return an unassigned BufferOffset when the end of the list is reached.
+ BufferOffset NextLink(BufferOffset cur);
+
+ // Patch the instruction at cur to link to the instruction at next.
+ void SetNextLink(BufferOffset cur, BufferOffset next);
+
+ // Link the current (not-yet-emitted) instruction to the specified label,
+ // then return a raw offset to be encoded in the instruction.
+ ptrdiff_t LinkAndGetByteOffsetTo(BufferOffset branch, js::jit::Label* label);
+ ptrdiff_t LinkAndGetInstructionOffsetTo(BufferOffset branch, ImmBranchRangeType branchRange,
+ js::jit::Label* label);
+ ptrdiff_t LinkAndGetPageOffsetTo(BufferOffset branch, js::jit::Label* label);
+
+ // A common implementation for the LinkAndGet<Type>OffsetTo helpers.
+ ptrdiff_t LinkAndGetOffsetTo(BufferOffset branch, ImmBranchRangeType branchRange,
+ unsigned elementSizeBits, js::jit::Label* label);
+
+ protected:
+ // The buffer into which code and relocation info are generated.
+ ARMBuffer armbuffer_;
+};
+
+
+} // namespace vixl
+
+
+#endif // jit_arm64_vixl_MozBaseAssembler_vixl_h
+
diff --git a/js/src/jit/arm64/vixl/MozCachingDecoder.h b/js/src/jit/arm64/vixl/MozCachingDecoder.h
new file mode 100644
index 0000000000..5b4cfc17d5
--- /dev/null
+++ b/js/src/jit/arm64/vixl/MozCachingDecoder.h
@@ -0,0 +1,179 @@
+#ifndef VIXL_A64_MOZ_CACHING_DECODER_A64_H_
+#define VIXL_A64_MOZ_CACHING_DECODER_A64_H_
+
+#include "mozilla/HashTable.h"
+
+#include "jit/arm64/vixl/Decoder-vixl.h"
+#include "js/AllocPolicy.h"
+
+#ifdef DEBUG
+#define JS_CACHE_SIMULATOR_ARM64 1
+#endif
+
+#ifdef JS_CACHE_SIMULATOR_ARM64
+namespace vixl {
+
+// This enumeration list the different kind of instructions which can be
+// decoded. These kind correspond to the set of visitor defined by the default
+// Decoder.
+enum class InstDecodedKind : uint8_t {
+ NotDecodedYet,
+#define DECLARE(E) E,
+ VISITOR_LIST(DECLARE)
+#undef DECLARE
+};
+
+// A SinglePageDecodeCache is used to store the decoded kind of all instructions
+// in an executable page of code. Each time an instruction is decoded, its
+// decoded kind is recorded in this structure. The previous instruction value is
+// also recorded in this structure when using a debug build.
+//
+// The next time the same offset is visited, the instruction would be decoded
+// using the previously recorded decode kind. It is also compared against the
+// previously recorded bits of the instruction to check for potential missing
+// cache invalidations, in debug builds.
+//
+// This structure stores the equivalent of a single page of code to have better
+// memory locality when using the simulator. As opposed to having a hash-table
+// for all instructions. However a hash-table is used by the CachingDecoder to
+// map the prefixes of page addresses to these SinglePageDecodeCaches.
+class SinglePageDecodeCache {
+ public:
+ static const uintptr_t PageSize = 1 << 12;
+ static const uintptr_t PageMask = PageSize - 1;
+ static const uintptr_t InstSize = vixl::kInstructionSize;
+ static const uintptr_t InstMask = InstSize - 1;
+ static const uintptr_t InstPerPage = PageSize / InstSize;
+
+ SinglePageDecodeCache(const Instruction* inst)
+ : pageStart_(PageStart(inst))
+ {
+ memset(&decodeCache_, int(InstDecodedKind::NotDecodedYet), sizeof(decodeCache_));
+ }
+ // Compute the start address of the page which contains this instruction.
+ static uintptr_t PageStart(const Instruction* inst) {
+ return uintptr_t(inst) & ~PageMask;
+ }
+ // Returns whether the instruction decoded kind is stored in this
+ // SinglePageDecodeCache.
+ bool contains(const Instruction* inst) {
+ return pageStart_ == PageStart(inst);
+ }
+ void clearDecode(const Instruction* inst) {
+ uintptr_t offset = (uintptr_t(inst) & PageMask) / InstSize;
+ decodeCache_[offset] = InstDecodedKind::NotDecodedYet;
+ }
+ InstDecodedKind* decodePtr(const Instruction* inst) {
+ uintptr_t offset = (uintptr_t(inst) & PageMask) / InstSize;
+ uint32_t instValue = *reinterpret_cast<const uint32_t*>(inst);
+ instCache_[offset] = instValue;
+ return &decodeCache_[offset];
+ }
+ InstDecodedKind decode(const Instruction* inst) const {
+ uintptr_t offset = (uintptr_t(inst) & PageMask) / InstSize;
+ InstDecodedKind val = decodeCache_[offset];
+ uint32_t instValue = *reinterpret_cast<const uint32_t*>(inst);
+ MOZ_ASSERT_IF(val != InstDecodedKind::NotDecodedYet,
+ instCache_[offset] == instValue);
+ return val;
+ }
+
+ private:
+ // Record the address at which the corresponding code page starts.
+ const uintptr_t pageStart_;
+
+ // Cache what instruction got decoded previously, in order to assert if we see
+ // any stale instructions after.
+ uint32_t instCache_[InstPerPage];
+
+ // Cache the decoding of the instruction such that we can skip the decoding
+ // part.
+ InstDecodedKind decodeCache_[InstPerPage];
+};
+
+// A DecoderVisitor which will record which visitor function should be called
+// the next time we want to decode the same instruction.
+class CachingDecoderVisitor : public DecoderVisitor {
+ public:
+ CachingDecoderVisitor() = default;
+ virtual ~CachingDecoderVisitor() {}
+
+#define DECLARE(A) virtual void Visit##A(const Instruction* instr) { \
+ if (last_) { \
+ MOZ_ASSERT(*last_ == InstDecodedKind::NotDecodedYet); \
+ *last_ = InstDecodedKind::A; \
+ last_ = nullptr; \
+ } \
+ };
+
+ VISITOR_LIST(DECLARE)
+#undef DECLARE
+
+ void setDecodePtr(InstDecodedKind* ptr) {
+ last_ = ptr;
+ }
+
+ private:
+ InstDecodedKind* last_;
+};
+
+// The Caching decoder works by extending the default vixl Decoder class. It
+// extends it by overloading the Decode function.
+//
+// The overloaded Decode function checks whether the instruction given as
+// argument got decoded before or since it got invalidated. If it was not
+// previously decoded, the value of the instruction is recorded as well as the
+// kind of instruction. Otherwise, the value of the instruction is checked
+// against the previously recorded value and the instruction kind is used to
+// skip the decoding visitor and resume the execution of instruction.
+//
+// The caching decoder stores the equivalent of a page of executable code in a
+// hash-table. Each SinglePageDecodeCache stores an array of decoded kind as
+// well as the value of the previously decoded instruction.
+//
+// When testing if an instruction was decoded before, we check if the address of
+// the instruction is contained in the last SinglePageDecodeCache. If it is not,
+// then the hash-table entry is queried and created if necessary, and the last
+// SinglePageDecodeCache is updated. Then, the last SinglePageDecodeCache
+// necessary contains the decoded kind of the instruction given as argument.
+//
+// The caching decoder add an extra function for flushing the cache, which is in
+// charge of clearing the decoded kind of instruction in the range of addresses
+// given as argument. This is indirectly called by
+// CPU::EnsureIAndDCacheCoherency.
+class CachingDecoder : public Decoder {
+ using ICacheMap = mozilla::HashMap<uintptr_t, SinglePageDecodeCache*>;
+ public:
+ CachingDecoder()
+ : lastPage_(nullptr)
+ {
+ PrependVisitor(&cachingDecoder_);
+ }
+ ~CachingDecoder() {
+ RemoveVisitor(&cachingDecoder_);
+ }
+
+ void Decode(const Instruction* instr);
+ void Decode(Instruction* instr) {
+ Decode(const_cast<const Instruction*>(instr));
+ }
+
+ void FlushICache(void* start, size_t size);
+
+ private:
+ // Record the type of the decoded instruction, to avoid decoding it a second
+ // time the next time we execute it.
+ CachingDecoderVisitor cachingDecoder_;
+
+ // Store the mapping of Instruction pointer to the corresponding
+ // SinglePageDecodeCache.
+ ICacheMap iCache_;
+
+ // Record the last SinglePageDecodeCache seen, such that we can quickly access
+ // it for the next instruction.
+ SinglePageDecodeCache* lastPage_;
+};
+
+}
+#endif // !JS_CACHE_SIMULATOR_ARM64
+#endif // !VIXL_A64_MOZ_CACHING_DECODER_A64_H_
diff --git a/js/src/jit/arm64/vixl/MozCpu-vixl.cpp b/js/src/jit/arm64/vixl/MozCpu-vixl.cpp
new file mode 100644
index 0000000000..909cc590ae
--- /dev/null
+++ b/js/src/jit/arm64/vixl/MozCpu-vixl.cpp
@@ -0,0 +1,226 @@
+// Copyright 2015, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "jit/arm64/vixl/Cpu-vixl.h"
+#include "jit/arm64/vixl/Simulator-vixl.h"
+#include "jit/arm64/vixl/Utils-vixl.h"
+#include "util/WindowsWrapper.h"
+
+#if defined(XP_DARWIN)
+# include <libkern/OSCacheControl.h>
+#endif
+
+namespace vixl {
+
+// Currently computes I and D cache line size.
+void CPU::SetUp() {
+ uint32_t cache_type_register = GetCacheType();
+
+ // The cache type register holds information about the caches, including I
+ // D caches line size.
+ static const int kDCacheLineSizeShift = 16;
+ static const int kICacheLineSizeShift = 0;
+ static const uint32_t kDCacheLineSizeMask = 0xf << kDCacheLineSizeShift;
+ static const uint32_t kICacheLineSizeMask = 0xf << kICacheLineSizeShift;
+
+ // The cache type register holds the size of the I and D caches in words as
+ // a power of two.
+ uint32_t dcache_line_size_power_of_two =
+ (cache_type_register & kDCacheLineSizeMask) >> kDCacheLineSizeShift;
+ uint32_t icache_line_size_power_of_two =
+ (cache_type_register & kICacheLineSizeMask) >> kICacheLineSizeShift;
+
+ dcache_line_size_ = 4 << dcache_line_size_power_of_two;
+ icache_line_size_ = 4 << icache_line_size_power_of_two;
+
+ // Bug 1521158 suggests that having CPU with different cache line sizes could
+ // cause issues as we would only invalidate half of the cache line of we
+ // invalidate every 128 bytes, but other little cores have a different stride
+ // such as 64 bytes. To be conservative, we will try reducing the stride to 32
+ // bytes, which should be smaller than any known cache line.
+ const uint32_t conservative_line_size = 32;
+ dcache_line_size_ = std::min(dcache_line_size_, conservative_line_size);
+ icache_line_size_ = std::min(icache_line_size_, conservative_line_size);
+}
+
+
+uint32_t CPU::GetCacheType() {
+#if defined(__aarch64__) && (defined(__linux__) || defined(__android__))
+ uint64_t cache_type_register;
+ // Copy the content of the cache type register to a core register.
+ __asm__ __volatile__ ("mrs %[ctr], ctr_el0" // NOLINT
+ : [ctr] "=r" (cache_type_register));
+ VIXL_ASSERT(IsUint32(cache_type_register));
+ return static_cast<uint32_t>(cache_type_register);
+#else
+ // This will lead to a cache with 1 byte long lines, which is fine since
+ // neither EnsureIAndDCacheCoherency nor the simulator will need this
+ // information.
+ return 0;
+#endif
+}
+
+void CPU::EnsureIAndDCacheCoherency(void* address, size_t length) {
+#if defined(JS_SIMULATOR_ARM64) && defined(JS_CACHE_SIMULATOR_ARM64)
+ // This code attempts to emulate what the following assembly sequence is
+ // doing, which is sending the information to all cores that some cache line
+ // have to be invalidated and invalidating them only on the current core.
+ //
+ // This is done by recording the current range to be flushed to all
+ // simulators, then if there is a simulator associated with the current
+ // thread, applying all flushed ranges as the "isb" instruction would do.
+ //
+ // As we have no control over the CPU cores used by the code generator and the
+ // execution threads, this code assumes that each thread runs on its own core.
+ //
+ // See Bug 1529933 for more detailed explanation of this issue.
+ using js::jit::SimulatorProcess;
+ js::jit::AutoLockSimulatorCache alsc;
+ if (length > 0) {
+ SimulatorProcess::recordICacheFlush(address, length);
+ }
+ Simulator* sim = vixl::Simulator::Current();
+ if (sim) {
+ sim->FlushICache();
+ }
+#elif defined(_MSC_VER) && defined(_M_ARM64)
+ FlushInstructionCache(GetCurrentProcess(), address, length);
+#elif defined(XP_DARWIN)
+ sys_icache_invalidate(address, length);
+#elif defined(__aarch64__) && (defined(__linux__) || defined(__android__))
+ // Implement the cache synchronisation for all targets where AArch64 is the
+ // host, even if we're building the simulator for an AAarch64 host. This
+ // allows for cases where the user wants to simulate code as well as run it
+ // natively.
+
+ if (length == 0) {
+ return;
+ }
+
+ // The code below assumes user space cache operations are allowed.
+
+ // Work out the line sizes for each cache, and use them to determine the
+ // start addresses.
+ uintptr_t start = reinterpret_cast<uintptr_t>(address);
+ uintptr_t dsize = static_cast<uintptr_t>(dcache_line_size_);
+ uintptr_t isize = static_cast<uintptr_t>(icache_line_size_);
+ uintptr_t dline = start & ~(dsize - 1);
+ uintptr_t iline = start & ~(isize - 1);
+
+ // Cache line sizes are always a power of 2.
+ VIXL_ASSERT(IsPowerOf2(dsize));
+ VIXL_ASSERT(IsPowerOf2(isize));
+ uintptr_t end = start + length;
+
+ do {
+ __asm__ __volatile__ (
+ // Clean each line of the D cache containing the target data.
+ //
+ // dc : Data Cache maintenance
+ // c : Clean
+ // i : Invalidate
+ // va : by (Virtual) Address
+ // c : to the point of Coherency
+ // Original implementation used cvau, but changed to civac due to
+ // errata on Cortex-A53 819472, 826319, 827319 and 824069.
+ // See ARM DDI 0406B page B2-12 for more information.
+ //
+ " dc civac, %[dline]\n"
+ :
+ : [dline] "r" (dline)
+ // This code does not write to memory, but the "memory" dependency
+ // prevents GCC from reordering the code.
+ : "memory");
+ dline += dsize;
+ } while (dline < end);
+
+ __asm__ __volatile__ (
+ // Make sure that the data cache operations (above) complete before the
+ // instruction cache operations (below).
+ //
+ // dsb : Data Synchronisation Barrier
+ // ish : Inner SHareable domain
+ //
+ // The point of unification for an Inner Shareable shareability domain is
+ // the point by which the instruction and data caches of all the processors
+ // in that Inner Shareable shareability domain are guaranteed to see the
+ // same copy of a memory location. See ARM DDI 0406B page B2-12 for more
+ // information.
+ " dsb ish\n"
+ : : : "memory");
+
+ do {
+ __asm__ __volatile__ (
+ // Invalidate each line of the I cache containing the target data.
+ //
+ // ic : Instruction Cache maintenance
+ // i : Invalidate
+ // va : by Address
+ // u : to the point of Unification
+ " ic ivau, %[iline]\n"
+ :
+ : [iline] "r" (iline)
+ : "memory");
+ iline += isize;
+ } while (iline < end);
+
+ __asm__ __volatile__(
+ // Make sure that the instruction cache operations (above) take effect
+ // before the isb (below).
+ " dsb ish\n"
+
+ // Ensure that any instructions already in the pipeline are discarded and
+ // reloaded from the new data.
+ // isb : Instruction Synchronisation Barrier
+ " isb\n"
+ :
+ :
+ : "memory");
+#else
+ // If the host isn't AArch64, we must be using the simulator, so this function
+ // doesn't have to do anything.
+ USE(address, length);
+#endif
+}
+
+void CPU::FlushExecutionContext() {
+#if defined(JS_SIMULATOR_ARM64) && defined(JS_CACHE_SIMULATOR_ARM64)
+ // Performing an 'isb' will ensure the current core instruction pipeline is
+ // synchronized with an icache flush executed by another core.
+ using js::jit::SimulatorProcess;
+ js::jit::AutoLockSimulatorCache alsc;
+ Simulator* sim = vixl::Simulator::Current();
+ if (sim) {
+ sim->FlushICache();
+ }
+#elif defined(__aarch64__)
+ // Ensure that any instructions already in the pipeline are discarded and
+ // reloaded from the icache.
+ __asm__ __volatile__("isb\n" : : : "memory");
+#endif
+}
+
+} // namespace vixl
diff --git a/js/src/jit/arm64/vixl/MozInstructions-vixl.cpp b/js/src/jit/arm64/vixl/MozInstructions-vixl.cpp
new file mode 100644
index 0000000000..398f864493
--- /dev/null
+++ b/js/src/jit/arm64/vixl/MozInstructions-vixl.cpp
@@ -0,0 +1,211 @@
+// Copyright 2013, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "jit/arm64/Architecture-arm64.h"
+#include "jit/arm64/vixl/Assembler-vixl.h"
+#include "jit/arm64/vixl/Instructions-vixl.h"
+
+namespace vixl {
+
+bool Instruction::IsUncondB() const {
+ return Mask(UnconditionalBranchMask) == (UnconditionalBranchFixed | B);
+}
+
+
+bool Instruction::IsCondB() const {
+ return Mask(ConditionalBranchMask) == (ConditionalBranchFixed | B_cond);
+}
+
+
+bool Instruction::IsBL() const {
+ return Mask(UnconditionalBranchMask) == (UnconditionalBranchFixed | BL);
+}
+
+
+bool Instruction::IsBR() const {
+ return Mask(UnconditionalBranchToRegisterMask) == (UnconditionalBranchToRegisterFixed | BR);
+}
+
+
+bool Instruction::IsBLR() const {
+ return Mask(UnconditionalBranchToRegisterMask) == (UnconditionalBranchToRegisterFixed | BLR);
+}
+
+
+bool Instruction::IsTBZ() const {
+ return Mask(TestBranchMask) == TBZ;
+}
+
+
+bool Instruction::IsTBNZ() const {
+ return Mask(TestBranchMask) == TBNZ;
+}
+
+
+bool Instruction::IsCBZ() const {
+ return Mask(CompareBranchMask) == CBZ_w || Mask(CompareBranchMask) == CBZ_x;
+}
+
+
+bool Instruction::IsCBNZ() const {
+ return Mask(CompareBranchMask) == CBNZ_w || Mask(CompareBranchMask) == CBNZ_x;
+}
+
+
+bool Instruction::IsLDR() const {
+ return Mask(LoadLiteralMask) == LDR_x_lit;
+}
+
+
+bool Instruction::IsNOP() const {
+ return Mask(SystemHintMask) == HINT && ImmHint() == NOP;
+}
+
+
+bool Instruction::IsCSDB() const {
+ return Mask(SystemHintMask) == HINT && ImmHint() == CSDB;
+}
+
+
+bool Instruction::IsADR() const {
+ return Mask(PCRelAddressingMask) == ADR;
+}
+
+
+bool Instruction::IsADRP() const {
+ return Mask(PCRelAddressingMask) == ADRP;
+}
+
+
+bool Instruction::IsMovz() const {
+ return (Mask(MoveWideImmediateMask) == MOVZ_x) ||
+ (Mask(MoveWideImmediateMask) == MOVZ_w);
+}
+
+
+bool Instruction::IsMovk() const {
+ return (Mask(MoveWideImmediateMask) == MOVK_x) ||
+ (Mask(MoveWideImmediateMask) == MOVK_w);
+}
+
+bool Instruction::IsBranchLinkImm() const {
+ return Mask(UnconditionalBranchFMask) == (UnconditionalBranchFixed | BL);
+}
+
+
+bool Instruction::IsTargetReachable(const Instruction* target) const {
+ VIXL_ASSERT(((target - this) & 3) == 0);
+ int offset = (target - this) >> kInstructionSizeLog2;
+ switch (BranchType()) {
+ case CondBranchType:
+ return IsInt19(offset);
+ case UncondBranchType:
+ return IsInt26(offset);
+ case CompareBranchType:
+ return IsInt19(offset);
+ case TestBranchType:
+ return IsInt14(offset);
+ default:
+ VIXL_UNREACHABLE();
+ }
+}
+
+
+ptrdiff_t Instruction::ImmPCRawOffset() const {
+ ptrdiff_t offset;
+ if (IsPCRelAddressing()) {
+ // ADR and ADRP.
+ offset = ImmPCRel();
+ } else if (BranchType() == UnknownBranchType) {
+ offset = ImmLLiteral();
+ } else {
+ offset = ImmBranch();
+ }
+ return offset;
+}
+
+void
+Instruction::SetImmPCRawOffset(ptrdiff_t offset)
+{
+ if (IsPCRelAddressing()) {
+ // ADR and ADRP. We're encoding a raw offset here.
+ // See also SetPCRelImmTarget().
+ Instr imm = vixl::Assembler::ImmPCRelAddress(offset);
+ SetInstructionBits(Mask(~ImmPCRel_mask) | imm);
+ } else {
+ SetBranchImmTarget(this + (offset << kInstructionSizeLog2));
+ }
+}
+
+// Is this a stack pointer synchronization instruction as inserted by
+// MacroAssembler::syncStackPtr()?
+bool
+Instruction::IsStackPtrSync() const
+{
+ // The stack pointer sync is a move to the stack pointer.
+ // This is encoded as 'add sp, Rs, #0'.
+ return IsAddSubImmediate() && Rd() == js::jit::Registers::sp && ImmAddSub() == 0;
+}
+
+// Skip over a constant pool at |this| if there is one.
+//
+// If |this| is pointing to the artifical guard branch around a constant pool,
+// return the instruction after the pool. Otherwise return |this| itself.
+//
+// This function does not skip constant pools with a natural guard branch. It
+// is assumed that anyone inspecting the instruction stream understands about
+// branches that were inserted naturally.
+const Instruction*
+Instruction::skipPool() const
+{
+ // Artificial pool guards can only be B (rather than BR), and they must be
+ // forward branches.
+ if (!IsUncondB() || ImmUncondBranch() <= 0)
+ return this;
+
+ // Check for a constant pool header which has the high 16 bits set. See
+ // struct PoolHeader. Bit 15 indicates a natural pool guard when set. It
+ // must be clear which indicates an artificial pool guard.
+ const Instruction *header = InstructionAtOffset(kInstructionSize);
+ if (header->Mask(0xffff8000) != 0xffff0000)
+ return this;
+
+ // OK, this is an artificial jump around a constant pool.
+ return ImmPCOffsetTarget();
+}
+
+
+void Instruction::SetBits32(int msb, int lsb, unsigned value) {
+ uint32_t me;
+ memcpy(&me, this, sizeof(me));
+ uint32_t new_mask = (1 << (msb+1)) - (1 << lsb);
+ uint32_t keep_mask = ~new_mask;
+ me = (me & keep_mask) | ((value << lsb) & new_mask);
+ memcpy(this, &me, sizeof(me));
+}
+
+
+} // namespace vixl
diff --git a/js/src/jit/arm64/vixl/MozSimulator-vixl.cpp b/js/src/jit/arm64/vixl/MozSimulator-vixl.cpp
new file mode 100644
index 0000000000..9f817cf0a3
--- /dev/null
+++ b/js/src/jit/arm64/vixl/MozSimulator-vixl.cpp
@@ -0,0 +1,1258 @@
+// Copyright 2013, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "mozilla/DebugOnly.h"
+
+#include "jit/arm64/vixl/Debugger-vixl.h"
+#include "jit/arm64/vixl/MozCachingDecoder.h"
+#include "jit/arm64/vixl/Simulator-vixl.h"
+#include "jit/IonTypes.h"
+#include "js/UniquePtr.h"
+#include "js/Utility.h"
+#include "threading/LockGuard.h"
+#include "vm/JSContext.h"
+#include "vm/Runtime.h"
+
+js::jit::SimulatorProcess* js::jit::SimulatorProcess::singleton_ = nullptr;
+
+namespace vixl {
+
+using mozilla::DebugOnly;
+using js::jit::ABIFunctionType;
+using js::jit::JitActivation;
+using js::jit::SimulatorProcess;
+
+Simulator::Simulator(Decoder* decoder, FILE* stream)
+ : stream_(nullptr)
+ , print_disasm_(nullptr)
+ , instrumentation_(nullptr)
+ , stack_(nullptr)
+ , stack_limit_(nullptr)
+ , decoder_(nullptr)
+ , oom_(false)
+{
+ this->init(decoder, stream);
+
+ // If this environment variable is present, trace the executed instructions.
+ // (Very helpful for debugging code generation crashes.)
+ if (getenv("VIXL_TRACE")) {
+ set_trace_parameters(LOG_DISASM);
+ }
+}
+
+
+Simulator::~Simulator() {
+ js_free(stack_);
+ stack_ = nullptr;
+
+ // The decoder may outlive the simulator.
+ if (print_disasm_) {
+ decoder_->RemoveVisitor(print_disasm_);
+ js_delete(print_disasm_);
+ print_disasm_ = nullptr;
+ }
+
+ if (instrumentation_) {
+ decoder_->RemoveVisitor(instrumentation_);
+ js_delete(instrumentation_);
+ instrumentation_ = nullptr;
+ }
+}
+
+
+void Simulator::ResetState() {
+ // Reset the system registers.
+ nzcv_ = SimSystemRegister::DefaultValueFor(NZCV);
+ fpcr_ = SimSystemRegister::DefaultValueFor(FPCR);
+
+ // Reset registers to 0.
+ pc_ = nullptr;
+ pc_modified_ = false;
+ for (unsigned i = 0; i < kNumberOfRegisters; i++) {
+ set_xreg(i, 0xbadbeef);
+ }
+ // Set FP registers to a value that is a NaN in both 32-bit and 64-bit FP.
+ uint64_t nan_bits = UINT64_C(0x7ff0dead7f8beef1);
+ VIXL_ASSERT(IsSignallingNaN(RawbitsToDouble(nan_bits & kDRegMask)));
+ VIXL_ASSERT(IsSignallingNaN(RawbitsToFloat(nan_bits & kSRegMask)));
+ for (unsigned i = 0; i < kNumberOfFPRegisters; i++) {
+ set_dreg_bits(i, nan_bits);
+ }
+ // Returning to address 0 exits the Simulator.
+ set_lr(kEndOfSimAddress);
+}
+
+
+void Simulator::init(Decoder* decoder, FILE* stream) {
+ // Ensure that shift operations act as the simulator expects.
+ VIXL_ASSERT((static_cast<int32_t>(-1) >> 1) == -1);
+ VIXL_ASSERT((static_cast<uint32_t>(-1) >> 1) == 0x7FFFFFFF);
+
+ instruction_stats_ = false;
+
+ // Set up the decoder.
+ decoder_ = decoder;
+ decoder_->AppendVisitor(this);
+
+ stream_ = stream;
+ print_disasm_ = js_new<PrintDisassembler>(stream_);
+ if (!print_disasm_) {
+ oom_ = true;
+ return;
+ }
+ set_coloured_trace(false);
+ trace_parameters_ = LOG_NONE;
+
+ ResetState();
+
+ // Allocate and set up the simulator stack.
+ stack_ = js_pod_malloc<byte>(stack_size_);
+ if (!stack_) {
+ oom_ = true;
+ return;
+ }
+ stack_limit_ = stack_ + stack_protection_size_;
+ // Configure the starting stack pointer.
+ // - Find the top of the stack.
+ byte * tos = stack_ + stack_size_;
+ // - There's a protection region at both ends of the stack.
+ tos -= stack_protection_size_;
+ // - The stack pointer must be 16-byte aligned.
+ tos = AlignDown(tos, 16);
+ set_sp(tos);
+
+ // Set the sample period to 10, as the VIXL examples and tests are short.
+ if (getenv("VIXL_STATS")) {
+ instrumentation_ = js_new<Instrument>("vixl_stats.csv", 10);
+ if (!instrumentation_) {
+ oom_ = true;
+ return;
+ }
+ }
+
+ // Print a warning about exclusive-access instructions, but only the first
+ // time they are encountered. This warning can be silenced using
+ // SilenceExclusiveAccessWarning().
+ print_exclusive_access_warning_ = true;
+}
+
+
+Simulator* Simulator::Current() {
+ JSContext* cx = js::TlsContext.get();
+ if (!cx) {
+ return nullptr;
+ }
+ JSRuntime* rt = cx->runtime();
+ if (!rt) {
+ return nullptr;
+ }
+ if (!js::CurrentThreadCanAccessRuntime(rt)) {
+ return nullptr;
+ }
+ return cx->simulator();
+}
+
+
+Simulator* Simulator::Create() {
+ Decoder *decoder = js_new<Decoder>();
+ if (!decoder)
+ return nullptr;
+
+ // FIXME: This just leaks the Decoder object for now, which is probably OK.
+ // FIXME: We should free it at some point.
+ // FIXME: Note that it can't be stored in the SimulatorRuntime due to lifetime conflicts.
+ js::UniquePtr<Simulator> sim;
+ if (getenv("USE_DEBUGGER") != nullptr) {
+ sim.reset(js_new<Debugger>(decoder, stdout));
+ } else {
+ sim.reset(js_new<Simulator>(decoder, stdout));
+ }
+
+ // Check if Simulator:init ran out of memory.
+ if (sim && sim->oom()) {
+ return nullptr;
+ }
+
+#ifdef JS_CACHE_SIMULATOR_ARM64
+ // Register the simulator in the Simulator process to handle cache flushes
+ // across threads.
+ js::jit::AutoLockSimulatorCache alsc;
+ if (!SimulatorProcess::registerSimulator(sim.get())) {
+ return nullptr;
+ }
+#endif
+
+ return sim.release();
+}
+
+
+void Simulator::Destroy(Simulator* sim) {
+#ifdef JS_CACHE_SIMULATOR_ARM64
+ if (sim) {
+ js::jit::AutoLockSimulatorCache alsc;
+ SimulatorProcess::unregisterSimulator(sim);
+ }
+#endif
+
+ js_delete(sim);
+}
+
+
+void Simulator::ExecuteInstruction() {
+ // The program counter should always be aligned.
+ VIXL_ASSERT(IsWordAligned(pc_));
+#ifdef JS_CACHE_SIMULATOR_ARM64
+ if (pendingCacheRequests) {
+ // We're here emulating the behavior of the membarrier carried over on
+ // real hardware does; see syscalls to membarrier in MozCpu-vixl.cpp.
+ // There's a slight difference that the simulator is not being
+ // interrupted: instead, we effectively run the icache flush request
+ // before executing the next instruction, which is close enough and
+ // sufficient for our use case.
+ js::jit::AutoLockSimulatorCache alsc;
+ FlushICache();
+ }
+#endif
+ decoder_->Decode(pc_);
+ increment_pc();
+}
+
+
+uintptr_t Simulator::stackLimit() const {
+ return reinterpret_cast<uintptr_t>(stack_limit_);
+}
+
+
+uintptr_t* Simulator::addressOfStackLimit() {
+ return (uintptr_t*)&stack_limit_;
+}
+
+
+bool Simulator::overRecursed(uintptr_t newsp) const {
+ if (newsp == 0) {
+ newsp = get_sp();
+ }
+ return newsp <= stackLimit();
+}
+
+
+bool Simulator::overRecursedWithExtra(uint32_t extra) const {
+ uintptr_t newsp = get_sp() - extra;
+ return newsp <= stackLimit();
+}
+
+
+JS::ProfilingFrameIterator::RegisterState
+Simulator::registerState()
+{
+ JS::ProfilingFrameIterator::RegisterState state;
+ state.pc = (uint8_t*) get_pc();
+ state.fp = (uint8_t*) get_fp();
+ state.lr = (uint8_t*) get_lr();
+ state.sp = (uint8_t*) get_sp();
+ return state;
+}
+
+int64_t Simulator::call(uint8_t* entry, int argument_count, ...) {
+ va_list parameters;
+ va_start(parameters, argument_count);
+
+ // First eight arguments passed in registers.
+ VIXL_ASSERT(argument_count <= 8);
+ // This code should use the type of the called function
+ // (with templates, like the callVM machinery), but since the
+ // number of called functions is miniscule, their types have been
+ // divined from the number of arguments.
+ if (argument_count == 8) {
+ // EnterJitData::jitcode.
+ set_xreg(0, va_arg(parameters, int64_t));
+ // EnterJitData::maxArgc.
+ set_xreg(1, va_arg(parameters, unsigned));
+ // EnterJitData::maxArgv.
+ set_xreg(2, va_arg(parameters, int64_t));
+ // EnterJitData::osrFrame.
+ set_xreg(3, va_arg(parameters, int64_t));
+ // EnterJitData::calleeToken.
+ set_xreg(4, va_arg(parameters, int64_t));
+ // EnterJitData::scopeChain.
+ set_xreg(5, va_arg(parameters, int64_t));
+ // EnterJitData::osrNumStackValues.
+ set_xreg(6, va_arg(parameters, unsigned));
+ // Address of EnterJitData::result.
+ set_xreg(7, va_arg(parameters, int64_t));
+ } else if (argument_count == 2) {
+ // EntryArg* args
+ set_xreg(0, va_arg(parameters, int64_t));
+ // uint8_t* GlobalData
+ set_xreg(1, va_arg(parameters, int64_t));
+ } else if (argument_count == 1) { // irregexp
+ // InputOutputData& data
+ set_xreg(0, va_arg(parameters, int64_t));
+ } else if (argument_count == 0) { // testsJit.cpp
+ // accept.
+ } else {
+ MOZ_CRASH("Unknown number of arguments");
+ }
+
+ va_end(parameters);
+
+ // Call must transition back to native code on exit.
+ VIXL_ASSERT(get_lr() == int64_t(kEndOfSimAddress));
+
+ // Execute the simulation.
+ DebugOnly<int64_t> entryStack = get_sp();
+ RunFrom((Instruction*)entry);
+ DebugOnly<int64_t> exitStack = get_sp();
+ VIXL_ASSERT(entryStack == exitStack);
+
+ int64_t result = xreg(0);
+ if (getenv("USE_DEBUGGER")) {
+ printf("LEAVE\n");
+ }
+ return result;
+}
+
+
+// When the generated code calls a VM function (masm.callWithABI) we need to
+// call that function instead of trying to execute it with the simulator
+// (because it's x64 code instead of AArch64 code). We do that by redirecting the VM
+// call to a svc (Supervisor Call) instruction that is handled by the
+// simulator. We write the original destination of the jump just at a known
+// offset from the svc instruction so the simulator knows what to call.
+class Redirection
+{
+ friend class Simulator;
+
+ Redirection(void* nativeFunction, ABIFunctionType type)
+ : nativeFunction_(nativeFunction),
+ type_(type),
+ next_(nullptr)
+ {
+ next_ = SimulatorProcess::redirection();
+ SimulatorProcess::setRedirection(this);
+
+ Instruction* instr = (Instruction*)(&svcInstruction_);
+ vixl::Assembler::svc(instr, kCallRtRedirected);
+ }
+
+ public:
+ void* addressOfSvcInstruction() { return &svcInstruction_; }
+ void* nativeFunction() const { return nativeFunction_; }
+ ABIFunctionType type() const { return type_; }
+
+ static Redirection* Get(void* nativeFunction, ABIFunctionType type) {
+ js::jit::AutoLockSimulatorCache alsr;
+
+ // TODO: Store srt_ in the simulator for this assertion.
+ // VIXL_ASSERT_IF(pt->simulator(), pt->simulator()->srt_ == srt);
+
+ Redirection* current = SimulatorProcess::redirection();
+ for (; current != nullptr; current = current->next_) {
+ if (current->nativeFunction_ == nativeFunction) {
+ VIXL_ASSERT(current->type() == type);
+ return current;
+ }
+ }
+
+ // Note: we can't use js_new here because the constructor is private.
+ js::AutoEnterOOMUnsafeRegion oomUnsafe;
+ Redirection* redir = js_pod_malloc<Redirection>(1);
+ if (!redir)
+ oomUnsafe.crash("Simulator redirection");
+ new(redir) Redirection(nativeFunction, type);
+ return redir;
+ }
+
+ static const Redirection* FromSvcInstruction(const Instruction* svcInstruction) {
+ const uint8_t* addrOfSvc = reinterpret_cast<const uint8_t*>(svcInstruction);
+ const uint8_t* addrOfRedirection = addrOfSvc - offsetof(Redirection, svcInstruction_);
+ return reinterpret_cast<const Redirection*>(addrOfRedirection);
+ }
+
+ private:
+ void* nativeFunction_;
+ uint32_t svcInstruction_;
+ ABIFunctionType type_;
+ Redirection* next_;
+};
+
+
+
+
+void* Simulator::RedirectNativeFunction(void* nativeFunction, ABIFunctionType type) {
+ Redirection* redirection = Redirection::Get(nativeFunction, type);
+ return redirection->addressOfSvcInstruction();
+}
+
+void Simulator::VisitException(const Instruction* instr) {
+ if (instr->InstructionBits() == UNDEFINED_INST_PATTERN) {
+ uint8_t* newPC;
+ if (js::wasm::HandleIllegalInstruction(registerState(), &newPC)) {
+ set_pc((Instruction*)newPC);
+ return;
+ }
+ DoUnreachable(instr);
+ }
+
+ switch (instr->Mask(ExceptionMask)) {
+ case BRK: {
+ int lowbit = ImmException_offset;
+ int highbit = ImmException_offset + ImmException_width - 1;
+ HostBreakpoint(instr->Bits(highbit, lowbit));
+ break;
+ }
+ case HLT:
+ switch (instr->ImmException()) {
+ case kTraceOpcode:
+ DoTrace(instr);
+ return;
+ case kLogOpcode:
+ DoLog(instr);
+ return;
+ case kPrintfOpcode:
+ DoPrintf(instr);
+ return;
+ default:
+ HostBreakpoint();
+ return;
+ }
+ case SVC:
+ // The SVC instruction is hijacked by the JIT as a pseudo-instruction
+ // causing the Simulator to execute host-native code for callWithABI.
+ switch (instr->ImmException()) {
+ case kCallRtRedirected:
+ VisitCallRedirection(instr);
+ return;
+ case kMarkStackPointer: {
+ js::AutoEnterOOMUnsafeRegion oomUnsafe;
+ if (!spStack_.append(get_sp()))
+ oomUnsafe.crash("tracking stack for ARM64 simulator");
+ return;
+ }
+ case kCheckStackPointer: {
+ DebugOnly<int64_t> current = get_sp();
+ DebugOnly<int64_t> expected = spStack_.popCopy();
+ VIXL_ASSERT(current == expected);
+ return;
+ }
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+}
+
+
+void Simulator::setGPR32Result(int32_t result) {
+ set_wreg(0, result);
+}
+
+
+void Simulator::setGPR64Result(int64_t result) {
+ set_xreg(0, result);
+}
+
+
+void Simulator::setFP32Result(float result) {
+ set_sreg(0, result);
+}
+
+
+void Simulator::setFP64Result(double result) {
+ set_dreg(0, result);
+}
+
+
+typedef int64_t (*Prototype_General0)();
+typedef int64_t (*Prototype_General1)(int64_t arg0);
+typedef int64_t (*Prototype_General2)(int64_t arg0, int64_t arg1);
+typedef int64_t (*Prototype_General3)(int64_t arg0, int64_t arg1, int64_t arg2);
+typedef int64_t (*Prototype_General4)(int64_t arg0, int64_t arg1, int64_t arg2, int64_t arg3);
+typedef int64_t (*Prototype_General5)(int64_t arg0, int64_t arg1, int64_t arg2, int64_t arg3,
+ int64_t arg4);
+typedef int64_t (*Prototype_General6)(int64_t arg0, int64_t arg1, int64_t arg2, int64_t arg3,
+ int64_t arg4, int64_t arg5);
+typedef int64_t (*Prototype_General7)(int64_t arg0, int64_t arg1, int64_t arg2, int64_t arg3,
+ int64_t arg4, int64_t arg5, int64_t arg6);
+typedef int64_t (*Prototype_General8)(int64_t arg0, int64_t arg1, int64_t arg2, int64_t arg3,
+ int64_t arg4, int64_t arg5, int64_t arg6, int64_t arg7);
+typedef int64_t (*Prototype_GeneralGeneralGeneralInt64)(int64_t arg0, int64_t arg1, int64_t arg2,
+ int64_t arg3);
+typedef int64_t (*Prototype_GeneralGeneralInt64Int64)(int64_t arg0, int64_t arg1, int64_t arg2,
+ int64_t arg3);
+
+typedef int64_t (*Prototype_Int_Double)(double arg0);
+typedef int64_t (*Prototype_Int_IntDouble)(int64_t arg0, double arg1);
+typedef int64_t (*Prototype_Int_DoubleInt)(double arg0, int64_t arg1);
+typedef int64_t (*Prototype_Int_DoubleIntInt)(double arg0, uint64_t arg1, uint64_t arg2);
+typedef int64_t (*Prototype_Int_IntDoubleIntInt)(uint64_t arg0, double arg1,
+ uint64_t arg2, uint64_t arg3);
+
+typedef float (*Prototype_Float32_Float32)(float arg0);
+typedef int64_t (*Prototype_Int_Float32)(float arg0);
+typedef float (*Prototype_Float32_Float32Float32)(float arg0, float arg1);
+
+typedef double (*Prototype_Double_None)();
+typedef double (*Prototype_Double_Double)(double arg0);
+typedef double (*Prototype_Double_Int)(int64_t arg0);
+typedef double (*Prototype_Double_DoubleInt)(double arg0, int64_t arg1);
+typedef double (*Prototype_Double_IntDouble)(int64_t arg0, double arg1);
+typedef double (*Prototype_Double_DoubleDouble)(double arg0, double arg1);
+typedef double (*Prototype_Double_DoubleDoubleDouble)(double arg0, double arg1, double arg2);
+typedef double (*Prototype_Double_DoubleDoubleDoubleDouble)(double arg0, double arg1,
+ double arg2, double arg3);
+
+typedef int32_t (*Prototype_Int32_General)(int64_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32)(int64_t, int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32Int32)(int64_t, int32_t, int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32Int32Int32Int32)(int64_t,
+ int32_t,
+ int32_t,
+ int32_t,
+ int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32Int32Int32Int32Int32)(int64_t,
+ int32_t,
+ int32_t,
+ int32_t,
+ int32_t,
+ int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32Int32Int32Int32General)(int64_t,
+ int32_t,
+ int32_t,
+ int32_t,
+ int32_t,
+ int64_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32Int32Int32Int32Int32Int32General)(int64_t,
+ int32_t,
+ int32_t,
+ int32_t,
+ int32_t,
+ int32_t,
+ int32_t,
+ int64_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32Float32Float32Int32Int32Int32General)(int64_t,
+ int32_t,
+ float,
+ float,
+ int32_t,
+ int32_t,
+ int32_t,
+ int64_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32Float32Float32Float32Float32Int32Int32Int32Int32General)(int64_t,
+ int32_t,
+ float,
+ float,
+ float,
+ float,
+ int32_t,
+ int32_t,
+ int32_t,
+ int32_t,
+ int64_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32Float32Float32Int32Float32Float32Int32Float32Int32Int32Int32Int32General)(int64_t,
+ int32_t,
+ float,
+ float,
+ int32_t,
+ float,
+ float,
+ int32_t,
+ float,
+ int32_t,
+ int32_t,
+ int32_t,
+ int32_t,
+ int64_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32Int32Int32General)(int64_t,
+ int32_t,
+ int32_t,
+ int32_t,
+ int64_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32Int32Int64)(int64_t,
+ int32_t,
+ int32_t,
+ int64_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32Int32General)(int64_t,
+ int32_t,
+ int32_t,
+ int64_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32Int64Int64)(int64_t,
+ int32_t,
+ int64_t,
+ int64_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32GeneralInt32)(int64_t,
+ int32_t,
+ int64_t,
+ int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt32GeneralInt32Int32)(int64_t,
+ int32_t,
+ int64_t,
+ int32_t,
+ int32_t);
+typedef int32_t (*Prototype_Int32_GeneralGeneral)(int64_t, int64_t);
+typedef int32_t (*Prototype_Int32_GeneralGeneralGeneral)(int64_t,
+ int64_t,
+ int64_t);
+typedef int32_t (*Prototype_Int32_GeneralGeneralInt32Int32)(int64_t,
+ int64_t,
+ int32_t,
+ int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt64Int32Int32Int32)(int64_t, int64_t,
+ int32_t, int32_t,
+ int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt64Int32)(int64_t, int64_t,
+ int32_t);
+typedef int32_t (*Prototype_Int32_GeneralInt64Int32Int64)(int64_t, int64_t,
+ int32_t, int64_t);
+typedef int32_t (*Prototype_Int32_GeneralInt64Int32Int64General)(int64_t, int64_t,
+ int32_t, int64_t,
+ int64_t);
+typedef int32_t (*Prototype_Int32_GeneralInt64Int64Int64)(int64_t, int64_t,
+ int64_t, int64_t);
+typedef int32_t (*Prototype_Int32_GeneralInt64Int64General)(int64_t, int64_t,
+ int64_t, int64_t);
+typedef int32_t (*Prototype_Int32_GeneralInt64Int64Int64General)(int64_t, int64_t,
+ int64_t, int64_t,
+ int64_t);
+typedef int64_t (*Prototype_General_GeneralInt32)(int64_t, int32_t);
+typedef int64_t (*Prototype_General_GeneralInt32Int32)(int64_t,
+ int32_t,
+ int32_t);
+typedef int64_t (*Prototype_General_GeneralInt32General)(int64_t,
+ int32_t,
+ int64_t);
+typedef int64_t (*Prototype_General_GeneralInt32Int32GeneralInt32)(int64_t,
+ int32_t,
+ int32_t,
+ int64_t,
+ int32_t);
+typedef int32_t (*Prototype_Int32_GeneralGeneralInt32GeneralInt32Int32Int32)(
+ int64_t,
+ int64_t,
+ int32_t,
+ int64_t,
+ int32_t,
+ int32_t,
+ int32_t);
+typedef int32_t (*Prototype_Int32_GeneralGeneralInt32General)(int64_t,
+ int64_t,
+ int32_t,
+ int64_t);
+typedef int64_t (*Prototype_Int64_General)(int64_t);
+typedef int64_t (*Prototype_Int64_GeneralInt64)(int64_t, int64_t);
+
+// Simulator support for callWithABI().
+void
+Simulator::VisitCallRedirection(const Instruction* instr)
+{
+ VIXL_ASSERT(instr->Mask(ExceptionMask) == SVC);
+ VIXL_ASSERT(instr->ImmException() == kCallRtRedirected);
+
+ const Redirection* redir = Redirection::FromSvcInstruction(instr);
+ uintptr_t nativeFn = reinterpret_cast<uintptr_t>(redir->nativeFunction());
+
+ // Stack must be aligned prior to the call.
+ // FIXME: It's actually our job to perform the alignment...
+ //VIXL_ASSERT((xreg(31, Reg31IsStackPointer) & (StackAlignment - 1)) == 0);
+
+ // Used to assert that callee-saved registers are preserved.
+ DebugOnly<int64_t> x19 = xreg(19);
+ DebugOnly<int64_t> x20 = xreg(20);
+ DebugOnly<int64_t> x21 = xreg(21);
+ DebugOnly<int64_t> x22 = xreg(22);
+ DebugOnly<int64_t> x23 = xreg(23);
+ DebugOnly<int64_t> x24 = xreg(24);
+ DebugOnly<int64_t> x25 = xreg(25);
+ DebugOnly<int64_t> x26 = xreg(26);
+ DebugOnly<int64_t> x27 = xreg(27);
+ DebugOnly<int64_t> x28 = xreg(28);
+ DebugOnly<int64_t> x29 = xreg(29);
+ DebugOnly<int64_t> savedSP = get_sp();
+
+ // Remember LR for returning from the "call".
+ int64_t savedLR = xreg(30);
+
+ // Allow recursive Simulator calls: returning from the call must stop
+ // the simulation and transition back to native Simulator code.
+ set_xreg(30, int64_t(kEndOfSimAddress));
+
+ // Store argument register values in local variables for ease of use below.
+ int64_t x0 = xreg(0);
+ int64_t x1 = xreg(1);
+ int64_t x2 = xreg(2);
+ int64_t x3 = xreg(3);
+ int64_t x4 = xreg(4);
+ int64_t x5 = xreg(5);
+ int64_t x6 = xreg(6);
+ int64_t x7 = xreg(7);
+ int64_t x8 = xreg(8);
+ double d0 = dreg(0);
+ double d1 = dreg(1);
+ double d2 = dreg(2);
+ double d3 = dreg(3);
+ float s0 = sreg(0);
+ float s1 = sreg(1);
+ float s2 = sreg(2);
+ float s3 = sreg(3);
+ float s4 = sreg(4);
+
+ // Dispatch the call and set the return value.
+ switch (redir->type()) {
+ // Cases with int64_t return type.
+ case js::jit::Args_General0: {
+ int64_t ret = reinterpret_cast<Prototype_General0>(nativeFn)();
+ setGPR64Result(ret);
+ break;
+ }
+ case js::jit::Args_General1: {
+ int64_t ret = reinterpret_cast<Prototype_General1>(nativeFn)(x0);
+ setGPR64Result(ret);
+ break;
+ }
+ case js::jit::Args_General2: {
+ int64_t ret = reinterpret_cast<Prototype_General2>(nativeFn)(x0, x1);
+ setGPR64Result(ret);
+ break;
+ }
+ case js::jit::Args_General3: {
+ int64_t ret = reinterpret_cast<Prototype_General3>(nativeFn)(x0, x1, x2);
+ setGPR64Result(ret);
+ break;
+ }
+ case js::jit::Args_General4: {
+ int64_t ret = reinterpret_cast<Prototype_General4>(nativeFn)(x0, x1, x2, x3);
+ setGPR64Result(ret);
+ break;
+ }
+ case js::jit::Args_General5: {
+ int64_t ret = reinterpret_cast<Prototype_General5>(nativeFn)(x0, x1, x2, x3, x4);
+ setGPR64Result(ret);
+ break;
+ }
+ case js::jit::Args_General6: {
+ int64_t ret = reinterpret_cast<Prototype_General6>(nativeFn)(x0, x1, x2, x3, x4, x5);
+ setGPR64Result(ret);
+ break;
+ }
+ case js::jit::Args_General7: {
+ int64_t ret = reinterpret_cast<Prototype_General7>(nativeFn)(x0, x1, x2, x3, x4, x5, x6);
+ setGPR64Result(ret);
+ break;
+ }
+ case js::jit::Args_General8: {
+ int64_t ret = reinterpret_cast<Prototype_General8>(nativeFn)(x0, x1, x2, x3, x4, x5, x6, x7);
+ setGPR64Result(ret);
+ break;
+ }
+ case js::jit::Args_Int_GeneralGeneralGeneralInt64: {
+ int64_t ret = reinterpret_cast<Prototype_GeneralGeneralGeneralInt64>(nativeFn)(x0, x1, x2, x3);
+ setGPR64Result(ret);
+ break;
+ }
+ case js::jit::Args_Int_GeneralGeneralInt64Int64: {
+ int64_t ret = reinterpret_cast<Prototype_GeneralGeneralInt64Int64>(nativeFn)(x0, x1, x2, x3);
+ setGPR64Result(ret);
+ break;
+ }
+
+ // Cases with GPR return type. This can be int32 or int64, but int64 is a safer assumption.
+ case js::jit::Args_Int_Double: {
+ int64_t ret = reinterpret_cast<Prototype_Int_Double>(nativeFn)(d0);
+ setGPR64Result(ret);
+ break;
+ }
+ case js::jit::Args_Int_IntDouble: {
+ int64_t ret = reinterpret_cast<Prototype_Int_IntDouble>(nativeFn)(x0, d0);
+ setGPR64Result(ret);
+ break;
+ }
+
+ case js::jit::Args_Int_DoubleInt: {
+ int64_t ret = reinterpret_cast<Prototype_Int_DoubleInt>(nativeFn)(d0, x0);
+ setGPR64Result(ret);
+ break;
+ }
+
+ case js::jit::Args_Int_IntDoubleIntInt: {
+ int64_t ret = reinterpret_cast<Prototype_Int_IntDoubleIntInt>(nativeFn)(x0, d0, x1, x2);
+ setGPR64Result(ret);
+ break;
+ }
+
+ case js::jit::Args_Int_DoubleIntInt: {
+ int64_t ret = reinterpret_cast<Prototype_Int_DoubleIntInt>(nativeFn)(d0, x0, x1);
+ setGPR64Result(ret);
+ break;
+ }
+
+ // Cases with float return type.
+ case js::jit::Args_Float32_Float32: {
+ float ret = reinterpret_cast<Prototype_Float32_Float32>(nativeFn)(s0);
+ setFP32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int_Float32: {
+ int64_t ret = reinterpret_cast<Prototype_Int_Float32>(nativeFn)(s0);
+ setGPR64Result(ret);
+ break;
+ }
+ case js::jit::Args_Float32_Float32Float32: {
+ float ret = reinterpret_cast<Prototype_Float32_Float32Float32>(nativeFn)(s0, s1);
+ setFP32Result(ret);
+ break;
+ }
+
+ // Cases with double return type.
+ case js::jit::Args_Double_None: {
+ double ret = reinterpret_cast<Prototype_Double_None>(nativeFn)();
+ setFP64Result(ret);
+ break;
+ }
+ case js::jit::Args_Double_Double: {
+ double ret = reinterpret_cast<Prototype_Double_Double>(nativeFn)(d0);
+ setFP64Result(ret);
+ break;
+ }
+ case js::jit::Args_Double_Int: {
+ double ret = reinterpret_cast<Prototype_Double_Int>(nativeFn)(x0);
+ setFP64Result(ret);
+ break;
+ }
+ case js::jit::Args_Double_DoubleInt: {
+ double ret = reinterpret_cast<Prototype_Double_DoubleInt>(nativeFn)(d0, x0);
+ setFP64Result(ret);
+ break;
+ }
+ case js::jit::Args_Double_DoubleDouble: {
+ double ret = reinterpret_cast<Prototype_Double_DoubleDouble>(nativeFn)(d0, d1);
+ setFP64Result(ret);
+ break;
+ }
+ case js::jit::Args_Double_DoubleDoubleDouble: {
+ double ret = reinterpret_cast<Prototype_Double_DoubleDoubleDouble>(nativeFn)(d0, d1, d2);
+ setFP64Result(ret);
+ break;
+ }
+ case js::jit::Args_Double_DoubleDoubleDoubleDouble: {
+ double ret = reinterpret_cast<Prototype_Double_DoubleDoubleDoubleDouble>(nativeFn)(d0, d1, d2, d3);
+ setFP64Result(ret);
+ break;
+ }
+
+ case js::jit::Args_Double_IntDouble: {
+ double ret = reinterpret_cast<Prototype_Double_IntDouble>(nativeFn)(x0, d0);
+ setFP64Result(ret);
+ break;
+ }
+
+ case js::jit::Args_Int32_General: {
+ int32_t ret = reinterpret_cast<Prototype_Int32_General>(nativeFn)(x0);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt32: {
+ int32_t ret =
+ reinterpret_cast<Prototype_Int32_GeneralInt32>(nativeFn)(x0, x1);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt32Int32: {
+ int32_t ret = reinterpret_cast<Prototype_Int32_GeneralInt32Int32>(
+ nativeFn)(x0, x1, x2);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt32Int32Int32Int32: {
+ int32_t ret =
+ reinterpret_cast<Prototype_Int32_GeneralInt32Int32Int32Int32>(
+ nativeFn)(x0, x1, x2, x3, x4);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt32Int32Int32Int32Int32: {
+ int32_t ret =
+ reinterpret_cast<Prototype_Int32_GeneralInt32Int32Int32Int32Int32>(
+ nativeFn)(x0, x1, x2, x3, x4, x5);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt32Int32Int32Int32General: {
+ int32_t ret =
+ reinterpret_cast<Prototype_Int32_GeneralInt32Int32Int32Int32General>(
+ nativeFn)(x0, x1, x2, x3, x4, x5);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt32Int32Int32Int32Int32Int32General: {
+ int32_t ret =
+ reinterpret_cast<Prototype_Int32_GeneralInt32Int32Int32Int32Int32Int32General>(
+ nativeFn)(x0, x1, x2, x3, x4, x5, x6, x7);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt32Float32Float32Int32Int32Int32General: {
+ int32_t ret =
+ reinterpret_cast<Prototype_Int32_GeneralInt32Float32Float32Int32Int32Int32General>(
+ nativeFn)(x0, x1, s0, s1, x2, x3, x4, x5);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt32Float32Float32Float32Float32Int32Int32Int32Int32General: {
+ int32_t ret =
+ reinterpret_cast<Prototype_Int32_GeneralInt32Float32Float32Float32Float32Int32Int32Int32Int32General>(
+ nativeFn)(x0, x1, s0, s1, s2, s3, x2, x3, x4, x5, x6);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt32Float32Float32Int32Float32Float32Int32Float32Int32Int32Int32Int32General: {
+ int32_t ret =
+ reinterpret_cast<Prototype_Int32_GeneralInt32Float32Float32Int32Float32Float32Int32Float32Int32Int32Int32Int32General>(
+ nativeFn)(x0, x1, s0, s1, x2, s2, s3, x3, s4, x4, x5, x6, x7, x8);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt32Int32Int32General: {
+ int32_t ret =
+ reinterpret_cast<Prototype_Int32_GeneralInt32Int32Int32General>(
+ nativeFn)(x0, x1, x2, x3, x4);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt32Int32Int64: {
+ int32_t ret = reinterpret_cast<Prototype_Int32_GeneralInt32Int32Int64>(
+ nativeFn)(x0, x1, x2, x3);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt32Int32General: {
+ int32_t ret = reinterpret_cast<Prototype_Int32_GeneralInt32Int32General>(
+ nativeFn)(x0, x1, x2, x3);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt32Int64Int64: {
+ int32_t ret = reinterpret_cast<Prototype_Int32_GeneralInt32Int64Int64>(
+ nativeFn)(x0, x1, x2, x3);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt32GeneralInt32: {
+ int32_t ret = reinterpret_cast<Prototype_Int32_GeneralInt32GeneralInt32>(
+ nativeFn)(x0, x1, x2, x3);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt32GeneralInt32Int32: {
+ int32_t ret =
+ reinterpret_cast<Prototype_Int32_GeneralInt32GeneralInt32Int32>(
+ nativeFn)(x0, x1, x2, x3, x4);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralGeneral: {
+ int32_t ret =
+ reinterpret_cast<Prototype_Int32_GeneralGeneral>(nativeFn)(x0, x1);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralGeneralGeneral: {
+ int32_t ret = reinterpret_cast<Prototype_Int32_GeneralGeneralGeneral>(
+ nativeFn)(x0, x1, x2);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralGeneralInt32Int32: {
+ int32_t ret = reinterpret_cast<Prototype_Int32_GeneralGeneralInt32Int32>(
+ nativeFn)(x0, x1, x2, x3);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt64Int32Int32Int32: {
+ int32_t ret = reinterpret_cast<Prototype_Int32_GeneralInt64Int32Int32Int32>(
+ nativeFn)(x0, x1, x2, x3, x4);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt64Int32: {
+ int32_t ret = reinterpret_cast<Prototype_Int32_GeneralInt64Int32>(
+ nativeFn)(x0, x1, x2);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt64Int32Int64: {
+ int32_t ret = reinterpret_cast<Prototype_Int32_GeneralInt64Int32Int64>(
+ nativeFn)(x0, x1, x2, x3);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt64Int32Int64General: {
+ int32_t ret = reinterpret_cast<Prototype_Int32_GeneralInt64Int32Int64General>(
+ nativeFn)(x0, x1, x2, x3, x4);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt64Int64Int64: {
+ int32_t ret = reinterpret_cast<Prototype_Int32_GeneralInt64Int64Int64>(
+ nativeFn)(x0, x1, x2, x3);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt64Int64General: {
+ int32_t ret = reinterpret_cast<Prototype_Int32_GeneralInt64Int64General>(
+ nativeFn)(x0, x1, x2, x3);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralInt64Int64Int64General: {
+ int32_t ret = reinterpret_cast<Prototype_Int32_GeneralInt64Int64Int64General>(
+ nativeFn)(x0, x1, x2, x3, x4);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_General_GeneralInt32: {
+ int64_t ret =
+ reinterpret_cast<Prototype_General_GeneralInt32>(nativeFn)(x0, x1);
+ setGPR64Result(ret);
+ break;
+ }
+ case js::jit::Args_General_GeneralInt32Int32: {
+ int64_t ret = reinterpret_cast<Prototype_General_GeneralInt32Int32>(
+ nativeFn)(x0, x1, x2);
+ setGPR64Result(ret);
+ break;
+ }
+ case js::jit::Args_General_GeneralInt32General: {
+ int64_t ret =
+ reinterpret_cast<Prototype_General_GeneralInt32General>(
+ nativeFn)(x0, x1, x2);
+ setGPR64Result(ret);
+ break;
+ }
+ case js::jit::Args_General_GeneralInt32Int32GeneralInt32: {
+ int64_t ret =
+ reinterpret_cast<Prototype_General_GeneralInt32Int32GeneralInt32>(
+ nativeFn)(x0, x1, x2, x3, x4);
+ setGPR64Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralGeneralInt32GeneralInt32Int32Int32: {
+ int32_t ret = reinterpret_cast<
+ Prototype_Int32_GeneralGeneralInt32GeneralInt32Int32Int32>(nativeFn)(
+ x0, x1, x2, x3, x4, x5, x6);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int32_GeneralGeneralInt32General: {
+ int32_t ret =
+ reinterpret_cast<Prototype_Int32_GeneralGeneralInt32General>(
+ nativeFn)(x0, x1, x2, x3);
+ setGPR32Result(ret);
+ break;
+ }
+ case js::jit::Args_Int64_General: {
+ int64_t ret =
+ reinterpret_cast<Prototype_Int64_General>(
+ nativeFn)(x0);
+ setGPR64Result(ret);
+ break;
+ }
+ case js::jit::Args_Int64_GeneralInt64: {
+ int64_t ret =
+ reinterpret_cast<Prototype_Int64_GeneralInt64>(
+ nativeFn)(x0, x1);
+ setGPR64Result(ret);
+ break;
+ }
+
+ default:
+ MOZ_CRASH("Unknown function type.");
+ }
+
+ // Nuke the volatile registers. x0-x7 are used as result registers, but except
+ // for x0, none are used in the above signatures.
+ for (int i = 1; i <= 18; i++) {
+ // Code feed 1 bad data
+ set_xreg(i, int64_t(0xc0defeed1badda7a));
+ }
+
+ // Assert that callee-saved registers are unchanged.
+ VIXL_ASSERT(xreg(19) == x19);
+ VIXL_ASSERT(xreg(20) == x20);
+ VIXL_ASSERT(xreg(21) == x21);
+ VIXL_ASSERT(xreg(22) == x22);
+ VIXL_ASSERT(xreg(23) == x23);
+ VIXL_ASSERT(xreg(24) == x24);
+ VIXL_ASSERT(xreg(25) == x25);
+ VIXL_ASSERT(xreg(26) == x26);
+ VIXL_ASSERT(xreg(27) == x27);
+ VIXL_ASSERT(xreg(28) == x28);
+ VIXL_ASSERT(xreg(29) == x29);
+
+ // Assert that the stack is unchanged.
+ VIXL_ASSERT(savedSP == get_sp());
+
+ // Simulate a return.
+ set_lr(savedLR);
+ set_pc((Instruction*)savedLR);
+ if (getenv("USE_DEBUGGER"))
+ printf("SVCRET\n");
+}
+
+#ifdef JS_CACHE_SIMULATOR_ARM64
+void
+Simulator::FlushICache()
+{
+ // Flush the caches recorded by the current thread as well as what got
+ // recorded from other threads before this call.
+ auto& vec = SimulatorProcess::getICacheFlushes(this);
+ for (auto& flush : vec) {
+ decoder_->FlushICache(flush.start, flush.length);
+ }
+ vec.clear();
+ pendingCacheRequests = false;
+}
+
+void CachingDecoder::Decode(const Instruction* instr) {
+ InstDecodedKind state;
+ if (lastPage_ && lastPage_->contains(instr)) {
+ state = lastPage_->decode(instr);
+ } else {
+ uintptr_t key = SinglePageDecodeCache::PageStart(instr);
+ ICacheMap::AddPtr p = iCache_.lookupForAdd(key);
+ if (p) {
+ lastPage_ = p->value();
+ state = lastPage_->decode(instr);
+ } else {
+ js::AutoEnterOOMUnsafeRegion oomUnsafe;
+ SinglePageDecodeCache* newPage = js_new<SinglePageDecodeCache>(instr);
+ if (!newPage || !iCache_.add(p, key, newPage)) {
+ oomUnsafe.crash("Simulator SinglePageDecodeCache");
+ }
+ lastPage_ = newPage;
+ state = InstDecodedKind::NotDecodedYet;
+ }
+ }
+
+ switch (state) {
+ case InstDecodedKind::NotDecodedYet: {
+ cachingDecoder_.setDecodePtr(lastPage_->decodePtr(instr));
+ this->Decoder::Decode(instr);
+ break;
+ }
+#define CASE(A) \
+ case InstDecodedKind::A: { \
+ Visit##A(instr); \
+ break; \
+ }
+
+ VISITOR_LIST(CASE)
+#undef CASE
+ }
+}
+
+void CachingDecoder::FlushICache(void* start, size_t size) {
+ MOZ_ASSERT(uintptr_t(start) % vixl::kInstructionSize == 0);
+ MOZ_ASSERT(size % vixl::kInstructionSize == 0);
+ const uint8_t* it = reinterpret_cast<const uint8_t*>(start);
+ const uint8_t* end = it + size;
+ SinglePageDecodeCache* last = nullptr;
+ for (; it < end; it += vixl::kInstructionSize) {
+ auto instr = reinterpret_cast<const Instruction*>(it);
+ if (last && last->contains(instr)) {
+ last->clearDecode(instr);
+ } else {
+ uintptr_t key = SinglePageDecodeCache::PageStart(instr);
+ ICacheMap::Ptr p = iCache_.lookup(key);
+ if (p) {
+ last = p->value();
+ last->clearDecode(instr);
+ }
+ }
+ }
+}
+#endif
+
+} // namespace vixl
+
+namespace js {
+namespace jit {
+
+#ifdef JS_CACHE_SIMULATOR_ARM64
+void SimulatorProcess::recordICacheFlush(void* start, size_t length) {
+ singleton_->lock_.assertOwnedByCurrentThread();
+ AutoEnterOOMUnsafeRegion oomUnsafe;
+ ICacheFlush range{start, length};
+ for (auto& s : singleton_->pendingFlushes_) {
+ if (!s.records.append(range)) {
+ oomUnsafe.crash("Simulator recordFlushICache");
+ }
+ }
+}
+
+void SimulatorProcess::membarrier() {
+ singleton_->lock_.assertOwnedByCurrentThread();
+ for (auto& s : singleton_->pendingFlushes_) {
+ s.thread->pendingCacheRequests = true;
+ }
+}
+
+SimulatorProcess::ICacheFlushes& SimulatorProcess::getICacheFlushes(Simulator* sim) {
+ singleton_->lock_.assertOwnedByCurrentThread();
+ for (auto& s : singleton_->pendingFlushes_) {
+ if (s.thread == sim) {
+ return s.records;
+ }
+ }
+ MOZ_CRASH("Simulator is not registered in the SimulatorProcess");
+}
+
+bool SimulatorProcess::registerSimulator(Simulator* sim) {
+ singleton_->lock_.assertOwnedByCurrentThread();
+ ICacheFlushes empty;
+ SimFlushes simFlushes{sim, std::move(empty)};
+ return singleton_->pendingFlushes_.append(std::move(simFlushes));
+}
+
+void SimulatorProcess::unregisterSimulator(Simulator* sim) {
+ singleton_->lock_.assertOwnedByCurrentThread();
+ for (auto& s : singleton_->pendingFlushes_) {
+ if (s.thread == sim) {
+ singleton_->pendingFlushes_.erase(&s);
+ return;
+ }
+ }
+ MOZ_CRASH("Simulator is not registered in the SimulatorProcess");
+}
+#endif // !JS_CACHE_SIMULATOR_ARM64
+
+} // namespace jit
+} // namespace js
+
+vixl::Simulator* JSContext::simulator() const {
+ return simulator_;
+}
diff --git a/js/src/jit/arm64/vixl/Platform-vixl.h b/js/src/jit/arm64/vixl/Platform-vixl.h
new file mode 100644
index 0000000000..a4de54c785
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Platform-vixl.h
@@ -0,0 +1,39 @@
+// Copyright 2014, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_PLATFORM_H
+#define VIXL_PLATFORM_H
+
+// Define platform specific functionalities.
+#include <signal.h>
+
+#include "jstypes.h"
+
+namespace vixl {
+inline void HostBreakpoint(int64_t code = 0) { raise(SIGINT); }
+} // namespace vixl
+
+#endif
diff --git a/js/src/jit/arm64/vixl/README.md b/js/src/jit/arm64/vixl/README.md
new file mode 100644
index 0000000000..7111753279
--- /dev/null
+++ b/js/src/jit/arm64/vixl/README.md
@@ -0,0 +1,7 @@
+This directory is a mix of VIXL files for ARM64, and files added to integrate
+VIXL within SpiderMonkey MacroAssembler. Many of SpiderMonkey extension would be
+in files prefixed with Moz*, but some might be spread across imported files when
+convenient.
+
+VIXL upstream sources can be found at:
+https://git.linaro.org/arm/vixl.git/about/
diff --git a/js/src/jit/arm64/vixl/Simulator-Constants-vixl.h b/js/src/jit/arm64/vixl/Simulator-Constants-vixl.h
new file mode 100644
index 0000000000..4b9064a89b
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Simulator-Constants-vixl.h
@@ -0,0 +1,140 @@
+// Copyright 2015, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_A64_SIMULATOR_CONSTANTS_A64_H_
+#define VIXL_A64_SIMULATOR_CONSTANTS_A64_H_
+
+namespace vixl {
+
+// Debug instructions.
+//
+// VIXL's macro-assembler and simulator support a few pseudo instructions to
+// make debugging easier. These pseudo instructions do not exist on real
+// hardware.
+//
+// TODO: Also consider allowing these pseudo-instructions to be disabled in the
+// simulator, so that users can check that the input is a valid native code.
+// (This isn't possible in all cases. Printf won't work, for example.)
+//
+// Each debug pseudo instruction is represented by a HLT instruction. The HLT
+// immediate field is used to identify the type of debug pseudo instruction.
+
+enum DebugHltOpcodes {
+ kPrintfOpcode,
+ kTraceOpcode,
+ kLogOpcode,
+ // Aliases.
+ kDebugHltFirstOpcode = kPrintfOpcode,
+ kDebugHltLastOpcode = kLogOpcode
+};
+
+// Each pseudo instruction uses a custom encoding for additional arguments, as
+// described below.
+
+// Unreachable - kUnreachableOpcode
+//
+// Instruction which should never be executed. This is used as a guard in parts
+// of the code that should not be reachable, such as in data encoded inline in
+// the instructions.
+
+// Printf - kPrintfOpcode
+// - arg_count: The number of arguments.
+// - arg_pattern: A set of PrintfArgPattern values, packed into two-bit fields.
+//
+// Simulate a call to printf.
+//
+// Floating-point and integer arguments are passed in separate sets of registers
+// in AAPCS64 (even for varargs functions), so it is not possible to determine
+// the type of each argument without some information about the values that were
+// passed in. This information could be retrieved from the printf format string,
+// but the format string is not trivial to parse so we encode the relevant
+// information with the HLT instruction.
+//
+// Also, the following registers are populated (as if for a native A64 call):
+// x0: The format string
+// x1-x7: Optional arguments, if type == CPURegister::kRegister
+// d0-d7: Optional arguments, if type == CPURegister::kFPRegister
+const unsigned kPrintfArgCountOffset = 1 * kInstructionSize;
+const unsigned kPrintfArgPatternListOffset = 2 * kInstructionSize;
+const unsigned kPrintfLength = 3 * kInstructionSize;
+
+const unsigned kPrintfMaxArgCount = 4;
+
+// The argument pattern is a set of two-bit-fields, each with one of the
+// following values:
+enum PrintfArgPattern {
+ kPrintfArgW = 1,
+ kPrintfArgX = 2,
+ // There is no kPrintfArgS because floats are always converted to doubles in C
+ // varargs calls.
+ kPrintfArgD = 3
+};
+static const unsigned kPrintfArgPatternBits = 2;
+
+// Trace - kTraceOpcode
+// - parameter: TraceParameter stored as a uint32_t
+// - command: TraceCommand stored as a uint32_t
+//
+// Allow for trace management in the generated code. This enables or disables
+// automatic tracing of the specified information for every simulated
+// instruction.
+const unsigned kTraceParamsOffset = 1 * kInstructionSize;
+const unsigned kTraceCommandOffset = 2 * kInstructionSize;
+const unsigned kTraceLength = 3 * kInstructionSize;
+
+// Trace parameters.
+enum TraceParameters {
+ LOG_DISASM = 1 << 0, // Log disassembly.
+ LOG_REGS = 1 << 1, // Log general purpose registers.
+ LOG_VREGS = 1 << 2, // Log NEON and floating-point registers.
+ LOG_SYSREGS = 1 << 3, // Log the flags and system registers.
+ LOG_WRITE = 1 << 4, // Log writes to memory.
+
+ LOG_NONE = 0,
+ LOG_STATE = LOG_REGS | LOG_VREGS | LOG_SYSREGS,
+ LOG_ALL = LOG_DISASM | LOG_STATE | LOG_WRITE
+};
+
+// Trace commands.
+enum TraceCommand {
+ TRACE_ENABLE = 1,
+ TRACE_DISABLE = 2
+};
+
+// Log - kLogOpcode
+// - parameter: TraceParameter stored as a uint32_t
+//
+// Print the specified information once. This mechanism is separate from Trace.
+// In particular, _all_ of the specified registers are printed, rather than just
+// the registers that the instruction writes.
+//
+// Any combination of the TraceParameters values can be used, except that
+// LOG_DISASM is not supported for Log.
+const unsigned kLogParamsOffset = 1 * kInstructionSize;
+const unsigned kLogLength = 2 * kInstructionSize;
+} // namespace vixl
+
+#endif // VIXL_A64_SIMULATOR_CONSTANTS_A64_H_
diff --git a/js/src/jit/arm64/vixl/Simulator-vixl.cpp b/js/src/jit/arm64/vixl/Simulator-vixl.cpp
new file mode 100644
index 0000000000..71e1a31d46
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Simulator-vixl.cpp
@@ -0,0 +1,4371 @@
+// Copyright 2015, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "jstypes.h"
+
+#ifdef JS_SIMULATOR_ARM64
+
+#include "jit/arm64/vixl/Simulator-vixl.h"
+
+#include <cmath>
+#include <string.h>
+
+#include "jit/AtomicOperations.h"
+
+namespace vixl {
+
+const Instruction* Simulator::kEndOfSimAddress = NULL;
+
+void SimSystemRegister::SetBits(int msb, int lsb, uint32_t bits) {
+ int width = msb - lsb + 1;
+ VIXL_ASSERT(IsUintN(width, bits) || IsIntN(width, bits));
+
+ bits <<= lsb;
+ uint32_t mask = ((1 << width) - 1) << lsb;
+ VIXL_ASSERT((mask & write_ignore_mask_) == 0);
+
+ value_ = (value_ & ~mask) | (bits & mask);
+}
+
+
+SimSystemRegister SimSystemRegister::DefaultValueFor(SystemRegister id) {
+ switch (id) {
+ case NZCV:
+ return SimSystemRegister(0x00000000, NZCVWriteIgnoreMask);
+ case FPCR:
+ return SimSystemRegister(0x00000000, FPCRWriteIgnoreMask);
+ default:
+ VIXL_UNREACHABLE();
+ return SimSystemRegister();
+ }
+}
+
+
+void Simulator::Run() {
+ pc_modified_ = false;
+ while (pc_ != kEndOfSimAddress) {
+ ExecuteInstruction();
+ LogAllWrittenRegisters();
+ }
+}
+
+
+void Simulator::RunFrom(const Instruction* first) {
+ set_pc(first);
+ Run();
+}
+
+
+const char* Simulator::xreg_names[] = {
+"x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
+"x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
+"x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
+"x24", "x25", "x26", "x27", "x28", "x29", "lr", "xzr", "sp"};
+
+const char* Simulator::wreg_names[] = {
+"w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
+"w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
+"w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
+"w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr", "wsp"};
+
+const char* Simulator::sreg_names[] = {
+"s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
+"s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15",
+"s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
+"s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31"};
+
+const char* Simulator::dreg_names[] = {
+"d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
+"d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
+"d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
+"d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31"};
+
+const char* Simulator::vreg_names[] = {
+"v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+"v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
+"v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
+"v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"};
+
+
+
+const char* Simulator::WRegNameForCode(unsigned code, Reg31Mode mode) {
+ VIXL_ASSERT(code < kNumberOfRegisters);
+ // If the code represents the stack pointer, index the name after zr.
+ if ((code == kZeroRegCode) && (mode == Reg31IsStackPointer)) {
+ code = kZeroRegCode + 1;
+ }
+ return wreg_names[code];
+}
+
+
+const char* Simulator::XRegNameForCode(unsigned code, Reg31Mode mode) {
+ VIXL_ASSERT(code < kNumberOfRegisters);
+ // If the code represents the stack pointer, index the name after zr.
+ if ((code == kZeroRegCode) && (mode == Reg31IsStackPointer)) {
+ code = kZeroRegCode + 1;
+ }
+ return xreg_names[code];
+}
+
+
+const char* Simulator::SRegNameForCode(unsigned code) {
+ VIXL_ASSERT(code < kNumberOfFPRegisters);
+ return sreg_names[code];
+}
+
+
+const char* Simulator::DRegNameForCode(unsigned code) {
+ VIXL_ASSERT(code < kNumberOfFPRegisters);
+ return dreg_names[code];
+}
+
+
+const char* Simulator::VRegNameForCode(unsigned code) {
+ VIXL_ASSERT(code < kNumberOfVRegisters);
+ return vreg_names[code];
+}
+
+
+#define COLOUR(colour_code) "\033[0;" colour_code "m"
+#define COLOUR_BOLD(colour_code) "\033[1;" colour_code "m"
+#define NORMAL ""
+#define GREY "30"
+#define RED "31"
+#define GREEN "32"
+#define YELLOW "33"
+#define BLUE "34"
+#define MAGENTA "35"
+#define CYAN "36"
+#define WHITE "37"
+void Simulator::set_coloured_trace(bool value) {
+ coloured_trace_ = value;
+
+ clr_normal = value ? COLOUR(NORMAL) : "";
+ clr_flag_name = value ? COLOUR_BOLD(WHITE) : "";
+ clr_flag_value = value ? COLOUR(NORMAL) : "";
+ clr_reg_name = value ? COLOUR_BOLD(CYAN) : "";
+ clr_reg_value = value ? COLOUR(CYAN) : "";
+ clr_vreg_name = value ? COLOUR_BOLD(MAGENTA) : "";
+ clr_vreg_value = value ? COLOUR(MAGENTA) : "";
+ clr_memory_address = value ? COLOUR_BOLD(BLUE) : "";
+ clr_warning = value ? COLOUR_BOLD(YELLOW) : "";
+ clr_warning_message = value ? COLOUR(YELLOW) : "";
+ clr_printf = value ? COLOUR(GREEN) : "";
+}
+#undef COLOUR
+#undef COLOUR_BOLD
+#undef NORMAL
+#undef GREY
+#undef RED
+#undef GREEN
+#undef YELLOW
+#undef BLUE
+#undef MAGENTA
+#undef CYAN
+#undef WHITE
+
+
+void Simulator::set_trace_parameters(int parameters) {
+ bool disasm_before = trace_parameters_ & LOG_DISASM;
+ trace_parameters_ = parameters;
+ bool disasm_after = trace_parameters_ & LOG_DISASM;
+
+ if (disasm_before != disasm_after) {
+ if (disasm_after) {
+ decoder_->InsertVisitorBefore(print_disasm_, this);
+ } else {
+ decoder_->RemoveVisitor(print_disasm_);
+ }
+ }
+}
+
+
+void Simulator::set_instruction_stats(bool value) {
+ if (instrumentation_ == nullptr) {
+ return;
+ }
+
+ if (value != instruction_stats_) {
+ if (value) {
+ decoder_->AppendVisitor(instrumentation_);
+ } else {
+ decoder_->RemoveVisitor(instrumentation_);
+ }
+ instruction_stats_ = value;
+ }
+}
+
+// Helpers ---------------------------------------------------------------------
+uint64_t Simulator::AddWithCarry(unsigned reg_size,
+ bool set_flags,
+ uint64_t left,
+ uint64_t right,
+ int carry_in) {
+ VIXL_ASSERT((carry_in == 0) || (carry_in == 1));
+ VIXL_ASSERT((reg_size == kXRegSize) || (reg_size == kWRegSize));
+
+ uint64_t max_uint = (reg_size == kWRegSize) ? kWMaxUInt : kXMaxUInt;
+ uint64_t reg_mask = (reg_size == kWRegSize) ? kWRegMask : kXRegMask;
+ uint64_t sign_mask = (reg_size == kWRegSize) ? kWSignMask : kXSignMask;
+
+ left &= reg_mask;
+ right &= reg_mask;
+ uint64_t result = (left + right + carry_in) & reg_mask;
+
+ if (set_flags) {
+ nzcv().SetN(CalcNFlag(result, reg_size));
+ nzcv().SetZ(CalcZFlag(result));
+
+ // Compute the C flag by comparing the result to the max unsigned integer.
+ uint64_t max_uint_2op = max_uint - carry_in;
+ bool C = (left > max_uint_2op) || ((max_uint_2op - left) < right);
+ nzcv().SetC(C ? 1 : 0);
+
+ // Overflow iff the sign bit is the same for the two inputs and different
+ // for the result.
+ uint64_t left_sign = left & sign_mask;
+ uint64_t right_sign = right & sign_mask;
+ uint64_t result_sign = result & sign_mask;
+ bool V = (left_sign == right_sign) && (left_sign != result_sign);
+ nzcv().SetV(V ? 1 : 0);
+
+ LogSystemRegister(NZCV);
+ }
+ return result;
+}
+
+
+int64_t Simulator::ShiftOperand(unsigned reg_size,
+ int64_t value,
+ Shift shift_type,
+ unsigned amount) {
+ if (amount == 0) {
+ return value;
+ }
+ int64_t mask = reg_size == kXRegSize ? kXRegMask : kWRegMask;
+ switch (shift_type) {
+ case LSL:
+ return (value << amount) & mask;
+ case LSR:
+ return static_cast<uint64_t>(value) >> amount;
+ case ASR: {
+ // Shift used to restore the sign.
+ unsigned s_shift = kXRegSize - reg_size;
+ // Value with its sign restored.
+ int64_t s_value = (value << s_shift) >> s_shift;
+ return (s_value >> amount) & mask;
+ }
+ case ROR: {
+ if (reg_size == kWRegSize) {
+ value &= kWRegMask;
+ }
+ return (static_cast<uint64_t>(value) >> amount) |
+ ((value & ((INT64_C(1) << amount) - 1)) <<
+ (reg_size - amount));
+ }
+ default:
+ VIXL_UNIMPLEMENTED();
+ return 0;
+ }
+}
+
+
+int64_t Simulator::ExtendValue(unsigned reg_size,
+ int64_t value,
+ Extend extend_type,
+ unsigned left_shift) {
+ switch (extend_type) {
+ case UXTB:
+ value &= kByteMask;
+ break;
+ case UXTH:
+ value &= kHalfWordMask;
+ break;
+ case UXTW:
+ value &= kWordMask;
+ break;
+ case SXTB:
+ value = (value << 56) >> 56;
+ break;
+ case SXTH:
+ value = (value << 48) >> 48;
+ break;
+ case SXTW:
+ value = (value << 32) >> 32;
+ break;
+ case UXTX:
+ case SXTX:
+ break;
+ default:
+ VIXL_UNREACHABLE();
+ }
+ int64_t mask = (reg_size == kXRegSize) ? kXRegMask : kWRegMask;
+ return (value << left_shift) & mask;
+}
+
+
+void Simulator::FPCompare(double val0, double val1, FPTrapFlags trap) {
+ AssertSupportedFPCR();
+
+ // TODO: This assumes that the C++ implementation handles comparisons in the
+ // way that we expect (as per AssertSupportedFPCR()).
+ bool process_exception = false;
+ if ((std::isnan(val0) != 0) || (std::isnan(val1) != 0)) {
+ nzcv().SetRawValue(FPUnorderedFlag);
+ if (IsSignallingNaN(val0) || IsSignallingNaN(val1) ||
+ (trap == EnableTrap)) {
+ process_exception = true;
+ }
+ } else if (val0 < val1) {
+ nzcv().SetRawValue(FPLessThanFlag);
+ } else if (val0 > val1) {
+ nzcv().SetRawValue(FPGreaterThanFlag);
+ } else if (val0 == val1) {
+ nzcv().SetRawValue(FPEqualFlag);
+ } else {
+ VIXL_UNREACHABLE();
+ }
+ LogSystemRegister(NZCV);
+ if (process_exception) FPProcessException();
+}
+
+
+Simulator::PrintRegisterFormat Simulator::GetPrintRegisterFormatForSize(
+ unsigned reg_size, unsigned lane_size) {
+ VIXL_ASSERT(reg_size >= lane_size);
+
+ uint32_t format = 0;
+ if (reg_size != lane_size) {
+ switch (reg_size) {
+ default: VIXL_UNREACHABLE(); break;
+ case kQRegSizeInBytes: format = kPrintRegAsQVector; break;
+ case kDRegSizeInBytes: format = kPrintRegAsDVector; break;
+ }
+ }
+
+ switch (lane_size) {
+ default: VIXL_UNREACHABLE(); break;
+ case kQRegSizeInBytes: format |= kPrintReg1Q; break;
+ case kDRegSizeInBytes: format |= kPrintReg1D; break;
+ case kSRegSizeInBytes: format |= kPrintReg1S; break;
+ case kHRegSizeInBytes: format |= kPrintReg1H; break;
+ case kBRegSizeInBytes: format |= kPrintReg1B; break;
+ }
+ // These sizes would be duplicate case labels.
+ VIXL_STATIC_ASSERT(kXRegSizeInBytes == kDRegSizeInBytes);
+ VIXL_STATIC_ASSERT(kWRegSizeInBytes == kSRegSizeInBytes);
+ VIXL_STATIC_ASSERT(kPrintXReg == kPrintReg1D);
+ VIXL_STATIC_ASSERT(kPrintWReg == kPrintReg1S);
+
+ return static_cast<PrintRegisterFormat>(format);
+}
+
+
+Simulator::PrintRegisterFormat Simulator::GetPrintRegisterFormat(
+ VectorFormat vform) {
+ switch (vform) {
+ default: VIXL_UNREACHABLE(); return kPrintReg16B;
+ case kFormat16B: return kPrintReg16B;
+ case kFormat8B: return kPrintReg8B;
+ case kFormat8H: return kPrintReg8H;
+ case kFormat4H: return kPrintReg4H;
+ case kFormat4S: return kPrintReg4S;
+ case kFormat2S: return kPrintReg2S;
+ case kFormat2D: return kPrintReg2D;
+ case kFormat1D: return kPrintReg1D;
+ }
+}
+
+
+void Simulator::PrintWrittenRegisters() {
+ for (unsigned i = 0; i < kNumberOfRegisters; i++) {
+ if (registers_[i].WrittenSinceLastLog()) PrintRegister(i);
+ }
+}
+
+
+void Simulator::PrintWrittenVRegisters() {
+ for (unsigned i = 0; i < kNumberOfVRegisters; i++) {
+ // At this point there is no type information, so print as a raw 1Q.
+ if (vregisters_[i].WrittenSinceLastLog()) PrintVRegister(i, kPrintReg1Q);
+ }
+}
+
+
+void Simulator::PrintSystemRegisters() {
+ PrintSystemRegister(NZCV);
+ PrintSystemRegister(FPCR);
+}
+
+
+void Simulator::PrintRegisters() {
+ for (unsigned i = 0; i < kNumberOfRegisters; i++) {
+ PrintRegister(i);
+ }
+}
+
+
+void Simulator::PrintVRegisters() {
+ for (unsigned i = 0; i < kNumberOfVRegisters; i++) {
+ // At this point there is no type information, so print as a raw 1Q.
+ PrintVRegister(i, kPrintReg1Q);
+ }
+}
+
+
+// Print a register's name and raw value.
+//
+// Only the least-significant `size_in_bytes` bytes of the register are printed,
+// but the value is aligned as if the whole register had been printed.
+//
+// For typical register updates, size_in_bytes should be set to kXRegSizeInBytes
+// -- the default -- so that the whole register is printed. Other values of
+// size_in_bytes are intended for use when the register hasn't actually been
+// updated (such as in PrintWrite).
+//
+// No newline is printed. This allows the caller to print more details (such as
+// a memory access annotation).
+void Simulator::PrintRegisterRawHelper(unsigned code, Reg31Mode r31mode,
+ int size_in_bytes) {
+ // The template for all supported sizes.
+ // "# x{code}: 0xffeeddccbbaa9988"
+ // "# w{code}: 0xbbaa9988"
+ // "# w{code}<15:0>: 0x9988"
+ // "# w{code}<7:0>: 0x88"
+ unsigned padding_chars = (kXRegSizeInBytes - size_in_bytes) * 2;
+
+ const char * name = "";
+ const char * suffix = "";
+ switch (size_in_bytes) {
+ case kXRegSizeInBytes: name = XRegNameForCode(code, r31mode); break;
+ case kWRegSizeInBytes: name = WRegNameForCode(code, r31mode); break;
+ case 2:
+ name = WRegNameForCode(code, r31mode);
+ suffix = "<15:0>";
+ padding_chars -= strlen(suffix);
+ break;
+ case 1:
+ name = WRegNameForCode(code, r31mode);
+ suffix = "<7:0>";
+ padding_chars -= strlen(suffix);
+ break;
+ default:
+ VIXL_UNREACHABLE();
+ }
+ fprintf(stream_, "# %s%5s%s: ", clr_reg_name, name, suffix);
+
+ // Print leading padding spaces.
+ VIXL_ASSERT(padding_chars < (kXRegSizeInBytes * 2));
+ for (unsigned i = 0; i < padding_chars; i++) {
+ putc(' ', stream_);
+ }
+
+ // Print the specified bits in hexadecimal format.
+ uint64_t bits = reg<uint64_t>(code, r31mode);
+ bits &= kXRegMask >> ((kXRegSizeInBytes - size_in_bytes) * 8);
+ VIXL_STATIC_ASSERT(sizeof(bits) == kXRegSizeInBytes);
+
+ int chars = size_in_bytes * 2;
+ fprintf(stream_, "%s0x%0*" PRIx64 "%s",
+ clr_reg_value, chars, bits, clr_normal);
+}
+
+
+void Simulator::PrintRegister(unsigned code, Reg31Mode r31mode) {
+ registers_[code].NotifyRegisterLogged();
+
+ // Don't print writes into xzr.
+ if ((code == kZeroRegCode) && (r31mode == Reg31IsZeroRegister)) {
+ return;
+ }
+
+ // The template for all x and w registers:
+ // "# x{code}: 0x{value}"
+ // "# w{code}: 0x{value}"
+
+ PrintRegisterRawHelper(code, r31mode);
+ fprintf(stream_, "\n");
+}
+
+
+// Print a register's name and raw value.
+//
+// The `bytes` and `lsb` arguments can be used to limit the bytes that are
+// printed. These arguments are intended for use in cases where register hasn't
+// actually been updated (such as in PrintVWrite).
+//
+// No newline is printed. This allows the caller to print more details (such as
+// a floating-point interpretation or a memory access annotation).
+void Simulator::PrintVRegisterRawHelper(unsigned code, int bytes, int lsb) {
+ // The template for vector types:
+ // "# v{code}: 0xffeeddccbbaa99887766554433221100".
+ // An example with bytes=4 and lsb=8:
+ // "# v{code}: 0xbbaa9988 ".
+ fprintf(stream_, "# %s%5s: %s",
+ clr_vreg_name, VRegNameForCode(code), clr_vreg_value);
+
+ int msb = lsb + bytes - 1;
+ int byte = kQRegSizeInBytes - 1;
+
+ // Print leading padding spaces. (Two spaces per byte.)
+ while (byte > msb) {
+ fprintf(stream_, " ");
+ byte--;
+ }
+
+ // Print the specified part of the value, byte by byte.
+ qreg_t rawbits = qreg(code);
+ fprintf(stream_, "0x");
+ while (byte >= lsb) {
+ fprintf(stream_, "%02x", rawbits.val[byte]);
+ byte--;
+ }
+
+ // Print trailing padding spaces.
+ while (byte >= 0) {
+ fprintf(stream_, " ");
+ byte--;
+ }
+ fprintf(stream_, "%s", clr_normal);
+}
+
+
+// Print each of the specified lanes of a register as a float or double value.
+//
+// The `lane_count` and `lslane` arguments can be used to limit the lanes that
+// are printed. These arguments are intended for use in cases where register
+// hasn't actually been updated (such as in PrintVWrite).
+//
+// No newline is printed. This allows the caller to print more details (such as
+// a memory access annotation).
+void Simulator::PrintVRegisterFPHelper(unsigned code,
+ unsigned lane_size_in_bytes,
+ int lane_count,
+ int rightmost_lane) {
+ VIXL_ASSERT((lane_size_in_bytes == kSRegSizeInBytes) ||
+ (lane_size_in_bytes == kDRegSizeInBytes));
+
+ unsigned msb = ((lane_count + rightmost_lane) * lane_size_in_bytes);
+ VIXL_ASSERT(msb <= kQRegSizeInBytes);
+
+ // For scalar types ((lane_count == 1) && (rightmost_lane == 0)), a register
+ // name is used:
+ // " (s{code}: {value})"
+ // " (d{code}: {value})"
+ // For vector types, "..." is used to represent one or more omitted lanes.
+ // " (..., {value}, {value}, ...)"
+ if ((lane_count == 1) && (rightmost_lane == 0)) {
+ const char * name =
+ (lane_size_in_bytes == kSRegSizeInBytes) ? SRegNameForCode(code)
+ : DRegNameForCode(code);
+ fprintf(stream_, " (%s%s: ", clr_vreg_name, name);
+ } else {
+ if (msb < (kQRegSizeInBytes - 1)) {
+ fprintf(stream_, " (..., ");
+ } else {
+ fprintf(stream_, " (");
+ }
+ }
+
+ // Print the list of values.
+ const char * separator = "";
+ int leftmost_lane = rightmost_lane + lane_count - 1;
+ for (int lane = leftmost_lane; lane >= rightmost_lane; lane--) {
+ double value =
+ (lane_size_in_bytes == kSRegSizeInBytes) ? vreg(code).Get<float>(lane)
+ : vreg(code).Get<double>(lane);
+ fprintf(stream_, "%s%s%#g%s", separator, clr_vreg_value, value, clr_normal);
+ separator = ", ";
+ }
+
+ if (rightmost_lane > 0) {
+ fprintf(stream_, ", ...");
+ }
+ fprintf(stream_, ")");
+}
+
+
+void Simulator::PrintVRegister(unsigned code, PrintRegisterFormat format) {
+ vregisters_[code].NotifyRegisterLogged();
+
+ int lane_size_log2 = format & kPrintRegLaneSizeMask;
+
+ int reg_size_log2;
+ if (format & kPrintRegAsQVector) {
+ reg_size_log2 = kQRegSizeInBytesLog2;
+ } else if (format & kPrintRegAsDVector) {
+ reg_size_log2 = kDRegSizeInBytesLog2;
+ } else {
+ // Scalar types.
+ reg_size_log2 = lane_size_log2;
+ }
+
+ int lane_count = 1 << (reg_size_log2 - lane_size_log2);
+ int lane_size = 1 << lane_size_log2;
+
+ // The template for vector types:
+ // "# v{code}: 0x{rawbits} (..., {value}, ...)".
+ // The template for scalar types:
+ // "# v{code}: 0x{rawbits} ({reg}:{value})".
+ // The values in parentheses after the bit representations are floating-point
+ // interpretations. They are displayed only if the kPrintVRegAsFP bit is set.
+
+ PrintVRegisterRawHelper(code);
+ if (format & kPrintRegAsFP) {
+ PrintVRegisterFPHelper(code, lane_size, lane_count);
+ }
+
+ fprintf(stream_, "\n");
+}
+
+
+void Simulator::PrintSystemRegister(SystemRegister id) {
+ switch (id) {
+ case NZCV:
+ fprintf(stream_, "# %sNZCV: %sN:%d Z:%d C:%d V:%d%s\n",
+ clr_flag_name, clr_flag_value,
+ nzcv().N(), nzcv().Z(), nzcv().C(), nzcv().V(),
+ clr_normal);
+ break;
+ case FPCR: {
+ static const char * rmode[] = {
+ "0b00 (Round to Nearest)",
+ "0b01 (Round towards Plus Infinity)",
+ "0b10 (Round towards Minus Infinity)",
+ "0b11 (Round towards Zero)"
+ };
+ VIXL_ASSERT(fpcr().RMode() < (sizeof(rmode) / sizeof(rmode[0])));
+ fprintf(stream_,
+ "# %sFPCR: %sAHP:%d DN:%d FZ:%d RMode:%s%s\n",
+ clr_flag_name, clr_flag_value,
+ fpcr().AHP(), fpcr().DN(), fpcr().FZ(), rmode[fpcr().RMode()],
+ clr_normal);
+ break;
+ }
+ default:
+ VIXL_UNREACHABLE();
+ }
+}
+
+
+void Simulator::PrintRead(uintptr_t address,
+ unsigned reg_code,
+ PrintRegisterFormat format) {
+ registers_[reg_code].NotifyRegisterLogged();
+
+ USE(format);
+
+ // The template is "# {reg}: 0x{value} <- {address}".
+ PrintRegisterRawHelper(reg_code, Reg31IsZeroRegister);
+ fprintf(stream_, " <- %s0x%016" PRIxPTR "%s\n",
+ clr_memory_address, address, clr_normal);
+}
+
+
+void Simulator::PrintVRead(uintptr_t address,
+ unsigned reg_code,
+ PrintRegisterFormat format,
+ unsigned lane) {
+ vregisters_[reg_code].NotifyRegisterLogged();
+
+ // The template is "# v{code}: 0x{rawbits} <- address".
+ PrintVRegisterRawHelper(reg_code);
+ if (format & kPrintRegAsFP) {
+ PrintVRegisterFPHelper(reg_code, GetPrintRegLaneSizeInBytes(format),
+ GetPrintRegLaneCount(format), lane);
+ }
+ fprintf(stream_, " <- %s0x%016" PRIxPTR "%s\n",
+ clr_memory_address, address, clr_normal);
+}
+
+
+void Simulator::PrintWrite(uintptr_t address,
+ unsigned reg_code,
+ PrintRegisterFormat format) {
+ VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
+
+ // The template is "# v{code}: 0x{value} -> {address}". To keep the trace tidy
+ // and readable, the value is aligned with the values in the register trace.
+ PrintRegisterRawHelper(reg_code, Reg31IsZeroRegister,
+ GetPrintRegSizeInBytes(format));
+ fprintf(stream_, " -> %s0x%016" PRIxPTR "%s\n",
+ clr_memory_address, address, clr_normal);
+}
+
+
+void Simulator::PrintVWrite(uintptr_t address,
+ unsigned reg_code,
+ PrintRegisterFormat format,
+ unsigned lane) {
+ // The templates:
+ // "# v{code}: 0x{rawbits} -> {address}"
+ // "# v{code}: 0x{rawbits} (..., {value}, ...) -> {address}".
+ // "# v{code}: 0x{rawbits} ({reg}:{value}) -> {address}"
+ // Because this trace doesn't represent a change to the source register's
+ // value, only the relevant part of the value is printed. To keep the trace
+ // tidy and readable, the raw value is aligned with the other values in the
+ // register trace.
+ int lane_count = GetPrintRegLaneCount(format);
+ int lane_size = GetPrintRegLaneSizeInBytes(format);
+ int reg_size = GetPrintRegSizeInBytes(format);
+ PrintVRegisterRawHelper(reg_code, reg_size, lane_size * lane);
+ if (format & kPrintRegAsFP) {
+ PrintVRegisterFPHelper(reg_code, lane_size, lane_count, lane);
+ }
+ fprintf(stream_, " -> %s0x%016" PRIxPTR "%s\n",
+ clr_memory_address, address, clr_normal);
+}
+
+
+// Visitors---------------------------------------------------------------------
+
+void Simulator::VisitUnimplemented(const Instruction* instr) {
+ printf("Unimplemented instruction at %p: 0x%08" PRIx32 "\n",
+ reinterpret_cast<const void*>(instr), instr->InstructionBits());
+ VIXL_UNIMPLEMENTED();
+}
+
+
+void Simulator::VisitUnallocated(const Instruction* instr) {
+ printf("Unallocated instruction at %p: 0x%08" PRIx32 "\n",
+ reinterpret_cast<const void*>(instr), instr->InstructionBits());
+ VIXL_UNIMPLEMENTED();
+}
+
+
+void Simulator::VisitPCRelAddressing(const Instruction* instr) {
+ VIXL_ASSERT((instr->Mask(PCRelAddressingMask) == ADR) ||
+ (instr->Mask(PCRelAddressingMask) == ADRP));
+
+ set_reg(instr->Rd(), instr->ImmPCOffsetTarget());
+}
+
+
+void Simulator::VisitUnconditionalBranch(const Instruction* instr) {
+ switch (instr->Mask(UnconditionalBranchMask)) {
+ case BL:
+ set_lr(instr->NextInstruction());
+ VIXL_FALLTHROUGH();
+ case B:
+ set_pc(instr->ImmPCOffsetTarget());
+ break;
+ default: VIXL_UNREACHABLE();
+ }
+}
+
+
+void Simulator::VisitConditionalBranch(const Instruction* instr) {
+ VIXL_ASSERT(instr->Mask(ConditionalBranchMask) == B_cond);
+ if (ConditionPassed(instr->ConditionBranch())) {
+ set_pc(instr->ImmPCOffsetTarget());
+ }
+}
+
+
+void Simulator::VisitUnconditionalBranchToRegister(const Instruction* instr) {
+ const Instruction* target = Instruction::Cast(xreg(instr->Rn()));
+
+ switch (instr->Mask(UnconditionalBranchToRegisterMask)) {
+ case BLR:
+ set_lr(instr->NextInstruction());
+ VIXL_FALLTHROUGH();
+ case BR:
+ case RET: set_pc(target); break;
+ default: VIXL_UNREACHABLE();
+ }
+}
+
+
+void Simulator::VisitTestBranch(const Instruction* instr) {
+ unsigned bit_pos = (instr->ImmTestBranchBit5() << 5) |
+ instr->ImmTestBranchBit40();
+ bool bit_zero = ((xreg(instr->Rt()) >> bit_pos) & 1) == 0;
+ bool take_branch = false;
+ switch (instr->Mask(TestBranchMask)) {
+ case TBZ: take_branch = bit_zero; break;
+ case TBNZ: take_branch = !bit_zero; break;
+ default: VIXL_UNIMPLEMENTED();
+ }
+ if (take_branch) {
+ set_pc(instr->ImmPCOffsetTarget());
+ }
+}
+
+
+void Simulator::VisitCompareBranch(const Instruction* instr) {
+ unsigned rt = instr->Rt();
+ bool take_branch = false;
+ switch (instr->Mask(CompareBranchMask)) {
+ case CBZ_w: take_branch = (wreg(rt) == 0); break;
+ case CBZ_x: take_branch = (xreg(rt) == 0); break;
+ case CBNZ_w: take_branch = (wreg(rt) != 0); break;
+ case CBNZ_x: take_branch = (xreg(rt) != 0); break;
+ default: VIXL_UNIMPLEMENTED();
+ }
+ if (take_branch) {
+ set_pc(instr->ImmPCOffsetTarget());
+ }
+}
+
+
+void Simulator::AddSubHelper(const Instruction* instr, int64_t op2) {
+ unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
+ bool set_flags = instr->FlagsUpdate();
+ int64_t new_val = 0;
+ Instr operation = instr->Mask(AddSubOpMask);
+
+ switch (operation) {
+ case ADD:
+ case ADDS: {
+ new_val = AddWithCarry(reg_size,
+ set_flags,
+ reg(reg_size, instr->Rn(), instr->RnMode()),
+ op2);
+ break;
+ }
+ case SUB:
+ case SUBS: {
+ new_val = AddWithCarry(reg_size,
+ set_flags,
+ reg(reg_size, instr->Rn(), instr->RnMode()),
+ ~op2,
+ 1);
+ break;
+ }
+ default: VIXL_UNREACHABLE();
+ }
+
+ set_reg(reg_size, instr->Rd(), new_val, LogRegWrites, instr->RdMode());
+}
+
+
+void Simulator::VisitAddSubShifted(const Instruction* instr) {
+ unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
+ int64_t op2 = ShiftOperand(reg_size,
+ reg(reg_size, instr->Rm()),
+ static_cast<Shift>(instr->ShiftDP()),
+ instr->ImmDPShift());
+ AddSubHelper(instr, op2);
+}
+
+
+void Simulator::VisitAddSubImmediate(const Instruction* instr) {
+ int64_t op2 = instr->ImmAddSub() << ((instr->ShiftAddSub() == 1) ? 12 : 0);
+ AddSubHelper(instr, op2);
+}
+
+
+void Simulator::VisitAddSubExtended(const Instruction* instr) {
+ unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
+ int64_t op2 = ExtendValue(reg_size,
+ reg(reg_size, instr->Rm()),
+ static_cast<Extend>(instr->ExtendMode()),
+ instr->ImmExtendShift());
+ AddSubHelper(instr, op2);
+}
+
+
+void Simulator::VisitAddSubWithCarry(const Instruction* instr) {
+ unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
+ int64_t op2 = reg(reg_size, instr->Rm());
+ int64_t new_val;
+
+ if ((instr->Mask(AddSubOpMask) == SUB) || instr->Mask(AddSubOpMask) == SUBS) {
+ op2 = ~op2;
+ }
+
+ new_val = AddWithCarry(reg_size,
+ instr->FlagsUpdate(),
+ reg(reg_size, instr->Rn()),
+ op2,
+ C());
+
+ set_reg(reg_size, instr->Rd(), new_val);
+}
+
+
+void Simulator::VisitLogicalShifted(const Instruction* instr) {
+ unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
+ Shift shift_type = static_cast<Shift>(instr->ShiftDP());
+ unsigned shift_amount = instr->ImmDPShift();
+ int64_t op2 = ShiftOperand(reg_size, reg(reg_size, instr->Rm()), shift_type,
+ shift_amount);
+ if (instr->Mask(NOT) == NOT) {
+ op2 = ~op2;
+ }
+ LogicalHelper(instr, op2);
+}
+
+
+void Simulator::VisitLogicalImmediate(const Instruction* instr) {
+ LogicalHelper(instr, instr->ImmLogical());
+}
+
+
+void Simulator::LogicalHelper(const Instruction* instr, int64_t op2) {
+ unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
+ int64_t op1 = reg(reg_size, instr->Rn());
+ int64_t result = 0;
+ bool update_flags = false;
+
+ // Switch on the logical operation, stripping out the NOT bit, as it has a
+ // different meaning for logical immediate instructions.
+ switch (instr->Mask(LogicalOpMask & ~NOT)) {
+ case ANDS: update_flags = true; VIXL_FALLTHROUGH();
+ case AND: result = op1 & op2; break;
+ case ORR: result = op1 | op2; break;
+ case EOR: result = op1 ^ op2; break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+
+ if (update_flags) {
+ nzcv().SetN(CalcNFlag(result, reg_size));
+ nzcv().SetZ(CalcZFlag(result));
+ nzcv().SetC(0);
+ nzcv().SetV(0);
+ LogSystemRegister(NZCV);
+ }
+
+ set_reg(reg_size, instr->Rd(), result, LogRegWrites, instr->RdMode());
+}
+
+
+void Simulator::VisitConditionalCompareRegister(const Instruction* instr) {
+ unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
+ ConditionalCompareHelper(instr, reg(reg_size, instr->Rm()));
+}
+
+
+void Simulator::VisitConditionalCompareImmediate(const Instruction* instr) {
+ ConditionalCompareHelper(instr, instr->ImmCondCmp());
+}
+
+
+void Simulator::ConditionalCompareHelper(const Instruction* instr,
+ int64_t op2) {
+ unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
+ int64_t op1 = reg(reg_size, instr->Rn());
+
+ if (ConditionPassed(instr->Condition())) {
+ // If the condition passes, set the status flags to the result of comparing
+ // the operands.
+ if (instr->Mask(ConditionalCompareMask) == CCMP) {
+ AddWithCarry(reg_size, true, op1, ~op2, 1);
+ } else {
+ VIXL_ASSERT(instr->Mask(ConditionalCompareMask) == CCMN);
+ AddWithCarry(reg_size, true, op1, op2, 0);
+ }
+ } else {
+ // If the condition fails, set the status flags to the nzcv immediate.
+ nzcv().SetFlags(instr->Nzcv());
+ LogSystemRegister(NZCV);
+ }
+}
+
+
+void Simulator::VisitLoadStoreUnsignedOffset(const Instruction* instr) {
+ int offset = instr->ImmLSUnsigned() << instr->SizeLS();
+ LoadStoreHelper(instr, offset, Offset);
+}
+
+
+void Simulator::VisitLoadStoreUnscaledOffset(const Instruction* instr) {
+ LoadStoreHelper(instr, instr->ImmLS(), Offset);
+}
+
+
+void Simulator::VisitLoadStorePreIndex(const Instruction* instr) {
+ LoadStoreHelper(instr, instr->ImmLS(), PreIndex);
+}
+
+
+void Simulator::VisitLoadStorePostIndex(const Instruction* instr) {
+ LoadStoreHelper(instr, instr->ImmLS(), PostIndex);
+}
+
+
+void Simulator::VisitLoadStoreRegisterOffset(const Instruction* instr) {
+ Extend ext = static_cast<Extend>(instr->ExtendMode());
+ VIXL_ASSERT((ext == UXTW) || (ext == UXTX) || (ext == SXTW) || (ext == SXTX));
+ unsigned shift_amount = instr->ImmShiftLS() * instr->SizeLS();
+
+ int64_t offset = ExtendValue(kXRegSize, xreg(instr->Rm()), ext,
+ shift_amount);
+ LoadStoreHelper(instr, offset, Offset);
+}
+
+template<typename T>
+static T Faulted() {
+ return ~0;
+}
+
+template<>
+Simulator::qreg_t Faulted() {
+ static_assert(kQRegSizeInBytes == 16, "Known constraint");
+ static Simulator::qreg_t dummy = { {
+ 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255
+ } };
+ return dummy;
+}
+
+template<typename T> T
+Simulator::Read(uintptr_t address)
+{
+ address = Memory::AddressUntag(address);
+ if (handle_wasm_seg_fault(address, sizeof(T)))
+ return Faulted<T>();
+ return Memory::Read<T>(address);
+}
+
+template <typename T> void
+Simulator::Write(uintptr_t address, T value)
+{
+ address = Memory::AddressUntag(address);
+ if (handle_wasm_seg_fault(address, sizeof(T)))
+ return;
+ Memory::Write<T>(address, value);
+}
+
+void Simulator::LoadStoreHelper(const Instruction* instr,
+ int64_t offset,
+ AddrMode addrmode) {
+ unsigned srcdst = instr->Rt();
+ uintptr_t address = AddressModeHelper(instr->Rn(), offset, addrmode);
+
+ LoadStoreOp op = static_cast<LoadStoreOp>(instr->Mask(LoadStoreMask));
+ switch (op) {
+ case LDRB_w:
+ set_wreg(srcdst, Read<uint8_t>(address), NoRegLog); break;
+ case LDRH_w:
+ set_wreg(srcdst, Read<uint16_t>(address), NoRegLog); break;
+ case LDR_w:
+ set_wreg(srcdst, Read<uint32_t>(address), NoRegLog); break;
+ case LDR_x:
+ set_xreg(srcdst, Read<uint64_t>(address), NoRegLog); break;
+ case LDRSB_w:
+ set_wreg(srcdst, Read<int8_t>(address), NoRegLog); break;
+ case LDRSH_w:
+ set_wreg(srcdst, Read<int16_t>(address), NoRegLog); break;
+ case LDRSB_x:
+ set_xreg(srcdst, Read<int8_t>(address), NoRegLog); break;
+ case LDRSH_x:
+ set_xreg(srcdst, Read<int16_t>(address), NoRegLog); break;
+ case LDRSW_x:
+ set_xreg(srcdst, Read<int32_t>(address), NoRegLog); break;
+ case LDR_b:
+ set_breg(srcdst, Read<uint8_t>(address), NoRegLog); break;
+ case LDR_h:
+ set_hreg(srcdst, Read<uint16_t>(address), NoRegLog); break;
+ case LDR_s:
+ set_sreg(srcdst, Read<float>(address), NoRegLog); break;
+ case LDR_d:
+ set_dreg(srcdst, Read<double>(address), NoRegLog); break;
+ case LDR_q:
+ set_qreg(srcdst, Read<qreg_t>(address), NoRegLog); break;
+
+ case STRB_w: Write<uint8_t>(address, wreg(srcdst)); break;
+ case STRH_w: Write<uint16_t>(address, wreg(srcdst)); break;
+ case STR_w: Write<uint32_t>(address, wreg(srcdst)); break;
+ case STR_x: Write<uint64_t>(address, xreg(srcdst)); break;
+ case STR_b: Write<uint8_t>(address, breg(srcdst)); break;
+ case STR_h: Write<uint16_t>(address, hreg(srcdst)); break;
+ case STR_s: Write<float>(address, sreg(srcdst)); break;
+ case STR_d: Write<double>(address, dreg(srcdst)); break;
+ case STR_q: Write<qreg_t>(address, qreg(srcdst)); break;
+
+ // Ignore prfm hint instructions.
+ case PRFM: break;
+
+ default: VIXL_UNIMPLEMENTED();
+ }
+
+ unsigned access_size = 1 << instr->SizeLS();
+ if (instr->IsLoad()) {
+ if ((op == LDR_s) || (op == LDR_d)) {
+ LogVRead(address, srcdst, GetPrintRegisterFormatForSizeFP(access_size));
+ } else if ((op == LDR_b) || (op == LDR_h) || (op == LDR_q)) {
+ LogVRead(address, srcdst, GetPrintRegisterFormatForSize(access_size));
+ } else {
+ LogRead(address, srcdst, GetPrintRegisterFormatForSize(access_size));
+ }
+ } else {
+ if ((op == STR_s) || (op == STR_d)) {
+ LogVWrite(address, srcdst, GetPrintRegisterFormatForSizeFP(access_size));
+ } else if ((op == STR_b) || (op == STR_h) || (op == STR_q)) {
+ LogVWrite(address, srcdst, GetPrintRegisterFormatForSize(access_size));
+ } else {
+ LogWrite(address, srcdst, GetPrintRegisterFormatForSize(access_size));
+ }
+ }
+
+ local_monitor_.MaybeClear();
+}
+
+
+void Simulator::VisitLoadStorePairOffset(const Instruction* instr) {
+ LoadStorePairHelper(instr, Offset);
+}
+
+
+void Simulator::VisitLoadStorePairPreIndex(const Instruction* instr) {
+ LoadStorePairHelper(instr, PreIndex);
+}
+
+
+void Simulator::VisitLoadStorePairPostIndex(const Instruction* instr) {
+ LoadStorePairHelper(instr, PostIndex);
+}
+
+
+void Simulator::VisitLoadStorePairNonTemporal(const Instruction* instr) {
+ LoadStorePairHelper(instr, Offset);
+}
+
+
+void Simulator::LoadStorePairHelper(const Instruction* instr,
+ AddrMode addrmode) {
+ unsigned rt = instr->Rt();
+ unsigned rt2 = instr->Rt2();
+ int element_size = 1 << instr->SizeLSPair();
+ int64_t offset = instr->ImmLSPair() * element_size;
+ uintptr_t address = AddressModeHelper(instr->Rn(), offset, addrmode);
+ uintptr_t address2 = address + element_size;
+
+ LoadStorePairOp op =
+ static_cast<LoadStorePairOp>(instr->Mask(LoadStorePairMask));
+
+ // 'rt' and 'rt2' can only be aliased for stores.
+ VIXL_ASSERT(((op & LoadStorePairLBit) == 0) || (rt != rt2));
+
+ switch (op) {
+ // Use NoRegLog to suppress the register trace (LOG_REGS, LOG_FP_REGS). We
+ // will print a more detailed log.
+ case LDP_w: {
+ set_wreg(rt, Read<uint32_t>(address), NoRegLog);
+ set_wreg(rt2, Read<uint32_t>(address2), NoRegLog);
+ break;
+ }
+ case LDP_s: {
+ set_sreg(rt, Read<float>(address), NoRegLog);
+ set_sreg(rt2, Read<float>(address2), NoRegLog);
+ break;
+ }
+ case LDP_x: {
+ set_xreg(rt, Read<uint64_t>(address), NoRegLog);
+ set_xreg(rt2, Read<uint64_t>(address2), NoRegLog);
+ break;
+ }
+ case LDP_d: {
+ set_dreg(rt, Read<double>(address), NoRegLog);
+ set_dreg(rt2, Read<double>(address2), NoRegLog);
+ break;
+ }
+ case LDP_q: {
+ set_qreg(rt, Read<qreg_t>(address), NoRegLog);
+ set_qreg(rt2, Read<qreg_t>(address2), NoRegLog);
+ break;
+ }
+ case LDPSW_x: {
+ set_xreg(rt, Read<int32_t>(address), NoRegLog);
+ set_xreg(rt2, Read<int32_t>(address2), NoRegLog);
+ break;
+ }
+ case STP_w: {
+ Write<uint32_t>(address, wreg(rt));
+ Write<uint32_t>(address2, wreg(rt2));
+ break;
+ }
+ case STP_s: {
+ Write<float>(address, sreg(rt));
+ Write<float>(address2, sreg(rt2));
+ break;
+ }
+ case STP_x: {
+ Write<uint64_t>(address, xreg(rt));
+ Write<uint64_t>(address2, xreg(rt2));
+ break;
+ }
+ case STP_d: {
+ Write<double>(address, dreg(rt));
+ Write<double>(address2, dreg(rt2));
+ break;
+ }
+ case STP_q: {
+ Write<qreg_t>(address, qreg(rt));
+ Write<qreg_t>(address2, qreg(rt2));
+ break;
+ }
+ default: VIXL_UNREACHABLE();
+ }
+
+ // Print a detailed trace (including the memory address) instead of the basic
+ // register:value trace generated by set_*reg().
+ if (instr->IsLoad()) {
+ if ((op == LDP_s) || (op == LDP_d)) {
+ LogVRead(address, rt, GetPrintRegisterFormatForSizeFP(element_size));
+ LogVRead(address2, rt2, GetPrintRegisterFormatForSizeFP(element_size));
+ } else if (op == LDP_q) {
+ LogVRead(address, rt, GetPrintRegisterFormatForSize(element_size));
+ LogVRead(address2, rt2, GetPrintRegisterFormatForSize(element_size));
+ } else {
+ LogRead(address, rt, GetPrintRegisterFormatForSize(element_size));
+ LogRead(address2, rt2, GetPrintRegisterFormatForSize(element_size));
+ }
+ } else {
+ if ((op == STP_s) || (op == STP_d)) {
+ LogVWrite(address, rt, GetPrintRegisterFormatForSizeFP(element_size));
+ LogVWrite(address2, rt2, GetPrintRegisterFormatForSizeFP(element_size));
+ } else if (op == STP_q) {
+ LogVWrite(address, rt, GetPrintRegisterFormatForSize(element_size));
+ LogVWrite(address2, rt2, GetPrintRegisterFormatForSize(element_size));
+ } else {
+ LogWrite(address, rt, GetPrintRegisterFormatForSize(element_size));
+ LogWrite(address2, rt2, GetPrintRegisterFormatForSize(element_size));
+ }
+ }
+
+ local_monitor_.MaybeClear();
+}
+
+
+void Simulator::PrintExclusiveAccessWarning() {
+ if (print_exclusive_access_warning_) {
+ fprintf(
+ stderr,
+ "%sWARNING:%s VIXL simulator support for load-/store-/clear-exclusive "
+ "instructions is limited. Refer to the README for details.%s\n",
+ clr_warning, clr_warning_message, clr_normal);
+ print_exclusive_access_warning_ = false;
+ }
+}
+
+template <typename T>
+void Simulator::CompareAndSwapHelper(const Instruction* instr) {
+ unsigned rs = instr->Rs();
+ unsigned rt = instr->Rt();
+ unsigned rn = instr->Rn();
+
+ unsigned element_size = sizeof(T);
+ uint64_t address = reg<uint64_t>(rn, Reg31IsStackPointer);
+
+ // Verify that the address is available to the host.
+ VIXL_ASSERT(address == static_cast<uintptr_t>(address));
+
+ address = Memory::AddressUntag(address);
+ if (handle_wasm_seg_fault(address, element_size))
+ return;
+
+ bool is_acquire = instr->Bit(22) == 1;
+ bool is_release = instr->Bit(15) == 1;
+
+ T comparevalue = reg<T>(rs);
+ T newvalue = reg<T>(rt);
+
+ // The architecture permits that the data read clears any exclusive monitors
+ // associated with that location, even if the compare subsequently fails.
+ local_monitor_.Clear();
+
+ T data = Memory::Read<T>(address);
+ if (is_acquire) {
+ // Approximate load-acquire by issuing a full barrier after the load.
+ __sync_synchronize();
+ }
+
+ if (data == comparevalue) {
+ if (is_release) {
+ // Approximate store-release by issuing a full barrier before the store.
+ __sync_synchronize();
+ }
+ Memory::Write<T>(address, newvalue);
+ LogWrite(address, rt, GetPrintRegisterFormatForSize(element_size));
+ }
+ set_reg<T>(rs, data);
+ LogRead(address, rs, GetPrintRegisterFormatForSize(element_size));
+}
+
+template <typename T>
+void Simulator::CompareAndSwapPairHelper(const Instruction* instr) {
+ VIXL_ASSERT((sizeof(T) == 4) || (sizeof(T) == 8));
+ unsigned rs = instr->Rs();
+ unsigned rt = instr->Rt();
+ unsigned rn = instr->Rn();
+
+ VIXL_ASSERT((rs % 2 == 0) && (rs % 2 == 0));
+
+ unsigned element_size = sizeof(T);
+ uint64_t address = reg<uint64_t>(rn, Reg31IsStackPointer);
+
+ // Verify that the address is available to the host.
+ VIXL_ASSERT(address == static_cast<uintptr_t>(address));
+
+ address = Memory::AddressUntag(address);
+ if (handle_wasm_seg_fault(address, element_size))
+ return;
+
+ uint64_t address2 = address + element_size;
+
+ bool is_acquire = instr->Bit(22) == 1;
+ bool is_release = instr->Bit(15) == 1;
+
+ T comparevalue_high = reg<T>(rs + 1);
+ T comparevalue_low = reg<T>(rs);
+ T newvalue_high = reg<T>(rt + 1);
+ T newvalue_low = reg<T>(rt);
+
+ // The architecture permits that the data read clears any exclusive monitors
+ // associated with that location, even if the compare subsequently fails.
+ local_monitor_.Clear();
+
+ T data_high = Memory::Read<T>(address);
+ T data_low = Memory::Read<T>(address2);
+
+ if (is_acquire) {
+ // Approximate load-acquire by issuing a full barrier after the load.
+ __sync_synchronize();
+ }
+
+ bool same =
+ (data_high == comparevalue_high) && (data_low == comparevalue_low);
+ if (same) {
+ if (is_release) {
+ // Approximate store-release by issuing a full barrier before the store.
+ __sync_synchronize();
+ }
+
+ Memory::Write<T>(address, newvalue_high);
+ Memory::Write<T>(address2, newvalue_low);
+ }
+
+ set_reg<T>(rs + 1, data_high);
+ set_reg<T>(rs, data_low);
+
+ LogRead(address, rs + 1, GetPrintRegisterFormatForSize(element_size));
+ LogRead(address2, rs, GetPrintRegisterFormatForSize(element_size));
+
+ if (same) {
+ LogWrite(address, rt + 1, GetPrintRegisterFormatForSize(element_size));
+ LogWrite(address2, rt, GetPrintRegisterFormatForSize(element_size));
+ }
+}
+
+void Simulator::VisitLoadStoreExclusive(const Instruction* instr) {
+ LoadStoreExclusive op =
+ static_cast<LoadStoreExclusive>(instr->Mask(LoadStoreExclusiveMask));
+
+ switch (op) {
+ case CAS_w:
+ case CASA_w:
+ case CASL_w:
+ case CASAL_w:
+ CompareAndSwapHelper<uint32_t>(instr);
+ break;
+ case CAS_x:
+ case CASA_x:
+ case CASL_x:
+ case CASAL_x:
+ CompareAndSwapHelper<uint64_t>(instr);
+ break;
+ case CASB:
+ case CASAB:
+ case CASLB:
+ case CASALB:
+ CompareAndSwapHelper<uint8_t>(instr);
+ break;
+ case CASH:
+ case CASAH:
+ case CASLH:
+ case CASALH:
+ CompareAndSwapHelper<uint16_t>(instr);
+ break;
+ case CASP_w:
+ case CASPA_w:
+ case CASPL_w:
+ case CASPAL_w:
+ CompareAndSwapPairHelper<uint32_t>(instr);
+ break;
+ case CASP_x:
+ case CASPA_x:
+ case CASPL_x:
+ case CASPAL_x:
+ CompareAndSwapPairHelper<uint64_t>(instr);
+ break;
+ default:
+ PrintExclusiveAccessWarning();
+
+ unsigned rs = instr->Rs();
+ unsigned rt = instr->Rt();
+ unsigned rt2 = instr->Rt2();
+ unsigned rn = instr->Rn();
+
+ bool is_exclusive = !instr->LdStXNotExclusive();
+ bool is_acquire_release = !is_exclusive || instr->LdStXAcquireRelease();
+ bool is_load = instr->LdStXLoad();
+ bool is_pair = instr->LdStXPair();
+
+ unsigned element_size = 1 << instr->LdStXSizeLog2();
+ unsigned access_size = is_pair ? element_size * 2 : element_size;
+ uint64_t address = reg<uint64_t>(rn, Reg31IsStackPointer);
+
+ // Verify that the address is available to the host.
+ VIXL_ASSERT(address == static_cast<uintptr_t>(address));
+
+ // Check the alignment of `address`.
+ if (AlignDown(address, access_size) != address) {
+ VIXL_ALIGNMENT_EXCEPTION();
+ }
+
+ // The sp must be aligned to 16 bytes when it is accessed.
+ if ((rn == 31) && (AlignDown(address, 16) != address)) {
+ VIXL_ALIGNMENT_EXCEPTION();
+ }
+
+ if (is_load) {
+ if (is_exclusive) {
+ local_monitor_.MarkExclusive(address, access_size);
+ } else {
+ // Any non-exclusive load can clear the local monitor as a side
+ // effect. We don't need to do this, but it is useful to stress the
+ // simulated code.
+ local_monitor_.Clear();
+ }
+
+ // Use NoRegLog to suppress the register trace (LOG_REGS, LOG_FP_REGS).
+ // We will print a more detailed log.
+ switch (op) {
+ case LDXRB_w:
+ case LDAXRB_w:
+ case LDARB_w:
+ set_wreg(rt, Read<uint8_t>(address), NoRegLog);
+ break;
+ case LDXRH_w:
+ case LDAXRH_w:
+ case LDARH_w:
+ set_wreg(rt, Read<uint16_t>(address), NoRegLog);
+ break;
+ case LDXR_w:
+ case LDAXR_w:
+ case LDAR_w:
+ set_wreg(rt, Read<uint32_t>(address), NoRegLog);
+ break;
+ case LDXR_x:
+ case LDAXR_x:
+ case LDAR_x:
+ set_xreg(rt, Read<uint64_t>(address), NoRegLog);
+ break;
+ case LDXP_w:
+ case LDAXP_w:
+ set_wreg(rt, Read<uint32_t>(address), NoRegLog);
+ set_wreg(rt2, Read<uint32_t>(address + element_size), NoRegLog);
+ break;
+ case LDXP_x:
+ case LDAXP_x:
+ set_xreg(rt, Read<uint64_t>(address), NoRegLog);
+ set_xreg(rt2, Read<uint64_t>(address + element_size), NoRegLog);
+ break;
+ default:
+ VIXL_UNREACHABLE();
+ }
+
+ if (is_acquire_release) {
+ // Approximate load-acquire by issuing a full barrier after the load.
+ js::jit::AtomicOperations::fenceSeqCst();
+ }
+
+ LogRead(address, rt, GetPrintRegisterFormatForSize(element_size));
+ if (is_pair) {
+ LogRead(address + element_size, rt2,
+ GetPrintRegisterFormatForSize(element_size));
+ }
+ } else {
+ if (is_acquire_release) {
+ // Approximate store-release by issuing a full barrier before the
+ // store.
+ js::jit::AtomicOperations::fenceSeqCst();
+ }
+
+ bool do_store = true;
+ if (is_exclusive) {
+ do_store = local_monitor_.IsExclusive(address, access_size) &&
+ global_monitor_.IsExclusive(address, access_size);
+ set_wreg(rs, do_store ? 0 : 1);
+
+ // - All exclusive stores explicitly clear the local monitor.
+ local_monitor_.Clear();
+ } else {
+ // - Any other store can clear the local monitor as a side effect.
+ local_monitor_.MaybeClear();
+ }
+
+ if (do_store) {
+ switch (op) {
+ case STXRB_w:
+ case STLXRB_w:
+ case STLRB_w:
+ Write<uint8_t>(address, wreg(rt));
+ break;
+ case STXRH_w:
+ case STLXRH_w:
+ case STLRH_w:
+ Write<uint16_t>(address, wreg(rt));
+ break;
+ case STXR_w:
+ case STLXR_w:
+ case STLR_w:
+ Write<uint32_t>(address, wreg(rt));
+ break;
+ case STXR_x:
+ case STLXR_x:
+ case STLR_x:
+ Write<uint64_t>(address, xreg(rt));
+ break;
+ case STXP_w:
+ case STLXP_w:
+ Write<uint32_t>(address, wreg(rt));
+ Write<uint32_t>(address + element_size, wreg(rt2));
+ break;
+ case STXP_x:
+ case STLXP_x:
+ Write<uint64_t>(address, xreg(rt));
+ Write<uint64_t>(address + element_size, xreg(rt2));
+ break;
+ default:
+ VIXL_UNREACHABLE();
+ }
+
+ LogWrite(address, rt, GetPrintRegisterFormatForSize(element_size));
+ if (is_pair) {
+ LogWrite(address + element_size, rt2,
+ GetPrintRegisterFormatForSize(element_size));
+ }
+ }
+ }
+ }
+}
+
+template <typename T>
+void Simulator::AtomicMemorySimpleHelper(const Instruction* instr) {
+ unsigned rs = instr->Rs();
+ unsigned rt = instr->Rt();
+ unsigned rn = instr->Rn();
+
+ bool is_acquire = (instr->Bit(23) == 1) && (rt != kZeroRegCode);
+ bool is_release = instr->Bit(22) == 1;
+
+ unsigned element_size = sizeof(T);
+ uint64_t address = reg<uint64_t>(rn, Reg31IsStackPointer);
+
+ // Verify that the address is available to the host.
+ VIXL_ASSERT(address == static_cast<uintptr_t>(address));
+
+ address = Memory::AddressUntag(address);
+ if (handle_wasm_seg_fault(address, sizeof(T)))
+ return;
+
+ T value = reg<T>(rs);
+
+ T data = Memory::Read<T>(address);
+
+ if (is_acquire) {
+ // Approximate load-acquire by issuing a full barrier after the load.
+ __sync_synchronize();
+ }
+
+ T result = 0;
+ switch (instr->Mask(AtomicMemorySimpleOpMask)) {
+ case LDADDOp:
+ result = data + value;
+ break;
+ case LDCLROp:
+ VIXL_ASSERT(!std::numeric_limits<T>::is_signed);
+ result = data & ~value;
+ break;
+ case LDEOROp:
+ VIXL_ASSERT(!std::numeric_limits<T>::is_signed);
+ result = data ^ value;
+ break;
+ case LDSETOp:
+ VIXL_ASSERT(!std::numeric_limits<T>::is_signed);
+ result = data | value;
+ break;
+
+ // Signed/Unsigned difference is done via the templated type T.
+ case LDSMAXOp:
+ case LDUMAXOp:
+ result = (data > value) ? data : value;
+ break;
+ case LDSMINOp:
+ case LDUMINOp:
+ result = (data > value) ? value : data;
+ break;
+ }
+
+ if (is_release) {
+ // Approximate store-release by issuing a full barrier before the store.
+ __sync_synchronize();
+ }
+
+ Memory::Write<T>(address, result);
+ set_reg<T>(rt, data, NoRegLog);
+
+ LogRead(address, rt, GetPrintRegisterFormatForSize(element_size));
+ LogWrite(address, rs, GetPrintRegisterFormatForSize(element_size));
+}
+
+template <typename T>
+void Simulator::AtomicMemorySwapHelper(const Instruction* instr) {
+ unsigned rs = instr->Rs();
+ unsigned rt = instr->Rt();
+ unsigned rn = instr->Rn();
+
+ bool is_acquire = (instr->Bit(23) == 1) && (rt != kZeroRegCode);
+ bool is_release = instr->Bit(22) == 1;
+
+ unsigned element_size = sizeof(T);
+ uint64_t address = reg<uint64_t>(rn, Reg31IsStackPointer);
+
+ // Verify that the address is available to the host.
+ VIXL_ASSERT(address == static_cast<uintptr_t>(address));
+
+ address = Memory::AddressUntag(address);
+ if (handle_wasm_seg_fault(address, sizeof(T)))
+ return;
+
+ T data = Memory::Read<T>(address);
+ if (is_acquire) {
+ // Approximate load-acquire by issuing a full barrier after the load.
+ __sync_synchronize();
+ }
+
+ if (is_release) {
+ // Approximate store-release by issuing a full barrier before the store.
+ __sync_synchronize();
+ }
+ Memory::Write<T>(address, reg<T>(rs));
+
+ set_reg<T>(rt, data);
+
+ LogRead(address, rt, GetPrintRegisterFormat(element_size));
+ LogWrite(address, rs, GetPrintRegisterFormat(element_size));
+}
+
+template <typename T>
+void Simulator::LoadAcquireRCpcHelper(const Instruction* instr) {
+ unsigned rt = instr->Rt();
+ unsigned rn = instr->Rn();
+
+ unsigned element_size = sizeof(T);
+ uint64_t address = reg<uint64_t>(rn, Reg31IsStackPointer);
+
+ // Verify that the address is available to the host.
+ VIXL_ASSERT(address == static_cast<uintptr_t>(address));
+
+ address = Memory::AddressUntag(address);
+ if (handle_wasm_seg_fault(address, sizeof(T)))
+ return;
+
+ set_reg<T>(rt, Memory::Read<T>(address));
+
+ // Approximate load-acquire by issuing a full barrier after the load.
+ __sync_synchronize();
+
+ LogRead(address, rt, GetPrintRegisterFormat(element_size));
+}
+
+#define ATOMIC_MEMORY_SIMPLE_UINT_LIST(V) \
+ V(LDADD) \
+ V(LDCLR) \
+ V(LDEOR) \
+ V(LDSET) \
+ V(LDUMAX) \
+ V(LDUMIN)
+
+#define ATOMIC_MEMORY_SIMPLE_INT_LIST(V) \
+ V(LDSMAX) \
+ V(LDSMIN)
+
+void Simulator::VisitAtomicMemory(const Instruction* instr) {
+ switch (instr->Mask(AtomicMemoryMask)) {
+// clang-format off
+#define SIM_FUNC_B(A) \
+ case A##B: \
+ case A##AB: \
+ case A##LB: \
+ case A##ALB:
+#define SIM_FUNC_H(A) \
+ case A##H: \
+ case A##AH: \
+ case A##LH: \
+ case A##ALH:
+#define SIM_FUNC_w(A) \
+ case A##_w: \
+ case A##A_w: \
+ case A##L_w: \
+ case A##AL_w:
+#define SIM_FUNC_x(A) \
+ case A##_x: \
+ case A##A_x: \
+ case A##L_x: \
+ case A##AL_x:
+
+ ATOMIC_MEMORY_SIMPLE_UINT_LIST(SIM_FUNC_B)
+ AtomicMemorySimpleHelper<uint8_t>(instr);
+ break;
+ ATOMIC_MEMORY_SIMPLE_INT_LIST(SIM_FUNC_B)
+ AtomicMemorySimpleHelper<int8_t>(instr);
+ break;
+ ATOMIC_MEMORY_SIMPLE_UINT_LIST(SIM_FUNC_H)
+ AtomicMemorySimpleHelper<uint16_t>(instr);
+ break;
+ ATOMIC_MEMORY_SIMPLE_INT_LIST(SIM_FUNC_H)
+ AtomicMemorySimpleHelper<int16_t>(instr);
+ break;
+ ATOMIC_MEMORY_SIMPLE_UINT_LIST(SIM_FUNC_w)
+ AtomicMemorySimpleHelper<uint32_t>(instr);
+ break;
+ ATOMIC_MEMORY_SIMPLE_INT_LIST(SIM_FUNC_w)
+ AtomicMemorySimpleHelper<int32_t>(instr);
+ break;
+ ATOMIC_MEMORY_SIMPLE_UINT_LIST(SIM_FUNC_x)
+ AtomicMemorySimpleHelper<uint64_t>(instr);
+ break;
+ ATOMIC_MEMORY_SIMPLE_INT_LIST(SIM_FUNC_x)
+ AtomicMemorySimpleHelper<int64_t>(instr);
+ break;
+ // clang-format on
+
+ case SWPB:
+ case SWPAB:
+ case SWPLB:
+ case SWPALB:
+ AtomicMemorySwapHelper<uint8_t>(instr);
+ break;
+ case SWPH:
+ case SWPAH:
+ case SWPLH:
+ case SWPALH:
+ AtomicMemorySwapHelper<uint16_t>(instr);
+ break;
+ case SWP_w:
+ case SWPA_w:
+ case SWPL_w:
+ case SWPAL_w:
+ AtomicMemorySwapHelper<uint32_t>(instr);
+ break;
+ case SWP_x:
+ case SWPA_x:
+ case SWPL_x:
+ case SWPAL_x:
+ AtomicMemorySwapHelper<uint64_t>(instr);
+ break;
+ case LDAPRB:
+ LoadAcquireRCpcHelper<uint8_t>(instr);
+ break;
+ case LDAPRH:
+ LoadAcquireRCpcHelper<uint16_t>(instr);
+ break;
+ case LDAPR_w:
+ LoadAcquireRCpcHelper<uint32_t>(instr);
+ break;
+ case LDAPR_x:
+ LoadAcquireRCpcHelper<uint64_t>(instr);
+ break;
+ }
+}
+
+void Simulator::VisitLoadLiteral(const Instruction* instr) {
+ unsigned rt = instr->Rt();
+ uint64_t address = instr->LiteralAddress<uint64_t>();
+
+ // Verify that the calculated address is available to the host.
+ VIXL_ASSERT(address == static_cast<uintptr_t>(address));
+
+ switch (instr->Mask(LoadLiteralMask)) {
+ // Use NoRegLog to suppress the register trace (LOG_REGS, LOG_VREGS), then
+ // print a more detailed log.
+ case LDR_w_lit:
+ set_wreg(rt, Read<uint32_t>(address), NoRegLog);
+ LogRead(address, rt, kPrintWReg);
+ break;
+ case LDR_x_lit:
+ set_xreg(rt, Read<uint64_t>(address), NoRegLog);
+ LogRead(address, rt, kPrintXReg);
+ break;
+ case LDR_s_lit:
+ set_sreg(rt, Read<float>(address), NoRegLog);
+ LogVRead(address, rt, kPrintSReg);
+ break;
+ case LDR_d_lit:
+ set_dreg(rt, Read<double>(address), NoRegLog);
+ LogVRead(address, rt, kPrintDReg);
+ break;
+ case LDR_q_lit:
+ set_qreg(rt, Read<qreg_t>(address), NoRegLog);
+ LogVRead(address, rt, kPrintReg1Q);
+ break;
+ case LDRSW_x_lit:
+ set_xreg(rt, Read<int32_t>(address), NoRegLog);
+ LogRead(address, rt, kPrintWReg);
+ break;
+
+ // Ignore prfm hint instructions.
+ case PRFM_lit: break;
+
+ default: VIXL_UNREACHABLE();
+ }
+
+ local_monitor_.MaybeClear();
+}
+
+
+uintptr_t Simulator::AddressModeHelper(unsigned addr_reg,
+ int64_t offset,
+ AddrMode addrmode) {
+ uint64_t address = xreg(addr_reg, Reg31IsStackPointer);
+
+ if ((addr_reg == 31) && ((address % 16) != 0)) {
+ // When the base register is SP the stack pointer is required to be
+ // quadword aligned prior to the address calculation and write-backs.
+ // Misalignment will cause a stack alignment fault.
+ VIXL_ALIGNMENT_EXCEPTION();
+ }
+
+ if ((addrmode == PreIndex) || (addrmode == PostIndex)) {
+ VIXL_ASSERT(offset != 0);
+ // Only preindex should log the register update here. For Postindex, the
+ // update will be printed automatically by LogWrittenRegisters _after_ the
+ // memory access itself is logged.
+ RegLogMode log_mode = (addrmode == PreIndex) ? LogRegWrites : NoRegLog;
+ set_xreg(addr_reg, address + offset, log_mode, Reg31IsStackPointer);
+ }
+
+ if ((addrmode == Offset) || (addrmode == PreIndex)) {
+ address += offset;
+ }
+
+ // Verify that the calculated address is available to the host.
+ VIXL_ASSERT(address == static_cast<uintptr_t>(address));
+
+ return static_cast<uintptr_t>(address);
+}
+
+
+void Simulator::VisitMoveWideImmediate(const Instruction* instr) {
+ MoveWideImmediateOp mov_op =
+ static_cast<MoveWideImmediateOp>(instr->Mask(MoveWideImmediateMask));
+ int64_t new_xn_val = 0;
+
+ bool is_64_bits = instr->SixtyFourBits() == 1;
+ // Shift is limited for W operations.
+ VIXL_ASSERT(is_64_bits || (instr->ShiftMoveWide() < 2));
+
+ // Get the shifted immediate.
+ int64_t shift = instr->ShiftMoveWide() * 16;
+ int64_t shifted_imm16 = static_cast<int64_t>(instr->ImmMoveWide()) << shift;
+
+ // Compute the new value.
+ switch (mov_op) {
+ case MOVN_w:
+ case MOVN_x: {
+ new_xn_val = ~shifted_imm16;
+ if (!is_64_bits) new_xn_val &= kWRegMask;
+ break;
+ }
+ case MOVK_w:
+ case MOVK_x: {
+ unsigned reg_code = instr->Rd();
+ int64_t prev_xn_val = is_64_bits ? xreg(reg_code)
+ : wreg(reg_code);
+ new_xn_val =
+ (prev_xn_val & ~(INT64_C(0xffff) << shift)) | shifted_imm16;
+ break;
+ }
+ case MOVZ_w:
+ case MOVZ_x: {
+ new_xn_val = shifted_imm16;
+ break;
+ }
+ default:
+ VIXL_UNREACHABLE();
+ }
+
+ // Update the destination register.
+ set_xreg(instr->Rd(), new_xn_val);
+}
+
+
+void Simulator::VisitConditionalSelect(const Instruction* instr) {
+ uint64_t new_val = xreg(instr->Rn());
+
+ if (ConditionFailed(static_cast<Condition>(instr->Condition()))) {
+ new_val = xreg(instr->Rm());
+ switch (instr->Mask(ConditionalSelectMask)) {
+ case CSEL_w:
+ case CSEL_x: break;
+ case CSINC_w:
+ case CSINC_x: new_val++; break;
+ case CSINV_w:
+ case CSINV_x: new_val = ~new_val; break;
+ case CSNEG_w:
+ case CSNEG_x: new_val = -new_val; break;
+ default: VIXL_UNIMPLEMENTED();
+ }
+ }
+ unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
+ set_reg(reg_size, instr->Rd(), new_val);
+}
+
+
+void Simulator::VisitDataProcessing1Source(const Instruction* instr) {
+ unsigned dst = instr->Rd();
+ unsigned src = instr->Rn();
+
+ switch (instr->Mask(DataProcessing1SourceMask)) {
+ case RBIT_w: set_wreg(dst, ReverseBits(wreg(src))); break;
+ case RBIT_x: set_xreg(dst, ReverseBits(xreg(src))); break;
+ case REV16_w: set_wreg(dst, ReverseBytes(wreg(src), 1)); break;
+ case REV16_x: set_xreg(dst, ReverseBytes(xreg(src), 1)); break;
+ case REV_w: set_wreg(dst, ReverseBytes(wreg(src), 2)); break;
+ case REV32_x: set_xreg(dst, ReverseBytes(xreg(src), 2)); break;
+ case REV_x: set_xreg(dst, ReverseBytes(xreg(src), 3)); break;
+ case CLZ_w: set_wreg(dst, CountLeadingZeros(wreg(src))); break;
+ case CLZ_x: set_xreg(dst, CountLeadingZeros(xreg(src))); break;
+ case CLS_w: {
+ set_wreg(dst, CountLeadingSignBits(wreg(src)));
+ break;
+ }
+ case CLS_x: {
+ set_xreg(dst, CountLeadingSignBits(xreg(src)));
+ break;
+ }
+ default: VIXL_UNIMPLEMENTED();
+ }
+}
+
+
+uint32_t Simulator::Poly32Mod2(unsigned n, uint64_t data, uint32_t poly) {
+ VIXL_ASSERT((n > 32) && (n <= 64));
+ for (unsigned i = (n - 1); i >= 32; i--) {
+ if (((data >> i) & 1) != 0) {
+ uint64_t polysh32 = (uint64_t)poly << (i - 32);
+ uint64_t mask = (UINT64_C(1) << i) - 1;
+ data = ((data & mask) ^ polysh32);
+ }
+ }
+ return data & 0xffffffff;
+}
+
+
+template <typename T>
+uint32_t Simulator::Crc32Checksum(uint32_t acc, T val, uint32_t poly) {
+ unsigned size = sizeof(val) * 8; // Number of bits in type T.
+ VIXL_ASSERT((size == 8) || (size == 16) || (size == 32));
+ uint64_t tempacc = static_cast<uint64_t>(ReverseBits(acc)) << size;
+ uint64_t tempval = static_cast<uint64_t>(ReverseBits(val)) << 32;
+ return ReverseBits(Poly32Mod2(32 + size, tempacc ^ tempval, poly));
+}
+
+
+uint32_t Simulator::Crc32Checksum(uint32_t acc, uint64_t val, uint32_t poly) {
+ // Poly32Mod2 cannot handle inputs with more than 32 bits, so compute
+ // the CRC of each 32-bit word sequentially.
+ acc = Crc32Checksum(acc, (uint32_t)(val & 0xffffffff), poly);
+ return Crc32Checksum(acc, (uint32_t)(val >> 32), poly);
+}
+
+
+void Simulator::VisitDataProcessing2Source(const Instruction* instr) {
+ Shift shift_op = NO_SHIFT;
+ int64_t result = 0;
+ unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
+
+ switch (instr->Mask(DataProcessing2SourceMask)) {
+ case SDIV_w: {
+ int32_t rn = wreg(instr->Rn());
+ int32_t rm = wreg(instr->Rm());
+ if ((rn == kWMinInt) && (rm == -1)) {
+ result = kWMinInt;
+ } else if (rm == 0) {
+ // Division by zero can be trapped, but not on A-class processors.
+ result = 0;
+ } else {
+ result = rn / rm;
+ }
+ break;
+ }
+ case SDIV_x: {
+ int64_t rn = xreg(instr->Rn());
+ int64_t rm = xreg(instr->Rm());
+ if ((rn == kXMinInt) && (rm == -1)) {
+ result = kXMinInt;
+ } else if (rm == 0) {
+ // Division by zero can be trapped, but not on A-class processors.
+ result = 0;
+ } else {
+ result = rn / rm;
+ }
+ break;
+ }
+ case UDIV_w: {
+ uint32_t rn = static_cast<uint32_t>(wreg(instr->Rn()));
+ uint32_t rm = static_cast<uint32_t>(wreg(instr->Rm()));
+ if (rm == 0) {
+ // Division by zero can be trapped, but not on A-class processors.
+ result = 0;
+ } else {
+ result = rn / rm;
+ }
+ break;
+ }
+ case UDIV_x: {
+ uint64_t rn = static_cast<uint64_t>(xreg(instr->Rn()));
+ uint64_t rm = static_cast<uint64_t>(xreg(instr->Rm()));
+ if (rm == 0) {
+ // Division by zero can be trapped, but not on A-class processors.
+ result = 0;
+ } else {
+ result = rn / rm;
+ }
+ break;
+ }
+ case LSLV_w:
+ case LSLV_x: shift_op = LSL; break;
+ case LSRV_w:
+ case LSRV_x: shift_op = LSR; break;
+ case ASRV_w:
+ case ASRV_x: shift_op = ASR; break;
+ case RORV_w:
+ case RORV_x: shift_op = ROR; break;
+ case CRC32B: {
+ uint32_t acc = reg<uint32_t>(instr->Rn());
+ uint8_t val = reg<uint8_t>(instr->Rm());
+ result = Crc32Checksum(acc, val, CRC32_POLY);
+ break;
+ }
+ case CRC32H: {
+ uint32_t acc = reg<uint32_t>(instr->Rn());
+ uint16_t val = reg<uint16_t>(instr->Rm());
+ result = Crc32Checksum(acc, val, CRC32_POLY);
+ break;
+ }
+ case CRC32W: {
+ uint32_t acc = reg<uint32_t>(instr->Rn());
+ uint32_t val = reg<uint32_t>(instr->Rm());
+ result = Crc32Checksum(acc, val, CRC32_POLY);
+ break;
+ }
+ case CRC32X: {
+ uint32_t acc = reg<uint32_t>(instr->Rn());
+ uint64_t val = reg<uint64_t>(instr->Rm());
+ result = Crc32Checksum(acc, val, CRC32_POLY);
+ reg_size = kWRegSize;
+ break;
+ }
+ case CRC32CB: {
+ uint32_t acc = reg<uint32_t>(instr->Rn());
+ uint8_t val = reg<uint8_t>(instr->Rm());
+ result = Crc32Checksum(acc, val, CRC32C_POLY);
+ break;
+ }
+ case CRC32CH: {
+ uint32_t acc = reg<uint32_t>(instr->Rn());
+ uint16_t val = reg<uint16_t>(instr->Rm());
+ result = Crc32Checksum(acc, val, CRC32C_POLY);
+ break;
+ }
+ case CRC32CW: {
+ uint32_t acc = reg<uint32_t>(instr->Rn());
+ uint32_t val = reg<uint32_t>(instr->Rm());
+ result = Crc32Checksum(acc, val, CRC32C_POLY);
+ break;
+ }
+ case CRC32CX: {
+ uint32_t acc = reg<uint32_t>(instr->Rn());
+ uint64_t val = reg<uint64_t>(instr->Rm());
+ result = Crc32Checksum(acc, val, CRC32C_POLY);
+ reg_size = kWRegSize;
+ break;
+ }
+ default: VIXL_UNIMPLEMENTED();
+ }
+
+ if (shift_op != NO_SHIFT) {
+ // Shift distance encoded in the least-significant five/six bits of the
+ // register.
+ int mask = (instr->SixtyFourBits() == 1) ? 0x3f : 0x1f;
+ unsigned shift = wreg(instr->Rm()) & mask;
+ result = ShiftOperand(reg_size, reg(reg_size, instr->Rn()), shift_op,
+ shift);
+ }
+ set_reg(reg_size, instr->Rd(), result);
+}
+
+
+// The algorithm used is adapted from the one described in section 8.2 of
+// Hacker's Delight, by Henry S. Warren, Jr.
+// It assumes that a right shift on a signed integer is an arithmetic shift.
+// Type T must be either uint64_t or int64_t.
+template <typename T>
+static T MultiplyHigh(T u, T v) {
+ uint64_t u0, v0, w0;
+ T u1, v1, w1, w2, t;
+
+ VIXL_ASSERT(sizeof(u) == sizeof(u0));
+
+ u0 = u & 0xffffffff;
+ u1 = u >> 32;
+ v0 = v & 0xffffffff;
+ v1 = v >> 32;
+
+ w0 = u0 * v0;
+ t = u1 * v0 + (w0 >> 32);
+ w1 = t & 0xffffffff;
+ w2 = t >> 32;
+ w1 = u0 * v1 + w1;
+
+ return u1 * v1 + w2 + (w1 >> 32);
+}
+
+
+void Simulator::VisitDataProcessing3Source(const Instruction* instr) {
+ unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
+
+ int64_t result = 0;
+ // Extract and sign- or zero-extend 32-bit arguments for widening operations.
+ uint64_t rn_u32 = reg<uint32_t>(instr->Rn());
+ uint64_t rm_u32 = reg<uint32_t>(instr->Rm());
+ int64_t rn_s32 = reg<int32_t>(instr->Rn());
+ int64_t rm_s32 = reg<int32_t>(instr->Rm());
+ switch (instr->Mask(DataProcessing3SourceMask)) {
+ case MADD_w:
+ case MADD_x:
+ result = xreg(instr->Ra()) + (xreg(instr->Rn()) * xreg(instr->Rm()));
+ break;
+ case MSUB_w:
+ case MSUB_x:
+ result = xreg(instr->Ra()) - (xreg(instr->Rn()) * xreg(instr->Rm()));
+ break;
+ case SMADDL_x: result = xreg(instr->Ra()) + (rn_s32 * rm_s32); break;
+ case SMSUBL_x: result = xreg(instr->Ra()) - (rn_s32 * rm_s32); break;
+ case UMADDL_x: result = xreg(instr->Ra()) + (rn_u32 * rm_u32); break;
+ case UMSUBL_x: result = xreg(instr->Ra()) - (rn_u32 * rm_u32); break;
+ case UMULH_x:
+ result = MultiplyHigh(reg<uint64_t>(instr->Rn()),
+ reg<uint64_t>(instr->Rm()));
+ break;
+ case SMULH_x:
+ result = MultiplyHigh(xreg(instr->Rn()), xreg(instr->Rm()));
+ break;
+ default: VIXL_UNIMPLEMENTED();
+ }
+ set_reg(reg_size, instr->Rd(), result);
+}
+
+
+void Simulator::VisitBitfield(const Instruction* instr) {
+ unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
+ int64_t reg_mask = instr->SixtyFourBits() ? kXRegMask : kWRegMask;
+ int64_t R = instr->ImmR();
+ int64_t S = instr->ImmS();
+ int64_t diff = S - R;
+ int64_t mask;
+ if (diff >= 0) {
+ mask = (diff < (reg_size - 1)) ? (INT64_C(1) << (diff + 1)) - 1
+ : reg_mask;
+ } else {
+ mask = (INT64_C(1) << (S + 1)) - 1;
+ mask = (static_cast<uint64_t>(mask) >> R) | (mask << (reg_size - R));
+ diff += reg_size;
+ }
+
+ // inzero indicates if the extracted bitfield is inserted into the
+ // destination register value or in zero.
+ // If extend is true, extend the sign of the extracted bitfield.
+ bool inzero = false;
+ bool extend = false;
+ switch (instr->Mask(BitfieldMask)) {
+ case BFM_x:
+ case BFM_w:
+ break;
+ case SBFM_x:
+ case SBFM_w:
+ inzero = true;
+ extend = true;
+ break;
+ case UBFM_x:
+ case UBFM_w:
+ inzero = true;
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+
+ int64_t dst = inzero ? 0 : reg(reg_size, instr->Rd());
+ int64_t src = reg(reg_size, instr->Rn());
+ // Rotate source bitfield into place.
+ int64_t result = (static_cast<uint64_t>(src) >> R) | (src << (reg_size - R));
+ // Determine the sign extension.
+ int64_t topbits = ((INT64_C(1) << (reg_size - diff - 1)) - 1) << (diff + 1);
+ int64_t signbits = extend && ((src >> S) & 1) ? topbits : 0;
+
+ // Merge sign extension, dest/zero and bitfield.
+ result = signbits | (result & mask) | (dst & ~mask);
+
+ set_reg(reg_size, instr->Rd(), result);
+}
+
+
+void Simulator::VisitExtract(const Instruction* instr) {
+ unsigned lsb = instr->ImmS();
+ unsigned reg_size = (instr->SixtyFourBits() == 1) ? kXRegSize
+ : kWRegSize;
+ uint64_t low_res = static_cast<uint64_t>(reg(reg_size, instr->Rm())) >> lsb;
+ uint64_t high_res =
+ (lsb == 0) ? 0 : reg(reg_size, instr->Rn()) << (reg_size - lsb);
+ set_reg(reg_size, instr->Rd(), low_res | high_res);
+}
+
+
+void Simulator::VisitFPImmediate(const Instruction* instr) {
+ AssertSupportedFPCR();
+
+ unsigned dest = instr->Rd();
+ switch (instr->Mask(FPImmediateMask)) {
+ case FMOV_s_imm: set_sreg(dest, instr->ImmFP32()); break;
+ case FMOV_d_imm: set_dreg(dest, instr->ImmFP64()); break;
+ default: VIXL_UNREACHABLE();
+ }
+}
+
+
+void Simulator::VisitFPIntegerConvert(const Instruction* instr) {
+ AssertSupportedFPCR();
+
+ unsigned dst = instr->Rd();
+ unsigned src = instr->Rn();
+
+ FPRounding round = RMode();
+
+ switch (instr->Mask(FPIntegerConvertMask)) {
+ case FCVTAS_ws: set_wreg(dst, FPToInt32(sreg(src), FPTieAway)); break;
+ case FCVTAS_xs: set_xreg(dst, FPToInt64(sreg(src), FPTieAway)); break;
+ case FCVTAS_wd: set_wreg(dst, FPToInt32(dreg(src), FPTieAway)); break;
+ case FCVTAS_xd: set_xreg(dst, FPToInt64(dreg(src), FPTieAway)); break;
+ case FCVTAU_ws: set_wreg(dst, FPToUInt32(sreg(src), FPTieAway)); break;
+ case FCVTAU_xs: set_xreg(dst, FPToUInt64(sreg(src), FPTieAway)); break;
+ case FCVTAU_wd: set_wreg(dst, FPToUInt32(dreg(src), FPTieAway)); break;
+ case FCVTAU_xd: set_xreg(dst, FPToUInt64(dreg(src), FPTieAway)); break;
+ case FCVTMS_ws:
+ set_wreg(dst, FPToInt32(sreg(src), FPNegativeInfinity));
+ break;
+ case FCVTMS_xs:
+ set_xreg(dst, FPToInt64(sreg(src), FPNegativeInfinity));
+ break;
+ case FCVTMS_wd:
+ set_wreg(dst, FPToInt32(dreg(src), FPNegativeInfinity));
+ break;
+ case FCVTMS_xd:
+ set_xreg(dst, FPToInt64(dreg(src), FPNegativeInfinity));
+ break;
+ case FCVTMU_ws:
+ set_wreg(dst, FPToUInt32(sreg(src), FPNegativeInfinity));
+ break;
+ case FCVTMU_xs:
+ set_xreg(dst, FPToUInt64(sreg(src), FPNegativeInfinity));
+ break;
+ case FCVTMU_wd:
+ set_wreg(dst, FPToUInt32(dreg(src), FPNegativeInfinity));
+ break;
+ case FCVTMU_xd:
+ set_xreg(dst, FPToUInt64(dreg(src), FPNegativeInfinity));
+ break;
+ case FCVTPS_ws:
+ set_wreg(dst, FPToInt32(sreg(src), FPPositiveInfinity));
+ break;
+ case FCVTPS_xs:
+ set_xreg(dst, FPToInt64(sreg(src), FPPositiveInfinity));
+ break;
+ case FCVTPS_wd:
+ set_wreg(dst, FPToInt32(dreg(src), FPPositiveInfinity));
+ break;
+ case FCVTPS_xd:
+ set_xreg(dst, FPToInt64(dreg(src), FPPositiveInfinity));
+ break;
+ case FCVTPU_ws:
+ set_wreg(dst, FPToUInt32(sreg(src), FPPositiveInfinity));
+ break;
+ case FCVTPU_xs:
+ set_xreg(dst, FPToUInt64(sreg(src), FPPositiveInfinity));
+ break;
+ case FCVTPU_wd:
+ set_wreg(dst, FPToUInt32(dreg(src), FPPositiveInfinity));
+ break;
+ case FCVTPU_xd:
+ set_xreg(dst, FPToUInt64(dreg(src), FPPositiveInfinity));
+ break;
+ case FCVTNS_ws: set_wreg(dst, FPToInt32(sreg(src), FPTieEven)); break;
+ case FCVTNS_xs: set_xreg(dst, FPToInt64(sreg(src), FPTieEven)); break;
+ case FCVTNS_wd: set_wreg(dst, FPToInt32(dreg(src), FPTieEven)); break;
+ case FCVTNS_xd: set_xreg(dst, FPToInt64(dreg(src), FPTieEven)); break;
+ case FCVTNU_ws: set_wreg(dst, FPToUInt32(sreg(src), FPTieEven)); break;
+ case FCVTNU_xs: set_xreg(dst, FPToUInt64(sreg(src), FPTieEven)); break;
+ case FCVTNU_wd: set_wreg(dst, FPToUInt32(dreg(src), FPTieEven)); break;
+ case FCVTNU_xd: set_xreg(dst, FPToUInt64(dreg(src), FPTieEven)); break;
+ case FCVTZS_ws: set_wreg(dst, FPToInt32(sreg(src), FPZero)); break;
+ case FCVTZS_xs: set_xreg(dst, FPToInt64(sreg(src), FPZero)); break;
+ case FCVTZS_wd: set_wreg(dst, FPToInt32(dreg(src), FPZero)); break;
+ case FCVTZS_xd: set_xreg(dst, FPToInt64(dreg(src), FPZero)); break;
+ case FCVTZU_ws: set_wreg(dst, FPToUInt32(sreg(src), FPZero)); break;
+ case FCVTZU_xs: set_xreg(dst, FPToUInt64(sreg(src), FPZero)); break;
+ case FCVTZU_wd: set_wreg(dst, FPToUInt32(dreg(src), FPZero)); break;
+ case FCVTZU_xd: set_xreg(dst, FPToUInt64(dreg(src), FPZero)); break;
+ case FJCVTZS: set_wreg(dst, FPToFixedJS(dreg(src))); break;
+ case FMOV_ws: set_wreg(dst, sreg_bits(src)); break;
+ case FMOV_xd: set_xreg(dst, dreg_bits(src)); break;
+ case FMOV_sw: set_sreg_bits(dst, wreg(src)); break;
+ case FMOV_dx: set_dreg_bits(dst, xreg(src)); break;
+ case FMOV_d1_x:
+ LogicVRegister(vreg(dst)).SetUint(kFormatD, 1, xreg(src));
+ break;
+ case FMOV_x_d1:
+ set_xreg(dst, LogicVRegister(vreg(src)).Uint(kFormatD, 1));
+ break;
+
+ // A 32-bit input can be handled in the same way as a 64-bit input, since
+ // the sign- or zero-extension will not affect the conversion.
+ case SCVTF_dx: set_dreg(dst, FixedToDouble(xreg(src), 0, round)); break;
+ case SCVTF_dw: set_dreg(dst, FixedToDouble(wreg(src), 0, round)); break;
+ case UCVTF_dx: set_dreg(dst, UFixedToDouble(xreg(src), 0, round)); break;
+ case UCVTF_dw: {
+ set_dreg(dst, UFixedToDouble(static_cast<uint32_t>(wreg(src)), 0, round));
+ break;
+ }
+ case SCVTF_sx: set_sreg(dst, FixedToFloat(xreg(src), 0, round)); break;
+ case SCVTF_sw: set_sreg(dst, FixedToFloat(wreg(src), 0, round)); break;
+ case UCVTF_sx: set_sreg(dst, UFixedToFloat(xreg(src), 0, round)); break;
+ case UCVTF_sw: {
+ set_sreg(dst, UFixedToFloat(static_cast<uint32_t>(wreg(src)), 0, round));
+ break;
+ }
+
+ default: VIXL_UNREACHABLE();
+ }
+}
+
+
+void Simulator::VisitFPFixedPointConvert(const Instruction* instr) {
+ AssertSupportedFPCR();
+
+ unsigned dst = instr->Rd();
+ unsigned src = instr->Rn();
+ int fbits = 64 - instr->FPScale();
+
+ FPRounding round = RMode();
+
+ switch (instr->Mask(FPFixedPointConvertMask)) {
+ // A 32-bit input can be handled in the same way as a 64-bit input, since
+ // the sign- or zero-extension will not affect the conversion.
+ case SCVTF_dx_fixed:
+ set_dreg(dst, FixedToDouble(xreg(src), fbits, round));
+ break;
+ case SCVTF_dw_fixed:
+ set_dreg(dst, FixedToDouble(wreg(src), fbits, round));
+ break;
+ case UCVTF_dx_fixed:
+ set_dreg(dst, UFixedToDouble(xreg(src), fbits, round));
+ break;
+ case UCVTF_dw_fixed: {
+ set_dreg(dst,
+ UFixedToDouble(static_cast<uint32_t>(wreg(src)), fbits, round));
+ break;
+ }
+ case SCVTF_sx_fixed:
+ set_sreg(dst, FixedToFloat(xreg(src), fbits, round));
+ break;
+ case SCVTF_sw_fixed:
+ set_sreg(dst, FixedToFloat(wreg(src), fbits, round));
+ break;
+ case UCVTF_sx_fixed:
+ set_sreg(dst, UFixedToFloat(xreg(src), fbits, round));
+ break;
+ case UCVTF_sw_fixed: {
+ set_sreg(dst,
+ UFixedToFloat(static_cast<uint32_t>(wreg(src)), fbits, round));
+ break;
+ }
+ case FCVTZS_xd_fixed:
+ set_xreg(dst, FPToInt64(dreg(src) * std::pow(2.0, fbits), FPZero));
+ break;
+ case FCVTZS_wd_fixed:
+ set_wreg(dst, FPToInt32(dreg(src) * std::pow(2.0, fbits), FPZero));
+ break;
+ case FCVTZU_xd_fixed:
+ set_xreg(dst, FPToUInt64(dreg(src) * std::pow(2.0, fbits), FPZero));
+ break;
+ case FCVTZU_wd_fixed:
+ set_wreg(dst, FPToUInt32(dreg(src) * std::pow(2.0, fbits), FPZero));
+ break;
+ case FCVTZS_xs_fixed:
+ set_xreg(dst, FPToInt64(sreg(src) * std::pow(2.0f, fbits), FPZero));
+ break;
+ case FCVTZS_ws_fixed:
+ set_wreg(dst, FPToInt32(sreg(src) * std::pow(2.0f, fbits), FPZero));
+ break;
+ case FCVTZU_xs_fixed:
+ set_xreg(dst, FPToUInt64(sreg(src) * std::pow(2.0f, fbits), FPZero));
+ break;
+ case FCVTZU_ws_fixed:
+ set_wreg(dst, FPToUInt32(sreg(src) * std::pow(2.0f, fbits), FPZero));
+ break;
+ default: VIXL_UNREACHABLE();
+ }
+}
+
+
+void Simulator::VisitFPCompare(const Instruction* instr) {
+ AssertSupportedFPCR();
+
+ FPTrapFlags trap = DisableTrap;
+ switch (instr->Mask(FPCompareMask)) {
+ case FCMPE_s: trap = EnableTrap; VIXL_FALLTHROUGH();
+ case FCMP_s: FPCompare(sreg(instr->Rn()), sreg(instr->Rm()), trap); break;
+ case FCMPE_d: trap = EnableTrap; VIXL_FALLTHROUGH();
+ case FCMP_d: FPCompare(dreg(instr->Rn()), dreg(instr->Rm()), trap); break;
+ case FCMPE_s_zero: trap = EnableTrap; VIXL_FALLTHROUGH();
+ case FCMP_s_zero: FPCompare(sreg(instr->Rn()), 0.0f, trap); break;
+ case FCMPE_d_zero: trap = EnableTrap; VIXL_FALLTHROUGH();
+ case FCMP_d_zero: FPCompare(dreg(instr->Rn()), 0.0, trap); break;
+ default: VIXL_UNIMPLEMENTED();
+ }
+}
+
+
+void Simulator::VisitFPConditionalCompare(const Instruction* instr) {
+ AssertSupportedFPCR();
+
+ FPTrapFlags trap = DisableTrap;
+ switch (instr->Mask(FPConditionalCompareMask)) {
+ case FCCMPE_s: trap = EnableTrap;
+ VIXL_FALLTHROUGH();
+ case FCCMP_s:
+ if (ConditionPassed(instr->Condition())) {
+ FPCompare(sreg(instr->Rn()), sreg(instr->Rm()), trap);
+ } else {
+ nzcv().SetFlags(instr->Nzcv());
+ LogSystemRegister(NZCV);
+ }
+ break;
+ case FCCMPE_d: trap = EnableTrap;
+ VIXL_FALLTHROUGH();
+ case FCCMP_d:
+ if (ConditionPassed(instr->Condition())) {
+ FPCompare(dreg(instr->Rn()), dreg(instr->Rm()), trap);
+ } else {
+ nzcv().SetFlags(instr->Nzcv());
+ LogSystemRegister(NZCV);
+ }
+ break;
+ default: VIXL_UNIMPLEMENTED();
+ }
+}
+
+
+void Simulator::VisitFPConditionalSelect(const Instruction* instr) {
+ AssertSupportedFPCR();
+
+ Instr selected;
+ if (ConditionPassed(instr->Condition())) {
+ selected = instr->Rn();
+ } else {
+ selected = instr->Rm();
+ }
+
+ switch (instr->Mask(FPConditionalSelectMask)) {
+ case FCSEL_s: set_sreg(instr->Rd(), sreg(selected)); break;
+ case FCSEL_d: set_dreg(instr->Rd(), dreg(selected)); break;
+ default: VIXL_UNIMPLEMENTED();
+ }
+}
+
+
+void Simulator::VisitFPDataProcessing1Source(const Instruction* instr) {
+ AssertSupportedFPCR();
+
+ FPRounding fpcr_rounding = static_cast<FPRounding>(fpcr().RMode());
+ VectorFormat vform = (instr->Mask(FP64) == FP64) ? kFormatD : kFormatS;
+ SimVRegister& rd = vreg(instr->Rd());
+ SimVRegister& rn = vreg(instr->Rn());
+ bool inexact_exception = false;
+
+ unsigned fd = instr->Rd();
+ unsigned fn = instr->Rn();
+
+ switch (instr->Mask(FPDataProcessing1SourceMask)) {
+ case FMOV_s: set_sreg(fd, sreg(fn)); return;
+ case FMOV_d: set_dreg(fd, dreg(fn)); return;
+ case FABS_s: fabs_(kFormatS, vreg(fd), vreg(fn)); return;
+ case FABS_d: fabs_(kFormatD, vreg(fd), vreg(fn)); return;
+ case FNEG_s: fneg(kFormatS, vreg(fd), vreg(fn)); return;
+ case FNEG_d: fneg(kFormatD, vreg(fd), vreg(fn)); return;
+ case FCVT_ds:
+ set_dreg(fd, FPToDouble(sreg(fn), ReadDN()));
+ return;
+ case FCVT_sd:
+ set_sreg(fd, FPToFloat(dreg(fn), FPTieEven, ReadDN()));
+ return;
+ case FCVT_hs:
+ set_hreg(fd, Float16ToRawbits(FPToFloat16(sreg(fn), FPTieEven, ReadDN())));
+ return;
+ case FCVT_sh:
+ set_sreg(fd, FPToFloat(RawbitsToFloat16(hreg(fn)), ReadDN()));
+ return;
+ case FCVT_dh:
+ set_dreg(fd, FPToDouble(hreg(fn), ReadDN()));
+ return;
+ case FCVT_hd:
+ set_hreg(fd, Float16ToRawbits(FPToFloat16(dreg(fn), FPTieEven, ReadDN())));
+ return;
+ case FSQRT_s:
+ case FSQRT_d: fsqrt(vform, rd, rn); return;
+ case FRINTI_s:
+ case FRINTI_d: break; // Use FPCR rounding mode.
+ case FRINTX_s:
+ case FRINTX_d: inexact_exception = true; break;
+ case FRINTA_s:
+ case FRINTA_d: fpcr_rounding = FPTieAway; break;
+ case FRINTM_s:
+ case FRINTM_d: fpcr_rounding = FPNegativeInfinity; break;
+ case FRINTN_s:
+ case FRINTN_d: fpcr_rounding = FPTieEven; break;
+ case FRINTP_s:
+ case FRINTP_d: fpcr_rounding = FPPositiveInfinity; break;
+ case FRINTZ_s:
+ case FRINTZ_d: fpcr_rounding = FPZero; break;
+ default: VIXL_UNIMPLEMENTED();
+ }
+
+ // Only FRINT* instructions fall through the switch above.
+ frint(vform, rd, rn, fpcr_rounding, inexact_exception);
+}
+
+
+void Simulator::VisitFPDataProcessing2Source(const Instruction* instr) {
+ AssertSupportedFPCR();
+
+ VectorFormat vform = (instr->Mask(FP64) == FP64) ? kFormatD : kFormatS;
+ SimVRegister& rd = vreg(instr->Rd());
+ SimVRegister& rn = vreg(instr->Rn());
+ SimVRegister& rm = vreg(instr->Rm());
+
+ switch (instr->Mask(FPDataProcessing2SourceMask)) {
+ case FADD_s:
+ case FADD_d: fadd(vform, rd, rn, rm); break;
+ case FSUB_s:
+ case FSUB_d: fsub(vform, rd, rn, rm); break;
+ case FMUL_s:
+ case FMUL_d: fmul(vform, rd, rn, rm); break;
+ case FNMUL_s:
+ case FNMUL_d: fnmul(vform, rd, rn, rm); break;
+ case FDIV_s:
+ case FDIV_d: fdiv(vform, rd, rn, rm); break;
+ case FMAX_s:
+ case FMAX_d: fmax(vform, rd, rn, rm); break;
+ case FMIN_s:
+ case FMIN_d: fmin(vform, rd, rn, rm); break;
+ case FMAXNM_s:
+ case FMAXNM_d: fmaxnm(vform, rd, rn, rm); break;
+ case FMINNM_s:
+ case FMINNM_d: fminnm(vform, rd, rn, rm); break;
+ default:
+ VIXL_UNREACHABLE();
+ }
+}
+
+
+void Simulator::VisitFPDataProcessing3Source(const Instruction* instr) {
+ AssertSupportedFPCR();
+
+ unsigned fd = instr->Rd();
+ unsigned fn = instr->Rn();
+ unsigned fm = instr->Rm();
+ unsigned fa = instr->Ra();
+
+ switch (instr->Mask(FPDataProcessing3SourceMask)) {
+ // fd = fa +/- (fn * fm)
+ case FMADD_s: set_sreg(fd, FPMulAdd(sreg(fa), sreg(fn), sreg(fm))); break;
+ case FMSUB_s: set_sreg(fd, FPMulAdd(sreg(fa), -sreg(fn), sreg(fm))); break;
+ case FMADD_d: set_dreg(fd, FPMulAdd(dreg(fa), dreg(fn), dreg(fm))); break;
+ case FMSUB_d: set_dreg(fd, FPMulAdd(dreg(fa), -dreg(fn), dreg(fm))); break;
+ // Negated variants of the above.
+ case FNMADD_s:
+ set_sreg(fd, FPMulAdd(-sreg(fa), -sreg(fn), sreg(fm)));
+ break;
+ case FNMSUB_s:
+ set_sreg(fd, FPMulAdd(-sreg(fa), sreg(fn), sreg(fm)));
+ break;
+ case FNMADD_d:
+ set_dreg(fd, FPMulAdd(-dreg(fa), -dreg(fn), dreg(fm)));
+ break;
+ case FNMSUB_d:
+ set_dreg(fd, FPMulAdd(-dreg(fa), dreg(fn), dreg(fm)));
+ break;
+ default: VIXL_UNIMPLEMENTED();
+ }
+}
+
+
+bool Simulator::FPProcessNaNs(const Instruction* instr) {
+ unsigned fd = instr->Rd();
+ unsigned fn = instr->Rn();
+ unsigned fm = instr->Rm();
+ bool done = false;
+
+ if (instr->Mask(FP64) == FP64) {
+ double result = FPProcessNaNs(dreg(fn), dreg(fm));
+ if (std::isnan(result)) {
+ set_dreg(fd, result);
+ done = true;
+ }
+ } else {
+ float result = FPProcessNaNs(sreg(fn), sreg(fm));
+ if (std::isnan(result)) {
+ set_sreg(fd, result);
+ done = true;
+ }
+ }
+
+ return done;
+}
+
+
+void Simulator::SysOp_W(int op, int64_t val) {
+ switch (op) {
+ case IVAU:
+ case CVAC:
+ case CVAU:
+ case CIVAC: {
+ // Perform a dummy memory access to ensure that we have read access
+ // to the specified address.
+ volatile uint8_t y = Read<uint8_t>(val);
+ USE(y);
+ // TODO: Implement "case ZVA:".
+ break;
+ }
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+}
+
+
+void Simulator::VisitSystem(const Instruction* instr) {
+ // Some system instructions hijack their Op and Cp fields to represent a
+ // range of immediates instead of indicating a different instruction. This
+ // makes the decoding tricky.
+ if (instr->Mask(SystemExclusiveMonitorFMask) == SystemExclusiveMonitorFixed) {
+ VIXL_ASSERT(instr->Mask(SystemExclusiveMonitorMask) == CLREX);
+ switch (instr->Mask(SystemExclusiveMonitorMask)) {
+ case CLREX: {
+ PrintExclusiveAccessWarning();
+ ClearLocalMonitor();
+ break;
+ }
+ }
+ } else if (instr->Mask(SystemSysRegFMask) == SystemSysRegFixed) {
+ switch (instr->Mask(SystemSysRegMask)) {
+ case MRS: {
+ switch (instr->ImmSystemRegister()) {
+ case NZCV: set_xreg(instr->Rt(), nzcv().RawValue()); break;
+ case FPCR: set_xreg(instr->Rt(), fpcr().RawValue()); break;
+ default: VIXL_UNIMPLEMENTED();
+ }
+ break;
+ }
+ case MSR: {
+ switch (instr->ImmSystemRegister()) {
+ case NZCV:
+ nzcv().SetRawValue(wreg(instr->Rt()));
+ LogSystemRegister(NZCV);
+ break;
+ case FPCR:
+ fpcr().SetRawValue(wreg(instr->Rt()));
+ LogSystemRegister(FPCR);
+ break;
+ default: VIXL_UNIMPLEMENTED();
+ }
+ break;
+ }
+ }
+ } else if (instr->Mask(SystemHintFMask) == SystemHintFixed) {
+ VIXL_ASSERT(instr->Mask(SystemHintMask) == HINT);
+ switch (instr->ImmHint()) {
+ case NOP: break;
+ case CSDB: break;
+ default: VIXL_UNIMPLEMENTED();
+ }
+ } else if (instr->Mask(MemBarrierFMask) == MemBarrierFixed) {
+ js::jit::AtomicOperations::fenceSeqCst();
+ } else if ((instr->Mask(SystemSysFMask) == SystemSysFixed)) {
+ switch (instr->Mask(SystemSysMask)) {
+ case SYS: SysOp_W(instr->SysOp(), xreg(instr->Rt())); break;
+ default: VIXL_UNIMPLEMENTED();
+ }
+ } else {
+ VIXL_UNIMPLEMENTED();
+ }
+}
+
+
+void Simulator::VisitCrypto2RegSHA(const Instruction* instr) {
+ VisitUnimplemented(instr);
+}
+
+
+void Simulator::VisitCrypto3RegSHA(const Instruction* instr) {
+ VisitUnimplemented(instr);
+}
+
+
+void Simulator::VisitCryptoAES(const Instruction* instr) {
+ VisitUnimplemented(instr);
+}
+
+
+void Simulator::VisitNEON2RegMisc(const Instruction* instr) {
+ NEONFormatDecoder nfd(instr);
+ VectorFormat vf = nfd.GetVectorFormat();
+
+ static const NEONFormatMap map_lp = {
+ {23, 22, 30}, {NF_4H, NF_8H, NF_2S, NF_4S, NF_1D, NF_2D}
+ };
+ VectorFormat vf_lp = nfd.GetVectorFormat(&map_lp);
+
+ static const NEONFormatMap map_fcvtl = {
+ {22}, {NF_4S, NF_2D}
+ };
+ VectorFormat vf_fcvtl = nfd.GetVectorFormat(&map_fcvtl);
+
+ static const NEONFormatMap map_fcvtn = {
+ {22, 30}, {NF_4H, NF_8H, NF_2S, NF_4S}
+ };
+ VectorFormat vf_fcvtn = nfd.GetVectorFormat(&map_fcvtn);
+
+ SimVRegister& rd = vreg(instr->Rd());
+ SimVRegister& rn = vreg(instr->Rn());
+
+ if (instr->Mask(NEON2RegMiscOpcode) <= NEON_NEG_opcode) {
+ // These instructions all use a two bit size field, except NOT and RBIT,
+ // which use the field to encode the operation.
+ switch (instr->Mask(NEON2RegMiscMask)) {
+ case NEON_REV64: rev64(vf, rd, rn); break;
+ case NEON_REV32: rev32(vf, rd, rn); break;
+ case NEON_REV16: rev16(vf, rd, rn); break;
+ case NEON_SUQADD: suqadd(vf, rd, rn); break;
+ case NEON_USQADD: usqadd(vf, rd, rn); break;
+ case NEON_CLS: cls(vf, rd, rn); break;
+ case NEON_CLZ: clz(vf, rd, rn); break;
+ case NEON_CNT: cnt(vf, rd, rn); break;
+ case NEON_SQABS: abs(vf, rd, rn).SignedSaturate(vf); break;
+ case NEON_SQNEG: neg(vf, rd, rn).SignedSaturate(vf); break;
+ case NEON_CMGT_zero: cmp(vf, rd, rn, 0, gt); break;
+ case NEON_CMGE_zero: cmp(vf, rd, rn, 0, ge); break;
+ case NEON_CMEQ_zero: cmp(vf, rd, rn, 0, eq); break;
+ case NEON_CMLE_zero: cmp(vf, rd, rn, 0, le); break;
+ case NEON_CMLT_zero: cmp(vf, rd, rn, 0, lt); break;
+ case NEON_ABS: abs(vf, rd, rn); break;
+ case NEON_NEG: neg(vf, rd, rn); break;
+ case NEON_SADDLP: saddlp(vf_lp, rd, rn); break;
+ case NEON_UADDLP: uaddlp(vf_lp, rd, rn); break;
+ case NEON_SADALP: sadalp(vf_lp, rd, rn); break;
+ case NEON_UADALP: uadalp(vf_lp, rd, rn); break;
+ case NEON_RBIT_NOT:
+ vf = nfd.GetVectorFormat(nfd.LogicalFormatMap());
+ switch (instr->FPType()) {
+ case 0: not_(vf, rd, rn); break;
+ case 1: rbit(vf, rd, rn);; break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+ break;
+ }
+ } else {
+ VectorFormat fpf = nfd.GetVectorFormat(nfd.FPFormatMap());
+ FPRounding fpcr_rounding = static_cast<FPRounding>(fpcr().RMode());
+ bool inexact_exception = false;
+
+ // These instructions all use a one bit size field, except XTN, SQXTUN,
+ // SHLL, SQXTN and UQXTN, which use a two bit size field.
+ switch (instr->Mask(NEON2RegMiscFPMask)) {
+ case NEON_FABS: fabs_(fpf, rd, rn); return;
+ case NEON_FNEG: fneg(fpf, rd, rn); return;
+ case NEON_FSQRT: fsqrt(fpf, rd, rn); return;
+ case NEON_FCVTL:
+ if (instr->Mask(NEON_Q)) {
+ fcvtl2(vf_fcvtl, rd, rn);
+ } else {
+ fcvtl(vf_fcvtl, rd, rn);
+ }
+ return;
+ case NEON_FCVTN:
+ if (instr->Mask(NEON_Q)) {
+ fcvtn2(vf_fcvtn, rd, rn);
+ } else {
+ fcvtn(vf_fcvtn, rd, rn);
+ }
+ return;
+ case NEON_FCVTXN:
+ if (instr->Mask(NEON_Q)) {
+ fcvtxn2(vf_fcvtn, rd, rn);
+ } else {
+ fcvtxn(vf_fcvtn, rd, rn);
+ }
+ return;
+
+ // The following instructions break from the switch statement, rather
+ // than return.
+ case NEON_FRINTI: break; // Use FPCR rounding mode.
+ case NEON_FRINTX: inexact_exception = true; break;
+ case NEON_FRINTA: fpcr_rounding = FPTieAway; break;
+ case NEON_FRINTM: fpcr_rounding = FPNegativeInfinity; break;
+ case NEON_FRINTN: fpcr_rounding = FPTieEven; break;
+ case NEON_FRINTP: fpcr_rounding = FPPositiveInfinity; break;
+ case NEON_FRINTZ: fpcr_rounding = FPZero; break;
+
+ case NEON_FCVTNS: fcvts(fpf, rd, rn, FPTieEven); return;
+ case NEON_FCVTNU: fcvtu(fpf, rd, rn, FPTieEven); return;
+ case NEON_FCVTPS: fcvts(fpf, rd, rn, FPPositiveInfinity); return;
+ case NEON_FCVTPU: fcvtu(fpf, rd, rn, FPPositiveInfinity); return;
+ case NEON_FCVTMS: fcvts(fpf, rd, rn, FPNegativeInfinity); return;
+ case NEON_FCVTMU: fcvtu(fpf, rd, rn, FPNegativeInfinity); return;
+ case NEON_FCVTZS: fcvts(fpf, rd, rn, FPZero); return;
+ case NEON_FCVTZU: fcvtu(fpf, rd, rn, FPZero); return;
+ case NEON_FCVTAS: fcvts(fpf, rd, rn, FPTieAway); return;
+ case NEON_FCVTAU: fcvtu(fpf, rd, rn, FPTieAway); return;
+ case NEON_SCVTF: scvtf(fpf, rd, rn, 0, fpcr_rounding); return;
+ case NEON_UCVTF: ucvtf(fpf, rd, rn, 0, fpcr_rounding); return;
+ case NEON_URSQRTE: ursqrte(fpf, rd, rn); return;
+ case NEON_URECPE: urecpe(fpf, rd, rn); return;
+ case NEON_FRSQRTE: frsqrte(fpf, rd, rn); return;
+ case NEON_FRECPE: frecpe(fpf, rd, rn, fpcr_rounding); return;
+ case NEON_FCMGT_zero: fcmp_zero(fpf, rd, rn, gt); return;
+ case NEON_FCMGE_zero: fcmp_zero(fpf, rd, rn, ge); return;
+ case NEON_FCMEQ_zero: fcmp_zero(fpf, rd, rn, eq); return;
+ case NEON_FCMLE_zero: fcmp_zero(fpf, rd, rn, le); return;
+ case NEON_FCMLT_zero: fcmp_zero(fpf, rd, rn, lt); return;
+ default:
+ if ((NEON_XTN_opcode <= instr->Mask(NEON2RegMiscOpcode)) &&
+ (instr->Mask(NEON2RegMiscOpcode) <= NEON_UQXTN_opcode)) {
+ switch (instr->Mask(NEON2RegMiscMask)) {
+ case NEON_XTN: xtn(vf, rd, rn); return;
+ case NEON_SQXTN: sqxtn(vf, rd, rn); return;
+ case NEON_UQXTN: uqxtn(vf, rd, rn); return;
+ case NEON_SQXTUN: sqxtun(vf, rd, rn); return;
+ case NEON_SHLL:
+ vf = nfd.GetVectorFormat(nfd.LongIntegerFormatMap());
+ if (instr->Mask(NEON_Q)) {
+ shll2(vf, rd, rn);
+ } else {
+ shll(vf, rd, rn);
+ }
+ return;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+ } else {
+ VIXL_UNIMPLEMENTED();
+ }
+ }
+
+ // Only FRINT* instructions fall through the switch above.
+ frint(fpf, rd, rn, fpcr_rounding, inexact_exception);
+ }
+}
+
+
+void Simulator::VisitNEON3Same(const Instruction* instr) {
+ NEONFormatDecoder nfd(instr);
+ SimVRegister& rd = vreg(instr->Rd());
+ SimVRegister& rn = vreg(instr->Rn());
+ SimVRegister& rm = vreg(instr->Rm());
+
+ if (instr->Mask(NEON3SameLogicalFMask) == NEON3SameLogicalFixed) {
+ VectorFormat vf = nfd.GetVectorFormat(nfd.LogicalFormatMap());
+ switch (instr->Mask(NEON3SameLogicalMask)) {
+ case NEON_AND: and_(vf, rd, rn, rm); break;
+ case NEON_ORR: orr(vf, rd, rn, rm); break;
+ case NEON_ORN: orn(vf, rd, rn, rm); break;
+ case NEON_EOR: eor(vf, rd, rn, rm); break;
+ case NEON_BIC: bic(vf, rd, rn, rm); break;
+ case NEON_BIF: bif(vf, rd, rn, rm); break;
+ case NEON_BIT: bit(vf, rd, rn, rm); break;
+ case NEON_BSL: bsl(vf, rd, rn, rm); break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+ } else if (instr->Mask(NEON3SameFPFMask) == NEON3SameFPFixed) {
+ VectorFormat vf = nfd.GetVectorFormat(nfd.FPFormatMap());
+ switch (instr->Mask(NEON3SameFPMask)) {
+ case NEON_FADD: fadd(vf, rd, rn, rm); break;
+ case NEON_FSUB: fsub(vf, rd, rn, rm); break;
+ case NEON_FMUL: fmul(vf, rd, rn, rm); break;
+ case NEON_FDIV: fdiv(vf, rd, rn, rm); break;
+ case NEON_FMAX: fmax(vf, rd, rn, rm); break;
+ case NEON_FMIN: fmin(vf, rd, rn, rm); break;
+ case NEON_FMAXNM: fmaxnm(vf, rd, rn, rm); break;
+ case NEON_FMINNM: fminnm(vf, rd, rn, rm); break;
+ case NEON_FMLA: fmla(vf, rd, rn, rm); break;
+ case NEON_FMLS: fmls(vf, rd, rn, rm); break;
+ case NEON_FMULX: fmulx(vf, rd, rn, rm); break;
+ case NEON_FACGE: fabscmp(vf, rd, rn, rm, ge); break;
+ case NEON_FACGT: fabscmp(vf, rd, rn, rm, gt); break;
+ case NEON_FCMEQ: fcmp(vf, rd, rn, rm, eq); break;
+ case NEON_FCMGE: fcmp(vf, rd, rn, rm, ge); break;
+ case NEON_FCMGT: fcmp(vf, rd, rn, rm, gt); break;
+ case NEON_FRECPS: frecps(vf, rd, rn, rm); break;
+ case NEON_FRSQRTS: frsqrts(vf, rd, rn, rm); break;
+ case NEON_FABD: fabd(vf, rd, rn, rm); break;
+ case NEON_FADDP: faddp(vf, rd, rn, rm); break;
+ case NEON_FMAXP: fmaxp(vf, rd, rn, rm); break;
+ case NEON_FMAXNMP: fmaxnmp(vf, rd, rn, rm); break;
+ case NEON_FMINP: fminp(vf, rd, rn, rm); break;
+ case NEON_FMINNMP: fminnmp(vf, rd, rn, rm); break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+ } else {
+ VectorFormat vf = nfd.GetVectorFormat();
+ switch (instr->Mask(NEON3SameMask)) {
+ case NEON_ADD: add(vf, rd, rn, rm); break;
+ case NEON_ADDP: addp(vf, rd, rn, rm); break;
+ case NEON_CMEQ: cmp(vf, rd, rn, rm, eq); break;
+ case NEON_CMGE: cmp(vf, rd, rn, rm, ge); break;
+ case NEON_CMGT: cmp(vf, rd, rn, rm, gt); break;
+ case NEON_CMHI: cmp(vf, rd, rn, rm, hi); break;
+ case NEON_CMHS: cmp(vf, rd, rn, rm, hs); break;
+ case NEON_CMTST: cmptst(vf, rd, rn, rm); break;
+ case NEON_MLS: mls(vf, rd, rn, rm); break;
+ case NEON_MLA: mla(vf, rd, rn, rm); break;
+ case NEON_MUL: mul(vf, rd, rn, rm); break;
+ case NEON_PMUL: pmul(vf, rd, rn, rm); break;
+ case NEON_SMAX: smax(vf, rd, rn, rm); break;
+ case NEON_SMAXP: smaxp(vf, rd, rn, rm); break;
+ case NEON_SMIN: smin(vf, rd, rn, rm); break;
+ case NEON_SMINP: sminp(vf, rd, rn, rm); break;
+ case NEON_SUB: sub(vf, rd, rn, rm); break;
+ case NEON_UMAX: umax(vf, rd, rn, rm); break;
+ case NEON_UMAXP: umaxp(vf, rd, rn, rm); break;
+ case NEON_UMIN: umin(vf, rd, rn, rm); break;
+ case NEON_UMINP: uminp(vf, rd, rn, rm); break;
+ case NEON_SSHL: sshl(vf, rd, rn, rm); break;
+ case NEON_USHL: ushl(vf, rd, rn, rm); break;
+ case NEON_SABD: absdiff(vf, rd, rn, rm, true); break;
+ case NEON_UABD: absdiff(vf, rd, rn, rm, false); break;
+ case NEON_SABA: saba(vf, rd, rn, rm); break;
+ case NEON_UABA: uaba(vf, rd, rn, rm); break;
+ case NEON_UQADD: add(vf, rd, rn, rm).UnsignedSaturate(vf); break;
+ case NEON_SQADD: add(vf, rd, rn, rm).SignedSaturate(vf); break;
+ case NEON_UQSUB: sub(vf, rd, rn, rm).UnsignedSaturate(vf); break;
+ case NEON_SQSUB: sub(vf, rd, rn, rm).SignedSaturate(vf); break;
+ case NEON_SQDMULH: sqdmulh(vf, rd, rn, rm); break;
+ case NEON_SQRDMULH: sqrdmulh(vf, rd, rn, rm); break;
+ case NEON_UQSHL: ushl(vf, rd, rn, rm).UnsignedSaturate(vf); break;
+ case NEON_SQSHL: sshl(vf, rd, rn, rm).SignedSaturate(vf); break;
+ case NEON_URSHL: ushl(vf, rd, rn, rm).Round(vf); break;
+ case NEON_SRSHL: sshl(vf, rd, rn, rm).Round(vf); break;
+ case NEON_UQRSHL:
+ ushl(vf, rd, rn, rm).Round(vf).UnsignedSaturate(vf);
+ break;
+ case NEON_SQRSHL:
+ sshl(vf, rd, rn, rm).Round(vf).SignedSaturate(vf);
+ break;
+ case NEON_UHADD:
+ add(vf, rd, rn, rm).Uhalve(vf);
+ break;
+ case NEON_URHADD:
+ add(vf, rd, rn, rm).Uhalve(vf).Round(vf);
+ break;
+ case NEON_SHADD:
+ add(vf, rd, rn, rm).Halve(vf);
+ break;
+ case NEON_SRHADD:
+ add(vf, rd, rn, rm).Halve(vf).Round(vf);
+ break;
+ case NEON_UHSUB:
+ sub(vf, rd, rn, rm).Uhalve(vf);
+ break;
+ case NEON_SHSUB:
+ sub(vf, rd, rn, rm).Halve(vf);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+ }
+}
+
+
+void Simulator::VisitNEON3Different(const Instruction* instr) {
+ NEONFormatDecoder nfd(instr);
+ VectorFormat vf = nfd.GetVectorFormat();
+ VectorFormat vf_l = nfd.GetVectorFormat(nfd.LongIntegerFormatMap());
+
+ SimVRegister& rd = vreg(instr->Rd());
+ SimVRegister& rn = vreg(instr->Rn());
+ SimVRegister& rm = vreg(instr->Rm());
+
+ switch (instr->Mask(NEON3DifferentMask)) {
+ case NEON_PMULL: pmull(vf_l, rd, rn, rm); break;
+ case NEON_PMULL2: pmull2(vf_l, rd, rn, rm); break;
+ case NEON_UADDL: uaddl(vf_l, rd, rn, rm); break;
+ case NEON_UADDL2: uaddl2(vf_l, rd, rn, rm); break;
+ case NEON_SADDL: saddl(vf_l, rd, rn, rm); break;
+ case NEON_SADDL2: saddl2(vf_l, rd, rn, rm); break;
+ case NEON_USUBL: usubl(vf_l, rd, rn, rm); break;
+ case NEON_USUBL2: usubl2(vf_l, rd, rn, rm); break;
+ case NEON_SSUBL: ssubl(vf_l, rd, rn, rm); break;
+ case NEON_SSUBL2: ssubl2(vf_l, rd, rn, rm); break;
+ case NEON_SABAL: sabal(vf_l, rd, rn, rm); break;
+ case NEON_SABAL2: sabal2(vf_l, rd, rn, rm); break;
+ case NEON_UABAL: uabal(vf_l, rd, rn, rm); break;
+ case NEON_UABAL2: uabal2(vf_l, rd, rn, rm); break;
+ case NEON_SABDL: sabdl(vf_l, rd, rn, rm); break;
+ case NEON_SABDL2: sabdl2(vf_l, rd, rn, rm); break;
+ case NEON_UABDL: uabdl(vf_l, rd, rn, rm); break;
+ case NEON_UABDL2: uabdl2(vf_l, rd, rn, rm); break;
+ case NEON_SMLAL: smlal(vf_l, rd, rn, rm); break;
+ case NEON_SMLAL2: smlal2(vf_l, rd, rn, rm); break;
+ case NEON_UMLAL: umlal(vf_l, rd, rn, rm); break;
+ case NEON_UMLAL2: umlal2(vf_l, rd, rn, rm); break;
+ case NEON_SMLSL: smlsl(vf_l, rd, rn, rm); break;
+ case NEON_SMLSL2: smlsl2(vf_l, rd, rn, rm); break;
+ case NEON_UMLSL: umlsl(vf_l, rd, rn, rm); break;
+ case NEON_UMLSL2: umlsl2(vf_l, rd, rn, rm); break;
+ case NEON_SMULL: smull(vf_l, rd, rn, rm); break;
+ case NEON_SMULL2: smull2(vf_l, rd, rn, rm); break;
+ case NEON_UMULL: umull(vf_l, rd, rn, rm); break;
+ case NEON_UMULL2: umull2(vf_l, rd, rn, rm); break;
+ case NEON_SQDMLAL: sqdmlal(vf_l, rd, rn, rm); break;
+ case NEON_SQDMLAL2: sqdmlal2(vf_l, rd, rn, rm); break;
+ case NEON_SQDMLSL: sqdmlsl(vf_l, rd, rn, rm); break;
+ case NEON_SQDMLSL2: sqdmlsl2(vf_l, rd, rn, rm); break;
+ case NEON_SQDMULL: sqdmull(vf_l, rd, rn, rm); break;
+ case NEON_SQDMULL2: sqdmull2(vf_l, rd, rn, rm); break;
+ case NEON_UADDW: uaddw(vf_l, rd, rn, rm); break;
+ case NEON_UADDW2: uaddw2(vf_l, rd, rn, rm); break;
+ case NEON_SADDW: saddw(vf_l, rd, rn, rm); break;
+ case NEON_SADDW2: saddw2(vf_l, rd, rn, rm); break;
+ case NEON_USUBW: usubw(vf_l, rd, rn, rm); break;
+ case NEON_USUBW2: usubw2(vf_l, rd, rn, rm); break;
+ case NEON_SSUBW: ssubw(vf_l, rd, rn, rm); break;
+ case NEON_SSUBW2: ssubw2(vf_l, rd, rn, rm); break;
+ case NEON_ADDHN: addhn(vf, rd, rn, rm); break;
+ case NEON_ADDHN2: addhn2(vf, rd, rn, rm); break;
+ case NEON_RADDHN: raddhn(vf, rd, rn, rm); break;
+ case NEON_RADDHN2: raddhn2(vf, rd, rn, rm); break;
+ case NEON_SUBHN: subhn(vf, rd, rn, rm); break;
+ case NEON_SUBHN2: subhn2(vf, rd, rn, rm); break;
+ case NEON_RSUBHN: rsubhn(vf, rd, rn, rm); break;
+ case NEON_RSUBHN2: rsubhn2(vf, rd, rn, rm); break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+}
+
+
+void Simulator::VisitNEONAcrossLanes(const Instruction* instr) {
+ NEONFormatDecoder nfd(instr);
+
+ SimVRegister& rd = vreg(instr->Rd());
+ SimVRegister& rn = vreg(instr->Rn());
+
+ // The input operand's VectorFormat is passed for these instructions.
+ if (instr->Mask(NEONAcrossLanesFPFMask) == NEONAcrossLanesFPFixed) {
+ VectorFormat vf = nfd.GetVectorFormat(nfd.FPFormatMap());
+
+ switch (instr->Mask(NEONAcrossLanesFPMask)) {
+ case NEON_FMAXV: fmaxv(vf, rd, rn); break;
+ case NEON_FMINV: fminv(vf, rd, rn); break;
+ case NEON_FMAXNMV: fmaxnmv(vf, rd, rn); break;
+ case NEON_FMINNMV: fminnmv(vf, rd, rn); break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+ } else {
+ VectorFormat vf = nfd.GetVectorFormat();
+
+ switch (instr->Mask(NEONAcrossLanesMask)) {
+ case NEON_ADDV: addv(vf, rd, rn); break;
+ case NEON_SMAXV: smaxv(vf, rd, rn); break;
+ case NEON_SMINV: sminv(vf, rd, rn); break;
+ case NEON_UMAXV: umaxv(vf, rd, rn); break;
+ case NEON_UMINV: uminv(vf, rd, rn); break;
+ case NEON_SADDLV: saddlv(vf, rd, rn); break;
+ case NEON_UADDLV: uaddlv(vf, rd, rn); break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+ }
+}
+
+
+void Simulator::VisitNEONByIndexedElement(const Instruction* instr) {
+ NEONFormatDecoder nfd(instr);
+ VectorFormat vf_r = nfd.GetVectorFormat();
+ VectorFormat vf = nfd.GetVectorFormat(nfd.LongIntegerFormatMap());
+
+ SimVRegister& rd = vreg(instr->Rd());
+ SimVRegister& rn = vreg(instr->Rn());
+
+ ByElementOp Op = NULL;
+
+ int rm_reg = instr->Rm();
+ int index = (instr->NEONH() << 1) | instr->NEONL();
+ if (instr->NEONSize() == 1) {
+ rm_reg &= 0xf;
+ index = (index << 1) | instr->NEONM();
+ }
+
+ switch (instr->Mask(NEONByIndexedElementMask)) {
+ case NEON_MUL_byelement: Op = &Simulator::mul; vf = vf_r; break;
+ case NEON_MLA_byelement: Op = &Simulator::mla; vf = vf_r; break;
+ case NEON_MLS_byelement: Op = &Simulator::mls; vf = vf_r; break;
+ case NEON_SQDMULH_byelement: Op = &Simulator::sqdmulh; vf = vf_r; break;
+ case NEON_SQRDMULH_byelement: Op = &Simulator::sqrdmulh; vf = vf_r; break;
+ case NEON_SMULL_byelement:
+ if (instr->Mask(NEON_Q)) {
+ Op = &Simulator::smull2;
+ } else {
+ Op = &Simulator::smull;
+ }
+ break;
+ case NEON_UMULL_byelement:
+ if (instr->Mask(NEON_Q)) {
+ Op = &Simulator::umull2;
+ } else {
+ Op = &Simulator::umull;
+ }
+ break;
+ case NEON_SMLAL_byelement:
+ if (instr->Mask(NEON_Q)) {
+ Op = &Simulator::smlal2;
+ } else {
+ Op = &Simulator::smlal;
+ }
+ break;
+ case NEON_UMLAL_byelement:
+ if (instr->Mask(NEON_Q)) {
+ Op = &Simulator::umlal2;
+ } else {
+ Op = &Simulator::umlal;
+ }
+ break;
+ case NEON_SMLSL_byelement:
+ if (instr->Mask(NEON_Q)) {
+ Op = &Simulator::smlsl2;
+ } else {
+ Op = &Simulator::smlsl;
+ }
+ break;
+ case NEON_UMLSL_byelement:
+ if (instr->Mask(NEON_Q)) {
+ Op = &Simulator::umlsl2;
+ } else {
+ Op = &Simulator::umlsl;
+ }
+ break;
+ case NEON_SQDMULL_byelement:
+ if (instr->Mask(NEON_Q)) {
+ Op = &Simulator::sqdmull2;
+ } else {
+ Op = &Simulator::sqdmull;
+ }
+ break;
+ case NEON_SQDMLAL_byelement:
+ if (instr->Mask(NEON_Q)) {
+ Op = &Simulator::sqdmlal2;
+ } else {
+ Op = &Simulator::sqdmlal;
+ }
+ break;
+ case NEON_SQDMLSL_byelement:
+ if (instr->Mask(NEON_Q)) {
+ Op = &Simulator::sqdmlsl2;
+ } else {
+ Op = &Simulator::sqdmlsl;
+ }
+ break;
+ default:
+ index = instr->NEONH();
+ if ((instr->FPType() & 1) == 0) {
+ index = (index << 1) | instr->NEONL();
+ }
+
+ vf = nfd.GetVectorFormat(nfd.FPFormatMap());
+
+ switch (instr->Mask(NEONByIndexedElementFPMask)) {
+ case NEON_FMUL_byelement: Op = &Simulator::fmul; break;
+ case NEON_FMLA_byelement: Op = &Simulator::fmla; break;
+ case NEON_FMLS_byelement: Op = &Simulator::fmls; break;
+ case NEON_FMULX_byelement: Op = &Simulator::fmulx; break;
+ default: VIXL_UNIMPLEMENTED();
+ }
+ }
+
+ (this->*Op)(vf, rd, rn, vreg(rm_reg), index);
+}
+
+
+void Simulator::VisitNEONCopy(const Instruction* instr) {
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::TriangularFormatMap());
+ VectorFormat vf = nfd.GetVectorFormat();
+
+ SimVRegister& rd = vreg(instr->Rd());
+ SimVRegister& rn = vreg(instr->Rn());
+ int imm5 = instr->ImmNEON5();
+ int tz = CountTrailingZeros(imm5, 32);
+ int reg_index = imm5 >> (tz + 1);
+
+ if (instr->Mask(NEONCopyInsElementMask) == NEON_INS_ELEMENT) {
+ int imm4 = instr->ImmNEON4();
+ int rn_index = imm4 >> tz;
+ ins_element(vf, rd, reg_index, rn, rn_index);
+ } else if (instr->Mask(NEONCopyInsGeneralMask) == NEON_INS_GENERAL) {
+ ins_immediate(vf, rd, reg_index, xreg(instr->Rn()));
+ } else if (instr->Mask(NEONCopyUmovMask) == NEON_UMOV) {
+ uint64_t value = LogicVRegister(rn).Uint(vf, reg_index);
+ value &= MaxUintFromFormat(vf);
+ set_xreg(instr->Rd(), value);
+ } else if (instr->Mask(NEONCopyUmovMask) == NEON_SMOV) {
+ int64_t value = LogicVRegister(rn).Int(vf, reg_index);
+ if (instr->NEONQ()) {
+ set_xreg(instr->Rd(), value);
+ } else {
+ set_wreg(instr->Rd(), (int32_t)value);
+ }
+ } else if (instr->Mask(NEONCopyDupElementMask) == NEON_DUP_ELEMENT) {
+ dup_element(vf, rd, rn, reg_index);
+ } else if (instr->Mask(NEONCopyDupGeneralMask) == NEON_DUP_GENERAL) {
+ dup_immediate(vf, rd, xreg(instr->Rn()));
+ } else {
+ VIXL_UNIMPLEMENTED();
+ }
+}
+
+
+void Simulator::VisitNEONExtract(const Instruction* instr) {
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::LogicalFormatMap());
+ VectorFormat vf = nfd.GetVectorFormat();
+ SimVRegister& rd = vreg(instr->Rd());
+ SimVRegister& rn = vreg(instr->Rn());
+ SimVRegister& rm = vreg(instr->Rm());
+ if (instr->Mask(NEONExtractMask) == NEON_EXT) {
+ int index = instr->ImmNEONExt();
+ ext(vf, rd, rn, rm, index);
+ } else {
+ VIXL_UNIMPLEMENTED();
+ }
+}
+
+
+void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr,
+ AddrMode addr_mode) {
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap());
+ VectorFormat vf = nfd.GetVectorFormat();
+
+ uint64_t addr_base = xreg(instr->Rn(), Reg31IsStackPointer);
+ int reg_size = RegisterSizeInBytesFromFormat(vf);
+
+ int reg[4];
+ uint64_t addr[4];
+ for (int i = 0; i < 4; i++) {
+ reg[i] = (instr->Rt() + i) % kNumberOfVRegisters;
+ addr[i] = addr_base + (i * reg_size);
+ }
+ int count = 1;
+ bool log_read = true;
+
+ Instr itype = instr->Mask(NEONLoadStoreMultiStructMask);
+ if (((itype == NEON_LD1_1v) || (itype == NEON_LD1_2v) ||
+ (itype == NEON_LD1_3v) || (itype == NEON_LD1_4v) ||
+ (itype == NEON_ST1_1v) || (itype == NEON_ST1_2v) ||
+ (itype == NEON_ST1_3v) || (itype == NEON_ST1_4v)) &&
+ (instr->Bits(20, 16) != 0)) {
+ VIXL_UNREACHABLE();
+ }
+
+ // We use the PostIndex mask here, as it works in this case for both Offset
+ // and PostIndex addressing.
+ switch (instr->Mask(NEONLoadStoreMultiStructPostIndexMask)) {
+ case NEON_LD1_4v:
+ case NEON_LD1_4v_post: ld1(vf, vreg(reg[3]), addr[3]); count++;
+ VIXL_FALLTHROUGH();
+ case NEON_LD1_3v:
+ case NEON_LD1_3v_post: ld1(vf, vreg(reg[2]), addr[2]); count++;
+ VIXL_FALLTHROUGH();
+ case NEON_LD1_2v:
+ case NEON_LD1_2v_post: ld1(vf, vreg(reg[1]), addr[1]); count++;
+ VIXL_FALLTHROUGH();
+ case NEON_LD1_1v:
+ case NEON_LD1_1v_post:
+ ld1(vf, vreg(reg[0]), addr[0]);
+ log_read = true;
+ break;
+ case NEON_ST1_4v:
+ case NEON_ST1_4v_post: st1(vf, vreg(reg[3]), addr[3]); count++;
+ VIXL_FALLTHROUGH();
+ case NEON_ST1_3v:
+ case NEON_ST1_3v_post: st1(vf, vreg(reg[2]), addr[2]); count++;
+ VIXL_FALLTHROUGH();
+ case NEON_ST1_2v:
+ case NEON_ST1_2v_post: st1(vf, vreg(reg[1]), addr[1]); count++;
+ VIXL_FALLTHROUGH();
+ case NEON_ST1_1v:
+ case NEON_ST1_1v_post:
+ st1(vf, vreg(reg[0]), addr[0]);
+ log_read = false;
+ break;
+ case NEON_LD2_post:
+ case NEON_LD2:
+ ld2(vf, vreg(reg[0]), vreg(reg[1]), addr[0]);
+ count = 2;
+ break;
+ case NEON_ST2:
+ case NEON_ST2_post:
+ st2(vf, vreg(reg[0]), vreg(reg[1]), addr[0]);
+ count = 2;
+ break;
+ case NEON_LD3_post:
+ case NEON_LD3:
+ ld3(vf, vreg(reg[0]), vreg(reg[1]), vreg(reg[2]), addr[0]);
+ count = 3;
+ break;
+ case NEON_ST3:
+ case NEON_ST3_post:
+ st3(vf, vreg(reg[0]), vreg(reg[1]), vreg(reg[2]), addr[0]);
+ count = 3;
+ break;
+ case NEON_ST4:
+ case NEON_ST4_post:
+ st4(vf, vreg(reg[0]), vreg(reg[1]), vreg(reg[2]), vreg(reg[3]),
+ addr[0]);
+ count = 4;
+ break;
+ case NEON_LD4_post:
+ case NEON_LD4:
+ ld4(vf, vreg(reg[0]), vreg(reg[1]), vreg(reg[2]), vreg(reg[3]),
+ addr[0]);
+ count = 4;
+ break;
+ default: VIXL_UNIMPLEMENTED();
+ }
+
+ // Explicitly log the register update whilst we have type information.
+ for (int i = 0; i < count; i++) {
+ // For de-interleaving loads, only print the base address.
+ int lane_size = LaneSizeInBytesFromFormat(vf);
+ PrintRegisterFormat format = GetPrintRegisterFormatTryFP(
+ GetPrintRegisterFormatForSize(reg_size, lane_size));
+ if (log_read) {
+ LogVRead(addr_base, reg[i], format);
+ } else {
+ LogVWrite(addr_base, reg[i], format);
+ }
+ }
+
+ if (addr_mode == PostIndex) {
+ int rm = instr->Rm();
+ // The immediate post index addressing mode is indicated by rm = 31.
+ // The immediate is implied by the number of vector registers used.
+ addr_base += (rm == 31) ? RegisterSizeInBytesFromFormat(vf) * count
+ : xreg(rm);
+ set_xreg(instr->Rn(), addr_base);
+ } else {
+ VIXL_ASSERT(addr_mode == Offset);
+ }
+}
+
+
+void Simulator::VisitNEONLoadStoreMultiStruct(const Instruction* instr) {
+ NEONLoadStoreMultiStructHelper(instr, Offset);
+}
+
+
+void Simulator::VisitNEONLoadStoreMultiStructPostIndex(
+ const Instruction* instr) {
+ NEONLoadStoreMultiStructHelper(instr, PostIndex);
+}
+
+
+void Simulator::NEONLoadStoreSingleStructHelper(const Instruction* instr,
+ AddrMode addr_mode) {
+ uint64_t addr = xreg(instr->Rn(), Reg31IsStackPointer);
+ int rt = instr->Rt();
+
+ Instr itype = instr->Mask(NEONLoadStoreSingleStructMask);
+ if (((itype == NEON_LD1_b) || (itype == NEON_LD1_h) ||
+ (itype == NEON_LD1_s) || (itype == NEON_LD1_d)) &&
+ (instr->Bits(20, 16) != 0)) {
+ VIXL_UNREACHABLE();
+ }
+
+ // We use the PostIndex mask here, as it works in this case for both Offset
+ // and PostIndex addressing.
+ bool do_load = false;
+
+ bool replicating = false;
+
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap());
+ VectorFormat vf_t = nfd.GetVectorFormat();
+
+ VectorFormat vf = kFormat16B;
+ switch (instr->Mask(NEONLoadStoreSingleStructPostIndexMask)) {
+ case NEON_LD1_b:
+ case NEON_LD1_b_post:
+ case NEON_LD2_b:
+ case NEON_LD2_b_post:
+ case NEON_LD3_b:
+ case NEON_LD3_b_post:
+ case NEON_LD4_b:
+ case NEON_LD4_b_post: do_load = true;
+ VIXL_FALLTHROUGH();
+ case NEON_ST1_b:
+ case NEON_ST1_b_post:
+ case NEON_ST2_b:
+ case NEON_ST2_b_post:
+ case NEON_ST3_b:
+ case NEON_ST3_b_post:
+ case NEON_ST4_b:
+ case NEON_ST4_b_post: break;
+
+ case NEON_LD1_h:
+ case NEON_LD1_h_post:
+ case NEON_LD2_h:
+ case NEON_LD2_h_post:
+ case NEON_LD3_h:
+ case NEON_LD3_h_post:
+ case NEON_LD4_h:
+ case NEON_LD4_h_post: do_load = true;
+ VIXL_FALLTHROUGH();
+ case NEON_ST1_h:
+ case NEON_ST1_h_post:
+ case NEON_ST2_h:
+ case NEON_ST2_h_post:
+ case NEON_ST3_h:
+ case NEON_ST3_h_post:
+ case NEON_ST4_h:
+ case NEON_ST4_h_post: vf = kFormat8H; break;
+ case NEON_LD1_s:
+ case NEON_LD1_s_post:
+ case NEON_LD2_s:
+ case NEON_LD2_s_post:
+ case NEON_LD3_s:
+ case NEON_LD3_s_post:
+ case NEON_LD4_s:
+ case NEON_LD4_s_post: do_load = true;
+ VIXL_FALLTHROUGH();
+ case NEON_ST1_s:
+ case NEON_ST1_s_post:
+ case NEON_ST2_s:
+ case NEON_ST2_s_post:
+ case NEON_ST3_s:
+ case NEON_ST3_s_post:
+ case NEON_ST4_s:
+ case NEON_ST4_s_post: {
+ VIXL_STATIC_ASSERT((NEON_LD1_s | (1 << NEONLSSize_offset)) == NEON_LD1_d);
+ VIXL_STATIC_ASSERT(
+ (NEON_LD1_s_post | (1 << NEONLSSize_offset)) == NEON_LD1_d_post);
+ VIXL_STATIC_ASSERT((NEON_ST1_s | (1 << NEONLSSize_offset)) == NEON_ST1_d);
+ VIXL_STATIC_ASSERT(
+ (NEON_ST1_s_post | (1 << NEONLSSize_offset)) == NEON_ST1_d_post);
+ vf = ((instr->NEONLSSize() & 1) == 0) ? kFormat4S : kFormat2D;
+ break;
+ }
+
+ case NEON_LD1R:
+ case NEON_LD1R_post:
+ case NEON_LD2R:
+ case NEON_LD2R_post:
+ case NEON_LD3R:
+ case NEON_LD3R_post:
+ case NEON_LD4R:
+ case NEON_LD4R_post: {
+ vf = vf_t;
+ do_load = true;
+ replicating = true;
+ break;
+ }
+ default: VIXL_UNIMPLEMENTED();
+ }
+
+ PrintRegisterFormat print_format =
+ GetPrintRegisterFormatTryFP(GetPrintRegisterFormat(vf));
+ // Make sure that the print_format only includes a single lane.
+ print_format =
+ static_cast<PrintRegisterFormat>(print_format & ~kPrintRegAsVectorMask);
+
+ int esize = LaneSizeInBytesFromFormat(vf);
+ int index_shift = LaneSizeInBytesLog2FromFormat(vf);
+ int lane = instr->NEONLSIndex(index_shift);
+ int scale = 0;
+ int rt2 = (rt + 1) % kNumberOfVRegisters;
+ int rt3 = (rt2 + 1) % kNumberOfVRegisters;
+ int rt4 = (rt3 + 1) % kNumberOfVRegisters;
+ switch (instr->Mask(NEONLoadStoreSingleLenMask)) {
+ case NEONLoadStoreSingle1:
+ scale = 1;
+ if (do_load) {
+ if (replicating) {
+ ld1r(vf, vreg(rt), addr);
+ } else {
+ ld1(vf, vreg(rt), lane, addr);
+ }
+ LogVRead(addr, rt, print_format, lane);
+ } else {
+ st1(vf, vreg(rt), lane, addr);
+ LogVWrite(addr, rt, print_format, lane);
+ }
+ break;
+ case NEONLoadStoreSingle2:
+ scale = 2;
+ if (do_load) {
+ if (replicating) {
+ ld2r(vf, vreg(rt), vreg(rt2), addr);
+ } else {
+ ld2(vf, vreg(rt), vreg(rt2), lane, addr);
+ }
+ LogVRead(addr, rt, print_format, lane);
+ LogVRead(addr + esize, rt2, print_format, lane);
+ } else {
+ st2(vf, vreg(rt), vreg(rt2), lane, addr);
+ LogVWrite(addr, rt, print_format, lane);
+ LogVWrite(addr + esize, rt2, print_format, lane);
+ }
+ break;
+ case NEONLoadStoreSingle3:
+ scale = 3;
+ if (do_load) {
+ if (replicating) {
+ ld3r(vf, vreg(rt), vreg(rt2), vreg(rt3), addr);
+ } else {
+ ld3(vf, vreg(rt), vreg(rt2), vreg(rt3), lane, addr);
+ }
+ LogVRead(addr, rt, print_format, lane);
+ LogVRead(addr + esize, rt2, print_format, lane);
+ LogVRead(addr + (2 * esize), rt3, print_format, lane);
+ } else {
+ st3(vf, vreg(rt), vreg(rt2), vreg(rt3), lane, addr);
+ LogVWrite(addr, rt, print_format, lane);
+ LogVWrite(addr + esize, rt2, print_format, lane);
+ LogVWrite(addr + (2 * esize), rt3, print_format, lane);
+ }
+ break;
+ case NEONLoadStoreSingle4:
+ scale = 4;
+ if (do_load) {
+ if (replicating) {
+ ld4r(vf, vreg(rt), vreg(rt2), vreg(rt3), vreg(rt4), addr);
+ } else {
+ ld4(vf, vreg(rt), vreg(rt2), vreg(rt3), vreg(rt4), lane, addr);
+ }
+ LogVRead(addr, rt, print_format, lane);
+ LogVRead(addr + esize, rt2, print_format, lane);
+ LogVRead(addr + (2 * esize), rt3, print_format, lane);
+ LogVRead(addr + (3 * esize), rt4, print_format, lane);
+ } else {
+ st4(vf, vreg(rt), vreg(rt2), vreg(rt3), vreg(rt4), lane, addr);
+ LogVWrite(addr, rt, print_format, lane);
+ LogVWrite(addr + esize, rt2, print_format, lane);
+ LogVWrite(addr + (2 * esize), rt3, print_format, lane);
+ LogVWrite(addr + (3 * esize), rt4, print_format, lane);
+ }
+ break;
+ default: VIXL_UNIMPLEMENTED();
+ }
+
+ if (addr_mode == PostIndex) {
+ int rm = instr->Rm();
+ int lane_size = LaneSizeInBytesFromFormat(vf);
+ set_xreg(instr->Rn(), addr + ((rm == 31) ? (scale * lane_size) : xreg(rm)));
+ }
+}
+
+
+void Simulator::VisitNEONLoadStoreSingleStruct(const Instruction* instr) {
+ NEONLoadStoreSingleStructHelper(instr, Offset);
+}
+
+
+void Simulator::VisitNEONLoadStoreSingleStructPostIndex(
+ const Instruction* instr) {
+ NEONLoadStoreSingleStructHelper(instr, PostIndex);
+}
+
+
+void Simulator::VisitNEONModifiedImmediate(const Instruction* instr) {
+ SimVRegister& rd = vreg(instr->Rd());
+ int cmode = instr->NEONCmode();
+ int cmode_3_1 = (cmode >> 1) & 7;
+ int cmode_3 = (cmode >> 3) & 1;
+ int cmode_2 = (cmode >> 2) & 1;
+ int cmode_1 = (cmode >> 1) & 1;
+ int cmode_0 = cmode & 1;
+ int q = instr->NEONQ();
+ int op_bit = instr->NEONModImmOp();
+ uint64_t imm8 = instr->ImmNEONabcdefgh();
+
+ // Find the format and immediate value
+ uint64_t imm = 0;
+ VectorFormat vform = kFormatUndefined;
+ switch (cmode_3_1) {
+ case 0x0:
+ case 0x1:
+ case 0x2:
+ case 0x3:
+ vform = (q == 1) ? kFormat4S : kFormat2S;
+ imm = imm8 << (8 * cmode_3_1);
+ break;
+ case 0x4:
+ case 0x5:
+ vform = (q == 1) ? kFormat8H : kFormat4H;
+ imm = imm8 << (8 * cmode_1);
+ break;
+ case 0x6:
+ vform = (q == 1) ? kFormat4S : kFormat2S;
+ if (cmode_0 == 0) {
+ imm = imm8 << 8 | 0x000000ff;
+ } else {
+ imm = imm8 << 16 | 0x0000ffff;
+ }
+ break;
+ case 0x7:
+ if (cmode_0 == 0 && op_bit == 0) {
+ vform = q ? kFormat16B : kFormat8B;
+ imm = imm8;
+ } else if (cmode_0 == 0 && op_bit == 1) {
+ vform = q ? kFormat2D : kFormat1D;
+ imm = 0;
+ for (int i = 0; i < 8; ++i) {
+ if (imm8 & (1ULL << i)) {
+ imm |= (UINT64_C(0xff) << (8 * i));
+ }
+ }
+ } else { // cmode_0 == 1, cmode == 0xf.
+ if (op_bit == 0) {
+ vform = q ? kFormat4S : kFormat2S;
+ imm = FloatToRawbits(instr->ImmNEONFP32());
+ } else if (q == 1) {
+ vform = kFormat2D;
+ imm = DoubleToRawbits(instr->ImmNEONFP64());
+ } else {
+ VIXL_ASSERT((q == 0) && (op_bit == 1) && (cmode == 0xf));
+ VisitUnallocated(instr);
+ }
+ }
+ break;
+ default: VIXL_UNREACHABLE(); break;
+ }
+
+ // Find the operation
+ NEONModifiedImmediateOp op;
+ if (cmode_3 == 0) {
+ if (cmode_0 == 0) {
+ op = op_bit ? NEONModifiedImmediate_MVNI : NEONModifiedImmediate_MOVI;
+ } else { // cmode<0> == '1'
+ op = op_bit ? NEONModifiedImmediate_BIC : NEONModifiedImmediate_ORR;
+ }
+ } else { // cmode<3> == '1'
+ if (cmode_2 == 0) {
+ if (cmode_0 == 0) {
+ op = op_bit ? NEONModifiedImmediate_MVNI : NEONModifiedImmediate_MOVI;
+ } else { // cmode<0> == '1'
+ op = op_bit ? NEONModifiedImmediate_BIC : NEONModifiedImmediate_ORR;
+ }
+ } else { // cmode<2> == '1'
+ if (cmode_1 == 0) {
+ op = op_bit ? NEONModifiedImmediate_MVNI : NEONModifiedImmediate_MOVI;
+ } else { // cmode<1> == '1'
+ if (cmode_0 == 0) {
+ op = NEONModifiedImmediate_MOVI;
+ } else { // cmode<0> == '1'
+ op = NEONModifiedImmediate_MOVI;
+ }
+ }
+ }
+ }
+
+ // Call the logic function
+ if (op == NEONModifiedImmediate_ORR) {
+ orr(vform, rd, rd, imm);
+ } else if (op == NEONModifiedImmediate_BIC) {
+ bic(vform, rd, rd, imm);
+ } else if (op == NEONModifiedImmediate_MOVI) {
+ movi(vform, rd, imm);
+ } else if (op == NEONModifiedImmediate_MVNI) {
+ mvni(vform, rd, imm);
+ } else {
+ VisitUnimplemented(instr);
+ }
+}
+
+
+void Simulator::VisitNEONScalar2RegMisc(const Instruction* instr) {
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap());
+ VectorFormat vf = nfd.GetVectorFormat();
+
+ SimVRegister& rd = vreg(instr->Rd());
+ SimVRegister& rn = vreg(instr->Rn());
+
+ if (instr->Mask(NEON2RegMiscOpcode) <= NEON_NEG_scalar_opcode) {
+ // These instructions all use a two bit size field, except NOT and RBIT,
+ // which use the field to encode the operation.
+ switch (instr->Mask(NEONScalar2RegMiscMask)) {
+ case NEON_CMEQ_zero_scalar: cmp(vf, rd, rn, 0, eq); break;
+ case NEON_CMGE_zero_scalar: cmp(vf, rd, rn, 0, ge); break;
+ case NEON_CMGT_zero_scalar: cmp(vf, rd, rn, 0, gt); break;
+ case NEON_CMLT_zero_scalar: cmp(vf, rd, rn, 0, lt); break;
+ case NEON_CMLE_zero_scalar: cmp(vf, rd, rn, 0, le); break;
+ case NEON_ABS_scalar: abs(vf, rd, rn); break;
+ case NEON_SQABS_scalar: abs(vf, rd, rn).SignedSaturate(vf); break;
+ case NEON_NEG_scalar: neg(vf, rd, rn); break;
+ case NEON_SQNEG_scalar: neg(vf, rd, rn).SignedSaturate(vf); break;
+ case NEON_SUQADD_scalar: suqadd(vf, rd, rn); break;
+ case NEON_USQADD_scalar: usqadd(vf, rd, rn); break;
+ default: VIXL_UNIMPLEMENTED(); break;
+ }
+ } else {
+ VectorFormat fpf = nfd.GetVectorFormat(nfd.FPScalarFormatMap());
+ FPRounding fpcr_rounding = static_cast<FPRounding>(fpcr().RMode());
+
+ // These instructions all use a one bit size field, except SQXTUN, SQXTN
+ // and UQXTN, which use a two bit size field.
+ switch (instr->Mask(NEONScalar2RegMiscFPMask)) {
+ case NEON_FRECPE_scalar: frecpe(fpf, rd, rn, fpcr_rounding); break;
+ case NEON_FRECPX_scalar: frecpx(fpf, rd, rn); break;
+ case NEON_FRSQRTE_scalar: frsqrte(fpf, rd, rn); break;
+ case NEON_FCMGT_zero_scalar: fcmp_zero(fpf, rd, rn, gt); break;
+ case NEON_FCMGE_zero_scalar: fcmp_zero(fpf, rd, rn, ge); break;
+ case NEON_FCMEQ_zero_scalar: fcmp_zero(fpf, rd, rn, eq); break;
+ case NEON_FCMLE_zero_scalar: fcmp_zero(fpf, rd, rn, le); break;
+ case NEON_FCMLT_zero_scalar: fcmp_zero(fpf, rd, rn, lt); break;
+ case NEON_SCVTF_scalar: scvtf(fpf, rd, rn, 0, fpcr_rounding); break;
+ case NEON_UCVTF_scalar: ucvtf(fpf, rd, rn, 0, fpcr_rounding); break;
+ case NEON_FCVTNS_scalar: fcvts(fpf, rd, rn, FPTieEven); break;
+ case NEON_FCVTNU_scalar: fcvtu(fpf, rd, rn, FPTieEven); break;
+ case NEON_FCVTPS_scalar: fcvts(fpf, rd, rn, FPPositiveInfinity); break;
+ case NEON_FCVTPU_scalar: fcvtu(fpf, rd, rn, FPPositiveInfinity); break;
+ case NEON_FCVTMS_scalar: fcvts(fpf, rd, rn, FPNegativeInfinity); break;
+ case NEON_FCVTMU_scalar: fcvtu(fpf, rd, rn, FPNegativeInfinity); break;
+ case NEON_FCVTZS_scalar: fcvts(fpf, rd, rn, FPZero); break;
+ case NEON_FCVTZU_scalar: fcvtu(fpf, rd, rn, FPZero); break;
+ case NEON_FCVTAS_scalar: fcvts(fpf, rd, rn, FPTieAway); break;
+ case NEON_FCVTAU_scalar: fcvtu(fpf, rd, rn, FPTieAway); break;
+ case NEON_FCVTXN_scalar:
+ // Unlike all of the other FP instructions above, fcvtxn encodes dest
+ // size S as size<0>=1. There's only one case, so we ignore the form.
+ VIXL_ASSERT(instr->Bit(22) == 1);
+ fcvtxn(kFormatS, rd, rn);
+ break;
+ default:
+ switch (instr->Mask(NEONScalar2RegMiscMask)) {
+ case NEON_SQXTN_scalar: sqxtn(vf, rd, rn); break;
+ case NEON_UQXTN_scalar: uqxtn(vf, rd, rn); break;
+ case NEON_SQXTUN_scalar: sqxtun(vf, rd, rn); break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+ }
+ }
+}
+
+
+void Simulator::VisitNEONScalar3Diff(const Instruction* instr) {
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::LongScalarFormatMap());
+ VectorFormat vf = nfd.GetVectorFormat();
+
+ SimVRegister& rd = vreg(instr->Rd());
+ SimVRegister& rn = vreg(instr->Rn());
+ SimVRegister& rm = vreg(instr->Rm());
+ switch (instr->Mask(NEONScalar3DiffMask)) {
+ case NEON_SQDMLAL_scalar: sqdmlal(vf, rd, rn, rm); break;
+ case NEON_SQDMLSL_scalar: sqdmlsl(vf, rd, rn, rm); break;
+ case NEON_SQDMULL_scalar: sqdmull(vf, rd, rn, rm); break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+}
+
+
+void Simulator::VisitNEONScalar3Same(const Instruction* instr) {
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap());
+ VectorFormat vf = nfd.GetVectorFormat();
+
+ SimVRegister& rd = vreg(instr->Rd());
+ SimVRegister& rn = vreg(instr->Rn());
+ SimVRegister& rm = vreg(instr->Rm());
+
+ if (instr->Mask(NEONScalar3SameFPFMask) == NEONScalar3SameFPFixed) {
+ vf = nfd.GetVectorFormat(nfd.FPScalarFormatMap());
+ switch (instr->Mask(NEONScalar3SameFPMask)) {
+ case NEON_FMULX_scalar: fmulx(vf, rd, rn, rm); break;
+ case NEON_FACGE_scalar: fabscmp(vf, rd, rn, rm, ge); break;
+ case NEON_FACGT_scalar: fabscmp(vf, rd, rn, rm, gt); break;
+ case NEON_FCMEQ_scalar: fcmp(vf, rd, rn, rm, eq); break;
+ case NEON_FCMGE_scalar: fcmp(vf, rd, rn, rm, ge); break;
+ case NEON_FCMGT_scalar: fcmp(vf, rd, rn, rm, gt); break;
+ case NEON_FRECPS_scalar: frecps(vf, rd, rn, rm); break;
+ case NEON_FRSQRTS_scalar: frsqrts(vf, rd, rn, rm); break;
+ case NEON_FABD_scalar: fabd(vf, rd, rn, rm); break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+ } else {
+ switch (instr->Mask(NEONScalar3SameMask)) {
+ case NEON_ADD_scalar: add(vf, rd, rn, rm); break;
+ case NEON_SUB_scalar: sub(vf, rd, rn, rm); break;
+ case NEON_CMEQ_scalar: cmp(vf, rd, rn, rm, eq); break;
+ case NEON_CMGE_scalar: cmp(vf, rd, rn, rm, ge); break;
+ case NEON_CMGT_scalar: cmp(vf, rd, rn, rm, gt); break;
+ case NEON_CMHI_scalar: cmp(vf, rd, rn, rm, hi); break;
+ case NEON_CMHS_scalar: cmp(vf, rd, rn, rm, hs); break;
+ case NEON_CMTST_scalar: cmptst(vf, rd, rn, rm); break;
+ case NEON_USHL_scalar: ushl(vf, rd, rn, rm); break;
+ case NEON_SSHL_scalar: sshl(vf, rd, rn, rm); break;
+ case NEON_SQDMULH_scalar: sqdmulh(vf, rd, rn, rm); break;
+ case NEON_SQRDMULH_scalar: sqrdmulh(vf, rd, rn, rm); break;
+ case NEON_UQADD_scalar:
+ add(vf, rd, rn, rm).UnsignedSaturate(vf);
+ break;
+ case NEON_SQADD_scalar:
+ add(vf, rd, rn, rm).SignedSaturate(vf);
+ break;
+ case NEON_UQSUB_scalar:
+ sub(vf, rd, rn, rm).UnsignedSaturate(vf);
+ break;
+ case NEON_SQSUB_scalar:
+ sub(vf, rd, rn, rm).SignedSaturate(vf);
+ break;
+ case NEON_UQSHL_scalar:
+ ushl(vf, rd, rn, rm).UnsignedSaturate(vf);
+ break;
+ case NEON_SQSHL_scalar:
+ sshl(vf, rd, rn, rm).SignedSaturate(vf);
+ break;
+ case NEON_URSHL_scalar:
+ ushl(vf, rd, rn, rm).Round(vf);
+ break;
+ case NEON_SRSHL_scalar:
+ sshl(vf, rd, rn, rm).Round(vf);
+ break;
+ case NEON_UQRSHL_scalar:
+ ushl(vf, rd, rn, rm).Round(vf).UnsignedSaturate(vf);
+ break;
+ case NEON_SQRSHL_scalar:
+ sshl(vf, rd, rn, rm).Round(vf).SignedSaturate(vf);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+ }
+}
+
+
+void Simulator::VisitNEONScalarByIndexedElement(const Instruction* instr) {
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::LongScalarFormatMap());
+ VectorFormat vf = nfd.GetVectorFormat();
+ VectorFormat vf_r = nfd.GetVectorFormat(nfd.ScalarFormatMap());
+
+ SimVRegister& rd = vreg(instr->Rd());
+ SimVRegister& rn = vreg(instr->Rn());
+ ByElementOp Op = NULL;
+
+ int rm_reg = instr->Rm();
+ int index = (instr->NEONH() << 1) | instr->NEONL();
+ if (instr->NEONSize() == 1) {
+ rm_reg &= 0xf;
+ index = (index << 1) | instr->NEONM();
+ }
+
+ switch (instr->Mask(NEONScalarByIndexedElementMask)) {
+ case NEON_SQDMULL_byelement_scalar: Op = &Simulator::sqdmull; break;
+ case NEON_SQDMLAL_byelement_scalar: Op = &Simulator::sqdmlal; break;
+ case NEON_SQDMLSL_byelement_scalar: Op = &Simulator::sqdmlsl; break;
+ case NEON_SQDMULH_byelement_scalar:
+ Op = &Simulator::sqdmulh;
+ vf = vf_r;
+ break;
+ case NEON_SQRDMULH_byelement_scalar:
+ Op = &Simulator::sqrdmulh;
+ vf = vf_r;
+ break;
+ default:
+ vf = nfd.GetVectorFormat(nfd.FPScalarFormatMap());
+ index = instr->NEONH();
+ if ((instr->FPType() & 1) == 0) {
+ index = (index << 1) | instr->NEONL();
+ }
+ switch (instr->Mask(NEONScalarByIndexedElementFPMask)) {
+ case NEON_FMUL_byelement_scalar: Op = &Simulator::fmul; break;
+ case NEON_FMLA_byelement_scalar: Op = &Simulator::fmla; break;
+ case NEON_FMLS_byelement_scalar: Op = &Simulator::fmls; break;
+ case NEON_FMULX_byelement_scalar: Op = &Simulator::fmulx; break;
+ default: VIXL_UNIMPLEMENTED();
+ }
+ }
+
+ (this->*Op)(vf, rd, rn, vreg(rm_reg), index);
+}
+
+
+void Simulator::VisitNEONScalarCopy(const Instruction* instr) {
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::TriangularScalarFormatMap());
+ VectorFormat vf = nfd.GetVectorFormat();
+
+ SimVRegister& rd = vreg(instr->Rd());
+ SimVRegister& rn = vreg(instr->Rn());
+
+ if (instr->Mask(NEONScalarCopyMask) == NEON_DUP_ELEMENT_scalar) {
+ int imm5 = instr->ImmNEON5();
+ int tz = CountTrailingZeros(imm5, 32);
+ int rn_index = imm5 >> (tz + 1);
+ dup_element(vf, rd, rn, rn_index);
+ } else {
+ VIXL_UNIMPLEMENTED();
+ }
+}
+
+
+void Simulator::VisitNEONScalarPairwise(const Instruction* instr) {
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::FPScalarFormatMap());
+ VectorFormat vf = nfd.GetVectorFormat();
+
+ SimVRegister& rd = vreg(instr->Rd());
+ SimVRegister& rn = vreg(instr->Rn());
+ switch (instr->Mask(NEONScalarPairwiseMask)) {
+ case NEON_ADDP_scalar: addp(vf, rd, rn); break;
+ case NEON_FADDP_scalar: faddp(vf, rd, rn); break;
+ case NEON_FMAXP_scalar: fmaxp(vf, rd, rn); break;
+ case NEON_FMAXNMP_scalar: fmaxnmp(vf, rd, rn); break;
+ case NEON_FMINP_scalar: fminp(vf, rd, rn); break;
+ case NEON_FMINNMP_scalar: fminnmp(vf, rd, rn); break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+}
+
+
+void Simulator::VisitNEONScalarShiftImmediate(const Instruction* instr) {
+ SimVRegister& rd = vreg(instr->Rd());
+ SimVRegister& rn = vreg(instr->Rn());
+ FPRounding fpcr_rounding = static_cast<FPRounding>(fpcr().RMode());
+
+ static const NEONFormatMap map = {
+ {22, 21, 20, 19},
+ {NF_UNDEF, NF_B, NF_H, NF_H, NF_S, NF_S, NF_S, NF_S,
+ NF_D, NF_D, NF_D, NF_D, NF_D, NF_D, NF_D, NF_D}
+ };
+ NEONFormatDecoder nfd(instr, &map);
+ VectorFormat vf = nfd.GetVectorFormat();
+
+ int highestSetBit = HighestSetBitPosition(instr->ImmNEONImmh());
+ int immhimmb = instr->ImmNEONImmhImmb();
+ int right_shift = (16 << highestSetBit) - immhimmb;
+ int left_shift = immhimmb - (8 << highestSetBit);
+ switch (instr->Mask(NEONScalarShiftImmediateMask)) {
+ case NEON_SHL_scalar: shl(vf, rd, rn, left_shift); break;
+ case NEON_SLI_scalar: sli(vf, rd, rn, left_shift); break;
+ case NEON_SQSHL_imm_scalar: sqshl(vf, rd, rn, left_shift); break;
+ case NEON_UQSHL_imm_scalar: uqshl(vf, rd, rn, left_shift); break;
+ case NEON_SQSHLU_scalar: sqshlu(vf, rd, rn, left_shift); break;
+ case NEON_SRI_scalar: sri(vf, rd, rn, right_shift); break;
+ case NEON_SSHR_scalar: sshr(vf, rd, rn, right_shift); break;
+ case NEON_USHR_scalar: ushr(vf, rd, rn, right_shift); break;
+ case NEON_SRSHR_scalar: sshr(vf, rd, rn, right_shift).Round(vf); break;
+ case NEON_URSHR_scalar: ushr(vf, rd, rn, right_shift).Round(vf); break;
+ case NEON_SSRA_scalar: ssra(vf, rd, rn, right_shift); break;
+ case NEON_USRA_scalar: usra(vf, rd, rn, right_shift); break;
+ case NEON_SRSRA_scalar: srsra(vf, rd, rn, right_shift); break;
+ case NEON_URSRA_scalar: ursra(vf, rd, rn, right_shift); break;
+ case NEON_UQSHRN_scalar: uqshrn(vf, rd, rn, right_shift); break;
+ case NEON_UQRSHRN_scalar: uqrshrn(vf, rd, rn, right_shift); break;
+ case NEON_SQSHRN_scalar: sqshrn(vf, rd, rn, right_shift); break;
+ case NEON_SQRSHRN_scalar: sqrshrn(vf, rd, rn, right_shift); break;
+ case NEON_SQSHRUN_scalar: sqshrun(vf, rd, rn, right_shift); break;
+ case NEON_SQRSHRUN_scalar: sqrshrun(vf, rd, rn, right_shift); break;
+ case NEON_FCVTZS_imm_scalar: fcvts(vf, rd, rn, FPZero, right_shift); break;
+ case NEON_FCVTZU_imm_scalar: fcvtu(vf, rd, rn, FPZero, right_shift); break;
+ case NEON_SCVTF_imm_scalar:
+ scvtf(vf, rd, rn, right_shift, fpcr_rounding);
+ break;
+ case NEON_UCVTF_imm_scalar:
+ ucvtf(vf, rd, rn, right_shift, fpcr_rounding);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+}
+
+
+void Simulator::VisitNEONShiftImmediate(const Instruction* instr) {
+ SimVRegister& rd = vreg(instr->Rd());
+ SimVRegister& rn = vreg(instr->Rn());
+ FPRounding fpcr_rounding = static_cast<FPRounding>(fpcr().RMode());
+
+ // 00010->8B, 00011->16B, 001x0->4H, 001x1->8H,
+ // 01xx0->2S, 01xx1->4S, 1xxx1->2D, all others undefined.
+ static const NEONFormatMap map = {
+ {22, 21, 20, 19, 30},
+ {NF_UNDEF, NF_UNDEF, NF_8B, NF_16B, NF_4H, NF_8H, NF_4H, NF_8H,
+ NF_2S, NF_4S, NF_2S, NF_4S, NF_2S, NF_4S, NF_2S, NF_4S,
+ NF_UNDEF, NF_2D, NF_UNDEF, NF_2D, NF_UNDEF, NF_2D, NF_UNDEF, NF_2D,
+ NF_UNDEF, NF_2D, NF_UNDEF, NF_2D, NF_UNDEF, NF_2D, NF_UNDEF, NF_2D}
+ };
+ NEONFormatDecoder nfd(instr, &map);
+ VectorFormat vf = nfd.GetVectorFormat();
+
+ // 0001->8H, 001x->4S, 01xx->2D, all others undefined.
+ static const NEONFormatMap map_l = {
+ {22, 21, 20, 19},
+ {NF_UNDEF, NF_8H, NF_4S, NF_4S, NF_2D, NF_2D, NF_2D, NF_2D}
+ };
+ VectorFormat vf_l = nfd.GetVectorFormat(&map_l);
+
+ int highestSetBit = HighestSetBitPosition(instr->ImmNEONImmh());
+ int immhimmb = instr->ImmNEONImmhImmb();
+ int right_shift = (16 << highestSetBit) - immhimmb;
+ int left_shift = immhimmb - (8 << highestSetBit);
+
+ switch (instr->Mask(NEONShiftImmediateMask)) {
+ case NEON_SHL: shl(vf, rd, rn, left_shift); break;
+ case NEON_SLI: sli(vf, rd, rn, left_shift); break;
+ case NEON_SQSHLU: sqshlu(vf, rd, rn, left_shift); break;
+ case NEON_SRI: sri(vf, rd, rn, right_shift); break;
+ case NEON_SSHR: sshr(vf, rd, rn, right_shift); break;
+ case NEON_USHR: ushr(vf, rd, rn, right_shift); break;
+ case NEON_SRSHR: sshr(vf, rd, rn, right_shift).Round(vf); break;
+ case NEON_URSHR: ushr(vf, rd, rn, right_shift).Round(vf); break;
+ case NEON_SSRA: ssra(vf, rd, rn, right_shift); break;
+ case NEON_USRA: usra(vf, rd, rn, right_shift); break;
+ case NEON_SRSRA: srsra(vf, rd, rn, right_shift); break;
+ case NEON_URSRA: ursra(vf, rd, rn, right_shift); break;
+ case NEON_SQSHL_imm: sqshl(vf, rd, rn, left_shift); break;
+ case NEON_UQSHL_imm: uqshl(vf, rd, rn, left_shift); break;
+ case NEON_SCVTF_imm: scvtf(vf, rd, rn, right_shift, fpcr_rounding); break;
+ case NEON_UCVTF_imm: ucvtf(vf, rd, rn, right_shift, fpcr_rounding); break;
+ case NEON_FCVTZS_imm: fcvts(vf, rd, rn, FPZero, right_shift); break;
+ case NEON_FCVTZU_imm: fcvtu(vf, rd, rn, FPZero, right_shift); break;
+ case NEON_SSHLL:
+ vf = vf_l;
+ if (instr->Mask(NEON_Q)) {
+ sshll2(vf, rd, rn, left_shift);
+ } else {
+ sshll(vf, rd, rn, left_shift);
+ }
+ break;
+ case NEON_USHLL:
+ vf = vf_l;
+ if (instr->Mask(NEON_Q)) {
+ ushll2(vf, rd, rn, left_shift);
+ } else {
+ ushll(vf, rd, rn, left_shift);
+ }
+ break;
+ case NEON_SHRN:
+ if (instr->Mask(NEON_Q)) {
+ shrn2(vf, rd, rn, right_shift);
+ } else {
+ shrn(vf, rd, rn, right_shift);
+ }
+ break;
+ case NEON_RSHRN:
+ if (instr->Mask(NEON_Q)) {
+ rshrn2(vf, rd, rn, right_shift);
+ } else {
+ rshrn(vf, rd, rn, right_shift);
+ }
+ break;
+ case NEON_UQSHRN:
+ if (instr->Mask(NEON_Q)) {
+ uqshrn2(vf, rd, rn, right_shift);
+ } else {
+ uqshrn(vf, rd, rn, right_shift);
+ }
+ break;
+ case NEON_UQRSHRN:
+ if (instr->Mask(NEON_Q)) {
+ uqrshrn2(vf, rd, rn, right_shift);
+ } else {
+ uqrshrn(vf, rd, rn, right_shift);
+ }
+ break;
+ case NEON_SQSHRN:
+ if (instr->Mask(NEON_Q)) {
+ sqshrn2(vf, rd, rn, right_shift);
+ } else {
+ sqshrn(vf, rd, rn, right_shift);
+ }
+ break;
+ case NEON_SQRSHRN:
+ if (instr->Mask(NEON_Q)) {
+ sqrshrn2(vf, rd, rn, right_shift);
+ } else {
+ sqrshrn(vf, rd, rn, right_shift);
+ }
+ break;
+ case NEON_SQSHRUN:
+ if (instr->Mask(NEON_Q)) {
+ sqshrun2(vf, rd, rn, right_shift);
+ } else {
+ sqshrun(vf, rd, rn, right_shift);
+ }
+ break;
+ case NEON_SQRSHRUN:
+ if (instr->Mask(NEON_Q)) {
+ sqrshrun2(vf, rd, rn, right_shift);
+ } else {
+ sqrshrun(vf, rd, rn, right_shift);
+ }
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+}
+
+
+void Simulator::VisitNEONTable(const Instruction* instr) {
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::LogicalFormatMap());
+ VectorFormat vf = nfd.GetVectorFormat();
+
+ SimVRegister& rd = vreg(instr->Rd());
+ SimVRegister& rn = vreg(instr->Rn());
+ SimVRegister& rn2 = vreg((instr->Rn() + 1) % kNumberOfVRegisters);
+ SimVRegister& rn3 = vreg((instr->Rn() + 2) % kNumberOfVRegisters);
+ SimVRegister& rn4 = vreg((instr->Rn() + 3) % kNumberOfVRegisters);
+ SimVRegister& rm = vreg(instr->Rm());
+
+ switch (instr->Mask(NEONTableMask)) {
+ case NEON_TBL_1v: tbl(vf, rd, rn, rm); break;
+ case NEON_TBL_2v: tbl(vf, rd, rn, rn2, rm); break;
+ case NEON_TBL_3v: tbl(vf, rd, rn, rn2, rn3, rm); break;
+ case NEON_TBL_4v: tbl(vf, rd, rn, rn2, rn3, rn4, rm); break;
+ case NEON_TBX_1v: tbx(vf, rd, rn, rm); break;
+ case NEON_TBX_2v: tbx(vf, rd, rn, rn2, rm); break;
+ case NEON_TBX_3v: tbx(vf, rd, rn, rn2, rn3, rm); break;
+ case NEON_TBX_4v: tbx(vf, rd, rn, rn2, rn3, rn4, rm); break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+}
+
+
+void Simulator::VisitNEONPerm(const Instruction* instr) {
+ NEONFormatDecoder nfd(instr);
+ VectorFormat vf = nfd.GetVectorFormat();
+
+ SimVRegister& rd = vreg(instr->Rd());
+ SimVRegister& rn = vreg(instr->Rn());
+ SimVRegister& rm = vreg(instr->Rm());
+
+ switch (instr->Mask(NEONPermMask)) {
+ case NEON_TRN1: trn1(vf, rd, rn, rm); break;
+ case NEON_TRN2: trn2(vf, rd, rn, rm); break;
+ case NEON_UZP1: uzp1(vf, rd, rn, rm); break;
+ case NEON_UZP2: uzp2(vf, rd, rn, rm); break;
+ case NEON_ZIP1: zip1(vf, rd, rn, rm); break;
+ case NEON_ZIP2: zip2(vf, rd, rn, rm); break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+}
+
+
+void Simulator::DoUnreachable(const Instruction* instr) {
+ VIXL_ASSERT(instr->InstructionBits() == UNDEFINED_INST_PATTERN);
+
+ fprintf(stream_, "Hit UNREACHABLE marker at pc=%p.\n",
+ reinterpret_cast<const void*>(instr));
+ abort();
+}
+
+
+void Simulator::DoTrace(const Instruction* instr) {
+ VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
+ (instr->ImmException() == kTraceOpcode));
+
+ // Read the arguments encoded inline in the instruction stream.
+ uint32_t parameters;
+ uint32_t command;
+
+ VIXL_STATIC_ASSERT(sizeof(*instr) == 1);
+ memcpy(&parameters, instr + kTraceParamsOffset, sizeof(parameters));
+ memcpy(&command, instr + kTraceCommandOffset, sizeof(command));
+
+ switch (command) {
+ case TRACE_ENABLE:
+ set_trace_parameters(trace_parameters() | parameters);
+ break;
+ case TRACE_DISABLE:
+ set_trace_parameters(trace_parameters() & ~parameters);
+ break;
+ default:
+ VIXL_UNREACHABLE();
+ }
+
+ set_pc(instr->InstructionAtOffset(kTraceLength));
+}
+
+
+void Simulator::DoLog(const Instruction* instr) {
+ VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
+ (instr->ImmException() == kLogOpcode));
+
+ // Read the arguments encoded inline in the instruction stream.
+ uint32_t parameters;
+
+ VIXL_STATIC_ASSERT(sizeof(*instr) == 1);
+ memcpy(&parameters, instr + kTraceParamsOffset, sizeof(parameters));
+
+ // We don't support a one-shot LOG_DISASM.
+ VIXL_ASSERT((parameters & LOG_DISASM) == 0);
+ // Print the requested information.
+ if (parameters & LOG_SYSREGS) PrintSystemRegisters();
+ if (parameters & LOG_REGS) PrintRegisters();
+ if (parameters & LOG_VREGS) PrintVRegisters();
+
+ set_pc(instr->InstructionAtOffset(kLogLength));
+}
+
+
+void Simulator::DoPrintf(const Instruction* instr) {
+ VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
+ (instr->ImmException() == kPrintfOpcode));
+
+ // Read the arguments encoded inline in the instruction stream.
+ uint32_t arg_count;
+ uint32_t arg_pattern_list;
+ VIXL_STATIC_ASSERT(sizeof(*instr) == 1);
+ memcpy(&arg_count,
+ instr + kPrintfArgCountOffset,
+ sizeof(arg_count));
+ memcpy(&arg_pattern_list,
+ instr + kPrintfArgPatternListOffset,
+ sizeof(arg_pattern_list));
+
+ VIXL_ASSERT(arg_count <= kPrintfMaxArgCount);
+ VIXL_ASSERT((arg_pattern_list >> (kPrintfArgPatternBits * arg_count)) == 0);
+
+ // We need to call the host printf function with a set of arguments defined by
+ // arg_pattern_list. Because we don't know the types and sizes of the
+ // arguments, this is very difficult to do in a robust and portable way. To
+ // work around the problem, we pick apart the format string, and print one
+ // format placeholder at a time.
+
+ // Allocate space for the format string. We take a copy, so we can modify it.
+ // Leave enough space for one extra character per expected argument (plus the
+ // '\0' termination).
+ const char * format_base = reg<const char *>(0);
+ VIXL_ASSERT(format_base != NULL);
+ size_t length = strlen(format_base) + 1;
+ char * const format = (char *)js_calloc(length + arg_count);
+
+ // A list of chunks, each with exactly one format placeholder.
+ const char * chunks[kPrintfMaxArgCount];
+
+ // Copy the format string and search for format placeholders.
+ uint32_t placeholder_count = 0;
+ char * format_scratch = format;
+ for (size_t i = 0; i < length; i++) {
+ if (format_base[i] != '%') {
+ *format_scratch++ = format_base[i];
+ } else {
+ if (format_base[i + 1] == '%') {
+ // Ignore explicit "%%" sequences.
+ *format_scratch++ = format_base[i];
+ i++;
+ // Chunks after the first are passed as format strings to printf, so we
+ // need to escape '%' characters in those chunks.
+ if (placeholder_count > 0) *format_scratch++ = format_base[i];
+ } else {
+ VIXL_CHECK(placeholder_count < arg_count);
+ // Insert '\0' before placeholders, and store their locations.
+ *format_scratch++ = '\0';
+ chunks[placeholder_count++] = format_scratch;
+ *format_scratch++ = format_base[i];
+ }
+ }
+ }
+ VIXL_CHECK(placeholder_count == arg_count);
+
+ // Finally, call printf with each chunk, passing the appropriate register
+ // argument. Normally, printf returns the number of bytes transmitted, so we
+ // can emulate a single printf call by adding the result from each chunk. If
+ // any call returns a negative (error) value, though, just return that value.
+
+ printf("%s", clr_printf);
+
+ // Because '\0' is inserted before each placeholder, the first string in
+ // 'format' contains no format placeholders and should be printed literally.
+ int result = printf("%s", format);
+ int pcs_r = 1; // Start at x1. x0 holds the format string.
+ int pcs_f = 0; // Start at d0.
+ if (result >= 0) {
+ for (uint32_t i = 0; i < placeholder_count; i++) {
+ int part_result = -1;
+
+ uint32_t arg_pattern = arg_pattern_list >> (i * kPrintfArgPatternBits);
+ arg_pattern &= (1 << kPrintfArgPatternBits) - 1;
+ switch (arg_pattern) {
+ case kPrintfArgW: part_result = printf(chunks[i], wreg(pcs_r++)); break;
+ case kPrintfArgX: part_result = printf(chunks[i], xreg(pcs_r++)); break;
+ case kPrintfArgD: part_result = printf(chunks[i], dreg(pcs_f++)); break;
+ default: VIXL_UNREACHABLE();
+ }
+
+ if (part_result < 0) {
+ // Handle error values.
+ result = part_result;
+ break;
+ }
+
+ result += part_result;
+ }
+ }
+
+ printf("%s", clr_normal);
+
+ // Printf returns its result in x0 (just like the C library's printf).
+ set_xreg(0, result);
+
+ // The printf parameters are inlined in the code, so skip them.
+ set_pc(instr->InstructionAtOffset(kPrintfLength));
+
+ // Set LR as if we'd just called a native printf function.
+ set_lr(pc());
+
+ js_free(format);
+}
+
+} // namespace vixl
+
+#endif // JS_SIMULATOR_ARM64
diff --git a/js/src/jit/arm64/vixl/Simulator-vixl.h b/js/src/jit/arm64/vixl/Simulator-vixl.h
new file mode 100644
index 0000000000..af78f5bad0
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Simulator-vixl.h
@@ -0,0 +1,2592 @@
+// Copyright 2015, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_A64_SIMULATOR_A64_H_
+#define VIXL_A64_SIMULATOR_A64_H_
+
+#include "jstypes.h"
+
+#ifdef JS_SIMULATOR_ARM64
+
+#include "mozilla/Vector.h"
+
+#include "jit/arm64/vixl/Assembler-vixl.h"
+#include "jit/arm64/vixl/Disasm-vixl.h"
+#include "jit/arm64/vixl/Globals-vixl.h"
+#include "jit/arm64/vixl/Instructions-vixl.h"
+#include "jit/arm64/vixl/Instrument-vixl.h"
+#include "jit/arm64/vixl/MozCachingDecoder.h"
+#include "jit/arm64/vixl/Simulator-Constants-vixl.h"
+#include "jit/arm64/vixl/Utils-vixl.h"
+#include "jit/IonTypes.h"
+#include "js/AllocPolicy.h"
+#include "vm/MutexIDs.h"
+#include "wasm/WasmSignalHandlers.h"
+
+namespace vixl {
+
+// Representation of memory, with typed getters and setters for access.
+class Memory {
+ public:
+ template <typename T>
+ static T AddressUntag(T address) {
+ // Cast the address using a C-style cast. A reinterpret_cast would be
+ // appropriate, but it can't cast one integral type to another.
+ uint64_t bits = (uint64_t)address;
+ return (T)(bits & ~kAddressTagMask);
+ }
+
+ template <typename T, typename A>
+ static T Read(A address) {
+ T value;
+ address = AddressUntag(address);
+ VIXL_ASSERT((sizeof(value) == 1) || (sizeof(value) == 2) ||
+ (sizeof(value) == 4) || (sizeof(value) == 8) ||
+ (sizeof(value) == 16));
+ memcpy(&value, reinterpret_cast<const char *>(address), sizeof(value));
+ return value;
+ }
+
+ template <typename T, typename A>
+ static void Write(A address, T value) {
+ address = AddressUntag(address);
+ VIXL_ASSERT((sizeof(value) == 1) || (sizeof(value) == 2) ||
+ (sizeof(value) == 4) || (sizeof(value) == 8) ||
+ (sizeof(value) == 16));
+ memcpy(reinterpret_cast<char *>(address), &value, sizeof(value));
+ }
+};
+
+// Represent a register (r0-r31, v0-v31).
+template<int kSizeInBytes>
+class SimRegisterBase {
+ public:
+ SimRegisterBase() : written_since_last_log_(false) {}
+
+ // Write the specified value. The value is zero-extended if necessary.
+ template<typename T>
+ void Set(T new_value) {
+ VIXL_STATIC_ASSERT(sizeof(new_value) <= kSizeInBytes);
+ if (sizeof(new_value) < kSizeInBytes) {
+ // All AArch64 registers are zero-extending.
+ memset(value_ + sizeof(new_value), 0, kSizeInBytes - sizeof(new_value));
+ }
+ memcpy(value_, &new_value, sizeof(new_value));
+ NotifyRegisterWrite();
+ }
+
+ // Insert a typed value into a register, leaving the rest of the register
+ // unchanged. The lane parameter indicates where in the register the value
+ // should be inserted, in the range [ 0, sizeof(value_) / sizeof(T) ), where
+ // 0 represents the least significant bits.
+ template<typename T>
+ void Insert(int lane, T new_value) {
+ VIXL_ASSERT(lane >= 0);
+ VIXL_ASSERT((sizeof(new_value) +
+ (lane * sizeof(new_value))) <= kSizeInBytes);
+ memcpy(&value_[lane * sizeof(new_value)], &new_value, sizeof(new_value));
+ NotifyRegisterWrite();
+ }
+
+ // Read the value as the specified type. The value is truncated if necessary.
+ template<typename T>
+ T Get(int lane = 0) const {
+ T result;
+ VIXL_ASSERT(lane >= 0);
+ VIXL_ASSERT((sizeof(result) + (lane * sizeof(result))) <= kSizeInBytes);
+ memcpy(&result, &value_[lane * sizeof(result)], sizeof(result));
+ return result;
+ }
+
+ // TODO: Make this return a map of updated bytes, so that we can highlight
+ // updated lanes for load-and-insert. (That never happens for scalar code, but
+ // NEON has some instructions that can update individual lanes.)
+ bool WrittenSinceLastLog() const {
+ return written_since_last_log_;
+ }
+
+ void NotifyRegisterLogged() {
+ written_since_last_log_ = false;
+ }
+
+ protected:
+ uint8_t value_[kSizeInBytes];
+
+ // Helpers to aid with register tracing.
+ bool written_since_last_log_;
+
+ void NotifyRegisterWrite() {
+ written_since_last_log_ = true;
+ }
+};
+typedef SimRegisterBase<kXRegSizeInBytes> SimRegister; // r0-r31
+typedef SimRegisterBase<kQRegSizeInBytes> SimVRegister; // v0-v31
+
+// Representation of a vector register, with typed getters and setters for lanes
+// and additional information to represent lane state.
+class LogicVRegister {
+ public:
+ inline LogicVRegister(SimVRegister& other) // NOLINT
+ : register_(other) {
+ for (unsigned i = 0; i < sizeof(saturated_) / sizeof(saturated_[0]); i++) {
+ saturated_[i] = kNotSaturated;
+ }
+ for (unsigned i = 0; i < sizeof(round_) / sizeof(round_[0]); i++) {
+ round_[i] = 0;
+ }
+ }
+
+ int64_t Int(VectorFormat vform, int index) const {
+ int64_t element;
+ switch (LaneSizeInBitsFromFormat(vform)) {
+ case 8: element = register_.Get<int8_t>(index); break;
+ case 16: element = register_.Get<int16_t>(index); break;
+ case 32: element = register_.Get<int32_t>(index); break;
+ case 64: element = register_.Get<int64_t>(index); break;
+ default: VIXL_UNREACHABLE(); return 0;
+ }
+ return element;
+ }
+
+ uint64_t Uint(VectorFormat vform, int index) const {
+ uint64_t element;
+ switch (LaneSizeInBitsFromFormat(vform)) {
+ case 8: element = register_.Get<uint8_t>(index); break;
+ case 16: element = register_.Get<uint16_t>(index); break;
+ case 32: element = register_.Get<uint32_t>(index); break;
+ case 64: element = register_.Get<uint64_t>(index); break;
+ default: VIXL_UNREACHABLE(); return 0;
+ }
+ return element;
+ }
+
+ int64_t IntLeftJustified(VectorFormat vform, int index) const {
+ return Int(vform, index) << (64 - LaneSizeInBitsFromFormat(vform));
+ }
+
+ uint64_t UintLeftJustified(VectorFormat vform, int index) const {
+ return Uint(vform, index) << (64 - LaneSizeInBitsFromFormat(vform));
+ }
+
+ void SetInt(VectorFormat vform, int index, int64_t value) const {
+ switch (LaneSizeInBitsFromFormat(vform)) {
+ case 8: register_.Insert(index, static_cast<int8_t>(value)); break;
+ case 16: register_.Insert(index, static_cast<int16_t>(value)); break;
+ case 32: register_.Insert(index, static_cast<int32_t>(value)); break;
+ case 64: register_.Insert(index, static_cast<int64_t>(value)); break;
+ default: VIXL_UNREACHABLE(); return;
+ }
+ }
+
+ void SetUint(VectorFormat vform, int index, uint64_t value) const {
+ switch (LaneSizeInBitsFromFormat(vform)) {
+ case 8: register_.Insert(index, static_cast<uint8_t>(value)); break;
+ case 16: register_.Insert(index, static_cast<uint16_t>(value)); break;
+ case 32: register_.Insert(index, static_cast<uint32_t>(value)); break;
+ case 64: register_.Insert(index, static_cast<uint64_t>(value)); break;
+ default: VIXL_UNREACHABLE(); return;
+ }
+ }
+
+ void ReadUintFromMem(VectorFormat vform, int index, uint64_t addr) const {
+ switch (LaneSizeInBitsFromFormat(vform)) {
+ case 8: register_.Insert(index, Memory::Read<uint8_t>(addr)); break;
+ case 16: register_.Insert(index, Memory::Read<uint16_t>(addr)); break;
+ case 32: register_.Insert(index, Memory::Read<uint32_t>(addr)); break;
+ case 64: register_.Insert(index, Memory::Read<uint64_t>(addr)); break;
+ default: VIXL_UNREACHABLE(); return;
+ }
+ }
+
+ void WriteUintToMem(VectorFormat vform, int index, uint64_t addr) const {
+ uint64_t value = Uint(vform, index);
+ switch (LaneSizeInBitsFromFormat(vform)) {
+ case 8: Memory::Write(addr, static_cast<uint8_t>(value)); break;
+ case 16: Memory::Write(addr, static_cast<uint16_t>(value)); break;
+ case 32: Memory::Write(addr, static_cast<uint32_t>(value)); break;
+ case 64: Memory::Write(addr, value); break;
+ }
+ }
+
+ template <typename T>
+ T Float(int index) const {
+ return register_.Get<T>(index);
+ }
+
+ template <typename T>
+ void SetFloat(int index, T value) const {
+ register_.Insert(index, value);
+ }
+
+ // When setting a result in a register of size less than Q, the top bits of
+ // the Q register must be cleared.
+ void ClearForWrite(VectorFormat vform) const {
+ unsigned size = RegisterSizeInBytesFromFormat(vform);
+ for (unsigned i = size; i < kQRegSizeInBytes; i++) {
+ SetUint(kFormat16B, i, 0);
+ }
+ }
+
+ // Saturation state for each lane of a vector.
+ enum Saturation {
+ kNotSaturated = 0,
+ kSignedSatPositive = 1 << 0,
+ kSignedSatNegative = 1 << 1,
+ kSignedSatMask = kSignedSatPositive | kSignedSatNegative,
+ kSignedSatUndefined = kSignedSatMask,
+ kUnsignedSatPositive = 1 << 2,
+ kUnsignedSatNegative = 1 << 3,
+ kUnsignedSatMask = kUnsignedSatPositive | kUnsignedSatNegative,
+ kUnsignedSatUndefined = kUnsignedSatMask
+ };
+
+ // Getters for saturation state.
+ Saturation GetSignedSaturation(int index) {
+ return static_cast<Saturation>(saturated_[index] & kSignedSatMask);
+ }
+
+ Saturation GetUnsignedSaturation(int index) {
+ return static_cast<Saturation>(saturated_[index] & kUnsignedSatMask);
+ }
+
+ // Setters for saturation state.
+ void ClearSat(int index) {
+ saturated_[index] = kNotSaturated;
+ }
+
+ void SetSignedSat(int index, bool positive) {
+ SetSatFlag(index, positive ? kSignedSatPositive : kSignedSatNegative);
+ }
+
+ void SetUnsignedSat(int index, bool positive) {
+ SetSatFlag(index, positive ? kUnsignedSatPositive : kUnsignedSatNegative);
+ }
+
+ void SetSatFlag(int index, Saturation sat) {
+ saturated_[index] = static_cast<Saturation>(saturated_[index] | sat);
+ VIXL_ASSERT((sat & kUnsignedSatMask) != kUnsignedSatUndefined);
+ VIXL_ASSERT((sat & kSignedSatMask) != kSignedSatUndefined);
+ }
+
+ // Saturate lanes of a vector based on saturation state.
+ LogicVRegister& SignedSaturate(VectorFormat vform) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ Saturation sat = GetSignedSaturation(i);
+ if (sat == kSignedSatPositive) {
+ SetInt(vform, i, MaxIntFromFormat(vform));
+ } else if (sat == kSignedSatNegative) {
+ SetInt(vform, i, MinIntFromFormat(vform));
+ }
+ }
+ return *this;
+ }
+
+ LogicVRegister& UnsignedSaturate(VectorFormat vform) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ Saturation sat = GetUnsignedSaturation(i);
+ if (sat == kUnsignedSatPositive) {
+ SetUint(vform, i, MaxUintFromFormat(vform));
+ } else if (sat == kUnsignedSatNegative) {
+ SetUint(vform, i, 0);
+ }
+ }
+ return *this;
+ }
+
+ // Getter for rounding state.
+ bool GetRounding(int index) {
+ return round_[index];
+ }
+
+ // Setter for rounding state.
+ void SetRounding(int index, bool round) {
+ round_[index] = round;
+ }
+
+ // Round lanes of a vector based on rounding state.
+ LogicVRegister& Round(VectorFormat vform) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ SetInt(vform, i, Int(vform, i) + (GetRounding(i) ? 1 : 0));
+ }
+ return *this;
+ }
+
+ // Unsigned halve lanes of a vector, and use the saturation state to set the
+ // top bit.
+ LogicVRegister& Uhalve(VectorFormat vform) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ uint64_t val = Uint(vform, i);
+ SetRounding(i, (val & 1) == 1);
+ val >>= 1;
+ if (GetUnsignedSaturation(i) != kNotSaturated) {
+ // If the operation causes unsigned saturation, the bit shifted into the
+ // most significant bit must be set.
+ val |= (MaxUintFromFormat(vform) >> 1) + 1;
+ }
+ SetInt(vform, i, val);
+ }
+ return *this;
+ }
+
+ // Signed halve lanes of a vector, and use the carry state to set the top bit.
+ LogicVRegister& Halve(VectorFormat vform) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ int64_t val = Int(vform, i);
+ SetRounding(i, (val & 1) == 1);
+ val >>= 1;
+ if (GetSignedSaturation(i) != kNotSaturated) {
+ // If the operation causes signed saturation, the sign bit must be
+ // inverted.
+ val ^= (MaxUintFromFormat(vform) >> 1) + 1;
+ }
+ SetInt(vform, i, val);
+ }
+ return *this;
+ }
+
+ private:
+ SimVRegister& register_;
+
+ // Allocate one saturation state entry per lane; largest register is type Q,
+ // and lanes can be a minimum of one byte wide.
+ Saturation saturated_[kQRegSizeInBytes];
+
+ // Allocate one rounding state entry per lane.
+ bool round_[kQRegSizeInBytes];
+};
+
+// The proper way to initialize a simulated system register (such as NZCV) is as
+// follows:
+// SimSystemRegister nzcv = SimSystemRegister::DefaultValueFor(NZCV);
+class SimSystemRegister {
+ public:
+ // The default constructor represents a register which has no writable bits.
+ // It is not possible to set its value to anything other than 0.
+ SimSystemRegister() : value_(0), write_ignore_mask_(0xffffffff) { }
+
+ uint32_t RawValue() const {
+ return value_;
+ }
+
+ void SetRawValue(uint32_t new_value) {
+ value_ = (value_ & write_ignore_mask_) | (new_value & ~write_ignore_mask_);
+ }
+
+ uint32_t Bits(int msb, int lsb) const {
+ return ExtractUnsignedBitfield32(msb, lsb, value_);
+ }
+
+ int32_t SignedBits(int msb, int lsb) const {
+ return ExtractSignedBitfield32(msb, lsb, value_);
+ }
+
+ void SetBits(int msb, int lsb, uint32_t bits);
+
+ // Default system register values.
+ static SimSystemRegister DefaultValueFor(SystemRegister id);
+
+#define DEFINE_GETTER(Name, HighBit, LowBit, Func) \
+ uint32_t Name() const { return Func(HighBit, LowBit); } \
+ void Set##Name(uint32_t bits) { SetBits(HighBit, LowBit, bits); }
+#define DEFINE_WRITE_IGNORE_MASK(Name, Mask) \
+ static const uint32_t Name##WriteIgnoreMask = ~static_cast<uint32_t>(Mask);
+
+ SYSTEM_REGISTER_FIELDS_LIST(DEFINE_GETTER, DEFINE_WRITE_IGNORE_MASK)
+
+#undef DEFINE_ZERO_BITS
+#undef DEFINE_GETTER
+
+ protected:
+ // Most system registers only implement a few of the bits in the word. Other
+ // bits are "read-as-zero, write-ignored". The write_ignore_mask argument
+ // describes the bits which are not modifiable.
+ SimSystemRegister(uint32_t value, uint32_t write_ignore_mask)
+ : value_(value), write_ignore_mask_(write_ignore_mask) { }
+
+ uint32_t value_;
+ uint32_t write_ignore_mask_;
+};
+
+
+class SimExclusiveLocalMonitor {
+ public:
+ SimExclusiveLocalMonitor() : kSkipClearProbability(8), seed_(0x87654321) {
+ Clear();
+ }
+
+ // Clear the exclusive monitor (like clrex).
+ void Clear() {
+ address_ = 0;
+ size_ = 0;
+ }
+
+ // Clear the exclusive monitor most of the time.
+ void MaybeClear() {
+ if ((seed_ % kSkipClearProbability) != 0) {
+ Clear();
+ }
+
+ // Advance seed_ using a simple linear congruential generator.
+ seed_ = (seed_ * 48271) % 2147483647;
+ }
+
+ // Mark the address range for exclusive access (like load-exclusive).
+ void MarkExclusive(uint64_t address, size_t size) {
+ address_ = address;
+ size_ = size;
+ }
+
+ // Return true if the address range is marked (like store-exclusive).
+ // This helper doesn't implicitly clear the monitor.
+ bool IsExclusive(uint64_t address, size_t size) {
+ VIXL_ASSERT(size > 0);
+ // Be pedantic: Require both the address and the size to match.
+ return (size == size_) && (address == address_);
+ }
+
+ private:
+ uint64_t address_;
+ size_t size_;
+
+ const int kSkipClearProbability;
+ uint32_t seed_;
+};
+
+
+// We can't accurate simulate the global monitor since it depends on external
+// influences. Instead, this implementation occasionally causes accesses to
+// fail, according to kPassProbability.
+class SimExclusiveGlobalMonitor {
+ public:
+ SimExclusiveGlobalMonitor() : kPassProbability(8), seed_(0x87654321) {}
+
+ bool IsExclusive(uint64_t address, size_t size) {
+ USE(address, size);
+
+ bool pass = (seed_ % kPassProbability) != 0;
+ // Advance seed_ using a simple linear congruential generator.
+ seed_ = (seed_ * 48271) % 2147483647;
+ return pass;
+ }
+
+ private:
+ const int kPassProbability;
+ uint32_t seed_;
+};
+
+class Redirection;
+
+class Simulator : public DecoderVisitor {
+ public:
+#ifdef JS_CACHE_SIMULATOR_ARM64
+ using Decoder = CachingDecoder;
+ mozilla::Atomic<bool> pendingCacheRequests = mozilla::Atomic<bool>{ false };
+#endif
+ explicit Simulator(Decoder* decoder, FILE* stream = stdout);
+ ~Simulator();
+
+ // Moz changes.
+ void init(Decoder* decoder, FILE* stream);
+ static Simulator* Current();
+ static Simulator* Create();
+ static void Destroy(Simulator* sim);
+ uintptr_t stackLimit() const;
+ uintptr_t* addressOfStackLimit();
+ bool overRecursed(uintptr_t newsp = 0) const;
+ bool overRecursedWithExtra(uint32_t extra) const;
+ int64_t call(uint8_t* entry, int argument_count, ...);
+ static void* RedirectNativeFunction(void* nativeFunction, js::jit::ABIFunctionType type);
+ void setGPR32Result(int32_t result);
+ void setGPR64Result(int64_t result);
+ void setFP32Result(float result);
+ void setFP64Result(double result);
+#ifdef JS_CACHE_SIMULATOR_ARM64
+ void FlushICache();
+#endif
+ void VisitCallRedirection(const Instruction* instr);
+ static uintptr_t StackLimit() {
+ return Simulator::Current()->stackLimit();
+ }
+ template<typename T> T Read(uintptr_t address);
+ template <typename T> void Write(uintptr_t address_, T value);
+ JS::ProfilingFrameIterator::RegisterState registerState();
+
+ void ResetState();
+
+ // Run the simulator.
+ virtual void Run();
+ void RunFrom(const Instruction* first);
+
+ // Simulation helpers.
+ const Instruction* pc() const { return pc_; }
+ const Instruction* get_pc() const { return pc_; }
+ int64_t get_sp() const { return xreg(31, Reg31IsStackPointer); }
+ int64_t get_lr() const { return xreg(30); }
+ int64_t get_fp() const { return xreg(29); }
+
+ template <typename T>
+ T get_pc_as() const { return reinterpret_cast<T>(const_cast<Instruction*>(pc())); }
+
+ void set_pc(const Instruction* new_pc) {
+ pc_ = Memory::AddressUntag(new_pc);
+ pc_modified_ = true;
+ }
+
+ // Handle any wasm faults, returning true if the fault was handled.
+ // This method is rather hot so inline the normal (no-wasm) case.
+ bool MOZ_ALWAYS_INLINE handle_wasm_seg_fault(uintptr_t addr, unsigned numBytes) {
+ if (MOZ_LIKELY(!js::wasm::CodeExists)) {
+ return false;
+ }
+
+ uint8_t* newPC;
+ if (!js::wasm::MemoryAccessTraps(registerState(), (uint8_t*)addr, numBytes, &newPC)) {
+ return false;
+ }
+
+ set_pc((Instruction*)newPC);
+ return true;
+ }
+
+ void increment_pc() {
+ if (!pc_modified_) {
+ pc_ = pc_->NextInstruction();
+ }
+
+ pc_modified_ = false;
+ }
+
+ void ExecuteInstruction();
+
+ // Declare all Visitor functions.
+ #define DECLARE(A) virtual void Visit##A(const Instruction* instr) override;
+ VISITOR_LIST_THAT_RETURN(DECLARE)
+ VISITOR_LIST_THAT_DONT_RETURN(DECLARE)
+ #undef DECLARE
+
+
+ // Integer register accessors.
+
+ // Basic accessor: Read the register as the specified type.
+ template<typename T>
+ T reg(unsigned code, Reg31Mode r31mode = Reg31IsZeroRegister) const {
+ VIXL_ASSERT(code < kNumberOfRegisters);
+ if ((code == 31) && (r31mode == Reg31IsZeroRegister)) {
+ T result;
+ memset(&result, 0, sizeof(result));
+ return result;
+ }
+ return registers_[code].Get<T>();
+ }
+
+ // Common specialized accessors for the reg() template.
+ int32_t wreg(unsigned code,
+ Reg31Mode r31mode = Reg31IsZeroRegister) const {
+ return reg<int32_t>(code, r31mode);
+ }
+
+ int64_t xreg(unsigned code,
+ Reg31Mode r31mode = Reg31IsZeroRegister) const {
+ return reg<int64_t>(code, r31mode);
+ }
+
+ // As above, with parameterized size and return type. The value is
+ // either zero-extended or truncated to fit, as required.
+ template<typename T>
+ T reg(unsigned size, unsigned code,
+ Reg31Mode r31mode = Reg31IsZeroRegister) const {
+ uint64_t raw;
+ switch (size) {
+ case kWRegSize: raw = reg<uint32_t>(code, r31mode); break;
+ case kXRegSize: raw = reg<uint64_t>(code, r31mode); break;
+ default:
+ VIXL_UNREACHABLE();
+ return 0;
+ }
+
+ T result;
+ VIXL_STATIC_ASSERT(sizeof(result) <= sizeof(raw));
+ // Copy the result and truncate to fit. This assumes a little-endian host.
+ memcpy(&result, &raw, sizeof(result));
+ return result;
+ }
+
+ // Use int64_t by default if T is not specified.
+ int64_t reg(unsigned size, unsigned code,
+ Reg31Mode r31mode = Reg31IsZeroRegister) const {
+ return reg<int64_t>(size, code, r31mode);
+ }
+
+ enum RegLogMode {
+ LogRegWrites,
+ NoRegLog
+ };
+
+ // Write 'value' into an integer register. The value is zero-extended. This
+ // behaviour matches AArch64 register writes.
+ template<typename T>
+ void set_reg(unsigned code, T value,
+ RegLogMode log_mode = LogRegWrites,
+ Reg31Mode r31mode = Reg31IsZeroRegister) {
+ if (sizeof(T) < kWRegSizeInBytes) {
+ // We use a C-style cast on purpose here.
+ // Since we do not have access to 'constepxr if', the casts in this `if`
+ // must be valid even if we know the code will never be executed, in
+ // particular when `T` is a pointer type.
+ int64_t tmp_64bit = (int64_t)value;
+ int32_t tmp_32bit = static_cast<int32_t>(tmp_64bit);
+ set_reg<int32_t>(code, tmp_32bit, log_mode, r31mode);
+ return;
+ }
+
+ VIXL_ASSERT((sizeof(T) == kWRegSizeInBytes) ||
+ (sizeof(T) == kXRegSizeInBytes));
+ VIXL_ASSERT(code < kNumberOfRegisters);
+
+ if ((code == 31) && (r31mode == Reg31IsZeroRegister)) {
+ return;
+ }
+
+ registers_[code].Set(value);
+
+ if (log_mode == LogRegWrites) LogRegister(code, r31mode);
+ }
+
+ // Common specialized accessors for the set_reg() template.
+ void set_wreg(unsigned code, int32_t value,
+ RegLogMode log_mode = LogRegWrites,
+ Reg31Mode r31mode = Reg31IsZeroRegister) {
+ set_reg(code, value, log_mode, r31mode);
+ }
+
+ void set_xreg(unsigned code, int64_t value,
+ RegLogMode log_mode = LogRegWrites,
+ Reg31Mode r31mode = Reg31IsZeroRegister) {
+ set_reg(code, value, log_mode, r31mode);
+ }
+
+ // As above, with parameterized size and type. The value is either
+ // zero-extended or truncated to fit, as required.
+ template<typename T>
+ void set_reg(unsigned size, unsigned code, T value,
+ RegLogMode log_mode = LogRegWrites,
+ Reg31Mode r31mode = Reg31IsZeroRegister) {
+ // Zero-extend the input.
+ uint64_t raw = 0;
+ VIXL_STATIC_ASSERT(sizeof(value) <= sizeof(raw));
+ memcpy(&raw, &value, sizeof(value));
+
+ // Write (and possibly truncate) the value.
+ switch (size) {
+ case kWRegSize:
+ set_reg(code, static_cast<uint32_t>(raw), log_mode, r31mode);
+ break;
+ case kXRegSize:
+ set_reg(code, raw, log_mode, r31mode);
+ break;
+ default:
+ VIXL_UNREACHABLE();
+ return;
+ }
+ }
+
+ // Common specialized accessors for the set_reg() template.
+
+ // Commonly-used special cases.
+ template<typename T>
+ void set_lr(T value) {
+ set_reg(kLinkRegCode, value);
+ }
+
+ template<typename T>
+ void set_sp(T value) {
+ set_reg(31, value, LogRegWrites, Reg31IsStackPointer);
+ }
+
+ // Vector register accessors.
+ // These are equivalent to the integer register accessors, but for vector
+ // registers.
+
+ // A structure for representing a 128-bit Q register.
+ struct qreg_t { uint8_t val[kQRegSizeInBytes]; };
+
+ // Basic accessor: read the register as the specified type.
+ template<typename T>
+ T vreg(unsigned code) const {
+ VIXL_STATIC_ASSERT((sizeof(T) == kBRegSizeInBytes) ||
+ (sizeof(T) == kHRegSizeInBytes) ||
+ (sizeof(T) == kSRegSizeInBytes) ||
+ (sizeof(T) == kDRegSizeInBytes) ||
+ (sizeof(T) == kQRegSizeInBytes));
+ VIXL_ASSERT(code < kNumberOfVRegisters);
+
+ return vregisters_[code].Get<T>();
+ }
+
+ // Common specialized accessors for the vreg() template.
+ int8_t breg(unsigned code) const {
+ return vreg<int8_t>(code);
+ }
+
+ int16_t hreg(unsigned code) const {
+ return vreg<int16_t>(code);
+ }
+
+ float sreg(unsigned code) const {
+ return vreg<float>(code);
+ }
+
+ uint32_t sreg_bits(unsigned code) const {
+ return vreg<uint32_t>(code);
+ }
+
+ double dreg(unsigned code) const {
+ return vreg<double>(code);
+ }
+
+ uint64_t dreg_bits(unsigned code) const {
+ return vreg<uint64_t>(code);
+ }
+
+ qreg_t qreg(unsigned code) const {
+ return vreg<qreg_t>(code);
+ }
+
+ // As above, with parameterized size and return type. The value is
+ // either zero-extended or truncated to fit, as required.
+ template<typename T>
+ T vreg(unsigned size, unsigned code) const {
+ uint64_t raw = 0;
+ T result;
+
+ switch (size) {
+ case kSRegSize: raw = vreg<uint32_t>(code); break;
+ case kDRegSize: raw = vreg<uint64_t>(code); break;
+ default:
+ VIXL_UNREACHABLE();
+ break;
+ }
+
+ VIXL_STATIC_ASSERT(sizeof(result) <= sizeof(raw));
+ // Copy the result and truncate to fit. This assumes a little-endian host.
+ memcpy(&result, &raw, sizeof(result));
+ return result;
+ }
+
+ inline SimVRegister& vreg(unsigned code) {
+ return vregisters_[code];
+ }
+
+ // Basic accessor: Write the specified value.
+ template<typename T>
+ void set_vreg(unsigned code, T value,
+ RegLogMode log_mode = LogRegWrites) {
+ VIXL_STATIC_ASSERT((sizeof(value) == kBRegSizeInBytes) ||
+ (sizeof(value) == kHRegSizeInBytes) ||
+ (sizeof(value) == kSRegSizeInBytes) ||
+ (sizeof(value) == kDRegSizeInBytes) ||
+ (sizeof(value) == kQRegSizeInBytes));
+ VIXL_ASSERT(code < kNumberOfVRegisters);
+ vregisters_[code].Set(value);
+
+ if (log_mode == LogRegWrites) {
+ LogVRegister(code, GetPrintRegisterFormat(value));
+ }
+ }
+
+ // Common specialized accessors for the set_vreg() template.
+ void set_breg(unsigned code, int8_t value,
+ RegLogMode log_mode = LogRegWrites) {
+ set_vreg(code, value, log_mode);
+ }
+
+ void set_hreg(unsigned code, int16_t value,
+ RegLogMode log_mode = LogRegWrites) {
+ set_vreg(code, value, log_mode);
+ }
+
+ void set_sreg(unsigned code, float value,
+ RegLogMode log_mode = LogRegWrites) {
+ set_vreg(code, value, log_mode);
+ }
+
+ void set_sreg_bits(unsigned code, uint32_t value,
+ RegLogMode log_mode = LogRegWrites) {
+ set_vreg(code, value, log_mode);
+ }
+
+ void set_dreg(unsigned code, double value,
+ RegLogMode log_mode = LogRegWrites) {
+ set_vreg(code, value, log_mode);
+ }
+
+ void set_dreg_bits(unsigned code, uint64_t value,
+ RegLogMode log_mode = LogRegWrites) {
+ set_vreg(code, value, log_mode);
+ }
+
+ void set_qreg(unsigned code, qreg_t value,
+ RegLogMode log_mode = LogRegWrites) {
+ set_vreg(code, value, log_mode);
+ }
+
+ bool N() const { return nzcv_.N() != 0; }
+ bool Z() const { return nzcv_.Z() != 0; }
+ bool C() const { return nzcv_.C() != 0; }
+ bool V() const { return nzcv_.V() != 0; }
+
+ SimSystemRegister& ReadNzcv() { return nzcv_; }
+ SimSystemRegister& nzcv() { return nzcv_; }
+
+ // TODO: Find a way to make the fpcr_ members return the proper types, so
+ // these accessors are not necessary.
+ FPRounding RMode() { return static_cast<FPRounding>(fpcr_.RMode()); }
+ bool DN() { return fpcr_.DN() != 0; }
+ SimSystemRegister& fpcr() { return fpcr_; }
+
+ UseDefaultNaN ReadDN() const {
+ return fpcr_.DN() != 0 ? kUseDefaultNaN : kIgnoreDefaultNaN;
+ }
+
+ // Specify relevant register formats for Print(V)Register and related helpers.
+ enum PrintRegisterFormat {
+ // The lane size.
+ kPrintRegLaneSizeB = 0 << 0,
+ kPrintRegLaneSizeH = 1 << 0,
+ kPrintRegLaneSizeS = 2 << 0,
+ kPrintRegLaneSizeW = kPrintRegLaneSizeS,
+ kPrintRegLaneSizeD = 3 << 0,
+ kPrintRegLaneSizeX = kPrintRegLaneSizeD,
+ kPrintRegLaneSizeQ = 4 << 0,
+
+ kPrintRegLaneSizeOffset = 0,
+ kPrintRegLaneSizeMask = 7 << 0,
+
+ // The lane count.
+ kPrintRegAsScalar = 0,
+ kPrintRegAsDVector = 1 << 3,
+ kPrintRegAsQVector = 2 << 3,
+
+ kPrintRegAsVectorMask = 3 << 3,
+
+ // Indicate floating-point format lanes. (This flag is only supported for S-
+ // and D-sized lanes.)
+ kPrintRegAsFP = 1 << 5,
+
+ // Supported combinations.
+
+ kPrintXReg = kPrintRegLaneSizeX | kPrintRegAsScalar,
+ kPrintWReg = kPrintRegLaneSizeW | kPrintRegAsScalar,
+ kPrintSReg = kPrintRegLaneSizeS | kPrintRegAsScalar | kPrintRegAsFP,
+ kPrintDReg = kPrintRegLaneSizeD | kPrintRegAsScalar | kPrintRegAsFP,
+
+ kPrintReg1B = kPrintRegLaneSizeB | kPrintRegAsScalar,
+ kPrintReg8B = kPrintRegLaneSizeB | kPrintRegAsDVector,
+ kPrintReg16B = kPrintRegLaneSizeB | kPrintRegAsQVector,
+ kPrintReg1H = kPrintRegLaneSizeH | kPrintRegAsScalar,
+ kPrintReg4H = kPrintRegLaneSizeH | kPrintRegAsDVector,
+ kPrintReg8H = kPrintRegLaneSizeH | kPrintRegAsQVector,
+ kPrintReg1S = kPrintRegLaneSizeS | kPrintRegAsScalar,
+ kPrintReg2S = kPrintRegLaneSizeS | kPrintRegAsDVector,
+ kPrintReg4S = kPrintRegLaneSizeS | kPrintRegAsQVector,
+ kPrintReg1SFP = kPrintRegLaneSizeS | kPrintRegAsScalar | kPrintRegAsFP,
+ kPrintReg2SFP = kPrintRegLaneSizeS | kPrintRegAsDVector | kPrintRegAsFP,
+ kPrintReg4SFP = kPrintRegLaneSizeS | kPrintRegAsQVector | kPrintRegAsFP,
+ kPrintReg1D = kPrintRegLaneSizeD | kPrintRegAsScalar,
+ kPrintReg2D = kPrintRegLaneSizeD | kPrintRegAsQVector,
+ kPrintReg1DFP = kPrintRegLaneSizeD | kPrintRegAsScalar | kPrintRegAsFP,
+ kPrintReg2DFP = kPrintRegLaneSizeD | kPrintRegAsQVector | kPrintRegAsFP,
+ kPrintReg1Q = kPrintRegLaneSizeQ | kPrintRegAsScalar
+ };
+
+ unsigned GetPrintRegLaneSizeInBytesLog2(PrintRegisterFormat format) {
+ return (format & kPrintRegLaneSizeMask) >> kPrintRegLaneSizeOffset;
+ }
+
+ unsigned GetPrintRegLaneSizeInBytes(PrintRegisterFormat format) {
+ return 1 << GetPrintRegLaneSizeInBytesLog2(format);
+ }
+
+ unsigned GetPrintRegSizeInBytesLog2(PrintRegisterFormat format) {
+ if (format & kPrintRegAsDVector) return kDRegSizeInBytesLog2;
+ if (format & kPrintRegAsQVector) return kQRegSizeInBytesLog2;
+
+ // Scalar types.
+ return GetPrintRegLaneSizeInBytesLog2(format);
+ }
+
+ unsigned GetPrintRegSizeInBytes(PrintRegisterFormat format) {
+ return 1 << GetPrintRegSizeInBytesLog2(format);
+ }
+
+ unsigned GetPrintRegLaneCount(PrintRegisterFormat format) {
+ unsigned reg_size_log2 = GetPrintRegSizeInBytesLog2(format);
+ unsigned lane_size_log2 = GetPrintRegLaneSizeInBytesLog2(format);
+ VIXL_ASSERT(reg_size_log2 >= lane_size_log2);
+ return 1 << (reg_size_log2 - lane_size_log2);
+ }
+
+ PrintRegisterFormat GetPrintRegisterFormatForSize(unsigned reg_size,
+ unsigned lane_size);
+
+ PrintRegisterFormat GetPrintRegisterFormatForSize(unsigned size) {
+ return GetPrintRegisterFormatForSize(size, size);
+ }
+
+ PrintRegisterFormat GetPrintRegisterFormatForSizeFP(unsigned size) {
+ switch (size) {
+ default: VIXL_UNREACHABLE(); return kPrintDReg;
+ case kDRegSizeInBytes: return kPrintDReg;
+ case kSRegSizeInBytes: return kPrintSReg;
+ }
+ }
+
+ PrintRegisterFormat GetPrintRegisterFormatTryFP(PrintRegisterFormat format) {
+ if ((GetPrintRegLaneSizeInBytes(format) == kSRegSizeInBytes) ||
+ (GetPrintRegLaneSizeInBytes(format) == kDRegSizeInBytes)) {
+ return static_cast<PrintRegisterFormat>(format | kPrintRegAsFP);
+ }
+ return format;
+ }
+
+ template<typename T>
+ PrintRegisterFormat GetPrintRegisterFormat(T value) {
+ return GetPrintRegisterFormatForSize(sizeof(value));
+ }
+
+ PrintRegisterFormat GetPrintRegisterFormat(double value) {
+ VIXL_STATIC_ASSERT(sizeof(value) == kDRegSizeInBytes);
+ return GetPrintRegisterFormatForSizeFP(sizeof(value));
+ }
+
+ PrintRegisterFormat GetPrintRegisterFormat(float value) {
+ VIXL_STATIC_ASSERT(sizeof(value) == kSRegSizeInBytes);
+ return GetPrintRegisterFormatForSizeFP(sizeof(value));
+ }
+
+ PrintRegisterFormat GetPrintRegisterFormat(VectorFormat vform);
+
+ // Print all registers of the specified types.
+ void PrintRegisters();
+ void PrintVRegisters();
+ void PrintSystemRegisters();
+
+ // As above, but only print the registers that have been updated.
+ void PrintWrittenRegisters();
+ void PrintWrittenVRegisters();
+
+ // As above, but respect LOG_REG and LOG_VREG.
+ inline void LogWrittenRegisters() {
+ if (trace_parameters() & LOG_REGS) PrintWrittenRegisters();
+ }
+ inline void LogWrittenVRegisters() {
+ if (trace_parameters() & LOG_VREGS) PrintWrittenVRegisters();
+ }
+ inline void LogAllWrittenRegisters() {
+ LogWrittenRegisters();
+ LogWrittenVRegisters();
+ }
+
+ // Print individual register values (after update).
+ void PrintRegister(unsigned code, Reg31Mode r31mode = Reg31IsStackPointer);
+ void PrintVRegister(unsigned code, PrintRegisterFormat format);
+ void PrintSystemRegister(SystemRegister id);
+
+ // Like Print* (above), but respect trace_parameters().
+ void LogRegister(unsigned code, Reg31Mode r31mode = Reg31IsStackPointer) {
+ if (trace_parameters() & LOG_REGS) PrintRegister(code, r31mode);
+ }
+ void LogVRegister(unsigned code, PrintRegisterFormat format) {
+ if (trace_parameters() & LOG_VREGS) PrintVRegister(code, format);
+ }
+ void LogSystemRegister(SystemRegister id) {
+ if (trace_parameters() & LOG_SYSREGS) PrintSystemRegister(id);
+ }
+
+ // Print memory accesses.
+ void PrintRead(uintptr_t address, unsigned reg_code,
+ PrintRegisterFormat format);
+ void PrintWrite(uintptr_t address, unsigned reg_code,
+ PrintRegisterFormat format);
+ void PrintVRead(uintptr_t address, unsigned reg_code,
+ PrintRegisterFormat format, unsigned lane);
+ void PrintVWrite(uintptr_t address, unsigned reg_code,
+ PrintRegisterFormat format, unsigned lane);
+
+ // Like Print* (above), but respect trace_parameters().
+ void LogRead(uintptr_t address, unsigned reg_code,
+ PrintRegisterFormat format) {
+ if (trace_parameters() & LOG_REGS) PrintRead(address, reg_code, format);
+ }
+ void LogWrite(uintptr_t address, unsigned reg_code,
+ PrintRegisterFormat format) {
+ if (trace_parameters() & LOG_WRITE) PrintWrite(address, reg_code, format);
+ }
+ void LogVRead(uintptr_t address, unsigned reg_code,
+ PrintRegisterFormat format, unsigned lane = 0) {
+ if (trace_parameters() & LOG_VREGS) {
+ PrintVRead(address, reg_code, format, lane);
+ }
+ }
+ void LogVWrite(uintptr_t address, unsigned reg_code,
+ PrintRegisterFormat format, unsigned lane = 0) {
+ if (trace_parameters() & LOG_WRITE) {
+ PrintVWrite(address, reg_code, format, lane);
+ }
+ }
+
+ // Helper functions for register tracing.
+ void PrintRegisterRawHelper(unsigned code, Reg31Mode r31mode,
+ int size_in_bytes = kXRegSizeInBytes);
+ void PrintVRegisterRawHelper(unsigned code, int bytes = kQRegSizeInBytes,
+ int lsb = 0);
+ void PrintVRegisterFPHelper(unsigned code, unsigned lane_size_in_bytes,
+ int lane_count = 1, int rightmost_lane = 0);
+
+ void DoUnreachable(const Instruction* instr);
+ void DoTrace(const Instruction* instr);
+ void DoLog(const Instruction* instr);
+
+ static const char* WRegNameForCode(unsigned code,
+ Reg31Mode mode = Reg31IsZeroRegister);
+ static const char* XRegNameForCode(unsigned code,
+ Reg31Mode mode = Reg31IsZeroRegister);
+ static const char* SRegNameForCode(unsigned code);
+ static const char* DRegNameForCode(unsigned code);
+ static const char* VRegNameForCode(unsigned code);
+
+ bool coloured_trace() const { return coloured_trace_; }
+ void set_coloured_trace(bool value);
+
+ int trace_parameters() const { return trace_parameters_; }
+ void set_trace_parameters(int parameters);
+
+ void set_instruction_stats(bool value);
+
+ // Clear the simulated local monitor to force the next store-exclusive
+ // instruction to fail.
+ void ClearLocalMonitor() {
+ local_monitor_.Clear();
+ }
+
+ void SilenceExclusiveAccessWarning() {
+ print_exclusive_access_warning_ = false;
+ }
+
+ protected:
+ const char* clr_normal;
+ const char* clr_flag_name;
+ const char* clr_flag_value;
+ const char* clr_reg_name;
+ const char* clr_reg_value;
+ const char* clr_vreg_name;
+ const char* clr_vreg_value;
+ const char* clr_memory_address;
+ const char* clr_warning;
+ const char* clr_warning_message;
+ const char* clr_printf;
+
+ // Simulation helpers ------------------------------------
+ bool ConditionPassed(Condition cond) {
+ switch (cond) {
+ case eq:
+ return Z();
+ case ne:
+ return !Z();
+ case hs:
+ return C();
+ case lo:
+ return !C();
+ case mi:
+ return N();
+ case pl:
+ return !N();
+ case vs:
+ return V();
+ case vc:
+ return !V();
+ case hi:
+ return C() && !Z();
+ case ls:
+ return !(C() && !Z());
+ case ge:
+ return N() == V();
+ case lt:
+ return N() != V();
+ case gt:
+ return !Z() && (N() == V());
+ case le:
+ return !(!Z() && (N() == V()));
+ case nv:
+ VIXL_FALLTHROUGH();
+ case al:
+ return true;
+ default:
+ VIXL_UNREACHABLE();
+ return false;
+ }
+ }
+
+ bool ConditionPassed(Instr cond) {
+ return ConditionPassed(static_cast<Condition>(cond));
+ }
+
+ bool ConditionFailed(Condition cond) {
+ return !ConditionPassed(cond);
+ }
+
+ void AddSubHelper(const Instruction* instr, int64_t op2);
+ uint64_t AddWithCarry(unsigned reg_size,
+ bool set_flags,
+ uint64_t left,
+ uint64_t right,
+ int carry_in = 0);
+ void LogicalHelper(const Instruction* instr, int64_t op2);
+ void ConditionalCompareHelper(const Instruction* instr, int64_t op2);
+ void LoadStoreHelper(const Instruction* instr,
+ int64_t offset,
+ AddrMode addrmode);
+ void LoadStorePairHelper(const Instruction* instr, AddrMode addrmode);
+ template <typename T>
+ void CompareAndSwapHelper(const Instruction* instr);
+ template <typename T>
+ void CompareAndSwapPairHelper(const Instruction* instr);
+ template <typename T>
+ void AtomicMemorySimpleHelper(const Instruction* instr);
+ template <typename T>
+ void AtomicMemorySwapHelper(const Instruction* instr);
+ template <typename T>
+ void LoadAcquireRCpcHelper(const Instruction* instr);
+ uintptr_t AddressModeHelper(unsigned addr_reg,
+ int64_t offset,
+ AddrMode addrmode);
+ void NEONLoadStoreMultiStructHelper(const Instruction* instr,
+ AddrMode addr_mode);
+ void NEONLoadStoreSingleStructHelper(const Instruction* instr,
+ AddrMode addr_mode);
+
+ uint64_t AddressUntag(uint64_t address) {
+ return address & ~kAddressTagMask;
+ }
+
+ template <typename T>
+ T* AddressUntag(T* address) {
+ uintptr_t address_raw = reinterpret_cast<uintptr_t>(address);
+ return reinterpret_cast<T*>(AddressUntag(address_raw));
+ }
+
+ int64_t ShiftOperand(unsigned reg_size,
+ int64_t value,
+ Shift shift_type,
+ unsigned amount);
+ int64_t Rotate(unsigned reg_width,
+ int64_t value,
+ Shift shift_type,
+ unsigned amount);
+ int64_t ExtendValue(unsigned reg_width,
+ int64_t value,
+ Extend extend_type,
+ unsigned left_shift = 0);
+ uint16_t PolynomialMult(uint8_t op1, uint8_t op2);
+
+ void ld1(VectorFormat vform,
+ LogicVRegister dst,
+ uint64_t addr);
+ void ld1(VectorFormat vform,
+ LogicVRegister dst,
+ int index,
+ uint64_t addr);
+ void ld1r(VectorFormat vform,
+ LogicVRegister dst,
+ uint64_t addr);
+ void ld2(VectorFormat vform,
+ LogicVRegister dst1,
+ LogicVRegister dst2,
+ uint64_t addr);
+ void ld2(VectorFormat vform,
+ LogicVRegister dst1,
+ LogicVRegister dst2,
+ int index,
+ uint64_t addr);
+ void ld2r(VectorFormat vform,
+ LogicVRegister dst1,
+ LogicVRegister dst2,
+ uint64_t addr);
+ void ld3(VectorFormat vform,
+ LogicVRegister dst1,
+ LogicVRegister dst2,
+ LogicVRegister dst3,
+ uint64_t addr);
+ void ld3(VectorFormat vform,
+ LogicVRegister dst1,
+ LogicVRegister dst2,
+ LogicVRegister dst3,
+ int index,
+ uint64_t addr);
+ void ld3r(VectorFormat vform,
+ LogicVRegister dst1,
+ LogicVRegister dst2,
+ LogicVRegister dst3,
+ uint64_t addr);
+ void ld4(VectorFormat vform,
+ LogicVRegister dst1,
+ LogicVRegister dst2,
+ LogicVRegister dst3,
+ LogicVRegister dst4,
+ uint64_t addr);
+ void ld4(VectorFormat vform,
+ LogicVRegister dst1,
+ LogicVRegister dst2,
+ LogicVRegister dst3,
+ LogicVRegister dst4,
+ int index,
+ uint64_t addr);
+ void ld4r(VectorFormat vform,
+ LogicVRegister dst1,
+ LogicVRegister dst2,
+ LogicVRegister dst3,
+ LogicVRegister dst4,
+ uint64_t addr);
+ void st1(VectorFormat vform,
+ LogicVRegister src,
+ uint64_t addr);
+ void st1(VectorFormat vform,
+ LogicVRegister src,
+ int index,
+ uint64_t addr);
+ void st2(VectorFormat vform,
+ LogicVRegister src,
+ LogicVRegister src2,
+ uint64_t addr);
+ void st2(VectorFormat vform,
+ LogicVRegister src,
+ LogicVRegister src2,
+ int index,
+ uint64_t addr);
+ void st3(VectorFormat vform,
+ LogicVRegister src,
+ LogicVRegister src2,
+ LogicVRegister src3,
+ uint64_t addr);
+ void st3(VectorFormat vform,
+ LogicVRegister src,
+ LogicVRegister src2,
+ LogicVRegister src3,
+ int index,
+ uint64_t addr);
+ void st4(VectorFormat vform,
+ LogicVRegister src,
+ LogicVRegister src2,
+ LogicVRegister src3,
+ LogicVRegister src4,
+ uint64_t addr);
+ void st4(VectorFormat vform,
+ LogicVRegister src,
+ LogicVRegister src2,
+ LogicVRegister src3,
+ LogicVRegister src4,
+ int index,
+ uint64_t addr);
+ LogicVRegister cmp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ Condition cond);
+ LogicVRegister cmp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ int imm,
+ Condition cond);
+ LogicVRegister cmptst(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister add(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister addp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister mla(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister mls(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister mul(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister mul(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister mla(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister mls(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister pmul(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+
+ typedef LogicVRegister (Simulator::*ByElementOp)(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister fmul(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister fmla(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister fmls(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister fmulx(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister smull(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister smull2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister umull(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister umull2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister smlal(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister smlal2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister umlal(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister umlal2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister smlsl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister smlsl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister umlsl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister umlsl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister sqdmull(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister sqdmull2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister sqdmlal(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister sqdmlal2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister sqdmlsl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister sqdmlsl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister sqdmulh(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister sqrdmulh(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister sub(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister and_(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister orr(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister orn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister eor(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister bic(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister bic(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ uint64_t imm);
+ LogicVRegister bif(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister bit(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister bsl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister cls(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister clz(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister cnt(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister not_(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister rbit(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister rev(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int revSize);
+ LogicVRegister rev16(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister rev32(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister rev64(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister addlp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ bool is_signed,
+ bool do_accumulate);
+ LogicVRegister saddlp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister uaddlp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister sadalp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister uadalp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister ext(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister ins_element(VectorFormat vform,
+ LogicVRegister dst,
+ int dst_index,
+ const LogicVRegister& src,
+ int src_index);
+ LogicVRegister ins_immediate(VectorFormat vform,
+ LogicVRegister dst,
+ int dst_index,
+ uint64_t imm);
+ LogicVRegister dup_element(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int src_index);
+ LogicVRegister dup_immediate(VectorFormat vform,
+ LogicVRegister dst,
+ uint64_t imm);
+ LogicVRegister mov(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister movi(VectorFormat vform,
+ LogicVRegister dst,
+ uint64_t imm);
+ LogicVRegister mvni(VectorFormat vform,
+ LogicVRegister dst,
+ uint64_t imm);
+ LogicVRegister orr(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ uint64_t imm);
+ LogicVRegister sshl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister ushl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister sminmax(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ bool max);
+ LogicVRegister smax(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister smin(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister sminmaxp(VectorFormat vform,
+ LogicVRegister dst,
+ int dst_index,
+ const LogicVRegister& src,
+ bool max);
+ LogicVRegister smaxp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister sminp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister addp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister addv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister uaddlv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister saddlv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister sminmaxv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ bool max);
+ LogicVRegister smaxv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister sminv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister uxtl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister uxtl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister sxtl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister sxtl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister tbl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& tab,
+ const LogicVRegister& ind);
+ LogicVRegister tbl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& tab,
+ const LogicVRegister& tab2,
+ const LogicVRegister& ind);
+ LogicVRegister tbl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& tab,
+ const LogicVRegister& tab2,
+ const LogicVRegister& tab3,
+ const LogicVRegister& ind);
+ LogicVRegister tbl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& tab,
+ const LogicVRegister& tab2,
+ const LogicVRegister& tab3,
+ const LogicVRegister& tab4,
+ const LogicVRegister& ind);
+ LogicVRegister tbx(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& tab,
+ const LogicVRegister& ind);
+ LogicVRegister tbx(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& tab,
+ const LogicVRegister& tab2,
+ const LogicVRegister& ind);
+ LogicVRegister tbx(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& tab,
+ const LogicVRegister& tab2,
+ const LogicVRegister& tab3,
+ const LogicVRegister& ind);
+ LogicVRegister tbx(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& tab,
+ const LogicVRegister& tab2,
+ const LogicVRegister& tab3,
+ const LogicVRegister& tab4,
+ const LogicVRegister& ind);
+ LogicVRegister uaddl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister uaddl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister uaddw(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister uaddw2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister saddl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister saddl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister saddw(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister saddw2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister usubl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister usubl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister usubw(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister usubw2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister ssubl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister ssubl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister ssubw(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister ssubw2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister uminmax(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ bool max);
+ LogicVRegister umax(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister umin(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister uminmaxp(VectorFormat vform,
+ LogicVRegister dst,
+ int dst_index,
+ const LogicVRegister& src,
+ bool max);
+ LogicVRegister umaxp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister uminp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister uminmaxv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ bool max);
+ LogicVRegister umaxv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister uminv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister trn1(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister trn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister zip1(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister zip2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister uzp1(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister uzp2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister shl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister scvtf(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int fbits,
+ FPRounding rounding_mode);
+ LogicVRegister ucvtf(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int fbits,
+ FPRounding rounding_mode);
+ LogicVRegister sshll(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister sshll2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister shll(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister shll2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister ushll(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister ushll2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister sli(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister sri(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister sshr(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister ushr(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister ssra(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister usra(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister srsra(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister ursra(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister suqadd(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister usqadd(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister sqshl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister uqshl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister sqshlu(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister abs(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister neg(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister extractnarrow(VectorFormat vform,
+ LogicVRegister dst,
+ bool dstIsSigned,
+ const LogicVRegister& src,
+ bool srcIsSigned);
+ LogicVRegister xtn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister sqxtn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister uqxtn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister sqxtun(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister absdiff(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ bool issigned);
+ LogicVRegister saba(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister uaba(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister shrn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister shrn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister rshrn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister rshrn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister uqshrn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister uqshrn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister uqrshrn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister uqrshrn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister sqshrn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister sqshrn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister sqrshrn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister sqrshrn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister sqshrun(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister sqshrun2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister sqrshrun(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister sqrshrun2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int shift);
+ LogicVRegister sqrdmulh(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ bool round = true);
+ LogicVRegister sqdmulh(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ #define NEON_3VREG_LOGIC_LIST(V) \
+ V(addhn) \
+ V(addhn2) \
+ V(raddhn) \
+ V(raddhn2) \
+ V(subhn) \
+ V(subhn2) \
+ V(rsubhn) \
+ V(rsubhn2) \
+ V(pmull) \
+ V(pmull2) \
+ V(sabal) \
+ V(sabal2) \
+ V(uabal) \
+ V(uabal2) \
+ V(sabdl) \
+ V(sabdl2) \
+ V(uabdl) \
+ V(uabdl2) \
+ V(smull) \
+ V(smull2) \
+ V(umull) \
+ V(umull2) \
+ V(smlal) \
+ V(smlal2) \
+ V(umlal) \
+ V(umlal2) \
+ V(smlsl) \
+ V(smlsl2) \
+ V(umlsl) \
+ V(umlsl2) \
+ V(sqdmlal) \
+ V(sqdmlal2) \
+ V(sqdmlsl) \
+ V(sqdmlsl2) \
+ V(sqdmull) \
+ V(sqdmull2)
+
+ #define DEFINE_LOGIC_FUNC(FXN) \
+ LogicVRegister FXN(VectorFormat vform, \
+ LogicVRegister dst, \
+ const LogicVRegister& src1, \
+ const LogicVRegister& src2);
+ NEON_3VREG_LOGIC_LIST(DEFINE_LOGIC_FUNC)
+ #undef DEFINE_LOGIC_FUNC
+
+ #define NEON_FP3SAME_LIST(V) \
+ V(fadd, FPAdd, false) \
+ V(fsub, FPSub, true) \
+ V(fmul, FPMul, true) \
+ V(fmulx, FPMulx, true) \
+ V(fdiv, FPDiv, true) \
+ V(fmax, FPMax, false) \
+ V(fmin, FPMin, false) \
+ V(fmaxnm, FPMaxNM, false) \
+ V(fminnm, FPMinNM, false)
+
+ #define DECLARE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN) \
+ template <typename T> \
+ LogicVRegister FN(VectorFormat vform, \
+ LogicVRegister dst, \
+ const LogicVRegister& src1, \
+ const LogicVRegister& src2); \
+ LogicVRegister FN(VectorFormat vform, \
+ LogicVRegister dst, \
+ const LogicVRegister& src1, \
+ const LogicVRegister& src2);
+ NEON_FP3SAME_LIST(DECLARE_NEON_FP_VECTOR_OP)
+ #undef DECLARE_NEON_FP_VECTOR_OP
+
+ #define NEON_FPPAIRWISE_LIST(V) \
+ V(faddp, fadd, FPAdd) \
+ V(fmaxp, fmax, FPMax) \
+ V(fmaxnmp, fmaxnm, FPMaxNM) \
+ V(fminp, fmin, FPMin) \
+ V(fminnmp, fminnm, FPMinNM)
+
+ #define DECLARE_NEON_FP_PAIR_OP(FNP, FN, OP) \
+ LogicVRegister FNP(VectorFormat vform, \
+ LogicVRegister dst, \
+ const LogicVRegister& src1, \
+ const LogicVRegister& src2); \
+ LogicVRegister FNP(VectorFormat vform, \
+ LogicVRegister dst, \
+ const LogicVRegister& src);
+ NEON_FPPAIRWISE_LIST(DECLARE_NEON_FP_PAIR_OP)
+ #undef DECLARE_NEON_FP_PAIR_OP
+
+ template <typename T>
+ LogicVRegister frecps(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister frecps(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ template <typename T>
+ LogicVRegister frsqrts(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister frsqrts(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ template <typename T>
+ LogicVRegister fmla(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister fmla(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ template <typename T>
+ LogicVRegister fmls(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister fmls(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister fnmul(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+
+ template <typename T>
+ LogicVRegister fcmp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ Condition cond);
+ LogicVRegister fcmp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ Condition cond);
+ LogicVRegister fabscmp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ Condition cond);
+ LogicVRegister fcmp_zero(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ Condition cond);
+
+ template <typename T>
+ LogicVRegister fneg(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister fneg(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ template <typename T>
+ LogicVRegister frecpx(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister frecpx(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ template <typename T>
+ LogicVRegister fabs_(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister fabs_(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister fabd(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister frint(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ FPRounding rounding_mode,
+ bool inexact_exception = false);
+ LogicVRegister fcvts(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ FPRounding rounding_mode,
+ int fbits = 0);
+ LogicVRegister fcvtu(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ FPRounding rounding_mode,
+ int fbits = 0);
+ LogicVRegister fcvtl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister fcvtl2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister fcvtn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister fcvtn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister fcvtxn(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister fcvtxn2(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister fsqrt(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister frsqrte(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister frecpe(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ FPRounding rounding);
+ LogicVRegister ursqrte(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister urecpe(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+
+ typedef float (Simulator::*FPMinMaxOp)(float a, float b);
+
+ LogicVRegister fminmaxv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ FPMinMaxOp Op);
+
+ LogicVRegister fminv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister fmaxv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister fminnmv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister fmaxnmv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+
+ static const uint32_t CRC32_POLY = 0x04C11DB7;
+ static const uint32_t CRC32C_POLY = 0x1EDC6F41;
+ uint32_t Poly32Mod2(unsigned n, uint64_t data, uint32_t poly);
+ template <typename T>
+ uint32_t Crc32Checksum(uint32_t acc, T val, uint32_t poly);
+ uint32_t Crc32Checksum(uint32_t acc, uint64_t val, uint32_t poly);
+
+ void SysOp_W(int op, int64_t val);
+
+ template <typename T>
+ T FPRecipSqrtEstimate(T op);
+ template <typename T>
+ T FPRecipEstimate(T op, FPRounding rounding);
+ template <typename T, typename R>
+ R FPToFixed(T op, int fbits, bool is_signed, FPRounding rounding);
+
+ void FPCompare(double val0, double val1, FPTrapFlags trap);
+ double FPRoundInt(double value, FPRounding round_mode);
+ double recip_sqrt_estimate(double a);
+ double recip_estimate(double a);
+ double FPRecipSqrtEstimate(double a);
+ double FPRecipEstimate(double a);
+ double FixedToDouble(int64_t src, int fbits, FPRounding round_mode);
+ double UFixedToDouble(uint64_t src, int fbits, FPRounding round_mode);
+ float FixedToFloat(int64_t src, int fbits, FPRounding round_mode);
+ float UFixedToFloat(uint64_t src, int fbits, FPRounding round_mode);
+ int32_t FPToInt32(double value, FPRounding rmode);
+ int64_t FPToInt64(double value, FPRounding rmode);
+ uint32_t FPToUInt32(double value, FPRounding rmode);
+ uint64_t FPToUInt64(double value, FPRounding rmode);
+ int32_t FPToFixedJS(double value);
+
+ template <typename T>
+ T FPAdd(T op1, T op2);
+
+ template <typename T>
+ T FPDiv(T op1, T op2);
+
+ template <typename T>
+ T FPMax(T a, T b);
+
+ template <typename T>
+ T FPMaxNM(T a, T b);
+
+ template <typename T>
+ T FPMin(T a, T b);
+
+ template <typename T>
+ T FPMinNM(T a, T b);
+
+ template <typename T>
+ T FPMul(T op1, T op2);
+
+ template <typename T>
+ T FPMulx(T op1, T op2);
+
+ template <typename T>
+ T FPMulAdd(T a, T op1, T op2);
+
+ template <typename T>
+ T FPSqrt(T op);
+
+ template <typename T>
+ T FPSub(T op1, T op2);
+
+ template <typename T>
+ T FPRecipStepFused(T op1, T op2);
+
+ template <typename T>
+ T FPRSqrtStepFused(T op1, T op2);
+
+ // This doesn't do anything at the moment. We'll need it if we want support
+ // for cumulative exception bits or floating-point exceptions.
+ void FPProcessException() { }
+
+ bool FPProcessNaNs(const Instruction* instr);
+
+ // Pseudo Printf instruction
+ void DoPrintf(const Instruction* instr);
+
+ // Processor state ---------------------------------------
+
+ // Simulated monitors for exclusive access instructions.
+ SimExclusiveLocalMonitor local_monitor_;
+ SimExclusiveGlobalMonitor global_monitor_;
+
+ // Output stream.
+ FILE* stream_;
+ PrintDisassembler* print_disasm_;
+
+ // Instruction statistics instrumentation.
+ Instrument* instrumentation_;
+
+ // General purpose registers. Register 31 is the stack pointer.
+ SimRegister registers_[kNumberOfRegisters];
+
+ // Vector registers
+ SimVRegister vregisters_[kNumberOfVRegisters];
+
+ // Program Status Register.
+ // bits[31, 27]: Condition flags N, Z, C, and V.
+ // (Negative, Zero, Carry, Overflow)
+ SimSystemRegister nzcv_;
+
+ // Floating-Point Control Register
+ SimSystemRegister fpcr_;
+
+ // Only a subset of FPCR features are supported by the simulator. This helper
+ // checks that the FPCR settings are supported.
+ //
+ // This is checked when floating-point instructions are executed, not when
+ // FPCR is set. This allows generated code to modify FPCR for external
+ // functions, or to save and restore it when entering and leaving generated
+ // code.
+ void AssertSupportedFPCR() {
+ VIXL_ASSERT(fpcr().FZ() == 0); // No flush-to-zero support.
+ VIXL_ASSERT(fpcr().RMode() == FPTieEven); // Ties-to-even rounding only.
+
+ // The simulator does not support half-precision operations so fpcr().AHP()
+ // is irrelevant, and is not checked here.
+ }
+
+ static int CalcNFlag(uint64_t result, unsigned reg_size) {
+ return (result >> (reg_size - 1)) & 1;
+ }
+
+ static int CalcZFlag(uint64_t result) {
+ return (result == 0) ? 1 : 0;
+ }
+
+ static const uint32_t kConditionFlagsMask = 0xf0000000;
+
+ // Stack
+ byte* stack_;
+ static const int stack_protection_size_ = 512 * KBytes;
+ static const int stack_size_ = (2 * MBytes) + (2 * stack_protection_size_);
+ byte* stack_limit_;
+
+ Decoder* decoder_;
+ // Indicates if the pc has been modified by the instruction and should not be
+ // automatically incremented.
+ bool pc_modified_;
+ const Instruction* pc_;
+
+ static const char* xreg_names[];
+ static const char* wreg_names[];
+ static const char* sreg_names[];
+ static const char* dreg_names[];
+ static const char* vreg_names[];
+
+ static const Instruction* kEndOfSimAddress;
+
+ private:
+ template <typename T>
+ static T FPDefaultNaN();
+
+ // Standard NaN processing.
+ template <typename T>
+ T FPProcessNaN(T op) {
+ VIXL_ASSERT(std::isnan(op));
+ if (IsSignallingNaN(op)) {
+ FPProcessException();
+ }
+ return DN() ? FPDefaultNaN<T>() : ToQuietNaN(op);
+ }
+
+ template <typename T>
+ T FPProcessNaNs(T op1, T op2) {
+ if (IsSignallingNaN(op1)) {
+ return FPProcessNaN(op1);
+ } else if (IsSignallingNaN(op2)) {
+ return FPProcessNaN(op2);
+ } else if (std::isnan(op1)) {
+ VIXL_ASSERT(IsQuietNaN(op1));
+ return FPProcessNaN(op1);
+ } else if (std::isnan(op2)) {
+ VIXL_ASSERT(IsQuietNaN(op2));
+ return FPProcessNaN(op2);
+ } else {
+ return 0.0;
+ }
+ }
+
+ template <typename T>
+ T FPProcessNaNs3(T op1, T op2, T op3) {
+ if (IsSignallingNaN(op1)) {
+ return FPProcessNaN(op1);
+ } else if (IsSignallingNaN(op2)) {
+ return FPProcessNaN(op2);
+ } else if (IsSignallingNaN(op3)) {
+ return FPProcessNaN(op3);
+ } else if (std::isnan(op1)) {
+ VIXL_ASSERT(IsQuietNaN(op1));
+ return FPProcessNaN(op1);
+ } else if (std::isnan(op2)) {
+ VIXL_ASSERT(IsQuietNaN(op2));
+ return FPProcessNaN(op2);
+ } else if (std::isnan(op3)) {
+ VIXL_ASSERT(IsQuietNaN(op3));
+ return FPProcessNaN(op3);
+ } else {
+ return 0.0;
+ }
+ }
+
+ bool coloured_trace_;
+
+ // A set of TraceParameters flags.
+ int trace_parameters_;
+
+ // Indicates whether the instruction instrumentation is active.
+ bool instruction_stats_;
+
+ // Indicates whether the exclusive-access warning has been printed.
+ bool print_exclusive_access_warning_;
+ void PrintExclusiveAccessWarning();
+
+ // Indicates that the simulator ran out of memory at some point.
+ // Data structures may not be fully allocated.
+ bool oom_;
+
+ public:
+ // True if the simulator ran out of memory during or after construction.
+ bool oom() const { return oom_; }
+
+ protected:
+ mozilla::Vector<int64_t, 0, js::SystemAllocPolicy> spStack_;
+};
+
+} // namespace vixl
+
+namespace js {
+namespace jit {
+
+class SimulatorProcess
+{
+ public:
+ static SimulatorProcess* singleton_;
+
+ SimulatorProcess()
+ : lock_(mutexid::Arm64SimulatorLock)
+ , redirection_(nullptr)
+ {}
+
+ // Synchronizes access between main thread and compilation threads.
+ js::Mutex lock_ MOZ_UNANNOTATED;
+ vixl::Redirection* redirection_;
+
+#ifdef JS_CACHE_SIMULATOR_ARM64
+ // For each simulator, record what other thread registered as instruction
+ // being invalidated.
+ struct ICacheFlush {
+ void* start;
+ size_t length;
+ };
+ using ICacheFlushes = mozilla::Vector<ICacheFlush, 2>;
+ struct SimFlushes {
+ vixl::Simulator* thread;
+ ICacheFlushes records;
+ };
+ mozilla::Vector<SimFlushes, 1> pendingFlushes_;
+
+ static void recordICacheFlush(void* start, size_t length);
+ static void membarrier();
+ static ICacheFlushes& getICacheFlushes(vixl::Simulator* sim);
+ [[nodiscard]] static bool registerSimulator(vixl::Simulator* sim);
+ static void unregisterSimulator(vixl::Simulator* sim);
+#endif
+
+ static void setRedirection(vixl::Redirection* redirection) {
+ singleton_->lock_.assertOwnedByCurrentThread();
+ singleton_->redirection_ = redirection;
+ }
+
+ static vixl::Redirection* redirection() {
+ singleton_->lock_.assertOwnedByCurrentThread();
+ return singleton_->redirection_;
+ }
+
+ static bool initialize() {
+ singleton_ = js_new<SimulatorProcess>();
+ return !!singleton_;
+ }
+ static void destroy() {
+ js_delete(singleton_);
+ singleton_ = nullptr;
+ }
+};
+
+// Protects the icache and redirection properties of the simulator.
+class AutoLockSimulatorCache : public js::LockGuard<js::Mutex>
+{
+ using Base = js::LockGuard<js::Mutex>;
+
+ public:
+ explicit AutoLockSimulatorCache()
+ : Base(SimulatorProcess::singleton_->lock_)
+ {
+ }
+};
+
+} // namespace jit
+} // namespace js
+
+#endif // JS_SIMULATOR_ARM64
+#endif // VIXL_A64_SIMULATOR_A64_H_
diff --git a/js/src/jit/arm64/vixl/Utils-vixl.cpp b/js/src/jit/arm64/vixl/Utils-vixl.cpp
new file mode 100644
index 0000000000..381c3501d1
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Utils-vixl.cpp
@@ -0,0 +1,555 @@
+// Copyright 2015, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "jit/arm64/vixl/Utils-vixl.h"
+
+#include <cstdio>
+
+namespace vixl {
+
+// The default NaN values (for FPCR.DN=1).
+const double kFP64DefaultNaN = RawbitsToDouble(UINT64_C(0x7ff8000000000000));
+const float kFP32DefaultNaN = RawbitsToFloat(0x7fc00000);
+const Float16 kFP16DefaultNaN = RawbitsToFloat16(0x7e00);
+
+// Floating-point zero values.
+const Float16 kFP16PositiveZero = RawbitsToFloat16(0x0);
+const Float16 kFP16NegativeZero = RawbitsToFloat16(0x8000);
+
+// Floating-point infinity values.
+const Float16 kFP16PositiveInfinity = RawbitsToFloat16(0x7c00);
+const Float16 kFP16NegativeInfinity = RawbitsToFloat16(0xfc00);
+const float kFP32PositiveInfinity = RawbitsToFloat(0x7f800000);
+const float kFP32NegativeInfinity = RawbitsToFloat(0xff800000);
+const double kFP64PositiveInfinity =
+ RawbitsToDouble(UINT64_C(0x7ff0000000000000));
+const double kFP64NegativeInfinity =
+ RawbitsToDouble(UINT64_C(0xfff0000000000000));
+
+bool IsZero(Float16 value) {
+ uint16_t bits = Float16ToRawbits(value);
+ return (bits == Float16ToRawbits(kFP16PositiveZero) ||
+ bits == Float16ToRawbits(kFP16NegativeZero));
+}
+
+uint16_t Float16ToRawbits(Float16 value) { return value.rawbits_; }
+
+uint32_t FloatToRawbits(float value) {
+ uint32_t bits = 0;
+ memcpy(&bits, &value, 4);
+ return bits;
+}
+
+
+uint64_t DoubleToRawbits(double value) {
+ uint64_t bits = 0;
+ memcpy(&bits, &value, 8);
+ return bits;
+}
+
+
+Float16 RawbitsToFloat16(uint16_t bits) {
+ Float16 f;
+ f.rawbits_ = bits;
+ return f;
+}
+
+
+float RawbitsToFloat(uint32_t bits) {
+ float value = 0.0;
+ memcpy(&value, &bits, 4);
+ return value;
+}
+
+
+double RawbitsToDouble(uint64_t bits) {
+ double value = 0.0;
+ memcpy(&value, &bits, 8);
+ return value;
+}
+
+
+uint32_t Float16Sign(internal::SimFloat16 val) {
+ uint16_t rawbits = Float16ToRawbits(val);
+ return ExtractUnsignedBitfield32(15, 15, rawbits);
+}
+
+
+uint32_t Float16Exp(internal::SimFloat16 val) {
+ uint16_t rawbits = Float16ToRawbits(val);
+ return ExtractUnsignedBitfield32(14, 10, rawbits);
+}
+
+uint32_t Float16Mantissa(internal::SimFloat16 val) {
+ uint16_t rawbits = Float16ToRawbits(val);
+ return ExtractUnsignedBitfield32(9, 0, rawbits);
+}
+
+
+uint32_t FloatSign(float val) {
+ uint32_t rawbits = FloatToRawbits(val);
+ return ExtractUnsignedBitfield32(31, 31, rawbits);
+}
+
+
+uint32_t FloatExp(float val) {
+ uint32_t rawbits = FloatToRawbits(val);
+ return ExtractUnsignedBitfield32(30, 23, rawbits);
+}
+
+
+uint32_t FloatMantissa(float val) {
+ uint32_t rawbits = FloatToRawbits(val);
+ return ExtractUnsignedBitfield32(22, 0, rawbits);
+}
+
+
+uint32_t DoubleSign(double val) {
+ uint64_t rawbits = DoubleToRawbits(val);
+ return static_cast<uint32_t>(ExtractUnsignedBitfield64(63, 63, rawbits));
+}
+
+
+uint32_t DoubleExp(double val) {
+ uint64_t rawbits = DoubleToRawbits(val);
+ return static_cast<uint32_t>(ExtractUnsignedBitfield64(62, 52, rawbits));
+}
+
+
+uint64_t DoubleMantissa(double val) {
+ uint64_t rawbits = DoubleToRawbits(val);
+ return ExtractUnsignedBitfield64(51, 0, rawbits);
+}
+
+
+internal::SimFloat16 Float16Pack(uint16_t sign,
+ uint16_t exp,
+ uint16_t mantissa) {
+ uint16_t bits = (sign << 15) | (exp << 10) | mantissa;
+ return RawbitsToFloat16(bits);
+}
+
+
+float FloatPack(uint32_t sign, uint32_t exp, uint32_t mantissa) {
+ uint32_t bits = (sign << 31) | (exp << 23) | mantissa;
+ return RawbitsToFloat(bits);
+}
+
+
+double DoublePack(uint64_t sign, uint64_t exp, uint64_t mantissa) {
+ uint64_t bits = (sign << 63) | (exp << 52) | mantissa;
+ return RawbitsToDouble(bits);
+}
+
+
+int Float16Classify(Float16 value) {
+ uint16_t bits = Float16ToRawbits(value);
+ uint16_t exponent_max = (1 << 5) - 1;
+ uint16_t exponent_mask = exponent_max << 10;
+ uint16_t mantissa_mask = (1 << 10) - 1;
+
+ uint16_t exponent = (bits & exponent_mask) >> 10;
+ uint16_t mantissa = bits & mantissa_mask;
+ if (exponent == 0) {
+ if (mantissa == 0) {
+ return FP_ZERO;
+ }
+ return FP_SUBNORMAL;
+ } else if (exponent == exponent_max) {
+ if (mantissa == 0) {
+ return FP_INFINITE;
+ }
+ return FP_NAN;
+ }
+ return FP_NORMAL;
+}
+
+
+unsigned CountClearHalfWords(uint64_t imm, unsigned reg_size) {
+ VIXL_ASSERT((reg_size % 8) == 0);
+ int count = 0;
+ for (unsigned i = 0; i < (reg_size / 16); i++) {
+ if ((imm & 0xffff) == 0) {
+ count++;
+ }
+ imm >>= 16;
+ }
+ return count;
+}
+
+
+int BitCount(uint64_t value) { return CountSetBits(value); }
+
+// Float16 definitions.
+
+Float16::Float16(double dvalue) {
+ rawbits_ =
+ Float16ToRawbits(FPToFloat16(dvalue, FPTieEven, kIgnoreDefaultNaN));
+}
+
+namespace internal {
+
+SimFloat16 SimFloat16::operator-() const {
+ return RawbitsToFloat16(rawbits_ ^ 0x8000);
+}
+
+// SimFloat16 definitions.
+SimFloat16 SimFloat16::operator+(SimFloat16 rhs) const {
+ return static_cast<double>(*this) + static_cast<double>(rhs);
+}
+
+SimFloat16 SimFloat16::operator-(SimFloat16 rhs) const {
+ return static_cast<double>(*this) - static_cast<double>(rhs);
+}
+
+SimFloat16 SimFloat16::operator*(SimFloat16 rhs) const {
+ return static_cast<double>(*this) * static_cast<double>(rhs);
+}
+
+SimFloat16 SimFloat16::operator/(SimFloat16 rhs) const {
+ return static_cast<double>(*this) / static_cast<double>(rhs);
+}
+
+bool SimFloat16::operator<(SimFloat16 rhs) const {
+ return static_cast<double>(*this) < static_cast<double>(rhs);
+}
+
+bool SimFloat16::operator>(SimFloat16 rhs) const {
+ return static_cast<double>(*this) > static_cast<double>(rhs);
+}
+
+bool SimFloat16::operator==(SimFloat16 rhs) const {
+ if (IsNaN(*this) || IsNaN(rhs)) {
+ return false;
+ } else if (IsZero(rhs) && IsZero(*this)) {
+ // +0 and -0 should be treated as equal.
+ return true;
+ }
+ return this->rawbits_ == rhs.rawbits_;
+}
+
+bool SimFloat16::operator!=(SimFloat16 rhs) const { return !(*this == rhs); }
+
+bool SimFloat16::operator==(double rhs) const {
+ return static_cast<double>(*this) == static_cast<double>(rhs);
+}
+
+SimFloat16::operator double() const {
+ return FPToDouble(*this, kIgnoreDefaultNaN);
+}
+
+Int64 BitCount(Uint32 value) { return CountSetBits(value.Get()); }
+
+} // namespace internal
+
+float FPToFloat(Float16 value, UseDefaultNaN DN, bool* exception) {
+ uint16_t bits = Float16ToRawbits(value);
+ uint32_t sign = bits >> 15;
+ uint32_t exponent =
+ ExtractUnsignedBitfield32(kFloat16MantissaBits + kFloat16ExponentBits - 1,
+ kFloat16MantissaBits,
+ bits);
+ uint32_t mantissa =
+ ExtractUnsignedBitfield32(kFloat16MantissaBits - 1, 0, bits);
+
+ switch (Float16Classify(value)) {
+ case FP_ZERO:
+ return (sign == 0) ? 0.0f : -0.0f;
+
+ case FP_INFINITE:
+ return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity;
+
+ case FP_SUBNORMAL: {
+ // Calculate shift required to put mantissa into the most-significant bits
+ // of the destination mantissa.
+ int shift = CountLeadingZeros(mantissa << (32 - 10));
+
+ // Shift mantissa and discard implicit '1'.
+ mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1;
+ mantissa &= (1 << kFloatMantissaBits) - 1;
+
+ // Adjust the exponent for the shift applied, and rebias.
+ exponent = exponent - shift + (-15 + 127);
+ break;
+ }
+
+ case FP_NAN:
+ if (IsSignallingNaN(value)) {
+ if (exception != NULL) {
+ *exception = true;
+ }
+ }
+ if (DN == kUseDefaultNaN) return kFP32DefaultNaN;
+
+ // Convert NaNs as the processor would:
+ // - The sign is propagated.
+ // - The payload (mantissa) is transferred entirely, except that the top
+ // bit is forced to '1', making the result a quiet NaN. The unused
+ // (low-order) payload bits are set to 0.
+ exponent = (1 << kFloatExponentBits) - 1;
+
+ // Increase bits in mantissa, making low-order bits 0.
+ mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
+ mantissa |= 1 << 22; // Force a quiet NaN.
+ break;
+
+ case FP_NORMAL:
+ // Increase bits in mantissa, making low-order bits 0.
+ mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
+
+ // Change exponent bias.
+ exponent += (-15 + 127);
+ break;
+
+ default:
+ VIXL_UNREACHABLE();
+ }
+ return RawbitsToFloat((sign << 31) | (exponent << kFloatMantissaBits) |
+ mantissa);
+}
+
+
+float FPToFloat(double value,
+ FPRounding round_mode,
+ UseDefaultNaN DN,
+ bool* exception) {
+ // Only the FPTieEven rounding mode is implemented.
+ VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
+ USE(round_mode);
+
+ switch (std::fpclassify(value)) {
+ case FP_NAN: {
+ if (IsSignallingNaN(value)) {
+ if (exception != NULL) {
+ *exception = true;
+ }
+ }
+ if (DN == kUseDefaultNaN) return kFP32DefaultNaN;
+
+ // Convert NaNs as the processor would:
+ // - The sign is propagated.
+ // - The payload (mantissa) is transferred as much as possible, except
+ // that the top bit is forced to '1', making the result a quiet NaN.
+ uint64_t raw = DoubleToRawbits(value);
+
+ uint32_t sign = raw >> 63;
+ uint32_t exponent = (1 << 8) - 1;
+ uint32_t payload =
+ static_cast<uint32_t>(ExtractUnsignedBitfield64(50, 52 - 23, raw));
+ payload |= (1 << 22); // Force a quiet NaN.
+
+ return RawbitsToFloat((sign << 31) | (exponent << 23) | payload);
+ }
+
+ case FP_ZERO:
+ case FP_INFINITE: {
+ // In a C++ cast, any value representable in the target type will be
+ // unchanged. This is always the case for +/-0.0 and infinities.
+ return static_cast<float>(value);
+ }
+
+ case FP_NORMAL:
+ case FP_SUBNORMAL: {
+ // Convert double-to-float as the processor would, assuming that FPCR.FZ
+ // (flush-to-zero) is not set.
+ uint64_t raw = DoubleToRawbits(value);
+ // Extract the IEEE-754 double components.
+ uint32_t sign = raw >> 63;
+ // Extract the exponent and remove the IEEE-754 encoding bias.
+ int32_t exponent =
+ static_cast<int32_t>(ExtractUnsignedBitfield64(62, 52, raw)) - 1023;
+ // Extract the mantissa and add the implicit '1' bit.
+ uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw);
+ if (std::fpclassify(value) == FP_NORMAL) {
+ mantissa |= (UINT64_C(1) << 52);
+ }
+ return FPRoundToFloat(sign, exponent, mantissa, round_mode);
+ }
+ }
+
+ VIXL_UNREACHABLE();
+ return value;
+}
+
+// TODO: We should consider implementing a full FPToDouble(Float16)
+// conversion function (for performance reasons).
+double FPToDouble(Float16 value, UseDefaultNaN DN, bool* exception) {
+ // We can rely on implicit float to double conversion here.
+ return FPToFloat(value, DN, exception);
+}
+
+
+double FPToDouble(float value, UseDefaultNaN DN, bool* exception) {
+ switch (std::fpclassify(value)) {
+ case FP_NAN: {
+ if (IsSignallingNaN(value)) {
+ if (exception != NULL) {
+ *exception = true;
+ }
+ }
+ if (DN == kUseDefaultNaN) return kFP64DefaultNaN;
+
+ // Convert NaNs as the processor would:
+ // - The sign is propagated.
+ // - The payload (mantissa) is transferred entirely, except that the top
+ // bit is forced to '1', making the result a quiet NaN. The unused
+ // (low-order) payload bits are set to 0.
+ uint32_t raw = FloatToRawbits(value);
+
+ uint64_t sign = raw >> 31;
+ uint64_t exponent = (1 << 11) - 1;
+ uint64_t payload = ExtractUnsignedBitfield64(21, 0, raw);
+ payload <<= (52 - 23); // The unused low-order bits should be 0.
+ payload |= (UINT64_C(1) << 51); // Force a quiet NaN.
+
+ return RawbitsToDouble((sign << 63) | (exponent << 52) | payload);
+ }
+
+ case FP_ZERO:
+ case FP_NORMAL:
+ case FP_SUBNORMAL:
+ case FP_INFINITE: {
+ // All other inputs are preserved in a standard cast, because every value
+ // representable using an IEEE-754 float is also representable using an
+ // IEEE-754 double.
+ return static_cast<double>(value);
+ }
+ }
+
+ VIXL_UNREACHABLE();
+ return static_cast<double>(value);
+}
+
+
+Float16 FPToFloat16(float value,
+ FPRounding round_mode,
+ UseDefaultNaN DN,
+ bool* exception) {
+ // Only the FPTieEven rounding mode is implemented.
+ VIXL_ASSERT(round_mode == FPTieEven);
+ USE(round_mode);
+
+ uint32_t raw = FloatToRawbits(value);
+ int32_t sign = raw >> 31;
+ int32_t exponent = ExtractUnsignedBitfield32(30, 23, raw) - 127;
+ uint32_t mantissa = ExtractUnsignedBitfield32(22, 0, raw);
+
+ switch (std::fpclassify(value)) {
+ case FP_NAN: {
+ if (IsSignallingNaN(value)) {
+ if (exception != NULL) {
+ *exception = true;
+ }
+ }
+ if (DN == kUseDefaultNaN) return kFP16DefaultNaN;
+
+ // Convert NaNs as the processor would:
+ // - The sign is propagated.
+ // - The payload (mantissa) is transferred as much as possible, except
+ // that the top bit is forced to '1', making the result a quiet NaN.
+ uint16_t result = (sign == 0) ? Float16ToRawbits(kFP16PositiveInfinity)
+ : Float16ToRawbits(kFP16NegativeInfinity);
+ result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits);
+ result |= (1 << 9); // Force a quiet NaN;
+ return RawbitsToFloat16(result);
+ }
+
+ case FP_ZERO:
+ return (sign == 0) ? kFP16PositiveZero : kFP16NegativeZero;
+
+ case FP_INFINITE:
+ return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
+
+ case FP_NORMAL:
+ case FP_SUBNORMAL: {
+ // Convert float-to-half as the processor would, assuming that FPCR.FZ
+ // (flush-to-zero) is not set.
+
+ // Add the implicit '1' bit to the mantissa.
+ mantissa += (1 << 23);
+ return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
+ }
+ }
+
+ VIXL_UNREACHABLE();
+ return kFP16PositiveZero;
+}
+
+
+Float16 FPToFloat16(double value,
+ FPRounding round_mode,
+ UseDefaultNaN DN,
+ bool* exception) {
+ // Only the FPTieEven rounding mode is implemented.
+ VIXL_ASSERT(round_mode == FPTieEven);
+ USE(round_mode);
+
+ uint64_t raw = DoubleToRawbits(value);
+ int32_t sign = raw >> 63;
+ int64_t exponent = ExtractUnsignedBitfield64(62, 52, raw) - 1023;
+ uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw);
+
+ switch (std::fpclassify(value)) {
+ case FP_NAN: {
+ if (IsSignallingNaN(value)) {
+ if (exception != NULL) {
+ *exception = true;
+ }
+ }
+ if (DN == kUseDefaultNaN) return kFP16DefaultNaN;
+
+ // Convert NaNs as the processor would:
+ // - The sign is propagated.
+ // - The payload (mantissa) is transferred as much as possible, except
+ // that the top bit is forced to '1', making the result a quiet NaN.
+ uint16_t result = (sign == 0) ? Float16ToRawbits(kFP16PositiveInfinity)
+ : Float16ToRawbits(kFP16NegativeInfinity);
+ result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits);
+ result |= (1 << 9); // Force a quiet NaN;
+ return RawbitsToFloat16(result);
+ }
+
+ case FP_ZERO:
+ return (sign == 0) ? kFP16PositiveZero : kFP16NegativeZero;
+
+ case FP_INFINITE:
+ return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
+ case FP_NORMAL:
+ case FP_SUBNORMAL: {
+ // Convert double-to-half as the processor would, assuming that FPCR.FZ
+ // (flush-to-zero) is not set.
+
+ // Add the implicit '1' bit to the mantissa.
+ mantissa += (UINT64_C(1) << 52);
+ return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
+ }
+ }
+
+ VIXL_UNREACHABLE();
+ return kFP16PositiveZero;
+}
+
+} // namespace vixl
diff --git a/js/src/jit/arm64/vixl/Utils-vixl.h b/js/src/jit/arm64/vixl/Utils-vixl.h
new file mode 100644
index 0000000000..d1f6a835f8
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Utils-vixl.h
@@ -0,0 +1,1283 @@
+// Copyright 2015, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_UTILS_H
+#define VIXL_UTILS_H
+
+#include "mozilla/FloatingPoint.h"
+
+#include <cmath>
+#include <cstring>
+#include <limits>
+#include <vector>
+
+#include "jit/arm64/vixl/CompilerIntrinsics-vixl.h"
+#include "jit/arm64/vixl/Globals-vixl.h"
+
+namespace vixl {
+
+// Macros for compile-time format checking.
+#if GCC_VERSION_OR_NEWER(4, 4, 0)
+#define PRINTF_CHECK(format_index, varargs_index) \
+ __attribute__((format(gnu_printf, format_index, varargs_index)))
+#else
+#define PRINTF_CHECK(format_index, varargs_index)
+#endif
+
+#ifdef __GNUC__
+#define VIXL_HAS_DEPRECATED_WITH_MSG
+#elif defined(__clang__)
+#ifdef __has_extension
+#define VIXL_HAS_DEPRECATED_WITH_MSG
+#endif
+#endif
+
+#ifdef VIXL_HAS_DEPRECATED_WITH_MSG
+#define VIXL_DEPRECATED(replaced_by, declarator) \
+ __attribute__((deprecated("Use \"" replaced_by "\" instead"))) declarator
+#else
+#define VIXL_DEPRECATED(replaced_by, declarator) declarator
+#endif
+
+#ifdef VIXL_DEBUG
+#define VIXL_UNREACHABLE_OR_FALLTHROUGH() VIXL_UNREACHABLE()
+#else
+#define VIXL_UNREACHABLE_OR_FALLTHROUGH() VIXL_FALLTHROUGH()
+#endif
+
+template <typename T, size_t n>
+size_t ArrayLength(const T (&)[n]) {
+ return n;
+}
+
+// Check number width.
+// TODO: Refactor these using templates.
+inline bool IsIntN(unsigned n, uint32_t x) {
+ VIXL_ASSERT((0 < n) && (n < 32));
+ uint32_t limit = UINT32_C(1) << (n - 1);
+ return x < limit;
+}
+inline bool IsIntN(unsigned n, int32_t x) {
+ VIXL_ASSERT((0 < n) && (n < 32));
+ int32_t limit = INT32_C(1) << (n - 1);
+ return (-limit <= x) && (x < limit);
+}
+inline bool IsIntN(unsigned n, uint64_t x) {
+ VIXL_ASSERT((0 < n) && (n < 64));
+ uint64_t limit = UINT64_C(1) << (n - 1);
+ return x < limit;
+}
+inline bool IsIntN(unsigned n, int64_t x) {
+ VIXL_ASSERT((0 < n) && (n < 64));
+ int64_t limit = INT64_C(1) << (n - 1);
+ return (-limit <= x) && (x < limit);
+}
+VIXL_DEPRECATED("IsIntN", inline bool is_intn(unsigned n, int64_t x)) {
+ return IsIntN(n, x);
+}
+
+inline bool IsUintN(unsigned n, uint32_t x) {
+ VIXL_ASSERT((0 < n) && (n < 32));
+ return !(x >> n);
+}
+inline bool IsUintN(unsigned n, int32_t x) {
+ VIXL_ASSERT((0 < n) && (n < 32));
+ // Convert to an unsigned integer to avoid implementation-defined behavior.
+ return !(static_cast<uint32_t>(x) >> n);
+}
+inline bool IsUintN(unsigned n, uint64_t x) {
+ VIXL_ASSERT((0 < n) && (n < 64));
+ return !(x >> n);
+}
+inline bool IsUintN(unsigned n, int64_t x) {
+ VIXL_ASSERT((0 < n) && (n < 64));
+ // Convert to an unsigned integer to avoid implementation-defined behavior.
+ return !(static_cast<uint64_t>(x) >> n);
+}
+VIXL_DEPRECATED("IsUintN", inline bool is_uintn(unsigned n, int64_t x)) {
+ return IsUintN(n, x);
+}
+
+inline uint64_t TruncateToUintN(unsigned n, uint64_t x) {
+ VIXL_ASSERT((0 < n) && (n < 64));
+ return static_cast<uint64_t>(x) & ((UINT64_C(1) << n) - 1);
+}
+VIXL_DEPRECATED("TruncateToUintN",
+ inline uint64_t truncate_to_intn(unsigned n, int64_t x)) {
+ return TruncateToUintN(n, x);
+}
+
+// clang-format off
+#define INT_1_TO_32_LIST(V) \
+V(1) V(2) V(3) V(4) V(5) V(6) V(7) V(8) \
+V(9) V(10) V(11) V(12) V(13) V(14) V(15) V(16) \
+V(17) V(18) V(19) V(20) V(21) V(22) V(23) V(24) \
+V(25) V(26) V(27) V(28) V(29) V(30) V(31) V(32)
+
+#define INT_33_TO_63_LIST(V) \
+V(33) V(34) V(35) V(36) V(37) V(38) V(39) V(40) \
+V(41) V(42) V(43) V(44) V(45) V(46) V(47) V(48) \
+V(49) V(50) V(51) V(52) V(53) V(54) V(55) V(56) \
+V(57) V(58) V(59) V(60) V(61) V(62) V(63)
+
+#define INT_1_TO_63_LIST(V) INT_1_TO_32_LIST(V) INT_33_TO_63_LIST(V)
+
+// clang-format on
+
+#define DECLARE_IS_INT_N(N) \
+ inline bool IsInt##N(int64_t x) { return IsIntN(N, x); } \
+ VIXL_DEPRECATED("IsInt" #N, inline bool is_int##N(int64_t x)) { \
+ return IsIntN(N, x); \
+ }
+
+#define DECLARE_IS_UINT_N(N) \
+ inline bool IsUint##N(int64_t x) { return IsUintN(N, x); } \
+ VIXL_DEPRECATED("IsUint" #N, inline bool is_uint##N(int64_t x)) { \
+ return IsUintN(N, x); \
+ }
+
+#define DECLARE_TRUNCATE_TO_UINT_32(N) \
+ inline uint32_t TruncateToUint##N(uint64_t x) { \
+ return static_cast<uint32_t>(TruncateToUintN(N, x)); \
+ } \
+ VIXL_DEPRECATED("TruncateToUint" #N, \
+ inline uint32_t truncate_to_int##N(int64_t x)) { \
+ return TruncateToUint##N(x); \
+ }
+
+INT_1_TO_63_LIST(DECLARE_IS_INT_N)
+INT_1_TO_63_LIST(DECLARE_IS_UINT_N)
+INT_1_TO_32_LIST(DECLARE_TRUNCATE_TO_UINT_32)
+
+#undef DECLARE_IS_INT_N
+#undef DECLARE_IS_UINT_N
+#undef DECLARE_TRUNCATE_TO_INT_N
+
+// Bit field extraction.
+inline uint64_t ExtractUnsignedBitfield64(int msb, int lsb, uint64_t x) {
+ VIXL_ASSERT((static_cast<size_t>(msb) < sizeof(x) * 8) && (lsb >= 0) &&
+ (msb >= lsb));
+ if ((msb == 63) && (lsb == 0)) return x;
+ return (x >> lsb) & ((static_cast<uint64_t>(1) << (1 + msb - lsb)) - 1);
+}
+
+
+inline uint32_t ExtractUnsignedBitfield32(int msb, int lsb, uint32_t x) {
+ VIXL_ASSERT((static_cast<size_t>(msb) < sizeof(x) * 8) && (lsb >= 0) &&
+ (msb >= lsb));
+ return TruncateToUint32(ExtractUnsignedBitfield64(msb, lsb, x));
+}
+
+
+inline int64_t ExtractSignedBitfield64(int msb, int lsb, int64_t x) {
+ VIXL_ASSERT((static_cast<size_t>(msb) < sizeof(x) * 8) && (lsb >= 0) &&
+ (msb >= lsb));
+ uint64_t temp = ExtractUnsignedBitfield64(msb, lsb, x);
+ // If the highest extracted bit is set, sign extend.
+ if ((temp >> (msb - lsb)) == 1) {
+ temp |= ~UINT64_C(0) << (msb - lsb);
+ }
+ int64_t result;
+ memcpy(&result, &temp, sizeof(result));
+ return result;
+}
+
+
+inline int32_t ExtractSignedBitfield32(int msb, int lsb, int32_t x) {
+ VIXL_ASSERT((static_cast<size_t>(msb) < sizeof(x) * 8) && (lsb >= 0) &&
+ (msb >= lsb));
+ uint32_t temp = TruncateToUint32(ExtractSignedBitfield64(msb, lsb, x));
+ int32_t result;
+ memcpy(&result, &temp, sizeof(result));
+ return result;
+}
+
+
+inline uint64_t RotateRight(uint64_t value,
+ unsigned int rotate,
+ unsigned int width) {
+ VIXL_ASSERT((width > 0) && (width <= 64));
+ uint64_t width_mask = ~UINT64_C(0) >> (64 - width);
+ rotate &= 63;
+ if (rotate > 0) {
+ value &= width_mask;
+ value = (value << (width - rotate)) | (value >> rotate);
+ }
+ return value & width_mask;
+}
+
+
+// Wrapper class for passing FP16 values through the assembler.
+// This is purely to aid with type checking/casting.
+class Float16 {
+ public:
+ explicit Float16(double dvalue);
+ Float16() : rawbits_(0x0) {}
+ friend uint16_t Float16ToRawbits(Float16 value);
+ friend Float16 RawbitsToFloat16(uint16_t bits);
+
+ protected:
+ uint16_t rawbits_;
+};
+
+// Floating point representation.
+uint16_t Float16ToRawbits(Float16 value);
+
+
+uint32_t FloatToRawbits(float value);
+VIXL_DEPRECATED("FloatToRawbits",
+ inline uint32_t float_to_rawbits(float value)) {
+ return FloatToRawbits(value);
+}
+
+uint64_t DoubleToRawbits(double value);
+VIXL_DEPRECATED("DoubleToRawbits",
+ inline uint64_t double_to_rawbits(double value)) {
+ return DoubleToRawbits(value);
+}
+
+Float16 RawbitsToFloat16(uint16_t bits);
+
+float RawbitsToFloat(uint32_t bits);
+VIXL_DEPRECATED("RawbitsToFloat",
+ inline float rawbits_to_float(uint32_t bits)) {
+ return RawbitsToFloat(bits);
+}
+
+double RawbitsToDouble(uint64_t bits);
+VIXL_DEPRECATED("RawbitsToDouble",
+ inline double rawbits_to_double(uint64_t bits)) {
+ return RawbitsToDouble(bits);
+}
+
+namespace internal {
+
+// Internal simulation class used solely by the simulator to
+// provide an abstraction layer for any half-precision arithmetic.
+class SimFloat16 : public Float16 {
+ public:
+ // TODO: We should investigate making this constructor explicit.
+ // This is currently difficult to do due to a number of templated
+ // functions in the simulator which rely on returning double values.
+ SimFloat16(double dvalue) : Float16(dvalue) {} // NOLINT(runtime/explicit)
+ SimFloat16(Float16 f) { // NOLINT(runtime/explicit)
+ this->rawbits_ = Float16ToRawbits(f);
+ }
+ SimFloat16() : Float16() {}
+ SimFloat16 operator-() const;
+ SimFloat16 operator+(SimFloat16 rhs) const;
+ SimFloat16 operator-(SimFloat16 rhs) const;
+ SimFloat16 operator*(SimFloat16 rhs) const;
+ SimFloat16 operator/(SimFloat16 rhs) const;
+ bool operator<(SimFloat16 rhs) const;
+ bool operator>(SimFloat16 rhs) const;
+ bool operator==(SimFloat16 rhs) const;
+ bool operator!=(SimFloat16 rhs) const;
+ // This is necessary for conversions peformed in (macro asm) Fmov.
+ bool operator==(double rhs) const;
+ operator double() const;
+};
+} // namespace internal
+
+uint32_t Float16Sign(internal::SimFloat16 value);
+
+uint32_t Float16Exp(internal::SimFloat16 value);
+
+uint32_t Float16Mantissa(internal::SimFloat16 value);
+
+uint32_t FloatSign(float value);
+VIXL_DEPRECATED("FloatSign", inline uint32_t float_sign(float value)) {
+ return FloatSign(value);
+}
+
+uint32_t FloatExp(float value);
+VIXL_DEPRECATED("FloatExp", inline uint32_t float_exp(float value)) {
+ return FloatExp(value);
+}
+
+uint32_t FloatMantissa(float value);
+VIXL_DEPRECATED("FloatMantissa", inline uint32_t float_mantissa(float value)) {
+ return FloatMantissa(value);
+}
+
+uint32_t DoubleSign(double value);
+VIXL_DEPRECATED("DoubleSign", inline uint32_t double_sign(double value)) {
+ return DoubleSign(value);
+}
+
+uint32_t DoubleExp(double value);
+VIXL_DEPRECATED("DoubleExp", inline uint32_t double_exp(double value)) {
+ return DoubleExp(value);
+}
+
+uint64_t DoubleMantissa(double value);
+VIXL_DEPRECATED("DoubleMantissa",
+ inline uint64_t double_mantissa(double value)) {
+ return DoubleMantissa(value);
+}
+
+internal::SimFloat16 Float16Pack(uint16_t sign,
+ uint16_t exp,
+ uint16_t mantissa);
+
+float FloatPack(uint32_t sign, uint32_t exp, uint32_t mantissa);
+VIXL_DEPRECATED("FloatPack",
+ inline float float_pack(uint32_t sign,
+ uint32_t exp,
+ uint32_t mantissa)) {
+ return FloatPack(sign, exp, mantissa);
+}
+
+double DoublePack(uint64_t sign, uint64_t exp, uint64_t mantissa);
+VIXL_DEPRECATED("DoublePack",
+ inline double double_pack(uint32_t sign,
+ uint32_t exp,
+ uint64_t mantissa)) {
+ return DoublePack(sign, exp, mantissa);
+}
+
+// An fpclassify() function for 16-bit half-precision floats.
+int Float16Classify(Float16 value);
+VIXL_DEPRECATED("Float16Classify", inline int float16classify(uint16_t value)) {
+ return Float16Classify(RawbitsToFloat16(value));
+}
+
+bool IsZero(Float16 value);
+
+inline bool IsNaN(float value) { return std::isnan(value); }
+
+inline bool IsNaN(double value) { return std::isnan(value); }
+
+inline bool IsNaN(Float16 value) { return Float16Classify(value) == FP_NAN; }
+
+inline bool IsInf(float value) { return std::isinf(value); }
+
+inline bool IsInf(double value) { return std::isinf(value); }
+
+inline bool IsInf(Float16 value) {
+ return Float16Classify(value) == FP_INFINITE;
+}
+
+
+// NaN tests.
+inline bool IsSignallingNaN(double num) {
+ const uint64_t kFP64QuietNaNMask = UINT64_C(0x0008000000000000);
+ uint64_t raw = DoubleToRawbits(num);
+ if (IsNaN(num) && ((raw & kFP64QuietNaNMask) == 0)) {
+ return true;
+ }
+ return false;
+}
+
+
+inline bool IsSignallingNaN(float num) {
+ const uint32_t kFP32QuietNaNMask = 0x00400000;
+ uint32_t raw = FloatToRawbits(num);
+ if (IsNaN(num) && ((raw & kFP32QuietNaNMask) == 0)) {
+ return true;
+ }
+ return false;
+}
+
+
+inline bool IsSignallingNaN(Float16 num) {
+ const uint16_t kFP16QuietNaNMask = 0x0200;
+ return IsNaN(num) && ((Float16ToRawbits(num) & kFP16QuietNaNMask) == 0);
+}
+
+
+template <typename T>
+inline bool IsQuietNaN(T num) {
+ return IsNaN(num) && !IsSignallingNaN(num);
+}
+
+
+// Convert the NaN in 'num' to a quiet NaN.
+inline double ToQuietNaN(double num) {
+ const uint64_t kFP64QuietNaNMask = UINT64_C(0x0008000000000000);
+ VIXL_ASSERT(IsNaN(num));
+ return RawbitsToDouble(DoubleToRawbits(num) | kFP64QuietNaNMask);
+}
+
+
+inline float ToQuietNaN(float num) {
+ const uint32_t kFP32QuietNaNMask = 0x00400000;
+ VIXL_ASSERT(IsNaN(num));
+ return RawbitsToFloat(FloatToRawbits(num) | kFP32QuietNaNMask);
+}
+
+
+inline internal::SimFloat16 ToQuietNaN(internal::SimFloat16 num) {
+ const uint16_t kFP16QuietNaNMask = 0x0200;
+ VIXL_ASSERT(IsNaN(num));
+ return internal::SimFloat16(
+ RawbitsToFloat16(Float16ToRawbits(num) | kFP16QuietNaNMask));
+}
+
+
+// Fused multiply-add.
+inline double FusedMultiplyAdd(double op1, double op2, double a) {
+ return fma(op1, op2, a);
+}
+
+
+inline float FusedMultiplyAdd(float op1, float op2, float a) {
+ return fmaf(op1, op2, a);
+}
+
+
+inline uint64_t LowestSetBit(uint64_t value) { return value & -value; }
+
+
+template <typename T>
+inline int HighestSetBitPosition(T value) {
+ VIXL_ASSERT(value != 0);
+ return (sizeof(value) * 8 - 1) - CountLeadingZeros(value);
+}
+
+
+template <typename V>
+inline int WhichPowerOf2(V value) {
+ VIXL_ASSERT(IsPowerOf2(value));
+ return CountTrailingZeros(value);
+}
+
+
+unsigned CountClearHalfWords(uint64_t imm, unsigned reg_size);
+
+
+int BitCount(uint64_t value);
+
+
+template <typename T>
+T ReverseBits(T value) {
+ VIXL_ASSERT((sizeof(value) == 1) || (sizeof(value) == 2) ||
+ (sizeof(value) == 4) || (sizeof(value) == 8));
+ T result = 0;
+ for (unsigned i = 0; i < (sizeof(value) * 8); i++) {
+ result = (result << 1) | (value & 1);
+ value >>= 1;
+ }
+ return result;
+}
+
+
+template <typename T>
+inline T SignExtend(T val, int bitSize) {
+ VIXL_ASSERT(bitSize > 0);
+ T mask = (T(2) << (bitSize - 1)) - T(1);
+ val &= mask;
+ T sign_bits = -((val >> (bitSize - 1)) << bitSize);
+ val |= sign_bits;
+ return val;
+}
+
+
+template <typename T>
+T ReverseBytes(T value, int block_bytes_log2) {
+ VIXL_ASSERT((sizeof(value) == 4) || (sizeof(value) == 8));
+ VIXL_ASSERT((1U << block_bytes_log2) <= sizeof(value));
+ // Split the 64-bit value into an 8-bit array, where b[0] is the least
+ // significant byte, and b[7] is the most significant.
+ uint8_t bytes[8];
+ uint64_t mask = UINT64_C(0xff00000000000000);
+ for (int i = 7; i >= 0; i--) {
+ bytes[i] = (static_cast<uint64_t>(value) & mask) >> (i * 8);
+ mask >>= 8;
+ }
+
+ // Permutation tables for REV instructions.
+ // permute_table[0] is used by REV16_x, REV16_w
+ // permute_table[1] is used by REV32_x, REV_w
+ // permute_table[2] is used by REV_x
+ VIXL_ASSERT((0 < block_bytes_log2) && (block_bytes_log2 < 4));
+ static const uint8_t permute_table[3][8] = {{6, 7, 4, 5, 2, 3, 0, 1},
+ {4, 5, 6, 7, 0, 1, 2, 3},
+ {0, 1, 2, 3, 4, 5, 6, 7}};
+ uint64_t temp = 0;
+ for (int i = 0; i < 8; i++) {
+ temp <<= 8;
+ temp |= bytes[permute_table[block_bytes_log2 - 1][i]];
+ }
+
+ T result;
+ VIXL_STATIC_ASSERT(sizeof(result) <= sizeof(temp));
+ memcpy(&result, &temp, sizeof(result));
+ return result;
+}
+
+template <unsigned MULTIPLE, typename T>
+inline bool IsMultiple(T value) {
+ VIXL_ASSERT(IsPowerOf2(MULTIPLE));
+ return (value & (MULTIPLE - 1)) == 0;
+}
+
+template <typename T>
+inline bool IsMultiple(T value, unsigned multiple) {
+ VIXL_ASSERT(IsPowerOf2(multiple));
+ return (value & (multiple - 1)) == 0;
+}
+
+template <typename T>
+inline bool IsAligned(T pointer, int alignment) {
+ VIXL_ASSERT(IsPowerOf2(alignment));
+ return (pointer & (alignment - 1)) == 0;
+}
+
+// Pointer alignment
+// TODO: rename/refactor to make it specific to instructions.
+template <unsigned ALIGN, typename T>
+inline bool IsAligned(T pointer) {
+ VIXL_ASSERT(sizeof(pointer) == sizeof(intptr_t)); // NOLINT(runtime/sizeof)
+ // Use C-style casts to get static_cast behaviour for integral types (T), and
+ // reinterpret_cast behaviour for other types.
+ return IsAligned((intptr_t)(pointer), ALIGN);
+}
+
+template <typename T>
+bool IsWordAligned(T pointer) {
+ return IsAligned<4>(pointer);
+}
+
+// Increment a pointer until it has the specified alignment. The alignment must
+// be a power of two.
+template <class T>
+T AlignUp(T pointer,
+ typename Unsigned<sizeof(T) * kBitsPerByte>::type alignment) {
+ VIXL_ASSERT(IsPowerOf2(alignment));
+ // Use C-style casts to get static_cast behaviour for integral types (T), and
+ // reinterpret_cast behaviour for other types.
+
+ typename Unsigned<sizeof(T)* kBitsPerByte>::type pointer_raw =
+ (typename Unsigned<sizeof(T) * kBitsPerByte>::type)pointer;
+ VIXL_STATIC_ASSERT(sizeof(pointer) <= sizeof(pointer_raw));
+
+ size_t mask = alignment - 1;
+ T result = (T)((pointer_raw + mask) & ~mask);
+ VIXL_ASSERT(result >= pointer);
+
+ return result;
+}
+
+// Decrement a pointer until it has the specified alignment. The alignment must
+// be a power of two.
+template <class T>
+T AlignDown(T pointer,
+ typename Unsigned<sizeof(T) * kBitsPerByte>::type alignment) {
+ VIXL_ASSERT(IsPowerOf2(alignment));
+ // Use C-style casts to get static_cast behaviour for integral types (T), and
+ // reinterpret_cast behaviour for other types.
+
+ typename Unsigned<sizeof(T)* kBitsPerByte>::type pointer_raw =
+ (typename Unsigned<sizeof(T) * kBitsPerByte>::type)pointer;
+ VIXL_STATIC_ASSERT(sizeof(pointer) <= sizeof(pointer_raw));
+
+ size_t mask = alignment - 1;
+ return (T)(pointer_raw & ~mask);
+}
+
+
+template <typename T>
+inline T ExtractBit(T value, unsigned bit) {
+ return (value >> bit) & T(1);
+}
+
+template <typename Ts, typename Td>
+inline Td ExtractBits(Ts value, int least_significant_bit, Td mask) {
+ return Td((value >> least_significant_bit) & Ts(mask));
+}
+
+template <typename Ts, typename Td>
+inline void AssignBit(Td& dst, // NOLINT(runtime/references)
+ int bit,
+ Ts value) {
+ VIXL_ASSERT((value == Ts(0)) || (value == Ts(1)));
+ VIXL_ASSERT(bit >= 0);
+ VIXL_ASSERT(bit < static_cast<int>(sizeof(Td) * 8));
+ Td mask(1);
+ dst &= ~(mask << bit);
+ dst |= Td(value) << bit;
+}
+
+template <typename Td, typename Ts>
+inline void AssignBits(Td& dst, // NOLINT(runtime/references)
+ int least_significant_bit,
+ Ts mask,
+ Ts value) {
+ VIXL_ASSERT(least_significant_bit >= 0);
+ VIXL_ASSERT(least_significant_bit < static_cast<int>(sizeof(Td) * 8));
+ VIXL_ASSERT(((Td(mask) << least_significant_bit) >> least_significant_bit) ==
+ Td(mask));
+ VIXL_ASSERT((value & mask) == value);
+ dst &= ~(Td(mask) << least_significant_bit);
+ dst |= Td(value) << least_significant_bit;
+}
+
+class VFP {
+ public:
+ static uint32_t FP32ToImm8(float imm) {
+ // bits: aBbb.bbbc.defg.h000.0000.0000.0000.0000
+ uint32_t bits = FloatToRawbits(imm);
+ // bit7: a000.0000
+ uint32_t bit7 = ((bits >> 31) & 0x1) << 7;
+ // bit6: 0b00.0000
+ uint32_t bit6 = ((bits >> 29) & 0x1) << 6;
+ // bit5_to_0: 00cd.efgh
+ uint32_t bit5_to_0 = (bits >> 19) & 0x3f;
+ return static_cast<uint32_t>(bit7 | bit6 | bit5_to_0);
+ }
+ static uint32_t FP64ToImm8(double imm) {
+ // bits: aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
+ // 0000.0000.0000.0000.0000.0000.0000.0000
+ uint64_t bits = DoubleToRawbits(imm);
+ // bit7: a000.0000
+ uint64_t bit7 = ((bits >> 63) & 0x1) << 7;
+ // bit6: 0b00.0000
+ uint64_t bit6 = ((bits >> 61) & 0x1) << 6;
+ // bit5_to_0: 00cd.efgh
+ uint64_t bit5_to_0 = (bits >> 48) & 0x3f;
+
+ return static_cast<uint32_t>(bit7 | bit6 | bit5_to_0);
+ }
+ static float Imm8ToFP32(uint32_t imm8) {
+ // Imm8: abcdefgh (8 bits)
+ // Single: aBbb.bbbc.defg.h000.0000.0000.0000.0000 (32 bits)
+ // where B is b ^ 1
+ uint32_t bits = imm8;
+ uint32_t bit7 = (bits >> 7) & 0x1;
+ uint32_t bit6 = (bits >> 6) & 0x1;
+ uint32_t bit5_to_0 = bits & 0x3f;
+ uint32_t result = (bit7 << 31) | ((32 - bit6) << 25) | (bit5_to_0 << 19);
+
+ return RawbitsToFloat(result);
+ }
+ static double Imm8ToFP64(uint32_t imm8) {
+ // Imm8: abcdefgh (8 bits)
+ // Double: aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
+ // 0000.0000.0000.0000.0000.0000.0000.0000 (64 bits)
+ // where B is b ^ 1
+ uint32_t bits = imm8;
+ uint64_t bit7 = (bits >> 7) & 0x1;
+ uint64_t bit6 = (bits >> 6) & 0x1;
+ uint64_t bit5_to_0 = bits & 0x3f;
+ uint64_t result = (bit7 << 63) | ((256 - bit6) << 54) | (bit5_to_0 << 48);
+ return RawbitsToDouble(result);
+ }
+ static bool IsImmFP32(float imm) {
+ // Valid values will have the form:
+ // aBbb.bbbc.defg.h000.0000.0000.0000.0000
+ uint32_t bits = FloatToRawbits(imm);
+ // bits[19..0] are cleared.
+ if ((bits & 0x7ffff) != 0) {
+ return false;
+ }
+
+
+ // bits[29..25] are all set or all cleared.
+ uint32_t b_pattern = (bits >> 16) & 0x3e00;
+ if (b_pattern != 0 && b_pattern != 0x3e00) {
+ return false;
+ }
+ // bit[30] and bit[29] are opposite.
+ if (((bits ^ (bits << 1)) & 0x40000000) == 0) {
+ return false;
+ }
+ return true;
+ }
+ static bool IsImmFP64(double imm) {
+ // Valid values will have the form:
+ // aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
+ // 0000.0000.0000.0000.0000.0000.0000.0000
+ uint64_t bits = DoubleToRawbits(imm);
+ // bits[47..0] are cleared.
+ if ((bits & 0x0000ffffffffffff) != 0) {
+ return false;
+ }
+ // bits[61..54] are all set or all cleared.
+ uint32_t b_pattern = (bits >> 48) & 0x3fc0;
+ if ((b_pattern != 0) && (b_pattern != 0x3fc0)) {
+ return false;
+ }
+ // bit[62] and bit[61] are opposite.
+ if (((bits ^ (bits << 1)) & (UINT64_C(1) << 62)) == 0) {
+ return false;
+ }
+ return true;
+ }
+};
+
+class BitField {
+ // ForEachBitHelper is a functor that will call
+ // bool ForEachBitHelper::execute(ElementType id) const
+ // and expects a boolean in return whether to continue (if true)
+ // or stop (if false)
+ // check_set will check if the bits are on (true) or off(false)
+ template <typename ForEachBitHelper, bool check_set>
+ bool ForEachBit(const ForEachBitHelper& helper) {
+ for (int i = 0; static_cast<size_t>(i) < bitfield_.size(); i++) {
+ if (bitfield_[i] == check_set)
+ if (!helper.execute(i)) return false;
+ }
+ return true;
+ }
+
+ public:
+ explicit BitField(unsigned size) : bitfield_(size, 0) {}
+
+ void Set(int i) {
+ VIXL_ASSERT((i >= 0) && (static_cast<size_t>(i) < bitfield_.size()));
+ bitfield_[i] = true;
+ }
+
+ void Unset(int i) {
+ VIXL_ASSERT((i >= 0) && (static_cast<size_t>(i) < bitfield_.size()));
+ bitfield_[i] = true;
+ }
+
+ bool IsSet(int i) const { return bitfield_[i]; }
+
+ // For each bit not set in the bitfield call the execute functor
+ // execute.
+ // ForEachBitSetHelper::execute returns true if the iteration through
+ // the bits can continue, otherwise it will stop.
+ // struct ForEachBitSetHelper {
+ // bool execute(int /*id*/) { return false; }
+ // };
+ template <typename ForEachBitNotSetHelper>
+ bool ForEachBitNotSet(const ForEachBitNotSetHelper& helper) {
+ return ForEachBit<ForEachBitNotSetHelper, false>(helper);
+ }
+
+ // For each bit set in the bitfield call the execute functor
+ // execute.
+ template <typename ForEachBitSetHelper>
+ bool ForEachBitSet(const ForEachBitSetHelper& helper) {
+ return ForEachBit<ForEachBitSetHelper, true>(helper);
+ }
+
+ private:
+ std::vector<bool> bitfield_;
+};
+
+namespace internal {
+
+typedef int64_t Int64;
+class Uint64;
+class Uint128;
+
+class Uint32 {
+ uint32_t data_;
+
+ public:
+ // Unlike uint32_t, Uint32 has a default constructor.
+ Uint32() { data_ = 0; }
+ explicit Uint32(uint32_t data) : data_(data) {}
+ inline explicit Uint32(Uint64 data);
+ uint32_t Get() const { return data_; }
+ template <int N>
+ int32_t GetSigned() const {
+ return ExtractSignedBitfield32(N - 1, 0, data_);
+ }
+ int32_t GetSigned() const { return data_; }
+ Uint32 operator~() const { return Uint32(~data_); }
+ Uint32 operator-() const { return Uint32(-data_); }
+ bool operator==(Uint32 value) const { return data_ == value.data_; }
+ bool operator!=(Uint32 value) const { return data_ != value.data_; }
+ bool operator>(Uint32 value) const { return data_ > value.data_; }
+ Uint32 operator+(Uint32 value) const { return Uint32(data_ + value.data_); }
+ Uint32 operator-(Uint32 value) const { return Uint32(data_ - value.data_); }
+ Uint32 operator&(Uint32 value) const { return Uint32(data_ & value.data_); }
+ Uint32 operator&=(Uint32 value) {
+ data_ &= value.data_;
+ return *this;
+ }
+ Uint32 operator^(Uint32 value) const { return Uint32(data_ ^ value.data_); }
+ Uint32 operator^=(Uint32 value) {
+ data_ ^= value.data_;
+ return *this;
+ }
+ Uint32 operator|(Uint32 value) const { return Uint32(data_ | value.data_); }
+ Uint32 operator|=(Uint32 value) {
+ data_ |= value.data_;
+ return *this;
+ }
+ // Unlike uint32_t, the shift functions can accept negative shift and
+ // return 0 when the shift is too big.
+ Uint32 operator>>(int shift) const {
+ if (shift == 0) return *this;
+ if (shift < 0) {
+ int tmp = -shift;
+ if (tmp >= 32) return Uint32(0);
+ return Uint32(data_ << tmp);
+ }
+ int tmp = shift;
+ if (tmp >= 32) return Uint32(0);
+ return Uint32(data_ >> tmp);
+ }
+ Uint32 operator<<(int shift) const {
+ if (shift == 0) return *this;
+ if (shift < 0) {
+ int tmp = -shift;
+ if (tmp >= 32) return Uint32(0);
+ return Uint32(data_ >> tmp);
+ }
+ int tmp = shift;
+ if (tmp >= 32) return Uint32(0);
+ return Uint32(data_ << tmp);
+ }
+};
+
+class Uint64 {
+ uint64_t data_;
+
+ public:
+ // Unlike uint64_t, Uint64 has a default constructor.
+ Uint64() { data_ = 0; }
+ explicit Uint64(uint64_t data) : data_(data) {}
+ explicit Uint64(Uint32 data) : data_(data.Get()) {}
+ inline explicit Uint64(Uint128 data);
+ uint64_t Get() const { return data_; }
+ int64_t GetSigned(int N) const {
+ return ExtractSignedBitfield64(N - 1, 0, data_);
+ }
+ int64_t GetSigned() const { return data_; }
+ Uint32 ToUint32() const {
+ VIXL_ASSERT((data_ >> 32) == 0);
+ return Uint32(static_cast<uint32_t>(data_));
+ }
+ Uint32 GetHigh32() const { return Uint32(data_ >> 32); }
+ Uint32 GetLow32() const { return Uint32(data_ & 0xffffffff); }
+ Uint64 operator~() const { return Uint64(~data_); }
+ Uint64 operator-() const { return Uint64(-data_); }
+ bool operator==(Uint64 value) const { return data_ == value.data_; }
+ bool operator!=(Uint64 value) const { return data_ != value.data_; }
+ Uint64 operator+(Uint64 value) const { return Uint64(data_ + value.data_); }
+ Uint64 operator-(Uint64 value) const { return Uint64(data_ - value.data_); }
+ Uint64 operator&(Uint64 value) const { return Uint64(data_ & value.data_); }
+ Uint64 operator&=(Uint64 value) {
+ data_ &= value.data_;
+ return *this;
+ }
+ Uint64 operator^(Uint64 value) const { return Uint64(data_ ^ value.data_); }
+ Uint64 operator^=(Uint64 value) {
+ data_ ^= value.data_;
+ return *this;
+ }
+ Uint64 operator|(Uint64 value) const { return Uint64(data_ | value.data_); }
+ Uint64 operator|=(Uint64 value) {
+ data_ |= value.data_;
+ return *this;
+ }
+ // Unlike uint64_t, the shift functions can accept negative shift and
+ // return 0 when the shift is too big.
+ Uint64 operator>>(int shift) const {
+ if (shift == 0) return *this;
+ if (shift < 0) {
+ int tmp = -shift;
+ if (tmp >= 64) return Uint64(0);
+ return Uint64(data_ << tmp);
+ }
+ int tmp = shift;
+ if (tmp >= 64) return Uint64(0);
+ return Uint64(data_ >> tmp);
+ }
+ Uint64 operator<<(int shift) const {
+ if (shift == 0) return *this;
+ if (shift < 0) {
+ int tmp = -shift;
+ if (tmp >= 64) return Uint64(0);
+ return Uint64(data_ >> tmp);
+ }
+ int tmp = shift;
+ if (tmp >= 64) return Uint64(0);
+ return Uint64(data_ << tmp);
+ }
+};
+
+class Uint128 {
+ uint64_t data_high_;
+ uint64_t data_low_;
+
+ public:
+ Uint128() : data_high_(0), data_low_(0) {}
+ explicit Uint128(uint64_t data_low) : data_high_(0), data_low_(data_low) {}
+ explicit Uint128(Uint64 data_low)
+ : data_high_(0), data_low_(data_low.Get()) {}
+ Uint128(uint64_t data_high, uint64_t data_low)
+ : data_high_(data_high), data_low_(data_low) {}
+ Uint64 ToUint64() const {
+ VIXL_ASSERT(data_high_ == 0);
+ return Uint64(data_low_);
+ }
+ Uint64 GetHigh64() const { return Uint64(data_high_); }
+ Uint64 GetLow64() const { return Uint64(data_low_); }
+ Uint128 operator~() const { return Uint128(~data_high_, ~data_low_); }
+ bool operator==(Uint128 value) const {
+ return (data_high_ == value.data_high_) && (data_low_ == value.data_low_);
+ }
+ Uint128 operator&(Uint128 value) const {
+ return Uint128(data_high_ & value.data_high_, data_low_ & value.data_low_);
+ }
+ Uint128 operator&=(Uint128 value) {
+ data_high_ &= value.data_high_;
+ data_low_ &= value.data_low_;
+ return *this;
+ }
+ Uint128 operator|=(Uint128 value) {
+ data_high_ |= value.data_high_;
+ data_low_ |= value.data_low_;
+ return *this;
+ }
+ Uint128 operator>>(int shift) const {
+ VIXL_ASSERT((shift >= 0) && (shift < 128));
+ if (shift == 0) return *this;
+ if (shift >= 64) {
+ return Uint128(0, data_high_ >> (shift - 64));
+ }
+ uint64_t tmp = (data_high_ << (64 - shift)) | (data_low_ >> shift);
+ return Uint128(data_high_ >> shift, tmp);
+ }
+ Uint128 operator<<(int shift) const {
+ VIXL_ASSERT((shift >= 0) && (shift < 128));
+ if (shift == 0) return *this;
+ if (shift >= 64) {
+ return Uint128(data_low_ << (shift - 64), 0);
+ }
+ uint64_t tmp = (data_high_ << shift) | (data_low_ >> (64 - shift));
+ return Uint128(tmp, data_low_ << shift);
+ }
+};
+
+Uint32::Uint32(Uint64 data) : data_(data.ToUint32().Get()) {}
+Uint64::Uint64(Uint128 data) : data_(data.ToUint64().Get()) {}
+
+Int64 BitCount(Uint32 value);
+
+} // namespace internal
+
+// The default NaN values (for FPCR.DN=1).
+extern const double kFP64DefaultNaN;
+extern const float kFP32DefaultNaN;
+extern const Float16 kFP16DefaultNaN;
+
+// Floating-point infinity values.
+extern const Float16 kFP16PositiveInfinity;
+extern const Float16 kFP16NegativeInfinity;
+extern const float kFP32PositiveInfinity;
+extern const float kFP32NegativeInfinity;
+extern const double kFP64PositiveInfinity;
+extern const double kFP64NegativeInfinity;
+
+// Floating-point zero values.
+extern const Float16 kFP16PositiveZero;
+extern const Float16 kFP16NegativeZero;
+
+// AArch64 floating-point specifics. These match IEEE-754.
+const unsigned kDoubleMantissaBits = 52;
+const unsigned kDoubleExponentBits = 11;
+const unsigned kFloatMantissaBits = 23;
+const unsigned kFloatExponentBits = 8;
+const unsigned kFloat16MantissaBits = 10;
+const unsigned kFloat16ExponentBits = 5;
+
+enum FPRounding {
+ // The first four values are encodable directly by FPCR<RMode>.
+ FPTieEven = 0x0,
+ FPPositiveInfinity = 0x1,
+ FPNegativeInfinity = 0x2,
+ FPZero = 0x3,
+
+ // The final rounding modes are only available when explicitly specified by
+ // the instruction (such as with fcvta). It cannot be set in FPCR.
+ FPTieAway,
+ FPRoundOdd
+};
+
+enum UseDefaultNaN { kUseDefaultNaN, kIgnoreDefaultNaN };
+
+// Assemble the specified IEEE-754 components into the target type and apply
+// appropriate rounding.
+// sign: 0 = positive, 1 = negative
+// exponent: Unbiased IEEE-754 exponent.
+// mantissa: The mantissa of the input. The top bit (which is not encoded for
+// normal IEEE-754 values) must not be omitted. This bit has the
+// value 'pow(2, exponent)'.
+//
+// The input value is assumed to be a normalized value. That is, the input may
+// not be infinity or NaN. If the source value is subnormal, it must be
+// normalized before calling this function such that the highest set bit in the
+// mantissa has the value 'pow(2, exponent)'.
+//
+// Callers should use FPRoundToFloat or FPRoundToDouble directly, rather than
+// calling a templated FPRound.
+template <class T, int ebits, int mbits>
+T FPRound(int64_t sign,
+ int64_t exponent,
+ uint64_t mantissa,
+ FPRounding round_mode) {
+ VIXL_ASSERT((sign == 0) || (sign == 1));
+
+ // Only FPTieEven and FPRoundOdd rounding modes are implemented.
+ VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
+
+ // Rounding can promote subnormals to normals, and normals to infinities. For
+ // example, a double with exponent 127 (FLT_MAX_EXP) would appear to be
+ // encodable as a float, but rounding based on the low-order mantissa bits
+ // could make it overflow. With ties-to-even rounding, this value would become
+ // an infinity.
+
+ // ---- Rounding Method ----
+ //
+ // The exponent is irrelevant in the rounding operation, so we treat the
+ // lowest-order bit that will fit into the result ('onebit') as having
+ // the value '1'. Similarly, the highest-order bit that won't fit into
+ // the result ('halfbit') has the value '0.5'. The 'point' sits between
+ // 'onebit' and 'halfbit':
+ //
+ // These bits fit into the result.
+ // |---------------------|
+ // mantissa = 0bxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ // ||
+ // / |
+ // / halfbit
+ // onebit
+ //
+ // For subnormal outputs, the range of representable bits is smaller and
+ // the position of onebit and halfbit depends on the exponent of the
+ // input, but the method is otherwise similar.
+ //
+ // onebit(frac)
+ // |
+ // | halfbit(frac) halfbit(adjusted)
+ // | / /
+ // | | |
+ // 0b00.0 (exact) -> 0b00.0 (exact) -> 0b00
+ // 0b00.0... -> 0b00.0... -> 0b00
+ // 0b00.1 (exact) -> 0b00.0111..111 -> 0b00
+ // 0b00.1... -> 0b00.1... -> 0b01
+ // 0b01.0 (exact) -> 0b01.0 (exact) -> 0b01
+ // 0b01.0... -> 0b01.0... -> 0b01
+ // 0b01.1 (exact) -> 0b01.1 (exact) -> 0b10
+ // 0b01.1... -> 0b01.1... -> 0b10
+ // 0b10.0 (exact) -> 0b10.0 (exact) -> 0b10
+ // 0b10.0... -> 0b10.0... -> 0b10
+ // 0b10.1 (exact) -> 0b10.0111..111 -> 0b10
+ // 0b10.1... -> 0b10.1... -> 0b11
+ // 0b11.0 (exact) -> 0b11.0 (exact) -> 0b11
+ // ... / | / |
+ // / | / |
+ // / |
+ // adjusted = frac - (halfbit(mantissa) & ~onebit(frac)); / |
+ //
+ // mantissa = (mantissa >> shift) + halfbit(adjusted);
+
+ static const int mantissa_offset = 0;
+ static const int exponent_offset = mantissa_offset + mbits;
+ static const int sign_offset = exponent_offset + ebits;
+ VIXL_ASSERT(sign_offset == (sizeof(T) * 8 - 1));
+
+ // Bail out early for zero inputs.
+ if (mantissa == 0) {
+ return static_cast<T>(sign << sign_offset);
+ }
+
+ // If all bits in the exponent are set, the value is infinite or NaN.
+ // This is true for all binary IEEE-754 formats.
+ static const int infinite_exponent = (1 << ebits) - 1;
+ static const int max_normal_exponent = infinite_exponent - 1;
+
+ // Apply the exponent bias to encode it for the result. Doing this early makes
+ // it easy to detect values that will be infinite or subnormal.
+ exponent += max_normal_exponent >> 1;
+
+ if (exponent > max_normal_exponent) {
+ // Overflow: the input is too large for the result type to represent.
+ if (round_mode == FPTieEven) {
+ // FPTieEven rounding mode handles overflows using infinities.
+ exponent = infinite_exponent;
+ mantissa = 0;
+ } else {
+ VIXL_ASSERT(round_mode == FPRoundOdd);
+ // FPRoundOdd rounding mode handles overflows using the largest magnitude
+ // normal number.
+ exponent = max_normal_exponent;
+ mantissa = (UINT64_C(1) << exponent_offset) - 1;
+ }
+ return static_cast<T>((sign << sign_offset) |
+ (exponent << exponent_offset) |
+ (mantissa << mantissa_offset));
+ }
+
+ // Calculate the shift required to move the top mantissa bit to the proper
+ // place in the destination type.
+ const int highest_significant_bit = 63 - CountLeadingZeros(mantissa);
+ int shift = highest_significant_bit - mbits;
+
+ if (exponent <= 0) {
+ // The output will be subnormal (before rounding).
+ // For subnormal outputs, the shift must be adjusted by the exponent. The +1
+ // is necessary because the exponent of a subnormal value (encoded as 0) is
+ // the same as the exponent of the smallest normal value (encoded as 1).
+ shift += -exponent + 1;
+
+ // Handle inputs that would produce a zero output.
+ //
+ // Shifts higher than highest_significant_bit+1 will always produce a zero
+ // result. A shift of exactly highest_significant_bit+1 might produce a
+ // non-zero result after rounding.
+ if (shift > (highest_significant_bit + 1)) {
+ if (round_mode == FPTieEven) {
+ // The result will always be +/-0.0.
+ return static_cast<T>(sign << sign_offset);
+ } else {
+ VIXL_ASSERT(round_mode == FPRoundOdd);
+ VIXL_ASSERT(mantissa != 0);
+ // For FPRoundOdd, if the mantissa is too small to represent and
+ // non-zero return the next "odd" value.
+ return static_cast<T>((sign << sign_offset) | 1);
+ }
+ }
+
+ // Properly encode the exponent for a subnormal output.
+ exponent = 0;
+ } else {
+ // Clear the topmost mantissa bit, since this is not encoded in IEEE-754
+ // normal values.
+ mantissa &= ~(UINT64_C(1) << highest_significant_bit);
+ }
+
+ // The casts below are only well-defined for unsigned integers.
+ VIXL_STATIC_ASSERT(std::numeric_limits<T>::is_integer);
+ VIXL_STATIC_ASSERT(!std::numeric_limits<T>::is_signed);
+
+ if (shift > 0) {
+ if (round_mode == FPTieEven) {
+ // We have to shift the mantissa to the right. Some precision is lost, so
+ // we need to apply rounding.
+ uint64_t onebit_mantissa = (mantissa >> (shift)) & 1;
+ uint64_t halfbit_mantissa = (mantissa >> (shift - 1)) & 1;
+ uint64_t adjustment = (halfbit_mantissa & ~onebit_mantissa);
+ uint64_t adjusted = mantissa - adjustment;
+ T halfbit_adjusted = (adjusted >> (shift - 1)) & 1;
+
+ T result =
+ static_cast<T>((sign << sign_offset) | (exponent << exponent_offset) |
+ ((mantissa >> shift) << mantissa_offset));
+
+ // A very large mantissa can overflow during rounding. If this happens,
+ // the exponent should be incremented and the mantissa set to 1.0
+ // (encoded as 0). Applying halfbit_adjusted after assembling the float
+ // has the nice side-effect that this case is handled for free.
+ //
+ // This also handles cases where a very large finite value overflows to
+ // infinity, or where a very large subnormal value overflows to become
+ // normal.
+ return result + halfbit_adjusted;
+ } else {
+ VIXL_ASSERT(round_mode == FPRoundOdd);
+ // If any bits at position halfbit or below are set, onebit (ie. the
+ // bottom bit of the resulting mantissa) must be set.
+ uint64_t fractional_bits = mantissa & ((UINT64_C(1) << shift) - 1);
+ if (fractional_bits != 0) {
+ mantissa |= UINT64_C(1) << shift;
+ }
+
+ return static_cast<T>((sign << sign_offset) |
+ (exponent << exponent_offset) |
+ ((mantissa >> shift) << mantissa_offset));
+ }
+ } else {
+ // We have to shift the mantissa to the left (or not at all). The input
+ // mantissa is exactly representable in the output mantissa, so apply no
+ // rounding correction.
+ return static_cast<T>((sign << sign_offset) |
+ (exponent << exponent_offset) |
+ ((mantissa << -shift) << mantissa_offset));
+ }
+}
+
+
+// See FPRound for a description of this function.
+inline double FPRoundToDouble(int64_t sign,
+ int64_t exponent,
+ uint64_t mantissa,
+ FPRounding round_mode) {
+ uint64_t bits =
+ FPRound<uint64_t, kDoubleExponentBits, kDoubleMantissaBits>(sign,
+ exponent,
+ mantissa,
+ round_mode);
+ return RawbitsToDouble(bits);
+}
+
+
+// See FPRound for a description of this function.
+inline Float16 FPRoundToFloat16(int64_t sign,
+ int64_t exponent,
+ uint64_t mantissa,
+ FPRounding round_mode) {
+ return RawbitsToFloat16(
+ FPRound<uint16_t,
+ kFloat16ExponentBits,
+ kFloat16MantissaBits>(sign, exponent, mantissa, round_mode));
+}
+
+
+// See FPRound for a description of this function.
+static inline float FPRoundToFloat(int64_t sign,
+ int64_t exponent,
+ uint64_t mantissa,
+ FPRounding round_mode) {
+ uint32_t bits =
+ FPRound<uint32_t, kFloatExponentBits, kFloatMantissaBits>(sign,
+ exponent,
+ mantissa,
+ round_mode);
+ return RawbitsToFloat(bits);
+}
+
+
+float FPToFloat(Float16 value, UseDefaultNaN DN, bool* exception = NULL);
+float FPToFloat(double value,
+ FPRounding round_mode,
+ UseDefaultNaN DN,
+ bool* exception = NULL);
+
+double FPToDouble(Float16 value, UseDefaultNaN DN, bool* exception = NULL);
+double FPToDouble(float value, UseDefaultNaN DN, bool* exception = NULL);
+
+Float16 FPToFloat16(float value,
+ FPRounding round_mode,
+ UseDefaultNaN DN,
+ bool* exception = NULL);
+
+Float16 FPToFloat16(double value,
+ FPRounding round_mode,
+ UseDefaultNaN DN,
+ bool* exception = NULL);
+} // namespace vixl
+
+#endif // VIXL_UTILS_H