diff options
Diffstat (limited to '')
55 files changed, 64751 insertions, 0 deletions
diff --git a/js/src/jit/arm64/Architecture-arm64.cpp b/js/src/jit/arm64/Architecture-arm64.cpp new file mode 100644 index 0000000000..eb3dd67b1a --- /dev/null +++ b/js/src/jit/arm64/Architecture-arm64.cpp @@ -0,0 +1,129 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "jit/arm64/Architecture-arm64.h" + +#include <cstring> + +#include "jit/arm64/vixl/Cpu-vixl.h" +#include "jit/FlushICache.h" // js::jit::FlushICache +#include "jit/RegisterSets.h" + +namespace js { +namespace jit { + +Registers::Code Registers::FromName(const char* name) { + // Check for some register aliases first. + if (strcmp(name, "ip0") == 0) { + return ip0; + } + if (strcmp(name, "ip1") == 0) { + return ip1; + } + if (strcmp(name, "fp") == 0) { + return fp; + } + + for (uint32_t i = 0; i < Total; i++) { + if (strcmp(GetName(i), name) == 0) { + return Code(i); + } + } + + return Invalid; +} + +FloatRegisters::Code FloatRegisters::FromName(const char* name) { + for (size_t i = 0; i < Total; i++) { + if (strcmp(GetName(i), name) == 0) { + return Code(i); + } + } + + return Invalid; +} + +// This must sync with GetPushSizeInBytes just below and also with +// MacroAssembler::PushRegsInMask. +FloatRegisterSet FloatRegister::ReduceSetForPush(const FloatRegisterSet& s) { + SetType all = s.bits(); + SetType set128b = + (all & FloatRegisters::AllSimd128Mask) >> FloatRegisters::ShiftSimd128; + SetType doubleSet = + (all & FloatRegisters::AllDoubleMask) >> FloatRegisters::ShiftDouble; + SetType singleSet = + (all & FloatRegisters::AllSingleMask) >> FloatRegisters::ShiftSingle; + + // See GetPushSizeInBytes. 
+ SetType set64b = (singleSet | doubleSet) & ~set128b; + + SetType reduced = (set128b << FloatRegisters::ShiftSimd128) | + (set64b << FloatRegisters::ShiftDouble); + return FloatRegisterSet(reduced); +} + +// Compute the size of the dump area for |s.ReduceSetForPush()|, as defined by +// MacroAssembler::PushRegsInMask for this target. +uint32_t FloatRegister::GetPushSizeInBytes(const FloatRegisterSet& s) { + SetType all = s.bits(); + SetType set128b = + (all & FloatRegisters::AllSimd128Mask) >> FloatRegisters::ShiftSimd128; + SetType doubleSet = + (all & FloatRegisters::AllDoubleMask) >> FloatRegisters::ShiftDouble; + SetType singleSet = + (all & FloatRegisters::AllSingleMask) >> FloatRegisters::ShiftSingle; + + // PushRegsInMask pushes singles as if they were doubles. Also we need to + // remove singles or doubles which are also pushed as part of a vector + // register. + SetType set64b = (singleSet | doubleSet) & ~set128b; + + // The "+ 1) & ~1" is to take into account the alignment hole below the + // double-reg dump area. See MacroAssembler::PushRegsInMaskSizeInBytes. + return ((set64b.size() + 1) & ~1) * sizeof(double) + + set128b.size() * SizeOfSimd128; +} + +uint32_t FloatRegister::getRegisterDumpOffsetInBytes() { + // See block comment in MacroAssembler.h for further required invariants. + static_assert(sizeof(jit::FloatRegisters::RegisterContent) == 16); + return encoding() * sizeof(jit::FloatRegisters::RegisterContent); +} + +// For N in 0..31, if any of sN, dN or qN is a member of `s`, the returned set +// will contain all of sN, dN and qN. 
+FloatRegisterSet FloatRegister::BroadcastToAllSizes(const FloatRegisterSet& s) { + SetType all = s.bits(); + SetType set128b = + (all & FloatRegisters::AllSimd128Mask) >> FloatRegisters::ShiftSimd128; + SetType doubleSet = + (all & FloatRegisters::AllDoubleMask) >> FloatRegisters::ShiftDouble; + SetType singleSet = + (all & FloatRegisters::AllSingleMask) >> FloatRegisters::ShiftSingle; + + SetType merged = set128b | doubleSet | singleSet; + SetType broadcasted = (merged << FloatRegisters::ShiftSimd128) | + (merged << FloatRegisters::ShiftDouble) | + (merged << FloatRegisters::ShiftSingle); + + return FloatRegisterSet(broadcasted); +} + +uint32_t GetARM64Flags() { return 0; } + +// CPU flags handling on ARM64 is currently different from other platforms: +// the flags are computed and stored per-assembler and are thus "always +// computed". +bool CPUFlagsHaveBeenComputed() { return true; } + +void FlushICache(void* code, size_t size) { + vixl::CPU::EnsureIAndDCacheCoherency(code, size); +} + +void FlushExecutionContext() { vixl::CPU::FlushExecutionContext(); } + +} // namespace jit +} // namespace js diff --git a/js/src/jit/arm64/Architecture-arm64.h b/js/src/jit/arm64/Architecture-arm64.h new file mode 100644 index 0000000000..96bbc63848 --- /dev/null +++ b/js/src/jit/arm64/Architecture-arm64.h @@ -0,0 +1,773 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef jit_arm64_Architecture_arm64_h +#define jit_arm64_Architecture_arm64_h + +#include "mozilla/Assertions.h" +#include "mozilla/MathAlgorithms.h" + +#include <algorithm> +#include <iterator> + +#include "jit/arm64/vixl/Instructions-vixl.h" +#include "jit/shared/Architecture-shared.h" + +#include "js/Utility.h" + +#define JS_HAS_HIDDEN_SP +static const uint32_t HiddenSPEncoding = vixl::kSPRegInternalCode; + +namespace js { +namespace jit { + +// AArch64 has 32 64-bit integer registers, x0 through x31. +// +// x31 (or, more accurately, the integer register with encoding 31, since +// there is no x31 per se) is special and functions as both the stack pointer +// and a zero register. +// +// The bottom 32 bits of each of the X registers is accessible as w0 through +// w31. The program counter is not accessible as a register. +// +// SIMD and scalar floating-point registers share a register bank. +// 32 bit float registers are s0 through s31. +// 64 bit double registers are d0 through d31. +// 128 bit SIMD registers are v0 through v31. +// e.g., s0 is the bottom 32 bits of d0, which is the bottom 64 bits of v0. + +// AArch64 Calling Convention: +// x0 - x7: arguments and return value +// x8: indirect result (struct) location +// x9 - x15: temporary registers +// x16 - x17: intra-call-use registers (PLT, linker) +// x18: platform specific use (TLS) +// x19 - x28: callee-saved registers +// x29: frame pointer +// x30: link register + +// AArch64 Calling Convention for Floats: +// d0 - d7: arguments and return value +// d8 - d15: callee-saved registers +// Bits 64:128 are not saved for v8-v15. +// d16 - d31: temporary registers + +// AArch64 does not have soft float. 
+ +class Registers { + public: + enum RegisterID { + w0 = 0, + x0 = 0, + w1 = 1, + x1 = 1, + w2 = 2, + x2 = 2, + w3 = 3, + x3 = 3, + w4 = 4, + x4 = 4, + w5 = 5, + x5 = 5, + w6 = 6, + x6 = 6, + w7 = 7, + x7 = 7, + w8 = 8, + x8 = 8, + w9 = 9, + x9 = 9, + w10 = 10, + x10 = 10, + w11 = 11, + x11 = 11, + w12 = 12, + x12 = 12, + w13 = 13, + x13 = 13, + w14 = 14, + x14 = 14, + w15 = 15, + x15 = 15, + w16 = 16, + x16 = 16, + ip0 = 16, // MacroAssembler scratch register 1. + w17 = 17, + x17 = 17, + ip1 = 17, // MacroAssembler scratch register 2. + w18 = 18, + x18 = 18, + tls = 18, // Platform-specific use (TLS). + w19 = 19, + x19 = 19, + w20 = 20, + x20 = 20, + w21 = 21, + x21 = 21, + w22 = 22, + x22 = 22, + w23 = 23, + x23 = 23, + w24 = 24, + x24 = 24, + w25 = 25, + x25 = 25, + w26 = 26, + x26 = 26, + w27 = 27, + x27 = 27, + w28 = 28, + x28 = 28, + w29 = 29, + x29 = 29, + fp = 29, + w30 = 30, + x30 = 30, + lr = 30, + w31 = 31, + x31 = 31, + wzr = 31, + xzr = 31, + sp = 31, // Special: both stack pointer and a zero register. 
+ }; + typedef uint8_t Code; + typedef uint32_t Encoding; + typedef uint32_t SetType; + + static const Code Invalid = 0xFF; + + union RegisterContent { + uintptr_t r; + }; + + static uint32_t SetSize(SetType x) { + static_assert(sizeof(SetType) == 4, "SetType must be 32 bits"); + return mozilla::CountPopulation32(x); + } + static uint32_t FirstBit(SetType x) { + return mozilla::CountTrailingZeroes32(x); + } + static uint32_t LastBit(SetType x) { + return 31 - mozilla::CountLeadingZeroes32(x); + } + + static const char* GetName(uint32_t code) { + static const char* const Names[] = { + "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", + "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", + "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", + "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"}; + static_assert(Total == std::size(Names), "Table is the correct size"); + if (code >= Total) { + return "invalid"; + } + return Names[code]; + } + + static Code FromName(const char* name); + + static const uint32_t Total = 32; + static const uint32_t TotalPhys = 32; + static const uint32_t Allocatable = + 27; // No named special-function registers. 
+ + static const SetType AllMask = 0xFFFFFFFF; + static const SetType NoneMask = 0x0; + + static const SetType ArgRegMask = + (1 << Registers::x0) | (1 << Registers::x1) | (1 << Registers::x2) | + (1 << Registers::x3) | (1 << Registers::x4) | (1 << Registers::x5) | + (1 << Registers::x6) | (1 << Registers::x7) | (1 << Registers::x8); + + static const SetType VolatileMask = + (1 << Registers::x0) | (1 << Registers::x1) | (1 << Registers::x2) | + (1 << Registers::x3) | (1 << Registers::x4) | (1 << Registers::x5) | + (1 << Registers::x6) | (1 << Registers::x7) | (1 << Registers::x8) | + (1 << Registers::x9) | (1 << Registers::x10) | (1 << Registers::x11) | + (1 << Registers::x12) | (1 << Registers::x13) | (1 << Registers::x14) | + (1 << Registers::x15) | (1 << Registers::x16) | (1 << Registers::x17) | + (1 << Registers::x18); + + static const SetType NonVolatileMask = + (1 << Registers::x19) | (1 << Registers::x20) | (1 << Registers::x21) | + (1 << Registers::x22) | (1 << Registers::x23) | (1 << Registers::x24) | + (1 << Registers::x25) | (1 << Registers::x26) | (1 << Registers::x27) | + (1 << Registers::x28) | (1 << Registers::x29) | (1 << Registers::x30); + + static const SetType NonAllocatableMask = + (1 << Registers::x28) | // PseudoStackPointer. + (1 << Registers::ip0) | // First scratch register. + (1 << Registers::ip1) | // Second scratch register. + (1 << Registers::tls) | (1 << Registers::lr) | (1 << Registers::sp) | + (1 << Registers::fp); + + static const SetType WrapperMask = VolatileMask; + + // Registers returned from a JS -> JS call. + static const SetType JSCallMask = (1 << Registers::x2); + + // Registers returned from a JS -> C call. + static const SetType CallMask = (1 << Registers::x0); + + static const SetType AllocatableMask = AllMask & ~NonAllocatableMask; +}; + +// Smallest integer type that can hold a register bitmask. 
+typedef uint32_t PackedRegisterMask; + +template <typename T> +class TypedRegisterSet; + +// 128-bit bitset for FloatRegisters::SetType. + +class Bitset128 { + // The order (hi, lo) looks best in the debugger. + uint64_t hi, lo; + + public: + MOZ_IMPLICIT constexpr Bitset128(uint64_t initial) : hi(0), lo(initial) {} + MOZ_IMPLICIT constexpr Bitset128(const Bitset128& that) + : hi(that.hi), lo(that.lo) {} + + constexpr Bitset128(uint64_t hi, uint64_t lo) : hi(hi), lo(lo) {} + + constexpr uint64_t high() const { return hi; } + + constexpr uint64_t low() const { return lo; } + + constexpr Bitset128 operator|(Bitset128 that) const { + return Bitset128(hi | that.hi, lo | that.lo); + } + + constexpr Bitset128 operator&(Bitset128 that) const { + return Bitset128(hi & that.hi, lo & that.lo); + } + + constexpr Bitset128 operator^(Bitset128 that) const { + return Bitset128(hi ^ that.hi, lo ^ that.lo); + } + + constexpr Bitset128 operator~() const { return Bitset128(~hi, ~lo); } + + // We must avoid shifting by the word width, which is complex. Inlining plus + // shift-by-constant will remove a lot of code in the normal case. 
+ + constexpr Bitset128 operator<<(size_t shift) const { + if (shift == 0) { + return *this; + } + if (shift < 64) { + return Bitset128((hi << shift) | (lo >> (64 - shift)), lo << shift); + } + if (shift == 64) { + return Bitset128(lo, 0); + } + return Bitset128(lo << (shift - 64), 0); + } + + constexpr Bitset128 operator>>(size_t shift) const { + if (shift == 0) { + return *this; + } + if (shift < 64) { + return Bitset128(hi >> shift, (lo >> shift) | (hi << (64 - shift))); + } + if (shift == 64) { + return Bitset128(0, hi); + } + return Bitset128(0, hi >> (shift - 64)); + } + + constexpr bool operator==(Bitset128 that) const { + return lo == that.lo && hi == that.hi; + } + + constexpr bool operator!=(Bitset128 that) const { + return lo != that.lo || hi != that.hi; + } + + constexpr bool operator!() const { return (hi | lo) == 0; } + + Bitset128& operator|=(const Bitset128& that) { + hi |= that.hi; + lo |= that.lo; + return *this; + } + + Bitset128& operator&=(const Bitset128& that) { + hi &= that.hi; + lo &= that.lo; + return *this; + } + + uint32_t size() const { + return mozilla::CountPopulation64(hi) + mozilla::CountPopulation64(lo); + } + + uint32_t countTrailingZeroes() const { + if (lo) { + return mozilla::CountTrailingZeroes64(lo); + } + return mozilla::CountTrailingZeroes64(hi) + 64; + } + + uint32_t countLeadingZeroes() const { + if (hi) { + return mozilla::CountLeadingZeroes64(hi); + } + return mozilla::CountLeadingZeroes64(lo) + 64; + } +}; + +class FloatRegisters { + public: + enum FPRegisterID { + s0 = 0, + d0 = 0, + v0 = 0, + s1 = 1, + d1 = 1, + v1 = 1, + s2 = 2, + d2 = 2, + v2 = 2, + s3 = 3, + d3 = 3, + v3 = 3, + s4 = 4, + d4 = 4, + v4 = 4, + s5 = 5, + d5 = 5, + v5 = 5, + s6 = 6, + d6 = 6, + v6 = 6, + s7 = 7, + d7 = 7, + v7 = 7, + s8 = 8, + d8 = 8, + v8 = 8, + s9 = 9, + d9 = 9, + v9 = 9, + s10 = 10, + d10 = 10, + v10 = 10, + s11 = 11, + d11 = 11, + v11 = 11, + s12 = 12, + d12 = 12, + v12 = 12, + s13 = 13, + d13 = 13, + v13 = 13, + s14 = 14, + d14 = 
14, + v14 = 14, + s15 = 15, + d15 = 15, + v15 = 15, + s16 = 16, + d16 = 16, + v16 = 16, + s17 = 17, + d17 = 17, + v17 = 17, + s18 = 18, + d18 = 18, + v18 = 18, + s19 = 19, + d19 = 19, + v19 = 19, + s20 = 20, + d20 = 20, + v20 = 20, + s21 = 21, + d21 = 21, + v21 = 21, + s22 = 22, + d22 = 22, + v22 = 22, + s23 = 23, + d23 = 23, + v23 = 23, + s24 = 24, + d24 = 24, + v24 = 24, + s25 = 25, + d25 = 25, + v25 = 25, + s26 = 26, + d26 = 26, + v26 = 26, + s27 = 27, + d27 = 27, + v27 = 27, + s28 = 28, + d28 = 28, + v28 = 28, + s29 = 29, + d29 = 29, + v29 = 29, + s30 = 30, + d30 = 30, + v30 = 30, + s31 = 31, + d31 = 31, + v31 = 31, // Scratch register. + }; + + // Eight bits: (invalid << 7) | (kind << 5) | encoding + typedef uint8_t Code; + typedef FPRegisterID Encoding; + typedef Bitset128 SetType; + + enum Kind : uint8_t { Single, Double, Simd128, NumTypes }; + + static constexpr Code Invalid = 0x80; + + static const char* GetName(uint32_t code) { + // clang-format off + static const char* const Names[] = { + "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", + "s10", "s11", "s12", "s13", "s14", "s15", "s16", "s17", "s18", "s19", + "s20", "s21", "s22", "s23", "s24", "s25", "s26", "s27", "s28", "s29", + "s30", "s31", + + "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", + "d10", "d11", "d12", "d13", "d14", "d15", "d16", "d17", "d18", "d19", + "d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27", "d28", "d29", + "d30", "d31", + + "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", + "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", + "v30", "v31", + }; + // clang-format on + static_assert(Total == std::size(Names), "Table is the correct size"); + if (code >= Total) { + return "invalid"; + } + return Names[code]; + } + + static Code FromName(const char* name); + + static const uint32_t TotalPhys = 32; + static const uint32_t Total = TotalPhys * NumTypes; + 
static const uint32_t Allocatable = 31; // Without d31, the scratch register. + + static_assert(sizeof(SetType) * 8 >= Total, + "SetType should be large enough to enumerate all registers."); + + static constexpr unsigned ShiftSingle = uint32_t(Single) * TotalPhys; + static constexpr unsigned ShiftDouble = uint32_t(Double) * TotalPhys; + static constexpr unsigned ShiftSimd128 = uint32_t(Simd128) * TotalPhys; + + static constexpr SetType NoneMask = SetType(0); + static constexpr SetType AllPhysMask = ~(~SetType(0) << TotalPhys); + static constexpr SetType AllSingleMask = AllPhysMask << ShiftSingle; + static constexpr SetType AllDoubleMask = AllPhysMask << ShiftDouble; + static constexpr SetType AllSimd128Mask = AllPhysMask << ShiftSimd128; + static constexpr SetType AllMask = + AllDoubleMask | AllSingleMask | AllSimd128Mask; + static constexpr SetType AliasMask = (SetType(1) << ShiftSingle) | + (SetType(1) << ShiftDouble) | + (SetType(1) << ShiftSimd128); + + static_assert(ShiftSingle == 0, + "Or the NonVolatileMask must be computed differently"); + + // s31 is the ScratchFloatReg. 
+ static constexpr SetType NonVolatileSingleMask = + SetType((1 << FloatRegisters::s8) | (1 << FloatRegisters::s9) | + (1 << FloatRegisters::s10) | (1 << FloatRegisters::s11) | + (1 << FloatRegisters::s12) | (1 << FloatRegisters::s13) | + (1 << FloatRegisters::s14) | (1 << FloatRegisters::s15) | + (1 << FloatRegisters::s16) | (1 << FloatRegisters::s17) | + (1 << FloatRegisters::s18) | (1 << FloatRegisters::s19) | + (1 << FloatRegisters::s20) | (1 << FloatRegisters::s21) | + (1 << FloatRegisters::s22) | (1 << FloatRegisters::s23) | + (1 << FloatRegisters::s24) | (1 << FloatRegisters::s25) | + (1 << FloatRegisters::s26) | (1 << FloatRegisters::s27) | + (1 << FloatRegisters::s28) | (1 << FloatRegisters::s29) | + (1 << FloatRegisters::s30)); + + static constexpr SetType NonVolatileMask = + (NonVolatileSingleMask << ShiftSingle) | + (NonVolatileSingleMask << ShiftDouble) | + (NonVolatileSingleMask << ShiftSimd128); + + static constexpr SetType VolatileMask = AllMask & ~NonVolatileMask; + + static constexpr SetType WrapperMask = VolatileMask; + + static_assert(ShiftSingle == 0, + "Or the NonAllocatableMask must be computed differently"); + + // d31 is the ScratchFloatReg. + static constexpr SetType NonAllocatableSingleMask = + (SetType(1) << FloatRegisters::s31); + + static constexpr SetType NonAllocatableMask = + NonAllocatableSingleMask | (NonAllocatableSingleMask << ShiftDouble) | + (NonAllocatableSingleMask << ShiftSimd128); + + static constexpr SetType AllocatableMask = AllMask & ~NonAllocatableMask; + + // Content spilled during bailouts. + union RegisterContent { + float s; + double d; + uint8_t v128[16]; + }; + + static constexpr Encoding encoding(Code c) { + // assert() not available in constexpr function. + // assert(c < Total); + return Encoding(c & 31); + } + + static constexpr Kind kind(Code c) { + // assert() not available in constexpr function. 
+ // assert(c < Total && ((c >> 5) & 3) < NumTypes); + return Kind((c >> 5) & 3); + } + + static constexpr Code fromParts(uint32_t encoding, uint32_t kind, + uint32_t invalid) { + return Code((invalid << 7) | (kind << 5) | encoding); + } +}; + +static const uint32_t SpillSlotSize = + std::max(sizeof(Registers::RegisterContent), + sizeof(FloatRegisters::RegisterContent)); + +static const uint32_t ShadowStackSpace = 0; + +// When our only strategy for far jumps is to encode the offset directly, and +// not insert any jump islands during assembly for even further jumps, then the +// architecture restricts us to -2^27 .. 2^27-4, to fit into a signed 28-bit +// value. We further reduce this range to allow the far-jump inserting code to +// have some breathing room. +static const uint32_t JumpImmediateRange = ((1 << 27) - (20 * 1024 * 1024)); + +static const uint32_t ABIStackAlignment = 16; +static const uint32_t CodeAlignment = 16; +static const bool StackKeptAligned = false; + +// Although sp is only usable if 16-byte alignment is kept, +// the Pseudo-StackPointer enables use of 8-byte alignment. 
+static const uint32_t StackAlignment = 8; +static const uint32_t NativeFrameSize = 8; + +struct FloatRegister { + typedef FloatRegisters Codes; + typedef Codes::Code Code; + typedef Codes::Encoding Encoding; + typedef Codes::SetType SetType; + + static uint32_t SetSize(SetType x) { + static_assert(sizeof(SetType) == 16, "SetType must be 128 bits"); + x |= x >> FloatRegisters::TotalPhys; + x |= x >> FloatRegisters::TotalPhys; + x &= FloatRegisters::AllPhysMask; + MOZ_ASSERT(x.high() == 0); + MOZ_ASSERT((x.low() >> 32) == 0); + return mozilla::CountPopulation32(x.low()); + } + + static uint32_t FirstBit(SetType x) { + static_assert(sizeof(SetType) == 16, "SetType"); + return x.countTrailingZeroes(); + } + static uint32_t LastBit(SetType x) { + static_assert(sizeof(SetType) == 16, "SetType"); + return 127 - x.countLeadingZeroes(); + } + + static constexpr size_t SizeOfSimd128 = 16; + + private: + // These fields only hold valid values: an invalid register is always + // represented as a valid encoding and kind with the invalid_ bit set. 
+ uint8_t encoding_; // 32 encodings + uint8_t kind_; // Double, Single, Simd128 + bool invalid_; + + typedef Codes::Kind Kind; + + public: + constexpr FloatRegister(Encoding encoding, Kind kind) + : encoding_(encoding), kind_(kind), invalid_(false) { + // assert(uint32_t(encoding) < Codes::TotalPhys); + } + + constexpr FloatRegister() + : encoding_(0), kind_(FloatRegisters::Double), invalid_(true) {} + + static FloatRegister FromCode(uint32_t i) { + MOZ_ASSERT(i < Codes::Total); + return FloatRegister(FloatRegisters::encoding(i), FloatRegisters::kind(i)); + } + + bool isSingle() const { + MOZ_ASSERT(!invalid_); + return kind_ == FloatRegisters::Single; + } + bool isDouble() const { + MOZ_ASSERT(!invalid_); + return kind_ == FloatRegisters::Double; + } + bool isSimd128() const { + MOZ_ASSERT(!invalid_); + return kind_ == FloatRegisters::Simd128; + } + bool isInvalid() const { return invalid_; } + + FloatRegister asSingle() const { + MOZ_ASSERT(!invalid_); + return FloatRegister(Encoding(encoding_), FloatRegisters::Single); + } + FloatRegister asDouble() const { + MOZ_ASSERT(!invalid_); + return FloatRegister(Encoding(encoding_), FloatRegisters::Double); + } + FloatRegister asSimd128() const { + MOZ_ASSERT(!invalid_); + return FloatRegister(Encoding(encoding_), FloatRegisters::Simd128); + } + + constexpr uint32_t size() const { + MOZ_ASSERT(!invalid_); + if (kind_ == FloatRegisters::Double) { + return sizeof(double); + } + if (kind_ == FloatRegisters::Single) { + return sizeof(float); + } + MOZ_ASSERT(kind_ == FloatRegisters::Simd128); + return SizeOfSimd128; + } + + constexpr Code code() const { + // assert(!invalid_); + return Codes::fromParts(encoding_, kind_, invalid_); + } + + constexpr Encoding encoding() const { + MOZ_ASSERT(!invalid_); + return Encoding(encoding_); + } + + const char* name() const { return FloatRegisters::GetName(code()); } + bool volatile_() const { + MOZ_ASSERT(!invalid_); + return !!((SetType(1) << code()) & FloatRegisters::VolatileMask); 
+ } + constexpr bool operator!=(FloatRegister other) const { + return code() != other.code(); + } + constexpr bool operator==(FloatRegister other) const { + return code() == other.code(); + } + + // Two registers alias when they share an encoding, whatever their kinds. + bool aliases(FloatRegister other) const { + return other.encoding_ == encoding_; + } + // This function mostly exists for the ARM backend. It is to ensure that two + // floating point registers' types are equivalent. e.g. S0 is not equivalent + // to D16, since S0 holds a float32, and D16 holds a Double. + // On ARM64 two registers are equivalent when they have the same kind + // (Single, Double or Simd128); the encoding is not compared. + bool equiv(FloatRegister other) const { + MOZ_ASSERT(!invalid_); + return kind_ == other.kind_; + } + + // Every register has one alias per kind, itself included. + uint32_t numAliased() const { return Codes::NumTypes; } + uint32_t numAlignedAliased() { return numAliased(); } + + // Returns the register with the same encoding whose kind is |aliasIdx| + // steps after this one's, wrapping around; aliasIdx == 0 yields this + // register. + FloatRegister aliased(uint32_t aliasIdx) { + MOZ_ASSERT(!invalid_); + MOZ_ASSERT(aliasIdx < numAliased()); + return FloatRegister(Encoding(encoding_), + Kind((aliasIdx + kind_) % Codes::NumTypes)); + } + FloatRegister alignedAliased(uint32_t aliasIdx) { return aliased(aliasIdx); } + // Set containing the Single, Double and Simd128 registers that share this + // register's encoding. + SetType alignedOrDominatedAliasedSet() const { + return Codes::AliasMask << encoding_; + } + + static constexpr RegTypeName DefaultType = RegTypeName::Float64; + + // Generic case: the empty set. Specializations for the individual + // RegTypeName values are provided after the struct definition. + template <RegTypeName Name = DefaultType> + static SetType LiveAsIndexableSet(SetType s) { + return SetType(0); + } + + template <RegTypeName Name = DefaultType> + static SetType AllocatableAsIndexableSet(SetType s) { + static_assert(Name != RegTypeName::Any, "Allocatable set are not iterable"); + return LiveAsIndexableSet<Name>(s); + } + + static TypedRegisterSet<FloatRegister> ReduceSetForPush( + const TypedRegisterSet<FloatRegister>& s); + static uint32_t GetPushSizeInBytes(const TypedRegisterSet<FloatRegister>& s); + uint32_t getRegisterDumpOffsetInBytes(); + + // For N in 0..31, if any of sN, dN or qN is a member of `s`, the + // returned set will contain all of sN, dN and qN. 
+ static TypedRegisterSet<FloatRegister> BroadcastToAllSizes( + const TypedRegisterSet<FloatRegister>& s); +}; + +template <> +inline FloatRegister::SetType +FloatRegister::LiveAsIndexableSet<RegTypeName::Float32>(SetType set) { + return set & FloatRegisters::AllSingleMask; +} + +template <> +inline FloatRegister::SetType +FloatRegister::LiveAsIndexableSet<RegTypeName::Float64>(SetType set) { + return set & FloatRegisters::AllDoubleMask; +} + +template <> +inline FloatRegister::SetType +FloatRegister::LiveAsIndexableSet<RegTypeName::Vector128>(SetType set) { + return set & FloatRegisters::AllSimd128Mask; +} + +template <> +inline FloatRegister::SetType +FloatRegister::LiveAsIndexableSet<RegTypeName::Any>(SetType set) { + return set; +} + +// ARM/D32 has double registers that cannot be treated as float32. +// Luckily, ARMv8 doesn't have the same misfortune. +inline bool hasUnaliasedDouble() { return false; } + +// ARM prior to ARMv8 also has doubles that alias multiple floats. +// Again, ARMv8 is in the clear. +inline bool hasMultiAlias() { return false; } + +uint32_t GetARM64Flags(); + +bool CanFlushICacheFromBackgroundThreads(); + +} // namespace jit +} // namespace js + +#endif // jit_arm64_Architecture_arm64_h diff --git a/js/src/jit/arm64/Assembler-arm64.cpp b/js/src/jit/arm64/Assembler-arm64.cpp new file mode 100644 index 0000000000..1e441ae635 --- /dev/null +++ b/js/src/jit/arm64/Assembler-arm64.cpp @@ -0,0 +1,609 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "jit/arm64/Assembler-arm64.h" + +#include "mozilla/DebugOnly.h" +#include "mozilla/MathAlgorithms.h" +#include "mozilla/Maybe.h" + +#include "gc/Marking.h" +#include "jit/arm64/Architecture-arm64.h" +#include "jit/arm64/MacroAssembler-arm64.h" +#include "jit/arm64/vixl/Disasm-vixl.h" +#include "jit/AutoWritableJitCode.h" +#include "jit/ExecutableAllocator.h" +#include "vm/Realm.h" + +#include "gc/StoreBuffer-inl.h" + +using namespace js; +using namespace js::jit; + +using mozilla::CountLeadingZeroes32; +using mozilla::DebugOnly; + +// Note this is used for inter-wasm calls and may pass arguments and results +// in floating point registers even if the system ABI does not. + +ABIArg ABIArgGenerator::next(MIRType type) { + switch (type) { + case MIRType::Int32: + case MIRType::Int64: + case MIRType::Pointer: + case MIRType::RefOrNull: + case MIRType::StackResults: + if (intRegIndex_ == NumIntArgRegs) { + current_ = ABIArg(stackOffset_); + stackOffset_ += sizeof(uintptr_t); + break; + } + current_ = ABIArg(Register::FromCode(intRegIndex_)); + intRegIndex_++; + break; + + case MIRType::Float32: + case MIRType::Double: + if (floatRegIndex_ == NumFloatArgRegs) { + current_ = ABIArg(stackOffset_); + stackOffset_ += sizeof(double); + break; + } + current_ = ABIArg(FloatRegister(FloatRegisters::Encoding(floatRegIndex_), + type == MIRType::Double + ? FloatRegisters::Double + : FloatRegisters::Single)); + floatRegIndex_++; + break; + +#ifdef ENABLE_WASM_SIMD + case MIRType::Simd128: + if (floatRegIndex_ == NumFloatArgRegs) { + stackOffset_ = AlignBytes(stackOffset_, SimdMemoryAlignment); + current_ = ABIArg(stackOffset_); + stackOffset_ += FloatRegister::SizeOfSimd128; + break; + } + current_ = ABIArg(FloatRegister(FloatRegisters::Encoding(floatRegIndex_), + FloatRegisters::Simd128)); + floatRegIndex_++; + break; +#endif + + default: + // Note that in Assembler-x64.cpp there's a special case for Win64 which + // does not allow passing SIMD by value. 
Since there's Win64 on ARM64 we + // may need to duplicate that logic here. + MOZ_CRASH("Unexpected argument type"); + } + return current_; +} + +namespace js { +namespace jit { + +void Assembler::finish() { + armbuffer_.flushPool(); + + // The extended jump table is part of the code buffer. + ExtendedJumpTable_ = emitExtendedJumpTable(); + Assembler::FinalizeCode(); +} + +bool Assembler::appendRawCode(const uint8_t* code, size_t numBytes) { + flush(); + return armbuffer_.appendRawCode(code, numBytes); +} + +bool Assembler::reserve(size_t size) { + // This buffer uses fixed-size chunks so there's no point in reserving + // now vs. on-demand. + return !oom(); +} + +bool Assembler::swapBuffer(wasm::Bytes& bytes) { + // For now, specialize to the one use case. As long as wasm::Bytes is a + // Vector, not a linked-list of chunks, there's not much we can do other + // than copy. + MOZ_ASSERT(bytes.empty()); + if (!bytes.resize(bytesNeeded())) { + return false; + } + armbuffer_.executableCopy(bytes.begin()); + return true; +} + +BufferOffset Assembler::emitExtendedJumpTable() { + if (!pendingJumps_.length() || oom()) { + return BufferOffset(); + } + + armbuffer_.flushPool(); + armbuffer_.align(SizeOfJumpTableEntry); + + BufferOffset tableOffset = armbuffer_.nextOffset(); + + for (size_t i = 0; i < pendingJumps_.length(); i++) { + // Each JumpTableEntry is of the form: + // LDR ip0 [PC, 8] + // BR ip0 + // [Patchable 8-byte constant low bits] + // [Patchable 8-byte constant high bits] + DebugOnly<size_t> preOffset = size_t(armbuffer_.nextOffset().getOffset()); + + // The unguarded use of ScratchReg64 here is OK: + // + // - The present function is called from code that does not claim any + // scratch registers, we're done compiling user code and are emitting jump + // tables. Hence the scratch registers are available when we enter. 
+ // + // - The pendingJumps_ represent jumps to other code sections that are not + // known to this MacroAssembler instance, and we're generating code to + // jump there. It is safe to assume that any code using such a generated + // branch to an unknown location did not store any valuable value in any + // scratch register. Hence the scratch registers can definitely be + // clobbered here. + // + // - Scratch register usage is restricted to sequential control flow within + // MacroAssembler functions. Hence the scratch registers will not be + // clobbered by ldr and br as they are Assembler primitives, not + // MacroAssembler functions. + + ldr(ScratchReg64, ptrdiff_t(8 / vixl::kInstructionSize)); + br(ScratchReg64); + + DebugOnly<size_t> prePointer = size_t(armbuffer_.nextOffset().getOffset()); + MOZ_ASSERT_IF(!oom(), + prePointer - preOffset == OffsetOfJumpTableEntryPointer); + + brk(0x0); + brk(0x0); + + DebugOnly<size_t> postOffset = size_t(armbuffer_.nextOffset().getOffset()); + + MOZ_ASSERT_IF(!oom(), postOffset - preOffset == SizeOfJumpTableEntry); + } + + if (oom()) { + return BufferOffset(); + } + + return tableOffset; +} + +void Assembler::executableCopy(uint8_t* buffer) { + // Copy the code and all constant pools into the output buffer. + armbuffer_.executableCopy(buffer); + + // Patch any relative jumps that target code outside the buffer. + // The extended jump table may be used for distant jumps. 
+ for (size_t i = 0; i < pendingJumps_.length(); i++) { + RelativePatch& rp = pendingJumps_[i]; + MOZ_ASSERT(rp.target); + + Instruction* target = (Instruction*)rp.target; + Instruction* branch = (Instruction*)(buffer + rp.offset.getOffset()); + JumpTableEntry* extendedJumpTable = reinterpret_cast<JumpTableEntry*>( + buffer + ExtendedJumpTable_.getOffset()); + if (branch->BranchType() != vixl::UnknownBranchType) { + if (branch->IsTargetReachable(target)) { + branch->SetImmPCOffsetTarget(target); + } else { + JumpTableEntry* entry = &extendedJumpTable[i]; + branch->SetImmPCOffsetTarget(entry->getLdr()); + entry->data = target; + } + } else { + // Currently a two-instruction call, it should be possible to optimize + // this into a single instruction call + nop in some instances, but this + // will work. + } + } +} + +BufferOffset Assembler::immPool(ARMRegister dest, uint8_t* value, + vixl::LoadLiteralOp op, const LiteralDoc& doc, + ARMBuffer::PoolEntry* pe) { + uint32_t inst = op | Rt(dest); + const size_t numInst = 1; + const unsigned sizeOfPoolEntryInBytes = 4; + // NOTE(review): |value| is a uint8_t*, so sizeof(value) is the size of a + // pointer, not of the literal being loaded. This gives two 4-byte pool + // entries only where pointers are 8 bytes; fImmPool() derives its count + // from dest.size() instead. Confirm the pointer-size dependence is intended. + const unsigned numPoolEntries = sizeof(value) / sizeOfPoolEntryInBytes; + return allocLiteralLoadEntry(numInst, numPoolEntries, (uint8_t*)&inst, value, + doc, pe); +} + +BufferOffset Assembler::immPool64(ARMRegister dest, uint64_t value, + ARMBuffer::PoolEntry* pe) { + return immPool(dest, (uint8_t*)&value, vixl::LDR_x_lit, LiteralDoc(value), + pe); +} + +BufferOffset Assembler::fImmPool(ARMFPRegister dest, uint8_t* value, + vixl::LoadLiteralOp op, + const LiteralDoc& doc) { + uint32_t inst = op | Rt(dest); + const size_t numInst = 1; + const unsigned sizeOfPoolEntryInBits = 32; + const unsigned numPoolEntries = dest.size() / sizeOfPoolEntryInBits; + return allocLiteralLoadEntry(numInst, numPoolEntries, (uint8_t*)&inst, value, + doc); +} + +BufferOffset Assembler::fImmPool64(ARMFPRegister dest, double value) { + return fImmPool(dest, (uint8_t*)&value, vixl::LDR_d_lit, LiteralDoc(value)); +} + +BufferOffset
Assembler::fImmPool32(ARMFPRegister dest, float value) { + return fImmPool(dest, (uint8_t*)&value, vixl::LDR_s_lit, LiteralDoc(value)); +} + +void Assembler::bind(Label* label, BufferOffset targetOffset) { +#ifdef JS_DISASM_ARM64 + spew_.spewBind(label); +#endif + // Nothing has seen the label yet: just mark the location. + // If we've run out of memory, don't attempt to modify the buffer which may + // not be there. Just mark the label as bound to the (possibly bogus) + // targetOffset. + if (!label->used() || oom()) { + label->bind(targetOffset.getOffset()); + return; + } + + // Get the most recent instruction that used the label, as stored in the + // label. This instruction is the head of an implicit linked list of label + // uses. + BufferOffset branchOffset(label); + + while (branchOffset.assigned()) { + // Before overwriting the offset in this instruction, get the offset of + // the next link in the implicit branch list. + BufferOffset nextOffset = NextLink(branchOffset); + + // Linking against the actual (Instruction*) would be invalid, + // since that Instruction could be anywhere in memory. + // Instead, just link against the correct relative offset, assuming + // no constant pools, which will be taken into consideration + // during finalization. + ptrdiff_t relativeByteOffset = + targetOffset.getOffset() - branchOffset.getOffset(); + Instruction* link = getInstructionAt(branchOffset); + + // This branch may still be registered for callbacks. Stop tracking it. + vixl::ImmBranchType branchType = link->BranchType(); + vixl::ImmBranchRangeType branchRange = + Instruction::ImmBranchTypeToRange(branchType); + if (branchRange < vixl::NumShortBranchRangeTypes) { + BufferOffset deadline( + branchOffset.getOffset() + + Instruction::ImmBranchMaxForwardOffset(branchRange)); + armbuffer_.unregisterBranchDeadline(branchRange, deadline); + } + + // Is link able to reach the label? 
+ if (link->IsPCRelAddressing() || + link->IsTargetReachable(link + relativeByteOffset)) { + // Write a new relative offset into the instruction. + link->SetImmPCOffsetTarget(link + relativeByteOffset); + } else { + // This is a short-range branch, and it can't reach the label directly. + // Verify that it branches to a veneer: an unconditional branch. + MOZ_ASSERT(getInstructionAt(nextOffset)->BranchType() == + vixl::UncondBranchType); + } + + branchOffset = nextOffset; + } + + // Bind the label, so that future uses may encode the offset immediately. + label->bind(targetOffset.getOffset()); +} + +void Assembler::addPendingJump(BufferOffset src, ImmPtr target, + RelocationKind reloc) { + MOZ_ASSERT(target.value != nullptr); + + if (reloc == RelocationKind::JITCODE) { + jumpRelocations_.writeUnsigned(src.getOffset()); + } + + // This jump is not patchable at runtime. Extended jump table entry + // requirements cannot be known until finalization, so to be safe, give each + // jump an entry. This also causes GC tracing of the target.
+ enoughMemory_ &= + pendingJumps_.append(RelativePatch(src, target.value, reloc)); +} + +void Assembler::PatchWrite_NearCall(CodeLocationLabel start, + CodeLocationLabel toCall) { + Instruction* dest = (Instruction*)start.raw(); + ptrdiff_t relTarget = (Instruction*)toCall.raw() - dest; + ptrdiff_t relTarget00 = relTarget >> 2; + MOZ_RELEASE_ASSERT((relTarget & 0x3) == 0); + MOZ_RELEASE_ASSERT(vixl::IsInt26(relTarget00)); + + bl(dest, relTarget00); +} + +void Assembler::PatchDataWithValueCheck(CodeLocationLabel label, + PatchedImmPtr newValue, + PatchedImmPtr expected) { + Instruction* i = (Instruction*)label.raw(); + void** pValue = i->LiteralAddress<void**>(); + MOZ_ASSERT(*pValue == expected.value); + *pValue = newValue.value; +} + +void Assembler::PatchDataWithValueCheck(CodeLocationLabel label, + ImmPtr newValue, ImmPtr expected) { + PatchDataWithValueCheck(label, PatchedImmPtr(newValue.value), + PatchedImmPtr(expected.value)); +} + +void Assembler::ToggleToJmp(CodeLocationLabel inst_) { + Instruction* i = (Instruction*)inst_.raw(); + MOZ_ASSERT(i->IsAddSubImmediate()); + + // Refer to instruction layout in ToggleToCmp(). + int imm19 = (int)i->Bits(23, 5); + MOZ_ASSERT(vixl::IsInt19(imm19)); + + b(i, imm19, Always); +} + +void Assembler::ToggleToCmp(CodeLocationLabel inst_) { + Instruction* i = (Instruction*)inst_.raw(); + MOZ_ASSERT(i->IsCondB()); + + int imm19 = i->ImmCondBranch(); + // bit 23 is reserved, and the simulator throws an assertion when this happens + // It'll be messy to decode, but we can steal bit 30 or bit 31. + MOZ_ASSERT(vixl::IsInt18(imm19)); + + // 31 - 64-bit if set, 32-bit if unset. (OK!) + // 30 - sub if set, add if unset. (OK!) + // 29 - SetFlagsBit. Must be set. + // 22:23 - ShiftAddSub. (OK!) + // 10:21 - ImmAddSub. (OK!) + // 5:9 - First source register (Rn). (OK!) + // 0:4 - Destination Register. Must be xzr. + + // From the above, there is a safe 19-bit contiguous region from 5:23.
+ Emit(i, vixl::ThirtyTwoBits | vixl::AddSubImmediateFixed | vixl::SUB | + Flags(vixl::SetFlags) | Rd(vixl::xzr) | + (imm19 << vixl::Rn_offset)); +} + +void Assembler::ToggleCall(CodeLocationLabel inst_, bool enabled) { + const Instruction* first = reinterpret_cast<Instruction*>(inst_.raw()); + Instruction* load; + Instruction* call; + + // There might be a constant pool at the very first instruction. + first = first->skipPool(); + + // Skip the stack pointer restore instruction. + if (first->IsStackPtrSync()) { + first = first->InstructionAtOffset(vixl::kInstructionSize)->skipPool(); + } + + load = const_cast<Instruction*>(first); + + // The call instruction follows the load, but there may be an injected + // constant pool. + call = const_cast<Instruction*>( + load->InstructionAtOffset(vixl::kInstructionSize)->skipPool()); + + if (call->IsBLR() == enabled) { + return; + } + + if (call->IsBLR()) { + // If the second instruction is blr(), then we have: + // ldr x17, [pc, offset] + // blr x17 + MOZ_ASSERT(load->IsLDR()); + // We want to transform this to: + // adr xzr, [pc, offset] + // nop + int32_t offset = load->ImmLLiteral(); + adr(load, xzr, int32_t(offset)); + nop(call); + } else { + // We have: + // adr xzr, [pc, offset] (or ldr x17, [pc, offset]) + // nop + MOZ_ASSERT(load->IsADR() || load->IsLDR()); + MOZ_ASSERT(call->IsNOP()); + // Transform this to: + // ldr x17, [pc, offset] + // blr x17 + int32_t offset = (int)load->ImmPCRawOffset(); + MOZ_ASSERT(vixl::IsInt19(offset)); + ldr(load, ScratchReg2_64, int32_t(offset)); + blr(call, ScratchReg2_64); + } +} + +// Patches loads generated by MacroAssemblerCompat::mov(CodeLabel*, Register). +// The loading code is implemented in movePatchablePtr(). 
+void Assembler::UpdateLoad64Value(Instruction* inst0, uint64_t value) { + MOZ_ASSERT(inst0->IsLDR()); + uint64_t* literal = inst0->LiteralAddress<uint64_t*>(); + *literal = value; +} + +class RelocationIterator { + CompactBufferReader reader_; + uint32_t offset_ = 0; + + public: + explicit RelocationIterator(CompactBufferReader& reader) : reader_(reader) {} + + bool read() { + if (!reader_.more()) { + return false; + } + offset_ = reader_.readUnsigned(); + return true; + } + + uint32_t offset() const { return offset_; } +}; + +static JitCode* CodeFromJump(JitCode* code, uint8_t* jump) { + const Instruction* inst = (const Instruction*)jump; + uint8_t* target; + + // We're expecting a call created by MacroAssembler::call(JitCode*). + // It looks like: + // + // ldr scratch, [pc, offset] + // blr scratch + // + // If the call has been toggled by ToggleCall(), it looks like: + // + // adr xzr, [pc, offset] + // nop + // + // There might be a constant pool at the very first instruction. + // See also ToggleCall(). + inst = inst->skipPool(); + + // Skip the stack pointer restore instruction. + if (inst->IsStackPtrSync()) { + inst = inst->InstructionAtOffset(vixl::kInstructionSize)->skipPool(); + } + + if (inst->BranchType() != vixl::UnknownBranchType) { + // This is an immediate branch. + target = (uint8_t*)inst->ImmPCOffsetTarget(); + } else if (inst->IsLDR()) { + // This is an ldr+blr call that is enabled. See ToggleCall(). + mozilla::DebugOnly<const Instruction*> nextInst = + inst->InstructionAtOffset(vixl::kInstructionSize)->skipPool(); + MOZ_ASSERT(nextInst->IsNOP() || nextInst->IsBLR()); + target = (uint8_t*)inst->Literal64(); + } else if (inst->IsADR()) { + // This is a disabled call: adr+nop. See ToggleCall(). 
+ mozilla::DebugOnly<const Instruction*> nextInst = + inst->InstructionAtOffset(vixl::kInstructionSize)->skipPool(); + MOZ_ASSERT(nextInst->IsNOP()); + ptrdiff_t offset = inst->ImmPCRawOffset() << vixl::kLiteralEntrySizeLog2; + // This is what Literal64 would do with the corresponding ldr. + memcpy(&target, inst + offset, sizeof(target)); + } else { + MOZ_CRASH("Unrecognized jump instruction."); + } + + // If the jump is within the code buffer, it uses the extended jump table. + if (target >= code->raw() && + target < code->raw() + code->instructionsSize()) { + MOZ_ASSERT(target + Assembler::SizeOfJumpTableEntry <= + code->raw() + code->instructionsSize()); + + uint8_t** patchablePtr = + (uint8_t**)(target + Assembler::OffsetOfJumpTableEntryPointer); + target = *patchablePtr; + } + + return JitCode::FromExecutable(target); +} + +void Assembler::TraceJumpRelocations(JSTracer* trc, JitCode* code, + CompactBufferReader& reader) { + RelocationIterator iter(reader); + while (iter.read()) { + JitCode* child = CodeFromJump(code, code->raw() + iter.offset()); + TraceManuallyBarrieredEdge(trc, &child, "rel32"); + MOZ_ASSERT(child == CodeFromJump(code, code->raw() + iter.offset())); + } +} + +/* static */ +void Assembler::TraceDataRelocations(JSTracer* trc, JitCode* code, + CompactBufferReader& reader) { + mozilla::Maybe<AutoWritableJitCode> awjc; + + uint8_t* buffer = code->raw(); + + while (reader.more()) { + size_t offset = reader.readUnsigned(); + Instruction* load = (Instruction*)&buffer[offset]; + + // The only valid traceable operation is a 64-bit load to an ARMRegister. + // Refer to movePatchablePtr() for generation. + MOZ_ASSERT(load->Mask(vixl::LoadLiteralMask) == vixl::LDR_x_lit); + + uintptr_t* literalAddr = load->LiteralAddress<uintptr_t*>(); + uintptr_t literal = *literalAddr; + + // Data relocations can be for Values or for raw pointers. If a Value is + // zero-tagged, we can trace it as if it were a raw pointer. 
If a Value + // is not zero-tagged, we have to interpret it as a Value to ensure that the + // tag bits are masked off to recover the actual pointer. + + if (literal >> JSVAL_TAG_SHIFT) { + // This relocation is a Value with a non-zero tag. + Value v = Value::fromRawBits(literal); + TraceManuallyBarrieredEdge(trc, &v, "jit-masm-value"); + if (*literalAddr != v.asRawBits()) { + if (awjc.isNothing()) { + awjc.emplace(code); + } + *literalAddr = v.asRawBits(); + } + continue; + } + + // This relocation is a raw pointer or a Value with a zero tag. + // No barriers needed since the pointers are constants. + gc::Cell* cell = reinterpret_cast<gc::Cell*>(literal); + MOZ_ASSERT(gc::IsCellPointerValid(cell)); + TraceManuallyBarrieredGenericPointerEdge(trc, &cell, "jit-masm-ptr"); + if (uintptr_t(cell) != literal) { + if (awjc.isNothing()) { + awjc.emplace(code); + } + *literalAddr = uintptr_t(cell); + } + } +} + +void Assembler::retarget(Label* label, Label* target) { +#ifdef JS_DISASM_ARM64 + spew_.spewRetarget(label, target); +#endif + if (label->used()) { + if (target->bound()) { + bind(label, BufferOffset(target)); + } else if (target->used()) { + // The target is not bound but used. Prepend label's branch list + // onto target's. + BufferOffset labelBranchOffset(label); + + // Walk label's use chain, starting from the entry stored in label, + // until we reach the link that has no successor. + BufferOffset next = NextLink(labelBranchOffset); + while (next.assigned()) { + labelBranchOffset = next; + next = NextLink(next); + } + + // Point that final link at the entry currently stored in target, then + // make target refer to label's stored entry, so that label's whole + // chain now precedes target's existing uses. + SetNextLink(labelBranchOffset, BufferOffset(target)); + target->use(label->offset()); + } else { + // The target is unbound and unused. We can just take the head of + // the list hanging off of label, and dump that into target.
+ target->use(label->offset()); + } + } + label->reset(); +} + +} // namespace jit +} // namespace js diff --git a/js/src/jit/arm64/Assembler-arm64.h b/js/src/jit/arm64/Assembler-arm64.h new file mode 100644 index 0000000000..9745e9d262 --- /dev/null +++ b/js/src/jit/arm64/Assembler-arm64.h @@ -0,0 +1,793 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef A64_ASSEMBLER_A64_H_ +#define A64_ASSEMBLER_A64_H_ + +#include <iterator> + +#include "jit/arm64/vixl/Assembler-vixl.h" + +#include "jit/CompactBuffer.h" +#include "jit/shared/Disassembler-shared.h" +#include "wasm/WasmTypeDecls.h" + +namespace js { +namespace jit { + +// VIXL imports. +typedef vixl::Register ARMRegister; +typedef vixl::FPRegister ARMFPRegister; +using vixl::ARMBuffer; +using vixl::Instruction; + +using LabelDoc = DisassemblerSpew::LabelDoc; +using LiteralDoc = DisassemblerSpew::LiteralDoc; + +static const uint32_t AlignmentAtPrologue = 0; +static const uint32_t AlignmentMidPrologue = 8; +static const Scale ScalePointer = TimesEight; + +// The MacroAssembler uses scratch registers extensively and unexpectedly. +// For safety, scratch registers should always be acquired using +// vixl::UseScratchRegisterScope.
+static constexpr Register ScratchReg{Registers::ip0}; +static constexpr ARMRegister ScratchReg64 = {ScratchReg, 64}; + +static constexpr Register ScratchReg2{Registers::ip1}; +static constexpr ARMRegister ScratchReg2_64 = {ScratchReg2, 64}; + +static constexpr FloatRegister ReturnDoubleReg = {FloatRegisters::d0, + FloatRegisters::Double}; +static constexpr FloatRegister ScratchDoubleReg_ = {FloatRegisters::d31, + FloatRegisters::Double}; +struct ScratchDoubleScope : public AutoFloatRegisterScope { + explicit ScratchDoubleScope(MacroAssembler& masm) + : AutoFloatRegisterScope(masm, ScratchDoubleReg_) {} +}; + +static constexpr FloatRegister ReturnFloat32Reg = {FloatRegisters::s0, + FloatRegisters::Single}; +static constexpr FloatRegister ScratchFloat32Reg_ = {FloatRegisters::s31, + FloatRegisters::Single}; +struct ScratchFloat32Scope : public AutoFloatRegisterScope { + explicit ScratchFloat32Scope(MacroAssembler& masm) + : AutoFloatRegisterScope(masm, ScratchFloat32Reg_) {} +}; + +#ifdef ENABLE_WASM_SIMD +static constexpr FloatRegister ReturnSimd128Reg = {FloatRegisters::v0, + FloatRegisters::Simd128}; +static constexpr FloatRegister ScratchSimd128Reg = {FloatRegisters::v31, + FloatRegisters::Simd128}; +struct ScratchSimd128Scope : public AutoFloatRegisterScope { + explicit ScratchSimd128Scope(MacroAssembler& masm) + : AutoFloatRegisterScope(masm, ScratchSimd128Reg) {} +}; +#else +struct ScratchSimd128Scope : public AutoFloatRegisterScope { + explicit ScratchSimd128Scope(MacroAssembler& masm) + : AutoFloatRegisterScope(masm, ScratchDoubleReg_) { + MOZ_CRASH("SIMD not enabled"); + } +}; +#endif + +static constexpr Register InvalidReg{Registers::Invalid}; +static constexpr FloatRegister InvalidFloatReg = {}; + +static constexpr Register OsrFrameReg{Registers::x3}; +static constexpr Register CallTempReg0{Registers::x9}; +static constexpr Register CallTempReg1{Registers::x10}; +static constexpr Register CallTempReg2{Registers::x11}; +static constexpr Register 
CallTempReg3{Registers::x12}; +static constexpr Register CallTempReg4{Registers::x13}; +static constexpr Register CallTempReg5{Registers::x14}; + +static constexpr Register PreBarrierReg{Registers::x1}; + +static constexpr Register InterpreterPCReg{Registers::x9}; + +static constexpr Register ReturnReg{Registers::x0}; +static constexpr Register64 ReturnReg64(ReturnReg); +static constexpr Register JSReturnReg{Registers::x2}; +static constexpr Register FramePointer{Registers::fp}; +static constexpr ARMRegister FramePointer64{FramePointer, 64}; +static constexpr Register ZeroRegister{Registers::sp}; +static constexpr ARMRegister ZeroRegister64{Registers::sp, 64}; +static constexpr ARMRegister ZeroRegister32{Registers::sp, 32}; + +// [SMDOC] AArch64 Stack Pointer and Pseudo Stack Pointer conventions +// +// ================ +// +// Stack pointer (SP), PseudoStackPointer (PSP), and RealStackPointer: +// +// The ARM64 real SP has a constraint: it must be 16-byte aligned whenever it +// is used as the base pointer for a memory access. (SP+offset need not be +// 16-byte aligned, but the SP value itself must be.) The SP register may +// take on unaligned values but may not be used for a memory access while it +// is unaligned. +// +// Stack-alignment checking can be enabled or disabled by a control register; +// however that register cannot be modified by user space. We have to assume +// stack alignment checking is enabled, and that does usually appear to be the +// case. See the ARM Architecture Reference Manual, "D1.8.2 SP alignment +// checking", for further details. +// +// A second constraint is forced upon us by the ARM64 ABI. This requires that +// all accesses to the stack must be at or above SP. Accesses below SP are +// strictly forbidden, presumably because the kernel might use that area of +// memory for its own purposes -- in particular, signal delivery -- and hence +// it may get trashed at any time. 
+// +// Note this doesn't mean that accesses to the stack must be based off +// register SP. Only that the effective addresses must be >= SP, regardless +// of how the address is formed. +// +// In order to allow word-wise pushes and pops, some of our ARM64 jits +// (JS-Baseline, JS-Ion, and Wasm-Ion, but not Wasm-Baseline) dedicate x28 to +// be used as a PseudoStackPointer (PSP). +// +// Initially the PSP will have the same value as the SP. Code can, if it +// wants, push a single word by subtracting 8 from the PSP, doing SP := PSP, +// then storing the value at PSP+0. Given other constraints on the alignment +// of the SP at function call boundaries, this works out OK, at the cost of +// the two extra instructions per push / pop. +// +// This is all a bit messy, and is probably not robustly adhered to. However, +// the following appear to be the intended, and mostly implemented, current +// invariants: +// +// (1) PSP is "primary", SP is "secondary". Most stack refs are +// PSP-relative. SP-relative is rare and (obviously) only done when we +// know that SP is aligned. +// +// (2) At all times, the relationship SP <= PSP is maintained. The fact that +// SP may validly be less than PSP means that pushes on the stack force +// the two values to become equal, by copying PSP into SP. However, pops +// behave differently: PSP moves back up and SP stays the same, since that +// doesn't break the SP <= PSP invariant. +// +// (3) However, immediately before a call instruction, SP and PSP must be the +// same. To enforce this, PSP is copied into SP by the arm64-specific +// MacroAssembler::call routines. +// +// (4) Also, after a function has returned, it is expected that SP holds the +// "primary" value. How exactly this is implemented remains not entirely +// clear and merits further investigation. The following points are +// believed to be relevant: +// +// - For calls to functions observing the system AArch64 ABI, PSP (x28) is +// callee-saved. 
That, combined with (3) above, implies SP == PSP +// immediately after the call returns. +// +// - JIT-generated routines return using MacroAssemblerCompat::retn, and +// that copies PSP into SP (bizarrely; this would make more sense if it +// copied SP into PSP); but in any case, the point is that they are the +// same at the point that the return instruction executes. +// +// - MacroAssembler::callWithABIPost copies PSP into SP after the return +// of a call requiring dynamic alignment. +// +// Given the above, it is unclear exactly where in the return sequence it +// is expected that SP == PSP, and also whether it is the callee or caller +// that is expected to enforce it. +// +// In general it would be nice to be able to move (at some time in the future, +// not now) to a world where *every* assignment to PSP or SP is followed +// immediately by a copy into the other register. That would make all +// required correctness proofs trivial in the sense that it would require only +// local inspection of code immediately following (dominated by) any such +// assignment. For the moment, however, this is a guideline, not a hard +// requirement. +// +// ================ +// +// Mechanics of keeping the stack pointers in sync: +// +// The following two methods require that the masm's SP has been set to the PSP +// with MacroAssembler::SetStackPointer64(PseudoStackPointer64), or they will be +// no-ops. The setup is performed manually by the jits after creating the masm. +// +// * MacroAssembler::syncStackPtr() performs SP := PSP, presumably after PSP has +// been updated, so SP needs to move too. This is used pretty liberally +// throughout the code base. +// +// * MacroAssembler::initPseudoStackPtr() performs PSP := SP. This can be used +// after calls to non-ABI compliant code; it's not used much. 
+// +// In the ARM64 assembler there is a function Instruction::IsStackPtrSync() that +// recognizes the instruction emitted by syncStackPtr(), and this is used to +// skip that instruction in a few places, should it be present, in the JS JIT where +// code is generated to deal with toggled calls. +// +// In various places there are calls to MacroAssembler::syncStackPtr() which +// appear to be redundant. Investigation shows that they often are redundant, +// but not always. Finding and removing such redundancies would be quite some +// work, so we live for now with the occasional redundant update. Perusal of +// the Cortex-A55 and -A72 optimization guides shows no evidence that such +// assignments are any more expensive than assignments between vanilla integer +// registers, so the costs of such redundant updates are assumed to be small. +// +// Invariants on the PSP at function call boundaries: +// +// It *appears* that the following invariants exist: +// +// * On entry to JIT code, PSP == SP, ie the stack pointer is transmitted via +// both registers. +// +// * On entry to C++ code, PSP == SP. Certainly it appears that all calls +// created by the MacroAssembler::call(..) routines perform 'syncStackPtr' +// immediately before the call, and all ABI calls are routed through the +// MacroAssembler::call layer. +// +// * The stubs generated by WasmStubs.cpp assume that, on entry, SP is the +// active stack pointer and that PSP is dead. +// +// * The PSP is non-volatile (callee-saved). Along a normal return path from +// JIT code, simply having PSP == SP on exit is correct, since the exit SP is +// the same as the entry SP by the JIT ABI. +// +// * Call-outs to non-JIT C++ code do not need to set up the PSP (it won't be +// used), and will not need to restore the PSP on return because x28 is +// non-volatile in the ARM64 ABI.
+// +// ================ +// +// Future cleanups to the SP-vs-PSP machinery: +// +// Currently we have somewhat unclear invariants, which are not obviously +// always enforced, and which may require complex non-local reasoning. +// Auditing the code to ensure that the invariants always hold, whilst not +// generating duplicate syncs, is close to impossible. A future rework to +// tidy this might be as follows. (This suggestion pertains to the entire +// JIT complex: all of the JS compilers, wasm compilers, stub generators, +// regexp compilers, etc.). +// +// Currently we have that, in JIT-generated code, PSP is "primary" and SP is +// "secondary", meaning that PSP has the "real" stack pointer value and SP is +// updated whenever PSP acquires a lower value, so as to ensure that SP <= PSP. +// An exception to this scheme is the stubs code generated by WasmStubs.cpp, +// which assumes that SP is "primary" and PSP is dead. +// +// It might give us an easier incremental path to eventually removing PSP +// entirely if we switched to having SP always be the primary. That is: +// +// (1) SP is primary, PSP is secondary +// (2) After any assignment to SP, it is copied into PSP +// (3) All (non-frame-pointer-based) stack accesses are PSP-relative +// (as at present) +// +// This would have the effect that: +// +// * It would reinstate the invariant that on all targets, the "real" SP value +// is in the ABI-and-or-hardware-mandated stack pointer register.
+// +// * It would give us a simple story about calls and returns: +// - for calls to non-JIT generated code (viz, C++ etc), we need no extra +// copies, because PSP (x28) is callee-saved +// - for calls to JIT-generated code, we need no extra copies, because of (2) +// above +// +// * We could incrementally migrate those parts of the code generator where we +// know that SP is 16-aligned, to use SP- rather than PSP-relative accesses +// +// * The consistent use of (2) would remove the requirement to have to perform +// path-dependent reasoning (for paths in the generated code, not in the +// compiler) when reading/understanding the code. +// +// * x28 would become free for use by stubs and the baseline compiler without +// having to worry about interoperating with code that expects x28 to hold a +// valid PSP. +// +// One might ask what mechanical checks we can add to ensure correctness, rather +// than having to verify these invariants by hand indefinitely. Maybe some +// combination of: +// +// * In debug builds, compiling-in assert(SP == PSP) at critical places. This +// can be done using the existing `assertStackPtrsSynced` function. +// +// * In debug builds, scanning sections of generated code to ensure no +// SP-relative stack accesses have been created -- for some sections, at +// least every assignment to SP is immediately followed by a copy to x28. +// This would also facilitate detection of duplicate syncs. +// +// ================ +// +// Other investigative notes, for the code base at present: +// +// * Some disassembly dumps suggest that we sync the stack pointer too often. +// This could be the result of various pieces of code working at cross +// purposes when syncing the stack pointer, or of not paying attention to the +// precise invariants. 
+// +// * As documented in RegExpNativeMacroAssembler.cpp, function +// SMRegExpMacroAssembler::createStackFrame: +// +// // ARM64 communicates stack address via SP, but uses a pseudo-sp (PSP) for +// // addressing. The register we use for PSP may however also be used by +// // calling code, and it is nonvolatile, so save it. Do this as a special +// // case first because the generic save/restore code needs the PSP to be +// // initialized already. +// +// and also in function SMRegExpMacroAssembler::exitHandler: +// +// // Restore the saved value of the PSP register, this value is whatever the +// // caller had saved in it, not any actual SP value, and it must not be +// // overwritten subsequently. +// +// The original source for these comments was a patch for bug 1445907. +// +// * MacroAssembler-arm64.h has an interesting comment in the retn() +// function: +// +// syncStackPtr(); // SP is always used to transmit the stack between calls. +// +// Same comment at abiret() in that file, and in MacroAssembler-arm64.cpp, +// at callWithABIPre and callWithABIPost. +// +// * In Trampoline-arm64.cpp function JitRuntime::generateVMWrapper we find +// +// // SP is used to transfer stack across call boundaries. +// masm.initPseudoStackPtr(); +// +// after the return point of a callWithVMWrapper. The only reasonable +// conclusion from all those (assuming they are right) is that SP == PSP. +// +// * Wasm-Baseline does not use the PSP, but as Wasm-Ion code requires SP==PSP +// and tiered code can have Baseline->Ion calls, Baseline will set PSP=SP +// before a call to wasm code. +// +// ================ + +// StackPointer is intentionally undefined on ARM64 to prevent misuse: using +// sp as a base register is only valid if sp % 16 == 0. 
+static constexpr Register RealStackPointer{Registers::sp}; + +static constexpr Register PseudoStackPointer{Registers::x28}; +static constexpr ARMRegister PseudoStackPointer64 = {Registers::x28, 64}; +static constexpr ARMRegister PseudoStackPointer32 = {Registers::x28, 32}; + +static constexpr Register IntArgReg0{Registers::x0}; +static constexpr Register IntArgReg1{Registers::x1}; +static constexpr Register IntArgReg2{Registers::x2}; +static constexpr Register IntArgReg3{Registers::x3}; +static constexpr Register IntArgReg4{Registers::x4}; +static constexpr Register IntArgReg5{Registers::x5}; +static constexpr Register IntArgReg6{Registers::x6}; +static constexpr Register IntArgReg7{Registers::x7}; +static constexpr Register HeapReg{Registers::x21}; + +// Define unsized Registers. +#define DEFINE_UNSIZED_REGISTERS(N) \ + static constexpr Register r##N{Registers::x##N}; +REGISTER_CODE_LIST(DEFINE_UNSIZED_REGISTERS) +#undef DEFINE_UNSIZED_REGISTERS +static constexpr Register ip0{Registers::x16}; +static constexpr Register ip1{Registers::x17}; +static constexpr Register fp{Registers::x29}; +static constexpr Register lr{Registers::x30}; +static constexpr Register rzr{Registers::xzr}; + +// Import VIXL registers into the js::jit namespace. +#define IMPORT_VIXL_REGISTERS(N) \ + static constexpr ARMRegister w##N = vixl::w##N; \ + static constexpr ARMRegister x##N = vixl::x##N; +REGISTER_CODE_LIST(IMPORT_VIXL_REGISTERS) +#undef IMPORT_VIXL_REGISTERS +static constexpr ARMRegister wzr = vixl::wzr; +static constexpr ARMRegister xzr = vixl::xzr; +static constexpr ARMRegister wsp = vixl::wsp; +static constexpr ARMRegister sp = vixl::sp; + +// Import VIXL VRegisters into the js::jit namespace. 
+#define IMPORT_VIXL_VREGISTERS(N) \ + static constexpr ARMFPRegister s##N = vixl::s##N; \ + static constexpr ARMFPRegister d##N = vixl::d##N; +REGISTER_CODE_LIST(IMPORT_VIXL_VREGISTERS) +#undef IMPORT_VIXL_VREGISTERS + +static constexpr ValueOperand JSReturnOperand = ValueOperand(JSReturnReg); + +// Registers used by RegExpMatcher and RegExpExecMatch stubs (do not use +// JSReturnOperand). +static constexpr Register RegExpMatcherRegExpReg = CallTempReg0; +static constexpr Register RegExpMatcherStringReg = CallTempReg1; +static constexpr Register RegExpMatcherLastIndexReg = CallTempReg2; + +// Registers used by RegExpExecTest stub (do not use ReturnReg). +static constexpr Register RegExpExecTestRegExpReg = CallTempReg0; +static constexpr Register RegExpExecTestStringReg = CallTempReg1; + +// Registers used by RegExpSearcher stub (do not use ReturnReg). +static constexpr Register RegExpSearcherRegExpReg = CallTempReg0; +static constexpr Register RegExpSearcherStringReg = CallTempReg1; +static constexpr Register RegExpSearcherLastIndexReg = CallTempReg2; + +static constexpr Register JSReturnReg_Type = r3; +static constexpr Register JSReturnReg_Data = r2; + +static constexpr FloatRegister NANReg = {FloatRegisters::d14, + FloatRegisters::Single}; +// N.B. r8 isn't listed as an aapcs temp register, but we can use it as such +// because we never use return-structs. 
+static constexpr Register CallTempNonArgRegs[] = {r8, r9, r10, r11, + r12, r13, r14, r15}; +static const uint32_t NumCallTempNonArgRegs = std::size(CallTempNonArgRegs); + +static constexpr uint32_t JitStackAlignment = 16; + +static constexpr uint32_t JitStackValueAlignment = + JitStackAlignment / sizeof(Value); +static_assert(JitStackAlignment % sizeof(Value) == 0 && + JitStackValueAlignment >= 1, + "Stack alignment should be a non-zero multiple of sizeof(Value)"); + +static constexpr uint32_t SimdMemoryAlignment = 16; + +static_assert(CodeAlignment % SimdMemoryAlignment == 0, + "Code alignment should be larger than any of the alignments " + "which are used for " + "the constant sections of the code buffer. Thus it should be " + "larger than the " + "alignment for SIMD constants."); + +static const uint32_t WasmStackAlignment = SimdMemoryAlignment; +static const uint32_t WasmTrapInstructionLength = 4; + +// See comments in wasm::GenerateFunctionPrologue. The difference between these +// is the size of the largest callable prologue on the platform. +static constexpr uint32_t WasmCheckedCallEntryOffset = 0u; + +class Assembler : public vixl::Assembler { + public: + Assembler() : vixl::Assembler() {} + + typedef vixl::Condition Condition; + + void finish(); + bool appendRawCode(const uint8_t* code, size_t numBytes); + bool reserve(size_t size); + bool swapBuffer(wasm::Bytes& bytes); + + // Emit the jump table, returning the BufferOffset to the first entry in the + // table. 
+ BufferOffset emitExtendedJumpTable(); + BufferOffset ExtendedJumpTable_; + void executableCopy(uint8_t* buffer); + + BufferOffset immPool(ARMRegister dest, uint8_t* value, vixl::LoadLiteralOp op, + const LiteralDoc& doc, + ARMBuffer::PoolEntry* pe = nullptr); + BufferOffset immPool64(ARMRegister dest, uint64_t value, + ARMBuffer::PoolEntry* pe = nullptr); + BufferOffset fImmPool(ARMFPRegister dest, uint8_t* value, + vixl::LoadLiteralOp op, const LiteralDoc& doc); + BufferOffset fImmPool64(ARMFPRegister dest, double value); + BufferOffset fImmPool32(ARMFPRegister dest, float value); + + uint32_t currentOffset() const { return nextOffset().getOffset(); } + + void bind(Label* label) { bind(label, nextOffset()); } + void bind(Label* label, BufferOffset boff); + void bind(CodeLabel* label) { label->target()->bind(currentOffset()); } + + void setUnlimitedBuffer() { armbuffer_.setUnlimited(); } + bool oom() const { + return AssemblerShared::oom() || armbuffer_.oom() || + jumpRelocations_.oom() || dataRelocations_.oom(); + } + + void copyJumpRelocationTable(uint8_t* dest) const { + if (jumpRelocations_.length()) { + memcpy(dest, jumpRelocations_.buffer(), jumpRelocations_.length()); + } + } + void copyDataRelocationTable(uint8_t* dest) const { + if (dataRelocations_.length()) { + memcpy(dest, dataRelocations_.buffer(), dataRelocations_.length()); + } + } + + size_t jumpRelocationTableBytes() const { return jumpRelocations_.length(); } + size_t dataRelocationTableBytes() const { return dataRelocations_.length(); } + size_t bytesNeeded() const { + return SizeOfCodeGenerated() + jumpRelocationTableBytes() + + dataRelocationTableBytes(); + } + + void processCodeLabels(uint8_t* rawCode) { + for (const CodeLabel& label : codeLabels_) { + Bind(rawCode, label); + } + } + + static void UpdateLoad64Value(Instruction* inst0, uint64_t value); + + static void Bind(uint8_t* rawCode, const CodeLabel& label) { + auto mode = label.linkMode(); + size_t patchAtOffset = 
label.patchAt().offset(); + size_t targetOffset = label.target().offset(); + + if (mode == CodeLabel::MoveImmediate) { + Instruction* inst = (Instruction*)(rawCode + patchAtOffset); + Assembler::UpdateLoad64Value(inst, (uint64_t)(rawCode + targetOffset)); + } else { + *reinterpret_cast<const void**>(rawCode + patchAtOffset) = + rawCode + targetOffset; + } + } + + void retarget(Label* cur, Label* next); + + // The buffer is about to be linked. Ensure any constant pools or + // excess bookkeeping has been flushed to the instruction stream. + void flush() { armbuffer_.flushPool(); } + + void comment(const char* msg) { +#ifdef JS_DISASM_ARM64 + spew_.spew("; %s", msg); +#endif + } + + void setPrinter(Sprinter* sp) { +#ifdef JS_DISASM_ARM64 + spew_.setPrinter(sp); +#endif + } + + static bool SupportsFloatingPoint() { return true; } + static bool SupportsUnalignedAccesses() { return true; } + static bool SupportsFastUnalignedFPAccesses() { return true; } + static bool SupportsWasmSimd() { return true; } + + static bool HasRoundInstruction(RoundingMode mode) { + switch (mode) { + case RoundingMode::Up: + case RoundingMode::Down: + case RoundingMode::NearestTiesToEven: + case RoundingMode::TowardsZero: + return true; + } + MOZ_CRASH("unexpected mode"); + } + + protected: + // Add a jump whose target is unknown until finalization. + // The jump may not be patched at runtime. 
+ void addPendingJump(BufferOffset src, ImmPtr target, RelocationKind kind); + + public: + static uint32_t PatchWrite_NearCallSize() { return 4; } + + static uint32_t NopSize() { return 4; } + + static void PatchWrite_NearCall(CodeLocationLabel start, + CodeLocationLabel toCall); + static void PatchDataWithValueCheck(CodeLocationLabel label, + PatchedImmPtr newValue, + PatchedImmPtr expected); + + static void PatchDataWithValueCheck(CodeLocationLabel label, ImmPtr newValue, + ImmPtr expected); + + static void PatchWrite_Imm32(CodeLocationLabel label, Imm32 imm) { + // Raw is going to be the return address. + uint32_t* raw = (uint32_t*)label.raw(); + // Overwrite the 4 bytes before the return address, which will end up being + // the call instruction. + *(raw - 1) = imm.value; + } + static uint32_t AlignDoubleArg(uint32_t offset) { + MOZ_CRASH("AlignDoubleArg()"); + } + static uintptr_t GetPointer(uint8_t* ptr) { + Instruction* i = reinterpret_cast<Instruction*>(ptr); + uint64_t ret = i->Literal64(); + return ret; + } + + // Toggle a jmp or cmp emitted by toggledJump(). + static void ToggleToJmp(CodeLocationLabel inst_); + static void ToggleToCmp(CodeLocationLabel inst_); + static void ToggleCall(CodeLocationLabel inst_, bool enabled); + + static void TraceJumpRelocations(JSTracer* trc, JitCode* code, + CompactBufferReader& reader); + static void TraceDataRelocations(JSTracer* trc, JitCode* code, + CompactBufferReader& reader); + + void assertNoGCThings() const { +#ifdef DEBUG + MOZ_ASSERT(dataRelocations_.length() == 0); + for (auto& j : pendingJumps_) { + MOZ_ASSERT(j.kind == RelocationKind::HARDCODED); + } +#endif + } + + public: + // A Jump table entry is 2 instructions, with 8 bytes of raw data + static const size_t SizeOfJumpTableEntry = 16; + + struct JumpTableEntry { + uint32_t ldr; + uint32_t br; + void* data; + + Instruction* getLdr() { return reinterpret_cast<Instruction*>(&ldr); } + }; + + // Offset of the patchable target for the given entry. 
+ static const size_t OffsetOfJumpTableEntryPointer = 8; + + public: + void writeCodePointer(CodeLabel* label) { + armbuffer_.assertNoPoolAndNoNops(); + uintptr_t x = uintptr_t(-1); + BufferOffset off = EmitData(&x, sizeof(uintptr_t)); + label->patchAt()->bind(off.getOffset()); + } + + void verifyHeapAccessDisassembly(uint32_t begin, uint32_t end, + const Disassembler::HeapAccess& heapAccess) { + MOZ_CRASH("verifyHeapAccessDisassembly"); + } + + protected: + // Structure for fixing up pc-relative loads/jumps when the machine + // code gets moved (executable copy, gc, etc.). + struct RelativePatch { + BufferOffset offset; + void* target; + RelocationKind kind; + + RelativePatch(BufferOffset offset, void* target, RelocationKind kind) + : offset(offset), target(target), kind(kind) {} + }; + + // List of jumps for which the target is either unknown until finalization, + // or cannot be known due to GC. Each entry here requires a unique entry + // in the extended jump table, and is patched at finalization. + js::Vector<RelativePatch, 8, SystemAllocPolicy> pendingJumps_; + + // Final output formatters. + CompactBufferWriter jumpRelocations_; + CompactBufferWriter dataRelocations_; +}; + +static const uint32_t NumIntArgRegs = 8; +static const uint32_t NumFloatArgRegs = 8; + +class ABIArgGenerator { + public: + ABIArgGenerator() + : intRegIndex_(0), floatRegIndex_(0), stackOffset_(0), current_() {} + + ABIArg next(MIRType argType); + ABIArg& current() { return current_; } + uint32_t stackBytesConsumedSoFar() const { return stackOffset_; } + void increaseStackOffset(uint32_t bytes) { stackOffset_ += bytes; } + + protected: + unsigned intRegIndex_; + unsigned floatRegIndex_; + uint32_t stackOffset_; + ABIArg current_; +}; + +// These registers may be volatile or nonvolatile. 
+static constexpr Register ABINonArgReg0 = r8; +static constexpr Register ABINonArgReg1 = r9; +static constexpr Register ABINonArgReg2 = r10; +static constexpr Register ABINonArgReg3 = r11; + +// This register may be volatile or nonvolatile. Avoid d31 which is the +// ScratchDoubleReg_. +static constexpr FloatRegister ABINonArgDoubleReg = {FloatRegisters::s16, + FloatRegisters::Single}; + +// These registers may be volatile or nonvolatile. +// Note: these three registers are all guaranteed to be different +static constexpr Register ABINonArgReturnReg0 = r8; +static constexpr Register ABINonArgReturnReg1 = r9; +static constexpr Register ABINonVolatileReg{Registers::x19}; + +// This register is guaranteed to be clobberable during the prologue and +// epilogue of an ABI call which must preserve both ABI argument, return +// and non-volatile registers. +static constexpr Register ABINonArgReturnVolatileReg = lr; + +// Instance pointer argument register for WebAssembly functions. This must not +// alias any other register used for passing function arguments or return +// values. Preserved by WebAssembly functions. Must be nonvolatile. +static constexpr Register InstanceReg{Registers::x23}; + +// Registers used for wasm table calls. These registers must be disjoint +// from the ABI argument registers, InstanceReg and each other. +static constexpr Register WasmTableCallScratchReg0 = ABINonArgReg0; +static constexpr Register WasmTableCallScratchReg1 = ABINonArgReg1; +static constexpr Register WasmTableCallSigReg = ABINonArgReg2; +static constexpr Register WasmTableCallIndexReg = ABINonArgReg3; + +// Registers used for ref calls. +static constexpr Register WasmCallRefCallScratchReg0 = ABINonArgReg0; +static constexpr Register WasmCallRefCallScratchReg1 = ABINonArgReg1; +static constexpr Register WasmCallRefReg = ABINonArgReg3; + +// Register used as a scratch along the return path in the fast js -> wasm stub +// code. 
This must not overlap ReturnReg, JSReturnOperand, or InstanceReg. +// It must be a volatile register. +static constexpr Register WasmJitEntryReturnScratch = r9; + +static inline bool GetIntArgReg(uint32_t usedIntArgs, uint32_t usedFloatArgs, + Register* out) { + if (usedIntArgs >= NumIntArgRegs) { + return false; + } + *out = Register::FromCode(usedIntArgs); + return true; +} + +static inline bool GetFloatArgReg(uint32_t usedIntArgs, uint32_t usedFloatArgs, + FloatRegister* out) { + if (usedFloatArgs >= NumFloatArgRegs) { + return false; + } + *out = FloatRegister::FromCode(usedFloatArgs); + return true; +} + +// Get a register in which we plan to put a quantity that will be used as an +// integer argument. This differs from GetIntArgReg in that if we have no more +// actual argument registers to use we will fall back on using whatever +// CallTempReg* don't overlap the argument registers, and only fail once those +// run out too. +static inline bool GetTempRegForIntArg(uint32_t usedIntArgs, + uint32_t usedFloatArgs, Register* out) { + if (GetIntArgReg(usedIntArgs, usedFloatArgs, out)) { + return true; + } + // Unfortunately, we have to assume things about the point at which + // GetIntArgReg returns false, because we need to know how many registers it + // can allocate. + usedIntArgs -= NumIntArgRegs; + if (usedIntArgs >= NumCallTempNonArgRegs) { + return false; + } + *out = CallTempNonArgRegs[usedIntArgs]; + return true; +} + +inline Imm32 Imm64::firstHalf() const { return low(); } + +inline Imm32 Imm64::secondHalf() const { return hi(); } + +// Forbids nop filling for testing purposes. Not nestable. +class AutoForbidNops { + protected: + Assembler* asm_; + + public: + explicit AutoForbidNops(Assembler* asm_) : asm_(asm_) { asm_->enterNoNops(); } + ~AutoForbidNops() { asm_->leaveNoNops(); } +}; + +// Forbids pool generation during a specified interval. Not nestable. 
+class AutoForbidPoolsAndNops : public AutoForbidNops { + public: + AutoForbidPoolsAndNops(Assembler* asm_, size_t maxInst) + : AutoForbidNops(asm_) { + asm_->enterNoPool(maxInst); + } + ~AutoForbidPoolsAndNops() { asm_->leaveNoPool(); } +}; + +} // namespace jit +} // namespace js + +#endif // A64_ASSEMBLER_A64_H_ diff --git a/js/src/jit/arm64/CodeGenerator-arm64.cpp b/js/src/jit/arm64/CodeGenerator-arm64.cpp new file mode 100644 index 0000000000..d738ea548e --- /dev/null +++ b/js/src/jit/arm64/CodeGenerator-arm64.cpp @@ -0,0 +1,4245 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "jit/arm64/CodeGenerator-arm64.h" + +#include "mozilla/DebugOnly.h" +#include "mozilla/MathAlgorithms.h" + +#include "jsnum.h" + +#include "jit/CodeGenerator.h" +#include "jit/InlineScriptTree.h" +#include "jit/JitRuntime.h" +#include "jit/MIR.h" +#include "jit/MIRGraph.h" +#include "jit/ReciprocalMulConstants.h" +#include "vm/JSContext.h" +#include "vm/Realm.h" +#include "vm/Shape.h" + +#include "jit/shared/CodeGenerator-shared-inl.h" +#include "vm/JSScript-inl.h" + +using namespace js; +using namespace js::jit; + +using JS::GenericNaN; +using mozilla::FloorLog2; +using mozilla::Maybe; +using mozilla::NegativeInfinity; +using mozilla::Nothing; +using mozilla::Some; + +// shared +CodeGeneratorARM64::CodeGeneratorARM64(MIRGenerator* gen, LIRGraph* graph, + MacroAssembler* masm) + : CodeGeneratorShared(gen, graph, masm) {} + +bool CodeGeneratorARM64::generateOutOfLineCode() { + AutoCreatedBy acb(masm, "CodeGeneratorARM64::generateOutOfLineCode"); + + if (!CodeGeneratorShared::generateOutOfLineCode()) { + return false; + } + + if (deoptLabel_.used()) { + // All non-table-based bailouts will go here. 
+ masm.bind(&deoptLabel_); + + // Store the frame size, so the handler can recover the IonScript. + masm.push(Imm32(frameSize())); + + TrampolinePtr handler = gen->jitRuntime()->getGenericBailoutHandler(); + masm.jump(handler); + } + + return !masm.oom(); +} + +void CodeGeneratorARM64::emitBranch(Assembler::Condition cond, + MBasicBlock* mirTrue, + MBasicBlock* mirFalse) { + if (isNextBlock(mirFalse->lir())) { + jumpToBlock(mirTrue, cond); + } else { + jumpToBlock(mirFalse, Assembler::InvertCondition(cond)); + jumpToBlock(mirTrue); + } +} + +void OutOfLineBailout::accept(CodeGeneratorARM64* codegen) { + codegen->visitOutOfLineBailout(this); +} + +void CodeGenerator::visitTestIAndBranch(LTestIAndBranch* test) { + Register input = ToRegister(test->input()); + MBasicBlock* mirTrue = test->ifTrue(); + MBasicBlock* mirFalse = test->ifFalse(); + + // Jump to the True block if NonZero. + // Jump to the False block if Zero. + if (isNextBlock(mirFalse->lir())) { + masm.branch32(Assembler::NonZero, input, Imm32(0), + getJumpLabelForBranch(mirTrue)); + } else { + masm.branch32(Assembler::Zero, input, Imm32(0), + getJumpLabelForBranch(mirFalse)); + if (!isNextBlock(mirTrue->lir())) { + jumpToBlock(mirTrue); + } + } +} + +void CodeGenerator::visitCompare(LCompare* comp) { + const MCompare* mir = comp->mir(); + const MCompare::CompareType type = mir->compareType(); + const Assembler::Condition cond = JSOpToCondition(type, comp->jsop()); + const Register leftreg = ToRegister(comp->getOperand(0)); + const LAllocation* right = comp->getOperand(1); + const Register defreg = ToRegister(comp->getDef(0)); + + if (type == MCompare::Compare_Object || type == MCompare::Compare_Symbol || + type == MCompare::Compare_UIntPtr || + type == MCompare::Compare_RefOrNull) { + if (right->isConstant()) { + MOZ_ASSERT(type == MCompare::Compare_UIntPtr); + masm.cmpPtrSet(cond, leftreg, Imm32(ToInt32(right)), defreg); + } else { + masm.cmpPtrSet(cond, leftreg, ToRegister(right), defreg); + } + return; 
+ } + + if (right->isConstant()) { + masm.cmp32Set(cond, leftreg, Imm32(ToInt32(right)), defreg); + } else { + masm.cmp32Set(cond, leftreg, ToRegister(right), defreg); + } +} + +void CodeGenerator::visitCompareAndBranch(LCompareAndBranch* comp) { + const MCompare* mir = comp->cmpMir(); + const MCompare::CompareType type = mir->compareType(); + const LAllocation* left = comp->left(); + const LAllocation* right = comp->right(); + + if (type == MCompare::Compare_Object || type == MCompare::Compare_Symbol || + type == MCompare::Compare_UIntPtr || + type == MCompare::Compare_RefOrNull) { + if (right->isConstant()) { + MOZ_ASSERT(type == MCompare::Compare_UIntPtr); + masm.cmpPtr(ToRegister(left), Imm32(ToInt32(right))); + } else { + masm.cmpPtr(ToRegister(left), ToRegister(right)); + } + } else if (right->isConstant()) { + masm.cmp32(ToRegister(left), Imm32(ToInt32(right))); + } else { + masm.cmp32(ToRegister(left), ToRegister(right)); + } + + Assembler::Condition cond = JSOpToCondition(type, comp->jsop()); + emitBranch(cond, comp->ifTrue(), comp->ifFalse()); +} + +void CodeGeneratorARM64::bailoutIf(Assembler::Condition condition, + LSnapshot* snapshot) { + encode(snapshot); + + InlineScriptTree* tree = snapshot->mir()->block()->trackedTree(); + OutOfLineBailout* ool = new (alloc()) OutOfLineBailout(snapshot); + addOutOfLineCode(ool, + new (alloc()) BytecodeSite(tree, tree->script()->code())); + + masm.B(ool->entry(), condition); +} + +void CodeGeneratorARM64::bailoutFrom(Label* label, LSnapshot* snapshot) { + MOZ_ASSERT_IF(!masm.oom(), label->used()); + MOZ_ASSERT_IF(!masm.oom(), !label->bound()); + + encode(snapshot); + + InlineScriptTree* tree = snapshot->mir()->block()->trackedTree(); + OutOfLineBailout* ool = new (alloc()) OutOfLineBailout(snapshot); + addOutOfLineCode(ool, + new (alloc()) BytecodeSite(tree, tree->script()->code())); + + masm.retarget(label, ool->entry()); +} + +void CodeGeneratorARM64::bailout(LSnapshot* snapshot) { + Label label; + masm.b(&label); 
+ bailoutFrom(&label, snapshot); +} + +void CodeGeneratorARM64::visitOutOfLineBailout(OutOfLineBailout* ool) { + masm.push(Imm32(ool->snapshot()->snapshotOffset())); + masm.B(&deoptLabel_); +} + +void CodeGenerator::visitMinMaxD(LMinMaxD* ins) { + ARMFPRegister lhs(ToFloatRegister(ins->first()), 64); + ARMFPRegister rhs(ToFloatRegister(ins->second()), 64); + ARMFPRegister output(ToFloatRegister(ins->output()), 64); + if (ins->mir()->isMax()) { + masm.Fmax(output, lhs, rhs); + } else { + masm.Fmin(output, lhs, rhs); + } +} + +void CodeGenerator::visitMinMaxF(LMinMaxF* ins) { + ARMFPRegister lhs(ToFloatRegister(ins->first()), 32); + ARMFPRegister rhs(ToFloatRegister(ins->second()), 32); + ARMFPRegister output(ToFloatRegister(ins->output()), 32); + if (ins->mir()->isMax()) { + masm.Fmax(output, lhs, rhs); + } else { + masm.Fmin(output, lhs, rhs); + } +} + +template <typename T> +static ARMRegister toWRegister(const T* a) { + return ARMRegister(ToRegister(a), 32); +} + +template <typename T> +static ARMRegister toXRegister(const T* a) { + return ARMRegister(ToRegister(a), 64); +} + +Operand toWOperand(const LAllocation* a) { + if (a->isConstant()) { + return Operand(ToInt32(a)); + } + return Operand(toWRegister(a)); +} + +vixl::CPURegister ToCPURegister(const LAllocation* a, Scalar::Type type) { + if (a->isFloatReg() && type == Scalar::Float64) { + return ARMFPRegister(ToFloatRegister(a), 64); + } + if (a->isFloatReg() && type == Scalar::Float32) { + return ARMFPRegister(ToFloatRegister(a), 32); + } + if (a->isGeneralReg()) { + return ARMRegister(ToRegister(a), 32); + } + MOZ_CRASH("Unknown LAllocation"); +} + +vixl::CPURegister ToCPURegister(const LDefinition* d, Scalar::Type type) { + return ToCPURegister(d->output(), type); +} + +// Let |cond| be an ARM64 condition code that we could reasonably use in a +// conditional branch or select following a comparison instruction. 
This +// function returns the condition to use in the case where we swap the two +// operands of the comparison instruction. +Assembler::Condition GetCondForSwappedOperands(Assembler::Condition cond) { + // EQ and NE map to themselves + // Of the remaining 14 cases, 4 other pairings can meaningfully swap: + // HS -- LS + // LO -- HI + // GE -- LE + // GT -- LT + switch (cond) { + case vixl::eq: + case vixl::ne: + return cond; + case vixl::hs: + return vixl::ls; + case vixl::ls: + return vixl::hs; + case vixl::lo: + return vixl::hi; + case vixl::hi: + return vixl::lo; + case vixl::ge: + return vixl::le; + case vixl::le: + return vixl::ge; + case vixl::gt: + return vixl::lt; + case vixl::lt: + return vixl::gt; + default: + MOZ_CRASH("no meaningful swapped-operand condition"); + } +} + +void CodeGenerator::visitAddI(LAddI* ins) { + const LAllocation* lhs = ins->getOperand(0); + const LAllocation* rhs = ins->getOperand(1); + const LDefinition* dest = ins->getDef(0); + + // Platforms with three-operand arithmetic ops don't need recovery. + MOZ_ASSERT(!ins->recoversInput()); + + if (ins->snapshot()) { + masm.Adds(toWRegister(dest), toWRegister(lhs), toWOperand(rhs)); + bailoutIf(Assembler::Overflow, ins->snapshot()); + } else { + masm.Add(toWRegister(dest), toWRegister(lhs), toWOperand(rhs)); + } +} + +void CodeGenerator::visitSubI(LSubI* ins) { + const LAllocation* lhs = ins->getOperand(0); + const LAllocation* rhs = ins->getOperand(1); + const LDefinition* dest = ins->getDef(0); + + // Platforms with three-operand arithmetic ops don't need recovery. 
+ MOZ_ASSERT(!ins->recoversInput()); + + if (ins->snapshot()) { + masm.Subs(toWRegister(dest), toWRegister(lhs), toWOperand(rhs)); + bailoutIf(Assembler::Overflow, ins->snapshot()); + } else { + masm.Sub(toWRegister(dest), toWRegister(lhs), toWOperand(rhs)); + } +} + +void CodeGenerator::visitMulI(LMulI* ins) { + const LAllocation* lhs = ins->getOperand(0); + const LAllocation* rhs = ins->getOperand(1); + const LDefinition* dest = ins->getDef(0); + MMul* mul = ins->mir(); + MOZ_ASSERT_IF(mul->mode() == MMul::Integer, + !mul->canBeNegativeZero() && !mul->canOverflow()); + + Register lhsreg = ToRegister(lhs); + const ARMRegister lhsreg32 = ARMRegister(lhsreg, 32); + Register destreg = ToRegister(dest); + const ARMRegister destreg32 = ARMRegister(destreg, 32); + + if (rhs->isConstant()) { + // Bailout on -0.0. + int32_t constant = ToInt32(rhs); + if (mul->canBeNegativeZero() && constant <= 0) { + Assembler::Condition bailoutCond = + (constant == 0) ? Assembler::LessThan : Assembler::Equal; + masm.Cmp(toWRegister(lhs), Operand(0)); + bailoutIf(bailoutCond, ins->snapshot()); + } + + switch (constant) { + case -1: + masm.Negs(destreg32, Operand(lhsreg32)); + break; // Go to overflow check. + case 0: + masm.Mov(destreg32, wzr); + return; // Avoid overflow check. + case 1: + if (destreg != lhsreg) { + masm.Mov(destreg32, lhsreg32); + } + return; // Avoid overflow check. + case 2: + if (!mul->canOverflow()) { + masm.Add(destreg32, lhsreg32, Operand(lhsreg32)); + return; // Avoid overflow check. + } + masm.Adds(destreg32, lhsreg32, Operand(lhsreg32)); + break; // Go to overflow check. + default: + // Use shift if cannot overflow and constant is a power of 2 + if (!mul->canOverflow() && constant > 0) { + int32_t shift = FloorLog2(constant); + if ((1 << shift) == constant) { + masm.Lsl(destreg32, lhsreg32, shift); + return; + } + } + + // Otherwise, just multiply. We have to check for overflow. + // Negative zero was handled above. 
+ Label bailout; + Label* onOverflow = mul->canOverflow() ? &bailout : nullptr; + + vixl::UseScratchRegisterScope temps(&masm.asVIXL()); + const Register scratch = temps.AcquireW().asUnsized(); + + masm.move32(Imm32(constant), scratch); + masm.mul32(lhsreg, scratch, destreg, onOverflow); + + if (onOverflow) { + MOZ_ASSERT(lhsreg != destreg); + bailoutFrom(&bailout, ins->snapshot()); + } + return; + } + + // Overflow check. + if (mul->canOverflow()) { + bailoutIf(Assembler::Overflow, ins->snapshot()); + } + } else { + Register rhsreg = ToRegister(rhs); + const ARMRegister rhsreg32 = ARMRegister(rhsreg, 32); + + Label bailout; + Label* onOverflow = mul->canOverflow() ? &bailout : nullptr; + + if (mul->canBeNegativeZero()) { + // The product of two integer operands is negative zero iff one + // operand is zero, and the other is negative. Therefore, the + // sum of the two operands will also be negative (specifically, + // it will be the non-zero operand). If the result of the + // multiplication is 0, we can check the sign of the sum to + // determine whether we should bail out. + + // This code can bailout, so lowering guarantees that the input + // operands are not overwritten. + MOZ_ASSERT(destreg != lhsreg); + MOZ_ASSERT(destreg != rhsreg); + + // Do the multiplication. + masm.mul32(lhsreg, rhsreg, destreg, onOverflow); + + // Set Zero flag if destreg is 0. + masm.test32(destreg, destreg); + + // ccmn is 'conditional compare negative'. 
+ // If the Zero flag is set: + // perform a compare negative (compute lhs+rhs and set flags) + // else: + // clear flags + masm.Ccmn(lhsreg32, rhsreg32, vixl::NoFlag, Assembler::Zero); + + // Bails out if (lhs * rhs == 0) && (lhs + rhs < 0): + bailoutIf(Assembler::LessThan, ins->snapshot()); + + } else { + masm.mul32(lhsreg, rhsreg, destreg, onOverflow); + } + if (onOverflow) { + bailoutFrom(&bailout, ins->snapshot()); + } + } +} +
// Emits a signed 32-bit division of ins->lhs() by ins->rhs() into
// ins->output().
//
// The MDiv flags select how each special case is handled:
//  - division by zero:  wasm trap, result 0 (truncated), or bailout;
//  - INT32_MIN / -1:    wasm trap, result INT32_MIN (truncated), or bailout;
//  - 0 / negative:      bailout unless negative zero may be truncated;
//  - non-zero remainder: bailout unless the remainder may be truncated.
void CodeGenerator::visitDivI(LDivI* ins) {
  const Register lhs = ToRegister(ins->lhs());
  const Register rhs = ToRegister(ins->rhs());
  const Register output = ToRegister(ins->output());

  const ARMRegister lhs32 = toWRegister(ins->lhs());
  const ARMRegister rhs32 = toWRegister(ins->rhs());
  const ARMRegister temp32 = toWRegister(ins->getTemp(0));
  const ARMRegister output32 = toWRegister(ins->output());

  MDiv* mir = ins->mir();

  // Target of the early exits that already wrote the result to |output|.
  Label done;

  // Handle division by zero.
  if (mir->canBeDivideByZero()) {
    masm.test32(rhs, rhs);
    if (mir->trapOnError()) {
      Label nonZero;
      masm.j(Assembler::NonZero, &nonZero);
      masm.wasmTrap(wasm::Trap::IntegerDivideByZero, mir->bytecodeOffset());
      masm.bind(&nonZero);
    } else if (mir->canTruncateInfinities()) {
      // Truncated division by zero is zero: (Infinity|0 = 0).
      Label nonZero;
      masm.j(Assembler::NonZero, &nonZero);
      masm.Mov(output32, wzr);
      masm.jump(&done);
      masm.bind(&nonZero);
    } else {
      MOZ_ASSERT(mir->fallible());
      bailoutIf(Assembler::Zero, ins->snapshot());
    }
  }

  // Handle an integer overflow from (INT32_MIN / -1).
  // The integer division gives INT32_MIN, but should be -(double)INT32_MIN.
  if (mir->canBeNegativeOverflow()) {
    Label notOverflow;

    // Branch to handle the non-overflow cases.
    masm.branch32(Assembler::NotEqual, lhs, Imm32(INT32_MIN), &notOverflow);
    masm.branch32(Assembler::NotEqual, rhs, Imm32(-1), &notOverflow);

    // Handle overflow.
    if (mir->trapOnError()) {
      masm.wasmTrap(wasm::Trap::IntegerOverflow, mir->bytecodeOffset());
    } else if (mir->canTruncateOverflow()) {
      // (-INT32_MIN)|0 == INT32_MIN, which is already in lhs.
      masm.move32(lhs, output);
      masm.jump(&done);
    } else {
      MOZ_ASSERT(mir->fallible());
      bailout(ins->snapshot());
    }
    masm.bind(&notOverflow);
  }

  // Handle negative zero: lhs == 0 && rhs < 0.
  if (!mir->canTruncateNegativeZero() && mir->canBeNegativeZero()) {
    Label nonZero;
    masm.branch32(Assembler::NotEqual, lhs, Imm32(0), &nonZero);
    masm.cmp32(rhs, Imm32(0));
    bailoutIf(Assembler::LessThan, ins->snapshot());
    masm.bind(&nonZero);
  }

  // Perform integer division.
  if (mir->canTruncateRemainder()) {
    masm.Sdiv(output32, lhs32, rhs32);
  } else {
    vixl::UseScratchRegisterScope temps(&masm.asVIXL());
    ARMRegister scratch32 = temps.AcquireW();

    // ARM does not automatically calculate the remainder.
    // The ISR suggests multiplication to determine whether a remainder exists.
    // The quotient is kept in a scratch register so that |output| is only
    // written after the bailout check.
    masm.Sdiv(scratch32, lhs32, rhs32);
    masm.Mul(temp32, scratch32, rhs32);
    masm.Cmp(lhs32, temp32);
    bailoutIf(Assembler::NotEqual, ins->snapshot());
    masm.Mov(output32, scratch32);
  }

  masm.bind(&done);
}

+void CodeGenerator::visitDivPowTwoI(LDivPowTwoI* ins) { + const Register numerator = ToRegister(ins->numerator()); + const ARMRegister numerator32 = toWRegister(ins->numerator()); + const ARMRegister output32 = toWRegister(ins->output()); + + int32_t shift = ins->shift(); + bool negativeDivisor = ins->negativeDivisor(); + MDiv* mir = ins->mir(); + + if (!mir->isTruncated() && negativeDivisor) { + // 0 divided by a negative number returns a -0 double. + bailoutTest32(Assembler::Zero, numerator, numerator, ins->snapshot()); + } + + if (shift) { + if (!mir->isTruncated()) { + // If the remainder is != 0, bailout since this must be a double.
+ bailoutTest32(Assembler::NonZero, numerator, + Imm32(UINT32_MAX >> (32 - shift)), ins->snapshot()); + } + + if (mir->isUnsigned()) { + // shift right + masm.Lsr(output32, numerator32, shift); + } else { + ARMRegister temp32 = numerator32; + // Adjust the value so that shifting produces a correctly + // rounded result when the numerator is negative. See 10-1 + // "Signed Division by a Known Power of 2" in Henry + // S. Warren, Jr.'s Hacker's Delight. + if (mir->canBeNegativeDividend() && mir->isTruncated()) { + if (shift > 1) { + // Copy the sign bit of the numerator. (= (2^32 - 1) or 0) + masm.Asr(output32, numerator32, 31); + temp32 = output32; + } + // Divide by 2^(32 - shift) + // i.e. (= (2^32 - 1) / 2^(32 - shift) or 0) + // i.e. (= (2^shift - 1) or 0) + masm.Lsr(output32, temp32, 32 - shift); + // If signed, make any 1 bit below the shifted bits to bubble up, such + // that once shifted the value would be rounded towards 0. + masm.Add(output32, output32, numerator32); + temp32 = output32; + } + masm.Asr(output32, temp32, shift); + + if (negativeDivisor) { + masm.Neg(output32, output32); + } + } + return; + } + + if (negativeDivisor) { + // INT32_MIN / -1 overflows. + if (!mir->isTruncated()) { + masm.Negs(output32, numerator32); + bailoutIf(Assembler::Overflow, ins->snapshot()); + } else if (mir->trapOnError()) { + Label ok; + masm.Negs(output32, numerator32); + masm.branch(Assembler::NoOverflow, &ok); + masm.wasmTrap(wasm::Trap::IntegerOverflow, mir->bytecodeOffset()); + masm.bind(&ok); + } else { + // Do not set condition flags. + masm.Neg(output32, numerator32); + } + } else { + if (mir->isUnsigned() && !mir->isTruncated()) { + // Copy and set flags. + masm.Adds(output32, numerator32, 0); + // Unsigned division by 1 can overflow if output is not truncated, as we + // do not have an Unsigned type for MIR instructions. + bailoutIf(Assembler::Signed, ins->snapshot()); + } else { + // Copy the result. 
+ masm.Mov(output32, numerator32); + } + } +} + +void CodeGenerator::visitDivConstantI(LDivConstantI* ins) { + const ARMRegister lhs32 = toWRegister(ins->numerator()); + const ARMRegister lhs64 = toXRegister(ins->numerator()); + const ARMRegister const32 = toWRegister(ins->temp()); + const ARMRegister output32 = toWRegister(ins->output()); + const ARMRegister output64 = toXRegister(ins->output()); + int32_t d = ins->denominator(); + + // The absolute value of the denominator isn't a power of 2. + using mozilla::Abs; + MOZ_ASSERT((Abs(d) & (Abs(d) - 1)) != 0); + + // We will first divide by Abs(d), and negate the answer if d is negative. + // If desired, this can be avoided by generalizing computeDivisionConstants. + auto rmc = ReciprocalMulConstants::computeSignedDivisionConstants(Abs(d)); + + // We first compute (M * n) >> 32, where M = rmc.multiplier. + masm.Mov(const32, int32_t(rmc.multiplier)); + if (rmc.multiplier > INT32_MAX) { + MOZ_ASSERT(rmc.multiplier < (int64_t(1) << 32)); + + // We actually compute (int32_t(M) * n) instead, without the upper bit. + // Thus, (M * n) = (int32_t(M) * n) + n << 32. + // + // ((int32_t(M) * n) + n << 32) can't overflow, as both operands have + // opposite signs because int32_t(M) is negative. + masm.Lsl(output64, lhs64, 32); + + // Store (M * n) in output64. + masm.Smaddl(output64, const32, lhs32, output64); + } else { + // Store (M * n) in output64. + masm.Smull(output64, const32, lhs32); + } + + // (M * n) >> (32 + shift) is the truncated division answer if n is + // non-negative, as proved in the comments of computeDivisionConstants. We + // must add 1 later if n is negative to get the right answer in all cases. + masm.Asr(output64, output64, 32 + rmc.shiftAmount); + + // We'll subtract -1 instead of adding 1, because (n < 0 ? -1 : 0) can be + // computed with just a sign-extending shift of 31 bits. 
+  if (ins->canBeNegativeDividend()) {
+    masm.Asr(const32, lhs32, 31);
+    masm.Sub(output32, output32, const32);
+  }
+
+  // After this, output32 contains the correct truncated division result.
+  if (d < 0) {
+    masm.Neg(output32, output32);
+  }
+
+  if (!ins->mir()->isTruncated()) {
+    // This is a division op. Multiply the obtained value by d to check if
+    // the correct answer is an integer. This cannot overflow, since |d| > 1.
+    masm.Mov(const32, d);
+    masm.Msub(const32, output32, const32, lhs32);
+    // bailout if (lhs - output * d != 0)
+    masm.Cmp(const32, wzr);
+    auto bailoutCond = Assembler::NonZero;
+
+    // If lhs is zero and the divisor is negative, the answer should have
+    // been -0.
+    if (d < 0) {
+      // or bailout if (lhs == 0).
+      // ^                  ^
+      // |                  '-- masm.Ccmp(lhs32, wzr, .., ..)
+      // '-- masm.Ccmp(.., .., vixl::ZFlag, ! bailoutCond)
+      //
+      // That is: when the remainder test above produced Zero, compare lhs
+      // against zero; otherwise force the Z flag. Either way Z now means
+      // "bail out".
+      masm.Ccmp(lhs32, wzr, vixl::ZFlag, Assembler::Zero);
+      bailoutCond = Assembler::Zero;
+    }
+
+    // bailout if (lhs - output * d != 0) or (d < 0 && lhs == 0)
+    bailoutIf(bailoutCond, ins->snapshot());
+  }
+}
+
+// Emit an unsigned 32-bit division of ins->numerator() by the constant
+// ins->denominator(). A zero denominator is handled up front (wasm trap,
+// zero result, or bailout, depending on the MIR node); any other denominator
+// is asserted not to be a power of two and is divided by reciprocal
+// multiplication. ins->temp() is used as a scratch register.
+void CodeGenerator::visitUDivConstantI(LUDivConstantI* ins) {
+  const ARMRegister lhs32 = toWRegister(ins->numerator());
+  const ARMRegister lhs64 = toXRegister(ins->numerator());
+  const ARMRegister const32 = toWRegister(ins->temp());
+  const ARMRegister output32 = toWRegister(ins->output());
+  const ARMRegister output64 = toXRegister(ins->output());
+  uint32_t d = ins->denominator();
+
+  if (d == 0) {
+    if (ins->mir()->isTruncated()) {
+      if (ins->mir()->trapOnError()) {
+        masm.wasmTrap(wasm::Trap::IntegerDivideByZero,
+                      ins->mir()->bytecodeOffset());
+      } else {
+        masm.Mov(output32, wzr);
+      }
+    } else {
+      bailout(ins->snapshot());
+    }
+    return;
+  }
+
+  // The denominator isn't a power of 2 (see LDivPowTwoI).
+  MOZ_ASSERT((d & (d - 1)) != 0);
+
+  auto rmc = ReciprocalMulConstants::computeUnsignedDivisionConstants(d);
+
+  // We first compute (M * n), where M = rmc.multiplier.
+ masm.Mov(const32, int32_t(rmc.multiplier)); + masm.Umull(output64, const32, lhs32); + if (rmc.multiplier > UINT32_MAX) { + // M >= 2^32 and shift == 0 is impossible, as d >= 2 implies that + // ((M * n) >> (32 + shift)) >= n > floor(n/d) whenever n >= d, + // contradicting the proof of correctness in computeDivisionConstants. + MOZ_ASSERT(rmc.shiftAmount > 0); + MOZ_ASSERT(rmc.multiplier < (int64_t(1) << 33)); + + // We actually compute (uint32_t(M) * n) instead, without the upper bit. + // Thus, (M * n) = (uint32_t(M) * n) + n << 32. + // + // ((uint32_t(M) * n) + n << 32) can overflow. Hacker's Delight explains a + // trick to avoid this overflow case, but we can avoid it by computing the + // addition on 64 bits registers. + // + // Compute ((uint32_t(M) * n) >> 32 + n) + masm.Add(output64, lhs64, Operand(output64, vixl::LSR, 32)); + + // (M * n) >> (32 + shift) is the truncated division answer. + masm.Lsr(output64, output64, rmc.shiftAmount); + } else { + // (M * n) >> (32 + shift) is the truncated division answer. + masm.Lsr(output64, output64, 32 + rmc.shiftAmount); + } + + // We now have the truncated division value. We are checking whether the + // division resulted in an integer, we multiply the obtained value by d and + // check the remainder of the division. + if (!ins->mir()->isTruncated()) { + masm.Mov(const32, d); + masm.Msub(const32, output32, const32, lhs32); + // bailout if (lhs - output * d != 0) + masm.Cmp(const32, const32); + bailoutIf(Assembler::NonZero, ins->snapshot()); + } +} + +void CodeGenerator::visitModI(LModI* ins) { + ARMRegister lhs = toWRegister(ins->lhs()); + ARMRegister rhs = toWRegister(ins->rhs()); + ARMRegister output = toWRegister(ins->output()); + Label done; + + MMod* mir = ins->mir(); + + // Prevent divide by zero. 
+  if (mir->canBeDivideByZero()) {
+    if (mir->isTruncated()) {
+      if (mir->trapOnError()) {
+        Label nonZero;
+        masm.Cbnz(rhs, &nonZero);
+        masm.wasmTrap(wasm::Trap::IntegerDivideByZero, mir->bytecodeOffset());
+        masm.bind(&nonZero);
+      } else {
+        // Truncated division by zero yields integer zero.
+        // Copying rhs is enough: the branch to |done| is only taken when rhs
+        // is zero, and otherwise output is overwritten below.
+        masm.Mov(output, rhs);
+        masm.Cbz(rhs, &done);
+      }
+    } else {
+      // Non-truncated division by zero produces a non-integer.
+      MOZ_ASSERT(!gen->compilingWasm());
+      masm.Cmp(rhs, Operand(0));
+      bailoutIf(Assembler::Equal, ins->snapshot());
+    }
+  }
+
+  // Signed division.
+  masm.Sdiv(output, lhs, rhs);
+
+  // Compute the remainder: output = lhs - (output * rhs).
+  masm.Msub(output, output, rhs, lhs);
+
+  if (mir->canBeNegativeDividend() && !mir->isTruncated()) {
+    // If output == 0 and lhs < 0, then the result should be double -0.0.
+    // Note that this guard handles lhs == INT_MIN and rhs == -1:
+    //   output = INT_MIN - (INT_MIN / -1) * -1
+    //          = INT_MIN - INT_MIN
+    //          = 0
+    masm.Cbnz(output, &done);
+    bailoutCmp32(Assembler::LessThan, lhs, Imm32(0), ins->snapshot());
+  }
+
+  if (done.used()) {
+    masm.bind(&done);
+  }
+}
+
+// Emit a 32-bit remainder by the power of two (1 << ins->shift()) using bit
+// masks. A possibly-negative dividend is negated, masked and negated again
+// so that the result keeps the sign of the dividend.
+void CodeGenerator::visitModPowTwoI(LModPowTwoI* ins) {
+  Register lhs = ToRegister(ins->getOperand(0));
+  ARMRegister lhsw = toWRegister(ins->getOperand(0));
+  ARMRegister outw = toWRegister(ins->output());
+
+  int32_t shift = ins->shift();
+  bool canBeNegative =
+      !ins->mir()->isUnsigned() && ins->mir()->canBeNegativeDividend();
+
+  Label negative;
+  if (canBeNegative) {
+    // Switch based on sign of the lhs.
+    // Positive numbers are just a bitmask.
+    masm.branchTest32(Assembler::Signed, lhs, lhs, &negative);
+  }
+
+  masm.And(outw, lhsw, Operand((uint32_t(1) << shift) - 1));
+
+  if (canBeNegative) {
+    Label done;
+    masm.jump(&done);
+
+    // Negative numbers need a negate, bitmask, negate.
+    masm.bind(&negative);
+    masm.Neg(outw, Operand(lhsw));
+    masm.And(outw, outw, Operand((uint32_t(1) << shift) - 1));
+
+    // Since a%b has the same sign as a, and a is negative in this branch,
+    // an answer of 0 means the correct result is actually -0. Bail out.
+    if (!ins->mir()->isTruncated()) {
+      // Negs also sets the flags, so the Zero test needs no extra compare.
+      masm.Negs(outw, Operand(outw));
+      bailoutIf(Assembler::Zero, ins->snapshot());
+    } else {
+      masm.Neg(outw, Operand(outw));
+    }
+
+    masm.bind(&done);
+  }
+}
+
+// Emit a 32-bit remainder by the constant (1 << ins->shift()) - 1. The
+// dividend is consumed |shift| bits at a time and the chunks are summed
+// modulo the divisor, as explained in the block comment below. Uses two LIR
+// temps (hold, remain) plus one VIXL scratch register.
+void CodeGenerator::visitModMaskI(LModMaskI* ins) {
+  MMod* mir = ins->mir();
+  int32_t shift = ins->shift();
+
+  const Register src = ToRegister(ins->getOperand(0));
+  const Register dest = ToRegister(ins->getDef(0));
+  const Register hold = ToRegister(ins->getTemp(0));
+  const Register remain = ToRegister(ins->getTemp(1));
+
+  const ARMRegister src32 = ARMRegister(src, 32);
+  const ARMRegister dest32 = ARMRegister(dest, 32);
+  const ARMRegister remain32 = ARMRegister(remain, 32);
+
+  vixl::UseScratchRegisterScope temps(&masm.asVIXL());
+  const ARMRegister scratch32 = temps.AcquireW();
+  const Register scratch = scratch32.asUnsized();
+
+  // We wish to compute x % ((1<<y) - 1) for a known constant, y.
+  //
+  // 1. Let b = (1<<y) and C = (1<<y)-1, then think of the 32 bit dividend as
+  // a number in base b, namely c_0*1 + c_1*b + c_2*b^2 ... c_n*b^n
+  //
+  // 2. Since both addition and multiplication commute with modulus:
+  //   x % C == (c_0 + c_1*b + ... + c_n*b^n) % C ==
+  //    (c_0 % C) + (c_1%C) * (b % C) + (c_2 % C) * (b^2 % C)...
+  //
+  // 3. Since b == C + 1, b % C == 1, and b^n % C == 1 the whole thing
+  // simplifies to: c_0 + c_1 + c_2 ... c_n % C
+  //
+  // Each c_n can easily be computed by a shift/bitextract, and the modulus
+  // can be maintained by simply subtracting by C whenever the number gets
+  // over C.
+  int32_t mask = (1 << shift) - 1;
+  Label loop;
+
+  // Register 'hold' holds -1 if the value was negative, 1 otherwise.
+  // The remain reg holds the remaining bits that have not been processed.
+  // The scratch reg serves as a temporary location to store extracted bits.
+  // The dest reg is the accumulator, becoming final result.
+  //
+  // Move the whole value into the remain.
+  masm.Mov(remain32, src32);
+  // Zero out the dest.
+  masm.Mov(dest32, wzr);
+  // Set the hold appropriately.
+  {
+    Label negative;
+    masm.branch32(Assembler::Signed, remain, Imm32(0), &negative);
+    masm.move32(Imm32(1), hold);
+    masm.jump(&loop);
+
+    masm.bind(&negative);
+    masm.move32(Imm32(-1), hold);
+    // Work on the absolute value; the sign is reapplied after the loop.
+    masm.neg32(remain);
+  }
+
+  // Begin the main loop.
+  masm.bind(&loop);
+  {
+    // Extract the bottom bits into scratch.
+    masm.And(scratch32, remain32, Operand(mask));
+    // Add those bits to the accumulator.
+    masm.Add(dest32, dest32, scratch32);
+    // Do a trial subtraction. This functions as a cmp but remembers the result.
+    masm.Subs(scratch32, dest32, Operand(mask));
+    // If (sum - C) >= 0, store sum - C back into sum, thus performing a
+    // modulus.
+    {
+      Label sumSigned;
+      masm.branch32(Assembler::Signed, scratch, scratch, &sumSigned);
+      masm.Mov(dest32, scratch32);
+      masm.bind(&sumSigned);
+    }
+    // Get rid of the bits that we extracted before.
+    masm.Lsr(remain32, remain32, shift);
+    // If the shift produced zero, finish, otherwise, continue in the loop.
+    masm.branchTest32(Assembler::NonZero, remain, remain, &loop);
+  }
+
+  // Check the hold to see if we need to negate the result.
+  {
+    Label done;
+
+    // If the hold was negative (the dividend was negative), negate the result
+    // to match JS expectations.
+    masm.branchTest32(Assembler::NotSigned, hold, hold, &done);
+    if (mir->canBeNegativeDividend() && !mir->isTruncated()) {
+      // Bail in case of negative zero hold.
+      // NOTE(review): hold is only ever set to 1 or -1 above, so a Zero test
+      // on hold looks like it can never bail out. A negative-zero check would
+      // presumably have to test dest instead - confirm.
+      bailoutTest32(Assembler::Zero, hold, hold, ins->snapshot());
+    }
+
+    masm.neg32(dest);
+    masm.bind(&done);
+  }
+}
+
+// Emit a 64-bit signed division of |dividend| by |divisor| and store the
+// quotient in a BigInt newly allocated into |output|. |dividend| is
+// overwritten with the quotient; |divisor| and |fail| are handed to
+// newGCBigInt.
+void CodeGeneratorARM64::emitBigIntDiv(LBigIntDiv* ins, Register dividend,
+                                       Register divisor, Register output,
+                                       Label* fail) {
+  // Callers handle division by zero and integer overflow.
+
+  const ARMRegister dividend64(dividend, 64);
+  const ARMRegister divisor64(divisor, 64);
+
+  masm.Sdiv(/* result= */ dividend64, dividend64, divisor64);
+
+  // Create and return the result.
+  masm.newGCBigInt(output, divisor, initialBigIntHeap(), fail);
+  masm.initializeBigInt(output, dividend);
+}
+
+// Emit a 64-bit signed remainder of |dividend| by |divisor| and store it in
+// a BigInt newly allocated into |output|. |dividend| is overwritten with the
+// remainder and |output| temporarily holds the quotient; |divisor| and |fail|
+// are handed to newGCBigInt.
+void CodeGeneratorARM64::emitBigIntMod(LBigIntMod* ins, Register dividend,
+                                       Register divisor, Register output,
+                                       Label* fail) {
+  // Callers handle division by zero and integer overflow.
+
+  const ARMRegister dividend64(dividend, 64);
+  const ARMRegister divisor64(divisor, 64);
+  const ARMRegister output64(output, 64);
+
+  // Signed division.
+  masm.Sdiv(output64, dividend64, divisor64);
+
+  // Compute the remainder: dividend = dividend - (output * divisor).
+  masm.Msub(/* result= */ dividend64, output64, divisor64, dividend64);
+
+  // Create and return the result.
+  masm.newGCBigInt(output, divisor, initialBigIntHeap(), fail);
+  masm.initializeBigInt(output, dividend);
+}
+
+// Emit a 32-bit bitwise NOT of the operand into the output register.
+void CodeGenerator::visitBitNotI(LBitNotI* ins) {
+  const LAllocation* input = ins->getOperand(0);
+  const LDefinition* output = ins->getDef(0);
+  masm.Mvn(toWRegister(output), toWOperand(input));
+}
+
+// Emit a 64-bit bitwise NOT of the input register into the output register.
+void CodeGenerator::visitBitNotI64(LBitNotI64* ins) {
+  Register input = ToRegister(ins->input());
+  Register output = ToRegister(ins->output());
+  masm.Mvn(vixl::Register(output, 64), vixl::Register(input, 64));
+}
+
+// Emit a 32-bit bitwise OR, XOR or AND, selected by ins->bitop(). Any other
+// opcode crashes.
+void CodeGenerator::visitBitOpI(LBitOpI* ins) {
+  const ARMRegister lhs = toWRegister(ins->getOperand(0));
+  const Operand rhs = toWOperand(ins->getOperand(1));
+  const ARMRegister dest = toWRegister(ins->getDef(0));
+
+  switch (ins->bitop()) {
+    case JSOp::BitOr:
+      masm.Orr(dest, lhs, rhs);
+      break;
+    case JSOp::BitXor:
+      masm.Eor(dest, lhs, rhs);
+      break;
+    case JSOp::BitAnd:
+      masm.And(dest, lhs, rhs);
+      break;
+    default:
+      MOZ_CRASH("unexpected binary opcode");
+  }
+}
+
+// Emit a 32-bit shift (<<, >> or >>>) selected by ins->bitop(). The shift
+// amount is either a constant, masked to five bits, or a register. A fallible
+// >>> bails out when the result has its sign bit set.
+void CodeGenerator::visitShiftI(LShiftI* ins) {
+  const ARMRegister lhs = toWRegister(ins->lhs());
+  const
LAllocation* rhs = ins->rhs();
+  const ARMRegister dest = toWRegister(ins->output());
+
+  if (rhs->isConstant()) {
+    int32_t shift = ToInt32(rhs) & 0x1F;
+    switch (ins->bitop()) {
+      case JSOp::Lsh:
+        masm.Lsl(dest, lhs, shift);
+        break;
+      case JSOp::Rsh:
+        masm.Asr(dest, lhs, shift);
+        break;
+      case JSOp::Ursh:
+        if (shift) {
+          masm.Lsr(dest, lhs, shift);
+        } else if (ins->mir()->toUrsh()->fallible()) {
+          // x >>> 0 can overflow.
+          // Ands copies lhs into dest and sets the flags so that the sign
+          // bit can be tested.
+          masm.Ands(dest, lhs, Operand(0xFFFFFFFF));
+          bailoutIf(Assembler::Signed, ins->snapshot());
+        } else {
+          masm.Mov(dest, lhs);
+        }
+        break;
+      default:
+        MOZ_CRASH("Unexpected shift op");
+    }
+  } else {
+    const ARMRegister rhsreg = toWRegister(rhs);
+    switch (ins->bitop()) {
+      case JSOp::Lsh:
+        masm.Lsl(dest, lhs, rhsreg);
+        break;
+      case JSOp::Rsh:
+        masm.Asr(dest, lhs, rhsreg);
+        break;
+      case JSOp::Ursh:
+        masm.Lsr(dest, lhs, rhsreg);
+        if (ins->mir()->toUrsh()->fallible()) {
+          // x >>> 0 can overflow.
+          masm.Cmp(dest, Operand(0));
+          bailoutIf(Assembler::LessThan, ins->snapshot());
+        }
+        break;
+      default:
+        MOZ_CRASH("Unexpected shift op");
+    }
+  }
+}
+
+// Emit an unsigned right shift whose result is produced as a double: the
+// 32-bit shifted value is converted with convertUInt32ToDouble. The shift
+// amount is masked to five bits in both the constant and the register case.
+void CodeGenerator::visitUrshD(LUrshD* ins) {
+  const ARMRegister lhs = toWRegister(ins->lhs());
+  const LAllocation* rhs = ins->rhs();
+  const FloatRegister out = ToFloatRegister(ins->output());
+
+  const Register temp = ToRegister(ins->temp());
+  const ARMRegister temp32 = toWRegister(ins->temp());
+
+  if (rhs->isConstant()) {
+    int32_t shift = ToInt32(rhs) & 0x1F;
+    if (shift) {
+      masm.Lsr(temp32, lhs, shift);
+      masm.convertUInt32ToDouble(temp, out);
+    } else {
+      masm.convertUInt32ToDouble(ToRegister(ins->lhs()), out);
+    }
+  } else {
+    masm.And(temp32, toWRegister(rhs), Operand(0x1F));
+    masm.Lsr(temp32, lhs, temp32);
+    masm.convertUInt32ToDouble(temp, out);
+  }
+}
+
+// Emit Math.pow(input, 0.5). This is a square root with two special cases
+// handled first unless the MIR node rules them out: -Infinity yields
+// +Infinity and -0 yields +0.
+void CodeGenerator::visitPowHalfD(LPowHalfD* ins) {
+  FloatRegister input = ToFloatRegister(ins->input());
+  FloatRegister output = ToFloatRegister(ins->output());
+
+  ScratchDoubleScope scratch(masm);
+
+  Label done,
sqrt;
+
+  if (!ins->mir()->operandIsNeverNegativeInfinity()) {
+    // Branch if not -Infinity.
+    masm.loadConstantDouble(NegativeInfinity<double>(), scratch);
+
+    Assembler::DoubleCondition cond = Assembler::DoubleNotEqualOrUnordered;
+    if (ins->mir()->operandIsNeverNaN()) {
+      cond = Assembler::DoubleNotEqual;
+    }
+    masm.branchDouble(cond, input, scratch, &sqrt);
+
+    // Math.pow(-Infinity, 0.5) == Infinity.
+    // scratch still holds -Infinity here, so 0 - scratch is +Infinity.
+    masm.zeroDouble(output);
+    masm.subDouble(scratch, output);
+    masm.jump(&done);
+
+    masm.bind(&sqrt);
+  }
+
+  if (!ins->mir()->operandIsNeverNegativeZero()) {
+    // Math.pow(-0, 0.5) == 0 == Math.pow(0, 0.5).
+    // Adding 0 converts any -0 to 0.
+    masm.zeroDouble(scratch);
+    masm.addDouble(input, scratch);
+    masm.sqrtDouble(scratch, output);
+  } else {
+    masm.sqrtDouble(input, output);
+  }
+
+  masm.bind(&done);
+}
+
+// Convert an LAllocation into a MoveOperand: a general register, a float
+// register, or otherwise an address. Stack-area allocations are tagged as an
+// effective address and every other address as a memory operand.
+MoveOperand CodeGeneratorARM64::toMoveOperand(const LAllocation a) const {
+  if (a.isGeneralReg()) {
+    return MoveOperand(ToRegister(a));
+  }
+  if (a.isFloatReg()) {
+    return MoveOperand(ToFloatRegister(a));
+  }
+  MoveOperand::Kind kind = a.isStackArea() ? MoveOperand::Kind::EffectiveAddress
+                                           : MoveOperand::Kind::Memory;
+  return MoveOperand(ToAddress(a), kind);
+}
+
+// Out-of-line code object that carries the jump table of a table switch. It
+// records the MTableSwitch being compiled and the CodeLabel that marks the
+// start of the table.
+class js::jit::OutOfLineTableSwitch
+    : public OutOfLineCodeBase<CodeGeneratorARM64> {
+  MTableSwitch* mir_;
+  CodeLabel jumpLabel_;
+
+  void accept(CodeGeneratorARM64* codegen) override {
+    codegen->visitOutOfLineTableSwitch(this);
+  }
+
+ public:
+  explicit OutOfLineTableSwitch(MTableSwitch* mir) : mir_(mir) {}
+
+  MTableSwitch* mir() const { return mir_; }
+
+  CodeLabel* jumpLabel() { return &jumpLabel_; }
+};
+
+// Emit the jump table for |ool|: one pointer-sized code pointer per switch
+// case, preceded by the table's label.
+void CodeGeneratorARM64::visitOutOfLineTableSwitch(OutOfLineTableSwitch* ool) {
+  MTableSwitch* mir = ool->mir();
+
+  // Prevent nop and pool sequences from appearing in the jump table.
+  AutoForbidPoolsAndNops afp(
+      &masm, (mir->numCases() + 1) * (sizeof(void*) / vixl::kInstructionSize));
+  masm.haltingAlign(sizeof(void*));
+  masm.bind(ool->jumpLabel());
+  masm.addCodeLabel(*ool->jumpLabel());
+
+  for (size_t i = 0; i < mir->numCases(); i++) {
+    LBlock* caseblock = skipTrivialBlocks(mir->getCase(i))->lir();
+    Label* caseheader = caseblock->label();
+    uint32_t caseoffset = caseheader->offset();
+
+    // The entries of the jump table need to be absolute addresses,
+    // and thus must be patched after codegen is finished.
+    CodeLabel cl;
+    masm.writeCodePointer(&cl);
+    cl.target()->bind(caseoffset);
+    masm.addCodeLabel(cl);
+  }
+}
+
+// Emit the dispatch sequence of a table switch. |index| is rebased so that
+// the lowest case is 0, out-of-range values branch to the default case, and
+// the remaining values jump through the out-of-line jump table. |base|
+// receives the address of the table; |index| is modified when mir->low() is
+// not zero.
+void CodeGeneratorARM64::emitTableSwitchDispatch(MTableSwitch* mir,
+                                                 Register index,
+                                                 Register base) {
+  Label* defaultcase = skipTrivialBlocks(mir->getDefault())->lir()->label();
+
+  // Let the lowest table entry be indexed at 0.
+  if (mir->low() != 0) {
+    masm.sub32(Imm32(mir->low()), index);
+  }
+
+  // Jump to the default case if input is out of range.
+  // The comparison is unsigned, so indices below mir->low(), which are
+  // negative after the subtraction above, also take the default case.
+  int32_t cases = mir->numCases();
+  masm.branch32(Assembler::AboveOrEqual, index, Imm32(cases), defaultcase);
+
+  // Because the target code has not yet been generated, we cannot know the
+  // instruction offsets for use as jump targets. Therefore we construct
+  // an OutOfLineTableSwitch that winds up holding the jump table.
+  //
+  // Because the jump table is generated as part of out-of-line code,
+  // it is generated after all the regular codegen, so the jump targets
+  // are guaranteed to exist when generating the jump table.
+  OutOfLineTableSwitch* ool = new (alloc()) OutOfLineTableSwitch(mir);
+  addOutOfLineCode(ool, mir);
+
+  // Use the index to get the address of the jump target from the table.
+  masm.mov(ool->jumpLabel(), base);
+  BaseIndex pointer(base, index, ScalePointer);
+
+  // Load the target from the jump table and branch to it.
+  masm.branchToComputedAddress(pointer);
+}
+
+// Emit a double-precision add, subtract, multiply or divide, selected by
+// math->jsop(). Any other opcode crashes.
+void CodeGenerator::visitMathD(LMathD* math) {
+  ARMFPRegister lhs(ToFloatRegister(math->lhs()), 64);
+  ARMFPRegister rhs(ToFloatRegister(math->rhs()), 64);
+  ARMFPRegister output(ToFloatRegister(math->output()), 64);
+
+  switch (math->jsop()) {
+    case JSOp::Add:
+      masm.Fadd(output, lhs, rhs);
+      break;
+    case JSOp::Sub:
+      masm.Fsub(output, lhs, rhs);
+      break;
+    case JSOp::Mul:
+      masm.Fmul(output, lhs, rhs);
+      break;
+    case JSOp::Div:
+      masm.Fdiv(output, lhs, rhs);
+      break;
+    default:
+      MOZ_CRASH("unexpected opcode");
+  }
+}
+
+// Single-precision counterpart of visitMathD: the same four operations on
+// 32-bit float registers.
+void CodeGenerator::visitMathF(LMathF* math) {
+  ARMFPRegister lhs(ToFloatRegister(math->lhs()), 32);
+  ARMFPRegister rhs(ToFloatRegister(math->rhs()), 32);
+  ARMFPRegister output(ToFloatRegister(math->output()), 32);
+
+  switch (math->jsop()) {
+    case JSOp::Add:
+      masm.Fadd(output, lhs, rhs);
+      break;
+    case JSOp::Sub:
+      masm.Fsub(output, lhs, rhs);
+      break;
+    case JSOp::Mul:
+      masm.Fmul(output, lhs, rhs);
+      break;
+    case JSOp::Div:
+      masm.Fdiv(output, lhs, rhs);
+      break;
+    default:
+      MOZ_CRASH("unexpected opcode");
+  }
+}
+
+// Emit a 32-bit count-leading-zeros.
+void CodeGenerator::visitClzI(LClzI* lir) {
+  ARMRegister input = toWRegister(lir->input());
+  ARMRegister output = toWRegister(lir->output());
+  masm.Clz(output, input);
+}
+
+// Emit a 32-bit count-trailing-zeros; the input is not assumed to be
+// non-zero.
+void CodeGenerator::visitCtzI(LCtzI* lir) {
+  Register input = ToRegister(lir->input());
+  Register output = ToRegister(lir->output());
+  masm.ctz32(input, output, /* knownNotZero = */ false);
+}
+
+// Emit a double -> int32 truncation by delegating to emitTruncateDouble.
+void CodeGenerator::visitTruncateDToInt32(LTruncateDToInt32* ins) {
+  emitTruncateDouble(ToFloatRegister(ins->input()), ToRegister(ins->output()),
+                     ins->mir());
+}
+
+// Round a double to an integral double using the MIR node's rounding mode.
+void CodeGenerator::visitNearbyInt(LNearbyInt* lir) {
+  FloatRegister input = ToFloatRegister(lir->input());
+  FloatRegister output = ToFloatRegister(lir->output());
+
+  RoundingMode roundingMode = lir->mir()->roundingMode();
+  masm.nearbyIntDouble(roundingMode, input, output);
+}
+
+// Float32 counterpart of visitNearbyInt.
+void CodeGenerator::visitNearbyIntF(LNearbyIntF* lir) { +
FloatRegister input = ToFloatRegister(lir->input());
+  FloatRegister output = ToFloatRegister(lir->output());
+
+  RoundingMode roundingMode = lir->mir()->roundingMode();
+  masm.nearbyIntFloat32(roundingMode, input, output);
+}
+
+// Emit a double -> int32 truncation for the wasm builtin variant by
+// delegating to emitTruncateDouble.
+void CodeGenerator::visitWasmBuiltinTruncateDToInt32(
+    LWasmBuiltinTruncateDToInt32* lir) {
+  emitTruncateDouble(ToFloatRegister(lir->getOperand(0)),
+                     ToRegister(lir->getDef(0)), lir->mir());
+}
+
+// Emit a float32 -> int32 truncation by delegating to emitTruncateFloat32.
+void CodeGenerator::visitTruncateFToInt32(LTruncateFToInt32* ins) {
+  emitTruncateFloat32(ToFloatRegister(ins->input()), ToRegister(ins->output()),
+                      ins->mir());
+}
+
+// Emit a float32 -> int32 truncation for the wasm builtin variant by
+// delegating to emitTruncateFloat32.
+void CodeGenerator::visitWasmBuiltinTruncateFToInt32(
+    LWasmBuiltinTruncateFToInt32* lir) {
+  emitTruncateFloat32(ToFloatRegister(lir->getOperand(0)),
+                      ToRegister(lir->getDef(0)), lir->mir());
+}
+
+// Return operand |pos| of |ins| as a ValueOperand, built from the single
+// register that holds it.
+ValueOperand CodeGeneratorARM64::ToValue(LInstruction* ins, size_t pos) {
+  return ValueOperand(ToRegister(ins->getOperand(pos)));
+}
+
+// Not implemented for ARM64: always crashes.
+ValueOperand CodeGeneratorARM64::ToTempValue(LInstruction* ins, size_t pos) {
+  MOZ_CRASH("CodeGeneratorARM64::ToTempValue");
+}
+
+// Materialize the constant Value carried by |value| into its output.
+void CodeGenerator::visitValue(LValue* value) {
+  ValueOperand result = ToOutValue(value);
+  masm.moveValue(value->value(), result);
+}
+
+// Box the typed operand of |box| into a Value in the output.
+void CodeGenerator::visitBox(LBox* box) {
+  const LAllocation* in = box->getOperand(0);
+  ValueOperand result = ToOutValue(box);
+
+  masm.moveValue(TypedOrValueRegister(box->type(), ToAnyRegister(in)), result);
+}
+
+// Unbox a Value into a payload register of type mir->type(). A fallible
+// unbox uses the fallibleUnbox* helpers and bails out through |bail|. An
+// infallible unbox only asserts the tag in DEBUG builds.
+void CodeGenerator::visitUnbox(LUnbox* unbox) {
+  MUnbox* mir = unbox->mir();
+
+  Register result = ToRegister(unbox->output());
+
+  if (mir->fallible()) {
+    const ValueOperand value = ToValue(unbox, LUnbox::Input);
+    Label bail;
+    switch (mir->type()) {
+      case MIRType::Int32:
+        masm.fallibleUnboxInt32(value, result, &bail);
+        break;
+      case MIRType::Boolean:
+        masm.fallibleUnboxBoolean(value, result, &bail);
+        break;
+      case MIRType::Object:
+        masm.fallibleUnboxObject(value, result, &bail);
+        break;
+      case MIRType::String:
+        masm.fallibleUnboxString(value, result,
&bail);
+        break;
+      case MIRType::Symbol:
+        masm.fallibleUnboxSymbol(value, result, &bail);
+        break;
+      case MIRType::BigInt:
+        masm.fallibleUnboxBigInt(value, result, &bail);
+        break;
+      default:
+        MOZ_CRASH("Given MIRType cannot be unboxed.");
+    }
+    bailoutFrom(&bail, unbox->snapshot());
+    return;
+  }
+
+  // Infallible unbox.
+
+  ValueOperand input = ToValue(unbox, LUnbox::Input);
+
+#ifdef DEBUG
+  // Assert the types match.
+  JSValueTag tag = MIRTypeToTag(mir->type());
+  Label ok;
+  {
+    ScratchTagScope scratch(masm, input);
+    masm.splitTagForTest(input, scratch);
+    masm.cmpTag(scratch, ImmTag(tag));
+  }
+  masm.B(&ok, Assembler::Condition::Equal);
+  masm.assumeUnreachable("Infallible unbox type mismatch");
+  masm.bind(&ok);
+#endif
+
+  switch (mir->type()) {
+    case MIRType::Int32:
+      masm.unboxInt32(input, result);
+      break;
+    case MIRType::Boolean:
+      masm.unboxBoolean(input, result);
+      break;
+    case MIRType::Object:
+      masm.unboxObject(input, result);
+      break;
+    case MIRType::String:
+      masm.unboxString(input, result);
+      break;
+    case MIRType::Symbol:
+      masm.unboxSymbol(input, result);
+      break;
+    case MIRType::BigInt:
+      masm.unboxBigInt(input, result);
+      break;
+    default:
+      MOZ_CRASH("Given MIRType cannot be unboxed.");
+  }
+}
+
+// Load the double constant carried by |ins| into its output register.
+void CodeGenerator::visitDouble(LDouble* ins) {
+  const LDefinition* out = ins->getDef(0);
+  masm.loadConstantDouble(ins->value(), ToFloatRegister(out));
+}
+
+// Load the float32 constant carried by |ins| into its output register.
+void CodeGenerator::visitFloat32(LFloat32* ins) {
+  const LDefinition* out = ins->getDef(0);
+  masm.loadConstantFloat32(ins->value(), ToFloatRegister(out));
+}
+
+// Branch on the truthiness of a double: compare it with 0.0 and go to
+// ifFalse when it is zero or NaN, otherwise to ifTrue.
+void CodeGenerator::visitTestDAndBranch(LTestDAndBranch* test) {
+  const LAllocation* opd = test->input();
+  MBasicBlock* ifTrue = test->ifTrue();
+  MBasicBlock* ifFalse = test->ifFalse();
+
+  masm.Fcmp(ARMFPRegister(ToFloatRegister(opd), 64), 0.0);
+
+  // If the compare set the 0 bit, then the result is definitely false.
+  jumpToBlock(ifFalse, Assembler::Zero);
+
+  // Overflow means one of the operands was NaN, which is also false.
+  jumpToBlock(ifFalse, Assembler::Overflow);
+  jumpToBlock(ifTrue);
+}
+
+// Float32 counterpart of visitTestDAndBranch: go to ifFalse when the input
+// is zero or NaN, otherwise to ifTrue.
+void CodeGenerator::visitTestFAndBranch(LTestFAndBranch* test) {
+  const LAllocation* opd = test->input();
+  MBasicBlock* ifTrue = test->ifTrue();
+  MBasicBlock* ifFalse = test->ifFalse();
+
+  masm.Fcmp(ARMFPRegister(ToFloatRegister(opd), 32), 0.0);
+
+  // If the compare set the 0 bit, then the result is definitely false.
+  jumpToBlock(ifFalse, Assembler::Zero);
+
+  // Overflow means one of the operands was NaN, which is also false.
+  jumpToBlock(ifFalse, Assembler::Overflow);
+  jumpToBlock(ifTrue);
+}
+
+// Compare two doubles with the condition derived from the MIR node's JSOp
+// and set the output register to 1 or 0 accordingly.
+void CodeGenerator::visitCompareD(LCompareD* comp) {
+  const FloatRegister left = ToFloatRegister(comp->left());
+  const FloatRegister right = ToFloatRegister(comp->right());
+  ARMRegister output = toWRegister(comp->output());
+  Assembler::DoubleCondition cond = JSOpToDoubleCondition(comp->mir()->jsop());
+
+  masm.compareDouble(cond, left, right);
+  masm.cset(output, Assembler::ConditionFromDoubleCondition(cond));
+}
+
+// Float32 counterpart of visitCompareD.
+void CodeGenerator::visitCompareF(LCompareF* comp) {
+  const FloatRegister left = ToFloatRegister(comp->left());
+  const FloatRegister right = ToFloatRegister(comp->right());
+  ARMRegister output = toWRegister(comp->output());
+  Assembler::DoubleCondition cond = JSOpToDoubleCondition(comp->mir()->jsop());
+
+  masm.compareFloat(cond, left, right);
+  masm.cset(output, Assembler::ConditionFromDoubleCondition(cond));
+}
+
+// Compare two doubles and branch to ifTrue or ifFalse on the result.
+void CodeGenerator::visitCompareDAndBranch(LCompareDAndBranch* comp) {
+  const FloatRegister left = ToFloatRegister(comp->left());
+  const FloatRegister right = ToFloatRegister(comp->right());
+  Assembler::DoubleCondition doubleCond =
+      JSOpToDoubleCondition(comp->cmpMir()->jsop());
+  Assembler::Condition cond =
+      Assembler::ConditionFromDoubleCondition(doubleCond);
+
+  masm.compareDouble(doubleCond, left, right);
+  emitBranch(cond, comp->ifTrue(),
comp->ifFalse());
+}
+
+// Compare two float32 values and branch to ifTrue or ifFalse on the result.
+void CodeGenerator::visitCompareFAndBranch(LCompareFAndBranch* comp) {
+  const FloatRegister left = ToFloatRegister(comp->left());
+  const FloatRegister right = ToFloatRegister(comp->right());
+  Assembler::DoubleCondition doubleCond =
+      JSOpToDoubleCondition(comp->cmpMir()->jsop());
+  Assembler::Condition cond =
+      Assembler::ConditionFromDoubleCondition(doubleCond);
+
+  masm.compareFloat(doubleCond, left, right);
+  emitBranch(cond, comp->ifTrue(), comp->ifFalse());
+}
+
+// Test (left & right), without storing the result, and branch on
+// baab->cond(). Handles 32- and 64-bit operands, with the right-hand side
+// either a constant or a register.
+void CodeGenerator::visitBitAndAndBranch(LBitAndAndBranch* baab) {
+  if (baab->is64()) {
+    ARMRegister regL = toXRegister(baab->left());
+    if (baab->right()->isConstant()) {
+      masm.Tst(regL, Operand(ToInt64(baab->right())));
+    } else {
+      masm.Tst(regL, toXRegister(baab->right()));
+    }
+  } else {
+    ARMRegister regL = toWRegister(baab->left());
+    if (baab->right()->isConstant()) {
+      masm.Tst(regL, Operand(ToInt32(baab->right())));
+    } else {
+      masm.Tst(regL, toWRegister(baab->right()));
+    }
+  }
+  emitBranch(baab->cond(), baab->ifTrue(), baab->ifFalse());
+}
+
+// Convert an unsigned 32-bit integer register to a double.
+void CodeGenerator::visitWasmUint32ToDouble(LWasmUint32ToDouble* lir) {
+  masm.convertUInt32ToDouble(ToRegister(lir->input()),
+                             ToFloatRegister(lir->output()));
+}
+
+// Convert an unsigned 32-bit integer register to a float32.
+void CodeGenerator::visitWasmUint32ToFloat32(LWasmUint32ToFloat32* lir) {
+  masm.convertUInt32ToFloat32(ToRegister(lir->input()),
+                              ToFloatRegister(lir->output()));
+}
+
+// Logical NOT of an int32: output is 1 if the input is zero, else 0.
+void CodeGenerator::visitNotI(LNotI* ins) {
+  ARMRegister input = toWRegister(ins->input());
+  ARMRegister output = toWRegister(ins->output());
+
+  masm.Cmp(input, ZeroRegister32);
+  masm.Cset(output, Assembler::Zero);
+}
+
+// Logical NOT of a double: output is 1 if the input is zero or NaN, else 0.
+// Flags produced by the floating-point compare:
+//
+//        NZCV
+// NAN -> 0011
+// ==  -> 0110
+// <   -> 1000
+// >   -> 0010
+void CodeGenerator::visitNotD(LNotD* ins) {
+  ARMFPRegister input(ToFloatRegister(ins->input()), 64);
+  ARMRegister output = toWRegister(ins->output());
+
+  // Set output to 1 if input compares equal to 0.0, else 0.
+  masm.Fcmp(input, 0.0);
+  masm.Cset(output, Assembler::Equal);
+
+  // Comparison with NaN sets V in the NZCV register.
+  // If the input was NaN, output must now be zero, so it can be incremented.
+  // The instruction is read: "output = if NoOverflow then output else 0+1".
+  masm.Csinc(output, output, ZeroRegister32, Assembler::NoOverflow);
+}
+
+// Logical NOT of a float32: output is 1 if the input is zero or NaN, else 0.
+void CodeGenerator::visitNotF(LNotF* ins) {
+  ARMFPRegister input(ToFloatRegister(ins->input()), 32);
+  ARMRegister output = toWRegister(ins->output());
+
+  // Set output to 1 if input compares equal to 0.0, else 0.
+  masm.Fcmp(input, 0.0);
+  masm.Cset(output, Assembler::Equal);
+
+  // Comparison with NaN sets V in the NZCV register.
+  // If the input was NaN, output must now be zero, so it can be incremented.
+  // The instruction is read: "output = if NoOverflow then output else 0+1".
+  masm.Csinc(output, output, ZeroRegister32, Assembler::NoOverflow);
+}
+
+// Emit the invalidation epilogue: padding nops, the |invalidate_| label, a
+// push of lr and of a patchable placeholder word, and a jump to the
+// runtime's invalidation thunk.
+void CodeGeneratorARM64::generateInvalidateEpilogue() {
+  // Ensure that there is enough space in the buffer for the OsiPoint patching
+  // to occur. Otherwise, we could overwrite the invalidation epilogue.
+  for (size_t i = 0; i < sizeof(void*); i += Assembler::NopSize()) {
+    masm.nop();
+  }
+
+  masm.bind(&invalidate_);
+
+  // Push the return address of the point that we bailed out at onto the
+  // stack.
+  masm.push(lr);
+
+  // Push the Ion script onto the stack (when we determine what that pointer
+  // is).
+  invalidateEpilogueData_ = masm.pushWithPatch(ImmWord(uintptr_t(-1)));
+
+  // Jump to the invalidator which will replace the current frame.
+  TrampolinePtr thunk = gen->jitRuntime()->getInvalidationThunk();
+  masm.jump(thunk);
+}
+
+// Map mir->base() to the register that holds that base: HeapReg for
+// U::Heap, InvalidReg for anything else.
+template <class U>
+Register getBase(U* mir) {
+  switch (mir->base()) {
+    case U::Heap:
+      return HeapReg;
+  }
+  return InvalidReg;
+}
+
+// Emit an asm.js heap load of mir->accessType() from HeapReg + ptr. When a
+// bounds check is needed and fails, the load is skipped and the output is
+// set to a default value: NaN for float accesses, zero otherwise.
+void CodeGenerator::visitAsmJSLoadHeap(LAsmJSLoadHeap* ins) {
+  const MAsmJSLoadHeap* mir = ins->mir();
+  MOZ_ASSERT(!mir->hasMemoryBase());
+
+  const LAllocation* ptr = ins->ptr();
+  const LAllocation* boundsCheckLimit = ins->boundsCheckLimit();
+
+  Register ptrReg = ToRegister(ptr);
+  Scalar::Type accessType = mir->accessType();
+  bool isFloat = accessType == Scalar::Float32 || accessType == Scalar::Float64;
+  Label done;
+
+  if (mir->needsBoundsCheck()) {
+    Label boundsCheckPassed;
+    Register boundsCheckLimitReg = ToRegister(boundsCheckLimit);
+    masm.wasmBoundsCheck32(Assembler::Below, ptrReg, boundsCheckLimitReg,
+                           &boundsCheckPassed);
+    // Return a default value in case of a bounds-check failure.
+    if (isFloat) {
+      if (accessType == Scalar::Float32) {
+        masm.loadConstantFloat32(GenericNaN(), ToFloatRegister(ins->output()));
+      } else {
+        masm.loadConstantDouble(GenericNaN(), ToFloatRegister(ins->output()));
+      }
+    } else {
+      masm.Mov(ARMRegister(ToRegister(ins->output()), 64), 0);
+    }
+    masm.jump(&done);
+    masm.bind(&boundsCheckPassed);
+  }
+
+  MemOperand addr(ARMRegister(HeapReg, 64), ARMRegister(ptrReg, 64));
+  switch (accessType) {
+    case Scalar::Int8:
+      // Load the byte zero-extended, then sign-extend it in place.
+      masm.Ldrb(toWRegister(ins->output()), addr);
+      masm.Sxtb(toWRegister(ins->output()), toWRegister(ins->output()));
+      break;
+    case Scalar::Uint8:
+      masm.Ldrb(toWRegister(ins->output()), addr);
+      break;
+    case Scalar::Int16:
+      // Load the halfword zero-extended, then sign-extend it in place.
+      masm.Ldrh(toWRegister(ins->output()), addr);
+      masm.Sxth(toWRegister(ins->output()), toWRegister(ins->output()));
+      break;
+    case Scalar::Uint16:
+      masm.Ldrh(toWRegister(ins->output()), addr);
+      break;
+    case Scalar::Int32:
+    case Scalar::Uint32:
+      masm.Ldr(toWRegister(ins->output()), addr);
+      break;
+    case Scalar::Float64: +
masm.Ldr(ARMFPRegister(ToFloatRegister(ins->output()), 64), addr);
+      break;
+    case Scalar::Float32:
+      masm.Ldr(ARMFPRegister(ToFloatRegister(ins->output()), 32), addr);
+      break;
+    default:
+      MOZ_CRASH("unexpected array type");
+  }
+  if (done.used()) {
+    masm.bind(&done);
+  }
+}
+
+// Emit an asm.js heap store of mir->accessType() to HeapReg + ptr. When a
+// bounds check is needed and fails, the store is skipped.
+void CodeGenerator::visitAsmJSStoreHeap(LAsmJSStoreHeap* ins) {
+  const MAsmJSStoreHeap* mir = ins->mir();
+  MOZ_ASSERT(!mir->hasMemoryBase());
+
+  const LAllocation* ptr = ins->ptr();
+  const LAllocation* boundsCheckLimit = ins->boundsCheckLimit();
+
+  Register ptrReg = ToRegister(ptr);
+
+  Label done;
+  if (mir->needsBoundsCheck()) {
+    Register boundsCheckLimitReg = ToRegister(boundsCheckLimit);
+    // Out-of-bounds pointers jump straight to |done|, skipping the store.
+    masm.wasmBoundsCheck32(Assembler::AboveOrEqual, ptrReg, boundsCheckLimitReg,
+                           &done);
+  }
+
+  MemOperand addr(ARMRegister(HeapReg, 64), ARMRegister(ptrReg, 64));
+  switch (mir->accessType()) {
+    case Scalar::Int8:
+    case Scalar::Uint8:
+      masm.Strb(toWRegister(ins->value()), addr);
+      break;
+    case Scalar::Int16:
+    case Scalar::Uint16:
+      masm.Strh(toWRegister(ins->value()), addr);
+      break;
+    case Scalar::Int32:
+    case Scalar::Uint32:
+      masm.Str(toWRegister(ins->value()), addr);
+      break;
+    case Scalar::Float64:
+      masm.Str(ARMFPRegister(ToFloatRegister(ins->value()), 64), addr);
+      break;
+    case Scalar::Float32:
+      masm.Str(ARMFPRegister(ToFloatRegister(ins->value()), 32), addr);
+      break;
+    default:
+      MOZ_CRASH("unexpected array type");
+  }
+  if (done.used()) {
+    masm.bind(&done);
+  }
+}
+
+// Emit a wasm atomic compare-exchange on HeapReg + ptr + access offset,
+// using the 64-bit helper for Int64 accesses and the generic helper
+// otherwise.
+void CodeGenerator::visitWasmCompareExchangeHeap(
+    LWasmCompareExchangeHeap* ins) {
+  MWasmCompareExchangeHeap* mir = ins->mir();
+
+  Register ptr = ToRegister(ins->ptr());
+  Register oldval = ToRegister(ins->oldValue());
+  Register newval = ToRegister(ins->newValue());
+  Register out = ToRegister(ins->output());
+  MOZ_ASSERT(ins->addrTemp()->isBogusTemp());
+
+  BaseIndex srcAddr(HeapReg, ptr, TimesOne, mir->access().offset());
+
+  if (mir->access().type() == Scalar::Int64) { +
masm.wasmCompareExchange64(mir->access(), srcAddr, Register64(oldval), + Register64(newval), Register64(out)); + } else { + masm.wasmCompareExchange(mir->access(), srcAddr, oldval, newval, out); + } +} + +void CodeGenerator::visitWasmAtomicExchangeHeap(LWasmAtomicExchangeHeap* ins) { + MWasmAtomicExchangeHeap* mir = ins->mir(); + + Register ptr = ToRegister(ins->ptr()); + Register oldval = ToRegister(ins->value()); + Register out = ToRegister(ins->output()); + MOZ_ASSERT(ins->addrTemp()->isBogusTemp()); + + BaseIndex srcAddr(HeapReg, ptr, TimesOne, mir->access().offset()); + + if (mir->access().type() == Scalar::Int64) { + masm.wasmAtomicExchange64(mir->access(), srcAddr, Register64(oldval), + Register64(out)); + } else { + masm.wasmAtomicExchange(mir->access(), srcAddr, oldval, out); + } +} + +void CodeGenerator::visitWasmAtomicBinopHeap(LWasmAtomicBinopHeap* ins) { + MWasmAtomicBinopHeap* mir = ins->mir(); + + MOZ_ASSERT(mir->hasUses()); + + Register ptr = ToRegister(ins->ptr()); + Register value = ToRegister(ins->value()); + Register flagTemp = ToRegister(ins->flagTemp()); + Register out = ToRegister(ins->output()); + MOZ_ASSERT(ins->temp()->isBogusTemp()); + MOZ_ASSERT(ins->addrTemp()->isBogusTemp()); + + BaseIndex srcAddr(HeapReg, ptr, TimesOne, mir->access().offset()); + AtomicOp op = mir->operation(); + + if (mir->access().type() == Scalar::Int64) { + masm.wasmAtomicFetchOp64(mir->access(), op, Register64(value), srcAddr, + Register64(flagTemp), Register64(out)); + } else { + masm.wasmAtomicFetchOp(mir->access(), op, value, srcAddr, flagTemp, out); + } +} + +void CodeGenerator::visitWasmAtomicBinopHeapForEffect( + LWasmAtomicBinopHeapForEffect* ins) { + MWasmAtomicBinopHeap* mir = ins->mir(); + + MOZ_ASSERT(!mir->hasUses()); + + Register ptr = ToRegister(ins->ptr()); + Register value = ToRegister(ins->value()); + Register flagTemp = ToRegister(ins->flagTemp()); + MOZ_ASSERT(ins->addrTemp()->isBogusTemp()); + + BaseIndex srcAddr(HeapReg, ptr, TimesOne, 
mir->access().offset()); + AtomicOp op = mir->operation(); + + if (mir->access().type() == Scalar::Int64) { + masm.wasmAtomicEffectOp64(mir->access(), op, Register64(value), srcAddr, + Register64(flagTemp)); + } else { + masm.wasmAtomicEffectOp(mir->access(), op, value, srcAddr, flagTemp); + } +} + +void CodeGenerator::visitWasmStackArg(LWasmStackArg* ins) { + const MWasmStackArg* mir = ins->mir(); + Address dst(masm.getStackPointer(), mir->spOffset()); + if (ins->arg()->isConstant()) { + masm.storePtr(ImmWord(ToInt32(ins->arg())), dst); + } else if (ins->arg()->isGeneralReg()) { + masm.storePtr(ToRegister(ins->arg()), dst); + } else { + switch (mir->input()->type()) { + case MIRType::Double: + masm.storeDouble(ToFloatRegister(ins->arg()), dst); + return; + case MIRType::Float32: + masm.storeFloat32(ToFloatRegister(ins->arg()), dst); + return; +#ifdef ENABLE_WASM_SIMD + case MIRType::Simd128: + masm.storeUnalignedSimd128(ToFloatRegister(ins->arg()), dst); + return; +#endif + default: + break; + } + MOZ_MAKE_COMPILER_ASSUME_IS_UNREACHABLE( + "unexpected mir type in WasmStackArg"); + } +} + +void CodeGenerator::visitUDiv(LUDiv* ins) { + MDiv* mir = ins->mir(); + Register lhs = ToRegister(ins->lhs()); + Register rhs = ToRegister(ins->rhs()); + Register output = ToRegister(ins->output()); + ARMRegister lhs32 = ARMRegister(lhs, 32); + ARMRegister rhs32 = ARMRegister(rhs, 32); + ARMRegister output32 = ARMRegister(output, 32); + + // Prevent divide by zero. + if (mir->canBeDivideByZero()) { + if (mir->isTruncated()) { + if (mir->trapOnError()) { + Label nonZero; + masm.branchTest32(Assembler::NonZero, rhs, rhs, &nonZero); + masm.wasmTrap(wasm::Trap::IntegerDivideByZero, mir->bytecodeOffset()); + masm.bind(&nonZero); + } else { + // ARM64 UDIV instruction will return 0 when divided by 0. + // No need for extra tests. + } + } else { + bailoutTest32(Assembler::Zero, rhs, rhs, ins->snapshot()); + } + } + + // Unsigned division. 
+ masm.Udiv(output32, lhs32, rhs32); + + // If the remainder is > 0, bailout since this must be a double. + if (!mir->canTruncateRemainder()) { + Register remainder = ToRegister(ins->remainder()); + ARMRegister remainder32 = ARMRegister(remainder, 32); + + // Compute the remainder: remainder = lhs - (output * rhs). + masm.Msub(remainder32, output32, rhs32, lhs32); + + bailoutTest32(Assembler::NonZero, remainder, remainder, ins->snapshot()); + } + + // Unsigned div can return a value that's not a signed int32. + // If our users aren't expecting that, bail. + if (!mir->isTruncated()) { + bailoutTest32(Assembler::Signed, output, output, ins->snapshot()); + } +} + +void CodeGenerator::visitUMod(LUMod* ins) { + MMod* mir = ins->mir(); + ARMRegister lhs = toWRegister(ins->lhs()); + ARMRegister rhs = toWRegister(ins->rhs()); + ARMRegister output = toWRegister(ins->output()); + Label done; + + if (mir->canBeDivideByZero()) { + if (mir->isTruncated()) { + if (mir->trapOnError()) { + Label nonZero; + masm.Cbnz(rhs, &nonZero); + masm.wasmTrap(wasm::Trap::IntegerDivideByZero, mir->bytecodeOffset()); + masm.bind(&nonZero); + } else { + // Truncated division by zero yields integer zero. + masm.Mov(output, rhs); + masm.Cbz(rhs, &done); + } + } else { + // Non-truncated division by zero produces a non-integer. + masm.Cmp(rhs, Operand(0)); + bailoutIf(Assembler::Equal, ins->snapshot()); + } + } + + // Unsigned division. + masm.Udiv(output, lhs, rhs); + + // Compute the remainder: output = lhs - (output * rhs). + masm.Msub(output, output, rhs, lhs); + + if (!mir->isTruncated()) { + // Bail if the output would be negative. + // + // LUMod inputs may be Uint32, so care is taken to ensure the result + // is not unexpectedly signed. 
+ bailoutCmp32(Assembler::LessThan, output, Imm32(0), ins->snapshot()); + } + + if (done.used()) { + masm.bind(&done); + } +} + +void CodeGenerator::visitEffectiveAddress(LEffectiveAddress* ins) { + const MEffectiveAddress* mir = ins->mir(); + const ARMRegister base = toWRegister(ins->base()); + const ARMRegister index = toWRegister(ins->index()); + const ARMRegister output = toWRegister(ins->output()); + + masm.Add(output, base, Operand(index, vixl::LSL, mir->scale())); + masm.Add(output, output, Operand(mir->displacement())); +} + +void CodeGenerator::visitNegI(LNegI* ins) { + const ARMRegister input = toWRegister(ins->input()); + const ARMRegister output = toWRegister(ins->output()); + masm.Neg(output, input); +} + +void CodeGenerator::visitNegI64(LNegI64* ins) { + const ARMRegister input = toXRegister(ins->input()); + const ARMRegister output = toXRegister(ins->output()); + masm.Neg(output, input); +} + +void CodeGenerator::visitNegD(LNegD* ins) { + const ARMFPRegister input(ToFloatRegister(ins->input()), 64); + const ARMFPRegister output(ToFloatRegister(ins->output()), 64); + masm.Fneg(output, input); +} + +void CodeGenerator::visitNegF(LNegF* ins) { + const ARMFPRegister input(ToFloatRegister(ins->input()), 32); + const ARMFPRegister output(ToFloatRegister(ins->output()), 32); + masm.Fneg(output, input); +} + +void CodeGenerator::visitCompareExchangeTypedArrayElement( + LCompareExchangeTypedArrayElement* lir) { + Register elements = ToRegister(lir->elements()); + AnyRegister output = ToAnyRegister(lir->output()); + Register temp = + lir->temp()->isBogusTemp() ? 
InvalidReg : ToRegister(lir->temp()); + + Register oldval = ToRegister(lir->oldval()); + Register newval = ToRegister(lir->newval()); + + Scalar::Type arrayType = lir->mir()->arrayType(); + + if (lir->index()->isConstant()) { + Address dest = ToAddress(elements, lir->index(), arrayType); + masm.compareExchangeJS(arrayType, Synchronization::Full(), dest, oldval, + newval, temp, output); + } else { + BaseIndex dest(elements, ToRegister(lir->index()), + ScaleFromScalarType(arrayType)); + masm.compareExchangeJS(arrayType, Synchronization::Full(), dest, oldval, + newval, temp, output); + } +} + +void CodeGenerator::visitAtomicExchangeTypedArrayElement( + LAtomicExchangeTypedArrayElement* lir) { + Register elements = ToRegister(lir->elements()); + AnyRegister output = ToAnyRegister(lir->output()); + Register temp = + lir->temp()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp()); + + Register value = ToRegister(lir->value()); + + Scalar::Type arrayType = lir->mir()->arrayType(); + + if (lir->index()->isConstant()) { + Address dest = ToAddress(elements, lir->index(), arrayType); + masm.atomicExchangeJS(arrayType, Synchronization::Full(), dest, value, temp, + output); + } else { + BaseIndex dest(elements, ToRegister(lir->index()), + ScaleFromScalarType(arrayType)); + masm.atomicExchangeJS(arrayType, Synchronization::Full(), dest, value, temp, + output); + } +} + +void CodeGenerator::visitAtomicLoad64(LAtomicLoad64* lir) { + Register elements = ToRegister(lir->elements()); + Register temp = ToRegister(lir->temp()); + Register64 temp64 = ToRegister64(lir->temp64()); + Register out = ToRegister(lir->output()); + + const MLoadUnboxedScalar* mir = lir->mir(); + + Scalar::Type storageType = mir->storageType(); + + // NOTE: the generated code must match the assembly code in gen_load in + // GenerateAtomicOperations.py + auto sync = Synchronization::Load(); + + masm.memoryBarrierBefore(sync); + if (lir->index()->isConstant()) { + Address source = + ToAddress(elements, 
lir->index(), storageType, mir->offsetAdjustment()); + masm.load64(source, temp64); + } else { + BaseIndex source(elements, ToRegister(lir->index()), + ScaleFromScalarType(storageType), mir->offsetAdjustment()); + masm.load64(source, temp64); + } + masm.memoryBarrierAfter(sync); + + emitCreateBigInt(lir, storageType, temp64, out, temp); +} + +void CodeGenerator::visitAtomicStore64(LAtomicStore64* lir) { + Register elements = ToRegister(lir->elements()); + Register value = ToRegister(lir->value()); + Register64 temp1 = ToRegister64(lir->temp1()); + + Scalar::Type writeType = lir->mir()->writeType(); + + masm.loadBigInt64(value, temp1); + + // NOTE: the generated code must match the assembly code in gen_store in + // GenerateAtomicOperations.py + auto sync = Synchronization::Store(); + + masm.memoryBarrierBefore(sync); + if (lir->index()->isConstant()) { + Address dest = ToAddress(elements, lir->index(), writeType); + masm.store64(temp1, dest); + } else { + BaseIndex dest(elements, ToRegister(lir->index()), + ScaleFromScalarType(writeType)); + masm.store64(temp1, dest); + } + masm.memoryBarrierAfter(sync); +} + +void CodeGenerator::visitCompareExchangeTypedArrayElement64( + LCompareExchangeTypedArrayElement64* lir) { + Register elements = ToRegister(lir->elements()); + Register oldval = ToRegister(lir->oldval()); + Register newval = ToRegister(lir->newval()); + Register64 temp1 = ToRegister64(lir->temp1()); + Register64 temp2 = ToRegister64(lir->temp2()); + Register out = ToRegister(lir->output()); + Register64 tempOut(out); + + Scalar::Type arrayType = lir->mir()->arrayType(); + + masm.loadBigInt64(oldval, temp1); + masm.loadBigInt64(newval, tempOut); + + if (lir->index()->isConstant()) { + Address dest = ToAddress(elements, lir->index(), arrayType); + masm.compareExchange64(Synchronization::Full(), dest, temp1, tempOut, + temp2); + } else { + BaseIndex dest(elements, ToRegister(lir->index()), + ScaleFromScalarType(arrayType)); + 
masm.compareExchange64(Synchronization::Full(), dest, temp1, tempOut, + temp2); + } + + emitCreateBigInt(lir, arrayType, temp2, out, temp1.scratchReg()); +} + +void CodeGenerator::visitAtomicExchangeTypedArrayElement64( + LAtomicExchangeTypedArrayElement64* lir) { + Register elements = ToRegister(lir->elements()); + Register value = ToRegister(lir->value()); + Register64 temp1 = ToRegister64(lir->temp1()); + Register64 temp2 = Register64(ToRegister(lir->temp2())); + Register out = ToRegister(lir->output()); + + Scalar::Type arrayType = lir->mir()->arrayType(); + + masm.loadBigInt64(value, temp1); + + if (lir->index()->isConstant()) { + Address dest = ToAddress(elements, lir->index(), arrayType); + masm.atomicExchange64(Synchronization::Full(), dest, temp1, temp2); + } else { + BaseIndex dest(elements, ToRegister(lir->index()), + ScaleFromScalarType(arrayType)); + masm.atomicExchange64(Synchronization::Full(), dest, temp1, temp2); + } + + emitCreateBigInt(lir, arrayType, temp2, out, temp1.scratchReg()); +} + +void CodeGenerator::visitAtomicTypedArrayElementBinop64( + LAtomicTypedArrayElementBinop64* lir) { + MOZ_ASSERT(!lir->mir()->isForEffect()); + + Register elements = ToRegister(lir->elements()); + Register value = ToRegister(lir->value()); + Register64 temp1 = ToRegister64(lir->temp1()); + Register64 temp2 = ToRegister64(lir->temp2()); + Register out = ToRegister(lir->output()); + Register64 tempOut = Register64(out); + + Scalar::Type arrayType = lir->mir()->arrayType(); + AtomicOp atomicOp = lir->mir()->operation(); + + masm.loadBigInt64(value, temp1); + + if (lir->index()->isConstant()) { + Address dest = ToAddress(elements, lir->index(), arrayType); + masm.atomicFetchOp64(Synchronization::Full(), atomicOp, temp1, dest, + tempOut, temp2); + } else { + BaseIndex dest(elements, ToRegister(lir->index()), + ScaleFromScalarType(arrayType)); + masm.atomicFetchOp64(Synchronization::Full(), atomicOp, temp1, dest, + tempOut, temp2); + } + + emitCreateBigInt(lir, 
arrayType, temp2, out, temp1.scratchReg()); +} + +void CodeGenerator::visitAtomicTypedArrayElementBinopForEffect64( + LAtomicTypedArrayElementBinopForEffect64* lir) { + MOZ_ASSERT(lir->mir()->isForEffect()); + + Register elements = ToRegister(lir->elements()); + Register value = ToRegister(lir->value()); + Register64 temp1 = ToRegister64(lir->temp1()); + Register64 temp2 = ToRegister64(lir->temp2()); + + Scalar::Type arrayType = lir->mir()->arrayType(); + AtomicOp atomicOp = lir->mir()->operation(); + + masm.loadBigInt64(value, temp1); + + if (lir->index()->isConstant()) { + Address dest = ToAddress(elements, lir->index(), arrayType); + masm.atomicEffectOp64(Synchronization::Full(), atomicOp, temp1, dest, + temp2); + } else { + BaseIndex dest(elements, ToRegister(lir->index()), + ScaleFromScalarType(arrayType)); + masm.atomicEffectOp64(Synchronization::Full(), atomicOp, temp1, dest, + temp2); + } +} + +void CodeGeneratorARM64::emitSimpleBinaryI64( + LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>* lir, JSOp op) { + const ARMRegister dest = ARMRegister(ToOutRegister64(lir).reg, 64); + const ARMRegister lhs = + ARMRegister(ToRegister64(lir->getInt64Operand(0)).reg, 64); + const LInt64Allocation rhsAlloc = lir->getInt64Operand(INT64_PIECES); + Operand rhs; + + if (IsConstant(rhsAlloc)) { + rhs = Operand(ToInt64(rhsAlloc)); + } else { + rhs = Operand(ARMRegister(ToRegister64(rhsAlloc).reg, 64)); + } + switch (op) { + case JSOp::Add: + masm.Add(dest, lhs, rhs); + break; + case JSOp::Sub: + masm.Sub(dest, lhs, rhs); + break; + case JSOp::BitOr: + masm.Orr(dest, lhs, rhs); + break; + case JSOp::BitXor: + masm.Eor(dest, lhs, rhs); + break; + case JSOp::BitAnd: + masm.And(dest, lhs, rhs); + break; + default: + MOZ_CRASH("unexpected binary opcode"); + } +} + +void CodeGenerator::visitAddI64(LAddI64* lir) { + emitSimpleBinaryI64(lir, JSOp::Add); +} + +void CodeGenerator::visitClzI64(LClzI64* ins) { + masm.clz64(ToRegister64(ins->getInt64Operand(0)), 
ToRegister(ins->output())); +} + +void CodeGenerator::visitCtzI64(LCtzI64* ins) { + masm.ctz64(ToRegister64(ins->getInt64Operand(0)), ToRegister(ins->output())); +} + +void CodeGenerator::visitMulI64(LMulI64* lir) { + const LInt64Allocation lhs = lir->getInt64Operand(LMulI64::Lhs); + const LInt64Allocation rhs = lir->getInt64Operand(LMulI64::Rhs); + const Register64 output = ToOutRegister64(lir); + + if (IsConstant(rhs)) { + int64_t constant = ToInt64(rhs); + // Ad-hoc strength reduction, cf the x64 code as well as the 32-bit code + // higher up in this file. Bug 1712298 will lift this code to the MIR + // constant folding pass, or to lowering. + // + // This is for wasm integers only, so no input guards or overflow checking + // are needed. + switch (constant) { + case -1: + masm.Neg(ARMRegister(output.reg, 64), + ARMRegister(ToRegister64(lhs).reg, 64)); + break; + case 0: + masm.Mov(ARMRegister(output.reg, 64), xzr); + break; + case 1: + if (ToRegister64(lhs) != output) { + masm.move64(ToRegister64(lhs), output); + } + break; + case 2: + masm.Add(ARMRegister(output.reg, 64), + ARMRegister(ToRegister64(lhs).reg, 64), + ARMRegister(ToRegister64(lhs).reg, 64)); + break; + default: + // Use shift if constant is nonnegative power of 2. 
+ if (constant > 0) { + int32_t shift = mozilla::FloorLog2(constant); + if (int64_t(1) << shift == constant) { + masm.Lsl(ARMRegister(output.reg, 64), + ARMRegister(ToRegister64(lhs).reg, 64), shift); + break; + } + } + masm.mul64(Imm64(constant), ToRegister64(lhs), output); + break; + } + } else { + masm.mul64(ToRegister64(lhs), ToRegister64(rhs), output); + } +} + +void CodeGenerator::visitNotI64(LNotI64* lir) { + const Register64 input = ToRegister64(lir->getInt64Operand(0)); + const Register64 output = ToOutRegister64(lir); + masm.Cmp(ARMRegister(input.reg, 64), ZeroRegister64); + masm.Cset(ARMRegister(output.reg, 64), Assembler::Zero); +} + +void CodeGenerator::visitSubI64(LSubI64* lir) { + emitSimpleBinaryI64(lir, JSOp::Sub); +} + +void CodeGenerator::visitPopcntI(LPopcntI* ins) { + Register input = ToRegister(ins->input()); + Register output = ToRegister(ins->output()); + Register temp = ToRegister(ins->temp0()); + masm.popcnt32(input, output, temp); +} + +void CodeGenerator::visitBitOpI64(LBitOpI64* lir) { + emitSimpleBinaryI64(lir, lir->bitop()); +} + +void CodeGenerator::visitShiftI64(LShiftI64* lir) { + ARMRegister lhs(ToRegister64(lir->getInt64Operand(LShiftI64::Lhs)).reg, 64); + LAllocation* rhsAlloc = lir->getOperand(LShiftI64::Rhs); + ARMRegister dest(ToOutRegister64(lir).reg, 64); + + if (rhsAlloc->isConstant()) { + int32_t shift = int32_t(rhsAlloc->toConstant()->toInt64() & 0x3F); + if (shift == 0) { + if (lhs.code() != dest.code()) { + masm.Mov(dest, lhs); + } + } else { + switch (lir->bitop()) { + case JSOp::Lsh: + masm.Lsl(dest, lhs, shift); + break; + case JSOp::Rsh: + masm.Asr(dest, lhs, shift); + break; + case JSOp::Ursh: + masm.Lsr(dest, lhs, shift); + break; + default: + MOZ_CRASH("Unexpected shift op"); + } + } + } else { + ARMRegister rhs(ToRegister(rhsAlloc), 64); + switch (lir->bitop()) { + case JSOp::Lsh: + masm.Lsl(dest, lhs, rhs); + break; + case JSOp::Rsh: + masm.Asr(dest, lhs, rhs); + break; + case JSOp::Ursh: + masm.Lsr(dest, lhs, 
rhs); + break; + default: + MOZ_CRASH("Unexpected shift op"); + } + } +} + +void CodeGenerator::visitWasmHeapBase(LWasmHeapBase* ins) { + MOZ_ASSERT(ins->instance()->isBogus()); + masm.movePtr(HeapReg, ToRegister(ins->output())); +} + +// If we have a constant base ptr, try to add the offset to it, to generate +// better code when the full address is known. The addition may overflow past +// 32 bits because the front end does nothing special if the base is a large +// constant and base+offset overflows; sidestep this by performing the addition +// anyway, overflowing to 64-bit. + +static Maybe<uint64_t> IsAbsoluteAddress(const LAllocation* ptr, + const wasm::MemoryAccessDesc& access) { + if (ptr->isConstantValue()) { + const MConstant* c = ptr->toConstant(); + uint64_t base_address = c->type() == MIRType::Int32 + ? uint64_t(uint32_t(c->toInt32())) + : uint64_t(c->toInt64()); + uint64_t offset = access.offset(); + return Some(base_address + offset); + } + return Nothing(); +} + +void CodeGenerator::visitWasmLoad(LWasmLoad* lir) { + const MWasmLoad* mir = lir->mir(); + + if (Maybe<uint64_t> absAddr = IsAbsoluteAddress(lir->ptr(), mir->access())) { + masm.wasmLoadAbsolute(mir->access(), HeapReg, absAddr.value(), + ToAnyRegister(lir->output()), Register64::Invalid()); + return; + } + + // ptr is a GPR and is either a 32-bit value zero-extended to 64-bit, or a + // true 64-bit value. 
+ masm.wasmLoad(mir->access(), HeapReg, ToRegister(lir->ptr()), + ToAnyRegister(lir->output())); +} + +void CodeGenerator::visitCopySignD(LCopySignD* ins) { + MOZ_ASSERT(ins->getTemp(0)->isBogusTemp()); + MOZ_ASSERT(ins->getTemp(1)->isBogusTemp()); + masm.copySignDouble(ToFloatRegister(ins->getOperand(0)), + ToFloatRegister(ins->getOperand(1)), + ToFloatRegister(ins->getDef(0))); +} + +void CodeGenerator::visitCopySignF(LCopySignF* ins) { + MOZ_ASSERT(ins->getTemp(0)->isBogusTemp()); + MOZ_ASSERT(ins->getTemp(1)->isBogusTemp()); + masm.copySignFloat32(ToFloatRegister(ins->getOperand(0)), + ToFloatRegister(ins->getOperand(1)), + ToFloatRegister(ins->getDef(0))); +} + +void CodeGenerator::visitPopcntI64(LPopcntI64* lir) { + Register64 input = ToRegister64(lir->getInt64Operand(0)); + Register64 output = ToOutRegister64(lir); + Register temp = ToRegister(lir->getTemp(0)); + masm.popcnt64(input, output, temp); +} + +void CodeGenerator::visitRotateI64(LRotateI64* lir) { + bool rotateLeft = lir->mir()->isLeftRotate(); + Register64 input = ToRegister64(lir->input()); + Register64 output = ToOutRegister64(lir); + const LAllocation* count = lir->count(); + + if (count->isConstant()) { + int32_t c = int32_t(count->toConstant()->toInt64() & 0x3F); + if (c == 0) { + if (input != output) { + masm.move64(input, output); + return; + } + } + if (rotateLeft) { + masm.rotateLeft64(Imm32(c), input, output, InvalidReg); + } else { + masm.rotateRight64(Imm32(c), input, output, InvalidReg); + } + } else { + Register c = ToRegister(count); + if (rotateLeft) { + masm.rotateLeft64(c, input, output, InvalidReg); + } else { + masm.rotateRight64(c, input, output, InvalidReg); + } + } +} + +void CodeGenerator::visitWasmStore(LWasmStore* lir) { + const MWasmStore* mir = lir->mir(); + + if (Maybe<uint64_t> absAddr = IsAbsoluteAddress(lir->ptr(), mir->access())) { + masm.wasmStoreAbsolute(mir->access(), ToAnyRegister(lir->value()), + Register64::Invalid(), HeapReg, absAddr.value()); + return; + } 
+ + masm.wasmStore(mir->access(), ToAnyRegister(lir->value()), HeapReg, + ToRegister(lir->ptr())); +} + +void CodeGenerator::visitCompareI64(LCompareI64* lir) { + MCompare* mir = lir->mir(); + MOZ_ASSERT(mir->compareType() == MCompare::Compare_Int64 || + mir->compareType() == MCompare::Compare_UInt64); + + const LInt64Allocation lhs = lir->getInt64Operand(LCompareI64::Lhs); + const LInt64Allocation rhs = lir->getInt64Operand(LCompareI64::Rhs); + Register lhsReg = ToRegister64(lhs).reg; + Register output = ToRegister(lir->output()); + bool isSigned = mir->compareType() == MCompare::Compare_Int64; + + if (IsConstant(rhs)) { + masm.cmpPtrSet(JSOpToCondition(lir->jsop(), isSigned), lhsReg, + ImmWord(ToInt64(rhs)), output); + } else if (rhs.value().isGeneralReg()) { + masm.cmpPtrSet(JSOpToCondition(lir->jsop(), isSigned), lhsReg, + ToRegister64(rhs).reg, output); + } else { + masm.cmpPtrSet( + GetCondForSwappedOperands(JSOpToCondition(lir->jsop(), isSigned)), + ToAddress(rhs.value()), lhsReg, output); + } +} + +void CodeGenerator::visitWasmSelect(LWasmSelect* lir) { + MIRType mirType = lir->mir()->type(); + Register condReg = ToRegister(lir->condExpr()); + + masm.test32(condReg, condReg); + + switch (mirType) { + case MIRType::Int32: + case MIRType::RefOrNull: { + Register outReg = ToRegister(lir->output()); + Register trueReg = ToRegister(lir->trueExpr()); + Register falseReg = ToRegister(lir->falseExpr()); + + if (mirType == MIRType::Int32) { + masm.Csel(ARMRegister(outReg, 32), ARMRegister(trueReg, 32), + ARMRegister(falseReg, 32), Assembler::NonZero); + } else { + masm.Csel(ARMRegister(outReg, 64), ARMRegister(trueReg, 64), + ARMRegister(falseReg, 64), Assembler::NonZero); + } + break; + } + + case MIRType::Float32: + case MIRType::Double: + case MIRType::Simd128: { + FloatRegister outReg = ToFloatRegister(lir->output()); + FloatRegister trueReg = ToFloatRegister(lir->trueExpr()); + FloatRegister falseReg = ToFloatRegister(lir->falseExpr()); + + switch (mirType) { + 
case MIRType::Float32: + masm.Fcsel(ARMFPRegister(outReg, 32), ARMFPRegister(trueReg, 32), + ARMFPRegister(falseReg, 32), Assembler::NonZero); + break; + case MIRType::Double: + masm.Fcsel(ARMFPRegister(outReg, 64), ARMFPRegister(trueReg, 64), + ARMFPRegister(falseReg, 64), Assembler::NonZero); + break; +#ifdef ENABLE_WASM_SIMD + case MIRType::Simd128: { + MOZ_ASSERT(outReg == trueReg); + Label done; + masm.j(Assembler::NonZero, &done); + masm.moveSimd128(falseReg, outReg); + masm.bind(&done); + break; + } +#endif + default: + MOZ_CRASH(); + } + break; + } + + default: { + MOZ_CRASH("unhandled type in visitWasmSelect!"); + } + } +} + +// We expect to handle the cases: compare is {{U,}Int32, {U,}Int64}, Float32, +// Double}, and select is {{U,}Int32, {U,}Int64}, Float32, Double}, +// independently. +void CodeGenerator::visitWasmCompareAndSelect(LWasmCompareAndSelect* ins) { + MCompare::CompareType compTy = ins->compareType(); + + // Set flag. + if (compTy == MCompare::Compare_Int32 || compTy == MCompare::Compare_UInt32) { + Register lhs = ToRegister(ins->leftExpr()); + if (ins->rightExpr()->isConstant()) { + masm.cmp32(lhs, Imm32(ins->rightExpr()->toConstant()->toInt32())); + } else { + masm.cmp32(lhs, ToRegister(ins->rightExpr())); + } + } else if (compTy == MCompare::Compare_Int64 || + compTy == MCompare::Compare_UInt64) { + Register lhs = ToRegister(ins->leftExpr()); + if (ins->rightExpr()->isConstant()) { + masm.cmpPtr(lhs, Imm64(ins->rightExpr()->toConstant()->toInt64())); + } else { + masm.cmpPtr(lhs, ToRegister(ins->rightExpr())); + } + } else if (compTy == MCompare::Compare_Float32) { + masm.compareFloat(JSOpToDoubleCondition(ins->jsop()), + ToFloatRegister(ins->leftExpr()), + ToFloatRegister(ins->rightExpr())); + } else if (compTy == MCompare::Compare_Double) { + masm.compareDouble(JSOpToDoubleCondition(ins->jsop()), + ToFloatRegister(ins->leftExpr()), + ToFloatRegister(ins->rightExpr())); + } else { + // Ref types not supported yet; v128 is not yet 
observed to be worth + // optimizing. + MOZ_CRASH("CodeGenerator::visitWasmCompareAndSelect: unexpected type (1)"); + } + + // Act on flag. + Assembler::Condition cond; + if (compTy == MCompare::Compare_Float32 || + compTy == MCompare::Compare_Double) { + cond = Assembler::ConditionFromDoubleCondition( + JSOpToDoubleCondition(ins->jsop())); + } else { + cond = JSOpToCondition(compTy, ins->jsop()); + } + MIRType insTy = ins->mir()->type(); + if (insTy == MIRType::Int32 || insTy == MIRType::Int64) { + Register destReg = ToRegister(ins->output()); + Register trueReg = ToRegister(ins->ifTrueExpr()); + Register falseReg = ToRegister(ins->ifFalseExpr()); + size_t size = insTy == MIRType::Int32 ? 32 : 64; + masm.Csel(ARMRegister(destReg, size), ARMRegister(trueReg, size), + ARMRegister(falseReg, size), cond); + } else if (insTy == MIRType::Float32 || insTy == MIRType::Double) { + FloatRegister destReg = ToFloatRegister(ins->output()); + FloatRegister trueReg = ToFloatRegister(ins->ifTrueExpr()); + FloatRegister falseReg = ToFloatRegister(ins->ifFalseExpr()); + size_t size = MIRTypeToSize(insTy) * 8; + masm.Fcsel(ARMFPRegister(destReg, size), ARMFPRegister(trueReg, size), + ARMFPRegister(falseReg, size), cond); + } else { + // See above. 
+ MOZ_CRASH("CodeGenerator::visitWasmCompareAndSelect: unexpected type (2)"); + } +} + +void CodeGenerator::visitWasmLoadI64(LWasmLoadI64* lir) { + const MWasmLoad* mir = lir->mir(); + + if (Maybe<uint64_t> absAddr = IsAbsoluteAddress(lir->ptr(), mir->access())) { + masm.wasmLoadAbsolute(mir->access(), HeapReg, absAddr.value(), + AnyRegister(), ToOutRegister64(lir)); + return; + } + + masm.wasmLoadI64(mir->access(), HeapReg, ToRegister(lir->ptr()), + ToOutRegister64(lir)); +} + +void CodeGenerator::visitWasmStoreI64(LWasmStoreI64* lir) { + const MWasmStore* mir = lir->mir(); + + if (Maybe<uint64_t> absAddr = IsAbsoluteAddress(lir->ptr(), mir->access())) { + masm.wasmStoreAbsolute(mir->access(), AnyRegister(), + ToRegister64(lir->value()), HeapReg, + absAddr.value()); + return; + } + + masm.wasmStoreI64(mir->access(), ToRegister64(lir->value()), HeapReg, + ToRegister(lir->ptr())); +} + +void CodeGenerator::visitMemoryBarrier(LMemoryBarrier* ins) { + masm.memoryBarrier(ins->type()); +} + +void CodeGenerator::visitWasmAddOffset(LWasmAddOffset* lir) { + MWasmAddOffset* mir = lir->mir(); + Register base = ToRegister(lir->base()); + Register out = ToRegister(lir->output()); + + masm.Adds(ARMRegister(out, 32), ARMRegister(base, 32), + Operand(mir->offset())); + OutOfLineAbortingWasmTrap* ool = new (alloc()) + OutOfLineAbortingWasmTrap(mir->bytecodeOffset(), wasm::Trap::OutOfBounds); + addOutOfLineCode(ool, mir); + masm.j(Assembler::CarrySet, ool->entry()); +} + +void CodeGenerator::visitWasmAddOffset64(LWasmAddOffset64* lir) { + MWasmAddOffset* mir = lir->mir(); + Register64 base = ToRegister64(lir->base()); + Register64 out = ToOutRegister64(lir); + + masm.Adds(ARMRegister(out.reg, 64), ARMRegister(base.reg, 64), + Operand(mir->offset())); + OutOfLineAbortingWasmTrap* ool = new (alloc()) + OutOfLineAbortingWasmTrap(mir->bytecodeOffset(), wasm::Trap::OutOfBounds); + addOutOfLineCode(ool, mir); + masm.j(Assembler::CarrySet, ool->entry()); +} + +void 
CodeGenerator::visitWasmSelectI64(LWasmSelectI64* lir) { + MOZ_ASSERT(lir->mir()->type() == MIRType::Int64); + Register condReg = ToRegister(lir->condExpr()); + Register64 trueReg = ToRegister64(lir->trueExpr()); + Register64 falseReg = ToRegister64(lir->falseExpr()); + Register64 outReg = ToOutRegister64(lir); + + masm.test32(condReg, condReg); + masm.Csel(ARMRegister(outReg.reg, 64), ARMRegister(trueReg.reg, 64), + ARMRegister(falseReg.reg, 64), Assembler::NonZero); +} + +void CodeGenerator::visitSignExtendInt64(LSignExtendInt64* ins) { + Register64 input = ToRegister64(ins->getInt64Operand(0)); + Register64 output = ToOutRegister64(ins); + switch (ins->mode()) { + case MSignExtendInt64::Byte: + masm.move8To64SignExtend(input.reg, output); + break; + case MSignExtendInt64::Half: + masm.move16To64SignExtend(input.reg, output); + break; + case MSignExtendInt64::Word: + masm.move32To64SignExtend(input.reg, output); + break; + } +} + +void CodeGenerator::visitWasmReinterpret(LWasmReinterpret* lir) { + MOZ_ASSERT(gen->compilingWasm()); + MWasmReinterpret* ins = lir->mir(); + + MIRType to = ins->type(); + mozilla::DebugOnly<MIRType> from = ins->input()->type(); + + switch (to) { + case MIRType::Int32: + MOZ_ASSERT(from == MIRType::Float32); + masm.moveFloat32ToGPR(ToFloatRegister(lir->input()), + ToRegister(lir->output())); + break; + case MIRType::Float32: + MOZ_ASSERT(from == MIRType::Int32); + masm.moveGPRToFloat32(ToRegister(lir->input()), + ToFloatRegister(lir->output())); + break; + case MIRType::Double: + case MIRType::Int64: + MOZ_CRASH("not handled by this LIR opcode"); + default: + MOZ_CRASH("unexpected WasmReinterpret"); + } +} + +void CodeGenerator::visitWasmStackArgI64(LWasmStackArgI64* ins) { + const MWasmStackArg* mir = ins->mir(); + Address dst(masm.getStackPointer(), mir->spOffset()); + if (IsConstant(ins->arg())) { + masm.store64(Imm64(ToInt64(ins->arg())), dst); + } else { + masm.store64(ToRegister64(ins->arg()), dst); + } +} + +void 
CodeGenerator::visitTestI64AndBranch(LTestI64AndBranch* lir) { + Register64 input = ToRegister64(lir->getInt64Operand(0)); + MBasicBlock* mirTrue = lir->ifTrue(); + MBasicBlock* mirFalse = lir->ifFalse(); + + // Jump to the True block if NonZero. + // Jump to the False block if Zero. + if (isNextBlock(mirFalse->lir())) { + masm.Cbnz(ARMRegister(input.reg, 64), getJumpLabelForBranch(mirTrue)); + } else { + masm.Cbz(ARMRegister(input.reg, 64), getJumpLabelForBranch(mirFalse)); + if (!isNextBlock(mirTrue->lir())) { + jumpToBlock(mirTrue); + } + } +} + +void CodeGenerator::visitWrapInt64ToInt32(LWrapInt64ToInt32* lir) { + const LAllocation* input = lir->getOperand(0); + Register output = ToRegister(lir->output()); + + if (lir->mir()->bottomHalf()) { + if (input->isMemory()) { + masm.load32(ToAddress(input), output); + } else { + // Really this is a 64-bit input register and we could use move64To32. + masm.Mov(ARMRegister(output, 32), ARMRegister(ToRegister(input), 32)); + } + } else { + MOZ_CRASH("Not implemented."); + } +} + +void CodeGenerator::visitExtendInt32ToInt64(LExtendInt32ToInt64* lir) { + Register input = ToRegister(lir->getOperand(0)); + Register64 output = ToOutRegister64(lir); + + if (lir->mir()->isUnsigned()) { + masm.move32To64ZeroExtend(input, output); + } else { + masm.move32To64SignExtend(input, output); + } +} + +void CodeGenerator::visitWasmExtendU32Index(LWasmExtendU32Index* lir) { + // Generates no code on this platform because the input is assumed to have + // canonical form. + Register output = ToRegister(lir->output()); + MOZ_ASSERT(ToRegister(lir->input()) == output); + masm.debugAssertCanonicalInt32(output); +} + +void CodeGenerator::visitWasmWrapU32Index(LWasmWrapU32Index* lir) { + // Generates no code on this platform because the input is assumed to have + // canonical form. 
+ Register output = ToRegister(lir->output()); + MOZ_ASSERT(ToRegister(lir->input()) == output); + masm.debugAssertCanonicalInt32(output); +} + +void CodeGenerator::visitCompareI64AndBranch(LCompareI64AndBranch* comp) { + const MCompare* mir = comp->cmpMir(); + const mozilla::DebugOnly<MCompare::CompareType> type = mir->compareType(); + const LInt64Allocation left = + comp->getInt64Operand(LCompareI64AndBranch::Lhs); + const LInt64Allocation right = + comp->getInt64Operand(LCompareI64AndBranch::Rhs); + + MOZ_ASSERT(type == MCompare::Compare_Int64 || + type == MCompare::Compare_UInt64); + if (IsConstant(right)) { + masm.Cmp(ARMRegister(ToRegister64(left).reg, 64), ToInt64(right)); + } else { + masm.Cmp(ARMRegister(ToRegister64(left).reg, 64), + ARMRegister(ToRegister64(right).reg, 64)); + } + + bool isSigned = mir->compareType() == MCompare::Compare_Int64; + Assembler::Condition cond = JSOpToCondition(comp->jsop(), isSigned); + emitBranch(cond, comp->ifTrue(), comp->ifFalse()); +} + +void CodeGenerator::visitWasmTruncateToInt32(LWasmTruncateToInt32* lir) { + auto input = ToFloatRegister(lir->input()); + auto output = ToRegister(lir->output()); + + MWasmTruncateToInt32* mir = lir->mir(); + MIRType fromType = mir->input()->type(); + + MOZ_ASSERT(fromType == MIRType::Double || fromType == MIRType::Float32); + + auto* ool = new (alloc()) OutOfLineWasmTruncateCheck(mir, input, output); + addOutOfLineCode(ool, mir); + + Label* oolEntry = ool->entry(); + if (mir->isUnsigned()) { + if (fromType == MIRType::Double) { + masm.wasmTruncateDoubleToUInt32(input, output, mir->isSaturating(), + oolEntry); + } else if (fromType == MIRType::Float32) { + masm.wasmTruncateFloat32ToUInt32(input, output, mir->isSaturating(), + oolEntry); + } else { + MOZ_CRASH("unexpected type"); + } + + masm.bind(ool->rejoin()); + return; + } + + if (fromType == MIRType::Double) { + masm.wasmTruncateDoubleToInt32(input, output, mir->isSaturating(), + oolEntry); + } else if (fromType == MIRType::Float32) 
{ + masm.wasmTruncateFloat32ToInt32(input, output, mir->isSaturating(), + oolEntry); + } else { + MOZ_CRASH("unexpected type"); + } + + masm.bind(ool->rejoin()); +} + +void CodeGenerator::visitWasmTruncateToInt64(LWasmTruncateToInt64* lir) { + FloatRegister input = ToFloatRegister(lir->input()); + Register64 output = ToOutRegister64(lir); + + MWasmTruncateToInt64* mir = lir->mir(); + MIRType fromType = mir->input()->type(); + + MOZ_ASSERT(fromType == MIRType::Double || fromType == MIRType::Float32); + + auto* ool = new (alloc()) OutOfLineWasmTruncateCheck(mir, input, output); + addOutOfLineCode(ool, mir); + + Label* oolEntry = ool->entry(); + Label* oolRejoin = ool->rejoin(); + bool isSaturating = mir->isSaturating(); + + if (fromType == MIRType::Double) { + if (mir->isUnsigned()) { + masm.wasmTruncateDoubleToUInt64(input, output, isSaturating, oolEntry, + oolRejoin, InvalidFloatReg); + } else { + masm.wasmTruncateDoubleToInt64(input, output, isSaturating, oolEntry, + oolRejoin, InvalidFloatReg); + } + } else { + if (mir->isUnsigned()) { + masm.wasmTruncateFloat32ToUInt64(input, output, isSaturating, oolEntry, + oolRejoin, InvalidFloatReg); + } else { + masm.wasmTruncateFloat32ToInt64(input, output, isSaturating, oolEntry, + oolRejoin, InvalidFloatReg); + } + } +} + +void CodeGeneratorARM64::visitOutOfLineWasmTruncateCheck( + OutOfLineWasmTruncateCheck* ool) { + FloatRegister input = ool->input(); + Register output = ool->output(); + Register64 output64 = ool->output64(); + MIRType fromType = ool->fromType(); + MIRType toType = ool->toType(); + Label* oolRejoin = ool->rejoin(); + TruncFlags flags = ool->flags(); + wasm::BytecodeOffset off = ool->bytecodeOffset(); + + if (fromType == MIRType::Float32) { + if (toType == MIRType::Int32) { + masm.oolWasmTruncateCheckF32ToI32(input, output, flags, off, oolRejoin); + } else if (toType == MIRType::Int64) { + masm.oolWasmTruncateCheckF32ToI64(input, output64, flags, off, oolRejoin); + } else { + MOZ_CRASH("unexpected 
type"); + } + } else if (fromType == MIRType::Double) { + if (toType == MIRType::Int32) { + masm.oolWasmTruncateCheckF64ToI32(input, output, flags, off, oolRejoin); + } else if (toType == MIRType::Int64) { + masm.oolWasmTruncateCheckF64ToI64(input, output64, flags, off, oolRejoin); + } else { + MOZ_CRASH("unexpected type"); + } + } else { + MOZ_CRASH("unexpected type"); + } +} + +void CodeGenerator::visitWasmReinterpretToI64(LWasmReinterpretToI64* lir) { + MOZ_ASSERT(lir->mir()->type() == MIRType::Int64); + MOZ_ASSERT(lir->mir()->input()->type() == MIRType::Double); + masm.moveDoubleToGPR64(ToFloatRegister(lir->input()), ToOutRegister64(lir)); +} + +void CodeGenerator::visitWasmReinterpretFromI64(LWasmReinterpretFromI64* lir) { + MOZ_ASSERT(lir->mir()->type() == MIRType::Double); + MOZ_ASSERT(lir->mir()->input()->type() == MIRType::Int64); + masm.moveGPR64ToDouble( + ToRegister64(lir->getInt64Operand(LWasmReinterpretFromI64::Input)), + ToFloatRegister(lir->output())); +} + +void CodeGenerator::visitAtomicTypedArrayElementBinop( + LAtomicTypedArrayElementBinop* lir) { + MOZ_ASSERT(!lir->mir()->isForEffect()); + + AnyRegister output = ToAnyRegister(lir->output()); + Register elements = ToRegister(lir->elements()); + Register flagTemp = ToRegister(lir->temp1()); + Register outTemp = + lir->temp2()->isBogusTemp() ? 
InvalidReg : ToRegister(lir->temp2()); + Register value = ToRegister(lir->value()); + + Scalar::Type arrayType = lir->mir()->arrayType(); + + if (lir->index()->isConstant()) { + Address mem = ToAddress(elements, lir->index(), arrayType); + masm.atomicFetchOpJS(arrayType, Synchronization::Full(), + lir->mir()->operation(), value, mem, flagTemp, outTemp, + output); + } else { + BaseIndex mem(elements, ToRegister(lir->index()), + ScaleFromScalarType(arrayType)); + masm.atomicFetchOpJS(arrayType, Synchronization::Full(), + lir->mir()->operation(), value, mem, flagTemp, outTemp, + output); + } +} + +void CodeGenerator::visitAtomicTypedArrayElementBinopForEffect( + LAtomicTypedArrayElementBinopForEffect* lir) { + MOZ_ASSERT(lir->mir()->isForEffect()); + + Register elements = ToRegister(lir->elements()); + Register flagTemp = ToRegister(lir->flagTemp()); + Register value = ToRegister(lir->value()); + + Scalar::Type arrayType = lir->mir()->arrayType(); + + if (lir->index()->isConstant()) { + Address mem = ToAddress(elements, lir->index(), arrayType); + masm.atomicEffectOpJS(arrayType, Synchronization::Full(), + lir->mir()->operation(), value, mem, flagTemp); + } else { + BaseIndex mem(elements, ToRegister(lir->index()), + ScaleFromScalarType(arrayType)); + masm.atomicEffectOpJS(arrayType, Synchronization::Full(), + lir->mir()->operation(), value, mem, flagTemp); + } +} + +void CodeGenerator::visitInt64ToFloatingPoint(LInt64ToFloatingPoint* lir) { + Register64 input = ToRegister64(lir->getInt64Operand(0)); + FloatRegister output = ToFloatRegister(lir->output()); + + MIRType outputType = lir->mir()->type(); + MOZ_ASSERT(outputType == MIRType::Double || outputType == MIRType::Float32); + + if (outputType == MIRType::Double) { + if (lir->mir()->isUnsigned()) { + masm.convertUInt64ToDouble(input, output, Register::Invalid()); + } else { + masm.convertInt64ToDouble(input, output); + } + } else { + if (lir->mir()->isUnsigned()) { + masm.convertUInt64ToFloat32(input, output, 
Register::Invalid()); + } else { + masm.convertInt64ToFloat32(input, output); + } + } +} + +void CodeGenerator::visitDivOrModI64(LDivOrModI64* lir) { + Register lhs = ToRegister(lir->lhs()); + Register rhs = ToRegister(lir->rhs()); + Register output = ToRegister(lir->output()); + + Label done; + + // Handle divide by zero. + if (lir->canBeDivideByZero()) { + Label isNotDivByZero; + masm.Cbnz(ARMRegister(rhs, 64), &isNotDivByZero); + masm.wasmTrap(wasm::Trap::IntegerDivideByZero, lir->bytecodeOffset()); + masm.bind(&isNotDivByZero); + } + + // Handle an integer overflow exception from INT64_MIN / -1. + if (lir->canBeNegativeOverflow()) { + Label noOverflow; + masm.branchPtr(Assembler::NotEqual, lhs, ImmWord(INT64_MIN), &noOverflow); + masm.branchPtr(Assembler::NotEqual, rhs, ImmWord(-1), &noOverflow); + if (lir->mir()->isMod()) { + masm.movePtr(ImmWord(0), output); + } else { + masm.wasmTrap(wasm::Trap::IntegerOverflow, lir->bytecodeOffset()); + } + masm.jump(&done); + masm.bind(&noOverflow); + } + + masm.Sdiv(ARMRegister(output, 64), ARMRegister(lhs, 64), + ARMRegister(rhs, 64)); + if (lir->mir()->isMod()) { + masm.Msub(ARMRegister(output, 64), ARMRegister(output, 64), + ARMRegister(rhs, 64), ARMRegister(lhs, 64)); + } + masm.bind(&done); +} + +void CodeGenerator::visitUDivOrModI64(LUDivOrModI64* lir) { + Register lhs = ToRegister(lir->lhs()); + Register rhs = ToRegister(lir->rhs()); + Register output = ToRegister(lir->output()); + + Label done; + + // Handle divide by zero. 
+ if (lir->canBeDivideByZero()) { + Label isNotDivByZero; + masm.Cbnz(ARMRegister(rhs, 64), &isNotDivByZero); + masm.wasmTrap(wasm::Trap::IntegerDivideByZero, lir->bytecodeOffset()); + masm.bind(&isNotDivByZero); + } + + masm.Udiv(ARMRegister(output, 64), ARMRegister(lhs, 64), + ARMRegister(rhs, 64)); + if (lir->mir()->isMod()) { + masm.Msub(ARMRegister(output, 64), ARMRegister(output, 64), + ARMRegister(rhs, 64), ARMRegister(lhs, 64)); + } + masm.bind(&done); +} + +void CodeGenerator::visitSimd128(LSimd128* ins) { +#ifdef ENABLE_WASM_SIMD + const LDefinition* out = ins->getDef(0); + masm.loadConstantSimd128(ins->simd128(), ToFloatRegister(out)); +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void CodeGenerator::visitWasmTernarySimd128(LWasmTernarySimd128* ins) { +#ifdef ENABLE_WASM_SIMD + switch (ins->simdOp()) { + case wasm::SimdOp::V128Bitselect: { + FloatRegister lhs = ToFloatRegister(ins->v0()); + FloatRegister rhs = ToFloatRegister(ins->v1()); + FloatRegister controlDest = ToFloatRegister(ins->v2()); + masm.bitwiseSelectSimd128(lhs, rhs, controlDest); + break; + } + case wasm::SimdOp::F32x4RelaxedFma: + masm.fmaFloat32x4(ToFloatRegister(ins->v0()), ToFloatRegister(ins->v1()), + ToFloatRegister(ins->v2())); + break; + case wasm::SimdOp::F32x4RelaxedFnma: + masm.fnmaFloat32x4(ToFloatRegister(ins->v0()), ToFloatRegister(ins->v1()), + ToFloatRegister(ins->v2())); + break; + case wasm::SimdOp::F64x2RelaxedFma: + masm.fmaFloat64x2(ToFloatRegister(ins->v0()), ToFloatRegister(ins->v1()), + ToFloatRegister(ins->v2())); + break; + case wasm::SimdOp::F64x2RelaxedFnma: + masm.fnmaFloat64x2(ToFloatRegister(ins->v0()), ToFloatRegister(ins->v1()), + ToFloatRegister(ins->v2())); + break; + case wasm::SimdOp::I8x16RelaxedLaneSelect: + case wasm::SimdOp::I16x8RelaxedLaneSelect: + case wasm::SimdOp::I32x4RelaxedLaneSelect: + case wasm::SimdOp::I64x2RelaxedLaneSelect: { + FloatRegister lhs = ToFloatRegister(ins->v0()); + FloatRegister rhs = ToFloatRegister(ins->v1()); + 
FloatRegister maskDest = ToFloatRegister(ins->v2()); + masm.laneSelectSimd128(maskDest, lhs, rhs, maskDest); + break; + } + case wasm::SimdOp::I32x4DotI8x16I7x16AddS: + masm.dotInt8x16Int7x16ThenAdd( + ToFloatRegister(ins->v0()), ToFloatRegister(ins->v1()), + ToFloatRegister(ins->v2()), ToFloatRegister(ins->temp())); + break; + default: + MOZ_CRASH("NYI"); + } +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void CodeGenerator::visitWasmBinarySimd128(LWasmBinarySimd128* ins) { +#ifdef ENABLE_WASM_SIMD + FloatRegister lhs = ToFloatRegister(ins->lhs()); + FloatRegister rhs = ToFloatRegister(ins->rhs()); + FloatRegister dest = ToFloatRegister(ins->output()); + + switch (ins->simdOp()) { + case wasm::SimdOp::V128And: + masm.bitwiseAndSimd128(lhs, rhs, dest); + break; + case wasm::SimdOp::V128Or: + masm.bitwiseOrSimd128(lhs, rhs, dest); + break; + case wasm::SimdOp::V128Xor: + masm.bitwiseXorSimd128(lhs, rhs, dest); + break; + case wasm::SimdOp::V128AndNot: + masm.bitwiseAndNotSimd128(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16AvgrU: + masm.unsignedAverageInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8AvgrU: + masm.unsignedAverageInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16Add: + masm.addInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16AddSatS: + masm.addSatInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16AddSatU: + masm.unsignedAddSatInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16Sub: + masm.subInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16SubSatS: + masm.subSatInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16SubSatU: + masm.unsignedSubSatInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16MinS: + masm.minInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16MinU: + masm.unsignedMinInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16MaxS: + masm.maxInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16MaxU: + masm.unsignedMaxInt8x16(lhs, rhs, dest); 
+ break; + case wasm::SimdOp::I16x8Add: + masm.addInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8AddSatS: + masm.addSatInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8AddSatU: + masm.unsignedAddSatInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8Sub: + masm.subInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8SubSatS: + masm.subSatInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8SubSatU: + masm.unsignedSubSatInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8Mul: + masm.mulInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8MinS: + masm.minInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8MinU: + masm.unsignedMinInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8MaxS: + masm.maxInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8MaxU: + masm.unsignedMaxInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4Add: + masm.addInt32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4Sub: + masm.subInt32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4Mul: + masm.mulInt32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4MinS: + masm.minInt32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4MinU: + masm.unsignedMinInt32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4MaxS: + masm.maxInt32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4MaxU: + masm.unsignedMaxInt32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::I64x2Add: + masm.addInt64x2(lhs, rhs, dest); + break; + case wasm::SimdOp::I64x2Sub: + masm.subInt64x2(lhs, rhs, dest); + break; + case wasm::SimdOp::I64x2Mul: { + auto temp1 = ToFloatRegister(ins->getTemp(0)); + auto temp2 = ToFloatRegister(ins->getTemp(1)); + masm.mulInt64x2(lhs, rhs, dest, temp1, temp2); + break; + } + case wasm::SimdOp::F32x4Add: + masm.addFloat32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::F32x4Sub: + masm.subFloat32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::F32x4Mul: + masm.mulFloat32x4(lhs, 
rhs, dest); + break; + case wasm::SimdOp::F32x4Div: + masm.divFloat32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::F32x4Min: + masm.minFloat32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::F32x4Max: + masm.maxFloat32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::F64x2Add: + masm.addFloat64x2(lhs, rhs, dest); + break; + case wasm::SimdOp::F64x2Sub: + masm.subFloat64x2(lhs, rhs, dest); + break; + case wasm::SimdOp::F64x2Mul: + masm.mulFloat64x2(lhs, rhs, dest); + break; + case wasm::SimdOp::F64x2Div: + masm.divFloat64x2(lhs, rhs, dest); + break; + case wasm::SimdOp::F64x2Min: + masm.minFloat64x2(lhs, rhs, dest); + break; + case wasm::SimdOp::F64x2Max: + masm.maxFloat64x2(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16Swizzle: + masm.swizzleInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16RelaxedSwizzle: + masm.swizzleInt8x16Relaxed(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16NarrowI16x8S: + masm.narrowInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16NarrowI16x8U: + masm.unsignedNarrowInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8NarrowI32x4S: + masm.narrowInt32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8NarrowI32x4U: + masm.unsignedNarrowInt32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16Eq: + masm.compareInt8x16(Assembler::Equal, lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16Ne: + masm.compareInt8x16(Assembler::NotEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16LtS: + masm.compareInt8x16(Assembler::LessThan, lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16GtS: + masm.compareInt8x16(Assembler::GreaterThan, lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16LeS: + masm.compareInt8x16(Assembler::LessThanOrEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16GeS: + masm.compareInt8x16(Assembler::GreaterThanOrEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16LtU: + masm.compareInt8x16(Assembler::Below, lhs, rhs, dest); + break; + case 
wasm::SimdOp::I8x16GtU: + masm.compareInt8x16(Assembler::Above, lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16LeU: + masm.compareInt8x16(Assembler::BelowOrEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16GeU: + masm.compareInt8x16(Assembler::AboveOrEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8Eq: + masm.compareInt16x8(Assembler::Equal, lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8Ne: + masm.compareInt16x8(Assembler::NotEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8LtS: + masm.compareInt16x8(Assembler::LessThan, lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8GtS: + masm.compareInt16x8(Assembler::GreaterThan, lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8LeS: + masm.compareInt16x8(Assembler::LessThanOrEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8GeS: + masm.compareInt16x8(Assembler::GreaterThanOrEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8LtU: + masm.compareInt16x8(Assembler::Below, lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8GtU: + masm.compareInt16x8(Assembler::Above, lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8LeU: + masm.compareInt16x8(Assembler::BelowOrEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8GeU: + masm.compareInt16x8(Assembler::AboveOrEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4Eq: + masm.compareInt32x4(Assembler::Equal, lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4Ne: + masm.compareInt32x4(Assembler::NotEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4LtS: + masm.compareInt32x4(Assembler::LessThan, lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4GtS: + masm.compareInt32x4(Assembler::GreaterThan, lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4LeS: + masm.compareInt32x4(Assembler::LessThanOrEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4GeS: + masm.compareInt32x4(Assembler::GreaterThanOrEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4LtU: + 
masm.compareInt32x4(Assembler::Below, lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4GtU: + masm.compareInt32x4(Assembler::Above, lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4LeU: + masm.compareInt32x4(Assembler::BelowOrEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4GeU: + masm.compareInt32x4(Assembler::AboveOrEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::I64x2Eq: + masm.compareInt64x2(Assembler::Equal, lhs, rhs, dest); + break; + case wasm::SimdOp::I64x2LtS: + masm.compareInt64x2(Assembler::LessThan, lhs, rhs, dest); + break; + case wasm::SimdOp::I64x2GtS: + masm.compareInt64x2(Assembler::GreaterThan, lhs, rhs, dest); + break; + case wasm::SimdOp::I64x2LeS: + masm.compareInt64x2(Assembler::LessThanOrEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::I64x2GeS: + masm.compareInt64x2(Assembler::GreaterThanOrEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::I64x2Ne: + masm.compareInt64x2(Assembler::NotEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::F32x4Eq: + masm.compareFloat32x4(Assembler::Equal, lhs, rhs, dest); + break; + case wasm::SimdOp::F32x4Ne: + masm.compareFloat32x4(Assembler::NotEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::F32x4Lt: + masm.compareFloat32x4(Assembler::LessThan, lhs, rhs, dest); + break; + case wasm::SimdOp::F32x4Gt: + masm.compareFloat32x4(Assembler::GreaterThan, lhs, rhs, dest); + break; + case wasm::SimdOp::F32x4Le: + masm.compareFloat32x4(Assembler::LessThanOrEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::F32x4Ge: + masm.compareFloat32x4(Assembler::GreaterThanOrEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::F64x2Eq: + masm.compareFloat64x2(Assembler::Equal, lhs, rhs, dest); + break; + case wasm::SimdOp::F64x2Ne: + masm.compareFloat64x2(Assembler::NotEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::F64x2Lt: + masm.compareFloat64x2(Assembler::LessThan, lhs, rhs, dest); + break; + case wasm::SimdOp::F64x2Gt: + masm.compareFloat64x2(Assembler::GreaterThan, lhs, rhs, 
dest); + break; + case wasm::SimdOp::F64x2Le: + masm.compareFloat64x2(Assembler::LessThanOrEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::F64x2Ge: + masm.compareFloat64x2(Assembler::GreaterThanOrEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::F32x4PMax: + masm.pseudoMaxFloat32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::F32x4PMin: + masm.pseudoMinFloat32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::F64x2PMax: + masm.pseudoMaxFloat64x2(lhs, rhs, dest); + break; + case wasm::SimdOp::F64x2PMin: + masm.pseudoMinFloat64x2(lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4DotI16x8S: + masm.widenDotInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8ExtmulLowI8x16S: + masm.extMulLowInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8ExtmulHighI8x16S: + masm.extMulHighInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8ExtmulLowI8x16U: + masm.unsignedExtMulLowInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8ExtmulHighI8x16U: + masm.unsignedExtMulHighInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4ExtmulLowI16x8S: + masm.extMulLowInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4ExtmulHighI16x8S: + masm.extMulHighInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4ExtmulLowI16x8U: + masm.unsignedExtMulLowInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4ExtmulHighI16x8U: + masm.unsignedExtMulHighInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I64x2ExtmulLowI32x4S: + masm.extMulLowInt32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::I64x2ExtmulHighI32x4S: + masm.extMulHighInt32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::I64x2ExtmulLowI32x4U: + masm.unsignedExtMulLowInt32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::I64x2ExtmulHighI32x4U: + masm.unsignedExtMulHighInt32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8Q15MulrSatS: + masm.q15MulrSatInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::F32x4RelaxedMin: + masm.minFloat32x4Relaxed(lhs, rhs, 
dest); + break; + case wasm::SimdOp::F32x4RelaxedMax: + masm.maxFloat32x4Relaxed(lhs, rhs, dest); + break; + case wasm::SimdOp::F64x2RelaxedMin: + masm.minFloat64x2Relaxed(lhs, rhs, dest); + break; + case wasm::SimdOp::F64x2RelaxedMax: + masm.maxFloat64x2Relaxed(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8RelaxedQ15MulrS: + masm.q15MulrInt16x8Relaxed(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8DotI8x16I7x16S: + masm.dotInt8x16Int7x16(lhs, rhs, dest); + break; + default: + MOZ_CRASH("Binary SimdOp not implemented"); + } +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void CodeGenerator::visitWasmBinarySimd128WithConstant( + LWasmBinarySimd128WithConstant* ins) { + MOZ_CRASH("No SIMD"); +} + +void CodeGenerator::visitWasmVariableShiftSimd128( + LWasmVariableShiftSimd128* ins) { +#ifdef ENABLE_WASM_SIMD + FloatRegister lhs = ToFloatRegister(ins->lhs()); + Register rhs = ToRegister(ins->rhs()); + FloatRegister dest = ToFloatRegister(ins->output()); + + switch (ins->simdOp()) { + case wasm::SimdOp::I8x16Shl: + masm.leftShiftInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16ShrS: + masm.rightShiftInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16ShrU: + masm.unsignedRightShiftInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8Shl: + masm.leftShiftInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8ShrS: + masm.rightShiftInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8ShrU: + masm.unsignedRightShiftInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4Shl: + masm.leftShiftInt32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4ShrS: + masm.rightShiftInt32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4ShrU: + masm.unsignedRightShiftInt32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::I64x2Shl: + masm.leftShiftInt64x2(lhs, rhs, dest); + break; + case wasm::SimdOp::I64x2ShrS: + masm.rightShiftInt64x2(lhs, rhs, dest); + break; + case wasm::SimdOp::I64x2ShrU: + masm.unsignedRightShiftInt64x2(lhs, 
rhs, dest); + break; + default: + MOZ_CRASH("Shift SimdOp not implemented"); + } +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void CodeGenerator::visitWasmConstantShiftSimd128( + LWasmConstantShiftSimd128* ins) { +#ifdef ENABLE_WASM_SIMD + FloatRegister src = ToFloatRegister(ins->src()); + FloatRegister dest = ToFloatRegister(ins->output()); + int32_t shift = ins->shift(); + + if (shift == 0) { + if (src != dest) { + masm.moveSimd128(src, dest); + } + return; + } + + switch (ins->simdOp()) { + case wasm::SimdOp::I8x16Shl: + masm.leftShiftInt8x16(Imm32(shift), src, dest); + break; + case wasm::SimdOp::I8x16ShrS: + masm.rightShiftInt8x16(Imm32(shift), src, dest); + break; + case wasm::SimdOp::I8x16ShrU: + masm.unsignedRightShiftInt8x16(Imm32(shift), src, dest); + break; + case wasm::SimdOp::I16x8Shl: + masm.leftShiftInt16x8(Imm32(shift), src, dest); + break; + case wasm::SimdOp::I16x8ShrS: + masm.rightShiftInt16x8(Imm32(shift), src, dest); + break; + case wasm::SimdOp::I16x8ShrU: + masm.unsignedRightShiftInt16x8(Imm32(shift), src, dest); + break; + case wasm::SimdOp::I32x4Shl: + masm.leftShiftInt32x4(Imm32(shift), src, dest); + break; + case wasm::SimdOp::I32x4ShrS: + masm.rightShiftInt32x4(Imm32(shift), src, dest); + break; + case wasm::SimdOp::I32x4ShrU: + masm.unsignedRightShiftInt32x4(Imm32(shift), src, dest); + break; + case wasm::SimdOp::I64x2Shl: + masm.leftShiftInt64x2(Imm32(shift), src, dest); + break; + case wasm::SimdOp::I64x2ShrS: + masm.rightShiftInt64x2(Imm32(shift), src, dest); + break; + case wasm::SimdOp::I64x2ShrU: + masm.unsignedRightShiftInt64x2(Imm32(shift), src, dest); + break; + default: + MOZ_CRASH("Shift SimdOp not implemented"); + } +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void CodeGenerator::visitWasmSignReplicationSimd128( + LWasmSignReplicationSimd128* ins) { + MOZ_CRASH("No SIMD"); +} + +void CodeGenerator::visitWasmShuffleSimd128(LWasmShuffleSimd128* ins) { +#ifdef ENABLE_WASM_SIMD + FloatRegister lhs = ToFloatRegister(ins->lhs()); 
+ FloatRegister rhs = ToFloatRegister(ins->rhs()); + FloatRegister dest = ToFloatRegister(ins->output()); + MOZ_ASSERT(ins->temp()->isBogusTemp()); + SimdConstant control = ins->control(); + switch (ins->op()) { + case SimdShuffleOp::BLEND_8x16: { + masm.blendInt8x16(reinterpret_cast<const uint8_t*>(control.asInt8x16()), + lhs, rhs, dest); + break; + } + case SimdShuffleOp::BLEND_16x8: { + masm.blendInt16x8(reinterpret_cast<const uint16_t*>(control.asInt16x8()), + lhs, rhs, dest); + break; + } + case SimdShuffleOp::CONCAT_RIGHT_SHIFT_8x16: { + int8_t count = 16 - control.asInt8x16()[0]; + MOZ_ASSERT(count > 0, "Should have been a MOVE operation"); + masm.concatAndRightShiftSimd128(lhs, rhs, dest, count); + break; + } + case SimdShuffleOp::INTERLEAVE_HIGH_8x16: { + masm.interleaveHighInt8x16(lhs, rhs, dest); + break; + } + case SimdShuffleOp::INTERLEAVE_HIGH_16x8: { + masm.interleaveHighInt16x8(lhs, rhs, dest); + break; + } + case SimdShuffleOp::INTERLEAVE_HIGH_32x4: { + masm.interleaveHighInt32x4(lhs, rhs, dest); + break; + } + case SimdShuffleOp::INTERLEAVE_HIGH_64x2: { + masm.interleaveHighInt64x2(lhs, rhs, dest); + break; + } + case SimdShuffleOp::INTERLEAVE_LOW_8x16: { + masm.interleaveLowInt8x16(lhs, rhs, dest); + break; + } + case SimdShuffleOp::INTERLEAVE_LOW_16x8: { + masm.interleaveLowInt16x8(lhs, rhs, dest); + break; + } + case SimdShuffleOp::INTERLEAVE_LOW_32x4: { + masm.interleaveLowInt32x4(lhs, rhs, dest); + break; + } + case SimdShuffleOp::INTERLEAVE_LOW_64x2: { + masm.interleaveLowInt64x2(lhs, rhs, dest); + break; + } + case SimdShuffleOp::SHUFFLE_BLEND_8x16: { + masm.shuffleInt8x16(reinterpret_cast<const uint8_t*>(control.asInt8x16()), + lhs, rhs, dest); + break; + } + default: { + MOZ_CRASH("Unsupported SIMD shuffle operation"); + } + } +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void CodeGenerator::visitWasmPermuteSimd128(LWasmPermuteSimd128* ins) { +#ifdef ENABLE_WASM_SIMD + FloatRegister src = ToFloatRegister(ins->src()); + FloatRegister dest = 
ToFloatRegister(ins->output()); + SimdConstant control = ins->control(); + switch (ins->op()) { + case SimdPermuteOp::BROADCAST_8x16: { + const SimdConstant::I8x16& mask = control.asInt8x16(); + int8_t source = mask[0]; + masm.splatX16(source, src, dest); + break; + } + case SimdPermuteOp::BROADCAST_16x8: { + const SimdConstant::I16x8& mask = control.asInt16x8(); + int16_t source = mask[0]; + masm.splatX8(source, src, dest); + break; + } + case SimdPermuteOp::MOVE: { + masm.moveSimd128(src, dest); + break; + } + case SimdPermuteOp::PERMUTE_8x16: { + const SimdConstant::I8x16& mask = control.asInt8x16(); +# ifdef DEBUG + mozilla::DebugOnly<int> i; + for (i = 0; i < 16 && mask[i] == i; i++) { + } + MOZ_ASSERT(i < 16, "Should have been a MOVE operation"); +# endif + masm.permuteInt8x16(reinterpret_cast<const uint8_t*>(mask), src, dest); + break; + } + case SimdPermuteOp::PERMUTE_16x8: { + const SimdConstant::I16x8& mask = control.asInt16x8(); +# ifdef DEBUG + mozilla::DebugOnly<int> i; + for (i = 0; i < 8 && mask[i] == i; i++) { + } + MOZ_ASSERT(i < 8, "Should have been a MOVE operation"); +# endif + masm.permuteInt16x8(reinterpret_cast<const uint16_t*>(mask), src, dest); + break; + } + case SimdPermuteOp::PERMUTE_32x4: { + const SimdConstant::I32x4& mask = control.asInt32x4(); +# ifdef DEBUG + mozilla::DebugOnly<int> i; + for (i = 0; i < 4 && mask[i] == i; i++) { + } + MOZ_ASSERT(i < 4, "Should have been a MOVE operation"); +# endif + masm.permuteInt32x4(reinterpret_cast<const uint32_t*>(mask), src, dest); + break; + } + case SimdPermuteOp::ROTATE_RIGHT_8x16: { + int8_t count = control.asInt8x16()[0]; + MOZ_ASSERT(count > 0, "Should have been a MOVE operation"); + masm.rotateRightSimd128(src, dest, count); + break; + } + case SimdPermuteOp::SHIFT_LEFT_8x16: { + int8_t count = control.asInt8x16()[0]; + MOZ_ASSERT(count > 0, "Should have been a MOVE operation"); + masm.leftShiftSimd128(Imm32(count), src, dest); + break; + } + case SimdPermuteOp::SHIFT_RIGHT_8x16: { + 
int8_t count = control.asInt8x16()[0]; + MOZ_ASSERT(count > 0, "Should have been a MOVE operation"); + masm.rightShiftSimd128(Imm32(count), src, dest); + break; + } + case SimdPermuteOp::REVERSE_16x8: + masm.reverseInt16x8(src, dest); + break; + case SimdPermuteOp::REVERSE_32x4: + masm.reverseInt32x4(src, dest); + break; + case SimdPermuteOp::REVERSE_64x2: + masm.reverseInt64x2(src, dest); + break; + default: { + MOZ_CRASH("Unsupported SIMD permutation operation"); + } + } +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void CodeGenerator::visitWasmReplaceLaneSimd128(LWasmReplaceLaneSimd128* ins) { +#ifdef ENABLE_WASM_SIMD + MOZ_ASSERT(ToFloatRegister(ins->lhs()) == ToFloatRegister(ins->output())); + FloatRegister lhsDest = ToFloatRegister(ins->lhs()); + const LAllocation* rhs = ins->rhs(); + uint32_t laneIndex = ins->laneIndex(); + + switch (ins->simdOp()) { + case wasm::SimdOp::I8x16ReplaceLane: + masm.replaceLaneInt8x16(laneIndex, ToRegister(rhs), lhsDest); + break; + case wasm::SimdOp::I16x8ReplaceLane: + masm.replaceLaneInt16x8(laneIndex, ToRegister(rhs), lhsDest); + break; + case wasm::SimdOp::I32x4ReplaceLane: + masm.replaceLaneInt32x4(laneIndex, ToRegister(rhs), lhsDest); + break; + case wasm::SimdOp::F32x4ReplaceLane: + masm.replaceLaneFloat32x4(laneIndex, ToFloatRegister(rhs), lhsDest); + break; + case wasm::SimdOp::F64x2ReplaceLane: + masm.replaceLaneFloat64x2(laneIndex, ToFloatRegister(rhs), lhsDest); + break; + default: + MOZ_CRASH("ReplaceLane SimdOp not implemented"); + } +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void CodeGenerator::visitWasmReplaceInt64LaneSimd128( + LWasmReplaceInt64LaneSimd128* ins) { +#ifdef ENABLE_WASM_SIMD + MOZ_RELEASE_ASSERT(ins->simdOp() == wasm::SimdOp::I64x2ReplaceLane); + MOZ_ASSERT(ToFloatRegister(ins->lhs()) == ToFloatRegister(ins->output())); + masm.replaceLaneInt64x2(ins->laneIndex(), ToRegister64(ins->rhs()), + ToFloatRegister(ins->lhs())); +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void 
CodeGenerator::visitWasmScalarToSimd128(LWasmScalarToSimd128* ins) { +#ifdef ENABLE_WASM_SIMD + FloatRegister dest = ToFloatRegister(ins->output()); + + switch (ins->simdOp()) { + case wasm::SimdOp::I8x16Splat: + masm.splatX16(ToRegister(ins->src()), dest); + break; + case wasm::SimdOp::I16x8Splat: + masm.splatX8(ToRegister(ins->src()), dest); + break; + case wasm::SimdOp::I32x4Splat: + masm.splatX4(ToRegister(ins->src()), dest); + break; + case wasm::SimdOp::F32x4Splat: + masm.splatX4(ToFloatRegister(ins->src()), dest); + break; + case wasm::SimdOp::F64x2Splat: + masm.splatX2(ToFloatRegister(ins->src()), dest); + break; + default: + MOZ_CRASH("ScalarToSimd128 SimdOp not implemented"); + } +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void CodeGenerator::visitWasmInt64ToSimd128(LWasmInt64ToSimd128* ins) { +#ifdef ENABLE_WASM_SIMD + Register64 src = ToRegister64(ins->src()); + FloatRegister dest = ToFloatRegister(ins->output()); + + switch (ins->simdOp()) { + case wasm::SimdOp::I64x2Splat: + masm.splatX2(src, dest); + break; + case wasm::SimdOp::V128Load8x8S: + masm.moveGPR64ToDouble(src, dest); + masm.widenLowInt8x16(dest, dest); + break; + case wasm::SimdOp::V128Load8x8U: + masm.moveGPR64ToDouble(src, dest); + masm.unsignedWidenLowInt8x16(dest, dest); + break; + case wasm::SimdOp::V128Load16x4S: + masm.moveGPR64ToDouble(src, dest); + masm.widenLowInt16x8(dest, dest); + break; + case wasm::SimdOp::V128Load16x4U: + masm.moveGPR64ToDouble(src, dest); + masm.unsignedWidenLowInt16x8(dest, dest); + break; + case wasm::SimdOp::V128Load32x2S: + masm.moveGPR64ToDouble(src, dest); + masm.widenLowInt32x4(dest, dest); + break; + case wasm::SimdOp::V128Load32x2U: + masm.moveGPR64ToDouble(src, dest); + masm.unsignedWidenLowInt32x4(dest, dest); + break; + default: + MOZ_CRASH("Int64ToSimd128 SimdOp not implemented"); + } +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void CodeGenerator::visitWasmUnarySimd128(LWasmUnarySimd128* ins) { +#ifdef ENABLE_WASM_SIMD + FloatRegister src = 
ToFloatRegister(ins->src()); + FloatRegister dest = ToFloatRegister(ins->output()); + + switch (ins->simdOp()) { + case wasm::SimdOp::I8x16Neg: + masm.negInt8x16(src, dest); + break; + case wasm::SimdOp::I16x8Neg: + masm.negInt16x8(src, dest); + break; + case wasm::SimdOp::I16x8ExtendLowI8x16S: + masm.widenLowInt8x16(src, dest); + break; + case wasm::SimdOp::I16x8ExtendHighI8x16S: + masm.widenHighInt8x16(src, dest); + break; + case wasm::SimdOp::I16x8ExtendLowI8x16U: + masm.unsignedWidenLowInt8x16(src, dest); + break; + case wasm::SimdOp::I16x8ExtendHighI8x16U: + masm.unsignedWidenHighInt8x16(src, dest); + break; + case wasm::SimdOp::I32x4Neg: + masm.negInt32x4(src, dest); + break; + case wasm::SimdOp::I32x4ExtendLowI16x8S: + masm.widenLowInt16x8(src, dest); + break; + case wasm::SimdOp::I32x4ExtendHighI16x8S: + masm.widenHighInt16x8(src, dest); + break; + case wasm::SimdOp::I32x4ExtendLowI16x8U: + masm.unsignedWidenLowInt16x8(src, dest); + break; + case wasm::SimdOp::I32x4ExtendHighI16x8U: + masm.unsignedWidenHighInt16x8(src, dest); + break; + case wasm::SimdOp::I32x4TruncSatF32x4S: + masm.truncSatFloat32x4ToInt32x4(src, dest); + break; + case wasm::SimdOp::I32x4TruncSatF32x4U: + masm.unsignedTruncSatFloat32x4ToInt32x4(src, dest); + break; + case wasm::SimdOp::I64x2Neg: + masm.negInt64x2(src, dest); + break; + case wasm::SimdOp::I64x2ExtendLowI32x4S: + masm.widenLowInt32x4(src, dest); + break; + case wasm::SimdOp::I64x2ExtendHighI32x4S: + masm.widenHighInt32x4(src, dest); + break; + case wasm::SimdOp::I64x2ExtendLowI32x4U: + masm.unsignedWidenLowInt32x4(src, dest); + break; + case wasm::SimdOp::I64x2ExtendHighI32x4U: + masm.unsignedWidenHighInt32x4(src, dest); + break; + case wasm::SimdOp::F32x4Abs: + masm.absFloat32x4(src, dest); + break; + case wasm::SimdOp::F32x4Neg: + masm.negFloat32x4(src, dest); + break; + case wasm::SimdOp::F32x4Sqrt: + masm.sqrtFloat32x4(src, dest); + break; + case wasm::SimdOp::F32x4ConvertI32x4S: + masm.convertInt32x4ToFloat32x4(src, 
dest); + break; + case wasm::SimdOp::F32x4ConvertI32x4U: + masm.unsignedConvertInt32x4ToFloat32x4(src, dest); + break; + case wasm::SimdOp::F64x2Abs: + masm.absFloat64x2(src, dest); + break; + case wasm::SimdOp::F64x2Neg: + masm.negFloat64x2(src, dest); + break; + case wasm::SimdOp::F64x2Sqrt: + masm.sqrtFloat64x2(src, dest); + break; + case wasm::SimdOp::V128Not: + masm.bitwiseNotSimd128(src, dest); + break; + case wasm::SimdOp::I8x16Abs: + masm.absInt8x16(src, dest); + break; + case wasm::SimdOp::I16x8Abs: + masm.absInt16x8(src, dest); + break; + case wasm::SimdOp::I32x4Abs: + masm.absInt32x4(src, dest); + break; + case wasm::SimdOp::I64x2Abs: + masm.absInt64x2(src, dest); + break; + case wasm::SimdOp::F32x4Ceil: + masm.ceilFloat32x4(src, dest); + break; + case wasm::SimdOp::F32x4Floor: + masm.floorFloat32x4(src, dest); + break; + case wasm::SimdOp::F32x4Trunc: + masm.truncFloat32x4(src, dest); + break; + case wasm::SimdOp::F32x4Nearest: + masm.nearestFloat32x4(src, dest); + break; + case wasm::SimdOp::F64x2Ceil: + masm.ceilFloat64x2(src, dest); + break; + case wasm::SimdOp::F64x2Floor: + masm.floorFloat64x2(src, dest); + break; + case wasm::SimdOp::F64x2Trunc: + masm.truncFloat64x2(src, dest); + break; + case wasm::SimdOp::F64x2Nearest: + masm.nearestFloat64x2(src, dest); + break; + case wasm::SimdOp::F32x4DemoteF64x2Zero: + masm.convertFloat64x2ToFloat32x4(src, dest); + break; + case wasm::SimdOp::F64x2PromoteLowF32x4: + masm.convertFloat32x4ToFloat64x2(src, dest); + break; + case wasm::SimdOp::F64x2ConvertLowI32x4S: + masm.convertInt32x4ToFloat64x2(src, dest); + break; + case wasm::SimdOp::F64x2ConvertLowI32x4U: + masm.unsignedConvertInt32x4ToFloat64x2(src, dest); + break; + case wasm::SimdOp::I32x4TruncSatF64x2SZero: + masm.truncSatFloat64x2ToInt32x4(src, dest, ToFloatRegister(ins->temp())); + break; + case wasm::SimdOp::I32x4TruncSatF64x2UZero: + masm.unsignedTruncSatFloat64x2ToInt32x4(src, dest, + ToFloatRegister(ins->temp())); + break; + case 
wasm::SimdOp::I16x8ExtaddPairwiseI8x16S: + masm.extAddPairwiseInt8x16(src, dest); + break; + case wasm::SimdOp::I16x8ExtaddPairwiseI8x16U: + masm.unsignedExtAddPairwiseInt8x16(src, dest); + break; + case wasm::SimdOp::I32x4ExtaddPairwiseI16x8S: + masm.extAddPairwiseInt16x8(src, dest); + break; + case wasm::SimdOp::I32x4ExtaddPairwiseI16x8U: + masm.unsignedExtAddPairwiseInt16x8(src, dest); + break; + case wasm::SimdOp::I8x16Popcnt: + masm.popcntInt8x16(src, dest); + break; + case wasm::SimdOp::I32x4RelaxedTruncF32x4S: + masm.truncFloat32x4ToInt32x4Relaxed(src, dest); + break; + case wasm::SimdOp::I32x4RelaxedTruncF32x4U: + masm.unsignedTruncFloat32x4ToInt32x4Relaxed(src, dest); + break; + case wasm::SimdOp::I32x4RelaxedTruncF64x2SZero: + masm.truncFloat64x2ToInt32x4Relaxed(src, dest); + break; + case wasm::SimdOp::I32x4RelaxedTruncF64x2UZero: + masm.unsignedTruncFloat64x2ToInt32x4Relaxed(src, dest); + break; + default: + MOZ_CRASH("Unary SimdOp not implemented"); + } +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void CodeGenerator::visitWasmReduceSimd128(LWasmReduceSimd128* ins) { +#ifdef ENABLE_WASM_SIMD + FloatRegister src = ToFloatRegister(ins->src()); + const LDefinition* dest = ins->output(); + uint32_t imm = ins->imm(); + FloatRegister temp = ToTempFloatRegisterOrInvalid(ins->getTemp(0)); + + switch (ins->simdOp()) { + case wasm::SimdOp::V128AnyTrue: + masm.anyTrueSimd128(src, ToRegister(dest)); + break; + case wasm::SimdOp::I8x16AllTrue: + masm.allTrueInt8x16(src, ToRegister(dest)); + break; + case wasm::SimdOp::I16x8AllTrue: + masm.allTrueInt16x8(src, ToRegister(dest)); + break; + case wasm::SimdOp::I32x4AllTrue: + masm.allTrueInt32x4(src, ToRegister(dest)); + break; + case wasm::SimdOp::I64x2AllTrue: + masm.allTrueInt64x2(src, ToRegister(dest)); + break; + case wasm::SimdOp::I8x16Bitmask: + masm.bitmaskInt8x16(src, ToRegister(dest), temp); + break; + case wasm::SimdOp::I16x8Bitmask: + masm.bitmaskInt16x8(src, ToRegister(dest), temp); + break; + case 
wasm::SimdOp::I32x4Bitmask: + masm.bitmaskInt32x4(src, ToRegister(dest), temp); + break; + case wasm::SimdOp::I64x2Bitmask: + masm.bitmaskInt64x2(src, ToRegister(dest), temp); + break; + case wasm::SimdOp::I8x16ExtractLaneS: + masm.extractLaneInt8x16(imm, src, ToRegister(dest)); + break; + case wasm::SimdOp::I8x16ExtractLaneU: + masm.unsignedExtractLaneInt8x16(imm, src, ToRegister(dest)); + break; + case wasm::SimdOp::I16x8ExtractLaneS: + masm.extractLaneInt16x8(imm, src, ToRegister(dest)); + break; + case wasm::SimdOp::I16x8ExtractLaneU: + masm.unsignedExtractLaneInt16x8(imm, src, ToRegister(dest)); + break; + case wasm::SimdOp::I32x4ExtractLane: + masm.extractLaneInt32x4(imm, src, ToRegister(dest)); + break; + case wasm::SimdOp::F32x4ExtractLane: + masm.extractLaneFloat32x4(imm, src, ToFloatRegister(dest)); + break; + case wasm::SimdOp::F64x2ExtractLane: + masm.extractLaneFloat64x2(imm, src, ToFloatRegister(dest)); + break; + default: + MOZ_CRASH("Reduce SimdOp not implemented"); + } +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void CodeGenerator::visitWasmReduceAndBranchSimd128( + LWasmReduceAndBranchSimd128* ins) { +#ifdef ENABLE_WASM_SIMD + FloatRegister src = ToFloatRegister(ins->src()); + + ScratchSimd128Scope scratch(masm); + vixl::UseScratchRegisterScope temps(&masm.asVIXL()); + const Register test = temps.AcquireX().asUnsized(); + + switch (ins->simdOp()) { + case wasm::SimdOp::V128AnyTrue: + masm.Addp(Simd1D(scratch), Simd2D(src)); + masm.Umov(ARMRegister(test, 64), Simd1D(scratch), 0); + masm.branch64(Assembler::Equal, Register64(test), Imm64(0), + getJumpLabelForBranch(ins->ifFalse())); + jumpToBlock(ins->ifTrue()); + break; + case wasm::SimdOp::I8x16AllTrue: + case wasm::SimdOp::I16x8AllTrue: + case wasm::SimdOp::I32x4AllTrue: + case wasm::SimdOp::I64x2AllTrue: { + // Compare all lanes to zero. 
+ switch (ins->simdOp()) { + case wasm::SimdOp::I8x16AllTrue: + masm.Cmeq(Simd16B(scratch), Simd16B(src), 0); + break; + case wasm::SimdOp::I16x8AllTrue: + masm.Cmeq(Simd8H(scratch), Simd8H(src), 0); + break; + case wasm::SimdOp::I32x4AllTrue: + masm.Cmeq(Simd4S(scratch), Simd4S(src), 0); + break; + case wasm::SimdOp::I64x2AllTrue: + masm.Cmeq(Simd2D(scratch), Simd2D(src), 0); + break; + default: + MOZ_CRASH(); + } + masm.Addp(Simd1D(scratch), Simd2D(scratch)); + masm.Umov(ARMRegister(test, 64), Simd1D(scratch), 0); + masm.branch64(Assembler::NotEqual, Register64(test), Imm64(0), + getJumpLabelForBranch(ins->ifFalse())); + jumpToBlock(ins->ifTrue()); + break; + } + default: + MOZ_CRASH("Reduce-and-branch SimdOp not implemented"); + } +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void CodeGenerator::visitWasmReduceSimd128ToInt64( + LWasmReduceSimd128ToInt64* ins) { +#ifdef ENABLE_WASM_SIMD + FloatRegister src = ToFloatRegister(ins->src()); + Register64 dest = ToOutRegister64(ins); + uint32_t imm = ins->imm(); + + switch (ins->simdOp()) { + case wasm::SimdOp::I64x2ExtractLane: + masm.extractLaneInt64x2(imm, src, dest); + break; + default: + MOZ_CRASH("Reduce SimdOp not implemented"); + } +#else + MOZ_CRASH("No SIMD"); +#endif +} + +static inline wasm::MemoryAccessDesc DeriveMemoryAccessDesc( + const wasm::MemoryAccessDesc& access, Scalar::Type type) { + return wasm::MemoryAccessDesc(type, access.align(), access.offset(), + access.trapOffset()); +} + +void CodeGenerator::visitWasmLoadLaneSimd128(LWasmLoadLaneSimd128* ins) { +#ifdef ENABLE_WASM_SIMD + // Forward loading to wasmLoad, and use replaceLane after that. + const MWasmLoadLaneSimd128* mir = ins->mir(); + Register temp = ToRegister(ins->temp()); + FloatRegister src = ToFloatRegister(ins->src()); + FloatRegister dest = ToFloatRegister(ins->output()); + // replaceLane takes an lhsDest argument. 
+ masm.moveSimd128(src, dest); + switch (ins->laneSize()) { + case 1: { + masm.wasmLoad(DeriveMemoryAccessDesc(mir->access(), Scalar::Int8), + HeapReg, ToRegister(ins->ptr()), AnyRegister(temp)); + masm.replaceLaneInt8x16(ins->laneIndex(), temp, dest); + break; + } + case 2: { + masm.wasmLoad(DeriveMemoryAccessDesc(mir->access(), Scalar::Int16), + HeapReg, ToRegister(ins->ptr()), AnyRegister(temp)); + masm.replaceLaneInt16x8(ins->laneIndex(), temp, dest); + break; + } + case 4: { + masm.wasmLoad(DeriveMemoryAccessDesc(mir->access(), Scalar::Int32), + HeapReg, ToRegister(ins->ptr()), AnyRegister(temp)); + masm.replaceLaneInt32x4(ins->laneIndex(), temp, dest); + break; + } + case 8: { + masm.wasmLoadI64(DeriveMemoryAccessDesc(mir->access(), Scalar::Int64), + HeapReg, ToRegister(ins->ptr()), Register64(temp)); + masm.replaceLaneInt64x2(ins->laneIndex(), Register64(temp), dest); + break; + } + default: + MOZ_CRASH("Unsupported load lane size"); + } +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void CodeGenerator::visitWasmStoreLaneSimd128(LWasmStoreLaneSimd128* ins) { +#ifdef ENABLE_WASM_SIMD + // Forward storing to wasmStore for the result of extractLane. 
+ const MWasmStoreLaneSimd128* mir = ins->mir(); + Register temp = ToRegister(ins->temp()); + FloatRegister src = ToFloatRegister(ins->src()); + switch (ins->laneSize()) { + case 1: { + masm.extractLaneInt8x16(ins->laneIndex(), src, temp); + masm.wasmStore(DeriveMemoryAccessDesc(mir->access(), Scalar::Int8), + AnyRegister(temp), HeapReg, ToRegister(ins->ptr())); + break; + } + case 2: { + masm.extractLaneInt16x8(ins->laneIndex(), src, temp); + masm.wasmStore(DeriveMemoryAccessDesc(mir->access(), Scalar::Int16), + AnyRegister(temp), HeapReg, ToRegister(ins->ptr())); + break; + } + case 4: { + masm.extractLaneInt32x4(ins->laneIndex(), src, temp); + masm.wasmStore(DeriveMemoryAccessDesc(mir->access(), Scalar::Int32), + AnyRegister(temp), HeapReg, ToRegister(ins->ptr())); + break; + } + case 8: { + masm.extractLaneInt64x2(ins->laneIndex(), src, Register64(temp)); + masm.wasmStoreI64(DeriveMemoryAccessDesc(mir->access(), Scalar::Int64), + Register64(temp), HeapReg, ToRegister(ins->ptr())); + break; + } + default: + MOZ_CRASH("Unsupported store lane size"); + } +#else + MOZ_CRASH("No SIMD"); +#endif +} diff --git a/js/src/jit/arm64/CodeGenerator-arm64.h b/js/src/jit/arm64/CodeGenerator-arm64.h new file mode 100644 index 0000000000..43cd24fddf --- /dev/null +++ b/js/src/jit/arm64/CodeGenerator-arm64.h @@ -0,0 +1,135 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef jit_arm64_CodeGenerator_arm64_h +#define jit_arm64_CodeGenerator_arm64_h + +#include "jit/arm64/Assembler-arm64.h" +#include "jit/shared/CodeGenerator-shared.h" + +namespace js { +namespace jit { + +class CodeGeneratorARM64; +class OutOfLineBailout; +class OutOfLineTableSwitch; + +using OutOfLineWasmTruncateCheck = + OutOfLineWasmTruncateCheckBase<CodeGeneratorARM64>; + +class CodeGeneratorARM64 : public CodeGeneratorShared { + friend class MoveResolverARM64; + + protected: + CodeGeneratorARM64(MIRGenerator* gen, LIRGraph* graph, MacroAssembler* masm); + + NonAssertingLabel deoptLabel_; + + MoveOperand toMoveOperand(const LAllocation a) const; + + void bailoutIf(Assembler::Condition condition, LSnapshot* snapshot); + void bailoutFrom(Label* label, LSnapshot* snapshot); + void bailout(LSnapshot* snapshot); + + template <typename T1, typename T2> + void bailoutCmpPtr(Assembler::Condition c, T1 lhs, T2 rhs, + LSnapshot* snapshot) { + masm.cmpPtr(lhs, rhs); + return bailoutIf(c, snapshot); + } + void bailoutTestPtr(Assembler::Condition c, Register lhs, Register rhs, + LSnapshot* snapshot) { + masm.testPtr(lhs, rhs); + return bailoutIf(c, snapshot); + } + template <typename T1, typename T2> + void bailoutCmp32(Assembler::Condition c, T1 lhs, T2 rhs, + LSnapshot* snapshot) { + masm.cmp32(lhs, rhs); + return bailoutIf(c, snapshot); + } + template <typename T1, typename T2> + void bailoutTest32(Assembler::Condition c, T1 lhs, T2 rhs, + LSnapshot* snapshot) { + masm.test32(lhs, rhs); + return bailoutIf(c, snapshot); + } + void bailoutIfFalseBool(Register reg, LSnapshot* snapshot) { + masm.test32(reg, Imm32(0xFF)); + return bailoutIf(Assembler::Zero, snapshot); + } + + bool generateOutOfLineCode(); + + // Emits a branch that directs control flow to the true block if |cond| is + // true, and the false block if |cond| is false. 
+ void emitBranch(Assembler::Condition cond, MBasicBlock* ifTrue, + MBasicBlock* ifFalse); + + void testNullEmitBranch(Assembler::Condition cond, const ValueOperand& value, + MBasicBlock* ifTrue, MBasicBlock* ifFalse) { + cond = masm.testNull(cond, value); + emitBranch(cond, ifTrue, ifFalse); + } + void testUndefinedEmitBranch(Assembler::Condition cond, + const ValueOperand& value, MBasicBlock* ifTrue, + MBasicBlock* ifFalse) { + cond = masm.testUndefined(cond, value); + emitBranch(cond, ifTrue, ifFalse); + } + void testObjectEmitBranch(Assembler::Condition cond, + const ValueOperand& value, MBasicBlock* ifTrue, + MBasicBlock* ifFalse) { + cond = masm.testObject(cond, value); + emitBranch(cond, ifTrue, ifFalse); + } + void testZeroEmitBranch(Assembler::Condition cond, Register reg, + MBasicBlock* ifTrue, MBasicBlock* ifFalse) { + MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual); + masm.cmpPtr(reg, ImmWord(0)); + emitBranch(cond, ifTrue, ifFalse); + } + + void emitTableSwitchDispatch(MTableSwitch* mir, Register index, + Register base); + + void emitBigIntDiv(LBigIntDiv* ins, Register dividend, Register divisor, + Register output, Label* fail); + void emitBigIntMod(LBigIntMod* ins, Register dividend, Register divisor, + Register output, Label* fail); + void emitSimpleBinaryI64( + LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>* lir, JSOp op); + + ValueOperand ToValue(LInstruction* ins, size_t pos); + ValueOperand ToTempValue(LInstruction* ins, size_t pos); + + void generateInvalidateEpilogue(); + + public: + void visitOutOfLineBailout(OutOfLineBailout* ool); + void visitOutOfLineTableSwitch(OutOfLineTableSwitch* ool); + void visitOutOfLineWasmTruncateCheck(OutOfLineWasmTruncateCheck* ool); +}; + +typedef CodeGeneratorARM64 CodeGeneratorSpecific; + +// An out-of-line bailout thunk. +class OutOfLineBailout : public OutOfLineCodeBase<CodeGeneratorARM64> { + protected: // Silence Clang warning. 
+ LSnapshot* snapshot_; + + public: + explicit OutOfLineBailout(LSnapshot* snapshot) : snapshot_(snapshot) {} + + void accept(CodeGeneratorARM64* codegen) override; + + LSnapshot* snapshot() const { return snapshot_; } +}; + +} // namespace jit +} // namespace js + +#endif /* jit_arm64_CodeGenerator_arm64_h */ diff --git a/js/src/jit/arm64/LIR-arm64.h b/js/src/jit/arm64/LIR-arm64.h new file mode 100644 index 0000000000..d825209b1e --- /dev/null +++ b/js/src/jit/arm64/LIR-arm64.h @@ -0,0 +1,373 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef jit_arm64_LIR_arm64_h +#define jit_arm64_LIR_arm64_h + +namespace js { +namespace jit { + +class LUnboxBase : public LInstructionHelper<1, 1, 0> { + public: + LUnboxBase(LNode::Opcode opcode, const LAllocation& input) + : LInstructionHelper(opcode) { + setOperand(0, input); + } + + static const size_t Input = 0; + + MUnbox* mir() const { return mir_->toUnbox(); } +}; + +class LUnbox : public LUnboxBase { + public: + LIR_HEADER(Unbox); + + explicit LUnbox(const LAllocation& input) : LUnboxBase(classOpcode, input) {} + + const char* extraName() const { return StringFromMIRType(mir()->type()); } +}; + +class LUnboxFloatingPoint : public LUnboxBase { + MIRType type_; + + public: + LIR_HEADER(UnboxFloatingPoint); + + LUnboxFloatingPoint(const LAllocation& input, MIRType type) + : LUnboxBase(classOpcode, input), type_(type) {} + + MIRType type() const { return type_; } + const char* extraName() const { return StringFromMIRType(type_); } +}; + +// Convert a 32-bit unsigned integer to a double. 
+class LWasmUint32ToDouble : public LInstructionHelper<1, 1, 0> { + public: + LIR_HEADER(WasmUint32ToDouble) + + explicit LWasmUint32ToDouble(const LAllocation& input) + : LInstructionHelper(classOpcode) { + setOperand(0, input); + } +}; + +// Convert a 32-bit unsigned integer to a float32. +class LWasmUint32ToFloat32 : public LInstructionHelper<1, 1, 0> { + public: + LIR_HEADER(WasmUint32ToFloat32) + + explicit LWasmUint32ToFloat32(const LAllocation& input) + : LInstructionHelper(classOpcode) { + setOperand(0, input); + } +}; + +class LDivI : public LBinaryMath<1> { + public: + LIR_HEADER(DivI); + + LDivI(const LAllocation& lhs, const LAllocation& rhs, const LDefinition& temp) + : LBinaryMath(classOpcode) { + setOperand(0, lhs); + setOperand(1, rhs); + setTemp(0, temp); + } + + MDiv* mir() const { return mir_->toDiv(); } +}; + +class LDivPowTwoI : public LInstructionHelper<1, 1, 0> { + const int32_t shift_; + const bool negativeDivisor_; + + public: + LIR_HEADER(DivPowTwoI) + + LDivPowTwoI(const LAllocation& lhs, int32_t shift, bool negativeDivisor) + : LInstructionHelper(classOpcode), + shift_(shift), + negativeDivisor_(negativeDivisor) { + setOperand(0, lhs); + } + + const LAllocation* numerator() { return getOperand(0); } + + int32_t shift() { return shift_; } + bool negativeDivisor() { return negativeDivisor_; } + + MDiv* mir() const { return mir_->toDiv(); } +}; + +class LDivConstantI : public LInstructionHelper<1, 1, 1> { + const int32_t denominator_; + + public: + LIR_HEADER(DivConstantI) + + LDivConstantI(const LAllocation& lhs, int32_t denominator, + const LDefinition& temp) + : LInstructionHelper(classOpcode), denominator_(denominator) { + setOperand(0, lhs); + setTemp(0, temp); + } + + const LAllocation* numerator() { return getOperand(0); } + const LDefinition* temp() { return getTemp(0); } + int32_t denominator() const { return denominator_; } + MDiv* mir() const { return mir_->toDiv(); } + bool canBeNegativeDividend() const { return 
mir()->canBeNegativeDividend(); } +}; + +class LUDivConstantI : public LInstructionHelper<1, 1, 1> { + const int32_t denominator_; + + public: + LIR_HEADER(UDivConstantI) + + LUDivConstantI(const LAllocation& lhs, int32_t denominator, + const LDefinition& temp) + : LInstructionHelper(classOpcode), denominator_(denominator) { + setOperand(0, lhs); + setTemp(0, temp); + } + + const LAllocation* numerator() { return getOperand(0); } + const LDefinition* temp() { return getTemp(0); } + int32_t denominator() const { return denominator_; } + MDiv* mir() const { return mir_->toDiv(); } +}; + +class LModI : public LBinaryMath<0> { + public: + LIR_HEADER(ModI); + + LModI(const LAllocation& lhs, const LAllocation& rhs) + : LBinaryMath(classOpcode) { + setOperand(0, lhs); + setOperand(1, rhs); + } + + MMod* mir() const { return mir_->toMod(); } +}; + +class LModPowTwoI : public LInstructionHelper<1, 1, 0> { + const int32_t shift_; + + public: + LIR_HEADER(ModPowTwoI); + int32_t shift() { return shift_; } + + LModPowTwoI(const LAllocation& lhs, int32_t shift) + : LInstructionHelper(classOpcode), shift_(shift) { + setOperand(0, lhs); + } + + MMod* mir() const { return mir_->toMod(); } +}; + +class LModMaskI : public LInstructionHelper<1, 1, 2> { + const int32_t shift_; + + public: + LIR_HEADER(ModMaskI); + + LModMaskI(const LAllocation& lhs, const LDefinition& temp1, + const LDefinition& temp2, int32_t shift) + : LInstructionHelper(classOpcode), shift_(shift) { + setOperand(0, lhs); + setTemp(0, temp1); + setTemp(1, temp2); + } + + int32_t shift() const { return shift_; } + + MMod* mir() const { return mir_->toMod(); } +}; + +// Takes a tableswitch with an integer to decide +class LTableSwitch : public LInstructionHelper<0, 1, 2> { + public: + LIR_HEADER(TableSwitch); + + LTableSwitch(const LAllocation& in, const LDefinition& inputCopy, + const LDefinition& jumpTablePointer, MTableSwitch* ins) + : LInstructionHelper(classOpcode) { + setOperand(0, in); + setTemp(0, inputCopy); + 
setTemp(1, jumpTablePointer); + setMir(ins); + } + + MTableSwitch* mir() const { return mir_->toTableSwitch(); } + + const LAllocation* index() { return getOperand(0); } + const LDefinition* tempInt() { return getTemp(0); } + // This is added to share the same CodeGenerator prefixes. + const LDefinition* tempPointer() { return getTemp(1); } +}; + +// Takes a tableswitch with an integer to decide +class LTableSwitchV : public LInstructionHelper<0, BOX_PIECES, 3> { + public: + LIR_HEADER(TableSwitchV); + + LTableSwitchV(const LBoxAllocation& input, const LDefinition& inputCopy, + const LDefinition& floatCopy, + const LDefinition& jumpTablePointer, MTableSwitch* ins) + : LInstructionHelper(classOpcode) { + setBoxOperand(InputValue, input); + setTemp(0, inputCopy); + setTemp(1, floatCopy); + setTemp(2, jumpTablePointer); + setMir(ins); + } + + MTableSwitch* mir() const { return mir_->toTableSwitch(); } + + static const size_t InputValue = 0; + + const LDefinition* tempInt() { return getTemp(0); } + const LDefinition* tempFloat() { return getTemp(1); } + const LDefinition* tempPointer() { return getTemp(2); } +}; + +class LMulI : public LBinaryMath<0> { + public: + LIR_HEADER(MulI); + + LMulI() : LBinaryMath(classOpcode) {} + + MMul* mir() { return mir_->toMul(); } +}; + +class LUDiv : public LBinaryMath<1> { + public: + LIR_HEADER(UDiv); + + LUDiv(const LAllocation& lhs, const LAllocation& rhs, + const LDefinition& remainder) + : LBinaryMath(classOpcode) { + setOperand(0, lhs); + setOperand(1, rhs); + setTemp(0, remainder); + } + + const LDefinition* remainder() { return getTemp(0); } + + MDiv* mir() { return mir_->toDiv(); } +}; + +class LUMod : public LBinaryMath<0> { + public: + LIR_HEADER(UMod); + + LUMod(const LAllocation& lhs, const LAllocation& rhs) + : LBinaryMath(classOpcode) { + setOperand(0, lhs); + setOperand(1, rhs); + } + + MMod* mir() { return mir_->toMod(); } +}; + +class LInt64ToFloatingPoint : public LInstructionHelper<1, 1, 0> { + public: + 
LIR_HEADER(Int64ToFloatingPoint); + + explicit LInt64ToFloatingPoint(const LInt64Allocation& in) + : LInstructionHelper(classOpcode) { + setInt64Operand(0, in); + } + + MInt64ToFloatingPoint* mir() const { return mir_->toInt64ToFloatingPoint(); } +}; + +class LWasmTruncateToInt64 : public LInstructionHelper<1, 1, 0> { + public: + LIR_HEADER(WasmTruncateToInt64); + + explicit LWasmTruncateToInt64(const LAllocation& in) + : LInstructionHelper(classOpcode) { + setOperand(0, in); + } + + MWasmTruncateToInt64* mir() const { return mir_->toWasmTruncateToInt64(); } +}; + +class LDivOrModI64 : public LBinaryMath<0> { + public: + LIR_HEADER(DivOrModI64) + + LDivOrModI64(const LAllocation& lhs, const LAllocation& rhs) + : LBinaryMath(classOpcode) { + setOperand(0, lhs); + setOperand(1, rhs); + } + + MBinaryArithInstruction* mir() const { + MOZ_ASSERT(mir_->isDiv() || mir_->isMod()); + return static_cast<MBinaryArithInstruction*>(mir_); + } + + bool canBeDivideByZero() const { + if (mir_->isMod()) { + return mir_->toMod()->canBeDivideByZero(); + } + return mir_->toDiv()->canBeDivideByZero(); + } + bool canBeNegativeOverflow() const { + if (mir_->isMod()) { + return mir_->toMod()->canBeNegativeDividend(); + } + return mir_->toDiv()->canBeNegativeOverflow(); + } + wasm::BytecodeOffset bytecodeOffset() const { + MOZ_ASSERT(mir_->isDiv() || mir_->isMod()); + if (mir_->isMod()) { + return mir_->toMod()->bytecodeOffset(); + } + return mir_->toDiv()->bytecodeOffset(); + } +}; + +class LUDivOrModI64 : public LBinaryMath<0> { + public: + LIR_HEADER(UDivOrModI64); + + LUDivOrModI64(const LAllocation& lhs, const LAllocation& rhs) + : LBinaryMath(classOpcode) { + setOperand(0, lhs); + setOperand(1, rhs); + } + + const char* extraName() const { + return mir()->isTruncated() ? 
"Truncated" : nullptr; + } + + MBinaryArithInstruction* mir() const { + MOZ_ASSERT(mir_->isDiv() || mir_->isMod()); + return static_cast<MBinaryArithInstruction*>(mir_); + } + bool canBeDivideByZero() const { + if (mir_->isMod()) { + return mir_->toMod()->canBeDivideByZero(); + } + return mir_->toDiv()->canBeDivideByZero(); + } + wasm::BytecodeOffset bytecodeOffset() const { + MOZ_ASSERT(mir_->isDiv() || mir_->isMod()); + if (mir_->isMod()) { + return mir_->toMod()->bytecodeOffset(); + } + return mir_->toDiv()->bytecodeOffset(); + } +}; + +} // namespace jit +} // namespace js + +#endif /* jit_arm64_LIR_arm64_h */ diff --git a/js/src/jit/arm64/Lowering-arm64.cpp b/js/src/jit/arm64/Lowering-arm64.cpp new file mode 100644 index 0000000000..d71f22089d --- /dev/null +++ b/js/src/jit/arm64/Lowering-arm64.cpp @@ -0,0 +1,1438 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "jit/arm64/Lowering-arm64.h" + +#include "mozilla/MathAlgorithms.h" + +#include "jit/arm64/Assembler-arm64.h" +#include "jit/Lowering.h" +#include "jit/MIR.h" +#include "jit/shared/Lowering-shared-inl.h" + +using namespace js; +using namespace js::jit; + +using mozilla::FloorLog2; + +LBoxAllocation LIRGeneratorARM64::useBoxFixed(MDefinition* mir, Register reg1, + Register, bool useAtStart) { + MOZ_ASSERT(mir->type() == MIRType::Value); + + ensureDefined(mir); + return LBoxAllocation(LUse(reg1, mir->virtualRegister(), useAtStart)); +} + +LAllocation LIRGeneratorARM64::useByteOpRegister(MDefinition* mir) { + return useRegister(mir); +} + +LAllocation LIRGeneratorARM64::useByteOpRegisterAtStart(MDefinition* mir) { + return useRegisterAtStart(mir); +} + +LAllocation LIRGeneratorARM64::useByteOpRegisterOrNonDoubleConstant( + MDefinition* mir) { + return useRegisterOrNonDoubleConstant(mir); +} + +LDefinition LIRGeneratorARM64::tempByteOpRegister() { return temp(); } + +LDefinition LIRGeneratorARM64::tempToUnbox() { return temp(); } + +void LIRGenerator::visitBox(MBox* box) { + MDefinition* opd = box->getOperand(0); + + // If the operand is a constant, emit near its uses. + if (opd->isConstant() && box->canEmitAtUses()) { + emitAtUses(box); + return; + } + + if (opd->isConstant()) { + define(new (alloc()) LValue(opd->toConstant()->toJSValue()), box, + LDefinition(LDefinition::BOX)); + } else { + LBox* ins = new (alloc()) LBox(useRegister(opd), opd->type()); + define(ins, box, LDefinition(LDefinition::BOX)); + } +} + +void LIRGenerator::visitUnbox(MUnbox* unbox) { + MDefinition* box = unbox->getOperand(0); + MOZ_ASSERT(box->type() == MIRType::Value); + + LUnboxBase* lir; + if (IsFloatingPointType(unbox->type())) { + lir = new (alloc()) + LUnboxFloatingPoint(useRegisterAtStart(box), unbox->type()); + } else if (unbox->fallible()) { + // If the unbox is fallible, load the Value in a register first to + // avoid multiple loads. 
+ lir = new (alloc()) LUnbox(useRegisterAtStart(box)); + } else { + // FIXME: It should be possible to useAtStart() here, but the DEBUG + // code in CodeGenerator::visitUnbox() needs to handle non-Register + // cases. ARM64 doesn't have an Operand type. + lir = new (alloc()) LUnbox(useRegisterAtStart(box)); + } + + if (unbox->fallible()) { + assignSnapshot(lir, unbox->bailoutKind()); + } + + define(lir, unbox); +} + +void LIRGenerator::visitReturnImpl(MDefinition* opd, bool isGenerator) { + MOZ_ASSERT(opd->type() == MIRType::Value); + + LReturn* ins = new (alloc()) LReturn(isGenerator); + ins->setOperand(0, useFixed(opd, JSReturnReg)); + add(ins); +} + +// x = !y +void LIRGeneratorARM64::lowerForALU(LInstructionHelper<1, 1, 0>* ins, + MDefinition* mir, MDefinition* input) { + ins->setOperand( + 0, ins->snapshot() ? useRegister(input) : useRegisterAtStart(input)); + define( + ins, mir, + LDefinition(LDefinition::TypeFrom(mir->type()), LDefinition::REGISTER)); +} + +// z = x+y +void LIRGeneratorARM64::lowerForALU(LInstructionHelper<1, 2, 0>* ins, + MDefinition* mir, MDefinition* lhs, + MDefinition* rhs) { + ins->setOperand(0, + ins->snapshot() ? useRegister(lhs) : useRegisterAtStart(lhs)); + ins->setOperand(1, ins->snapshot() ? 
useRegisterOrConstant(rhs) + : useRegisterOrConstantAtStart(rhs)); + define( + ins, mir, + LDefinition(LDefinition::TypeFrom(mir->type()), LDefinition::REGISTER)); +} + +void LIRGeneratorARM64::lowerForFPU(LInstructionHelper<1, 1, 0>* ins, + MDefinition* mir, MDefinition* input) { + ins->setOperand(0, useRegisterAtStart(input)); + define( + ins, mir, + LDefinition(LDefinition::TypeFrom(mir->type()), LDefinition::REGISTER)); +} + +template <size_t Temps> +void LIRGeneratorARM64::lowerForFPU(LInstructionHelper<1, 2, Temps>* ins, + MDefinition* mir, MDefinition* lhs, + MDefinition* rhs) { + ins->setOperand(0, useRegisterAtStart(lhs)); + ins->setOperand(1, useRegisterAtStart(rhs)); + define( + ins, mir, + LDefinition(LDefinition::TypeFrom(mir->type()), LDefinition::REGISTER)); +} + +template void LIRGeneratorARM64::lowerForFPU(LInstructionHelper<1, 2, 0>* ins, + MDefinition* mir, MDefinition* lhs, + MDefinition* rhs); +template void LIRGeneratorARM64::lowerForFPU(LInstructionHelper<1, 2, 1>* ins, + MDefinition* mir, MDefinition* lhs, + MDefinition* rhs); + +void LIRGeneratorARM64::lowerForALUInt64( + LInstructionHelper<INT64_PIECES, INT64_PIECES, 0>* ins, MDefinition* mir, + MDefinition* input) { + ins->setInt64Operand(0, useInt64RegisterAtStart(input)); + defineInt64(ins, mir); +} + +// These all currently have codegen that depends on reuse but only because the +// masm API depends on that. We need new three-address masm APIs, for both +// constant and variable rhs. 
+// +// MAdd => LAddI64 +// MSub => LSubI64 +// MBitAnd, MBitOr, MBitXor => LBitOpI64 +void LIRGeneratorARM64::lowerForALUInt64( + LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>* ins, + MDefinition* mir, MDefinition* lhs, MDefinition* rhs) { + ins->setInt64Operand(0, useInt64RegisterAtStart(lhs)); + ins->setInt64Operand(INT64_PIECES, useInt64RegisterOrConstantAtStart(rhs)); + defineInt64(ins, mir); +} + +void LIRGeneratorARM64::lowerForMulInt64(LMulI64* ins, MMul* mir, + MDefinition* lhs, MDefinition* rhs) { + ins->setInt64Operand(LMulI64::Lhs, useInt64RegisterAtStart(lhs)); + ins->setInt64Operand(LMulI64::Rhs, useInt64RegisterOrConstantAtStart(rhs)); + defineInt64(ins, mir); +} + +template <size_t Temps> +void LIRGeneratorARM64::lowerForShiftInt64( + LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, Temps>* ins, + MDefinition* mir, MDefinition* lhs, MDefinition* rhs) { + ins->setInt64Operand(0, useInt64RegisterAtStart(lhs)); + + static_assert(LShiftI64::Rhs == INT64_PIECES, + "Assume Rhs is located at INT64_PIECES."); + static_assert(LRotateI64::Count == INT64_PIECES, + "Assume Count is located at INT64_PIECES."); + + ins->setOperand(INT64_PIECES, useRegisterOrConstantAtStart(rhs)); + defineInt64(ins, mir); +} + +template void LIRGeneratorARM64::lowerForShiftInt64( + LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, 0>* ins, + MDefinition* mir, MDefinition* lhs, MDefinition* rhs); +template void LIRGeneratorARM64::lowerForShiftInt64( + LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, 1>* ins, + MDefinition* mir, MDefinition* lhs, MDefinition* rhs); + +void LIRGeneratorARM64::lowerForCompareI64AndBranch(MTest* mir, MCompare* comp, + JSOp op, MDefinition* left, + MDefinition* right, + MBasicBlock* ifTrue, + MBasicBlock* ifFalse) { + auto* lir = new (alloc()) + LCompareI64AndBranch(comp, op, useInt64Register(left), + useInt64RegisterOrConstant(right), ifTrue, ifFalse); + add(lir, mir); +} + +void LIRGeneratorARM64::lowerForBitAndAndBranch(LBitAndAndBranch* 
baab, + MInstruction* mir, + MDefinition* lhs, + MDefinition* rhs) { + baab->setOperand(0, useRegisterAtStart(lhs)); + baab->setOperand(1, useRegisterOrConstantAtStart(rhs)); + add(baab, mir); +} + +void LIRGeneratorARM64::lowerWasmBuiltinTruncateToInt32( + MWasmBuiltinTruncateToInt32* ins) { + MDefinition* opd = ins->input(); + MOZ_ASSERT(opd->type() == MIRType::Double || opd->type() == MIRType::Float32); + + if (opd->type() == MIRType::Double) { + define(new (alloc()) LWasmBuiltinTruncateDToInt32( + useRegister(opd), useFixed(ins->instance(), InstanceReg), + LDefinition::BogusTemp()), + ins); + return; + } + + define(new (alloc()) LWasmBuiltinTruncateFToInt32( + useRegister(opd), useFixed(ins->instance(), InstanceReg), + LDefinition::BogusTemp()), + ins); +} + +void LIRGeneratorARM64::lowerUntypedPhiInput(MPhi* phi, uint32_t inputPosition, + LBlock* block, size_t lirIndex) { + lowerTypedPhiInput(phi, inputPosition, block, lirIndex); +} + +void LIRGeneratorARM64::lowerForShift(LInstructionHelper<1, 2, 0>* ins, + MDefinition* mir, MDefinition* lhs, + MDefinition* rhs) { + ins->setOperand(0, useRegister(lhs)); + ins->setOperand(1, useRegisterOrConstant(rhs)); + define(ins, mir); +} + +void LIRGeneratorARM64::lowerDivI(MDiv* div) { + if (div->isUnsigned()) { + lowerUDiv(div); + return; + } + + if (div->rhs()->isConstant()) { + LAllocation lhs = useRegister(div->lhs()); + int32_t rhs = div->rhs()->toConstant()->toInt32(); + int32_t shift = mozilla::FloorLog2(mozilla::Abs(rhs)); + + if (rhs != 0 && uint32_t(1) << shift == mozilla::Abs(rhs)) { + LDivPowTwoI* lir = new (alloc()) LDivPowTwoI(lhs, shift, rhs < 0); + if (div->fallible()) { + assignSnapshot(lir, div->bailoutKind()); + } + define(lir, div); + return; + } + if (rhs != 0) { + LDivConstantI* lir = new (alloc()) LDivConstantI(lhs, rhs, temp()); + if (div->fallible()) { + assignSnapshot(lir, div->bailoutKind()); + } + define(lir, div); + return; + } + } + + LDivI* lir = new (alloc()) + 
LDivI(useRegister(div->lhs()), useRegister(div->rhs()), temp()); + if (div->fallible()) { + assignSnapshot(lir, div->bailoutKind()); + } + define(lir, div); +} + +void LIRGeneratorARM64::lowerNegI(MInstruction* ins, MDefinition* input) { + define(new (alloc()) LNegI(useRegisterAtStart(input)), ins); +} + +void LIRGeneratorARM64::lowerNegI64(MInstruction* ins, MDefinition* input) { + defineInt64(new (alloc()) LNegI64(useInt64RegisterAtStart(input)), ins); +} + +void LIRGeneratorARM64::lowerMulI(MMul* mul, MDefinition* lhs, + MDefinition* rhs) { + LMulI* lir = new (alloc()) LMulI; + if (mul->fallible()) { + assignSnapshot(lir, mul->bailoutKind()); + } + lowerForALU(lir, mul, lhs, rhs); +} + +// Lower an Int32 modulus. Unsigned mods are forwarded to lowerUMod. A constant +// rhs that is a positive power of two becomes LModPowTwoI, a constant rhs of +// the form 2^k - 1 becomes LModMaskI, and everything else becomes the generic +// LModI. Exactly one LIR node must be defined for |mod|. +void LIRGeneratorARM64::lowerModI(MMod* mod) { + if (mod->isUnsigned()) { + lowerUMod(mod); + return; + } + + if (mod->rhs()->isConstant()) { + int32_t rhs = mod->rhs()->toConstant()->toInt32(); + int32_t shift = FloorLog2(rhs); + if (rhs > 0 && 1 << shift == rhs) { + LModPowTwoI* lir = + new (alloc()) LModPowTwoI(useRegister(mod->lhs()), shift); + if (mod->fallible()) { + assignSnapshot(lir, mod->bailoutKind()); + } + define(lir, mod); + return; + } else if (shift < 31 && (1 << (shift + 1)) - 1 == rhs) { + LModMaskI* lir = new (alloc()) + LModMaskI(useRegister(mod->lhs()), temp(), temp(), shift + 1); + if (mod->fallible()) { + assignSnapshot(lir, mod->bailoutKind()); + } + define(lir, mod); + // |mod| is now defined by the LModMaskI; return so that the generic + // LModI path below does not define the same MIR node a second time. + return; + } + } + + LModI* lir = + new (alloc()) LModI(useRegister(mod->lhs()), useRegister(mod->rhs())); + if (mod->fallible()) { + assignSnapshot(lir, mod->bailoutKind()); + } + define(lir, mod); +} + +void LIRGeneratorARM64::lowerDivI64(MDiv* div) { + if (div->isUnsigned()) { + lowerUDivI64(div); + return; + } + + LDivOrModI64* lir = new (alloc()) + LDivOrModI64(useRegister(div->lhs()), useRegister(div->rhs())); + defineInt64(lir, div); +} + +void LIRGeneratorARM64::lowerUDivI64(MDiv* div) { + LUDivOrModI64* lir = new (alloc()) + LUDivOrModI64(useRegister(div->lhs()), useRegister(div->rhs())); + 
defineInt64(lir, div); +} + +void LIRGeneratorARM64::lowerUModI64(MMod* mod) { + LUDivOrModI64* lir = new (alloc()) + LUDivOrModI64(useRegister(mod->lhs()), useRegister(mod->rhs())); + defineInt64(lir, mod); +} + +void LIRGeneratorARM64::lowerWasmBuiltinDivI64(MWasmBuiltinDivI64* div) { + MOZ_CRASH("We don't use runtime div for this architecture"); +} + +void LIRGeneratorARM64::lowerModI64(MMod* mod) { + if (mod->isUnsigned()) { + lowerUModI64(mod); + return; + } + + LDivOrModI64* lir = new (alloc()) + LDivOrModI64(useRegister(mod->lhs()), useRegister(mod->rhs())); + defineInt64(lir, mod); +} + +void LIRGeneratorARM64::lowerWasmBuiltinModI64(MWasmBuiltinModI64* mod) { + MOZ_CRASH("We don't use runtime mod for this architecture"); +} + +void LIRGenerator::visitPowHalf(MPowHalf* ins) { + MDefinition* input = ins->input(); + MOZ_ASSERT(input->type() == MIRType::Double); + LPowHalfD* lir = new (alloc()) LPowHalfD(useRegister(input)); + define(lir, ins); +} + +void LIRGeneratorARM64::lowerWasmSelectI(MWasmSelect* select) { + if (select->type() == MIRType::Simd128) { + LAllocation t = useRegisterAtStart(select->trueExpr()); + LAllocation f = useRegister(select->falseExpr()); + LAllocation c = useRegister(select->condExpr()); + auto* lir = new (alloc()) LWasmSelect(t, f, c); + defineReuseInput(lir, select, LWasmSelect::TrueExprIndex); + } else { + LAllocation t = useRegisterAtStart(select->trueExpr()); + LAllocation f = useRegisterAtStart(select->falseExpr()); + LAllocation c = useRegisterAtStart(select->condExpr()); + define(new (alloc()) LWasmSelect(t, f, c), select); + } +} + +void LIRGeneratorARM64::lowerWasmSelectI64(MWasmSelect* select) { + LInt64Allocation t = useInt64RegisterAtStart(select->trueExpr()); + LInt64Allocation f = useInt64RegisterAtStart(select->falseExpr()); + LAllocation c = useRegisterAtStart(select->condExpr()); + defineInt64(new (alloc()) LWasmSelectI64(t, f, c), select); +} + +// On arm64 we specialize the cases: compare is {{U,}Int32, 
{U,}Int64}, +// Float32, Double}, and select is {{U,}Int32, {U,}Int64}, Float32, Double}, +// independently. +bool LIRGeneratorARM64::canSpecializeWasmCompareAndSelect( + MCompare::CompareType compTy, MIRType insTy) { + return (insTy == MIRType::Int32 || insTy == MIRType::Int64 || + insTy == MIRType::Float32 || insTy == MIRType::Double) && + (compTy == MCompare::Compare_Int32 || + compTy == MCompare::Compare_UInt32 || + compTy == MCompare::Compare_Int64 || + compTy == MCompare::Compare_UInt64 || + compTy == MCompare::Compare_Float32 || + compTy == MCompare::Compare_Double); +} + +void LIRGeneratorARM64::lowerWasmCompareAndSelect(MWasmSelect* ins, + MDefinition* lhs, + MDefinition* rhs, + MCompare::CompareType compTy, + JSOp jsop) { + MOZ_ASSERT(canSpecializeWasmCompareAndSelect(compTy, ins->type())); + LAllocation rhsAlloc; + if (compTy == MCompare::Compare_Float32 || + compTy == MCompare::Compare_Double) { + rhsAlloc = useRegisterAtStart(rhs); + } else if (compTy == MCompare::Compare_Int32 || + compTy == MCompare::Compare_UInt32 || + compTy == MCompare::Compare_Int64 || + compTy == MCompare::Compare_UInt64) { + rhsAlloc = useRegisterOrConstantAtStart(rhs); + } else { + MOZ_CRASH("Unexpected type"); + } + auto* lir = new (alloc()) + LWasmCompareAndSelect(useRegisterAtStart(lhs), rhsAlloc, compTy, jsop, + useRegisterAtStart(ins->trueExpr()), + useRegisterAtStart(ins->falseExpr())); + define(lir, ins); +} + +void LIRGenerator::visitAbs(MAbs* ins) { + define(allocateAbs(ins, useRegisterAtStart(ins->input())), ins); +} + +LTableSwitch* LIRGeneratorARM64::newLTableSwitch(const LAllocation& in, + const LDefinition& inputCopy, + MTableSwitch* tableswitch) { + return new (alloc()) LTableSwitch(in, inputCopy, temp(), tableswitch); +} + +LTableSwitchV* LIRGeneratorARM64::newLTableSwitchV(MTableSwitch* tableswitch) { + return new (alloc()) LTableSwitchV(useBox(tableswitch->getOperand(0)), temp(), + tempDouble(), temp(), tableswitch); +} + +void 
LIRGeneratorARM64::lowerUrshD(MUrsh* mir) { + MDefinition* lhs = mir->lhs(); + MDefinition* rhs = mir->rhs(); + + MOZ_ASSERT(lhs->type() == MIRType::Int32); + MOZ_ASSERT(rhs->type() == MIRType::Int32); + + LUrshD* lir = new (alloc()) + LUrshD(useRegister(lhs), useRegisterOrConstant(rhs), temp()); + define(lir, mir); +} + +void LIRGeneratorARM64::lowerPowOfTwoI(MPow* mir) { + int32_t base = mir->input()->toConstant()->toInt32(); + MDefinition* power = mir->power(); + + auto* lir = new (alloc()) LPowOfTwoI(useRegister(power), base); + assignSnapshot(lir, mir->bailoutKind()); + define(lir, mir); +} + +void LIRGeneratorARM64::lowerBigIntLsh(MBigIntLsh* ins) { + auto* lir = new (alloc()) LBigIntLsh( + useRegister(ins->lhs()), useRegister(ins->rhs()), temp(), temp(), temp()); + define(lir, ins); + assignSafepoint(lir, ins); +} + +void LIRGeneratorARM64::lowerBigIntRsh(MBigIntRsh* ins) { + auto* lir = new (alloc()) LBigIntRsh( + useRegister(ins->lhs()), useRegister(ins->rhs()), temp(), temp(), temp()); + define(lir, ins); + assignSafepoint(lir, ins); +} + +void LIRGeneratorARM64::lowerBigIntDiv(MBigIntDiv* ins) { + auto* lir = new (alloc()) LBigIntDiv(useRegister(ins->lhs()), + useRegister(ins->rhs()), temp(), temp()); + define(lir, ins); + assignSafepoint(lir, ins); +} + +void LIRGeneratorARM64::lowerBigIntMod(MBigIntMod* ins) { + auto* lir = new (alloc()) LBigIntMod(useRegister(ins->lhs()), + useRegister(ins->rhs()), temp(), temp()); + define(lir, ins); + assignSafepoint(lir, ins); +} + +#ifdef ENABLE_WASM_SIMD + +bool LIRGeneratorARM64::canFoldReduceSimd128AndBranch(wasm::SimdOp op) { + switch (op) { + case wasm::SimdOp::V128AnyTrue: + case wasm::SimdOp::I8x16AllTrue: + case wasm::SimdOp::I16x8AllTrue: + case wasm::SimdOp::I32x4AllTrue: + case wasm::SimdOp::I64x2AllTrue: + return true; + default: + return false; + } +} + +bool LIRGeneratorARM64::canEmitWasmReduceSimd128AtUses( + MWasmReduceSimd128* ins) { + if (!ins->canEmitAtUses()) { + return false; + } + // Only 
specific ops generating int32. + if (ins->type() != MIRType::Int32) { + return false; + } + if (!canFoldReduceSimd128AndBranch(ins->simdOp())) { + return false; + } + // If never used then defer (it will be removed). + MUseIterator iter(ins->usesBegin()); + if (iter == ins->usesEnd()) { + return true; + } + // We require an MTest consumer. + MNode* node = iter->consumer(); + if (!node->isDefinition() || !node->toDefinition()->isTest()) { + return false; + } + // Defer only if there's only one use. + iter++; + return iter == ins->usesEnd(); +} + +#endif + +void LIRGenerator::visitWasmNeg(MWasmNeg* ins) { + switch (ins->type()) { + case MIRType::Int32: + define(new (alloc()) LNegI(useRegisterAtStart(ins->input())), ins); + break; + case MIRType::Float32: + define(new (alloc()) LNegF(useRegisterAtStart(ins->input())), ins); + break; + case MIRType::Double: + define(new (alloc()) LNegD(useRegisterAtStart(ins->input())), ins); + break; + default: + MOZ_CRASH("unexpected type"); + } +} + +void LIRGeneratorARM64::lowerUDiv(MDiv* div) { + LAllocation lhs = useRegister(div->lhs()); + if (div->rhs()->isConstant()) { + // NOTE: the result of toInt32 is coerced to uint32_t. 
+ uint32_t rhs = div->rhs()->toConstant()->toInt32(); + int32_t shift = mozilla::FloorLog2(rhs); + + if (rhs != 0 && uint32_t(1) << shift == rhs) { + LDivPowTwoI* lir = new (alloc()) LDivPowTwoI(lhs, shift, false); + if (div->fallible()) { + assignSnapshot(lir, div->bailoutKind()); + } + define(lir, div); + return; + } + + LUDivConstantI* lir = new (alloc()) LUDivConstantI(lhs, rhs, temp()); + if (div->fallible()) { + assignSnapshot(lir, div->bailoutKind()); + } + define(lir, div); + return; + } + + // Generate UDiv + LAllocation rhs = useRegister(div->rhs()); + LDefinition remainder = LDefinition::BogusTemp(); + if (!div->canTruncateRemainder()) { + remainder = temp(); + } + + LUDiv* lir = new (alloc()) LUDiv(lhs, rhs, remainder); + if (div->fallible()) { + assignSnapshot(lir, div->bailoutKind()); + } + define(lir, div); +} + +void LIRGeneratorARM64::lowerUMod(MMod* mod) { + LUMod* lir = new (alloc()) + LUMod(useRegister(mod->getOperand(0)), useRegister(mod->getOperand(1))); + if (mod->fallible()) { + assignSnapshot(lir, mod->bailoutKind()); + } + define(lir, mod); +} + +void LIRGenerator::visitWasmUnsignedToDouble(MWasmUnsignedToDouble* ins) { + MOZ_ASSERT(ins->input()->type() == MIRType::Int32); + LWasmUint32ToDouble* lir = + new (alloc()) LWasmUint32ToDouble(useRegisterAtStart(ins->input())); + define(lir, ins); +} + +void LIRGenerator::visitWasmUnsignedToFloat32(MWasmUnsignedToFloat32* ins) { + MOZ_ASSERT(ins->input()->type() == MIRType::Int32); + LWasmUint32ToFloat32* lir = + new (alloc()) LWasmUint32ToFloat32(useRegisterAtStart(ins->input())); + define(lir, ins); +} + +void LIRGenerator::visitAsmJSLoadHeap(MAsmJSLoadHeap* ins) { + MDefinition* base = ins->base(); + MOZ_ASSERT(base->type() == MIRType::Int32); + + MDefinition* boundsCheckLimit = ins->boundsCheckLimit(); + MOZ_ASSERT_IF(ins->needsBoundsCheck(), + boundsCheckLimit->type() == MIRType::Int32); + + LAllocation baseAlloc = useRegisterAtStart(base); + + LAllocation limitAlloc = 
ins->needsBoundsCheck() + ? useRegisterAtStart(boundsCheckLimit) + : LAllocation(); + + // We have no memory-base value, meaning that HeapReg is to be used as the + // memory base. This follows from the definition of + // FunctionCompiler::maybeLoadMemoryBase() in WasmIonCompile.cpp. + MOZ_ASSERT(!ins->hasMemoryBase()); + auto* lir = + new (alloc()) LAsmJSLoadHeap(baseAlloc, limitAlloc, LAllocation()); + define(lir, ins); +} + +void LIRGenerator::visitAsmJSStoreHeap(MAsmJSStoreHeap* ins) { + MDefinition* base = ins->base(); + MOZ_ASSERT(base->type() == MIRType::Int32); + + MDefinition* boundsCheckLimit = ins->boundsCheckLimit(); + MOZ_ASSERT_IF(ins->needsBoundsCheck(), + boundsCheckLimit->type() == MIRType::Int32); + + LAllocation baseAlloc = useRegisterAtStart(base); + + LAllocation limitAlloc = ins->needsBoundsCheck() + ? useRegisterAtStart(boundsCheckLimit) + : LAllocation(); + + // See comment in LIRGenerator::visitAsmJSLoadHeap just above. + MOZ_ASSERT(!ins->hasMemoryBase()); + add(new (alloc()) LAsmJSStoreHeap(baseAlloc, useRegisterAtStart(ins->value()), + limitAlloc, LAllocation()), + ins); +} + +void LIRGenerator::visitWasmCompareExchangeHeap(MWasmCompareExchangeHeap* ins) { + MDefinition* base = ins->base(); + // See comment in visitWasmLoad re the type of 'base'. + MOZ_ASSERT(base->type() == MIRType::Int32 || base->type() == MIRType::Int64); + + // Note, the access type may be Int64 here. + + LWasmCompareExchangeHeap* lir = new (alloc()) + LWasmCompareExchangeHeap(useRegister(base), useRegister(ins->oldValue()), + useRegister(ins->newValue())); + + define(lir, ins); +} + +void LIRGenerator::visitWasmAtomicExchangeHeap(MWasmAtomicExchangeHeap* ins) { + MDefinition* base = ins->base(); + // See comment in visitWasmLoad re the type of 'base'. + MOZ_ASSERT(base->type() == MIRType::Int32 || base->type() == MIRType::Int64); + + // Note, the access type may be Int64 here. 
+ + LWasmAtomicExchangeHeap* lir = new (alloc()) + LWasmAtomicExchangeHeap(useRegister(base), useRegister(ins->value())); + define(lir, ins); +} + +void LIRGenerator::visitWasmAtomicBinopHeap(MWasmAtomicBinopHeap* ins) { + MDefinition* base = ins->base(); + // See comment in visitWasmLoad re the type of 'base'. + MOZ_ASSERT(base->type() == MIRType::Int32 || base->type() == MIRType::Int64); + + // Note, the access type may be Int64 here. + + if (!ins->hasUses()) { + LWasmAtomicBinopHeapForEffect* lir = + new (alloc()) LWasmAtomicBinopHeapForEffect(useRegister(base), + useRegister(ins->value()), + /* flagTemp= */ temp()); + add(lir, ins); + return; + } + + LWasmAtomicBinopHeap* lir = new (alloc()) + LWasmAtomicBinopHeap(useRegister(base), useRegister(ins->value()), + /* temp= */ LDefinition::BogusTemp(), + /* flagTemp= */ temp()); + define(lir, ins); +} + +void LIRGeneratorARM64::lowerTruncateDToInt32(MTruncateToInt32* ins) { + MDefinition* opd = ins->input(); + MOZ_ASSERT(opd->type() == MIRType::Double); + define(new (alloc()) + LTruncateDToInt32(useRegister(opd), LDefinition::BogusTemp()), + ins); +} + +void LIRGeneratorARM64::lowerTruncateFToInt32(MTruncateToInt32* ins) { + MDefinition* opd = ins->input(); + MOZ_ASSERT(opd->type() == MIRType::Float32); + define(new (alloc()) + LTruncateFToInt32(useRegister(opd), LDefinition::BogusTemp()), + ins); +} + +void LIRGenerator::visitAtomicTypedArrayElementBinop( + MAtomicTypedArrayElementBinop* ins) { + MOZ_ASSERT(ins->arrayType() != Scalar::Uint8Clamped); + MOZ_ASSERT(ins->arrayType() != Scalar::Float32); + MOZ_ASSERT(ins->arrayType() != Scalar::Float64); + + MOZ_ASSERT(ins->elements()->type() == MIRType::Elements); + MOZ_ASSERT(ins->index()->type() == MIRType::IntPtr); + + const LUse elements = useRegister(ins->elements()); + const LAllocation index = + useRegisterOrIndexConstant(ins->index(), ins->arrayType()); + + LAllocation value = useRegister(ins->value()); + + if (Scalar::isBigIntType(ins->arrayType())) { + 
LInt64Definition temp1 = tempInt64(); + LInt64Definition temp2 = tempInt64(); + + // Case 1: the result of the operation is not used. + // + // We can omit allocating the result BigInt. + + if (ins->isForEffect()) { + auto* lir = new (alloc()) LAtomicTypedArrayElementBinopForEffect64( + elements, index, value, temp1, temp2); + add(lir, ins); + return; + } + + // Case 2: the result of the operation is used. + + auto* lir = new (alloc()) + LAtomicTypedArrayElementBinop64(elements, index, value, temp1, temp2); + define(lir, ins); + assignSafepoint(lir, ins); + return; + } + + if (ins->isForEffect()) { + auto* lir = new (alloc()) + LAtomicTypedArrayElementBinopForEffect(elements, index, value, temp()); + add(lir, ins); + return; + } + + LDefinition tempDef1 = temp(); + LDefinition tempDef2 = LDefinition::BogusTemp(); + if (ins->arrayType() == Scalar::Uint32) { + tempDef2 = temp(); + } + + LAtomicTypedArrayElementBinop* lir = new (alloc()) + LAtomicTypedArrayElementBinop(elements, index, value, tempDef1, tempDef2); + + define(lir, ins); +} + +void LIRGenerator::visitCompareExchangeTypedArrayElement( + MCompareExchangeTypedArrayElement* ins) { + MOZ_ASSERT(ins->arrayType() != Scalar::Float32); + MOZ_ASSERT(ins->arrayType() != Scalar::Float64); + + MOZ_ASSERT(ins->elements()->type() == MIRType::Elements); + MOZ_ASSERT(ins->index()->type() == MIRType::IntPtr); + + const LUse elements = useRegister(ins->elements()); + const LAllocation index = + useRegisterOrIndexConstant(ins->index(), ins->arrayType()); + + const LAllocation newval = useRegister(ins->newval()); + const LAllocation oldval = useRegister(ins->oldval()); + + if (Scalar::isBigIntType(ins->arrayType())) { + LInt64Definition temp1 = tempInt64(); + LInt64Definition temp2 = tempInt64(); + + auto* lir = new (alloc()) LCompareExchangeTypedArrayElement64( + elements, index, oldval, newval, temp1, temp2); + define(lir, ins); + assignSafepoint(lir, ins); + return; + } + + // If the target is an FPReg then we need a 
temporary at the CodeGenerator + // level for creating the result. + + LDefinition outTemp = LDefinition::BogusTemp(); + if (ins->arrayType() == Scalar::Uint32) { + outTemp = temp(); + } + + LCompareExchangeTypedArrayElement* lir = + new (alloc()) LCompareExchangeTypedArrayElement(elements, index, oldval, + newval, outTemp); + + define(lir, ins); +} + +void LIRGenerator::visitAtomicExchangeTypedArrayElement( + MAtomicExchangeTypedArrayElement* ins) { + MOZ_ASSERT(ins->elements()->type() == MIRType::Elements); + MOZ_ASSERT(ins->index()->type() == MIRType::IntPtr); + + const LUse elements = useRegister(ins->elements()); + const LAllocation index = + useRegisterOrIndexConstant(ins->index(), ins->arrayType()); + + const LAllocation value = useRegister(ins->value()); + + if (Scalar::isBigIntType(ins->arrayType())) { + LInt64Definition temp1 = tempInt64(); + LDefinition temp2 = temp(); + + auto* lir = new (alloc()) LAtomicExchangeTypedArrayElement64( + elements, index, value, temp1, temp2); + define(lir, ins); + assignSafepoint(lir, ins); + return; + } + + MOZ_ASSERT(ins->arrayType() <= Scalar::Uint32); + + LDefinition tempDef = LDefinition::BogusTemp(); + if (ins->arrayType() == Scalar::Uint32) { + tempDef = temp(); + } + + LAtomicExchangeTypedArrayElement* lir = new (alloc()) + LAtomicExchangeTypedArrayElement(elements, index, value, tempDef); + + define(lir, ins); +} + +void LIRGeneratorARM64::lowerAtomicLoad64(MLoadUnboxedScalar* ins) { + const LUse elements = useRegister(ins->elements()); + const LAllocation index = + useRegisterOrIndexConstant(ins->index(), ins->storageType()); + + auto* lir = new (alloc()) LAtomicLoad64(elements, index, temp(), tempInt64()); + define(lir, ins); + assignSafepoint(lir, ins); +} + +void LIRGeneratorARM64::lowerAtomicStore64(MStoreUnboxedScalar* ins) { + LUse elements = useRegister(ins->elements()); + LAllocation index = + useRegisterOrIndexConstant(ins->index(), ins->writeType()); + LAllocation value = useRegister(ins->value()); + + 
add(new (alloc()) LAtomicStore64(elements, index, value, tempInt64()), ins); +} + +void LIRGenerator::visitSubstr(MSubstr* ins) { + LSubstr* lir = new (alloc()) + LSubstr(useRegister(ins->string()), useRegister(ins->begin()), + useRegister(ins->length()), temp(), temp(), temp()); + define(lir, ins); + assignSafepoint(lir, ins); +} + +void LIRGenerator::visitWasmTruncateToInt64(MWasmTruncateToInt64* ins) { + MDefinition* opd = ins->input(); + MOZ_ASSERT(opd->type() == MIRType::Double || opd->type() == MIRType::Float32); + + defineInt64(new (alloc()) LWasmTruncateToInt64(useRegister(opd)), ins); +} + +void LIRGeneratorARM64::lowerWasmBuiltinTruncateToInt64( + MWasmBuiltinTruncateToInt64* ins) { + MOZ_CRASH("We don't use WasmBuiltinTruncateToInt64 for arm64"); +} + +void LIRGeneratorARM64::lowerBuiltinInt64ToFloatingPoint( + MBuiltinInt64ToFloatingPoint* ins) { + MOZ_CRASH("We don't use it for this architecture"); +} + +void LIRGenerator::visitWasmHeapBase(MWasmHeapBase* ins) { + auto* lir = new (alloc()) LWasmHeapBase(LAllocation()); + define(lir, ins); +} + +void LIRGenerator::visitWasmLoad(MWasmLoad* ins) { + MDefinition* base = ins->base(); + // 'base' is a GPR but may be of either type. If it is 32-bit it is + // zero-extended and can act as 64-bit. + MOZ_ASSERT(base->type() == MIRType::Int32 || base->type() == MIRType::Int64); + + LAllocation ptr = useRegisterOrConstantAtStart(base); + + if (ins->type() == MIRType::Int64) { + auto* lir = new (alloc()) LWasmLoadI64(ptr); + defineInt64(lir, ins); + } else { + auto* lir = new (alloc()) LWasmLoad(ptr); + define(lir, ins); + } +} + +void LIRGenerator::visitWasmStore(MWasmStore* ins) { + MDefinition* base = ins->base(); + // See comment in visitWasmLoad re the type of 'base'. 
+ MOZ_ASSERT(base->type() == MIRType::Int32 || base->type() == MIRType::Int64); + + MDefinition* value = ins->value(); + + if (ins->access().type() == Scalar::Int64) { + LAllocation baseAlloc = useRegisterOrConstantAtStart(base); + LInt64Allocation valueAlloc = useInt64RegisterAtStart(value); + auto* lir = new (alloc()) LWasmStoreI64(baseAlloc, valueAlloc); + add(lir, ins); + return; + } + + LAllocation baseAlloc = useRegisterOrConstantAtStart(base); + LAllocation valueAlloc = useRegisterAtStart(value); + auto* lir = new (alloc()) LWasmStore(baseAlloc, valueAlloc); + add(lir, ins); +} + +void LIRGenerator::visitInt64ToFloatingPoint(MInt64ToFloatingPoint* ins) { + MDefinition* opd = ins->input(); + MOZ_ASSERT(opd->type() == MIRType::Int64); + MOZ_ASSERT(IsFloatingPointType(ins->type())); + + define(new (alloc()) LInt64ToFloatingPoint(useInt64Register(opd)), ins); +} + +void LIRGenerator::visitCopySign(MCopySign* ins) { + MDefinition* lhs = ins->lhs(); + MDefinition* rhs = ins->rhs(); + + MOZ_ASSERT(IsFloatingPointType(lhs->type())); + MOZ_ASSERT(lhs->type() == rhs->type()); + MOZ_ASSERT(lhs->type() == ins->type()); + + LInstructionHelper<1, 2, 2>* lir; + if (lhs->type() == MIRType::Double) { + lir = new (alloc()) LCopySignD(); + } else { + lir = new (alloc()) LCopySignF(); + } + + lir->setOperand(0, useRegisterAtStart(lhs)); + lir->setOperand(1, willHaveDifferentLIRNodes(lhs, rhs) + ? useRegister(rhs) + : useRegisterAtStart(rhs)); + // The copySignDouble and copySignFloat32 are optimized for lhs == output. + // It also prevents rhs == output when lhs != output, avoids clobbering. 
+ defineReuseInput(lir, ins, 0); +} + +void LIRGenerator::visitExtendInt32ToInt64(MExtendInt32ToInt64* ins) { + defineInt64( + new (alloc()) LExtendInt32ToInt64(useRegisterAtStart(ins->input())), ins); +} + +void LIRGenerator::visitSignExtendInt64(MSignExtendInt64* ins) { + defineInt64(new (alloc()) + LSignExtendInt64(useInt64RegisterAtStart(ins->input())), + ins); +} + +void LIRGenerator::visitWasmTernarySimd128(MWasmTernarySimd128* ins) { +#ifdef ENABLE_WASM_SIMD + MOZ_ASSERT(ins->v0()->type() == MIRType::Simd128); + MOZ_ASSERT(ins->v1()->type() == MIRType::Simd128); + MOZ_ASSERT(ins->v2()->type() == MIRType::Simd128); + MOZ_ASSERT(ins->type() == MIRType::Simd128); + + switch (ins->simdOp()) { + case wasm::SimdOp::V128Bitselect: { + auto* lir = new (alloc()) LWasmTernarySimd128( + ins->simdOp(), useRegister(ins->v0()), useRegister(ins->v1()), + useRegisterAtStart(ins->v2())); + // On ARM64, control register is used as output at machine instruction. + defineReuseInput(lir, ins, LWasmTernarySimd128::V2); + break; + } + case wasm::SimdOp::F32x4RelaxedFma: + case wasm::SimdOp::F32x4RelaxedFnma: + case wasm::SimdOp::F64x2RelaxedFma: + case wasm::SimdOp::F64x2RelaxedFnma: { + auto* lir = new (alloc()) LWasmTernarySimd128( + ins->simdOp(), useRegister(ins->v0()), useRegister(ins->v1()), + useRegisterAtStart(ins->v2())); + defineReuseInput(lir, ins, LWasmTernarySimd128::V2); + break; + } + case wasm::SimdOp::I32x4DotI8x16I7x16AddS: { + auto* lir = new (alloc()) LWasmTernarySimd128( + ins->simdOp(), useRegister(ins->v0()), useRegister(ins->v1()), + useRegisterAtStart(ins->v2()), tempSimd128()); + defineReuseInput(lir, ins, LWasmTernarySimd128::V2); + break; + } + case wasm::SimdOp::I8x16RelaxedLaneSelect: + case wasm::SimdOp::I16x8RelaxedLaneSelect: + case wasm::SimdOp::I32x4RelaxedLaneSelect: + case wasm::SimdOp::I64x2RelaxedLaneSelect: { + auto* lir = new (alloc()) LWasmTernarySimd128( + ins->simdOp(), useRegister(ins->v0()), useRegister(ins->v1()), + 
useRegisterAtStart(ins->v2())); + defineReuseInput(lir, ins, LWasmTernarySimd128::V2); + break; + } + default: + MOZ_CRASH("NYI"); + } +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void LIRGenerator::visitWasmBinarySimd128(MWasmBinarySimd128* ins) { +#ifdef ENABLE_WASM_SIMD + MDefinition* lhs = ins->lhs(); + MDefinition* rhs = ins->rhs(); + wasm::SimdOp op = ins->simdOp(); + + MOZ_ASSERT(lhs->type() == MIRType::Simd128); + MOZ_ASSERT(rhs->type() == MIRType::Simd128); + MOZ_ASSERT(ins->type() == MIRType::Simd128); + + LAllocation lhsAlloc = useRegisterAtStart(lhs); + LAllocation rhsAlloc = useRegisterAtStart(rhs); + LDefinition tempReg0 = LDefinition::BogusTemp(); + LDefinition tempReg1 = LDefinition::BogusTemp(); + if (op == wasm::SimdOp::I64x2Mul) { + tempReg0 = tempSimd128(); + tempReg1 = tempSimd128(); + } + auto* lir = new (alloc()) + LWasmBinarySimd128(op, lhsAlloc, rhsAlloc, tempReg0, tempReg1); + define(lir, ins); +#else + MOZ_CRASH("No SIMD"); +#endif +} + +#ifdef ENABLE_WASM_SIMD +bool MWasmTernarySimd128::specializeBitselectConstantMaskAsShuffle( + int8_t shuffle[16]) { + return false; +} +bool MWasmTernarySimd128::canRelaxBitselect() { return false; } + +bool MWasmBinarySimd128::canPmaddubsw() { return false; } +#endif + +bool MWasmBinarySimd128::specializeForConstantRhs() { + // Probably many we want to do here + return false; +} + +void LIRGenerator::visitWasmBinarySimd128WithConstant( + MWasmBinarySimd128WithConstant* ins) { + MOZ_CRASH("binary SIMD with constant NYI"); +} + +void LIRGenerator::visitWasmShiftSimd128(MWasmShiftSimd128* ins) { +#ifdef ENABLE_WASM_SIMD + MDefinition* lhs = ins->lhs(); + MDefinition* rhs = ins->rhs(); + + MOZ_ASSERT(lhs->type() == MIRType::Simd128); + MOZ_ASSERT(rhs->type() == MIRType::Int32); + MOZ_ASSERT(ins->type() == MIRType::Simd128); + + if (rhs->isConstant()) { + int32_t shiftCount = rhs->toConstant()->toInt32(); + switch (ins->simdOp()) { + case wasm::SimdOp::I8x16Shl: + case wasm::SimdOp::I8x16ShrU: + case 
wasm::SimdOp::I8x16ShrS: + shiftCount &= 7; + break; + case wasm::SimdOp::I16x8Shl: + case wasm::SimdOp::I16x8ShrU: + case wasm::SimdOp::I16x8ShrS: + shiftCount &= 15; + break; + case wasm::SimdOp::I32x4Shl: + case wasm::SimdOp::I32x4ShrU: + case wasm::SimdOp::I32x4ShrS: + shiftCount &= 31; + break; + case wasm::SimdOp::I64x2Shl: + case wasm::SimdOp::I64x2ShrU: + case wasm::SimdOp::I64x2ShrS: + shiftCount &= 63; + break; + default: + MOZ_CRASH("Unexpected shift operation"); + } +# ifdef DEBUG + js::wasm::ReportSimdAnalysis("shift -> constant shift"); +# endif + auto* lir = new (alloc()) + LWasmConstantShiftSimd128(useRegisterAtStart(lhs), shiftCount); + define(lir, ins); + return; + } + +# ifdef DEBUG + js::wasm::ReportSimdAnalysis("shift -> variable shift"); +# endif + + LAllocation lhsDestAlloc = useRegisterAtStart(lhs); + LAllocation rhsAlloc = useRegisterAtStart(rhs); + auto* lir = new (alloc()) LWasmVariableShiftSimd128(lhsDestAlloc, rhsAlloc, + LDefinition::BogusTemp()); + define(lir, ins); +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void LIRGenerator::visitWasmShuffleSimd128(MWasmShuffleSimd128* ins) { +#ifdef ENABLE_WASM_SIMD + MOZ_ASSERT(ins->lhs()->type() == MIRType::Simd128); + MOZ_ASSERT(ins->rhs()->type() == MIRType::Simd128); + MOZ_ASSERT(ins->type() == MIRType::Simd128); + + SimdShuffle s = ins->shuffle(); + switch (s.opd) { + case SimdShuffle::Operand::LEFT: + case SimdShuffle::Operand::RIGHT: { + LAllocation src; + switch (*s.permuteOp) { + case SimdPermuteOp::MOVE: + case SimdPermuteOp::BROADCAST_8x16: + case SimdPermuteOp::BROADCAST_16x8: + case SimdPermuteOp::PERMUTE_8x16: + case SimdPermuteOp::PERMUTE_16x8: + case SimdPermuteOp::PERMUTE_32x4: + case SimdPermuteOp::ROTATE_RIGHT_8x16: + case SimdPermuteOp::SHIFT_LEFT_8x16: + case SimdPermuteOp::SHIFT_RIGHT_8x16: + case SimdPermuteOp::REVERSE_16x8: + case SimdPermuteOp::REVERSE_32x4: + case SimdPermuteOp::REVERSE_64x2: + break; + default: + MOZ_CRASH("Unexpected operator"); + } + if (s.opd == 
SimdShuffle::Operand::LEFT) { + src = useRegisterAtStart(ins->lhs()); + } else { + src = useRegisterAtStart(ins->rhs()); + } + auto* lir = + new (alloc()) LWasmPermuteSimd128(src, *s.permuteOp, s.control); + define(lir, ins); + break; + } + case SimdShuffle::Operand::BOTH: + case SimdShuffle::Operand::BOTH_SWAPPED: { + LDefinition temp = LDefinition::BogusTemp(); + LAllocation lhs; + LAllocation rhs; + if (s.opd == SimdShuffle::Operand::BOTH) { + lhs = useRegisterAtStart(ins->lhs()); + rhs = useRegisterAtStart(ins->rhs()); + } else { + lhs = useRegisterAtStart(ins->rhs()); + rhs = useRegisterAtStart(ins->lhs()); + } + auto* lir = new (alloc()) + LWasmShuffleSimd128(lhs, rhs, temp, *s.shuffleOp, s.control); + define(lir, ins); + break; + } + } +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void LIRGenerator::visitWasmReplaceLaneSimd128(MWasmReplaceLaneSimd128* ins) { +#ifdef ENABLE_WASM_SIMD + MOZ_ASSERT(ins->lhs()->type() == MIRType::Simd128); + MOZ_ASSERT(ins->type() == MIRType::Simd128); + + // Optimal code generation reuses the lhs register because the rhs scalar is + // merged into a vector lhs. + LAllocation lhs = useRegisterAtStart(ins->lhs()); + if (ins->rhs()->type() == MIRType::Int64) { + auto* lir = new (alloc()) + LWasmReplaceInt64LaneSimd128(lhs, useInt64Register(ins->rhs())); + defineReuseInput(lir, ins, 0); + } else { + auto* lir = + new (alloc()) LWasmReplaceLaneSimd128(lhs, useRegister(ins->rhs())); + defineReuseInput(lir, ins, 0); + } +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void LIRGenerator::visitWasmScalarToSimd128(MWasmScalarToSimd128* ins) { +#ifdef ENABLE_WASM_SIMD + MOZ_ASSERT(ins->type() == MIRType::Simd128); + + switch (ins->input()->type()) { + case MIRType::Int64: { + // 64-bit integer splats. + // Load-and-(sign|zero)extend. + auto* lir = new (alloc()) + LWasmInt64ToSimd128(useInt64RegisterAtStart(ins->input())); + define(lir, ins); + break; + } + case MIRType::Float32: + case MIRType::Double: { + // Floating-point splats. 
+ auto* lir = + new (alloc()) LWasmScalarToSimd128(useRegisterAtStart(ins->input())); + define(lir, ins); + break; + } + default: { + // 32-bit integer splats. + auto* lir = + new (alloc()) LWasmScalarToSimd128(useRegisterAtStart(ins->input())); + define(lir, ins); + break; + } + } +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void LIRGenerator::visitWasmUnarySimd128(MWasmUnarySimd128* ins) { +#ifdef ENABLE_WASM_SIMD + MOZ_ASSERT(ins->input()->type() == MIRType::Simd128); + MOZ_ASSERT(ins->type() == MIRType::Simd128); + + LDefinition tempReg = LDefinition::BogusTemp(); + switch (ins->simdOp()) { + case wasm::SimdOp::I8x16Neg: + case wasm::SimdOp::I16x8Neg: + case wasm::SimdOp::I32x4Neg: + case wasm::SimdOp::I64x2Neg: + case wasm::SimdOp::F32x4Neg: + case wasm::SimdOp::F64x2Neg: + case wasm::SimdOp::F32x4Abs: + case wasm::SimdOp::F64x2Abs: + case wasm::SimdOp::V128Not: + case wasm::SimdOp::F32x4Sqrt: + case wasm::SimdOp::F64x2Sqrt: + case wasm::SimdOp::I8x16Abs: + case wasm::SimdOp::I16x8Abs: + case wasm::SimdOp::I32x4Abs: + case wasm::SimdOp::I64x2Abs: + case wasm::SimdOp::I32x4TruncSatF32x4S: + case wasm::SimdOp::F32x4ConvertI32x4U: + case wasm::SimdOp::I32x4TruncSatF32x4U: + case wasm::SimdOp::I16x8ExtendLowI8x16S: + case wasm::SimdOp::I16x8ExtendHighI8x16S: + case wasm::SimdOp::I16x8ExtendLowI8x16U: + case wasm::SimdOp::I16x8ExtendHighI8x16U: + case wasm::SimdOp::I32x4ExtendLowI16x8S: + case wasm::SimdOp::I32x4ExtendHighI16x8S: + case wasm::SimdOp::I32x4ExtendLowI16x8U: + case wasm::SimdOp::I32x4ExtendHighI16x8U: + case wasm::SimdOp::I64x2ExtendLowI32x4S: + case wasm::SimdOp::I64x2ExtendHighI32x4S: + case wasm::SimdOp::I64x2ExtendLowI32x4U: + case wasm::SimdOp::I64x2ExtendHighI32x4U: + case wasm::SimdOp::F32x4ConvertI32x4S: + case wasm::SimdOp::F32x4Ceil: + case wasm::SimdOp::F32x4Floor: + case wasm::SimdOp::F32x4Trunc: + case wasm::SimdOp::F32x4Nearest: + case wasm::SimdOp::F64x2Ceil: + case wasm::SimdOp::F64x2Floor: + case wasm::SimdOp::F64x2Trunc: + case 
wasm::SimdOp::F64x2Nearest: + case wasm::SimdOp::F32x4DemoteF64x2Zero: + case wasm::SimdOp::F64x2PromoteLowF32x4: + case wasm::SimdOp::F64x2ConvertLowI32x4S: + case wasm::SimdOp::F64x2ConvertLowI32x4U: + case wasm::SimdOp::I16x8ExtaddPairwiseI8x16S: + case wasm::SimdOp::I16x8ExtaddPairwiseI8x16U: + case wasm::SimdOp::I32x4ExtaddPairwiseI16x8S: + case wasm::SimdOp::I32x4ExtaddPairwiseI16x8U: + case wasm::SimdOp::I8x16Popcnt: + case wasm::SimdOp::I32x4RelaxedTruncF32x4S: + case wasm::SimdOp::I32x4RelaxedTruncF32x4U: + case wasm::SimdOp::I32x4RelaxedTruncF64x2SZero: + case wasm::SimdOp::I32x4RelaxedTruncF64x2UZero: + break; + case wasm::SimdOp::I32x4TruncSatF64x2SZero: + case wasm::SimdOp::I32x4TruncSatF64x2UZero: + tempReg = tempSimd128(); + break; + default: + MOZ_CRASH("Unary SimdOp not implemented"); + } + + LUse input = useRegisterAtStart(ins->input()); + LWasmUnarySimd128* lir = new (alloc()) LWasmUnarySimd128(input, tempReg); + define(lir, ins); +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void LIRGenerator::visitWasmReduceSimd128(MWasmReduceSimd128* ins) { +#ifdef ENABLE_WASM_SIMD + if (canEmitWasmReduceSimd128AtUses(ins)) { + emitAtUses(ins); + return; + } + + // Reductions (any_true, all_true, bitmask, extract_lane) uniformly prefer + // useRegisterAtStart: + // + // - In most cases, the input type differs from the output type, so there's no + // conflict and it doesn't really matter. + // + // - For extract_lane(0) on F32x4 and F64x2, input == output results in zero + // code being generated. + // + // - For extract_lane(k > 0) on F32x4 and F64x2, allowing the input register + // to be targeted lowers register pressure if it's the last use of the + // input. 
+ + if (ins->type() == MIRType::Int64) { + auto* lir = new (alloc()) + LWasmReduceSimd128ToInt64(useRegisterAtStart(ins->input())); + defineInt64(lir, ins); + } else { + LDefinition tempReg = LDefinition::BogusTemp(); + switch (ins->simdOp()) { + case wasm::SimdOp::I8x16Bitmask: + case wasm::SimdOp::I16x8Bitmask: + case wasm::SimdOp::I32x4Bitmask: + case wasm::SimdOp::I64x2Bitmask: + tempReg = tempSimd128(); + break; + default: + break; + } + + // Ideally we would reuse the input register for floating extract_lane if + // the lane is zero, but constraints in the register allocator require the + // input and output register types to be the same. + auto* lir = new (alloc()) + LWasmReduceSimd128(useRegisterAtStart(ins->input()), tempReg); + define(lir, ins); + } +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void LIRGenerator::visitWasmLoadLaneSimd128(MWasmLoadLaneSimd128* ins) { +#ifdef ENABLE_WASM_SIMD + // On 64-bit systems, the base pointer can be 32 bits or 64 bits. Either way, + // it fits in a GPR so we can ignore the Register/Register64 distinction here. + + // Optimal allocation here reuses the value input for the output register + // because codegen otherwise has to copy the input to the output; this is + // because load-lane is implemented as load + replace-lane. Bug 1706106 may + // change all of that, so leave it alone for now. + LUse base = useRegisterAtStart(ins->base()); + LUse inputUse = useRegisterAtStart(ins->value()); + MOZ_ASSERT(!ins->hasMemoryBase()); + LWasmLoadLaneSimd128* lir = + new (alloc()) LWasmLoadLaneSimd128(base, inputUse, temp(), LAllocation()); + define(lir, ins); +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void LIRGenerator::visitWasmStoreLaneSimd128(MWasmStoreLaneSimd128* ins) { +#ifdef ENABLE_WASM_SIMD + // See comment above about the base pointer. 
+ + LUse base = useRegisterAtStart(ins->base()); + LUse input = useRegisterAtStart(ins->value()); + MOZ_ASSERT(!ins->hasMemoryBase()); + LWasmStoreLaneSimd128* lir = + new (alloc()) LWasmStoreLaneSimd128(base, input, temp(), LAllocation()); + add(lir, ins); +#else + MOZ_CRASH("No SIMD"); +#endif +} diff --git a/js/src/jit/arm64/Lowering-arm64.h b/js/src/jit/arm64/Lowering-arm64.h new file mode 100644 index 0000000000..4ab52dd464 --- /dev/null +++ b/js/src/jit/arm64/Lowering-arm64.h @@ -0,0 +1,135 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef jit_arm64_Lowering_arm64_h +#define jit_arm64_Lowering_arm64_h + +#include "jit/shared/Lowering-shared.h" + +namespace js { +namespace jit { + +class LIRGeneratorARM64 : public LIRGeneratorShared { + protected: + LIRGeneratorARM64(MIRGenerator* gen, MIRGraph& graph, LIRGraph& lirGraph) + : LIRGeneratorShared(gen, graph, lirGraph) {} + + // Returns a box allocation. reg2 is ignored on 64-bit platforms. + LBoxAllocation useBoxFixed(MDefinition* mir, Register reg1, Register reg2, + bool useAtStart = false); + + LAllocation useByteOpRegister(MDefinition* mir); + LAllocation useByteOpRegisterAtStart(MDefinition* mir); + LAllocation useByteOpRegisterOrNonDoubleConstant(MDefinition* mir); + LDefinition tempByteOpRegister(); + + LDefinition tempToUnbox(); + + bool needTempForPostBarrier() { return true; } + + // ARM64 has a scratch register, so no need for another temp for dispatch ICs. 
+ LDefinition tempForDispatchCache(MIRType outputType = MIRType::None) { + return LDefinition::BogusTemp(); + } + + void lowerUntypedPhiInput(MPhi* phi, uint32_t inputPosition, LBlock* block, + size_t lirIndex); + void lowerInt64PhiInput(MPhi* phi, uint32_t inputPosition, LBlock* block, + size_t lirIndex) { + lowerTypedPhiInput(phi, inputPosition, block, lirIndex); + } + void defineInt64Phi(MPhi* phi, size_t lirIndex) { + defineTypedPhi(phi, lirIndex); + } + void lowerForShift(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir, + MDefinition* lhs, MDefinition* rhs); + void lowerUrshD(MUrsh* mir); + + void lowerPowOfTwoI(MPow* mir); + + void lowerForALU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir, + MDefinition* input); + void lowerForALU(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir, + MDefinition* lhs, MDefinition* rhs); + + void lowerForALUInt64(LInstructionHelper<INT64_PIECES, INT64_PIECES, 0>* ins, + MDefinition* mir, MDefinition* input); + void lowerForALUInt64( + LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>* ins, + MDefinition* mir, MDefinition* lhs, MDefinition* rhs); + void lowerForMulInt64(LMulI64* ins, MMul* mir, MDefinition* lhs, + MDefinition* rhs); + template <size_t Temps> + void lowerForShiftInt64( + LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, Temps>* ins, + MDefinition* mir, MDefinition* lhs, MDefinition* rhs); + + void lowerForCompareI64AndBranch(MTest* mir, MCompare* comp, JSOp op, + MDefinition* left, MDefinition* right, + MBasicBlock* ifTrue, MBasicBlock* ifFalse); + + void lowerForFPU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir, + MDefinition* input); + + template <size_t Temps> + void lowerForFPU(LInstructionHelper<1, 2, Temps>* ins, MDefinition* mir, + MDefinition* lhs, MDefinition* rhs); + + void lowerBuiltinInt64ToFloatingPoint(MBuiltinInt64ToFloatingPoint* ins); + void lowerWasmBuiltinTruncateToInt64(MWasmBuiltinTruncateToInt64* ins); + void lowerForBitAndAndBranch(LBitAndAndBranch* baab, MInstruction* 
mir, + MDefinition* lhs, MDefinition* rhs); + void lowerWasmBuiltinTruncateToInt32(MWasmBuiltinTruncateToInt32* ins); + void lowerTruncateDToInt32(MTruncateToInt32* ins); + void lowerTruncateFToInt32(MTruncateToInt32* ins); + void lowerDivI(MDiv* div); + void lowerModI(MMod* mod); + void lowerDivI64(MDiv* div); + void lowerWasmBuiltinDivI64(MWasmBuiltinDivI64* div); + void lowerModI64(MMod* mod); + void lowerWasmBuiltinModI64(MWasmBuiltinModI64* mod); + void lowerUDivI64(MDiv* div); + void lowerUModI64(MMod* mod); + void lowerNegI(MInstruction* ins, MDefinition* input); + void lowerNegI64(MInstruction* ins, MDefinition* input); + void lowerMulI(MMul* mul, MDefinition* lhs, MDefinition* rhs); + void lowerUDiv(MDiv* div); + void lowerUMod(MMod* mod); + void lowerWasmSelectI(MWasmSelect* select); + void lowerWasmSelectI64(MWasmSelect* select); + bool canSpecializeWasmCompareAndSelect(MCompare::CompareType compTy, + MIRType insTy); + void lowerWasmCompareAndSelect(MWasmSelect* ins, MDefinition* lhs, + MDefinition* rhs, MCompare::CompareType compTy, + JSOp jsop); + + void lowerBigIntLsh(MBigIntLsh* ins); + void lowerBigIntRsh(MBigIntRsh* ins); + void lowerBigIntDiv(MBigIntDiv* ins); + void lowerBigIntMod(MBigIntMod* ins); + + void lowerAtomicLoad64(MLoadUnboxedScalar* ins); + void lowerAtomicStore64(MStoreUnboxedScalar* ins); + +#ifdef ENABLE_WASM_SIMD + bool canFoldReduceSimd128AndBranch(wasm::SimdOp op); + bool canEmitWasmReduceSimd128AtUses(MWasmReduceSimd128* ins); +#endif + + LTableSwitchV* newLTableSwitchV(MTableSwitch* ins); + LTableSwitch* newLTableSwitch(const LAllocation& in, + const LDefinition& inputCopy, + MTableSwitch* ins); + + void lowerPhi(MPhi* phi); +}; + +typedef LIRGeneratorARM64 LIRGeneratorSpecific; + +} // namespace jit +} // namespace js + +#endif /* jit_arm64_Lowering_arm64_h */ diff --git a/js/src/jit/arm64/MacroAssembler-arm64-inl.h b/js/src/jit/arm64/MacroAssembler-arm64-inl.h new file mode 100644 index 0000000000..283867a29a --- /dev/null 
+++ b/js/src/jit/arm64/MacroAssembler-arm64-inl.h @@ -0,0 +1,4079 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef jit_arm64_MacroAssembler_arm64_inl_h +#define jit_arm64_MacroAssembler_arm64_inl_h + +#include "jit/arm64/MacroAssembler-arm64.h" + +namespace js { +namespace jit { + +//{{{ check_macroassembler_style + +void MacroAssembler::move64(Register64 src, Register64 dest) { + Mov(ARMRegister(dest.reg, 64), ARMRegister(src.reg, 64)); +} + +void MacroAssembler::move64(Imm64 imm, Register64 dest) { + Mov(ARMRegister(dest.reg, 64), imm.value); +} + +void MacroAssembler::moveFloat32ToGPR(FloatRegister src, Register dest) { + Fmov(ARMRegister(dest, 32), ARMFPRegister(src, 32)); +} + +void MacroAssembler::moveGPRToFloat32(Register src, FloatRegister dest) { + Fmov(ARMFPRegister(dest, 32), ARMRegister(src, 32)); +} + +void MacroAssembler::move8SignExtend(Register src, Register dest) { + Sxtb(ARMRegister(dest, 32), ARMRegister(src, 32)); +} + +void MacroAssembler::move16SignExtend(Register src, Register dest) { + Sxth(ARMRegister(dest, 32), ARMRegister(src, 32)); +} + +void MacroAssembler::moveDoubleToGPR64(FloatRegister src, Register64 dest) { + Fmov(ARMRegister(dest.reg, 64), ARMFPRegister(src, 64)); +} + +void MacroAssembler::moveGPR64ToDouble(Register64 src, FloatRegister dest) { + Fmov(ARMFPRegister(dest, 64), ARMRegister(src.reg, 64)); +} + +void MacroAssembler::move64To32(Register64 src, Register dest) { + Mov(ARMRegister(dest, 32), ARMRegister(src.reg, 32)); +} + +void MacroAssembler::move32To64ZeroExtend(Register src, Register64 dest) { + Uxtw(ARMRegister(dest.reg, 64), ARMRegister(src, 64)); +} + +void MacroAssembler::move8To64SignExtend(Register src, Register64 dest) { + 
Sxtb(ARMRegister(dest.reg, 64), ARMRegister(src, 32)); +} + +void MacroAssembler::move16To64SignExtend(Register src, Register64 dest) { + Sxth(ARMRegister(dest.reg, 64), ARMRegister(src, 32)); +} + +void MacroAssembler::move32To64SignExtend(Register src, Register64 dest) { + Sxtw(ARMRegister(dest.reg, 64), ARMRegister(src, 32)); +} + +void MacroAssembler::move32SignExtendToPtr(Register src, Register dest) { + Sxtw(ARMRegister(dest, 64), ARMRegister(src, 32)); +} + +void MacroAssembler::move32ZeroExtendToPtr(Register src, Register dest) { + Uxtw(ARMRegister(dest, 64), ARMRegister(src, 64)); +} + +// =============================================================== +// Load instructions + +void MacroAssembler::load32SignExtendToPtr(const Address& src, Register dest) { + load32(src, dest); + move32To64SignExtend(dest, Register64(dest)); +} + +void MacroAssembler::loadAbiReturnAddress(Register dest) { movePtr(lr, dest); } + +// =============================================================== +// Logical instructions + +void MacroAssembler::not32(Register reg) { + Orn(ARMRegister(reg, 32), vixl::wzr, ARMRegister(reg, 32)); +} + +void MacroAssembler::notPtr(Register reg) { + Orn(ARMRegister(reg, 64), vixl::xzr, ARMRegister(reg, 64)); +} + +void MacroAssembler::and32(Register src, Register dest) { + And(ARMRegister(dest, 32), ARMRegister(dest, 32), + Operand(ARMRegister(src, 32))); +} + +void MacroAssembler::and32(Imm32 imm, Register dest) { + And(ARMRegister(dest, 32), ARMRegister(dest, 32), Operand(imm.value)); +} + +void MacroAssembler::and32(Imm32 imm, Register src, Register dest) { + And(ARMRegister(dest, 32), ARMRegister(src, 32), Operand(imm.value)); +} + +void MacroAssembler::and32(Imm32 imm, const Address& dest) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch32 = temps.AcquireW(); + MOZ_ASSERT(scratch32.asUnsized() != dest.base); + load32(dest, scratch32.asUnsized()); + And(scratch32, scratch32, Operand(imm.value)); + 
store32(scratch32.asUnsized(), dest); +} + +void MacroAssembler::and32(const Address& src, Register dest) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch32 = temps.AcquireW(); + MOZ_ASSERT(scratch32.asUnsized() != src.base); + load32(src, scratch32.asUnsized()); + And(ARMRegister(dest, 32), ARMRegister(dest, 32), Operand(scratch32)); +} + +void MacroAssembler::andPtr(Register src, Register dest) { + And(ARMRegister(dest, 64), ARMRegister(dest, 64), + Operand(ARMRegister(src, 64))); +} + +void MacroAssembler::andPtr(Imm32 imm, Register dest) { + And(ARMRegister(dest, 64), ARMRegister(dest, 64), Operand(imm.value)); +} + +void MacroAssembler::and64(Imm64 imm, Register64 dest) { + And(ARMRegister(dest.reg, 64), ARMRegister(dest.reg, 64), Operand(imm.value)); +} + +void MacroAssembler::and64(Register64 src, Register64 dest) { + And(ARMRegister(dest.reg, 64), ARMRegister(dest.reg, 64), + ARMRegister(src.reg, 64)); +} + +void MacroAssembler::or64(Imm64 imm, Register64 dest) { + Orr(ARMRegister(dest.reg, 64), ARMRegister(dest.reg, 64), Operand(imm.value)); +} + +void MacroAssembler::or32(Imm32 imm, Register dest) { + Orr(ARMRegister(dest, 32), ARMRegister(dest, 32), Operand(imm.value)); +} + +void MacroAssembler::or32(Register src, Register dest) { + Orr(ARMRegister(dest, 32), ARMRegister(dest, 32), + Operand(ARMRegister(src, 32))); +} + +void MacroAssembler::or32(Imm32 imm, const Address& dest) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch32 = temps.AcquireW(); + MOZ_ASSERT(scratch32.asUnsized() != dest.base); + load32(dest, scratch32.asUnsized()); + Orr(scratch32, scratch32, Operand(imm.value)); + store32(scratch32.asUnsized(), dest); +} + +void MacroAssembler::orPtr(Register src, Register dest) { + Orr(ARMRegister(dest, 64), ARMRegister(dest, 64), + Operand(ARMRegister(src, 64))); +} + +void MacroAssembler::orPtr(Imm32 imm, Register dest) { + Orr(ARMRegister(dest, 64), ARMRegister(dest, 64), Operand(imm.value)); 
+} + +void MacroAssembler::or64(Register64 src, Register64 dest) { + orPtr(src.reg, dest.reg); +} + +void MacroAssembler::xor64(Register64 src, Register64 dest) { + xorPtr(src.reg, dest.reg); +} + +void MacroAssembler::xor32(Register src, Register dest) { + Eor(ARMRegister(dest, 32), ARMRegister(dest, 32), + Operand(ARMRegister(src, 32))); +} + +void MacroAssembler::xor32(Imm32 imm, Register dest) { + Eor(ARMRegister(dest, 32), ARMRegister(dest, 32), Operand(imm.value)); +} + +void MacroAssembler::xor32(Imm32 imm, const Address& dest) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch32 = temps.AcquireW(); + MOZ_ASSERT(scratch32.asUnsized() != dest.base); + load32(dest, scratch32.asUnsized()); + Eor(scratch32, scratch32, Operand(imm.value)); + store32(scratch32.asUnsized(), dest); +} + +void MacroAssembler::xor32(const Address& src, Register dest) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch32 = temps.AcquireW(); + MOZ_ASSERT(scratch32.asUnsized() != src.base); + load32(src, scratch32.asUnsized()); + Eor(ARMRegister(dest, 32), ARMRegister(dest, 32), Operand(scratch32)); +} + +void MacroAssembler::xorPtr(Register src, Register dest) { + Eor(ARMRegister(dest, 64), ARMRegister(dest, 64), + Operand(ARMRegister(src, 64))); +} + +void MacroAssembler::xorPtr(Imm32 imm, Register dest) { + Eor(ARMRegister(dest, 64), ARMRegister(dest, 64), Operand(imm.value)); +} + +void MacroAssembler::xor64(Imm64 imm, Register64 dest) { + Eor(ARMRegister(dest.reg, 64), ARMRegister(dest.reg, 64), Operand(imm.value)); +} + +// =============================================================== +// Swap instructions + +void MacroAssembler::byteSwap16SignExtend(Register reg) { + rev16(ARMRegister(reg, 32), ARMRegister(reg, 32)); + sxth(ARMRegister(reg, 32), ARMRegister(reg, 32)); +} + +void MacroAssembler::byteSwap16ZeroExtend(Register reg) { + rev16(ARMRegister(reg, 32), ARMRegister(reg, 32)); + uxth(ARMRegister(reg, 32), ARMRegister(reg, 
32)); +} + +void MacroAssembler::byteSwap32(Register reg) { + rev(ARMRegister(reg, 32), ARMRegister(reg, 32)); +} + +void MacroAssembler::byteSwap64(Register64 reg) { + rev(ARMRegister(reg.reg, 64), ARMRegister(reg.reg, 64)); +} + +// =============================================================== +// Arithmetic functions + +void MacroAssembler::add32(Register src, Register dest) { + Add(ARMRegister(dest, 32), ARMRegister(dest, 32), + Operand(ARMRegister(src, 32))); +} + +void MacroAssembler::add32(Imm32 imm, Register dest) { + Add(ARMRegister(dest, 32), ARMRegister(dest, 32), Operand(imm.value)); +} + +void MacroAssembler::add32(Imm32 imm, const Address& dest) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch32 = temps.AcquireW(); + MOZ_ASSERT(scratch32.asUnsized() != dest.base); + + Ldr(scratch32, toMemOperand(dest)); + Add(scratch32, scratch32, Operand(imm.value)); + Str(scratch32, toMemOperand(dest)); +} + +void MacroAssembler::addPtr(Register src, Register dest) { + addPtr(src, dest, dest); +} + +void MacroAssembler::addPtr(Register src1, Register src2, Register dest) { + Add(ARMRegister(dest, 64), ARMRegister(src1, 64), + Operand(ARMRegister(src2, 64))); +} + +void MacroAssembler::addPtr(Imm32 imm, Register dest) { + addPtr(imm, dest, dest); +} + +void MacroAssembler::addPtr(Imm32 imm, Register src, Register dest) { + Add(ARMRegister(dest, 64), ARMRegister(src, 64), Operand(imm.value)); +} + +void MacroAssembler::addPtr(ImmWord imm, Register dest) { + Add(ARMRegister(dest, 64), ARMRegister(dest, 64), Operand(imm.value)); +} + +void MacroAssembler::addPtr(Imm32 imm, const Address& dest) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch64 = temps.AcquireX(); + MOZ_ASSERT(scratch64.asUnsized() != dest.base); + + Ldr(scratch64, toMemOperand(dest)); + Add(scratch64, scratch64, Operand(imm.value)); + Str(scratch64, toMemOperand(dest)); +} + +void MacroAssembler::addPtr(const Address& src, Register dest) { + 
vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch64 = temps.AcquireX(); + MOZ_ASSERT(scratch64.asUnsized() != src.base); + + Ldr(scratch64, toMemOperand(src)); + Add(ARMRegister(dest, 64), ARMRegister(dest, 64), Operand(scratch64)); +} + +void MacroAssembler::add64(Register64 src, Register64 dest) { + addPtr(src.reg, dest.reg); +} + +void MacroAssembler::add64(Imm32 imm, Register64 dest) { + Add(ARMRegister(dest.reg, 64), ARMRegister(dest.reg, 64), Operand(imm.value)); +} + +void MacroAssembler::add64(Imm64 imm, Register64 dest) { + Add(ARMRegister(dest.reg, 64), ARMRegister(dest.reg, 64), Operand(imm.value)); +} + +CodeOffset MacroAssembler::sub32FromStackPtrWithPatch(Register dest) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch = temps.AcquireX(); + AutoForbidPoolsAndNops afp(this, + /* max number of instructions in scope = */ 3); + CodeOffset offs = CodeOffset(currentOffset()); + movz(scratch, 0, 0); + movk(scratch, 0, 16); + Sub(ARMRegister(dest, 64), sp, scratch); + return offs; +} + +void MacroAssembler::patchSub32FromStackPtr(CodeOffset offset, Imm32 imm) { + Instruction* i1 = getInstructionAt(BufferOffset(offset.offset())); + MOZ_ASSERT(i1->IsMovz()); + i1->SetInstructionBits(i1->InstructionBits() | + ImmMoveWide(uint16_t(imm.value))); + + Instruction* i2 = getInstructionAt(BufferOffset(offset.offset() + 4)); + MOZ_ASSERT(i2->IsMovk()); + i2->SetInstructionBits(i2->InstructionBits() | + ImmMoveWide(uint16_t(imm.value >> 16))); +} + +void MacroAssembler::addDouble(FloatRegister src, FloatRegister dest) { + fadd(ARMFPRegister(dest, 64), ARMFPRegister(dest, 64), + ARMFPRegister(src, 64)); +} + +void MacroAssembler::addFloat32(FloatRegister src, FloatRegister dest) { + fadd(ARMFPRegister(dest, 32), ARMFPRegister(dest, 32), + ARMFPRegister(src, 32)); +} + +void MacroAssembler::sub32(Imm32 imm, Register dest) { + Sub(ARMRegister(dest, 32), ARMRegister(dest, 32), Operand(imm.value)); +} + +void 
MacroAssembler::sub32(Register src, Register dest) { + Sub(ARMRegister(dest, 32), ARMRegister(dest, 32), + Operand(ARMRegister(src, 32))); +} + +void MacroAssembler::sub32(const Address& src, Register dest) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch32 = temps.AcquireW(); + MOZ_ASSERT(scratch32.asUnsized() != src.base); + load32(src, scratch32.asUnsized()); + Sub(ARMRegister(dest, 32), ARMRegister(dest, 32), Operand(scratch32)); +} + +void MacroAssembler::subPtr(Register src, Register dest) { + Sub(ARMRegister(dest, 64), ARMRegister(dest, 64), + Operand(ARMRegister(src, 64))); +} + +void MacroAssembler::subPtr(Register src, const Address& dest) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch64 = temps.AcquireX(); + MOZ_ASSERT(scratch64.asUnsized() != dest.base); + + Ldr(scratch64, toMemOperand(dest)); + Sub(scratch64, scratch64, Operand(ARMRegister(src, 64))); + Str(scratch64, toMemOperand(dest)); +} + +void MacroAssembler::subPtr(Imm32 imm, Register dest) { + Sub(ARMRegister(dest, 64), ARMRegister(dest, 64), Operand(imm.value)); +} + +void MacroAssembler::subPtr(const Address& addr, Register dest) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch64 = temps.AcquireX(); + MOZ_ASSERT(scratch64.asUnsized() != addr.base); + + Ldr(scratch64, toMemOperand(addr)); + Sub(ARMRegister(dest, 64), ARMRegister(dest, 64), Operand(scratch64)); +} + +void MacroAssembler::sub64(Register64 src, Register64 dest) { + Sub(ARMRegister(dest.reg, 64), ARMRegister(dest.reg, 64), + ARMRegister(src.reg, 64)); +} + +void MacroAssembler::sub64(Imm64 imm, Register64 dest) { + Sub(ARMRegister(dest.reg, 64), ARMRegister(dest.reg, 64), Operand(imm.value)); +} + +void MacroAssembler::subDouble(FloatRegister src, FloatRegister dest) { + fsub(ARMFPRegister(dest, 64), ARMFPRegister(dest, 64), + ARMFPRegister(src, 64)); +} + +void MacroAssembler::subFloat32(FloatRegister src, FloatRegister dest) { + 
fsub(ARMFPRegister(dest, 32), ARMFPRegister(dest, 32), + ARMFPRegister(src, 32)); +} + +void MacroAssembler::mul32(Register rhs, Register srcDest) { + mul32(srcDest, rhs, srcDest, nullptr); +} + +void MacroAssembler::mul32(Imm32 imm, Register srcDest) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch32 = temps.AcquireW(); + + move32(imm, scratch32.asUnsized()); + mul32(scratch32.asUnsized(), srcDest); +} + +void MacroAssembler::mul32(Register src1, Register src2, Register dest, + Label* onOver) { + if (onOver) { + Smull(ARMRegister(dest, 64), ARMRegister(src1, 32), ARMRegister(src2, 32)); + Cmp(ARMRegister(dest, 64), Operand(ARMRegister(dest, 32), vixl::SXTW)); + B(onOver, NotEqual); + + // Clear upper 32 bits. + Uxtw(ARMRegister(dest, 64), ARMRegister(dest, 64)); + } else { + Mul(ARMRegister(dest, 32), ARMRegister(src1, 32), ARMRegister(src2, 32)); + } +} + +void MacroAssembler::mulHighUnsigned32(Imm32 imm, Register src, Register dest) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch32 = temps.AcquireW(); + + Mov(scratch32, int32_t(imm.value)); + Umull(ARMRegister(dest, 64), scratch32, ARMRegister(src, 32)); + + Lsr(ARMRegister(dest, 64), ARMRegister(dest, 64), 32); +} + +void MacroAssembler::mulPtr(Register rhs, Register srcDest) { + Mul(ARMRegister(srcDest, 64), ARMRegister(srcDest, 64), ARMRegister(rhs, 64)); +} + +void MacroAssembler::mul64(Imm64 imm, const Register64& dest) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch64 = temps.AcquireX(); + MOZ_ASSERT(dest.reg != scratch64.asUnsized()); + mov(ImmWord(imm.value), scratch64.asUnsized()); + Mul(ARMRegister(dest.reg, 64), ARMRegister(dest.reg, 64), scratch64); +} + +void MacroAssembler::mul64(const Register64& src, const Register64& dest, + const Register temp) { + MOZ_ASSERT(temp == Register::Invalid()); + Mul(ARMRegister(dest.reg, 64), ARMRegister(dest.reg, 64), + ARMRegister(src.reg, 64)); +} + +void MacroAssembler::mul64(const 
Register64& src1, const Register64& src2, + const Register64& dest) { + Mul(ARMRegister(dest.reg, 64), ARMRegister(src1.reg, 64), + ARMRegister(src2.reg, 64)); +} + +void MacroAssembler::mul64(Imm64 src1, const Register64& src2, + const Register64& dest) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch64 = temps.AcquireX(); + MOZ_ASSERT(dest.reg != scratch64.asUnsized()); + mov(ImmWord(src1.value), scratch64.asUnsized()); + Mul(ARMRegister(dest.reg, 64), ARMRegister(src2.reg, 64), scratch64); +} + +void MacroAssembler::mulBy3(Register src, Register dest) { + ARMRegister xdest(dest, 64); + ARMRegister xsrc(src, 64); + Add(xdest, xsrc, Operand(xsrc, vixl::LSL, 1)); +} + +void MacroAssembler::mulFloat32(FloatRegister src, FloatRegister dest) { + fmul(ARMFPRegister(dest, 32), ARMFPRegister(dest, 32), + ARMFPRegister(src, 32)); +} + +void MacroAssembler::mulDouble(FloatRegister src, FloatRegister dest) { + fmul(ARMFPRegister(dest, 64), ARMFPRegister(dest, 64), + ARMFPRegister(src, 64)); +} + +void MacroAssembler::mulDoublePtr(ImmPtr imm, Register temp, + FloatRegister dest) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(temp != scratch); + movePtr(imm, scratch); + const ARMFPRegister scratchDouble = temps.AcquireD(); + Ldr(scratchDouble, MemOperand(Address(scratch, 0))); + fmul(ARMFPRegister(dest, 64), ARMFPRegister(dest, 64), scratchDouble); +} + +void MacroAssembler::quotient32(Register rhs, Register srcDest, + bool isUnsigned) { + if (isUnsigned) { + Udiv(ARMRegister(srcDest, 32), ARMRegister(srcDest, 32), + ARMRegister(rhs, 32)); + } else { + Sdiv(ARMRegister(srcDest, 32), ARMRegister(srcDest, 32), + ARMRegister(rhs, 32)); + } +} + +// This does not deal with x % 0 or INT_MIN % -1, the caller needs to filter +// those cases when they may occur. 
+ +void MacroAssembler::remainder32(Register rhs, Register srcDest, + bool isUnsigned) { + vixl::UseScratchRegisterScope temps(this); + ARMRegister scratch = temps.AcquireW(); + if (isUnsigned) { + Udiv(scratch, ARMRegister(srcDest, 32), ARMRegister(rhs, 32)); + } else { + Sdiv(scratch, ARMRegister(srcDest, 32), ARMRegister(rhs, 32)); + } + Mul(scratch, scratch, ARMRegister(rhs, 32)); + Sub(ARMRegister(srcDest, 32), ARMRegister(srcDest, 32), scratch); +} + +void MacroAssembler::divFloat32(FloatRegister src, FloatRegister dest) { + fdiv(ARMFPRegister(dest, 32), ARMFPRegister(dest, 32), + ARMFPRegister(src, 32)); +} + +void MacroAssembler::divDouble(FloatRegister src, FloatRegister dest) { + fdiv(ARMFPRegister(dest, 64), ARMFPRegister(dest, 64), + ARMFPRegister(src, 64)); +} + +void MacroAssembler::inc64(AbsoluteAddress dest) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratchAddr64 = temps.AcquireX(); + const ARMRegister scratch64 = temps.AcquireX(); + + Mov(scratchAddr64, uint64_t(dest.addr)); + Ldr(scratch64, MemOperand(scratchAddr64, 0)); + Add(scratch64, scratch64, Operand(1)); + Str(scratch64, MemOperand(scratchAddr64, 0)); +} + +void MacroAssembler::neg32(Register reg) { + Neg(ARMRegister(reg, 32), Operand(ARMRegister(reg, 32))); +} + +void MacroAssembler::neg64(Register64 reg) { negPtr(reg.reg); } + +void MacroAssembler::negPtr(Register reg) { + Neg(ARMRegister(reg, 64), Operand(ARMRegister(reg, 64))); +} + +void MacroAssembler::negateFloat(FloatRegister reg) { + fneg(ARMFPRegister(reg, 32), ARMFPRegister(reg, 32)); +} + +void MacroAssembler::negateDouble(FloatRegister reg) { + fneg(ARMFPRegister(reg, 64), ARMFPRegister(reg, 64)); +} + +void MacroAssembler::abs32(Register src, Register dest) { + Cmp(ARMRegister(src, 32), wzr); + Cneg(ARMRegister(dest, 32), ARMRegister(src, 32), Assembler::LessThan); +} + +void MacroAssembler::absFloat32(FloatRegister src, FloatRegister dest) { + fabs(ARMFPRegister(dest, 32), ARMFPRegister(src, 32)); +} 
+ +void MacroAssembler::absDouble(FloatRegister src, FloatRegister dest) { + fabs(ARMFPRegister(dest, 64), ARMFPRegister(src, 64)); +} + +void MacroAssembler::sqrtFloat32(FloatRegister src, FloatRegister dest) { + fsqrt(ARMFPRegister(dest, 32), ARMFPRegister(src, 32)); +} + +void MacroAssembler::sqrtDouble(FloatRegister src, FloatRegister dest) { + fsqrt(ARMFPRegister(dest, 64), ARMFPRegister(src, 64)); +} + +void MacroAssembler::minFloat32(FloatRegister other, FloatRegister srcDest, + bool handleNaN) { + MOZ_ASSERT(handleNaN); // Always true for wasm + fmin(ARMFPRegister(srcDest, 32), ARMFPRegister(srcDest, 32), + ARMFPRegister(other, 32)); +} + +void MacroAssembler::minDouble(FloatRegister other, FloatRegister srcDest, + bool handleNaN) { + MOZ_ASSERT(handleNaN); // Always true for wasm + fmin(ARMFPRegister(srcDest, 64), ARMFPRegister(srcDest, 64), + ARMFPRegister(other, 64)); +} + +void MacroAssembler::maxFloat32(FloatRegister other, FloatRegister srcDest, + bool handleNaN) { + MOZ_ASSERT(handleNaN); // Always true for wasm + fmax(ARMFPRegister(srcDest, 32), ARMFPRegister(srcDest, 32), + ARMFPRegister(other, 32)); +} + +void MacroAssembler::maxDouble(FloatRegister other, FloatRegister srcDest, + bool handleNaN) { + MOZ_ASSERT(handleNaN); // Always true for wasm + fmax(ARMFPRegister(srcDest, 64), ARMFPRegister(srcDest, 64), + ARMFPRegister(other, 64)); +} + +// =============================================================== +// Shift functions + +void MacroAssembler::lshiftPtr(Imm32 imm, Register dest) { + MOZ_ASSERT(0 <= imm.value && imm.value < 64); + Lsl(ARMRegister(dest, 64), ARMRegister(dest, 64), imm.value); +} + +void MacroAssembler::lshiftPtr(Register shift, Register dest) { + Lsl(ARMRegister(dest, 64), ARMRegister(dest, 64), ARMRegister(shift, 64)); +} + +void MacroAssembler::lshift64(Imm32 imm, Register64 dest) { + MOZ_ASSERT(0 <= imm.value && imm.value < 64); + lshiftPtr(imm, dest.reg); +} + +void MacroAssembler::lshift64(Register shift, Register64 
srcDest) { + Lsl(ARMRegister(srcDest.reg, 64), ARMRegister(srcDest.reg, 64), + ARMRegister(shift, 64)); +} + +void MacroAssembler::lshift32(Register shift, Register dest) { + Lsl(ARMRegister(dest, 32), ARMRegister(dest, 32), ARMRegister(shift, 32)); +} + +void MacroAssembler::flexibleLshift32(Register src, Register dest) { + lshift32(src, dest); +} + +void MacroAssembler::lshift32(Imm32 imm, Register dest) { + MOZ_ASSERT(0 <= imm.value && imm.value < 32); + Lsl(ARMRegister(dest, 32), ARMRegister(dest, 32), imm.value); +} + +void MacroAssembler::rshiftPtr(Imm32 imm, Register dest) { + MOZ_ASSERT(0 <= imm.value && imm.value < 64); + Lsr(ARMRegister(dest, 64), ARMRegister(dest, 64), imm.value); +} + +void MacroAssembler::rshiftPtr(Imm32 imm, Register src, Register dest) { + MOZ_ASSERT(0 <= imm.value && imm.value < 64); + Lsr(ARMRegister(dest, 64), ARMRegister(src, 64), imm.value); +} + +void MacroAssembler::rshiftPtr(Register shift, Register dest) { + Lsr(ARMRegister(dest, 64), ARMRegister(dest, 64), ARMRegister(shift, 64)); +} + +void MacroAssembler::rshift32(Register shift, Register dest) { + Lsr(ARMRegister(dest, 32), ARMRegister(dest, 32), ARMRegister(shift, 32)); +} + +void MacroAssembler::flexibleRshift32(Register src, Register dest) { + rshift32(src, dest); +} + +void MacroAssembler::rshift32(Imm32 imm, Register dest) { + MOZ_ASSERT(0 <= imm.value && imm.value < 32); + Lsr(ARMRegister(dest, 32), ARMRegister(dest, 32), imm.value); +} + +void MacroAssembler::rshiftPtrArithmetic(Imm32 imm, Register dest) { + MOZ_ASSERT(0 <= imm.value && imm.value < 64); + Asr(ARMRegister(dest, 64), ARMRegister(dest, 64), imm.value); +} + +void MacroAssembler::rshift32Arithmetic(Register shift, Register dest) { + Asr(ARMRegister(dest, 32), ARMRegister(dest, 32), ARMRegister(shift, 32)); +} + +void MacroAssembler::rshift32Arithmetic(Imm32 imm, Register dest) { + MOZ_ASSERT(0 <= imm.value && imm.value < 32); + Asr(ARMRegister(dest, 32), ARMRegister(dest, 32), imm.value); +} + +void 
MacroAssembler::flexibleRshift32Arithmetic(Register src, Register dest) { + rshift32Arithmetic(src, dest); +} + +void MacroAssembler::rshift64(Imm32 imm, Register64 dest) { + MOZ_ASSERT(0 <= imm.value && imm.value < 64); + rshiftPtr(imm, dest.reg); +} + +void MacroAssembler::rshift64(Register shift, Register64 srcDest) { + Lsr(ARMRegister(srcDest.reg, 64), ARMRegister(srcDest.reg, 64), + ARMRegister(shift, 64)); +} + +void MacroAssembler::rshift64Arithmetic(Imm32 imm, Register64 dest) { + Asr(ARMRegister(dest.reg, 64), ARMRegister(dest.reg, 64), imm.value); +} + +void MacroAssembler::rshift64Arithmetic(Register shift, Register64 srcDest) { + Asr(ARMRegister(srcDest.reg, 64), ARMRegister(srcDest.reg, 64), + ARMRegister(shift, 64)); +} + +// =============================================================== +// Condition functions + +void MacroAssembler::cmp8Set(Condition cond, Address lhs, Imm32 rhs, + Register dest) { + vixl::UseScratchRegisterScope temps(this); + Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(scratch != lhs.base); + + switch (cond) { + case Assembler::Equal: + case Assembler::NotEqual: + case Assembler::Above: + case Assembler::AboveOrEqual: + case Assembler::Below: + case Assembler::BelowOrEqual: + load8ZeroExtend(lhs, scratch); + cmp32Set(cond, scratch, Imm32(uint8_t(rhs.value)), dest); + break; + + case Assembler::GreaterThan: + case Assembler::GreaterThanOrEqual: + case Assembler::LessThan: + case Assembler::LessThanOrEqual: + load8SignExtend(lhs, scratch); + cmp32Set(cond, scratch, Imm32(int8_t(rhs.value)), dest); + break; + + default: + MOZ_CRASH("unexpected condition"); + } +} + +void MacroAssembler::cmp16Set(Condition cond, Address lhs, Imm32 rhs, + Register dest) { + vixl::UseScratchRegisterScope temps(this); + Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(scratch != lhs.base); + + switch (cond) { + case Assembler::Equal: + case Assembler::NotEqual: + case Assembler::Above: + case Assembler::AboveOrEqual: + 
case Assembler::Below: + case Assembler::BelowOrEqual: + load16ZeroExtend(lhs, scratch); + cmp32Set(cond, scratch, Imm32(uint16_t(rhs.value)), dest); + break; + + case Assembler::GreaterThan: + case Assembler::GreaterThanOrEqual: + case Assembler::LessThan: + case Assembler::LessThanOrEqual: + load16SignExtend(lhs, scratch); + cmp32Set(cond, scratch, Imm32(int16_t(rhs.value)), dest); + break; + + default: + MOZ_CRASH("unexpected condition"); + } +} + +template <typename T1, typename T2> +void MacroAssembler::cmp32Set(Condition cond, T1 lhs, T2 rhs, Register dest) { + cmp32(lhs, rhs); + emitSet(cond, dest); +} + +void MacroAssembler::cmp64Set(Condition cond, Address lhs, Imm64 rhs, + Register dest) { + cmpPtrSet(cond, lhs, ImmWord(static_cast<uintptr_t>(rhs.value)), dest); +} + +template <typename T1, typename T2> +void MacroAssembler::cmpPtrSet(Condition cond, T1 lhs, T2 rhs, Register dest) { + cmpPtr(lhs, rhs); + emitSet(cond, dest); +} + +// =============================================================== +// Rotation functions + +void MacroAssembler::rotateLeft(Imm32 count, Register input, Register dest) { + Ror(ARMRegister(dest, 32), ARMRegister(input, 32), (32 - count.value) & 31); +} + +void MacroAssembler::rotateLeft(Register count, Register input, Register dest) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch = temps.AcquireW(); + // Really 32 - count, but the upper bits of the result are ignored. 
+ Neg(scratch, ARMRegister(count, 32)); + Ror(ARMRegister(dest, 32), ARMRegister(input, 32), scratch); +} + +void MacroAssembler::rotateRight(Imm32 count, Register input, Register dest) { + Ror(ARMRegister(dest, 32), ARMRegister(input, 32), count.value & 31); +} + +void MacroAssembler::rotateRight(Register count, Register input, + Register dest) { + Ror(ARMRegister(dest, 32), ARMRegister(input, 32), ARMRegister(count, 32)); +} + +void MacroAssembler::rotateLeft64(Register count, Register64 input, + Register64 dest, Register temp) { + MOZ_ASSERT(temp == Register::Invalid()); + + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch = temps.AcquireX(); + // Really 64 - count, but the upper bits of the result are ignored. + Neg(scratch, ARMRegister(count, 64)); + Ror(ARMRegister(dest.reg, 64), ARMRegister(input.reg, 64), scratch); +} + +void MacroAssembler::rotateLeft64(Imm32 count, Register64 input, + Register64 dest, Register temp) { + MOZ_ASSERT(temp == Register::Invalid()); + + Ror(ARMRegister(dest.reg, 64), ARMRegister(input.reg, 64), + (64 - count.value) & 63); +} + +void MacroAssembler::rotateRight64(Register count, Register64 input, + Register64 dest, Register temp) { + MOZ_ASSERT(temp == Register::Invalid()); + + Ror(ARMRegister(dest.reg, 64), ARMRegister(input.reg, 64), + ARMRegister(count, 64)); +} + +void MacroAssembler::rotateRight64(Imm32 count, Register64 input, + Register64 dest, Register temp) { + MOZ_ASSERT(temp == Register::Invalid()); + + Ror(ARMRegister(dest.reg, 64), ARMRegister(input.reg, 64), count.value & 63); +} + +// =============================================================== +// Bit counting functions + +void MacroAssembler::clz32(Register src, Register dest, bool knownNotZero) { + Clz(ARMRegister(dest, 32), ARMRegister(src, 32)); +} + +void MacroAssembler::ctz32(Register src, Register dest, bool knownNotZero) { + Rbit(ARMRegister(dest, 32), ARMRegister(src, 32)); + Clz(ARMRegister(dest, 32), ARMRegister(dest, 32)); +} 
+ +void MacroAssembler::clz64(Register64 src, Register dest) { + Clz(ARMRegister(dest, 64), ARMRegister(src.reg, 64)); +} + +void MacroAssembler::ctz64(Register64 src, Register dest) { + Rbit(ARMRegister(dest, 64), ARMRegister(src.reg, 64)); + Clz(ARMRegister(dest, 64), ARMRegister(dest, 64)); +} + +void MacroAssembler::popcnt32(Register src_, Register dest_, Register tmp_) { + MOZ_ASSERT(tmp_ != Register::Invalid()); + + // Equivalent to mozilla::CountPopulation32(). + + ARMRegister src(src_, 32); + ARMRegister dest(dest_, 32); + ARMRegister tmp(tmp_, 32); + + Mov(tmp, src); + if (src_ != dest_) { + Mov(dest, src); + } + Lsr(dest, dest, 1); + And(dest, dest, 0x55555555); + Sub(dest, tmp, dest); + Lsr(tmp, dest, 2); + And(tmp, tmp, 0x33333333); + And(dest, dest, 0x33333333); + Add(dest, tmp, dest); + Add(dest, dest, Operand(dest, vixl::LSR, 4)); + And(dest, dest, 0x0F0F0F0F); + Add(dest, dest, Operand(dest, vixl::LSL, 8)); + Add(dest, dest, Operand(dest, vixl::LSL, 16)); + Lsr(dest, dest, 24); +} + +void MacroAssembler::popcnt64(Register64 src_, Register64 dest_, + Register tmp_) { + MOZ_ASSERT(tmp_ != Register::Invalid()); + + // Equivalent to mozilla::CountPopulation64(), though likely more efficient. 
+ + ARMRegister src(src_.reg, 64); + ARMRegister dest(dest_.reg, 64); + ARMRegister tmp(tmp_, 64); + + Mov(tmp, src); + if (src_ != dest_) { + Mov(dest, src); + } + Lsr(dest, dest, 1); + And(dest, dest, 0x5555555555555555); + Sub(dest, tmp, dest); + Lsr(tmp, dest, 2); + And(tmp, tmp, 0x3333333333333333); + And(dest, dest, 0x3333333333333333); + Add(dest, tmp, dest); + Add(dest, dest, Operand(dest, vixl::LSR, 4)); + And(dest, dest, 0x0F0F0F0F0F0F0F0F); + Add(dest, dest, Operand(dest, vixl::LSL, 8)); + Add(dest, dest, Operand(dest, vixl::LSL, 16)); + Add(dest, dest, Operand(dest, vixl::LSL, 32)); + Lsr(dest, dest, 56); +} + +// =============================================================== +// Branch functions + +void MacroAssembler::branch8(Condition cond, const Address& lhs, Imm32 rhs, + Label* label) { + vixl::UseScratchRegisterScope temps(this); + Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(scratch != lhs.base); + + switch (cond) { + case Assembler::Equal: + case Assembler::NotEqual: + case Assembler::Above: + case Assembler::AboveOrEqual: + case Assembler::Below: + case Assembler::BelowOrEqual: + load8ZeroExtend(lhs, scratch); + branch32(cond, scratch, Imm32(uint8_t(rhs.value)), label); + break; + + case Assembler::GreaterThan: + case Assembler::GreaterThanOrEqual: + case Assembler::LessThan: + case Assembler::LessThanOrEqual: + load8SignExtend(lhs, scratch); + branch32(cond, scratch, Imm32(int8_t(rhs.value)), label); + break; + + default: + MOZ_CRASH("unexpected condition"); + } +} + +void MacroAssembler::branch8(Condition cond, const BaseIndex& lhs, Register rhs, + Label* label) { + vixl::UseScratchRegisterScope temps(this); + Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(scratch != lhs.base); + + switch (cond) { + case Assembler::Equal: + case Assembler::NotEqual: + case Assembler::Above: + case Assembler::AboveOrEqual: + case Assembler::Below: + case Assembler::BelowOrEqual: + load8ZeroExtend(lhs, scratch); + branch32(cond, 
scratch, rhs, label); + break; + + case Assembler::GreaterThan: + case Assembler::GreaterThanOrEqual: + case Assembler::LessThan: + case Assembler::LessThanOrEqual: + load8SignExtend(lhs, scratch); + branch32(cond, scratch, rhs, label); + break; + + default: + MOZ_CRASH("unexpected condition"); + } +} + +void MacroAssembler::branch16(Condition cond, const Address& lhs, Imm32 rhs, + Label* label) { + vixl::UseScratchRegisterScope temps(this); + Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(scratch != lhs.base); + + switch (cond) { + case Assembler::Equal: + case Assembler::NotEqual: + case Assembler::Above: + case Assembler::AboveOrEqual: + case Assembler::Below: + case Assembler::BelowOrEqual: + load16ZeroExtend(lhs, scratch); + branch32(cond, scratch, Imm32(uint16_t(rhs.value)), label); + break; + + case Assembler::GreaterThan: + case Assembler::GreaterThanOrEqual: + case Assembler::LessThan: + case Assembler::LessThanOrEqual: + load16SignExtend(lhs, scratch); + branch32(cond, scratch, Imm32(int16_t(rhs.value)), label); + break; + + default: + MOZ_CRASH("unexpected condition"); + } +} + +template <class L> +void MacroAssembler::branch32(Condition cond, Register lhs, Register rhs, + L label) { + cmp32(lhs, rhs); + B(label, cond); +} + +template <class L> +void MacroAssembler::branch32(Condition cond, Register lhs, Imm32 imm, + L label) { + if (imm.value == 0 && cond == Assembler::Equal) { + Cbz(ARMRegister(lhs, 32), label); + } else if (imm.value == 0 && cond == Assembler::NotEqual) { + Cbnz(ARMRegister(lhs, 32), label); + } else { + cmp32(lhs, imm); + B(label, cond); + } +} + +void MacroAssembler::branch32(Condition cond, Register lhs, const Address& rhs, + Label* label) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(scratch != lhs); + MOZ_ASSERT(scratch != rhs.base); + load32(rhs, scratch); + branch32(cond, lhs, scratch, label); +} + +void MacroAssembler::branch32(Condition 
cond, const Address& lhs, Register rhs, + Label* label) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(scratch != lhs.base); + MOZ_ASSERT(scratch != rhs); + load32(lhs, scratch); + branch32(cond, scratch, rhs, label); +} + +void MacroAssembler::branch32(Condition cond, const Address& lhs, Imm32 imm, + Label* label) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(scratch != lhs.base); + load32(lhs, scratch); + branch32(cond, scratch, imm, label); +} + +void MacroAssembler::branch32(Condition cond, const AbsoluteAddress& lhs, + Register rhs, Label* label) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + movePtr(ImmPtr(lhs.addr), scratch); + branch32(cond, Address(scratch, 0), rhs, label); +} + +void MacroAssembler::branch32(Condition cond, const AbsoluteAddress& lhs, + Imm32 rhs, Label* label) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + load32(lhs, scratch); + branch32(cond, scratch, rhs, label); +} + +void MacroAssembler::branch32(Condition cond, const BaseIndex& lhs, Imm32 rhs, + Label* label) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch32 = temps.AcquireW(); + MOZ_ASSERT(scratch32.asUnsized() != lhs.base); + MOZ_ASSERT(scratch32.asUnsized() != lhs.index); + doBaseIndex(scratch32, lhs, vixl::LDR_w); + branch32(cond, scratch32.asUnsized(), rhs, label); +} + +void MacroAssembler::branch32(Condition cond, wasm::SymbolicAddress lhs, + Imm32 rhs, Label* label) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + movePtr(lhs, scratch); + branch32(cond, Address(scratch, 0), rhs, label); +} + +void MacroAssembler::branch64(Condition cond, Register64 lhs, Imm64 val, + Label* success, Label* fail) { + if (val.value == 0 && cond == 
Assembler::Equal) { + Cbz(ARMRegister(lhs.reg, 64), success); + } else if (val.value == 0 && cond == Assembler::NotEqual) { + Cbnz(ARMRegister(lhs.reg, 64), success); + } else { + Cmp(ARMRegister(lhs.reg, 64), val.value); + B(success, cond); + } + if (fail) { + B(fail); + } +} + +void MacroAssembler::branch64(Condition cond, Register64 lhs, Register64 rhs, + Label* success, Label* fail) { + Cmp(ARMRegister(lhs.reg, 64), ARMRegister(rhs.reg, 64)); + B(success, cond); + if (fail) { + B(fail); + } +} + +void MacroAssembler::branch64(Condition cond, const Address& lhs, Imm64 val, + Label* label) { + MOZ_ASSERT(cond == Assembler::NotEqual || cond == Assembler::Equal, + "other condition codes not supported"); + + branchPtr(cond, lhs, ImmWord(val.value), label); +} + +void MacroAssembler::branch64(Condition cond, const Address& lhs, + Register64 rhs, Label* label) { + MOZ_ASSERT(cond == Assembler::NotEqual || cond == Assembler::Equal, + "other condition codes not supported"); + + branchPtr(cond, lhs, rhs.reg, label); +} + +void MacroAssembler::branch64(Condition cond, const Address& lhs, + const Address& rhs, Register scratch, + Label* label) { + MOZ_ASSERT(cond == Assembler::NotEqual || cond == Assembler::Equal, + "other condition codes not supported"); + MOZ_ASSERT(lhs.base != scratch); + MOZ_ASSERT(rhs.base != scratch); + + loadPtr(rhs, scratch); + branchPtr(cond, lhs, scratch, label); +} + +template <class L> +void MacroAssembler::branchPtr(Condition cond, Register lhs, Register rhs, + L label) { + Cmp(ARMRegister(lhs, 64), ARMRegister(rhs, 64)); + B(label, cond); +} + +void MacroAssembler::branchPtr(Condition cond, Register lhs, Imm32 rhs, + Label* label) { + if (rhs.value == 0 && cond == Assembler::Equal) { + Cbz(ARMRegister(lhs, 64), label); + } else if (rhs.value == 0 && cond == Assembler::NotEqual) { + Cbnz(ARMRegister(lhs, 64), label); + } else { + cmpPtr(lhs, rhs); + B(label, cond); + } +} + +void MacroAssembler::branchPtr(Condition cond, Register lhs, ImmPtr 
rhs, + Label* label) { + if (rhs.value == 0 && cond == Assembler::Equal) { + Cbz(ARMRegister(lhs, 64), label); + } else if (rhs.value == 0 && cond == Assembler::NotEqual) { + Cbnz(ARMRegister(lhs, 64), label); + } else { + cmpPtr(lhs, rhs); + B(label, cond); + } +} + +void MacroAssembler::branchPtr(Condition cond, Register lhs, ImmGCPtr rhs, + Label* label) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(scratch != lhs); + movePtr(rhs, scratch); + branchPtr(cond, lhs, scratch, label); +} + +void MacroAssembler::branchPtr(Condition cond, Register lhs, ImmWord rhs, + Label* label) { + if (rhs.value == 0 && cond == Assembler::Equal) { + Cbz(ARMRegister(lhs, 64), label); + } else if (rhs.value == 0 && cond == Assembler::NotEqual) { + Cbnz(ARMRegister(lhs, 64), label); + } else { + cmpPtr(lhs, rhs); + B(label, cond); + } +} + +template <class L> +void MacroAssembler::branchPtr(Condition cond, const Address& lhs, Register rhs, + L label) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(scratch != lhs.base); + MOZ_ASSERT(scratch != rhs); + loadPtr(lhs, scratch); + branchPtr(cond, scratch, rhs, label); +} + +void MacroAssembler::branchPtr(Condition cond, const Address& lhs, ImmPtr rhs, + Label* label) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(scratch != lhs.base); + loadPtr(lhs, scratch); + branchPtr(cond, scratch, rhs, label); +} + +void MacroAssembler::branchPtr(Condition cond, const Address& lhs, ImmGCPtr rhs, + Label* label) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch1_64 = temps.AcquireX(); + const ARMRegister scratch2_64 = temps.AcquireX(); + MOZ_ASSERT(scratch1_64.asUnsized() != lhs.base); + MOZ_ASSERT(scratch2_64.asUnsized() != lhs.base); + + movePtr(rhs, scratch1_64.asUnsized()); + loadPtr(lhs, scratch2_64.asUnsized()); + 
branchPtr(cond, scratch2_64.asUnsized(), scratch1_64.asUnsized(), label); +} + +void MacroAssembler::branchPtr(Condition cond, const Address& lhs, ImmWord rhs, + Label* label) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(scratch != lhs.base); + loadPtr(lhs, scratch); + branchPtr(cond, scratch, rhs, label); +} + +void MacroAssembler::branchPtr(Condition cond, const AbsoluteAddress& lhs, + Register rhs, Label* label) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(scratch != rhs); + loadPtr(lhs, scratch); + branchPtr(cond, scratch, rhs, label); +} + +void MacroAssembler::branchPtr(Condition cond, const AbsoluteAddress& lhs, + ImmWord rhs, Label* label) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + loadPtr(lhs, scratch); + branchPtr(cond, scratch, rhs, label); +} + +void MacroAssembler::branchPtr(Condition cond, wasm::SymbolicAddress lhs, + Register rhs, Label* label) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(scratch != rhs); + loadPtr(lhs, scratch); + branchPtr(cond, scratch, rhs, label); +} + +void MacroAssembler::branchPtr(Condition cond, const BaseIndex& lhs, + ImmWord rhs, Label* label) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(scratch != lhs.base); + MOZ_ASSERT(scratch != lhs.index); + loadPtr(lhs, scratch); + branchPtr(cond, scratch, rhs, label); +} + +void MacroAssembler::branchPtr(Condition cond, const BaseIndex& lhs, + Register rhs, Label* label) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(scratch != lhs.base); + MOZ_ASSERT(scratch != lhs.index); + loadPtr(lhs, scratch); + branchPtr(cond, scratch, rhs, label); +} + +void 
MacroAssembler::branchPrivatePtr(Condition cond, const Address& lhs, + Register rhs, Label* label) { + branchPtr(cond, lhs, rhs, label); +} + +void MacroAssembler::branchFloat(DoubleCondition cond, FloatRegister lhs, + FloatRegister rhs, Label* label) { + compareFloat(cond, lhs, rhs); + switch (cond) { + case DoubleNotEqual: { + Label unordered; + // not equal *and* ordered + branch(Overflow, &unordered); + branch(NotEqual, label); + bind(&unordered); + break; + } + case DoubleEqualOrUnordered: + branch(Overflow, label); + branch(Equal, label); + break; + default: + branch(Condition(cond), label); + } +} + +void MacroAssembler::branchTruncateFloat32MaybeModUint32(FloatRegister src, + Register dest, + Label* fail) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch64 = temps.AcquireX(); + + ARMFPRegister src32(src, 32); + ARMRegister dest64(dest, 64); + + MOZ_ASSERT(!scratch64.Is(dest64)); + + // Convert scalar to signed 64-bit fixed-point, rounding toward zero. + // In the case of overflow, the output is saturated. + // In the case of NaN and -0, the output is zero. + Fcvtzs(dest64, src32); + + // Fail if the result is saturated, i.e. it's either INT64_MIN or INT64_MAX. + Add(scratch64, dest64, Operand(0x7fff'ffff'ffff'ffff)); + Cmn(scratch64, 3); + B(fail, Assembler::Above); + + // Clear upper 32 bits. 
+ Uxtw(dest64, dest64); +} + +void MacroAssembler::branchTruncateFloat32ToInt32(FloatRegister src, + Register dest, Label* fail) { + convertFloat32ToInt32(src, dest, fail, false); +} + +void MacroAssembler::branchDouble(DoubleCondition cond, FloatRegister lhs, + FloatRegister rhs, Label* label) { + compareDouble(cond, lhs, rhs); + switch (cond) { + case DoubleNotEqual: { + Label unordered; + // not equal *and* ordered + branch(Overflow, &unordered); + branch(NotEqual, label); + bind(&unordered); + break; + } + case DoubleEqualOrUnordered: + branch(Overflow, label); + branch(Equal, label); + break; + default: + branch(Condition(cond), label); + } +} + +void MacroAssembler::branchTruncateDoubleMaybeModUint32(FloatRegister src, + Register dest, + Label* fail) { + // ARMv8.3 chips support the FJCVTZS instruction, which handles exactly this + // logic. But the simulator does not implement it, and when the simulator runs + // on ARM64 hardware we want to override vixl's detection of it. +#if defined(JS_SIMULATOR_ARM64) && (defined(__aarch64__) || defined(_M_ARM64)) + const bool fjscvt = false; +#else + const bool fjscvt = CPUHas(vixl::CPUFeatures::kFP, vixl::CPUFeatures::kJSCVT); +#endif + if (fjscvt) { + Fjcvtzs(ARMRegister(dest, 32), ARMFPRegister(src, 64)); + return; + } + + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch64 = temps.AcquireX(); + + // An out of range integer will be saturated to the destination size. + ARMFPRegister src64(src, 64); + ARMRegister dest64(dest, 64); + + MOZ_ASSERT(!scratch64.Is(dest64)); + + // Convert scalar to signed 64-bit fixed-point, rounding toward zero. + // In the case of overflow, the output is saturated. + // In the case of NaN and -0, the output is zero. + Fcvtzs(dest64, src64); + + // Fail if the result is saturated, i.e. it's either INT64_MIN or INT64_MAX. + Add(scratch64, dest64, Operand(0x7fff'ffff'ffff'ffff)); + Cmn(scratch64, 3); + B(fail, Assembler::Above); + + // Clear upper 32 bits. 
+ Uxtw(dest64, dest64); +} + +void MacroAssembler::branchTruncateDoubleToInt32(FloatRegister src, + Register dest, Label* fail) { + ARMFPRegister src64(src, 64); + ARMRegister dest64(dest, 64); + ARMRegister dest32(dest, 32); + + // Convert scalar to signed 64-bit fixed-point, rounding toward zero. + // In the case of overflow, the output is saturated. + // In the case of NaN and -0, the output is zero. + Fcvtzs(dest64, src64); + + // Fail on overflow cases. + Cmp(dest64, Operand(dest32, vixl::SXTW)); + B(fail, Assembler::NotEqual); + + // Clear upper 32 bits. + Uxtw(dest64, dest64); +} + +template <typename T> +void MacroAssembler::branchAdd32(Condition cond, T src, Register dest, + Label* label) { + adds32(src, dest); + B(label, cond); +} + +template <typename T> +void MacroAssembler::branchSub32(Condition cond, T src, Register dest, + Label* label) { + subs32(src, dest); + branch(cond, label); +} + +template <typename T> +void MacroAssembler::branchMul32(Condition cond, T src, Register dest, + Label* label) { + MOZ_ASSERT(cond == Assembler::Overflow); + vixl::UseScratchRegisterScope temps(this); + mul32(src, dest, dest, label); +} + +template <typename T> +void MacroAssembler::branchRshift32(Condition cond, T src, Register dest, + Label* label) { + MOZ_ASSERT(cond == Zero || cond == NonZero); + rshift32(src, dest); + branch32(cond == Zero ? 
Equal : NotEqual, dest, Imm32(0), label); +} + +void MacroAssembler::branchNeg32(Condition cond, Register reg, Label* label) { + MOZ_ASSERT(cond == Overflow); + negs32(reg); + B(label, cond); +} + +template <typename T> +void MacroAssembler::branchAddPtr(Condition cond, T src, Register dest, + Label* label) { + adds64(src, dest); + B(label, cond); +} + +template <typename T> +void MacroAssembler::branchSubPtr(Condition cond, T src, Register dest, + Label* label) { + subs64(src, dest); + B(label, cond); +} + +void MacroAssembler::branchMulPtr(Condition cond, Register src, Register dest, + Label* label) { + MOZ_ASSERT(cond == Assembler::Overflow); + + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch64 = temps.AcquireX(); + const ARMRegister src64(src, 64); + const ARMRegister dest64(dest, 64); + + Smulh(scratch64, dest64, src64); + Mul(dest64, dest64, src64); + Cmp(scratch64, Operand(dest64, vixl::ASR, 63)); + B(label, NotEqual); +} + +void MacroAssembler::decBranchPtr(Condition cond, Register lhs, Imm32 rhs, + Label* label) { + Subs(ARMRegister(lhs, 64), ARMRegister(lhs, 64), Operand(rhs.value)); + B(cond, label); +} + +template <class L> +void MacroAssembler::branchTest32(Condition cond, Register lhs, Register rhs, + L label) { + MOZ_ASSERT(cond == Zero || cond == NonZero || cond == Signed || + cond == NotSigned); + // The x86-biased front end prefers |test foo, foo| to |cmp foo, #0|. We look + // for the former pattern and expand as Cbz/Cbnz when possible. 
+ if (lhs == rhs && cond == Zero) { + Cbz(ARMRegister(lhs, 32), label); + } else if (lhs == rhs && cond == NonZero) { + Cbnz(ARMRegister(lhs, 32), label); + } else { + test32(lhs, rhs); + B(label, cond); + } +} + +template <class L> +void MacroAssembler::branchTest32(Condition cond, Register lhs, Imm32 rhs, + L label) { + MOZ_ASSERT(cond == Zero || cond == NonZero || cond == Signed || + cond == NotSigned); + test32(lhs, rhs); + B(label, cond); +} + +void MacroAssembler::branchTest32(Condition cond, const Address& lhs, Imm32 rhs, + Label* label) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(scratch != lhs.base); + load32(lhs, scratch); + branchTest32(cond, scratch, rhs, label); +} + +void MacroAssembler::branchTest32(Condition cond, const AbsoluteAddress& lhs, + Imm32 rhs, Label* label) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + load32(lhs, scratch); + branchTest32(cond, scratch, rhs, label); +} + +template <class L> +void MacroAssembler::branchTestPtr(Condition cond, Register lhs, Register rhs, + L label) { + // See branchTest32. 
+ MOZ_ASSERT(cond == Zero || cond == NonZero || cond == Signed || + cond == NotSigned); + if (lhs == rhs && cond == Zero) { + Cbz(ARMRegister(lhs, 64), label); + } else if (lhs == rhs && cond == NonZero) { + Cbnz(ARMRegister(lhs, 64), label); + } else { + Tst(ARMRegister(lhs, 64), Operand(ARMRegister(rhs, 64))); + B(label, cond); + } +} + +void MacroAssembler::branchTestPtr(Condition cond, Register lhs, Imm32 rhs, + Label* label) { + Tst(ARMRegister(lhs, 64), Operand(rhs.value)); + B(label, cond); +} + +void MacroAssembler::branchTestPtr(Condition cond, const Address& lhs, + Imm32 rhs, Label* label) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(scratch != lhs.base); + loadPtr(lhs, scratch); + branchTestPtr(cond, scratch, rhs, label); +} + +template <class L> +void MacroAssembler::branchTest64(Condition cond, Register64 lhs, + Register64 rhs, Register temp, L label) { + branchTestPtr(cond, lhs.reg, rhs.reg, label); +} + +void MacroAssembler::branchTestUndefined(Condition cond, Register tag, + Label* label) { + branchTestUndefinedImpl(cond, tag, label); +} + +void MacroAssembler::branchTestUndefined(Condition cond, const Address& address, + Label* label) { + branchTestUndefinedImpl(cond, address, label); +} + +void MacroAssembler::branchTestUndefined(Condition cond, + const BaseIndex& address, + Label* label) { + branchTestUndefinedImpl(cond, address, label); +} + +void MacroAssembler::branchTestUndefined(Condition cond, + const ValueOperand& value, + Label* label) { + branchTestUndefinedImpl(cond, value, label); +} + +template <typename T> +void MacroAssembler::branchTestUndefinedImpl(Condition cond, const T& t, + Label* label) { + Condition c = testUndefined(cond, t); + B(label, c); +} + +void MacroAssembler::branchTestInt32(Condition cond, Register tag, + Label* label) { + branchTestInt32Impl(cond, tag, label); +} + +void MacroAssembler::branchTestInt32(Condition cond, const Address& address, + 
Label* label) { + branchTestInt32Impl(cond, address, label); +} + +void MacroAssembler::branchTestInt32(Condition cond, const BaseIndex& address, + Label* label) { + branchTestInt32Impl(cond, address, label); +} + +void MacroAssembler::branchTestInt32(Condition cond, const ValueOperand& value, + Label* label) { + branchTestInt32Impl(cond, value, label); +} + +template <typename T> +void MacroAssembler::branchTestInt32Impl(Condition cond, const T& t, + Label* label) { + Condition c = testInt32(cond, t); + B(label, c); +} + +void MacroAssembler::branchTestInt32Truthy(bool truthy, + const ValueOperand& value, + Label* label) { + Condition c = testInt32Truthy(truthy, value); + B(label, c); +} + +void MacroAssembler::branchTestDouble(Condition cond, Register tag, + Label* label) { + branchTestDoubleImpl(cond, tag, label); +} + +void MacroAssembler::branchTestDouble(Condition cond, const Address& address, + Label* label) { + branchTestDoubleImpl(cond, address, label); +} + +void MacroAssembler::branchTestDouble(Condition cond, const BaseIndex& address, + Label* label) { + branchTestDoubleImpl(cond, address, label); +} + +void MacroAssembler::branchTestDouble(Condition cond, const ValueOperand& value, + Label* label) { + branchTestDoubleImpl(cond, value, label); +} + +template <typename T> +void MacroAssembler::branchTestDoubleImpl(Condition cond, const T& t, + Label* label) { + Condition c = testDouble(cond, t); + B(label, c); +} + +void MacroAssembler::branchTestDoubleTruthy(bool truthy, FloatRegister reg, + Label* label) { + Fcmp(ARMFPRegister(reg, 64), 0.0); + if (!truthy) { + // falsy values are zero, and NaN. + branch(Zero, label); + branch(Overflow, label); + } else { + // truthy values are non-zero and not nan. 
+ // If it is overflow + Label onFalse; + branch(Zero, &onFalse); + branch(Overflow, &onFalse); + B(label); + bind(&onFalse); + } +} + +void MacroAssembler::branchTestNumber(Condition cond, Register tag, + Label* label) { + branchTestNumberImpl(cond, tag, label); +} + +void MacroAssembler::branchTestNumber(Condition cond, const ValueOperand& value, + Label* label) { + branchTestNumberImpl(cond, value, label); +} + +template <typename T> +void MacroAssembler::branchTestNumberImpl(Condition cond, const T& t, + Label* label) { + Condition c = testNumber(cond, t); + B(label, c); +} + +void MacroAssembler::branchTestBoolean(Condition cond, Register tag, + Label* label) { + branchTestBooleanImpl(cond, tag, label); +} + +void MacroAssembler::branchTestBoolean(Condition cond, const Address& address, + Label* label) { + branchTestBooleanImpl(cond, address, label); +} + +void MacroAssembler::branchTestBoolean(Condition cond, const BaseIndex& address, + Label* label) { + branchTestBooleanImpl(cond, address, label); +} + +void MacroAssembler::branchTestBoolean(Condition cond, + const ValueOperand& value, + Label* label) { + branchTestBooleanImpl(cond, value, label); +} + +template <typename T> +void MacroAssembler::branchTestBooleanImpl(Condition cond, const T& tag, + Label* label) { + Condition c = testBoolean(cond, tag); + B(label, c); +} + +void MacroAssembler::branchTestBooleanTruthy(bool truthy, + const ValueOperand& value, + Label* label) { + Condition c = testBooleanTruthy(truthy, value); + B(label, c); +} + +void MacroAssembler::branchTestString(Condition cond, Register tag, + Label* label) { + branchTestStringImpl(cond, tag, label); +} + +void MacroAssembler::branchTestString(Condition cond, const Address& address, + Label* label) { + branchTestStringImpl(cond, address, label); +} + +void MacroAssembler::branchTestString(Condition cond, const BaseIndex& address, + Label* label) { + branchTestStringImpl(cond, address, label); +} + +void 
MacroAssembler::branchTestString(Condition cond, const ValueOperand& value, + Label* label) { + branchTestStringImpl(cond, value, label); +} + +template <typename T> +void MacroAssembler::branchTestStringImpl(Condition cond, const T& t, + Label* label) { + Condition c = testString(cond, t); + B(label, c); +} + +void MacroAssembler::branchTestStringTruthy(bool truthy, + const ValueOperand& value, + Label* label) { + Condition c = testStringTruthy(truthy, value); + B(label, c); +} + +void MacroAssembler::branchTestSymbol(Condition cond, Register tag, + Label* label) { + branchTestSymbolImpl(cond, tag, label); +} + +void MacroAssembler::branchTestSymbol(Condition cond, const Address& address, + Label* label) { + branchTestSymbolImpl(cond, address, label); +} + +void MacroAssembler::branchTestSymbol(Condition cond, const BaseIndex& address, + Label* label) { + branchTestSymbolImpl(cond, address, label); +} + +void MacroAssembler::branchTestSymbol(Condition cond, const ValueOperand& value, + Label* label) { + branchTestSymbolImpl(cond, value, label); +} + +template <typename T> +void MacroAssembler::branchTestSymbolImpl(Condition cond, const T& t, + Label* label) { + Condition c = testSymbol(cond, t); + B(label, c); +} + +void MacroAssembler::branchTestBigInt(Condition cond, Register tag, + Label* label) { + branchTestBigIntImpl(cond, tag, label); +} + +void MacroAssembler::branchTestBigInt(Condition cond, const Address& address, + Label* label) { + branchTestBigIntImpl(cond, address, label); +} + +void MacroAssembler::branchTestBigInt(Condition cond, const BaseIndex& address, + Label* label) { + branchTestBigIntImpl(cond, address, label); +} + +void MacroAssembler::branchTestBigInt(Condition cond, const ValueOperand& value, + Label* label) { + branchTestBigIntImpl(cond, value, label); +} + +template <typename T> +void MacroAssembler::branchTestBigIntImpl(Condition cond, const T& t, + Label* label) { + Condition c = testBigInt(cond, t); + B(label, c); +} + +void 
MacroAssembler::branchTestBigIntTruthy(bool truthy, + const ValueOperand& value, + Label* label) { + Condition c = testBigIntTruthy(truthy, value); + B(label, c); +} + +void MacroAssembler::branchTestNull(Condition cond, Register tag, + Label* label) { + branchTestNullImpl(cond, tag, label); +} + +void MacroAssembler::branchTestNull(Condition cond, const Address& address, + Label* label) { + branchTestNullImpl(cond, address, label); +} + +void MacroAssembler::branchTestNull(Condition cond, const BaseIndex& address, + Label* label) { + branchTestNullImpl(cond, address, label); +} + +void MacroAssembler::branchTestNull(Condition cond, const ValueOperand& value, + Label* label) { + branchTestNullImpl(cond, value, label); +} + +template <typename T> +void MacroAssembler::branchTestNullImpl(Condition cond, const T& t, + Label* label) { + Condition c = testNull(cond, t); + B(label, c); +} + +void MacroAssembler::branchTestObject(Condition cond, Register tag, + Label* label) { + branchTestObjectImpl(cond, tag, label); +} + +void MacroAssembler::branchTestObject(Condition cond, const Address& address, + Label* label) { + branchTestObjectImpl(cond, address, label); +} + +void MacroAssembler::branchTestObject(Condition cond, const BaseIndex& address, + Label* label) { + branchTestObjectImpl(cond, address, label); +} + +void MacroAssembler::branchTestObject(Condition cond, const ValueOperand& value, + Label* label) { + branchTestObjectImpl(cond, value, label); +} + +template <typename T> +void MacroAssembler::branchTestObjectImpl(Condition cond, const T& t, + Label* label) { + Condition c = testObject(cond, t); + B(label, c); +} + +void MacroAssembler::branchTestGCThing(Condition cond, const Address& address, + Label* label) { + branchTestGCThingImpl(cond, address, label); +} + +void MacroAssembler::branchTestGCThing(Condition cond, const BaseIndex& address, + Label* label) { + branchTestGCThingImpl(cond, address, label); +} + +void MacroAssembler::branchTestGCThing(Condition 
cond, + const ValueOperand& value, + Label* label) { + branchTestGCThingImpl(cond, value, label); +} + +template <typename T> +void MacroAssembler::branchTestGCThingImpl(Condition cond, const T& src, + Label* label) { + Condition c = testGCThing(cond, src); + B(label, c); +} + +void MacroAssembler::branchTestPrimitive(Condition cond, Register tag, + Label* label) { + branchTestPrimitiveImpl(cond, tag, label); +} + +void MacroAssembler::branchTestPrimitive(Condition cond, + const ValueOperand& value, + Label* label) { + branchTestPrimitiveImpl(cond, value, label); +} + +template <typename T> +void MacroAssembler::branchTestPrimitiveImpl(Condition cond, const T& t, + Label* label) { + Condition c = testPrimitive(cond, t); + B(label, c); +} + +void MacroAssembler::branchTestMagic(Condition cond, Register tag, + Label* label) { + branchTestMagicImpl(cond, tag, label); +} + +void MacroAssembler::branchTestMagic(Condition cond, const Address& address, + Label* label) { + branchTestMagicImpl(cond, address, label); +} + +void MacroAssembler::branchTestMagic(Condition cond, const BaseIndex& address, + Label* label) { + branchTestMagicImpl(cond, address, label); +} + +template <class L> +void MacroAssembler::branchTestMagic(Condition cond, const ValueOperand& value, + L label) { + branchTestMagicImpl(cond, value, label); +} + +template <typename T, class L> +void MacroAssembler::branchTestMagicImpl(Condition cond, const T& t, L label) { + Condition c = testMagic(cond, t); + B(label, c); +} + +void MacroAssembler::branchTestMagic(Condition cond, const Address& valaddr, + JSWhyMagic why, Label* label) { + uint64_t magic = MagicValue(why).asRawBits(); + cmpPtr(valaddr, ImmWord(magic)); + B(label, cond); +} + +void MacroAssembler::branchTestValue(Condition cond, const BaseIndex& lhs, + const ValueOperand& rhs, Label* label) { + MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual); + branchPtr(cond, lhs, rhs.valueReg(), label); +} + +template <typename T> +void 
MacroAssembler::testNumberSet(Condition cond, const T& src, + Register dest) { + cond = testNumber(cond, src); + emitSet(cond, dest); +} + +template <typename T> +void MacroAssembler::testBooleanSet(Condition cond, const T& src, + Register dest) { + cond = testBoolean(cond, src); + emitSet(cond, dest); +} + +template <typename T> +void MacroAssembler::testStringSet(Condition cond, const T& src, + Register dest) { + cond = testString(cond, src); + emitSet(cond, dest); +} + +template <typename T> +void MacroAssembler::testSymbolSet(Condition cond, const T& src, + Register dest) { + cond = testSymbol(cond, src); + emitSet(cond, dest); +} + +template <typename T> +void MacroAssembler::testBigIntSet(Condition cond, const T& src, + Register dest) { + cond = testBigInt(cond, src); + emitSet(cond, dest); +} + +void MacroAssembler::branchToComputedAddress(const BaseIndex& addr) { + vixl::UseScratchRegisterScope temps(&this->asVIXL()); + const ARMRegister scratch64 = temps.AcquireX(); + loadPtr(addr, scratch64.asUnsized()); + Br(scratch64); +} + +void MacroAssembler::cmp32Move32(Condition cond, Register lhs, Register rhs, + Register src, Register dest) { + cmp32(lhs, rhs); + Csel(ARMRegister(dest, 32), ARMRegister(src, 32), ARMRegister(dest, 32), + cond); +} + +void MacroAssembler::cmp32Move32(Condition cond, Register lhs, + const Address& rhs, Register src, + Register dest) { + MOZ_CRASH("NYI"); +} + +void MacroAssembler::cmpPtrMovePtr(Condition cond, Register lhs, Register rhs, + Register src, Register dest) { + cmpPtr(lhs, rhs); + Csel(ARMRegister(dest, 64), ARMRegister(src, 64), ARMRegister(dest, 64), + cond); +} + +void MacroAssembler::cmpPtrMovePtr(Condition cond, Register lhs, + const Address& rhs, Register src, + Register dest) { + MOZ_CRASH("NYI"); +} + +void MacroAssembler::cmp32Load32(Condition cond, Register lhs, + const Address& rhs, const Address& src, + Register dest) { + MOZ_CRASH("NYI"); +} + +void MacroAssembler::cmp32Load32(Condition cond, Register lhs, 
Register rhs, + const Address& src, Register dest) { + MOZ_CRASH("NYI"); +} + +void MacroAssembler::cmp32MovePtr(Condition cond, Register lhs, Imm32 rhs, + Register src, Register dest) { + cmp32(lhs, rhs); + Csel(ARMRegister(dest, 64), ARMRegister(src, 64), ARMRegister(dest, 64), + cond); +} + +void MacroAssembler::cmp32LoadPtr(Condition cond, const Address& lhs, Imm32 rhs, + const Address& src, Register dest) { + // ARM64 does not support conditional loads, so we use a branch with a CSel + // (to prevent Spectre attacks). + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch64 = temps.AcquireX(); + + // Can't use branch32() here, because it may select Cbz/Cbnz which don't + // affect condition flags. + Label done; + cmp32(lhs, rhs); + B(&done, Assembler::InvertCondition(cond)); + + loadPtr(src, scratch64.asUnsized()); + Csel(ARMRegister(dest, 64), scratch64, ARMRegister(dest, 64), cond); + bind(&done); +} + +void MacroAssembler::test32LoadPtr(Condition cond, const Address& addr, + Imm32 mask, const Address& src, + Register dest) { + MOZ_ASSERT(cond == Assembler::Zero || cond == Assembler::NonZero); + + // ARM64 does not support conditional loads, so we use a branch with a CSel + // (to prevent Spectre attacks). 
+ vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch64 = temps.AcquireX(); + Label done; + branchTest32(Assembler::InvertCondition(cond), addr, mask, &done); + loadPtr(src, scratch64.asUnsized()); + Csel(ARMRegister(dest, 64), scratch64, ARMRegister(dest, 64), cond); + bind(&done); +} + +void MacroAssembler::test32MovePtr(Condition cond, const Address& addr, + Imm32 mask, Register src, Register dest) { + MOZ_ASSERT(cond == Assembler::Zero || cond == Assembler::NonZero); + test32(addr, mask); + Csel(ARMRegister(dest, 64), ARMRegister(src, 64), ARMRegister(dest, 64), + cond); +} + +void MacroAssembler::spectreMovePtr(Condition cond, Register src, + Register dest) { + Csel(ARMRegister(dest, 64), ARMRegister(src, 64), ARMRegister(dest, 64), + cond); +} + +void MacroAssembler::spectreZeroRegister(Condition cond, Register, + Register dest) { + Csel(ARMRegister(dest, 64), ARMRegister(dest, 64), vixl::xzr, + Assembler::InvertCondition(cond)); +} + +void MacroAssembler::spectreBoundsCheck32(Register index, Register length, + Register maybeScratch, + Label* failure) { + MOZ_ASSERT(length != maybeScratch); + MOZ_ASSERT(index != maybeScratch); + + branch32(Assembler::BelowOrEqual, length, index, failure); + + if (JitOptions.spectreIndexMasking) { + Csel(ARMRegister(index, 32), ARMRegister(index, 32), vixl::wzr, + Assembler::Above); + } +} + +void MacroAssembler::spectreBoundsCheck32(Register index, const Address& length, + Register maybeScratch, + Label* failure) { + MOZ_ASSERT(index != length.base); + MOZ_ASSERT(length.base != maybeScratch); + MOZ_ASSERT(index != maybeScratch); + + branch32(Assembler::BelowOrEqual, length, index, failure); + + if (JitOptions.spectreIndexMasking) { + Csel(ARMRegister(index, 32), ARMRegister(index, 32), vixl::wzr, + Assembler::Above); + } +} + +void MacroAssembler::spectreBoundsCheckPtr(Register index, Register length, + Register maybeScratch, + Label* failure) { + MOZ_ASSERT(length != maybeScratch); + MOZ_ASSERT(index != 
maybeScratch); + + branchPtr(Assembler::BelowOrEqual, length, index, failure); + + if (JitOptions.spectreIndexMasking) { + Csel(ARMRegister(index, 64), ARMRegister(index, 64), vixl::xzr, + Assembler::Above); + } +} + +void MacroAssembler::spectreBoundsCheckPtr(Register index, + const Address& length, + Register maybeScratch, + Label* failure) { + MOZ_ASSERT(index != length.base); + MOZ_ASSERT(length.base != maybeScratch); + MOZ_ASSERT(index != maybeScratch); + + branchPtr(Assembler::BelowOrEqual, length, index, failure); + + if (JitOptions.spectreIndexMasking) { + Csel(ARMRegister(index, 64), ARMRegister(index, 64), vixl::xzr, + Assembler::Above); + } +} + +// ======================================================================== +// Memory access primitives. +void MacroAssembler::storeUncanonicalizedDouble(FloatRegister src, + const Address& dest) { + Str(ARMFPRegister(src, 64), toMemOperand(dest)); +} +void MacroAssembler::storeUncanonicalizedDouble(FloatRegister src, + const BaseIndex& dest) { + doBaseIndex(ARMFPRegister(src, 64), dest, vixl::STR_d); +} + +void MacroAssembler::storeUncanonicalizedFloat32(FloatRegister src, + const Address& addr) { + Str(ARMFPRegister(src, 32), toMemOperand(addr)); +} +void MacroAssembler::storeUncanonicalizedFloat32(FloatRegister src, + const BaseIndex& addr) { + doBaseIndex(ARMFPRegister(src, 32), addr, vixl::STR_s); +} + +void MacroAssembler::memoryBarrier(MemoryBarrierBits barrier) { + // Bug 1715494: Discriminating barriers such as StoreStore are hard to reason + // about. Execute the full barrier for everything that requires a barrier. + if (barrier) { + Dmb(vixl::InnerShareable, vixl::BarrierAll); + } +} + +// =============================================================== +// Clamping functions. 
+ +void MacroAssembler::clampIntToUint8(Register reg) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch32 = temps.AcquireW(); + const ARMRegister reg32(reg, 32); + MOZ_ASSERT(!scratch32.Is(reg32)); + + Cmp(reg32, Operand(reg32, vixl::UXTB)); + Csel(reg32, reg32, vixl::wzr, Assembler::GreaterThanOrEqual); + Mov(scratch32, Operand(0xff)); + Csel(reg32, reg32, scratch32, Assembler::LessThanOrEqual); +} + +void MacroAssembler::fallibleUnboxPtr(const ValueOperand& src, Register dest, + JSValueType type, Label* fail) { + MOZ_ASSERT(type == JSVAL_TYPE_OBJECT || type == JSVAL_TYPE_STRING || + type == JSVAL_TYPE_SYMBOL || type == JSVAL_TYPE_BIGINT); + // dest := src XOR mask + // fail if dest >> JSVAL_TAG_SHIFT != 0 + const ARMRegister src64(src.valueReg(), 64); + const ARMRegister dest64(dest, 64); + Eor(dest64, src64, Operand(JSVAL_TYPE_TO_SHIFTED_TAG(type))); + Cmp(vixl::xzr, Operand(dest64, vixl::LSR, JSVAL_TAG_SHIFT)); + j(Assembler::NotEqual, fail); +} + +void MacroAssembler::fallibleUnboxPtr(const Address& src, Register dest, + JSValueType type, Label* fail) { + loadValue(src, ValueOperand(dest)); + fallibleUnboxPtr(ValueOperand(dest), dest, type, fail); +} + +void MacroAssembler::fallibleUnboxPtr(const BaseIndex& src, Register dest, + JSValueType type, Label* fail) { + loadValue(src, ValueOperand(dest)); + fallibleUnboxPtr(ValueOperand(dest), dest, type, fail); +} + +//}}} check_macroassembler_style + +// Wasm SIMD + +static inline ARMFPRegister SimdReg(FloatRegister r) { + MOZ_ASSERT(r.isSimd128()); + return ARMFPRegister(r, 128); +} + +static inline ARMFPRegister Simd16B(FloatRegister r) { + return SimdReg(r).V16B(); +} + +static inline ARMFPRegister Simd8B(FloatRegister r) { return SimdReg(r).V8B(); } + +static inline ARMFPRegister Simd8H(FloatRegister r) { return SimdReg(r).V8H(); } + +static inline ARMFPRegister Simd4H(FloatRegister r) { return SimdReg(r).V4H(); } + +static inline ARMFPRegister Simd4S(FloatRegister r) { return 
SimdReg(r).V4S(); } + +static inline ARMFPRegister Simd2S(FloatRegister r) { return SimdReg(r).V2S(); } + +static inline ARMFPRegister Simd2D(FloatRegister r) { return SimdReg(r).V2D(); } + +static inline ARMFPRegister Simd1D(FloatRegister r) { return SimdReg(r).V1D(); } + +static inline ARMFPRegister SimdQ(FloatRegister r) { return SimdReg(r).Q(); } + +//{{{ check_macroassembler_style + +// Moves + +void MacroAssembler::moveSimd128(FloatRegister src, FloatRegister dest) { + if (src != dest) { + Mov(SimdReg(dest), SimdReg(src)); + } +} + +void MacroAssembler::loadConstantSimd128(const SimdConstant& v, + FloatRegister dest) { + // Movi does not yet generate good code for many cases, bug 1664397. + SimdConstant c = SimdConstant::CreateX2((const int64_t*)v.bytes()); + Movi(SimdReg(dest), c.asInt64x2()[1], c.asInt64x2()[0]); +} + +// Splat + +void MacroAssembler::splatX16(Register src, FloatRegister dest) { + Dup(Simd16B(dest), ARMRegister(src, 32)); +} + +void MacroAssembler::splatX16(uint32_t srcLane, FloatRegister src, + FloatRegister dest) { + Dup(Simd16B(dest), Simd16B(src), srcLane); +} + +void MacroAssembler::splatX8(Register src, FloatRegister dest) { + Dup(Simd8H(dest), ARMRegister(src, 32)); +} + +void MacroAssembler::splatX8(uint32_t srcLane, FloatRegister src, + FloatRegister dest) { + Dup(Simd8H(dest), Simd8H(src), srcLane); +} + +void MacroAssembler::splatX4(Register src, FloatRegister dest) { + Dup(Simd4S(dest), ARMRegister(src, 32)); +} + +void MacroAssembler::splatX4(FloatRegister src, FloatRegister dest) { + Dup(Simd4S(dest), ARMFPRegister(src), 0); +} + +void MacroAssembler::splatX2(Register64 src, FloatRegister dest) { + Dup(Simd2D(dest), ARMRegister(src.reg, 64)); +} + +void MacroAssembler::splatX2(FloatRegister src, FloatRegister dest) { + Dup(Simd2D(dest), ARMFPRegister(src), 0); +} + +// Extract lane as scalar. Float extraction does not canonicalize the value. 
+ +void MacroAssembler::extractLaneInt8x16(uint32_t lane, FloatRegister src, + Register dest_) { + MOZ_ASSERT(lane < 16); + ARMRegister dest(dest_, 32); + Umov(dest, Simd4S(src), lane / 4); + Sbfx(dest, dest, (lane % 4) * 8, 8); +} + +void MacroAssembler::unsignedExtractLaneInt8x16(uint32_t lane, + FloatRegister src, + Register dest_) { + MOZ_ASSERT(lane < 16); + ARMRegister dest(dest_, 32); + Umov(dest, Simd4S(src), lane / 4); + Ubfx(dest, dest, (lane % 4) * 8, 8); +} + +void MacroAssembler::extractLaneInt16x8(uint32_t lane, FloatRegister src, + Register dest_) { + MOZ_ASSERT(lane < 8); + ARMRegister dest(dest_, 32); + Umov(dest, Simd4S(src), lane / 2); + Sbfx(dest, dest, (lane % 2) * 16, 16); +} + +void MacroAssembler::unsignedExtractLaneInt16x8(uint32_t lane, + FloatRegister src, + Register dest_) { + MOZ_ASSERT(lane < 8); + ARMRegister dest(dest_, 32); + Umov(dest, Simd4S(src), lane / 2); + Ubfx(dest, dest, (lane % 2) * 16, 16); +} + +void MacroAssembler::extractLaneInt32x4(uint32_t lane, FloatRegister src, + Register dest_) { + MOZ_ASSERT(lane < 4); + ARMRegister dest(dest_, 32); + Umov(dest, Simd4S(src), lane); +} + +void MacroAssembler::extractLaneInt64x2(uint32_t lane, FloatRegister src, + Register64 dest_) { + MOZ_ASSERT(lane < 2); + ARMRegister dest(dest_.reg, 64); + Umov(dest, Simd2D(src), lane); +} + +void MacroAssembler::extractLaneFloat32x4(uint32_t lane, FloatRegister src, + FloatRegister dest) { + MOZ_ASSERT(lane < 4); + Mov(ARMFPRegister(dest).V4S(), 0, Simd4S(src), lane); +} + +void MacroAssembler::extractLaneFloat64x2(uint32_t lane, FloatRegister src, + FloatRegister dest) { + MOZ_ASSERT(lane < 2); + Mov(ARMFPRegister(dest).V2D(), 0, Simd2D(src), lane); +} + +// Replace lane value + +void MacroAssembler::replaceLaneInt8x16(unsigned lane, Register rhs, + FloatRegister lhsDest) { + MOZ_ASSERT(lane < 16); + Mov(Simd16B(lhsDest), lane, ARMRegister(rhs, 32)); +} + +void MacroAssembler::replaceLaneInt16x8(unsigned lane, Register rhs, + FloatRegister 
lhsDest) { + MOZ_ASSERT(lane < 8); + Mov(Simd8H(lhsDest), lane, ARMRegister(rhs, 32)); +} + +void MacroAssembler::replaceLaneInt32x4(unsigned lane, Register rhs, + FloatRegister lhsDest) { + MOZ_ASSERT(lane < 4); + Mov(Simd4S(lhsDest), lane, ARMRegister(rhs, 32)); +} + +void MacroAssembler::replaceLaneInt64x2(unsigned lane, Register64 rhs, + FloatRegister lhsDest) { + MOZ_ASSERT(lane < 2); + Mov(Simd2D(lhsDest), lane, ARMRegister(rhs.reg, 64)); +} + +void MacroAssembler::replaceLaneFloat32x4(unsigned lane, FloatRegister rhs, + FloatRegister lhsDest) { + MOZ_ASSERT(lane < 4); + Mov(Simd4S(lhsDest), lane, ARMFPRegister(rhs).V4S(), 0); +} + +void MacroAssembler::replaceLaneFloat64x2(unsigned lane, FloatRegister rhs, + FloatRegister lhsDest) { + MOZ_ASSERT(lane < 2); + Mov(Simd2D(lhsDest), lane, ARMFPRegister(rhs).V2D(), 0); +} + +// Shuffle - blend and permute with immediate indices, and its many +// specializations. Lane values other than those mentioned are illegal. + +// lane values 0..31 +void MacroAssembler::shuffleInt8x16(const uint8_t lanes[16], FloatRegister lhs, + FloatRegister rhs, FloatRegister dest) { + // The general solution generates ho-hum code. Realistic programs will use + // patterns that can be specialized, and this will be much better. That will + // be handled by bug 1656834, so don't worry about it here. + + // Set scratch to the lanevalue when it selects from lhs or ~lanevalue when it + // selects from rhs. + ScratchSimd128Scope scratch(*this); + int8_t idx[16]; + + if (lhs == rhs) { + for (unsigned i = 0; i < 16; i++) { + idx[i] = lanes[i] < 16 ? lanes[i] : (lanes[i] - 16); + } + loadConstantSimd128(SimdConstant::CreateX16(idx), scratch); + Tbl(Simd16B(dest), Simd16B(lhs), Simd16B(scratch)); + return; + } + + if (rhs != dest) { + for (unsigned i = 0; i < 16; i++) { + idx[i] = lanes[i] < 16 ? lanes[i] : ~(lanes[i] - 16); + } + } else { + MOZ_ASSERT(lhs != dest); + for (unsigned i = 0; i < 16; i++) { + idx[i] = lanes[i] < 16 ? 
~lanes[i] : (lanes[i] - 16); + } + std::swap(lhs, rhs); + } + loadConstantSimd128(SimdConstant::CreateX16(idx), scratch); + Tbl(Simd16B(dest), Simd16B(lhs), Simd16B(scratch)); + Not(Simd16B(scratch), Simd16B(scratch)); + Tbx(Simd16B(dest), Simd16B(rhs), Simd16B(scratch)); +} + +void MacroAssembler::shuffleInt8x16(const uint8_t lanes[16], FloatRegister rhs, + FloatRegister lhsDest) { + shuffleInt8x16(lanes, lhsDest, rhs, lhsDest); +} + +void MacroAssembler::blendInt8x16(const uint8_t lanes[16], FloatRegister lhs, + FloatRegister rhs, FloatRegister dest) { + ScratchSimd128Scope scratch(*this); + int8_t lanes_[16]; + + if (rhs == dest) { + for (unsigned i = 0; i < 16; i++) { + lanes_[i] = lanes[i] == 0 ? i : 16 + i; + } + loadConstantSimd128(SimdConstant::CreateX16(lanes_), scratch); + Tbx(Simd16B(dest), Simd16B(lhs), Simd16B(scratch)); + return; + } + + moveSimd128(lhs, dest); + for (unsigned i = 0; i < 16; i++) { + lanes_[i] = lanes[i] != 0 ? i : 16 + i; + } + loadConstantSimd128(SimdConstant::CreateX16(lanes_), scratch); + Tbx(Simd16B(dest), Simd16B(rhs), Simd16B(scratch)); +} + +void MacroAssembler::blendInt16x8(const uint16_t lanes[8], FloatRegister lhs, + FloatRegister rhs, FloatRegister dest) { + static_assert(sizeof(const uint16_t /*lanes*/[8]) == sizeof(uint8_t[16])); + blendInt8x16(reinterpret_cast<const uint8_t*>(lanes), lhs, rhs, dest); +} + +void MacroAssembler::laneSelectSimd128(FloatRegister mask, FloatRegister lhs, + FloatRegister rhs, FloatRegister dest) { + MOZ_ASSERT(mask == dest); + Bsl(Simd16B(mask), Simd16B(lhs), Simd16B(rhs)); +} + +void MacroAssembler::interleaveHighInt16x8(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Zip2(Simd8H(dest), Simd8H(lhs), Simd8H(rhs)); +} + +void MacroAssembler::interleaveHighInt32x4(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Zip2(Simd4S(dest), Simd4S(lhs), Simd4S(rhs)); +} + +void MacroAssembler::interleaveHighInt64x2(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) 
{ + Zip2(Simd2D(dest), Simd2D(lhs), Simd2D(rhs)); +} + +void MacroAssembler::interleaveHighInt8x16(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Zip2(Simd16B(dest), Simd16B(lhs), Simd16B(rhs)); +} + +void MacroAssembler::interleaveLowInt16x8(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Zip1(Simd8H(dest), Simd8H(lhs), Simd8H(rhs)); +} + +void MacroAssembler::interleaveLowInt32x4(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Zip1(Simd4S(dest), Simd4S(lhs), Simd4S(rhs)); +} + +void MacroAssembler::interleaveLowInt64x2(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Zip1(Simd2D(dest), Simd2D(lhs), Simd2D(rhs)); +} + +void MacroAssembler::interleaveLowInt8x16(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Zip1(Simd16B(dest), Simd16B(lhs), Simd16B(rhs)); +} + +void MacroAssembler::permuteInt8x16(const uint8_t lanes[16], FloatRegister src, + FloatRegister dest) { + ScratchSimd128Scope scratch(*this); + loadConstantSimd128(SimdConstant::CreateX16((const int8_t*)lanes), scratch); + Tbl(Simd16B(dest), Simd16B(src), Simd16B(scratch)); +} + +void MacroAssembler::permuteInt16x8(const uint16_t lanes[8], FloatRegister src, + FloatRegister dest) { + MOZ_ASSERT(lanes[0] < 8 && lanes[1] < 8 && lanes[2] < 8 && lanes[3] < 8 && + lanes[4] < 8 && lanes[5] < 8 && lanes[6] < 8 && lanes[7] < 8); + const int8_t lanes_[16] = { + (int8_t)(lanes[0] << 1), (int8_t)((lanes[0] << 1) + 1), + (int8_t)(lanes[1] << 1), (int8_t)((lanes[1] << 1) + 1), + (int8_t)(lanes[2] << 1), (int8_t)((lanes[2] << 1) + 1), + (int8_t)(lanes[3] << 1), (int8_t)((lanes[3] << 1) + 1), + (int8_t)(lanes[4] << 1), (int8_t)((lanes[4] << 1) + 1), + (int8_t)(lanes[5] << 1), (int8_t)((lanes[5] << 1) + 1), + (int8_t)(lanes[6] << 1), (int8_t)((lanes[6] << 1) + 1), + (int8_t)(lanes[7] << 1), (int8_t)((lanes[7] << 1) + 1), + }; + ScratchSimd128Scope scratch(*this); + loadConstantSimd128(SimdConstant::CreateX16(lanes_), scratch); + 
Tbl(Simd16B(dest), Simd16B(src), Simd16B(scratch)); +} + +void MacroAssembler::permuteInt32x4(const uint32_t lanes[4], FloatRegister src, + FloatRegister dest) { + ScratchSimd128Scope scratch(*this); + const int8_t lanes_[16] = { + (int8_t)(lanes[0] << 2), (int8_t)((lanes[0] << 2) + 1), + (int8_t)((lanes[0] << 2) + 2), (int8_t)((lanes[0] << 2) + 3), + (int8_t)(lanes[1] << 2), (int8_t)((lanes[1] << 2) + 1), + (int8_t)((lanes[1] << 2) + 2), (int8_t)((lanes[1] << 2) + 3), + (int8_t)(lanes[2] << 2), (int8_t)((lanes[2] << 2) + 1), + (int8_t)((lanes[2] << 2) + 2), (int8_t)((lanes[2] << 2) + 3), + (int8_t)(lanes[3] << 2), (int8_t)((lanes[3] << 2) + 1), + (int8_t)((lanes[3] << 2) + 2), (int8_t)((lanes[3] << 2) + 3), + }; + loadConstantSimd128(SimdConstant::CreateX16(lanes_), scratch); + Tbl(Simd16B(dest), Simd16B(src), Simd16B(scratch)); +} + +void MacroAssembler::rotateRightSimd128(FloatRegister src, FloatRegister dest, + uint32_t shift) { + Ext(Simd16B(dest), Simd16B(src), Simd16B(src), shift); +} + +void MacroAssembler::leftShiftSimd128(Imm32 count, FloatRegister src, + FloatRegister dest) { + MOZ_ASSERT(count.value < 16); + ScratchSimd128Scope scratch(*this); + Movi(Simd16B(scratch), 0); + Ext(Simd16B(dest), Simd16B(scratch), Simd16B(src), 16 - count.value); +} + +void MacroAssembler::rightShiftSimd128(Imm32 count, FloatRegister src, + FloatRegister dest) { + MOZ_ASSERT(count.value < 16); + ScratchSimd128Scope scratch(*this); + Movi(Simd16B(scratch), 0); + Ext(Simd16B(dest), Simd16B(src), Simd16B(scratch), count.value); +} + +void MacroAssembler::concatAndRightShiftSimd128(FloatRegister lhs, + FloatRegister rhs, + FloatRegister dest, + uint32_t shift) { + MOZ_ASSERT(shift < 16); + Ext(Simd16B(dest), Simd16B(rhs), Simd16B(lhs), shift); +} + +// Reverse bytes in lanes. 
+ +void MacroAssembler::reverseInt16x8(FloatRegister src, FloatRegister dest) { + Rev16(Simd16B(dest), Simd16B(src)); +} + +void MacroAssembler::reverseInt32x4(FloatRegister src, FloatRegister dest) { + Rev32(Simd16B(dest), Simd16B(src)); +} + +void MacroAssembler::reverseInt64x2(FloatRegister src, FloatRegister dest) { + Rev64(Simd16B(dest), Simd16B(src)); +} + +// Swizzle - permute with variable indices. `rhs` holds the lanes parameter. + +void MacroAssembler::swizzleInt8x16(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Tbl(Simd16B(dest), Simd16B(lhs), Simd16B(rhs)); +} + +void MacroAssembler::swizzleInt8x16Relaxed(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Tbl(Simd16B(dest), Simd16B(lhs), Simd16B(rhs)); +} + +// Integer Add + +void MacroAssembler::addInt8x16(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Add(Simd16B(dest), Simd16B(lhs), Simd16B(rhs)); +} + +void MacroAssembler::addInt16x8(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Add(Simd8H(dest), Simd8H(lhs), Simd8H(rhs)); +} + +void MacroAssembler::addInt32x4(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Add(Simd4S(dest), Simd4S(lhs), Simd4S(rhs)); +} + +void MacroAssembler::addInt64x2(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Add(Simd2D(dest), Simd2D(lhs), Simd2D(rhs)); +} + +// Integer Subtract + +void MacroAssembler::subInt8x16(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Sub(Simd16B(dest), Simd16B(lhs), Simd16B(rhs)); +} + +void MacroAssembler::subInt16x8(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Sub(Simd8H(dest), Simd8H(lhs), Simd8H(rhs)); +} + +void MacroAssembler::subInt32x4(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Sub(Simd4S(dest), Simd4S(lhs), Simd4S(rhs)); +} + +void MacroAssembler::subInt64x2(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Sub(Simd2D(dest), Simd2D(lhs), Simd2D(rhs)); +} + +// Integer 
Multiply + +void MacroAssembler::mulInt16x8(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Mul(Simd8H(dest), Simd8H(lhs), Simd8H(rhs)); +} + +void MacroAssembler::mulInt32x4(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Mul(Simd4S(dest), Simd4S(lhs), Simd4S(rhs)); +} + +void MacroAssembler::mulInt64x2(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest, FloatRegister temp1, + FloatRegister temp2) { + // As documented at https://chromium-review.googlesource.com/c/v8/v8/+/1781696 + // lhs = <D C> <B A> + // rhs = <H G> <F E> + // result = <(DG+CH)_low+CG_high CG_low> <(BE+AF)_low+AE_high AE_low> + ScratchSimd128Scope scratch(*this); + Rev64(Simd4S(temp2), Simd4S(lhs)); // temp2 = <C D> <A B> + Mul(Simd4S(temp2), Simd4S(temp2), Simd4S(rhs)); // temp2 = <CH DG> <AF BE> + Xtn(Simd2S(temp1), Simd2D(rhs)); // temp1 = <0 0> <G E> + Addp(Simd4S(temp2), Simd4S(temp2), Simd4S(temp2)); // temp2 = <CH+DG AF+BE>.. + Xtn(Simd2S(scratch), Simd2D(lhs)); // scratch = <0 0> <C A> + Shll(Simd2D(dest), Simd2S(temp2), 32); // dest = <(DG+CH)_low 0> + // <(BE+AF)_low 0> + Umlal(Simd2D(dest), Simd2S(scratch), Simd2S(temp1)); +} + +void MacroAssembler::extMulLowInt8x16(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Smull(Simd8H(dest), Simd8B(lhs), Simd8B(rhs)); +} + +void MacroAssembler::extMulHighInt8x16(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Smull2(Simd8H(dest), Simd16B(lhs), Simd16B(rhs)); +} + +void MacroAssembler::unsignedExtMulLowInt8x16(FloatRegister lhs, + FloatRegister rhs, + FloatRegister dest) { + Umull(Simd8H(dest), Simd8B(lhs), Simd8B(rhs)); +} + +void MacroAssembler::unsignedExtMulHighInt8x16(FloatRegister lhs, + FloatRegister rhs, + FloatRegister dest) { + Umull2(Simd8H(dest), Simd16B(lhs), Simd16B(rhs)); +} + +void MacroAssembler::extMulLowInt16x8(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Smull(Simd4S(dest), Simd4H(lhs), Simd4H(rhs)); +} + +void 
MacroAssembler::extMulHighInt16x8(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Smull2(Simd4S(dest), Simd8H(lhs), Simd8H(rhs)); +} + +void MacroAssembler::unsignedExtMulLowInt16x8(FloatRegister lhs, + FloatRegister rhs, + FloatRegister dest) { + Umull(Simd4S(dest), Simd4H(lhs), Simd4H(rhs)); +} + +void MacroAssembler::unsignedExtMulHighInt16x8(FloatRegister lhs, + FloatRegister rhs, + FloatRegister dest) { + Umull2(Simd4S(dest), Simd8H(lhs), Simd8H(rhs)); +} + +void MacroAssembler::extMulLowInt32x4(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Smull(Simd2D(dest), Simd2S(lhs), Simd2S(rhs)); +} + +void MacroAssembler::extMulHighInt32x4(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Smull2(Simd2D(dest), Simd4S(lhs), Simd4S(rhs)); +} + +void MacroAssembler::unsignedExtMulLowInt32x4(FloatRegister lhs, + FloatRegister rhs, + FloatRegister dest) { + Umull(Simd2D(dest), Simd2S(lhs), Simd2S(rhs)); +} + +void MacroAssembler::unsignedExtMulHighInt32x4(FloatRegister lhs, + FloatRegister rhs, + FloatRegister dest) { + Umull2(Simd2D(dest), Simd4S(lhs), Simd4S(rhs)); +} + +void MacroAssembler::q15MulrSatInt16x8(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Sqrdmulh(Simd8H(dest), Simd8H(lhs), Simd8H(rhs)); +} + +void MacroAssembler::q15MulrInt16x8Relaxed(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Sqrdmulh(Simd8H(dest), Simd8H(lhs), Simd8H(rhs)); +} + +// Integer Negate + +void MacroAssembler::negInt8x16(FloatRegister src, FloatRegister dest) { + Neg(Simd16B(dest), Simd16B(src)); +} + +void MacroAssembler::negInt16x8(FloatRegister src, FloatRegister dest) { + Neg(Simd8H(dest), Simd8H(src)); +} + +void MacroAssembler::negInt32x4(FloatRegister src, FloatRegister dest) { + Neg(Simd4S(dest), Simd4S(src)); +} + +void MacroAssembler::negInt64x2(FloatRegister src, FloatRegister dest) { + Neg(Simd2D(dest), Simd2D(src)); +} + +// Saturating integer add + +void 
MacroAssembler::addSatInt8x16(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Sqadd(Simd16B(dest), Simd16B(lhs), Simd16B(rhs)); +} + +void MacroAssembler::unsignedAddSatInt8x16(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Uqadd(Simd16B(dest), Simd16B(lhs), Simd16B(rhs)); +} + +void MacroAssembler::addSatInt16x8(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Sqadd(Simd8H(dest), Simd8H(lhs), Simd8H(rhs)); +} + +void MacroAssembler::unsignedAddSatInt16x8(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Uqadd(Simd8H(dest), Simd8H(lhs), Simd8H(rhs)); +} + +// Saturating integer subtract + +void MacroAssembler::subSatInt8x16(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Sqsub(Simd16B(dest), Simd16B(lhs), Simd16B(rhs)); +} + +void MacroAssembler::unsignedSubSatInt8x16(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Uqsub(Simd16B(dest), Simd16B(lhs), Simd16B(rhs)); +} + +void MacroAssembler::subSatInt16x8(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Sqsub(Simd8H(dest), Simd8H(lhs), Simd8H(rhs)); +} + +void MacroAssembler::unsignedSubSatInt16x8(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Uqsub(Simd8H(dest), Simd8H(lhs), Simd8H(rhs)); +} + +// Lane-wise integer minimum + +void MacroAssembler::minInt8x16(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Smin(Simd16B(dest), Simd16B(lhs), Simd16B(rhs)); +} + +void MacroAssembler::unsignedMinInt8x16(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Umin(Simd16B(dest), Simd16B(lhs), Simd16B(rhs)); +} + +void MacroAssembler::minInt16x8(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Smin(Simd8H(dest), Simd8H(lhs), Simd8H(rhs)); +} + +void MacroAssembler::unsignedMinInt16x8(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Umin(Simd8H(dest), Simd8H(lhs), Simd8H(rhs)); +} + +void MacroAssembler::minInt32x4(FloatRegister lhs, FloatRegister 
rhs, + FloatRegister dest) { + Smin(Simd4S(dest), Simd4S(lhs), Simd4S(rhs)); +} + +void MacroAssembler::unsignedMinInt32x4(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Umin(Simd4S(dest), Simd4S(lhs), Simd4S(rhs)); +} + +// Lane-wise integer maximum + +void MacroAssembler::maxInt8x16(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Smax(Simd16B(dest), Simd16B(lhs), Simd16B(rhs)); +} + +void MacroAssembler::unsignedMaxInt8x16(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Umax(Simd16B(dest), Simd16B(lhs), Simd16B(rhs)); +} + +void MacroAssembler::maxInt16x8(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Smax(Simd8H(dest), Simd8H(lhs), Simd8H(rhs)); +} + +void MacroAssembler::unsignedMaxInt16x8(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Umax(Simd8H(dest), Simd8H(lhs), Simd8H(rhs)); +} + +void MacroAssembler::maxInt32x4(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Smax(Simd4S(dest), Simd4S(lhs), Simd4S(rhs)); +} + +void MacroAssembler::unsignedMaxInt32x4(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Umax(Simd4S(dest), Simd4S(lhs), Simd4S(rhs)); +} + +// Lane-wise integer rounding average + +void MacroAssembler::unsignedAverageInt8x16(FloatRegister lhs, + FloatRegister rhs, + FloatRegister dest) { + Urhadd(Simd16B(dest), Simd16B(lhs), Simd16B(rhs)); +} + +void MacroAssembler::unsignedAverageInt16x8(FloatRegister lhs, + FloatRegister rhs, + FloatRegister dest) { + Urhadd(Simd8H(dest), Simd8H(lhs), Simd8H(rhs)); +} + +// Lane-wise integer absolute value + +void MacroAssembler::absInt8x16(FloatRegister src, FloatRegister dest) { + Abs(Simd16B(dest), Simd16B(src)); +} + +void MacroAssembler::absInt16x8(FloatRegister src, FloatRegister dest) { + Abs(Simd8H(dest), Simd8H(src)); +} + +void MacroAssembler::absInt32x4(FloatRegister src, FloatRegister dest) { + Abs(Simd4S(dest), Simd4S(src)); +} + +void MacroAssembler::absInt64x2(FloatRegister src, 
FloatRegister dest) { + Abs(Simd2D(dest), Simd2D(src)); +} + +// Left shift by variable scalar + +void MacroAssembler::leftShiftInt8x16(FloatRegister lhs, Register rhs, + FloatRegister dest) { + ScratchSimd128Scope vscratch(*this); + Dup(Simd16B(vscratch), ARMRegister(rhs, 32)); + Sshl(Simd16B(dest), Simd16B(lhs), Simd16B(vscratch)); +} + +void MacroAssembler::leftShiftInt8x16(Imm32 count, FloatRegister src, + FloatRegister dest) { + Shl(Simd16B(dest), Simd16B(src), count.value); +} + +void MacroAssembler::leftShiftInt16x8(FloatRegister lhs, Register rhs, + FloatRegister dest) { + ScratchSimd128Scope vscratch(*this); + Dup(Simd8H(vscratch), ARMRegister(rhs, 32)); + Sshl(Simd8H(dest), Simd8H(lhs), Simd8H(vscratch)); +} + +void MacroAssembler::leftShiftInt16x8(Imm32 count, FloatRegister src, + FloatRegister dest) { + Shl(Simd8H(dest), Simd8H(src), count.value); +} + +void MacroAssembler::leftShiftInt32x4(FloatRegister lhs, Register rhs, + FloatRegister dest) { + ScratchSimd128Scope vscratch(*this); + Dup(Simd4S(vscratch), ARMRegister(rhs, 32)); + Sshl(Simd4S(dest), Simd4S(lhs), Simd4S(vscratch)); +} + +void MacroAssembler::leftShiftInt32x4(Imm32 count, FloatRegister src, + FloatRegister dest) { + Shl(Simd4S(dest), Simd4S(src), count.value); +} + +void MacroAssembler::leftShiftInt64x2(FloatRegister lhs, Register rhs, + FloatRegister dest) { + ScratchSimd128Scope vscratch(*this); + Dup(Simd2D(vscratch), ARMRegister(rhs, 64)); + Sshl(Simd2D(dest), Simd2D(lhs), Simd2D(vscratch)); +} + +void MacroAssembler::leftShiftInt64x2(Imm32 count, FloatRegister src, + FloatRegister dest) { + Shl(Simd2D(dest), Simd2D(src), count.value); +} + +// Right shift by variable scalar + +void MacroAssembler::rightShiftInt8x16(FloatRegister lhs, Register rhs, + FloatRegister dest) { + MacroAssemblerCompat::rightShiftInt8x16(lhs, rhs, dest, + /* isUnsigned */ false); +} + +void MacroAssembler::rightShiftInt8x16(Imm32 count, FloatRegister src, + FloatRegister dest) { + Sshr(Simd16B(dest), 
Simd16B(src), count.value); +} + +void MacroAssembler::unsignedRightShiftInt8x16(FloatRegister lhs, Register rhs, + FloatRegister dest) { + MacroAssemblerCompat::rightShiftInt8x16(lhs, rhs, dest, + /* isUnsigned */ true); +} + +void MacroAssembler::unsignedRightShiftInt8x16(Imm32 count, FloatRegister src, + FloatRegister dest) { + Ushr(Simd16B(dest), Simd16B(src), count.value); +} + +void MacroAssembler::rightShiftInt16x8(FloatRegister lhs, Register rhs, + FloatRegister dest) { + MacroAssemblerCompat::rightShiftInt16x8(lhs, rhs, dest, + /* isUnsigned */ false); +} + +void MacroAssembler::rightShiftInt16x8(Imm32 count, FloatRegister src, + FloatRegister dest) { + Sshr(Simd8H(dest), Simd8H(src), count.value); +} + +void MacroAssembler::unsignedRightShiftInt16x8(FloatRegister lhs, Register rhs, + FloatRegister dest) { + MacroAssemblerCompat::rightShiftInt16x8(lhs, rhs, dest, + /* isUnsigned */ true); +} + +void MacroAssembler::unsignedRightShiftInt16x8(Imm32 count, FloatRegister src, + FloatRegister dest) { + Ushr(Simd8H(dest), Simd8H(src), count.value); +} + +void MacroAssembler::rightShiftInt32x4(FloatRegister lhs, Register rhs, + FloatRegister dest) { + MacroAssemblerCompat::rightShiftInt32x4(lhs, rhs, dest, + /* isUnsigned */ false); +} + +void MacroAssembler::rightShiftInt32x4(Imm32 count, FloatRegister src, + FloatRegister dest) { + Sshr(Simd4S(dest), Simd4S(src), count.value); +} + +void MacroAssembler::unsignedRightShiftInt32x4(FloatRegister lhs, Register rhs, + FloatRegister dest) { + MacroAssemblerCompat::rightShiftInt32x4(lhs, rhs, dest, + /* isUnsigned */ true); +} + +void MacroAssembler::unsignedRightShiftInt32x4(Imm32 count, FloatRegister src, + FloatRegister dest) { + Ushr(Simd4S(dest), Simd4S(src), count.value); +} + +void MacroAssembler::rightShiftInt64x2(FloatRegister lhs, Register rhs, + FloatRegister dest) { + MacroAssemblerCompat::rightShiftInt64x2(lhs, rhs, dest, + /* isUnsigned */ false); +} + +void MacroAssembler::rightShiftInt64x2(Imm32 count, 
FloatRegister src, + FloatRegister dest) { + Sshr(Simd2D(dest), Simd2D(src), count.value); +} + +void MacroAssembler::unsignedRightShiftInt64x2(FloatRegister lhs, Register rhs, + FloatRegister dest) { + MacroAssemblerCompat::rightShiftInt64x2(lhs, rhs, dest, + /* isUnsigned */ true); +} + +void MacroAssembler::unsignedRightShiftInt64x2(Imm32 count, FloatRegister src, + FloatRegister dest) { + Ushr(Simd2D(dest), Simd2D(src), count.value); +} + +// Bitwise and, or, xor, not + +void MacroAssembler::bitwiseAndSimd128(FloatRegister rhs, + FloatRegister lhsDest) { + And(Simd16B(lhsDest), Simd16B(lhsDest), Simd16B(rhs)); +} + +void MacroAssembler::bitwiseAndSimd128(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + And(Simd16B(dest), Simd16B(lhs), Simd16B(rhs)); +} + +void MacroAssembler::bitwiseOrSimd128(FloatRegister rhs, + FloatRegister lhsDest) { + Orr(Simd16B(lhsDest), Simd16B(lhsDest), Simd16B(rhs)); +} + +void MacroAssembler::bitwiseOrSimd128(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Orr(Simd16B(dest), Simd16B(lhs), Simd16B(rhs)); +} + +void MacroAssembler::bitwiseXorSimd128(FloatRegister rhs, + FloatRegister lhsDest) { + Eor(Simd16B(lhsDest), Simd16B(lhsDest), Simd16B(rhs)); +} + +void MacroAssembler::bitwiseXorSimd128(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Eor(Simd16B(dest), Simd16B(lhs), Simd16B(rhs)); +} + +void MacroAssembler::bitwiseNotSimd128(FloatRegister src, FloatRegister dest) { + Not(Simd16B(dest), Simd16B(src)); +} + +void MacroAssembler::bitwiseAndNotSimd128(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Bic(Simd16B(dest), Simd16B(lhs), Simd16B(rhs)); +} + +// Bitwise AND with complement: dest = ~lhs & rhs, note this is not what Wasm +// wants but what the x86 hardware offers. Hence the name. Since arm64 has +// dest = lhs & ~rhs we just swap operands. 
+ +void MacroAssembler::bitwiseNotAndSimd128(FloatRegister rhs, + FloatRegister lhsDest) { + Bic(Simd16B(lhsDest), Simd16B(rhs), Simd16B(lhsDest)); +} + +// Bitwise select + +void MacroAssembler::bitwiseSelectSimd128(FloatRegister onTrue, + FloatRegister onFalse, + FloatRegister maskDest) { + Bsl(Simd16B(maskDest), Simd16B(onTrue), Simd16B(onFalse)); +} + +// Population count + +void MacroAssembler::popcntInt8x16(FloatRegister src, FloatRegister dest) { + Cnt(Simd16B(dest), Simd16B(src)); +} + +// Any lane true, ie, any bit set + +void MacroAssembler::anyTrueSimd128(FloatRegister src, Register dest_) { + ScratchSimd128Scope scratch_(*this); + ARMFPRegister scratch(Simd1D(scratch_)); + ARMRegister dest(dest_, 64); + Addp(scratch, Simd2D(src)); + Umov(dest, scratch, 0); + Cmp(dest, Operand(0)); + Cset(dest, Assembler::NonZero); +} + +// All lanes true + +void MacroAssembler::allTrueInt8x16(FloatRegister src, Register dest_) { + ScratchSimd128Scope scratch(*this); + ARMRegister dest(dest_, 64); + Cmeq(Simd16B(scratch), Simd16B(src), 0); + Addp(Simd1D(scratch), Simd2D(scratch)); + Umov(dest, Simd1D(scratch), 0); + Cmp(dest, Operand(0)); + Cset(dest, Assembler::Zero); +} + +void MacroAssembler::allTrueInt16x8(FloatRegister src, Register dest_) { + ScratchSimd128Scope scratch(*this); + ARMRegister dest(dest_, 64); + Cmeq(Simd8H(scratch), Simd8H(src), 0); + Addp(Simd1D(scratch), Simd2D(scratch)); + Umov(dest, Simd1D(scratch), 0); + Cmp(dest, Operand(0)); + Cset(dest, Assembler::Zero); +} + +void MacroAssembler::allTrueInt32x4(FloatRegister src, Register dest_) { + ScratchSimd128Scope scratch(*this); + ARMRegister dest(dest_, 64); + Cmeq(Simd4S(scratch), Simd4S(src), 0); + Addp(Simd1D(scratch), Simd2D(scratch)); + Umov(dest, Simd1D(scratch), 0); + Cmp(dest, Operand(0)); + Cset(dest, Assembler::Zero); +} + +void MacroAssembler::allTrueInt64x2(FloatRegister src, Register dest_) { + ScratchSimd128Scope scratch(*this); + ARMRegister dest(dest_, 64); + Cmeq(Simd2D(scratch), 
Simd2D(src), 0); + Addp(Simd1D(scratch), Simd2D(scratch)); + Umov(dest, Simd1D(scratch), 0); + Cmp(dest, Operand(0)); + Cset(dest, Assembler::Zero); +} + +// Bitmask, ie extract and compress high bits of all lanes +// +// There's no direct support for this on the chip. These implementations come +// from the writeup that added the instruction to the SIMD instruction set. +// Generally, shifting and masking is used to isolate the sign bit of each +// element in the right position, then a horizontal add creates the result. For +// 8-bit elements an intermediate step is needed to assemble the bits of the +// upper and lower 8 bytes into 8 halfwords. + +void MacroAssembler::bitmaskInt8x16(FloatRegister src, Register dest, + FloatRegister temp) { + ScratchSimd128Scope scratch(*this); + int8_t values[] = {1, 2, 4, 8, 16, 32, 64, -128, + 1, 2, 4, 8, 16, 32, 64, -128}; + loadConstantSimd128(SimdConstant::CreateX16(values), temp); + Sshr(Simd16B(scratch), Simd16B(src), 7); + And(Simd16B(scratch), Simd16B(scratch), Simd16B(temp)); + Ext(Simd16B(temp), Simd16B(scratch), Simd16B(scratch), 8); + Zip1(Simd16B(temp), Simd16B(scratch), Simd16B(temp)); + Addv(ARMFPRegister(temp, 16), Simd8H(temp)); + Mov(ARMRegister(dest, 32), Simd8H(temp), 0); +} + +void MacroAssembler::bitmaskInt16x8(FloatRegister src, Register dest, + FloatRegister temp) { + ScratchSimd128Scope scratch(*this); + int16_t values[] = {1, 2, 4, 8, 16, 32, 64, 128}; + loadConstantSimd128(SimdConstant::CreateX8(values), temp); + Sshr(Simd8H(scratch), Simd8H(src), 15); + And(Simd16B(scratch), Simd16B(scratch), Simd16B(temp)); + Addv(ARMFPRegister(scratch, 16), Simd8H(scratch)); + Mov(ARMRegister(dest, 32), Simd8H(scratch), 0); +} + +void MacroAssembler::bitmaskInt32x4(FloatRegister src, Register dest, + FloatRegister temp) { + ScratchSimd128Scope scratch(*this); + int32_t values[] = {1, 2, 4, 8}; + loadConstantSimd128(SimdConstant::CreateX4(values), temp); + Sshr(Simd4S(scratch), Simd4S(src), 31); + 
And(Simd16B(scratch), Simd16B(scratch), Simd16B(temp)); + Addv(ARMFPRegister(scratch, 32), Simd4S(scratch)); + Mov(ARMRegister(dest, 32), Simd4S(scratch), 0); +} + +void MacroAssembler::bitmaskInt64x2(FloatRegister src, Register dest, + FloatRegister temp) { + Sqxtn(Simd2S(temp), Simd2D(src)); + Ushr(Simd2S(temp), Simd2S(temp), 31); + Usra(ARMFPRegister(temp, 64), ARMFPRegister(temp, 64), 31); + Fmov(ARMRegister(dest, 32), ARMFPRegister(temp, 32)); +} + +// Comparisons (integer and floating-point) + +void MacroAssembler::compareInt8x16(Assembler::Condition cond, + FloatRegister rhs, FloatRegister lhsDest) { + compareSimd128Int(cond, Simd16B(lhsDest), Simd16B(lhsDest), Simd16B(rhs)); +} + +void MacroAssembler::compareInt8x16(Assembler::Condition cond, + FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + compareSimd128Int(cond, Simd16B(dest), Simd16B(lhs), Simd16B(rhs)); +} + +void MacroAssembler::compareInt16x8(Assembler::Condition cond, + FloatRegister rhs, FloatRegister lhsDest) { + compareSimd128Int(cond, Simd8H(lhsDest), Simd8H(lhsDest), Simd8H(rhs)); +} + +void MacroAssembler::compareInt16x8(Assembler::Condition cond, + FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + compareSimd128Int(cond, Simd8H(dest), Simd8H(lhs), Simd8H(rhs)); +} + +void MacroAssembler::compareInt32x4(Assembler::Condition cond, + FloatRegister rhs, FloatRegister lhsDest) { + compareSimd128Int(cond, Simd4S(lhsDest), Simd4S(lhsDest), Simd4S(rhs)); +} + +void MacroAssembler::compareInt32x4(Assembler::Condition cond, + FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + compareSimd128Int(cond, Simd4S(dest), Simd4S(lhs), Simd4S(rhs)); +} + +void MacroAssembler::compareInt64x2(Assembler::Condition cond, + FloatRegister rhs, FloatRegister lhsDest) { + compareSimd128Int(cond, Simd2D(lhsDest), Simd2D(lhsDest), Simd2D(rhs)); +} + +void MacroAssembler::compareInt64x2(Assembler::Condition cond, + FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + 
compareSimd128Int(cond, Simd2D(dest), Simd2D(lhs), Simd2D(rhs)); +} + +void MacroAssembler::compareFloat32x4(Assembler::Condition cond, + FloatRegister rhs, + FloatRegister lhsDest) { + compareSimd128Float(cond, Simd4S(lhsDest), Simd4S(lhsDest), Simd4S(rhs)); +} + +void MacroAssembler::compareFloat32x4(Assembler::Condition cond, + FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + compareSimd128Float(cond, Simd4S(dest), Simd4S(lhs), Simd4S(rhs)); +} + +void MacroAssembler::compareFloat64x2(Assembler::Condition cond, + FloatRegister rhs, + FloatRegister lhsDest) { + compareSimd128Float(cond, Simd2D(lhsDest), Simd2D(lhsDest), Simd2D(rhs)); +} + +void MacroAssembler::compareFloat64x2(Assembler::Condition cond, + FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + compareSimd128Float(cond, Simd2D(dest), Simd2D(lhs), Simd2D(rhs)); +} + +// Load + +void MacroAssembler::loadUnalignedSimd128(const Address& src, + FloatRegister dest) { + Ldr(ARMFPRegister(dest, 128), toMemOperand(src)); +} + +void MacroAssembler::loadUnalignedSimd128(const BaseIndex& address, + FloatRegister dest) { + doBaseIndex(ARMFPRegister(dest, 128), address, vixl::LDR_q); +} + +// Store + +void MacroAssembler::storeUnalignedSimd128(FloatRegister src, + const Address& dest) { + Str(ARMFPRegister(src, 128), toMemOperand(dest)); +} + +void MacroAssembler::storeUnalignedSimd128(FloatRegister src, + const BaseIndex& dest) { + doBaseIndex(ARMFPRegister(src, 128), dest, vixl::STR_q); +} + +// Floating point negation + +void MacroAssembler::negFloat32x4(FloatRegister src, FloatRegister dest) { + Fneg(Simd4S(dest), Simd4S(src)); +} + +void MacroAssembler::negFloat64x2(FloatRegister src, FloatRegister dest) { + Fneg(Simd2D(dest), Simd2D(src)); +} + +// Floating point absolute value + +void MacroAssembler::absFloat32x4(FloatRegister src, FloatRegister dest) { + Fabs(Simd4S(dest), Simd4S(src)); +} + +void MacroAssembler::absFloat64x2(FloatRegister src, FloatRegister dest) { + 
Fabs(Simd2D(dest), Simd2D(src)); +} + +// NaN-propagating minimum + +void MacroAssembler::minFloat32x4(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Fmin(Simd4S(dest), Simd4S(lhs), Simd4S(rhs)); +} + +void MacroAssembler::minFloat32x4(FloatRegister rhs, FloatRegister lhsDest) { + Fmin(Simd4S(lhsDest), Simd4S(lhsDest), Simd4S(rhs)); +} + +void MacroAssembler::minFloat64x2(FloatRegister rhs, FloatRegister lhsDest) { + Fmin(Simd2D(lhsDest), Simd2D(lhsDest), Simd2D(rhs)); +} + +void MacroAssembler::minFloat64x2(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Fmin(Simd2D(dest), Simd2D(lhs), Simd2D(rhs)); +} + +// NaN-propagating maximum + +void MacroAssembler::maxFloat32x4(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Fmax(Simd4S(dest), Simd4S(lhs), Simd4S(rhs)); +} + +void MacroAssembler::maxFloat32x4(FloatRegister rhs, FloatRegister lhsDest) { + Fmax(Simd4S(lhsDest), Simd4S(lhsDest), Simd4S(rhs)); +} + +void MacroAssembler::maxFloat64x2(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Fmax(Simd2D(dest), Simd2D(lhs), Simd2D(rhs)); +} + +void MacroAssembler::maxFloat64x2(FloatRegister rhs, FloatRegister lhsDest) { + Fmax(Simd2D(lhsDest), Simd2D(lhsDest), Simd2D(rhs)); +} + +// Floating add + +void MacroAssembler::addFloat32x4(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Fadd(Simd4S(dest), Simd4S(lhs), Simd4S(rhs)); +} + +void MacroAssembler::addFloat64x2(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Fadd(Simd2D(dest), Simd2D(lhs), Simd2D(rhs)); +} + +// Floating subtract + +void MacroAssembler::subFloat32x4(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Fsub(Simd4S(dest), Simd4S(lhs), Simd4S(rhs)); +} + +void MacroAssembler::subFloat64x2(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Fsub(Simd2D(dest), Simd2D(lhs), Simd2D(rhs)); +} + +// Floating division + +void MacroAssembler::divFloat32x4(FloatRegister lhs, FloatRegister rhs, + 
FloatRegister dest) { + Fdiv(Simd4S(dest), Simd4S(lhs), Simd4S(rhs)); +} + +void MacroAssembler::divFloat64x2(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Fdiv(Simd2D(dest), Simd2D(lhs), Simd2D(rhs)); +} + +// Floating Multiply + +void MacroAssembler::mulFloat32x4(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Fmul(Simd4S(dest), Simd4S(lhs), Simd4S(rhs)); +} + +void MacroAssembler::mulFloat64x2(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Fmul(Simd2D(dest), Simd2D(lhs), Simd2D(rhs)); +} + +// Pairwise add + +void MacroAssembler::extAddPairwiseInt8x16(FloatRegister src, + FloatRegister dest) { + Saddlp(Simd8H(dest), Simd16B(src)); +} + +void MacroAssembler::unsignedExtAddPairwiseInt8x16(FloatRegister src, + FloatRegister dest) { + Uaddlp(Simd8H(dest), Simd16B(src)); +} + +void MacroAssembler::extAddPairwiseInt16x8(FloatRegister src, + FloatRegister dest) { + Saddlp(Simd4S(dest), Simd8H(src)); +} + +void MacroAssembler::unsignedExtAddPairwiseInt16x8(FloatRegister src, + FloatRegister dest) { + Uaddlp(Simd4S(dest), Simd8H(src)); +} + +// Floating square root + +void MacroAssembler::sqrtFloat32x4(FloatRegister src, FloatRegister dest) { + Fsqrt(Simd4S(dest), Simd4S(src)); +} + +void MacroAssembler::sqrtFloat64x2(FloatRegister src, FloatRegister dest) { + Fsqrt(Simd2D(dest), Simd2D(src)); +} + +// Integer to floating point with rounding + +void MacroAssembler::convertInt32x4ToFloat32x4(FloatRegister src, + FloatRegister dest) { + Scvtf(Simd4S(dest), Simd4S(src)); +} + +void MacroAssembler::unsignedConvertInt32x4ToFloat32x4(FloatRegister src, + FloatRegister dest) { + Ucvtf(Simd4S(dest), Simd4S(src)); +} + +void MacroAssembler::convertInt32x4ToFloat64x2(FloatRegister src, + FloatRegister dest) { + Sshll(Simd2D(dest), Simd2S(src), 0); + Scvtf(Simd2D(dest), Simd2D(dest)); +} + +void MacroAssembler::unsignedConvertInt32x4ToFloat64x2(FloatRegister src, + FloatRegister dest) { + Ushll(Simd2D(dest), Simd2S(src), 0); + 
Ucvtf(Simd2D(dest), Simd2D(dest)); +} + +// Floating point to integer with saturation + +void MacroAssembler::truncSatFloat32x4ToInt32x4(FloatRegister src, + FloatRegister dest) { + Fcvtzs(Simd4S(dest), Simd4S(src)); +} + +void MacroAssembler::unsignedTruncSatFloat32x4ToInt32x4(FloatRegister src, + FloatRegister dest) { + Fcvtzu(Simd4S(dest), Simd4S(src)); +} + +void MacroAssembler::truncSatFloat64x2ToInt32x4(FloatRegister src, + FloatRegister dest, + FloatRegister temp) { + Fcvtzs(Simd2D(dest), Simd2D(src)); + Sqxtn(Simd2S(dest), Simd2D(dest)); +} + +void MacroAssembler::unsignedTruncSatFloat64x2ToInt32x4(FloatRegister src, + FloatRegister dest, + FloatRegister temp) { + Fcvtzu(Simd2D(dest), Simd2D(src)); + Uqxtn(Simd2S(dest), Simd2D(dest)); +} + +void MacroAssembler::truncFloat32x4ToInt32x4Relaxed(FloatRegister src, + FloatRegister dest) { + Fcvtzs(Simd4S(dest), Simd4S(src)); +} + +void MacroAssembler::unsignedTruncFloat32x4ToInt32x4Relaxed( + FloatRegister src, FloatRegister dest) { + Fcvtzu(Simd4S(dest), Simd4S(src)); +} + +void MacroAssembler::truncFloat64x2ToInt32x4Relaxed(FloatRegister src, + FloatRegister dest) { + Fcvtzs(Simd2D(dest), Simd2D(src)); + Sqxtn(Simd2S(dest), Simd2D(dest)); +} + +void MacroAssembler::unsignedTruncFloat64x2ToInt32x4Relaxed( + FloatRegister src, FloatRegister dest) { + Fcvtzu(Simd2D(dest), Simd2D(src)); + Uqxtn(Simd2S(dest), Simd2D(dest)); +} + +// Floating point narrowing + +void MacroAssembler::convertFloat64x2ToFloat32x4(FloatRegister src, + FloatRegister dest) { + Fcvtn(Simd2S(dest), Simd2D(src)); +} + +// Floating point widening + +void MacroAssembler::convertFloat32x4ToFloat64x2(FloatRegister src, + FloatRegister dest) { + Fcvtl(Simd2D(dest), Simd2S(src)); +} + +// Integer to integer narrowing + +void MacroAssembler::narrowInt16x8(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + ScratchSimd128Scope scratch(*this); + if (rhs == dest) { + Mov(scratch, SimdReg(rhs)); + rhs = scratch; + } + Sqxtn(Simd8B(dest), 
Simd8H(lhs)); + Sqxtn2(Simd16B(dest), Simd8H(rhs)); +} + +void MacroAssembler::unsignedNarrowInt16x8(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + ScratchSimd128Scope scratch(*this); + if (rhs == dest) { + Mov(scratch, SimdReg(rhs)); + rhs = scratch; + } + Sqxtun(Simd8B(dest), Simd8H(lhs)); + Sqxtun2(Simd16B(dest), Simd8H(rhs)); +} + +void MacroAssembler::narrowInt32x4(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + ScratchSimd128Scope scratch(*this); + if (rhs == dest) { + Mov(scratch, SimdReg(rhs)); + rhs = scratch; + } + Sqxtn(Simd4H(dest), Simd4S(lhs)); + Sqxtn2(Simd8H(dest), Simd4S(rhs)); +} + +void MacroAssembler::unsignedNarrowInt32x4(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + ScratchSimd128Scope scratch(*this); + if (rhs == dest) { + Mov(scratch, SimdReg(rhs)); + rhs = scratch; + } + Sqxtun(Simd4H(dest), Simd4S(lhs)); + Sqxtun2(Simd8H(dest), Simd4S(rhs)); +} + +// Integer to integer widening + +void MacroAssembler::widenLowInt8x16(FloatRegister src, FloatRegister dest) { + Sshll(Simd8H(dest), Simd8B(src), 0); +} + +void MacroAssembler::widenHighInt8x16(FloatRegister src, FloatRegister dest) { + Sshll2(Simd8H(dest), Simd16B(src), 0); +} + +void MacroAssembler::unsignedWidenLowInt8x16(FloatRegister src, + FloatRegister dest) { + Ushll(Simd8H(dest), Simd8B(src), 0); +} + +void MacroAssembler::unsignedWidenHighInt8x16(FloatRegister src, + FloatRegister dest) { + Ushll2(Simd8H(dest), Simd16B(src), 0); +} + +void MacroAssembler::widenLowInt16x8(FloatRegister src, FloatRegister dest) { + Sshll(Simd4S(dest), Simd4H(src), 0); +} + +void MacroAssembler::widenHighInt16x8(FloatRegister src, FloatRegister dest) { + Sshll2(Simd4S(dest), Simd8H(src), 0); +} + +void MacroAssembler::unsignedWidenLowInt16x8(FloatRegister src, + FloatRegister dest) { + Ushll(Simd4S(dest), Simd4H(src), 0); +} + +void MacroAssembler::unsignedWidenHighInt16x8(FloatRegister src, + FloatRegister dest) { + Ushll2(Simd4S(dest), Simd8H(src), 0); 
+} + +void MacroAssembler::widenLowInt32x4(FloatRegister src, FloatRegister dest) { + Sshll(Simd2D(dest), Simd2S(src), 0); +} + +void MacroAssembler::unsignedWidenLowInt32x4(FloatRegister src, + FloatRegister dest) { + Ushll(Simd2D(dest), Simd2S(src), 0); +} + +void MacroAssembler::widenHighInt32x4(FloatRegister src, FloatRegister dest) { + Sshll2(Simd2D(dest), Simd4S(src), 0); +} + +void MacroAssembler::unsignedWidenHighInt32x4(FloatRegister src, + FloatRegister dest) { + Ushll2(Simd2D(dest), Simd4S(src), 0); +} + +// Compare-based minimum/maximum (experimental as of August, 2020) +// https://github.com/WebAssembly/simd/pull/122 + +void MacroAssembler::pseudoMinFloat32x4(FloatRegister rhsOrRhsDest, + FloatRegister lhsOrLhsDest) { + // Shut up the linter by using the same names as in the declaration, then + // aliasing here. + FloatRegister rhs = rhsOrRhsDest; + FloatRegister lhsDest = lhsOrLhsDest; + ScratchSimd128Scope scratch(*this); + Fcmgt(Simd4S(scratch), Simd4S(lhsDest), Simd4S(rhs)); + Bsl(Simd16B(scratch), Simd16B(rhs), Simd16B(lhsDest)); + Mov(SimdReg(lhsDest), scratch); +} + +void MacroAssembler::pseudoMinFloat32x4(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + ScratchSimd128Scope scratch(*this); + Fcmgt(Simd4S(scratch), Simd4S(lhs), Simd4S(rhs)); + Bsl(Simd16B(scratch), Simd16B(rhs), Simd16B(lhs)); + Mov(SimdReg(dest), scratch); +} + +void MacroAssembler::pseudoMinFloat64x2(FloatRegister rhsOrRhsDest, + FloatRegister lhsOrLhsDest) { + FloatRegister rhs = rhsOrRhsDest; + FloatRegister lhsDest = lhsOrLhsDest; + ScratchSimd128Scope scratch(*this); + Fcmgt(Simd2D(scratch), Simd2D(lhsDest), Simd2D(rhs)); + Bsl(Simd16B(scratch), Simd16B(rhs), Simd16B(lhsDest)); + Mov(SimdReg(lhsDest), scratch); +} + +void MacroAssembler::pseudoMinFloat64x2(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + ScratchSimd128Scope scratch(*this); + Fcmgt(Simd2D(scratch), Simd2D(lhs), Simd2D(rhs)); + Bsl(Simd16B(scratch), Simd16B(rhs), Simd16B(lhs)); + 
Mov(SimdReg(dest), scratch); +} + +void MacroAssembler::pseudoMaxFloat32x4(FloatRegister rhsOrRhsDest, + FloatRegister lhsOrLhsDest) { + FloatRegister rhs = rhsOrRhsDest; + FloatRegister lhsDest = lhsOrLhsDest; + ScratchSimd128Scope scratch(*this); + Fcmgt(Simd4S(scratch), Simd4S(rhs), Simd4S(lhsDest)); + Bsl(Simd16B(scratch), Simd16B(rhs), Simd16B(lhsDest)); + Mov(SimdReg(lhsDest), scratch); +} + +void MacroAssembler::pseudoMaxFloat32x4(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + ScratchSimd128Scope scratch(*this); + Fcmgt(Simd4S(scratch), Simd4S(rhs), Simd4S(lhs)); + Bsl(Simd16B(scratch), Simd16B(rhs), Simd16B(lhs)); + Mov(SimdReg(dest), scratch); +} + +void MacroAssembler::pseudoMaxFloat64x2(FloatRegister rhsOrRhsDest, + FloatRegister lhsOrLhsDest) { + FloatRegister rhs = rhsOrRhsDest; + FloatRegister lhsDest = lhsOrLhsDest; + ScratchSimd128Scope scratch(*this); + Fcmgt(Simd2D(scratch), Simd2D(rhs), Simd2D(lhsDest)); + Bsl(Simd16B(scratch), Simd16B(rhs), Simd16B(lhsDest)); + Mov(SimdReg(lhsDest), scratch); +} + +void MacroAssembler::pseudoMaxFloat64x2(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + ScratchSimd128Scope scratch(*this); + Fcmgt(Simd2D(scratch), Simd2D(rhs), Simd2D(lhs)); + Bsl(Simd16B(scratch), Simd16B(rhs), Simd16B(lhs)); + Mov(SimdReg(dest), scratch); +} + +// Widening/pairwise integer dot product (experimental as of August, 2020) +// https://github.com/WebAssembly/simd/pull/127 + +void MacroAssembler::widenDotInt16x8(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + ScratchSimd128Scope scratch(*this); + Smull(Simd4S(scratch), Simd4H(lhs), Simd4H(rhs)); + Smull2(Simd4S(dest), Simd8H(lhs), Simd8H(rhs)); + Addp(Simd4S(dest), Simd4S(scratch), Simd4S(dest)); +} + +void MacroAssembler::dotInt8x16Int7x16(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + ScratchSimd128Scope scratch(*this); + Smull(Simd8H(scratch), Simd8B(lhs), Simd8B(rhs)); + Smull2(Simd8H(dest), Simd16B(lhs), 
Simd16B(rhs)); + Addp(Simd8H(dest), Simd8H(scratch), Simd8H(dest)); +} + +void MacroAssembler::dotInt8x16Int7x16ThenAdd(FloatRegister lhs, + FloatRegister rhs, + FloatRegister dest, + FloatRegister temp) { + ScratchSimd128Scope scratch(*this); + Smull(Simd8H(scratch), Simd8B(lhs), Simd8B(rhs)); + Smull2(Simd8H(temp), Simd16B(lhs), Simd16B(rhs)); + Addp(Simd8H(temp), Simd8H(scratch), Simd8H(temp)); + Sadalp(Simd4S(dest), Simd8H(temp)); +} + +// Floating point rounding (experimental as of August, 2020) +// https://github.com/WebAssembly/simd/pull/232 + +void MacroAssembler::ceilFloat32x4(FloatRegister src, FloatRegister dest) { + Frintp(Simd4S(dest), Simd4S(src)); +} + +void MacroAssembler::ceilFloat64x2(FloatRegister src, FloatRegister dest) { + Frintp(Simd2D(dest), Simd2D(src)); +} + +void MacroAssembler::floorFloat32x4(FloatRegister src, FloatRegister dest) { + Frintm(Simd4S(dest), Simd4S(src)); +} + +void MacroAssembler::floorFloat64x2(FloatRegister src, FloatRegister dest) { + Frintm(Simd2D(dest), Simd2D(src)); +} + +void MacroAssembler::truncFloat32x4(FloatRegister src, FloatRegister dest) { + Frintz(Simd4S(dest), Simd4S(src)); +} + +void MacroAssembler::truncFloat64x2(FloatRegister src, FloatRegister dest) { + Frintz(Simd2D(dest), Simd2D(src)); +} + +void MacroAssembler::nearestFloat32x4(FloatRegister src, FloatRegister dest) { + Frintn(Simd4S(dest), Simd4S(src)); +} + +void MacroAssembler::nearestFloat64x2(FloatRegister src, FloatRegister dest) { + Frintn(Simd2D(dest), Simd2D(src)); +} + +// Floating multiply-accumulate: srcDest [+-]= src1 * src2 + +void MacroAssembler::fmaFloat32x4(FloatRegister src1, FloatRegister src2, + FloatRegister srcDest) { + Fmla(Simd4S(srcDest), Simd4S(src1), Simd4S(src2)); +} + +void MacroAssembler::fnmaFloat32x4(FloatRegister src1, FloatRegister src2, + FloatRegister srcDest) { + Fmls(Simd4S(srcDest), Simd4S(src1), Simd4S(src2)); +} + +void MacroAssembler::fmaFloat64x2(FloatRegister src1, FloatRegister src2, + FloatRegister 
srcDest) { + Fmla(Simd2D(srcDest), Simd2D(src1), Simd2D(src2)); +} + +void MacroAssembler::fnmaFloat64x2(FloatRegister src1, FloatRegister src2, + FloatRegister srcDest) { + Fmls(Simd2D(srcDest), Simd2D(src1), Simd2D(src2)); +} + +void MacroAssembler::minFloat32x4Relaxed(FloatRegister src, + FloatRegister srcDest) { + Fmin(Simd4S(srcDest), Simd4S(src), Simd4S(srcDest)); +} + +void MacroAssembler::minFloat32x4Relaxed(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Fmin(Simd4S(dest), Simd4S(rhs), Simd4S(lhs)); +} + +void MacroAssembler::maxFloat32x4Relaxed(FloatRegister src, + FloatRegister srcDest) { + Fmax(Simd4S(srcDest), Simd4S(src), Simd4S(srcDest)); +} + +void MacroAssembler::maxFloat32x4Relaxed(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Fmax(Simd4S(dest), Simd4S(rhs), Simd4S(lhs)); +} + +void MacroAssembler::minFloat64x2Relaxed(FloatRegister src, + FloatRegister srcDest) { + Fmin(Simd2D(srcDest), Simd2D(src), Simd2D(srcDest)); +} + +void MacroAssembler::minFloat64x2Relaxed(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Fmin(Simd2D(dest), Simd2D(rhs), Simd2D(lhs)); +} + +void MacroAssembler::maxFloat64x2Relaxed(FloatRegister src, + FloatRegister srcDest) { + Fmax(Simd2D(srcDest), Simd2D(src), Simd2D(srcDest)); +} + +void MacroAssembler::maxFloat64x2Relaxed(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + Fmax(Simd2D(dest), Simd2D(rhs), Simd2D(lhs)); +} + +//}}} check_macroassembler_style +// =============================================================== + +void MacroAssemblerCompat::addToStackPtr(Register src) { + Add(GetStackPointer64(), GetStackPointer64(), ARMRegister(src, 64)); + // Given that required invariant SP <= PSP, this is probably pointless, + // since it gives PSP a larger value. + syncStackPtr(); +} + +void MacroAssemblerCompat::addToStackPtr(Imm32 imm) { + Add(GetStackPointer64(), GetStackPointer64(), Operand(imm.value)); + // As above, probably pointless. 
+ syncStackPtr(); +} + +void MacroAssemblerCompat::addToStackPtr(const Address& src) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch = temps.AcquireX(); + Ldr(scratch, toMemOperand(src)); + Add(GetStackPointer64(), GetStackPointer64(), scratch); + // As above, probably pointless. + syncStackPtr(); +} + +void MacroAssemblerCompat::addStackPtrTo(Register dest) { + Add(ARMRegister(dest, 64), ARMRegister(dest, 64), GetStackPointer64()); +} + +void MacroAssemblerCompat::subFromStackPtr(Register src) { + Sub(GetStackPointer64(), GetStackPointer64(), ARMRegister(src, 64)); + syncStackPtr(); +} + +void MacroAssemblerCompat::subFromStackPtr(Imm32 imm) { + Sub(GetStackPointer64(), GetStackPointer64(), Operand(imm.value)); + syncStackPtr(); +} + +void MacroAssemblerCompat::subStackPtrFrom(Register dest) { + Sub(ARMRegister(dest, 64), ARMRegister(dest, 64), GetStackPointer64()); +} + +void MacroAssemblerCompat::andToStackPtr(Imm32 imm) { + if (sp.Is(GetStackPointer64())) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch = temps.AcquireX(); + Mov(scratch, sp); + And(sp, scratch, Operand(imm.value)); + // syncStackPtr() not needed since our SP is the real SP. + } else { + And(GetStackPointer64(), GetStackPointer64(), Operand(imm.value)); + syncStackPtr(); + } +} + +void MacroAssemblerCompat::moveToStackPtr(Register src) { + Mov(GetStackPointer64(), ARMRegister(src, 64)); + syncStackPtr(); +} + +void MacroAssemblerCompat::moveStackPtrTo(Register dest) { + Mov(ARMRegister(dest, 64), GetStackPointer64()); +} + +void MacroAssemblerCompat::loadStackPtr(const Address& src) { + if (sp.Is(GetStackPointer64())) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch = temps.AcquireX(); + Ldr(scratch, toMemOperand(src)); + Mov(sp, scratch); + // syncStackPtr() not needed since our SP is the real SP. 
+ } else { + Ldr(GetStackPointer64(), toMemOperand(src)); + syncStackPtr(); + } +} + +void MacroAssemblerCompat::storeStackPtr(const Address& dest) { + if (sp.Is(GetStackPointer64())) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch = temps.AcquireX(); + Mov(scratch, sp); + Str(scratch, toMemOperand(dest)); + } else { + Str(GetStackPointer64(), toMemOperand(dest)); + } +} + +void MacroAssemblerCompat::branchTestStackPtr(Condition cond, Imm32 rhs, + Label* label) { + if (sp.Is(GetStackPointer64())) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch = temps.AcquireX(); + Mov(scratch, sp); + Tst(scratch, Operand(rhs.value)); + } else { + Tst(GetStackPointer64(), Operand(rhs.value)); + } + B(label, cond); +} + +void MacroAssemblerCompat::branchStackPtr(Condition cond, Register rhs_, + Label* label) { + ARMRegister rhs(rhs_, 64); + if (sp.Is(GetStackPointer64())) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch = temps.AcquireX(); + Mov(scratch, sp); + Cmp(scratch, rhs); + } else { + Cmp(GetStackPointer64(), rhs); + } + B(label, cond); +} + +void MacroAssemblerCompat::branchStackPtrRhs(Condition cond, Address lhs, + Label* label) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch = temps.AcquireX(); + Ldr(scratch, toMemOperand(lhs)); + // Cmp disallows SP as the rhs, so flip the operands and invert the + // condition. + Cmp(GetStackPointer64(), scratch); + B(label, Assembler::InvertCondition(cond)); +} + +void MacroAssemblerCompat::branchStackPtrRhs(Condition cond, + AbsoluteAddress lhs, + Label* label) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch = temps.AcquireX(); + loadPtr(lhs, scratch.asUnsized()); + // Cmp disallows SP as the rhs, so flip the operands and invert the + // condition. + Cmp(GetStackPointer64(), scratch); + B(label, Assembler::InvertCondition(cond)); +} + +// If source is a double, load into dest. 
+// If source is int32, convert to double and store in dest. +// Else, branch to failure. +void MacroAssemblerCompat::ensureDouble(const ValueOperand& source, + FloatRegister dest, Label* failure) { + Label isDouble, done; + + { + ScratchTagScope tag(asMasm(), source); + splitTagForTest(source, tag); + asMasm().branchTestDouble(Assembler::Equal, tag, &isDouble); + asMasm().branchTestInt32(Assembler::NotEqual, tag, failure); + } + + convertInt32ToDouble(source.valueReg(), dest); + jump(&done); + + bind(&isDouble); + unboxDouble(source, dest); + + bind(&done); +} + +// Unbox |src| into |dest|. For a float destination an int32 payload is +// converted to double and any other payload is unboxed as a double; for a +// general-purpose destination the payload is unboxed as |type|. +void MacroAssemblerCompat::unboxValue(const ValueOperand& src, AnyRegister dest, + JSValueType type) { + if (dest.isFloat()) { + Label notInt32, end; + asMasm().branchTestInt32(Assembler::NotEqual, src, &notInt32); + convertInt32ToDouble(src.valueReg(), dest.fpu()); + jump(&end); + bind(&notInt32); + unboxDouble(src, dest.fpu()); + bind(&end); + } else { + unboxNonDouble(src, dest.gpr(), type); + } +} + +} // namespace jit +} // namespace js + +#endif /* jit_arm64_MacroAssembler_arm64_inl_h */ diff --git a/js/src/jit/arm64/MacroAssembler-arm64.cpp b/js/src/jit/arm64/MacroAssembler-arm64.cpp new file mode 100644 index 0000000000..a4aff730e6 --- /dev/null +++ b/js/src/jit/arm64/MacroAssembler-arm64.cpp @@ -0,0 +1,3416 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "jit/arm64/MacroAssembler-arm64.h" + +#include "mozilla/MathAlgorithms.h" +#include "mozilla/Maybe.h" + +#include "jsmath.h" + +#include "jit/arm64/MoveEmitter-arm64.h" +#include "jit/arm64/SharedICRegisters-arm64.h" +#include "jit/Bailouts.h" +#include "jit/BaselineFrame.h" +#include "jit/JitRuntime.h" +#include "jit/MacroAssembler.h" +#include "util/Memory.h" +#include "vm/BigIntType.h" +#include "vm/JitActivation.h" // js::jit::JitActivation +#include "vm/JSContext.h" +#include "vm/StringType.h" + +#include "jit/MacroAssembler-inl.h" + +namespace js { +namespace jit { + +enum class Width { _32 = 32, _64 = 64 }; + +static inline ARMRegister X(Register r) { return ARMRegister(r, 64); } + +static inline ARMRegister X(MacroAssembler& masm, RegisterOrSP r) { + return masm.toARMRegister(r, 64); +} + +static inline ARMRegister W(Register r) { return ARMRegister(r, 32); } + +static inline ARMRegister R(Register r, Width w) { + return ARMRegister(r, unsigned(w)); +} + +void MacroAssemblerCompat::boxValue(JSValueType type, Register src, + Register dest) { +#ifdef DEBUG + if (type == JSVAL_TYPE_INT32 || type == JSVAL_TYPE_BOOLEAN) { + Label upper32BitsZeroed; + movePtr(ImmWord(UINT32_MAX), dest); + asMasm().branchPtr(Assembler::BelowOrEqual, src, dest, &upper32BitsZeroed); + breakpoint(); + bind(&upper32BitsZeroed); + } +#endif + Orr(ARMRegister(dest, 64), ARMRegister(src, 64), + Operand(ImmShiftedTag(type).value)); +} + +#ifdef ENABLE_WASM_SIMD +bool MacroAssembler::MustMaskShiftCountSimd128(wasm::SimdOp op, int32_t* mask) { + switch (op) { + case wasm::SimdOp::I8x16Shl: + case wasm::SimdOp::I8x16ShrU: + case wasm::SimdOp::I8x16ShrS: + *mask = 7; + break; + case wasm::SimdOp::I16x8Shl: + case wasm::SimdOp::I16x8ShrU: + case wasm::SimdOp::I16x8ShrS: + *mask = 15; + break; + case wasm::SimdOp::I32x4Shl: + case wasm::SimdOp::I32x4ShrU: + case wasm::SimdOp::I32x4ShrS: + *mask = 31; + break; + case wasm::SimdOp::I64x2Shl: + case wasm::SimdOp::I64x2ShrU: + case 
wasm::SimdOp::I64x2ShrS: + *mask = 63; + break; + default: + MOZ_CRASH("Unexpected shift operation"); + } + return true; +} +#endif + +void MacroAssembler::clampDoubleToUint8(FloatRegister input, Register output) { + ARMRegister dest(output, 32); + Fcvtns(dest, ARMFPRegister(input, 64)); + + { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch32 = temps.AcquireW(); + + Mov(scratch32, Operand(0xff)); + Cmp(dest, scratch32); + Csel(dest, dest, scratch32, LessThan); + } + + Cmp(dest, Operand(0)); + Csel(dest, dest, wzr, GreaterThan); +} + +js::jit::MacroAssembler& MacroAssemblerCompat::asMasm() { + return *static_cast<js::jit::MacroAssembler*>(this); +} + +const js::jit::MacroAssembler& MacroAssemblerCompat::asMasm() const { + return *static_cast<const js::jit::MacroAssembler*>(this); +} + +vixl::MacroAssembler& MacroAssemblerCompat::asVIXL() { + return *static_cast<vixl::MacroAssembler*>(this); +} + +const vixl::MacroAssembler& MacroAssemblerCompat::asVIXL() const { + return *static_cast<const vixl::MacroAssembler*>(this); +} + +void MacroAssemblerCompat::mov(CodeLabel* label, Register dest) { + BufferOffset bo = movePatchablePtr(ImmWord(/* placeholder */ 0), dest); + label->patchAt()->bind(bo.getOffset()); + label->setLinkMode(CodeLabel::MoveImmediate); +} + +BufferOffset MacroAssemblerCompat::movePatchablePtr(ImmPtr ptr, Register dest) { + const size_t numInst = 1; // Inserting one load instruction. + const unsigned numPoolEntries = 2; // Every pool entry is 4 bytes. + uint8_t* literalAddr = (uint8_t*)(&ptr.value); // TODO: Should be const. + + // Scratch space for generating the load instruction. + // + // allocLiteralLoadEntry() will use InsertIndexIntoTag() to store a temporary + // index to the corresponding PoolEntry in the instruction itself. + // + // That index will be fixed up later when finishPool() + // walks over all marked loads and calls PatchConstantPoolLoad(). 
+ uint32_t instructionScratch = 0; + + // Emit the instruction mask in the scratch space. + // The offset doesn't matter: it will be fixed up later. + vixl::Assembler::ldr((Instruction*)&instructionScratch, ARMRegister(dest, 64), + 0); + + // Add the entry to the pool, fix up the LDR imm19 offset, + // and add the completed instruction to the buffer. + return allocLiteralLoadEntry(numInst, numPoolEntries, + (uint8_t*)&instructionScratch, literalAddr); +} + +BufferOffset MacroAssemblerCompat::movePatchablePtr(ImmWord ptr, + Register dest) { + const size_t numInst = 1; // Inserting one load instruction. + const unsigned numPoolEntries = 2; // Every pool entry is 4 bytes. + uint8_t* literalAddr = (uint8_t*)(&ptr.value); + + // Scratch space for generating the load instruction. + // + // allocLiteralLoadEntry() will use InsertIndexIntoTag() to store a temporary + // index to the corresponding PoolEntry in the instruction itself. + // + // That index will be fixed up later when finishPool() + // walks over all marked loads and calls PatchConstantPoolLoad(). + uint32_t instructionScratch = 0; + + // Emit the instruction mask in the scratch space. + // The offset doesn't matter: it will be fixed up later. + vixl::Assembler::ldr((Instruction*)&instructionScratch, ARMRegister(dest, 64), + 0); + + // Add the entry to the pool, fix up the LDR imm19 offset, + // and add the completed instruction to the buffer. + return allocLiteralLoadEntry(numInst, numPoolEntries, + (uint8_t*)&instructionScratch, literalAddr); +} + +void MacroAssemblerCompat::loadPrivate(const Address& src, Register dest) { + loadPtr(src, dest); +} + +void MacroAssemblerCompat::handleFailureWithHandlerTail(Label* profilerExitTail, + Label* bailoutTail) { + // Fail rather than silently create wrong code. + MOZ_RELEASE_ASSERT(GetStackPointer64().Is(PseudoStackPointer64)); + + // Reserve space for exception information. 
+ int64_t size = (sizeof(ResumeFromException) + 7) & ~7; + Sub(PseudoStackPointer64, PseudoStackPointer64, Operand(size)); + syncStackPtr(); + + MOZ_ASSERT(!x0.Is(PseudoStackPointer64)); + Mov(x0, PseudoStackPointer64); + + // Call the handler. + using Fn = void (*)(ResumeFromException* rfe); + asMasm().setupUnalignedABICall(r1); + asMasm().passABIArg(r0); + asMasm().callWithABI<Fn, HandleException>( + MoveOp::GENERAL, CheckUnsafeCallWithABI::DontCheckHasExitFrame); + + Label entryFrame; + Label catch_; + Label finally; + Label returnBaseline; + Label returnIon; + Label bailout; + Label wasm; + Label wasmCatch; + + // Check the `asMasm` calls above didn't mess with the StackPointer identity. + MOZ_ASSERT(GetStackPointer64().Is(PseudoStackPointer64)); + + loadPtr(Address(PseudoStackPointer, ResumeFromException::offsetOfKind()), r0); + asMasm().branch32(Assembler::Equal, r0, + Imm32(ExceptionResumeKind::EntryFrame), &entryFrame); + asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::Catch), + &catch_); + asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::Finally), + &finally); + asMasm().branch32(Assembler::Equal, r0, + Imm32(ExceptionResumeKind::ForcedReturnBaseline), + &returnBaseline); + asMasm().branch32(Assembler::Equal, r0, + Imm32(ExceptionResumeKind::ForcedReturnIon), &returnIon); + asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::Bailout), + &bailout); + asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::Wasm), + &wasm); + asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::WasmCatch), + &wasmCatch); + + breakpoint(); // Invalid kind. + + // No exception handler. Load the error value, restore state and return from + // the entry frame. 
+ bind(&entryFrame); + moveValue(MagicValue(JS_ION_ERROR), JSReturnOperand); + loadPtr( + Address(PseudoStackPointer, ResumeFromException::offsetOfFramePointer()), + FramePointer); + loadPtr( + Address(PseudoStackPointer, ResumeFromException::offsetOfStackPointer()), + PseudoStackPointer); + + // `retn` does indeed sync the stack pointer, but before doing that it reads + // from the stack. Consequently, if we remove this call to syncStackPointer + // then we take on the requirement to prove that the immediately preceding + // loadPtr produces a value for PSP which maintains the SP <= PSP invariant. + // That's a proof burden we don't want to take on. In general it would be + // good to move (at some time in the future, not now) to a world where + // *every* assignment to PSP or SP is followed immediately by a copy into + // the other register. That would make all required correctness proofs + // trivial in the sense that it requires only local inspection of code + // immediately following (dominated by) any such assignment. + syncStackPtr(); + retn(Imm32(1 * sizeof(void*))); // Pop from stack and return. + + // If we found a catch handler, this must be a baseline frame. Restore state + // and jump to the catch block. + bind(&catch_); + loadPtr(Address(PseudoStackPointer, ResumeFromException::offsetOfTarget()), + r0); + loadPtr( + Address(PseudoStackPointer, ResumeFromException::offsetOfFramePointer()), + FramePointer); + loadPtr( + Address(PseudoStackPointer, ResumeFromException::offsetOfStackPointer()), + PseudoStackPointer); + syncStackPtr(); + Br(x0); + + // If we found a finally block, this must be a baseline frame. Push two + // values expected by the finally block: the exception and BooleanValue(true). 
+ bind(&finally); + ARMRegister exception = x1; + Ldr(exception, MemOperand(PseudoStackPointer64, + ResumeFromException::offsetOfException())); + Ldr(x0, + MemOperand(PseudoStackPointer64, ResumeFromException::offsetOfTarget())); + Ldr(ARMRegister(FramePointer, 64), + MemOperand(PseudoStackPointer64, + ResumeFromException::offsetOfFramePointer())); + Ldr(PseudoStackPointer64, + MemOperand(PseudoStackPointer64, + ResumeFromException::offsetOfStackPointer())); + syncStackPtr(); + push(exception); + pushValue(BooleanValue(true)); + Br(x0); + + // Return BaselineFrame->returnValue() to the caller. + // Used in debug mode and for GeneratorReturn. + Label profilingInstrumentation; + bind(&returnBaseline); + loadPtr( + Address(PseudoStackPointer, ResumeFromException::offsetOfFramePointer()), + FramePointer); + loadPtr( + Address(PseudoStackPointer, ResumeFromException::offsetOfStackPointer()), + PseudoStackPointer); + // See comment further up beginning "`retn` does indeed sync the stack + // pointer". That comment applies here too. + syncStackPtr(); + loadValue(Address(FramePointer, BaselineFrame::reverseOffsetOfReturnValue()), + JSReturnOperand); + jump(&profilingInstrumentation); + + // Return the given value to the caller. + bind(&returnIon); + loadValue( + Address(PseudoStackPointer, ResumeFromException::offsetOfException()), + JSReturnOperand); + loadPtr( + Address(PseudoStackPointer, offsetof(ResumeFromException, framePointer)), + FramePointer); + loadPtr( + Address(PseudoStackPointer, offsetof(ResumeFromException, stackPointer)), + PseudoStackPointer); + syncStackPtr(); + + // If profiling is enabled, then update the lastProfilingFrame to refer to + // caller frame before returning. This code is shared by ForcedReturnIon + // and ForcedReturnBaseline. 
+ bind(&profilingInstrumentation); + { + Label skipProfilingInstrumentation; + AbsoluteAddress addressOfEnabled( + asMasm().runtime()->geckoProfiler().addressOfEnabled()); + asMasm().branch32(Assembler::Equal, addressOfEnabled, Imm32(0), + &skipProfilingInstrumentation); + jump(profilerExitTail); + bind(&skipProfilingInstrumentation); + } + + movePtr(FramePointer, PseudoStackPointer); + syncStackPtr(); + vixl::MacroAssembler::Pop(ARMRegister(FramePointer, 64)); + + vixl::MacroAssembler::Pop(vixl::lr); + syncStackPtr(); + vixl::MacroAssembler::Ret(vixl::lr); + + // If we are bailing out to baseline to handle an exception, jump to the + // bailout tail stub. Load 1 (true) in x0 (ReturnReg) to indicate success. + bind(&bailout); + Ldr(x2, MemOperand(PseudoStackPointer64, + ResumeFromException::offsetOfBailoutInfo())); + Ldr(PseudoStackPointer64, + MemOperand(PseudoStackPointer64, + ResumeFromException::offsetOfStackPointer())); + syncStackPtr(); + Mov(x0, 1); + jump(bailoutTail); + + // If we are throwing and the innermost frame was a wasm frame, reset SP and + // FP; SP is pointing to the unwound return address to the wasm entry, so + // we can just ret(). + bind(&wasm); + Ldr(x29, MemOperand(PseudoStackPointer64, + ResumeFromException::offsetOfFramePointer())); + Ldr(PseudoStackPointer64, + MemOperand(PseudoStackPointer64, + ResumeFromException::offsetOfStackPointer())); + syncStackPtr(); + Mov(x23, int64_t(wasm::FailInstanceReg)); + ret(); + + // Found a wasm catch handler, restore state and jump to it. 
+ bind(&wasmCatch); + loadPtr(Address(PseudoStackPointer, ResumeFromException::offsetOfTarget()), + r0); + loadPtr( + Address(PseudoStackPointer, ResumeFromException::offsetOfFramePointer()), + r29); + loadPtr( + Address(PseudoStackPointer, ResumeFromException::offsetOfStackPointer()), + PseudoStackPointer); + syncStackPtr(); + Br(x0); + + MOZ_ASSERT(GetStackPointer64().Is(PseudoStackPointer64)); +} + +void MacroAssemblerCompat::profilerEnterFrame(Register framePtr, + Register scratch) { + asMasm().loadJSContext(scratch); + loadPtr(Address(scratch, offsetof(JSContext, profilingActivation_)), scratch); + storePtr(framePtr, + Address(scratch, JitActivation::offsetOfLastProfilingFrame())); + storePtr(ImmPtr(nullptr), + Address(scratch, JitActivation::offsetOfLastProfilingCallSite())); +} + +void MacroAssemblerCompat::profilerExitFrame() { + jump(asMasm().runtime()->jitRuntime()->getProfilerExitFrameTail()); +} + +Assembler::Condition MacroAssemblerCompat::testStringTruthy( + bool truthy, const ValueOperand& value) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + const ARMRegister scratch32(scratch, 32); + const ARMRegister scratch64(scratch, 64); + + MOZ_ASSERT(value.valueReg() != scratch); + + unboxString(value, scratch); + Ldr(scratch32, MemOperand(scratch64, JSString::offsetOfLength())); + Cmp(scratch32, Operand(0)); + return truthy ? Condition::NonZero : Condition::Zero; +} + +Assembler::Condition MacroAssemblerCompat::testBigIntTruthy( + bool truthy, const ValueOperand& value) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + + MOZ_ASSERT(value.valueReg() != scratch); + + unboxBigInt(value, scratch); + load32(Address(scratch, BigInt::offsetOfDigitLength()), scratch); + cmp32(scratch, Imm32(0)); + return truthy ? 
Condition::NonZero : Condition::Zero; +} + +void MacroAssemblerCompat::breakpoint() { + // Note, other payloads are possible, but GDB is known to misinterpret them + // sometimes and iloop on the breakpoint instead of stopping properly. + Brk(0); +} + +// Either `any` is valid or `sixtyfour` is valid. Return a 32-bit ARMRegister +// in the first case and an ARMRegister of the desired size in the latter case. + +static inline ARMRegister SelectGPReg(AnyRegister any, Register64 sixtyfour, + unsigned size = 64) { + MOZ_ASSERT(any.isValid() != (sixtyfour != Register64::Invalid())); + + if (sixtyfour == Register64::Invalid()) { + return ARMRegister(any.gpr(), 32); + } + + return ARMRegister(sixtyfour.reg, size); +} + +// Assert that `sixtyfour` is invalid and then return an FP register from `any` +// of the desired size. + +static inline ARMFPRegister SelectFPReg(AnyRegister any, Register64 sixtyfour, + unsigned size) { + MOZ_ASSERT(sixtyfour == Register64::Invalid()); + return ARMFPRegister(any.fpu(), size); +} + +void MacroAssemblerCompat::wasmLoadImpl(const wasm::MemoryAccessDesc& access, + Register memoryBase_, Register ptr_, + AnyRegister outany, Register64 out64) { + uint32_t offset = access.offset(); + MOZ_ASSERT(offset < asMasm().wasmMaxOffsetGuardLimit()); + + ARMRegister memoryBase(memoryBase_, 64); + ARMRegister ptr(ptr_, 64); + if (offset) { + vixl::UseScratchRegisterScope temps(this); + ARMRegister scratch = temps.AcquireX(); + Add(scratch, ptr, Operand(offset)); + MemOperand srcAddr(memoryBase, scratch); + wasmLoadImpl(access, srcAddr, outany, out64); + } else { + MemOperand srcAddr(memoryBase, ptr); + wasmLoadImpl(access, srcAddr, outany, out64); + } +} + +void MacroAssemblerCompat::wasmLoadImpl(const wasm::MemoryAccessDesc& access, + MemOperand srcAddr, AnyRegister outany, + Register64 out64) { + // Reg+Reg and Reg+SmallImm addressing is directly encodable in one Load + // instruction, hence we expect exactly one instruction to be emitted in the + // 
window. + int32_t instructionsExpected = 1; + + // Splat and widen however require an additional instruction to be emitted + // after the load, so allow one more instruction in the window. + if (access.isSplatSimd128Load() || access.isWidenSimd128Load()) { + MOZ_ASSERT(access.type() == Scalar::Float64); + instructionsExpected++; + } + + // NOTE: the generated code must match the assembly code in gen_load in + // GenerateAtomicOperations.py + asMasm().memoryBarrierBefore(access.sync()); + + { + // The AutoForbidPoolsAndNops asserts if we emit more than the expected + // number of instructions and thus ensures that the access metadata is + // emitted at the address of the Load. + AutoForbidPoolsAndNops afp(this, instructionsExpected); + + append(access, asMasm().currentOffset()); + switch (access.type()) { + case Scalar::Int8: + Ldrsb(SelectGPReg(outany, out64), srcAddr); + break; + case Scalar::Uint8: + Ldrb(SelectGPReg(outany, out64), srcAddr); + break; + case Scalar::Int16: + Ldrsh(SelectGPReg(outany, out64), srcAddr); + break; + case Scalar::Uint16: + Ldrh(SelectGPReg(outany, out64), srcAddr); + break; + case Scalar::Int32: + if (out64 != Register64::Invalid()) { + Ldrsw(SelectGPReg(outany, out64), srcAddr); + } else { + Ldr(SelectGPReg(outany, out64, 32), srcAddr); + } + break; + case Scalar::Uint32: + Ldr(SelectGPReg(outany, out64, 32), srcAddr); + break; + case Scalar::Int64: + Ldr(SelectGPReg(outany, out64), srcAddr); + break; + case Scalar::Float32: + // LDR does the right thing also for access.isZeroExtendSimd128Load() + Ldr(SelectFPReg(outany, out64, 32), srcAddr); + break; + case Scalar::Float64: + if (access.isSplatSimd128Load() || access.isWidenSimd128Load()) { + ScratchSimd128Scope scratch_(asMasm()); + ARMFPRegister scratch = Simd1D(scratch_); + Ldr(scratch, srcAddr); + if (access.isSplatSimd128Load()) { + Dup(SelectFPReg(outany, out64, 128).V2D(), scratch, 0); + } else { + MOZ_ASSERT(access.isWidenSimd128Load()); + switch (access.widenSimdOp()) { + 
case wasm::SimdOp::V128Load8x8S: + Sshll(SelectFPReg(outany, out64, 128).V8H(), scratch.V8B(), 0); + break; + case wasm::SimdOp::V128Load8x8U: + Ushll(SelectFPReg(outany, out64, 128).V8H(), scratch.V8B(), 0); + break; + case wasm::SimdOp::V128Load16x4S: + Sshll(SelectFPReg(outany, out64, 128).V4S(), scratch.V4H(), 0); + break; + case wasm::SimdOp::V128Load16x4U: + Ushll(SelectFPReg(outany, out64, 128).V4S(), scratch.V4H(), 0); + break; + case wasm::SimdOp::V128Load32x2S: + Sshll(SelectFPReg(outany, out64, 128).V2D(), scratch.V2S(), 0); + break; + case wasm::SimdOp::V128Load32x2U: + Ushll(SelectFPReg(outany, out64, 128).V2D(), scratch.V2S(), 0); + break; + default: + MOZ_CRASH("Unexpected widening op for wasmLoad"); + } + } + } else { + // LDR does the right thing also for access.isZeroExtendSimd128Load() + Ldr(SelectFPReg(outany, out64, 64), srcAddr); + } + break; + case Scalar::Simd128: + Ldr(SelectFPReg(outany, out64, 128), srcAddr); + break; + case Scalar::Uint8Clamped: + case Scalar::BigInt64: + case Scalar::BigUint64: + case Scalar::MaxTypedArrayViewType: + MOZ_CRASH("unexpected array type"); + } + } + + asMasm().memoryBarrierAfter(access.sync()); +} + +// Return true if `address` can be represented as an immediate (possibly scaled +// by the access size) in an LDR/STR type instruction. +// +// For more about the logic here, see vixl::MacroAssembler::LoadStoreMacro(). +static bool IsLSImmediateOffset(uint64_t address, size_t accessByteSize) { + // The predicates below operate on signed values only. + if (address > INT64_MAX) { + return false; + } + + // The access size is always a power of 2, so computing the log amounts to + // counting trailing zeroes. 
+ unsigned logAccessSize = mozilla::CountTrailingZeroes32(accessByteSize); + return (MacroAssemblerCompat::IsImmLSUnscaled(int64_t(address)) || + MacroAssemblerCompat::IsImmLSScaled(int64_t(address), logAccessSize)); +} + +void MacroAssemblerCompat::wasmLoadAbsolute( + const wasm::MemoryAccessDesc& access, Register memoryBase, uint64_t address, + AnyRegister output, Register64 out64) { + if (!IsLSImmediateOffset(address, access.byteSize())) { + // The access will require the constant to be loaded into a temp register. + // Do so here, to keep the logic in wasmLoadImpl() tractable wrt emitting + // trap information. + // + // Almost all constant addresses will in practice be handled by a single MOV + // so do not worry about additional optimizations here. + vixl::UseScratchRegisterScope temps(this); + ARMRegister scratch = temps.AcquireX(); + Mov(scratch, address); + MemOperand srcAddr(X(memoryBase), scratch); + wasmLoadImpl(access, srcAddr, output, out64); + } else { + MemOperand srcAddr(X(memoryBase), address); + wasmLoadImpl(access, srcAddr, output, out64); + } +} + +void MacroAssemblerCompat::wasmStoreImpl(const wasm::MemoryAccessDesc& access, + AnyRegister valany, Register64 val64, + Register memoryBase_, Register ptr_) { + uint32_t offset = access.offset(); + MOZ_ASSERT(offset < asMasm().wasmMaxOffsetGuardLimit()); + + ARMRegister memoryBase(memoryBase_, 64); + ARMRegister ptr(ptr_, 64); + if (offset) { + vixl::UseScratchRegisterScope temps(this); + ARMRegister scratch = temps.AcquireX(); + Add(scratch, ptr, Operand(offset)); + MemOperand destAddr(memoryBase, scratch); + wasmStoreImpl(access, destAddr, valany, val64); + } else { + MemOperand destAddr(memoryBase, ptr); + wasmStoreImpl(access, destAddr, valany, val64); + } +} + +void MacroAssemblerCompat::wasmStoreImpl(const wasm::MemoryAccessDesc& access, + MemOperand dstAddr, AnyRegister valany, + Register64 val64) { + // NOTE: the generated code must match the assembly code in gen_store in + // 
GenerateAtomicOperations.py + asMasm().memoryBarrierBefore(access.sync()); + + { + // Reg+Reg addressing is directly encodable in one Store instruction, hence + // the AutoForbidPoolsAndNops will ensure that the access metadata is + // emitted at the address of the Store. The AutoForbidPoolsAndNops will + // assert if we emit more than one instruction. + + AutoForbidPoolsAndNops afp(this, + /* max number of instructions in scope = */ 1); + + append(access, asMasm().currentOffset()); + switch (access.type()) { + case Scalar::Int8: + case Scalar::Uint8: + Strb(SelectGPReg(valany, val64), dstAddr); + break; + case Scalar::Int16: + case Scalar::Uint16: + Strh(SelectGPReg(valany, val64), dstAddr); + break; + case Scalar::Int32: + case Scalar::Uint32: + // Request a 32-bit register explicitly: when the value is supplied in + // val64, the default size of 64 would make Str write eight bytes for a + // four-byte access. + Str(SelectGPReg(valany, val64, 32), dstAddr); + break; + case Scalar::Int64: + Str(SelectGPReg(valany, val64), dstAddr); + break; + case Scalar::Float32: + Str(SelectFPReg(valany, val64, 32), dstAddr); + break; + case Scalar::Float64: + Str(SelectFPReg(valany, val64, 64), dstAddr); + break; + case Scalar::Simd128: + Str(SelectFPReg(valany, val64, 128), dstAddr); + break; + case Scalar::Uint8Clamped: + case Scalar::BigInt64: + case Scalar::BigUint64: + case Scalar::MaxTypedArrayViewType: + MOZ_CRASH("unexpected array type"); + } + } + + asMasm().memoryBarrierAfter(access.sync()); +} + +void MacroAssemblerCompat::wasmStoreAbsolute( + const wasm::MemoryAccessDesc& access, AnyRegister value, Register64 value64, + Register memoryBase, uint64_t address) { + // See comments in wasmLoadAbsolute. 
+ unsigned logAccessSize = mozilla::CountTrailingZeroes32(access.byteSize()); + if (address > INT64_MAX || !(IsImmLSScaled(int64_t(address), logAccessSize) || + IsImmLSUnscaled(int64_t(address)))) { + vixl::UseScratchRegisterScope temps(this); + ARMRegister scratch = temps.AcquireX(); + Mov(scratch, address); + MemOperand destAddr(X(memoryBase), scratch); + wasmStoreImpl(access, destAddr, value, value64); + } else { + MemOperand destAddr(X(memoryBase), address); + wasmStoreImpl(access, destAddr, value, value64); + } +} + +void MacroAssemblerCompat::compareSimd128Int(Assembler::Condition cond, + ARMFPRegister dest, + ARMFPRegister lhs, + ARMFPRegister rhs) { + switch (cond) { + case Assembler::Equal: + Cmeq(dest, lhs, rhs); + break; + case Assembler::NotEqual: + Cmeq(dest, lhs, rhs); + Mvn(dest, dest); + break; + case Assembler::GreaterThan: + Cmgt(dest, lhs, rhs); + break; + case Assembler::GreaterThanOrEqual: + Cmge(dest, lhs, rhs); + break; + case Assembler::LessThan: + Cmgt(dest, rhs, lhs); + break; + case Assembler::LessThanOrEqual: + Cmge(dest, rhs, lhs); + break; + case Assembler::Above: + Cmhi(dest, lhs, rhs); + break; + case Assembler::AboveOrEqual: + Cmhs(dest, lhs, rhs); + break; + case Assembler::Below: + Cmhi(dest, rhs, lhs); + break; + case Assembler::BelowOrEqual: + Cmhs(dest, rhs, lhs); + break; + default: + MOZ_CRASH("Unexpected SIMD integer condition"); + } +} + +void MacroAssemblerCompat::compareSimd128Float(Assembler::Condition cond, + ARMFPRegister dest, + ARMFPRegister lhs, + ARMFPRegister rhs) { + switch (cond) { + case Assembler::Equal: + Fcmeq(dest, lhs, rhs); + break; + case Assembler::NotEqual: + Fcmeq(dest, lhs, rhs); + Mvn(dest, dest); + break; + case Assembler::GreaterThan: + Fcmgt(dest, lhs, rhs); + break; + case Assembler::GreaterThanOrEqual: + Fcmge(dest, lhs, rhs); + break; + case Assembler::LessThan: + Fcmgt(dest, rhs, lhs); + break; + case Assembler::LessThanOrEqual: + Fcmge(dest, rhs, lhs); + break; + default: + 
MOZ_CRASH("Unexpected SIMD floating-point condition"); + } +} + +// Vector right shifts by a scalar count: each helper below broadcasts `rhs` +// into every lane of a scratch vector (Dup), negates it (Neg), and then issues +// USHL (isUnsigned) or SSHL. Per the Arm A64 ISA, those instructions shift +// right when the per-lane shift amount is negative. +void MacroAssemblerCompat::rightShiftInt8x16(FloatRegister lhs, Register rhs, + FloatRegister dest, + bool isUnsigned) { + ScratchSimd128Scope scratch_(asMasm()); + ARMFPRegister shift = Simd16B(scratch_); + + Dup(shift, ARMRegister(rhs, 32)); + Neg(shift, shift); + + if (isUnsigned) { + Ushl(Simd16B(dest), Simd16B(lhs), shift); + } else { + Sshl(Simd16B(dest), Simd16B(lhs), shift); + } +} + +void MacroAssemblerCompat::rightShiftInt16x8(FloatRegister lhs, Register rhs, + FloatRegister dest, + bool isUnsigned) { + ScratchSimd128Scope scratch_(asMasm()); + ARMFPRegister shift = Simd8H(scratch_); + + Dup(shift, ARMRegister(rhs, 32)); + Neg(shift, shift); + + if (isUnsigned) { + Ushl(Simd8H(dest), Simd8H(lhs), shift); + } else { + Sshl(Simd8H(dest), Simd8H(lhs), shift); + } +} + +void MacroAssemblerCompat::rightShiftInt32x4(FloatRegister lhs, Register rhs, + FloatRegister dest, + bool isUnsigned) { + ScratchSimd128Scope scratch_(asMasm()); + ARMFPRegister shift = Simd4S(scratch_); + + Dup(shift, ARMRegister(rhs, 32)); + Neg(shift, shift); + + if (isUnsigned) { + Ushl(Simd4S(dest), Simd4S(lhs), shift); + } else { + Sshl(Simd4S(dest), Simd4S(lhs), shift); + } +} + +void MacroAssemblerCompat::rightShiftInt64x2(FloatRegister lhs, Register rhs, + FloatRegister dest, + bool isUnsigned) { + ScratchSimd128Scope scratch_(asMasm()); + ARMFPRegister shift = Simd2D(scratch_); + + Dup(shift, ARMRegister(rhs, 64)); + Neg(shift, shift); + + if (isUnsigned) { + Ushl(Simd2D(dest), Simd2D(lhs), shift); + } else { + Sshl(Simd2D(dest), Simd2D(lhs), shift); + } +} + +void MacroAssembler::reserveStack(uint32_t amount) { + // TODO: This bumps |sp| every time we reserve using a second register. + // It would save some instructions if we had a fixed frame size. 
+ vixl::MacroAssembler::Claim(Operand(amount)); + adjustFrame(amount); +} + +void MacroAssembler::Push(RegisterOrSP reg) { + if (IsHiddenSP(reg)) { + push(sp); + } else { + push(AsRegister(reg)); + } + adjustFrame(sizeof(intptr_t)); +} + +//{{{ check_macroassembler_style +// =============================================================== +// MacroAssembler high-level usage. + +void MacroAssembler::flush() { Assembler::flush(); } + +// =============================================================== +// Stack manipulation functions. + +// Routines for saving/restoring registers on the stack. The format is: +// +// (highest address) +// +// integer (X) regs in any order size: 8 * # int regs +// +// if # int regs is odd, +// then an 8 byte alignment hole size: 0 or 8 +// +// double (D) regs in any order size: 8 * # double regs +// +// if # double regs is odd, +// then an 8 byte alignment hole size: 0 or 8 +// +// vector (Q) regs in any order size: 16 * # vector regs +// +// (lowest address) +// +// Hence the size of the save area is 0 % 16. And, provided that the base +// (highest) address is 16-aligned, then the vector reg save/restore accesses +// will also be 16-aligned, as will pairwise operations for the double regs. +// +// Implied by this is that the format of the double and vector dump area +// corresponds with what FloatRegister::GetPushSizeInBytes computes. +// See block comment in MacroAssembler.h for more details. + +size_t MacroAssembler::PushRegsInMaskSizeInBytes(LiveRegisterSet set) { + size_t numIntRegs = set.gprs().size(); + return ((numIntRegs + 1) & ~1) * sizeof(intptr_t) + + FloatRegister::GetPushSizeInBytes(set.fpus()); +} + +// Generate code to dump the values in `set`, either on the stack if `dest` is +// `Nothing` or working backwards from the address denoted by `dest` if it is +// `Some`. 
These two cases are combined so as to minimise the chance of +// mistakenly generating different formats for the same `set`, given that the +// `Some` `dest` case is used extremely rarely. +static void PushOrStoreRegsInMask(MacroAssembler* masm, LiveRegisterSet set, + mozilla::Maybe<Address> dest) { + static_assert(sizeof(FloatRegisters::RegisterContent) == 16); + + // If we're saving to arbitrary memory, check the destination is big enough. + if (dest) { + mozilla::DebugOnly<size_t> bytesRequired = + masm->PushRegsInMaskSizeInBytes(set); + MOZ_ASSERT(dest->offset >= 0); + MOZ_ASSERT(((size_t)dest->offset) >= bytesRequired); + } + + // Note the high limit point; we'll check it again later. + mozilla::DebugOnly<size_t> maxExtentInitial = + dest ? dest->offset : masm->framePushed(); + + // Gather up the integer registers in groups of four, and either push each + // group as a single transfer so as to minimise the number of stack pointer + // changes, or write them individually to memory. Take care to ensure the + // space used remains 16-aligned. + for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more();) { + vixl::CPURegister src[4] = {vixl::NoCPUReg, vixl::NoCPUReg, vixl::NoCPUReg, + vixl::NoCPUReg}; + size_t i; + for (i = 0; i < 4 && iter.more(); i++) { + src[i] = ARMRegister(*iter, 64); + ++iter; + } + MOZ_ASSERT(i > 0); + + if (i == 1 || i == 3) { + // Ensure the stack remains 16-aligned + MOZ_ASSERT(!iter.more()); + src[i] = vixl::xzr; + i++; + } + MOZ_ASSERT(i == 2 || i == 4); + + if (dest) { + for (size_t j = 0; j < i; j++) { + Register ireg = Register::FromCode(src[j].IsZero() ? Registers::xzr + : src[j].code()); + dest->offset -= sizeof(intptr_t); + masm->storePtr(ireg, *dest); + } + } else { + masm->adjustFrame(i * 8); + masm->vixl::MacroAssembler::Push(src[0], src[1], src[2], src[3]); + } + } + + // Now the same for the FP double registers. 
Note that because of how + // ReduceSetForPush works, an underlying AArch64 SIMD/FP register can either + // be present as a double register, or as a V128 register, but not both. + // Firstly, round up the registers to be pushed. + + FloatRegisterSet fpuSet(set.fpus().reduceSetForPush()); + vixl::CPURegister allSrcs[FloatRegisters::TotalPhys]; + size_t numAllSrcs = 0; + + for (FloatRegisterBackwardIterator iter(fpuSet); iter.more(); ++iter) { + FloatRegister reg = *iter; + if (reg.isDouble()) { + MOZ_RELEASE_ASSERT(numAllSrcs < FloatRegisters::TotalPhys); + allSrcs[numAllSrcs] = ARMFPRegister(reg, 64); + numAllSrcs++; + } else { + MOZ_ASSERT(reg.isSimd128()); + } + } + MOZ_RELEASE_ASSERT(numAllSrcs <= FloatRegisters::TotalPhys); + + if ((numAllSrcs & 1) == 1) { + // We've got an odd number of doubles. In order to maintain 16-alignment, + // push the last register twice. We'll skip over the duplicate in + // PopRegsInMaskIgnore. + allSrcs[numAllSrcs] = allSrcs[numAllSrcs - 1]; + numAllSrcs++; + } + MOZ_RELEASE_ASSERT(numAllSrcs <= FloatRegisters::TotalPhys); + MOZ_RELEASE_ASSERT((numAllSrcs & 1) == 0); + + // And now generate the transfers. + size_t i; + if (dest) { + for (i = 0; i < numAllSrcs; i++) { + FloatRegister freg = + FloatRegister(FloatRegisters::FPRegisterID(allSrcs[i].code()), + FloatRegisters::Kind::Double); + dest->offset -= sizeof(double); + masm->storeDouble(freg, *dest); + } + } else { + i = 0; + while (i < numAllSrcs) { + vixl::CPURegister src[4] = {vixl::NoCPUReg, vixl::NoCPUReg, + vixl::NoCPUReg, vixl::NoCPUReg}; + size_t j; + for (j = 0; j < 4 && j + i < numAllSrcs; j++) { + src[j] = allSrcs[j + i]; + } + masm->adjustFrame(8 * j); + masm->vixl::MacroAssembler::Push(src[0], src[1], src[2], src[3]); + i += j; + } + } + MOZ_ASSERT(i == numAllSrcs); + + // Finally, deal with the SIMD (V128) registers. This is a bit simpler + // as there's no need for special-casing to maintain 16-alignment. 
+ + numAllSrcs = 0; + for (FloatRegisterBackwardIterator iter(fpuSet); iter.more(); ++iter) { + FloatRegister reg = *iter; + if (reg.isSimd128()) { + MOZ_RELEASE_ASSERT(numAllSrcs < FloatRegisters::TotalPhys); + allSrcs[numAllSrcs] = ARMFPRegister(reg, 128); + numAllSrcs++; + } + } + MOZ_RELEASE_ASSERT(numAllSrcs <= FloatRegisters::TotalPhys); + + // Generate the transfers. + if (dest) { + for (i = 0; i < numAllSrcs; i++) { + FloatRegister freg = + FloatRegister(FloatRegisters::FPRegisterID(allSrcs[i].code()), + FloatRegisters::Kind::Simd128); + dest->offset -= FloatRegister::SizeOfSimd128; + masm->storeUnalignedSimd128(freg, *dest); + } + } else { + i = 0; + while (i < numAllSrcs) { + vixl::CPURegister src[4] = {vixl::NoCPUReg, vixl::NoCPUReg, + vixl::NoCPUReg, vixl::NoCPUReg}; + size_t j; + for (j = 0; j < 4 && j + i < numAllSrcs; j++) { + src[j] = allSrcs[j + i]; + } + masm->adjustFrame(16 * j); + masm->vixl::MacroAssembler::Push(src[0], src[1], src[2], src[3]); + i += j; + } + } + MOZ_ASSERT(i == numAllSrcs); + + // Final overrun check. + if (dest) { + MOZ_ASSERT(maxExtentInitial - dest->offset == + masm->PushRegsInMaskSizeInBytes(set)); + } else { + MOZ_ASSERT(masm->framePushed() - maxExtentInitial == + masm->PushRegsInMaskSizeInBytes(set)); + } +} + +void MacroAssembler::PushRegsInMask(LiveRegisterSet set) { + PushOrStoreRegsInMask(this, set, mozilla::Nothing()); +} + +void MacroAssembler::storeRegsInMask(LiveRegisterSet set, Address dest, + Register scratch) { + PushOrStoreRegsInMask(this, set, mozilla::Some(dest)); +} + +// This is a helper function for PopRegsInMaskIgnore below. It emits the +// loads described by dests[0] and [1] and offsets[0] and [1], generating a +// load-pair if it can. +static void GeneratePendingLoadsThenFlush(MacroAssembler* masm, + vixl::CPURegister* dests, + uint32_t* offsets, + uint32_t transactionSize) { + // Generate the loads .. + if (!dests[0].IsNone()) { + if (!dests[1].IsNone()) { + // [0] and [1] both present. 
+ if (offsets[0] + transactionSize == offsets[1]) { + masm->Ldp(dests[0], dests[1], + MemOperand(masm->GetStackPointer64(), offsets[0])); + } else { + // Theoretically we could check for a load-pair with the destinations + // switched, but our callers will never generate that. Hence there's + // no loss in giving up at this point and generating two loads. + masm->Ldr(dests[0], MemOperand(masm->GetStackPointer64(), offsets[0])); + masm->Ldr(dests[1], MemOperand(masm->GetStackPointer64(), offsets[1])); + } + } else { + // [0] only. + masm->Ldr(dests[0], MemOperand(masm->GetStackPointer64(), offsets[0])); + } + } else { + if (!dests[1].IsNone()) { + // [1] only. Can't happen because callers always fill [0] before [1]. + MOZ_CRASH("GenerateLoadsThenFlush"); + } else { + // Neither entry valid. This can happen. + } + } + + // .. and flush. + dests[0] = dests[1] = vixl::NoCPUReg; + offsets[0] = offsets[1] = 0; +} + +void MacroAssembler::PopRegsInMaskIgnore(LiveRegisterSet set, + LiveRegisterSet ignore) { + mozilla::DebugOnly<size_t> framePushedInitial = framePushed(); + + // The offset of the data from the stack pointer. + uint32_t offset = 0; + + // The set of FP/SIMD registers we need to restore. + FloatRegisterSet fpuSet(set.fpus().reduceSetForPush()); + + // The set of registers to ignore. BroadcastToAllSizes() is used to avoid + // any ambiguities arising from (eg) `fpuSet` containing q17 but `ignore` + // containing d17. + FloatRegisterSet ignoreFpusBroadcasted( + FloatRegister::BroadcastToAllSizes(ignore.fpus())); + + // First recover the SIMD (V128) registers. This is straightforward in that + // we don't need to think about alignment holes. + + // These three form a two-entry queue that holds loads that we know we + // need, but which we haven't yet emitted. 
+ vixl::CPURegister pendingDests[2] = {vixl::NoCPUReg, vixl::NoCPUReg}; + uint32_t pendingOffsets[2] = {0, 0}; + size_t nPending = 0; + + for (FloatRegisterIterator iter(fpuSet); iter.more(); ++iter) { + FloatRegister reg = *iter; + if (reg.isDouble()) { + continue; + } + MOZ_RELEASE_ASSERT(reg.isSimd128()); + + uint32_t offsetForReg = offset; + offset += FloatRegister::SizeOfSimd128; + + if (ignoreFpusBroadcasted.hasRegisterIndex(reg)) { + continue; + } + + MOZ_ASSERT(nPending <= 2); + if (nPending == 2) { + GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 16); + nPending = 0; + } + pendingDests[nPending] = ARMFPRegister(reg, 128); + pendingOffsets[nPending] = offsetForReg; + nPending++; + } + GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 16); + nPending = 0; + + MOZ_ASSERT((offset % 16) == 0); + + // Now recover the FP double registers. This is more tricky in that we need + // to skip over the lowest-addressed of them if the number of them was odd. 
+ + if ((((fpuSet.bits() & FloatRegisters::AllDoubleMask).size()) & 1) == 1) { + offset += sizeof(double); + } + + for (FloatRegisterIterator iter(fpuSet); iter.more(); ++iter) { + FloatRegister reg = *iter; + if (reg.isSimd128()) { + continue; + } + /* true but redundant, per loop above: MOZ_RELEASE_ASSERT(reg.isDouble()) */ + + uint32_t offsetForReg = offset; + offset += sizeof(double); + + if (ignoreFpusBroadcasted.hasRegisterIndex(reg)) { + continue; + } + + MOZ_ASSERT(nPending <= 2); + if (nPending == 2) { + GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 8); + nPending = 0; + } + pendingDests[nPending] = ARMFPRegister(reg, 64); + pendingOffsets[nPending] = offsetForReg; + nPending++; + } + GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 8); + nPending = 0; + + MOZ_ASSERT((offset % 16) == 0); + MOZ_ASSERT(offset == set.fpus().getPushSizeInBytes()); + + // And finally recover the integer registers, again skipping an alignment + // hole if it exists. 
+ + if ((set.gprs().size() & 1) == 1) { + offset += sizeof(uint64_t); + } + + for (GeneralRegisterIterator iter(set.gprs()); iter.more(); ++iter) { + Register reg = *iter; + + uint32_t offsetForReg = offset; + offset += sizeof(uint64_t); + + if (ignore.has(reg)) { + continue; + } + + MOZ_ASSERT(nPending <= 2); + if (nPending == 2) { + GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 8); + nPending = 0; + } + pendingDests[nPending] = ARMRegister(reg, 64); + pendingOffsets[nPending] = offsetForReg; + nPending++; + } + GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 8); + + MOZ_ASSERT((offset % 16) == 0); + + size_t bytesPushed = PushRegsInMaskSizeInBytes(set); + MOZ_ASSERT(offset == bytesPushed); + freeStack(bytesPushed); +} + +void MacroAssembler::Push(Register reg) { + push(reg); + adjustFrame(sizeof(intptr_t)); +} + +void MacroAssembler::Push(Register reg1, Register reg2, Register reg3, + Register reg4) { + push(reg1, reg2, reg3, reg4); + adjustFrame(4 * sizeof(intptr_t)); +} + +void MacroAssembler::Push(const Imm32 imm) { + push(imm); + adjustFrame(sizeof(intptr_t)); +} + +void MacroAssembler::Push(const ImmWord imm) { + push(imm); + adjustFrame(sizeof(intptr_t)); +} + +void MacroAssembler::Push(const ImmPtr imm) { + push(imm); + adjustFrame(sizeof(intptr_t)); +} + +void MacroAssembler::Push(const ImmGCPtr ptr) { + push(ptr); + adjustFrame(sizeof(intptr_t)); +} + +void MacroAssembler::Push(FloatRegister f) { + push(f); + adjustFrame(sizeof(double)); +} + +void MacroAssembler::PushBoxed(FloatRegister reg) { + subFromStackPtr(Imm32(sizeof(double))); + boxDouble(reg, Address(getStackPointer(), 0)); + adjustFrame(sizeof(double)); +} + +void MacroAssembler::Pop(Register reg) { + pop(reg); + adjustFrame(-1 * int64_t(sizeof(int64_t))); +} + +void MacroAssembler::Pop(FloatRegister f) { + loadDouble(Address(getStackPointer(), 0), f); + freeStack(sizeof(double)); +} + +void MacroAssembler::Pop(const ValueOperand& val) { + pop(val); + 
adjustFrame(-1 * int64_t(sizeof(int64_t))); +} + +// =============================================================== +// Simple call functions. + +CodeOffset MacroAssembler::call(Register reg) { + // This sync has been observed (and is expected) to be necessary. + // eg testcase: tests/debug/bug1107525.js + syncStackPtr(); + Blr(ARMRegister(reg, 64)); + return CodeOffset(currentOffset()); +} + +CodeOffset MacroAssembler::call(Label* label) { + // This sync has been observed (and is expected) to be necessary. + // eg testcase: tests/basic/testBug504520Harder.js + syncStackPtr(); + Bl(label); + return CodeOffset(currentOffset()); +} + +void MacroAssembler::call(ImmPtr imm) { + // This sync has been observed (and is expected) to be necessary. + // eg testcase: asm.js/testTimeout5.js + syncStackPtr(); + vixl::UseScratchRegisterScope temps(this); + MOZ_ASSERT(temps.IsAvailable(ScratchReg64)); // ip0 + temps.Exclude(ScratchReg64); + movePtr(imm, ScratchReg64.asUnsized()); + Blr(ScratchReg64); +} + +void MacroAssembler::call(ImmWord imm) { call(ImmPtr((void*)imm.value)); } + +CodeOffset MacroAssembler::call(wasm::SymbolicAddress imm) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + // This sync is believed to be necessary, although no case in jit-test/tests + // has been observed to cause SP != PSP here. + syncStackPtr(); + movePtr(imm, scratch); + Blr(ARMRegister(scratch, 64)); + return CodeOffset(currentOffset()); +} + +void MacroAssembler::call(const Address& addr) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + // This sync has been observed (and is expected) to be necessary. 
+ // eg testcase: tests/backup-point-bug1315634.js + syncStackPtr(); + loadPtr(addr, scratch); + Blr(ARMRegister(scratch, 64)); +} + +void MacroAssembler::call(JitCode* c) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch64 = temps.AcquireX(); + // This sync has been observed (and is expected) to be necessary. + // eg testcase: arrays/new-array-undefined-undefined-more-args-2.js + syncStackPtr(); + BufferOffset off = immPool64(scratch64, uint64_t(c->raw())); + addPendingJump(off, ImmPtr(c->raw()), RelocationKind::JITCODE); + blr(scratch64); +} + +CodeOffset MacroAssembler::callWithPatch() { + // This needs to sync. Wasm goes through this one for intramodule calls. + // + // In other cases, wasm goes through masm.wasmCallImport(), + // masm.wasmCallBuiltinInstanceMethod, masm.wasmCallIndirect, all of which + // sync. + // + // This sync is believed to be necessary, although no case in jit-test/tests + // has been observed to cause SP != PSP here. + syncStackPtr(); + bl(0, LabelDoc()); + return CodeOffset(currentOffset()); +} +void MacroAssembler::patchCall(uint32_t callerOffset, uint32_t calleeOffset) { + Instruction* inst = getInstructionAt(BufferOffset(callerOffset - 4)); + MOZ_ASSERT(inst->IsBL()); + ptrdiff_t relTarget = (int)calleeOffset - ((int)callerOffset - 4); + ptrdiff_t relTarget00 = relTarget >> 2; + MOZ_RELEASE_ASSERT((relTarget & 0x3) == 0); + MOZ_RELEASE_ASSERT(vixl::IsInt26(relTarget00)); + bl(inst, relTarget00); +} + +CodeOffset MacroAssembler::farJumpWithPatch() { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch = temps.AcquireX(); + const ARMRegister scratch2 = temps.AcquireX(); + + AutoForbidPoolsAndNops afp(this, + /* max number of instructions in scope = */ 7); + + mozilla::DebugOnly<uint32_t> before = currentOffset(); + + align(8); // At most one nop + + Label branch; + adr(scratch2, &branch); + ldr(scratch, vixl::MemOperand(scratch2, 4)); + add(scratch2, scratch2, scratch); + CodeOffset 
offs(currentOffset()); + bind(&branch); + br(scratch2); + Emit(UINT32_MAX); + Emit(UINT32_MAX); + + mozilla::DebugOnly<uint32_t> after = currentOffset(); + + MOZ_ASSERT(after - before == 24 || after - before == 28); + + return offs; +} + +void MacroAssembler::patchFarJump(CodeOffset farJump, uint32_t targetOffset) { + Instruction* inst1 = getInstructionAt(BufferOffset(farJump.offset() + 4)); + Instruction* inst2 = getInstructionAt(BufferOffset(farJump.offset() + 8)); + + int64_t distance = (int64_t)targetOffset - (int64_t)farJump.offset(); + + MOZ_ASSERT(inst1->InstructionBits() == UINT32_MAX); + MOZ_ASSERT(inst2->InstructionBits() == UINT32_MAX); + + inst1->SetInstructionBits((uint32_t)distance); + inst2->SetInstructionBits((uint32_t)(distance >> 32)); +} + +CodeOffset MacroAssembler::nopPatchableToCall() { + AutoForbidPoolsAndNops afp(this, + /* max number of instructions in scope = */ 1); + Nop(); + return CodeOffset(currentOffset()); +} + +void MacroAssembler::patchNopToCall(uint8_t* call, uint8_t* target) { + uint8_t* inst = call - 4; + Instruction* instr = reinterpret_cast<Instruction*>(inst); + MOZ_ASSERT(instr->IsBL() || instr->IsNOP()); + bl(instr, (target - inst) >> 2); +} + +void MacroAssembler::patchCallToNop(uint8_t* call) { + uint8_t* inst = call - 4; + Instruction* instr = reinterpret_cast<Instruction*>(inst); + MOZ_ASSERT(instr->IsBL() || instr->IsNOP()); + nop(instr); +} + +void MacroAssembler::pushReturnAddress() { + MOZ_RELEASE_ASSERT(!sp.Is(GetStackPointer64()), "Not valid"); + push(lr); +} + +void MacroAssembler::popReturnAddress() { + MOZ_RELEASE_ASSERT(!sp.Is(GetStackPointer64()), "Not valid"); + pop(lr); +} + +// =============================================================== +// ABI function calls. + +void MacroAssembler::setupUnalignedABICall(Register scratch) { + // Because wasm operates without the need for dynamic alignment of SP, it is + // implied that this routine should never be called when generating wasm. 
+ MOZ_ASSERT(!IsCompilingWasm()); + + // The following won't work for SP -- needs slightly different logic. + MOZ_RELEASE_ASSERT(GetStackPointer64().Is(PseudoStackPointer64)); + + setupNativeABICall(); + dynamicAlignment_ = true; + + int64_t alignment = ~(int64_t(ABIStackAlignment) - 1); + ARMRegister scratch64(scratch, 64); + MOZ_ASSERT(!scratch64.Is(PseudoStackPointer64)); + + // Always save LR -- Baseline ICs assume that LR isn't modified. + push(lr); + + // Remember the stack address on entry. This is reloaded in callWithABIPost + // below. + Mov(scratch64, PseudoStackPointer64); + + // Make alignment, including the effective push of the previous sp. + Sub(PseudoStackPointer64, PseudoStackPointer64, Operand(8)); + And(PseudoStackPointer64, PseudoStackPointer64, Operand(alignment)); + syncStackPtr(); + + // Store previous sp to the top of the stack, aligned. This is also + // reloaded in callWithABIPost. + Str(scratch64, MemOperand(PseudoStackPointer64, 0)); +} + +void MacroAssembler::callWithABIPre(uint32_t* stackAdjust, bool callFromWasm) { + // wasm operates without the need for dynamic alignment of SP. + MOZ_ASSERT(!(dynamicAlignment_ && callFromWasm)); + + MOZ_ASSERT(inCall_); + uint32_t stackForCall = abiArgs_.stackBytesConsumedSoFar(); + + // ARM64 *really* wants SP to always be 16-aligned, so ensure this now. + if (dynamicAlignment_) { + stackForCall += ComputeByteAlignment(stackForCall, StackAlignment); + } else { + // This can happen when we attach out-of-line stubs for rare cases. For + // example CodeGenerator::visitWasmTruncateToInt32 adds an out-of-line + // chunk. + uint32_t alignmentAtPrologue = callFromWasm ? 
sizeof(wasm::Frame) : 0; + stackForCall += ComputeByteAlignment( + stackForCall + framePushed() + alignmentAtPrologue, ABIStackAlignment); + } + + *stackAdjust = stackForCall; + reserveStack(*stackAdjust); + { + enoughMemory_ &= moveResolver_.resolve(); + if (!enoughMemory_) { + return; + } + MoveEmitter emitter(*this); + emitter.emit(moveResolver_); + emitter.finish(); + } + + // Call boundaries communicate stack via SP. + // (jseward, 2021Mar03) This sync may well be redundant, given that all of + // the MacroAssembler::call methods generate a sync before the call. + // Removing it does not cause any failures for all of jit-tests. + syncStackPtr(); + + assertStackAlignment(ABIStackAlignment); +} + +void MacroAssembler::callWithABIPost(uint32_t stackAdjust, MoveOp::Type result, + bool callFromWasm) { + // wasm operates without the need for dynamic alignment of SP. + MOZ_ASSERT(!(dynamicAlignment_ && callFromWasm)); + + // Call boundaries communicate stack via SP, so we must resync PSP now. + initPseudoStackPtr(); + + freeStack(stackAdjust); + + if (dynamicAlignment_) { + // This then-clause makes more sense if you first read + // setupUnalignedABICall above. + // + // Restore the stack pointer from entry. The stack pointer will have been + // saved by setupUnalignedABICall. This is fragile in that it assumes + // that uses of this routine (callWithABIPost) with `dynamicAlignment_ == + // true` are preceded by matching calls to setupUnalignedABICall. But + // there's nothing that enforces that mechanically. If we really want to + // enforce this, we could add a debug-only CallWithABIState enum to the + // MacroAssembler and assert that setupUnalignedABICall updates it before + // we get here, then reset it to its initial state. + Ldr(GetStackPointer64(), MemOperand(GetStackPointer64(), 0)); + syncStackPtr(); + + // Restore LR. This restores LR to the value stored by + // setupUnalignedABICall, which should have been called just before + // callWithABIPre. 
This is, per the above comment, also fragile. + pop(lr); + + // SP may be < PSP now. That is expected from the behaviour of `pop`. It + // is not clear why the following `syncStackPtr` is necessary, but it is: + // without it, the following test segfaults: + // tests/backup-point-bug1315634.js + syncStackPtr(); + } + + // If the ABI's return regs are where ION is expecting them, then + // no other work needs to be done. + +#ifdef DEBUG + MOZ_ASSERT(inCall_); + inCall_ = false; +#endif +} + +void MacroAssembler::callWithABINoProfiler(Register fun, MoveOp::Type result) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + movePtr(fun, scratch); + + uint32_t stackAdjust; + callWithABIPre(&stackAdjust); + call(scratch); + callWithABIPost(stackAdjust, result); +} + +void MacroAssembler::callWithABINoProfiler(const Address& fun, + MoveOp::Type result) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + loadPtr(fun, scratch); + + uint32_t stackAdjust; + callWithABIPre(&stackAdjust); + call(scratch); + callWithABIPost(stackAdjust, result); +} + +// =============================================================== +// Jit Frames. 
+ +uint32_t MacroAssembler::pushFakeReturnAddress(Register scratch) { + enterNoPool(3); + Label fakeCallsite; + + Adr(ARMRegister(scratch, 64), &fakeCallsite); + Push(scratch); + bind(&fakeCallsite); + uint32_t pseudoReturnOffset = currentOffset(); + + leaveNoPool(); + return pseudoReturnOffset; +} + +bool MacroAssemblerCompat::buildOOLFakeExitFrame(void* fakeReturnAddr) { + asMasm().PushFrameDescriptor(FrameType::IonJS); + asMasm().Push(ImmPtr(fakeReturnAddr)); + asMasm().Push(FramePointer); + return true; +} + +// =============================================================== +// Move instructions + +void MacroAssembler::moveValue(const TypedOrValueRegister& src, + const ValueOperand& dest) { + if (src.hasValue()) { + moveValue(src.valueReg(), dest); + return; + } + + MIRType type = src.type(); + AnyRegister reg = src.typedReg(); + + if (!IsFloatingPointType(type)) { + boxNonDouble(ValueTypeFromMIRType(type), reg.gpr(), dest); + return; + } + + ScratchDoubleScope scratch(*this); + FloatRegister freg = reg.fpu(); + if (type == MIRType::Float32) { + convertFloat32ToDouble(freg, scratch); + freg = scratch; + } + boxDouble(freg, dest, scratch); +} + +void MacroAssembler::moveValue(const ValueOperand& src, + const ValueOperand& dest) { + if (src == dest) { + return; + } + movePtr(src.valueReg(), dest.valueReg()); +} + +void MacroAssembler::moveValue(const Value& src, const ValueOperand& dest) { + if (!src.isGCThing()) { + movePtr(ImmWord(src.asRawBits()), dest.valueReg()); + return; + } + + BufferOffset load = + movePatchablePtr(ImmPtr(src.bitsAsPunboxPointer()), dest.valueReg()); + writeDataRelocation(src, load); +} + +// =============================================================== +// Branch functions + +void MacroAssembler::loadStoreBuffer(Register ptr, Register buffer) { + And(ARMRegister(buffer, 64), ARMRegister(ptr, 64), + Operand(int32_t(~gc::ChunkMask))); + loadPtr(Address(buffer, gc::ChunkStoreBufferOffset), buffer); +} + +void 
MacroAssembler::branchPtrInNurseryChunk(Condition cond, Register ptr, + Register temp, Label* label) { + MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual); + MOZ_ASSERT(ptr != temp); + MOZ_ASSERT(ptr != ScratchReg && + ptr != ScratchReg2); // Both may be used internally. + MOZ_ASSERT(temp != ScratchReg && temp != ScratchReg2); + + And(ARMRegister(temp, 64), ARMRegister(ptr, 64), + Operand(int32_t(~gc::ChunkMask))); + branchPtr(InvertCondition(cond), Address(temp, gc::ChunkStoreBufferOffset), + ImmWord(0), label); +} + +void MacroAssembler::branchValueIsNurseryCell(Condition cond, + const Address& address, + Register temp, Label* label) { + branchValueIsNurseryCellImpl(cond, address, temp, label); +} + +void MacroAssembler::branchValueIsNurseryCell(Condition cond, + ValueOperand value, Register temp, + Label* label) { + branchValueIsNurseryCellImpl(cond, value, temp, label); +} + +template <typename T> +void MacroAssembler::branchValueIsNurseryCellImpl(Condition cond, + const T& value, Register temp, + Label* label) { + MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual); + MOZ_ASSERT(temp != ScratchReg && + temp != ScratchReg2); // Both may be used internally. + + Label done; + branchTestGCThing(Assembler::NotEqual, value, + cond == Assembler::Equal ? 
&done : label); + + getGCThingValueChunk(value, temp); + branchPtr(InvertCondition(cond), Address(temp, gc::ChunkStoreBufferOffset), + ImmWord(0), label); + + bind(&done); +} + +void MacroAssembler::branchTestValue(Condition cond, const ValueOperand& lhs, + const Value& rhs, Label* label) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch64 = temps.AcquireX(); + MOZ_ASSERT(scratch64.asUnsized() != lhs.valueReg()); + moveValue(rhs, ValueOperand(scratch64.asUnsized())); + Cmp(ARMRegister(lhs.valueReg(), 64), scratch64); + B(label, cond); +} + +// ======================================================================== +// Memory access primitives. +template <typename T> +void MacroAssembler::storeUnboxedValue(const ConstantOrRegister& value, + MIRType valueType, const T& dest) { + MOZ_ASSERT(valueType < MIRType::Value); + + if (valueType == MIRType::Double) { + boxDouble(value.reg().typedReg().fpu(), dest); + return; + } + + if (value.constant()) { + storeValue(value.value(), dest); + } else { + storeValue(ValueTypeFromMIRType(valueType), value.reg().typedReg().gpr(), + dest); + } +} + +template void MacroAssembler::storeUnboxedValue(const ConstantOrRegister& value, + MIRType valueType, + const Address& dest); +template void MacroAssembler::storeUnboxedValue( + const ConstantOrRegister& value, MIRType valueType, + const BaseObjectElementIndex& dest); + +void MacroAssembler::comment(const char* msg) { Assembler::comment(msg); } + +// ======================================================================== +// wasm support + +CodeOffset MacroAssembler::wasmTrapInstruction() { + AutoForbidPoolsAndNops afp(this, + /* max number of instructions in scope = */ 1); + CodeOffset offs(currentOffset()); + Unreachable(); + return offs; +} + +void MacroAssembler::wasmBoundsCheck32(Condition cond, Register index, + Register boundsCheckLimit, Label* ok) { + branch32(cond, index, boundsCheckLimit, ok); + if 
(JitOptions.spectreIndexMasking) { + csel(ARMRegister(index, 32), vixl::wzr, ARMRegister(index, 32), cond); + } +} + +void MacroAssembler::wasmBoundsCheck32(Condition cond, Register index, + Address boundsCheckLimit, Label* ok) { + branch32(cond, index, boundsCheckLimit, ok); + if (JitOptions.spectreIndexMasking) { + csel(ARMRegister(index, 32), vixl::wzr, ARMRegister(index, 32), cond); + } +} + +void MacroAssembler::wasmBoundsCheck64(Condition cond, Register64 index, + Register64 boundsCheckLimit, Label* ok) { + branchPtr(cond, index.reg, boundsCheckLimit.reg, ok); + if (JitOptions.spectreIndexMasking) { + csel(ARMRegister(index.reg, 64), vixl::xzr, ARMRegister(index.reg, 64), + cond); + } +} + +void MacroAssembler::wasmBoundsCheck64(Condition cond, Register64 index, + Address boundsCheckLimit, Label* ok) { + branchPtr(InvertCondition(cond), boundsCheckLimit, index.reg, ok); + if (JitOptions.spectreIndexMasking) { + csel(ARMRegister(index.reg, 64), vixl::xzr, ARMRegister(index.reg, 64), + cond); + } +} + +// FCVTZU behaves as follows: +// +// on NaN it produces zero +// on too large it produces UINT_MAX (for appropriate type) +// on too small it produces zero +// +// FCVTZS behaves as follows: +// +// on NaN it produces zero +// on too large it produces INT_MAX (for appropriate type) +// on too small it produces INT_MIN (ditto) + +void MacroAssembler::wasmTruncateDoubleToUInt32(FloatRegister input_, + Register output_, + bool isSaturating, + Label* oolEntry) { + ARMRegister output(output_, 32); + ARMFPRegister input(input_, 64); + Fcvtzu(output, input); + if (!isSaturating) { + Cmp(output, 0); + Ccmp(output, -1, vixl::ZFlag, Assembler::NotEqual); + B(oolEntry, Assembler::Equal); + } +} + +void MacroAssembler::wasmTruncateFloat32ToUInt32(FloatRegister input_, + Register output_, + bool isSaturating, + Label* oolEntry) { + ARMRegister output(output_, 32); + ARMFPRegister input(input_, 32); + Fcvtzu(output, input); + if (!isSaturating) { + Cmp(output, 0); + 
Ccmp(output, -1, vixl::ZFlag, Assembler::NotEqual); + B(oolEntry, Assembler::Equal); + } +} + +void MacroAssembler::wasmTruncateDoubleToInt32(FloatRegister input_, + Register output_, + bool isSaturating, + Label* oolEntry) { + ARMRegister output(output_, 32); + ARMFPRegister input(input_, 64); + Fcvtzs(output, input); + if (!isSaturating) { + Cmp(output, 0); + Ccmp(output, INT32_MAX, vixl::ZFlag, Assembler::NotEqual); + Ccmp(output, INT32_MIN, vixl::ZFlag, Assembler::NotEqual); + B(oolEntry, Assembler::Equal); + } +} + +void MacroAssembler::wasmTruncateFloat32ToInt32(FloatRegister input_, + Register output_, + bool isSaturating, + Label* oolEntry) { + ARMRegister output(output_, 32); + ARMFPRegister input(input_, 32); + Fcvtzs(output, input); + if (!isSaturating) { + Cmp(output, 0); + Ccmp(output, INT32_MAX, vixl::ZFlag, Assembler::NotEqual); + Ccmp(output, INT32_MIN, vixl::ZFlag, Assembler::NotEqual); + B(oolEntry, Assembler::Equal); + } +} + +void MacroAssembler::wasmTruncateDoubleToUInt64( + FloatRegister input_, Register64 output_, bool isSaturating, + Label* oolEntry, Label* oolRejoin, FloatRegister tempDouble) { + MOZ_ASSERT(tempDouble.isInvalid()); + + ARMRegister output(output_.reg, 64); + ARMFPRegister input(input_, 64); + Fcvtzu(output, input); + if (!isSaturating) { + Cmp(output, 0); + Ccmp(output, -1, vixl::ZFlag, Assembler::NotEqual); + B(oolEntry, Assembler::Equal); + bind(oolRejoin); + } +} + +void MacroAssembler::wasmTruncateFloat32ToUInt64( + FloatRegister input_, Register64 output_, bool isSaturating, + Label* oolEntry, Label* oolRejoin, FloatRegister tempDouble) { + MOZ_ASSERT(tempDouble.isInvalid()); + + ARMRegister output(output_.reg, 64); + ARMFPRegister input(input_, 32); + Fcvtzu(output, input); + if (!isSaturating) { + Cmp(output, 0); + Ccmp(output, -1, vixl::ZFlag, Assembler::NotEqual); + B(oolEntry, Assembler::Equal); + bind(oolRejoin); + } +} + +void MacroAssembler::wasmTruncateDoubleToInt64( + FloatRegister input_, Register64 output_, 
bool isSaturating, + Label* oolEntry, Label* oolRejoin, FloatRegister tempDouble) { + MOZ_ASSERT(tempDouble.isInvalid()); + + ARMRegister output(output_.reg, 64); + ARMFPRegister input(input_, 64); + Fcvtzs(output, input); + if (!isSaturating) { + Cmp(output, 0); + Ccmp(output, INT64_MAX, vixl::ZFlag, Assembler::NotEqual); + Ccmp(output, INT64_MIN, vixl::ZFlag, Assembler::NotEqual); + B(oolEntry, Assembler::Equal); + bind(oolRejoin); + } +} + +void MacroAssembler::wasmTruncateFloat32ToInt64( + FloatRegister input_, Register64 output_, bool isSaturating, + Label* oolEntry, Label* oolRejoin, FloatRegister tempDouble) { + ARMRegister output(output_.reg, 64); + ARMFPRegister input(input_, 32); + Fcvtzs(output, input); + if (!isSaturating) { + Cmp(output, 0); + Ccmp(output, INT64_MAX, vixl::ZFlag, Assembler::NotEqual); + Ccmp(output, INT64_MIN, vixl::ZFlag, Assembler::NotEqual); + B(oolEntry, Assembler::Equal); + bind(oolRejoin); + } +} + +void MacroAssembler::oolWasmTruncateCheckF32ToI32(FloatRegister input, + Register output, + TruncFlags flags, + wasm::BytecodeOffset off, + Label* rejoin) { + Label notNaN; + branchFloat(Assembler::DoubleOrdered, input, input, ¬NaN); + wasmTrap(wasm::Trap::InvalidConversionToInteger, off); + bind(¬NaN); + + Label isOverflow; + const float two_31 = -float(INT32_MIN); + ScratchFloat32Scope fpscratch(*this); + if (flags & TRUNC_UNSIGNED) { + loadConstantFloat32(two_31 * 2, fpscratch); + branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, + &isOverflow); + loadConstantFloat32(-1.0f, fpscratch); + branchFloat(Assembler::DoubleGreaterThan, input, fpscratch, rejoin); + } else { + loadConstantFloat32(two_31, fpscratch); + branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, + &isOverflow); + loadConstantFloat32(-two_31, fpscratch); + branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, rejoin); + } + bind(&isOverflow); + wasmTrap(wasm::Trap::IntegerOverflow, off); +} + +void 
MacroAssembler::oolWasmTruncateCheckF64ToI32(FloatRegister input, + Register output, + TruncFlags flags, + wasm::BytecodeOffset off, + Label* rejoin) { + Label notNaN; + branchDouble(Assembler::DoubleOrdered, input, input, ¬NaN); + wasmTrap(wasm::Trap::InvalidConversionToInteger, off); + bind(¬NaN); + + Label isOverflow; + const double two_31 = -double(INT32_MIN); + ScratchDoubleScope fpscratch(*this); + if (flags & TRUNC_UNSIGNED) { + loadConstantDouble(two_31 * 2, fpscratch); + branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, + &isOverflow); + loadConstantDouble(-1.0, fpscratch); + branchDouble(Assembler::DoubleGreaterThan, input, fpscratch, rejoin); + } else { + loadConstantDouble(two_31, fpscratch); + branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, + &isOverflow); + loadConstantDouble(-two_31 - 1, fpscratch); + branchDouble(Assembler::DoubleGreaterThan, input, fpscratch, rejoin); + } + bind(&isOverflow); + wasmTrap(wasm::Trap::IntegerOverflow, off); +} + +void MacroAssembler::oolWasmTruncateCheckF32ToI64(FloatRegister input, + Register64 output, + TruncFlags flags, + wasm::BytecodeOffset off, + Label* rejoin) { + Label notNaN; + branchFloat(Assembler::DoubleOrdered, input, input, ¬NaN); + wasmTrap(wasm::Trap::InvalidConversionToInteger, off); + bind(¬NaN); + + Label isOverflow; + const float two_63 = -float(INT64_MIN); + ScratchFloat32Scope fpscratch(*this); + if (flags & TRUNC_UNSIGNED) { + loadConstantFloat32(two_63 * 2, fpscratch); + branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, + &isOverflow); + loadConstantFloat32(-1.0f, fpscratch); + branchFloat(Assembler::DoubleGreaterThan, input, fpscratch, rejoin); + } else { + loadConstantFloat32(two_63, fpscratch); + branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, + &isOverflow); + loadConstantFloat32(-two_63, fpscratch); + branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, rejoin); + } + bind(&isOverflow); + 
wasmTrap(wasm::Trap::IntegerOverflow, off); +} + +void MacroAssembler::oolWasmTruncateCheckF64ToI64(FloatRegister input, + Register64 output, + TruncFlags flags, + wasm::BytecodeOffset off, + Label* rejoin) { + Label notNaN; + branchDouble(Assembler::DoubleOrdered, input, input, ¬NaN); + wasmTrap(wasm::Trap::InvalidConversionToInteger, off); + bind(¬NaN); + + Label isOverflow; + const double two_63 = -double(INT64_MIN); + ScratchDoubleScope fpscratch(*this); + if (flags & TRUNC_UNSIGNED) { + loadConstantDouble(two_63 * 2, fpscratch); + branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, + &isOverflow); + loadConstantDouble(-1.0, fpscratch); + branchDouble(Assembler::DoubleGreaterThan, input, fpscratch, rejoin); + } else { + loadConstantDouble(two_63, fpscratch); + branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, + &isOverflow); + loadConstantDouble(-two_63, fpscratch); + branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, rejoin); + } + bind(&isOverflow); + wasmTrap(wasm::Trap::IntegerOverflow, off); +} + +void MacroAssembler::wasmLoad(const wasm::MemoryAccessDesc& access, + Register memoryBase, Register ptr, + AnyRegister output) { + wasmLoadImpl(access, memoryBase, ptr, output, Register64::Invalid()); +} + +void MacroAssembler::wasmLoadI64(const wasm::MemoryAccessDesc& access, + Register memoryBase, Register ptr, + Register64 output) { + wasmLoadImpl(access, memoryBase, ptr, AnyRegister(), output); +} + +void MacroAssembler::wasmStore(const wasm::MemoryAccessDesc& access, + AnyRegister value, Register memoryBase, + Register ptr) { + wasmStoreImpl(access, value, Register64::Invalid(), memoryBase, ptr); +} + +void MacroAssembler::wasmStoreI64(const wasm::MemoryAccessDesc& access, + Register64 value, Register memoryBase, + Register ptr) { + wasmStoreImpl(access, AnyRegister(), value, memoryBase, ptr); +} + +void MacroAssembler::enterFakeExitFrameForWasm(Register cxreg, Register scratch, + ExitFrameType type) { + // 
Wasm stubs use the native SP, not the PSP. + + linkExitFrame(cxreg, scratch); + + MOZ_RELEASE_ASSERT(sp.Is(GetStackPointer64())); + + // SP has to be 16-byte aligned when we do a load/store, so push |type| twice + // and then add 8 bytes to SP. This leaves SP unaligned. + move32(Imm32(int32_t(type)), scratch); + push(scratch, scratch); + Add(sp, sp, 8); + + // Despite the above assertion, it is possible for control to flow from here + // to the code generated by + // MacroAssemblerCompat::handleFailureWithHandlerTail without any + // intervening assignment to PSP. But handleFailureWithHandlerTail assumes + // that PSP is the active stack pointer. Hence the following is necessary + // for safety. Note we can't use initPseudoStackPtr here as that would + // generate no instructions. + Mov(PseudoStackPointer64, sp); +} + +void MacroAssembler::widenInt32(Register r) { + move32To64ZeroExtend(r, Register64(r)); +} + +// ======================================================================== +// Convert floating point. 
+ +bool MacroAssembler::convertUInt64ToDoubleNeedsTemp() { return false; } + +void MacroAssembler::convertUInt64ToDouble(Register64 src, FloatRegister dest, + Register temp) { + MOZ_ASSERT(temp == Register::Invalid()); + Ucvtf(ARMFPRegister(dest, 64), ARMRegister(src.reg, 64)); +} + +void MacroAssembler::convertInt64ToDouble(Register64 src, FloatRegister dest) { + Scvtf(ARMFPRegister(dest, 64), ARMRegister(src.reg, 64)); +} + +void MacroAssembler::convertUInt64ToFloat32(Register64 src, FloatRegister dest, + Register temp) { + MOZ_ASSERT(temp == Register::Invalid()); + Ucvtf(ARMFPRegister(dest, 32), ARMRegister(src.reg, 64)); +} + +void MacroAssembler::convertInt64ToFloat32(Register64 src, FloatRegister dest) { + Scvtf(ARMFPRegister(dest, 32), ARMRegister(src.reg, 64)); +} + +void MacroAssembler::convertIntPtrToDouble(Register src, FloatRegister dest) { + convertInt64ToDouble(Register64(src), dest); +} + +// ======================================================================== +// Primitive atomic operations. + +// The computed MemOperand must be Reg+0 because the load/store exclusive +// instructions only take a single pointer register. + +static MemOperand ComputePointerForAtomic(MacroAssembler& masm, + const Address& address, + Register scratch) { + if (address.offset == 0) { + return MemOperand(X(masm, address.base), 0); + } + + masm.Add(X(scratch), X(masm, address.base), address.offset); + return MemOperand(X(scratch), 0); +} + +static MemOperand ComputePointerForAtomic(MacroAssembler& masm, + const BaseIndex& address, + Register scratch) { + masm.Add(X(scratch), X(masm, address.base), + Operand(X(address.index), vixl::LSL, address.scale)); + if (address.offset) { + masm.Add(X(scratch), X(scratch), address.offset); + } + return MemOperand(X(scratch), 0); +} + +// This sign extends to targetWidth and leaves any higher bits zero. 
+ +static void SignOrZeroExtend(MacroAssembler& masm, Scalar::Type srcType, + Width targetWidth, Register src, Register dest) { + bool signExtend = Scalar::isSignedIntType(srcType); + + switch (Scalar::byteSize(srcType)) { + case 1: + if (signExtend) { + masm.Sbfm(R(dest, targetWidth), R(src, targetWidth), 0, 7); + } else { + masm.Ubfm(R(dest, targetWidth), R(src, targetWidth), 0, 7); + } + break; + case 2: + if (signExtend) { + masm.Sbfm(R(dest, targetWidth), R(src, targetWidth), 0, 15); + } else { + masm.Ubfm(R(dest, targetWidth), R(src, targetWidth), 0, 15); + } + break; + case 4: + if (targetWidth == Width::_64) { + if (signExtend) { + masm.Sbfm(X(dest), X(src), 0, 31); + } else { + masm.Ubfm(X(dest), X(src), 0, 31); + } + } else if (src != dest) { + masm.Mov(R(dest, targetWidth), R(src, targetWidth)); + } + break; + case 8: + if (src != dest) { + masm.Mov(R(dest, targetWidth), R(src, targetWidth)); + } + break; + default: + MOZ_CRASH(); + } +} + +// Exclusive-loads zero-extend their values to the full width of the X register. +// +// Note, we've promised to leave the high bits of the 64-bit register clear if +// the targetWidth is 32. + +static void LoadExclusive(MacroAssembler& masm, + const wasm::MemoryAccessDesc* access, + Scalar::Type srcType, Width targetWidth, + MemOperand ptr, Register dest) { + bool signExtend = Scalar::isSignedIntType(srcType); + + // With this address form, a single native ldxr* will be emitted, and the + // AutoForbidPoolsAndNops ensures that the metadata is emitted at the address + // of the ldxr*. 
+ MOZ_ASSERT(ptr.IsImmediateOffset() && ptr.offset() == 0); + + switch (Scalar::byteSize(srcType)) { + case 1: { + { + AutoForbidPoolsAndNops afp( + &masm, + /* max number of instructions in scope = */ 1); + if (access) { + masm.append(*access, masm.currentOffset()); + } + masm.Ldxrb(W(dest), ptr); + } + if (signExtend) { + masm.Sbfm(R(dest, targetWidth), R(dest, targetWidth), 0, 7); + } + break; + } + case 2: { + { + AutoForbidPoolsAndNops afp( + &masm, + /* max number of instructions in scope = */ 1); + if (access) { + masm.append(*access, masm.currentOffset()); + } + masm.Ldxrh(W(dest), ptr); + } + if (signExtend) { + masm.Sbfm(R(dest, targetWidth), R(dest, targetWidth), 0, 15); + } + break; + } + case 4: { + { + AutoForbidPoolsAndNops afp( + &masm, + /* max number of instructions in scope = */ 1); + if (access) { + masm.append(*access, masm.currentOffset()); + } + masm.Ldxr(W(dest), ptr); + } + if (targetWidth == Width::_64 && signExtend) { + masm.Sbfm(X(dest), X(dest), 0, 31); + } + break; + } + case 8: { + { + AutoForbidPoolsAndNops afp( + &masm, + /* max number of instructions in scope = */ 1); + if (access) { + masm.append(*access, masm.currentOffset()); + } + masm.Ldxr(X(dest), ptr); + } + break; + } + default: { + MOZ_CRASH(); + } + } +} + +static void StoreExclusive(MacroAssembler& masm, Scalar::Type type, + Register status, Register src, MemOperand ptr) { + switch (Scalar::byteSize(type)) { + case 1: + masm.Stxrb(W(status), W(src), ptr); + break; + case 2: + masm.Stxrh(W(status), W(src), ptr); + break; + case 4: + masm.Stxr(W(status), W(src), ptr); + break; + case 8: + masm.Stxr(W(status), X(src), ptr); + break; + } +} + +static bool HasAtomicInstructions(MacroAssembler& masm) { + return masm.asVIXL().GetCPUFeatures()->Has(vixl::CPUFeatures::kAtomics); +} + +static inline bool SupportedAtomicInstructionOperands(Scalar::Type type, + Width targetWidth) { + if (targetWidth == Width::_32) { + return byteSize(type) <= 4; + } + if (targetWidth == Width::_64) 
{ + return byteSize(type) == 8; + } + return false; +} + +template <typename T> +static void CompareExchange(MacroAssembler& masm, + const wasm::MemoryAccessDesc* access, + Scalar::Type type, Width targetWidth, + const Synchronization& sync, const T& mem, + Register oldval, Register newval, Register output) { + MOZ_ASSERT(oldval != output && newval != output); + + vixl::UseScratchRegisterScope temps(&masm); + + Register ptrScratch = temps.AcquireX().asUnsized(); + MemOperand ptr = ComputePointerForAtomic(masm, mem, ptrScratch); + + MOZ_ASSERT(ptr.base().asUnsized() != output); + + if (HasAtomicInstructions(masm) && + SupportedAtomicInstructionOperands(type, targetWidth)) { + masm.Mov(X(output), X(oldval)); + // Capal is using same atomic mechanism as Ldxr/Stxr, and + // consider it is the same for "Inner Shareable" domain. + // Not updated gen_cmpxchg in GenerateAtomicOperations.py. + masm.memoryBarrierBefore(sync); + if (access) { + masm.append(*access, masm.currentOffset()); + } + switch (byteSize(type)) { + case 1: + masm.Casalb(R(output, targetWidth), R(newval, targetWidth), ptr); + break; + case 2: + masm.Casalh(R(output, targetWidth), R(newval, targetWidth), ptr); + break; + case 4: + case 8: + masm.Casal(R(output, targetWidth), R(newval, targetWidth), ptr); + break; + default: + MOZ_CRASH("CompareExchange unsupported type"); + } + masm.memoryBarrierAfter(sync); + SignOrZeroExtend(masm, type, targetWidth, output, output); + return; + } + + // The target doesn't support atomics, so generate a LL-SC loop. This requires + // only AArch64 v8.0. 
+ Label again; + Label done; + + // NOTE: the generated code must match the assembly code in gen_cmpxchg in + // GenerateAtomicOperations.py + masm.memoryBarrierBefore(sync); + + Register scratch = temps.AcquireX().asUnsized(); + + masm.bind(&again); + SignOrZeroExtend(masm, type, targetWidth, oldval, scratch); + LoadExclusive(masm, access, type, targetWidth, ptr, output); + masm.Cmp(R(output, targetWidth), R(scratch, targetWidth)); + masm.B(&done, MacroAssembler::NotEqual); + StoreExclusive(masm, type, scratch, newval, ptr); + masm.Cbnz(W(scratch), &again); + masm.bind(&done); + + masm.memoryBarrierAfter(sync); +} + +template <typename T> +static void AtomicExchange(MacroAssembler& masm, + const wasm::MemoryAccessDesc* access, + Scalar::Type type, Width targetWidth, + const Synchronization& sync, const T& mem, + Register value, Register output) { + MOZ_ASSERT(value != output); + + vixl::UseScratchRegisterScope temps(&masm); + + Register ptrScratch = temps.AcquireX().asUnsized(); + MemOperand ptr = ComputePointerForAtomic(masm, mem, ptrScratch); + + if (HasAtomicInstructions(masm) && + SupportedAtomicInstructionOperands(type, targetWidth)) { + // Swpal is using same atomic mechanism as Ldxr/Stxr, and + // consider it is the same for "Inner Shareable" domain. + // Not updated gen_exchange in GenerateAtomicOperations.py. + masm.memoryBarrierBefore(sync); + if (access) { + masm.append(*access, masm.currentOffset()); + } + switch (byteSize(type)) { + case 1: + masm.Swpalb(R(value, targetWidth), R(output, targetWidth), ptr); + break; + case 2: + masm.Swpalh(R(value, targetWidth), R(output, targetWidth), ptr); + break; + case 4: + case 8: + masm.Swpal(R(value, targetWidth), R(output, targetWidth), ptr); + break; + default: + MOZ_CRASH("AtomicExchange unsupported type"); + } + masm.memoryBarrierAfter(sync); + SignOrZeroExtend(masm, type, targetWidth, output, output); + return; + } + + // The target doesn't support atomics, so generate a LL-SC loop. 
This requires + // only AArch64 v8.0. + Label again; + + // NOTE: the generated code must match the assembly code in gen_exchange in + // GenerateAtomicOperations.py + masm.memoryBarrierBefore(sync); + + Register scratch = temps.AcquireX().asUnsized(); + + masm.bind(&again); + LoadExclusive(masm, access, type, targetWidth, ptr, output); + StoreExclusive(masm, type, scratch, value, ptr); + masm.Cbnz(W(scratch), &again); + + masm.memoryBarrierAfter(sync); +} + +template <bool wantResult, typename T> +static void AtomicFetchOp(MacroAssembler& masm, + const wasm::MemoryAccessDesc* access, + Scalar::Type type, Width targetWidth, + const Synchronization& sync, AtomicOp op, + const T& mem, Register value, Register temp, + Register output) { + MOZ_ASSERT(value != output); + MOZ_ASSERT(value != temp); + MOZ_ASSERT_IF(wantResult, output != temp); + + vixl::UseScratchRegisterScope temps(&masm); + + Register ptrScratch = temps.AcquireX().asUnsized(); + MemOperand ptr = ComputePointerForAtomic(masm, mem, ptrScratch); + + if (HasAtomicInstructions(masm) && + SupportedAtomicInstructionOperands(type, targetWidth) && + !isFloatingType(type)) { + // LdXXXal/StXXXl is using same atomic mechanism as Ldxr/Stxr, and + // consider it is the same for "Inner Shareable" domain. + // Not updated gen_fetchop in GenerateAtomicOperations.py. 
+ masm.memoryBarrierBefore(sync); + +#define FETCH_OP_CASE(op, arg) \ + if (access) { \ + masm.append(*access, masm.currentOffset()); \ + } \ + switch (byteSize(type)) { \ + case 1: \ + if (wantResult) { \ + masm.Ld##op##alb(R(arg, targetWidth), R(output, targetWidth), ptr); \ + } else { \ + masm.St##op##lb(R(arg, targetWidth), ptr); \ + } \ + break; \ + case 2: \ + if (wantResult) { \ + masm.Ld##op##alh(R(arg, targetWidth), R(output, targetWidth), ptr); \ + } else { \ + masm.St##op##lh(R(arg, targetWidth), ptr); \ + } \ + break; \ + case 4: \ + case 8: \ + if (wantResult) { \ + masm.Ld##op##al(R(arg, targetWidth), R(output, targetWidth), ptr); \ + } else { \ + masm.St##op##l(R(arg, targetWidth), ptr); \ + } \ + break; \ + default: \ + MOZ_CRASH("AtomicFetchOp unsupported type"); \ + } + + switch (op) { + case AtomicFetchAddOp: + FETCH_OP_CASE(add, value); + break; + case AtomicFetchSubOp: { + Register scratch = temps.AcquireX().asUnsized(); + masm.Neg(X(scratch), X(value)); + FETCH_OP_CASE(add, scratch); + break; + } + case AtomicFetchAndOp: { + Register scratch = temps.AcquireX().asUnsized(); + masm.Eor(X(scratch), X(value), Operand(~0)); + FETCH_OP_CASE(clr, scratch); + break; + } + case AtomicFetchOrOp: + FETCH_OP_CASE(set, value); + break; + case AtomicFetchXorOp: + FETCH_OP_CASE(eor, value); + break; + } + masm.memoryBarrierAfter(sync); + if (wantResult) { + SignOrZeroExtend(masm, type, targetWidth, output, output); + } + return; + } + +#undef FETCH_OP_CASE + + // The target doesn't support atomics, so generate a LL-SC loop. This requires + // only AArch64 v8.0. 
+ Label again; + + // NOTE: the generated code must match the assembly code in gen_fetchop in + // GenerateAtomicOperations.py + masm.memoryBarrierBefore(sync); + + Register scratch = temps.AcquireX().asUnsized(); + + masm.bind(&again); + LoadExclusive(masm, access, type, targetWidth, ptr, output); + switch (op) { + case AtomicFetchAddOp: + masm.Add(X(temp), X(output), X(value)); + break; + case AtomicFetchSubOp: + masm.Sub(X(temp), X(output), X(value)); + break; + case AtomicFetchAndOp: + masm.And(X(temp), X(output), X(value)); + break; + case AtomicFetchOrOp: + masm.Orr(X(temp), X(output), X(value)); + break; + case AtomicFetchXorOp: + masm.Eor(X(temp), X(output), X(value)); + break; + } + StoreExclusive(masm, type, scratch, temp, ptr); + masm.Cbnz(W(scratch), &again); + if (wantResult) { + SignOrZeroExtend(masm, type, targetWidth, output, output); + } + + masm.memoryBarrierAfter(sync); +} + +void MacroAssembler::compareExchange(Scalar::Type type, + const Synchronization& sync, + const Address& mem, Register oldval, + Register newval, Register output) { + CompareExchange(*this, nullptr, type, Width::_32, sync, mem, oldval, newval, + output); +} + +void MacroAssembler::compareExchange(Scalar::Type type, + const Synchronization& sync, + const BaseIndex& mem, Register oldval, + Register newval, Register output) { + CompareExchange(*this, nullptr, type, Width::_32, sync, mem, oldval, newval, + output); +} + +void MacroAssembler::compareExchange64(const Synchronization& sync, + const Address& mem, Register64 expect, + Register64 replace, Register64 output) { + CompareExchange(*this, nullptr, Scalar::Int64, Width::_64, sync, mem, + expect.reg, replace.reg, output.reg); +} + +void MacroAssembler::compareExchange64(const Synchronization& sync, + const BaseIndex& mem, Register64 expect, + Register64 replace, Register64 output) { + CompareExchange(*this, nullptr, Scalar::Int64, Width::_64, sync, mem, + expect.reg, replace.reg, output.reg); +} + +void 
MacroAssembler::atomicExchange64(const Synchronization& sync, + const Address& mem, Register64 value, + Register64 output) { + AtomicExchange(*this, nullptr, Scalar::Int64, Width::_64, sync, mem, + value.reg, output.reg); +} + +void MacroAssembler::atomicExchange64(const Synchronization& sync, + const BaseIndex& mem, Register64 value, + Register64 output) { + AtomicExchange(*this, nullptr, Scalar::Int64, Width::_64, sync, mem, + value.reg, output.reg); +} + +void MacroAssembler::atomicFetchOp64(const Synchronization& sync, AtomicOp op, + Register64 value, const Address& mem, + Register64 temp, Register64 output) { + AtomicFetchOp<true>(*this, nullptr, Scalar::Int64, Width::_64, sync, op, mem, + value.reg, temp.reg, output.reg); +} + +void MacroAssembler::atomicFetchOp64(const Synchronization& sync, AtomicOp op, + Register64 value, const BaseIndex& mem, + Register64 temp, Register64 output) { + AtomicFetchOp<true>(*this, nullptr, Scalar::Int64, Width::_64, sync, op, mem, + value.reg, temp.reg, output.reg); +} + +void MacroAssembler::atomicEffectOp64(const Synchronization& sync, AtomicOp op, + Register64 value, const Address& mem, + Register64 temp) { + AtomicFetchOp<false>(*this, nullptr, Scalar::Int64, Width::_64, sync, op, mem, + value.reg, temp.reg, temp.reg); +} + +void MacroAssembler::atomicEffectOp64(const Synchronization& sync, AtomicOp op, + Register64 value, const BaseIndex& mem, + Register64 temp) { + AtomicFetchOp<false>(*this, nullptr, Scalar::Int64, Width::_64, sync, op, mem, + value.reg, temp.reg, temp.reg); +} + +void MacroAssembler::wasmCompareExchange(const wasm::MemoryAccessDesc& access, + const Address& mem, Register oldval, + Register newval, Register output) { + CompareExchange(*this, &access, access.type(), Width::_32, access.sync(), mem, + oldval, newval, output); +} + +void MacroAssembler::wasmCompareExchange(const wasm::MemoryAccessDesc& access, + const BaseIndex& mem, Register oldval, + Register newval, Register output) { + 
CompareExchange(*this, &access, access.type(), Width::_32, access.sync(), mem, + oldval, newval, output); +} + +void MacroAssembler::atomicExchange(Scalar::Type type, + const Synchronization& sync, + const Address& mem, Register value, + Register output) { + AtomicExchange(*this, nullptr, type, Width::_32, sync, mem, value, output); +} + +void MacroAssembler::atomicExchange(Scalar::Type type, + const Synchronization& sync, + const BaseIndex& mem, Register value, + Register output) { + AtomicExchange(*this, nullptr, type, Width::_32, sync, mem, value, output); +} + +void MacroAssembler::wasmAtomicExchange(const wasm::MemoryAccessDesc& access, + const Address& mem, Register value, + Register output) { + AtomicExchange(*this, &access, access.type(), Width::_32, access.sync(), mem, + value, output); +} + +void MacroAssembler::wasmAtomicExchange(const wasm::MemoryAccessDesc& access, + const BaseIndex& mem, Register value, + Register output) { + AtomicExchange(*this, &access, access.type(), Width::_32, access.sync(), mem, + value, output); +} + +void MacroAssembler::atomicFetchOp(Scalar::Type type, + const Synchronization& sync, AtomicOp op, + Register value, const Address& mem, + Register temp, Register output) { + AtomicFetchOp<true>(*this, nullptr, type, Width::_32, sync, op, mem, value, + temp, output); +} + +void MacroAssembler::atomicFetchOp(Scalar::Type type, + const Synchronization& sync, AtomicOp op, + Register value, const BaseIndex& mem, + Register temp, Register output) { + AtomicFetchOp<true>(*this, nullptr, type, Width::_32, sync, op, mem, value, + temp, output); +} + +void MacroAssembler::wasmAtomicFetchOp(const wasm::MemoryAccessDesc& access, + AtomicOp op, Register value, + const Address& mem, Register temp, + Register output) { + AtomicFetchOp<true>(*this, &access, access.type(), Width::_32, access.sync(), + op, mem, value, temp, output); +} + +void MacroAssembler::wasmAtomicFetchOp(const wasm::MemoryAccessDesc& access, + AtomicOp op, Register value, + 
const BaseIndex& mem, Register temp, + Register output) { + AtomicFetchOp<true>(*this, &access, access.type(), Width::_32, access.sync(), + op, mem, value, temp, output); +} + +void MacroAssembler::wasmAtomicEffectOp(const wasm::MemoryAccessDesc& access, + AtomicOp op, Register value, + const Address& mem, Register temp) { + AtomicFetchOp<false>(*this, &access, access.type(), Width::_32, access.sync(), + op, mem, value, temp, temp); +} + +void MacroAssembler::wasmAtomicEffectOp(const wasm::MemoryAccessDesc& access, + AtomicOp op, Register value, + const BaseIndex& mem, Register temp) { + AtomicFetchOp<false>(*this, &access, access.type(), Width::_32, access.sync(), + op, mem, value, temp, temp); +} + +void MacroAssembler::wasmCompareExchange64(const wasm::MemoryAccessDesc& access, + const Address& mem, + Register64 expect, + Register64 replace, + Register64 output) { + CompareExchange(*this, &access, Scalar::Int64, Width::_64, access.sync(), mem, + expect.reg, replace.reg, output.reg); +} + +void MacroAssembler::wasmCompareExchange64(const wasm::MemoryAccessDesc& access, + const BaseIndex& mem, + Register64 expect, + Register64 replace, + Register64 output) { + CompareExchange(*this, &access, Scalar::Int64, Width::_64, access.sync(), mem, + expect.reg, replace.reg, output.reg); +} + +void MacroAssembler::wasmAtomicExchange64(const wasm::MemoryAccessDesc& access, + const Address& mem, Register64 value, + Register64 output) { + AtomicExchange(*this, &access, Scalar::Int64, Width::_64, access.sync(), mem, + value.reg, output.reg); +} + +void MacroAssembler::wasmAtomicExchange64(const wasm::MemoryAccessDesc& access, + const BaseIndex& mem, + Register64 value, Register64 output) { + AtomicExchange(*this, &access, Scalar::Int64, Width::_64, access.sync(), mem, + value.reg, output.reg); +} + +void MacroAssembler::wasmAtomicFetchOp64(const wasm::MemoryAccessDesc& access, + AtomicOp op, Register64 value, + const Address& mem, Register64 temp, + Register64 output) { + 
AtomicFetchOp<true>(*this, &access, Scalar::Int64, Width::_64, access.sync(), + op, mem, value.reg, temp.reg, output.reg); +} + +void MacroAssembler::wasmAtomicFetchOp64(const wasm::MemoryAccessDesc& access, + AtomicOp op, Register64 value, + const BaseIndex& mem, Register64 temp, + Register64 output) { + AtomicFetchOp<true>(*this, &access, Scalar::Int64, Width::_64, access.sync(), + op, mem, value.reg, temp.reg, output.reg); +} + +void MacroAssembler::wasmAtomicEffectOp64(const wasm::MemoryAccessDesc& access, + AtomicOp op, Register64 value, + const BaseIndex& mem, + Register64 temp) { + AtomicFetchOp<false>(*this, &access, Scalar::Int64, Width::_64, access.sync(), + op, mem, value.reg, temp.reg, temp.reg); +} + +// ======================================================================== +// JS atomic operations. + +template <typename T> +static void CompareExchangeJS(MacroAssembler& masm, Scalar::Type arrayType, + const Synchronization& sync, const T& mem, + Register oldval, Register newval, Register temp, + AnyRegister output) { + if (arrayType == Scalar::Uint32) { + masm.compareExchange(arrayType, sync, mem, oldval, newval, temp); + masm.convertUInt32ToDouble(temp, output.fpu()); + } else { + masm.compareExchange(arrayType, sync, mem, oldval, newval, output.gpr()); + } +} + +void MacroAssembler::compareExchangeJS(Scalar::Type arrayType, + const Synchronization& sync, + const Address& mem, Register oldval, + Register newval, Register temp, + AnyRegister output) { + CompareExchangeJS(*this, arrayType, sync, mem, oldval, newval, temp, output); +} + +void MacroAssembler::compareExchangeJS(Scalar::Type arrayType, + const Synchronization& sync, + const BaseIndex& mem, Register oldval, + Register newval, Register temp, + AnyRegister output) { + CompareExchangeJS(*this, arrayType, sync, mem, oldval, newval, temp, output); +} + +template <typename T> +static void AtomicExchangeJS(MacroAssembler& masm, Scalar::Type arrayType, + const Synchronization& sync, const T& mem, 
+ Register value, Register temp, + AnyRegister output) { + if (arrayType == Scalar::Uint32) { + masm.atomicExchange(arrayType, sync, mem, value, temp); + masm.convertUInt32ToDouble(temp, output.fpu()); + } else { + masm.atomicExchange(arrayType, sync, mem, value, output.gpr()); + } +} + +void MacroAssembler::atomicExchangeJS(Scalar::Type arrayType, + const Synchronization& sync, + const Address& mem, Register value, + Register temp, AnyRegister output) { + AtomicExchangeJS(*this, arrayType, sync, mem, value, temp, output); +} + +void MacroAssembler::atomicExchangeJS(Scalar::Type arrayType, + const Synchronization& sync, + const BaseIndex& mem, Register value, + Register temp, AnyRegister output) { + AtomicExchangeJS(*this, arrayType, sync, mem, value, temp, output); +} + +template <typename T> +static void AtomicFetchOpJS(MacroAssembler& masm, Scalar::Type arrayType, + const Synchronization& sync, AtomicOp op, + Register value, const T& mem, Register temp1, + Register temp2, AnyRegister output) { + if (arrayType == Scalar::Uint32) { + masm.atomicFetchOp(arrayType, sync, op, value, mem, temp2, temp1); + masm.convertUInt32ToDouble(temp1, output.fpu()); + } else { + masm.atomicFetchOp(arrayType, sync, op, value, mem, temp1, output.gpr()); + } +} + +void MacroAssembler::atomicFetchOpJS(Scalar::Type arrayType, + const Synchronization& sync, AtomicOp op, + Register value, const Address& mem, + Register temp1, Register temp2, + AnyRegister output) { + AtomicFetchOpJS(*this, arrayType, sync, op, value, mem, temp1, temp2, output); +} + +void MacroAssembler::atomicFetchOpJS(Scalar::Type arrayType, + const Synchronization& sync, AtomicOp op, + Register value, const BaseIndex& mem, + Register temp1, Register temp2, + AnyRegister output) { + AtomicFetchOpJS(*this, arrayType, sync, op, value, mem, temp1, temp2, output); +} + +void MacroAssembler::atomicEffectOpJS(Scalar::Type arrayType, + const Synchronization& sync, AtomicOp op, + Register value, const BaseIndex& mem, + 
Register temp) { + AtomicFetchOp<false>(*this, nullptr, arrayType, Width::_32, sync, op, mem, + value, temp, temp); +} + +void MacroAssembler::atomicEffectOpJS(Scalar::Type arrayType, + const Synchronization& sync, AtomicOp op, + Register value, const Address& mem, + Register temp) { + AtomicFetchOp<false>(*this, nullptr, arrayType, Width::_32, sync, op, mem, + value, temp, temp); +} + +void MacroAssembler::flexibleQuotient32(Register rhs, Register srcDest, + bool isUnsigned, + const LiveRegisterSet&) { + quotient32(rhs, srcDest, isUnsigned); +} + +void MacroAssembler::flexibleRemainder32(Register rhs, Register srcDest, + bool isUnsigned, + const LiveRegisterSet&) { + remainder32(rhs, srcDest, isUnsigned); +} + +void MacroAssembler::flexibleDivMod32(Register rhs, Register srcDest, + Register remOutput, bool isUnsigned, + const LiveRegisterSet&) { + vixl::UseScratchRegisterScope temps(this); + ARMRegister scratch = temps.AcquireW(); + ARMRegister src = temps.AcquireW(); + + // Preserve src for remainder computation + Mov(src, ARMRegister(srcDest, 32)); + + if (isUnsigned) { + Udiv(ARMRegister(srcDest, 32), src, ARMRegister(rhs, 32)); + } else { + Sdiv(ARMRegister(srcDest, 32), src, ARMRegister(rhs, 32)); + } + // Compute remainder + Mul(scratch, ARMRegister(srcDest, 32), ARMRegister(rhs, 32)); + Sub(ARMRegister(remOutput, 32), src, scratch); +} + +CodeOffset MacroAssembler::moveNearAddressWithPatch(Register dest) { + AutoForbidPoolsAndNops afp(this, + /* max number of instructions in scope = */ 1); + CodeOffset offset(currentOffset()); + adr(ARMRegister(dest, 64), 0, LabelDoc()); + return offset; +} + +void MacroAssembler::patchNearAddressMove(CodeLocationLabel loc, + CodeLocationLabel target) { + ptrdiff_t off = target - loc; + MOZ_RELEASE_ASSERT(vixl::IsInt21(off)); + + Instruction* cur = reinterpret_cast<Instruction*>(loc.raw()); + MOZ_ASSERT(cur->IsADR()); + + vixl::Register rd = vixl::Register::XRegFromCode(cur->Rd()); + adr(cur, rd, off); +} + +// 
======================================================================== +// Spectre Mitigations. + +void MacroAssembler::speculationBarrier() { + // Conditional speculation barrier. + csdb(); +} + +void MacroAssembler::floorFloat32ToInt32(FloatRegister src, Register dest, + Label* fail) { + ARMFPRegister iFlt(src, 32); + ARMRegister o64(dest, 64); + ARMRegister o32(dest, 32); + + Label handleZero; + Label fin; + + // Handle ±0 and NaN first. + Fcmp(iFlt, 0.0); + B(Assembler::Equal, &handleZero); + // NaN is always a bail condition, just bail directly. + B(Assembler::Overflow, fail); + + // Round towards negative infinity. + Fcvtms(o64, iFlt); + + // Sign extend lower 32 bits to test if the result isn't an Int32. + Cmp(o64, Operand(o64, vixl::SXTW)); + B(NotEqual, fail); + + // Clear upper 32 bits. + Uxtw(o64, o64); + B(&fin); + + bind(&handleZero); + // Move the top word of the float into the output reg, if it is non-zero, + // then the original value was -0.0. + Fmov(o32, iFlt); + Cbnz(o32, fail); + bind(&fin); +} + +void MacroAssembler::floorDoubleToInt32(FloatRegister src, Register dest, + Label* fail) { + ARMFPRegister iDbl(src, 64); + ARMRegister o64(dest, 64); + ARMRegister o32(dest, 32); + + Label handleZero; + Label fin; + + // Handle ±0 and NaN first. + Fcmp(iDbl, 0.0); + B(Assembler::Equal, &handleZero); + // NaN is always a bail condition, just bail directly. + B(Assembler::Overflow, fail); + + // Round towards negative infinity. + Fcvtms(o64, iDbl); + + // Sign extend lower 32 bits to test if the result isn't an Int32. + Cmp(o64, Operand(o64, vixl::SXTW)); + B(NotEqual, fail); + + // Clear upper 32 bits. + Uxtw(o64, o64); + B(&fin); + + bind(&handleZero); + // Move the top word of the double into the output reg, if it is non-zero, + // then the original value was -0.0. 
+ Fmov(o64, iDbl); + Cbnz(o64, fail); + bind(&fin); +} + +void MacroAssembler::ceilFloat32ToInt32(FloatRegister src, Register dest, + Label* fail) { + ARMFPRegister iFlt(src, 32); + ARMRegister o64(dest, 64); + ARMRegister o32(dest, 32); + + Label handleZero; + Label fin; + + // Round towards positive infinity. + Fcvtps(o64, iFlt); + + // Sign extend lower 32 bits to test if the result isn't an Int32. + Cmp(o64, Operand(o64, vixl::SXTW)); + B(NotEqual, fail); + + // We have to check for (-1, -0] and NaN when the result is zero. + Cbz(o64, &handleZero); + + // Clear upper 32 bits. + Uxtw(o64, o64); + B(&fin); + + // Bail if the input is in (-1, -0] or NaN. + bind(&handleZero); + // Move the top word of the float into the output reg, if it is non-zero, + // then the original value wasn't +0.0. + Fmov(o32, iFlt); + Cbnz(o32, fail); + bind(&fin); +} + +void MacroAssembler::ceilDoubleToInt32(FloatRegister src, Register dest, + Label* fail) { + ARMFPRegister iDbl(src, 64); + ARMRegister o64(dest, 64); + ARMRegister o32(dest, 32); + + Label handleZero; + Label fin; + + // Round towards positive infinity. + Fcvtps(o64, iDbl); + + // Sign extend lower 32 bits to test if the result isn't an Int32. + Cmp(o64, Operand(o64, vixl::SXTW)); + B(NotEqual, fail); + + // We have to check for (-1, -0] and NaN when the result is zero. + Cbz(o64, &handleZero); + + // Clear upper 32 bits. + Uxtw(o64, o64); + B(&fin); + + // Bail if the input is in (-1, -0] or NaN. + bind(&handleZero); + // Move the top word of the double into the output reg, if it is non-zero, + // then the original value wasn't +0.0. + Fmov(o64, iDbl); + Cbnz(o64, fail); + bind(&fin); +} + +void MacroAssembler::truncFloat32ToInt32(FloatRegister src, Register dest, + Label* fail) { + ARMFPRegister src32(src, 32); + ARMRegister dest32(dest, 32); + ARMRegister dest64(dest, 64); + + Label done, zeroCase; + + // Convert scalar to signed 64-bit fixed-point, rounding toward zero. 
+ // In the case of overflow, the output is saturated. + // In the case of NaN and -0, the output is zero. + Fcvtzs(dest64, src32); + + // If the output was zero, worry about special cases. + Cbz(dest64, &zeroCase); + + // Sign extend lower 32 bits to test if the result isn't an Int32. + Cmp(dest64, Operand(dest64, vixl::SXTW)); + B(NotEqual, fail); + + // Clear upper 32 bits. + Uxtw(dest64, dest64); + + // If the output was non-zero and wasn't saturated, just return it. + B(&done); + + // Handle the case of a zero output: + // 1. The input may have been NaN, requiring a failure. + // 2. The input may have been in (-1,-0], requiring a failure. + { + bind(&zeroCase); + + // Combine test for negative and NaN values using a single bitwise + // operation. + // + // | Decimal number | Bitwise representation | + // |----------------|------------------------| + // | -0 | 8000'0000 | + // | +0 | 0000'0000 | + // | +1 | 3f80'0000 | + // | NaN (or +Inf) | 7fyx'xxxx, y >= 8 | + // | -NaN (or -Inf) | ffyx'xxxx, y >= 8 | + // + // If any of two most significant bits is set, the number isn't in [0, 1). + // (Recall that floating point numbers, except for NaN, are strictly ordered + // when comparing their bitwise representation as signed integers.) + + Fmov(dest32, src32); + Lsr(dest32, dest32, 30); + Cbnz(dest32, fail); + } + + bind(&done); +} + +void MacroAssembler::truncDoubleToInt32(FloatRegister src, Register dest, + Label* fail) { + ARMFPRegister src64(src, 64); + ARMRegister dest64(dest, 64); + ARMRegister dest32(dest, 32); + + Label done, zeroCase; + + // Convert scalar to signed 64-bit fixed-point, rounding toward zero. + // In the case of overflow, the output is saturated. + // In the case of NaN and -0, the output is zero. + Fcvtzs(dest64, src64); + + // If the output was zero, worry about special cases. + Cbz(dest64, &zeroCase); + + // Sign extend lower 32 bits to test if the result isn't an Int32. 
+ Cmp(dest64, Operand(dest64, vixl::SXTW)); + B(NotEqual, fail); + + // Clear upper 32 bits. + Uxtw(dest64, dest64); + + // If the output was non-zero and wasn't saturated, just return it. + B(&done); + + // Handle the case of a zero output: + // 1. The input may have been NaN, requiring a failure. + // 2. The input may have been in (-1,-0], requiring a failure. + { + bind(&zeroCase); + + // Combine test for negative and NaN values using a single bitwise + // operation. + // + // | Decimal number | Bitwise representation | + // |----------------|------------------------| + // | -0 | 8000'0000'0000'0000 | + // | +0 | 0000'0000'0000'0000 | + // | +1 | 3ff0'0000'0000'0000 | + // | NaN (or +Inf) | 7ffx'xxxx'xxxx'xxxx | + // | -NaN (or -Inf) | fffx'xxxx'xxxx'xxxx | + // + // If any of two most significant bits is set, the number isn't in [0, 1). + // (Recall that floating point numbers, except for NaN, are strictly ordered + // when comparing their bitwise representation as signed integers.) + + Fmov(dest64, src64); + Lsr(dest64, dest64, 62); + Cbnz(dest64, fail); + } + + bind(&done); +} + +void MacroAssembler::roundFloat32ToInt32(FloatRegister src, Register dest, + FloatRegister temp, Label* fail) { + ARMFPRegister src32(src, 32); + ARMRegister dest32(dest, 32); + ARMRegister dest64(dest, 64); + + Label negative, saturated, done; + + // Branch to a slow path if input < 0.0 due to complicated rounding rules. + // Note that Fcmp with NaN unsets the negative flag. + Fcmp(src32, 0.0); + B(&negative, Assembler::Condition::lo); + + // Handle the simple case of a positive input, and also -0 and NaN. + // Rounding proceeds with consideration of the fractional part of the input: + // 1. If > 0.5, round to integer with higher absolute value (so, up). + // 2. If < 0.5, round to integer with lower absolute value (so, down). + // 3. If = 0.5, round to +Infinity (so, up). + { + // Convert to signed 64-bit integer, rounding halfway cases away from zero. 
+ // In the case of overflow, the output is saturated. + // In the case of NaN and -0, the output is zero. + Fcvtas(dest64, src32); + + // In the case of zero, the input may have been NaN or -0, which must bail. + Cbnz(dest64, &saturated); + + // Combine test for -0 and NaN values using a single bitwise operation. + // See truncFloat32ToInt32 for an explanation. + Fmov(dest32, src32); + Lsr(dest32, dest32, 30); + Cbnz(dest32, fail); + + B(&done); + } + + // Handle the complicated case of a negative input. + // Rounding proceeds with consideration of the fractional part of the input: + // 1. If > 0.5, round to integer with higher absolute value (so, down). + // 2. If < 0.5, round to integer with lower absolute value (so, up). + // 3. If = 0.5, round to +Infinity (so, up). + bind(&negative); + { + // Inputs in [-0.5, 0) are rounded to -0. Fail. + loadConstantFloat32(-0.5f, temp); + branchFloat(Assembler::DoubleGreaterThanOrEqual, src, temp, fail); + + // Other negative inputs need the biggest double less than 0.5 added. + loadConstantFloat32(GetBiggestNumberLessThan(0.5f), temp); + addFloat32(src, temp); + + // Round all values toward -Infinity. + // In the case of overflow, the output is saturated. + // NaN and -0 are already handled by the "positive number" path above. + Fcvtms(dest64, temp); + } + + bind(&saturated); + + // Sign extend lower 32 bits to test if the result isn't an Int32. + Cmp(dest64, Operand(dest64, vixl::SXTW)); + B(NotEqual, fail); + + // Clear upper 32 bits. + Uxtw(dest64, dest64); + + bind(&done); +} + +void MacroAssembler::roundDoubleToInt32(FloatRegister src, Register dest, + FloatRegister temp, Label* fail) { + ARMFPRegister src64(src, 64); + ARMRegister dest64(dest, 64); + ARMRegister dest32(dest, 32); + + Label negative, saturated, done; + + // Branch to a slow path if input < 0.0 due to complicated rounding rules. + // Note that Fcmp with NaN unsets the negative flag. 
+ Fcmp(src64, 0.0); + B(&negative, Assembler::Condition::lo); + + // Handle the simple case of a positive input, and also -0 and NaN. + // Rounding proceeds with consideration of the fractional part of the input: + // 1. If > 0.5, round to integer with higher absolute value (so, up). + // 2. If < 0.5, round to integer with lower absolute value (so, down). + // 3. If = 0.5, round to +Infinity (so, up). + { + // Convert to signed 64-bit integer, rounding halfway cases away from zero. + // In the case of overflow, the output is saturated. + // In the case of NaN and -0, the output is zero. + Fcvtas(dest64, src64); + + // In the case of zero, the input may have been NaN or -0, which must bail. + Cbnz(dest64, &saturated); + + // Combine test for -0 and NaN values using a single bitwise operation. + // See truncDoubleToInt32 for an explanation. + Fmov(dest64, src64); + Lsr(dest64, dest64, 62); + Cbnz(dest64, fail); + + B(&done); + } + + // Handle the complicated case of a negative input. + // Rounding proceeds with consideration of the fractional part of the input: + // 1. If > 0.5, round to integer with higher absolute value (so, down). + // 2. If < 0.5, round to integer with lower absolute value (so, up). + // 3. If = 0.5, round to +Infinity (so, up). + bind(&negative); + { + // Inputs in [-0.5, 0) are rounded to -0. Fail. + loadConstantDouble(-0.5, temp); + branchDouble(Assembler::DoubleGreaterThanOrEqual, src, temp, fail); + + // Other negative inputs need the biggest double less than 0.5 added. + loadConstantDouble(GetBiggestNumberLessThan(0.5), temp); + addDouble(src, temp); + + // Round all values toward -Infinity. + // In the case of overflow, the output is saturated. + // NaN and -0 are already handled by the "positive number" path above. + Fcvtms(dest64, temp); + } + + bind(&saturated); + + // Sign extend lower 32 bits to test if the result isn't an Int32. + Cmp(dest64, Operand(dest64, vixl::SXTW)); + B(NotEqual, fail); + + // Clear upper 32 bits. 
+ Uxtw(dest64, dest64); + + bind(&done); +} + +void MacroAssembler::nearbyIntDouble(RoundingMode mode, FloatRegister src, + FloatRegister dest) { + switch (mode) { + case RoundingMode::Up: + frintp(ARMFPRegister(dest, 64), ARMFPRegister(src, 64)); + return; + case RoundingMode::Down: + frintm(ARMFPRegister(dest, 64), ARMFPRegister(src, 64)); + return; + case RoundingMode::NearestTiesToEven: + frintn(ARMFPRegister(dest, 64), ARMFPRegister(src, 64)); + return; + case RoundingMode::TowardsZero: + frintz(ARMFPRegister(dest, 64), ARMFPRegister(src, 64)); + return; + } + MOZ_CRASH("unexpected mode"); +} + +void MacroAssembler::nearbyIntFloat32(RoundingMode mode, FloatRegister src, + FloatRegister dest) { + switch (mode) { + case RoundingMode::Up: + frintp(ARMFPRegister(dest, 32), ARMFPRegister(src, 32)); + return; + case RoundingMode::Down: + frintm(ARMFPRegister(dest, 32), ARMFPRegister(src, 32)); + return; + case RoundingMode::NearestTiesToEven: + frintn(ARMFPRegister(dest, 32), ARMFPRegister(src, 32)); + return; + case RoundingMode::TowardsZero: + frintz(ARMFPRegister(dest, 32), ARMFPRegister(src, 32)); + return; + } + MOZ_CRASH("unexpected mode"); +} + +void MacroAssembler::copySignDouble(FloatRegister lhs, FloatRegister rhs, + FloatRegister output) { + ScratchDoubleScope scratch(*this); + + // Double with only the sign bit set + loadConstantDouble(-0.0, scratch); + + if (lhs != output) { + moveDouble(lhs, output); + } + + bit(ARMFPRegister(output.encoding(), vixl::VectorFormat::kFormat8B), + ARMFPRegister(rhs.encoding(), vixl::VectorFormat::kFormat8B), + ARMFPRegister(scratch.encoding(), vixl::VectorFormat::kFormat8B)); +} + +void MacroAssembler::copySignFloat32(FloatRegister lhs, FloatRegister rhs, + FloatRegister output) { + ScratchFloat32Scope scratch(*this); + + // Float with only the sign bit set + loadConstantFloat32(-0.0f, scratch); + + if (lhs != output) { + moveFloat32(lhs, output); + } + + bit(ARMFPRegister(output.encoding(), 
vixl::VectorFormat::kFormat8B), + ARMFPRegister(rhs.encoding(), vixl::VectorFormat::kFormat8B), + ARMFPRegister(scratch.encoding(), vixl::VectorFormat::kFormat8B)); +} + +void MacroAssembler::shiftIndex32AndAdd(Register indexTemp32, int shift, + Register pointer) { + Add(ARMRegister(pointer, 64), ARMRegister(pointer, 64), + Operand(ARMRegister(indexTemp32, 64), vixl::LSL, shift)); +} + +//}}} check_macroassembler_style + +} // namespace jit +} // namespace js diff --git a/js/src/jit/arm64/MacroAssembler-arm64.h b/js/src/jit/arm64/MacroAssembler-arm64.h new file mode 100644 index 0000000000..edfd8c9d3e --- /dev/null +++ b/js/src/jit/arm64/MacroAssembler-arm64.h @@ -0,0 +1,2206 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef jit_arm64_MacroAssembler_arm64_h +#define jit_arm64_MacroAssembler_arm64_h + +#include "jit/arm64/Assembler-arm64.h" +#include "jit/arm64/vixl/Debugger-vixl.h" +#include "jit/arm64/vixl/MacroAssembler-vixl.h" +#include "jit/AtomicOp.h" +#include "jit/MoveResolver.h" +#include "vm/BigIntType.h" // JS::BigInt +#include "wasm/WasmBuiltins.h" + +#ifdef _M_ARM64 +# ifdef move32 +# undef move32 +# endif +# ifdef move64 +# undef move64 +# endif +#endif + +namespace js { +namespace jit { + +// Import VIXL operands directly into the jit namespace for shared code. 
+using vixl::MemOperand; +using vixl::Operand; + +struct ImmShiftedTag : public ImmWord { + explicit ImmShiftedTag(JSValueShiftedTag shtag) : ImmWord((uintptr_t)shtag) {} + + explicit ImmShiftedTag(JSValueType type) + : ImmWord(uintptr_t(JSValueShiftedTag(JSVAL_TYPE_TO_SHIFTED_TAG(type)))) { + } +}; + +struct ImmTag : public Imm32 { + explicit ImmTag(JSValueTag tag) : Imm32(tag) {} +}; + +class ScratchTagScope; + +class MacroAssemblerCompat : public vixl::MacroAssembler { + public: + typedef vixl::Condition Condition; + + private: + // Perform a downcast. Should be removed by Bug 996602. + js::jit::MacroAssembler& asMasm(); + const js::jit::MacroAssembler& asMasm() const; + + public: + // Restrict to only VIXL-internal functions. + vixl::MacroAssembler& asVIXL(); + const MacroAssembler& asVIXL() const; + + protected: + bool enoughMemory_; + uint32_t framePushed_; + + MacroAssemblerCompat() + : vixl::MacroAssembler(), enoughMemory_(true), framePushed_(0) {} + + protected: + MoveResolver moveResolver_; + + public: + bool oom() const { return Assembler::oom() || !enoughMemory_; } + static ARMRegister toARMRegister(RegisterOrSP r, size_t size) { + if (IsHiddenSP(r)) { + MOZ_ASSERT(size == 64); + return sp; + } + return ARMRegister(AsRegister(r), size); + } + static MemOperand toMemOperand(const Address& a) { + return MemOperand(toARMRegister(a.base, 64), a.offset); + } + void doBaseIndex(const vixl::CPURegister& rt, const BaseIndex& addr, + vixl::LoadStoreOp op) { + const ARMRegister base = toARMRegister(addr.base, 64); + const ARMRegister index = ARMRegister(addr.index, 64); + const unsigned scale = addr.scale; + + if (!addr.offset && + (!scale || scale == static_cast<unsigned>(CalcLSDataSize(op)))) { + LoadStoreMacro(rt, MemOperand(base, index, vixl::LSL, scale), op); + return; + } + + vixl::UseScratchRegisterScope temps(this); + ARMRegister scratch64 = temps.AcquireX(); + MOZ_ASSERT(!scratch64.Is(rt)); + MOZ_ASSERT(!scratch64.Is(base)); + 
MOZ_ASSERT(!scratch64.Is(index)); + + Add(scratch64, base, Operand(index, vixl::LSL, scale)); + LoadStoreMacro(rt, MemOperand(scratch64, addr.offset), op); + } + void Push(ARMRegister reg) { + push(reg); + adjustFrame(reg.size() / 8); + } + void Push(Register reg) { + vixl::MacroAssembler::Push(ARMRegister(reg, 64)); + adjustFrame(8); + } + void Push(Imm32 imm) { + push(imm); + adjustFrame(8); + } + void Push(FloatRegister f) { + push(ARMFPRegister(f, 64)); + adjustFrame(8); + } + void Push(ImmPtr imm) { + push(imm); + adjustFrame(sizeof(void*)); + } + void push(FloatRegister f) { + vixl::MacroAssembler::Push(ARMFPRegister(f, 64)); + } + void push(ARMFPRegister f) { vixl::MacroAssembler::Push(f); } + void push(Imm32 imm) { + if (imm.value == 0) { + vixl::MacroAssembler::Push(vixl::xzr); + } else { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch64 = temps.AcquireX(); + move32(imm, scratch64.asUnsized()); + vixl::MacroAssembler::Push(scratch64); + } + } + void push(ImmWord imm) { + if (imm.value == 0) { + vixl::MacroAssembler::Push(vixl::xzr); + } else { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch64 = temps.AcquireX(); + Mov(scratch64, imm.value); + vixl::MacroAssembler::Push(scratch64); + } + } + void push(ImmPtr imm) { + if (imm.value == nullptr) { + vixl::MacroAssembler::Push(vixl::xzr); + } else { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch64 = temps.AcquireX(); + movePtr(imm, scratch64.asUnsized()); + vixl::MacroAssembler::Push(scratch64); + } + } + void push(ImmGCPtr imm) { + if (imm.value == nullptr) { + vixl::MacroAssembler::Push(vixl::xzr); + } else { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch64 = temps.AcquireX(); + movePtr(imm, scratch64.asUnsized()); + vixl::MacroAssembler::Push(scratch64); + } + } + void push(ARMRegister reg) { vixl::MacroAssembler::Push(reg); } + void push(Address a) { + vixl::UseScratchRegisterScope temps(this); + const 
ARMRegister scratch64 = temps.AcquireX(); + MOZ_ASSERT(a.base != scratch64.asUnsized()); + loadPtr(a, scratch64.asUnsized()); + vixl::MacroAssembler::Push(scratch64); + } + + // Push registers. + void push(Register reg) { vixl::MacroAssembler::Push(ARMRegister(reg, 64)); } + void push(RegisterOrSP reg) { + if (IsHiddenSP(reg)) { + vixl::MacroAssembler::Push(sp); + } + vixl::MacroAssembler::Push(toARMRegister(reg, 64)); + } + void push(Register r0, Register r1) { + vixl::MacroAssembler::Push(ARMRegister(r0, 64), ARMRegister(r1, 64)); + } + void push(Register r0, Register r1, Register r2) { + vixl::MacroAssembler::Push(ARMRegister(r0, 64), ARMRegister(r1, 64), + ARMRegister(r2, 64)); + } + void push(Register r0, Register r1, Register r2, Register r3) { + vixl::MacroAssembler::Push(ARMRegister(r0, 64), ARMRegister(r1, 64), + ARMRegister(r2, 64), ARMRegister(r3, 64)); + } + void push(ARMFPRegister r0, ARMFPRegister r1, ARMFPRegister r2, + ARMFPRegister r3) { + vixl::MacroAssembler::Push(r0, r1, r2, r3); + } + + // Pop registers. 
+ void pop(Register reg) { vixl::MacroAssembler::Pop(ARMRegister(reg, 64)); } + void pop(Register r0, Register r1) { + vixl::MacroAssembler::Pop(ARMRegister(r0, 64), ARMRegister(r1, 64)); + } + void pop(Register r0, Register r1, Register r2) { + vixl::MacroAssembler::Pop(ARMRegister(r0, 64), ARMRegister(r1, 64), + ARMRegister(r2, 64)); + } + void pop(Register r0, Register r1, Register r2, Register r3) { + vixl::MacroAssembler::Pop(ARMRegister(r0, 64), ARMRegister(r1, 64), + ARMRegister(r2, 64), ARMRegister(r3, 64)); + } + void pop(ARMFPRegister r0, ARMFPRegister r1, ARMFPRegister r2, + ARMFPRegister r3) { + vixl::MacroAssembler::Pop(r0, r1, r2, r3); + } + + void pop(const ValueOperand& v) { pop(v.valueReg()); } + void pop(const FloatRegister& f) { + vixl::MacroAssembler::Pop(ARMFPRegister(f, 64)); + } + + void implicitPop(uint32_t args) { + MOZ_ASSERT(args % sizeof(intptr_t) == 0); + adjustFrame(0 - args); + } + void Pop(ARMRegister r) { + vixl::MacroAssembler::Pop(r); + adjustFrame(0 - r.size() / 8); + } + // FIXME: This is the same on every arch. + // FIXME: If we can share framePushed_, we can share this. + // FIXME: Or just make it at the highest level. + CodeOffset PushWithPatch(ImmWord word) { + framePushed_ += sizeof(word.value); + return pushWithPatch(word); + } + CodeOffset PushWithPatch(ImmPtr ptr) { + return PushWithPatch(ImmWord(uintptr_t(ptr.value))); + } + + uint32_t framePushed() const { return framePushed_; } + void adjustFrame(int32_t diff) { setFramePushed(framePushed_ + diff); } + + void setFramePushed(uint32_t framePushed) { framePushed_ = framePushed; } + + void freeStack(Register amount) { + vixl::MacroAssembler::Drop(Operand(ARMRegister(amount, 64))); + } + + // Update sp with the value of the current active stack pointer, if necessary. 
+ void syncStackPtr() { + if (!GetStackPointer64().Is(vixl::sp)) { + Mov(vixl::sp, GetStackPointer64()); + } + } + void initPseudoStackPtr() { + if (!GetStackPointer64().Is(vixl::sp)) { + Mov(GetStackPointer64(), vixl::sp); + } + } + // In debug builds only, cause a trap if PSP is active and PSP != SP + void assertStackPtrsSynced(uint32_t id) { +#ifdef DEBUG + // The add and sub instructions below will only take a 12-bit immediate. + MOZ_ASSERT(id <= 0xFFF); + if (!GetStackPointer64().Is(vixl::sp)) { + Label ok; + // Add a marker, so we can figure out who requested the check when + // inspecting the generated code. Note, a more concise way to encode + // the marker would be to use it as an immediate for the `brk` + // instruction as generated by `Unreachable()`, and removing the add/sub. + Add(GetStackPointer64(), GetStackPointer64(), Operand(id)); + Sub(GetStackPointer64(), GetStackPointer64(), Operand(id)); + Cmp(vixl::sp, GetStackPointer64()); + B(Equal, &ok); + Unreachable(); + bind(&ok); + } +#endif + } + // In debug builds only, add a marker that doesn't change the machine's + // state. Note these markers are x16-based, as opposed to the x28-based + // ones made by `assertStackPtrsSynced`. + void addMarker(uint32_t id) { +#ifdef DEBUG + // Only 12 bits of immediate are allowed. 
+ MOZ_ASSERT(id <= 0xFFF); + ARMRegister x16 = ARMRegister(r16, 64); + Add(x16, x16, Operand(id)); + Sub(x16, x16, Operand(id)); +#endif + } + + void storeValue(ValueOperand val, const Address& dest) { + storePtr(val.valueReg(), dest); + } + + template <typename T> + void storeValue(JSValueType type, Register reg, const T& dest) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(scratch != reg); + tagValue(type, reg, ValueOperand(scratch)); + storeValue(ValueOperand(scratch), dest); + } + template <typename T> + void storeValue(const Value& val, const T& dest) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + moveValue(val, ValueOperand(scratch)); + storeValue(ValueOperand(scratch), dest); + } + void storeValue(ValueOperand val, BaseIndex dest) { + storePtr(val.valueReg(), dest); + } + void storeValue(const Address& src, const Address& dest, Register temp) { + loadPtr(src, temp); + storePtr(temp, dest); + } + + void storePrivateValue(Register src, const Address& dest) { + storePtr(src, dest); + } + void storePrivateValue(ImmGCPtr imm, const Address& dest) { + storePtr(imm, dest); + } + + void loadValue(Address src, Register val) { + Ldr(ARMRegister(val, 64), MemOperand(src)); + } + void loadValue(Address src, ValueOperand val) { + Ldr(ARMRegister(val.valueReg(), 64), MemOperand(src)); + } + void loadValue(const BaseIndex& src, ValueOperand val) { + doBaseIndex(ARMRegister(val.valueReg(), 64), src, vixl::LDR_x); + } + void loadUnalignedValue(const Address& src, ValueOperand dest) { + loadValue(src, dest); + } + void tagValue(JSValueType type, Register payload, ValueOperand dest) { + // This could be cleverer, but the first attempt had bugs. 
+ Orr(ARMRegister(dest.valueReg(), 64), ARMRegister(payload, 64), + Operand(ImmShiftedTag(type).value)); + } + void pushValue(ValueOperand val) { + vixl::MacroAssembler::Push(ARMRegister(val.valueReg(), 64)); + } + void popValue(ValueOperand val) { + vixl::MacroAssembler::Pop(ARMRegister(val.valueReg(), 64)); + // SP may be < PSP now (that's OK). + // eg testcase: tests/backup-point-bug1315634.js + } + void pushValue(const Value& val) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + if (val.isGCThing()) { + BufferOffset load = + movePatchablePtr(ImmPtr(val.bitsAsPunboxPointer()), scratch); + writeDataRelocation(val, load); + push(scratch); + } else { + moveValue(val, scratch); + push(scratch); + } + } + void pushValue(JSValueType type, Register reg) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(scratch != reg); + tagValue(type, reg, ValueOperand(scratch)); + push(scratch); + } + void pushValue(const Address& addr) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(scratch != addr.base); + loadValue(addr, scratch); + push(scratch); + } + void pushValue(const BaseIndex& addr, Register scratch) { + loadValue(addr, ValueOperand(scratch)); + pushValue(ValueOperand(scratch)); + } + template <typename T> + void storeUnboxedPayload(ValueOperand value, T address, size_t nbytes, + JSValueType type) { + switch (nbytes) { + case 8: { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + if (type == JSVAL_TYPE_OBJECT) { + unboxObjectOrNull(value, scratch); + } else { + unboxNonDouble(value, scratch, type); + } + storePtr(scratch, address); + return; + } + case 4: + store32(value.valueReg(), address); + return; + case 1: + store8(value.valueReg(), address); + return; + default: + MOZ_CRASH("Bad payload width"); + } + } + void 
moveValue(const Value& val, Register dest) { + if (val.isGCThing()) { + BufferOffset load = + movePatchablePtr(ImmPtr(val.bitsAsPunboxPointer()), dest); + writeDataRelocation(val, load); + } else { + movePtr(ImmWord(val.asRawBits()), dest); + } + } + void moveValue(const Value& src, const ValueOperand& dest) { + moveValue(src, dest.valueReg()); + } + + CodeOffset pushWithPatch(ImmWord imm) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + CodeOffset label = movWithPatch(imm, scratch); + push(scratch); + return label; + } + + CodeOffset movWithPatch(ImmWord imm, Register dest) { + BufferOffset off = immPool64(ARMRegister(dest, 64), imm.value); + return CodeOffset(off.getOffset()); + } + CodeOffset movWithPatch(ImmPtr imm, Register dest) { + BufferOffset off = immPool64(ARMRegister(dest, 64), uint64_t(imm.value)); + return CodeOffset(off.getOffset()); + } + + void boxValue(JSValueType type, Register src, Register dest); + + void splitSignExtTag(Register src, Register dest) { + sbfx(ARMRegister(dest, 64), ARMRegister(src, 64), JSVAL_TAG_SHIFT, + (64 - JSVAL_TAG_SHIFT)); + } + [[nodiscard]] Register extractTag(const Address& address, Register scratch) { + loadPtr(address, scratch); + splitSignExtTag(scratch, scratch); + return scratch; + } + [[nodiscard]] Register extractTag(const ValueOperand& value, + Register scratch) { + splitSignExtTag(value.valueReg(), scratch); + return scratch; + } + [[nodiscard]] Register extractObject(const Address& address, + Register scratch) { + loadPtr(address, scratch); + unboxObject(scratch, scratch); + return scratch; + } + [[nodiscard]] Register extractObject(const ValueOperand& value, + Register scratch) { + unboxObject(value, scratch); + return scratch; + } + [[nodiscard]] Register extractSymbol(const ValueOperand& value, + Register scratch) { + unboxSymbol(value, scratch); + return scratch; + } + [[nodiscard]] Register extractInt32(const ValueOperand& value, + Register 
scratch) { + unboxInt32(value, scratch); + return scratch; + } + [[nodiscard]] Register extractBoolean(const ValueOperand& value, + Register scratch) { + unboxBoolean(value, scratch); + return scratch; + } + + inline void ensureDouble(const ValueOperand& source, FloatRegister dest, + Label* failure); + + void emitSet(Condition cond, Register dest) { + Cset(ARMRegister(dest, 64), cond); + } + + void testNullSet(Condition cond, const ValueOperand& value, Register dest) { + cond = testNull(cond, value); + emitSet(cond, dest); + } + void testObjectSet(Condition cond, const ValueOperand& value, Register dest) { + cond = testObject(cond, value); + emitSet(cond, dest); + } + void testUndefinedSet(Condition cond, const ValueOperand& value, + Register dest) { + cond = testUndefined(cond, value); + emitSet(cond, dest); + } + + void convertBoolToInt32(Register source, Register dest) { + Uxtb(ARMRegister(dest, 64), ARMRegister(source, 64)); + } + + void convertInt32ToDouble(Register src, FloatRegister dest) { + Scvtf(ARMFPRegister(dest, 64), + ARMRegister(src, 32)); // Uses FPCR rounding mode. + } + void convertInt32ToDouble(const Address& src, FloatRegister dest) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(scratch != src.base); + load32(src, scratch); + convertInt32ToDouble(scratch, dest); + } + void convertInt32ToDouble(const BaseIndex& src, FloatRegister dest) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(scratch != src.base); + MOZ_ASSERT(scratch != src.index); + load32(src, scratch); + convertInt32ToDouble(scratch, dest); + } + + void convertInt32ToFloat32(Register src, FloatRegister dest) { + Scvtf(ARMFPRegister(dest, 32), + ARMRegister(src, 32)); // Uses FPCR rounding mode. 
+ } + void convertInt32ToFloat32(const Address& src, FloatRegister dest) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(scratch != src.base); + load32(src, scratch); + convertInt32ToFloat32(scratch, dest); + } + + void convertUInt32ToDouble(Register src, FloatRegister dest) { + Ucvtf(ARMFPRegister(dest, 64), + ARMRegister(src, 32)); // Uses FPCR rounding mode. + } + void convertUInt32ToDouble(const Address& src, FloatRegister dest) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(scratch != src.base); + load32(src, scratch); + convertUInt32ToDouble(scratch, dest); + } + + void convertUInt32ToFloat32(Register src, FloatRegister dest) { + Ucvtf(ARMFPRegister(dest, 32), + ARMRegister(src, 32)); // Uses FPCR rounding mode. + } + void convertUInt32ToFloat32(const Address& src, FloatRegister dest) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(scratch != src.base); + load32(src, scratch); + convertUInt32ToFloat32(scratch, dest); + } + + void convertFloat32ToDouble(FloatRegister src, FloatRegister dest) { + Fcvt(ARMFPRegister(dest, 64), ARMFPRegister(src, 32)); + } + void convertDoubleToFloat32(FloatRegister src, FloatRegister dest) { + Fcvt(ARMFPRegister(dest, 32), ARMFPRegister(src, 64)); + } + + using vixl::MacroAssembler::B; + + void convertDoubleToInt32(FloatRegister src, Register dest, Label* fail, + bool negativeZeroCheck = true) { + ARMFPRegister fsrc64(src, 64); + ARMRegister dest32(dest, 32); + + // ARMv8.3 chips support the FJCVTZS instruction, which handles exactly this + // logic. But the simulator does not implement it, and when the simulator + // runs on ARM64 hardware we want to override vixl's detection of it. 
+#if defined(JS_SIMULATOR_ARM64) && (defined(__aarch64__) || defined(_M_ARM64)) + const bool fjscvt = false; +#else + const bool fjscvt = + CPUHas(vixl::CPUFeatures::kFP, vixl::CPUFeatures::kJSCVT); +#endif + if (fjscvt) { + // Convert double to integer, rounding toward zero. + // The Z-flag is set iff the conversion is exact. -0 unsets the Z-flag. + Fjcvtzs(dest32, fsrc64); + + if (negativeZeroCheck) { + B(fail, Assembler::NonZero); + } else { + Label done; + B(&done, Assembler::Zero); // If conversion was exact, go to end. + + // The conversion was inexact, but the caller intends to allow -0. + + // Compare fsrc64 to 0. + // If fsrc64 == 0 and FJCVTZS conversion was inexact, then fsrc64 is -0. + Fcmp(fsrc64, 0.0); + B(fail, Assembler::NotEqual); // Pass through -0; fail otherwise. + + bind(&done); + } + } else { + // Older processors use a significantly slower path. + ARMRegister dest64(dest, 64); + + vixl::UseScratchRegisterScope temps(this); + const ARMFPRegister scratch64 = temps.AcquireD(); + MOZ_ASSERT(!scratch64.Is(fsrc64)); + + Fcvtzs(dest32, fsrc64); // Convert, rounding toward zero. + Scvtf(scratch64, dest32); // Convert back, using FPCR rounding mode. + Fcmp(scratch64, fsrc64); + B(fail, Assembler::NotEqual); + + if (negativeZeroCheck) { + Label nonzero; + Cbnz(dest32, &nonzero); + Fmov(dest64, fsrc64); + Cbnz(dest64, fail); + bind(&nonzero); + } + } + } + void convertFloat32ToInt32(FloatRegister src, Register dest, Label* fail, + bool negativeZeroCheck = true) { + vixl::UseScratchRegisterScope temps(this); + const ARMFPRegister scratch32 = temps.AcquireS(); + + ARMFPRegister fsrc(src, 32); + ARMRegister dest32(dest, 32); + ARMRegister dest64(dest, 64); + + MOZ_ASSERT(!scratch32.Is(fsrc)); + + Fcvtzs(dest64, fsrc); // Convert, rounding toward zero. + Scvtf(scratch32, dest32); // Convert back, using FPCR rounding mode. 
+ Fcmp(scratch32, fsrc); + B(fail, Assembler::NotEqual); + + if (negativeZeroCheck) { + Label nonzero; + Cbnz(dest32, &nonzero); + Fmov(dest32, fsrc); + Cbnz(dest32, fail); + bind(&nonzero); + } + Uxtw(dest64, dest64); + } + + void convertDoubleToPtr(FloatRegister src, Register dest, Label* fail, + bool negativeZeroCheck = true) { + ARMFPRegister fsrc64(src, 64); + ARMRegister dest64(dest, 64); + + vixl::UseScratchRegisterScope temps(this); + const ARMFPRegister scratch64 = temps.AcquireD(); + MOZ_ASSERT(!scratch64.Is(fsrc64)); + + // Note: we can't use the FJCVTZS instruction here because that only works + // for 32-bit values. + + Fcvtzs(dest64, fsrc64); // Convert, rounding toward zero. + Scvtf(scratch64, dest64); // Convert back, using FPCR rounding mode. + Fcmp(scratch64, fsrc64); + B(fail, Assembler::NotEqual); + + if (negativeZeroCheck) { + Label nonzero; + Cbnz(dest64, &nonzero); + Fmov(dest64, fsrc64); + Cbnz(dest64, fail); + bind(&nonzero); + } + } + + void jump(Label* label) { B(label); } + void jump(JitCode* code) { branch(code); } + void jump(ImmPtr ptr) { + // It is unclear why this sync is necessary: + // * PSP and SP have been observed to be different in testcase + // tests/asm.js/testBug1046688.js. + // * Removing the sync causes no failures in all of jit-tests. + // + // Also see branch(JitCode*) below. This version of jump() is called only + // from jump(TrampolinePtr) which is called on various very slow paths, + // probably only in JS. + syncStackPtr(); + BufferOffset loc = + b(-1, + LabelDoc()); // The jump target will be patched by executableCopy(). 
+ addPendingJump(loc, ptr, RelocationKind::HARDCODED); + } + void jump(TrampolinePtr code) { jump(ImmPtr(code.value)); } + void jump(Register reg) { Br(ARMRegister(reg, 64)); } + void jump(const Address& addr) { + vixl::UseScratchRegisterScope temps(this); + MOZ_ASSERT(temps.IsAvailable(ScratchReg64)); // ip0 + temps.Exclude(ScratchReg64); + MOZ_ASSERT(addr.base != ScratchReg64.asUnsized()); + loadPtr(addr, ScratchReg64.asUnsized()); + br(ScratchReg64); + } + + void align(int alignment) { armbuffer_.align(alignment); } + + void haltingAlign(int alignment) { + armbuffer_.align(alignment, vixl::HLT | ImmException(0xBAAD)); + } + void nopAlign(int alignment) { armbuffer_.align(alignment); } + + void movePtr(Register src, Register dest) { + Mov(ARMRegister(dest, 64), ARMRegister(src, 64)); + } + void movePtr(ImmWord imm, Register dest) { + Mov(ARMRegister(dest, 64), int64_t(imm.value)); + } + void movePtr(ImmPtr imm, Register dest) { + Mov(ARMRegister(dest, 64), int64_t(imm.value)); + } + void movePtr(wasm::SymbolicAddress imm, Register dest) { + BufferOffset off = movePatchablePtr(ImmWord(0xffffffffffffffffULL), dest); + append(wasm::SymbolicAccess(CodeOffset(off.getOffset()), imm)); + } + void movePtr(ImmGCPtr imm, Register dest) { + BufferOffset load = movePatchablePtr(ImmPtr(imm.value), dest); + writeDataRelocation(imm, load); + } + + void mov(ImmWord imm, Register dest) { movePtr(imm, dest); } + void mov(ImmPtr imm, Register dest) { movePtr(imm, dest); } + void mov(wasm::SymbolicAddress imm, Register dest) { movePtr(imm, dest); } + void mov(Register src, Register dest) { movePtr(src, dest); } + void mov(CodeLabel* label, Register dest); + + void move32(Imm32 imm, Register dest) { + Mov(ARMRegister(dest, 32), (int64_t)imm.value); + } + void move32(Register src, Register dest) { + Mov(ARMRegister(dest, 32), ARMRegister(src, 32)); + } + + // Move a pointer using a literal pool, so that the pointer + // may be easily patched or traced. 
+ // Returns the BufferOffset of the load instruction emitted. + BufferOffset movePatchablePtr(ImmWord ptr, Register dest); + BufferOffset movePatchablePtr(ImmPtr ptr, Register dest); + + void loadPtr(wasm::SymbolicAddress address, Register dest) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch = temps.AcquireX(); + movePtr(address, scratch.asUnsized()); + Ldr(ARMRegister(dest, 64), MemOperand(scratch)); + } + void loadPtr(AbsoluteAddress address, Register dest) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch = temps.AcquireX(); + movePtr(ImmWord((uintptr_t)address.addr), scratch.asUnsized()); + Ldr(ARMRegister(dest, 64), MemOperand(scratch)); + } + void loadPtr(const Address& address, Register dest) { + Ldr(ARMRegister(dest, 64), MemOperand(address)); + } + void loadPtr(const BaseIndex& src, Register dest) { + ARMRegister base = toARMRegister(src.base, 64); + uint32_t scale = Imm32::ShiftOf(src.scale).value; + ARMRegister dest64(dest, 64); + ARMRegister index64(src.index, 64); + + if (src.offset) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch = temps.AcquireX(); + MOZ_ASSERT(!scratch.Is(base)); + MOZ_ASSERT(!scratch.Is(dest64)); + MOZ_ASSERT(!scratch.Is(index64)); + + Add(scratch, base, Operand(int64_t(src.offset))); + Ldr(dest64, MemOperand(scratch, index64, vixl::LSL, scale)); + return; + } + + Ldr(dest64, MemOperand(base, index64, vixl::LSL, scale)); + } + void loadPrivate(const Address& src, Register dest); + + void store8(Register src, const Address& address) { + Strb(ARMRegister(src, 32), toMemOperand(address)); + } + void store8(Imm32 imm, const Address& address) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch32 = temps.AcquireW(); + MOZ_ASSERT(scratch32.asUnsized() != address.base); + move32(imm, scratch32.asUnsized()); + Strb(scratch32, toMemOperand(address)); + } + void store8(Register src, const BaseIndex& address) { + doBaseIndex(ARMRegister(src, 
32), address, vixl::STRB_w); + } + void store8(Imm32 imm, const BaseIndex& address) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch32 = temps.AcquireW(); + MOZ_ASSERT(scratch32.asUnsized() != address.base); + MOZ_ASSERT(scratch32.asUnsized() != address.index); + Mov(scratch32, Operand(imm.value)); + doBaseIndex(scratch32, address, vixl::STRB_w); + } + + void store16(Register src, const Address& address) { + Strh(ARMRegister(src, 32), toMemOperand(address)); + } + void store16(Imm32 imm, const Address& address) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch32 = temps.AcquireW(); + MOZ_ASSERT(scratch32.asUnsized() != address.base); + move32(imm, scratch32.asUnsized()); + Strh(scratch32, toMemOperand(address)); + } + void store16(Register src, const BaseIndex& address) { + doBaseIndex(ARMRegister(src, 32), address, vixl::STRH_w); + } + void store16(Imm32 imm, const BaseIndex& address) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch32 = temps.AcquireW(); + MOZ_ASSERT(scratch32.asUnsized() != address.base); + MOZ_ASSERT(scratch32.asUnsized() != address.index); + Mov(scratch32, Operand(imm.value)); + doBaseIndex(scratch32, address, vixl::STRH_w); + } + template <typename S, typename T> + void store16Unaligned(const S& src, const T& dest) { + store16(src, dest); + } + + void storePtr(ImmWord imm, const Address& address) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(scratch != address.base); + movePtr(imm, scratch); + storePtr(scratch, address); + } + void storePtr(ImmPtr imm, const Address& address) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch64 = temps.AcquireX(); + MOZ_ASSERT(scratch64.asUnsized() != address.base); + Mov(scratch64, uint64_t(imm.value)); + Str(scratch64, toMemOperand(address)); + } + void storePtr(ImmGCPtr imm, const Address& address) { + vixl::UseScratchRegisterScope 
temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(scratch != address.base); + movePtr(imm, scratch); + storePtr(scratch, address); + } + void storePtr(Register src, const Address& address) { + Str(ARMRegister(src, 64), toMemOperand(address)); + } + + void storePtr(ImmWord imm, const BaseIndex& address) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch64 = temps.AcquireX(); + MOZ_ASSERT(scratch64.asUnsized() != address.base); + MOZ_ASSERT(scratch64.asUnsized() != address.index); + Mov(scratch64, Operand(imm.value)); + doBaseIndex(scratch64, address, vixl::STR_x); + } + void storePtr(ImmGCPtr imm, const BaseIndex& address) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(scratch != address.base); + MOZ_ASSERT(scratch != address.index); + movePtr(imm, scratch); + doBaseIndex(ARMRegister(scratch, 64), address, vixl::STR_x); + } + void storePtr(Register src, const BaseIndex& address) { + doBaseIndex(ARMRegister(src, 64), address, vixl::STR_x); + } + + void storePtr(Register src, AbsoluteAddress address) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch64 = temps.AcquireX(); + Mov(scratch64, uint64_t(address.addr)); + Str(ARMRegister(src, 64), MemOperand(scratch64)); + } + + void store32(Register src, AbsoluteAddress address) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch64 = temps.AcquireX(); + Mov(scratch64, uint64_t(address.addr)); + Str(ARMRegister(src, 32), MemOperand(scratch64)); + } + void store32(Imm32 imm, const Address& address) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch32 = temps.AcquireW(); + MOZ_ASSERT(scratch32.asUnsized() != address.base); + Mov(scratch32, uint64_t(imm.value)); + Str(scratch32, toMemOperand(address)); + } + void store32(Register r, const Address& address) { + Str(ARMRegister(r, 32), toMemOperand(address)); + } + void store32(Imm32 
imm, const BaseIndex& address) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch32 = temps.AcquireW(); + MOZ_ASSERT(scratch32.asUnsized() != address.base); + MOZ_ASSERT(scratch32.asUnsized() != address.index); + Mov(scratch32, imm.value); + doBaseIndex(scratch32, address, vixl::STR_w); + } + void store32(Register r, const BaseIndex& address) { + doBaseIndex(ARMRegister(r, 32), address, vixl::STR_w); + } + + template <typename S, typename T> + void store32Unaligned(const S& src, const T& dest) { + store32(src, dest); + } + + void store64(Register64 src, Address address) { storePtr(src.reg, address); } + + void store64(Register64 src, const BaseIndex& address) { + storePtr(src.reg, address); + } + + void store64(Imm64 imm, const BaseIndex& address) { + storePtr(ImmWord(imm.value), address); + } + + void store64(Imm64 imm, const Address& address) { + storePtr(ImmWord(imm.value), address); + } + + template <typename S, typename T> + void store64Unaligned(const S& src, const T& dest) { + store64(src, dest); + } + + // StackPointer manipulation. + inline void addToStackPtr(Register src); + inline void addToStackPtr(Imm32 imm); + inline void addToStackPtr(const Address& src); + inline void addStackPtrTo(Register dest); + + inline void subFromStackPtr(Register src); + inline void subFromStackPtr(Imm32 imm); + inline void subStackPtrFrom(Register dest); + + inline void andToStackPtr(Imm32 t); + + inline void moveToStackPtr(Register src); + inline void moveStackPtrTo(Register dest); + + inline void loadStackPtr(const Address& src); + inline void storeStackPtr(const Address& dest); + + // StackPointer testing functions. 
+ inline void branchTestStackPtr(Condition cond, Imm32 rhs, Label* label); + inline void branchStackPtr(Condition cond, Register rhs, Label* label); + inline void branchStackPtrRhs(Condition cond, Address lhs, Label* label); + inline void branchStackPtrRhs(Condition cond, AbsoluteAddress lhs, + Label* label); + + void testPtr(Register lhs, Register rhs) { + Tst(ARMRegister(lhs, 64), Operand(ARMRegister(rhs, 64))); + } + void test32(Register lhs, Register rhs) { + Tst(ARMRegister(lhs, 32), Operand(ARMRegister(rhs, 32))); + } + void test32(const Address& addr, Imm32 imm) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch32 = temps.AcquireW(); + MOZ_ASSERT(scratch32.asUnsized() != addr.base); + load32(addr, scratch32.asUnsized()); + Tst(scratch32, Operand(imm.value)); + } + void test32(Register lhs, Imm32 rhs) { + Tst(ARMRegister(lhs, 32), Operand(rhs.value)); + } + void cmp32(Register lhs, Imm32 rhs) { + Cmp(ARMRegister(lhs, 32), Operand(rhs.value)); + } + void cmp32(Register a, Register b) { + Cmp(ARMRegister(a, 32), Operand(ARMRegister(b, 32))); + } + void cmp32(const Address& lhs, Imm32 rhs) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch32 = temps.AcquireW(); + MOZ_ASSERT(scratch32.asUnsized() != lhs.base); + Ldr(scratch32, toMemOperand(lhs)); + Cmp(scratch32, Operand(rhs.value)); + } + void cmp32(const Address& lhs, Register rhs) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch32 = temps.AcquireW(); + MOZ_ASSERT(scratch32.asUnsized() != lhs.base); + MOZ_ASSERT(scratch32.asUnsized() != rhs); + Ldr(scratch32, toMemOperand(lhs)); + Cmp(scratch32, Operand(ARMRegister(rhs, 32))); + } + void cmp32(const vixl::Operand& lhs, Imm32 rhs) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch32 = temps.AcquireW(); + Mov(scratch32, lhs); + Cmp(scratch32, Operand(rhs.value)); + } + void cmp32(const vixl::Operand& lhs, Register rhs) { + vixl::UseScratchRegisterScope temps(this); + 
const ARMRegister scratch32 = temps.AcquireW(); + Mov(scratch32, lhs); + Cmp(scratch32, Operand(ARMRegister(rhs, 32))); + } + + void cmn32(Register lhs, Imm32 rhs) { + Cmn(ARMRegister(lhs, 32), Operand(rhs.value)); + } + + void cmpPtr(Register lhs, Imm32 rhs) { + Cmp(ARMRegister(lhs, 64), Operand(rhs.value)); + } + void cmpPtr(Register lhs, ImmWord rhs) { + Cmp(ARMRegister(lhs, 64), Operand(rhs.value)); + } + void cmpPtr(Register lhs, ImmPtr rhs) { + Cmp(ARMRegister(lhs, 64), Operand(uint64_t(rhs.value))); + } + void cmpPtr(Register lhs, Imm64 rhs) { + Cmp(ARMRegister(lhs, 64), Operand(uint64_t(rhs.value))); + } + void cmpPtr(Register lhs, Register rhs) { + Cmp(ARMRegister(lhs, 64), ARMRegister(rhs, 64)); + } + void cmpPtr(Register lhs, ImmGCPtr rhs) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(scratch != lhs); + movePtr(rhs, scratch); + cmpPtr(lhs, scratch); + } + + void cmpPtr(const Address& lhs, Register rhs) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch64 = temps.AcquireX(); + MOZ_ASSERT(scratch64.asUnsized() != lhs.base); + MOZ_ASSERT(scratch64.asUnsized() != rhs); + Ldr(scratch64, toMemOperand(lhs)); + Cmp(scratch64, Operand(ARMRegister(rhs, 64))); + } + void cmpPtr(const Address& lhs, ImmWord rhs) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch64 = temps.AcquireX(); + MOZ_ASSERT(scratch64.asUnsized() != lhs.base); + Ldr(scratch64, toMemOperand(lhs)); + Cmp(scratch64, Operand(rhs.value)); + } + void cmpPtr(const Address& lhs, ImmPtr rhs) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch64 = temps.AcquireX(); + MOZ_ASSERT(scratch64.asUnsized() != lhs.base); + Ldr(scratch64, toMemOperand(lhs)); + Cmp(scratch64, Operand(uint64_t(rhs.value))); + } + void cmpPtr(const Address& lhs, ImmGCPtr rhs) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + 
MOZ_ASSERT(scratch != lhs.base); + loadPtr(lhs, scratch); + cmpPtr(scratch, rhs); + } + + void loadDouble(const Address& src, FloatRegister dest) { + Ldr(ARMFPRegister(dest, 64), MemOperand(src)); + } + void loadDouble(const BaseIndex& src, FloatRegister dest) { + ARMRegister base = toARMRegister(src.base, 64); + ARMRegister index(src.index, 64); + + if (src.offset == 0) { + Ldr(ARMFPRegister(dest, 64), + MemOperand(base, index, vixl::LSL, unsigned(src.scale))); + return; + } + + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch64 = temps.AcquireX(); + MOZ_ASSERT(scratch64.asUnsized() != src.base); + MOZ_ASSERT(scratch64.asUnsized() != src.index); + + Add(scratch64, base, Operand(index, vixl::LSL, unsigned(src.scale))); + Ldr(ARMFPRegister(dest, 64), MemOperand(scratch64, src.offset)); + } + void loadFloatAsDouble(const Address& addr, FloatRegister dest) { + Ldr(ARMFPRegister(dest, 32), toMemOperand(addr)); + fcvt(ARMFPRegister(dest, 64), ARMFPRegister(dest, 32)); + } + void loadFloatAsDouble(const BaseIndex& src, FloatRegister dest) { + ARMRegister base = toARMRegister(src.base, 64); + ARMRegister index(src.index, 64); + if (src.offset == 0) { + Ldr(ARMFPRegister(dest, 32), + MemOperand(base, index, vixl::LSL, unsigned(src.scale))); + } else { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch64 = temps.AcquireX(); + MOZ_ASSERT(scratch64.asUnsized() != src.base); + MOZ_ASSERT(scratch64.asUnsized() != src.index); + + Add(scratch64, base, Operand(index, vixl::LSL, unsigned(src.scale))); + Ldr(ARMFPRegister(dest, 32), MemOperand(scratch64, src.offset)); + } + fcvt(ARMFPRegister(dest, 64), ARMFPRegister(dest, 32)); + } + + void loadFloat32(const Address& addr, FloatRegister dest) { + Ldr(ARMFPRegister(dest, 32), toMemOperand(addr)); + } + void loadFloat32(const BaseIndex& src, FloatRegister dest) { + ARMRegister base = toARMRegister(src.base, 64); + ARMRegister index(src.index, 64); + if (src.offset == 0) { + 
Ldr(ARMFPRegister(dest, 32), + MemOperand(base, index, vixl::LSL, unsigned(src.scale))); + } else { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch64 = temps.AcquireX(); + MOZ_ASSERT(scratch64.asUnsized() != src.base); + MOZ_ASSERT(scratch64.asUnsized() != src.index); + + Add(scratch64, base, Operand(index, vixl::LSL, unsigned(src.scale))); + Ldr(ARMFPRegister(dest, 32), MemOperand(scratch64, src.offset)); + } + } + + void moveDouble(FloatRegister src, FloatRegister dest) { + fmov(ARMFPRegister(dest, 64), ARMFPRegister(src, 64)); + } + void zeroDouble(FloatRegister reg) { + fmov(ARMFPRegister(reg, 64), vixl::xzr); + } + void zeroFloat32(FloatRegister reg) { + fmov(ARMFPRegister(reg, 32), vixl::wzr); + } + + void moveFloat32(FloatRegister src, FloatRegister dest) { + fmov(ARMFPRegister(dest, 32), ARMFPRegister(src, 32)); + } + void moveFloatAsDouble(Register src, FloatRegister dest) { + MOZ_CRASH("moveFloatAsDouble"); + } + + void moveSimd128(FloatRegister src, FloatRegister dest) { + fmov(ARMFPRegister(dest, 128), ARMFPRegister(src, 128)); + } + + void splitSignExtTag(const ValueOperand& operand, Register dest) { + splitSignExtTag(operand.valueReg(), dest); + } + void splitSignExtTag(const Address& operand, Register dest) { + loadPtr(operand, dest); + splitSignExtTag(dest, dest); + } + void splitSignExtTag(const BaseIndex& operand, Register dest) { + loadPtr(operand, dest); + splitSignExtTag(dest, dest); + } + + // Extracts the tag of a value and places it in tag + inline void splitTagForTest(const ValueOperand& value, ScratchTagScope& tag); + void cmpTag(const ValueOperand& operand, ImmTag tag) { MOZ_CRASH("cmpTag"); } + + void load32(const Address& address, Register dest) { + Ldr(ARMRegister(dest, 32), toMemOperand(address)); + } + void load32(const BaseIndex& src, Register dest) { + doBaseIndex(ARMRegister(dest, 32), src, vixl::LDR_w); + } + void load32(AbsoluteAddress address, Register dest) { + vixl::UseScratchRegisterScope 
temps(this); + const ARMRegister scratch64 = temps.AcquireX(); + movePtr(ImmWord((uintptr_t)address.addr), scratch64.asUnsized()); + ldr(ARMRegister(dest, 32), MemOperand(scratch64)); + } + template <typename S> + void load32Unaligned(const S& src, Register dest) { + load32(src, dest); + } + void load64(const Address& address, Register64 dest) { + loadPtr(address, dest.reg); + } + void load64(const BaseIndex& address, Register64 dest) { + loadPtr(address, dest.reg); + } + template <typename S> + void load64Unaligned(const S& src, Register64 dest) { + load64(src, dest); + } + + void load8SignExtend(const Address& address, Register dest) { + Ldrsb(ARMRegister(dest, 32), toMemOperand(address)); + } + void load8SignExtend(const BaseIndex& src, Register dest) { + doBaseIndex(ARMRegister(dest, 32), src, vixl::LDRSB_w); + } + + void load8ZeroExtend(const Address& address, Register dest) { + Ldrb(ARMRegister(dest, 32), toMemOperand(address)); + } + void load8ZeroExtend(const BaseIndex& src, Register dest) { + doBaseIndex(ARMRegister(dest, 32), src, vixl::LDRB_w); + } + + void load16SignExtend(const Address& address, Register dest) { + Ldrsh(ARMRegister(dest, 32), toMemOperand(address)); + } + void load16SignExtend(const BaseIndex& src, Register dest) { + doBaseIndex(ARMRegister(dest, 32), src, vixl::LDRSH_w); + } + template <typename S> + void load16UnalignedSignExtend(const S& src, Register dest) { + load16SignExtend(src, dest); + } + + void load16ZeroExtend(const Address& address, Register dest) { + Ldrh(ARMRegister(dest, 32), toMemOperand(address)); + } + void load16ZeroExtend(const BaseIndex& src, Register dest) { + doBaseIndex(ARMRegister(dest, 32), src, vixl::LDRH_w); + } + template <typename S> + void load16UnalignedZeroExtend(const S& src, Register dest) { + load16ZeroExtend(src, dest); + } + + void adds32(Register src, Register dest) { + Adds(ARMRegister(dest, 32), ARMRegister(dest, 32), + Operand(ARMRegister(src, 32))); + } + void adds32(Imm32 imm, Register dest) 
{ + Adds(ARMRegister(dest, 32), ARMRegister(dest, 32), Operand(imm.value)); + } + void adds32(Imm32 imm, const Address& dest) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch32 = temps.AcquireW(); + MOZ_ASSERT(scratch32.asUnsized() != dest.base); + + Ldr(scratch32, toMemOperand(dest)); + Adds(scratch32, scratch32, Operand(imm.value)); + Str(scratch32, toMemOperand(dest)); + } + void adds64(Imm32 imm, Register dest) { + Adds(ARMRegister(dest, 64), ARMRegister(dest, 64), Operand(imm.value)); + } + void adds64(ImmWord imm, Register dest) { + Adds(ARMRegister(dest, 64), ARMRegister(dest, 64), Operand(imm.value)); + } + void adds64(Register src, Register dest) { + Adds(ARMRegister(dest, 64), ARMRegister(dest, 64), + Operand(ARMRegister(src, 64))); + } + + void subs32(Imm32 imm, Register dest) { + Subs(ARMRegister(dest, 32), ARMRegister(dest, 32), Operand(imm.value)); + } + void subs32(Register src, Register dest) { + Subs(ARMRegister(dest, 32), ARMRegister(dest, 32), + Operand(ARMRegister(src, 32))); + } + void subs64(Imm32 imm, Register dest) { + Subs(ARMRegister(dest, 64), ARMRegister(dest, 64), Operand(imm.value)); + } + void subs64(Register src, Register dest) { + Subs(ARMRegister(dest, 64), ARMRegister(dest, 64), + Operand(ARMRegister(src, 64))); + } + + void negs32(Register reg) { + Negs(ARMRegister(reg, 32), Operand(ARMRegister(reg, 32))); + } + + void ret() { + pop(lr); + abiret(); + } + + void retn(Imm32 n) { + vixl::UseScratchRegisterScope temps(this); + MOZ_ASSERT(temps.IsAvailable(ScratchReg64)); // ip0 + temps.Exclude(ScratchReg64); + // ip0 <- [sp]; sp += n; ret ip0 + Ldr(ScratchReg64, + MemOperand(GetStackPointer64(), ptrdiff_t(n.value), vixl::PostIndex)); + syncStackPtr(); // SP is always used to transmit the stack between calls. 
+ Ret(ScratchReg64); + } + + void j(Condition cond, Label* dest) { B(dest, cond); } + + void branch(Condition cond, Label* label) { B(label, cond); } + void branch(JitCode* target) { + // It is unclear why this sync is necessary: + // * PSP and SP have been observed to be different in testcase + // tests/async/debugger-reject-after-fulfill.js + // * Removing the sync causes no failures in all of jit-tests. + // + // Also see jump() above. This is used only to implement jump(JitCode*) + // and only for JS, it appears. + syncStackPtr(); + BufferOffset loc = + b(-1, + LabelDoc()); // The jump target will be patched by executableCopy(). + addPendingJump(loc, ImmPtr(target->raw()), RelocationKind::JITCODE); + } + + void compareDouble(DoubleCondition cond, FloatRegister lhs, + FloatRegister rhs) { + Fcmp(ARMFPRegister(lhs, 64), ARMFPRegister(rhs, 64)); + } + + void compareFloat(DoubleCondition cond, FloatRegister lhs, + FloatRegister rhs) { + Fcmp(ARMFPRegister(lhs, 32), ARMFPRegister(rhs, 32)); + } + + void compareSimd128Int(Assembler::Condition cond, ARMFPRegister dest, + ARMFPRegister lhs, ARMFPRegister rhs); + void compareSimd128Float(Assembler::Condition cond, ARMFPRegister dest, + ARMFPRegister lhs, ARMFPRegister rhs); + void rightShiftInt8x16(FloatRegister lhs, Register rhs, FloatRegister dest, + bool isUnsigned); + void rightShiftInt16x8(FloatRegister lhs, Register rhs, FloatRegister dest, + bool isUnsigned); + void rightShiftInt32x4(FloatRegister lhs, Register rhs, FloatRegister dest, + bool isUnsigned); + void rightShiftInt64x2(FloatRegister lhs, Register rhs, FloatRegister dest, + bool isUnsigned); + + void branchNegativeZero(FloatRegister reg, Register scratch, Label* label) { + MOZ_CRASH("branchNegativeZero"); + } + void branchNegativeZeroFloat32(FloatRegister reg, Register scratch, + Label* label) { + MOZ_CRASH("branchNegativeZeroFloat32"); + } + + void boxDouble(FloatRegister src, const ValueOperand& dest, FloatRegister) { + 
Fmov(ARMRegister(dest.valueReg(), 64), ARMFPRegister(src, 64)); + } + void boxNonDouble(JSValueType type, Register src, const ValueOperand& dest) { + boxValue(type, src, dest.valueReg()); + } + + // Note that the |dest| register here may be ScratchReg, so we shouldn't use + // it. + void unboxInt32(const ValueOperand& src, Register dest) { + move32(src.valueReg(), dest); + } + void unboxInt32(const Address& src, Register dest) { load32(src, dest); } + void unboxInt32(const BaseIndex& src, Register dest) { load32(src, dest); } + + template <typename T> + void unboxDouble(const T& src, FloatRegister dest) { + loadDouble(src, dest); + } + void unboxDouble(const ValueOperand& src, FloatRegister dest) { + Fmov(ARMFPRegister(dest, 64), ARMRegister(src.valueReg(), 64)); + } + + void unboxArgObjMagic(const ValueOperand& src, Register dest) { + MOZ_CRASH("unboxArgObjMagic"); + } + void unboxArgObjMagic(const Address& src, Register dest) { + MOZ_CRASH("unboxArgObjMagic"); + } + + void unboxBoolean(const ValueOperand& src, Register dest) { + move32(src.valueReg(), dest); + } + void unboxBoolean(const Address& src, Register dest) { load32(src, dest); } + void unboxBoolean(const BaseIndex& src, Register dest) { load32(src, dest); } + + void unboxMagic(const ValueOperand& src, Register dest) { + move32(src.valueReg(), dest); + } + void unboxNonDouble(const ValueOperand& src, Register dest, + JSValueType type) { + unboxNonDouble(src.valueReg(), dest, type); + } + + template <typename T> + void unboxNonDouble(T src, Register dest, JSValueType type) { + MOZ_ASSERT(type != JSVAL_TYPE_DOUBLE); + if (type == JSVAL_TYPE_INT32 || type == JSVAL_TYPE_BOOLEAN) { + load32(src, dest); + return; + } + loadPtr(src, dest); + unboxNonDouble(dest, dest, type); + } + + void unboxNonDouble(Register src, Register dest, JSValueType type) { + MOZ_ASSERT(type != JSVAL_TYPE_DOUBLE); + if (type == JSVAL_TYPE_INT32 || type == JSVAL_TYPE_BOOLEAN) { + move32(src, dest); + return; + } + Eor(ARMRegister(dest, 
64), ARMRegister(src, 64), + Operand(JSVAL_TYPE_TO_SHIFTED_TAG(type))); + } + + void notBoolean(const ValueOperand& val) { + ARMRegister r(val.valueReg(), 64); + eor(r, r, Operand(1)); + } + void unboxObject(const ValueOperand& src, Register dest) { + unboxNonDouble(src.valueReg(), dest, JSVAL_TYPE_OBJECT); + } + void unboxObject(Register src, Register dest) { + unboxNonDouble(src, dest, JSVAL_TYPE_OBJECT); + } + void unboxObject(const Address& src, Register dest) { + loadPtr(src, dest); + unboxNonDouble(dest, dest, JSVAL_TYPE_OBJECT); + } + void unboxObject(const BaseIndex& src, Register dest) { + doBaseIndex(ARMRegister(dest, 64), src, vixl::LDR_x); + unboxNonDouble(dest, dest, JSVAL_TYPE_OBJECT); + } + + template <typename T> + void unboxObjectOrNull(const T& src, Register dest) { + unboxNonDouble(src, dest, JSVAL_TYPE_OBJECT); + And(ARMRegister(dest, 64), ARMRegister(dest, 64), + Operand(~JS::detail::ValueObjectOrNullBit)); + } + + // See comment in MacroAssembler-x64.h. + void unboxGCThingForGCBarrier(const Address& src, Register dest) { + loadPtr(src, dest); + And(ARMRegister(dest, 64), ARMRegister(dest, 64), + Operand(JS::detail::ValueGCThingPayloadMask)); + } + void unboxGCThingForGCBarrier(const ValueOperand& src, Register dest) { + And(ARMRegister(dest, 64), ARMRegister(src.valueReg(), 64), + Operand(JS::detail::ValueGCThingPayloadMask)); + } + + // Like unboxGCThingForGCBarrier, but loads the GC thing's chunk base. 
+ void getGCThingValueChunk(const Address& src, Register dest) { + loadPtr(src, dest); + And(ARMRegister(dest, 64), ARMRegister(dest, 64), + Operand(JS::detail::ValueGCThingPayloadChunkMask)); + } + void getGCThingValueChunk(const ValueOperand& src, Register dest) { + And(ARMRegister(dest, 64), ARMRegister(src.valueReg(), 64), + Operand(JS::detail::ValueGCThingPayloadChunkMask)); + } + + inline void unboxValue(const ValueOperand& src, AnyRegister dest, + JSValueType type); + + void unboxString(const ValueOperand& operand, Register dest) { + unboxNonDouble(operand, dest, JSVAL_TYPE_STRING); + } + void unboxString(const Address& src, Register dest) { + unboxNonDouble(src, dest, JSVAL_TYPE_STRING); + } + void unboxSymbol(const ValueOperand& operand, Register dest) { + unboxNonDouble(operand, dest, JSVAL_TYPE_SYMBOL); + } + void unboxSymbol(const Address& src, Register dest) { + unboxNonDouble(src, dest, JSVAL_TYPE_SYMBOL); + } + void unboxBigInt(const ValueOperand& operand, Register dest) { + unboxNonDouble(operand, dest, JSVAL_TYPE_BIGINT); + } + void unboxBigInt(const Address& src, Register dest) { + unboxNonDouble(src, dest, JSVAL_TYPE_BIGINT); + } + // These two functions use the low 32-bits of the full value register. 
+ void boolValueToDouble(const ValueOperand& operand, FloatRegister dest) { + convertInt32ToDouble(operand.valueReg(), dest); + } + void int32ValueToDouble(const ValueOperand& operand, FloatRegister dest) { + convertInt32ToDouble(operand.valueReg(), dest); + } + + void boolValueToFloat32(const ValueOperand& operand, FloatRegister dest) { + convertInt32ToFloat32(operand.valueReg(), dest); + } + void int32ValueToFloat32(const ValueOperand& operand, FloatRegister dest) { + convertInt32ToFloat32(operand.valueReg(), dest); + } + + void loadConstantDouble(double d, FloatRegister dest) { + ARMFPRegister r(dest, 64); + if (d == 0.0) { + // Clang11 does movi for 0 and movi+fneg for -0, and this seems like a + // good implementation-independent strategy as it avoids any gpr->fpr + // moves or memory traffic. + Movi(r, 0); + if (std::signbit(d)) { + Fneg(r, r); + } + } else { + Fmov(r, d); + } + } + void loadConstantFloat32(float f, FloatRegister dest) { + ARMFPRegister r(dest, 32); + if (f == 0.0) { + // See comments above. There's not a movi variant for a single register, + // so clear the double. + Movi(ARMFPRegister(dest, 64), 0); + if (std::signbit(f)) { + Fneg(r, r); + } + } else { + Fmov(r, f); + } + } + + void cmpTag(Register tag, ImmTag ref) { + // As opposed to other architecture, splitTag is replaced by splitSignExtTag + // which extract the tag with a sign extension. The reason being that cmp32 + // with a tag value would be too large to fit as a 12 bits immediate value, + // and would require the VIXL macro assembler to add an extra instruction + // and require extra scratch register to load the Tag value. + // + // Instead, we compare with the negative value of the sign extended tag with + // the CMN instruction. The sign extended tag is expected to be a negative + // value. Therefore the negative of the sign extended tag is expected to be + // near 0 and fit on 12 bits. 
+ // + // Ignoring the sign extension, the logic is the following: + // + // CMP32(Reg, Tag) = Reg - Tag + // = Reg + (-Tag) + // = CMN32(Reg, -Tag) + // + // Note: testGCThing, testPrimitive and testNumber which are checking for + // inequalities should use unsigned comparisons (as done by default) in + // order to keep the same relation order after the sign extension, i.e. + // using Above or Below which are based on the carry flag. + uint32_t hiShift = JSVAL_TAG_SHIFT - 32; + int32_t seTag = int32_t(ref.value); + seTag = (seTag << hiShift) >> hiShift; + MOZ_ASSERT(seTag < 0); + int32_t negTag = -seTag; + // Check thest negTag is encoded on a 12 bits immediate value. + MOZ_ASSERT((negTag & ~0xFFF) == 0); + cmn32(tag, Imm32(negTag)); + } + + // Register-based tests. + Condition testUndefined(Condition cond, Register tag) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmpTag(tag, ImmTag(JSVAL_TAG_UNDEFINED)); + return cond; + } + Condition testInt32(Condition cond, Register tag) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmpTag(tag, ImmTag(JSVAL_TAG_INT32)); + return cond; + } + Condition testBoolean(Condition cond, Register tag) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmpTag(tag, ImmTag(JSVAL_TAG_BOOLEAN)); + return cond; + } + Condition testNull(Condition cond, Register tag) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmpTag(tag, ImmTag(JSVAL_TAG_NULL)); + return cond; + } + Condition testString(Condition cond, Register tag) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmpTag(tag, ImmTag(JSVAL_TAG_STRING)); + return cond; + } + Condition testSymbol(Condition cond, Register tag) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmpTag(tag, ImmTag(JSVAL_TAG_SYMBOL)); + return cond; + } + Condition testBigInt(Condition cond, Register tag) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmpTag(tag, ImmTag(JSVAL_TAG_BIGINT)); + return cond; + } + Condition testObject(Condition cond, Register tag) { + 
MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmpTag(tag, ImmTag(JSVAL_TAG_OBJECT)); + return cond; + } + Condition testDouble(Condition cond, Register tag) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmpTag(tag, ImmTag(JSVAL_TAG_MAX_DOUBLE)); + // Requires unsigned comparison due to cmpTag internals. + return (cond == Equal) ? BelowOrEqual : Above; + } + Condition testNumber(Condition cond, Register tag) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmpTag(tag, ImmTag(JS::detail::ValueUpperInclNumberTag)); + // Requires unsigned comparison due to cmpTag internals. + return (cond == Equal) ? BelowOrEqual : Above; + } + Condition testGCThing(Condition cond, Register tag) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmpTag(tag, ImmTag(JS::detail::ValueLowerInclGCThingTag)); + // Requires unsigned comparison due to cmpTag internals. + return (cond == Equal) ? AboveOrEqual : Below; + } + Condition testMagic(Condition cond, Register tag) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmpTag(tag, ImmTag(JSVAL_TAG_MAGIC)); + return cond; + } + Condition testPrimitive(Condition cond, Register tag) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmpTag(tag, ImmTag(JS::detail::ValueUpperExclPrimitiveTag)); + // Requires unsigned comparison due to cmpTag internals. + return (cond == Equal) ? Below : AboveOrEqual; + } + Condition testError(Condition cond, Register tag) { + return testMagic(cond, tag); + } + + // ValueOperand-based tests. + Condition testInt32(Condition cond, const ValueOperand& value) { + // The incoming ValueOperand may use scratch registers. 
+ vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(scratch != value.valueReg()); + + splitSignExtTag(value, scratch); + return testInt32(cond, scratch); + } + Condition testBoolean(Condition cond, const ValueOperand& value) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(value.valueReg() != scratch); + splitSignExtTag(value, scratch); + return testBoolean(cond, scratch); + } + Condition testDouble(Condition cond, const ValueOperand& value) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(value.valueReg() != scratch); + splitSignExtTag(value, scratch); + return testDouble(cond, scratch); + } + Condition testNull(Condition cond, const ValueOperand& value) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(value.valueReg() != scratch); + splitSignExtTag(value, scratch); + return testNull(cond, scratch); + } + Condition testUndefined(Condition cond, const ValueOperand& value) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(value.valueReg() != scratch); + splitSignExtTag(value, scratch); + return testUndefined(cond, scratch); + } + Condition testString(Condition cond, const ValueOperand& value) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(value.valueReg() != scratch); + splitSignExtTag(value, scratch); + return testString(cond, scratch); + } + Condition testSymbol(Condition cond, const ValueOperand& value) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(value.valueReg() != scratch); + splitSignExtTag(value, scratch); + return testSymbol(cond, scratch); + } + Condition testBigInt(Condition cond, const 
ValueOperand& value) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(value.valueReg() != scratch); + splitSignExtTag(value, scratch); + return testBigInt(cond, scratch); + } + Condition testObject(Condition cond, const ValueOperand& value) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(value.valueReg() != scratch); + splitSignExtTag(value, scratch); + return testObject(cond, scratch); + } + Condition testNumber(Condition cond, const ValueOperand& value) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(value.valueReg() != scratch); + splitSignExtTag(value, scratch); + return testNumber(cond, scratch); + } + Condition testPrimitive(Condition cond, const ValueOperand& value) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(value.valueReg() != scratch); + splitSignExtTag(value, scratch); + return testPrimitive(cond, scratch); + } + Condition testMagic(Condition cond, const ValueOperand& value) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(value.valueReg() != scratch); + splitSignExtTag(value, scratch); + return testMagic(cond, scratch); + } + Condition testGCThing(Condition cond, const ValueOperand& value) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(value.valueReg() != scratch); + splitSignExtTag(value, scratch); + return testGCThing(cond, scratch); + } + Condition testError(Condition cond, const ValueOperand& value) { + return testMagic(cond, value); + } + + // Address-based tests. 
+ Condition testGCThing(Condition cond, const Address& address) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(address.base != scratch); + splitSignExtTag(address, scratch); + return testGCThing(cond, scratch); + } + Condition testMagic(Condition cond, const Address& address) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(address.base != scratch); + splitSignExtTag(address, scratch); + return testMagic(cond, scratch); + } + Condition testInt32(Condition cond, const Address& address) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(address.base != scratch); + splitSignExtTag(address, scratch); + return testInt32(cond, scratch); + } + Condition testDouble(Condition cond, const Address& address) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(address.base != scratch); + splitSignExtTag(address, scratch); + return testDouble(cond, scratch); + } + Condition testBoolean(Condition cond, const Address& address) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(address.base != scratch); + splitSignExtTag(address, scratch); + return testBoolean(cond, scratch); + } + Condition testNull(Condition cond, const Address& address) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(address.base != scratch); + splitSignExtTag(address, scratch); + return testNull(cond, scratch); + } + Condition testUndefined(Condition cond, const Address& address) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(address.base != scratch); + splitSignExtTag(address, scratch); + return testUndefined(cond, scratch); + } + Condition 
testString(Condition cond, const Address& address) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(address.base != scratch); + splitSignExtTag(address, scratch); + return testString(cond, scratch); + } + Condition testSymbol(Condition cond, const Address& address) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(address.base != scratch); + splitSignExtTag(address, scratch); + return testSymbol(cond, scratch); + } + Condition testBigInt(Condition cond, const Address& address) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(address.base != scratch); + splitSignExtTag(address, scratch); + return testBigInt(cond, scratch); + } + Condition testObject(Condition cond, const Address& address) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(address.base != scratch); + splitSignExtTag(address, scratch); + return testObject(cond, scratch); + } + Condition testNumber(Condition cond, const Address& address) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(address.base != scratch); + splitSignExtTag(address, scratch); + return testNumber(cond, scratch); + } + + // BaseIndex-based tests. 
+ Condition testUndefined(Condition cond, const BaseIndex& src) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(src.base != scratch); + MOZ_ASSERT(src.index != scratch); + splitSignExtTag(src, scratch); + return testUndefined(cond, scratch); + } + Condition testNull(Condition cond, const BaseIndex& src) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(src.base != scratch); + MOZ_ASSERT(src.index != scratch); + splitSignExtTag(src, scratch); + return testNull(cond, scratch); + } + Condition testBoolean(Condition cond, const BaseIndex& src) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(src.base != scratch); + MOZ_ASSERT(src.index != scratch); + splitSignExtTag(src, scratch); + return testBoolean(cond, scratch); + } + Condition testString(Condition cond, const BaseIndex& src) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(src.base != scratch); + MOZ_ASSERT(src.index != scratch); + splitSignExtTag(src, scratch); + return testString(cond, scratch); + } + Condition testSymbol(Condition cond, const BaseIndex& src) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(src.base != scratch); + MOZ_ASSERT(src.index != scratch); + splitSignExtTag(src, scratch); + return testSymbol(cond, scratch); + } + Condition testBigInt(Condition cond, const BaseIndex& src) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(src.base != scratch); + MOZ_ASSERT(src.index != scratch); + splitSignExtTag(src, scratch); + return testBigInt(cond, scratch); + } + Condition testInt32(Condition cond, const BaseIndex& src) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = 
temps.AcquireX().asUnsized(); + MOZ_ASSERT(src.base != scratch); + MOZ_ASSERT(src.index != scratch); + splitSignExtTag(src, scratch); + return testInt32(cond, scratch); + } + Condition testObject(Condition cond, const BaseIndex& src) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(src.base != scratch); + MOZ_ASSERT(src.index != scratch); + splitSignExtTag(src, scratch); + return testObject(cond, scratch); + } + Condition testDouble(Condition cond, const BaseIndex& src) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(src.base != scratch); + MOZ_ASSERT(src.index != scratch); + splitSignExtTag(src, scratch); + return testDouble(cond, scratch); + } + Condition testMagic(Condition cond, const BaseIndex& src) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(src.base != scratch); + MOZ_ASSERT(src.index != scratch); + splitSignExtTag(src, scratch); + return testMagic(cond, scratch); + } + Condition testGCThing(Condition cond, const BaseIndex& src) { + vixl::UseScratchRegisterScope temps(this); + const Register scratch = temps.AcquireX().asUnsized(); + MOZ_ASSERT(src.base != scratch); + MOZ_ASSERT(src.index != scratch); + splitSignExtTag(src, scratch); + return testGCThing(cond, scratch); + } + + Condition testInt32Truthy(bool truthy, const ValueOperand& operand) { + ARMRegister payload32(operand.valueReg(), 32); + Tst(payload32, payload32); + return truthy ? NonZero : Zero; + } + + Condition testBooleanTruthy(bool truthy, const ValueOperand& operand) { + ARMRegister payload32(operand.valueReg(), 32); + Tst(payload32, payload32); + return truthy ? 
NonZero : Zero; + } + + Condition testBigIntTruthy(bool truthy, const ValueOperand& value); + Condition testStringTruthy(bool truthy, const ValueOperand& value); + + void int32OrDouble(Register src, ARMFPRegister dest) { + Label isInt32; + Label join; + testInt32(Equal, ValueOperand(src)); + B(&isInt32, Equal); + // is double, move the bits as is + Fmov(dest, ARMRegister(src, 64)); + B(&join); + bind(&isInt32); + // is int32, do a conversion while moving + Scvtf(dest, ARMRegister(src, 32)); + bind(&join); + } + void loadUnboxedValue(Address address, MIRType type, AnyRegister dest) { + if (dest.isFloat()) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch64 = temps.AcquireX(); + MOZ_ASSERT(scratch64.asUnsized() != address.base); + Ldr(scratch64, toMemOperand(address)); + int32OrDouble(scratch64.asUnsized(), ARMFPRegister(dest.fpu(), 64)); + } else { + unboxNonDouble(address, dest.gpr(), ValueTypeFromMIRType(type)); + } + } + + void loadUnboxedValue(BaseIndex address, MIRType type, AnyRegister dest) { + if (dest.isFloat()) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch64 = temps.AcquireX(); + MOZ_ASSERT(scratch64.asUnsized() != address.base); + MOZ_ASSERT(scratch64.asUnsized() != address.index); + doBaseIndex(scratch64, address, vixl::LDR_x); + int32OrDouble(scratch64.asUnsized(), ARMFPRegister(dest.fpu(), 64)); + } else { + unboxNonDouble(address, dest.gpr(), ValueTypeFromMIRType(type)); + } + } + + // Emit a B that can be toggled to a CMP. See ToggleToJmp(), ToggleToCmp(). + CodeOffset toggledJump(Label* label) { + BufferOffset offset = b(label, Always); + CodeOffset ret(offset.getOffset()); + return ret; + } + + // load: offset to the load instruction obtained by movePatchablePtr(). + void writeDataRelocation(ImmGCPtr ptr, BufferOffset load) { + // Raw GC pointer relocations and Value relocations both end up in + // Assembler::TraceDataRelocations. 
+ if (ptr.value) { + if (gc::IsInsideNursery(ptr.value)) { + embedsNurseryPointers_ = true; + } + dataRelocations_.writeUnsigned(load.getOffset()); + } + } + void writeDataRelocation(const Value& val, BufferOffset load) { + // Raw GC pointer relocations and Value relocations both end up in + // Assembler::TraceDataRelocations. + if (val.isGCThing()) { + gc::Cell* cell = val.toGCThing(); + if (cell && gc::IsInsideNursery(cell)) { + embedsNurseryPointers_ = true; + } + dataRelocations_.writeUnsigned(load.getOffset()); + } + } + + void computeEffectiveAddress(const Address& address, Register dest) { + Add(ARMRegister(dest, 64), toARMRegister(address.base, 64), + Operand(address.offset)); + } + void computeEffectiveAddress(const Address& address, RegisterOrSP dest) { + Add(toARMRegister(dest, 64), toARMRegister(address.base, 64), + Operand(address.offset)); + } + void computeEffectiveAddress(const BaseIndex& address, Register dest) { + ARMRegister dest64(dest, 64); + ARMRegister base64 = toARMRegister(address.base, 64); + ARMRegister index64(address.index, 64); + + Add(dest64, base64, Operand(index64, vixl::LSL, address.scale)); + if (address.offset) { + Add(dest64, dest64, Operand(address.offset)); + } + } + + public: + void handleFailureWithHandlerTail(Label* profilerExitTail, + Label* bailoutTail); + + void profilerEnterFrame(Register framePtr, Register scratch); + void profilerExitFrame(); + + void wasmLoadImpl(const wasm::MemoryAccessDesc& access, Register memoryBase, + Register ptr, AnyRegister outany, Register64 out64); + void wasmLoadImpl(const wasm::MemoryAccessDesc& access, MemOperand srcAddr, + AnyRegister outany, Register64 out64); + void wasmStoreImpl(const wasm::MemoryAccessDesc& access, AnyRegister valany, + Register64 val64, Register memoryBase, Register ptr); + void wasmStoreImpl(const wasm::MemoryAccessDesc& access, MemOperand destAddr, + AnyRegister valany, Register64 val64); + // The complete address is in `address`, and `access` is used for its 
type + // attributes only; its `offset` is ignored. + void wasmLoadAbsolute(const wasm::MemoryAccessDesc& access, + Register memoryBase, uint64_t address, AnyRegister out, + Register64 out64); + void wasmStoreAbsolute(const wasm::MemoryAccessDesc& access, + AnyRegister value, Register64 value64, + Register memoryBase, uint64_t address); + + // Emit a BLR or NOP instruction. ToggleCall can be used to patch + // this instruction. + CodeOffset toggledCall(JitCode* target, bool enabled) { + // The returned offset must be to the first instruction generated, + // for the debugger to match offset with Baseline's pcMappingEntries_. + BufferOffset offset = nextOffset(); + + // It is unclear why this sync is necessary: + // * PSP and SP have been observed to be different in testcase + // tests/cacheir/bug1448136.js + // * Removing the sync causes no failures in all of jit-tests. + syncStackPtr(); + + BufferOffset loadOffset; + { + vixl::UseScratchRegisterScope temps(this); + + // The register used for the load is hardcoded, so that ToggleCall + // can patch in the branch instruction easily. This could be changed, + // but then ToggleCall must read the target register from the load. + MOZ_ASSERT(temps.IsAvailable(ScratchReg2_64)); + temps.Exclude(ScratchReg2_64); + + loadOffset = immPool64(ScratchReg2_64, uint64_t(target->raw())); + + if (enabled) { + blr(ScratchReg2_64); + } else { + nop(); + } + } + + addPendingJump(loadOffset, ImmPtr(target->raw()), RelocationKind::JITCODE); + CodeOffset ret(offset.getOffset()); + return ret; + } + + static size_t ToggledCallSize(uint8_t* code) { + // The call site is a sequence of two or three instructions: + // + // syncStack (optional) + // ldr/adr + // nop/blr + // + // Flushed constant pools can appear before any of the instructions. 
+ + const Instruction* cur = (const Instruction*)code; + cur = cur->skipPool(); + if (cur->IsStackPtrSync()) cur = cur->NextInstruction(); + cur = cur->skipPool(); + cur = cur->NextInstruction(); // LDR/ADR + cur = cur->skipPool(); + cur = cur->NextInstruction(); // NOP/BLR + return (uint8_t*)cur - code; + } + + void checkARMRegAlignment(const ARMRegister& reg) { +#ifdef DEBUG + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch64 = temps.AcquireX(); + MOZ_ASSERT_IF(!reg.IsSP(), scratch64.asUnsized() != reg.asUnsized()); + Label aligned; + Mov(scratch64, reg); + Tst(scratch64, Operand(StackAlignment - 1)); + B(Zero, &aligned); + breakpoint(); + bind(&aligned); + Mov(scratch64, vixl::xzr); // Clear the scratch register for sanity. +#endif + } + + void checkStackAlignment() { +#ifdef DEBUG + checkARMRegAlignment(GetStackPointer64()); + + // If another register is being used to track pushes, check sp explicitly. + if (!GetStackPointer64().Is(vixl::sp)) { + checkARMRegAlignment(vixl::sp); + } +#endif + } + + void abiret() { + syncStackPtr(); // SP is always used to transmit the stack between calls. + vixl::MacroAssembler::Ret(vixl::lr); + } + + void incrementInt32Value(const Address& addr) { + vixl::UseScratchRegisterScope temps(this); + const ARMRegister scratch32 = temps.AcquireW(); + MOZ_ASSERT(scratch32.asUnsized() != addr.base); + + load32(addr, scratch32.asUnsized()); + Add(scratch32, scratch32, Operand(1)); + store32(scratch32.asUnsized(), addr); + } + + void breakpoint(); + + // Emits a simulator directive to save the current sp on an internal stack. + void simulatorMarkSP() { +#ifdef JS_SIMULATOR_ARM64 + svc(vixl::kMarkStackPointer); +#endif + } + + // Emits a simulator directive to pop from its internal stack + // and assert that the value is equal to the current sp. 
+ void simulatorCheckSP() { +#ifdef JS_SIMULATOR_ARM64 + svc(vixl::kCheckStackPointer); +#endif + } + + protected: + bool buildOOLFakeExitFrame(void* fakeReturnAddr); +}; + +// See documentation for ScratchTagScope and ScratchTagScopeRelease in +// MacroAssembler-x64.h. + +class ScratchTagScope { + vixl::UseScratchRegisterScope temps_; + ARMRegister scratch64_; + bool owned_; + mozilla::DebugOnly<bool> released_; + + public: + ScratchTagScope(MacroAssemblerCompat& masm, const ValueOperand&) + : temps_(&masm), owned_(true), released_(false) { + scratch64_ = temps_.AcquireX(); + } + + operator Register() { + MOZ_ASSERT(!released_); + return scratch64_.asUnsized(); + } + + void release() { + MOZ_ASSERT(!released_); + released_ = true; + if (owned_) { + temps_.Release(scratch64_); + owned_ = false; + } + } + + void reacquire() { + MOZ_ASSERT(released_); + released_ = false; + } +}; + +class ScratchTagScopeRelease { + ScratchTagScope* ts_; + + public: + explicit ScratchTagScopeRelease(ScratchTagScope* ts) : ts_(ts) { + ts_->release(); + } + ~ScratchTagScopeRelease() { ts_->reacquire(); } +}; + +inline void MacroAssemblerCompat::splitTagForTest(const ValueOperand& value, + ScratchTagScope& tag) { + splitSignExtTag(value, tag); +} + +typedef MacroAssemblerCompat MacroAssemblerSpecific; + +} // namespace jit +} // namespace js + +#endif // jit_arm64_MacroAssembler_arm64_h diff --git a/js/src/jit/arm64/MoveEmitter-arm64.cpp b/js/src/jit/arm64/MoveEmitter-arm64.cpp new file mode 100644 index 0000000000..fa1bb1209e --- /dev/null +++ b/js/src/jit/arm64/MoveEmitter-arm64.cpp @@ -0,0 +1,329 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "jit/arm64/MoveEmitter-arm64.h" +#include "jit/MacroAssembler-inl.h" + +using namespace js; +using namespace js::jit; + +MemOperand MoveEmitterARM64::toMemOperand(const MoveOperand& operand) const { + MOZ_ASSERT(operand.isMemory()); + ARMRegister base(operand.base(), 64); + if (operand.base() == masm.getStackPointer()) { + return MemOperand(base, + operand.disp() + (masm.framePushed() - pushedAtStart_)); + } + return MemOperand(base, operand.disp()); +} + +void MoveEmitterARM64::emit(const MoveResolver& moves) { + vixl::UseScratchRegisterScope temps(&masm.asVIXL()); + // We have two scratch general registers, so use one as temporary storage for + // breaking cycles and leave the other available for memory to memory moves. + // + // This register is used when breaking GENERAL, INT32, FLOAT32, and DOUBLE + // move cycles. For FLOAT32/DOUBLE, this involves a fmov between float and + // general registers. We could avoid this if we had an extra scratch float + // register, otherwise we need the scratch float register for memory to + // memory moves that may happen in the cycle. We cannot use the scratch + // general register for SIMD128 cycles as it is not large enough. 
+ cycleGeneralReg_ = temps.AcquireX(); + + for (size_t i = 0; i < moves.numMoves(); i++) { + emitMove(moves.getMove(i)); + } + + cycleGeneralReg_ = ARMRegister(); +} + +void MoveEmitterARM64::finish() { + assertDone(); + masm.freeStack(masm.framePushed() - pushedAtStart_); + MOZ_ASSERT(masm.framePushed() == pushedAtStart_); +} + +void MoveEmitterARM64::emitMove(const MoveOp& move) { + const MoveOperand& from = move.from(); + const MoveOperand& to = move.to(); + + if (move.isCycleBegin()) { + MOZ_ASSERT(!inCycle_ && !move.isCycleEnd()); + breakCycle(from, to, move.endCycleType()); + inCycle_ = true; + } else if (move.isCycleEnd()) { + MOZ_ASSERT(inCycle_); + completeCycle(from, to, move.type()); + inCycle_ = false; + return; + } + + switch (move.type()) { + case MoveOp::FLOAT32: + emitFloat32Move(from, to); + break; + case MoveOp::DOUBLE: + emitDoubleMove(from, to); + break; + case MoveOp::SIMD128: + emitSimd128Move(from, to); + break; + case MoveOp::INT32: + emitInt32Move(from, to); + break; + case MoveOp::GENERAL: + emitGeneralMove(from, to); + break; + default: + MOZ_CRASH("Unexpected move type"); + } +} + +void MoveEmitterARM64::emitFloat32Move(const MoveOperand& from, + const MoveOperand& to) { + if (from.isFloatReg()) { + if (to.isFloatReg()) { + masm.Fmov(toFPReg(to, MoveOp::FLOAT32), toFPReg(from, MoveOp::FLOAT32)); + } else { + masm.Str(toFPReg(from, MoveOp::FLOAT32), toMemOperand(to)); + } + return; + } + + if (to.isFloatReg()) { + masm.Ldr(toFPReg(to, MoveOp::FLOAT32), toMemOperand(from)); + return; + } + + vixl::UseScratchRegisterScope temps(&masm.asVIXL()); + const ARMFPRegister scratch32 = temps.AcquireS(); + masm.Ldr(scratch32, toMemOperand(from)); + masm.Str(scratch32, toMemOperand(to)); +} + +void MoveEmitterARM64::emitDoubleMove(const MoveOperand& from, + const MoveOperand& to) { + if (from.isFloatReg()) { + if (to.isFloatReg()) { + masm.Fmov(toFPReg(to, MoveOp::DOUBLE), toFPReg(from, MoveOp::DOUBLE)); + } else { + masm.Str(toFPReg(from, 
MoveOp::DOUBLE), toMemOperand(to)); + } + return; + } + + if (to.isFloatReg()) { + masm.Ldr(toFPReg(to, MoveOp::DOUBLE), toMemOperand(from)); + return; + } + + vixl::UseScratchRegisterScope temps(&masm.asVIXL()); + const ARMFPRegister scratch = temps.AcquireD(); + masm.Ldr(scratch, toMemOperand(from)); + masm.Str(scratch, toMemOperand(to)); +} + +void MoveEmitterARM64::emitSimd128Move(const MoveOperand& from, + const MoveOperand& to) { + if (from.isFloatReg()) { + if (to.isFloatReg()) { + masm.Mov(toFPReg(to, MoveOp::SIMD128), toFPReg(from, MoveOp::SIMD128)); + } else { + masm.Str(toFPReg(from, MoveOp::SIMD128), toMemOperand(to)); + } + return; + } + + if (to.isFloatReg()) { + masm.Ldr(toFPReg(to, MoveOp::SIMD128), toMemOperand(from)); + return; + } + + vixl::UseScratchRegisterScope temps(&masm.asVIXL()); + const ARMFPRegister scratch = temps.AcquireQ(); + masm.Ldr(scratch, toMemOperand(from)); + masm.Str(scratch, toMemOperand(to)); +} + +void MoveEmitterARM64::emitInt32Move(const MoveOperand& from, + const MoveOperand& to) { + if (from.isGeneralReg()) { + if (to.isGeneralReg()) { + masm.Mov(toARMReg32(to), toARMReg32(from)); + } else { + masm.Str(toARMReg32(from), toMemOperand(to)); + } + return; + } + + if (to.isGeneralReg()) { + masm.Ldr(toARMReg32(to), toMemOperand(from)); + return; + } + + vixl::UseScratchRegisterScope temps(&masm.asVIXL()); + const ARMRegister scratch32 = temps.AcquireW(); + masm.Ldr(scratch32, toMemOperand(from)); + masm.Str(scratch32, toMemOperand(to)); +} + +void MoveEmitterARM64::emitGeneralMove(const MoveOperand& from, + const MoveOperand& to) { + if (from.isGeneralReg()) { + MOZ_ASSERT(to.isGeneralReg() || to.isMemory()); + if (to.isGeneralReg()) { + masm.Mov(toARMReg64(to), toARMReg64(from)); + } else { + masm.Str(toARMReg64(from), toMemOperand(to)); + } + return; + } + + // {Memory OR EffectiveAddress} -> Register move. 
+ if (to.isGeneralReg()) { + MOZ_ASSERT(from.isMemoryOrEffectiveAddress()); + if (from.isMemory()) { + masm.Ldr(toARMReg64(to), toMemOperand(from)); + } else { + masm.Add(toARMReg64(to), toARMReg64(from), Operand(from.disp())); + } + return; + } + + vixl::UseScratchRegisterScope temps(&masm.asVIXL()); + const ARMRegister scratch64 = temps.AcquireX(); + + // Memory -> Memory move. + if (from.isMemory()) { + MOZ_ASSERT(to.isMemory()); + masm.Ldr(scratch64, toMemOperand(from)); + masm.Str(scratch64, toMemOperand(to)); + return; + } + + // EffectiveAddress -> Memory move. + MOZ_ASSERT(from.isEffectiveAddress()); + MOZ_ASSERT(to.isMemory()); + masm.Add(scratch64, toARMReg64(from), Operand(from.disp())); + masm.Str(scratch64, toMemOperand(to)); +} + +MemOperand MoveEmitterARM64::cycleSlot() { + // Using SP as stack pointer requires alignment preservation below. + MOZ_ASSERT(!masm.GetStackPointer64().Is(sp)); + + // Allocate a slot for breaking cycles if we have not already + if (pushedAtCycle_ == -1) { + static_assert(SpillSlotSize == 16); + masm.reserveStack(SpillSlotSize); + pushedAtCycle_ = masm.framePushed(); + } + + return MemOperand(masm.GetStackPointer64(), + masm.framePushed() - pushedAtCycle_); +} + +void MoveEmitterARM64::breakCycle(const MoveOperand& from, + const MoveOperand& to, MoveOp::Type type) { + switch (type) { + case MoveOp::FLOAT32: + if (to.isMemory()) { + masm.Ldr(cycleGeneralReg_.W(), toMemOperand(to)); + } else { + masm.Fmov(cycleGeneralReg_.W(), toFPReg(to, type)); + } + break; + + case MoveOp::DOUBLE: + if (to.isMemory()) { + masm.Ldr(cycleGeneralReg_.X(), toMemOperand(to)); + } else { + masm.Fmov(cycleGeneralReg_.X(), toFPReg(to, type)); + } + break; + + case MoveOp::SIMD128: + if (to.isMemory()) { + vixl::UseScratchRegisterScope temps(&masm.asVIXL()); + const ARMFPRegister scratch128 = temps.AcquireQ(); + masm.Ldr(scratch128, toMemOperand(to)); + masm.Str(scratch128, cycleSlot()); + } else { + masm.Str(toFPReg(to, type), cycleSlot()); + } + 
break; + + case MoveOp::INT32: + if (to.isMemory()) { + masm.Ldr(cycleGeneralReg_.W(), toMemOperand(to)); + } else { + masm.Mov(cycleGeneralReg_.W(), toARMReg32(to)); + } + break; + + case MoveOp::GENERAL: + if (to.isMemory()) { + masm.Ldr(cycleGeneralReg_.X(), toMemOperand(to)); + } else { + masm.Mov(cycleGeneralReg_.X(), toARMReg64(to)); + } + break; + + default: + MOZ_CRASH("Unexpected move type"); + } +} + +void MoveEmitterARM64::completeCycle(const MoveOperand& from, + const MoveOperand& to, MoveOp::Type type) { + switch (type) { + case MoveOp::FLOAT32: + if (to.isMemory()) { + masm.Str(cycleGeneralReg_.W(), toMemOperand(to)); + } else { + masm.Fmov(toFPReg(to, type), cycleGeneralReg_.W()); + } + break; + + case MoveOp::DOUBLE: + if (to.isMemory()) { + masm.Str(cycleGeneralReg_.X(), toMemOperand(to)); + } else { + masm.Fmov(toFPReg(to, type), cycleGeneralReg_.X()); + } + break; + + case MoveOp::SIMD128: + if (to.isMemory()) { + vixl::UseScratchRegisterScope temps(&masm.asVIXL()); + const ARMFPRegister scratch = temps.AcquireQ(); + masm.Ldr(scratch, cycleSlot()); + masm.Str(scratch, toMemOperand(to)); + } else { + masm.Ldr(toFPReg(to, type), cycleSlot()); + } + break; + + case MoveOp::INT32: + if (to.isMemory()) { + masm.Str(cycleGeneralReg_.W(), toMemOperand(to)); + } else { + masm.Mov(toARMReg32(to), cycleGeneralReg_.W()); + } + break; + + case MoveOp::GENERAL: + if (to.isMemory()) { + masm.Str(cycleGeneralReg_.X(), toMemOperand(to)); + } else { + masm.Mov(toARMReg64(to), cycleGeneralReg_.X()); + } + break; + + default: + MOZ_CRASH("Unexpected move type"); + } +} diff --git a/js/src/jit/arm64/MoveEmitter-arm64.h b/js/src/jit/arm64/MoveEmitter-arm64.h new file mode 100644 index 0000000000..fec2e3e012 --- /dev/null +++ b/js/src/jit/arm64/MoveEmitter-arm64.h @@ -0,0 +1,99 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + 
* License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef jit_arm64_MoveEmitter_arm64_h +#define jit_arm64_MoveEmitter_arm64_h + +#include "mozilla/Assertions.h" + +#include <stdint.h> + +#include "jit/arm64/Assembler-arm64.h" +#include "jit/MacroAssembler.h" +#include "jit/MoveResolver.h" +#include "jit/Registers.h" + +namespace js { +namespace jit { + +class CodeGenerator; + +class MoveEmitterARM64 { + bool inCycle_; + MacroAssembler& masm; + + // A scratch general register used to break cycles. + ARMRegister cycleGeneralReg_; + + // Original stack push value. + uint32_t pushedAtStart_; + + // This stores a stack offset to a spill location, snapshotting + // codegen->framePushed_ at the time it was allocated. It is -1 if no + // stack space has been allocated for that particular spill. + int32_t pushedAtCycle_; + + void assertDone() { MOZ_ASSERT(!inCycle_); } + + MemOperand cycleSlot(); + MemOperand toMemOperand(const MoveOperand& operand) const; + ARMRegister toARMReg32(const MoveOperand& operand) const { + MOZ_ASSERT(operand.isGeneralReg()); + return ARMRegister(operand.reg(), 32); + } + ARMRegister toARMReg64(const MoveOperand& operand) const { + if (operand.isGeneralReg()) { + return ARMRegister(operand.reg(), 64); + } else { + return ARMRegister(operand.base(), 64); + } + } + ARMFPRegister toFPReg(const MoveOperand& operand, MoveOp::Type t) const { + MOZ_ASSERT(operand.isFloatReg()); + switch (t) { + case MoveOp::FLOAT32: + return ARMFPRegister(operand.floatReg().encoding(), 32); + case MoveOp::DOUBLE: + return ARMFPRegister(operand.floatReg().encoding(), 64); + case MoveOp::SIMD128: + return ARMFPRegister(operand.floatReg().encoding(), 128); + default: + MOZ_MAKE_COMPILER_ASSUME_IS_UNREACHABLE("Bad register type"); + } + } + + void emitFloat32Move(const MoveOperand& from, const MoveOperand& to); + void emitDoubleMove(const MoveOperand& from, const MoveOperand& to); + void 
emitSimd128Move(const MoveOperand& from, const MoveOperand& to); + void emitInt32Move(const MoveOperand& from, const MoveOperand& to); + void emitGeneralMove(const MoveOperand& from, const MoveOperand& to); + + void emitMove(const MoveOp& move); + void breakCycle(const MoveOperand& from, const MoveOperand& to, + MoveOp::Type type); + void completeCycle(const MoveOperand& from, const MoveOperand& to, + MoveOp::Type type); + + public: + explicit MoveEmitterARM64(MacroAssembler& masm) + : inCycle_(false), + masm(masm), + pushedAtStart_(masm.framePushed()), + pushedAtCycle_(-1) {} + + ~MoveEmitterARM64() { assertDone(); } + + void emit(const MoveResolver& moves); + void finish(); + void setScratchRegister(Register reg) {} +}; + +typedef MoveEmitterARM64 MoveEmitter; + +} // namespace jit +} // namespace js + +#endif /* jit_arm64_MoveEmitter_arm64_h */ diff --git a/js/src/jit/arm64/SharedICHelpers-arm64-inl.h b/js/src/jit/arm64/SharedICHelpers-arm64-inl.h new file mode 100644 index 0000000000..8261a8b94f --- /dev/null +++ b/js/src/jit/arm64/SharedICHelpers-arm64-inl.h @@ -0,0 +1,79 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef jit_arm64_SharedICHelpers_arm64_inl_h +#define jit_arm64_SharedICHelpers_arm64_inl_h + +#include "jit/BaselineFrame.h" +#include "jit/SharedICHelpers.h" + +#include "jit/MacroAssembler-inl.h" + +namespace js { +namespace jit { + +inline void EmitBaselineTailCallVM(TrampolinePtr target, MacroAssembler& masm, + uint32_t argSize) { +#ifdef DEBUG + // We assume that R0 has been pushed, and R2 is unused. + static_assert(R2 == ValueOperand(r0)); + + // Store frame size without VMFunction arguments for debug assertions. 
+ masm.Sub(x0, FramePointer64, masm.GetStackPointer64()); + masm.Sub(w0, w0, Operand(argSize)); + Address frameSizeAddr(FramePointer, + BaselineFrame::reverseOffsetOfDebugFrameSize()); + masm.store32(w0.asUnsized(), frameSizeAddr); +#endif + + // Push frame descriptor (minus the return address) and perform the tail call. + static_assert(ICTailCallReg == lr); + masm.pushFrameDescriptor(FrameType::BaselineJS); + + // The return address will be pushed by the VM wrapper, for compatibility + // with direct calls. Refer to the top of generateVMWrapper(). + // ICTailCallReg (lr) already contains the return address (as we keep + // it there through the stub calls). + + masm.jump(target); +} + +inline void EmitBaselineCallVM(TrampolinePtr target, MacroAssembler& masm) { + masm.pushFrameDescriptor(FrameType::BaselineStub); + masm.call(target); +} + +inline void EmitBaselineEnterStubFrame(MacroAssembler& masm, Register scratch) { + MOZ_ASSERT(scratch != ICTailCallReg); + +#ifdef DEBUG + // Compute frame size. + masm.Sub(ARMRegister(scratch, 64), FramePointer64, masm.GetStackPointer64()); + + Address frameSizeAddr(FramePointer, + BaselineFrame::reverseOffsetOfDebugFrameSize()); + masm.store32(scratch, frameSizeAddr); +#endif + + // Push frame descriptor and return address. + // Save old frame pointer, stack pointer, and stub reg. + masm.PushFrameDescriptor(FrameType::BaselineJS); + masm.Push(ICTailCallReg); + masm.Push(FramePointer); + + // Update the frame register. + masm.Mov(FramePointer64, masm.GetStackPointer64()); + + masm.Push(ICStubReg); + + // Stack should remain 16-byte aligned. 
+ masm.checkStackAlignment(); + } + +} // namespace jit +} // namespace js + +#endif // jit_arm64_SharedICHelpers_arm64_inl_h diff --git a/js/src/jit/arm64/SharedICHelpers-arm64.h b/js/src/jit/arm64/SharedICHelpers-arm64.h new file mode 100644 index 0000000000..2ea45c80fb --- /dev/null +++ b/js/src/jit/arm64/SharedICHelpers-arm64.h @@ -0,0 +1,82 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef jit_arm64_SharedICHelpers_arm64_h +#define jit_arm64_SharedICHelpers_arm64_h + +#include "jit/BaselineIC.h" +#include "jit/JitFrames.h" +#include "jit/MacroAssembler.h" +#include "jit/SharedICRegisters.h" + +namespace js { +namespace jit { + +// Distance from sp to the top Value inside an IC stub (no return address on the +// stack on ARM). +static const size_t ICStackValueOffset = 0; + +inline void EmitRestoreTailCallReg(MacroAssembler& masm) { + // No-op on ARM because link register is always holding the return address. +} + +inline void EmitRepushTailCallReg(MacroAssembler& masm) { + // No-op on ARM because link register is always holding the return address. +} + +inline void EmitCallIC(MacroAssembler& masm, CodeOffset* callOffset) { + // The stub pointer must already be in ICStubReg. + // Load stubcode pointer from the ICStub. + // R2 won't be active when we call ICs, so we can use r0. + static_assert(R2 == ValueOperand(r0)); + masm.loadPtr(Address(ICStubReg, ICStub::offsetOfStubCode()), r0); + + // Call the stubcode via an indirect branch-and-link through x0. + masm.Blr(x0); + *callOffset = CodeOffset(masm.currentOffset()); +} + +inline void EmitReturnFromIC(MacroAssembler& masm) { + masm.abiret(); // Defaults to lr.
+} + +inline void EmitBaselineLeaveStubFrame(MacroAssembler& masm) { + vixl::UseScratchRegisterScope temps(&masm.asVIXL()); + const ARMRegister scratch64 = temps.AcquireX(); + + Address stubAddr(FramePointer, BaselineStubFrameLayout::ICStubOffsetFromFP); + masm.loadPtr(stubAddr, ICStubReg); + + masm.moveToStackPtr(FramePointer); + + // Pop values, discarding the frame descriptor. + masm.pop(FramePointer, ICTailCallReg, scratch64.asUnsized()); + + // Stack should remain 16-byte aligned. + masm.checkStackAlignment(); +} + +template <typename AddrType> +inline void EmitPreBarrier(MacroAssembler& masm, const AddrType& addr, + MIRType type) { + // On AArch64, lr is clobbered by guardedCallPreBarrier. Save it first. + masm.push(lr); + masm.guardedCallPreBarrier(addr, type); + masm.pop(lr); +} + +inline void EmitStubGuardFailure(MacroAssembler& masm) { + // Load next stub into ICStubReg. + masm.loadPtr(Address(ICStubReg, ICCacheIRStub::offsetOfNext()), ICStubReg); + + // Return address is already loaded, just jump to the next stubcode. + masm.jump(Address(ICStubReg, ICStub::offsetOfStubCode())); +} + +} // namespace jit +} // namespace js + +#endif // jit_arm64_SharedICHelpers_arm64_h diff --git a/js/src/jit/arm64/SharedICRegisters-arm64.h b/js/src/jit/arm64/SharedICRegisters-arm64.h new file mode 100644 index 0000000000..1aa49d651c --- /dev/null +++ b/js/src/jit/arm64/SharedICRegisters-arm64.h @@ -0,0 +1,51 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef jit_arm64_SharedICRegisters_arm64_h +#define jit_arm64_SharedICRegisters_arm64_h + +#include "jit/arm64/Assembler-arm64.h" +#include "jit/Registers.h" +#include "jit/RegisterSets.h" + +namespace js { +namespace jit { + +// ValueOperands R0, R1, and R2. +// R0 == JSReturnReg, and R2 uses registers not preserved across calls. +// R1 value should be preserved across calls. +static constexpr Register R0_ = r2; +static constexpr Register R1_ = r19; +static constexpr Register R2_ = r0; + +static constexpr ValueOperand R0(R0_); +static constexpr ValueOperand R1(R1_); +static constexpr ValueOperand R2(R2_); + +// ICTailCallReg and ICStubReg use registers that are not preserved across +// calls. +static constexpr Register ICTailCallReg = r30; +static constexpr Register ICStubReg = r9; + +// R7 - R9 are generally available for use within stubcode. + +// Note that ICTailCallReg is actually just the link +// register. In ARM code emission, we do not clobber ICTailCallReg +// since we keep the return address for calls there. + +static constexpr FloatRegister FloatReg0 = {FloatRegisters::d0, + FloatRegisters::Double}; +static constexpr FloatRegister FloatReg1 = {FloatRegisters::d1, + FloatRegisters::Double}; +static constexpr FloatRegister FloatReg2 = {FloatRegisters::d2, + FloatRegisters::Double}; +static constexpr FloatRegister FloatReg3 = {FloatRegisters::d3, + FloatRegisters::Double}; + +} // namespace jit +} // namespace js + +#endif // jit_arm64_SharedICRegisters_arm64_h diff --git a/js/src/jit/arm64/Trampoline-arm64.cpp b/js/src/jit/arm64/Trampoline-arm64.cpp new file mode 100644 index 0000000000..36f7f24d02 --- /dev/null +++ b/js/src/jit/arm64/Trampoline-arm64.cpp @@ -0,0 +1,840 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0.
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "jit/arm64/SharedICHelpers-arm64.h" +#include "jit/Bailouts.h" +#include "jit/BaselineFrame.h" +#include "jit/CalleeToken.h" +#include "jit/JitFrames.h" +#include "jit/JitRuntime.h" +#include "jit/PerfSpewer.h" +#include "jit/VMFunctions.h" +#include "vm/JitActivation.h" // js::jit::JitActivation +#include "vm/JSContext.h" + +#include "jit/MacroAssembler-inl.h" + +using namespace js; +using namespace js::jit; + +/* This method generates a trampoline on ARM64 for a c++ function with + * the following signature: + * bool blah(void* code, int argc, Value* argv, + * JSObject* scopeChain, Value* vp) + * ...using standard AArch64 calling convention + */ +void JitRuntime::generateEnterJIT(JSContext* cx, MacroAssembler& masm) { + AutoCreatedBy acb(masm, "JitRuntime::generateEnterJIT"); + + enterJITOffset_ = startTrampolineCode(masm); + + const Register reg_code = IntArgReg0; // EnterJitData::jitcode. + const Register reg_argc = IntArgReg1; // EnterJitData::maxArgc. + const Register reg_argv = IntArgReg2; // EnterJitData::maxArgv. + const Register reg_osrFrame = IntArgReg3; // EnterJitData::osrFrame. + const Register reg_callee = IntArgReg4; // EnterJitData::calleeToken. + const Register reg_scope = IntArgReg5; // EnterJitData::scopeChain. + const Register reg_osrNStack = + IntArgReg6; // EnterJitData::osrNumStackValues. + const Register reg_vp = IntArgReg7; // Address of EnterJitData::result. + + static_assert(OsrFrameReg == IntArgReg3); + + // During the pushes below, use the normal stack pointer. + masm.SetStackPointer64(sp); + + // Save return address and old frame pointer; set new frame pointer. + masm.push(r30, r29); + masm.moveStackPtrTo(r29); + + // Save callee-save integer registers. + // Also save x7 (reg_vp) and x30 (lr), for use later. 
+ masm.push(r19, r20, r21, r22); + masm.push(r23, r24, r25, r26); + masm.push(r27, r28, r7, r30); + + // Save callee-save floating-point registers. + // AArch64 ABI specifies that only the lower 64 bits must be saved. + masm.push(d8, d9, d10, d11); + masm.push(d12, d13, d14, d15); + +#ifdef DEBUG + // Emit stack canaries. + masm.movePtr(ImmWord(0xdeadd00d), r23); + masm.movePtr(ImmWord(0xdeadd11d), r24); + masm.push(r23, r24); +#endif + + // Common code below attempts to push single registers at a time, + // which breaks the stack pointer's 16-byte alignment requirement. + // Note that movePtr() is invalid because StackPointer is treated as xzr. + // + // FIXME: After testing, this entire function should be rewritten to not + // use the PseudoStackPointer: since the amount of data pushed is + // precalculated, we can just allocate the whole frame header at once and + // index off sp. This will save a significant number of instructions where + // Push() updates sp. + masm.Mov(PseudoStackPointer64, sp); + masm.SetStackPointer64(PseudoStackPointer64); + + // Remember stack depth without padding and arguments. + masm.moveStackPtrTo(r19); + + // If constructing, include newTarget in argument vector. + { + Label noNewTarget; + Imm32 constructingToken(CalleeToken_FunctionConstructing); + masm.branchTest32(Assembler::Zero, reg_callee, constructingToken, + &noNewTarget); + masm.add32(Imm32(1), reg_argc); + masm.bind(&noNewTarget); + } + + // JitFrameLayout is as follows (higher is higher in memory): + // N*8 - [ JS argument vector ] (base 16-byte aligned) + // 8 - calleeToken + // 8 - frameDescriptor (16-byte aligned) + // 8 - returnAddress + // 8 - frame pointer (16-byte aligned, pushed by callee) + + // Touch frame incrementally (a requirement for Windows). + // + // Use already saved callee-save registers r20 and r21 as temps. + // + // This has to be done outside the ScratchRegisterScope, as the temps are + // under demand inside the touchFrameValues call. 
+ + // Give sp 16-byte alignment and sync stack pointers. + masm.andToStackPtr(Imm32(~0xf)); + // We needn't worry about the Gecko Profiler mark because touchFrameValues + // touches in large increments. + masm.touchFrameValues(reg_argc, r20, r21); + // Restore stack pointer, preserved above. + masm.moveToStackPtr(r19); + + // Push the argument vector onto the stack. + // WARNING: destructively modifies reg_argv + { + vixl::UseScratchRegisterScope temps(&masm.asVIXL()); + + const ARMRegister tmp_argc = temps.AcquireX(); + const ARMRegister tmp_sp = temps.AcquireX(); + + Label noArguments; + Label loopHead; + + masm.movePtr(reg_argc, tmp_argc.asUnsized()); + + // sp -= 8 + // Since we're using PostIndex Str below, this is necessary to avoid + // overwriting the Gecko Profiler mark pushed above. + masm.subFromStackPtr(Imm32(8)); + + // sp -= 8 * argc + masm.Sub(PseudoStackPointer64, PseudoStackPointer64, + Operand(tmp_argc, vixl::SXTX, 3)); + + // Give sp 16-byte alignment and sync stack pointers. + masm.andToStackPtr(Imm32(~0xf)); + masm.moveStackPtrTo(tmp_sp.asUnsized()); + + masm.branchTestPtr(Assembler::Zero, reg_argc, reg_argc, &noArguments); + + // Begin argument-pushing loop. + // This could be optimized using Ldp and Stp. + { + masm.bind(&loopHead); + + // Load an argument from argv, then increment argv by 8. + masm.Ldr(x24, MemOperand(ARMRegister(reg_argv, 64), Operand(8), + vixl::PostIndex)); + + // Store the argument to tmp_sp, then increment tmp_sp by 8. + masm.Str(x24, MemOperand(tmp_sp, Operand(8), vixl::PostIndex)); + + // Decrement tmp_argc and set the condition codes for the new value. + masm.Subs(tmp_argc, tmp_argc, Operand(1)); + + // Branch if arguments remain. + masm.B(&loopHead, vixl::Condition::NonZero); + } + + masm.bind(&noArguments); + } + masm.checkStackAlignment(); + + // Push the calleeToken and the frame descriptor. + // The result address is used to store the actual number of arguments + // without adding an argument to EnterJIT. 
+ { + vixl::UseScratchRegisterScope temps(&masm.asVIXL()); + MOZ_ASSERT(temps.IsAvailable(ScratchReg64)); // ip0 + temps.Exclude(ScratchReg64); + Register scratch = ScratchReg64.asUnsized(); + masm.push(reg_callee); + + // Push the descriptor. + masm.unboxInt32(Address(reg_vp, 0x0), scratch); + masm.PushFrameDescriptorForJitCall(FrameType::CppToJSJit, scratch, scratch); + } + masm.checkStackAlignment(); + + Label osrReturnPoint; + { + // Check for Interpreter -> Baseline OSR. + + AllocatableGeneralRegisterSet regs(GeneralRegisterSet::All()); + MOZ_ASSERT(!regs.has(FramePointer)); + regs.take(OsrFrameReg); + regs.take(reg_code); + regs.take(reg_osrNStack); + MOZ_ASSERT(!regs.has(ReturnReg), "ReturnReg matches reg_code"); + + Label notOsr; + masm.branchTestPtr(Assembler::Zero, OsrFrameReg, OsrFrameReg, ¬Osr); + + Register scratch = regs.takeAny(); + + // Frame prologue. + masm.Adr(ARMRegister(scratch, 64), &osrReturnPoint); + masm.push(scratch, FramePointer); + masm.moveStackPtrTo(FramePointer); + + // Reserve frame. + masm.subFromStackPtr(Imm32(BaselineFrame::Size())); + + Register framePtrScratch = regs.takeAny(); + masm.touchFrameValues(reg_osrNStack, scratch, framePtrScratch); + masm.moveStackPtrTo(framePtrScratch); + + // Reserve space for locals and stack values. + // scratch = num_stack_values * sizeof(Value). + masm.Lsl(ARMRegister(scratch, 32), ARMRegister(reg_osrNStack, 32), 3); + masm.subFromStackPtr(scratch); + + // Enter exit frame. + masm.pushFrameDescriptor(FrameType::BaselineJS); + masm.push(xzr); // Push xzr for a fake return address. + masm.push(FramePointer); + // No GC things to mark: push a bare token. + masm.loadJSContext(scratch); + masm.enterFakeExitFrame(scratch, scratch, ExitFrameType::Bare); + + masm.push(reg_code); + + // Initialize the frame, including filling in the slots. 
+ using Fn = bool (*)(BaselineFrame * frame, InterpreterFrame * interpFrame, + uint32_t numStackValues); + masm.setupUnalignedABICall(r19); + masm.passABIArg(framePtrScratch); // BaselineFrame. + masm.passABIArg(reg_osrFrame); // InterpreterFrame. + masm.passABIArg(reg_osrNStack); + masm.callWithABI<Fn, jit::InitBaselineFrameForOsr>( + MoveOp::GENERAL, CheckUnsafeCallWithABI::DontCheckHasExitFrame); + + masm.pop(scratch); + MOZ_ASSERT(scratch != ReturnReg); + + masm.addToStackPtr(Imm32(ExitFrameLayout::SizeWithFooter())); + + Label error; + masm.branchIfFalseBool(ReturnReg, &error); + + // If OSR-ing, then emit instrumentation for setting lastProfilerFrame + // if profiler instrumentation is enabled. + { + Label skipProfilingInstrumentation; + AbsoluteAddress addressOfEnabled( + cx->runtime()->geckoProfiler().addressOfEnabled()); + masm.branch32(Assembler::Equal, addressOfEnabled, Imm32(0), + &skipProfilingInstrumentation); + masm.profilerEnterFrame(FramePointer, regs.getAny()); + masm.bind(&skipProfilingInstrumentation); + } + + masm.jump(scratch); + + // OOM: frame epilogue, load error value, discard return address and return. + masm.bind(&error); + masm.moveToStackPtr(FramePointer); + masm.pop(FramePointer); + masm.addToStackPtr(Imm32(sizeof(uintptr_t))); // Return address. + masm.syncStackPtr(); + masm.moveValue(MagicValue(JS_ION_ERROR), JSReturnOperand); + masm.B(&osrReturnPoint); + + masm.bind(&notOsr); + masm.movePtr(reg_scope, R1_); + } + + // The callee will push the return address and frame pointer on the stack, + // thus we check that the stack would be aligned once the call is complete. + masm.assertStackAlignment(JitStackAlignment, 2 * sizeof(uintptr_t)); + + // Call function. + // Since AArch64 doesn't have the pc register available, the callee must push + // lr. + masm.callJitNoProfiler(reg_code); + + // Interpreter -> Baseline OSR will return here. + masm.bind(&osrReturnPoint); + + // Discard arguments and padding.
Set sp to the address of the saved + // registers. In debug builds we have to include the two stack canaries + // checked below. +#ifdef DEBUG + static constexpr size_t SavedRegSize = 22 * sizeof(void*); +#else + static constexpr size_t SavedRegSize = 20 * sizeof(void*); +#endif + masm.computeEffectiveAddress(Address(FramePointer, -int32_t(SavedRegSize)), + masm.getStackPointer()); + + masm.syncStackPtr(); + masm.SetStackPointer64(sp); + +#ifdef DEBUG + // Check that canaries placed on function entry are still present. + masm.pop(r24, r23); + Label x23OK, x24OK; + + masm.branchPtr(Assembler::Equal, r23, ImmWord(0xdeadd00d), &x23OK); + masm.breakpoint(); + masm.bind(&x23OK); + + masm.branchPtr(Assembler::Equal, r24, ImmWord(0xdeadd11d), &x24OK); + masm.breakpoint(); + masm.bind(&x24OK); +#endif + + // Restore callee-save floating-point registers. + masm.pop(d15, d14, d13, d12); + masm.pop(d11, d10, d9, d8); + + // Restore callee-save integer registers. + // Also restore x7 (reg_vp) and x30 (lr). + masm.pop(r30, r7, r28, r27); + masm.pop(r26, r25, r24, r23); + masm.pop(r22, r21, r20, r19); + + // Store return value (in JSReturnReg = x2 to just-popped reg_vp). + masm.storeValue(JSReturnOperand, Address(reg_vp, 0)); + + // Restore old frame pointer. + masm.pop(r29, r30); + + // Return using the value popped into x30. + masm.abiret(); + + // Reset stack pointer. + masm.SetStackPointer64(PseudoStackPointer64); +} + +// static +mozilla::Maybe<::JS::ProfilingFrameIterator::RegisterState> +JitRuntime::getCppEntryRegisters(JitFrameLayout* frameStackAddress) { + // Not supported, or not implemented yet. + // TODO: Implement along with the corresponding stack-walker changes, in + // coordination with the Gecko Profiler, see bug 1635987 and follow-ups. 
+ return mozilla::Nothing{}; +} + +static void PushRegisterDump(MacroAssembler& masm) { + const LiveRegisterSet First28GeneralRegisters = LiveRegisterSet( + GeneralRegisterSet(Registers::AllMask & + ~(1 << 31 | 1 << 30 | 1 << 29 | 1 << 28)), + FloatRegisterSet(FloatRegisters::NoneMask)); + + const LiveRegisterSet AllFloatRegisters = + LiveRegisterSet(GeneralRegisterSet(Registers::NoneMask), + FloatRegisterSet(FloatRegisters::AllMask)); + + // Push all general-purpose registers. + // + // The ARM64 ABI does not treat SP as a normal register that can + // be pushed. So pushing happens in two phases. + // + // Registers are pushed in reverse order of code. + // + // See block comment in MacroAssembler.h for further required invariants. + + // First, push the last four registers, passing zero for sp. + // Zero is pushed for x28 and x31: the pseudo-SP and SP, respectively. + masm.asVIXL().Push(xzr, x30, x29, xzr); + + // Second, push the first 28 registers that serve no special purpose. + masm.PushRegsInMask(First28GeneralRegisters); + + // Finally, push all floating-point registers, completing the RegisterDump. + masm.PushRegsInMask(AllFloatRegisters); +} + +void JitRuntime::generateInvalidator(MacroAssembler& masm, Label* bailoutTail) { + AutoCreatedBy acb(masm, "JitRuntime::generateInvalidator"); + + invalidatorOffset_ = startTrampolineCode(masm); + + // The InvalidationBailoutStack saved in r0 must be: + // - osiPointReturnAddress_ + // - ionScript_ (pushed by CodeGeneratorARM64::generateInvalidateEpilogue()) + // - regs_ (pushed here) + // - fpregs_ (pushed here) [=r0] + PushRegisterDump(masm); + masm.moveStackPtrTo(r0); + + // Reserve space for InvalidationBailout's bailoutInfo outparam. 
+ masm.Sub(x1, masm.GetStackPointer64(), Operand(sizeof(void*))); + masm.moveToStackPtr(r1); + + using Fn = + bool (*)(InvalidationBailoutStack * sp, BaselineBailoutInfo * *info); + masm.setupUnalignedABICall(r10); + masm.passABIArg(r0); + masm.passABIArg(r1); + + masm.callWithABI<Fn, InvalidationBailout>( + MoveOp::GENERAL, CheckUnsafeCallWithABI::DontCheckOther); + + masm.pop(r2); // Get the bailoutInfo outparam. + + // Pop the machine state and the dead frame. + masm.moveToStackPtr(FramePointer); + + // Jump to shared bailout tail. The BailoutInfo pointer has to be in r2. + masm.jump(bailoutTail); +} + +void JitRuntime::generateArgumentsRectifier(MacroAssembler& masm, + ArgumentsRectifierKind kind) { + AutoCreatedBy acb(masm, "JitRuntime::generateArgumentsRectifier"); + + switch (kind) { + case ArgumentsRectifierKind::Normal: + argumentsRectifierOffset_ = startTrampolineCode(masm); + break; + case ArgumentsRectifierKind::TrialInlining: + trialInliningArgumentsRectifierOffset_ = startTrampolineCode(masm); + break; + } + + // Save the return address for later. + masm.push(lr); + + // Frame prologue. + // + // NOTE: if this changes, fix the Baseline bailout code too! + // See BaselineStackBuilder::calculatePrevFramePtr and + // BaselineStackBuilder::buildRectifierFrame (in BaselineBailouts.cpp). + masm.push(FramePointer); + masm.moveStackPtrTo(FramePointer); + + // Load the information that the rectifier needs from the stack. + masm.loadNumActualArgs(FramePointer, r0); + masm.loadPtr( + Address(FramePointer, RectifierFrameLayout::offsetOfCalleeToken()), r1); + + // Extract a JSFunction pointer from the callee token and keep the + // intermediary to avoid later recalculation. + masm.And(x5, x1, Operand(CalleeTokenMask)); + + // Get the arguments from the function object. 
+ masm.loadFunctionArgCount(x5.asUnsized(), x6.asUnsized()); + + static_assert(CalleeToken_FunctionConstructing == 0x1, + "Constructing must be low-order bit"); + masm.And(x4, x1, Operand(CalleeToken_FunctionConstructing)); + masm.Add(x7, x6, x4); + + // Copy the number of actual arguments into r8. + masm.mov(r0, r8); + + // Calculate the position that our arguments are at before sp gets modified. + masm.Add(x3, masm.GetStackPointer64(), Operand(x8, vixl::LSL, 3)); + masm.Add(x3, x3, Operand(sizeof(RectifierFrameLayout))); + + // If the number of Values without |this| is even, push 8 padding bytes to + // ensure the stack is 16-byte aligned. + Label noPadding; + masm.Tbnz(x7, 0, &noPadding); + masm.asVIXL().Push(xzr); + masm.bind(&noPadding); + + { + Label notConstructing; + masm.Cbz(x4, &notConstructing); + + // new.target lives at the end of the pushed args + // NB: The arg vector holder starts at the beginning of the last arg, + // add a value to get to argv[argc] + masm.loadPtr(Address(r3, sizeof(Value)), r4); + masm.Push(r4); + + masm.bind(&notConstructing); + } + + // Calculate the number of undefineds that need to be pushed. + masm.Sub(w2, w6, w8); + + // Put an undefined in a register so it can be pushed. + masm.moveValue(UndefinedValue(), ValueOperand(r4)); + + // Push undefined N times. + { + Label undefLoopTop; + masm.bind(&undefLoopTop); + masm.Push(r4); + masm.Subs(w2, w2, Operand(1)); + masm.B(&undefLoopTop, Assembler::NonZero); + } + + // Arguments copy loop. Copy for x8 >= 0 to include |this|. + { + Label copyLoopTop; + masm.bind(&copyLoopTop); + masm.Ldr(x4, MemOperand(x3, -sizeof(Value), vixl::PostIndex)); + masm.Push(r4); + masm.Subs(x8, x8, Operand(1)); + masm.B(&copyLoopTop, Assembler::NotSigned); + } + + masm.push(r1); // Callee token. + masm.pushFrameDescriptorForJitCall(FrameType::Rectifier, r0, r0); + + // Call the target function.
+ switch (kind) { + case ArgumentsRectifierKind::Normal: + masm.loadJitCodeRaw(r5, r3); + argumentsRectifierReturnOffset_ = masm.callJitNoProfiler(r3); + break; + case ArgumentsRectifierKind::TrialInlining: + Label noBaselineScript, done; + masm.loadBaselineJitCodeRaw(r5, r3, &noBaselineScript); + masm.callJitNoProfiler(r3); + masm.jump(&done); + + // See BaselineCacheIRCompiler::emitCallInlinedFunction. + masm.bind(&noBaselineScript); + masm.loadJitCodeRaw(r5, r3); + masm.callJitNoProfiler(r3); + masm.bind(&done); + break; + } + + masm.moveToStackPtr(FramePointer); + masm.pop(FramePointer); + masm.ret(); +} + +static void PushBailoutFrame(MacroAssembler& masm, Register spArg) { + // This assumes no SIMD registers, as JS does not support SIMD. + + // The stack saved in spArg must be (higher entries have higher memory + // addresses): + // - snapshotOffset_ + // - frameSize_ + // - regs_ + // - fpregs_ (spArg + 0) + PushRegisterDump(masm); + masm.moveStackPtrTo(spArg); +} + +static void GenerateBailoutThunk(MacroAssembler& masm, Label* bailoutTail) { + PushBailoutFrame(masm, r0); + + // SP % 8 == 4 + // STEP 1c: Call the bailout function, giving a pointer to the + // structure we just blitted onto the stack. + // Make space for the BaselineBailoutInfo* outparam. + masm.reserveStack(sizeof(void*)); + masm.moveStackPtrTo(r1); + + using Fn = bool (*)(BailoutStack * sp, BaselineBailoutInfo * *info); + masm.setupUnalignedABICall(r2); + masm.passABIArg(r0); + masm.passABIArg(r1); + masm.callWithABI<Fn, Bailout>(MoveOp::GENERAL, + CheckUnsafeCallWithABI::DontCheckOther); + + // Get the bailoutInfo outparam. + masm.pop(r2); + + // Remove both the bailout frame and the topmost Ion frame's stack. + masm.moveToStackPtr(FramePointer); + + // Jump to shared bailout tail. The BailoutInfo pointer has to be in r2. 
+ masm.jump(bailoutTail); +} + +void JitRuntime::generateBailoutHandler(MacroAssembler& masm, + Label* bailoutTail) { + AutoCreatedBy acb(masm, "JitRuntime::generateBailoutHandler"); + + bailoutHandlerOffset_ = startTrampolineCode(masm); + + GenerateBailoutThunk(masm, bailoutTail); +} + +bool JitRuntime::generateVMWrapper(JSContext* cx, MacroAssembler& masm, + const VMFunctionData& f, DynFn nativeFun, + uint32_t* wrapperOffset) { + AutoCreatedBy acb(masm, "JitRuntime::generateVMWrapper"); + + *wrapperOffset = startTrampolineCode(masm); + + // Avoid conflicts with argument registers while discarding the result after + // the function call. + AllocatableGeneralRegisterSet regs(Register::Codes::WrapperMask); + + static_assert( + (Register::Codes::VolatileMask & ~Register::Codes::WrapperMask) == 0, + "Wrapper register set must be a superset of the Volatile register set."); + + // Unlike on other platforms, it is the responsibility of the VM *callee* to + // push the return address, while the caller must ensure that the address + // is stored in lr on entry. This allows the VM wrapper to work with both + // direct calls and tail calls. + masm.push(lr); + + // First argument is the JSContext. + Register reg_cx = IntArgReg0; + regs.take(reg_cx); + + // Stack is: + // ... frame ... + // +12 [args] + // +8 descriptor + // +0 returnAddress (pushed by this function, caller sets as lr) + // + // Push the frame pointer to finish the exit frame, then link it up. + masm.Push(FramePointer); + masm.moveStackPtrTo(FramePointer); + masm.loadJSContext(reg_cx); + masm.enterExitFrame(reg_cx, regs.getAny(), &f); + + // Save the current stack pointer as the base for copying arguments. + Register argsBase = InvalidReg; + if (f.explicitArgs) { + // argsBase can't be an argument register. Bad things would happen if + // the MoveResolver didn't throw an assertion failure first. 
+ argsBase = r8; + regs.take(argsBase); + masm.Add(ARMRegister(argsBase, 64), masm.GetStackPointer64(), + Operand(ExitFrameLayout::SizeWithFooter())); + } + + // Reserve space for any outparameter. + Register outReg = InvalidReg; + switch (f.outParam) { + case Type_Value: + outReg = regs.takeAny(); + masm.reserveStack(sizeof(Value)); + masm.moveStackPtrTo(outReg); + break; + + case Type_Handle: + outReg = regs.takeAny(); + masm.PushEmptyRooted(f.outParamRootType); + masm.moveStackPtrTo(outReg); + break; + + case Type_Int32: + case Type_Bool: + outReg = regs.takeAny(); + masm.reserveStack(sizeof(int64_t)); + masm.moveStackPtrTo(outReg); + break; + + case Type_Double: + outReg = regs.takeAny(); + masm.reserveStack(sizeof(double)); + masm.moveStackPtrTo(outReg); + break; + + case Type_Pointer: + outReg = regs.takeAny(); + masm.reserveStack(sizeof(uintptr_t)); + masm.moveStackPtrTo(outReg); + break; + + default: + MOZ_ASSERT(f.outParam == Type_Void); + break; + } + + masm.setupUnalignedABICall(regs.getAny()); + masm.passABIArg(reg_cx); + + size_t argDisp = 0; + + // Copy arguments. + for (uint32_t explicitArg = 0; explicitArg < f.explicitArgs; explicitArg++) { + switch (f.argProperties(explicitArg)) { + case VMFunctionData::WordByValue: + masm.passABIArg(MoveOperand(argsBase, argDisp), + (f.argPassedInFloatReg(explicitArg) ? MoveOp::DOUBLE + : MoveOp::GENERAL)); + argDisp += sizeof(void*); + break; + + case VMFunctionData::WordByRef: + masm.passABIArg( + MoveOperand(argsBase, argDisp, MoveOperand::Kind::EffectiveAddress), + MoveOp::GENERAL); + argDisp += sizeof(void*); + break; + + case VMFunctionData::DoubleByValue: + case VMFunctionData::DoubleByRef: + MOZ_CRASH("NYI: AArch64 callVM should not be used with 128bit values."); + } + } + + // Copy the semi-implicit outparam, if any. + // It is not a C++-abi outparam, which would get passed in the + // outparam register, but a real parameter to the function, which + // was stack-allocated above. 
+ if (outReg != InvalidReg) { + masm.passABIArg(outReg); + } + + masm.callWithABI(nativeFun, MoveOp::GENERAL, + CheckUnsafeCallWithABI::DontCheckHasExitFrame); + + // SP is used to transfer stack across call boundaries. + masm.initPseudoStackPtr(); + + // Test for failure. + switch (f.failType()) { + case Type_Cell: + masm.branchTestPtr(Assembler::Zero, r0, r0, masm.failureLabel()); + break; + case Type_Bool: + masm.branchIfFalseBool(r0, masm.failureLabel()); + break; + case Type_Void: + break; + default: + MOZ_CRASH("unknown failure kind"); + } + + // Load the outparam and free any allocated stack. + switch (f.outParam) { + case Type_Value: + masm.Ldr(ARMRegister(JSReturnReg, 64), + MemOperand(masm.GetStackPointer64())); + masm.freeStack(sizeof(Value)); + break; + + case Type_Handle: + masm.popRooted(f.outParamRootType, ReturnReg, JSReturnOperand); + break; + + case Type_Int32: + masm.Ldr(ARMRegister(ReturnReg, 32), + MemOperand(masm.GetStackPointer64())); + masm.freeStack(sizeof(int64_t)); + break; + + case Type_Bool: + masm.Ldrb(ARMRegister(ReturnReg, 32), + MemOperand(masm.GetStackPointer64())); + masm.freeStack(sizeof(int64_t)); + break; + + case Type_Double: + masm.Ldr(ARMFPRegister(ReturnDoubleReg, 64), + MemOperand(masm.GetStackPointer64())); + masm.freeStack(sizeof(double)); + break; + + case Type_Pointer: + masm.Ldr(ARMRegister(ReturnReg, 64), + MemOperand(masm.GetStackPointer64())); + masm.freeStack(sizeof(uintptr_t)); + break; + + default: + MOZ_ASSERT(f.outParam == Type_Void); + break; + } + + // Until C++ code is instrumented against Spectre, prevent speculative + // execution from returning any private data. + if (f.returnsData() && JitOptions.spectreJitToCxxCalls) { + masm.speculationBarrier(); + } + + // Pop ExitFooterFrame and the frame pointer. + masm.leaveExitFrame(0); + masm.pop(FramePointer); + + // Return. Subtract sizeof(void*) for the frame pointer. 
+ masm.retn(Imm32(sizeof(ExitFrameLayout) - sizeof(void*) + + f.explicitStackSlots() * sizeof(void*) + + f.extraValuesToPop * sizeof(Value))); + + return true; +} + +uint32_t JitRuntime::generatePreBarrier(JSContext* cx, MacroAssembler& masm, + MIRType type) { + AutoCreatedBy acb(masm, "JitRuntime::generatePreBarrier"); + + uint32_t offset = startTrampolineCode(masm); + + static_assert(PreBarrierReg == r1); + Register temp1 = r2; + Register temp2 = r3; + Register temp3 = r4; + masm.push(temp1); + masm.push(temp2); + masm.push(temp3); + + Label noBarrier; + masm.emitPreBarrierFastPath(cx->runtime(), type, temp1, temp2, temp3, + &noBarrier); + + // Call into C++ to mark this GC thing. + masm.pop(temp3); + masm.pop(temp2); + masm.pop(temp1); + + LiveRegisterSet regs = + LiveRegisterSet(GeneralRegisterSet(Registers::VolatileMask), + FloatRegisterSet(FloatRegisters::VolatileMask)); + + // Also preserve the return address. + regs.add(lr); + + masm.PushRegsInMask(regs); + + masm.movePtr(ImmPtr(cx->runtime()), r3); + + masm.setupUnalignedABICall(r0); + masm.passABIArg(r3); + masm.passABIArg(PreBarrierReg); + masm.callWithABI(JitPreWriteBarrier(type)); + + // Pop the volatile regs and restore LR. + masm.PopRegsInMask(regs); + masm.abiret(); + + masm.bind(&noBarrier); + masm.pop(temp3); + masm.pop(temp2); + masm.pop(temp1); + masm.abiret(); + + return offset; +} + +void JitRuntime::generateBailoutTailStub(MacroAssembler& masm, + Label* bailoutTail) { + AutoCreatedBy acb(masm, "JitRuntime::generateBailoutTailStub"); + + masm.bind(bailoutTail); + masm.generateBailoutTail(r1, r2); +} diff --git a/js/src/jit/arm64/vixl/.clang-format b/js/src/jit/arm64/vixl/.clang-format new file mode 100644 index 0000000000..122a79540d --- /dev/null +++ b/js/src/jit/arm64/vixl/.clang-format @@ -0,0 +1,4 @@ +BasedOnStyle: Chromium + +# Ignore all comments because they aren't reflowed properly. 
+CommentPragmas: "^" diff --git a/js/src/jit/arm64/vixl/AUTHORS b/js/src/jit/arm64/vixl/AUTHORS new file mode 100644 index 0000000000..257ec9d32b --- /dev/null +++ b/js/src/jit/arm64/vixl/AUTHORS @@ -0,0 +1,8 @@ +# Below is a list of people and organisations that have contributed to the VIXL +# project. Entries should be added to the list as: +# +# Name/Organization <email address> + +ARM Ltd. <*@arm.com> +Google Inc. <*@google.com> +Linaro <*@linaro.org> diff --git a/js/src/jit/arm64/vixl/Assembler-vixl.cpp b/js/src/jit/arm64/vixl/Assembler-vixl.cpp new file mode 100644 index 0000000000..6ed31cef78 --- /dev/null +++ b/js/src/jit/arm64/vixl/Assembler-vixl.cpp @@ -0,0 +1,5318 @@ +// Copyright 2015, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "jit/arm64/vixl/Assembler-vixl.h" + +#include <cmath> + +#include "jit/arm64/vixl/MacroAssembler-vixl.h" + +namespace vixl { + +// CPURegList utilities. +CPURegister CPURegList::PopLowestIndex() { + if (IsEmpty()) { + return NoCPUReg; + } + int index = CountTrailingZeros(list_); + VIXL_ASSERT((1ULL << index) & list_); + Remove(index); + return CPURegister(index, size_, type_); +} + + +CPURegister CPURegList::PopHighestIndex() { + VIXL_ASSERT(IsValid()); + if (IsEmpty()) { + return NoCPUReg; + } + int index = CountLeadingZeros(list_); + index = kRegListSizeInBits - 1 - index; + VIXL_ASSERT((1ULL << index) & list_); + Remove(index); + return CPURegister(index, size_, type_); +} + + +bool CPURegList::IsValid() const { + if ((type_ == CPURegister::kRegister) || + (type_ == CPURegister::kVRegister)) { + bool is_valid = true; + // Try to create a CPURegister for each element in the list. + for (int i = 0; i < kRegListSizeInBits; i++) { + if (((list_ >> i) & 1) != 0) { + is_valid &= CPURegister(i, size_, type_).IsValid(); + } + } + return is_valid; + } else if (type_ == CPURegister::kNoRegister) { + // We can't use IsEmpty here because that asserts IsValid(). 
+ return list_ == 0; + } else { + return false; + } +} + + +void CPURegList::RemoveCalleeSaved() { + if (type() == CPURegister::kRegister) { + Remove(GetCalleeSaved(RegisterSizeInBits())); + } else if (type() == CPURegister::kVRegister) { + Remove(GetCalleeSavedV(RegisterSizeInBits())); + } else { + VIXL_ASSERT(type() == CPURegister::kNoRegister); + VIXL_ASSERT(IsEmpty()); + // The list must already be empty, so do nothing. + } +} + + +CPURegList CPURegList::Union(const CPURegList& list_1, + const CPURegList& list_2, + const CPURegList& list_3) { + return Union(list_1, Union(list_2, list_3)); +} + + +CPURegList CPURegList::Union(const CPURegList& list_1, + const CPURegList& list_2, + const CPURegList& list_3, + const CPURegList& list_4) { + return Union(Union(list_1, list_2), Union(list_3, list_4)); +} + + +CPURegList CPURegList::Intersection(const CPURegList& list_1, + const CPURegList& list_2, + const CPURegList& list_3) { + return Intersection(list_1, Intersection(list_2, list_3)); +} + + +CPURegList CPURegList::Intersection(const CPURegList& list_1, + const CPURegList& list_2, + const CPURegList& list_3, + const CPURegList& list_4) { + return Intersection(Intersection(list_1, list_2), + Intersection(list_3, list_4)); +} + + +CPURegList CPURegList::GetCalleeSaved(unsigned size) { + return CPURegList(CPURegister::kRegister, size, 19, 29); +} + + +CPURegList CPURegList::GetCalleeSavedV(unsigned size) { + return CPURegList(CPURegister::kVRegister, size, 8, 15); +} + + +CPURegList CPURegList::GetCallerSaved(unsigned size) { + // Registers x0-x18 and lr (x30) are caller-saved. + CPURegList list = CPURegList(CPURegister::kRegister, size, 0, 18); + // Do not use lr directly to avoid initialisation order fiasco bugs for users. + list.Combine(Register(30, kXRegSize)); + return list; +} + + +CPURegList CPURegList::GetCallerSavedV(unsigned size) { + // Registers d0-d7 and d16-d31 are caller-saved. 
+ CPURegList list = CPURegList(CPURegister::kVRegister, size, 0, 7); + list.Combine(CPURegList(CPURegister::kVRegister, size, 16, 31)); + return list; +} + + +const CPURegList kCalleeSaved = CPURegList::GetCalleeSaved(); +const CPURegList kCalleeSavedV = CPURegList::GetCalleeSavedV(); +const CPURegList kCallerSaved = CPURegList::GetCallerSaved(); +const CPURegList kCallerSavedV = CPURegList::GetCallerSavedV(); + + +// Registers. +#define WREG(n) w##n, +const Register Register::wregisters[] = { +REGISTER_CODE_LIST(WREG) +}; +#undef WREG + +#define XREG(n) x##n, +const Register Register::xregisters[] = { +REGISTER_CODE_LIST(XREG) +}; +#undef XREG + +#define BREG(n) b##n, +const VRegister VRegister::bregisters[] = { +REGISTER_CODE_LIST(BREG) +}; +#undef BREG + +#define HREG(n) h##n, +const VRegister VRegister::hregisters[] = { +REGISTER_CODE_LIST(HREG) +}; +#undef HREG + +#define SREG(n) s##n, +const VRegister VRegister::sregisters[] = { +REGISTER_CODE_LIST(SREG) +}; +#undef SREG + +#define DREG(n) d##n, +const VRegister VRegister::dregisters[] = { +REGISTER_CODE_LIST(DREG) +}; +#undef DREG + +#define QREG(n) q##n, +const VRegister VRegister::qregisters[] = { +REGISTER_CODE_LIST(QREG) +}; +#undef QREG + +#define VREG(n) v##n, +const VRegister VRegister::vregisters[] = { +REGISTER_CODE_LIST(VREG) +}; +#undef VREG + + +const Register& Register::WRegFromCode(unsigned code) { + if (code == kSPRegInternalCode) { + return wsp; + } else { + VIXL_ASSERT(code < kNumberOfRegisters); + return wregisters[code]; + } +} + + +const Register& Register::XRegFromCode(unsigned code) { + if (code == kSPRegInternalCode) { + return sp; + } else { + VIXL_ASSERT(code < kNumberOfRegisters); + return xregisters[code]; + } +} + + +const VRegister& VRegister::BRegFromCode(unsigned code) { + VIXL_ASSERT(code < kNumberOfVRegisters); + return bregisters[code]; +} + + +const VRegister& VRegister::HRegFromCode(unsigned code) { + VIXL_ASSERT(code < kNumberOfVRegisters); + return hregisters[code]; +} + 
+ +const VRegister& VRegister::SRegFromCode(unsigned code) { + VIXL_ASSERT(code < kNumberOfVRegisters); + return sregisters[code]; +} + + +const VRegister& VRegister::DRegFromCode(unsigned code) { + VIXL_ASSERT(code < kNumberOfVRegisters); + return dregisters[code]; +} + + +const VRegister& VRegister::QRegFromCode(unsigned code) { + VIXL_ASSERT(code < kNumberOfVRegisters); + return qregisters[code]; +} + + +const VRegister& VRegister::VRegFromCode(unsigned code) { + VIXL_ASSERT(code < kNumberOfVRegisters); + return vregisters[code]; +} + + +const Register& CPURegister::W() const { + VIXL_ASSERT(IsValidRegister()); + return Register::WRegFromCode(code_); +} + + +const Register& CPURegister::X() const { + VIXL_ASSERT(IsValidRegister()); + return Register::XRegFromCode(code_); +} + + +const VRegister& CPURegister::B() const { + VIXL_ASSERT(IsValidVRegister()); + return VRegister::BRegFromCode(code_); +} + + +const VRegister& CPURegister::H() const { + VIXL_ASSERT(IsValidVRegister()); + return VRegister::HRegFromCode(code_); +} + + +const VRegister& CPURegister::S() const { + VIXL_ASSERT(IsValidVRegister()); + return VRegister::SRegFromCode(code_); +} + + +const VRegister& CPURegister::D() const { + VIXL_ASSERT(IsValidVRegister()); + return VRegister::DRegFromCode(code_); +} + + +const VRegister& CPURegister::Q() const { + VIXL_ASSERT(IsValidVRegister()); + return VRegister::QRegFromCode(code_); +} + + +const VRegister& CPURegister::V() const { + VIXL_ASSERT(IsValidVRegister()); + return VRegister::VRegFromCode(code_); +} + + +// Operand. 
+Operand::Operand(int64_t immediate) + : immediate_(immediate), + reg_(NoReg), + shift_(NO_SHIFT), + extend_(NO_EXTEND), + shift_amount_(0) {} + + +Operand::Operand(Register reg, Shift shift, unsigned shift_amount) + : reg_(reg), + shift_(shift), + extend_(NO_EXTEND), + shift_amount_(shift_amount) { + VIXL_ASSERT(shift != MSL); + VIXL_ASSERT(reg.Is64Bits() || (shift_amount < kWRegSize)); + VIXL_ASSERT(reg.Is32Bits() || (shift_amount < kXRegSize)); + VIXL_ASSERT(!reg.IsSP()); +} + + +Operand::Operand(Register reg, Extend extend, unsigned shift_amount) + : reg_(reg), + shift_(NO_SHIFT), + extend_(extend), + shift_amount_(shift_amount) { + VIXL_ASSERT(reg.IsValid()); + VIXL_ASSERT(shift_amount <= 4); + VIXL_ASSERT(!reg.IsSP()); + + // Extend modes SXTX and UXTX require a 64-bit register. + VIXL_ASSERT(reg.Is64Bits() || ((extend != SXTX) && (extend != UXTX))); +} + + +bool Operand::IsImmediate() const { + return reg_.Is(NoReg); +} + + +bool Operand::IsShiftedRegister() const { + return reg_.IsValid() && (shift_ != NO_SHIFT); +} + + +bool Operand::IsExtendedRegister() const { + return reg_.IsValid() && (extend_ != NO_EXTEND); +} + + +bool Operand::IsZero() const { + if (IsImmediate()) { + return immediate() == 0; + } else { + return reg().IsZero(); + } +} + + +Operand Operand::ToExtendedRegister() const { + VIXL_ASSERT(IsShiftedRegister()); + VIXL_ASSERT((shift_ == LSL) && (shift_amount_ <= 4)); + return Operand(reg_, reg_.Is64Bits() ? 
UXTX : UXTW, shift_amount_); +} + + +// MemOperand +MemOperand::MemOperand(Register base, int64_t offset, AddrMode addrmode) + : base_(base), regoffset_(NoReg), offset_(offset), addrmode_(addrmode) { + VIXL_ASSERT(base.Is64Bits() && !base.IsZero()); +} + + +MemOperand::MemOperand(Register base, + Register regoffset, + Extend extend, + unsigned shift_amount) + : base_(base), regoffset_(regoffset), offset_(0), addrmode_(Offset), + shift_(NO_SHIFT), extend_(extend), shift_amount_(shift_amount) { + VIXL_ASSERT(base.Is64Bits() && !base.IsZero()); + VIXL_ASSERT(!regoffset.IsSP()); + VIXL_ASSERT((extend == UXTW) || (extend == SXTW) || (extend == SXTX)); + + // SXTX extend mode requires a 64-bit offset register. + VIXL_ASSERT(regoffset.Is64Bits() || (extend != SXTX)); +} + + +MemOperand::MemOperand(Register base, + Register regoffset, + Shift shift, + unsigned shift_amount) + : base_(base), regoffset_(regoffset), offset_(0), addrmode_(Offset), + shift_(shift), extend_(NO_EXTEND), shift_amount_(shift_amount) { + VIXL_ASSERT(base.Is64Bits() && !base.IsZero()); + VIXL_ASSERT(regoffset.Is64Bits() && !regoffset.IsSP()); + VIXL_ASSERT(shift == LSL); +} + + +MemOperand::MemOperand(Register base, const Operand& offset, AddrMode addrmode) + : base_(base), regoffset_(NoReg), addrmode_(addrmode) { + VIXL_ASSERT(base.Is64Bits() && !base.IsZero()); + + if (offset.IsImmediate()) { + offset_ = offset.immediate(); + } else if (offset.IsShiftedRegister()) { + VIXL_ASSERT((addrmode == Offset) || (addrmode == PostIndex)); + + regoffset_ = offset.reg(); + shift_ = offset.shift(); + shift_amount_ = offset.shift_amount(); + + extend_ = NO_EXTEND; + offset_ = 0; + + // These assertions match those in the shifted-register constructor. 
+ VIXL_ASSERT(regoffset_.Is64Bits() && !regoffset_.IsSP()); + VIXL_ASSERT(shift_ == LSL); + } else { + VIXL_ASSERT(offset.IsExtendedRegister()); + VIXL_ASSERT(addrmode == Offset); + + regoffset_ = offset.reg(); + extend_ = offset.extend(); + shift_amount_ = offset.shift_amount(); + + shift_ = NO_SHIFT; + offset_ = 0; + + // These assertions match those in the extended-register constructor. + VIXL_ASSERT(!regoffset_.IsSP()); + VIXL_ASSERT((extend_ == UXTW) || (extend_ == SXTW) || (extend_ == SXTX)); + VIXL_ASSERT((regoffset_.Is64Bits() || (extend_ != SXTX))); + } +} + + +bool MemOperand::IsImmediateOffset() const { + return (addrmode_ == Offset) && regoffset_.Is(NoReg); +} + + +bool MemOperand::IsRegisterOffset() const { + return (addrmode_ == Offset) && !regoffset_.Is(NoReg); +} + + +bool MemOperand::IsPreIndex() const { + return addrmode_ == PreIndex; +} + + +bool MemOperand::IsPostIndex() const { + return addrmode_ == PostIndex; +} + + +void MemOperand::AddOffset(int64_t offset) { + VIXL_ASSERT(IsImmediateOffset()); + offset_ += offset; +} + + +// Assembler +Assembler::Assembler(PositionIndependentCodeOption pic) + : pic_(pic), + cpu_features_(CPUFeatures::AArch64LegacyBaseline()) +{ + // Mozilla change: always use maximally-present features. + cpu_features_.Combine(CPUFeatures::InferFromOS()); + + // Mozilla change: Compile time hard-coded value from js-config.mozbuild. +#ifndef MOZ_AARCH64_JSCVT +# error "MOZ_AARCH64_JSCVT must be defined." +#elif MOZ_AARCH64_JSCVT >= 1 + // Note, vixl backend implements the JSCVT flag as a boolean despite having 3 + // extra bits reserved for forward compatibility in the ARMv8 documentation. + cpu_features_.Combine(CPUFeatures::kJSCVT); +#endif +} + + +// Code generation. 
+void Assembler::br(const Register& xn) { + VIXL_ASSERT(xn.Is64Bits()); + Emit(BR | Rn(xn)); +} + + +void Assembler::blr(const Register& xn) { + VIXL_ASSERT(xn.Is64Bits()); + Emit(BLR | Rn(xn)); +} + + +void Assembler::ret(const Register& xn) { + VIXL_ASSERT(xn.Is64Bits()); + Emit(RET | Rn(xn)); +} + + +void Assembler::NEONTable(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + NEONTableOp op) { + VIXL_ASSERT(vd.Is16B() || vd.Is8B()); + VIXL_ASSERT(vn.Is16B()); + VIXL_ASSERT(AreSameFormat(vd, vm)); + Emit(op | (vd.IsQ() ? NEON_Q : 0) | Rm(vm) | Rn(vn) | Rd(vd)); +} + + +void Assembler::tbl(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + NEONTable(vd, vn, vm, NEON_TBL_1v); +} + + +void Assembler::tbl(const VRegister& vd, + const VRegister& vn, + const VRegister& vn2, + const VRegister& vm) { + USE(vn2); + VIXL_ASSERT(AreSameFormat(vn, vn2)); + VIXL_ASSERT(vn2.code() == ((vn.code() + 1) % kNumberOfVRegisters)); + + NEONTable(vd, vn, vm, NEON_TBL_2v); +} + + +void Assembler::tbl(const VRegister& vd, + const VRegister& vn, + const VRegister& vn2, + const VRegister& vn3, + const VRegister& vm) { + USE(vn2, vn3); + VIXL_ASSERT(AreSameFormat(vn, vn2, vn3)); + VIXL_ASSERT(vn2.code() == ((vn.code() + 1) % kNumberOfVRegisters)); + VIXL_ASSERT(vn3.code() == ((vn.code() + 2) % kNumberOfVRegisters)); + + NEONTable(vd, vn, vm, NEON_TBL_3v); +} + + +void Assembler::tbl(const VRegister& vd, + const VRegister& vn, + const VRegister& vn2, + const VRegister& vn3, + const VRegister& vn4, + const VRegister& vm) { + USE(vn2, vn3, vn4); + VIXL_ASSERT(AreSameFormat(vn, vn2, vn3, vn4)); + VIXL_ASSERT(vn2.code() == ((vn.code() + 1) % kNumberOfVRegisters)); + VIXL_ASSERT(vn3.code() == ((vn.code() + 2) % kNumberOfVRegisters)); + VIXL_ASSERT(vn4.code() == ((vn.code() + 3) % kNumberOfVRegisters)); + + NEONTable(vd, vn, vm, NEON_TBL_4v); +} + + +void Assembler::tbx(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + NEONTable(vd, vn, 
vm, NEON_TBX_1v); +} + + +void Assembler::tbx(const VRegister& vd, + const VRegister& vn, + const VRegister& vn2, + const VRegister& vm) { + USE(vn2); + VIXL_ASSERT(AreSameFormat(vn, vn2)); + VIXL_ASSERT(vn2.code() == ((vn.code() + 1) % kNumberOfVRegisters)); + + NEONTable(vd, vn, vm, NEON_TBX_2v); +} + + +void Assembler::tbx(const VRegister& vd, + const VRegister& vn, + const VRegister& vn2, + const VRegister& vn3, + const VRegister& vm) { + USE(vn2, vn3); + VIXL_ASSERT(AreSameFormat(vn, vn2, vn3)); + VIXL_ASSERT(vn2.code() == ((vn.code() + 1) % kNumberOfVRegisters)); + VIXL_ASSERT(vn3.code() == ((vn.code() + 2) % kNumberOfVRegisters)); + + NEONTable(vd, vn, vm, NEON_TBX_3v); +} + + +void Assembler::tbx(const VRegister& vd, + const VRegister& vn, + const VRegister& vn2, + const VRegister& vn3, + const VRegister& vn4, + const VRegister& vm) { + USE(vn2, vn3, vn4); + VIXL_ASSERT(AreSameFormat(vn, vn2, vn3, vn4)); + VIXL_ASSERT(vn2.code() == ((vn.code() + 1) % kNumberOfVRegisters)); + VIXL_ASSERT(vn3.code() == ((vn.code() + 2) % kNumberOfVRegisters)); + VIXL_ASSERT(vn4.code() == ((vn.code() + 3) % kNumberOfVRegisters)); + + NEONTable(vd, vn, vm, NEON_TBX_4v); +} + + +void Assembler::add(const Register& rd, + const Register& rn, + const Operand& operand) { + AddSub(rd, rn, operand, LeaveFlags, ADD); +} + + +void Assembler::adds(const Register& rd, + const Register& rn, + const Operand& operand) { + AddSub(rd, rn, operand, SetFlags, ADD); +} + + +void Assembler::cmn(const Register& rn, + const Operand& operand) { + Register zr = AppropriateZeroRegFor(rn); + adds(zr, rn, operand); +} + + +void Assembler::sub(const Register& rd, + const Register& rn, + const Operand& operand) { + AddSub(rd, rn, operand, LeaveFlags, SUB); +} + + +void Assembler::subs(const Register& rd, + const Register& rn, + const Operand& operand) { + AddSub(rd, rn, operand, SetFlags, SUB); +} + + +void Assembler::cmp(const Register& rn, const Operand& operand) { + Register zr = 
AppropriateZeroRegFor(rn); + subs(zr, rn, operand); +} + + +void Assembler::neg(const Register& rd, const Operand& operand) { + Register zr = AppropriateZeroRegFor(rd); + sub(rd, zr, operand); +} + + +void Assembler::negs(const Register& rd, const Operand& operand) { + Register zr = AppropriateZeroRegFor(rd); + subs(rd, zr, operand); +} + + +void Assembler::adc(const Register& rd, + const Register& rn, + const Operand& operand) { + AddSubWithCarry(rd, rn, operand, LeaveFlags, ADC); +} + + +void Assembler::adcs(const Register& rd, + const Register& rn, + const Operand& operand) { + AddSubWithCarry(rd, rn, operand, SetFlags, ADC); +} + + +void Assembler::sbc(const Register& rd, + const Register& rn, + const Operand& operand) { + AddSubWithCarry(rd, rn, operand, LeaveFlags, SBC); +} + + +void Assembler::sbcs(const Register& rd, + const Register& rn, + const Operand& operand) { + AddSubWithCarry(rd, rn, operand, SetFlags, SBC); +} + + +void Assembler::ngc(const Register& rd, const Operand& operand) { + Register zr = AppropriateZeroRegFor(rd); + sbc(rd, zr, operand); +} + + +void Assembler::ngcs(const Register& rd, const Operand& operand) { + Register zr = AppropriateZeroRegFor(rd); + sbcs(rd, zr, operand); +} + + +// Logical instructions. 
+void Assembler::and_(const Register& rd, + const Register& rn, + const Operand& operand) { + Logical(rd, rn, operand, AND); +} + + +void Assembler::bic(const Register& rd, + const Register& rn, + const Operand& operand) { + Logical(rd, rn, operand, BIC); +} + + +void Assembler::bics(const Register& rd, + const Register& rn, + const Operand& operand) { + Logical(rd, rn, operand, BICS); +} + + +void Assembler::orr(const Register& rd, + const Register& rn, + const Operand& operand) { + Logical(rd, rn, operand, ORR); +} + + +void Assembler::orn(const Register& rd, + const Register& rn, + const Operand& operand) { + Logical(rd, rn, operand, ORN); +} + + +void Assembler::eor(const Register& rd, + const Register& rn, + const Operand& operand) { + Logical(rd, rn, operand, EOR); +} + + +void Assembler::eon(const Register& rd, + const Register& rn, + const Operand& operand) { + Logical(rd, rn, operand, EON); +} + + +void Assembler::lslv(const Register& rd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(rd.size() == rn.size()); + VIXL_ASSERT(rd.size() == rm.size()); + Emit(SF(rd) | LSLV | Rm(rm) | Rn(rn) | Rd(rd)); +} + + +void Assembler::lsrv(const Register& rd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(rd.size() == rn.size()); + VIXL_ASSERT(rd.size() == rm.size()); + Emit(SF(rd) | LSRV | Rm(rm) | Rn(rn) | Rd(rd)); +} + + +void Assembler::asrv(const Register& rd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(rd.size() == rn.size()); + VIXL_ASSERT(rd.size() == rm.size()); + Emit(SF(rd) | ASRV | Rm(rm) | Rn(rn) | Rd(rd)); +} + + +void Assembler::rorv(const Register& rd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(rd.size() == rn.size()); + VIXL_ASSERT(rd.size() == rm.size()); + Emit(SF(rd) | RORV | Rm(rm) | Rn(rn) | Rd(rd)); +} + + +// Bitfield operations. 
+void Assembler::bfm(const Register& rd, + const Register& rn, + unsigned immr, + unsigned imms) { + VIXL_ASSERT(rd.size() == rn.size()); + Instr N = SF(rd) >> (kSFOffset - kBitfieldNOffset); + Emit(SF(rd) | BFM | N | + ImmR(immr, rd.size()) | ImmS(imms, rn.size()) | Rn(rn) | Rd(rd)); +} + + +void Assembler::sbfm(const Register& rd, + const Register& rn, + unsigned immr, + unsigned imms) { + VIXL_ASSERT(rd.Is64Bits() || rn.Is32Bits()); + Instr N = SF(rd) >> (kSFOffset - kBitfieldNOffset); + Emit(SF(rd) | SBFM | N | + ImmR(immr, rd.size()) | ImmS(imms, rn.size()) | Rn(rn) | Rd(rd)); +} + + +void Assembler::ubfm(const Register& rd, + const Register& rn, + unsigned immr, + unsigned imms) { + VIXL_ASSERT(rd.size() == rn.size()); + Instr N = SF(rd) >> (kSFOffset - kBitfieldNOffset); + Emit(SF(rd) | UBFM | N | + ImmR(immr, rd.size()) | ImmS(imms, rn.size()) | Rn(rn) | Rd(rd)); +} + + +void Assembler::extr(const Register& rd, + const Register& rn, + const Register& rm, + unsigned lsb) { + VIXL_ASSERT(rd.size() == rn.size()); + VIXL_ASSERT(rd.size() == rm.size()); + Instr N = SF(rd) >> (kSFOffset - kBitfieldNOffset); + Emit(SF(rd) | EXTR | N | Rm(rm) | ImmS(lsb, rn.size()) | Rn(rn) | Rd(rd)); +} + + +void Assembler::csel(const Register& rd, + const Register& rn, + const Register& rm, + Condition cond) { + ConditionalSelect(rd, rn, rm, cond, CSEL); +} + + +void Assembler::csinc(const Register& rd, + const Register& rn, + const Register& rm, + Condition cond) { + ConditionalSelect(rd, rn, rm, cond, CSINC); +} + + +void Assembler::csinv(const Register& rd, + const Register& rn, + const Register& rm, + Condition cond) { + ConditionalSelect(rd, rn, rm, cond, CSINV); +} + + +void Assembler::csneg(const Register& rd, + const Register& rn, + const Register& rm, + Condition cond) { + ConditionalSelect(rd, rn, rm, cond, CSNEG); +} + + +void Assembler::cset(const Register &rd, Condition cond) { + VIXL_ASSERT((cond != al) && (cond != nv)); + Register zr = AppropriateZeroRegFor(rd); + 
csinc(rd, zr, zr, InvertCondition(cond)); +} + + +void Assembler::csetm(const Register &rd, Condition cond) { + VIXL_ASSERT((cond != al) && (cond != nv)); + Register zr = AppropriateZeroRegFor(rd); + csinv(rd, zr, zr, InvertCondition(cond)); +} + + +void Assembler::cinc(const Register &rd, const Register &rn, Condition cond) { + VIXL_ASSERT((cond != al) && (cond != nv)); + csinc(rd, rn, rn, InvertCondition(cond)); +} + + +void Assembler::cinv(const Register &rd, const Register &rn, Condition cond) { + VIXL_ASSERT((cond != al) && (cond != nv)); + csinv(rd, rn, rn, InvertCondition(cond)); +} + + +void Assembler::cneg(const Register &rd, const Register &rn, Condition cond) { + VIXL_ASSERT((cond != al) && (cond != nv)); + csneg(rd, rn, rn, InvertCondition(cond)); +} + + +void Assembler::ConditionalSelect(const Register& rd, + const Register& rn, + const Register& rm, + Condition cond, + ConditionalSelectOp op) { + VIXL_ASSERT(rd.size() == rn.size()); + VIXL_ASSERT(rd.size() == rm.size()); + Emit(SF(rd) | op | Rm(rm) | Cond(cond) | Rn(rn) | Rd(rd)); +} + + +void Assembler::ccmn(const Register& rn, + const Operand& operand, + StatusFlags nzcv, + Condition cond) { + ConditionalCompare(rn, operand, nzcv, cond, CCMN); +} + + +void Assembler::ccmp(const Register& rn, + const Operand& operand, + StatusFlags nzcv, + Condition cond) { + ConditionalCompare(rn, operand, nzcv, cond, CCMP); +} + + +void Assembler::DataProcessing3Source(const Register& rd, + const Register& rn, + const Register& rm, + const Register& ra, + DataProcessing3SourceOp op) { + Emit(SF(rd) | op | Rm(rm) | Ra(ra) | Rn(rn) | Rd(rd)); +} + + +void Assembler::crc32b(const Register& rd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(rd.Is32Bits() && rn.Is32Bits() && rm.Is32Bits()); + Emit(SF(rm) | Rm(rm) | CRC32B | Rn(rn) | Rd(rd)); +} + + +void Assembler::crc32h(const Register& rd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(rd.Is32Bits() && rn.Is32Bits() && rm.Is32Bits()); + 
Emit(SF(rm) | Rm(rm) | CRC32H | Rn(rn) | Rd(rd)); +} + + +void Assembler::crc32w(const Register& rd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(rd.Is32Bits() && rn.Is32Bits() && rm.Is32Bits()); + Emit(SF(rm) | Rm(rm) | CRC32W | Rn(rn) | Rd(rd)); +} + + +void Assembler::crc32x(const Register& rd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(rd.Is32Bits() && rn.Is32Bits() && rm.Is64Bits()); + Emit(SF(rm) | Rm(rm) | CRC32X | Rn(rn) | Rd(rd)); +} + + +void Assembler::crc32cb(const Register& rd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(rd.Is32Bits() && rn.Is32Bits() && rm.Is32Bits()); + Emit(SF(rm) | Rm(rm) | CRC32CB | Rn(rn) | Rd(rd)); +} + + +void Assembler::crc32ch(const Register& rd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(rd.Is32Bits() && rn.Is32Bits() && rm.Is32Bits()); + Emit(SF(rm) | Rm(rm) | CRC32CH | Rn(rn) | Rd(rd)); +} + + +void Assembler::crc32cw(const Register& rd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(rd.Is32Bits() && rn.Is32Bits() && rm.Is32Bits()); + Emit(SF(rm) | Rm(rm) | CRC32CW | Rn(rn) | Rd(rd)); +} + + +void Assembler::crc32cx(const Register& rd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(rd.Is32Bits() && rn.Is32Bits() && rm.Is64Bits()); + Emit(SF(rm) | Rm(rm) | CRC32CX | Rn(rn) | Rd(rd)); +} + + +void Assembler::mul(const Register& rd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(AreSameSizeAndType(rd, rn, rm)); + DataProcessing3Source(rd, rn, rm, AppropriateZeroRegFor(rd), MADD); +} + + +void Assembler::madd(const Register& rd, + const Register& rn, + const Register& rm, + const Register& ra) { + DataProcessing3Source(rd, rn, rm, ra, MADD); +} + + +void Assembler::mneg(const Register& rd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(AreSameSizeAndType(rd, rn, rm)); + DataProcessing3Source(rd, rn, rm, AppropriateZeroRegFor(rd), MSUB); +} + + +void Assembler::msub(const Register& rd, + const Register& rn, + 
const Register& rm, + const Register& ra) { + DataProcessing3Source(rd, rn, rm, ra, MSUB); +} + + +void Assembler::umaddl(const Register& rd, + const Register& rn, + const Register& rm, + const Register& ra) { + VIXL_ASSERT(rd.Is64Bits() && ra.Is64Bits()); + VIXL_ASSERT(rn.Is32Bits() && rm.Is32Bits()); + DataProcessing3Source(rd, rn, rm, ra, UMADDL_x); +} + + +void Assembler::smaddl(const Register& rd, + const Register& rn, + const Register& rm, + const Register& ra) { + VIXL_ASSERT(rd.Is64Bits() && ra.Is64Bits()); + VIXL_ASSERT(rn.Is32Bits() && rm.Is32Bits()); + DataProcessing3Source(rd, rn, rm, ra, SMADDL_x); +} + + +void Assembler::umsubl(const Register& rd, + const Register& rn, + const Register& rm, + const Register& ra) { + VIXL_ASSERT(rd.Is64Bits() && ra.Is64Bits()); + VIXL_ASSERT(rn.Is32Bits() && rm.Is32Bits()); + DataProcessing3Source(rd, rn, rm, ra, UMSUBL_x); +} + + +void Assembler::smsubl(const Register& rd, + const Register& rn, + const Register& rm, + const Register& ra) { + VIXL_ASSERT(rd.Is64Bits() && ra.Is64Bits()); + VIXL_ASSERT(rn.Is32Bits() && rm.Is32Bits()); + DataProcessing3Source(rd, rn, rm, ra, SMSUBL_x); +} + + +void Assembler::smull(const Register& rd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(rd.Is64Bits()); + VIXL_ASSERT(rn.Is32Bits() && rm.Is32Bits()); + DataProcessing3Source(rd, rn, rm, xzr, SMADDL_x); +} + + +void Assembler::sdiv(const Register& rd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(rd.size() == rn.size()); + VIXL_ASSERT(rd.size() == rm.size()); + Emit(SF(rd) | SDIV | Rm(rm) | Rn(rn) | Rd(rd)); +} + + +void Assembler::smulh(const Register& xd, + const Register& xn, + const Register& xm) { + VIXL_ASSERT(xd.Is64Bits() && xn.Is64Bits() && xm.Is64Bits()); + DataProcessing3Source(xd, xn, xm, xzr, SMULH_x); +} + + +void Assembler::umulh(const Register& xd, + const Register& xn, + const Register& xm) { + VIXL_ASSERT(xd.Is64Bits() && xn.Is64Bits() && xm.Is64Bits()); + DataProcessing3Source(xd, xn, 
xm, xzr, UMULH_x); +} + + +void Assembler::udiv(const Register& rd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(rd.size() == rn.size()); + VIXL_ASSERT(rd.size() == rm.size()); + Emit(SF(rd) | UDIV | Rm(rm) | Rn(rn) | Rd(rd)); +} + + +void Assembler::rbit(const Register& rd, + const Register& rn) { + DataProcessing1Source(rd, rn, RBIT); +} + + +void Assembler::rev16(const Register& rd, + const Register& rn) { + DataProcessing1Source(rd, rn, REV16); +} + + +void Assembler::rev32(const Register& rd, + const Register& rn) { + VIXL_ASSERT(rd.Is64Bits()); + DataProcessing1Source(rd, rn, REV); +} + + +void Assembler::rev(const Register& rd, + const Register& rn) { + DataProcessing1Source(rd, rn, rd.Is64Bits() ? REV_x : REV_w); +} + + +void Assembler::clz(const Register& rd, + const Register& rn) { + DataProcessing1Source(rd, rn, CLZ); +} + + +void Assembler::cls(const Register& rd, + const Register& rn) { + DataProcessing1Source(rd, rn, CLS); +} + + +void Assembler::ldp(const CPURegister& rt, + const CPURegister& rt2, + const MemOperand& src) { + LoadStorePair(rt, rt2, src, LoadPairOpFor(rt, rt2)); +} + + +void Assembler::stp(const CPURegister& rt, + const CPURegister& rt2, + const MemOperand& dst) { + LoadStorePair(rt, rt2, dst, StorePairOpFor(rt, rt2)); +} + + +void Assembler::ldpsw(const Register& rt, + const Register& rt2, + const MemOperand& src) { + VIXL_ASSERT(rt.Is64Bits()); + LoadStorePair(rt, rt2, src, LDPSW_x); +} + + +void Assembler::LoadStorePair(const CPURegister& rt, + const CPURegister& rt2, + const MemOperand& addr, + LoadStorePairOp op) { + // 'rt' and 'rt2' can only be aliased for stores. 
+ VIXL_ASSERT(((op & LoadStorePairLBit) == 0) || !rt.Is(rt2)); + VIXL_ASSERT(AreSameSizeAndType(rt, rt2)); + VIXL_ASSERT(IsImmLSPair(addr.offset(), CalcLSPairDataSize(op))); + + int offset = static_cast<int>(addr.offset()); + Instr memop = op | Rt(rt) | Rt2(rt2) | RnSP(addr.base()) | + ImmLSPair(offset, CalcLSPairDataSize(op)); + + Instr addrmodeop; + if (addr.IsImmediateOffset()) { + addrmodeop = LoadStorePairOffsetFixed; + } else { + VIXL_ASSERT(addr.offset() != 0); + if (addr.IsPreIndex()) { + addrmodeop = LoadStorePairPreIndexFixed; + } else { + VIXL_ASSERT(addr.IsPostIndex()); + addrmodeop = LoadStorePairPostIndexFixed; + } + } + Emit(addrmodeop | memop); +} + + +void Assembler::ldnp(const CPURegister& rt, + const CPURegister& rt2, + const MemOperand& src) { + LoadStorePairNonTemporal(rt, rt2, src, + LoadPairNonTemporalOpFor(rt, rt2)); +} + + +void Assembler::stnp(const CPURegister& rt, + const CPURegister& rt2, + const MemOperand& dst) { + LoadStorePairNonTemporal(rt, rt2, dst, + StorePairNonTemporalOpFor(rt, rt2)); +} + + +void Assembler::LoadStorePairNonTemporal(const CPURegister& rt, + const CPURegister& rt2, + const MemOperand& addr, + LoadStorePairNonTemporalOp op) { + VIXL_ASSERT(!rt.Is(rt2)); + VIXL_ASSERT(AreSameSizeAndType(rt, rt2)); + VIXL_ASSERT(addr.IsImmediateOffset()); + + unsigned size = CalcLSPairDataSize( + static_cast<LoadStorePairOp>(op & LoadStorePairMask)); + VIXL_ASSERT(IsImmLSPair(addr.offset(), size)); + int offset = static_cast<int>(addr.offset()); + Emit(op | Rt(rt) | Rt2(rt2) | RnSP(addr.base()) | ImmLSPair(offset, size)); +} + + +// Memory instructions. 
+void Assembler::ldrb(const Register& rt, const MemOperand& src, + LoadStoreScalingOption option) { + VIXL_ASSERT(option != RequireUnscaledOffset); + VIXL_ASSERT(option != PreferUnscaledOffset); + LoadStore(rt, src, LDRB_w, option); +} + + +void Assembler::strb(const Register& rt, const MemOperand& dst, + LoadStoreScalingOption option) { + VIXL_ASSERT(option != RequireUnscaledOffset); + VIXL_ASSERT(option != PreferUnscaledOffset); + LoadStore(rt, dst, STRB_w, option); +} + + +void Assembler::ldrsb(const Register& rt, const MemOperand& src, + LoadStoreScalingOption option) { + VIXL_ASSERT(option != RequireUnscaledOffset); + VIXL_ASSERT(option != PreferUnscaledOffset); + LoadStore(rt, src, rt.Is64Bits() ? LDRSB_x : LDRSB_w, option); +} + + +void Assembler::ldrh(const Register& rt, const MemOperand& src, + LoadStoreScalingOption option) { + VIXL_ASSERT(option != RequireUnscaledOffset); + VIXL_ASSERT(option != PreferUnscaledOffset); + LoadStore(rt, src, LDRH_w, option); +} + + +void Assembler::strh(const Register& rt, const MemOperand& dst, + LoadStoreScalingOption option) { + VIXL_ASSERT(option != RequireUnscaledOffset); + VIXL_ASSERT(option != PreferUnscaledOffset); + LoadStore(rt, dst, STRH_w, option); +} + + +void Assembler::ldrsh(const Register& rt, const MemOperand& src, + LoadStoreScalingOption option) { + VIXL_ASSERT(option != RequireUnscaledOffset); + VIXL_ASSERT(option != PreferUnscaledOffset); + LoadStore(rt, src, rt.Is64Bits() ? 
LDRSH_x : LDRSH_w, option); +} + + +void Assembler::ldr(const CPURegister& rt, const MemOperand& src, + LoadStoreScalingOption option) { + VIXL_ASSERT(option != RequireUnscaledOffset); + VIXL_ASSERT(option != PreferUnscaledOffset); + LoadStore(rt, src, LoadOpFor(rt), option); +} + + +void Assembler::str(const CPURegister& rt, const MemOperand& dst, + LoadStoreScalingOption option) { + VIXL_ASSERT(option != RequireUnscaledOffset); + VIXL_ASSERT(option != PreferUnscaledOffset); + LoadStore(rt, dst, StoreOpFor(rt), option); +} + + +void Assembler::ldrsw(const Register& rt, const MemOperand& src, + LoadStoreScalingOption option) { + VIXL_ASSERT(rt.Is64Bits()); + VIXL_ASSERT(option != RequireUnscaledOffset); + VIXL_ASSERT(option != PreferUnscaledOffset); + LoadStore(rt, src, LDRSW_x, option); +} + + +void Assembler::ldurb(const Register& rt, const MemOperand& src, + LoadStoreScalingOption option) { + VIXL_ASSERT(option != RequireScaledOffset); + VIXL_ASSERT(option != PreferScaledOffset); + LoadStore(rt, src, LDRB_w, option); +} + + +void Assembler::sturb(const Register& rt, const MemOperand& dst, + LoadStoreScalingOption option) { + VIXL_ASSERT(option != RequireScaledOffset); + VIXL_ASSERT(option != PreferScaledOffset); + LoadStore(rt, dst, STRB_w, option); +} + + +void Assembler::ldursb(const Register& rt, const MemOperand& src, + LoadStoreScalingOption option) { + VIXL_ASSERT(option != RequireScaledOffset); + VIXL_ASSERT(option != PreferScaledOffset); + LoadStore(rt, src, rt.Is64Bits() ? 
LDRSB_x : LDRSB_w, option); +} + + +void Assembler::ldurh(const Register& rt, const MemOperand& src, + LoadStoreScalingOption option) { + VIXL_ASSERT(option != RequireScaledOffset); + VIXL_ASSERT(option != PreferScaledOffset); + LoadStore(rt, src, LDRH_w, option); +} + + +void Assembler::sturh(const Register& rt, const MemOperand& dst, + LoadStoreScalingOption option) { + VIXL_ASSERT(option != RequireScaledOffset); + VIXL_ASSERT(option != PreferScaledOffset); + LoadStore(rt, dst, STRH_w, option); +} + + +void Assembler::ldursh(const Register& rt, const MemOperand& src, + LoadStoreScalingOption option) { + VIXL_ASSERT(option != RequireScaledOffset); + VIXL_ASSERT(option != PreferScaledOffset); + LoadStore(rt, src, rt.Is64Bits() ? LDRSH_x : LDRSH_w, option); +} + + +void Assembler::ldur(const CPURegister& rt, const MemOperand& src, + LoadStoreScalingOption option) { + VIXL_ASSERT(option != RequireScaledOffset); + VIXL_ASSERT(option != PreferScaledOffset); + LoadStore(rt, src, LoadOpFor(rt), option); +} + + +void Assembler::stur(const CPURegister& rt, const MemOperand& dst, + LoadStoreScalingOption option) { + VIXL_ASSERT(option != RequireScaledOffset); + VIXL_ASSERT(option != PreferScaledOffset); + LoadStore(rt, dst, StoreOpFor(rt), option); +} + + +void Assembler::ldursw(const Register& rt, const MemOperand& src, + LoadStoreScalingOption option) { + VIXL_ASSERT(rt.Is64Bits()); + VIXL_ASSERT(option != RequireScaledOffset); + VIXL_ASSERT(option != PreferScaledOffset); + LoadStore(rt, src, LDRSW_x, option); +} + + +void Assembler::ldrsw(const Register& rt, int imm19) { + Emit(LDRSW_x_lit | ImmLLiteral(imm19) | Rt(rt)); +} + + +void Assembler::ldr(const CPURegister& rt, int imm19) { + LoadLiteralOp op = LoadLiteralOpFor(rt); + Emit(op | ImmLLiteral(imm19) | Rt(rt)); +} + +// clang-format off +#define COMPARE_AND_SWAP_W_X_LIST(V) \ + V(cas, CAS) \ + V(casa, CASA) \ + V(casl, CASL) \ + V(casal, CASAL) +// clang-format on + +#define DEFINE_ASM_FUNC(FN, OP) \ + void 
Assembler::FN(const Register& rs, const Register& rt, \ + const MemOperand& src) { \ + VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0)); \ + LoadStoreExclusive op = rt.Is64Bits() ? OP##_x : OP##_w; \ + Emit(op | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(src.base())); \ + } +COMPARE_AND_SWAP_W_X_LIST(DEFINE_ASM_FUNC) +#undef DEFINE_ASM_FUNC + +// clang-format off +#define COMPARE_AND_SWAP_W_LIST(V) \ + V(casb, CASB) \ + V(casab, CASAB) \ + V(caslb, CASLB) \ + V(casalb, CASALB) \ + V(cash, CASH) \ + V(casah, CASAH) \ + V(caslh, CASLH) \ + V(casalh, CASALH) +// clang-format on + +#define DEFINE_ASM_FUNC(FN, OP) \ + void Assembler::FN(const Register& rs, const Register& rt, \ + const MemOperand& src) { \ + VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0)); \ + Emit(OP | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(src.base())); \ + } +COMPARE_AND_SWAP_W_LIST(DEFINE_ASM_FUNC) +#undef DEFINE_ASM_FUNC + +// clang-format off +#define COMPARE_AND_SWAP_PAIR_LIST(V) \ + V(casp, CASP) \ + V(caspa, CASPA) \ + V(caspl, CASPL) \ + V(caspal, CASPAL) +// clang-format on + +#define DEFINE_ASM_FUNC(FN, OP) \ + void Assembler::FN(const Register& rs, const Register& rs1, \ + const Register& rt, const Register& rt1, \ + const MemOperand& src) { \ + USE(rs1, rt1); \ + VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0)); \ + VIXL_ASSERT(AreEven(rs, rt)); \ + VIXL_ASSERT(AreConsecutive(rs, rs1)); \ + VIXL_ASSERT(AreConsecutive(rt, rt1)); \ + LoadStoreExclusive op = rt.Is64Bits() ? OP##_x : OP##_w; \ + Emit(op | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(src.base())); \ + } +COMPARE_AND_SWAP_PAIR_LIST(DEFINE_ASM_FUNC) +#undef DEFINE_ASM_FUNC + +void Assembler::prfm(PrefetchOperation op, int imm19) { + Emit(PRFM_lit | ImmPrefetchOperation(op) | ImmLLiteral(imm19)); +} + + +// Exclusive-access instructions. 
+void Assembler::stxrb(const Register& rs, + const Register& rt, + const MemOperand& dst) { + VIXL_ASSERT(dst.IsImmediateOffset() && (dst.offset() == 0)); + Emit(STXRB_w | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(dst.base())); +} + + +void Assembler::stxrh(const Register& rs, + const Register& rt, + const MemOperand& dst) { + VIXL_ASSERT(dst.IsImmediateOffset() && (dst.offset() == 0)); + Emit(STXRH_w | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(dst.base())); +} + + +void Assembler::stxr(const Register& rs, + const Register& rt, + const MemOperand& dst) { + VIXL_ASSERT(dst.IsImmediateOffset() && (dst.offset() == 0)); + LoadStoreExclusive op = rt.Is64Bits() ? STXR_x : STXR_w; + Emit(op | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(dst.base())); +} + + +void Assembler::ldxrb(const Register& rt, + const MemOperand& src) { + VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0)); + Emit(LDXRB_w | Rs_mask | Rt(rt) | Rt2_mask | RnSP(src.base())); +} + + +void Assembler::ldxrh(const Register& rt, + const MemOperand& src) { + VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0)); + Emit(LDXRH_w | Rs_mask | Rt(rt) | Rt2_mask | RnSP(src.base())); +} + + +void Assembler::ldxr(const Register& rt, + const MemOperand& src) { + VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0)); + LoadStoreExclusive op = rt.Is64Bits() ? LDXR_x : LDXR_w; + Emit(op | Rs_mask | Rt(rt) | Rt2_mask | RnSP(src.base())); +} + + +void Assembler::stxp(const Register& rs, + const Register& rt, + const Register& rt2, + const MemOperand& dst) { + VIXL_ASSERT(rt.size() == rt2.size()); + VIXL_ASSERT(dst.IsImmediateOffset() && (dst.offset() == 0)); + LoadStoreExclusive op = rt.Is64Bits() ? STXP_x : STXP_w; + Emit(op | Rs(rs) | Rt(rt) | Rt2(rt2) | RnSP(dst.base())); +} + + +void Assembler::ldxp(const Register& rt, + const Register& rt2, + const MemOperand& src) { + VIXL_ASSERT(rt.size() == rt2.size()); + VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0)); + LoadStoreExclusive op = rt.Is64Bits() ? 
LDXP_x : LDXP_w; + Emit(op | Rs_mask | Rt(rt) | Rt2(rt2) | RnSP(src.base())); +} + + +void Assembler::stlxrb(const Register& rs, + const Register& rt, + const MemOperand& dst) { + VIXL_ASSERT(dst.IsImmediateOffset() && (dst.offset() == 0)); + Emit(STLXRB_w | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(dst.base())); +} + + +void Assembler::stlxrh(const Register& rs, + const Register& rt, + const MemOperand& dst) { + VIXL_ASSERT(dst.IsImmediateOffset() && (dst.offset() == 0)); + Emit(STLXRH_w | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(dst.base())); +} + + +void Assembler::stlxr(const Register& rs, + const Register& rt, + const MemOperand& dst) { + VIXL_ASSERT(dst.IsImmediateOffset() && (dst.offset() == 0)); + LoadStoreExclusive op = rt.Is64Bits() ? STLXR_x : STLXR_w; + Emit(op | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(dst.base())); +} + + +void Assembler::ldaxrb(const Register& rt, + const MemOperand& src) { + VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0)); + Emit(LDAXRB_w | Rs_mask | Rt(rt) | Rt2_mask | RnSP(src.base())); +} + + +void Assembler::ldaxrh(const Register& rt, + const MemOperand& src) { + VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0)); + Emit(LDAXRH_w | Rs_mask | Rt(rt) | Rt2_mask | RnSP(src.base())); +} + + +void Assembler::ldaxr(const Register& rt, + const MemOperand& src) { + VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0)); + LoadStoreExclusive op = rt.Is64Bits() ? LDAXR_x : LDAXR_w; + Emit(op | Rs_mask | Rt(rt) | Rt2_mask | RnSP(src.base())); +} + + +void Assembler::stlxp(const Register& rs, + const Register& rt, + const Register& rt2, + const MemOperand& dst) { + VIXL_ASSERT(rt.size() == rt2.size()); + VIXL_ASSERT(dst.IsImmediateOffset() && (dst.offset() == 0)); + LoadStoreExclusive op = rt.Is64Bits() ? 
STLXP_x : STLXP_w; + Emit(op | Rs(rs) | Rt(rt) | Rt2(rt2) | RnSP(dst.base())); +} + + +void Assembler::ldaxp(const Register& rt, + const Register& rt2, + const MemOperand& src) { + VIXL_ASSERT(rt.size() == rt2.size()); + VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0)); + LoadStoreExclusive op = rt.Is64Bits() ? LDAXP_x : LDAXP_w; + Emit(op | Rs_mask | Rt(rt) | Rt2(rt2) | RnSP(src.base())); +} + + +void Assembler::stlrb(const Register& rt, + const MemOperand& dst) { + VIXL_ASSERT(dst.IsImmediateOffset() && (dst.offset() == 0)); + Emit(STLRB_w | Rs_mask | Rt(rt) | Rt2_mask | RnSP(dst.base())); +} + + +void Assembler::stlrh(const Register& rt, + const MemOperand& dst) { + VIXL_ASSERT(dst.IsImmediateOffset() && (dst.offset() == 0)); + Emit(STLRH_w | Rs_mask | Rt(rt) | Rt2_mask | RnSP(dst.base())); +} + + +void Assembler::stlr(const Register& rt, + const MemOperand& dst) { + VIXL_ASSERT(dst.IsImmediateOffset() && (dst.offset() == 0)); + LoadStoreExclusive op = rt.Is64Bits() ? STLR_x : STLR_w; + Emit(op | Rs_mask | Rt(rt) | Rt2_mask | RnSP(dst.base())); +} + + +void Assembler::ldarb(const Register& rt, + const MemOperand& src) { + VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0)); + Emit(LDARB_w | Rs_mask | Rt(rt) | Rt2_mask | RnSP(src.base())); +} + + +void Assembler::ldarh(const Register& rt, + const MemOperand& src) { + VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0)); + Emit(LDARH_w | Rs_mask | Rt(rt) | Rt2_mask | RnSP(src.base())); +} + + +void Assembler::ldar(const Register& rt, + const MemOperand& src) { + VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0)); + LoadStoreExclusive op = rt.Is64Bits() ? LDAR_x : LDAR_w; + Emit(op | Rs_mask | Rt(rt) | Rt2_mask | RnSP(src.base())); +} + +// These macros generate all the variations of the atomic memory operations, +// e.g. ldadd, ldadda, ldaddb, staddl, etc. +// For a full list of the methods with comments, see the assembler header file. 
+ +// clang-format off +#define ATOMIC_MEMORY_SIMPLE_OPERATION_LIST(V, DEF) \ + V(DEF, add, LDADD) \ + V(DEF, clr, LDCLR) \ + V(DEF, eor, LDEOR) \ + V(DEF, set, LDSET) \ + V(DEF, smax, LDSMAX) \ + V(DEF, smin, LDSMIN) \ + V(DEF, umax, LDUMAX) \ + V(DEF, umin, LDUMIN) + +#define ATOMIC_MEMORY_STORE_MODES(V, NAME, OP) \ + V(NAME, OP##_x, OP##_w) \ + V(NAME##l, OP##L_x, OP##L_w) \ + V(NAME##b, OP##B, OP##B) \ + V(NAME##lb, OP##LB, OP##LB) \ + V(NAME##h, OP##H, OP##H) \ + V(NAME##lh, OP##LH, OP##LH) + +#define ATOMIC_MEMORY_LOAD_MODES(V, NAME, OP) \ + ATOMIC_MEMORY_STORE_MODES(V, NAME, OP) \ + V(NAME##a, OP##A_x, OP##A_w) \ + V(NAME##al, OP##AL_x, OP##AL_w) \ + V(NAME##ab, OP##AB, OP##AB) \ + V(NAME##alb, OP##ALB, OP##ALB) \ + V(NAME##ah, OP##AH, OP##AH) \ + V(NAME##alh, OP##ALH, OP##ALH) +// clang-format on + +#define DEFINE_ASM_LOAD_FUNC(FN, OP_X, OP_W) \ + void Assembler::ld##FN(const Register& rs, const Register& rt, \ + const MemOperand& src) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kAtomics)); \ + VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0)); \ + AtomicMemoryOp op = rt.Is64Bits() ? OP_X : OP_W; \ + Emit(op | Rs(rs) | Rt(rt) | RnSP(src.base())); \ + } +#define DEFINE_ASM_STORE_FUNC(FN, OP_X, OP_W) \ + void Assembler::st##FN(const Register& rs, const MemOperand& src) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kAtomics)); \ + ld##FN(rs, AppropriateZeroRegFor(rs), src); \ + } + +ATOMIC_MEMORY_SIMPLE_OPERATION_LIST(ATOMIC_MEMORY_LOAD_MODES, + DEFINE_ASM_LOAD_FUNC) +ATOMIC_MEMORY_SIMPLE_OPERATION_LIST(ATOMIC_MEMORY_STORE_MODES, + DEFINE_ASM_STORE_FUNC) + +#define DEFINE_ASM_SWP_FUNC(FN, OP_X, OP_W) \ + void Assembler::FN(const Register& rs, const Register& rt, \ + const MemOperand& src) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kAtomics)); \ + VIXL_ASSERT(src.IsImmediateOffset() && (src.offset() == 0)); \ + AtomicMemoryOp op = rt.Is64Bits() ? 
OP_X : OP_W; \ + Emit(op | Rs(rs) | Rt(rt) | RnSP(src.base())); \ + } + +ATOMIC_MEMORY_LOAD_MODES(DEFINE_ASM_SWP_FUNC, swp, SWP) + +#undef DEFINE_ASM_LOAD_FUNC +#undef DEFINE_ASM_STORE_FUNC +#undef DEFINE_ASM_SWP_FUNC + +void Assembler::prfm(PrefetchOperation op, const MemOperand& address, + LoadStoreScalingOption option) { + VIXL_ASSERT(option != RequireUnscaledOffset); + VIXL_ASSERT(option != PreferUnscaledOffset); + Prefetch(op, address, option); +} + + +void Assembler::prfum(PrefetchOperation op, const MemOperand& address, + LoadStoreScalingOption option) { + VIXL_ASSERT(option != RequireScaledOffset); + VIXL_ASSERT(option != PreferScaledOffset); + Prefetch(op, address, option); +} + + +void Assembler::sys(int op1, int crn, int crm, int op2, const Register& rt) { + Emit(SYS | ImmSysOp1(op1) | CRn(crn) | CRm(crm) | ImmSysOp2(op2) | Rt(rt)); +} + + +void Assembler::sys(int op, const Register& rt) { + Emit(SYS | SysOp(op) | Rt(rt)); +} + + +void Assembler::dc(DataCacheOp op, const Register& rt) { + VIXL_ASSERT((op == CVAC) || (op == CVAU) || (op == CIVAC) || (op == ZVA)); + sys(op, rt); +} + + +void Assembler::ic(InstructionCacheOp op, const Register& rt) { + VIXL_ASSERT(op == IVAU); + sys(op, rt); +} + + +// NEON structure loads and stores. +Instr Assembler::LoadStoreStructAddrModeField(const MemOperand& addr) { + Instr addr_field = RnSP(addr.base()); + + if (addr.IsPostIndex()) { + VIXL_STATIC_ASSERT(NEONLoadStoreMultiStructPostIndex == + static_cast<NEONLoadStoreMultiStructPostIndexOp>( + NEONLoadStoreSingleStructPostIndex)); + + addr_field |= NEONLoadStoreMultiStructPostIndex; + if (addr.offset() == 0) { + addr_field |= RmNot31(addr.regoffset()); + } else { + // The immediate post index addressing mode is indicated by rm = 31. + // The immediate is implied by the number of vector registers used. 
+ addr_field |= (0x1f << Rm_offset); + } + } else { + VIXL_ASSERT(addr.IsImmediateOffset() && (addr.offset() == 0)); + } + return addr_field; +} + +void Assembler::LoadStoreStructVerify(const VRegister& vt, + const MemOperand& addr, + Instr op) { +#ifdef DEBUG + // Assert that addressing mode is either offset (with immediate 0), post + // index by immediate of the size of the register list, or post index by a + // value in a core register. + if (addr.IsImmediateOffset()) { + VIXL_ASSERT(addr.offset() == 0); + } else { + int offset = vt.SizeInBytes(); + switch (op) { + case NEON_LD1_1v: + case NEON_ST1_1v: + offset *= 1; break; + case NEONLoadStoreSingleStructLoad1: + case NEONLoadStoreSingleStructStore1: + case NEON_LD1R: + offset = (offset / vt.lanes()) * 1; break; + + case NEON_LD1_2v: + case NEON_ST1_2v: + case NEON_LD2: + case NEON_ST2: + offset *= 2; + break; + case NEONLoadStoreSingleStructLoad2: + case NEONLoadStoreSingleStructStore2: + case NEON_LD2R: + offset = (offset / vt.lanes()) * 2; break; + + case NEON_LD1_3v: + case NEON_ST1_3v: + case NEON_LD3: + case NEON_ST3: + offset *= 3; break; + case NEONLoadStoreSingleStructLoad3: + case NEONLoadStoreSingleStructStore3: + case NEON_LD3R: + offset = (offset / vt.lanes()) * 3; break; + + case NEON_LD1_4v: + case NEON_ST1_4v: + case NEON_LD4: + case NEON_ST4: + offset *= 4; break; + case NEONLoadStoreSingleStructLoad4: + case NEONLoadStoreSingleStructStore4: + case NEON_LD4R: + offset = (offset / vt.lanes()) * 4; break; + default: + VIXL_UNREACHABLE(); + } + VIXL_ASSERT(!addr.regoffset().Is(NoReg) || + addr.offset() == offset); + } +#else + USE(vt, addr, op); +#endif +} + +void Assembler::LoadStoreStruct(const VRegister& vt, + const MemOperand& addr, + NEONLoadStoreMultiStructOp op) { + LoadStoreStructVerify(vt, addr, op); + VIXL_ASSERT(vt.IsVector() || vt.Is1D()); + Emit(op | LoadStoreStructAddrModeField(addr) | LSVFormat(vt) | Rt(vt)); +} + + +void Assembler::LoadStoreStructSingleAllLanes(const VRegister& vt, 
+ const MemOperand& addr, + NEONLoadStoreSingleStructOp op) { + LoadStoreStructVerify(vt, addr, op); + Emit(op | LoadStoreStructAddrModeField(addr) | LSVFormat(vt) | Rt(vt)); +} + + +void Assembler::ld1(const VRegister& vt, + const MemOperand& src) { + LoadStoreStruct(vt, src, NEON_LD1_1v); +} + + +void Assembler::ld1(const VRegister& vt, + const VRegister& vt2, + const MemOperand& src) { + USE(vt2); + VIXL_ASSERT(AreSameFormat(vt, vt2)); + VIXL_ASSERT(AreConsecutive(vt, vt2)); + LoadStoreStruct(vt, src, NEON_LD1_2v); +} + + +void Assembler::ld1(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const MemOperand& src) { + USE(vt2, vt3); + VIXL_ASSERT(AreSameFormat(vt, vt2, vt3)); + VIXL_ASSERT(AreConsecutive(vt, vt2, vt3)); + LoadStoreStruct(vt, src, NEON_LD1_3v); +} + + +void Assembler::ld1(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const VRegister& vt4, + const MemOperand& src) { + USE(vt2, vt3, vt4); + VIXL_ASSERT(AreSameFormat(vt, vt2, vt3, vt4)); + VIXL_ASSERT(AreConsecutive(vt, vt2, vt3, vt4)); + LoadStoreStruct(vt, src, NEON_LD1_4v); +} + + +void Assembler::ld2(const VRegister& vt, + const VRegister& vt2, + const MemOperand& src) { + USE(vt2); + VIXL_ASSERT(AreSameFormat(vt, vt2)); + VIXL_ASSERT(AreConsecutive(vt, vt2)); + LoadStoreStruct(vt, src, NEON_LD2); +} + + +void Assembler::ld2(const VRegister& vt, + const VRegister& vt2, + int lane, + const MemOperand& src) { + USE(vt2); + VIXL_ASSERT(AreSameFormat(vt, vt2)); + VIXL_ASSERT(AreConsecutive(vt, vt2)); + LoadStoreStructSingle(vt, lane, src, NEONLoadStoreSingleStructLoad2); +} + + +void Assembler::ld2r(const VRegister& vt, + const VRegister& vt2, + const MemOperand& src) { + USE(vt2); + VIXL_ASSERT(AreSameFormat(vt, vt2)); + VIXL_ASSERT(AreConsecutive(vt, vt2)); + LoadStoreStructSingleAllLanes(vt, src, NEON_LD2R); +} + + +void Assembler::ld3(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const MemOperand& src) { + USE(vt2, vt3); + 
VIXL_ASSERT(AreSameFormat(vt, vt2, vt3)); + VIXL_ASSERT(AreConsecutive(vt, vt2, vt3)); + LoadStoreStruct(vt, src, NEON_LD3); +} + + +void Assembler::ld3(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + int lane, + const MemOperand& src) { + USE(vt2, vt3); + VIXL_ASSERT(AreSameFormat(vt, vt2, vt3)); + VIXL_ASSERT(AreConsecutive(vt, vt2, vt3)); + LoadStoreStructSingle(vt, lane, src, NEONLoadStoreSingleStructLoad3); +} + + +void Assembler::ld3r(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const MemOperand& src) { + USE(vt2, vt3); + VIXL_ASSERT(AreSameFormat(vt, vt2, vt3)); + VIXL_ASSERT(AreConsecutive(vt, vt2, vt3)); + LoadStoreStructSingleAllLanes(vt, src, NEON_LD3R); +} + + +void Assembler::ld4(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const VRegister& vt4, + const MemOperand& src) { + USE(vt2, vt3, vt4); + VIXL_ASSERT(AreSameFormat(vt, vt2, vt3, vt4)); + VIXL_ASSERT(AreConsecutive(vt, vt2, vt3, vt4)); + LoadStoreStruct(vt, src, NEON_LD4); +} + + +void Assembler::ld4(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const VRegister& vt4, + int lane, + const MemOperand& src) { + USE(vt2, vt3, vt4); + VIXL_ASSERT(AreSameFormat(vt, vt2, vt3, vt4)); + VIXL_ASSERT(AreConsecutive(vt, vt2, vt3, vt4)); + LoadStoreStructSingle(vt, lane, src, NEONLoadStoreSingleStructLoad4); +} + + +void Assembler::ld4r(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const VRegister& vt4, + const MemOperand& src) { + USE(vt2, vt3, vt4); + VIXL_ASSERT(AreSameFormat(vt, vt2, vt3, vt4)); + VIXL_ASSERT(AreConsecutive(vt, vt2, vt3, vt4)); + LoadStoreStructSingleAllLanes(vt, src, NEON_LD4R); +} + + +void Assembler::st1(const VRegister& vt, + const MemOperand& src) { + LoadStoreStruct(vt, src, NEON_ST1_1v); +} + + +void Assembler::st1(const VRegister& vt, + const VRegister& vt2, + const MemOperand& src) { + USE(vt2); + VIXL_ASSERT(AreSameFormat(vt, vt2)); + 
VIXL_ASSERT(AreConsecutive(vt, vt2)); + LoadStoreStruct(vt, src, NEON_ST1_2v); +} + + +void Assembler::st1(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const MemOperand& src) { + USE(vt2, vt3); + VIXL_ASSERT(AreSameFormat(vt, vt2, vt3)); + VIXL_ASSERT(AreConsecutive(vt, vt2, vt3)); + LoadStoreStruct(vt, src, NEON_ST1_3v); +} + + +void Assembler::st1(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const VRegister& vt4, + const MemOperand& src) { + USE(vt2, vt3, vt4); + VIXL_ASSERT(AreSameFormat(vt, vt2, vt3, vt4)); + VIXL_ASSERT(AreConsecutive(vt, vt2, vt3, vt4)); + LoadStoreStruct(vt, src, NEON_ST1_4v); +} + + +void Assembler::st2(const VRegister& vt, + const VRegister& vt2, + const MemOperand& dst) { + USE(vt2); + VIXL_ASSERT(AreSameFormat(vt, vt2)); + VIXL_ASSERT(AreConsecutive(vt, vt2)); + LoadStoreStruct(vt, dst, NEON_ST2); +} + + +void Assembler::st2(const VRegister& vt, + const VRegister& vt2, + int lane, + const MemOperand& dst) { + USE(vt2); + VIXL_ASSERT(AreSameFormat(vt, vt2)); + VIXL_ASSERT(AreConsecutive(vt, vt2)); + LoadStoreStructSingle(vt, lane, dst, NEONLoadStoreSingleStructStore2); +} + + +void Assembler::st3(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const MemOperand& dst) { + USE(vt2, vt3); + VIXL_ASSERT(AreSameFormat(vt, vt2, vt3)); + VIXL_ASSERT(AreConsecutive(vt, vt2, vt3)); + LoadStoreStruct(vt, dst, NEON_ST3); +} + + +void Assembler::st3(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + int lane, + const MemOperand& dst) { + USE(vt2, vt3); + VIXL_ASSERT(AreSameFormat(vt, vt2, vt3)); + VIXL_ASSERT(AreConsecutive(vt, vt2, vt3)); + LoadStoreStructSingle(vt, lane, dst, NEONLoadStoreSingleStructStore3); +} + + +void Assembler::st4(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const VRegister& vt4, + const MemOperand& dst) { + USE(vt2, vt3, vt4); + VIXL_ASSERT(AreSameFormat(vt, vt2, vt3, vt4)); + VIXL_ASSERT(AreConsecutive(vt, 
vt2, vt3, vt4)); + LoadStoreStruct(vt, dst, NEON_ST4); +} + + +void Assembler::st4(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const VRegister& vt4, + int lane, + const MemOperand& dst) { + USE(vt2, vt3, vt4); + VIXL_ASSERT(AreSameFormat(vt, vt2, vt3, vt4)); + VIXL_ASSERT(AreConsecutive(vt, vt2, vt3, vt4)); + LoadStoreStructSingle(vt, lane, dst, NEONLoadStoreSingleStructStore4); +} + + +void Assembler::LoadStoreStructSingle(const VRegister& vt, + uint32_t lane, + const MemOperand& addr, + NEONLoadStoreSingleStructOp op) { + LoadStoreStructVerify(vt, addr, op); + + // We support vt arguments of the form vt.VxT() or vt.T(), where x is the + // number of lanes, and T is b, h, s or d. + unsigned lane_size = vt.LaneSizeInBytes(); + VIXL_ASSERT(lane < (kQRegSizeInBytes / lane_size)); + + // Lane size is encoded in the opcode field. Lane index is encoded in the Q, + // S and size fields. + lane *= lane_size; + if (lane_size == 8) lane++; + + Instr size = (lane << NEONLSSize_offset) & NEONLSSize_mask; + Instr s = (lane << (NEONS_offset - 2)) & NEONS_mask; + Instr q = (lane << (NEONQ_offset - 3)) & NEONQ_mask; + + Instr instr = op; + switch (lane_size) { + case 1: instr |= NEONLoadStoreSingle_b; break; + case 2: instr |= NEONLoadStoreSingle_h; break; + case 4: instr |= NEONLoadStoreSingle_s; break; + default: + VIXL_ASSERT(lane_size == 8); + instr |= NEONLoadStoreSingle_d; + } + + Emit(instr | LoadStoreStructAddrModeField(addr) | q | size | s | Rt(vt)); +} + + +void Assembler::ld1(const VRegister& vt, + int lane, + const MemOperand& src) { + LoadStoreStructSingle(vt, lane, src, NEONLoadStoreSingleStructLoad1); +} + + +void Assembler::ld1r(const VRegister& vt, + const MemOperand& src) { + LoadStoreStructSingleAllLanes(vt, src, NEON_LD1R); +} + + +void Assembler::st1(const VRegister& vt, + int lane, + const MemOperand& dst) { + LoadStoreStructSingle(vt, lane, dst, NEONLoadStoreSingleStructStore1); +} + + +void Assembler::NEON3DifferentL(const 
VRegister& vd, + const VRegister& vn, + const VRegister& vm, + NEON3DifferentOp vop) { + VIXL_ASSERT(AreSameFormat(vn, vm)); + VIXL_ASSERT((vn.Is1H() && vd.Is1S()) || + (vn.Is1S() && vd.Is1D()) || + (vn.Is8B() && vd.Is8H()) || + (vn.Is4H() && vd.Is4S()) || + (vn.Is2S() && vd.Is2D()) || + (vn.Is16B() && vd.Is8H())|| + (vn.Is8H() && vd.Is4S()) || + (vn.Is4S() && vd.Is2D())); + Instr format, op = vop; + if (vd.IsScalar()) { + op |= NEON_Q | NEONScalar; + format = SFormat(vn); + } else { + format = VFormat(vn); + } + Emit(format | op | Rm(vm) | Rn(vn) | Rd(vd)); +} + + +void Assembler::NEON3DifferentW(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + NEON3DifferentOp vop) { + VIXL_ASSERT(AreSameFormat(vd, vn)); + VIXL_ASSERT((vm.Is8B() && vd.Is8H()) || + (vm.Is4H() && vd.Is4S()) || + (vm.Is2S() && vd.Is2D()) || + (vm.Is16B() && vd.Is8H())|| + (vm.Is8H() && vd.Is4S()) || + (vm.Is4S() && vd.Is2D())); + Emit(VFormat(vm) | vop | Rm(vm) | Rn(vn) | Rd(vd)); +} + + +void Assembler::NEON3DifferentHN(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + NEON3DifferentOp vop) { + VIXL_ASSERT(AreSameFormat(vm, vn)); + VIXL_ASSERT((vd.Is8B() && vn.Is8H()) || + (vd.Is4H() && vn.Is4S()) || + (vd.Is2S() && vn.Is2D()) || + (vd.Is16B() && vn.Is8H())|| + (vd.Is8H() && vn.Is4S()) || + (vd.Is4S() && vn.Is2D())); + Emit(VFormat(vd) | vop | Rm(vm) | Rn(vn) | Rd(vd)); +} + + +#define NEON_3DIFF_LONG_LIST(V) \ + V(pmull, NEON_PMULL, vn.IsVector() && vn.Is8B()) \ + V(pmull2, NEON_PMULL2, vn.IsVector() && vn.Is16B()) \ + V(saddl, NEON_SADDL, vn.IsVector() && vn.IsD()) \ + V(saddl2, NEON_SADDL2, vn.IsVector() && vn.IsQ()) \ + V(sabal, NEON_SABAL, vn.IsVector() && vn.IsD()) \ + V(sabal2, NEON_SABAL2, vn.IsVector() && vn.IsQ()) \ + V(uabal, NEON_UABAL, vn.IsVector() && vn.IsD()) \ + V(uabal2, NEON_UABAL2, vn.IsVector() && vn.IsQ()) \ + V(sabdl, NEON_SABDL, vn.IsVector() && vn.IsD()) \ + V(sabdl2, NEON_SABDL2, vn.IsVector() && vn.IsQ()) \ + V(uabdl, NEON_UABDL, 
vn.IsVector() && vn.IsD()) \ + V(uabdl2, NEON_UABDL2, vn.IsVector() && vn.IsQ()) \ + V(smlal, NEON_SMLAL, vn.IsVector() && vn.IsD()) \ + V(smlal2, NEON_SMLAL2, vn.IsVector() && vn.IsQ()) \ + V(umlal, NEON_UMLAL, vn.IsVector() && vn.IsD()) \ + V(umlal2, NEON_UMLAL2, vn.IsVector() && vn.IsQ()) \ + V(smlsl, NEON_SMLSL, vn.IsVector() && vn.IsD()) \ + V(smlsl2, NEON_SMLSL2, vn.IsVector() && vn.IsQ()) \ + V(umlsl, NEON_UMLSL, vn.IsVector() && vn.IsD()) \ + V(umlsl2, NEON_UMLSL2, vn.IsVector() && vn.IsQ()) \ + V(smull, NEON_SMULL, vn.IsVector() && vn.IsD()) \ + V(smull2, NEON_SMULL2, vn.IsVector() && vn.IsQ()) \ + V(umull, NEON_UMULL, vn.IsVector() && vn.IsD()) \ + V(umull2, NEON_UMULL2, vn.IsVector() && vn.IsQ()) \ + V(ssubl, NEON_SSUBL, vn.IsVector() && vn.IsD()) \ + V(ssubl2, NEON_SSUBL2, vn.IsVector() && vn.IsQ()) \ + V(uaddl, NEON_UADDL, vn.IsVector() && vn.IsD()) \ + V(uaddl2, NEON_UADDL2, vn.IsVector() && vn.IsQ()) \ + V(usubl, NEON_USUBL, vn.IsVector() && vn.IsD()) \ + V(usubl2, NEON_USUBL2, vn.IsVector() && vn.IsQ()) \ + V(sqdmlal, NEON_SQDMLAL, vn.Is1H() || vn.Is1S() || vn.Is4H() || vn.Is2S()) \ + V(sqdmlal2, NEON_SQDMLAL2, vn.Is1H() || vn.Is1S() || vn.Is8H() || vn.Is4S()) \ + V(sqdmlsl, NEON_SQDMLSL, vn.Is1H() || vn.Is1S() || vn.Is4H() || vn.Is2S()) \ + V(sqdmlsl2, NEON_SQDMLSL2, vn.Is1H() || vn.Is1S() || vn.Is8H() || vn.Is4S()) \ + V(sqdmull, NEON_SQDMULL, vn.Is1H() || vn.Is1S() || vn.Is4H() || vn.Is2S()) \ + V(sqdmull2, NEON_SQDMULL2, vn.Is1H() || vn.Is1S() || vn.Is8H() || vn.Is4S()) \ + + +#define DEFINE_ASM_FUNC(FN, OP, AS) \ +void Assembler::FN(const VRegister& vd, \ + const VRegister& vn, \ + const VRegister& vm) { \ + VIXL_ASSERT(AS); \ + NEON3DifferentL(vd, vn, vm, OP); \ +} +NEON_3DIFF_LONG_LIST(DEFINE_ASM_FUNC) +#undef DEFINE_ASM_FUNC + +#define NEON_3DIFF_HN_LIST(V) \ + V(addhn, NEON_ADDHN, vd.IsD()) \ + V(addhn2, NEON_ADDHN2, vd.IsQ()) \ + V(raddhn, NEON_RADDHN, vd.IsD()) \ + V(raddhn2, NEON_RADDHN2, vd.IsQ()) \ + V(subhn, NEON_SUBHN, vd.IsD()) \ + 
V(subhn2, NEON_SUBHN2, vd.IsQ()) \ + V(rsubhn, NEON_RSUBHN, vd.IsD()) \ + V(rsubhn2, NEON_RSUBHN2, vd.IsQ()) + +#define DEFINE_ASM_FUNC(FN, OP, AS) \ +void Assembler::FN(const VRegister& vd, \ + const VRegister& vn, \ + const VRegister& vm) { \ + VIXL_ASSERT(AS); \ + NEON3DifferentHN(vd, vn, vm, OP); \ +} +NEON_3DIFF_HN_LIST(DEFINE_ASM_FUNC) +#undef DEFINE_ASM_FUNC + +void Assembler::uaddw(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + VIXL_ASSERT(vm.IsD()); + NEON3DifferentW(vd, vn, vm, NEON_UADDW); +} + + +void Assembler::uaddw2(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + VIXL_ASSERT(vm.IsQ()); + NEON3DifferentW(vd, vn, vm, NEON_UADDW2); +} + + +void Assembler::saddw(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + VIXL_ASSERT(vm.IsD()); + NEON3DifferentW(vd, vn, vm, NEON_SADDW); +} + + +void Assembler::saddw2(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + VIXL_ASSERT(vm.IsQ()); + NEON3DifferentW(vd, vn, vm, NEON_SADDW2); +} + + +void Assembler::usubw(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + VIXL_ASSERT(vm.IsD()); + NEON3DifferentW(vd, vn, vm, NEON_USUBW); +} + + +void Assembler::usubw2(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + VIXL_ASSERT(vm.IsQ()); + NEON3DifferentW(vd, vn, vm, NEON_USUBW2); +} + + +void Assembler::ssubw(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + VIXL_ASSERT(vm.IsD()); + NEON3DifferentW(vd, vn, vm, NEON_SSUBW); +} + + +void Assembler::ssubw2(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + VIXL_ASSERT(vm.IsQ()); + NEON3DifferentW(vd, vn, vm, NEON_SSUBW2); +} + + +void Assembler::mov(const Register& rd, const Register& rm) { + // Moves involving the stack pointer are encoded as add immediate with + // second operand of zero. Otherwise, orr with first operand zr is + // used. 
+ if (rd.IsSP() || rm.IsSP()) { + add(rd, rm, 0); + } else { + orr(rd, AppropriateZeroRegFor(rd), rm); + } +} + + +void Assembler::mvn(const Register& rd, const Operand& operand) { + orn(rd, AppropriateZeroRegFor(rd), operand); +} + + +void Assembler::mrs(const Register& rt, SystemRegister sysreg) { + VIXL_ASSERT(rt.Is64Bits()); + Emit(MRS | ImmSystemRegister(sysreg) | Rt(rt)); +} + + +void Assembler::msr(SystemRegister sysreg, const Register& rt) { + VIXL_ASSERT(rt.Is64Bits()); + Emit(MSR | Rt(rt) | ImmSystemRegister(sysreg)); +} + + +void Assembler::clrex(int imm4) { + Emit(CLREX | CRm(imm4)); +} + + +void Assembler::dmb(BarrierDomain domain, BarrierType type) { + Emit(DMB | ImmBarrierDomain(domain) | ImmBarrierType(type)); +} + + +void Assembler::dsb(BarrierDomain domain, BarrierType type) { + Emit(DSB | ImmBarrierDomain(domain) | ImmBarrierType(type)); +} + + +void Assembler::isb() { + Emit(ISB | ImmBarrierDomain(FullSystem) | ImmBarrierType(BarrierAll)); +} + + +void Assembler::fmov(const VRegister& vd, double imm) { + if (vd.IsScalar()) { + VIXL_ASSERT(vd.Is1D()); + Emit(FMOV_d_imm | Rd(vd) | ImmFP64(imm)); + } else { + VIXL_ASSERT(vd.Is2D()); + Instr op = NEONModifiedImmediate_MOVI | NEONModifiedImmediateOpBit; + Instr q = NEON_Q; + uint32_t encoded_imm = FP64ToImm8(imm); + Emit(q | op | ImmNEONabcdefgh(encoded_imm) | NEONCmode(0xf) | Rd(vd)); + } +} + + +void Assembler::fmov(const VRegister& vd, float imm) { + if (vd.IsScalar()) { + VIXL_ASSERT(vd.Is1S()); + Emit(FMOV_s_imm | Rd(vd) | ImmFP32(imm)); + } else { + VIXL_ASSERT(vd.Is2S() || vd.Is4S()); + Instr op = NEONModifiedImmediate_MOVI; + Instr q = vd.Is4S() ? NEON_Q : 0; + uint32_t encoded_imm = FP32ToImm8(imm); + Emit(q | op | ImmNEONabcdefgh(encoded_imm) | NEONCmode(0xf) | Rd(vd)); + } +} + + +void Assembler::fmov(const Register& rd, const VRegister& vn) { + VIXL_ASSERT(vn.Is1S() || vn.Is1D()); + VIXL_ASSERT(rd.size() == vn.size()); + FPIntegerConvertOp op = rd.Is32Bits() ? 
FMOV_ws : FMOV_xd; + Emit(op | Rd(rd) | Rn(vn)); +} + + +void Assembler::fmov(const VRegister& vd, const Register& rn) { + VIXL_ASSERT(vd.Is1S() || vd.Is1D()); + VIXL_ASSERT(vd.size() == rn.size()); + FPIntegerConvertOp op = vd.Is32Bits() ? FMOV_sw : FMOV_dx; + Emit(op | Rd(vd) | Rn(rn)); +} + + +void Assembler::fmov(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(vd.Is1S() || vd.Is1D()); + VIXL_ASSERT(vd.IsSameFormat(vn)); + Emit(FPType(vd) | FMOV | Rd(vd) | Rn(vn)); +} + + +void Assembler::fmov(const VRegister& vd, int index, const Register& rn) { + VIXL_ASSERT((index == 1) && vd.Is1D() && rn.IsX()); + USE(index); + Emit(FMOV_d1_x | Rd(vd) | Rn(rn)); +} + + +void Assembler::fmov(const Register& rd, const VRegister& vn, int index) { + VIXL_ASSERT((index == 1) && vn.Is1D() && rd.IsX()); + USE(index); + Emit(FMOV_x_d1 | Rd(rd) | Rn(vn)); +} + + +void Assembler::fmadd(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + const VRegister& va) { + FPDataProcessing3Source(vd, vn, vm, va, vd.Is1S() ? FMADD_s : FMADD_d); +} + + +void Assembler::fmsub(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + const VRegister& va) { + FPDataProcessing3Source(vd, vn, vm, va, vd.Is1S() ? FMSUB_s : FMSUB_d); +} + + +void Assembler::fnmadd(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + const VRegister& va) { + FPDataProcessing3Source(vd, vn, vm, va, vd.Is1S() ? FNMADD_s : FNMADD_d); +} + + +void Assembler::fnmsub(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + const VRegister& va) { + FPDataProcessing3Source(vd, vn, vm, va, vd.Is1S() ? FNMSUB_s : FNMSUB_d); +} + + +void Assembler::fnmul(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + VIXL_ASSERT(AreSameSizeAndType(vd, vn, vm)); + Instr op = vd.Is1S() ? 
FNMUL_s : FNMUL_d; + Emit(FPType(vd) | op | Rm(vm) | Rn(vn) | Rd(vd)); +} + + +void Assembler::FPCompareMacro(const VRegister& vn, + double value, + FPTrapFlags trap) { + USE(value); + // Although the fcmp{e} instructions can strictly only take an immediate + // value of +0.0, we don't need to check for -0.0 because the sign of 0.0 + // doesn't affect the result of the comparison. + VIXL_ASSERT(value == 0.0); + VIXL_ASSERT(vn.Is1S() || vn.Is1D()); + Instr op = (trap == EnableTrap) ? FCMPE_zero : FCMP_zero; + Emit(FPType(vn) | op | Rn(vn)); +} + + +void Assembler::FPCompareMacro(const VRegister& vn, + const VRegister& vm, + FPTrapFlags trap) { + VIXL_ASSERT(vn.Is1S() || vn.Is1D()); + VIXL_ASSERT(vn.IsSameSizeAndType(vm)); + Instr op = (trap == EnableTrap) ? FCMPE : FCMP; + Emit(FPType(vn) | op | Rm(vm) | Rn(vn)); +} + + +void Assembler::fcmp(const VRegister& vn, + const VRegister& vm) { + FPCompareMacro(vn, vm, DisableTrap); +} + + +void Assembler::fcmpe(const VRegister& vn, + const VRegister& vm) { + FPCompareMacro(vn, vm, EnableTrap); +} + + +void Assembler::fcmp(const VRegister& vn, + double value) { + FPCompareMacro(vn, value, DisableTrap); +} + + +void Assembler::fcmpe(const VRegister& vn, + double value) { + FPCompareMacro(vn, value, EnableTrap); +} + + +void Assembler::FPCCompareMacro(const VRegister& vn, + const VRegister& vm, + StatusFlags nzcv, + Condition cond, + FPTrapFlags trap) { + VIXL_ASSERT(vn.Is1S() || vn.Is1D()); + VIXL_ASSERT(vn.IsSameSizeAndType(vm)); + Instr op = (trap == EnableTrap) ? 
FCCMPE : FCCMP; + Emit(FPType(vn) | op | Rm(vm) | Cond(cond) | Rn(vn) | Nzcv(nzcv)); +} + +void Assembler::fccmp(const VRegister& vn, + const VRegister& vm, + StatusFlags nzcv, + Condition cond) { + FPCCompareMacro(vn, vm, nzcv, cond, DisableTrap); +} + + +void Assembler::fccmpe(const VRegister& vn, + const VRegister& vm, + StatusFlags nzcv, + Condition cond) { + FPCCompareMacro(vn, vm, nzcv, cond, EnableTrap); +} + + +void Assembler::fcsel(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + Condition cond) { + VIXL_ASSERT(vd.Is1S() || vd.Is1D()); + VIXL_ASSERT(AreSameFormat(vd, vn, vm)); + Emit(FPType(vd) | FCSEL | Rm(vm) | Cond(cond) | Rn(vn) | Rd(vd)); +} + +void Assembler::fjcvtzs(const Register& rd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kJSCVT)); + VIXL_ASSERT(rd.IsW() && vn.Is1D()); + Emit(FJCVTZS | Rn(vn) | Rd(rd)); +} + + +void Assembler::NEONFPConvertToInt(const Register& rd, + const VRegister& vn, + Instr op) { + Emit(SF(rd) | FPType(vn) | op | Rn(vn) | Rd(rd)); +} + + +void Assembler::NEONFPConvertToInt(const VRegister& vd, + const VRegister& vn, + Instr op) { + if (vn.IsScalar()) { + VIXL_ASSERT((vd.Is1S() && vn.Is1S()) || (vd.Is1D() && vn.Is1D())); + op |= NEON_Q | NEONScalar; + } + Emit(FPFormat(vn) | op | Rn(vn) | Rd(vd)); +} + + +void Assembler::fcvt(const VRegister& vd, + const VRegister& vn) { + FPDataProcessing1SourceOp op; + if (vd.Is1D()) { + VIXL_ASSERT(vn.Is1S() || vn.Is1H()); + op = vn.Is1S() ? FCVT_ds : FCVT_dh; + } else if (vd.Is1S()) { + VIXL_ASSERT(vn.Is1D() || vn.Is1H()); + op = vn.Is1D() ? FCVT_sd : FCVT_sh; + } else { + VIXL_ASSERT(vd.Is1H()); + VIXL_ASSERT(vn.Is1D() || vn.Is1S()); + op = vn.Is1D() ? FCVT_hd : FCVT_hs; + } + FPDataProcessing1Source(vd, vn, op); +} + + +void Assembler::fcvtl(const VRegister& vd, + const VRegister& vn) { + VIXL_ASSERT((vd.Is4S() && vn.Is4H()) || + (vd.Is2D() && vn.Is2S())); + Instr format = vd.Is2D() ? 
(1 << NEONSize_offset) : 0; + Emit(format | NEON_FCVTL | Rn(vn) | Rd(vd)); +} + + +void Assembler::fcvtl2(const VRegister& vd, + const VRegister& vn) { + VIXL_ASSERT((vd.Is4S() && vn.Is8H()) || + (vd.Is2D() && vn.Is4S())); + Instr format = vd.Is2D() ? (1 << NEONSize_offset) : 0; + Emit(NEON_Q | format | NEON_FCVTL | Rn(vn) | Rd(vd)); +} + + +void Assembler::fcvtn(const VRegister& vd, + const VRegister& vn) { + VIXL_ASSERT((vn.Is4S() && vd.Is4H()) || + (vn.Is2D() && vd.Is2S())); + Instr format = vn.Is2D() ? (1 << NEONSize_offset) : 0; + Emit(format | NEON_FCVTN | Rn(vn) | Rd(vd)); +} + + +void Assembler::fcvtn2(const VRegister& vd, + const VRegister& vn) { + VIXL_ASSERT((vn.Is4S() && vd.Is8H()) || + (vn.Is2D() && vd.Is4S())); + Instr format = vn.Is2D() ? (1 << NEONSize_offset) : 0; + Emit(NEON_Q | format | NEON_FCVTN | Rn(vn) | Rd(vd)); +} + + +void Assembler::fcvtxn(const VRegister& vd, + const VRegister& vn) { + Instr format = 1 << NEONSize_offset; + if (vd.IsScalar()) { + VIXL_ASSERT(vd.Is1S() && vn.Is1D()); + Emit(format | NEON_FCVTXN_scalar | Rn(vn) | Rd(vd)); + } else { + VIXL_ASSERT(vd.Is2S() && vn.Is2D()); + Emit(format | NEON_FCVTXN | Rn(vn) | Rd(vd)); + } +} + + +void Assembler::fcvtxn2(const VRegister& vd, + const VRegister& vn) { + VIXL_ASSERT(vd.Is4S() && vn.Is2D()); + Instr format = 1 << NEONSize_offset; + Emit(NEON_Q | format | NEON_FCVTXN | Rn(vn) | Rd(vd)); +} + + +#define NEON_FP2REGMISC_FCVT_LIST(V) \ + V(fcvtnu, NEON_FCVTNU, FCVTNU) \ + V(fcvtns, NEON_FCVTNS, FCVTNS) \ + V(fcvtpu, NEON_FCVTPU, FCVTPU) \ + V(fcvtps, NEON_FCVTPS, FCVTPS) \ + V(fcvtmu, NEON_FCVTMU, FCVTMU) \ + V(fcvtms, NEON_FCVTMS, FCVTMS) \ + V(fcvtau, NEON_FCVTAU, FCVTAU) \ + V(fcvtas, NEON_FCVTAS, FCVTAS) + +#define DEFINE_ASM_FUNCS(FN, VEC_OP, SCA_OP) \ +void Assembler::FN(const Register& rd, \ + const VRegister& vn) { \ + NEONFPConvertToInt(rd, vn, SCA_OP); \ +} \ +void Assembler::FN(const VRegister& vd, \ + const VRegister& vn) { \ + NEONFPConvertToInt(vd, vn, VEC_OP); \ +} 
+NEON_FP2REGMISC_FCVT_LIST(DEFINE_ASM_FUNCS) +#undef DEFINE_ASM_FUNCS + + +void Assembler::fcvtzs(const Register& rd, + const VRegister& vn, + int fbits) { + VIXL_ASSERT(vn.Is1S() || vn.Is1D()); + VIXL_ASSERT((fbits >= 0) && (fbits <= rd.SizeInBits())); + if (fbits == 0) { + Emit(SF(rd) | FPType(vn) | FCVTZS | Rn(vn) | Rd(rd)); + } else { + Emit(SF(rd) | FPType(vn) | FCVTZS_fixed | FPScale(64 - fbits) | Rn(vn) | + Rd(rd)); + } +} + + +void Assembler::fcvtzs(const VRegister& vd, + const VRegister& vn, + int fbits) { + VIXL_ASSERT(fbits >= 0); + if (fbits == 0) { + NEONFP2RegMisc(vd, vn, NEON_FCVTZS); + } else { + VIXL_ASSERT(vd.Is1D() || vd.Is1S() || vd.Is2D() || vd.Is2S() || vd.Is4S()); + NEONShiftRightImmediate(vd, vn, fbits, NEON_FCVTZS_imm); + } +} + + +void Assembler::fcvtzu(const Register& rd, + const VRegister& vn, + int fbits) { + VIXL_ASSERT(vn.Is1S() || vn.Is1D()); + VIXL_ASSERT((fbits >= 0) && (fbits <= rd.SizeInBits())); + if (fbits == 0) { + Emit(SF(rd) | FPType(vn) | FCVTZU | Rn(vn) | Rd(rd)); + } else { + Emit(SF(rd) | FPType(vn) | FCVTZU_fixed | FPScale(64 - fbits) | Rn(vn) | + Rd(rd)); + } +} + + +void Assembler::fcvtzu(const VRegister& vd, + const VRegister& vn, + int fbits) { + VIXL_ASSERT(fbits >= 0); + if (fbits == 0) { + NEONFP2RegMisc(vd, vn, NEON_FCVTZU); + } else { + VIXL_ASSERT(vd.Is1D() || vd.Is1S() || vd.Is2D() || vd.Is2S() || vd.Is4S()); + NEONShiftRightImmediate(vd, vn, fbits, NEON_FCVTZU_imm); + } +} + +void Assembler::ucvtf(const VRegister& vd, + const VRegister& vn, + int fbits) { + VIXL_ASSERT(fbits >= 0); + if (fbits == 0) { + NEONFP2RegMisc(vd, vn, NEON_UCVTF); + } else { + VIXL_ASSERT(vd.Is1D() || vd.Is1S() || vd.Is2D() || vd.Is2S() || vd.Is4S()); + NEONShiftRightImmediate(vd, vn, fbits, NEON_UCVTF_imm); + } +} + +void Assembler::scvtf(const VRegister& vd, + const VRegister& vn, + int fbits) { + VIXL_ASSERT(fbits >= 0); + if (fbits == 0) { + NEONFP2RegMisc(vd, vn, NEON_SCVTF); + } else { + VIXL_ASSERT(vd.Is1D() || vd.Is1S() || 
vd.Is2D() || vd.Is2S() || vd.Is4S()); + NEONShiftRightImmediate(vd, vn, fbits, NEON_SCVTF_imm); + } +} + + +void Assembler::scvtf(const VRegister& vd, + const Register& rn, + int fbits) { + VIXL_ASSERT(vd.Is1S() || vd.Is1D()); + VIXL_ASSERT(fbits >= 0); + if (fbits == 0) { + Emit(SF(rn) | FPType(vd) | SCVTF | Rn(rn) | Rd(vd)); + } else { + Emit(SF(rn) | FPType(vd) | SCVTF_fixed | FPScale(64 - fbits) | Rn(rn) | + Rd(vd)); + } +} + + +void Assembler::ucvtf(const VRegister& vd, + const Register& rn, + int fbits) { + VIXL_ASSERT(vd.Is1S() || vd.Is1D()); + VIXL_ASSERT(fbits >= 0); + if (fbits == 0) { + Emit(SF(rn) | FPType(vd) | UCVTF | Rn(rn) | Rd(vd)); + } else { + Emit(SF(rn) | FPType(vd) | UCVTF_fixed | FPScale(64 - fbits) | Rn(rn) | + Rd(vd)); + } +} + + +void Assembler::NEON3Same(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + NEON3SameOp vop) { + VIXL_ASSERT(AreSameFormat(vd, vn, vm)); + VIXL_ASSERT(vd.IsVector() || !vd.IsQ()); + + Instr format, op = vop; + if (vd.IsScalar()) { + op |= NEON_Q | NEONScalar; + format = SFormat(vd); + } else { + format = VFormat(vd); + } + + Emit(format | op | Rm(vm) | Rn(vn) | Rd(vd)); +} + + +void Assembler::NEONFP3Same(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + Instr op) { + VIXL_ASSERT(AreSameFormat(vd, vn, vm)); + Emit(FPFormat(vd) | op | Rm(vm) | Rn(vn) | Rd(vd)); +} + + +#define NEON_FP2REGMISC_LIST(V) \ + V(fabs, NEON_FABS, FABS) \ + V(fneg, NEON_FNEG, FNEG) \ + V(fsqrt, NEON_FSQRT, FSQRT) \ + V(frintn, NEON_FRINTN, FRINTN) \ + V(frinta, NEON_FRINTA, FRINTA) \ + V(frintp, NEON_FRINTP, FRINTP) \ + V(frintm, NEON_FRINTM, FRINTM) \ + V(frintx, NEON_FRINTX, FRINTX) \ + V(frintz, NEON_FRINTZ, FRINTZ) \ + V(frinti, NEON_FRINTI, FRINTI) \ + V(frsqrte, NEON_FRSQRTE, NEON_FRSQRTE_scalar) \ + V(frecpe, NEON_FRECPE, NEON_FRECPE_scalar ) + + +#define DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP) \ +void Assembler::FN(const VRegister& vd, \ + const VRegister& vn) { \ + Instr op; \ + if (vd.IsScalar()) 
{ \ + VIXL_ASSERT(vd.Is1S() || vd.Is1D()); \ + op = SCA_OP; \ + } else { \ + VIXL_ASSERT(vd.Is2S() || vd.Is2D() || vd.Is4S()); \ + op = VEC_OP; \ + } \ + NEONFP2RegMisc(vd, vn, op); \ +} +NEON_FP2REGMISC_LIST(DEFINE_ASM_FUNC) +#undef DEFINE_ASM_FUNC + + +void Assembler::NEONFP2RegMisc(const VRegister& vd, + const VRegister& vn, + Instr op) { + VIXL_ASSERT(AreSameFormat(vd, vn)); + Emit(FPFormat(vd) | op | Rn(vn) | Rd(vd)); +} + + +void Assembler::NEON2RegMisc(const VRegister& vd, + const VRegister& vn, + NEON2RegMiscOp vop, + int value) { + VIXL_ASSERT(AreSameFormat(vd, vn)); + VIXL_ASSERT(value == 0); + USE(value); + + Instr format, op = vop; + if (vd.IsScalar()) { + op |= NEON_Q | NEONScalar; + format = SFormat(vd); + } else { + format = VFormat(vd); + } + + Emit(format | op | Rn(vn) | Rd(vd)); +} + + +void Assembler::cmeq(const VRegister& vd, + const VRegister& vn, + int value) { + VIXL_ASSERT(vd.IsVector() || vd.Is1D()); + NEON2RegMisc(vd, vn, NEON_CMEQ_zero, value); +} + + +void Assembler::cmge(const VRegister& vd, + const VRegister& vn, + int value) { + VIXL_ASSERT(vd.IsVector() || vd.Is1D()); + NEON2RegMisc(vd, vn, NEON_CMGE_zero, value); +} + + +void Assembler::cmgt(const VRegister& vd, + const VRegister& vn, + int value) { + VIXL_ASSERT(vd.IsVector() || vd.Is1D()); + NEON2RegMisc(vd, vn, NEON_CMGT_zero, value); +} + + +void Assembler::cmle(const VRegister& vd, + const VRegister& vn, + int value) { + VIXL_ASSERT(vd.IsVector() || vd.Is1D()); + NEON2RegMisc(vd, vn, NEON_CMLE_zero, value); +} + + +void Assembler::cmlt(const VRegister& vd, + const VRegister& vn, + int value) { + VIXL_ASSERT(vd.IsVector() || vd.Is1D()); + NEON2RegMisc(vd, vn, NEON_CMLT_zero, value); +} + + +void Assembler::shll(const VRegister& vd, + const VRegister& vn, + int shift) { + VIXL_ASSERT((vd.Is8H() && vn.Is8B() && shift == 8) || + (vd.Is4S() && vn.Is4H() && shift == 16) || + (vd.Is2D() && vn.Is2S() && shift == 32)); + USE(shift); + Emit(VFormat(vn) | NEON_SHLL | Rn(vn) | Rd(vd)); +} 
+ + +void Assembler::shll2(const VRegister& vd, + const VRegister& vn, + int shift) { + USE(shift); + VIXL_ASSERT((vd.Is8H() && vn.Is16B() && shift == 8) || + (vd.Is4S() && vn.Is8H() && shift == 16) || + (vd.Is2D() && vn.Is4S() && shift == 32)); + Emit(VFormat(vn) | NEON_SHLL | Rn(vn) | Rd(vd)); +} + + +void Assembler::NEONFP2RegMisc(const VRegister& vd, + const VRegister& vn, + NEON2RegMiscOp vop, + double value) { + VIXL_ASSERT(AreSameFormat(vd, vn)); + VIXL_ASSERT(value == 0.0); + USE(value); + + Instr op = vop; + if (vd.IsScalar()) { + VIXL_ASSERT(vd.Is1S() || vd.Is1D()); + op |= NEON_Q | NEONScalar; + } else { + VIXL_ASSERT(vd.Is2S() || vd.Is2D() || vd.Is4S()); + } + + Emit(FPFormat(vd) | op | Rn(vn) | Rd(vd)); +} + + +void Assembler::fcmeq(const VRegister& vd, + const VRegister& vn, + double value) { + NEONFP2RegMisc(vd, vn, NEON_FCMEQ_zero, value); +} + + +void Assembler::fcmge(const VRegister& vd, + const VRegister& vn, + double value) { + NEONFP2RegMisc(vd, vn, NEON_FCMGE_zero, value); +} + + +void Assembler::fcmgt(const VRegister& vd, + const VRegister& vn, + double value) { + NEONFP2RegMisc(vd, vn, NEON_FCMGT_zero, value); +} + + +void Assembler::fcmle(const VRegister& vd, + const VRegister& vn, + double value) { + NEONFP2RegMisc(vd, vn, NEON_FCMLE_zero, value); +} + + +void Assembler::fcmlt(const VRegister& vd, + const VRegister& vn, + double value) { + NEONFP2RegMisc(vd, vn, NEON_FCMLT_zero, value); +} + + +void Assembler::frecpx(const VRegister& vd, + const VRegister& vn) { + VIXL_ASSERT(vd.IsScalar()); + VIXL_ASSERT(AreSameFormat(vd, vn)); + VIXL_ASSERT(vd.Is1S() || vd.Is1D()); + Emit(FPFormat(vd) | NEON_FRECPX_scalar | Rn(vn) | Rd(vd)); +} + + +#define NEON_3SAME_LIST(V) \ + V(add, NEON_ADD, vd.IsVector() || vd.Is1D()) \ + V(addp, NEON_ADDP, vd.IsVector() || vd.Is1D()) \ + V(sub, NEON_SUB, vd.IsVector() || vd.Is1D()) \ + V(cmeq, NEON_CMEQ, vd.IsVector() || vd.Is1D()) \ + V(cmge, NEON_CMGE, vd.IsVector() || vd.Is1D()) \ + V(cmgt, NEON_CMGT, 
vd.IsVector() || vd.Is1D()) \ + V(cmhi, NEON_CMHI, vd.IsVector() || vd.Is1D()) \ + V(cmhs, NEON_CMHS, vd.IsVector() || vd.Is1D()) \ + V(cmtst, NEON_CMTST, vd.IsVector() || vd.Is1D()) \ + V(sshl, NEON_SSHL, vd.IsVector() || vd.Is1D()) \ + V(ushl, NEON_USHL, vd.IsVector() || vd.Is1D()) \ + V(srshl, NEON_SRSHL, vd.IsVector() || vd.Is1D()) \ + V(urshl, NEON_URSHL, vd.IsVector() || vd.Is1D()) \ + V(sqdmulh, NEON_SQDMULH, vd.IsLaneSizeH() || vd.IsLaneSizeS()) \ + V(sqrdmulh, NEON_SQRDMULH, vd.IsLaneSizeH() || vd.IsLaneSizeS()) \ + V(shadd, NEON_SHADD, vd.IsVector() && !vd.IsLaneSizeD()) \ + V(uhadd, NEON_UHADD, vd.IsVector() && !vd.IsLaneSizeD()) \ + V(srhadd, NEON_SRHADD, vd.IsVector() && !vd.IsLaneSizeD()) \ + V(urhadd, NEON_URHADD, vd.IsVector() && !vd.IsLaneSizeD()) \ + V(shsub, NEON_SHSUB, vd.IsVector() && !vd.IsLaneSizeD()) \ + V(uhsub, NEON_UHSUB, vd.IsVector() && !vd.IsLaneSizeD()) \ + V(smax, NEON_SMAX, vd.IsVector() && !vd.IsLaneSizeD()) \ + V(smaxp, NEON_SMAXP, vd.IsVector() && !vd.IsLaneSizeD()) \ + V(smin, NEON_SMIN, vd.IsVector() && !vd.IsLaneSizeD()) \ + V(sminp, NEON_SMINP, vd.IsVector() && !vd.IsLaneSizeD()) \ + V(umax, NEON_UMAX, vd.IsVector() && !vd.IsLaneSizeD()) \ + V(umaxp, NEON_UMAXP, vd.IsVector() && !vd.IsLaneSizeD()) \ + V(umin, NEON_UMIN, vd.IsVector() && !vd.IsLaneSizeD()) \ + V(uminp, NEON_UMINP, vd.IsVector() && !vd.IsLaneSizeD()) \ + V(saba, NEON_SABA, vd.IsVector() && !vd.IsLaneSizeD()) \ + V(sabd, NEON_SABD, vd.IsVector() && !vd.IsLaneSizeD()) \ + V(uaba, NEON_UABA, vd.IsVector() && !vd.IsLaneSizeD()) \ + V(uabd, NEON_UABD, vd.IsVector() && !vd.IsLaneSizeD()) \ + V(mla, NEON_MLA, vd.IsVector() && !vd.IsLaneSizeD()) \ + V(mls, NEON_MLS, vd.IsVector() && !vd.IsLaneSizeD()) \ + V(mul, NEON_MUL, vd.IsVector() && !vd.IsLaneSizeD()) \ + V(and_, NEON_AND, vd.Is8B() || vd.Is16B()) \ + V(orr, NEON_ORR, vd.Is8B() || vd.Is16B()) \ + V(orn, NEON_ORN, vd.Is8B() || vd.Is16B()) \ + V(eor, NEON_EOR, vd.Is8B() || vd.Is16B()) \ + V(bic, NEON_BIC, vd.Is8B() 
|| vd.Is16B()) \ + V(bit, NEON_BIT, vd.Is8B() || vd.Is16B()) \ + V(bif, NEON_BIF, vd.Is8B() || vd.Is16B()) \ + V(bsl, NEON_BSL, vd.Is8B() || vd.Is16B()) \ + V(pmul, NEON_PMUL, vd.Is8B() || vd.Is16B()) \ + V(uqadd, NEON_UQADD, true) \ + V(sqadd, NEON_SQADD, true) \ + V(uqsub, NEON_UQSUB, true) \ + V(sqsub, NEON_SQSUB, true) \ + V(sqshl, NEON_SQSHL, true) \ + V(uqshl, NEON_UQSHL, true) \ + V(sqrshl, NEON_SQRSHL, true) \ + V(uqrshl, NEON_UQRSHL, true) + +#define DEFINE_ASM_FUNC(FN, OP, AS) \ +void Assembler::FN(const VRegister& vd, \ + const VRegister& vn, \ + const VRegister& vm) { \ + VIXL_ASSERT(AS); \ + NEON3Same(vd, vn, vm, OP); \ +} +NEON_3SAME_LIST(DEFINE_ASM_FUNC) +#undef DEFINE_ASM_FUNC + + +#define NEON_FP3SAME_OP_LIST(V) \ + V(fadd, NEON_FADD, FADD) \ + V(fsub, NEON_FSUB, FSUB) \ + V(fmul, NEON_FMUL, FMUL) \ + V(fdiv, NEON_FDIV, FDIV) \ + V(fmax, NEON_FMAX, FMAX) \ + V(fmaxnm, NEON_FMAXNM, FMAXNM) \ + V(fmin, NEON_FMIN, FMIN) \ + V(fminnm, NEON_FMINNM, FMINNM) \ + V(fmulx, NEON_FMULX, NEON_FMULX_scalar) \ + V(frecps, NEON_FRECPS, NEON_FRECPS_scalar) \ + V(frsqrts, NEON_FRSQRTS, NEON_FRSQRTS_scalar) \ + V(fabd, NEON_FABD, NEON_FABD_scalar) \ + V(fmla, NEON_FMLA, 0) \ + V(fmls, NEON_FMLS, 0) \ + V(facge, NEON_FACGE, NEON_FACGE_scalar) \ + V(facgt, NEON_FACGT, NEON_FACGT_scalar) \ + V(fcmeq, NEON_FCMEQ, NEON_FCMEQ_scalar) \ + V(fcmge, NEON_FCMGE, NEON_FCMGE_scalar) \ + V(fcmgt, NEON_FCMGT, NEON_FCMGT_scalar) \ + V(faddp, NEON_FADDP, 0) \ + V(fmaxp, NEON_FMAXP, 0) \ + V(fminp, NEON_FMINP, 0) \ + V(fmaxnmp, NEON_FMAXNMP, 0) \ + V(fminnmp, NEON_FMINNMP, 0) + +#define DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP) \ +void Assembler::FN(const VRegister& vd, \ + const VRegister& vn, \ + const VRegister& vm) { \ + Instr op; \ + if ((SCA_OP != 0) && vd.IsScalar()) { \ + VIXL_ASSERT(vd.Is1S() || vd.Is1D()); \ + op = SCA_OP; \ + } else { \ + VIXL_ASSERT(vd.IsVector()); \ + VIXL_ASSERT(vd.Is2S() || vd.Is2D() || vd.Is4S()); \ + op = VEC_OP; \ + } \ + NEONFP3Same(vd, vn, vm, op); \ 
+} +NEON_FP3SAME_OP_LIST(DEFINE_ASM_FUNC) +#undef DEFINE_ASM_FUNC + + +void Assembler::addp(const VRegister& vd, + const VRegister& vn) { + VIXL_ASSERT((vd.Is1D() && vn.Is2D())); + Emit(SFormat(vd) | NEON_ADDP_scalar | Rn(vn) | Rd(vd)); +} + + +void Assembler::faddp(const VRegister& vd, + const VRegister& vn) { + VIXL_ASSERT((vd.Is1S() && vn.Is2S()) || + (vd.Is1D() && vn.Is2D())); + Emit(FPFormat(vd) | NEON_FADDP_scalar | Rn(vn) | Rd(vd)); +} + + +void Assembler::fmaxp(const VRegister& vd, + const VRegister& vn) { + VIXL_ASSERT((vd.Is1S() && vn.Is2S()) || + (vd.Is1D() && vn.Is2D())); + Emit(FPFormat(vd) | NEON_FMAXP_scalar | Rn(vn) | Rd(vd)); +} + + +void Assembler::fminp(const VRegister& vd, + const VRegister& vn) { + VIXL_ASSERT((vd.Is1S() && vn.Is2S()) || + (vd.Is1D() && vn.Is2D())); + Emit(FPFormat(vd) | NEON_FMINP_scalar | Rn(vn) | Rd(vd)); +} + + +void Assembler::fmaxnmp(const VRegister& vd, + const VRegister& vn) { + VIXL_ASSERT((vd.Is1S() && vn.Is2S()) || + (vd.Is1D() && vn.Is2D())); + Emit(FPFormat(vd) | NEON_FMAXNMP_scalar | Rn(vn) | Rd(vd)); +} + + +void Assembler::fminnmp(const VRegister& vd, + const VRegister& vn) { + VIXL_ASSERT((vd.Is1S() && vn.Is2S()) || + (vd.Is1D() && vn.Is2D())); + Emit(FPFormat(vd) | NEON_FMINNMP_scalar | Rn(vn) | Rd(vd)); +} + + +void Assembler::orr(const VRegister& vd, + const int imm8, + const int left_shift) { + NEONModifiedImmShiftLsl(vd, imm8, left_shift, + NEONModifiedImmediate_ORR); +} + + +void Assembler::mov(const VRegister& vd, + const VRegister& vn) { + VIXL_ASSERT(AreSameFormat(vd, vn)); + if (vd.IsD()) { + orr(vd.V8B(), vn.V8B(), vn.V8B()); + } else { + VIXL_ASSERT(vd.IsQ()); + orr(vd.V16B(), vn.V16B(), vn.V16B()); + } +} + + +void Assembler::bic(const VRegister& vd, + const int imm8, + const int left_shift) { + NEONModifiedImmShiftLsl(vd, imm8, left_shift, + NEONModifiedImmediate_BIC); +} + + +void Assembler::movi(const VRegister& vd, + const uint64_t imm, + Shift shift, + const int shift_amount) { + 
VIXL_ASSERT((shift == LSL) || (shift == MSL)); + if (vd.Is2D() || vd.Is1D()) { + VIXL_ASSERT(shift_amount == 0); + int imm8 = 0; + for (int i = 0; i < 8; ++i) { + int byte = (imm >> (i * 8)) & 0xff; + VIXL_ASSERT((byte == 0) || (byte == 0xff)); + if (byte == 0xff) { + imm8 |= (1 << i); + } + } + int q = vd.Is2D() ? NEON_Q : 0; + Emit(q | NEONModImmOp(1) | NEONModifiedImmediate_MOVI | + ImmNEONabcdefgh(imm8) | NEONCmode(0xe) | Rd(vd)); + } else if (shift == LSL) { + VIXL_ASSERT(IsUint8(imm)); + NEONModifiedImmShiftLsl(vd, static_cast<int>(imm), shift_amount, + NEONModifiedImmediate_MOVI); + } else { + VIXL_ASSERT(IsUint8(imm)); + NEONModifiedImmShiftMsl(vd, static_cast<int>(imm), shift_amount, + NEONModifiedImmediate_MOVI); + } +} + + +void Assembler::mvn(const VRegister& vd, + const VRegister& vn) { + VIXL_ASSERT(AreSameFormat(vd, vn)); + if (vd.IsD()) { + not_(vd.V8B(), vn.V8B()); + } else { + VIXL_ASSERT(vd.IsQ()); + not_(vd.V16B(), vn.V16B()); + } +} + + +void Assembler::mvni(const VRegister& vd, + const int imm8, + Shift shift, + const int shift_amount) { + VIXL_ASSERT((shift == LSL) || (shift == MSL)); + if (shift == LSL) { + NEONModifiedImmShiftLsl(vd, imm8, shift_amount, + NEONModifiedImmediate_MVNI); + } else { + NEONModifiedImmShiftMsl(vd, imm8, shift_amount, + NEONModifiedImmediate_MVNI); + } +} + + +void Assembler::NEONFPByElement(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index, + NEONByIndexedElementOp vop) { + VIXL_ASSERT(AreSameFormat(vd, vn)); + VIXL_ASSERT((vd.Is2S() && vm.Is1S()) || + (vd.Is4S() && vm.Is1S()) || + (vd.Is1S() && vm.Is1S()) || + (vd.Is2D() && vm.Is1D()) || + (vd.Is1D() && vm.Is1D())); + VIXL_ASSERT((vm.Is1S() && (vm_index < 4)) || + (vm.Is1D() && (vm_index < 2))); + + Instr op = vop; + int index_num_bits = vm.Is1S() ? 
2 : 1; + if (vd.IsScalar()) { + op |= NEON_Q | NEONScalar; + } + + Emit(FPFormat(vd) | op | ImmNEONHLM(vm_index, index_num_bits) | + Rm(vm) | Rn(vn) | Rd(vd)); +} + + +void Assembler::NEONByElement(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index, + NEONByIndexedElementOp vop) { + VIXL_ASSERT(AreSameFormat(vd, vn)); + VIXL_ASSERT((vd.Is4H() && vm.Is1H()) || + (vd.Is8H() && vm.Is1H()) || + (vd.Is1H() && vm.Is1H()) || + (vd.Is2S() && vm.Is1S()) || + (vd.Is4S() && vm.Is1S()) || + (vd.Is1S() && vm.Is1S())); + VIXL_ASSERT((vm.Is1H() && (vm.code() < 16) && (vm_index < 8)) || + (vm.Is1S() && (vm_index < 4))); + + Instr format, op = vop; + int index_num_bits = vm.Is1H() ? 3 : 2; + if (vd.IsScalar()) { + op |= NEONScalar | NEON_Q; + format = SFormat(vn); + } else { + format = VFormat(vn); + } + Emit(format | op | ImmNEONHLM(vm_index, index_num_bits) | + Rm(vm) | Rn(vn) | Rd(vd)); +} + + +void Assembler::NEONByElementL(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index, + NEONByIndexedElementOp vop) { + VIXL_ASSERT((vd.Is4S() && vn.Is4H() && vm.Is1H()) || + (vd.Is4S() && vn.Is8H() && vm.Is1H()) || + (vd.Is1S() && vn.Is1H() && vm.Is1H()) || + (vd.Is2D() && vn.Is2S() && vm.Is1S()) || + (vd.Is2D() && vn.Is4S() && vm.Is1S()) || + (vd.Is1D() && vn.Is1S() && vm.Is1S())); + + VIXL_ASSERT((vm.Is1H() && (vm.code() < 16) && (vm_index < 8)) || + (vm.Is1S() && (vm_index < 4))); + + Instr format, op = vop; + int index_num_bits = vm.Is1H() ? 
3 : 2; + if (vd.IsScalar()) { + op |= NEONScalar | NEON_Q; + format = SFormat(vn); + } else { + format = VFormat(vn); + } + Emit(format | op | ImmNEONHLM(vm_index, index_num_bits) | + Rm(vm) | Rn(vn) | Rd(vd)); +} + + +#define NEON_BYELEMENT_LIST(V) \ + V(mul, NEON_MUL_byelement, vn.IsVector()) \ + V(mla, NEON_MLA_byelement, vn.IsVector()) \ + V(mls, NEON_MLS_byelement, vn.IsVector()) \ + V(sqdmulh, NEON_SQDMULH_byelement, true) \ + V(sqrdmulh, NEON_SQRDMULH_byelement, true) + + +#define DEFINE_ASM_FUNC(FN, OP, AS) \ +void Assembler::FN(const VRegister& vd, \ + const VRegister& vn, \ + const VRegister& vm, \ + int vm_index) { \ + VIXL_ASSERT(AS); \ + NEONByElement(vd, vn, vm, vm_index, OP); \ +} +NEON_BYELEMENT_LIST(DEFINE_ASM_FUNC) +#undef DEFINE_ASM_FUNC + + +#define NEON_FPBYELEMENT_LIST(V) \ + V(fmul, NEON_FMUL_byelement) \ + V(fmla, NEON_FMLA_byelement) \ + V(fmls, NEON_FMLS_byelement) \ + V(fmulx, NEON_FMULX_byelement) + + +#define DEFINE_ASM_FUNC(FN, OP) \ +void Assembler::FN(const VRegister& vd, \ + const VRegister& vn, \ + const VRegister& vm, \ + int vm_index) { \ + NEONFPByElement(vd, vn, vm, vm_index, OP); \ +} +NEON_FPBYELEMENT_LIST(DEFINE_ASM_FUNC) +#undef DEFINE_ASM_FUNC + + +#define NEON_BYELEMENT_LONG_LIST(V) \ + V(sqdmull, NEON_SQDMULL_byelement, vn.IsScalar() || vn.IsD()) \ + V(sqdmull2, NEON_SQDMULL_byelement, vn.IsVector() && vn.IsQ()) \ + V(sqdmlal, NEON_SQDMLAL_byelement, vn.IsScalar() || vn.IsD()) \ + V(sqdmlal2, NEON_SQDMLAL_byelement, vn.IsVector() && vn.IsQ()) \ + V(sqdmlsl, NEON_SQDMLSL_byelement, vn.IsScalar() || vn.IsD()) \ + V(sqdmlsl2, NEON_SQDMLSL_byelement, vn.IsVector() && vn.IsQ()) \ + V(smull, NEON_SMULL_byelement, vn.IsVector() && vn.IsD()) \ + V(smull2, NEON_SMULL_byelement, vn.IsVector() && vn.IsQ()) \ + V(umull, NEON_UMULL_byelement, vn.IsVector() && vn.IsD()) \ + V(umull2, NEON_UMULL_byelement, vn.IsVector() && vn.IsQ()) \ + V(smlal, NEON_SMLAL_byelement, vn.IsVector() && vn.IsD()) \ + V(smlal2, NEON_SMLAL_byelement, 
vn.IsVector() && vn.IsQ()) \ + V(umlal, NEON_UMLAL_byelement, vn.IsVector() && vn.IsD()) \ + V(umlal2, NEON_UMLAL_byelement, vn.IsVector() && vn.IsQ()) \ + V(smlsl, NEON_SMLSL_byelement, vn.IsVector() && vn.IsD()) \ + V(smlsl2, NEON_SMLSL_byelement, vn.IsVector() && vn.IsQ()) \ + V(umlsl, NEON_UMLSL_byelement, vn.IsVector() && vn.IsD()) \ + V(umlsl2, NEON_UMLSL_byelement, vn.IsVector() && vn.IsQ()) + + +#define DEFINE_ASM_FUNC(FN, OP, AS) \ +void Assembler::FN(const VRegister& vd, \ + const VRegister& vn, \ + const VRegister& vm, \ + int vm_index) { \ + VIXL_ASSERT(AS); \ + NEONByElementL(vd, vn, vm, vm_index, OP); \ +} +NEON_BYELEMENT_LONG_LIST(DEFINE_ASM_FUNC) +#undef DEFINE_ASM_FUNC + + +void Assembler::suqadd(const VRegister& vd, + const VRegister& vn) { + NEON2RegMisc(vd, vn, NEON_SUQADD); +} + + +void Assembler::usqadd(const VRegister& vd, + const VRegister& vn) { + NEON2RegMisc(vd, vn, NEON_USQADD); +} + + +void Assembler::abs(const VRegister& vd, + const VRegister& vn) { + VIXL_ASSERT(vd.IsVector() || vd.Is1D()); + NEON2RegMisc(vd, vn, NEON_ABS); +} + + +void Assembler::sqabs(const VRegister& vd, + const VRegister& vn) { + NEON2RegMisc(vd, vn, NEON_SQABS); +} + + +void Assembler::neg(const VRegister& vd, + const VRegister& vn) { + VIXL_ASSERT(vd.IsVector() || vd.Is1D()); + NEON2RegMisc(vd, vn, NEON_NEG); +} + + +void Assembler::sqneg(const VRegister& vd, + const VRegister& vn) { + NEON2RegMisc(vd, vn, NEON_SQNEG); +} + + +void Assembler::NEONXtn(const VRegister& vd, + const VRegister& vn, + NEON2RegMiscOp vop) { + Instr format, op = vop; + if (vd.IsScalar()) { + VIXL_ASSERT((vd.Is1B() && vn.Is1H()) || + (vd.Is1H() && vn.Is1S()) || + (vd.Is1S() && vn.Is1D())); + op |= NEON_Q | NEONScalar; + format = SFormat(vd); + } else { + VIXL_ASSERT((vd.Is8B() && vn.Is8H()) || + (vd.Is4H() && vn.Is4S()) || + (vd.Is2S() && vn.Is2D()) || + (vd.Is16B() && vn.Is8H()) || + (vd.Is8H() && vn.Is4S()) || + (vd.Is4S() && vn.Is2D())); + format = VFormat(vd); + } + Emit(format | op 
| Rn(vn) | Rd(vd)); +} + + +void Assembler::xtn(const VRegister& vd, + const VRegister& vn) { + VIXL_ASSERT(vd.IsVector() && vd.IsD()); + NEONXtn(vd, vn, NEON_XTN); +} + + +void Assembler::xtn2(const VRegister& vd, + const VRegister& vn) { + VIXL_ASSERT(vd.IsVector() && vd.IsQ()); + NEONXtn(vd, vn, NEON_XTN); +} + + +void Assembler::sqxtn(const VRegister& vd, + const VRegister& vn) { + VIXL_ASSERT(vd.IsScalar() || vd.IsD()); + NEONXtn(vd, vn, NEON_SQXTN); +} + + +void Assembler::sqxtn2(const VRegister& vd, + const VRegister& vn) { + VIXL_ASSERT(vd.IsVector() && vd.IsQ()); + NEONXtn(vd, vn, NEON_SQXTN); +} + + +void Assembler::sqxtun(const VRegister& vd, + const VRegister& vn) { + VIXL_ASSERT(vd.IsScalar() || vd.IsD()); + NEONXtn(vd, vn, NEON_SQXTUN); +} + + +void Assembler::sqxtun2(const VRegister& vd, + const VRegister& vn) { + VIXL_ASSERT(vd.IsVector() && vd.IsQ()); + NEONXtn(vd, vn, NEON_SQXTUN); +} + + +void Assembler::uqxtn(const VRegister& vd, + const VRegister& vn) { + VIXL_ASSERT(vd.IsScalar() || vd.IsD()); + NEONXtn(vd, vn, NEON_UQXTN); +} + + +void Assembler::uqxtn2(const VRegister& vd, + const VRegister& vn) { + VIXL_ASSERT(vd.IsVector() && vd.IsQ()); + NEONXtn(vd, vn, NEON_UQXTN); +} + + +// NEON NOT and RBIT are distinguised by bit 22, the bottom bit of "size". 
+void Assembler::not_(const VRegister& vd, + const VRegister& vn) { + VIXL_ASSERT(AreSameFormat(vd, vn)); + VIXL_ASSERT(vd.Is8B() || vd.Is16B()); + Emit(VFormat(vd) | NEON_RBIT_NOT | Rn(vn) | Rd(vd)); +} + + +void Assembler::rbit(const VRegister& vd, + const VRegister& vn) { + VIXL_ASSERT(AreSameFormat(vd, vn)); + VIXL_ASSERT(vd.Is8B() || vd.Is16B()); + Emit(VFormat(vn) | (1 << NEONSize_offset) | NEON_RBIT_NOT | Rn(vn) | Rd(vd)); +} + + +void Assembler::ext(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int index) { + VIXL_ASSERT(AreSameFormat(vd, vn, vm)); + VIXL_ASSERT(vd.Is8B() || vd.Is16B()); + VIXL_ASSERT((0 <= index) && (index < vd.lanes())); + Emit(VFormat(vd) | NEON_EXT | Rm(vm) | ImmNEONExt(index) | Rn(vn) | Rd(vd)); +} + + +void Assembler::dup(const VRegister& vd, + const VRegister& vn, + int vn_index) { + Instr q, scalar; + + // We support vn arguments of the form vn.VxT() or vn.T(), where x is the + // number of lanes, and T is b, h, s or d. + int lane_size = vn.LaneSizeInBytes(); + NEONFormatField format; + switch (lane_size) { + case 1: format = NEON_16B; break; + case 2: format = NEON_8H; break; + case 4: format = NEON_4S; break; + default: + VIXL_ASSERT(lane_size == 8); + format = NEON_2D; + break; + } + + if (vd.IsScalar()) { + q = NEON_Q; + scalar = NEONScalar; + } else { + VIXL_ASSERT(!vd.Is1D()); + q = vd.IsD() ? 0 : NEON_Q; + scalar = 0; + } + Emit(q | scalar | NEON_DUP_ELEMENT | + ImmNEON5(format, vn_index) | Rn(vn) | Rd(vd)); +} + + +void Assembler::mov(const VRegister& vd, + const VRegister& vn, + int vn_index) { + VIXL_ASSERT(vn.IsScalar()); + dup(vd, vn, vn_index); +} + + +void Assembler::dup(const VRegister& vd, const Register& rn) { + VIXL_ASSERT(!vd.Is1D()); + VIXL_ASSERT(vd.Is2D() == rn.IsX()); + int q = vd.IsD() ? 
0 : NEON_Q; + Emit(q | NEON_DUP_GENERAL | ImmNEON5(VFormat(vd), 0) | Rn(rn) | Rd(vd)); +} + + +void Assembler::ins(const VRegister& vd, + int vd_index, + const VRegister& vn, + int vn_index) { + VIXL_ASSERT(AreSameFormat(vd, vn)); + // We support vd arguments of the form vd.VxT() or vd.T(), where x is the + // number of lanes, and T is b, h, s or d. + int lane_size = vd.LaneSizeInBytes(); + NEONFormatField format; + switch (lane_size) { + case 1: format = NEON_16B; break; + case 2: format = NEON_8H; break; + case 4: format = NEON_4S; break; + default: + VIXL_ASSERT(lane_size == 8); + format = NEON_2D; + break; + } + + VIXL_ASSERT((0 <= vd_index) && + (vd_index < LaneCountFromFormat(static_cast<VectorFormat>(format)))); + VIXL_ASSERT((0 <= vn_index) && + (vn_index < LaneCountFromFormat(static_cast<VectorFormat>(format)))); + Emit(NEON_INS_ELEMENT | ImmNEON5(format, vd_index) | + ImmNEON4(format, vn_index) | Rn(vn) | Rd(vd)); +} + + +void Assembler::mov(const VRegister& vd, + int vd_index, + const VRegister& vn, + int vn_index) { + ins(vd, vd_index, vn, vn_index); +} + + +void Assembler::ins(const VRegister& vd, + int vd_index, + const Register& rn) { + // We support vd arguments of the form vd.VxT() or vd.T(), where x is the + // number of lanes, and T is b, h, s or d. 
+ int lane_size = vd.LaneSizeInBytes(); + NEONFormatField format; + switch (lane_size) { + case 1: format = NEON_16B; VIXL_ASSERT(rn.IsW()); break; + case 2: format = NEON_8H; VIXL_ASSERT(rn.IsW()); break; + case 4: format = NEON_4S; VIXL_ASSERT(rn.IsW()); break; + default: + VIXL_ASSERT(lane_size == 8); + VIXL_ASSERT(rn.IsX()); + format = NEON_2D; + break; + } + + VIXL_ASSERT((0 <= vd_index) && + (vd_index < LaneCountFromFormat(static_cast<VectorFormat>(format)))); + Emit(NEON_INS_GENERAL | ImmNEON5(format, vd_index) | Rn(rn) | Rd(vd)); +} + + +void Assembler::mov(const VRegister& vd, + int vd_index, + const Register& rn) { + ins(vd, vd_index, rn); +} + + +void Assembler::umov(const Register& rd, + const VRegister& vn, + int vn_index) { + // We support vd arguments of the form vd.VxT() or vd.T(), where x is the + // number of lanes, and T is b, h, s or d. + int lane_size = vn.LaneSizeInBytes(); + NEONFormatField format; + Instr q = 0; + switch (lane_size) { + case 1: format = NEON_16B; VIXL_ASSERT(rd.IsW()); break; + case 2: format = NEON_8H; VIXL_ASSERT(rd.IsW()); break; + case 4: format = NEON_4S; VIXL_ASSERT(rd.IsW()); break; + default: + VIXL_ASSERT(lane_size == 8); + VIXL_ASSERT(rd.IsX()); + format = NEON_2D; + q = NEON_Q; + break; + } + + VIXL_ASSERT((0 <= vn_index) && + (vn_index < LaneCountFromFormat(static_cast<VectorFormat>(format)))); + Emit(q | NEON_UMOV | ImmNEON5(format, vn_index) | Rn(vn) | Rd(rd)); +} + + +void Assembler::mov(const Register& rd, + const VRegister& vn, + int vn_index) { + VIXL_ASSERT(vn.SizeInBytes() >= 4); + umov(rd, vn, vn_index); +} + + +void Assembler::smov(const Register& rd, + const VRegister& vn, + int vn_index) { + // We support vd arguments of the form vd.VxT() or vd.T(), where x is the + // number of lanes, and T is b, h, s. 
+ int lane_size = vn.LaneSizeInBytes(); + NEONFormatField format; + Instr q = 0; + VIXL_ASSERT(lane_size != 8); + switch (lane_size) { + case 1: format = NEON_16B; break; + case 2: format = NEON_8H; break; + default: + VIXL_ASSERT(lane_size == 4); + VIXL_ASSERT(rd.IsX()); + format = NEON_4S; + break; + } + q = rd.IsW() ? 0 : NEON_Q; + VIXL_ASSERT((0 <= vn_index) && + (vn_index < LaneCountFromFormat(static_cast<VectorFormat>(format)))); + Emit(q | NEON_SMOV | ImmNEON5(format, vn_index) | Rn(vn) | Rd(rd)); +} + + +void Assembler::cls(const VRegister& vd, + const VRegister& vn) { + VIXL_ASSERT(AreSameFormat(vd, vn)); + VIXL_ASSERT(!vd.Is1D() && !vd.Is2D()); + Emit(VFormat(vn) | NEON_CLS | Rn(vn) | Rd(vd)); +} + + +void Assembler::clz(const VRegister& vd, + const VRegister& vn) { + VIXL_ASSERT(AreSameFormat(vd, vn)); + VIXL_ASSERT(!vd.Is1D() && !vd.Is2D()); + Emit(VFormat(vn) | NEON_CLZ | Rn(vn) | Rd(vd)); +} + + +void Assembler::cnt(const VRegister& vd, + const VRegister& vn) { + VIXL_ASSERT(AreSameFormat(vd, vn)); + VIXL_ASSERT(vd.Is8B() || vd.Is16B()); + Emit(VFormat(vn) | NEON_CNT | Rn(vn) | Rd(vd)); +} + + +void Assembler::rev16(const VRegister& vd, + const VRegister& vn) { + VIXL_ASSERT(AreSameFormat(vd, vn)); + VIXL_ASSERT(vd.Is8B() || vd.Is16B()); + Emit(VFormat(vn) | NEON_REV16 | Rn(vn) | Rd(vd)); +} + + +void Assembler::rev32(const VRegister& vd, + const VRegister& vn) { + VIXL_ASSERT(AreSameFormat(vd, vn)); + VIXL_ASSERT(vd.Is8B() || vd.Is16B() || vd.Is4H() || vd.Is8H()); + Emit(VFormat(vn) | NEON_REV32 | Rn(vn) | Rd(vd)); +} + + +void Assembler::rev64(const VRegister& vd, + const VRegister& vn) { + VIXL_ASSERT(AreSameFormat(vd, vn)); + VIXL_ASSERT(!vd.Is1D() && !vd.Is2D()); + Emit(VFormat(vn) | NEON_REV64 | Rn(vn) | Rd(vd)); +} + + +void Assembler::ursqrte(const VRegister& vd, + const VRegister& vn) { + VIXL_ASSERT(AreSameFormat(vd, vn)); + VIXL_ASSERT(vd.Is2S() || vd.Is4S()); + Emit(VFormat(vn) | NEON_URSQRTE | Rn(vn) | Rd(vd)); +} + + +void 
Assembler::urecpe(const VRegister& vd, + const VRegister& vn) { + VIXL_ASSERT(AreSameFormat(vd, vn)); + VIXL_ASSERT(vd.Is2S() || vd.Is4S()); + Emit(VFormat(vn) | NEON_URECPE | Rn(vn) | Rd(vd)); +} + + +void Assembler::NEONAddlp(const VRegister& vd, + const VRegister& vn, + NEON2RegMiscOp op) { + VIXL_ASSERT((op == NEON_SADDLP) || + (op == NEON_UADDLP) || + (op == NEON_SADALP) || + (op == NEON_UADALP)); + + VIXL_ASSERT((vn.Is8B() && vd.Is4H()) || + (vn.Is4H() && vd.Is2S()) || + (vn.Is2S() && vd.Is1D()) || + (vn.Is16B() && vd.Is8H())|| + (vn.Is8H() && vd.Is4S()) || + (vn.Is4S() && vd.Is2D())); + Emit(VFormat(vn) | op | Rn(vn) | Rd(vd)); +} + + +void Assembler::saddlp(const VRegister& vd, + const VRegister& vn) { + NEONAddlp(vd, vn, NEON_SADDLP); +} + + +void Assembler::uaddlp(const VRegister& vd, + const VRegister& vn) { + NEONAddlp(vd, vn, NEON_UADDLP); +} + + +void Assembler::sadalp(const VRegister& vd, + const VRegister& vn) { + NEONAddlp(vd, vn, NEON_SADALP); +} + + +void Assembler::uadalp(const VRegister& vd, + const VRegister& vn) { + NEONAddlp(vd, vn, NEON_UADALP); +} + + +void Assembler::NEONAcrossLanesL(const VRegister& vd, + const VRegister& vn, + NEONAcrossLanesOp op) { + VIXL_ASSERT((vn.Is8B() && vd.Is1H()) || + (vn.Is16B() && vd.Is1H()) || + (vn.Is4H() && vd.Is1S()) || + (vn.Is8H() && vd.Is1S()) || + (vn.Is4S() && vd.Is1D())); + Emit(VFormat(vn) | op | Rn(vn) | Rd(vd)); +} + + +void Assembler::saddlv(const VRegister& vd, + const VRegister& vn) { + NEONAcrossLanesL(vd, vn, NEON_SADDLV); +} + + +void Assembler::uaddlv(const VRegister& vd, + const VRegister& vn) { + NEONAcrossLanesL(vd, vn, NEON_UADDLV); +} + + +void Assembler::NEONAcrossLanes(const VRegister& vd, + const VRegister& vn, + NEONAcrossLanesOp op) { + VIXL_ASSERT((vn.Is8B() && vd.Is1B()) || + (vn.Is16B() && vd.Is1B()) || + (vn.Is4H() && vd.Is1H()) || + (vn.Is8H() && vd.Is1H()) || + (vn.Is4S() && vd.Is1S())); + if ((op & NEONAcrossLanesFPFMask) == NEONAcrossLanesFPFixed) { + Emit(FPFormat(vn) | 
op | Rn(vn) | Rd(vd)); + } else { + Emit(VFormat(vn) | op | Rn(vn) | Rd(vd)); + } +} + + +#define NEON_ACROSSLANES_LIST(V) \ + V(fmaxv, NEON_FMAXV, vd.Is1S()) \ + V(fminv, NEON_FMINV, vd.Is1S()) \ + V(fmaxnmv, NEON_FMAXNMV, vd.Is1S()) \ + V(fminnmv, NEON_FMINNMV, vd.Is1S()) \ + V(addv, NEON_ADDV, true) \ + V(smaxv, NEON_SMAXV, true) \ + V(sminv, NEON_SMINV, true) \ + V(umaxv, NEON_UMAXV, true) \ + V(uminv, NEON_UMINV, true) + + +#define DEFINE_ASM_FUNC(FN, OP, AS) \ +void Assembler::FN(const VRegister& vd, \ + const VRegister& vn) { \ + VIXL_ASSERT(AS); \ + NEONAcrossLanes(vd, vn, OP); \ +} +NEON_ACROSSLANES_LIST(DEFINE_ASM_FUNC) +#undef DEFINE_ASM_FUNC + + +void Assembler::NEONPerm(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + NEONPermOp op) { + VIXL_ASSERT(AreSameFormat(vd, vn, vm)); + VIXL_ASSERT(!vd.Is1D()); + Emit(VFormat(vd) | op | Rm(vm) | Rn(vn) | Rd(vd)); +} + + +void Assembler::trn1(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + NEONPerm(vd, vn, vm, NEON_TRN1); +} + + +void Assembler::trn2(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + NEONPerm(vd, vn, vm, NEON_TRN2); +} + + +void Assembler::uzp1(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + NEONPerm(vd, vn, vm, NEON_UZP1); +} + + +void Assembler::uzp2(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + NEONPerm(vd, vn, vm, NEON_UZP2); +} + + +void Assembler::zip1(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + NEONPerm(vd, vn, vm, NEON_ZIP1); +} + + +void Assembler::zip2(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + NEONPerm(vd, vn, vm, NEON_ZIP2); +} + + +void Assembler::NEONShiftImmediate(const VRegister& vd, + const VRegister& vn, + NEONShiftImmediateOp op, + int immh_immb) { + VIXL_ASSERT(AreSameFormat(vd, vn)); + Instr q, scalar; + if (vn.IsScalar()) { + q = NEON_Q; + scalar = NEONScalar; + } else { + q = vd.IsD() ? 
0 : NEON_Q; + scalar = 0; + } + Emit(q | op | scalar | immh_immb | Rn(vn) | Rd(vd)); +} + + +void Assembler::NEONShiftLeftImmediate(const VRegister& vd, + const VRegister& vn, + int shift, + NEONShiftImmediateOp op) { + int laneSizeInBits = vn.LaneSizeInBits(); + VIXL_ASSERT((shift >= 0) && (shift < laneSizeInBits)); + NEONShiftImmediate(vd, vn, op, (laneSizeInBits + shift) << 16); +} + + +void Assembler::NEONShiftRightImmediate(const VRegister& vd, + const VRegister& vn, + int shift, + NEONShiftImmediateOp op) { + int laneSizeInBits = vn.LaneSizeInBits(); + VIXL_ASSERT((shift >= 1) && (shift <= laneSizeInBits)); + NEONShiftImmediate(vd, vn, op, ((2 * laneSizeInBits) - shift) << 16); +} + + +void Assembler::NEONShiftImmediateL(const VRegister& vd, + const VRegister& vn, + int shift, + NEONShiftImmediateOp op) { + int laneSizeInBits = vn.LaneSizeInBits(); + VIXL_ASSERT((shift >= 0) && (shift < laneSizeInBits)); + int immh_immb = (laneSizeInBits + shift) << 16; + + VIXL_ASSERT((vn.Is8B() && vd.Is8H()) || + (vn.Is4H() && vd.Is4S()) || + (vn.Is2S() && vd.Is2D()) || + (vn.Is16B() && vd.Is8H())|| + (vn.Is8H() && vd.Is4S()) || + (vn.Is4S() && vd.Is2D())); + Instr q; + q = vn.IsD() ? 0 : NEON_Q; + Emit(q | op | immh_immb | Rn(vn) | Rd(vd)); +} + + +void Assembler::NEONShiftImmediateN(const VRegister& vd, + const VRegister& vn, + int shift, + NEONShiftImmediateOp op) { + Instr q, scalar; + int laneSizeInBits = vd.LaneSizeInBits(); + VIXL_ASSERT((shift >= 1) && (shift <= laneSizeInBits)); + int immh_immb = (2 * laneSizeInBits - shift) << 16; + + if (vn.IsScalar()) { + VIXL_ASSERT((vd.Is1B() && vn.Is1H()) || + (vd.Is1H() && vn.Is1S()) || + (vd.Is1S() && vn.Is1D())); + q = NEON_Q; + scalar = NEONScalar; + } else { + VIXL_ASSERT((vd.Is8B() && vn.Is8H()) || + (vd.Is4H() && vn.Is4S()) || + (vd.Is2S() && vn.Is2D()) || + (vd.Is16B() && vn.Is8H())|| + (vd.Is8H() && vn.Is4S()) || + (vd.Is4S() && vn.Is2D())); + scalar = 0; + q = vd.IsD() ? 
0 : NEON_Q; + } + Emit(q | op | scalar | immh_immb | Rn(vn) | Rd(vd)); +} + + +void Assembler::shl(const VRegister& vd, + const VRegister& vn, + int shift) { + VIXL_ASSERT(vd.IsVector() || vd.Is1D()); + NEONShiftLeftImmediate(vd, vn, shift, NEON_SHL); +} + + +void Assembler::sli(const VRegister& vd, + const VRegister& vn, + int shift) { + VIXL_ASSERT(vd.IsVector() || vd.Is1D()); + NEONShiftLeftImmediate(vd, vn, shift, NEON_SLI); +} + + +void Assembler::sqshl(const VRegister& vd, + const VRegister& vn, + int shift) { + NEONShiftLeftImmediate(vd, vn, shift, NEON_SQSHL_imm); +} + + +void Assembler::sqshlu(const VRegister& vd, + const VRegister& vn, + int shift) { + NEONShiftLeftImmediate(vd, vn, shift, NEON_SQSHLU); +} + + +void Assembler::uqshl(const VRegister& vd, + const VRegister& vn, + int shift) { + NEONShiftLeftImmediate(vd, vn, shift, NEON_UQSHL_imm); +} + + +void Assembler::sshll(const VRegister& vd, + const VRegister& vn, + int shift) { + VIXL_ASSERT(vn.IsD()); + NEONShiftImmediateL(vd, vn, shift, NEON_SSHLL); +} + + +void Assembler::sshll2(const VRegister& vd, + const VRegister& vn, + int shift) { + VIXL_ASSERT(vn.IsQ()); + NEONShiftImmediateL(vd, vn, shift, NEON_SSHLL); +} + + +void Assembler::sxtl(const VRegister& vd, + const VRegister& vn) { + sshll(vd, vn, 0); +} + + +void Assembler::sxtl2(const VRegister& vd, + const VRegister& vn) { + sshll2(vd, vn, 0); +} + + +void Assembler::ushll(const VRegister& vd, + const VRegister& vn, + int shift) { + VIXL_ASSERT(vn.IsD()); + NEONShiftImmediateL(vd, vn, shift, NEON_USHLL); +} + + +void Assembler::ushll2(const VRegister& vd, + const VRegister& vn, + int shift) { + VIXL_ASSERT(vn.IsQ()); + NEONShiftImmediateL(vd, vn, shift, NEON_USHLL); +} + + +void Assembler::uxtl(const VRegister& vd, + const VRegister& vn) { + ushll(vd, vn, 0); +} + + +void Assembler::uxtl2(const VRegister& vd, + const VRegister& vn) { + ushll2(vd, vn, 0); +} + + +void Assembler::sri(const VRegister& vd, + const VRegister& vn, + int shift) { + 
VIXL_ASSERT(vd.IsVector() || vd.Is1D()); + NEONShiftRightImmediate(vd, vn, shift, NEON_SRI); +} + + +void Assembler::sshr(const VRegister& vd, + const VRegister& vn, + int shift) { + VIXL_ASSERT(vd.IsVector() || vd.Is1D()); + NEONShiftRightImmediate(vd, vn, shift, NEON_SSHR); +} + + +void Assembler::ushr(const VRegister& vd, + const VRegister& vn, + int shift) { + VIXL_ASSERT(vd.IsVector() || vd.Is1D()); + NEONShiftRightImmediate(vd, vn, shift, NEON_USHR); +} + + +void Assembler::srshr(const VRegister& vd, + const VRegister& vn, + int shift) { + VIXL_ASSERT(vd.IsVector() || vd.Is1D()); + NEONShiftRightImmediate(vd, vn, shift, NEON_SRSHR); +} + + +void Assembler::urshr(const VRegister& vd, + const VRegister& vn, + int shift) { + VIXL_ASSERT(vd.IsVector() || vd.Is1D()); + NEONShiftRightImmediate(vd, vn, shift, NEON_URSHR); +} + + +void Assembler::ssra(const VRegister& vd, + const VRegister& vn, + int shift) { + VIXL_ASSERT(vd.IsVector() || vd.Is1D()); + NEONShiftRightImmediate(vd, vn, shift, NEON_SSRA); +} + + +void Assembler::usra(const VRegister& vd, + const VRegister& vn, + int shift) { + VIXL_ASSERT(vd.IsVector() || vd.Is1D()); + NEONShiftRightImmediate(vd, vn, shift, NEON_USRA); +} + + +void Assembler::srsra(const VRegister& vd, + const VRegister& vn, + int shift) { + VIXL_ASSERT(vd.IsVector() || vd.Is1D()); + NEONShiftRightImmediate(vd, vn, shift, NEON_SRSRA); +} + + +void Assembler::ursra(const VRegister& vd, + const VRegister& vn, + int shift) { + VIXL_ASSERT(vd.IsVector() || vd.Is1D()); + NEONShiftRightImmediate(vd, vn, shift, NEON_URSRA); +} + + +void Assembler::shrn(const VRegister& vd, + const VRegister& vn, + int shift) { + VIXL_ASSERT(vn.IsVector() && vd.IsD()); + NEONShiftImmediateN(vd, vn, shift, NEON_SHRN); +} + + +void Assembler::shrn2(const VRegister& vd, + const VRegister& vn, + int shift) { + VIXL_ASSERT(vn.IsVector() && vd.IsQ()); + NEONShiftImmediateN(vd, vn, shift, NEON_SHRN); +} + + +void Assembler::rshrn(const VRegister& vd, + const 
VRegister& vn, + int shift) { + VIXL_ASSERT(vn.IsVector() && vd.IsD()); + NEONShiftImmediateN(vd, vn, shift, NEON_RSHRN); +} + + +void Assembler::rshrn2(const VRegister& vd, + const VRegister& vn, + int shift) { + VIXL_ASSERT(vn.IsVector() && vd.IsQ()); + NEONShiftImmediateN(vd, vn, shift, NEON_RSHRN); +} + + +void Assembler::sqshrn(const VRegister& vd, + const VRegister& vn, + int shift) { + VIXL_ASSERT(vd.IsD() || (vn.IsScalar() && vd.IsScalar())); + NEONShiftImmediateN(vd, vn, shift, NEON_SQSHRN); +} + + +void Assembler::sqshrn2(const VRegister& vd, + const VRegister& vn, + int shift) { + VIXL_ASSERT(vn.IsVector() && vd.IsQ()); + NEONShiftImmediateN(vd, vn, shift, NEON_SQSHRN); +} + + +void Assembler::sqrshrn(const VRegister& vd, + const VRegister& vn, + int shift) { + VIXL_ASSERT(vd.IsD() || (vn.IsScalar() && vd.IsScalar())); + NEONShiftImmediateN(vd, vn, shift, NEON_SQRSHRN); +} + + +void Assembler::sqrshrn2(const VRegister& vd, + const VRegister& vn, + int shift) { + VIXL_ASSERT(vn.IsVector() && vd.IsQ()); + NEONShiftImmediateN(vd, vn, shift, NEON_SQRSHRN); +} + + +void Assembler::sqshrun(const VRegister& vd, + const VRegister& vn, + int shift) { + VIXL_ASSERT(vd.IsD() || (vn.IsScalar() && vd.IsScalar())); + NEONShiftImmediateN(vd, vn, shift, NEON_SQSHRUN); +} + + +void Assembler::sqshrun2(const VRegister& vd, + const VRegister& vn, + int shift) { + VIXL_ASSERT(vn.IsVector() && vd.IsQ()); + NEONShiftImmediateN(vd, vn, shift, NEON_SQSHRUN); +} + + +void Assembler::sqrshrun(const VRegister& vd, + const VRegister& vn, + int shift) { + VIXL_ASSERT(vd.IsD() || (vn.IsScalar() && vd.IsScalar())); + NEONShiftImmediateN(vd, vn, shift, NEON_SQRSHRUN); +} + + +void Assembler::sqrshrun2(const VRegister& vd, + const VRegister& vn, + int shift) { + VIXL_ASSERT(vn.IsVector() && vd.IsQ()); + NEONShiftImmediateN(vd, vn, shift, NEON_SQRSHRUN); +} + + +void Assembler::uqshrn(const VRegister& vd, + const VRegister& vn, + int shift) { + VIXL_ASSERT(vd.IsD() || (vn.IsScalar() && 
vd.IsScalar())); + NEONShiftImmediateN(vd, vn, shift, NEON_UQSHRN); +} + + +void Assembler::uqshrn2(const VRegister& vd, + const VRegister& vn, + int shift) { + VIXL_ASSERT(vn.IsVector() && vd.IsQ()); + NEONShiftImmediateN(vd, vn, shift, NEON_UQSHRN); +} + + +void Assembler::uqrshrn(const VRegister& vd, + const VRegister& vn, + int shift) { + VIXL_ASSERT(vd.IsD() || (vn.IsScalar() && vd.IsScalar())); + NEONShiftImmediateN(vd, vn, shift, NEON_UQRSHRN); +} + + +void Assembler::uqrshrn2(const VRegister& vd, + const VRegister& vn, + int shift) { + VIXL_ASSERT(vn.IsVector() && vd.IsQ()); + NEONShiftImmediateN(vd, vn, shift, NEON_UQRSHRN); +} + + +// Note: +// Below, a difference in case for the same letter indicates a +// negated bit. +// If b is 1, then B is 0. +uint32_t Assembler::FP32ToImm8(float imm) { + VIXL_ASSERT(IsImmFP32(imm)); + // bits: aBbb.bbbc.defg.h000.0000.0000.0000.0000 + uint32_t bits = FloatToRawbits(imm); + // bit7: a000.0000 + uint32_t bit7 = ((bits >> 31) & 0x1) << 7; + // bit6: 0b00.0000 + uint32_t bit6 = ((bits >> 29) & 0x1) << 6; + // bit5_to_0: 00cd.efgh + uint32_t bit5_to_0 = (bits >> 19) & 0x3f; + + return bit7 | bit6 | bit5_to_0; +} + + +Instr Assembler::ImmFP32(float imm) { + return FP32ToImm8(imm) << ImmFP_offset; +} + + +uint32_t Assembler::FP64ToImm8(double imm) { + VIXL_ASSERT(IsImmFP64(imm)); + // bits: aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000 + // 0000.0000.0000.0000.0000.0000.0000.0000 + uint64_t bits = DoubleToRawbits(imm); + // bit7: a000.0000 + uint64_t bit7 = ((bits >> 63) & 0x1) << 7; + // bit6: 0b00.0000 + uint64_t bit6 = ((bits >> 61) & 0x1) << 6; + // bit5_to_0: 00cd.efgh + uint64_t bit5_to_0 = (bits >> 48) & 0x3f; + + return static_cast<uint32_t>(bit7 | bit6 | bit5_to_0); +} + + +Instr Assembler::ImmFP64(double imm) { + return FP64ToImm8(imm) << ImmFP_offset; +} + + +// Code generation helpers. 
+void Assembler::MoveWide(const Register& rd, + uint64_t imm, + int shift, + MoveWideImmediateOp mov_op) { + // Ignore the top 32 bits of an immediate if we're moving to a W register. + if (rd.Is32Bits()) { + // Check that the top 32 bits are zero (a positive 32-bit number) or top + // 33 bits are one (a negative 32-bit number, sign extended to 64 bits). + VIXL_ASSERT(((imm >> kWRegSize) == 0) || + ((imm >> (kWRegSize - 1)) == 0x1ffffffff)); + imm &= kWRegMask; + } + + if (shift >= 0) { + // Explicit shift specified. + VIXL_ASSERT((shift == 0) || (shift == 16) || + (shift == 32) || (shift == 48)); + VIXL_ASSERT(rd.Is64Bits() || (shift == 0) || (shift == 16)); + shift /= 16; + } else { + // Calculate a new immediate and shift combination to encode the immediate + // argument. + shift = 0; + if ((imm & 0xffffffffffff0000) == 0) { + // Nothing to do. + } else if ((imm & 0xffffffff0000ffff) == 0) { + imm >>= 16; + shift = 1; + } else if ((imm & 0xffff0000ffffffff) == 0) { + VIXL_ASSERT(rd.Is64Bits()); + imm >>= 32; + shift = 2; + } else if ((imm & 0x0000ffffffffffff) == 0) { + VIXL_ASSERT(rd.Is64Bits()); + imm >>= 48; + shift = 3; + } + } + + VIXL_ASSERT(IsUint16(imm)); + + Emit(SF(rd) | MoveWideImmediateFixed | mov_op | + Rd(rd) | ImmMoveWide(imm) | ShiftMoveWide(shift)); +} + + +void Assembler::AddSub(const Register& rd, + const Register& rn, + const Operand& operand, + FlagsUpdate S, + AddSubOp op) { + VIXL_ASSERT(rd.size() == rn.size()); + if (operand.IsImmediate()) { + int64_t immediate = operand.immediate(); + VIXL_ASSERT(IsImmAddSub(immediate)); + Instr dest_reg = (S == SetFlags) ? 
Rd(rd) : RdSP(rd); + Emit(SF(rd) | AddSubImmediateFixed | op | Flags(S) | + ImmAddSub(static_cast<int>(immediate)) | dest_reg | RnSP(rn)); + } else if (operand.IsShiftedRegister()) { + VIXL_ASSERT(operand.reg().size() == rd.size()); + VIXL_ASSERT(operand.shift() != ROR); + + // For instructions of the form: + // add/sub wsp, <Wn>, <Wm> [, LSL #0-3 ] + // add/sub <Wd>, wsp, <Wm> [, LSL #0-3 ] + // add/sub wsp, wsp, <Wm> [, LSL #0-3 ] + // adds/subs <Wd>, wsp, <Wm> [, LSL #0-3 ] + // or their 64-bit register equivalents, convert the operand from shifted to + // extended register mode, and emit an add/sub extended instruction. + if (rn.IsSP() || rd.IsSP()) { + VIXL_ASSERT(!(rd.IsSP() && (S == SetFlags))); + DataProcExtendedRegister(rd, rn, operand.ToExtendedRegister(), S, + AddSubExtendedFixed | op); + } else { + DataProcShiftedRegister(rd, rn, operand, S, AddSubShiftedFixed | op); + } + } else { + VIXL_ASSERT(operand.IsExtendedRegister()); + DataProcExtendedRegister(rd, rn, operand, S, AddSubExtendedFixed | op); + } +} + + +void Assembler::AddSubWithCarry(const Register& rd, + const Register& rn, + const Operand& operand, + FlagsUpdate S, + AddSubWithCarryOp op) { + VIXL_ASSERT(rd.size() == rn.size()); + VIXL_ASSERT(rd.size() == operand.reg().size()); + VIXL_ASSERT(operand.IsShiftedRegister() && (operand.shift_amount() == 0)); + Emit(SF(rd) | op | Flags(S) | Rm(operand.reg()) | Rn(rn) | Rd(rd)); +} + + +void Assembler::hlt(int code) { + VIXL_ASSERT(IsUint16(code)); + Emit(HLT | ImmException(code)); +} + + +void Assembler::brk(int code) { + VIXL_ASSERT(IsUint16(code)); + Emit(BRK | ImmException(code)); +} + + +void Assembler::svc(int code) { + Emit(SVC | ImmException(code)); +} + + +void Assembler::ConditionalCompare(const Register& rn, + const Operand& operand, + StatusFlags nzcv, + Condition cond, + ConditionalCompareOp op) { + Instr ccmpop; + if (operand.IsImmediate()) { + int64_t immediate = operand.immediate(); + VIXL_ASSERT(IsImmConditionalCompare(immediate)); + 
ccmpop = ConditionalCompareImmediateFixed | op | + ImmCondCmp(static_cast<unsigned>(immediate)); + } else { + VIXL_ASSERT(operand.IsShiftedRegister() && (operand.shift_amount() == 0)); + ccmpop = ConditionalCompareRegisterFixed | op | Rm(operand.reg()); + } + Emit(SF(rn) | ccmpop | Cond(cond) | Rn(rn) | Nzcv(nzcv)); +} + + +void Assembler::DataProcessing1Source(const Register& rd, + const Register& rn, + DataProcessing1SourceOp op) { + VIXL_ASSERT(rd.size() == rn.size()); + Emit(SF(rn) | op | Rn(rn) | Rd(rd)); +} + + +void Assembler::FPDataProcessing1Source(const VRegister& vd, + const VRegister& vn, + FPDataProcessing1SourceOp op) { + VIXL_ASSERT(vd.Is1H() || vd.Is1S() || vd.Is1D()); + Emit(FPType(vn) | op | Rn(vn) | Rd(vd)); +} + + +void Assembler::FPDataProcessing3Source(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + const VRegister& va, + FPDataProcessing3SourceOp op) { + VIXL_ASSERT(vd.Is1S() || vd.Is1D()); + VIXL_ASSERT(AreSameSizeAndType(vd, vn, vm, va)); + Emit(FPType(vd) | op | Rm(vm) | Rn(vn) | Rd(vd) | Ra(va)); +} + + +void Assembler::NEONModifiedImmShiftLsl(const VRegister& vd, + const int imm8, + const int left_shift, + NEONModifiedImmediateOp op) { + VIXL_ASSERT(vd.Is8B() || vd.Is16B() || vd.Is4H() || vd.Is8H() || + vd.Is2S() || vd.Is4S()); + VIXL_ASSERT((left_shift == 0) || (left_shift == 8) || + (left_shift == 16) || (left_shift == 24)); + VIXL_ASSERT(IsUint8(imm8)); + + int cmode_1, cmode_2, cmode_3; + if (vd.Is8B() || vd.Is16B()) { + VIXL_ASSERT(op == NEONModifiedImmediate_MOVI); + cmode_1 = 1; + cmode_2 = 1; + cmode_3 = 1; + } else { + cmode_1 = (left_shift >> 3) & 1; + cmode_2 = left_shift >> 4; + cmode_3 = 0; + if (vd.Is4H() || vd.Is8H()) { + VIXL_ASSERT((left_shift == 0) || (left_shift == 8)); + cmode_3 = 1; + } + } + int cmode = (cmode_3 << 3) | (cmode_2 << 2) | (cmode_1 << 1); + + int q = vd.IsQ() ? 
NEON_Q : 0; + + Emit(q | op | ImmNEONabcdefgh(imm8) | NEONCmode(cmode) | Rd(vd)); +} + + +void Assembler::NEONModifiedImmShiftMsl(const VRegister& vd, + const int imm8, + const int shift_amount, + NEONModifiedImmediateOp op) { + VIXL_ASSERT(vd.Is2S() || vd.Is4S()); + VIXL_ASSERT((shift_amount == 8) || (shift_amount == 16)); + VIXL_ASSERT(IsUint8(imm8)); + + int cmode_0 = (shift_amount >> 4) & 1; + int cmode = 0xc | cmode_0; + + int q = vd.IsQ() ? NEON_Q : 0; + + Emit(q | op | ImmNEONabcdefgh(imm8) | NEONCmode(cmode) | Rd(vd)); +} + + +void Assembler::EmitShift(const Register& rd, + const Register& rn, + Shift shift, + unsigned shift_amount) { + switch (shift) { + case LSL: + lsl(rd, rn, shift_amount); + break; + case LSR: + lsr(rd, rn, shift_amount); + break; + case ASR: + asr(rd, rn, shift_amount); + break; + case ROR: + ror(rd, rn, shift_amount); + break; + default: + VIXL_UNREACHABLE(); + } +} + + +void Assembler::EmitExtendShift(const Register& rd, + const Register& rn, + Extend extend, + unsigned left_shift) { + VIXL_ASSERT(rd.size() >= rn.size()); + unsigned reg_size = rd.size(); + // Use the correct size of register. + Register rn_ = Register(rn.code(), rd.size()); + // Bits extracted are high_bit:0. + unsigned high_bit = (8 << (extend & 0x3)) - 1; + // Number of bits left in the result that are not introduced by the shift. + unsigned non_shift_bits = (reg_size - left_shift) & (reg_size - 1); + + if ((non_shift_bits > high_bit) || (non_shift_bits == 0)) { + switch (extend) { + case UXTB: + case UXTH: + case UXTW: ubfm(rd, rn_, non_shift_bits, high_bit); break; + case SXTB: + case SXTH: + case SXTW: sbfm(rd, rn_, non_shift_bits, high_bit); break; + case UXTX: + case SXTX: { + VIXL_ASSERT(rn.size() == kXRegSize); + // Nothing to extend. Just shift. + lsl(rd, rn_, left_shift); + break; + } + default: VIXL_UNREACHABLE(); + } + } else { + // No need to extend as the extended bits would be shifted away. 
+ lsl(rd, rn_, left_shift); + } +} + + +void Assembler::DataProcExtendedRegister(const Register& rd, + const Register& rn, + const Operand& operand, + FlagsUpdate S, + Instr op) { + Instr dest_reg = (S == SetFlags) ? Rd(rd) : RdSP(rd); + Emit(SF(rd) | op | Flags(S) | Rm(operand.reg()) | + ExtendMode(operand.extend()) | ImmExtendShift(operand.shift_amount()) | + dest_reg | RnSP(rn)); +} + + +Instr Assembler::LoadStoreMemOperand(const MemOperand& addr, + unsigned access_size, + LoadStoreScalingOption option) { + Instr base = RnSP(addr.base()); + int64_t offset = addr.offset(); + + if (addr.IsImmediateOffset()) { + bool prefer_unscaled = (option == PreferUnscaledOffset) || + (option == RequireUnscaledOffset); + if (prefer_unscaled && IsImmLSUnscaled(offset)) { + // Use the unscaled addressing mode. + return base | LoadStoreUnscaledOffsetFixed | + ImmLS(static_cast<int>(offset)); + } + + if ((option != RequireUnscaledOffset) && + IsImmLSScaled(offset, access_size)) { + // Use the scaled addressing mode. + return base | LoadStoreUnsignedOffsetFixed | + ImmLSUnsigned(static_cast<int>(offset) >> access_size); + } + + if ((option != RequireScaledOffset) && IsImmLSUnscaled(offset)) { + // Use the unscaled addressing mode. + return base | LoadStoreUnscaledOffsetFixed | + ImmLS(static_cast<int>(offset)); + } + } + + // All remaining addressing modes are register-offset, pre-indexed or + // post-indexed modes. + VIXL_ASSERT((option != RequireUnscaledOffset) && + (option != RequireScaledOffset)); + + if (addr.IsRegisterOffset()) { + Extend ext = addr.extend(); + Shift shift = addr.shift(); + unsigned shift_amount = addr.shift_amount(); + + // LSL is encoded in the option field as UXTX. + if (shift == LSL) { + ext = UXTX; + } + + // Shifts are encoded in one bit, indicating a left shift by the memory + // access size. 
+ VIXL_ASSERT((shift_amount == 0) || (shift_amount == access_size)); + return base | LoadStoreRegisterOffsetFixed | Rm(addr.regoffset()) | + ExtendMode(ext) | ImmShiftLS((shift_amount > 0) ? 1 : 0); + } + + if (addr.IsPreIndex() && IsImmLSUnscaled(offset)) { + return base | LoadStorePreIndexFixed | ImmLS(static_cast<int>(offset)); + } + + if (addr.IsPostIndex() && IsImmLSUnscaled(offset)) { + return base | LoadStorePostIndexFixed | ImmLS(static_cast<int>(offset)); + } + + // If this point is reached, the MemOperand (addr) cannot be encoded. + VIXL_UNREACHABLE(); + return 0; +} + + +void Assembler::LoadStore(const CPURegister& rt, + const MemOperand& addr, + LoadStoreOp op, + LoadStoreScalingOption option) { + Emit(op | Rt(rt) | LoadStoreMemOperand(addr, CalcLSDataSize(op), option)); +} + + +void Assembler::Prefetch(PrefetchOperation op, + const MemOperand& addr, + LoadStoreScalingOption option) { + VIXL_ASSERT(addr.IsRegisterOffset() || addr.IsImmediateOffset()); + + Instr prfop = ImmPrefetchOperation(op); + Emit(PRFM | prfop | LoadStoreMemOperand(addr, kXRegSizeInBytesLog2, option)); +} + + +bool Assembler::IsImmAddSub(int64_t immediate) { + return IsUint12(immediate) || + (IsUint12(immediate >> 12) && ((immediate & 0xfff) == 0)); +} + + +bool Assembler::IsImmConditionalCompare(int64_t immediate) { + return IsUint5(immediate); +} + + +bool Assembler::IsImmFP32(float imm) { + // Valid values will have the form: + // aBbb.bbbc.defg.h000.0000.0000.0000.0000 + uint32_t bits = FloatToRawbits(imm); + // bits[19..0] are cleared. + if ((bits & 0x7ffff) != 0) { + return false; + } + + // bits[29..25] are all set or all cleared. + uint32_t b_pattern = (bits >> 16) & 0x3e00; + if (b_pattern != 0 && b_pattern != 0x3e00) { + return false; + } + + // bit[30] and bit[29] are opposite. 
+ if (((bits ^ (bits << 1)) & 0x40000000) == 0) { + return false; + } + + return true; +} + + +bool Assembler::IsImmFP64(double imm) { + // Valid values will have the form: + // aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000 + // 0000.0000.0000.0000.0000.0000.0000.0000 + uint64_t bits = DoubleToRawbits(imm); + // bits[47..0] are cleared. + if ((bits & 0x0000ffffffffffff) != 0) { + return false; + } + + // bits[61..54] are all set or all cleared. + uint32_t b_pattern = (bits >> 48) & 0x3fc0; + if ((b_pattern != 0) && (b_pattern != 0x3fc0)) { + return false; + } + + // bit[62] and bit[61] are opposite. + if (((bits ^ (bits << 1)) & (UINT64_C(1) << 62)) == 0) { + return false; + } + + return true; +} + + +bool Assembler::IsImmLSPair(int64_t offset, unsigned access_size) { + VIXL_ASSERT(access_size <= kQRegSizeInBytesLog2); + bool offset_is_size_multiple = + (((offset >> access_size) << access_size) == offset); + return offset_is_size_multiple && IsInt7(offset >> access_size); +} + + +bool Assembler::IsImmLSScaled(int64_t offset, unsigned access_size) { + VIXL_ASSERT(access_size <= kQRegSizeInBytesLog2); + bool offset_is_size_multiple = + (((offset >> access_size) << access_size) == offset); + return offset_is_size_multiple && IsUint12(offset >> access_size); +} + + +bool Assembler::IsImmLSUnscaled(int64_t offset) { + return IsInt9(offset); +} + + +// The movn instruction can generate immediates containing an arbitrary 16-bit +// value, with remaining bits set, eg. 0xffff1234, 0xffff1234ffffffff. +bool Assembler::IsImmMovn(uint64_t imm, unsigned reg_size) { + return IsImmMovz(~imm, reg_size); +} + + +// The movz instruction can generate immediates containing an arbitrary 16-bit +// value, with remaining bits clear, eg. 0x00001234, 0x0000123400000000. 
+bool Assembler::IsImmMovz(uint64_t imm, unsigned reg_size) { + VIXL_ASSERT((reg_size == kXRegSize) || (reg_size == kWRegSize)); + return CountClearHalfWords(imm, reg_size) >= ((reg_size / 16) - 1); +} + + +// Test if a given value can be encoded in the immediate field of a logical +// instruction. +// If it can be encoded, the function returns true, and values pointed to by n, +// imm_s and imm_r are updated with immediates encoded in the format required +// by the corresponding fields in the logical instruction. +// If it can not be encoded, the function returns false, and the values pointed +// to by n, imm_s and imm_r are undefined. +bool Assembler::IsImmLogical(uint64_t value, + unsigned width, + unsigned* n, + unsigned* imm_s, + unsigned* imm_r) { + VIXL_ASSERT((width == kWRegSize) || (width == kXRegSize)); + + bool negate = false; + + // Logical immediates are encoded using parameters n, imm_s and imm_r using + // the following table: + // + // N imms immr size S R + // 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr) + // 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr) + // 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr) + // 0 110sss xxxrrr 8 UInt(sss) UInt(rrr) + // 0 1110ss xxxxrr 4 UInt(ss) UInt(rr) + // 0 11110s xxxxxr 2 UInt(s) UInt(r) + // (s bits must not be all set) + // + // A pattern is constructed of size bits, where the least significant S+1 bits + // are set. The pattern is rotated right by R, and repeated across a 32 or + // 64-bit value, depending on destination register width. + // + // Put another way: the basic format of a logical immediate is a single + // contiguous stretch of 1 bits, repeated across the whole word at intervals + // given by a power of 2. 
To identify them quickly, we first locate the + // lowest stretch of 1 bits, then the next 1 bit above that; that combination + // is different for every logical immediate, so it gives us all the + // information we need to identify the only logical immediate that our input + // could be, and then we simply check if that's the value we actually have. + // + // (The rotation parameter does give the possibility of the stretch of 1 bits + // going 'round the end' of the word. To deal with that, we observe that in + // any situation where that happens the bitwise NOT of the value is also a + // valid logical immediate. So we simply invert the input whenever its low bit + // is set, and then we know that the rotated case can't arise.) + + if (value & 1) { + // If the low bit is 1, negate the value, and set a flag to remember that we + // did (so that we can adjust the return values appropriately). + negate = true; + value = ~value; + } + + if (width == kWRegSize) { + // To handle 32-bit logical immediates, the very easiest thing is to repeat + // the input value twice to make a 64-bit word. The correct encoding of that + // as a logical immediate will also be the correct encoding of the 32-bit + // value. + + // Avoid making the assumption that the most-significant 32 bits are zero by + // shifting the value left and duplicating it. + value <<= kWRegSize; + value |= value >> kWRegSize; + } + + // The basic analysis idea: imagine our input word looks like this. + // + // 0011111000111110001111100011111000111110001111100011111000111110 + // c b a + // |<--d-->| + // + // We find the lowest set bit (as an actual power-of-2 value, not its index) + // and call it a. Then we add a to our original number, which wipes out the + // bottommost stretch of set bits and replaces it with a 1 carried into the + // next zero bit. 
Then we look for the new lowest set bit, which is in + // position b, and subtract it, so now our number is just like the original + // but with the lowest stretch of set bits completely gone. Now we find the + // lowest set bit again, which is position c in the diagram above. Then we'll + // measure the distance d between bit positions a and c (using CLZ), and that + // tells us that the only valid logical immediate that could possibly be equal + // to this number is the one in which a stretch of bits running from a to just + // below b is replicated every d bits. + uint64_t a = LowestSetBit(value); + uint64_t value_plus_a = value + a; + uint64_t b = LowestSetBit(value_plus_a); + uint64_t value_plus_a_minus_b = value_plus_a - b; + uint64_t c = LowestSetBit(value_plus_a_minus_b); + + int d, clz_a, out_n; + uint64_t mask; + + if (c != 0) { + // The general case, in which there is more than one stretch of set bits. + // Compute the repeat distance d, and set up a bitmask covering the basic + // unit of repetition (i.e. a word with the bottom d bits set). Also, in all + // of these cases the N bit of the output will be zero. + clz_a = CountLeadingZeros(a, kXRegSize); + int clz_c = CountLeadingZeros(c, kXRegSize); + d = clz_a - clz_c; + mask = ((UINT64_C(1) << d) - 1); + out_n = 0; + } else { + // Handle degenerate cases. + // + // If any of those 'find lowest set bit' operations didn't find a set bit at + // all, then the word will have been zero thereafter, so in particular the + // last lowest_set_bit operation will have returned zero. So we can test for + // all the special case conditions in one go by seeing if c is zero. + if (a == 0) { + // The input was zero (or all 1 bits, which will come to here too after we + // inverted it at the start of the function), for which we just return + // false. 
+ return false; + } else { + // Otherwise, if c was zero but a was not, then there's just one stretch + // of set bits in our word, meaning that we have the trivial case of + // d == 64 and only one 'repetition'. Set up all the same variables as in + // the general case above, and set the N bit in the output. + clz_a = CountLeadingZeros(a, kXRegSize); + d = 64; + mask = ~UINT64_C(0); + out_n = 1; + } + } + + // If the repeat period d is not a power of two, it can't be encoded. + if (!IsPowerOf2(d)) { + return false; + } + + if (((b - a) & ~mask) != 0) { + // If the bit stretch (b - a) does not fit within the mask derived from the + // repeat period, then fail. + return false; + } + + // The only possible option is b - a repeated every d bits. Now we're going to + // actually construct the valid logical immediate derived from that + // specification, and see if it equals our original input. + // + // To repeat a value every d bits, we multiply it by a number of the form + // (1 + 2^d + 2^(2d) + ...), i.e. 0x0001000100010001 or similar. These can + // be derived using a table lookup on CLZ(d). + static const uint64_t multipliers[] = { + 0x0000000000000001UL, + 0x0000000100000001UL, + 0x0001000100010001UL, + 0x0101010101010101UL, + 0x1111111111111111UL, + 0x5555555555555555UL, + }; + uint64_t multiplier = multipliers[CountLeadingZeros(d, kXRegSize) - 57]; + uint64_t candidate = (b - a) * multiplier; + + if (value != candidate) { + // The candidate pattern doesn't match our input value, so fail. + return false; + } + + // We have a match! This is a valid logical immediate, so now we have to + // construct the bits and pieces of the instruction encoding that generates + // it. + + // Count the set bits in our basic stretch. The special case of clz(0) == -1 + // makes the answer come out right for stretches that reach the very top of + // the word (e.g. numbers like 0xffffc00000000000). + int clz_b = (b == 0) ? 
-1 : CountLeadingZeros(b, kXRegSize); + int s = clz_a - clz_b; + + // Decide how many bits to rotate right by, to put the low bit of that basic + // stretch in position a. + int r; + if (negate) { + // If we inverted the input right at the start of this function, here's + // where we compensate: the number of set bits becomes the number of clear + // bits, and the rotation count is based on position b rather than position + // a (since b is the location of the 'lowest' 1 bit after inversion). + s = d - s; + r = (clz_b + 1) & (d - 1); + } else { + r = (clz_a + 1) & (d - 1); + } + + // Now we're done, except for having to encode the S output in such a way that + // it gives both the number of set bits and the length of the repeated + // segment. The s field is encoded like this: + // + // imms size S + // ssssss 64 UInt(ssssss) + // 0sssss 32 UInt(sssss) + // 10ssss 16 UInt(ssss) + // 110sss 8 UInt(sss) + // 1110ss 4 UInt(ss) + // 11110s 2 UInt(s) + // + // So we 'or' (-d << 1) with our computed s to form imms. + if ((n != NULL) || (imm_s != NULL) || (imm_r != NULL)) { + *n = out_n; + *imm_s = ((-d << 1) | (s - 1)) & 0x3f; + *imm_r = r; + } + + return true; +} + + +LoadStoreOp Assembler::LoadOpFor(const CPURegister& rt) { + VIXL_ASSERT(rt.IsValid()); + if (rt.IsRegister()) { + return rt.Is64Bits() ? LDR_x : LDR_w; + } else { + VIXL_ASSERT(rt.IsVRegister()); + switch (rt.SizeInBits()) { + case kBRegSize: return LDR_b; + case kHRegSize: return LDR_h; + case kSRegSize: return LDR_s; + case kDRegSize: return LDR_d; + default: + VIXL_ASSERT(rt.IsQ()); + return LDR_q; + } + } +} + + +LoadStoreOp Assembler::StoreOpFor(const CPURegister& rt) { + VIXL_ASSERT(rt.IsValid()); + if (rt.IsRegister()) { + return rt.Is64Bits() ? 
STR_x : STR_w; + } else { + VIXL_ASSERT(rt.IsVRegister()); + switch (rt.SizeInBits()) { + case kBRegSize: return STR_b; + case kHRegSize: return STR_h; + case kSRegSize: return STR_s; + case kDRegSize: return STR_d; + default: + VIXL_ASSERT(rt.IsQ()); + return STR_q; + } + } +} + + +LoadStorePairOp Assembler::StorePairOpFor(const CPURegister& rt, + const CPURegister& rt2) { + VIXL_ASSERT(AreSameSizeAndType(rt, rt2)); + USE(rt2); + if (rt.IsRegister()) { + return rt.Is64Bits() ? STP_x : STP_w; + } else { + VIXL_ASSERT(rt.IsVRegister()); + switch (rt.SizeInBytes()) { + case kSRegSizeInBytes: return STP_s; + case kDRegSizeInBytes: return STP_d; + default: + VIXL_ASSERT(rt.IsQ()); + return STP_q; + } + } +} + + +LoadStorePairOp Assembler::LoadPairOpFor(const CPURegister& rt, + const CPURegister& rt2) { + VIXL_ASSERT((STP_w | LoadStorePairLBit) == LDP_w); + return static_cast<LoadStorePairOp>(StorePairOpFor(rt, rt2) | + LoadStorePairLBit); +} + + +LoadStorePairNonTemporalOp Assembler::StorePairNonTemporalOpFor( + const CPURegister& rt, const CPURegister& rt2) { + VIXL_ASSERT(AreSameSizeAndType(rt, rt2)); + USE(rt2); + if (rt.IsRegister()) { + return rt.Is64Bits() ? STNP_x : STNP_w; + } else { + VIXL_ASSERT(rt.IsVRegister()); + switch (rt.SizeInBytes()) { + case kSRegSizeInBytes: return STNP_s; + case kDRegSizeInBytes: return STNP_d; + default: + VIXL_ASSERT(rt.IsQ()); + return STNP_q; + } + } +} + + +LoadStorePairNonTemporalOp Assembler::LoadPairNonTemporalOpFor( + const CPURegister& rt, const CPURegister& rt2) { + VIXL_ASSERT((STNP_w | LoadStorePairNonTemporalLBit) == LDNP_w); + return static_cast<LoadStorePairNonTemporalOp>( + StorePairNonTemporalOpFor(rt, rt2) | LoadStorePairNonTemporalLBit); +} + + +LoadLiteralOp Assembler::LoadLiteralOpFor(const CPURegister& rt) { + if (rt.IsRegister()) { + return rt.IsX() ? 
LDR_x_lit : LDR_w_lit; + } else { + VIXL_ASSERT(rt.IsVRegister()); + switch (rt.SizeInBytes()) { + case kSRegSizeInBytes: return LDR_s_lit; + case kDRegSizeInBytes: return LDR_d_lit; + default: + VIXL_ASSERT(rt.IsQ()); + return LDR_q_lit; + } + } +} + + +bool Assembler::CPUHas(const CPURegister& rt) const { + // Core registers are available without any particular CPU features. + if (rt.IsRegister()) return true; + VIXL_ASSERT(rt.IsVRegister()); + // The architecture does not allow FP and NEON to be implemented separately, + // but we can crudely categorise them based on register size, since FP only + // uses D, S and (occasionally) H registers. + if (rt.IsH() || rt.IsS() || rt.IsD()) { + return CPUHas(CPUFeatures::kFP) || CPUHas(CPUFeatures::kNEON); + } + VIXL_ASSERT(rt.IsB() || rt.IsQ()); + return CPUHas(CPUFeatures::kNEON); +} + + +bool Assembler::CPUHas(const CPURegister& rt, const CPURegister& rt2) const { + // This is currently only used for loads and stores, where rt and rt2 must + // have the same size and type. We could extend this to cover other cases if + // necessary, but for now we can avoid checking both registers. 
+ VIXL_ASSERT(AreSameSizeAndType(rt, rt2)); + USE(rt2); + return CPUHas(rt); +} + + +bool Assembler::CPUHas(SystemRegister sysreg) const { + switch (sysreg) { + case RNDR: + case RNDRRS: + return CPUHas(CPUFeatures::kRNG); + case FPCR: + case NZCV: + break; + } + return true; +} + + +bool AreAliased(const CPURegister& reg1, const CPURegister& reg2, + const CPURegister& reg3, const CPURegister& reg4, + const CPURegister& reg5, const CPURegister& reg6, + const CPURegister& reg7, const CPURegister& reg8) { + int number_of_valid_regs = 0; + int number_of_valid_fpregs = 0; + + RegList unique_regs = 0; + RegList unique_fpregs = 0; + + const CPURegister regs[] = {reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8}; + + for (unsigned i = 0; i < sizeof(regs) / sizeof(regs[0]); i++) { + if (regs[i].IsRegister()) { + number_of_valid_regs++; + unique_regs |= regs[i].Bit(); + } else if (regs[i].IsVRegister()) { + number_of_valid_fpregs++; + unique_fpregs |= regs[i].Bit(); + } else { + VIXL_ASSERT(!regs[i].IsValid()); + } + } + + int number_of_unique_regs = CountSetBits(unique_regs); + int number_of_unique_fpregs = CountSetBits(unique_fpregs); + + VIXL_ASSERT(number_of_valid_regs >= number_of_unique_regs); + VIXL_ASSERT(number_of_valid_fpregs >= number_of_unique_fpregs); + + return (number_of_valid_regs != number_of_unique_regs) || + (number_of_valid_fpregs != number_of_unique_fpregs); +} + + +bool AreSameSizeAndType(const CPURegister& reg1, const CPURegister& reg2, + const CPURegister& reg3, const CPURegister& reg4, + const CPURegister& reg5, const CPURegister& reg6, + const CPURegister& reg7, const CPURegister& reg8) { + VIXL_ASSERT(reg1.IsValid()); + bool match = true; + match &= !reg2.IsValid() || reg2.IsSameSizeAndType(reg1); + match &= !reg3.IsValid() || reg3.IsSameSizeAndType(reg1); + match &= !reg4.IsValid() || reg4.IsSameSizeAndType(reg1); + match &= !reg5.IsValid() || reg5.IsSameSizeAndType(reg1); + match &= !reg6.IsValid() || reg6.IsSameSizeAndType(reg1); + match &= 
!reg7.IsValid() || reg7.IsSameSizeAndType(reg1); + match &= !reg8.IsValid() || reg8.IsSameSizeAndType(reg1); + return match; +} + +bool AreEven(const CPURegister& reg1, + const CPURegister& reg2, + const CPURegister& reg3, + const CPURegister& reg4, + const CPURegister& reg5, + const CPURegister& reg6, + const CPURegister& reg7, + const CPURegister& reg8) { + VIXL_ASSERT(reg1.IsValid()); + bool even = (reg1.code() % 2) == 0; + even &= !reg2.IsValid() || ((reg2.code() % 2) == 0); + even &= !reg3.IsValid() || ((reg3.code() % 2) == 0); + even &= !reg4.IsValid() || ((reg4.code() % 2) == 0); + even &= !reg5.IsValid() || ((reg5.code() % 2) == 0); + even &= !reg6.IsValid() || ((reg6.code() % 2) == 0); + even &= !reg7.IsValid() || ((reg7.code() % 2) == 0); + even &= !reg8.IsValid() || ((reg8.code() % 2) == 0); + return even; +} + +bool AreConsecutive(const CPURegister& reg1, + const CPURegister& reg2, + const CPURegister& reg3, + const CPURegister& reg4) { + VIXL_ASSERT(reg1.IsValid()); + + if (!reg2.IsValid()) { + return true; + } else if (reg2.code() != ((reg1.code() + 1) % kNumberOfRegisters)) { + return false; + } + + if (!reg3.IsValid()) { + return true; + } else if (reg3.code() != ((reg2.code() + 1) % kNumberOfRegisters)) { + return false; + } + + if (!reg4.IsValid()) { + return true; + } else if (reg4.code() != ((reg3.code() + 1) % kNumberOfRegisters)) { + return false; + } + + return true; +} + +bool AreSameFormat(const VRegister& reg1, const VRegister& reg2, + const VRegister& reg3, const VRegister& reg4) { + VIXL_ASSERT(reg1.IsValid()); + bool match = true; + match &= !reg2.IsValid() || reg2.IsSameFormat(reg1); + match &= !reg3.IsValid() || reg3.IsSameFormat(reg1); + match &= !reg4.IsValid() || reg4.IsSameFormat(reg1); + return match; +} + + +bool AreConsecutive(const VRegister& reg1, const VRegister& reg2, + const VRegister& reg3, const VRegister& reg4) { + VIXL_ASSERT(reg1.IsValid()); + bool match = true; + match &= !reg2.IsValid() || + (reg2.code() == 
((reg1.code() + 1) % kNumberOfVRegisters)); + match &= !reg3.IsValid() || + (reg3.code() == ((reg1.code() + 2) % kNumberOfVRegisters)); + match &= !reg4.IsValid() || + (reg4.code() == ((reg1.code() + 3) % kNumberOfVRegisters)); + return match; +} +} // namespace vixl diff --git a/js/src/jit/arm64/vixl/Assembler-vixl.h b/js/src/jit/arm64/vixl/Assembler-vixl.h new file mode 100644 index 0000000000..462b359eea --- /dev/null +++ b/js/src/jit/arm64/vixl/Assembler-vixl.h @@ -0,0 +1,4974 @@ +// Copyright 2015, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef VIXL_A64_ASSEMBLER_A64_H_ +#define VIXL_A64_ASSEMBLER_A64_H_ + +#include "jit/arm64/vixl/Cpu-vixl.h" +#include "jit/arm64/vixl/Globals-vixl.h" +#include "jit/arm64/vixl/Instructions-vixl.h" +#include "jit/arm64/vixl/MozBaseAssembler-vixl.h" +#include "jit/arm64/vixl/Utils-vixl.h" + +#include "jit/JitSpewer.h" + +#include "jit/shared/Assembler-shared.h" +#include "jit/shared/Disassembler-shared.h" +#include "jit/shared/IonAssemblerBufferWithConstantPools.h" + +#if defined(_M_ARM64) +#ifdef mvn +#undef mvn +#endif +#endif + +namespace vixl { + +using js::jit::BufferOffset; +using js::jit::Label; +using js::jit::Address; +using js::jit::BaseIndex; +using js::jit::DisassemblerSpew; + +using LabelDoc = DisassemblerSpew::LabelDoc; + +typedef uint64_t RegList; +static const int kRegListSizeInBits = sizeof(RegList) * 8; + + +// Registers. + +// Some CPURegister methods can return Register or VRegister types, so we need +// to declare them in advance. +class Register; +class VRegister; + +class CPURegister { + public: + enum RegisterType { + // The kInvalid value is used to detect uninitialized static instances, + // which are always zero-initialized before any constructors are called. 
+ kInvalid = 0, + kRegister, + kVRegister, + kFPRegister = kVRegister, + kNoRegister + }; + + constexpr CPURegister() : code_(0), size_(0), type_(kNoRegister) { + } + + constexpr CPURegister(unsigned code, unsigned size, RegisterType type) + : code_(code), size_(size), type_(type) { + } + + unsigned code() const { + VIXL_ASSERT(IsValid()); + return code_; + } + + RegisterType type() const { + VIXL_ASSERT(IsValidOrNone()); + return type_; + } + + RegList Bit() const { + VIXL_ASSERT(code_ < (sizeof(RegList) * 8)); + return IsValid() ? (static_cast<RegList>(1) << code_) : 0; + } + + unsigned size() const { + VIXL_ASSERT(IsValid()); + return size_; + } + + int SizeInBytes() const { + VIXL_ASSERT(IsValid()); + VIXL_ASSERT(size() % 8 == 0); + return size_ / 8; + } + + int SizeInBits() const { + VIXL_ASSERT(IsValid()); + return size_; + } + + bool Is8Bits() const { + VIXL_ASSERT(IsValid()); + return size_ == 8; + } + + bool Is16Bits() const { + VIXL_ASSERT(IsValid()); + return size_ == 16; + } + + bool Is32Bits() const { + VIXL_ASSERT(IsValid()); + return size_ == 32; + } + + bool Is64Bits() const { + VIXL_ASSERT(IsValid()); + return size_ == 64; + } + + bool Is128Bits() const { + VIXL_ASSERT(IsValid()); + return size_ == 128; + } + + bool IsValid() const { + if (IsValidRegister() || IsValidVRegister()) { + VIXL_ASSERT(!IsNone()); + return true; + } else { + // This assert is hit when the register has not been properly initialized. + // One cause for this can be an initialisation order fiasco. See + // https://isocpp.org/wiki/faq/ctors#static-init-order for some details. 
+ VIXL_ASSERT(IsNone()); + return false; + } + } + + bool IsValidRegister() const { + return IsRegister() && + ((size_ == kWRegSize) || (size_ == kXRegSize)) && + ((code_ < kNumberOfRegisters) || (code_ == kSPRegInternalCode)); + } + + bool IsValidVRegister() const { + return IsVRegister() && + ((size_ == kBRegSize) || (size_ == kHRegSize) || + (size_ == kSRegSize) || (size_ == kDRegSize) || + (size_ == kQRegSize)) && + (code_ < kNumberOfVRegisters); + } + + bool IsValidFPRegister() const { + return IsFPRegister() && (code_ < kNumberOfVRegisters); + } + + bool IsNone() const { + // kNoRegister types should always have size 0 and code 0. + VIXL_ASSERT((type_ != kNoRegister) || (code_ == 0)); + VIXL_ASSERT((type_ != kNoRegister) || (size_ == 0)); + + return type_ == kNoRegister; + } + + bool Aliases(const CPURegister& other) const { + VIXL_ASSERT(IsValidOrNone() && other.IsValidOrNone()); + return (code_ == other.code_) && (type_ == other.type_); + } + + bool Is(const CPURegister& other) const { + VIXL_ASSERT(IsValidOrNone() && other.IsValidOrNone()); + return Aliases(other) && (size_ == other.size_); + } + + bool IsZero() const { + VIXL_ASSERT(IsValid()); + return IsRegister() && (code_ == kZeroRegCode); + } + + bool IsSP() const { + VIXL_ASSERT(IsValid()); + return IsRegister() && (code_ == kSPRegInternalCode); + } + + bool IsRegister() const { + return type_ == kRegister; + } + + bool IsVRegister() const { + return type_ == kVRegister; + } + + bool IsFPRegister() const { + return IsS() || IsD(); + } + + bool IsW() const { return IsValidRegister() && Is32Bits(); } + bool IsX() const { return IsValidRegister() && Is64Bits(); } + + // These assertions ensure that the size and type of the register are as + // described. They do not consider the number of lanes that make up a vector. + // So, for example, Is8B() implies IsD(), and Is1D() implies IsD, but IsD() + // does not imply Is1D() or Is8B(). + // Check the number of lanes, ie. 
the format of the vector, using methods such + // as Is8B(), Is1D(), etc. in the VRegister class. + bool IsV() const { return IsVRegister(); } + bool IsB() const { return IsV() && Is8Bits(); } + bool IsH() const { return IsV() && Is16Bits(); } + bool IsS() const { return IsV() && Is32Bits(); } + bool IsD() const { return IsV() && Is64Bits(); } + bool IsQ() const { return IsV() && Is128Bits(); } + + const Register& W() const; + const Register& X() const; + const VRegister& V() const; + const VRegister& B() const; + const VRegister& H() const; + const VRegister& S() const; + const VRegister& D() const; + const VRegister& Q() const; + + bool IsSameSizeAndType(const CPURegister& other) const { + return (size_ == other.size_) && (type_ == other.type_); + } + + protected: + unsigned code_; + unsigned size_; + RegisterType type_; + + private: + bool IsValidOrNone() const { + return IsValid() || IsNone(); + } +}; + + +class Register : public CPURegister { + public: + Register() : CPURegister() {} + explicit Register(const CPURegister& other) + : CPURegister(other.code(), other.size(), other.type()) { + VIXL_ASSERT(IsValidRegister()); + } + constexpr Register(unsigned code, unsigned size) + : CPURegister(code, size, kRegister) {} + + constexpr Register(js::jit::Register r, unsigned size) + : CPURegister(r.code(), size, kRegister) {} + + bool IsValid() const { + VIXL_ASSERT(IsRegister() || IsNone()); + return IsValidRegister(); + } + + js::jit::Register asUnsized() const { + // asUnsized() is only ever used on temp registers or on registers that + // are known not to be SP, and there should be no risk of it being + // applied to SP. Check anyway. 
+ VIXL_ASSERT(code_ != kSPRegInternalCode); + return js::jit::Register::FromCode((js::jit::Register::Code)code_); + } + + + static const Register& WRegFromCode(unsigned code); + static const Register& XRegFromCode(unsigned code); + + private: + static const Register wregisters[]; + static const Register xregisters[]; +}; + + +class VRegister : public CPURegister { + public: + VRegister() : CPURegister(), lanes_(1) {} + explicit VRegister(const CPURegister& other) + : CPURegister(other.code(), other.size(), other.type()), lanes_(1) { + VIXL_ASSERT(IsValidVRegister()); + VIXL_ASSERT(IsPowerOf2(lanes_) && (lanes_ <= 16)); + } + constexpr VRegister(unsigned code, unsigned size, unsigned lanes = 1) + : CPURegister(code, size, kVRegister), lanes_(lanes) { + // VIXL_ASSERT(IsPowerOf2(lanes_) && (lanes_ <= 16)); + } + constexpr VRegister(js::jit::FloatRegister r) + : CPURegister(r.encoding(), r.size() * 8, kVRegister), lanes_(1) { + } + constexpr VRegister(js::jit::FloatRegister r, unsigned size) + : CPURegister(r.encoding(), size, kVRegister), lanes_(1) { + } + VRegister(unsigned code, VectorFormat format) + : CPURegister(code, RegisterSizeInBitsFromFormat(format), kVRegister), + lanes_(IsVectorFormat(format) ? 
LaneCountFromFormat(format) : 1) { + VIXL_ASSERT(IsPowerOf2(lanes_) && (lanes_ <= 16)); + } + + bool IsValid() const { + VIXL_ASSERT(IsVRegister() || IsNone()); + return IsValidVRegister(); + } + + static const VRegister& BRegFromCode(unsigned code); + static const VRegister& HRegFromCode(unsigned code); + static const VRegister& SRegFromCode(unsigned code); + static const VRegister& DRegFromCode(unsigned code); + static const VRegister& QRegFromCode(unsigned code); + static const VRegister& VRegFromCode(unsigned code); + + VRegister V8B() const { return VRegister(code_, kDRegSize, 8); } + VRegister V16B() const { return VRegister(code_, kQRegSize, 16); } + VRegister V4H() const { return VRegister(code_, kDRegSize, 4); } + VRegister V8H() const { return VRegister(code_, kQRegSize, 8); } + VRegister V2S() const { return VRegister(code_, kDRegSize, 2); } + VRegister V4S() const { return VRegister(code_, kQRegSize, 4); } + VRegister V2D() const { return VRegister(code_, kQRegSize, 2); } + VRegister V1D() const { return VRegister(code_, kDRegSize, 1); } + + bool Is8B() const { return (Is64Bits() && (lanes_ == 8)); } + bool Is16B() const { return (Is128Bits() && (lanes_ == 16)); } + bool Is4H() const { return (Is64Bits() && (lanes_ == 4)); } + bool Is8H() const { return (Is128Bits() && (lanes_ == 8)); } + bool Is2S() const { return (Is64Bits() && (lanes_ == 2)); } + bool Is4S() const { return (Is128Bits() && (lanes_ == 4)); } + bool Is1D() const { return (Is64Bits() && (lanes_ == 1)); } + bool Is2D() const { return (Is128Bits() && (lanes_ == 2)); } + + // For consistency, we assert the number of lanes of these scalar registers, + // even though there are no vectors of equivalent total size with which they + // could alias. 
+ bool Is1B() const { + VIXL_ASSERT(!(Is8Bits() && IsVector())); + return Is8Bits(); + } + bool Is1H() const { + VIXL_ASSERT(!(Is16Bits() && IsVector())); + return Is16Bits(); + } + bool Is1S() const { + VIXL_ASSERT(!(Is32Bits() && IsVector())); + return Is32Bits(); + } + + bool IsLaneSizeB() const { return LaneSizeInBits() == kBRegSize; } + bool IsLaneSizeH() const { return LaneSizeInBits() == kHRegSize; } + bool IsLaneSizeS() const { return LaneSizeInBits() == kSRegSize; } + bool IsLaneSizeD() const { return LaneSizeInBits() == kDRegSize; } + + int lanes() const { + return lanes_; + } + + bool IsScalar() const { + return lanes_ == 1; + } + + bool IsVector() const { + return lanes_ > 1; + } + + bool IsSameFormat(const VRegister& other) const { + return (size_ == other.size_) && (lanes_ == other.lanes_); + } + + unsigned LaneSizeInBytes() const { + return SizeInBytes() / lanes_; + } + + unsigned LaneSizeInBits() const { + return LaneSizeInBytes() * 8; + } + + private: + static const VRegister bregisters[]; + static const VRegister hregisters[]; + static const VRegister sregisters[]; + static const VRegister dregisters[]; + static const VRegister qregisters[]; + static const VRegister vregisters[]; + int lanes_; +}; + + +// Backward compatibility for FPRegisters. +typedef VRegister FPRegister; + +// No*Reg is used to indicate an unused argument, or an error case. Note that +// these all compare equal (using the Is() method). The Register and VRegister +// variants are provided for convenience. +const Register NoReg; +const VRegister NoVReg; +const FPRegister NoFPReg; // For backward compatibility. 
const CPURegister NoCPUReg;


// Define the general-purpose register constants wN (kWRegSize) and xN
// (kXRegSize) for each code N supplied by REGISTER_CODE_LIST.
// NOTE(review): REGISTER_CODE_LIST is defined elsewhere; presumably it applies
// its argument once per register code - confirm.
#define DEFINE_REGISTERS(N)  \
constexpr Register w##N(N, kWRegSize);  \
constexpr Register x##N(N, kXRegSize);
REGISTER_CODE_LIST(DEFINE_REGISTERS)
#undef DEFINE_REGISTERS
// The stack pointer uses the dedicated internal code kSPRegInternalCode rather
// than one of the codes generated above.
constexpr Register wsp(kSPRegInternalCode, kWRegSize);
constexpr Register sp(kSPRegInternalCode, kXRegSize);


// Define the vector/floating-point register constants bN, hN, sN, dN and qN
// with their respective sizes for each code N. vN is created with kQRegSize
// and the default lane count of 1.
#define DEFINE_VREGISTERS(N)  \
constexpr VRegister b##N(N, kBRegSize);  \
constexpr VRegister h##N(N, kHRegSize);  \
constexpr VRegister s##N(N, kSRegSize);  \
constexpr VRegister d##N(N, kDRegSize);  \
constexpr VRegister q##N(N, kQRegSize);  \
constexpr VRegister v##N(N, kQRegSize);
REGISTER_CODE_LIST(DEFINE_VREGISTERS)
#undef DEFINE_VREGISTERS


// Register aliases.
constexpr Register ip0 = x16;
constexpr Register ip1 = x17;
constexpr Register lr = x30;
constexpr Register xzr = x31;
constexpr Register wzr = w31;


// AreAliased returns true if any two of the valid arguments overlap, that is,
// have the same register type and the same code. Size is not compared, so for
// example w0 and x0 alias each other. Arguments that are not valid (such as
// NoReg) are ignored. The system stack pointer may be specified.
bool AreAliased(const CPURegister& reg1,
                const CPURegister& reg2,
                const CPURegister& reg3 = NoReg,
                const CPURegister& reg4 = NoReg,
                const CPURegister& reg5 = NoReg,
                const CPURegister& reg6 = NoReg,
                const CPURegister& reg7 = NoReg,
                const CPURegister& reg8 = NoReg);


// AreSameSizeAndType returns true if every valid argument has the same size
// and the same type as reg1. The system stack pointer may be specified.
// Each argument that is not valid (such as NoCPUReg) is skipped individually;
// a valid argument that follows a skipped one is still compared with reg1.
// reg1 itself must be valid (not NoCPUReg).
bool AreSameSizeAndType(const CPURegister& reg1,
                        const CPURegister& reg2,
                        const CPURegister& reg3 = NoCPUReg,
                        const CPURegister& reg4 = NoCPUReg,
                        const CPURegister& reg5 = NoCPUReg,
                        const CPURegister& reg6 = NoCPUReg,
                        const CPURegister& reg7 = NoCPUReg,
                        const CPURegister& reg8 = NoCPUReg);

// AreEven returns true if reg1 and every other valid argument have an even
// register code. Each argument that is not valid (such as NoReg) is skipped
// individually; a valid argument that follows a skipped one is still checked.
// reg1 itself must be valid (not NoCPUReg).
bool AreEven(const CPURegister& reg1,
             const CPURegister& reg2,
             const CPURegister& reg3 = NoReg,
             const CPURegister& reg4 = NoReg,
             const CPURegister& reg5 = NoReg,
             const CPURegister& reg6 = NoReg,
             const CPURegister& reg7 = NoReg,
             const CPURegister& reg8 = NoReg);

// AreConsecutive returns true if the code of each register is one more than
// the code of the preceding argument, wrapping around modulo
// kNumberOfRegisters. Checking stops at the first argument that is not valid,
// so that argument and all subsequent arguments are ignored. reg1 itself must
// be valid (not NoCPUReg).
bool AreConsecutive(const CPURegister& reg1,
                    const CPURegister& reg2,
                    const CPURegister& reg3 = NoCPUReg,
                    const CPURegister& reg4 = NoCPUReg);

// AreSameFormat returns true if every valid argument has the same vector
// format (register size and lane count) as reg1. Each argument that is not
// valid (such as NoVReg) is skipped individually; a valid argument that
// follows a skipped one is still compared with reg1. reg1 itself must be valid
// (not NoVReg).
bool AreSameFormat(const VRegister& reg1,
                   const VRegister& reg2,
                   const VRegister& reg3 = NoVReg,
                   const VRegister& reg4 = NoVReg);


// AreConsecutive returns true if reg2, reg3 and reg4, where valid, have codes
// equal to the code of reg1 plus 1, 2 and 3 respectively, wrapping around
// modulo kNumberOfVRegisters. Each argument that is not valid (such as NoVReg)
// is skipped individually; later arguments are still checked against the
// offset that belongs to their own position. reg1 itself must be valid
// (not NoVReg).
bool AreConsecutive(const VRegister& reg1,
                    const VRegister& reg2,
                    const VRegister& reg3 = NoVReg,
                    const VRegister& reg4 = NoVReg);


// Lists of registers.
+class CPURegList { + public: + explicit CPURegList(CPURegister reg1, + CPURegister reg2 = NoCPUReg, + CPURegister reg3 = NoCPUReg, + CPURegister reg4 = NoCPUReg) + : list_(reg1.Bit() | reg2.Bit() | reg3.Bit() | reg4.Bit()), + size_(reg1.size()), type_(reg1.type()) { + VIXL_ASSERT(AreSameSizeAndType(reg1, reg2, reg3, reg4)); + VIXL_ASSERT(IsValid()); + } + + CPURegList(CPURegister::RegisterType type, unsigned size, RegList list) + : list_(list), size_(size), type_(type) { + VIXL_ASSERT(IsValid()); + } + + CPURegList(CPURegister::RegisterType type, unsigned size, + unsigned first_reg, unsigned last_reg) + : size_(size), type_(type) { + VIXL_ASSERT(((type == CPURegister::kRegister) && + (last_reg < kNumberOfRegisters)) || + ((type == CPURegister::kVRegister) && + (last_reg < kNumberOfVRegisters))); + VIXL_ASSERT(last_reg >= first_reg); + list_ = (UINT64_C(1) << (last_reg + 1)) - 1; + list_ &= ~((UINT64_C(1) << first_reg) - 1); + VIXL_ASSERT(IsValid()); + } + + CPURegister::RegisterType type() const { + VIXL_ASSERT(IsValid()); + return type_; + } + + // Combine another CPURegList into this one. Registers that already exist in + // this list are left unchanged. The type and size of the registers in the + // 'other' list must match those in this list. + void Combine(const CPURegList& other) { + VIXL_ASSERT(IsValid()); + VIXL_ASSERT(other.type() == type_); + VIXL_ASSERT(other.RegisterSizeInBits() == size_); + list_ |= other.list(); + } + + // Remove every register in the other CPURegList from this one. Registers that + // do not exist in this list are ignored. The type and size of the registers + // in the 'other' list must match those in this list. + void Remove(const CPURegList& other) { + VIXL_ASSERT(IsValid()); + VIXL_ASSERT(other.type() == type_); + VIXL_ASSERT(other.RegisterSizeInBits() == size_); + list_ &= ~other.list(); + } + + // Variants of Combine and Remove which take a single register. 
+ void Combine(const CPURegister& other) { + VIXL_ASSERT(other.type() == type_); + VIXL_ASSERT(other.size() == size_); + Combine(other.code()); + } + + void Remove(const CPURegister& other) { + VIXL_ASSERT(other.type() == type_); + VIXL_ASSERT(other.size() == size_); + Remove(other.code()); + } + + // Variants of Combine and Remove which take a single register by its code; + // the type and size of the register is inferred from this list. + void Combine(int code) { + VIXL_ASSERT(IsValid()); + VIXL_ASSERT(CPURegister(code, size_, type_).IsValid()); + list_ |= (UINT64_C(1) << code); + } + + void Remove(int code) { + VIXL_ASSERT(IsValid()); + VIXL_ASSERT(CPURegister(code, size_, type_).IsValid()); + list_ &= ~(UINT64_C(1) << code); + } + + static CPURegList Union(const CPURegList& list_1, const CPURegList& list_2) { + VIXL_ASSERT(list_1.type_ == list_2.type_); + VIXL_ASSERT(list_1.size_ == list_2.size_); + return CPURegList(list_1.type_, list_1.size_, list_1.list_ | list_2.list_); + } + static CPURegList Union(const CPURegList& list_1, + const CPURegList& list_2, + const CPURegList& list_3); + static CPURegList Union(const CPURegList& list_1, + const CPURegList& list_2, + const CPURegList& list_3, + const CPURegList& list_4); + + static CPURegList Intersection(const CPURegList& list_1, + const CPURegList& list_2) { + VIXL_ASSERT(list_1.type_ == list_2.type_); + VIXL_ASSERT(list_1.size_ == list_2.size_); + return CPURegList(list_1.type_, list_1.size_, list_1.list_ & list_2.list_); + } + static CPURegList Intersection(const CPURegList& list_1, + const CPURegList& list_2, + const CPURegList& list_3); + static CPURegList Intersection(const CPURegList& list_1, + const CPURegList& list_2, + const CPURegList& list_3, + const CPURegList& list_4); + + bool Overlaps(const CPURegList& other) const { + return (type_ == other.type_) && ((list_ & other.list_) != 0); + } + + RegList list() const { + VIXL_ASSERT(IsValid()); + return list_; + } + + void set_list(RegList new_list) { + 
VIXL_ASSERT(IsValid()); + list_ = new_list; + } + + // Remove all callee-saved registers from the list. This can be useful when + // preparing registers for an AAPCS64 function call, for example. + void RemoveCalleeSaved(); + + CPURegister PopLowestIndex(); + CPURegister PopHighestIndex(); + + // AAPCS64 callee-saved registers. + static CPURegList GetCalleeSaved(unsigned size = kXRegSize); + static CPURegList GetCalleeSavedV(unsigned size = kDRegSize); + + // AAPCS64 caller-saved registers. Note that this includes lr. + // TODO(all): Determine how we handle d8-d15 being callee-saved, but the top + // 64-bits being caller-saved. + static CPURegList GetCallerSaved(unsigned size = kXRegSize); + static CPURegList GetCallerSavedV(unsigned size = kDRegSize); + + bool IsEmpty() const { + VIXL_ASSERT(IsValid()); + return list_ == 0; + } + + bool IncludesAliasOf(const CPURegister& other) const { + VIXL_ASSERT(IsValid()); + return (type_ == other.type()) && ((other.Bit() & list_) != 0); + } + + bool IncludesAliasOf(int code) const { + VIXL_ASSERT(IsValid()); + return ((code & list_) != 0); + } + + int Count() const { + VIXL_ASSERT(IsValid()); + return CountSetBits(list_); + } + + unsigned RegisterSizeInBits() const { + VIXL_ASSERT(IsValid()); + return size_; + } + + unsigned RegisterSizeInBytes() const { + int size_in_bits = RegisterSizeInBits(); + VIXL_ASSERT((size_in_bits % 8) == 0); + return size_in_bits / 8; + } + + unsigned TotalSizeInBytes() const { + VIXL_ASSERT(IsValid()); + return RegisterSizeInBytes() * Count(); + } + + private: + RegList list_; + unsigned size_; + CPURegister::RegisterType type_; + + bool IsValid() const; +}; + + +// AAPCS64 callee-saved registers. +extern const CPURegList kCalleeSaved; +extern const CPURegList kCalleeSavedV; + + +// AAPCS64 caller-saved registers. Note that this includes lr. +extern const CPURegList kCallerSaved; +extern const CPURegList kCallerSavedV; + + +// Operand. 
+class Operand { + public: + // #<immediate> + // where <immediate> is int64_t. + // This is allowed to be an implicit constructor because Operand is + // a wrapper class that doesn't normally perform any type conversion. + Operand(int64_t immediate = 0); // NOLINT(runtime/explicit) + + // rm, {<shift> #<shift_amount>} + // where <shift> is one of {LSL, LSR, ASR, ROR}. + // <shift_amount> is uint6_t. + // This is allowed to be an implicit constructor because Operand is + // a wrapper class that doesn't normally perform any type conversion. + Operand(Register reg, + Shift shift = LSL, + unsigned shift_amount = 0); // NOLINT(runtime/explicit) + + // rm, {<extend> {#<shift_amount>}} + // where <extend> is one of {UXTB, UXTH, UXTW, UXTX, SXTB, SXTH, SXTW, SXTX}. + // <shift_amount> is uint2_t. + explicit Operand(Register reg, Extend extend, unsigned shift_amount = 0); + + bool IsImmediate() const; + bool IsShiftedRegister() const; + bool IsExtendedRegister() const; + bool IsZero() const; + + // This returns an LSL shift (<= 4) operand as an equivalent extend operand, + // which helps in the encoding of instructions that use the stack pointer. 
+ Operand ToExtendedRegister() const; + + int64_t immediate() const { + VIXL_ASSERT(IsImmediate()); + return immediate_; + } + + Register reg() const { + VIXL_ASSERT(IsShiftedRegister() || IsExtendedRegister()); + return reg_; + } + + CPURegister maybeReg() const { + if (IsShiftedRegister() || IsExtendedRegister()) + return reg_; + return NoCPUReg; + } + + Shift shift() const { + VIXL_ASSERT(IsShiftedRegister()); + return shift_; + } + + Extend extend() const { + VIXL_ASSERT(IsExtendedRegister()); + return extend_; + } + + unsigned shift_amount() const { + VIXL_ASSERT(IsShiftedRegister() || IsExtendedRegister()); + return shift_amount_; + } + + private: + int64_t immediate_; + Register reg_; + Shift shift_; + Extend extend_; + unsigned shift_amount_; +}; + + +// MemOperand represents the addressing mode of a load or store instruction. +class MemOperand { + public: + explicit MemOperand(Register base, + int64_t offset = 0, + AddrMode addrmode = Offset); + MemOperand(Register base, + Register regoffset, + Shift shift = LSL, + unsigned shift_amount = 0); + MemOperand(Register base, + Register regoffset, + Extend extend, + unsigned shift_amount = 0); + MemOperand(Register base, + const Operand& offset, + AddrMode addrmode = Offset); + + // Adapter constructors using C++11 delegating. + // TODO: If sp == kSPRegInternalCode, the xzr check isn't necessary. + explicit MemOperand(js::jit::Address addr) + : MemOperand(IsHiddenSP(addr.base) ? 
sp : Register(AsRegister(addr.base), 64), + (ptrdiff_t)addr.offset) { + } + + const Register& base() const { return base_; } + const Register& regoffset() const { return regoffset_; } + int64_t offset() const { return offset_; } + AddrMode addrmode() const { return addrmode_; } + Shift shift() const { return shift_; } + Extend extend() const { return extend_; } + unsigned shift_amount() const { return shift_amount_; } + bool IsImmediateOffset() const; + bool IsRegisterOffset() const; + bool IsPreIndex() const; + bool IsPostIndex() const; + + void AddOffset(int64_t offset); + + private: + Register base_; + Register regoffset_; + int64_t offset_; + AddrMode addrmode_; + Shift shift_; + Extend extend_; + unsigned shift_amount_; +}; + + +// Control whether or not position-independent code should be emitted. +enum PositionIndependentCodeOption { + // All code generated will be position-independent; all branches and + // references to labels generated with the Label class will use PC-relative + // addressing. + PositionIndependentCode, + + // Allow VIXL to generate code that refers to absolute addresses. With this + // option, it will not be possible to copy the code buffer and run it from a + // different address; code must be generated in its final location. + PositionDependentCode, + + // Allow VIXL to assume that the bottom 12 bits of the address will be + // constant, but that the top 48 bits may change. This allows `adrp` to + // function in systems which copy code between pages, but otherwise maintain + // 4KB page alignment. + PageOffsetDependentCode +}; + + +// Control how scaled- and unscaled-offset loads and stores are generated. +enum LoadStoreScalingOption { + // Prefer scaled-immediate-offset instructions, but emit unscaled-offset, + // register-offset, pre-index or post-index instructions if necessary. 
+ PreferScaledOffset, + + // Prefer unscaled-immediate-offset instructions, but emit scaled-offset, + // register-offset, pre-index or post-index instructions if necessary. + PreferUnscaledOffset, + + // Require scaled-immediate-offset instructions. + RequireScaledOffset, + + // Require unscaled-immediate-offset instructions. + RequireUnscaledOffset +}; + + +// Assembler. +class Assembler : public MozBaseAssembler { + public: + Assembler(PositionIndependentCodeOption pic = PositionIndependentCode); + + // System functions. + + // Finalize a code buffer of generated instructions. This function must be + // called before executing or copying code from the buffer. + void FinalizeCode(); + +#define COPYENUM(v) static const Condition v = vixl::v +#define COPYENUM_(v) static const Condition v = vixl::v##_ + COPYENUM(Equal); + COPYENUM(Zero); + COPYENUM(NotEqual); + COPYENUM(NonZero); + COPYENUM(AboveOrEqual); + COPYENUM(CarrySet); + COPYENUM(Below); + COPYENUM(CarryClear); + COPYENUM(Signed); + COPYENUM(NotSigned); + COPYENUM(Overflow); + COPYENUM(NoOverflow); + COPYENUM(Above); + COPYENUM(BelowOrEqual); + COPYENUM_(GreaterThanOrEqual); + COPYENUM_(LessThan); + COPYENUM_(GreaterThan); + COPYENUM_(LessThanOrEqual); + COPYENUM(Always); + COPYENUM(Never); +#undef COPYENUM +#undef COPYENUM_ + + // Bit set when a DoubleCondition does not map to a single ARM condition. + // The MacroAssembler must special-case these conditions, or else + // ConditionFromDoubleCondition will complain. + static const int DoubleConditionBitSpecial = 0x100; + + enum DoubleCondition { + DoubleOrdered = Condition::vc, + DoubleEqual = Condition::eq, + DoubleNotEqual = Condition::ne | DoubleConditionBitSpecial, + DoubleGreaterThan = Condition::gt, + DoubleGreaterThanOrEqual = Condition::ge, + DoubleLessThan = Condition::lo, // Could also use Condition::mi. + DoubleLessThanOrEqual = Condition::ls, + + // If either operand is NaN, these conditions always evaluate to true. 
+ DoubleUnordered = Condition::vs, + DoubleEqualOrUnordered = Condition::eq | DoubleConditionBitSpecial, + DoubleNotEqualOrUnordered = Condition::ne, + DoubleGreaterThanOrUnordered = Condition::hi, + DoubleGreaterThanOrEqualOrUnordered = Condition::hs, + DoubleLessThanOrUnordered = Condition::lt, + DoubleLessThanOrEqualOrUnordered = Condition::le + }; + + static inline Condition InvertCondition(Condition cond) { + // Conditions al and nv behave identically, as "always true". They can't be + // inverted, because there is no "always false" condition. + VIXL_ASSERT((cond != al) && (cond != nv)); + return static_cast<Condition>(cond ^ 1); + } + + // This changes the condition code used for "cmp a, b" into the condition code to use for "cmp b, a", i.e. after swapping the operands. NOTE(review): the signed cases below (gt/le/ge/lt) return the logical negation rather than the operand-swapped condition (gt <-> lt, ge <-> le), unlike the unsigned cases (hi <-> lo, hs <-> ls); confirm this is intended. + static inline Condition InvertCmpCondition(Condition cond) { + // Conditions al and nv behave identically, as "always true". They can't be + // inverted, because there is no "always false" condition. + switch (cond) { + case eq: + case ne: + return cond; + case gt: + return le; + case le: + return gt; + case ge: + return lt; + case lt: + return ge; + case hi: + return lo; + case lo: + return hi; + case hs: + return ls; + case ls: + return hs; + case mi: + return pl; + case pl: + return mi; + default: + MOZ_CRASH("TODO: figure this case out."); + } + return static_cast<Condition>(cond ^ 1); + } + + static inline DoubleCondition InvertCondition(DoubleCondition cond) { + switch (cond) { + case DoubleOrdered: + return DoubleUnordered; + case DoubleEqual: + return DoubleNotEqualOrUnordered; + case DoubleNotEqual: + return DoubleEqualOrUnordered; + case DoubleGreaterThan: + return DoubleLessThanOrEqualOrUnordered; + case DoubleGreaterThanOrEqual: + return DoubleLessThanOrUnordered; + case DoubleLessThan: + return DoubleGreaterThanOrEqualOrUnordered; + case DoubleLessThanOrEqual: + return DoubleGreaterThanOrUnordered; + case DoubleUnordered: + return DoubleOrdered; + case DoubleEqualOrUnordered: + return DoubleNotEqual; + case 
DoubleNotEqualOrUnordered: + return DoubleEqual; + case DoubleGreaterThanOrUnordered: + return DoubleLessThanOrEqual; + case DoubleGreaterThanOrEqualOrUnordered: + return DoubleLessThan; + case DoubleLessThanOrUnordered: + return DoubleGreaterThanOrEqual; + case DoubleLessThanOrEqualOrUnordered: + return DoubleGreaterThan; + default: + MOZ_CRASH("Bad condition"); + } + } + + static inline Condition ConditionFromDoubleCondition(DoubleCondition cond) { + VIXL_ASSERT(!(cond & DoubleConditionBitSpecial)); + return static_cast<Condition>(cond); + } + + // Instruction set functions. + + // Branch / Jump instructions. + // Branch to register. + void br(const Register& xn); + static void br(Instruction* at, const Register& xn); + + // Branch with link to register. + void blr(const Register& xn); + static void blr(Instruction* at, const Register& blr); + + // Branch to register with return hint. + void ret(const Register& xn = lr); + + // Unconditional branch to label. + BufferOffset b(Label* label); + + // Conditional branch to label. + BufferOffset b(Label* label, Condition cond); + + // Unconditional branch to PC offset. + BufferOffset b(int imm26, const LabelDoc& doc); + static void b(Instruction* at, int imm26); + + // Conditional branch to PC offset. + BufferOffset b(int imm19, Condition cond, const LabelDoc& doc); + static void b(Instruction*at, int imm19, Condition cond); + + // Branch with link to label. + void bl(Label* label); + + // Branch with link to PC offset. + void bl(int imm26, const LabelDoc& doc); + static void bl(Instruction* at, int imm26); + + // Compare and branch to label if zero. + void cbz(const Register& rt, Label* label); + + // Compare and branch to PC offset if zero. + void cbz(const Register& rt, int imm19, const LabelDoc& doc); + static void cbz(Instruction* at, const Register& rt, int imm19); + + // Compare and branch to label if not zero. + void cbnz(const Register& rt, Label* label); + + // Compare and branch to PC offset if not zero. 
+ void cbnz(const Register& rt, int imm19, const LabelDoc& doc); + static void cbnz(Instruction* at, const Register& rt, int imm19); + + // Table lookup from one register. + void tbl(const VRegister& vd, + const VRegister& vn, + const VRegister& vm); + + // Table lookup from two registers. + void tbl(const VRegister& vd, + const VRegister& vn, + const VRegister& vn2, + const VRegister& vm); + + // Table lookup from three registers. + void tbl(const VRegister& vd, + const VRegister& vn, + const VRegister& vn2, + const VRegister& vn3, + const VRegister& vm); + + // Table lookup from four registers. + void tbl(const VRegister& vd, + const VRegister& vn, + const VRegister& vn2, + const VRegister& vn3, + const VRegister& vn4, + const VRegister& vm); + + // Table lookup extension from one register. + void tbx(const VRegister& vd, + const VRegister& vn, + const VRegister& vm); + + // Table lookup extension from two registers. + void tbx(const VRegister& vd, + const VRegister& vn, + const VRegister& vn2, + const VRegister& vm); + + // Table lookup extension from three registers. + void tbx(const VRegister& vd, + const VRegister& vn, + const VRegister& vn2, + const VRegister& vn3, + const VRegister& vm); + + // Table lookup extension from four registers. + void tbx(const VRegister& vd, + const VRegister& vn, + const VRegister& vn2, + const VRegister& vn3, + const VRegister& vn4, + const VRegister& vm); + + // Test bit and branch to label if zero. + void tbz(const Register& rt, unsigned bit_pos, Label* label); + + // Test bit and branch to PC offset if zero. + void tbz(const Register& rt, unsigned bit_pos, int imm14, const LabelDoc& doc); + static void tbz(Instruction* at, const Register& rt, unsigned bit_pos, int imm14); + + // Test bit and branch to label if not zero. + void tbnz(const Register& rt, unsigned bit_pos, Label* label); + + // Test bit and branch to PC offset if not zero. 
+ void tbnz(const Register& rt, unsigned bit_pos, int imm14, const LabelDoc& doc); + static void tbnz(Instruction* at, const Register& rt, unsigned bit_pos, int imm14); + + // Address calculation instructions. + // Calculate a PC-relative address. Unlike for branches the offset in adr is + // unscaled (i.e. the result can be unaligned). + + // Calculate the address of a label. + void adr(const Register& rd, Label* label); + + // Calculate the address of a PC offset. + void adr(const Register& rd, int imm21, const LabelDoc& doc); + static void adr(Instruction* at, const Register& rd, int imm21); + + // Calculate the page address of a label. + void adrp(const Register& rd, Label* label); + + // Calculate the page address of a PC offset. + void adrp(const Register& rd, int imm21, const LabelDoc& doc); + static void adrp(Instruction* at, const Register& rd, int imm21); + + // Data Processing instructions. + // Add. + void add(const Register& rd, + const Register& rn, + const Operand& operand); + + // Add and update status flags. + void adds(const Register& rd, + const Register& rn, + const Operand& operand); + + // Compare negative. + void cmn(const Register& rn, const Operand& operand); + + // Subtract. + void sub(const Register& rd, + const Register& rn, + const Operand& operand); + + // Subtract and update status flags. + void subs(const Register& rd, + const Register& rn, + const Operand& operand); + + // Compare. + void cmp(const Register& rn, const Operand& operand); + + // Negate. + void neg(const Register& rd, + const Operand& operand); + + // Negate and update status flags. + void negs(const Register& rd, + const Operand& operand); + + // Add with carry bit. + void adc(const Register& rd, + const Register& rn, + const Operand& operand); + + // Add with carry bit and update status flags. + void adcs(const Register& rd, + const Register& rn, + const Operand& operand); + + // Subtract with carry bit. 
+ void sbc(const Register& rd, + const Register& rn, + const Operand& operand); + + // Subtract with carry bit and update status flags. + void sbcs(const Register& rd, + const Register& rn, + const Operand& operand); + + // Negate with carry bit. + void ngc(const Register& rd, + const Operand& operand); + + // Negate with carry bit and update status flags. + void ngcs(const Register& rd, + const Operand& operand); + + // Logical instructions. + // Bitwise and (A & B). + void and_(const Register& rd, + const Register& rn, + const Operand& operand); + + // Bitwise and (A & B) and update status flags. + BufferOffset ands(const Register& rd, + const Register& rn, + const Operand& operand); + + // Bit test and set flags. + BufferOffset tst(const Register& rn, const Operand& operand); + + // Bit clear (A & ~B). + void bic(const Register& rd, + const Register& rn, + const Operand& operand); + + // Bit clear (A & ~B) and update status flags. + void bics(const Register& rd, + const Register& rn, + const Operand& operand); + + // Bitwise or (A | B). + void orr(const Register& rd, const Register& rn, const Operand& operand); + + // Bitwise or-not (A | ~B). + void orn(const Register& rd, const Register& rn, const Operand& operand); + + // Bitwise eor/xor (A ^ B). + void eor(const Register& rd, const Register& rn, const Operand& operand); + + // Bitwise enor/xnor (A ^ ~B). + void eon(const Register& rd, const Register& rn, const Operand& operand); + + // Logical shift left by variable. + void lslv(const Register& rd, const Register& rn, const Register& rm); + + // Logical shift right by variable. + void lsrv(const Register& rd, const Register& rn, const Register& rm); + + // Arithmetic shift right by variable. + void asrv(const Register& rd, const Register& rn, const Register& rm); + + // Rotate right by variable. + void rorv(const Register& rd, const Register& rn, const Register& rm); + + // Bitfield instructions. + // Bitfield move. 
+  void bfm(const Register& rd,
+           const Register& rn,
+           unsigned immr,
+           unsigned imms);
+
+  // Signed bitfield move.
+  void sbfm(const Register& rd,
+            const Register& rn,
+            unsigned immr,
+            unsigned imms);
+
+  // Unsigned bitfield move.
+  void ubfm(const Register& rd,
+            const Register& rn,
+            unsigned immr,
+            unsigned imms);
+
+  // Bfm aliases.
+  // Bitfield insert: expressed as bfm with a right-rotation of
+  // (size - lsb) mod size and a top source bit of width - 1.
+  void bfi(const Register& rd,
+           const Register& rn,
+           unsigned lsb,
+           unsigned width) {
+    VIXL_ASSERT(width >= 1);
+    VIXL_ASSERT(lsb + width <= rn.size());
+    const unsigned reg_size = rd.size();
+    const unsigned rotate = (reg_size - lsb) & (reg_size - 1);
+    bfm(rd, rn, rotate, width - 1);
+  }
+
+  // Bitfield extract and insert low: bfm over bits [lsb, lsb + width - 1].
+  void bfxil(const Register& rd,
+             const Register& rn,
+             unsigned lsb,
+             unsigned width) {
+    VIXL_ASSERT(width >= 1);
+    VIXL_ASSERT(lsb + width <= rn.size());
+    const unsigned msb = lsb + width - 1;
+    bfm(rd, rn, lsb, msb);
+  }
+
+  // Sbfm aliases.
+  // Arithmetic shift right: sbfm from bit |shift| up to the top bit of rd.
+  void asr(const Register& rd, const Register& rn, unsigned shift) {
+    const unsigned reg_size = rd.size();
+    VIXL_ASSERT(shift < reg_size);
+    sbfm(rd, rn, shift, reg_size - 1);
+  }
+
+  // Signed bitfield insert with zero at right.
+  void sbfiz(const Register& rd,
+             const Register& rn,
+             unsigned lsb,
+             unsigned width) {
+    VIXL_ASSERT(width >= 1);
+    VIXL_ASSERT(lsb + width <= rn.size());
+    const unsigned reg_size = rd.size();
+    const unsigned rotate = (reg_size - lsb) & (reg_size - 1);
+    sbfm(rd, rn, rotate, width - 1);
+  }
+
+  // Signed bitfield extract: sbfm over bits [lsb, lsb + width - 1].
+  void sbfx(const Register& rd,
+            const Register& rn,
+            unsigned lsb,
+            unsigned width) {
+    VIXL_ASSERT(width >= 1);
+    VIXL_ASSERT(lsb + width <= rn.size());
+    const unsigned msb = lsb + width - 1;
+    sbfm(rd, rn, lsb, msb);
+  }
+
+  // Signed extend byte (bits 0..7).
+  void sxtb(const Register& rd, const Register& rn) {
+    const unsigned top_bit = 7;
+    sbfm(rd, rn, 0, top_bit);
+  }
+
+  // Signed extend halfword (bits 0..15).
+  void sxth(const Register& rd, const Register& rn) {
+    const unsigned top_bit = 15;
+    sbfm(rd, rn, 0, top_bit);
+  }
+
+  // Signed extend word (bits 0..31).
+  void sxtw(const Register& rd, const Register& rn) {
+    const unsigned top_bit = 31;
+    sbfm(rd, rn, 0, top_bit);
+  }
+
+  // Ubfm aliases.
+  // Logical shift left.
+  void lsl(const Register& rd, const Register& rn, unsigned shift) {
+    VIXL_ASSERT(shift < rd.size());
+    const unsigned width = rd.size();
+    // A left shift is ubfm with a right-rotation of (width - shift) mod width.
+    const unsigned rotate = (width - shift) % width;
+    const unsigned top_bit = width - shift - 1;
+    ubfm(rd, rn, rotate, top_bit);
+  }
+
+  // Logical shift right: ubfm from bit |shift| up to the top bit of rd.
+  void lsr(const Register& rd, const Register& rn, unsigned shift) {
+    const unsigned width = rd.size();
+    VIXL_ASSERT(shift < width);
+    ubfm(rd, rn, shift, width - 1);
+  }
+
+  // Unsigned bitfield insert with zero at right.
+  void ubfiz(const Register& rd,
+             const Register& rn,
+             unsigned lsb,
+             unsigned width) {
+    VIXL_ASSERT(width >= 1);
+    VIXL_ASSERT(lsb + width <= rn.size());
+    const unsigned reg_size = rd.size();
+    const unsigned rotate = (reg_size - lsb) & (reg_size - 1);
+    ubfm(rd, rn, rotate, width - 1);
+  }
+
+  // Unsigned bitfield extract: ubfm over bits [lsb, lsb + width - 1].
+  void ubfx(const Register& rd,
+            const Register& rn,
+            unsigned lsb,
+            unsigned width) {
+    VIXL_ASSERT(width >= 1);
+    VIXL_ASSERT(lsb + width <= rn.size());
+    const unsigned msb = lsb + width - 1;
+    ubfm(rd, rn, lsb, msb);
+  }
+
+  // Unsigned extend byte (bits 0..7).
+  void uxtb(const Register& rd, const Register& rn) {
+    const unsigned top_bit = 7;
+    ubfm(rd, rn, 0, top_bit);
+  }
+
+  // Unsigned extend halfword (bits 0..15).
+  void uxth(const Register& rd, const Register& rn) {
+    const unsigned top_bit = 15;
+    ubfm(rd, rn, 0, top_bit);
+  }
+
+  // Unsigned extend word (bits 0..31).
+  void uxtw(const Register& rd, const Register& rn) {
+    const unsigned top_bit = 31;
+    ubfm(rd, rn, 0, top_bit);
+  }
+
+  // Extract.
+  void extr(const Register& rd,
+            const Register& rn,
+            const Register& rm,
+            unsigned lsb);
+
+  // Conditional select: rd = cond ? rn : rm.
+  void csel(const Register& rd,
+            const Register& rn,
+            const Register& rm,
+            Condition cond);
+
+  // Conditional select increment: rd = cond ? rn : rm + 1.
+  void csinc(const Register& rd,
+             const Register& rn,
+             const Register& rm,
+             Condition cond);
+
+  // Conditional select inversion: rd = cond ? rn : ~rm.
+  void csinv(const Register& rd,
+             const Register& rn,
+             const Register& rm,
+             Condition cond);
+
+  // Conditional select negation: rd = cond ? rn : -rm.
+  void csneg(const Register& rd,
+             const Register& rn,
+             const Register& rm,
+             Condition cond);
+
+  // Conditional set: rd = cond ? 1 : 0.
+  void cset(const Register& rd, Condition cond);
+
+  // Conditional set mask: rd = cond ? -1 : 0.
+  void csetm(const Register& rd, Condition cond);
+
+  // Conditional increment: rd = cond ? rn + 1 : rn.
+  void cinc(const Register& rd, const Register& rn, Condition cond);
+
+  // Conditional invert: rd = cond ? ~rn : rn.
+  void cinv(const Register& rd, const Register& rn, Condition cond);
+
+  // Conditional negate: rd = cond ? -rn : rn.
+  void cneg(const Register& rd, const Register& rn, Condition cond);
+
+  // Rotate right: implemented as extr with |rs| supplied as both sources.
+  void ror(const Register& rd, const Register& rs, unsigned shift) {
+    const Register& source = rs;
+    extr(rd, source, source, shift);
+  }
+
+  // Conditional comparison.
+  // Conditional compare negative.
+  void ccmn(const Register& rn,
+            const Operand& operand,
+            StatusFlags nzcv,
+            Condition cond);
+
+  // Conditional compare.
+  void ccmp(const Register& rn,
+            const Operand& operand,
+            StatusFlags nzcv,
+            Condition cond);
+
+  // CRC-32 checksum from byte.
+  void crc32b(const Register& rd,
+              const Register& rn,
+              const Register& rm);
+
+  // CRC-32 checksum from half-word.
+  void crc32h(const Register& rd,
+              const Register& rn,
+              const Register& rm);
+
+  // CRC-32 checksum from word.
+  void crc32w(const Register& rd,
+              const Register& rn,
+              const Register& rm);
+
+  // CRC-32 checksum from double word.
+  void crc32x(const Register& rd,
+              const Register& rn,
+              const Register& rm);
+
+  // CRC-32C checksum from byte.
+  void crc32cb(const Register& rd,
+               const Register& rn,
+               const Register& rm);
+
+  // CRC-32C checksum from half-word.
+  void crc32ch(const Register& rd,
+               const Register& rn,
+               const Register& rm);
+
+  // CRC-32C checksum from word.
+  void crc32cw(const Register& rd,
+               const Register& rn,
+               const Register& rm);
+
+  // CRC-32C checksum from double word.
+  void crc32cx(const Register& rd,
+               const Register& rn,
+               const Register& rm);
+
+  // Multiply.
+  void mul(const Register& rd, const Register& rn, const Register& rm);
+
+  // Negated multiply.
+  void mneg(const Register& rd, const Register& rn, const Register& rm);
+
+  // Signed long multiply: 32 x 32 -> 64-bit.
+  void smull(const Register& rd, const Register& rn, const Register& rm);
+
+  // Signed multiply high: 64 x 64 -> 64-bit <127:64>.
+  void smulh(const Register& xd, const Register& xn, const Register& xm);
+
+  // Multiply and accumulate.
+  void madd(const Register& rd,
+            const Register& rn,
+            const Register& rm,
+            const Register& ra);
+
+  // Multiply and subtract.
+  void msub(const Register& rd,
+            const Register& rn,
+            const Register& rm,
+            const Register& ra);
+
+  // Signed long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
+  void smaddl(const Register& rd,
+              const Register& rn,
+              const Register& rm,
+              const Register& ra);
+
+  // Unsigned long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
+  void umaddl(const Register& rd,
+              const Register& rn,
+              const Register& rm,
+              const Register& ra);
+
+  // Unsigned long multiply: 32 x 32 -> 64-bit.
+  // Emitted as umaddl with xzr passed as the accumulator operand.
+  void umull(const Register& rd,
+             const Register& rn,
+             const Register& rm) {
+    const Register& accumulator = xzr;
+    umaddl(rd, rn, rm, accumulator);
+  }
+
+  // Unsigned multiply high: 64 x 64 -> 64-bit <127:64>.
+  void umulh(const Register& xd,
+             const Register& xn,
+             const Register& xm);
+
+  // Signed long multiply and subtract: 64 - (32 x 32) -> 64-bit.
+  void smsubl(const Register& rd,
+              const Register& rn,
+              const Register& rm,
+              const Register& ra);
+
+  // Unsigned long multiply and subtract: 64 - (32 x 32) -> 64-bit.
+  void umsubl(const Register& rd,
+              const Register& rn,
+              const Register& rm,
+              const Register& ra);
+
+  // Signed integer divide.
+  void sdiv(const Register& rd, const Register& rn, const Register& rm);
+
+  // Unsigned integer divide.
+  void udiv(const Register& rd, const Register& rn, const Register& rm);
+
+  // Bit reverse.
+  void rbit(const Register& rd, const Register& rn);
+
+  // Reverse bytes in 16-bit half words.
+  void rev16(const Register& rd, const Register& rn);
+
+  // Reverse bytes in 32-bit words.
+ void rev32(const Register& rd, const Register& rn); + + // Reverse bytes. + void rev(const Register& rd, const Register& rn); + + // Count leading zeroes. + void clz(const Register& rd, const Register& rn); + + // Count leading sign bits. + void cls(const Register& rd, const Register& rn); + + // Memory instructions. + // Load integer or FP register. + void ldr(const CPURegister& rt, const MemOperand& src, + LoadStoreScalingOption option = PreferScaledOffset); + + // Store integer or FP register. + void str(const CPURegister& rt, const MemOperand& dst, + LoadStoreScalingOption option = PreferScaledOffset); + + // Load word with sign extension. + void ldrsw(const Register& rt, const MemOperand& src, + LoadStoreScalingOption option = PreferScaledOffset); + + // Load byte. + void ldrb(const Register& rt, const MemOperand& src, + LoadStoreScalingOption option = PreferScaledOffset); + + // Store byte. + void strb(const Register& rt, const MemOperand& dst, + LoadStoreScalingOption option = PreferScaledOffset); + + // Load byte with sign extension. + void ldrsb(const Register& rt, const MemOperand& src, + LoadStoreScalingOption option = PreferScaledOffset); + + // Load half-word. + void ldrh(const Register& rt, const MemOperand& src, + LoadStoreScalingOption option = PreferScaledOffset); + + // Store half-word. + void strh(const Register& rt, const MemOperand& dst, + LoadStoreScalingOption option = PreferScaledOffset); + + // Load half-word with sign extension. + void ldrsh(const Register& rt, const MemOperand& src, + LoadStoreScalingOption option = PreferScaledOffset); + + // Load integer or FP register (with unscaled offset). + void ldur(const CPURegister& rt, const MemOperand& src, + LoadStoreScalingOption option = PreferUnscaledOffset); + + // Store integer or FP register (with unscaled offset). + void stur(const CPURegister& rt, const MemOperand& src, + LoadStoreScalingOption option = PreferUnscaledOffset); + + // Load word with sign extension (and unscaled offset). 
+ void ldursw(const Register& rt, const MemOperand& src, + LoadStoreScalingOption option = PreferUnscaledOffset); + + // Load byte (with unscaled offset). + void ldurb(const Register& rt, const MemOperand& src, + LoadStoreScalingOption option = PreferUnscaledOffset); + + // Store byte (with unscaled offset). + void sturb(const Register& rt, const MemOperand& dst, + LoadStoreScalingOption option = PreferUnscaledOffset); + + // Load byte with sign extension (and unscaled offset). + void ldursb(const Register& rt, const MemOperand& src, + LoadStoreScalingOption option = PreferUnscaledOffset); + + // Load half-word (with unscaled offset). + void ldurh(const Register& rt, const MemOperand& src, + LoadStoreScalingOption option = PreferUnscaledOffset); + + // Store half-word (with unscaled offset). + void sturh(const Register& rt, const MemOperand& dst, + LoadStoreScalingOption option = PreferUnscaledOffset); + + // Load half-word with sign extension (and unscaled offset). + void ldursh(const Register& rt, const MemOperand& src, + LoadStoreScalingOption option = PreferUnscaledOffset); + + // Load integer or FP register pair. + void ldp(const CPURegister& rt, const CPURegister& rt2, + const MemOperand& src); + + // Store integer or FP register pair. + void stp(const CPURegister& rt, const CPURegister& rt2, + const MemOperand& dst); + + // Load word pair with sign extension. + void ldpsw(const Register& rt, const Register& rt2, const MemOperand& src); + + // Load integer or FP register pair, non-temporal. + void ldnp(const CPURegister& rt, const CPURegister& rt2, + const MemOperand& src); + + // Store integer or FP register pair, non-temporal. + void stnp(const CPURegister& rt, const CPURegister& rt2, + const MemOperand& dst); + + // Load integer or FP register from pc + imm19 << 2. + void ldr(const CPURegister& rt, int imm19); + static void ldr(Instruction* at, const CPURegister& rt, int imm19); + + // Load word with sign extension from pc + imm19 << 2. 
+ void ldrsw(const Register& rt, int imm19); + + // Store exclusive byte. + void stxrb(const Register& rs, const Register& rt, const MemOperand& dst); + + // Store exclusive half-word. + void stxrh(const Register& rs, const Register& rt, const MemOperand& dst); + + // Store exclusive register. + void stxr(const Register& rs, const Register& rt, const MemOperand& dst); + + // Load exclusive byte. + void ldxrb(const Register& rt, const MemOperand& src); + + // Load exclusive half-word. + void ldxrh(const Register& rt, const MemOperand& src); + + // Load exclusive register. + void ldxr(const Register& rt, const MemOperand& src); + + // Store exclusive register pair. + void stxp(const Register& rs, + const Register& rt, + const Register& rt2, + const MemOperand& dst); + + // Load exclusive register pair. + void ldxp(const Register& rt, const Register& rt2, const MemOperand& src); + + // Store-release exclusive byte. + void stlxrb(const Register& rs, const Register& rt, const MemOperand& dst); + + // Store-release exclusive half-word. + void stlxrh(const Register& rs, const Register& rt, const MemOperand& dst); + + // Store-release exclusive register. + void stlxr(const Register& rs, const Register& rt, const MemOperand& dst); + + // Load-acquire exclusive byte. + void ldaxrb(const Register& rt, const MemOperand& src); + + // Load-acquire exclusive half-word. + void ldaxrh(const Register& rt, const MemOperand& src); + + // Load-acquire exclusive register. + void ldaxr(const Register& rt, const MemOperand& src); + + // Store-release exclusive register pair. + void stlxp(const Register& rs, + const Register& rt, + const Register& rt2, + const MemOperand& dst); + + // Load-acquire exclusive register pair. + void ldaxp(const Register& rt, const Register& rt2, const MemOperand& src); + + // Store-release byte. + void stlrb(const Register& rt, const MemOperand& dst); + + // Store-release half-word. 
+ void stlrh(const Register& rt, const MemOperand& dst); + + // Store-release register. + void stlr(const Register& rt, const MemOperand& dst); + + // Load-acquire byte. + void ldarb(const Register& rt, const MemOperand& src); + + // Load-acquire half-word. + void ldarh(const Register& rt, const MemOperand& src); + + // Load-acquire register. + void ldar(const Register& rt, const MemOperand& src); + + // Compare and Swap word or doubleword in memory [Armv8.1]. + void cas(const Register& rs, const Register& rt, const MemOperand& src); + + // Compare and Swap word or doubleword in memory, with Load-acquire semantics + // [Armv8.1]. + void casa(const Register& rs, const Register& rt, const MemOperand& src); + + // Compare and Swap word or doubleword in memory, with Store-release semantics + // [Armv8.1]. + void casl(const Register& rs, const Register& rt, const MemOperand& src); + + // Compare and Swap word or doubleword in memory, with Load-acquire and + // Store-release semantics [Armv8.1]. + void casal(const Register& rs, const Register& rt, const MemOperand& src); + + // Compare and Swap byte in memory [Armv8.1]. + void casb(const Register& rs, const Register& rt, const MemOperand& src); + + // Compare and Swap byte in memory, with Load-acquire semantics [Armv8.1]. + void casab(const Register& rs, const Register& rt, const MemOperand& src); + + // Compare and Swap byte in memory, with Store-release semantics [Armv8.1]. + void caslb(const Register& rs, const Register& rt, const MemOperand& src); + + // Compare and Swap byte in memory, with Load-acquire and Store-release + // semantics [Armv8.1]. + void casalb(const Register& rs, const Register& rt, const MemOperand& src); + + // Compare and Swap halfword in memory [Armv8.1]. + void cash(const Register& rs, const Register& rt, const MemOperand& src); + + // Compare and Swap halfword in memory, with Load-acquire semantics [Armv8.1]. + void casah(const Register& rs, const Register& rt, const MemOperand& src); + + // Compare and Swap halfword in memory, with Store-release semantics + // [Armv8.1]. + void caslh(const Register& rs, const Register& rt, const MemOperand& src); + + // Compare and Swap halfword in memory, with Load-acquire and Store-release + // semantics [Armv8.1]. 
+ void casalh(const Register& rs, const Register& rt, const MemOperand& src); + + // Compare and Swap Pair of words or doublewords in memory [Armv8.1]. + void casp(const Register& rs, + const Register& rs2, + const Register& rt, + const Register& rt2, + const MemOperand& src); + + // Compare and Swap Pair of words or doublewords in memory, with Load-acquire + // semantics [Armv8.1]. + void caspa(const Register& rs, + const Register& rs2, + const Register& rt, + const Register& rt2, + const MemOperand& src); + + // Compare and Swap Pair of words or doublewords in memory, with Store-release + // semantics [Armv8.1]. + void caspl(const Register& rs, + const Register& rs2, + const Register& rt, + const Register& rt2, + const MemOperand& src); + + // Compare and Swap Pair of words or doublewords in memory, with Load-acquire + // and Store-release semantics [Armv8.1]. + void caspal(const Register& rs, + const Register& rs2, + const Register& rt, + const Register& rt2, + const MemOperand& src); + + // Atomic add on byte in memory [Armv8.1] + void ldaddb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic add on byte in memory, with Load-acquire semantics [Armv8.1] + void ldaddab(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic add on byte in memory, with Store-release semantics [Armv8.1] + void ldaddlb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic add on byte in memory, with Load-acquire and Store-release semantics + // [Armv8.1] + void ldaddalb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic add on halfword in memory [Armv8.1] + void ldaddh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic add on halfword in memory, with Load-acquire semantics [Armv8.1] + void ldaddah(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic add on halfword in memory, with Store-release semantics [Armv8.1] + void ldaddlh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic add on halfword in memory, with Load-acquire 
and Store-release + // semantics [Armv8.1] + void ldaddalh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic add on word or doubleword in memory [Armv8.1] + void ldadd(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic add on word or doubleword in memory, with Load-acquire semantics + // [Armv8.1] + void ldadda(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic add on word or doubleword in memory, with Store-release semantics + // [Armv8.1] + void ldaddl(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic add on word or doubleword in memory, with Load-acquire and + // Store-release semantics [Armv8.1] + void ldaddal(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit clear on byte in memory [Armv8.1] + void ldclrb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit clear on byte in memory, with Load-acquire semantics [Armv8.1] + void ldclrab(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit clear on byte in memory, with Store-release semantics [Armv8.1] + void ldclrlb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit clear on byte in memory, with Load-acquire and Store-release + // semantics [Armv8.1] + void ldclralb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit clear on halfword in memory [Armv8.1] + void ldclrh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit clear on halfword in memory, with Load-acquire semantics + // [Armv8.1] + void ldclrah(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit clear on halfword in memory, with Store-release semantics + // [Armv8.1] + void ldclrlh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit clear on halfword in memory, with Load-acquire and Store-release + // 
semantics [Armv8.1] + void ldclralh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit clear on word or doubleword in memory [Armv8.1] + void ldclr(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit clear on word or doubleword in memory, with Load-acquire + // semantics [Armv8.1] + void ldclra(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit clear on word or doubleword in memory, with Store-release + // semantics [Armv8.1] + void ldclrl(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit clear on word or doubleword in memory, with Load-acquire and + // Store-release semantics [Armv8.1] + void ldclral(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic exclusive OR on byte in memory [Armv8.1] + void ldeorb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic exclusive OR on byte in memory, with Load-acquire semantics + // [Armv8.1] + void ldeorab(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic exclusive OR on byte in memory, with Store-release semantics + // [Armv8.1] + void ldeorlb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic exclusive OR on byte in memory, with Load-acquire and Store-release + // semantics [Armv8.1] + void ldeoralb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic exclusive OR on halfword in memory [Armv8.1] + void ldeorh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic exclusive OR on halfword in memory, with Load-acquire semantics + // [Armv8.1] + void ldeorah(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic exclusive OR on halfword in memory, with Store-release semantics + // [Armv8.1] + void ldeorlh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic exclusive OR on halfword in memory, with 
Load-acquire and + // Store-release semantics [Armv8.1] + void ldeoralh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic exclusive OR on word or doubleword in memory [Armv8.1] + void ldeor(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic exclusive OR on word or doubleword in memory, with Load-acquire + // semantics [Armv8.1] + void ldeora(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic exclusive OR on word or doubleword in memory, with Store-release + // semantics [Armv8.1] + void ldeorl(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic exclusive OR on word or doubleword in memory, with Load-acquire and + // Store-release semantics [Armv8.1] + void ldeoral(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit set on byte in memory [Armv8.1] + void ldsetb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit set on byte in memory, with Load-acquire semantics [Armv8.1] + void ldsetab(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit set on byte in memory, with Store-release semantics [Armv8.1] + void ldsetlb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit set on byte in memory, with Load-acquire and Store-release + // semantics [Armv8.1] + void ldsetalb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit set on halfword in memory [Armv8.1] + void ldseth(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit set on halfword in memory, with Load-acquire semantics [Armv8.1] + void ldsetah(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit set on halfword in memory, with Store-release semantics + // [Armv8.1] + void ldsetlh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit set on halfword in memory, with Load-acquire 
and Store-release + // semantics [Armv8.1] + void ldsetalh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit set on word or doubleword in memory [Armv8.1] + void ldset(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit set on word or doubleword in memory, with Load-acquire semantics + // [Armv8.1] + void ldseta(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit set on word or doubleword in memory, with Store-release + // semantics [Armv8.1] + void ldsetl(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit set on word or doubleword in memory, with Load-acquire and + // Store-release semantics [Armv8.1] + void ldsetal(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed maximum on byte in memory [Armv8.1] + void ldsmaxb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed maximum on byte in memory, with Load-acquire semantics + // [Armv8.1] + void ldsmaxab(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed maximum on byte in memory, with Store-release semantics + // [Armv8.1] + void ldsmaxlb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed maximum on byte in memory, with Load-acquire and + // Store-release semantics [Armv8.1] + void ldsmaxalb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed maximum on halfword in memory [Armv8.1] + void ldsmaxh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed maximum on halfword in memory, with Load-acquire semantics + // [Armv8.1] + void ldsmaxah(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed maximum on halfword in memory, with Store-release semantics + // [Armv8.1] + void ldsmaxlh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed 
maximum on halfword in memory, with Load-acquire and + // Store-release semantics [Armv8.1] + void ldsmaxalh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed maximum on word or doubleword in memory [Armv8.1] + void ldsmax(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed maximum on word or doubleword in memory, with Load-acquire + // semantics [Armv8.1] + void ldsmaxa(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed maximum on word or doubleword in memory, with Store-release + // semantics [Armv8.1] + void ldsmaxl(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed maximum on word or doubleword in memory, with Load-acquire + // and Store-release semantics [Armv8.1] + void ldsmaxal(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed minimum on byte in memory [Armv8.1] + void ldsminb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed minimum on byte in memory, with Load-acquire semantics + // [Armv8.1] + void ldsminab(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed minimum on byte in memory, with Store-release semantics + // [Armv8.1] + void ldsminlb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed minimum on byte in memory, with Load-acquire and + // Store-release semantics [Armv8.1] + void ldsminalb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed minimum on halfword in memory [Armv8.1] + void ldsminh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed minimum on halfword in memory, with Load-acquire semantics + // [Armv8.1] + void ldsminah(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed minimum on halfword in memory, with Store-release semantics + // [Armv8.1] + void ldsminlh(const 
Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed minimum on halfword in memory, with Load-acquire and + // Store-release semantics [Armv8.1] + void ldsminalh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed minimum on word or doubleword in memory [Armv8.1] + void ldsmin(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed minimum on word or doubleword in memory, with Load-acquire + // semantics [Armv8.1] + void ldsmina(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed minimum on word or doubleword in memory, with Store-release + // semantics [Armv8.1] + void ldsminl(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed minimum on word or doubleword in memory, with Load-acquire + // and Store-release semantics [Armv8.1] + void ldsminal(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned maximum on byte in memory [Armv8.1] + void ldumaxb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned maximum on byte in memory, with Load-acquire semantics + // [Armv8.1] + void ldumaxab(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned maximum on byte in memory, with Store-release semantics + // [Armv8.1] + void ldumaxlb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned maximum on byte in memory, with Load-acquire and + // Store-release semantics [Armv8.1] + void ldumaxalb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned maximum on halfword in memory [Armv8.1] + void ldumaxh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned maximum on halfword in memory, with Load-acquire semantics + // [Armv8.1] + void ldumaxah(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned maximum 
on halfword in memory, with Store-release semantics + // [Armv8.1] + void ldumaxlh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned maximum on halfword in memory, with Load-acquire and + // Store-release semantics [Armv8.1] + void ldumaxalh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned maximum on word or doubleword in memory [Armv8.1] + void ldumax(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned maximum on word or doubleword in memory, with Load-acquire + // semantics [Armv8.1] + void ldumaxa(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned maximum on word or doubleword in memory, with Store-release + // semantics [Armv8.1] + void ldumaxl(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned maximum on word or doubleword in memory, with Load-acquire + // and Store-release semantics [Armv8.1] + void ldumaxal(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned minimum on byte in memory [Armv8.1] + void lduminb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned minimum on byte in memory, with Load-acquire semantics + // [Armv8.1] + void lduminab(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned minimum on byte in memory, with Store-release semantics + // [Armv8.1] + void lduminlb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned minimum on byte in memory, with Load-acquire and + // Store-release semantics [Armv8.1] + void lduminalb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned minimum on halfword in memory [Armv8.1] + void lduminh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned minimum on halfword in memory, with Load-acquire semantics + // [Armv8.1] + void 
lduminah(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned minimum on halfword in memory, with Store-release semantics + // [Armv8.1] + void lduminlh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned minimum on halfword in memory, with Load-acquire and + // Store-release semantics [Armv8.1] + void lduminalh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned minimum on word or doubleword in memory [Armv8.1] + void ldumin(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned minimum on word or doubleword in memory, with Load-acquire + // semantics [Armv8.1] + void ldumina(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned minimum on word or doubleword in memory, with Store-release + // semantics [Armv8.1] + void lduminl(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned minimum on word or doubleword in memory, with Load-acquire + // and Store-release semantics [Armv8.1] + void lduminal(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic add on byte in memory, without return. [Armv8.1] + void staddb(const Register& rs, const MemOperand& src); + + // Atomic add on byte in memory, with Store-release semantics and without + // return. [Armv8.1] + void staddlb(const Register& rs, const MemOperand& src); + + // Atomic add on halfword in memory, without return. [Armv8.1] + void staddh(const Register& rs, const MemOperand& src); + + // Atomic add on halfword in memory, with Store-release semantics and without + // return. [Armv8.1] + void staddlh(const Register& rs, const MemOperand& src); + + // Atomic add on word or doubleword in memory, without return. [Armv8.1] + void stadd(const Register& rs, const MemOperand& src); + + // Atomic add on word or doubleword in memory, with Store-release semantics + // and without return. 
[Armv8.1] + void staddl(const Register& rs, const MemOperand& src); + + // Atomic bit clear on byte in memory, without return. [Armv8.1] + void stclrb(const Register& rs, const MemOperand& src); + + // Atomic bit clear on byte in memory, with Store-release semantics and + // without return. [Armv8.1] + void stclrlb(const Register& rs, const MemOperand& src); + + // Atomic bit clear on halfword in memory, without return. [Armv8.1] + void stclrh(const Register& rs, const MemOperand& src); + + // Atomic bit clear on halfword in memory, with Store-release semantics and + // without return. [Armv8.1] + void stclrlh(const Register& rs, const MemOperand& src); + + // Atomic bit clear on word or doubleword in memory, without return. [Armv8.1] + void stclr(const Register& rs, const MemOperand& src); + + // Atomic bit clear on word or doubleword in memory, with Store-release + // semantics and without return. [Armv8.1] + void stclrl(const Register& rs, const MemOperand& src); + + // Atomic exclusive OR on byte in memory, without return. [Armv8.1] + void steorb(const Register& rs, const MemOperand& src); + + // Atomic exclusive OR on byte in memory, with Store-release semantics and + // without return. [Armv8.1] + void steorlb(const Register& rs, const MemOperand& src); + + // Atomic exclusive OR on halfword in memory, without return. [Armv8.1] + void steorh(const Register& rs, const MemOperand& src); + + // Atomic exclusive OR on halfword in memory, with Store-release semantics + // and without return. [Armv8.1] + void steorlh(const Register& rs, const MemOperand& src); + + // Atomic exclusive OR on word or doubleword in memory, without return. + // [Armv8.1] + void steor(const Register& rs, const MemOperand& src); + + // Atomic exclusive OR on word or doubleword in memory, with Store-release + // semantics and without return. [Armv8.1] + void steorl(const Register& rs, const MemOperand& src); + + // Atomic bit set on byte in memory, without return. 
[Armv8.1] + void stsetb(const Register& rs, const MemOperand& src); + + // Atomic bit set on byte in memory, with Store-release semantics and without + // return. [Armv8.1] + void stsetlb(const Register& rs, const MemOperand& src); + + // Atomic bit set on halfword in memory, without return. [Armv8.1] + void stseth(const Register& rs, const MemOperand& src); + + // Atomic bit set on halfword in memory, with Store-release semantics and + // without return. [Armv8.1] + void stsetlh(const Register& rs, const MemOperand& src); + + // Atomic bit set on word or doubleword in memory, without return. [Armv8.1] + void stset(const Register& rs, const MemOperand& src); + + // Atomic bit set on word or doubleword in memory, with Store-release + // semantics and without return. [Armv8.1] + void stsetl(const Register& rs, const MemOperand& src); + + // Atomic signed maximum on byte in memory, without return. [Armv8.1] + void stsmaxb(const Register& rs, const MemOperand& src); + + // Atomic signed maximum on byte in memory, with Store-release semantics and + // without return. [Armv8.1] + void stsmaxlb(const Register& rs, const MemOperand& src); + + // Atomic signed maximum on halfword in memory, without return. [Armv8.1] + void stsmaxh(const Register& rs, const MemOperand& src); + + // Atomic signed maximum on halfword in memory, with Store-release semantics + // and without return. [Armv8.1] + void stsmaxlh(const Register& rs, const MemOperand& src); + + // Atomic signed maximum on word or doubleword in memory, without return. + // [Armv8.1] + void stsmax(const Register& rs, const MemOperand& src); + + // Atomic signed maximum on word or doubleword in memory, with Store-release + // semantics and without return. [Armv8.1] + void stsmaxl(const Register& rs, const MemOperand& src); + + // Atomic signed minimum on byte in memory, without return. 
[Armv8.1] + void stsminb(const Register& rs, const MemOperand& src); + + // Atomic signed minimum on byte in memory, with Store-release semantics and + // without return. [Armv8.1] + void stsminlb(const Register& rs, const MemOperand& src); + + // Atomic signed minimum on halfword in memory, without return. [Armv8.1] + void stsminh(const Register& rs, const MemOperand& src); + + // Atomic signed minimum on halfword in memory, with Store-release semantics + // and without return. [Armv8.1] + void stsminlh(const Register& rs, const MemOperand& src); + + // Atomic signed minimum on word or doubleword in memory, without return. + // [Armv8.1] + void stsmin(const Register& rs, const MemOperand& src); + + // Atomic signed minimum on word or doubleword in memory, with Store-release + // semantics and without return. [Armv8.1] + void stsminl(const Register& rs, const MemOperand& src); + + // Atomic unsigned maximum on byte in memory, without return. [Armv8.1] + void stumaxb(const Register& rs, const MemOperand& src); + + // Atomic unsigned maximum on byte in memory, with Store-release semantics and + // without return. [Armv8.1] + void stumaxlb(const Register& rs, const MemOperand& src); + + // Atomic unsigned maximum on halfword in memory, without return. [Armv8.1] + void stumaxh(const Register& rs, const MemOperand& src); + + // Atomic unsigned maximum on halfword in memory, with Store-release semantics + // and without return. [Armv8.1] + void stumaxlh(const Register& rs, const MemOperand& src); + + // Atomic unsigned maximum on word or doubleword in memory, without return. + // [Armv8.1] + void stumax(const Register& rs, const MemOperand& src); + + // Atomic unsigned maximum on word or doubleword in memory, with Store-release + // semantics and without return. [Armv8.1] + void stumaxl(const Register& rs, const MemOperand& src); + + // Atomic unsigned minimum on byte in memory, without return.
[Armv8.1] + void stuminb(const Register& rs, const MemOperand& src); + + // Atomic unsigned minimum on byte in memory, with Store-release semantics and + // without return. [Armv8.1] + void stuminlb(const Register& rs, const MemOperand& src); + + // Atomic unsigned minimum on halfword in memory, without return. [Armv8.1] + void stuminh(const Register& rs, const MemOperand& src); + + // Atomic unsigned minimum on halfword in memory, with Store-release semantics + // and without return. [Armv8.1] + void stuminlh(const Register& rs, const MemOperand& src); + + // Atomic unsigned minimum on word or doubleword in memory, without return. + // [Armv8.1] + void stumin(const Register& rs, const MemOperand& src); + + // Atomic unsigned minimum on word or doubleword in memory, with Store-release + // semantics and without return. [Armv8.1] + void stuminl(const Register& rs, const MemOperand& src); + + // Swap byte in memory [Armv8.1] + void swpb(const Register& rs, const Register& rt, const MemOperand& src); + + // Swap byte in memory, with Load-acquire semantics [Armv8.1] + void swpab(const Register& rs, const Register& rt, const MemOperand& src); + + // Swap byte in memory, with Store-release semantics [Armv8.1] + void swplb(const Register& rs, const Register& rt, const MemOperand& src); + + // Swap byte in memory, with Load-acquire and Store-release semantics + // [Armv8.1] + void swpalb(const Register& rs, const Register& rt, const MemOperand& src); + + // Swap halfword in memory [Armv8.1] + void swph(const Register& rs, const Register& rt, const MemOperand& src); + + // Swap halfword in memory, with Load-acquire semantics [Armv8.1] + void swpah(const Register& rs, const Register& rt, const MemOperand& src); + + // Swap halfword in memory, with Store-release semantics [Armv8.1] + void swplh(const Register& rs, const Register& rt, const MemOperand& src); + + // Swap halfword in memory, with Load-acquire and Store-release semantics + // [Armv8.1] + void swpalh(const 
Register& rs, const Register& rt, const MemOperand& src); + + // Swap word or doubleword in memory [Armv8.1] + void swp(const Register& rs, const Register& rt, const MemOperand& src); + + // Swap word or doubleword in memory, with Load-acquire semantics [Armv8.1] + void swpa(const Register& rs, const Register& rt, const MemOperand& src); + + // Swap word or doubleword in memory, with Store-release semantics [Armv8.1] + void swpl(const Register& rs, const Register& rt, const MemOperand& src); + + // Swap word or doubleword in memory, with Load-acquire and Store-release + // semantics [Armv8.1] + void swpal(const Register& rs, const Register& rt, const MemOperand& src); + + // Prefetch memory. + void prfm(PrefetchOperation op, const MemOperand& addr, + LoadStoreScalingOption option = PreferScaledOffset); + + // Prefetch memory (with unscaled offset). + void prfum(PrefetchOperation op, const MemOperand& addr, + LoadStoreScalingOption option = PreferUnscaledOffset); + + // Prefetch from pc + imm19 << 2. + void prfm(PrefetchOperation op, int imm19); + + // Move instructions. The default shift of -1 indicates that the move + // instruction will calculate an appropriate 16-bit immediate and left shift + // that is equal to the 64-bit immediate argument. If an explicit left shift + // is specified (0, 16, 32 or 48), the immediate must be a 16-bit value. + // + // For movk, an explicit shift can be used to indicate which half word should + // be overwritten, eg. movk(x0, 0, 0) will overwrite the least-significant + // half word with zero, whereas movk(x0, 0, 48) will overwrite the + // most-significant. + + // Move immediate and keep. + void movk(const Register& rd, uint64_t imm, int shift = -1) { + MoveWide(rd, imm, shift, MOVK); + } + + // Move inverted immediate. + void movn(const Register& rd, uint64_t imm, int shift = -1) { + MoveWide(rd, imm, shift, MOVN); + } + + // Move immediate. 
+ void movz(const Register& rd, uint64_t imm, int shift = -1) { + MoveWide(rd, imm, shift, MOVZ); + } + + // Misc instructions. + // Monitor debug-mode breakpoint. + void brk(int code); + + // Halting debug-mode breakpoint. + void hlt(int code); + + // Generate exception targeting EL1. + void svc(int code); + static void svc(Instruction* at, int code); + + // Move register to register. + void mov(const Register& rd, const Register& rn); + + // Move inverted operand to register. + void mvn(const Register& rd, const Operand& operand); + + // System instructions. + // Move to register from system register. + void mrs(const Register& rt, SystemRegister sysreg); + + // Move from register to system register. + void msr(SystemRegister sysreg, const Register& rt); + + // System instruction. + void sys(int op1, int crn, int crm, int op2, const Register& rt = xzr); + + // System instruction with pre-encoded op (op1:crn:crm:op2). + void sys(int op, const Register& rt = xzr); + + // System data cache operation. + void dc(DataCacheOp op, const Register& rt); + + // System instruction cache operation. + void ic(InstructionCacheOp op, const Register& rt); + + // System hint. + BufferOffset hint(SystemHint code); + static void hint(Instruction* at, SystemHint code); + + // Clear exclusive monitor. + void clrex(int imm4 = 0xf); + + // Data memory barrier. + void dmb(BarrierDomain domain, BarrierType type); + + // Data synchronization barrier. + void dsb(BarrierDomain domain, BarrierType type); + + // Instruction synchronization barrier. + void isb(); + + // Alias for system instructions. + // No-op. + BufferOffset nop() { + return hint(NOP); + } + static void nop(Instruction* at); + + // Alias for system instructions. + // Conditional speculation barrier. + BufferOffset csdb() { + return hint(CSDB); + } + static void csdb(Instruction* at); + + // FP and NEON instructions. + // Move double precision immediate to FP register. 
+ void fmov(const VRegister& vd, double imm); + + // Move single precision immediate to FP register. + void fmov(const VRegister& vd, float imm); + + // Move FP register to register. + void fmov(const Register& rd, const VRegister& fn); + + // Move register to FP register. + void fmov(const VRegister& vd, const Register& rn); + + // Move FP register to FP register. + void fmov(const VRegister& vd, const VRegister& fn); + + // Move 64-bit register to top half of 128-bit FP register. + void fmov(const VRegister& vd, int index, const Register& rn); + + // Move top half of 128-bit FP register to 64-bit register. + void fmov(const Register& rd, const VRegister& vn, int index); + + // FP add. + void fadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP subtract. + void fsub(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP multiply. + void fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP fused multiply-add. + void fmadd(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + const VRegister& va); + + // FP fused multiply-subtract. + void fmsub(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + const VRegister& va); + + // FP fused multiply-add and negate. + void fnmadd(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + const VRegister& va); + + // FP fused multiply-subtract and negate. + void fnmsub(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + const VRegister& va); + + // FP multiply-negate scalar. + void fnmul(const VRegister& vd, + const VRegister& vn, + const VRegister& vm); + + // FP reciprocal exponent scalar. + void frecpx(const VRegister& vd, + const VRegister& vn); + + // FP divide. + void fdiv(const VRegister& vd, const VRegister& fn, const VRegister& vm); + + // FP maximum. + void fmax(const VRegister& vd, const VRegister& fn, const VRegister& vm); + + // FP minimum. 
+ void fmin(const VRegister& vd, const VRegister& fn, const VRegister& vm); + + // FP maximum number. + void fmaxnm(const VRegister& vd, const VRegister& fn, const VRegister& vm); + + // FP minimum number. + void fminnm(const VRegister& vd, const VRegister& fn, const VRegister& vm); + + // FP absolute. + void fabs(const VRegister& vd, const VRegister& vn); + + // FP negate. + void fneg(const VRegister& vd, const VRegister& vn); + + // FP square root. + void fsqrt(const VRegister& vd, const VRegister& vn); + + // FP round to integer, nearest with ties to away. + void frinta(const VRegister& vd, const VRegister& vn); + + // FP round to integer, implicit rounding. + void frinti(const VRegister& vd, const VRegister& vn); + + // FP round to integer, toward minus infinity. + void frintm(const VRegister& vd, const VRegister& vn); + + // FP round to integer, nearest with ties to even. + void frintn(const VRegister& vd, const VRegister& vn); + + // FP round to integer, toward plus infinity. + void frintp(const VRegister& vd, const VRegister& vn); + + // FP round to integer, exact, implicit rounding. + void frintx(const VRegister& vd, const VRegister& vn); + + // FP round to integer, towards zero. + void frintz(const VRegister& vd, const VRegister& vn); + + void FPCompareMacro(const VRegister& vn, + double value, + FPTrapFlags trap); + + void FPCompareMacro(const VRegister& vn, + const VRegister& vm, + FPTrapFlags trap); + + // FP compare registers. + void fcmp(const VRegister& vn, const VRegister& vm); + + // FP compare immediate. + void fcmp(const VRegister& vn, double value); + + void FPCCompareMacro(const VRegister& vn, + const VRegister& vm, + StatusFlags nzcv, + Condition cond, + FPTrapFlags trap); + + // FP conditional compare. + void fccmp(const VRegister& vn, + const VRegister& vm, + StatusFlags nzcv, + Condition cond); + + // FP signaling compare registers. + void fcmpe(const VRegister& vn, const VRegister& vm); + + // FP signaling compare immediate. 
+ void fcmpe(const VRegister& vn, double value); + + // FP conditional signaling compare. + void fccmpe(const VRegister& vn, + const VRegister& vm, + StatusFlags nzcv, + Condition cond); + + // FP conditional select. + void fcsel(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + Condition cond); + + // Common FP Convert functions. + void NEONFPConvertToInt(const Register& rd, + const VRegister& vn, + Instr op); + void NEONFPConvertToInt(const VRegister& vd, + const VRegister& vn, + Instr op); + + // FP convert between precisions. + void fcvt(const VRegister& vd, const VRegister& vn); + + // FP convert to higher precision. + void fcvtl(const VRegister& vd, const VRegister& vn); + + // FP convert to higher precision (second part). + void fcvtl2(const VRegister& vd, const VRegister& vn); + + // FP convert to lower precision. + void fcvtn(const VRegister& vd, const VRegister& vn); + + // FP convert to lower precision (second part). + void fcvtn2(const VRegister& vd, const VRegister& vn); + + // FP convert to lower precision, rounding to odd. + void fcvtxn(const VRegister& vd, const VRegister& vn); + + // FP convert to lower precision, rounding to odd (second part). + void fcvtxn2(const VRegister& vd, const VRegister& vn); + + // FP convert to signed integer, nearest with ties to away. + void fcvtas(const Register& rd, const VRegister& vn); + + // FP convert to unsigned integer, nearest with ties to away. + void fcvtau(const Register& rd, const VRegister& vn); + + // FP convert to signed integer, nearest with ties to away. + void fcvtas(const VRegister& vd, const VRegister& vn); + + // FP convert to unsigned integer, nearest with ties to away. + void fcvtau(const VRegister& vd, const VRegister& vn); + + // FP convert to signed integer, round towards -infinity. + void fcvtms(const Register& rd, const VRegister& vn); + + // FP convert to unsigned integer, round towards -infinity.
+ void fcvtmu(const Register& rd, const VRegister& vn); + + // FP convert to signed integer, round towards -infinity. + void fcvtms(const VRegister& vd, const VRegister& vn); + + // FP convert to unsigned integer, round towards -infinity. + void fcvtmu(const VRegister& vd, const VRegister& vn); + + // FP convert to signed integer, nearest with ties to even. + void fcvtns(const Register& rd, const VRegister& vn); + + // FP convert to unsigned integer, nearest with ties to even. + void fcvtnu(const Register& rd, const VRegister& vn); + + // FP convert to signed integer, nearest with ties to even. + void fcvtns(const VRegister& rd, const VRegister& vn); + + // FP JavaScript convert to signed integer, rounding toward zero [Armv8.3]. + void fjcvtzs(const Register& rd, const VRegister& vn); + + // FP convert to unsigned integer, nearest with ties to even. + void fcvtnu(const VRegister& rd, const VRegister& vn); + + // FP convert to signed integer or fixed-point, round towards zero. + void fcvtzs(const Register& rd, const VRegister& vn, int fbits = 0); + + // FP convert to unsigned integer or fixed-point, round towards zero. + void fcvtzu(const Register& rd, const VRegister& vn, int fbits = 0); + + // FP convert to signed integer or fixed-point, round towards zero. + void fcvtzs(const VRegister& vd, const VRegister& vn, int fbits = 0); + + // FP convert to unsigned integer or fixed-point, round towards zero. + void fcvtzu(const VRegister& vd, const VRegister& vn, int fbits = 0); + + // FP convert to signed integer, round towards +infinity. + void fcvtps(const Register& rd, const VRegister& vn); + + // FP convert to unsigned integer, round towards +infinity. + void fcvtpu(const Register& rd, const VRegister& vn); + + // FP convert to signed integer, round towards +infinity. + void fcvtps(const VRegister& vd, const VRegister& vn); + + // FP convert to unsigned integer, round towards +infinity. 
+ void fcvtpu(const VRegister& vd, const VRegister& vn); + + // Convert signed integer or fixed point to FP. + void scvtf(const VRegister& fd, const Register& rn, int fbits = 0); + + // Convert unsigned integer or fixed point to FP. + void ucvtf(const VRegister& fd, const Register& rn, int fbits = 0); + + // Convert signed integer or fixed-point to FP. + void scvtf(const VRegister& fd, const VRegister& vn, int fbits = 0); + + // Convert unsigned integer or fixed-point to FP. + void ucvtf(const VRegister& fd, const VRegister& vn, int fbits = 0); + + // Unsigned absolute difference. + void uabd(const VRegister& vd, + const VRegister& vn, + const VRegister& vm); + + // Signed absolute difference. + void sabd(const VRegister& vd, + const VRegister& vn, + const VRegister& vm); + + // Unsigned absolute difference and accumulate. + void uaba(const VRegister& vd, + const VRegister& vn, + const VRegister& vm); + + // Signed absolute difference and accumulate. + void saba(const VRegister& vd, + const VRegister& vn, + const VRegister& vm); + + // Add. + void add(const VRegister& vd, + const VRegister& vn, + const VRegister& vm); + + // Subtract. + void sub(const VRegister& vd, + const VRegister& vn, + const VRegister& vm); + + // Unsigned halving add. + void uhadd(const VRegister& vd, + const VRegister& vn, + const VRegister& vm); + + // Signed halving add. + void shadd(const VRegister& vd, + const VRegister& vn, + const VRegister& vm); + + // Unsigned rounding halving add. + void urhadd(const VRegister& vd, + const VRegister& vn, + const VRegister& vm); + + // Signed rounding halving add. + void srhadd(const VRegister& vd, + const VRegister& vn, + const VRegister& vm); + + // Unsigned halving sub. + void uhsub(const VRegister& vd, + const VRegister& vn, + const VRegister& vm); + + // Signed halving sub. + void shsub(const VRegister& vd, + const VRegister& vn, + const VRegister& vm); + + // Unsigned saturating add. 
+ void uqadd(const VRegister& vd, + const VRegister& vn, + const VRegister& vm); + + // Signed saturating add. + void sqadd(const VRegister& vd, + const VRegister& vn, + const VRegister& vm); + + // Unsigned saturating subtract. + void uqsub(const VRegister& vd, + const VRegister& vn, + const VRegister& vm); + + // Signed saturating subtract. + void sqsub(const VRegister& vd, + const VRegister& vn, + const VRegister& vm); + + // Add pairwise. + void addp(const VRegister& vd, + const VRegister& vn, + const VRegister& vm); + + // Add pair of elements scalar. + void addp(const VRegister& vd, + const VRegister& vn); + + // Multiply-add to accumulator. + void mla(const VRegister& vd, + const VRegister& vn, + const VRegister& vm); + + // Multiply-subtract to accumulator. + void mls(const VRegister& vd, + const VRegister& vn, + const VRegister& vm); + + // Multiply. + void mul(const VRegister& vd, + const VRegister& vn, + const VRegister& vm); + + // Multiply by scalar element. + void mul(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Multiply-add by scalar element. + void mla(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Multiply-subtract by scalar element. + void mls(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Signed long multiply-add by scalar element. + void smlal(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Signed long multiply-add by scalar element (second part). + void smlal2(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Unsigned long multiply-add by scalar element. + void umlal(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Unsigned long multiply-add by scalar element (second part). 
+ void umlal2(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Signed long multiply-sub by scalar element. + void smlsl(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Signed long multiply-sub by scalar element (second part). + void smlsl2(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Unsigned long multiply-sub by scalar element. + void umlsl(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Unsigned long multiply-sub by scalar element (second part). + void umlsl2(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Signed long multiply by scalar element. + void smull(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Signed long multiply by scalar element (second part). + void smull2(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Unsigned long multiply by scalar element. + void umull(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Unsigned long multiply by scalar element (second part). + void umull2(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Signed saturating doubling long multiply by element. + void sqdmull(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Signed saturating doubling long multiply by element (second part). + void sqdmull2(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Signed saturating doubling long multiply-add by element. + void sqdmlal(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Signed saturating doubling long multiply-add by element (second part).
+ void sqdmlal2(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Signed saturating doubling long multiply-sub by element. + void sqdmlsl(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Signed saturating doubling long multiply-sub by element (second part). + void sqdmlsl2(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Compare equal. + void cmeq(const VRegister& vd, + const VRegister& vn, + const VRegister& vm); + + // Compare signed greater than or equal. + void cmge(const VRegister& vd, + const VRegister& vn, + const VRegister& vm); + + // Compare signed greater than. + void cmgt(const VRegister& vd, + const VRegister& vn, + const VRegister& vm); + + // Compare unsigned higher. + void cmhi(const VRegister& vd, + const VRegister& vn, + const VRegister& vm); + + // Compare unsigned higher or same. + void cmhs(const VRegister& vd, + const VRegister& vn, + const VRegister& vm); + + // Compare bitwise test bits nonzero. + void cmtst(const VRegister& vd, + const VRegister& vn, + const VRegister& vm); + + // Compare bitwise equal to zero. + void cmeq(const VRegister& vd, + const VRegister& vn, + int value); + + // Compare signed greater than or equal to zero. + void cmge(const VRegister& vd, + const VRegister& vn, + int value); + + // Compare signed greater than zero. + void cmgt(const VRegister& vd, + const VRegister& vn, + int value); + + // Compare signed less than or equal to zero. + void cmle(const VRegister& vd, + const VRegister& vn, + int value); + + // Compare signed less than zero. + void cmlt(const VRegister& vd, + const VRegister& vn, + int value); + + // Signed shift left by register. + void sshl(const VRegister& vd, + const VRegister& vn, + const VRegister& vm); + + // Unsigned shift left by register. + void ushl(const VRegister& vd, + const VRegister& vn, + const VRegister& vm); + + // Signed saturating shift left by register.
+  void sqshl(const VRegister& vd,
+             const VRegister& vn,
+             const VRegister& vm);
+
+  // Unsigned saturating shift left by register.
+  void uqshl(const VRegister& vd,
+             const VRegister& vn,
+             const VRegister& vm);
+
+  // Signed rounding shift left by register.
+  void srshl(const VRegister& vd,
+             const VRegister& vn,
+             const VRegister& vm);
+
+  // Unsigned rounding shift left by register.
+  void urshl(const VRegister& vd,
+             const VRegister& vn,
+             const VRegister& vm);
+
+  // Signed saturating rounding shift left by register.
+  void sqrshl(const VRegister& vd,
+              const VRegister& vn,
+              const VRegister& vm);
+
+  // Unsigned saturating rounding shift left by register.
+  void uqrshl(const VRegister& vd,
+              const VRegister& vn,
+              const VRegister& vm);
+
+  // Bitwise and.
+  void and_(const VRegister& vd,
+            const VRegister& vn,
+            const VRegister& vm);
+
+  // Bitwise or (register operands).
+  void orr(const VRegister& vd,
+           const VRegister& vn,
+           const VRegister& vm);
+
+  // Bitwise or immediate. `left_shift` defaults to 0 when omitted.
+  void orr(const VRegister& vd,
+           const int imm8,
+           const int left_shift = 0);
+
+  // Move register to register.
+  void mov(const VRegister& vd,
+           const VRegister& vn);
+
+  // Bitwise orn.
+  void orn(const VRegister& vd,
+           const VRegister& vn,
+           const VRegister& vm);
+
+  // Bitwise eor.
+  void eor(const VRegister& vd,
+           const VRegister& vn,
+           const VRegister& vm);
+
+  // Bit clear immediate. `left_shift` defaults to 0 when omitted.
+  void bic(const VRegister& vd,
+           const int imm8,
+           const int left_shift = 0);
+
+  // Bit clear (register operands).
+  void bic(const VRegister& vd,
+           const VRegister& vn,
+           const VRegister& vm);
+
+  // Bitwise insert if false.
+  void bif(const VRegister& vd,
+           const VRegister& vn,
+           const VRegister& vm);
+
+  // Bitwise insert if true.
+  void bit(const VRegister& vd,
+           const VRegister& vn,
+           const VRegister& vm);
+
+  // Bitwise select.
+  void bsl(const VRegister& vd,
+           const VRegister& vn,
+           const VRegister& vm);
+
+  // Polynomial multiply.
+  void pmul(const VRegister& vd,
+            const VRegister& vn,
+            const VRegister& vm);
+
+  // Vector move immediate. `shift` defaults to LSL and `shift_amount` to 0.
+  void movi(const VRegister& vd,
+            const uint64_t imm,
+            Shift shift = LSL,
+            const int shift_amount = 0);
+
+  // Bitwise not.
+  // NOTE(review): not_() below carries the same description; presumably one
+  // is an alias of the other - confirm in the definitions.
+  void mvn(const VRegister& vd,
+           const VRegister& vn);
+
+  // Vector move inverted immediate. `shift` defaults to LSL and
+  // `shift_amount` to 0.
+  void mvni(const VRegister& vd,
+            const int imm8,
+            Shift shift = LSL,
+            const int shift_amount = 0);
+
+  // Signed saturating accumulate of unsigned value.
+  void suqadd(const VRegister& vd,
+              const VRegister& vn);
+
+  // Unsigned saturating accumulate of signed value.
+  void usqadd(const VRegister& vd,
+              const VRegister& vn);
+
+  // Absolute value.
+  void abs(const VRegister& vd,
+           const VRegister& vn);
+
+  // Signed saturating absolute value.
+  void sqabs(const VRegister& vd,
+             const VRegister& vn);
+
+  // Negate.
+  void neg(const VRegister& vd,
+           const VRegister& vn);
+
+  // Signed saturating negate.
+  void sqneg(const VRegister& vd,
+             const VRegister& vn);
+
+  // Bitwise not.
+  void not_(const VRegister& vd,
+            const VRegister& vn);
+
+  // Extract narrow.
+  void xtn(const VRegister& vd,
+           const VRegister& vn);
+
+  // Extract narrow (second part).
+  void xtn2(const VRegister& vd,
+            const VRegister& vn);
+
+  // Signed saturating extract narrow.
+  void sqxtn(const VRegister& vd,
+             const VRegister& vn);
+
+  // Signed saturating extract narrow (second part).
+  void sqxtn2(const VRegister& vd,
+              const VRegister& vn);
+
+  // Unsigned saturating extract narrow.
+  void uqxtn(const VRegister& vd,
+             const VRegister& vn);
+
+  // Unsigned saturating extract narrow (second part).
+  void uqxtn2(const VRegister& vd,
+              const VRegister& vn);
+
+  // Signed saturating extract unsigned narrow.
+  void sqxtun(const VRegister& vd,
+              const VRegister& vn);
+
+  // Signed saturating extract unsigned narrow (second part).
+  void sqxtun2(const VRegister& vd,
+               const VRegister& vn);
+
+  // Extract vector from pair of vectors.
+  void ext(const VRegister& vd,
+           const VRegister& vn,
+           const VRegister& vm,
+           int index);
+
+  // Duplicate vector element to vector or scalar.
+  void dup(const VRegister& vd,
+           const VRegister& vn,
+           int vn_index);
+
+  // Move vector element to scalar.
+  // NOTE(review): same operand list as dup(vd, vn, vn_index) above;
+  // presumably an alias - confirm in the definition.
+  void mov(const VRegister& vd,
+           const VRegister& vn,
+           int vn_index);
+
+  // Duplicate general-purpose register to vector.
+  void dup(const VRegister& vd,
+           const Register& rn);
+
+  // Insert vector element from another vector element.
+  void ins(const VRegister& vd,
+           int vd_index,
+           const VRegister& vn,
+           int vn_index);
+
+  // Move vector element to another vector element.
+  // NOTE(review): same operand list as ins(vd, vd_index, vn, vn_index)
+  // above; presumably an alias - confirm in the definition.
+  void mov(const VRegister& vd,
+           int vd_index,
+           const VRegister& vn,
+           int vn_index);
+
+  // Insert vector element from general-purpose register.
+  void ins(const VRegister& vd,
+           int vd_index,
+           const Register& rn);
+
+  // Move general-purpose register to a vector element.
+  // NOTE(review): same operand list as ins(vd, vd_index, rn) above;
+  // presumably an alias - confirm in the definition.
+  void mov(const VRegister& vd,
+           int vd_index,
+           const Register& rn);
+
+  // Unsigned move vector element to general-purpose register.
+  void umov(const Register& rd,
+            const VRegister& vn,
+            int vn_index);
+
+  // Move vector element to general-purpose register.
+  // NOTE(review): same operand list as umov() above; presumably an alias -
+  // confirm in the definition.
+  void mov(const Register& rd,
+           const VRegister& vn,
+           int vn_index);
+
+  // Signed move vector element to general-purpose register.
+  void smov(const Register& rd,
+            const VRegister& vn,
+            int vn_index);
+
+  // One-element structure load to one register.
+  void ld1(const VRegister& vt,
+           const MemOperand& src);
+
+  // One-element structure load to two registers.
+  void ld1(const VRegister& vt,
+           const VRegister& vt2,
+           const MemOperand& src);
+
+  // One-element structure load to three registers.
+  void ld1(const VRegister& vt,
+           const VRegister& vt2,
+           const VRegister& vt3,
+           const MemOperand& src);
+
+  // One-element structure load to four registers.
+  void ld1(const VRegister& vt,
+           const VRegister& vt2,
+           const VRegister& vt3,
+           const VRegister& vt4,
+           const MemOperand& src);
+
+  // One-element single structure load to one lane (`lane`) of `vt`.
+  void ld1(const VRegister& vt,
+           int lane,
+           const MemOperand& src);
+
+  // One-element single structure load to all lanes.
+  void ld1r(const VRegister& vt,
+            const MemOperand& src);
+
+  // Two-element structure load.
+  void ld2(const VRegister& vt,
+           const VRegister& vt2,
+           const MemOperand& src);
+
+  // Two-element single structure load to one lane (`lane`) of each register.
+  void ld2(const VRegister& vt,
+           const VRegister& vt2,
+           int lane,
+           const MemOperand& src);
+
+  // Two-element single structure load to all lanes.
+  void ld2r(const VRegister& vt,
+            const VRegister& vt2,
+            const MemOperand& src);
+
+  // Three-element structure load.
+  void ld3(const VRegister& vt,
+           const VRegister& vt2,
+           const VRegister& vt3,
+           const MemOperand& src);
+
+  // Three-element single structure load to one lane (`lane`) of each register.
+  void ld3(const VRegister& vt,
+           const VRegister& vt2,
+           const VRegister& vt3,
+           int lane,
+           const MemOperand& src);
+
+  // Three-element single structure load to all lanes.
+  void ld3r(const VRegister& vt,
+            const VRegister& vt2,
+            const VRegister& vt3,
+            const MemOperand& src);
+
+  // Four-element structure load.
+  void ld4(const VRegister& vt,
+           const VRegister& vt2,
+           const VRegister& vt3,
+           const VRegister& vt4,
+           const MemOperand& src);
+
+  // Four-element single structure load to one lane (`lane`) of each register.
+  void ld4(const VRegister& vt,
+           const VRegister& vt2,
+           const VRegister& vt3,
+           const VRegister& vt4,
+           int lane,
+           const MemOperand& src);
+
+  // Four-element single structure load to all lanes.
+  void ld4r(const VRegister& vt,
+            const VRegister& vt2,
+            const VRegister& vt3,
+            const VRegister& vt4,
+            const MemOperand& src);
+
+  // Count leading sign bits.
+  void cls(const VRegister& vd,
+           const VRegister& vn);
+
+  // Count leading zero bits (vector).
+  void clz(const VRegister& vd,
+           const VRegister& vn);
+
+  // Population count per byte.
+  void cnt(const VRegister& vd,
+           const VRegister& vn);
+
+  // Reverse bit order.
+  void rbit(const VRegister& vd,
+            const VRegister& vn);
+
+  // Reverse elements in 16-bit halfwords.
+  void rev16(const VRegister& vd,
+             const VRegister& vn);
+
+  // Reverse elements in 32-bit words.
+  void rev32(const VRegister& vd,
+             const VRegister& vn);
+
+  // Reverse elements in 64-bit doublewords.
+  void rev64(const VRegister& vd,
+             const VRegister& vn);
+
+  // Unsigned reciprocal square root estimate.
+  void ursqrte(const VRegister& vd,
+               const VRegister& vn);
+
+  // Unsigned reciprocal estimate.
+  void urecpe(const VRegister& vd,
+              const VRegister& vn);
+
+  // Signed pairwise long add.
+  void saddlp(const VRegister& vd,
+              const VRegister& vn);
+
+  // Unsigned pairwise long add.
+  void uaddlp(const VRegister& vd,
+              const VRegister& vn);
+
+  // Signed pairwise long add and accumulate.
+  void sadalp(const VRegister& vd,
+              const VRegister& vn);
+
+  // Unsigned pairwise long add and accumulate.
+  void uadalp(const VRegister& vd,
+              const VRegister& vn);
+
+  // Shift left by immediate (`shift`).
+  void shl(const VRegister& vd,
+           const VRegister& vn,
+           int shift);
+
+  // Signed saturating shift left by immediate (`shift`).
+  void sqshl(const VRegister& vd,
+             const VRegister& vn,
+             int shift);
+
+  // Signed saturating shift left unsigned by immediate (`shift`).
+  void sqshlu(const VRegister& vd,
+              const VRegister& vn,
+              int shift);
+
+  // Unsigned saturating shift left by immediate (`shift`).
+  void uqshl(const VRegister& vd,
+             const VRegister& vn,
+             int shift);
+
+  // Signed shift left long by immediate (`shift`).
+  void sshll(const VRegister& vd,
+             const VRegister& vn,
+             int shift);
+
+  // Signed shift left long by immediate (second part).
+  void sshll2(const VRegister& vd,
+              const VRegister& vn,
+              int shift);
+
+  // Signed extend long.
+  void sxtl(const VRegister& vd,
+            const VRegister& vn);
+
+  // Signed extend long (second part).
+  void sxtl2(const VRegister& vd,
+             const VRegister& vn);
+
+  // Unsigned shift left long by immediate.
+  void ushll(const VRegister& vd,
+             const VRegister& vn,
+             int shift);
+
+  // Unsigned shift left long by immediate (second part).
+  void ushll2(const VRegister& vd,
+              const VRegister& vn,
+              int shift);
+
+  // Shift left long by element size.
+  // NOTE(review): the description says "by element size" yet the signature
+  // takes `shift`; presumably `shift` must equal the source element size -
+  // confirm in the definition.
+  void shll(const VRegister& vd,
+            const VRegister& vn,
+            int shift);
+
+  // Shift left long by element size (second part).
+  void shll2(const VRegister& vd,
+             const VRegister& vn,
+             int shift);
+
+  // Unsigned extend long.
+  void uxtl(const VRegister& vd,
+            const VRegister& vn);
+
+  // Unsigned extend long (second part).
+  void uxtl2(const VRegister& vd,
+             const VRegister& vn);
+
+  // Shift left by immediate and insert.
+  void sli(const VRegister& vd,
+           const VRegister& vn,
+           int shift);
+
+  // Shift right by immediate and insert.
+  void sri(const VRegister& vd,
+           const VRegister& vn,
+           int shift);
+
+  // Signed maximum.
+  void smax(const VRegister& vd,
+            const VRegister& vn,
+            const VRegister& vm);
+
+  // Signed pairwise maximum.
+  void smaxp(const VRegister& vd,
+             const VRegister& vn,
+             const VRegister& vm);
+
+  // Add across vector (single source operand `vn`).
+  void addv(const VRegister& vd,
+            const VRegister& vn);
+
+  // Signed add long across vector.
+  void saddlv(const VRegister& vd,
+              const VRegister& vn);
+
+  // Unsigned add long across vector.
+  void uaddlv(const VRegister& vd,
+              const VRegister& vn);
+
+  // FP maximum number across vector.
+  void fmaxnmv(const VRegister& vd,
+               const VRegister& vn);
+
+  // FP maximum across vector.
+  void fmaxv(const VRegister& vd,
+             const VRegister& vn);
+
+  // FP minimum number across vector.
+  void fminnmv(const VRegister& vd,
+               const VRegister& vn);
+
+  // FP minimum across vector.
+  void fminv(const VRegister& vd,
+             const VRegister& vn);
+
+  // Signed maximum across vector.
+  void smaxv(const VRegister& vd,
+             const VRegister& vn);
+
+  // Signed minimum.
+  void smin(const VRegister& vd,
+            const VRegister& vn,
+            const VRegister& vm);
+
+  // Signed pairwise minimum.
+  void sminp(const VRegister& vd,
+             const VRegister& vn,
+             const VRegister& vm);
+
+  // Signed minimum across vector.
+  void sminv(const VRegister& vd,
+             const VRegister& vn);
+
+  // One-element structure store from one register.
+  // NOTE(review): in the st1..st4 declarations the memory operand is named
+  // `src` although these are stores; presumably it is the destination
+  // address - confirm in the definitions.
+  void st1(const VRegister& vt,
+           const MemOperand& src);
+
+  // One-element structure store from two registers.
+  void st1(const VRegister& vt,
+           const VRegister& vt2,
+           const MemOperand& src);
+
+  // One-element structure store from three registers.
+  void st1(const VRegister& vt,
+           const VRegister& vt2,
+           const VRegister& vt3,
+           const MemOperand& src);
+
+  // One-element structure store from four registers.
+  void st1(const VRegister& vt,
+           const VRegister& vt2,
+           const VRegister& vt3,
+           const VRegister& vt4,
+           const MemOperand& src);
+
+  // One-element single structure store from one lane.
+  void st1(const VRegister& vt,
+           int lane,
+           const MemOperand& src);
+
+  // Two-element structure store from two registers.
+  void st2(const VRegister& vt,
+           const VRegister& vt2,
+           const MemOperand& src);
+
+  // Two-element single structure store from two lanes.
+  void st2(const VRegister& vt,
+           const VRegister& vt2,
+           int lane,
+           const MemOperand& src);
+
+  // Three-element structure store from three registers.
+  void st3(const VRegister& vt,
+           const VRegister& vt2,
+           const VRegister& vt3,
+           const MemOperand& src);
+
+  // Three-element single structure store from three lanes.
+  void st3(const VRegister& vt,
+           const VRegister& vt2,
+           const VRegister& vt3,
+           int lane,
+           const MemOperand& src);
+
+  // Four-element structure store from four registers.
+  void st4(const VRegister& vt,
+           const VRegister& vt2,
+           const VRegister& vt3,
+           const VRegister& vt4,
+           const MemOperand& src);
+
+  // Four-element single structure store from four lanes.
+  void st4(const VRegister& vt,
+           const VRegister& vt2,
+           const VRegister& vt3,
+           const VRegister& vt4,
+           int lane,
+           const MemOperand& src);
+
+  // Unsigned add long.
+  void uaddl(const VRegister& vd,
+             const VRegister& vn,
+             const VRegister& vm);
+
+  // Unsigned add long (second part).
+  void uaddl2(const VRegister& vd,
+              const VRegister& vn,
+              const VRegister& vm);
+
+  // Unsigned add wide.
+  void uaddw(const VRegister& vd,
+             const VRegister& vn,
+             const VRegister& vm);
+
+  // Unsigned add wide (second part).
+  void uaddw2(const VRegister& vd,
+              const VRegister& vn,
+              const VRegister& vm);
+
+  // Signed add long.
+  void saddl(const VRegister& vd,
+             const VRegister& vn,
+             const VRegister& vm);
+
+  // Signed add long (second part).
+  void saddl2(const VRegister& vd,
+              const VRegister& vn,
+              const VRegister& vm);
+
+  // Signed add wide.
+  void saddw(const VRegister& vd,
+             const VRegister& vn,
+             const VRegister& vm);
+
+  // Signed add wide (second part).
+  void saddw2(const VRegister& vd,
+              const VRegister& vn,
+              const VRegister& vm);
+
+  // Unsigned subtract long.
+  void usubl(const VRegister& vd,
+             const VRegister& vn,
+             const VRegister& vm);
+
+  // Unsigned subtract long (second part).
+  void usubl2(const VRegister& vd,
+              const VRegister& vn,
+              const VRegister& vm);
+
+  // Unsigned subtract wide.
+  void usubw(const VRegister& vd,
+             const VRegister& vn,
+             const VRegister& vm);
+
+  // Unsigned subtract wide (second part).
+  void usubw2(const VRegister& vd,
+              const VRegister& vn,
+              const VRegister& vm);
+
+  // Signed subtract long.
+  void ssubl(const VRegister& vd,
+             const VRegister& vn,
+             const VRegister& vm);
+
+  // Signed subtract long (second part).
+  void ssubl2(const VRegister& vd,
+              const VRegister& vn,
+              const VRegister& vm);
+
+  // Signed subtract wide.
+  void ssubw(const VRegister& vd,
+             const VRegister& vn,
+             const VRegister& vm);
+
+  // Signed integer subtract wide (second part).
+  void ssubw2(const VRegister& vd,
+              const VRegister& vn,
+              const VRegister& vm);
+
+  // Unsigned maximum.
+  void umax(const VRegister& vd,
+            const VRegister& vn,
+            const VRegister& vm);
+
+  // Unsigned pairwise maximum.
+  void umaxp(const VRegister& vd,
+             const VRegister& vn,
+             const VRegister& vm);
+
+  // Unsigned maximum across vector (single source operand `vn`).
+  void umaxv(const VRegister& vd,
+             const VRegister& vn);
+
+  // Unsigned minimum.
+  void umin(const VRegister& vd,
+            const VRegister& vn,
+            const VRegister& vm);
+
+  // Unsigned pairwise minimum.
+  void uminp(const VRegister& vd,
+             const VRegister& vn,
+             const VRegister& vm);
+
+  // Unsigned minimum across vector (single source operand `vn`).
+  void uminv(const VRegister& vd,
+             const VRegister& vn);
+
+  // Transpose vectors (primary).
+  void trn1(const VRegister& vd,
+            const VRegister& vn,
+            const VRegister& vm);
+
+  // Transpose vectors (secondary).
+  void trn2(const VRegister& vd,
+            const VRegister& vn,
+            const VRegister& vm);
+
+  // Unzip vectors (primary).
+  void uzp1(const VRegister& vd,
+            const VRegister& vn,
+            const VRegister& vm);
+
+  // Unzip vectors (secondary).
+  void uzp2(const VRegister& vd,
+            const VRegister& vn,
+            const VRegister& vm);
+
+  // Zip vectors (primary).
+  void zip1(const VRegister& vd,
+            const VRegister& vn,
+            const VRegister& vm);
+
+  // Zip vectors (secondary).
+  void zip2(const VRegister& vd,
+            const VRegister& vn,
+            const VRegister& vm);
+
+  // Signed shift right by immediate (`shift`).
+  void sshr(const VRegister& vd,
+            const VRegister& vn,
+            int shift);
+
+  // Unsigned shift right by immediate (`shift`).
+  void ushr(const VRegister& vd,
+            const VRegister& vn,
+            int shift);
+
+  // Signed rounding shift right by immediate (`shift`).
+  void srshr(const VRegister& vd,
+             const VRegister& vn,
+             int shift);
+
+  // Unsigned rounding shift right by immediate (`shift`).
+  void urshr(const VRegister& vd,
+             const VRegister& vn,
+             int shift);
+
+  // Signed shift right by immediate and accumulate.
+  void ssra(const VRegister& vd,
+            const VRegister& vn,
+            int shift);
+
+  // Unsigned shift right by immediate and accumulate.
+  void usra(const VRegister& vd,
+            const VRegister& vn,
+            int shift);
+
+  // Signed rounding shift right by immediate and accumulate.
+  void srsra(const VRegister& vd,
+             const VRegister& vn,
+             int shift);
+
+  // Unsigned rounding shift right by immediate and accumulate.
+  void ursra(const VRegister& vd,
+             const VRegister& vn,
+             int shift);
+
+  // Shift right narrow by immediate.
+  // NOTE(review): for the narrowing shifts below, the "(second part)"
+  // variants presumably fill the other half of `vd` - confirm against the
+  // instruction reference.
+  void shrn(const VRegister& vd,
+            const VRegister& vn,
+            int shift);
+
+  // Shift right narrow by immediate (second part).
+  void shrn2(const VRegister& vd,
+             const VRegister& vn,
+             int shift);
+
+  // Rounding shift right narrow by immediate.
+  void rshrn(const VRegister& vd,
+             const VRegister& vn,
+             int shift);
+
+  // Rounding shift right narrow by immediate (second part).
+  void rshrn2(const VRegister& vd,
+              const VRegister& vn,
+              int shift);
+
+  // Unsigned saturating shift right narrow by immediate.
+  void uqshrn(const VRegister& vd,
+              const VRegister& vn,
+              int shift);
+
+  // Unsigned saturating shift right narrow by immediate (second part).
+  void uqshrn2(const VRegister& vd,
+               const VRegister& vn,
+               int shift);
+
+  // Unsigned saturating rounding shift right narrow by immediate.
+  void uqrshrn(const VRegister& vd,
+               const VRegister& vn,
+               int shift);
+
+  // Unsigned saturating rounding shift right narrow by immediate (second part).
+  void uqrshrn2(const VRegister& vd,
+                const VRegister& vn,
+                int shift);
+
+  // Signed saturating shift right narrow by immediate.
+  void sqshrn(const VRegister& vd,
+              const VRegister& vn,
+              int shift);
+
+  // Signed saturating shift right narrow by immediate (second part).
+  void sqshrn2(const VRegister& vd,
+               const VRegister& vn,
+               int shift);
+
+  // Signed saturating rounded shift right narrow by immediate.
+  void sqrshrn(const VRegister& vd,
+               const VRegister& vn,
+               int shift);
+
+  // Signed saturating rounded shift right narrow by immediate (second part).
+  void sqrshrn2(const VRegister& vd,
+                const VRegister& vn,
+                int shift);
+
+  // Signed saturating shift right unsigned narrow by immediate.
+  void sqshrun(const VRegister& vd,
+               const VRegister& vn,
+               int shift);
+
+  // Signed saturating shift right unsigned narrow by immediate (second part).
+  void sqshrun2(const VRegister& vd,
+                const VRegister& vn,
+                int shift);
+
+  // Signed saturating rounded shift right unsigned narrow by immediate.
+  void sqrshrun(const VRegister& vd,
+                const VRegister& vn,
+                int shift);
+
+  // Signed sat rounded shift right unsigned narrow by immediate (second part).
+  void sqrshrun2(const VRegister& vd,
+                 const VRegister& vn,
+                 int shift);
+
+  // FP reciprocal step.
+  void frecps(const VRegister& vd,
+              const VRegister& vn,
+              const VRegister& vm);
+
+  // FP reciprocal estimate.
+  void frecpe(const VRegister& vd,
+              const VRegister& vn);
+
+  // FP reciprocal square root estimate.
+  void frsqrte(const VRegister& vd,
+               const VRegister& vn);
+
+  // FP reciprocal square root step.
+  void frsqrts(const VRegister& vd,
+               const VRegister& vn,
+               const VRegister& vm);
+
+  // Signed absolute difference and accumulate long.
+  void sabal(const VRegister& vd,
+             const VRegister& vn,
+             const VRegister& vm);
+
+  // Signed absolute difference and accumulate long (second part).
+  void sabal2(const VRegister& vd,
+              const VRegister& vn,
+              const VRegister& vm);
+
+  // Unsigned absolute difference and accumulate long.
+  void uabal(const VRegister& vd,
+             const VRegister& vn,
+             const VRegister& vm);
+
+  // Unsigned absolute difference and accumulate long (second part).
+  void uabal2(const VRegister& vd,
+              const VRegister& vn,
+              const VRegister& vm);
+
+  // Signed absolute difference long.
+  void sabdl(const VRegister& vd,
+             const VRegister& vn,
+             const VRegister& vm);
+
+  // Signed absolute difference long (second part).
+  void sabdl2(const VRegister& vd,
+              const VRegister& vn,
+              const VRegister& vm);
+
+  // Unsigned absolute difference long.
+  void uabdl(const VRegister& vd,
+             const VRegister& vn,
+             const VRegister& vm);
+
+  // Unsigned absolute difference long (second part).
+  void uabdl2(const VRegister& vd,
+              const VRegister& vn,
+              const VRegister& vm);
+
+  // Polynomial multiply long.
+  void pmull(const VRegister& vd,
+             const VRegister& vn,
+             const VRegister& vm);
+
+  // Polynomial multiply long (second part).
+  void pmull2(const VRegister& vd,
+              const VRegister& vn,
+              const VRegister& vm);
+
+  // Signed long multiply-add (vector operands, no element index).
+  void smlal(const VRegister& vd,
+             const VRegister& vn,
+             const VRegister& vm);
+
+  // Signed long multiply-add (second part).
+  void smlal2(const VRegister& vd,
+              const VRegister& vn,
+              const VRegister& vm);
+
+  // Unsigned long multiply-add (vector operands, no element index).
+  void umlal(const VRegister& vd,
+             const VRegister& vn,
+             const VRegister& vm);
+
+  // Unsigned long multiply-add (second part).
+  void umlal2(const VRegister& vd,
+              const VRegister& vn,
+              const VRegister& vm);
+
+  // Signed long multiply-sub (vector operands, no element index).
+  void smlsl(const VRegister& vd,
+             const VRegister& vn,
+             const VRegister& vm);
+
+  // Signed long multiply-sub (second part).
+  void smlsl2(const VRegister& vd,
+              const VRegister& vn,
+              const VRegister& vm);
+
+  // Unsigned long multiply-sub (vector operands, no element index).
+  void umlsl(const VRegister& vd,
+             const VRegister& vn,
+             const VRegister& vm);
+
+  // Unsigned long multiply-sub (second part).
+  void umlsl2(const VRegister& vd,
+              const VRegister& vn,
+              const VRegister& vm);
+
+  // Signed long multiply (vector operands, no element index).
+  void smull(const VRegister& vd,
+             const VRegister& vn,
+             const VRegister& vm);
+
+  // Signed long multiply (second part).
+  void smull2(const VRegister& vd,
+              const VRegister& vn,
+              const VRegister& vm);
+
+  // Signed saturating doubling long multiply-add.
+  void sqdmlal(const VRegister& vd,
+               const VRegister& vn,
+               const VRegister& vm);
+
+  // Signed saturating doubling long multiply-add (second part).
+  void sqdmlal2(const VRegister& vd,
+                const VRegister& vn,
+                const VRegister& vm);
+
+  // Signed saturating doubling long multiply-subtract.
+  void sqdmlsl(const VRegister& vd,
+               const VRegister& vn,
+               const VRegister& vm);
+
+  // Signed saturating doubling long multiply-subtract (second part).
+  void sqdmlsl2(const VRegister& vd,
+                const VRegister& vn,
+                const VRegister& vm);
+
+  // Signed saturating doubling long multiply.
+  void sqdmull(const VRegister& vd,
+               const VRegister& vn,
+               const VRegister& vm);
+
+  // Signed saturating doubling long multiply (second part).
+  void sqdmull2(const VRegister& vd,
+                const VRegister& vn,
+                const VRegister& vm);
+
+  // Signed saturating doubling multiply returning high half.
+  void sqdmulh(const VRegister& vd,
+               const VRegister& vn,
+               const VRegister& vm);
+
+  // Signed saturating rounding doubling multiply returning high half.
+  void sqrdmulh(const VRegister& vd,
+                const VRegister& vn,
+                const VRegister& vm);
+
+  // Signed saturating doubling multiply by element returning high half.
+  void sqdmulh(const VRegister& vd,
+               const VRegister& vn,
+               const VRegister& vm,
+               int vm_index);
+
+  // Signed saturating rounding doubling multiply by element returning high half.
+  void sqrdmulh(const VRegister& vd,
+                const VRegister& vn,
+                const VRegister& vm,
+                int vm_index);
+
+  // Unsigned long multiply.
+  void umull(const VRegister& vd,
+             const VRegister& vn,
+             const VRegister& vm);
+
+  // Unsigned long multiply (second part).
+  void umull2(const VRegister& vd,
+              const VRegister& vn,
+              const VRegister& vm);
+
+  // Add narrow returning high half.
+  void addhn(const VRegister& vd,
+             const VRegister& vn,
+             const VRegister& vm);
+
+  // Add narrow returning high half (second part).
+  void addhn2(const VRegister& vd,
+              const VRegister& vn,
+              const VRegister& vm);
+
+  // Rounding add narrow returning high half.
+  void raddhn(const VRegister& vd,
+              const VRegister& vn,
+              const VRegister& vm);
+
+  // Rounding add narrow returning high half (second part).
+  void raddhn2(const VRegister& vd,
+               const VRegister& vn,
+               const VRegister& vm);
+
+  // Subtract narrow returning high half.
+  void subhn(const VRegister& vd,
+             const VRegister& vn,
+             const VRegister& vm);
+
+  // Subtract narrow returning high half (second part).
+  void subhn2(const VRegister& vd,
+              const VRegister& vn,
+              const VRegister& vm);
+
+  // Rounding subtract narrow returning high half.
+  void rsubhn(const VRegister& vd,
+              const VRegister& vn,
+              const VRegister& vm);
+
+  // Rounding subtract narrow returning high half (second part).
+  void rsubhn2(const VRegister& vd,
+               const VRegister& vn,
+               const VRegister& vm);
+
+  // FP vector multiply accumulate.
+  void fmla(const VRegister& vd,
+            const VRegister& vn,
+            const VRegister& vm);
+
+  // FP vector multiply subtract.
+  void fmls(const VRegister& vd,
+            const VRegister& vn,
+            const VRegister& vm);
+
+  // FP vector multiply extended.
+  void fmulx(const VRegister& vd,
+             const VRegister& vn,
+             const VRegister& vm);
+
+  // FP absolute greater than or equal.
+  void facge(const VRegister& vd,
+             const VRegister& vn,
+             const VRegister& vm);
+
+  // FP absolute greater than.
+  void facgt(const VRegister& vd,
+             const VRegister& vn,
+             const VRegister& vm);
+
+  // FP multiply by element.
+  // NOTE(review): in the by-element forms below `vm_index` presumably
+  // selects the element of `vm` - confirm in the definitions.
+  void fmul(const VRegister& vd,
+            const VRegister& vn,
+            const VRegister& vm,
+            int vm_index);
+
+  // FP fused multiply-add to accumulator by element.
+  void fmla(const VRegister& vd,
+            const VRegister& vn,
+            const VRegister& vm,
+            int vm_index);
+
+  // FP fused multiply-sub from accumulator by element.
+  void fmls(const VRegister& vd,
+            const VRegister& vn,
+            const VRegister& vm,
+            int vm_index);
+
+  // FP multiply extended by element.
+  void fmulx(const VRegister& vd,
+             const VRegister& vn,
+             const VRegister& vm,
+             int vm_index);
+
+  // FP compare equal.
+  void fcmeq(const VRegister& vd,
+             const VRegister& vn,
+             const VRegister& vm);
+
+  // FP compare greater than.
+  void fcmgt(const VRegister& vd,
+             const VRegister& vn,
+             const VRegister& vm);
+
+  // FP compare greater than or equal.
+  void fcmge(const VRegister& vd,
+             const VRegister& vn,
+             const VRegister& vm);
+
+  // FP compare equal to zero.
+  // NOTE(review): for the five compare-to-zero overloads that follow, `imm`
+  // is presumably required to be 0.0 - confirm in the definitions.
+  void fcmeq(const VRegister& vd,
+             const VRegister& vn,
+             double imm);
+
+  // FP compare greater than zero.
+  void fcmgt(const VRegister& vd,
+             const VRegister& vn,
+             double imm);
+
+  // FP compare greater than or equal to zero.
+  void fcmge(const VRegister& vd,
+             const VRegister& vn,
+             double imm);
+
+  // FP compare less than or equal to zero.
+  void fcmle(const VRegister& vd,
+             const VRegister& vn,
+             double imm);
+
+  // FP compare less than zero.
+  void fcmlt(const VRegister& vd,
+             const VRegister& vn,
+             double imm);
+
+  // FP absolute difference.
+  void fabd(const VRegister& vd,
+            const VRegister& vn,
+            const VRegister& vm);
+
+  // FP pairwise add vector.
+  void faddp(const VRegister& vd,
+             const VRegister& vn,
+             const VRegister& vm);
+
+  // FP pairwise add scalar.
+  void faddp(const VRegister& vd,
+             const VRegister& vn);
+
+  // FP pairwise maximum vector.
+  void fmaxp(const VRegister& vd,
+             const VRegister& vn,
+             const VRegister& vm);
+
+  // FP pairwise maximum scalar.
+  void fmaxp(const VRegister& vd,
+             const VRegister& vn);
+
+  // FP pairwise minimum vector.
+  void fminp(const VRegister& vd,
+             const VRegister& vn,
+             const VRegister& vm);
+
+  // FP pairwise minimum scalar.
+  void fminp(const VRegister& vd,
+             const VRegister& vn);
+
+  // FP pairwise maximum number vector.
+  void fmaxnmp(const VRegister& vd,
+               const VRegister& vn,
+               const VRegister& vm);
+
+  // FP pairwise maximum number scalar.
+  void fmaxnmp(const VRegister& vd,
+               const VRegister& vn);
+
+  // FP pairwise minimum number vector.
+  void fminnmp(const VRegister& vd,
+               const VRegister& vn,
+               const VRegister& vm);
+
+  // FP pairwise minimum number scalar.
+  void fminnmp(const VRegister& vd,
+               const VRegister& vn);
+
+  // Emit generic instructions.
+
+  // Pass one raw instruction word straight to Emit().
+  void dci(Instr raw_inst) {
+    Emit(raw_inst);
+  }
+
+  // Pass a 32-bit data word to EmitData().
+  void dc32(uint32_t data) { EmitData(&data, sizeof(uint32_t)); }
+
+  // Pass a 64-bit data word to EmitData().
+  void dc64(uint64_t data) { EmitData(&data, sizeof(uint64_t)); }
+
+  // Code generation helpers.
+
+  // Register encoding.
+  //
+  // Each helper shifts the register code into the named instruction field
+  // and asserts that the code is not kSPRegInternalCode.
+  static Instr Rd(CPURegister rd) {
+    const auto code = rd.code();
+    VIXL_ASSERT(code != kSPRegInternalCode);
+    return code << Rd_offset;
+  }
+
+  static Instr Rn(CPURegister rn) {
+    const auto code = rn.code();
+    VIXL_ASSERT(code != kSPRegInternalCode);
+    return code << Rn_offset;
+  }
+
+  static Instr Rm(CPURegister rm) {
+    const auto code = rm.code();
+    VIXL_ASSERT(code != kSPRegInternalCode);
+    return code << Rm_offset;
+  }
+
+  // Same as Rm(), but additionally asserts that `rm` is not the zero
+  // register.
+  static Instr RmNot31(CPURegister rm) {
+    VIXL_ASSERT(rm.code() != kSPRegInternalCode);
+    VIXL_ASSERT(!rm.IsZero());
+    const Instr field = Rm(rm);
+    return field;
+  }
+
+  static Instr Ra(CPURegister ra) {
+    const auto code = ra.code();
+    VIXL_ASSERT(code != kSPRegInternalCode);
+    return code << Ra_offset;
+  }
+
+  static Instr Rt(CPURegister rt) {
+    const auto code = rt.code();
+    VIXL_ASSERT(code != kSPRegInternalCode);
+    return code << Rt_offset;
+  }
+
+  static Instr Rt2(CPURegister rt2) {
+    const auto code = rt2.code();
+    VIXL_ASSERT(code != kSPRegInternalCode);
+    return code << Rt2_offset;
+  }
+
+  static Instr Rs(CPURegister rs) {
+    const auto code = rs.code();
+    VIXL_ASSERT(code != kSPRegInternalCode);
+    return code << Rs_offset;
+  }
+
+  // These encoding functions allow the stack pointer to be encoded, and
+  // disallow the zero register. The code is masked with kRegCodeMask before
+  // it is shifted into place.
+  static Instr RdSP(Register rd) {
+    VIXL_ASSERT(!rd.IsZero());
+    const auto masked = rd.code() & kRegCodeMask;
+    return masked << Rd_offset;
+  }
+
+  static Instr RnSP(Register rn) {
+    VIXL_ASSERT(!rn.IsZero());
+    const auto masked = rn.code() & kRegCodeMask;
+    return masked << Rn_offset;
+  }
+
+  // Flags encoding.
+  static Instr Flags(FlagsUpdate S) {
+    if (S == LeaveFlags) {
+      return 0 << FlagsUpdate_offset;
+    }
+    if (S == SetFlags) {
+      return 1 << FlagsUpdate_offset;
+    }
+    // Any other value is a caller error.
+    VIXL_UNREACHABLE();
+    return 0;
+  }
+
+  // Shift the condition code into the condition field.
+  static Instr Cond(Condition cond) {
+    return cond << Condition_offset;
+  }
+
+  // PC-relative address encoding.
+  //
+  // The 21-bit immediate is split into a low field and a high field; each
+  // half is shifted into place and masked before the two are combined.
+  static Instr ImmPCRelAddress(int imm21) {
+    VIXL_ASSERT(IsInt21(imm21));
+    const Instr raw = static_cast<Instr>(TruncateToUint21(imm21));
+    const Instr lo_field = raw << ImmPCRelLo_offset;
+    const Instr hi_field = (raw >> ImmPCRelLo_width) << ImmPCRelHi_offset;
+    return (hi_field & ImmPCRelHi_mask) | (lo_field & ImmPCRelLo_mask);
+  }
+
+  // Branch encoding.
+  //
+  // Each helper asserts that the offset fits the signed field width, then
+  // truncates it and shifts it into the field.
+  static Instr ImmUncondBranch(int imm26) {
+    VIXL_ASSERT(IsInt26(imm26));
+    const auto field = TruncateToUint26(imm26) << ImmUncondBranch_offset;
+    return field;
+  }
+
+  static Instr ImmCondBranch(int imm19) {
+    VIXL_ASSERT(IsInt19(imm19));
+    const auto field = TruncateToUint19(imm19) << ImmCondBranch_offset;
+    return field;
+  }
+
+  static Instr ImmCmpBranch(int imm19) {
+    VIXL_ASSERT(IsInt19(imm19));
+    const auto field = TruncateToUint19(imm19) << ImmCmpBranch_offset;
+    return field;
+  }
+
+  static Instr ImmTestBranch(int imm14) {
+    VIXL_ASSERT(IsInt14(imm14));
+    const auto field = TruncateToUint14(imm14) << ImmTestBranch_offset;
+    return field;
+  }
+
+  // Encode the bit position tested by a test-and-branch: bit 5 of `bit_pos`
+  // goes into one field, the remaining low bits into another.
+  static Instr ImmTestBranchBit(unsigned bit_pos) {
+    VIXL_ASSERT(IsUint6(bit_pos));
+    // Subtract five from the shift offset, as we need bit 5 from bit_pos.
+    const unsigned high_bit =
+        (bit_pos << (ImmTestBranchBit5_offset - 5)) & ImmTestBranchBit5_mask;
+    const unsigned low_bits =
+        (bit_pos << ImmTestBranchBit40_offset) & ImmTestBranchBit40_mask;
+    return high_bit | low_bits;
+  }
+
+  // Data Processing encoding.
+
+  // Size flag: SixtyFourBits for a 64-bit register, ThirtyTwoBits otherwise.
+  static Instr SF(Register rd) {
+    if (rd.Is64Bits()) {
+      return SixtyFourBits;
+    }
+    return ThirtyTwoBits;
+  }
+
+  // Encode an add/sub immediate. A value that fits in 12 bits is used
+  // as-is; anything else is encoded as (imm >> 12) with the shift bit set.
+  static Instr ImmAddSub(int imm) {
+    VIXL_ASSERT(IsImmAddSub(imm));
+    if (!IsUint12(imm)) {
+      return ((imm >> 12) << ImmAddSub_offset) | (1 << ShiftAddSub_offset);
+    }
+    // No shift required.
+    return imm << ImmAddSub_offset;
+  }
+
+  static Instr ImmS(unsigned imms, unsigned reg_size) {
+    VIXL_ASSERT(((reg_size == kWRegSize) && IsUint5(imms)) ||
+                ((reg_size == kXRegSize) && IsUint6(imms)));
+    USE(reg_size);
+    return imms << ImmS_offset;
+  }
+
+  static Instr ImmR(unsigned immr, unsigned reg_size) {
+    VIXL_ASSERT(((reg_size == kWRegSize) && IsUint5(immr)) ||
+                ((reg_size == kXRegSize) && IsUint6(immr)));
+    USE(reg_size);
+    // Implied by the assertion above; kept as in the original.
+    VIXL_ASSERT(IsUint6(immr));
+    return immr << ImmR_offset;
+  }
+
+  static Instr ImmSetBits(unsigned imms, unsigned reg_size) {
+    VIXL_ASSERT((reg_size == kXRegSize) || (reg_size == kWRegSize));
+    VIXL_ASSERT(IsUint6(imms));
+    VIXL_ASSERT((reg_size == kXRegSize) || IsUint6(imms + 3));
+    USE(reg_size);
+    return imms << ImmSetBits_offset;
+  }
+
+  static Instr ImmRotate(unsigned immr, unsigned reg_size) {
+    VIXL_ASSERT((reg_size == kXRegSize) || (reg_size == kWRegSize));
+    VIXL_ASSERT(((reg_size == kWRegSize) && IsUint5(immr)) ||
+                ((reg_size == kXRegSize) && IsUint6(immr)));
+    USE(reg_size);
+    return immr << ImmRotate_offset;
+  }
+
+  static Instr ImmLLiteral(int imm19) {
+    VIXL_ASSERT(IsInt19(imm19));
+    const auto field = TruncateToUint19(imm19) << ImmLLiteral_offset;
+    return field;
+  }
+
+  // `bitn` may only be non-zero when `reg_size` is kXRegSize.
+  static Instr BitN(unsigned bitn, unsigned reg_size) {
+    VIXL_ASSERT((reg_size == kXRegSize) || (reg_size == kWRegSize));
+    VIXL_ASSERT((bitn == 0) || (reg_size == kXRegSize));
+    USE(reg_size);
+    return bitn << BitN_offset;
+  }
+
+  // Only LSL, LSR, ASR and ROR are accepted.
+  static Instr ShiftDP(Shift shift) {
+    VIXL_ASSERT(shift == LSL || shift == LSR || shift == ASR || shift == ROR);
+    return shift << ShiftDP_offset;
+  }
+
+  static Instr ImmDPShift(unsigned amount) {
+    VIXL_ASSERT(IsUint6(amount));
+    return amount << ImmDPShift_offset;
+  }
+
+  static Instr ExtendMode(Extend extend) {
+    return extend << ExtendMode_offset;
+  }
+
+  static Instr ImmExtendShift(unsigned left_shift) {
+    VIXL_ASSERT(left_shift <= 4);
+    return left_shift
<< ImmExtendShift_offset; + } + + static Instr ImmCondCmp(unsigned imm) { + VIXL_ASSERT(IsUint5(imm)); + return imm << ImmCondCmp_offset; + } + + static Instr Nzcv(StatusFlags nzcv) { + return ((nzcv >> Flags_offset) & 0xf) << Nzcv_offset; + } + + // MemOperand offset encoding. + static Instr ImmLSUnsigned(int imm12) { + VIXL_ASSERT(IsUint12(imm12)); + return imm12 << ImmLSUnsigned_offset; + } + + static Instr ImmLS(int imm9) { + VIXL_ASSERT(IsInt9(imm9)); + return TruncateToUint9(imm9) << ImmLS_offset; + } + + static Instr ImmLSPair(int imm7, unsigned access_size) { + VIXL_ASSERT(((imm7 >> access_size) << access_size) == imm7); + int scaled_imm7 = imm7 >> access_size; + VIXL_ASSERT(IsInt7(scaled_imm7)); + return TruncateToUint7(scaled_imm7) << ImmLSPair_offset; + } + + static Instr ImmShiftLS(unsigned shift_amount) { + VIXL_ASSERT(IsUint1(shift_amount)); + return shift_amount << ImmShiftLS_offset; + } + + static Instr ImmPrefetchOperation(int imm5) { + VIXL_ASSERT(IsUint5(imm5)); + return imm5 << ImmPrefetchOperation_offset; + } + + static Instr ImmException(int imm16) { + VIXL_ASSERT(IsUint16(imm16)); + return imm16 << ImmException_offset; + } + + static Instr ImmSystemRegister(int imm15) { + VIXL_ASSERT(IsUint15(imm15)); + return imm15 << ImmSystemRegister_offset; + } + + static Instr ImmHint(int imm7) { + VIXL_ASSERT(IsUint7(imm7)); + return imm7 << ImmHint_offset; + } + + static Instr CRm(int imm4) { + VIXL_ASSERT(IsUint4(imm4)); + return imm4 << CRm_offset; + } + + static Instr CRn(int imm4) { + VIXL_ASSERT(IsUint4(imm4)); + return imm4 << CRn_offset; + } + + static Instr SysOp(int imm14) { + VIXL_ASSERT(IsUint14(imm14)); + return imm14 << SysOp_offset; + } + + static Instr ImmSysOp1(int imm3) { + VIXL_ASSERT(IsUint3(imm3)); + return imm3 << SysOp1_offset; + } + + static Instr ImmSysOp2(int imm3) { + VIXL_ASSERT(IsUint3(imm3)); + return imm3 << SysOp2_offset; + } + + static Instr ImmBarrierDomain(int imm2) { + VIXL_ASSERT(IsUint2(imm2)); + return imm2 << 
ImmBarrierDomain_offset; + } + + static Instr ImmBarrierType(int imm2) { + VIXL_ASSERT(IsUint2(imm2)); + return imm2 << ImmBarrierType_offset; + } + + // Move immediates encoding. + static Instr ImmMoveWide(uint64_t imm) { + VIXL_ASSERT(IsUint16(imm)); + return static_cast<Instr>(imm << ImmMoveWide_offset); + } + + static Instr ShiftMoveWide(int64_t shift) { + VIXL_ASSERT(IsUint2(shift)); + return static_cast<Instr>(shift << ShiftMoveWide_offset); + } + + // FP Immediates. + static Instr ImmFP32(float imm); + static Instr ImmFP64(double imm); + + // FP register type. + static Instr FPType(FPRegister fd) { + return fd.Is64Bits() ? FP64 : FP32; + } + + static Instr FPScale(unsigned scale) { + VIXL_ASSERT(IsUint6(scale)); + return scale << FPScale_offset; + } + + // Immediate field checking helpers. + static bool IsImmAddSub(int64_t immediate); + static bool IsImmConditionalCompare(int64_t immediate); + static bool IsImmFP32(float imm); + static bool IsImmFP64(double imm); + static bool IsImmLogical(uint64_t value, + unsigned width, + unsigned* n = NULL, + unsigned* imm_s = NULL, + unsigned* imm_r = NULL); + static bool IsImmLSPair(int64_t offset, unsigned access_size); + static bool IsImmLSScaled(int64_t offset, unsigned access_size); + static bool IsImmLSUnscaled(int64_t offset); + static bool IsImmMovn(uint64_t imm, unsigned reg_size); + static bool IsImmMovz(uint64_t imm, unsigned reg_size); + + // Instruction bits for vector format in data processing operations. + static Instr VFormat(VRegister vd) { + if (vd.Is64Bits()) { + switch (vd.lanes()) { + case 2: return NEON_2S; + case 4: return NEON_4H; + case 8: return NEON_8B; + default: return 0xffffffff; + } + } else { + VIXL_ASSERT(vd.Is128Bits()); + switch (vd.lanes()) { + case 2: return NEON_2D; + case 4: return NEON_4S; + case 8: return NEON_8H; + case 16: return NEON_16B; + default: return 0xffffffff; + } + } + } + + // Instruction bits for vector format in floating point data processing + // operations. 
+ static Instr FPFormat(VRegister vd) { + if (vd.lanes() == 1) { + // Floating point scalar formats. + VIXL_ASSERT(vd.Is32Bits() || vd.Is64Bits()); + return vd.Is64Bits() ? FP64 : FP32; + } + + // Two lane floating point vector formats. + if (vd.lanes() == 2) { + VIXL_ASSERT(vd.Is64Bits() || vd.Is128Bits()); + return vd.Is128Bits() ? NEON_FP_2D : NEON_FP_2S; + } + + // Four lane floating point vector format. + VIXL_ASSERT((vd.lanes() == 4) && vd.Is128Bits()); + return NEON_FP_4S; + } + + // Instruction bits for vector format in load and store operations. + static Instr LSVFormat(VRegister vd) { + if (vd.Is64Bits()) { + switch (vd.lanes()) { + case 1: return LS_NEON_1D; + case 2: return LS_NEON_2S; + case 4: return LS_NEON_4H; + case 8: return LS_NEON_8B; + default: return 0xffffffff; + } + } else { + VIXL_ASSERT(vd.Is128Bits()); + switch (vd.lanes()) { + case 2: return LS_NEON_2D; + case 4: return LS_NEON_4S; + case 8: return LS_NEON_8H; + case 16: return LS_NEON_16B; + default: return 0xffffffff; + } + } + } + + // Instruction bits for scalar format in data processing operations. 
+ static Instr SFormat(VRegister vd) { + VIXL_ASSERT(vd.lanes() == 1); + switch (vd.SizeInBytes()) { + case 1: return NEON_B; + case 2: return NEON_H; + case 4: return NEON_S; + case 8: return NEON_D; + default: return 0xffffffff; + } + } + + static Instr ImmNEONHLM(int index, int num_bits) { + int h, l, m; + if (num_bits == 3) { + VIXL_ASSERT(IsUint3(index)); + h = (index >> 2) & 1; + l = (index >> 1) & 1; + m = (index >> 0) & 1; + } else if (num_bits == 2) { + VIXL_ASSERT(IsUint2(index)); + h = (index >> 1) & 1; + l = (index >> 0) & 1; + m = 0; + } else { + VIXL_ASSERT(IsUint1(index) && (num_bits == 1)); + h = (index >> 0) & 1; + l = 0; + m = 0; + } + return (h << NEONH_offset) | (l << NEONL_offset) | (m << NEONM_offset); + } + + static Instr ImmNEONExt(int imm4) { + VIXL_ASSERT(IsUint4(imm4)); + return imm4 << ImmNEONExt_offset; + } + + static Instr ImmNEON5(Instr format, int index) { + VIXL_ASSERT(IsUint4(index)); + int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format)); + int imm5 = (index << (s + 1)) | (1 << s); + return imm5 << ImmNEON5_offset; + } + + static Instr ImmNEON4(Instr format, int index) { + VIXL_ASSERT(IsUint4(index)); + int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format)); + int imm4 = index << s; + return imm4 << ImmNEON4_offset; + } + + static Instr ImmNEONabcdefgh(int imm8) { + VIXL_ASSERT(IsUint8(imm8)); + Instr instr; + instr = ((imm8 >> 5) & 7) << ImmNEONabc_offset; + instr |= (imm8 & 0x1f) << ImmNEONdefgh_offset; + return instr; + } + + static Instr NEONCmode(int cmode) { + VIXL_ASSERT(IsUint4(cmode)); + return cmode << NEONCmode_offset; + } + + static Instr NEONModImmOp(int op) { + VIXL_ASSERT(IsUint1(op)); + return op << NEONModImmOp_offset; + } + + size_t size() const { + return SizeOfCodeGenerated(); + } + + size_t SizeOfCodeGenerated() const { + return armbuffer_.size(); + } + + PositionIndependentCodeOption pic() const { + return pic_; + } + + CPUFeatures* GetCPUFeatures() { return 
&cpu_features_; } + + void SetCPUFeatures(const CPUFeatures& cpu_features) { + cpu_features_ = cpu_features; + } + + bool AllowPageOffsetDependentCode() const { + return (pic() == PageOffsetDependentCode) || + (pic() == PositionDependentCode); + } + + static const Register& AppropriateZeroRegFor(const CPURegister& reg) { + return reg.Is64Bits() ? xzr : wzr; + } + + + protected: + void LoadStore(const CPURegister& rt, + const MemOperand& addr, + LoadStoreOp op, + LoadStoreScalingOption option = PreferScaledOffset); + + void LoadStorePair(const CPURegister& rt, + const CPURegister& rt2, + const MemOperand& addr, + LoadStorePairOp op); + void LoadStoreStruct(const VRegister& vt, + const MemOperand& addr, + NEONLoadStoreMultiStructOp op); + void LoadStoreStruct1(const VRegister& vt, + int reg_count, + const MemOperand& addr); + void LoadStoreStructSingle(const VRegister& vt, + uint32_t lane, + const MemOperand& addr, + NEONLoadStoreSingleStructOp op); + void LoadStoreStructSingleAllLanes(const VRegister& vt, + const MemOperand& addr, + NEONLoadStoreSingleStructOp op); + void LoadStoreStructVerify(const VRegister& vt, + const MemOperand& addr, + Instr op); + + void Prefetch(PrefetchOperation op, + const MemOperand& addr, + LoadStoreScalingOption option = PreferScaledOffset); + + BufferOffset Logical(const Register& rd, + const Register& rn, + const Operand& operand, + LogicalOp op); + BufferOffset LogicalImmediate(const Register& rd, + const Register& rn, + unsigned n, + unsigned imm_s, + unsigned imm_r, + LogicalOp op); + + void ConditionalCompare(const Register& rn, + const Operand& operand, + StatusFlags nzcv, + Condition cond, + ConditionalCompareOp op); + + void AddSubWithCarry(const Register& rd, + const Register& rn, + const Operand& operand, + FlagsUpdate S, + AddSubWithCarryOp op); + + + // Functions for emulating operands not directly supported by the instruction + // set. 
void EmitShift(const Register& rd,
               const Register& rn,
               Shift shift,
               unsigned amount);
void EmitExtendShift(const Register& rd,
                     const Register& rn,
                     Extend extend,
                     unsigned left_shift);

void AddSub(const Register& rd,
            const Register& rn,
            const Operand& operand,
            FlagsUpdate S,
            AddSubOp op);

void NEONTable(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm,
               NEONTableOp op);

// Find an appropriate LoadStoreOp or LoadStorePairOp for the specified
// registers. Only simple loads are supported; sign- and zero-extension (such
// as in LDPSW_x or LDRB_w) are not supported.
static LoadStoreOp LoadOpFor(const CPURegister& rt);
static LoadStorePairOp LoadPairOpFor(const CPURegister& rt,
                                     const CPURegister& rt2);
static LoadStoreOp StoreOpFor(const CPURegister& rt);
static LoadStorePairOp StorePairOpFor(const CPURegister& rt,
                                      const CPURegister& rt2);
static LoadStorePairNonTemporalOp LoadPairNonTemporalOpFor(
  const CPURegister& rt, const CPURegister& rt2);
static LoadStorePairNonTemporalOp StorePairNonTemporalOpFor(
  const CPURegister& rt, const CPURegister& rt2);
static LoadLiteralOp LoadLiteralOpFor(const CPURegister& rt);

// Convenience pass-through for CPU feature checks.
// Forwards up to four features to cpu_features_.Has(); unused slots default
// to CPUFeatures::kNone.
bool CPUHas(CPUFeatures::Feature feature0,
            CPUFeatures::Feature feature1 = CPUFeatures::kNone,
            CPUFeatures::Feature feature2 = CPUFeatures::kNone,
            CPUFeatures::Feature feature3 = CPUFeatures::kNone) const {
  return cpu_features_.Has(feature0, feature1, feature2, feature3);
}

// Determine whether the target CPU has the specified registers, based on the
// currently-enabled CPU features. Presence of a register does not imply
// support for arbitrary operations on it. For example, CPUs with FP have H
// registers, but most half-precision operations require the FPHalf feature.
//
// These are used to check CPU features in loads and stores that have the same
// entry point for both integer and FP registers.
bool CPUHas(const CPURegister& rt) const;
bool CPUHas(const CPURegister& rt, const CPURegister& rt2) const;

bool CPUHas(SystemRegister sysreg) const;

private:
static uint32_t FP32ToImm8(float imm);
static uint32_t FP64ToImm8(double imm);

// Instruction helpers.
void MoveWide(const Register& rd,
              uint64_t imm,
              int shift,
              MoveWideImmediateOp mov_op);
BufferOffset DataProcShiftedRegister(const Register& rd,
                                     const Register& rn,
                                     const Operand& operand,
                                     FlagsUpdate S,
                                     Instr op);
void DataProcExtendedRegister(const Register& rd,
                              const Register& rn,
                              const Operand& operand,
                              FlagsUpdate S,
                              Instr op);
void LoadStorePairNonTemporal(const CPURegister& rt,
                              const CPURegister& rt2,
                              const MemOperand& addr,
                              LoadStorePairNonTemporalOp op);
void LoadLiteral(const CPURegister& rt, uint64_t imm, LoadLiteralOp op);
void ConditionalSelect(const Register& rd,
                       const Register& rn,
                       const Register& rm,
                       Condition cond,
                       ConditionalSelectOp op);
void DataProcessing1Source(const Register& rd,
                           const Register& rn,
                           DataProcessing1SourceOp op);
void DataProcessing3Source(const Register& rd,
                           const Register& rn,
                           const Register& rm,
                           const Register& ra,
                           DataProcessing3SourceOp op);
void FPDataProcessing1Source(const VRegister& fd,
                             const VRegister& fn,
                             FPDataProcessing1SourceOp op);
void FPDataProcessing3Source(const VRegister& fd,
                             const VRegister& fn,
                             const VRegister& fm,
                             const VRegister& fa,
                             FPDataProcessing3SourceOp op);
void NEONAcrossLanesL(const VRegister& vd,
                      const VRegister& vn,
                      NEONAcrossLanesOp op);
void NEONAcrossLanes(const VRegister& vd,
                     const VRegister& vn,
                     NEONAcrossLanesOp op);
void NEONModifiedImmShiftLsl(const VRegister& vd,
                             const int imm8,
                             const int left_shift,
                             NEONModifiedImmediateOp op);
void NEONModifiedImmShiftMsl(const VRegister& vd,
                             const int imm8,
                             const int shift_amount,
                             NEONModifiedImmediateOp op);
void NEONFP2Same(const VRegister& vd,
                 const VRegister& vn,
                 Instr vop);
void NEON3Same(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm,
               NEON3SameOp vop);
void NEONFP3Same(const VRegister& vd,
                 const VRegister& vn,
                 const VRegister& vm,
                 Instr op);
void NEON3DifferentL(const VRegister& vd,
                     const VRegister& vn,
                     const VRegister& vm,
                     NEON3DifferentOp vop);
void NEON3DifferentW(const VRegister& vd,
                     const VRegister& vn,
                     const VRegister& vm,
                     NEON3DifferentOp vop);
void NEON3DifferentHN(const VRegister& vd,
                      const VRegister& vn,
                      const VRegister& vm,
                      NEON3DifferentOp vop);
void NEONFP2RegMisc(const VRegister& vd,
                    const VRegister& vn,
                    NEON2RegMiscOp vop,
                    double value = 0.0);
void NEON2RegMisc(const VRegister& vd,
                  const VRegister& vn,
                  NEON2RegMiscOp vop,
                  int value = 0);
void NEONFP2RegMisc(const VRegister& vd,
                    const VRegister& vn,
                    Instr op);
void NEONAddlp(const VRegister& vd,
               const VRegister& vn,
               NEON2RegMiscOp op);
void NEONPerm(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              NEONPermOp op);
void NEONFPByElement(const VRegister& vd,
                     const VRegister& vn,
                     const VRegister& vm,
                     int vm_index,
                     NEONByIndexedElementOp op);
void NEONByElement(const VRegister& vd,
                   const VRegister& vn,
                   const VRegister& vm,
                   int vm_index,
                   NEONByIndexedElementOp op);
void NEONByElementL(const VRegister& vd,
                    const VRegister& vn,
                    const VRegister& vm,
                    int vm_index,
                    NEONByIndexedElementOp op);
void NEONShiftImmediate(const VRegister& vd,
                        const VRegister& vn,
                        NEONShiftImmediateOp op,
                        int immh_immb);
void NEONShiftLeftImmediate(const VRegister& vd,
                            const VRegister& vn,
                            int shift,
                            NEONShiftImmediateOp op);
void NEONShiftRightImmediate(const VRegister& vd,
                             const VRegister& vn,
                             int shift,
                             NEONShiftImmediateOp op);
void NEONShiftImmediateL(const VRegister& vd,
                         const VRegister& vn,
                         int shift,
                         NEONShiftImmediateOp op);
void NEONShiftImmediateN(const VRegister& vd,
                         const VRegister& vn,
                         int shift,
                         NEONShiftImmediateOp op);
void NEONXtn(const VRegister& vd,
             const VRegister& vn,
             NEON2RegMiscOp vop);

Instr LoadStoreStructAddrModeField(const MemOperand& addr);

// Encode the specified MemOperand for the specified access size and scaling
// preference.
Instr LoadStoreMemOperand(const MemOperand& addr,
                          unsigned access_size,
                          LoadStoreScalingOption option);

protected:
// Prevent generation of a literal pool for the next |maxInst| instructions.
// Guarantees instruction linearity.
//
// Scoped guard: the constructor calls enterNoPool(maxInst) on the assembler's
// buffer and the destructor calls leaveNoPool() on the same buffer.
class AutoBlockLiteralPool {
  // The assembler's buffer; not owned by this guard.
  ARMBuffer* armbuffer_;

 public:
  AutoBlockLiteralPool(Assembler* assembler, size_t maxInst)
    : armbuffer_(&assembler->armbuffer_) {
    armbuffer_->enterNoPool(maxInst);
  }
  ~AutoBlockLiteralPool() {
    armbuffer_->leaveNoPool();
  }
};

protected:
// Position-independence mode; returned by pic().
PositionIndependentCodeOption pic_;

// CPU feature set; exposed through GetCPUFeatures() / SetCPUFeatures() and
// queried by CPUHas().
CPUFeatures cpu_features_;

#ifdef DEBUG
bool finalized_;
#endif
};

}  // namespace vixl

#endif  // VIXL_A64_ASSEMBLER_A64_H_
diff --git a/js/src/jit/arm64/vixl/CompilerIntrinsics-vixl.h b/js/src/jit/arm64/vixl/CompilerIntrinsics-vixl.h new file mode 100644 index 0000000000..e13eef6135 --- /dev/null +++ b/js/src/jit/arm64/vixl/CompilerIntrinsics-vixl.h @@ -0,0 +1,179 @@
// Copyright 2015, ARM Limited
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +#ifndef VIXL_COMPILER_INTRINSICS_H +#define VIXL_COMPILER_INTRINSICS_H + +#include "mozilla/MathAlgorithms.h" + +#include "jit/arm64/vixl/Globals-vixl.h" + +namespace vixl { + +// Helper to check whether the version of GCC used is greater than the specified +// requirement. 
+#define MAJOR 1000000 +#define MINOR 1000 +#if defined(__GNUC__) && defined(__GNUC_MINOR__) && defined(__GNUC_PATCHLEVEL__) +#define GCC_VERSION_OR_NEWER(major, minor, patchlevel) \ + ((__GNUC__ * MAJOR + __GNUC_MINOR__ * MINOR + __GNUC_PATCHLEVEL__) >= \ + ((major) * MAJOR + (minor) * MINOR + (patchlevel))) +#elif defined(__GNUC__) && defined(__GNUC_MINOR__) +#define GCC_VERSION_OR_NEWER(major, minor, patchlevel) \ + ((__GNUC__ * MAJOR + __GNUC_MINOR__ * MINOR) >= \ + ((major) * MAJOR + (minor) * MINOR + (patchlevel))) +#else +#define GCC_VERSION_OR_NEWER(major, minor, patchlevel) 0 +#endif + + +#if defined(__clang__) && !defined(VIXL_NO_COMPILER_BUILTINS) + +#define COMPILER_HAS_BUILTIN_CLRSB (__has_builtin(__builtin_clrsb)) +#define COMPILER_HAS_BUILTIN_CLZ (__has_builtin(__builtin_clz)) +#define COMPILER_HAS_BUILTIN_CTZ (__has_builtin(__builtin_ctz)) +#define COMPILER_HAS_BUILTIN_FFS (__has_builtin(__builtin_ffs)) +#define COMPILER_HAS_BUILTIN_POPCOUNT (__has_builtin(__builtin_popcount)) + +#elif defined(__GNUC__) && !defined(VIXL_NO_COMPILER_BUILTINS) +// The documentation for these builtins is available at: +// https://gcc.gnu.org/onlinedocs/gcc-$MAJOR.$MINOR.$PATCHLEVEL/gcc//Other-Builtins.html + +# define COMPILER_HAS_BUILTIN_CLRSB (GCC_VERSION_OR_NEWER(4, 7, 0)) +# define COMPILER_HAS_BUILTIN_CLZ (GCC_VERSION_OR_NEWER(3, 4, 0)) +# define COMPILER_HAS_BUILTIN_CTZ (GCC_VERSION_OR_NEWER(3, 4, 0)) +# define COMPILER_HAS_BUILTIN_FFS (GCC_VERSION_OR_NEWER(3, 4, 0)) +# define COMPILER_HAS_BUILTIN_POPCOUNT (GCC_VERSION_OR_NEWER(3, 4, 0)) + +#else +// One can define VIXL_NO_COMPILER_BUILTINS to force using the manually +// implemented C++ methods. 
+ +#define COMPILER_HAS_BUILTIN_BSWAP false +#define COMPILER_HAS_BUILTIN_CLRSB false +#define COMPILER_HAS_BUILTIN_CLZ false +#define COMPILER_HAS_BUILTIN_CTZ false +#define COMPILER_HAS_BUILTIN_FFS false +#define COMPILER_HAS_BUILTIN_POPCOUNT false + +#endif + + +template<typename V> +inline bool IsPowerOf2(V value) { + return (value != 0) && ((value & (value - 1)) == 0); +} + + +// Implementation of intrinsics functions. +// TODO: The implementations could be improved for sizes different from 32bit +// and 64bit: we could mask the values and call the appropriate builtin. + + +template<typename V> +inline int CountLeadingZeros(V value, int width = (sizeof(V) * 8)) { +#if COMPILER_HAS_BUILTIN_CLZ + if (width == 32) { + return (value == 0) ? 32 : __builtin_clz(static_cast<unsigned>(value)); + } else if (width == 64) { + return (value == 0) ? 64 : __builtin_clzll(value); + } + MOZ_CRASH("Unhandled width."); +#else + if (width == 32) { + return mozilla::CountLeadingZeroes32(value); + } else if (width == 64) { + return mozilla::CountLeadingZeroes64(value); + } + MOZ_CRASH("Unhandled width."); +#endif +} + + +template<typename V> +inline int CountLeadingSignBits(V value, int width = (sizeof(V) * 8)) { +#if COMPILER_HAS_BUILTIN_CLRSB + if (width == 32) { + return __builtin_clrsb(value); + } else if (width == 64) { + return __builtin_clrsbll(value); + } + MOZ_CRASH("Unhandled width."); +#else + VIXL_ASSERT(IsPowerOf2(width) && (width <= 64)); + if (value >= 0) { + return CountLeadingZeros(value, width) - 1; + } else { + return CountLeadingZeros(~value, width) - 1; + } +#endif +} + + +template<typename V> +inline int CountSetBits(V value, int width = (sizeof(V) * 8)) { +#if COMPILER_HAS_BUILTIN_POPCOUNT + if (width == 32) { + return __builtin_popcount(static_cast<unsigned>(value)); + } else if (width == 64) { + return __builtin_popcountll(value); + } + MOZ_CRASH("Unhandled width."); +#else + if (width == 32) { + return mozilla::CountPopulation32(value); + } else if (width 
== 64) { + return mozilla::CountPopulation64(value); + } + MOZ_CRASH("Unhandled width."); +#endif +} + + +template<typename V> +inline int CountTrailingZeros(V value, int width = (sizeof(V) * 8)) { +#if COMPILER_HAS_BUILTIN_CTZ + if (width == 32) { + return (value == 0) ? 32 : __builtin_ctz(static_cast<unsigned>(value)); + } else if (width == 64) { + return (value == 0) ? 64 : __builtin_ctzll(value); + } + MOZ_CRASH("Unhandled width."); +#else + if (width == 32) { + return mozilla::CountTrailingZeroes32(value); + } else if (width == 64) { + return mozilla::CountTrailingZeroes64(value); + } + MOZ_CRASH("Unhandled width."); +#endif +} + +} // namespace vixl + +#endif // VIXL_COMPILER_INTRINSICS_H + diff --git a/js/src/jit/arm64/vixl/Constants-vixl.h b/js/src/jit/arm64/vixl/Constants-vixl.h new file mode 100644 index 0000000000..2c174e61a5 --- /dev/null +++ b/js/src/jit/arm64/vixl/Constants-vixl.h @@ -0,0 +1,2694 @@ +// Copyright 2015, ARM Limited +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef VIXL_A64_CONSTANTS_A64_H_
#define VIXL_A64_CONSTANTS_A64_H_

#include <stdint.h>

#include "jit/arm64/vixl/Globals-vixl.h"

namespace vixl {

// Supervisor Call (svc) specific support.
//
// The SVC instruction encodes an optional 16-bit immediate value.
// The simulator understands the codes below.
enum SVCSimulatorCodes {
  kCallRtRedirected = 0x10,  // Transition to x86_64 C code.
  kMarkStackPointer = 0x11,  // Push the current SP on a special Simulator stack.
  kCheckStackPointer = 0x12  // Pop from the special Simulator stack and compare to SP.
};

const unsigned kNumberOfRegisters = 32;
const unsigned kNumberOfVRegisters = 32;
const unsigned kNumberOfFPRegisters = kNumberOfVRegisters;
// Callee saved registers are x21-x30(lr).
const int kNumberOfCalleeSavedRegisters = 10;
const int kFirstCalleeSavedRegisterIndex = 21;
// Callee saved FP registers are d8-d15. Note that the high parts of v8-v15 are
// still caller-saved.
const int kNumberOfCalleeSavedFPRegisters = 8;
const int kFirstCalleeSavedFPRegisterIndex = 8;

// Applies R to each register code from 0 to 31, in order.
#define REGISTER_CODE_LIST(R)                                                  \
R(0)  R(1)  R(2)  R(3)  R(4)  R(5)  R(6)  R(7)                                 \
R(8)  R(9)  R(10) R(11) R(12) R(13) R(14) R(15)                                \
R(16) R(17) R(18) R(19) R(20) R(21) R(22) R(23)                                \
R(24) R(25) R(26) R(27) R(28) R(29) R(30) R(31)

// Table of instruction fields. Each entry is V_(Name, HighBit, LowBit, Kind):
// the field occupies bits HighBit..LowBit (inclusive) of the instruction word,
// and Kind is either Bits or SignedBits.
#define INSTRUCTION_FIELDS_LIST(V_)                                            \
/* Register fields */                                                          \
V_(Rd, 4, 0, Bits)                        /* Destination register.        */   \
V_(Rn, 9, 5, Bits)                        /* First source register.       */   \
V_(Rm, 20, 16, Bits)                      /* Second source register.      */   \
V_(Ra, 14, 10, Bits)                      /* Third source register.       */   \
V_(Rt, 4, 0, Bits)                        /* Load/store register.         */   \
V_(Rt2, 14, 10, Bits)                     /* Load/store second register.  */   \
V_(Rs, 20, 16, Bits)                      /* Exclusive access status.     */   \
                                                                               \
/* Common bits */                                                              \
V_(SixtyFourBits, 31, 31, Bits)                                                \
V_(FlagsUpdate, 29, 29, Bits)                                                  \
                                                                               \
/* PC relative addressing */                                                   \
V_(ImmPCRelHi, 23, 5, SignedBits)                                              \
V_(ImmPCRelLo, 30, 29, Bits)                                                   \
                                                                               \
/* Add/subtract/logical shift register */                                      \
V_(ShiftDP, 23, 22, Bits)                                                      \
V_(ImmDPShift, 15, 10, Bits)                                                   \
                                                                               \
/* Add/subtract immediate */                                                   \
V_(ImmAddSub, 21, 10, Bits)                                                    \
V_(ShiftAddSub, 23, 22, Bits)                                                  \
                                                                               \
/* Add/subtract extend */                                                      \
V_(ImmExtendShift, 12, 10, Bits)                                               \
V_(ExtendMode, 15, 13, Bits)                                                   \
                                                                               \
/* Move wide */                                                                \
V_(ImmMoveWide, 20, 5, Bits)                                                   \
V_(ShiftMoveWide, 22, 21, Bits)                                                \
                                                                               \
/* Logical immediate, bitfield and extract */                                  \
V_(BitN, 22, 22, Bits)                                                         \
V_(ImmRotate, 21, 16, Bits)                                                    \
V_(ImmSetBits, 15, 10, Bits)                                                   \
V_(ImmR, 21, 16, Bits)                                                         \
V_(ImmS, 15, 10, Bits)                                                         \
                                                                               \
/* Test and branch immediate */                                                \
V_(ImmTestBranch, 18, 5, SignedBits)                                           \
V_(ImmTestBranchBit40, 23, 19, Bits)                                           \
V_(ImmTestBranchBit5, 31, 31, Bits)                                            \
                                                                               \
/* Conditionals */                                                             \
V_(Condition, 15, 12, Bits)                                                    \
V_(ConditionBranch, 3, 0, Bits)                                                \
V_(Nzcv, 3, 0, Bits)                                                           \
V_(ImmCondCmp, 20, 16, Bits)                                                   \
V_(ImmCondBranch, 23, 5, SignedBits)                                           \
                                                                               \
/* Floating point */                                                           \
V_(FPType, 23, 22, Bits)                                                       \
V_(ImmFP, 20, 13, Bits)                                                        \
V_(FPScale, 15, 10, Bits)                                                      \
                                                                               \
/* Load Store */                                                               \
V_(ImmLS, 20, 12, SignedBits)                                                  \
V_(ImmLSUnsigned, 21, 10, Bits)                                                \
V_(ImmLSPair, 21, 15, SignedBits)                                              \
V_(ImmShiftLS, 12, 12, Bits)                                                   \
V_(LSOpc, 23, 22, Bits)                                                        \
V_(LSVector, 26, 26, Bits)                                                     \
V_(LSSize, 31, 30, Bits)                                                       \
V_(ImmPrefetchOperation, 4, 0, Bits)                                           \
V_(PrefetchHint, 4, 3, Bits)                                                   \
V_(PrefetchTarget, 2, 1, Bits)                                                 \
V_(PrefetchStream, 0, 0, Bits)                                                 \
                                                                               \
/* Other immediates */                                                         \
V_(ImmUncondBranch, 25, 0, SignedBits)                                         \
V_(ImmCmpBranch, 23, 5, SignedBits)                                            \
V_(ImmLLiteral, 23, 5, SignedBits)                                             \
V_(ImmException, 20, 5, Bits)                                                  \
V_(ImmHint, 11, 5, Bits)                                                       \
V_(ImmBarrierDomain, 11, 10, Bits)                                             \
V_(ImmBarrierType, 9, 8, Bits)                                                 \
                                                                               \
/* System (MRS, MSR, SYS) */                                                   \
V_(ImmSystemRegister, 19, 5, Bits)                                             \
V_(SysO0, 19, 19, Bits)                                                        \
V_(SysOp, 18, 5, Bits)                                                         \
V_(SysOp1, 18, 16, Bits)                                                       \
V_(SysOp2, 7, 5, Bits)                                                         \
V_(CRn, 15, 12, Bits)                                                          \
V_(CRm, 11, 8, Bits)                                                           \
                                                                               \
/* Load-/store-exclusive */                                                    \
V_(LdStXLoad, 22, 22, Bits)                                                    \
V_(LdStXNotExclusive, 23, 23, Bits)                                            \
V_(LdStXAcquireRelease, 15, 15, Bits)                                          \
V_(LdStXSizeLog2, 31, 30, Bits)                                                \
V_(LdStXPair, 21, 21, Bits)                                                    \
                                                                               \
/* NEON generic fields */                                                      \
V_(NEONQ, 30, 30, Bits)                                                        \
V_(NEONSize, 23, 22, Bits)                                                     \
V_(NEONLSSize, 11, 10, Bits)                                                   \
V_(NEONS, 12, 12, Bits)                                                        \
V_(NEONL, 21, 21, Bits)                                                        \
V_(NEONM, 20, 20, Bits)                                                        \
V_(NEONH, 11, 11, Bits)                                                        \
V_(ImmNEONExt, 14, 11, Bits)                                                   \
V_(ImmNEON5, 20, 16, Bits)                                                     \
V_(ImmNEON4, 14, 11, Bits)                                                     \
                                                                               \
/* NEON Modified Immediate fields */                                           \
V_(ImmNEONabc, 18, 16, Bits)                                                   \
V_(ImmNEONdefgh, 9, 5, Bits)                                                   \
V_(NEONModImmOp, 29, 29, Bits)                                                 \
V_(NEONCmode, 15, 12, Bits)                                                    \
                                                                               \
/* NEON Shift Immediate fields */                                              \
V_(ImmNEONImmhImmb, 22, 16, Bits)                                              \
V_(ImmNEONImmh, 22, 19, Bits)                                                  \
V_(ImmNEONImmb, 18, 16, Bits)

// Table of system register fields. V_ entries have the same layout as in
// INSTRUCTION_FIELDS_LIST; each M_(Name, Mask) entry pairs a register name
// with an expression built from the field masks.
#define SYSTEM_REGISTER_FIELDS_LIST(V_, M_)                                    \
/* NZCV */                                                                     \
V_(Flags, 31, 28, Bits)                                                        \
V_(N, 31, 31, Bits)                                                            \
V_(Z, 30, 30, Bits)                                                            \
V_(C, 29, 29, Bits)                                                            \
V_(V, 28, 28, Bits)                                                            \
M_(NZCV, Flags_mask)                                                           \
/* FPCR */                                                                     \
V_(AHP, 26, 26, Bits)                                                          \
V_(DN, 25, 25, Bits)                                                           \
V_(FZ, 24, 24, Bits)                                                           \
V_(RMode, 23, 22, Bits)                                                        \
M_(FPCR, AHP_mask | DN_mask | FZ_mask | RMode_mask)

// Fields offsets.
+#define DECLARE_FIELDS_OFFSETS(Name, HighBit, LowBit, X) \ +const int Name##_offset = LowBit; \ +const int Name##_width = HighBit - LowBit + 1; \ +const uint32_t Name##_mask = ((1 << Name##_width) - 1) << LowBit; +#define NOTHING(A, B) +INSTRUCTION_FIELDS_LIST(DECLARE_FIELDS_OFFSETS) +SYSTEM_REGISTER_FIELDS_LIST(DECLARE_FIELDS_OFFSETS, NOTHING) +#undef NOTHING +#undef DECLARE_FIELDS_BITS + +// ImmPCRel is a compound field (not present in INSTRUCTION_FIELDS_LIST), formed +// from ImmPCRelLo and ImmPCRelHi. +const int ImmPCRel_mask = ImmPCRelLo_mask | ImmPCRelHi_mask; + +// Condition codes. +enum Condition { + eq = 0, // Z set Equal. + ne = 1, // Z clear Not equal. + cs = 2, // C set Carry set. + cc = 3, // C clear Carry clear. + mi = 4, // N set Negative. + pl = 5, // N clear Positive or zero. + vs = 6, // V set Overflow. + vc = 7, // V clear No overflow. + hi = 8, // C set, Z clear Unsigned higher. + ls = 9, // C clear or Z set Unsigned lower or same. + ge = 10, // N == V Greater or equal. + lt = 11, // N != V Less than. + gt = 12, // Z clear, N == V Greater than. + le = 13, // Z set or N != V Less then or equal + al = 14, // Always. + nv = 15, // Behaves as always/al. + + // Aliases. + hs = cs, // C set Unsigned higher or same. + lo = cc, // C clear Unsigned lower. + + // Mozilla expanded aliases. + Equal = 0, Zero = 0, + NotEqual = 1, NonZero = 1, + AboveOrEqual = 2, CarrySet = 2, + Below = 3, CarryClear = 3, + Signed = 4, + NotSigned = 5, + Overflow = 6, + NoOverflow = 7, + Above = 8, + BelowOrEqual = 9, + GreaterThanOrEqual_ = 10, + LessThan_ = 11, + GreaterThan_ = 12, + LessThanOrEqual_ = 13, + Always = 14, + Never = 15 +}; + +inline Condition InvertCondition(Condition cond) { + // Conditions al and nv behave identically, as "always true". They can't be + // inverted, because there is no "always false" condition. 
+ VIXL_ASSERT((cond != al) && (cond != nv)); + return static_cast<Condition>(cond ^ 1); +} + +enum FPTrapFlags { + EnableTrap = 1, + DisableTrap = 0 +}; + +enum FlagsUpdate { + SetFlags = 1, + LeaveFlags = 0 +}; + +enum StatusFlags { + NoFlag = 0, + + // Derive the flag combinations from the system register bit descriptions. + NFlag = N_mask, + ZFlag = Z_mask, + CFlag = C_mask, + VFlag = V_mask, + NZFlag = NFlag | ZFlag, + NCFlag = NFlag | CFlag, + NVFlag = NFlag | VFlag, + ZCFlag = ZFlag | CFlag, + ZVFlag = ZFlag | VFlag, + CVFlag = CFlag | VFlag, + NZCFlag = NFlag | ZFlag | CFlag, + NZVFlag = NFlag | ZFlag | VFlag, + NCVFlag = NFlag | CFlag | VFlag, + ZCVFlag = ZFlag | CFlag | VFlag, + NZCVFlag = NFlag | ZFlag | CFlag | VFlag, + + // Floating-point comparison results. + FPEqualFlag = ZCFlag, + FPLessThanFlag = NFlag, + FPGreaterThanFlag = CFlag, + FPUnorderedFlag = CVFlag +}; + +enum Shift { + NO_SHIFT = -1, + LSL = 0x0, + LSR = 0x1, + ASR = 0x2, + ROR = 0x3, + MSL = 0x4 +}; + +enum Extend { + NO_EXTEND = -1, + UXTB = 0, + UXTH = 1, + UXTW = 2, + UXTX = 3, + SXTB = 4, + SXTH = 5, + SXTW = 6, + SXTX = 7 +}; + +enum SystemHint { + NOP = 0, + YIELD = 1, + WFE = 2, + WFI = 3, + SEV = 4, + SEVL = 5, + ESB = 16, + CSDB = 20, + BTI = 32, + BTI_c = 34, + BTI_j = 36, + BTI_jc = 38 +}; + +enum BranchTargetIdentifier { + EmitBTI_none = NOP, + EmitBTI = BTI, + EmitBTI_c = BTI_c, + EmitBTI_j = BTI_j, + EmitBTI_jc = BTI_jc, + + // These correspond to the values of the CRm:op2 fields in the equivalent HINT + // instruction. 
+ EmitPACIASP = 25, + EmitPACIBSP = 27 +}; + +enum BarrierDomain { + OuterShareable = 0, + NonShareable = 1, + InnerShareable = 2, + FullSystem = 3 +}; + +enum BarrierType { + BarrierOther = 0, + BarrierReads = 1, + BarrierWrites = 2, + BarrierAll = 3 +}; + +enum PrefetchOperation { + PLDL1KEEP = 0x00, + PLDL1STRM = 0x01, + PLDL2KEEP = 0x02, + PLDL2STRM = 0x03, + PLDL3KEEP = 0x04, + PLDL3STRM = 0x05, + + PLIL1KEEP = 0x08, + PLIL1STRM = 0x09, + PLIL2KEEP = 0x0a, + PLIL2STRM = 0x0b, + PLIL3KEEP = 0x0c, + PLIL3STRM = 0x0d, + + PSTL1KEEP = 0x10, + PSTL1STRM = 0x11, + PSTL2KEEP = 0x12, + PSTL2STRM = 0x13, + PSTL3KEEP = 0x14, + PSTL3STRM = 0x15 +}; + +enum BType { + // Set when executing any instruction on a guarded page, except those cases + // listed below. + DefaultBType = 0, + + // Set when an indirect branch is taken from an unguarded page to a guarded + // page, or from a guarded page to ip0 or ip1 (x16 or x17), eg "br ip0". + BranchFromUnguardedOrToIP = 1, + + // Set when an indirect branch and link (call) is taken, eg. "blr x0". + BranchAndLink = 2, + + // Set when an indirect branch is taken from a guarded page to a register + // that is not ip0 or ip1 (x16 or x17), eg, "br x0". + BranchFromGuardedNotToIP = 3 +}; + +template<int op0, int op1, int crn, int crm, int op2> +class SystemRegisterEncoder { + public: + static const uint32_t value = + ((op0 << SysO0_offset) | + (op1 << SysOp1_offset) | + (crn << CRn_offset) | + (crm << CRm_offset) | + (op2 << SysOp2_offset)) >> ImmSystemRegister_offset; +}; + +// System/special register names. +// This information is not encoded as one field but as the concatenation of +// multiple fields (Op0, Op1, Crn, Crm, Op2). +enum SystemRegister { + NZCV = SystemRegisterEncoder<3, 3, 4, 2, 0>::value, + FPCR = SystemRegisterEncoder<3, 3, 4, 4, 0>::value, + RNDR = SystemRegisterEncoder<3, 3, 2, 4, 0>::value, // Random number. + RNDRRS = SystemRegisterEncoder<3, 3, 2, 4, 1>::value // Reseeded random number. 
+}; + +template<int op1, int crn, int crm, int op2> +class CacheOpEncoder { + public: + static const uint32_t value = + ((op1 << SysOp1_offset) | + (crn << CRn_offset) | + (crm << CRm_offset) | + (op2 << SysOp2_offset)) >> SysOp_offset; +}; + +enum InstructionCacheOp : uint32_t { + IVAU = CacheOpEncoder<3, 7, 5, 1>::value +}; + +enum DataCacheOp : uint32_t { + CVAC = CacheOpEncoder<3, 7, 10, 1>::value, + CVAU = CacheOpEncoder<3, 7, 11, 1>::value, + CVAP = CacheOpEncoder<3, 7, 12, 1>::value, + CVADP = CacheOpEncoder<3, 7, 13, 1>::value, + CIVAC = CacheOpEncoder<3, 7, 14, 1>::value, + ZVA = CacheOpEncoder<3, 7, 4, 1>::value +}; + +// Instruction enumerations. +// +// These are the masks that define a class of instructions, and the list of +// instructions within each class. Each enumeration has a Fixed, FMask and +// Mask value. +// +// Fixed: The fixed bits in this instruction class. +// FMask: The mask used to extract the fixed bits in the class. +// Mask: The mask used to identify the instructions within a class. +// +// The enumerations can be used like this: +// +// VIXL_ASSERT(instr->Mask(PCRelAddressingFMask) == PCRelAddressingFixed); +// switch(instr->Mask(PCRelAddressingMask)) { +// case ADR: Format("adr 'Xd, 'AddrPCRelByte"); break; +// case ADRP: Format("adrp 'Xd, 'AddrPCRelPage"); break; +// default: printf("Unknown instruction\n"); +// } + + +// Generic fields. 
// Bit patterns shared by many instruction classes: the operand-size bit
// (bit 31) and the floating-point type field selected by FPTypeMask.
enum GenericInstrField : uint32_t {
  SixtyFourBits = 0x80000000,
  ThirtyTwoBits = 0x00000000,

  FPTypeMask = 0x00C00000,
  FP16 = 0x00C00000,
  FP32 = 0x00000000,
  FP64 = 0x00400000
};

// NEON vector arrangements. Each arrangement is a size pattern, optionally
// combined with NEON_Q (bit 30).
enum NEONFormatField : uint32_t {
  NEONFormatFieldMask = 0x40C00000,
  NEON_Q = 0x40000000,
  NEON_8B = 0x00000000,
  NEON_16B = NEON_8B | NEON_Q,
  NEON_4H = 0x00400000,
  NEON_8H = NEON_4H | NEON_Q,
  NEON_2S = 0x00800000,
  NEON_4S = NEON_2S | NEON_Q,
  NEON_1D = 0x00C00000,
  NEON_2D = 0x00C00000 | NEON_Q
};

// NEON floating-point arrangements, built from the generic FP16/FP32/FP64
// type patterns and NEON_Q.
enum NEONFPFormatField : uint32_t {
  NEONFPFormatFieldMask = 0x40400000,
  NEON_FP_4H = FP16,
  NEON_FP_2S = FP32,
  NEON_FP_8H = FP16 | NEON_Q,
  NEON_FP_4S = FP32 | NEON_Q,
  NEON_FP_2D = FP64 | NEON_Q
};

// NEON load/store arrangements. The size pattern lives in bits 11:10 here,
// again optionally combined with NEON_Q.
enum NEONLSFormatField : uint32_t {
  NEONLSFormatFieldMask = 0x40000C00,
  LS_NEON_8B = 0x00000000,
  LS_NEON_16B = LS_NEON_8B | NEON_Q,
  LS_NEON_4H = 0x00000400,
  LS_NEON_8H = LS_NEON_4H | NEON_Q,
  LS_NEON_2S = 0x00000800,
  LS_NEON_4S = LS_NEON_2S | NEON_Q,
  LS_NEON_1D = 0x00000C00,
  LS_NEON_2D = LS_NEON_1D | NEON_Q
};

// NEON scalar formats: the NEONScalar marker bit plus a size pattern.
enum NEONScalarFormatField : uint32_t {
  NEONScalarFormatFieldMask = 0x00C00000,
  NEONScalar = 0x10000000,
  NEON_B = 0x00000000,
  NEON_H = 0x00400000,
  NEON_S = 0x00800000,
  NEON_D = 0x00C00000
};

// PC relative addressing.
// ADR and ADRP differ only in bit 31.
enum PCRelAddressingOp : uint32_t {
  PCRelAddressingFixed = 0x10000000,
  PCRelAddressingFMask = 0x1F000000,
  PCRelAddressingMask = 0x9F000000,
  ADR = PCRelAddressingFixed | 0x00000000,
  ADRP = PCRelAddressingFixed | 0x80000000
};

// Add/sub (immediate, shifted and extended.)
// Bit position 31; SixtyFourBits (0x80000000) is the same bit.
// NOTE(review): presumably the "sf" operand-size bit - confirm against users.
const int kSFOffset = 31;

// Base add/sub opcodes. The flag-setting variants OR in AddSubSetFlagsBit.
enum AddSubOp : uint32_t {
  AddSubOpMask = 0x60000000,
  AddSubSetFlagsBit = 0x20000000,
  ADD = 0x00000000,
  ADDS = ADD | AddSubSetFlagsBit,
  SUB = 0x40000000,
  SUBS = SUB | AddSubSetFlagsBit
};

// X-macro listing the four base opcodes. V is expanded once per opcode, and
// the expansions are joined with commas so they can sit in an enumerator list.
#define ADD_SUB_OP_LIST(V)  \
  V(ADD),                   \
  V(ADDS),                  \
  V(SUB),                   \
  V(SUBS)

// Add/sub (immediate). Generates <op>_w_imm and <op>_x_imm for each opcode in
// ADD_SUB_OP_LIST; the _x form additionally sets SixtyFourBits.
enum AddSubImmediateOp : uint32_t {
  AddSubImmediateFixed = 0x11000000,
  AddSubImmediateFMask = 0x1F000000,
  AddSubImmediateMask = 0xFF000000,
  #define ADD_SUB_IMMEDIATE(A)           \
  A##_w_imm = AddSubImmediateFixed | A,  \
  A##_x_imm = AddSubImmediateFixed | A | SixtyFourBits
  ADD_SUB_OP_LIST(ADD_SUB_IMMEDIATE)
  #undef ADD_SUB_IMMEDIATE
};

// Add/sub (shifted register). Generates <op>_w_shift and <op>_x_shift.
enum AddSubShiftedOp : uint32_t {
  AddSubShiftedFixed = 0x0B000000,
  AddSubShiftedFMask = 0x1F200000,
  AddSubShiftedMask = 0xFF200000,
  #define ADD_SUB_SHIFTED(A)             \
  A##_w_shift = AddSubShiftedFixed | A,  \
  A##_x_shift = AddSubShiftedFixed | A | SixtyFourBits
  ADD_SUB_OP_LIST(ADD_SUB_SHIFTED)
  #undef ADD_SUB_SHIFTED
};

// Add/sub (extended register). Generates <op>_w_ext and <op>_x_ext.
enum AddSubExtendedOp : uint32_t {
  AddSubExtendedFixed = 0x0B200000,
  AddSubExtendedFMask = 0x1F200000,
  AddSubExtendedMask = 0xFFE00000,
  #define ADD_SUB_EXTENDED(A)           \
  A##_w_ext = AddSubExtendedFixed | A,  \
  A##_x_ext = AddSubExtendedFixed | A | SixtyFourBits
  ADD_SUB_OP_LIST(ADD_SUB_EXTENDED)
  #undef ADD_SUB_EXTENDED
};

// Add/sub with carry.
// Reuses the AddSubOp opcode bits; the unsuffixed names alias the _w forms.
enum AddSubWithCarryOp : uint32_t {
  AddSubWithCarryFixed = 0x1A000000,
  AddSubWithCarryFMask = 0x1FE00000,
  AddSubWithCarryMask = 0xFFE0FC00,
  ADC_w = AddSubWithCarryFixed | ADD,
  ADC_x = AddSubWithCarryFixed | ADD | SixtyFourBits,
  ADC = ADC_w,
  ADCS_w = AddSubWithCarryFixed | ADDS,
  ADCS_x = AddSubWithCarryFixed | ADDS | SixtyFourBits,
  SBC_w = AddSubWithCarryFixed | SUB,
  SBC_x = AddSubWithCarryFixed | SUB | SixtyFourBits,
  SBC = SBC_w,
  SBCS_w = AddSubWithCarryFixed | SUBS,
  SBCS_x = AddSubWithCarryFixed | SUBS | SixtyFourBits
};

// Rotate right into flags.
// Fixed/FMask/Mask follow the convention described in the "Instruction
// enumerations" comment earlier in this file.
enum RotateRightIntoFlagsOp : uint32_t {
  RotateRightIntoFlagsFixed = 0x1A000400,
  RotateRightIntoFlagsFMask = 0x1FE07C00,
  RotateRightIntoFlagsMask = 0xFFE07C10,
  RMIF = RotateRightIntoFlagsFixed | 0xA0000000
};

// Evaluate into flags.
// SETF8 and SETF16 differ only in bit 14.
enum EvaluateIntoFlagsOp : uint32_t {
  EvaluateIntoFlagsFixed = 0x1A000800,
  EvaluateIntoFlagsFMask = 0x1FE03C00,
  EvaluateIntoFlagsMask = 0xFFE07C1F,
  SETF8 = EvaluateIntoFlagsFixed | 0x2000000D,
  SETF16 = EvaluateIntoFlagsFixed | 0x2000400D
};

// Logical (immediate and shifted register).
// Base opcodes; each "negated operand" variant (BIC, ORN, EON, BICS) is its
// base opcode combined with the NOT bit.
enum LogicalOp : uint32_t {
  LogicalOpMask = 0x60200000,
  NOT = 0x00200000,
  AND = 0x00000000,
  BIC = AND | NOT,
  ORR = 0x20000000,
  ORN = ORR | NOT,
  EOR = 0x40000000,
  EON = EOR | NOT,
  ANDS = 0x60000000,
  BICS = ANDS | NOT
};

// Logical immediate.
// Only the base opcodes (no NOT variants) are given immediate forms here; the
// _x forms additionally set SixtyFourBits.
enum LogicalImmediateOp : uint32_t {
  LogicalImmediateFixed = 0x12000000,
  LogicalImmediateFMask = 0x1F800000,
  LogicalImmediateMask = 0xFF800000,
  AND_w_imm = LogicalImmediateFixed | AND,
  AND_x_imm = LogicalImmediateFixed | AND | SixtyFourBits,
  ORR_w_imm = LogicalImmediateFixed | ORR,
  ORR_x_imm = LogicalImmediateFixed | ORR | SixtyFourBits,
  EOR_w_imm = LogicalImmediateFixed | EOR,
  EOR_x_imm = LogicalImmediateFixed | EOR | SixtyFourBits,
  ANDS_w_imm = LogicalImmediateFixed | ANDS,
  ANDS_x_imm = LogicalImmediateFixed | ANDS | SixtyFourBits
};

// Logical shifted register.
// Each LogicalOp opcode gets a _w and an _x encoding (the latter sets
// SixtyFourBits); the <op>_shift names alias the _w encodings.
enum LogicalShiftedOp : uint32_t {
  LogicalShiftedFixed = 0x0A000000,
  LogicalShiftedFMask = 0x1F000000,
  LogicalShiftedMask = 0xFF200000,
  AND_w = LogicalShiftedFixed | AND,
  AND_x = LogicalShiftedFixed | AND | SixtyFourBits,
  AND_shift = AND_w,
  BIC_w = LogicalShiftedFixed | BIC,
  BIC_x = LogicalShiftedFixed | BIC | SixtyFourBits,
  BIC_shift = BIC_w,
  ORR_w = LogicalShiftedFixed | ORR,
  ORR_x = LogicalShiftedFixed | ORR | SixtyFourBits,
  ORR_shift = ORR_w,
  ORN_w = LogicalShiftedFixed | ORN,
  ORN_x = LogicalShiftedFixed | ORN | SixtyFourBits,
  ORN_shift = ORN_w,
  EOR_w = LogicalShiftedFixed | EOR,
  EOR_x = LogicalShiftedFixed | EOR | SixtyFourBits,
  EOR_shift = EOR_w,
  EON_w = LogicalShiftedFixed | EON,
  EON_x = LogicalShiftedFixed | EON | SixtyFourBits,
  EON_shift = EON_w,
  ANDS_w = LogicalShiftedFixed | ANDS,
  ANDS_x = LogicalShiftedFixed | ANDS | SixtyFourBits,
  ANDS_shift = ANDS_w,
  BICS_w = LogicalShiftedFixed | BICS,
  BICS_x = LogicalShiftedFixed | BICS | SixtyFourBits,
  BICS_shift = BICS_w
};

// Move wide immediate.
// MOVN/MOVZ/MOVK are the bare opcode bits; the _w/_x enumerators are the full
// encodings (class fixed bits, opcode, and SixtyFourBits for _x).
enum MoveWideImmediateOp : uint32_t {
  MoveWideImmediateFixed = 0x12800000,
  MoveWideImmediateFMask = 0x1F800000,
  MoveWideImmediateMask = 0xFF800000,
  MOVN = 0x00000000,
  MOVZ = 0x40000000,
  MOVK = 0x60000000,
  MOVN_w = MoveWideImmediateFixed | MOVN,
  MOVN_x = MoveWideImmediateFixed | MOVN | SixtyFourBits,
  MOVZ_w = MoveWideImmediateFixed | MOVZ,
  MOVZ_x = MoveWideImmediateFixed | MOVZ | SixtyFourBits,
  MOVK_w = MoveWideImmediateFixed | MOVK,
  MOVK_x = MoveWideImmediateFixed | MOVK | SixtyFourBits
};

// Bitfield.
// Bit position 22.
// NOTE(review): presumably the position of the bitfield "N" bit - confirm
// against the users of this constant.
const int kBitfieldNOffset = 22;

// The _x encodings set bit 31 on top of the matching _w encoding; the
// unsuffixed names alias the _w encodings.
enum BitfieldOp : uint32_t {
  BitfieldFixed = 0x13000000,
  BitfieldFMask = 0x1F800000,
  BitfieldMask = 0xFF800000,
  SBFM_w = BitfieldFixed | 0x00000000,
  SBFM_x = BitfieldFixed | 0x80000000,
  SBFM = SBFM_w,
  BFM_w = BitfieldFixed | 0x20000000,
  BFM_x = BitfieldFixed | 0xA0000000,
  BFM = BFM_w,
  UBFM_w = BitfieldFixed | 0x40000000,
  UBFM_x = BitfieldFixed | 0xC0000000,
  UBFM = UBFM_w
  // Bitfield N field: no enumerator is defined for it in this enum; see
  // kBitfieldNOffset above.
};

// Extract.
enum ExtractOp : uint32_t {
  ExtractFixed = 0x13800000,
  ExtractFMask = 0x1F800000,
  ExtractMask = 0xFFA00000,
  EXTR_w = ExtractFixed | 0x00000000,
  EXTR_x = ExtractFixed | 0x80000000,
  EXTR = EXTR_w
};

// Unconditional branch.
// B and BL differ only in bit 31.
enum UnconditionalBranchOp : uint32_t {
  UnconditionalBranchFixed = 0x14000000,
  UnconditionalBranchFMask = 0x7C000000,
  UnconditionalBranchMask = 0xFC000000,
  B = UnconditionalBranchFixed | 0x00000000,
  BL = UnconditionalBranchFixed | 0x80000000
};

// Unconditional branch to register.
// BR/BLR/RET come first, followed by the variants whose names carry an
// AA/AB/AAZ/ABZ suffix.
// NOTE(review): the suffixed forms are presumably the Armv8.3 pointer
// authentication branches - confirm against the architecture manual.
enum UnconditionalBranchToRegisterOp : uint32_t {
  UnconditionalBranchToRegisterFixed = 0xD6000000,
  UnconditionalBranchToRegisterFMask = 0xFE000000,
  UnconditionalBranchToRegisterMask = 0xFFFFFC00,
  BR = UnconditionalBranchToRegisterFixed | 0x001F0000,
  BLR = UnconditionalBranchToRegisterFixed | 0x003F0000,
  RET = UnconditionalBranchToRegisterFixed | 0x005F0000,

  BRAAZ = UnconditionalBranchToRegisterFixed | 0x001F0800,
  BRABZ = UnconditionalBranchToRegisterFixed | 0x001F0C00,
  BLRAAZ = UnconditionalBranchToRegisterFixed | 0x003F0800,
  BLRABZ = UnconditionalBranchToRegisterFixed | 0x003F0C00,
  RETAA = UnconditionalBranchToRegisterFixed | 0x005F0800,
  RETAB = UnconditionalBranchToRegisterFixed | 0x005F0C00,
  BRAA = UnconditionalBranchToRegisterFixed | 0x011F0800,
  BRAB = UnconditionalBranchToRegisterFixed | 0x011F0C00,
  BLRAA = UnconditionalBranchToRegisterFixed | 0x013F0800,
  BLRAB = UnconditionalBranchToRegisterFixed | 0x013F0C00
};

// Compare and branch.
// Bit 24 distinguishes CBNZ from CBZ and bit 31 the _x from the _w forms; the
// unsuffixed names alias the _w encodings.
enum CompareBranchOp : uint32_t {
  CompareBranchFixed = 0x34000000,
  CompareBranchFMask = 0x7E000000,
  CompareBranchMask = 0xFF000000,
  CBZ_w = CompareBranchFixed | 0x00000000,
  CBZ_x = CompareBranchFixed | 0x80000000,
  CBZ = CBZ_w,
  CBNZ_w = CompareBranchFixed | 0x01000000,
  CBNZ_x = CompareBranchFixed | 0x81000000,
  CBNZ = CBNZ_w
};

// Test and branch.
// Bit 24 distinguishes TBNZ from TBZ.
enum TestBranchOp : uint32_t {
  TestBranchFixed = 0x36000000,
  TestBranchFMask = 0x7E000000,
  TestBranchMask = 0x7F000000,
  TBZ = TestBranchFixed | 0x00000000,
  TBNZ = TestBranchFixed | 0x01000000
};

// Conditional branch.
enum ConditionalBranchOp : uint32_t {
  ConditionalBranchFixed = 0x54000000,
  ConditionalBranchFMask = 0xFE000000,
  ConditionalBranchMask = 0xFF000010,
  B_cond = ConditionalBranchFixed | 0x00000000
};

// System.
// System instruction encoding is complicated because some instructions use op
// and CR fields to encode parameters. To handle this cleanly, the system
// instructions are split into more than one enum.

// Fixed bits and mask shared by the whole system instruction class.
enum SystemOp : uint32_t {
  SystemFixed = 0xD5000000,
  SystemFMask = 0xFFC00000
};

// System register moves; MRS and MSR differ only in bit 21.
enum SystemSysRegOp : uint32_t {
  SystemSysRegFixed = 0xD5100000,
  SystemSysRegFMask = 0xFFD00000,
  SystemSysRegMask = 0xFFF00000,
  MRS = SystemSysRegFixed | 0x00200000,
  MSR = SystemSysRegFixed | 0x00000000
};

enum SystemPStateOp : uint32_t {
  SystemPStateFixed = 0xD5004000,
  SystemPStateFMask = 0xFFF8F000,
  SystemPStateMask = 0xFFFFF0FF,
  CFINV = SystemPStateFixed | 0x0000001F,
  XAFLAG = SystemPStateFixed | 0x0000003F,
  AXFLAG = SystemPStateFixed | 0x0000005F
};

// HINT is the only encoding in this class; FMask and Mask are identical.
enum SystemHintOp : uint32_t {
  SystemHintFixed = 0xD503201F,
  SystemHintFMask = 0xFFFFF01F,
  SystemHintMask = 0xFFFFF01F,
  HINT = SystemHintFixed | 0x00000000
};

// SYS is the only encoding in this class; FMask and Mask are identical.
enum SystemSysOp : uint32_t {
  SystemSysFixed = 0xD5080000,
  SystemSysFMask = 0xFFF80000,
  SystemSysMask = 0xFFF80000,
  SYS = SystemSysFixed | 0x00000000
};

// Exception.
// Exception-generating instructions. Fixed/FMask/Mask follow the convention
// described in the "Instruction enumerations" comment earlier in this file.
enum ExceptionOp : uint32_t {
  ExceptionFixed = 0xD4000000,
  ExceptionFMask = 0xFF000000,
  ExceptionMask = 0xFFE0001F,
  HLT = ExceptionFixed | 0x00400000,
  BRK = ExceptionFixed | 0x00200000,
  SVC = ExceptionFixed | 0x00000001,
  HVC = ExceptionFixed | 0x00000002,
  SMC = ExceptionFixed | 0x00000003,
  DCPS1 = ExceptionFixed | 0x00A00001,
  DCPS2 = ExceptionFixed | 0x00A00002,
  DCPS3 = ExceptionFixed | 0x00A00003
};

// Memory barriers; DSB, DMB and ISB are told apart by bits 6:5.
enum MemBarrierOp : uint32_t {
  MemBarrierFixed = 0xD503309F,
  MemBarrierFMask = 0xFFFFF09F,
  MemBarrierMask = 0xFFFFF0FF,
  DSB = MemBarrierFixed | 0x00000000,
  DMB = MemBarrierFixed | 0x00000020,
  ISB = MemBarrierFixed | 0x00000040
};

// CLREX is the only encoding in this class; FMask and Mask are identical.
enum SystemExclusiveMonitorOp : uint32_t {
  SystemExclusiveMonitorFixed = 0xD503305F,
  SystemExclusiveMonitorFMask = 0xFFFFF0FF,
  SystemExclusiveMonitorMask = 0xFFFFF0FF,
  CLREX = SystemExclusiveMonitorFixed
};

// Pointer authentication instructions encoded in the system space. The Mask
// covers all 32 bits, so each enumerator is a complete instruction word.
enum SystemPAuthOp : uint32_t {
  SystemPAuthFixed = 0xD503211F,
  SystemPAuthFMask = 0xFFFFFD1F,
  SystemPAuthMask = 0xFFFFFFFF,
  PACIA1716 = SystemPAuthFixed | 0x00000100,
  PACIB1716 = SystemPAuthFixed | 0x00000140,
  AUTIA1716 = SystemPAuthFixed | 0x00000180,
  AUTIB1716 = SystemPAuthFixed | 0x000001C0,
  PACIAZ = SystemPAuthFixed | 0x00000300,
  PACIASP = SystemPAuthFixed | 0x00000320,
  PACIBZ = SystemPAuthFixed | 0x00000340,
  PACIBSP = SystemPAuthFixed | 0x00000360,
  AUTIAZ = SystemPAuthFixed | 0x00000380,
  AUTIASP = SystemPAuthFixed | 0x000003A0,
  AUTIBZ = SystemPAuthFixed | 0x000003C0,
  AUTIBSP = SystemPAuthFixed | 0x000003E0,

  // XPACLRI has the same fixed mask as System Hints and needs to be handled
  // differently.
  XPACLRI = 0xD50320FF
};

// Any load or store.
// Only a Fixed/FMask pair is provided: this enum identifies the class as a
// whole and lists no individual instructions.
enum LoadStoreAnyOp : uint32_t {
  LoadStoreAnyFMask = 0x0a000000,
  LoadStoreAnyFixed = 0x08000000
};

// Any load pair or store pair.
// Only a Fixed/FMask pair is provided: this enum identifies the class as a
// whole and lists no individual instructions.
enum LoadStorePairAnyOp : uint32_t {
  LoadStorePairAnyFMask = 0x3a000000,
  LoadStorePairAnyFixed = 0x28000000
};

// X-macro over the load/store pair opcodes. Each entry is
// V(mnemonic, register-kind suffix, opcode bits); the expansions are joined
// with commas so they can sit in an enumerator list.
#define LOAD_STORE_PAIR_OP_LIST(V)  \
  V(STP, w,   0x00000000),          \
  V(LDP, w,   0x00400000),          \
  V(LDPSW, x, 0x40400000),          \
  V(STP, x,   0x80000000),          \
  V(LDP, x,   0x80400000),          \
  V(STP, s,   0x04000000),          \
  V(LDP, s,   0x04400000),          \
  V(STP, d,   0x44000000),          \
  V(LDP, d,   0x44400000),          \
  V(STP, q,   0x84000000),          \
  V(LDP, q,   0x84400000)

// Load/store pair (post, pre and offset.)
// Defines the bare opcode bits <mnemonic>_<suffix> (e.g. STP_w); the
// addressing-mode enums below OR these with their own Fixed bits.
enum LoadStorePairOp : uint32_t {
  LoadStorePairMask = 0xC4400000,
  LoadStorePairLBit = 1 << 22,
  #define LOAD_STORE_PAIR(A, B, C) \
  A##_##B = C
  LOAD_STORE_PAIR_OP_LIST(LOAD_STORE_PAIR)
  #undef LOAD_STORE_PAIR
};

// Post-index forms: <mnemonic>_<suffix>_post. The third macro argument is
// unused because the opcode bits come from the LoadStorePairOp enumerator.
enum LoadStorePairPostIndexOp : uint32_t {
  LoadStorePairPostIndexFixed = 0x28800000,
  LoadStorePairPostIndexFMask = 0x3B800000,
  LoadStorePairPostIndexMask = 0xFFC00000,
  #define LOAD_STORE_PAIR_POST_INDEX(A, B, C)  \
  A##_##B##_post = LoadStorePairPostIndexFixed | A##_##B
  LOAD_STORE_PAIR_OP_LIST(LOAD_STORE_PAIR_POST_INDEX)
  #undef LOAD_STORE_PAIR_POST_INDEX
};

// Pre-index forms: <mnemonic>_<suffix>_pre.
enum LoadStorePairPreIndexOp : uint32_t {
  LoadStorePairPreIndexFixed = 0x29800000,
  LoadStorePairPreIndexFMask = 0x3B800000,
  LoadStorePairPreIndexMask = 0xFFC00000,
  #define LOAD_STORE_PAIR_PRE_INDEX(A, B, C)  \
  A##_##B##_pre = LoadStorePairPreIndexFixed | A##_##B
  LOAD_STORE_PAIR_OP_LIST(LOAD_STORE_PAIR_PRE_INDEX)
  #undef LOAD_STORE_PAIR_PRE_INDEX
};

// Offset forms: <mnemonic>_<suffix>_off.
enum LoadStorePairOffsetOp : uint32_t {
  LoadStorePairOffsetFixed = 0x29000000,
  LoadStorePairOffsetFMask = 0x3B800000,
  LoadStorePairOffsetMask = 0xFFC00000,
  #define LOAD_STORE_PAIR_OFFSET(A, B, C)  \
  A##_##B##_off = LoadStorePairOffsetFixed | A##_##B
  LOAD_STORE_PAIR_OP_LIST(LOAD_STORE_PAIR_OFFSET)
  #undef LOAD_STORE_PAIR_OFFSET
};

// Non-temporal pair forms, written out by hand from the STP_*/LDP_* opcode
// bits. There is no non-temporal counterpart of LDPSW_x in this list.
enum LoadStorePairNonTemporalOp : uint32_t {
  LoadStorePairNonTemporalFixed = 0x28000000,
  LoadStorePairNonTemporalFMask = 0x3B800000,
  LoadStorePairNonTemporalMask = 0xFFC00000,
  LoadStorePairNonTemporalLBit = 1 << 22,
  STNP_w = LoadStorePairNonTemporalFixed | STP_w,
  LDNP_w = LoadStorePairNonTemporalFixed | LDP_w,
  STNP_x = LoadStorePairNonTemporalFixed | STP_x,
  LDNP_x = LoadStorePairNonTemporalFixed | LDP_x,
  STNP_s = LoadStorePairNonTemporalFixed | STP_s,
  LDNP_s = LoadStorePairNonTemporalFixed | LDP_s,
  STNP_d = LoadStorePairNonTemporalFixed | STP_d,
  LDNP_d = LoadStorePairNonTemporalFixed | LDP_d,
  STNP_q = LoadStorePairNonTemporalFixed | STP_q,
  LDNP_q = LoadStorePairNonTemporalFixed | LDP_q
};

// Load with pointer authentication.
// The _pre variants add LoadStorePACPreBit to the plain encoding.
enum LoadStorePACOp : uint32_t {
  LoadStorePACFixed = 0xF8200400,
  LoadStorePACFMask = 0xFF200400,
  LoadStorePACMask = 0xFFA00C00,
  LoadStorePACPreBit = 0x00000800,
  LDRAA = LoadStorePACFixed | 0x00000000,
  LDRAA_pre = LoadStorePACPreBit | LDRAA,
  LDRAB = LoadStorePACFixed | 0x00800000,
  LDRAB_pre = LoadStorePACPreBit | LDRAB
};

// Load literal.
enum LoadLiteralOp : uint32_t {
  LoadLiteralFixed = 0x18000000,
  LoadLiteralFMask = 0x3B000000,
  LoadLiteralMask = 0xFF000000,
  LDR_w_lit = LoadLiteralFixed | 0x00000000,
  LDR_x_lit = LoadLiteralFixed | 0x40000000,
  LDRSW_x_lit = LoadLiteralFixed | 0x80000000,
  PRFM_lit = LoadLiteralFixed | 0xC0000000,
  LDR_s_lit = LoadLiteralFixed | 0x04000000,
  LDR_d_lit = LoadLiteralFixed | 0x44000000,
  LDR_q_lit = LoadLiteralFixed | 0x84000000
};

// X-macro over the single-register load/store opcodes. Each entry is
// V(LD or ST, mnemonic tail, register-kind suffix, opcode bits); users paste
// the first two arguments together (optionally with an infix) to form names
// such as LDRB_w.
#define LOAD_STORE_OP_LIST(V)     \
  V(ST, RB, w,  0x00000000),      \
  V(ST, RH, w,  0x40000000),      \
  V(ST, R, w,   0x80000000),      \
  V(ST, R, x,   0xC0000000),      \
  V(LD, RB, w,  0x00400000),      \
  V(LD, RH, w,  0x40400000),      \
  V(LD, R, w,   0x80400000),      \
  V(LD, R, x,   0xC0400000),      \
  V(LD, RSB, x, 0x00800000),      \
  V(LD, RSH, x, 0x40800000),      \
  V(LD, RSW, x, 0x80800000),      \
  V(LD, RSB, w, 0x00C00000),      \
  V(LD, RSH, w, 0x40C00000),      \
  V(ST, R, b,   0x04000000),      \
  V(ST, R, h,   0x44000000),      \
  V(ST, R, s,   0x84000000),      \
  V(ST, R, d,   0xC4000000),      \
  V(ST, R, q,   0x04800000),      \
  V(LD, R, b,   0x04400000),      \
  V(LD, R, h,   0x44400000),      \
  V(LD, R, s,   0x84400000),      \
  V(LD, R, d,   0xC4400000),      \
  V(LD, R, q,   0x04C00000)

// Load/store (post, pre, offset and unsigned.)
// Defines the bare opcode bits for every LOAD_STORE_OP_LIST entry, plus PRFM.
enum LoadStoreOp : uint32_t {
  LoadStoreMask = 0xC4C00000,
  LoadStoreVMask = 0x04000000,
  #define LOAD_STORE(A, B, C, D)  \
  A##B##_##C = D
  LOAD_STORE_OP_LIST(LOAD_STORE),
  #undef LOAD_STORE
  PRFM = 0xC0800000
};

// Load/store unscaled offset.
// Names are formed with a "U" infix (e.g. LDURB_w, STUR_x).
enum LoadStoreUnscaledOffsetOp : uint32_t {
  LoadStoreUnscaledOffsetFixed = 0x38000000,
  LoadStoreUnscaledOffsetFMask = 0x3B200C00,
  LoadStoreUnscaledOffsetMask = 0xFFE00C00,
  PRFUM = LoadStoreUnscaledOffsetFixed | PRFM,
  #define LOAD_STORE_UNSCALED(A, B, C, D)  \
  A##U##B##_##C = LoadStoreUnscaledOffsetFixed | D
  LOAD_STORE_OP_LIST(LOAD_STORE_UNSCALED)
  #undef LOAD_STORE_UNSCALED
};

// Load/store post index.
// Names carry a _post suffix.
enum LoadStorePostIndex : uint32_t {
  LoadStorePostIndexFixed = 0x38000400,
  LoadStorePostIndexFMask = 0x3B200C00,
  LoadStorePostIndexMask = 0xFFE00C00,
  #define LOAD_STORE_POST_INDEX(A, B, C, D)  \
  A##B##_##C##_post = LoadStorePostIndexFixed | D
  LOAD_STORE_OP_LIST(LOAD_STORE_POST_INDEX)
  #undef LOAD_STORE_POST_INDEX
};

// Load/store pre index.
// Names carry a _pre suffix.
enum LoadStorePreIndex : uint32_t {
  LoadStorePreIndexFixed = 0x38000C00,
  LoadStorePreIndexFMask = 0x3B200C00,
  LoadStorePreIndexMask = 0xFFE00C00,
  #define LOAD_STORE_PRE_INDEX(A, B, C, D)  \
  A##B##_##C##_pre = LoadStorePreIndexFixed | D
  LOAD_STORE_OP_LIST(LOAD_STORE_PRE_INDEX)
  #undef LOAD_STORE_PRE_INDEX
};

// Load/store unsigned offset.
// Generates <name>_unsigned for every LOAD_STORE_OP_LIST entry, plus
// PRFM_unsigned.
enum LoadStoreUnsignedOffset : uint32_t {
  LoadStoreUnsignedOffsetFixed = 0x39000000,
  LoadStoreUnsignedOffsetFMask = 0x3B000000,
  LoadStoreUnsignedOffsetMask = 0xFFC00000,
  PRFM_unsigned = LoadStoreUnsignedOffsetFixed | PRFM,
  #define LOAD_STORE_UNSIGNED_OFFSET(A, B, C, D)  \
  A##B##_##C##_unsigned = LoadStoreUnsignedOffsetFixed | D
  LOAD_STORE_OP_LIST(LOAD_STORE_UNSIGNED_OFFSET)
  #undef LOAD_STORE_UNSIGNED_OFFSET
};

// Load/store register offset.
// Generates <name>_reg for every LOAD_STORE_OP_LIST entry, plus PRFM_reg.
enum LoadStoreRegisterOffset : uint32_t {
  LoadStoreRegisterOffsetFixed = 0x38200800,
  LoadStoreRegisterOffsetFMask = 0x3B200C00,
  LoadStoreRegisterOffsetMask = 0xFFE00C00,
  PRFM_reg = LoadStoreRegisterOffsetFixed | PRFM,
  #define LOAD_STORE_REGISTER_OFFSET(A, B, C, D)  \
  A##B##_##C##_reg = LoadStoreRegisterOffsetFixed | D
  LOAD_STORE_OP_LIST(LOAD_STORE_REGISTER_OFFSET)
  #undef LOAD_STORE_REGISTER_OFFSET
};

// Load/store exclusive, acquire/release, LORegion and compare-and-swap
// encodings. They all share LoadStoreExclusiveFixed; the CAS family is
// composed from a per-size Fixed value plus the LSEBit_* modifier bits.
enum LoadStoreExclusive : uint32_t {
  LoadStoreExclusiveFixed = 0x08000000,
  LoadStoreExclusiveFMask = 0x3F000000,
  LoadStoreExclusiveMask = 0xFFE08000,
  STXRB_w = LoadStoreExclusiveFixed | 0x00000000,
  STXRH_w = LoadStoreExclusiveFixed | 0x40000000,
  STXR_w = LoadStoreExclusiveFixed | 0x80000000,
  STXR_x = LoadStoreExclusiveFixed | 0xC0000000,
  LDXRB_w = LoadStoreExclusiveFixed | 0x00400000,
  LDXRH_w = LoadStoreExclusiveFixed | 0x40400000,
  LDXR_w = LoadStoreExclusiveFixed | 0x80400000,
  LDXR_x = LoadStoreExclusiveFixed | 0xC0400000,
  STXP_w = LoadStoreExclusiveFixed | 0x80200000,
  STXP_x = LoadStoreExclusiveFixed | 0xC0200000,
  LDXP_w = LoadStoreExclusiveFixed | 0x80600000,
  LDXP_x = LoadStoreExclusiveFixed | 0xC0600000,
  STLXRB_w = LoadStoreExclusiveFixed | 0x00008000,
  STLXRH_w = LoadStoreExclusiveFixed | 0x40008000,
  STLXR_w = LoadStoreExclusiveFixed | 0x80008000,
  STLXR_x = LoadStoreExclusiveFixed | 0xC0008000,
  LDAXRB_w = LoadStoreExclusiveFixed | 0x00408000,
  LDAXRH_w = LoadStoreExclusiveFixed | 0x40408000,
  LDAXR_w = LoadStoreExclusiveFixed | 0x80408000,
  LDAXR_x = LoadStoreExclusiveFixed | 0xC0408000,
  STLXP_w = LoadStoreExclusiveFixed | 0x80208000,
  STLXP_x = LoadStoreExclusiveFixed | 0xC0208000,
  LDAXP_w = LoadStoreExclusiveFixed | 0x80608000,
  LDAXP_x = LoadStoreExclusiveFixed | 0xC0608000,
  STLRB_w = LoadStoreExclusiveFixed | 0x00808000,
  STLRH_w = LoadStoreExclusiveFixed | 0x40808000,
  STLR_w = LoadStoreExclusiveFixed | 0x80808000,
  STLR_x = LoadStoreExclusiveFixed | 0xC0808000,
  LDARB_w = LoadStoreExclusiveFixed | 0x00C08000,
  LDARH_w = LoadStoreExclusiveFixed | 0x40C08000,
  LDAR_w = LoadStoreExclusiveFixed | 0x80C08000,
  LDAR_x = LoadStoreExclusiveFixed | 0xC0C08000,

  // v8.1 Load/store LORegion ops
  STLLRB = LoadStoreExclusiveFixed | 0x00800000,
  LDLARB = LoadStoreExclusiveFixed | 0x00C00000,
  STLLRH = LoadStoreExclusiveFixed | 0x40800000,
  LDLARH = LoadStoreExclusiveFixed | 0x40C00000,
  STLLR_w = LoadStoreExclusiveFixed | 0x80800000,
  LDLAR_w = LoadStoreExclusiveFixed | 0x80C00000,
  STLLR_x = LoadStoreExclusiveFixed | 0xC0800000,
  LDLAR_x = LoadStoreExclusiveFixed | 0xC0C00000,

  // v8.1 Load/store exclusive ops
  // LSEBit_l adds the "A" and LSEBit_o0 the "L" in the mnemonics below;
  // LSEBit_sz selects the _x form over the _w form.
  LSEBit_l = 0x00400000,
  LSEBit_o0 = 0x00008000,
  LSEBit_sz = 0x40000000,
  CASFixed = LoadStoreExclusiveFixed | 0x80A00000,
  CASBFixed = LoadStoreExclusiveFixed | 0x00A00000,
  CASHFixed = LoadStoreExclusiveFixed | 0x40A00000,
  CASPFixed = LoadStoreExclusiveFixed | 0x00200000,
  CAS_w = CASFixed,
  CAS_x = CASFixed | LSEBit_sz,
  CASA_w = CASFixed | LSEBit_l,
  CASA_x = CASFixed | LSEBit_l | LSEBit_sz,
  CASL_w = CASFixed | LSEBit_o0,
  CASL_x = CASFixed | LSEBit_o0 | LSEBit_sz,
  CASAL_w = CASFixed | LSEBit_l | LSEBit_o0,
  CASAL_x = CASFixed | LSEBit_l | LSEBit_o0 | LSEBit_sz,
  CASB = CASBFixed,
  CASAB = CASBFixed | LSEBit_l,
  CASLB = CASBFixed | LSEBit_o0,
  CASALB = CASBFixed | LSEBit_l | LSEBit_o0,
  CASH = CASHFixed,
  CASAH = CASHFixed | LSEBit_l,
  CASLH = CASHFixed | LSEBit_o0,
  CASALH = CASHFixed | LSEBit_l | LSEBit_o0,
  CASP_w = CASPFixed,
  CASP_x = CASPFixed | LSEBit_sz,
  CASPA_w = CASPFixed | LSEBit_l,
  CASPA_x = CASPFixed | LSEBit_l | LSEBit_sz,
  CASPL_w = CASPFixed | LSEBit_o0,
  CASPL_x = CASPFixed | LSEBit_o0 | LSEBit_sz,
  CASPAL_w = CASPFixed | LSEBit_l | LSEBit_o0,
  CASPAL_x = CASPFixed | LSEBit_l | LSEBit_o0 | LSEBit_sz
};

// Load/store RCpc unscaled offset.
enum LoadStoreRCpcUnscaledOffsetOp : uint32_t {
  LoadStoreRCpcUnscaledOffsetFixed = 0x19000000,
  LoadStoreRCpcUnscaledOffsetFMask = 0x3F200C00,
  LoadStoreRCpcUnscaledOffsetMask = 0xFFE00C00,
  STLURB = LoadStoreRCpcUnscaledOffsetFixed | 0x00000000,
  LDAPURB = LoadStoreRCpcUnscaledOffsetFixed | 0x00400000,
  LDAPURSB_x = LoadStoreRCpcUnscaledOffsetFixed | 0x00800000,
  LDAPURSB_w = LoadStoreRCpcUnscaledOffsetFixed | 0x00C00000,
  STLURH = LoadStoreRCpcUnscaledOffsetFixed | 0x40000000,
  LDAPURH = LoadStoreRCpcUnscaledOffsetFixed | 0x40400000,
  LDAPURSH_x = LoadStoreRCpcUnscaledOffsetFixed | 0x40800000,
  LDAPURSH_w = LoadStoreRCpcUnscaledOffsetFixed | 0x40C00000,
  STLUR_w = LoadStoreRCpcUnscaledOffsetFixed | 0x80000000,
  LDAPUR_w = LoadStoreRCpcUnscaledOffsetFixed | 0x80400000,
  LDAPURSW = LoadStoreRCpcUnscaledOffsetFixed | 0x80800000,
  STLUR_x = LoadStoreRCpcUnscaledOffsetFixed | 0xC0000000,
  LDAPUR_x = LoadStoreRCpcUnscaledOffsetFixed | 0xC0400000
};

// X-macro over the "simple" atomic memory operations. Each entry is
// V(mnemonic, opcode bits), where the opcode occupies bits 14:12.
#define ATOMIC_MEMORY_SIMPLE_OPC_LIST(V)  \
  V(LDADD,  0x00000000),                  \
  V(LDCLR,  0x00001000),                  \
  V(LDEOR,  0x00002000),                  \
  V(LDSET,  0x00003000),                  \
  V(LDSMAX, 0x00004000),                  \
  V(LDSMIN, 0x00005000),                  \
  V(LDUMAX, 0x00006000),                  \
  V(LDUMIN, 0x00007000)

// Atomic memory.
// SWP* and LDAPR* are listed explicitly. The simple operations from
// ATOMIC_MEMORY_SIMPLE_OPC_LIST are expanded into their byte (B), halfword
// (H), _w and _x forms, each in plain, A, L and AL variants.
enum AtomicMemoryOp : uint32_t {
  AtomicMemoryFixed = 0x38200000,
  AtomicMemoryFMask = 0x3B200C00,
  AtomicMemoryMask = 0xFFE0FC00,
  SWPB = AtomicMemoryFixed | 0x00008000,
  SWPAB = AtomicMemoryFixed | 0x00808000,
  SWPLB = AtomicMemoryFixed | 0x00408000,
  SWPALB = AtomicMemoryFixed | 0x00C08000,
  SWPH = AtomicMemoryFixed | 0x40008000,
  SWPAH = AtomicMemoryFixed | 0x40808000,
  SWPLH = AtomicMemoryFixed | 0x40408000,
  SWPALH = AtomicMemoryFixed | 0x40C08000,
  SWP_w = AtomicMemoryFixed | 0x80008000,
  SWPA_w = AtomicMemoryFixed | 0x80808000,
  SWPL_w = AtomicMemoryFixed | 0x80408000,
  SWPAL_w = AtomicMemoryFixed | 0x80C08000,
  SWP_x = AtomicMemoryFixed | 0xC0008000,
  SWPA_x = AtomicMemoryFixed | 0xC0808000,
  SWPL_x = AtomicMemoryFixed | 0xC0408000,
  SWPAL_x = AtomicMemoryFixed | 0xC0C08000,
  LDAPRB = AtomicMemoryFixed | 0x0080C000,
  LDAPRH = AtomicMemoryFixed | 0x4080C000,
  LDAPR_w = AtomicMemoryFixed | 0x8080C000,
  LDAPR_x = AtomicMemoryFixed | 0xC080C000,

  AtomicMemorySimpleFMask = 0x3B208C00,
  AtomicMemorySimpleOpMask = 0x00007000,
// For each operation N with opcode bits OP: N##Op is the bare opcode and the
// remaining enumerators are complete encodings. 0x00800000 adds the "A" and
// 0x00400000 the "L" to the mnemonic; bits 31:30 select B/H/_w/_x.
#define ATOMIC_MEMORY_SIMPLE(N, OP)              \
  N##Op = OP,                                    \
  N##B = AtomicMemoryFixed | OP,                 \
  N##AB = AtomicMemoryFixed | OP | 0x00800000,   \
  N##LB = AtomicMemoryFixed | OP | 0x00400000,   \
  N##ALB = AtomicMemoryFixed | OP | 0x00C00000,  \
  N##H = AtomicMemoryFixed | OP | 0x40000000,    \
  N##AH = AtomicMemoryFixed | OP | 0x40800000,   \
  N##LH = AtomicMemoryFixed | OP | 0x40400000,   \
  N##ALH = AtomicMemoryFixed | OP | 0x40C00000,  \
  N##_w = AtomicMemoryFixed | OP | 0x80000000,   \
  N##A_w = AtomicMemoryFixed | OP | 0x80800000,  \
  N##L_w = AtomicMemoryFixed | OP | 0x80400000,  \
  N##AL_w = AtomicMemoryFixed | OP | 0x80C00000, \
  N##_x = AtomicMemoryFixed | OP | 0xC0000000,   \
  N##A_x = AtomicMemoryFixed | OP | 0xC0800000,  \
  N##L_x = AtomicMemoryFixed | OP | 0xC0400000,  \
  N##AL_x = AtomicMemoryFixed | OP | 0xC0C00000

  ATOMIC_MEMORY_SIMPLE_OPC_LIST(ATOMIC_MEMORY_SIMPLE)
#undef ATOMIC_MEMORY_SIMPLE
};

// Conditional compare.
// Bare opcode bits shared by the register and immediate forms below.
enum ConditionalCompareOp : uint32_t {
  ConditionalCompareMask = 0x60000000,
  CCMN = 0x20000000,
  CCMP = 0x60000000
};

// Conditional compare register.
enum ConditionalCompareRegisterOp : uint32_t {
  ConditionalCompareRegisterFixed = 0x1A400000,
  ConditionalCompareRegisterFMask = 0x1FE00800,
  ConditionalCompareRegisterMask = 0xFFE00C10,
  CCMN_w = ConditionalCompareRegisterFixed | CCMN,
  CCMN_x = ConditionalCompareRegisterFixed | SixtyFourBits | CCMN,
  CCMP_w = ConditionalCompareRegisterFixed | CCMP,
  CCMP_x = ConditionalCompareRegisterFixed | SixtyFourBits | CCMP
};

// Conditional compare immediate.
// Same FMask as the register form; the Fixed value additionally sets bit 11.
enum ConditionalCompareImmediateOp : uint32_t {
  ConditionalCompareImmediateFixed = 0x1A400800,
  ConditionalCompareImmediateFMask = 0x1FE00800,
  ConditionalCompareImmediateMask = 0xFFE00C10,
  CCMN_w_imm = ConditionalCompareImmediateFixed | CCMN,
  CCMN_x_imm = ConditionalCompareImmediateFixed | SixtyFourBits | CCMN,
  CCMP_w_imm = ConditionalCompareImmediateFixed | CCMP,
  CCMP_x_imm = ConditionalCompareImmediateFixed | SixtyFourBits | CCMP
};

// Conditional select.
// The unsuffixed names alias the _w encodings.
enum ConditionalSelectOp : uint32_t {
  ConditionalSelectFixed = 0x1A800000,
  ConditionalSelectFMask = 0x1FE00000,
  ConditionalSelectMask = 0xFFE00C00,
  CSEL_w = ConditionalSelectFixed | 0x00000000,
  CSEL_x = ConditionalSelectFixed | 0x80000000,
  CSEL = CSEL_w,
  CSINC_w = ConditionalSelectFixed | 0x00000400,
  CSINC_x = ConditionalSelectFixed | 0x80000400,
  CSINC = CSINC_w,
  CSINV_w = ConditionalSelectFixed | 0x40000000,
  CSINV_x = ConditionalSelectFixed | 0xC0000000,
  CSINV = CSINV_w,
  CSNEG_w = ConditionalSelectFixed | 0x40000400,
  CSNEG_x = ConditionalSelectFixed | 0xC0000400,
  CSNEG = CSNEG_w
};

// Data processing 1 source.
+// Opcodes are DataProcessing1SourceFixed OR-ed with instruction bits. The
+// unsuffixed name and its _w alias share one value; the _x form adds
+// SixtyFourBits (declared outside this view).
+enum DataProcessing1SourceOp : uint32_t {
+  DataProcessing1SourceFixed = 0x5AC00000,
+  DataProcessing1SourceFMask = 0x5FE00000,
+  DataProcessing1SourceMask = 0xFFFFFC00,
+  RBIT = DataProcessing1SourceFixed | 0x00000000,
+  RBIT_w = RBIT,
+  RBIT_x = RBIT | SixtyFourBits,
+  REV16 = DataProcessing1SourceFixed | 0x00000400,
+  REV16_w = REV16,
+  REV16_x = REV16 | SixtyFourBits,
+  REV = DataProcessing1SourceFixed | 0x00000800,
+  REV_w = REV,
+  // Unlike the other families, REV | SixtyFourBits is named REV32_x; REV_x
+  // has its own low bits (0x00000C00).
+  REV32_x = REV | SixtyFourBits,
+  REV_x = DataProcessing1SourceFixed | SixtyFourBits | 0x00000C00,
+  CLZ = DataProcessing1SourceFixed | 0x00001000,
+  CLZ_w = CLZ,
+  CLZ_x = CLZ | SixtyFourBits,
+  CLS = DataProcessing1SourceFixed | 0x00001400,
+  CLS_w = CLS,
+  CLS_x = CLS | SixtyFourBits,
+
+  // Pointer authentication instructions in Armv8.3.
+  // These spell out the 0x8001xxxx bits literally rather than OR-ing
+  // SixtyFourBits; consecutive entries step by 0x00000400.
+  PACIA = DataProcessing1SourceFixed | 0x80010000,
+  PACIB = DataProcessing1SourceFixed | 0x80010400,
+  PACDA = DataProcessing1SourceFixed | 0x80010800,
+  PACDB = DataProcessing1SourceFixed | 0x80010C00,
+  AUTIA = DataProcessing1SourceFixed | 0x80011000,
+  AUTIB = DataProcessing1SourceFixed | 0x80011400,
+  AUTDA = DataProcessing1SourceFixed | 0x80011800,
+  AUTDB = DataProcessing1SourceFixed | 0x80011C00,
+  PACIZA = DataProcessing1SourceFixed | 0x80012000,
+  PACIZB = DataProcessing1SourceFixed | 0x80012400,
+  PACDZA = DataProcessing1SourceFixed | 0x80012800,
+  PACDZB = DataProcessing1SourceFixed | 0x80012C00,
+  AUTIZA = DataProcessing1SourceFixed | 0x80013000,
+  AUTIZB = DataProcessing1SourceFixed | 0x80013400,
+  AUTDZA = DataProcessing1SourceFixed | 0x80013800,
+  AUTDZB = DataProcessing1SourceFixed | 0x80013C00,
+  XPACI = DataProcessing1SourceFixed | 0x80014000,
+  XPACD = DataProcessing1SourceFixed | 0x80014400
+};
+
+// Data processing 2 source.
+// Opcodes are DataProcessing2SourceFixed OR-ed with instruction bits. For the
+// divide/shift families the _x form is the _w form with 0x80000000 set and
+// the unsuffixed name aliases the _w form.
+enum DataProcessing2SourceOp : uint32_t {
+  DataProcessing2SourceFixed = 0x1AC00000,
+  DataProcessing2SourceFMask = 0x5FE00000,
+  DataProcessing2SourceMask = 0xFFE0FC00,
+  UDIV_w = DataProcessing2SourceFixed | 0x00000800,
+  UDIV_x = DataProcessing2SourceFixed | 0x80000800,
+  UDIV = UDIV_w,
+  SDIV_w = DataProcessing2SourceFixed | 0x00000C00,
+  SDIV_x = DataProcessing2SourceFixed | 0x80000C00,
+  SDIV = SDIV_w,
+  LSLV_w = DataProcessing2SourceFixed | 0x00002000,
+  LSLV_x = DataProcessing2SourceFixed | 0x80002000,
+  LSLV = LSLV_w,
+  LSRV_w = DataProcessing2SourceFixed | 0x00002400,
+  LSRV_x = DataProcessing2SourceFixed | 0x80002400,
+  LSRV = LSRV_w,
+  ASRV_w = DataProcessing2SourceFixed | 0x00002800,
+  ASRV_x = DataProcessing2SourceFixed | 0x80002800,
+  ASRV = ASRV_w,
+  RORV_w = DataProcessing2SourceFixed | 0x00002C00,
+  RORV_x = DataProcessing2SourceFixed | 0x80002C00,
+  RORV = RORV_w,
+  // PACGA, CRC32X and CRC32CX are the only entries that OR in SixtyFourBits
+  // (declared outside this view); the other CRC32* entries do not.
+  PACGA = DataProcessing2SourceFixed | SixtyFourBits | 0x00003000,
+  CRC32B = DataProcessing2SourceFixed | 0x00004000,
+  CRC32H = DataProcessing2SourceFixed | 0x00004400,
+  CRC32W = DataProcessing2SourceFixed | 0x00004800,
+  CRC32X = DataProcessing2SourceFixed | SixtyFourBits | 0x00004C00,
+  CRC32CB = DataProcessing2SourceFixed | 0x00005000,
+  CRC32CH = DataProcessing2SourceFixed | 0x00005400,
+  CRC32CW = DataProcessing2SourceFixed | 0x00005800,
+  CRC32CX = DataProcessing2SourceFixed | SixtyFourBits | 0x00005C00
+};
+
+// Data processing 3 source.
+// Opcodes are DataProcessing3SourceFixed OR-ed with instruction bits. MADD
+// and MSUB have _w/_x forms (the _x form sets 0x80000000) and an unsuffixed
+// alias for _w; the long-multiply entries exist only as _x.
+enum DataProcessing3SourceOp : uint32_t {
+  DataProcessing3SourceFixed = 0x1B000000,
+  DataProcessing3SourceFMask = 0x1F000000,
+  DataProcessing3SourceMask = 0xFFE08000,
+  MADD_w = DataProcessing3SourceFixed | 0x00000000,
+  MADD_x = DataProcessing3SourceFixed | 0x80000000,
+  MADD = MADD_w,
+  MSUB_w = DataProcessing3SourceFixed | 0x00008000,
+  MSUB_x = DataProcessing3SourceFixed | 0x80008000,
+  MSUB = MSUB_w,
+  SMADDL_x = DataProcessing3SourceFixed | 0x80200000,
+  SMSUBL_x = DataProcessing3SourceFixed | 0x80208000,
+  SMULH_x = DataProcessing3SourceFixed | 0x80400000,
+  UMADDL_x = DataProcessing3SourceFixed | 0x80A00000,
+  UMSUBL_x = DataProcessing3SourceFixed | 0x80A08000,
+  UMULH_x = DataProcessing3SourceFixed | 0x80C00000
+};
+
+// Floating point compare.
+// The _h and _d forms add FP16 / FP64 (declared outside this view) to the _s
+// form; the unsuffixed name aliases _s. The _zero forms add 0x00000008 and
+// the FCMPE forms add 0x00000010 to the matching FCMP opcode.
+enum FPCompareOp : uint32_t {
+  FPCompareFixed = 0x1E202000,
+  FPCompareFMask = 0x5F203C00,
+  FPCompareMask = 0xFFE0FC1F,
+  FCMP_h = FPCompareFixed | FP16 | 0x00000000,
+  FCMP_s = FPCompareFixed | 0x00000000,
+  FCMP_d = FPCompareFixed | FP64 | 0x00000000,
+  FCMP = FCMP_s,
+  FCMP_h_zero = FPCompareFixed | FP16 | 0x00000008,
+  FCMP_s_zero = FPCompareFixed | 0x00000008,
+  FCMP_d_zero = FPCompareFixed | FP64 | 0x00000008,
+  FCMP_zero = FCMP_s_zero,
+  FCMPE_h = FPCompareFixed | FP16 | 0x00000010,
+  FCMPE_s = FPCompareFixed | 0x00000010,
+  FCMPE_d = FPCompareFixed | FP64 | 0x00000010,
+  FCMPE = FCMPE_s,
+  FCMPE_h_zero = FPCompareFixed | FP16 | 0x00000018,
+  FCMPE_s_zero = FPCompareFixed | 0x00000018,
+  FCMPE_d_zero = FPCompareFixed | FP64 | 0x00000018,
+  FCMPE_zero = FCMPE_s_zero
+};
+
+// Floating point conditional compare.
+// The _h and _d forms add FP16 / FP64 (declared outside this view) to the _s
+// form; the unsuffixed name aliases _s. FCCMPE is FCCMP with 0x00000010 set.
+enum FPConditionalCompareOp : uint32_t {
+  FPConditionalCompareFixed = 0x1E200400,
+  FPConditionalCompareFMask = 0x5F200C00,
+  FPConditionalCompareMask = 0xFFE00C10,
+  FCCMP_h = FPConditionalCompareFixed | FP16 | 0x00000000,
+  FCCMP_s = FPConditionalCompareFixed | 0x00000000,
+  FCCMP_d = FPConditionalCompareFixed | FP64 | 0x00000000,
+  FCCMP = FCCMP_s,
+  FCCMPE_h = FPConditionalCompareFixed | FP16 | 0x00000010,
+  FCCMPE_s = FPConditionalCompareFixed | 0x00000010,
+  FCCMPE_d = FPConditionalCompareFixed | FP64 | 0x00000010,
+  FCCMPE = FCCMPE_s
+};
+
+// Floating point conditional select.
+// Single instruction (FCSEL) in h/s/d forms; the unsuffixed name aliases _s.
+enum FPConditionalSelectOp : uint32_t {
+  FPConditionalSelectFixed = 0x1E200C00,
+  FPConditionalSelectFMask = 0x5F200C00,
+  FPConditionalSelectMask = 0xFFE00C00,
+  FCSEL_h = FPConditionalSelectFixed | FP16 | 0x00000000,
+  FCSEL_s = FPConditionalSelectFixed | 0x00000000,
+  FCSEL_d = FPConditionalSelectFixed | FP64 | 0x00000000,
+  FCSEL = FCSEL_s
+};
+
+// Floating point immediate.
+// FMOV-immediate in h/s/d forms; no unsuffixed alias is declared here.
+enum FPImmediateOp : uint32_t {
+  FPImmediateFixed = 0x1E201000,
+  FPImmediateFMask = 0x5F201C00,
+  FPImmediateMask = 0xFFE01C00,
+  FMOV_h_imm = FPImmediateFixed | FP16 | 0x00000000,
+  FMOV_s_imm = FPImmediateFixed | 0x00000000,
+  FMOV_d_imm = FPImmediateFixed | FP64 | 0x00000000
+};
+
+// Floating point data processing 1 source.
+// Opcodes are FPDataProcessing1SourceFixed OR-ed with instruction bits. Where
+// a family has typed forms, _h / _d add FP16 / FP64 (declared outside this
+// view) to the _s form and the unsuffixed name aliases _s.
+enum FPDataProcessing1SourceOp : uint32_t {
+  FPDataProcessing1SourceFixed = 0x1E204000,
+  FPDataProcessing1SourceFMask = 0x5F207C00,
+  FPDataProcessing1SourceMask = 0xFFFFFC00,
+  FMOV_h = FPDataProcessing1SourceFixed | FP16 | 0x00000000,
+  FMOV_s = FPDataProcessing1SourceFixed | 0x00000000,
+  FMOV_d = FPDataProcessing1SourceFixed | FP64 | 0x00000000,
+  FMOV = FMOV_s,
+  FABS_h = FPDataProcessing1SourceFixed | FP16 | 0x00008000,
+  FABS_s = FPDataProcessing1SourceFixed | 0x00008000,
+  FABS_d = FPDataProcessing1SourceFixed | FP64 | 0x00008000,
+  FABS = FABS_s,
+  FNEG_h = FPDataProcessing1SourceFixed | FP16 | 0x00010000,
+  FNEG_s = FPDataProcessing1SourceFixed | 0x00010000,
+  FNEG_d = FPDataProcessing1SourceFixed | FP64 | 0x00010000,
+  FNEG = FNEG_s,
+  FSQRT_h = FPDataProcessing1SourceFixed | FP16 | 0x00018000,
+  FSQRT_s = FPDataProcessing1SourceFixed | 0x00018000,
+  FSQRT_d = FPDataProcessing1SourceFixed | FP64 | 0x00018000,
+  FSQRT = FSQRT_s,
+  // FCVT entries carry a two-letter suffix and have no unsuffixed alias.
+  // FCVT_sh and FCVT_dh spell out 0x00C2xxxx literally instead of OR-ing FP16.
+  FCVT_ds = FPDataProcessing1SourceFixed | 0x00028000,
+  FCVT_sd = FPDataProcessing1SourceFixed | FP64 | 0x00020000,
+  FCVT_hs = FPDataProcessing1SourceFixed | 0x00038000,
+  FCVT_hd = FPDataProcessing1SourceFixed | FP64 | 0x00038000,
+  FCVT_sh = FPDataProcessing1SourceFixed | 0x00C20000,
+  FCVT_dh = FPDataProcessing1SourceFixed | 0x00C28000,
+  // The FRINT32*/FRINT64* families are declared only in _s and _d forms.
+  FRINT32X_s = FPDataProcessing1SourceFixed | 0x00088000,
+  FRINT32X_d = FPDataProcessing1SourceFixed | FP64 | 0x00088000,
+  FRINT32X = FRINT32X_s,
+  FRINT32Z_s = FPDataProcessing1SourceFixed | 0x00080000,
+  FRINT32Z_d = FPDataProcessing1SourceFixed | FP64 | 0x00080000,
+  FRINT32Z = FRINT32Z_s,
+  FRINT64X_s = FPDataProcessing1SourceFixed | 0x00098000,
+  FRINT64X_d = FPDataProcessing1SourceFixed | FP64 | 0x00098000,
+  FRINT64X = FRINT64X_s,
+  FRINT64Z_s = FPDataProcessing1SourceFixed | 0x00090000,
+  FRINT64Z_d = FPDataProcessing1SourceFixed | FP64 | 0x00090000,
+  FRINT64Z = FRINT64Z_s,
+  FRINTN_h = FPDataProcessing1SourceFixed | FP16 | 0x00040000,
+  FRINTN_s = FPDataProcessing1SourceFixed | 0x00040000,
+  FRINTN_d = FPDataProcessing1SourceFixed | FP64 | 0x00040000,
+  FRINTN = FRINTN_s,
+  FRINTP_h = FPDataProcessing1SourceFixed | FP16 | 0x00048000,
+  FRINTP_s = FPDataProcessing1SourceFixed | 0x00048000,
+  FRINTP_d = FPDataProcessing1SourceFixed | FP64 | 0x00048000,
+  FRINTP = FRINTP_s,
+  FRINTM_h = FPDataProcessing1SourceFixed | FP16 | 0x00050000,
+  FRINTM_s = FPDataProcessing1SourceFixed | 0x00050000,
+  FRINTM_d = FPDataProcessing1SourceFixed | FP64 | 0x00050000,
+  FRINTM = FRINTM_s,
+  FRINTZ_h = FPDataProcessing1SourceFixed | FP16 | 0x00058000,
+  FRINTZ_s = FPDataProcessing1SourceFixed | 0x00058000,
+  FRINTZ_d = FPDataProcessing1SourceFixed | FP64 | 0x00058000,
+  FRINTZ = FRINTZ_s,
+  FRINTA_h = FPDataProcessing1SourceFixed | FP16 | 0x00060000,
+  FRINTA_s = FPDataProcessing1SourceFixed | 0x00060000,
+  FRINTA_d = FPDataProcessing1SourceFixed | FP64 | 0x00060000,
+  FRINTA = FRINTA_s,
+  FRINTX_h = FPDataProcessing1SourceFixed | FP16 | 0x00070000,
+  FRINTX_s = FPDataProcessing1SourceFixed | 0x00070000,
+  FRINTX_d = FPDataProcessing1SourceFixed | FP64 | 0x00070000,
+  FRINTX = FRINTX_s,
+  FRINTI_h = FPDataProcessing1SourceFixed | FP16 | 0x00078000,
+  FRINTI_s = FPDataProcessing1SourceFixed | 0x00078000,
+  FRINTI_d = FPDataProcessing1SourceFixed | FP64 | 0x00078000,
+  FRINTI = FRINTI_s
+};
+
+// Floating point data processing 2 source.
+// Here the unsuffixed name is defined first (Fixed | instruction bits) and
+// the typed forms derive from it: _s equals the base, _h adds FP16 and _d
+// adds FP64 (both declared outside this view). Instruction bits step by
+// 0x00001000 from FMUL (0x0000) to FNMUL (0x8000).
+enum FPDataProcessing2SourceOp : uint32_t {
+  FPDataProcessing2SourceFixed = 0x1E200800,
+  FPDataProcessing2SourceFMask = 0x5F200C00,
+  FPDataProcessing2SourceMask = 0xFFE0FC00,
+  FMUL = FPDataProcessing2SourceFixed | 0x00000000,
+  FMUL_h = FMUL | FP16,
+  FMUL_s = FMUL,
+  FMUL_d = FMUL | FP64,
+  FDIV = FPDataProcessing2SourceFixed | 0x00001000,
+  FDIV_h = FDIV | FP16,
+  FDIV_s = FDIV,
+  FDIV_d = FDIV | FP64,
+  FADD = FPDataProcessing2SourceFixed | 0x00002000,
+  FADD_h = FADD | FP16,
+  FADD_s = FADD,
+  FADD_d = FADD | FP64,
+  FSUB = FPDataProcessing2SourceFixed | 0x00003000,
+  FSUB_h = FSUB | FP16,
+  FSUB_s = FSUB,
+  FSUB_d = FSUB | FP64,
+  FMAX = FPDataProcessing2SourceFixed | 0x00004000,
+  FMAX_h = FMAX | FP16,
+  FMAX_s = FMAX,
+  FMAX_d = FMAX | FP64,
+  FMIN = FPDataProcessing2SourceFixed | 0x00005000,
+  FMIN_h = FMIN | FP16,
+  FMIN_s = FMIN,
+  FMIN_d = FMIN | FP64,
+  FMAXNM = FPDataProcessing2SourceFixed | 0x00006000,
+  FMAXNM_h = FMAXNM | FP16,
+  FMAXNM_s = FMAXNM,
+  FMAXNM_d = FMAXNM | FP64,
+  FMINNM = FPDataProcessing2SourceFixed | 0x00007000,
+  FMINNM_h = FMINNM | FP16,
+  FMINNM_s = FMINNM,
+  FMINNM_d = FMINNM | FP64,
+  FNMUL = FPDataProcessing2SourceFixed | 0x00008000,
+  FNMUL_h = FNMUL | FP16,
+  FNMUL_s = FNMUL,
+  FNMUL_d = FNMUL | FP64
+};
+
+// Floating point data processing 3 source.
+// The type bits are written literally here rather than via FP16/FP64:
+// _h uses 0x00C00000, _s uses 0x00000000 and _d uses 0x00400000. The FN*
+// forms add 0x00200000 and the *SUB forms add 0x00008000.
+enum FPDataProcessing3SourceOp : uint32_t {
+  FPDataProcessing3SourceFixed = 0x1F000000,
+  FPDataProcessing3SourceFMask = 0x5F000000,
+  FPDataProcessing3SourceMask = 0xFFE08000,
+  FMADD_h = FPDataProcessing3SourceFixed | 0x00C00000,
+  FMSUB_h = FPDataProcessing3SourceFixed | 0x00C08000,
+  FNMADD_h = FPDataProcessing3SourceFixed | 0x00E00000,
+  FNMSUB_h = FPDataProcessing3SourceFixed | 0x00E08000,
+  FMADD_s = FPDataProcessing3SourceFixed | 0x00000000,
+  FMSUB_s = FPDataProcessing3SourceFixed | 0x00008000,
+  FNMADD_s = FPDataProcessing3SourceFixed | 0x00200000,
+  FNMSUB_s = FPDataProcessing3SourceFixed | 0x00208000,
+  FMADD_d = FPDataProcessing3SourceFixed | 0x00400000,
+  FMSUB_d = FPDataProcessing3SourceFixed | 0x00408000,
+  FNMADD_d = FPDataProcessing3SourceFixed | 0x00600000,
+  FNMSUB_d = FPDataProcessing3SourceFixed | 0x00608000
+};
+
+// Conversion between floating point and integer.
+// Each conversion defines an unsuffixed base (Fixed | instruction bits) and
+// six two-letter variants. In a suffix, x adds SixtyFourBits, h adds FP16 and
+// d adds FP64 (all declared outside this view); w and s add nothing, so the
+// _ws / _sw variant equals the base.
+enum FPIntegerConvertOp : uint32_t {
+  FPIntegerConvertFixed = 0x1E200000,
+  FPIntegerConvertFMask = 0x5F20FC00,
+  FPIntegerConvertMask = 0xFFFFFC00,
+  FCVTNS = FPIntegerConvertFixed | 0x00000000,
+  FCVTNS_wh = FCVTNS | FP16,
+  FCVTNS_xh = FCVTNS | SixtyFourBits | FP16,
+  FCVTNS_ws = FCVTNS,
+  FCVTNS_xs = FCVTNS | SixtyFourBits,
+  FCVTNS_wd = FCVTNS | FP64,
+  FCVTNS_xd = FCVTNS | SixtyFourBits | FP64,
+  FCVTNU = FPIntegerConvertFixed | 0x00010000,
+  FCVTNU_wh = FCVTNU | FP16,
+  FCVTNU_xh = FCVTNU | SixtyFourBits | FP16,
+  FCVTNU_ws = FCVTNU,
+  FCVTNU_xs = FCVTNU | SixtyFourBits,
+  FCVTNU_wd = FCVTNU | FP64,
+  FCVTNU_xd = FCVTNU | SixtyFourBits | FP64,
+  FCVTPS = FPIntegerConvertFixed | 0x00080000,
+  FCVTPS_wh = FCVTPS | FP16,
+  FCVTPS_xh = FCVTPS | SixtyFourBits | FP16,
+  FCVTPS_ws = FCVTPS,
+  FCVTPS_xs = FCVTPS | SixtyFourBits,
+  FCVTPS_wd = FCVTPS | FP64,
+  FCVTPS_xd = FCVTPS | SixtyFourBits | FP64,
+  FCVTPU = FPIntegerConvertFixed | 0x00090000,
+  FCVTPU_wh = FCVTPU | FP16,
+  FCVTPU_xh = FCVTPU | SixtyFourBits | FP16,
+  FCVTPU_ws = FCVTPU,
+  FCVTPU_xs = FCVTPU | SixtyFourBits,
+  FCVTPU_wd = FCVTPU | FP64,
+  FCVTPU_xd = FCVTPU | SixtyFourBits | FP64,
+  FCVTMS = FPIntegerConvertFixed | 0x00100000,
+  FCVTMS_wh = FCVTMS | FP16,
+  FCVTMS_xh = FCVTMS | SixtyFourBits | FP16,
+  FCVTMS_ws = FCVTMS,
+  FCVTMS_xs = FCVTMS | SixtyFourBits,
+  FCVTMS_wd = FCVTMS | FP64,
+  FCVTMS_xd = FCVTMS | SixtyFourBits | FP64,
+  FCVTMU = FPIntegerConvertFixed | 0x00110000,
+  FCVTMU_wh = FCVTMU | FP16,
+  FCVTMU_xh = FCVTMU | SixtyFourBits | FP16,
+  FCVTMU_ws = FCVTMU,
+  FCVTMU_xs = FCVTMU | SixtyFourBits,
+  FCVTMU_wd = FCVTMU | FP64,
+  FCVTMU_xd = FCVTMU | SixtyFourBits | FP64,
+  FCVTZS = FPIntegerConvertFixed | 0x00180000,
+  FCVTZS_wh = FCVTZS | FP16,
+  FCVTZS_xh = FCVTZS | SixtyFourBits | FP16,
+  FCVTZS_ws = FCVTZS,
+  FCVTZS_xs = FCVTZS | SixtyFourBits,
+  FCVTZS_wd = FCVTZS | FP64,
+  FCVTZS_xd = FCVTZS | SixtyFourBits | FP64,
+  FCVTZU = FPIntegerConvertFixed | 0x00190000,
+  FCVTZU_wh = FCVTZU | FP16,
+  FCVTZU_xh = FCVTZU | SixtyFourBits | FP16,
+  FCVTZU_ws = FCVTZU,
+  FCVTZU_xs = FCVTZU | SixtyFourBits,
+  FCVTZU_wd = FCVTZU | FP64,
+  FCVTZU_xd = FCVTZU | SixtyFourBits | FP64,
+  SCVTF = FPIntegerConvertFixed | 0x00020000,
+  SCVTF_hw = SCVTF | FP16,
+  SCVTF_hx = SCVTF | SixtyFourBits | FP16,
+  SCVTF_sw = SCVTF,
+  SCVTF_sx = SCVTF | SixtyFourBits,
+  SCVTF_dw = SCVTF | FP64,
+  SCVTF_dx = SCVTF | SixtyFourBits | FP64,
+  UCVTF = FPIntegerConvertFixed | 0x00030000,
+  UCVTF_hw = UCVTF | FP16,
+  UCVTF_hx = UCVTF | SixtyFourBits | FP16,
+  UCVTF_sw = UCVTF,
+  UCVTF_sx = UCVTF | SixtyFourBits,
+  UCVTF_dw = UCVTF | FP64,
+  UCVTF_dx = UCVTF | SixtyFourBits | FP64,
+  FCVTAS = FPIntegerConvertFixed | 0x00040000,
+  FCVTAS_wh = FCVTAS | FP16,
+  FCVTAS_xh = FCVTAS | SixtyFourBits | FP16,
+  FCVTAS_ws = FCVTAS,
+  FCVTAS_xs = FCVTAS | SixtyFourBits,
+  FCVTAS_wd = FCVTAS | FP64,
+  FCVTAS_xd = FCVTAS | SixtyFourBits | FP64,
+  FCVTAU = FPIntegerConvertFixed | 0x00050000,
+  FCVTAU_wh = FCVTAU | FP16,
+  FCVTAU_xh = FCVTAU | SixtyFourBits | FP16,
+  FCVTAU_ws = FCVTAU,
+  FCVTAU_xs = FCVTAU | SixtyFourBits,
+  FCVTAU_wd = FCVTAU | FP64,
+  FCVTAU_xd = FCVTAU | SixtyFourBits | FP64,
+  // FMOV has no unsuffixed base in this enum: the 0x00060000 / 0x00070000
+  // literals are repeated, and the x/d forms are built on FMOV_ws / FMOV_sw.
+  FMOV_wh = FPIntegerConvertFixed | 0x00060000 | FP16,
+  FMOV_hw = FPIntegerConvertFixed | 0x00070000 | FP16,
+  FMOV_xh = FMOV_wh | SixtyFourBits,
+  FMOV_hx = FMOV_hw | SixtyFourBits,
+  FMOV_ws = FPIntegerConvertFixed | 0x00060000,
+  FMOV_sw = FPIntegerConvertFixed | 0x00070000,
+  FMOV_xd = FMOV_ws | SixtyFourBits | FP64,
+  FMOV_dx = FMOV_sw | SixtyFourBits | FP64,
+  FMOV_d1_x = FPIntegerConvertFixed | SixtyFourBits | 0x008F0000,
+  FMOV_x_d1 = FPIntegerConvertFixed | SixtyFourBits | 0x008E0000,
+  FJCVTZS = FPIntegerConvertFixed | FP64 | 0x001E0000
+};
+
+// Conversion between fixed point and floating point.
+// Same base-plus-suffix scheme as FPIntegerConvertOp, with a _fixed suffix on
+// every name and FPFixedPointConvertFixed as the class bits.
+enum FPFixedPointConvertOp : uint32_t {
+  FPFixedPointConvertFixed = 0x1E000000,
+  FPFixedPointConvertFMask = 0x5F200000,
+  FPFixedPointConvertMask = 0xFFFF0000,
+  FCVTZS_fixed = FPFixedPointConvertFixed | 0x00180000,
+  FCVTZS_wh_fixed = FCVTZS_fixed | FP16,
+  FCVTZS_xh_fixed = FCVTZS_fixed | SixtyFourBits | FP16,
+  FCVTZS_ws_fixed = FCVTZS_fixed,
+  FCVTZS_xs_fixed = FCVTZS_fixed | SixtyFourBits,
+  FCVTZS_wd_fixed = FCVTZS_fixed | FP64,
+  FCVTZS_xd_fixed = FCVTZS_fixed | SixtyFourBits | FP64,
+  FCVTZU_fixed = FPFixedPointConvertFixed | 0x00190000,
+  FCVTZU_wh_fixed = FCVTZU_fixed | FP16,
+  FCVTZU_xh_fixed = FCVTZU_fixed | SixtyFourBits | FP16,
+  FCVTZU_ws_fixed = FCVTZU_fixed,
+  FCVTZU_xs_fixed = FCVTZU_fixed | SixtyFourBits,
+  FCVTZU_wd_fixed = FCVTZU_fixed | FP64,
+  FCVTZU_xd_fixed = FCVTZU_fixed | SixtyFourBits | FP64,
+  SCVTF_fixed = FPFixedPointConvertFixed | 0x00020000,
+  SCVTF_hw_fixed = SCVTF_fixed | FP16,
+  SCVTF_hx_fixed = SCVTF_fixed | SixtyFourBits | FP16,
+  SCVTF_sw_fixed = SCVTF_fixed,
+  SCVTF_sx_fixed = SCVTF_fixed | SixtyFourBits,
+  SCVTF_dw_fixed = SCVTF_fixed | FP64,
+  SCVTF_dx_fixed = SCVTF_fixed | SixtyFourBits | FP64,
+  UCVTF_fixed = FPFixedPointConvertFixed | 0x00030000,
+  UCVTF_hw_fixed = UCVTF_fixed | FP16,
+  UCVTF_hx_fixed = UCVTF_fixed | SixtyFourBits | FP16,
+  UCVTF_sw_fixed = UCVTF_fixed,
+  UCVTF_sx_fixed = UCVTF_fixed | SixtyFourBits,
+  UCVTF_dw_fixed = UCVTF_fixed | FP64,
+  UCVTF_dx_fixed = UCVTF_fixed | SixtyFourBits | FP64
+};
+
+// Crypto - two register SHA.
+// Only the class bits and their mask are declared; no individual opcodes.
+enum Crypto2RegSHAOp : uint32_t {
+  Crypto2RegSHAFixed = 0x5E280800,
+  Crypto2RegSHAFMask = 0xFF3E0C00
+};
+
+// Crypto - three register SHA.
+// Only the class bits and their mask are declared; no individual opcodes.
+enum Crypto3RegSHAOp : uint32_t {
+  Crypto3RegSHAFixed = 0x5E000000,
+  Crypto3RegSHAFMask = 0xFF208C00
+};
+
+// Crypto - AES.
+// Only the class bits and their mask are declared; no individual opcodes.
+enum CryptoAESOp : uint32_t {
+  CryptoAESFixed = 0x4E280800,
+  CryptoAESFMask = 0xFF3E0C00
+};
+
+// NEON instructions with two register operands.
+// Opcodes are NEON2RegMiscFixed OR-ed with instruction bits. Where a U*
+// opcode is derived from its S* partner it only adds NEON2RegMiscUBit
+// (0x20000000); other entries spell that bit out in the literal.
+enum NEON2RegMiscOp : uint32_t {
+  NEON2RegMiscFixed = 0x0E200800,
+  NEON2RegMiscFMask = 0x9F3E0C00,
+  NEON2RegMiscMask = 0xBF3FFC00,
+  NEON2RegMiscUBit = 0x20000000,
+  NEON_REV64 = NEON2RegMiscFixed | 0x00000000,
+  NEON_REV32 = NEON2RegMiscFixed | 0x20000000,
+  NEON_REV16 = NEON2RegMiscFixed | 0x00001000,
+  NEON_SADDLP = NEON2RegMiscFixed | 0x00002000,
+  NEON_UADDLP = NEON_SADDLP | NEON2RegMiscUBit,
+  NEON_SUQADD = NEON2RegMiscFixed | 0x00003000,
+  NEON_USQADD = NEON_SUQADD | NEON2RegMiscUBit,
+  NEON_CLS = NEON2RegMiscFixed | 0x00004000,
+  NEON_CLZ = NEON2RegMiscFixed | 0x20004000,
+  NEON_CNT = NEON2RegMiscFixed | 0x00005000,
+  NEON_RBIT_NOT = NEON2RegMiscFixed | 0x20005000,
+  NEON_SADALP = NEON2RegMiscFixed | 0x00006000,
+  NEON_UADALP = NEON_SADALP | NEON2RegMiscUBit,
+  NEON_SQABS = NEON2RegMiscFixed | 0x00007000,
+  NEON_SQNEG = NEON2RegMiscFixed | 0x20007000,
+  NEON_CMGT_zero = NEON2RegMiscFixed | 0x00008000,
+  NEON_CMGE_zero = NEON2RegMiscFixed | 0x20008000,
+  NEON_CMEQ_zero = NEON2RegMiscFixed | 0x00009000,
+  NEON_CMLE_zero = NEON2RegMiscFixed | 0x20009000,
+  NEON_CMLT_zero = NEON2RegMiscFixed | 0x0000A000,
+  NEON_ABS = NEON2RegMiscFixed | 0x0000B000,
+  NEON_NEG = NEON2RegMiscFixed | 0x2000B000,
+  NEON_XTN = NEON2RegMiscFixed | 0x00012000,
+  NEON_SQXTUN = NEON2RegMiscFixed | 0x20012000,
+  NEON_SHLL = NEON2RegMiscFixed | 0x20013000,
+  NEON_SQXTN = NEON2RegMiscFixed | 0x00014000,
+  NEON_UQXTN = NEON_SQXTN | NEON2RegMiscUBit,
+
+  // The *_opcode constants are the full opcode masked down to the
+  // NEON2RegMiscOpcode field (0x0001F000).
+  NEON2RegMiscOpcode = 0x0001F000,
+  NEON_RBIT_NOT_opcode = NEON_RBIT_NOT & NEON2RegMiscOpcode,
+  NEON_NEG_opcode = NEON_NEG & NEON2RegMiscOpcode,
+  NEON_XTN_opcode = NEON_XTN & NEON2RegMiscOpcode,
+  NEON_UQXTN_opcode = NEON_UQXTN & NEON2RegMiscOpcode,
+
+  // These instructions use only one bit of the size field. The other bit is
+  // used to distinguish between instructions.
+  NEON2RegMiscFPMask = NEON2RegMiscMask | 0x00800000,
+  NEON_FABS = NEON2RegMiscFixed | 0x0080F000,
+  NEON_FNEG = NEON2RegMiscFixed | 0x2080F000,
+  NEON_FCVTN = NEON2RegMiscFixed | 0x00016000,
+  NEON_FCVTXN = NEON2RegMiscFixed | 0x20016000,
+  NEON_FCVTL = NEON2RegMiscFixed | 0x00017000,
+  NEON_FRINT32X = NEON2RegMiscFixed | 0x2001E000,
+  NEON_FRINT32Z = NEON2RegMiscFixed | 0x0001E000,
+  NEON_FRINT64X = NEON2RegMiscFixed | 0x2001F000,
+  NEON_FRINT64Z = NEON2RegMiscFixed | 0x0001F000,
+  NEON_FRINTN = NEON2RegMiscFixed | 0x00018000,
+  NEON_FRINTA = NEON2RegMiscFixed | 0x20018000,
+  NEON_FRINTP = NEON2RegMiscFixed | 0x00818000,
+  NEON_FRINTM = NEON2RegMiscFixed | 0x00019000,
+  NEON_FRINTX = NEON2RegMiscFixed | 0x20019000,
+  NEON_FRINTZ = NEON2RegMiscFixed | 0x00819000,
+  NEON_FRINTI = NEON2RegMiscFixed | 0x20819000,
+  NEON_FCVTNS = NEON2RegMiscFixed | 0x0001A000,
+  NEON_FCVTNU = NEON_FCVTNS | NEON2RegMiscUBit,
+  NEON_FCVTPS = NEON2RegMiscFixed | 0x0081A000,
+  NEON_FCVTPU = NEON_FCVTPS | NEON2RegMiscUBit,
+  NEON_FCVTMS = NEON2RegMiscFixed | 0x0001B000,
+  NEON_FCVTMU = NEON_FCVTMS | NEON2RegMiscUBit,
+  NEON_FCVTZS = NEON2RegMiscFixed | 0x0081B000,
+  NEON_FCVTZU = NEON_FCVTZS | NEON2RegMiscUBit,
+  NEON_FCVTAS = NEON2RegMiscFixed | 0x0001C000,
+  NEON_FCVTAU = NEON_FCVTAS | NEON2RegMiscUBit,
+  NEON_FSQRT = NEON2RegMiscFixed | 0x2081F000,
+  NEON_SCVTF = NEON2RegMiscFixed | 0x0001D000,
+  NEON_UCVTF = NEON_SCVTF | NEON2RegMiscUBit,
+  NEON_URSQRTE = NEON2RegMiscFixed | 0x2081C000,
+  NEON_URECPE = NEON2RegMiscFixed | 0x0081C000,
+  NEON_FRSQRTE = NEON2RegMiscFixed | 0x2081D000,
+  NEON_FRECPE = NEON2RegMiscFixed | 0x0081D000,
+  NEON_FCMGT_zero = NEON2RegMiscFixed | 0x0080C000,
+  NEON_FCMGE_zero = NEON2RegMiscFixed | 0x2080C000,
+  NEON_FCMEQ_zero = NEON2RegMiscFixed | 0x0080D000,
+  NEON_FCMLE_zero = NEON2RegMiscFixed | 0x2080D000,
+  NEON_FCMLT_zero = NEON2RegMiscFixed | 0x0080E000,
+
+  NEON_FCVTL_opcode = NEON_FCVTL & NEON2RegMiscOpcode,
+  NEON_FCVTN_opcode = NEON_FCVTN & NEON2RegMiscOpcode
+};
+
+// NEON instructions with two register operands (FP16).
+// The low instruction bits match the same-named entries of NEON2RegMiscOp
+// (e.g. 0x00018000 for FRINTN); only the Fixed value differs. The 0x20000000
+// bit is written literally here instead of via a UBit constant.
+enum NEON2RegMiscFP16Op : uint32_t {
+  NEON2RegMiscFP16Fixed = 0x0E780800,
+  NEON2RegMiscFP16FMask = 0x9F7E0C00,
+  NEON2RegMiscFP16Mask = 0xBFFFFC00,
+  NEON_FRINTN_H = NEON2RegMiscFP16Fixed | 0x00018000,
+  NEON_FRINTM_H = NEON2RegMiscFP16Fixed | 0x00019000,
+  NEON_FCVTNS_H = NEON2RegMiscFP16Fixed | 0x0001A000,
+  NEON_FCVTMS_H = NEON2RegMiscFP16Fixed | 0x0001B000,
+  NEON_FCVTAS_H = NEON2RegMiscFP16Fixed | 0x0001C000,
+  NEON_SCVTF_H = NEON2RegMiscFP16Fixed | 0x0001D000,
+  NEON_FCMGT_H_zero = NEON2RegMiscFP16Fixed | 0x0080C000,
+  NEON_FCMEQ_H_zero = NEON2RegMiscFP16Fixed | 0x0080D000,
+  NEON_FCMLT_H_zero = NEON2RegMiscFP16Fixed | 0x0080E000,
+  NEON_FABS_H = NEON2RegMiscFP16Fixed | 0x0080F000,
+  NEON_FRINTP_H = NEON2RegMiscFP16Fixed | 0x00818000,
+  NEON_FRINTZ_H = NEON2RegMiscFP16Fixed | 0x00819000,
+  NEON_FCVTPS_H = NEON2RegMiscFP16Fixed | 0x0081A000,
+  NEON_FCVTZS_H = NEON2RegMiscFP16Fixed | 0x0081B000,
+  NEON_FRECPE_H = NEON2RegMiscFP16Fixed | 0x0081D000,
+  NEON_FRINTA_H = NEON2RegMiscFP16Fixed | 0x20018000,
+  NEON_FRINTX_H = NEON2RegMiscFP16Fixed | 0x20019000,
+  NEON_FCVTNU_H = NEON2RegMiscFP16Fixed | 0x2001A000,
+  NEON_FCVTMU_H = NEON2RegMiscFP16Fixed | 0x2001B000,
+  NEON_FCVTAU_H = NEON2RegMiscFP16Fixed | 0x2001C000,
+  NEON_UCVTF_H = NEON2RegMiscFP16Fixed | 0x2001D000,
+  NEON_FCMGE_H_zero = NEON2RegMiscFP16Fixed | 0x2080C000,
+  NEON_FCMLE_H_zero = NEON2RegMiscFP16Fixed | 0x2080D000,
+  NEON_FNEG_H = NEON2RegMiscFP16Fixed | 0x2080F000,
+  NEON_FRINTI_H = NEON2RegMiscFP16Fixed | 0x20819000,
+  NEON_FCVTPU_H = NEON2RegMiscFP16Fixed | 0x2081A000,
+  NEON_FCVTZU_H = NEON2RegMiscFP16Fixed | 0x2081B000,
+  NEON_FRSQRTE_H = NEON2RegMiscFP16Fixed | 0x2081D000,
+  NEON_FSQRT_H = NEON2RegMiscFP16Fixed | 0x2081F000
+};
+
+// NEON instructions with three same-type operands.
+// Opcodes are NEON3SameFixed OR-ed with instruction bits. Several U* entries
+// are written as NEON3SameFixed | NEON3SameUBit | <signed opcode>; the signed
+// opcode already contains NEON3SameFixed, so that OR is redundant but
+// harmless.
+enum NEON3SameOp : uint32_t {
+  NEON3SameFixed = 0x0E200400,
+  NEON3SameFMask = 0x9F200400,
+  NEON3SameMask = 0xBF20FC00,
+  NEON3SameUBit = 0x20000000,
+  NEON_ADD = NEON3SameFixed | 0x00008000,
+  NEON_ADDP = NEON3SameFixed | 0x0000B800,
+  NEON_SHADD = NEON3SameFixed | 0x00000000,
+  NEON_SHSUB = NEON3SameFixed | 0x00002000,
+  NEON_SRHADD = NEON3SameFixed | 0x00001000,
+  NEON_CMEQ = NEON3SameFixed | NEON3SameUBit | 0x00008800,
+  NEON_CMGE = NEON3SameFixed | 0x00003800,
+  NEON_CMGT = NEON3SameFixed | 0x00003000,
+  NEON_CMHI = NEON3SameFixed | NEON3SameUBit | NEON_CMGT,
+  NEON_CMHS = NEON3SameFixed | NEON3SameUBit | NEON_CMGE,
+  NEON_CMTST = NEON3SameFixed | 0x00008800,
+  NEON_MLA = NEON3SameFixed | 0x00009000,
+  NEON_MLS = NEON3SameFixed | 0x20009000,
+  NEON_MUL = NEON3SameFixed | 0x00009800,
+  NEON_PMUL = NEON3SameFixed | 0x20009800,
+  NEON_SRSHL = NEON3SameFixed | 0x00005000,
+  NEON_SQSHL = NEON3SameFixed | 0x00004800,
+  NEON_SQRSHL = NEON3SameFixed | 0x00005800,
+  NEON_SSHL = NEON3SameFixed | 0x00004000,
+  NEON_SMAX = NEON3SameFixed | 0x00006000,
+  NEON_SMAXP = NEON3SameFixed | 0x0000A000,
+  NEON_SMIN = NEON3SameFixed | 0x00006800,
+  NEON_SMINP = NEON3SameFixed | 0x0000A800,
+  NEON_SABD = NEON3SameFixed | 0x00007000,
+  NEON_SABA = NEON3SameFixed | 0x00007800,
+  NEON_UABD = NEON3SameFixed | NEON3SameUBit | NEON_SABD,
+  NEON_UABA = NEON3SameFixed | NEON3SameUBit | NEON_SABA,
+  NEON_SQADD = NEON3SameFixed | 0x00000800,
+  NEON_SQSUB = NEON3SameFixed | 0x00002800,
+  NEON_SUB = NEON3SameFixed | NEON3SameUBit | 0x00008000,
+  NEON_UHADD = NEON3SameFixed | NEON3SameUBit | NEON_SHADD,
+  NEON_UHSUB = NEON3SameFixed | NEON3SameUBit | NEON_SHSUB,
+  NEON_URHADD = NEON3SameFixed | NEON3SameUBit | NEON_SRHADD,
+  NEON_UMAX = NEON3SameFixed | NEON3SameUBit | NEON_SMAX,
+  NEON_UMAXP = NEON3SameFixed | NEON3SameUBit | NEON_SMAXP,
+  NEON_UMIN = NEON3SameFixed | NEON3SameUBit | NEON_SMIN,
+  NEON_UMINP = NEON3SameFixed | NEON3SameUBit | NEON_SMINP,
+  NEON_URSHL = NEON3SameFixed | NEON3SameUBit | NEON_SRSHL,
+  NEON_UQADD = NEON3SameFixed | NEON3SameUBit | NEON_SQADD,
+  NEON_UQRSHL = NEON3SameFixed | NEON3SameUBit | NEON_SQRSHL,
+  NEON_UQSHL = NEON3SameFixed | NEON3SameUBit | NEON_SQSHL,
+  NEON_UQSUB = NEON3SameFixed | NEON3SameUBit | NEON_SQSUB,
+  NEON_USHL = NEON3SameFixed | NEON3SameUBit | NEON_SSHL,
+  NEON_SQDMULH = NEON3SameFixed | 0x0000B000,
+  NEON_SQRDMULH = NEON3SameFixed | 0x2000B000,
+
+  // NEON floating point instructions with three same-type operands.
+  // The opcodes below are still built on NEON3SameFixed, not on
+  // NEON3SameFPFixed; the literals carry the 0x0000C000 bits themselves.
+  NEON3SameFPFixed = NEON3SameFixed | 0x0000C000,
+  NEON3SameFPFMask = NEON3SameFMask | 0x0000C000,
+  NEON3SameFPMask = NEON3SameMask | 0x00800000,
+  NEON_FADD = NEON3SameFixed | 0x0000D000,
+  NEON_FSUB = NEON3SameFixed | 0x0080D000,
+  NEON_FMUL = NEON3SameFixed | 0x2000D800,
+  NEON_FDIV = NEON3SameFixed | 0x2000F800,
+  NEON_FMAX = NEON3SameFixed | 0x0000F000,
+  NEON_FMAXNM = NEON3SameFixed | 0x0000C000,
+  NEON_FMAXP = NEON3SameFixed | 0x2000F000,
+  NEON_FMAXNMP = NEON3SameFixed | 0x2000C000,
+  NEON_FMIN = NEON3SameFixed | 0x0080F000,
+  NEON_FMINNM = NEON3SameFixed | 0x0080C000,
+  NEON_FMINP = NEON3SameFixed | 0x2080F000,
+  NEON_FMINNMP = NEON3SameFixed | 0x2080C000,
+  NEON_FMLA = NEON3SameFixed | 0x0000C800,
+  NEON_FMLS = NEON3SameFixed | 0x0080C800,
+  NEON_FMULX = NEON3SameFixed | 0x0000D800,
+  NEON_FRECPS = NEON3SameFixed | 0x0000F800,
+  NEON_FRSQRTS = NEON3SameFixed | 0x0080F800,
+  NEON_FABD = NEON3SameFixed | 0x2080D000,
+  NEON_FADDP = NEON3SameFixed | 0x2000D000,
+  NEON_FCMEQ = NEON3SameFixed | 0x0000E000,
+  NEON_FCMGE = NEON3SameFixed | 0x2000E000,
+  NEON_FCMGT = NEON3SameFixed | 0x2080E000,
+  NEON_FACGE = NEON3SameFixed | 0x2000E800,
+  NEON_FACGT = NEON3SameFixed | 0x2080E800,
+
+  // NEON logical instructions with three same-type operands.
+  // All eight share NEON3SameLogicalFixed and differ only in bits
+  // 0x20000000 and 0x00C00000. NEON_Q is declared outside this view.
+  NEON3SameLogicalFixed = NEON3SameFixed | 0x00001800,
+  NEON3SameLogicalFMask = NEON3SameFMask | 0x0000F800,
+  NEON3SameLogicalMask = 0xBFE0FC00,
+  NEON3SameLogicalFormatMask = NEON_Q,
+  NEON_AND = NEON3SameLogicalFixed | 0x00000000,
+  NEON_ORR = NEON3SameLogicalFixed | 0x00A00000,
+  NEON_ORN = NEON3SameLogicalFixed | 0x00C00000,
+  NEON_EOR = NEON3SameLogicalFixed | 0x20000000,
+  NEON_BIC = NEON3SameLogicalFixed | 0x00400000,
+  NEON_BIF = NEON3SameLogicalFixed | 0x20C00000,
+  NEON_BIT = NEON3SameLogicalFixed | 0x20800000,
+  NEON_BSL = NEON3SameLogicalFixed | 0x20400000,
+
+  // FHM (FMLAL-like) instructions have an oddball encoding scheme under 3Same.
+  NEON3SameFHMMask = 0xBFE0FC00, // U size opcode
+  NEON_FMLAL = NEON3SameFixed | 0x0000E800, // 0 00 11101
+  NEON_FMLAL2 = NEON3SameFixed | 0x2000C800, // 1 00 11001
+  NEON_FMLSL = NEON3SameFixed | 0x0080E800, // 0 10 11101
+  NEON_FMLSL2 = NEON3SameFixed | 0x2080C800 // 1 10 11001
+};
+
+// NOTE(review): presumably the half-precision counterparts of the NEON3Same
+// floating point opcodes (every entry carries an _H suffix) -- confirm.
+// Opcodes are NEON3SameFP16Fixed OR-ed with instruction bits.
+enum NEON3SameFP16 : uint32_t {
+  NEON3SameFP16Fixed = 0x0E400400,
+  NEON3SameFP16FMask = 0x9F60C400,
+  NEON3SameFP16Mask = 0xBFE0FC00,
+  NEON_FMAXNM_H = NEON3SameFP16Fixed | 0x00000000,
+  NEON_FMLA_H = NEON3SameFP16Fixed | 0x00000800,
+  NEON_FADD_H = NEON3SameFP16Fixed | 0x00001000,
+  NEON_FMULX_H = NEON3SameFP16Fixed | 0x00001800,
+  NEON_FCMEQ_H = NEON3SameFP16Fixed | 0x00002000,
+  NEON_FMAX_H = NEON3SameFP16Fixed | 0x00003000,
+  NEON_FRECPS_H = NEON3SameFP16Fixed | 0x00003800,
+  NEON_FMINNM_H = NEON3SameFP16Fixed | 0x00800000,
+  NEON_FMLS_H = NEON3SameFP16Fixed | 0x00800800,
+  NEON_FSUB_H = NEON3SameFP16Fixed | 0x00801000,
+  NEON_FMIN_H = NEON3SameFP16Fixed | 0x00803000,
+  NEON_FRSQRTS_H = NEON3SameFP16Fixed | 0x00803800,
+  NEON_FMAXNMP_H = NEON3SameFP16Fixed | 0x20000000,
+  NEON_FADDP_H = NEON3SameFP16Fixed | 0x20001000,
+  NEON_FMUL_H = NEON3SameFP16Fixed | 0x20001800,
+  NEON_FCMGE_H = NEON3SameFP16Fixed | 0x20002000,
+  NEON_FACGE_H = NEON3SameFP16Fixed | 0x20002800,
+  NEON_FMAXP_H = NEON3SameFP16Fixed | 0x20003000,
+  NEON_FDIV_H = NEON3SameFP16Fixed | 0x20003800,
+  NEON_FMINNMP_H = NEON3SameFP16Fixed | 0x20800000,
+  NEON_FABD_H = NEON3SameFP16Fixed | 0x20801000,
+  NEON_FCMGT_H = NEON3SameFP16Fixed | 0x20802000,
+  NEON_FACGT_H = NEON3SameFP16Fixed | 0x20802800,
+  NEON_FMINP_H = NEON3SameFP16Fixed | 0x20803000
+};
+
+// 'Extra' NEON instructions with three same-type operands.
+// Two groups share this enum: the integer group built on NEON3SameExtraFixed
+// and the complex-number group built on NEON3SameExtraFCFixed.
+enum NEON3SameExtraOp : uint32_t {
+  NEON3SameExtraFixed = 0x0E008400,
+  NEON3SameExtraUBit = 0x20000000,
+  NEON3SameExtraFMask = 0x9E208400,
+  NEON3SameExtraMask = 0xBE20FC00,
+  NEON_SQRDMLAH = NEON3SameExtraFixed | NEON3SameExtraUBit,
+  NEON_SQRDMLSH = NEON3SameExtraFixed | NEON3SameExtraUBit | 0x00000800,
+  NEON_SDOT = NEON3SameExtraFixed | 0x00001000,
+  NEON_UDOT = NEON3SameExtraFixed | NEON3SameExtraUBit | 0x00001000,
+
+  /* v8.3 Complex Numbers */
+  NEON3SameExtraFCFixed = 0x2E00C400,
+  NEON3SameExtraFCFMask = 0xBF20C400,
+  // FCMLA fixes opcode<3:2>, and uses opcode<1:0> to encode <rotate>.
+  NEON3SameExtraFCMLAMask = NEON3SameExtraFCFMask | 0x00006000,
+  NEON_FCMLA = NEON3SameExtraFCFixed,
+  // FCADD fixes opcode<3:2, 0>, and uses opcode<1> to encode <rotate>.
+  NEON3SameExtraFCADDMask = NEON3SameExtraFCFMask | 0x00006800,
+  NEON_FCADD = NEON3SameExtraFCFixed | 0x00002000
+  // Other encodings under NEON3SameExtraFCFMask are UNALLOCATED.
+};
+
+// NEON instructions with three different-type operands.
+// Each base opcode is NEON3DifferentFixed OR-ed with instruction bits. The
+// "2" variant of every opcode adds NEON_Q (declared outside this view), and
+// each U* opcode is its S* partner OR-ed with NEON3SameUBit, a constant
+// borrowed from NEON3SameOp.
+enum NEON3DifferentOp : uint32_t {
+  NEON3DifferentFixed = 0x0E200000,
+  NEON3DifferentFMask = 0x9F200C00,
+  NEON3DifferentMask = 0xFF20FC00,
+  NEON_ADDHN = NEON3DifferentFixed | 0x00004000,
+  NEON_ADDHN2 = NEON_ADDHN | NEON_Q,
+  NEON_PMULL = NEON3DifferentFixed | 0x0000E000,
+  NEON_PMULL2 = NEON_PMULL | NEON_Q,
+  NEON_RADDHN = NEON3DifferentFixed | 0x20004000,
+  NEON_RADDHN2 = NEON_RADDHN | NEON_Q,
+  NEON_RSUBHN = NEON3DifferentFixed | 0x20006000,
+  NEON_RSUBHN2 = NEON_RSUBHN | NEON_Q,
+  NEON_SABAL = NEON3DifferentFixed | 0x00005000,
+  NEON_SABAL2 = NEON_SABAL | NEON_Q,
+  NEON_SABDL = NEON3DifferentFixed | 0x00007000,
+  NEON_SABDL2 = NEON_SABDL | NEON_Q,
+  NEON_SADDL = NEON3DifferentFixed | 0x00000000,
+  NEON_SADDL2 = NEON_SADDL | NEON_Q,
+  NEON_SADDW = NEON3DifferentFixed | 0x00001000,
+  NEON_SADDW2 = NEON_SADDW | NEON_Q,
+  NEON_SMLAL = NEON3DifferentFixed | 0x00008000,
+  NEON_SMLAL2 = NEON_SMLAL | NEON_Q,
+  NEON_SMLSL = NEON3DifferentFixed | 0x0000A000,
+  NEON_SMLSL2 = NEON_SMLSL | NEON_Q,
+  NEON_SMULL = NEON3DifferentFixed | 0x0000C000,
+  NEON_SMULL2 = NEON_SMULL | NEON_Q,
+  NEON_SSUBL = NEON3DifferentFixed | 0x00002000,
+  NEON_SSUBL2 = NEON_SSUBL | NEON_Q,
+  NEON_SSUBW = NEON3DifferentFixed | 0x00003000,
+  NEON_SSUBW2 = NEON_SSUBW | NEON_Q,
+  NEON_SQDMLAL = NEON3DifferentFixed | 0x00009000,
+  NEON_SQDMLAL2 = NEON_SQDMLAL | NEON_Q,
+  NEON_SQDMLSL = NEON3DifferentFixed | 0x0000B000,
+  NEON_SQDMLSL2 = NEON_SQDMLSL | NEON_Q,
+  NEON_SQDMULL = NEON3DifferentFixed | 0x0000D000,
+  NEON_SQDMULL2 = NEON_SQDMULL | NEON_Q,
+  NEON_SUBHN = NEON3DifferentFixed | 0x00006000,
+  NEON_SUBHN2 = NEON_SUBHN | NEON_Q,
+  NEON_UABAL = NEON_SABAL | NEON3SameUBit,
+  NEON_UABAL2 = NEON_UABAL | NEON_Q,
+  NEON_UABDL = NEON_SABDL | NEON3SameUBit,
+  NEON_UABDL2 = NEON_UABDL | NEON_Q,
+  NEON_UADDL = NEON_SADDL | NEON3SameUBit,
+  NEON_UADDL2 = NEON_UADDL | NEON_Q,
+  NEON_UADDW = NEON_SADDW | NEON3SameUBit,
+  NEON_UADDW2 = NEON_UADDW | NEON_Q,
+  NEON_UMLAL = NEON_SMLAL | NEON3SameUBit,
+  NEON_UMLAL2 = NEON_UMLAL | NEON_Q,
+  NEON_UMLSL = NEON_SMLSL | NEON3SameUBit,
+  NEON_UMLSL2 = NEON_UMLSL | NEON_Q,
+  NEON_UMULL = NEON_SMULL | NEON3SameUBit,
+  NEON_UMULL2 = NEON_UMULL | NEON_Q,
+  NEON_USUBL = NEON_SSUBL | NEON3SameUBit,
+  NEON_USUBL2 = NEON_USUBL | NEON_Q,
+  NEON_USUBW = NEON_SSUBW | NEON3SameUBit,
+  NEON_USUBW2 = NEON_USUBW | NEON_Q
+};
+
+// NEON instructions operating across vectors.
+// Three groups share this enum: integer opcodes on NEONAcrossLanesFixed, FP16
+// opcodes on NEONAcrossLanesFP16Fixed and FP opcodes on
+// NEONAcrossLanesFPFixed.
+enum NEONAcrossLanesOp : uint32_t {
+  NEONAcrossLanesFixed = 0x0E300800,
+  NEONAcrossLanesFMask = 0x9F3E0C00,
+  NEONAcrossLanesMask = 0xBF3FFC00,
+  NEON_ADDV = NEONAcrossLanesFixed | 0x0001B000,
+  NEON_SADDLV = NEONAcrossLanesFixed | 0x00003000,
+  NEON_UADDLV = NEONAcrossLanesFixed | 0x20003000,
+  NEON_SMAXV = NEONAcrossLanesFixed | 0x0000A000,
+  NEON_SMINV = NEONAcrossLanesFixed | 0x0001A000,
+  NEON_UMAXV = NEONAcrossLanesFixed | 0x2000A000,
+  NEON_UMINV = NEONAcrossLanesFixed | 0x2001A000,
+
+  // The FP16 and FP groups use identical FMask and Mask expressions; only
+  // their Fixed values differ (the FP one also sets 0x20000000).
+  NEONAcrossLanesFP16Fixed = NEONAcrossLanesFixed | 0x0000C000,
+  NEONAcrossLanesFP16FMask = NEONAcrossLanesFMask | 0x2000C000,
+  NEONAcrossLanesFP16Mask = NEONAcrossLanesMask | 0x20800000,
+  NEON_FMAXNMV_H = NEONAcrossLanesFP16Fixed | 0x00000000,
+  NEON_FMAXV_H = NEONAcrossLanesFP16Fixed | 0x00003000,
+  NEON_FMINNMV_H = NEONAcrossLanesFP16Fixed | 0x00800000,
+  NEON_FMINV_H = NEONAcrossLanesFP16Fixed | 0x00803000,
+
+  // NEON floating point across instructions.
+  NEONAcrossLanesFPFixed = NEONAcrossLanesFixed | 0x2000C000,
+  NEONAcrossLanesFPFMask = NEONAcrossLanesFMask | 0x2000C000,
+  NEONAcrossLanesFPMask = NEONAcrossLanesMask | 0x20800000,
+
+  // The literals below repeat the 0x2000C000 bits already present in
+  // NEONAcrossLanesFPFixed; with bitwise OR the overlap is harmless.
+  NEON_FMAXV = NEONAcrossLanesFPFixed | 0x2000F000,
+  NEON_FMINV = NEONAcrossLanesFPFixed | 0x2080F000,
+  NEON_FMAXNMV = NEONAcrossLanesFPFixed | 0x2000C000,
+  NEON_FMINNMV = NEONAcrossLanesFPFixed | 0x2080C000
+};
+
+// NEON instructions with indexed element operand.
// Encoding table for the "by element" group. Every opcode is built from
// NEONByIndexedElementFixed; the sub-groups below (FP16 "_H_" forms,
// floating point, FMLAL-like, complex) layer extra fixed bits and, where
// needed, their own masks on top of it.
enum NEONByIndexedElementOp : uint32_t {
  NEONByIndexedElementFixed = 0x0F000000,
  NEONByIndexedElementFMask = 0x9F000400,
  NEONByIndexedElementMask = 0xBF00F400,
  NEON_MUL_byelement = NEONByIndexedElementFixed | 0x00008000,
  NEON_MLA_byelement = NEONByIndexedElementFixed | 0x20000000,
  NEON_MLS_byelement = NEONByIndexedElementFixed | 0x20004000,
  NEON_SMULL_byelement = NEONByIndexedElementFixed | 0x0000A000,
  NEON_SMLAL_byelement = NEONByIndexedElementFixed | 0x00002000,
  NEON_SMLSL_byelement = NEONByIndexedElementFixed | 0x00006000,
  NEON_UMULL_byelement = NEONByIndexedElementFixed | 0x2000A000,
  NEON_UMLAL_byelement = NEONByIndexedElementFixed | 0x20002000,
  NEON_UMLSL_byelement = NEONByIndexedElementFixed | 0x20006000,
  NEON_SQDMULL_byelement = NEONByIndexedElementFixed | 0x0000B000,
  NEON_SQDMLAL_byelement = NEONByIndexedElementFixed | 0x00003000,
  NEON_SQDMLSL_byelement = NEONByIndexedElementFixed | 0x00007000,
  NEON_SQDMULH_byelement = NEONByIndexedElementFixed | 0x0000C000,
  NEON_SQRDMULH_byelement = NEONByIndexedElementFixed | 0x0000D000,
  NEON_SDOT_byelement = NEONByIndexedElementFixed | 0x0000E000,
  NEON_SQRDMLAH_byelement = NEONByIndexedElementFixed | 0x2000D000,
  NEON_UDOT_byelement = NEONByIndexedElementFixed | 0x2000E000,
  NEON_SQRDMLSH_byelement = NEONByIndexedElementFixed | 0x2000F000,

  // FP16 forms: same low bits as the floating point forms further down, but
  // built directly on NEONByIndexedElementFixed (bit 23 clear).
  NEON_FMLA_H_byelement = NEONByIndexedElementFixed | 0x00001000,
  NEON_FMLS_H_byelement = NEONByIndexedElementFixed | 0x00005000,
  NEON_FMUL_H_byelement = NEONByIndexedElementFixed | 0x00009000,
  NEON_FMULX_H_byelement = NEONByIndexedElementFixed | 0x20009000,

  // Floating point instructions.
  NEONByIndexedElementFPFixed = NEONByIndexedElementFixed | 0x00800000,
  NEONByIndexedElementFPMask = NEONByIndexedElementMask | 0x00800000,
  NEON_FMLA_byelement = NEONByIndexedElementFPFixed | 0x00001000,
  NEON_FMLS_byelement = NEONByIndexedElementFPFixed | 0x00005000,
  NEON_FMUL_byelement = NEONByIndexedElementFPFixed | 0x00009000,
  NEON_FMULX_byelement = NEONByIndexedElementFPFixed | 0x20009000,

  // FMLAL-like instructions.
  // For all cases: U = x, size = 10, opcode = xx00
  // Note: NEONByIndexedElementFPLongFixed has the same value as
  // NEONByIndexedElementFPFixed, and NEON_FMLAL_H_byelement equals it.
  NEONByIndexedElementFPLongFixed = NEONByIndexedElementFixed | 0x00800000,
  NEONByIndexedElementFPLongFMask = NEONByIndexedElementFMask | 0x00C03000,
  NEONByIndexedElementFPLongMask = 0xBFC0F400,
  NEON_FMLAL_H_byelement = NEONByIndexedElementFixed | 0x00800000,
  NEON_FMLAL2_H_byelement = NEONByIndexedElementFixed | 0x20808000,
  NEON_FMLSL_H_byelement = NEONByIndexedElementFixed | 0x00804000,
  NEON_FMLSL2_H_byelement = NEONByIndexedElementFixed | 0x2080C000,

  // Complex instruction(s).
  // This is necessary because the 'rot' encoding moves into the
  // NEONByIndex..Mask space.
  NEONByIndexedElementFPComplexMask = 0xBF009400,
  NEON_FCMLA_byelement = NEONByIndexedElementFixed | 0x20001000
};

// NEON register copy.
//
// Besides the group-wide Fixed/FMask/Mask triple, each instruction gets its
// own refined mask. NEONCopyDupElementMask, NEONCopyDupGeneralMask,
// NEONCopyUmovMask and NEONCopySmovMask all evaluate to the same value
// (NEONCopyMask | 0x20007800).
enum NEONCopyOp : uint32_t {
  NEONCopyFixed = 0x0E000400,
  NEONCopyFMask = 0x9FE08400,
  NEONCopyMask = 0x3FE08400,
  NEONCopyInsElementMask = NEONCopyMask | 0x40000000,
  NEONCopyInsGeneralMask = NEONCopyMask | 0x40007800,
  NEONCopyDupElementMask = NEONCopyMask | 0x20007800,
  NEONCopyDupGeneralMask = NEONCopyDupElementMask,
  NEONCopyUmovMask = NEONCopyMask | 0x20007800,
  NEONCopySmovMask = NEONCopyMask | 0x20007800,
  NEON_INS_ELEMENT = NEONCopyFixed | 0x60000000,
  NEON_INS_GENERAL = NEONCopyFixed | 0x40001800,
  NEON_DUP_ELEMENT = NEONCopyFixed | 0x00000000,
  NEON_DUP_GENERAL = NEONCopyFixed | 0x00000800,
  NEON_SMOV = NEONCopyFixed | 0x00002800,
  NEON_UMOV = NEONCopyFixed | 0x00003800
};

// NEON extract.
// The extract group holds a single instruction; NEON_EXT adds no bits to the
// fixed pattern.
enum NEONExtractOp : uint32_t {
  NEONExtractFixed = 0x2E000000,
  NEONExtractFMask = 0xBF208400,
  NEONExtractMask = 0xBFE08400,
  NEON_EXT = NEONExtractFixed | 0x00000000
};

// Field values used as building blocks by NEONLoadStoreMultiStructOp below:
// the load bit (NEONLoadStoreMultiL) and one selector per LDn/STn form. These
// are not complete opcodes on their own.
enum NEONLoadStoreMultiOp : uint32_t {
  NEONLoadStoreMultiL = 0x00400000,
  NEONLoadStoreMulti1_1v = 0x00007000,
  NEONLoadStoreMulti1_2v = 0x0000A000,
  NEONLoadStoreMulti1_3v = 0x00006000,
  NEONLoadStoreMulti1_4v = 0x00002000,
  NEONLoadStoreMulti2 = 0x00008000,
  NEONLoadStoreMulti3 = 0x00004000,
  NEONLoadStoreMulti4 = 0x00000000
};

// NEON load/store multiple structures.
//
// Each opcode is <Load or Store base> | <selector from NEONLoadStoreMultiOp>.
// The store base is the fixed pattern itself; the load base additionally sets
// NEONLoadStoreMultiL.
enum NEONLoadStoreMultiStructOp : uint32_t {
  NEONLoadStoreMultiStructFixed = 0x0C000000,
  NEONLoadStoreMultiStructFMask = 0xBFBF0000,
  NEONLoadStoreMultiStructMask = 0xBFFFF000,
  NEONLoadStoreMultiStructStore = NEONLoadStoreMultiStructFixed,
  NEONLoadStoreMultiStructLoad = NEONLoadStoreMultiStructFixed |
                                 NEONLoadStoreMultiL,
  NEON_LD1_1v = NEONLoadStoreMultiStructLoad | NEONLoadStoreMulti1_1v,
  NEON_LD1_2v = NEONLoadStoreMultiStructLoad | NEONLoadStoreMulti1_2v,
  NEON_LD1_3v = NEONLoadStoreMultiStructLoad | NEONLoadStoreMulti1_3v,
  NEON_LD1_4v = NEONLoadStoreMultiStructLoad | NEONLoadStoreMulti1_4v,
  NEON_LD2 = NEONLoadStoreMultiStructLoad | NEONLoadStoreMulti2,
  NEON_LD3 = NEONLoadStoreMultiStructLoad | NEONLoadStoreMulti3,
  NEON_LD4 = NEONLoadStoreMultiStructLoad | NEONLoadStoreMulti4,
  NEON_ST1_1v = NEONLoadStoreMultiStructStore | NEONLoadStoreMulti1_1v,
  NEON_ST1_2v = NEONLoadStoreMultiStructStore | NEONLoadStoreMulti1_2v,
  NEON_ST1_3v = NEONLoadStoreMultiStructStore | NEONLoadStoreMulti1_3v,
  NEON_ST1_4v = NEONLoadStoreMultiStructStore | NEONLoadStoreMulti1_4v,
  NEON_ST2 = NEONLoadStoreMultiStructStore | NEONLoadStoreMulti2,
  NEON_ST3 = NEONLoadStoreMultiStructStore | NEONLoadStoreMulti3,
  NEON_ST4 = NEONLoadStoreMultiStructStore | NEONLoadStoreMulti4
};

// NEON load/store multiple structures with post-index addressing.
// Every *_post opcode is the corresponding non-post opcode with
// NEONLoadStoreMultiStructPostIndex (0x00800000) OR-ed in.
enum NEONLoadStoreMultiStructPostIndexOp : uint32_t {
  NEONLoadStoreMultiStructPostIndexFixed = 0x0C800000,
  NEONLoadStoreMultiStructPostIndexFMask = 0xBFA00000,
  NEONLoadStoreMultiStructPostIndexMask = 0xBFE0F000,
  NEONLoadStoreMultiStructPostIndex = 0x00800000,
  NEON_LD1_1v_post = NEON_LD1_1v | NEONLoadStoreMultiStructPostIndex,
  NEON_LD1_2v_post = NEON_LD1_2v | NEONLoadStoreMultiStructPostIndex,
  NEON_LD1_3v_post = NEON_LD1_3v | NEONLoadStoreMultiStructPostIndex,
  NEON_LD1_4v_post = NEON_LD1_4v | NEONLoadStoreMultiStructPostIndex,
  NEON_LD2_post = NEON_LD2 | NEONLoadStoreMultiStructPostIndex,
  NEON_LD3_post = NEON_LD3 | NEONLoadStoreMultiStructPostIndex,
  NEON_LD4_post = NEON_LD4 | NEONLoadStoreMultiStructPostIndex,
  NEON_ST1_1v_post = NEON_ST1_1v | NEONLoadStoreMultiStructPostIndex,
  NEON_ST1_2v_post = NEON_ST1_2v | NEONLoadStoreMultiStructPostIndex,
  NEON_ST1_3v_post = NEON_ST1_3v | NEONLoadStoreMultiStructPostIndex,
  NEON_ST1_4v_post = NEON_ST1_4v | NEONLoadStoreMultiStructPostIndex,
  NEON_ST2_post = NEON_ST2 | NEONLoadStoreMultiStructPostIndex,
  NEON_ST3_post = NEON_ST3 | NEONLoadStoreMultiStructPostIndex,
  NEON_ST4_post = NEON_ST4 | NEONLoadStoreMultiStructPostIndex
};

// Field values used as building blocks for the single-structure load/store
// opcodes: a structure-count selector (1..4), the load bit, an element-size
// selector (_b/_h/_s/_d) and the "all lanes" selector. These are not complete
// opcodes on their own.
enum NEONLoadStoreSingleOp : uint32_t {
  NEONLoadStoreSingle1 = 0x00000000,
  NEONLoadStoreSingle2 = 0x00200000,
  NEONLoadStoreSingle3 = 0x00002000,
  NEONLoadStoreSingle4 = 0x00202000,
  NEONLoadStoreSingleL = 0x00400000,
  NEONLoadStoreSingle_b = 0x00000000,
  NEONLoadStoreSingle_h = 0x00004000,
  NEONLoadStoreSingle_s = 0x00008000,
  NEONLoadStoreSingle_d = 0x00008400,
  NEONLoadStoreSingleAllLanes = 0x0000C000,
  // Covers exactly the two bits used by the 1..4 selectors above (it has the
  // same value as NEONLoadStoreSingle4).
  NEONLoadStoreSingleLenMask = 0x00202000
};

// NEON load/store single structure.
// Opcodes are assembled in two steps from the NEONLoadStoreSingleOp fields:
//   1. ...StructLoadN / ...StructStoreN = count selector N | load or store
//      base (the store base is the fixed pattern; the load base also sets
//      NEONLoadStoreSingleL);
//   2. NEON_LDn_<size> / NEON_STn_<size> = that base | element-size selector,
//      and NEON_LDnR = load base | NEONLoadStoreSingleAllLanes.
// No STnR enumerator is defined here.
enum NEONLoadStoreSingleStructOp : uint32_t {
  NEONLoadStoreSingleStructFixed = 0x0D000000,
  NEONLoadStoreSingleStructFMask = 0xBF9F0000,
  NEONLoadStoreSingleStructMask = 0xBFFFE000,
  NEONLoadStoreSingleStructStore = NEONLoadStoreSingleStructFixed,
  NEONLoadStoreSingleStructLoad = NEONLoadStoreSingleStructFixed |
                                  NEONLoadStoreSingleL,
  NEONLoadStoreSingleStructLoad1 = NEONLoadStoreSingle1 |
                                   NEONLoadStoreSingleStructLoad,
  NEONLoadStoreSingleStructLoad2 = NEONLoadStoreSingle2 |
                                   NEONLoadStoreSingleStructLoad,
  NEONLoadStoreSingleStructLoad3 = NEONLoadStoreSingle3 |
                                   NEONLoadStoreSingleStructLoad,
  NEONLoadStoreSingleStructLoad4 = NEONLoadStoreSingle4 |
                                   NEONLoadStoreSingleStructLoad,
  NEONLoadStoreSingleStructStore1 = NEONLoadStoreSingle1 |
                                    NEONLoadStoreSingleStructFixed,
  NEONLoadStoreSingleStructStore2 = NEONLoadStoreSingle2 |
                                    NEONLoadStoreSingleStructFixed,
  NEONLoadStoreSingleStructStore3 = NEONLoadStoreSingle3 |
                                    NEONLoadStoreSingleStructFixed,
  NEONLoadStoreSingleStructStore4 = NEONLoadStoreSingle4 |
                                    NEONLoadStoreSingleStructFixed,
  NEON_LD1_b = NEONLoadStoreSingleStructLoad1 | NEONLoadStoreSingle_b,
  NEON_LD1_h = NEONLoadStoreSingleStructLoad1 | NEONLoadStoreSingle_h,
  NEON_LD1_s = NEONLoadStoreSingleStructLoad1 | NEONLoadStoreSingle_s,
  NEON_LD1_d = NEONLoadStoreSingleStructLoad1 | NEONLoadStoreSingle_d,
  NEON_LD1R = NEONLoadStoreSingleStructLoad1 | NEONLoadStoreSingleAllLanes,
  NEON_ST1_b = NEONLoadStoreSingleStructStore1 | NEONLoadStoreSingle_b,
  NEON_ST1_h = NEONLoadStoreSingleStructStore1 | NEONLoadStoreSingle_h,
  NEON_ST1_s = NEONLoadStoreSingleStructStore1 | NEONLoadStoreSingle_s,
  NEON_ST1_d = NEONLoadStoreSingleStructStore1 | NEONLoadStoreSingle_d,

  NEON_LD2_b = NEONLoadStoreSingleStructLoad2 | NEONLoadStoreSingle_b,
  NEON_LD2_h = NEONLoadStoreSingleStructLoad2 | NEONLoadStoreSingle_h,
  NEON_LD2_s = NEONLoadStoreSingleStructLoad2 | NEONLoadStoreSingle_s,
  NEON_LD2_d = NEONLoadStoreSingleStructLoad2 | NEONLoadStoreSingle_d,
  NEON_LD2R = NEONLoadStoreSingleStructLoad2 | NEONLoadStoreSingleAllLanes,
  NEON_ST2_b = NEONLoadStoreSingleStructStore2 | NEONLoadStoreSingle_b,
  NEON_ST2_h = NEONLoadStoreSingleStructStore2 | NEONLoadStoreSingle_h,
  NEON_ST2_s = NEONLoadStoreSingleStructStore2 | NEONLoadStoreSingle_s,
  NEON_ST2_d = NEONLoadStoreSingleStructStore2 | NEONLoadStoreSingle_d,

  NEON_LD3_b = NEONLoadStoreSingleStructLoad3 | NEONLoadStoreSingle_b,
  NEON_LD3_h = NEONLoadStoreSingleStructLoad3 | NEONLoadStoreSingle_h,
  NEON_LD3_s = NEONLoadStoreSingleStructLoad3 | NEONLoadStoreSingle_s,
  NEON_LD3_d = NEONLoadStoreSingleStructLoad3 | NEONLoadStoreSingle_d,
  NEON_LD3R = NEONLoadStoreSingleStructLoad3 | NEONLoadStoreSingleAllLanes,
  NEON_ST3_b = NEONLoadStoreSingleStructStore3 | NEONLoadStoreSingle_b,
  NEON_ST3_h = NEONLoadStoreSingleStructStore3 | NEONLoadStoreSingle_h,
  NEON_ST3_s = NEONLoadStoreSingleStructStore3 | NEONLoadStoreSingle_s,
  NEON_ST3_d = NEONLoadStoreSingleStructStore3 | NEONLoadStoreSingle_d,

  NEON_LD4_b = NEONLoadStoreSingleStructLoad4 | NEONLoadStoreSingle_b,
  NEON_LD4_h = NEONLoadStoreSingleStructLoad4 | NEONLoadStoreSingle_h,
  NEON_LD4_s = NEONLoadStoreSingleStructLoad4 | NEONLoadStoreSingle_s,
  NEON_LD4_d = NEONLoadStoreSingleStructLoad4 | NEONLoadStoreSingle_d,
  NEON_LD4R = NEONLoadStoreSingleStructLoad4 | NEONLoadStoreSingleAllLanes,
  NEON_ST4_b = NEONLoadStoreSingleStructStore4 | NEONLoadStoreSingle_b,
  NEON_ST4_h = NEONLoadStoreSingleStructStore4 | NEONLoadStoreSingle_h,
  NEON_ST4_s = NEONLoadStoreSingleStructStore4 | NEONLoadStoreSingle_s,
  NEON_ST4_d = NEONLoadStoreSingleStructStore4 | NEONLoadStoreSingle_d
};

// NEON load/store single structure with post-index addressing.
// Every *_post opcode is the corresponding non-post opcode with
// NEONLoadStoreSingleStructPostIndex (0x00800000) OR-ed in.
enum NEONLoadStoreSingleStructPostIndexOp : uint32_t {
  NEONLoadStoreSingleStructPostIndexFixed = 0x0D800000,
  NEONLoadStoreSingleStructPostIndexFMask = 0xBF800000,
  NEONLoadStoreSingleStructPostIndexMask = 0xBFE0E000,
  NEONLoadStoreSingleStructPostIndex = 0x00800000,
  NEON_LD1_b_post = NEON_LD1_b | NEONLoadStoreSingleStructPostIndex,
  NEON_LD1_h_post = NEON_LD1_h | NEONLoadStoreSingleStructPostIndex,
  NEON_LD1_s_post = NEON_LD1_s | NEONLoadStoreSingleStructPostIndex,
  NEON_LD1_d_post = NEON_LD1_d | NEONLoadStoreSingleStructPostIndex,
  NEON_LD1R_post = NEON_LD1R | NEONLoadStoreSingleStructPostIndex,
  NEON_ST1_b_post = NEON_ST1_b | NEONLoadStoreSingleStructPostIndex,
  NEON_ST1_h_post = NEON_ST1_h | NEONLoadStoreSingleStructPostIndex,
  NEON_ST1_s_post = NEON_ST1_s | NEONLoadStoreSingleStructPostIndex,
  NEON_ST1_d_post = NEON_ST1_d | NEONLoadStoreSingleStructPostIndex,

  NEON_LD2_b_post = NEON_LD2_b | NEONLoadStoreSingleStructPostIndex,
  NEON_LD2_h_post = NEON_LD2_h | NEONLoadStoreSingleStructPostIndex,
  NEON_LD2_s_post = NEON_LD2_s | NEONLoadStoreSingleStructPostIndex,
  NEON_LD2_d_post = NEON_LD2_d | NEONLoadStoreSingleStructPostIndex,
  NEON_LD2R_post = NEON_LD2R | NEONLoadStoreSingleStructPostIndex,
  NEON_ST2_b_post = NEON_ST2_b | NEONLoadStoreSingleStructPostIndex,
  NEON_ST2_h_post = NEON_ST2_h | NEONLoadStoreSingleStructPostIndex,
  NEON_ST2_s_post = NEON_ST2_s | NEONLoadStoreSingleStructPostIndex,
  NEON_ST2_d_post = NEON_ST2_d | NEONLoadStoreSingleStructPostIndex,

  NEON_LD3_b_post = NEON_LD3_b | NEONLoadStoreSingleStructPostIndex,
  NEON_LD3_h_post = NEON_LD3_h | NEONLoadStoreSingleStructPostIndex,
  NEON_LD3_s_post = NEON_LD3_s | NEONLoadStoreSingleStructPostIndex,
  NEON_LD3_d_post = NEON_LD3_d | NEONLoadStoreSingleStructPostIndex,
  NEON_LD3R_post = NEON_LD3R | NEONLoadStoreSingleStructPostIndex,
  NEON_ST3_b_post = NEON_ST3_b | NEONLoadStoreSingleStructPostIndex,
  NEON_ST3_h_post = NEON_ST3_h | NEONLoadStoreSingleStructPostIndex,
  NEON_ST3_s_post = NEON_ST3_s | NEONLoadStoreSingleStructPostIndex,
  NEON_ST3_d_post = NEON_ST3_d | NEONLoadStoreSingleStructPostIndex,

  NEON_LD4_b_post = NEON_LD4_b | NEONLoadStoreSingleStructPostIndex,
  NEON_LD4_h_post = NEON_LD4_h | NEONLoadStoreSingleStructPostIndex,
  NEON_LD4_s_post = NEON_LD4_s | NEONLoadStoreSingleStructPostIndex,
  NEON_LD4_d_post = NEON_LD4_d | NEONLoadStoreSingleStructPostIndex,
  NEON_LD4R_post = NEON_LD4R | NEONLoadStoreSingleStructPostIndex,
  NEON_ST4_b_post = NEON_ST4_b | NEONLoadStoreSingleStructPostIndex,
  NEON_ST4_h_post = NEON_ST4_h | NEONLoadStoreSingleStructPostIndex,
  NEON_ST4_s_post = NEON_ST4_s | NEONLoadStoreSingleStructPostIndex,
  NEON_ST4_d_post = NEON_ST4_d | NEONLoadStoreSingleStructPostIndex
};

// NEON modified immediate.
//
// This group declares a Fixed/FMask pair but no group-wide "Mask" constant.
// NEONModifiedImmediateOpBit (0x20000000) is the bit in which MVNI differs
// from MOVI and BIC differs from ORR.
enum NEONModifiedImmediateOp : uint32_t {
  NEONModifiedImmediateFixed = 0x0F000400,
  NEONModifiedImmediateFMask = 0x9FF80400,
  NEONModifiedImmediateOpBit = 0x20000000,
  NEONModifiedImmediate_FMOV = NEONModifiedImmediateFixed | 0x00000800,
  NEONModifiedImmediate_MOVI = NEONModifiedImmediateFixed | 0x00000000,
  NEONModifiedImmediate_MVNI = NEONModifiedImmediateFixed | 0x20000000,
  NEONModifiedImmediate_ORR = NEONModifiedImmediateFixed | 0x00001000,
  NEONModifiedImmediate_BIC = NEONModifiedImmediateFixed | 0x20001000
};

// NEON shift immediate.
// Encoding table for the shift-by-immediate group. All opcodes are the fixed
// pattern plus a literal. NEONShiftImmediateUBit names bit 29 (0x20000000);
// signed/unsigned pairs such as SSHLL/USHLL, SSHR/USHR or SQSHRN/UQSHRN differ
// in exactly that bit, although the literals spell it out rather than using
// the named constant.
enum NEONShiftImmediateOp : uint32_t {
  NEONShiftImmediateFixed = 0x0F000400,
  NEONShiftImmediateFMask = 0x9F800400,
  NEONShiftImmediateMask = 0xBF80FC00,
  NEONShiftImmediateUBit = 0x20000000,
  NEON_SHL = NEONShiftImmediateFixed | 0x00005000,
  NEON_SSHLL = NEONShiftImmediateFixed | 0x0000A000,
  NEON_USHLL = NEONShiftImmediateFixed | 0x2000A000,
  NEON_SLI = NEONShiftImmediateFixed | 0x20005000,
  NEON_SRI = NEONShiftImmediateFixed | 0x20004000,
  NEON_SHRN = NEONShiftImmediateFixed | 0x00008000,
  NEON_RSHRN = NEONShiftImmediateFixed | 0x00008800,
  NEON_UQSHRN = NEONShiftImmediateFixed | 0x20009000,
  NEON_UQRSHRN = NEONShiftImmediateFixed | 0x20009800,
  NEON_SQSHRN = NEONShiftImmediateFixed | 0x00009000,
  NEON_SQRSHRN = NEONShiftImmediateFixed | 0x00009800,
  NEON_SQSHRUN = NEONShiftImmediateFixed | 0x20008000,
  NEON_SQRSHRUN = NEONShiftImmediateFixed | 0x20008800,
  NEON_SSHR = NEONShiftImmediateFixed | 0x00000000,
  NEON_SRSHR = NEONShiftImmediateFixed | 0x00002000,
  NEON_USHR = NEONShiftImmediateFixed | 0x20000000,
  NEON_URSHR = NEONShiftImmediateFixed | 0x20002000,
  NEON_SSRA = NEONShiftImmediateFixed | 0x00001000,
  NEON_SRSRA = NEONShiftImmediateFixed | 0x00003000,
  NEON_USRA = NEONShiftImmediateFixed | 0x20001000,
  NEON_URSRA = NEONShiftImmediateFixed | 0x20003000,
  NEON_SQSHLU = NEONShiftImmediateFixed | 0x20006000,
  NEON_SCVTF_imm = NEONShiftImmediateFixed | 0x0000E000,
  NEON_UCVTF_imm = NEONShiftImmediateFixed | 0x2000E000,
  NEON_FCVTZS_imm = NEONShiftImmediateFixed | 0x0000F800,
  NEON_FCVTZU_imm = NEONShiftImmediateFixed | 0x2000F800,
  NEON_SQSHL_imm = NEONShiftImmediateFixed | 0x00007000,
  NEON_UQSHL_imm = NEONShiftImmediateFixed | 0x20007000
};

// NEON table.
// TBL opcodes select the table length (1v..4v) with a literal; each TBX opcode
// is the matching TBL opcode with NEONTableExt OR-ed in.
enum NEONTableOp : uint32_t {
  NEONTableFixed = 0x0E000000,
  NEONTableFMask = 0xBF208C00,
  NEONTableExt = 0x00001000,
  NEONTableMask = 0xBF20FC00,
  NEON_TBL_1v = NEONTableFixed | 0x00000000,
  NEON_TBL_2v = NEONTableFixed | 0x00002000,
  NEON_TBL_3v = NEONTableFixed | 0x00004000,
  NEON_TBL_4v = NEONTableFixed | 0x00006000,
  NEON_TBX_1v = NEON_TBL_1v | NEONTableExt,
  NEON_TBX_2v = NEON_TBL_2v | NEONTableExt,
  NEON_TBX_3v = NEON_TBL_3v | NEONTableExt,
  NEON_TBX_4v = NEON_TBL_4v | NEONTableExt
};

// NEON perm.
//
// The "2" form of each permute (UZP2/TRN2/ZIP2) is the "1" form with
// 0x00004000 added.
enum NEONPermOp : uint32_t {
  NEONPermFixed = 0x0E000800,
  NEONPermFMask = 0xBF208C00,
  NEONPermMask = 0x3F20FC00,
  NEON_UZP1 = NEONPermFixed | 0x00001000,
  NEON_TRN1 = NEONPermFixed | 0x00002000,
  NEON_ZIP1 = NEONPermFixed | 0x00003000,
  NEON_UZP2 = NEONPermFixed | 0x00005000,
  NEON_TRN2 = NEONPermFixed | 0x00006000,
  NEON_ZIP2 = NEONPermFixed | 0x00007000
};

// NEON scalar instructions with two register operands.
//
// Almost every scalar opcode here is derived from a vector opcode declared
// elsewhere as NEON_Q | NEONScalar | <vector opcode>. The one exception is
// NEON_FRECPX_scalar, which is built from NEONScalar2RegMiscFixed and a
// literal.
enum NEONScalar2RegMiscOp : uint32_t {
  NEONScalar2RegMiscFixed = 0x5E200800,
  NEONScalar2RegMiscFMask = 0xDF3E0C00,
  NEONScalar2RegMiscMask = NEON_Q | NEONScalar | NEON2RegMiscMask,
  NEON_CMGT_zero_scalar = NEON_Q | NEONScalar | NEON_CMGT_zero,
  NEON_CMEQ_zero_scalar = NEON_Q | NEONScalar | NEON_CMEQ_zero,
  NEON_CMLT_zero_scalar = NEON_Q | NEONScalar | NEON_CMLT_zero,
  NEON_CMGE_zero_scalar = NEON_Q | NEONScalar | NEON_CMGE_zero,
  NEON_CMLE_zero_scalar = NEON_Q | NEONScalar | NEON_CMLE_zero,
  NEON_ABS_scalar = NEON_Q | NEONScalar | NEON_ABS,
  NEON_SQABS_scalar = NEON_Q | NEONScalar | NEON_SQABS,
  NEON_NEG_scalar = NEON_Q | NEONScalar | NEON_NEG,
  NEON_SQNEG_scalar = NEON_Q | NEONScalar | NEON_SQNEG,
  NEON_SQXTN_scalar = NEON_Q | NEONScalar | NEON_SQXTN,
  NEON_UQXTN_scalar = NEON_Q | NEONScalar | NEON_UQXTN,
  NEON_SQXTUN_scalar = NEON_Q | NEONScalar | NEON_SQXTUN,
  NEON_SUQADD_scalar = NEON_Q | NEONScalar | NEON_SUQADD,
  NEON_USQADD_scalar = NEON_Q | NEONScalar | NEON_USQADD,

  // Opcode-field mask reused from the vector group, plus NEON_NEG_scalar
  // reduced to just its opcode-field bits.
  NEONScalar2RegMiscOpcode = NEON2RegMiscOpcode,
  NEON_NEG_scalar_opcode = NEON_NEG_scalar & NEONScalar2RegMiscOpcode,

  // Floating point forms: the mask additionally covers bit 23 (0x00800000).
  NEONScalar2RegMiscFPMask = NEONScalar2RegMiscMask | 0x00800000,
  NEON_FRSQRTE_scalar = NEON_Q | NEONScalar | NEON_FRSQRTE,
  NEON_FRECPE_scalar = NEON_Q | NEONScalar | NEON_FRECPE,
  NEON_SCVTF_scalar = NEON_Q | NEONScalar | NEON_SCVTF,
  NEON_UCVTF_scalar = NEON_Q | NEONScalar | NEON_UCVTF,
  NEON_FCMGT_zero_scalar = NEON_Q | NEONScalar | NEON_FCMGT_zero,
  NEON_FCMEQ_zero_scalar = NEON_Q | NEONScalar | NEON_FCMEQ_zero,
  NEON_FCMLT_zero_scalar = NEON_Q | NEONScalar | NEON_FCMLT_zero,
  NEON_FCMGE_zero_scalar = NEON_Q | NEONScalar | NEON_FCMGE_zero,
  NEON_FCMLE_zero_scalar = NEON_Q | NEONScalar | NEON_FCMLE_zero,
  NEON_FRECPX_scalar = NEONScalar2RegMiscFixed | 0x0081F000,
  NEON_FCVTNS_scalar = NEON_Q | NEONScalar | NEON_FCVTNS,
  NEON_FCVTNU_scalar = NEON_Q | NEONScalar | NEON_FCVTNU,
  NEON_FCVTPS_scalar = NEON_Q | NEONScalar | NEON_FCVTPS,
  NEON_FCVTPU_scalar = NEON_Q | NEONScalar | NEON_FCVTPU,
  NEON_FCVTMS_scalar = NEON_Q | NEONScalar | NEON_FCVTMS,
  NEON_FCVTMU_scalar = NEON_Q | NEONScalar | NEON_FCVTMU,
  NEON_FCVTZS_scalar = NEON_Q | NEONScalar | NEON_FCVTZS,
  NEON_FCVTZU_scalar = NEON_Q | NEONScalar | NEON_FCVTZU,
  NEON_FCVTAS_scalar = NEON_Q | NEONScalar | NEON_FCVTAS,
  NEON_FCVTAU_scalar = NEON_Q | NEONScalar | NEON_FCVTAU,
  NEON_FCVTXN_scalar = NEON_Q | NEONScalar | NEON_FCVTXN
};

// NEON scalar instructions with two register operands (FP16).
// Scalar FP16 forms. Each opcode is NEON_Q | NEONScalar | <the vector "_H"
// opcode declared elsewhere>, except NEON_FRECPX_H_scalar, which is built from
// NEONScalar2RegMiscFP16Fixed and a literal.
enum NEONScalar2RegMiscFP16Op : uint32_t {
  NEONScalar2RegMiscFP16Fixed = 0x5E780800,
  NEONScalar2RegMiscFP16FMask = 0xDF7E0C00,
  NEONScalar2RegMiscFP16Mask = 0xFFFFFC00,
  NEON_FCVTNS_H_scalar = NEON_Q | NEONScalar | NEON_FCVTNS_H,
  NEON_FCVTMS_H_scalar = NEON_Q | NEONScalar | NEON_FCVTMS_H,
  NEON_FCVTAS_H_scalar = NEON_Q | NEONScalar | NEON_FCVTAS_H,
  NEON_SCVTF_H_scalar = NEON_Q | NEONScalar | NEON_SCVTF_H,
  NEON_FCMGT_H_zero_scalar = NEON_Q | NEONScalar | NEON_FCMGT_H_zero,
  NEON_FCMEQ_H_zero_scalar = NEON_Q | NEONScalar | NEON_FCMEQ_H_zero,
  NEON_FCMLT_H_zero_scalar = NEON_Q | NEONScalar | NEON_FCMLT_H_zero,
  NEON_FCVTPS_H_scalar = NEON_Q | NEONScalar | NEON_FCVTPS_H,
  NEON_FCVTZS_H_scalar = NEON_Q | NEONScalar | NEON_FCVTZS_H,
  NEON_FRECPE_H_scalar = NEON_Q | NEONScalar | NEON_FRECPE_H,
  NEON_FRECPX_H_scalar = NEONScalar2RegMiscFP16Fixed | 0x0081F000,
  NEON_FCVTNU_H_scalar = NEON_Q | NEONScalar | NEON_FCVTNU_H,
  NEON_FCVTMU_H_scalar = NEON_Q | NEONScalar | NEON_FCVTMU_H,
  NEON_FCVTAU_H_scalar = NEON_Q | NEONScalar | NEON_FCVTAU_H,
  NEON_UCVTF_H_scalar = NEON_Q | NEONScalar | NEON_UCVTF_H,
  NEON_FCMGE_H_zero_scalar = NEON_Q | NEONScalar | NEON_FCMGE_H_zero,
  NEON_FCMLE_H_zero_scalar = NEON_Q | NEONScalar | NEON_FCMLE_H_zero,
  NEON_FCVTPU_H_scalar = NEON_Q | NEONScalar | NEON_FCVTPU_H,
  NEON_FCVTZU_H_scalar = NEON_Q | NEONScalar | NEON_FCVTZU_H,
  NEON_FRSQRTE_H_scalar = NEON_Q | NEONScalar | NEON_FRSQRTE_H
};

// NEON scalar instructions with three same-type operands.
// Every scalar opcode is NEON_Q | NEONScalar | <the vector opcode of the same
// name, declared elsewhere>. The floating point sub-group derives its own
// Fixed/FMask/Mask triple from the integer one.
enum NEONScalar3SameOp : uint32_t {
  NEONScalar3SameFixed = 0x5E200400,
  NEONScalar3SameFMask = 0xDF200400,
  NEONScalar3SameMask = 0xFF20FC00,
  NEON_ADD_scalar = NEON_Q | NEONScalar | NEON_ADD,
  NEON_CMEQ_scalar = NEON_Q | NEONScalar | NEON_CMEQ,
  NEON_CMGE_scalar = NEON_Q | NEONScalar | NEON_CMGE,
  NEON_CMGT_scalar = NEON_Q | NEONScalar | NEON_CMGT,
  NEON_CMHI_scalar = NEON_Q | NEONScalar | NEON_CMHI,
  NEON_CMHS_scalar = NEON_Q | NEONScalar | NEON_CMHS,
  NEON_CMTST_scalar = NEON_Q | NEONScalar | NEON_CMTST,
  NEON_SUB_scalar = NEON_Q | NEONScalar | NEON_SUB,
  NEON_UQADD_scalar = NEON_Q | NEONScalar | NEON_UQADD,
  NEON_SQADD_scalar = NEON_Q | NEONScalar | NEON_SQADD,
  NEON_UQSUB_scalar = NEON_Q | NEONScalar | NEON_UQSUB,
  NEON_SQSUB_scalar = NEON_Q | NEONScalar | NEON_SQSUB,
  NEON_USHL_scalar = NEON_Q | NEONScalar | NEON_USHL,
  NEON_SSHL_scalar = NEON_Q | NEONScalar | NEON_SSHL,
  NEON_UQSHL_scalar = NEON_Q | NEONScalar | NEON_UQSHL,
  NEON_SQSHL_scalar = NEON_Q | NEONScalar | NEON_SQSHL,
  NEON_URSHL_scalar = NEON_Q | NEONScalar | NEON_URSHL,
  NEON_SRSHL_scalar = NEON_Q | NEONScalar | NEON_SRSHL,
  NEON_UQRSHL_scalar = NEON_Q | NEONScalar | NEON_UQRSHL,
  NEON_SQRSHL_scalar = NEON_Q | NEONScalar | NEON_SQRSHL,
  NEON_SQDMULH_scalar = NEON_Q | NEONScalar | NEON_SQDMULH,
  NEON_SQRDMULH_scalar = NEON_Q | NEONScalar | NEON_SQRDMULH,

  // NEON floating point scalar instructions with three same-type operands.
  NEONScalar3SameFPFixed = NEONScalar3SameFixed | 0x0000C000,
  NEONScalar3SameFPFMask = NEONScalar3SameFMask | 0x0000C000,
  NEONScalar3SameFPMask = NEONScalar3SameMask | 0x00800000,
  NEON_FACGE_scalar = NEON_Q | NEONScalar | NEON_FACGE,
  NEON_FACGT_scalar = NEON_Q | NEONScalar | NEON_FACGT,
  NEON_FCMEQ_scalar = NEON_Q | NEONScalar | NEON_FCMEQ,
  NEON_FCMGE_scalar = NEON_Q | NEONScalar | NEON_FCMGE,
  NEON_FCMGT_scalar = NEON_Q | NEONScalar | NEON_FCMGT,
  NEON_FMULX_scalar = NEON_Q | NEONScalar | NEON_FMULX,
  NEON_FRECPS_scalar = NEON_Q | NEONScalar | NEON_FRECPS,
  NEON_FRSQRTS_scalar = NEON_Q | NEONScalar | NEON_FRSQRTS,
  NEON_FABD_scalar = NEON_Q | NEONScalar | NEON_FABD
};

// NEON scalar instructions with three different-type operands.
//
// Only the three SQDM* opcodes have scalar forms here; both the group mask and
// the opcodes are the vector definitions with NEON_Q | NEONScalar OR-ed in.
enum NEONScalar3DiffOp : uint32_t {
  NEONScalar3DiffFixed = 0x5E200000,
  NEONScalar3DiffFMask = 0xDF200C00,
  NEONScalar3DiffMask = NEON_Q | NEONScalar | NEON3DifferentMask,
  NEON_SQDMLAL_scalar = NEON_Q | NEONScalar | NEON_SQDMLAL,
  NEON_SQDMLSL_scalar = NEON_Q | NEONScalar | NEON_SQDMLSL,
  NEON_SQDMULL_scalar = NEON_Q | NEONScalar | NEON_SQDMULL
};

// NEON scalar instructions with indexed element operand.
// Every scalar opcode is NEON_Q | NEONScalar | <the vector "_byelement"
// opcode of the same name>. The floating point sub-group adds bit 23
// (0x00800000) to both the fixed pattern and the mask.
enum NEONScalarByIndexedElementOp : uint32_t {
  NEONScalarByIndexedElementFixed = 0x5F000000,
  NEONScalarByIndexedElementFMask = 0xDF000400,
  NEONScalarByIndexedElementMask = 0xFF00F400,
  NEON_SQDMLAL_byelement_scalar = NEON_Q | NEONScalar | NEON_SQDMLAL_byelement,
  NEON_SQDMLSL_byelement_scalar = NEON_Q | NEONScalar | NEON_SQDMLSL_byelement,
  NEON_SQDMULL_byelement_scalar = NEON_Q | NEONScalar | NEON_SQDMULL_byelement,
  NEON_SQDMULH_byelement_scalar = NEON_Q | NEONScalar | NEON_SQDMULH_byelement,
  NEON_SQRDMULH_byelement_scalar
    = NEON_Q | NEONScalar | NEON_SQRDMULH_byelement,
  NEON_SQRDMLAH_byelement_scalar
    = NEON_Q | NEONScalar | NEON_SQRDMLAH_byelement,
  NEON_SQRDMLSH_byelement_scalar
    = NEON_Q | NEONScalar | NEON_SQRDMLSH_byelement,
  NEON_FMLA_H_byelement_scalar = NEON_Q | NEONScalar | NEON_FMLA_H_byelement,
  NEON_FMLS_H_byelement_scalar = NEON_Q | NEONScalar | NEON_FMLS_H_byelement,
  NEON_FMUL_H_byelement_scalar = NEON_Q | NEONScalar | NEON_FMUL_H_byelement,
  NEON_FMULX_H_byelement_scalar = NEON_Q | NEONScalar | NEON_FMULX_H_byelement,

  // Floating point instructions.
  NEONScalarByIndexedElementFPFixed
    = NEONScalarByIndexedElementFixed | 0x00800000,
  NEONScalarByIndexedElementFPMask
    = NEONScalarByIndexedElementMask | 0x00800000,
  NEON_FMLA_byelement_scalar = NEON_Q | NEONScalar | NEON_FMLA_byelement,
  NEON_FMLS_byelement_scalar = NEON_Q | NEONScalar | NEON_FMLS_byelement,
  NEON_FMUL_byelement_scalar = NEON_Q | NEONScalar | NEON_FMUL_byelement,
  NEON_FMULX_byelement_scalar = NEON_Q | NEONScalar | NEON_FMULX_byelement
};

// NEON scalar register copy.
//
// The only scalar copy opcode declared here is the scalar form of
// NEON_DUP_ELEMENT.
enum NEONScalarCopyOp : uint32_t {
  NEONScalarCopyFixed = 0x5E000400,
  NEONScalarCopyFMask = 0xDFE08400,
  NEONScalarCopyMask = 0xFFE0FC00,
  NEON_DUP_ELEMENT_scalar = NEON_Q | NEONScalar | NEON_DUP_ELEMENT
};

// NEON scalar pairwise instructions.
// Unlike most scalar groups, these opcodes are spelled as the scalar fixed
// pattern plus a literal rather than derived from a vector opcode. Each "_h_"
// (FP16) opcode differs from its non-"_h_" counterpart only in bit 29
// (0x20000000).
enum NEONScalarPairwiseOp : uint32_t {
  NEONScalarPairwiseFixed = 0x5E300800,
  NEONScalarPairwiseFMask = 0xDF3E0C00,
  NEONScalarPairwiseMask = 0xFFB1F800,
  NEON_ADDP_scalar = NEONScalarPairwiseFixed | 0x0081B000,
  NEON_FMAXNMP_h_scalar = NEONScalarPairwiseFixed | 0x0000C000,
  NEON_FADDP_h_scalar = NEONScalarPairwiseFixed | 0x0000D000,
  NEON_FMAXP_h_scalar = NEONScalarPairwiseFixed | 0x0000F000,
  NEON_FMINNMP_h_scalar = NEONScalarPairwiseFixed | 0x0080C000,
  NEON_FMINP_h_scalar = NEONScalarPairwiseFixed | 0x0080F000,
  NEON_FMAXNMP_scalar = NEONScalarPairwiseFixed | 0x2000C000,
  NEON_FMINNMP_scalar = NEONScalarPairwiseFixed | 0x2080C000,
  NEON_FADDP_scalar = NEONScalarPairwiseFixed | 0x2000D000,
  NEON_FMAXP_scalar = NEONScalarPairwiseFixed | 0x2000F000,
  NEON_FMINP_scalar = NEONScalarPairwiseFixed | 0x2080F000
};

// NEON scalar shift immediate.
//
// Every opcode is NEON_Q | NEONScalar | <the vector shift-immediate opcode of
// the same name>.
enum NEONScalarShiftImmediateOp : uint32_t {
  NEONScalarShiftImmediateFixed = 0x5F000400,
  NEONScalarShiftImmediateFMask = 0xDF800400,
  NEONScalarShiftImmediateMask = 0xFF80FC00,
  NEON_SHL_scalar = NEON_Q | NEONScalar | NEON_SHL,
  NEON_SLI_scalar = NEON_Q | NEONScalar | NEON_SLI,
  NEON_SRI_scalar = NEON_Q | NEONScalar | NEON_SRI,
  NEON_SSHR_scalar = NEON_Q | NEONScalar | NEON_SSHR,
  NEON_USHR_scalar = NEON_Q | NEONScalar | NEON_USHR,
  NEON_SRSHR_scalar = NEON_Q | NEONScalar | NEON_SRSHR,
  NEON_URSHR_scalar = NEON_Q | NEONScalar | NEON_URSHR,
  NEON_SSRA_scalar = NEON_Q | NEONScalar | NEON_SSRA,
  NEON_USRA_scalar = NEON_Q | NEONScalar | NEON_USRA,
  NEON_SRSRA_scalar = NEON_Q | NEONScalar | NEON_SRSRA,
  NEON_URSRA_scalar = NEON_Q | NEONScalar | NEON_URSRA,
  NEON_UQSHRN_scalar = NEON_Q | NEONScalar | NEON_UQSHRN,
  NEON_UQRSHRN_scalar = NEON_Q | NEONScalar | NEON_UQRSHRN,
  NEON_SQSHRN_scalar = NEON_Q | NEONScalar | NEON_SQSHRN,
  NEON_SQRSHRN_scalar = NEON_Q | NEONScalar | NEON_SQRSHRN,
  NEON_SQSHRUN_scalar = NEON_Q | NEONScalar | NEON_SQSHRUN,
  NEON_SQRSHRUN_scalar = NEON_Q | NEONScalar | NEON_SQRSHRUN,
  NEON_SQSHLU_scalar = NEON_Q | NEONScalar | NEON_SQSHLU,
  NEON_SQSHL_imm_scalar = NEON_Q | NEONScalar | NEON_SQSHL_imm,
  NEON_UQSHL_imm_scalar = NEON_Q | NEONScalar | NEON_UQSHL_imm,
  NEON_SCVTF_imm_scalar = NEON_Q | NEONScalar | NEON_SCVTF_imm,
  NEON_UCVTF_imm_scalar = NEON_Q | NEONScalar | NEON_UCVTF_imm,
  NEON_FCVTZS_imm_scalar = NEON_Q | NEONScalar | NEON_FCVTZS_imm,
  NEON_FCVTZU_imm_scalar = NEON_Q | NEONScalar | NEON_FCVTZU_imm
};

// Reserved encodings. UDF adds no bits to ReservedFixed, so UDF itself is the
// all-zero pattern.
enum ReservedOp : uint32_t {
  ReservedFixed = 0x00000000,
  ReservedFMask = 0x1E000000,
  ReservedMask = 0xFFFF0000,

  UDF = ReservedFixed | 0x00000000
};

// Unimplemented and unallocated instructions. These are defined to make fixed
// bit assertion easier.
// Both groups use an all-zero fixed pattern and an all-zero mask.
enum UnimplementedOp : uint32_t {
  UnimplementedFixed = 0x00000000,
  UnimplementedFMask = 0x00000000
};

enum UnallocatedOp : uint32_t {
  UnallocatedFixed = 0x00000000,
  UnallocatedFMask = 0x00000000
};

// Instruction bit pattern for an undefined instruction, that will trigger a
// SIGILL at runtime.
//
// A couple of strategies we can use here. There are no unencoded
// instructions in the instruction set that are guaranteed to remain that
// way. However there are some currently (as of 2018) unencoded
// instructions that are good candidates.
//
// Ideally, unencoded instructions should be non-destructive to the register
// state, and should be unencoded at all exception levels.
//
// At the trap the pc will hold the address of the offending instruction.
//
// Some candidates for unencoded instructions:
//
// 0xd4a00000 (essentially dcps0, a good one since it is nonsensical and may
// remain unencoded in the future for that reason)
// 0x33000000 (bfm variant)
// 0xd67f0000 (br variant)
// 0x5ac00c00 (rbit variant)
//
// This instruction is "dcps0", also has 16-bit payload if needed.
+static constexpr uint32_t UNDEFINED_INST_PATTERN = 0xd4a00000; + +} // namespace vixl + +#endif // VIXL_A64_CONSTANTS_A64_H_ diff --git a/js/src/jit/arm64/vixl/Cpu-Features-vixl.cpp b/js/src/jit/arm64/vixl/Cpu-Features-vixl.cpp new file mode 100644 index 0000000000..f31c22fbf5 --- /dev/null +++ b/js/src/jit/arm64/vixl/Cpu-Features-vixl.cpp @@ -0,0 +1,231 @@ +// Copyright 2018, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+
+#include "jit/arm64/vixl/Cpu-Features-vixl.h"
+
+#include <ostream>
+
+#include "jit/arm64/vixl/Cpu-vixl.h"
+#include "jit/arm64/vixl/Globals-vixl.h"
+#include "jit/arm64/vixl/Utils-vixl.h"
+
+#define VIXL_USE_AARCH64_CPU_HELPERS
+
+namespace vixl {
+
+static uint64_t MakeFeatureMask(CPUFeatures::Feature feature) {
+  if (feature == CPUFeatures::kNone) {
+    return 0;  // kNone contributes no bits, so it is a no-op in any mask.
+  } else {
+    // Check that the shift is well-defined, and that the feature is valid.
+    VIXL_STATIC_ASSERT(CPUFeatures::kNumberOfFeatures <=
+                       (sizeof(uint64_t) * 8));
+    VIXL_ASSERT(feature < CPUFeatures::kNumberOfFeatures);
+    return UINT64_C(1) << feature;
+  }
+}
+
+CPUFeatures::CPUFeatures(Feature feature0,
+                         Feature feature1,
+                         Feature feature2,
+                         Feature feature3)
+    : features_(0) {
+  Combine(feature0, feature1, feature2, feature3);
+}
+
+CPUFeatures CPUFeatures::All() {
+  CPUFeatures all;
+  // Check that the shift is well-defined.
+  VIXL_STATIC_ASSERT(CPUFeatures::kNumberOfFeatures < (sizeof(uint64_t) * 8));
+  all.features_ = (UINT64_C(1) << kNumberOfFeatures) - 1;  // One bit each.
+  return all;
+}
+
+CPUFeatures CPUFeatures::InferFromIDRegisters() {
+  // This function assumes that kIDRegisterEmulation is available.
+  CPUFeatures features(CPUFeatures::kIDRegisterEmulation);
+#ifdef VIXL_USE_AARCH64_CPU_HELPERS
+  // Note that the Linux kernel filters these values during emulation, so the
+  // results may not exactly match the expected hardware support.
+  features.Combine(CPU::InferCPUFeaturesFromIDRegisters());
+#endif
+  return features;
+}
+
+CPUFeatures CPUFeatures::InferFromOS(QueryIDRegistersOption option) {
+#ifdef VIXL_USE_AARCH64_CPU_HELPERS
+  return CPU::InferCPUFeaturesFromOS(option);
+#else
+  USE(option);
+  return CPUFeatures();  // No helpers compiled in: report the empty set.
+#endif
+}
+
+void CPUFeatures::Combine(const CPUFeatures& other) {
+  features_ |= other.features_;  // Set union.
+}
+
+void CPUFeatures::Combine(Feature feature0,
+                          Feature feature1,
+                          Feature feature2,
+                          Feature feature3) {
+  features_ |= MakeFeatureMask(feature0);
+  features_ |= MakeFeatureMask(feature1);
+  features_ |= MakeFeatureMask(feature2);
+  features_ |= MakeFeatureMask(feature3);
+}
+
+void CPUFeatures::Remove(const CPUFeatures& other) {
+  features_ &= ~other.features_;  // Set difference.
+}
+
+void CPUFeatures::Remove(Feature feature0,
+                         Feature feature1,
+                         Feature feature2,
+                         Feature feature3) {
+  features_ &= ~MakeFeatureMask(feature0);
+  features_ &= ~MakeFeatureMask(feature1);
+  features_ &= ~MakeFeatureMask(feature2);
+  features_ &= ~MakeFeatureMask(feature3);
+}
+
+CPUFeatures CPUFeatures::With(const CPUFeatures& other) const {
+  CPUFeatures f(*this);  // Work on a copy; *this is left untouched.
+  f.Combine(other);
+  return f;
+}
+
+CPUFeatures CPUFeatures::With(Feature feature0,
+                              Feature feature1,
+                              Feature feature2,
+                              Feature feature3) const {
+  CPUFeatures f(*this);
+  f.Combine(feature0, feature1, feature2, feature3);
+  return f;
+}
+
+CPUFeatures CPUFeatures::Without(const CPUFeatures& other) const {
+  CPUFeatures f(*this);  // Work on a copy; *this is left untouched.
+  f.Remove(other);
+  return f;
+}
+
+CPUFeatures CPUFeatures::Without(Feature feature0,
+                                 Feature feature1,
+                                 Feature feature2,
+                                 Feature feature3) const {
+  CPUFeatures f(*this);
+  f.Remove(feature0, feature1, feature2, feature3);
+  return f;
+}
+
+bool CPUFeatures::Has(const CPUFeatures& other) const {
+  return (features_ & other.features_) == other.features_;  // Subset test.
+}
+
+bool CPUFeatures::Has(Feature feature0,
+                      Feature feature1,
+                      Feature feature2,
+                      Feature feature3) const {
+  uint64_t mask = MakeFeatureMask(feature0) | MakeFeatureMask(feature1) |
+                  MakeFeatureMask(feature2) | MakeFeatureMask(feature3);
+  return (features_ & mask) == mask;  // An all-kNone query has mask 0: true.
+}
+
+size_t CPUFeatures::Count() const { return CountSetBits(features_); }
+
+std::ostream& operator<<(std::ostream& os, CPUFeatures::Feature feature) {
+  // clang-format off
+  switch (feature) {
+#define VIXL_FORMAT_FEATURE(SYMBOL, NAME, CPUINFO) \
+    case CPUFeatures::SYMBOL: \
+      return os << NAME;
+VIXL_CPU_FEATURE_LIST(VIXL_FORMAT_FEATURE)
+#undef VIXL_FORMAT_FEATURE
+    case CPUFeatures::kNone:
+      return os << "none";
+    case CPUFeatures::kNumberOfFeatures:
+      VIXL_UNREACHABLE();
+  }
+  // clang-format on
+  VIXL_UNREACHABLE();
+  return os;
+}
+
+CPUFeatures::const_iterator CPUFeatures::begin() const {
+  if (features_ == 0) return const_iterator(this, kNone);  // Same as end().
+
+  int feature_number = CountTrailingZeros(features_);
+  vixl::CPUFeatures::Feature feature =
+      static_cast<CPUFeatures::Feature>(feature_number);
+  return const_iterator(this, feature);
+}
+
+CPUFeatures::const_iterator CPUFeatures::end() const {
+  return const_iterator(this, kNone);
+}
+
+std::ostream& operator<<(std::ostream& os, const CPUFeatures& features) {
+  CPUFeatures::const_iterator it = features.begin();
+  while (it != features.end()) {
+    os << *it;
+    ++it;
+    if (it != features.end()) os << ", ";  // Separator between items only.
+  }
+  return os;
+}
+
+bool CPUFeaturesConstIterator::operator==(
+    const CPUFeaturesConstIterator& other) const {
+  VIXL_ASSERT(IsValid());
+  return (cpu_features_ == other.cpu_features_) && (feature_ == other.feature_);
+}
+
+CPUFeatures::Feature CPUFeaturesConstIterator::operator++() { // Prefix
+  VIXL_ASSERT(IsValid());
+  do {
+    // Find the next feature. The order is unspecified.
+    feature_ = static_cast<CPUFeatures::Feature>(feature_ + 1);
+    if (feature_ == CPUFeatures::kNumberOfFeatures) {
+      feature_ = CPUFeatures::kNone;  // Ran off the end: become end().
+      VIXL_STATIC_ASSERT(CPUFeatures::kNone == -1);
+    }
+    VIXL_ASSERT(CPUFeatures::kNone <= feature_);
+    VIXL_ASSERT(feature_ < CPUFeatures::kNumberOfFeatures);
+    // cpu_features_->Has(kNone) is always true, so this will terminate even if
+    // the features list is empty.
+  } while (!cpu_features_->Has(feature_));
+  return feature_;
+}
+
+CPUFeatures::Feature CPUFeaturesConstIterator::operator++(int) { // Postfix
+  CPUFeatures::Feature result = feature_;  // Value before the increment.
+  ++(*this);
+  return result;
+}
+
+} // namespace vixl
diff --git a/js/src/jit/arm64/vixl/Cpu-Features-vixl.h b/js/src/jit/arm64/vixl/Cpu-Features-vixl.h
new file mode 100644
index 0000000000..b980233bf2
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Cpu-Features-vixl.h
@@ -0,0 +1,397 @@
+// Copyright 2018, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_CPU_FEATURES_H
+#define VIXL_CPU_FEATURES_H
+
+#include <ostream>
+
+#include "jit/arm64/vixl/Globals-vixl.h"
+
+
+namespace vixl {
+
+
+// clang-format off
+#define VIXL_CPU_FEATURE_LIST(V) /* Applies V(SYMBOL, NAME, CPUINFO). */ \
+  /* If set, the OS traps and emulates MRS accesses to relevant (EL1) ID_* */ \
+  /* registers, so that the detailed feature registers can be read */ \
+  /* directly. */ \
+  V(kIDRegisterEmulation, "ID register emulation", "cpuid") \
+  \
+  V(kFP, "FP", "fp") \
+  V(kNEON, "NEON", "asimd") \
+  V(kCRC32, "CRC32", "crc32") \
+  /* Cryptographic support instructions. */ \
+  V(kAES, "AES", "aes") \
+  V(kSHA1, "SHA1", "sha1") \
+  V(kSHA2, "SHA2", "sha2") \
+  /* A form of PMULL{2} with a 128-bit (1Q) result. */ \
+  V(kPmull1Q, "Pmull1Q", "pmull") \
+  /* Atomic operations on memory: CAS, LDADD, STADD, SWP, etc. */ \
+  V(kAtomics, "Atomics", "atomics") \
+  /* Limited ordering regions: LDLAR, STLLR and their variants. */ \
+  V(kLORegions, "LORegions", NULL) \
+  /* Rounding doubling multiply add/subtract: SQRDMLAH and SQRDMLSH. */ \
+  V(kRDM, "RDM", "asimdrdm") \
+  /* Scalable Vector Extension. */ \
+  V(kSVE, "SVE", "sve") \
+  /* SDOT and UDOT support (in NEON). */ \
+  V(kDotProduct, "DotProduct", "asimddp") \
+  /* Half-precision (FP16) support for FP and NEON, respectively. */ \
+  V(kFPHalf, "FPHalf", "fphp") \
+  V(kNEONHalf, "NEONHalf", "asimdhp") \
+  /* The RAS extension, including the ESB instruction. */ \
+  V(kRAS, "RAS", NULL) \
+  /* Data cache clean to the point of persistence: DC CVAP. */ \
+  V(kDCPoP, "DCPoP", "dcpop") \
+  /* Data cache clean to the point of deep persistence: DC CVADP. */ \
+  V(kDCCVADP, "DCCVADP", NULL) \
+  /* Cryptographic support instructions. */ \
+  V(kSHA3, "SHA3", "sha3") \
+  V(kSHA512, "SHA512", "sha512") \
+  V(kSM3, "SM3", "sm3") \
+  V(kSM4, "SM4", "sm4") \
+  /* Pointer authentication for addresses. */ \
+  V(kPAuth, "PAuth", NULL) \
+  /* Pointer authentication for addresses uses QARMA. */ \
+  V(kPAuthQARMA, "PAuthQARMA", NULL) \
+  /* Generic authentication (using the PACGA instruction). */ \
+  V(kPAuthGeneric, "PAuthGeneric", NULL) \
+  /* Generic authentication uses QARMA. */ \
+  V(kPAuthGenericQARMA, "PAuthGenericQARMA", NULL) \
+  /* JavaScript-style FP -> integer conversion instruction: FJCVTZS. */ \
+  V(kJSCVT, "JSCVT", "jscvt") \
+  /* Complex number support for NEON: FCMLA and FCADD. */ \
+  V(kFcma, "Fcma", "fcma") \
+  /* RCpc-based model (for weaker release consistency): LDAPR and variants. */ \
+  V(kRCpc, "RCpc", "lrcpc") \
+  V(kRCpcImm, "RCpc (imm)", "ilrcpc") \
+  /* Flag manipulation instructions: SETF{8,16}, CFINV, RMIF. */ \
+  V(kFlagM, "FlagM", "flagm") \
+  /* Unaligned single-copy atomicity. */ \
+  V(kUSCAT, "USCAT", "uscat") \
+  /* FP16 fused multiply-add or -subtract long: FMLAL{2}, FMLSL{2}. */ \
+  V(kFHM, "FHM", "asimdfhm") \
+  /* Data-independent timing (for selected instructions). */ \
+  V(kDIT, "DIT", "dit") \
+  /* Branch target identification. */ \
+  V(kBTI, "BTI", NULL) \
+  /* Flag manipulation instructions: {AX,XA}FLAG. */ \
+  V(kAXFlag, "AXFlag", NULL) \
+  /* Random number generation extension. */ \
+  V(kRNG, "RNG", NULL) \
+  /* Floating-point round to {32,64}-bit integer. */ \
+  V(kFrintToFixedSizedInt,"Frint (bounded)", NULL)
+// clang-format on
+
+
+class CPUFeaturesConstIterator;
+
+// A representation of the set of features known to be supported by the target
+// device.
Each feature is represented by a simple boolean flag.
+//
+//   - When the Assembler is asked to assemble an instruction, it asserts (in
+//     debug mode) that the necessary features are available.
+//
+//   - TODO: The MacroAssembler relies on the Assembler's assertions, but in
+//     some cases it may be useful for macros to generate a fall-back sequence
+//     in case features are not available.
+//
+//   - The Simulator assumes by default that all features are available, but it
+//     is possible to configure it to fail if the simulated code uses features
+//     that are not enabled.
+//
+//     The Simulator also offers pseudo-instructions to allow features to be
+//     enabled and disabled dynamically. This is useful when you want to ensure
+//     that some features are constrained to certain areas of code.
+//
+//   - The base Disassembler knows nothing about CPU features, but the
+//     PrintDisassembler can be configured to annotate its output with warnings
+//     about unavailable features. The Simulator uses this feature when
+//     instruction trace is enabled.
+//
+//   - The Decoder-based components -- the Simulator and PrintDisassembler --
+//     rely on a CPUFeaturesAuditor visitor. This visitor keeps a list of
+//     features actually encountered so that a large block of code can be
+//     examined (either directly or through simulation), and the required
+//     features analysed later.
+//
+// Expected usage:
+//
+//    // By default, VIXL uses CPUFeatures::AArch64LegacyBaseline(), for
+//    // compatibility with older versions of VIXL.
+//    MacroAssembler masm;
+//
+//    // Generate code only for the current CPU.
+//    masm.SetCPUFeatures(CPUFeatures::InferFromOS());
+//
+//    // Turn off feature checking entirely.
+//    masm.SetCPUFeatures(CPUFeatures::All());
+//
+// Feature set manipulation:
+//
+//    CPUFeatures f; // The default constructor gives an empty set.
+//    // Individual features can be added (or removed).
+//    f.Combine(CPUFeatures::kFP, CPUFeatures::kNEON, CPUFeatures::kAES);
+//    f.Remove(CPUFeatures::kNEON);
+//
+//    // Some helpers exist for extensions that provide several features.
+//    f.Remove(CPUFeatures::All());
+//    f.Combine(CPUFeatures::AArch64LegacyBaseline());
+//
+//    // Chained construction is also possible.
+//    CPUFeatures g =
+//        f.With(CPUFeatures::kPmull1Q).Without(CPUFeatures::kCRC32);
+//
+//    // Features can be queried. Where multiple features are given, they are
+//    // combined with logical AND.
+//    if (h.Has(CPUFeatures::kNEON)) { ... }
+//    if (h.Has(CPUFeatures::kFP, CPUFeatures::kNEON)) { ... }
+//    if (h.Has(g)) { ... }
+//    // If the empty set is requested, the result is always 'true'.
+//    VIXL_ASSERT(h.Has(CPUFeatures()));
+//
+//    // For debug and reporting purposes, features can be enumerated (or
+//    // printed directly):
+//    std::cout << CPUFeatures::kNEON; // Prints something like "NEON".
+//    std::cout << f; // Prints something like "FP, NEON, CRC32".
+class CPUFeatures {
+ public:
+  // clang-format off
+  // Individual features.
+  // These should be treated as opaque tokens. User code should not rely on
+  // specific numeric values or ordering.
+  enum Feature {
+    // Refer to VIXL_CPU_FEATURE_LIST (above) for the list of feature names that
+    // this class supports.
+
+    kNone = -1,  // Placeholder for "no feature"; also the iterators' end value.
+#define VIXL_DECLARE_FEATURE(SYMBOL, NAME, CPUINFO) SYMBOL,
+    VIXL_CPU_FEATURE_LIST(VIXL_DECLARE_FEATURE)
+#undef VIXL_DECLARE_FEATURE
+    kNumberOfFeatures  // Count of the entries above; not itself a feature.
+  };
+  // clang-format on
+
+  // By default, construct with no features enabled.
+  CPUFeatures() : features_(0) {}
+
+  // Construct with some features already enabled.
+  CPUFeatures(Feature feature0,
+              Feature feature1 = kNone,
+              Feature feature2 = kNone,
+              Feature feature3 = kNone);
+
+  // Construct with all features enabled. This can be used to disable feature
+  // checking: `Has(...)` returns true regardless of the argument.
+  static CPUFeatures All();
+
+  // Construct an empty CPUFeatures. This is equivalent to the default
+  // constructor, but is provided for symmetry and convenience.
+  static CPUFeatures None() { return CPUFeatures(); }
+
+  // The presence of these features was assumed by versions of VIXL before this
+  // API was added, so using this set by default ensures API compatibility.
+  static CPUFeatures AArch64LegacyBaseline() {
+    return CPUFeatures(kFP, kNEON, kCRC32);
+  }
+
+  // Construct a new CPUFeatures object using ID registers. This assumes that
+  // kIDRegisterEmulation is present.
+  static CPUFeatures InferFromIDRegisters();
+
+  enum QueryIDRegistersOption {
+    kDontQueryIDRegisters,
+    kQueryIDRegistersIfAvailable
+  };
+
+  // Construct a new CPUFeatures object based on what the OS reports.
+  static CPUFeatures InferFromOS(
+      QueryIDRegistersOption option = kQueryIDRegistersIfAvailable);
+
+  // Combine another CPUFeatures object into this one. Features that already
+  // exist in this set are left unchanged.
+  void Combine(const CPUFeatures& other);
+
+  // Combine specific features into this set. Features that already exist in
+  // this set are left unchanged.
+  void Combine(Feature feature0,
+               Feature feature1 = kNone,
+               Feature feature2 = kNone,
+               Feature feature3 = kNone);
+
+  // Remove features in another CPUFeatures object from this one.
+  void Remove(const CPUFeatures& other);
+
+  // Remove specific features from this set.
+  void Remove(Feature feature0,
+              Feature feature1 = kNone,
+              Feature feature2 = kNone,
+              Feature feature3 = kNone);
+
+  // Chaining helpers for convenient construction.
+  CPUFeatures With(const CPUFeatures& other) const;
+  CPUFeatures With(Feature feature0,
+                   Feature feature1 = kNone,
+                   Feature feature2 = kNone,
+                   Feature feature3 = kNone) const;
+  CPUFeatures Without(const CPUFeatures& other) const;
+  CPUFeatures Without(Feature feature0,
+                      Feature feature1 = kNone,
+                      Feature feature2 = kNone,
+                      Feature feature3 = kNone) const;
+
+  // Query features.
+  // Note that an empty query (like `Has(kNone)`) always returns true.
+  bool Has(const CPUFeatures& other) const;
+  bool Has(Feature feature0,
+           Feature feature1 = kNone,
+           Feature feature2 = kNone,
+           Feature feature3 = kNone) const;
+
+  // Return the number of enabled features.
+  size_t Count() const;
+  bool HasNoFeatures() const { return Count() == 0; }
+
+  // Check for equivalence.
+  bool operator==(const CPUFeatures& other) const {
+    return Has(other) && other.Has(*this);  // Mutual subsets.
+  }
+  bool operator!=(const CPUFeatures& other) const { return !(*this == other); }
+
+  typedef CPUFeaturesConstIterator const_iterator;
+
+  const_iterator begin() const;
+  const_iterator end() const;
+
+ private:
+  // Each bit represents a feature. This field will be replaced as needed if
+  // features are added.
+  uint64_t features_;
+
+  friend std::ostream& operator<<(std::ostream& os,
+                                  const vixl::CPUFeatures& features);
+};
+
+std::ostream& operator<<(std::ostream& os, vixl::CPUFeatures::Feature feature);
+std::ostream& operator<<(std::ostream& os, const vixl::CPUFeatures& features);
+
+// This is not a proper C++ iterator type, but it simulates enough of
+// ForwardIterator that simple loops can be written.
+class CPUFeaturesConstIterator {
+ public:
+  CPUFeaturesConstIterator(const CPUFeatures* cpu_features = NULL,
+                           CPUFeatures::Feature start = CPUFeatures::kNone)
+      : cpu_features_(cpu_features), feature_(start) {
+    VIXL_ASSERT(IsValid());
+  }
+
+  bool operator==(const CPUFeaturesConstIterator& other) const;
+  bool operator!=(const CPUFeaturesConstIterator& other) const {
+    return !(*this == other);
+  }
+  CPUFeatures::Feature operator++();
+  CPUFeatures::Feature operator++(int);
+
+  CPUFeatures::Feature operator*() const {
+    VIXL_ASSERT(IsValid());
+    return feature_;
+  }
+
+  // For proper support of C++'s simplest "Iterator" concept, this class would
+  // have to define member types (such as CPUFeaturesIterator::pointer) to make
+  // it appear as if it iterates over Feature objects in memory. That is, we'd
+  // need CPUFeatures::iterator to behave like std::vector<Feature>::iterator.
+  // This is at least partially possible -- the std::vector<bool> specialisation
+  // does something similar -- but it doesn't seem worthwhile for a
+  // special-purpose debug helper, so they are omitted here.
+ private:
+  const CPUFeatures* cpu_features_;
+  CPUFeatures::Feature feature_;
+
+  bool IsValid() const {  // An unbound (NULL) iterator may only hold kNone.
+    if (cpu_features_ == NULL) return feature_ == CPUFeatures::kNone;
+    return cpu_features_->Has(feature_);
+  }
+};
+
+// A convenience scope for temporarily modifying a CPU features object. This
+// allows features to be enabled for short sequences.
+//
+// Expected usage:
+//
+//  {
+//    CPUFeaturesScope cpu(&masm, CPUFeatures::kCRC32);
+//    // This scope can now use CRC32, as well as anything else that was enabled
+//    // before the scope.
+//
+//    ...
+//
+//    // At the end of the scope, the original CPU features are restored.
+//  }
+class CPUFeaturesScope {
+ public:
+  // Start a CPUFeaturesScope on any object that implements
+  // `CPUFeatures* GetCPUFeatures()`.
+  template <typename T>
+  explicit CPUFeaturesScope(T* cpu_features_wrapper,
+                            CPUFeatures::Feature feature0 = CPUFeatures::kNone,
+                            CPUFeatures::Feature feature1 = CPUFeatures::kNone,
+                            CPUFeatures::Feature feature2 = CPUFeatures::kNone,
+                            CPUFeatures::Feature feature3 = CPUFeatures::kNone)
+      : cpu_features_(cpu_features_wrapper->GetCPUFeatures()),
+        old_features_(*cpu_features_) {
+    cpu_features_->Combine(feature0, feature1, feature2, feature3);
+  }
+
+  template <typename T>
+  CPUFeaturesScope(T* cpu_features_wrapper, const CPUFeatures& other)
+      : cpu_features_(cpu_features_wrapper->GetCPUFeatures()),
+        old_features_(*cpu_features_) {
+    cpu_features_->Combine(other);
+  }
+
+  ~CPUFeaturesScope() { *cpu_features_ = old_features_; }
+
+  // For advanced usage, the CPUFeatures object can be accessed directly.
+  // The scope will restore the original state when it ends.
+
+  CPUFeatures* GetCPUFeatures() const { return cpu_features_; }
+
+  void SetCPUFeatures(const CPUFeatures& cpu_features) {
+    *cpu_features_ = cpu_features;
+  }
+
+ private:
+  CPUFeatures* const cpu_features_;
+  const CPUFeatures old_features_;
+};
+
+
+} // namespace vixl
+
+#endif // VIXL_CPU_FEATURES_H
diff --git a/js/src/jit/arm64/vixl/Cpu-vixl.cpp b/js/src/jit/arm64/vixl/Cpu-vixl.cpp
new file mode 100644
index 0000000000..12244e73e4
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Cpu-vixl.cpp
@@ -0,0 +1,256 @@
+// Copyright 2015, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "jit/arm64/vixl/Cpu-vixl.h"
+
+#include "jstypes.h"
+
+#if defined(__aarch64__) && (defined(__ANDROID__) || defined(__linux__))
+#include <sys/auxv.h>
+#define VIXL_USE_LINUX_HWCAP 1
+#endif
+
+#include "jit/arm64/vixl/Utils-vixl.h"
+
+
+namespace vixl {
+
+
+const IDRegister::Field AA64PFR0::kFP(16, Field::kSigned);
+const IDRegister::Field AA64PFR0::kAdvSIMD(20, Field::kSigned);
+const IDRegister::Field AA64PFR0::kSVE(32);
+const IDRegister::Field AA64PFR0::kDIT(48);
+
+const IDRegister::Field AA64PFR1::kBT(0);
+
+const IDRegister::Field AA64ISAR0::kAES(4);
+const IDRegister::Field AA64ISAR0::kSHA1(8);
+const IDRegister::Field AA64ISAR0::kSHA2(12);
+const IDRegister::Field AA64ISAR0::kCRC32(16);
+const IDRegister::Field AA64ISAR0::kAtomic(20);
+const IDRegister::Field AA64ISAR0::kRDM(28);
+const IDRegister::Field AA64ISAR0::kSHA3(32);
+const IDRegister::Field AA64ISAR0::kSM3(36);
+const IDRegister::Field AA64ISAR0::kSM4(40);
+const IDRegister::Field AA64ISAR0::kDP(44);
+const IDRegister::Field AA64ISAR0::kFHM(48);
+const IDRegister::Field AA64ISAR0::kTS(52);
+
+const IDRegister::Field AA64ISAR1::kDPB(0);
+const IDRegister::Field AA64ISAR1::kAPA(4);
+const IDRegister::Field AA64ISAR1::kAPI(8);
+const IDRegister::Field AA64ISAR1::kJSCVT(12);
+const IDRegister::Field AA64ISAR1::kFCMA(16);
+const IDRegister::Field AA64ISAR1::kLRCPC(20);
+const IDRegister::Field AA64ISAR1::kGPA(24);
+const IDRegister::Field AA64ISAR1::kGPI(28);
+const IDRegister::Field AA64ISAR1::kFRINTTS(32);
+const IDRegister::Field AA64ISAR1::kSB(36);
+const IDRegister::Field AA64ISAR1::kSPECRES(40);
+
+const IDRegister::Field AA64MMFR1::kLO(16);
+
+CPUFeatures AA64PFR0::GetCPUFeatures() const {
+  CPUFeatures f;
+  if (Get(kFP) >= 0) f.Combine(CPUFeatures::kFP);  // Signed: negative = no FP.
+  if (Get(kFP) >= 1) f.Combine(CPUFeatures::kFPHalf);
+  if (Get(kAdvSIMD) >= 0) f.Combine(CPUFeatures::kNEON);  // Signed, as kFP.
+  if (Get(kAdvSIMD) >= 1) f.Combine(CPUFeatures::kNEONHalf);
+  if (Get(kSVE) >= 1) f.Combine(CPUFeatures::kSVE);
+  if (Get(kDIT) >= 1) f.Combine(CPUFeatures::kDIT);
+  return f;
+}
+
+CPUFeatures AA64PFR1::GetCPUFeatures() const {
+  CPUFeatures f;
+  if (Get(kBT) >= 1) f.Combine(CPUFeatures::kBTI);
+  return f;
+}
+
+CPUFeatures AA64ISAR0::GetCPUFeatures() const {
+  CPUFeatures f;
+  if (Get(kAES) >= 1) f.Combine(CPUFeatures::kAES);
+  if (Get(kAES) >= 2) f.Combine(CPUFeatures::kPmull1Q);
+  if (Get(kSHA1) >= 1) f.Combine(CPUFeatures::kSHA1);
+  if (Get(kSHA2) >= 1) f.Combine(CPUFeatures::kSHA2);
+  if (Get(kSHA2) >= 2) f.Combine(CPUFeatures::kSHA512);
+  if (Get(kCRC32) >= 1) f.Combine(CPUFeatures::kCRC32);
+  if (Get(kAtomic) >= 1) f.Combine(CPUFeatures::kAtomics);
+  if (Get(kRDM) >= 1) f.Combine(CPUFeatures::kRDM);
+  if (Get(kSHA3) >= 1) f.Combine(CPUFeatures::kSHA3);
+  if (Get(kSM3) >= 1) f.Combine(CPUFeatures::kSM3);
+  if (Get(kSM4) >= 1) f.Combine(CPUFeatures::kSM4);
+  if (Get(kDP) >= 1) f.Combine(CPUFeatures::kDotProduct);
+  if (Get(kFHM) >= 1) f.Combine(CPUFeatures::kFHM);
+  if (Get(kTS) >= 1) f.Combine(CPUFeatures::kFlagM);
+  if (Get(kTS) >= 2) f.Combine(CPUFeatures::kAXFlag);
+  return f;
+}
+
+CPUFeatures AA64ISAR1::GetCPUFeatures() const {
+  CPUFeatures f;
+  if (Get(kDPB) >= 1) f.Combine(CPUFeatures::kDCPoP);
+  if (Get(kJSCVT) >= 1) f.Combine(CPUFeatures::kJSCVT);
+  if (Get(kFCMA) >= 1) f.Combine(CPUFeatures::kFcma);
+  if (Get(kLRCPC) >= 1) f.Combine(CPUFeatures::kRCpc);
+  if (Get(kLRCPC) >= 2) f.Combine(CPUFeatures::kRCpcImm);
+  if (Get(kFRINTTS) >= 1) f.Combine(CPUFeatures::kFrintToFixedSizedInt);
+
+  if (Get(kAPI) >= 1) f.Combine(CPUFeatures::kPAuth);
+  if (Get(kAPA) >= 1) f.Combine(CPUFeatures::kPAuth, CPUFeatures::kPAuthQARMA);
+  if (Get(kGPI) >= 1) f.Combine(CPUFeatures::kPAuthGeneric);
+  if (Get(kGPA) >= 1) {
+    f.Combine(CPUFeatures::kPAuthGeneric, CPUFeatures::kPAuthGenericQARMA);
+  }
+  return f;
+}
+
+CPUFeatures AA64MMFR1::GetCPUFeatures() const {
+  CPUFeatures f;
+  if (Get(kLO) >= 1) f.Combine(CPUFeatures::kLORegions);
+  return f;
+}
+
+int IDRegister::Get(IDRegister::Field field) const {
+  int msb = field.GetMsb();
+  int lsb = field.GetLsb();
+  VIXL_STATIC_ASSERT(static_cast<size_t>(Field::kMaxWidthInBits) <
+                     (sizeof(int) * kBitsPerByte));
+  switch (field.GetType()) {
+    case Field::kSigned:
+      return static_cast<int>(ExtractSignedBitfield64(msb, lsb, value_));
+    case Field::kUnsigned:
+      return static_cast<int>(ExtractUnsignedBitfield64(msb, lsb, value_));
+  }
+  VIXL_UNREACHABLE();
+  return 0;
+}
+
+CPUFeatures CPU::InferCPUFeaturesFromIDRegisters() {
+  CPUFeatures f;
+#define VIXL_COMBINE_ID_REG(NAME) f.Combine(Read##NAME().GetCPUFeatures());
+  VIXL_AARCH64_ID_REG_LIST(VIXL_COMBINE_ID_REG)
+#undef VIXL_COMBINE_ID_REG
+  return f;
+}
+
+CPUFeatures CPU::InferCPUFeaturesFromOS(
+    CPUFeatures::QueryIDRegistersOption option) {
+  CPUFeatures features;
+
+#if VIXL_USE_LINUX_HWCAP
+  // Map each set bit onto a feature. Ideally, we'd use HWCAP_* macros rather
+  // than explicit bits, but explicit bits allow us to identify features that
+  // the toolchain doesn't know about.
+  static const CPUFeatures::Feature kFeatureBits[] = {
+      // Bits 0-7
+      CPUFeatures::kFP,
+      CPUFeatures::kNEON,
+      CPUFeatures::kNone, // "EVTSTRM", which VIXL doesn't track.
+      CPUFeatures::kAES,
+      CPUFeatures::kPmull1Q,
+      CPUFeatures::kSHA1,
+      CPUFeatures::kSHA2,
+      CPUFeatures::kCRC32,
+      // Bits 8-15
+      CPUFeatures::kAtomics,
+      CPUFeatures::kFPHalf,
+      CPUFeatures::kNEONHalf,
+      CPUFeatures::kIDRegisterEmulation,
+      CPUFeatures::kRDM,
+      CPUFeatures::kJSCVT,
+      CPUFeatures::kFcma,
+      CPUFeatures::kRCpc,
+      // Bits 16-23
+      CPUFeatures::kDCPoP,
+      CPUFeatures::kSHA3,
+      CPUFeatures::kSM3,
+      CPUFeatures::kSM4,
+      CPUFeatures::kDotProduct,
+      CPUFeatures::kSHA512,
+      CPUFeatures::kSVE,
+      CPUFeatures::kFHM,
+      // Bits 24-27
+      CPUFeatures::kDIT,
+      CPUFeatures::kUSCAT,
+      CPUFeatures::kRCpcImm,
+      CPUFeatures::kFlagM
+      // Bits 28-31 are unassigned.
+  };
+  static const size_t kFeatureBitCount =
+      sizeof(kFeatureBits) / sizeof(kFeatureBits[0]);
+
+  // Mozilla change: Set the default for the simulator.
+#ifdef JS_SIMULATOR_ARM64
+  unsigned long auxv = ~(0UL); // Enable all features for the Simulator.
+#else
+  unsigned long auxv = getauxval(AT_HWCAP); // NOLINT(runtime/int)
+#endif
+
+  VIXL_STATIC_ASSERT(kFeatureBitCount < (sizeof(auxv) * kBitsPerByte));
+  for (size_t i = 0; i < kFeatureBitCount; i++) {
+    if (auxv & (1UL << i)) features.Combine(kFeatureBits[i]); // kNone: no-op
+  }
+#elif defined(XP_MACOSX)
+  // Apple processors have kJSCVT, kDotProduct, and kAtomics features.
+  features.Combine(CPUFeatures::kJSCVT, CPUFeatures::kDotProduct,
+                   CPUFeatures::kAtomics);
+#endif // VIXL_USE_LINUX_HWCAP
+
+  if ((option == CPUFeatures::kQueryIDRegistersIfAvailable) &&
+      (features.Has(CPUFeatures::kIDRegisterEmulation))) {
+    features.Combine(InferCPUFeaturesFromIDRegisters());
+  }
+  return features;
+}
+
+
+#ifdef __aarch64__
+#define VIXL_READ_ID_REG(NAME) \
+  NAME CPU::Read##NAME() { \
+    uint64_t value = 0; \
+    __asm__("mrs %0, ID_" #NAME "_EL1" : "=r"(value)); \
+    return NAME(value); \
+  }
+#else // __aarch64__
+#define VIXL_READ_ID_REG(NAME) \
+  NAME CPU::Read##NAME() { \
+    /* TODO: Use VIXL_UNREACHABLE once it works in release builds. */ \
+    VIXL_ABORT(); \
+  }
+#endif // __aarch64__
+
+VIXL_AARCH64_ID_REG_LIST(VIXL_READ_ID_REG)
+
+#undef VIXL_READ_ID_REG
+
+
+// Initialise to smallest possible cache size.
+unsigned CPU::dcache_line_size_ = 1;
+unsigned CPU::icache_line_size_ = 1;
+
+
+} // namespace vixl
diff --git a/js/src/jit/arm64/vixl/Cpu-vixl.h b/js/src/jit/arm64/vixl/Cpu-vixl.h
new file mode 100644
index 0000000000..4db51aad6b
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Cpu-vixl.h
@@ -0,0 +1,241 @@
+// Copyright 2014, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef VIXL_CPU_AARCH64_H +#define VIXL_CPU_AARCH64_H + +#include "jit/arm64/vixl/Cpu-Features-vixl.h" +#include "jit/arm64/vixl/Globals-vixl.h" + +#include "jit/arm64/vixl/Instructions-vixl.h" + +#ifndef VIXL_INCLUDE_TARGET_AARCH64 +// The supporting .cc file is only compiled when the A64 target is selected. +// Throw an explicit error now to avoid a harder-to-debug linker error later. +// +// These helpers _could_ work on any AArch64 host, even when generating AArch32 +// code, but we don't support this because the available features may differ +// between AArch32 and AArch64 on the same platform, so basing AArch32 code +// generation on aarch64::CPU features is probably broken. +#error cpu-aarch64.h requires VIXL_INCLUDE_TARGET_AARCH64 (scons target=a64). +#endif + +namespace vixl { + +// A CPU ID register, for use with CPUFeatures::kIDRegisterEmulation. Fields +// specific to each register are described in relevant subclasses. 
+class IDRegister { + protected: + explicit IDRegister(uint64_t value = 0) : value_(value) {} + + class Field { + public: + enum Type { kUnsigned, kSigned }; + + explicit Field(int lsb, Type type = kUnsigned) : lsb_(lsb), type_(type) {} + + static const int kMaxWidthInBits = 4; + + int GetWidthInBits() const { + // All current ID fields have four bits. + return kMaxWidthInBits; + } + int GetLsb() const { return lsb_; } + int GetMsb() const { return lsb_ + GetWidthInBits() - 1; } + Type GetType() const { return type_; } + + private: + int lsb_; + Type type_; + }; + + public: + // Extract the specified field, performing sign-extension for signed fields. + // This allows us to implement the 'value >= number' detection mechanism + // recommended by the Arm ARM, for both signed and unsigned fields. + int Get(Field field) const; + + private: + uint64_t value_; +}; + +class AA64PFR0 : public IDRegister { + public: + explicit AA64PFR0(uint64_t value) : IDRegister(value) {} + + CPUFeatures GetCPUFeatures() const; + + private: + static const Field kFP; + static const Field kAdvSIMD; + static const Field kSVE; + static const Field kDIT; +}; + +class AA64PFR1 : public IDRegister { + public: + explicit AA64PFR1(uint64_t value) : IDRegister(value) {} + + CPUFeatures GetCPUFeatures() const; + + private: + static const Field kBT; +}; + +class AA64ISAR0 : public IDRegister { + public: + explicit AA64ISAR0(uint64_t value) : IDRegister(value) {} + + CPUFeatures GetCPUFeatures() const; + + private: + static const Field kAES; + static const Field kSHA1; + static const Field kSHA2; + static const Field kCRC32; + static const Field kAtomic; + static const Field kRDM; + static const Field kSHA3; + static const Field kSM3; + static const Field kSM4; + static const Field kDP; + static const Field kFHM; + static const Field kTS; +}; + +class AA64ISAR1 : public IDRegister { + public: + explicit AA64ISAR1(uint64_t value) : IDRegister(value) {} + + CPUFeatures GetCPUFeatures() const; + + 
private: + static const Field kDPB; + static const Field kAPA; + static const Field kAPI; + static const Field kJSCVT; + static const Field kFCMA; + static const Field kLRCPC; + static const Field kGPA; + static const Field kGPI; + static const Field kFRINTTS; + static const Field kSB; + static const Field kSPECRES; +}; + +class AA64MMFR1 : public IDRegister { + public: + explicit AA64MMFR1(uint64_t value) : IDRegister(value) {} + + CPUFeatures GetCPUFeatures() const; + + private: + static const Field kLO; +}; + +class CPU { + public: + // Initialise CPU support. + static void SetUp(); + + // Ensures the data at a given address and with a given size is the same for + // the I and D caches. I and D caches are not automatically coherent on ARM + // so this operation is required before any dynamically generated code can + // safely run. + static void EnsureIAndDCacheCoherency(void* address, size_t length); + + // Flush the local instruction pipeline, forcing a reload of any instructions + // beyond this barrier from the icache. + static void FlushExecutionContext(); + + // Read and interpret the ID registers. This requires + // CPUFeatures::kIDRegisterEmulation, and therefore cannot be called on + // non-AArch64 platforms. + static CPUFeatures InferCPUFeaturesFromIDRegisters(); + + // Read and interpret CPUFeatures reported by the OS. Failed queries (or + // unsupported platforms) return an empty list. Note that this is + // indistinguishable from a successful query on a platform that advertises no + // features. + // + // Non-AArch64 hosts are considered to be unsupported platforms, and this + // function returns an empty list. + static CPUFeatures InferCPUFeaturesFromOS( + CPUFeatures::QueryIDRegistersOption option = + CPUFeatures::kQueryIDRegistersIfAvailable); + + // Handle tagged pointers. 
+ template <typename T> + static T SetPointerTag(T pointer, uint64_t tag) { + VIXL_ASSERT(IsUintN(kAddressTagWidth, tag)); + + // Use C-style casts to get static_cast behaviour for integral types (T), + // and reinterpret_cast behaviour for other types. + + uint64_t raw = (uint64_t)pointer; + VIXL_STATIC_ASSERT(sizeof(pointer) == sizeof(raw)); + + raw = (raw & ~kAddressTagMask) | (tag << kAddressTagOffset); + return (T)raw; + } + + template <typename T> + static uint64_t GetPointerTag(T pointer) { + // Use C-style casts to get static_cast behaviour for integral types (T), + // and reinterpret_cast behaviour for other types. + + uint64_t raw = (uint64_t)pointer; + VIXL_STATIC_ASSERT(sizeof(pointer) == sizeof(raw)); + + return (raw & kAddressTagMask) >> kAddressTagOffset; + } + + private: +#define VIXL_AARCH64_ID_REG_LIST(V) \ + V(AA64PFR0) \ + V(AA64PFR1) \ + V(AA64ISAR0) \ + V(AA64ISAR1) \ + V(AA64MMFR1) + +#define VIXL_READ_ID_REG(NAME) static NAME Read##NAME(); + // On native AArch64 platforms, read the named CPU ID registers. These require + // CPUFeatures::kIDRegisterEmulation, and should not be called on non-AArch64 + // platforms. + VIXL_AARCH64_ID_REG_LIST(VIXL_READ_ID_REG) +#undef VIXL_READ_ID_REG + + // Return the content of the cache type register. + static uint32_t GetCacheType(); + + // I and D cache line size in bytes. + static unsigned icache_line_size_; + static unsigned dcache_line_size_; +}; + +} // namespace vixl + +#endif // VIXL_CPU_AARCH64_H diff --git a/js/src/jit/arm64/vixl/Debugger-vixl.cpp b/js/src/jit/arm64/vixl/Debugger-vixl.cpp new file mode 100644 index 0000000000..fa3e15601e --- /dev/null +++ b/js/src/jit/arm64/vixl/Debugger-vixl.cpp @@ -0,0 +1,1535 @@ +// Copyright 2014, ARM Limited +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, +// OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "jstypes.h" + +#ifdef JS_SIMULATOR_ARM64 + +#include "jit/arm64/vixl/Debugger-vixl.h" + +#include "mozilla/Vector.h" + +#include "js/AllocPolicy.h" + +namespace vixl { + +// List of commands supported by the debugger. +#define DEBUG_COMMAND_LIST(C) \ +C(HelpCommand) \ +C(ContinueCommand) \ +C(StepCommand) \ +C(DisasmCommand) \ +C(PrintCommand) \ +C(ExamineCommand) + +// Debugger command lines are broken up in token of different type to make +// processing easier later on. 
+class Token { + public: + virtual ~Token() {} + + // Token type. + virtual bool IsRegister() const { return false; } + virtual bool IsFPRegister() const { return false; } + virtual bool IsIdentifier() const { return false; } + virtual bool IsAddress() const { return false; } + virtual bool IsInteger() const { return false; } + virtual bool IsFormat() const { return false; } + virtual bool IsUnknown() const { return false; } + // Token properties. + virtual bool CanAddressMemory() const { return false; } + virtual uint8_t* ToAddress(Debugger* debugger) const = 0; + virtual void Print(FILE* out = stdout) const = 0; + + static Token* Tokenize(const char* arg); +}; + +typedef mozilla::Vector<Token*, 0, js::SystemAllocPolicy> TokenVector; + +// Tokens often hold one value. +template<typename T> class ValueToken : public Token { + public: + explicit ValueToken(T value) : value_(value) {} + ValueToken() {} + + T value() const { return value_; } + + virtual uint8_t* ToAddress(Debugger* debugger) const override { + USE(debugger); + VIXL_ABORT(); + } + + protected: + T value_; +}; + +// Integer registers (X or W) and their aliases. +// Format: wn or xn with 0 <= n < 32 or a name in the aliases list. 
+class RegisterToken : public ValueToken<const Register> { + public: + explicit RegisterToken(const Register reg) + : ValueToken<const Register>(reg) {} + + virtual bool IsRegister() const override { return true; } + virtual bool CanAddressMemory() const override { return value().Is64Bits(); } + virtual uint8_t* ToAddress(Debugger* debugger) const override; + virtual void Print(FILE* out = stdout) const override; + const char* Name() const; + + static Token* Tokenize(const char* arg); + static RegisterToken* Cast(Token* tok) { + VIXL_ASSERT(tok->IsRegister()); + return reinterpret_cast<RegisterToken*>(tok); + } + + private: + static const int kMaxAliasNumber = 4; + static const char* kXAliases[kNumberOfRegisters][kMaxAliasNumber]; + static const char* kWAliases[kNumberOfRegisters][kMaxAliasNumber]; +}; + +// Floating point registers (D or S). +// Format: sn or dn with 0 <= n < 32. +class FPRegisterToken : public ValueToken<const FPRegister> { + public: + explicit FPRegisterToken(const FPRegister fpreg) + : ValueToken<const FPRegister>(fpreg) {} + + virtual bool IsFPRegister() const override { return true; } + virtual void Print(FILE* out = stdout) const override; + + static Token* Tokenize(const char* arg); + static FPRegisterToken* Cast(Token* tok) { + VIXL_ASSERT(tok->IsFPRegister()); + return reinterpret_cast<FPRegisterToken*>(tok); + } +}; + + +// Non-register identifiers. +// Format: Alphanumeric string starting with a letter. 
+class IdentifierToken : public ValueToken<char*> { + public: + explicit IdentifierToken(const char* name) { + size_t size = strlen(name) + 1; + value_ = js_pod_malloc<char>(size); + strncpy(value_, name, size); + } + virtual ~IdentifierToken() { js_free(value_); } + + virtual bool IsIdentifier() const override { return true; } + virtual bool CanAddressMemory() const override { return strcmp(value(), "pc") == 0; } + virtual uint8_t* ToAddress(Debugger* debugger) const override; + virtual void Print(FILE* out = stdout) const override; + + static Token* Tokenize(const char* arg); + static IdentifierToken* Cast(Token* tok) { + VIXL_ASSERT(tok->IsIdentifier()); + return reinterpret_cast<IdentifierToken*>(tok); + } +}; + +// 64-bit address literal. +// Format: 0x... with up to 16 hexadecimal digits. +class AddressToken : public ValueToken<uint8_t*> { + public: + explicit AddressToken(uint8_t* address) : ValueToken<uint8_t*>(address) {} + + virtual bool IsAddress() const override { return true; } + virtual bool CanAddressMemory() const override { return true; } + virtual uint8_t* ToAddress(Debugger* debugger) const override; + virtual void Print(FILE* out = stdout) const override; + + static Token* Tokenize(const char* arg); + static AddressToken* Cast(Token* tok) { + VIXL_ASSERT(tok->IsAddress()); + return reinterpret_cast<AddressToken*>(tok); + } +}; + + +// 64-bit decimal integer literal. +// Format: n. +class IntegerToken : public ValueToken<int64_t> { + public: + explicit IntegerToken(int64_t value) : ValueToken<int64_t>(value) {} + + virtual bool IsInteger() const override { return true; } + virtual void Print(FILE* out = stdout) const override; + + static Token* Tokenize(const char* arg); + static IntegerToken* Cast(Token* tok) { + VIXL_ASSERT(tok->IsInteger()); + return reinterpret_cast<IntegerToken*>(tok); + } +}; + +// Literal describing how to print a chunk of data (up to 64 bits). 
+// Format: .ln +// where l (letter) is one of +// * x: hexadecimal +// * s: signed integer +// * u: unsigned integer +// * f: floating point +// * i: instruction +// and n (size) is one of 8, 16, 32 and 64. n should be omitted for +// instructions. +class FormatToken : public Token { + public: + FormatToken() {} + + virtual bool IsFormat() const override { return true; } + virtual int SizeOf() const = 0; + virtual char type_code() const = 0; + virtual void PrintData(void* data, FILE* out = stdout) const = 0; + virtual void Print(FILE* out = stdout) const override = 0; + + virtual uint8_t* ToAddress(Debugger* debugger) const override { + USE(debugger); + VIXL_ABORT(); + } + + static Token* Tokenize(const char* arg); + static FormatToken* Cast(Token* tok) { + VIXL_ASSERT(tok->IsFormat()); + return reinterpret_cast<FormatToken*>(tok); + } +}; + + +template<typename T> class Format : public FormatToken { + public: + Format(const char* fmt, char type_code) : fmt_(fmt), type_code_(type_code) {} + + virtual int SizeOf() const override { return sizeof(T); } + virtual char type_code() const override { return type_code_; } + virtual void PrintData(void* data, FILE* out = stdout) const override { + T value; + memcpy(&value, data, sizeof(value)); + fprintf(out, fmt_, value); + } + virtual void Print(FILE* out = stdout) const override; + + private: + const char* fmt_; + char type_code_; +}; + +// Tokens which don't fit any of the above. 
+class UnknownToken : public Token { + public: + explicit UnknownToken(const char* arg) { + size_t size = strlen(arg) + 1; + unknown_ = js_pod_malloc<char>(size); + strncpy(unknown_, arg, size); + } + virtual ~UnknownToken() { js_free(unknown_); } + virtual uint8_t* ToAddress(Debugger* debugger) const override { + USE(debugger); + VIXL_ABORT(); + } + + virtual bool IsUnknown() const override { return true; } + virtual void Print(FILE* out = stdout) const override; + + private: + char* unknown_; +}; + + +// All debugger commands must subclass DebugCommand and implement Run, Print +// and Build. Commands must also define kHelp and kAliases. +class DebugCommand { + public: + explicit DebugCommand(Token* name) : name_(IdentifierToken::Cast(name)) {} + DebugCommand() : name_(NULL) {} + virtual ~DebugCommand() { js_delete(name_); } + + const char* name() { return name_->value(); } + // Run the command on the given debugger. The command returns true if + // execution should move to the next instruction. 
+ virtual bool Run(Debugger * debugger) = 0; + virtual void Print(FILE* out = stdout); + + static bool Match(const char* name, const char** aliases); + static DebugCommand* Parse(char* line); + static void PrintHelp(const char** aliases, + const char* args, + const char* help); + + private: + IdentifierToken* name_; +}; + +// For all commands below see their respective kHelp and kAliases in +// debugger-a64.cc +class HelpCommand : public DebugCommand { + public: + explicit HelpCommand(Token* name) : DebugCommand(name) {} + + virtual bool Run(Debugger* debugger) override; + + static DebugCommand* Build(TokenVector&& args); + + static const char* kHelp; + static const char* kAliases[]; + static const char* kArguments; +}; + + +class ContinueCommand : public DebugCommand { + public: + explicit ContinueCommand(Token* name) : DebugCommand(name) {} + + virtual bool Run(Debugger* debugger) override; + + static DebugCommand* Build(TokenVector&& args); + + static const char* kHelp; + static const char* kAliases[]; + static const char* kArguments; +}; + + +class StepCommand : public DebugCommand { + public: + StepCommand(Token* name, IntegerToken* count) + : DebugCommand(name), count_(count) {} + virtual ~StepCommand() { js_delete(count_); } + + int64_t count() { return count_->value(); } + virtual bool Run(Debugger* debugger) override; + virtual void Print(FILE* out = stdout) override; + + static DebugCommand* Build(TokenVector&& args); + + static const char* kHelp; + static const char* kAliases[]; + static const char* kArguments; + + private: + IntegerToken* count_; +}; + +class DisasmCommand : public DebugCommand { + public: + static DebugCommand* Build(TokenVector&& args); + + static const char* kHelp; + static const char* kAliases[]; + static const char* kArguments; +}; + + +class PrintCommand : public DebugCommand { + public: + PrintCommand(Token* name, Token* target, FormatToken* format) + : DebugCommand(name), target_(target), format_(format) {} + virtual 
~PrintCommand() { + js_delete(target_); + js_delete(format_); + } + + Token* target() { return target_; } + FormatToken* format() { return format_; } + virtual bool Run(Debugger* debugger) override; + virtual void Print(FILE* out = stdout) override; + + static DebugCommand* Build(TokenVector&& args); + + static const char* kHelp; + static const char* kAliases[]; + static const char* kArguments; + + private: + Token* target_; + FormatToken* format_; +}; + +class ExamineCommand : public DebugCommand { + public: + ExamineCommand(Token* name, + Token* target, + FormatToken* format, + IntegerToken* count) + : DebugCommand(name), target_(target), format_(format), count_(count) {} + virtual ~ExamineCommand() { + js_delete(target_); + js_delete(format_); + js_delete(count_); + } + + Token* target() { return target_; } + FormatToken* format() { return format_; } + IntegerToken* count() { return count_; } + virtual bool Run(Debugger* debugger) override; + virtual void Print(FILE* out = stdout) override; + + static DebugCommand* Build(TokenVector&& args); + + static const char* kHelp; + static const char* kAliases[]; + static const char* kArguments; + + private: + Token* target_; + FormatToken* format_; + IntegerToken* count_; +}; + +// Commands which name does not match any of the known commnand. +class UnknownCommand : public DebugCommand { + public: + explicit UnknownCommand(TokenVector&& args) : args_(std::move(args)) {} + virtual ~UnknownCommand(); + + virtual bool Run(Debugger* debugger) override; + + private: + TokenVector args_; +}; + +// Commands which name match a known command but the syntax is invalid. 
+class InvalidCommand : public DebugCommand { + public: + InvalidCommand(TokenVector&& args, int index, const char* cause) + : args_(std::move(args)), index_(index), cause_(cause) {} + virtual ~InvalidCommand(); + + virtual bool Run(Debugger* debugger) override; + + private: + TokenVector args_; + int index_; + const char* cause_; +}; + +const char* HelpCommand::kAliases[] = { "help", NULL }; +const char* HelpCommand::kArguments = NULL; +const char* HelpCommand::kHelp = " Print this help."; + +const char* ContinueCommand::kAliases[] = { "continue", "c", NULL }; +const char* ContinueCommand::kArguments = NULL; +const char* ContinueCommand::kHelp = " Resume execution."; + +const char* StepCommand::kAliases[] = { "stepi", "si", NULL }; +const char* StepCommand::kArguments = "[n = 1]"; +const char* StepCommand::kHelp = " Execute n next instruction(s)."; + +const char* DisasmCommand::kAliases[] = { "disasm", "di", NULL }; +const char* DisasmCommand::kArguments = "[n = 10]"; +const char* DisasmCommand::kHelp = + " Disassemble n instruction(s) at pc.\n" + " This command is equivalent to x pc.i [n = 10]." +; + +const char* PrintCommand::kAliases[] = { "print", "p", NULL }; +const char* PrintCommand::kArguments = "<entity>[.format]"; +const char* PrintCommand::kHelp = + " Print the given entity according to the given format.\n" + " The format parameter only affects individual registers; it is ignored\n" + " for other entities.\n" + " <entity> can be one of the following:\n" + " * A register name (such as x0, s1, ...).\n" + " * 'regs', to print all integer (W and X) registers.\n" + " * 'fpregs' to print all floating-point (S and D) registers.\n" + " * 'sysregs' to print all system registers (including NZCV).\n" + " * 'pc' to print the current program counter.\n" +; + +const char* ExamineCommand::kAliases[] = { "m", "mem", "x", NULL }; +const char* ExamineCommand::kArguments = "<addr>[.format] [n = 10]"; +const char* ExamineCommand::kHelp = + " Examine memory. 
Print n items of memory at address <addr> according to\n" + " the given [.format].\n" + " Addr can be an immediate address, a register name or pc.\n" + " Format is made of a type letter: 'x' (hexadecimal), 's' (signed), 'u'\n" + " (unsigned), 'f' (floating point), i (instruction) and a size in bits\n" + " when appropriate (8, 16, 32, 64)\n" + " E.g 'x sp.x64' will print 10 64-bit words from the stack in\n" + " hexadecimal format." +; + +const char* RegisterToken::kXAliases[kNumberOfRegisters][kMaxAliasNumber] = { + { "x0", NULL }, + { "x1", NULL }, + { "x2", NULL }, + { "x3", NULL }, + { "x4", NULL }, + { "x5", NULL }, + { "x6", NULL }, + { "x7", NULL }, + { "x8", NULL }, + { "x9", NULL }, + { "x10", NULL }, + { "x11", NULL }, + { "x12", NULL }, + { "x13", NULL }, + { "x14", NULL }, + { "x15", NULL }, + { "ip0", "x16", NULL }, + { "ip1", "x17", NULL }, + { "x18", "pr", NULL }, + { "x19", NULL }, + { "x20", NULL }, + { "x21", NULL }, + { "x22", NULL }, + { "x23", NULL }, + { "x24", NULL }, + { "x25", NULL }, + { "x26", NULL }, + { "x27", NULL }, + { "x28", NULL }, + { "fp", "x29", NULL }, + { "lr", "x30", NULL }, + { "sp", NULL} +}; + +const char* RegisterToken::kWAliases[kNumberOfRegisters][kMaxAliasNumber] = { + { "w0", NULL }, + { "w1", NULL }, + { "w2", NULL }, + { "w3", NULL }, + { "w4", NULL }, + { "w5", NULL }, + { "w6", NULL }, + { "w7", NULL }, + { "w8", NULL }, + { "w9", NULL }, + { "w10", NULL }, + { "w11", NULL }, + { "w12", NULL }, + { "w13", NULL }, + { "w14", NULL }, + { "w15", NULL }, + { "w16", NULL }, + { "w17", NULL }, + { "w18", NULL }, + { "w19", NULL }, + { "w20", NULL }, + { "w21", NULL }, + { "w22", NULL }, + { "w23", NULL }, + { "w24", NULL }, + { "w25", NULL }, + { "w26", NULL }, + { "w27", NULL }, + { "w28", NULL }, + { "w29", NULL }, + { "w30", NULL }, + { "wsp", NULL } +}; + + +Debugger::Debugger(Decoder* decoder, FILE* stream) + : Simulator(decoder, stream), + debug_parameters_(DBG_INACTIVE), + pending_request_(false), + steps_(0), + 
last_command_(NULL) { + disasm_ = js_new<PrintDisassembler>(stdout); + printer_ = js_new<Decoder>(); + printer_->AppendVisitor(disasm_); +} + + +Debugger::~Debugger() { + js_delete(disasm_); + js_delete(printer_); +} + + +void Debugger::Run() { + pc_modified_ = false; + while (pc_ != kEndOfSimAddress) { + if (pending_request()) RunDebuggerShell(); + ExecuteInstruction(); + LogAllWrittenRegisters(); + } +} + + +void Debugger::PrintInstructions(const void* address, int64_t count) { + if (count == 0) { + return; + } + + const Instruction* from = Instruction::CastConst(address); + if (count < 0) { + count = -count; + from -= (count - 1) * kInstructionSize; + } + const Instruction* to = from + count * kInstructionSize; + + for (const Instruction* current = from; + current < to; + current = current->NextInstruction()) { + printer_->Decode(current); + } +} + + +void Debugger::PrintMemory(const uint8_t* address, + const FormatToken* format, + int64_t count) { + if (count == 0) { + return; + } + + const uint8_t* from = address; + int size = format->SizeOf(); + if (count < 0) { + count = -count; + from -= (count - 1) * size; + } + const uint8_t* to = from + count * size; + + for (const uint8_t* current = from; current < to; current += size) { + if (((current - from) % 8) == 0) { + printf("\n%p: ", current); + } + + uint64_t data = Memory::Read<uint64_t>(current); + format->PrintData(&data); + printf(" "); + } + printf("\n\n"); +} + + +void Debugger::PrintRegister(const Register& target_reg, + const char* name, + const FormatToken* format) { + const uint64_t reg_size = target_reg.size(); + const uint64_t format_size = format->SizeOf() * 8; + const uint64_t count = reg_size / format_size; + const uint64_t mask = 0xffffffffffffffff >> (64 - format_size); + const uint64_t reg_value = reg<uint64_t>(target_reg.code(), + Reg31IsStackPointer); + VIXL_ASSERT(count > 0); + + printf("%s = ", name); + for (uint64_t i = 1; i <= count; i++) { + uint64_t data = reg_value >> (reg_size - (i 
* format_size)); + data &= mask; + format->PrintData(&data); + printf(" "); + } + printf("\n"); +} + + +// TODO(all): fix this for vector registers. +void Debugger::PrintFPRegister(const FPRegister& target_fpreg, + const FormatToken* format) { + const unsigned fpreg_size = target_fpreg.size(); + const uint64_t format_size = format->SizeOf() * 8; + const uint64_t count = fpreg_size / format_size; + const uint64_t mask = 0xffffffffffffffff >> (64 - format_size); + const uint64_t fpreg_value = vreg<uint64_t>(fpreg_size, target_fpreg.code()); + VIXL_ASSERT(count > 0); + + if (target_fpreg.Is32Bits()) { + printf("s%u = ", target_fpreg.code()); + } else { + printf("d%u = ", target_fpreg.code()); + } + for (uint64_t i = 1; i <= count; i++) { + uint64_t data = fpreg_value >> (fpreg_size - (i * format_size)); + data &= mask; + format->PrintData(&data); + printf(" "); + } + printf("\n"); +} + + +void Debugger::VisitException(const Instruction* instr) { + switch (instr->Mask(ExceptionMask)) { + case BRK: + DoBreakpoint(instr); + return; + case HLT: + VIXL_FALLTHROUGH(); + default: Simulator::VisitException(instr); + } +} + + +// Read a command. A command will be at most kMaxDebugShellLine char long and +// ends with '\n\0'. +// TODO: Should this be a utility function? +char* Debugger::ReadCommandLine(const char* prompt, char* buffer, int length) { + int fgets_calls = 0; + char* end = NULL; + + printf("%s", prompt); + fflush(stdout); + + do { + if (fgets(buffer, length, stdin) == NULL) { + printf(" ** Error while reading command. **\n"); + return NULL; + } + + fgets_calls++; + end = strchr(buffer, '\n'); + } while (end == NULL); + + if (fgets_calls != 1) { + printf(" ** Command too long. **\n"); + return NULL; + } + + // Remove the newline from the end of the command. 
+ VIXL_ASSERT(end[1] == '\0'); + VIXL_ASSERT((end - buffer) < (length - 1)); + end[0] = '\0'; + + return buffer; +} + + +void Debugger::RunDebuggerShell() { + if (IsDebuggerRunning()) { + if (steps_ > 0) { + // Finish stepping first. + --steps_; + return; + } + + printf("Next: "); + PrintInstructions(pc()); + bool done = false; + while (!done) { + char buffer[kMaxDebugShellLine]; + char* line = ReadCommandLine("vixl> ", buffer, kMaxDebugShellLine); + + if (line == NULL) continue; // An error occurred. + + DebugCommand* command = DebugCommand::Parse(line); + if (command != NULL) { + last_command_ = command; + } + + if (last_command_ != NULL) { + done = last_command_->Run(this); + } else { + printf("No previous command to run!\n"); + } + } + + if ((debug_parameters_ & DBG_BREAK) != 0) { + // The break request has now been handled, move to next instruction. + debug_parameters_ &= ~DBG_BREAK; + increment_pc(); + } + } +} + + +void Debugger::DoBreakpoint(const Instruction* instr) { + VIXL_ASSERT(instr->Mask(ExceptionMask) == BRK); + + printf("Hit breakpoint at pc=%p.\n", reinterpret_cast<const void*>(instr)); + set_debug_parameters(debug_parameters() | DBG_BREAK | DBG_ACTIVE); + // Make the shell point to the brk instruction. + set_pc(instr); +} + + +static bool StringToUInt64(uint64_t* value, const char* line, int base = 10) { + char* endptr = NULL; + errno = 0; // Reset errors. + uint64_t parsed = strtoul(line, &endptr, base); + + if (errno == ERANGE) { + // Overflow. + return false; + } + + if (endptr == line) { + // No digits were parsed. + return false; + } + + if (*endptr != '\0') { + // Non-digit characters present at the end. + return false; + } + + *value = parsed; + return true; +} + + +static bool StringToInt64(int64_t* value, const char* line, int base = 10) { + char* endptr = NULL; + errno = 0; // Reset errors. + int64_t parsed = strtol(line, &endptr, base); + + if (errno == ERANGE) { + // Overflow, undeflow. 
+ return false; + } + + if (endptr == line) { + // No digits were parsed. + return false; + } + + if (*endptr != '\0') { + // Non-digit characters present at the end. + return false; + } + + *value = parsed; + return true; +} + + +Token* Token::Tokenize(const char* arg) { + if ((arg == NULL) || (*arg == '\0')) { + return NULL; + } + + // The order is important. For example Identifier::Tokenize would consider + // any register to be a valid identifier. + + Token* token = RegisterToken::Tokenize(arg); + if (token != NULL) { + return token; + } + + token = FPRegisterToken::Tokenize(arg); + if (token != NULL) { + return token; + } + + token = IdentifierToken::Tokenize(arg); + if (token != NULL) { + return token; + } + + token = AddressToken::Tokenize(arg); + if (token != NULL) { + return token; + } + + token = IntegerToken::Tokenize(arg); + if (token != NULL) { + return token; + } + + return js_new<UnknownToken>(arg); +} + + +uint8_t* RegisterToken::ToAddress(Debugger* debugger) const { + VIXL_ASSERT(CanAddressMemory()); + uint64_t reg_value = debugger->xreg(value().code(), Reg31IsStackPointer); + uint8_t* address = NULL; + memcpy(&address, ®_value, sizeof(address)); + return address; +} + + +void RegisterToken::Print(FILE* out) const { + VIXL_ASSERT(value().IsValid()); + fprintf(out, "[Register %s]", Name()); +} + + +const char* RegisterToken::Name() const { + if (value().Is32Bits()) { + return kWAliases[value().code()][0]; + } else { + return kXAliases[value().code()][0]; + } +} + + +Token* RegisterToken::Tokenize(const char* arg) { + for (unsigned i = 0; i < kNumberOfRegisters; i++) { + // Is it a X register or alias? + for (const char** current = kXAliases[i]; *current != NULL; current++) { + if (strcmp(arg, *current) == 0) { + return js_new<RegisterToken>(Register::XRegFromCode(i)); + } + } + + // Is it a W register or alias? 
+    for (const char** current = kWAliases[i]; *current != NULL; current++) {
+      if (strcmp(arg, *current) == 0) {
+        return js_new<RegisterToken>(Register::WRegFromCode(i));
+      }
+    }
+  }
+
+  return NULL;
+}
+
+
+// Prints the token as "[FPRegister sN]" or "[FPRegister dN]".
+void FPRegisterToken::Print(FILE* out) const {
+  VIXL_ASSERT(value().IsValid());
+  char prefix = value().Is32Bits() ? 's' : 'd';
+  fprintf(out, "[FPRegister %c%" PRIu32 "]", prefix, value().code());
+}
+
+
+// Parses "s<N>" or "d<N>" into an FP register token. Returns NULL when the
+// text does not have that shape or N is not a valid register code.
+Token* FPRegisterToken::Tokenize(const char* arg) {
+  if (strlen(arg) < 2) {
+    return NULL;
+  }
+
+  switch (*arg) {
+    case 's':
+    case 'd':
+      const char* cursor = arg + 1;
+      uint64_t code = 0;
+      if (!StringToUInt64(&code, cursor)) {
+        return NULL;
+      }
+
+      // Register codes are zero-based, so kNumberOfFPRegisters itself is
+      // already out of range.
+      if (code >= kNumberOfFPRegisters) {
+        return NULL;
+      }
+
+      VRegister fpreg = NoVReg;
+      switch (*arg) {
+        case 's':
+          fpreg = VRegister::SRegFromCode(static_cast<unsigned>(code));
+          break;
+        case 'd':
+          fpreg = VRegister::DRegFromCode(static_cast<unsigned>(code));
+          break;
+        default: VIXL_UNREACHABLE();
+      }
+
+      return js_new<FPRegisterToken>(fpreg);
+  }
+
+  return NULL;
+}
+
+
+// Returns the debugger's current pc, reinterpreted as a byte address.
+uint8_t* IdentifierToken::ToAddress(Debugger* debugger) const {
+  VIXL_ASSERT(CanAddressMemory());
+  const Instruction* pc_value = debugger->pc();
+  uint8_t* address = NULL;
+  memcpy(&address, &pc_value, sizeof(address));
+  return address;
+}
+
+void IdentifierToken::Print(FILE* out) const {
+  fprintf(out, "[Identifier %s]", value());
+}
+
+
+// Accepts a letter followed only by letters and digits; returns NULL for
+// anything else.
+Token* IdentifierToken::Tokenize(const char* arg) {
+  // The <ctype.h> classifiers are only defined for values representable as
+  // unsigned char (or EOF), so convert before calling them.
+  if (!isalpha(static_cast<unsigned char>(arg[0]))) {
+    return NULL;
+  }
+
+  const char* cursor = arg + 1;
+  while ((*cursor != '\0') && isalnum(static_cast<unsigned char>(*cursor))) {
+    ++cursor;
+  }
+
+  if (*cursor == '\0') {
+    return js_new<IdentifierToken>(arg);
+  }
+
+  return NULL;
+}
+
+
+uint8_t* AddressToken::ToAddress(Debugger* debugger) const {
+  USE(debugger);
+  return value();
+}
+
+
+void AddressToken::Print(FILE* out) const {
+  fprintf(out, "[Address %p]", value());
+}
+
+
+// Parses a "0x"-prefixed hexadecimal literal into an address token.
+Token* AddressToken::Tokenize(const char* arg) {
+  if ((strlen(arg) < 3) || (arg[0] != '0') || (arg[1] != 'x')) {
+    return NULL;
+  }
+
+  uint64_t ptr = 0;
+  if (!StringToUInt64(&ptr, arg, 16)) {
+    return NULL;
+  }
+
+  uint8_t* address = reinterpret_cast<uint8_t*>(ptr);
+  return js_new<AddressToken>(address);
+}
+
+
+void IntegerToken::Print(FILE* out) const {
+  fprintf(out, "[Integer %" PRId64 "]", value());
+}
+
+
+// Parses a signed integer; returns NULL when StringToInt64 rejects the text.
+Token* IntegerToken::Tokenize(const char* arg) {
+  int64_t value = 0;
+  if (!StringToInt64(&value, arg)) {
+    return NULL;
+  }
+
+  return js_new<IntegerToken>(value);
+}
+
+
+// Parses a format specifier: a type letter ('x', 's', 'u' or 'f') followed by
+// a bit width, or the single letter 'i'. Returns NULL for anything else.
+Token* FormatToken::Tokenize(const char* arg) {
+  size_t length = strlen(arg);
+  switch (arg[0]) {
+    case 'x':
+    case 's':
+    case 'u':
+    case 'f':
+      if (length == 1) return NULL;
+      break;
+    case 'i':
+      if (length == 1) return js_new<Format<uint32_t>>("%08" PRIx32, 'i');
+      VIXL_FALLTHROUGH();
+    default: return NULL;
+  }
+
+  char* endptr = NULL;
+  errno = 0;  // Reset errors.
+  uint64_t count = strtoul(arg + 1, &endptr, 10);
+
+  if (errno != 0) {
+    // Overflow, etc.
+    return NULL;
+  }
+
+  if (endptr == arg + 1) {
+    // No digits were parsed (parsing started just after the type letter).
+    return NULL;
+  }
+
+  if (*endptr != '\0') {
+    // There are unexpected (non-digit) characters after the number.
+    return NULL;
+  }
+
+  switch (arg[0]) {
+    case 'x':
+      switch (count) {
+        case 8: return js_new<Format<uint8_t>>("%02" PRIx8, 'x');
+        case 16: return js_new<Format<uint16_t>>("%04" PRIx16, 'x');
+        case 32: return js_new<Format<uint32_t>>("%08" PRIx32, 'x');
+        case 64: return js_new<Format<uint64_t>>("%016" PRIx64, 'x');
+        default: return NULL;
+      }
+    case 's':
+      switch (count) {
+        case 8: return js_new<Format<int8_t>>("%4" PRId8, 's');
+        case 16: return js_new<Format<int16_t>>("%6" PRId16, 's');
+        case 32: return js_new<Format<int32_t>>("%11" PRId32, 's');
+        case 64: return js_new<Format<int64_t>>("%20" PRId64, 's');
+        default: return NULL;
+      }
+    case 'u':
+      switch (count) {
+        case 8: return js_new<Format<uint8_t>>("%3" PRIu8, 'u');
+        case 16: return js_new<Format<uint16_t>>("%5" PRIu16, 'u');
+        case 32: return js_new<Format<uint32_t>>("%10" PRIu32, 'u');
+        case 64: return js_new<Format<uint64_t>>("%20" PRIu64, 'u');
+        default: return NULL;
+      }
+    case 'f':
+      switch (count) {
+        case 32: return js_new<Format<float>>("%13g", 'f');
+        case 64: return js_new<Format<double>>("%13g", 'f');
+        default: return NULL;
+      }
+    default:
+      VIXL_UNREACHABLE();
+      return NULL;
+  }
+}
+
+
+template<typename T>
+void Format<T>::Print(FILE* out) const {
+  unsigned size = sizeof(T) * 8;
+  fprintf(out, "[Format %c%u - %s]", type_code_, size, fmt_);
+}
+
+
+void UnknownToken::Print(FILE* out) const {
+  fprintf(out, "[Unknown %s]", unknown_);
+}
+
+
+void DebugCommand::Print(FILE* out) {
+  fprintf(out, "%s", name());
+}
+
+
+// Returns true when `name` equals one entry of the NULL-terminated `aliases`.
+bool DebugCommand::Match(const char* name, const char** aliases) {
+  for (const char** current = aliases; *current != NULL; current++) {
+    if (strcmp(name, *current) == 0) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+
+// Splits `line` on spaces and tabs (modifying it in place through strtok),
+// tokenizes each chunk and builds the command whose alias matches the first
+// token. Returns NULL for an empty line.
+DebugCommand* DebugCommand::Parse(char* line) {
+  TokenVector args;
+
+  for (char* chunk = strtok(line, " \t");
+       chunk != NULL;
+       chunk = strtok(NULL, " \t")) {
+    char* dot = strchr(chunk, '.');
+    if (dot != NULL) {
+      // 'Token.format'.
+      Token* format = FormatToken::Tokenize(dot + 1);
+      if (format != NULL) {
+        *dot = '\0';
+        (void)args.append(Token::Tokenize(chunk));
+        (void)args.append(format);
+      } else {
+        // Error while parsing the format, push the UnknownToken so an error
+        // can be accurately reported.
+        (void)args.append(Token::Tokenize(chunk));
+      }
+    } else {
+      (void)args.append(Token::Tokenize(chunk));
+    }
+  }
+
+  if (args.empty()) {
+    return NULL;
+  }
+
+  if (!args[0]->IsIdentifier()) {
+    return js_new<InvalidCommand>(std::move(args), 0, "command name is not valid");
+  }
+
+  const char* name = IdentifierToken::Cast(args[0])->value();
+  #define RETURN_IF_MATCH(Command)       \
+  if (Match(name, Command::kAliases)) {  \
+    return Command::Build(std::move(args));         \
+  }
+  DEBUG_COMMAND_LIST(RETURN_IF_MATCH);
+  #undef RETURN_IF_MATCH
+
+  return js_new<UnknownCommand>(std::move(args));
+}
+
+
+// Prints every alias (followed by `args` when given) and then the help text.
+void DebugCommand::PrintHelp(const char** aliases,
+                             const char* args,
+                             const char* help) {
+  VIXL_ASSERT(aliases[0] != NULL);
+  VIXL_ASSERT(help != NULL);
+
+  printf("\n----\n\n");
+  for (const char** current = aliases; *current != NULL; current++) {
+    if (args != NULL) {
+      printf("%s %s\n", *current, args);
+    } else {
+      printf("%s\n", *current);
+    }
+  }
+  printf("\n%s\n", help);
+}
+
+
+bool HelpCommand::Run(Debugger* debugger) {
+  VIXL_ASSERT(debugger->IsDebuggerRunning());
+  USE(debugger);
+
+  #define PRINT_HELP(Command)                    \
+    DebugCommand::PrintHelp(Command::kAliases,   \
+                            Command::kArguments, \
+                            Command::kHelp);
+  DEBUG_COMMAND_LIST(PRINT_HELP);
+  #undef PRINT_HELP
+  printf("\n----\n\n");
+
+  return false;
+}
+
+
+DebugCommand* HelpCommand::Build(TokenVector&& args) {
+  if (args.length() != 1) {
+    return js_new<InvalidCommand>(std::move(args), -1, "too many arguments");
+  }
+
+  return js_new<HelpCommand>(args[0]);
+}
+
+
+// Clears DBG_ACTIVE in the debugger parameters.
+bool ContinueCommand::Run(Debugger* debugger) {
+  VIXL_ASSERT(debugger->IsDebuggerRunning());
+
+  debugger->set_debug_parameters(debugger->debug_parameters() & ~DBG_ACTIVE);
+  return true;
+}
+
+
+DebugCommand* ContinueCommand::Build(TokenVector&& args) {
+  if (args.length() != 1) {
+    return js_new<InvalidCommand>(std::move(args), -1, "too many arguments");
+  }
+
+  return js_new<ContinueCommand>(args[0]);
+}
+
+
+// Reports a negative count; for a count above one, records `count - 1` as
+// the debugger's step counter.
+bool StepCommand::Run(Debugger* debugger) {
+  VIXL_ASSERT(debugger->IsDebuggerRunning());
+
+  int64_t steps = count();
+  if (steps < 0) {
+    printf(" ** invalid value for steps: %" PRId64 " (<0) **\n", steps);
+  } else if (steps > 1) {
+    debugger->set_steps(steps - 1);
+  }
+
+  return true;
+}
+
+
+void StepCommand::Print(FILE* out) {
+  fprintf(out, "%s %" PRId64 "", name(), count());
+}
+
+
+// Accepts "step" (the count defaults to 1) or "step n".
+DebugCommand* StepCommand::Build(TokenVector&& args) {
+  IntegerToken* count = NULL;
+  switch (args.length()) {
+    case 1: {  // step [1]
+      count = js_new<IntegerToken>(1);
+      break;
+    }
+    case 2: {  // step n
+      Token* first = args[1];
+      if (!first->IsInteger()) {
+        return js_new<InvalidCommand>(std::move(args), 1, "expects int");
+      }
+      count = IntegerToken::Cast(first);
+      break;
+    }
+    default:
+      return js_new<InvalidCommand>(std::move(args), -1, "too many arguments");
+  }
+
+  return js_new<StepCommand>(args[0], count);
+}
+
+
+// Accepts "disasm" (the count defaults to 10) or "disasm n" and builds an
+// ExamineCommand on "pc" with the instruction format.
+DebugCommand* DisasmCommand::Build(TokenVector&& args) {
+  IntegerToken* count = NULL;
+  switch (args.length()) {
+    case 1: {  // disasm [10]
+      count = js_new<IntegerToken>(10);
+      break;
+    }
+    case 2: {  // disasm n
+      Token* first = args[1];
+      if (!first->IsInteger()) {
+        return js_new<InvalidCommand>(std::move(args), 1, "expects int");
+      }
+
+      count = IntegerToken::Cast(first);
+      break;
+    }
+    default:
+      return js_new<InvalidCommand>(std::move(args), -1, "too many arguments");
+  }
+
+  Token* target = js_new<IdentifierToken>("pc");
+  FormatToken* format = js_new<Format<uint32_t>>("%08" PRIx32, 'i');
+  return js_new<ExamineCommand>(args[0], target, format, count);
+}
+
+
+void PrintCommand::Print(FILE* out) {
+  fprintf(out, "%s ", name());
+  target()->Print(out);
+  if (format() != NULL) format()->Print(out);
+}
+
+
+// Prints the target: one of the identifiers "regs", "fpregs", "sysregs" or
+// "pc", or a single register / FP register using the stored format.
+bool PrintCommand::Run(Debugger* debugger) {
+  VIXL_ASSERT(debugger->IsDebuggerRunning());
+
+  Token* tok = target();
+  if (tok->IsIdentifier()) {
+    char* identifier = IdentifierToken::Cast(tok)->value();
+    if (strcmp(identifier, "regs") == 0) {
+      debugger->PrintRegisters();
+    } else if (strcmp(identifier, "fpregs") == 0) {
+      debugger->PrintVRegisters();
+    } else if (strcmp(identifier, "sysregs") == 0) {
+      debugger->PrintSystemRegisters();
+    } else if (strcmp(identifier, "pc") == 0) {
+      printf("pc = %16p\n", reinterpret_cast<const void*>(debugger->pc()));
+    } else {
+      printf(" ** Unknown identifier to print: %s **\n", identifier);
+    }
+
+    return false;
+  }
+
+  FormatToken* format_tok = format();
+  VIXL_ASSERT(format_tok != NULL);
+  if (format_tok->type_code() == 'i') {
+    // TODO(all): Add support for instruction disassembly.
+    printf(" ** unsupported format: instructions **\n");
+    return false;
+  }
+
+  if (tok->IsRegister()) {
+    RegisterToken* reg_tok = RegisterToken::Cast(tok);
+    Register reg = reg_tok->value();
+    debugger->PrintRegister(reg, reg_tok->Name(), format_tok);
+    return false;
+  }
+
+  if (tok->IsFPRegister()) {
+    FPRegister fpreg = FPRegisterToken::Cast(tok)->value();
+    debugger->PrintFPRegister(fpreg, format_tok);
+    return false;
+  }
+
+  VIXL_UNREACHABLE();
+  return false;
+}
+
+
+// Accepts "print target" or "print target format"; a format is only allowed
+// with a register target and must not be wider than that register.
+DebugCommand* PrintCommand::Build(TokenVector&& args) {
+  if (args.length() < 2) {
+    return js_new<InvalidCommand>(std::move(args), -1, "too few arguments");
+  }
+
+  Token* target = args[1];
+  if (!target->IsRegister() &&
+      !target->IsFPRegister() &&
+      !target->IsIdentifier()) {
+    return js_new<InvalidCommand>(std::move(args), 1, "expects reg or identifier");
+  }
+
+  FormatToken* format = NULL;
+  int target_size = 0;
+  if (target->IsRegister()) {
+    Register reg = RegisterToken::Cast(target)->value();
+    target_size = reg.SizeInBytes();
+  } else if (target->IsFPRegister()) {
+    FPRegister fpreg = FPRegisterToken::Cast(target)->value();
+    target_size = fpreg.SizeInBytes();
+  }
+  // If the target is an identifier there must be no format. This is checked
+  // in the switch statement below.
+
+  switch (args.length()) {
+    case 2: {
+      if (target->IsRegister()) {
+        switch (target_size) {
+          case 4: format = js_new<Format<uint32_t>>("%08" PRIx32, 'x'); break;
+          case 8: format = js_new<Format<uint64_t>>("%016" PRIx64, 'x'); break;
+          default: VIXL_UNREACHABLE();
+        }
+      } else if (target->IsFPRegister()) {
+        switch (target_size) {
+          case 4: format = js_new<Format<float>>("%8g", 'f'); break;
+          case 8: format = js_new<Format<double>>("%8g", 'f'); break;
+          default: VIXL_UNREACHABLE();
+        }
+      }
+      break;
+    }
+    case 3: {
+      if (target->IsIdentifier()) {
+        return js_new<InvalidCommand>(std::move(args), 2,
+            "format is only allowed with registers");
+      }
+
+      Token* second = args[2];
+      if (!second->IsFormat()) {
+        return js_new<InvalidCommand>(std::move(args), 2, "expects format");
+      }
+      format = FormatToken::Cast(second);
+
+      if (format->SizeOf() > target_size) {
+        return js_new<InvalidCommand>(std::move(args), 2, "format too wide");
+      }
+
+      break;
+    }
+    default:
+      return js_new<InvalidCommand>(std::move(args), -1, "too many arguments");
+  }
+
+  return js_new<PrintCommand>(args[0], target, format);
+}
+
+
+// Prints `count` items starting at the target address, as instructions when
+// the format's type code is 'i' and as formatted memory otherwise.
+bool ExamineCommand::Run(Debugger* debugger) {
+  VIXL_ASSERT(debugger->IsDebuggerRunning());
+
+  uint8_t* address = target()->ToAddress(debugger);
+  int64_t amount = count()->value();
+  if (format()->type_code() == 'i') {
+    debugger->PrintInstructions(address, amount);
+  } else {
+    debugger->PrintMemory(address, format(), amount);
+  }
+
+  return false;
+}
+
+
+void ExamineCommand::Print(FILE* out) {
+  fprintf(out, "%s ", name());
+  format()->Print(out);
+  target()->Print(out);
+}
+
+
+// Accepts "mem addr", "mem addr.format", "mem addr n" or "mem addr.format n".
+// The format defaults to x64 and the count to 10.
+DebugCommand* ExamineCommand::Build(TokenVector&& args) {
+  if (args.length() < 2) {
+    return js_new<InvalidCommand>(std::move(args), -1, "too few arguments");
+  }
+
+  Token* target = args[1];
+  if (!target->CanAddressMemory()) {
+    return js_new<InvalidCommand>(std::move(args), 1, "expects address");
+  }
+
+  FormatToken* format = NULL;
+  IntegerToken* count = NULL;
+
+  switch (args.length()) {
+    case 2: {  // mem addr[.x64] [10]
+      format = js_new<Format<uint64_t>>("%016" PRIx64, 'x');
+      count = js_new<IntegerToken>(10);
+      break;
+    }
+    case 3: {  // mem addr.format [10]
+               // mem addr[.x64] n
+      Token* second = args[2];
+      if (second->IsFormat()) {
+        format = FormatToken::Cast(second);
+        count = js_new<IntegerToken>(10);
+      } else if (second->IsInteger()) {
+        format = js_new<Format<uint64_t>>("%016" PRIx64, 'x');
+        count = IntegerToken::Cast(second);
+      } else {
+        return js_new<InvalidCommand>(std::move(args), 2, "expects format or integer");
+      }
+      // Both accepted forms leave the switch here; the integer form must not
+      // run into an unreachable marker.
+      break;
+    }
+    case 4: {  // mem addr.format n
+      Token* second = args[2];
+      Token* third = args[3];
+      if (!second->IsFormat() || !third->IsInteger()) {
+        return js_new<InvalidCommand>(std::move(args), -1, "expects addr[.format] [n]");
+      }
+      format = FormatToken::Cast(second);
+      count = IntegerToken::Cast(third);
+      break;
+    }
+    default:
+      return js_new<InvalidCommand>(std::move(args), -1, "too many arguments");
+  }
+
+  return js_new<ExamineCommand>(args[0], target, format, count);
+}
+
+
+// Deletes every token held in args_.
+UnknownCommand::~UnknownCommand() {
+  const size_t size = args_.length();
+  for (size_t i = 0; i < size; ++i) {
+    js_delete(args_[i]);
+  }
+}
+
+
+bool UnknownCommand::Run(Debugger* debugger) {
+  VIXL_ASSERT(debugger->IsDebuggerRunning());
+  USE(debugger);
+
+  printf(" ** Unknown Command:");
+  const size_t size = args_.length();
+  for (size_t i = 0; i < size; ++i) {
+    printf(" ");
+    args_[i]->Print(stdout);
+  }
+  printf(" **\n");
+
+  return false;
+}
+
+
+// Deletes every token held in args_.
+InvalidCommand::~InvalidCommand() {
+  const size_t size = args_.length();
+  for (size_t i = 0; i < size; ++i) {
+    js_delete(args_[i]);
+  }
+}
+
+
+// Prints every argument, wrapping the one at index_ in ">>" "<<", followed by
+// the cause. An index_ of -1 converts to a value no argument position can
+// equal, so nothing is highlighted.
+bool InvalidCommand::Run(Debugger* debugger) {
+  VIXL_ASSERT(debugger->IsDebuggerRunning());
+  USE(debugger);
+
+  printf(" ** Invalid Command:");
+  const size_t size = args_.length();
+  for (size_t i = 0; i < size; ++i) {
+    printf(" ");
+    if (i == static_cast<size_t>(index_)) {
+      printf(">>");
+      args_[i]->Print(stdout);
+      printf("<<");
+    } else {
+      args_[i]->Print(stdout);
+    }
+  }
+  printf(" **\n");
+  printf(" ** %s\n", cause_);
+
+  return false;
+}
+
+}  // namespace vixl
+
+#endif  // JS_SIMULATOR_ARM64
diff --git a/js/src/jit/arm64/vixl/Debugger-vixl.h b/js/src/jit/arm64/vixl/Debugger-vixl.h
new file mode 100644
index 0000000000..7236bf1e5e
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Debugger-vixl.h
@@ -0,0 +1,117 @@
+// Copyright 2014, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifdef JS_SIMULATOR_ARM64
+
+#ifndef VIXL_A64_DEBUGGER_A64_H_
+#define VIXL_A64_DEBUGGER_A64_H_
+
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+
+#include "jit/arm64/vixl/Constants-vixl.h"
+#include "jit/arm64/vixl/Globals-vixl.h"
+#include "jit/arm64/vixl/Simulator-vixl.h"
+#include "jit/arm64/vixl/Utils-vixl.h"
+
+namespace vixl {
+
+// Flags that represent the debugger state.
+enum DebugParameters {
+  DBG_INACTIVE = 0,
+  DBG_ACTIVE = 1 << 0,  // The debugger is active.
+  DBG_BREAK  = 1 << 1   // The debugger is at a breakpoint.
+};
+
+// Forward declarations.
+class DebugCommand;
+class Token;
+class FormatToken;
+
+// A Simulator that adds an interactive debugger shell.
+class Debugger : public Simulator {
+ public:
+  explicit Debugger(Decoder* decoder, FILE* stream = stdout);
+  ~Debugger();
+
+  virtual void Run() override;
+  virtual void VisitException(const Instruction* instr) override;
+
+  // Bitwise combination of DebugParameters flags. Setting it also refreshes
+  // pending_request().
+  int debug_parameters() const { return debug_parameters_; }
+  void set_debug_parameters(int parameters) {
+    debug_parameters_ = parameters;
+
+    update_pending_request();
+  }
+
+  // Numbers of instructions to execute before the debugger shell is given
+  // back control.
+  int64_t steps() const { return steps_; }
+  void set_steps(int64_t value) {
+    // Any positive count is valid: a caller that has already accounted for
+    // one step passes (n - 1), which is 1 when n is 2.
+    VIXL_ASSERT(value >= 1);
+    steps_ = value;
+  }
+
+  // True when DBG_ACTIVE is set in the debug parameters.
+  bool IsDebuggerRunning() const {
+    return (debug_parameters_ & DBG_ACTIVE) != 0;
+  }
+
+  // Cached copy of IsDebuggerRunning(), refreshed by update_pending_request().
+  bool pending_request() const { return pending_request_; }
+  void update_pending_request() {
+    pending_request_ = IsDebuggerRunning();
+  }
+
+  void PrintInstructions(const void* address, int64_t count = 1);
+  void PrintMemory(const uint8_t* address,
+                   const FormatToken* format,
+                   int64_t count = 1);
+  void PrintRegister(const Register& target_reg,
+                     const char* name,
+                     const FormatToken* format);
+  void PrintFPRegister(const FPRegister& target_fpreg,
+                       const FormatToken* format);
+
+ private:
+  char* ReadCommandLine(const char* prompt, char* buffer, int length);
+  void RunDebuggerShell();
+  void DoBreakpoint(const Instruction* instr);
+
+  int debug_parameters_;
+  bool pending_request_;
+  int64_t steps_;
+  DebugCommand* last_command_;
+  PrintDisassembler* disasm_;
+  Decoder* printer_;
+
+  // Length of the biggest command line accepted by the debugger shell.
+  static const int kMaxDebugShellLine = 256;
+};
+
+}  // namespace vixl
+
+#endif  // VIXL_A64_DEBUGGER_A64_H_
+
+#endif  // JS_SIMULATOR_ARM64
diff --git a/js/src/jit/arm64/vixl/Decoder-vixl.cpp b/js/src/jit/arm64/vixl/Decoder-vixl.cpp
new file mode 100644
index 0000000000..884654ec8e
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Decoder-vixl.cpp
@@ -0,0 +1,899 @@
+// Copyright 2014, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "jit/arm64/vixl/Decoder-vixl.h"
+
+#include <algorithm>
+
+#include "jit/arm64/vixl/Globals-vixl.h"
+#include "jit/arm64/vixl/Utils-vixl.h"
+
+namespace vixl {
+
+// Top-level decode: dispatches on bits 27:24 to the per-group decoders, after
+// rejecting encodings whose bits 28:27 are both zero as unallocated.
+void Decoder::DecodeInstruction(const Instruction *instr) {
+  if (instr->Bits(28, 27) == 0) {
+    VisitUnallocated(instr);
+  } else {
+    switch (instr->Bits(27, 24)) {
+      // 0:   PC relative addressing.
+      case 0x0: DecodePCRelAddressing(instr); break;
+
+      // 1:   Add/sub immediate.
+      case 0x1: DecodeAddSubImmediate(instr); break;
+
+      // A:   Logical shifted register.
+      //      Add/sub with carry.
+      //      Conditional compare register.
+      //      Conditional compare immediate.
+      //      Conditional select.
+      //      Data processing 1 source.
+      //      Data processing 2 source.
+      // B:   Add/sub shifted register.
+      //      Add/sub extended register.
+      //      Data processing 3 source.
+      case 0xA:
+      case 0xB: DecodeDataProcessing(instr); break;
+
+      // 2:   Logical immediate.
+      //      Move wide immediate.
+      case 0x2: DecodeLogical(instr); break;
+
+      // 3:   Bitfield.
+      //      Extract.
+      case 0x3: DecodeBitfieldExtract(instr); break;
+
+      // 4:   Unconditional branch immediate.
+      //      Exception generation.
+      //      Compare and branch immediate.
+      // 5:   Compare and branch immediate.
+      //      Conditional branch.
+      //      System.
+      // 6,7: Unconditional branch.
+      //      Test and branch immediate.
+      case 0x4:
+      case 0x5:
+      case 0x6:
+      case 0x7: DecodeBranchSystemException(instr); break;
+
+      // 8,9: Load/store register pair post-index.
+      //      Load register literal.
+      //      Load/store register unscaled immediate.
+      //      Load/store register immediate post-index.
+      //      Load/store register immediate pre-index.
+      //      Load/store register offset.
+      //      Load/store exclusive.
+      // C,D: Load/store register pair offset.
+      //      Load/store register pair pre-index.
+      //      Load/store register unsigned immediate.
+      //      Advanced SIMD.
+      case 0x8:
+      case 0x9:
+      case 0xC:
+      case 0xD: DecodeLoadStore(instr); break;
+
+      // E:   FP fixed point conversion.
+      //      FP integer conversion.
+      //      FP data processing 1 source.
+      //      FP compare.
+      //      FP immediate.
+      //      FP data processing 2 source.
+      //      FP conditional compare.
+      //      FP conditional select.
+      //      Advanced SIMD.
+      // F:   FP data processing 3 source.
+      //      Advanced SIMD.
+      case 0xE:
+      case 0xF: DecodeFP(instr); break;
+    }
+  }
+}
+
+// Adds new_visitor at the end of the visitor list.
+void Decoder::AppendVisitor(DecoderVisitor* new_visitor) {
+  MOZ_ALWAYS_TRUE(visitors_.append(new_visitor));
+}
+
+
+// Adds new_visitor at the front of the visitor list.
+void Decoder::PrependVisitor(DecoderVisitor* new_visitor) {
+  MOZ_ALWAYS_TRUE(visitors_.insert(visitors_.begin(), new_visitor));
+}
+
+
+// Inserts new_visitor just before the first occurrence of registered_visitor,
+// or appends it when registered_visitor is not in the list.
+void Decoder::InsertVisitorBefore(DecoderVisitor* new_visitor,
+                                  DecoderVisitor* registered_visitor) {
+  for (auto it = visitors_.begin(); it != visitors_.end(); it++) {
+    if (*it == registered_visitor) {
+      MOZ_ALWAYS_TRUE(visitors_.insert(it, new_visitor));
+      return;
+    }
+  }
+  // We reached the end of the list without finding registered_visitor.
+  MOZ_ALWAYS_TRUE(visitors_.append(new_visitor));
+}
+
+
+// Inserts new_visitor just after the first occurrence of registered_visitor,
+// or appends it when registered_visitor is not in the list.
+void Decoder::InsertVisitorAfter(DecoderVisitor* new_visitor,
+                                 DecoderVisitor* registered_visitor) {
+  for (auto it = visitors_.begin(); it != visitors_.end(); it++) {
+    if (*it == registered_visitor) {
+      it++;
+      MOZ_ALWAYS_TRUE(visitors_.insert(it, new_visitor));
+      return;
+    }
+  }
+  // We reached the end of the list without finding registered_visitor.
+  MOZ_ALWAYS_TRUE(visitors_.append(new_visitor));
+}
+
+
+// Removes every occurrence of visitor from the visitor list.
+void Decoder::RemoveVisitor(DecoderVisitor* visitor) {
+  visitors_.erase(std::remove(visitors_.begin(), visitors_.end(), visitor),
+                  visitors_.end());
+}
+
+
+void Decoder::DecodePCRelAddressing(const Instruction* instr) {
+  VIXL_ASSERT(instr->Bits(27, 24) == 0x0);
+  // We know bit 28 is set, as <b28:b27> = 0 is filtered out at the top level
+  // decode.
+  VIXL_ASSERT(instr->Bit(28) == 0x1);
+  VisitPCRelAddressing(instr);
+}
+
+
+void Decoder::DecodeBranchSystemException(const Instruction* instr) {
+  VIXL_ASSERT((instr->Bits(27, 24) == 0x4) ||
+              (instr->Bits(27, 24) == 0x5) ||
+              (instr->Bits(27, 24) == 0x6) ||
+              (instr->Bits(27, 24) == 0x7) );
+
+  switch (instr->Bits(31, 29)) {
+    case 0:
+    case 4: {
+      VisitUnconditionalBranch(instr);
+      break;
+    }
+    case 1:
+    case 5: {
+      if (instr->Bit(25) == 0) {
+        VisitCompareBranch(instr);
+      } else {
+        VisitTestBranch(instr);
+      }
+      break;
+    }
+    case 2: {
+      if (instr->Bit(25) == 0) {
+        if ((instr->Bit(24) == 0x1) ||
+            (instr->Mask(0x01000010) == 0x00000010)) {
+          VisitUnallocated(instr);
+        } else {
+          VisitConditionalBranch(instr);
+        }
+      } else {
+        VisitUnallocated(instr);
+      }
+      break;
+    }
+    case 6: {
+      if (instr->Bit(25) == 0) {
+        if (instr->Bit(24) == 0) {
+          if ((instr->Bits(4, 2) != 0) ||
+              (instr->Mask(0x00E0001D) == 0x00200001) ||
+              (instr->Mask(0x00E0001D) == 0x00400001) ||
+              (instr->Mask(0x00E0001E) == 0x00200002) ||
+              (instr->Mask(0x00E0001E) == 0x00400002) ||
+              (instr->Mask(0x00E0001C) == 0x00600000) ||
+              (instr->Mask(0x00E0001C) == 0x00800000) ||
+              (instr->Mask(0x00E0001F) == 0x00A00000) ||
+              (instr->Mask(0x00C0001C) == 0x00C00000)) {
+            if (instr->InstructionBits() == UNDEFINED_INST_PATTERN) {
+              VisitException(instr);
+            } else {
+              VisitUnallocated(instr);
+            }
+          } else {
+            VisitException(instr);
+          }
+        } else {
+          if (instr->Bits(23, 22) == 0) {
+            const Instr masked_003FF0E0 = instr->Mask(0x003FF0E0);
+            if ((instr->Bits(21, 19) == 0x4) ||
+                (masked_003FF0E0 == 0x00033000) ||
+                (masked_003FF0E0 == 0x003FF020) ||
+                (masked_003FF0E0 == 0x003FF060) ||
+                (masked_003FF0E0 == 0x003FF0E0) ||
+                (instr->Mask(0x00388000) == 0x00008000) ||
+                (instr->Mask(0x0038E000) == 0x00000000) ||
+                (instr->Mask(0x0039E000) == 0x00002000) ||
+                (instr->Mask(0x003AE000) == 0x00002000) ||
+                (instr->Mask(0x003CE000) == 0x00042000) ||
+                (instr->Mask(0x003FFFC0) == 0x000320C0) ||
+                (instr->Mask(0x003FF100) == 0x00032100) ||
+                // (instr->Mask(0x003FF200) == 0x00032200) || // match CSDB
+                (instr->Mask(0x003FF400) == 0x00032400) ||
+                (instr->Mask(0x003FF800) == 0x00032800) ||
+                (instr->Mask(0x0038F000) == 0x00005000) ||
+                (instr->Mask(0x0038E000) == 0x00006000)) {
+              VisitUnallocated(instr);
+            } else {
+              VisitSystem(instr);
+            }
+          } else {
+            VisitUnallocated(instr);
+          }
+        }
+      } else {
+        if ((instr->Bit(24) == 0x1) ||
+            (instr->Bits(20, 16) != 0x1F) ||
+            (instr->Bits(15, 10) != 0) ||
+            (instr->Bits(4, 0) != 0) ||
+            (instr->Bits(24, 21) == 0x3) ||
+            (instr->Bits(24, 22) == 0x3)) {
+          VisitUnallocated(instr);
+        } else {
+          VisitUnconditionalBranchToRegister(instr);
+        }
+      }
+      break;
+    }
+    case 3:
+    case 7: {
+      VisitUnallocated(instr);
+      break;
+    }
+  }
+}
+
+
+void Decoder::DecodeLoadStore(const Instruction* instr) {
+  VIXL_ASSERT((instr->Bits(27, 24) == 0x8) ||
+              (instr->Bits(27, 24) == 0x9) ||
+              (instr->Bits(27, 24) == 0xC) ||
+              (instr->Bits(27, 24) == 0xD) );
+  // TODO(all): rearrange the tree to integrate this branch.
+  if ((instr->Bit(28) == 0) && (instr->Bit(29) == 0) && (instr->Bit(26) == 1)) {
+    DecodeNEONLoadStore(instr);
+    return;
+  }
+
+  if (instr->Bit(24) == 0) {
+    if (instr->Bit(28) == 0) {
+      if (instr->Bit(29) == 0) {
+        if (instr->Bit(26) == 0) {
+          VisitLoadStoreExclusive(instr);
+        } else {
+          VIXL_UNREACHABLE();
+        }
+      } else {
+        if ((instr->Bits(31, 30) == 0x3) ||
+            (instr->Mask(0xC4400000) == 0x40000000)) {
+          VisitUnallocated(instr);
+        } else {
+          if (instr->Bit(23) == 0) {
+            if (instr->Mask(0xC4400000) == 0xC0400000) {
+              VisitUnallocated(instr);
+            } else {
+              VisitLoadStorePairNonTemporal(instr);
+            }
+          } else {
+            VisitLoadStorePairPostIndex(instr);
+          }
+        }
+      }
+    } else {
+      if (instr->Bit(29) == 0) {
+        if (instr->Mask(0xC4000000) == 0xC4000000) {
+          VisitUnallocated(instr);
+        } else {
+          VisitLoadLiteral(instr);
+        }
+      } else {
+        if ((instr->Mask(0x44800000) == 0x44800000) ||
+            (instr->Mask(0x84800000) == 0x84800000)) {
+          VisitUnallocated(instr);
+        } else {
+          if (instr->Bit(21) == 0) {
+            switch (instr->Bits(11, 10)) {
+              case 0: {
+                VisitLoadStoreUnscaledOffset(instr);
+                break;
+              }
+              case 1: {
+                if (instr->Mask(0xC4C00000) == 0xC0800000) {
+                  VisitUnallocated(instr);
+                } else {
+                  VisitLoadStorePostIndex(instr);
+                }
+                break;
+              }
+              case 2: {
+                // TODO: VisitLoadStoreRegisterOffsetUnpriv.
+                VisitUnimplemented(instr);
+                break;
+              }
+              case 3: {
+                if (instr->Mask(0xC4C00000) == 0xC0800000) {
+                  VisitUnallocated(instr);
+                } else {
+                  VisitLoadStorePreIndex(instr);
+                }
+                break;
+              }
+            }
+          } else {
+            if (instr->Bits(11, 10) == 0x2) {
+              if (instr->Bit(14) == 0) {
+                VisitUnallocated(instr);
+              } else {
+                VisitLoadStoreRegisterOffset(instr);
+              }
+            } else {
+              if (instr->Bits(11, 10) == 0x0) {
+                if (instr->Bit(25) == 0) {
+                  if (instr->Bit(26) == 0) {
+                    if ((instr->Bit(15) == 1) &&
+                        ((instr->Bits(14, 12) == 0x1) ||
+                         (instr->Bit(13) == 1) ||
+                         (instr->Bits(14, 12) == 0x5) ||
+                         ((instr->Bits(14, 12) == 0x4) &&
+                          ((instr->Bit(23) == 0) ||
+                           (instr->Bits(23, 22) == 0x3))))) {
+                      VisitUnallocated(instr);
+                    } else {
+                      VisitAtomicMemory(instr);
+                    }
+                  } else {
+                    VisitUnallocated(instr);
+                  }
+                } else {
+                  VisitUnallocated(instr);
+                }
+              } else {
+                VisitUnallocated(instr);
+              }
+            }
+          }
+        }
+      }
+    }
+  } else {
+    if (instr->Bit(28) == 0) {
+      if (instr->Bit(29) == 0) {
+        VisitUnallocated(instr);
+      } else {
+        if ((instr->Bits(31, 30) == 0x3) ||
+            (instr->Mask(0xC4400000) == 0x40000000)) {
+          VisitUnallocated(instr);
+        } else {
+          if (instr->Bit(23) == 0) {
+            VisitLoadStorePairOffset(instr);
+          } else {
+            VisitLoadStorePairPreIndex(instr);
+          }
+        }
+      }
+    } else {
+      if (instr->Bit(29) == 0) {
+        VisitUnallocated(instr);
+      } else {
+        if ((instr->Mask(0x84C00000) == 0x80C00000) ||
+            (instr->Mask(0x44800000) == 0x44800000) ||
+            (instr->Mask(0x84800000) == 0x84800000)) {
+          VisitUnallocated(instr);
+        } else {
+          VisitLoadStoreUnsignedOffset(instr);
+        }
+      }
+    }
+  }
+}
+
+
+void Decoder::DecodeLogical(const Instruction* instr) {
+  VIXL_ASSERT(instr->Bits(27, 24) == 0x2);
+
+  if (instr->Mask(0x80400000) == 0x00400000) {
+    VisitUnallocated(instr);
+  } else {
+    if (instr->Bit(23) == 0) {
+      VisitLogicalImmediate(instr);
+    } else {
+      if (instr->Bits(30, 29) == 0x1) {
+        VisitUnallocated(instr);
+      } else {
+        VisitMoveWideImmediate(instr);
+      }
+    }
+  }
+}
+
+
+void Decoder::DecodeBitfieldExtract(const Instruction* instr) {
+  VIXL_ASSERT(instr->Bits(27, 24) == 0x3);
+
+  if ((instr->Mask(0x80400000) == 0x80000000) ||
+      (instr->Mask(0x80400000) == 0x00400000) ||
+      (instr->Mask(0x80008000) == 0x00008000)) {
+    VisitUnallocated(instr);
+  } else if (instr->Bit(23) == 0) {
+    if ((instr->Mask(0x80200000) == 0x00200000) ||
+        (instr->Mask(0x60000000) == 0x60000000)) {
+      VisitUnallocated(instr);
+    } else {
+      VisitBitfield(instr);
+    }
+  } else {
+    if ((instr->Mask(0x60200000) == 0x00200000) ||
+        (instr->Mask(0x60000000) != 0x00000000)) {
+      VisitUnallocated(instr);
+    } else {
+      VisitExtract(instr);
+    }
+  }
+}
+
+
+void Decoder::DecodeAddSubImmediate(const Instruction* instr) {
+  VIXL_ASSERT(instr->Bits(27, 24) == 0x1);
+  if (instr->Bit(23) == 1) {
+    VisitUnallocated(instr);
+  } else {
+    VisitAddSubImmediate(instr);
+  }
+}
+
+
+void Decoder::DecodeDataProcessing(const Instruction* instr) {
+  VIXL_ASSERT((instr->Bits(27, 24) == 0xA) ||
+              (instr->Bits(27, 24) == 0xB));
+
+  if (instr->Bit(24) == 0) {
+    if (instr->Bit(28) == 0) {
+      if (instr->Mask(0x80008000) == 0x00008000) {
+        VisitUnallocated(instr);
+      } else {
+        VisitLogicalShifted(instr);
+      }
+    } else {
+      switch (instr->Bits(23, 21)) {
+        case 0: {
+          if (instr->Mask(0x0000FC00) != 0) {
+            VisitUnallocated(instr);
+          } else {
+            VisitAddSubWithCarry(instr);
+          }
+          break;
+        }
+        case 2: {
+          if ((instr->Bit(29) == 0) ||
+              (instr->Mask(0x00000410) != 0)) {
+            VisitUnallocated(instr);
+          } else {
+            if (instr->Bit(11) == 0) {
+              VisitConditionalCompareRegister(instr);
+            } else {
+              VisitConditionalCompareImmediate(instr);
+            }
+          }
+          break;
+        }
+        case 4: {
+          if (instr->Mask(0x20000800) != 0x00000000) {
+            VisitUnallocated(instr);
+          } else {
+            VisitConditionalSelect(instr);
+          }
+          break;
+        }
+        case 6: {
+          if (instr->Bit(29) == 0x1) {
+            // Report the instruction once; falling through into the
+            // odd-numbered cases below would visit it a second time.
+            VisitUnallocated(instr);
+          } else {
+            if (instr->Bit(30) == 0) {
+              if ((instr->Bit(15) == 0x1) ||
+                  (instr->Bits(15, 11) == 0) ||
+                  (instr->Bits(15, 12) == 0x1) ||
+                  (instr->Bits(15, 12) == 0x3) ||
+                  (instr->Bits(15, 13) == 0x3) ||
+                  (instr->Mask(0x8000EC00) == 0x00004C00) ||
+                  (instr->Mask(0x8000E800) == 0x80004000) ||
+                  (instr->Mask(0x8000E400) == 0x80004000)) {
+                VisitUnallocated(instr);
+              } else {
+                VisitDataProcessing2Source(instr);
+              }
+            } else {
+              if ((instr->Bit(13) == 1) ||
+                  (instr->Bits(20, 16) != 0) ||
+                  (instr->Bits(15, 14) != 0) ||
+                  (instr->Mask(0xA01FFC00) == 0x00000C00) ||
+                  (instr->Mask(0x201FF800) == 0x00001800)) {
+                VisitUnallocated(instr);
+              } else {
+                VisitDataProcessing1Source(instr);
+              }
+            }
+          }
+          break;
+        }
+        case 1:
+        case 3:
+        case 5:
+        case 7: VisitUnallocated(instr); break;
+      }
+    }
+  } else {
+    if (instr->Bit(28) == 0) {
+      if (instr->Bit(21) == 0) {
+        if ((instr->Bits(23, 22) == 0x3) ||
+            (instr->Mask(0x80008000) == 0x00008000)) {
+          VisitUnallocated(instr);
+        } else {
+          VisitAddSubShifted(instr);
+        }
+      } else {
+        if ((instr->Mask(0x00C00000) != 0x00000000) ||
+            (instr->Mask(0x00001400) == 0x00001400) ||
+            (instr->Mask(0x00001800) == 0x00001800)) {
+          VisitUnallocated(instr);
+        } else {
+          VisitAddSubExtended(instr);
+        }
+      }
+    } else {
+      if ((instr->Bit(30) == 0x1) ||
+          (instr->Bits(30, 29) == 0x1) ||
+          (instr->Mask(0xE0600000) == 0x00200000) ||
+          (instr->Mask(0xE0608000) == 0x00400000) ||
+          (instr->Mask(0x60608000) == 0x00408000) ||
+          (instr->Mask(0x60E00000) == 0x00E00000) ||
+          (instr->Mask(0x60E00000) == 0x00800000) ||
+          (instr->Mask(0x60E00000) == 0x00600000)) {
+        VisitUnallocated(instr);
+      } else {
+        VisitDataProcessing3Source(instr);
+      }
+    }
+  }
+}
+
+
+void Decoder::DecodeFP(const Instruction* instr) {
+  VIXL_ASSERT((instr->Bits(27, 24) == 0xE) ||
+              (instr->Bits(27, 24) == 0xF));
+  if (instr->Bit(28) == 0) {
+    DecodeNEONVectorDataProcessing(instr);
+  } else {
+    if (instr->Bits(31, 30) == 0x3) {
+      VisitUnallocated(instr);
+    } else if (instr->Bits(31, 30) == 0x1) {
+      DecodeNEONScalarDataProcessing(instr);
+    } else {
+      if (instr->Bit(29) == 0) {
+        if (instr->Bit(24) == 0) {
+          if (instr->Bit(21) == 0) {
+            if ((instr->Bit(23) == 1) ||
+                (instr->Bit(18) == 1) ||
+                (instr->Mask(0x80008000) == 0x00000000) ||
+                (instr->Mask(0x000E0000) == 0x00000000) ||
+                (instr->Mask(0x000E0000) == 0x000A0000) ||
+                (instr->Mask(0x00160000) == 0x00000000) ||
+                (instr->Mask(0x00160000) == 0x00120000)) {
+              VisitUnallocated(instr);
+            } else {
+              VisitFPFixedPointConvert(instr);
+            }
+          } else {
+            if (instr->Bits(15, 10) == 32) {
+              VisitUnallocated(instr);
+            } else if (instr->Bits(15, 10) == 0) {
+              if ((instr->Bits(23, 22) == 0x3) ||
+                  (instr->Mask(0x000E0000) == 0x000A0000) ||
+                  (instr->Mask(0x000E0000) == 0x000C0000) ||
+                  (instr->Mask(0x00160000) == 0x00120000) ||
+                  (instr->Mask(0x00160000) == 0x00140000) ||
+                  (instr->Mask(0x20C40000) == 0x00800000) ||
+                  (instr->Mask(0x20C60000) == 0x00840000) ||
+                  (instr->Mask(0xA0C60000) == 0x80060000) ||
+                  (instr->Mask(0xA0C60000) == 0x00860000) ||
+                  (instr->Mask(0xA0C60000) == 0x00460000) ||
+                  (instr->Mask(0xA0CE0000) == 0x80860000) ||
+                  (instr->Mask(0xA0CE0000) == 0x804E0000) ||
+                  (instr->Mask(0xA0CE0000) == 0x000E0000) ||
+                  (instr->Mask(0xA0D60000) == 0x00160000) ||
+                  (instr->Mask(0xA0D60000) == 0x80560000) ||
+                  (instr->Mask(0xA0D60000) == 0x80960000)) {
+                VisitUnallocated(instr);
+              } else {
+                VisitFPIntegerConvert(instr);
+              }
+            } else if (instr->Bits(14, 10) == 16) {
+              const Instr masked_A0DF8000 = instr->Mask(0xA0DF8000);
+              if ((instr->Mask(0x80180000) != 0) ||
+                  (masked_A0DF8000 == 0x00020000) ||
+                  (masked_A0DF8000 == 0x00030000) ||
+                  (masked_A0DF8000 == 0x00068000) ||
+                  (masked_A0DF8000 == 0x00428000) ||
+                  (masked_A0DF8000 == 0x00430000) ||
+                  (masked_A0DF8000 == 0x00468000) ||
+                  (instr->Mask(0xA0D80000) == 0x00800000) ||
+                  (instr->Mask(0xA0DE0000) == 0x00C00000) ||
+                  (instr->Mask(0xA0DF0000) == 0x00C30000) ||
+                  (instr->Mask(0xA0DC0000) == 0x00C40000)) {
+                VisitUnallocated(instr);
+              } else {
+                VisitFPDataProcessing1Source(instr);
+              }
+            } else if (instr->Bits(13, 10) == 8) {
+              if ((instr->Bits(15, 14) != 0) ||
+                  (instr->Bits(2, 0) != 0) ||
+                  (instr->Mask(0x80800000) != 0x00000000)) {
+                VisitUnallocated(instr);
+              } else {
+                VisitFPCompare(instr);
+              }
+            } else if (instr->Bits(12, 10) == 4) {
+              if ((instr->Bits(9, 5) != 0) ||
+                  (instr->Mask(0x80800000) != 0x00000000)) {
+                VisitUnallocated(instr);
+              } else {
+                VisitFPImmediate(instr);
+              }
+            } else {
+              if (instr->Mask(0x80800000) != 0x00000000) {
+                VisitUnallocated(instr);
+              } else {
+                switch (instr->Bits(11, 10)) {
+                  case 1: {
+                    VisitFPConditionalCompare(instr);
+                    break;
+                  }
+                  case 2: {
+                    if ((instr->Bits(15, 14) == 0x3) ||
+                        (instr->Mask(0x00009000) == 0x00009000) ||
+                        (instr->Mask(0x0000A000) == 0x0000A000)) {
+                      VisitUnallocated(instr);
+                    } else {
+                      VisitFPDataProcessing2Source(instr);
+                    }
+                    break;
+                  }
+                  case 3: {
+                    VisitFPConditionalSelect(instr);
+                    break;
+                  }
+                  default: VIXL_UNREACHABLE();
+                }
+              }
+            }
+          }
+        } else {
+          // Bit 30 == 1 has been handled earlier.
+          VIXL_ASSERT(instr->Bit(30) == 0);
+          if (instr->Mask(0xA0800000) != 0) {
+            VisitUnallocated(instr);
+          } else {
+            VisitFPDataProcessing3Source(instr);
+          }
+        }
+      } else {
+        VisitUnallocated(instr);
+      }
+    }
+  }
+}
+
+
+void Decoder::DecodeNEONLoadStore(const Instruction* instr) {
+  VIXL_ASSERT(instr->Bits(29, 25) == 0x6);
+  if (instr->Bit(31) == 0) {
+    if ((instr->Bit(24) == 0) && (instr->Bit(21) == 1)) {
+      VisitUnallocated(instr);
+      return;
+    }
+
+    if (instr->Bit(23) == 0) {
+      if (instr->Bits(20, 16) == 0) {
+        if (instr->Bit(24) == 0) {
+          VisitNEONLoadStoreMultiStruct(instr);
+        } else {
+          VisitNEONLoadStoreSingleStruct(instr);
+        }
+      } else {
+        VisitUnallocated(instr);
+      }
+    } else {
+      if (instr->Bit(24) == 0) {
+        VisitNEONLoadStoreMultiStructPostIndex(instr);
+      } else {
+        VisitNEONLoadStoreSingleStructPostIndex(instr);
+      }
+    }
+  } else {
+    VisitUnallocated(instr);
+  }
+}
+
+
+void Decoder::DecodeNEONVectorDataProcessing(const Instruction* instr) {
+  VIXL_ASSERT(instr->Bits(28, 25) == 0x7);
+  if (instr->Bit(31) == 0) {
+    if (instr->Bit(24) == 0) {
+      if (instr->Bit(21) == 0) {
+        if (instr->Bit(15) ==
(instr->Mask(0x80800000) != 0x00000000)) { + VisitUnallocated(instr); + } else { + VisitFPCompare(instr); + } + } else if (instr->Bits(12, 10) == 4) { + if ((instr->Bits(9, 5) != 0) || + (instr->Mask(0x80800000) != 0x00000000)) { + VisitUnallocated(instr); + } else { + VisitFPImmediate(instr); + } + } else { + if (instr->Mask(0x80800000) != 0x00000000) { + VisitUnallocated(instr); + } else { + switch (instr->Bits(11, 10)) { + case 1: { + VisitFPConditionalCompare(instr); + break; + } + case 2: { + if ((instr->Bits(15, 14) == 0x3) || + (instr->Mask(0x00009000) == 0x00009000) || + (instr->Mask(0x0000A000) == 0x0000A000)) { + VisitUnallocated(instr); + } else { + VisitFPDataProcessing2Source(instr); + } + break; + } + case 3: { + VisitFPConditionalSelect(instr); + break; + } + default: VIXL_UNREACHABLE(); + } + } + } + } + } else { + // Bit 30 == 1 has been handled earlier. + VIXL_ASSERT(instr->Bit(30) == 0); + if (instr->Mask(0xA0800000) != 0) { + VisitUnallocated(instr); + } else { + VisitFPDataProcessing3Source(instr); + } + } + } else { + VisitUnallocated(instr); + } + } + } +} + + +void Decoder::DecodeNEONLoadStore(const Instruction* instr) { + VIXL_ASSERT(instr->Bits(29, 25) == 0x6); + if (instr->Bit(31) == 0) { + if ((instr->Bit(24) == 0) && (instr->Bit(21) == 1)) { + VisitUnallocated(instr); + return; + } + + if (instr->Bit(23) == 0) { + if (instr->Bits(20, 16) == 0) { + if (instr->Bit(24) == 0) { + VisitNEONLoadStoreMultiStruct(instr); + } else { + VisitNEONLoadStoreSingleStruct(instr); + } + } else { + VisitUnallocated(instr); + } + } else { + if (instr->Bit(24) == 0) { + VisitNEONLoadStoreMultiStructPostIndex(instr); + } else { + VisitNEONLoadStoreSingleStructPostIndex(instr); + } + } + } else { + VisitUnallocated(instr); + } +} + + +void Decoder::DecodeNEONVectorDataProcessing(const Instruction* instr) { + VIXL_ASSERT(instr->Bits(28, 25) == 0x7); + if (instr->Bit(31) == 0) { + if (instr->Bit(24) == 0) { + if (instr->Bit(21) == 0) { + if (instr->Bit(15) == 
0) { + if (instr->Bit(10) == 0) { + if (instr->Bit(29) == 0) { + if (instr->Bit(11) == 0) { + VisitNEONTable(instr); + } else { + VisitNEONPerm(instr); + } + } else { + VisitNEONExtract(instr); + } + } else { + if (instr->Bits(23, 22) == 0) { + VisitNEONCopy(instr); + } else { + VisitUnallocated(instr); + } + } + } else { + VisitUnallocated(instr); + } + } else { + if (instr->Bit(10) == 0) { + if (instr->Bit(11) == 0) { + VisitNEON3Different(instr); + } else { + if (instr->Bits(18, 17) == 0) { + if (instr->Bit(20) == 0) { + if (instr->Bit(19) == 0) { + VisitNEON2RegMisc(instr); + } else { + if (instr->Bits(30, 29) == 0x2) { + VisitCryptoAES(instr); + } else { + VisitUnallocated(instr); + } + } + } else { + if (instr->Bit(19) == 0) { + VisitNEONAcrossLanes(instr); + } else { + VisitUnallocated(instr); + } + } + } else { + VisitUnallocated(instr); + } + } + } else { + VisitNEON3Same(instr); + } + } + } else { + if (instr->Bit(10) == 0) { + VisitNEONByIndexedElement(instr); + } else { + if (instr->Bit(23) == 0) { + if (instr->Bits(22, 19) == 0) { + VisitNEONModifiedImmediate(instr); + } else { + VisitNEONShiftImmediate(instr); + } + } else { + VisitUnallocated(instr); + } + } + } + } else { + VisitUnallocated(instr); + } +} + + +void Decoder::DecodeNEONScalarDataProcessing(const Instruction* instr) { + VIXL_ASSERT(instr->Bits(28, 25) == 0xF); + if (instr->Bit(24) == 0) { + if (instr->Bit(21) == 0) { + if (instr->Bit(15) == 0) { + if (instr->Bit(10) == 0) { + if (instr->Bit(29) == 0) { + if (instr->Bit(11) == 0) { + VisitCrypto3RegSHA(instr); + } else { + VisitUnallocated(instr); + } + } else { + VisitUnallocated(instr); + } + } else { + if (instr->Bits(23, 22) == 0) { + VisitNEONScalarCopy(instr); + } else { + VisitUnallocated(instr); + } + } + } else { + VisitUnallocated(instr); + } + } else { + if (instr->Bit(10) == 0) { + if (instr->Bit(11) == 0) { + VisitNEONScalar3Diff(instr); + } else { + if (instr->Bits(18, 17) == 0) { + if (instr->Bit(20) == 0) { + if 
(instr->Bit(19) == 0) { + VisitNEONScalar2RegMisc(instr); + } else { + if (instr->Bit(29) == 0) { + VisitCrypto2RegSHA(instr); + } else { + VisitUnallocated(instr); + } + } + } else { + if (instr->Bit(19) == 0) { + VisitNEONScalarPairwise(instr); + } else { + VisitUnallocated(instr); + } + } + } else { + VisitUnallocated(instr); + } + } + } else { + VisitNEONScalar3Same(instr); + } + } + } else { + if (instr->Bit(10) == 0) { + VisitNEONScalarByIndexedElement(instr); + } else { + if (instr->Bit(23) == 0) { + VisitNEONScalarShiftImmediate(instr); + } else { + VisitUnallocated(instr); + } + } + } +} + + +#define DEFINE_VISITOR_CALLERS(A) \ + void Decoder::Visit##A(const Instruction *instr) { \ + VIXL_ASSERT(instr->Mask(A##FMask) == A##Fixed); \ + for (auto visitor : visitors_) { \ + visitor->Visit##A(instr); \ + } \ + } +VISITOR_LIST(DEFINE_VISITOR_CALLERS) +#undef DEFINE_VISITOR_CALLERS +} // namespace vixl diff --git a/js/src/jit/arm64/vixl/Decoder-vixl.h b/js/src/jit/arm64/vixl/Decoder-vixl.h new file mode 100644 index 0000000000..1b3cf172ac --- /dev/null +++ b/js/src/jit/arm64/vixl/Decoder-vixl.h @@ -0,0 +1,276 @@ +// Copyright 2014, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef VIXL_A64_DECODER_A64_H_ +#define VIXL_A64_DECODER_A64_H_ + +#include "mozilla/Vector.h" + +#include "jit/arm64/vixl/Globals-vixl.h" +#include "jit/arm64/vixl/Instructions-vixl.h" +#include "js/AllocPolicy.h" + + +// List macro containing all visitors needed by the decoder class. 
+ +#define VISITOR_LIST_THAT_RETURN(V) \ + V(PCRelAddressing) \ + V(AddSubImmediate) \ + V(LogicalImmediate) \ + V(MoveWideImmediate) \ + V(AtomicMemory) \ + V(Bitfield) \ + V(Extract) \ + V(UnconditionalBranch) \ + V(UnconditionalBranchToRegister) \ + V(CompareBranch) \ + V(TestBranch) \ + V(ConditionalBranch) \ + V(System) \ + V(Exception) \ + V(LoadStorePairPostIndex) \ + V(LoadStorePairOffset) \ + V(LoadStorePairPreIndex) \ + V(LoadStorePairNonTemporal) \ + V(LoadLiteral) \ + V(LoadStoreUnscaledOffset) \ + V(LoadStorePostIndex) \ + V(LoadStorePreIndex) \ + V(LoadStoreRegisterOffset) \ + V(LoadStoreUnsignedOffset) \ + V(LoadStoreExclusive) \ + V(LogicalShifted) \ + V(AddSubShifted) \ + V(AddSubExtended) \ + V(AddSubWithCarry) \ + V(ConditionalCompareRegister) \ + V(ConditionalCompareImmediate) \ + V(ConditionalSelect) \ + V(DataProcessing1Source) \ + V(DataProcessing2Source) \ + V(DataProcessing3Source) \ + V(FPCompare) \ + V(FPConditionalCompare) \ + V(FPConditionalSelect) \ + V(FPImmediate) \ + V(FPDataProcessing1Source) \ + V(FPDataProcessing2Source) \ + V(FPDataProcessing3Source) \ + V(FPIntegerConvert) \ + V(FPFixedPointConvert) \ + V(Crypto2RegSHA) \ + V(Crypto3RegSHA) \ + V(CryptoAES) \ + V(NEON2RegMisc) \ + V(NEON3Different) \ + V(NEON3Same) \ + V(NEONAcrossLanes) \ + V(NEONByIndexedElement) \ + V(NEONCopy) \ + V(NEONExtract) \ + V(NEONLoadStoreMultiStruct) \ + V(NEONLoadStoreMultiStructPostIndex) \ + V(NEONLoadStoreSingleStruct) \ + V(NEONLoadStoreSingleStructPostIndex) \ + V(NEONModifiedImmediate) \ + V(NEONScalar2RegMisc) \ + V(NEONScalar3Diff) \ + V(NEONScalar3Same) \ + V(NEONScalarByIndexedElement) \ + V(NEONScalarCopy) \ + V(NEONScalarPairwise) \ + V(NEONScalarShiftImmediate) \ + V(NEONShiftImmediate) \ + V(NEONTable) \ + V(NEONPerm) + +#define VISITOR_LIST_THAT_DONT_RETURN(V) \ + V(Unallocated) \ + V(Unimplemented) \ + +#define VISITOR_LIST(V) \ + VISITOR_LIST_THAT_RETURN(V) \ + VISITOR_LIST_THAT_DONT_RETURN(V) \ + +namespace vixl { + +// The 
Visitor interface. Disassembler and simulator (and other tools) +// must provide implementations for all of these functions. +class DecoderVisitor { + public: + enum VisitorConstness { + kConstVisitor, + kNonConstVisitor + }; + explicit DecoderVisitor(VisitorConstness constness = kConstVisitor) + : constness_(constness) {} + + virtual ~DecoderVisitor() {} + + #define DECLARE(A) virtual void Visit##A(const Instruction* instr) = 0; + VISITOR_LIST(DECLARE) + #undef DECLARE + + bool IsConstVisitor() const { return constness_ == kConstVisitor; } + Instruction* MutableInstruction(const Instruction* instr) { + VIXL_ASSERT(!IsConstVisitor()); + return const_cast<Instruction*>(instr); + } + + private: + const VisitorConstness constness_; +}; + + +class Decoder { + public: + Decoder() {} + + // Top-level wrappers around the actual decoding function. + void Decode(const Instruction* instr) { +#ifdef DEBUG + for (auto visitor : visitors_) { + VIXL_ASSERT(visitor->IsConstVisitor()); + } +#endif + DecodeInstruction(instr); + } + void Decode(Instruction* instr) { + DecodeInstruction(const_cast<const Instruction*>(instr)); + } + + // Register a new visitor class with the decoder. + // Decode() will call the corresponding visitor method from all registered + // visitor classes when decoding reaches the leaf node of the instruction + // decode tree. + // Visitors are called in order. + // A visitor can be registered multiple times. + // + // d.AppendVisitor(V1); + // d.AppendVisitor(V2); + // d.PrependVisitor(V2); + // d.AppendVisitor(V3); + // + // d.Decode(i); + // + // will call in order visitor methods in V2, V1, V2, V3. + void AppendVisitor(DecoderVisitor* visitor); + void PrependVisitor(DecoderVisitor* visitor); + // These helpers register `new_visitor` before or after the first instance of + // `registered_visitor` in the list. 
+ // So if + // V1, V2, V1, V2 + // are registered in this order in the decoder, calls to + // d.InsertVisitorAfter(V3, V1); + // d.InsertVisitorBefore(V4, V2); + // will yield the order + // V1, V3, V4, V2, V1, V2 + // + // For more complex modifications of the order of registered visitors, one can + // directly access and modify the list of visitors via the `visitors()` + // accessor. + void InsertVisitorBefore(DecoderVisitor* new_visitor, + DecoderVisitor* registered_visitor); + void InsertVisitorAfter(DecoderVisitor* new_visitor, + DecoderVisitor* registered_visitor); + + // Remove all instances of a previously registered visitor class from the list + // of visitors stored by the decoder. + void RemoveVisitor(DecoderVisitor* visitor); + + #define DECLARE(A) void Visit##A(const Instruction* instr); + VISITOR_LIST(DECLARE) + #undef DECLARE + + + private: + // Decodes an instruction and calls the visitor functions registered with the + // Decoder class. + void DecodeInstruction(const Instruction* instr); + + // Decode the PC relative addressing instruction, and call the corresponding + // visitors. + // On entry, instruction bits 27:24 = 0x0. + void DecodePCRelAddressing(const Instruction* instr); + + // Decode the add/subtract immediate instruction, and call the corresponding + // visitors. + // On entry, instruction bits 27:24 = 0x1. + void DecodeAddSubImmediate(const Instruction* instr); + + // Decode the branch, system command, and exception generation parts of + // the instruction tree, and call the corresponding visitors. + // On entry, instruction bits 27:24 = {0x4, 0x5, 0x6, 0x7}. + void DecodeBranchSystemException(const Instruction* instr); + + // Decode the load and store parts of the instruction tree, and call + // the corresponding visitors. + // On entry, instruction bits 27:24 = {0x8, 0x9, 0xC, 0xD}. 
+ void DecodeLoadStore(const Instruction* instr); + + // Decode the logical immediate and move wide immediate parts of the + // instruction tree, and call the corresponding visitors. + // On entry, instruction bits 27:24 = 0x2. + void DecodeLogical(const Instruction* instr); + + // Decode the bitfield and extraction parts of the instruction tree, + // and call the corresponding visitors. + // On entry, instruction bits 27:24 = 0x3. + void DecodeBitfieldExtract(const Instruction* instr); + + // Decode the data processing parts of the instruction tree, and call the + // corresponding visitors. + // On entry, instruction bits 27:24 = {0xA, 0xB}. + void DecodeDataProcessing(const Instruction* instr); + + // Decode the floating point parts of the instruction tree, and call the + // corresponding visitors. + // On entry, instruction bits 27:24 = {0xE, 0xF}. + void DecodeFP(const Instruction* instr); + + // Decode the Advanced SIMD (NEON) load/store part of the instruction tree, + // and call the corresponding visitors. + // On entry, instruction bits 29:25 = 0x6. + void DecodeNEONLoadStore(const Instruction* instr); + + // Decode the Advanced SIMD (NEON) vector data processing part of the + // instruction tree, and call the corresponding visitors. + // On entry, instruction bits 28:25 = 0x7. + void DecodeNEONVectorDataProcessing(const Instruction* instr); + + // Decode the Advanced SIMD (NEON) scalar data processing part of the + // instruction tree, and call the corresponding visitors. + // On entry, instruction bits 28:25 = 0xF. + void DecodeNEONScalarDataProcessing(const Instruction* instr); + + private: + // Visitors are registered in a list. 
+ mozilla::Vector<DecoderVisitor*, 8, js::SystemAllocPolicy> visitors_; +}; + +} // namespace vixl + +#endif // VIXL_A64_DECODER_A64_H_ diff --git a/js/src/jit/arm64/vixl/Disasm-vixl.cpp b/js/src/jit/arm64/vixl/Disasm-vixl.cpp new file mode 100644 index 0000000000..1116ebb67b --- /dev/null +++ b/js/src/jit/arm64/vixl/Disasm-vixl.cpp @@ -0,0 +1,3741 @@ +// Copyright 2015, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#include "jit/arm64/vixl/Disasm-vixl.h" + +#include "mozilla/Sprintf.h" +#include <cstdlib> + +namespace vixl { + +Disassembler::Disassembler() { + buffer_size_ = 256; + buffer_ = reinterpret_cast<char*>(malloc(buffer_size_)); + buffer_pos_ = 0; + own_buffer_ = true; + code_address_offset_ = 0; +} + + +Disassembler::Disassembler(char* text_buffer, int buffer_size) { + buffer_size_ = buffer_size; + buffer_ = text_buffer; + buffer_pos_ = 0; + own_buffer_ = false; + code_address_offset_ = 0; +} + + +Disassembler::~Disassembler() { + if (own_buffer_) { + free(buffer_); + } +} + + +char* Disassembler::GetOutput() { + return buffer_; +} + + +void Disassembler::VisitAddSubImmediate(const Instruction* instr) { + bool rd_is_zr = RdIsZROrSP(instr); + bool stack_op = (rd_is_zr || RnIsZROrSP(instr)) && + (instr->ImmAddSub() == 0) ? true : false; + const char *mnemonic = ""; + const char *form = "'Rds, 'Rns, 'IAddSub"; + const char *form_cmp = "'Rns, 'IAddSub"; + const char *form_mov = "'Rds, 'Rns"; + + switch (instr->Mask(AddSubImmediateMask)) { + case ADD_w_imm: + case ADD_x_imm: { + mnemonic = "add"; + if (stack_op) { + mnemonic = "mov"; + form = form_mov; + } + break; + } + case ADDS_w_imm: + case ADDS_x_imm: { + mnemonic = "adds"; + if (rd_is_zr) { + mnemonic = "cmn"; + form = form_cmp; + } + break; + } + case SUB_w_imm: + case SUB_x_imm: mnemonic = "sub"; break; + case SUBS_w_imm: + case SUBS_x_imm: { + mnemonic = "subs"; + if (rd_is_zr) { + mnemonic = "cmp"; + form = form_cmp; + } + break; + } + default: VIXL_UNREACHABLE(); + } + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitAddSubShifted(const Instruction* instr) { + bool rd_is_zr = RdIsZROrSP(instr); + bool rn_is_zr = RnIsZROrSP(instr); + const char *mnemonic = ""; + const char *form = "'Rd, 'Rn, 'Rm'NDP"; + const char *form_cmp = "'Rn, 'Rm'NDP"; + const char *form_neg = "'Rd, 'Rm'NDP"; + + switch (instr->Mask(AddSubShiftedMask)) { + case ADD_w_shift: + case ADD_x_shift: mnemonic = "add"; break; + 
case ADDS_w_shift: + case ADDS_x_shift: { + mnemonic = "adds"; + if (rd_is_zr) { + mnemonic = "cmn"; + form = form_cmp; + } + break; + } + case SUB_w_shift: + case SUB_x_shift: { + mnemonic = "sub"; + if (rn_is_zr) { + mnemonic = "neg"; + form = form_neg; + } + break; + } + case SUBS_w_shift: + case SUBS_x_shift: { + mnemonic = "subs"; + if (rd_is_zr) { + mnemonic = "cmp"; + form = form_cmp; + } else if (rn_is_zr) { + mnemonic = "negs"; + form = form_neg; + } + break; + } + default: VIXL_UNREACHABLE(); + } + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitAddSubExtended(const Instruction* instr) { + bool rd_is_zr = RdIsZROrSP(instr); + const char *mnemonic = ""; + Extend mode = static_cast<Extend>(instr->ExtendMode()); + const char *form = ((mode == UXTX) || (mode == SXTX)) ? + "'Rds, 'Rns, 'Xm'Ext" : "'Rds, 'Rns, 'Wm'Ext"; + const char *form_cmp = ((mode == UXTX) || (mode == SXTX)) ? + "'Rns, 'Xm'Ext" : "'Rns, 'Wm'Ext"; + + switch (instr->Mask(AddSubExtendedMask)) { + case ADD_w_ext: + case ADD_x_ext: mnemonic = "add"; break; + case ADDS_w_ext: + case ADDS_x_ext: { + mnemonic = "adds"; + if (rd_is_zr) { + mnemonic = "cmn"; + form = form_cmp; + } + break; + } + case SUB_w_ext: + case SUB_x_ext: mnemonic = "sub"; break; + case SUBS_w_ext: + case SUBS_x_ext: { + mnemonic = "subs"; + if (rd_is_zr) { + mnemonic = "cmp"; + form = form_cmp; + } + break; + } + default: VIXL_UNREACHABLE(); + } + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitAddSubWithCarry(const Instruction* instr) { + bool rn_is_zr = RnIsZROrSP(instr); + const char *mnemonic = ""; + const char *form = "'Rd, 'Rn, 'Rm"; + const char *form_neg = "'Rd, 'Rm"; + + switch (instr->Mask(AddSubWithCarryMask)) { + case ADC_w: + case ADC_x: mnemonic = "adc"; break; + case ADCS_w: + case ADCS_x: mnemonic = "adcs"; break; + case SBC_w: + case SBC_x: { + mnemonic = "sbc"; + if (rn_is_zr) { + mnemonic = "ngc"; + form = form_neg; + } + break; + } + case SBCS_w: + case SBCS_x: { + mnemonic = 
"sbcs"; + if (rn_is_zr) { + mnemonic = "ngcs"; + form = form_neg; + } + break; + } + default: VIXL_UNREACHABLE(); + } + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitLogicalImmediate(const Instruction* instr) { + bool rd_is_zr = RdIsZROrSP(instr); + bool rn_is_zr = RnIsZROrSP(instr); + const char *mnemonic = ""; + const char *form = "'Rds, 'Rn, 'ITri"; + + if (instr->ImmLogical() == 0) { + // The immediate encoded in the instruction is not in the expected format. + Format(instr, "unallocated", "(LogicalImmediate)"); + return; + } + + switch (instr->Mask(LogicalImmediateMask)) { + case AND_w_imm: + case AND_x_imm: mnemonic = "and"; break; + case ORR_w_imm: + case ORR_x_imm: { + mnemonic = "orr"; + unsigned reg_size = (instr->SixtyFourBits() == 1) ? kXRegSize + : kWRegSize; + if (rn_is_zr && !IsMovzMovnImm(reg_size, instr->ImmLogical())) { + mnemonic = "mov"; + form = "'Rds, 'ITri"; + } + break; + } + case EOR_w_imm: + case EOR_x_imm: mnemonic = "eor"; break; + case ANDS_w_imm: + case ANDS_x_imm: { + mnemonic = "ands"; + if (rd_is_zr) { + mnemonic = "tst"; + form = "'Rn, 'ITri"; + } + break; + } + default: VIXL_UNREACHABLE(); + } + Format(instr, mnemonic, form); +} + + +bool Disassembler::IsMovzMovnImm(unsigned reg_size, uint64_t value) { + VIXL_ASSERT((reg_size == kXRegSize) || + ((reg_size == kWRegSize) && (value <= 0xffffffff))); + + // Test for movz: 16 bits set at positions 0, 16, 32 or 48. + if (((value & UINT64_C(0xffffffffffff0000)) == 0) || + ((value & UINT64_C(0xffffffff0000ffff)) == 0) || + ((value & UINT64_C(0xffff0000ffffffff)) == 0) || + ((value & UINT64_C(0x0000ffffffffffff)) == 0)) { + return true; + } + + // Test for movn: NOT(16 bits set at positions 0, 16, 32 or 48). 
+ if ((reg_size == kXRegSize) && + (((~value & UINT64_C(0xffffffffffff0000)) == 0) || + ((~value & UINT64_C(0xffffffff0000ffff)) == 0) || + ((~value & UINT64_C(0xffff0000ffffffff)) == 0) || + ((~value & UINT64_C(0x0000ffffffffffff)) == 0))) { + return true; + } + if ((reg_size == kWRegSize) && + (((value & 0xffff0000) == 0xffff0000) || + ((value & 0x0000ffff) == 0x0000ffff))) { + return true; + } + return false; +} + + +void Disassembler::VisitLogicalShifted(const Instruction* instr) { + bool rd_is_zr = RdIsZROrSP(instr); + bool rn_is_zr = RnIsZROrSP(instr); + const char *mnemonic = ""; + const char *form = "'Rd, 'Rn, 'Rm'NLo"; + + switch (instr->Mask(LogicalShiftedMask)) { + case AND_w: + case AND_x: mnemonic = "and"; break; + case BIC_w: + case BIC_x: mnemonic = "bic"; break; + case EOR_w: + case EOR_x: mnemonic = "eor"; break; + case EON_w: + case EON_x: mnemonic = "eon"; break; + case BICS_w: + case BICS_x: mnemonic = "bics"; break; + case ANDS_w: + case ANDS_x: { + mnemonic = "ands"; + if (rd_is_zr) { + mnemonic = "tst"; + form = "'Rn, 'Rm'NLo"; + } + break; + } + case ORR_w: + case ORR_x: { + mnemonic = "orr"; + if (rn_is_zr && (instr->ImmDPShift() == 0) && (instr->ShiftDP() == LSL)) { + mnemonic = "mov"; + form = "'Rd, 'Rm"; + } + break; + } + case ORN_w: + case ORN_x: { + mnemonic = "orn"; + if (rn_is_zr) { + mnemonic = "mvn"; + form = "'Rd, 'Rm'NLo"; + } + break; + } + default: VIXL_UNREACHABLE(); + } + + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitConditionalCompareRegister(const Instruction* instr) { + const char *mnemonic = ""; + const char *form = "'Rn, 'Rm, 'INzcv, 'Cond"; + + switch (instr->Mask(ConditionalCompareRegisterMask)) { + case CCMN_w: + case CCMN_x: mnemonic = "ccmn"; break; + case CCMP_w: + case CCMP_x: mnemonic = "ccmp"; break; + default: VIXL_UNREACHABLE(); + } + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitConditionalCompareImmediate(const Instruction* instr) { + const char *mnemonic = ""; + const 
char *form = "'Rn, 'IP, 'INzcv, 'Cond"; + + switch (instr->Mask(ConditionalCompareImmediateMask)) { + case CCMN_w_imm: + case CCMN_x_imm: mnemonic = "ccmn"; break; + case CCMP_w_imm: + case CCMP_x_imm: mnemonic = "ccmp"; break; + default: VIXL_UNREACHABLE(); + } + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitConditionalSelect(const Instruction* instr) { + bool rnm_is_zr = (RnIsZROrSP(instr) && RmIsZROrSP(instr)); + bool rn_is_rm = (instr->Rn() == instr->Rm()); + const char *mnemonic = ""; + const char *form = "'Rd, 'Rn, 'Rm, 'Cond"; + const char *form_test = "'Rd, 'CInv"; + const char *form_update = "'Rd, 'Rn, 'CInv"; + + Condition cond = static_cast<Condition>(instr->Condition()); + bool invertible_cond = (cond != al) && (cond != nv); + + switch (instr->Mask(ConditionalSelectMask)) { + case CSEL_w: + case CSEL_x: mnemonic = "csel"; break; + case CSINC_w: + case CSINC_x: { + mnemonic = "csinc"; + if (rnm_is_zr && invertible_cond) { + mnemonic = "cset"; + form = form_test; + } else if (rn_is_rm && invertible_cond) { + mnemonic = "cinc"; + form = form_update; + } + break; + } + case CSINV_w: + case CSINV_x: { + mnemonic = "csinv"; + if (rnm_is_zr && invertible_cond) { + mnemonic = "csetm"; + form = form_test; + } else if (rn_is_rm && invertible_cond) { + mnemonic = "cinv"; + form = form_update; + } + break; + } + case CSNEG_w: + case CSNEG_x: { + mnemonic = "csneg"; + if (rn_is_rm && invertible_cond) { + mnemonic = "cneg"; + form = form_update; + } + break; + } + default: VIXL_UNREACHABLE(); + } + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitBitfield(const Instruction* instr) { + unsigned s = instr->ImmS(); + unsigned r = instr->ImmR(); + unsigned rd_size_minus_1 = + ((instr->SixtyFourBits() == 1) ? 
kXRegSize : kWRegSize) - 1; + const char *mnemonic = ""; + const char *form = ""; + const char *form_shift_right = "'Rd, 'Rn, 'IBr"; + const char *form_extend = "'Rd, 'Wn"; + const char *form_bfiz = "'Rd, 'Rn, 'IBZ-r, 'IBs+1"; + const char *form_bfx = "'Rd, 'Rn, 'IBr, 'IBs-r+1"; + const char *form_lsl = "'Rd, 'Rn, 'IBZ-r"; + + switch (instr->Mask(BitfieldMask)) { + case SBFM_w: + case SBFM_x: { + mnemonic = "sbfx"; + form = form_bfx; + if (r == 0) { + form = form_extend; + if (s == 7) { + mnemonic = "sxtb"; + } else if (s == 15) { + mnemonic = "sxth"; + } else if ((s == 31) && (instr->SixtyFourBits() == 1)) { + mnemonic = "sxtw"; + } else { + form = form_bfx; + } + } else if (s == rd_size_minus_1) { + mnemonic = "asr"; + form = form_shift_right; + } else if (s < r) { + mnemonic = "sbfiz"; + form = form_bfiz; + } + break; + } + case UBFM_w: + case UBFM_x: { + mnemonic = "ubfx"; + form = form_bfx; + if (r == 0) { + form = form_extend; + if (s == 7) { + mnemonic = "uxtb"; + } else if (s == 15) { + mnemonic = "uxth"; + } else { + form = form_bfx; + } + } + if (s == rd_size_minus_1) { + mnemonic = "lsr"; + form = form_shift_right; + } else if (r == s + 1) { + mnemonic = "lsl"; + form = form_lsl; + } else if (s < r) { + mnemonic = "ubfiz"; + form = form_bfiz; + } + break; + } + case BFM_w: + case BFM_x: { + mnemonic = "bfxil"; + form = form_bfx; + if (s < r) { + mnemonic = "bfi"; + form = form_bfiz; + } + } + } + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitExtract(const Instruction* instr) { + const char *mnemonic = ""; + const char *form = "'Rd, 'Rn, 'Rm, 'IExtract"; + + switch (instr->Mask(ExtractMask)) { + case EXTR_w: + case EXTR_x: { + if (instr->Rn() == instr->Rm()) { + mnemonic = "ror"; + form = "'Rd, 'Rn, 'IExtract"; + } else { + mnemonic = "extr"; + } + break; + } + default: VIXL_UNREACHABLE(); + } + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitPCRelAddressing(const Instruction* instr) { + switch 
(instr->Mask(PCRelAddressingMask)) { + case ADR: Format(instr, "adr", "'Xd, 'AddrPCRelByte"); break; + case ADRP: Format(instr, "adrp", "'Xd, 'AddrPCRelPage"); break; + default: Format(instr, "unimplemented", "(PCRelAddressing)"); + } +} + + +void Disassembler::VisitConditionalBranch(const Instruction* instr) { + switch (instr->Mask(ConditionalBranchMask)) { + case B_cond: Format(instr, "b.'CBrn", "'TImmCond"); break; + default: VIXL_UNREACHABLE(); + } +} + + +void Disassembler::VisitUnconditionalBranchToRegister( + const Instruction* instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Xn"; + + switch (instr->Mask(UnconditionalBranchToRegisterMask)) { + case BR: mnemonic = "br"; break; + case BLR: mnemonic = "blr"; break; + case RET: { + mnemonic = "ret"; + if (instr->Rn() == kLinkRegCode) { + form = NULL; + } + break; + } + default: form = "(UnconditionalBranchToRegister)"; + } + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitUnconditionalBranch(const Instruction* instr) { + const char *mnemonic = ""; + const char *form = "'TImmUncn"; + + switch (instr->Mask(UnconditionalBranchMask)) { + case B: mnemonic = "b"; break; + case BL: mnemonic = "bl"; break; + default: VIXL_UNREACHABLE(); + } + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitDataProcessing1Source(const Instruction* instr) { + const char *mnemonic = ""; + const char *form = "'Rd, 'Rn"; + + switch (instr->Mask(DataProcessing1SourceMask)) { + #define FORMAT(A, B) \ + case A##_w: \ + case A##_x: mnemonic = B; break; + FORMAT(RBIT, "rbit"); + FORMAT(REV16, "rev16"); + FORMAT(REV, "rev"); + FORMAT(CLZ, "clz"); + FORMAT(CLS, "cls"); + #undef FORMAT + case REV32_x: mnemonic = "rev32"; break; + default: VIXL_UNREACHABLE(); + } + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitDataProcessing2Source(const Instruction* instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Rd, 'Rn, 'Rm"; + const char *form_wwx = "'Wd, 'Wn, 'Xm"; 
+ + switch (instr->Mask(DataProcessing2SourceMask)) { + #define FORMAT(A, B) \ + case A##_w: \ + case A##_x: mnemonic = B; break; + FORMAT(UDIV, "udiv"); + FORMAT(SDIV, "sdiv"); + FORMAT(LSLV, "lsl"); + FORMAT(LSRV, "lsr"); + FORMAT(ASRV, "asr"); + FORMAT(RORV, "ror"); + #undef FORMAT + case CRC32B: mnemonic = "crc32b"; break; + case CRC32H: mnemonic = "crc32h"; break; + case CRC32W: mnemonic = "crc32w"; break; + case CRC32X: mnemonic = "crc32x"; form = form_wwx; break; + case CRC32CB: mnemonic = "crc32cb"; break; + case CRC32CH: mnemonic = "crc32ch"; break; + case CRC32CW: mnemonic = "crc32cw"; break; + case CRC32CX: mnemonic = "crc32cx"; form = form_wwx; break; + default: form = "(DataProcessing2Source)"; + } + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitDataProcessing3Source(const Instruction* instr) { + bool ra_is_zr = RaIsZROrSP(instr); + const char *mnemonic = ""; + const char *form = "'Xd, 'Wn, 'Wm, 'Xa"; + const char *form_rrr = "'Rd, 'Rn, 'Rm"; + const char *form_rrrr = "'Rd, 'Rn, 'Rm, 'Ra"; + const char *form_xww = "'Xd, 'Wn, 'Wm"; + const char *form_xxx = "'Xd, 'Xn, 'Xm"; + + switch (instr->Mask(DataProcessing3SourceMask)) { + case MADD_w: + case MADD_x: { + mnemonic = "madd"; + form = form_rrrr; + if (ra_is_zr) { + mnemonic = "mul"; + form = form_rrr; + } + break; + } + case MSUB_w: + case MSUB_x: { + mnemonic = "msub"; + form = form_rrrr; + if (ra_is_zr) { + mnemonic = "mneg"; + form = form_rrr; + } + break; + } + case SMADDL_x: { + mnemonic = "smaddl"; + if (ra_is_zr) { + mnemonic = "smull"; + form = form_xww; + } + break; + } + case SMSUBL_x: { + mnemonic = "smsubl"; + if (ra_is_zr) { + mnemonic = "smnegl"; + form = form_xww; + } + break; + } + case UMADDL_x: { + mnemonic = "umaddl"; + if (ra_is_zr) { + mnemonic = "umull"; + form = form_xww; + } + break; + } + case UMSUBL_x: { + mnemonic = "umsubl"; + if (ra_is_zr) { + mnemonic = "umnegl"; + form = form_xww; + } + break; + } + case SMULH_x: { + mnemonic = "smulh"; + form = 
form_xxx; + break; + } + case UMULH_x: { + mnemonic = "umulh"; + form = form_xxx; + break; + } + default: VIXL_UNREACHABLE(); + } + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitCompareBranch(const Instruction* instr) { + const char *mnemonic = ""; + const char *form = "'Rt, 'TImmCmpa"; + + switch (instr->Mask(CompareBranchMask)) { + case CBZ_w: + case CBZ_x: mnemonic = "cbz"; break; + case CBNZ_w: + case CBNZ_x: mnemonic = "cbnz"; break; + default: VIXL_UNREACHABLE(); + } + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitTestBranch(const Instruction* instr) { + const char *mnemonic = ""; + // If the top bit of the immediate is clear, the tested register is + // disassembled as Wt, otherwise Xt. As the top bit of the immediate is + // encoded in bit 31 of the instruction, we can reuse the Rt form, which + // uses bit 31 (normally "sf") to choose the register size. + const char *form = "'Rt, 'IS, 'TImmTest"; + + switch (instr->Mask(TestBranchMask)) { + case TBZ: mnemonic = "tbz"; break; + case TBNZ: mnemonic = "tbnz"; break; + default: VIXL_UNREACHABLE(); + } + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitMoveWideImmediate(const Instruction* instr) { + const char *mnemonic = ""; + const char *form = "'Rd, 'IMoveImm"; + + // Print the shift separately for movk, to make it clear which half word will + // be overwritten. Movn and movz print the computed immediate, which includes + // shift calculation. 
+ switch (instr->Mask(MoveWideImmediateMask)) { + case MOVN_w: + case MOVN_x: + if ((instr->ImmMoveWide()) || (instr->ShiftMoveWide() == 0)) { + if ((instr->SixtyFourBits() == 0) && (instr->ImmMoveWide() == 0xffff)) { + mnemonic = "movn"; + } else { + mnemonic = "mov"; + form = "'Rd, 'IMoveNeg"; + } + } else { + mnemonic = "movn"; + } + break; + case MOVZ_w: + case MOVZ_x: + if ((instr->ImmMoveWide()) || (instr->ShiftMoveWide() == 0)) + mnemonic = "mov"; + else + mnemonic = "movz"; + break; + case MOVK_w: + case MOVK_x: mnemonic = "movk"; form = "'Rd, 'IMoveLSL"; break; + default: VIXL_UNREACHABLE(); + } + Format(instr, mnemonic, form); +} + + +#define LOAD_STORE_LIST(V) \ + V(STRB_w, "strb", "'Wt") \ + V(STRH_w, "strh", "'Wt") \ + V(STR_w, "str", "'Wt") \ + V(STR_x, "str", "'Xt") \ + V(LDRB_w, "ldrb", "'Wt") \ + V(LDRH_w, "ldrh", "'Wt") \ + V(LDR_w, "ldr", "'Wt") \ + V(LDR_x, "ldr", "'Xt") \ + V(LDRSB_x, "ldrsb", "'Xt") \ + V(LDRSH_x, "ldrsh", "'Xt") \ + V(LDRSW_x, "ldrsw", "'Xt") \ + V(LDRSB_w, "ldrsb", "'Wt") \ + V(LDRSH_w, "ldrsh", "'Wt") \ + V(STR_b, "str", "'Bt") \ + V(STR_h, "str", "'Ht") \ + V(STR_s, "str", "'St") \ + V(STR_d, "str", "'Dt") \ + V(LDR_b, "ldr", "'Bt") \ + V(LDR_h, "ldr", "'Ht") \ + V(LDR_s, "ldr", "'St") \ + V(LDR_d, "ldr", "'Dt") \ + V(STR_q, "str", "'Qt") \ + V(LDR_q, "ldr", "'Qt") + +void Disassembler::VisitLoadStorePreIndex(const Instruction* instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(LoadStorePreIndex)"; + + switch (instr->Mask(LoadStorePreIndexMask)) { + #define LS_PREINDEX(A, B, C) \ + case A##_pre: mnemonic = B; form = C ", ['Xns'ILS]!"; break; + LOAD_STORE_LIST(LS_PREINDEX) + #undef LS_PREINDEX + } + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitLoadStorePostIndex(const Instruction* instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(LoadStorePostIndex)"; + + switch (instr->Mask(LoadStorePostIndexMask)) { + #define LS_POSTINDEX(A, B, C) \ + case A##_post: mnemonic 
= B; form = C ", ['Xns]'ILS"; break; + LOAD_STORE_LIST(LS_POSTINDEX) + #undef LS_POSTINDEX + } + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitLoadStoreUnsignedOffset(const Instruction* instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(LoadStoreUnsignedOffset)"; + + switch (instr->Mask(LoadStoreUnsignedOffsetMask)) { + #define LS_UNSIGNEDOFFSET(A, B, C) \ + case A##_unsigned: mnemonic = B; form = C ", ['Xns'ILU]"; break; + LOAD_STORE_LIST(LS_UNSIGNEDOFFSET) + #undef LS_UNSIGNEDOFFSET + case PRFM_unsigned: mnemonic = "prfm"; form = "'PrefOp, ['Xns'ILU]"; + } + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitLoadStoreRegisterOffset(const Instruction* instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(LoadStoreRegisterOffset)"; + + switch (instr->Mask(LoadStoreRegisterOffsetMask)) { + #define LS_REGISTEROFFSET(A, B, C) \ + case A##_reg: mnemonic = B; form = C ", ['Xns, 'Offsetreg]"; break; + LOAD_STORE_LIST(LS_REGISTEROFFSET) + #undef LS_REGISTEROFFSET + case PRFM_reg: mnemonic = "prfm"; form = "'PrefOp, ['Xns, 'Offsetreg]"; + } + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitLoadStoreUnscaledOffset(const Instruction* instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Wt, ['Xns'ILS]"; + const char *form_x = "'Xt, ['Xns'ILS]"; + const char *form_b = "'Bt, ['Xns'ILS]"; + const char *form_h = "'Ht, ['Xns'ILS]"; + const char *form_s = "'St, ['Xns'ILS]"; + const char *form_d = "'Dt, ['Xns'ILS]"; + const char *form_q = "'Qt, ['Xns'ILS]"; + const char *form_prefetch = "'PrefOp, ['Xns'ILS]"; + + switch (instr->Mask(LoadStoreUnscaledOffsetMask)) { + case STURB_w: mnemonic = "sturb"; break; + case STURH_w: mnemonic = "sturh"; break; + case STUR_w: mnemonic = "stur"; break; + case STUR_x: mnemonic = "stur"; form = form_x; break; + case STUR_b: mnemonic = "stur"; form = form_b; break; + case STUR_h: mnemonic = "stur"; form = form_h; break; + case STUR_s: mnemonic 
= "stur"; form = form_s; break; + case STUR_d: mnemonic = "stur"; form = form_d; break; + case STUR_q: mnemonic = "stur"; form = form_q; break; + case LDURB_w: mnemonic = "ldurb"; break; + case LDURH_w: mnemonic = "ldurh"; break; + case LDUR_w: mnemonic = "ldur"; break; + case LDUR_x: mnemonic = "ldur"; form = form_x; break; + case LDUR_b: mnemonic = "ldur"; form = form_b; break; + case LDUR_h: mnemonic = "ldur"; form = form_h; break; + case LDUR_s: mnemonic = "ldur"; form = form_s; break; + case LDUR_d: mnemonic = "ldur"; form = form_d; break; + case LDUR_q: mnemonic = "ldur"; form = form_q; break; + case LDURSB_x: form = form_x; VIXL_FALLTHROUGH(); + case LDURSB_w: mnemonic = "ldursb"; break; + case LDURSH_x: form = form_x; VIXL_FALLTHROUGH(); + case LDURSH_w: mnemonic = "ldursh"; break; + case LDURSW_x: mnemonic = "ldursw"; form = form_x; break; + case PRFUM: mnemonic = "prfum"; form = form_prefetch; break; + default: form = "(LoadStoreUnscaledOffset)"; + } + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitLoadLiteral(const Instruction* instr) { + const char *mnemonic = "ldr"; + const char *form = "(LoadLiteral)"; + + switch (instr->Mask(LoadLiteralMask)) { + case LDR_w_lit: form = "'Wt, 'ILLiteral 'LValue"; break; + case LDR_x_lit: form = "'Xt, 'ILLiteral 'LValue"; break; + case LDR_s_lit: form = "'St, 'ILLiteral 'LValue"; break; + case LDR_d_lit: form = "'Dt, 'ILLiteral 'LValue"; break; + case LDR_q_lit: form = "'Qt, 'ILLiteral 'LValue"; break; + case LDRSW_x_lit: { + mnemonic = "ldrsw"; + form = "'Xt, 'ILLiteral 'LValue"; + break; + } + case PRFM_lit: { + mnemonic = "prfm"; + form = "'PrefOp, 'ILLiteral 'LValue"; + break; + } + default: mnemonic = "unimplemented"; + } + Format(instr, mnemonic, form); +} + + +#define LOAD_STORE_PAIR_LIST(V) \ + V(STP_w, "stp", "'Wt, 'Wt2", "2") \ + V(LDP_w, "ldp", "'Wt, 'Wt2", "2") \ + V(LDPSW_x, "ldpsw", "'Xt, 'Xt2", "2") \ + V(STP_x, "stp", "'Xt, 'Xt2", "3") \ + V(LDP_x, "ldp", "'Xt, 'Xt2", "3") \ + V(STP_s, 
"stp", "'St, 'St2", "2") \ + V(LDP_s, "ldp", "'St, 'St2", "2") \ + V(STP_d, "stp", "'Dt, 'Dt2", "3") \ + V(LDP_d, "ldp", "'Dt, 'Dt2", "3") \ + V(LDP_q, "ldp", "'Qt, 'Qt2", "4") \ + V(STP_q, "stp", "'Qt, 'Qt2", "4") + +void Disassembler::VisitLoadStorePairPostIndex(const Instruction* instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(LoadStorePairPostIndex)"; + + switch (instr->Mask(LoadStorePairPostIndexMask)) { + #define LSP_POSTINDEX(A, B, C, D) \ + case A##_post: mnemonic = B; form = C ", ['Xns]'ILP" D; break; + LOAD_STORE_PAIR_LIST(LSP_POSTINDEX) + #undef LSP_POSTINDEX + } + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitLoadStorePairPreIndex(const Instruction* instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(LoadStorePairPreIndex)"; + + switch (instr->Mask(LoadStorePairPreIndexMask)) { + #define LSP_PREINDEX(A, B, C, D) \ + case A##_pre: mnemonic = B; form = C ", ['Xns'ILP" D "]!"; break; + LOAD_STORE_PAIR_LIST(LSP_PREINDEX) + #undef LSP_PREINDEX + } + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitLoadStorePairOffset(const Instruction* instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(LoadStorePairOffset)"; + + switch (instr->Mask(LoadStorePairOffsetMask)) { + #define LSP_OFFSET(A, B, C, D) \ + case A##_off: mnemonic = B; form = C ", ['Xns'ILP" D "]"; break; + LOAD_STORE_PAIR_LIST(LSP_OFFSET) + #undef LSP_OFFSET + } + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitLoadStorePairNonTemporal(const Instruction* instr) { + const char *mnemonic = "unimplemented"; + const char *form; + + switch (instr->Mask(LoadStorePairNonTemporalMask)) { + case STNP_w: mnemonic = "stnp"; form = "'Wt, 'Wt2, ['Xns'ILP2]"; break; + case LDNP_w: mnemonic = "ldnp"; form = "'Wt, 'Wt2, ['Xns'ILP2]"; break; + case STNP_x: mnemonic = "stnp"; form = "'Xt, 'Xt2, ['Xns'ILP3]"; break; + case LDNP_x: mnemonic = "ldnp"; form = "'Xt, 'Xt2, ['Xns'ILP3]"; break; + case STNP_s: 
mnemonic = "stnp"; form = "'St, 'St2, ['Xns'ILP2]"; break; + case LDNP_s: mnemonic = "ldnp"; form = "'St, 'St2, ['Xns'ILP2]"; break; + case STNP_d: mnemonic = "stnp"; form = "'Dt, 'Dt2, ['Xns'ILP3]"; break; + case LDNP_d: mnemonic = "ldnp"; form = "'Dt, 'Dt2, ['Xns'ILP3]"; break; + case STNP_q: mnemonic = "stnp"; form = "'Qt, 'Qt2, ['Xns'ILP4]"; break; + case LDNP_q: mnemonic = "ldnp"; form = "'Qt, 'Qt2, ['Xns'ILP4]"; break; + default: form = "(LoadStorePairNonTemporal)"; + } + Format(instr, mnemonic, form); +} + +// clang-format off +#define LOAD_STORE_EXCLUSIVE_LIST(V) \ + V(STXRB_w, "stxrb", "'Ws, 'Wt") \ + V(STXRH_w, "stxrh", "'Ws, 'Wt") \ + V(STXR_w, "stxr", "'Ws, 'Wt") \ + V(STXR_x, "stxr", "'Ws, 'Xt") \ + V(LDXRB_w, "ldxrb", "'Wt") \ + V(LDXRH_w, "ldxrh", "'Wt") \ + V(LDXR_w, "ldxr", "'Wt") \ + V(LDXR_x, "ldxr", "'Xt") \ + V(STXP_w, "stxp", "'Ws, 'Wt, 'Wt2") \ + V(STXP_x, "stxp", "'Ws, 'Xt, 'Xt2") \ + V(LDXP_w, "ldxp", "'Wt, 'Wt2") \ + V(LDXP_x, "ldxp", "'Xt, 'Xt2") \ + V(STLXRB_w, "stlxrb", "'Ws, 'Wt") \ + V(STLXRH_w, "stlxrh", "'Ws, 'Wt") \ + V(STLXR_w, "stlxr", "'Ws, 'Wt") \ + V(STLXR_x, "stlxr", "'Ws, 'Xt") \ + V(LDAXRB_w, "ldaxrb", "'Wt") \ + V(LDAXRH_w, "ldaxrh", "'Wt") \ + V(LDAXR_w, "ldaxr", "'Wt") \ + V(LDAXR_x, "ldaxr", "'Xt") \ + V(STLXP_w, "stlxp", "'Ws, 'Wt, 'Wt2") \ + V(STLXP_x, "stlxp", "'Ws, 'Xt, 'Xt2") \ + V(LDAXP_w, "ldaxp", "'Wt, 'Wt2") \ + V(LDAXP_x, "ldaxp", "'Xt, 'Xt2") \ + V(STLRB_w, "stlrb", "'Wt") \ + V(STLRH_w, "stlrh", "'Wt") \ + V(STLR_w, "stlr", "'Wt") \ + V(STLR_x, "stlr", "'Xt") \ + V(LDARB_w, "ldarb", "'Wt") \ + V(LDARH_w, "ldarh", "'Wt") \ + V(LDAR_w, "ldar", "'Wt") \ + V(LDAR_x, "ldar", "'Xt") \ + V(CAS_w, "cas", "'Ws, 'Wt") \ + V(CAS_x, "cas", "'Xs, 'Xt") \ + V(CASA_w, "casa", "'Ws, 'Wt") \ + V(CASA_x, "casa", "'Xs, 'Xt") \ + V(CASL_w, "casl", "'Ws, 'Wt") \ + V(CASL_x, "casl", "'Xs, 'Xt") \ + V(CASAL_w, "casal", "'Ws, 'Wt") \ + V(CASAL_x, "casal", "'Xs, 'Xt") \ + V(CASB, "casb", "'Ws, 'Wt") \ + V(CASAB, "casab", "'Ws, 'Wt") 
\ + V(CASLB, "caslb", "'Ws, 'Wt") \ + V(CASALB, "casalb", "'Ws, 'Wt") \ + V(CASH, "cash", "'Ws, 'Wt") \ + V(CASAH, "casah", "'Ws, 'Wt") \ + V(CASLH, "caslh", "'Ws, 'Wt") \ + V(CASALH, "casalh", "'Ws, 'Wt") \ + V(CASP_w, "casp", "'Ws, 'W(s+1), 'Wt, 'W(t+1)") \ + V(CASP_x, "casp", "'Xs, 'X(s+1), 'Xt, 'X(t+1)") \ + V(CASPA_w, "caspa", "'Ws, 'W(s+1), 'Wt, 'W(t+1)") \ + V(CASPA_x, "caspa", "'Xs, 'X(s+1), 'Xt, 'X(t+1)") \ + V(CASPL_w, "caspl", "'Ws, 'W(s+1), 'Wt, 'W(t+1)") \ + V(CASPL_x, "caspl", "'Xs, 'X(s+1), 'Xt, 'X(t+1)") \ + V(CASPAL_w, "caspal", "'Ws, 'W(s+1), 'Wt, 'W(t+1)") \ + V(CASPAL_x, "caspal", "'Xs, 'X(s+1), 'Xt, 'X(t+1)") +// clang-format on + +void Disassembler::VisitLoadStoreExclusive(const Instruction* instr) { + const char *mnemonic = "unimplemented"; + const char *form; + + switch (instr->Mask(LoadStoreExclusiveMask)) { +#define LSX(A, B, C) \ + case A: \ + mnemonic = B; \ + form = C ", ['Xns]"; \ + break; + LOAD_STORE_EXCLUSIVE_LIST(LSX) +#undef LSX + default: + form = "(LoadStoreExclusive)"; + } + + switch (instr->Mask(LoadStoreExclusiveMask)) { + case CASP_w: + case CASP_x: + case CASPA_w: + case CASPA_x: + case CASPL_w: + case CASPL_x: + case CASPAL_w: + case CASPAL_x: + if ((instr->Rs() % 2 == 1) || (instr->Rt() % 2 == 1)) { + mnemonic = "unallocated"; + form = "(LoadStoreExclusive)"; + } + break; + } + + Format(instr, mnemonic, form); +} + +#define ATOMIC_MEMORY_SIMPLE_LIST(V) \ + V(LDADD, "add") \ + V(LDCLR, "clr") \ + V(LDEOR, "eor") \ + V(LDSET, "set") \ + V(LDSMAX, "smax") \ + V(LDSMIN, "smin") \ + V(LDUMAX, "umax") \ + V(LDUMIN, "umin") + +void Disassembler::VisitAtomicMemory(const Instruction* instr) { + const int kMaxAtomicOpMnemonicLength = 16; + const char* mnemonic; + const char* form = "'Ws, 'Wt, ['Xns]"; + + switch (instr->Mask(AtomicMemoryMask)) { +#define AMS(A, MN) \ + case A##B: \ + mnemonic = MN "b"; \ + break; \ + case A##AB: \ + mnemonic = MN "ab"; \ + break; \ + case A##LB: \ + mnemonic = MN "lb"; \ + break; \ + case A##ALB: \ 
+ mnemonic = MN "alb"; \ + break; \ + case A##H: \ + mnemonic = MN "h"; \ + break; \ + case A##AH: \ + mnemonic = MN "ah"; \ + break; \ + case A##LH: \ + mnemonic = MN "lh"; \ + break; \ + case A##ALH: \ + mnemonic = MN "alh"; \ + break; \ + case A##_w: \ + mnemonic = MN; \ + break; \ + case A##A_w: \ + mnemonic = MN "a"; \ + break; \ + case A##L_w: \ + mnemonic = MN "l"; \ + break; \ + case A##AL_w: \ + mnemonic = MN "al"; \ + break; \ + case A##_x: \ + mnemonic = MN; \ + form = "'Xs, 'Xt, ['Xns]"; \ + break; \ + case A##A_x: \ + mnemonic = MN "a"; \ + form = "'Xs, 'Xt, ['Xns]"; \ + break; \ + case A##L_x: \ + mnemonic = MN "l"; \ + form = "'Xs, 'Xt, ['Xns]"; \ + break; \ + case A##AL_x: \ + mnemonic = MN "al"; \ + form = "'Xs, 'Xt, ['Xns]"; \ + break; + ATOMIC_MEMORY_SIMPLE_LIST(AMS) + + // SWP has the same semantics as ldadd etc but without the store aliases. + AMS(SWP, "swp") +#undef AMS + + case LDAPRB: + mnemonic = "ldaprb"; + form = "'Wt, ['Xns]"; + break; + case LDAPRH: + mnemonic = "ldaprh"; + form = "'Wt, ['Xns]"; + break; + case LDAPR_w: + mnemonic = "ldapr"; + form = "'Wt, ['Xns]"; + break; + case LDAPR_x: + mnemonic = "ldapr"; + form = "'Xt, ['Xns]"; + break; + default: + mnemonic = "unimplemented"; + form = "(AtomicMemory)"; + } + + const char* prefix = ""; + switch (instr->Mask(AtomicMemoryMask)) { +#define AMS(A, MN) \ + case A##AB: \ + case A##ALB: \ + case A##AH: \ + case A##ALH: \ + case A##A_w: \ + case A##AL_w: \ + case A##A_x: \ + case A##AL_x: \ + prefix = "ld"; \ + break; \ + case A##B: \ + case A##LB: \ + case A##H: \ + case A##LH: \ + case A##_w: \ + case A##L_w: { \ + prefix = "ld"; \ + unsigned rt = instr->Rt(); \ + if (Register(rt, 32).IsZero()) { \ + prefix = "st"; \ + form = "'Ws, ['Xns]"; \ + } \ + break; \ + } \ + case A##_x: \ + case A##L_x: { \ + prefix = "ld"; \ + unsigned rt = instr->Rt(); \ + if (Register(rt, 64).IsZero()) { \ + prefix = "st"; \ + form = "'Xs, ['Xns]"; \ + } \ + break; \ + } + ATOMIC_MEMORY_SIMPLE_LIST(AMS) 
+#undef AMS + } + + char buffer[kMaxAtomicOpMnemonicLength]; + if (strlen(prefix) > 0) { + snprintf(buffer, kMaxAtomicOpMnemonicLength, "%s%s", prefix, mnemonic); + mnemonic = buffer; + } + + Format(instr, mnemonic, form); +} + +void Disassembler::VisitFPCompare(const Instruction* instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Fn, 'Fm"; + const char *form_zero = "'Fn, #0.0"; + + switch (instr->Mask(FPCompareMask)) { + case FCMP_s_zero: + case FCMP_d_zero: form = form_zero; VIXL_FALLTHROUGH(); + case FCMP_s: + case FCMP_d: mnemonic = "fcmp"; break; + case FCMPE_s_zero: + case FCMPE_d_zero: form = form_zero; VIXL_FALLTHROUGH(); + case FCMPE_s: + case FCMPE_d: mnemonic = "fcmpe"; break; + default: form = "(FPCompare)"; + } + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitFPConditionalCompare(const Instruction* instr) { + const char *mnemonic = "unmplemented"; + const char *form = "'Fn, 'Fm, 'INzcv, 'Cond"; + + switch (instr->Mask(FPConditionalCompareMask)) { + case FCCMP_s: + case FCCMP_d: mnemonic = "fccmp"; break; + case FCCMPE_s: + case FCCMPE_d: mnemonic = "fccmpe"; break; + default: form = "(FPConditionalCompare)"; + } + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitFPConditionalSelect(const Instruction* instr) { + const char *mnemonic = ""; + const char *form = "'Fd, 'Fn, 'Fm, 'Cond"; + + switch (instr->Mask(FPConditionalSelectMask)) { + case FCSEL_s: + case FCSEL_d: mnemonic = "fcsel"; break; + default: VIXL_UNREACHABLE(); + } + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitFPDataProcessing1Source(const Instruction* instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Fd, 'Fn"; + + switch (instr->Mask(FPDataProcessing1SourceMask)) { + #define FORMAT(A, B) \ + case A##_s: \ + case A##_d: mnemonic = B; break; + FORMAT(FMOV, "fmov"); + FORMAT(FABS, "fabs"); + FORMAT(FNEG, "fneg"); + FORMAT(FSQRT, "fsqrt"); + FORMAT(FRINTN, "frintn"); + FORMAT(FRINTP, "frintp"); + 
FORMAT(FRINTM, "frintm"); + FORMAT(FRINTZ, "frintz"); + FORMAT(FRINTA, "frinta"); + FORMAT(FRINTX, "frintx"); + FORMAT(FRINTI, "frinti"); + #undef FORMAT + case FCVT_ds: mnemonic = "fcvt"; form = "'Dd, 'Sn"; break; + case FCVT_sd: mnemonic = "fcvt"; form = "'Sd, 'Dn"; break; + case FCVT_hs: mnemonic = "fcvt"; form = "'Hd, 'Sn"; break; + case FCVT_sh: mnemonic = "fcvt"; form = "'Sd, 'Hn"; break; + case FCVT_dh: mnemonic = "fcvt"; form = "'Dd, 'Hn"; break; + case FCVT_hd: mnemonic = "fcvt"; form = "'Hd, 'Dn"; break; + default: form = "(FPDataProcessing1Source)"; + } + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitFPDataProcessing2Source(const Instruction* instr) { + const char *mnemonic = ""; + const char *form = "'Fd, 'Fn, 'Fm"; + + switch (instr->Mask(FPDataProcessing2SourceMask)) { + #define FORMAT(A, B) \ + case A##_s: \ + case A##_d: mnemonic = B; break; + FORMAT(FMUL, "fmul"); + FORMAT(FDIV, "fdiv"); + FORMAT(FADD, "fadd"); + FORMAT(FSUB, "fsub"); + FORMAT(FMAX, "fmax"); + FORMAT(FMIN, "fmin"); + FORMAT(FMAXNM, "fmaxnm"); + FORMAT(FMINNM, "fminnm"); + FORMAT(FNMUL, "fnmul"); + #undef FORMAT + default: VIXL_UNREACHABLE(); + } + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitFPDataProcessing3Source(const Instruction* instr) { + const char *mnemonic = ""; + const char *form = "'Fd, 'Fn, 'Fm, 'Fa"; + + switch (instr->Mask(FPDataProcessing3SourceMask)) { + #define FORMAT(A, B) \ + case A##_s: \ + case A##_d: mnemonic = B; break; + FORMAT(FMADD, "fmadd"); + FORMAT(FMSUB, "fmsub"); + FORMAT(FNMADD, "fnmadd"); + FORMAT(FNMSUB, "fnmsub"); + #undef FORMAT + default: VIXL_UNREACHABLE(); + } + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitFPImmediate(const Instruction* instr) { + const char *mnemonic = ""; + const char *form = "(FPImmediate)"; + + switch (instr->Mask(FPImmediateMask)) { + case FMOV_s_imm: mnemonic = "fmov"; form = "'Sd, 'IFPSingle"; break; + case FMOV_d_imm: mnemonic = "fmov"; form = "'Dd, 'IFPDouble"; 
break; + default: VIXL_UNREACHABLE(); + } + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitFPIntegerConvert(const Instruction* instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(FPIntegerConvert)"; + const char *form_rf = "'Rd, 'Fn"; + const char *form_fr = "'Fd, 'Rn"; + + switch (instr->Mask(FPIntegerConvertMask)) { + case FMOV_ws: + case FMOV_xd: mnemonic = "fmov"; form = form_rf; break; + case FMOV_sw: + case FMOV_dx: mnemonic = "fmov"; form = form_fr; break; + case FMOV_d1_x: mnemonic = "fmov"; form = "'Vd.D[1], 'Rn"; break; + case FMOV_x_d1: mnemonic = "fmov"; form = "'Rd, 'Vn.D[1]"; break; + case FCVTAS_ws: + case FCVTAS_xs: + case FCVTAS_wd: + case FCVTAS_xd: mnemonic = "fcvtas"; form = form_rf; break; + case FCVTAU_ws: + case FCVTAU_xs: + case FCVTAU_wd: + case FCVTAU_xd: mnemonic = "fcvtau"; form = form_rf; break; + case FCVTMS_ws: + case FCVTMS_xs: + case FCVTMS_wd: + case FCVTMS_xd: mnemonic = "fcvtms"; form = form_rf; break; + case FCVTMU_ws: + case FCVTMU_xs: + case FCVTMU_wd: + case FCVTMU_xd: mnemonic = "fcvtmu"; form = form_rf; break; + case FCVTNS_ws: + case FCVTNS_xs: + case FCVTNS_wd: + case FCVTNS_xd: mnemonic = "fcvtns"; form = form_rf; break; + case FCVTNU_ws: + case FCVTNU_xs: + case FCVTNU_wd: + case FCVTNU_xd: mnemonic = "fcvtnu"; form = form_rf; break; + case FCVTZU_xd: + case FCVTZU_ws: + case FCVTZU_wd: + case FCVTZU_xs: mnemonic = "fcvtzu"; form = form_rf; break; + case FCVTZS_xd: + case FCVTZS_wd: + case FCVTZS_xs: + case FCVTZS_ws: mnemonic = "fcvtzs"; form = form_rf; break; + case FCVTPU_xd: + case FCVTPU_ws: + case FCVTPU_wd: + case FCVTPU_xs: mnemonic = "fcvtpu"; form = form_rf; break; + case FCVTPS_xd: + case FCVTPS_wd: + case FCVTPS_xs: + case FCVTPS_ws: mnemonic = "fcvtps"; form = form_rf; break; + case SCVTF_sw: + case SCVTF_sx: + case SCVTF_dw: + case SCVTF_dx: mnemonic = "scvtf"; form = form_fr; break; + case UCVTF_sw: + case UCVTF_sx: + case UCVTF_dw: + case UCVTF_dx: mnemonic = 
"ucvtf"; form = form_fr; break; + case FJCVTZS: mnemonic = "fjcvtzs"; form = form_rf; break; + } + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitFPFixedPointConvert(const Instruction* instr) { + const char *mnemonic = ""; + const char *form = "'Rd, 'Fn, 'IFPFBits"; + const char *form_fr = "'Fd, 'Rn, 'IFPFBits"; + + switch (instr->Mask(FPFixedPointConvertMask)) { + case FCVTZS_ws_fixed: + case FCVTZS_xs_fixed: + case FCVTZS_wd_fixed: + case FCVTZS_xd_fixed: mnemonic = "fcvtzs"; break; + case FCVTZU_ws_fixed: + case FCVTZU_xs_fixed: + case FCVTZU_wd_fixed: + case FCVTZU_xd_fixed: mnemonic = "fcvtzu"; break; + case SCVTF_sw_fixed: + case SCVTF_sx_fixed: + case SCVTF_dw_fixed: + case SCVTF_dx_fixed: mnemonic = "scvtf"; form = form_fr; break; + case UCVTF_sw_fixed: + case UCVTF_sx_fixed: + case UCVTF_dw_fixed: + case UCVTF_dx_fixed: mnemonic = "ucvtf"; form = form_fr; break; + default: VIXL_UNREACHABLE(); + } + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitSystem(const Instruction* instr) { + // Some system instructions hijack their Op and Cp fields to represent a + // range of immediates instead of indicating a different instruction. This + // makes the decoding tricky. + const char *mnemonic = "unimplemented"; + const char *form = "(System)"; + + if (instr->Mask(SystemExclusiveMonitorFMask) == SystemExclusiveMonitorFixed) { + switch (instr->Mask(SystemExclusiveMonitorMask)) { + case CLREX: { + mnemonic = "clrex"; + form = (instr->CRm() == 0xf) ? 
NULL : "'IX"; + break; + } + } + } else if (instr->Mask(SystemSysRegFMask) == SystemSysRegFixed) { + switch (instr->Mask(SystemSysRegMask)) { + case MRS: { + mnemonic = "mrs"; + switch (instr->ImmSystemRegister()) { + case NZCV: form = "'Xt, nzcv"; break; + case FPCR: form = "'Xt, fpcr"; break; + default: form = "'Xt, (unknown)"; break; + } + break; + } + case MSR: { + mnemonic = "msr"; + switch (instr->ImmSystemRegister()) { + case NZCV: form = "nzcv, 'Xt"; break; + case FPCR: form = "fpcr, 'Xt"; break; + default: form = "(unknown), 'Xt"; break; + } + break; + } + } + } else if (instr->Mask(SystemHintFMask) == SystemHintFixed) { + switch (instr->ImmHint()) { + case NOP: { + mnemonic = "nop"; + form = NULL; + break; + } + case CSDB: { + mnemonic = "csdb"; + form = NULL; + break; + } + } + } else if (instr->Mask(MemBarrierFMask) == MemBarrierFixed) { + switch (instr->Mask(MemBarrierMask)) { + case DMB: { + mnemonic = "dmb"; + form = "'M"; + break; + } + case DSB: { + mnemonic = "dsb"; + form = "'M"; + break; + } + case ISB: { + mnemonic = "isb"; + form = NULL; + break; + } + } + } else if (instr->Mask(SystemSysFMask) == SystemSysFixed) { + switch (instr->SysOp()) { + case IVAU: + mnemonic = "ic"; + form = "ivau, 'Xt"; + break; + case CVAC: + mnemonic = "dc"; + form = "cvac, 'Xt"; + break; + case CVAU: + mnemonic = "dc"; + form = "cvau, 'Xt"; + break; + case CIVAC: + mnemonic = "dc"; + form = "civac, 'Xt"; + break; + case ZVA: + mnemonic = "dc"; + form = "zva, 'Xt"; + break; + default: + mnemonic = "sys"; + if (instr->Rt() == 31) { + form = "'G1, 'Kn, 'Km, 'G2"; + } else { + form = "'G1, 'Kn, 'Km, 'G2, 'Xt"; + } + break; + } + } + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitException(const Instruction* instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'IDebug"; + + switch (instr->Mask(ExceptionMask)) { + case HLT: mnemonic = "hlt"; break; + case BRK: mnemonic = "brk"; break; + case SVC: mnemonic = "svc"; break; + case HVC: 
mnemonic = "hvc"; break; + case SMC: mnemonic = "smc"; break; + case DCPS1: mnemonic = "dcps1"; form = "{'IDebug}"; break; + case DCPS2: mnemonic = "dcps2"; form = "{'IDebug}"; break; + case DCPS3: mnemonic = "dcps3"; form = "{'IDebug}"; break; + default: form = "(Exception)"; + } + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitCrypto2RegSHA(const Instruction* instr) { + VisitUnimplemented(instr); +} + + +void Disassembler::VisitCrypto3RegSHA(const Instruction* instr) { + VisitUnimplemented(instr); +} + + +void Disassembler::VisitCryptoAES(const Instruction* instr) { + VisitUnimplemented(instr); +} + + +void Disassembler::VisitNEON2RegMisc(const Instruction* instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Vd.%s, 'Vn.%s"; + const char *form_cmp_zero = "'Vd.%s, 'Vn.%s, #0"; + const char *form_fcmp_zero = "'Vd.%s, 'Vn.%s, #0.0"; + NEONFormatDecoder nfd(instr); + + static const NEONFormatMap map_lp_ta = { + {23, 22, 30}, {NF_4H, NF_8H, NF_2S, NF_4S, NF_1D, NF_2D} + }; + + static const NEONFormatMap map_cvt_ta = { + {22}, {NF_4S, NF_2D} + }; + + static const NEONFormatMap map_cvt_tb = { + {22, 30}, {NF_4H, NF_8H, NF_2S, NF_4S} + }; + + if (instr->Mask(NEON2RegMiscOpcode) <= NEON_NEG_opcode) { + // These instructions all use a two bit size field, except NOT and RBIT, + // which use the field to encode the operation. 
+ switch (instr->Mask(NEON2RegMiscMask)) { + case NEON_REV64: mnemonic = "rev64"; break; + case NEON_REV32: mnemonic = "rev32"; break; + case NEON_REV16: mnemonic = "rev16"; break; + case NEON_SADDLP: + mnemonic = "saddlp"; + nfd.SetFormatMap(0, &map_lp_ta); + break; + case NEON_UADDLP: + mnemonic = "uaddlp"; + nfd.SetFormatMap(0, &map_lp_ta); + break; + case NEON_SUQADD: mnemonic = "suqadd"; break; + case NEON_USQADD: mnemonic = "usqadd"; break; + case NEON_CLS: mnemonic = "cls"; break; + case NEON_CLZ: mnemonic = "clz"; break; + case NEON_CNT: mnemonic = "cnt"; break; + case NEON_SADALP: + mnemonic = "sadalp"; + nfd.SetFormatMap(0, &map_lp_ta); + break; + case NEON_UADALP: + mnemonic = "uadalp"; + nfd.SetFormatMap(0, &map_lp_ta); + break; + case NEON_SQABS: mnemonic = "sqabs"; break; + case NEON_SQNEG: mnemonic = "sqneg"; break; + case NEON_CMGT_zero: mnemonic = "cmgt"; form = form_cmp_zero; break; + case NEON_CMGE_zero: mnemonic = "cmge"; form = form_cmp_zero; break; + case NEON_CMEQ_zero: mnemonic = "cmeq"; form = form_cmp_zero; break; + case NEON_CMLE_zero: mnemonic = "cmle"; form = form_cmp_zero; break; + case NEON_CMLT_zero: mnemonic = "cmlt"; form = form_cmp_zero; break; + case NEON_ABS: mnemonic = "abs"; break; + case NEON_NEG: mnemonic = "neg"; break; + case NEON_RBIT_NOT: + switch (instr->FPType()) { + case 0: mnemonic = "mvn"; break; + case 1: mnemonic = "rbit"; break; + default: form = "(NEON2RegMisc)"; + } + nfd.SetFormatMaps(nfd.LogicalFormatMap()); + break; + } + } else { + // These instructions all use a one bit size field, except XTN, SQXTUN, + // SHLL, SQXTN and UQXTN, which use a two bit size field. + nfd.SetFormatMaps(nfd.FPFormatMap()); + switch (instr->Mask(NEON2RegMiscFPMask)) { + case NEON_FABS: mnemonic = "fabs"; break; + case NEON_FNEG: mnemonic = "fneg"; break; + case NEON_FCVTN: + mnemonic = instr->Mask(NEON_Q) ? 
"fcvtn2" : "fcvtn"; + nfd.SetFormatMap(0, &map_cvt_tb); + nfd.SetFormatMap(1, &map_cvt_ta); + break; + case NEON_FCVTXN: + mnemonic = instr->Mask(NEON_Q) ? "fcvtxn2" : "fcvtxn"; + nfd.SetFormatMap(0, &map_cvt_tb); + nfd.SetFormatMap(1, &map_cvt_ta); + break; + case NEON_FCVTL: + mnemonic = instr->Mask(NEON_Q) ? "fcvtl2" : "fcvtl"; + nfd.SetFormatMap(0, &map_cvt_ta); + nfd.SetFormatMap(1, &map_cvt_tb); + break; + case NEON_FRINTN: mnemonic = "frintn"; break; + case NEON_FRINTA: mnemonic = "frinta"; break; + case NEON_FRINTP: mnemonic = "frintp"; break; + case NEON_FRINTM: mnemonic = "frintm"; break; + case NEON_FRINTX: mnemonic = "frintx"; break; + case NEON_FRINTZ: mnemonic = "frintz"; break; + case NEON_FRINTI: mnemonic = "frinti"; break; + case NEON_FCVTNS: mnemonic = "fcvtns"; break; + case NEON_FCVTNU: mnemonic = "fcvtnu"; break; + case NEON_FCVTPS: mnemonic = "fcvtps"; break; + case NEON_FCVTPU: mnemonic = "fcvtpu"; break; + case NEON_FCVTMS: mnemonic = "fcvtms"; break; + case NEON_FCVTMU: mnemonic = "fcvtmu"; break; + case NEON_FCVTZS: mnemonic = "fcvtzs"; break; + case NEON_FCVTZU: mnemonic = "fcvtzu"; break; + case NEON_FCVTAS: mnemonic = "fcvtas"; break; + case NEON_FCVTAU: mnemonic = "fcvtau"; break; + case NEON_FSQRT: mnemonic = "fsqrt"; break; + case NEON_SCVTF: mnemonic = "scvtf"; break; + case NEON_UCVTF: mnemonic = "ucvtf"; break; + case NEON_URSQRTE: mnemonic = "ursqrte"; break; + case NEON_URECPE: mnemonic = "urecpe"; break; + case NEON_FRSQRTE: mnemonic = "frsqrte"; break; + case NEON_FRECPE: mnemonic = "frecpe"; break; + case NEON_FCMGT_zero: mnemonic = "fcmgt"; form = form_fcmp_zero; break; + case NEON_FCMGE_zero: mnemonic = "fcmge"; form = form_fcmp_zero; break; + case NEON_FCMEQ_zero: mnemonic = "fcmeq"; form = form_fcmp_zero; break; + case NEON_FCMLE_zero: mnemonic = "fcmle"; form = form_fcmp_zero; break; + case NEON_FCMLT_zero: mnemonic = "fcmlt"; form = form_fcmp_zero; break; + default: + if ((NEON_XTN_opcode <= 
instr->Mask(NEON2RegMiscOpcode)) && + (instr->Mask(NEON2RegMiscOpcode) <= NEON_UQXTN_opcode)) { + nfd.SetFormatMap(0, nfd.IntegerFormatMap()); + nfd.SetFormatMap(1, nfd.LongIntegerFormatMap()); + + switch (instr->Mask(NEON2RegMiscMask)) { + case NEON_XTN: mnemonic = "xtn"; break; + case NEON_SQXTN: mnemonic = "sqxtn"; break; + case NEON_UQXTN: mnemonic = "uqxtn"; break; + case NEON_SQXTUN: mnemonic = "sqxtun"; break; + case NEON_SHLL: + mnemonic = "shll"; + nfd.SetFormatMap(0, nfd.LongIntegerFormatMap()); + nfd.SetFormatMap(1, nfd.IntegerFormatMap()); + switch (instr->NEONSize()) { + case 0: form = "'Vd.%s, 'Vn.%s, #8"; break; + case 1: form = "'Vd.%s, 'Vn.%s, #16"; break; + case 2: form = "'Vd.%s, 'Vn.%s, #32"; break; + default: form = "(NEON2RegMisc)"; + } + } + Format(instr, nfd.Mnemonic(mnemonic), nfd.Substitute(form)); + return; + } else { + form = "(NEON2RegMisc)"; + } + } + } + Format(instr, mnemonic, nfd.Substitute(form)); +} + + +void Disassembler::VisitNEON3Same(const Instruction* instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Vd.%s, 'Vn.%s, 'Vm.%s"; + NEONFormatDecoder nfd(instr); + + if (instr->Mask(NEON3SameLogicalFMask) == NEON3SameLogicalFixed) { + switch (instr->Mask(NEON3SameLogicalMask)) { + case NEON_AND: mnemonic = "and"; break; + case NEON_ORR: + mnemonic = "orr"; + if (instr->Rm() == instr->Rn()) { + mnemonic = "mov"; + form = "'Vd.%s, 'Vn.%s"; + } + break; + case NEON_ORN: mnemonic = "orn"; break; + case NEON_EOR: mnemonic = "eor"; break; + case NEON_BIC: mnemonic = "bic"; break; + case NEON_BIF: mnemonic = "bif"; break; + case NEON_BIT: mnemonic = "bit"; break; + case NEON_BSL: mnemonic = "bsl"; break; + default: form = "(NEON3Same)"; + } + nfd.SetFormatMaps(nfd.LogicalFormatMap()); + } else { + static const char *mnemonics[] = { + "shadd", "uhadd", "shadd", "uhadd", + "sqadd", "uqadd", "sqadd", "uqadd", + "srhadd", "urhadd", "srhadd", "urhadd", + NULL, NULL, NULL, NULL, // Handled by logical cases above. 
+ "shsub", "uhsub", "shsub", "uhsub", + "sqsub", "uqsub", "sqsub", "uqsub", + "cmgt", "cmhi", "cmgt", "cmhi", + "cmge", "cmhs", "cmge", "cmhs", + "sshl", "ushl", "sshl", "ushl", + "sqshl", "uqshl", "sqshl", "uqshl", + "srshl", "urshl", "srshl", "urshl", + "sqrshl", "uqrshl", "sqrshl", "uqrshl", + "smax", "umax", "smax", "umax", + "smin", "umin", "smin", "umin", + "sabd", "uabd", "sabd", "uabd", + "saba", "uaba", "saba", "uaba", + "add", "sub", "add", "sub", + "cmtst", "cmeq", "cmtst", "cmeq", + "mla", "mls", "mla", "mls", + "mul", "pmul", "mul", "pmul", + "smaxp", "umaxp", "smaxp", "umaxp", + "sminp", "uminp", "sminp", "uminp", + "sqdmulh", "sqrdmulh", "sqdmulh", "sqrdmulh", + "addp", "unallocated", "addp", "unallocated", + "fmaxnm", "fmaxnmp", "fminnm", "fminnmp", + "fmla", "unallocated", "fmls", "unallocated", + "fadd", "faddp", "fsub", "fabd", + "fmulx", "fmul", "unallocated", "unallocated", + "fcmeq", "fcmge", "unallocated", "fcmgt", + "unallocated", "facge", "unallocated", "facgt", + "fmax", "fmaxp", "fmin", "fminp", + "frecps", "fdiv", "frsqrts", "unallocated"}; + + // Operation is determined by the opcode bits (15-11), the top bit of + // size (23) and the U bit (29). + unsigned index = (instr->Bits(15, 11) << 2) | (instr->Bit(23) << 1) | + instr->Bit(29); + VIXL_ASSERT(index < (sizeof(mnemonics) / sizeof(mnemonics[0]))); + mnemonic = mnemonics[index]; + // Assert that index is not one of the previously handled logical + // instructions. + VIXL_ASSERT(mnemonic != NULL); + + if (instr->Mask(NEON3SameFPFMask) == NEON3SameFPFixed) { + nfd.SetFormatMaps(nfd.FPFormatMap()); + } + } + Format(instr, mnemonic, nfd.Substitute(form)); +} + + +void Disassembler::VisitNEON3Different(const Instruction* instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Vd.%s, 'Vn.%s, 'Vm.%s"; + + NEONFormatDecoder nfd(instr); + nfd.SetFormatMap(0, nfd.LongIntegerFormatMap()); + + // Ignore the Q bit. Appending a "2" suffix is handled later. 
+ switch (instr->Mask(NEON3DifferentMask) & ~NEON_Q) { + case NEON_PMULL: mnemonic = "pmull"; break; + case NEON_SABAL: mnemonic = "sabal"; break; + case NEON_SABDL: mnemonic = "sabdl"; break; + case NEON_SADDL: mnemonic = "saddl"; break; + case NEON_SMLAL: mnemonic = "smlal"; break; + case NEON_SMLSL: mnemonic = "smlsl"; break; + case NEON_SMULL: mnemonic = "smull"; break; + case NEON_SSUBL: mnemonic = "ssubl"; break; + case NEON_SQDMLAL: mnemonic = "sqdmlal"; break; + case NEON_SQDMLSL: mnemonic = "sqdmlsl"; break; + case NEON_SQDMULL: mnemonic = "sqdmull"; break; + case NEON_UABAL: mnemonic = "uabal"; break; + case NEON_UABDL: mnemonic = "uabdl"; break; + case NEON_UADDL: mnemonic = "uaddl"; break; + case NEON_UMLAL: mnemonic = "umlal"; break; + case NEON_UMLSL: mnemonic = "umlsl"; break; + case NEON_UMULL: mnemonic = "umull"; break; + case NEON_USUBL: mnemonic = "usubl"; break; + case NEON_SADDW: + mnemonic = "saddw"; + nfd.SetFormatMap(1, nfd.LongIntegerFormatMap()); + break; + case NEON_SSUBW: + mnemonic = "ssubw"; + nfd.SetFormatMap(1, nfd.LongIntegerFormatMap()); + break; + case NEON_UADDW: + mnemonic = "uaddw"; + nfd.SetFormatMap(1, nfd.LongIntegerFormatMap()); + break; + case NEON_USUBW: + mnemonic = "usubw"; + nfd.SetFormatMap(1, nfd.LongIntegerFormatMap()); + break; + case NEON_ADDHN: + mnemonic = "addhn"; + nfd.SetFormatMaps(nfd.LongIntegerFormatMap()); + nfd.SetFormatMap(0, nfd.IntegerFormatMap()); + break; + case NEON_RADDHN: + mnemonic = "raddhn"; + nfd.SetFormatMaps(nfd.LongIntegerFormatMap()); + nfd.SetFormatMap(0, nfd.IntegerFormatMap()); + break; + case NEON_RSUBHN: + mnemonic = "rsubhn"; + nfd.SetFormatMaps(nfd.LongIntegerFormatMap()); + nfd.SetFormatMap(0, nfd.IntegerFormatMap()); + break; + case NEON_SUBHN: + mnemonic = "subhn"; + nfd.SetFormatMaps(nfd.LongIntegerFormatMap()); + nfd.SetFormatMap(0, nfd.IntegerFormatMap()); + break; + default: form = "(NEON3Different)"; + } + Format(instr, nfd.Mnemonic(mnemonic), nfd.Substitute(form)); +} + + 
+// Disassembles a NEON across-lanes instruction.
+//
+// The default operand form is "%sd, 'Vn.%s", decoded with the scalar format
+// map for the destination and the integer vector map for the source.
+//  - Encodings in the FP group switch both maps to their FP variants.
+//  - SADDLV/UADDLV switch the destination to the long scalar map.
+//  - An encoding that matches a group but no known opcode keeps the
+//    "unimplemented" mnemonic and uses the "(NEONAcrossLanes)" form.
+//  - An encoding that matches neither group keeps the defaults untouched.
+void Disassembler::VisitNEONAcrossLanes(const Instruction* instr) {
+  const char *mnemonic = "unimplemented";
+  const char *form = "%sd, 'Vn.%s";
+
+  NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap(),
+                        NEONFormatDecoder::IntegerFormatMap());
+
+  if (instr->Mask(NEONAcrossLanesFPFMask) == NEONAcrossLanesFPFixed) {
+    nfd.SetFormatMap(0, nfd.FPScalarFormatMap());
+    nfd.SetFormatMap(1, nfd.FPFormatMap());
+    switch (instr->Mask(NEONAcrossLanesFPMask)) {
+      case NEON_FMAXV: mnemonic = "fmaxv"; break;
+      case NEON_FMINV: mnemonic = "fminv"; break;
+      case NEON_FMAXNMV: mnemonic = "fmaxnmv"; break;
+      case NEON_FMINNMV: mnemonic = "fminnmv"; break;
+      default: form = "(NEONAcrossLanes)"; break;
+    }
+  } else if (instr->Mask(NEONAcrossLanesFMask) == NEONAcrossLanesFixed) {
+    switch (instr->Mask(NEONAcrossLanesMask)) {
+      case NEON_ADDV: mnemonic = "addv"; break;
+      case NEON_SMAXV: mnemonic = "smaxv"; break;
+      case NEON_SMINV: mnemonic = "sminv"; break;
+      case NEON_UMAXV: mnemonic = "umaxv"; break;
+      case NEON_UMINV: mnemonic = "uminv"; break;
+      case NEON_SADDLV:
+        mnemonic = "saddlv";
+        nfd.SetFormatMap(0, nfd.LongScalarFormatMap());
+        break;
+      case NEON_UADDLV:
+        mnemonic = "uaddlv";
+        nfd.SetFormatMap(0, nfd.LongScalarFormatMap());
+        break;
+      default: form = "(NEONAcrossLanes)"; break;
+    }
+  }
+  // First substitution is a scalar placeholder, second a vector format.
+  Format(instr, mnemonic, nfd.Substitute(form,
+      NEONFormatDecoder::kPlaceholder, NEONFormatDecoder::kFormat));
+}
+
+
+// Disassembles a NEON vector-by-indexed-element instruction.
+//
+// l_instr is set for the *L mnemonics (smull, umlal, sqdmull, ...), which are
+// printed through nfd.Mnemonic(); fp_instr is set for the FP forms, which
+// switch format map 0 to the FP vector map. All remaining cases use the
+// integer vector map for operand 0.
+void Disassembler::VisitNEONByIndexedElement(const Instruction* instr) {
+  const char *mnemonic = "unimplemented";
+  bool l_instr = false;
+  bool fp_instr = false;
+
+  const char *form = "'Vd.%s, 'Vn.%s, 'Ve.%s['IVByElemIndex]";
+
+  static const NEONFormatMap map_ta = {
+    {23, 22}, {NF_UNDEF, NF_4S, NF_2D}
+  };
+  NEONFormatDecoder nfd(instr, &map_ta,
+                        NEONFormatDecoder::IntegerFormatMap(),
+                        NEONFormatDecoder::ScalarFormatMap());
+
+  switch (instr->Mask(NEONByIndexedElementMask)) {
+    case NEON_SMULL_byelement: mnemonic = "smull"; l_instr = true; break;
+    case NEON_UMULL_byelement: mnemonic = "umull"; l_instr = true; break;
+    case NEON_SMLAL_byelement: mnemonic = "smlal"; l_instr = true; break;
+    case NEON_UMLAL_byelement: mnemonic = "umlal"; l_instr = true; break;
+    case NEON_SMLSL_byelement: mnemonic = "smlsl"; l_instr = true; break;
+    case NEON_UMLSL_byelement: mnemonic = "umlsl"; l_instr = true; break;
+    case NEON_SQDMULL_byelement: mnemonic = "sqdmull"; l_instr = true; break;
+    case NEON_SQDMLAL_byelement: mnemonic = "sqdmlal"; l_instr = true; break;
+    case NEON_SQDMLSL_byelement: mnemonic = "sqdmlsl"; l_instr = true; break;
+    case NEON_MUL_byelement: mnemonic = "mul"; break;
+    case NEON_MLA_byelement: mnemonic = "mla"; break;
+    case NEON_MLS_byelement: mnemonic = "mls"; break;
+    case NEON_SQDMULH_byelement: mnemonic = "sqdmulh"; break;
+    case NEON_SQRDMULH_byelement: mnemonic = "sqrdmulh"; break;
+    default:
+      // Not an integer form: retry with the FP mask. There is no inner
+      // default, so an unknown encoding keeps the "unimplemented" mnemonic.
+      switch (instr->Mask(NEONByIndexedElementFPMask)) {
+        case NEON_FMUL_byelement: mnemonic = "fmul"; fp_instr = true; break;
+        case NEON_FMLA_byelement: mnemonic = "fmla"; fp_instr = true; break;
+        case NEON_FMLS_byelement: mnemonic = "fmls"; fp_instr = true; break;
+        case NEON_FMULX_byelement: mnemonic = "fmulx"; fp_instr = true; break;
+      }
+  }
+
+  if (l_instr) {
+    Format(instr, nfd.Mnemonic(mnemonic), nfd.Substitute(form));
+  } else if (fp_instr) {
+    nfd.SetFormatMap(0, nfd.FPFormatMap());
+    Format(instr, mnemonic, nfd.Substitute(form));
+  } else {
+    nfd.SetFormatMap(0, nfd.IntegerFormatMap());
+    Format(instr, mnemonic, nfd.Substitute(form));
+  }
+}
+
+
+// Disassembles the NEON copy group: INS (element and general), UMOV, SMOV and
+// DUP (element and general). Each form is recognized by its own mask; if none
+// matches, the "unimplemented" mnemonic and "(NEONCopy)" form are kept.
+void Disassembler::VisitNEONCopy(const Instruction* instr) {
+  const char *mnemonic = "unimplemented";
+  const char *form = "(NEONCopy)";
+
+  NEONFormatDecoder nfd(instr, NEONFormatDecoder::TriangularFormatMap(),
+                        NEONFormatDecoder::TriangularScalarFormatMap());
+
+  if (instr->Mask(NEONCopyInsElementMask) == NEON_INS_ELEMENT) {
+    mnemonic = "mov";
+    nfd.SetFormatMap(0, nfd.TriangularScalarFormatMap());
+    form = "'Vd.%s['IVInsIndex1], 'Vn.%s['IVInsIndex2]";
+  } else if (instr->Mask(NEONCopyInsGeneralMask) == NEON_INS_GENERAL) {
+    mnemonic = "mov";
+    nfd.SetFormatMap(0, nfd.TriangularScalarFormatMap());
+    // A D-sized lane takes an X source register, anything else a W register.
+    if (nfd.GetVectorFormat() == kFormatD) {
+      form = "'Vd.%s['IVInsIndex1], 'Xn";
+    } else {
+      form = "'Vd.%s['IVInsIndex1], 'Wn";
+    }
+  } else if (instr->Mask(NEONCopyUmovMask) == NEON_UMOV) {
+    // Printed as "mov" when Q is set or (imm5 & 7) == 4, otherwise "umov".
+    if (instr->Mask(NEON_Q) || ((instr->ImmNEON5() & 7) == 4)) {
+      mnemonic = "mov";
+    } else {
+      mnemonic = "umov";
+    }
+    nfd.SetFormatMap(0, nfd.TriangularScalarFormatMap());
+    if (nfd.GetVectorFormat() == kFormatD) {
+      form = "'Xd, 'Vn.%s['IVInsIndex1]";
+    } else {
+      form = "'Wd, 'Vn.%s['IVInsIndex1]";
+    }
+  } else if (instr->Mask(NEONCopySmovMask) == NEON_SMOV) {
+    mnemonic = "smov";
+    nfd.SetFormatMap(0, nfd.TriangularScalarFormatMap());
+    form = "'Rdq, 'Vn.%s['IVInsIndex1]";
+  } else if (instr->Mask(NEONCopyDupElementMask) == NEON_DUP_ELEMENT) {
+    mnemonic = "dup";
+    form = "'Vd.%s, 'Vn.%s['IVInsIndex1]";
+  } else if (instr->Mask(NEONCopyDupGeneralMask) == NEON_DUP_GENERAL) {
+    mnemonic = "dup";
+    if (nfd.GetVectorFormat() == kFormat2D) {
+      form = "'Vd.%s, 'Xn";
+    } else {
+      form = "'Vd.%s, 'Wn";
+    }
+  }
+  Format(instr, mnemonic, nfd.Substitute(form));
+}
+
+
+// Disassembles NEON EXT. Any other encoding in this group keeps the
+// "unimplemented" mnemonic and the "(NEONExtract)" form.
+void Disassembler::VisitNEONExtract(const Instruction* instr) {
+  const char *mnemonic = "unimplemented";
+  const char *form = "(NEONExtract)";
+  NEONFormatDecoder nfd(instr, NEONFormatDecoder::LogicalFormatMap());
+  if (instr->Mask(NEONExtractMask) == NEON_EXT) {
+    mnemonic = "ext";
+    form = "'Vd.%s, 'Vn.%s, 'Vm.%s, 'IVExtract";
+  }
+  Format(instr, mnemonic, nfd.Substitute(form));
+}
+
+
+// Disassembles NEON multi-structure loads/stores (LD1-LD4 / ST1-ST4) with a
+// plain ['Xns] address. form_1v..form_4v list one to four registers; they use
+// the positional "%1$s" so every register in the list gets the same format
+// from the single load/store format map.
+void Disassembler::VisitNEONLoadStoreMultiStruct(const Instruction* instr) {
+  const char *mnemonic = "unimplemented";
+  const char *form = "(NEONLoadStoreMultiStruct)";
+  const char *form_1v = "{'Vt.%1$s}, ['Xns]";
+  const char *form_2v = "{'Vt.%1$s, 'Vt2.%1$s}, ['Xns]";
+  const char *form_3v = "{'Vt.%1$s, 'Vt2.%1$s, 'Vt3.%1$s}, ['Xns]";
+  const char *form_4v = "{'Vt.%1$s, 'Vt2.%1$s, 'Vt3.%1$s, 'Vt4.%1$s}, ['Xns]";
+  NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap());
+
+  switch (instr->Mask(NEONLoadStoreMultiStructMask)) {
+    case NEON_LD1_1v: mnemonic = "ld1"; form = form_1v; break;
+    case NEON_LD1_2v: mnemonic = "ld1"; form = form_2v; break;
+    case NEON_LD1_3v: mnemonic = "ld1"; form = form_3v; break;
+    case NEON_LD1_4v: mnemonic = "ld1"; form = form_4v; break;
+    case NEON_LD2: mnemonic = "ld2"; form = form_2v; break;
+    case NEON_LD3: mnemonic = "ld3"; form = form_3v; break;
+    case NEON_LD4: mnemonic = "ld4"; form = form_4v; break;
+    case NEON_ST1_1v: mnemonic = "st1"; form = form_1v; break;
+    case NEON_ST1_2v: mnemonic = "st1"; form = form_2v; break;
+    case NEON_ST1_3v: mnemonic = "st1"; form = form_3v; break;
+    case NEON_ST1_4v: mnemonic = "st1"; form = form_4v; break;
+    case NEON_ST2: mnemonic = "st2"; form = form_2v; break;
+    case NEON_ST3: mnemonic = "st3"; form = form_3v; break;
+    case NEON_ST4: mnemonic = "st4"; form = form_4v; break;
+    default: break;
+  }
+
+  Format(instr, mnemonic, nfd.Substitute(form));
+}
+
+
+// Post-index counterpart of VisitNEONLoadStoreMultiStruct: same mnemonics and
+// register lists, with a trailing 'Xmr<N> operand where N is the number of
+// registers in the list.
+// NOTE(review): how 'Xmr<N> is expanded is decided by the substitution code
+// outside this function - confirm there.
+void Disassembler::VisitNEONLoadStoreMultiStructPostIndex(
+    const Instruction* instr) {
+  const char *mnemonic = "unimplemented";
+  const char *form = "(NEONLoadStoreMultiStructPostIndex)";
+  const char *form_1v = "{'Vt.%1$s}, ['Xns], 'Xmr1";
+  const char *form_2v = "{'Vt.%1$s, 'Vt2.%1$s}, ['Xns], 'Xmr2";
+  const char *form_3v = "{'Vt.%1$s, 'Vt2.%1$s, 'Vt3.%1$s}, ['Xns], 'Xmr3";
+  const char *form_4v =
+      "{'Vt.%1$s, 'Vt2.%1$s, 'Vt3.%1$s, 'Vt4.%1$s}, ['Xns], 'Xmr4";
+  NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap());
+
+  switch (instr->Mask(NEONLoadStoreMultiStructPostIndexMask)) {
+    case NEON_LD1_1v_post: mnemonic = "ld1"; form = form_1v; break;
+    case NEON_LD1_2v_post: mnemonic = "ld1"; form = form_2v; break;
+    case NEON_LD1_3v_post: mnemonic = "ld1"; form = form_3v; break;
+    case NEON_LD1_4v_post: mnemonic = "ld1"; form = form_4v; break;
+    case NEON_LD2_post:
mnemonic = "ld2"; form = form_2v; break;
+    case NEON_LD3_post: mnemonic = "ld3"; form = form_3v; break;
+    case NEON_LD4_post: mnemonic = "ld4"; form = form_4v; break;
+    case NEON_ST1_1v_post: mnemonic = "st1"; form = form_1v; break;
+    case NEON_ST1_2v_post: mnemonic = "st1"; form = form_2v; break;
+    case NEON_ST1_3v_post: mnemonic = "st1"; form = form_3v; break;
+    case NEON_ST1_4v_post: mnemonic = "st1"; form = form_4v; break;
+    case NEON_ST2_post: mnemonic = "st2"; form = form_2v; break;
+    case NEON_ST3_post: mnemonic = "st3"; form = form_3v; break;
+    case NEON_ST4_post: mnemonic = "st4"; form = form_4v; break;
+    default: break;
+  }
+
+  Format(instr, mnemonic, nfd.Substitute(form));
+}
+
+
+// Disassembles NEON single-structure loads/stores with a plain ['Xns]
+// address: the per-lane LD1-LD4 / ST1-ST4 forms and the LD1R-LD4R loads.
+//
+// The _s cases also handle the _d encodings: the low bit of NEONLSSize()
+// selects between the .s and .d operand forms (the static asserts pin that
+// the two encodings differ only in that bit). For the combined LDn/STn cases
+// LdStXLoad() == 1 selects the load mnemonic. Unknown encodings keep the
+// "unimplemented" mnemonic and the "(NEONLoadStoreSingleStruct)" form.
+void Disassembler::VisitNEONLoadStoreSingleStruct(const Instruction* instr) {
+  const char *mnemonic = "unimplemented";
+  const char *form = "(NEONLoadStoreSingleStruct)";
+
+  const char *form_1b = "{'Vt.b}['IVLSLane0], ['Xns]";
+  const char *form_1h = "{'Vt.h}['IVLSLane1], ['Xns]";
+  const char *form_1s = "{'Vt.s}['IVLSLane2], ['Xns]";
+  const char *form_1d = "{'Vt.d}['IVLSLane3], ['Xns]";
+  NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap());
+
+  switch (instr->Mask(NEONLoadStoreSingleStructMask)) {
+    case NEON_LD1_b: mnemonic = "ld1"; form = form_1b; break;
+    case NEON_LD1_h: mnemonic = "ld1"; form = form_1h; break;
+    case NEON_LD1_s:
+      mnemonic = "ld1";
+      VIXL_STATIC_ASSERT((NEON_LD1_s | (1 << NEONLSSize_offset)) == NEON_LD1_d);
+      form = ((instr->NEONLSSize() & 1) == 0) ? form_1s : form_1d;
+      break;
+    case NEON_ST1_b: mnemonic = "st1"; form = form_1b; break;
+    case NEON_ST1_h: mnemonic = "st1"; form = form_1h; break;
+    case NEON_ST1_s:
+      mnemonic = "st1";
+      VIXL_STATIC_ASSERT((NEON_ST1_s | (1 << NEONLSSize_offset)) == NEON_ST1_d);
+      form = ((instr->NEONLSSize() & 1) == 0) ? form_1s : form_1d;
+      break;
+    case NEON_LD1R:
+      mnemonic = "ld1r";
+      form = "{'Vt.%s}, ['Xns]";
+      break;
+    case NEON_LD2_b:
+    case NEON_ST2_b:
+      mnemonic = (instr->LdStXLoad() == 1) ? "ld2" : "st2";
+      form = "{'Vt.b, 'Vt2.b}['IVLSLane0], ['Xns]";
+      break;
+    case NEON_LD2_h:
+    case NEON_ST2_h:
+      mnemonic = (instr->LdStXLoad() == 1) ? "ld2" : "st2";
+      form = "{'Vt.h, 'Vt2.h}['IVLSLane1], ['Xns]";
+      break;
+    case NEON_LD2_s:
+    case NEON_ST2_s:
+      VIXL_STATIC_ASSERT((NEON_ST2_s | (1 << NEONLSSize_offset)) == NEON_ST2_d);
+      VIXL_STATIC_ASSERT((NEON_LD2_s | (1 << NEONLSSize_offset)) == NEON_LD2_d);
+      mnemonic = (instr->LdStXLoad() == 1) ? "ld2" : "st2";
+      if ((instr->NEONLSSize() & 1) == 0)
+        form = "{'Vt.s, 'Vt2.s}['IVLSLane2], ['Xns]";
+      else
+        form = "{'Vt.d, 'Vt2.d}['IVLSLane3], ['Xns]";
+      break;
+    case NEON_LD2R:
+      mnemonic = "ld2r";
+      form = "{'Vt.%s, 'Vt2.%s}, ['Xns]";
+      break;
+    case NEON_LD3_b:
+    case NEON_ST3_b:
+      mnemonic = (instr->LdStXLoad() == 1) ? "ld3" : "st3";
+      form = "{'Vt.b, 'Vt2.b, 'Vt3.b}['IVLSLane0], ['Xns]";
+      break;
+    case NEON_LD3_h:
+    case NEON_ST3_h:
+      mnemonic = (instr->LdStXLoad() == 1) ? "ld3" : "st3";
+      form = "{'Vt.h, 'Vt2.h, 'Vt3.h}['IVLSLane1], ['Xns]";
+      break;
+    case NEON_LD3_s:
+    case NEON_ST3_s:
+      mnemonic = (instr->LdStXLoad() == 1) ? "ld3" : "st3";
+      if ((instr->NEONLSSize() & 1) == 0)
+        form = "{'Vt.s, 'Vt2.s, 'Vt3.s}['IVLSLane2], ['Xns]";
+      else
+        form = "{'Vt.d, 'Vt2.d, 'Vt3.d}['IVLSLane3], ['Xns]";
+      break;
+    case NEON_LD3R:
+      mnemonic = "ld3r";
+      form = "{'Vt.%s, 'Vt2.%s, 'Vt3.%s}, ['Xns]";
+      break;
+    case NEON_LD4_b:
+    case NEON_ST4_b:
+      mnemonic = (instr->LdStXLoad() == 1) ? "ld4" : "st4";
+      form = "{'Vt.b, 'Vt2.b, 'Vt3.b, 'Vt4.b}['IVLSLane0], ['Xns]";
+      break;
+    case NEON_LD4_h:
+    case NEON_ST4_h:
+      mnemonic = (instr->LdStXLoad() == 1) ? "ld4" : "st4";
+      form = "{'Vt.h, 'Vt2.h, 'Vt3.h, 'Vt4.h}['IVLSLane1], ['Xns]";
+      break;
+    case NEON_LD4_s:
+    case NEON_ST4_s:
+      VIXL_STATIC_ASSERT((NEON_LD4_s | (1 << NEONLSSize_offset)) == NEON_LD4_d);
+      VIXL_STATIC_ASSERT((NEON_ST4_s | (1 << NEONLSSize_offset)) == NEON_ST4_d);
+      mnemonic = (instr->LdStXLoad() == 1) ? "ld4" : "st4";
+      if ((instr->NEONLSSize() & 1) == 0)
+        form = "{'Vt.s, 'Vt2.s, 'Vt3.s, 'Vt4.s}['IVLSLane2], ['Xns]";
+      else
+        form = "{'Vt.d, 'Vt2.d, 'Vt3.d, 'Vt4.d}['IVLSLane3], ['Xns]";
+      break;
+    case NEON_LD4R:
+      // Four registers but a single format: use the positional "%1$s".
+      mnemonic = "ld4r";
+      form = "{'Vt.%1$s, 'Vt2.%1$s, 'Vt3.%1$s, 'Vt4.%1$s}, ['Xns]";
+      break;
+    default: break;
+  }
+
+  Format(instr, mnemonic, nfd.Substitute(form));
+}
+
+
+// Post-index counterpart of VisitNEONLoadStoreSingleStruct.
+//
+// The per-lane forms end in 'Xmb<N>, where N is the number of bytes moved by
+// the instruction (registers in the list * element size): 1/2/4/8 for LD1,
+// 2/4/8/16 for LD2, 3/6/12/24 for LD3 and 4/8/16/32 for LD4. The replicating
+// LDnR forms end in 'Xmz<N>, with N the number of registers.
+// NOTE(review): the expansion of the 'Xm* tokens lives in the substitution
+// code outside this function - confirm there.
+void Disassembler::VisitNEONLoadStoreSingleStructPostIndex(
+    const Instruction* instr) {
+  const char *mnemonic = "unimplemented";
+  const char *form = "(NEONLoadStoreSingleStructPostIndex)";
+
+  const char *form_1b = "{'Vt.b}['IVLSLane0], ['Xns], 'Xmb1";
+  const char *form_1h = "{'Vt.h}['IVLSLane1], ['Xns], 'Xmb2";
+  const char *form_1s = "{'Vt.s}['IVLSLane2], ['Xns], 'Xmb4";
+  const char *form_1d = "{'Vt.d}['IVLSLane3], ['Xns], 'Xmb8";
+  NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap());
+
+  switch (instr->Mask(NEONLoadStoreSingleStructPostIndexMask)) {
+    case NEON_LD1_b_post: mnemonic = "ld1"; form = form_1b; break;
+    case NEON_LD1_h_post: mnemonic = "ld1"; form = form_1h; break;
+    case NEON_LD1_s_post:
+      mnemonic = "ld1";
+      VIXL_STATIC_ASSERT((NEON_LD1_s | (1 << NEONLSSize_offset)) == NEON_LD1_d);
+      form = ((instr->NEONLSSize() & 1) == 0) ? form_1s : form_1d;
+      break;
+    case NEON_ST1_b_post: mnemonic = "st1"; form = form_1b; break;
+    case NEON_ST1_h_post: mnemonic = "st1"; form = form_1h; break;
+    case NEON_ST1_s_post:
+      mnemonic = "st1";
+      VIXL_STATIC_ASSERT((NEON_ST1_s | (1 << NEONLSSize_offset)) == NEON_ST1_d);
+      form = ((instr->NEONLSSize() & 1) == 0) ? form_1s : form_1d;
+      break;
+    case NEON_LD1R_post:
+      mnemonic = "ld1r";
+      form = "{'Vt.%s}, ['Xns], 'Xmz1";
+      break;
+    case NEON_LD2_b_post:
+    case NEON_ST2_b_post:
+      mnemonic = (instr->LdStXLoad() == 1) ? "ld2" : "st2";
+      form = "{'Vt.b, 'Vt2.b}['IVLSLane0], ['Xns], 'Xmb2";
+      break;
+    case NEON_ST2_h_post:
+    case NEON_LD2_h_post:
+      mnemonic = (instr->LdStXLoad() == 1) ? "ld2" : "st2";
+      form = "{'Vt.h, 'Vt2.h}['IVLSLane1], ['Xns], 'Xmb4";
+      break;
+    case NEON_LD2_s_post:
+    case NEON_ST2_s_post:
+      mnemonic = (instr->LdStXLoad() == 1) ? "ld2" : "st2";
+      if ((instr->NEONLSSize() & 1) == 0)
+        form = "{'Vt.s, 'Vt2.s}['IVLSLane2], ['Xns], 'Xmb8";
+      else
+        form = "{'Vt.d, 'Vt2.d}['IVLSLane3], ['Xns], 'Xmb16";
+      break;
+    case NEON_LD2R_post:
+      mnemonic = "ld2r";
+      form = "{'Vt.%s, 'Vt2.%s}, ['Xns], 'Xmz2";
+      break;
+    case NEON_LD3_b_post:
+    case NEON_ST3_b_post:
+      mnemonic = (instr->LdStXLoad() == 1) ? "ld3" : "st3";
+      form = "{'Vt.b, 'Vt2.b, 'Vt3.b}['IVLSLane0], ['Xns], 'Xmb3";
+      break;
+    case NEON_LD3_h_post:
+    case NEON_ST3_h_post:
+      mnemonic = (instr->LdStXLoad() == 1) ? "ld3" : "st3";
+      form = "{'Vt.h, 'Vt2.h, 'Vt3.h}['IVLSLane1], ['Xns], 'Xmb6";
+      break;
+    case NEON_LD3_s_post:
+    case NEON_ST3_s_post:
+      mnemonic = (instr->LdStXLoad() == 1) ? "ld3" : "st3";
+      if ((instr->NEONLSSize() & 1) == 0)
+        form = "{'Vt.s, 'Vt2.s, 'Vt3.s}['IVLSLane2], ['Xns], 'Xmb12";
+      else
+        // Three D-sized elements are 3 * 8 = 24 bytes, in line with the
+        // sibling .d cases ('Xmb16 for two, 'Xmb32 for four). This used to
+        // be 'Xmr3, the register-count tag of the multi-structure forms,
+        // which does not describe a single-lane transfer.
+        form = "{'Vt.d, 'Vt2.d, 'Vt3.d}['IVLSLane3], ['Xns], 'Xmb24";
+      break;
+    case NEON_LD3R_post:
+      mnemonic = "ld3r";
+      form = "{'Vt.%s, 'Vt2.%s, 'Vt3.%s}, ['Xns], 'Xmz3";
+      break;
+    case NEON_LD4_b_post:
+    case NEON_ST4_b_post:
+      mnemonic = (instr->LdStXLoad() == 1) ? "ld4" : "st4";
+      form = "{'Vt.b, 'Vt2.b, 'Vt3.b, 'Vt4.b}['IVLSLane0], ['Xns], 'Xmb4";
+      break;
+    case NEON_LD4_h_post:
+    case NEON_ST4_h_post:
+      mnemonic = (instr->LdStXLoad() == 1) ? "ld4" : "st4";
+      form = "{'Vt.h, 'Vt2.h, 'Vt3.h, 'Vt4.h}['IVLSLane1], ['Xns], 'Xmb8";
+      break;
+    case NEON_LD4_s_post:
+    case NEON_ST4_s_post:
+      mnemonic = (instr->LdStXLoad() == 1) ? "ld4" : "st4";
+      if ((instr->NEONLSSize() & 1) == 0)
+        form = "{'Vt.s, 'Vt2.s, 'Vt3.s, 'Vt4.s}['IVLSLane2], ['Xns], 'Xmb16";
+      else
+        form = "{'Vt.d, 'Vt2.d, 'Vt3.d, 'Vt4.d}['IVLSLane3], ['Xns], 'Xmb32";
+      break;
+    case NEON_LD4R_post:
+      mnemonic = "ld4r";
+      form = "{'Vt.%1$s, 'Vt2.%1$s, 'Vt3.%1$s, 'Vt4.%1$s}, ['Xns], 'Xmz4";
+      break;
+    default: break;
+  }
+
+  Format(instr, mnemonic, nfd.Substitute(form));
+}
+
+
+// Disassembles the NEON modified-immediate group (movi, mvni, orr, bic and
+// fmov with an immediate operand).
+//
+// The four cmode bits are examined from the top down; together with op and Q
+// they select the mnemonic, the operand form and the vector format map
+// (map_b, map_h or map_s, each keyed on bit 30). The default form is the
+// "lsl"-shifted 8-bit immediate.
+void Disassembler::VisitNEONModifiedImmediate(const Instruction* instr) {
+  const char *mnemonic = "unimplemented";
+  const char *form = "'Vt.%s, 'IVMIImm8, lsl 'IVMIShiftAmt1";
+
+  int cmode = instr->NEONCmode();
+  int cmode_3 = (cmode >> 3) & 1;
+  int cmode_2 = (cmode >> 2) & 1;
+  int cmode_1 = (cmode >> 1) & 1;
+  int cmode_0 = cmode & 1;
+  int q = instr->NEONQ();
+  int op = instr->NEONModImmOp();
+
+  static const NEONFormatMap map_b = { {30}, {NF_8B, NF_16B} };
+  static const NEONFormatMap map_h = { {30}, {NF_4H, NF_8H} };
+  static const NEONFormatMap map_s = { {30}, {NF_2S, NF_4S} };
+  NEONFormatDecoder nfd(instr, &map_b);
+
+  if (cmode_3 == 0) {
+    if (cmode_0 == 0) {
+      mnemonic = (op == 1) ? "mvni" : "movi";
+    } else {  // cmode<0> == '1'.
+      mnemonic = (op == 1) ? "bic" : "orr";
+    }
+    nfd.SetFormatMap(0, &map_s);
+  } else {  // cmode<3> == '1'.
+    if (cmode_2 == 0) {
+      if (cmode_0 == 0) {
+        mnemonic = (op == 1) ? "mvni" : "movi";
+      } else {  // cmode<0> == '1'.
+        mnemonic = (op == 1) ? "bic" : "orr";
+      }
+      nfd.SetFormatMap(0, &map_h);
+    } else {  // cmode<2> == '1'.
+      if (cmode_1 == 0) {
+        mnemonic = (op == 1) ? "mvni" : "movi";
+        form = "'Vt.%s, 'IVMIImm8, msl 'IVMIShiftAmt2";
+        nfd.SetFormatMap(0, &map_s);
+      } else {  // cmode<1> == '1'.
+        if (cmode_0 == 0) {
+          mnemonic = "movi";
+          if (op == 0) {
+            form = "'Vt.%s, 'IVMIImm8";
+          } else {
+            form = (q == 0) ? "'Dd, 'IVMIImm" : "'Vt.2d, 'IVMIImm";
+          }
+        } else {  // cmode<0> == '1'
+          mnemonic = "fmov";
+          if (op == 0) {
+            form = "'Vt.%s, 'IVMIImmFPSingle";
+            nfd.SetFormatMap(0, &map_s);
+          } else {
+            if (q == 1) {
+              form = "'Vt.2d, 'IVMIImmFPDouble";
+            } else {
+              // op == 1 with Q == 0 is not a valid FMOV encoding. It used to
+              // fall through and print "fmov" with the default lsl-shifted
+              // integer form; report it as unallocated instead.
+              mnemonic = "unallocated";
+              form = "(NEONModifiedImmediate)";
+            }
+          }
+        }
+      }
+    }
+  }
+  Format(instr, mnemonic, nfd.Substitute(form));
+}
+
+
+// Disassembles NEON scalar two-register miscellaneous instructions.
+//
+// Opcodes up to NEON_NEG_scalar_opcode are decoded with the scalar format
+// map; the rest are decoded with the FP scalar map first.
+void Disassembler::VisitNEONScalar2RegMisc(const Instruction* instr) {
+  const char *mnemonic = "unimplemented";
+  const char *form = "%sd, %sn";
+  const char *form_0 = "%sd, %sn, #0";
+  const char *form_fp0 = "%sd, %sn, #0.0";
+
+  NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap());
+
+  if (instr->Mask(NEON2RegMiscOpcode) <= NEON_NEG_scalar_opcode) {
+    // These instructions all use a two bit size field, except NOT and RBIT,
+    // which use the field to encode the operation.
+    switch (instr->Mask(NEONScalar2RegMiscMask)) {
+      case NEON_CMGT_zero_scalar: mnemonic = "cmgt"; form = form_0; break;
+      case NEON_CMGE_zero_scalar: mnemonic = "cmge"; form = form_0; break;
+      case NEON_CMLE_zero_scalar: mnemonic = "cmle"; form = form_0; break;
+      case NEON_CMLT_zero_scalar: mnemonic = "cmlt"; form = form_0; break;
+      case NEON_CMEQ_zero_scalar: mnemonic = "cmeq"; form = form_0; break;
+      case NEON_NEG_scalar: mnemonic = "neg"; break;
+      case NEON_SQNEG_scalar: mnemonic = "sqneg"; break;
+      case NEON_ABS_scalar: mnemonic = "abs"; break;
+      case NEON_SQABS_scalar: mnemonic = "sqabs"; break;
+      case NEON_SUQADD_scalar: mnemonic = "suqadd"; break;
+      case NEON_USQADD_scalar: mnemonic = "usqadd"; break;
+      default: form = "(NEONScalar2RegMisc)";
+    }
+  } else {
+    // These instructions all use a one bit size field, except SQXTUN, SQXTN
+    // and UQXTN, which use a two bit size field.
+    nfd.SetFormatMaps(nfd.FPScalarFormatMap());
+    switch (instr->Mask(NEONScalar2RegMiscFPMask)) {
+      case NEON_FRSQRTE_scalar: mnemonic = "frsqrte"; break;
+      case NEON_FRECPE_scalar: mnemonic = "frecpe"; break;
+      case NEON_SCVTF_scalar: mnemonic = "scvtf"; break;
+      case NEON_UCVTF_scalar: mnemonic = "ucvtf"; break;
+      case NEON_FCMGT_zero_scalar: mnemonic = "fcmgt"; form = form_fp0; break;
+      case NEON_FCMGE_zero_scalar: mnemonic = "fcmge"; form = form_fp0; break;
+      case NEON_FCMLE_zero_scalar: mnemonic = "fcmle"; form = form_fp0; break;
+      case NEON_FCMLT_zero_scalar: mnemonic = "fcmlt"; form = form_fp0; break;
+      case NEON_FCMEQ_zero_scalar: mnemonic = "fcmeq"; form = form_fp0; break;
+      case NEON_FRECPX_scalar: mnemonic = "frecpx"; break;
+      case NEON_FCVTNS_scalar: mnemonic = "fcvtns"; break;
+      case NEON_FCVTNU_scalar: mnemonic = "fcvtnu"; break;
+      case NEON_FCVTPS_scalar: mnemonic = "fcvtps"; break;
+      case NEON_FCVTPU_scalar: mnemonic = "fcvtpu"; break;
+      case NEON_FCVTMS_scalar: mnemonic = "fcvtms"; break;
+      case NEON_FCVTMU_scalar: mnemonic = "fcvtmu"; break;
+      case NEON_FCVTZS_scalar: mnemonic = "fcvtzs"; break;
+      case NEON_FCVTZU_scalar: mnemonic = "fcvtzu"; break;
+      case NEON_FCVTAS_scalar: mnemonic = "fcvtas"; break;
+      case NEON_FCVTAU_scalar: mnemonic = "fcvtau"; break;
+      case NEON_FCVTXN_scalar:
+        nfd.SetFormatMap(0, nfd.LongScalarFormatMap());
+        mnemonic = "fcvtxn";
+        break;
+      default:
+        // Not an FP form: retry as a narrowing integer instruction, with the
+        // scalar map for the destination and the long scalar map for the
+        // source.
+        nfd.SetFormatMap(0, nfd.ScalarFormatMap());
+        nfd.SetFormatMap(1, nfd.LongScalarFormatMap());
+        switch (instr->Mask(NEONScalar2RegMiscMask)) {
+          case NEON_SQXTN_scalar: mnemonic = "sqxtn"; break;
+          case NEON_UQXTN_scalar: mnemonic = "uqxtn"; break;
+          case NEON_SQXTUN_scalar: mnemonic = "sqxtun"; break;
+          default: form = "(NEONScalar2RegMisc)";
+        }
+    }
+  }
+  Format(instr, mnemonic, nfd.SubstitutePlaceholders(form));
+}
+
+
+// Disassembles NEON scalar three-different instructions (sqdmlal, sqdmlsl,
+// sqdmull). The destination uses the long scalar map and the sources the
+// scalar map. Unknown encodings keep the "unimplemented" mnemonic and use
+// the "(NEONScalar3Diff)" form.
+void Disassembler::VisitNEONScalar3Diff(const Instruction* instr) {
+  const char *mnemonic = "unimplemented";
+  const char *form = "%sd, %sn, %sm";
+  NEONFormatDecoder nfd(instr, NEONFormatDecoder::LongScalarFormatMap(),
+                        NEONFormatDecoder::ScalarFormatMap());
+
+  switch (instr->Mask(NEONScalar3DiffMask)) {
+    case NEON_SQDMLAL_scalar : mnemonic = "sqdmlal"; break;
+    case NEON_SQDMLSL_scalar : mnemonic = "sqdmlsl"; break;
+    case NEON_SQDMULL_scalar : mnemonic = "sqdmull"; break;
+    default: form = "(NEONScalar3Diff)";
+  }
+  Format(instr, mnemonic, nfd.SubstitutePlaceholders(form));
+}
+
+
+// Disassembles NEON scalar three-same instructions. Encodings in the FP group
+// switch every format map to the FP scalar map; all others are decoded with
+// the scalar map. Unknown encodings in either group keep the "unimplemented"
+// mnemonic and use the "(NEONScalar3Same)" form.
+void Disassembler::VisitNEONScalar3Same(const Instruction* instr) {
+  const char *mnemonic = "unimplemented";
+  const char *form = "%sd, %sn, %sm";
+  NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap());
+
+  if (instr->Mask(NEONScalar3SameFPFMask) == NEONScalar3SameFPFixed) {
+    nfd.SetFormatMaps(nfd.FPScalarFormatMap());
+    switch (instr->Mask(NEONScalar3SameFPMask)) {
+      case NEON_FACGE_scalar: mnemonic = "facge"; break;
+      case NEON_FACGT_scalar: mnemonic = "facgt"; break;
+      case NEON_FCMEQ_scalar: mnemonic = "fcmeq"; break;
+      case NEON_FCMGE_scalar: mnemonic = "fcmge"; break;
+      case NEON_FCMGT_scalar: mnemonic = "fcmgt"; break;
+      case NEON_FMULX_scalar: mnemonic = "fmulx"; break;
+      case NEON_FRECPS_scalar: mnemonic = "frecps"; break;
+      case NEON_FRSQRTS_scalar: mnemonic = "frsqrts"; break;
+      case NEON_FABD_scalar: mnemonic = "fabd"; break;
+      default: form = "(NEONScalar3Same)";
+    }
+  } else {
+    switch (instr->Mask(NEONScalar3SameMask)) {
+      case NEON_ADD_scalar: mnemonic = "add"; break;
+      case NEON_SUB_scalar: mnemonic = "sub"; break;
+      case NEON_CMEQ_scalar: mnemonic = "cmeq"; break;
+      case NEON_CMGE_scalar: mnemonic = "cmge"; break;
+      case NEON_CMGT_scalar: mnemonic = "cmgt"; break;
+      case NEON_CMHI_scalar: mnemonic = "cmhi"; break;
+      case NEON_CMHS_scalar: mnemonic = "cmhs"; break;
+      case NEON_CMTST_scalar: mnemonic = "cmtst"; break;
+      case NEON_UQADD_scalar: mnemonic = "uqadd"; break;
+      case NEON_SQADD_scalar: mnemonic = "sqadd"; break;
+      case NEON_UQSUB_scalar: mnemonic = "uqsub"; break;
+      case NEON_SQSUB_scalar: mnemonic = "sqsub"; break;
+      case NEON_USHL_scalar: mnemonic = "ushl"; break;
+      case NEON_SSHL_scalar: mnemonic = "sshl"; break;
+      case NEON_UQSHL_scalar: mnemonic = "uqshl"; break;
+      case NEON_SQSHL_scalar: mnemonic = "sqshl"; break;
+      case NEON_URSHL_scalar: mnemonic = "urshl"; break;
+      case NEON_SRSHL_scalar: mnemonic = "srshl"; break;
+      case NEON_UQRSHL_scalar: mnemonic = "uqrshl"; break;
+      case NEON_SQRSHL_scalar: mnemonic = "sqrshl"; break;
+      case NEON_SQDMULH_scalar: mnemonic = "sqdmulh"; break;
+      case NEON_SQRDMULH_scalar: mnemonic = "sqrdmulh"; break;
+      default: form = "(NEONScalar3Same)";
+    }
+  }
+  Format(instr, mnemonic, nfd.SubstitutePlaceholders(form));
+}
+
+
+// Disassembles NEON scalar by-indexed-element instructions.
+//
+// The integer forms are matched first; long_instr marks the sqdmull/sqdmlal/
+// sqdmlsl forms, whose destination is switched to the long scalar map before
+// formatting. Everything else is retried as an FP form with the FP scalar map
+// for operand 0; if that fails too, the "(NEONScalarByIndexedElement)" form
+// is used.
+void Disassembler::VisitNEONScalarByIndexedElement(const Instruction* instr) {
+  const char *mnemonic = "unimplemented";
+  const char *form = "%sd, %sn, 'Ve.%s['IVByElemIndex]";
+  NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap());
+  bool long_instr = false;
+
+  switch (instr->Mask(NEONScalarByIndexedElementMask)) {
+    case NEON_SQDMULL_byelement_scalar:
+      mnemonic = "sqdmull";
+      long_instr = true;
+      break;
+    case NEON_SQDMLAL_byelement_scalar:
+      mnemonic = "sqdmlal";
+      long_instr = true;
+      break;
+    case NEON_SQDMLSL_byelement_scalar:
+      mnemonic = "sqdmlsl";
+      long_instr = true;
+      break;
+    case NEON_SQDMULH_byelement_scalar:
+      mnemonic = "sqdmulh";
+      break;
+    case NEON_SQRDMULH_byelement_scalar:
+      mnemonic = "sqrdmulh";
+      break;
+    default:
+      nfd.SetFormatMap(0, nfd.FPScalarFormatMap());
+      switch (instr->Mask(NEONScalarByIndexedElementFPMask)) {
+        case NEON_FMUL_byelement_scalar: mnemonic = "fmul"; break;
+        case NEON_FMLA_byelement_scalar: mnemonic = "fmla"; break;
+        case NEON_FMLS_byelement_scalar: mnemonic = "fmls"; break;
+        case NEON_FMULX_byelement_scalar: mnemonic = "fmulx"; break;
+        default: form = "(NEONScalarByIndexedElement)";
+      }
+  }
+
+  if (long_instr) {
+    nfd.SetFormatMap(0, nfd.LongScalarFormatMap());
+  }
+
+  Format(instr, mnemonic, nfd.Substitute(
+    
form, nfd.kPlaceholder, nfd.kPlaceholder, nfd.kFormat));
+}
+
+
+// Disassembles the NEON scalar copy group. Only the scalar DUP (element)
+// encoding is recognized, and it is printed as "mov"; anything else keeps the
+// "unimplemented" mnemonic and the "(NEONScalarCopy)" form.
+void Disassembler::VisitNEONScalarCopy(const Instruction* instr) {
+  const char *mnemonic = "unimplemented";
+  const char *form = "(NEONScalarCopy)";
+
+  NEONFormatDecoder nfd(instr, NEONFormatDecoder::TriangularScalarFormatMap());
+
+  if (instr->Mask(NEONScalarCopyMask) == NEON_DUP_ELEMENT_scalar) {
+    mnemonic = "mov";
+    form = "%sd, 'Vn.%s['IVInsIndex1]";
+  }
+
+  // First substitution is a scalar placeholder, second a vector format.
+  Format(instr, mnemonic, nfd.Substitute(form, nfd.kPlaceholder, nfd.kFormat));
+}
+
+
+// Disassembles NEON scalar pairwise instructions (addp and the FP pairwise
+// add/max/min forms). The destination uses the FP scalar map; the source
+// vector format comes from a local map keyed on bit 22 (2S or 2D). Unknown
+// encodings keep the "unimplemented" mnemonic and use the
+// "(NEONScalarPairwise)" form.
+void Disassembler::VisitNEONScalarPairwise(const Instruction* instr) {
+  const char *mnemonic = "unimplemented";
+  const char *form = "%sd, 'Vn.%s";
+  NEONFormatMap map = { {22}, {NF_2S, NF_2D} };
+  NEONFormatDecoder nfd(instr, NEONFormatDecoder::FPScalarFormatMap(), &map);
+
+  switch (instr->Mask(NEONScalarPairwiseMask)) {
+    case NEON_ADDP_scalar: mnemonic = "addp"; break;
+    case NEON_FADDP_scalar: mnemonic = "faddp"; break;
+    case NEON_FMAXP_scalar: mnemonic = "fmaxp"; break;
+    case NEON_FMAXNMP_scalar: mnemonic = "fmaxnmp"; break;
+    case NEON_FMINP_scalar: mnemonic = "fminp"; break;
+    case NEON_FMINNMP_scalar: mnemonic = "fminnmp"; break;
+    default: form = "(NEONScalarPairwise)";
+  }
+  Format(instr, mnemonic, nfd.Substitute(form,
+      NEONFormatDecoder::kPlaceholder, NEONFormatDecoder::kFormat));
+}
+
+
+// Disassembles NEON scalar shift-by-immediate instructions.
+//
+// map_shift derives the scalar size from bits 22..19 (index 0 is undefined);
+// map_shift_narrow is the three-bit variant (bits 21..19) used for the source
+// operand of the narrowing shifts. form uses the 'Is1 immediate token and
+// form_2 the 'Is2 token.
+void Disassembler::VisitNEONScalarShiftImmediate(const Instruction* instr) {
+  const char *mnemonic = "unimplemented";
+  const char *form = "%sd, %sn, 'Is1";
+  const char *form_2 = "%sd, %sn, 'Is2";
+
+  static const NEONFormatMap map_shift = {
+    {22, 21, 20, 19},
+    {NF_UNDEF, NF_B, NF_H, NF_H, NF_S, NF_S, NF_S, NF_S,
+     NF_D, NF_D, NF_D, NF_D, NF_D, NF_D, NF_D, NF_D}
+  };
+  static const NEONFormatMap map_shift_narrow = {
+    {21, 20, 19},
+    {NF_UNDEF, NF_H, NF_S, NF_S, NF_D, NF_D, NF_D, NF_D}
+  };
+  NEONFormatDecoder nfd(instr, &map_shift);
+
+  if (instr->ImmNEONImmh()) {  // immh has to be non-zero.
+ switch (instr->Mask(NEONScalarShiftImmediateMask)) { + case NEON_FCVTZU_imm_scalar: mnemonic = "fcvtzu"; break; + case NEON_FCVTZS_imm_scalar: mnemonic = "fcvtzs"; break; + case NEON_SCVTF_imm_scalar: mnemonic = "scvtf"; break; + case NEON_UCVTF_imm_scalar: mnemonic = "ucvtf"; break; + case NEON_SRI_scalar: mnemonic = "sri"; break; + case NEON_SSHR_scalar: mnemonic = "sshr"; break; + case NEON_USHR_scalar: mnemonic = "ushr"; break; + case NEON_SRSHR_scalar: mnemonic = "srshr"; break; + case NEON_URSHR_scalar: mnemonic = "urshr"; break; + case NEON_SSRA_scalar: mnemonic = "ssra"; break; + case NEON_USRA_scalar: mnemonic = "usra"; break; + case NEON_SRSRA_scalar: mnemonic = "srsra"; break; + case NEON_URSRA_scalar: mnemonic = "ursra"; break; + case NEON_SHL_scalar: mnemonic = "shl"; form = form_2; break; + case NEON_SLI_scalar: mnemonic = "sli"; form = form_2; break; + case NEON_SQSHLU_scalar: mnemonic = "sqshlu"; form = form_2; break; + case NEON_SQSHL_imm_scalar: mnemonic = "sqshl"; form = form_2; break; + case NEON_UQSHL_imm_scalar: mnemonic = "uqshl"; form = form_2; break; + case NEON_UQSHRN_scalar: + mnemonic = "uqshrn"; + nfd.SetFormatMap(1, &map_shift_narrow); + break; + case NEON_UQRSHRN_scalar: + mnemonic = "uqrshrn"; + nfd.SetFormatMap(1, &map_shift_narrow); + break; + case NEON_SQSHRN_scalar: + mnemonic = "sqshrn"; + nfd.SetFormatMap(1, &map_shift_narrow); + break; + case NEON_SQRSHRN_scalar: + mnemonic = "sqrshrn"; + nfd.SetFormatMap(1, &map_shift_narrow); + break; + case NEON_SQSHRUN_scalar: + mnemonic = "sqshrun"; + nfd.SetFormatMap(1, &map_shift_narrow); + break; + case NEON_SQRSHRUN_scalar: + mnemonic = "sqrshrun"; + nfd.SetFormatMap(1, &map_shift_narrow); + break; + default: + form = "(NEONScalarShiftImmediate)"; + } + } else { + form = "(NEONScalarShiftImmediate)"; + } + Format(instr, mnemonic, nfd.SubstitutePlaceholders(form)); +} + + +void Disassembler::VisitNEONShiftImmediate(const Instruction* instr) { + const char *mnemonic = "unimplemented"; 
+ const char *form = "'Vd.%s, 'Vn.%s, 'Is1"; + const char *form_shift_2 = "'Vd.%s, 'Vn.%s, 'Is2"; + const char *form_xtl = "'Vd.%s, 'Vn.%s"; + + // 0001->8H, 001x->4S, 01xx->2D, all others undefined. + static const NEONFormatMap map_shift_ta = { + {22, 21, 20, 19}, + {NF_UNDEF, NF_8H, NF_4S, NF_4S, NF_2D, NF_2D, NF_2D, NF_2D} + }; + + // 00010->8B, 00011->16B, 001x0->4H, 001x1->8H, + // 01xx0->2S, 01xx1->4S, 1xxx1->2D, all others undefined. + static const NEONFormatMap map_shift_tb = { + {22, 21, 20, 19, 30}, + {NF_UNDEF, NF_UNDEF, NF_8B, NF_16B, NF_4H, NF_8H, NF_4H, NF_8H, + NF_2S, NF_4S, NF_2S, NF_4S, NF_2S, NF_4S, NF_2S, NF_4S, + NF_UNDEF, NF_2D, NF_UNDEF, NF_2D, NF_UNDEF, NF_2D, NF_UNDEF, NF_2D, + NF_UNDEF, NF_2D, NF_UNDEF, NF_2D, NF_UNDEF, NF_2D, NF_UNDEF, NF_2D} + }; + + NEONFormatDecoder nfd(instr, &map_shift_tb); + + if (instr->ImmNEONImmh()) { // immh has to be non-zero. + switch (instr->Mask(NEONShiftImmediateMask)) { + case NEON_SQSHLU: mnemonic = "sqshlu"; form = form_shift_2; break; + case NEON_SQSHL_imm: mnemonic = "sqshl"; form = form_shift_2; break; + case NEON_UQSHL_imm: mnemonic = "uqshl"; form = form_shift_2; break; + case NEON_SHL: mnemonic = "shl"; form = form_shift_2; break; + case NEON_SLI: mnemonic = "sli"; form = form_shift_2; break; + case NEON_SCVTF_imm: mnemonic = "scvtf"; break; + case NEON_UCVTF_imm: mnemonic = "ucvtf"; break; + case NEON_FCVTZU_imm: mnemonic = "fcvtzu"; break; + case NEON_FCVTZS_imm: mnemonic = "fcvtzs"; break; + case NEON_SRI: mnemonic = "sri"; break; + case NEON_SSHR: mnemonic = "sshr"; break; + case NEON_USHR: mnemonic = "ushr"; break; + case NEON_SRSHR: mnemonic = "srshr"; break; + case NEON_URSHR: mnemonic = "urshr"; break; + case NEON_SSRA: mnemonic = "ssra"; break; + case NEON_USRA: mnemonic = "usra"; break; + case NEON_SRSRA: mnemonic = "srsra"; break; + case NEON_URSRA: mnemonic = "ursra"; break; + case NEON_SHRN: + mnemonic = instr->Mask(NEON_Q) ? 
"shrn2" : "shrn"; + nfd.SetFormatMap(1, &map_shift_ta); + break; + case NEON_RSHRN: + mnemonic = instr->Mask(NEON_Q) ? "rshrn2" : "rshrn"; + nfd.SetFormatMap(1, &map_shift_ta); + break; + case NEON_UQSHRN: + mnemonic = instr->Mask(NEON_Q) ? "uqshrn2" : "uqshrn"; + nfd.SetFormatMap(1, &map_shift_ta); + break; + case NEON_UQRSHRN: + mnemonic = instr->Mask(NEON_Q) ? "uqrshrn2" : "uqrshrn"; + nfd.SetFormatMap(1, &map_shift_ta); + break; + case NEON_SQSHRN: + mnemonic = instr->Mask(NEON_Q) ? "sqshrn2" : "sqshrn"; + nfd.SetFormatMap(1, &map_shift_ta); + break; + case NEON_SQRSHRN: + mnemonic = instr->Mask(NEON_Q) ? "sqrshrn2" : "sqrshrn"; + nfd.SetFormatMap(1, &map_shift_ta); + break; + case NEON_SQSHRUN: + mnemonic = instr->Mask(NEON_Q) ? "sqshrun2" : "sqshrun"; + nfd.SetFormatMap(1, &map_shift_ta); + break; + case NEON_SQRSHRUN: + mnemonic = instr->Mask(NEON_Q) ? "sqrshrun2" : "sqrshrun"; + nfd.SetFormatMap(1, &map_shift_ta); + break; + case NEON_SSHLL: + nfd.SetFormatMap(0, &map_shift_ta); + if (instr->ImmNEONImmb() == 0 && + CountSetBits(instr->ImmNEONImmh(), 32) == 1) { // sxtl variant. + form = form_xtl; + mnemonic = instr->Mask(NEON_Q) ? "sxtl2" : "sxtl"; + } else { // sshll variant. + form = form_shift_2; + mnemonic = instr->Mask(NEON_Q) ? "sshll2" : "sshll"; + } + break; + case NEON_USHLL: + nfd.SetFormatMap(0, &map_shift_ta); + if (instr->ImmNEONImmb() == 0 && + CountSetBits(instr->ImmNEONImmh(), 32) == 1) { // uxtl variant. + form = form_xtl; + mnemonic = instr->Mask(NEON_Q) ? "uxtl2" : "uxtl"; + } else { // ushll variant. + form = form_shift_2; + mnemonic = instr->Mask(NEON_Q) ? 
"ushll2" : "ushll"; + } + break; + default: form = "(NEONShiftImmediate)"; + } + } else { + form = "(NEONShiftImmediate)"; + } + Format(instr, mnemonic, nfd.Substitute(form)); +} + + +void Disassembler::VisitNEONTable(const Instruction* instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(NEONTable)"; + const char form_1v[] = "'Vd.%%s, {'Vn.16b}, 'Vm.%%s"; + const char form_2v[] = "'Vd.%%s, {'Vn.16b, v%d.16b}, 'Vm.%%s"; + const char form_3v[] = "'Vd.%%s, {'Vn.16b, v%d.16b, v%d.16b}, 'Vm.%%s"; + const char form_4v[] = + "'Vd.%%s, {'Vn.16b, v%d.16b, v%d.16b, v%d.16b}, 'Vm.%%s"; + static const NEONFormatMap map_b = { {30}, {NF_8B, NF_16B} }; + NEONFormatDecoder nfd(instr, &map_b); + + switch (instr->Mask(NEONTableMask)) { + case NEON_TBL_1v: mnemonic = "tbl"; form = form_1v; break; + case NEON_TBL_2v: mnemonic = "tbl"; form = form_2v; break; + case NEON_TBL_3v: mnemonic = "tbl"; form = form_3v; break; + case NEON_TBL_4v: mnemonic = "tbl"; form = form_4v; break; + case NEON_TBX_1v: mnemonic = "tbx"; form = form_1v; break; + case NEON_TBX_2v: mnemonic = "tbx"; form = form_2v; break; + case NEON_TBX_3v: mnemonic = "tbx"; form = form_3v; break; + case NEON_TBX_4v: mnemonic = "tbx"; form = form_4v; break; + default: break; + } + + char re_form[sizeof(form_4v) + 6]; + int reg_num = instr->Rn(); + SprintfLiteral(re_form, form, + (reg_num + 1) % kNumberOfVRegisters, + (reg_num + 2) % kNumberOfVRegisters, + (reg_num + 3) % kNumberOfVRegisters); + + Format(instr, mnemonic, nfd.Substitute(re_form)); +} + + +void Disassembler::VisitNEONPerm(const Instruction* instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Vd.%s, 'Vn.%s, 'Vm.%s"; + NEONFormatDecoder nfd(instr); + + switch (instr->Mask(NEONPermMask)) { + case NEON_TRN1: mnemonic = "trn1"; break; + case NEON_TRN2: mnemonic = "trn2"; break; + case NEON_UZP1: mnemonic = "uzp1"; break; + case NEON_UZP2: mnemonic = "uzp2"; break; + case NEON_ZIP1: mnemonic = "zip1"; break; + case 
NEON_ZIP2: mnemonic = "zip2"; break; + default: form = "(NEONPerm)"; + } + Format(instr, mnemonic, nfd.Substitute(form)); +} + + +void Disassembler::VisitUnimplemented(const Instruction* instr) { + Format(instr, "unimplemented", "(Unimplemented)"); +} + + +void Disassembler::VisitUnallocated(const Instruction* instr) { + Format(instr, "unallocated", "(Unallocated)"); +} + + +void Disassembler::ProcessOutput(const Instruction* /*instr*/) { + // The base disasm does nothing more than disassembling into a buffer. +} + + +void Disassembler::AppendRegisterNameToOutput(const Instruction* instr, + const CPURegister& reg) { + USE(instr); + VIXL_ASSERT(reg.IsValid()); + char reg_char; + + if (reg.IsRegister()) { + reg_char = reg.Is64Bits() ? 'x' : 'w'; + } else { + VIXL_ASSERT(reg.IsVRegister()); + switch (reg.SizeInBits()) { + case kBRegSize: reg_char = 'b'; break; + case kHRegSize: reg_char = 'h'; break; + case kSRegSize: reg_char = 's'; break; + case kDRegSize: reg_char = 'd'; break; + default: + VIXL_ASSERT(reg.Is128Bits()); + reg_char = 'q'; + } + } + + if (reg.IsVRegister() || !(reg.Aliases(sp) || reg.Aliases(xzr))) { + // A core or scalar/vector register: [wx]0 - 30, [bhsdq]0 - 31. + AppendToOutput("%c%d", reg_char, reg.code()); + } else if (reg.Aliases(sp)) { + // Disassemble w31/x31 as stack pointer wsp/sp. + AppendToOutput("%s", reg.Is64Bits() ? "sp" : "wsp"); + } else { + // Disassemble w31/x31 as zero register wzr/xzr. + AppendToOutput("%czr", reg_char); + } +} + + +void Disassembler::AppendPCRelativeOffsetToOutput(const Instruction* instr, + int64_t offset) { + USE(instr); + char sign = (offset < 0) ? 
'-' : '+'; + AppendToOutput("#%c0x%" PRIx64, sign, std::abs(offset)); +} + + +void Disassembler::AppendAddressToOutput(const Instruction* instr, + const void* addr) { + USE(instr); + AppendToOutput("(addr 0x%" PRIxPTR ")", reinterpret_cast<uintptr_t>(addr)); +} + + +void Disassembler::AppendCodeAddressToOutput(const Instruction* instr, + const void* addr) { + AppendAddressToOutput(instr, addr); +} + + +void Disassembler::AppendDataAddressToOutput(const Instruction* instr, + const void* addr) { + AppendAddressToOutput(instr, addr); +} + + +void Disassembler::AppendCodeRelativeAddressToOutput(const Instruction* instr, + const void* addr) { + USE(instr); + int64_t rel_addr = CodeRelativeAddress(addr); + if (rel_addr >= 0) { + AppendToOutput("(addr 0x%" PRIx64 ")", rel_addr); + } else { + AppendToOutput("(addr -0x%" PRIx64 ")", -rel_addr); + } +} + + +void Disassembler::AppendCodeRelativeCodeAddressToOutput( + const Instruction* instr, const void* addr) { + AppendCodeRelativeAddressToOutput(instr, addr); +} + + +void Disassembler::AppendCodeRelativeDataAddressToOutput( + const Instruction* instr, const void* addr) { + AppendCodeRelativeAddressToOutput(instr, addr); +} + + +void Disassembler::MapCodeAddress(int64_t base_address, + const Instruction* instr_address) { + set_code_address_offset( + base_address - reinterpret_cast<intptr_t>(instr_address)); +} +int64_t Disassembler::CodeRelativeAddress(const void* addr) { + return reinterpret_cast<intptr_t>(addr) + code_address_offset(); +} + + +void Disassembler::Format(const Instruction* instr, const char* mnemonic, + const char* format) { + VIXL_ASSERT(mnemonic != NULL); + ResetOutput(); + uint32_t pos = buffer_pos_; + Substitute(instr, mnemonic); + if (format != NULL) { + uint32_t spaces = buffer_pos_ - pos < 8 ? 
8 - (buffer_pos_ - pos) : 1; + while (spaces--) { + VIXL_ASSERT(buffer_pos_ < buffer_size_); + buffer_[buffer_pos_++] = ' '; + } + Substitute(instr, format); + } + VIXL_ASSERT(buffer_pos_ < buffer_size_); + buffer_[buffer_pos_] = 0; + ProcessOutput(instr); +} + + +void Disassembler::Substitute(const Instruction* instr, const char* string) { + char chr = *string++; + while (chr != '\0') { + if (chr == '\'') { + string += SubstituteField(instr, string); + } else { + VIXL_ASSERT(buffer_pos_ < buffer_size_); + buffer_[buffer_pos_++] = chr; + } + chr = *string++; + } +} + + +int Disassembler::SubstituteField(const Instruction* instr, + const char* format) { + switch (format[0]) { + // NB. The remaining (unused) substitution prefix characters are: JUYZ. + case 'R': // Register. X or W, selected by sf bit. + case 'F': // FP register. S or D, selected by type field. + case 'V': // Vector register, V, vector format. + case 'W': + case 'X': + case 'B': + case 'H': + case 'S': + case 'D': + case 'Q': return SubstituteRegisterField(instr, format); + case 'I': return SubstituteImmediateField(instr, format); + case 'L': return SubstituteLiteralField(instr, format); + case 'N': return SubstituteShiftField(instr, format); + case 'P': return SubstitutePrefetchField(instr, format); + case 'C': return SubstituteConditionField(instr, format); + case 'E': return SubstituteExtendField(instr, format); + case 'A': return SubstitutePCRelAddressField(instr, format); + case 'T': return SubstituteBranchTargetField(instr, format); + case 'O': return SubstituteLSRegOffsetField(instr, format); + case 'M': return SubstituteBarrierField(instr, format); + case 'K': return SubstituteCrField(instr, format); + case 'G': return SubstituteSysOpField(instr, format); + default: { + VIXL_UNREACHABLE(); + return 1; + } + } +} + + +int Disassembler::SubstituteRegisterField(const Instruction* instr, + const char* format) { + char reg_prefix = format[0]; + unsigned reg_num = 0; + unsigned field_len = 2; + + switch 
(format[1]) { + case 'd': + reg_num = instr->Rd(); + if (format[2] == 'q') { + reg_prefix = instr->NEONQ() ? 'X' : 'W'; + field_len = 3; + } + break; + case 'n': reg_num = instr->Rn(); break; + case 'm': + reg_num = instr->Rm(); + switch (format[2]) { + // Handle registers tagged with b (bytes), z (instruction), or + // r (registers), used for address updates in + // NEON load/store instructions. + case 'r': + case 'b': + case 'z': { + field_len = 3; + char* eimm; + int imm = static_cast<int>(strtol(&format[3], &eimm, 10)); + field_len += eimm - &format[3]; + if (reg_num == 31) { + switch (format[2]) { + case 'z': + imm *= (1 << instr->NEONLSSize()); + break; + case 'r': + imm *= (instr->NEONQ() == 0) ? kDRegSizeInBytes + : kQRegSizeInBytes; + break; + case 'b': + break; + } + AppendToOutput("#%d", imm); + return field_len; + } + break; + } + } + break; + case 'e': + // This is register Rm, but using a 4-bit specifier. Used in NEON + // by-element instructions. + reg_num = (instr->Rm() & 0xf); + break; + case 'a': reg_num = instr->Ra(); break; + case 's': reg_num = instr->Rs(); break; + case 't': + reg_num = instr->Rt(); + if (format[0] == 'V') { + if ((format[2] >= '2') && (format[2] <= '4')) { + // Handle consecutive vector register specifiers Vt2, Vt3 and Vt4. + reg_num = (reg_num + format[2] - '1') % 32; + field_len = 3; + } + } else { + if (format[2] == '2') { + // Handle register specifier Rt2. 
+ reg_num = instr->Rt2(); + field_len = 3; + } + } + break; + case '(': { + switch (format[2]) { + case 's': + reg_num = instr->Rs(); + break; + case 't': + reg_num = instr->Rt(); + break; + default: + VIXL_UNREACHABLE(); + } + + VIXL_ASSERT(format[3] == '+'); + int i = 4; + int addition = 0; + while (format[i] != ')') { + VIXL_ASSERT((format[i] >= '0') && (format[i] <= '9')); + addition *= 10; + addition += format[i] - '0'; + ++i; + } + reg_num += addition; + field_len = i + 1; + break; + } + default: VIXL_UNREACHABLE(); + } + + // Increase field length for registers tagged as stack. + if (format[1] != '(' && format[2] == 's') { + field_len = 3; + } + + CPURegister::RegisterType reg_type = CPURegister::kRegister; + unsigned reg_size = kXRegSize; + + if (reg_prefix == 'R') { + reg_prefix = instr->SixtyFourBits() ? 'X' : 'W'; + } else if (reg_prefix == 'F') { + reg_prefix = ((instr->FPType() & 1) == 0) ? 'S' : 'D'; + } + + switch (reg_prefix) { + case 'W': + reg_type = CPURegister::kRegister; reg_size = kWRegSize; break; + case 'X': + reg_type = CPURegister::kRegister; reg_size = kXRegSize; break; + case 'B': + reg_type = CPURegister::kVRegister; reg_size = kBRegSize; break; + case 'H': + reg_type = CPURegister::kVRegister; reg_size = kHRegSize; break; + case 'S': + reg_type = CPURegister::kVRegister; reg_size = kSRegSize; break; + case 'D': + reg_type = CPURegister::kVRegister; reg_size = kDRegSize; break; + case 'Q': + reg_type = CPURegister::kVRegister; reg_size = kQRegSize; break; + case 'V': + AppendToOutput("v%d", reg_num); + return field_len; + default: + VIXL_UNREACHABLE(); + } + + if ((reg_type == CPURegister::kRegister) && + (reg_num == kZeroRegCode) && (format[2] == 's')) { + reg_num = kSPRegInternalCode; + } + + AppendRegisterNameToOutput(instr, CPURegister(reg_num, reg_size, reg_type)); + + return field_len; +} + + +int Disassembler::SubstituteImmediateField(const Instruction* instr, + const char* format) { + VIXL_ASSERT(format[0] == 'I'); + + switch 
(format[1]) { + case 'M': { // IMoveImm, IMoveNeg or IMoveLSL. + if (format[5] == 'L') { + AppendToOutput("#0x%" PRIx32, instr->ImmMoveWide()); + if (instr->ShiftMoveWide() > 0) { + AppendToOutput(", lsl #%" PRId32, 16 * instr->ShiftMoveWide()); + } + } else { + VIXL_ASSERT((format[5] == 'I') || (format[5] == 'N')); + uint64_t imm = static_cast<uint64_t>(instr->ImmMoveWide()) << + (16 * instr->ShiftMoveWide()); + if (format[5] == 'N') + imm = ~imm; + if (!instr->SixtyFourBits()) + imm &= UINT64_C(0xffffffff); + AppendToOutput("#0x%" PRIx64, imm); + } + return 8; + } + case 'L': { + switch (format[2]) { + case 'L': { // ILLiteral - Immediate Load Literal. + AppendToOutput("pc%+" PRId32, + instr->ImmLLiteral() << kLiteralEntrySizeLog2); + return 9; + } + case 'S': { // ILS - Immediate Load/Store. + if (instr->ImmLS() != 0) { + AppendToOutput(", #%" PRId32, instr->ImmLS()); + } + return 3; + } + case 'P': { // ILPx - Immediate Load/Store Pair, x = access size. + if (instr->ImmLSPair() != 0) { + // format[3] is the scale value. Convert to a number. + int scale = 1 << (format[3] - '0'); + AppendToOutput(", #%" PRId32, instr->ImmLSPair() * scale); + } + return 4; + } + case 'U': { // ILU - Immediate Load/Store Unsigned. + if (instr->ImmLSUnsigned() != 0) { + int shift = instr->SizeLS(); + AppendToOutput(", #%" PRId32, instr->ImmLSUnsigned() << shift); + } + return 3; + } + default: { + VIXL_UNIMPLEMENTED(); + return 0; + } + } + } + case 'C': { // ICondB - Immediate Conditional Branch. + int64_t offset = instr->ImmCondBranch() << 2; + AppendPCRelativeOffsetToOutput(instr, offset); + return 6; + } + case 'A': { // IAddSub. + VIXL_ASSERT(instr->ShiftAddSub() <= 1); + int64_t imm = instr->ImmAddSub() << (12 * instr->ShiftAddSub()); + AppendToOutput("#0x%" PRIx64 " (%" PRId64 ")", imm, imm); + return 7; + } + case 'F': { // IFPSingle, IFPDouble or IFPFBits. + if (format[3] == 'F') { // IFPFbits. 
+ AppendToOutput("#%" PRId32, 64 - instr->FPScale()); + return 8; + } else { + AppendToOutput("#0x%" PRIx32 " (%.4f)", instr->ImmFP(), + format[3] == 'S' ? instr->ImmFP32() : instr->ImmFP64()); + return 9; + } + } + case 'T': { // ITri - Immediate Triangular Encoded. + AppendToOutput("#0x%" PRIx64, instr->ImmLogical()); + return 4; + } + case 'N': { // INzcv. + int nzcv = (instr->Nzcv() << Flags_offset); + AppendToOutput("#%c%c%c%c", ((nzcv & NFlag) == 0) ? 'n' : 'N', + ((nzcv & ZFlag) == 0) ? 'z' : 'Z', + ((nzcv & CFlag) == 0) ? 'c' : 'C', + ((nzcv & VFlag) == 0) ? 'v' : 'V'); + return 5; + } + case 'P': { // IP - Conditional compare. + AppendToOutput("#%" PRId32, instr->ImmCondCmp()); + return 2; + } + case 'B': { // Bitfields. + return SubstituteBitfieldImmediateField(instr, format); + } + case 'E': { // IExtract. + AppendToOutput("#%" PRId32, instr->ImmS()); + return 8; + } + case 'S': { // IS - Test and branch bit. + AppendToOutput("#%" PRId32, (instr->ImmTestBranchBit5() << 5) | + instr->ImmTestBranchBit40()); + return 2; + } + case 's': { // Is - Shift (immediate). + switch (format[2]) { + case '1': { // Is1 - SSHR. + int shift = 16 << HighestSetBitPosition(instr->ImmNEONImmh()); + shift -= instr->ImmNEONImmhImmb(); + AppendToOutput("#%d", shift); + return 3; + } + case '2': { // Is2 - SLI. + int shift = instr->ImmNEONImmhImmb(); + shift -= 8 << HighestSetBitPosition(instr->ImmNEONImmh()); + AppendToOutput("#%d", shift); + return 3; + } + default: { + VIXL_UNIMPLEMENTED(); + return 0; + } + } + } + case 'D': { // IDebug - HLT and BRK instructions. + AppendToOutput("#0x%" PRIx32, instr->ImmException()); + return 6; + } + case 'V': { // Immediate Vector. + switch (format[2]) { + case 'E': { // IVExtract. + AppendToOutput("#%" PRId32, instr->ImmNEONExt()); + return 9; + } + case 'B': { // IVByElemIndex. 
+ int vm_index = (instr->NEONH() << 1) | instr->NEONL(); + if (instr->NEONSize() == 1) { + vm_index = (vm_index << 1) | instr->NEONM(); + } + AppendToOutput("%d", vm_index); + return strlen("IVByElemIndex"); + } + case 'I': { // INS element. + if (strncmp(format, "IVInsIndex", strlen("IVInsIndex")) == 0) { + int rd_index, rn_index; + int imm5 = instr->ImmNEON5(); + int imm4 = instr->ImmNEON4(); + int tz = CountTrailingZeros(imm5, 32); + rd_index = imm5 >> (tz + 1); + rn_index = imm4 >> tz; + if (strncmp(format, "IVInsIndex1", strlen("IVInsIndex1")) == 0) { + AppendToOutput("%d", rd_index); + return strlen("IVInsIndex1"); + } else if (strncmp(format, "IVInsIndex2", + strlen("IVInsIndex2")) == 0) { + AppendToOutput("%d", rn_index); + return strlen("IVInsIndex2"); + } else { + VIXL_UNIMPLEMENTED(); + return 0; + } + } + VIXL_FALLTHROUGH(); + } + case 'L': { // IVLSLane[0123] - suffix indicates access size shift. + AppendToOutput("%d", instr->NEONLSIndex(format[8] - '0')); + return 9; + } + case 'M': { // Modified Immediate cases. 
+ if (strncmp(format, + "IVMIImmFPSingle", + strlen("IVMIImmFPSingle")) == 0) { + AppendToOutput("#0x%" PRIx32 " (%.4f)", instr->ImmNEONabcdefgh(), + instr->ImmNEONFP32()); + return strlen("IVMIImmFPSingle"); + } else if (strncmp(format, + "IVMIImmFPDouble", + strlen("IVMIImmFPDouble")) == 0) { + AppendToOutput("#0x%" PRIx32 " (%.4f)", instr->ImmNEONabcdefgh(), + instr->ImmNEONFP64()); + return strlen("IVMIImmFPDouble"); + } else if (strncmp(format, "IVMIImm8", strlen("IVMIImm8")) == 0) { + uint64_t imm8 = instr->ImmNEONabcdefgh(); + AppendToOutput("#0x%" PRIx64, imm8); + return strlen("IVMIImm8"); + } else if (strncmp(format, "IVMIImm", strlen("IVMIImm")) == 0) { + uint64_t imm8 = instr->ImmNEONabcdefgh(); + uint64_t imm = 0; + for (int i = 0; i < 8; ++i) { + if (imm8 & (1ULL << i)) { + imm |= (UINT64_C(0xff) << (8 * i)); + } + } + AppendToOutput("#0x%" PRIx64, imm); + return strlen("IVMIImm"); + } else if (strncmp(format, "IVMIShiftAmt1", + strlen("IVMIShiftAmt1")) == 0) { + int cmode = instr->NEONCmode(); + int shift_amount = 8 * ((cmode >> 1) & 3); + AppendToOutput("#%d", shift_amount); + return strlen("IVMIShiftAmt1"); + } else if (strncmp(format, "IVMIShiftAmt2", + strlen("IVMIShiftAmt2")) == 0) { + int cmode = instr->NEONCmode(); + int shift_amount = 8 << (cmode & 1); + AppendToOutput("#%d", shift_amount); + return strlen("IVMIShiftAmt2"); + } else { + VIXL_UNIMPLEMENTED(); + return 0; + } + } + default: { + VIXL_UNIMPLEMENTED(); + return 0; + } + } + } + case 'X': { // IX - CLREX instruction. + AppendToOutput("#0x%" PRIx32, instr->CRm()); + return 2; + } + default: { + VIXL_UNIMPLEMENTED(); + return 0; + } + } +} + + +int Disassembler::SubstituteBitfieldImmediateField(const Instruction* instr, + const char* format) { + VIXL_ASSERT((format[0] == 'I') && (format[1] == 'B')); + unsigned r = instr->ImmR(); + unsigned s = instr->ImmS(); + + switch (format[2]) { + case 'r': { // IBr. + AppendToOutput("#%d", r); + return 3; + } + case 's': { // IBs+1 or IBs-r+1. 
+ if (format[3] == '+') { + AppendToOutput("#%d", s + 1); + return 5; + } else { + VIXL_ASSERT(format[3] == '-'); + AppendToOutput("#%d", s - r + 1); + return 7; + } + } + case 'Z': { // IBZ-r. + VIXL_ASSERT((format[3] == '-') && (format[4] == 'r')); + unsigned reg_size = (instr->SixtyFourBits() == 1) ? kXRegSize : kWRegSize; + AppendToOutput("#%d", reg_size - r); + return 5; + } + default: { + VIXL_UNREACHABLE(); + return 0; + } + } +} + + +int Disassembler::SubstituteLiteralField(const Instruction* instr, + const char* format) { + VIXL_ASSERT(strncmp(format, "LValue", 6) == 0); + USE(format); + + const void * address = instr->LiteralAddress<const void *>(); + switch (instr->Mask(LoadLiteralMask)) { + case LDR_w_lit: + case LDR_x_lit: + case LDRSW_x_lit: + case LDR_s_lit: + case LDR_d_lit: + case LDR_q_lit: + AppendCodeRelativeDataAddressToOutput(instr, address); + break; + case PRFM_lit: { + // Use the prefetch hint to decide how to print the address. + switch (instr->PrefetchHint()) { + case 0x0: // PLD: prefetch for load. + case 0x2: // PST: prepare for store. + AppendCodeRelativeDataAddressToOutput(instr, address); + break; + case 0x1: // PLI: preload instructions. + AppendCodeRelativeCodeAddressToOutput(instr, address); + break; + case 0x3: // Unallocated hint. + AppendCodeRelativeAddressToOutput(instr, address); + break; + } + break; + } + default: + VIXL_UNREACHABLE(); + } + + return 6; +} + + +int Disassembler::SubstituteShiftField(const Instruction* instr, + const char* format) { + VIXL_ASSERT(format[0] == 'N'); + VIXL_ASSERT(instr->ShiftDP() <= 0x3); + + switch (format[1]) { + case 'D': { // NDP. + VIXL_ASSERT(instr->ShiftDP() != ROR); + VIXL_FALLTHROUGH(); + } + case 'L': { // NLo. 
+ if (instr->ImmDPShift() != 0) { + const char* shift_type[] = {"lsl", "lsr", "asr", "ror"}; + AppendToOutput(", %s #%" PRId32, shift_type[instr->ShiftDP()], + instr->ImmDPShift()); + } + return 3; + } + default: + VIXL_UNIMPLEMENTED(); + return 0; + } +} + + +int Disassembler::SubstituteConditionField(const Instruction* instr, + const char* format) { + VIXL_ASSERT(format[0] == 'C'); + const char* condition_code[] = { "eq", "ne", "hs", "lo", + "mi", "pl", "vs", "vc", + "hi", "ls", "ge", "lt", + "gt", "le", "al", "nv" }; + int cond; + switch (format[1]) { + case 'B': cond = instr->ConditionBranch(); break; + case 'I': { + cond = InvertCondition(static_cast<Condition>(instr->Condition())); + break; + } + default: cond = instr->Condition(); + } + AppendToOutput("%s", condition_code[cond]); + return 4; +} + + +int Disassembler::SubstitutePCRelAddressField(const Instruction* instr, + const char* format) { + VIXL_ASSERT((strcmp(format, "AddrPCRelByte") == 0) || // Used by `adr`. + (strcmp(format, "AddrPCRelPage") == 0)); // Used by `adrp`. + + int64_t offset = instr->ImmPCRel(); + + // Compute the target address based on the effective address (after applying + // code_address_offset). This is required for correct behaviour of adrp. + const Instruction* base = instr + code_address_offset(); + if (format[9] == 'P') { + offset *= kPageSize; + base = AlignDown(base, kPageSize); + } + // Strip code_address_offset before printing, so we can use the + // semantically-correct AppendCodeRelativeAddressToOutput. 
+ const void* target = + reinterpret_cast<const void*>(base + offset - code_address_offset()); + + AppendPCRelativeOffsetToOutput(instr, offset); + AppendToOutput(" "); + AppendCodeRelativeAddressToOutput(instr, target); + return 13; +} + + +int Disassembler::SubstituteBranchTargetField(const Instruction* instr, + const char* format) { + VIXL_ASSERT(strncmp(format, "TImm", 4) == 0); + + int64_t offset = 0; + switch (format[5]) { + // TImmUncn - unconditional branch immediate. + case 'n': offset = instr->ImmUncondBranch(); break; + // TImmCond - conditional branch immediate. + case 'o': offset = instr->ImmCondBranch(); break; + // TImmCmpa - compare and branch immediate. + case 'm': offset = instr->ImmCmpBranch(); break; + // TImmTest - test and branch immediate. + case 'e': offset = instr->ImmTestBranch(); break; + default: VIXL_UNIMPLEMENTED(); + } + offset <<= kInstructionSizeLog2; + const void* target_address = reinterpret_cast<const void*>(instr + offset); + VIXL_STATIC_ASSERT(sizeof(*instr) == 1); + + AppendPCRelativeOffsetToOutput(instr, offset); + AppendToOutput(" "); + AppendCodeRelativeCodeAddressToOutput(instr, target_address); + + return 8; +} + + +int Disassembler::SubstituteExtendField(const Instruction* instr, + const char* format) { + VIXL_ASSERT(strncmp(format, "Ext", 3) == 0); + VIXL_ASSERT(instr->ExtendMode() <= 7); + USE(format); + + const char* extend_mode[] = { "uxtb", "uxth", "uxtw", "uxtx", + "sxtb", "sxth", "sxtw", "sxtx" }; + + // If rd or rn is SP, uxtw on 32-bit registers and uxtx on 64-bit + // registers becomes lsl. 
+ if (((instr->Rd() == kZeroRegCode) || (instr->Rn() == kZeroRegCode)) && + (((instr->ExtendMode() == UXTW) && (instr->SixtyFourBits() == 0)) || + (instr->ExtendMode() == UXTX))) { + if (instr->ImmExtendShift() > 0) { + AppendToOutput(", lsl #%" PRId32, instr->ImmExtendShift()); + } + } else { + AppendToOutput(", %s", extend_mode[instr->ExtendMode()]); + if (instr->ImmExtendShift() > 0) { + AppendToOutput(" #%" PRId32, instr->ImmExtendShift()); + } + } + return 3; +} + + +int Disassembler::SubstituteLSRegOffsetField(const Instruction* instr, + const char* format) { + VIXL_ASSERT(strncmp(format, "Offsetreg", 9) == 0); + const char* extend_mode[] = { "undefined", "undefined", "uxtw", "lsl", + "undefined", "undefined", "sxtw", "sxtx" }; + USE(format); + + unsigned shift = instr->ImmShiftLS(); + Extend ext = static_cast<Extend>(instr->ExtendMode()); + char reg_type = ((ext == UXTW) || (ext == SXTW)) ? 'w' : 'x'; + + unsigned rm = instr->Rm(); + if (rm == kZeroRegCode) { + AppendToOutput("%czr", reg_type); + } else { + AppendToOutput("%c%d", reg_type, rm); + } + + // Extend mode UXTX is an alias for shift mode LSL here. + if (!((ext == UXTX) && (shift == 0))) { + AppendToOutput(", %s", extend_mode[ext]); + if (shift != 0) { + AppendToOutput(" #%d", instr->SizeLS()); + } + } + return 9; +} + + +int Disassembler::SubstitutePrefetchField(const Instruction* instr, + const char* format) { + VIXL_ASSERT(format[0] == 'P'); + USE(format); + + static const char* hints[] = {"ld", "li", "st"}; + static const char* stream_options[] = {"keep", "strm"}; + + unsigned hint = instr->PrefetchHint(); + unsigned target = instr->PrefetchTarget() + 1; + unsigned stream = instr->PrefetchStream(); + + if ((hint >= (sizeof(hints) / sizeof(hints[0]))) || (target > 3)) { + // Unallocated prefetch operations. + int prefetch_mode = instr->ImmPrefetchOperation(); + AppendToOutput("#0b%c%c%c%c%c", + (prefetch_mode & (1 << 4)) ? '1' : '0', + (prefetch_mode & (1 << 3)) ? 
'1' : '0', + (prefetch_mode & (1 << 2)) ? '1' : '0', + (prefetch_mode & (1 << 1)) ? '1' : '0', + (prefetch_mode & (1 << 0)) ? '1' : '0'); + } else { + VIXL_ASSERT(stream < (sizeof(stream_options) / sizeof(stream_options[0]))); + AppendToOutput("p%sl%d%s", hints[hint], target, stream_options[stream]); + } + return 6; +} + +int Disassembler::SubstituteBarrierField(const Instruction* instr, + const char* format) { + VIXL_ASSERT(format[0] == 'M'); + USE(format); + + static const char* options[4][4] = { + { "sy (0b0000)", "oshld", "oshst", "osh" }, + { "sy (0b0100)", "nshld", "nshst", "nsh" }, + { "sy (0b1000)", "ishld", "ishst", "ish" }, + { "sy (0b1100)", "ld", "st", "sy" } + }; + int domain = instr->ImmBarrierDomain(); + int type = instr->ImmBarrierType(); + + AppendToOutput("%s", options[domain][type]); + return 1; +} + +int Disassembler::SubstituteSysOpField(const Instruction* instr, + const char* format) { + VIXL_ASSERT(format[0] == 'G'); + int op = -1; + switch (format[1]) { + case '1': op = instr->SysOp1(); break; + case '2': op = instr->SysOp2(); break; + default: + VIXL_UNREACHABLE(); + } + AppendToOutput("#%d", op); + return 2; +} + +int Disassembler::SubstituteCrField(const Instruction* instr, + const char* format) { + VIXL_ASSERT(format[0] == 'K'); + int cr = -1; + switch (format[1]) { + case 'n': cr = instr->CRn(); break; + case 'm': cr = instr->CRm(); break; + default: + VIXL_UNREACHABLE(); + } + AppendToOutput("C%d", cr); + return 2; +} + +void Disassembler::ResetOutput() { + buffer_pos_ = 0; + buffer_[buffer_pos_] = 0; +} + + +void Disassembler::AppendToOutput(const char* format, ...) 
{ + va_list args; + va_start(args, format); + buffer_pos_ += vsnprintf(&buffer_[buffer_pos_], buffer_size_ - buffer_pos_, + format, args); + va_end(args); +} + + +void PrintDisassembler::ProcessOutput(const Instruction* instr) { + fprintf(stream_, "0x%016" PRIx64 " %08" PRIx32 "\t\t%s\n", + reinterpret_cast<uint64_t>(instr), + instr->InstructionBits(), + GetOutput()); +} + +void DisassembleInstruction(char* buffer, size_t bufsize, const Instruction* instr) +{ + vixl::Disassembler disasm(buffer, bufsize-1); + vixl::Decoder decoder; + decoder.AppendVisitor(&disasm); + decoder.Decode(instr); + buffer[bufsize-1] = 0; // Just to be safe +} + +char* GdbDisassembleInstruction(const Instruction* instr) +{ + static char buffer[1024]; + DisassembleInstruction(buffer, sizeof(buffer), instr); + return buffer; +} + +} // namespace vixl diff --git a/js/src/jit/arm64/vixl/Disasm-vixl.h b/js/src/jit/arm64/vixl/Disasm-vixl.h new file mode 100644 index 0000000000..e04730da83 --- /dev/null +++ b/js/src/jit/arm64/vixl/Disasm-vixl.h @@ -0,0 +1,181 @@ +// Copyright 2015, ARM Limited +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. 
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef VIXL_A64_DISASM_A64_H
#define VIXL_A64_DISASM_A64_H

#include "jit/arm64/vixl/Assembler-vixl.h"
#include "jit/arm64/vixl/Decoder-vixl.h"
#include "jit/arm64/vixl/Globals-vixl.h"
#include "jit/arm64/vixl/Instructions-vixl.h"
#include "jit/arm64/vixl/Utils-vixl.h"

namespace vixl {

// A DecoderVisitor that produces a textual form of the instructions it
// visits. The text is accumulated in a character buffer (buffer_) and is
// available through GetOutput(). One Visit method is declared for every entry
// of VISITOR_LIST.
class Disassembler: public DecoderVisitor {
 public:
  Disassembler();
  // Uses the caller-provided |text_buffer| of |buffer_size| bytes for output.
  Disassembler(char* text_buffer, int buffer_size);
  virtual ~Disassembler();
  // Returns the text accumulated in the output buffer.
  char* GetOutput();

  // Declare all Visitor functions.
  #define DECLARE(A) virtual void Visit##A(const Instruction* instr) override;
  VISITOR_LIST(DECLARE)
  #undef DECLARE

 protected:
  // Hook for consuming the disassembly of |instr|; PrintDisassembler overrides
  // it to print the text to a stream.
  // NOTE(review): presumably invoked once per visited instruction - confirm
  // in Disasm-vixl.cpp.
  virtual void ProcessOutput(const Instruction* instr);

  // Default output functions.  The functions below implement a default way of
  // printing elements in the disassembly. A sub-class can override these to
  // customize the disassembly output.

  // Prints the name of a register.
  // TODO: This currently doesn't allow renaming of V registers.
  virtual void AppendRegisterNameToOutput(const Instruction* instr,
                                          const CPURegister& reg);

  // Prints a PC-relative offset. This is used for example when disassembling
  // branches to immediate offsets.
  virtual void AppendPCRelativeOffsetToOutput(const Instruction* instr,
                                              int64_t offset);

  // Prints an address, in the general case. It can be code or data. This is
  // used for example to print the target address of an ADR instruction.
  virtual void AppendCodeRelativeAddressToOutput(const Instruction* instr,
                                                 const void* addr);

  // Prints the address of some code.
  // This is used for example to print the target address of a branch to an
  // immediate offset.
  // A sub-class can for example override this method to lookup the address and
  // print an appropriate name.
  virtual void AppendCodeRelativeCodeAddressToOutput(const Instruction* instr,
                                                     const void* addr);

  // Prints the address of some data.
  // This is used for example to print the source address of a load literal
  // instruction.
  virtual void AppendCodeRelativeDataAddressToOutput(const Instruction* instr,
                                                     const void* addr);

  // Same as the above, but for addresses that are not relative to the code
  // buffer. They are currently not used by VIXL.
  virtual void AppendAddressToOutput(const Instruction* instr,
                                     const void* addr);
  virtual void AppendCodeAddressToOutput(const Instruction* instr,
                                         const void* addr);
  virtual void AppendDataAddressToOutput(const Instruction* instr,
                                         const void* addr);

 public:
  // Get/Set the offset that should be added to code addresses when printing
  // code-relative addresses in the AppendCodeRelative<Type>AddressToOutput()
  // helpers.
  // Below is an example of how a branch immediate instruction in memory at
  // address 0xb010200 would disassemble with different offsets.
  // Base address | Disassembly
  //          0x0 | 0xb010200:  b #+0xcc  (addr 0xb0102cc)
  //      0x10000 | 0xb000200:  b #+0xcc  (addr 0xb0002cc)
  //    0xb010200 |       0x0:  b #+0xcc  (addr 0xcc)
  void MapCodeAddress(int64_t base_address, const Instruction* instr_address);
  int64_t CodeRelativeAddress(const void* instr);

 private:
  // Formatting helpers. Each Substitute*Field() function receives the current
  // instruction and a format string and returns an int.
  // NOTE(review): the returned int is presumably the number of format
  // characters consumed - confirm in Disasm-vixl.cpp.
  void Format(
      const Instruction* instr, const char* mnemonic, const char* format);
  void Substitute(const Instruction* instr, const char* string);
  int SubstituteField(const Instruction* instr, const char* format);
  int SubstituteRegisterField(const Instruction* instr, const char* format);
  int SubstituteImmediateField(const Instruction* instr, const char* format);
  int SubstituteLiteralField(const Instruction* instr, const char* format);
  int SubstituteBitfieldImmediateField(
      const Instruction* instr, const char* format);
  int SubstituteShiftField(const Instruction* instr, const char* format);
  int SubstituteExtendField(const Instruction* instr, const char* format);
  int SubstituteConditionField(const Instruction* instr, const char* format);
  int SubstitutePCRelAddressField(const Instruction* instr, const char* format);
  int SubstituteBranchTargetField(const Instruction* instr, const char* format);
  int SubstituteLSRegOffsetField(const Instruction* instr, const char* format);
  int SubstitutePrefetchField(const Instruction* instr, const char* format);
  int SubstituteBarrierField(const Instruction* instr, const char* format);
  int SubstituteSysOpField(const Instruction* instr, const char* format);
  int SubstituteCrField(const Instruction* instr, const char* format);

  // The four predicates below report whether the named register field of
  // |instr| holds kZeroRegCode. As the names indicate, that encoding stands
  // for either the zero register or the stack pointer.
  bool RdIsZROrSP(const Instruction* instr) const {
    return (instr->Rd() == kZeroRegCode);
  }

  bool RnIsZROrSP(const Instruction* instr) const {
    return (instr->Rn() == kZeroRegCode);
  }

  bool RmIsZROrSP(const Instruction* instr) const {
    return (instr->Rm() == kZeroRegCode);
  }

  bool RaIsZROrSP(const Instruction* instr) const {
    return (instr->Ra() == kZeroRegCode);
  }

  bool IsMovzMovnImm(unsigned reg_size, uint64_t value);

  int64_t code_address_offset() const { return code_address_offset_; }

 protected:
  void ResetOutput();
  // printf-style append to the output buffer; PRINTF_CHECK marks argument 2 as
  // the format string and argument 3 as the first variadic argument.
  void AppendToOutput(const char* string, ...) PRINTF_CHECK(2, 3);

  void set_code_address_offset(int64_t code_address_offset) {
    code_address_offset_ = code_address_offset;
  }

  // Output text buffer, the current write position in it, and its size.
  char* buffer_;
  uint32_t buffer_pos_;
  uint32_t buffer_size_;
  // NOTE(review): presumably true when buffer_ was allocated by this object
  // rather than supplied by the caller - confirm in Disasm-vixl.cpp.
  bool own_buffer_;

  // Offset applied when printing code-relative addresses (see MapCodeAddress).
  int64_t code_address_offset_;
};


// A Disassembler whose ProcessOutput() prints each instruction to the FILE
// stream supplied at construction.
class PrintDisassembler: public Disassembler {
 public:
  explicit PrintDisassembler(FILE* stream) : stream_(stream) { }

 protected:
  virtual void ProcessOutput(const Instruction* instr) override;

 private:
  FILE *stream_;
};

// Disassembles the instruction at |instr| into |buffer| of |bufsize| bytes.
void DisassembleInstruction(char* buffer, size_t bufsize, const Instruction* instr);
// Same, but returns a pointer to an internal static buffer that is reused by
// subsequent calls.
char* GdbDisassembleInstruction(const Instruction* instr);

}  // namespace vixl

#endif  // VIXL_A64_DISASM_A64_H
diff --git a/js/src/jit/arm64/vixl/Globals-vixl.h b/js/src/jit/arm64/vixl/Globals-vixl.h new file mode 100644 index 0000000000..2c7d5703f1 --- /dev/null +++ b/js/src/jit/arm64/vixl/Globals-vixl.h @@ -0,0 +1,272 @@
// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef VIXL_GLOBALS_H
#define VIXL_GLOBALS_H

// Get standard C99 macros for integer types.
#ifndef __STDC_CONSTANT_MACROS
#define __STDC_CONSTANT_MACROS
#endif

#ifndef __STDC_LIMIT_MACROS
#define __STDC_LIMIT_MACROS
#endif

#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif

#include "mozilla/Assertions.h"

#include <cstdarg>
#include <cstddef>
#include <cstdio>
#include <cstdlib>

extern "C" {
#include <inttypes.h>
#include <stdint.h>
}

#include "jstypes.h"

#include "jit/arm64/vixl/Platform-vixl.h"
#include "js/Utility.h"

// The negative-testing variants of the abort/check macros below build their
// messages with std::ostringstream and throw std::runtime_error.
#ifdef VIXL_NEGATIVE_TESTING
#include <sstream>
#include <stdexcept>
#include <string>
#endif

namespace vixl {

typedef uint8_t byte;

const int KBytes = 1024;
const int MBytes = 1024 * KBytes;

const int kBitsPerByte = 8;

// Maps a size in bits to the unsigned integer type of that size. Only the
// 32- and 64-bit specializations are provided.
template <int SizeInBits>
struct Unsigned;

template <>
struct Unsigned<32> {
  typedef uint32_t type;
};

template <>
struct Unsigned<64> {
  typedef uint64_t type;
};

}  // namespace vixl

// Detect the host's pointer size.
#if (UINTPTR_MAX == UINT32_MAX)
#define VIXL_HOST_POINTER_32
#elif (UINTPTR_MAX == UINT64_MAX)
#define VIXL_HOST_POINTER_64
#else
#error "Unsupported host pointer size."
#endif

// VIXL_ABORT / VIXL_ABORT_WITH_MSG / VIXL_CHECK.
// With VIXL_NEGATIVE_TESTING these throw std::runtime_error carrying the file
// and line; otherwise they call MOZ_CRASH. VIXL_CHECK is active in every build
// configuration (unlike VIXL_ASSERT further down).
#ifdef VIXL_NEGATIVE_TESTING
#define VIXL_ABORT()                                                         \
  do {                                                                       \
    std::ostringstream oss;                                                  \
    oss << "Aborting in " << __FILE__ << ", line " << __LINE__ << std::endl; \
    throw std::runtime_error(oss.str());                                     \
  } while (false)
#define VIXL_ABORT_WITH_MSG(msg)                                             \
  do {                                                                       \
    std::ostringstream oss;                                                  \
    oss << (msg) << "in " << __FILE__ << ", line " << __LINE__ << std::endl; \
    throw std::runtime_error(oss.str());                                     \
  } while (false)
#define VIXL_CHECK(condition)                                \
  do {                                                       \
    if (!(condition)) {                                      \
      std::ostringstream oss;                                \
      oss << "Assertion failed (" #condition ")\nin ";       \
      oss << __FILE__ << ", line " << __LINE__ << std::endl; \
      throw std::runtime_error(oss.str());                   \
    }                                                        \
  } while (false)
#else
#define VIXL_ABORT() \
  do {               \
    MOZ_CRASH();     \
  } while (false)
#define VIXL_ABORT_WITH_MSG(msg) \
  do {                           \
    MOZ_CRASH(msg);              \
  } while (false)
#define VIXL_CHECK(condition) \
  do {                        \
    if (!(condition)) {       \
      MOZ_CRASH();            \
    }                         \
  } while (false)
#endif
// Debug-only checks. In DEBUG builds VIXL_ASSERT forwards to MOZ_ASSERT and
// VIXL_UNIMPLEMENTED / VIXL_UNREACHABLE abort with a message. In non-DEBUG
// builds VIXL_ASSERT and VIXL_UNIMPLEMENTED expand to a no-op, while
// VIXL_UNREACHABLE still crashes through MOZ_CRASH.
#ifdef DEBUG
#define VIXL_ASSERT(condition) MOZ_ASSERT(condition)
#define VIXL_UNIMPLEMENTED()               \
  do {                                     \
    VIXL_ABORT_WITH_MSG("UNIMPLEMENTED "); \
  } while (false)
#define VIXL_UNREACHABLE()               \
  do {                                   \
    VIXL_ABORT_WITH_MSG("UNREACHABLE "); \
  } while (false)
#else
#define VIXL_ASSERT(condition) ((void)0)
#define VIXL_UNIMPLEMENTED() ((void)0)
#define VIXL_UNREACHABLE() MOZ_CRASH("vixl unreachable")
#endif
// This is not as powerful as template based assertions, but it is simple.
// It assumes that the descriptions are unique. If this starts being a problem,
// we can switch to a different implementation.
#define VIXL_CONCAT(a, b) a##b
#if __cplusplus >= 201103L
#define VIXL_STATIC_ASSERT_LINE(line_unused, condition, message) \
  static_assert(condition, message)
#else
#define VIXL_STATIC_ASSERT_LINE(line, condition, message_unused)            \
  typedef char VIXL_CONCAT(STATIC_ASSERT_LINE_, line)[(condition) ? 1 : -1] \
      __attribute__((unused))
#endif
#define VIXL_STATIC_ASSERT(condition) \
  VIXL_STATIC_ASSERT_LINE(__LINE__, condition, "")
#define VIXL_STATIC_ASSERT_MESSAGE(condition, message) \
  VIXL_STATIC_ASSERT_LINE(__LINE__, condition, message)

// Prints a warning with file and line to stdout. Note that the format string
// does not end with a newline, so |message| is printed as-is.
#define VIXL_WARNING(message)                                          \
  do {                                                                 \
    printf("WARNING in %s, line %i: %s", __FILE__, __LINE__, message); \
  } while (false)

// No-op helpers accepting one to four arguments by const reference.
// NOTE(review): presumably used to mark values as intentionally unused -
// confirm against callers.
template <typename T1>
inline void USE(const T1&) {}

template <typename T1, typename T2>
inline void USE(const T1&, const T2&) {}

template <typename T1, typename T2, typename T3>
inline void USE(const T1&, const T2&, const T3&) {}

template <typename T1, typename T2, typename T3, typename T4>
inline void USE(const T1&, const T2&, const T3&, const T4&) {}

#define VIXL_ALIGNMENT_EXCEPTION()                \
  do {                                            \
    VIXL_ABORT_WITH_MSG("ALIGNMENT EXCEPTION\t"); \
  } while (0)

// The clang::fallthrough attribute is used along with the Wimplicit-fallthrough
// argument to annotate intentional fall-through between switch labels.
// For more information please refer to:
// http://clang.llvm.org/docs/AttributeReference.html#fallthrough-clang-fallthrough
#ifndef __has_warning
#define __has_warning(x) 0
#endif

// Fallthrough annotation for Clang and C++11(201103L).
#if __has_warning("-Wimplicit-fallthrough") && __cplusplus >= 201103L
#define VIXL_FALLTHROUGH() [[clang::fallthrough]]
// Fallthrough annotation for GCC >= 7.
#elif __GNUC__ >= 7
#define VIXL_FALLTHROUGH() __attribute__((fallthrough))
#else
#define VIXL_FALLTHROUGH() \
  do {                     \
  } while (0)
#endif

#if __cplusplus >= 201103L
#define VIXL_NO_RETURN [[noreturn]]
#else
#define VIXL_NO_RETURN __attribute__((noreturn))
#endif
// Note: this is keyed on VIXL_DEBUG, not on DEBUG as the assertion macros
// above are.
#ifdef VIXL_DEBUG
#define VIXL_NO_RETURN_IN_DEBUG_MODE VIXL_NO_RETURN
#else
#define VIXL_NO_RETURN_IN_DEBUG_MODE
#endif

#if __cplusplus >= 201103L
#define VIXL_OVERRIDE override
#else
#define VIXL_OVERRIDE
#endif

// VIXL_AARCH64_GENERATE_SIMULATOR_CODE defaults to 1 when the AArch64
// simulator is included and to 0 otherwise. Requesting simulator code without
// the simulator only produces a compile-time warning.
#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
#ifndef VIXL_AARCH64_GENERATE_SIMULATOR_CODE
#define VIXL_AARCH64_GENERATE_SIMULATOR_CODE 1
#endif
#else
#ifndef VIXL_AARCH64_GENERATE_SIMULATOR_CODE
#define VIXL_AARCH64_GENERATE_SIMULATOR_CODE 0
#endif
#if VIXL_AARCH64_GENERATE_SIMULATOR_CODE
#warning "Generating Simulator instructions without Simulator support."
#endif
#endif

// We do not have a simulator for AArch32, although we can pretend we do so that
// tests that require running natively can be skipped.
#ifndef __arm__
#define VIXL_INCLUDE_SIMULATOR_AARCH32
#ifndef VIXL_AARCH32_GENERATE_SIMULATOR_CODE
#define VIXL_AARCH32_GENERATE_SIMULATOR_CODE 1
#endif
#else
#ifndef VIXL_AARCH32_GENERATE_SIMULATOR_CODE
#define VIXL_AARCH32_GENERATE_SIMULATOR_CODE 0
#endif
#endif

// Target Architecture/ISA

// Hack: always include AArch64.
#define VIXL_INCLUDE_TARGET_A64

#ifdef VIXL_INCLUDE_TARGET_A64
#define VIXL_INCLUDE_TARGET_AARCH64
#endif

// NOTE(review): when neither VIXL_INCLUDE_TARGET_A32 nor
// VIXL_INCLUDE_TARGET_T32 is defined, this chain still falls through to the
// final #else and defines VIXL_INCLUDE_TARGET_T32_ONLY - confirm that this is
// intended for an AArch64-only build.
#if defined(VIXL_INCLUDE_TARGET_A32) && defined(VIXL_INCLUDE_TARGET_T32)
#define VIXL_INCLUDE_TARGET_AARCH32
#elif defined(VIXL_INCLUDE_TARGET_A32)
#define VIXL_INCLUDE_TARGET_A32_ONLY
#else
#define VIXL_INCLUDE_TARGET_T32_ONLY
#endif


#endif  // VIXL_GLOBALS_H
diff --git a/js/src/jit/arm64/vixl/Instructions-vixl.cpp b/js/src/jit/arm64/vixl/Instructions-vixl.cpp new file mode 100644 index 0000000000..dcc0fab05e --- /dev/null +++ b/js/src/jit/arm64/vixl/Instructions-vixl.cpp @@ -0,0 +1,627 @@
// Copyright 2015, ARM Limited
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "jit/arm64/vixl/Instructions-vixl.h" + +#include "jit/arm64/vixl/Assembler-vixl.h" + +namespace vixl { + +static uint64_t RepeatBitsAcrossReg(unsigned reg_size, + uint64_t value, + unsigned width) { + VIXL_ASSERT((width == 2) || (width == 4) || (width == 8) || (width == 16) || + (width == 32)); + VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize)); + uint64_t result = value & ((UINT64_C(1) << width) - 1); + for (unsigned i = width; i < reg_size; i *= 2) { + result |= (result << i); + } + return result; +} + + +bool Instruction::IsLoad() const { + if (Mask(LoadStoreAnyFMask) != LoadStoreAnyFixed) { + return false; + } + + if (Mask(LoadStorePairAnyFMask) == LoadStorePairAnyFixed) { + return Mask(LoadStorePairLBit) != 0; + } else { + LoadStoreOp op = static_cast<LoadStoreOp>(Mask(LoadStoreMask)); + switch (op) { + case LDRB_w: + case LDRH_w: + case LDR_w: + case LDR_x: + case LDRSB_w: + case LDRSB_x: + case LDRSH_w: + case LDRSH_x: + case LDRSW_x: + case LDR_b: + case LDR_h: + case LDR_s: + case LDR_d: + case LDR_q: return true; + default: return false; + } + } +} + + +bool Instruction::IsStore() const { + if (Mask(LoadStoreAnyFMask) != LoadStoreAnyFixed) { + return false; + } + + if (Mask(LoadStorePairAnyFMask) == LoadStorePairAnyFixed) { + return Mask(LoadStorePairLBit) == 0; + } else { + LoadStoreOp op = static_cast<LoadStoreOp>(Mask(LoadStoreMask)); + switch (op) { + case STRB_w: + case STRH_w: + case STR_w: + 
case STR_x: + case STR_b: + case STR_h: + case STR_s: + case STR_d: + case STR_q: return true; + default: return false; + } + } +} + + +// Logical immediates can't encode zero, so a return value of zero is used to +// indicate a failure case. Specifically, where the constraints on imm_s are +// not met. +uint64_t Instruction::ImmLogical() const { + unsigned reg_size = SixtyFourBits() ? kXRegSize : kWRegSize; + int32_t n = BitN(); + int32_t imm_s = ImmSetBits(); + int32_t imm_r = ImmRotate(); + + // An integer is constructed from the n, imm_s and imm_r bits according to + // the following table: + // + // N imms immr size S R + // 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr) + // 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr) + // 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr) + // 0 110sss xxxrrr 8 UInt(sss) UInt(rrr) + // 0 1110ss xxxxrr 4 UInt(ss) UInt(rr) + // 0 11110s xxxxxr 2 UInt(s) UInt(r) + // (s bits must not be all set) + // + // A pattern is constructed of size bits, where the least significant S+1 + // bits are set. The pattern is rotated right by R, and repeated across a + // 32 or 64-bit value, depending on destination register width. 
+ // + + if (n == 1) { + if (imm_s == 0x3f) { + return 0; + } + uint64_t bits = (UINT64_C(1) << (imm_s + 1)) - 1; + return RotateRight(bits, imm_r, 64); + } else { + if ((imm_s >> 1) == 0x1f) { + return 0; + } + for (int width = 0x20; width >= 0x2; width >>= 1) { + if ((imm_s & width) == 0) { + int mask = width - 1; + if ((imm_s & mask) == mask) { + return 0; + } + uint64_t bits = (UINT64_C(1) << ((imm_s & mask) + 1)) - 1; + return RepeatBitsAcrossReg(reg_size, + RotateRight(bits, imm_r & mask, width), + width); + } + } + } + VIXL_UNREACHABLE(); + return 0; +} + + +uint32_t Instruction::ImmNEONabcdefgh() const { + return ImmNEONabc() << 5 | ImmNEONdefgh(); +} + + +float Instruction::Imm8ToFP32(uint32_t imm8) { + // Imm8: abcdefgh (8 bits) + // Single: aBbb.bbbc.defg.h000.0000.0000.0000.0000 (32 bits) + // where B is b ^ 1 + uint32_t bits = imm8; + uint32_t bit7 = (bits >> 7) & 0x1; + uint32_t bit6 = (bits >> 6) & 0x1; + uint32_t bit5_to_0 = bits & 0x3f; + uint32_t result = (bit7 << 31) | ((32 - bit6) << 25) | (bit5_to_0 << 19); + + return RawbitsToFloat(result); +} + + +float Instruction::ImmFP32() const { + return Imm8ToFP32(ImmFP()); +} + + +double Instruction::Imm8ToFP64(uint32_t imm8) { + // Imm8: abcdefgh (8 bits) + // Double: aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000 + // 0000.0000.0000.0000.0000.0000.0000.0000 (64 bits) + // where B is b ^ 1 + uint32_t bits = imm8; + uint64_t bit7 = (bits >> 7) & 0x1; + uint64_t bit6 = (bits >> 6) & 0x1; + uint64_t bit5_to_0 = bits & 0x3f; + uint64_t result = (bit7 << 63) | ((256 - bit6) << 54) | (bit5_to_0 << 48); + + return RawbitsToDouble(result); +} + + +double Instruction::ImmFP64() const { + return Imm8ToFP64(ImmFP()); +} + + +float Instruction::ImmNEONFP32() const { + return Imm8ToFP32(ImmNEONabcdefgh()); +} + + +double Instruction::ImmNEONFP64() const { + return Imm8ToFP64(ImmNEONabcdefgh()); +} + +unsigned CalcLSPairDataSize(LoadStorePairOp op) { + VIXL_STATIC_ASSERT(kXRegSizeInBytes == kDRegSizeInBytes); + 
VIXL_STATIC_ASSERT(kWRegSizeInBytes == kSRegSizeInBytes); + switch (op) { + case STP_q: + case LDP_q: return kQRegSizeInBytesLog2; + case STP_x: + case LDP_x: + case STP_d: + case LDP_d: return kXRegSizeInBytesLog2; + default: return kWRegSizeInBytesLog2; + } +} + + +int Instruction::ImmBranchRangeBitwidth(ImmBranchType branch_type) { + switch (branch_type) { + case UncondBranchType: + return ImmUncondBranch_width; + case CondBranchType: + return ImmCondBranch_width; + case CompareBranchType: + return ImmCmpBranch_width; + case TestBranchType: + return ImmTestBranch_width; + default: + VIXL_UNREACHABLE(); + return 0; + } +} + + +int32_t Instruction::ImmBranchForwardRange(ImmBranchType branch_type) { + int32_t encoded_max = 1 << (ImmBranchRangeBitwidth(branch_type) - 1); + return encoded_max * kInstructionSize; +} + + +bool Instruction::IsValidImmPCOffset(ImmBranchType branch_type, + int64_t offset) { + return IsIntN(ImmBranchRangeBitwidth(branch_type), offset); +} + +ImmBranchRangeType Instruction::ImmBranchTypeToRange(ImmBranchType branch_type) +{ + switch (branch_type) { + case UncondBranchType: + return UncondBranchRangeType; + case CondBranchType: + case CompareBranchType: + return CondBranchRangeType; + case TestBranchType: + return TestBranchRangeType; + default: + return UnknownBranchRangeType; + } +} + +int32_t Instruction::ImmBranchMaxForwardOffset(ImmBranchRangeType range_type) +{ + // Branches encode a pc-relative two's complement number of 32-bit + // instructions. Compute the number of bytes corresponding to the largest + // positive number of instructions that can be encoded. 
+ switch(range_type) { + case TestBranchRangeType: + return ((1 << ImmTestBranch_width) - 1) / 2 * kInstructionSize; + case CondBranchRangeType: + return ((1 << ImmCondBranch_width) - 1) / 2 * kInstructionSize; + case UncondBranchRangeType: + return ((1 << ImmUncondBranch_width) - 1) / 2 * kInstructionSize; + default: + VIXL_UNREACHABLE(); + return 0; + } +} + +int32_t Instruction::ImmBranchMinBackwardOffset(ImmBranchRangeType range_type) +{ + switch(range_type) { + case TestBranchRangeType: + return -int32_t(1 << ImmTestBranch_width) / int32_t(2 * kInstructionSize); + case CondBranchRangeType: + return -int32_t(1 << ImmCondBranch_width) / int32_t(2 * kInstructionSize); + case UncondBranchRangeType: + return -int32_t(1 << ImmUncondBranch_width) / int32_t(2 * kInstructionSize); + default: + VIXL_UNREACHABLE(); + return 0; + } +} + +const Instruction* Instruction::ImmPCOffsetTarget() const { + const Instruction * base = this; + ptrdiff_t offset; + if (IsPCRelAddressing()) { + // ADR and ADRP. + offset = ImmPCRel(); + if (Mask(PCRelAddressingMask) == ADRP) { + base = AlignDown(base, kPageSize); + offset *= kPageSize; + } else { + VIXL_ASSERT(Mask(PCRelAddressingMask) == ADR); + } + } else { + // All PC-relative branches. + VIXL_ASSERT(BranchType() != UnknownBranchType); + // Relative branch offsets are instruction-size-aligned. 
+ offset = ImmBranch() << kInstructionSizeLog2; + } + return base + offset; +} + + +int Instruction::ImmBranch() const { + switch (BranchType()) { + case CondBranchType: return ImmCondBranch(); + case UncondBranchType: return ImmUncondBranch(); + case CompareBranchType: return ImmCmpBranch(); + case TestBranchType: return ImmTestBranch(); + default: VIXL_UNREACHABLE(); + } + return 0; +} + + +void Instruction::SetImmPCOffsetTarget(const Instruction* target) { + if (IsPCRelAddressing()) { + SetPCRelImmTarget(target); + } else { + SetBranchImmTarget(target); + } +} + + +void Instruction::SetPCRelImmTarget(const Instruction* target) { + ptrdiff_t imm21; + if ((Mask(PCRelAddressingMask) == ADR)) { + imm21 = target - this; + } else { + VIXL_ASSERT(Mask(PCRelAddressingMask) == ADRP); + uintptr_t this_page = reinterpret_cast<uintptr_t>(this) / kPageSize; + uintptr_t target_page = reinterpret_cast<uintptr_t>(target) / kPageSize; + imm21 = target_page - this_page; + } + Instr imm = Assembler::ImmPCRelAddress(static_cast<int32_t>(imm21)); + + SetInstructionBits(Mask(~ImmPCRel_mask) | imm); +} + + +void Instruction::SetBranchImmTarget(const Instruction* target) { + VIXL_ASSERT(((target - this) & 3) == 0); + Instr branch_imm = 0; + uint32_t imm_mask = 0; + int offset = static_cast<int>((target - this) >> kInstructionSizeLog2); + switch (BranchType()) { + case CondBranchType: { + branch_imm = Assembler::ImmCondBranch(offset); + imm_mask = ImmCondBranch_mask; + break; + } + case UncondBranchType: { + branch_imm = Assembler::ImmUncondBranch(offset); + imm_mask = ImmUncondBranch_mask; + break; + } + case CompareBranchType: { + branch_imm = Assembler::ImmCmpBranch(offset); + imm_mask = ImmCmpBranch_mask; + break; + } + case TestBranchType: { + branch_imm = Assembler::ImmTestBranch(offset); + imm_mask = ImmTestBranch_mask; + break; + } + default: VIXL_UNREACHABLE(); + } + SetInstructionBits(Mask(~imm_mask) | branch_imm); +} + + +void Instruction::SetImmLLiteral(const Instruction* 
source) { + VIXL_ASSERT(IsWordAligned(source)); + ptrdiff_t offset = (source - this) >> kLiteralEntrySizeLog2; + Instr imm = Assembler::ImmLLiteral(static_cast<int>(offset)); + Instr mask = ImmLLiteral_mask; + + SetInstructionBits(Mask(~mask) | imm); +} + + +VectorFormat VectorFormatHalfWidth(const VectorFormat vform) { + VIXL_ASSERT(vform == kFormat8H || vform == kFormat4S || vform == kFormat2D || + vform == kFormatH || vform == kFormatS || vform == kFormatD); + switch (vform) { + case kFormat8H: return kFormat8B; + case kFormat4S: return kFormat4H; + case kFormat2D: return kFormat2S; + case kFormatH: return kFormatB; + case kFormatS: return kFormatH; + case kFormatD: return kFormatS; + default: VIXL_UNREACHABLE(); return kFormatUndefined; + } +} + + +VectorFormat VectorFormatDoubleWidth(const VectorFormat vform) { + VIXL_ASSERT(vform == kFormat8B || vform == kFormat4H || vform == kFormat2S || + vform == kFormatB || vform == kFormatH || vform == kFormatS); + switch (vform) { + case kFormat8B: return kFormat8H; + case kFormat4H: return kFormat4S; + case kFormat2S: return kFormat2D; + case kFormatB: return kFormatH; + case kFormatH: return kFormatS; + case kFormatS: return kFormatD; + default: VIXL_UNREACHABLE(); return kFormatUndefined; + } +} + + +VectorFormat VectorFormatFillQ(const VectorFormat vform) { + switch (vform) { + case kFormatB: + case kFormat8B: + case kFormat16B: return kFormat16B; + case kFormatH: + case kFormat4H: + case kFormat8H: return kFormat8H; + case kFormatS: + case kFormat2S: + case kFormat4S: return kFormat4S; + case kFormatD: + case kFormat1D: + case kFormat2D: return kFormat2D; + default: VIXL_UNREACHABLE(); return kFormatUndefined; + } +} + +VectorFormat VectorFormatHalfWidthDoubleLanes(const VectorFormat vform) { + switch (vform) { + case kFormat4H: return kFormat8B; + case kFormat8H: return kFormat16B; + case kFormat2S: return kFormat4H; + case kFormat4S: return kFormat8H; + case kFormat1D: return kFormat2S; + case kFormat2D: return 
kFormat4S; + default: VIXL_UNREACHABLE(); return kFormatUndefined; + } +} + +VectorFormat VectorFormatDoubleLanes(const VectorFormat vform) { + VIXL_ASSERT(vform == kFormat8B || vform == kFormat4H || vform == kFormat2S); + switch (vform) { + case kFormat8B: return kFormat16B; + case kFormat4H: return kFormat8H; + case kFormat2S: return kFormat4S; + default: VIXL_UNREACHABLE(); return kFormatUndefined; + } +} + + +VectorFormat VectorFormatHalfLanes(const VectorFormat vform) { + VIXL_ASSERT(vform == kFormat16B || vform == kFormat8H || vform == kFormat4S); + switch (vform) { + case kFormat16B: return kFormat8B; + case kFormat8H: return kFormat4H; + case kFormat4S: return kFormat2S; + default: VIXL_UNREACHABLE(); return kFormatUndefined; + } +} + + +VectorFormat ScalarFormatFromLaneSize(int laneSize) { + switch (laneSize) { + case 8: return kFormatB; + case 16: return kFormatH; + case 32: return kFormatS; + case 64: return kFormatD; + default: VIXL_UNREACHABLE(); return kFormatUndefined; + } +} + + +unsigned RegisterSizeInBitsFromFormat(VectorFormat vform) { + VIXL_ASSERT(vform != kFormatUndefined); + switch (vform) { + case kFormatB: return kBRegSize; + case kFormatH: return kHRegSize; + case kFormatS: return kSRegSize; + case kFormatD: return kDRegSize; + case kFormat8B: + case kFormat4H: + case kFormat2S: + case kFormat1D: return kDRegSize; + default: return kQRegSize; + } +} + + +unsigned RegisterSizeInBytesFromFormat(VectorFormat vform) { + return RegisterSizeInBitsFromFormat(vform) / 8; +} + + +unsigned LaneSizeInBitsFromFormat(VectorFormat vform) { + VIXL_ASSERT(vform != kFormatUndefined); + switch (vform) { + case kFormatB: + case kFormat8B: + case kFormat16B: return 8; + case kFormatH: + case kFormat4H: + case kFormat8H: return 16; + case kFormatS: + case kFormat2S: + case kFormat4S: return 32; + case kFormatD: + case kFormat1D: + case kFormat2D: return 64; + default: VIXL_UNREACHABLE(); return 0; + } +} + + +int LaneSizeInBytesFromFormat(VectorFormat vform) { 
+ return LaneSizeInBitsFromFormat(vform) / 8; +} + + +int LaneSizeInBytesLog2FromFormat(VectorFormat vform) { + VIXL_ASSERT(vform != kFormatUndefined); + switch (vform) { + case kFormatB: + case kFormat8B: + case kFormat16B: return 0; + case kFormatH: + case kFormat4H: + case kFormat8H: return 1; + case kFormatS: + case kFormat2S: + case kFormat4S: return 2; + case kFormatD: + case kFormat1D: + case kFormat2D: return 3; + default: VIXL_UNREACHABLE(); return 0; + } +} + + +int LaneCountFromFormat(VectorFormat vform) { + VIXL_ASSERT(vform != kFormatUndefined); + switch (vform) { + case kFormat16B: return 16; + case kFormat8B: + case kFormat8H: return 8; + case kFormat4H: + case kFormat4S: return 4; + case kFormat2S: + case kFormat2D: return 2; + case kFormat1D: + case kFormatB: + case kFormatH: + case kFormatS: + case kFormatD: return 1; + default: VIXL_UNREACHABLE(); return 0; + } +} + + +int MaxLaneCountFromFormat(VectorFormat vform) { + VIXL_ASSERT(vform != kFormatUndefined); + switch (vform) { + case kFormatB: + case kFormat8B: + case kFormat16B: return 16; + case kFormatH: + case kFormat4H: + case kFormat8H: return 8; + case kFormatS: + case kFormat2S: + case kFormat4S: return 4; + case kFormatD: + case kFormat1D: + case kFormat2D: return 2; + default: VIXL_UNREACHABLE(); return 0; + } +} + + +// Does 'vform' indicate a vector format or a scalar format? 
+bool IsVectorFormat(VectorFormat vform) { + VIXL_ASSERT(vform != kFormatUndefined); + switch (vform) { + case kFormatB: + case kFormatH: + case kFormatS: + case kFormatD: return false; + default: return true; + } +} + + +int64_t MaxIntFromFormat(VectorFormat vform) { + return INT64_MAX >> (64 - LaneSizeInBitsFromFormat(vform)); +} + + +int64_t MinIntFromFormat(VectorFormat vform) { + return INT64_MIN >> (64 - LaneSizeInBitsFromFormat(vform)); +} + + +uint64_t MaxUintFromFormat(VectorFormat vform) { + return UINT64_MAX >> (64 - LaneSizeInBitsFromFormat(vform)); +} +} // namespace vixl + diff --git a/js/src/jit/arm64/vixl/Instructions-vixl.h b/js/src/jit/arm64/vixl/Instructions-vixl.h new file mode 100644 index 0000000000..4bcddf642a --- /dev/null +++ b/js/src/jit/arm64/vixl/Instructions-vixl.h @@ -0,0 +1,817 @@ +// Copyright 2015, ARM Limited +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef VIXL_A64_INSTRUCTIONS_A64_H_ +#define VIXL_A64_INSTRUCTIONS_A64_H_ + +#include "jit/arm64/vixl/Constants-vixl.h" +#include "jit/arm64/vixl/Globals-vixl.h" +#include "jit/arm64/vixl/Utils-vixl.h" + +namespace vixl { +// ISA constants. -------------------------------------------------------------- + +typedef uint32_t Instr; +const unsigned kInstructionSize = 4; +const unsigned kInstructionSizeLog2 = 2; +const unsigned kLiteralEntrySize = 4; +const unsigned kLiteralEntrySizeLog2 = 2; +const unsigned kMaxLoadLiteralRange = 1 * MBytes; + +// This is the nominal page size (as used by the adrp instruction); the actual +// size of the memory pages allocated by the kernel is likely to differ. 
+const unsigned kPageSize = 4 * KBytes;
+const unsigned kPageSizeLog2 = 12;
+
+// Register widths. The k?RegSize values are in bits; each *InBytes variant is
+// that value divided by 8, and every size has a matching log2 constant.
+const unsigned kBRegSize = 8;
+const unsigned kBRegSizeLog2 = 3;
+const unsigned kBRegSizeInBytes = kBRegSize / 8;
+const unsigned kBRegSizeInBytesLog2 = kBRegSizeLog2 - 3;
+const unsigned kHRegSize = 16;
+const unsigned kHRegSizeLog2 = 4;
+const unsigned kHRegSizeInBytes = kHRegSize / 8;
+const unsigned kHRegSizeInBytesLog2 = kHRegSizeLog2 - 3;
+const unsigned kWRegSize = 32;
+const unsigned kWRegSizeLog2 = 5;
+const unsigned kWRegSizeInBytes = kWRegSize / 8;
+const unsigned kWRegSizeInBytesLog2 = kWRegSizeLog2 - 3;
+const unsigned kXRegSize = 64;
+const unsigned kXRegSizeLog2 = 6;
+const unsigned kXRegSizeInBytes = kXRegSize / 8;
+const unsigned kXRegSizeInBytesLog2 = kXRegSizeLog2 - 3;
+const unsigned kSRegSize = 32;
+const unsigned kSRegSizeLog2 = 5;
+const unsigned kSRegSizeInBytes = kSRegSize / 8;
+const unsigned kSRegSizeInBytesLog2 = kSRegSizeLog2 - 3;
+const unsigned kDRegSize = 64;
+const unsigned kDRegSizeLog2 = 6;
+const unsigned kDRegSizeInBytes = kDRegSize / 8;
+const unsigned kDRegSizeInBytesLog2 = kDRegSizeLog2 - 3;
+const unsigned kQRegSize = 128;
+const unsigned kQRegSizeLog2 = 7;
+const unsigned kQRegSizeInBytes = kQRegSize / 8;
+const unsigned kQRegSizeInBytesLog2 = kQRegSizeLog2 - 3;
+// Value masks, sign-bit masks and integer limits for the register widths above.
+const uint64_t kWRegMask = UINT64_C(0xffffffff);
+const uint64_t kXRegMask = UINT64_C(0xffffffffffffffff);
+const uint64_t kSRegMask = UINT64_C(0xffffffff);
+const uint64_t kDRegMask = UINT64_C(0xffffffffffffffff);
+const uint64_t kSSignMask = UINT64_C(0x80000000);
+const uint64_t kDSignMask = UINT64_C(0x8000000000000000);
+const uint64_t kWSignMask = UINT64_C(0x80000000);
+const uint64_t kXSignMask = UINT64_C(0x8000000000000000);
+const uint64_t kByteMask = UINT64_C(0xff);
+const uint64_t kHalfWordMask = UINT64_C(0xffff);
+const uint64_t kWordMask = UINT64_C(0xffffffff);
+const uint64_t kXMaxUInt = UINT64_C(0xffffffffffffffff);
+const uint64_t kXMaxExactUInt = UINT64_C(0xfffffffffffff800);
+const uint64_t kWMaxUInt = UINT64_C(0xffffffff);
+const int64_t kXMaxInt = INT64_C(0x7fffffffffffffff);
+const int64_t kXMaxExactInt = UINT64_C(0x7ffffffffffffc00);
+const int64_t kXMinInt = INT64_C(0x8000000000000000);
+const int32_t kWMaxInt = INT32_C(0x7fffffff);
+const int32_t kWMinInt = INT32_C(0x80000000);
+const unsigned kLinkRegCode = 30;
+const unsigned kZeroRegCode = 31;
+const unsigned kSPRegInternalCode = 63;
+const unsigned kRegCodeMask = 0x1f;
+
+// The address tag is the top kAddressTagWidth bits of a 64-bit value; the
+// static assert below pins the resulting mask.
+const unsigned kAddressTagOffset = 56;
+const unsigned kAddressTagWidth = 8;
+const uint64_t kAddressTagMask =
+  ((UINT64_C(1) << kAddressTagWidth) - 1) << kAddressTagOffset;
+VIXL_STATIC_ASSERT(kAddressTagMask == UINT64_C(0xff00000000000000));
+
+// Returns the value of the load/store "size" field of |op|, i.e. the bits of
+// |op| from LSSize_offset upwards. For vector operations whose size field is 0
+// and whose opc field is at least 2, kQRegSizeInBytesLog2 is returned instead.
+static inline unsigned CalcLSDataSize(LoadStoreOp op) {
+  // The size field must occupy the topmost bits of the instruction word, so
+  // that the plain right shift below leaves nothing but that field.
+  VIXL_ASSERT((LSSize_offset + LSSize_width) == (kInstructionSize * 8));
+  unsigned size = static_cast<Instr>(op) >> LSSize_offset;
+  if ((op & LSVector_mask) != 0) {
+    // Vector register memory operations encode the access size in the "size"
+    // and "opc" fields.
+    if ((size == 0) && ((op & LSOpc_mask) >> LSOpc_offset) >= 2) {
+      size = kQRegSizeInBytesLog2;
+    }
+  }
+  return size;
+}
+
+unsigned CalcLSPairDataSize(LoadStorePairOp op);
+
+// Kinds of immediate branch, as classified by Instruction::BranchType().
+enum ImmBranchType {
+  UnknownBranchType = 0,
+  CondBranchType = 1,
+  UncondBranchType = 2,
+  CompareBranchType = 3,
+  TestBranchType = 4
+};
+
+// The classes of immediate branch ranges, in order of increasing range.
+// Note that CondBranchType and CompareBranchType have the same range.
+enum ImmBranchRangeType {
+  TestBranchRangeType, // tbz/tbnz: imm14 = +/- 32KB.
+  CondBranchRangeType, // b.cond/cbz/cbnz: imm19 = +/- 1MB.
+  UncondBranchRangeType, // b/bl: imm26 = +/- 128MB.
+  UnknownBranchRangeType,
+
+  // Number of 'short-range' branch range types.
+  // We don't consider unconditional branches 'short-range'.
+  NumShortBranchRangeTypes = UncondBranchRangeType
+};
+
+enum AddrMode {
+  Offset,
+  PreIndex,
+  PostIndex
+};
+
+// How register code 31 is interpreted; see Instruction::RdMode() and
+// Instruction::RnMode().
+enum Reg31Mode {
+  Reg31IsStackPointer,
+  Reg31IsZeroRegister
+};
+
+// Instructions. ---------------------------------------------------------------
+
+// Accessors over one encoded instruction. The accessors below read and write
+// the instruction word by reinterpreting |this| as a pointer to Instr.
+class Instruction {
+ public:
+  // Returns the raw instruction word stored at |this|.
+  Instr InstructionBits() const {
+    return *(reinterpret_cast<const Instr*>(this));
+  }
+
+  // Overwrites the instruction word stored at |this|.
+  void SetInstructionBits(Instr new_instr) {
+    *(reinterpret_cast<Instr*>(this)) = new_instr;
+  }
+
+  // Returns bit |pos| of the instruction word as 0 or 1.
+  int Bit(int pos) const {
+    return (InstructionBits() >> pos) & 1;
+  }
+
+  // Returns bits [msb:lsb] of the instruction word as an unsigned value.
+  uint32_t Bits(int msb, int lsb) const {
+    return ExtractUnsignedBitfield32(msb, lsb, InstructionBits());
+  }
+
+  // Returns bits [msb:lsb] of the instruction word as a signed value.
+  int32_t SignedBits(int msb, int lsb) const {
+    int32_t bits = *(reinterpret_cast<const int32_t*>(this));
+    return ExtractSignedBitfield32(msb, lsb, bits);
+  }
+
+  // Returns the instruction word ANDed with |mask|.
+  Instr Mask(uint32_t mask) const {
+    return InstructionBits() & mask;
+  }
+
+  // One getter per entry of INSTRUCTION_FIELDS_LIST; each forwards to the
+  // extraction function (Func) named by that entry.
+  #define DEFINE_GETTER(Name, HighBit, LowBit, Func) \
+  int32_t Name() const { return Func(HighBit, LowBit); }
+  INSTRUCTION_FIELDS_LIST(DEFINE_GETTER)
+  #undef DEFINE_GETTER
+
+  // One Set<Name>() per entry of INSTRUCTION_FIELDS_LIST; each forwards to
+  // SetBits32() with that entry's bit range.
+  #define DEFINE_SETTER(Name, HighBit, LowBit, Func) \
+  inline void Set##Name(unsigned n) { SetBits32(HighBit, LowBit, n); }
+  INSTRUCTION_FIELDS_LIST(DEFINE_SETTER)
+  #undef DEFINE_SETTER
+
+  // ImmPCRel is a compound field (not present in INSTRUCTION_FIELDS_LIST),
+  // formed from ImmPCRelLo and ImmPCRelHi.
+  int ImmPCRel() const {
+    // Concatenate the high and low parts, then sign-extend from the combined
+    // field width.
+    int offset =
+        static_cast<int>((ImmPCRelHi() << ImmPCRelLo_width) | ImmPCRelLo());
+    int width = ImmPCRelLo_width + ImmPCRelHi_width;
+    return ExtractSignedBitfield32(width - 1, 0, offset);
+  }
+
+  uint64_t ImmLogical() const;
+  unsigned ImmNEONabcdefgh() const;
+  float ImmFP32() const;
+  double ImmFP64() const;
+  float ImmNEONFP32() const;
+  double ImmNEONFP64() const;
+
+  // Result of CalcLSDataSize() for this instruction's load/store opcode bits.
+  unsigned SizeLS() const {
+    return CalcLSDataSize(static_cast<LoadStoreOp>(Mask(LoadStoreMask)));
+  }
+
+  // Result of CalcLSPairDataSize() for this instruction's load/store-pair
+  // opcode bits.
+  unsigned SizeLSPair() const {
+    return CalcLSPairDataSize(
+        static_cast<LoadStorePairOp>(Mask(LoadStorePairMask)));
+  }
+
+  // Combines the NEONQ, NEONS and NEONLSSize fields into one value
+  // (Q at bit 3, S at bit 2, size in the low bits) and shifts it right by
+  // |access_size_shift|.
+  int NEONLSIndex(int access_size_shift) const {
+    int64_t q = NEONQ();
+    int64_t s = NEONS();
+    int64_t size = NEONLSSize();
+    int64_t index = (q << 3) | (s << 2) | size;
+    return static_cast<int>(index >> access_size_shift);
+  }
+
+  // Helpers.
+  // Each predicate below masks the instruction word with a class mask and
+  // compares the result against that class's fixed bits.
+  bool IsCondBranchImm() const {
+    return Mask(ConditionalBranchFMask) == ConditionalBranchFixed;
+  }
+
+  bool IsUncondBranchImm() const {
+    return Mask(UnconditionalBranchFMask) == UnconditionalBranchFixed;
+  }
+
+  bool IsCompareBranch() const {
+    return Mask(CompareBranchFMask) == CompareBranchFixed;
+  }
+
+  bool IsTestBranch() const {
+    return Mask(TestBranchFMask) == TestBranchFixed;
+  }
+
+  // True when BranchType() recognises this as any immediate branch.
+  bool IsImmBranch() const {
+    return BranchType() != UnknownBranchType;
+  }
+
+  bool IsPCRelAddressing() const {
+    return Mask(PCRelAddressingFMask) == PCRelAddressingFixed;
+  }
+
+  bool IsLogicalImmediate() const {
+    return Mask(LogicalImmediateFMask) == LogicalImmediateFixed;
+  }
+
+  bool IsAddSubImmediate() const {
+    return Mask(AddSubImmediateFMask) == AddSubImmediateFixed;
+  }
+
+  bool IsAddSubExtended() const {
+    return Mask(AddSubExtendedFMask) == AddSubExtendedFixed;
+  }
+
+  bool IsLoadOrStore() const {
+    return Mask(LoadStoreAnyFMask) == LoadStoreAnyFixed;
+  }
+
+  bool IsLoad() const;
+  bool IsStore() const;
+
+  bool IsLoadLiteral() const {
+    // This includes PRFM_lit.
+    return Mask(LoadLiteralFMask) == LoadLiteralFixed;
+  }
+
+  // True for either the 64-bit (MOVN_x) or 32-bit (MOVN_w) form.
+  bool IsMovn() const {
+    return (Mask(MoveWideImmediateMask) == MOVN_x) ||
+           (Mask(MoveWideImmediateMask) == MOVN_w);
+  }
+
+  // Mozilla modifications.
+  bool IsUncondB() const;
+  bool IsCondB() const;
+  bool IsBL() const;
+  bool IsBR() const;
+  bool IsBLR() const;
+  bool IsTBZ() const;
+  bool IsTBNZ() const;
+  bool IsCBZ() const;
+  bool IsCBNZ() const;
+  bool IsLDR() const;
+  bool IsNOP() const;
+  bool IsCSDB() const;
+  bool IsADR() const;
+  bool IsADRP() const;
+  bool IsMovz() const;
+  bool IsMovk() const;
+  bool IsBranchLinkImm() const;
+  bool IsTargetReachable(const Instruction* target) const;
+  ptrdiff_t ImmPCRawOffset() const;
+  void SetImmPCRawOffset(ptrdiff_t offset);
+  void SetBits32(int msb, int lsb, unsigned value);
+
+  // Is this a stack pointer synchronization instruction as inserted by
+  // MacroAssembler::syncStackPtr()?
+  bool IsStackPtrSync() const;
+
+  static int ImmBranchRangeBitwidth(ImmBranchType branch_type);
+  static int32_t ImmBranchForwardRange(ImmBranchType branch_type);
+
+  // Check if offset can be encoded as a RAW offset in a branch_type
+  // instruction. The offset must be encodeable directly as the immediate field
+  // in the instruction, it is not scaled by kInstructionSize first.
+  static bool IsValidImmPCOffset(ImmBranchType branch_type, int64_t offset);
+
+  // Get the range type corresponding to a branch type.
+  static ImmBranchRangeType ImmBranchTypeToRange(ImmBranchType);
+
+  // Get the maximum realizable forward PC offset (in bytes) for an immediate
+  // branch of the given range type.
+  // This is the largest positive multiple of kInstructionSize, offset, such
+  // that:
+  //
+  //    IsValidImmPCOffset(xxx, offset / kInstructionSize)
+  //
+  // returns true for the same branch type.
+ static int32_t ImmBranchMaxForwardOffset(ImmBranchRangeType range_type); + + // Get the minimuum realizable backward PC offset (in bytes) for an immediate + // branch of the given range type. + // This is the smallest (i.e., largest in magnitude) negative multiple of + // kInstructionSize, offset, such that: + // + // IsValidImmPCOffset(xxx, offset / kInstructionSize) + // + // returns true for the same branch type. + static int32_t ImmBranchMinBackwardOffset(ImmBranchRangeType range_type); + + // Indicate whether Rd can be the stack pointer or the zero register. This + // does not check that the instruction actually has an Rd field. + Reg31Mode RdMode() const { + // The following instructions use sp or wsp as Rd: + // Add/sub (immediate) when not setting the flags. + // Add/sub (extended) when not setting the flags. + // Logical (immediate) when not setting the flags. + // Otherwise, r31 is the zero register. + if (IsAddSubImmediate() || IsAddSubExtended()) { + if (Mask(AddSubSetFlagsBit)) { + return Reg31IsZeroRegister; + } else { + return Reg31IsStackPointer; + } + } + if (IsLogicalImmediate()) { + // Of the logical (immediate) instructions, only ANDS (and its aliases) + // can set the flags. The others can all write into sp. + // Note that some logical operations are not available to + // immediate-operand instructions, so we have to combine two masks here. + if (Mask(LogicalImmediateMask & LogicalOpMask) == ANDS) { + return Reg31IsZeroRegister; + } else { + return Reg31IsStackPointer; + } + } + return Reg31IsZeroRegister; + } + + // Indicate whether Rn can be the stack pointer or the zero register. This + // does not check that the instruction actually has an Rn field. + Reg31Mode RnMode() const { + // The following instructions use sp or wsp as Rn: + // All loads and stores. + // Add/sub (immediate). + // Add/sub (extended). + // Otherwise, r31 is the zero register. 
+ if (IsLoadOrStore() || IsAddSubImmediate() || IsAddSubExtended()) { + return Reg31IsStackPointer; + } + return Reg31IsZeroRegister; + } + + ImmBranchType BranchType() const { + if (IsCondBranchImm()) { + return CondBranchType; + } else if (IsUncondBranchImm()) { + return UncondBranchType; + } else if (IsCompareBranch()) { + return CompareBranchType; + } else if (IsTestBranch()) { + return TestBranchType; + } else { + return UnknownBranchType; + } + } + + // Find the target of this instruction. 'this' may be a branch or a + // PC-relative addressing instruction. + const Instruction* ImmPCOffsetTarget() const; + + // Patch a PC-relative offset to refer to 'target'. 'this' may be a branch or + // a PC-relative addressing instruction. + void SetImmPCOffsetTarget(const Instruction* target); + // Patch a literal load instruction to load from 'source'. + void SetImmLLiteral(const Instruction* source); + + // The range of a load literal instruction, expressed as 'instr +- range'. + // The range is actually the 'positive' range; the branch instruction can + // target [instr - range - kInstructionSize, instr + range]. + static const int kLoadLiteralImmBitwidth = 19; + static const int kLoadLiteralRange = + (1 << kLoadLiteralImmBitwidth) / 2 - kInstructionSize; + + // Calculate the address of a literal referred to by a load-literal + // instruction, and return it as the specified type. + // + // The literal itself is safely mutable only if the backing buffer is safely + // mutable. + template <typename T> + T LiteralAddress() const { + uint64_t base_raw = reinterpret_cast<uint64_t>(this); + int64_t offset = ImmLLiteral() << kLiteralEntrySizeLog2; + uint64_t address_raw = base_raw + offset; + + // Cast the address using a C-style cast. A reinterpret_cast would be + // appropriate, but it can't cast one integral type to another. + T address = (T)(address_raw); + + // Assert that the address can be represented by the specified type. 
+ VIXL_ASSERT((uint64_t)(address) == address_raw); + + return address; + } + + uint32_t Literal32() const { + uint32_t literal; + memcpy(&literal, LiteralAddress<const void*>(), sizeof(literal)); + return literal; + } + + uint64_t Literal64() const { + uint64_t literal; + memcpy(&literal, LiteralAddress<const void*>(), sizeof(literal)); + return literal; + } + + void SetLiteral64(uint64_t literal) const { + memcpy(LiteralAddress<void*>(), &literal, sizeof(literal)); + } + + float LiteralFP32() const { + return RawbitsToFloat(Literal32()); + } + + double LiteralFP64() const { + return RawbitsToDouble(Literal64()); + } + + const Instruction* NextInstruction() const { + return this + kInstructionSize; + } + + // Skip any constant pools with artificial guards at this point. + // Return either |this| or the first instruction after the pool. + const Instruction* skipPool() const; + + const Instruction* InstructionAtOffset(int64_t offset) const { + VIXL_ASSERT(IsWordAligned(this + offset)); + return this + offset; + } + + template<typename T> static Instruction* Cast(T src) { + return reinterpret_cast<Instruction*>(src); + } + + template<typename T> static const Instruction* CastConst(T src) { + return reinterpret_cast<const Instruction*>(src); + } + + private: + int ImmBranch() const; + + static float Imm8ToFP32(uint32_t imm8); + static double Imm8ToFP64(uint32_t imm8); + + void SetPCRelImmTarget(const Instruction* target); + void SetBranchImmTarget(const Instruction* target); +}; + + +// Functions for handling NEON vector format information. +enum VectorFormat { + kFormatUndefined = 0xffffffff, + kFormat8B = NEON_8B, + kFormat16B = NEON_16B, + kFormat4H = NEON_4H, + kFormat8H = NEON_8H, + kFormat2S = NEON_2S, + kFormat4S = NEON_4S, + kFormat1D = NEON_1D, + kFormat2D = NEON_2D, + + // Scalar formats. We add the scalar bit to distinguish between scalar and + // vector enumerations; the bit is always set in the encoding of scalar ops + // and always clear for vector ops. 
Although kFormatD and kFormat1D appear + // to be the same, their meaning is subtly different. The first is a scalar + // operation, the second a vector operation that only affects one lane. + kFormatB = NEON_B | NEONScalar, + kFormatH = NEON_H | NEONScalar, + kFormatS = NEON_S | NEONScalar, + kFormatD = NEON_D | NEONScalar +}; + +VectorFormat VectorFormatHalfWidth(const VectorFormat vform); +VectorFormat VectorFormatDoubleWidth(const VectorFormat vform); +VectorFormat VectorFormatDoubleLanes(const VectorFormat vform); +VectorFormat VectorFormatHalfLanes(const VectorFormat vform); +VectorFormat ScalarFormatFromLaneSize(int lanesize); +VectorFormat VectorFormatHalfWidthDoubleLanes(const VectorFormat vform); +VectorFormat VectorFormatFillQ(const VectorFormat vform); +unsigned RegisterSizeInBitsFromFormat(VectorFormat vform); +unsigned RegisterSizeInBytesFromFormat(VectorFormat vform); +// TODO: Make the return types of these functions consistent. +unsigned LaneSizeInBitsFromFormat(VectorFormat vform); +int LaneSizeInBytesFromFormat(VectorFormat vform); +int LaneSizeInBytesLog2FromFormat(VectorFormat vform); +int LaneCountFromFormat(VectorFormat vform); +int MaxLaneCountFromFormat(VectorFormat vform); +bool IsVectorFormat(VectorFormat vform); +int64_t MaxIntFromFormat(VectorFormat vform); +int64_t MinIntFromFormat(VectorFormat vform); +uint64_t MaxUintFromFormat(VectorFormat vform); + + +enum NEONFormat { + NF_UNDEF = 0, + NF_8B = 1, + NF_16B = 2, + NF_4H = 3, + NF_8H = 4, + NF_2S = 5, + NF_4S = 6, + NF_1D = 7, + NF_2D = 8, + NF_B = 9, + NF_H = 10, + NF_S = 11, + NF_D = 12 +}; + +static const unsigned kNEONFormatMaxBits = 6; + +struct NEONFormatMap { + // The bit positions in the instruction to consider. + uint8_t bits[kNEONFormatMaxBits]; + + // Mapping from concatenated bits to format. 
+ NEONFormat map[1 << kNEONFormatMaxBits]; +}; + +class NEONFormatDecoder { + public: + enum SubstitutionMode { + kPlaceholder, + kFormat + }; + + // Construct a format decoder with increasingly specific format maps for each + // subsitution. If no format map is specified, the default is the integer + // format map. + explicit NEONFormatDecoder(const Instruction* instr) { + instrbits_ = instr->InstructionBits(); + SetFormatMaps(IntegerFormatMap()); + } + NEONFormatDecoder(const Instruction* instr, + const NEONFormatMap* format) { + instrbits_ = instr->InstructionBits(); + SetFormatMaps(format); + } + NEONFormatDecoder(const Instruction* instr, + const NEONFormatMap* format0, + const NEONFormatMap* format1) { + instrbits_ = instr->InstructionBits(); + SetFormatMaps(format0, format1); + } + NEONFormatDecoder(const Instruction* instr, + const NEONFormatMap* format0, + const NEONFormatMap* format1, + const NEONFormatMap* format2) { + instrbits_ = instr->InstructionBits(); + SetFormatMaps(format0, format1, format2); + } + + // Set the format mapping for all or individual substitutions. + void SetFormatMaps(const NEONFormatMap* format0, + const NEONFormatMap* format1 = NULL, + const NEONFormatMap* format2 = NULL) { + VIXL_ASSERT(format0 != NULL); + formats_[0] = format0; + formats_[1] = (format1 == NULL) ? formats_[0] : format1; + formats_[2] = (format2 == NULL) ? formats_[1] : format2; + } + void SetFormatMap(unsigned index, const NEONFormatMap* format) { + VIXL_ASSERT(index <= (sizeof(formats_) / sizeof(formats_[0]))); + VIXL_ASSERT(format != NULL); + formats_[index] = format; + } + + // Substitute %s in the input string with the placeholder string for each + // register, ie. "'B", "'H", etc. + const char* SubstitutePlaceholders(const char* string) { + return Substitute(string, kPlaceholder, kPlaceholder, kPlaceholder); + } + + // Substitute %s in the input string with a new string based on the + // substitution mode. 
+  // The result lives in form_buffer_, so it is overwritten by the next call
+  // on the same decoder.
+  const char* Substitute(const char* string,
+                         SubstitutionMode mode0 = kFormat,
+                         SubstitutionMode mode1 = kFormat,
+                         SubstitutionMode mode2 = kFormat) {
+    snprintf(form_buffer_, sizeof(form_buffer_), string,
+             GetSubstitute(0, mode0),
+             GetSubstitute(1, mode1),
+             GetSubstitute(2, mode2));
+    return form_buffer_;
+  }
+
+  // Append a "2" to a mnemonic string based on the state of the Q bit.
+  // When Q is clear the input pointer is returned unchanged; otherwise the
+  // result lives in mne_buffer_.
+  const char* Mnemonic(const char* mnemonic) {
+    if ((instrbits_ & NEON_Q) != 0) {
+      snprintf(mne_buffer_, sizeof(mne_buffer_), "%s2", mnemonic);
+      return mne_buffer_;
+    }
+    return mnemonic;
+  }
+
+  // Vector format selected by the format map in slot |format_index|.
+  VectorFormat GetVectorFormat(int format_index = 0) {
+    return GetVectorFormat(formats_[format_index]);
+  }
+
+  // Vector format selected by |format_map| for this instruction.
+  VectorFormat GetVectorFormat(const NEONFormatMap* format_map) {
+    // Indexed by NEONFormat value, so the order must match that enum.
+    static const VectorFormat vform[] = {
+      kFormatUndefined,
+      kFormat8B, kFormat16B, kFormat4H, kFormat8H,
+      kFormat2S, kFormat4S, kFormat1D, kFormat2D,
+      kFormatB, kFormatH, kFormatS, kFormatD
+    };
+    VIXL_ASSERT(GetNEONFormat(format_map) < (sizeof(vform) / sizeof(vform[0])));
+    return vform[GetNEONFormat(format_map)];
+  }
+
+  // Built in mappings for common cases.
+
+  // The integer format map uses three bits (Q, size<1:0>) to encode the
+  // "standard" set of NEON integer vector formats.
+  static const NEONFormatMap* IntegerFormatMap() {
+    static const NEONFormatMap map = {
+      {23, 22, 30},
+      {NF_8B, NF_16B, NF_4H, NF_8H, NF_2S, NF_4S, NF_UNDEF, NF_2D}
+    };
+    return &map;
+  }
+
+  // The long integer format map uses two bits (size<1:0>) to encode the
+  // long set of NEON integer vector formats. These are used in narrow, wide
+  // and long operations.
+  static const NEONFormatMap* LongIntegerFormatMap() {
+    static const NEONFormatMap map = {
+      {23, 22}, {NF_8H, NF_4S, NF_2D}
+    };
+    return &map;
+  }
+
+  // The FP format map uses two bits (Q, size<0>) to encode the NEON FP vector
+  // formats: NF_2S, NF_4S, NF_2D.
+  static const NEONFormatMap* FPFormatMap() {
+    // The FP format map assumes two bits (Q, size<0>) are used to encode the
+    // NEON FP vector formats: NF_2S, NF_4S, NF_2D.
+    static const NEONFormatMap map = {
+      {22, 30}, {NF_2S, NF_4S, NF_UNDEF, NF_2D}
+    };
+    return &map;
+  }
+
+  // The load/store format map uses three bits (Q, 11, 10) to encode the
+  // set of NEON vector formats.
+  static const NEONFormatMap* LoadStoreFormatMap() {
+    static const NEONFormatMap map = {
+      {11, 10, 30},
+      {NF_8B, NF_16B, NF_4H, NF_8H, NF_2S, NF_4S, NF_1D, NF_2D}
+    };
+    return &map;
+  }
+
+  // The logical format map uses one bit (Q) to encode the NEON vector format:
+  // NF_8B, NF_16B.
+  static const NEONFormatMap* LogicalFormatMap() {
+    static const NEONFormatMap map = {
+      {30}, {NF_8B, NF_16B}
+    };
+    return &map;
+  }
+
+  // The triangular format map uses between two and five bits to encode the NEON
+  // vector format:
+  // xxx10->8B, xxx11->16B, xx100->4H, xx101->8H
+  // x1000->2S, x1001->4S, 10001->2D, all others undefined.
+  static const NEONFormatMap* TriangularFormatMap() {
+    static const NEONFormatMap map = {
+      {19, 18, 17, 16, 30},
+      {NF_UNDEF, NF_UNDEF, NF_8B, NF_16B, NF_4H, NF_8H, NF_8B, NF_16B, NF_2S,
+       NF_4S, NF_8B, NF_16B, NF_4H, NF_8H, NF_8B, NF_16B, NF_UNDEF, NF_2D,
+       NF_8B, NF_16B, NF_4H, NF_8H, NF_8B, NF_16B, NF_2S, NF_4S, NF_8B, NF_16B,
+       NF_4H, NF_8H, NF_8B, NF_16B}
+    };
+    return &map;
+  }
+
+  // The scalar format map uses two bits (size<1:0>) to encode the NEON scalar
+  // formats: NF_B, NF_H, NF_S, NF_D.
+  static const NEONFormatMap* ScalarFormatMap() {
+    static const NEONFormatMap map = {
+      {23, 22}, {NF_B, NF_H, NF_S, NF_D}
+    };
+    return &map;
+  }
+
+  // The long scalar format map uses two bits (size<1:0>) to encode the longer
+  // NEON scalar formats: NF_H, NF_S, NF_D.
+  static const NEONFormatMap* LongScalarFormatMap() {
+    static const NEONFormatMap map = {
+      {23, 22}, {NF_H, NF_S, NF_D}
+    };
+    return &map;
+  }
+
+  // The FP scalar format map assumes one bit (size<0>) is used to encode the
+  // NEON FP scalar formats: NF_S, NF_D.
+  static const NEONFormatMap* FPScalarFormatMap() {
+    static const NEONFormatMap map = {
+      {22}, {NF_S, NF_D}
+    };
+    return &map;
+  }
+
+  // The triangular scalar format map uses between one and four bits to encode
+  // the NEON FP scalar formats:
+  // xxx1->B, xx10->H, x100->S, 1000->D, all others undefined.
+  static const NEONFormatMap* TriangularScalarFormatMap() {
+    static const NEONFormatMap map = {
+      {19, 18, 17, 16},
+      {NF_UNDEF, NF_B, NF_H, NF_B, NF_S, NF_B, NF_H, NF_B,
+       NF_D, NF_B, NF_H, NF_B, NF_S, NF_B, NF_H, NF_B}
+    };
+    return &map;
+  }
+
+ private:
+  // Get a pointer to a string that represents the format or placeholder for
+  // the specified substitution index, based on the format map and instruction.
+  const char* GetSubstitute(int index, SubstitutionMode mode) {
+    if (mode == kFormat) {
+      return NEONFormatAsString(GetNEONFormat(formats_[index]));
+    }
+    VIXL_ASSERT(mode == kPlaceholder);
+    return NEONFormatAsPlaceholder(GetNEONFormat(formats_[index]));
+  }
+
+  // Get the NEONFormat enumerated value for bits obtained from the
+  // instruction based on the specified format mapping.
+  NEONFormat GetNEONFormat(const NEONFormatMap* format_map) {
+    return format_map->map[PickBits(format_map->bits)];
+  }
+
+  // Convert a NEONFormat into a string.
+  static const char* NEONFormatAsString(NEONFormat format) {
+    // Indexed by NEONFormat value, so the order must match that enum.
+    static const char* formats[] = {
+      "undefined",
+      "8b", "16b", "4h", "8h", "2s", "4s", "1d", "2d",
+      "b", "h", "s", "d"
+    };
+    VIXL_ASSERT(format < (sizeof(formats) / sizeof(formats[0])));
+    return formats[format];
+  }
+
+  // Convert a NEONFormat into a register placeholder string.
+ static const char* NEONFormatAsPlaceholder(NEONFormat format) { + VIXL_ASSERT((format == NF_B) || (format == NF_H) || + (format == NF_S) || (format == NF_D) || + (format == NF_UNDEF)); + static const char* formats[] = { + "undefined", + "undefined", "undefined", "undefined", "undefined", + "undefined", "undefined", "undefined", "undefined", + "'B", "'H", "'S", "'D" + }; + return formats[format]; + } + + // Select bits from instrbits_ defined by the bits array, concatenate them, + // and return the value. + uint8_t PickBits(const uint8_t bits[]) { + uint8_t result = 0; + for (unsigned b = 0; b < kNEONFormatMaxBits; b++) { + if (bits[b] == 0) break; + result <<= 1; + result |= ((instrbits_ & (1 << bits[b])) == 0) ? 0 : 1; + } + return result; + } + + Instr instrbits_; + const NEONFormatMap* formats_[3]; + char form_buffer_[64]; + char mne_buffer_[16]; +}; +} // namespace vixl + +#endif // VIXL_A64_INSTRUCTIONS_A64_H_ diff --git a/js/src/jit/arm64/vixl/Instrument-vixl.cpp b/js/src/jit/arm64/vixl/Instrument-vixl.cpp new file mode 100644 index 0000000000..c07495c29d --- /dev/null +++ b/js/src/jit/arm64/vixl/Instrument-vixl.cpp @@ -0,0 +1,850 @@ +// Copyright 2014, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "jit/arm64/vixl/Instrument-vixl.h" + +namespace vixl { + +Counter::Counter(const char* name, CounterType type) + : count_(0), enabled_(false), type_(type) { + VIXL_ASSERT(name != NULL); + strncpy(name_, name, kCounterNameMaxLength); +} + + +void Counter::Enable() { + enabled_ = true; +} + + +void Counter::Disable() { + enabled_ = false; +} + + +bool Counter::IsEnabled() { + return enabled_; +} + + +void Counter::Increment() { + if (enabled_) { + count_++; + } +} + + +uint64_t Counter::count() { + uint64_t result = count_; + if (type_ == Gauge) { + // If the counter is a Gauge, reset the count after reading. 
+ count_ = 0; + } + return result; +} + + +const char* Counter::name() { + return name_; +} + + +CounterType Counter::type() { + return type_; +} + + +struct CounterDescriptor { + const char* name; + CounterType type; +}; + + +static const CounterDescriptor kCounterList[] = { + {"Instruction", Cumulative}, + + {"Move Immediate", Gauge}, + {"Add/Sub DP", Gauge}, + {"Logical DP", Gauge}, + {"Other Int DP", Gauge}, + {"FP DP", Gauge}, + + {"Conditional Select", Gauge}, + {"Conditional Compare", Gauge}, + + {"Unconditional Branch", Gauge}, + {"Compare and Branch", Gauge}, + {"Test and Branch", Gauge}, + {"Conditional Branch", Gauge}, + + {"Load Integer", Gauge}, + {"Load FP", Gauge}, + {"Load Pair", Gauge}, + {"Load Literal", Gauge}, + + {"Store Integer", Gauge}, + {"Store FP", Gauge}, + {"Store Pair", Gauge}, + + {"PC Addressing", Gauge}, + {"Other", Gauge}, + {"NEON", Gauge}, + {"Crypto", Gauge} +}; + + +Instrument::Instrument(const char* datafile, uint64_t sample_period) + : output_stream_(stdout), sample_period_(sample_period) { + + // Set up the output stream. If datafile is non-NULL, use that file. If it + // can't be opened, or datafile is NULL, use stdout. + if (datafile != NULL) { + output_stream_ = fopen(datafile, "w"); + if (output_stream_ == NULL) { + printf("Can't open output file %s. Using stdout.\n", datafile); + output_stream_ = stdout; + } + } + + static const int num_counters = + sizeof(kCounterList) / sizeof(CounterDescriptor); + + // Dump an instrumentation description comment at the top of the file. + fprintf(output_stream_, "# counters=%d\n", num_counters); + fprintf(output_stream_, "# sample_period=%" PRIu64 "\n", sample_period_); + + // Construct Counter objects from counter description array. 
+ for (int i = 0; i < num_counters; i++) { + if (Counter* counter = js_new<Counter>(kCounterList[i].name, kCounterList[i].type)) + (void)counters_.append(counter); + } + + DumpCounterNames(); +} + + +Instrument::~Instrument() { + // Dump any remaining instruction data to the output file. + DumpCounters(); + + // Free all the counter objects. + for (auto counter : counters_) { + js_delete(counter); + } + + if (output_stream_ != stdout) { + fclose(output_stream_); + } +} + + +void Instrument::Update() { + // Increment the instruction counter, and dump all counters if a sample period + // has elapsed. + static Counter* counter = GetCounter("Instruction"); + VIXL_ASSERT(counter->type() == Cumulative); + counter->Increment(); + + if (counter->IsEnabled() && (counter->count() % sample_period_) == 0) { + DumpCounters(); + } +} + + +void Instrument::DumpCounters() { + // Iterate through the counter objects, dumping their values to the output + // stream. + for (auto counter : counters_) { + fprintf(output_stream_, "%" PRIu64 ",", counter->count()); + } + fprintf(output_stream_, "\n"); + fflush(output_stream_); +} + + +void Instrument::DumpCounterNames() { + // Iterate through the counter objects, dumping the counter names to the + // output stream. + for (auto counter : counters_) { + fprintf(output_stream_, "%s,", counter->name()); + } + fprintf(output_stream_, "\n"); + fflush(output_stream_); +} + + +void Instrument::HandleInstrumentationEvent(unsigned event) { + switch (event) { + case InstrumentStateEnable: Enable(); break; + case InstrumentStateDisable: Disable(); break; + default: DumpEventMarker(event); + } +} + + +void Instrument::DumpEventMarker(unsigned marker) { + // Dumpan event marker to the output stream as a specially formatted comment + // line. 
+ static Counter* counter = GetCounter("Instruction"); + + fprintf(output_stream_, "# %c%c @ %" PRId64 "\n", marker & 0xff, + (marker >> 8) & 0xff, counter->count()); +} + + +Counter* Instrument::GetCounter(const char* name) { + // Get a Counter object by name from the counter list. + for (auto counter : counters_) { + if (strcmp(counter->name(), name) == 0) { + return counter; + } + } + + // A Counter by that name does not exist: print an error message to stderr + // and the output file, and exit. + static const char* error_message = + "# Error: Unknown counter \"%s\". Exiting.\n"; + fprintf(stderr, error_message, name); + fprintf(output_stream_, error_message, name); + exit(1); +} + + +void Instrument::Enable() { + for (auto counter : counters_) { + counter->Enable(); + } +} + + +void Instrument::Disable() { + for (auto counter : counters_) { + counter->Disable(); + } +} + + +void Instrument::VisitPCRelAddressing(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("PC Addressing"); + counter->Increment(); +} + + +void Instrument::VisitAddSubImmediate(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Add/Sub DP"); + counter->Increment(); +} + + +void Instrument::VisitLogicalImmediate(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Logical DP"); + counter->Increment(); +} + + +void Instrument::VisitMoveWideImmediate(const Instruction* instr) { + Update(); + static Counter* counter = GetCounter("Move Immediate"); + + if (instr->IsMovn() && (instr->Rd() == kZeroRegCode)) { + unsigned imm = instr->ImmMoveWide(); + HandleInstrumentationEvent(imm); + } else { + counter->Increment(); + } +} + + +void Instrument::VisitBitfield(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Other Int DP"); + counter->Increment(); +} + + +void Instrument::VisitExtract(const Instruction* instr) { + USE(instr); + 
Update(); + static Counter* counter = GetCounter("Other Int DP"); + counter->Increment(); +} + + +void Instrument::VisitUnconditionalBranch(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Unconditional Branch"); + counter->Increment(); +} + + +void Instrument::VisitUnconditionalBranchToRegister(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Unconditional Branch"); + counter->Increment(); +} + + +void Instrument::VisitCompareBranch(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Compare and Branch"); + counter->Increment(); +} + + +void Instrument::VisitTestBranch(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Test and Branch"); + counter->Increment(); +} + + +void Instrument::VisitConditionalBranch(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Conditional Branch"); + counter->Increment(); +} + + +void Instrument::VisitSystem(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Other"); + counter->Increment(); +} + + +void Instrument::VisitException(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Other"); + counter->Increment(); +} + + +void Instrument::InstrumentLoadStorePair(const Instruction* instr) { + static Counter* load_pair_counter = GetCounter("Load Pair"); + static Counter* store_pair_counter = GetCounter("Store Pair"); + + if (instr->Mask(LoadStorePairLBit) != 0) { + load_pair_counter->Increment(); + } else { + store_pair_counter->Increment(); + } +} + + +void Instrument::VisitLoadStorePairPostIndex(const Instruction* instr) { + Update(); + InstrumentLoadStorePair(instr); +} + + +void Instrument::VisitLoadStorePairOffset(const Instruction* instr) { + Update(); + InstrumentLoadStorePair(instr); +} + + +void 
Instrument::VisitLoadStorePairPreIndex(const Instruction* instr) { + Update(); + InstrumentLoadStorePair(instr); +} + + +void Instrument::VisitLoadStorePairNonTemporal(const Instruction* instr) { + Update(); + InstrumentLoadStorePair(instr); +} + + +void Instrument::VisitLoadStoreExclusive(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Other"); + counter->Increment(); +} + +void Instrument::VisitAtomicMemory(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Other"); + counter->Increment(); +} + +void Instrument::VisitLoadLiteral(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Load Literal"); + counter->Increment(); +} + + +void Instrument::InstrumentLoadStore(const Instruction* instr) { + static Counter* load_int_counter = GetCounter("Load Integer"); + static Counter* store_int_counter = GetCounter("Store Integer"); + static Counter* load_fp_counter = GetCounter("Load FP"); + static Counter* store_fp_counter = GetCounter("Store FP"); + + switch (instr->Mask(LoadStoreMask)) { + case STRB_w: + case STRH_w: + case STR_w: + VIXL_FALLTHROUGH(); + case STR_x: store_int_counter->Increment(); break; + case STR_s: + VIXL_FALLTHROUGH(); + case STR_d: store_fp_counter->Increment(); break; + case LDRB_w: + case LDRH_w: + case LDR_w: + case LDR_x: + case LDRSB_x: + case LDRSH_x: + case LDRSW_x: + case LDRSB_w: + VIXL_FALLTHROUGH(); + case LDRSH_w: load_int_counter->Increment(); break; + case LDR_s: + VIXL_FALLTHROUGH(); + case LDR_d: load_fp_counter->Increment(); break; + } +} + + +void Instrument::VisitLoadStoreUnscaledOffset(const Instruction* instr) { + Update(); + InstrumentLoadStore(instr); +} + + +void Instrument::VisitLoadStorePostIndex(const Instruction* instr) { + USE(instr); + Update(); + InstrumentLoadStore(instr); +} + + +void Instrument::VisitLoadStorePreIndex(const Instruction* instr) { + Update(); + InstrumentLoadStore(instr); 
+} + + +void Instrument::VisitLoadStoreRegisterOffset(const Instruction* instr) { + Update(); + InstrumentLoadStore(instr); +} + + +void Instrument::VisitLoadStoreUnsignedOffset(const Instruction* instr) { + Update(); + InstrumentLoadStore(instr); +} + + +void Instrument::VisitLogicalShifted(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Logical DP"); + counter->Increment(); +} + + +void Instrument::VisitAddSubShifted(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Add/Sub DP"); + counter->Increment(); +} + + +void Instrument::VisitAddSubExtended(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Add/Sub DP"); + counter->Increment(); +} + + +void Instrument::VisitAddSubWithCarry(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Add/Sub DP"); + counter->Increment(); +} + + +void Instrument::VisitConditionalCompareRegister(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Conditional Compare"); + counter->Increment(); +} + + +void Instrument::VisitConditionalCompareImmediate(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Conditional Compare"); + counter->Increment(); +} + + +void Instrument::VisitConditionalSelect(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Conditional Select"); + counter->Increment(); +} + + +void Instrument::VisitDataProcessing1Source(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Other Int DP"); + counter->Increment(); +} + + +void Instrument::VisitDataProcessing2Source(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Other Int DP"); + counter->Increment(); +} + + +void Instrument::VisitDataProcessing3Source(const Instruction* instr) { + 
USE(instr); + Update(); + static Counter* counter = GetCounter("Other Int DP"); + counter->Increment(); +} + + +void Instrument::VisitFPCompare(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("FP DP"); + counter->Increment(); +} + + +void Instrument::VisitFPConditionalCompare(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Conditional Compare"); + counter->Increment(); +} + + +void Instrument::VisitFPConditionalSelect(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Conditional Select"); + counter->Increment(); +} + + +void Instrument::VisitFPImmediate(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("FP DP"); + counter->Increment(); +} + + +void Instrument::VisitFPDataProcessing1Source(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("FP DP"); + counter->Increment(); +} + + +void Instrument::VisitFPDataProcessing2Source(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("FP DP"); + counter->Increment(); +} + + +void Instrument::VisitFPDataProcessing3Source(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("FP DP"); + counter->Increment(); +} + + +void Instrument::VisitFPIntegerConvert(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("FP DP"); + counter->Increment(); +} + + +void Instrument::VisitFPFixedPointConvert(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("FP DP"); + counter->Increment(); +} + + +void Instrument::VisitCrypto2RegSHA(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Crypto"); + counter->Increment(); +} + + +void Instrument::VisitCrypto3RegSHA(const Instruction* instr) { + USE(instr); + Update(); + static 
Counter* counter = GetCounter("Crypto"); + counter->Increment(); +} + + +void Instrument::VisitCryptoAES(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Crypto"); + counter->Increment(); +} + + +void Instrument::VisitNEON2RegMisc(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEON3Same(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEON3Different(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEONAcrossLanes(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEONByIndexedElement(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEONCopy(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEONExtract(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEONLoadStoreMultiStruct(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEONLoadStoreMultiStructPostIndex( + const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEONLoadStoreSingleStruct(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void 
Instrument::VisitNEONLoadStoreSingleStructPostIndex( + const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEONModifiedImmediate(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEONScalar2RegMisc(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEONScalar3Diff(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEONScalar3Same(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEONScalarByIndexedElement(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEONScalarCopy(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEONScalarPairwise(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEONScalarShiftImmediate(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEONShiftImmediate(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEONTable(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEONPerm(const Instruction* instr) { + 
USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitUnallocated(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Other"); + counter->Increment(); +} + + +void Instrument::VisitUnimplemented(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Other"); + counter->Increment(); +} + + +} // namespace vixl diff --git a/js/src/jit/arm64/vixl/Instrument-vixl.h b/js/src/jit/arm64/vixl/Instrument-vixl.h new file mode 100644 index 0000000000..eca076d234 --- /dev/null +++ b/js/src/jit/arm64/vixl/Instrument-vixl.h @@ -0,0 +1,109 @@ +// Copyright 2014, ARM Limited +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef VIXL_A64_INSTRUMENT_A64_H_ +#define VIXL_A64_INSTRUMENT_A64_H_ + +#include "mozilla/Vector.h" + +#include "jit/arm64/vixl/Constants-vixl.h" +#include "jit/arm64/vixl/Decoder-vixl.h" +#include "jit/arm64/vixl/Globals-vixl.h" +#include "jit/arm64/vixl/Utils-vixl.h" +#include "js/AllocPolicy.h" + +namespace vixl { + +const int kCounterNameMaxLength = 256; +const uint64_t kDefaultInstrumentationSamplingPeriod = 1 << 22; + + +enum InstrumentState { + InstrumentStateDisable = 0, + InstrumentStateEnable = 1 +}; + + +enum CounterType { + Gauge = 0, // Gauge counters reset themselves after reading. + Cumulative = 1 // Cumulative counters keep their value after reading. +}; + + +class Counter { + public: + explicit Counter(const char* name, CounterType type = Gauge); + + void Increment(); + void Enable(); + void Disable(); + bool IsEnabled(); + uint64_t count(); + const char* name(); + CounterType type(); + + private: + char name_[kCounterNameMaxLength]; + uint64_t count_; + bool enabled_; + CounterType type_; +}; + + +class Instrument: public DecoderVisitor { + public: + explicit Instrument(const char* datafile = NULL, + uint64_t sample_period = kDefaultInstrumentationSamplingPeriod); + ~Instrument(); + + void Enable(); + void Disable(); + + // Declare all Visitor functions. 
+ #define DECLARE(A) void Visit##A(const Instruction* instr) override; + VISITOR_LIST(DECLARE) + #undef DECLARE + + private: + void Update(); + void DumpCounters(); + void DumpCounterNames(); + void DumpEventMarker(unsigned marker); + void HandleInstrumentationEvent(unsigned event); + Counter* GetCounter(const char* name); + + void InstrumentLoadStore(const Instruction* instr); + void InstrumentLoadStorePair(const Instruction* instr); + + mozilla::Vector<Counter*, 8, js::SystemAllocPolicy> counters_; + + FILE *output_stream_; + uint64_t sample_period_; +}; + +} // namespace vixl + +#endif // VIXL_A64_INSTRUMENT_A64_H_ diff --git a/js/src/jit/arm64/vixl/Logic-vixl.cpp b/js/src/jit/arm64/vixl/Logic-vixl.cpp new file mode 100644 index 0000000000..71821a333f --- /dev/null +++ b/js/src/jit/arm64/vixl/Logic-vixl.cpp @@ -0,0 +1,4738 @@ +// Copyright 2015, ARM Limited +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifdef JS_SIMULATOR_ARM64 + +#include <cmath> + +#include "jit/arm64/vixl/Simulator-vixl.h" + +namespace vixl { + +template<> double Simulator::FPDefaultNaN<double>() { + return kFP64DefaultNaN; +} + + +template<> float Simulator::FPDefaultNaN<float>() { + return kFP32DefaultNaN; +} + + +double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) { + if (src >= 0) { + return UFixedToDouble(src, fbits, round); + } else { + // This works for all negative values, including INT64_MIN. + return -UFixedToDouble(-src, fbits, round); + } +} + + +double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) { + // An input of 0 is a special case because the result is effectively + // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit. + if (src == 0) { + return 0.0; + } + + // Calculate the exponent. The highest significant bit will have the value + // 2^exponent. + const int highest_significant_bit = 63 - CountLeadingZeros(src); + const int64_t exponent = highest_significant_bit - fbits; + + return FPRoundToDouble(0, exponent, src, round); +} + + +float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) { + if (src >= 0) { + return UFixedToFloat(src, fbits, round); + } else { + // This works for all negative values, including INT64_MIN. 
+ return -UFixedToFloat(-src, fbits, round); + } +} + + +float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) { + // An input of 0 is a special case because the result is effectively + // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit. + if (src == 0) { + return 0.0f; + } + + // Calculate the exponent. The highest significant bit will have the value + // 2^exponent. + const int highest_significant_bit = 63 - CountLeadingZeros(src); + const int32_t exponent = highest_significant_bit - fbits; + + return FPRoundToFloat(0, exponent, src, round); +} + + +void Simulator::ld1(VectorFormat vform, + LogicVRegister dst, + uint64_t addr) { + if (handle_wasm_seg_fault(addr, 16)) + return; + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst.ReadUintFromMem(vform, i, addr); + addr += LaneSizeInBytesFromFormat(vform); + } +} + + +void Simulator::ld1(VectorFormat vform, + LogicVRegister dst, + int index, + uint64_t addr) { + if (handle_wasm_seg_fault(addr, LaneSizeInBytesFromFormat(vform))) + return; + dst.ReadUintFromMem(vform, index, addr); +} + + +void Simulator::ld1r(VectorFormat vform, + LogicVRegister dst, + uint64_t addr) { + if (handle_wasm_seg_fault(addr, LaneSizeInBytesFromFormat(vform))) + return; + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst.ReadUintFromMem(vform, i, addr); + } +} + + +void Simulator::ld2(VectorFormat vform, + LogicVRegister dst1, + LogicVRegister dst2, + uint64_t addr1) { + if (handle_wasm_seg_fault(addr1, 16*2)) + return; + dst1.ClearForWrite(vform); + dst2.ClearForWrite(vform); + int esize = LaneSizeInBytesFromFormat(vform); + uint64_t addr2 = addr1 + esize; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst1.ReadUintFromMem(vform, i, addr1); + dst2.ReadUintFromMem(vform, i, addr2); + addr1 += 2 * esize; + addr2 += 2 * esize; + } +} + + +void Simulator::ld2(VectorFormat vform, + LogicVRegister dst1, + 
LogicVRegister dst2, + int index, + uint64_t addr1) { + if (handle_wasm_seg_fault(addr1, LaneSizeInBytesFromFormat(vform)*2)) + return; + dst1.ClearForWrite(vform); + dst2.ClearForWrite(vform); + uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform); + dst1.ReadUintFromMem(vform, index, addr1); + dst2.ReadUintFromMem(vform, index, addr2); +} + + +void Simulator::ld2r(VectorFormat vform, + LogicVRegister dst1, + LogicVRegister dst2, + uint64_t addr) { + if (handle_wasm_seg_fault(addr, LaneSizeInBytesFromFormat(vform)*2)) + return; + dst1.ClearForWrite(vform); + dst2.ClearForWrite(vform); + uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst1.ReadUintFromMem(vform, i, addr); + dst2.ReadUintFromMem(vform, i, addr2); + } +} + + +void Simulator::ld3(VectorFormat vform, + LogicVRegister dst1, + LogicVRegister dst2, + LogicVRegister dst3, + uint64_t addr1) { + if (handle_wasm_seg_fault(addr1, 16*3)) + return; + dst1.ClearForWrite(vform); + dst2.ClearForWrite(vform); + dst3.ClearForWrite(vform); + int esize = LaneSizeInBytesFromFormat(vform); + uint64_t addr2 = addr1 + esize; + uint64_t addr3 = addr2 + esize; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst1.ReadUintFromMem(vform, i, addr1); + dst2.ReadUintFromMem(vform, i, addr2); + dst3.ReadUintFromMem(vform, i, addr3); + addr1 += 3 * esize; + addr2 += 3 * esize; + addr3 += 3 * esize; + } +} + + +void Simulator::ld3(VectorFormat vform, + LogicVRegister dst1, + LogicVRegister dst2, + LogicVRegister dst3, + int index, + uint64_t addr1) { + if (handle_wasm_seg_fault(addr1, LaneSizeInBytesFromFormat(vform)*3)) + return; + dst1.ClearForWrite(vform); + dst2.ClearForWrite(vform); + dst3.ClearForWrite(vform); + uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform); + uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); + dst1.ReadUintFromMem(vform, index, addr1); + dst2.ReadUintFromMem(vform, index, addr2); + 
dst3.ReadUintFromMem(vform, index, addr3); +} + + +void Simulator::ld3r(VectorFormat vform, + LogicVRegister dst1, + LogicVRegister dst2, + LogicVRegister dst3, + uint64_t addr) { + if (handle_wasm_seg_fault(addr, LaneSizeInBytesFromFormat(vform)*3)) + return; + dst1.ClearForWrite(vform); + dst2.ClearForWrite(vform); + dst3.ClearForWrite(vform); + uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform); + uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst1.ReadUintFromMem(vform, i, addr); + dst2.ReadUintFromMem(vform, i, addr2); + dst3.ReadUintFromMem(vform, i, addr3); + } +} + + +void Simulator::ld4(VectorFormat vform, + LogicVRegister dst1, + LogicVRegister dst2, + LogicVRegister dst3, + LogicVRegister dst4, + uint64_t addr1) { + if (handle_wasm_seg_fault(addr1, 16*4)) + return; + dst1.ClearForWrite(vform); + dst2.ClearForWrite(vform); + dst3.ClearForWrite(vform); + dst4.ClearForWrite(vform); + int esize = LaneSizeInBytesFromFormat(vform); + uint64_t addr2 = addr1 + esize; + uint64_t addr3 = addr2 + esize; + uint64_t addr4 = addr3 + esize; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst1.ReadUintFromMem(vform, i, addr1); + dst2.ReadUintFromMem(vform, i, addr2); + dst3.ReadUintFromMem(vform, i, addr3); + dst4.ReadUintFromMem(vform, i, addr4); + addr1 += 4 * esize; + addr2 += 4 * esize; + addr3 += 4 * esize; + addr4 += 4 * esize; + } +} + + +void Simulator::ld4(VectorFormat vform, + LogicVRegister dst1, + LogicVRegister dst2, + LogicVRegister dst3, + LogicVRegister dst4, + int index, + uint64_t addr1) { + if (handle_wasm_seg_fault(addr1, LaneSizeInBytesFromFormat(vform)*4)) + return; + dst1.ClearForWrite(vform); + dst2.ClearForWrite(vform); + dst3.ClearForWrite(vform); + dst4.ClearForWrite(vform); + uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform); + uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); + uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform); + 
dst1.ReadUintFromMem(vform, index, addr1); + dst2.ReadUintFromMem(vform, index, addr2); + dst3.ReadUintFromMem(vform, index, addr3); + dst4.ReadUintFromMem(vform, index, addr4); +} + + +void Simulator::ld4r(VectorFormat vform, + LogicVRegister dst1, + LogicVRegister dst2, + LogicVRegister dst3, + LogicVRegister dst4, + uint64_t addr) { + if (handle_wasm_seg_fault(addr, LaneSizeInBytesFromFormat(vform)*4)) + return; + dst1.ClearForWrite(vform); + dst2.ClearForWrite(vform); + dst3.ClearForWrite(vform); + dst4.ClearForWrite(vform); + uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform); + uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); + uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst1.ReadUintFromMem(vform, i, addr); + dst2.ReadUintFromMem(vform, i, addr2); + dst3.ReadUintFromMem(vform, i, addr3); + dst4.ReadUintFromMem(vform, i, addr4); + } +} + + +void Simulator::st1(VectorFormat vform, + LogicVRegister src, + uint64_t addr) { + if (handle_wasm_seg_fault(addr, 16)) + return; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + src.WriteUintToMem(vform, i, addr); + addr += LaneSizeInBytesFromFormat(vform); + } +} + + +void Simulator::st1(VectorFormat vform, + LogicVRegister src, + int index, + uint64_t addr) { + if (handle_wasm_seg_fault(addr, LaneSizeInBytesFromFormat(vform))) + return; + src.WriteUintToMem(vform, index, addr); +} + + +void Simulator::st2(VectorFormat vform, + LogicVRegister dst, + LogicVRegister dst2, + uint64_t addr) { + if (handle_wasm_seg_fault(addr, 16*2)) + return; + int esize = LaneSizeInBytesFromFormat(vform); + uint64_t addr2 = addr + esize; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst.WriteUintToMem(vform, i, addr); + dst2.WriteUintToMem(vform, i, addr2); + addr += 2 * esize; + addr2 += 2 * esize; + } +} + + +void Simulator::st2(VectorFormat vform, + LogicVRegister dst, + LogicVRegister dst2, + int index, + uint64_t addr) { + 
if (handle_wasm_seg_fault(addr, LaneSizeInBytesFromFormat(vform)*2)) + return; + int esize = LaneSizeInBytesFromFormat(vform); + dst.WriteUintToMem(vform, index, addr); + dst2.WriteUintToMem(vform, index, addr + 1 * esize); +} + + +void Simulator::st3(VectorFormat vform, + LogicVRegister dst, + LogicVRegister dst2, + LogicVRegister dst3, + uint64_t addr) { + if (handle_wasm_seg_fault(addr, 16*3)) + return; + int esize = LaneSizeInBytesFromFormat(vform); + uint64_t addr2 = addr + esize; + uint64_t addr3 = addr2 + esize; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst.WriteUintToMem(vform, i, addr); + dst2.WriteUintToMem(vform, i, addr2); + dst3.WriteUintToMem(vform, i, addr3); + addr += 3 * esize; + addr2 += 3 * esize; + addr3 += 3 * esize; + } +} + + +void Simulator::st3(VectorFormat vform, + LogicVRegister dst, + LogicVRegister dst2, + LogicVRegister dst3, + int index, + uint64_t addr) { + if (handle_wasm_seg_fault(addr, LaneSizeInBytesFromFormat(vform)*3)) + return; + int esize = LaneSizeInBytesFromFormat(vform); + dst.WriteUintToMem(vform, index, addr); + dst2.WriteUintToMem(vform, index, addr + 1 * esize); + dst3.WriteUintToMem(vform, index, addr + 2 * esize); +} + + +void Simulator::st4(VectorFormat vform, + LogicVRegister dst, + LogicVRegister dst2, + LogicVRegister dst3, + LogicVRegister dst4, + uint64_t addr) { + if (handle_wasm_seg_fault(addr, 16*4)) + return; + int esize = LaneSizeInBytesFromFormat(vform); + uint64_t addr2 = addr + esize; + uint64_t addr3 = addr2 + esize; + uint64_t addr4 = addr3 + esize; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst.WriteUintToMem(vform, i, addr); + dst2.WriteUintToMem(vform, i, addr2); + dst3.WriteUintToMem(vform, i, addr3); + dst4.WriteUintToMem(vform, i, addr4); + addr += 4 * esize; + addr2 += 4 * esize; + addr3 += 4 * esize; + addr4 += 4 * esize; + } +} + + +void Simulator::st4(VectorFormat vform, + LogicVRegister dst, + LogicVRegister dst2, + LogicVRegister dst3, + LogicVRegister 
dst4, + int index, + uint64_t addr) { + if (handle_wasm_seg_fault(addr, LaneSizeInBytesFromFormat(vform)*4)) + return; + int esize = LaneSizeInBytesFromFormat(vform); + dst.WriteUintToMem(vform, index, addr); + dst2.WriteUintToMem(vform, index, addr + 1 * esize); + dst3.WriteUintToMem(vform, index, addr + 2 * esize); + dst4.WriteUintToMem(vform, index, addr + 3 * esize); +} + + +LogicVRegister Simulator::cmp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + Condition cond) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + int64_t sa = src1.Int(vform, i); + int64_t sb = src2.Int(vform, i); + uint64_t ua = src1.Uint(vform, i); + uint64_t ub = src2.Uint(vform, i); + bool result = false; + switch (cond) { + case eq: result = (ua == ub); break; + case ge: result = (sa >= sb); break; + case gt: result = (sa > sb) ; break; + case hi: result = (ua > ub) ; break; + case hs: result = (ua >= ub); break; + case lt: result = (sa < sb) ; break; + case le: result = (sa <= sb); break; + default: VIXL_UNREACHABLE(); break; + } + dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0); + } + return dst; +} + + +LogicVRegister Simulator::cmp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + int imm, + Condition cond) { + SimVRegister temp; + LogicVRegister imm_reg = dup_immediate(vform, temp, imm); + return cmp(vform, dst, src1, imm_reg, cond); +} + + +LogicVRegister Simulator::cmptst(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t ua = src1.Uint(vform, i); + uint64_t ub = src2.Uint(vform, i); + dst.SetUint(vform, i, ((ua & ub) != 0) ? 
MaxUintFromFormat(vform) : 0); + } + return dst; +} + + +LogicVRegister Simulator::add(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + dst.ClearForWrite(vform); + // TODO(all): consider assigning the result of LaneCountFromFormat to a local. + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + // Test for unsigned saturation. + uint64_t ua = src1.UintLeftJustified(vform, i); + uint64_t ub = src2.UintLeftJustified(vform, i); + uint64_t ur = ua + ub; + if (ur < ua) { + dst.SetUnsignedSat(i, true); + } + + // Test for signed saturation. + int64_t sa = src1.IntLeftJustified(vform, i); + int64_t sb = src2.IntLeftJustified(vform, i); + int64_t sr = sa + sb; + // If the signs of the operands are the same, but different from the result, + // there was an overflow. + if (((sa >= 0) == (sb >= 0)) && ((sa >= 0) != (sr >= 0))) { + dst.SetSignedSat(i, sa >= 0); + } + + dst.SetInt(vform, i, src1.Int(vform, i) + src2.Int(vform, i)); + } + return dst; +} + + +LogicVRegister Simulator::addp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + uzp1(vform, temp1, src1, src2); + uzp2(vform, temp2, src1, src2); + add(vform, dst, temp1, temp2); + return dst; +} + + +LogicVRegister Simulator::mla(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + mul(vform, temp, src1, src2); + add(vform, dst, dst, temp); + return dst; +} + + +LogicVRegister Simulator::mls(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + mul(vform, temp, src1, src2); + sub(vform, dst, dst, temp); + return dst; +} + + +LogicVRegister Simulator::mul(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); 
i++) { + dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i)); + } + return dst; +} + + +LogicVRegister Simulator::mul(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + SimVRegister temp; + VectorFormat indexform = VectorFormatFillQ(vform); + return mul(vform, dst, src1, dup_element(indexform, temp, src2, index)); +} + + +LogicVRegister Simulator::mla(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + SimVRegister temp; + VectorFormat indexform = VectorFormatFillQ(vform); + return mla(vform, dst, src1, dup_element(indexform, temp, src2, index)); +} + + +LogicVRegister Simulator::mls(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + SimVRegister temp; + VectorFormat indexform = VectorFormatFillQ(vform); + return mls(vform, dst, src1, dup_element(indexform, temp, src2, index)); +} + + +LogicVRegister Simulator::smull(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + SimVRegister temp; + VectorFormat indexform = + VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); + return smull(vform, dst, src1, dup_element(indexform, temp, src2, index)); +} + + +LogicVRegister Simulator::smull2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + SimVRegister temp; + VectorFormat indexform = + VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); + return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index)); +} + + +LogicVRegister Simulator::umull(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + SimVRegister temp; + VectorFormat indexform = + VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); + return umull(vform, dst, src1, 
dup_element(indexform, temp, src2, index)); +} + + +LogicVRegister Simulator::umull2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + SimVRegister temp; + VectorFormat indexform = + VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); + return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index)); +} + + +LogicVRegister Simulator::smlal(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + SimVRegister temp; + VectorFormat indexform = + VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); + return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index)); +} + + +LogicVRegister Simulator::smlal2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + SimVRegister temp; + VectorFormat indexform = + VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); + return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index)); +} + + +LogicVRegister Simulator::umlal(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + SimVRegister temp; + VectorFormat indexform = + VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); + return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index)); +} + + +LogicVRegister Simulator::umlal2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + SimVRegister temp; + VectorFormat indexform = + VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); + return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index)); +} + + +LogicVRegister Simulator::smlsl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + SimVRegister temp; + VectorFormat indexform = + 
VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); + return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); +} + + +LogicVRegister Simulator::smlsl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + SimVRegister temp; + VectorFormat indexform = + VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); + return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index)); +} + + +LogicVRegister Simulator::umlsl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + SimVRegister temp; + VectorFormat indexform = + VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); + return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); +} + + +LogicVRegister Simulator::umlsl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + SimVRegister temp; + VectorFormat indexform = + VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); + return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index)); +} + + +LogicVRegister Simulator::sqdmull(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + SimVRegister temp; + VectorFormat indexform = + VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); + return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index)); +} + + +LogicVRegister Simulator::sqdmull2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + SimVRegister temp; + VectorFormat indexform = + VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); + return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index)); +} + + +LogicVRegister Simulator::sqdmlal(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const 
LogicVRegister& src2, + int index) { + SimVRegister temp; + VectorFormat indexform = + VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); + return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index)); +} + + +LogicVRegister Simulator::sqdmlal2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + SimVRegister temp; + VectorFormat indexform = + VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); + return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index)); +} + + +LogicVRegister Simulator::sqdmlsl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + SimVRegister temp; + VectorFormat indexform = + VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); + return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); +} + + +LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + SimVRegister temp; + VectorFormat indexform = + VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); + return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index)); +} + + +LogicVRegister Simulator::sqdmulh(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + SimVRegister temp; + VectorFormat indexform = VectorFormatFillQ(vform); + return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index)); +} + + +LogicVRegister Simulator::sqrdmulh(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + SimVRegister temp; + VectorFormat indexform = VectorFormatFillQ(vform); + return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index)); +} + + +uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) { + uint16_t result = 0; + 
uint16_t extended_op2 = op2; + for (int i = 0; i < 8; ++i) { + if ((op1 >> i) & 1) { + result = result ^ (extended_op2 << i); + } + } + return result; +} + + +LogicVRegister Simulator::pmul(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst.SetUint(vform, i, + PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i))); + } + return dst; +} + + +LogicVRegister Simulator::pmull(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + VectorFormat vform_src = VectorFormatHalfWidth(vform); + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst.SetUint(vform, i, PolynomialMult(src1.Uint(vform_src, i), + src2.Uint(vform_src, i))); + } + return dst; +} + + +LogicVRegister Simulator::pmull2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform); + dst.ClearForWrite(vform); + int lane_count = LaneCountFromFormat(vform); + for (int i = 0; i < lane_count; i++) { + dst.SetUint(vform, i, PolynomialMult(src1.Uint(vform_src, lane_count + i), + src2.Uint(vform_src, lane_count + i))); + } + return dst; +} + + +LogicVRegister Simulator::sub(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + // Test for unsigned saturation. + if (src2.Uint(vform, i) > src1.Uint(vform, i)) { + dst.SetUnsignedSat(i, false); + } + + // Test for signed saturation. + int64_t sa = src1.IntLeftJustified(vform, i); + int64_t sb = src2.IntLeftJustified(vform, i); + int64_t sr = sa - sb; + // If the signs of the operands are different, and the sign of the first + // operand doesn't match the result, there was an overflow. 
+ if (((sa >= 0) != (sb >= 0)) && ((sa >= 0) != (sr >= 0))) { + dst.SetSignedSat(i, sr < 0); + } + + dst.SetInt(vform, i, src1.Int(vform, i) - src2.Int(vform, i)); + } + return dst; +} + + +LogicVRegister Simulator::and_(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i)); + } + return dst; +} + + +LogicVRegister Simulator::orr(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i)); + } + return dst; +} + + +LogicVRegister Simulator::orn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i)); + } + return dst; +} + + +LogicVRegister Simulator::eor(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i)); + } + return dst; +} + + +LogicVRegister Simulator::bic(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i)); + } + return dst; +} + + +LogicVRegister Simulator::bic(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + uint64_t imm) { + uint64_t result[16]; + int laneCount = LaneCountFromFormat(vform); + for (int i = 0; i < laneCount; ++i) { + result[i] = 
src.Uint(vform, i) & ~imm; + } + dst.ClearForWrite(vform); + for (int i = 0; i < laneCount; ++i) { + dst.SetUint(vform, i, result[i]); + } + return dst; +} + + +LogicVRegister Simulator::bif(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t operand1 = dst.Uint(vform, i); + uint64_t operand2 = ~src2.Uint(vform, i); + uint64_t operand3 = src1.Uint(vform, i); + uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); + dst.SetUint(vform, i, result); + } + return dst; +} + + +LogicVRegister Simulator::bit(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t operand1 = dst.Uint(vform, i); + uint64_t operand2 = src2.Uint(vform, i); + uint64_t operand3 = src1.Uint(vform, i); + uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); + dst.SetUint(vform, i, result); + } + return dst; +} + + +LogicVRegister Simulator::bsl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t operand1 = src2.Uint(vform, i); + uint64_t operand2 = dst.Uint(vform, i); + uint64_t operand3 = src1.Uint(vform, i); + uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); + dst.SetUint(vform, i, result); + } + return dst; +} + + +LogicVRegister Simulator::sminmax(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool max) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + int64_t src1_val = src1.Int(vform, i); + int64_t src2_val = src2.Int(vform, i); + int64_t dst_val; + if (max == true) { + dst_val = (src1_val > src2_val) ? 
src1_val : src2_val; + } else { + dst_val = (src1_val < src2_val) ? src1_val : src2_val; + } + dst.SetInt(vform, i, dst_val); + } + return dst; +} + + +LogicVRegister Simulator::smax(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + return sminmax(vform, dst, src1, src2, true); +} + + +LogicVRegister Simulator::smin(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + return sminmax(vform, dst, src1, src2, false); +} + + +LogicVRegister Simulator::sminmaxp(VectorFormat vform, + LogicVRegister dst, + int dst_index, + const LogicVRegister& src, + bool max) { + for (int i = 0; i < LaneCountFromFormat(vform); i += 2) { + int64_t src1_val = src.Int(vform, i); + int64_t src2_val = src.Int(vform, i + 1); + int64_t dst_val; + if (max == true) { + dst_val = (src1_val > src2_val) ? src1_val : src2_val; + } else { + dst_val = (src1_val < src2_val) ? src1_val : src2_val; + } + dst.SetInt(vform, dst_index + (i >> 1), dst_val); + } + return dst; +} + + +LogicVRegister Simulator::smaxp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + dst.ClearForWrite(vform); + sminmaxp(vform, dst, 0, src1, true); + sminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, true); + return dst; +} + + +LogicVRegister Simulator::sminp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + dst.ClearForWrite(vform); + sminmaxp(vform, dst, 0, src1, false); + sminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, false); + return dst; +} + + +LogicVRegister Simulator::addp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + VIXL_ASSERT(vform == kFormatD); + + int64_t dst_val = src.Int(kFormat2D, 0) + src.Int(kFormat2D, 1); + dst.ClearForWrite(vform); + dst.SetInt(vform, 0, dst_val); + return dst; +} + + +LogicVRegister Simulator::addv(VectorFormat vform, + 
LogicVRegister dst, + const LogicVRegister& src) { + VectorFormat vform_dst + = ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform)); + + + int64_t dst_val = 0; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst_val += src.Int(vform, i); + } + + dst.ClearForWrite(vform_dst); + dst.SetInt(vform_dst, 0, dst_val); + return dst; +} + + +LogicVRegister Simulator::saddlv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + VectorFormat vform_dst + = ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2); + + int64_t dst_val = 0; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst_val += src.Int(vform, i); + } + + dst.ClearForWrite(vform_dst); + dst.SetInt(vform_dst, 0, dst_val); + return dst; +} + + +LogicVRegister Simulator::uaddlv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + VectorFormat vform_dst + = ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2); + + uint64_t dst_val = 0; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst_val += src.Uint(vform, i); + } + + dst.ClearForWrite(vform_dst); + dst.SetUint(vform_dst, 0, dst_val); + return dst; +} + + +LogicVRegister Simulator::sminmaxv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + bool max) { + dst.ClearForWrite(vform); + int64_t dst_val = max ? INT64_MIN : INT64_MAX; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + int64_t src_val = src.Int(vform, i); + if (max == true) { + dst_val = (src_val > dst_val) ? src_val : dst_val; + } else { + dst_val = (src_val < dst_val) ? 
src_val : dst_val; + } + } + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst.SetInt(vform, i, 0); + } + dst.SetInt(vform, 0, dst_val); + return dst; +} + + +LogicVRegister Simulator::smaxv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + sminmaxv(vform, dst, src, true); + return dst; +} + + +LogicVRegister Simulator::sminv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + sminmaxv(vform, dst, src, false); + return dst; +} + + +LogicVRegister Simulator::uminmax(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool max) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t src1_val = src1.Uint(vform, i); + uint64_t src2_val = src2.Uint(vform, i); + uint64_t dst_val; + if (max == true) { + dst_val = (src1_val > src2_val) ? src1_val : src2_val; + } else { + dst_val = (src1_val < src2_val) ? src1_val : src2_val; + } + dst.SetUint(vform, i, dst_val); + } + return dst; +} + + +LogicVRegister Simulator::umax(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + return uminmax(vform, dst, src1, src2, true); +} + + +LogicVRegister Simulator::umin(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + return uminmax(vform, dst, src1, src2, false); +} + + +LogicVRegister Simulator::uminmaxp(VectorFormat vform, + LogicVRegister dst, + int dst_index, + const LogicVRegister& src, + bool max) { + for (int i = 0; i < LaneCountFromFormat(vform); i += 2) { + uint64_t src1_val = src.Uint(vform, i); + uint64_t src2_val = src.Uint(vform, i + 1); + uint64_t dst_val; + if (max == true) { + dst_val = (src1_val > src2_val) ? src1_val : src2_val; + } else { + dst_val = (src1_val < src2_val) ? 
src1_val : src2_val; + } + dst.SetUint(vform, dst_index + (i >> 1), dst_val); + } + return dst; +} + + +LogicVRegister Simulator::umaxp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + dst.ClearForWrite(vform); + uminmaxp(vform, dst, 0, src1, true); + uminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, true); + return dst; +} + + +LogicVRegister Simulator::uminp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + dst.ClearForWrite(vform); + uminmaxp(vform, dst, 0, src1, false); + uminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, false); + return dst; +} + + +LogicVRegister Simulator::uminmaxv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + bool max) { + dst.ClearForWrite(vform); + uint64_t dst_val = max ? 0 : UINT64_MAX; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t src_val = src.Uint(vform, i); + if (max == true) { + dst_val = (src_val > dst_val) ? src_val : dst_val; + } else { + dst_val = (src_val < dst_val) ? 
src_val : dst_val; + } + } + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst.SetUint(vform, i, 0); + } + dst.SetUint(vform, 0, dst_val); + return dst; +} + + +LogicVRegister Simulator::umaxv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + uminmaxv(vform, dst, src, true); + return dst; +} + + +LogicVRegister Simulator::uminv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + uminmaxv(vform, dst, src, false); + return dst; +} + + +LogicVRegister Simulator::shl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + VIXL_ASSERT(shift >= 0); + SimVRegister temp; + LogicVRegister shiftreg = dup_immediate(vform, temp, shift); + return ushl(vform, dst, src, shiftreg); +} + + +LogicVRegister Simulator::sshll(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + VIXL_ASSERT(shift >= 0); + SimVRegister temp1, temp2; + LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); + LogicVRegister extendedreg = sxtl(vform, temp2, src); + return sshl(vform, dst, extendedreg, shiftreg); +} + + +LogicVRegister Simulator::sshll2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + VIXL_ASSERT(shift >= 0); + SimVRegister temp1, temp2; + LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); + LogicVRegister extendedreg = sxtl2(vform, temp2, src); + return sshl(vform, dst, extendedreg, shiftreg); +} + + +LogicVRegister Simulator::shll(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + int shift = LaneSizeInBitsFromFormat(vform) / 2; + return sshll(vform, dst, src, shift); +} + + +LogicVRegister Simulator::shll2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + int shift = LaneSizeInBitsFromFormat(vform) / 2; + return sshll2(vform, dst, src, shift); +} + + +LogicVRegister Simulator::ushll(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int 
shift) { + VIXL_ASSERT(shift >= 0); + SimVRegister temp1, temp2; + LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); + LogicVRegister extendedreg = uxtl(vform, temp2, src); + return ushl(vform, dst, extendedreg, shiftreg); +} + + +LogicVRegister Simulator::ushll2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + VIXL_ASSERT(shift >= 0); + SimVRegister temp1, temp2; + LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); + LogicVRegister extendedreg = uxtl2(vform, temp2, src); + return ushl(vform, dst, extendedreg, shiftreg); +} + + +LogicVRegister Simulator::sli(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + dst.ClearForWrite(vform); + int laneCount = LaneCountFromFormat(vform); + for (int i = 0; i < laneCount; i++) { + uint64_t src_lane = src.Uint(vform, i); + uint64_t dst_lane = dst.Uint(vform, i); + uint64_t shifted = src_lane << shift; + uint64_t mask = MaxUintFromFormat(vform) << shift; + dst.SetUint(vform, i, (dst_lane & ~mask) | shifted); + } + return dst; +} + + +LogicVRegister Simulator::sqshl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + VIXL_ASSERT(shift >= 0); + SimVRegister temp; + LogicVRegister shiftreg = dup_immediate(vform, temp, shift); + return sshl(vform, dst, src, shiftreg).SignedSaturate(vform); +} + + +LogicVRegister Simulator::uqshl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + VIXL_ASSERT(shift >= 0); + SimVRegister temp; + LogicVRegister shiftreg = dup_immediate(vform, temp, shift); + return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform); +} + + +LogicVRegister Simulator::sqshlu(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + VIXL_ASSERT(shift >= 0); + SimVRegister temp; + LogicVRegister shiftreg = dup_immediate(vform, temp, shift); + return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform); +} + + 
+LogicVRegister Simulator::sri(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + dst.ClearForWrite(vform); + int laneCount = LaneCountFromFormat(vform); + VIXL_ASSERT((shift > 0) && + (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform)))); + for (int i = 0; i < laneCount; i++) { + uint64_t src_lane = src.Uint(vform, i); + uint64_t dst_lane = dst.Uint(vform, i); + uint64_t shifted; + uint64_t mask; + if (shift == 64) { + shifted = 0; + mask = 0; + } else { + shifted = src_lane >> shift; + mask = MaxUintFromFormat(vform) >> shift; + } + dst.SetUint(vform, i, (dst_lane & ~mask) | shifted); + } + return dst; +} + + +LogicVRegister Simulator::ushr(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + VIXL_ASSERT(shift >= 0); + SimVRegister temp; + LogicVRegister shiftreg = dup_immediate(vform, temp, -shift); + return ushl(vform, dst, src, shiftreg); +} + + +LogicVRegister Simulator::sshr(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + VIXL_ASSERT(shift >= 0); + SimVRegister temp; + LogicVRegister shiftreg = dup_immediate(vform, temp, -shift); + return sshl(vform, dst, src, shiftreg); +} + + +LogicVRegister Simulator::ssra(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + SimVRegister temp; + LogicVRegister shifted_reg = sshr(vform, temp, src, shift); + return add(vform, dst, dst, shifted_reg); +} + + +LogicVRegister Simulator::usra(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + SimVRegister temp; + LogicVRegister shifted_reg = ushr(vform, temp, src, shift); + return add(vform, dst, dst, shifted_reg); +} + + +LogicVRegister Simulator::srsra(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + SimVRegister temp; + LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform); + return add(vform, dst, dst, shifted_reg); +} + + 
+LogicVRegister Simulator::ursra(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + SimVRegister temp; + LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform); + return add(vform, dst, dst, shifted_reg); +} + + +LogicVRegister Simulator::cls(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + uint64_t result[16]; + int laneSizeInBits = LaneSizeInBitsFromFormat(vform); + int laneCount = LaneCountFromFormat(vform); + for (int i = 0; i < laneCount; i++) { + result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits); + } + + dst.ClearForWrite(vform); + for (int i = 0; i < laneCount; ++i) { + dst.SetUint(vform, i, result[i]); + } + return dst; +} + + +LogicVRegister Simulator::clz(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + uint64_t result[16]; + int laneSizeInBits = LaneSizeInBitsFromFormat(vform); + int laneCount = LaneCountFromFormat(vform); + for (int i = 0; i < laneCount; i++) { + result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits); + } + + dst.ClearForWrite(vform); + for (int i = 0; i < laneCount; ++i) { + dst.SetUint(vform, i, result[i]); + } + return dst; +} + + +LogicVRegister Simulator::cnt(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + uint64_t result[16]; + int laneSizeInBits = LaneSizeInBitsFromFormat(vform); + int laneCount = LaneCountFromFormat(vform); + for (int i = 0; i < laneCount; i++) { + uint64_t value = src.Uint(vform, i); + result[i] = 0; + for (int j = 0; j < laneSizeInBits; j++) { + result[i] += (value & 1); + value >>= 1; + } + } + + dst.ClearForWrite(vform); + for (int i = 0; i < laneCount; ++i) { + dst.SetUint(vform, i, result[i]); + } + return dst; +} + + +LogicVRegister Simulator::sshl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + int8_t 
shift_val = src2.Int(vform, i); + int64_t lj_src_val = src1.IntLeftJustified(vform, i); + + // Set signed saturation state. + if ((shift_val > CountLeadingSignBits(lj_src_val)) && + (lj_src_val != 0)) { + dst.SetSignedSat(i, lj_src_val >= 0); + } + + // Set unsigned saturation state. + if (lj_src_val < 0) { + dst.SetUnsignedSat(i, false); + } else if ((shift_val > CountLeadingZeros(lj_src_val)) && + (lj_src_val != 0)) { + dst.SetUnsignedSat(i, true); + } + + int64_t src_val = src1.Int(vform, i); + if (shift_val > 63) { + dst.SetInt(vform, i, 0); + } else if (shift_val < -63) { + dst.SetRounding(i, src_val < 0); + dst.SetInt(vform, i, (src_val < 0) ? -1 : 0); + } else { + if (shift_val < 0) { + // Set rounding state. Rounding only needed on right shifts. + if (((src_val >> (-shift_val - 1)) & 1) == 1) { + dst.SetRounding(i, true); + } + src_val >>= -shift_val; + } else { + src_val <<= shift_val; + } + dst.SetInt(vform, i, src_val); + } + } + return dst; +} + + +LogicVRegister Simulator::ushl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + int8_t shift_val = src2.Int(vform, i); + uint64_t lj_src_val = src1.UintLeftJustified(vform, i); + + // Set saturation state. + if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) { + dst.SetUnsignedSat(i, true); + } + + uint64_t src_val = src1.Uint(vform, i); + if ((shift_val > 63) || (shift_val < -64)) { + dst.SetUint(vform, i, 0); + } else { + if (shift_val < 0) { + // Set rounding state. Rounding only needed on right shifts. 
+ if (((src_val >> (-shift_val - 1)) & 1) == 1) { + dst.SetRounding(i, true); + } + + if (shift_val == -64) { + src_val = 0; + } else { + src_val >>= -shift_val; + } + } else { + src_val <<= shift_val; + } + dst.SetUint(vform, i, src_val); + } + } + return dst; +} + + +LogicVRegister Simulator::neg(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + // Test for signed saturation. + int64_t sa = src.Int(vform, i); + if (sa == MinIntFromFormat(vform)) { + dst.SetSignedSat(i, true); + } + dst.SetInt(vform, i, -sa); + } + return dst; +} + + +LogicVRegister Simulator::suqadd(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + int64_t sa = dst.IntLeftJustified(vform, i); + uint64_t ub = src.UintLeftJustified(vform, i); + int64_t sr = sa + ub; + + if (sr < sa) { // Test for signed positive saturation. + dst.SetInt(vform, i, MaxIntFromFormat(vform)); + } else { + dst.SetInt(vform, i, dst.Int(vform, i) + src.Int(vform, i)); + } + } + return dst; +} + + +LogicVRegister Simulator::usqadd(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t ua = dst.UintLeftJustified(vform, i); + int64_t sb = src.IntLeftJustified(vform, i); + uint64_t ur = ua + sb; + + if ((sb > 0) && (ur <= ua)) { + dst.SetUint(vform, i, MaxUintFromFormat(vform)); // Positive saturation. + } else if ((sb < 0) && (ur >= ua)) { + dst.SetUint(vform, i, 0); // Negative saturation. 
+ } else { + dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i)); + } + } + return dst; +} + + +LogicVRegister Simulator::abs(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + // Test for signed saturation. + int64_t sa = src.Int(vform, i); + if (sa == MinIntFromFormat(vform)) { + dst.SetSignedSat(i, true); + } + if (sa < 0) { + dst.SetInt(vform, i, -sa); + } else { + dst.SetInt(vform, i, sa); + } + } + return dst; +} + + +LogicVRegister Simulator::extractnarrow(VectorFormat dstform, + LogicVRegister dst, + bool dstIsSigned, + const LogicVRegister& src, + bool srcIsSigned) { + bool upperhalf = false; + VectorFormat srcform = kFormatUndefined; + int64_t ssrc[8]; + uint64_t usrc[8]; + + switch (dstform) { + case kFormat8B : upperhalf = false; srcform = kFormat8H; break; + case kFormat16B: upperhalf = true; srcform = kFormat8H; break; + case kFormat4H : upperhalf = false; srcform = kFormat4S; break; + case kFormat8H : upperhalf = true; srcform = kFormat4S; break; + case kFormat2S : upperhalf = false; srcform = kFormat2D; break; + case kFormat4S : upperhalf = true; srcform = kFormat2D; break; + case kFormatB : upperhalf = false; srcform = kFormatH; break; + case kFormatH : upperhalf = false; srcform = kFormatS; break; + case kFormatS : upperhalf = false; srcform = kFormatD; break; + default:VIXL_UNIMPLEMENTED(); + } + + for (int i = 0; i < LaneCountFromFormat(srcform); i++) { + ssrc[i] = src.Int(srcform, i); + usrc[i] = src.Uint(srcform, i); + } + + int offset; + if (upperhalf) { + offset = LaneCountFromFormat(dstform) / 2; + } else { + offset = 0; + dst.ClearForWrite(dstform); + } + + for (int i = 0; i < LaneCountFromFormat(srcform); i++) { + // Test for signed saturation + if (ssrc[i] > MaxIntFromFormat(dstform)) { + dst.SetSignedSat(offset + i, true); + } else if (ssrc[i] < MinIntFromFormat(dstform)) { + dst.SetSignedSat(offset + i, false); + } + + 
// Test for unsigned saturation + if (srcIsSigned) { + if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) { + dst.SetUnsignedSat(offset + i, true); + } else if (ssrc[i] < 0) { + dst.SetUnsignedSat(offset + i, false); + } + } else { + if (usrc[i] > MaxUintFromFormat(dstform)) { + dst.SetUnsignedSat(offset + i, true); + } + } + + int64_t result; + if (srcIsSigned) { + result = ssrc[i] & MaxUintFromFormat(dstform); + } else { + result = usrc[i] & MaxUintFromFormat(dstform); + } + + if (dstIsSigned) { + dst.SetInt(dstform, offset + i, result); + } else { + dst.SetUint(dstform, offset + i, result); + } + } + return dst; +} + + +LogicVRegister Simulator::xtn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + return extractnarrow(vform, dst, true, src, true); +} + + +LogicVRegister Simulator::sqxtn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + return extractnarrow(vform, dst, true, src, true).SignedSaturate(vform); +} + + +LogicVRegister Simulator::sqxtun(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + return extractnarrow(vform, dst, false, src, true).UnsignedSaturate(vform); +} + + +LogicVRegister Simulator::uqxtn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + return extractnarrow(vform, dst, false, src, false).UnsignedSaturate(vform); +} + + +LogicVRegister Simulator::absdiff(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool issigned) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (issigned) { + int64_t sr = src1.Int(vform, i) - src2.Int(vform, i); + sr = sr > 0 ? sr : -sr; + dst.SetInt(vform, i, sr); + } else { + int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i); + sr = sr > 0 ? 
sr : -sr; + dst.SetUint(vform, i, sr); + } + } + return dst; +} + + +LogicVRegister Simulator::saba(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + dst.ClearForWrite(vform); + absdiff(vform, temp, src1, src2, true); + add(vform, dst, dst, temp); + return dst; +} + + +LogicVRegister Simulator::uaba(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + dst.ClearForWrite(vform); + absdiff(vform, temp, src1, src2, false); + add(vform, dst, dst, temp); + return dst; +} + + +LogicVRegister Simulator::not_(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst.SetUint(vform, i, ~src.Uint(vform, i)); + } + return dst; +} + + +LogicVRegister Simulator::rbit(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + uint64_t result[16]; + int laneCount = LaneCountFromFormat(vform); + int laneSizeInBits = LaneSizeInBitsFromFormat(vform); + uint64_t reversed_value; + uint64_t value; + for (int i = 0; i < laneCount; i++) { + value = src.Uint(vform, i); + reversed_value = 0; + for (int j = 0; j < laneSizeInBits; j++) { + reversed_value = (reversed_value << 1) | (value & 1); + value >>= 1; + } + result[i] = reversed_value; + } + + dst.ClearForWrite(vform); + for (int i = 0; i < laneCount; ++i) { + dst.SetUint(vform, i, result[i]); + } + return dst; +} + + +LogicVRegister Simulator::rev(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int revSize) { + uint64_t result[16]; + int laneCount = LaneCountFromFormat(vform); + int laneSize = LaneSizeInBytesFromFormat(vform); + int lanesPerLoop = revSize / laneSize; + for (int i = 0; i < laneCount; i += lanesPerLoop) { + for (int j = 0; j < lanesPerLoop; j++) { + result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j); + } + } + 
dst.ClearForWrite(vform); + for (int i = 0; i < laneCount; ++i) { + dst.SetUint(vform, i, result[i]); + } + return dst; +} + + +LogicVRegister Simulator::rev16(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + return rev(vform, dst, src, 2); +} + + +LogicVRegister Simulator::rev32(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + return rev(vform, dst, src, 4); +} + + +LogicVRegister Simulator::rev64(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + return rev(vform, dst, src, 8); +} + + +LogicVRegister Simulator::addlp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + bool is_signed, + bool do_accumulate) { + VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform); + + int64_t sr[16]; + uint64_t ur[16]; + + int laneCount = LaneCountFromFormat(vform); + for (int i = 0; i < laneCount; ++i) { + if (is_signed) { + sr[i] = src.Int(vformsrc, 2 * i) + src.Int(vformsrc, 2 * i + 1); + } else { + ur[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1); + } + } + + dst.ClearForWrite(vform); + for (int i = 0; i < laneCount; ++i) { + if (do_accumulate) { + if (is_signed) { + dst.SetInt(vform, i, dst.Int(vform, i) + sr[i]); + } else { + dst.SetUint(vform, i, dst.Uint(vform, i) + ur[i]); + } + } else { + if (is_signed) { + dst.SetInt(vform, i, sr[i]); + } else { + dst.SetUint(vform, i, ur[i]); + } + } + } + + return dst; +} + + +LogicVRegister Simulator::saddlp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + return addlp(vform, dst, src, true, false); +} + + +LogicVRegister Simulator::uaddlp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + return addlp(vform, dst, src, false, false); +} + + +LogicVRegister Simulator::sadalp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + return addlp(vform, dst, src, true, true); +} + + +LogicVRegister Simulator::uadalp(VectorFormat vform, + 
LogicVRegister dst, + const LogicVRegister& src) { + return addlp(vform, dst, src, false, true); +} + + +LogicVRegister Simulator::ext(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + uint8_t result[16]; + int laneCount = LaneCountFromFormat(vform); + for (int i = 0; i < laneCount - index; ++i) { + result[i] = src1.Uint(vform, i + index); + } + for (int i = 0; i < index; ++i) { + result[laneCount - index + i] = src2.Uint(vform, i); + } + dst.ClearForWrite(vform); + for (int i = 0; i < laneCount; ++i) { + dst.SetUint(vform, i, result[i]); + } + return dst; +} + + +LogicVRegister Simulator::dup_element(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int src_index) { + int laneCount = LaneCountFromFormat(vform); + uint64_t value = src.Uint(vform, src_index); + dst.ClearForWrite(vform); + for (int i = 0; i < laneCount; ++i) { + dst.SetUint(vform, i, value); + } + return dst; +} + + +LogicVRegister Simulator::dup_immediate(VectorFormat vform, + LogicVRegister dst, + uint64_t imm) { + int laneCount = LaneCountFromFormat(vform); + uint64_t value = imm & MaxUintFromFormat(vform); + dst.ClearForWrite(vform); + for (int i = 0; i < laneCount; ++i) { + dst.SetUint(vform, i, value); + } + return dst; +} + + +LogicVRegister Simulator::ins_element(VectorFormat vform, + LogicVRegister dst, + int dst_index, + const LogicVRegister& src, + int src_index) { + dst.SetUint(vform, dst_index, src.Uint(vform, src_index)); + return dst; +} + + +LogicVRegister Simulator::ins_immediate(VectorFormat vform, + LogicVRegister dst, + int dst_index, + uint64_t imm) { + uint64_t value = imm & MaxUintFromFormat(vform); + dst.SetUint(vform, dst_index, value); + return dst; +} + + +LogicVRegister Simulator::mov(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + dst.ClearForWrite(vform); + for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) { + dst.SetUint(vform, lane, 
src.Uint(vform, lane)); + } + return dst; +} + + +LogicVRegister Simulator::movi(VectorFormat vform, + LogicVRegister dst, + uint64_t imm) { + int laneCount = LaneCountFromFormat(vform); + dst.ClearForWrite(vform); + for (int i = 0; i < laneCount; ++i) { + dst.SetUint(vform, i, imm); + } + return dst; +} + + +LogicVRegister Simulator::mvni(VectorFormat vform, + LogicVRegister dst, + uint64_t imm) { + int laneCount = LaneCountFromFormat(vform); + dst.ClearForWrite(vform); + for (int i = 0; i < laneCount; ++i) { + dst.SetUint(vform, i, ~imm); + } + return dst; +} + + +LogicVRegister Simulator::orr(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + uint64_t imm) { + uint64_t result[16]; + int laneCount = LaneCountFromFormat(vform); + for (int i = 0; i < laneCount; ++i) { + result[i] = src.Uint(vform, i) | imm; + } + dst.ClearForWrite(vform); + for (int i = 0; i < laneCount; ++i) { + dst.SetUint(vform, i, result[i]); + } + return dst; +} + + +LogicVRegister Simulator::uxtl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + VectorFormat vform_half = VectorFormatHalfWidth(vform); + + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst.SetUint(vform, i, src.Uint(vform_half, i)); + } + return dst; +} + + +LogicVRegister Simulator::sxtl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + VectorFormat vform_half = VectorFormatHalfWidth(vform); + + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst.SetInt(vform, i, src.Int(vform_half, i)); + } + return dst; +} + + +LogicVRegister Simulator::uxtl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + VectorFormat vform_half = VectorFormatHalfWidth(vform); + int lane_count = LaneCountFromFormat(vform); + + dst.ClearForWrite(vform); + for (int i = 0; i < lane_count; i++) { + dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i)); + } + return dst; +} + + 
+LogicVRegister Simulator::sxtl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + VectorFormat vform_half = VectorFormatHalfWidth(vform); + int lane_count = LaneCountFromFormat(vform); + + dst.ClearForWrite(vform); + for (int i = 0; i < lane_count; i++) { + dst.SetInt(vform, i, src.Int(vform_half, lane_count + i)); + } + return dst; +} + + +LogicVRegister Simulator::shrn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + SimVRegister temp; + VectorFormat vform_src = VectorFormatDoubleWidth(vform); + VectorFormat vform_dst = vform; + LogicVRegister shifted_src = ushr(vform_src, temp, src, shift); + return extractnarrow(vform_dst, dst, false, shifted_src, false); +} + + +LogicVRegister Simulator::shrn2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + SimVRegister temp; + VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); + VectorFormat vformdst = vform; + LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift); + return extractnarrow(vformdst, dst, false, shifted_src, false); +} + + +LogicVRegister Simulator::rshrn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + SimVRegister temp; + VectorFormat vformsrc = VectorFormatDoubleWidth(vform); + VectorFormat vformdst = vform; + LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc); + return extractnarrow(vformdst, dst, false, shifted_src, false); +} + + +LogicVRegister Simulator::rshrn2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + SimVRegister temp; + VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); + VectorFormat vformdst = vform; + LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc); + return extractnarrow(vformdst, dst, false, shifted_src, false); +} + + +LogicVRegister Simulator::tbl(VectorFormat vform, + LogicVRegister dst, 
+ const LogicVRegister& tab, + const LogicVRegister& ind) { + SimVRegister result; + movi(vform, result, 0); + tbx(vform, result, tab, ind); + return orr(vform, dst, result, result); +} + + +LogicVRegister Simulator::tbl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& tab, + const LogicVRegister& tab2, + const LogicVRegister& ind) { + SimVRegister result; + movi(vform, result, 0); + tbx(vform, result, tab, tab2, ind); + return orr(vform, dst, result, result); +} + + +LogicVRegister Simulator::tbl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& tab, + const LogicVRegister& tab2, + const LogicVRegister& tab3, + const LogicVRegister& ind) { + SimVRegister result; + movi(vform, result, 0); + tbx(vform, result, tab, tab2, tab3, ind); + return orr(vform, dst, result, result); +} + + +LogicVRegister Simulator::tbl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& tab, + const LogicVRegister& tab2, + const LogicVRegister& tab3, + const LogicVRegister& tab4, + const LogicVRegister& ind) { + SimVRegister result; + movi(vform, result, 0); + tbx(vform, result, tab, tab2, tab3, tab4, ind); + return orr(vform, dst, result, result); +} + + +LogicVRegister Simulator::tbx(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& tab, + const LogicVRegister& ind) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t j = ind.Uint(vform, i); + switch (j >> 4) { + case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break; + } + } + return dst; +} + + +LogicVRegister Simulator::tbx(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& tab, + const LogicVRegister& tab2, + const LogicVRegister& ind) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t j = ind.Uint(vform, i); + switch (j >> 4) { + case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break; + case 1: dst.SetUint(vform, i, tab2.Uint(kFormat16B, j & 
15)); break; + } + } + return dst; +} + + +LogicVRegister Simulator::tbx(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& tab, + const LogicVRegister& tab2, + const LogicVRegister& tab3, + const LogicVRegister& ind) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t j = ind.Uint(vform, i); + switch (j >> 4) { + case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break; + case 1: dst.SetUint(vform, i, tab2.Uint(kFormat16B, j & 15)); break; + case 2: dst.SetUint(vform, i, tab3.Uint(kFormat16B, j & 15)); break; + } + } + return dst; +} + + +LogicVRegister Simulator::tbx(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& tab, + const LogicVRegister& tab2, + const LogicVRegister& tab3, + const LogicVRegister& tab4, + const LogicVRegister& ind) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t j = ind.Uint(vform, i); + switch (j >> 4) { + case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break; + case 1: dst.SetUint(vform, i, tab2.Uint(kFormat16B, j & 15)); break; + case 2: dst.SetUint(vform, i, tab3.Uint(kFormat16B, j & 15)); break; + case 3: dst.SetUint(vform, i, tab4.Uint(kFormat16B, j & 15)); break; + } + } + return dst; +} + + +LogicVRegister Simulator::uqshrn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + return shrn(vform, dst, src, shift).UnsignedSaturate(vform); +} + + +LogicVRegister Simulator::uqshrn2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + return shrn2(vform, dst, src, shift).UnsignedSaturate(vform); +} + + +LogicVRegister Simulator::uqrshrn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + return rshrn(vform, dst, src, shift).UnsignedSaturate(vform); +} + + +LogicVRegister Simulator::uqrshrn2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + return 
rshrn2(vform, dst, src, shift).UnsignedSaturate(vform); +} + + +LogicVRegister Simulator::sqshrn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + SimVRegister temp; + VectorFormat vformsrc = VectorFormatDoubleWidth(vform); + VectorFormat vformdst = vform; + LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); + return sqxtn(vformdst, dst, shifted_src); +} + + +LogicVRegister Simulator::sqshrn2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + SimVRegister temp; + VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); + VectorFormat vformdst = vform; + LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); + return sqxtn(vformdst, dst, shifted_src); +} + + +LogicVRegister Simulator::sqrshrn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + SimVRegister temp; + VectorFormat vformsrc = VectorFormatDoubleWidth(vform); + VectorFormat vformdst = vform; + LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); + return sqxtn(vformdst, dst, shifted_src); +} + + +LogicVRegister Simulator::sqrshrn2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + SimVRegister temp; + VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); + VectorFormat vformdst = vform; + LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); + return sqxtn(vformdst, dst, shifted_src); +} + + +LogicVRegister Simulator::sqshrun(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + SimVRegister temp; + VectorFormat vformsrc = VectorFormatDoubleWidth(vform); + VectorFormat vformdst = vform; + LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); + return sqxtun(vformdst, dst, shifted_src); +} + + +LogicVRegister Simulator::sqshrun2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int 
shift) { + SimVRegister temp; + VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); + VectorFormat vformdst = vform; + LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); + return sqxtun(vformdst, dst, shifted_src); +} + + +LogicVRegister Simulator::sqrshrun(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + SimVRegister temp; + VectorFormat vformsrc = VectorFormatDoubleWidth(vform); + VectorFormat vformdst = vform; + LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); + return sqxtun(vformdst, dst, shifted_src); +} + + +LogicVRegister Simulator::sqrshrun2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + SimVRegister temp; + VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); + VectorFormat vformdst = vform; + LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); + return sqxtun(vformdst, dst, shifted_src); +} + + +LogicVRegister Simulator::uaddl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + uxtl(vform, temp1, src1); + uxtl(vform, temp2, src2); + add(vform, dst, temp1, temp2); + return dst; +} + + +LogicVRegister Simulator::uaddl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + uxtl2(vform, temp1, src1); + uxtl2(vform, temp2, src2); + add(vform, dst, temp1, temp2); + return dst; +} + + +LogicVRegister Simulator::uaddw(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + uxtl(vform, temp, src2); + add(vform, dst, src1, temp); + return dst; +} + + +LogicVRegister Simulator::uaddw2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + uxtl2(vform, temp, src2); + 
add(vform, dst, src1, temp); + return dst; +} + + +LogicVRegister Simulator::saddl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + sxtl(vform, temp1, src1); + sxtl(vform, temp2, src2); + add(vform, dst, temp1, temp2); + return dst; +} + + +LogicVRegister Simulator::saddl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + sxtl2(vform, temp1, src1); + sxtl2(vform, temp2, src2); + add(vform, dst, temp1, temp2); + return dst; +} + + +LogicVRegister Simulator::saddw(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + sxtl(vform, temp, src2); + add(vform, dst, src1, temp); + return dst; +} + + +LogicVRegister Simulator::saddw2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + sxtl2(vform, temp, src2); + add(vform, dst, src1, temp); + return dst; +} + + +LogicVRegister Simulator::usubl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + uxtl(vform, temp1, src1); + uxtl(vform, temp2, src2); + sub(vform, dst, temp1, temp2); + return dst; +} + + +LogicVRegister Simulator::usubl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + uxtl2(vform, temp1, src1); + uxtl2(vform, temp2, src2); + sub(vform, dst, temp1, temp2); + return dst; +} + + +LogicVRegister Simulator::usubw(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + uxtl(vform, temp, src2); + sub(vform, dst, src1, temp); + return dst; +} + + +LogicVRegister Simulator::usubw2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const 
LogicVRegister& src2) { + SimVRegister temp; + uxtl2(vform, temp, src2); + sub(vform, dst, src1, temp); + return dst; +} + + +LogicVRegister Simulator::ssubl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + sxtl(vform, temp1, src1); + sxtl(vform, temp2, src2); + sub(vform, dst, temp1, temp2); + return dst; +} + + +LogicVRegister Simulator::ssubl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + sxtl2(vform, temp1, src1); + sxtl2(vform, temp2, src2); + sub(vform, dst, temp1, temp2); + return dst; +} + + +LogicVRegister Simulator::ssubw(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + sxtl(vform, temp, src2); + sub(vform, dst, src1, temp); + return dst; +} + + +LogicVRegister Simulator::ssubw2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + sxtl2(vform, temp, src2); + sub(vform, dst, src1, temp); + return dst; +} + + +LogicVRegister Simulator::uabal(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + uxtl(vform, temp1, src1); + uxtl(vform, temp2, src2); + uaba(vform, dst, temp1, temp2); + return dst; +} + + +LogicVRegister Simulator::uabal2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + uxtl2(vform, temp1, src1); + uxtl2(vform, temp2, src2); + uaba(vform, dst, temp1, temp2); + return dst; +} + + +LogicVRegister Simulator::sabal(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + sxtl(vform, temp1, src1); + sxtl(vform, temp2, src2); + saba(vform, dst, temp1, temp2); + return dst; +} + + 
+LogicVRegister Simulator::sabal2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + sxtl2(vform, temp1, src1); + sxtl2(vform, temp2, src2); + saba(vform, dst, temp1, temp2); + return dst; +} + + +LogicVRegister Simulator::uabdl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + uxtl(vform, temp1, src1); + uxtl(vform, temp2, src2); + absdiff(vform, dst, temp1, temp2, false); + return dst; +} + + +LogicVRegister Simulator::uabdl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + uxtl2(vform, temp1, src1); + uxtl2(vform, temp2, src2); + absdiff(vform, dst, temp1, temp2, false); + return dst; +} + + +LogicVRegister Simulator::sabdl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + sxtl(vform, temp1, src1); + sxtl(vform, temp2, src2); + absdiff(vform, dst, temp1, temp2, true); + return dst; +} + + +LogicVRegister Simulator::sabdl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + sxtl2(vform, temp1, src1); + sxtl2(vform, temp2, src2); + absdiff(vform, dst, temp1, temp2, true); + return dst; +} + + +LogicVRegister Simulator::umull(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + uxtl(vform, temp1, src1); + uxtl(vform, temp2, src2); + mul(vform, dst, temp1, temp2); + return dst; +} + + +LogicVRegister Simulator::umull2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + uxtl2(vform, temp1, src1); + uxtl2(vform, temp2, src2); + mul(vform, dst, temp1, temp2); + return dst; +} + + 
+LogicVRegister Simulator::smull(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + sxtl(vform, temp1, src1); + sxtl(vform, temp2, src2); + mul(vform, dst, temp1, temp2); + return dst; +} + + +LogicVRegister Simulator::smull2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + sxtl2(vform, temp1, src1); + sxtl2(vform, temp2, src2); + mul(vform, dst, temp1, temp2); + return dst; +} + + +LogicVRegister Simulator::umlsl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + uxtl(vform, temp1, src1); + uxtl(vform, temp2, src2); + mls(vform, dst, temp1, temp2); + return dst; +} + + +LogicVRegister Simulator::umlsl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + uxtl2(vform, temp1, src1); + uxtl2(vform, temp2, src2); + mls(vform, dst, temp1, temp2); + return dst; +} + + +LogicVRegister Simulator::smlsl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + sxtl(vform, temp1, src1); + sxtl(vform, temp2, src2); + mls(vform, dst, temp1, temp2); + return dst; +} + + +LogicVRegister Simulator::smlsl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + sxtl2(vform, temp1, src1); + sxtl2(vform, temp2, src2); + mls(vform, dst, temp1, temp2); + return dst; +} + + +LogicVRegister Simulator::umlal(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + uxtl(vform, temp1, src1); + uxtl(vform, temp2, src2); + mla(vform, dst, temp1, temp2); + return dst; +} + + +LogicVRegister Simulator::umlal2(VectorFormat vform, + 
LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + uxtl2(vform, temp1, src1); + uxtl2(vform, temp2, src2); + mla(vform, dst, temp1, temp2); + return dst; +} + + +LogicVRegister Simulator::smlal(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + sxtl(vform, temp1, src1); + sxtl(vform, temp2, src2); + mla(vform, dst, temp1, temp2); + return dst; +} + + +LogicVRegister Simulator::smlal2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + sxtl2(vform, temp1, src1); + sxtl2(vform, temp2, src2); + mla(vform, dst, temp1, temp2); + return dst; +} + + +LogicVRegister Simulator::sqdmlal(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + LogicVRegister product = sqdmull(vform, temp, src1, src2); + return add(vform, dst, dst, product).SignedSaturate(vform); +} + + +LogicVRegister Simulator::sqdmlal2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + LogicVRegister product = sqdmull2(vform, temp, src1, src2); + return add(vform, dst, dst, product).SignedSaturate(vform); +} + + +LogicVRegister Simulator::sqdmlsl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + LogicVRegister product = sqdmull(vform, temp, src1, src2); + return sub(vform, dst, dst, product).SignedSaturate(vform); +} + + +LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + LogicVRegister product = sqdmull2(vform, temp, src1, src2); + return sub(vform, dst, dst, product).SignedSaturate(vform); +} + + +LogicVRegister Simulator::sqdmull(VectorFormat vform, + 
LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + LogicVRegister product = smull(vform, temp, src1, src2); + return add(vform, dst, product, product).SignedSaturate(vform); +} + + +LogicVRegister Simulator::sqdmull2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + LogicVRegister product = smull2(vform, temp, src1, src2); + return add(vform, dst, product, product).SignedSaturate(vform); +} + + +LogicVRegister Simulator::sqrdmulh(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool round) { + // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow. + // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1) + // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize. + + int esize = LaneSizeInBitsFromFormat(vform); + int round_const = round ? (1 << (esize - 2)) : 0; + int64_t product; + + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + product = src1.Int(vform, i) * src2.Int(vform, i); + product += round_const; + product = product >> (esize - 1); + + if (product > MaxIntFromFormat(vform)) { + product = MaxIntFromFormat(vform); + } else if (product < MinIntFromFormat(vform)) { + product = MinIntFromFormat(vform); + } + dst.SetInt(vform, i, product); + } + return dst; +} + + +LogicVRegister Simulator::sqdmulh(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + return sqrdmulh(vform, dst, src1, src2, false); +} + + +LogicVRegister Simulator::addhn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + add(VectorFormatDoubleWidth(vform), temp, src1, src2); + shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); + return dst; +} + + +LogicVRegister Simulator::addhn2(VectorFormat vform, + 
LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); + shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); + return dst; +} + + +LogicVRegister Simulator::raddhn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + add(VectorFormatDoubleWidth(vform), temp, src1, src2); + rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); + return dst; +} + + +LogicVRegister Simulator::raddhn2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); + rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); + return dst; +} + + +LogicVRegister Simulator::subhn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + sub(VectorFormatDoubleWidth(vform), temp, src1, src2); + shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); + return dst; +} + + +LogicVRegister Simulator::subhn2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); + shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); + return dst; +} + + +LogicVRegister Simulator::rsubhn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + sub(VectorFormatDoubleWidth(vform), temp, src1, src2); + rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); + return dst; +} + + +LogicVRegister Simulator::rsubhn2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + 
sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); + rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); + return dst; +} + + +LogicVRegister Simulator::trn1(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + uint64_t result[16]; + int laneCount = LaneCountFromFormat(vform); + int pairs = laneCount / 2; + for (int i = 0; i < pairs; ++i) { + result[2 * i] = src1.Uint(vform, 2 * i); + result[(2 * i) + 1] = src2.Uint(vform, 2 * i); + } + + dst.ClearForWrite(vform); + for (int i = 0; i < laneCount; ++i) { + dst.SetUint(vform, i, result[i]); + } + return dst; +} + + +LogicVRegister Simulator::trn2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + uint64_t result[16]; + int laneCount = LaneCountFromFormat(vform); + int pairs = laneCount / 2; + for (int i = 0; i < pairs; ++i) { + result[2 * i] = src1.Uint(vform, (2 * i) + 1); + result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1); + } + + dst.ClearForWrite(vform); + for (int i = 0; i < laneCount; ++i) { + dst.SetUint(vform, i, result[i]); + } + return dst; +} + + +LogicVRegister Simulator::zip1(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + uint64_t result[16]; + int laneCount = LaneCountFromFormat(vform); + int pairs = laneCount / 2; + for (int i = 0; i < pairs; ++i) { + result[2 * i] = src1.Uint(vform, i); + result[(2 * i) + 1] = src2.Uint(vform, i); + } + + dst.ClearForWrite(vform); + for (int i = 0; i < laneCount; ++i) { + dst.SetUint(vform, i, result[i]); + } + return dst; +} + + +LogicVRegister Simulator::zip2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + uint64_t result[16]; + int laneCount = LaneCountFromFormat(vform); + int pairs = laneCount / 2; + for (int i = 0; i < pairs; ++i) { + result[2 * i] = src1.Uint(vform, pairs + i); + result[(2 * i) + 1] = 
src2.Uint(vform, pairs + i); + } + + dst.ClearForWrite(vform); + for (int i = 0; i < laneCount; ++i) { + dst.SetUint(vform, i, result[i]); + } + return dst; +} + + +LogicVRegister Simulator::uzp1(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + uint64_t result[32]; + int laneCount = LaneCountFromFormat(vform); + for (int i = 0; i < laneCount; ++i) { + result[i] = src1.Uint(vform, i); + result[laneCount + i] = src2.Uint(vform, i); + } + + dst.ClearForWrite(vform); + for (int i = 0; i < laneCount; ++i) { + dst.SetUint(vform, i, result[2 * i]); + } + return dst; +} + + +LogicVRegister Simulator::uzp2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + uint64_t result[32]; + int laneCount = LaneCountFromFormat(vform); + for (int i = 0; i < laneCount; ++i) { + result[i] = src1.Uint(vform, i); + result[laneCount + i] = src2.Uint(vform, i); + } + + dst.ClearForWrite(vform); + for (int i = 0; i < laneCount; ++i) { + dst.SetUint(vform, i, result[ (2 * i) + 1]); + } + return dst; +} + + +template <typename T> +T Simulator::FPAdd(T op1, T op2) { + T result = FPProcessNaNs(op1, op2); + if (std::isnan(result)) return result; + + if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) { + // inf + -inf returns the default NaN. + FPProcessException(); + return FPDefaultNaN<T>(); + } else { + // Other cases should be handled by standard arithmetic. + return op1 + op2; + } +} + + +template <typename T> +T Simulator::FPSub(T op1, T op2) { + // NaNs should be handled elsewhere. + VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2)); + + if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) { + // inf - inf returns the default NaN. + FPProcessException(); + return FPDefaultNaN<T>(); + } else { + // Other cases should be handled by standard arithmetic. 
+ return op1 - op2; + } +} + + +template <typename T> +T Simulator::FPMul(T op1, T op2) { + // NaNs should be handled elsewhere. + VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2)); + + if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) { + // inf * 0.0 returns the default NaN. + FPProcessException(); + return FPDefaultNaN<T>(); + } else { + // Other cases should be handled by standard arithmetic. + return op1 * op2; + } +} + + +template<typename T> +T Simulator::FPMulx(T op1, T op2) { + if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) { + // inf * 0.0 returns +/-2.0. + T two = 2.0; + return copysign(1.0, op1) * copysign(1.0, op2) * two; + } + return FPMul(op1, op2); +} + + +template<typename T> +T Simulator::FPMulAdd(T a, T op1, T op2) { + T result = FPProcessNaNs3(a, op1, op2); + + T sign_a = copysign(1.0, a); + T sign_prod = copysign(1.0, op1) * copysign(1.0, op2); + bool isinf_prod = std::isinf(op1) || std::isinf(op2); + bool operation_generates_nan = + (std::isinf(op1) && (op2 == 0.0)) || // inf * 0.0 + (std::isinf(op2) && (op1 == 0.0)) || // 0.0 * inf + (std::isinf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf + + if (std::isnan(result)) { + // Generated NaNs override quiet NaNs propagated from a. + if (operation_generates_nan && IsQuietNaN(a)) { + FPProcessException(); + return FPDefaultNaN<T>(); + } else { + return result; + } + } + + // If the operation would produce a NaN, return the default NaN. + if (operation_generates_nan) { + FPProcessException(); + return FPDefaultNaN<T>(); + } + + // Work around broken fma implementations for exact zero results: The sign of + // exact 0.0 results is positive unless both a and op1 * op2 are negative. + if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) { + return ((sign_a < 0) && (sign_prod < 0)) ? 
-0.0 : 0.0; + } + + result = FusedMultiplyAdd(op1, op2, a); + VIXL_ASSERT(!std::isnan(result)); + + // Work around broken fma implementations for rounded zero results: If a is + // 0.0, the sign of the result is the sign of op1 * op2 before rounding. + if ((a == 0.0) && (result == 0.0)) { + return copysign(0.0, sign_prod); + } + + return result; +} + + +template <typename T> +T Simulator::FPDiv(T op1, T op2) { + // NaNs should be handled elsewhere. + VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2)); + + if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) { + // inf / inf and 0.0 / 0.0 return the default NaN. + FPProcessException(); + return FPDefaultNaN<T>(); + } else { + if (op2 == 0.0) FPProcessException(); + + // Other cases should be handled by standard arithmetic. + return op1 / op2; + } +} + + +template <typename T> +T Simulator::FPSqrt(T op) { + if (std::isnan(op)) { + return FPProcessNaN(op); + } else if (op < 0.0) { + FPProcessException(); + return FPDefaultNaN<T>(); + } else { + return sqrt(op); + } +} + + +template <typename T> +T Simulator::FPMax(T a, T b) { + T result = FPProcessNaNs(a, b); + if (std::isnan(result)) return result; + + if ((a == 0.0) && (b == 0.0) && + (copysign(1.0, a) != copysign(1.0, b))) { + // a and b are zero, and the sign differs: return +0.0. + return 0.0; + } else { + return (a > b) ? a : b; + } +} + + +template <typename T> +T Simulator::FPMaxNM(T a, T b) { + if (IsQuietNaN(a) && !IsQuietNaN(b)) { + a = kFP64NegativeInfinity; + } else if (!IsQuietNaN(a) && IsQuietNaN(b)) { + b = kFP64NegativeInfinity; + } + + T result = FPProcessNaNs(a, b); + return std::isnan(result) ? result : FPMax(a, b); +} + + +template <typename T> +T Simulator::FPMin(T a, T b) { + T result = FPProcessNaNs(a, b); + if (std::isnan(result)) return result; + + if ((a == 0.0) && (b == 0.0) && + (copysign(1.0, a) != copysign(1.0, b))) { + // a and b are zero, and the sign differs: return -0.0. 
+ return -0.0; + } else { + return (a < b) ? a : b; + } +} + + +template <typename T> +T Simulator::FPMinNM(T a, T b) { + if (IsQuietNaN(a) && !IsQuietNaN(b)) { + a = kFP64PositiveInfinity; + } else if (!IsQuietNaN(a) && IsQuietNaN(b)) { + b = kFP64PositiveInfinity; + } + + T result = FPProcessNaNs(a, b); + return std::isnan(result) ? result : FPMin(a, b); +} + + +template <typename T> +T Simulator::FPRecipStepFused(T op1, T op2) { + const T two = 2.0; + if ((std::isinf(op1) && (op2 == 0.0)) + || ((op1 == 0.0) && (std::isinf(op2)))) { + return two; + } else if (std::isinf(op1) || std::isinf(op2)) { + // Return +inf if signs match, otherwise -inf. + return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity + : kFP64NegativeInfinity; + } else { + return FusedMultiplyAdd(op1, op2, two); + } +} + + +template <typename T> +T Simulator::FPRSqrtStepFused(T op1, T op2) { + const T one_point_five = 1.5; + const T two = 2.0; + + if ((std::isinf(op1) && (op2 == 0.0)) + || ((op1 == 0.0) && (std::isinf(op2)))) { + return one_point_five; + } else if (std::isinf(op1) || std::isinf(op2)) { + // Return +inf if signs match, otherwise -inf. + return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity + : kFP64NegativeInfinity; + } else { + // The multiply-add-halve operation must be fully fused, so avoid interim + // rounding by checking which operand can be losslessly divided by two + // before doing the multiply-add. + if (std::isnormal(op1 / two)) { + return FusedMultiplyAdd(op1 / two, op2, one_point_five); + } else if (std::isnormal(op2 / two)) { + return FusedMultiplyAdd(op1, op2 / two, one_point_five); + } else { + // Neither operand is normal after halving: the result is dominated by + // the addition term, so just return that. + return one_point_five; + } + } +} + +int32_t Simulator::FPToFixedJS(double value) { + // The Z-flag is set when the conversion from double precision floating-point + // to 32-bit integer is exact. 
If the source value is +/-Infinity, -0.0, NaN, + // outside the bounds of a 32-bit integer, or isn't an exact integer then the + // Z-flag is unset. + int Z = 1; + int32_t result; + + if ((value == 0.0) || (value == kFP64PositiveInfinity) || + (value == kFP64NegativeInfinity)) { + // +/- zero and infinity all return zero, however -0 and +/- Infinity also + // unset the Z-flag. + result = 0.0; + if ((value != 0.0) || std::signbit(value)) { + Z = 0; + } + } else if (std::isnan(value)) { + // NaN values unset the Z-flag and set the result to 0. + FPProcessNaN(value); + result = 0; + Z = 0; + } else { + // All other values are converted to an integer representation, rounded + // toward zero. + double int_result = std::floor(value); + double error = value - int_result; + + if ((error != 0.0) && (int_result < 0.0)) { + int_result++; + } + + // Constrain the value into the range [INT32_MIN, INT32_MAX]. We can almost + // write a one-liner with std::round, but the behaviour on ties is incorrect + // for our purposes. + double mod_const = static_cast<double>(UINT64_C(1) << 32); + double mod_error = + (int_result / mod_const) - std::floor(int_result / mod_const); + double constrained; + if (mod_error == 0.5) { + constrained = INT32_MIN; + } else { + constrained = int_result - mod_const * round(int_result / mod_const); + } + + VIXL_ASSERT(std::floor(constrained) == constrained); + VIXL_ASSERT(constrained >= INT32_MIN); + VIXL_ASSERT(constrained <= INT32_MAX); + + // Take the bottom 32 bits of the result as a 32-bit integer. + result = static_cast<int32_t>(constrained); + + if ((int_result < INT32_MIN) || (int_result > INT32_MAX) || + (error != 0.0)) { + // If the integer result is out of range or the conversion isn't exact, + // take exception and unset the Z-flag. 
+ FPProcessException(); + Z = 0; + } + } + + ReadNzcv().SetN(0); + ReadNzcv().SetZ(Z); + ReadNzcv().SetC(0); + ReadNzcv().SetV(0); + + return result; +} + + +double Simulator::FPRoundInt(double value, FPRounding round_mode) { + if ((value == 0.0) || (value == kFP64PositiveInfinity) || + (value == kFP64NegativeInfinity)) { + return value; + } else if (std::isnan(value)) { + return FPProcessNaN(value); + } + + double int_result = std::floor(value); + double error = value - int_result; + switch (round_mode) { + case FPTieAway: { + // Take care of correctly handling the range ]-0.5, -0.0], which must + // yield -0.0. + if ((-0.5 < value) && (value < 0.0)) { + int_result = -0.0; + + } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) { + // If the error is greater than 0.5, or is equal to 0.5 and the integer + // result is positive, round up. + int_result++; + } + break; + } + case FPTieEven: { + // Take care of correctly handling the range [-0.5, -0.0], which must + // yield -0.0. + if ((-0.5 <= value) && (value < 0.0)) { + int_result = -0.0; + + // If the error is greater than 0.5, or is equal to 0.5 and the integer + // result is odd, round up. + } else if ((error > 0.5) || + ((error == 0.5) && (std::fmod(int_result, 2) != 0))) { + int_result++; + } + break; + } + case FPZero: { + // If value>0 then we take floor(value) + // otherwise, ceil(value). + if (value < 0) { + int_result = ceil(value); + } + break; + } + case FPNegativeInfinity: { + // We always use floor(value). + break; + } + case FPPositiveInfinity: { + // Take care of correctly handling the range ]-1.0, -0.0], which must + // yield -0.0. + if ((-1.0 < value) && (value < 0.0)) { + int_result = -0.0; + + // If the error is non-zero, round up. 
+ } else if (error > 0.0) { + int_result++; + } + break; + } + default: VIXL_UNIMPLEMENTED(); + } + return int_result; +} + + +int32_t Simulator::FPToInt32(double value, FPRounding rmode) { + value = FPRoundInt(value, rmode); + if (value >= kWMaxInt) { + return kWMaxInt; + } else if (value < kWMinInt) { + return kWMinInt; + } + return std::isnan(value) ? 0 : static_cast<int32_t>(value); +} + + +int64_t Simulator::FPToInt64(double value, FPRounding rmode) { + value = FPRoundInt(value, rmode); + // The compiler would have to round kXMaxInt, triggering a warning. Compare + // against the largest int64_t that is exactly representable as a double. + if (value > kXMaxExactInt) { + return kXMaxInt; + } else if (value < kXMinInt) { + return kXMinInt; + } + return std::isnan(value) ? 0 : static_cast<int64_t>(value); +} + + +uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) { + value = FPRoundInt(value, rmode); + if (value >= kWMaxUInt) { + return kWMaxUInt; + } else if (value < 0.0) { + return 0; + } + return std::isnan(value) ? 0 : static_cast<uint32_t>(value); +} + + +uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) { + value = FPRoundInt(value, rmode); + // The compiler would have to round kXMaxUInt, triggering a warning. Compare + // against the largest uint64_t that is exactly representable as a double. + if (value > kXMaxExactUInt) { + return kXMaxUInt; + } else if (value < 0.0) { + return 0; + } + return std::isnan(value) ? 
0 : static_cast<uint64_t>(value); +} + + +#define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN) \ +template <typename T> \ +LogicVRegister Simulator::FN(VectorFormat vform, \ + LogicVRegister dst, \ + const LogicVRegister& src1, \ + const LogicVRegister& src2) { \ + dst.ClearForWrite(vform); \ + for (int i = 0; i < LaneCountFromFormat(vform); i++) { \ + T op1 = src1.Float<T>(i); \ + T op2 = src2.Float<T>(i); \ + T result; \ + if (PROCNAN) { \ + result = FPProcessNaNs(op1, op2); \ + if (!std::isnan(result)) { \ + result = OP(op1, op2); \ + } \ + } else { \ + result = OP(op1, op2); \ + } \ + dst.SetFloat(i, result); \ + } \ + return dst; \ +} \ + \ +LogicVRegister Simulator::FN(VectorFormat vform, \ + LogicVRegister dst, \ + const LogicVRegister& src1, \ + const LogicVRegister& src2) { \ + if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { \ + FN<float>(vform, dst, src1, src2); \ + } else { \ + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); \ + FN<double>(vform, dst, src1, src2); \ + } \ + return dst; \ +} +NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP) +#undef DEFINE_NEON_FP_VECTOR_OP + + +LogicVRegister Simulator::fnmul(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + LogicVRegister product = fmul(vform, temp, src1, src2); + return fneg(vform, dst, product); +} + + +template <typename T> +LogicVRegister Simulator::frecps(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + T op1 = -src1.Float<T>(i); + T op2 = src2.Float<T>(i); + T result = FPProcessNaNs(op1, op2); + dst.SetFloat(i, std::isnan(result) ? 
result : FPRecipStepFused(op1, op2)); + } + return dst; +} + + +LogicVRegister Simulator::frecps(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + frecps<float>(vform, dst, src1, src2); + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + frecps<double>(vform, dst, src1, src2); + } + return dst; +} + + +template <typename T> +LogicVRegister Simulator::frsqrts(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + T op1 = -src1.Float<T>(i); + T op2 = src2.Float<T>(i); + T result = FPProcessNaNs(op1, op2); + dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2)); + } + return dst; +} + + +LogicVRegister Simulator::frsqrts(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + frsqrts<float>(vform, dst, src1, src2); + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + frsqrts<double>(vform, dst, src1, src2); + } + return dst; +} + + +template <typename T> +LogicVRegister Simulator::fcmp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + Condition cond) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + bool result = false; + T op1 = src1.Float<T>(i); + T op2 = src2.Float<T>(i); + T nan_result = FPProcessNaNs(op1, op2); + if (!std::isnan(nan_result)) { + switch (cond) { + case eq: result = (op1 == op2); break; + case ge: result = (op1 >= op2); break; + case gt: result = (op1 > op2) ; break; + case le: result = (op1 <= op2); break; + case lt: result = (op1 < op2) ; break; + default: VIXL_UNREACHABLE(); break; + } + } + dst.SetUint(vform, i, result ? 
MaxUintFromFormat(vform) : 0); + } + return dst; +} + + +LogicVRegister Simulator::fcmp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + Condition cond) { + if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + fcmp<float>(vform, dst, src1, src2, cond); + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + fcmp<double>(vform, dst, src1, src2, cond); + } + return dst; +} + + +LogicVRegister Simulator::fcmp_zero(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + Condition cond) { + SimVRegister temp; + if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + LogicVRegister zero_reg = dup_immediate(vform, temp, FloatToRawbits(0.0)); + fcmp<float>(vform, dst, src, zero_reg, cond); + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + LogicVRegister zero_reg = dup_immediate(vform, temp, + DoubleToRawbits(0.0)); + fcmp<double>(vform, dst, src, zero_reg, cond); + } + return dst; +} + + +LogicVRegister Simulator::fabscmp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + Condition cond) { + SimVRegister temp1, temp2; + if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1); + LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2); + fcmp<float>(vform, dst, abs_src1, abs_src2, cond); + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1); + LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2); + fcmp<double>(vform, dst, abs_src1, abs_src2, cond); + } + return dst; +} + + +template <typename T> +LogicVRegister Simulator::fmla(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + T op1 = src1.Float<T>(i); + T op2 = src2.Float<T>(i); 
+ T acc = dst.Float<T>(i); + T result = FPMulAdd(acc, op1, op2); + dst.SetFloat(i, result); + } + return dst; +} + + +LogicVRegister Simulator::fmla(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + fmla<float>(vform, dst, src1, src2); + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + fmla<double>(vform, dst, src1, src2); + } + return dst; +} + + +template <typename T> +LogicVRegister Simulator::fmls(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + T op1 = -src1.Float<T>(i); + T op2 = src2.Float<T>(i); + T acc = dst.Float<T>(i); + T result = FPMulAdd(acc, op1, op2); + dst.SetFloat(i, result); + } + return dst; +} + + +LogicVRegister Simulator::fmls(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + fmls<float>(vform, dst, src1, src2); + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + fmls<double>(vform, dst, src1, src2); + } + return dst; +} + + +template <typename T> +LogicVRegister Simulator::fneg(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + T op = src.Float<T>(i); + op = -op; + dst.SetFloat(i, op); + } + return dst; +} + + +LogicVRegister Simulator::fneg(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + fneg<float>(vform, dst, src); + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + fneg<double>(vform, dst, src); + } + return dst; +} + + +template <typename T> +LogicVRegister Simulator::fabs_(VectorFormat vform, + LogicVRegister dst, + const 
LogicVRegister& src) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + T op = src.Float<T>(i); + if (copysign(1.0, op) < 0.0) { + op = -op; + } + dst.SetFloat(i, op); + } + return dst; +} + + +LogicVRegister Simulator::fabs_(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + fabs_<float>(vform, dst, src); + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + fabs_<double>(vform, dst, src); + } + return dst; +} + + +LogicVRegister Simulator::fabd(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + fsub(vform, temp, src1, src2); + fabs_(vform, dst, temp); + return dst; +} + + +LogicVRegister Simulator::fsqrt(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + dst.ClearForWrite(vform); + if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + float result = FPSqrt(src.Float<float>(i)); + dst.SetFloat(i, result); + } + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + double result = FPSqrt(src.Float<double>(i)); + dst.SetFloat(i, result); + } + } + return dst; +} + + +#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP) \ +LogicVRegister Simulator::FNP(VectorFormat vform, \ + LogicVRegister dst, \ + const LogicVRegister& src1, \ + const LogicVRegister& src2) { \ + SimVRegister temp1, temp2; \ + uzp1(vform, temp1, src1, src2); \ + uzp2(vform, temp2, src1, src2); \ + FN(vform, dst, temp1, temp2); \ + return dst; \ +} \ + \ +LogicVRegister Simulator::FNP(VectorFormat vform, \ + LogicVRegister dst, \ + const LogicVRegister& src) { \ + if (vform == kFormatS) { \ + float result = OP(src.Float<float>(0), src.Float<float>(1)); \ + dst.SetFloat(0, result); \ + } else { \ + VIXL_ASSERT(vform == kFormatD); \ + double result = 
OP(src.Float<double>(0), src.Float<double>(1)); \ + dst.SetFloat(0, result); \ + } \ + dst.ClearForWrite(vform); \ + return dst; \ +} +NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP) +#undef DEFINE_NEON_FP_PAIR_OP + + +LogicVRegister Simulator::fminmaxv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + FPMinMaxOp Op) { + VIXL_ASSERT(vform == kFormat4S); + USE(vform); + float result1 = (this->*Op)(src.Float<float>(0), src.Float<float>(1)); + float result2 = (this->*Op)(src.Float<float>(2), src.Float<float>(3)); + float result = (this->*Op)(result1, result2); + dst.ClearForWrite(kFormatS); + dst.SetFloat<float>(0, result); + return dst; +} + + +LogicVRegister Simulator::fmaxv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + return fminmaxv(vform, dst, src, &Simulator::FPMax); +} + + +LogicVRegister Simulator::fminv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + return fminmaxv(vform, dst, src, &Simulator::FPMin); +} + + +LogicVRegister Simulator::fmaxnmv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + return fminmaxv(vform, dst, src, &Simulator::FPMaxNM); +} + + +LogicVRegister Simulator::fminnmv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + return fminmaxv(vform, dst, src, &Simulator::FPMinNM); +} + + +LogicVRegister Simulator::fmul(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + dst.ClearForWrite(vform); + SimVRegister temp; + if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); + fmul<float>(vform, dst, src1, index_reg); + + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); + fmul<double>(vform, dst, src1, index_reg); + } + return dst; +} + + +LogicVRegister Simulator::fmla(VectorFormat vform, + 
LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + dst.ClearForWrite(vform); + SimVRegister temp; + if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); + fmla<float>(vform, dst, src1, index_reg); + + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); + fmla<double>(vform, dst, src1, index_reg); + } + return dst; +} + + +LogicVRegister Simulator::fmls(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + dst.ClearForWrite(vform); + SimVRegister temp; + if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); + fmls<float>(vform, dst, src1, index_reg); + + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); + fmls<double>(vform, dst, src1, index_reg); + } + return dst; +} + + +LogicVRegister Simulator::fmulx(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + dst.ClearForWrite(vform); + SimVRegister temp; + if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); + fmulx<float>(vform, dst, src1, index_reg); + + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); + fmulx<double>(vform, dst, src1, index_reg); + } + return dst; +} + + +LogicVRegister Simulator::frint(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + FPRounding rounding_mode, + bool inexact_exception) { + dst.ClearForWrite(vform); + if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { 
+ float input = src.Float<float>(i); + float rounded = FPRoundInt(input, rounding_mode); + if (inexact_exception && !std::isnan(input) && (input != rounded)) { + FPProcessException(); + } + dst.SetFloat<float>(i, rounded); + } + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + double input = src.Float<double>(i); + double rounded = FPRoundInt(input, rounding_mode); + if (inexact_exception && !std::isnan(input) && (input != rounded)) { + FPProcessException(); + } + dst.SetFloat<double>(i, rounded); + } + } + return dst; +} + + +LogicVRegister Simulator::fcvts(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + FPRounding rounding_mode, + int fbits) { + dst.ClearForWrite(vform); + if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + float op = src.Float<float>(i) * std::pow(2.0f, fbits); + dst.SetInt(vform, i, FPToInt32(op, rounding_mode)); + } + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + double op = src.Float<double>(i) * std::pow(2.0, fbits); + dst.SetInt(vform, i, FPToInt64(op, rounding_mode)); + } + } + return dst; +} + + +LogicVRegister Simulator::fcvtu(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + FPRounding rounding_mode, + int fbits) { + dst.ClearForWrite(vform); + if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + float op = src.Float<float>(i) * std::pow(2.0f, fbits); + dst.SetUint(vform, i, FPToUInt32(op, rounding_mode)); + } + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + double op = src.Float<double>(i) * std::pow(2.0, fbits); + dst.SetUint(vform, i, FPToUInt64(op, rounding_mode)); + } + } + return dst; +} + + +LogicVRegister 
Simulator::fcvtl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) { + // TODO: Full support for SimFloat16 in SimRegister(s). + dst.SetFloat(i, + FPToFloat(RawbitsToFloat16(src.Float<uint16_t>(i)), + ReadDN())); + } + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) { + dst.SetFloat(i, FPToDouble(src.Float<float>(i), ReadDN())); + } + } + return dst; +} + + +LogicVRegister Simulator::fcvtl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + int lane_count = LaneCountFromFormat(vform); + if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + for (int i = 0; i < lane_count; i++) { + // TODO: Full support for SimFloat16 in SimRegister(s). + dst.SetFloat(i, + FPToFloat(RawbitsToFloat16( + src.Float<uint16_t>(i + lane_count)), + ReadDN())); + } + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + for (int i = 0; i < lane_count; i++) { + dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count), ReadDN())); + } + } + return dst; +} + + +LogicVRegister Simulator::fcvtn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + SimVRegister tmp; + LogicVRegister srctmp = mov(kFormat2D, tmp, src); + dst.ClearForWrite(vform); + if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst.SetFloat(i, + Float16ToRawbits(FPToFloat16(srctmp.Float<float>(i), + FPTieEven, + ReadDN()))); + } + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst.SetFloat(i, FPToFloat(srctmp.Float<double>(i), FPTieEven, ReadDN())); + } + } + return dst; +} + + +LogicVRegister Simulator::fcvtn2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + int lane_count = 
LaneCountFromFormat(vform) / 2; + if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { + for (int i = lane_count - 1; i >= 0; i--) { + dst.SetFloat(i + lane_count, + Float16ToRawbits( + FPToFloat16(src.Float<float>(i), FPTieEven, ReadDN()))); + } + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); + for (int i = lane_count - 1; i >= 0; i--) { + dst.SetFloat(i + lane_count, + FPToFloat(src.Float<double>(i), FPTieEven, ReadDN())); + } + } + return dst; +} + + +/* Narrowing double -> single conversion using FPRoundOdd. Reads from a temporary copy of src and clears dst before writing; only S-sized destination lanes are accepted. */ +LogicVRegister Simulator::fcvtxn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + SimVRegister tmp; + LogicVRegister srctmp = mov(kFormat2D, tmp, src); + dst.ClearForWrite(vform); + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst.SetFloat(i, FPToFloat(srctmp.Float<double>(i), FPRoundOdd, ReadDN())); + } + return dst; +} + + +/* FPRoundOdd narrowing into lanes [lane_count, 2 * lane_count) of dst, lane_count being half the lane count of vform. dst is not cleared; lanes are written from the highest index down. */ +LogicVRegister Simulator::fcvtxn2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); + int lane_count = LaneCountFromFormat(vform) / 2; + for (int i = lane_count - 1; i >= 0; i--) { + dst.SetFloat(i + lane_count, + FPToFloat(src.Float<double>(i), FPRoundOdd, ReadDN())); + } + return dst; +} + + +// Based on reference C function recip_sqrt_estimate from ARM ARM. 
+double Simulator::recip_sqrt_estimate(double a) { + int q0, q1, s; + double r; + if (a < 0.5) { + q0 = static_cast<int>(a * 512.0); + r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0); + } else { + q1 = static_cast<int>(a * 256.0); + r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0); + } + s = static_cast<int>(256.0 * r + 0.5); + return static_cast<double>(s) / 256.0; +} + + +/* Convenience wrapper: forwards to ExtractUnsignedBitfield64(start_bit, end_bit, val). */ +static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) { + return ExtractUnsignedBitfield64(start_bit, end_bit, val); +} + + +/* Reciprocal square-root estimate of op (T is float or double). Special cases, checked in order: NaN -> FPProcessNaN(op); +/-0 -> infinity of the same sign; any other negative input -> FPProcessException() and the default NaN; +infinity -> 0. Otherwise the operand is split into exponent and fraction (a float fraction is shifted left by 29 so both types share the 52-bit layout), a zero exponent is normalised by shifting the fraction, and the result is packed from recip_sqrt_estimate() applied to a scaled double. */ +template <typename T> +T Simulator::FPRecipSqrtEstimate(T op) { + if (std::isnan(op)) { + return FPProcessNaN(op); + } else if (op == 0.0) { + if (copysign(1.0, op) < 0.0) { + return kFP64NegativeInfinity; + } else { + return kFP64PositiveInfinity; + } + } else if (copysign(1.0, op) < 0.0) { + FPProcessException(); + return FPDefaultNaN<T>(); + } else if (std::isinf(op)) { + return 0.0; + } else { + uint64_t fraction; + int exp, result_exp; + + if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) + exp = FloatExp(op); + fraction = FloatMantissa(op); + fraction <<= 29; + } else { + exp = DoubleExp(op); + fraction = DoubleMantissa(op); + } + + if (exp == 0) { + while (Bits(fraction, 51, 51) == 0) { + fraction = Bits(fraction, 50, 0) << 1; + exp -= 1; + } + fraction = Bits(fraction, 50, 0) << 1; + } + + double scaled; + if (Bits(exp, 0, 0) == 0) { + scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44); + } else { + scaled = DoublePack(0, 1021, Bits(fraction, 51, 44) << 44); + } + + if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) + result_exp = (380 - exp) / 2; + } else { + result_exp = (3068 - exp) / 2; + } + + uint64_t estimate = DoubleToRawbits(recip_sqrt_estimate(scaled)); + + if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) + uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0)); + uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29)); + return FloatPack(0, exp_bits, est_bits); + } 
else { + return DoublePack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0)); + } + } +} + + +/* Applies FPRecipSqrtEstimate to every lane of src (float for S-sized lanes, double for D-sized lanes) after clearing dst for vform. */ +LogicVRegister Simulator::frsqrte(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + dst.ClearForWrite(vform); + if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + float input = src.Float<float>(i); + dst.SetFloat(i, FPRecipSqrtEstimate<float>(input)); + } + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + double input = src.Float<double>(i); + dst.SetFloat(i, FPRecipSqrtEstimate<double>(input)); + } + } + return dst; +} + +/* Reciprocal estimate of op (T is float or double). NaN -> FPProcessNaN(op); infinity -> zero of the same sign; zero -> FPProcessException() and infinity of the same sign. Inputs whose magnitude is below 2^-128 (float) or 2^-1024 (double) raise an exception and return either infinity or the largest normal value, chosen from `rounding` and the sign. Every other input is normalised and passed through recip_estimate(). */ +template <typename T> +T Simulator::FPRecipEstimate(T op, FPRounding rounding) { + uint32_t sign; + + if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) + sign = FloatSign(op); + } else { + sign = DoubleSign(op); + } + + if (std::isnan(op)) { + return FPProcessNaN(op); + } else if (std::isinf(op)) { + return (sign == 1) ? -0.0 : 0.0; + } else if (op == 0.0) { + FPProcessException(); // FPExc_DivideByZero exception. + return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity; + } else if (((sizeof(T) == sizeof(float)) && // NOLINT(runtime/sizeof) + (std::fabs(op) < std::pow(2.0, -128.0))) || + ((sizeof(T) == sizeof(double)) && // NOLINT(runtime/sizeof) + (std::fabs(op) < std::pow(2.0, -1024.0)))) { + bool overflow_to_inf = false; + switch (rounding) { + case FPTieEven: overflow_to_inf = true; break; + case FPPositiveInfinity: overflow_to_inf = (sign == 0); break; + case FPNegativeInfinity: overflow_to_inf = (sign == 1); break; + case FPZero: overflow_to_inf = false; break; + default: break; + } + FPProcessException(); // FPExc_Overflow and FPExc_Inexact. + if (overflow_to_inf) { + return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity; + } else { + // Return FPMaxNormal(sign). 
+ if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) + return FloatPack(sign, 0xfe, 0x07fffff); + } else { + return DoublePack(sign, 0x7fe, 0x0fffffffffffffl); + } + } + } else { + uint64_t fraction; + int exp, result_exp; + /* NOTE(review): this inner `sign` shadows the `sign` declared at the top of the function and is recomputed from op just below. */ + uint32_t sign; + + if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) + sign = FloatSign(op); + exp = FloatExp(op); + fraction = FloatMantissa(op); + fraction <<= 29; + } else { + sign = DoubleSign(op); + exp = DoubleExp(op); + fraction = DoubleMantissa(op); + } + + if (exp == 0) { + if (Bits(fraction, 51, 51) == 0) { + exp -= 1; + fraction = Bits(fraction, 49, 0) << 2; + } else { + fraction = Bits(fraction, 50, 0) << 1; + } + } + + double scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44); + + if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) + result_exp = (253 - exp); // In range 253-254 = -1 to 253+1 = 254. + } else { + result_exp = (2045 - exp); // In range 2045-2046 = -1 to 2045+1 = 2046. + } + + double estimate = recip_estimate(scaled); + + fraction = DoubleMantissa(estimate); + /* A result exponent of 0 or -1 cannot be packed as is: the fraction is shifted right by one or two bits with a one inserted at bit 51 or bit 50, and the exponent field becomes 0. */ + if (result_exp == 0) { + fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1); + } else if (result_exp == -1) { + fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2); + result_exp = 0; + } + if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) + uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0)); + uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29)); + return FloatPack(sign, exp_bits, frac_bits); + } else { + return DoublePack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0)); + } + } +} + + +/* Applies FPRecipEstimate with rounding mode `round` to every lane of src (float for S-sized lanes, double for D-sized lanes) after clearing dst for vform. */ +LogicVRegister Simulator::frecpe(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + FPRounding round) { + dst.ClearForWrite(vform); + if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + float input = src.Float<float>(i); + dst.SetFloat(i, FPRecipEstimate<float>(input, round)); + } + } else { + 
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + double input = src.Float<double>(i); + dst.SetFloat(i, FPRecipEstimate<double>(input, round)); + } + } + return dst; +} + + +/* Unsigned reciprocal square-root estimate per lane. Operands <= 0x3FFFFFFF produce 0xFFFFFFFF; any other operand is scaled by 2^-32, passed to recip_sqrt_estimate(), scaled by 2^31 and truncated to uint32_t. */ +LogicVRegister Simulator::ursqrte(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + dst.ClearForWrite(vform); + uint64_t operand; + uint32_t result; + double dp_operand, dp_result; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + operand = src.Uint(vform, i); + if (operand <= 0x3FFFFFFF) { + result = 0xFFFFFFFF; + } else { + dp_operand = operand * std::pow(2.0, -32); + dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31); + result = static_cast<uint32_t>(dp_result); + } + dst.SetUint(vform, i, result); + } + return dst; +} + + +// Based on reference C function recip_estimate from ARM ARM. +double Simulator::recip_estimate(double a) { + int q, s; + double r; + q = static_cast<int>(a * 512.0); + r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0); + s = static_cast<int>(256.0 * r + 0.5); + return static_cast<double>(s) / 256.0; +} + + +/* Unsigned reciprocal estimate per lane. Operands <= 0x7FFFFFFF produce 0xFFFFFFFF; any other operand is scaled by 2^-32, passed to recip_estimate(), scaled by 2^31 and truncated to uint32_t. */ +LogicVRegister Simulator::urecpe(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + dst.ClearForWrite(vform); + uint64_t operand; + uint32_t result; + double dp_operand, dp_result; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + operand = src.Uint(vform, i); + if (operand <= 0x7FFFFFFF) { + result = 0xFFFFFFFF; + } else { + dp_operand = operand * std::pow(2.0, -32); + dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31); + result = static_cast<uint32_t>(dp_result); + } + dst.SetUint(vform, i, result); + } + return dst; +} + +/* Per-lane worker for frecpx, T being float or double. NaN lanes go through FPProcessNaN. Every other lane keeps the sign of the input, gets a zero fraction, and an exponent field that is the bitwise inverse of the input's exponent field (or 0xFF - 1 / 0x7FF - 1 when the input's exponent field is 0). */ +template <typename T> +LogicVRegister Simulator::frecpx(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + T op = src.Float<T>(i); + T result; + if (std::isnan(op)) { + result = 
FPProcessNaN(op); + } else { + int exp; + uint32_t sign; + if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) + sign = FloatSign(op); + exp = FloatExp(op); + exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0)); + result = FloatPack(sign, exp, 0); + } else { + sign = DoubleSign(op); + exp = DoubleExp(op); + exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0)); + result = DoublePack(sign, exp, 0); + } + } + dst.SetFloat(i, result); + } + return dst; +} + + +/* Dispatches to frecpx<float> or frecpx<double> according to the lane size of vform and returns dst. */ +LogicVRegister Simulator::frecpx(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + frecpx<float>(vform, dst, src); + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + frecpx<double>(vform, dst, src); + } + return dst; +} + +/* Signed integer -> floating-point per lane: each lane is read with src.Int() and converted by FixedToFloat (S-sized lanes) or FixedToDouble (D-sized lanes), which receive fbits and round unchanged. dst is not cleared first. */ +LogicVRegister Simulator::scvtf(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int fbits, + FPRounding round) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + float result = FixedToFloat(src.Int(kFormatS, i), fbits, round); + dst.SetFloat<float>(i, result); + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + double result = FixedToDouble(src.Int(kFormatD, i), fbits, round); + dst.SetFloat<double>(i, result); + } + } + return dst; +} + + +/* Unsigned counterpart of scvtf: lanes are read with src.Uint() and converted by UFixedToFloat / UFixedToDouble. */ +LogicVRegister Simulator::ucvtf(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int fbits, + FPRounding round) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round); + dst.SetFloat<float>(i, result); + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round); + dst.SetFloat<double>(i, result); + } + } + return dst; +} + + +} // namespace vixl + +#endif // JS_SIMULATOR_ARM64 diff 
--git a/js/src/jit/arm64/vixl/MacroAssembler-vixl.cpp b/js/src/jit/arm64/vixl/MacroAssembler-vixl.cpp new file mode 100644 index 0000000000..5c4a5ce145 --- /dev/null +++ b/js/src/jit/arm64/vixl/MacroAssembler-vixl.cpp @@ -0,0 +1,2027 @@ +// Copyright 2015, ARM Limited +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#include "jit/arm64/vixl/MacroAssembler-vixl.h" + +#include <ctype.h> + +namespace vixl { + +/* Default construction: sp_ is x28, tmp_list_ holds ip0 and ip1, and fptmp_list_ holds d31. */ +MacroAssembler::MacroAssembler() + : js::jit::Assembler(), + sp_(x28), + tmp_list_(ip0, ip1), + fptmp_list_(d31) +{ +} + + +/* Forwards to Assembler::FinalizeCode(). */ +void MacroAssembler::FinalizeCode() { + Assembler::FinalizeCode(); +} + + +/* Materialises imm in rd and returns the number of instructions used. When masm is NULL no code is emitted and only the instruction count is computed. imm must fit in 32 bits (signed or unsigned) unless rd is a 64-bit register. */ +int MacroAssembler::MoveImmediateHelper(MacroAssembler* masm, + const Register &rd, + uint64_t imm) { + bool emit_code = (masm != NULL); + VIXL_ASSERT(IsUint32(imm) || IsInt32(imm) || rd.Is64Bits()); + // The worst case for size is mov 64-bit immediate to sp: + // * up to 4 instructions to materialise the constant + // * 1 instruction to move to sp + MacroEmissionCheckScope guard(masm); + + // Immediates on Aarch64 can be produced using an initial value, and zero to + // three move keep operations. + // + // Initial values can be generated with: + // 1. 64-bit move zero (movz). + // 2. 32-bit move inverted (movn). + // 3. 64-bit move inverted. + // 4. 32-bit orr immediate. + // 5. 64-bit orr immediate. + // Move-keep may then be used to modify each of the 16-bit half words. + // + // The code below supports all five initial value generators, and + // applying move-keep operations to move-zero and move-inverted initial + // values. + + // Try to move the immediate in one instruction, and if that fails, switch to + // using multiple instructions. + if (OneInstrMoveImmediateHelper(masm, rd, imm)) { + return 1; + } else { + int instruction_count = 0; + unsigned reg_size = rd.size(); + + // Generic immediate case. Imm will be represented by + // [imm3, imm2, imm1, imm0], where each imm is 16 bits. + // A move-zero or move-inverted is generated for the first non-zero or + // non-0xffff immX, and a move-keep for subsequent non-zero immX. + + uint64_t ignored_halfword = 0; + bool invert_move = false; + // If the number of 0xffff halfwords is greater than the number of 0x0000 + // halfwords, it's more efficient to use move-inverted. 
+ if (CountClearHalfWords(~imm, reg_size) > + CountClearHalfWords(imm, reg_size)) { + ignored_halfword = 0xffff; + invert_move = true; + } + + // Mov instructions can't move values into the stack pointer, so set up a + // temporary register, if needed. + UseScratchRegisterScope temps; + Register temp; + if (emit_code) { + temps.Open(masm); + temp = rd.IsSP() ? temps.AcquireSameSizeAs(rd) : rd; + } + + // Iterate through the halfwords. Use movn/movz for the first non-ignored + // halfword, and movk for subsequent halfwords. + // + // The loop is bounded by reg_size rather than temp.size(): temp is only + // assigned when emit_code is true, so in count-only mode (masm == NULL) it + // is still default-constructed and must not decide how many halfwords are + // visited. When code is emitted temp is rd or a scratch register of the + // same size as rd, so the bound is unchanged in that case. + VIXL_ASSERT((reg_size % 16) == 0); + bool first_mov_done = false; + for (unsigned i = 0; i < (reg_size / 16); i++) { + uint64_t imm16 = (imm >> (16 * i)) & 0xffff; + if (imm16 != ignored_halfword) { + if (!first_mov_done) { + if (invert_move) { + if (emit_code) masm->movn(temp, ~imm16 & 0xffff, 16 * i); + instruction_count++; + } else { + if (emit_code) masm->movz(temp, imm16, 16 * i); + instruction_count++; + } + first_mov_done = true; + } else { + // Construct a wider constant. + if (emit_code) masm->movk(temp, imm16, 16 * i); + instruction_count++; + } + } + } + + VIXL_ASSERT(first_mov_done); + + // Move the temporary if the original destination register was the stack + // pointer. + if (rd.IsSP()) { + if (emit_code) masm->mov(rd, temp); + instruction_count++; + } + return instruction_count; + } +} + + +/* Tries to load imm into dst with a single movz, movn or orr-immediate instruction. Returns true when one of these encodings applies (the instruction is emitted only if masm is non-NULL) and false otherwise. movz and movn are skipped when dst is the stack pointer. */ +bool MacroAssembler::OneInstrMoveImmediateHelper(MacroAssembler* masm, + const Register& dst, + int64_t imm) { + bool emit_code = masm != NULL; + unsigned n, imm_s, imm_r; + int reg_size = dst.size(); + + if (IsImmMovz(imm, reg_size) && !dst.IsSP()) { + // Immediate can be represented in a move zero instruction. Movz can't write + // to the stack pointer. + if (emit_code) { + masm->movz(dst, imm); + } + return true; + } else if (IsImmMovn(imm, reg_size) && !dst.IsSP()) { + // Immediate can be represented in a move negative instruction. Movn can't + // write to the stack pointer. + if (emit_code) { + masm->movn(dst, dst.Is64Bits() ? 
~imm : (~imm & kWRegMask)); + } + return true; + } else if (IsImmLogical(imm, reg_size, &n, &imm_s, &imm_r)) { + // Immediate can be represented in a logical orr instruction. + VIXL_ASSERT(!dst.IsZero()); + if (emit_code) { + masm->LogicalImmediate( + dst, AppropriateZeroRegFor(dst), n, imm_s, imm_r, ORR); + } + return true; + } + return false; +} + + +/* Generic branch on a BranchType. Types in the condition-code range are cast to Condition and forwarded to the conditional B; the remaining types map onto B, Cbz, Cbnz, Tbz and Tbnz, and `never` emits nothing. reg and bit may only be supplied for types that use them. */ +void MacroAssembler::B(Label* label, BranchType type, Register reg, int bit) { + VIXL_ASSERT((reg.Is(NoReg) || (type >= kBranchTypeFirstUsingReg)) && + ((bit == -1) || (type >= kBranchTypeFirstUsingBit))); + if (kBranchTypeFirstCondition <= type && type <= kBranchTypeLastCondition) { + B(static_cast<Condition>(type), label); + } else { + switch (type) { + case always: B(label); break; + case never: break; + case reg_zero: Cbz(reg, label); break; + case reg_not_zero: Cbnz(reg, label); break; + case reg_bit_clear: Tbz(reg, bit, label); break; + case reg_bit_set: Tbnz(reg, bit, label); break; + default: + VIXL_UNREACHABLE(); + } + } +} + + +/* Unconditional branch: emits a single b to label. */ +void MacroAssembler::B(Label* label) { + SingleEmissionCheckScope guard(this); + b(label); +} + + +/* Conditional branch to label. If the label is already bound and out of range for a conditional branch, an inverted conditional branch over an unconditional b is emitted instead. cond must not be al or nv. */ +void MacroAssembler::B(Label* label, Condition cond) { + VIXL_ASSERT((cond != al) && (cond != nv)); + EmissionCheckScope guard(this, 2 * kInstructionSize); + + if (label->bound() && LabelIsOutOfRange(label, CondBranchType)) { + Label done; + b(&done, InvertCondition(cond)); + b(label); + bind(&done); + } else { + b(label, cond); + } +} + + +/* Branch to label if rt is non-zero. For a bound, out-of-range label the sequence is cbz over an unconditional b. rt must not be the zero register. */ +void MacroAssembler::Cbnz(const Register& rt, Label* label) { + VIXL_ASSERT(!rt.IsZero()); + EmissionCheckScope guard(this, 2 * kInstructionSize); + + if (label->bound() && LabelIsOutOfRange(label, CondBranchType)) { + Label done; + cbz(rt, &done); + b(label); + bind(&done); + } else { + cbnz(rt, label); + } +} + + +/* Branch to label if rt is zero; mirror image of Cbnz. */ +void MacroAssembler::Cbz(const Register& rt, Label* label) { + VIXL_ASSERT(!rt.IsZero()); + EmissionCheckScope guard(this, 2 * kInstructionSize); + + if (label->bound() && LabelIsOutOfRange(label, CondBranchType)) { + Label done; + 
cbnz(rt, &done); + b(label); + bind(&done); + } else { + cbz(rt, label); + } +} + + +/* Branch to label if bit bit_pos of rt is set. For a bound label that is out of range for a test branch, emits tbz over an unconditional b. rt must not be the zero register. */ +void MacroAssembler::Tbnz(const Register& rt, unsigned bit_pos, Label* label) { + VIXL_ASSERT(!rt.IsZero()); + EmissionCheckScope guard(this, 2 * kInstructionSize); + + if (label->bound() && LabelIsOutOfRange(label, TestBranchType)) { + Label done; + tbz(rt, bit_pos, &done); + b(label); + bind(&done); + } else { + tbnz(rt, bit_pos, label); + } +} + + +/* Branch to label if bit bit_pos of rt is clear; mirror image of Tbnz. */ +void MacroAssembler::Tbz(const Register& rt, unsigned bit_pos, Label* label) { + VIXL_ASSERT(!rt.IsZero()); + EmissionCheckScope guard(this, 2 * kInstructionSize); + + if (label->bound() && LabelIsOutOfRange(label, TestBranchType)) { + Label done; + tbnz(rt, bit_pos, &done); + b(label); + bind(&done); + } else { + tbz(rt, bit_pos, label); + } +} + + +/* And, Ands, Bic, Bics, Orr, Orn, Eor and Eon below are thin wrappers that forward to LogicalMacro with the matching LogicalOp. Tst is Ands with the appropriate zero register as destination. */ +void MacroAssembler::And(const Register& rd, + const Register& rn, + const Operand& operand) { + LogicalMacro(rd, rn, operand, AND); +} + + +void MacroAssembler::Ands(const Register& rd, + const Register& rn, + const Operand& operand) { + LogicalMacro(rd, rn, operand, ANDS); +} + + +void MacroAssembler::Tst(const Register& rn, + const Operand& operand) { + Ands(AppropriateZeroRegFor(rn), rn, operand); +} + + +void MacroAssembler::Bic(const Register& rd, + const Register& rn, + const Operand& operand) { + LogicalMacro(rd, rn, operand, BIC); +} + + +void MacroAssembler::Bics(const Register& rd, + const Register& rn, + const Operand& operand) { + LogicalMacro(rd, rn, operand, BICS); +} + + +void MacroAssembler::Orr(const Register& rd, + const Register& rn, + const Operand& operand) { + LogicalMacro(rd, rn, operand, ORR); +} + + +void MacroAssembler::Orn(const Register& rd, + const Register& rn, + const Operand& operand) { + LogicalMacro(rd, rn, operand, ORN); +} + + +void MacroAssembler::Eor(const Register& rd, + const Register& rn, + const Operand& operand) { + LogicalMacro(rd, rn, operand, EOR); +} + + +void MacroAssembler::Eon(const Register& rd, + const Register& rn, + const Operand& 
operand) { + LogicalMacro(rd, rn, operand, EON); +} + + +/* Emits rd = rn <op> operand for any operand form. Immediates: NOT-style ops are first rewritten as the plain op on the inverted immediate; all-clear and all-set immediates are reduced to a Mov/Mvn where possible; encodable immediates use LogicalImmediate; everything else is materialised in a scratch register. Extended-register operands are computed into a scratch register first; shifted-register operands are passed straight to Logical. */ +void MacroAssembler::LogicalMacro(const Register& rd, + const Register& rn, + const Operand& operand, + LogicalOp op) { + // The worst case for size is logical immediate to sp: + // * up to 4 instructions to materialise the constant + // * 1 instruction to do the operation + // * 1 instruction to move to sp + MacroEmissionCheckScope guard(this); + UseScratchRegisterScope temps(this); + + if (operand.IsImmediate()) { + int64_t immediate = operand.immediate(); + unsigned reg_size = rd.size(); + + // If the operation is NOT, invert the operation and immediate. + if ((op & NOT) == NOT) { + op = static_cast<LogicalOp>(op & ~NOT); + immediate = ~immediate; + } + + // Ignore the top 32 bits of an immediate if we're moving to a W register. + if (rd.Is32Bits()) { + // Check that the top 32 bits are consistent. + VIXL_ASSERT(((immediate >> kWRegSize) == 0) || + ((immediate >> kWRegSize) == -1)); + immediate &= kWRegMask; + } + + VIXL_ASSERT(rd.Is64Bits() || IsUint32(immediate)); + + // Special cases for all set or all clear immediates. + if (immediate == 0) { + switch (op) { + case AND: + Mov(rd, 0); + return; + case ORR: + VIXL_FALLTHROUGH(); + case EOR: + Mov(rd, rn); + return; + case ANDS: + VIXL_FALLTHROUGH(); + case BICS: + break; + default: + VIXL_UNREACHABLE(); + } + } else if ((rd.Is64Bits() && (immediate == -1)) || + (rd.Is32Bits() && (immediate == 0xffffffff))) { + switch (op) { + case AND: + Mov(rd, rn); + return; + case ORR: + Mov(rd, immediate); + return; + case EOR: + Mvn(rd, rn); + return; + case ANDS: + VIXL_FALLTHROUGH(); + case BICS: + break; + default: + VIXL_UNREACHABLE(); + } + } + + unsigned n, imm_s, imm_r; + if (IsImmLogical(immediate, reg_size, &n, &imm_s, &imm_r)) { + // Immediate can be encoded in the instruction. + LogicalImmediate(rd, rn, n, imm_s, imm_r, op); + } else { + // Immediate can't be encoded: synthesize using move immediate. 
+ Register temp = temps.AcquireSameSizeAs(rn); + + // If the left-hand input is the stack pointer, we can't pre-shift the + // immediate, as the encoding won't allow the subsequent post shift. + PreShiftImmMode mode = rn.IsSP() ? kNoShift : kAnyShift; + Operand imm_operand = MoveImmediateForShiftedOp(temp, immediate, mode); + + // VIXL can acquire temp registers. Assert that the caller is aware. + VIXL_ASSERT(!temp.Is(rd) && !temp.Is(rn)); + VIXL_ASSERT(!temp.Is(operand.maybeReg())); + + if (rd.Is(sp)) { + // If rd is the stack pointer we cannot use it as the destination + // register so we use the temp register as an intermediate again. + Logical(temp, rn, imm_operand, op); + Mov(sp, temp); + } else { + Logical(rd, rn, imm_operand, op); + } + } + } else if (operand.IsExtendedRegister()) { + VIXL_ASSERT(operand.reg().size() <= rd.size()); + // Add/sub extended supports shift <= 4. We want to support exactly the + // same modes here. + VIXL_ASSERT(operand.shift_amount() <= 4); + VIXL_ASSERT(operand.reg().Is64Bits() || + ((operand.extend() != UXTX) && (operand.extend() != SXTX))); + + temps.Exclude(operand.reg()); + Register temp = temps.AcquireSameSizeAs(rn); + + // VIXL can acquire temp registers. Assert that the caller is aware. + VIXL_ASSERT(!temp.Is(rd) && !temp.Is(rn)); + VIXL_ASSERT(!temp.Is(operand.maybeReg())); + + EmitExtendShift(temp, operand.reg(), operand.extend(), + operand.shift_amount()); + Logical(rd, rn, Operand(temp), op); + } else { + // The operand can be encoded in the instruction. + VIXL_ASSERT(operand.IsShiftedRegister()); + Logical(rd, rn, operand, op); + } +} + + +/* Moves operand into rd. Immediates go through Mov(rd, imm); a shifted register with a non-zero shift becomes a shift instruction; an extended register becomes an extend (with optional shift); a plain register becomes a mov, which is dropped when source and destination are the same register unless rd is a W register and discard_mode is kDontDiscardForSameWReg. */ +void MacroAssembler::Mov(const Register& rd, + const Operand& operand, + DiscardMoveMode discard_mode) { + // The worst case for size is mov immediate with up to 4 instructions. + MacroEmissionCheckScope guard(this); + + if (operand.IsImmediate()) { + // Call the macro assembler for generic immediates. 
+ Mov(rd, operand.immediate()); + } else if (operand.IsShiftedRegister() && (operand.shift_amount() != 0)) { + // Emit a shift instruction if moving a shifted register. This operation + // could also be achieved using an orr instruction (like orn used by Mvn), + // but using a shift instruction makes the disassembly clearer. + EmitShift(rd, operand.reg(), operand.shift(), operand.shift_amount()); + } else if (operand.IsExtendedRegister()) { + // Emit an extend instruction if moving an extended register. This handles + // extend with post-shift operations, too. + EmitExtendShift(rd, operand.reg(), operand.extend(), + operand.shift_amount()); + } else { + // Otherwise, emit a register move only if the registers are distinct, or + // if they are not X registers. + // + // Note that mov(w0, w0) is not a no-op because it clears the top word of + // x0. A flag is provided (kDiscardForSameWReg) if a move between the same W + // registers is not required to clear the top word of the X register. In + // this case, the instruction is discarded. + // + // If the sp is an operand, add #0 is emitted, otherwise, orr #0. + if (!rd.Is(operand.reg()) || (rd.Is32Bits() && + (discard_mode == kDontDiscardForSameWReg))) { + mov(rd, operand.reg()); + } + } +} + + +/* Fills the 16-bit lanes of vd with imm (which must fit in 16 bits). A single movi/mvni is used when both bytes are equal, when one byte is 0x00, or when one byte is 0xff; otherwise imm is loaded into a W scratch register with movz and broadcast with dup. */ +void MacroAssembler::Movi16bitHelper(const VRegister& vd, uint64_t imm) { + VIXL_ASSERT(IsUint16(imm)); + int byte1 = (imm & 0xff); + int byte2 = ((imm >> 8) & 0xff); + if (byte1 == byte2) { + movi(vd.Is64Bits() ? 
vd.V8B() : vd.V16B(), byte1); + } else if (byte1 == 0) { + movi(vd, byte2, LSL, 8); + } else if (byte2 == 0) { + movi(vd, byte1); + } else if (byte1 == 0xff) { + mvni(vd, ~byte2 & 0xff, LSL, 8); + } else if (byte2 == 0xff) { + mvni(vd, ~byte1 & 0xff); + } else { + UseScratchRegisterScope temps(this); + Register temp = temps.AcquireW(); + movz(temp, imm); + dup(vd, temp); + } +} + + +/* Fills the 32-bit lanes of vd with imm (which must fit in 32 bits). The cases are tried in order: every byte is 0x00 or 0xff; exactly one byte is non-zero; exactly one byte is not 0xff; the MSL forms 0x00MMFFFF / 0x0000MMFF and their inverted counterparts; equal 16-bit halves (delegated to Movi16bitHelper); and finally a Mov into a W scratch register followed by dup. NOTE(review): bytes[] is filled with memcpy from imm, so bytes[i] is byte i of the value only on a little-endian host - confirm this is acceptable for all build hosts. */ +void MacroAssembler::Movi32bitHelper(const VRegister& vd, uint64_t imm) { + VIXL_ASSERT(IsUint32(imm)); + + uint8_t bytes[sizeof(imm)]; + memcpy(bytes, &imm, sizeof(imm)); + + // All bytes are either 0x00 or 0xff. + { + bool all0orff = true; + for (int i = 0; i < 4; ++i) { + if ((bytes[i] != 0) && (bytes[i] != 0xff)) { + all0orff = false; + break; + } + } + + if (all0orff == true) { + movi(vd.Is64Bits() ? vd.V1D() : vd.V2D(), ((imm << 32) | imm)); + return; + } + } + + // Of the 4 bytes, only one byte is non-zero. + for (int i = 0; i < 4; i++) { + if ((imm & (0xff << (i * 8))) == imm) { + movi(vd, bytes[i], LSL, i * 8); + return; + } + } + + // Of the 4 bytes, only one byte is not 0xff. + for (int i = 0; i < 4; i++) { + uint32_t mask = ~(0xff << (i * 8)); + if ((imm & mask) == mask) { + mvni(vd, ~bytes[i] & 0xff, LSL, i * 8); + return; + } + } + + // Immediate is of the form 0x00MMFFFF. + if ((imm & 0xff00ffff) == 0x0000ffff) { + movi(vd, bytes[2], MSL, 16); + return; + } + + // Immediate is of the form 0x0000MMFF. + if ((imm & 0xffff00ff) == 0x000000ff) { + movi(vd, bytes[1], MSL, 8); + return; + } + + // Immediate is of the form 0xFFMM0000. + if ((imm & 0xff00ffff) == 0xff000000) { + mvni(vd, ~bytes[2] & 0xff, MSL, 16); + return; + } + // Immediate is of the form 0xFFFFMM00. + if ((imm & 0xffff00ff) == 0xffff0000) { + mvni(vd, ~bytes[1] & 0xff, MSL, 8); + return; + } + + // Top and bottom 16-bits are equal. + if (((imm >> 16) & 0xffff) == (imm & 0xffff)) { + Movi16bitHelper(vd.Is64Bits() ? vd.V4H() : vd.V8H(), imm & 0xffff); + return; + } + + // Default case. 
+ { + UseScratchRegisterScope temps(this); + Register temp = temps.AcquireW(); + Mov(temp, imm); + dup(vd, temp); + } +} + + +/* Fills the 64-bit lanes of vd with imm. A single movi is used when every byte is 0x00 or 0xff; equal 32-bit halves are delegated to Movi32bitHelper; otherwise imm is loaded into an X scratch register and then inserted into lane 0 (1D destination) or broadcast with dup (2D). */ +void MacroAssembler::Movi64bitHelper(const VRegister& vd, uint64_t imm) { + // All bytes are either 0x00 or 0xff. + { + bool all0orff = true; + for (int i = 0; i < 8; ++i) { + int byteval = (imm >> (i * 8)) & 0xff; + if (byteval != 0 && byteval != 0xff) { + all0orff = false; + break; + } + } + if (all0orff == true) { + movi(vd, imm); + return; + } + } + + // Top and bottom 32-bits are equal. + if (((imm >> 32) & 0xffffffff) == (imm & 0xffffffff)) { + Movi32bitHelper(vd.Is64Bits() ? vd.V2S() : vd.V4S(), imm & 0xffffffff); + return; + } + + // Default case. + { + UseScratchRegisterScope temps(this); + Register temp = temps.AcquireX(); + Mov(temp, imm); + if (vd.Is1D()) { + mov(vd.D(), 0, temp); + } else { + dup(vd.V2D(), temp); + } + } +} + + +/* Vector move-immediate entry point. An explicit shift (non-zero amount or a shift other than LSL) is passed straight to movi; otherwise the helper matching the lane size of vd (8, 16, 32 or 64 bits) is used. */ +void MacroAssembler::Movi(const VRegister& vd, + uint64_t imm, + Shift shift, + int shift_amount) { + MacroEmissionCheckScope guard(this); + if (shift_amount != 0 || shift != LSL) { + movi(vd, imm, shift, shift_amount); + } else if (vd.Is8B() || vd.Is16B()) { + // 8-bit immediate. + VIXL_ASSERT(IsUint8(imm)); + movi(vd, imm); + } else if (vd.Is4H() || vd.Is8H()) { + // 16-bit immediate. + Movi16bitHelper(vd, imm); + } else if (vd.Is2S() || vd.Is4S()) { + // 32-bit immediate. + Movi32bitHelper(vd, imm); + } else { + // 64-bit immediate. + Movi64bitHelper(vd, imm); + } +} + + +/* Loads a 128-bit constant into vd: lo is splatted into both 64-bit lanes and, when hi differs from lo, hi is then inserted into lane 1 through an X scratch register. vd must be a 128-bit register. */ +void MacroAssembler::Movi(const VRegister& vd, + uint64_t hi, + uint64_t lo) { + VIXL_ASSERT(vd.Is128Bits()); + UseScratchRegisterScope temps(this); + + // When hi == lo, the following generates good code. + // + // In situations where the constants are complex and hi != lo, the following + // can turn into up to 10 instructions: 2*(mov + 3*movk + dup/insert). 
To do + // any better, we could try to estimate whether splatting the high value and + // updating the low value would generate fewer instructions than vice versa + // (what we do now). + // + // (A PC-relative load from memory to the vector register (ADR + LD2) is going + // to have fairly high latency but is fairly compact; not clear what the best + // tradeoff is.) + + Movi(vd.V2D(), lo); + if (hi != lo) { + Register temp = temps.AcquireX(); + Mov(temp, hi); + Ins(vd.V2D(), 1, temp); + } +} + + +/* Moves the bitwise inverse of operand into rd. Immediates are forwarded to the immediate overload of Mvn; an extended-register operand is first computed into a scratch register and then inverted; plain and shifted registers are handled directly by mvn. */ +void MacroAssembler::Mvn(const Register& rd, const Operand& operand) { + // The worst case for size is mvn immediate with up to 4 instructions. + MacroEmissionCheckScope guard(this); + + if (operand.IsImmediate()) { + // Call the macro assembler for generic immediates. + Mvn(rd, operand.immediate()); + } else if (operand.IsExtendedRegister()) { + UseScratchRegisterScope temps(this); + temps.Exclude(operand.reg()); + + // Emit two instructions for the extend case. This differs from Mov, as + // the extend and invert can't be achieved in one instruction. + Register temp = temps.AcquireSameSizeAs(rd); + + // VIXL can acquire temp registers. Assert that the caller is aware. + VIXL_ASSERT(!temp.Is(rd) && !temp.Is(operand.maybeReg())); + + EmitExtendShift(temp, operand.reg(), operand.extend(), + operand.shift_amount()); + mvn(rd, Operand(temp)); + } else { + // Otherwise, register and shifted register cases can be handled by the + // assembler directly, using orn. 
+ mvn(rd, operand); + } +} + + +/* Loads the immediate imm into rd by delegating to MoveImmediateHelper with this assembler as the emitter. */ +void MacroAssembler::Mov(const Register& rd, uint64_t imm) { + MoveImmediateHelper(this, rd, imm); +} + + +/* Conditional compare. A negative immediate is negated and issued as CCMN instead; every other operand is issued as CCMP. NOTE(review): the negation is performed on the value returned by operand.immediate() without a guard for the most negative value - confirm callers never pass it. */ +void MacroAssembler::Ccmp(const Register& rn, + const Operand& operand, + StatusFlags nzcv, + Condition cond) { + if (operand.IsImmediate() && (operand.immediate() < 0)) { + ConditionalCompareMacro(rn, -operand.immediate(), nzcv, cond, CCMN); + } else { + ConditionalCompareMacro(rn, operand, nzcv, cond, CCMP); + } +} + + +/* Conditional compare negative; mirror image of Ccmp (a negative immediate is negated and issued as CCMP). */ +void MacroAssembler::Ccmn(const Register& rn, + const Operand& operand, + StatusFlags nzcv, + Condition cond) { + if (operand.IsImmediate() && (operand.immediate() < 0)) { + ConditionalCompareMacro(rn, -operand.immediate(), nzcv, cond, CCMP); + } else { + ConditionalCompareMacro(rn, operand, nzcv, cond, CCMN); + } +} + + +/* Shared implementation of Ccmp/Ccmn. An unshifted register or an immediate accepted by IsImmConditionalCompare is passed straight to ConditionalCompare; any other operand is first moved into a scratch register. cond must not be al or nv. */ +void MacroAssembler::ConditionalCompareMacro(const Register& rn, + const Operand& operand, + StatusFlags nzcv, + Condition cond, + ConditionalCompareOp op) { + VIXL_ASSERT((cond != al) && (cond != nv)); + // The worst case for size is ccmp immediate: + // * up to 4 instructions to materialise the constant + // * 1 instruction for ccmp + MacroEmissionCheckScope guard(this); + + if ((operand.IsShiftedRegister() && (operand.shift_amount() == 0)) || + (operand.IsImmediate() && IsImmConditionalCompare(operand.immediate()))) { + // The immediate can be encoded in the instruction, or the operand is an + // unshifted register: call the assembler. + ConditionalCompare(rn, operand, nzcv, cond, op); + } else { + UseScratchRegisterScope temps(this); + // The operand isn't directly supported by the instruction: perform the + // operation on a temporary register. 
+ Register temp = temps.AcquireSameSizeAs(rn); + VIXL_ASSERT(!temp.Is(rn) && !temp.Is(operand.maybeReg())); + Mov(temp, operand); + ConditionalCompare(rn, temp, nzcv, cond, op); + } +} + + +void MacroAssembler::Csel(const Register& rd, + const Register& rn, + const Operand& operand, + Condition cond) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + VIXL_ASSERT((cond != al) && (cond != nv)); + // The worst case for size is csel immediate: + // * up to 4 instructions to materialise the constant + // * 1 instruction for csel + MacroEmissionCheckScope guard(this); + + if (operand.IsImmediate()) { + // Immediate argument. Handle special cases of 0, 1 and -1 using zero + // register. + int64_t imm = operand.immediate(); + Register zr = AppropriateZeroRegFor(rn); + if (imm == 0) { + csel(rd, rn, zr, cond); + } else if (imm == 1) { + csinc(rd, rn, zr, cond); + } else if (imm == -1) { + csinv(rd, rn, zr, cond); + } else { + UseScratchRegisterScope temps(this); + Register temp = temps.AcquireSameSizeAs(rn); + VIXL_ASSERT(!temp.Is(rd) && !temp.Is(rn)); + VIXL_ASSERT(!temp.Is(operand.maybeReg())); + Mov(temp, operand.immediate()); + csel(rd, rn, temp, cond); + } + } else if (operand.IsShiftedRegister() && (operand.shift_amount() == 0)) { + // Unshifted register argument. + csel(rd, rn, operand.reg(), cond); + } else { + // All other arguments. 
+ UseScratchRegisterScope temps(this); + Register temp = temps.AcquireSameSizeAs(rn); + VIXL_ASSERT(!temp.Is(rd) && !temp.Is(rn)); + VIXL_ASSERT(!temp.Is(operand.maybeReg())); + Mov(temp, operand); + csel(rd, rn, temp, cond); + } +} + + +void MacroAssembler::Add(const Register& rd, + const Register& rn, + const Operand& operand, + FlagsUpdate S) { + if (operand.IsImmediate() && (operand.immediate() < 0) && + IsImmAddSub(-operand.immediate())) { + AddSubMacro(rd, rn, -operand.immediate(), S, SUB); + } else { + AddSubMacro(rd, rn, operand, S, ADD); + } +} + + +void MacroAssembler::Adds(const Register& rd, + const Register& rn, + const Operand& operand) { + Add(rd, rn, operand, SetFlags); +} + + +void MacroAssembler::Sub(const Register& rd, + const Register& rn, + const Operand& operand, + FlagsUpdate S) { + if (operand.IsImmediate() && (operand.immediate() < 0) && + IsImmAddSub(-operand.immediate())) { + AddSubMacro(rd, rn, -operand.immediate(), S, ADD); + } else { + AddSubMacro(rd, rn, operand, S, SUB); + } +} + + +void MacroAssembler::Subs(const Register& rd, + const Register& rn, + const Operand& operand) { + Sub(rd, rn, operand, SetFlags); +} + + +void MacroAssembler::Cmn(const Register& rn, const Operand& operand) { + Adds(AppropriateZeroRegFor(rn), rn, operand); +} + + +void MacroAssembler::Cmp(const Register& rn, const Operand& operand) { + Subs(AppropriateZeroRegFor(rn), rn, operand); +} + + +void MacroAssembler::Fcmp(const FPRegister& fn, double value, + FPTrapFlags trap) { + // The worst case for size is: + // * 1 to materialise the constant, using literal pool if necessary + // * 1 instruction for fcmp{e} + MacroEmissionCheckScope guard(this); + if (value != 0.0) { + UseScratchRegisterScope temps(this); + FPRegister tmp = temps.AcquireSameSizeAs(fn); + VIXL_ASSERT(!tmp.Is(fn)); + Fmov(tmp, value); + FPCompareMacro(fn, tmp, trap); + } else { + FPCompareMacro(fn, value, trap); + } +} + + +void MacroAssembler::Fcmpe(const FPRegister& fn, double value) { + 
Fcmp(fn, value, EnableTrap); +} + + +void MacroAssembler::Fmov(VRegister vd, double imm) { + // Floating point immediates are loaded through the literal pool. + MacroEmissionCheckScope guard(this); + + if (vd.Is1S() || vd.Is2S() || vd.Is4S()) { + Fmov(vd, static_cast<float>(imm)); + return; + } + + VIXL_ASSERT(vd.Is1D() || vd.Is2D()); + if (IsImmFP64(imm)) { + fmov(vd, imm); + } else { + uint64_t rawbits = DoubleToRawbits(imm); + if (vd.IsScalar()) { + if (rawbits == 0) { + fmov(vd, xzr); + } else { + Assembler::fImmPool64(vd, imm); + } + } else { + // TODO: consider NEON support for load literal. + Movi(vd, rawbits); + } + } +} + + +void MacroAssembler::Fmov(VRegister vd, float imm) { + // Floating point immediates are loaded through the literal pool. + MacroEmissionCheckScope guard(this); + + if (vd.Is1D() || vd.Is2D()) { + Fmov(vd, static_cast<double>(imm)); + return; + } + + VIXL_ASSERT(vd.Is1S() || vd.Is2S() || vd.Is4S()); + if (IsImmFP32(imm)) { + fmov(vd, imm); + } else { + uint32_t rawbits = FloatToRawbits(imm); + if (vd.IsScalar()) { + if (rawbits == 0) { + fmov(vd, wzr); + } else { + Assembler::fImmPool32(vd, imm); + } + } else { + // TODO: consider NEON support for load literal. + Movi(vd, rawbits); + } + } +} + + + +void MacroAssembler::Neg(const Register& rd, + const Operand& operand) { + if (operand.IsImmediate()) { + Mov(rd, -operand.immediate()); + } else { + Sub(rd, AppropriateZeroRegFor(rd), operand); + } +} + + +void MacroAssembler::Negs(const Register& rd, + const Operand& operand) { + Subs(rd, AppropriateZeroRegFor(rd), operand); +} + + +bool MacroAssembler::TryOneInstrMoveImmediate(const Register& dst, + int64_t imm) { + return OneInstrMoveImmediateHelper(this, dst, imm); +} + + +Operand MacroAssembler::MoveImmediateForShiftedOp(const Register& dst, + int64_t imm, + PreShiftImmMode mode) { + int reg_size = dst.size(); + + // Encode the immediate in a single move instruction, if possible. 
+ if (TryOneInstrMoveImmediate(dst, imm)) { + // The move was successful; nothing to do here. + } else { + // Pre-shift the immediate to the least-significant bits of the register. + int shift_low = CountTrailingZeros(imm, reg_size); + if (mode == kLimitShiftForSP) { + // When applied to the stack pointer, the subsequent arithmetic operation + // can use the extend form to shift left by a maximum of four bits. Right + // shifts are not allowed, so we filter them out later before the new + // immediate is tested. + shift_low = std::min(shift_low, 4); + } + + int64_t imm_low = imm >> shift_low; + + // Pre-shift the immediate to the most-significant bits of the register, + // inserting set bits in the least-significant bits. + int shift_high = CountLeadingZeros(imm, reg_size); + int64_t imm_high = (imm << shift_high) | ((INT64_C(1) << shift_high) - 1); + + if ((mode != kNoShift) && TryOneInstrMoveImmediate(dst, imm_low)) { + // The new immediate has been moved into the destination's low bits: + // return a new leftward-shifting operand. + return Operand(dst, LSL, shift_low); + } else if ((mode == kAnyShift) && TryOneInstrMoveImmediate(dst, imm_high)) { + // The new immediate has been moved into the destination's high bits: + // return a new rightward-shifting operand. + return Operand(dst, LSR, shift_high); + } else { + Mov(dst, imm); + } + } + return Operand(dst); +} + + +void MacroAssembler::ComputeAddress(const Register& dst, + const MemOperand& mem_op) { + // We cannot handle pre-indexing or post-indexing. 
+ VIXL_ASSERT(mem_op.addrmode() == Offset); + Register base = mem_op.base(); + if (mem_op.IsImmediateOffset()) { + Add(dst, base, mem_op.offset()); + } else { + VIXL_ASSERT(mem_op.IsRegisterOffset()); + Register reg_offset = mem_op.regoffset(); + Shift shift = mem_op.shift(); + Extend extend = mem_op.extend(); + if (shift == NO_SHIFT) { + VIXL_ASSERT(extend != NO_EXTEND); + Add(dst, base, Operand(reg_offset, extend, mem_op.shift_amount())); + } else { + VIXL_ASSERT(extend == NO_EXTEND); + Add(dst, base, Operand(reg_offset, shift, mem_op.shift_amount())); + } + } +} + + +void MacroAssembler::AddSubMacro(const Register& rd, + const Register& rn, + const Operand& operand, + FlagsUpdate S, + AddSubOp op) { + // Worst case is add/sub immediate: + // * up to 4 instructions to materialise the constant + // * 1 instruction for add/sub + MacroEmissionCheckScope guard(this); + + if (operand.IsZero() && rd.Is(rn) && rd.Is64Bits() && rn.Is64Bits() && + (S == LeaveFlags)) { + // The instruction would be a nop. Avoid generating useless code. + return; + } + + if ((operand.IsImmediate() && !IsImmAddSub(operand.immediate())) || + (rn.IsZero() && !operand.IsShiftedRegister()) || + (operand.IsShiftedRegister() && (operand.shift() == ROR))) { + UseScratchRegisterScope temps(this); + Register temp = temps.AcquireSameSizeAs(rn); + if (operand.IsImmediate()) { + PreShiftImmMode mode = kAnyShift; + + // If the destination or source register is the stack pointer, we can + // only pre-shift the immediate right by values supported in the add/sub + // extend encoding. + if (rd.IsSP()) { + // If the destination is SP and flags will be set, we can't pre-shift + // the immediate at all. + mode = (S == SetFlags) ? 
kNoShift : kLimitShiftForSP; + } else if (rn.IsSP()) { + mode = kLimitShiftForSP; + } + + Operand imm_operand = + MoveImmediateForShiftedOp(temp, operand.immediate(), mode); + AddSub(rd, rn, imm_operand, S, op); + } else { + Mov(temp, operand); + AddSub(rd, rn, temp, S, op); + } + } else { + AddSub(rd, rn, operand, S, op); + } +} + + +void MacroAssembler::Adc(const Register& rd, + const Register& rn, + const Operand& operand) { + AddSubWithCarryMacro(rd, rn, operand, LeaveFlags, ADC); +} + + +void MacroAssembler::Adcs(const Register& rd, + const Register& rn, + const Operand& operand) { + AddSubWithCarryMacro(rd, rn, operand, SetFlags, ADC); +} + + +void MacroAssembler::Sbc(const Register& rd, + const Register& rn, + const Operand& operand) { + AddSubWithCarryMacro(rd, rn, operand, LeaveFlags, SBC); +} + + +void MacroAssembler::Sbcs(const Register& rd, + const Register& rn, + const Operand& operand) { + AddSubWithCarryMacro(rd, rn, operand, SetFlags, SBC); +} + + +void MacroAssembler::Ngc(const Register& rd, + const Operand& operand) { + Register zr = AppropriateZeroRegFor(rd); + Sbc(rd, zr, operand); +} + + +void MacroAssembler::Ngcs(const Register& rd, + const Operand& operand) { + Register zr = AppropriateZeroRegFor(rd); + Sbcs(rd, zr, operand); +} + + +void MacroAssembler::AddSubWithCarryMacro(const Register& rd, + const Register& rn, + const Operand& operand, + FlagsUpdate S, + AddSubWithCarryOp op) { + VIXL_ASSERT(rd.size() == rn.size()); + // Worst case is addc/subc immediate: + // * up to 4 instructions to materialise the constant + // * 1 instruction for add/sub + MacroEmissionCheckScope guard(this); + UseScratchRegisterScope temps(this); + + if (operand.IsImmediate() || + (operand.IsShiftedRegister() && (operand.shift() == ROR))) { + // Add/sub with carry (immediate or ROR shifted register.) 
+ Register temp = temps.AcquireSameSizeAs(rn); + VIXL_ASSERT(!temp.Is(rd) && !temp.Is(rn) && !temp.Is(operand.maybeReg())); + Mov(temp, operand); + AddSubWithCarry(rd, rn, Operand(temp), S, op); + } else if (operand.IsShiftedRegister() && (operand.shift_amount() != 0)) { + // Add/sub with carry (shifted register). + VIXL_ASSERT(operand.reg().size() == rd.size()); + VIXL_ASSERT(operand.shift() != ROR); + VIXL_ASSERT(IsUintN(rd.size() == kXRegSize ? kXRegSizeLog2 : kWRegSizeLog2, + operand.shift_amount())); + temps.Exclude(operand.reg()); + Register temp = temps.AcquireSameSizeAs(rn); + VIXL_ASSERT(!temp.Is(rd) && !temp.Is(rn) && !temp.Is(operand.maybeReg())); + EmitShift(temp, operand.reg(), operand.shift(), operand.shift_amount()); + AddSubWithCarry(rd, rn, Operand(temp), S, op); + } else if (operand.IsExtendedRegister()) { + // Add/sub with carry (extended register). + VIXL_ASSERT(operand.reg().size() <= rd.size()); + // Add/sub extended supports a shift <= 4. We want to support exactly the + // same modes. + VIXL_ASSERT(operand.shift_amount() <= 4); + VIXL_ASSERT(operand.reg().Is64Bits() || + ((operand.extend() != UXTX) && (operand.extend() != SXTX))); + temps.Exclude(operand.reg()); + Register temp = temps.AcquireSameSizeAs(rn); + VIXL_ASSERT(!temp.Is(rd) && !temp.Is(rn) && !temp.Is(operand.maybeReg())); + EmitExtendShift(temp, operand.reg(), operand.extend(), + operand.shift_amount()); + AddSubWithCarry(rd, rn, Operand(temp), S, op); + } else { + // The addressing mode is directly supported by the instruction. 
+ AddSubWithCarry(rd, rn, operand, S, op); + } +} + + +#define DEFINE_FUNCTION(FN, REGTYPE, REG, OP) \ +void MacroAssembler::FN(const REGTYPE REG, const MemOperand& addr) { \ + LoadStoreMacro(REG, addr, OP); \ +} +LS_MACRO_LIST(DEFINE_FUNCTION) +#undef DEFINE_FUNCTION + + +void MacroAssembler::LoadStoreMacro(const CPURegister& rt, + const MemOperand& addr, + LoadStoreOp op) { + // Worst case is ldr/str pre/post index: + // * 1 instruction for ldr/str + // * up to 4 instructions to materialise the constant + // * 1 instruction to update the base + MacroEmissionCheckScope guard(this); + + int64_t offset = addr.offset(); + unsigned access_size = CalcLSDataSize(op); + + // Check if an immediate offset fits in the immediate field of the + // appropriate instruction. If not, emit two instructions to perform + // the operation. + if (addr.IsImmediateOffset() && !IsImmLSScaled(offset, access_size) && + !IsImmLSUnscaled(offset)) { + // Immediate offset that can't be encoded using unsigned or unscaled + // addressing modes. + UseScratchRegisterScope temps(this); + Register temp = temps.AcquireSameSizeAs(addr.base()); + VIXL_ASSERT(!temp.Is(rt)); + VIXL_ASSERT(!temp.Is(addr.base()) && !temp.Is(addr.regoffset())); + Mov(temp, addr.offset()); + LoadStore(rt, MemOperand(addr.base(), temp), op); + } else if (addr.IsPostIndex() && !IsImmLSUnscaled(offset)) { + // Post-index beyond unscaled addressing range. + LoadStore(rt, MemOperand(addr.base()), op); + Add(addr.base(), addr.base(), Operand(offset)); + } else if (addr.IsPreIndex() && !IsImmLSUnscaled(offset)) { + // Pre-index beyond unscaled addressing range. + Add(addr.base(), addr.base(), Operand(offset)); + LoadStore(rt, MemOperand(addr.base()), op); + } else { + // Encodable in one load/store instruction. 
+ LoadStore(rt, addr, op); + } +} + + +#define DEFINE_FUNCTION(FN, REGTYPE, REG, REG2, OP) \ +void MacroAssembler::FN(const REGTYPE REG, \ + const REGTYPE REG2, \ + const MemOperand& addr) { \ + LoadStorePairMacro(REG, REG2, addr, OP); \ +} +LSPAIR_MACRO_LIST(DEFINE_FUNCTION) +#undef DEFINE_FUNCTION + +void MacroAssembler::LoadStorePairMacro(const CPURegister& rt, + const CPURegister& rt2, + const MemOperand& addr, + LoadStorePairOp op) { + // TODO(all): Should we support register offset for load-store-pair? + VIXL_ASSERT(!addr.IsRegisterOffset()); + // Worst case is ldp/stp immediate: + // * 1 instruction for ldp/stp + // * up to 4 instructions to materialise the constant + // * 1 instruction to update the base + MacroEmissionCheckScope guard(this); + + int64_t offset = addr.offset(); + unsigned access_size = CalcLSPairDataSize(op); + + // Check if the offset fits in the immediate field of the appropriate + // instruction. If not, emit two instructions to perform the operation. + if (IsImmLSPair(offset, access_size)) { + // Encodable in one load/store pair instruction. + LoadStorePair(rt, rt2, addr, op); + } else { + Register base = addr.base(); + if (addr.IsImmediateOffset()) { + UseScratchRegisterScope temps(this); + Register temp = temps.AcquireSameSizeAs(base); + Add(temp, base, offset); + LoadStorePair(rt, rt2, MemOperand(temp), op); + } else if (addr.IsPostIndex()) { + LoadStorePair(rt, rt2, MemOperand(base), op); + Add(base, base, offset); + } else { + VIXL_ASSERT(addr.IsPreIndex()); + Add(base, base, offset); + LoadStorePair(rt, rt2, MemOperand(base), op); + } + } +} + + +void MacroAssembler::Prfm(PrefetchOperation op, const MemOperand& addr) { + MacroEmissionCheckScope guard(this); + + // There are no pre- or post-index modes for prfm. + VIXL_ASSERT(addr.IsImmediateOffset() || addr.IsRegisterOffset()); + + // The access size is implicitly 8 bytes for all prefetch operations. 
+ unsigned size = kXRegSizeInBytesLog2; + + // Check if an immediate offset fits in the immediate field of the + // appropriate instruction. If not, emit two instructions to perform + // the operation. + if (addr.IsImmediateOffset() && !IsImmLSScaled(addr.offset(), size) && + !IsImmLSUnscaled(addr.offset())) { + // Immediate offset that can't be encoded using unsigned or unscaled + // addressing modes. + UseScratchRegisterScope temps(this); + Register temp = temps.AcquireSameSizeAs(addr.base()); + Mov(temp, addr.offset()); + Prefetch(op, MemOperand(addr.base(), temp)); + } else { + // Simple register-offsets are encodable in one instruction. + Prefetch(op, addr); + } +} + + +void MacroAssembler::PushStackPointer() { + PrepareForPush(1, 8); + + // Pushing a stack pointer leads to implementation-defined + // behavior, which may be surprising. In particular, + // str x28, [x28, #-8]! + // pre-decrements the stack pointer, storing the decremented value. + // Additionally, sp is read as xzr in this context, so it cannot be pushed. + // So we must use a scratch register. 
+ UseScratchRegisterScope temps(this); + Register scratch = temps.AcquireX(); + + Mov(scratch, GetStackPointer64()); + str(scratch, MemOperand(GetStackPointer64(), -8, PreIndex)); +} + + +void MacroAssembler::Push(const CPURegister& src0, const CPURegister& src1, + const CPURegister& src2, const CPURegister& src3) { + VIXL_ASSERT(AreSameSizeAndType(src0, src1, src2, src3)); + VIXL_ASSERT(src0.IsValid()); + + int count = 1 + src1.IsValid() + src2.IsValid() + src3.IsValid(); + int size = src0.SizeInBytes(); + + if (src0.Is(GetStackPointer64())) { + VIXL_ASSERT(count == 1); + VIXL_ASSERT(size == 8); + PushStackPointer(); + return; + } + + PrepareForPush(count, size); + PushHelper(count, size, src0, src1, src2, src3); +} + + +void MacroAssembler::Pop(const CPURegister& dst0, const CPURegister& dst1, + const CPURegister& dst2, const CPURegister& dst3) { + // It is not valid to pop into the same register more than once in one + // instruction, not even into the zero register. + VIXL_ASSERT(!AreAliased(dst0, dst1, dst2, dst3)); + VIXL_ASSERT(AreSameSizeAndType(dst0, dst1, dst2, dst3)); + VIXL_ASSERT(dst0.IsValid()); + + int count = 1 + dst1.IsValid() + dst2.IsValid() + dst3.IsValid(); + int size = dst0.SizeInBytes(); + + PrepareForPop(count, size); + PopHelper(count, size, dst0, dst1, dst2, dst3); +} + + +void MacroAssembler::PushCPURegList(CPURegList registers) { + VIXL_ASSERT(!registers.Overlaps(*TmpList())); + VIXL_ASSERT(!registers.Overlaps(*FPTmpList())); + + int reg_size = registers.RegisterSizeInBytes(); + PrepareForPush(registers.Count(), reg_size); + + // Bump the stack pointer and store two registers at the bottom. 
+ int size = registers.TotalSizeInBytes(); + const CPURegister& bottom_0 = registers.PopLowestIndex(); + const CPURegister& bottom_1 = registers.PopLowestIndex(); + if (bottom_0.IsValid() && bottom_1.IsValid()) { + Stp(bottom_0, bottom_1, MemOperand(GetStackPointer64(), -size, PreIndex)); + } else if (bottom_0.IsValid()) { + Str(bottom_0, MemOperand(GetStackPointer64(), -size, PreIndex)); + } + + int offset = 2 * reg_size; + while (!registers.IsEmpty()) { + const CPURegister& src0 = registers.PopLowestIndex(); + const CPURegister& src1 = registers.PopLowestIndex(); + if (src1.IsValid()) { + Stp(src0, src1, MemOperand(GetStackPointer64(), offset)); + } else { + Str(src0, MemOperand(GetStackPointer64(), offset)); + } + offset += 2 * reg_size; + } +} + + +void MacroAssembler::PopCPURegList(CPURegList registers) { + VIXL_ASSERT(!registers.Overlaps(*TmpList())); + VIXL_ASSERT(!registers.Overlaps(*FPTmpList())); + + int reg_size = registers.RegisterSizeInBytes(); + PrepareForPop(registers.Count(), reg_size); + + + int size = registers.TotalSizeInBytes(); + const CPURegister& bottom_0 = registers.PopLowestIndex(); + const CPURegister& bottom_1 = registers.PopLowestIndex(); + + int offset = 2 * reg_size; + while (!registers.IsEmpty()) { + const CPURegister& dst0 = registers.PopLowestIndex(); + const CPURegister& dst1 = registers.PopLowestIndex(); + if (dst1.IsValid()) { + Ldp(dst0, dst1, MemOperand(GetStackPointer64(), offset)); + } else { + Ldr(dst0, MemOperand(GetStackPointer64(), offset)); + } + offset += 2 * reg_size; + } + + // Load the two registers at the bottom and drop the stack pointer. 
+ if (bottom_0.IsValid() && bottom_1.IsValid()) { + Ldp(bottom_0, bottom_1, MemOperand(GetStackPointer64(), size, PostIndex)); + } else if (bottom_0.IsValid()) { + Ldr(bottom_0, MemOperand(GetStackPointer64(), size, PostIndex)); + } +} + + +void MacroAssembler::PushMultipleTimes(int count, Register src) { + int size = src.SizeInBytes(); + + PrepareForPush(count, size); + // Push up to four registers at a time if possible because if the current + // stack pointer is sp and the register size is 32, registers must be pushed + // in blocks of four in order to maintain the 16-byte alignment for sp. + while (count >= 4) { + PushHelper(4, size, src, src, src, src); + count -= 4; + } + if (count >= 2) { + PushHelper(2, size, src, src, NoReg, NoReg); + count -= 2; + } + if (count == 1) { + PushHelper(1, size, src, NoReg, NoReg, NoReg); + count -= 1; + } + VIXL_ASSERT(count == 0); +} + + +void MacroAssembler::PushHelper(int count, int size, + const CPURegister& src0, + const CPURegister& src1, + const CPURegister& src2, + const CPURegister& src3) { + // Ensure that we don't unintentionally modify scratch or debug registers. + // Worst case for size is 2 stp. + InstructionAccurateScope scope(this, 2, + InstructionAccurateScope::kMaximumSize); + + VIXL_ASSERT(AreSameSizeAndType(src0, src1, src2, src3)); + VIXL_ASSERT(size == src0.SizeInBytes()); + + // Pushing the stack pointer has unexpected behavior. See PushStackPointer(). + VIXL_ASSERT(!src0.Is(GetStackPointer64()) && !src0.Is(sp)); + VIXL_ASSERT(!src1.Is(GetStackPointer64()) && !src1.Is(sp)); + VIXL_ASSERT(!src2.Is(GetStackPointer64()) && !src2.Is(sp)); + VIXL_ASSERT(!src3.Is(GetStackPointer64()) && !src3.Is(sp)); + + // The JS engine should never push 4 bytes. + VIXL_ASSERT(size >= 8); + + // When pushing multiple registers, the store order is chosen such that + // Push(a, b) is equivalent to Push(a) followed by Push(b). 
+ switch (count) { + case 1: + VIXL_ASSERT(src1.IsNone() && src2.IsNone() && src3.IsNone()); + str(src0, MemOperand(GetStackPointer64(), -1 * size, PreIndex)); + break; + case 2: + VIXL_ASSERT(src2.IsNone() && src3.IsNone()); + stp(src1, src0, MemOperand(GetStackPointer64(), -2 * size, PreIndex)); + break; + case 3: + VIXL_ASSERT(src3.IsNone()); + stp(src2, src1, MemOperand(GetStackPointer64(), -3 * size, PreIndex)); + str(src0, MemOperand(GetStackPointer64(), 2 * size)); + break; + case 4: + // Skip over 4 * size, then fill in the gap. This allows four W registers + // to be pushed using sp, whilst maintaining 16-byte alignment for sp at + // all times. + stp(src3, src2, MemOperand(GetStackPointer64(), -4 * size, PreIndex)); + stp(src1, src0, MemOperand(GetStackPointer64(), 2 * size)); + break; + default: + VIXL_UNREACHABLE(); + } +} + + +void MacroAssembler::PopHelper(int count, int size, + const CPURegister& dst0, + const CPURegister& dst1, + const CPURegister& dst2, + const CPURegister& dst3) { + // Ensure that we don't unintentionally modify scratch or debug registers. + // Worst case for size is 2 ldp. + InstructionAccurateScope scope(this, 2, + InstructionAccurateScope::kMaximumSize); + + VIXL_ASSERT(AreSameSizeAndType(dst0, dst1, dst2, dst3)); + VIXL_ASSERT(size == dst0.SizeInBytes()); + + // When popping multiple registers, the load order is chosen such that + // Pop(a, b) is equivalent to Pop(a) followed by Pop(b). 
+ switch (count) { + case 1: + VIXL_ASSERT(dst1.IsNone() && dst2.IsNone() && dst3.IsNone()); + ldr(dst0, MemOperand(GetStackPointer64(), 1 * size, PostIndex)); + break; + case 2: + VIXL_ASSERT(dst2.IsNone() && dst3.IsNone()); + ldp(dst0, dst1, MemOperand(GetStackPointer64(), 2 * size, PostIndex)); + break; + case 3: + VIXL_ASSERT(dst3.IsNone()); + ldr(dst2, MemOperand(GetStackPointer64(), 2 * size)); + ldp(dst0, dst1, MemOperand(GetStackPointer64(), 3 * size, PostIndex)); + break; + case 4: + // Load the higher addresses first, then load the lower addresses and skip + // the whole block in the second instruction. This allows four W registers + // to be popped using sp, whilst maintaining 16-byte alignment for sp at + // all times. + ldp(dst2, dst3, MemOperand(GetStackPointer64(), 2 * size)); + ldp(dst0, dst1, MemOperand(GetStackPointer64(), 4 * size, PostIndex)); + break; + default: + VIXL_UNREACHABLE(); + } +} + + +void MacroAssembler::PrepareForPush(int count, int size) { + if (sp.Is(GetStackPointer64())) { + // If the current stack pointer is sp, then it must be aligned to 16 bytes + // on entry and the total size of the specified registers must also be a + // multiple of 16 bytes. + VIXL_ASSERT((count * size) % 16 == 0); + } else { + // Even if the current stack pointer is not the system stack pointer (sp), + // the system stack pointer will still be modified in order to comply with + // ABI rules about accessing memory below the system stack pointer. + BumpSystemStackPointer(count * size); + } +} + + +void MacroAssembler::PrepareForPop(int count, int size) { + USE(count, size); + if (sp.Is(GetStackPointer64())) { + // If the current stack pointer is sp, then it must be aligned to 16 bytes + // on entry and the total size of the specified registers must also be a + // multiple of 16 bytes. 
+ VIXL_ASSERT((count * size) % 16 == 0); + } +} + +void MacroAssembler::Poke(const Register& src, const Operand& offset) { + if (offset.IsImmediate()) { + VIXL_ASSERT(offset.immediate() >= 0); + } + + Str(src, MemOperand(GetStackPointer64(), offset)); +} + + +void MacroAssembler::Peek(const Register& dst, const Operand& offset) { + if (offset.IsImmediate()) { + VIXL_ASSERT(offset.immediate() >= 0); + } + + Ldr(dst, MemOperand(GetStackPointer64(), offset)); +} + + +void MacroAssembler::Claim(const Operand& size) { + + if (size.IsZero()) { + return; + } + + if (size.IsImmediate()) { + VIXL_ASSERT(size.immediate() > 0); + if (sp.Is(GetStackPointer64())) { + VIXL_ASSERT((size.immediate() % 16) == 0); + } + } + + Sub(GetStackPointer64(), GetStackPointer64(), size); + + // Make sure the real stack pointer reflects the claimed stack space. + // We can't use stack memory below the stack pointer, because it could be + // clobbered by interrupts and signal handlers. + if (!sp.Is(GetStackPointer64())) { + Mov(sp, GetStackPointer64()); + } +} + + +void MacroAssembler::Drop(const Operand& size) { + + if (size.IsZero()) { + return; + } + + if (size.IsImmediate()) { + VIXL_ASSERT(size.immediate() > 0); + if (sp.Is(GetStackPointer64())) { + VIXL_ASSERT((size.immediate() % 16) == 0); + } + } + + Add(GetStackPointer64(), GetStackPointer64(), size); +} + + +void MacroAssembler::PushCalleeSavedRegisters() { + // Ensure that the macro-assembler doesn't use any scratch registers. + // 10 stp will be emitted. + // TODO(all): Should we use GetCalleeSaved and SavedFP. + InstructionAccurateScope scope(this, 10); + + // This method must not be called unless the current stack pointer is sp. 
+ VIXL_ASSERT(sp.Is(GetStackPointer64())); + + MemOperand tos(sp, -2 * static_cast<int>(kXRegSizeInBytes), PreIndex); + + stp(x29, x30, tos); + stp(x27, x28, tos); + stp(x25, x26, tos); + stp(x23, x24, tos); + stp(x21, x22, tos); + stp(x19, x20, tos); + + stp(d14, d15, tos); + stp(d12, d13, tos); + stp(d10, d11, tos); + stp(d8, d9, tos); +} + + +void MacroAssembler::PopCalleeSavedRegisters() { + // Ensure that the macro-assembler doesn't use any scratch registers. + // 10 ldp will be emitted. + // TODO(all): Should we use GetCalleeSaved and SavedFP. + InstructionAccurateScope scope(this, 10); + + // This method must not be called unless the current stack pointer is sp. + VIXL_ASSERT(sp.Is(GetStackPointer64())); + + MemOperand tos(sp, 2 * kXRegSizeInBytes, PostIndex); + + ldp(d8, d9, tos); + ldp(d10, d11, tos); + ldp(d12, d13, tos); + ldp(d14, d15, tos); + + ldp(x19, x20, tos); + ldp(x21, x22, tos); + ldp(x23, x24, tos); + ldp(x25, x26, tos); + ldp(x27, x28, tos); + ldp(x29, x30, tos); +} + +void MacroAssembler::LoadCPURegList(CPURegList registers, + const MemOperand& src) { + LoadStoreCPURegListHelper(kLoad, registers, src); +} + +void MacroAssembler::StoreCPURegList(CPURegList registers, + const MemOperand& dst) { + LoadStoreCPURegListHelper(kStore, registers, dst); +} + + +void MacroAssembler::LoadStoreCPURegListHelper(LoadStoreCPURegListAction op, + CPURegList registers, + const MemOperand& mem) { + // We do not handle pre-indexing or post-indexing. 
+ VIXL_ASSERT(!(mem.IsPreIndex() || mem.IsPostIndex())); + VIXL_ASSERT(!registers.Overlaps(tmp_list_)); + VIXL_ASSERT(!registers.Overlaps(fptmp_list_)); + VIXL_ASSERT(!registers.IncludesAliasOf(sp)); + + UseScratchRegisterScope temps(this); + + MemOperand loc = BaseMemOperandForLoadStoreCPURegList(registers, + mem, + &temps); + + while (registers.Count() >= 2) { + const CPURegister& dst0 = registers.PopLowestIndex(); + const CPURegister& dst1 = registers.PopLowestIndex(); + if (op == kStore) { + Stp(dst0, dst1, loc); + } else { + VIXL_ASSERT(op == kLoad); + Ldp(dst0, dst1, loc); + } + loc.AddOffset(2 * registers.RegisterSizeInBytes()); + } + if (!registers.IsEmpty()) { + if (op == kStore) { + Str(registers.PopLowestIndex(), loc); + } else { + VIXL_ASSERT(op == kLoad); + Ldr(registers.PopLowestIndex(), loc); + } + } +} + +MemOperand MacroAssembler::BaseMemOperandForLoadStoreCPURegList( + const CPURegList& registers, + const MemOperand& mem, + UseScratchRegisterScope* scratch_scope) { + // If necessary, pre-compute the base address for the accesses. 
+ if (mem.IsRegisterOffset()) { + Register reg_base = scratch_scope->AcquireX(); + ComputeAddress(reg_base, mem); + return MemOperand(reg_base); + + } else if (mem.IsImmediateOffset()) { + int reg_size = registers.RegisterSizeInBytes(); + int total_size = registers.TotalSizeInBytes(); + int64_t min_offset = mem.offset(); + int64_t max_offset = mem.offset() + std::max(0, total_size - 2 * reg_size); + if ((registers.Count() >= 2) && + (!Assembler::IsImmLSPair(min_offset, WhichPowerOf2(reg_size)) || + !Assembler::IsImmLSPair(max_offset, WhichPowerOf2(reg_size)))) { + Register reg_base = scratch_scope->AcquireX(); + ComputeAddress(reg_base, mem); + return MemOperand(reg_base); + } + } + + return mem; +} + +void MacroAssembler::BumpSystemStackPointer(const Operand& space) { + VIXL_ASSERT(!sp.Is(GetStackPointer64())); + // TODO: Several callers rely on this not using scratch registers, so we use + // the assembler directly here. However, this means that large immediate + // values of 'space' cannot be handled. + InstructionAccurateScope scope(this, 1); + sub(sp, GetStackPointer64(), space); +} + + +void MacroAssembler::Trace(TraceParameters parameters, TraceCommand command) { + +#ifdef JS_SIMULATOR_ARM64 + // The arguments to the trace pseudo instruction need to be contiguous in + // memory, so make sure we don't try to emit a literal pool. + InstructionAccurateScope scope(this, kTraceLength / kInstructionSize); + + Label start; + bind(&start); + + // Refer to simulator-a64.h for a description of the marker and its + // arguments. + hlt(kTraceOpcode); + + // VIXL_ASSERT(SizeOfCodeGeneratedSince(&start) == kTraceParamsOffset); + dc32(parameters); + + // VIXL_ASSERT(SizeOfCodeGeneratedSince(&start) == kTraceCommandOffset); + dc32(command); +#else + // Emit nothing on real hardware. 
+ USE(parameters, command); +#endif +} + + +void MacroAssembler::Log(TraceParameters parameters) { + +#ifdef JS_SIMULATOR_ARM64 + // The arguments to the log pseudo instruction need to be contiguous in + // memory, so make sure we don't try to emit a literal pool. + InstructionAccurateScope scope(this, kLogLength / kInstructionSize); + + Label start; + bind(&start); + + // Refer to simulator-a64.h for a description of the marker and its + // arguments. + hlt(kLogOpcode); + + // VIXL_ASSERT(SizeOfCodeGeneratedSince(&start) == kLogParamsOffset); + dc32(parameters); +#else + // Emit nothing on real hardware. + USE(parameters); +#endif +} + + +void MacroAssembler::EnableInstrumentation() { + VIXL_ASSERT(!isprint(InstrumentStateEnable)); + InstructionAccurateScope scope(this, 1); + movn(xzr, InstrumentStateEnable); +} + + +void MacroAssembler::DisableInstrumentation() { + VIXL_ASSERT(!isprint(InstrumentStateDisable)); + InstructionAccurateScope scope(this, 1); + movn(xzr, InstrumentStateDisable); +} + + +void MacroAssembler::AnnotateInstrumentation(const char* marker_name) { + VIXL_ASSERT(strlen(marker_name) == 2); + + // We allow only printable characters in the marker names. Unprintable + // characters are reserved for controlling features of the instrumentation. 
+ VIXL_ASSERT(isprint(marker_name[0]) && isprint(marker_name[1])); + + InstructionAccurateScope scope(this, 1); + movn(xzr, (marker_name[1] << 8) | marker_name[0]); +} + + +void UseScratchRegisterScope::Open(MacroAssembler* masm) { + VIXL_ASSERT(!initialised_); + available_ = masm->TmpList(); + availablefp_ = masm->FPTmpList(); + old_available_ = available_->list(); + old_availablefp_ = availablefp_->list(); + VIXL_ASSERT(available_->type() == CPURegister::kRegister); + VIXL_ASSERT(availablefp_->type() == CPURegister::kVRegister); +#ifdef DEBUG + initialised_ = true; +#endif +} + + +void UseScratchRegisterScope::Close() { + if (available_) { + available_->set_list(old_available_); + available_ = NULL; + } + if (availablefp_) { + availablefp_->set_list(old_availablefp_); + availablefp_ = NULL; + } +#ifdef DEBUG + initialised_ = false; +#endif +} + + +UseScratchRegisterScope::UseScratchRegisterScope(MacroAssembler* masm) { +#ifdef DEBUG + initialised_ = false; +#endif + Open(masm); +} + +// This allows deferred (and optional) initialisation of the scope. 
+UseScratchRegisterScope::UseScratchRegisterScope() + : available_(NULL), availablefp_(NULL), + old_available_(0), old_availablefp_(0) { +#ifdef DEBUG + initialised_ = false; +#endif +} + +// Destroying the scope closes it, restoring any scratch lists it is attached to. +UseScratchRegisterScope::~UseScratchRegisterScope() { + Close(); +} + + +// Return true if either scratch list contains a register aliasing reg. The +// scope must be open. +bool UseScratchRegisterScope::IsAvailable(const CPURegister& reg) const { + VIXL_ASSERT(initialised_); + return available_->IncludesAliasOf(reg) || availablefp_->IncludesAliasOf(reg); +} + + +// Take the next available integer scratch register and return it with the same +// size as reg. The scope must be open. +Register UseScratchRegisterScope::AcquireSameSizeAs(const Register& reg) { + VIXL_ASSERT(initialised_); + int code = AcquireNextAvailable(available_).code(); + return Register(code, reg.size()); +} + + +// Take the next available FP scratch register and return it with the same size +// as reg. The scope must be open. +FPRegister UseScratchRegisterScope::AcquireSameSizeAs(const FPRegister& reg) { + VIXL_ASSERT(initialised_); + int code = AcquireNextAvailable(availablefp_).code(); + return FPRegister(code, reg.size()); +} + + +// Put reg back into the scratch list matching its kind. A register that is +// neither integer nor FP must be a "none" register (asserted). +void UseScratchRegisterScope::Release(const CPURegister& reg) { + VIXL_ASSERT(initialised_); + if (reg.IsRegister()) { + ReleaseByCode(available_, reg.code()); + } else if (reg.IsFPRegister()) { + ReleaseByCode(availablefp_, reg.code()); + } else { + VIXL_ASSERT(reg.IsNone()); + } +} + + +// Add every register in list to the scratch list matching the list's type. +void UseScratchRegisterScope::Include(const CPURegList& list) { + VIXL_ASSERT(initialised_); + if (list.type() == CPURegister::kRegister) { + // Make sure that neither sp nor xzr are included in the list. + IncludeByRegList(available_, list.list() & ~(xzr.Bit() | sp.Bit())); + } else { + VIXL_ASSERT(list.type() == CPURegister::kVRegister); + IncludeByRegList(availablefp_, list.list()); + } +} + + +// Add up to four integer registers to the integer scratch list. +void UseScratchRegisterScope::Include(const Register& reg1, + const Register& reg2, + const Register& reg3, + const Register& reg4) { + VIXL_ASSERT(initialised_); + RegList include = reg1.Bit() | reg2.Bit() | reg3.Bit() | reg4.Bit(); + // Make sure that neither sp nor xzr are included in the list. 
+ include &= ~(xzr.Bit() | sp.Bit()); + + IncludeByRegList(available_, include); +} + + +// Add up to four FP registers to the FP scratch list. The scope must be open. +void UseScratchRegisterScope::Include(const FPRegister& reg1, + const FPRegister& reg2, + const FPRegister& reg3, + const FPRegister& reg4) { + VIXL_ASSERT(initialised_); + RegList include = reg1.Bit() | reg2.Bit() | reg3.Bit() | reg4.Bit(); + IncludeByRegList(availablefp_, include); +} + + +// Remove every register in list from the scratch list matching the list's type. +// The scope must be open. +void UseScratchRegisterScope::Exclude(const CPURegList& list) { + VIXL_ASSERT(initialised_); + if (list.type() == CPURegister::kRegister) { + ExcludeByRegList(available_, list.list()); + } else { + VIXL_ASSERT(list.type() == CPURegister::kVRegister); + ExcludeByRegList(availablefp_, list.list()); + } +} + + +// Remove up to four integer registers from the integer scratch list. The scope +// must be open. +void UseScratchRegisterScope::Exclude(const Register& reg1, + const Register& reg2, + const Register& reg3, + const Register& reg4) { + VIXL_ASSERT(initialised_); + RegList exclude = reg1.Bit() | reg2.Bit() | reg3.Bit() | reg4.Bit(); + ExcludeByRegList(available_, exclude); +} + + +// Remove up to four FP registers from the FP scratch list. The scope must be +// open. +void UseScratchRegisterScope::Exclude(const FPRegister& reg1, + const FPRegister& reg2, + const FPRegister& reg3, + const FPRegister& reg4) { + VIXL_ASSERT(initialised_); + RegList excludefp = reg1.Bit() | reg2.Bit() | reg3.Bit() | reg4.Bit(); + ExcludeByRegList(availablefp_, excludefp); +} + + +// Remove up to four registers of mixed kind: each one is sorted into an integer +// or FP mask, and both masks are then cleared from their scratch lists. The +// scope must be open. +void UseScratchRegisterScope::Exclude(const CPURegister& reg1, + const CPURegister& reg2, + const CPURegister& reg3, + const CPURegister& reg4) { + VIXL_ASSERT(initialised_); + RegList exclude = 0; + RegList excludefp = 0; + + const CPURegister regs[] = {reg1, reg2, reg3, reg4}; + + for (unsigned i = 0; i < (sizeof(regs) / sizeof(regs[0])); i++) { + if (regs[i].IsRegister()) { + exclude |= regs[i].Bit(); + } else if (regs[i].IsFPRegister()) { + excludefp |= regs[i].Bit(); + } else { + VIXL_ASSERT(regs[i].IsNone()); + } + } + + ExcludeByRegList(available_, exclude); + ExcludeByRegList(availablefp_, excludefp); +} + + +// Empty both scratch lists. The scope must be open. +void UseScratchRegisterScope::ExcludeAll() { + VIXL_ASSERT(initialised_); + ExcludeByRegList(available_, available_->list()); + ExcludeByRegList(availablefp_, availablefp_->list()); +} + + +// Pop and return the lowest-indexed register of available, which must not be +// empty (VIXL_CHECK). +CPURegister UseScratchRegisterScope::AcquireNextAvailable( + CPURegList* available) { 
+ VIXL_CHECK(!available->IsEmpty()); + CPURegister result = available->PopLowestIndex(); + VIXL_ASSERT(!AreAliased(result, xzr, sp)); + return result; +} + + +// Add the register with the given code back to available. +void UseScratchRegisterScope::ReleaseByCode(CPURegList* available, int code) { + ReleaseByRegList(available, static_cast<RegList>(1) << code); +} + + +// Set the bits of regs in available. +void UseScratchRegisterScope::ReleaseByRegList(CPURegList* available, + RegList regs) { + available->set_list(available->list() | regs); +} + + +// Set the bits of regs in available. +void UseScratchRegisterScope::IncludeByRegList(CPURegList* available, + RegList regs) { + available->set_list(available->list() | regs); +} + + +// Clear the bits of exclude in available. +void UseScratchRegisterScope::ExcludeByRegList(CPURegList* available, + RegList exclude) { + available->set_list(available->list() & ~exclude); +} + +} // namespace vixl diff --git a/js/src/jit/arm64/vixl/MacroAssembler-vixl.h b/js/src/jit/arm64/vixl/MacroAssembler-vixl.h new file mode 100644 index 0000000000..3c403a815f --- /dev/null +++ b/js/src/jit/arm64/vixl/MacroAssembler-vixl.h @@ -0,0 +1,2622 @@ +// Copyright 2015, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef VIXL_A64_MACRO_ASSEMBLER_A64_H_ +#define VIXL_A64_MACRO_ASSEMBLER_A64_H_ + +#include <algorithm> +#include <limits> + +#include "jit/arm64/Assembler-arm64.h" +#include "jit/arm64/vixl/Debugger-vixl.h" +#include "jit/arm64/vixl/Globals-vixl.h" +#include "jit/arm64/vixl/Instrument-vixl.h" +#include "jit/arm64/vixl/Simulator-Constants-vixl.h" + +#define LS_MACRO_LIST(V) \ + V(Ldrb, Register&, rt, LDRB_w) \ + V(Strb, Register&, rt, STRB_w) \ + V(Ldrsb, Register&, rt, rt.Is64Bits() ? LDRSB_x : LDRSB_w) \ + V(Ldrh, Register&, rt, LDRH_w) \ + V(Strh, Register&, rt, STRH_w) \ + V(Ldrsh, Register&, rt, rt.Is64Bits() ? LDRSH_x : LDRSH_w) \ + V(Ldr, CPURegister&, rt, LoadOpFor(rt)) \ + V(Str, CPURegister&, rt, StoreOpFor(rt)) \ + V(Ldrsw, Register&, rt, LDRSW_x) + + +#define LSPAIR_MACRO_LIST(V) \ + V(Ldp, CPURegister&, rt, rt2, LoadPairOpFor(rt, rt2)) \ + V(Stp, CPURegister&, rt, rt2, StorePairOpFor(rt, rt2)) \ + V(Ldpsw, CPURegister&, rt, rt2, LDPSW_x) + +namespace vixl { + +// Forward declaration +class MacroAssembler; +class UseScratchRegisterScope; + +// This scope has the following purposes: +// * Acquire/Release the underlying assembler's code buffer. 
+// * This is mandatory before emitting. +// * Emit the literal or veneer pools if necessary before emitting the +// macro-instruction. +// * Ensure there is enough space to emit the macro-instruction. +// +// NOTE(review): the constructor below only stores masm (and, in DEBUG builds, +// size); it performs none of the steps listed here itself. +class EmissionCheckScope { + public: + EmissionCheckScope(MacroAssembler* masm, size_t size) + : masm_(masm) +#ifdef DEBUG + , size_(size) // Keep the DEBUG-only member from being left uninitialised. +#endif + { } + + protected: + MacroAssembler* masm_; +#ifdef DEBUG + Label start_; + size_t size_; +#endif +}; + + +// Helper for common Emission checks. +// The macro-instruction maps to a single instruction. +class SingleEmissionCheckScope : public EmissionCheckScope { + public: + explicit SingleEmissionCheckScope(MacroAssembler* masm) + : EmissionCheckScope(masm, kInstructionSize) {} +}; + + +// The macro instruction is a "typical" macro-instruction. Typical macro- +// instructions only emit a few instructions, a few being defined as 8 here. +class MacroEmissionCheckScope : public EmissionCheckScope { + public: + explicit MacroEmissionCheckScope(MacroAssembler* masm) + : EmissionCheckScope(masm, kTypicalMacroInstructionMaxSize) {} + + private: + static const size_t kTypicalMacroInstructionMaxSize = 8 * kInstructionSize; +}; + + +enum BranchType { + // Copies of architectural conditions. + // The associated conditions can be used in place of those, the code will + // take care of reinterpreting them with the correct type. + integer_eq = eq, + integer_ne = ne, + integer_hs = hs, + integer_lo = lo, + integer_mi = mi, + integer_pl = pl, + integer_vs = vs, + integer_vc = vc, + integer_hi = hi, + integer_ls = ls, + integer_ge = ge, + integer_lt = lt, + integer_gt = gt, + integer_le = le, + integer_al = al, + integer_nv = nv, + + // These two are *different* from the architectural codes al and nv. + // 'always' is used to generate unconditional branches. + // 'never' is used to not generate a branch (generally as the inverse + // branch type of 'always'). 
+ always, never, + // cbz and cbnz + reg_zero, reg_not_zero, + // tbz and tbnz + reg_bit_clear, reg_bit_set, + + // Aliases. + kBranchTypeFirstCondition = eq, + kBranchTypeLastCondition = nv, + kBranchTypeFirstUsingReg = reg_zero, + kBranchTypeFirstUsingBit = reg_bit_clear +}; + + +enum DiscardMoveMode { kDontDiscardForSameWReg, kDiscardForSameWReg }; + +// The macro assembler supports moving automatically pre-shifted immediates for +// arithmetic and logical instructions, and then applying a post shift in the +// instruction to undo the modification, in order to reduce the code emitted for +// an operation. For example: +// +// Add(x0, x0, 0x1f7de) => movz x16, 0xfbef; add x0, x0, x16, lsl #1. +// +// This optimisation can be only partially applied when the stack pointer is an +// operand or destination, so this enumeration is used to control the shift. +enum PreShiftImmMode { + kNoShift, // Don't pre-shift. + kLimitShiftForSP, // Limit pre-shift for add/sub extend use. + kAnyShift // Allow any pre-shift. +}; + + +class MacroAssembler : public js::jit::Assembler { + public: + MacroAssembler(); + + // Finalize a code buffer of generated instructions. This function must be + // called before executing or copying code from the buffer. + void FinalizeCode(); + + + // Constant generation helpers. + // These functions return the number of instructions required to move the + // immediate into the destination register. Also, if the masm pointer is + // non-null, it generates the code to do so. + // The two features are implemented using one function to avoid duplication of + // the logic. + // The function can be used to evaluate the cost of synthesizing an + // instruction using 'mov immediate' instructions. A user might prefer loading + // a constant using the literal pool instead of using multiple 'mov immediate' + // instructions. 
+ static int MoveImmediateHelper(MacroAssembler* masm, + const Register &rd, + uint64_t imm); + static bool OneInstrMoveImmediateHelper(MacroAssembler* masm, + const Register& dst, + int64_t imm); + + + // Logical macros. + void And(const Register& rd, + const Register& rn, + const Operand& operand); + void Ands(const Register& rd, + const Register& rn, + const Operand& operand); + void Bic(const Register& rd, + const Register& rn, + const Operand& operand); + void Bics(const Register& rd, + const Register& rn, + const Operand& operand); + void Orr(const Register& rd, + const Register& rn, + const Operand& operand); + void Orn(const Register& rd, + const Register& rn, + const Operand& operand); + void Eor(const Register& rd, + const Register& rn, + const Operand& operand); + void Eon(const Register& rd, + const Register& rn, + const Operand& operand); + void Tst(const Register& rn, const Operand& operand); + void LogicalMacro(const Register& rd, + const Register& rn, + const Operand& operand, + LogicalOp op); + + // Add and sub macros. + void Add(const Register& rd, + const Register& rn, + const Operand& operand, + FlagsUpdate S = LeaveFlags); + void Adds(const Register& rd, + const Register& rn, + const Operand& operand); + void Sub(const Register& rd, + const Register& rn, + const Operand& operand, + FlagsUpdate S = LeaveFlags); + void Subs(const Register& rd, + const Register& rn, + const Operand& operand); + void Cmn(const Register& rn, const Operand& operand); + void Cmp(const Register& rn, const Operand& operand); + void Neg(const Register& rd, + const Operand& operand); + void Negs(const Register& rd, + const Operand& operand); + + void AddSubMacro(const Register& rd, + const Register& rn, + const Operand& operand, + FlagsUpdate S, + AddSubOp op); + + // Add/sub with carry macros. 
+ void Adc(const Register& rd, + const Register& rn, + const Operand& operand); + void Adcs(const Register& rd, + const Register& rn, + const Operand& operand); + void Sbc(const Register& rd, + const Register& rn, + const Operand& operand); + void Sbcs(const Register& rd, + const Register& rn, + const Operand& operand); + void Ngc(const Register& rd, + const Operand& operand); + void Ngcs(const Register& rd, + const Operand& operand); + void AddSubWithCarryMacro(const Register& rd, + const Register& rn, + const Operand& operand, + FlagsUpdate S, + AddSubWithCarryOp op); + + // Move macros. + void Mov(const Register& rd, uint64_t imm); + void Mov(const Register& rd, + const Operand& operand, + DiscardMoveMode discard_mode = kDontDiscardForSameWReg); + void Mvn(const Register& rd, uint64_t imm) { + Mov(rd, (rd.size() == kXRegSize) ? ~imm : (~imm & kWRegMask)); + } + void Mvn(const Register& rd, const Operand& operand); + + // Try to move an immediate into the destination register in a single + // instruction. Returns true for success, and updates the contents of dst. + // Returns false, otherwise. + bool TryOneInstrMoveImmediate(const Register& dst, int64_t imm); + + // Move an immediate into register dst, and return an Operand object for + // use with a subsequent instruction that accepts a shift. The value moved + // into dst is not necessarily equal to imm; it may have had a shifting + // operation applied to it that will be subsequently undone by the shift + // applied in the Operand. + Operand MoveImmediateForShiftedOp(const Register& dst, + int64_t imm, + PreShiftImmMode mode); + + // Synthesises the address represented by a MemOperand into a register. + void ComputeAddress(const Register& dst, const MemOperand& mem_op); + + // Conditional macros. 
+ void Ccmp(const Register& rn, + const Operand& operand, + StatusFlags nzcv, + Condition cond); + void Ccmn(const Register& rn, + const Operand& operand, + StatusFlags nzcv, + Condition cond); + void ConditionalCompareMacro(const Register& rn, + const Operand& operand, + StatusFlags nzcv, + Condition cond, + ConditionalCompareOp op); + void Csel(const Register& rd, + const Register& rn, + const Operand& operand, + Condition cond); + + // Load/store macros. +#define DECLARE_FUNCTION(FN, REGTYPE, REG, OP) \ + void FN(const REGTYPE REG, const MemOperand& addr); + LS_MACRO_LIST(DECLARE_FUNCTION) +#undef DECLARE_FUNCTION + + void LoadStoreMacro(const CPURegister& rt, + const MemOperand& addr, + LoadStoreOp op); + +#define DECLARE_FUNCTION(FN, REGTYPE, REG, REG2, OP) \ + void FN(const REGTYPE REG, const REGTYPE REG2, const MemOperand& addr); + LSPAIR_MACRO_LIST(DECLARE_FUNCTION) +#undef DECLARE_FUNCTION + + void LoadStorePairMacro(const CPURegister& rt, + const CPURegister& rt2, + const MemOperand& addr, + LoadStorePairOp op); + + void Prfm(PrefetchOperation op, const MemOperand& addr); + + // Push or pop up to 4 registers of the same width to or from the stack, + // using the current stack pointer as set by SetStackPointer. + // + // If an argument register is 'NoReg', all further arguments are also assumed + // to be 'NoReg', and are thus not pushed or popped. + // + // Arguments are ordered such that "Push(a, b);" is functionally equivalent + // to "Push(a); Push(b);". + // + // It is valid to push the same register more than once, and there is no + // restriction on the order in which registers are specified. + // + // It is not valid to pop into the same register more than once in one + // operation, not even into the zero register. + // + // If the current stack pointer (as set by SetStackPointer) is sp, then it + // must be aligned to 16 bytes on entry and the total size of the specified + // registers must also be a multiple of 16 bytes. 
+ // + // Even if the current stack pointer is not the system stack pointer (sp), + // Push (and derived methods) will still modify the system stack pointer in + // order to comply with ABI rules about accessing memory below the system + // stack pointer. + // + // Other than the registers passed into Pop, the stack pointer and (possibly) + // the system stack pointer, these methods do not modify any other registers. + void Push(const CPURegister& src0, const CPURegister& src1 = NoReg, + const CPURegister& src2 = NoReg, const CPURegister& src3 = NoReg); + void Pop(const CPURegister& dst0, const CPURegister& dst1 = NoReg, + const CPURegister& dst2 = NoReg, const CPURegister& dst3 = NoReg); + void PushStackPointer(); + + // Alternative forms of Push and Pop, taking a RegList or CPURegList that + // specifies the registers that are to be pushed or popped. Higher-numbered + // registers are associated with higher memory addresses (as in the A32 push + // and pop instructions). + // + // (Push|Pop)SizeRegList allow you to specify the register size as a + // parameter. Only kXRegSize, kWRegSize, kDRegSize and kSRegSize are + // supported. + // + // Otherwise, (Push|Pop)(CPU|X|W|D|S)RegList is preferred. 
+ void PushCPURegList(CPURegList registers); + void PopCPURegList(CPURegList registers); + + void PushSizeRegList(RegList registers, unsigned reg_size, + CPURegister::RegisterType type = CPURegister::kRegister) { + PushCPURegList(CPURegList(type, reg_size, registers)); + } + void PopSizeRegList(RegList registers, unsigned reg_size, + CPURegister::RegisterType type = CPURegister::kRegister) { + PopCPURegList(CPURegList(type, reg_size, registers)); + } + void PushXRegList(RegList regs) { + PushSizeRegList(regs, kXRegSize); + } + void PopXRegList(RegList regs) { + PopSizeRegList(regs, kXRegSize); + } + void PushWRegList(RegList regs) { + PushSizeRegList(regs, kWRegSize); + } + void PopWRegList(RegList regs) { + PopSizeRegList(regs, kWRegSize); + } + void PushDRegList(RegList regs) { + PushSizeRegList(regs, kDRegSize, CPURegister::kVRegister); + } + void PopDRegList(RegList regs) { + PopSizeRegList(regs, kDRegSize, CPURegister::kVRegister); + } + void PushSRegList(RegList regs) { + PushSizeRegList(regs, kSRegSize, CPURegister::kVRegister); + } + void PopSRegList(RegList regs) { + PopSizeRegList(regs, kSRegSize, CPURegister::kVRegister); + } + + // Push the specified register 'count' times. + void PushMultipleTimes(int count, Register src); + + // Poke 'src' onto the stack. The offset is in bytes. + // + // If the current stack pointer (as set by SetStackPointer) is sp, then sp + // must be aligned to 16 bytes. + void Poke(const Register& src, const Operand& offset); + + // Peek at a value on the stack, and put it in 'dst'. The offset is in bytes. + // + // If the current stack pointer (as set by SetStackPointer) is sp, then sp + // must be aligned to 16 bytes. + void Peek(const Register& dst, const Operand& offset); + + // Alternative forms of Peek and Poke, taking a RegList or CPURegList that + // specifies the registers that are to be pushed or popped. Higher-numbered + // registers are associated with higher memory addresses. 
+ // + // (Peek|Poke)SizeRegList allow you to specify the register size as a + // parameter. Only kXRegSize, kWRegSize, kDRegSize and kSRegSize are + // supported. + // + // Otherwise, (Peek|Poke)(CPU|X|W|D|S)RegList is preferred. + void PeekCPURegList(CPURegList registers, int64_t offset) { + LoadCPURegList(registers, MemOperand(StackPointer(), offset)); + } + void PokeCPURegList(CPURegList registers, int64_t offset) { + StoreCPURegList(registers, MemOperand(StackPointer(), offset)); + } + + void PeekSizeRegList(RegList registers, int64_t offset, unsigned reg_size, + CPURegister::RegisterType type = CPURegister::kRegister) { + PeekCPURegList(CPURegList(type, reg_size, registers), offset); + } + void PokeSizeRegList(RegList registers, int64_t offset, unsigned reg_size, + CPURegister::RegisterType type = CPURegister::kRegister) { + PokeCPURegList(CPURegList(type, reg_size, registers), offset); + } + void PeekXRegList(RegList regs, int64_t offset) { + PeekSizeRegList(regs, offset, kXRegSize); + } + void PokeXRegList(RegList regs, int64_t offset) { + PokeSizeRegList(regs, offset, kXRegSize); + } + void PeekWRegList(RegList regs, int64_t offset) { + PeekSizeRegList(regs, offset, kWRegSize); + } + void PokeWRegList(RegList regs, int64_t offset) { + PokeSizeRegList(regs, offset, kWRegSize); + } + void PeekDRegList(RegList regs, int64_t offset) { + PeekSizeRegList(regs, offset, kDRegSize, CPURegister::kVRegister); + } + void PokeDRegList(RegList regs, int64_t offset) { + PokeSizeRegList(regs, offset, kDRegSize, CPURegister::kVRegister); + } + void PeekSRegList(RegList regs, int64_t offset) { + PeekSizeRegList(regs, offset, kSRegSize, CPURegister::kVRegister); + } + void PokeSRegList(RegList regs, int64_t offset) { + PokeSizeRegList(regs, offset, kSRegSize, CPURegister::kVRegister); + } + + + // Claim or drop stack space without actually accessing memory. 
+ // + // If the current stack pointer (as set by SetStackPointer) is sp, then it + // must be aligned to 16 bytes and the size claimed or dropped must be a + // multiple of 16 bytes. + void Claim(const Operand& size); + void Drop(const Operand& size); + + // Preserve the callee-saved registers (as defined by AAPCS64). + // + // Higher-numbered registers are pushed before lower-numbered registers, and + // thus get higher addresses. + // Floating-point registers are pushed before general-purpose registers, and + // thus get higher addresses. + // + // This method must not be called unless StackPointer() is sp, and it is + // aligned to 16 bytes. + void PushCalleeSavedRegisters(); + + // Restore the callee-saved registers (as defined by AAPCS64). + // + // Higher-numbered registers are popped after lower-numbered registers, and + // thus come from higher addresses. + // Floating-point registers are popped after general-purpose registers, and + // thus come from higher addresses. + // + // This method must not be called unless StackPointer() is sp, and it is + // aligned to 16 bytes. + void PopCalleeSavedRegisters(); + + void LoadCPURegList(CPURegList registers, const MemOperand& src); + void StoreCPURegList(CPURegList registers, const MemOperand& dst); + + // Remaining instructions are simple pass-through calls to the assembler. 
+ void Adr(const Register& rd, Label* label) { + VIXL_ASSERT(!rd.IsZero()); + SingleEmissionCheckScope guard(this); + adr(rd, label); + } + void Adrp(const Register& rd, Label* label) { + VIXL_ASSERT(!rd.IsZero()); + SingleEmissionCheckScope guard(this); + adrp(rd, label); + } + void Asr(const Register& rd, const Register& rn, unsigned shift) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + asr(rd, rn, shift); + } + void Asr(const Register& rd, const Register& rn, const Register& rm) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + VIXL_ASSERT(!rm.IsZero()); + SingleEmissionCheckScope guard(this); + asrv(rd, rn, rm); + } + + // Branch type inversion relies on these relations. + VIXL_STATIC_ASSERT((reg_zero == (reg_not_zero ^ 1)) && + (reg_bit_clear == (reg_bit_set ^ 1)) && + (always == (never ^ 1))); + + BranchType InvertBranchType(BranchType type) { + if (kBranchTypeFirstCondition <= type && type <= kBranchTypeLastCondition) { + return static_cast<BranchType>( + InvertCondition(static_cast<Condition>(type))); + } else { + return static_cast<BranchType>(type ^ 1); + } + } + + void B(Label* label, BranchType type, Register reg = NoReg, int bit = -1); + + void B(Label* label); + void B(Label* label, Condition cond); + void B(Condition cond, Label* label) { + B(label, cond); + } + void Bfm(const Register& rd, + const Register& rn, + unsigned immr, + unsigned imms) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + bfm(rd, rn, immr, imms); + } + void Bfi(const Register& rd, + const Register& rn, + unsigned lsb, + unsigned width) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + bfi(rd, rn, lsb, width); + } + void Bfxil(const Register& rd, + const Register& rn, + unsigned lsb, + unsigned width) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + bfxil(rd, 
rn, lsb, width); + } + void Bind(Label* label); + // Bind a label to a specified offset from the start of the buffer. + void BindToOffset(Label* label, ptrdiff_t offset); + void Bl(Label* label) { + SingleEmissionCheckScope guard(this); + bl(label); + } + void Blr(const Register& xn) { + VIXL_ASSERT(!xn.IsZero()); + SingleEmissionCheckScope guard(this); + blr(xn); + } + void Br(const Register& xn) { + VIXL_ASSERT(!xn.IsZero()); + SingleEmissionCheckScope guard(this); + br(xn); + } + void Brk(int code = 0) { + SingleEmissionCheckScope guard(this); + brk(code); + } + void Cbnz(const Register& rt, Label* label); + void Cbz(const Register& rt, Label* label); + void Cinc(const Register& rd, const Register& rn, Condition cond) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + cinc(rd, rn, cond); + } + void Cinv(const Register& rd, const Register& rn, Condition cond) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + cinv(rd, rn, cond); + } + void Clrex() { + SingleEmissionCheckScope guard(this); + clrex(); + } + void Cls(const Register& rd, const Register& rn) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + cls(rd, rn); + } + void Clz(const Register& rd, const Register& rn) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + clz(rd, rn); + } + void Cneg(const Register& rd, const Register& rn, Condition cond) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + cneg(rd, rn, cond); + } + void Cset(const Register& rd, Condition cond) { + VIXL_ASSERT(!rd.IsZero()); + SingleEmissionCheckScope guard(this); + cset(rd, cond); + } + void Csetm(const Register& rd, Condition cond) { + VIXL_ASSERT(!rd.IsZero()); + SingleEmissionCheckScope guard(this); + csetm(rd, cond); + } + void Csinc(const Register& rd, + const Register& rn, + 
const Register& rm, + Condition cond) { + VIXL_ASSERT(!rd.IsZero()); + // The VIXL source code contains these assertions, but the AArch64 ISR + // explicitly permits the use of zero registers. CSET itself is defined + // in terms of CSINC with WZR/XZR. + // + // VIXL_ASSERT(!rn.IsZero()); + // VIXL_ASSERT(!rm.IsZero()); + VIXL_ASSERT((cond != al) && (cond != nv)); + SingleEmissionCheckScope guard(this); + csinc(rd, rn, rm, cond); + } + void Csinv(const Register& rd, + const Register& rn, + const Register& rm, + Condition cond) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + VIXL_ASSERT(!rm.IsZero()); + VIXL_ASSERT((cond != al) && (cond != nv)); + SingleEmissionCheckScope guard(this); + csinv(rd, rn, rm, cond); + } + void Csneg(const Register& rd, + const Register& rn, + const Register& rm, + Condition cond) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + VIXL_ASSERT(!rm.IsZero()); + VIXL_ASSERT((cond != al) && (cond != nv)); + SingleEmissionCheckScope guard(this); + csneg(rd, rn, rm, cond); + } + void Dmb(BarrierDomain domain, BarrierType type) { + SingleEmissionCheckScope guard(this); + dmb(domain, type); + } + void Dsb(BarrierDomain domain, BarrierType type) { + SingleEmissionCheckScope guard(this); + dsb(domain, type); + } + void Extr(const Register& rd, + const Register& rn, + const Register& rm, + unsigned lsb) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + VIXL_ASSERT(!rm.IsZero()); + SingleEmissionCheckScope guard(this); + extr(rd, rn, rm, lsb); + } + void Fadd(const VRegister& vd, const VRegister& vn, const VRegister& vm) { + SingleEmissionCheckScope guard(this); + fadd(vd, vn, vm); + } + void Fccmp(const VRegister& vn, + const VRegister& vm, + StatusFlags nzcv, + Condition cond, + FPTrapFlags trap = DisableTrap) { + VIXL_ASSERT((cond != al) && (cond != nv)); + SingleEmissionCheckScope guard(this); + FPCCompareMacro(vn, vm, nzcv, cond, trap); + } + void Fccmpe(const VRegister& vn, + const VRegister& vm, + 
StatusFlags nzcv, + Condition cond) { + Fccmp(vn, vm, nzcv, cond, EnableTrap); + } + void Fcmp(const VRegister& vn, const VRegister& vm, + FPTrapFlags trap = DisableTrap) { + SingleEmissionCheckScope guard(this); + FPCompareMacro(vn, vm, trap); + } + void Fcmp(const VRegister& vn, double value, + FPTrapFlags trap = DisableTrap); + void Fcmpe(const VRegister& vn, double value); + void Fcmpe(const VRegister& vn, const VRegister& vm) { + Fcmp(vn, vm, EnableTrap); + } + void Fcsel(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + Condition cond) { + VIXL_ASSERT((cond != al) && (cond != nv)); + SingleEmissionCheckScope guard(this); + fcsel(vd, vn, vm, cond); + } + void Fcvt(const VRegister& vd, const VRegister& vn) { + SingleEmissionCheckScope guard(this); + fcvt(vd, vn); + } + void Fcvtl(const VRegister& vd, const VRegister& vn) { + SingleEmissionCheckScope guard(this); + fcvtl(vd, vn); + } + void Fcvtl2(const VRegister& vd, const VRegister& vn) { + SingleEmissionCheckScope guard(this); + fcvtl2(vd, vn); + } + void Fcvtn(const VRegister& vd, const VRegister& vn) { + SingleEmissionCheckScope guard(this); + fcvtn(vd, vn); + } + void Fcvtn2(const VRegister& vd, const VRegister& vn) { + SingleEmissionCheckScope guard(this); + fcvtn2(vd, vn); + } + void Fcvtxn(const VRegister& vd, const VRegister& vn) { + SingleEmissionCheckScope guard(this); + fcvtxn(vd, vn); + } + void Fcvtxn2(const VRegister& vd, const VRegister& vn) { + SingleEmissionCheckScope guard(this); + fcvtxn2(vd, vn); + } + void Fcvtas(const Register& rd, const VRegister& vn) { + VIXL_ASSERT(!rd.IsZero()); + SingleEmissionCheckScope guard(this); + fcvtas(rd, vn); + } + void Fcvtau(const Register& rd, const VRegister& vn) { + VIXL_ASSERT(!rd.IsZero()); + SingleEmissionCheckScope guard(this); + fcvtau(rd, vn); + } + void Fcvtms(const Register& rd, const VRegister& vn) { + VIXL_ASSERT(!rd.IsZero()); + SingleEmissionCheckScope guard(this); + fcvtms(rd, vn); + } + void Fcvtmu(const Register& rd, 
const VRegister& vn) { + VIXL_ASSERT(!rd.IsZero()); + SingleEmissionCheckScope guard(this); + fcvtmu(rd, vn); + } + void Fcvtns(const Register& rd, const VRegister& vn) { + VIXL_ASSERT(!rd.IsZero()); + SingleEmissionCheckScope guard(this); + fcvtns(rd, vn); + } + void Fcvtnu(const Register& rd, const VRegister& vn) { + VIXL_ASSERT(!rd.IsZero()); + SingleEmissionCheckScope guard(this); + fcvtnu(rd, vn); + } + void Fcvtps(const Register& rd, const VRegister& vn) { + VIXL_ASSERT(!rd.IsZero()); + SingleEmissionCheckScope guard(this); + fcvtps(rd, vn); + } + void Fcvtpu(const Register& rd, const VRegister& vn) { + VIXL_ASSERT(!rd.IsZero()); + SingleEmissionCheckScope guard(this); + fcvtpu(rd, vn); + } + void Fcvtzs(const Register& rd, const VRegister& vn, int fbits = 0) { + VIXL_ASSERT(!rd.IsZero()); + SingleEmissionCheckScope guard(this); + fcvtzs(rd, vn, fbits); + } + void Fjcvtzs(const Register& rd, const VRegister& vn) { + VIXL_ASSERT(!rd.IsZero()); + SingleEmissionCheckScope guard(this); + fjcvtzs(rd, vn); + } + void Fcvtzu(const Register& rd, const VRegister& vn, int fbits = 0) { + VIXL_ASSERT(!rd.IsZero()); + SingleEmissionCheckScope guard(this); + fcvtzu(rd, vn, fbits); + } + void Fdiv(const VRegister& vd, const VRegister& vn, const VRegister& vm) { + SingleEmissionCheckScope guard(this); + fdiv(vd, vn, vm); + } + void Fmax(const VRegister& vd, const VRegister& vn, const VRegister& vm) { + SingleEmissionCheckScope guard(this); + fmax(vd, vn, vm); + } + void Fmaxnm(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + SingleEmissionCheckScope guard(this); + fmaxnm(vd, vn, vm); + } + void Fmin(const VRegister& vd, const VRegister& vn, const VRegister& vm) { + SingleEmissionCheckScope guard(this); + fmin(vd, vn, vm); + } + void Fminnm(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + SingleEmissionCheckScope guard(this); + fminnm(vd, vn, vm); + } + void Fmov(VRegister vd, VRegister vn) { + SingleEmissionCheckScope guard(this); 
+ // Only emit an instruction if vd and vn are different, and they are both D + // registers. fmov(s0, s0) is not a no-op because it clears the top word of + // d0. Technically, fmov(d0, d0) is not a no-op either because it clears + // the top of q0, but VRegister does not currently support Q registers. + if (!vd.Is(vn) || !vd.Is64Bits()) { + fmov(vd, vn); + } + } + void Fmov(VRegister vd, Register rn) { + SingleEmissionCheckScope guard(this); + fmov(vd, rn); + } + void Fmov(const VRegister& vd, int index, const Register& rn) { + SingleEmissionCheckScope guard(this); + fmov(vd, index, rn); + } + void Fmov(const Register& rd, const VRegister& vn, int index) { + SingleEmissionCheckScope guard(this); + fmov(rd, vn, index); + } + + // Provide explicit double and float interfaces for FP immediate moves, rather + // than relying on implicit C++ casts. This allows signalling NaNs to be + // preserved when the immediate matches the format of vd. Most systems convert + // signalling NaNs to quiet NaNs when converting between float and double. + void Fmov(VRegister vd, double imm); + void Fmov(VRegister vd, float imm); + // Provide a template to allow other types to be converted automatically. 
+ /* Catch-all for immediate types other than double and float: converts imm with static_cast<double> and forwards to the double overload. */ template<typename T> + void Fmov(VRegister vd, T imm) { + Fmov(vd, static_cast<double>(imm)); + } + /* rd must not be the zero register. */ void Fmov(Register rd, VRegister vn) { + VIXL_ASSERT(!rd.IsZero()); + SingleEmissionCheckScope guard(this); + fmov(rd, vn); + } + /* The wrappers that follow share one shape: optional VIXL_ASSERT operand checks, then a SingleEmissionCheckScope guard (presumably enforcing that exactly one instruction is emitted - confirm against its definition), then the lower-case assembler call with the same operands. */ void Fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm) { + SingleEmissionCheckScope guard(this); + fmul(vd, vn, vm); + } + void Fnmul(const VRegister& vd, const VRegister& vn, + const VRegister& vm) { + SingleEmissionCheckScope guard(this); + fnmul(vd, vn, vm); + } + void Fmadd(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + const VRegister& va) { + SingleEmissionCheckScope guard(this); + fmadd(vd, vn, vm, va); + } + void Fmsub(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + const VRegister& va) { + SingleEmissionCheckScope guard(this); + fmsub(vd, vn, vm, va); + } + void Fnmadd(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + const VRegister& va) { + SingleEmissionCheckScope guard(this); + fnmadd(vd, vn, vm, va); + } + void Fnmsub(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + const VRegister& va) { + SingleEmissionCheckScope guard(this); + fnmsub(vd, vn, vm, va); + } + void Fsub(const VRegister& vd, const VRegister& vn, const VRegister& vm) { + SingleEmissionCheckScope guard(this); + fsub(vd, vn, vm); + } + void Hint(SystemHint code) { + SingleEmissionCheckScope guard(this); + hint(code); + } + void Hlt(int code) { + SingleEmissionCheckScope guard(this); + hlt(code); + } + void Isb() { + SingleEmissionCheckScope guard(this); + isb(); + } + void Ldar(const Register& rt, const MemOperand& src) { + SingleEmissionCheckScope guard(this); + ldar(rt, src); + } + void Ldarb(const Register& rt, const MemOperand& src) { + SingleEmissionCheckScope guard(this); + ldarb(rt, src); + } + void Ldarh(const Register& rt, const MemOperand& src) { + SingleEmissionCheckScope guard(this); + ldarh(rt, src); + } + /* rt and rt2 must not alias each other. */ void Ldaxp(const Register& rt, const 
Register& rt2, const MemOperand& src) { + VIXL_ASSERT(!rt.Aliases(rt2)); + SingleEmissionCheckScope guard(this); + ldaxp(rt, rt2, src); + } + void Ldaxr(const Register& rt, const MemOperand& src) { + SingleEmissionCheckScope guard(this); + ldaxr(rt, src); + } + void Ldaxrb(const Register& rt, const MemOperand& src) { + SingleEmissionCheckScope guard(this); + ldaxrb(rt, src); + } + void Ldaxrh(const Register& rt, const MemOperand& src) { + SingleEmissionCheckScope guard(this); + ldaxrh(rt, src); + } + +// clang-format off +#define COMPARE_AND_SWAP_SINGLE_MACRO_LIST(V) \ + V(cas, Cas) \ + V(casa, Casa) \ + V(casl, Casl) \ + V(casal, Casal) \ + V(casb, Casb) \ + V(casab, Casab) \ + V(caslb, Caslb) \ + V(casalb, Casalb) \ + V(cash, Cash) \ + V(casah, Casah) \ + V(caslh, Caslh) \ + V(casalh, Casalh) + // clang-format on + +#define DEFINE_MACRO_ASM_FUNC(ASM, MASM) \ + void MASM(const Register& rs, const Register& rt, const MemOperand& src) { \ + SingleEmissionCheckScope guard(this); \ + ASM(rs, rt, src); \ + } + COMPARE_AND_SWAP_SINGLE_MACRO_LIST(DEFINE_MACRO_ASM_FUNC) /* Defines Cas, Casa, Casl, Casal, Casb, Casab, Caslb, Casalb, Cash, Casah, Caslh and Casalh, each taking (rs, rt, src) and calling the lower-case assembler method listed beside it. */ +#undef DEFINE_MACRO_ASM_FUNC + +// clang-format off +#define COMPARE_AND_SWAP_PAIR_MACRO_LIST(V) \ + V(casp, Casp) \ + V(caspa, Caspa) \ + V(caspl, Caspl) \ + V(caspal, Caspal) + // clang-format on + +#define DEFINE_MACRO_ASM_FUNC(ASM, MASM) \ + void MASM(const Register& rs, const Register& rs2, const Register& rt, \ + const Register& rt2, const MemOperand& src) { \ + SingleEmissionCheckScope guard(this); \ + ASM(rs, rs2, rt, rt2, src); \ + } + COMPARE_AND_SWAP_PAIR_MACRO_LIST(DEFINE_MACRO_ASM_FUNC) /* Defines Casp, Caspa, Caspl and Caspal, each taking (rs, rs2, rt, rt2, src). */ +#undef DEFINE_MACRO_ASM_FUNC + +// These macros generate all the variations of the atomic memory operations, +// e.g. ldadd, ldadda, ldaddb, staddl, etc. 
+ /* Three layers of X-macros: ATOMIC_MEMORY_SIMPLE_MACRO_LIST pastes an operation name (add, clr, eor, set, smax, smin, umax, umin) onto a prefix, the *_MACRO_MODES lists paste a mode suffix onto that, and the DEFINE_MACRO_*_ASM_FUNC macros turn each resulting (MASM, ASM) pair into a wrapper method. */ +// clang-format off +#define ATOMIC_MEMORY_SIMPLE_MACRO_LIST(V, DEF, MASM_PRE, ASM_PRE) \ + V(DEF, MASM_PRE##add, ASM_PRE##add) \ + V(DEF, MASM_PRE##clr, ASM_PRE##clr) \ + V(DEF, MASM_PRE##eor, ASM_PRE##eor) \ + V(DEF, MASM_PRE##set, ASM_PRE##set) \ + V(DEF, MASM_PRE##smax, ASM_PRE##smax) \ + V(DEF, MASM_PRE##smin, ASM_PRE##smin) \ + V(DEF, MASM_PRE##umax, ASM_PRE##umax) \ + V(DEF, MASM_PRE##umin, ASM_PRE##umin) + +#define ATOMIC_MEMORY_STORE_MACRO_MODES(V, MASM, ASM) \ + V(MASM, ASM) \ + V(MASM##l, ASM##l) \ + V(MASM##b, ASM##b) \ + V(MASM##lb, ASM##lb) \ + V(MASM##h, ASM##h) \ + V(MASM##lh, ASM##lh) + +#define ATOMIC_MEMORY_LOAD_MACRO_MODES(V, MASM, ASM) \ + ATOMIC_MEMORY_STORE_MACRO_MODES(V, MASM, ASM) \ + V(MASM##a, ASM##a) \ + V(MASM##al, ASM##al) \ + V(MASM##ab, ASM##ab) \ + V(MASM##alb, ASM##alb) \ + V(MASM##ah, ASM##ah) \ + V(MASM##alh, ASM##alh) + // clang-format on + +#define DEFINE_MACRO_LOAD_ASM_FUNC(MASM, ASM) \ + void MASM(const Register& rs, const Register& rt, const MemOperand& src) { \ + SingleEmissionCheckScope guard(this); \ + ASM(rs, rt, src); \ + } +#define DEFINE_MACRO_STORE_ASM_FUNC(MASM, ASM) \ + void MASM(const Register& rs, const MemOperand& src) { \ + SingleEmissionCheckScope guard(this); \ + ASM(rs, src); \ + } + + ATOMIC_MEMORY_SIMPLE_MACRO_LIST(ATOMIC_MEMORY_LOAD_MACRO_MODES, + DEFINE_MACRO_LOAD_ASM_FUNC, + Ld, + ld) /* Defines Ld<op><mode>(rs, rt, src) for the eight operations in twelve modes each: no suffix, l, b, lb, h, lh, a, al, ab, alb, ah, alh. */ + ATOMIC_MEMORY_SIMPLE_MACRO_LIST(ATOMIC_MEMORY_STORE_MACRO_MODES, + DEFINE_MACRO_STORE_ASM_FUNC, + St, + st) /* Defines St<op><mode>(rs, src) for the eight operations in six modes each: no suffix, l, b, lb, h, lh. */ + +#define DEFINE_MACRO_SWP_ASM_FUNC(MASM, ASM) \ + void MASM(const Register& rs, const Register& rt, const MemOperand& src) { \ + SingleEmissionCheckScope guard(this); \ + ASM(rs, rt, src); \ + } + + ATOMIC_MEMORY_LOAD_MACRO_MODES(DEFINE_MACRO_SWP_ASM_FUNC, Swp, swp) /* Defines Swp plus its eleven suffixed variants (Swpl ... Swpalh), each taking (rs, rt, src). */ + +#undef DEFINE_MACRO_LOAD_ASM_FUNC +#undef DEFINE_MACRO_STORE_ASM_FUNC +#undef DEFINE_MACRO_SWP_ASM_FUNC + + void Ldnp(const CPURegister& rt, + const CPURegister& rt2, + const MemOperand& src) { + SingleEmissionCheckScope guard(this); + ldnp(rt, 
rt2, src); + } + // Provide both double and float interfaces for FP immediate loads, rather + // than relying on implicit C++ casts. This allows signalling NaNs to be + // preserved when the immediate matches the format of vt. Most systems convert + // signalling NaNs to quiet NaNs when converting between float and double. + /* A double immediate is narrowed with static_cast<float> unless vt is a 64-bit register. */ void Ldr(const VRegister& vt, double imm) { + SingleEmissionCheckScope guard(this); + if (vt.Is64Bits()) { + ldr(vt, imm); + } else { + ldr(vt, static_cast<float>(imm)); + } + } + /* A float immediate is widened with static_cast<double> unless vt is a 32-bit register. */ void Ldr(const VRegister& vt, float imm) { + SingleEmissionCheckScope guard(this); + if (vt.Is32Bits()) { + ldr(vt, imm); + } else { + ldr(vt, static_cast<double>(imm)); + } + } + /* NOTE(review): this Q-register literal overload is commented out; the reason is not stated here. + void Ldr(const VRegister& vt, uint64_t high64, uint64_t low64) { + VIXL_ASSERT(vt.IsQ()); + SingleEmissionCheckScope guard(this); + ldr(vt, new Literal<uint64_t>(high64, low64, + &literal_pool_, + RawLiteral::kDeletedOnPlacementByPool)); + } + */ + void Ldr(const Register& rt, uint64_t imm) { + VIXL_ASSERT(!rt.IsZero()); + SingleEmissionCheckScope guard(this); + ldr(rt, imm); + } + void Ldrsw(const Register& rt, uint32_t imm) { + VIXL_ASSERT(!rt.IsZero()); + SingleEmissionCheckScope guard(this); + ldrsw(rt, imm); + } + void Ldxp(const Register& rt, const Register& rt2, const MemOperand& src) { + VIXL_ASSERT(!rt.Aliases(rt2)); + SingleEmissionCheckScope guard(this); + ldxp(rt, rt2, src); + } + void Ldxr(const Register& rt, const MemOperand& src) { + SingleEmissionCheckScope guard(this); + ldxr(rt, src); + } + void Ldxrb(const Register& rt, const MemOperand& src) { + SingleEmissionCheckScope guard(this); + ldxrb(rt, src); + } + void Ldxrh(const Register& rt, const MemOperand& src) { + SingleEmissionCheckScope guard(this); + ldxrh(rt, src); + } + void Lsl(const Register& rd, const Register& rn, unsigned shift) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + lsl(rd, rn, shift); + } + /* Shift amount taken from register rm: emits lslv. */ void Lsl(const Register& rd, const Register& rn, const 
Register& rm) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + VIXL_ASSERT(!rm.IsZero()); + SingleEmissionCheckScope guard(this); + lslv(rd, rn, rm); + } + void Lsr(const Register& rd, const Register& rn, unsigned shift) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + lsr(rd, rn, shift); + } + /* Shift amount taken from register rm: emits lsrv. */ void Lsr(const Register& rd, const Register& rn, const Register& rm) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + VIXL_ASSERT(!rm.IsZero()); + SingleEmissionCheckScope guard(this); + lsrv(rd, rn, rm); + } + void Madd(const Register& rd, + const Register& rn, + const Register& rm, + const Register& ra) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + VIXL_ASSERT(!rm.IsZero()); + VIXL_ASSERT(!ra.IsZero()); + SingleEmissionCheckScope guard(this); + madd(rd, rn, rm, ra); + } + void Mneg(const Register& rd, const Register& rn, const Register& rm) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + VIXL_ASSERT(!rm.IsZero()); + SingleEmissionCheckScope guard(this); + mneg(rd, rn, rm); + } + void Mov(const Register& rd, const Register& rn) { + SingleEmissionCheckScope guard(this); + mov(rd, rn); + } + void Movk(const Register& rd, uint64_t imm, int shift = -1) { + VIXL_ASSERT(!rd.IsZero()); + SingleEmissionCheckScope guard(this); + movk(rd, imm, shift); + } + void Mrs(const Register& rt, SystemRegister sysreg) { + VIXL_ASSERT(!rt.IsZero()); + SingleEmissionCheckScope guard(this); + mrs(rt, sysreg); + } + void Msr(SystemRegister sysreg, const Register& rt) { + VIXL_ASSERT(!rt.IsZero()); + SingleEmissionCheckScope guard(this); + msr(sysreg, rt); + } + /* rt defaults to xzr when the caller omits it. */ void Sys(int op1, int crn, int crm, int op2, const Register& rt = xzr) { + SingleEmissionCheckScope guard(this); + sys(op1, crn, crm, op2, rt); + } + void Dc(DataCacheOp op, const Register& rt) { + SingleEmissionCheckScope guard(this); + dc(op, rt); + } + void Ic(InstructionCacheOp op, const Register& rt) { + SingleEmissionCheckScope 
guard(this); + ic(op, rt); + } + void Msub(const Register& rd, + const Register& rn, + const Register& rm, + const Register& ra) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + VIXL_ASSERT(!rm.IsZero()); + VIXL_ASSERT(!ra.IsZero()); + SingleEmissionCheckScope guard(this); + msub(rd, rn, rm, ra); + } + void Mul(const Register& rd, const Register& rn, const Register& rm) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + VIXL_ASSERT(!rm.IsZero()); + SingleEmissionCheckScope guard(this); + mul(rd, rn, rm); + } + void Nop() { + SingleEmissionCheckScope guard(this); + nop(); + } + void Csdb() { + SingleEmissionCheckScope guard(this); + csdb(); + } + void Rbit(const Register& rd, const Register& rn) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + rbit(rd, rn); + } + /* xn defaults to lr and must not be the zero register. */ void Ret(const Register& xn = lr) { + VIXL_ASSERT(!xn.IsZero()); + SingleEmissionCheckScope guard(this); + ret(xn); + } + void Rev(const Register& rd, const Register& rn) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + rev(rd, rn); + } + void Rev16(const Register& rd, const Register& rn) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + rev16(rd, rn); + } + void Rev32(const Register& rd, const Register& rn) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + rev32(rd, rn); + } + void Ror(const Register& rd, const Register& rs, unsigned shift) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rs.IsZero()); + SingleEmissionCheckScope guard(this); + ror(rd, rs, shift); + } + /* Rotate amount taken from register rm: emits rorv. */ void Ror(const Register& rd, const Register& rn, const Register& rm) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + VIXL_ASSERT(!rm.IsZero()); + SingleEmissionCheckScope guard(this); + rorv(rd, rn, rm); + } + void Sbfiz(const Register& rd, + const Register& rn, + unsigned lsb, + unsigned width) { + 
VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + sbfiz(rd, rn, lsb, width); + } + void Sbfm(const Register& rd, + const Register& rn, + unsigned immr, + unsigned imms) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + sbfm(rd, rn, immr, imms); + } + void Sbfx(const Register& rd, + const Register& rn, + unsigned lsb, + unsigned width) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + sbfx(rd, rn, lsb, width); + } + void Scvtf(const VRegister& vd, const Register& rn, int fbits = 0) { + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + scvtf(vd, rn, fbits); + } + void Sdiv(const Register& rd, const Register& rn, const Register& rm) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + VIXL_ASSERT(!rm.IsZero()); + SingleEmissionCheckScope guard(this); + sdiv(rd, rn, rm); + } + void Smaddl(const Register& rd, + const Register& rn, + const Register& rm, + const Register& ra) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + VIXL_ASSERT(!rm.IsZero()); + VIXL_ASSERT(!ra.IsZero()); + SingleEmissionCheckScope guard(this); + smaddl(rd, rn, rm, ra); + } + void Smsubl(const Register& rd, + const Register& rn, + const Register& rm, + const Register& ra) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + VIXL_ASSERT(!rm.IsZero()); + VIXL_ASSERT(!ra.IsZero()); + SingleEmissionCheckScope guard(this); + smsubl(rd, rn, rm, ra); + } + void Smull(const Register& rd, const Register& rn, const Register& rm) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + VIXL_ASSERT(!rm.IsZero()); + SingleEmissionCheckScope guard(this); + smull(rd, rn, rm); + } + void Smulh(const Register& xd, const Register& xn, const Register& xm) { + VIXL_ASSERT(!xd.IsZero()); + VIXL_ASSERT(!xn.IsZero()); + VIXL_ASSERT(!xm.IsZero()); + SingleEmissionCheckScope guard(this); + smulh(xd, xn, xm); + } + void 
Stlr(const Register& rt, const MemOperand& dst) { + SingleEmissionCheckScope guard(this); + stlr(rt, dst); + } + void Stlrb(const Register& rt, const MemOperand& dst) { + SingleEmissionCheckScope guard(this); + stlrb(rt, dst); + } + void Stlrh(const Register& rt, const MemOperand& dst) { + SingleEmissionCheckScope guard(this); + stlrh(rt, dst); + } + /* Stlxp/Stlxr/Stlxrb/Stlxrh and Stxp/Stxr/Stxrb/Stxrh all assert that rs does not alias dst.base() or the rt register(s). */ void Stlxp(const Register& rs, + const Register& rt, + const Register& rt2, + const MemOperand& dst) { + VIXL_ASSERT(!rs.Aliases(dst.base())); + VIXL_ASSERT(!rs.Aliases(rt)); + VIXL_ASSERT(!rs.Aliases(rt2)); + SingleEmissionCheckScope guard(this); + stlxp(rs, rt, rt2, dst); + } + void Stlxr(const Register& rs, const Register& rt, const MemOperand& dst) { + VIXL_ASSERT(!rs.Aliases(dst.base())); + VIXL_ASSERT(!rs.Aliases(rt)); + SingleEmissionCheckScope guard(this); + stlxr(rs, rt, dst); + } + void Stlxrb(const Register& rs, const Register& rt, const MemOperand& dst) { + VIXL_ASSERT(!rs.Aliases(dst.base())); + VIXL_ASSERT(!rs.Aliases(rt)); + SingleEmissionCheckScope guard(this); + stlxrb(rs, rt, dst); + } + void Stlxrh(const Register& rs, const Register& rt, const MemOperand& dst) { + VIXL_ASSERT(!rs.Aliases(dst.base())); + VIXL_ASSERT(!rs.Aliases(rt)); + SingleEmissionCheckScope guard(this); + stlxrh(rs, rt, dst); + } + void Stnp(const CPURegister& rt, + const CPURegister& rt2, + const MemOperand& dst) { + SingleEmissionCheckScope guard(this); + stnp(rt, rt2, dst); + } + void Stxp(const Register& rs, + const Register& rt, + const Register& rt2, + const MemOperand& dst) { + VIXL_ASSERT(!rs.Aliases(dst.base())); + VIXL_ASSERT(!rs.Aliases(rt)); + VIXL_ASSERT(!rs.Aliases(rt2)); + SingleEmissionCheckScope guard(this); + stxp(rs, rt, rt2, dst); + } + void Stxr(const Register& rs, const Register& rt, const MemOperand& dst) { + VIXL_ASSERT(!rs.Aliases(dst.base())); + VIXL_ASSERT(!rs.Aliases(rt)); + SingleEmissionCheckScope guard(this); + stxr(rs, rt, dst); + } + void Stxrb(const Register& rs, const Register& rt, const 
MemOperand& dst) { + VIXL_ASSERT(!rs.Aliases(dst.base())); + VIXL_ASSERT(!rs.Aliases(rt)); + SingleEmissionCheckScope guard(this); + stxrb(rs, rt, dst); + } + void Stxrh(const Register& rs, const Register& rt, const MemOperand& dst) { + VIXL_ASSERT(!rs.Aliases(dst.base())); + VIXL_ASSERT(!rs.Aliases(rt)); + SingleEmissionCheckScope guard(this); + stxrh(rs, rt, dst); + } + void Svc(int code) { + SingleEmissionCheckScope guard(this); + svc(code); + } + void Sxtb(const Register& rd, const Register& rn) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + sxtb(rd, rn); + } + void Sxth(const Register& rd, const Register& rn) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + sxth(rd, rn); + } + void Sxtw(const Register& rd, const Register& rn) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + sxtw(rd, rn); + } + /* Tbl and Tbx each have four overloads taking one to four vn registers followed by vm. */ void Tbl(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + SingleEmissionCheckScope guard(this); + tbl(vd, vn, vm); + } + void Tbl(const VRegister& vd, + const VRegister& vn, + const VRegister& vn2, + const VRegister& vm) { + SingleEmissionCheckScope guard(this); + tbl(vd, vn, vn2, vm); + } + void Tbl(const VRegister& vd, + const VRegister& vn, + const VRegister& vn2, + const VRegister& vn3, + const VRegister& vm) { + SingleEmissionCheckScope guard(this); + tbl(vd, vn, vn2, vn3, vm); + } + void Tbl(const VRegister& vd, + const VRegister& vn, + const VRegister& vn2, + const VRegister& vn3, + const VRegister& vn4, + const VRegister& vm) { + SingleEmissionCheckScope guard(this); + tbl(vd, vn, vn2, vn3, vn4, vm); + } + void Tbx(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + SingleEmissionCheckScope guard(this); + tbx(vd, vn, vm); + } + void Tbx(const VRegister& vd, + const VRegister& vn, + const VRegister& vn2, + const VRegister& vm) { + SingleEmissionCheckScope 
guard(this); + tbx(vd, vn, vn2, vm); + } + void Tbx(const VRegister& vd, + const VRegister& vn, + const VRegister& vn2, + const VRegister& vn3, + const VRegister& vm) { + SingleEmissionCheckScope guard(this); + tbx(vd, vn, vn2, vn3, vm); + } + void Tbx(const VRegister& vd, + const VRegister& vn, + const VRegister& vn2, + const VRegister& vn3, + const VRegister& vn4, + const VRegister& vm) { + SingleEmissionCheckScope guard(this); + tbx(vd, vn, vn2, vn3, vn4, vm); + } + /* Tbnz and Tbz are declarations only; their definitions are not in this part of the file. */ void Tbnz(const Register& rt, unsigned bit_pos, Label* label); + void Tbz(const Register& rt, unsigned bit_pos, Label* label); + void Ubfiz(const Register& rd, + const Register& rn, + unsigned lsb, + unsigned width) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + ubfiz(rd, rn, lsb, width); + } + void Ubfm(const Register& rd, + const Register& rn, + unsigned immr, + unsigned imms) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + ubfm(rd, rn, immr, imms); + } + void Ubfx(const Register& rd, + const Register& rn, + unsigned lsb, + unsigned width) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + ubfx(rd, rn, lsb, width); + } + void Ucvtf(const VRegister& vd, const Register& rn, int fbits = 0) { + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + ucvtf(vd, rn, fbits); + } + void Udiv(const Register& rd, const Register& rn, const Register& rm) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + VIXL_ASSERT(!rm.IsZero()); + SingleEmissionCheckScope guard(this); + udiv(rd, rn, rm); + } + void Umaddl(const Register& rd, + const Register& rn, + const Register& rm, + const Register& ra) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + VIXL_ASSERT(!rm.IsZero()); + VIXL_ASSERT(!ra.IsZero()); + SingleEmissionCheckScope guard(this); + umaddl(rd, rn, rm, ra); + } + void Umull(const Register& rd, + const Register& rn, 
+ const Register& rm) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + VIXL_ASSERT(!rm.IsZero()); + SingleEmissionCheckScope guard(this); + umull(rd, rn, rm); + } + void Umulh(const Register& xd, const Register& xn, const Register& xm) { + VIXL_ASSERT(!xd.IsZero()); + VIXL_ASSERT(!xn.IsZero()); + VIXL_ASSERT(!xm.IsZero()); + SingleEmissionCheckScope guard(this); + umulh(xd, xn, xm); + } + void Umsubl(const Register& rd, + const Register& rn, + const Register& rm, + const Register& ra) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + VIXL_ASSERT(!rm.IsZero()); + VIXL_ASSERT(!ra.IsZero()); + SingleEmissionCheckScope guard(this); + umsubl(rd, rn, rm, ra); + } + /* Unlike its neighbours, Unreachable has no same-named assembler call: it passes UNDEFINED_INST_PATTERN straight to Emit(). */ + void Unreachable() { + SingleEmissionCheckScope guard(this); + Emit(UNDEFINED_INST_PATTERN); + } + + void Uxtb(const Register& rd, const Register& rn) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + uxtb(rd, rn); + } + void Uxth(const Register& rd, const Register& rn) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + uxth(rd, rn); + } + void Uxtw(const Register& rd, const Register& rn) { + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + uxtw(rd, rn); + } + + // NEON 3 vector register instructions. 
+ #define NEON_3VREG_MACRO_LIST(V) \ + V(add, Add) \ + V(addhn, Addhn) \ + V(addhn2, Addhn2) \ + V(addp, Addp) \ + V(and_, And) \ + V(bic, Bic) \ + V(bif, Bif) \ + V(bit, Bit) \ + V(bsl, Bsl) \ + V(cmeq, Cmeq) \ + V(cmge, Cmge) \ + V(cmgt, Cmgt) \ + V(cmhi, Cmhi) \ + V(cmhs, Cmhs) \ + V(cmtst, Cmtst) \ + V(eor, Eor) \ + V(fabd, Fabd) \ + V(facge, Facge) \ + V(facgt, Facgt) \ + V(faddp, Faddp) \ + V(fcmeq, Fcmeq) \ + V(fcmge, Fcmge) \ + V(fcmgt, Fcmgt) \ + V(fmaxnmp, Fmaxnmp) \ + V(fmaxp, Fmaxp) \ + V(fminnmp, Fminnmp) \ + V(fminp, Fminp) \ + V(fmla, Fmla) \ + V(fmls, Fmls) \ + V(fmulx, Fmulx) \ + V(frecps, Frecps) \ + V(frsqrts, Frsqrts) \ + V(mla, Mla) \ + V(mls, Mls) \ + V(mul, Mul) \ + V(orn, Orn) \ + V(orr, Orr) \ + V(pmul, Pmul) \ + V(pmull, Pmull) \ + V(pmull2, Pmull2) \ + V(raddhn, Raddhn) \ + V(raddhn2, Raddhn2) \ + V(rsubhn, Rsubhn) \ + V(rsubhn2, Rsubhn2) \ + V(saba, Saba) \ + V(sabal, Sabal) \ + V(sabal2, Sabal2) \ + V(sabd, Sabd) \ + V(sabdl, Sabdl) \ + V(sabdl2, Sabdl2) \ + V(saddl, Saddl) \ + V(saddl2, Saddl2) \ + V(saddw, Saddw) \ + V(saddw2, Saddw2) \ + V(shadd, Shadd) \ + V(shsub, Shsub) \ + V(smax, Smax) \ + V(smaxp, Smaxp) \ + V(smin, Smin) \ + V(sminp, Sminp) \ + V(smlal, Smlal) \ + V(smlal2, Smlal2) \ + V(smlsl, Smlsl) \ + V(smlsl2, Smlsl2) \ + V(smull, Smull) \ + V(smull2, Smull2) \ + V(sqadd, Sqadd) \ + V(sqdmlal, Sqdmlal) \ + V(sqdmlal2, Sqdmlal2) \ + V(sqdmlsl, Sqdmlsl) \ + V(sqdmlsl2, Sqdmlsl2) \ + V(sqdmulh, Sqdmulh) \ + V(sqdmull, Sqdmull) \ + V(sqdmull2, Sqdmull2) \ + V(sqrdmulh, Sqrdmulh) \ + V(sqrshl, Sqrshl) \ + V(sqshl, Sqshl) \ + V(sqsub, Sqsub) \ + V(srhadd, Srhadd) \ + V(srshl, Srshl) \ + V(sshl, Sshl) \ + V(ssubl, Ssubl) \ + V(ssubl2, Ssubl2) \ + V(ssubw, Ssubw) \ + V(ssubw2, Ssubw2) \ + V(sub, Sub) \ + V(subhn, Subhn) \ + V(subhn2, Subhn2) \ + V(trn1, Trn1) \ + V(trn2, Trn2) \ + V(uaba, Uaba) \ + V(uabal, Uabal) \ + V(uabal2, Uabal2) \ + V(uabd, Uabd) \ + V(uabdl, Uabdl) \ + V(uabdl2, Uabdl2) \ + V(uaddl, Uaddl) \ + V(uaddl2, 
Uaddl2) \ + V(uaddw, Uaddw) \ + V(uaddw2, Uaddw2) \ + V(uhadd, Uhadd) \ + V(uhsub, Uhsub) \ + V(umax, Umax) \ + V(umaxp, Umaxp) \ + V(umin, Umin) \ + V(uminp, Uminp) \ + V(umlal, Umlal) \ + V(umlal2, Umlal2) \ + V(umlsl, Umlsl) \ + V(umlsl2, Umlsl2) \ + V(umull, Umull) \ + V(umull2, Umull2) \ + V(uqadd, Uqadd) \ + V(uqrshl, Uqrshl) \ + V(uqshl, Uqshl) \ + V(uqsub, Uqsub) \ + V(urhadd, Urhadd) \ + V(urshl, Urshl) \ + V(ushl, Ushl) \ + V(usubl, Usubl) \ + V(usubl2, Usubl2) \ + V(usubw, Usubw) \ + V(usubw2, Usubw2) \ + V(uzp1, Uzp1) \ + V(uzp2, Uzp2) \ + V(zip1, Zip1) \ + V(zip2, Zip2) + + #define DEFINE_MACRO_ASM_FUNC(ASM, MASM) \ + void MASM(const VRegister& vd, \ + const VRegister& vn, \ + const VRegister& vm) { \ + SingleEmissionCheckScope guard(this); \ + ASM(vd, vn, vm); \ + } + NEON_3VREG_MACRO_LIST(DEFINE_MACRO_ASM_FUNC) /* One Masm(vd, vn, vm) wrapper per V(asm, Masm) entry; each opens a SingleEmissionCheckScope and calls asm(vd, vn, vm). DEFINE_MACRO_ASM_FUNC is redefined and #undef'd again for every list. */ + #undef DEFINE_MACRO_ASM_FUNC + + // NEON 2 vector register instructions. + #define NEON_2VREG_MACRO_LIST(V) \ + V(abs, Abs) \ + V(addp, Addp) \ + V(addv, Addv) \ + V(cls, Cls) \ + V(clz, Clz) \ + V(cnt, Cnt) \ + V(fabs, Fabs) \ + V(faddp, Faddp) \ + V(fcvtas, Fcvtas) \ + V(fcvtau, Fcvtau) \ + V(fcvtms, Fcvtms) \ + V(fcvtmu, Fcvtmu) \ + V(fcvtns, Fcvtns) \ + V(fcvtnu, Fcvtnu) \ + V(fcvtps, Fcvtps) \ + V(fcvtpu, Fcvtpu) \ + V(fmaxnmp, Fmaxnmp) \ + V(fmaxnmv, Fmaxnmv) \ + V(fmaxp, Fmaxp) \ + V(fmaxv, Fmaxv) \ + V(fminnmp, Fminnmp) \ + V(fminnmv, Fminnmv) \ + V(fminp, Fminp) \ + V(fminv, Fminv) \ + V(fneg, Fneg) \ + V(frecpe, Frecpe) \ + V(frecpx, Frecpx) \ + V(frinta, Frinta) \ + V(frinti, Frinti) \ + V(frintm, Frintm) \ + V(frintn, Frintn) \ + V(frintp, Frintp) \ + V(frintx, Frintx) \ + V(frintz, Frintz) \ + V(frsqrte, Frsqrte) \ + V(fsqrt, Fsqrt) \ + V(mov, Mov) \ + V(mvn, Mvn) \ + V(neg, Neg) \ + V(not_, Not) \ + V(rbit, Rbit) \ + V(rev16, Rev16) \ + V(rev32, Rev32) \ + V(rev64, Rev64) \ + V(sadalp, Sadalp) \ + V(saddlp, Saddlp) \ + V(saddlv, Saddlv) \ + V(smaxv, Smaxv) \ + V(sminv, Sminv) \ + V(sqabs, Sqabs) \ + V(sqneg, Sqneg) \ + V(sqxtn, 
Sqxtn) \ + V(sqxtn2, Sqxtn2) \ + V(sqxtun, Sqxtun) \ + V(sqxtun2, Sqxtun2) \ + V(suqadd, Suqadd) \ + V(sxtl, Sxtl) \ + V(sxtl2, Sxtl2) \ + V(uadalp, Uadalp) \ + V(uaddlp, Uaddlp) \ + V(uaddlv, Uaddlv) \ + V(umaxv, Umaxv) \ + V(uminv, Uminv) \ + V(uqxtn, Uqxtn) \ + V(uqxtn2, Uqxtn2) \ + V(urecpe, Urecpe) \ + V(ursqrte, Ursqrte) \ + V(usqadd, Usqadd) \ + V(uxtl, Uxtl) \ + V(uxtl2, Uxtl2) \ + V(xtn, Xtn) \ + V(xtn2, Xtn2) + + #define DEFINE_MACRO_ASM_FUNC(ASM, MASM) \ + void MASM(const VRegister& vd, \ + const VRegister& vn) { \ + SingleEmissionCheckScope guard(this); \ + ASM(vd, vn); \ + } + NEON_2VREG_MACRO_LIST(DEFINE_MACRO_ASM_FUNC) /* One Masm(vd, vn) wrapper per entry. Names that also appear in NEON_3VREG_MACRO_LIST (Addp, Faddp, Fmaxnmp, Fmaxp, Fminnmp, Fminp) become additional overloads that differ in parameter count. */ + #undef DEFINE_MACRO_ASM_FUNC + + // NEON 2 vector register with immediate instructions. + #define NEON_2VREG_FPIMM_MACRO_LIST(V) \ + V(fcmeq, Fcmeq) \ + V(fcmge, Fcmge) \ + V(fcmgt, Fcmgt) \ + V(fcmle, Fcmle) \ + V(fcmlt, Fcmlt) + + #define DEFINE_MACRO_ASM_FUNC(ASM, MASM) \ + void MASM(const VRegister& vd, \ + const VRegister& vn, \ + double imm) { \ + SingleEmissionCheckScope guard(this); \ + ASM(vd, vn, imm); \ + } + NEON_2VREG_FPIMM_MACRO_LIST(DEFINE_MACRO_ASM_FUNC) /* One Masm(vd, vn, double imm) wrapper per entry. Fcmeq, Fcmge and Fcmgt also have (vd, vn, vm) overloads from NEON_3VREG_MACRO_LIST. */ + #undef DEFINE_MACRO_ASM_FUNC + + // NEON by element instructions. 
+ #define NEON_BYELEMENT_MACRO_LIST(V) \ + V(fmul, Fmul) \ + V(fmla, Fmla) \ + V(fmls, Fmls) \ + V(fmulx, Fmulx) \ + V(mul, Mul) \ + V(mla, Mla) \ + V(mls, Mls) \ + V(sqdmulh, Sqdmulh) \ + V(sqrdmulh, Sqrdmulh) \ + V(sqdmull, Sqdmull) \ + V(sqdmull2, Sqdmull2) \ + V(sqdmlal, Sqdmlal) \ + V(sqdmlal2, Sqdmlal2) \ + V(sqdmlsl, Sqdmlsl) \ + V(sqdmlsl2, Sqdmlsl2) \ + V(smull, Smull) \ + V(smull2, Smull2) \ + V(smlal, Smlal) \ + V(smlal2, Smlal2) \ + V(smlsl, Smlsl) \ + V(smlsl2, Smlsl2) \ + V(umull, Umull) \ + V(umull2, Umull2) \ + V(umlal, Umlal) \ + V(umlal2, Umlal2) \ + V(umlsl, Umlsl) \ + V(umlsl2, Umlsl2) + + #define DEFINE_MACRO_ASM_FUNC(ASM, MASM) \ + void MASM(const VRegister& vd, \ + const VRegister& vn, \ + const VRegister& vm, \ + int vm_index \ + ) { \ + SingleEmissionCheckScope guard(this); \ + ASM(vd, vn, vm, vm_index); \ + } + NEON_BYELEMENT_MACRO_LIST(DEFINE_MACRO_ASM_FUNC) + #undef DEFINE_MACRO_ASM_FUNC + + #define NEON_2VREG_SHIFT_MACRO_LIST(V) \ + V(rshrn, Rshrn) \ + V(rshrn2, Rshrn2) \ + V(shl, Shl) \ + V(shll, Shll) \ + V(shll2, Shll2) \ + V(shrn, Shrn) \ + V(shrn2, Shrn2) \ + V(sli, Sli) \ + V(sqrshrn, Sqrshrn) \ + V(sqrshrn2, Sqrshrn2) \ + V(sqrshrun, Sqrshrun) \ + V(sqrshrun2, Sqrshrun2) \ + V(sqshl, Sqshl) \ + V(sqshlu, Sqshlu) \ + V(sqshrn, Sqshrn) \ + V(sqshrn2, Sqshrn2) \ + V(sqshrun, Sqshrun) \ + V(sqshrun2, Sqshrun2) \ + V(sri, Sri) \ + V(srshr, Srshr) \ + V(srsra, Srsra) \ + V(sshll, Sshll) \ + V(sshll2, Sshll2) \ + V(sshr, Sshr) \ + V(ssra, Ssra) \ + V(uqrshrn, Uqrshrn) \ + V(uqrshrn2, Uqrshrn2) \ + V(uqshl, Uqshl) \ + V(uqshrn, Uqshrn) \ + V(uqshrn2, Uqshrn2) \ + V(urshr, Urshr) \ + V(ursra, Ursra) \ + V(ushll, Ushll) \ + V(ushll2, Ushll2) \ + V(ushr, Ushr) \ + V(usra, Usra) \ + + #define DEFINE_MACRO_ASM_FUNC(ASM, MASM) \ + void MASM(const VRegister& vd, \ + const VRegister& vn, \ + int shift) { \ + SingleEmissionCheckScope guard(this); \ + ASM(vd, vn, shift); \ + } + NEON_2VREG_SHIFT_MACRO_LIST(DEFINE_MACRO_ASM_FUNC) + #undef 
DEFINE_MACRO_ASM_FUNC + + void Bic(const VRegister& vd, + const int imm8, + const int left_shift = 0) { + SingleEmissionCheckScope guard(this); + bic(vd, imm8, left_shift); + } + void Cmeq(const VRegister& vd, + const VRegister& vn, + int imm) { + SingleEmissionCheckScope guard(this); + cmeq(vd, vn, imm); + } + void Cmge(const VRegister& vd, + const VRegister& vn, + int imm) { + SingleEmissionCheckScope guard(this); + cmge(vd, vn, imm); + } + void Cmgt(const VRegister& vd, + const VRegister& vn, + int imm) { + SingleEmissionCheckScope guard(this); + cmgt(vd, vn, imm); + } + void Cmle(const VRegister& vd, + const VRegister& vn, + int imm) { + SingleEmissionCheckScope guard(this); + cmle(vd, vn, imm); + } + void Cmlt(const VRegister& vd, + const VRegister& vn, + int imm) { + SingleEmissionCheckScope guard(this); + cmlt(vd, vn, imm); + } + void Dup(const VRegister& vd, + const VRegister& vn, + int index) { + SingleEmissionCheckScope guard(this); + dup(vd, vn, index); + } + void Dup(const VRegister& vd, + const Register& rn) { + SingleEmissionCheckScope guard(this); + dup(vd, rn); + } + void Ext(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int index) { + SingleEmissionCheckScope guard(this); + ext(vd, vn, vm, index); + } + void Ins(const VRegister& vd, + int vd_index, + const VRegister& vn, + int vn_index) { + SingleEmissionCheckScope guard(this); + ins(vd, vd_index, vn, vn_index); + } + void Ins(const VRegister& vd, + int vd_index, + const Register& rn) { + SingleEmissionCheckScope guard(this); + ins(vd, vd_index, rn); + } + void Ld1(const VRegister& vt, + const MemOperand& src) { + SingleEmissionCheckScope guard(this); + ld1(vt, src); + } + void Ld1(const VRegister& vt, + const VRegister& vt2, + const MemOperand& src) { + SingleEmissionCheckScope guard(this); + ld1(vt, vt2, src); + } + void Ld1(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const MemOperand& src) { + SingleEmissionCheckScope guard(this); + ld1(vt, vt2, 
vt3, src); + } + void Ld1(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const VRegister& vt4, + const MemOperand& src) { + SingleEmissionCheckScope guard(this); + ld1(vt, vt2, vt3, vt4, src); + } + void Ld1(const VRegister& vt, + int lane, + const MemOperand& src) { + SingleEmissionCheckScope guard(this); + ld1(vt, lane, src); + } + void Ld1r(const VRegister& vt, + const MemOperand& src) { + SingleEmissionCheckScope guard(this); + ld1r(vt, src); + } + void Ld2(const VRegister& vt, + const VRegister& vt2, + const MemOperand& src) { + SingleEmissionCheckScope guard(this); + ld2(vt, vt2, src); + } + void Ld2(const VRegister& vt, + const VRegister& vt2, + int lane, + const MemOperand& src) { + SingleEmissionCheckScope guard(this); + ld2(vt, vt2, lane, src); + } + void Ld2r(const VRegister& vt, + const VRegister& vt2, + const MemOperand& src) { + SingleEmissionCheckScope guard(this); + ld2r(vt, vt2, src); + } + void Ld3(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const MemOperand& src) { + SingleEmissionCheckScope guard(this); + ld3(vt, vt2, vt3, src); + } + void Ld3(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + int lane, + const MemOperand& src) { + SingleEmissionCheckScope guard(this); + ld3(vt, vt2, vt3, lane, src); + } + void Ld3r(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const MemOperand& src) { + SingleEmissionCheckScope guard(this); + ld3r(vt, vt2, vt3, src); + } + void Ld4(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const VRegister& vt4, + const MemOperand& src) { + SingleEmissionCheckScope guard(this); + ld4(vt, vt2, vt3, vt4, src); + } + void Ld4(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const VRegister& vt4, + int lane, + const MemOperand& src) { + SingleEmissionCheckScope guard(this); + ld4(vt, vt2, vt3, vt4, lane, src); + } + void Ld4r(const VRegister& vt, + const VRegister& vt2, + const VRegister& 
vt3, + const VRegister& vt4, + const MemOperand& src) { + SingleEmissionCheckScope guard(this); + ld4r(vt, vt2, vt3, vt4, src); + } + void Mov(const VRegister& vd, + int vd_index, + const VRegister& vn, + int vn_index) { + SingleEmissionCheckScope guard(this); + mov(vd, vd_index, vn, vn_index); + } + void Mov(const VRegister& vd, + const VRegister& vn, + int index) { + SingleEmissionCheckScope guard(this); + mov(vd, vn, index); + } + void Mov(const VRegister& vd, + int vd_index, + const Register& rn) { + SingleEmissionCheckScope guard(this); + mov(vd, vd_index, rn); + } + void Mov(const Register& rd, + const VRegister& vn, + int vn_index) { + SingleEmissionCheckScope guard(this); + mov(rd, vn, vn_index); + } + void Movi(const VRegister& vd, + uint64_t imm, + Shift shift = LSL, + int shift_amount = 0); + void Movi(const VRegister& vd, uint64_t hi, uint64_t lo); + void Mvni(const VRegister& vd, + const int imm8, + Shift shift = LSL, + const int shift_amount = 0) { + SingleEmissionCheckScope guard(this); + mvni(vd, imm8, shift, shift_amount); + } + void Orr(const VRegister& vd, + const int imm8, + const int left_shift = 0) { + SingleEmissionCheckScope guard(this); + orr(vd, imm8, left_shift); + } + void Scvtf(const VRegister& vd, + const VRegister& vn, + int fbits = 0) { + SingleEmissionCheckScope guard(this); + scvtf(vd, vn, fbits); + } + void Ucvtf(const VRegister& vd, + const VRegister& vn, + int fbits = 0) { + SingleEmissionCheckScope guard(this); + ucvtf(vd, vn, fbits); + } + void Fcvtzs(const VRegister& vd, + const VRegister& vn, + int fbits = 0) { + SingleEmissionCheckScope guard(this); + fcvtzs(vd, vn, fbits); + } + void Fcvtzu(const VRegister& vd, + const VRegister& vn, + int fbits = 0) { + SingleEmissionCheckScope guard(this); + fcvtzu(vd, vn, fbits); + } + void St1(const VRegister& vt, + const MemOperand& dst) { + SingleEmissionCheckScope guard(this); + st1(vt, dst); + } + void St1(const VRegister& vt, + const VRegister& vt2, + const MemOperand& dst) { + 
SingleEmissionCheckScope guard(this); + st1(vt, vt2, dst); + } + void St1(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const MemOperand& dst) { + SingleEmissionCheckScope guard(this); + st1(vt, vt2, vt3, dst); + } + void St1(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const VRegister& vt4, + const MemOperand& dst) { + SingleEmissionCheckScope guard(this); + st1(vt, vt2, vt3, vt4, dst); + } + void St1(const VRegister& vt, + int lane, + const MemOperand& dst) { + SingleEmissionCheckScope guard(this); + st1(vt, lane, dst); + } + void St2(const VRegister& vt, + const VRegister& vt2, + const MemOperand& dst) { + SingleEmissionCheckScope guard(this); + st2(vt, vt2, dst); + } + void St3(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const MemOperand& dst) { + SingleEmissionCheckScope guard(this); + st3(vt, vt2, vt3, dst); + } + void St4(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const VRegister& vt4, + const MemOperand& dst) { + SingleEmissionCheckScope guard(this); + st4(vt, vt2, vt3, vt4, dst); + } + void St2(const VRegister& vt, + const VRegister& vt2, + int lane, + const MemOperand& dst) { + SingleEmissionCheckScope guard(this); + st2(vt, vt2, lane, dst); + } + void St3(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + int lane, + const MemOperand& dst) { + SingleEmissionCheckScope guard(this); + st3(vt, vt2, vt3, lane, dst); + } + void St4(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const VRegister& vt4, + int lane, + const MemOperand& dst) { + SingleEmissionCheckScope guard(this); + st4(vt, vt2, vt3, vt4, lane, dst); + } + void Smov(const Register& rd, + const VRegister& vn, + int vn_index) { + SingleEmissionCheckScope guard(this); + smov(rd, vn, vn_index); + } + void Umov(const Register& rd, + const VRegister& vn, + int vn_index) { + SingleEmissionCheckScope guard(this); + umov(rd, vn, vn_index); + } + void 
Crc32b(const Register& rd, + const Register& rn, + const Register& rm) { + SingleEmissionCheckScope guard(this); + crc32b(rd, rn, rm); + } + void Crc32h(const Register& rd, + const Register& rn, + const Register& rm) { + SingleEmissionCheckScope guard(this); + crc32h(rd, rn, rm); + } + void Crc32w(const Register& rd, + const Register& rn, + const Register& rm) { + SingleEmissionCheckScope guard(this); + crc32w(rd, rn, rm); + } + void Crc32x(const Register& rd, + const Register& rn, + const Register& rm) { + SingleEmissionCheckScope guard(this); + crc32x(rd, rn, rm); + } + void Crc32cb(const Register& rd, + const Register& rn, + const Register& rm) { + SingleEmissionCheckScope guard(this); + crc32cb(rd, rn, rm); + } + void Crc32ch(const Register& rd, + const Register& rn, + const Register& rm) { + SingleEmissionCheckScope guard(this); + crc32ch(rd, rn, rm); + } + void Crc32cw(const Register& rd, + const Register& rn, + const Register& rm) { + SingleEmissionCheckScope guard(this); + crc32cw(rd, rn, rm); + } + void Crc32cx(const Register& rd, + const Register& rn, + const Register& rm) { + SingleEmissionCheckScope guard(this); + crc32cx(rd, rn, rm); + } + + // Push the system stack pointer (sp) down to allow the same to be done to + // the current stack pointer (according to StackPointer()). This must be + // called _before_ accessing the memory. + // + // This is necessary when pushing or otherwise adding things to the stack, to + // satisfy the AAPCS64 constraint that the memory below the system stack + // pointer is not accessed. + // + // This method asserts that StackPointer() is not sp, since the call does + // not make sense in that context. + // + // TODO: This method can only accept values of 'space' that can be encoded in + // one instruction. Refer to the implementation for details. + void BumpSystemStackPointer(const Operand& space); + + // Set the current stack pointer, but don't generate any code. 
+ void SetStackPointer64(const Register& stack_pointer) { + VIXL_ASSERT(!TmpList()->IncludesAliasOf(stack_pointer)); + sp_ = stack_pointer; + } + + // Return the current stack pointer, as set by SetStackPointer. + const Register& StackPointer() const { + return sp_; + } + + const Register& GetStackPointer64() const { + return sp_; + } + + js::jit::RegisterOrSP getStackPointer() const { + return js::jit::RegisterOrSP(sp_.code()); + } + + CPURegList* TmpList() { return &tmp_list_; } + CPURegList* FPTmpList() { return &fptmp_list_; } + + // Trace control when running the debug simulator. + // + // For example: + // + // __ Trace(LOG_REGS, TRACE_ENABLE); + // Will add registers to the trace if it wasn't already the case. + // + // __ Trace(LOG_DISASM, TRACE_DISABLE); + // Will stop logging disassembly. It has no effect if the disassembly wasn't + // already being logged. + void Trace(TraceParameters parameters, TraceCommand command); + + // Log the requested data independently of what is being traced. + // + // For example: + // + // __ Log(LOG_FLAGS) + // Will output the flags. + void Log(TraceParameters parameters); + + // Enable or disable instrumentation when an Instrument visitor is attached to + // the simulator. + void EnableInstrumentation(); + void DisableInstrumentation(); + + // Add a marker to the instrumentation data produced by an Instrument visitor. + // The name is a two character string that will be attached to the marker in + // the output data. + void AnnotateInstrumentation(const char* marker_name); + + private: + // The actual Push and Pop implementations. These don't generate any code + // other than that required for the push or pop. This allows + // (Push|Pop)CPURegList to bundle together setup code for a large block of + // registers. + // + // Note that size is per register, and is specified in bytes. 
+ void PushHelper(int count, int size, + const CPURegister& src0, const CPURegister& src1, + const CPURegister& src2, const CPURegister& src3); + void PopHelper(int count, int size, + const CPURegister& dst0, const CPURegister& dst1, + const CPURegister& dst2, const CPURegister& dst3); + + void Movi16bitHelper(const VRegister& vd, uint64_t imm); + void Movi32bitHelper(const VRegister& vd, uint64_t imm); + void Movi64bitHelper(const VRegister& vd, uint64_t imm); + + // Perform necessary maintenance operations before a push or pop. + // + // Note that size is per register, and is specified in bytes. + void PrepareForPush(int count, int size); + void PrepareForPop(int count, int size); + + // The actual implementation of load and store operations for CPURegList. + enum LoadStoreCPURegListAction { + kLoad, + kStore + }; + void LoadStoreCPURegListHelper(LoadStoreCPURegListAction operation, + CPURegList registers, + const MemOperand& mem); + // Returns a MemOperand suitable for loading or storing a CPURegList at `dst`. + // This helper may allocate registers from `scratch_scope` and generate code + // to compute an intermediate address. The resulting MemOperand is only valid + // as long as `scratch_scope` remains valid. + MemOperand BaseMemOperandForLoadStoreCPURegList( + const CPURegList& registers, + const MemOperand& mem, + UseScratchRegisterScope* scratch_scope); + + bool LabelIsOutOfRange(Label* label, ImmBranchType branch_type) { + return !Instruction::IsValidImmPCOffset(branch_type, nextOffset().getOffset() - label->offset()); + } + + // The register to use as a stack pointer for stack operations. + Register sp_; + + // Scratch registers available for use by the MacroAssembler. + CPURegList tmp_list_; + CPURegList fptmp_list_; + + ptrdiff_t checkpoint_; + ptrdiff_t recommended_checkpoint_; +}; + + +// All Assembler emits MUST acquire/release the underlying code buffer. 
The +// helper scope below will do so and optionally ensure the buffer is big enough +// to receive the emit. It is possible to request the scope not to perform any +// checks (kNoCheck) if for example it is known in advance the buffer size is +// adequate or there is some other size checking mechanism in place. +class CodeBufferCheckScope { + public: + // Tell whether or not the scope needs to ensure the associated CodeBuffer + // has enough space for the requested size. + enum CheckPolicy { + kNoCheck, + kCheck + }; + + // Tell whether or not the scope should assert the amount of code emitted + // within the scope is consistent with the requested amount. + enum AssertPolicy { + kNoAssert, // No assert required. + kExactSize, // The code emitted must be exactly size bytes. + kMaximumSize // The code emitted must be at most size bytes. + }; + + CodeBufferCheckScope(Assembler* assm, + size_t size, + CheckPolicy check_policy = kCheck, + AssertPolicy assert_policy = kMaximumSize) + { } + + // This is a shortcut for CodeBufferCheckScope(assm, 0, kNoCheck, kNoAssert). + explicit CodeBufferCheckScope(Assembler* assm) {} +}; + + +// Use this scope when you need a one-to-one mapping between methods and +// instructions. This scope prevents the MacroAssembler from being called and +// literal pools from being emitted. It also asserts the number of instructions +// emitted is what you specified when creating the scope. +// FIXME: Because of the disabled calls below, this class asserts nothing. +class InstructionAccurateScope : public CodeBufferCheckScope { + public: + InstructionAccurateScope(MacroAssembler* masm, + int64_t count, + AssertPolicy policy = kExactSize) + : CodeBufferCheckScope(masm, + (count * kInstructionSize), + kCheck, + policy) { + } +}; + + +// This scope utility allows scratch registers to be managed safely. The +// MacroAssembler's TmpList() (and FPTmpList()) is used as a pool of scratch +// registers. 
These registers can be allocated on demand, and will be returned +// at the end of the scope. +// +// When the scope ends, the MacroAssembler's lists will be restored to their +// original state, even if the lists were modified by some other means. +class UseScratchRegisterScope { + public: + // This constructor implicitly calls the `Open` function to initialise the + // scope, so it is ready to use immediately after it has been constructed. + explicit UseScratchRegisterScope(MacroAssembler* masm); + // This constructor allows deferred and optional initialisation of the scope. + // The user is required to explicitly call the `Open` function before using + // the scope. + UseScratchRegisterScope(); + // This function performs the actual initialisation work. + void Open(MacroAssembler* masm); + + // The destructor always implicitly calls the `Close` function. + ~UseScratchRegisterScope(); + // This function performs the cleaning-up work. It must succeed even if the + // scope has not been opened. It is safe to call multiple times. + void Close(); + + + bool IsAvailable(const CPURegister& reg) const; + + + // Take a register from the appropriate temps list. It will be returned + // automatically when the scope ends. + Register AcquireW() { return AcquireNextAvailable(available_).W(); } + Register AcquireX() { return AcquireNextAvailable(available_).X(); } + VRegister AcquireS() { return AcquireNextAvailable(availablefp_).S(); } + VRegister AcquireD() { return AcquireNextAvailable(availablefp_).D(); } + VRegister AcquireQ() { return AcquireNextAvailable(availablefp_).Q(); } + + + Register AcquireSameSizeAs(const Register& reg); + VRegister AcquireSameSizeAs(const VRegister& reg); + + + // Explicitly release an acquired (or excluded) register, putting it back in + // the appropriate temps list. + void Release(const CPURegister& reg); + + + // Make the specified registers available as scratch registers for the + // duration of this scope. 
+ void Include(const CPURegList& list); + void Include(const Register& reg1, + const Register& reg2 = NoReg, + const Register& reg3 = NoReg, + const Register& reg4 = NoReg); + void Include(const VRegister& reg1, + const VRegister& reg2 = NoVReg, + const VRegister& reg3 = NoVReg, + const VRegister& reg4 = NoVReg); + + + // Make sure that the specified registers are not available in this scope. + // This can be used to prevent helper functions from using sensitive + // registers, for example. + void Exclude(const CPURegList& list); + void Exclude(const Register& reg1, + const Register& reg2 = NoReg, + const Register& reg3 = NoReg, + const Register& reg4 = NoReg); + void Exclude(const VRegister& reg1, + const VRegister& reg2 = NoVReg, + const VRegister& reg3 = NoVReg, + const VRegister& reg4 = NoVReg); + void Exclude(const CPURegister& reg1, + const CPURegister& reg2 = NoCPUReg, + const CPURegister& reg3 = NoCPUReg, + const CPURegister& reg4 = NoCPUReg); + + + // Prevent any scratch registers from being used in this scope. + void ExcludeAll(); + + + private: + static CPURegister AcquireNextAvailable(CPURegList* available); + + static void ReleaseByCode(CPURegList* available, int code); + + static void ReleaseByRegList(CPURegList* available, + RegList regs); + + static void IncludeByRegList(CPURegList* available, + RegList exclude); + + static void ExcludeByRegList(CPURegList* available, + RegList exclude); + + // Available scratch registers. + CPURegList* available_; // kRegister + CPURegList* availablefp_; // kVRegister + + // The state of the available lists at the start of this scope. + RegList old_available_; // kRegister + RegList old_availablefp_; // kVRegister +#ifdef DEBUG + bool initialised_; +#endif + + // Disallow copy constructor and operator=. 
+ UseScratchRegisterScope(const UseScratchRegisterScope&) { + VIXL_UNREACHABLE(); + } + void operator=(const UseScratchRegisterScope&) { + VIXL_UNREACHABLE(); + } +}; + + +} // namespace vixl + +#endif // VIXL_A64_MACRO_ASSEMBLER_A64_H_ diff --git a/js/src/jit/arm64/vixl/MozAssembler-vixl.cpp b/js/src/jit/arm64/vixl/MozAssembler-vixl.cpp new file mode 100644 index 0000000000..b9189cc23b --- /dev/null +++ b/js/src/jit/arm64/vixl/MozAssembler-vixl.cpp @@ -0,0 +1,610 @@ +// Copyright 2015, ARM Limited +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "jit/arm64/vixl/Assembler-vixl.h" +#include "jit/Label.h" + +namespace vixl { + +using LabelDoc = js::jit::DisassemblerSpew::LabelDoc; + +// Assembler +void Assembler::FinalizeCode() { +#ifdef DEBUG + finalized_ = true; +#endif +} + +// Unbound Label Representation. +// +// We can have multiple branches using the same label before it is bound. +// Assembler::bind() must then be able to enumerate all the branches and patch +// them to target the final label location. +// +// When a Label is unbound with uses, its offset is pointing to the tip of a +// linked list of uses. The uses can be branches or adr/adrp instructions. In +// the case of branches, the next member in the linked list is simply encoded +// as the branch target. For adr/adrp, the relative pc offset is encoded in the +// immediate field as a signed instruction offset. +// +// In both cases, the end of the list is encoded as a 0 pc offset, i.e. the +// tail is pointing to itself. + +static const ptrdiff_t kEndOfLabelUseList = 0; + +BufferOffset +MozBaseAssembler::NextLink(BufferOffset cur) +{ + Instruction* link = getInstructionAt(cur); + // Raw encoded offset. + ptrdiff_t offset = link->ImmPCRawOffset(); + // End of the list is encoded as 0. + if (offset == kEndOfLabelUseList) + return BufferOffset(); + // The encoded offset is the number of instructions to move. 
+ return BufferOffset(cur.getOffset() + offset * kInstructionSize); +} + +static ptrdiff_t +EncodeOffset(BufferOffset cur, BufferOffset next) +{ + MOZ_ASSERT(next.assigned() && cur.assigned()); + ptrdiff_t offset = next.getOffset() - cur.getOffset(); + MOZ_ASSERT(offset % kInstructionSize == 0); + return offset / kInstructionSize; +} + +void +MozBaseAssembler::SetNextLink(BufferOffset cur, BufferOffset next) +{ + Instruction* link = getInstructionAt(cur); + link->SetImmPCRawOffset(EncodeOffset(cur, next)); +} + +// A common implementation for the LinkAndGet<Type>OffsetTo helpers. +// +// If the label is bound, returns the offset as a multiple of 1 << elementShift. +// Otherwise, links the instruction to the label and returns the raw offset to +// encode. (This will be an instruction count.) +// +// The offset is calculated by aligning the PC and label addresses down to a +// multiple of 1 << elementShift, then calculating the (scaled) offset between +// them. This matches the semantics of adrp, for example. (Assuming that the +// assembler buffer is page-aligned, which it probably isn't.) +// +// For an unbound label, the returned offset will be encodable in the provided +// branch range. If the label is already bound, the caller is expected to make +// sure that it is in range, and emit the necessary branch instructions if it +// isn't. +// +ptrdiff_t +MozBaseAssembler::LinkAndGetOffsetTo(BufferOffset branch, ImmBranchRangeType branchRange, + unsigned elementShift, Label* label) +{ + if (armbuffer_.oom()) + return kEndOfLabelUseList; + + if (label->bound()) { + // The label is bound: all uses are already linked. + ptrdiff_t branch_offset = ptrdiff_t(branch.getOffset() >> elementShift); + ptrdiff_t label_offset = ptrdiff_t(label->offset() >> elementShift); + return label_offset - branch_offset; + } + + // Keep track of short-range branches targeting unbound labels. We may need + // to insert veneers in PatchShortRangeBranchToVeneer() below. 
+ if (branchRange < NumShortBranchRangeTypes) { + // This is the last possible branch target. + BufferOffset deadline(branch.getOffset() + + Instruction::ImmBranchMaxForwardOffset(branchRange)); + armbuffer_.registerBranchDeadline(branchRange, deadline); + } + + // The label is unbound and previously unused: Store the offset in the label + // itself for patching by bind(). + if (!label->used()) { + label->use(branch.getOffset()); + return kEndOfLabelUseList; + } + + // The label is unbound and has multiple users. Create a linked list between + // the branches, and update the linked list head in the label struct. This is + // not always trivial since the branches in the linked list have limited + // ranges. + + // What is the earliest buffer offset that would be reachable by the branch + // we're about to add? + ptrdiff_t earliestReachable = + branch.getOffset() + Instruction::ImmBranchMinBackwardOffset(branchRange); + + // If the existing instruction at the head of the list is within reach of the + // new branch, we can simply insert the new branch at the front of the list. + if (label->offset() >= earliestReachable) { + ptrdiff_t offset = EncodeOffset(branch, BufferOffset(label)); + label->use(branch.getOffset()); + MOZ_ASSERT(offset != kEndOfLabelUseList); + return offset; + } + + // The label already has a linked list of uses, but we can't reach the head + // of the list with the allowed branch range. Insert this branch at a + // different position in the list. + // + // Find an existing branch, exbr, such that: + // + // 1. The new branch can be reached by exbr, and either + // 2a. The new branch can reach exbr's target, or + // 2b. The exbr branch is at the end of the list. + // + // Then the new branch can be inserted after exbr in the linked list. + // + // We know that it is always possible to find an exbr branch satisfying these + // conditions because of the PatchShortRangeBranchToVeneer() mechanism. 
All + // branches are guaranteed to either be able to reach the end of the + // assembler buffer, or they will be pointing to an unconditional branch that + // can. + // + // In particular, the end of the list is always a viable candidate, so we'll + // just get that. + BufferOffset next(label); + BufferOffset exbr; + do { + exbr = next; + next = NextLink(next); + } while (next.assigned()); + SetNextLink(exbr, branch); + + // This branch becomes the new end of the list. + return kEndOfLabelUseList; +} + +ptrdiff_t MozBaseAssembler::LinkAndGetByteOffsetTo(BufferOffset branch, Label* label) { + return LinkAndGetOffsetTo(branch, UncondBranchRangeType, 0, label); +} + +ptrdiff_t MozBaseAssembler::LinkAndGetInstructionOffsetTo(BufferOffset branch, + ImmBranchRangeType branchRange, + Label* label) { + return LinkAndGetOffsetTo(branch, branchRange, kInstructionSizeLog2, label); +} + +ptrdiff_t MozBaseAssembler::LinkAndGetPageOffsetTo(BufferOffset branch, Label* label) { + return LinkAndGetOffsetTo(branch, UncondBranchRangeType, kPageSizeLog2, label); +} + +BufferOffset Assembler::b(int imm26, const LabelDoc& doc) { + return EmitBranch(B | ImmUncondBranch(imm26), doc); +} + + +void Assembler::b(Instruction* at, int imm26) { + return EmitBranch(at, B | ImmUncondBranch(imm26)); +} + + +BufferOffset Assembler::b(int imm19, Condition cond, const LabelDoc& doc) { + return EmitBranch(B_cond | ImmCondBranch(imm19) | cond, doc); +} + + +void Assembler::b(Instruction* at, int imm19, Condition cond) { + EmitBranch(at, B_cond | ImmCondBranch(imm19) | cond); +} + + +BufferOffset Assembler::b(Label* label) { + // Encode the relative offset from the inserted branch to the label. + LabelDoc doc = refLabel(label); + return b(LinkAndGetInstructionOffsetTo(nextInstrOffset(), UncondBranchRangeType, label), doc); +} + + +BufferOffset Assembler::b(Label* label, Condition cond) { + // Encode the relative offset from the inserted branch to the label. 
+ LabelDoc doc = refLabel(label); + return b(LinkAndGetInstructionOffsetTo(nextInstrOffset(), CondBranchRangeType, label), cond, doc); +} + +void Assembler::br(Instruction* at, const Register& xn) { + VIXL_ASSERT(xn.Is64Bits()); + // No need for EmitBranch(): no immediate offset needs fixing. + Emit(at, BR | Rn(xn)); +} + + +void Assembler::blr(Instruction* at, const Register& xn) { + VIXL_ASSERT(xn.Is64Bits()); + // No need for EmitBranch(): no immediate offset needs fixing. + Emit(at, BLR | Rn(xn)); +} + + +void Assembler::bl(int imm26, const LabelDoc& doc) { + EmitBranch(BL | ImmUncondBranch(imm26), doc); +} + + +void Assembler::bl(Instruction* at, int imm26) { + EmitBranch(at, BL | ImmUncondBranch(imm26)); +} + + +void Assembler::bl(Label* label) { + // Encode the relative offset from the inserted branch to the label. + LabelDoc doc = refLabel(label); + return bl(LinkAndGetInstructionOffsetTo(nextInstrOffset(), UncondBranchRangeType, label), doc); +} + + +void Assembler::cbz(const Register& rt, int imm19, const LabelDoc& doc) { + EmitBranch(SF(rt) | CBZ | ImmCmpBranch(imm19) | Rt(rt), doc); +} + + +void Assembler::cbz(Instruction* at, const Register& rt, int imm19) { + EmitBranch(at, SF(rt) | CBZ | ImmCmpBranch(imm19) | Rt(rt)); +} + + +void Assembler::cbz(const Register& rt, Label* label) { + // Encode the relative offset from the inserted branch to the label. + LabelDoc doc = refLabel(label); + return cbz(rt, LinkAndGetInstructionOffsetTo(nextInstrOffset(), CondBranchRangeType, label), doc); +} + + +void Assembler::cbnz(const Register& rt, int imm19, const LabelDoc& doc) { + EmitBranch(SF(rt) | CBNZ | ImmCmpBranch(imm19) | Rt(rt), doc); +} + + +void Assembler::cbnz(Instruction* at, const Register& rt, int imm19) { + EmitBranch(at, SF(rt) | CBNZ | ImmCmpBranch(imm19) | Rt(rt)); +} + + +void Assembler::cbnz(const Register& rt, Label* label) { + // Encode the relative offset from the inserted branch to the label. 
+ LabelDoc doc = refLabel(label); + return cbnz(rt, LinkAndGetInstructionOffsetTo(nextInstrOffset(), CondBranchRangeType, label), doc); +} + + +void Assembler::tbz(const Register& rt, unsigned bit_pos, int imm14, const LabelDoc& doc) { + VIXL_ASSERT(rt.Is64Bits() || (rt.Is32Bits() && (bit_pos < kWRegSize))); + EmitBranch(TBZ | ImmTestBranchBit(bit_pos) | ImmTestBranch(imm14) | Rt(rt), doc); +} + + +void Assembler::tbz(Instruction* at, const Register& rt, unsigned bit_pos, int imm14) { + VIXL_ASSERT(rt.Is64Bits() || (rt.Is32Bits() && (bit_pos < kWRegSize))); + EmitBranch(at, TBZ | ImmTestBranchBit(bit_pos) | ImmTestBranch(imm14) | Rt(rt)); +} + + +void Assembler::tbz(const Register& rt, unsigned bit_pos, Label* label) { + // Encode the relative offset from the inserted branch to the label. + LabelDoc doc = refLabel(label); + return tbz(rt, bit_pos, LinkAndGetInstructionOffsetTo(nextInstrOffset(), TestBranchRangeType, label), doc); +} + + +void Assembler::tbnz(const Register& rt, unsigned bit_pos, int imm14, const LabelDoc& doc) { + VIXL_ASSERT(rt.Is64Bits() || (rt.Is32Bits() && (bit_pos < kWRegSize))); + EmitBranch(TBNZ | ImmTestBranchBit(bit_pos) | ImmTestBranch(imm14) | Rt(rt), doc); +} + + +void Assembler::tbnz(Instruction* at, const Register& rt, unsigned bit_pos, int imm14) { + VIXL_ASSERT(rt.Is64Bits() || (rt.Is32Bits() && (bit_pos < kWRegSize))); + EmitBranch(at, TBNZ | ImmTestBranchBit(bit_pos) | ImmTestBranch(imm14) | Rt(rt)); +} + + +void Assembler::tbnz(const Register& rt, unsigned bit_pos, Label* label) { + // Encode the relative offset from the inserted branch to the label. 
+ LabelDoc doc = refLabel(label); + return tbnz(rt, bit_pos, LinkAndGetInstructionOffsetTo(nextInstrOffset(), TestBranchRangeType, label), doc); +} + + +void Assembler::adr(const Register& rd, int imm21, const LabelDoc& doc) { + VIXL_ASSERT(rd.Is64Bits()); + EmitBranch(ADR | ImmPCRelAddress(imm21) | Rd(rd), doc); +} + + +void Assembler::adr(Instruction* at, const Register& rd, int imm21) { + VIXL_ASSERT(rd.Is64Bits()); + EmitBranch(at, ADR | ImmPCRelAddress(imm21) | Rd(rd)); +} + + +void Assembler::adr(const Register& rd, Label* label) { + // Encode the relative offset from the inserted adr to the label. + LabelDoc doc = refLabel(label); + return adr(rd, LinkAndGetByteOffsetTo(nextInstrOffset(), label), doc); +} + + +void Assembler::adrp(const Register& rd, int imm21, const LabelDoc& doc) { + VIXL_ASSERT(rd.Is64Bits()); + EmitBranch(ADRP | ImmPCRelAddress(imm21) | Rd(rd), doc); +} + + +void Assembler::adrp(Instruction* at, const Register& rd, int imm21) { + VIXL_ASSERT(rd.Is64Bits()); + EmitBranch(at, ADRP | ImmPCRelAddress(imm21) | Rd(rd)); +} + + +void Assembler::adrp(const Register& rd, Label* label) { + VIXL_ASSERT(AllowPageOffsetDependentCode()); + // Encode the relative offset from the inserted adr to the label. 
+ LabelDoc doc = refLabel(label); + return adrp(rd, LinkAndGetPageOffsetTo(nextInstrOffset(), label), doc); +} + + +BufferOffset Assembler::ands(const Register& rd, const Register& rn, const Operand& operand) { + return Logical(rd, rn, operand, ANDS); +} + + +BufferOffset Assembler::tst(const Register& rn, const Operand& operand) { + return ands(AppropriateZeroRegFor(rn), rn, operand); +} + + +void Assembler::ldr(Instruction* at, const CPURegister& rt, int imm19) { + LoadLiteralOp op = LoadLiteralOpFor(rt); + Emit(at, op | ImmLLiteral(imm19) | Rt(rt)); +} + + +BufferOffset Assembler::hint(SystemHint code) { + return Emit(HINT | ImmHint(code)); +} + + +void Assembler::hint(Instruction* at, SystemHint code) { + Emit(at, HINT | ImmHint(code)); +} + + +void Assembler::svc(Instruction* at, int code) { + VIXL_ASSERT(IsUint16(code)); + Emit(at, SVC | ImmException(code)); +} + + +void Assembler::nop(Instruction* at) { + hint(at, NOP); +} + + +void Assembler::csdb(Instruction* at) { + hint(at, CSDB); +} + + +BufferOffset Assembler::Logical(const Register& rd, const Register& rn, + const Operand& operand, LogicalOp op) +{ + VIXL_ASSERT(rd.size() == rn.size()); + if (operand.IsImmediate()) { + int64_t immediate = operand.immediate(); + unsigned reg_size = rd.size(); + + VIXL_ASSERT(immediate != 0); + VIXL_ASSERT(immediate != -1); + VIXL_ASSERT(rd.Is64Bits() || IsUint32(immediate)); + + // If the operation is NOT, invert the operation and immediate. + if ((op & NOT) == NOT) { + op = static_cast<LogicalOp>(op & ~NOT); + immediate = rd.Is64Bits() ? ~immediate : (~immediate & kWRegMask); + } + + unsigned n, imm_s, imm_r; + if (IsImmLogical(immediate, reg_size, &n, &imm_s, &imm_r)) { + // Immediate can be encoded in the instruction. + return LogicalImmediate(rd, rn, n, imm_s, imm_r, op); + } else { + // This case is handled in the macro assembler. 
+ VIXL_UNREACHABLE(); + } + } else { + VIXL_ASSERT(operand.IsShiftedRegister()); + VIXL_ASSERT(operand.reg().size() == rd.size()); + Instr dp_op = static_cast<Instr>(op | LogicalShiftedFixed); + return DataProcShiftedRegister(rd, rn, operand, LeaveFlags, dp_op); + } +} + + +BufferOffset Assembler::LogicalImmediate(const Register& rd, const Register& rn, + unsigned n, unsigned imm_s, unsigned imm_r, LogicalOp op) +{ + unsigned reg_size = rd.size(); + Instr dest_reg = (op == ANDS) ? Rd(rd) : RdSP(rd); + return Emit(SF(rd) | LogicalImmediateFixed | op | BitN(n, reg_size) | + ImmSetBits(imm_s, reg_size) | ImmRotate(imm_r, reg_size) | dest_reg | Rn(rn)); +} + + +BufferOffset Assembler::DataProcShiftedRegister(const Register& rd, const Register& rn, + const Operand& operand, FlagsUpdate S, Instr op) +{ + VIXL_ASSERT(operand.IsShiftedRegister()); + VIXL_ASSERT(rn.Is64Bits() || (rn.Is32Bits() && IsUint5(operand.shift_amount()))); + return Emit(SF(rd) | op | Flags(S) | + ShiftDP(operand.shift()) | ImmDPShift(operand.shift_amount()) | + Rm(operand.reg()) | Rn(rn) | Rd(rd)); +} + + +void MozBaseAssembler::InsertIndexIntoTag(uint8_t* load, uint32_t index) { + // Store the js::jit::PoolEntry index into the instruction. + // finishPool() will walk over all literal load instructions + // and use PatchConstantPoolLoad() to patch to the final relative offset. + *((uint32_t*)load) |= Assembler::ImmLLiteral(index); +} + + +bool MozBaseAssembler::PatchConstantPoolLoad(void* loadAddr, void* constPoolAddr) { + Instruction* load = reinterpret_cast<Instruction*>(loadAddr); + + // The load currently contains the js::jit::PoolEntry's index, + // as written by InsertIndexIntoTag(). + uint32_t index = load->ImmLLiteral(); + + // Each entry in the literal pool is uint32_t-sized, + // but literals may use multiple entries. 
+ uint32_t* constPool = reinterpret_cast<uint32_t*>(constPoolAddr); + Instruction* source = reinterpret_cast<Instruction*>(&constPool[index]); + + load->SetImmLLiteral(source); + return false; // Nothing uses the return value. +} + +void +MozBaseAssembler::PatchShortRangeBranchToVeneer(ARMBuffer* buffer, unsigned rangeIdx, + BufferOffset deadline, BufferOffset veneer) +{ + // Reconstruct the position of the branch from (rangeIdx, deadline). + vixl::ImmBranchRangeType branchRange = static_cast<vixl::ImmBranchRangeType>(rangeIdx); + BufferOffset branch(deadline.getOffset() - Instruction::ImmBranchMaxForwardOffset(branchRange)); + Instruction *branchInst = buffer->getInst(branch); + Instruction *veneerInst = buffer->getInst(veneer); + + // Verify that the branch range matches what's encoded. + MOZ_ASSERT(Instruction::ImmBranchTypeToRange(branchInst->BranchType()) == branchRange); + + // We want to insert veneer after branch in the linked list of instructions + // that use the same unbound label. + // The veneer should be an unconditional branch. + ptrdiff_t nextElemOffset = branchInst->ImmPCRawOffset(); + + // If offset is 0, this is the end of the linked list. + if (nextElemOffset != kEndOfLabelUseList) { + // Make the offset relative to veneer so it targets the same instruction + // as branchInst. + nextElemOffset *= kInstructionSize; + nextElemOffset += branch.getOffset() - veneer.getOffset(); + nextElemOffset /= kInstructionSize; + } + Assembler::b(veneerInst, nextElemOffset); + + // Now point branchInst at veneer. See also SetNextLink() above. + branchInst->SetImmPCRawOffset(EncodeOffset(branch, veneer)); +} + +struct PoolHeader { + uint32_t data; + + struct Header { + // The size should take into account the pool header. + // The size is in units of Instruction (4bytes), not byte. 
+ union { + struct { + uint32_t size : 15; + + // "Natural" guards are part of the normal instruction stream, + // while "non-natural" guards are inserted for the sole purpose + // of skipping around a pool. + uint32_t isNatural : 1; + uint32_t ONES : 16; + }; + uint32_t data; + }; + + Header(int size_, bool isNatural_) + : size(size_), + isNatural(isNatural_), + ONES(0xffff) + { } + + Header(uint32_t data) + : data(data) + { + VIXL_STATIC_ASSERT(sizeof(Header) == sizeof(uint32_t)); + VIXL_ASSERT(ONES == 0xffff); + } + + uint32_t raw() const { + VIXL_STATIC_ASSERT(sizeof(Header) == sizeof(uint32_t)); + return data; + } + }; + + PoolHeader(int size_, bool isNatural_) + : data(Header(size_, isNatural_).raw()) + { } + + uint32_t size() const { + Header tmp(data); + return tmp.size; + } + + uint32_t isNatural() const { + Header tmp(data); + return tmp.isNatural; + } +}; + + +void MozBaseAssembler::WritePoolHeader(uint8_t* start, js::jit::Pool* p, bool isNatural) { + static_assert(sizeof(PoolHeader) == 4); + + // Get the total size of the pool. 
+ const uintptr_t totalPoolSize = sizeof(PoolHeader) + p->getPoolSize(); + const uintptr_t totalPoolInstructions = totalPoolSize / kInstructionSize; + + VIXL_ASSERT((totalPoolSize & 0x3) == 0); + VIXL_ASSERT(totalPoolInstructions < (1 << 15)); + + PoolHeader header(totalPoolInstructions, isNatural); + *(PoolHeader*)start = header; +} + + +void MozBaseAssembler::WritePoolFooter(uint8_t* start, js::jit::Pool* p, bool isNatural) { + return; +} + + +void MozBaseAssembler::WritePoolGuard(BufferOffset branch, Instruction* inst, BufferOffset dest) { + int byteOffset = dest.getOffset() - branch.getOffset(); + VIXL_ASSERT(byteOffset % kInstructionSize == 0); + + int instOffset = byteOffset >> kInstructionSizeLog2; + Assembler::b(inst, instOffset); +} + + +} // namespace vixl diff --git a/js/src/jit/arm64/vixl/MozBaseAssembler-vixl.h b/js/src/jit/arm64/vixl/MozBaseAssembler-vixl.h new file mode 100644 index 0000000000..5d12f81bb1 --- /dev/null +++ b/js/src/jit/arm64/vixl/MozBaseAssembler-vixl.h @@ -0,0 +1,356 @@ +// Copyright 2013, ARM Limited +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef jit_arm64_vixl_MozBaseAssembler_vixl_h +#define jit_arm64_vixl_MozBaseAssembler_vixl_h + + +#include "mozilla/Assertions.h" // MOZ_ASSERT +#include "mozilla/Sprintf.h" // SprintfLiteral + +#include <stddef.h> // size_t +#include <stdint.h> // uint8_t, uint32_t +#include <string.h> // strstr + +#include "jit/arm64/vixl/Constants-vixl.h" // vixl::{HINT, NOP, ImmHint_offset} +#include "jit/arm64/vixl/Globals-vixl.h" // VIXL_ASSERT +#include "jit/arm64/vixl/Instructions-vixl.h" // vixl::{Instruction, NumShortBranchRangeTypes, Instr, ImmBranchRangeType} + +#include "jit/Label.h" // jit::Label +#include "jit/shared/Assembler-shared.h" // jit::AssemblerShared +#include "jit/shared/Disassembler-shared.h" // jit::DisassemblerSpew +#include "jit/shared/IonAssemblerBuffer.h" // jit::BufferOffset +#include "jit/shared/IonAssemblerBufferWithConstantPools.h" // jit::AssemblerBufferWithConstantPools + +namespace vixl { + + +using js::jit::BufferOffset; +using js::jit::DisassemblerSpew; +using js::jit::Label; + +using LabelDoc = DisassemblerSpew::LabelDoc; +using LiteralDoc = DisassemblerSpew::LiteralDoc; + +#ifdef JS_DISASM_ARM64 +void DisassembleInstruction(char* buffer, size_t bufsize, 
const Instruction* instr); +#endif + +class MozBaseAssembler; +typedef js::jit::AssemblerBufferWithConstantPools<1024, 4, Instruction, MozBaseAssembler, + NumShortBranchRangeTypes> ARMBuffer; + +// Base class for vixl::Assembler, for isolating Moz-specific changes to VIXL. +class MozBaseAssembler : public js::jit::AssemblerShared { + // Buffer initialization constants. + static const unsigned BufferGuardSize = 1; + static const unsigned BufferHeaderSize = 1; + static const size_t BufferCodeAlignment = 8; + static const size_t BufferMaxPoolOffset = 1024; + static const unsigned BufferPCBias = 0; + static const uint32_t BufferAlignmentFillInstruction = HINT | (NOP << ImmHint_offset); + static const uint32_t BufferNopFillInstruction = HINT | (NOP << ImmHint_offset); + static const unsigned BufferNumDebugNopsToInsert = 0; + +#ifdef JS_DISASM_ARM64 + static constexpr const char* const InstrIndent = " "; + static constexpr const char* const LabelIndent = " "; + static constexpr const char* const TargetIndent = " "; +#endif + + public: + MozBaseAssembler() + : armbuffer_(BufferGuardSize, + BufferHeaderSize, + BufferCodeAlignment, + BufferMaxPoolOffset, + BufferPCBias, + BufferAlignmentFillInstruction, + BufferNopFillInstruction, + BufferNumDebugNopsToInsert) + { +#ifdef JS_DISASM_ARM64 + spew_.setLabelIndent(LabelIndent); + spew_.setTargetIndent(TargetIndent); +#endif +} + ~MozBaseAssembler() + { +#ifdef JS_DISASM_ARM64 + spew_.spewOrphans(); +#endif + } + + public: + // Return the Instruction at a given byte offset. + Instruction* getInstructionAt(BufferOffset offset) { + return armbuffer_.getInst(offset); + } + + // Return the byte offset of a bound label. + template <typename T> + inline T GetLabelByteOffset(const js::jit::Label* label) { + VIXL_ASSERT(label->bound()); + static_assert(sizeof(T) >= sizeof(uint32_t)); + return reinterpret_cast<T>(label->offset()); + } + + protected: + // Get the buffer offset of the next inserted instruction. 
This may flush + // constant pools. + BufferOffset nextInstrOffset() { + return armbuffer_.nextInstrOffset(); + } + + // Get the next usable buffer offset. Note that a constant pool may be placed + // here before the next instruction is emitted. + BufferOffset nextOffset() const { + return armbuffer_.nextOffset(); + } + + // Allocate memory in the buffer by forwarding to armbuffer_. + // Propagate OOM errors. + BufferOffset allocLiteralLoadEntry(size_t numInst, unsigned numPoolEntries, + uint8_t* inst, uint8_t* data, + const LiteralDoc& doc = LiteralDoc(), + ARMBuffer::PoolEntry* pe = nullptr) + { + MOZ_ASSERT(inst); + MOZ_ASSERT(numInst == 1); /* If not, then fix disassembly */ + BufferOffset offset = armbuffer_.allocEntry(numInst, numPoolEntries, inst, + data, pe); + propagateOOM(offset.assigned()); +#ifdef JS_DISASM_ARM64 + Instruction* instruction = armbuffer_.getInstOrNull(offset); + if (instruction) + spewLiteralLoad(offset, + reinterpret_cast<vixl::Instruction*>(instruction), doc); +#endif + return offset; + } + +#ifdef JS_DISASM_ARM64 + DisassemblerSpew spew_; + + void spew(BufferOffset offs, const vixl::Instruction* instr) { + if (spew_.isDisabled() || !instr) + return; + + char buffer[2048]; + DisassembleInstruction(buffer, sizeof(buffer), instr); + spew_.spew("%06" PRIx32 " %08" PRIx32 "%s%s", + (uint32_t)offs.getOffset(), + instr->InstructionBits(), InstrIndent, buffer); + } + + void spewBranch(BufferOffset offs, + const vixl::Instruction* instr, const LabelDoc& target) { + if (spew_.isDisabled() || !instr) + return; + + char buffer[2048]; + DisassembleInstruction(buffer, sizeof(buffer), instr); + + char labelBuf[128]; + labelBuf[0] = 0; + + bool hasTarget = target.valid; + if (!hasTarget) + SprintfLiteral(labelBuf, "-> (link-time target)"); + + if (instr->IsImmBranch() && hasTarget) { + // The target information in the instruction is likely garbage, so remove it. + // The target label will in any case be printed if we have it. 
+ // + // The format of the instruction disassembly is /.*#.*/. Strip the # and later. + size_t i; + const size_t BUFLEN = sizeof(buffer)-1; + for ( i=0 ; i < BUFLEN && buffer[i] && buffer[i] != '#' ; i++ ) + ; + buffer[i] = 0; + + SprintfLiteral(labelBuf, "-> %d%s", target.doc, !target.bound ? "f" : ""); + hasTarget = false; + } + + spew_.spew("%06" PRIx32 " %08" PRIx32 "%s%s%s", + (uint32_t)offs.getOffset(), + instr->InstructionBits(), InstrIndent, buffer, labelBuf); + + if (hasTarget) + spew_.spewRef(target); + } + + void spewLiteralLoad(BufferOffset offs, + const vixl::Instruction* instr, const LiteralDoc& doc) { + if (spew_.isDisabled() || !instr) + return; + + char buffer[2048]; + DisassembleInstruction(buffer, sizeof(buffer), instr); + + char litbuf[2048]; + spew_.formatLiteral(doc, litbuf, sizeof(litbuf)); + + // The instruction will have the form /^.*pc\+0/ followed by junk that we + // don't need; try to strip it. + + char *probe = strstr(buffer, "pc+0"); + if (probe) + *(probe + 4) = 0; + spew_.spew("%06" PRIx32 " %08" PRIx32 "%s%s ; .const %s", + (uint32_t)offs.getOffset(), + instr->InstructionBits(), InstrIndent, buffer, litbuf); + } + + LabelDoc refLabel(Label* label) { + if (spew_.isDisabled()) + return LabelDoc(); + + return spew_.refLabel(label); + } +#else + LabelDoc refLabel(js::jit::Label*) { + return LabelDoc(); + } +#endif + + // Emit the instruction, returning its offset. + BufferOffset Emit(Instr instruction, bool isBranch = false) { + static_assert(sizeof(instruction) == kInstructionSize); + // TODO: isBranch is obsolete and should be removed. 
+ (void)isBranch; + MOZ_ASSERT(hasCreator()); + BufferOffset offs = armbuffer_.putInt(*(uint32_t*)(&instruction)); +#ifdef JS_DISASM_ARM64 + if (!isBranch) + spew(offs, armbuffer_.getInstOrNull(offs)); +#endif + return offs; + } + + BufferOffset EmitBranch(Instr instruction, const LabelDoc& doc) { + BufferOffset offs = Emit(instruction, true); +#ifdef JS_DISASM_ARM64 + spewBranch(offs, armbuffer_.getInstOrNull(offs), doc); +#endif + return offs; + } + + public: + // Emit the instruction at |at|. + static void Emit(Instruction* at, Instr instruction) { + static_assert(sizeof(instruction) == kInstructionSize); + memcpy(at, &instruction, sizeof(instruction)); + } + + static void EmitBranch(Instruction* at, Instr instruction) { + // TODO: Assert that the buffer already has the instruction marked as a branch. + Emit(at, instruction); + } + + // Emit data inline in the instruction stream. + BufferOffset EmitData(void const * data, unsigned size) { + VIXL_ASSERT(size % 4 == 0); + MOZ_ASSERT(hasCreator()); + return armbuffer_.allocEntry(size / sizeof(uint32_t), 0, (uint8_t*)(data), nullptr); + } + + public: + // Size of the code generated in bytes, including pools. + size_t SizeOfCodeGenerated() const { + return armbuffer_.size(); + } + + // Move the pool into the instruction stream. + void flushBuffer() { + armbuffer_.flushPool(); + } + + // Inhibit pool flushing for the given number of instructions. + // Generating more than |maxInst| instructions in a no-pool region + // triggers an assertion within the ARMBuffer. + // Does not nest. + void enterNoPool(size_t maxInst) { + armbuffer_.enterNoPool(maxInst); + } + + // Marks the end of a no-pool region. + void leaveNoPool() { + armbuffer_.leaveNoPool(); + } + + void enterNoNops() { + armbuffer_.enterNoNops(); + } + void leaveNoNops() { + armbuffer_.leaveNoNops(); + } + + public: + // Static interface used by IonAssemblerBufferWithConstantPools. 
+ static void InsertIndexIntoTag(uint8_t* load, uint32_t index); + static bool PatchConstantPoolLoad(void* loadAddr, void* constPoolAddr); + static void PatchShortRangeBranchToVeneer(ARMBuffer*, unsigned rangeIdx, BufferOffset deadline, + BufferOffset veneer); + static uint32_t PlaceConstantPoolBarrier(int offset); + + static void WritePoolHeader(uint8_t* start, js::jit::Pool* p, bool isNatural); + static void WritePoolFooter(uint8_t* start, js::jit::Pool* p, bool isNatural); + static void WritePoolGuard(BufferOffset branch, Instruction* inst, BufferOffset dest); + + protected: + // Functions for managing Labels and linked lists of Label uses. + + // Get the next Label user in the linked list of Label uses. + // Return an unassigned BufferOffset when the end of the list is reached. + BufferOffset NextLink(BufferOffset cur); + + // Patch the instruction at cur to link to the instruction at next. + void SetNextLink(BufferOffset cur, BufferOffset next); + + // Link the current (not-yet-emitted) instruction to the specified label, + // then return a raw offset to be encoded in the instruction. + ptrdiff_t LinkAndGetByteOffsetTo(BufferOffset branch, js::jit::Label* label); + ptrdiff_t LinkAndGetInstructionOffsetTo(BufferOffset branch, ImmBranchRangeType branchRange, + js::jit::Label* label); + ptrdiff_t LinkAndGetPageOffsetTo(BufferOffset branch, js::jit::Label* label); + + // A common implementation for the LinkAndGet<Type>OffsetTo helpers. + ptrdiff_t LinkAndGetOffsetTo(BufferOffset branch, ImmBranchRangeType branchRange, + unsigned elementSizeBits, js::jit::Label* label); + + protected: + // The buffer into which code and relocation info are generated. 
+ ARMBuffer armbuffer_; +}; + + +} // namespace vixl + + +#endif // jit_arm64_vixl_MozBaseAssembler_vixl_h + diff --git a/js/src/jit/arm64/vixl/MozCachingDecoder.h b/js/src/jit/arm64/vixl/MozCachingDecoder.h new file mode 100644 index 0000000000..5b4cfc17d5 --- /dev/null +++ b/js/src/jit/arm64/vixl/MozCachingDecoder.h @@ -0,0 +1,179 @@ +#ifndef VIXL_A64_MOZ_CACHING_DECODER_A64_H_ +#define VIXL_A64_MOZ_CACHING_DECODER_A64_H_ + +#include "mozilla/HashTable.h" + +#include "jit/arm64/vixl/Decoder-vixl.h" +#include "js/AllocPolicy.h" + +#ifdef DEBUG +#define JS_CACHE_SIMULATOR_ARM64 1 +#endif + +#ifdef JS_CACHE_SIMULATOR_ARM64 +namespace vixl { + +// This enumeration list the different kind of instructions which can be +// decoded. These kind correspond to the set of visitor defined by the default +// Decoder. +enum class InstDecodedKind : uint8_t { + NotDecodedYet, +#define DECLARE(E) E, + VISITOR_LIST(DECLARE) +#undef DECLARE +}; + +// A SinglePageDecodeCache is used to store the decoded kind of all instructions +// in an executable page of code. Each time an instruction is decoded, its +// decoded kind is recorded in this structure. The previous instruction value is +// also recorded in this structure when using a debug build. +// +// The next time the same offset is visited, the instruction would be decoded +// using the previously recorded decode kind. It is also compared against the +// previously recorded bits of the instruction to check for potential missing +// cache invalidations, in debug builds. +// +// This structure stores the equivalent of a single page of code to have better +// memory locality when using the simulator. As opposed to having a hash-table +// for all instructions. However a hash-table is used by the CachingDecoder to +// map the prefixes of page addresses to these SinglePageDecodeCaches. 
+class SinglePageDecodeCache { + public: + static const uintptr_t PageSize = 1 << 12; + static const uintptr_t PageMask = PageSize - 1; + static const uintptr_t InstSize = vixl::kInstructionSize; + static const uintptr_t InstMask = InstSize - 1; + static const uintptr_t InstPerPage = PageSize / InstSize; + + SinglePageDecodeCache(const Instruction* inst) + : pageStart_(PageStart(inst)) + { + memset(&decodeCache_, int(InstDecodedKind::NotDecodedYet), sizeof(decodeCache_)); + } + // Compute the start address of the page which contains this instruction. + static uintptr_t PageStart(const Instruction* inst) { + return uintptr_t(inst) & ~PageMask; + } + // Returns whether the instruction decoded kind is stored in this + // SinglePageDecodeCache. + bool contains(const Instruction* inst) { + return pageStart_ == PageStart(inst); + } + void clearDecode(const Instruction* inst) { + uintptr_t offset = (uintptr_t(inst) & PageMask) / InstSize; + decodeCache_[offset] = InstDecodedKind::NotDecodedYet; + } + InstDecodedKind* decodePtr(const Instruction* inst) { + uintptr_t offset = (uintptr_t(inst) & PageMask) / InstSize; + uint32_t instValue = *reinterpret_cast<const uint32_t*>(inst); + instCache_[offset] = instValue; + return &decodeCache_[offset]; + } + InstDecodedKind decode(const Instruction* inst) const { + uintptr_t offset = (uintptr_t(inst) & PageMask) / InstSize; + InstDecodedKind val = decodeCache_[offset]; + uint32_t instValue = *reinterpret_cast<const uint32_t*>(inst); + MOZ_ASSERT_IF(val != InstDecodedKind::NotDecodedYet, + instCache_[offset] == instValue); + return val; + } + + private: + // Record the address at which the corresponding code page starts. + const uintptr_t pageStart_; + + // Cache what instruction got decoded previously, in order to assert if we see + // any stale instructions after. + uint32_t instCache_[InstPerPage]; + + // Cache the decoding of the instruction such that we can skip the decoding + // part. 
+ InstDecodedKind decodeCache_[InstPerPage]; +}; + +// A DecoderVisitor which will record which visitor function should be called +// the next time we want to decode the same instruction. +class CachingDecoderVisitor : public DecoderVisitor { + public: + CachingDecoderVisitor() = default; + virtual ~CachingDecoderVisitor() {} + +#define DECLARE(A) virtual void Visit##A(const Instruction* instr) { \ + if (last_) { \ + MOZ_ASSERT(*last_ == InstDecodedKind::NotDecodedYet); \ + *last_ = InstDecodedKind::A; \ + last_ = nullptr; \ + } \ + }; + + VISITOR_LIST(DECLARE) +#undef DECLARE + + void setDecodePtr(InstDecodedKind* ptr) { + last_ = ptr; + } + + private: + InstDecodedKind* last_; +}; + +// The Caching decoder works by extending the default vixl Decoder class. It +// extends it by overloading the Decode function. +// +// The overloaded Decode function checks whether the instruction given as +// argument got decoded before or since it got invalidated. If it was not +// previously decoded, the value of the instruction is recorded as well as the +// kind of instruction. Otherwise, the value of the instruction is checked +// against the previously recorded value and the instruction kind is used to +// skip the decoding visitor and resume the execution of instruction. +// +// The caching decoder stores the equivalent of a page of executable code in a +// hash-table. Each SinglePageDecodeCache stores an array of decoded kind as +// well as the value of the previously decoded instruction. +// +// When testing if an instruction was decoded before, we check if the address of +// the instruction is contained in the last SinglePageDecodeCache. If it is not, +// then the hash-table entry is queried and created if necessary, and the last +// SinglePageDecodeCache is updated. Then, the last SinglePageDecodeCache +// necessary contains the decoded kind of the instruction given as argument. 
+// +// The caching decoder add an extra function for flushing the cache, which is in +// charge of clearing the decoded kind of instruction in the range of addresses +// given as argument. This is indirectly called by +// CPU::EnsureIAndDCacheCoherency. +class CachingDecoder : public Decoder { + using ICacheMap = mozilla::HashMap<uintptr_t, SinglePageDecodeCache*>; + public: + CachingDecoder() + : lastPage_(nullptr) + { + PrependVisitor(&cachingDecoder_); + } + ~CachingDecoder() { + RemoveVisitor(&cachingDecoder_); + } + + void Decode(const Instruction* instr); + void Decode(Instruction* instr) { + Decode(const_cast<const Instruction*>(instr)); + } + + void FlushICache(void* start, size_t size); + + private: + // Record the type of the decoded instruction, to avoid decoding it a second + // time the next time we execute it. + CachingDecoderVisitor cachingDecoder_; + + // Store the mapping of Instruction pointer to the corresponding + // SinglePageDecodeCache. + ICacheMap iCache_; + + // Record the last SinglePageDecodeCache seen, such that we can quickly access + // it for the next instruction. + SinglePageDecodeCache* lastPage_; +}; + +} +#endif // !JS_CACHE_SIMULATOR_ARM64 +#endif // !VIXL_A64_MOZ_CACHING_DECODER_A64_H_ diff --git a/js/src/jit/arm64/vixl/MozCpu-vixl.cpp b/js/src/jit/arm64/vixl/MozCpu-vixl.cpp new file mode 100644 index 0000000000..909cc590ae --- /dev/null +++ b/js/src/jit/arm64/vixl/MozCpu-vixl.cpp @@ -0,0 +1,226 @@ +// Copyright 2015, ARM Limited +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "jit/arm64/vixl/Cpu-vixl.h" +#include "jit/arm64/vixl/Simulator-vixl.h" +#include "jit/arm64/vixl/Utils-vixl.h" +#include "util/WindowsWrapper.h" + +#if defined(XP_DARWIN) +# include <libkern/OSCacheControl.h> +#endif + +namespace vixl { + +// Currently computes I and D cache line size. +void CPU::SetUp() { + uint32_t cache_type_register = GetCacheType(); + + // The cache type register holds information about the caches, including I + // D caches line size. 
+ static const int kDCacheLineSizeShift = 16; + static const int kICacheLineSizeShift = 0; + static const uint32_t kDCacheLineSizeMask = 0xf << kDCacheLineSizeShift; + static const uint32_t kICacheLineSizeMask = 0xf << kICacheLineSizeShift; + + // The cache type register holds the size of the I and D caches in words as + // a power of two. + uint32_t dcache_line_size_power_of_two = + (cache_type_register & kDCacheLineSizeMask) >> kDCacheLineSizeShift; + uint32_t icache_line_size_power_of_two = + (cache_type_register & kICacheLineSizeMask) >> kICacheLineSizeShift; + + dcache_line_size_ = 4 << dcache_line_size_power_of_two; + icache_line_size_ = 4 << icache_line_size_power_of_two; + + // Bug 1521158 suggests that having CPU with different cache line sizes could + // cause issues as we would only invalidate half of the cache line of we + // invalidate every 128 bytes, but other little cores have a different stride + // such as 64 bytes. To be conservative, we will try reducing the stride to 32 + // bytes, which should be smaller than any known cache line. + const uint32_t conservative_line_size = 32; + dcache_line_size_ = std::min(dcache_line_size_, conservative_line_size); + icache_line_size_ = std::min(icache_line_size_, conservative_line_size); +} + + +uint32_t CPU::GetCacheType() { +#if defined(__aarch64__) && (defined(__linux__) || defined(__android__)) + uint64_t cache_type_register; + // Copy the content of the cache type register to a core register. + __asm__ __volatile__ ("mrs %[ctr], ctr_el0" // NOLINT + : [ctr] "=r" (cache_type_register)); + VIXL_ASSERT(IsUint32(cache_type_register)); + return static_cast<uint32_t>(cache_type_register); +#else + // This will lead to a cache with 1 byte long lines, which is fine since + // neither EnsureIAndDCacheCoherency nor the simulator will need this + // information. 
+ return 0; +#endif +} + +void CPU::EnsureIAndDCacheCoherency(void* address, size_t length) { +#if defined(JS_SIMULATOR_ARM64) && defined(JS_CACHE_SIMULATOR_ARM64) + // This code attempts to emulate what the following assembly sequence is + // doing, which is sending the information to all cores that some cache line + // have to be invalidated and invalidating them only on the current core. + // + // This is done by recording the current range to be flushed to all + // simulators, then if there is a simulator associated with the current + // thread, applying all flushed ranges as the "isb" instruction would do. + // + // As we have no control over the CPU cores used by the code generator and the + // execution threads, this code assumes that each thread runs on its own core. + // + // See Bug 1529933 for more detailed explanation of this issue. + using js::jit::SimulatorProcess; + js::jit::AutoLockSimulatorCache alsc; + if (length > 0) { + SimulatorProcess::recordICacheFlush(address, length); + } + Simulator* sim = vixl::Simulator::Current(); + if (sim) { + sim->FlushICache(); + } +#elif defined(_MSC_VER) && defined(_M_ARM64) + FlushInstructionCache(GetCurrentProcess(), address, length); +#elif defined(XP_DARWIN) + sys_icache_invalidate(address, length); +#elif defined(__aarch64__) && (defined(__linux__) || defined(__android__)) + // Implement the cache synchronisation for all targets where AArch64 is the + // host, even if we're building the simulator for an AAarch64 host. This + // allows for cases where the user wants to simulate code as well as run it + // natively. + + if (length == 0) { + return; + } + + // The code below assumes user space cache operations are allowed. + + // Work out the line sizes for each cache, and use them to determine the + // start addresses. 
+ uintptr_t start = reinterpret_cast<uintptr_t>(address); + uintptr_t dsize = static_cast<uintptr_t>(dcache_line_size_); + uintptr_t isize = static_cast<uintptr_t>(icache_line_size_); + uintptr_t dline = start & ~(dsize - 1); + uintptr_t iline = start & ~(isize - 1); + + // Cache line sizes are always a power of 2. + VIXL_ASSERT(IsPowerOf2(dsize)); + VIXL_ASSERT(IsPowerOf2(isize)); + uintptr_t end = start + length; + + do { + __asm__ __volatile__ ( + // Clean each line of the D cache containing the target data. + // + // dc : Data Cache maintenance + // c : Clean + // i : Invalidate + // va : by (Virtual) Address + // c : to the point of Coherency + // Original implementation used cvau, but changed to civac due to + // errata on Cortex-A53 819472, 826319, 827319 and 824069. + // See ARM DDI 0406B page B2-12 for more information. + // + " dc civac, %[dline]\n" + : + : [dline] "r" (dline) + // This code does not write to memory, but the "memory" dependency + // prevents GCC from reordering the code. + : "memory"); + dline += dsize; + } while (dline < end); + + __asm__ __volatile__ ( + // Make sure that the data cache operations (above) complete before the + // instruction cache operations (below). + // + // dsb : Data Synchronisation Barrier + // ish : Inner SHareable domain + // + // The point of unification for an Inner Shareable shareability domain is + // the point by which the instruction and data caches of all the processors + // in that Inner Shareable shareability domain are guaranteed to see the + // same copy of a memory location. See ARM DDI 0406B page B2-12 for more + // information. + " dsb ish\n" + : : : "memory"); + + do { + __asm__ __volatile__ ( + // Invalidate each line of the I cache containing the target data. 
+ // + // ic : Instruction Cache maintenance + // i : Invalidate + // va : by Address + // u : to the point of Unification + " ic ivau, %[iline]\n" + : + : [iline] "r" (iline) + : "memory"); + iline += isize; + } while (iline < end); + + __asm__ __volatile__( + // Make sure that the instruction cache operations (above) take effect + // before the isb (below). + " dsb ish\n" + + // Ensure that any instructions already in the pipeline are discarded and + // reloaded from the new data. + // isb : Instruction Synchronisation Barrier + " isb\n" + : + : + : "memory"); +#else + // If the host isn't AArch64, we must be using the simulator, so this function + // doesn't have to do anything. + USE(address, length); +#endif +} + +void CPU::FlushExecutionContext() { +#if defined(JS_SIMULATOR_ARM64) && defined(JS_CACHE_SIMULATOR_ARM64) + // Performing an 'isb' will ensure the current core instruction pipeline is + // synchronized with an icache flush executed by another core. + using js::jit::SimulatorProcess; + js::jit::AutoLockSimulatorCache alsc; + Simulator* sim = vixl::Simulator::Current(); + if (sim) { + sim->FlushICache(); + } +#elif defined(__aarch64__) + // Ensure that any instructions already in the pipeline are discarded and + // reloaded from the icache. + __asm__ __volatile__("isb\n" : : : "memory"); +#endif +} + +} // namespace vixl diff --git a/js/src/jit/arm64/vixl/MozInstructions-vixl.cpp b/js/src/jit/arm64/vixl/MozInstructions-vixl.cpp new file mode 100644 index 0000000000..398f864493 --- /dev/null +++ b/js/src/jit/arm64/vixl/MozInstructions-vixl.cpp @@ -0,0 +1,211 @@ +// Copyright 2013, ARM Limited +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#include "jit/arm64/Architecture-arm64.h" +#include "jit/arm64/vixl/Assembler-vixl.h" +#include "jit/arm64/vixl/Instructions-vixl.h" + +namespace vixl { + +bool Instruction::IsUncondB() const { + return Mask(UnconditionalBranchMask) == (UnconditionalBranchFixed | B); +} + + +bool Instruction::IsCondB() const { + return Mask(ConditionalBranchMask) == (ConditionalBranchFixed | B_cond); +} + + +bool Instruction::IsBL() const { + return Mask(UnconditionalBranchMask) == (UnconditionalBranchFixed | BL); +} + + +bool Instruction::IsBR() const { + return Mask(UnconditionalBranchToRegisterMask) == (UnconditionalBranchToRegisterFixed | BR); +} + + +bool Instruction::IsBLR() const { + return Mask(UnconditionalBranchToRegisterMask) == (UnconditionalBranchToRegisterFixed | BLR); +} + + +bool Instruction::IsTBZ() const { + return Mask(TestBranchMask) == TBZ; +} + + +bool Instruction::IsTBNZ() const { + return Mask(TestBranchMask) == TBNZ; +} + + +bool Instruction::IsCBZ() const { + return Mask(CompareBranchMask) == CBZ_w || Mask(CompareBranchMask) == CBZ_x; +} + + +bool Instruction::IsCBNZ() const { + return Mask(CompareBranchMask) == CBNZ_w || Mask(CompareBranchMask) == CBNZ_x; +} + + +bool Instruction::IsLDR() const { + return Mask(LoadLiteralMask) == LDR_x_lit; +} + + +bool Instruction::IsNOP() const { + return Mask(SystemHintMask) == HINT && ImmHint() == NOP; +} + + +bool Instruction::IsCSDB() const { + return Mask(SystemHintMask) == HINT && ImmHint() == CSDB; +} + + +bool Instruction::IsADR() const { + return Mask(PCRelAddressingMask) == ADR; +} + + +bool Instruction::IsADRP() const { + return Mask(PCRelAddressingMask) == ADRP; +} + + +bool Instruction::IsMovz() const { + return (Mask(MoveWideImmediateMask) == MOVZ_x) || + (Mask(MoveWideImmediateMask) == MOVZ_w); +} + + +bool Instruction::IsMovk() const { + return (Mask(MoveWideImmediateMask) == MOVK_x) || + (Mask(MoveWideImmediateMask) == MOVK_w); +} + +bool Instruction::IsBranchLinkImm() const { + return 
Mask(UnconditionalBranchFMask) == (UnconditionalBranchFixed | BL); +} + + +bool Instruction::IsTargetReachable(const Instruction* target) const { + VIXL_ASSERT(((target - this) & 3) == 0); + int offset = (target - this) >> kInstructionSizeLog2; + switch (BranchType()) { + case CondBranchType: + return IsInt19(offset); + case UncondBranchType: + return IsInt26(offset); + case CompareBranchType: + return IsInt19(offset); + case TestBranchType: + return IsInt14(offset); + default: + VIXL_UNREACHABLE(); + } +} + + +ptrdiff_t Instruction::ImmPCRawOffset() const { + ptrdiff_t offset; + if (IsPCRelAddressing()) { + // ADR and ADRP. + offset = ImmPCRel(); + } else if (BranchType() == UnknownBranchType) { + offset = ImmLLiteral(); + } else { + offset = ImmBranch(); + } + return offset; +} + +void +Instruction::SetImmPCRawOffset(ptrdiff_t offset) +{ + if (IsPCRelAddressing()) { + // ADR and ADRP. We're encoding a raw offset here. + // See also SetPCRelImmTarget(). + Instr imm = vixl::Assembler::ImmPCRelAddress(offset); + SetInstructionBits(Mask(~ImmPCRel_mask) | imm); + } else { + SetBranchImmTarget(this + (offset << kInstructionSizeLog2)); + } +} + +// Is this a stack pointer synchronization instruction as inserted by +// MacroAssembler::syncStackPtr()? +bool +Instruction::IsStackPtrSync() const +{ + // The stack pointer sync is a move to the stack pointer. + // This is encoded as 'add sp, Rs, #0'. + return IsAddSubImmediate() && Rd() == js::jit::Registers::sp && ImmAddSub() == 0; +} + +// Skip over a constant pool at |this| if there is one. +// +// If |this| is pointing to the artifical guard branch around a constant pool, +// return the instruction after the pool. Otherwise return |this| itself. +// +// This function does not skip constant pools with a natural guard branch. It +// is assumed that anyone inspecting the instruction stream understands about +// branches that were inserted naturally. 
+const Instruction* +Instruction::skipPool() const +{ + // Artificial pool guards can only be B (rather than BR), and they must be + // forward branches. + if (!IsUncondB() || ImmUncondBranch() <= 0) + return this; + + // Check for a constant pool header which has the high 16 bits set. See + // struct PoolHeader. Bit 15 indicates a natural pool guard when set. It + // must be clear which indicates an artificial pool guard. + const Instruction *header = InstructionAtOffset(kInstructionSize); + if (header->Mask(0xffff8000) != 0xffff0000) + return this; + + // OK, this is an artificial jump around a constant pool. + return ImmPCOffsetTarget(); +} + + +void Instruction::SetBits32(int msb, int lsb, unsigned value) { + uint32_t me; + memcpy(&me, this, sizeof(me)); + uint32_t new_mask = (1 << (msb+1)) - (1 << lsb); + uint32_t keep_mask = ~new_mask; + me = (me & keep_mask) | ((value << lsb) & new_mask); + memcpy(this, &me, sizeof(me)); +} + + +} // namespace vixl diff --git a/js/src/jit/arm64/vixl/MozSimulator-vixl.cpp b/js/src/jit/arm64/vixl/MozSimulator-vixl.cpp new file mode 100644 index 0000000000..9f817cf0a3 --- /dev/null +++ b/js/src/jit/arm64/vixl/MozSimulator-vixl.cpp @@ -0,0 +1,1258 @@ +// Copyright 2013, ARM Limited +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "mozilla/DebugOnly.h" + +#include "jit/arm64/vixl/Debugger-vixl.h" +#include "jit/arm64/vixl/MozCachingDecoder.h" +#include "jit/arm64/vixl/Simulator-vixl.h" +#include "jit/IonTypes.h" +#include "js/UniquePtr.h" +#include "js/Utility.h" +#include "threading/LockGuard.h" +#include "vm/JSContext.h" +#include "vm/Runtime.h" + +js::jit::SimulatorProcess* js::jit::SimulatorProcess::singleton_ = nullptr; + +namespace vixl { + +using mozilla::DebugOnly; +using js::jit::ABIFunctionType; +using js::jit::JitActivation; +using js::jit::SimulatorProcess; + +Simulator::Simulator(Decoder* decoder, FILE* stream) + : stream_(nullptr) + , print_disasm_(nullptr) + , instrumentation_(nullptr) + , stack_(nullptr) + , stack_limit_(nullptr) + , decoder_(nullptr) + , oom_(false) +{ + this->init(decoder, stream); + + // If this environment variable is present, trace the executed instructions. + // (Very helpful for debugging code generation crashes.) + if (getenv("VIXL_TRACE")) { + set_trace_parameters(LOG_DISASM); + } +} + + +Simulator::~Simulator() { + js_free(stack_); + stack_ = nullptr; + + // The decoder may outlive the simulator. 
+ if (print_disasm_) { + decoder_->RemoveVisitor(print_disasm_); + js_delete(print_disasm_); + print_disasm_ = nullptr; + } + + if (instrumentation_) { + decoder_->RemoveVisitor(instrumentation_); + js_delete(instrumentation_); + instrumentation_ = nullptr; + } +} + + +void Simulator::ResetState() { + // Reset the system registers. + nzcv_ = SimSystemRegister::DefaultValueFor(NZCV); + fpcr_ = SimSystemRegister::DefaultValueFor(FPCR); + + // Reset registers to 0. + pc_ = nullptr; + pc_modified_ = false; + for (unsigned i = 0; i < kNumberOfRegisters; i++) { + set_xreg(i, 0xbadbeef); + } + // Set FP registers to a value that is a NaN in both 32-bit and 64-bit FP. + uint64_t nan_bits = UINT64_C(0x7ff0dead7f8beef1); + VIXL_ASSERT(IsSignallingNaN(RawbitsToDouble(nan_bits & kDRegMask))); + VIXL_ASSERT(IsSignallingNaN(RawbitsToFloat(nan_bits & kSRegMask))); + for (unsigned i = 0; i < kNumberOfFPRegisters; i++) { + set_dreg_bits(i, nan_bits); + } + // Returning to address 0 exits the Simulator. + set_lr(kEndOfSimAddress); +} + + +void Simulator::init(Decoder* decoder, FILE* stream) { + // Ensure that shift operations act as the simulator expects. + VIXL_ASSERT((static_cast<int32_t>(-1) >> 1) == -1); + VIXL_ASSERT((static_cast<uint32_t>(-1) >> 1) == 0x7FFFFFFF); + + instruction_stats_ = false; + + // Set up the decoder. + decoder_ = decoder; + decoder_->AppendVisitor(this); + + stream_ = stream; + print_disasm_ = js_new<PrintDisassembler>(stream_); + if (!print_disasm_) { + oom_ = true; + return; + } + set_coloured_trace(false); + trace_parameters_ = LOG_NONE; + + ResetState(); + + // Allocate and set up the simulator stack. + stack_ = js_pod_malloc<byte>(stack_size_); + if (!stack_) { + oom_ = true; + return; + } + stack_limit_ = stack_ + stack_protection_size_; + // Configure the starting stack pointer. + // - Find the top of the stack. + byte * tos = stack_ + stack_size_; + // - There's a protection region at both ends of the stack. 
+ tos -= stack_protection_size_; + // - The stack pointer must be 16-byte aligned. + tos = AlignDown(tos, 16); + set_sp(tos); + + // Set the sample period to 10, as the VIXL examples and tests are short. + if (getenv("VIXL_STATS")) { + instrumentation_ = js_new<Instrument>("vixl_stats.csv", 10); + if (!instrumentation_) { + oom_ = true; + return; + } + } + + // Print a warning about exclusive-access instructions, but only the first + // time they are encountered. This warning can be silenced using + // SilenceExclusiveAccessWarning(). + print_exclusive_access_warning_ = true; +} + + +Simulator* Simulator::Current() { + JSContext* cx = js::TlsContext.get(); + if (!cx) { + return nullptr; + } + JSRuntime* rt = cx->runtime(); + if (!rt) { + return nullptr; + } + if (!js::CurrentThreadCanAccessRuntime(rt)) { + return nullptr; + } + return cx->simulator(); +} + + +Simulator* Simulator::Create() { + Decoder *decoder = js_new<Decoder>(); + if (!decoder) + return nullptr; + + // FIXME: This just leaks the Decoder object for now, which is probably OK. + // FIXME: We should free it at some point. + // FIXME: Note that it can't be stored in the SimulatorRuntime due to lifetime conflicts. + js::UniquePtr<Simulator> sim; + if (getenv("USE_DEBUGGER") != nullptr) { + sim.reset(js_new<Debugger>(decoder, stdout)); + } else { + sim.reset(js_new<Simulator>(decoder, stdout)); + } + + // Check if Simulator:init ran out of memory. + if (sim && sim->oom()) { + return nullptr; + } + +#ifdef JS_CACHE_SIMULATOR_ARM64 + // Register the simulator in the Simulator process to handle cache flushes + // across threads. 
+ js::jit::AutoLockSimulatorCache alsc; + if (!SimulatorProcess::registerSimulator(sim.get())) { + return nullptr; + } +#endif + + return sim.release(); +} + + +void Simulator::Destroy(Simulator* sim) { +#ifdef JS_CACHE_SIMULATOR_ARM64 + if (sim) { + js::jit::AutoLockSimulatorCache alsc; + SimulatorProcess::unregisterSimulator(sim); + } +#endif + + js_delete(sim); +} + + +void Simulator::ExecuteInstruction() { + // The program counter should always be aligned. + VIXL_ASSERT(IsWordAligned(pc_)); +#ifdef JS_CACHE_SIMULATOR_ARM64 + if (pendingCacheRequests) { + // We're here emulating the behavior of the membarrier carried over on + // real hardware does; see syscalls to membarrier in MozCpu-vixl.cpp. + // There's a slight difference that the simulator is not being + // interrupted: instead, we effectively run the icache flush request + // before executing the next instruction, which is close enough and + // sufficient for our use case. + js::jit::AutoLockSimulatorCache alsc; + FlushICache(); + } +#endif + decoder_->Decode(pc_); + increment_pc(); +} + + +uintptr_t Simulator::stackLimit() const { + return reinterpret_cast<uintptr_t>(stack_limit_); +} + + +uintptr_t* Simulator::addressOfStackLimit() { + return (uintptr_t*)&stack_limit_; +} + + +bool Simulator::overRecursed(uintptr_t newsp) const { + if (newsp == 0) { + newsp = get_sp(); + } + return newsp <= stackLimit(); +} + + +bool Simulator::overRecursedWithExtra(uint32_t extra) const { + uintptr_t newsp = get_sp() - extra; + return newsp <= stackLimit(); +} + + +JS::ProfilingFrameIterator::RegisterState +Simulator::registerState() +{ + JS::ProfilingFrameIterator::RegisterState state; + state.pc = (uint8_t*) get_pc(); + state.fp = (uint8_t*) get_fp(); + state.lr = (uint8_t*) get_lr(); + state.sp = (uint8_t*) get_sp(); + return state; +} + +int64_t Simulator::call(uint8_t* entry, int argument_count, ...) { + va_list parameters; + va_start(parameters, argument_count); + + // First eight arguments passed in registers. 
+ VIXL_ASSERT(argument_count <= 8); + // This code should use the type of the called function + // (with templates, like the callVM machinery), but since the + // number of called functions is miniscule, their types have been + // divined from the number of arguments. + if (argument_count == 8) { + // EnterJitData::jitcode. + set_xreg(0, va_arg(parameters, int64_t)); + // EnterJitData::maxArgc. + set_xreg(1, va_arg(parameters, unsigned)); + // EnterJitData::maxArgv. + set_xreg(2, va_arg(parameters, int64_t)); + // EnterJitData::osrFrame. + set_xreg(3, va_arg(parameters, int64_t)); + // EnterJitData::calleeToken. + set_xreg(4, va_arg(parameters, int64_t)); + // EnterJitData::scopeChain. + set_xreg(5, va_arg(parameters, int64_t)); + // EnterJitData::osrNumStackValues. + set_xreg(6, va_arg(parameters, unsigned)); + // Address of EnterJitData::result. + set_xreg(7, va_arg(parameters, int64_t)); + } else if (argument_count == 2) { + // EntryArg* args + set_xreg(0, va_arg(parameters, int64_t)); + // uint8_t* GlobalData + set_xreg(1, va_arg(parameters, int64_t)); + } else if (argument_count == 1) { // irregexp + // InputOutputData& data + set_xreg(0, va_arg(parameters, int64_t)); + } else if (argument_count == 0) { // testsJit.cpp + // accept. + } else { + MOZ_CRASH("Unknown number of arguments"); + } + + va_end(parameters); + + // Call must transition back to native code on exit. + VIXL_ASSERT(get_lr() == int64_t(kEndOfSimAddress)); + + // Execute the simulation. + DebugOnly<int64_t> entryStack = get_sp(); + RunFrom((Instruction*)entry); + DebugOnly<int64_t> exitStack = get_sp(); + VIXL_ASSERT(entryStack == exitStack); + + int64_t result = xreg(0); + if (getenv("USE_DEBUGGER")) { + printf("LEAVE\n"); + } + return result; +} + + +// When the generated code calls a VM function (masm.callWithABI) we need to +// call that function instead of trying to execute it with the simulator +// (because it's x64 code instead of AArch64 code). 
We do that by redirecting the VM +// call to a svc (Supervisor Call) instruction that is handled by the +// simulator. We write the original destination of the jump just at a known +// offset from the svc instruction so the simulator knows what to call. +class Redirection +{ + friend class Simulator; + + Redirection(void* nativeFunction, ABIFunctionType type) + : nativeFunction_(nativeFunction), + type_(type), + next_(nullptr) + { + next_ = SimulatorProcess::redirection(); + SimulatorProcess::setRedirection(this); + + Instruction* instr = (Instruction*)(&svcInstruction_); + vixl::Assembler::svc(instr, kCallRtRedirected); + } + + public: + void* addressOfSvcInstruction() { return &svcInstruction_; } + void* nativeFunction() const { return nativeFunction_; } + ABIFunctionType type() const { return type_; } + + static Redirection* Get(void* nativeFunction, ABIFunctionType type) { + js::jit::AutoLockSimulatorCache alsr; + + // TODO: Store srt_ in the simulator for this assertion. + // VIXL_ASSERT_IF(pt->simulator(), pt->simulator()->srt_ == srt); + + Redirection* current = SimulatorProcess::redirection(); + for (; current != nullptr; current = current->next_) { + if (current->nativeFunction_ == nativeFunction) { + VIXL_ASSERT(current->type() == type); + return current; + } + } + + // Note: we can't use js_new here because the constructor is private. 
+ js::AutoEnterOOMUnsafeRegion oomUnsafe; + Redirection* redir = js_pod_malloc<Redirection>(1); + if (!redir) + oomUnsafe.crash("Simulator redirection"); + new(redir) Redirection(nativeFunction, type); + return redir; + } + + static const Redirection* FromSvcInstruction(const Instruction* svcInstruction) { + const uint8_t* addrOfSvc = reinterpret_cast<const uint8_t*>(svcInstruction); + const uint8_t* addrOfRedirection = addrOfSvc - offsetof(Redirection, svcInstruction_); + return reinterpret_cast<const Redirection*>(addrOfRedirection); + } + + private: + void* nativeFunction_; + uint32_t svcInstruction_; + ABIFunctionType type_; + Redirection* next_; +}; + + + + +void* Simulator::RedirectNativeFunction(void* nativeFunction, ABIFunctionType type) { + Redirection* redirection = Redirection::Get(nativeFunction, type); + return redirection->addressOfSvcInstruction(); +} + +void Simulator::VisitException(const Instruction* instr) { + if (instr->InstructionBits() == UNDEFINED_INST_PATTERN) { + uint8_t* newPC; + if (js::wasm::HandleIllegalInstruction(registerState(), &newPC)) { + set_pc((Instruction*)newPC); + return; + } + DoUnreachable(instr); + } + + switch (instr->Mask(ExceptionMask)) { + case BRK: { + int lowbit = ImmException_offset; + int highbit = ImmException_offset + ImmException_width - 1; + HostBreakpoint(instr->Bits(highbit, lowbit)); + break; + } + case HLT: + switch (instr->ImmException()) { + case kTraceOpcode: + DoTrace(instr); + return; + case kLogOpcode: + DoLog(instr); + return; + case kPrintfOpcode: + DoPrintf(instr); + return; + default: + HostBreakpoint(); + return; + } + case SVC: + // The SVC instruction is hijacked by the JIT as a pseudo-instruction + // causing the Simulator to execute host-native code for callWithABI. 
+ switch (instr->ImmException()) { + case kCallRtRedirected: + VisitCallRedirection(instr); + return; + case kMarkStackPointer: { + js::AutoEnterOOMUnsafeRegion oomUnsafe; + if (!spStack_.append(get_sp())) + oomUnsafe.crash("tracking stack for ARM64 simulator"); + return; + } + case kCheckStackPointer: { + DebugOnly<int64_t> current = get_sp(); + DebugOnly<int64_t> expected = spStack_.popCopy(); + VIXL_ASSERT(current == expected); + return; + } + default: + VIXL_UNIMPLEMENTED(); + } + break; + default: + VIXL_UNIMPLEMENTED(); + } +} + + +void Simulator::setGPR32Result(int32_t result) { + set_wreg(0, result); +} + + +void Simulator::setGPR64Result(int64_t result) { + set_xreg(0, result); +} + + +void Simulator::setFP32Result(float result) { + set_sreg(0, result); +} + + +void Simulator::setFP64Result(double result) { + set_dreg(0, result); +} + + +typedef int64_t (*Prototype_General0)(); +typedef int64_t (*Prototype_General1)(int64_t arg0); +typedef int64_t (*Prototype_General2)(int64_t arg0, int64_t arg1); +typedef int64_t (*Prototype_General3)(int64_t arg0, int64_t arg1, int64_t arg2); +typedef int64_t (*Prototype_General4)(int64_t arg0, int64_t arg1, int64_t arg2, int64_t arg3); +typedef int64_t (*Prototype_General5)(int64_t arg0, int64_t arg1, int64_t arg2, int64_t arg3, + int64_t arg4); +typedef int64_t (*Prototype_General6)(int64_t arg0, int64_t arg1, int64_t arg2, int64_t arg3, + int64_t arg4, int64_t arg5); +typedef int64_t (*Prototype_General7)(int64_t arg0, int64_t arg1, int64_t arg2, int64_t arg3, + int64_t arg4, int64_t arg5, int64_t arg6); +typedef int64_t (*Prototype_General8)(int64_t arg0, int64_t arg1, int64_t arg2, int64_t arg3, + int64_t arg4, int64_t arg5, int64_t arg6, int64_t arg7); +typedef int64_t (*Prototype_GeneralGeneralGeneralInt64)(int64_t arg0, int64_t arg1, int64_t arg2, + int64_t arg3); +typedef int64_t (*Prototype_GeneralGeneralInt64Int64)(int64_t arg0, int64_t arg1, int64_t arg2, + int64_t arg3); + +typedef int64_t 
(*Prototype_Int_Double)(double arg0); +typedef int64_t (*Prototype_Int_IntDouble)(int64_t arg0, double arg1); +typedef int64_t (*Prototype_Int_DoubleInt)(double arg0, int64_t arg1); +typedef int64_t (*Prototype_Int_DoubleIntInt)(double arg0, uint64_t arg1, uint64_t arg2); +typedef int64_t (*Prototype_Int_IntDoubleIntInt)(uint64_t arg0, double arg1, + uint64_t arg2, uint64_t arg3); + +typedef float (*Prototype_Float32_Float32)(float arg0); +typedef int64_t (*Prototype_Int_Float32)(float arg0); +typedef float (*Prototype_Float32_Float32Float32)(float arg0, float arg1); + +typedef double (*Prototype_Double_None)(); +typedef double (*Prototype_Double_Double)(double arg0); +typedef double (*Prototype_Double_Int)(int64_t arg0); +typedef double (*Prototype_Double_DoubleInt)(double arg0, int64_t arg1); +typedef double (*Prototype_Double_IntDouble)(int64_t arg0, double arg1); +typedef double (*Prototype_Double_DoubleDouble)(double arg0, double arg1); +typedef double (*Prototype_Double_DoubleDoubleDouble)(double arg0, double arg1, double arg2); +typedef double (*Prototype_Double_DoubleDoubleDoubleDouble)(double arg0, double arg1, + double arg2, double arg3); + +typedef int32_t (*Prototype_Int32_General)(int64_t); +typedef int32_t (*Prototype_Int32_GeneralInt32)(int64_t, int32_t); +typedef int32_t (*Prototype_Int32_GeneralInt32Int32)(int64_t, int32_t, int32_t); +typedef int32_t (*Prototype_Int32_GeneralInt32Int32Int32Int32)(int64_t, + int32_t, + int32_t, + int32_t, + int32_t); +typedef int32_t (*Prototype_Int32_GeneralInt32Int32Int32Int32Int32)(int64_t, + int32_t, + int32_t, + int32_t, + int32_t, + int32_t); +typedef int32_t (*Prototype_Int32_GeneralInt32Int32Int32Int32General)(int64_t, + int32_t, + int32_t, + int32_t, + int32_t, + int64_t); +typedef int32_t (*Prototype_Int32_GeneralInt32Int32Int32Int32Int32Int32General)(int64_t, + int32_t, + int32_t, + int32_t, + int32_t, + int32_t, + int32_t, + int64_t); +typedef int32_t 
(*Prototype_Int32_GeneralInt32Float32Float32Int32Int32Int32General)(int64_t, + int32_t, + float, + float, + int32_t, + int32_t, + int32_t, + int64_t); +typedef int32_t (*Prototype_Int32_GeneralInt32Float32Float32Float32Float32Int32Int32Int32Int32General)(int64_t, + int32_t, + float, + float, + float, + float, + int32_t, + int32_t, + int32_t, + int32_t, + int64_t); +typedef int32_t (*Prototype_Int32_GeneralInt32Float32Float32Int32Float32Float32Int32Float32Int32Int32Int32Int32General)(int64_t, + int32_t, + float, + float, + int32_t, + float, + float, + int32_t, + float, + int32_t, + int32_t, + int32_t, + int32_t, + int64_t); +typedef int32_t (*Prototype_Int32_GeneralInt32Int32Int32General)(int64_t, + int32_t, + int32_t, + int32_t, + int64_t); +typedef int32_t (*Prototype_Int32_GeneralInt32Int32Int64)(int64_t, + int32_t, + int32_t, + int64_t); +typedef int32_t (*Prototype_Int32_GeneralInt32Int32General)(int64_t, + int32_t, + int32_t, + int64_t); +typedef int32_t (*Prototype_Int32_GeneralInt32Int64Int64)(int64_t, + int32_t, + int64_t, + int64_t); +typedef int32_t (*Prototype_Int32_GeneralInt32GeneralInt32)(int64_t, + int32_t, + int64_t, + int32_t); +typedef int32_t (*Prototype_Int32_GeneralInt32GeneralInt32Int32)(int64_t, + int32_t, + int64_t, + int32_t, + int32_t); +typedef int32_t (*Prototype_Int32_GeneralGeneral)(int64_t, int64_t); +typedef int32_t (*Prototype_Int32_GeneralGeneralGeneral)(int64_t, + int64_t, + int64_t); +typedef int32_t (*Prototype_Int32_GeneralGeneralInt32Int32)(int64_t, + int64_t, + int32_t, + int32_t); +typedef int32_t (*Prototype_Int32_GeneralInt64Int32Int32Int32)(int64_t, int64_t, + int32_t, int32_t, + int32_t); +typedef int32_t (*Prototype_Int32_GeneralInt64Int32)(int64_t, int64_t, + int32_t); +typedef int32_t (*Prototype_Int32_GeneralInt64Int32Int64)(int64_t, int64_t, + int32_t, int64_t); +typedef int32_t (*Prototype_Int32_GeneralInt64Int32Int64General)(int64_t, int64_t, + int32_t, int64_t, + int64_t); +typedef int32_t 
(*Prototype_Int32_GeneralInt64Int64Int64)(int64_t, int64_t, + int64_t, int64_t); +typedef int32_t (*Prototype_Int32_GeneralInt64Int64General)(int64_t, int64_t, + int64_t, int64_t); +typedef int32_t (*Prototype_Int32_GeneralInt64Int64Int64General)(int64_t, int64_t, + int64_t, int64_t, + int64_t); +typedef int64_t (*Prototype_General_GeneralInt32)(int64_t, int32_t); +typedef int64_t (*Prototype_General_GeneralInt32Int32)(int64_t, + int32_t, + int32_t); +typedef int64_t (*Prototype_General_GeneralInt32General)(int64_t, + int32_t, + int64_t); +typedef int64_t (*Prototype_General_GeneralInt32Int32GeneralInt32)(int64_t, + int32_t, + int32_t, + int64_t, + int32_t); +typedef int32_t (*Prototype_Int32_GeneralGeneralInt32GeneralInt32Int32Int32)( + int64_t, + int64_t, + int32_t, + int64_t, + int32_t, + int32_t, + int32_t); +typedef int32_t (*Prototype_Int32_GeneralGeneralInt32General)(int64_t, + int64_t, + int32_t, + int64_t); +typedef int64_t (*Prototype_Int64_General)(int64_t); +typedef int64_t (*Prototype_Int64_GeneralInt64)(int64_t, int64_t); + +// Simulator support for callWithABI(). +void +Simulator::VisitCallRedirection(const Instruction* instr) +{ + VIXL_ASSERT(instr->Mask(ExceptionMask) == SVC); + VIXL_ASSERT(instr->ImmException() == kCallRtRedirected); + + const Redirection* redir = Redirection::FromSvcInstruction(instr); + uintptr_t nativeFn = reinterpret_cast<uintptr_t>(redir->nativeFunction()); + + // Stack must be aligned prior to the call. + // FIXME: It's actually our job to perform the alignment... + //VIXL_ASSERT((xreg(31, Reg31IsStackPointer) & (StackAlignment - 1)) == 0); + + // Used to assert that callee-saved registers are preserved. 
+ DebugOnly<int64_t> x19 = xreg(19); + DebugOnly<int64_t> x20 = xreg(20); + DebugOnly<int64_t> x21 = xreg(21); + DebugOnly<int64_t> x22 = xreg(22); + DebugOnly<int64_t> x23 = xreg(23); + DebugOnly<int64_t> x24 = xreg(24); + DebugOnly<int64_t> x25 = xreg(25); + DebugOnly<int64_t> x26 = xreg(26); + DebugOnly<int64_t> x27 = xreg(27); + DebugOnly<int64_t> x28 = xreg(28); + DebugOnly<int64_t> x29 = xreg(29); + DebugOnly<int64_t> savedSP = get_sp(); + + // Remember LR for returning from the "call". + int64_t savedLR = xreg(30); + + // Allow recursive Simulator calls: returning from the call must stop + // the simulation and transition back to native Simulator code. + set_xreg(30, int64_t(kEndOfSimAddress)); + + // Store argument register values in local variables for ease of use below. + int64_t x0 = xreg(0); + int64_t x1 = xreg(1); + int64_t x2 = xreg(2); + int64_t x3 = xreg(3); + int64_t x4 = xreg(4); + int64_t x5 = xreg(5); + int64_t x6 = xreg(6); + int64_t x7 = xreg(7); + int64_t x8 = xreg(8); + double d0 = dreg(0); + double d1 = dreg(1); + double d2 = dreg(2); + double d3 = dreg(3); + float s0 = sreg(0); + float s1 = sreg(1); + float s2 = sreg(2); + float s3 = sreg(3); + float s4 = sreg(4); + + // Dispatch the call and set the return value. + switch (redir->type()) { + // Cases with int64_t return type. 
+ case js::jit::Args_General0: { + int64_t ret = reinterpret_cast<Prototype_General0>(nativeFn)(); + setGPR64Result(ret); + break; + } + case js::jit::Args_General1: { + int64_t ret = reinterpret_cast<Prototype_General1>(nativeFn)(x0); + setGPR64Result(ret); + break; + } + case js::jit::Args_General2: { + int64_t ret = reinterpret_cast<Prototype_General2>(nativeFn)(x0, x1); + setGPR64Result(ret); + break; + } + case js::jit::Args_General3: { + int64_t ret = reinterpret_cast<Prototype_General3>(nativeFn)(x0, x1, x2); + setGPR64Result(ret); + break; + } + case js::jit::Args_General4: { + int64_t ret = reinterpret_cast<Prototype_General4>(nativeFn)(x0, x1, x2, x3); + setGPR64Result(ret); + break; + } + case js::jit::Args_General5: { + int64_t ret = reinterpret_cast<Prototype_General5>(nativeFn)(x0, x1, x2, x3, x4); + setGPR64Result(ret); + break; + } + case js::jit::Args_General6: { + int64_t ret = reinterpret_cast<Prototype_General6>(nativeFn)(x0, x1, x2, x3, x4, x5); + setGPR64Result(ret); + break; + } + case js::jit::Args_General7: { + int64_t ret = reinterpret_cast<Prototype_General7>(nativeFn)(x0, x1, x2, x3, x4, x5, x6); + setGPR64Result(ret); + break; + } + case js::jit::Args_General8: { + int64_t ret = reinterpret_cast<Prototype_General8>(nativeFn)(x0, x1, x2, x3, x4, x5, x6, x7); + setGPR64Result(ret); + break; + } + case js::jit::Args_Int_GeneralGeneralGeneralInt64: { + int64_t ret = reinterpret_cast<Prototype_GeneralGeneralGeneralInt64>(nativeFn)(x0, x1, x2, x3); + setGPR64Result(ret); + break; + } + case js::jit::Args_Int_GeneralGeneralInt64Int64: { + int64_t ret = reinterpret_cast<Prototype_GeneralGeneralInt64Int64>(nativeFn)(x0, x1, x2, x3); + setGPR64Result(ret); + break; + } + + // Cases with GPR return type. This can be int32 or int64, but int64 is a safer assumption. 
+ case js::jit::Args_Int_Double: { + int64_t ret = reinterpret_cast<Prototype_Int_Double>(nativeFn)(d0); + setGPR64Result(ret); + break; + } + case js::jit::Args_Int_IntDouble: { + int64_t ret = reinterpret_cast<Prototype_Int_IntDouble>(nativeFn)(x0, d0); + setGPR64Result(ret); + break; + } + + case js::jit::Args_Int_DoubleInt: { + int64_t ret = reinterpret_cast<Prototype_Int_DoubleInt>(nativeFn)(d0, x0); + setGPR64Result(ret); + break; + } + + case js::jit::Args_Int_IntDoubleIntInt: { + int64_t ret = reinterpret_cast<Prototype_Int_IntDoubleIntInt>(nativeFn)(x0, d0, x1, x2); + setGPR64Result(ret); + break; + } + + case js::jit::Args_Int_DoubleIntInt: { + int64_t ret = reinterpret_cast<Prototype_Int_DoubleIntInt>(nativeFn)(d0, x0, x1); + setGPR64Result(ret); + break; + } + + // Cases with float return type. + case js::jit::Args_Float32_Float32: { + float ret = reinterpret_cast<Prototype_Float32_Float32>(nativeFn)(s0); + setFP32Result(ret); + break; + } + case js::jit::Args_Int_Float32: { + int64_t ret = reinterpret_cast<Prototype_Int_Float32>(nativeFn)(s0); + setGPR64Result(ret); + break; + } + case js::jit::Args_Float32_Float32Float32: { + float ret = reinterpret_cast<Prototype_Float32_Float32Float32>(nativeFn)(s0, s1); + setFP32Result(ret); + break; + } + + // Cases with double return type. 
+ case js::jit::Args_Double_None: { + double ret = reinterpret_cast<Prototype_Double_None>(nativeFn)(); + setFP64Result(ret); + break; + } + case js::jit::Args_Double_Double: { + double ret = reinterpret_cast<Prototype_Double_Double>(nativeFn)(d0); + setFP64Result(ret); + break; + } + case js::jit::Args_Double_Int: { + double ret = reinterpret_cast<Prototype_Double_Int>(nativeFn)(x0); + setFP64Result(ret); + break; + } + case js::jit::Args_Double_DoubleInt: { + double ret = reinterpret_cast<Prototype_Double_DoubleInt>(nativeFn)(d0, x0); + setFP64Result(ret); + break; + } + case js::jit::Args_Double_DoubleDouble: { + double ret = reinterpret_cast<Prototype_Double_DoubleDouble>(nativeFn)(d0, d1); + setFP64Result(ret); + break; + } + case js::jit::Args_Double_DoubleDoubleDouble: { + double ret = reinterpret_cast<Prototype_Double_DoubleDoubleDouble>(nativeFn)(d0, d1, d2); + setFP64Result(ret); + break; + } + case js::jit::Args_Double_DoubleDoubleDoubleDouble: { + double ret = reinterpret_cast<Prototype_Double_DoubleDoubleDoubleDouble>(nativeFn)(d0, d1, d2, d3); + setFP64Result(ret); + break; + } + + case js::jit::Args_Double_IntDouble: { + double ret = reinterpret_cast<Prototype_Double_IntDouble>(nativeFn)(x0, d0); + setFP64Result(ret); + break; + } + + case js::jit::Args_Int32_General: { + int32_t ret = reinterpret_cast<Prototype_Int32_General>(nativeFn)(x0); + setGPR32Result(ret); + break; + } + case js::jit::Args_Int32_GeneralInt32: { + int32_t ret = + reinterpret_cast<Prototype_Int32_GeneralInt32>(nativeFn)(x0, x1); + setGPR32Result(ret); + break; + } + case js::jit::Args_Int32_GeneralInt32Int32: { + int32_t ret = reinterpret_cast<Prototype_Int32_GeneralInt32Int32>( + nativeFn)(x0, x1, x2); + setGPR32Result(ret); + break; + } + case js::jit::Args_Int32_GeneralInt32Int32Int32Int32: { + int32_t ret = + reinterpret_cast<Prototype_Int32_GeneralInt32Int32Int32Int32>( + nativeFn)(x0, x1, x2, x3, x4); + setGPR32Result(ret); + break; + } + case 
js::jit::Args_Int32_GeneralInt32Int32Int32Int32Int32: { + int32_t ret = + reinterpret_cast<Prototype_Int32_GeneralInt32Int32Int32Int32Int32>( + nativeFn)(x0, x1, x2, x3, x4, x5); + setGPR32Result(ret); + break; + } + case js::jit::Args_Int32_GeneralInt32Int32Int32Int32General: { + int32_t ret = + reinterpret_cast<Prototype_Int32_GeneralInt32Int32Int32Int32General>( + nativeFn)(x0, x1, x2, x3, x4, x5); + setGPR32Result(ret); + break; + } + case js::jit::Args_Int32_GeneralInt32Int32Int32Int32Int32Int32General: { + int32_t ret = + reinterpret_cast<Prototype_Int32_GeneralInt32Int32Int32Int32Int32Int32General>( + nativeFn)(x0, x1, x2, x3, x4, x5, x6, x7); + setGPR32Result(ret); + break; + } + case js::jit::Args_Int32_GeneralInt32Float32Float32Int32Int32Int32General: { + int32_t ret = + reinterpret_cast<Prototype_Int32_GeneralInt32Float32Float32Int32Int32Int32General>( + nativeFn)(x0, x1, s0, s1, x2, x3, x4, x5); + setGPR32Result(ret); + break; + } + case js::jit::Args_Int32_GeneralInt32Float32Float32Float32Float32Int32Int32Int32Int32General: { + int32_t ret = + reinterpret_cast<Prototype_Int32_GeneralInt32Float32Float32Float32Float32Int32Int32Int32Int32General>( + nativeFn)(x0, x1, s0, s1, s2, s3, x2, x3, x4, x5, x6); + setGPR32Result(ret); + break; + } + case js::jit::Args_Int32_GeneralInt32Float32Float32Int32Float32Float32Int32Float32Int32Int32Int32Int32General: { + int32_t ret = + reinterpret_cast<Prototype_Int32_GeneralInt32Float32Float32Int32Float32Float32Int32Float32Int32Int32Int32Int32General>( + nativeFn)(x0, x1, s0, s1, x2, s2, s3, x3, s4, x4, x5, x6, x7, x8); + setGPR32Result(ret); + break; + } + case js::jit::Args_Int32_GeneralInt32Int32Int32General: { + int32_t ret = + reinterpret_cast<Prototype_Int32_GeneralInt32Int32Int32General>( + nativeFn)(x0, x1, x2, x3, x4); + setGPR32Result(ret); + break; + } + case js::jit::Args_Int32_GeneralInt32Int32Int64: { + int32_t ret = reinterpret_cast<Prototype_Int32_GeneralInt32Int32Int64>( + nativeFn)(x0, x1, x2, x3); + 
setGPR32Result(ret); + break; + } + case js::jit::Args_Int32_GeneralInt32Int32General: { + int32_t ret = reinterpret_cast<Prototype_Int32_GeneralInt32Int32General>( + nativeFn)(x0, x1, x2, x3); + setGPR32Result(ret); + break; + } + case js::jit::Args_Int32_GeneralInt32Int64Int64: { + int32_t ret = reinterpret_cast<Prototype_Int32_GeneralInt32Int64Int64>( + nativeFn)(x0, x1, x2, x3); + setGPR32Result(ret); + break; + } + case js::jit::Args_Int32_GeneralInt32GeneralInt32: { + int32_t ret = reinterpret_cast<Prototype_Int32_GeneralInt32GeneralInt32>( + nativeFn)(x0, x1, x2, x3); + setGPR32Result(ret); + break; + } + case js::jit::Args_Int32_GeneralInt32GeneralInt32Int32: { + int32_t ret = + reinterpret_cast<Prototype_Int32_GeneralInt32GeneralInt32Int32>( + nativeFn)(x0, x1, x2, x3, x4); + setGPR32Result(ret); + break; + } + case js::jit::Args_Int32_GeneralGeneral: { + int32_t ret = + reinterpret_cast<Prototype_Int32_GeneralGeneral>(nativeFn)(x0, x1); + setGPR32Result(ret); + break; + } + case js::jit::Args_Int32_GeneralGeneralGeneral: { + int32_t ret = reinterpret_cast<Prototype_Int32_GeneralGeneralGeneral>( + nativeFn)(x0, x1, x2); + setGPR32Result(ret); + break; + } + case js::jit::Args_Int32_GeneralGeneralInt32Int32: { + int32_t ret = reinterpret_cast<Prototype_Int32_GeneralGeneralInt32Int32>( + nativeFn)(x0, x1, x2, x3); + setGPR32Result(ret); + break; + } + case js::jit::Args_Int32_GeneralInt64Int32Int32Int32: { + int32_t ret = reinterpret_cast<Prototype_Int32_GeneralInt64Int32Int32Int32>( + nativeFn)(x0, x1, x2, x3, x4); + setGPR32Result(ret); + break; + } + case js::jit::Args_Int32_GeneralInt64Int32: { + int32_t ret = reinterpret_cast<Prototype_Int32_GeneralInt64Int32>( + nativeFn)(x0, x1, x2); + setGPR32Result(ret); + break; + } + case js::jit::Args_Int32_GeneralInt64Int32Int64: { + int32_t ret = reinterpret_cast<Prototype_Int32_GeneralInt64Int32Int64>( + nativeFn)(x0, x1, x2, x3); + setGPR32Result(ret); + break; + } + case 
js::jit::Args_Int32_GeneralInt64Int32Int64General: { + int32_t ret = reinterpret_cast<Prototype_Int32_GeneralInt64Int32Int64General>( + nativeFn)(x0, x1, x2, x3, x4); + setGPR32Result(ret); + break; + } + case js::jit::Args_Int32_GeneralInt64Int64Int64: { + int32_t ret = reinterpret_cast<Prototype_Int32_GeneralInt64Int64Int64>( + nativeFn)(x0, x1, x2, x3); + setGPR32Result(ret); + break; + } + case js::jit::Args_Int32_GeneralInt64Int64General: { + int32_t ret = reinterpret_cast<Prototype_Int32_GeneralInt64Int64General>( + nativeFn)(x0, x1, x2, x3); + setGPR32Result(ret); + break; + } + case js::jit::Args_Int32_GeneralInt64Int64Int64General: { + int32_t ret = reinterpret_cast<Prototype_Int32_GeneralInt64Int64Int64General>( + nativeFn)(x0, x1, x2, x3, x4); + setGPR32Result(ret); + break; + } + case js::jit::Args_General_GeneralInt32: { + int64_t ret = + reinterpret_cast<Prototype_General_GeneralInt32>(nativeFn)(x0, x1); + setGPR64Result(ret); + break; + } + case js::jit::Args_General_GeneralInt32Int32: { + int64_t ret = reinterpret_cast<Prototype_General_GeneralInt32Int32>( + nativeFn)(x0, x1, x2); + setGPR64Result(ret); + break; + } + case js::jit::Args_General_GeneralInt32General: { + int64_t ret = + reinterpret_cast<Prototype_General_GeneralInt32General>( + nativeFn)(x0, x1, x2); + setGPR64Result(ret); + break; + } + case js::jit::Args_General_GeneralInt32Int32GeneralInt32: { + int64_t ret = + reinterpret_cast<Prototype_General_GeneralInt32Int32GeneralInt32>( + nativeFn)(x0, x1, x2, x3, x4); + setGPR64Result(ret); + break; + } + case js::jit::Args_Int32_GeneralGeneralInt32GeneralInt32Int32Int32: { + int32_t ret = reinterpret_cast< + Prototype_Int32_GeneralGeneralInt32GeneralInt32Int32Int32>(nativeFn)( + x0, x1, x2, x3, x4, x5, x6); + setGPR32Result(ret); + break; + } + case js::jit::Args_Int32_GeneralGeneralInt32General: { + int32_t ret = + reinterpret_cast<Prototype_Int32_GeneralGeneralInt32General>( + nativeFn)(x0, x1, x2, x3); + setGPR32Result(ret); + break; + 
} + case js::jit::Args_Int64_General: { + int64_t ret = + reinterpret_cast<Prototype_Int64_General>( + nativeFn)(x0); + setGPR64Result(ret); + break; + } + case js::jit::Args_Int64_GeneralInt64: { + int64_t ret = + reinterpret_cast<Prototype_Int64_GeneralInt64>( + nativeFn)(x0, x1); + setGPR64Result(ret); + break; + } + + default: + MOZ_CRASH("Unknown function type."); + } + + // Nuke the volatile registers. x0-x7 are used as result registers, but except + // for x0, none are used in the above signatures. + for (int i = 1; i <= 18; i++) { + // Code feed 1 bad data + set_xreg(i, int64_t(0xc0defeed1badda7a)); + } + + // Assert that callee-saved registers are unchanged. + VIXL_ASSERT(xreg(19) == x19); + VIXL_ASSERT(xreg(20) == x20); + VIXL_ASSERT(xreg(21) == x21); + VIXL_ASSERT(xreg(22) == x22); + VIXL_ASSERT(xreg(23) == x23); + VIXL_ASSERT(xreg(24) == x24); + VIXL_ASSERT(xreg(25) == x25); + VIXL_ASSERT(xreg(26) == x26); + VIXL_ASSERT(xreg(27) == x27); + VIXL_ASSERT(xreg(28) == x28); + VIXL_ASSERT(xreg(29) == x29); + + // Assert that the stack is unchanged. + VIXL_ASSERT(savedSP == get_sp()); + + // Simulate a return. + set_lr(savedLR); + set_pc((Instruction*)savedLR); + if (getenv("USE_DEBUGGER")) + printf("SVCRET\n"); +} + +#ifdef JS_CACHE_SIMULATOR_ARM64 +void +Simulator::FlushICache() +{ + // Flush the caches recorded by the current thread as well as what got + // recorded from other threads before this call. 
  // Replay every flush range queued for this simulator on the decoder's
  // decode cache, then empty the queue and clear the pending-request flag.
  auto& vec = SimulatorProcess::getICacheFlushes(this);
  for (auto& flush : vec) {
    decoder_->FlushICache(flush.start, flush.length);
  }
  vec.clear();
  pendingCacheRequests = false;
}

// Decode |instr|, using a per-page cache of previous decode results.
//
// The cache is keyed by SinglePageDecodeCache::PageStart(instr). |lastPage_|
// remembers the page used by the previous call so that consecutive
// instructions on the same page skip the map lookup. When no page entry exists
// yet, one is allocated and inserted (crashing on allocation failure) and the
// instruction is treated as not decoded yet.
void CachingDecoder::Decode(const Instruction* instr) {
  InstDecodedKind state;
  if (lastPage_ && lastPage_->contains(instr)) {
    // Fast path: same page as the previous instruction.
    state = lastPage_->decode(instr);
  } else {
    uintptr_t key = SinglePageDecodeCache::PageStart(instr);
    ICacheMap::AddPtr p = iCache_.lookupForAdd(key);
    if (p) {
      lastPage_ = p->value();
      state = lastPage_->decode(instr);
    } else {
      // First instruction seen on this page: create and register its cache.
      js::AutoEnterOOMUnsafeRegion oomUnsafe;
      SinglePageDecodeCache* newPage = js_new<SinglePageDecodeCache>(instr);
      if (!newPage || !iCache_.add(p, key, newPage)) {
        oomUnsafe.crash("Simulator SinglePageDecodeCache");
      }
      lastPage_ = newPage;
      state = InstDecodedKind::NotDecodedYet;
    }
  }

  switch (state) {
    case InstDecodedKind::NotDecodedYet: {
      // Hand the cache slot for this instruction to cachingDecoder_ and run
      // the full decoder.
      cachingDecoder_.setDecodePtr(lastPage_->decodePtr(instr));
      this->Decoder::Decode(instr);
      break;
    }
// Every other kind dispatches straight to the matching Visit<Kind>() method.
#define CASE(A) \
    case InstDecodedKind::A: { \
      Visit##A(instr); \
      break; \
    }

    VISITOR_LIST(CASE)
#undef CASE
  }
}

// Clear the cached decode state of every instruction in
// [start, start + size). Both |start| and |size| must be multiples of
// vixl::kInstructionSize. Instructions whose page has no entry in |iCache_|
// are skipped.
void CachingDecoder::FlushICache(void* start, size_t size) {
  MOZ_ASSERT(uintptr_t(start) % vixl::kInstructionSize == 0);
  MOZ_ASSERT(size % vixl::kInstructionSize == 0);
  const uint8_t* it = reinterpret_cast<const uint8_t*>(start);
  const uint8_t* end = it + size;
  // Most recently found page; avoids a map lookup per instruction.
  SinglePageDecodeCache* last = nullptr;
  for (; it < end; it += vixl::kInstructionSize) {
    auto instr = reinterpret_cast<const Instruction*>(it);
    if (last && last->contains(instr)) {
      last->clearDecode(instr);
    } else {
      uintptr_t key = SinglePageDecodeCache::PageStart(instr);
      ICacheMap::Ptr p = iCache_.lookup(key);
      if (p) {
        last = p->value();
        last->clearDecode(instr);
      }
    }
  }
}
#endif

} // namespace vixl

namespace js {
namespace jit {

#ifdef JS_CACHE_SIMULATOR_ARM64
// Queue the range [start, start + length) on the pending-flush list of every
// registered simulator. The caller must hold the singleton's lock. Crashes if
// a list cannot grow.
void SimulatorProcess::recordICacheFlush(void* start, size_t length) {
  singleton_->lock_.assertOwnedByCurrentThread();
  AutoEnterOOMUnsafeRegion oomUnsafe;
  ICacheFlush range{start, length};
  for (auto& s : singleton_->pendingFlushes_) {
    if (!s.records.append(range)) {
      oomUnsafe.crash("Simulator recordFlushICache");
    }
  }
}

// Set |pendingCacheRequests| on every registered simulator. The caller must
// hold the singleton's lock.
void SimulatorProcess::membarrier() {
  singleton_->lock_.assertOwnedByCurrentThread();
  for (auto& s : singleton_->pendingFlushes_) {
    s.thread->pendingCacheRequests = true;
  }
}

// Return the pending-flush list that belongs to |sim|. The caller must hold
// the singleton's lock. Crashes if |sim| was never registered.
SimulatorProcess::ICacheFlushes& SimulatorProcess::getICacheFlushes(Simulator* sim) {
  singleton_->lock_.assertOwnedByCurrentThread();
  for (auto& s : singleton_->pendingFlushes_) {
    if (s.thread == sim) {
      return s.records;
    }
  }
  MOZ_CRASH("Simulator is not registered in the SimulatorProcess");
}

// Add |sim| to the list of simulators with an empty pending-flush list. The
// caller must hold the singleton's lock. Returns the result of the append.
bool SimulatorProcess::registerSimulator(Simulator* sim) {
  singleton_->lock_.assertOwnedByCurrentThread();
  ICacheFlushes empty;
  SimFlushes simFlushes{sim, std::move(empty)};
  return singleton_->pendingFlushes_.append(std::move(simFlushes));
}

// Remove the entry for |sim|. The caller must hold the singleton's lock.
// Crashes if |sim| was never registered.
void SimulatorProcess::unregisterSimulator(Simulator* sim) {
  singleton_->lock_.assertOwnedByCurrentThread();
  for (auto& s : singleton_->pendingFlushes_) {
    if (s.thread == sim) {
      // Return right after erasing so the loop never continues over the
      // modified container.
      singleton_->pendingFlushes_.erase(&s);
      return;
    }
  }
  MOZ_CRASH("Simulator is not registered in the SimulatorProcess");
}
#endif // JS_CACHE_SIMULATOR_ARM64

} // namespace jit
} // namespace js

// Return the simulator pointer stored on this context.
vixl::Simulator* JSContext::simulator() const {
  return simulator_;
}
diff --git a/js/src/jit/arm64/vixl/Platform-vixl.h b/js/src/jit/arm64/vixl/Platform-vixl.h
new file mode 100644
index 0000000000..a4de54c785
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Platform-vixl.h
@@ -0,0 +1,39 @@
// Copyright 2014, ARM Limited
// All rights reserved.
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef VIXL_PLATFORM_H +#define VIXL_PLATFORM_H + +// Define platform specific functionalities. 
+#include <signal.h> + +#include "jstypes.h" + +namespace vixl { +inline void HostBreakpoint(int64_t code = 0) { raise(SIGINT); } +} // namespace vixl + +#endif diff --git a/js/src/jit/arm64/vixl/README.md b/js/src/jit/arm64/vixl/README.md new file mode 100644 index 0000000000..7111753279 --- /dev/null +++ b/js/src/jit/arm64/vixl/README.md @@ -0,0 +1,7 @@ +This directory is a mix of VIXL files for ARM64, and files added to integrate +VIXL with the SpiderMonkey MacroAssembler. Most SpiderMonkey extensions are +in files prefixed with Moz*, but some are spread across the imported files where +convenient. + +VIXL upstream sources can be found at: +https://git.linaro.org/arm/vixl.git/about/ diff --git a/js/src/jit/arm64/vixl/Simulator-Constants-vixl.h b/js/src/jit/arm64/vixl/Simulator-Constants-vixl.h new file mode 100644 index 0000000000..4b9064a89b --- /dev/null +++ b/js/src/jit/arm64/vixl/Simulator-Constants-vixl.h @@ -0,0 +1,140 @@ +// Copyright 2015, ARM Limited +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED.
// IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef VIXL_A64_SIMULATOR_CONSTANTS_A64_H_
#define VIXL_A64_SIMULATOR_CONSTANTS_A64_H_

// NOTE(review): this header uses kInstructionSize but includes nothing, so the
// constant must already be declared wherever this file is included.

namespace vixl {

// Debug instructions.
//
// VIXL's macro-assembler and simulator support a few pseudo instructions to
// make debugging easier. These pseudo instructions do not exist on real
// hardware.
//
// TODO: Also consider allowing these pseudo-instructions to be disabled in the
// simulator, so that users can check that the input is a valid native code.
// (This isn't possible in all cases. Printf won't work, for example.)
//
// Each debug pseudo instruction is represented by a HLT instruction. The HLT
// immediate field is used to identify the type of debug pseudo instruction.

// Enumerator values are assigned implicitly, starting at 0 for kPrintfOpcode.
enum DebugHltOpcodes {
  kPrintfOpcode,
  kTraceOpcode,
  kLogOpcode,
  // Aliases.
  kDebugHltFirstOpcode = kPrintfOpcode,
  kDebugHltLastOpcode = kLogOpcode
};

// Each pseudo instruction uses a custom encoding for additional arguments, as
// described below.

// Unreachable - kUnreachableOpcode
//
// Instruction which should never be executed. This is used as a guard in parts
// of the code that should not be reachable, such as in data encoded inline in
// the instructions.
//
// NOTE(review): DebugHltOpcodes above declares no kUnreachableOpcode
// enumerator; confirm where (or whether) this opcode is defined.

// Printf - kPrintfOpcode
//  - arg_count: The number of arguments.
//  - arg_pattern: A set of PrintfArgPattern values, packed into two-bit fields.
//
// Simulate a call to printf.
//
// Floating-point and integer arguments are passed in separate sets of registers
// in AAPCS64 (even for varargs functions), so it is not possible to determine
// the type of each argument without some information about the values that were
// passed in. This information could be retrieved from the printf format string,
// but the format string is not trivial to parse so we encode the relevant
// information with the HLT instruction.
//
// Also, the following registers are populated (as if for a native A64 call):
//    x0: The format string
// x1-x7: Optional arguments, if type == CPURegister::kRegister
// d0-d7: Optional arguments, if type == CPURegister::kFPRegister
//
// The offsets and the length below are expressed in multiples of
// kInstructionSize.
const unsigned kPrintfArgCountOffset = 1 * kInstructionSize;
const unsigned kPrintfArgPatternListOffset = 2 * kInstructionSize;
const unsigned kPrintfLength = 3 * kInstructionSize;

const unsigned kPrintfMaxArgCount = 4;

// The argument pattern is a set of two-bit-fields, each with one of the
// following values:
enum PrintfArgPattern {
  kPrintfArgW = 1,
  kPrintfArgX = 2,
  // There is no kPrintfArgS because floats are always converted to doubles in C
  // varargs calls.
  kPrintfArgD = 3
};
// Width, in bits, of one PrintfArgPattern field.
static const unsigned kPrintfArgPatternBits = 2;

// Trace - kTraceOpcode
//  - parameter: TraceParameters stored as a uint32_t
//  - command: TraceCommand stored as a uint32_t
//
// Allow for trace management in the generated code. This enables or disables
// automatic tracing of the specified information for every simulated
// instruction.
const unsigned kTraceParamsOffset = 1 * kInstructionSize;
const unsigned kTraceCommandOffset = 2 * kInstructionSize;
const unsigned kTraceLength = 3 * kInstructionSize;

// Trace parameters. The LOG_* values are single-bit flags that can be OR-ed
// together; LOG_STATE and LOG_ALL are predefined combinations.
enum TraceParameters {
  LOG_DISASM = 1 << 0,   // Log disassembly.
  LOG_REGS = 1 << 1,     // Log general purpose registers.
  LOG_VREGS = 1 << 2,    // Log NEON and floating-point registers.
  LOG_SYSREGS = 1 << 3,  // Log the flags and system registers.
  LOG_WRITE = 1 << 4,    // Log writes to memory.

  LOG_NONE = 0,
  LOG_STATE = LOG_REGS | LOG_VREGS | LOG_SYSREGS,
  LOG_ALL = LOG_DISASM | LOG_STATE | LOG_WRITE
};

// Trace commands.
enum TraceCommand {
  TRACE_ENABLE = 1,
  TRACE_DISABLE = 2
};

// Log - kLogOpcode
//  - parameter: TraceParameters stored as a uint32_t
//
// Print the specified information once. This mechanism is separate from Trace.
// In particular, _all_ of the specified registers are printed, rather than just
// the registers that the instruction writes.
//
// Any combination of the TraceParameters values can be used, except that
// LOG_DISASM is not supported for Log.
const unsigned kLogParamsOffset = 1 * kInstructionSize;
const unsigned kLogLength = 2 * kInstructionSize;
} // namespace vixl

#endif // VIXL_A64_SIMULATOR_CONSTANTS_A64_H_
diff --git a/js/src/jit/arm64/vixl/Simulator-vixl.cpp b/js/src/jit/arm64/vixl/Simulator-vixl.cpp
new file mode 100644
index 0000000000..71e1a31d46
--- /dev/null
+++ b/js/src/jit/arm64/vixl/Simulator-vixl.cpp
@@ -0,0 +1,4371 @@
// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "jstypes.h" + +#ifdef JS_SIMULATOR_ARM64 + +#include "jit/arm64/vixl/Simulator-vixl.h" + +#include <cmath> +#include <string.h> + +#include "jit/AtomicOperations.h" + +namespace vixl { + +const Instruction* Simulator::kEndOfSimAddress = NULL; + +void SimSystemRegister::SetBits(int msb, int lsb, uint32_t bits) { + int width = msb - lsb + 1; + VIXL_ASSERT(IsUintN(width, bits) || IsIntN(width, bits)); + + bits <<= lsb; + uint32_t mask = ((1 << width) - 1) << lsb; + VIXL_ASSERT((mask & write_ignore_mask_) == 0); + + value_ = (value_ & ~mask) | (bits & mask); +} + + +SimSystemRegister SimSystemRegister::DefaultValueFor(SystemRegister id) { + switch (id) { + case NZCV: + return SimSystemRegister(0x00000000, NZCVWriteIgnoreMask); + case FPCR: + return SimSystemRegister(0x00000000, FPCRWriteIgnoreMask); + default: + VIXL_UNREACHABLE(); + return SimSystemRegister(); + } +} + + +void Simulator::Run() { + pc_modified_ = false; + while (pc_ != kEndOfSimAddress) { + ExecuteInstruction(); + LogAllWrittenRegisters(); + } +} + + +void Simulator::RunFrom(const Instruction* first) { + set_pc(first); + Run(); +} + + +const char* Simulator::xreg_names[] = { +"x0", "x1", "x2", "x3", "x4", "x5", 
"x6", "x7", +"x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", +"x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", +"x24", "x25", "x26", "x27", "x28", "x29", "lr", "xzr", "sp"}; + +const char* Simulator::wreg_names[] = { +"w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7", +"w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15", +"w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23", +"w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr", "wsp"}; + +const char* Simulator::sreg_names[] = { +"s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", +"s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15", +"s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23", +"s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31"}; + +const char* Simulator::dreg_names[] = { +"d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", +"d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", +"d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", +"d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31"}; + +const char* Simulator::vreg_names[] = { +"v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", +"v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", +"v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", +"v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"}; + + + +const char* Simulator::WRegNameForCode(unsigned code, Reg31Mode mode) { + VIXL_ASSERT(code < kNumberOfRegisters); + // If the code represents the stack pointer, index the name after zr. + if ((code == kZeroRegCode) && (mode == Reg31IsStackPointer)) { + code = kZeroRegCode + 1; + } + return wreg_names[code]; +} + + +const char* Simulator::XRegNameForCode(unsigned code, Reg31Mode mode) { + VIXL_ASSERT(code < kNumberOfRegisters); + // If the code represents the stack pointer, index the name after zr. 
+ if ((code == kZeroRegCode) && (mode == Reg31IsStackPointer)) { + code = kZeroRegCode + 1; + } + return xreg_names[code]; +} + + +const char* Simulator::SRegNameForCode(unsigned code) { + VIXL_ASSERT(code < kNumberOfFPRegisters); + return sreg_names[code]; +} + + +const char* Simulator::DRegNameForCode(unsigned code) { + VIXL_ASSERT(code < kNumberOfFPRegisters); + return dreg_names[code]; +} + + +const char* Simulator::VRegNameForCode(unsigned code) { + VIXL_ASSERT(code < kNumberOfVRegisters); + return vreg_names[code]; +} + + +#define COLOUR(colour_code) "\033[0;" colour_code "m" +#define COLOUR_BOLD(colour_code) "\033[1;" colour_code "m" +#define NORMAL "" +#define GREY "30" +#define RED "31" +#define GREEN "32" +#define YELLOW "33" +#define BLUE "34" +#define MAGENTA "35" +#define CYAN "36" +#define WHITE "37" +void Simulator::set_coloured_trace(bool value) { + coloured_trace_ = value; + + clr_normal = value ? COLOUR(NORMAL) : ""; + clr_flag_name = value ? COLOUR_BOLD(WHITE) : ""; + clr_flag_value = value ? COLOUR(NORMAL) : ""; + clr_reg_name = value ? COLOUR_BOLD(CYAN) : ""; + clr_reg_value = value ? COLOUR(CYAN) : ""; + clr_vreg_name = value ? COLOUR_BOLD(MAGENTA) : ""; + clr_vreg_value = value ? COLOUR(MAGENTA) : ""; + clr_memory_address = value ? COLOUR_BOLD(BLUE) : ""; + clr_warning = value ? COLOUR_BOLD(YELLOW) : ""; + clr_warning_message = value ? COLOUR(YELLOW) : ""; + clr_printf = value ? 
COLOUR(GREEN) : ""; +} +#undef COLOUR +#undef COLOUR_BOLD +#undef NORMAL +#undef GREY +#undef RED +#undef GREEN +#undef YELLOW +#undef BLUE +#undef MAGENTA +#undef CYAN +#undef WHITE + + +void Simulator::set_trace_parameters(int parameters) { + bool disasm_before = trace_parameters_ & LOG_DISASM; + trace_parameters_ = parameters; + bool disasm_after = trace_parameters_ & LOG_DISASM; + + if (disasm_before != disasm_after) { + if (disasm_after) { + decoder_->InsertVisitorBefore(print_disasm_, this); + } else { + decoder_->RemoveVisitor(print_disasm_); + } + } +} + + +void Simulator::set_instruction_stats(bool value) { + if (instrumentation_ == nullptr) { + return; + } + + if (value != instruction_stats_) { + if (value) { + decoder_->AppendVisitor(instrumentation_); + } else { + decoder_->RemoveVisitor(instrumentation_); + } + instruction_stats_ = value; + } +} + +// Helpers --------------------------------------------------------------------- +uint64_t Simulator::AddWithCarry(unsigned reg_size, + bool set_flags, + uint64_t left, + uint64_t right, + int carry_in) { + VIXL_ASSERT((carry_in == 0) || (carry_in == 1)); + VIXL_ASSERT((reg_size == kXRegSize) || (reg_size == kWRegSize)); + + uint64_t max_uint = (reg_size == kWRegSize) ? kWMaxUInt : kXMaxUInt; + uint64_t reg_mask = (reg_size == kWRegSize) ? kWRegMask : kXRegMask; + uint64_t sign_mask = (reg_size == kWRegSize) ? kWSignMask : kXSignMask; + + left &= reg_mask; + right &= reg_mask; + uint64_t result = (left + right + carry_in) & reg_mask; + + if (set_flags) { + nzcv().SetN(CalcNFlag(result, reg_size)); + nzcv().SetZ(CalcZFlag(result)); + + // Compute the C flag by comparing the result to the max unsigned integer. + uint64_t max_uint_2op = max_uint - carry_in; + bool C = (left > max_uint_2op) || ((max_uint_2op - left) < right); + nzcv().SetC(C ? 1 : 0); + + // Overflow iff the sign bit is the same for the two inputs and different + // for the result. 
+ uint64_t left_sign = left & sign_mask; + uint64_t right_sign = right & sign_mask; + uint64_t result_sign = result & sign_mask; + bool V = (left_sign == right_sign) && (left_sign != result_sign); + nzcv().SetV(V ? 1 : 0); + + LogSystemRegister(NZCV); + } + return result; +} + + +int64_t Simulator::ShiftOperand(unsigned reg_size, + int64_t value, + Shift shift_type, + unsigned amount) { + if (amount == 0) { + return value; + } + int64_t mask = reg_size == kXRegSize ? kXRegMask : kWRegMask; + switch (shift_type) { + case LSL: + return (value << amount) & mask; + case LSR: + return static_cast<uint64_t>(value) >> amount; + case ASR: { + // Shift used to restore the sign. + unsigned s_shift = kXRegSize - reg_size; + // Value with its sign restored. + int64_t s_value = (value << s_shift) >> s_shift; + return (s_value >> amount) & mask; + } + case ROR: { + if (reg_size == kWRegSize) { + value &= kWRegMask; + } + return (static_cast<uint64_t>(value) >> amount) | + ((value & ((INT64_C(1) << amount) - 1)) << + (reg_size - amount)); + } + default: + VIXL_UNIMPLEMENTED(); + return 0; + } +} + + +int64_t Simulator::ExtendValue(unsigned reg_size, + int64_t value, + Extend extend_type, + unsigned left_shift) { + switch (extend_type) { + case UXTB: + value &= kByteMask; + break; + case UXTH: + value &= kHalfWordMask; + break; + case UXTW: + value &= kWordMask; + break; + case SXTB: + value = (value << 56) >> 56; + break; + case SXTH: + value = (value << 48) >> 48; + break; + case SXTW: + value = (value << 32) >> 32; + break; + case UXTX: + case SXTX: + break; + default: + VIXL_UNREACHABLE(); + } + int64_t mask = (reg_size == kXRegSize) ? kXRegMask : kWRegMask; + return (value << left_shift) & mask; +} + + +void Simulator::FPCompare(double val0, double val1, FPTrapFlags trap) { + AssertSupportedFPCR(); + + // TODO: This assumes that the C++ implementation handles comparisons in the + // way that we expect (as per AssertSupportedFPCR()). 
+ bool process_exception = false; + if ((std::isnan(val0) != 0) || (std::isnan(val1) != 0)) { + nzcv().SetRawValue(FPUnorderedFlag); + if (IsSignallingNaN(val0) || IsSignallingNaN(val1) || + (trap == EnableTrap)) { + process_exception = true; + } + } else if (val0 < val1) { + nzcv().SetRawValue(FPLessThanFlag); + } else if (val0 > val1) { + nzcv().SetRawValue(FPGreaterThanFlag); + } else if (val0 == val1) { + nzcv().SetRawValue(FPEqualFlag); + } else { + VIXL_UNREACHABLE(); + } + LogSystemRegister(NZCV); + if (process_exception) FPProcessException(); +} + + +Simulator::PrintRegisterFormat Simulator::GetPrintRegisterFormatForSize( + unsigned reg_size, unsigned lane_size) { + VIXL_ASSERT(reg_size >= lane_size); + + uint32_t format = 0; + if (reg_size != lane_size) { + switch (reg_size) { + default: VIXL_UNREACHABLE(); break; + case kQRegSizeInBytes: format = kPrintRegAsQVector; break; + case kDRegSizeInBytes: format = kPrintRegAsDVector; break; + } + } + + switch (lane_size) { + default: VIXL_UNREACHABLE(); break; + case kQRegSizeInBytes: format |= kPrintReg1Q; break; + case kDRegSizeInBytes: format |= kPrintReg1D; break; + case kSRegSizeInBytes: format |= kPrintReg1S; break; + case kHRegSizeInBytes: format |= kPrintReg1H; break; + case kBRegSizeInBytes: format |= kPrintReg1B; break; + } + // These sizes would be duplicate case labels. 
+ VIXL_STATIC_ASSERT(kXRegSizeInBytes == kDRegSizeInBytes); + VIXL_STATIC_ASSERT(kWRegSizeInBytes == kSRegSizeInBytes); + VIXL_STATIC_ASSERT(kPrintXReg == kPrintReg1D); + VIXL_STATIC_ASSERT(kPrintWReg == kPrintReg1S); + + return static_cast<PrintRegisterFormat>(format); +} + + +Simulator::PrintRegisterFormat Simulator::GetPrintRegisterFormat( + VectorFormat vform) { + switch (vform) { + default: VIXL_UNREACHABLE(); return kPrintReg16B; + case kFormat16B: return kPrintReg16B; + case kFormat8B: return kPrintReg8B; + case kFormat8H: return kPrintReg8H; + case kFormat4H: return kPrintReg4H; + case kFormat4S: return kPrintReg4S; + case kFormat2S: return kPrintReg2S; + case kFormat2D: return kPrintReg2D; + case kFormat1D: return kPrintReg1D; + } +} + + +void Simulator::PrintWrittenRegisters() { + for (unsigned i = 0; i < kNumberOfRegisters; i++) { + if (registers_[i].WrittenSinceLastLog()) PrintRegister(i); + } +} + + +void Simulator::PrintWrittenVRegisters() { + for (unsigned i = 0; i < kNumberOfVRegisters; i++) { + // At this point there is no type information, so print as a raw 1Q. + if (vregisters_[i].WrittenSinceLastLog()) PrintVRegister(i, kPrintReg1Q); + } +} + + +void Simulator::PrintSystemRegisters() { + PrintSystemRegister(NZCV); + PrintSystemRegister(FPCR); +} + + +void Simulator::PrintRegisters() { + for (unsigned i = 0; i < kNumberOfRegisters; i++) { + PrintRegister(i); + } +} + + +void Simulator::PrintVRegisters() { + for (unsigned i = 0; i < kNumberOfVRegisters; i++) { + // At this point there is no type information, so print as a raw 1Q. + PrintVRegister(i, kPrintReg1Q); + } +} + + +// Print a register's name and raw value. +// +// Only the least-significant `size_in_bytes` bytes of the register are printed, +// but the value is aligned as if the whole register had been printed. +// +// For typical register updates, size_in_bytes should be set to kXRegSizeInBytes +// -- the default -- so that the whole register is printed. 
Other values of +// size_in_bytes are intended for use when the register hasn't actually been +// updated (such as in PrintWrite). +// +// No newline is printed. This allows the caller to print more details (such as +// a memory access annotation). +void Simulator::PrintRegisterRawHelper(unsigned code, Reg31Mode r31mode, + int size_in_bytes) { + // The template for all supported sizes. + // "# x{code}: 0xffeeddccbbaa9988" + // "# w{code}: 0xbbaa9988" + // "# w{code}<15:0>: 0x9988" + // "# w{code}<7:0>: 0x88" + unsigned padding_chars = (kXRegSizeInBytes - size_in_bytes) * 2; + + const char * name = ""; + const char * suffix = ""; + switch (size_in_bytes) { + case kXRegSizeInBytes: name = XRegNameForCode(code, r31mode); break; + case kWRegSizeInBytes: name = WRegNameForCode(code, r31mode); break; + case 2: + name = WRegNameForCode(code, r31mode); + suffix = "<15:0>"; + padding_chars -= strlen(suffix); + break; + case 1: + name = WRegNameForCode(code, r31mode); + suffix = "<7:0>"; + padding_chars -= strlen(suffix); + break; + default: + VIXL_UNREACHABLE(); + } + fprintf(stream_, "# %s%5s%s: ", clr_reg_name, name, suffix); + + // Print leading padding spaces. + VIXL_ASSERT(padding_chars < (kXRegSizeInBytes * 2)); + for (unsigned i = 0; i < padding_chars; i++) { + putc(' ', stream_); + } + + // Print the specified bits in hexadecimal format. + uint64_t bits = reg<uint64_t>(code, r31mode); + bits &= kXRegMask >> ((kXRegSizeInBytes - size_in_bytes) * 8); + VIXL_STATIC_ASSERT(sizeof(bits) == kXRegSizeInBytes); + + int chars = size_in_bytes * 2; + fprintf(stream_, "%s0x%0*" PRIx64 "%s", + clr_reg_value, chars, bits, clr_normal); +} + + +void Simulator::PrintRegister(unsigned code, Reg31Mode r31mode) { + registers_[code].NotifyRegisterLogged(); + + // Don't print writes into xzr. 
+ if ((code == kZeroRegCode) && (r31mode == Reg31IsZeroRegister)) { + return; + } + + // The template for all x and w registers: + // "# x{code}: 0x{value}" + // "# w{code}: 0x{value}" + + PrintRegisterRawHelper(code, r31mode); + fprintf(stream_, "\n"); +} + + +// Print a register's name and raw value. +// +// The `bytes` and `lsb` arguments can be used to limit the bytes that are +// printed. These arguments are intended for use in cases where register hasn't +// actually been updated (such as in PrintVWrite). +// +// No newline is printed. This allows the caller to print more details (such as +// a floating-point interpretation or a memory access annotation). +void Simulator::PrintVRegisterRawHelper(unsigned code, int bytes, int lsb) { + // The template for vector types: + // "# v{code}: 0xffeeddccbbaa99887766554433221100". + // An example with bytes=4 and lsb=8: + // "# v{code}: 0xbbaa9988 ". + fprintf(stream_, "# %s%5s: %s", + clr_vreg_name, VRegNameForCode(code), clr_vreg_value); + + int msb = lsb + bytes - 1; + int byte = kQRegSizeInBytes - 1; + + // Print leading padding spaces. (Two spaces per byte.) + while (byte > msb) { + fprintf(stream_, " "); + byte--; + } + + // Print the specified part of the value, byte by byte. + qreg_t rawbits = qreg(code); + fprintf(stream_, "0x"); + while (byte >= lsb) { + fprintf(stream_, "%02x", rawbits.val[byte]); + byte--; + } + + // Print trailing padding spaces. + while (byte >= 0) { + fprintf(stream_, " "); + byte--; + } + fprintf(stream_, "%s", clr_normal); +} + + +// Print each of the specified lanes of a register as a float or double value. +// +// The `lane_count` and `lslane` arguments can be used to limit the lanes that +// are printed. These arguments are intended for use in cases where register +// hasn't actually been updated (such as in PrintVWrite). +// +// No newline is printed. This allows the caller to print more details (such as +// a memory access annotation). 
+void Simulator::PrintVRegisterFPHelper(unsigned code, + unsigned lane_size_in_bytes, + int lane_count, + int rightmost_lane) { + VIXL_ASSERT((lane_size_in_bytes == kSRegSizeInBytes) || + (lane_size_in_bytes == kDRegSizeInBytes)); + + unsigned msb = ((lane_count + rightmost_lane) * lane_size_in_bytes); + VIXL_ASSERT(msb <= kQRegSizeInBytes); + + // For scalar types ((lane_count == 1) && (rightmost_lane == 0)), a register + // name is used: + // " (s{code}: {value})" + // " (d{code}: {value})" + // For vector types, "..." is used to represent one or more omitted lanes. + // " (..., {value}, {value}, ...)" + if ((lane_count == 1) && (rightmost_lane == 0)) { + const char * name = + (lane_size_in_bytes == kSRegSizeInBytes) ? SRegNameForCode(code) + : DRegNameForCode(code); + fprintf(stream_, " (%s%s: ", clr_vreg_name, name); + } else { + if (msb < (kQRegSizeInBytes - 1)) { + fprintf(stream_, " (..., "); + } else { + fprintf(stream_, " ("); + } + } + + // Print the list of values. + const char * separator = ""; + int leftmost_lane = rightmost_lane + lane_count - 1; + for (int lane = leftmost_lane; lane >= rightmost_lane; lane--) { + double value = + (lane_size_in_bytes == kSRegSizeInBytes) ? vreg(code).Get<float>(lane) + : vreg(code).Get<double>(lane); + fprintf(stream_, "%s%s%#g%s", separator, clr_vreg_value, value, clr_normal); + separator = ", "; + } + + if (rightmost_lane > 0) { + fprintf(stream_, ", ..."); + } + fprintf(stream_, ")"); +} + + +void Simulator::PrintVRegister(unsigned code, PrintRegisterFormat format) { + vregisters_[code].NotifyRegisterLogged(); + + int lane_size_log2 = format & kPrintRegLaneSizeMask; + + int reg_size_log2; + if (format & kPrintRegAsQVector) { + reg_size_log2 = kQRegSizeInBytesLog2; + } else if (format & kPrintRegAsDVector) { + reg_size_log2 = kDRegSizeInBytesLog2; + } else { + // Scalar types. 
+ reg_size_log2 = lane_size_log2; + } + + int lane_count = 1 << (reg_size_log2 - lane_size_log2); + int lane_size = 1 << lane_size_log2; + + // The template for vector types: + // "# v{code}: 0x{rawbits} (..., {value}, ...)". + // The template for scalar types: + // "# v{code}: 0x{rawbits} ({reg}:{value})". + // The values in parentheses after the bit representations are floating-point + // interpretations. They are displayed only if the kPrintVRegAsFP bit is set. + + PrintVRegisterRawHelper(code); + if (format & kPrintRegAsFP) { + PrintVRegisterFPHelper(code, lane_size, lane_count); + } + + fprintf(stream_, "\n"); +} + + +void Simulator::PrintSystemRegister(SystemRegister id) { + switch (id) { + case NZCV: + fprintf(stream_, "# %sNZCV: %sN:%d Z:%d C:%d V:%d%s\n", + clr_flag_name, clr_flag_value, + nzcv().N(), nzcv().Z(), nzcv().C(), nzcv().V(), + clr_normal); + break; + case FPCR: { + static const char * rmode[] = { + "0b00 (Round to Nearest)", + "0b01 (Round towards Plus Infinity)", + "0b10 (Round towards Minus Infinity)", + "0b11 (Round towards Zero)" + }; + VIXL_ASSERT(fpcr().RMode() < (sizeof(rmode) / sizeof(rmode[0]))); + fprintf(stream_, + "# %sFPCR: %sAHP:%d DN:%d FZ:%d RMode:%s%s\n", + clr_flag_name, clr_flag_value, + fpcr().AHP(), fpcr().DN(), fpcr().FZ(), rmode[fpcr().RMode()], + clr_normal); + break; + } + default: + VIXL_UNREACHABLE(); + } +} + + +void Simulator::PrintRead(uintptr_t address, + unsigned reg_code, + PrintRegisterFormat format) { + registers_[reg_code].NotifyRegisterLogged(); + + USE(format); + + // The template is "# {reg}: 0x{value} <- {address}". + PrintRegisterRawHelper(reg_code, Reg31IsZeroRegister); + fprintf(stream_, " <- %s0x%016" PRIxPTR "%s\n", + clr_memory_address, address, clr_normal); +} + + +void Simulator::PrintVRead(uintptr_t address, + unsigned reg_code, + PrintRegisterFormat format, + unsigned lane) { + vregisters_[reg_code].NotifyRegisterLogged(); + + // The template is "# v{code}: 0x{rawbits} <- address". 
+ PrintVRegisterRawHelper(reg_code); + if (format & kPrintRegAsFP) { + PrintVRegisterFPHelper(reg_code, GetPrintRegLaneSizeInBytes(format), + GetPrintRegLaneCount(format), lane); + } + fprintf(stream_, " <- %s0x%016" PRIxPTR "%s\n", + clr_memory_address, address, clr_normal); +} + + +void Simulator::PrintWrite(uintptr_t address, + unsigned reg_code, + PrintRegisterFormat format) { + VIXL_ASSERT(GetPrintRegLaneCount(format) == 1); + + // The template is "# v{code}: 0x{value} -> {address}". To keep the trace tidy + // and readable, the value is aligned with the values in the register trace. + PrintRegisterRawHelper(reg_code, Reg31IsZeroRegister, + GetPrintRegSizeInBytes(format)); + fprintf(stream_, " -> %s0x%016" PRIxPTR "%s\n", + clr_memory_address, address, clr_normal); +} + + +void Simulator::PrintVWrite(uintptr_t address, + unsigned reg_code, + PrintRegisterFormat format, + unsigned lane) { + // The templates: + // "# v{code}: 0x{rawbits} -> {address}" + // "# v{code}: 0x{rawbits} (..., {value}, ...) -> {address}". + // "# v{code}: 0x{rawbits} ({reg}:{value}) -> {address}" + // Because this trace doesn't represent a change to the source register's + // value, only the relevant part of the value is printed. To keep the trace + // tidy and readable, the raw value is aligned with the other values in the + // register trace. 
+ int lane_count = GetPrintRegLaneCount(format); + int lane_size = GetPrintRegLaneSizeInBytes(format); + int reg_size = GetPrintRegSizeInBytes(format); + PrintVRegisterRawHelper(reg_code, reg_size, lane_size * lane); + if (format & kPrintRegAsFP) { + PrintVRegisterFPHelper(reg_code, lane_size, lane_count, lane); + } + fprintf(stream_, " -> %s0x%016" PRIxPTR "%s\n", + clr_memory_address, address, clr_normal); +} + + +// Visitors--------------------------------------------------------------------- + +void Simulator::VisitUnimplemented(const Instruction* instr) { + printf("Unimplemented instruction at %p: 0x%08" PRIx32 "\n", + reinterpret_cast<const void*>(instr), instr->InstructionBits()); + VIXL_UNIMPLEMENTED(); +} + + +void Simulator::VisitUnallocated(const Instruction* instr) { + printf("Unallocated instruction at %p: 0x%08" PRIx32 "\n", + reinterpret_cast<const void*>(instr), instr->InstructionBits()); + VIXL_UNIMPLEMENTED(); +} + + +void Simulator::VisitPCRelAddressing(const Instruction* instr) { + VIXL_ASSERT((instr->Mask(PCRelAddressingMask) == ADR) || + (instr->Mask(PCRelAddressingMask) == ADRP)); + + set_reg(instr->Rd(), instr->ImmPCOffsetTarget()); +} + + +void Simulator::VisitUnconditionalBranch(const Instruction* instr) { + switch (instr->Mask(UnconditionalBranchMask)) { + case BL: + set_lr(instr->NextInstruction()); + VIXL_FALLTHROUGH(); + case B: + set_pc(instr->ImmPCOffsetTarget()); + break; + default: VIXL_UNREACHABLE(); + } +} + + +void Simulator::VisitConditionalBranch(const Instruction* instr) { + VIXL_ASSERT(instr->Mask(ConditionalBranchMask) == B_cond); + if (ConditionPassed(instr->ConditionBranch())) { + set_pc(instr->ImmPCOffsetTarget()); + } +} + + +void Simulator::VisitUnconditionalBranchToRegister(const Instruction* instr) { + const Instruction* target = Instruction::Cast(xreg(instr->Rn())); + + switch (instr->Mask(UnconditionalBranchToRegisterMask)) { + case BLR: + set_lr(instr->NextInstruction()); + VIXL_FALLTHROUGH(); + case BR: + case 
RET: set_pc(target); break; + default: VIXL_UNREACHABLE(); + } +} + + +void Simulator::VisitTestBranch(const Instruction* instr) { + unsigned bit_pos = (instr->ImmTestBranchBit5() << 5) | + instr->ImmTestBranchBit40(); + bool bit_zero = ((xreg(instr->Rt()) >> bit_pos) & 1) == 0; + bool take_branch = false; + switch (instr->Mask(TestBranchMask)) { + case TBZ: take_branch = bit_zero; break; + case TBNZ: take_branch = !bit_zero; break; + default: VIXL_UNIMPLEMENTED(); + } + if (take_branch) { + set_pc(instr->ImmPCOffsetTarget()); + } +} + + +void Simulator::VisitCompareBranch(const Instruction* instr) { + unsigned rt = instr->Rt(); + bool take_branch = false; + switch (instr->Mask(CompareBranchMask)) { + case CBZ_w: take_branch = (wreg(rt) == 0); break; + case CBZ_x: take_branch = (xreg(rt) == 0); break; + case CBNZ_w: take_branch = (wreg(rt) != 0); break; + case CBNZ_x: take_branch = (xreg(rt) != 0); break; + default: VIXL_UNIMPLEMENTED(); + } + if (take_branch) { + set_pc(instr->ImmPCOffsetTarget()); + } +} + + +void Simulator::AddSubHelper(const Instruction* instr, int64_t op2) { + unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize; + bool set_flags = instr->FlagsUpdate(); + int64_t new_val = 0; + Instr operation = instr->Mask(AddSubOpMask); + + switch (operation) { + case ADD: + case ADDS: { + new_val = AddWithCarry(reg_size, + set_flags, + reg(reg_size, instr->Rn(), instr->RnMode()), + op2); + break; + } + case SUB: + case SUBS: { + new_val = AddWithCarry(reg_size, + set_flags, + reg(reg_size, instr->Rn(), instr->RnMode()), + ~op2, + 1); + break; + } + default: VIXL_UNREACHABLE(); + } + + set_reg(reg_size, instr->Rd(), new_val, LogRegWrites, instr->RdMode()); +} + + +void Simulator::VisitAddSubShifted(const Instruction* instr) { + unsigned reg_size = instr->SixtyFourBits() ? 
kXRegSize : kWRegSize; + int64_t op2 = ShiftOperand(reg_size, + reg(reg_size, instr->Rm()), + static_cast<Shift>(instr->ShiftDP()), + instr->ImmDPShift()); + AddSubHelper(instr, op2); +} + + +void Simulator::VisitAddSubImmediate(const Instruction* instr) { + int64_t op2 = instr->ImmAddSub() << ((instr->ShiftAddSub() == 1) ? 12 : 0); + AddSubHelper(instr, op2); +} + + +void Simulator::VisitAddSubExtended(const Instruction* instr) { + unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize; + int64_t op2 = ExtendValue(reg_size, + reg(reg_size, instr->Rm()), + static_cast<Extend>(instr->ExtendMode()), + instr->ImmExtendShift()); + AddSubHelper(instr, op2); +} + + +void Simulator::VisitAddSubWithCarry(const Instruction* instr) { + unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize; + int64_t op2 = reg(reg_size, instr->Rm()); + int64_t new_val; + + if ((instr->Mask(AddSubOpMask) == SUB) || instr->Mask(AddSubOpMask) == SUBS) { + op2 = ~op2; + } + + new_val = AddWithCarry(reg_size, + instr->FlagsUpdate(), + reg(reg_size, instr->Rn()), + op2, + C()); + + set_reg(reg_size, instr->Rd(), new_val); +} + + +void Simulator::VisitLogicalShifted(const Instruction* instr) { + unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize; + Shift shift_type = static_cast<Shift>(instr->ShiftDP()); + unsigned shift_amount = instr->ImmDPShift(); + int64_t op2 = ShiftOperand(reg_size, reg(reg_size, instr->Rm()), shift_type, + shift_amount); + if (instr->Mask(NOT) == NOT) { + op2 = ~op2; + } + LogicalHelper(instr, op2); +} + + +void Simulator::VisitLogicalImmediate(const Instruction* instr) { + LogicalHelper(instr, instr->ImmLogical()); +} + + +void Simulator::LogicalHelper(const Instruction* instr, int64_t op2) { + unsigned reg_size = instr->SixtyFourBits() ? 
kXRegSize : kWRegSize; + int64_t op1 = reg(reg_size, instr->Rn()); + int64_t result = 0; + bool update_flags = false; + + // Switch on the logical operation, stripping out the NOT bit, as it has a + // different meaning for logical immediate instructions. + switch (instr->Mask(LogicalOpMask & ~NOT)) { + case ANDS: update_flags = true; VIXL_FALLTHROUGH(); + case AND: result = op1 & op2; break; + case ORR: result = op1 | op2; break; + case EOR: result = op1 ^ op2; break; + default: + VIXL_UNIMPLEMENTED(); + } + + if (update_flags) { + nzcv().SetN(CalcNFlag(result, reg_size)); + nzcv().SetZ(CalcZFlag(result)); + nzcv().SetC(0); + nzcv().SetV(0); + LogSystemRegister(NZCV); + } + + set_reg(reg_size, instr->Rd(), result, LogRegWrites, instr->RdMode()); +} + + +void Simulator::VisitConditionalCompareRegister(const Instruction* instr) { + unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize; + ConditionalCompareHelper(instr, reg(reg_size, instr->Rm())); +} + + +void Simulator::VisitConditionalCompareImmediate(const Instruction* instr) { + ConditionalCompareHelper(instr, instr->ImmCondCmp()); +} + + +void Simulator::ConditionalCompareHelper(const Instruction* instr, + int64_t op2) { + unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize; + int64_t op1 = reg(reg_size, instr->Rn()); + + if (ConditionPassed(instr->Condition())) { + // If the condition passes, set the status flags to the result of comparing + // the operands. + if (instr->Mask(ConditionalCompareMask) == CCMP) { + AddWithCarry(reg_size, true, op1, ~op2, 1); + } else { + VIXL_ASSERT(instr->Mask(ConditionalCompareMask) == CCMN); + AddWithCarry(reg_size, true, op1, op2, 0); + } + } else { + // If the condition fails, set the status flags to the nzcv immediate. 
+ nzcv().SetFlags(instr->Nzcv()); + LogSystemRegister(NZCV); + } +} + + +void Simulator::VisitLoadStoreUnsignedOffset(const Instruction* instr) { + int offset = instr->ImmLSUnsigned() << instr->SizeLS(); + LoadStoreHelper(instr, offset, Offset); +} + + +void Simulator::VisitLoadStoreUnscaledOffset(const Instruction* instr) { + LoadStoreHelper(instr, instr->ImmLS(), Offset); +} + + +void Simulator::VisitLoadStorePreIndex(const Instruction* instr) { + LoadStoreHelper(instr, instr->ImmLS(), PreIndex); +} + + +void Simulator::VisitLoadStorePostIndex(const Instruction* instr) { + LoadStoreHelper(instr, instr->ImmLS(), PostIndex); +} + + +void Simulator::VisitLoadStoreRegisterOffset(const Instruction* instr) { + Extend ext = static_cast<Extend>(instr->ExtendMode()); + VIXL_ASSERT((ext == UXTW) || (ext == UXTX) || (ext == SXTW) || (ext == SXTX)); + unsigned shift_amount = instr->ImmShiftLS() * instr->SizeLS(); + + int64_t offset = ExtendValue(kXRegSize, xreg(instr->Rm()), ext, + shift_amount); + LoadStoreHelper(instr, offset, Offset); +} + +template<typename T> +static T Faulted() { + return ~0; +} + +template<> +Simulator::qreg_t Faulted() { + static_assert(kQRegSizeInBytes == 16, "Known constraint"); + static Simulator::qreg_t dummy = { { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255 + } }; + return dummy; +} + +template<typename T> T +Simulator::Read(uintptr_t address) +{ + address = Memory::AddressUntag(address); + if (handle_wasm_seg_fault(address, sizeof(T))) + return Faulted<T>(); + return Memory::Read<T>(address); +} + +template <typename T> void +Simulator::Write(uintptr_t address, T value) +{ + address = Memory::AddressUntag(address); + if (handle_wasm_seg_fault(address, sizeof(T))) + return; + Memory::Write<T>(address, value); +} + +void Simulator::LoadStoreHelper(const Instruction* instr, + int64_t offset, + AddrMode addrmode) { + unsigned srcdst = instr->Rt(); + uintptr_t address = AddressModeHelper(instr->Rn(), offset, 
addrmode); + + LoadStoreOp op = static_cast<LoadStoreOp>(instr->Mask(LoadStoreMask)); + switch (op) { + case LDRB_w: + set_wreg(srcdst, Read<uint8_t>(address), NoRegLog); break; + case LDRH_w: + set_wreg(srcdst, Read<uint16_t>(address), NoRegLog); break; + case LDR_w: + set_wreg(srcdst, Read<uint32_t>(address), NoRegLog); break; + case LDR_x: + set_xreg(srcdst, Read<uint64_t>(address), NoRegLog); break; + case LDRSB_w: + set_wreg(srcdst, Read<int8_t>(address), NoRegLog); break; + case LDRSH_w: + set_wreg(srcdst, Read<int16_t>(address), NoRegLog); break; + case LDRSB_x: + set_xreg(srcdst, Read<int8_t>(address), NoRegLog); break; + case LDRSH_x: + set_xreg(srcdst, Read<int16_t>(address), NoRegLog); break; + case LDRSW_x: + set_xreg(srcdst, Read<int32_t>(address), NoRegLog); break; + case LDR_b: + set_breg(srcdst, Read<uint8_t>(address), NoRegLog); break; + case LDR_h: + set_hreg(srcdst, Read<uint16_t>(address), NoRegLog); break; + case LDR_s: + set_sreg(srcdst, Read<float>(address), NoRegLog); break; + case LDR_d: + set_dreg(srcdst, Read<double>(address), NoRegLog); break; + case LDR_q: + set_qreg(srcdst, Read<qreg_t>(address), NoRegLog); break; + + case STRB_w: Write<uint8_t>(address, wreg(srcdst)); break; + case STRH_w: Write<uint16_t>(address, wreg(srcdst)); break; + case STR_w: Write<uint32_t>(address, wreg(srcdst)); break; + case STR_x: Write<uint64_t>(address, xreg(srcdst)); break; + case STR_b: Write<uint8_t>(address, breg(srcdst)); break; + case STR_h: Write<uint16_t>(address, hreg(srcdst)); break; + case STR_s: Write<float>(address, sreg(srcdst)); break; + case STR_d: Write<double>(address, dreg(srcdst)); break; + case STR_q: Write<qreg_t>(address, qreg(srcdst)); break; + + // Ignore prfm hint instructions. 
+ case PRFM: break; + + default: VIXL_UNIMPLEMENTED(); + } + + unsigned access_size = 1 << instr->SizeLS(); + if (instr->IsLoad()) { + if ((op == LDR_s) || (op == LDR_d)) { + LogVRead(address, srcdst, GetPrintRegisterFormatForSizeFP(access_size)); + } else if ((op == LDR_b) || (op == LDR_h) || (op == LDR_q)) { + LogVRead(address, srcdst, GetPrintRegisterFormatForSize(access_size)); + } else { + LogRead(address, srcdst, GetPrintRegisterFormatForSize(access_size)); + } + } else { + if ((op == STR_s) || (op == STR_d)) { + LogVWrite(address, srcdst, GetPrintRegisterFormatForSizeFP(access_size)); + } else if ((op == STR_b) || (op == STR_h) || (op == STR_q)) { + LogVWrite(address, srcdst, GetPrintRegisterFormatForSize(access_size)); + } else { + LogWrite(address, srcdst, GetPrintRegisterFormatForSize(access_size)); + } + } + + local_monitor_.MaybeClear(); +} + + +void Simulator::VisitLoadStorePairOffset(const Instruction* instr) { + LoadStorePairHelper(instr, Offset); +} + + +void Simulator::VisitLoadStorePairPreIndex(const Instruction* instr) { + LoadStorePairHelper(instr, PreIndex); +} + + +void Simulator::VisitLoadStorePairPostIndex(const Instruction* instr) { + LoadStorePairHelper(instr, PostIndex); +} + + +void Simulator::VisitLoadStorePairNonTemporal(const Instruction* instr) { + LoadStorePairHelper(instr, Offset); +} + + +void Simulator::LoadStorePairHelper(const Instruction* instr, + AddrMode addrmode) { + unsigned rt = instr->Rt(); + unsigned rt2 = instr->Rt2(); + int element_size = 1 << instr->SizeLSPair(); + int64_t offset = instr->ImmLSPair() * element_size; + uintptr_t address = AddressModeHelper(instr->Rn(), offset, addrmode); + uintptr_t address2 = address + element_size; + + LoadStorePairOp op = + static_cast<LoadStorePairOp>(instr->Mask(LoadStorePairMask)); + + // 'rt' and 'rt2' can only be aliased for stores. 
+ VIXL_ASSERT(((op & LoadStorePairLBit) == 0) || (rt != rt2)); + + switch (op) { + // Use NoRegLog to suppress the register trace (LOG_REGS, LOG_FP_REGS). We + // will print a more detailed log. + case LDP_w: { + set_wreg(rt, Read<uint32_t>(address), NoRegLog); + set_wreg(rt2, Read<uint32_t>(address2), NoRegLog); + break; + } + case LDP_s: { + set_sreg(rt, Read<float>(address), NoRegLog); + set_sreg(rt2, Read<float>(address2), NoRegLog); + break; + } + case LDP_x: { + set_xreg(rt, Read<uint64_t>(address), NoRegLog); + set_xreg(rt2, Read<uint64_t>(address2), NoRegLog); + break; + } + case LDP_d: { + set_dreg(rt, Read<double>(address), NoRegLog); + set_dreg(rt2, Read<double>(address2), NoRegLog); + break; + } + case LDP_q: { + set_qreg(rt, Read<qreg_t>(address), NoRegLog); + set_qreg(rt2, Read<qreg_t>(address2), NoRegLog); + break; + } + case LDPSW_x: { + set_xreg(rt, Read<int32_t>(address), NoRegLog); + set_xreg(rt2, Read<int32_t>(address2), NoRegLog); + break; + } + case STP_w: { + Write<uint32_t>(address, wreg(rt)); + Write<uint32_t>(address2, wreg(rt2)); + break; + } + case STP_s: { + Write<float>(address, sreg(rt)); + Write<float>(address2, sreg(rt2)); + break; + } + case STP_x: { + Write<uint64_t>(address, xreg(rt)); + Write<uint64_t>(address2, xreg(rt2)); + break; + } + case STP_d: { + Write<double>(address, dreg(rt)); + Write<double>(address2, dreg(rt2)); + break; + } + case STP_q: { + Write<qreg_t>(address, qreg(rt)); + Write<qreg_t>(address2, qreg(rt2)); + break; + } + default: VIXL_UNREACHABLE(); + } + + // Print a detailed trace (including the memory address) instead of the basic + // register:value trace generated by set_*reg(). 
+ if (instr->IsLoad()) { + if ((op == LDP_s) || (op == LDP_d)) { + LogVRead(address, rt, GetPrintRegisterFormatForSizeFP(element_size)); + LogVRead(address2, rt2, GetPrintRegisterFormatForSizeFP(element_size)); + } else if (op == LDP_q) { + LogVRead(address, rt, GetPrintRegisterFormatForSize(element_size)); + LogVRead(address2, rt2, GetPrintRegisterFormatForSize(element_size)); + } else { + LogRead(address, rt, GetPrintRegisterFormatForSize(element_size)); + LogRead(address2, rt2, GetPrintRegisterFormatForSize(element_size)); + } + } else { + if ((op == STP_s) || (op == STP_d)) { + LogVWrite(address, rt, GetPrintRegisterFormatForSizeFP(element_size)); + LogVWrite(address2, rt2, GetPrintRegisterFormatForSizeFP(element_size)); + } else if (op == STP_q) { + LogVWrite(address, rt, GetPrintRegisterFormatForSize(element_size)); + LogVWrite(address2, rt2, GetPrintRegisterFormatForSize(element_size)); + } else { + LogWrite(address, rt, GetPrintRegisterFormatForSize(element_size)); + LogWrite(address2, rt2, GetPrintRegisterFormatForSize(element_size)); + } + } + + local_monitor_.MaybeClear(); +} + + +void Simulator::PrintExclusiveAccessWarning() { + if (print_exclusive_access_warning_) { + fprintf( + stderr, + "%sWARNING:%s VIXL simulator support for load-/store-/clear-exclusive " + "instructions is limited. Refer to the README for details.%s\n", + clr_warning, clr_warning_message, clr_normal); + print_exclusive_access_warning_ = false; + } +} + +template <typename T> +void Simulator::CompareAndSwapHelper(const Instruction* instr) { + unsigned rs = instr->Rs(); + unsigned rt = instr->Rt(); + unsigned rn = instr->Rn(); + + unsigned element_size = sizeof(T); + uint64_t address = reg<uint64_t>(rn, Reg31IsStackPointer); + + // Verify that the address is available to the host. 
+ VIXL_ASSERT(address == static_cast<uintptr_t>(address)); + + address = Memory::AddressUntag(address); + if (handle_wasm_seg_fault(address, element_size)) + return; + + bool is_acquire = instr->Bit(22) == 1; + bool is_release = instr->Bit(15) == 1; + + T comparevalue = reg<T>(rs); + T newvalue = reg<T>(rt); + + // The architecture permits that the data read clears any exclusive monitors + // associated with that location, even if the compare subsequently fails. + local_monitor_.Clear(); + + T data = Memory::Read<T>(address); + if (is_acquire) { + // Approximate load-acquire by issuing a full barrier after the load. + __sync_synchronize(); + } + + if (data == comparevalue) { + if (is_release) { + // Approximate store-release by issuing a full barrier before the store. + __sync_synchronize(); + } + Memory::Write<T>(address, newvalue); + LogWrite(address, rt, GetPrintRegisterFormatForSize(element_size)); + } + set_reg<T>(rs, data); + LogRead(address, rs, GetPrintRegisterFormatForSize(element_size)); +} + +template <typename T> +void Simulator::CompareAndSwapPairHelper(const Instruction* instr) { + VIXL_ASSERT((sizeof(T) == 4) || (sizeof(T) == 8)); + unsigned rs = instr->Rs(); + unsigned rt = instr->Rt(); + unsigned rn = instr->Rn(); + + VIXL_ASSERT((rs % 2 == 0) && (rs % 2 == 0)); + + unsigned element_size = sizeof(T); + uint64_t address = reg<uint64_t>(rn, Reg31IsStackPointer); + + // Verify that the address is available to the host. 
+ VIXL_ASSERT(address == static_cast<uintptr_t>(address)); + + address = Memory::AddressUntag(address); + if (handle_wasm_seg_fault(address, element_size)) + return; + + uint64_t address2 = address + element_size; + + bool is_acquire = instr->Bit(22) == 1; + bool is_release = instr->Bit(15) == 1; + + T comparevalue_high = reg<T>(rs + 1); + T comparevalue_low = reg<T>(rs); + T newvalue_high = reg<T>(rt + 1); + T newvalue_low = reg<T>(rt); + + // The architecture permits that the data read clears any exclusive monitors + // associated with that location, even if the compare subsequently fails. + local_monitor_.Clear(); + + T data_high = Memory::Read<T>(address); + T data_low = Memory::Read<T>(address2); + + if (is_acquire) { + // Approximate load-acquire by issuing a full barrier after the load. + __sync_synchronize(); + } + + bool same = + (data_high == comparevalue_high) && (data_low == comparevalue_low); + if (same) { + if (is_release) { + // Approximate store-release by issuing a full barrier before the store. 
+ __sync_synchronize(); + } + + Memory::Write<T>(address, newvalue_high); + Memory::Write<T>(address2, newvalue_low); + } + + set_reg<T>(rs + 1, data_high); + set_reg<T>(rs, data_low); + + LogRead(address, rs + 1, GetPrintRegisterFormatForSize(element_size)); + LogRead(address2, rs, GetPrintRegisterFormatForSize(element_size)); + + if (same) { + LogWrite(address, rt + 1, GetPrintRegisterFormatForSize(element_size)); + LogWrite(address2, rt, GetPrintRegisterFormatForSize(element_size)); + } +} + +void Simulator::VisitLoadStoreExclusive(const Instruction* instr) { + LoadStoreExclusive op = + static_cast<LoadStoreExclusive>(instr->Mask(LoadStoreExclusiveMask)); + + switch (op) { + case CAS_w: + case CASA_w: + case CASL_w: + case CASAL_w: + CompareAndSwapHelper<uint32_t>(instr); + break; + case CAS_x: + case CASA_x: + case CASL_x: + case CASAL_x: + CompareAndSwapHelper<uint64_t>(instr); + break; + case CASB: + case CASAB: + case CASLB: + case CASALB: + CompareAndSwapHelper<uint8_t>(instr); + break; + case CASH: + case CASAH: + case CASLH: + case CASALH: + CompareAndSwapHelper<uint16_t>(instr); + break; + case CASP_w: + case CASPA_w: + case CASPL_w: + case CASPAL_w: + CompareAndSwapPairHelper<uint32_t>(instr); + break; + case CASP_x: + case CASPA_x: + case CASPL_x: + case CASPAL_x: + CompareAndSwapPairHelper<uint64_t>(instr); + break; + default: + PrintExclusiveAccessWarning(); + + unsigned rs = instr->Rs(); + unsigned rt = instr->Rt(); + unsigned rt2 = instr->Rt2(); + unsigned rn = instr->Rn(); + + bool is_exclusive = !instr->LdStXNotExclusive(); + bool is_acquire_release = !is_exclusive || instr->LdStXAcquireRelease(); + bool is_load = instr->LdStXLoad(); + bool is_pair = instr->LdStXPair(); + + unsigned element_size = 1 << instr->LdStXSizeLog2(); + unsigned access_size = is_pair ? element_size * 2 : element_size; + uint64_t address = reg<uint64_t>(rn, Reg31IsStackPointer); + + // Verify that the address is available to the host. 
+ VIXL_ASSERT(address == static_cast<uintptr_t>(address)); + + // Check the alignment of `address`. + if (AlignDown(address, access_size) != address) { + VIXL_ALIGNMENT_EXCEPTION(); + } + + // The sp must be aligned to 16 bytes when it is accessed. + if ((rn == 31) && (AlignDown(address, 16) != address)) { + VIXL_ALIGNMENT_EXCEPTION(); + } + + if (is_load) { + if (is_exclusive) { + local_monitor_.MarkExclusive(address, access_size); + } else { + // Any non-exclusive load can clear the local monitor as a side + // effect. We don't need to do this, but it is useful to stress the + // simulated code. + local_monitor_.Clear(); + } + + // Use NoRegLog to suppress the register trace (LOG_REGS, LOG_FP_REGS). + // We will print a more detailed log. + switch (op) { + case LDXRB_w: + case LDAXRB_w: + case LDARB_w: + set_wreg(rt, Read<uint8_t>(address), NoRegLog); + break; + case LDXRH_w: + case LDAXRH_w: + case LDARH_w: + set_wreg(rt, Read<uint16_t>(address), NoRegLog); + break; + case LDXR_w: + case LDAXR_w: + case LDAR_w: + set_wreg(rt, Read<uint32_t>(address), NoRegLog); + break; + case LDXR_x: + case LDAXR_x: + case LDAR_x: + set_xreg(rt, Read<uint64_t>(address), NoRegLog); + break; + case LDXP_w: + case LDAXP_w: + set_wreg(rt, Read<uint32_t>(address), NoRegLog); + set_wreg(rt2, Read<uint32_t>(address + element_size), NoRegLog); + break; + case LDXP_x: + case LDAXP_x: + set_xreg(rt, Read<uint64_t>(address), NoRegLog); + set_xreg(rt2, Read<uint64_t>(address + element_size), NoRegLog); + break; + default: + VIXL_UNREACHABLE(); + } + + if (is_acquire_release) { + // Approximate load-acquire by issuing a full barrier after the load. + js::jit::AtomicOperations::fenceSeqCst(); + } + + LogRead(address, rt, GetPrintRegisterFormatForSize(element_size)); + if (is_pair) { + LogRead(address + element_size, rt2, + GetPrintRegisterFormatForSize(element_size)); + } + } else { + if (is_acquire_release) { + // Approximate store-release by issuing a full barrier before the + // store. 
+ js::jit::AtomicOperations::fenceSeqCst(); + } + + bool do_store = true; + if (is_exclusive) { + do_store = local_monitor_.IsExclusive(address, access_size) && + global_monitor_.IsExclusive(address, access_size); + set_wreg(rs, do_store ? 0 : 1); + + // - All exclusive stores explicitly clear the local monitor. + local_monitor_.Clear(); + } else { + // - Any other store can clear the local monitor as a side effect. + local_monitor_.MaybeClear(); + } + + if (do_store) { + switch (op) { + case STXRB_w: + case STLXRB_w: + case STLRB_w: + Write<uint8_t>(address, wreg(rt)); + break; + case STXRH_w: + case STLXRH_w: + case STLRH_w: + Write<uint16_t>(address, wreg(rt)); + break; + case STXR_w: + case STLXR_w: + case STLR_w: + Write<uint32_t>(address, wreg(rt)); + break; + case STXR_x: + case STLXR_x: + case STLR_x: + Write<uint64_t>(address, xreg(rt)); + break; + case STXP_w: + case STLXP_w: + Write<uint32_t>(address, wreg(rt)); + Write<uint32_t>(address + element_size, wreg(rt2)); + break; + case STXP_x: + case STLXP_x: + Write<uint64_t>(address, xreg(rt)); + Write<uint64_t>(address + element_size, xreg(rt2)); + break; + default: + VIXL_UNREACHABLE(); + } + + LogWrite(address, rt, GetPrintRegisterFormatForSize(element_size)); + if (is_pair) { + LogWrite(address + element_size, rt2, + GetPrintRegisterFormatForSize(element_size)); + } + } + } + } +} + +template <typename T> +void Simulator::AtomicMemorySimpleHelper(const Instruction* instr) { + unsigned rs = instr->Rs(); + unsigned rt = instr->Rt(); + unsigned rn = instr->Rn(); + + bool is_acquire = (instr->Bit(23) == 1) && (rt != kZeroRegCode); + bool is_release = instr->Bit(22) == 1; + + unsigned element_size = sizeof(T); + uint64_t address = reg<uint64_t>(rn, Reg31IsStackPointer); + + // Verify that the address is available to the host. 
+ VIXL_ASSERT(address == static_cast<uintptr_t>(address)); + + address = Memory::AddressUntag(address); + if (handle_wasm_seg_fault(address, sizeof(T))) + return; + + T value = reg<T>(rs); + + T data = Memory::Read<T>(address); + + if (is_acquire) { + // Approximate load-acquire by issuing a full barrier after the load. + __sync_synchronize(); + } + + T result = 0; + switch (instr->Mask(AtomicMemorySimpleOpMask)) { + case LDADDOp: + result = data + value; + break; + case LDCLROp: + VIXL_ASSERT(!std::numeric_limits<T>::is_signed); + result = data & ~value; + break; + case LDEOROp: + VIXL_ASSERT(!std::numeric_limits<T>::is_signed); + result = data ^ value; + break; + case LDSETOp: + VIXL_ASSERT(!std::numeric_limits<T>::is_signed); + result = data | value; + break; + + // Signed/Unsigned difference is done via the templated type T. + case LDSMAXOp: + case LDUMAXOp: + result = (data > value) ? data : value; + break; + case LDSMINOp: + case LDUMINOp: + result = (data > value) ? value : data; + break; + } + + if (is_release) { + // Approximate store-release by issuing a full barrier before the store. + __sync_synchronize(); + } + + Memory::Write<T>(address, result); + set_reg<T>(rt, data, NoRegLog); + + LogRead(address, rt, GetPrintRegisterFormatForSize(element_size)); + LogWrite(address, rs, GetPrintRegisterFormatForSize(element_size)); +} + +template <typename T> +void Simulator::AtomicMemorySwapHelper(const Instruction* instr) { + unsigned rs = instr->Rs(); + unsigned rt = instr->Rt(); + unsigned rn = instr->Rn(); + + bool is_acquire = (instr->Bit(23) == 1) && (rt != kZeroRegCode); + bool is_release = instr->Bit(22) == 1; + + unsigned element_size = sizeof(T); + uint64_t address = reg<uint64_t>(rn, Reg31IsStackPointer); + + // Verify that the address is available to the host. 
+ VIXL_ASSERT(address == static_cast<uintptr_t>(address)); + + address = Memory::AddressUntag(address); + if (handle_wasm_seg_fault(address, sizeof(T))) + return; + + T data = Memory::Read<T>(address); + if (is_acquire) { + // Approximate load-acquire by issuing a full barrier after the load. + __sync_synchronize(); + } + + if (is_release) { + // Approximate store-release by issuing a full barrier before the store. + __sync_synchronize(); + } + Memory::Write<T>(address, reg<T>(rs)); + + set_reg<T>(rt, data); + + LogRead(address, rt, GetPrintRegisterFormat(element_size)); + LogWrite(address, rs, GetPrintRegisterFormat(element_size)); +} + +template <typename T> +void Simulator::LoadAcquireRCpcHelper(const Instruction* instr) { + unsigned rt = instr->Rt(); + unsigned rn = instr->Rn(); + + unsigned element_size = sizeof(T); + uint64_t address = reg<uint64_t>(rn, Reg31IsStackPointer); + + // Verify that the address is available to the host. + VIXL_ASSERT(address == static_cast<uintptr_t>(address)); + + address = Memory::AddressUntag(address); + if (handle_wasm_seg_fault(address, sizeof(T))) + return; + + set_reg<T>(rt, Memory::Read<T>(address)); + + // Approximate load-acquire by issuing a full barrier after the load. 
+ __sync_synchronize(); + + LogRead(address, rt, GetPrintRegisterFormat(element_size)); +} + +#define ATOMIC_MEMORY_SIMPLE_UINT_LIST(V) \ + V(LDADD) \ + V(LDCLR) \ + V(LDEOR) \ + V(LDSET) \ + V(LDUMAX) \ + V(LDUMIN) + +#define ATOMIC_MEMORY_SIMPLE_INT_LIST(V) \ + V(LDSMAX) \ + V(LDSMIN) + +void Simulator::VisitAtomicMemory(const Instruction* instr) { + switch (instr->Mask(AtomicMemoryMask)) { +// clang-format off +#define SIM_FUNC_B(A) \ + case A##B: \ + case A##AB: \ + case A##LB: \ + case A##ALB: +#define SIM_FUNC_H(A) \ + case A##H: \ + case A##AH: \ + case A##LH: \ + case A##ALH: +#define SIM_FUNC_w(A) \ + case A##_w: \ + case A##A_w: \ + case A##L_w: \ + case A##AL_w: +#define SIM_FUNC_x(A) \ + case A##_x: \ + case A##A_x: \ + case A##L_x: \ + case A##AL_x: + + ATOMIC_MEMORY_SIMPLE_UINT_LIST(SIM_FUNC_B) + AtomicMemorySimpleHelper<uint8_t>(instr); + break; + ATOMIC_MEMORY_SIMPLE_INT_LIST(SIM_FUNC_B) + AtomicMemorySimpleHelper<int8_t>(instr); + break; + ATOMIC_MEMORY_SIMPLE_UINT_LIST(SIM_FUNC_H) + AtomicMemorySimpleHelper<uint16_t>(instr); + break; + ATOMIC_MEMORY_SIMPLE_INT_LIST(SIM_FUNC_H) + AtomicMemorySimpleHelper<int16_t>(instr); + break; + ATOMIC_MEMORY_SIMPLE_UINT_LIST(SIM_FUNC_w) + AtomicMemorySimpleHelper<uint32_t>(instr); + break; + ATOMIC_MEMORY_SIMPLE_INT_LIST(SIM_FUNC_w) + AtomicMemorySimpleHelper<int32_t>(instr); + break; + ATOMIC_MEMORY_SIMPLE_UINT_LIST(SIM_FUNC_x) + AtomicMemorySimpleHelper<uint64_t>(instr); + break; + ATOMIC_MEMORY_SIMPLE_INT_LIST(SIM_FUNC_x) + AtomicMemorySimpleHelper<int64_t>(instr); + break; + // clang-format on + + case SWPB: + case SWPAB: + case SWPLB: + case SWPALB: + AtomicMemorySwapHelper<uint8_t>(instr); + break; + case SWPH: + case SWPAH: + case SWPLH: + case SWPALH: + AtomicMemorySwapHelper<uint16_t>(instr); + break; + case SWP_w: + case SWPA_w: + case SWPL_w: + case SWPAL_w: + AtomicMemorySwapHelper<uint32_t>(instr); + break; + case SWP_x: + case SWPA_x: + case SWPL_x: + case SWPAL_x: + 
AtomicMemorySwapHelper<uint64_t>(instr); + break; + case LDAPRB: + LoadAcquireRCpcHelper<uint8_t>(instr); + break; + case LDAPRH: + LoadAcquireRCpcHelper<uint16_t>(instr); + break; + case LDAPR_w: + LoadAcquireRCpcHelper<uint32_t>(instr); + break; + case LDAPR_x: + LoadAcquireRCpcHelper<uint64_t>(instr); + break; + } +} + +void Simulator::VisitLoadLiteral(const Instruction* instr) { + unsigned rt = instr->Rt(); + uint64_t address = instr->LiteralAddress<uint64_t>(); + + // Verify that the calculated address is available to the host. + VIXL_ASSERT(address == static_cast<uintptr_t>(address)); + + switch (instr->Mask(LoadLiteralMask)) { + // Use NoRegLog to suppress the register trace (LOG_REGS, LOG_VREGS), then + // print a more detailed log. + case LDR_w_lit: + set_wreg(rt, Read<uint32_t>(address), NoRegLog); + LogRead(address, rt, kPrintWReg); + break; + case LDR_x_lit: + set_xreg(rt, Read<uint64_t>(address), NoRegLog); + LogRead(address, rt, kPrintXReg); + break; + case LDR_s_lit: + set_sreg(rt, Read<float>(address), NoRegLog); + LogVRead(address, rt, kPrintSReg); + break; + case LDR_d_lit: + set_dreg(rt, Read<double>(address), NoRegLog); + LogVRead(address, rt, kPrintDReg); + break; + case LDR_q_lit: + set_qreg(rt, Read<qreg_t>(address), NoRegLog); + LogVRead(address, rt, kPrintReg1Q); + break; + case LDRSW_x_lit: + set_xreg(rt, Read<int32_t>(address), NoRegLog); + LogRead(address, rt, kPrintWReg); + break; + + // Ignore prfm hint instructions. + case PRFM_lit: break; + + default: VIXL_UNREACHABLE(); + } + + local_monitor_.MaybeClear(); +} + + +uintptr_t Simulator::AddressModeHelper(unsigned addr_reg, + int64_t offset, + AddrMode addrmode) { + uint64_t address = xreg(addr_reg, Reg31IsStackPointer); + + if ((addr_reg == 31) && ((address % 16) != 0)) { + // When the base register is SP the stack pointer is required to be + // quadword aligned prior to the address calculation and write-backs. + // Misalignment will cause a stack alignment fault. 
+ VIXL_ALIGNMENT_EXCEPTION(); + } + + if ((addrmode == PreIndex) || (addrmode == PostIndex)) { + VIXL_ASSERT(offset != 0); + // Only preindex should log the register update here. For Postindex, the + // update will be printed automatically by LogWrittenRegisters _after_ the + // memory access itself is logged. + RegLogMode log_mode = (addrmode == PreIndex) ? LogRegWrites : NoRegLog; + set_xreg(addr_reg, address + offset, log_mode, Reg31IsStackPointer); + } + + if ((addrmode == Offset) || (addrmode == PreIndex)) { + address += offset; + } + + // Verify that the calculated address is available to the host. + VIXL_ASSERT(address == static_cast<uintptr_t>(address)); + + return static_cast<uintptr_t>(address); +} + + +void Simulator::VisitMoveWideImmediate(const Instruction* instr) { + MoveWideImmediateOp mov_op = + static_cast<MoveWideImmediateOp>(instr->Mask(MoveWideImmediateMask)); + int64_t new_xn_val = 0; + + bool is_64_bits = instr->SixtyFourBits() == 1; + // Shift is limited for W operations. + VIXL_ASSERT(is_64_bits || (instr->ShiftMoveWide() < 2)); + + // Get the shifted immediate. + int64_t shift = instr->ShiftMoveWide() * 16; + int64_t shifted_imm16 = static_cast<int64_t>(instr->ImmMoveWide()) << shift; + + // Compute the new value. + switch (mov_op) { + case MOVN_w: + case MOVN_x: { + new_xn_val = ~shifted_imm16; + if (!is_64_bits) new_xn_val &= kWRegMask; + break; + } + case MOVK_w: + case MOVK_x: { + unsigned reg_code = instr->Rd(); + int64_t prev_xn_val = is_64_bits ? xreg(reg_code) + : wreg(reg_code); + new_xn_val = + (prev_xn_val & ~(INT64_C(0xffff) << shift)) | shifted_imm16; + break; + } + case MOVZ_w: + case MOVZ_x: { + new_xn_val = shifted_imm16; + break; + } + default: + VIXL_UNREACHABLE(); + } + + // Update the destination register. 
+ set_xreg(instr->Rd(), new_xn_val); +} + + +void Simulator::VisitConditionalSelect(const Instruction* instr) { + uint64_t new_val = xreg(instr->Rn()); + + if (ConditionFailed(static_cast<Condition>(instr->Condition()))) { + new_val = xreg(instr->Rm()); + switch (instr->Mask(ConditionalSelectMask)) { + case CSEL_w: + case CSEL_x: break; + case CSINC_w: + case CSINC_x: new_val++; break; + case CSINV_w: + case CSINV_x: new_val = ~new_val; break; + case CSNEG_w: + case CSNEG_x: new_val = -new_val; break; + default: VIXL_UNIMPLEMENTED(); + } + } + unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize; + set_reg(reg_size, instr->Rd(), new_val); +} + + +void Simulator::VisitDataProcessing1Source(const Instruction* instr) { + unsigned dst = instr->Rd(); + unsigned src = instr->Rn(); + + switch (instr->Mask(DataProcessing1SourceMask)) { + case RBIT_w: set_wreg(dst, ReverseBits(wreg(src))); break; + case RBIT_x: set_xreg(dst, ReverseBits(xreg(src))); break; + case REV16_w: set_wreg(dst, ReverseBytes(wreg(src), 1)); break; + case REV16_x: set_xreg(dst, ReverseBytes(xreg(src), 1)); break; + case REV_w: set_wreg(dst, ReverseBytes(wreg(src), 2)); break; + case REV32_x: set_xreg(dst, ReverseBytes(xreg(src), 2)); break; + case REV_x: set_xreg(dst, ReverseBytes(xreg(src), 3)); break; + case CLZ_w: set_wreg(dst, CountLeadingZeros(wreg(src))); break; + case CLZ_x: set_xreg(dst, CountLeadingZeros(xreg(src))); break; + case CLS_w: { + set_wreg(dst, CountLeadingSignBits(wreg(src))); + break; + } + case CLS_x: { + set_xreg(dst, CountLeadingSignBits(xreg(src))); + break; + } + default: VIXL_UNIMPLEMENTED(); + } +} + + +uint32_t Simulator::Poly32Mod2(unsigned n, uint64_t data, uint32_t poly) { + VIXL_ASSERT((n > 32) && (n <= 64)); + for (unsigned i = (n - 1); i >= 32; i--) { + if (((data >> i) & 1) != 0) { + uint64_t polysh32 = (uint64_t)poly << (i - 32); + uint64_t mask = (UINT64_C(1) << i) - 1; + data = ((data & mask) ^ polysh32); + } + } + return data & 0xffffffff; +} + + 
+template <typename T> +uint32_t Simulator::Crc32Checksum(uint32_t acc, T val, uint32_t poly) { + unsigned size = sizeof(val) * 8; // Number of bits in type T. + VIXL_ASSERT((size == 8) || (size == 16) || (size == 32)); + uint64_t tempacc = static_cast<uint64_t>(ReverseBits(acc)) << size; + uint64_t tempval = static_cast<uint64_t>(ReverseBits(val)) << 32; + return ReverseBits(Poly32Mod2(32 + size, tempacc ^ tempval, poly)); +} + + +uint32_t Simulator::Crc32Checksum(uint32_t acc, uint64_t val, uint32_t poly) { + // Poly32Mod2 cannot handle inputs with more than 32 bits, so compute + // the CRC of each 32-bit word sequentially. + acc = Crc32Checksum(acc, (uint32_t)(val & 0xffffffff), poly); + return Crc32Checksum(acc, (uint32_t)(val >> 32), poly); +} + + +void Simulator::VisitDataProcessing2Source(const Instruction* instr) { + Shift shift_op = NO_SHIFT; + int64_t result = 0; + unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize; + + switch (instr->Mask(DataProcessing2SourceMask)) { + case SDIV_w: { + int32_t rn = wreg(instr->Rn()); + int32_t rm = wreg(instr->Rm()); + if ((rn == kWMinInt) && (rm == -1)) { + result = kWMinInt; + } else if (rm == 0) { + // Division by zero can be trapped, but not on A-class processors. + result = 0; + } else { + result = rn / rm; + } + break; + } + case SDIV_x: { + int64_t rn = xreg(instr->Rn()); + int64_t rm = xreg(instr->Rm()); + if ((rn == kXMinInt) && (rm == -1)) { + result = kXMinInt; + } else if (rm == 0) { + // Division by zero can be trapped, but not on A-class processors. + result = 0; + } else { + result = rn / rm; + } + break; + } + case UDIV_w: { + uint32_t rn = static_cast<uint32_t>(wreg(instr->Rn())); + uint32_t rm = static_cast<uint32_t>(wreg(instr->Rm())); + if (rm == 0) { + // Division by zero can be trapped, but not on A-class processors. 
+ result = 0; + } else { + result = rn / rm; + } + break; + } + case UDIV_x: { + uint64_t rn = static_cast<uint64_t>(xreg(instr->Rn())); + uint64_t rm = static_cast<uint64_t>(xreg(instr->Rm())); + if (rm == 0) { + // Division by zero can be trapped, but not on A-class processors. + result = 0; + } else { + result = rn / rm; + } + break; + } + case LSLV_w: + case LSLV_x: shift_op = LSL; break; + case LSRV_w: + case LSRV_x: shift_op = LSR; break; + case ASRV_w: + case ASRV_x: shift_op = ASR; break; + case RORV_w: + case RORV_x: shift_op = ROR; break; + case CRC32B: { + uint32_t acc = reg<uint32_t>(instr->Rn()); + uint8_t val = reg<uint8_t>(instr->Rm()); + result = Crc32Checksum(acc, val, CRC32_POLY); + break; + } + case CRC32H: { + uint32_t acc = reg<uint32_t>(instr->Rn()); + uint16_t val = reg<uint16_t>(instr->Rm()); + result = Crc32Checksum(acc, val, CRC32_POLY); + break; + } + case CRC32W: { + uint32_t acc = reg<uint32_t>(instr->Rn()); + uint32_t val = reg<uint32_t>(instr->Rm()); + result = Crc32Checksum(acc, val, CRC32_POLY); + break; + } + case CRC32X: { + uint32_t acc = reg<uint32_t>(instr->Rn()); + uint64_t val = reg<uint64_t>(instr->Rm()); + result = Crc32Checksum(acc, val, CRC32_POLY); + reg_size = kWRegSize; + break; + } + case CRC32CB: { + uint32_t acc = reg<uint32_t>(instr->Rn()); + uint8_t val = reg<uint8_t>(instr->Rm()); + result = Crc32Checksum(acc, val, CRC32C_POLY); + break; + } + case CRC32CH: { + uint32_t acc = reg<uint32_t>(instr->Rn()); + uint16_t val = reg<uint16_t>(instr->Rm()); + result = Crc32Checksum(acc, val, CRC32C_POLY); + break; + } + case CRC32CW: { + uint32_t acc = reg<uint32_t>(instr->Rn()); + uint32_t val = reg<uint32_t>(instr->Rm()); + result = Crc32Checksum(acc, val, CRC32C_POLY); + break; + } + case CRC32CX: { + uint32_t acc = reg<uint32_t>(instr->Rn()); + uint64_t val = reg<uint64_t>(instr->Rm()); + result = Crc32Checksum(acc, val, CRC32C_POLY); + reg_size = kWRegSize; + break; + } + default: VIXL_UNIMPLEMENTED(); + } + + if 
(shift_op != NO_SHIFT) { + // Shift distance encoded in the least-significant five/six bits of the + // register. + int mask = (instr->SixtyFourBits() == 1) ? 0x3f : 0x1f; + unsigned shift = wreg(instr->Rm()) & mask; + result = ShiftOperand(reg_size, reg(reg_size, instr->Rn()), shift_op, + shift); + } + set_reg(reg_size, instr->Rd(), result); +} + + +// The algorithm used is adapted from the one described in section 8.2 of +// Hacker's Delight, by Henry S. Warren, Jr. +// It assumes that a right shift on a signed integer is an arithmetic shift. +// Type T must be either uint64_t or int64_t. +template <typename T> +static T MultiplyHigh(T u, T v) { + uint64_t u0, v0, w0; + T u1, v1, w1, w2, t; + + VIXL_ASSERT(sizeof(u) == sizeof(u0)); + + u0 = u & 0xffffffff; + u1 = u >> 32; + v0 = v & 0xffffffff; + v1 = v >> 32; + + w0 = u0 * v0; + t = u1 * v0 + (w0 >> 32); + w1 = t & 0xffffffff; + w2 = t >> 32; + w1 = u0 * v1 + w1; + + return u1 * v1 + w2 + (w1 >> 32); +} + + +void Simulator::VisitDataProcessing3Source(const Instruction* instr) { + unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize; + + int64_t result = 0; + // Extract and sign- or zero-extend 32-bit arguments for widening operations. 
+ uint64_t rn_u32 = reg<uint32_t>(instr->Rn()); + uint64_t rm_u32 = reg<uint32_t>(instr->Rm()); + int64_t rn_s32 = reg<int32_t>(instr->Rn()); + int64_t rm_s32 = reg<int32_t>(instr->Rm()); + switch (instr->Mask(DataProcessing3SourceMask)) { + case MADD_w: + case MADD_x: + result = xreg(instr->Ra()) + (xreg(instr->Rn()) * xreg(instr->Rm())); + break; + case MSUB_w: + case MSUB_x: + result = xreg(instr->Ra()) - (xreg(instr->Rn()) * xreg(instr->Rm())); + break; + case SMADDL_x: result = xreg(instr->Ra()) + (rn_s32 * rm_s32); break; + case SMSUBL_x: result = xreg(instr->Ra()) - (rn_s32 * rm_s32); break; + case UMADDL_x: result = xreg(instr->Ra()) + (rn_u32 * rm_u32); break; + case UMSUBL_x: result = xreg(instr->Ra()) - (rn_u32 * rm_u32); break; + case UMULH_x: + result = MultiplyHigh(reg<uint64_t>(instr->Rn()), + reg<uint64_t>(instr->Rm())); + break; + case SMULH_x: + result = MultiplyHigh(xreg(instr->Rn()), xreg(instr->Rm())); + break; + default: VIXL_UNIMPLEMENTED(); + } + set_reg(reg_size, instr->Rd(), result); +} + + +void Simulator::VisitBitfield(const Instruction* instr) { + unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize; + int64_t reg_mask = instr->SixtyFourBits() ? kXRegMask : kWRegMask; + int64_t R = instr->ImmR(); + int64_t S = instr->ImmS(); + int64_t diff = S - R; + int64_t mask; + if (diff >= 0) { + mask = (diff < (reg_size - 1)) ? (INT64_C(1) << (diff + 1)) - 1 + : reg_mask; + } else { + mask = (INT64_C(1) << (S + 1)) - 1; + mask = (static_cast<uint64_t>(mask) >> R) | (mask << (reg_size - R)); + diff += reg_size; + } + + // inzero indicates if the extracted bitfield is inserted into the + // destination register value or in zero. + // If extend is true, extend the sign of the extracted bitfield. 
+ bool inzero = false; + bool extend = false; + switch (instr->Mask(BitfieldMask)) { + case BFM_x: + case BFM_w: + break; + case SBFM_x: + case SBFM_w: + inzero = true; + extend = true; + break; + case UBFM_x: + case UBFM_w: + inzero = true; + break; + default: + VIXL_UNIMPLEMENTED(); + } + + int64_t dst = inzero ? 0 : reg(reg_size, instr->Rd()); + int64_t src = reg(reg_size, instr->Rn()); + // Rotate source bitfield into place. + int64_t result = (static_cast<uint64_t>(src) >> R) | (src << (reg_size - R)); + // Determine the sign extension. + int64_t topbits = ((INT64_C(1) << (reg_size - diff - 1)) - 1) << (diff + 1); + int64_t signbits = extend && ((src >> S) & 1) ? topbits : 0; + + // Merge sign extension, dest/zero and bitfield. + result = signbits | (result & mask) | (dst & ~mask); + + set_reg(reg_size, instr->Rd(), result); +} + + +void Simulator::VisitExtract(const Instruction* instr) { + unsigned lsb = instr->ImmS(); + unsigned reg_size = (instr->SixtyFourBits() == 1) ? kXRegSize + : kWRegSize; + uint64_t low_res = static_cast<uint64_t>(reg(reg_size, instr->Rm())) >> lsb; + uint64_t high_res = + (lsb == 0) ? 
0 : reg(reg_size, instr->Rn()) << (reg_size - lsb); + set_reg(reg_size, instr->Rd(), low_res | high_res); +} + + +void Simulator::VisitFPImmediate(const Instruction* instr) { + AssertSupportedFPCR(); + + unsigned dest = instr->Rd(); + switch (instr->Mask(FPImmediateMask)) { + case FMOV_s_imm: set_sreg(dest, instr->ImmFP32()); break; + case FMOV_d_imm: set_dreg(dest, instr->ImmFP64()); break; + default: VIXL_UNREACHABLE(); + } +} + + +void Simulator::VisitFPIntegerConvert(const Instruction* instr) { + AssertSupportedFPCR(); + + unsigned dst = instr->Rd(); + unsigned src = instr->Rn(); + + FPRounding round = RMode(); + + switch (instr->Mask(FPIntegerConvertMask)) { + case FCVTAS_ws: set_wreg(dst, FPToInt32(sreg(src), FPTieAway)); break; + case FCVTAS_xs: set_xreg(dst, FPToInt64(sreg(src), FPTieAway)); break; + case FCVTAS_wd: set_wreg(dst, FPToInt32(dreg(src), FPTieAway)); break; + case FCVTAS_xd: set_xreg(dst, FPToInt64(dreg(src), FPTieAway)); break; + case FCVTAU_ws: set_wreg(dst, FPToUInt32(sreg(src), FPTieAway)); break; + case FCVTAU_xs: set_xreg(dst, FPToUInt64(sreg(src), FPTieAway)); break; + case FCVTAU_wd: set_wreg(dst, FPToUInt32(dreg(src), FPTieAway)); break; + case FCVTAU_xd: set_xreg(dst, FPToUInt64(dreg(src), FPTieAway)); break; + case FCVTMS_ws: + set_wreg(dst, FPToInt32(sreg(src), FPNegativeInfinity)); + break; + case FCVTMS_xs: + set_xreg(dst, FPToInt64(sreg(src), FPNegativeInfinity)); + break; + case FCVTMS_wd: + set_wreg(dst, FPToInt32(dreg(src), FPNegativeInfinity)); + break; + case FCVTMS_xd: + set_xreg(dst, FPToInt64(dreg(src), FPNegativeInfinity)); + break; + case FCVTMU_ws: + set_wreg(dst, FPToUInt32(sreg(src), FPNegativeInfinity)); + break; + case FCVTMU_xs: + set_xreg(dst, FPToUInt64(sreg(src), FPNegativeInfinity)); + break; + case FCVTMU_wd: + set_wreg(dst, FPToUInt32(dreg(src), FPNegativeInfinity)); + break; + case FCVTMU_xd: + set_xreg(dst, FPToUInt64(dreg(src), FPNegativeInfinity)); + break; + case FCVTPS_ws: + set_wreg(dst, 
FPToInt32(sreg(src), FPPositiveInfinity)); + break; + case FCVTPS_xs: + set_xreg(dst, FPToInt64(sreg(src), FPPositiveInfinity)); + break; + case FCVTPS_wd: + set_wreg(dst, FPToInt32(dreg(src), FPPositiveInfinity)); + break; + case FCVTPS_xd: + set_xreg(dst, FPToInt64(dreg(src), FPPositiveInfinity)); + break; + case FCVTPU_ws: + set_wreg(dst, FPToUInt32(sreg(src), FPPositiveInfinity)); + break; + case FCVTPU_xs: + set_xreg(dst, FPToUInt64(sreg(src), FPPositiveInfinity)); + break; + case FCVTPU_wd: + set_wreg(dst, FPToUInt32(dreg(src), FPPositiveInfinity)); + break; + case FCVTPU_xd: + set_xreg(dst, FPToUInt64(dreg(src), FPPositiveInfinity)); + break; + case FCVTNS_ws: set_wreg(dst, FPToInt32(sreg(src), FPTieEven)); break; + case FCVTNS_xs: set_xreg(dst, FPToInt64(sreg(src), FPTieEven)); break; + case FCVTNS_wd: set_wreg(dst, FPToInt32(dreg(src), FPTieEven)); break; + case FCVTNS_xd: set_xreg(dst, FPToInt64(dreg(src), FPTieEven)); break; + case FCVTNU_ws: set_wreg(dst, FPToUInt32(sreg(src), FPTieEven)); break; + case FCVTNU_xs: set_xreg(dst, FPToUInt64(sreg(src), FPTieEven)); break; + case FCVTNU_wd: set_wreg(dst, FPToUInt32(dreg(src), FPTieEven)); break; + case FCVTNU_xd: set_xreg(dst, FPToUInt64(dreg(src), FPTieEven)); break; + case FCVTZS_ws: set_wreg(dst, FPToInt32(sreg(src), FPZero)); break; + case FCVTZS_xs: set_xreg(dst, FPToInt64(sreg(src), FPZero)); break; + case FCVTZS_wd: set_wreg(dst, FPToInt32(dreg(src), FPZero)); break; + case FCVTZS_xd: set_xreg(dst, FPToInt64(dreg(src), FPZero)); break; + case FCVTZU_ws: set_wreg(dst, FPToUInt32(sreg(src), FPZero)); break; + case FCVTZU_xs: set_xreg(dst, FPToUInt64(sreg(src), FPZero)); break; + case FCVTZU_wd: set_wreg(dst, FPToUInt32(dreg(src), FPZero)); break; + case FCVTZU_xd: set_xreg(dst, FPToUInt64(dreg(src), FPZero)); break; + case FJCVTZS: set_wreg(dst, FPToFixedJS(dreg(src))); break; + case FMOV_ws: set_wreg(dst, sreg_bits(src)); break; + case FMOV_xd: set_xreg(dst, dreg_bits(src)); break; + case FMOV_sw: 
set_sreg_bits(dst, wreg(src)); break; + case FMOV_dx: set_dreg_bits(dst, xreg(src)); break; + case FMOV_d1_x: + LogicVRegister(vreg(dst)).SetUint(kFormatD, 1, xreg(src)); + break; + case FMOV_x_d1: + set_xreg(dst, LogicVRegister(vreg(src)).Uint(kFormatD, 1)); + break; + + // A 32-bit input can be handled in the same way as a 64-bit input, since + // the sign- or zero-extension will not affect the conversion. + case SCVTF_dx: set_dreg(dst, FixedToDouble(xreg(src), 0, round)); break; + case SCVTF_dw: set_dreg(dst, FixedToDouble(wreg(src), 0, round)); break; + case UCVTF_dx: set_dreg(dst, UFixedToDouble(xreg(src), 0, round)); break; + case UCVTF_dw: { + set_dreg(dst, UFixedToDouble(static_cast<uint32_t>(wreg(src)), 0, round)); + break; + } + case SCVTF_sx: set_sreg(dst, FixedToFloat(xreg(src), 0, round)); break; + case SCVTF_sw: set_sreg(dst, FixedToFloat(wreg(src), 0, round)); break; + case UCVTF_sx: set_sreg(dst, UFixedToFloat(xreg(src), 0, round)); break; + case UCVTF_sw: { + set_sreg(dst, UFixedToFloat(static_cast<uint32_t>(wreg(src)), 0, round)); + break; + } + + default: VIXL_UNREACHABLE(); + } +} + + +void Simulator::VisitFPFixedPointConvert(const Instruction* instr) { + AssertSupportedFPCR(); + + unsigned dst = instr->Rd(); + unsigned src = instr->Rn(); + int fbits = 64 - instr->FPScale(); + + FPRounding round = RMode(); + + switch (instr->Mask(FPFixedPointConvertMask)) { + // A 32-bit input can be handled in the same way as a 64-bit input, since + // the sign- or zero-extension will not affect the conversion. 
+ case SCVTF_dx_fixed: + set_dreg(dst, FixedToDouble(xreg(src), fbits, round)); + break; + case SCVTF_dw_fixed: + set_dreg(dst, FixedToDouble(wreg(src), fbits, round)); + break; + case UCVTF_dx_fixed: + set_dreg(dst, UFixedToDouble(xreg(src), fbits, round)); + break; + case UCVTF_dw_fixed: { + set_dreg(dst, + UFixedToDouble(static_cast<uint32_t>(wreg(src)), fbits, round)); + break; + } + case SCVTF_sx_fixed: + set_sreg(dst, FixedToFloat(xreg(src), fbits, round)); + break; + case SCVTF_sw_fixed: + set_sreg(dst, FixedToFloat(wreg(src), fbits, round)); + break; + case UCVTF_sx_fixed: + set_sreg(dst, UFixedToFloat(xreg(src), fbits, round)); + break; + case UCVTF_sw_fixed: { + set_sreg(dst, + UFixedToFloat(static_cast<uint32_t>(wreg(src)), fbits, round)); + break; + } + case FCVTZS_xd_fixed: + set_xreg(dst, FPToInt64(dreg(src) * std::pow(2.0, fbits), FPZero)); + break; + case FCVTZS_wd_fixed: + set_wreg(dst, FPToInt32(dreg(src) * std::pow(2.0, fbits), FPZero)); + break; + case FCVTZU_xd_fixed: + set_xreg(dst, FPToUInt64(dreg(src) * std::pow(2.0, fbits), FPZero)); + break; + case FCVTZU_wd_fixed: + set_wreg(dst, FPToUInt32(dreg(src) * std::pow(2.0, fbits), FPZero)); + break; + case FCVTZS_xs_fixed: + set_xreg(dst, FPToInt64(sreg(src) * std::pow(2.0f, fbits), FPZero)); + break; + case FCVTZS_ws_fixed: + set_wreg(dst, FPToInt32(sreg(src) * std::pow(2.0f, fbits), FPZero)); + break; + case FCVTZU_xs_fixed: + set_xreg(dst, FPToUInt64(sreg(src) * std::pow(2.0f, fbits), FPZero)); + break; + case FCVTZU_ws_fixed: + set_wreg(dst, FPToUInt32(sreg(src) * std::pow(2.0f, fbits), FPZero)); + break; + default: VIXL_UNREACHABLE(); + } +} + + +void Simulator::VisitFPCompare(const Instruction* instr) { + AssertSupportedFPCR(); + + FPTrapFlags trap = DisableTrap; + switch (instr->Mask(FPCompareMask)) { + case FCMPE_s: trap = EnableTrap; VIXL_FALLTHROUGH(); + case FCMP_s: FPCompare(sreg(instr->Rn()), sreg(instr->Rm()), trap); break; + case FCMPE_d: trap = EnableTrap; VIXL_FALLTHROUGH(); + 
case FCMP_d: FPCompare(dreg(instr->Rn()), dreg(instr->Rm()), trap); break; + case FCMPE_s_zero: trap = EnableTrap; VIXL_FALLTHROUGH(); + case FCMP_s_zero: FPCompare(sreg(instr->Rn()), 0.0f, trap); break; + case FCMPE_d_zero: trap = EnableTrap; VIXL_FALLTHROUGH(); + case FCMP_d_zero: FPCompare(dreg(instr->Rn()), 0.0, trap); break; + default: VIXL_UNIMPLEMENTED(); + } +} + + +void Simulator::VisitFPConditionalCompare(const Instruction* instr) { + AssertSupportedFPCR(); + + FPTrapFlags trap = DisableTrap; + switch (instr->Mask(FPConditionalCompareMask)) { + case FCCMPE_s: trap = EnableTrap; + VIXL_FALLTHROUGH(); + case FCCMP_s: + if (ConditionPassed(instr->Condition())) { + FPCompare(sreg(instr->Rn()), sreg(instr->Rm()), trap); + } else { + nzcv().SetFlags(instr->Nzcv()); + LogSystemRegister(NZCV); + } + break; + case FCCMPE_d: trap = EnableTrap; + VIXL_FALLTHROUGH(); + case FCCMP_d: + if (ConditionPassed(instr->Condition())) { + FPCompare(dreg(instr->Rn()), dreg(instr->Rm()), trap); + } else { + nzcv().SetFlags(instr->Nzcv()); + LogSystemRegister(NZCV); + } + break; + default: VIXL_UNIMPLEMENTED(); + } +} + + +void Simulator::VisitFPConditionalSelect(const Instruction* instr) { + AssertSupportedFPCR(); + + Instr selected; + if (ConditionPassed(instr->Condition())) { + selected = instr->Rn(); + } else { + selected = instr->Rm(); + } + + switch (instr->Mask(FPConditionalSelectMask)) { + case FCSEL_s: set_sreg(instr->Rd(), sreg(selected)); break; + case FCSEL_d: set_dreg(instr->Rd(), dreg(selected)); break; + default: VIXL_UNIMPLEMENTED(); + } +} + + +void Simulator::VisitFPDataProcessing1Source(const Instruction* instr) { + AssertSupportedFPCR(); + + FPRounding fpcr_rounding = static_cast<FPRounding>(fpcr().RMode()); + VectorFormat vform = (instr->Mask(FP64) == FP64) ? 
kFormatD : kFormatS; + SimVRegister& rd = vreg(instr->Rd()); + SimVRegister& rn = vreg(instr->Rn()); + bool inexact_exception = false; + + unsigned fd = instr->Rd(); + unsigned fn = instr->Rn(); + + switch (instr->Mask(FPDataProcessing1SourceMask)) { + case FMOV_s: set_sreg(fd, sreg(fn)); return; + case FMOV_d: set_dreg(fd, dreg(fn)); return; + case FABS_s: fabs_(kFormatS, vreg(fd), vreg(fn)); return; + case FABS_d: fabs_(kFormatD, vreg(fd), vreg(fn)); return; + case FNEG_s: fneg(kFormatS, vreg(fd), vreg(fn)); return; + case FNEG_d: fneg(kFormatD, vreg(fd), vreg(fn)); return; + case FCVT_ds: + set_dreg(fd, FPToDouble(sreg(fn), ReadDN())); + return; + case FCVT_sd: + set_sreg(fd, FPToFloat(dreg(fn), FPTieEven, ReadDN())); + return; + case FCVT_hs: + set_hreg(fd, Float16ToRawbits(FPToFloat16(sreg(fn), FPTieEven, ReadDN()))); + return; + case FCVT_sh: + set_sreg(fd, FPToFloat(RawbitsToFloat16(hreg(fn)), ReadDN())); + return; + case FCVT_dh: + set_dreg(fd, FPToDouble(hreg(fn), ReadDN())); + return; + case FCVT_hd: + set_hreg(fd, Float16ToRawbits(FPToFloat16(dreg(fn), FPTieEven, ReadDN()))); + return; + case FSQRT_s: + case FSQRT_d: fsqrt(vform, rd, rn); return; + case FRINTI_s: + case FRINTI_d: break; // Use FPCR rounding mode. + case FRINTX_s: + case FRINTX_d: inexact_exception = true; break; + case FRINTA_s: + case FRINTA_d: fpcr_rounding = FPTieAway; break; + case FRINTM_s: + case FRINTM_d: fpcr_rounding = FPNegativeInfinity; break; + case FRINTN_s: + case FRINTN_d: fpcr_rounding = FPTieEven; break; + case FRINTP_s: + case FRINTP_d: fpcr_rounding = FPPositiveInfinity; break; + case FRINTZ_s: + case FRINTZ_d: fpcr_rounding = FPZero; break; + default: VIXL_UNIMPLEMENTED(); + } + + // Only FRINT* instructions fall through the switch above. + frint(vform, rd, rn, fpcr_rounding, inexact_exception); +} + + +void Simulator::VisitFPDataProcessing2Source(const Instruction* instr) { + AssertSupportedFPCR(); + + VectorFormat vform = (instr->Mask(FP64) == FP64) ? 
kFormatD : kFormatS; + SimVRegister& rd = vreg(instr->Rd()); + SimVRegister& rn = vreg(instr->Rn()); + SimVRegister& rm = vreg(instr->Rm()); + + switch (instr->Mask(FPDataProcessing2SourceMask)) { + case FADD_s: + case FADD_d: fadd(vform, rd, rn, rm); break; + case FSUB_s: + case FSUB_d: fsub(vform, rd, rn, rm); break; + case FMUL_s: + case FMUL_d: fmul(vform, rd, rn, rm); break; + case FNMUL_s: + case FNMUL_d: fnmul(vform, rd, rn, rm); break; + case FDIV_s: + case FDIV_d: fdiv(vform, rd, rn, rm); break; + case FMAX_s: + case FMAX_d: fmax(vform, rd, rn, rm); break; + case FMIN_s: + case FMIN_d: fmin(vform, rd, rn, rm); break; + case FMAXNM_s: + case FMAXNM_d: fmaxnm(vform, rd, rn, rm); break; + case FMINNM_s: + case FMINNM_d: fminnm(vform, rd, rn, rm); break; + default: + VIXL_UNREACHABLE(); + } +} + + +void Simulator::VisitFPDataProcessing3Source(const Instruction* instr) { + AssertSupportedFPCR(); + + unsigned fd = instr->Rd(); + unsigned fn = instr->Rn(); + unsigned fm = instr->Rm(); + unsigned fa = instr->Ra(); + + switch (instr->Mask(FPDataProcessing3SourceMask)) { + // fd = fa +/- (fn * fm) + case FMADD_s: set_sreg(fd, FPMulAdd(sreg(fa), sreg(fn), sreg(fm))); break; + case FMSUB_s: set_sreg(fd, FPMulAdd(sreg(fa), -sreg(fn), sreg(fm))); break; + case FMADD_d: set_dreg(fd, FPMulAdd(dreg(fa), dreg(fn), dreg(fm))); break; + case FMSUB_d: set_dreg(fd, FPMulAdd(dreg(fa), -dreg(fn), dreg(fm))); break; + // Negated variants of the above. 
+ case FNMADD_s: + set_sreg(fd, FPMulAdd(-sreg(fa), -sreg(fn), sreg(fm))); + break; + case FNMSUB_s: + set_sreg(fd, FPMulAdd(-sreg(fa), sreg(fn), sreg(fm))); + break; + case FNMADD_d: + set_dreg(fd, FPMulAdd(-dreg(fa), -dreg(fn), dreg(fm))); + break; + case FNMSUB_d: + set_dreg(fd, FPMulAdd(-dreg(fa), dreg(fn), dreg(fm))); + break; + default: VIXL_UNIMPLEMENTED(); + } +} + + +bool Simulator::FPProcessNaNs(const Instruction* instr) { + unsigned fd = instr->Rd(); + unsigned fn = instr->Rn(); + unsigned fm = instr->Rm(); + bool done = false; + + if (instr->Mask(FP64) == FP64) { + double result = FPProcessNaNs(dreg(fn), dreg(fm)); + if (std::isnan(result)) { + set_dreg(fd, result); + done = true; + } + } else { + float result = FPProcessNaNs(sreg(fn), sreg(fm)); + if (std::isnan(result)) { + set_sreg(fd, result); + done = true; + } + } + + return done; +} + + +void Simulator::SysOp_W(int op, int64_t val) { + switch (op) { + case IVAU: + case CVAC: + case CVAU: + case CIVAC: { + // Perform a dummy memory access to ensure that we have read access + // to the specified address. + volatile uint8_t y = Read<uint8_t>(val); + USE(y); + // TODO: Implement "case ZVA:". + break; + } + default: + VIXL_UNIMPLEMENTED(); + } +} + + +void Simulator::VisitSystem(const Instruction* instr) { + // Some system instructions hijack their Op and Cp fields to represent a + // range of immediates instead of indicating a different instruction. This + // makes the decoding tricky. 
+ if (instr->Mask(SystemExclusiveMonitorFMask) == SystemExclusiveMonitorFixed) { + VIXL_ASSERT(instr->Mask(SystemExclusiveMonitorMask) == CLREX); + switch (instr->Mask(SystemExclusiveMonitorMask)) { + case CLREX: { + PrintExclusiveAccessWarning(); + ClearLocalMonitor(); + break; + } + } + } else if (instr->Mask(SystemSysRegFMask) == SystemSysRegFixed) { + switch (instr->Mask(SystemSysRegMask)) { + case MRS: { + switch (instr->ImmSystemRegister()) { + case NZCV: set_xreg(instr->Rt(), nzcv().RawValue()); break; + case FPCR: set_xreg(instr->Rt(), fpcr().RawValue()); break; + default: VIXL_UNIMPLEMENTED(); + } + break; + } + case MSR: { + switch (instr->ImmSystemRegister()) { + case NZCV: + nzcv().SetRawValue(wreg(instr->Rt())); + LogSystemRegister(NZCV); + break; + case FPCR: + fpcr().SetRawValue(wreg(instr->Rt())); + LogSystemRegister(FPCR); + break; + default: VIXL_UNIMPLEMENTED(); + } + break; + } + } + } else if (instr->Mask(SystemHintFMask) == SystemHintFixed) { + VIXL_ASSERT(instr->Mask(SystemHintMask) == HINT); + switch (instr->ImmHint()) { + case NOP: break; + case CSDB: break; + default: VIXL_UNIMPLEMENTED(); + } + } else if (instr->Mask(MemBarrierFMask) == MemBarrierFixed) { + js::jit::AtomicOperations::fenceSeqCst(); + } else if ((instr->Mask(SystemSysFMask) == SystemSysFixed)) { + switch (instr->Mask(SystemSysMask)) { + case SYS: SysOp_W(instr->SysOp(), xreg(instr->Rt())); break; + default: VIXL_UNIMPLEMENTED(); + } + } else { + VIXL_UNIMPLEMENTED(); + } +} + + +void Simulator::VisitCrypto2RegSHA(const Instruction* instr) { + VisitUnimplemented(instr); +} + + +void Simulator::VisitCrypto3RegSHA(const Instruction* instr) { + VisitUnimplemented(instr); +} + + +void Simulator::VisitCryptoAES(const Instruction* instr) { + VisitUnimplemented(instr); +} + + +void Simulator::VisitNEON2RegMisc(const Instruction* instr) { + NEONFormatDecoder nfd(instr); + VectorFormat vf = nfd.GetVectorFormat(); + + static const NEONFormatMap map_lp = { + {23, 22, 30}, {NF_4H, 
NF_8H, NF_2S, NF_4S, NF_1D, NF_2D} + }; + VectorFormat vf_lp = nfd.GetVectorFormat(&map_lp); + + static const NEONFormatMap map_fcvtl = { + {22}, {NF_4S, NF_2D} + }; + VectorFormat vf_fcvtl = nfd.GetVectorFormat(&map_fcvtl); + + static const NEONFormatMap map_fcvtn = { + {22, 30}, {NF_4H, NF_8H, NF_2S, NF_4S} + }; + VectorFormat vf_fcvtn = nfd.GetVectorFormat(&map_fcvtn); + + SimVRegister& rd = vreg(instr->Rd()); + SimVRegister& rn = vreg(instr->Rn()); + + if (instr->Mask(NEON2RegMiscOpcode) <= NEON_NEG_opcode) { + // These instructions all use a two bit size field, except NOT and RBIT, + // which use the field to encode the operation. + switch (instr->Mask(NEON2RegMiscMask)) { + case NEON_REV64: rev64(vf, rd, rn); break; + case NEON_REV32: rev32(vf, rd, rn); break; + case NEON_REV16: rev16(vf, rd, rn); break; + case NEON_SUQADD: suqadd(vf, rd, rn); break; + case NEON_USQADD: usqadd(vf, rd, rn); break; + case NEON_CLS: cls(vf, rd, rn); break; + case NEON_CLZ: clz(vf, rd, rn); break; + case NEON_CNT: cnt(vf, rd, rn); break; + case NEON_SQABS: abs(vf, rd, rn).SignedSaturate(vf); break; + case NEON_SQNEG: neg(vf, rd, rn).SignedSaturate(vf); break; + case NEON_CMGT_zero: cmp(vf, rd, rn, 0, gt); break; + case NEON_CMGE_zero: cmp(vf, rd, rn, 0, ge); break; + case NEON_CMEQ_zero: cmp(vf, rd, rn, 0, eq); break; + case NEON_CMLE_zero: cmp(vf, rd, rn, 0, le); break; + case NEON_CMLT_zero: cmp(vf, rd, rn, 0, lt); break; + case NEON_ABS: abs(vf, rd, rn); break; + case NEON_NEG: neg(vf, rd, rn); break; + case NEON_SADDLP: saddlp(vf_lp, rd, rn); break; + case NEON_UADDLP: uaddlp(vf_lp, rd, rn); break; + case NEON_SADALP: sadalp(vf_lp, rd, rn); break; + case NEON_UADALP: uadalp(vf_lp, rd, rn); break; + case NEON_RBIT_NOT: + vf = nfd.GetVectorFormat(nfd.LogicalFormatMap()); + switch (instr->FPType()) { + case 0: not_(vf, rd, rn); break; + case 1: rbit(vf, rd, rn);; break; + default: + VIXL_UNIMPLEMENTED(); + } + break; + } + } else { + VectorFormat fpf = 
nfd.GetVectorFormat(nfd.FPFormatMap()); + FPRounding fpcr_rounding = static_cast<FPRounding>(fpcr().RMode()); + bool inexact_exception = false; + + // These instructions all use a one bit size field, except XTN, SQXTUN, + // SHLL, SQXTN and UQXTN, which use a two bit size field. + switch (instr->Mask(NEON2RegMiscFPMask)) { + case NEON_FABS: fabs_(fpf, rd, rn); return; + case NEON_FNEG: fneg(fpf, rd, rn); return; + case NEON_FSQRT: fsqrt(fpf, rd, rn); return; + case NEON_FCVTL: + if (instr->Mask(NEON_Q)) { + fcvtl2(vf_fcvtl, rd, rn); + } else { + fcvtl(vf_fcvtl, rd, rn); + } + return; + case NEON_FCVTN: + if (instr->Mask(NEON_Q)) { + fcvtn2(vf_fcvtn, rd, rn); + } else { + fcvtn(vf_fcvtn, rd, rn); + } + return; + case NEON_FCVTXN: + if (instr->Mask(NEON_Q)) { + fcvtxn2(vf_fcvtn, rd, rn); + } else { + fcvtxn(vf_fcvtn, rd, rn); + } + return; + + // The following instructions break from the switch statement, rather + // than return. + case NEON_FRINTI: break; // Use FPCR rounding mode. + case NEON_FRINTX: inexact_exception = true; break; + case NEON_FRINTA: fpcr_rounding = FPTieAway; break; + case NEON_FRINTM: fpcr_rounding = FPNegativeInfinity; break; + case NEON_FRINTN: fpcr_rounding = FPTieEven; break; + case NEON_FRINTP: fpcr_rounding = FPPositiveInfinity; break; + case NEON_FRINTZ: fpcr_rounding = FPZero; break; + + case NEON_FCVTNS: fcvts(fpf, rd, rn, FPTieEven); return; + case NEON_FCVTNU: fcvtu(fpf, rd, rn, FPTieEven); return; + case NEON_FCVTPS: fcvts(fpf, rd, rn, FPPositiveInfinity); return; + case NEON_FCVTPU: fcvtu(fpf, rd, rn, FPPositiveInfinity); return; + case NEON_FCVTMS: fcvts(fpf, rd, rn, FPNegativeInfinity); return; + case NEON_FCVTMU: fcvtu(fpf, rd, rn, FPNegativeInfinity); return; + case NEON_FCVTZS: fcvts(fpf, rd, rn, FPZero); return; + case NEON_FCVTZU: fcvtu(fpf, rd, rn, FPZero); return; + case NEON_FCVTAS: fcvts(fpf, rd, rn, FPTieAway); return; + case NEON_FCVTAU: fcvtu(fpf, rd, rn, FPTieAway); return; + case NEON_SCVTF: scvtf(fpf, rd, rn, 0, 
fpcr_rounding); return; + case NEON_UCVTF: ucvtf(fpf, rd, rn, 0, fpcr_rounding); return; + case NEON_URSQRTE: ursqrte(fpf, rd, rn); return; + case NEON_URECPE: urecpe(fpf, rd, rn); return; + case NEON_FRSQRTE: frsqrte(fpf, rd, rn); return; + case NEON_FRECPE: frecpe(fpf, rd, rn, fpcr_rounding); return; + case NEON_FCMGT_zero: fcmp_zero(fpf, rd, rn, gt); return; + case NEON_FCMGE_zero: fcmp_zero(fpf, rd, rn, ge); return; + case NEON_FCMEQ_zero: fcmp_zero(fpf, rd, rn, eq); return; + case NEON_FCMLE_zero: fcmp_zero(fpf, rd, rn, le); return; + case NEON_FCMLT_zero: fcmp_zero(fpf, rd, rn, lt); return; + default: + if ((NEON_XTN_opcode <= instr->Mask(NEON2RegMiscOpcode)) && + (instr->Mask(NEON2RegMiscOpcode) <= NEON_UQXTN_opcode)) { + switch (instr->Mask(NEON2RegMiscMask)) { + case NEON_XTN: xtn(vf, rd, rn); return; + case NEON_SQXTN: sqxtn(vf, rd, rn); return; + case NEON_UQXTN: uqxtn(vf, rd, rn); return; + case NEON_SQXTUN: sqxtun(vf, rd, rn); return; + case NEON_SHLL: + vf = nfd.GetVectorFormat(nfd.LongIntegerFormatMap()); + if (instr->Mask(NEON_Q)) { + shll2(vf, rd, rn); + } else { + shll(vf, rd, rn); + } + return; + default: + VIXL_UNIMPLEMENTED(); + } + } else { + VIXL_UNIMPLEMENTED(); + } + } + + // Only FRINT* instructions fall through the switch above. 
+ frint(fpf, rd, rn, fpcr_rounding, inexact_exception); + } +} + + +void Simulator::VisitNEON3Same(const Instruction* instr) { + NEONFormatDecoder nfd(instr); + SimVRegister& rd = vreg(instr->Rd()); + SimVRegister& rn = vreg(instr->Rn()); + SimVRegister& rm = vreg(instr->Rm()); + + if (instr->Mask(NEON3SameLogicalFMask) == NEON3SameLogicalFixed) { + VectorFormat vf = nfd.GetVectorFormat(nfd.LogicalFormatMap()); + switch (instr->Mask(NEON3SameLogicalMask)) { + case NEON_AND: and_(vf, rd, rn, rm); break; + case NEON_ORR: orr(vf, rd, rn, rm); break; + case NEON_ORN: orn(vf, rd, rn, rm); break; + case NEON_EOR: eor(vf, rd, rn, rm); break; + case NEON_BIC: bic(vf, rd, rn, rm); break; + case NEON_BIF: bif(vf, rd, rn, rm); break; + case NEON_BIT: bit(vf, rd, rn, rm); break; + case NEON_BSL: bsl(vf, rd, rn, rm); break; + default: + VIXL_UNIMPLEMENTED(); + } + } else if (instr->Mask(NEON3SameFPFMask) == NEON3SameFPFixed) { + VectorFormat vf = nfd.GetVectorFormat(nfd.FPFormatMap()); + switch (instr->Mask(NEON3SameFPMask)) { + case NEON_FADD: fadd(vf, rd, rn, rm); break; + case NEON_FSUB: fsub(vf, rd, rn, rm); break; + case NEON_FMUL: fmul(vf, rd, rn, rm); break; + case NEON_FDIV: fdiv(vf, rd, rn, rm); break; + case NEON_FMAX: fmax(vf, rd, rn, rm); break; + case NEON_FMIN: fmin(vf, rd, rn, rm); break; + case NEON_FMAXNM: fmaxnm(vf, rd, rn, rm); break; + case NEON_FMINNM: fminnm(vf, rd, rn, rm); break; + case NEON_FMLA: fmla(vf, rd, rn, rm); break; + case NEON_FMLS: fmls(vf, rd, rn, rm); break; + case NEON_FMULX: fmulx(vf, rd, rn, rm); break; + case NEON_FACGE: fabscmp(vf, rd, rn, rm, ge); break; + case NEON_FACGT: fabscmp(vf, rd, rn, rm, gt); break; + case NEON_FCMEQ: fcmp(vf, rd, rn, rm, eq); break; + case NEON_FCMGE: fcmp(vf, rd, rn, rm, ge); break; + case NEON_FCMGT: fcmp(vf, rd, rn, rm, gt); break; + case NEON_FRECPS: frecps(vf, rd, rn, rm); break; + case NEON_FRSQRTS: frsqrts(vf, rd, rn, rm); break; + case NEON_FABD: fabd(vf, rd, rn, rm); break; + case NEON_FADDP: 
faddp(vf, rd, rn, rm); break; + case NEON_FMAXP: fmaxp(vf, rd, rn, rm); break; + case NEON_FMAXNMP: fmaxnmp(vf, rd, rn, rm); break; + case NEON_FMINP: fminp(vf, rd, rn, rm); break; + case NEON_FMINNMP: fminnmp(vf, rd, rn, rm); break; + default: + VIXL_UNIMPLEMENTED(); + } + } else { + VectorFormat vf = nfd.GetVectorFormat(); + switch (instr->Mask(NEON3SameMask)) { + case NEON_ADD: add(vf, rd, rn, rm); break; + case NEON_ADDP: addp(vf, rd, rn, rm); break; + case NEON_CMEQ: cmp(vf, rd, rn, rm, eq); break; + case NEON_CMGE: cmp(vf, rd, rn, rm, ge); break; + case NEON_CMGT: cmp(vf, rd, rn, rm, gt); break; + case NEON_CMHI: cmp(vf, rd, rn, rm, hi); break; + case NEON_CMHS: cmp(vf, rd, rn, rm, hs); break; + case NEON_CMTST: cmptst(vf, rd, rn, rm); break; + case NEON_MLS: mls(vf, rd, rn, rm); break; + case NEON_MLA: mla(vf, rd, rn, rm); break; + case NEON_MUL: mul(vf, rd, rn, rm); break; + case NEON_PMUL: pmul(vf, rd, rn, rm); break; + case NEON_SMAX: smax(vf, rd, rn, rm); break; + case NEON_SMAXP: smaxp(vf, rd, rn, rm); break; + case NEON_SMIN: smin(vf, rd, rn, rm); break; + case NEON_SMINP: sminp(vf, rd, rn, rm); break; + case NEON_SUB: sub(vf, rd, rn, rm); break; + case NEON_UMAX: umax(vf, rd, rn, rm); break; + case NEON_UMAXP: umaxp(vf, rd, rn, rm); break; + case NEON_UMIN: umin(vf, rd, rn, rm); break; + case NEON_UMINP: uminp(vf, rd, rn, rm); break; + case NEON_SSHL: sshl(vf, rd, rn, rm); break; + case NEON_USHL: ushl(vf, rd, rn, rm); break; + case NEON_SABD: absdiff(vf, rd, rn, rm, true); break; + case NEON_UABD: absdiff(vf, rd, rn, rm, false); break; + case NEON_SABA: saba(vf, rd, rn, rm); break; + case NEON_UABA: uaba(vf, rd, rn, rm); break; + case NEON_UQADD: add(vf, rd, rn, rm).UnsignedSaturate(vf); break; + case NEON_SQADD: add(vf, rd, rn, rm).SignedSaturate(vf); break; + case NEON_UQSUB: sub(vf, rd, rn, rm).UnsignedSaturate(vf); break; + case NEON_SQSUB: sub(vf, rd, rn, rm).SignedSaturate(vf); break; + case NEON_SQDMULH: sqdmulh(vf, rd, rn, rm); break; + case 
NEON_SQRDMULH: sqrdmulh(vf, rd, rn, rm); break; + case NEON_UQSHL: ushl(vf, rd, rn, rm).UnsignedSaturate(vf); break; + case NEON_SQSHL: sshl(vf, rd, rn, rm).SignedSaturate(vf); break; + case NEON_URSHL: ushl(vf, rd, rn, rm).Round(vf); break; + case NEON_SRSHL: sshl(vf, rd, rn, rm).Round(vf); break; + case NEON_UQRSHL: + ushl(vf, rd, rn, rm).Round(vf).UnsignedSaturate(vf); + break; + case NEON_SQRSHL: + sshl(vf, rd, rn, rm).Round(vf).SignedSaturate(vf); + break; + case NEON_UHADD: + add(vf, rd, rn, rm).Uhalve(vf); + break; + case NEON_URHADD: + add(vf, rd, rn, rm).Uhalve(vf).Round(vf); + break; + case NEON_SHADD: + add(vf, rd, rn, rm).Halve(vf); + break; + case NEON_SRHADD: + add(vf, rd, rn, rm).Halve(vf).Round(vf); + break; + case NEON_UHSUB: + sub(vf, rd, rn, rm).Uhalve(vf); + break; + case NEON_SHSUB: + sub(vf, rd, rn, rm).Halve(vf); + break; + default: + VIXL_UNIMPLEMENTED(); + } + } +} + + +void Simulator::VisitNEON3Different(const Instruction* instr) { + NEONFormatDecoder nfd(instr); + VectorFormat vf = nfd.GetVectorFormat(); + VectorFormat vf_l = nfd.GetVectorFormat(nfd.LongIntegerFormatMap()); + + SimVRegister& rd = vreg(instr->Rd()); + SimVRegister& rn = vreg(instr->Rn()); + SimVRegister& rm = vreg(instr->Rm()); + + switch (instr->Mask(NEON3DifferentMask)) { + case NEON_PMULL: pmull(vf_l, rd, rn, rm); break; + case NEON_PMULL2: pmull2(vf_l, rd, rn, rm); break; + case NEON_UADDL: uaddl(vf_l, rd, rn, rm); break; + case NEON_UADDL2: uaddl2(vf_l, rd, rn, rm); break; + case NEON_SADDL: saddl(vf_l, rd, rn, rm); break; + case NEON_SADDL2: saddl2(vf_l, rd, rn, rm); break; + case NEON_USUBL: usubl(vf_l, rd, rn, rm); break; + case NEON_USUBL2: usubl2(vf_l, rd, rn, rm); break; + case NEON_SSUBL: ssubl(vf_l, rd, rn, rm); break; + case NEON_SSUBL2: ssubl2(vf_l, rd, rn, rm); break; + case NEON_SABAL: sabal(vf_l, rd, rn, rm); break; + case NEON_SABAL2: sabal2(vf_l, rd, rn, rm); break; + case NEON_UABAL: uabal(vf_l, rd, rn, rm); break; + case NEON_UABAL2: uabal2(vf_l, rd, 
rn, rm); break; + case NEON_SABDL: sabdl(vf_l, rd, rn, rm); break; + case NEON_SABDL2: sabdl2(vf_l, rd, rn, rm); break; + case NEON_UABDL: uabdl(vf_l, rd, rn, rm); break; + case NEON_UABDL2: uabdl2(vf_l, rd, rn, rm); break; + case NEON_SMLAL: smlal(vf_l, rd, rn, rm); break; + case NEON_SMLAL2: smlal2(vf_l, rd, rn, rm); break; + case NEON_UMLAL: umlal(vf_l, rd, rn, rm); break; + case NEON_UMLAL2: umlal2(vf_l, rd, rn, rm); break; + case NEON_SMLSL: smlsl(vf_l, rd, rn, rm); break; + case NEON_SMLSL2: smlsl2(vf_l, rd, rn, rm); break; + case NEON_UMLSL: umlsl(vf_l, rd, rn, rm); break; + case NEON_UMLSL2: umlsl2(vf_l, rd, rn, rm); break; + case NEON_SMULL: smull(vf_l, rd, rn, rm); break; + case NEON_SMULL2: smull2(vf_l, rd, rn, rm); break; + case NEON_UMULL: umull(vf_l, rd, rn, rm); break; + case NEON_UMULL2: umull2(vf_l, rd, rn, rm); break; + case NEON_SQDMLAL: sqdmlal(vf_l, rd, rn, rm); break; + case NEON_SQDMLAL2: sqdmlal2(vf_l, rd, rn, rm); break; + case NEON_SQDMLSL: sqdmlsl(vf_l, rd, rn, rm); break; + case NEON_SQDMLSL2: sqdmlsl2(vf_l, rd, rn, rm); break; + case NEON_SQDMULL: sqdmull(vf_l, rd, rn, rm); break; + case NEON_SQDMULL2: sqdmull2(vf_l, rd, rn, rm); break; + case NEON_UADDW: uaddw(vf_l, rd, rn, rm); break; + case NEON_UADDW2: uaddw2(vf_l, rd, rn, rm); break; + case NEON_SADDW: saddw(vf_l, rd, rn, rm); break; + case NEON_SADDW2: saddw2(vf_l, rd, rn, rm); break; + case NEON_USUBW: usubw(vf_l, rd, rn, rm); break; + case NEON_USUBW2: usubw2(vf_l, rd, rn, rm); break; + case NEON_SSUBW: ssubw(vf_l, rd, rn, rm); break; + case NEON_SSUBW2: ssubw2(vf_l, rd, rn, rm); break; + case NEON_ADDHN: addhn(vf, rd, rn, rm); break; + case NEON_ADDHN2: addhn2(vf, rd, rn, rm); break; + case NEON_RADDHN: raddhn(vf, rd, rn, rm); break; + case NEON_RADDHN2: raddhn2(vf, rd, rn, rm); break; + case NEON_SUBHN: subhn(vf, rd, rn, rm); break; + case NEON_SUBHN2: subhn2(vf, rd, rn, rm); break; + case NEON_RSUBHN: rsubhn(vf, rd, rn, rm); break; + case NEON_RSUBHN2: rsubhn2(vf, rd, rn, 
rm); break; + default: + VIXL_UNIMPLEMENTED(); + } +} + + +void Simulator::VisitNEONAcrossLanes(const Instruction* instr) { + NEONFormatDecoder nfd(instr); + + SimVRegister& rd = vreg(instr->Rd()); + SimVRegister& rn = vreg(instr->Rn()); + + // The input operand's VectorFormat is passed for these instructions. + if (instr->Mask(NEONAcrossLanesFPFMask) == NEONAcrossLanesFPFixed) { + VectorFormat vf = nfd.GetVectorFormat(nfd.FPFormatMap()); + + switch (instr->Mask(NEONAcrossLanesFPMask)) { + case NEON_FMAXV: fmaxv(vf, rd, rn); break; + case NEON_FMINV: fminv(vf, rd, rn); break; + case NEON_FMAXNMV: fmaxnmv(vf, rd, rn); break; + case NEON_FMINNMV: fminnmv(vf, rd, rn); break; + default: + VIXL_UNIMPLEMENTED(); + } + } else { + VectorFormat vf = nfd.GetVectorFormat(); + + switch (instr->Mask(NEONAcrossLanesMask)) { + case NEON_ADDV: addv(vf, rd, rn); break; + case NEON_SMAXV: smaxv(vf, rd, rn); break; + case NEON_SMINV: sminv(vf, rd, rn); break; + case NEON_UMAXV: umaxv(vf, rd, rn); break; + case NEON_UMINV: uminv(vf, rd, rn); break; + case NEON_SADDLV: saddlv(vf, rd, rn); break; + case NEON_UADDLV: uaddlv(vf, rd, rn); break; + default: + VIXL_UNIMPLEMENTED(); + } + } +} + + +void Simulator::VisitNEONByIndexedElement(const Instruction* instr) { + NEONFormatDecoder nfd(instr); + VectorFormat vf_r = nfd.GetVectorFormat(); + VectorFormat vf = nfd.GetVectorFormat(nfd.LongIntegerFormatMap()); + + SimVRegister& rd = vreg(instr->Rd()); + SimVRegister& rn = vreg(instr->Rn()); + + ByElementOp Op = NULL; + + int rm_reg = instr->Rm(); + int index = (instr->NEONH() << 1) | instr->NEONL(); + if (instr->NEONSize() == 1) { + rm_reg &= 0xf; + index = (index << 1) | instr->NEONM(); + } + + switch (instr->Mask(NEONByIndexedElementMask)) { + case NEON_MUL_byelement: Op = &Simulator::mul; vf = vf_r; break; + case NEON_MLA_byelement: Op = &Simulator::mla; vf = vf_r; break; + case NEON_MLS_byelement: Op = &Simulator::mls; vf = vf_r; break; + case NEON_SQDMULH_byelement: Op = 
&Simulator::sqdmulh; vf = vf_r; break; + case NEON_SQRDMULH_byelement: Op = &Simulator::sqrdmulh; vf = vf_r; break; + case NEON_SMULL_byelement: + if (instr->Mask(NEON_Q)) { + Op = &Simulator::smull2; + } else { + Op = &Simulator::smull; + } + break; + case NEON_UMULL_byelement: + if (instr->Mask(NEON_Q)) { + Op = &Simulator::umull2; + } else { + Op = &Simulator::umull; + } + break; + case NEON_SMLAL_byelement: + if (instr->Mask(NEON_Q)) { + Op = &Simulator::smlal2; + } else { + Op = &Simulator::smlal; + } + break; + case NEON_UMLAL_byelement: + if (instr->Mask(NEON_Q)) { + Op = &Simulator::umlal2; + } else { + Op = &Simulator::umlal; + } + break; + case NEON_SMLSL_byelement: + if (instr->Mask(NEON_Q)) { + Op = &Simulator::smlsl2; + } else { + Op = &Simulator::smlsl; + } + break; + case NEON_UMLSL_byelement: + if (instr->Mask(NEON_Q)) { + Op = &Simulator::umlsl2; + } else { + Op = &Simulator::umlsl; + } + break; + case NEON_SQDMULL_byelement: + if (instr->Mask(NEON_Q)) { + Op = &Simulator::sqdmull2; + } else { + Op = &Simulator::sqdmull; + } + break; + case NEON_SQDMLAL_byelement: + if (instr->Mask(NEON_Q)) { + Op = &Simulator::sqdmlal2; + } else { + Op = &Simulator::sqdmlal; + } + break; + case NEON_SQDMLSL_byelement: + if (instr->Mask(NEON_Q)) { + Op = &Simulator::sqdmlsl2; + } else { + Op = &Simulator::sqdmlsl; + } + break; + default: + index = instr->NEONH(); + if ((instr->FPType() & 1) == 0) { + index = (index << 1) | instr->NEONL(); + } + + vf = nfd.GetVectorFormat(nfd.FPFormatMap()); + + switch (instr->Mask(NEONByIndexedElementFPMask)) { + case NEON_FMUL_byelement: Op = &Simulator::fmul; break; + case NEON_FMLA_byelement: Op = &Simulator::fmla; break; + case NEON_FMLS_byelement: Op = &Simulator::fmls; break; + case NEON_FMULX_byelement: Op = &Simulator::fmulx; break; + default: VIXL_UNIMPLEMENTED(); + } + } + + (this->*Op)(vf, rd, rn, vreg(rm_reg), index); +} + + +void Simulator::VisitNEONCopy(const Instruction* instr) { + NEONFormatDecoder nfd(instr, 
NEONFormatDecoder::TriangularFormatMap()); + VectorFormat vf = nfd.GetVectorFormat(); + + SimVRegister& rd = vreg(instr->Rd()); + SimVRegister& rn = vreg(instr->Rn()); + int imm5 = instr->ImmNEON5(); + int tz = CountTrailingZeros(imm5, 32); + int reg_index = imm5 >> (tz + 1); + + if (instr->Mask(NEONCopyInsElementMask) == NEON_INS_ELEMENT) { + int imm4 = instr->ImmNEON4(); + int rn_index = imm4 >> tz; + ins_element(vf, rd, reg_index, rn, rn_index); + } else if (instr->Mask(NEONCopyInsGeneralMask) == NEON_INS_GENERAL) { + ins_immediate(vf, rd, reg_index, xreg(instr->Rn())); + } else if (instr->Mask(NEONCopyUmovMask) == NEON_UMOV) { + uint64_t value = LogicVRegister(rn).Uint(vf, reg_index); + value &= MaxUintFromFormat(vf); + set_xreg(instr->Rd(), value); + } else if (instr->Mask(NEONCopyUmovMask) == NEON_SMOV) { + int64_t value = LogicVRegister(rn).Int(vf, reg_index); + if (instr->NEONQ()) { + set_xreg(instr->Rd(), value); + } else { + set_wreg(instr->Rd(), (int32_t)value); + } + } else if (instr->Mask(NEONCopyDupElementMask) == NEON_DUP_ELEMENT) { + dup_element(vf, rd, rn, reg_index); + } else if (instr->Mask(NEONCopyDupGeneralMask) == NEON_DUP_GENERAL) { + dup_immediate(vf, rd, xreg(instr->Rn())); + } else { + VIXL_UNIMPLEMENTED(); + } +} + + +void Simulator::VisitNEONExtract(const Instruction* instr) { + NEONFormatDecoder nfd(instr, NEONFormatDecoder::LogicalFormatMap()); + VectorFormat vf = nfd.GetVectorFormat(); + SimVRegister& rd = vreg(instr->Rd()); + SimVRegister& rn = vreg(instr->Rn()); + SimVRegister& rm = vreg(instr->Rm()); + if (instr->Mask(NEONExtractMask) == NEON_EXT) { + int index = instr->ImmNEONExt(); + ext(vf, rd, rn, rm, index); + } else { + VIXL_UNIMPLEMENTED(); + } +} + + +// Simulates NEON LD1-LD4 / ST1-ST4 (multiple structures) for both Offset and +// PostIndex addressing: performs the accesses, logs each touched register as a +// read or a write, and for PostIndex writes the updated base back to Rn. +void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr, + AddrMode addr_mode) { + NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap()); + VectorFormat vf = nfd.GetVectorFormat(); + + uint64_t addr_base = xreg(instr->Rn(), Reg31IsStackPointer); 
+ int reg_size = RegisterSizeInBytesFromFormat(vf); + + int reg[4]; + uint64_t addr[4]; + for (int i = 0; i < 4; i++) { + reg[i] = (instr->Rt() + i) % kNumberOfVRegisters; + addr[i] = addr_base + (i * reg_size); + } + int count = 1; + bool log_read = true; + + Instr itype = instr->Mask(NEONLoadStoreMultiStructMask); + if (((itype == NEON_LD1_1v) || (itype == NEON_LD1_2v) || + (itype == NEON_LD1_3v) || (itype == NEON_LD1_4v) || + (itype == NEON_ST1_1v) || (itype == NEON_ST1_2v) || + (itype == NEON_ST1_3v) || (itype == NEON_ST1_4v)) && + (instr->Bits(20, 16) != 0)) { + VIXL_UNREACHABLE(); + } + + // We use the PostIndex mask here, as it works in this case for both Offset + // and PostIndex addressing. + switch (instr->Mask(NEONLoadStoreMultiStructPostIndexMask)) { + case NEON_LD1_4v: + case NEON_LD1_4v_post: ld1(vf, vreg(reg[3]), addr[3]); count++; + VIXL_FALLTHROUGH(); + case NEON_LD1_3v: + case NEON_LD1_3v_post: ld1(vf, vreg(reg[2]), addr[2]); count++; + VIXL_FALLTHROUGH(); + case NEON_LD1_2v: + case NEON_LD1_2v_post: ld1(vf, vreg(reg[1]), addr[1]); count++; + VIXL_FALLTHROUGH(); + case NEON_LD1_1v: + case NEON_LD1_1v_post: + ld1(vf, vreg(reg[0]), addr[0]); + log_read = true; + break; + case NEON_ST1_4v: + case NEON_ST1_4v_post: st1(vf, vreg(reg[3]), addr[3]); count++; + VIXL_FALLTHROUGH(); + case NEON_ST1_3v: + case NEON_ST1_3v_post: st1(vf, vreg(reg[2]), addr[2]); count++; + VIXL_FALLTHROUGH(); + case NEON_ST1_2v: + case NEON_ST1_2v_post: st1(vf, vreg(reg[1]), addr[1]); count++; + VIXL_FALLTHROUGH(); + case NEON_ST1_1v: + case NEON_ST1_1v_post: + st1(vf, vreg(reg[0]), addr[0]); + log_read = false; + break; + case NEON_LD2_post: + case NEON_LD2: + ld2(vf, vreg(reg[0]), vreg(reg[1]), addr[0]); + count = 2; + break; + case NEON_ST2: + case NEON_ST2_post: + st2(vf, vreg(reg[0]), vreg(reg[1]), addr[0]); + count = 2; + // Stores are traced as writes; log_read defaults to true. + log_read = false; + break; + case NEON_LD3_post: + case NEON_LD3: + ld3(vf, vreg(reg[0]), vreg(reg[1]), vreg(reg[2]), addr[0]); + count = 3; + break; + case NEON_ST3: + 
case NEON_ST3_post: + st3(vf, vreg(reg[0]), vreg(reg[1]), vreg(reg[2]), addr[0]); + count = 3; + // Stores are traced as writes; log_read defaults to true. + log_read = false; + break; + case NEON_ST4: + case NEON_ST4_post: + st4(vf, vreg(reg[0]), vreg(reg[1]), vreg(reg[2]), vreg(reg[3]), + addr[0]); + count = 4; + // Stores are traced as writes; log_read defaults to true. + log_read = false; + break; + case NEON_LD4_post: + case NEON_LD4: + ld4(vf, vreg(reg[0]), vreg(reg[1]), vreg(reg[2]), vreg(reg[3]), + addr[0]); + count = 4; + break; + default: VIXL_UNIMPLEMENTED(); + } + + // Explicitly log the register update whilst we have type information. + for (int i = 0; i < count; i++) { + // For de-interleaving loads, only print the base address. + int lane_size = LaneSizeInBytesFromFormat(vf); + PrintRegisterFormat format = GetPrintRegisterFormatTryFP( + GetPrintRegisterFormatForSize(reg_size, lane_size)); + if (log_read) { + LogVRead(addr_base, reg[i], format); + } else { + LogVWrite(addr_base, reg[i], format); + } + } + + if (addr_mode == PostIndex) { + int rm = instr->Rm(); + // The immediate post index addressing mode is indicated by rm = 31. + // The immediate is implied by the number of vector registers used. + addr_base += (rm == 31) ? 
RegisterSizeInBytesFromFormat(vf) * count + : xreg(rm); + set_xreg(instr->Rn(), addr_base); + } else { + VIXL_ASSERT(addr_mode == Offset); + } +} + + +void Simulator::VisitNEONLoadStoreMultiStruct(const Instruction* instr) { + NEONLoadStoreMultiStructHelper(instr, Offset); +} + + +void Simulator::VisitNEONLoadStoreMultiStructPostIndex( + const Instruction* instr) { + NEONLoadStoreMultiStructHelper(instr, PostIndex); +} + + +void Simulator::NEONLoadStoreSingleStructHelper(const Instruction* instr, + AddrMode addr_mode) { + uint64_t addr = xreg(instr->Rn(), Reg31IsStackPointer); + int rt = instr->Rt(); + + Instr itype = instr->Mask(NEONLoadStoreSingleStructMask); + if (((itype == NEON_LD1_b) || (itype == NEON_LD1_h) || + (itype == NEON_LD1_s) || (itype == NEON_LD1_d)) && + (instr->Bits(20, 16) != 0)) { + VIXL_UNREACHABLE(); + } + + // We use the PostIndex mask here, as it works in this case for both Offset + // and PostIndex addressing. + bool do_load = false; + + bool replicating = false; + + NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap()); + VectorFormat vf_t = nfd.GetVectorFormat(); + + VectorFormat vf = kFormat16B; + switch (instr->Mask(NEONLoadStoreSingleStructPostIndexMask)) { + case NEON_LD1_b: + case NEON_LD1_b_post: + case NEON_LD2_b: + case NEON_LD2_b_post: + case NEON_LD3_b: + case NEON_LD3_b_post: + case NEON_LD4_b: + case NEON_LD4_b_post: do_load = true; + VIXL_FALLTHROUGH(); + case NEON_ST1_b: + case NEON_ST1_b_post: + case NEON_ST2_b: + case NEON_ST2_b_post: + case NEON_ST3_b: + case NEON_ST3_b_post: + case NEON_ST4_b: + case NEON_ST4_b_post: break; + + case NEON_LD1_h: + case NEON_LD1_h_post: + case NEON_LD2_h: + case NEON_LD2_h_post: + case NEON_LD3_h: + case NEON_LD3_h_post: + case NEON_LD4_h: + case NEON_LD4_h_post: do_load = true; + VIXL_FALLTHROUGH(); + case NEON_ST1_h: + case NEON_ST1_h_post: + case NEON_ST2_h: + case NEON_ST2_h_post: + case NEON_ST3_h: + case NEON_ST3_h_post: + case NEON_ST4_h: + case NEON_ST4_h_post: vf = 
kFormat8H; break; + case NEON_LD1_s: + case NEON_LD1_s_post: + case NEON_LD2_s: + case NEON_LD2_s_post: + case NEON_LD3_s: + case NEON_LD3_s_post: + case NEON_LD4_s: + case NEON_LD4_s_post: do_load = true; + VIXL_FALLTHROUGH(); + case NEON_ST1_s: + case NEON_ST1_s_post: + case NEON_ST2_s: + case NEON_ST2_s_post: + case NEON_ST3_s: + case NEON_ST3_s_post: + case NEON_ST4_s: + case NEON_ST4_s_post: { + VIXL_STATIC_ASSERT((NEON_LD1_s | (1 << NEONLSSize_offset)) == NEON_LD1_d); + VIXL_STATIC_ASSERT( + (NEON_LD1_s_post | (1 << NEONLSSize_offset)) == NEON_LD1_d_post); + VIXL_STATIC_ASSERT((NEON_ST1_s | (1 << NEONLSSize_offset)) == NEON_ST1_d); + VIXL_STATIC_ASSERT( + (NEON_ST1_s_post | (1 << NEONLSSize_offset)) == NEON_ST1_d_post); + vf = ((instr->NEONLSSize() & 1) == 0) ? kFormat4S : kFormat2D; + break; + } + + case NEON_LD1R: + case NEON_LD1R_post: + case NEON_LD2R: + case NEON_LD2R_post: + case NEON_LD3R: + case NEON_LD3R_post: + case NEON_LD4R: + case NEON_LD4R_post: { + vf = vf_t; + do_load = true; + replicating = true; + break; + } + default: VIXL_UNIMPLEMENTED(); + } + + PrintRegisterFormat print_format = + GetPrintRegisterFormatTryFP(GetPrintRegisterFormat(vf)); + // Make sure that the print_format only includes a single lane. 
+ print_format = + static_cast<PrintRegisterFormat>(print_format & ~kPrintRegAsVectorMask); + + int esize = LaneSizeInBytesFromFormat(vf); + int index_shift = LaneSizeInBytesLog2FromFormat(vf); + int lane = instr->NEONLSIndex(index_shift); + int scale = 0; + int rt2 = (rt + 1) % kNumberOfVRegisters; + int rt3 = (rt2 + 1) % kNumberOfVRegisters; + int rt4 = (rt3 + 1) % kNumberOfVRegisters; + switch (instr->Mask(NEONLoadStoreSingleLenMask)) { + case NEONLoadStoreSingle1: + scale = 1; + if (do_load) { + if (replicating) { + ld1r(vf, vreg(rt), addr); + } else { + ld1(vf, vreg(rt), lane, addr); + } + LogVRead(addr, rt, print_format, lane); + } else { + st1(vf, vreg(rt), lane, addr); + LogVWrite(addr, rt, print_format, lane); + } + break; + case NEONLoadStoreSingle2: + scale = 2; + if (do_load) { + if (replicating) { + ld2r(vf, vreg(rt), vreg(rt2), addr); + } else { + ld2(vf, vreg(rt), vreg(rt2), lane, addr); + } + LogVRead(addr, rt, print_format, lane); + LogVRead(addr + esize, rt2, print_format, lane); + } else { + st2(vf, vreg(rt), vreg(rt2), lane, addr); + LogVWrite(addr, rt, print_format, lane); + LogVWrite(addr + esize, rt2, print_format, lane); + } + break; + case NEONLoadStoreSingle3: + scale = 3; + if (do_load) { + if (replicating) { + ld3r(vf, vreg(rt), vreg(rt2), vreg(rt3), addr); + } else { + ld3(vf, vreg(rt), vreg(rt2), vreg(rt3), lane, addr); + } + LogVRead(addr, rt, print_format, lane); + LogVRead(addr + esize, rt2, print_format, lane); + LogVRead(addr + (2 * esize), rt3, print_format, lane); + } else { + st3(vf, vreg(rt), vreg(rt2), vreg(rt3), lane, addr); + LogVWrite(addr, rt, print_format, lane); + LogVWrite(addr + esize, rt2, print_format, lane); + LogVWrite(addr + (2 * esize), rt3, print_format, lane); + } + break; + case NEONLoadStoreSingle4: + scale = 4; + if (do_load) { + if (replicating) { + ld4r(vf, vreg(rt), vreg(rt2), vreg(rt3), vreg(rt4), addr); + } else { + ld4(vf, vreg(rt), vreg(rt2), vreg(rt3), vreg(rt4), lane, addr); + } + LogVRead(addr, 
rt, print_format, lane); + LogVRead(addr + esize, rt2, print_format, lane); + LogVRead(addr + (2 * esize), rt3, print_format, lane); + LogVRead(addr + (3 * esize), rt4, print_format, lane); + } else { + st4(vf, vreg(rt), vreg(rt2), vreg(rt3), vreg(rt4), lane, addr); + LogVWrite(addr, rt, print_format, lane); + LogVWrite(addr + esize, rt2, print_format, lane); + LogVWrite(addr + (2 * esize), rt3, print_format, lane); + LogVWrite(addr + (3 * esize), rt4, print_format, lane); + } + break; + default: VIXL_UNIMPLEMENTED(); + } + + if (addr_mode == PostIndex) { + int rm = instr->Rm(); + int lane_size = LaneSizeInBytesFromFormat(vf); + set_xreg(instr->Rn(), addr + ((rm == 31) ? (scale * lane_size) : xreg(rm))); + } +} + + +void Simulator::VisitNEONLoadStoreSingleStruct(const Instruction* instr) { + NEONLoadStoreSingleStructHelper(instr, Offset); +} + + +void Simulator::VisitNEONLoadStoreSingleStructPostIndex( + const Instruction* instr) { + NEONLoadStoreSingleStructHelper(instr, PostIndex); +} + + +void Simulator::VisitNEONModifiedImmediate(const Instruction* instr) { + SimVRegister& rd = vreg(instr->Rd()); + int cmode = instr->NEONCmode(); + int cmode_3_1 = (cmode >> 1) & 7; + int cmode_3 = (cmode >> 3) & 1; + int cmode_2 = (cmode >> 2) & 1; + int cmode_1 = (cmode >> 1) & 1; + int cmode_0 = cmode & 1; + int q = instr->NEONQ(); + int op_bit = instr->NEONModImmOp(); + uint64_t imm8 = instr->ImmNEONabcdefgh(); + + // Find the format and immediate value + uint64_t imm = 0; + VectorFormat vform = kFormatUndefined; + switch (cmode_3_1) { + case 0x0: + case 0x1: + case 0x2: + case 0x3: + vform = (q == 1) ? kFormat4S : kFormat2S; + imm = imm8 << (8 * cmode_3_1); + break; + case 0x4: + case 0x5: + vform = (q == 1) ? kFormat8H : kFormat4H; + imm = imm8 << (8 * cmode_1); + break; + case 0x6: + vform = (q == 1) ? 
kFormat4S : kFormat2S; + if (cmode_0 == 0) { + imm = imm8 << 8 | 0x000000ff; + } else { + imm = imm8 << 16 | 0x0000ffff; + } + break; + case 0x7: + if (cmode_0 == 0 && op_bit == 0) { + vform = q ? kFormat16B : kFormat8B; + imm = imm8; + } else if (cmode_0 == 0 && op_bit == 1) { + vform = q ? kFormat2D : kFormat1D; + imm = 0; + for (int i = 0; i < 8; ++i) { + if (imm8 & (1ULL << i)) { + imm |= (UINT64_C(0xff) << (8 * i)); + } + } + } else { // cmode_0 == 1, cmode == 0xf. + if (op_bit == 0) { + vform = q ? kFormat4S : kFormat2S; + imm = FloatToRawbits(instr->ImmNEONFP32()); + } else if (q == 1) { + vform = kFormat2D; + imm = DoubleToRawbits(instr->ImmNEONFP64()); + } else { + VIXL_ASSERT((q == 0) && (op_bit == 1) && (cmode == 0xf)); + VisitUnallocated(instr); + } + } + break; + default: VIXL_UNREACHABLE(); break; + } + + // Find the operation + NEONModifiedImmediateOp op; + if (cmode_3 == 0) { + if (cmode_0 == 0) { + op = op_bit ? NEONModifiedImmediate_MVNI : NEONModifiedImmediate_MOVI; + } else { // cmode<0> == '1' + op = op_bit ? NEONModifiedImmediate_BIC : NEONModifiedImmediate_ORR; + } + } else { // cmode<3> == '1' + if (cmode_2 == 0) { + if (cmode_0 == 0) { + op = op_bit ? NEONModifiedImmediate_MVNI : NEONModifiedImmediate_MOVI; + } else { // cmode<0> == '1' + op = op_bit ? NEONModifiedImmediate_BIC : NEONModifiedImmediate_ORR; + } + } else { // cmode<2> == '1' + if (cmode_1 == 0) { + op = op_bit ? 
NEONModifiedImmediate_MVNI : NEONModifiedImmediate_MOVI; + } else { // cmode<1> == '1' + if (cmode_0 == 0) { + op = NEONModifiedImmediate_MOVI; + } else { // cmode<0> == '1' + op = NEONModifiedImmediate_MOVI; + } + } + } + } + + // Call the logic function + if (op == NEONModifiedImmediate_ORR) { + orr(vform, rd, rd, imm); + } else if (op == NEONModifiedImmediate_BIC) { + bic(vform, rd, rd, imm); + } else if (op == NEONModifiedImmediate_MOVI) { + movi(vform, rd, imm); + } else if (op == NEONModifiedImmediate_MVNI) { + mvni(vform, rd, imm); + } else { + VisitUnimplemented(instr); + } +} + + +void Simulator::VisitNEONScalar2RegMisc(const Instruction* instr) { + NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap()); + VectorFormat vf = nfd.GetVectorFormat(); + + SimVRegister& rd = vreg(instr->Rd()); + SimVRegister& rn = vreg(instr->Rn()); + + if (instr->Mask(NEON2RegMiscOpcode) <= NEON_NEG_scalar_opcode) { + // These instructions all use a two bit size field, except NOT and RBIT, + // which use the field to encode the operation. 
+ switch (instr->Mask(NEONScalar2RegMiscMask)) { + case NEON_CMEQ_zero_scalar: cmp(vf, rd, rn, 0, eq); break; + case NEON_CMGE_zero_scalar: cmp(vf, rd, rn, 0, ge); break; + case NEON_CMGT_zero_scalar: cmp(vf, rd, rn, 0, gt); break; + case NEON_CMLT_zero_scalar: cmp(vf, rd, rn, 0, lt); break; + case NEON_CMLE_zero_scalar: cmp(vf, rd, rn, 0, le); break; + case NEON_ABS_scalar: abs(vf, rd, rn); break; + case NEON_SQABS_scalar: abs(vf, rd, rn).SignedSaturate(vf); break; + case NEON_NEG_scalar: neg(vf, rd, rn); break; + case NEON_SQNEG_scalar: neg(vf, rd, rn).SignedSaturate(vf); break; + case NEON_SUQADD_scalar: suqadd(vf, rd, rn); break; + case NEON_USQADD_scalar: usqadd(vf, rd, rn); break; + default: VIXL_UNIMPLEMENTED(); break; + } + } else { + VectorFormat fpf = nfd.GetVectorFormat(nfd.FPScalarFormatMap()); + FPRounding fpcr_rounding = static_cast<FPRounding>(fpcr().RMode()); + + // These instructions all use a one bit size field, except SQXTUN, SQXTN + // and UQXTN, which use a two bit size field. 
+ switch (instr->Mask(NEONScalar2RegMiscFPMask)) { + case NEON_FRECPE_scalar: frecpe(fpf, rd, rn, fpcr_rounding); break; + case NEON_FRECPX_scalar: frecpx(fpf, rd, rn); break; + case NEON_FRSQRTE_scalar: frsqrte(fpf, rd, rn); break; + case NEON_FCMGT_zero_scalar: fcmp_zero(fpf, rd, rn, gt); break; + case NEON_FCMGE_zero_scalar: fcmp_zero(fpf, rd, rn, ge); break; + case NEON_FCMEQ_zero_scalar: fcmp_zero(fpf, rd, rn, eq); break; + case NEON_FCMLE_zero_scalar: fcmp_zero(fpf, rd, rn, le); break; + case NEON_FCMLT_zero_scalar: fcmp_zero(fpf, rd, rn, lt); break; + case NEON_SCVTF_scalar: scvtf(fpf, rd, rn, 0, fpcr_rounding); break; + case NEON_UCVTF_scalar: ucvtf(fpf, rd, rn, 0, fpcr_rounding); break; + case NEON_FCVTNS_scalar: fcvts(fpf, rd, rn, FPTieEven); break; + case NEON_FCVTNU_scalar: fcvtu(fpf, rd, rn, FPTieEven); break; + case NEON_FCVTPS_scalar: fcvts(fpf, rd, rn, FPPositiveInfinity); break; + case NEON_FCVTPU_scalar: fcvtu(fpf, rd, rn, FPPositiveInfinity); break; + case NEON_FCVTMS_scalar: fcvts(fpf, rd, rn, FPNegativeInfinity); break; + case NEON_FCVTMU_scalar: fcvtu(fpf, rd, rn, FPNegativeInfinity); break; + case NEON_FCVTZS_scalar: fcvts(fpf, rd, rn, FPZero); break; + case NEON_FCVTZU_scalar: fcvtu(fpf, rd, rn, FPZero); break; + case NEON_FCVTAS_scalar: fcvts(fpf, rd, rn, FPTieAway); break; + case NEON_FCVTAU_scalar: fcvtu(fpf, rd, rn, FPTieAway); break; + case NEON_FCVTXN_scalar: + // Unlike all of the other FP instructions above, fcvtxn encodes dest + // size S as size<0>=1. There's only one case, so we ignore the form. 
+ VIXL_ASSERT(instr->Bit(22) == 1); + fcvtxn(kFormatS, rd, rn); + break; + default: + switch (instr->Mask(NEONScalar2RegMiscMask)) { + case NEON_SQXTN_scalar: sqxtn(vf, rd, rn); break; + case NEON_UQXTN_scalar: uqxtn(vf, rd, rn); break; + case NEON_SQXTUN_scalar: sqxtun(vf, rd, rn); break; + default: + VIXL_UNIMPLEMENTED(); + } + } + } +} + + +void Simulator::VisitNEONScalar3Diff(const Instruction* instr) { + NEONFormatDecoder nfd(instr, NEONFormatDecoder::LongScalarFormatMap()); + VectorFormat vf = nfd.GetVectorFormat(); + + SimVRegister& rd = vreg(instr->Rd()); + SimVRegister& rn = vreg(instr->Rn()); + SimVRegister& rm = vreg(instr->Rm()); + switch (instr->Mask(NEONScalar3DiffMask)) { + case NEON_SQDMLAL_scalar: sqdmlal(vf, rd, rn, rm); break; + case NEON_SQDMLSL_scalar: sqdmlsl(vf, rd, rn, rm); break; + case NEON_SQDMULL_scalar: sqdmull(vf, rd, rn, rm); break; + default: + VIXL_UNIMPLEMENTED(); + } +} + + +void Simulator::VisitNEONScalar3Same(const Instruction* instr) { + NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap()); + VectorFormat vf = nfd.GetVectorFormat(); + + SimVRegister& rd = vreg(instr->Rd()); + SimVRegister& rn = vreg(instr->Rn()); + SimVRegister& rm = vreg(instr->Rm()); + + if (instr->Mask(NEONScalar3SameFPFMask) == NEONScalar3SameFPFixed) { + vf = nfd.GetVectorFormat(nfd.FPScalarFormatMap()); + switch (instr->Mask(NEONScalar3SameFPMask)) { + case NEON_FMULX_scalar: fmulx(vf, rd, rn, rm); break; + case NEON_FACGE_scalar: fabscmp(vf, rd, rn, rm, ge); break; + case NEON_FACGT_scalar: fabscmp(vf, rd, rn, rm, gt); break; + case NEON_FCMEQ_scalar: fcmp(vf, rd, rn, rm, eq); break; + case NEON_FCMGE_scalar: fcmp(vf, rd, rn, rm, ge); break; + case NEON_FCMGT_scalar: fcmp(vf, rd, rn, rm, gt); break; + case NEON_FRECPS_scalar: frecps(vf, rd, rn, rm); break; + case NEON_FRSQRTS_scalar: frsqrts(vf, rd, rn, rm); break; + case NEON_FABD_scalar: fabd(vf, rd, rn, rm); break; + default: + VIXL_UNIMPLEMENTED(); + } + } else { + switch 
(instr->Mask(NEONScalar3SameMask)) { + case NEON_ADD_scalar: add(vf, rd, rn, rm); break; + case NEON_SUB_scalar: sub(vf, rd, rn, rm); break; + case NEON_CMEQ_scalar: cmp(vf, rd, rn, rm, eq); break; + case NEON_CMGE_scalar: cmp(vf, rd, rn, rm, ge); break; + case NEON_CMGT_scalar: cmp(vf, rd, rn, rm, gt); break; + case NEON_CMHI_scalar: cmp(vf, rd, rn, rm, hi); break; + case NEON_CMHS_scalar: cmp(vf, rd, rn, rm, hs); break; + case NEON_CMTST_scalar: cmptst(vf, rd, rn, rm); break; + case NEON_USHL_scalar: ushl(vf, rd, rn, rm); break; + case NEON_SSHL_scalar: sshl(vf, rd, rn, rm); break; + case NEON_SQDMULH_scalar: sqdmulh(vf, rd, rn, rm); break; + case NEON_SQRDMULH_scalar: sqrdmulh(vf, rd, rn, rm); break; + case NEON_UQADD_scalar: + add(vf, rd, rn, rm).UnsignedSaturate(vf); + break; + case NEON_SQADD_scalar: + add(vf, rd, rn, rm).SignedSaturate(vf); + break; + case NEON_UQSUB_scalar: + sub(vf, rd, rn, rm).UnsignedSaturate(vf); + break; + case NEON_SQSUB_scalar: + sub(vf, rd, rn, rm).SignedSaturate(vf); + break; + case NEON_UQSHL_scalar: + ushl(vf, rd, rn, rm).UnsignedSaturate(vf); + break; + case NEON_SQSHL_scalar: + sshl(vf, rd, rn, rm).SignedSaturate(vf); + break; + case NEON_URSHL_scalar: + ushl(vf, rd, rn, rm).Round(vf); + break; + case NEON_SRSHL_scalar: + sshl(vf, rd, rn, rm).Round(vf); + break; + case NEON_UQRSHL_scalar: + ushl(vf, rd, rn, rm).Round(vf).UnsignedSaturate(vf); + break; + case NEON_SQRSHL_scalar: + sshl(vf, rd, rn, rm).Round(vf).SignedSaturate(vf); + break; + default: + VIXL_UNIMPLEMENTED(); + } + } +} + + +void Simulator::VisitNEONScalarByIndexedElement(const Instruction* instr) { + NEONFormatDecoder nfd(instr, NEONFormatDecoder::LongScalarFormatMap()); + VectorFormat vf = nfd.GetVectorFormat(); + VectorFormat vf_r = nfd.GetVectorFormat(nfd.ScalarFormatMap()); + + SimVRegister& rd = vreg(instr->Rd()); + SimVRegister& rn = vreg(instr->Rn()); + ByElementOp Op = NULL; + + int rm_reg = instr->Rm(); + int index = (instr->NEONH() << 1) | 
instr->NEONL(); + if (instr->NEONSize() == 1) { + rm_reg &= 0xf; + index = (index << 1) | instr->NEONM(); + } + + switch (instr->Mask(NEONScalarByIndexedElementMask)) { + case NEON_SQDMULL_byelement_scalar: Op = &Simulator::sqdmull; break; + case NEON_SQDMLAL_byelement_scalar: Op = &Simulator::sqdmlal; break; + case NEON_SQDMLSL_byelement_scalar: Op = &Simulator::sqdmlsl; break; + case NEON_SQDMULH_byelement_scalar: + Op = &Simulator::sqdmulh; + vf = vf_r; + break; + case NEON_SQRDMULH_byelement_scalar: + Op = &Simulator::sqrdmulh; + vf = vf_r; + break; + default: + vf = nfd.GetVectorFormat(nfd.FPScalarFormatMap()); + index = instr->NEONH(); + if ((instr->FPType() & 1) == 0) { + index = (index << 1) | instr->NEONL(); + } + switch (instr->Mask(NEONScalarByIndexedElementFPMask)) { + case NEON_FMUL_byelement_scalar: Op = &Simulator::fmul; break; + case NEON_FMLA_byelement_scalar: Op = &Simulator::fmla; break; + case NEON_FMLS_byelement_scalar: Op = &Simulator::fmls; break; + case NEON_FMULX_byelement_scalar: Op = &Simulator::fmulx; break; + default: VIXL_UNIMPLEMENTED(); + } + } + + (this->*Op)(vf, rd, rn, vreg(rm_reg), index); +} + + +void Simulator::VisitNEONScalarCopy(const Instruction* instr) { + NEONFormatDecoder nfd(instr, NEONFormatDecoder::TriangularScalarFormatMap()); + VectorFormat vf = nfd.GetVectorFormat(); + + SimVRegister& rd = vreg(instr->Rd()); + SimVRegister& rn = vreg(instr->Rn()); + + if (instr->Mask(NEONScalarCopyMask) == NEON_DUP_ELEMENT_scalar) { + int imm5 = instr->ImmNEON5(); + int tz = CountTrailingZeros(imm5, 32); + int rn_index = imm5 >> (tz + 1); + dup_element(vf, rd, rn, rn_index); + } else { + VIXL_UNIMPLEMENTED(); + } +} + + +void Simulator::VisitNEONScalarPairwise(const Instruction* instr) { + NEONFormatDecoder nfd(instr, NEONFormatDecoder::FPScalarFormatMap()); + VectorFormat vf = nfd.GetVectorFormat(); + + SimVRegister& rd = vreg(instr->Rd()); + SimVRegister& rn = vreg(instr->Rn()); + switch (instr->Mask(NEONScalarPairwiseMask)) { + 
case NEON_ADDP_scalar: addp(vf, rd, rn); break; + case NEON_FADDP_scalar: faddp(vf, rd, rn); break; + case NEON_FMAXP_scalar: fmaxp(vf, rd, rn); break; + case NEON_FMAXNMP_scalar: fmaxnmp(vf, rd, rn); break; + case NEON_FMINP_scalar: fminp(vf, rd, rn); break; + case NEON_FMINNMP_scalar: fminnmp(vf, rd, rn); break; + default: + VIXL_UNIMPLEMENTED(); + } +} + + +void Simulator::VisitNEONScalarShiftImmediate(const Instruction* instr) { + SimVRegister& rd = vreg(instr->Rd()); + SimVRegister& rn = vreg(instr->Rn()); + FPRounding fpcr_rounding = static_cast<FPRounding>(fpcr().RMode()); + + static const NEONFormatMap map = { + {22, 21, 20, 19}, + {NF_UNDEF, NF_B, NF_H, NF_H, NF_S, NF_S, NF_S, NF_S, + NF_D, NF_D, NF_D, NF_D, NF_D, NF_D, NF_D, NF_D} + }; + NEONFormatDecoder nfd(instr, &map); + VectorFormat vf = nfd.GetVectorFormat(); + + int highestSetBit = HighestSetBitPosition(instr->ImmNEONImmh()); + int immhimmb = instr->ImmNEONImmhImmb(); + int right_shift = (16 << highestSetBit) - immhimmb; + int left_shift = immhimmb - (8 << highestSetBit); + switch (instr->Mask(NEONScalarShiftImmediateMask)) { + case NEON_SHL_scalar: shl(vf, rd, rn, left_shift); break; + case NEON_SLI_scalar: sli(vf, rd, rn, left_shift); break; + case NEON_SQSHL_imm_scalar: sqshl(vf, rd, rn, left_shift); break; + case NEON_UQSHL_imm_scalar: uqshl(vf, rd, rn, left_shift); break; + case NEON_SQSHLU_scalar: sqshlu(vf, rd, rn, left_shift); break; + case NEON_SRI_scalar: sri(vf, rd, rn, right_shift); break; + case NEON_SSHR_scalar: sshr(vf, rd, rn, right_shift); break; + case NEON_USHR_scalar: ushr(vf, rd, rn, right_shift); break; + case NEON_SRSHR_scalar: sshr(vf, rd, rn, right_shift).Round(vf); break; + case NEON_URSHR_scalar: ushr(vf, rd, rn, right_shift).Round(vf); break; + case NEON_SSRA_scalar: ssra(vf, rd, rn, right_shift); break; + case NEON_USRA_scalar: usra(vf, rd, rn, right_shift); break; + case NEON_SRSRA_scalar: srsra(vf, rd, rn, right_shift); break; + case NEON_URSRA_scalar: ursra(vf, rd, 
rn, right_shift); break; + case NEON_UQSHRN_scalar: uqshrn(vf, rd, rn, right_shift); break; + case NEON_UQRSHRN_scalar: uqrshrn(vf, rd, rn, right_shift); break; + case NEON_SQSHRN_scalar: sqshrn(vf, rd, rn, right_shift); break; + case NEON_SQRSHRN_scalar: sqrshrn(vf, rd, rn, right_shift); break; + case NEON_SQSHRUN_scalar: sqshrun(vf, rd, rn, right_shift); break; + case NEON_SQRSHRUN_scalar: sqrshrun(vf, rd, rn, right_shift); break; + case NEON_FCVTZS_imm_scalar: fcvts(vf, rd, rn, FPZero, right_shift); break; + case NEON_FCVTZU_imm_scalar: fcvtu(vf, rd, rn, FPZero, right_shift); break; + case NEON_SCVTF_imm_scalar: + scvtf(vf, rd, rn, right_shift, fpcr_rounding); + break; + case NEON_UCVTF_imm_scalar: + ucvtf(vf, rd, rn, right_shift, fpcr_rounding); + break; + default: + VIXL_UNIMPLEMENTED(); + } +} + + +void Simulator::VisitNEONShiftImmediate(const Instruction* instr) { + SimVRegister& rd = vreg(instr->Rd()); + SimVRegister& rn = vreg(instr->Rn()); + FPRounding fpcr_rounding = static_cast<FPRounding>(fpcr().RMode()); + + // 00010->8B, 00011->16B, 001x0->4H, 001x1->8H, + // 01xx0->2S, 01xx1->4S, 1xxx1->2D, all others undefined. + static const NEONFormatMap map = { + {22, 21, 20, 19, 30}, + {NF_UNDEF, NF_UNDEF, NF_8B, NF_16B, NF_4H, NF_8H, NF_4H, NF_8H, + NF_2S, NF_4S, NF_2S, NF_4S, NF_2S, NF_4S, NF_2S, NF_4S, + NF_UNDEF, NF_2D, NF_UNDEF, NF_2D, NF_UNDEF, NF_2D, NF_UNDEF, NF_2D, + NF_UNDEF, NF_2D, NF_UNDEF, NF_2D, NF_UNDEF, NF_2D, NF_UNDEF, NF_2D} + }; + NEONFormatDecoder nfd(instr, &map); + VectorFormat vf = nfd.GetVectorFormat(); + + // 0001->8H, 001x->4S, 01xx->2D, all others undefined. 
+ static const NEONFormatMap map_l = { + {22, 21, 20, 19}, + {NF_UNDEF, NF_8H, NF_4S, NF_4S, NF_2D, NF_2D, NF_2D, NF_2D} + }; + VectorFormat vf_l = nfd.GetVectorFormat(&map_l); + + int highestSetBit = HighestSetBitPosition(instr->ImmNEONImmh()); + int immhimmb = instr->ImmNEONImmhImmb(); + int right_shift = (16 << highestSetBit) - immhimmb; + int left_shift = immhimmb - (8 << highestSetBit); + + switch (instr->Mask(NEONShiftImmediateMask)) { + case NEON_SHL: shl(vf, rd, rn, left_shift); break; + case NEON_SLI: sli(vf, rd, rn, left_shift); break; + case NEON_SQSHLU: sqshlu(vf, rd, rn, left_shift); break; + case NEON_SRI: sri(vf, rd, rn, right_shift); break; + case NEON_SSHR: sshr(vf, rd, rn, right_shift); break; + case NEON_USHR: ushr(vf, rd, rn, right_shift); break; + case NEON_SRSHR: sshr(vf, rd, rn, right_shift).Round(vf); break; + case NEON_URSHR: ushr(vf, rd, rn, right_shift).Round(vf); break; + case NEON_SSRA: ssra(vf, rd, rn, right_shift); break; + case NEON_USRA: usra(vf, rd, rn, right_shift); break; + case NEON_SRSRA: srsra(vf, rd, rn, right_shift); break; + case NEON_URSRA: ursra(vf, rd, rn, right_shift); break; + case NEON_SQSHL_imm: sqshl(vf, rd, rn, left_shift); break; + case NEON_UQSHL_imm: uqshl(vf, rd, rn, left_shift); break; + case NEON_SCVTF_imm: scvtf(vf, rd, rn, right_shift, fpcr_rounding); break; + case NEON_UCVTF_imm: ucvtf(vf, rd, rn, right_shift, fpcr_rounding); break; + case NEON_FCVTZS_imm: fcvts(vf, rd, rn, FPZero, right_shift); break; + case NEON_FCVTZU_imm: fcvtu(vf, rd, rn, FPZero, right_shift); break; + case NEON_SSHLL: + vf = vf_l; + if (instr->Mask(NEON_Q)) { + sshll2(vf, rd, rn, left_shift); + } else { + sshll(vf, rd, rn, left_shift); + } + break; + case NEON_USHLL: + vf = vf_l; + if (instr->Mask(NEON_Q)) { + ushll2(vf, rd, rn, left_shift); + } else { + ushll(vf, rd, rn, left_shift); + } + break; + case NEON_SHRN: + if (instr->Mask(NEON_Q)) { + shrn2(vf, rd, rn, right_shift); + } else { + shrn(vf, rd, rn, right_shift); + } + break; 
+ case NEON_RSHRN: + if (instr->Mask(NEON_Q)) { + rshrn2(vf, rd, rn, right_shift); + } else { + rshrn(vf, rd, rn, right_shift); + } + break; + case NEON_UQSHRN: + if (instr->Mask(NEON_Q)) { + uqshrn2(vf, rd, rn, right_shift); + } else { + uqshrn(vf, rd, rn, right_shift); + } + break; + case NEON_UQRSHRN: + if (instr->Mask(NEON_Q)) { + uqrshrn2(vf, rd, rn, right_shift); + } else { + uqrshrn(vf, rd, rn, right_shift); + } + break; + case NEON_SQSHRN: + if (instr->Mask(NEON_Q)) { + sqshrn2(vf, rd, rn, right_shift); + } else { + sqshrn(vf, rd, rn, right_shift); + } + break; + case NEON_SQRSHRN: + if (instr->Mask(NEON_Q)) { + sqrshrn2(vf, rd, rn, right_shift); + } else { + sqrshrn(vf, rd, rn, right_shift); + } + break; + case NEON_SQSHRUN: + if (instr->Mask(NEON_Q)) { + sqshrun2(vf, rd, rn, right_shift); + } else { + sqshrun(vf, rd, rn, right_shift); + } + break; + case NEON_SQRSHRUN: + if (instr->Mask(NEON_Q)) { + sqrshrun2(vf, rd, rn, right_shift); + } else { + sqrshrun(vf, rd, rn, right_shift); + } + break; + default: + VIXL_UNIMPLEMENTED(); + } +} + + +void Simulator::VisitNEONTable(const Instruction* instr) { + NEONFormatDecoder nfd(instr, NEONFormatDecoder::LogicalFormatMap()); + VectorFormat vf = nfd.GetVectorFormat(); + + SimVRegister& rd = vreg(instr->Rd()); + SimVRegister& rn = vreg(instr->Rn()); + SimVRegister& rn2 = vreg((instr->Rn() + 1) % kNumberOfVRegisters); + SimVRegister& rn3 = vreg((instr->Rn() + 2) % kNumberOfVRegisters); + SimVRegister& rn4 = vreg((instr->Rn() + 3) % kNumberOfVRegisters); + SimVRegister& rm = vreg(instr->Rm()); + + switch (instr->Mask(NEONTableMask)) { + case NEON_TBL_1v: tbl(vf, rd, rn, rm); break; + case NEON_TBL_2v: tbl(vf, rd, rn, rn2, rm); break; + case NEON_TBL_3v: tbl(vf, rd, rn, rn2, rn3, rm); break; + case NEON_TBL_4v: tbl(vf, rd, rn, rn2, rn3, rn4, rm); break; + case NEON_TBX_1v: tbx(vf, rd, rn, rm); break; + case NEON_TBX_2v: tbx(vf, rd, rn, rn2, rm); break; + case NEON_TBX_3v: tbx(vf, rd, rn, rn2, rn3, rm); break; + case 
NEON_TBX_4v: tbx(vf, rd, rn, rn2, rn3, rn4, rm); break; + default: + VIXL_UNIMPLEMENTED(); + } +} + + +void Simulator::VisitNEONPerm(const Instruction* instr) { + NEONFormatDecoder nfd(instr); + VectorFormat vf = nfd.GetVectorFormat(); + + SimVRegister& rd = vreg(instr->Rd()); + SimVRegister& rn = vreg(instr->Rn()); + SimVRegister& rm = vreg(instr->Rm()); + + switch (instr->Mask(NEONPermMask)) { + case NEON_TRN1: trn1(vf, rd, rn, rm); break; + case NEON_TRN2: trn2(vf, rd, rn, rm); break; + case NEON_UZP1: uzp1(vf, rd, rn, rm); break; + case NEON_UZP2: uzp2(vf, rd, rn, rm); break; + case NEON_ZIP1: zip1(vf, rd, rn, rm); break; + case NEON_ZIP2: zip2(vf, rd, rn, rm); break; + default: + VIXL_UNIMPLEMENTED(); + } +} + + +void Simulator::DoUnreachable(const Instruction* instr) { + VIXL_ASSERT(instr->InstructionBits() == UNDEFINED_INST_PATTERN); + + fprintf(stream_, "Hit UNREACHABLE marker at pc=%p.\n", + reinterpret_cast<const void*>(instr)); + abort(); +} + + +void Simulator::DoTrace(const Instruction* instr) { + VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) && + (instr->ImmException() == kTraceOpcode)); + + // Read the arguments encoded inline in the instruction stream. + uint32_t parameters; + uint32_t command; + + VIXL_STATIC_ASSERT(sizeof(*instr) == 1); + memcpy(¶meters, instr + kTraceParamsOffset, sizeof(parameters)); + memcpy(&command, instr + kTraceCommandOffset, sizeof(command)); + + switch (command) { + case TRACE_ENABLE: + set_trace_parameters(trace_parameters() | parameters); + break; + case TRACE_DISABLE: + set_trace_parameters(trace_parameters() & ~parameters); + break; + default: + VIXL_UNREACHABLE(); + } + + set_pc(instr->InstructionAtOffset(kTraceLength)); +} + + +void Simulator::DoLog(const Instruction* instr) { + VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) && + (instr->ImmException() == kLogOpcode)); + + // Read the arguments encoded inline in the instruction stream. 
+ uint32_t parameters; + + VIXL_STATIC_ASSERT(sizeof(*instr) == 1); + memcpy(¶meters, instr + kTraceParamsOffset, sizeof(parameters)); + + // We don't support a one-shot LOG_DISASM. + VIXL_ASSERT((parameters & LOG_DISASM) == 0); + // Print the requested information. + if (parameters & LOG_SYSREGS) PrintSystemRegisters(); + if (parameters & LOG_REGS) PrintRegisters(); + if (parameters & LOG_VREGS) PrintVRegisters(); + + set_pc(instr->InstructionAtOffset(kLogLength)); +} + + +void Simulator::DoPrintf(const Instruction* instr) { + VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) && + (instr->ImmException() == kPrintfOpcode)); + + // Read the arguments encoded inline in the instruction stream. + uint32_t arg_count; + uint32_t arg_pattern_list; + VIXL_STATIC_ASSERT(sizeof(*instr) == 1); + memcpy(&arg_count, + instr + kPrintfArgCountOffset, + sizeof(arg_count)); + memcpy(&arg_pattern_list, + instr + kPrintfArgPatternListOffset, + sizeof(arg_pattern_list)); + + VIXL_ASSERT(arg_count <= kPrintfMaxArgCount); + VIXL_ASSERT((arg_pattern_list >> (kPrintfArgPatternBits * arg_count)) == 0); + + // We need to call the host printf function with a set of arguments defined by + // arg_pattern_list. Because we don't know the types and sizes of the + // arguments, this is very difficult to do in a robust and portable way. To + // work around the problem, we pick apart the format string, and print one + // format placeholder at a time. + + // Allocate space for the format string. We take a copy, so we can modify it. + // Leave enough space for one extra character per expected argument (plus the + // '\0' termination). + const char * format_base = reg<const char *>(0); + VIXL_ASSERT(format_base != NULL); + size_t length = strlen(format_base) + 1; + char * const format = (char *)js_calloc(length + arg_count); + + // A list of chunks, each with exactly one format placeholder. + const char * chunks[kPrintfMaxArgCount]; + + // Copy the format string and search for format placeholders. 
+ uint32_t placeholder_count = 0; + char * format_scratch = format; + for (size_t i = 0; i < length; i++) { + if (format_base[i] != '%') { + *format_scratch++ = format_base[i]; + } else { + if (format_base[i + 1] == '%') { + // Ignore explicit "%%" sequences. + *format_scratch++ = format_base[i]; + i++; + // Chunks after the first are passed as format strings to printf, so we + // need to escape '%' characters in those chunks. + if (placeholder_count > 0) *format_scratch++ = format_base[i]; + } else { + VIXL_CHECK(placeholder_count < arg_count); + // Insert '\0' before placeholders, and store their locations. + *format_scratch++ = '\0'; + chunks[placeholder_count++] = format_scratch; + *format_scratch++ = format_base[i]; + } + } + } + VIXL_CHECK(placeholder_count == arg_count); + + // Finally, call printf with each chunk, passing the appropriate register + // argument. Normally, printf returns the number of bytes transmitted, so we + // can emulate a single printf call by adding the result from each chunk. If + // any call returns a negative (error) value, though, just return that value. + + printf("%s", clr_printf); + + // Because '\0' is inserted before each placeholder, the first string in + // 'format' contains no format placeholders and should be printed literally. + int result = printf("%s", format); + int pcs_r = 1; // Start at x1. x0 holds the format string. + int pcs_f = 0; // Start at d0. + if (result >= 0) { + for (uint32_t i = 0; i < placeholder_count; i++) { + int part_result = -1; + + uint32_t arg_pattern = arg_pattern_list >> (i * kPrintfArgPatternBits); + arg_pattern &= (1 << kPrintfArgPatternBits) - 1; + switch (arg_pattern) { + case kPrintfArgW: part_result = printf(chunks[i], wreg(pcs_r++)); break; + case kPrintfArgX: part_result = printf(chunks[i], xreg(pcs_r++)); break; + case kPrintfArgD: part_result = printf(chunks[i], dreg(pcs_f++)); break; + default: VIXL_UNREACHABLE(); + } + + if (part_result < 0) { + // Handle error values. 
+ result = part_result; + break; + } + + result += part_result; + } + } + + printf("%s", clr_normal); + + // Printf returns its result in x0 (just like the C library's printf). + set_xreg(0, result); + + // The printf parameters are inlined in the code, so skip them. + set_pc(instr->InstructionAtOffset(kPrintfLength)); + + // Set LR as if we'd just called a native printf function. + set_lr(pc()); + + js_free(format); +} + +} // namespace vixl + +#endif // JS_SIMULATOR_ARM64 diff --git a/js/src/jit/arm64/vixl/Simulator-vixl.h b/js/src/jit/arm64/vixl/Simulator-vixl.h new file mode 100644 index 0000000000..af78f5bad0 --- /dev/null +++ b/js/src/jit/arm64/vixl/Simulator-vixl.h @@ -0,0 +1,2592 @@ +// Copyright 2015, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef VIXL_A64_SIMULATOR_A64_H_ +#define VIXL_A64_SIMULATOR_A64_H_ + +#include "jstypes.h" + +#ifdef JS_SIMULATOR_ARM64 + +#include "mozilla/Vector.h" + +#include "jit/arm64/vixl/Assembler-vixl.h" +#include "jit/arm64/vixl/Disasm-vixl.h" +#include "jit/arm64/vixl/Globals-vixl.h" +#include "jit/arm64/vixl/Instructions-vixl.h" +#include "jit/arm64/vixl/Instrument-vixl.h" +#include "jit/arm64/vixl/MozCachingDecoder.h" +#include "jit/arm64/vixl/Simulator-Constants-vixl.h" +#include "jit/arm64/vixl/Utils-vixl.h" +#include "jit/IonTypes.h" +#include "js/AllocPolicy.h" +#include "vm/MutexIDs.h" +#include "wasm/WasmSignalHandlers.h" + +namespace vixl { + +// Representation of memory, with typed getters and setters for access. +class Memory { + public: + template <typename T> + static T AddressUntag(T address) { + // Cast the address using a C-style cast. A reinterpret_cast would be + // appropriate, but it can't cast one integral type to another. 
+ uint64_t bits = (uint64_t)address; + return (T)(bits & ~kAddressTagMask); + } + + template <typename T, typename A> + static T Read(A address) { + T value; + address = AddressUntag(address); + VIXL_ASSERT((sizeof(value) == 1) || (sizeof(value) == 2) || + (sizeof(value) == 4) || (sizeof(value) == 8) || + (sizeof(value) == 16)); + memcpy(&value, reinterpret_cast<const char *>(address), sizeof(value)); + return value; + } + + template <typename T, typename A> + static void Write(A address, T value) { + address = AddressUntag(address); + VIXL_ASSERT((sizeof(value) == 1) || (sizeof(value) == 2) || + (sizeof(value) == 4) || (sizeof(value) == 8) || + (sizeof(value) == 16)); + memcpy(reinterpret_cast<char *>(address), &value, sizeof(value)); + } +}; + +// Represent a register (r0-r31, v0-v31). +template<int kSizeInBytes> +class SimRegisterBase { + public: + SimRegisterBase() : written_since_last_log_(false) {} + + // Write the specified value. The value is zero-extended if necessary. + template<typename T> + void Set(T new_value) { + VIXL_STATIC_ASSERT(sizeof(new_value) <= kSizeInBytes); + if (sizeof(new_value) < kSizeInBytes) { + // All AArch64 registers are zero-extending. + memset(value_ + sizeof(new_value), 0, kSizeInBytes - sizeof(new_value)); + } + memcpy(value_, &new_value, sizeof(new_value)); + NotifyRegisterWrite(); + } + + // Insert a typed value into a register, leaving the rest of the register + // unchanged. The lane parameter indicates where in the register the value + // should be inserted, in the range [ 0, sizeof(value_) / sizeof(T) ), where + // 0 represents the least significant bits. + template<typename T> + void Insert(int lane, T new_value) { + VIXL_ASSERT(lane >= 0); + VIXL_ASSERT((sizeof(new_value) + + (lane * sizeof(new_value))) <= kSizeInBytes); + memcpy(&value_[lane * sizeof(new_value)], &new_value, sizeof(new_value)); + NotifyRegisterWrite(); + } + + // Read the value as the specified type. The value is truncated if necessary. 
+ template<typename T> + T Get(int lane = 0) const { + T result; + VIXL_ASSERT(lane >= 0); + VIXL_ASSERT((sizeof(result) + (lane * sizeof(result))) <= kSizeInBytes); + memcpy(&result, &value_[lane * sizeof(result)], sizeof(result)); + return result; + } + + // TODO: Make this return a map of updated bytes, so that we can highlight + // updated lanes for load-and-insert. (That never happens for scalar code, but + // NEON has some instructions that can update individual lanes.) + bool WrittenSinceLastLog() const { + return written_since_last_log_; + } + + void NotifyRegisterLogged() { + written_since_last_log_ = false; + } + + protected: + uint8_t value_[kSizeInBytes]; + + // Helpers to aid with register tracing. + bool written_since_last_log_; + + void NotifyRegisterWrite() { + written_since_last_log_ = true; + } +}; +typedef SimRegisterBase<kXRegSizeInBytes> SimRegister; // r0-r31 +typedef SimRegisterBase<kQRegSizeInBytes> SimVRegister; // v0-v31 + +// Representation of a vector register, with typed getters and setters for lanes +// and additional information to represent lane state. 
+class LogicVRegister { + public: + inline LogicVRegister(SimVRegister& other) // NOLINT + : register_(other) { + for (unsigned i = 0; i < sizeof(saturated_) / sizeof(saturated_[0]); i++) { + saturated_[i] = kNotSaturated; + } + for (unsigned i = 0; i < sizeof(round_) / sizeof(round_[0]); i++) { + round_[i] = 0; + } + } + + int64_t Int(VectorFormat vform, int index) const { + int64_t element; + switch (LaneSizeInBitsFromFormat(vform)) { + case 8: element = register_.Get<int8_t>(index); break; + case 16: element = register_.Get<int16_t>(index); break; + case 32: element = register_.Get<int32_t>(index); break; + case 64: element = register_.Get<int64_t>(index); break; + default: VIXL_UNREACHABLE(); return 0; + } + return element; + } + + uint64_t Uint(VectorFormat vform, int index) const { + uint64_t element; + switch (LaneSizeInBitsFromFormat(vform)) { + case 8: element = register_.Get<uint8_t>(index); break; + case 16: element = register_.Get<uint16_t>(index); break; + case 32: element = register_.Get<uint32_t>(index); break; + case 64: element = register_.Get<uint64_t>(index); break; + default: VIXL_UNREACHABLE(); return 0; + } + return element; + } + + int64_t IntLeftJustified(VectorFormat vform, int index) const { + return Int(vform, index) << (64 - LaneSizeInBitsFromFormat(vform)); + } + + uint64_t UintLeftJustified(VectorFormat vform, int index) const { + return Uint(vform, index) << (64 - LaneSizeInBitsFromFormat(vform)); + } + + void SetInt(VectorFormat vform, int index, int64_t value) const { + switch (LaneSizeInBitsFromFormat(vform)) { + case 8: register_.Insert(index, static_cast<int8_t>(value)); break; + case 16: register_.Insert(index, static_cast<int16_t>(value)); break; + case 32: register_.Insert(index, static_cast<int32_t>(value)); break; + case 64: register_.Insert(index, static_cast<int64_t>(value)); break; + default: VIXL_UNREACHABLE(); return; + } + } + + void SetUint(VectorFormat vform, int index, uint64_t value) const { + switch 
(LaneSizeInBitsFromFormat(vform)) { + case 8: register_.Insert(index, static_cast<uint8_t>(value)); break; + case 16: register_.Insert(index, static_cast<uint16_t>(value)); break; + case 32: register_.Insert(index, static_cast<uint32_t>(value)); break; + case 64: register_.Insert(index, static_cast<uint64_t>(value)); break; + default: VIXL_UNREACHABLE(); return; + } + } + + void ReadUintFromMem(VectorFormat vform, int index, uint64_t addr) const { + switch (LaneSizeInBitsFromFormat(vform)) { + case 8: register_.Insert(index, Memory::Read<uint8_t>(addr)); break; + case 16: register_.Insert(index, Memory::Read<uint16_t>(addr)); break; + case 32: register_.Insert(index, Memory::Read<uint32_t>(addr)); break; + case 64: register_.Insert(index, Memory::Read<uint64_t>(addr)); break; + default: VIXL_UNREACHABLE(); return; + } + } + + void WriteUintToMem(VectorFormat vform, int index, uint64_t addr) const { + uint64_t value = Uint(vform, index); + switch (LaneSizeInBitsFromFormat(vform)) { + case 8: Memory::Write(addr, static_cast<uint8_t>(value)); break; + case 16: Memory::Write(addr, static_cast<uint16_t>(value)); break; + case 32: Memory::Write(addr, static_cast<uint32_t>(value)); break; + case 64: Memory::Write(addr, value); break; + } + } + + template <typename T> + T Float(int index) const { + return register_.Get<T>(index); + } + + template <typename T> + void SetFloat(int index, T value) const { + register_.Insert(index, value); + } + + // When setting a result in a register of size less than Q, the top bits of + // the Q register must be cleared. + void ClearForWrite(VectorFormat vform) const { + unsigned size = RegisterSizeInBytesFromFormat(vform); + for (unsigned i = size; i < kQRegSizeInBytes; i++) { + SetUint(kFormat16B, i, 0); + } + } + + // Saturation state for each lane of a vector. 
+ enum Saturation { + kNotSaturated = 0, + kSignedSatPositive = 1 << 0, + kSignedSatNegative = 1 << 1, + kSignedSatMask = kSignedSatPositive | kSignedSatNegative, + kSignedSatUndefined = kSignedSatMask, + kUnsignedSatPositive = 1 << 2, + kUnsignedSatNegative = 1 << 3, + kUnsignedSatMask = kUnsignedSatPositive | kUnsignedSatNegative, + kUnsignedSatUndefined = kUnsignedSatMask + }; + + // Getters for saturation state. + Saturation GetSignedSaturation(int index) { + return static_cast<Saturation>(saturated_[index] & kSignedSatMask); + } + + Saturation GetUnsignedSaturation(int index) { + return static_cast<Saturation>(saturated_[index] & kUnsignedSatMask); + } + + // Setters for saturation state. + void ClearSat(int index) { + saturated_[index] = kNotSaturated; + } + + void SetSignedSat(int index, bool positive) { + SetSatFlag(index, positive ? kSignedSatPositive : kSignedSatNegative); + } + + void SetUnsignedSat(int index, bool positive) { + SetSatFlag(index, positive ? kUnsignedSatPositive : kUnsignedSatNegative); + } + + void SetSatFlag(int index, Saturation sat) { + saturated_[index] = static_cast<Saturation>(saturated_[index] | sat); + VIXL_ASSERT((sat & kUnsignedSatMask) != kUnsignedSatUndefined); + VIXL_ASSERT((sat & kSignedSatMask) != kSignedSatUndefined); + } + + // Saturate lanes of a vector based on saturation state. 
+ LogicVRegister& SignedSaturate(VectorFormat vform) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + Saturation sat = GetSignedSaturation(i); + if (sat == kSignedSatPositive) { + SetInt(vform, i, MaxIntFromFormat(vform)); + } else if (sat == kSignedSatNegative) { + SetInt(vform, i, MinIntFromFormat(vform)); + } + } + return *this; + } + + LogicVRegister& UnsignedSaturate(VectorFormat vform) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + Saturation sat = GetUnsignedSaturation(i); + if (sat == kUnsignedSatPositive) { + SetUint(vform, i, MaxUintFromFormat(vform)); + } else if (sat == kUnsignedSatNegative) { + SetUint(vform, i, 0); + } + } + return *this; + } + + // Getter for rounding state. + bool GetRounding(int index) { + return round_[index]; + } + + // Setter for rounding state. + void SetRounding(int index, bool round) { + round_[index] = round; + } + + // Round lanes of a vector based on rounding state. + LogicVRegister& Round(VectorFormat vform) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + SetInt(vform, i, Int(vform, i) + (GetRounding(i) ? 1 : 0)); + } + return *this; + } + + // Unsigned halve lanes of a vector, and use the saturation state to set the + // top bit. + LogicVRegister& Uhalve(VectorFormat vform) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t val = Uint(vform, i); + SetRounding(i, (val & 1) == 1); + val >>= 1; + if (GetUnsignedSaturation(i) != kNotSaturated) { + // If the operation causes unsigned saturation, the bit shifted into the + // most significant bit must be set. + val |= (MaxUintFromFormat(vform) >> 1) + 1; + } + SetInt(vform, i, val); + } + return *this; + } + + // Signed halve lanes of a vector, and use the carry state to set the top bit. 
+ LogicVRegister& Halve(VectorFormat vform) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + int64_t val = Int(vform, i); + SetRounding(i, (val & 1) == 1); + val >>= 1; + if (GetSignedSaturation(i) != kNotSaturated) { + // If the operation causes signed saturation, the sign bit must be + // inverted. + val ^= (MaxUintFromFormat(vform) >> 1) + 1; + } + SetInt(vform, i, val); + } + return *this; + } + + private: + SimVRegister& register_; + + // Allocate one saturation state entry per lane; largest register is type Q, + // and lanes can be a minimum of one byte wide. + Saturation saturated_[kQRegSizeInBytes]; + + // Allocate one rounding state entry per lane. + bool round_[kQRegSizeInBytes]; +}; + +// The proper way to initialize a simulated system register (such as NZCV) is as +// follows: +// SimSystemRegister nzcv = SimSystemRegister::DefaultValueFor(NZCV); +class SimSystemRegister { + public: + // The default constructor represents a register which has no writable bits. + // It is not possible to set its value to anything other than 0. + SimSystemRegister() : value_(0), write_ignore_mask_(0xffffffff) { } + + uint32_t RawValue() const { + return value_; + } + + void SetRawValue(uint32_t new_value) { + value_ = (value_ & write_ignore_mask_) | (new_value & ~write_ignore_mask_); + } + + uint32_t Bits(int msb, int lsb) const { + return ExtractUnsignedBitfield32(msb, lsb, value_); + } + + int32_t SignedBits(int msb, int lsb) const { + return ExtractSignedBitfield32(msb, lsb, value_); + } + + void SetBits(int msb, int lsb, uint32_t bits); + + // Default system register values. 
+ static SimSystemRegister DefaultValueFor(SystemRegister id); + +#define DEFINE_GETTER(Name, HighBit, LowBit, Func) \ + uint32_t Name() const { return Func(HighBit, LowBit); } \ + void Set##Name(uint32_t bits) { SetBits(HighBit, LowBit, bits); } +#define DEFINE_WRITE_IGNORE_MASK(Name, Mask) \ + static const uint32_t Name##WriteIgnoreMask = ~static_cast<uint32_t>(Mask); + + SYSTEM_REGISTER_FIELDS_LIST(DEFINE_GETTER, DEFINE_WRITE_IGNORE_MASK) + +#undef DEFINE_ZERO_BITS +#undef DEFINE_GETTER + + protected: + // Most system registers only implement a few of the bits in the word. Other + // bits are "read-as-zero, write-ignored". The write_ignore_mask argument + // describes the bits which are not modifiable. + SimSystemRegister(uint32_t value, uint32_t write_ignore_mask) + : value_(value), write_ignore_mask_(write_ignore_mask) { } + + uint32_t value_; + uint32_t write_ignore_mask_; +}; + + +class SimExclusiveLocalMonitor { + public: + SimExclusiveLocalMonitor() : kSkipClearProbability(8), seed_(0x87654321) { + Clear(); + } + + // Clear the exclusive monitor (like clrex). + void Clear() { + address_ = 0; + size_ = 0; + } + + // Clear the exclusive monitor most of the time. + void MaybeClear() { + if ((seed_ % kSkipClearProbability) != 0) { + Clear(); + } + + // Advance seed_ using a simple linear congruential generator. + seed_ = (seed_ * 48271) % 2147483647; + } + + // Mark the address range for exclusive access (like load-exclusive). + void MarkExclusive(uint64_t address, size_t size) { + address_ = address; + size_ = size; + } + + // Return true if the address range is marked (like store-exclusive). + // This helper doesn't implicitly clear the monitor. + bool IsExclusive(uint64_t address, size_t size) { + VIXL_ASSERT(size > 0); + // Be pedantic: Require both the address and the size to match. 
+ return (size == size_) && (address == address_); + } + + private: + uint64_t address_; + size_t size_; + + const int kSkipClearProbability; + uint32_t seed_; +}; + + +// We can't accurate simulate the global monitor since it depends on external +// influences. Instead, this implementation occasionally causes accesses to +// fail, according to kPassProbability. +class SimExclusiveGlobalMonitor { + public: + SimExclusiveGlobalMonitor() : kPassProbability(8), seed_(0x87654321) {} + + bool IsExclusive(uint64_t address, size_t size) { + USE(address, size); + + bool pass = (seed_ % kPassProbability) != 0; + // Advance seed_ using a simple linear congruential generator. + seed_ = (seed_ * 48271) % 2147483647; + return pass; + } + + private: + const int kPassProbability; + uint32_t seed_; +}; + +class Redirection; + +class Simulator : public DecoderVisitor { + public: +#ifdef JS_CACHE_SIMULATOR_ARM64 + using Decoder = CachingDecoder; + mozilla::Atomic<bool> pendingCacheRequests = mozilla::Atomic<bool>{ false }; +#endif + explicit Simulator(Decoder* decoder, FILE* stream = stdout); + ~Simulator(); + + // Moz changes. 
+ void init(Decoder* decoder, FILE* stream); + static Simulator* Current(); + static Simulator* Create(); + static void Destroy(Simulator* sim); + uintptr_t stackLimit() const; + uintptr_t* addressOfStackLimit(); + bool overRecursed(uintptr_t newsp = 0) const; + bool overRecursedWithExtra(uint32_t extra) const; + int64_t call(uint8_t* entry, int argument_count, ...); + static void* RedirectNativeFunction(void* nativeFunction, js::jit::ABIFunctionType type); + void setGPR32Result(int32_t result); + void setGPR64Result(int64_t result); + void setFP32Result(float result); + void setFP64Result(double result); +#ifdef JS_CACHE_SIMULATOR_ARM64 + void FlushICache(); +#endif + void VisitCallRedirection(const Instruction* instr); + static uintptr_t StackLimit() { + return Simulator::Current()->stackLimit(); + } + template<typename T> T Read(uintptr_t address); + template <typename T> void Write(uintptr_t address_, T value); + JS::ProfilingFrameIterator::RegisterState registerState(); + + void ResetState(); + + // Run the simulator. + virtual void Run(); + void RunFrom(const Instruction* first); + + // Simulation helpers. + const Instruction* pc() const { return pc_; } + const Instruction* get_pc() const { return pc_; } + int64_t get_sp() const { return xreg(31, Reg31IsStackPointer); } + int64_t get_lr() const { return xreg(30); } + int64_t get_fp() const { return xreg(29); } + + template <typename T> + T get_pc_as() const { return reinterpret_cast<T>(const_cast<Instruction*>(pc())); } + + void set_pc(const Instruction* new_pc) { + pc_ = Memory::AddressUntag(new_pc); + pc_modified_ = true; + } + + // Handle any wasm faults, returning true if the fault was handled. + // This method is rather hot so inline the normal (no-wasm) case. 
+ bool MOZ_ALWAYS_INLINE handle_wasm_seg_fault(uintptr_t addr, unsigned numBytes) { + if (MOZ_LIKELY(!js::wasm::CodeExists)) { + return false; + } + + uint8_t* newPC; + if (!js::wasm::MemoryAccessTraps(registerState(), (uint8_t*)addr, numBytes, &newPC)) { + return false; + } + + set_pc((Instruction*)newPC); + return true; + } + + void increment_pc() { + if (!pc_modified_) { + pc_ = pc_->NextInstruction(); + } + + pc_modified_ = false; + } + + void ExecuteInstruction(); + + // Declare all Visitor functions. + #define DECLARE(A) virtual void Visit##A(const Instruction* instr) override; + VISITOR_LIST_THAT_RETURN(DECLARE) + VISITOR_LIST_THAT_DONT_RETURN(DECLARE) + #undef DECLARE + + + // Integer register accessors. + + // Basic accessor: Read the register as the specified type. + template<typename T> + T reg(unsigned code, Reg31Mode r31mode = Reg31IsZeroRegister) const { + VIXL_ASSERT(code < kNumberOfRegisters); + if ((code == 31) && (r31mode == Reg31IsZeroRegister)) { + T result; + memset(&result, 0, sizeof(result)); + return result; + } + return registers_[code].Get<T>(); + } + + // Common specialized accessors for the reg() template. + int32_t wreg(unsigned code, + Reg31Mode r31mode = Reg31IsZeroRegister) const { + return reg<int32_t>(code, r31mode); + } + + int64_t xreg(unsigned code, + Reg31Mode r31mode = Reg31IsZeroRegister) const { + return reg<int64_t>(code, r31mode); + } + + // As above, with parameterized size and return type. The value is + // either zero-extended or truncated to fit, as required. + template<typename T> + T reg(unsigned size, unsigned code, + Reg31Mode r31mode = Reg31IsZeroRegister) const { + uint64_t raw; + switch (size) { + case kWRegSize: raw = reg<uint32_t>(code, r31mode); break; + case kXRegSize: raw = reg<uint64_t>(code, r31mode); break; + default: + VIXL_UNREACHABLE(); + return 0; + } + + T result; + VIXL_STATIC_ASSERT(sizeof(result) <= sizeof(raw)); + // Copy the result and truncate to fit. This assumes a little-endian host. 
+ memcpy(&result, &raw, sizeof(result)); + return result; + } + + // Use int64_t by default if T is not specified. + int64_t reg(unsigned size, unsigned code, + Reg31Mode r31mode = Reg31IsZeroRegister) const { + return reg<int64_t>(size, code, r31mode); + } + + enum RegLogMode { + LogRegWrites, + NoRegLog + }; + + // Write 'value' into an integer register. The value is zero-extended. This + // behaviour matches AArch64 register writes. + template<typename T> + void set_reg(unsigned code, T value, + RegLogMode log_mode = LogRegWrites, + Reg31Mode r31mode = Reg31IsZeroRegister) { + if (sizeof(T) < kWRegSizeInBytes) { + // We use a C-style cast on purpose here. + // Since we do not have access to 'constepxr if', the casts in this `if` + // must be valid even if we know the code will never be executed, in + // particular when `T` is a pointer type. + int64_t tmp_64bit = (int64_t)value; + int32_t tmp_32bit = static_cast<int32_t>(tmp_64bit); + set_reg<int32_t>(code, tmp_32bit, log_mode, r31mode); + return; + } + + VIXL_ASSERT((sizeof(T) == kWRegSizeInBytes) || + (sizeof(T) == kXRegSizeInBytes)); + VIXL_ASSERT(code < kNumberOfRegisters); + + if ((code == 31) && (r31mode == Reg31IsZeroRegister)) { + return; + } + + registers_[code].Set(value); + + if (log_mode == LogRegWrites) LogRegister(code, r31mode); + } + + // Common specialized accessors for the set_reg() template. + void set_wreg(unsigned code, int32_t value, + RegLogMode log_mode = LogRegWrites, + Reg31Mode r31mode = Reg31IsZeroRegister) { + set_reg(code, value, log_mode, r31mode); + } + + void set_xreg(unsigned code, int64_t value, + RegLogMode log_mode = LogRegWrites, + Reg31Mode r31mode = Reg31IsZeroRegister) { + set_reg(code, value, log_mode, r31mode); + } + + // As above, with parameterized size and type. The value is either + // zero-extended or truncated to fit, as required. 
+ template<typename T> + void set_reg(unsigned size, unsigned code, T value, + RegLogMode log_mode = LogRegWrites, + Reg31Mode r31mode = Reg31IsZeroRegister) { + // Zero-extend the input. + uint64_t raw = 0; + VIXL_STATIC_ASSERT(sizeof(value) <= sizeof(raw)); + memcpy(&raw, &value, sizeof(value)); + + // Write (and possibly truncate) the value. + switch (size) { + case kWRegSize: + set_reg(code, static_cast<uint32_t>(raw), log_mode, r31mode); + break; + case kXRegSize: + set_reg(code, raw, log_mode, r31mode); + break; + default: + VIXL_UNREACHABLE(); + return; + } + } + + // Common specialized accessors for the set_reg() template. + + // Commonly-used special cases. + template<typename T> + void set_lr(T value) { + set_reg(kLinkRegCode, value); + } + + template<typename T> + void set_sp(T value) { + set_reg(31, value, LogRegWrites, Reg31IsStackPointer); + } + + // Vector register accessors. + // These are equivalent to the integer register accessors, but for vector + // registers. + + // A structure for representing a 128-bit Q register. + struct qreg_t { uint8_t val[kQRegSizeInBytes]; }; + + // Basic accessor: read the register as the specified type. + template<typename T> + T vreg(unsigned code) const { + VIXL_STATIC_ASSERT((sizeof(T) == kBRegSizeInBytes) || + (sizeof(T) == kHRegSizeInBytes) || + (sizeof(T) == kSRegSizeInBytes) || + (sizeof(T) == kDRegSizeInBytes) || + (sizeof(T) == kQRegSizeInBytes)); + VIXL_ASSERT(code < kNumberOfVRegisters); + + return vregisters_[code].Get<T>(); + } + + // Common specialized accessors for the vreg() template. 
+ int8_t breg(unsigned code) const { + return vreg<int8_t>(code); + } + + int16_t hreg(unsigned code) const { + return vreg<int16_t>(code); + } + + float sreg(unsigned code) const { + return vreg<float>(code); + } + + uint32_t sreg_bits(unsigned code) const { + return vreg<uint32_t>(code); + } + + double dreg(unsigned code) const { + return vreg<double>(code); + } + + uint64_t dreg_bits(unsigned code) const { + return vreg<uint64_t>(code); + } + + qreg_t qreg(unsigned code) const { + return vreg<qreg_t>(code); + } + + // As above, with parameterized size and return type. The value is + // either zero-extended or truncated to fit, as required. + template<typename T> + T vreg(unsigned size, unsigned code) const { + uint64_t raw = 0; + T result; + + switch (size) { + case kSRegSize: raw = vreg<uint32_t>(code); break; + case kDRegSize: raw = vreg<uint64_t>(code); break; + default: + VIXL_UNREACHABLE(); + break; + } + + VIXL_STATIC_ASSERT(sizeof(result) <= sizeof(raw)); + // Copy the result and truncate to fit. This assumes a little-endian host. + memcpy(&result, &raw, sizeof(result)); + return result; + } + + inline SimVRegister& vreg(unsigned code) { + return vregisters_[code]; + } + + // Basic accessor: Write the specified value. + template<typename T> + void set_vreg(unsigned code, T value, + RegLogMode log_mode = LogRegWrites) { + VIXL_STATIC_ASSERT((sizeof(value) == kBRegSizeInBytes) || + (sizeof(value) == kHRegSizeInBytes) || + (sizeof(value) == kSRegSizeInBytes) || + (sizeof(value) == kDRegSizeInBytes) || + (sizeof(value) == kQRegSizeInBytes)); + VIXL_ASSERT(code < kNumberOfVRegisters); + vregisters_[code].Set(value); + + if (log_mode == LogRegWrites) { + LogVRegister(code, GetPrintRegisterFormat(value)); + } + } + + // Common specialized accessors for the set_vreg() template. 
+ void set_breg(unsigned code, int8_t value, + RegLogMode log_mode = LogRegWrites) { + set_vreg(code, value, log_mode); + } + + void set_hreg(unsigned code, int16_t value, + RegLogMode log_mode = LogRegWrites) { + set_vreg(code, value, log_mode); + } + + void set_sreg(unsigned code, float value, + RegLogMode log_mode = LogRegWrites) { + set_vreg(code, value, log_mode); + } + + void set_sreg_bits(unsigned code, uint32_t value, + RegLogMode log_mode = LogRegWrites) { + set_vreg(code, value, log_mode); + } + + void set_dreg(unsigned code, double value, + RegLogMode log_mode = LogRegWrites) { + set_vreg(code, value, log_mode); + } + + void set_dreg_bits(unsigned code, uint64_t value, + RegLogMode log_mode = LogRegWrites) { + set_vreg(code, value, log_mode); + } + + void set_qreg(unsigned code, qreg_t value, + RegLogMode log_mode = LogRegWrites) { + set_vreg(code, value, log_mode); + } + + bool N() const { return nzcv_.N() != 0; } + bool Z() const { return nzcv_.Z() != 0; } + bool C() const { return nzcv_.C() != 0; } + bool V() const { return nzcv_.V() != 0; } + + SimSystemRegister& ReadNzcv() { return nzcv_; } + SimSystemRegister& nzcv() { return nzcv_; } + + // TODO: Find a way to make the fpcr_ members return the proper types, so + // these accessors are not necessary. + FPRounding RMode() { return static_cast<FPRounding>(fpcr_.RMode()); } + bool DN() { return fpcr_.DN() != 0; } + SimSystemRegister& fpcr() { return fpcr_; } + + UseDefaultNaN ReadDN() const { + return fpcr_.DN() != 0 ? kUseDefaultNaN : kIgnoreDefaultNaN; + } + + // Specify relevant register formats for Print(V)Register and related helpers. + enum PrintRegisterFormat { + // The lane size. 
+ kPrintRegLaneSizeB = 0 << 0, + kPrintRegLaneSizeH = 1 << 0, + kPrintRegLaneSizeS = 2 << 0, + kPrintRegLaneSizeW = kPrintRegLaneSizeS, + kPrintRegLaneSizeD = 3 << 0, + kPrintRegLaneSizeX = kPrintRegLaneSizeD, + kPrintRegLaneSizeQ = 4 << 0, + + kPrintRegLaneSizeOffset = 0, + kPrintRegLaneSizeMask = 7 << 0, + + // The lane count. + kPrintRegAsScalar = 0, + kPrintRegAsDVector = 1 << 3, + kPrintRegAsQVector = 2 << 3, + + kPrintRegAsVectorMask = 3 << 3, + + // Indicate floating-point format lanes. (This flag is only supported for S- + // and D-sized lanes.) + kPrintRegAsFP = 1 << 5, + + // Supported combinations. + + kPrintXReg = kPrintRegLaneSizeX | kPrintRegAsScalar, + kPrintWReg = kPrintRegLaneSizeW | kPrintRegAsScalar, + kPrintSReg = kPrintRegLaneSizeS | kPrintRegAsScalar | kPrintRegAsFP, + kPrintDReg = kPrintRegLaneSizeD | kPrintRegAsScalar | kPrintRegAsFP, + + kPrintReg1B = kPrintRegLaneSizeB | kPrintRegAsScalar, + kPrintReg8B = kPrintRegLaneSizeB | kPrintRegAsDVector, + kPrintReg16B = kPrintRegLaneSizeB | kPrintRegAsQVector, + kPrintReg1H = kPrintRegLaneSizeH | kPrintRegAsScalar, + kPrintReg4H = kPrintRegLaneSizeH | kPrintRegAsDVector, + kPrintReg8H = kPrintRegLaneSizeH | kPrintRegAsQVector, + kPrintReg1S = kPrintRegLaneSizeS | kPrintRegAsScalar, + kPrintReg2S = kPrintRegLaneSizeS | kPrintRegAsDVector, + kPrintReg4S = kPrintRegLaneSizeS | kPrintRegAsQVector, + kPrintReg1SFP = kPrintRegLaneSizeS | kPrintRegAsScalar | kPrintRegAsFP, + kPrintReg2SFP = kPrintRegLaneSizeS | kPrintRegAsDVector | kPrintRegAsFP, + kPrintReg4SFP = kPrintRegLaneSizeS | kPrintRegAsQVector | kPrintRegAsFP, + kPrintReg1D = kPrintRegLaneSizeD | kPrintRegAsScalar, + kPrintReg2D = kPrintRegLaneSizeD | kPrintRegAsQVector, + kPrintReg1DFP = kPrintRegLaneSizeD | kPrintRegAsScalar | kPrintRegAsFP, + kPrintReg2DFP = kPrintRegLaneSizeD | kPrintRegAsQVector | kPrintRegAsFP, + kPrintReg1Q = kPrintRegLaneSizeQ | kPrintRegAsScalar + }; + + unsigned GetPrintRegLaneSizeInBytesLog2(PrintRegisterFormat 
format) { + return (format & kPrintRegLaneSizeMask) >> kPrintRegLaneSizeOffset; + } + + unsigned GetPrintRegLaneSizeInBytes(PrintRegisterFormat format) { + return 1 << GetPrintRegLaneSizeInBytesLog2(format); + } + + unsigned GetPrintRegSizeInBytesLog2(PrintRegisterFormat format) { + if (format & kPrintRegAsDVector) return kDRegSizeInBytesLog2; + if (format & kPrintRegAsQVector) return kQRegSizeInBytesLog2; + + // Scalar types. + return GetPrintRegLaneSizeInBytesLog2(format); + } + + unsigned GetPrintRegSizeInBytes(PrintRegisterFormat format) { + return 1 << GetPrintRegSizeInBytesLog2(format); + } + + unsigned GetPrintRegLaneCount(PrintRegisterFormat format) { + unsigned reg_size_log2 = GetPrintRegSizeInBytesLog2(format); + unsigned lane_size_log2 = GetPrintRegLaneSizeInBytesLog2(format); + VIXL_ASSERT(reg_size_log2 >= lane_size_log2); + return 1 << (reg_size_log2 - lane_size_log2); + } + + PrintRegisterFormat GetPrintRegisterFormatForSize(unsigned reg_size, + unsigned lane_size); + + PrintRegisterFormat GetPrintRegisterFormatForSize(unsigned size) { + return GetPrintRegisterFormatForSize(size, size); + } + + PrintRegisterFormat GetPrintRegisterFormatForSizeFP(unsigned size) { + switch (size) { + default: VIXL_UNREACHABLE(); return kPrintDReg; + case kDRegSizeInBytes: return kPrintDReg; + case kSRegSizeInBytes: return kPrintSReg; + } + } + + PrintRegisterFormat GetPrintRegisterFormatTryFP(PrintRegisterFormat format) { + if ((GetPrintRegLaneSizeInBytes(format) == kSRegSizeInBytes) || + (GetPrintRegLaneSizeInBytes(format) == kDRegSizeInBytes)) { + return static_cast<PrintRegisterFormat>(format | kPrintRegAsFP); + } + return format; + } + + template<typename T> + PrintRegisterFormat GetPrintRegisterFormat(T value) { + return GetPrintRegisterFormatForSize(sizeof(value)); + } + + PrintRegisterFormat GetPrintRegisterFormat(double value) { + VIXL_STATIC_ASSERT(sizeof(value) == kDRegSizeInBytes); + return GetPrintRegisterFormatForSizeFP(sizeof(value)); + } + + 
PrintRegisterFormat GetPrintRegisterFormat(float value) { + VIXL_STATIC_ASSERT(sizeof(value) == kSRegSizeInBytes); + return GetPrintRegisterFormatForSizeFP(sizeof(value)); + } + + PrintRegisterFormat GetPrintRegisterFormat(VectorFormat vform); + + // Print all registers of the specified types. + void PrintRegisters(); + void PrintVRegisters(); + void PrintSystemRegisters(); + + // As above, but only print the registers that have been updated. + void PrintWrittenRegisters(); + void PrintWrittenVRegisters(); + + // As above, but respect LOG_REG and LOG_VREG. + inline void LogWrittenRegisters() { + if (trace_parameters() & LOG_REGS) PrintWrittenRegisters(); + } + inline void LogWrittenVRegisters() { + if (trace_parameters() & LOG_VREGS) PrintWrittenVRegisters(); + } + inline void LogAllWrittenRegisters() { + LogWrittenRegisters(); + LogWrittenVRegisters(); + } + + // Print individual register values (after update). + void PrintRegister(unsigned code, Reg31Mode r31mode = Reg31IsStackPointer); + void PrintVRegister(unsigned code, PrintRegisterFormat format); + void PrintSystemRegister(SystemRegister id); + + // Like Print* (above), but respect trace_parameters(). + void LogRegister(unsigned code, Reg31Mode r31mode = Reg31IsStackPointer) { + if (trace_parameters() & LOG_REGS) PrintRegister(code, r31mode); + } + void LogVRegister(unsigned code, PrintRegisterFormat format) { + if (trace_parameters() & LOG_VREGS) PrintVRegister(code, format); + } + void LogSystemRegister(SystemRegister id) { + if (trace_parameters() & LOG_SYSREGS) PrintSystemRegister(id); + } + + // Print memory accesses. 
+ void PrintRead(uintptr_t address, unsigned reg_code, + PrintRegisterFormat format); + void PrintWrite(uintptr_t address, unsigned reg_code, + PrintRegisterFormat format); + void PrintVRead(uintptr_t address, unsigned reg_code, + PrintRegisterFormat format, unsigned lane); + void PrintVWrite(uintptr_t address, unsigned reg_code, + PrintRegisterFormat format, unsigned lane); + + // Like Print* (above), but respect trace_parameters(). + void LogRead(uintptr_t address, unsigned reg_code, + PrintRegisterFormat format) { + if (trace_parameters() & LOG_REGS) PrintRead(address, reg_code, format); + } + void LogWrite(uintptr_t address, unsigned reg_code, + PrintRegisterFormat format) { + if (trace_parameters() & LOG_WRITE) PrintWrite(address, reg_code, format); + } + void LogVRead(uintptr_t address, unsigned reg_code, + PrintRegisterFormat format, unsigned lane = 0) { + if (trace_parameters() & LOG_VREGS) { + PrintVRead(address, reg_code, format, lane); + } + } + void LogVWrite(uintptr_t address, unsigned reg_code, + PrintRegisterFormat format, unsigned lane = 0) { + if (trace_parameters() & LOG_WRITE) { + PrintVWrite(address, reg_code, format, lane); + } + } + + // Helper functions for register tracing. 
+ void PrintRegisterRawHelper(unsigned code, Reg31Mode r31mode, + int size_in_bytes = kXRegSizeInBytes); + void PrintVRegisterRawHelper(unsigned code, int bytes = kQRegSizeInBytes, + int lsb = 0); + void PrintVRegisterFPHelper(unsigned code, unsigned lane_size_in_bytes, + int lane_count = 1, int rightmost_lane = 0); + + void DoUnreachable(const Instruction* instr); + void DoTrace(const Instruction* instr); + void DoLog(const Instruction* instr); + + static const char* WRegNameForCode(unsigned code, + Reg31Mode mode = Reg31IsZeroRegister); + static const char* XRegNameForCode(unsigned code, + Reg31Mode mode = Reg31IsZeroRegister); + static const char* SRegNameForCode(unsigned code); + static const char* DRegNameForCode(unsigned code); + static const char* VRegNameForCode(unsigned code); + + bool coloured_trace() const { return coloured_trace_; } + void set_coloured_trace(bool value); + + int trace_parameters() const { return trace_parameters_; } + void set_trace_parameters(int parameters); + + void set_instruction_stats(bool value); + + // Clear the simulated local monitor to force the next store-exclusive + // instruction to fail. 
+  void ClearLocalMonitor() { local_monitor_.Clear(); }
+
+  // Sets print_exclusive_access_warning_ to false.
+  void SilenceExclusiveAccessWarning() {
+    print_exclusive_access_warning_ = false;
+  }
+
+ protected:
+  // NOTE(review): presumably the text prefixes used for each category of trace
+  // output -- confirm against set_coloured_trace().
+  const char* clr_normal;
+  const char* clr_flag_name;
+  const char* clr_flag_value;
+  const char* clr_reg_name;
+  const char* clr_reg_value;
+  const char* clr_vreg_name;
+  const char* clr_vreg_value;
+  const char* clr_memory_address;
+  const char* clr_warning;
+  const char* clr_warning_message;
+  const char* clr_printf;
+
+  // Simulation helpers ------------------------------------
+
+  // Evaluates condition code `cond` through the N(), Z(), C() and V() flag
+  // accessors. `nv` is handled exactly like `al` (always true). Any value not
+  // listed below reaches VIXL_UNREACHABLE() and then yields false.
+  bool ConditionPassed(Condition cond) {
+    switch (cond) {
+      // Tests of a single flag.
+      case eq: return Z();
+      case ne: return !Z();
+      case hs: return C();
+      case lo: return !C();
+      case mi: return N();
+      case pl: return !N();
+      case vs: return V();
+      case vc: return !V();
+      // Tests combining carry and zero; `ls` is the negation of `hi`.
+      case hi: return C() && !Z();
+      case ls: return !C() || Z();
+      // Tests combining negative and overflow (plus zero for gt/le); `lt`
+      // negates `ge` and `le` negates `gt`.
+      case ge: return N() == V();
+      case lt: return N() != V();
+      case gt: return !Z() && (N() == V());
+      case le: return Z() || (N() != V());
+      // Unconditional encodings.
+      case nv:
+      case al:
+        return true;
+      default:
+        VIXL_UNREACHABLE();
+        return false;
+    }
+  }
+
+  // Overload taking the condition as an Instr value; it is cast to Condition
+  // and forwarded to the overload above.
+  bool ConditionPassed(Instr cond) {
+    const Condition decoded = static_cast<Condition>(cond);
+    return ConditionPassed(decoded);
+  }
+
+  // Logical negation of ConditionPassed(cond).
+  bool ConditionFailed(Condition cond) {
+    const bool passed = ConditionPassed(cond);
+    return !passed;
+  }
+
+  // Instruction helpers. Only the declarations are visible in this chunk.
+  // carry_in of AddWithCarry defaults to 0.
+  void AddSubHelper(const Instruction* instr, int64_t op2);
+  uint64_t AddWithCarry(unsigned reg_size, bool set_flags, uint64_t left,
+                        uint64_t right, int carry_in = 0);
+  void LogicalHelper(const Instruction* instr, int64_t op2);
+  void ConditionalCompareHelper(const Instruction* instr, int64_t op2);
+  void LoadStoreHelper(const Instruction* instr, int64_t offset,
+                       AddrMode addrmode);
+  void LoadStorePairHelper(const Instruction* instr, AddrMode addrmode);
+  template <typename T>
+  void CompareAndSwapHelper(const Instruction* instr);
+  template <typename T>
+  void CompareAndSwapPairHelper(const Instruction* instr);
+  template <typename T>
+  void AtomicMemorySimpleHelper(const Instruction* instr);
+  template <typename T>
+  void AtomicMemorySwapHelper(const Instruction* instr);
+  template <typename T>
+  void LoadAcquireRCpcHelper(const Instruction* instr);
+  uintptr_t AddressModeHelper(unsigned addr_reg, int64_t offset,
+                              AddrMode addrmode);
+  void NEONLoadStoreMultiStructHelper(const Instruction* instr,
+                                      AddrMode addr_mode);
+  void NEONLoadStoreSingleStructHelper(const Instruction* instr,
+                                       AddrMode addr_mode);
+
+  // Returns `address` with every bit that is set in kAddressTagMask cleared.
+  uint64_t AddressUntag(uint64_t address) { return address & ~kAddressTagMask; }
+
+  // Pointer flavour: converts the pointer to uintptr_t, untags it with the
+  // integer overload, and converts the result back to T*.
+  template <typename T>
+  T* AddressUntag(T* address) {
+    return reinterpret_cast<T*>(
+        AddressUntag(reinterpret_cast<uintptr_t>(address)));
+  }
+
+  // left_shift of ExtendValue defaults to 0.
+  int64_t ShiftOperand(unsigned reg_size, int64_t value, Shift shift_type,
+                       unsigned amount);
+  int64_t Rotate(unsigned reg_width, int64_t value, Shift shift_type,
+                 unsigned amount);
+  int64_t ExtendValue(unsigned reg_width, int64_t value, Extend extend_type,
+                      unsigned left_shift = 0);
+  uint16_t PolynomialMult(uint8_t op1, uint8_t op2);
+
+  // ld1/ld2/ld3 overload families: each takes a VectorFormat, one to three
+  // LogicVRegister destinations and a 64-bit address; the `index` overloads
+  // additionally take an int index.
+  void ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr);
+  void ld1(VectorFormat vform, LogicVRegister dst, int index, uint64_t addr);
+  void ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr);
+  void ld2(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2,
+           uint64_t addr);
+  void ld2(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2,
+           int index, uint64_t addr);
+  void ld2r(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2,
+            uint64_t addr);
+  void ld3(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2,
+           LogicVRegister dst3, uint64_t addr);
+  void ld3(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2,
+           LogicVRegister dst3, int index, uint64_t addr);
+  void ld3r(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2,
+            LogicVRegister dst3,
+            uint64_t
addr); + void ld4(VectorFormat vform, + LogicVRegister dst1, + LogicVRegister dst2, + LogicVRegister dst3, + LogicVRegister dst4, + uint64_t addr); + void ld4(VectorFormat vform, + LogicVRegister dst1, + LogicVRegister dst2, + LogicVRegister dst3, + LogicVRegister dst4, + int index, + uint64_t addr); + void ld4r(VectorFormat vform, + LogicVRegister dst1, + LogicVRegister dst2, + LogicVRegister dst3, + LogicVRegister dst4, + uint64_t addr); + void st1(VectorFormat vform, + LogicVRegister src, + uint64_t addr); + void st1(VectorFormat vform, + LogicVRegister src, + int index, + uint64_t addr); + void st2(VectorFormat vform, + LogicVRegister src, + LogicVRegister src2, + uint64_t addr); + void st2(VectorFormat vform, + LogicVRegister src, + LogicVRegister src2, + int index, + uint64_t addr); + void st3(VectorFormat vform, + LogicVRegister src, + LogicVRegister src2, + LogicVRegister src3, + uint64_t addr); + void st3(VectorFormat vform, + LogicVRegister src, + LogicVRegister src2, + LogicVRegister src3, + int index, + uint64_t addr); + void st4(VectorFormat vform, + LogicVRegister src, + LogicVRegister src2, + LogicVRegister src3, + LogicVRegister src4, + uint64_t addr); + void st4(VectorFormat vform, + LogicVRegister src, + LogicVRegister src2, + LogicVRegister src3, + LogicVRegister src4, + int index, + uint64_t addr); + LogicVRegister cmp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + Condition cond); + LogicVRegister cmp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + int imm, + Condition cond); + LogicVRegister cmptst(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister add(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister addp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister 
mla(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister mls(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister mul(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister mul(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister mla(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister mls(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister pmul(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + + typedef LogicVRegister (Simulator::*ByElementOp)(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister fmul(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister fmla(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister fmls(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister fmulx(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister smull(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister smull2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister umull(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const 
LogicVRegister& src2, + int index); + LogicVRegister umull2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister smlal(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister smlal2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister umlal(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister umlal2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister smlsl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister smlsl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister umlsl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister umlsl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister sqdmull(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister sqdmull2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister sqdmlal(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister sqdmlal2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister sqdmlsl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); 
+ LogicVRegister sqdmlsl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister sqdmulh(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister sqrdmulh(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister sub(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister and_(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister orr(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister orn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister eor(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister bic(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister bic(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + uint64_t imm); + LogicVRegister bif(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister bit(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister bsl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister cls(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister clz(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister cnt(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister not_(VectorFormat vform, + LogicVRegister dst, + 
const LogicVRegister& src); + LogicVRegister rbit(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister rev(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int revSize); + LogicVRegister rev16(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister rev32(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister rev64(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister addlp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + bool is_signed, + bool do_accumulate); + LogicVRegister saddlp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister uaddlp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister sadalp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister uadalp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister ext(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister ins_element(VectorFormat vform, + LogicVRegister dst, + int dst_index, + const LogicVRegister& src, + int src_index); + LogicVRegister ins_immediate(VectorFormat vform, + LogicVRegister dst, + int dst_index, + uint64_t imm); + LogicVRegister dup_element(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int src_index); + LogicVRegister dup_immediate(VectorFormat vform, + LogicVRegister dst, + uint64_t imm); + LogicVRegister mov(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister movi(VectorFormat vform, + LogicVRegister dst, + uint64_t imm); + LogicVRegister mvni(VectorFormat vform, + LogicVRegister dst, + uint64_t imm); + LogicVRegister orr(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + uint64_t imm); + 
LogicVRegister sshl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister ushl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister sminmax(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool max); + LogicVRegister smax(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister smin(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister sminmaxp(VectorFormat vform, + LogicVRegister dst, + int dst_index, + const LogicVRegister& src, + bool max); + LogicVRegister smaxp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister sminp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister addp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister addv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister uaddlv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister saddlv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister sminmaxv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + bool max); + LogicVRegister smaxv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister sminv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister uxtl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister uxtl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister sxtl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister 
sxtl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister tbl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& tab, + const LogicVRegister& ind); + LogicVRegister tbl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& tab, + const LogicVRegister& tab2, + const LogicVRegister& ind); + LogicVRegister tbl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& tab, + const LogicVRegister& tab2, + const LogicVRegister& tab3, + const LogicVRegister& ind); + LogicVRegister tbl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& tab, + const LogicVRegister& tab2, + const LogicVRegister& tab3, + const LogicVRegister& tab4, + const LogicVRegister& ind); + LogicVRegister tbx(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& tab, + const LogicVRegister& ind); + LogicVRegister tbx(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& tab, + const LogicVRegister& tab2, + const LogicVRegister& ind); + LogicVRegister tbx(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& tab, + const LogicVRegister& tab2, + const LogicVRegister& tab3, + const LogicVRegister& ind); + LogicVRegister tbx(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& tab, + const LogicVRegister& tab2, + const LogicVRegister& tab3, + const LogicVRegister& tab4, + const LogicVRegister& ind); + LogicVRegister uaddl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister uaddl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister uaddw(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister uaddw2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister saddl(VectorFormat vform, + LogicVRegister dst, + const 
LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister saddl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister saddw(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister saddw2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister usubl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister usubl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister usubw(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister usubw2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister ssubl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister ssubl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister ssubw(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister ssubw2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister uminmax(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool max); + LogicVRegister umax(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister umin(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister uminmaxp(VectorFormat vform, + LogicVRegister dst, + int dst_index, + const LogicVRegister& src, + bool max); + LogicVRegister umaxp(VectorFormat 
vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister uminp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister uminmaxv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + bool max); + LogicVRegister umaxv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister uminv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister trn1(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister trn2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister zip1(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister zip2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister uzp1(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister uzp2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister shl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister scvtf(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int fbits, + FPRounding rounding_mode); + LogicVRegister ucvtf(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int fbits, + FPRounding rounding_mode); + LogicVRegister sshll(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister sshll2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister shll(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister shll2(VectorFormat vform, + 
LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister ushll(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister ushll2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister sli(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister sri(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister sshr(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister ushr(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister ssra(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister usra(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister srsra(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister ursra(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister suqadd(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister usqadd(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister sqshl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister uqshl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister sqshlu(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister abs(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister neg(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister extractnarrow(VectorFormat vform, + LogicVRegister dst, + bool dstIsSigned, + const LogicVRegister& src, + bool srcIsSigned); + LogicVRegister 
xtn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister sqxtn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister uqxtn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister sqxtun(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister absdiff(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool issigned); + LogicVRegister saba(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister uaba(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister shrn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister shrn2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister rshrn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister rshrn2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister uqshrn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister uqshrn2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister uqrshrn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister uqrshrn2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister sqshrn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister sqshrn2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister sqrshrn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister sqrshrn2(VectorFormat 
vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister sqshrun(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister sqshrun2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister sqrshrun(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister sqrshrun2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister sqrdmulh(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool round = true); + LogicVRegister sqdmulh(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + #define NEON_3VREG_LOGIC_LIST(V) \ + V(addhn) \ + V(addhn2) \ + V(raddhn) \ + V(raddhn2) \ + V(subhn) \ + V(subhn2) \ + V(rsubhn) \ + V(rsubhn2) \ + V(pmull) \ + V(pmull2) \ + V(sabal) \ + V(sabal2) \ + V(uabal) \ + V(uabal2) \ + V(sabdl) \ + V(sabdl2) \ + V(uabdl) \ + V(uabdl2) \ + V(smull) \ + V(smull2) \ + V(umull) \ + V(umull2) \ + V(smlal) \ + V(smlal2) \ + V(umlal) \ + V(umlal2) \ + V(smlsl) \ + V(smlsl2) \ + V(umlsl) \ + V(umlsl2) \ + V(sqdmlal) \ + V(sqdmlal2) \ + V(sqdmlsl) \ + V(sqdmlsl2) \ + V(sqdmull) \ + V(sqdmull2) + + #define DEFINE_LOGIC_FUNC(FXN) \ + LogicVRegister FXN(VectorFormat vform, \ + LogicVRegister dst, \ + const LogicVRegister& src1, \ + const LogicVRegister& src2); + NEON_3VREG_LOGIC_LIST(DEFINE_LOGIC_FUNC) + #undef DEFINE_LOGIC_FUNC + + #define NEON_FP3SAME_LIST(V) \ + V(fadd, FPAdd, false) \ + V(fsub, FPSub, true) \ + V(fmul, FPMul, true) \ + V(fmulx, FPMulx, true) \ + V(fdiv, FPDiv, true) \ + V(fmax, FPMax, false) \ + V(fmin, FPMin, false) \ + V(fmaxnm, FPMaxNM, false) \ + V(fminnm, FPMinNM, false) + + #define DECLARE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN) \ + template <typename T> \ + LogicVRegister FN(VectorFormat vform, \ + LogicVRegister dst, \ + const 
LogicVRegister& src1, \ + const LogicVRegister& src2); \ + LogicVRegister FN(VectorFormat vform, \ + LogicVRegister dst, \ + const LogicVRegister& src1, \ + const LogicVRegister& src2); + NEON_FP3SAME_LIST(DECLARE_NEON_FP_VECTOR_OP) + #undef DECLARE_NEON_FP_VECTOR_OP + + #define NEON_FPPAIRWISE_LIST(V) \ + V(faddp, fadd, FPAdd) \ + V(fmaxp, fmax, FPMax) \ + V(fmaxnmp, fmaxnm, FPMaxNM) \ + V(fminp, fmin, FPMin) \ + V(fminnmp, fminnm, FPMinNM) + + #define DECLARE_NEON_FP_PAIR_OP(FNP, FN, OP) \ + LogicVRegister FNP(VectorFormat vform, \ + LogicVRegister dst, \ + const LogicVRegister& src1, \ + const LogicVRegister& src2); \ + LogicVRegister FNP(VectorFormat vform, \ + LogicVRegister dst, \ + const LogicVRegister& src); + NEON_FPPAIRWISE_LIST(DECLARE_NEON_FP_PAIR_OP) + #undef DECLARE_NEON_FP_PAIR_OP + + template <typename T> + LogicVRegister frecps(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister frecps(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + template <typename T> + LogicVRegister frsqrts(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister frsqrts(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + template <typename T> + LogicVRegister fmla(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister fmla(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + template <typename T> + LogicVRegister fmls(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister fmls(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister fnmul(VectorFormat vform, + LogicVRegister dst, + const 
LogicVRegister& src1, + const LogicVRegister& src2); + + template <typename T> + LogicVRegister fcmp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + Condition cond); + LogicVRegister fcmp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + Condition cond); + LogicVRegister fabscmp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + Condition cond); + LogicVRegister fcmp_zero(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + Condition cond); + + template <typename T> + LogicVRegister fneg(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister fneg(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + template <typename T> + LogicVRegister frecpx(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister frecpx(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + template <typename T> + LogicVRegister fabs_(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister fabs_(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister fabd(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister frint(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + FPRounding rounding_mode, + bool inexact_exception = false); + LogicVRegister fcvts(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + FPRounding rounding_mode, + int fbits = 0); + LogicVRegister fcvtu(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + FPRounding rounding_mode, + int fbits = 0); + LogicVRegister fcvtl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister fcvtl2(VectorFormat vform, + LogicVRegister dst, + 
const LogicVRegister& src); + LogicVRegister fcvtn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister fcvtn2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister fcvtxn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister fcvtxn2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister fsqrt(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister frsqrte(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister frecpe(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + FPRounding rounding); + LogicVRegister ursqrte(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister urecpe(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + + typedef float (Simulator::*FPMinMaxOp)(float a, float b); + + LogicVRegister fminmaxv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + FPMinMaxOp Op); + + LogicVRegister fminv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister fmaxv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister fminnmv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister fmaxnmv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + + static const uint32_t CRC32_POLY = 0x04C11DB7; + static const uint32_t CRC32C_POLY = 0x1EDC6F41; + uint32_t Poly32Mod2(unsigned n, uint64_t data, uint32_t poly); + template <typename T> + uint32_t Crc32Checksum(uint32_t acc, T val, uint32_t poly); + uint32_t Crc32Checksum(uint32_t acc, uint64_t val, uint32_t poly); + + void SysOp_W(int op, int64_t val); + + template <typename T> + T FPRecipSqrtEstimate(T op); + template <typename T> + T FPRecipEstimate(T op, FPRounding rounding); + template <typename T, 
typename R> + R FPToFixed(T op, int fbits, bool is_signed, FPRounding rounding); + + void FPCompare(double val0, double val1, FPTrapFlags trap); + double FPRoundInt(double value, FPRounding round_mode); + double recip_sqrt_estimate(double a); + double recip_estimate(double a); + double FPRecipSqrtEstimate(double a); + double FPRecipEstimate(double a); + double FixedToDouble(int64_t src, int fbits, FPRounding round_mode); + double UFixedToDouble(uint64_t src, int fbits, FPRounding round_mode); + float FixedToFloat(int64_t src, int fbits, FPRounding round_mode); + float UFixedToFloat(uint64_t src, int fbits, FPRounding round_mode); + int32_t FPToInt32(double value, FPRounding rmode); + int64_t FPToInt64(double value, FPRounding rmode); + uint32_t FPToUInt32(double value, FPRounding rmode); + uint64_t FPToUInt64(double value, FPRounding rmode); + int32_t FPToFixedJS(double value); + + template <typename T> + T FPAdd(T op1, T op2); + + template <typename T> + T FPDiv(T op1, T op2); + + template <typename T> + T FPMax(T a, T b); + + template <typename T> + T FPMaxNM(T a, T b); + + template <typename T> + T FPMin(T a, T b); + + template <typename T> + T FPMinNM(T a, T b); + + template <typename T> + T FPMul(T op1, T op2); + + template <typename T> + T FPMulx(T op1, T op2); + + template <typename T> + T FPMulAdd(T a, T op1, T op2); + + template <typename T> + T FPSqrt(T op); + + template <typename T> + T FPSub(T op1, T op2); + + template <typename T> + T FPRecipStepFused(T op1, T op2); + + template <typename T> + T FPRSqrtStepFused(T op1, T op2); + + // This doesn't do anything at the moment. We'll need it if we want support + // for cumulative exception bits or floating-point exceptions. + void FPProcessException() { } + + bool FPProcessNaNs(const Instruction* instr); + + // Pseudo Printf instruction + void DoPrintf(const Instruction* instr); + + // Processor state --------------------------------------- + + // Simulated monitors for exclusive access instructions. 
+ SimExclusiveLocalMonitor local_monitor_; + SimExclusiveGlobalMonitor global_monitor_; + + // Output stream. + FILE* stream_; + PrintDisassembler* print_disasm_; + + // Instruction statistics instrumentation. + Instrument* instrumentation_; + + // General purpose registers. Register 31 is the stack pointer. + SimRegister registers_[kNumberOfRegisters]; + + // Vector registers + SimVRegister vregisters_[kNumberOfVRegisters]; + + // Program Status Register. + // bits[31, 27]: Condition flags N, Z, C, and V. + // (Negative, Zero, Carry, Overflow) + SimSystemRegister nzcv_; + + // Floating-Point Control Register + SimSystemRegister fpcr_; + + // Only a subset of FPCR features are supported by the simulator. This helper + // checks that the FPCR settings are supported. + // + // This is checked when floating-point instructions are executed, not when + // FPCR is set. This allows generated code to modify FPCR for external + // functions, or to save and restore it when entering and leaving generated + // code. + void AssertSupportedFPCR() { + VIXL_ASSERT(fpcr().FZ() == 0); // No flush-to-zero support. + VIXL_ASSERT(fpcr().RMode() == FPTieEven); // Ties-to-even rounding only. + + // The simulator does not support half-precision operations so fpcr().AHP() + // is irrelevant, and is not checked here. + } + + static int CalcNFlag(uint64_t result, unsigned reg_size) { + return (result >> (reg_size - 1)) & 1; + } + + static int CalcZFlag(uint64_t result) { + return (result == 0) ? 1 : 0; + } + + static const uint32_t kConditionFlagsMask = 0xf0000000; + + // Stack + byte* stack_; + static const int stack_protection_size_ = 512 * KBytes; + static const int stack_size_ = (2 * MBytes) + (2 * stack_protection_size_); + byte* stack_limit_; + + Decoder* decoder_; + // Indicates if the pc has been modified by the instruction and should not be + // automatically incremented. 
+ bool pc_modified_; + const Instruction* pc_; + + static const char* xreg_names[]; + static const char* wreg_names[]; + static const char* sreg_names[]; + static const char* dreg_names[]; + static const char* vreg_names[]; + + static const Instruction* kEndOfSimAddress; + + private: + template <typename T> + static T FPDefaultNaN(); + + // Standard NaN processing. + template <typename T> + T FPProcessNaN(T op) { + VIXL_ASSERT(std::isnan(op)); + if (IsSignallingNaN(op)) { + FPProcessException(); + } + return DN() ? FPDefaultNaN<T>() : ToQuietNaN(op); + } + + template <typename T> + T FPProcessNaNs(T op1, T op2) { + if (IsSignallingNaN(op1)) { + return FPProcessNaN(op1); + } else if (IsSignallingNaN(op2)) { + return FPProcessNaN(op2); + } else if (std::isnan(op1)) { + VIXL_ASSERT(IsQuietNaN(op1)); + return FPProcessNaN(op1); + } else if (std::isnan(op2)) { + VIXL_ASSERT(IsQuietNaN(op2)); + return FPProcessNaN(op2); + } else { + return 0.0; + } + } + + template <typename T> + T FPProcessNaNs3(T op1, T op2, T op3) { + if (IsSignallingNaN(op1)) { + return FPProcessNaN(op1); + } else if (IsSignallingNaN(op2)) { + return FPProcessNaN(op2); + } else if (IsSignallingNaN(op3)) { + return FPProcessNaN(op3); + } else if (std::isnan(op1)) { + VIXL_ASSERT(IsQuietNaN(op1)); + return FPProcessNaN(op1); + } else if (std::isnan(op2)) { + VIXL_ASSERT(IsQuietNaN(op2)); + return FPProcessNaN(op2); + } else if (std::isnan(op3)) { + VIXL_ASSERT(IsQuietNaN(op3)); + return FPProcessNaN(op3); + } else { + return 0.0; + } + } + + bool coloured_trace_; + + // A set of TraceParameters flags. + int trace_parameters_; + + // Indicates whether the instruction instrumentation is active. + bool instruction_stats_; + + // Indicates whether the exclusive-access warning has been printed. + bool print_exclusive_access_warning_; + void PrintExclusiveAccessWarning(); + + // Indicates that the simulator ran out of memory at some point. + // Data structures may not be fully allocated. 
+ bool oom_; + + public: + // True if the simulator ran out of memory during or after construction. + bool oom() const { return oom_; } + + protected: + mozilla::Vector<int64_t, 0, js::SystemAllocPolicy> spStack_; +}; + +} // namespace vixl + +namespace js { +namespace jit { + +class SimulatorProcess +{ + public: + static SimulatorProcess* singleton_; + + SimulatorProcess() + : lock_(mutexid::Arm64SimulatorLock) + , redirection_(nullptr) + {} + + // Synchronizes access between main thread and compilation threads. + js::Mutex lock_ MOZ_UNANNOTATED; + vixl::Redirection* redirection_; + +#ifdef JS_CACHE_SIMULATOR_ARM64 + // For each simulator, record what other thread registered as instruction + // being invalidated. + struct ICacheFlush { + void* start; + size_t length; + }; + using ICacheFlushes = mozilla::Vector<ICacheFlush, 2>; + struct SimFlushes { + vixl::Simulator* thread; + ICacheFlushes records; + }; + mozilla::Vector<SimFlushes, 1> pendingFlushes_; + + static void recordICacheFlush(void* start, size_t length); + static void membarrier(); + static ICacheFlushes& getICacheFlushes(vixl::Simulator* sim); + [[nodiscard]] static bool registerSimulator(vixl::Simulator* sim); + static void unregisterSimulator(vixl::Simulator* sim); +#endif + + static void setRedirection(vixl::Redirection* redirection) { + singleton_->lock_.assertOwnedByCurrentThread(); + singleton_->redirection_ = redirection; + } + + static vixl::Redirection* redirection() { + singleton_->lock_.assertOwnedByCurrentThread(); + return singleton_->redirection_; + } + + static bool initialize() { + singleton_ = js_new<SimulatorProcess>(); + return !!singleton_; + } + static void destroy() { + js_delete(singleton_); + singleton_ = nullptr; + } +}; + +// Protects the icache and redirection properties of the simulator. 
+class AutoLockSimulatorCache : public js::LockGuard<js::Mutex> +{ + using Base = js::LockGuard<js::Mutex>; + + public: + explicit AutoLockSimulatorCache() + : Base(SimulatorProcess::singleton_->lock_) + { + } +}; + +} // namespace jit +} // namespace js + +#endif // JS_SIMULATOR_ARM64 +#endif // VIXL_A64_SIMULATOR_A64_H_ diff --git a/js/src/jit/arm64/vixl/Utils-vixl.cpp b/js/src/jit/arm64/vixl/Utils-vixl.cpp new file mode 100644 index 0000000000..381c3501d1 --- /dev/null +++ b/js/src/jit/arm64/vixl/Utils-vixl.cpp @@ -0,0 +1,555 @@ +// Copyright 2015, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "jit/arm64/vixl/Utils-vixl.h" + +#include <cstdio> + +namespace vixl { + +// The default NaN values (for FPCR.DN=1). +const double kFP64DefaultNaN = RawbitsToDouble(UINT64_C(0x7ff8000000000000)); +const float kFP32DefaultNaN = RawbitsToFloat(0x7fc00000); +const Float16 kFP16DefaultNaN = RawbitsToFloat16(0x7e00); + +// Floating-point zero values. +const Float16 kFP16PositiveZero = RawbitsToFloat16(0x0); +const Float16 kFP16NegativeZero = RawbitsToFloat16(0x8000); + +// Floating-point infinity values. 
+const Float16 kFP16PositiveInfinity = RawbitsToFloat16(0x7c00); +const Float16 kFP16NegativeInfinity = RawbitsToFloat16(0xfc00); +const float kFP32PositiveInfinity = RawbitsToFloat(0x7f800000); +const float kFP32NegativeInfinity = RawbitsToFloat(0xff800000); +const double kFP64PositiveInfinity = + RawbitsToDouble(UINT64_C(0x7ff0000000000000)); +const double kFP64NegativeInfinity = + RawbitsToDouble(UINT64_C(0xfff0000000000000)); + +bool IsZero(Float16 value) { + uint16_t bits = Float16ToRawbits(value); + return (bits == Float16ToRawbits(kFP16PositiveZero) || + bits == Float16ToRawbits(kFP16NegativeZero)); +} + +uint16_t Float16ToRawbits(Float16 value) { return value.rawbits_; } + +uint32_t FloatToRawbits(float value) { + uint32_t bits = 0; + memcpy(&bits, &value, 4); + return bits; +} + + +uint64_t DoubleToRawbits(double value) { + uint64_t bits = 0; + memcpy(&bits, &value, 8); + return bits; +} + + +Float16 RawbitsToFloat16(uint16_t bits) { + Float16 f; + f.rawbits_ = bits; + return f; +} + + +float RawbitsToFloat(uint32_t bits) { + float value = 0.0; + memcpy(&value, &bits, 4); + return value; +} + + +double RawbitsToDouble(uint64_t bits) { + double value = 0.0; + memcpy(&value, &bits, 8); + return value; +} + + +uint32_t Float16Sign(internal::SimFloat16 val) { + uint16_t rawbits = Float16ToRawbits(val); + return ExtractUnsignedBitfield32(15, 15, rawbits); +} + + +uint32_t Float16Exp(internal::SimFloat16 val) { + uint16_t rawbits = Float16ToRawbits(val); + return ExtractUnsignedBitfield32(14, 10, rawbits); +} + +uint32_t Float16Mantissa(internal::SimFloat16 val) { + uint16_t rawbits = Float16ToRawbits(val); + return ExtractUnsignedBitfield32(9, 0, rawbits); +} + + +uint32_t FloatSign(float val) { + uint32_t rawbits = FloatToRawbits(val); + return ExtractUnsignedBitfield32(31, 31, rawbits); +} + + +uint32_t FloatExp(float val) { + uint32_t rawbits = FloatToRawbits(val); + return ExtractUnsignedBitfield32(30, 23, rawbits); +} + + +uint32_t FloatMantissa(float val) 
{ + uint32_t rawbits = FloatToRawbits(val); + return ExtractUnsignedBitfield32(22, 0, rawbits); +} + + +uint32_t DoubleSign(double val) { + uint64_t rawbits = DoubleToRawbits(val); + return static_cast<uint32_t>(ExtractUnsignedBitfield64(63, 63, rawbits)); +} + + +uint32_t DoubleExp(double val) { + uint64_t rawbits = DoubleToRawbits(val); + return static_cast<uint32_t>(ExtractUnsignedBitfield64(62, 52, rawbits)); +} + + +uint64_t DoubleMantissa(double val) { + uint64_t rawbits = DoubleToRawbits(val); + return ExtractUnsignedBitfield64(51, 0, rawbits); +} + + +internal::SimFloat16 Float16Pack(uint16_t sign, + uint16_t exp, + uint16_t mantissa) { + uint16_t bits = (sign << 15) | (exp << 10) | mantissa; + return RawbitsToFloat16(bits); +} + + +float FloatPack(uint32_t sign, uint32_t exp, uint32_t mantissa) { + uint32_t bits = (sign << 31) | (exp << 23) | mantissa; + return RawbitsToFloat(bits); +} + + +double DoublePack(uint64_t sign, uint64_t exp, uint64_t mantissa) { + uint64_t bits = (sign << 63) | (exp << 52) | mantissa; + return RawbitsToDouble(bits); +} + + +int Float16Classify(Float16 value) { + uint16_t bits = Float16ToRawbits(value); + uint16_t exponent_max = (1 << 5) - 1; + uint16_t exponent_mask = exponent_max << 10; + uint16_t mantissa_mask = (1 << 10) - 1; + + uint16_t exponent = (bits & exponent_mask) >> 10; + uint16_t mantissa = bits & mantissa_mask; + if (exponent == 0) { + if (mantissa == 0) { + return FP_ZERO; + } + return FP_SUBNORMAL; + } else if (exponent == exponent_max) { + if (mantissa == 0) { + return FP_INFINITE; + } + return FP_NAN; + } + return FP_NORMAL; +} + + +unsigned CountClearHalfWords(uint64_t imm, unsigned reg_size) { + VIXL_ASSERT((reg_size % 8) == 0); + int count = 0; + for (unsigned i = 0; i < (reg_size / 16); i++) { + if ((imm & 0xffff) == 0) { + count++; + } + imm >>= 16; + } + return count; +} + + +int BitCount(uint64_t value) { return CountSetBits(value); } + +// Float16 definitions. 
+ +Float16::Float16(double dvalue) { + rawbits_ = + Float16ToRawbits(FPToFloat16(dvalue, FPTieEven, kIgnoreDefaultNaN)); +} + +namespace internal { + +SimFloat16 SimFloat16::operator-() const { + return RawbitsToFloat16(rawbits_ ^ 0x8000); +} + +// SimFloat16 definitions. +SimFloat16 SimFloat16::operator+(SimFloat16 rhs) const { + return static_cast<double>(*this) + static_cast<double>(rhs); +} + +SimFloat16 SimFloat16::operator-(SimFloat16 rhs) const { + return static_cast<double>(*this) - static_cast<double>(rhs); +} + +SimFloat16 SimFloat16::operator*(SimFloat16 rhs) const { + return static_cast<double>(*this) * static_cast<double>(rhs); +} + +SimFloat16 SimFloat16::operator/(SimFloat16 rhs) const { + return static_cast<double>(*this) / static_cast<double>(rhs); +} + +bool SimFloat16::operator<(SimFloat16 rhs) const { + return static_cast<double>(*this) < static_cast<double>(rhs); +} + +bool SimFloat16::operator>(SimFloat16 rhs) const { + return static_cast<double>(*this) > static_cast<double>(rhs); +} + +bool SimFloat16::operator==(SimFloat16 rhs) const { + if (IsNaN(*this) || IsNaN(rhs)) { + return false; + } else if (IsZero(rhs) && IsZero(*this)) { + // +0 and -0 should be treated as equal. 
+ return true; + } + return this->rawbits_ == rhs.rawbits_; +} + +bool SimFloat16::operator!=(SimFloat16 rhs) const { return !(*this == rhs); } + +bool SimFloat16::operator==(double rhs) const { + return static_cast<double>(*this) == static_cast<double>(rhs); +} + +SimFloat16::operator double() const { + return FPToDouble(*this, kIgnoreDefaultNaN); +} + +Int64 BitCount(Uint32 value) { return CountSetBits(value.Get()); } + +} // namespace internal + +float FPToFloat(Float16 value, UseDefaultNaN DN, bool* exception) { + uint16_t bits = Float16ToRawbits(value); + uint32_t sign = bits >> 15; + uint32_t exponent = + ExtractUnsignedBitfield32(kFloat16MantissaBits + kFloat16ExponentBits - 1, + kFloat16MantissaBits, + bits); + uint32_t mantissa = + ExtractUnsignedBitfield32(kFloat16MantissaBits - 1, 0, bits); + + switch (Float16Classify(value)) { + case FP_ZERO: + return (sign == 0) ? 0.0f : -0.0f; + + case FP_INFINITE: + return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity; + + case FP_SUBNORMAL: { + // Calculate shift required to put mantissa into the most-significant bits + // of the destination mantissa. + int shift = CountLeadingZeros(mantissa << (32 - 10)); + + // Shift mantissa and discard implicit '1'. + mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1; + mantissa &= (1 << kFloatMantissaBits) - 1; + + // Adjust the exponent for the shift applied, and rebias. + exponent = exponent - shift + (-15 + 127); + break; + } + + case FP_NAN: + if (IsSignallingNaN(value)) { + if (exception != NULL) { + *exception = true; + } + } + if (DN == kUseDefaultNaN) return kFP32DefaultNaN; + + // Convert NaNs as the processor would: + // - The sign is propagated. + // - The payload (mantissa) is transferred entirely, except that the top + // bit is forced to '1', making the result a quiet NaN. The unused + // (low-order) payload bits are set to 0. + exponent = (1 << kFloatExponentBits) - 1; + + // Increase bits in mantissa, making low-order bits 0. 
+ mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits); + mantissa |= 1 << 22; // Force a quiet NaN. + break; + + case FP_NORMAL: + // Increase bits in mantissa, making low-order bits 0. + mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits); + + // Change exponent bias. + exponent += (-15 + 127); + break; + + default: + VIXL_UNREACHABLE(); + } + return RawbitsToFloat((sign << 31) | (exponent << kFloatMantissaBits) | + mantissa); +} + + +float FPToFloat(double value, + FPRounding round_mode, + UseDefaultNaN DN, + bool* exception) { + // Only the FPTieEven rounding mode is implemented. + VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd)); + USE(round_mode); + + switch (std::fpclassify(value)) { + case FP_NAN: { + if (IsSignallingNaN(value)) { + if (exception != NULL) { + *exception = true; + } + } + if (DN == kUseDefaultNaN) return kFP32DefaultNaN; + + // Convert NaNs as the processor would: + // - The sign is propagated. + // - The payload (mantissa) is transferred as much as possible, except + // that the top bit is forced to '1', making the result a quiet NaN. + uint64_t raw = DoubleToRawbits(value); + + uint32_t sign = raw >> 63; + uint32_t exponent = (1 << 8) - 1; + uint32_t payload = + static_cast<uint32_t>(ExtractUnsignedBitfield64(50, 52 - 23, raw)); + payload |= (1 << 22); // Force a quiet NaN. + + return RawbitsToFloat((sign << 31) | (exponent << 23) | payload); + } + + case FP_ZERO: + case FP_INFINITE: { + // In a C++ cast, any value representable in the target type will be + // unchanged. This is always the case for +/-0.0 and infinities. + return static_cast<float>(value); + } + + case FP_NORMAL: + case FP_SUBNORMAL: { + // Convert double-to-float as the processor would, assuming that FPCR.FZ + // (flush-to-zero) is not set. + uint64_t raw = DoubleToRawbits(value); + // Extract the IEEE-754 double components. + uint32_t sign = raw >> 63; + // Extract the exponent and remove the IEEE-754 encoding bias. 
+ int32_t exponent = + static_cast<int32_t>(ExtractUnsignedBitfield64(62, 52, raw)) - 1023; + // Extract the mantissa and add the implicit '1' bit. + uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw); + if (std::fpclassify(value) == FP_NORMAL) { + mantissa |= (UINT64_C(1) << 52); + } + return FPRoundToFloat(sign, exponent, mantissa, round_mode); + } + } + + VIXL_UNREACHABLE(); + return value; +} + +// TODO: We should consider implementing a full FPToDouble(Float16) +// conversion function (for performance reasons). +double FPToDouble(Float16 value, UseDefaultNaN DN, bool* exception) { + // We can rely on implicit float to double conversion here. + return FPToFloat(value, DN, exception); +} + + +double FPToDouble(float value, UseDefaultNaN DN, bool* exception) { + switch (std::fpclassify(value)) { + case FP_NAN: { + if (IsSignallingNaN(value)) { + if (exception != NULL) { + *exception = true; + } + } + if (DN == kUseDefaultNaN) return kFP64DefaultNaN; + + // Convert NaNs as the processor would: + // - The sign is propagated. + // - The payload (mantissa) is transferred entirely, except that the top + // bit is forced to '1', making the result a quiet NaN. The unused + // (low-order) payload bits are set to 0. + uint32_t raw = FloatToRawbits(value); + + uint64_t sign = raw >> 31; + uint64_t exponent = (1 << 11) - 1; + uint64_t payload = ExtractUnsignedBitfield64(21, 0, raw); + payload <<= (52 - 23); // The unused low-order bits should be 0. + payload |= (UINT64_C(1) << 51); // Force a quiet NaN. + + return RawbitsToDouble((sign << 63) | (exponent << 52) | payload); + } + + case FP_ZERO: + case FP_NORMAL: + case FP_SUBNORMAL: + case FP_INFINITE: { + // All other inputs are preserved in a standard cast, because every value + // representable using an IEEE-754 float is also representable using an + // IEEE-754 double. 
+ return static_cast<double>(value); + } + } + + VIXL_UNREACHABLE(); + return static_cast<double>(value); +} + + +Float16 FPToFloat16(float value, + FPRounding round_mode, + UseDefaultNaN DN, + bool* exception) { + // Only the FPTieEven rounding mode is implemented. + VIXL_ASSERT(round_mode == FPTieEven); + USE(round_mode); + + uint32_t raw = FloatToRawbits(value); + int32_t sign = raw >> 31; + int32_t exponent = ExtractUnsignedBitfield32(30, 23, raw) - 127; + uint32_t mantissa = ExtractUnsignedBitfield32(22, 0, raw); + + switch (std::fpclassify(value)) { + case FP_NAN: { + if (IsSignallingNaN(value)) { + if (exception != NULL) { + *exception = true; + } + } + if (DN == kUseDefaultNaN) return kFP16DefaultNaN; + + // Convert NaNs as the processor would: + // - The sign is propagated. + // - The payload (mantissa) is transferred as much as possible, except + // that the top bit is forced to '1', making the result a quiet NaN. + uint16_t result = (sign == 0) ? Float16ToRawbits(kFP16PositiveInfinity) + : Float16ToRawbits(kFP16NegativeInfinity); + result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits); + result |= (1 << 9); // Force a quiet NaN; + return RawbitsToFloat16(result); + } + + case FP_ZERO: + return (sign == 0) ? kFP16PositiveZero : kFP16NegativeZero; + + case FP_INFINITE: + return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity; + + case FP_NORMAL: + case FP_SUBNORMAL: { + // Convert float-to-half as the processor would, assuming that FPCR.FZ + // (flush-to-zero) is not set. + + // Add the implicit '1' bit to the mantissa. + mantissa += (1 << 23); + return FPRoundToFloat16(sign, exponent, mantissa, round_mode); + } + } + + VIXL_UNREACHABLE(); + return kFP16PositiveZero; +} + + +Float16 FPToFloat16(double value, + FPRounding round_mode, + UseDefaultNaN DN, + bool* exception) { + // Only the FPTieEven rounding mode is implemented. 
+ VIXL_ASSERT(round_mode == FPTieEven); + USE(round_mode); + + uint64_t raw = DoubleToRawbits(value); + int32_t sign = raw >> 63; + int64_t exponent = ExtractUnsignedBitfield64(62, 52, raw) - 1023; + uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw); + + switch (std::fpclassify(value)) { + case FP_NAN: { + if (IsSignallingNaN(value)) { + if (exception != NULL) { + *exception = true; + } + } + if (DN == kUseDefaultNaN) return kFP16DefaultNaN; + + // Convert NaNs as the processor would: + // - The sign is propagated. + // - The payload (mantissa) is transferred as much as possible, except + // that the top bit is forced to '1', making the result a quiet NaN. + uint16_t result = (sign == 0) ? Float16ToRawbits(kFP16PositiveInfinity) + : Float16ToRawbits(kFP16NegativeInfinity); + result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits); + result |= (1 << 9); // Force a quiet NaN; + return RawbitsToFloat16(result); + } + + case FP_ZERO: + return (sign == 0) ? kFP16PositiveZero : kFP16NegativeZero; + + case FP_INFINITE: + return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity; + case FP_NORMAL: + case FP_SUBNORMAL: { + // Convert double-to-half as the processor would, assuming that FPCR.FZ + // (flush-to-zero) is not set. + + // Add the implicit '1' bit to the mantissa. + mantissa += (UINT64_C(1) << 52); + return FPRoundToFloat16(sign, exponent, mantissa, round_mode); + } + } + + VIXL_UNREACHABLE(); + return kFP16PositiveZero; +} + +} // namespace vixl diff --git a/js/src/jit/arm64/vixl/Utils-vixl.h b/js/src/jit/arm64/vixl/Utils-vixl.h new file mode 100644 index 0000000000..d1f6a835f8 --- /dev/null +++ b/js/src/jit/arm64/vixl/Utils-vixl.h @@ -0,0 +1,1283 @@ +// Copyright 2015, VIXL authors +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef VIXL_UTILS_H +#define VIXL_UTILS_H + +#include "mozilla/FloatingPoint.h" + +#include <cmath> +#include <cstring> +#include <limits> +#include <vector> + +#include "jit/arm64/vixl/CompilerIntrinsics-vixl.h" +#include "jit/arm64/vixl/Globals-vixl.h" + +namespace vixl { + +// Macros for compile-time format checking. 
+#if GCC_VERSION_OR_NEWER(4, 4, 0) +#define PRINTF_CHECK(format_index, varargs_index) \ + __attribute__((format(gnu_printf, format_index, varargs_index))) +#else +#define PRINTF_CHECK(format_index, varargs_index) +#endif + +#ifdef __GNUC__ +#define VIXL_HAS_DEPRECATED_WITH_MSG +#elif defined(__clang__) +#ifdef __has_extension +#define VIXL_HAS_DEPRECATED_WITH_MSG +#endif +#endif + +#ifdef VIXL_HAS_DEPRECATED_WITH_MSG +#define VIXL_DEPRECATED(replaced_by, declarator) \ + __attribute__((deprecated("Use \"" replaced_by "\" instead"))) declarator +#else +#define VIXL_DEPRECATED(replaced_by, declarator) declarator +#endif + +#ifdef VIXL_DEBUG +#define VIXL_UNREACHABLE_OR_FALLTHROUGH() VIXL_UNREACHABLE() +#else +#define VIXL_UNREACHABLE_OR_FALLTHROUGH() VIXL_FALLTHROUGH() +#endif + +template <typename T, size_t n> +size_t ArrayLength(const T (&)[n]) { + return n; +} + +// Check number width. +// TODO: Refactor these using templates. +inline bool IsIntN(unsigned n, uint32_t x) { + VIXL_ASSERT((0 < n) && (n < 32)); + uint32_t limit = UINT32_C(1) << (n - 1); + return x < limit; +} +inline bool IsIntN(unsigned n, int32_t x) { + VIXL_ASSERT((0 < n) && (n < 32)); + int32_t limit = INT32_C(1) << (n - 1); + return (-limit <= x) && (x < limit); +} +inline bool IsIntN(unsigned n, uint64_t x) { + VIXL_ASSERT((0 < n) && (n < 64)); + uint64_t limit = UINT64_C(1) << (n - 1); + return x < limit; +} +inline bool IsIntN(unsigned n, int64_t x) { + VIXL_ASSERT((0 < n) && (n < 64)); + int64_t limit = INT64_C(1) << (n - 1); + return (-limit <= x) && (x < limit); +} +VIXL_DEPRECATED("IsIntN", inline bool is_intn(unsigned n, int64_t x)) { + return IsIntN(n, x); +} + +inline bool IsUintN(unsigned n, uint32_t x) { + VIXL_ASSERT((0 < n) && (n < 32)); + return !(x >> n); +} +inline bool IsUintN(unsigned n, int32_t x) { + VIXL_ASSERT((0 < n) && (n < 32)); + // Convert to an unsigned integer to avoid implementation-defined behavior. 
+ return !(static_cast<uint32_t>(x) >> n); +} +inline bool IsUintN(unsigned n, uint64_t x) { + VIXL_ASSERT((0 < n) && (n < 64)); + return !(x >> n); +} +inline bool IsUintN(unsigned n, int64_t x) { + VIXL_ASSERT((0 < n) && (n < 64)); + // Convert to an unsigned integer to avoid implementation-defined behavior. + return !(static_cast<uint64_t>(x) >> n); +} +VIXL_DEPRECATED("IsUintN", inline bool is_uintn(unsigned n, int64_t x)) { + return IsUintN(n, x); +} + +inline uint64_t TruncateToUintN(unsigned n, uint64_t x) { + VIXL_ASSERT((0 < n) && (n < 64)); + return static_cast<uint64_t>(x) & ((UINT64_C(1) << n) - 1); +} +VIXL_DEPRECATED("TruncateToUintN", + inline uint64_t truncate_to_intn(unsigned n, int64_t x)) { + return TruncateToUintN(n, x); +} + +// clang-format off +#define INT_1_TO_32_LIST(V) \ +V(1) V(2) V(3) V(4) V(5) V(6) V(7) V(8) \ +V(9) V(10) V(11) V(12) V(13) V(14) V(15) V(16) \ +V(17) V(18) V(19) V(20) V(21) V(22) V(23) V(24) \ +V(25) V(26) V(27) V(28) V(29) V(30) V(31) V(32) + +#define INT_33_TO_63_LIST(V) \ +V(33) V(34) V(35) V(36) V(37) V(38) V(39) V(40) \ +V(41) V(42) V(43) V(44) V(45) V(46) V(47) V(48) \ +V(49) V(50) V(51) V(52) V(53) V(54) V(55) V(56) \ +V(57) V(58) V(59) V(60) V(61) V(62) V(63) + +#define INT_1_TO_63_LIST(V) INT_1_TO_32_LIST(V) INT_33_TO_63_LIST(V) + +// clang-format on + +#define DECLARE_IS_INT_N(N) \ + inline bool IsInt##N(int64_t x) { return IsIntN(N, x); } \ + VIXL_DEPRECATED("IsInt" #N, inline bool is_int##N(int64_t x)) { \ + return IsIntN(N, x); \ + } + +#define DECLARE_IS_UINT_N(N) \ + inline bool IsUint##N(int64_t x) { return IsUintN(N, x); } \ + VIXL_DEPRECATED("IsUint" #N, inline bool is_uint##N(int64_t x)) { \ + return IsUintN(N, x); \ + } + +#define DECLARE_TRUNCATE_TO_UINT_32(N) \ + inline uint32_t TruncateToUint##N(uint64_t x) { \ + return static_cast<uint32_t>(TruncateToUintN(N, x)); \ + } \ + VIXL_DEPRECATED("TruncateToUint" #N, \ + inline uint32_t truncate_to_int##N(int64_t x)) { \ + return TruncateToUint##N(x); \ 
+ } + +INT_1_TO_63_LIST(DECLARE_IS_INT_N) +INT_1_TO_63_LIST(DECLARE_IS_UINT_N) +INT_1_TO_32_LIST(DECLARE_TRUNCATE_TO_UINT_32) + +#undef DECLARE_IS_INT_N +#undef DECLARE_IS_UINT_N +#undef DECLARE_TRUNCATE_TO_INT_N + +// Bit field extraction. +inline uint64_t ExtractUnsignedBitfield64(int msb, int lsb, uint64_t x) { + VIXL_ASSERT((static_cast<size_t>(msb) < sizeof(x) * 8) && (lsb >= 0) && + (msb >= lsb)); + if ((msb == 63) && (lsb == 0)) return x; + return (x >> lsb) & ((static_cast<uint64_t>(1) << (1 + msb - lsb)) - 1); +} + + +inline uint32_t ExtractUnsignedBitfield32(int msb, int lsb, uint32_t x) { + VIXL_ASSERT((static_cast<size_t>(msb) < sizeof(x) * 8) && (lsb >= 0) && + (msb >= lsb)); + return TruncateToUint32(ExtractUnsignedBitfield64(msb, lsb, x)); +} + + +inline int64_t ExtractSignedBitfield64(int msb, int lsb, int64_t x) { + VIXL_ASSERT((static_cast<size_t>(msb) < sizeof(x) * 8) && (lsb >= 0) && + (msb >= lsb)); + uint64_t temp = ExtractUnsignedBitfield64(msb, lsb, x); + // If the highest extracted bit is set, sign extend. + if ((temp >> (msb - lsb)) == 1) { + temp |= ~UINT64_C(0) << (msb - lsb); + } + int64_t result; + memcpy(&result, &temp, sizeof(result)); + return result; +} + + +inline int32_t ExtractSignedBitfield32(int msb, int lsb, int32_t x) { + VIXL_ASSERT((static_cast<size_t>(msb) < sizeof(x) * 8) && (lsb >= 0) && + (msb >= lsb)); + uint32_t temp = TruncateToUint32(ExtractSignedBitfield64(msb, lsb, x)); + int32_t result; + memcpy(&result, &temp, sizeof(result)); + return result; +} + + +inline uint64_t RotateRight(uint64_t value, + unsigned int rotate, + unsigned int width) { + VIXL_ASSERT((width > 0) && (width <= 64)); + uint64_t width_mask = ~UINT64_C(0) >> (64 - width); + rotate &= 63; + if (rotate > 0) { + value &= width_mask; + value = (value << (width - rotate)) | (value >> rotate); + } + return value & width_mask; +} + + +// Wrapper class for passing FP16 values through the assembler. +// This is purely to aid with type checking/casting. 
+class Float16 { + public: + explicit Float16(double dvalue); + Float16() : rawbits_(0x0) {} + friend uint16_t Float16ToRawbits(Float16 value); + friend Float16 RawbitsToFloat16(uint16_t bits); + + protected: + uint16_t rawbits_; +}; + +// Floating point representation. +uint16_t Float16ToRawbits(Float16 value); + + +uint32_t FloatToRawbits(float value); +VIXL_DEPRECATED("FloatToRawbits", + inline uint32_t float_to_rawbits(float value)) { + return FloatToRawbits(value); +} + +uint64_t DoubleToRawbits(double value); +VIXL_DEPRECATED("DoubleToRawbits", + inline uint64_t double_to_rawbits(double value)) { + return DoubleToRawbits(value); +} + +Float16 RawbitsToFloat16(uint16_t bits); + +float RawbitsToFloat(uint32_t bits); +VIXL_DEPRECATED("RawbitsToFloat", + inline float rawbits_to_float(uint32_t bits)) { + return RawbitsToFloat(bits); +} + +double RawbitsToDouble(uint64_t bits); +VIXL_DEPRECATED("RawbitsToDouble", + inline double rawbits_to_double(uint64_t bits)) { + return RawbitsToDouble(bits); +} + +namespace internal { + +// Internal simulation class used solely by the simulator to +// provide an abstraction layer for any half-precision arithmetic. +class SimFloat16 : public Float16 { + public: + // TODO: We should investigate making this constructor explicit. + // This is currently difficult to do due to a number of templated + // functions in the simulator which rely on returning double values. 
+ SimFloat16(double dvalue) : Float16(dvalue) {} // NOLINT(runtime/explicit) + SimFloat16(Float16 f) { // NOLINT(runtime/explicit) + this->rawbits_ = Float16ToRawbits(f); + } + SimFloat16() : Float16() {} + SimFloat16 operator-() const; + SimFloat16 operator+(SimFloat16 rhs) const; + SimFloat16 operator-(SimFloat16 rhs) const; + SimFloat16 operator*(SimFloat16 rhs) const; + SimFloat16 operator/(SimFloat16 rhs) const; + bool operator<(SimFloat16 rhs) const; + bool operator>(SimFloat16 rhs) const; + bool operator==(SimFloat16 rhs) const; + bool operator!=(SimFloat16 rhs) const; + // This is necessary for conversions peformed in (macro asm) Fmov. + bool operator==(double rhs) const; + operator double() const; +}; +} // namespace internal + +uint32_t Float16Sign(internal::SimFloat16 value); + +uint32_t Float16Exp(internal::SimFloat16 value); + +uint32_t Float16Mantissa(internal::SimFloat16 value); + +uint32_t FloatSign(float value); +VIXL_DEPRECATED("FloatSign", inline uint32_t float_sign(float value)) { + return FloatSign(value); +} + +uint32_t FloatExp(float value); +VIXL_DEPRECATED("FloatExp", inline uint32_t float_exp(float value)) { + return FloatExp(value); +} + +uint32_t FloatMantissa(float value); +VIXL_DEPRECATED("FloatMantissa", inline uint32_t float_mantissa(float value)) { + return FloatMantissa(value); +} + +uint32_t DoubleSign(double value); +VIXL_DEPRECATED("DoubleSign", inline uint32_t double_sign(double value)) { + return DoubleSign(value); +} + +uint32_t DoubleExp(double value); +VIXL_DEPRECATED("DoubleExp", inline uint32_t double_exp(double value)) { + return DoubleExp(value); +} + +uint64_t DoubleMantissa(double value); +VIXL_DEPRECATED("DoubleMantissa", + inline uint64_t double_mantissa(double value)) { + return DoubleMantissa(value); +} + +internal::SimFloat16 Float16Pack(uint16_t sign, + uint16_t exp, + uint16_t mantissa); + +float FloatPack(uint32_t sign, uint32_t exp, uint32_t mantissa); +VIXL_DEPRECATED("FloatPack", + inline float 
float_pack(uint32_t sign, + uint32_t exp, + uint32_t mantissa)) { + return FloatPack(sign, exp, mantissa); +} + +double DoublePack(uint64_t sign, uint64_t exp, uint64_t mantissa); +VIXL_DEPRECATED("DoublePack", + inline double double_pack(uint32_t sign, + uint32_t exp, + uint64_t mantissa)) { + return DoublePack(sign, exp, mantissa); +} + +// An fpclassify() function for 16-bit half-precision floats. +int Float16Classify(Float16 value); +VIXL_DEPRECATED("Float16Classify", inline int float16classify(uint16_t value)) { + return Float16Classify(RawbitsToFloat16(value)); +} + +bool IsZero(Float16 value); + +inline bool IsNaN(float value) { return std::isnan(value); } + +inline bool IsNaN(double value) { return std::isnan(value); } + +inline bool IsNaN(Float16 value) { return Float16Classify(value) == FP_NAN; } + +inline bool IsInf(float value) { return std::isinf(value); } + +inline bool IsInf(double value) { return std::isinf(value); } + +inline bool IsInf(Float16 value) { + return Float16Classify(value) == FP_INFINITE; +} + + +// NaN tests. +inline bool IsSignallingNaN(double num) { + const uint64_t kFP64QuietNaNMask = UINT64_C(0x0008000000000000); + uint64_t raw = DoubleToRawbits(num); + if (IsNaN(num) && ((raw & kFP64QuietNaNMask) == 0)) { + return true; + } + return false; +} + + +inline bool IsSignallingNaN(float num) { + const uint32_t kFP32QuietNaNMask = 0x00400000; + uint32_t raw = FloatToRawbits(num); + if (IsNaN(num) && ((raw & kFP32QuietNaNMask) == 0)) { + return true; + } + return false; +} + + +inline bool IsSignallingNaN(Float16 num) { + const uint16_t kFP16QuietNaNMask = 0x0200; + return IsNaN(num) && ((Float16ToRawbits(num) & kFP16QuietNaNMask) == 0); +} + + +template <typename T> +inline bool IsQuietNaN(T num) { + return IsNaN(num) && !IsSignallingNaN(num); +} + + +// Convert the NaN in 'num' to a quiet NaN. 
+inline double ToQuietNaN(double num) { + const uint64_t kFP64QuietNaNMask = UINT64_C(0x0008000000000000); + VIXL_ASSERT(IsNaN(num)); + return RawbitsToDouble(DoubleToRawbits(num) | kFP64QuietNaNMask); +} + + +inline float ToQuietNaN(float num) { + const uint32_t kFP32QuietNaNMask = 0x00400000; + VIXL_ASSERT(IsNaN(num)); + return RawbitsToFloat(FloatToRawbits(num) | kFP32QuietNaNMask); +} + + +inline internal::SimFloat16 ToQuietNaN(internal::SimFloat16 num) { + const uint16_t kFP16QuietNaNMask = 0x0200; + VIXL_ASSERT(IsNaN(num)); + return internal::SimFloat16( + RawbitsToFloat16(Float16ToRawbits(num) | kFP16QuietNaNMask)); +} + + +// Fused multiply-add. +inline double FusedMultiplyAdd(double op1, double op2, double a) { + return fma(op1, op2, a); +} + + +inline float FusedMultiplyAdd(float op1, float op2, float a) { + return fmaf(op1, op2, a); +} + + +inline uint64_t LowestSetBit(uint64_t value) { return value & -value; } + + +template <typename T> +inline int HighestSetBitPosition(T value) { + VIXL_ASSERT(value != 0); + return (sizeof(value) * 8 - 1) - CountLeadingZeros(value); +} + + +template <typename V> +inline int WhichPowerOf2(V value) { + VIXL_ASSERT(IsPowerOf2(value)); + return CountTrailingZeros(value); +} + + +unsigned CountClearHalfWords(uint64_t imm, unsigned reg_size); + + +int BitCount(uint64_t value); + + +template <typename T> +T ReverseBits(T value) { + VIXL_ASSERT((sizeof(value) == 1) || (sizeof(value) == 2) || + (sizeof(value) == 4) || (sizeof(value) == 8)); + T result = 0; + for (unsigned i = 0; i < (sizeof(value) * 8); i++) { + result = (result << 1) | (value & 1); + value >>= 1; + } + return result; +} + + +template <typename T> +inline T SignExtend(T val, int bitSize) { + VIXL_ASSERT(bitSize > 0); + T mask = (T(2) << (bitSize - 1)) - T(1); + val &= mask; + T sign_bits = -((val >> (bitSize - 1)) << bitSize); + val |= sign_bits; + return val; +} + + +template <typename T> +T ReverseBytes(T value, int block_bytes_log2) { + 
VIXL_ASSERT((sizeof(value) == 4) || (sizeof(value) == 8)); + VIXL_ASSERT((1U << block_bytes_log2) <= sizeof(value)); + // Split the 64-bit value into an 8-bit array, where b[0] is the least + // significant byte, and b[7] is the most significant. + uint8_t bytes[8]; + uint64_t mask = UINT64_C(0xff00000000000000); + for (int i = 7; i >= 0; i--) { + bytes[i] = (static_cast<uint64_t>(value) & mask) >> (i * 8); + mask >>= 8; + } + + // Permutation tables for REV instructions. + // permute_table[0] is used by REV16_x, REV16_w + // permute_table[1] is used by REV32_x, REV_w + // permute_table[2] is used by REV_x + VIXL_ASSERT((0 < block_bytes_log2) && (block_bytes_log2 < 4)); + static const uint8_t permute_table[3][8] = {{6, 7, 4, 5, 2, 3, 0, 1}, + {4, 5, 6, 7, 0, 1, 2, 3}, + {0, 1, 2, 3, 4, 5, 6, 7}}; + uint64_t temp = 0; + for (int i = 0; i < 8; i++) { + temp <<= 8; + temp |= bytes[permute_table[block_bytes_log2 - 1][i]]; + } + + T result; + VIXL_STATIC_ASSERT(sizeof(result) <= sizeof(temp)); + memcpy(&result, &temp, sizeof(result)); + return result; +} + +template <unsigned MULTIPLE, typename T> +inline bool IsMultiple(T value) { + VIXL_ASSERT(IsPowerOf2(MULTIPLE)); + return (value & (MULTIPLE - 1)) == 0; +} + +template <typename T> +inline bool IsMultiple(T value, unsigned multiple) { + VIXL_ASSERT(IsPowerOf2(multiple)); + return (value & (multiple - 1)) == 0; +} + +template <typename T> +inline bool IsAligned(T pointer, int alignment) { + VIXL_ASSERT(IsPowerOf2(alignment)); + return (pointer & (alignment - 1)) == 0; +} + +// Pointer alignment +// TODO: rename/refactor to make it specific to instructions. +template <unsigned ALIGN, typename T> +inline bool IsAligned(T pointer) { + VIXL_ASSERT(sizeof(pointer) == sizeof(intptr_t)); // NOLINT(runtime/sizeof) + // Use C-style casts to get static_cast behaviour for integral types (T), and + // reinterpret_cast behaviour for other types. 
+ return IsAligned((intptr_t)(pointer), ALIGN); +} + +template <typename T> +bool IsWordAligned(T pointer) { + return IsAligned<4>(pointer); +} + +// Increment a pointer until it has the specified alignment. The alignment must +// be a power of two. +template <class T> +T AlignUp(T pointer, + typename Unsigned<sizeof(T) * kBitsPerByte>::type alignment) { + VIXL_ASSERT(IsPowerOf2(alignment)); + // Use C-style casts to get static_cast behaviour for integral types (T), and + // reinterpret_cast behaviour for other types. + + typename Unsigned<sizeof(T)* kBitsPerByte>::type pointer_raw = + (typename Unsigned<sizeof(T) * kBitsPerByte>::type)pointer; + VIXL_STATIC_ASSERT(sizeof(pointer) <= sizeof(pointer_raw)); + + size_t mask = alignment - 1; + T result = (T)((pointer_raw + mask) & ~mask); + VIXL_ASSERT(result >= pointer); + + return result; +} + +// Decrement a pointer until it has the specified alignment. The alignment must +// be a power of two. +template <class T> +T AlignDown(T pointer, + typename Unsigned<sizeof(T) * kBitsPerByte>::type alignment) { + VIXL_ASSERT(IsPowerOf2(alignment)); + // Use C-style casts to get static_cast behaviour for integral types (T), and + // reinterpret_cast behaviour for other types. 
+ + typename Unsigned<sizeof(T)* kBitsPerByte>::type pointer_raw = + (typename Unsigned<sizeof(T) * kBitsPerByte>::type)pointer; + VIXL_STATIC_ASSERT(sizeof(pointer) <= sizeof(pointer_raw)); + + size_t mask = alignment - 1; + return (T)(pointer_raw & ~mask); +} + + +template <typename T> +inline T ExtractBit(T value, unsigned bit) { + return (value >> bit) & T(1); +} + +template <typename Ts, typename Td> +inline Td ExtractBits(Ts value, int least_significant_bit, Td mask) { + return Td((value >> least_significant_bit) & Ts(mask)); +} + +template <typename Ts, typename Td> +inline void AssignBit(Td& dst, // NOLINT(runtime/references) + int bit, + Ts value) { + VIXL_ASSERT((value == Ts(0)) || (value == Ts(1))); + VIXL_ASSERT(bit >= 0); + VIXL_ASSERT(bit < static_cast<int>(sizeof(Td) * 8)); + Td mask(1); + dst &= ~(mask << bit); + dst |= Td(value) << bit; +} + +template <typename Td, typename Ts> +inline void AssignBits(Td& dst, // NOLINT(runtime/references) + int least_significant_bit, + Ts mask, + Ts value) { + VIXL_ASSERT(least_significant_bit >= 0); + VIXL_ASSERT(least_significant_bit < static_cast<int>(sizeof(Td) * 8)); + VIXL_ASSERT(((Td(mask) << least_significant_bit) >> least_significant_bit) == + Td(mask)); + VIXL_ASSERT((value & mask) == value); + dst &= ~(Td(mask) << least_significant_bit); + dst |= Td(value) << least_significant_bit; +} + +class VFP { + public: + static uint32_t FP32ToImm8(float imm) { + // bits: aBbb.bbbc.defg.h000.0000.0000.0000.0000 + uint32_t bits = FloatToRawbits(imm); + // bit7: a000.0000 + uint32_t bit7 = ((bits >> 31) & 0x1) << 7; + // bit6: 0b00.0000 + uint32_t bit6 = ((bits >> 29) & 0x1) << 6; + // bit5_to_0: 00cd.efgh + uint32_t bit5_to_0 = (bits >> 19) & 0x3f; + return static_cast<uint32_t>(bit7 | bit6 | bit5_to_0); + } + static uint32_t FP64ToImm8(double imm) { + // bits: aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000 + // 0000.0000.0000.0000.0000.0000.0000.0000 + uint64_t bits = DoubleToRawbits(imm); + // bit7: a000.0000 + uint64_t 
bit7 = ((bits >> 63) & 0x1) << 7; + // bit6: 0b00.0000 + uint64_t bit6 = ((bits >> 61) & 0x1) << 6; + // bit5_to_0: 00cd.efgh + uint64_t bit5_to_0 = (bits >> 48) & 0x3f; + + return static_cast<uint32_t>(bit7 | bit6 | bit5_to_0); + } + static float Imm8ToFP32(uint32_t imm8) { + // Imm8: abcdefgh (8 bits) + // Single: aBbb.bbbc.defg.h000.0000.0000.0000.0000 (32 bits) + // where B is b ^ 1 + uint32_t bits = imm8; + uint32_t bit7 = (bits >> 7) & 0x1; + uint32_t bit6 = (bits >> 6) & 0x1; + uint32_t bit5_to_0 = bits & 0x3f; + uint32_t result = (bit7 << 31) | ((32 - bit6) << 25) | (bit5_to_0 << 19); + + return RawbitsToFloat(result); + } + static double Imm8ToFP64(uint32_t imm8) { + // Imm8: abcdefgh (8 bits) + // Double: aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000 + // 0000.0000.0000.0000.0000.0000.0000.0000 (64 bits) + // where B is b ^ 1 + uint32_t bits = imm8; + uint64_t bit7 = (bits >> 7) & 0x1; + uint64_t bit6 = (bits >> 6) & 0x1; + uint64_t bit5_to_0 = bits & 0x3f; + uint64_t result = (bit7 << 63) | ((256 - bit6) << 54) | (bit5_to_0 << 48); + return RawbitsToDouble(result); + } + static bool IsImmFP32(float imm) { + // Valid values will have the form: + // aBbb.bbbc.defg.h000.0000.0000.0000.0000 + uint32_t bits = FloatToRawbits(imm); + // bits[19..0] are cleared. + if ((bits & 0x7ffff) != 0) { + return false; + } + + + // bits[29..25] are all set or all cleared. + uint32_t b_pattern = (bits >> 16) & 0x3e00; + if (b_pattern != 0 && b_pattern != 0x3e00) { + return false; + } + // bit[30] and bit[29] are opposite. + if (((bits ^ (bits << 1)) & 0x40000000) == 0) { + return false; + } + return true; + } + static bool IsImmFP64(double imm) { + // Valid values will have the form: + // aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000 + // 0000.0000.0000.0000.0000.0000.0000.0000 + uint64_t bits = DoubleToRawbits(imm); + // bits[47..0] are cleared. + if ((bits & 0x0000ffffffffffff) != 0) { + return false; + } + // bits[61..54] are all set or all cleared. 
+ uint32_t b_pattern = (bits >> 48) & 0x3fc0; + if ((b_pattern != 0) && (b_pattern != 0x3fc0)) { + return false; + } + // bit[62] and bit[61] are opposite. + if (((bits ^ (bits << 1)) & (UINT64_C(1) << 62)) == 0) { + return false; + } + return true; + } +}; + +class BitField { + // ForEachBitHelper is a functor that will call + // bool ForEachBitHelper::execute(ElementType id) const + // and expects a boolean in return whether to continue (if true) + // or stop (if false) + // check_set will check if the bits are on (true) or off(false) + template <typename ForEachBitHelper, bool check_set> + bool ForEachBit(const ForEachBitHelper& helper) { + for (int i = 0; static_cast<size_t>(i) < bitfield_.size(); i++) { + if (bitfield_[i] == check_set) + if (!helper.execute(i)) return false; + } + return true; + } + + public: + explicit BitField(unsigned size) : bitfield_(size, 0) {} + + void Set(int i) { + VIXL_ASSERT((i >= 0) && (static_cast<size_t>(i) < bitfield_.size())); + bitfield_[i] = true; + } + + void Unset(int i) { + VIXL_ASSERT((i >= 0) && (static_cast<size_t>(i) < bitfield_.size())); + bitfield_[i] = true; + } + + bool IsSet(int i) const { return bitfield_[i]; } + + // For each bit not set in the bitfield call the execute functor + // execute. + // ForEachBitSetHelper::execute returns true if the iteration through + // the bits can continue, otherwise it will stop. + // struct ForEachBitSetHelper { + // bool execute(int /*id*/) { return false; } + // }; + template <typename ForEachBitNotSetHelper> + bool ForEachBitNotSet(const ForEachBitNotSetHelper& helper) { + return ForEachBit<ForEachBitNotSetHelper, false>(helper); + } + + // For each bit set in the bitfield call the execute functor + // execute. 
+ template <typename ForEachBitSetHelper> + bool ForEachBitSet(const ForEachBitSetHelper& helper) { + return ForEachBit<ForEachBitSetHelper, true>(helper); + } + + private: + std::vector<bool> bitfield_; +}; + +namespace internal { + +typedef int64_t Int64; +class Uint64; +class Uint128; + +class Uint32 { + uint32_t data_; + + public: + // Unlike uint32_t, Uint32 has a default constructor. + Uint32() { data_ = 0; } + explicit Uint32(uint32_t data) : data_(data) {} + inline explicit Uint32(Uint64 data); + uint32_t Get() const { return data_; } + template <int N> + int32_t GetSigned() const { + return ExtractSignedBitfield32(N - 1, 0, data_); + } + int32_t GetSigned() const { return data_; } + Uint32 operator~() const { return Uint32(~data_); } + Uint32 operator-() const { return Uint32(-data_); } + bool operator==(Uint32 value) const { return data_ == value.data_; } + bool operator!=(Uint32 value) const { return data_ != value.data_; } + bool operator>(Uint32 value) const { return data_ > value.data_; } + Uint32 operator+(Uint32 value) const { return Uint32(data_ + value.data_); } + Uint32 operator-(Uint32 value) const { return Uint32(data_ - value.data_); } + Uint32 operator&(Uint32 value) const { return Uint32(data_ & value.data_); } + Uint32 operator&=(Uint32 value) { + data_ &= value.data_; + return *this; + } + Uint32 operator^(Uint32 value) const { return Uint32(data_ ^ value.data_); } + Uint32 operator^=(Uint32 value) { + data_ ^= value.data_; + return *this; + } + Uint32 operator|(Uint32 value) const { return Uint32(data_ | value.data_); } + Uint32 operator|=(Uint32 value) { + data_ |= value.data_; + return *this; + } + // Unlike uint32_t, the shift functions can accept negative shift and + // return 0 when the shift is too big. 
+ Uint32 operator>>(int shift) const { + if (shift == 0) return *this; + if (shift < 0) { + int tmp = -shift; + if (tmp >= 32) return Uint32(0); + return Uint32(data_ << tmp); + } + int tmp = shift; + if (tmp >= 32) return Uint32(0); + return Uint32(data_ >> tmp); + } + Uint32 operator<<(int shift) const { + if (shift == 0) return *this; + if (shift < 0) { + int tmp = -shift; + if (tmp >= 32) return Uint32(0); + return Uint32(data_ >> tmp); + } + int tmp = shift; + if (tmp >= 32) return Uint32(0); + return Uint32(data_ << tmp); + } +}; + +class Uint64 { + uint64_t data_; + + public: + // Unlike uint64_t, Uint64 has a default constructor. + Uint64() { data_ = 0; } + explicit Uint64(uint64_t data) : data_(data) {} + explicit Uint64(Uint32 data) : data_(data.Get()) {} + inline explicit Uint64(Uint128 data); + uint64_t Get() const { return data_; } + int64_t GetSigned(int N) const { + return ExtractSignedBitfield64(N - 1, 0, data_); + } + int64_t GetSigned() const { return data_; } + Uint32 ToUint32() const { + VIXL_ASSERT((data_ >> 32) == 0); + return Uint32(static_cast<uint32_t>(data_)); + } + Uint32 GetHigh32() const { return Uint32(data_ >> 32); } + Uint32 GetLow32() const { return Uint32(data_ & 0xffffffff); } + Uint64 operator~() const { return Uint64(~data_); } + Uint64 operator-() const { return Uint64(-data_); } + bool operator==(Uint64 value) const { return data_ == value.data_; } + bool operator!=(Uint64 value) const { return data_ != value.data_; } + Uint64 operator+(Uint64 value) const { return Uint64(data_ + value.data_); } + Uint64 operator-(Uint64 value) const { return Uint64(data_ - value.data_); } + Uint64 operator&(Uint64 value) const { return Uint64(data_ & value.data_); } + Uint64 operator&=(Uint64 value) { + data_ &= value.data_; + return *this; + } + Uint64 operator^(Uint64 value) const { return Uint64(data_ ^ value.data_); } + Uint64 operator^=(Uint64 value) { + data_ ^= value.data_; + return *this; + } + Uint64 operator|(Uint64 value) const { 
return Uint64(data_ | value.data_); } + Uint64 operator|=(Uint64 value) { + data_ |= value.data_; + return *this; + } + // Unlike uint64_t, the shift functions can accept negative shift and + // return 0 when the shift is too big. + Uint64 operator>>(int shift) const { + if (shift == 0) return *this; + if (shift < 0) { + int tmp = -shift; + if (tmp >= 64) return Uint64(0); + return Uint64(data_ << tmp); + } + int tmp = shift; + if (tmp >= 64) return Uint64(0); + return Uint64(data_ >> tmp); + } + Uint64 operator<<(int shift) const { + if (shift == 0) return *this; + if (shift < 0) { + int tmp = -shift; + if (tmp >= 64) return Uint64(0); + return Uint64(data_ >> tmp); + } + int tmp = shift; + if (tmp >= 64) return Uint64(0); + return Uint64(data_ << tmp); + } +}; + +class Uint128 { + uint64_t data_high_; + uint64_t data_low_; + + public: + Uint128() : data_high_(0), data_low_(0) {} + explicit Uint128(uint64_t data_low) : data_high_(0), data_low_(data_low) {} + explicit Uint128(Uint64 data_low) + : data_high_(0), data_low_(data_low.Get()) {} + Uint128(uint64_t data_high, uint64_t data_low) + : data_high_(data_high), data_low_(data_low) {} + Uint64 ToUint64() const { + VIXL_ASSERT(data_high_ == 0); + return Uint64(data_low_); + } + Uint64 GetHigh64() const { return Uint64(data_high_); } + Uint64 GetLow64() const { return Uint64(data_low_); } + Uint128 operator~() const { return Uint128(~data_high_, ~data_low_); } + bool operator==(Uint128 value) const { + return (data_high_ == value.data_high_) && (data_low_ == value.data_low_); + } + Uint128 operator&(Uint128 value) const { + return Uint128(data_high_ & value.data_high_, data_low_ & value.data_low_); + } + Uint128 operator&=(Uint128 value) { + data_high_ &= value.data_high_; + data_low_ &= value.data_low_; + return *this; + } + Uint128 operator|=(Uint128 value) { + data_high_ |= value.data_high_; + data_low_ |= value.data_low_; + return *this; + } + Uint128 operator>>(int shift) const { + VIXL_ASSERT((shift >= 0) && 
(shift < 128)); + if (shift == 0) return *this; + if (shift >= 64) { + return Uint128(0, data_high_ >> (shift - 64)); + } + uint64_t tmp = (data_high_ << (64 - shift)) | (data_low_ >> shift); + return Uint128(data_high_ >> shift, tmp); + } + Uint128 operator<<(int shift) const { + VIXL_ASSERT((shift >= 0) && (shift < 128)); + if (shift == 0) return *this; + if (shift >= 64) { + return Uint128(data_low_ << (shift - 64), 0); + } + uint64_t tmp = (data_high_ << shift) | (data_low_ >> (64 - shift)); + return Uint128(tmp, data_low_ << shift); + } +}; + +Uint32::Uint32(Uint64 data) : data_(data.ToUint32().Get()) {} +Uint64::Uint64(Uint128 data) : data_(data.ToUint64().Get()) {} + +Int64 BitCount(Uint32 value); + +} // namespace internal + +// The default NaN values (for FPCR.DN=1). +extern const double kFP64DefaultNaN; +extern const float kFP32DefaultNaN; +extern const Float16 kFP16DefaultNaN; + +// Floating-point infinity values. +extern const Float16 kFP16PositiveInfinity; +extern const Float16 kFP16NegativeInfinity; +extern const float kFP32PositiveInfinity; +extern const float kFP32NegativeInfinity; +extern const double kFP64PositiveInfinity; +extern const double kFP64NegativeInfinity; + +// Floating-point zero values. +extern const Float16 kFP16PositiveZero; +extern const Float16 kFP16NegativeZero; + +// AArch64 floating-point specifics. These match IEEE-754. +const unsigned kDoubleMantissaBits = 52; +const unsigned kDoubleExponentBits = 11; +const unsigned kFloatMantissaBits = 23; +const unsigned kFloatExponentBits = 8; +const unsigned kFloat16MantissaBits = 10; +const unsigned kFloat16ExponentBits = 5; + +enum FPRounding { + // The first four values are encodable directly by FPCR<RMode>. + FPTieEven = 0x0, + FPPositiveInfinity = 0x1, + FPNegativeInfinity = 0x2, + FPZero = 0x3, + + // The final rounding modes are only available when explicitly specified by + // the instruction (such as with fcvta). It cannot be set in FPCR. 
+ FPTieAway, + FPRoundOdd +}; + +enum UseDefaultNaN { kUseDefaultNaN, kIgnoreDefaultNaN }; + +// Assemble the specified IEEE-754 components into the target type and apply +// appropriate rounding. +// sign: 0 = positive, 1 = negative +// exponent: Unbiased IEEE-754 exponent. +// mantissa: The mantissa of the input. The top bit (which is not encoded for +// normal IEEE-754 values) must not be omitted. This bit has the +// value 'pow(2, exponent)'. +// +// The input value is assumed to be a normalized value. That is, the input may +// not be infinity or NaN. If the source value is subnormal, it must be +// normalized before calling this function such that the highest set bit in the +// mantissa has the value 'pow(2, exponent)'. +// +// Callers should use FPRoundToFloat or FPRoundToDouble directly, rather than +// calling a templated FPRound. +template <class T, int ebits, int mbits> +T FPRound(int64_t sign, + int64_t exponent, + uint64_t mantissa, + FPRounding round_mode) { + VIXL_ASSERT((sign == 0) || (sign == 1)); + + // Only FPTieEven and FPRoundOdd rounding modes are implemented. + VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd)); + + // Rounding can promote subnormals to normals, and normals to infinities. For + // example, a double with exponent 127 (FLT_MAX_EXP) would appear to be + // encodable as a float, but rounding based on the low-order mantissa bits + // could make it overflow. With ties-to-even rounding, this value would become + // an infinity. + + // ---- Rounding Method ---- + // + // The exponent is irrelevant in the rounding operation, so we treat the + // lowest-order bit that will fit into the result ('onebit') as having + // the value '1'. Similarly, the highest-order bit that won't fit into + // the result ('halfbit') has the value '0.5'. The 'point' sits between + // 'onebit' and 'halfbit': + // + // These bits fit into the result. 
+ // |---------------------| + // mantissa = 0bxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + // || + // / | + // / halfbit + // onebit + // + // For subnormal outputs, the range of representable bits is smaller and + // the position of onebit and halfbit depends on the exponent of the + // input, but the method is otherwise similar. + // + // onebit(frac) + // | + // | halfbit(frac) halfbit(adjusted) + // | / / + // | | | + // 0b00.0 (exact) -> 0b00.0 (exact) -> 0b00 + // 0b00.0... -> 0b00.0... -> 0b00 + // 0b00.1 (exact) -> 0b00.0111..111 -> 0b00 + // 0b00.1... -> 0b00.1... -> 0b01 + // 0b01.0 (exact) -> 0b01.0 (exact) -> 0b01 + // 0b01.0... -> 0b01.0... -> 0b01 + // 0b01.1 (exact) -> 0b01.1 (exact) -> 0b10 + // 0b01.1... -> 0b01.1... -> 0b10 + // 0b10.0 (exact) -> 0b10.0 (exact) -> 0b10 + // 0b10.0... -> 0b10.0... -> 0b10 + // 0b10.1 (exact) -> 0b10.0111..111 -> 0b10 + // 0b10.1... -> 0b10.1... -> 0b11 + // 0b11.0 (exact) -> 0b11.0 (exact) -> 0b11 + // ... / | / | + // / | / | + // / | + // adjusted = frac - (halfbit(mantissa) & ~onebit(frac)); / | + // + // mantissa = (mantissa >> shift) + halfbit(adjusted); + + static const int mantissa_offset = 0; + static const int exponent_offset = mantissa_offset + mbits; + static const int sign_offset = exponent_offset + ebits; + VIXL_ASSERT(sign_offset == (sizeof(T) * 8 - 1)); + + // Bail out early for zero inputs. + if (mantissa == 0) { + return static_cast<T>(sign << sign_offset); + } + + // If all bits in the exponent are set, the value is infinite or NaN. + // This is true for all binary IEEE-754 formats. + static const int infinite_exponent = (1 << ebits) - 1; + static const int max_normal_exponent = infinite_exponent - 1; + + // Apply the exponent bias to encode it for the result. Doing this early makes + // it easy to detect values that will be infinite or subnormal. 
+ exponent += max_normal_exponent >> 1; + + if (exponent > max_normal_exponent) { + // Overflow: the input is too large for the result type to represent. + if (round_mode == FPTieEven) { + // FPTieEven rounding mode handles overflows using infinities. + exponent = infinite_exponent; + mantissa = 0; + } else { + VIXL_ASSERT(round_mode == FPRoundOdd); + // FPRoundOdd rounding mode handles overflows using the largest magnitude + // normal number. + exponent = max_normal_exponent; + mantissa = (UINT64_C(1) << exponent_offset) - 1; + } + return static_cast<T>((sign << sign_offset) | + (exponent << exponent_offset) | + (mantissa << mantissa_offset)); + } + + // Calculate the shift required to move the top mantissa bit to the proper + // place in the destination type. + const int highest_significant_bit = 63 - CountLeadingZeros(mantissa); + int shift = highest_significant_bit - mbits; + + if (exponent <= 0) { + // The output will be subnormal (before rounding). + // For subnormal outputs, the shift must be adjusted by the exponent. The +1 + // is necessary because the exponent of a subnormal value (encoded as 0) is + // the same as the exponent of the smallest normal value (encoded as 1). + shift += -exponent + 1; + + // Handle inputs that would produce a zero output. + // + // Shifts higher than highest_significant_bit+1 will always produce a zero + // result. A shift of exactly highest_significant_bit+1 might produce a + // non-zero result after rounding. + if (shift > (highest_significant_bit + 1)) { + if (round_mode == FPTieEven) { + // The result will always be +/-0.0. + return static_cast<T>(sign << sign_offset); + } else { + VIXL_ASSERT(round_mode == FPRoundOdd); + VIXL_ASSERT(mantissa != 0); + // For FPRoundOdd, if the mantissa is too small to represent and + // non-zero return the next "odd" value. + return static_cast<T>((sign << sign_offset) | 1); + } + } + + // Properly encode the exponent for a subnormal output. 
+ exponent = 0; + } else { + // Clear the topmost mantissa bit, since this is not encoded in IEEE-754 + // normal values. + mantissa &= ~(UINT64_C(1) << highest_significant_bit); + } + + // The casts below are only well-defined for unsigned integers. + VIXL_STATIC_ASSERT(std::numeric_limits<T>::is_integer); + VIXL_STATIC_ASSERT(!std::numeric_limits<T>::is_signed); + + if (shift > 0) { + if (round_mode == FPTieEven) { + // We have to shift the mantissa to the right. Some precision is lost, so + // we need to apply rounding. + uint64_t onebit_mantissa = (mantissa >> (shift)) & 1; + uint64_t halfbit_mantissa = (mantissa >> (shift - 1)) & 1; + uint64_t adjustment = (halfbit_mantissa & ~onebit_mantissa); + uint64_t adjusted = mantissa - adjustment; + T halfbit_adjusted = (adjusted >> (shift - 1)) & 1; + + T result = + static_cast<T>((sign << sign_offset) | (exponent << exponent_offset) | + ((mantissa >> shift) << mantissa_offset)); + + // A very large mantissa can overflow during rounding. If this happens, + // the exponent should be incremented and the mantissa set to 1.0 + // (encoded as 0). Applying halfbit_adjusted after assembling the float + // has the nice side-effect that this case is handled for free. + // + // This also handles cases where a very large finite value overflows to + // infinity, or where a very large subnormal value overflows to become + // normal. + return result + halfbit_adjusted; + } else { + VIXL_ASSERT(round_mode == FPRoundOdd); + // If any bits at position halfbit or below are set, onebit (ie. the + // bottom bit of the resulting mantissa) must be set. + uint64_t fractional_bits = mantissa & ((UINT64_C(1) << shift) - 1); + if (fractional_bits != 0) { + mantissa |= UINT64_C(1) << shift; + } + + return static_cast<T>((sign << sign_offset) | + (exponent << exponent_offset) | + ((mantissa >> shift) << mantissa_offset)); + } + } else { + // We have to shift the mantissa to the left (or not at all). 
The input + // mantissa is exactly representable in the output mantissa, so apply no + // rounding correction. + return static_cast<T>((sign << sign_offset) | + (exponent << exponent_offset) | + ((mantissa << -shift) << mantissa_offset)); + } +} + + +// See FPRound for a description of this function. +inline double FPRoundToDouble(int64_t sign, + int64_t exponent, + uint64_t mantissa, + FPRounding round_mode) { + uint64_t bits = + FPRound<uint64_t, kDoubleExponentBits, kDoubleMantissaBits>(sign, + exponent, + mantissa, + round_mode); + return RawbitsToDouble(bits); +} + + +// See FPRound for a description of this function. +inline Float16 FPRoundToFloat16(int64_t sign, + int64_t exponent, + uint64_t mantissa, + FPRounding round_mode) { + return RawbitsToFloat16( + FPRound<uint16_t, + kFloat16ExponentBits, + kFloat16MantissaBits>(sign, exponent, mantissa, round_mode)); +} + + +// See FPRound for a description of this function. +static inline float FPRoundToFloat(int64_t sign, + int64_t exponent, + uint64_t mantissa, + FPRounding round_mode) { + uint32_t bits = + FPRound<uint32_t, kFloatExponentBits, kFloatMantissaBits>(sign, + exponent, + mantissa, + round_mode); + return RawbitsToFloat(bits); +} + + +float FPToFloat(Float16 value, UseDefaultNaN DN, bool* exception = NULL); +float FPToFloat(double value, + FPRounding round_mode, + UseDefaultNaN DN, + bool* exception = NULL); + +double FPToDouble(Float16 value, UseDefaultNaN DN, bool* exception = NULL); +double FPToDouble(float value, UseDefaultNaN DN, bool* exception = NULL); + +Float16 FPToFloat16(float value, + FPRounding round_mode, + UseDefaultNaN DN, + bool* exception = NULL); + +Float16 FPToFloat16(double value, + FPRounding round_mode, + UseDefaultNaN DN, + bool* exception = NULL); +} // namespace vixl + +#endif // VIXL_UTILS_H |