Diffstat
36 files changed, 37942 insertions, 0 deletions
diff --git a/js/src/jit/x86-shared/Architecture-x86-shared.cpp b/js/src/jit/x86-shared/Architecture-x86-shared.cpp new file mode 100644 index 0000000000..f000b09c77 --- /dev/null +++ b/js/src/jit/x86-shared/Architecture-x86-shared.cpp @@ -0,0 +1,93 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "jit/x86-shared/Architecture-x86-shared.h" +#if !defined(JS_CODEGEN_X86) && !defined(JS_CODEGEN_X64) +# error "Wrong architecture. Only x86 and x64 should build this file!" +#endif + +#include <iterator> + +#include "jit/RegisterSets.h" + +const char* js::jit::FloatRegister::name() const { + static const char* const names[] = { + +#ifdef JS_CODEGEN_X64 +# define FLOAT_REGS_(TYPE) \ + "%xmm0" TYPE, "%xmm1" TYPE, "%xmm2" TYPE, "%xmm3" TYPE, "%xmm4" TYPE, \ + "%xmm5" TYPE, "%xmm6" TYPE, "%xmm7" TYPE, "%xmm8" TYPE, "%xmm9" TYPE, \ + "%xmm10" TYPE, "%xmm11" TYPE, "%xmm12" TYPE, "%xmm13" TYPE, \ + "%xmm14" TYPE, "%xmm15" TYPE +#else +# define FLOAT_REGS_(TYPE) \ + "%xmm0" TYPE, "%xmm1" TYPE, "%xmm2" TYPE, "%xmm3" TYPE, "%xmm4" TYPE, \ + "%xmm5" TYPE, "%xmm6" TYPE, "%xmm7" TYPE +#endif + + // These should be enumerated in the same order as in + // FloatRegisters::ContentType. + FLOAT_REGS_(".s"), FLOAT_REGS_(".d"), FLOAT_REGS_(".i4"), + FLOAT_REGS_(".s4") +#undef FLOAT_REGS_ + + }; + MOZ_ASSERT(size_t(code()) < std::size(names)); + return names[size_t(code())]; +} + +js::jit::FloatRegisterSet js::jit::FloatRegister::ReduceSetForPush( + const FloatRegisterSet& s) { + SetType bits = s.bits(); + + // Ignore all SIMD register, if not supported. +#ifndef ENABLE_WASM_SIMD + bits &= Codes::AllPhysMask * Codes::SpreadScalar; +#endif + + // Exclude registers which are already pushed with a larger type. High bits + // are associated with larger register types. Thus we keep the set of + // registers which are not included in larger type. + bits &= ~(bits >> (1 * Codes::TotalPhys)); + bits &= ~(bits >> (2 * Codes::TotalPhys)); + bits &= ~(bits >> (3 * Codes::TotalPhys)); + + return FloatRegisterSet(bits); +} + +uint32_t js::jit::FloatRegister::GetPushSizeInBytes(const FloatRegisterSet& s) { + SetType all = s.bits(); + SetType set128b = (all >> (uint32_t(Codes::Simd128) * Codes::TotalPhys)) & + Codes::AllPhysMask; + SetType doubleSet = (all >> (uint32_t(Codes::Double) * Codes::TotalPhys)) & + Codes::AllPhysMask; + SetType singleSet = (all >> (uint32_t(Codes::Single) * Codes::TotalPhys)) & + Codes::AllPhysMask; + + // PushRegsInMask pushes the largest register first, and thus avoids pushing + // aliased registers. So we have to filter out the physical registers which + // are already pushed as part of larger registers. + SetType set64b = doubleSet & ~set128b; + SetType set32b = singleSet & ~set64b & ~set128b; + + static_assert(Codes::AllPhysMask <= 0xffff, + "We can safely use CountPopulation32"); + uint32_t count32b = mozilla::CountPopulation32(set32b); + +#if defined(JS_CODEGEN_X64) + // If we have an odd number of 32 bits values, then we increase the size to + // keep the stack aligned on 8 bytes. Note: Keep in sync with + // PushRegsInMask, and PopRegsInMaskIgnore. 
+ count32b += count32b & 1; +#endif + + return mozilla::CountPopulation32(set128b) * (4 * sizeof(int32_t)) + + mozilla::CountPopulation32(set64b) * sizeof(double) + + count32b * sizeof(float); +} +uint32_t js::jit::FloatRegister::getRegisterDumpOffsetInBytes() { + return uint32_t(encoding()) * sizeof(FloatRegisters::RegisterContent); +} diff --git a/js/src/jit/x86-shared/Architecture-x86-shared.h b/js/src/jit/x86-shared/Architecture-x86-shared.h new file mode 100644 index 0000000000..b4701af284 --- /dev/null +++ b/js/src/jit/x86-shared/Architecture-x86-shared.h @@ -0,0 +1,467 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef jit_x86_shared_Architecture_x86_h +#define jit_x86_shared_Architecture_x86_h + +#if !defined(JS_CODEGEN_X86) && !defined(JS_CODEGEN_X64) +# error "Unsupported architecture!" +#endif + +#include "mozilla/MathAlgorithms.h" + +#include <algorithm> +#include <string.h> + +#include "jit/shared/Architecture-shared.h" + +#include "jit/x86-shared/Constants-x86-shared.h" + +namespace js { +namespace jit { + +#if defined(JS_CODEGEN_X86) +// These offsets are specific to nunboxing, and capture offsets into the +// components of a js::Value. +static const int32_t NUNBOX32_TYPE_OFFSET = 4; +static const int32_t NUNBOX32_PAYLOAD_OFFSET = 0; +#endif + +#if defined(JS_CODEGEN_X64) && defined(_WIN64) +static const uint32_t ShadowStackSpace = 32; +#else +static const uint32_t ShadowStackSpace = 0; +#endif + +static const uint32_t JumpImmediateRange = INT32_MAX; + +class Registers { + public: + using Code = uint8_t; + using Encoding = X86Encoding::RegisterID; + + // Content spilled during bailouts. 
+ union RegisterContent { + uintptr_t r; + }; + +#if defined(JS_CODEGEN_X86) + using SetType = uint8_t; + + static const char* GetName(Code code) { + return X86Encoding::GPRegName(Encoding(code)); + } + + static const uint32_t Total = 8; + static const uint32_t TotalPhys = 8; + static const uint32_t Allocatable = 7; + +#elif defined(JS_CODEGEN_X64) + using SetType = uint16_t; + + static const char* GetName(Code code) { + static const char* const Names[] = { + "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"}; + return Names[code]; + } + + static const uint32_t Total = 16; + static const uint32_t TotalPhys = 16; + static const uint32_t Allocatable = 14; +#endif + + static uint32_t SetSize(SetType x) { + static_assert(sizeof(SetType) <= 4, "SetType must be, at most, 32 bits"); + return mozilla::CountPopulation32(x); + } + static uint32_t FirstBit(SetType x) { + return mozilla::CountTrailingZeroes32(x); + } + static uint32_t LastBit(SetType x) { + return 31 - mozilla::CountLeadingZeroes32(x); + } + + static Code FromName(const char* name) { + for (size_t i = 0; i < Total; i++) { + if (strcmp(GetName(Code(i)), name) == 0) { + return Code(i); + } + } + return Invalid; + } + + static const Encoding StackPointer = X86Encoding::rsp; + static const Encoding Invalid = X86Encoding::invalid_reg; + + static const SetType AllMask = (1 << Total) - 1; + +#if defined(JS_CODEGEN_X86) + static const SetType ArgRegMask = 0; + + static const SetType VolatileMask = (1 << X86Encoding::rax) | + (1 << X86Encoding::rcx) | + (1 << X86Encoding::rdx); + + static const SetType WrapperMask = VolatileMask | (1 << X86Encoding::rbx); + + static const SetType SingleByteRegs = + (1 << X86Encoding::rax) | (1 << X86Encoding::rcx) | + (1 << X86Encoding::rdx) | (1 << X86Encoding::rbx); + + static const SetType NonAllocatableMask = + (1 << X86Encoding::rsp) | (1 << X86Encoding::rbp); + + // Registers returned from a JS -> JS call. + static const SetType JSCallMask = + (1 << X86Encoding::rcx) | (1 << X86Encoding::rdx); + + // Registers returned from a JS -> C call. + static const SetType CallMask = (1 << X86Encoding::rax); + +#elif defined(JS_CODEGEN_X64) + static const SetType ArgRegMask = +# if !defined(_WIN64) + (1 << X86Encoding::rdi) | (1 << X86Encoding::rsi) | +# endif + (1 << X86Encoding::rdx) | (1 << X86Encoding::rcx) | + (1 << X86Encoding::r8) | (1 << X86Encoding::r9); + + static const SetType VolatileMask = (1 << X86Encoding::rax) | ArgRegMask | + (1 << X86Encoding::r10) | + (1 << X86Encoding::r11); + + static const SetType WrapperMask = VolatileMask; + + static const SetType SingleByteRegs = AllMask & ~(1 << X86Encoding::rsp); + + static const SetType NonAllocatableMask = + (1 << X86Encoding::rsp) | (1 << X86Encoding::rbp) | + (1 << X86Encoding::r11); // This is ScratchReg. + + // Registers returned from a JS -> JS call. + static const SetType JSCallMask = (1 << X86Encoding::rcx); + + // Registers returned from a JS -> C call. 
+ static const SetType CallMask = (1 << X86Encoding::rax); + +#endif + + static const SetType NonVolatileMask = + AllMask & ~VolatileMask & ~(1 << X86Encoding::rsp); + + static const SetType AllocatableMask = AllMask & ~NonAllocatableMask; +}; + +using PackedRegisterMask = Registers::SetType; + +class FloatRegisters { + public: + using Encoding = X86Encoding::XMMRegisterID; + + // Observe that there is a Simd128 type on both x86 and x64 whether SIMD is + // implemented/enabled or not, and that the RegisterContent union is large + // enough for a V128 datum always. Producers and consumers of a register dump + // must be aware of this even if they don't need to save/restore values in the + // high lanes of the SIMD registers. See the DumpAllRegs() implementations, + // for example. + + enum ContentType { + Single, // 32-bit float. + Double, // 64-bit double. + Simd128, // 128-bit Wasm SIMD type. + NumTypes + }; + + // Content spilled during bailouts. + union RegisterContent { + float s; + double d; + uint8_t v128[16]; + }; + + static const char* GetName(Encoding code) { + return X86Encoding::XMMRegName(code); + } + + static Encoding FromName(const char* name) { + for (size_t i = 0; i < Total; i++) { + if (strcmp(GetName(Encoding(i)), name) == 0) { + return Encoding(i); + } + } + return Invalid; + } + + static const Encoding Invalid = X86Encoding::invalid_xmm; + +#if defined(JS_CODEGEN_X86) + static const uint32_t Total = 8 * NumTypes; + static const uint32_t TotalPhys = 8; + static const uint32_t Allocatable = 7; + using SetType = uint32_t; +#elif defined(JS_CODEGEN_X64) + static const uint32_t Total = 16 * NumTypes; + static const uint32_t TotalPhys = 16; + static const uint32_t Allocatable = 15; + using SetType = uint64_t; +#endif + + static_assert(sizeof(SetType) * 8 >= Total, + "SetType should be large enough to enumerate all registers."); + + // Magic values which are used to duplicate a mask of physical register for + // a specific type of register. A multiplication is used to copy and shift + // the bits of the physical register mask. + static const SetType SpreadSingle = SetType(1) + << (uint32_t(Single) * TotalPhys); + static const SetType SpreadDouble = SetType(1) + << (uint32_t(Double) * TotalPhys); + static const SetType SpreadSimd128 = SetType(1) + << (uint32_t(Simd128) * TotalPhys); + static const SetType SpreadScalar = SpreadSingle | SpreadDouble; + static const SetType SpreadVector = SpreadSimd128; + static const SetType Spread = SpreadScalar | SpreadVector; + + static const SetType AllPhysMask = ((1 << TotalPhys) - 1); + static const SetType AllMask = AllPhysMask * Spread; + static const SetType AllDoubleMask = AllPhysMask * SpreadDouble; + static const SetType AllSingleMask = AllPhysMask * SpreadSingle; + static const SetType AllVector128Mask = AllPhysMask * SpreadSimd128; + +#if defined(JS_CODEGEN_X86) + static const SetType NonAllocatableMask = + Spread * (1 << X86Encoding::xmm7); // This is ScratchDoubleReg. + +#elif defined(JS_CODEGEN_X64) + static const SetType NonAllocatableMask = + Spread * (1 << X86Encoding::xmm15); // This is ScratchDoubleReg. 
+#endif + +#if defined(JS_CODEGEN_X64) && defined(_WIN64) + static const SetType VolatileMask = + ((1 << X86Encoding::xmm0) | (1 << X86Encoding::xmm1) | + (1 << X86Encoding::xmm2) | (1 << X86Encoding::xmm3) | + (1 << X86Encoding::xmm4) | (1 << X86Encoding::xmm5)) * + Spread; +#else + static const SetType VolatileMask = AllMask; +#endif + + static const SetType NonVolatileMask = AllMask & ~VolatileMask; + static const SetType WrapperMask = VolatileMask; + static const SetType AllocatableMask = AllMask & ~NonAllocatableMask; +}; + +static const uint32_t SpillSlotSize = + std::max(sizeof(Registers::RegisterContent), + sizeof(FloatRegisters::RegisterContent)); + +template <typename T> +class TypedRegisterSet; + +struct FloatRegister { + using Codes = FloatRegisters; + using Code = size_t; + using Encoding = Codes::Encoding; + using SetType = Codes::SetType; + static uint32_t SetSize(SetType x) { + // Count the number of non-aliased registers, for the moment. + // + // Copy the set bits of each typed register to the low part of the of + // the Set, and count the number of registers. This is made to avoid + // registers which are allocated twice with different types (such as in + // AllMask). + x |= x >> (2 * Codes::TotalPhys); + x |= x >> Codes::TotalPhys; + x &= Codes::AllPhysMask; + static_assert(Codes::AllPhysMask <= 0xffff, + "We can safely use CountPopulation32"); + return mozilla::CountPopulation32(x); + } + +#if defined(JS_CODEGEN_X86) + static uint32_t FirstBit(SetType x) { + static_assert(sizeof(SetType) == 4, "SetType must be 32 bits"); + return mozilla::CountTrailingZeroes32(x); + } + static uint32_t LastBit(SetType x) { + return 31 - mozilla::CountLeadingZeroes32(x); + } + +#elif defined(JS_CODEGEN_X64) + static uint32_t FirstBit(SetType x) { + static_assert(sizeof(SetType) == 8, "SetType must be 64 bits"); + return mozilla::CountTrailingZeroes64(x); + } + static uint32_t LastBit(SetType x) { + return 63 - mozilla::CountLeadingZeroes64(x); + } +#endif + + private: + // Note: These fields are using one extra bit to make the invalid enumerated + // values fit, and thus prevent a warning. + Codes::Encoding reg_ : 5; + Codes::ContentType type_ : 3; + bool isInvalid_ : 1; + + // Constants used for exporting/importing the float register code. 
+#if defined(JS_CODEGEN_X86) + static const size_t RegSize = 3; +#elif defined(JS_CODEGEN_X64) + static const size_t RegSize = 4; +#endif + static const size_t RegMask = (1 << RegSize) - 1; + + public: + constexpr FloatRegister() + : reg_(Codes::Encoding(0)), type_(Codes::Single), isInvalid_(true) {} + constexpr FloatRegister(uint32_t r, Codes::ContentType k) + : reg_(Codes::Encoding(r)), type_(k), isInvalid_(false) {} + constexpr FloatRegister(Codes::Encoding r, Codes::ContentType k) + : reg_(r), type_(k), isInvalid_(false) {} + + static FloatRegister FromCode(uint32_t i) { + MOZ_ASSERT(i < Codes::Total); + return FloatRegister(i & RegMask, Codes::ContentType(i >> RegSize)); + } + + bool isSingle() const { + MOZ_ASSERT(!isInvalid()); + return type_ == Codes::Single; + } + bool isDouble() const { + MOZ_ASSERT(!isInvalid()); + return type_ == Codes::Double; + } + bool isSimd128() const { + MOZ_ASSERT(!isInvalid()); + return type_ == Codes::Simd128; + } + bool isInvalid() const { return isInvalid_; } + + FloatRegister asSingle() const { + MOZ_ASSERT(!isInvalid()); + return FloatRegister(reg_, Codes::Single); + } + FloatRegister asDouble() const { + MOZ_ASSERT(!isInvalid()); + return FloatRegister(reg_, Codes::Double); + } + FloatRegister asSimd128() const { + MOZ_ASSERT(!isInvalid()); + return FloatRegister(reg_, Codes::Simd128); + } + + uint32_t size() const { + MOZ_ASSERT(!isInvalid()); + if (isSingle()) { + return sizeof(float); + } + if (isDouble()) { + return sizeof(double); + } + MOZ_ASSERT(isSimd128()); + return 4 * sizeof(int32_t); + } + + Code code() const { + MOZ_ASSERT(!isInvalid()); + MOZ_ASSERT(uint32_t(reg_) < Codes::TotalPhys); + // :TODO: ARM is doing the same thing, but we should avoid this, except + // that the RegisterSets depends on this. + return Code(reg_ | (type_ << RegSize)); + } + Encoding encoding() const { + MOZ_ASSERT(!isInvalid()); + MOZ_ASSERT(uint32_t(reg_) < Codes::TotalPhys); + return reg_; + } + // defined in Assembler-x86-shared.cpp + const char* name() const; + bool volatile_() const { + return !!((SetType(1) << code()) & FloatRegisters::VolatileMask); + } + bool operator!=(FloatRegister other) const { + return other.reg_ != reg_ || other.type_ != type_; + } + bool operator==(FloatRegister other) const { + return other.reg_ == reg_ && other.type_ == type_; + } + bool aliases(FloatRegister other) const { return other.reg_ == reg_; } + // Check if two floating point registers have the same type. 
+ bool equiv(FloatRegister other) const { return other.type_ == type_; } + + uint32_t numAliased() const { return Codes::NumTypes; } + uint32_t numAlignedAliased() const { return numAliased(); } + + FloatRegister aliased(uint32_t aliasIdx) const { + MOZ_ASSERT(aliasIdx < Codes::NumTypes); + return FloatRegister( + reg_, Codes::ContentType((aliasIdx + type_) % Codes::NumTypes)); + } + FloatRegister alignedAliased(uint32_t aliasIdx) const { + return aliased(aliasIdx); + } + + SetType alignedOrDominatedAliasedSet() const { return Codes::Spread << reg_; } + + static constexpr RegTypeName DefaultType = RegTypeName::Float64; + + template <RegTypeName = DefaultType> + static SetType LiveAsIndexableSet(SetType s) { + return SetType(0); + } + + template <RegTypeName Name = DefaultType> + static SetType AllocatableAsIndexableSet(SetType s) { + static_assert(Name != RegTypeName::Any, "Allocatable set are not iterable"); + return LiveAsIndexableSet<Name>(s); + } + + static TypedRegisterSet<FloatRegister> ReduceSetForPush( + const TypedRegisterSet<FloatRegister>& s); + static uint32_t GetPushSizeInBytes(const TypedRegisterSet<FloatRegister>& s); + uint32_t getRegisterDumpOffsetInBytes(); +}; + +template <> +inline FloatRegister::SetType +FloatRegister::LiveAsIndexableSet<RegTypeName::Float32>(SetType set) { + return set & FloatRegisters::AllSingleMask; +} + +template <> +inline FloatRegister::SetType +FloatRegister::LiveAsIndexableSet<RegTypeName::Float64>(SetType set) { + return set & FloatRegisters::AllDoubleMask; +} + +template <> +inline FloatRegister::SetType +FloatRegister::LiveAsIndexableSet<RegTypeName::Vector128>(SetType set) { + return set & FloatRegisters::AllVector128Mask; +} + +template <> +inline FloatRegister::SetType +FloatRegister::LiveAsIndexableSet<RegTypeName::Any>(SetType set) { + return set; +} + +// Arm/D32 has double registers that can NOT be treated as float32 +// and this requires some dances in lowering. +inline bool hasUnaliasedDouble() { return false; } + +// On ARM, Dn aliases both S2n and S2n+1, so if you need to convert a float32 +// to a double as a temporary, you need a temporary double register. +inline bool hasMultiAlias() { return false; } + +} // namespace jit +} // namespace js + +#endif /* jit_x86_shared_Architecture_x86_h */ diff --git a/js/src/jit/x86-shared/Assembler-x86-shared.cpp b/js/src/jit/x86-shared/Assembler-x86-shared.cpp new file mode 100644 index 0000000000..65fd124cf8 --- /dev/null +++ b/js/src/jit/x86-shared/Assembler-x86-shared.cpp @@ -0,0 +1,355 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mozilla/Maybe.h" + +#include <algorithm> + +#include "jit/AutoWritableJitCode.h" +#if defined(JS_CODEGEN_X86) +# include "jit/x86/MacroAssembler-x86.h" +#elif defined(JS_CODEGEN_X64) +# include "jit/x64/MacroAssembler-x64.h" +#else +# error "Wrong architecture. Only x86 and x64 should build this file!" 
+#endif + +#ifdef _MSC_VER +# include <intrin.h> // for __cpuid +# if defined(_M_X64) && (_MSC_FULL_VER >= 160040219) +# include <immintrin.h> // for _xgetbv +# endif +#endif + +using namespace js; +using namespace js::jit; + +void AssemblerX86Shared::copyJumpRelocationTable(uint8_t* dest) { + if (jumpRelocations_.length()) { + memcpy(dest, jumpRelocations_.buffer(), jumpRelocations_.length()); + } +} + +void AssemblerX86Shared::copyDataRelocationTable(uint8_t* dest) { + if (dataRelocations_.length()) { + memcpy(dest, dataRelocations_.buffer(), dataRelocations_.length()); + } +} + +/* static */ +void AssemblerX86Shared::TraceDataRelocations(JSTracer* trc, JitCode* code, + CompactBufferReader& reader) { + mozilla::Maybe<AutoWritableJitCode> awjc; + + while (reader.more()) { + size_t offset = reader.readUnsigned(); + MOZ_ASSERT(offset >= sizeof(void*) && offset <= code->instructionsSize()); + + uint8_t* src = code->raw() + offset; + void* data = X86Encoding::GetPointer(src); + +#ifdef JS_PUNBOX64 + // Data relocations can be for Values or for raw pointers. If a Value is + // zero-tagged, we can trace it as if it were a raw pointer. If a Value + // is not zero-tagged, we have to interpret it as a Value to ensure that the + // tag bits are masked off to recover the actual pointer. + + uintptr_t word = reinterpret_cast<uintptr_t>(data); + if (word >> JSVAL_TAG_SHIFT) { + // This relocation is a Value with a non-zero tag. + Value value = Value::fromRawBits(word); + MOZ_ASSERT_IF(value.isGCThing(), + gc::IsCellPointerValid(value.toGCThing())); + TraceManuallyBarrieredEdge(trc, &value, "jit-masm-value"); + if (word != value.asRawBits()) { + if (awjc.isNothing()) { + awjc.emplace(code); + } + X86Encoding::SetPointer(src, value.bitsAsPunboxPointer()); + } + continue; + } +#endif + + // This relocation is a raw pointer or a Value with a zero tag. 
+ gc::Cell* cell = static_cast<gc::Cell*>(data); + MOZ_ASSERT(gc::IsCellPointerValid(cell)); + TraceManuallyBarrieredGenericPointerEdge(trc, &cell, "jit-masm-ptr"); + if (cell != data) { + if (awjc.isNothing()) { + awjc.emplace(code); + } + X86Encoding::SetPointer(src, cell); + } + } +} + +void AssemblerX86Shared::executableCopy(void* buffer) { + masm.executableCopy(buffer); +} + +void AssemblerX86Shared::processCodeLabels(uint8_t* rawCode) { + for (const CodeLabel& label : codeLabels_) { + Bind(rawCode, label); + } +} + +AssemblerX86Shared::Condition AssemblerX86Shared::InvertCondition( + Condition cond) { + switch (cond) { + case Zero: + return NonZero; + case NonZero: + return Zero; + case LessThan: + return GreaterThanOrEqual; + case LessThanOrEqual: + return GreaterThan; + case GreaterThan: + return LessThanOrEqual; + case GreaterThanOrEqual: + return LessThan; + case Above: + return BelowOrEqual; + case AboveOrEqual: + return Below; + case Below: + return AboveOrEqual; + case BelowOrEqual: + return Above; + default: + MOZ_CRASH("unexpected condition"); + } +} + +AssemblerX86Shared::Condition AssemblerX86Shared::UnsignedCondition( + Condition cond) { + switch (cond) { + case Zero: + case NonZero: + return cond; + case LessThan: + case Below: + return Below; + case LessThanOrEqual: + case BelowOrEqual: + return BelowOrEqual; + case GreaterThan: + case Above: + return Above; + case AboveOrEqual: + case GreaterThanOrEqual: + return AboveOrEqual; + default: + MOZ_CRASH("unexpected condition"); + } +} + +AssemblerX86Shared::Condition AssemblerX86Shared::ConditionWithoutEqual( + Condition cond) { + switch (cond) { + case LessThan: + case LessThanOrEqual: + return LessThan; + case Below: + case BelowOrEqual: + return Below; + case GreaterThan: + case GreaterThanOrEqual: + return GreaterThan; + case Above: + case AboveOrEqual: + return Above; + default: + MOZ_CRASH("unexpected condition"); + } +} + +AssemblerX86Shared::DoubleCondition AssemblerX86Shared::InvertCondition( + DoubleCondition cond) { + switch (cond) { + case DoubleEqual: + return DoubleNotEqualOrUnordered; + case DoubleEqualOrUnordered: + return DoubleNotEqual; + case DoubleNotEqualOrUnordered: + return DoubleEqual; + case DoubleNotEqual: + return DoubleEqualOrUnordered; + case DoubleLessThan: + return DoubleGreaterThanOrEqualOrUnordered; + case DoubleLessThanOrUnordered: + return DoubleGreaterThanOrEqual; + case DoubleLessThanOrEqual: + return DoubleGreaterThanOrUnordered; + case DoubleLessThanOrEqualOrUnordered: + return DoubleGreaterThan; + case DoubleGreaterThan: + return DoubleLessThanOrEqualOrUnordered; + case DoubleGreaterThanOrUnordered: + return DoubleLessThanOrEqual; + case DoubleGreaterThanOrEqual: + return DoubleLessThanOrUnordered; + case DoubleGreaterThanOrEqualOrUnordered: + return DoubleLessThan; + default: + MOZ_CRASH("unexpected condition"); + } +} + +CPUInfo::SSEVersion CPUInfo::maxSSEVersion = UnknownSSE; +CPUInfo::SSEVersion CPUInfo::maxEnabledSSEVersion = UnknownSSE; +bool CPUInfo::avxPresent = false; +#ifdef ENABLE_WASM_AVX +bool CPUInfo::avxEnabled = true; +#else +bool CPUInfo::avxEnabled = false; +#endif +bool CPUInfo::popcntPresent = false; +bool CPUInfo::bmi1Present = false; +bool CPUInfo::bmi2Present = false; +bool CPUInfo::lzcntPresent = false; +bool CPUInfo::avx2Present = false; +bool CPUInfo::fmaPresent = false; + +namespace js { +namespace jit { +bool CPUFlagsHaveBeenComputed() { return CPUInfo::FlagsHaveBeenComputed(); } +} // namespace jit +} // namespace js + +static uintptr_t ReadXGETBV() { + 
// We use a variety of low-level mechanisms to get at the xgetbv + // instruction, including spelling out the xgetbv instruction as bytes, + // because older compilers and assemblers may not recognize the instruction + // by name. + size_t xcr0EAX = 0; +#if defined(_XCR_XFEATURE_ENABLED_MASK) + xcr0EAX = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); +#elif defined(__GNUC__) + // xgetbv returns its results in %eax and %edx, and for our purposes here, + // we're only interested in the %eax value. + asm(".byte 0x0f, 0x01, 0xd0" : "=a"(xcr0EAX) : "c"(0) : "%edx"); +#elif defined(_MSC_VER) && defined(_M_IX86) + __asm { + xor ecx, ecx + _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0 + mov xcr0EAX, eax + } +#endif + return xcr0EAX; +} + +static void ReadCPUInfo(int* flagsEax, int* flagsEbx, int* flagsEcx, + int* flagsEdx) { +#ifdef _MSC_VER + int cpuinfo[4]; + __cpuid(cpuinfo, *flagsEax); + *flagsEax = cpuinfo[0]; + *flagsEbx = cpuinfo[1]; + *flagsEcx = cpuinfo[2]; + *flagsEdx = cpuinfo[3]; +#elif defined(__GNUC__) + // Some older 32-bits processors don't fill the ecx register with cpuid, so + // clobber it before calling cpuid, so that there's no risk of picking + // random bits indicating SSE3/SSE4 are present. Also make sure that it's + // set to 0 as an input for BMI detection on all platforms. + *flagsEcx = 0; +# ifdef JS_CODEGEN_X64 + asm("cpuid;" + : "+a"(*flagsEax), "=b"(*flagsEbx), "+c"(*flagsEcx), "=d"(*flagsEdx)); +# else + // On x86, preserve ebx. The compiler needs it for PIC mode. + asm("mov %%ebx, %%edi;" + "cpuid;" + "xchg %%edi, %%ebx;" + : "+a"(*flagsEax), "=D"(*flagsEbx), "+c"(*flagsEcx), "=d"(*flagsEdx)); +# endif +#else +# error "Unsupported compiler" +#endif +} + +void CPUInfo::ComputeFlags() { + MOZ_ASSERT(!FlagsHaveBeenComputed()); + + int flagsEax = 1; + int flagsEbx = 0; + int flagsEcx = 0; + int flagsEdx = 0; + ReadCPUInfo(&flagsEax, &flagsEbx, &flagsEcx, &flagsEdx); + + static constexpr int SSEBit = 1 << 25; + static constexpr int SSE2Bit = 1 << 26; + static constexpr int SSE3Bit = 1 << 0; + static constexpr int SSSE3Bit = 1 << 9; + static constexpr int SSE41Bit = 1 << 19; + static constexpr int SSE42Bit = 1 << 20; + + if (flagsEcx & SSE42Bit) { + maxSSEVersion = SSE4_2; + } else if (flagsEcx & SSE41Bit) { + maxSSEVersion = SSE4_1; + } else if (flagsEcx & SSSE3Bit) { + maxSSEVersion = SSSE3; + } else if (flagsEcx & SSE3Bit) { + maxSSEVersion = SSE3; + } else if (flagsEdx & SSE2Bit) { + maxSSEVersion = SSE2; + } else if (flagsEdx & SSEBit) { + maxSSEVersion = SSE; + } else { + maxSSEVersion = NoSSE; + } + + if (maxEnabledSSEVersion != UnknownSSE) { + maxSSEVersion = std::min(maxSSEVersion, maxEnabledSSEVersion); + } + + static constexpr int AVXBit = 1 << 28; + static constexpr int XSAVEBit = 1 << 27; + avxPresent = (flagsEcx & AVXBit) && (flagsEcx & XSAVEBit) && avxEnabled; + + // If the hardware supports AVX, check whether the OS supports it too. + if (avxPresent) { + size_t xcr0EAX = ReadXGETBV(); + static constexpr int xcr0SSEBit = 1 << 1; + static constexpr int xcr0AVXBit = 1 << 2; + avxPresent = (xcr0EAX & xcr0SSEBit) && (xcr0EAX & xcr0AVXBit); + } + + // CMOV instruction are supposed to be supported by all CPU which have SSE2 + // enabled. While this might be true, this is not guaranteed by any + // documentation, nor AMD, nor Intel. 
+ static constexpr int CMOVBit = 1 << 15; + MOZ_RELEASE_ASSERT(flagsEdx & CMOVBit, + "CMOVcc instruction is not recognized by this CPU."); + + static constexpr int POPCNTBit = 1 << 23; + popcntPresent = (flagsEcx & POPCNTBit); + + // Use the avxEnabled flag to enable/disable FMA. + static constexpr int FMABit = 1 << 12; + fmaPresent = (flagsEcx & FMABit) && avxEnabled; + + flagsEax = 0x80000001; + ReadCPUInfo(&flagsEax, &flagsEbx, &flagsEcx, &flagsEdx); + + static constexpr int LZCNTBit = 1 << 5; + lzcntPresent = (flagsEcx & LZCNTBit); + + flagsEax = 0x7; + ReadCPUInfo(&flagsEax, &flagsEbx, &flagsEcx, &flagsEdx); + + static constexpr int BMI1Bit = 1 << 3; + static constexpr int BMI2Bit = 1 << 8; + static constexpr int AVX2Bit = 1 << 5; + bmi1Present = (flagsEbx & BMI1Bit); + bmi2Present = bmi1Present && (flagsEbx & BMI2Bit); + avx2Present = avxPresent && (flagsEbx & AVX2Bit); + + MOZ_ASSERT(FlagsHaveBeenComputed()); +} diff --git a/js/src/jit/x86-shared/Assembler-x86-shared.h b/js/src/jit/x86-shared/Assembler-x86-shared.h new file mode 100644 index 0000000000..922a2de029 --- /dev/null +++ b/js/src/jit/x86-shared/Assembler-x86-shared.h @@ -0,0 +1,4887 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef jit_x86_shared_Assembler_x86_shared_h +#define jit_x86_shared_Assembler_x86_shared_h + +#include <cstddef> + +#include "jit/shared/Assembler-shared.h" + +#if defined(JS_CODEGEN_X86) +# include "jit/x86/BaseAssembler-x86.h" +#elif defined(JS_CODEGEN_X64) +# include "jit/x64/BaseAssembler-x64.h" +#else +# error "Unknown architecture!" +#endif +#include "jit/CompactBuffer.h" +#include "wasm/WasmTypeDecls.h" + +namespace js { +namespace jit { + +// Do not reference ScratchFloat32Reg_ directly, use ScratchFloat32Scope +// instead. +struct ScratchFloat32Scope : public AutoFloatRegisterScope { + explicit ScratchFloat32Scope(MacroAssembler& masm) + : AutoFloatRegisterScope(masm, ScratchFloat32Reg_) {} +}; + +// Do not reference ScratchDoubleReg_ directly, use ScratchDoubleScope instead. +struct ScratchDoubleScope : public AutoFloatRegisterScope { + explicit ScratchDoubleScope(MacroAssembler& masm) + : AutoFloatRegisterScope(masm, ScratchDoubleReg_) {} +}; + +struct ScratchSimd128Scope : public AutoFloatRegisterScope { + explicit ScratchSimd128Scope(MacroAssembler& masm) + : AutoFloatRegisterScope(masm, ScratchSimd128Reg) {} +}; + +class Operand { + public: + enum Kind { REG, MEM_REG_DISP, FPREG, MEM_SCALE, MEM_ADDRESS32 }; + + private: + Kind kind_ : 4; + // Used as a Register::Encoding and a FloatRegister::Encoding. + uint32_t base_ : 5; + Scale scale_ : 3; + // We don't use all 8 bits, of course, but GCC complains if the size of + // this field is smaller than the size of Register::Encoding. 
+ Register::Encoding index_ : 8; + int32_t disp_; + + public: + explicit Operand(Register reg) + : kind_(REG), + base_(reg.encoding()), + scale_(TimesOne), + index_(Registers::Invalid), + disp_(0) {} + explicit Operand(FloatRegister reg) + : kind_(FPREG), + base_(reg.encoding()), + scale_(TimesOne), + index_(Registers::Invalid), + disp_(0) {} + explicit Operand(const Address& address) + : kind_(MEM_REG_DISP), + base_(address.base.encoding()), + scale_(TimesOne), + index_(Registers::Invalid), + disp_(address.offset) {} + explicit Operand(const BaseIndex& address) + : kind_(MEM_SCALE), + base_(address.base.encoding()), + scale_(address.scale), + index_(address.index.encoding()), + disp_(address.offset) {} + Operand(Register base, Register index, Scale scale, int32_t disp = 0) + : kind_(MEM_SCALE), + base_(base.encoding()), + scale_(scale), + index_(index.encoding()), + disp_(disp) {} + Operand(Register reg, int32_t disp) + : kind_(MEM_REG_DISP), + base_(reg.encoding()), + scale_(TimesOne), + index_(Registers::Invalid), + disp_(disp) {} + explicit Operand(AbsoluteAddress address) + : kind_(MEM_ADDRESS32), + base_(Registers::Invalid), + scale_(TimesOne), + index_(Registers::Invalid), + disp_(X86Encoding::AddressImmediate(address.addr)) {} + explicit Operand(PatchedAbsoluteAddress address) + : kind_(MEM_ADDRESS32), + base_(Registers::Invalid), + scale_(TimesOne), + index_(Registers::Invalid), + disp_(X86Encoding::AddressImmediate(address.addr)) {} + + Address toAddress() const { + MOZ_ASSERT(kind() == MEM_REG_DISP); + return Address(Register::FromCode(base()), disp()); + } + + BaseIndex toBaseIndex() const { + MOZ_ASSERT(kind() == MEM_SCALE); + return BaseIndex(Register::FromCode(base()), Register::FromCode(index()), + scale(), disp()); + } + + Kind kind() const { return kind_; } + Register::Encoding reg() const { + MOZ_ASSERT(kind() == REG); + return Register::Encoding(base_); + } + Register::Encoding base() const { + MOZ_ASSERT(kind() == MEM_REG_DISP || kind() == MEM_SCALE); + return Register::Encoding(base_); + } + Register::Encoding index() const { + MOZ_ASSERT(kind() == MEM_SCALE); + return index_; + } + Scale scale() const { + MOZ_ASSERT(kind() == MEM_SCALE); + return scale_; + } + FloatRegister::Encoding fpu() const { + MOZ_ASSERT(kind() == FPREG); + return FloatRegister::Encoding(base_); + } + int32_t disp() const { + MOZ_ASSERT(kind() == MEM_REG_DISP || kind() == MEM_SCALE); + return disp_; + } + void* address() const { + MOZ_ASSERT(kind() == MEM_ADDRESS32); + return reinterpret_cast<void*>(disp_); + } + + bool containsReg(Register r) const { + switch (kind()) { + case REG: + return r.encoding() == reg(); + case MEM_REG_DISP: + return r.encoding() == base(); + case MEM_SCALE: + return r.encoding() == base() || r.encoding() == index(); + default: + return false; + } + } +}; + +inline Imm32 Imm64::firstHalf() const { return low(); } + +inline Imm32 Imm64::secondHalf() const { return hi(); } + +class CPUInfo { + public: + // As the SSE's were introduced in order, the presence of a later SSE implies + // the presence of an earlier SSE. For example, SSE4_2 support implies SSE2 + // support. 
+ enum SSEVersion { + UnknownSSE = 0, + NoSSE = 1, + SSE = 2, + SSE2 = 3, + SSE3 = 4, + SSSE3 = 5, + SSE4_1 = 6, + SSE4_2 = 7 + }; + static const int AVX_PRESENT_BIT = 8; + + static SSEVersion GetSSEVersion() { + MOZ_ASSERT(FlagsHaveBeenComputed()); + MOZ_ASSERT_IF(maxEnabledSSEVersion != UnknownSSE, + maxSSEVersion <= maxEnabledSSEVersion); + return maxSSEVersion; + } + + static bool IsAVXPresent() { + MOZ_ASSERT(FlagsHaveBeenComputed()); + MOZ_ASSERT_IF(!avxEnabled, !avxPresent); + return avxPresent; + } + + static inline uint32_t GetFingerprint() { + return GetSSEVersion() | (IsAVXPresent() ? AVX_PRESENT_BIT : 0); + } + + private: + static SSEVersion maxSSEVersion; + static SSEVersion maxEnabledSSEVersion; + static bool avxPresent; + static bool avxEnabled; + static bool popcntPresent; + static bool bmi1Present; + static bool bmi2Present; + static bool lzcntPresent; + static bool fmaPresent; + static bool avx2Present; + + static void SetMaxEnabledSSEVersion(SSEVersion v) { + if (maxEnabledSSEVersion == UnknownSSE) { + maxEnabledSSEVersion = v; + } else { + maxEnabledSSEVersion = std::min(v, maxEnabledSSEVersion); + } + } + + public: + static bool IsSSE2Present() { +#ifdef JS_CODEGEN_X64 + return true; +#else + return GetSSEVersion() >= SSE2; +#endif + } + static bool IsSSE3Present() { return GetSSEVersion() >= SSE3; } + static bool IsSSSE3Present() { return GetSSEVersion() >= SSSE3; } + static bool IsSSE41Present() { return GetSSEVersion() >= SSE4_1; } + static bool IsSSE42Present() { return GetSSEVersion() >= SSE4_2; } + static bool IsPOPCNTPresent() { return popcntPresent; } + static bool IsBMI1Present() { return bmi1Present; } + static bool IsBMI2Present() { return bmi2Present; } + static bool IsLZCNTPresent() { return lzcntPresent; } + static bool IsFMAPresent() { return fmaPresent; } + static bool IsAVX2Present() { return avx2Present; } + + static bool FlagsHaveBeenComputed() { return maxSSEVersion != UnknownSSE; } + + static void ComputeFlags(); + + // The following should be called only before JS_Init (where the flags are + // computed). If several are called, the most restrictive setting is kept. + + static void SetSSE3Disabled() { + MOZ_ASSERT(!FlagsHaveBeenComputed()); + SetMaxEnabledSSEVersion(SSE2); + avxEnabled = false; + } + static void SetSSSE3Disabled() { + MOZ_ASSERT(!FlagsHaveBeenComputed()); + SetMaxEnabledSSEVersion(SSE3); + avxEnabled = false; + } + static void SetSSE41Disabled() { + MOZ_ASSERT(!FlagsHaveBeenComputed()); + SetMaxEnabledSSEVersion(SSSE3); + avxEnabled = false; + } + static void SetSSE42Disabled() { + MOZ_ASSERT(!FlagsHaveBeenComputed()); + SetMaxEnabledSSEVersion(SSE4_1); + avxEnabled = false; + } + static void SetAVXDisabled() { + MOZ_ASSERT(!FlagsHaveBeenComputed()); + avxEnabled = false; + } + static void SetAVXEnabled() { + MOZ_ASSERT(!FlagsHaveBeenComputed()); + avxEnabled = true; + } +}; + +class AssemblerX86Shared : public AssemblerShared { + protected: + struct RelativePatch { + int32_t offset; + void* target; + RelocationKind kind; + + RelativePatch(int32_t offset, void* target, RelocationKind kind) + : offset(offset), target(target), kind(kind) {} + }; + + CompactBufferWriter jumpRelocations_; + CompactBufferWriter dataRelocations_; + + void writeDataRelocation(ImmGCPtr ptr) { + // Raw GC pointer relocations and Value relocations both end up in + // Assembler::TraceDataRelocations. 
+ if (ptr.value) { + if (gc::IsInsideNursery(ptr.value)) { + embedsNurseryPointers_ = true; + } + dataRelocations_.writeUnsigned(masm.currentOffset()); + } + } + + protected: + X86Encoding::BaseAssemblerSpecific masm; + + using JmpSrc = X86Encoding::JmpSrc; + using JmpDst = X86Encoding::JmpDst; + + public: + AssemblerX86Shared() { + if (!HasAVX()) { + masm.disableVEX(); + } + } + + enum Condition { + Equal = X86Encoding::ConditionE, + NotEqual = X86Encoding::ConditionNE, + Above = X86Encoding::ConditionA, + AboveOrEqual = X86Encoding::ConditionAE, + Below = X86Encoding::ConditionB, + BelowOrEqual = X86Encoding::ConditionBE, + GreaterThan = X86Encoding::ConditionG, + GreaterThanOrEqual = X86Encoding::ConditionGE, + LessThan = X86Encoding::ConditionL, + LessThanOrEqual = X86Encoding::ConditionLE, + Overflow = X86Encoding::ConditionO, + NoOverflow = X86Encoding::ConditionNO, + CarrySet = X86Encoding::ConditionC, + CarryClear = X86Encoding::ConditionNC, + Signed = X86Encoding::ConditionS, + NotSigned = X86Encoding::ConditionNS, + Zero = X86Encoding::ConditionE, + NonZero = X86Encoding::ConditionNE, + Parity = X86Encoding::ConditionP, + NoParity = X86Encoding::ConditionNP + }; + + enum class SSERoundingMode { + Nearest = int(X86Encoding::SSERoundingMode::RoundToNearest), + Floor = int(X86Encoding::SSERoundingMode::RoundDown), + Ceil = int(X86Encoding::SSERoundingMode::RoundUp), + Trunc = int(X86Encoding::SSERoundingMode::RoundToZero) + }; + + // If this bit is set, the vucomisd operands have to be inverted. + static const int DoubleConditionBitInvert = 0x10; + + // Bit set when a DoubleCondition does not map to a single x86 condition. + // The macro assembler has to special-case these conditions. + static const int DoubleConditionBitSpecial = 0x20; + static const int DoubleConditionBits = + DoubleConditionBitInvert | DoubleConditionBitSpecial; + + enum DoubleCondition { + // These conditions will only evaluate to true if the comparison is ordered + // - i.e. neither operand is NaN. + DoubleOrdered = NoParity, + DoubleEqual = Equal | DoubleConditionBitSpecial, + DoubleNotEqual = NotEqual, + DoubleGreaterThan = Above, + DoubleGreaterThanOrEqual = AboveOrEqual, + DoubleLessThan = Above | DoubleConditionBitInvert, + DoubleLessThanOrEqual = AboveOrEqual | DoubleConditionBitInvert, + // If either operand is NaN, these conditions always evaluate to true. + DoubleUnordered = Parity, + DoubleEqualOrUnordered = Equal, + DoubleNotEqualOrUnordered = NotEqual | DoubleConditionBitSpecial, + DoubleGreaterThanOrUnordered = Below | DoubleConditionBitInvert, + DoubleGreaterThanOrEqualOrUnordered = + BelowOrEqual | DoubleConditionBitInvert, + DoubleLessThanOrUnordered = Below, + DoubleLessThanOrEqualOrUnordered = BelowOrEqual + }; + + enum NaNCond { NaN_HandledByCond, NaN_IsTrue, NaN_IsFalse }; + + // If the primary condition returned by ConditionFromDoubleCondition doesn't + // handle NaNs properly, return NaN_IsFalse if the comparison should be + // overridden to return false on NaN, NaN_IsTrue if it should be overridden + // to return true on NaN, or NaN_HandledByCond if no secondary check is + // needed. 
+ static inline NaNCond NaNCondFromDoubleCondition(DoubleCondition cond) { + switch (cond) { + case DoubleOrdered: + case DoubleNotEqual: + case DoubleGreaterThan: + case DoubleGreaterThanOrEqual: + case DoubleLessThan: + case DoubleLessThanOrEqual: + case DoubleUnordered: + case DoubleEqualOrUnordered: + case DoubleGreaterThanOrUnordered: + case DoubleGreaterThanOrEqualOrUnordered: + case DoubleLessThanOrUnordered: + case DoubleLessThanOrEqualOrUnordered: + return NaN_HandledByCond; + case DoubleEqual: + return NaN_IsFalse; + case DoubleNotEqualOrUnordered: + return NaN_IsTrue; + } + + MOZ_CRASH("Unknown double condition"); + } + + static void StaticAsserts() { + // DoubleConditionBits should not interfere with x86 condition codes. + static_assert(!((Equal | NotEqual | Above | AboveOrEqual | Below | + BelowOrEqual | Parity | NoParity) & + DoubleConditionBits)); + } + + static Condition InvertCondition(Condition cond); + static Condition UnsignedCondition(Condition cond); + static Condition ConditionWithoutEqual(Condition cond); + + static DoubleCondition InvertCondition(DoubleCondition cond); + + // Return the primary condition to test. Some primary conditions may not + // handle NaNs properly and may therefore require a secondary condition. + // Use NaNCondFromDoubleCondition to determine what else is needed. + static inline Condition ConditionFromDoubleCondition(DoubleCondition cond) { + return static_cast<Condition>(cond & ~DoubleConditionBits); + } + + static void TraceDataRelocations(JSTracer* trc, JitCode* code, + CompactBufferReader& reader); + + void setUnlimitedBuffer() { + // No-op on this platform + } + bool oom() const { + return AssemblerShared::oom() || masm.oom() || jumpRelocations_.oom() || + dataRelocations_.oom(); + } + bool reserve(size_t size) { return masm.reserve(size); } + bool swapBuffer(wasm::Bytes& other) { return masm.swapBuffer(other); } + + void setPrinter(Sprinter* sp) { masm.setPrinter(sp); } + + Register getStackPointer() const { return StackPointer; } + + void executableCopy(void* buffer); + void processCodeLabels(uint8_t* rawCode); + void copyJumpRelocationTable(uint8_t* dest); + void copyDataRelocationTable(uint8_t* dest); + + // Size of the instruction stream, in bytes. + size_t size() const { return masm.size(); } + // Size of the jump relocation table, in bytes. + size_t jumpRelocationTableBytes() const { return jumpRelocations_.length(); } + size_t dataRelocationTableBytes() const { return dataRelocations_.length(); } + // Size of the data table, in bytes. + size_t bytesNeeded() const { + return size() + jumpRelocationTableBytes() + dataRelocationTableBytes(); + } + + public: + void haltingAlign(int alignment) { + MOZ_ASSERT(hasCreator()); + masm.haltingAlign(alignment); + } + void nopAlign(int alignment) { + MOZ_ASSERT(hasCreator()); + masm.nopAlign(alignment); + } + void writeCodePointer(CodeLabel* label) { + MOZ_ASSERT(hasCreator()); + // Use -1 as dummy value. This will be patched after codegen. 
+ masm.jumpTablePointer(-1); + label->patchAt()->bind(masm.size()); + } + void cmovCCl(Condition cond, const Operand& src, Register dest) { + X86Encoding::Condition cc = static_cast<X86Encoding::Condition>(cond); + switch (src.kind()) { + case Operand::REG: + masm.cmovCCl_rr(cc, src.reg(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.cmovCCl_mr(cc, src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.cmovCCl_mr(cc, src.disp(), src.base(), src.index(), src.scale(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void cmovCCl(Condition cond, Register src, Register dest) { + X86Encoding::Condition cc = static_cast<X86Encoding::Condition>(cond); + masm.cmovCCl_rr(cc, src.encoding(), dest.encoding()); + } + void cmovzl(const Operand& src, Register dest) { + cmovCCl(Condition::Zero, src, dest); + } + void cmovnzl(const Operand& src, Register dest) { + cmovCCl(Condition::NonZero, src, dest); + } + void movl(Imm32 imm32, Register dest) { + MOZ_ASSERT(hasCreator()); + masm.movl_i32r(imm32.value, dest.encoding()); + } + void movl(Register src, Register dest) { + MOZ_ASSERT(hasCreator()); + masm.movl_rr(src.encoding(), dest.encoding()); + } + void movl(const Operand& src, Register dest) { + MOZ_ASSERT(hasCreator()); + switch (src.kind()) { + case Operand::REG: + masm.movl_rr(src.reg(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.movl_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.movl_mr(src.disp(), src.base(), src.index(), src.scale(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.movl_mr(src.address(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void movl(Register src, const Operand& dest) { + MOZ_ASSERT(hasCreator()); + switch (dest.kind()) { + case Operand::REG: + masm.movl_rr(src.encoding(), dest.reg()); + break; + case Operand::MEM_REG_DISP: + masm.movl_rm(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.movl_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), + dest.scale()); + break; + case Operand::MEM_ADDRESS32: + masm.movl_rm(src.encoding(), dest.address()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void movl(Imm32 imm32, const Operand& dest) { + switch (dest.kind()) { + case Operand::REG: + masm.movl_i32r(imm32.value, dest.reg()); + break; + case Operand::MEM_REG_DISP: + masm.movl_i32m(imm32.value, dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.movl_i32m(imm32.value, dest.disp(), dest.base(), dest.index(), + dest.scale()); + break; + case Operand::MEM_ADDRESS32: + masm.movl_i32m(imm32.value, dest.address()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + + void xchgl(Register src, Register dest) { + masm.xchgl_rr(src.encoding(), dest.encoding()); + } + + void vmovapd(FloatRegister src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmovapd_rr(src.encoding(), dest.encoding()); + } + // Eventually vmovapd should be overloaded to support loads and + // stores too. 
+ void vmovapd(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src.kind()) { + case Operand::FPREG: + masm.vmovapd_rr(src.fpu(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + + void vmovaps(FloatRegister src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmovaps_rr(src.encoding(), dest.encoding()); + } + void vmovaps(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src.kind()) { + case Operand::MEM_REG_DISP: + masm.vmovaps_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.vmovaps_mr(src.disp(), src.base(), src.index(), src.scale(), + dest.encoding()); + break; + case Operand::FPREG: + masm.vmovaps_rr(src.fpu(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmovaps(FloatRegister src, const Operand& dest) { + MOZ_ASSERT(HasSSE2()); + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.vmovaps_rm(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.vmovaps_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), + dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmovups(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src.kind()) { + case Operand::MEM_REG_DISP: + masm.vmovups_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.vmovups_mr(src.disp(), src.base(), src.index(), src.scale(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmovups(FloatRegister src, const Operand& dest) { + MOZ_ASSERT(HasSSE2()); + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.vmovups_rm(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.vmovups_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), + dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + + void vmovsd(const Address& src, FloatRegister dest) { + masm.vmovsd_mr(src.offset, src.base.encoding(), dest.encoding()); + } + void vmovsd(const BaseIndex& src, FloatRegister dest) { + masm.vmovsd_mr(src.offset, src.base.encoding(), src.index.encoding(), + src.scale, dest.encoding()); + } + void vmovsd(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(hasCreator()); + switch (src.kind()) { + case Operand::MEM_REG_DISP: + vmovsd(src.toAddress(), dest); + break; + case Operand::MEM_SCALE: + vmovsd(src.toBaseIndex(), dest); + break; + default: + MOZ_CRASH("Unknown operand for vmovsd"); + } + } + void vmovsd(FloatRegister src, const Address& dest) { + masm.vmovsd_rm(src.encoding(), dest.offset, dest.base.encoding()); + } + void vmovsd(FloatRegister src, const BaseIndex& dest) { + masm.vmovsd_rm(src.encoding(), dest.offset, dest.base.encoding(), + dest.index.encoding(), dest.scale); + } + // Note special semantics of this - does not clobber high bits of destination. 
+ void vmovsd(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + masm.vmovsd_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vmovss(const Address& src, FloatRegister dest) { + masm.vmovss_mr(src.offset, src.base.encoding(), dest.encoding()); + } + void vmovss(const BaseIndex& src, FloatRegister dest) { + masm.vmovss_mr(src.offset, src.base.encoding(), src.index.encoding(), + src.scale, dest.encoding()); + } + void vmovss(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(hasCreator()); + switch (src.kind()) { + case Operand::MEM_REG_DISP: + vmovss(src.toAddress(), dest); + break; + case Operand::MEM_SCALE: + vmovss(src.toBaseIndex(), dest); + break; + default: + MOZ_CRASH("Unknown operand for vmovss"); + } + } + void vmovss(FloatRegister src, const Address& dest) { + masm.vmovss_rm(src.encoding(), dest.offset, dest.base.encoding()); + } + void vmovss(FloatRegister src, const BaseIndex& dest) { + masm.vmovss_rm(src.encoding(), dest.offset, dest.base.encoding(), + dest.index.encoding(), dest.scale); + } + void vmovss(FloatRegister src, const Operand& dest) { + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + vmovss(src, dest.toAddress()); + break; + case Operand::MEM_SCALE: + vmovss(src, dest.toBaseIndex()); + break; + default: + MOZ_CRASH("Unknown operand for vmovss"); + } + } + // Note special semantics of this - does not clobber high bits of destination. + void vmovss(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + masm.vmovss_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vmovdqu(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + MOZ_ASSERT(hasCreator()); + switch (src.kind()) { + case Operand::MEM_REG_DISP: + masm.vmovdqu_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.vmovdqu_mr(src.disp(), src.base(), src.index(), src.scale(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmovdqu(FloatRegister src, const Operand& dest) { + MOZ_ASSERT(HasSSE2()); + MOZ_ASSERT(hasCreator()); + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.vmovdqu_rm(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.vmovdqu_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), + dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmovdqa(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src.kind()) { + case Operand::FPREG: + masm.vmovdqa_rr(src.fpu(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vmovdqa_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.vmovdqa_mr(src.disp(), src.base(), src.index(), src.scale(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmovdqa(FloatRegister src, const Operand& dest) { + MOZ_ASSERT(HasSSE2()); + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.vmovdqa_rm(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.vmovdqa_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), + dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmovdqa(FloatRegister src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmovdqa_rr(src.encoding(), dest.encoding()); + } + void vcvtss2sd(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vcvtss2sd_rr(src1.encoding(), 
src0.encoding(), dest.encoding()); + } + void vcvtsd2ss(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vcvtsd2ss_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void movzbl(const Operand& src, Register dest) { + switch (src.kind()) { + case Operand::MEM_REG_DISP: + masm.movzbl_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.movzbl_mr(src.disp(), src.base(), src.index(), src.scale(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void movsbl(Register src, Register dest) { + masm.movsbl_rr(src.encoding(), dest.encoding()); + } + void movsbl(const Operand& src, Register dest) { + switch (src.kind()) { + case Operand::MEM_REG_DISP: + masm.movsbl_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.movsbl_mr(src.disp(), src.base(), src.index(), src.scale(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void movb(const Operand& src, Register dest) { + switch (src.kind()) { + case Operand::MEM_REG_DISP: + masm.movb_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.movb_mr(src.disp(), src.base(), src.index(), src.scale(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void movb(Imm32 src, Register dest) { + masm.movb_ir(src.value & 255, dest.encoding()); + } + void movb(Register src, const Operand& dest) { + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.movb_rm(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.movb_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), + dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void movb(Imm32 src, const Operand& dest) { + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.movb_im(src.value, dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.movb_im(src.value, dest.disp(), dest.base(), dest.index(), + dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void movzwl(const Operand& src, Register dest) { + switch (src.kind()) { + case Operand::REG: + masm.movzwl_rr(src.reg(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.movzwl_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.movzwl_mr(src.disp(), src.base(), src.index(), src.scale(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void movzwl(Register src, Register dest) { + masm.movzwl_rr(src.encoding(), dest.encoding()); + } + void movw(const Operand& src, Register dest) { + masm.prefix_16_for_32(); + movl(src, dest); + } + void movw(Imm32 src, Register dest) { + masm.prefix_16_for_32(); + movl(src, dest); + } + void movw(Register src, const Operand& dest) { + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.movw_rm(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.movw_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), + dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void movw(Imm32 src, const Operand& dest) { + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.movw_im(src.value, dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.movw_im(src.value, dest.disp(), dest.base(), dest.index(), + dest.scale()); + break; + default: + 
MOZ_CRASH("unexpected operand kind"); + } + } + void movswl(Register src, Register dest) { + masm.movswl_rr(src.encoding(), dest.encoding()); + } + void movswl(const Operand& src, Register dest) { + switch (src.kind()) { + case Operand::MEM_REG_DISP: + masm.movswl_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.movswl_mr(src.disp(), src.base(), src.index(), src.scale(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void leal(const Operand& src, Register dest) { + switch (src.kind()) { + case Operand::MEM_REG_DISP: + masm.leal_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.leal_mr(src.disp(), src.base(), src.index(), src.scale(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + + protected: + void jSrc(Condition cond, Label* label) { + if (label->bound()) { + // The jump can be immediately encoded to the correct destination. + masm.jCC_i(static_cast<X86Encoding::Condition>(cond), + JmpDst(label->offset())); + } else { + // Thread the jump list through the unpatched jump targets. + JmpSrc j = masm.jCC(static_cast<X86Encoding::Condition>(cond)); + JmpSrc prev; + if (label->used()) { + prev = JmpSrc(label->offset()); + } + label->use(j.offset()); + masm.setNextJump(j, prev); + } + } + void jmpSrc(Label* label) { + if (label->bound()) { + // The jump can be immediately encoded to the correct destination. + masm.jmp_i(JmpDst(label->offset())); + } else { + // Thread the jump list through the unpatched jump targets. + JmpSrc j = masm.jmp(); + JmpSrc prev; + if (label->used()) { + prev = JmpSrc(label->offset()); + } + label->use(j.offset()); + masm.setNextJump(j, prev); + } + } + + // Comparison of EAX against the address given by a Label. + JmpSrc cmpSrc(Label* label) { + JmpSrc j = masm.cmp_eax(); + if (label->bound()) { + // The jump can be immediately patched to the correct destination. + masm.linkJump(j, JmpDst(label->offset())); + } else { + // Thread the jump list through the unpatched jump targets. + JmpSrc prev; + if (label->used()) { + prev = JmpSrc(label->offset()); + } + label->use(j.offset()); + masm.setNextJump(j, prev); + } + return j; + } + + public: + void nop() { + MOZ_ASSERT(hasCreator()); + masm.nop(); + } + void nop(size_t n) { + MOZ_ASSERT(hasCreator()); + masm.insert_nop(n); + } + void j(Condition cond, Label* label) { + MOZ_ASSERT(hasCreator()); + jSrc(cond, label); + } + void jmp(Label* label) { + MOZ_ASSERT(hasCreator()); + jmpSrc(label); + } + + void jmp(const Operand& op) { + MOZ_ASSERT(hasCreator()); + switch (op.kind()) { + case Operand::MEM_REG_DISP: + masm.jmp_m(op.disp(), op.base()); + break; + case Operand::MEM_SCALE: + masm.jmp_m(op.disp(), op.base(), op.index(), op.scale()); + break; + case Operand::REG: + masm.jmp_r(op.reg()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void cmpEAX(Label* label) { cmpSrc(label); } + void bind(Label* label) { + JmpDst dst(masm.label()); + if (label->used()) { + bool more; + JmpSrc jmp(label->offset()); + do { + JmpSrc next; + more = masm.nextJump(jmp, &next); + masm.linkJump(jmp, dst); + jmp = next; + } while (more); + } + label->bind(dst.offset()); + } + void bind(CodeLabel* label) { label->target()->bind(currentOffset()); } + uint32_t currentOffset() { return masm.label().offset(); } + + // Re-routes pending jumps to a new label. 
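+  // Usage sketch (illustration only; 'fail' and 'commonFail' are
+  // hypothetical labels and 'cond' an arbitrary Condition, none of them
+  // part of this patch):
+  //
+  //   Label fail, commonFail;
+  //   j(cond, &fail);                // records an unbound use of 'fail'
+  //   ...
+  //   retarget(&fail, &commonFail);  // pending jumps to 'fail' now reach
+  //                                  // 'commonFail' instead
+  //   bind(&commonFail);
+  //
+  // Pending jumps are kept as a singly linked list threaded through the
+  // not-yet-patched jump offsets (see jSrc() and bind() above); retarget()
+  // either patches them directly when 'target' is already bound, or
+  // splices them onto 'target's pending list.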
+ void retarget(Label* label, Label* target) { + if (!label->used()) { + return; + } + bool more; + JmpSrc jmp(label->offset()); + do { + JmpSrc next; + more = masm.nextJump(jmp, &next); + if (target->bound()) { + // The jump can be immediately patched to the correct destination. + masm.linkJump(jmp, JmpDst(target->offset())); + } else { + // Thread the jump list through the unpatched jump targets. + JmpSrc prev; + if (target->used()) { + prev = JmpSrc(target->offset()); + } + target->use(jmp.offset()); + masm.setNextJump(jmp, prev); + } + jmp = JmpSrc(next.offset()); + } while (more); + label->reset(); + } + + static void Bind(uint8_t* raw, const CodeLabel& label) { + if (label.patchAt().bound()) { + intptr_t offset = label.patchAt().offset(); + intptr_t target = label.target().offset(); + X86Encoding::SetPointer(raw + offset, raw + target); + } + } + + void ret() { + MOZ_ASSERT(hasCreator()); + masm.ret(); + } + void retn(Imm32 n) { + MOZ_ASSERT(hasCreator()); + // Remove the size of the return address which is included in the frame. + masm.ret_i(n.value - sizeof(void*)); + } + CodeOffset call(Label* label) { + JmpSrc j = masm.call(); + if (label->bound()) { + masm.linkJump(j, JmpDst(label->offset())); + } else { + JmpSrc prev; + if (label->used()) { + prev = JmpSrc(label->offset()); + } + label->use(j.offset()); + masm.setNextJump(j, prev); + } + return CodeOffset(masm.currentOffset()); + } + CodeOffset call(Register reg) { + masm.call_r(reg.encoding()); + return CodeOffset(masm.currentOffset()); + } + void call(const Operand& op) { + switch (op.kind()) { + case Operand::REG: + masm.call_r(op.reg()); + break; + case Operand::MEM_REG_DISP: + masm.call_m(op.disp(), op.base()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + + CodeOffset callWithPatch() { return CodeOffset(masm.call().offset()); } + + void patchCall(uint32_t callerOffset, uint32_t calleeOffset) { + unsigned char* code = masm.data(); + X86Encoding::SetRel32(code + callerOffset, code + calleeOffset); + } + CodeOffset farJumpWithPatch() { return CodeOffset(masm.jmp().offset()); } + void patchFarJump(CodeOffset farJump, uint32_t targetOffset) { + unsigned char* code = masm.data(); + X86Encoding::SetRel32(code + farJump.offset(), code + targetOffset); + } + + // This is for patching during code generation, not after. 
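+  // Sketch of the presumed pairing with addlWithPatch() below (illustration
+  // only; the placeholder names are not part of this patch):
+  //
+  //   CodeOffset off = addlWithPatch(Imm32(0), reg);  // placeholder imm32
+  //   ...                         // the real value becomes known later on
+  //   patchAddl(off, realValue);  // rewrite the 32-bit immediate in place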
+ void patchAddl(CodeOffset offset, int32_t n) { + unsigned char* code = masm.data(); + X86Encoding::SetInt32(code + offset.offset(), n); + } + + static void patchFiveByteNopToCall(uint8_t* callsite, uint8_t* target) { + X86Encoding::BaseAssembler::patchFiveByteNopToCall(callsite, target); + } + static void patchCallToFiveByteNop(uint8_t* callsite) { + X86Encoding::BaseAssembler::patchCallToFiveByteNop(callsite); + } + + void breakpoint() { masm.int3(); } + CodeOffset ud2() { + MOZ_ASSERT(hasCreator()); + CodeOffset off(masm.currentOffset()); + masm.ud2(); + return off; + } + + static bool HasSSE2() { return CPUInfo::IsSSE2Present(); } + static bool HasSSE3() { return CPUInfo::IsSSE3Present(); } + static bool HasSSSE3() { return CPUInfo::IsSSSE3Present(); } + static bool HasSSE41() { return CPUInfo::IsSSE41Present(); } + static bool HasSSE42() { return CPUInfo::IsSSE42Present(); } + static bool HasPOPCNT() { return CPUInfo::IsPOPCNTPresent(); } + static bool HasBMI1() { return CPUInfo::IsBMI1Present(); } + static bool HasBMI2() { return CPUInfo::IsBMI2Present(); } + static bool HasLZCNT() { return CPUInfo::IsLZCNTPresent(); } + static bool SupportsFloatingPoint() { return CPUInfo::IsSSE2Present(); } + static bool SupportsUnalignedAccesses() { return true; } + static bool SupportsFastUnalignedFPAccesses() { return true; } + static bool SupportsWasmSimd() { return CPUInfo::IsSSE41Present(); } + static bool HasAVX() { return CPUInfo::IsAVXPresent(); } + static bool HasAVX2() { return CPUInfo::IsAVX2Present(); } + static bool HasFMA() { return CPUInfo::IsFMAPresent(); } + + static bool HasRoundInstruction(RoundingMode mode) { + switch (mode) { + case RoundingMode::Up: + case RoundingMode::Down: + case RoundingMode::NearestTiesToEven: + case RoundingMode::TowardsZero: + return CPUInfo::IsSSE41Present(); + } + MOZ_CRASH("unexpected mode"); + } + + void cmpl(Register rhs, Register lhs) { + masm.cmpl_rr(rhs.encoding(), lhs.encoding()); + } + void cmpl(const Operand& rhs, Register lhs) { + switch (rhs.kind()) { + case Operand::REG: + masm.cmpl_rr(rhs.reg(), lhs.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.cmpl_mr(rhs.disp(), rhs.base(), lhs.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.cmpl_mr(rhs.address(), lhs.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void cmpl(Register rhs, const Operand& lhs) { + switch (lhs.kind()) { + case Operand::REG: + masm.cmpl_rr(rhs.encoding(), lhs.reg()); + break; + case Operand::MEM_REG_DISP: + masm.cmpl_rm(rhs.encoding(), lhs.disp(), lhs.base()); + break; + case Operand::MEM_SCALE: + masm.cmpl_rm(rhs.encoding(), lhs.disp(), lhs.base(), lhs.index(), + lhs.scale()); + break; + case Operand::MEM_ADDRESS32: + masm.cmpl_rm(rhs.encoding(), lhs.address()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void cmpl(Imm32 rhs, Register lhs) { + masm.cmpl_ir(rhs.value, lhs.encoding()); + } + void cmpl(Imm32 rhs, const Operand& lhs) { + switch (lhs.kind()) { + case Operand::REG: + masm.cmpl_ir(rhs.value, lhs.reg()); + break; + case Operand::MEM_REG_DISP: + masm.cmpl_im(rhs.value, lhs.disp(), lhs.base()); + break; + case Operand::MEM_SCALE: + masm.cmpl_im(rhs.value, lhs.disp(), lhs.base(), lhs.index(), + lhs.scale()); + break; + case Operand::MEM_ADDRESS32: + masm.cmpl_im(rhs.value, lhs.address()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void cmpw(Register rhs, Register lhs) { + masm.cmpw_rr(rhs.encoding(), lhs.encoding()); + } + void cmpw(Imm32 rhs, const 
Operand& lhs) { + switch (lhs.kind()) { + case Operand::REG: + masm.cmpw_ir(rhs.value, lhs.reg()); + break; + case Operand::MEM_REG_DISP: + masm.cmpw_im(rhs.value, lhs.disp(), lhs.base()); + break; + case Operand::MEM_SCALE: + masm.cmpw_im(rhs.value, lhs.disp(), lhs.base(), lhs.index(), + lhs.scale()); + break; + case Operand::MEM_ADDRESS32: + masm.cmpw_im(rhs.value, lhs.address()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void cmpb(Register rhs, const Operand& lhs) { + switch (lhs.kind()) { + case Operand::REG: + masm.cmpb_rr(rhs.encoding(), lhs.reg()); + break; + case Operand::MEM_REG_DISP: + masm.cmpb_rm(rhs.encoding(), lhs.disp(), lhs.base()); + break; + case Operand::MEM_SCALE: + masm.cmpb_rm(rhs.encoding(), lhs.disp(), lhs.base(), lhs.index(), + lhs.scale()); + break; + case Operand::MEM_ADDRESS32: + masm.cmpb_rm(rhs.encoding(), lhs.address()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void cmpb(Imm32 rhs, const Operand& lhs) { + switch (lhs.kind()) { + case Operand::REG: + masm.cmpb_ir(rhs.value, lhs.reg()); + break; + case Operand::MEM_REG_DISP: + masm.cmpb_im(rhs.value, lhs.disp(), lhs.base()); + break; + case Operand::MEM_SCALE: + masm.cmpb_im(rhs.value, lhs.disp(), lhs.base(), lhs.index(), + lhs.scale()); + break; + case Operand::MEM_ADDRESS32: + masm.cmpb_im(rhs.value, lhs.address()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void setCC(Condition cond, Register r) { + masm.setCC_r(static_cast<X86Encoding::Condition>(cond), r.encoding()); + } + void testb(Register rhs, Register lhs) { + MOZ_ASSERT( + AllocatableGeneralRegisterSet(Registers::SingleByteRegs).has(rhs)); + MOZ_ASSERT( + AllocatableGeneralRegisterSet(Registers::SingleByteRegs).has(lhs)); + masm.testb_rr(rhs.encoding(), lhs.encoding()); + } + void testw(Register rhs, Register lhs) { + masm.testw_rr(lhs.encoding(), rhs.encoding()); + } + void testl(Register rhs, Register lhs) { + masm.testl_rr(lhs.encoding(), rhs.encoding()); + } + void testl(Imm32 rhs, Register lhs) { + masm.testl_ir(rhs.value, lhs.encoding()); + } + void testl(Imm32 rhs, const Operand& lhs) { + switch (lhs.kind()) { + case Operand::REG: + masm.testl_ir(rhs.value, lhs.reg()); + break; + case Operand::MEM_REG_DISP: + masm.testl_i32m(rhs.value, lhs.disp(), lhs.base()); + break; + case Operand::MEM_ADDRESS32: + masm.testl_i32m(rhs.value, lhs.address()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + break; + } + } + + void addl(Imm32 imm, Register dest) { + masm.addl_ir(imm.value, dest.encoding()); + } + CodeOffset addlWithPatch(Imm32 imm, Register dest) { + masm.addl_i32r(imm.value, dest.encoding()); + return CodeOffset(masm.currentOffset()); + } + void addl(Imm32 imm, const Operand& op) { + switch (op.kind()) { + case Operand::REG: + masm.addl_ir(imm.value, op.reg()); + break; + case Operand::MEM_REG_DISP: + masm.addl_im(imm.value, op.disp(), op.base()); + break; + case Operand::MEM_ADDRESS32: + masm.addl_im(imm.value, op.address()); + break; + case Operand::MEM_SCALE: + masm.addl_im(imm.value, op.disp(), op.base(), op.index(), op.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void addw(Imm32 imm, const Operand& op) { + switch (op.kind()) { + case Operand::REG: + masm.addw_ir(imm.value, op.reg()); + break; + case Operand::MEM_REG_DISP: + masm.addw_im(imm.value, op.disp(), op.base()); + break; + case Operand::MEM_ADDRESS32: + masm.addw_im(imm.value, op.address()); + break; + case Operand::MEM_SCALE: + 
masm.addw_im(imm.value, op.disp(), op.base(), op.index(), op.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void subl(Imm32 imm, Register dest) { + masm.subl_ir(imm.value, dest.encoding()); + } + void subl(Imm32 imm, const Operand& op) { + switch (op.kind()) { + case Operand::REG: + masm.subl_ir(imm.value, op.reg()); + break; + case Operand::MEM_REG_DISP: + masm.subl_im(imm.value, op.disp(), op.base()); + break; + case Operand::MEM_SCALE: + masm.subl_im(imm.value, op.disp(), op.base(), op.index(), op.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void subw(Imm32 imm, const Operand& op) { + switch (op.kind()) { + case Operand::REG: + masm.subw_ir(imm.value, op.reg()); + break; + case Operand::MEM_REG_DISP: + masm.subw_im(imm.value, op.disp(), op.base()); + break; + case Operand::MEM_SCALE: + masm.subw_im(imm.value, op.disp(), op.base(), op.index(), op.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void addl(Register src, Register dest) { + masm.addl_rr(src.encoding(), dest.encoding()); + } + void addl(Register src, const Operand& dest) { + switch (dest.kind()) { + case Operand::REG: + masm.addl_rr(src.encoding(), dest.reg()); + break; + case Operand::MEM_REG_DISP: + masm.addl_rm(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.addl_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), + dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void addw(Register src, const Operand& dest) { + switch (dest.kind()) { + case Operand::REG: + masm.addw_rr(src.encoding(), dest.reg()); + break; + case Operand::MEM_REG_DISP: + masm.addw_rm(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.addw_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), + dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void sbbl(Register src, Register dest) { + masm.sbbl_rr(src.encoding(), dest.encoding()); + } + void subl(Register src, Register dest) { + masm.subl_rr(src.encoding(), dest.encoding()); + } + void subl(const Operand& src, Register dest) { + switch (src.kind()) { + case Operand::REG: + masm.subl_rr(src.reg(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.subl_mr(src.disp(), src.base(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void subl(Register src, const Operand& dest) { + switch (dest.kind()) { + case Operand::REG: + masm.subl_rr(src.encoding(), dest.reg()); + break; + case Operand::MEM_REG_DISP: + masm.subl_rm(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.subl_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), + dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void subw(Register src, const Operand& dest) { + switch (dest.kind()) { + case Operand::REG: + masm.subw_rr(src.encoding(), dest.reg()); + break; + case Operand::MEM_REG_DISP: + masm.subw_rm(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.subw_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), + dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void orl(Register reg, Register dest) { + masm.orl_rr(reg.encoding(), dest.encoding()); + } + void orl(Register src, const Operand& dest) { + switch (dest.kind()) { + case Operand::REG: + masm.orl_rr(src.encoding(), dest.reg()); + break; + 
case Operand::MEM_REG_DISP: + masm.orl_rm(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.orl_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), + dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void orw(Register src, const Operand& dest) { + switch (dest.kind()) { + case Operand::REG: + masm.orw_rr(src.encoding(), dest.reg()); + break; + case Operand::MEM_REG_DISP: + masm.orw_rm(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.orw_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), + dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void orl(Imm32 imm, Register reg) { masm.orl_ir(imm.value, reg.encoding()); } + void orl(Imm32 imm, const Operand& op) { + switch (op.kind()) { + case Operand::REG: + masm.orl_ir(imm.value, op.reg()); + break; + case Operand::MEM_REG_DISP: + masm.orl_im(imm.value, op.disp(), op.base()); + break; + case Operand::MEM_SCALE: + masm.orl_im(imm.value, op.disp(), op.base(), op.index(), op.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void orw(Imm32 imm, const Operand& op) { + switch (op.kind()) { + case Operand::REG: + masm.orw_ir(imm.value, op.reg()); + break; + case Operand::MEM_REG_DISP: + masm.orw_im(imm.value, op.disp(), op.base()); + break; + case Operand::MEM_SCALE: + masm.orw_im(imm.value, op.disp(), op.base(), op.index(), op.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void xorl(Register src, Register dest) { + masm.xorl_rr(src.encoding(), dest.encoding()); + } + void xorl(Register src, const Operand& dest) { + switch (dest.kind()) { + case Operand::REG: + masm.xorl_rr(src.encoding(), dest.reg()); + break; + case Operand::MEM_REG_DISP: + masm.xorl_rm(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.xorl_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), + dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void xorw(Register src, const Operand& dest) { + switch (dest.kind()) { + case Operand::REG: + masm.xorw_rr(src.encoding(), dest.reg()); + break; + case Operand::MEM_REG_DISP: + masm.xorw_rm(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.xorw_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), + dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void xorl(Imm32 imm, Register reg) { + masm.xorl_ir(imm.value, reg.encoding()); + } + void xorl(Imm32 imm, const Operand& op) { + switch (op.kind()) { + case Operand::REG: + masm.xorl_ir(imm.value, op.reg()); + break; + case Operand::MEM_REG_DISP: + masm.xorl_im(imm.value, op.disp(), op.base()); + break; + case Operand::MEM_SCALE: + masm.xorl_im(imm.value, op.disp(), op.base(), op.index(), op.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void xorw(Imm32 imm, const Operand& op) { + switch (op.kind()) { + case Operand::REG: + masm.xorw_ir(imm.value, op.reg()); + break; + case Operand::MEM_REG_DISP: + masm.xorw_im(imm.value, op.disp(), op.base()); + break; + case Operand::MEM_SCALE: + masm.xorw_im(imm.value, op.disp(), op.base(), op.index(), op.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void andl(Register src, Register dest) { + masm.andl_rr(src.encoding(), dest.encoding()); + } + void andl(Register src, const Operand& dest) { + switch (dest.kind()) { + case 
Operand::REG: + masm.andl_rr(src.encoding(), dest.reg()); + break; + case Operand::MEM_REG_DISP: + masm.andl_rm(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.andl_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), + dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void andw(Register src, const Operand& dest) { + switch (dest.kind()) { + case Operand::REG: + masm.andw_rr(src.encoding(), dest.reg()); + break; + case Operand::MEM_REG_DISP: + masm.andw_rm(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.andw_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), + dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void andl(Imm32 imm, Register dest) { + masm.andl_ir(imm.value, dest.encoding()); + } + void andl(Imm32 imm, const Operand& op) { + switch (op.kind()) { + case Operand::REG: + masm.andl_ir(imm.value, op.reg()); + break; + case Operand::MEM_REG_DISP: + masm.andl_im(imm.value, op.disp(), op.base()); + break; + case Operand::MEM_SCALE: + masm.andl_im(imm.value, op.disp(), op.base(), op.index(), op.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void andw(Imm32 imm, const Operand& op) { + switch (op.kind()) { + case Operand::REG: + masm.andw_ir(imm.value, op.reg()); + break; + case Operand::MEM_REG_DISP: + masm.andw_im(imm.value, op.disp(), op.base()); + break; + case Operand::MEM_SCALE: + masm.andw_im(imm.value, op.disp(), op.base(), op.index(), op.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void addl(const Operand& src, Register dest) { + switch (src.kind()) { + case Operand::REG: + masm.addl_rr(src.reg(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.addl_mr(src.disp(), src.base(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void orl(const Operand& src, Register dest) { + switch (src.kind()) { + case Operand::REG: + masm.orl_rr(src.reg(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.orl_mr(src.disp(), src.base(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void xorl(const Operand& src, Register dest) { + switch (src.kind()) { + case Operand::REG: + masm.xorl_rr(src.reg(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.xorl_mr(src.disp(), src.base(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void andl(const Operand& src, Register dest) { + switch (src.kind()) { + case Operand::REG: + masm.andl_rr(src.reg(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.andl_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.andl_mr(src.disp(), src.base(), src.index(), src.scale(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void bsrl(const Register& src, const Register& dest) { + masm.bsrl_rr(src.encoding(), dest.encoding()); + } + void bsfl(const Register& src, const Register& dest) { + masm.bsfl_rr(src.encoding(), dest.encoding()); + } + void bswapl(Register reg) { masm.bswapl_r(reg.encoding()); } + void lzcntl(const Register& src, const Register& dest) { + masm.lzcntl_rr(src.encoding(), dest.encoding()); + } + void tzcntl(const Register& src, const Register& dest) { + masm.tzcntl_rr(src.encoding(), dest.encoding()); + } + void popcntl(const Register& src, const Register& dest) { + 
masm.popcntl_rr(src.encoding(), dest.encoding()); + } + void imull(Register multiplier) { + // Consumes eax as the other argument + // and clobbers edx, as result is in edx:eax + masm.imull_r(multiplier.encoding()); + } + void umull(Register multiplier) { masm.mull_r(multiplier.encoding()); } + void imull(Imm32 imm, Register dest) { + masm.imull_ir(imm.value, dest.encoding(), dest.encoding()); + } + void imull(Register src, Register dest) { + masm.imull_rr(src.encoding(), dest.encoding()); + } + void imull(Imm32 imm, Register src, Register dest) { + masm.imull_ir(imm.value, src.encoding(), dest.encoding()); + } + void imull(const Operand& src, Register dest) { + switch (src.kind()) { + case Operand::REG: + masm.imull_rr(src.reg(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.imull_mr(src.disp(), src.base(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void negl(const Operand& src) { + switch (src.kind()) { + case Operand::REG: + masm.negl_r(src.reg()); + break; + case Operand::MEM_REG_DISP: + masm.negl_m(src.disp(), src.base()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void negl(Register reg) { masm.negl_r(reg.encoding()); } + void notl(const Operand& src) { + switch (src.kind()) { + case Operand::REG: + masm.notl_r(src.reg()); + break; + case Operand::MEM_REG_DISP: + masm.notl_m(src.disp(), src.base()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void notl(Register reg) { masm.notl_r(reg.encoding()); } + void shrl(const Imm32 imm, Register dest) { + masm.shrl_ir(imm.value, dest.encoding()); + } + void shll(const Imm32 imm, Register dest) { + masm.shll_ir(imm.value, dest.encoding()); + } + void sarl(const Imm32 imm, Register dest) { + masm.sarl_ir(imm.value, dest.encoding()); + } + void shrl_cl(Register dest) { masm.shrl_CLr(dest.encoding()); } + void shll_cl(Register dest) { masm.shll_CLr(dest.encoding()); } + void sarl_cl(Register dest) { masm.sarl_CLr(dest.encoding()); } + void shrdl_cl(Register src, Register dest) { + masm.shrdl_CLr(src.encoding(), dest.encoding()); + } + void shldl_cl(Register src, Register dest) { + masm.shldl_CLr(src.encoding(), dest.encoding()); + } + + void sarxl(Register src, Register shift, Register dest) { + MOZ_ASSERT(HasBMI2()); + masm.sarxl_rrr(src.encoding(), shift.encoding(), dest.encoding()); + } + void shlxl(Register src, Register shift, Register dest) { + MOZ_ASSERT(HasBMI2()); + masm.shlxl_rrr(src.encoding(), shift.encoding(), dest.encoding()); + } + void shrxl(Register src, Register shift, Register dest) { + MOZ_ASSERT(HasBMI2()); + masm.shrxl_rrr(src.encoding(), shift.encoding(), dest.encoding()); + } + + void roll(const Imm32 imm, Register dest) { + masm.roll_ir(imm.value, dest.encoding()); + } + void roll_cl(Register dest) { masm.roll_CLr(dest.encoding()); } + void rolw(const Imm32 imm, Register dest) { + masm.rolw_ir(imm.value, dest.encoding()); + } + void rorl(const Imm32 imm, Register dest) { + masm.rorl_ir(imm.value, dest.encoding()); + } + void rorl_cl(Register dest) { masm.rorl_CLr(dest.encoding()); } + + void incl(const Operand& op) { + switch (op.kind()) { + case Operand::MEM_REG_DISP: + masm.incl_m32(op.disp(), op.base()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void lock_incl(const Operand& op) { + masm.prefix_lock(); + incl(op); + } + + void decl(const Operand& op) { + switch (op.kind()) { + case Operand::MEM_REG_DISP: + masm.decl_m32(op.disp(), op.base()); + break; + default: + 
MOZ_CRASH("unexpected operand kind"); + } + } + void lock_decl(const Operand& op) { + masm.prefix_lock(); + decl(op); + } + + void addb(Imm32 imm, const Operand& op) { + switch (op.kind()) { + case Operand::MEM_REG_DISP: + masm.addb_im(imm.value, op.disp(), op.base()); + break; + case Operand::MEM_SCALE: + masm.addb_im(imm.value, op.disp(), op.base(), op.index(), op.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + break; + } + } + void addb(Register src, const Operand& op) { + switch (op.kind()) { + case Operand::MEM_REG_DISP: + masm.addb_rm(src.encoding(), op.disp(), op.base()); + break; + case Operand::MEM_SCALE: + masm.addb_rm(src.encoding(), op.disp(), op.base(), op.index(), + op.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + break; + } + } + + void subb(Imm32 imm, const Operand& op) { + switch (op.kind()) { + case Operand::MEM_REG_DISP: + masm.subb_im(imm.value, op.disp(), op.base()); + break; + case Operand::MEM_SCALE: + masm.subb_im(imm.value, op.disp(), op.base(), op.index(), op.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + break; + } + } + void subb(Register src, const Operand& op) { + switch (op.kind()) { + case Operand::MEM_REG_DISP: + masm.subb_rm(src.encoding(), op.disp(), op.base()); + break; + case Operand::MEM_SCALE: + masm.subb_rm(src.encoding(), op.disp(), op.base(), op.index(), + op.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + break; + } + } + + void andb(Imm32 imm, const Operand& op) { + switch (op.kind()) { + case Operand::MEM_REG_DISP: + masm.andb_im(imm.value, op.disp(), op.base()); + break; + case Operand::MEM_SCALE: + masm.andb_im(imm.value, op.disp(), op.base(), op.index(), op.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + break; + } + } + void andb(Register src, const Operand& op) { + switch (op.kind()) { + case Operand::MEM_REG_DISP: + masm.andb_rm(src.encoding(), op.disp(), op.base()); + break; + case Operand::MEM_SCALE: + masm.andb_rm(src.encoding(), op.disp(), op.base(), op.index(), + op.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + break; + } + } + + void orb(Imm32 imm, const Operand& op) { + switch (op.kind()) { + case Operand::MEM_REG_DISP: + masm.orb_im(imm.value, op.disp(), op.base()); + break; + case Operand::MEM_SCALE: + masm.orb_im(imm.value, op.disp(), op.base(), op.index(), op.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + break; + } + } + void orb(Register src, const Operand& op) { + switch (op.kind()) { + case Operand::MEM_REG_DISP: + masm.orb_rm(src.encoding(), op.disp(), op.base()); + break; + case Operand::MEM_SCALE: + masm.orb_rm(src.encoding(), op.disp(), op.base(), op.index(), + op.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + break; + } + } + + void xorb(Imm32 imm, const Operand& op) { + switch (op.kind()) { + case Operand::MEM_REG_DISP: + masm.xorb_im(imm.value, op.disp(), op.base()); + break; + case Operand::MEM_SCALE: + masm.xorb_im(imm.value, op.disp(), op.base(), op.index(), op.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + break; + } + } + void xorb(Register src, const Operand& op) { + switch (op.kind()) { + case Operand::MEM_REG_DISP: + masm.xorb_rm(src.encoding(), op.disp(), op.base()); + break; + case Operand::MEM_SCALE: + masm.xorb_rm(src.encoding(), op.disp(), op.base(), op.index(), + op.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + break; + } + } + + template <typename T> + void 
lock_addb(T src, const Operand& op) { + masm.prefix_lock(); + addb(src, op); + } + template <typename T> + void lock_subb(T src, const Operand& op) { + masm.prefix_lock(); + subb(src, op); + } + template <typename T> + void lock_andb(T src, const Operand& op) { + masm.prefix_lock(); + andb(src, op); + } + template <typename T> + void lock_orb(T src, const Operand& op) { + masm.prefix_lock(); + orb(src, op); + } + template <typename T> + void lock_xorb(T src, const Operand& op) { + masm.prefix_lock(); + xorb(src, op); + } + + template <typename T> + void lock_addw(T src, const Operand& op) { + masm.prefix_lock(); + addw(src, op); + } + template <typename T> + void lock_subw(T src, const Operand& op) { + masm.prefix_lock(); + subw(src, op); + } + template <typename T> + void lock_andw(T src, const Operand& op) { + masm.prefix_lock(); + andw(src, op); + } + template <typename T> + void lock_orw(T src, const Operand& op) { + masm.prefix_lock(); + orw(src, op); + } + template <typename T> + void lock_xorw(T src, const Operand& op) { + masm.prefix_lock(); + xorw(src, op); + } + + // Note, lock_addl(imm, op) is used for a memory barrier on non-SSE2 systems, + // among other things. Do not optimize, replace by XADDL, or similar. + template <typename T> + void lock_addl(T src, const Operand& op) { + masm.prefix_lock(); + addl(src, op); + } + template <typename T> + void lock_subl(T src, const Operand& op) { + masm.prefix_lock(); + subl(src, op); + } + template <typename T> + void lock_andl(T src, const Operand& op) { + masm.prefix_lock(); + andl(src, op); + } + template <typename T> + void lock_orl(T src, const Operand& op) { + masm.prefix_lock(); + orl(src, op); + } + template <typename T> + void lock_xorl(T src, const Operand& op) { + masm.prefix_lock(); + xorl(src, op); + } + + void lock_cmpxchgb(Register src, const Operand& mem) { + masm.prefix_lock(); + switch (mem.kind()) { + case Operand::MEM_REG_DISP: + masm.cmpxchgb(src.encoding(), mem.disp(), mem.base()); + break; + case Operand::MEM_SCALE: + masm.cmpxchgb(src.encoding(), mem.disp(), mem.base(), mem.index(), + mem.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void lock_cmpxchgw(Register src, const Operand& mem) { + masm.prefix_lock(); + switch (mem.kind()) { + case Operand::MEM_REG_DISP: + masm.cmpxchgw(src.encoding(), mem.disp(), mem.base()); + break; + case Operand::MEM_SCALE: + masm.cmpxchgw(src.encoding(), mem.disp(), mem.base(), mem.index(), + mem.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void lock_cmpxchgl(Register src, const Operand& mem) { + masm.prefix_lock(); + switch (mem.kind()) { + case Operand::MEM_REG_DISP: + masm.cmpxchgl(src.encoding(), mem.disp(), mem.base()); + break; + case Operand::MEM_SCALE: + masm.cmpxchgl(src.encoding(), mem.disp(), mem.base(), mem.index(), + mem.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void lock_cmpxchg8b(Register srcHi, Register srcLo, Register newHi, + Register newLo, const Operand& mem) { + masm.prefix_lock(); + switch (mem.kind()) { + case Operand::MEM_REG_DISP: + masm.cmpxchg8b(srcHi.encoding(), srcLo.encoding(), newHi.encoding(), + newLo.encoding(), mem.disp(), mem.base()); + break; + case Operand::MEM_SCALE: + masm.cmpxchg8b(srcHi.encoding(), srcLo.encoding(), newHi.encoding(), + newLo.encoding(), mem.disp(), mem.base(), mem.index(), + mem.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + + void xchgb(Register src, const Operand& mem) { + switch 
(mem.kind()) { + case Operand::MEM_REG_DISP: + masm.xchgb_rm(src.encoding(), mem.disp(), mem.base()); + break; + case Operand::MEM_SCALE: + masm.xchgb_rm(src.encoding(), mem.disp(), mem.base(), mem.index(), + mem.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void xchgw(Register src, const Operand& mem) { + switch (mem.kind()) { + case Operand::MEM_REG_DISP: + masm.xchgw_rm(src.encoding(), mem.disp(), mem.base()); + break; + case Operand::MEM_SCALE: + masm.xchgw_rm(src.encoding(), mem.disp(), mem.base(), mem.index(), + mem.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void xchgl(Register src, const Operand& mem) { + switch (mem.kind()) { + case Operand::MEM_REG_DISP: + masm.xchgl_rm(src.encoding(), mem.disp(), mem.base()); + break; + case Operand::MEM_SCALE: + masm.xchgl_rm(src.encoding(), mem.disp(), mem.base(), mem.index(), + mem.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + + void lock_xaddb(Register srcdest, const Operand& mem) { + switch (mem.kind()) { + case Operand::MEM_REG_DISP: + masm.lock_xaddb_rm(srcdest.encoding(), mem.disp(), mem.base()); + break; + case Operand::MEM_SCALE: + masm.lock_xaddb_rm(srcdest.encoding(), mem.disp(), mem.base(), + mem.index(), mem.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void lock_xaddw(Register srcdest, const Operand& mem) { + masm.prefix_16_for_32(); + lock_xaddl(srcdest, mem); + } + void lock_xaddl(Register srcdest, const Operand& mem) { + switch (mem.kind()) { + case Operand::MEM_REG_DISP: + masm.lock_xaddl_rm(srcdest.encoding(), mem.disp(), mem.base()); + break; + case Operand::MEM_SCALE: + masm.lock_xaddl_rm(srcdest.encoding(), mem.disp(), mem.base(), + mem.index(), mem.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + + void push(const Imm32 imm) { masm.push_i(imm.value); } + + void push(const Operand& src) { + MOZ_ASSERT(hasCreator()); + switch (src.kind()) { + case Operand::REG: + masm.push_r(src.reg()); + break; + case Operand::MEM_REG_DISP: + masm.push_m(src.disp(), src.base()); + break; + case Operand::MEM_SCALE: + masm.push_m(src.disp(), src.base(), src.index(), src.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void push(Register src) { + MOZ_ASSERT(hasCreator()); + masm.push_r(src.encoding()); + } + void push(const Address& src) { + masm.push_m(src.offset, src.base.encoding()); + } + + void pop(const Operand& src) { + MOZ_ASSERT(hasCreator()); + switch (src.kind()) { + case Operand::REG: + masm.pop_r(src.reg()); + break; + case Operand::MEM_REG_DISP: + masm.pop_m(src.disp(), src.base()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void pop(Register src) { + MOZ_ASSERT(hasCreator()); + masm.pop_r(src.encoding()); + } + void pop(const Address& src) { masm.pop_m(src.offset, src.base.encoding()); } + + void pushFlags() { masm.push_flags(); } + void popFlags() { masm.pop_flags(); } + +#ifdef JS_CODEGEN_X86 + void pushAllRegs() { masm.pusha(); } + void popAllRegs() { masm.popa(); } +#endif + + // Zero-extend byte to 32-bit integer. 
+ void movzbl(Register src, Register dest) { + masm.movzbl_rr(src.encoding(), dest.encoding()); + } + + void cdq() { masm.cdq(); } + void idiv(Register divisor) { masm.idivl_r(divisor.encoding()); } + void udiv(Register divisor) { masm.divl_r(divisor.encoding()); } + + void vpblendw(uint32_t mask, FloatRegister src1, FloatRegister src0, + FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + masm.vpblendw_irr(mask, src1.encoding(), src0.encoding(), dest.encoding()); + } + + void vpblendvb(FloatRegister mask, FloatRegister src1, FloatRegister src0, + FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + masm.vpblendvb_rr(mask.encoding(), src1.encoding(), src0.encoding(), + dest.encoding()); + } + + void vpinsrb(unsigned lane, const Operand& src1, FloatRegister src0, + FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + switch (src1.kind()) { + case Operand::REG: + masm.vpinsrb_irr(lane, src1.reg(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpinsrb_imr(lane, src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.vpinsrb_imr(lane, src1.disp(), src1.base(), src1.index(), + src1.scale(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpinsrw(unsigned lane, const Operand& src1, FloatRegister src0, + FloatRegister dest) { + switch (src1.kind()) { + case Operand::REG: + masm.vpinsrw_irr(lane, src1.reg(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpinsrw_imr(lane, src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.vpinsrw_imr(lane, src1.disp(), src1.base(), src1.index(), + src1.scale(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + + void vpinsrd(unsigned lane, Register src1, FloatRegister src0, + FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + masm.vpinsrd_irr(lane, src1.encoding(), src0.encoding(), dest.encoding()); + } + + void vpextrb(unsigned lane, FloatRegister src, const Operand& dest) { + MOZ_ASSERT(HasSSE41()); + switch (dest.kind()) { + case Operand::REG: + masm.vpextrb_irr(lane, src.encoding(), dest.reg()); + break; + case Operand::MEM_REG_DISP: + masm.vpextrb_irm(lane, src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.vpextrb_irm(lane, src.encoding(), dest.disp(), dest.base(), + dest.index(), dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpextrw(unsigned lane, FloatRegister src, const Operand& dest) { + MOZ_ASSERT(HasSSE41()); + switch (dest.kind()) { + case Operand::REG: + masm.vpextrw_irr(lane, src.encoding(), dest.reg()); + break; + case Operand::MEM_REG_DISP: + masm.vpextrw_irm(lane, src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.vpextrw_irm(lane, src.encoding(), dest.disp(), dest.base(), + dest.index(), dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpextrd(unsigned lane, FloatRegister src, Register dest) { + MOZ_ASSERT(HasSSE41()); + masm.vpextrd_irr(lane, src.encoding(), dest.encoding()); + } + void vpsrldq(Imm32 shift, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpsrldq_ir(shift.value, src0.encoding(), dest.encoding()); + } + void vpslldq(Imm32 shift, FloatRegister src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpslldq_ir(shift.value, src.encoding(), dest.encoding()); + } + void 
vpsllq(Imm32 shift, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpsllq_ir(shift.value, src0.encoding(), dest.encoding()); + } + void vpsllq(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpsllq_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vpsrlq(Imm32 shift, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpsrlq_ir(shift.value, src0.encoding(), dest.encoding()); + } + void vpsrlq(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpsrlq_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vpslld(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpslld_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vpslld(Imm32 count, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpslld_ir(count.value, src0.encoding(), dest.encoding()); + } + void vpsrad(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpsrad_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vpsrad(Imm32 count, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpsrad_ir(count.value, src0.encoding(), dest.encoding()); + } + void vpsrld(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpsrld_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vpsrld(Imm32 count, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpsrld_ir(count.value, src0.encoding(), dest.encoding()); + } + + void vpsllw(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpsllw_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vpsllw(Imm32 count, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpsllw_ir(count.value, src0.encoding(), dest.encoding()); + } + void vpsraw(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpsraw_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vpsraw(Imm32 count, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpsraw_ir(count.value, src0.encoding(), dest.encoding()); + } + void vpsrlw(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpsrlw_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vpsrlw(Imm32 count, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpsrlw_ir(count.value, src0.encoding(), dest.encoding()); + } + + void vcvtsi2sd(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::REG: + masm.vcvtsi2sd_rr(src1.reg(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vcvtsi2sd_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.vcvtsi2sd_mr(src1.disp(), src1.base(), src1.index(), src1.scale(), + src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vcvttsd2si(FloatRegister src, Register dest) { + MOZ_ASSERT(HasSSE2()); + masm.vcvttsd2si_rr(src.encoding(), dest.encoding()); + } + void vcvttss2si(FloatRegister src, Register dest) { + MOZ_ASSERT(HasSSE2()); + masm.vcvttss2si_rr(src.encoding(), dest.encoding()); + } + void vcvtsi2ss(const 
Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::REG: + masm.vcvtsi2ss_rr(src1.reg(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vcvtsi2ss_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.vcvtsi2ss_mr(src1.disp(), src1.base(), src1.index(), src1.scale(), + src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vcvtsi2ss(Register src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vcvtsi2ss_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vcvtsi2sd(Register src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vcvtsi2sd_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vcvttps2dq(FloatRegister src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vcvttps2dq_rr(src.encoding(), dest.encoding()); + } + void vcvttpd2dq(FloatRegister src, FloatRegister dest) { + masm.vcvttpd2dq_rr(src.encoding(), dest.encoding()); + } + void vcvtdq2ps(FloatRegister src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vcvtdq2ps_rr(src.encoding(), dest.encoding()); + } + void vcvtdq2pd(FloatRegister src, FloatRegister dest) { + masm.vcvtdq2pd_rr(src.encoding(), dest.encoding()); + } + void vcvtps2pd(FloatRegister src, FloatRegister dest) { + masm.vcvtps2pd_rr(src.encoding(), dest.encoding()); + } + void vcvtpd2ps(FloatRegister src, FloatRegister dest) { + masm.vcvtpd2ps_rr(src.encoding(), dest.encoding()); + } + void vmovmskpd(FloatRegister src, Register dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmovmskpd_rr(src.encoding(), dest.encoding()); + } + void vmovmskps(FloatRegister src, Register dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmovmskps_rr(src.encoding(), dest.encoding()); + } + void vpmovmskb(FloatRegister src, Register dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpmovmskb_rr(src.encoding(), dest.encoding()); + } + void vptest(FloatRegister rhs, FloatRegister lhs) { + MOZ_ASSERT(HasSSE41()); + masm.vptest_rr(rhs.encoding(), lhs.encoding()); + } + void vucomisd(FloatRegister rhs, FloatRegister lhs) { + MOZ_ASSERT(HasSSE2()); + masm.vucomisd_rr(rhs.encoding(), lhs.encoding()); + } + void vucomiss(FloatRegister rhs, FloatRegister lhs) { + MOZ_ASSERT(HasSSE2()); + masm.vucomiss_rr(rhs.encoding(), lhs.encoding()); + } + + void vpcmpeqb(const Operand& rhs, FloatRegister lhs, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (rhs.kind()) { + case Operand::FPREG: + masm.vpcmpeqb_rr(rhs.fpu(), lhs.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpcmpeqb_mr(rhs.disp(), rhs.base(), lhs.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpcmpeqb_mr(rhs.address(), lhs.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpcmpgtb(const Operand& rhs, FloatRegister lhs, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (rhs.kind()) { + case Operand::FPREG: + masm.vpcmpgtb_rr(rhs.fpu(), lhs.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpcmpgtb_mr(rhs.disp(), rhs.base(), lhs.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpcmpgtb_mr(rhs.address(), lhs.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + + void vpcmpeqw(const Operand& rhs, FloatRegister lhs, FloatRegister dest) { + 
MOZ_ASSERT(HasSSE2()); + switch (rhs.kind()) { + case Operand::FPREG: + masm.vpcmpeqw_rr(rhs.fpu(), lhs.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpcmpeqw_mr(rhs.disp(), rhs.base(), lhs.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpcmpeqw_mr(rhs.address(), lhs.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpcmpgtw(const Operand& rhs, FloatRegister lhs, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (rhs.kind()) { + case Operand::FPREG: + masm.vpcmpgtw_rr(rhs.fpu(), lhs.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpcmpgtw_mr(rhs.disp(), rhs.base(), lhs.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpcmpgtw_mr(rhs.address(), lhs.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + + void vpcmpeqd(const Operand& rhs, FloatRegister lhs, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (rhs.kind()) { + case Operand::FPREG: + masm.vpcmpeqd_rr(rhs.fpu(), lhs.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpcmpeqd_mr(rhs.disp(), rhs.base(), lhs.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpcmpeqd_mr(rhs.address(), lhs.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpcmpgtd(const Operand& rhs, FloatRegister lhs, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (rhs.kind()) { + case Operand::FPREG: + masm.vpcmpgtd_rr(rhs.fpu(), lhs.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpcmpgtd_mr(rhs.disp(), rhs.base(), lhs.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpcmpgtd_mr(rhs.address(), lhs.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpcmpgtq(const Operand& rhs, FloatRegister lhs, FloatRegister dest) { + MOZ_ASSERT(HasSSE42()); + switch (rhs.kind()) { + case Operand::FPREG: + masm.vpcmpgtq_rr(rhs.fpu(), lhs.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + + void vpcmpeqq(const Operand& rhs, FloatRegister lhs, FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + switch (rhs.kind()) { + case Operand::FPREG: + masm.vpcmpeqq_rr(rhs.fpu(), lhs.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpcmpeqq_mr(rhs.disp(), rhs.base(), lhs.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpcmpeqq_mr(rhs.address(), lhs.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + + void vcmpps(uint8_t order, Operand rhs, FloatRegister lhs, + FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (rhs.kind()) { + case Operand::FPREG: + masm.vcmpps_rr(order, rhs.fpu(), lhs.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vcmpps_mr(order, rhs.disp(), rhs.base(), lhs.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vcmpps_mr(order, rhs.address(), lhs.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vcmpeqps(const Operand& rhs, FloatRegister lhs, FloatRegister dest) { + vcmpps(X86Encoding::ConditionCmp_EQ, rhs, lhs, dest); + } + void vcmpltps(const Operand& rhs, FloatRegister lhs, FloatRegister dest) { + vcmpps(X86Encoding::ConditionCmp_LT, rhs, lhs, dest); + } + 
void vcmpleps(const Operand& rhs, FloatRegister lhs, FloatRegister dest) { + vcmpps(X86Encoding::ConditionCmp_LE, rhs, lhs, dest); + } + void vcmpunordps(const Operand& rhs, FloatRegister lhs, FloatRegister dest) { + vcmpps(X86Encoding::ConditionCmp_UNORD, rhs, lhs, dest); + } + void vcmpneqps(const Operand& rhs, FloatRegister lhs, FloatRegister dest) { + vcmpps(X86Encoding::ConditionCmp_NEQ, rhs, lhs, dest); + } + void vcmpordps(const Operand& rhs, FloatRegister lhs, FloatRegister dest) { + vcmpps(X86Encoding::ConditionCmp_ORD, rhs, lhs, dest); + } + void vcmppd(uint8_t order, Operand rhs, FloatRegister lhs, + FloatRegister dest) { + switch (rhs.kind()) { + case Operand::FPREG: + masm.vcmppd_rr(order, rhs.fpu(), lhs.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("NYI"); + } + } + void vcmpeqpd(const Operand& rhs, FloatRegister lhs, FloatRegister dest) { + vcmppd(X86Encoding::ConditionCmp_EQ, rhs, lhs, dest); + } + void vcmpltpd(const Operand& rhs, FloatRegister lhs, FloatRegister dest) { + vcmppd(X86Encoding::ConditionCmp_LT, rhs, lhs, dest); + } + void vcmplepd(const Operand& rhs, FloatRegister lhs, FloatRegister dest) { + vcmppd(X86Encoding::ConditionCmp_LE, rhs, lhs, dest); + } + void vcmpneqpd(const Operand& rhs, FloatRegister lhs, FloatRegister dest) { + vcmppd(X86Encoding::ConditionCmp_NEQ, rhs, lhs, dest); + } + void vcmpordpd(const Operand& rhs, FloatRegister lhs, FloatRegister dest) { + vcmppd(X86Encoding::ConditionCmp_ORD, rhs, lhs, dest); + } + void vcmpunordpd(const Operand& rhs, FloatRegister lhs, FloatRegister dest) { + vcmppd(X86Encoding::ConditionCmp_UNORD, rhs, lhs, dest); + } + void vrcpps(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src.kind()) { + case Operand::FPREG: + masm.vrcpps_rr(src.fpu(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vrcpps_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vrcpps_mr(src.address(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vsqrtps(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src.kind()) { + case Operand::FPREG: + masm.vsqrtps_rr(src.fpu(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vsqrtps_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vsqrtps_mr(src.address(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vrsqrtps(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src.kind()) { + case Operand::FPREG: + masm.vrsqrtps_rr(src.fpu(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vrsqrtps_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vrsqrtps_mr(src.address(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vsqrtpd(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src.kind()) { + case Operand::FPREG: + masm.vsqrtpd_rr(src.fpu(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmovd(Register src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmovd_rr(src.encoding(), dest.encoding()); + } + void vmovd(FloatRegister src, Register dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmovd_rr(src.encoding(), dest.encoding()); + } + void vmovd(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src.kind()) { + case 
Operand::MEM_REG_DISP: + masm.vmovd_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.vmovd_mr(src.disp(), src.base(), src.index(), src.scale(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmovd(FloatRegister src, const Operand& dest) { + MOZ_ASSERT(HasSSE2()); + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.vmovd_rm(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.vmovd_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), + dest.scale()); + break; + case Operand::MEM_ADDRESS32: + masm.vmovq_rm(src.encoding(), dest.address()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmovq(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src.kind()) { + case Operand::MEM_REG_DISP: + masm.vmovq_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.vmovq_mr(src.disp(), src.base(), src.index(), src.scale(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vmovq_mr(src.address(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmovq(FloatRegister src, const Operand& dest) { + MOZ_ASSERT(HasSSE2()); + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.vmovq_rm(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.vmovq_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), + dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpmaddubsw(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSSE3()); + masm.vpmaddubsw_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vpaddb(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpaddb_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpaddb_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpaddb_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpsubb(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpsubb_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpsubb_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpsubb_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpaddsb(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpaddsb_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpaddsb_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpaddsb_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpaddusb(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpaddusb_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case 
Operand::MEM_REG_DISP: + masm.vpaddusb_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpaddusb_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpsubsb(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpsubsb_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpsubsb_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpsubsb_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpsubusb(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpsubusb_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpsubusb_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpsubusb_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpaddw(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpaddw_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpaddw_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpaddw_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpsubw(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpsubw_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpsubw_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpsubw_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpaddsw(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpaddsw_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpaddsw_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpaddsw_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpaddusw(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpaddusw_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpaddusw_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpaddusw_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpsubsw(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpsubsw_rr(src1.fpu(), 
src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpsubsw_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpsubsw_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpsubusw(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpsubusw_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpsubusw_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpsubusw_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpaddd(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpaddd_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpaddd_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpaddd_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpsubd(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpsubd_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpsubd_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpsubd_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpmuldq(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + masm.vpmuldq_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vpmuludq(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpmuludq_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vpmuludq(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpmuludq_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpmuludq_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpmullw(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpmullw_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpmullw_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpmulhw(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpmulhw_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpmulhw_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpmulhuw(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch 
(src1.kind()) { + case Operand::FPREG: + masm.vpmulhuw_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpmulhuw_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpmulhrsw(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpmulhrsw_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpmulhrsw_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpmulld(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpmulld_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpmulld_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpmulld_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpmaddwd(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpmaddwd_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpaddq(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpaddq_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpsubq(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpsubq_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vaddps(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vaddps_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vaddps_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vaddps_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vsubps(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vsubps_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vsubps_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vsubps_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmulps(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vmulps_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vmulps_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vmulps_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + 
MOZ_CRASH("unexpected operand kind"); + } + } + void vdivps(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vdivps_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vdivps_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vdivps_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmaxps(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vmaxps_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vmaxps_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vmaxps_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vminps(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vminps_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vminps_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vminps_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vminpd(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vminpd_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmaxpd(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vmaxpd_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vaddpd(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vaddpd_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vsubpd(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vsubpd_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmulpd(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vmulpd_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vdivpd(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vdivpd_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpavgb(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpavgb_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void 
vpavgw(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpavgw_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpminsb(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpminsb_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpminub(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpminub_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpmaxsb(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpmaxsb_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpmaxub(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpmaxub_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpminsw(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpminsw_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpminuw(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpminuw_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpmaxsw(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpmaxsw_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpmaxuw(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpmaxuw_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpminsd(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpminsd_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpminud(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpminud_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpmaxsd(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpmaxsd_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpmaxud(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + switch (src1.kind()) { + case 
Operand::FPREG: + masm.vpmaxud_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpacksswb(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpacksswb_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpackuswb(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpackuswb_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpackssdw(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpackssdw_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpackusdw(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpackusdw_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpabsb(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasSSE3()); + switch (src.kind()) { + case Operand::FPREG: + masm.vpabsb_rr(src.fpu(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpabsw(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasSSE3()); + switch (src.kind()) { + case Operand::FPREG: + masm.vpabsw_rr(src.fpu(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpabsd(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasSSE3()); + switch (src.kind()) { + case Operand::FPREG: + masm.vpabsd_rr(src.fpu(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpmovsxbw(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + switch (src.kind()) { + case Operand::FPREG: + masm.vpmovsxbw_rr(src.fpu(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpmovsxbw_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.vpmovsxbw_mr(src.disp(), src.base(), src.index(), src.scale(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpmovzxbw(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + switch (src.kind()) { + case Operand::FPREG: + masm.vpmovzxbw_rr(src.fpu(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpmovzxbw_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.vpmovzxbw_mr(src.disp(), src.base(), src.index(), src.scale(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpmovsxwd(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + switch (src.kind()) { + case Operand::FPREG: + masm.vpmovsxwd_rr(src.fpu(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpmovsxwd_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.vpmovsxwd_mr(src.disp(), src.base(), src.index(), src.scale(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpmovzxwd(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + switch 
(src.kind()) { + case Operand::FPREG: + masm.vpmovzxwd_rr(src.fpu(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpmovzxwd_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.vpmovzxwd_mr(src.disp(), src.base(), src.index(), src.scale(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpmovsxdq(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + switch (src.kind()) { + case Operand::FPREG: + masm.vpmovsxdq_rr(src.fpu(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpmovsxdq_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.vpmovsxdq_mr(src.disp(), src.base(), src.index(), src.scale(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpmovzxdq(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + switch (src.kind()) { + case Operand::FPREG: + masm.vpmovzxdq_rr(src.fpu(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpmovzxdq_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.vpmovzxdq_mr(src.disp(), src.base(), src.index(), src.scale(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vphaddd(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vphaddd_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpalignr(const Operand& src1, FloatRegister src0, FloatRegister dest, + uint8_t shift) { + MOZ_ASSERT(HasSSE3()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpalignr_irr(shift, src1.fpu(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpunpcklbw(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + MOZ_ASSERT(src0.size() == 16); + MOZ_ASSERT(src1.size() == 16); + MOZ_ASSERT(dest.size() == 16); + masm.vpunpcklbw_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vpunpckhbw(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + MOZ_ASSERT(src0.size() == 16); + MOZ_ASSERT(src1.size() == 16); + MOZ_ASSERT(dest.size() == 16); + masm.vpunpckhbw_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vpunpcklbw(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpunpcklbw_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpunpckldq(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + MOZ_ASSERT(src0.size() == 16); + MOZ_ASSERT(src1.size() == 16); + MOZ_ASSERT(dest.size() == 16); + masm.vpunpckldq_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vpunpckldq(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + MOZ_ASSERT(src0.size() == 16); + MOZ_ASSERT(dest.size() == 16); + switch (src1.kind()) { + case Operand::MEM_REG_DISP: + masm.vpunpckldq_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpunpckldq_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } 
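+  // The vpunpckl*/vpunpckh* wrappers interleave the low/high lanes of src0
+  // and src1 into dest; e.g. vpunpckldq yields
+  // dest = { src0[0], src1[0], src0[1], src1[1] }, and vpunpckhdq does the
+  // same with the two high dwords.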
+ void vpunpcklqdq(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + MOZ_ASSERT(src0.size() == 16); + MOZ_ASSERT(src1.size() == 16); + MOZ_ASSERT(dest.size() == 16); + masm.vpunpcklqdq_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vpunpcklqdq(const Operand& src1, FloatRegister src0, + FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + MOZ_ASSERT(src0.size() == 16); + MOZ_ASSERT(dest.size() == 16); + switch (src1.kind()) { + case Operand::MEM_REG_DISP: + masm.vpunpcklqdq_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpunpcklqdq_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpunpckhdq(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + MOZ_ASSERT(src0.size() == 16); + MOZ_ASSERT(src1.size() == 16); + MOZ_ASSERT(dest.size() == 16); + masm.vpunpckhdq_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vpunpckhqdq(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + MOZ_ASSERT(src0.size() == 16); + MOZ_ASSERT(src1.size() == 16); + MOZ_ASSERT(dest.size() == 16); + masm.vpunpckhqdq_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vpunpcklwd(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + MOZ_ASSERT(src0.size() == 16); + MOZ_ASSERT(src1.size() == 16); + MOZ_ASSERT(dest.size() == 16); + masm.vpunpcklwd_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vpunpckhwd(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + MOZ_ASSERT(src0.size() == 16); + MOZ_ASSERT(src1.size() == 16); + MOZ_ASSERT(dest.size() == 16); + masm.vpunpckhwd_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + + void vandps(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vandps_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vandps_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vandps_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vandnps(const Operand& src1, FloatRegister src0, FloatRegister dest) { + // Negates bits of dest and then applies AND + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vandnps_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vandnps_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vandnps_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vorps(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vorps_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vorps_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vorps_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vxorps(const Operand& src1, FloatRegister src0, FloatRegister 
dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vxorps_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vxorps_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vxorps_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vandpd(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vandpd_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + + void vpand(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpand_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vpand(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpand_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpand_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpand_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpor(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpor_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vpor(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpor_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpor_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpor_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpxor(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpxor_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vpxor(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpxor_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpxor_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpxor_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vpandn(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpandn_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vpandn(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpandn_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpandn_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpandn_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + + void vpshufd(uint32_t mask, FloatRegister src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpshufd_irr(mask, 
src.encoding(), dest.encoding()); + } + void vpshufd(uint32_t mask, const Operand& src1, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vpshufd_irr(mask, src1.fpu(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vpshufd_imr(mask, src1.disp(), src1.base(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vpshufd_imr(mask, src1.address(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + + void vpshuflw(uint32_t mask, FloatRegister src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpshuflw_irr(mask, src.encoding(), dest.encoding()); + } + void vpshufhw(uint32_t mask, FloatRegister src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vpshufhw_irr(mask, src.encoding(), dest.encoding()); + } + void vpshufb(FloatRegister mask, FloatRegister src, FloatRegister dest) { + MOZ_ASSERT(HasSSSE3()); + masm.vpshufb_rr(mask.encoding(), src.encoding(), dest.encoding()); + } + void vmovddup(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasSSE3()); + switch (src.kind()) { + case Operand::FPREG: + masm.vmovddup_rr(src.fpu(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vmovddup_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.vmovddup_mr(src.disp(), src.base(), src.index(), src.scale(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmovhlps(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmovhlps_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vmovlhps(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmovlhps_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vunpcklps(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vunpcklps_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vunpcklps(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vunpcklps_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vunpcklps_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vunpcklps_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vunpckhps(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vunpckhps_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vunpckhps(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vunpckhps_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vunpckhps_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vunpckhps_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vshufps(uint32_t mask, FloatRegister src1, FloatRegister src0, + FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vshufps_irr(mask, src1.encoding(), src0.encoding(), dest.encoding()); + } + void vshufps(uint32_t mask, const Operand& src1, FloatRegister src0, + FloatRegister dest) { + 
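+    // Lane selection (same imm8 as the register form above): mask bits 0-3
+    // pick two lanes of src0 for the low half of dest, bits 4-7 pick two
+    // lanes of src1 for the high half, two bits per lane index.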
MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vshufps_irr(mask, src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vshufps_imr(mask, src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vshufps_imr(mask, src1.address(), src0.encoding(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vshufpd(uint32_t mask, FloatRegister src1, FloatRegister src0, + FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vshufpd_irr(mask, src1.encoding(), src0.encoding(), dest.encoding()); + } + void vaddsd(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vaddsd_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vaddss(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vaddss_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vaddsd(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vaddsd_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vaddsd_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vaddsd_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vaddss(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vaddss_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vaddss_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vaddss_mr(src1.address(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vsubsd(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vsubsd_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vsubss(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vsubss_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vsubsd(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vsubsd_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vsubsd_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vsubss(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vsubss_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vsubss_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmulsd(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmulsd_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vmulsd(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vmulsd_rr(src1.fpu(), src0.encoding(), 
dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vmulsd_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmulss(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vmulss_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vmulss_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmulss(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmulss_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vdivsd(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vdivsd_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vdivss(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vdivss_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vdivsd(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vdivsd_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vdivsd_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vdivss(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vdivss_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vdivss_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vxorpd(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vxorpd_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vxorps(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vxorps_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vorpd(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vorpd_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vorps(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vorps_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vandpd(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vandpd_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vandps(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vandps_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vsqrtsd(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vsqrtsd_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vsqrtss(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vsqrtss_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vroundps(SSERoundingMode mode, const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + switch (src.kind()) { + case Operand::FPREG: + masm.vroundps_irr((X86Encoding::SSERoundingMode)mode, src.fpu(), + 
dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vroundpd(SSERoundingMode mode, const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + switch (src.kind()) { + case Operand::FPREG: + masm.vroundpd_irr((X86Encoding::SSERoundingMode)mode, src.fpu(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + + static X86Encoding::RoundingMode ToX86RoundingMode(RoundingMode mode) { + switch (mode) { + case RoundingMode::Up: + return X86Encoding::RoundUp; + case RoundingMode::Down: + return X86Encoding::RoundDown; + case RoundingMode::NearestTiesToEven: + return X86Encoding::RoundToNearest; + case RoundingMode::TowardsZero: + return X86Encoding::RoundToZero; + } + MOZ_CRASH("unexpected mode"); + } + void vroundsd(X86Encoding::RoundingMode mode, FloatRegister src, + FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + masm.vroundsd_irr(mode, src.encoding(), dest.encoding()); + } + void vroundss(X86Encoding::RoundingMode mode, FloatRegister src, + FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + masm.vroundss_irr(mode, src.encoding(), dest.encoding()); + } + + unsigned vinsertpsMask(unsigned sourceLane, unsigned destLane, + unsigned zeroMask = 0) { + // Note that the sourceLane bits are ignored in the case of a source + // memory operand, and the source is the given 32-bits memory location. + MOZ_ASSERT(zeroMask < 16); + unsigned ret = zeroMask; + ret |= destLane << 4; + ret |= sourceLane << 6; + MOZ_ASSERT(ret < 256); + return ret; + } + void vinsertps(uint32_t mask, FloatRegister src1, FloatRegister src0, + FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + masm.vinsertps_irr(mask, src1.encoding(), src0.encoding(), dest.encoding()); + } + void vinsertps(uint32_t mask, const Operand& src1, FloatRegister src0, + FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vinsertps_irr(mask, src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vinsertps_imr(mask, src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.vinsertps_imr(mask, src1.disp(), src1.base(), src1.index(), + src1.scale(), src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmovlps(const Operand& src1, FloatRegister src0, FloatRegister dest) { + switch (src1.kind()) { + case Operand::MEM_REG_DISP: + masm.vmovlps_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.vmovlps_mr(src1.disp(), src1.base(), src1.index(), src1.scale(), + src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmovlps(FloatRegister src, const Operand& dest) { + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.vmovlps_rm(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.vmovlps_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), + dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmovhps(const Operand& src1, FloatRegister src0, FloatRegister dest) { + switch (src1.kind()) { + case Operand::MEM_REG_DISP: + masm.vmovhps_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.vmovhps_mr(src1.disp(), src1.base(), src1.index(), src1.scale(), + src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand 
kind"); + } + } + void vmovhps(FloatRegister src, const Operand& dest) { + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.vmovhps_rm(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.vmovhps_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), + dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vextractps(unsigned lane, FloatRegister src, const Operand& dest) { + MOZ_ASSERT(HasSSE41()); + MOZ_ASSERT(lane < 4); + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.vextractps_rm(lane, src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_SCALE: + masm.vextractps_rm(lane, src.encoding(), dest.disp(), dest.base(), + dest.index(), dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + unsigned blendpsMask(bool x, bool y, bool z, bool w) { + return (x << 0) | (y << 1) | (z << 2) | (w << 3); + } + void vblendps(unsigned mask, FloatRegister src1, FloatRegister src0, + FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + masm.vblendps_irr(mask, src1.encoding(), src0.encoding(), dest.encoding()); + } + void vblendps(unsigned mask, const Operand& src1, FloatRegister src0, + FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vblendps_irr(mask, src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vblendps_imr(mask, src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vblendvps(FloatRegister mask, FloatRegister src1, FloatRegister src0, + FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + masm.vblendvps_rr(mask.encoding(), src1.encoding(), src0.encoding(), + dest.encoding()); + } + void vblendvps(FloatRegister mask, const Operand& src1, FloatRegister src0, + FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vblendvps_rr(mask.encoding(), src1.fpu(), src0.encoding(), + dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vblendvps_mr(mask.encoding(), src1.disp(), src1.base(), + src0.encoding(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vblendvpd(FloatRegister mask, FloatRegister src1, FloatRegister src0, + FloatRegister dest) { + MOZ_ASSERT(HasSSE41()); + masm.vblendvpd_rr(mask.encoding(), src1.encoding(), src0.encoding(), + dest.encoding()); + } + void vmovsldup(FloatRegister src, FloatRegister dest) { + MOZ_ASSERT(HasSSE3()); + masm.vmovsldup_rr(src.encoding(), dest.encoding()); + } + void vmovsldup(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasSSE3()); + switch (src.kind()) { + case Operand::FPREG: + masm.vmovsldup_rr(src.fpu(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vmovsldup_mr(src.disp(), src.base(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmovshdup(FloatRegister src, FloatRegister dest) { + MOZ_ASSERT(HasSSE3()); + masm.vmovshdup_rr(src.encoding(), dest.encoding()); + } + void vmovshdup(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasSSE3()); + switch (src.kind()) { + case Operand::FPREG: + masm.vmovshdup_rr(src.fpu(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vmovshdup_mr(src.disp(), src.base(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vminsd(FloatRegister src1, FloatRegister src0, FloatRegister 
dest) { + MOZ_ASSERT(HasSSE2()); + masm.vminsd_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vminsd(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vminsd_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vminsd_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vminss(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vminss_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vmaxsd(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmaxsd_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vmaxsd(const Operand& src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src1.kind()) { + case Operand::FPREG: + masm.vmaxsd_rr(src1.fpu(), src0.encoding(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vmaxsd_mr(src1.disp(), src1.base(), src0.encoding(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vmaxss(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmaxss_rr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void fisttp(const Operand& dest) { + MOZ_ASSERT(HasSSE3()); + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.fisttp_m(dest.disp(), dest.base()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void fistp(const Operand& dest) { + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.fistp_m(dest.disp(), dest.base()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void fnstcw(const Operand& dest) { + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.fnstcw_m(dest.disp(), dest.base()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void fldcw(const Operand& dest) { + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.fldcw_m(dest.disp(), dest.base()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void fnstsw(const Operand& dest) { + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.fnstsw_m(dest.disp(), dest.base()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void fld(const Operand& dest) { + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.fld_m(dest.disp(), dest.base()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void fld32(const Operand& dest) { + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.fld32_m(dest.disp(), dest.base()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void fstp(const Operand& src) { + switch (src.kind()) { + case Operand::MEM_REG_DISP: + masm.fstp_m(src.disp(), src.base()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void fstp32(const Operand& src) { + switch (src.kind()) { + case Operand::MEM_REG_DISP: + masm.fstp32_m(src.disp(), src.base()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + + void vbroadcastb(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasAVX2()); + switch (src.kind()) { + case Operand::FPREG: + masm.vbroadcastb_rr(src.fpu(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vbroadcastb_mr(src.disp(), src.base(), dest.encoding()); + 
break; + case Operand::MEM_SCALE: + masm.vbroadcastb_mr(src.disp(), src.base(), src.index(), src.scale(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vbroadcastw(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasAVX2()); + switch (src.kind()) { + case Operand::FPREG: + masm.vbroadcastw_rr(src.fpu(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vbroadcastw_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.vbroadcastw_mr(src.disp(), src.base(), src.index(), src.scale(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vbroadcastd(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasAVX2()); + switch (src.kind()) { + case Operand::FPREG: + masm.vbroadcastd_rr(src.fpu(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vbroadcastd_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.vbroadcastd_mr(src.disp(), src.base(), src.index(), src.scale(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vbroadcastq(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasAVX2()); + switch (src.kind()) { + case Operand::FPREG: + masm.vbroadcastq_rr(src.fpu(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vbroadcastq_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.vbroadcastq_mr(src.disp(), src.base(), src.index(), src.scale(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vbroadcastss(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasAVX2()); + switch (src.kind()) { + case Operand::FPREG: + masm.vbroadcastss_rr(src.fpu(), dest.encoding()); + break; + case Operand::MEM_REG_DISP: + masm.vbroadcastss_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.vbroadcastss_mr(src.disp(), src.base(), src.index(), src.scale(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void vfmadd231ps(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasFMA()); + masm.vfmadd231ps_rrr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vfnmadd231ps(FloatRegister src1, FloatRegister src0, + FloatRegister dest) { + MOZ_ASSERT(HasFMA()); + masm.vfnmadd231ps_rrr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vfmadd231pd(FloatRegister src1, FloatRegister src0, FloatRegister dest) { + MOZ_ASSERT(HasFMA()); + masm.vfmadd231pd_rrr(src1.encoding(), src0.encoding(), dest.encoding()); + } + void vfnmadd231pd(FloatRegister src1, FloatRegister src0, + FloatRegister dest) { + MOZ_ASSERT(HasFMA()); + masm.vfnmadd231pd_rrr(src1.encoding(), src0.encoding(), dest.encoding()); + } + + void flushBuffer() {} + + // Patching. + + static size_t PatchWrite_NearCallSize() { return 5; } + static uintptr_t GetPointer(uint8_t* instPtr) { + uint8_t* ptr = instPtr - sizeof(uintptr_t); + return mozilla::LittleEndian::readUintptr(ptr); + } + // Write a relative call at the start location |dataLabel|. + // Note that this DOES NOT patch data that comes before |label|. 
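+  // Concretely, the patched sequence is PatchWrite_NearCallSize() == 5 bytes:
+  // the 0xE8 opcode followed by a little-endian rel32 equal to
+  // target - (startLabel + 5), i.e. relative to the end of the call.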
+ static void PatchWrite_NearCall(CodeLocationLabel startLabel, + CodeLocationLabel target) { + uint8_t* start = startLabel.raw(); + *start = 0xE8; // <CALL> rel32 + ptrdiff_t offset = target - startLabel - PatchWrite_NearCallSize(); + MOZ_ASSERT(int32_t(offset) == offset); + mozilla::LittleEndian::writeInt32(start + 1, offset); // CALL <rel32> + } + + static void PatchWrite_Imm32(CodeLocationLabel dataLabel, Imm32 toWrite) { + // dataLabel is a code location which targets the end of an instruction + // which has a 32 bits immediate. Thus writting a value requires shifting + // back to the address of the 32 bits immediate within the instruction. + uint8_t* ptr = dataLabel.raw(); + mozilla::LittleEndian::writeInt32(ptr - sizeof(int32_t), toWrite.value); + } + + static void PatchDataWithValueCheck(CodeLocationLabel data, + PatchedImmPtr newData, + PatchedImmPtr expectedData) { + // The pointer given is a pointer to *after* the data. + uint8_t* ptr = data.raw() - sizeof(uintptr_t); + MOZ_ASSERT(mozilla::LittleEndian::readUintptr(ptr) == + uintptr_t(expectedData.value)); + mozilla::LittleEndian::writeUintptr(ptr, uintptr_t(newData.value)); + } + static void PatchDataWithValueCheck(CodeLocationLabel data, ImmPtr newData, + ImmPtr expectedData) { + PatchDataWithValueCheck(data, PatchedImmPtr(newData.value), + PatchedImmPtr(expectedData.value)); + } + + static uint32_t NopSize() { return 1; } + static uint8_t* NextInstruction(uint8_t* cur, uint32_t* count) { + MOZ_CRASH("nextInstruction NYI on x86"); + } + + // Toggle a jmp or cmp emitted by toggledJump(). + static void ToggleToJmp(CodeLocationLabel inst) { + uint8_t* ptr = (uint8_t*)inst.raw(); + MOZ_ASSERT(*ptr == 0x3D); // <CMP> eax, imm32 + *ptr = 0xE9; // <JMP> rel32 + } + static void ToggleToCmp(CodeLocationLabel inst) { + uint8_t* ptr = (uint8_t*)inst.raw(); + MOZ_ASSERT(*ptr == 0xE9); // <JMP> rel32 + *ptr = 0x3D; // <CMP> eax, imm32 + } + static void ToggleCall(CodeLocationLabel inst, bool enabled) { + uint8_t* ptr = (uint8_t*)inst.raw(); + MOZ_ASSERT(*ptr == 0x3D || // <CMP> eax, imm32 + *ptr == 0xE8); // <CALL> rel32 + *ptr = enabled ? 0xE8 : 0x3D; + } + + MOZ_COLD void verifyHeapAccessDisassembly( + uint32_t begin, uint32_t end, const Disassembler::HeapAccess& heapAccess); +}; + +} // namespace jit +} // namespace js + +#endif /* jit_x86_shared_Assembler_x86_shared_h */ diff --git a/js/src/jit/x86-shared/AssemblerBuffer-x86-shared.cpp b/js/src/jit/x86-shared/AssemblerBuffer-x86-shared.cpp new file mode 100644 index 0000000000..835d7755c9 --- /dev/null +++ b/js/src/jit/x86-shared/AssemblerBuffer-x86-shared.cpp @@ -0,0 +1,57 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "jit/x86-shared/AssemblerBuffer-x86-shared.h" + +#include "mozilla/Sprintf.h" + +using namespace js; +using namespace jit; + +bool AssemblerBuffer::swap(Vector<uint8_t, 0, SystemAllocPolicy>& bytes) { + // For now, specialize to the one use case. 
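+  // The assembled bytes are moved into |bytes|: the heap buffer is donated
+  // outright when we own one, and copied only when the Vector is still using
+  // inline storage (see the extractRawBuffer() note below).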
+ MOZ_ASSERT(bytes.empty()); + + if (m_buffer.empty()) { + if (bytes.capacity() > m_buffer.capacity()) { + size_t newCapacity = bytes.capacity(); + uint8_t* newBuffer = bytes.extractRawBuffer(); + MOZ_ASSERT(newBuffer); + m_buffer.replaceRawBuffer((unsigned char*)newBuffer, 0, newCapacity); + } + return true; + } + + size_t newLength = m_buffer.length(); + size_t newCapacity = m_buffer.capacity(); + unsigned char* newBuffer = m_buffer.extractRawBuffer(); + + // NB: extractRawBuffer() only returns null if the Vector is using + // inline storage and thus a malloc would be needed. In this case, + // just make a simple copy. + if (!newBuffer) { + return bytes.append(m_buffer.begin(), m_buffer.end()); + } + + bytes.replaceRawBuffer((uint8_t*)newBuffer, newLength, newCapacity); + return true; +} + +#ifdef JS_JITSPEW +void js::jit::GenericAssembler::spew(const char* fmt, va_list va) { + // Buffer to hold the formatted string. Note that this may contain + // '%' characters, so do not pass it directly to printf functions. + char buf[200]; + + int i = VsprintfLiteral(buf, fmt, va); + if (i > -1) { + if (printer) { + printer->printf("%s\n", buf); + } + js::jit::JitSpew(js::jit::JitSpew_Codegen, "%s", buf); + } +} +#endif diff --git a/js/src/jit/x86-shared/AssemblerBuffer-x86-shared.h b/js/src/jit/x86-shared/AssemblerBuffer-x86-shared.h new file mode 100644 index 0000000000..e12efb600e --- /dev/null +++ b/js/src/jit/x86-shared/AssemblerBuffer-x86-shared.h @@ -0,0 +1,256 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * + * ***** BEGIN LICENSE BLOCK ***** + * Copyright (C) 2008 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ***** END LICENSE BLOCK ***** */ + +#ifndef jit_x86_shared_AssemblerBuffer_x86_shared_h +#define jit_x86_shared_AssemblerBuffer_x86_shared_h + +#include "mozilla/Assertions.h" +#include "mozilla/Attributes.h" +#include "mozilla/Likely.h" +#include "mozilla/Vector.h" + +#include <stdarg.h> +#include <stddef.h> +#include <stdint.h> + +#include "jit/JitContext.h" +#include "jit/JitSpewer.h" +#include "jit/ProcessExecutableMemory.h" +#include "js/AllocPolicy.h" +#include "js/Vector.h" + +// Spew formatting helpers. +#define PRETTYHEX(x) \ + (((x) < 0) ? 
"-" : ""), \ + ((unsigned)((x) ^ ((x) >> 31)) + ((unsigned)(x) >> 31)) + +#define MEM_o "%s0x%x" +#define MEM_os MEM_o "(,%s,%d)" +#define MEM_ob MEM_o "(%s)" +#define MEM_obs MEM_o "(%s,%s,%d)" + +#define MEM_o32 "%s0x%04x" +#define MEM_o32s MEM_o32 "(,%s,%d)" +#define MEM_o32b MEM_o32 "(%s)" +#define MEM_o32bs MEM_o32 "(%s,%s,%d)" +#define MEM_o32r ".Lfrom%d(%%rip)" + +#define ADDR_o(offset) PRETTYHEX(offset) +#define ADDR_os(offset, index, scale) \ + ADDR_o(offset), GPRegName((index)), (1 << (scale)) +#define ADDR_ob(offset, base) ADDR_o(offset), GPRegName((base)) +#define ADDR_obs(offset, base, index, scale) \ + ADDR_ob(offset, base), GPRegName((index)), (1 << (scale)) + +#define ADDR_o32(offset) ADDR_o(offset) +#define ADDR_o32s(offset, index, scale) ADDR_os(offset, index, scale) +#define ADDR_o32b(offset, base) ADDR_ob(offset, base) +#define ADDR_o32bs(offset, base, index, scale) \ + ADDR_obs(offset, base, index, scale) +#define ADDR_o32r(offset) (offset) + +namespace js { + +class JS_PUBLIC_API Sprinter; + +namespace jit { + +// AllocPolicy for AssemblerBuffer. OOMs when trying to allocate more than +// MaxCodeBytesPerProcess bytes. Use private inheritance to make sure we +// explicitly have to expose SystemAllocPolicy methods. +class AssemblerBufferAllocPolicy : private SystemAllocPolicy { + public: + using SystemAllocPolicy::checkSimulatedOOM; + using SystemAllocPolicy::free_; + using SystemAllocPolicy::reportAllocOverflow; + + template <typename T> + T* pod_realloc(T* p, size_t oldSize, size_t newSize) { + static_assert( + sizeof(T) == 1, + "AssemblerBufferAllocPolicy should only be used with byte vectors"); + MOZ_ASSERT(oldSize <= MaxCodeBytesPerProcess); + if (MOZ_UNLIKELY(newSize > MaxCodeBytesPerProcess)) { + return nullptr; + } + return SystemAllocPolicy::pod_realloc<T>(p, oldSize, newSize); + } + template <typename T> + T* pod_malloc(size_t numElems) { + static_assert( + sizeof(T) == 1, + "AssemblerBufferAllocPolicy should only be used with byte vectors"); + if (MOZ_UNLIKELY(numElems > MaxCodeBytesPerProcess)) { + return nullptr; + } + return SystemAllocPolicy::pod_malloc<T>(numElems); + } +}; + +class AssemblerBuffer { + template <size_t size, typename T> + MOZ_ALWAYS_INLINE void sizedAppendUnchecked(T value) { + m_buffer.infallibleAppend(reinterpret_cast<unsigned char*>(&value), size); + } + + template <size_t size, typename T> + MOZ_ALWAYS_INLINE void sizedAppend(T value) { + if (MOZ_UNLIKELY( + !m_buffer.append(reinterpret_cast<unsigned char*>(&value), size))) { + oomDetected(); + } + } + + public: + AssemblerBuffer() : m_oom(false) {} + + void ensureSpace(size_t space) { + // This should only be called with small |space| values to ensure + // we don't overflow below. 
+ MOZ_ASSERT(space <= 16); + if (MOZ_UNLIKELY(!m_buffer.reserve(m_buffer.length() + space))) { + oomDetected(); + } + } + + bool isAligned(size_t alignment) const { + return !(m_buffer.length() & (alignment - 1)); + } + + MOZ_ALWAYS_INLINE void putByteUnchecked(int value) { + sizedAppendUnchecked<1>(value); + } + MOZ_ALWAYS_INLINE void putShortUnchecked(int value) { + sizedAppendUnchecked<2>(value); + } + MOZ_ALWAYS_INLINE void putIntUnchecked(int value) { + sizedAppendUnchecked<4>(value); + } + MOZ_ALWAYS_INLINE void putInt64Unchecked(int64_t value) { + sizedAppendUnchecked<8>(value); + } + + MOZ_ALWAYS_INLINE void putByte(int value) { sizedAppend<1>(value); } + MOZ_ALWAYS_INLINE void putShort(int value) { sizedAppend<2>(value); } + MOZ_ALWAYS_INLINE void putInt(int value) { sizedAppend<4>(value); } + MOZ_ALWAYS_INLINE void putInt64(int64_t value) { sizedAppend<8>(value); } + + [[nodiscard]] bool append(const unsigned char* values, size_t size) { + if (MOZ_UNLIKELY(!m_buffer.append(values, size))) { + oomDetected(); + return false; + } + return true; + } + + size_t size() const { return m_buffer.length(); } + + bool oom() const { return m_oom; } + + bool reserve(size_t size) { return !m_oom && m_buffer.reserve(size); } + + bool swap(Vector<uint8_t, 0, SystemAllocPolicy>& bytes); + + const unsigned char* buffer() const { + MOZ_RELEASE_ASSERT(!m_oom); + return m_buffer.begin(); + } + + unsigned char* data() { return m_buffer.begin(); } + + protected: + /* + * OOM handling: This class can OOM in the ensureSpace() method trying + * to allocate a new buffer. In response to an OOM, we need to avoid + * crashing and report the error. We also want to make it so that + * users of this class need to check for OOM only at certain points + * and not after every operation. + * + * Our strategy for handling an OOM is to set m_oom, and then clear (but + * not free) m_buffer, preserving the current buffer. This way, the user + * can continue assembling into the buffer, deferring OOM checking + * until the user wants to read code out of the buffer. + * + * See also the |buffer| method. + */ + void oomDetected() { + m_oom = true; + m_buffer.clear(); +#ifdef DEBUG + JitContext* context = MaybeGetJitContext(); + if (context) { + context->setOOM(); + } +#endif + } + + mozilla::Vector<unsigned char, 256, AssemblerBufferAllocPolicy> m_buffer; + bool m_oom; +}; + +class GenericAssembler { +#ifdef JS_JITSPEW + Sprinter* printer; +#endif + public: + GenericAssembler() +#ifdef JS_JITSPEW + : printer(nullptr) +#endif + { + } + + void setPrinter(Sprinter* sp) { +#ifdef JS_JITSPEW + printer = sp; +#endif + } + +#ifdef JS_JITSPEW + inline void spew(const char* fmt, ...) MOZ_FORMAT_PRINTF(2, 3) { + if (MOZ_UNLIKELY(printer || JitSpewEnabled(JitSpew_Codegen))) { + va_list va; + va_start(va, fmt); + spew(fmt, va); + va_end(va); + } + } +#else + MOZ_ALWAYS_INLINE void spew(const char* fmt, ...) 
MOZ_FORMAT_PRINTF(2, 3) {} +#endif + +#ifdef JS_JITSPEW + MOZ_COLD void spew(const char* fmt, va_list va) MOZ_FORMAT_PRINTF(2, 0); +#endif +}; + +} // namespace jit +} // namespace js + +#endif /* jit_x86_shared_AssemblerBuffer_x86_shared_h */ diff --git a/js/src/jit/x86-shared/BaseAssembler-x86-shared.h b/js/src/jit/x86-shared/BaseAssembler-x86-shared.h new file mode 100644 index 0000000000..b7a5031757 --- /dev/null +++ b/js/src/jit/x86-shared/BaseAssembler-x86-shared.h @@ -0,0 +1,6460 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * + * ***** BEGIN LICENSE BLOCK ***** + * Copyright (C) 2008 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ***** END LICENSE BLOCK ***** */ + +#ifndef jit_x86_shared_BaseAssembler_x86_shared_h +#define jit_x86_shared_BaseAssembler_x86_shared_h + +#include "mozilla/IntegerPrintfMacros.h" + +#include "jit/x86-shared/AssemblerBuffer-x86-shared.h" +#include "jit/x86-shared/Encoding-x86-shared.h" +#include "jit/x86-shared/Patching-x86-shared.h" +#include "wasm/WasmTypeDecls.h" + +namespace js { +namespace jit { + +namespace X86Encoding { + +class BaseAssembler; + +class BaseAssembler : public GenericAssembler { + public: + BaseAssembler() : useVEX_(true) {} + + void disableVEX() { useVEX_ = false; } + + size_t size() const { return m_formatter.size(); } + const unsigned char* buffer() const { return m_formatter.buffer(); } + unsigned char* data() { return m_formatter.data(); } + bool oom() const { return m_formatter.oom(); } + bool reserve(size_t size) { return m_formatter.reserve(size); } + bool swapBuffer(wasm::Bytes& other) { return m_formatter.swapBuffer(other); } + + void nop() { + spew("nop"); + m_formatter.oneByteOp(OP_NOP); + } + + void comment(const char* msg) { spew("; %s", msg); } + + static void patchFiveByteNopToCall(uint8_t* callsite, uint8_t* target) { + // Note: the offset is relative to the address of the instruction after + // the call which is five bytes. + uint8_t* inst = callsite - sizeof(int32_t) - 1; + // The nop can be already patched as call, overriding the call. + // See also nop_five. 
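// Aside: the two 5-byte encodings this toggling switches between, laid out
// byte by byte (informational annotation, not part of the patched file):
//
//   five-byte nop:  0F 1F 44 00 00   (multi-byte NOP form, see nop_five below)
//   near call:      E8 xx xx xx xx   (rel32, little-endian)
//
// Patching nop -> call therefore only rewrites inst[0] and the four
// displacement bytes (via SetRel32); the reverse direction, below, restores
// all five nop bytes.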
+ MOZ_ASSERT(inst[0] == OP_NOP_0F || inst[0] == OP_CALL_rel32); + MOZ_ASSERT_IF(inst[0] == OP_NOP_0F, + inst[1] == OP_NOP_1F || inst[2] == OP_NOP_44 || + inst[3] == OP_NOP_00 || inst[4] == OP_NOP_00); + inst[0] = OP_CALL_rel32; + SetRel32(callsite, target); + } + + static void patchCallToFiveByteNop(uint8_t* callsite) { + // See also patchFiveByteNopToCall and nop_five. + uint8_t* inst = callsite - sizeof(int32_t) - 1; + // The call can be already patched as nop. + if (inst[0] == OP_NOP_0F) { + MOZ_ASSERT(inst[1] == OP_NOP_1F || inst[2] == OP_NOP_44 || + inst[3] == OP_NOP_00 || inst[4] == OP_NOP_00); + return; + } + MOZ_ASSERT(inst[0] == OP_CALL_rel32); + inst[0] = OP_NOP_0F; + inst[1] = OP_NOP_1F; + inst[2] = OP_NOP_44; + inst[3] = OP_NOP_00; + inst[4] = OP_NOP_00; + } + + /* + * The nop multibytes sequences are directly taken from the Intel's + * architecture software developer manual. + * They are defined for sequences of sizes from 1 to 9 included. + */ + void nop_one() { m_formatter.oneByteOp(OP_NOP); } + + void nop_two() { + m_formatter.oneByteOp(OP_NOP_66); + m_formatter.oneByteOp(OP_NOP); + } + + void nop_three() { + m_formatter.oneByteOp(OP_NOP_0F); + m_formatter.oneByteOp(OP_NOP_1F); + m_formatter.oneByteOp(OP_NOP_00); + } + + void nop_four() { + m_formatter.oneByteOp(OP_NOP_0F); + m_formatter.oneByteOp(OP_NOP_1F); + m_formatter.oneByteOp(OP_NOP_40); + m_formatter.oneByteOp(OP_NOP_00); + } + + void nop_five() { + m_formatter.oneByteOp(OP_NOP_0F); + m_formatter.oneByteOp(OP_NOP_1F); + m_formatter.oneByteOp(OP_NOP_44); + m_formatter.oneByteOp(OP_NOP_00); + m_formatter.oneByteOp(OP_NOP_00); + } + + void nop_six() { + m_formatter.oneByteOp(OP_NOP_66); + nop_five(); + } + + void nop_seven() { + m_formatter.oneByteOp(OP_NOP_0F); + m_formatter.oneByteOp(OP_NOP_1F); + m_formatter.oneByteOp(OP_NOP_80); + for (int i = 0; i < 4; ++i) { + m_formatter.oneByteOp(OP_NOP_00); + } + } + + void nop_eight() { + m_formatter.oneByteOp(OP_NOP_0F); + m_formatter.oneByteOp(OP_NOP_1F); + m_formatter.oneByteOp(OP_NOP_84); + for (int i = 0; i < 5; ++i) { + m_formatter.oneByteOp(OP_NOP_00); + } + } + + void nop_nine() { + m_formatter.oneByteOp(OP_NOP_66); + nop_eight(); + } + + void insert_nop(int size) { + switch (size) { + case 1: + nop_one(); + break; + case 2: + nop_two(); + break; + case 3: + nop_three(); + break; + case 4: + nop_four(); + break; + case 5: + nop_five(); + break; + case 6: + nop_six(); + break; + case 7: + nop_seven(); + break; + case 8: + nop_eight(); + break; + case 9: + nop_nine(); + break; + case 10: + nop_three(); + nop_seven(); + break; + case 11: + nop_four(); + nop_seven(); + break; + case 12: + nop_six(); + nop_six(); + break; + case 13: + nop_six(); + nop_seven(); + break; + case 14: + nop_seven(); + nop_seven(); + break; + case 15: + nop_one(); + nop_seven(); + nop_seven(); + break; + default: + MOZ_CRASH("Unhandled alignment"); + } + } + + // Stack operations: + + void push_r(RegisterID reg) { + spew("push %s", GPRegName(reg)); + m_formatter.oneByteOp(OP_PUSH_EAX, reg); + } + + void pop_r(RegisterID reg) { + spew("pop %s", GPRegName(reg)); + m_formatter.oneByteOp(OP_POP_EAX, reg); + } + + void push_i(int32_t imm) { + spew("push $%s0x%x", PRETTYHEX(imm)); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_PUSH_Ib); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_PUSH_Iz); + m_formatter.immediate32(imm); + } + } + + void push_i32(int32_t imm) { + spew("push $%s0x%04x", PRETTYHEX(imm)); + m_formatter.oneByteOp(OP_PUSH_Iz); + 
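// The Iz (imm32) form is used unconditionally here; unlike push_i above,
// small values are not shortened to the Ib encoding, so the instruction
// length stays fixed.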
m_formatter.immediate32(imm); + } + + void push_m(int32_t offset, RegisterID base) { + spew("push " MEM_ob, ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_GROUP5_Ev, offset, base, GROUP5_OP_PUSH); + } + void push_m(int32_t offset, RegisterID base, RegisterID index, int scale) { + spew("push " MEM_obs, ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp(OP_GROUP5_Ev, offset, base, index, scale, + GROUP5_OP_PUSH); + } + + void pop_m(int32_t offset, RegisterID base) { + spew("pop " MEM_ob, ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_GROUP1A_Ev, offset, base, GROUP1A_OP_POP); + } + + void push_flags() { + spew("pushf"); + m_formatter.oneByteOp(OP_PUSHFLAGS); + } + + void pop_flags() { + spew("popf"); + m_formatter.oneByteOp(OP_POPFLAGS); + } + + // Arithmetic operations: + + void addl_rr(RegisterID src, RegisterID dst) { + spew("addl %s, %s", GPReg32Name(src), GPReg32Name(dst)); + m_formatter.oneByteOp(OP_ADD_GvEv, src, dst); + } + + void addw_rr(RegisterID src, RegisterID dst) { + spew("addw %s, %s", GPReg16Name(src), GPReg16Name(dst)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_ADD_GvEv, src, dst); + } + + void addl_mr(int32_t offset, RegisterID base, RegisterID dst) { + spew("addl " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(dst)); + m_formatter.oneByteOp(OP_ADD_GvEv, offset, base, dst); + } + + void addl_rm(RegisterID src, int32_t offset, RegisterID base) { + spew("addl %s, " MEM_ob, GPReg32Name(src), ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_ADD_EvGv, offset, base, src); + } + + void addl_rm(RegisterID src, int32_t offset, RegisterID base, + RegisterID index, int scale) { + spew("addl %s, " MEM_obs, GPReg32Name(src), + ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp(OP_ADD_EvGv, offset, base, index, scale, src); + } + + void addl_ir(int32_t imm, RegisterID dst) { + spew("addl $%d, %s", imm, GPReg32Name(dst)); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, dst, GROUP1_OP_ADD); + m_formatter.immediate8s(imm); + } else { + if (dst == rax) { + m_formatter.oneByteOp(OP_ADD_EAXIv); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, dst, GROUP1_OP_ADD); + } + m_formatter.immediate32(imm); + } + } + + void addw_ir(int32_t imm, RegisterID dst) { + spew("addw $%d, %s", int16_t(imm), GPReg16Name(dst)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_GROUP1_EvIz, dst, GROUP1_OP_ADD); + m_formatter.immediate16(imm); + } + + void addl_i32r(int32_t imm, RegisterID dst) { + // 32-bit immediate always, for patching. 
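// Unlike addl_ir above, small values are never shortened to the
// sign-extended imm8 form here: a patch site needs a fixed-length encoding,
// so the full 4-byte immediate is emitted unconditionally.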
+ spew("addl $0x%04x, %s", uint32_t(imm), GPReg32Name(dst)); + if (dst == rax) { + m_formatter.oneByteOp(OP_ADD_EAXIv); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, dst, GROUP1_OP_ADD); + } + m_formatter.immediate32(imm); + } + + void addl_im(int32_t imm, int32_t offset, RegisterID base) { + spew("addl $%d, " MEM_ob, imm, ADDR_ob(offset, base)); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, GROUP1_OP_ADD); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, GROUP1_OP_ADD); + m_formatter.immediate32(imm); + } + } + + void addl_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index, + int scale) { + spew("addl $%d, " MEM_obs, imm, ADDR_obs(offset, base, index, scale)); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, index, scale, + GROUP1_OP_ADD); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, index, scale, + GROUP1_OP_ADD); + m_formatter.immediate32(imm); + } + } + + void addl_im(int32_t imm, const void* addr) { + spew("addl $%d, %p", imm, addr); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, addr, GROUP1_OP_ADD); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, addr, GROUP1_OP_ADD); + m_formatter.immediate32(imm); + } + } + void addw_im(int32_t imm, const void* addr) { + spew("addw $%d, %p", int16_t(imm), addr); + m_formatter.prefix(PRE_OPERAND_SIZE); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, addr, GROUP1_OP_ADD); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, addr, GROUP1_OP_ADD); + m_formatter.immediate16(imm); + } + } + + void addw_im(int32_t imm, int32_t offset, RegisterID base) { + spew("addw $%d, " MEM_ob, int16_t(imm), ADDR_ob(offset, base)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, GROUP1_OP_ADD); + m_formatter.immediate16(imm); + } + + void addw_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index, + int scale) { + spew("addw $%d, " MEM_obs, int16_t(imm), + ADDR_obs(offset, base, index, scale)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, index, scale, + GROUP1_OP_ADD); + m_formatter.immediate16(imm); + } + + void addw_rm(RegisterID src, int32_t offset, RegisterID base) { + spew("addw %s, " MEM_ob, GPReg16Name(src), ADDR_ob(offset, base)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_ADD_EvGv, offset, base, src); + } + + void addw_rm(RegisterID src, int32_t offset, RegisterID base, + RegisterID index, int scale) { + spew("addw %s, " MEM_obs, GPReg16Name(src), + ADDR_obs(offset, base, index, scale)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_ADD_EvGv, offset, base, index, scale, src); + } + + void addb_im(int32_t imm, int32_t offset, RegisterID base) { + spew("addb $%d, " MEM_ob, int8_t(imm), ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_GROUP1_EbIb, offset, base, GROUP1_OP_ADD); + m_formatter.immediate8(imm); + } + + void addb_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index, + int scale) { + spew("addb $%d, " MEM_obs, int8_t(imm), + ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp(OP_GROUP1_EbIb, offset, base, index, scale, + GROUP1_OP_ADD); + m_formatter.immediate8(imm); + } + + void addb_rm(RegisterID src, int32_t offset, RegisterID base) { + spew("addb %s, " 
MEM_ob, GPReg8Name(src), ADDR_ob(offset, base)); + m_formatter.oneByteOp8(OP_ADD_EbGb, offset, base, src); + } + + void addb_rm(RegisterID src, int32_t offset, RegisterID base, + RegisterID index, int scale) { + spew("addb %s, " MEM_obs, GPReg8Name(src), + ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp8(OP_ADD_EbGb, offset, base, index, scale, src); + } + + void subb_im(int32_t imm, int32_t offset, RegisterID base) { + spew("subb $%d, " MEM_ob, int8_t(imm), ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_GROUP1_EbIb, offset, base, GROUP1_OP_SUB); + m_formatter.immediate8(imm); + } + + void subb_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index, + int scale) { + spew("subb $%d, " MEM_obs, int8_t(imm), + ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp(OP_GROUP1_EbIb, offset, base, index, scale, + GROUP1_OP_SUB); + m_formatter.immediate8(imm); + } + + void subb_rm(RegisterID src, int32_t offset, RegisterID base) { + spew("subb %s, " MEM_ob, GPReg8Name(src), ADDR_ob(offset, base)); + m_formatter.oneByteOp8(OP_SUB_EbGb, offset, base, src); + } + + void subb_rm(RegisterID src, int32_t offset, RegisterID base, + RegisterID index, int scale) { + spew("subb %s, " MEM_obs, GPReg8Name(src), + ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp8(OP_SUB_EbGb, offset, base, index, scale, src); + } + + void andb_im(int32_t imm, int32_t offset, RegisterID base) { + spew("andb $%d, " MEM_ob, int8_t(imm), ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_GROUP1_EbIb, offset, base, GROUP1_OP_AND); + m_formatter.immediate8(imm); + } + + void andb_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index, + int scale) { + spew("andb $%d, " MEM_obs, int8_t(imm), + ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp(OP_GROUP1_EbIb, offset, base, index, scale, + GROUP1_OP_AND); + m_formatter.immediate8(imm); + } + + void andb_rm(RegisterID src, int32_t offset, RegisterID base) { + spew("andb %s, " MEM_ob, GPReg8Name(src), ADDR_ob(offset, base)); + m_formatter.oneByteOp8(OP_AND_EbGb, offset, base, src); + } + + void andb_rm(RegisterID src, int32_t offset, RegisterID base, + RegisterID index, int scale) { + spew("andb %s, " MEM_obs, GPReg8Name(src), + ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp8(OP_AND_EbGb, offset, base, index, scale, src); + } + + void orb_im(int32_t imm, int32_t offset, RegisterID base) { + spew("orb $%d, " MEM_ob, int8_t(imm), ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_GROUP1_EbIb, offset, base, GROUP1_OP_OR); + m_formatter.immediate8(imm); + } + + void orb_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index, + int scale) { + spew("orb $%d, " MEM_obs, int8_t(imm), + ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp(OP_GROUP1_EbIb, offset, base, index, scale, + GROUP1_OP_OR); + m_formatter.immediate8(imm); + } + + void orb_rm(RegisterID src, int32_t offset, RegisterID base) { + spew("orb %s, " MEM_ob, GPReg8Name(src), ADDR_ob(offset, base)); + m_formatter.oneByteOp8(OP_OR_EbGb, offset, base, src); + } + + void orb_rm(RegisterID src, int32_t offset, RegisterID base, RegisterID index, + int scale) { + spew("orb %s, " MEM_obs, GPReg8Name(src), + ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp8(OP_OR_EbGb, offset, base, index, scale, src); + } + + void xorb_im(int32_t imm, int32_t offset, RegisterID base) { + spew("xorb $%d, " MEM_ob, int8_t(imm), ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_GROUP1_EbIb, offset, base, GROUP1_OP_XOR); + 
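// Byte-sized group-1 operations only ever take an 8-bit immediate, so the
// *b_im forms have no imm8/imm32 choice to make.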
m_formatter.immediate8(imm); + } + + void xorb_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index, + int scale) { + spew("xorb $%d, " MEM_obs, int8_t(imm), + ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp(OP_GROUP1_EbIb, offset, base, index, scale, + GROUP1_OP_XOR); + m_formatter.immediate8(imm); + } + + void xorb_rm(RegisterID src, int32_t offset, RegisterID base) { + spew("xorb %s, " MEM_ob, GPReg8Name(src), ADDR_ob(offset, base)); + m_formatter.oneByteOp8(OP_XOR_EbGb, offset, base, src); + } + + void xorb_rm(RegisterID src, int32_t offset, RegisterID base, + RegisterID index, int scale) { + spew("xorb %s, " MEM_obs, GPReg8Name(src), + ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp8(OP_XOR_EbGb, offset, base, index, scale, src); + } + + void lock_xaddb_rm(RegisterID srcdest, int32_t offset, RegisterID base) { + spew("lock xaddb %s, " MEM_ob, GPReg8Name(srcdest), ADDR_ob(offset, base)); + m_formatter.oneByteOp(PRE_LOCK); + m_formatter.twoByteOp8(OP2_XADD_EbGb, offset, base, srcdest); + } + + void lock_xaddb_rm(RegisterID srcdest, int32_t offset, RegisterID base, + RegisterID index, int scale) { + spew("lock xaddb %s, " MEM_obs, GPReg8Name(srcdest), + ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp(PRE_LOCK); + m_formatter.twoByteOp8(OP2_XADD_EbGb, offset, base, index, scale, srcdest); + } + + void lock_xaddl_rm(RegisterID srcdest, int32_t offset, RegisterID base) { + spew("lock xaddl %s, " MEM_ob, GPReg32Name(srcdest), ADDR_ob(offset, base)); + m_formatter.oneByteOp(PRE_LOCK); + m_formatter.twoByteOp(OP2_XADD_EvGv, offset, base, srcdest); + } + + void lock_xaddl_rm(RegisterID srcdest, int32_t offset, RegisterID base, + RegisterID index, int scale) { + spew("lock xaddl %s, " MEM_obs, GPReg32Name(srcdest), + ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp(PRE_LOCK); + m_formatter.twoByteOp(OP2_XADD_EvGv, offset, base, index, scale, srcdest); + } + + void vpmaddubsw_rr(XMMRegisterID src1, XMMRegisterID src0, + XMMRegisterID dst) { + threeByteOpSimd("vpmaddubsw", VEX_PD, OP3_PMADDUBSW_VdqWdq, ESCAPE_38, src1, + src0, dst); + } + void vpmaddubsw_mr(const void* address, XMMRegisterID src0, + XMMRegisterID dst) { + threeByteOpSimd("vpmaddubsw", VEX_PD, OP3_PMADDUBSW_VdqWdq, ESCAPE_38, + address, src0, dst); + } + + void vpaddb_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpaddb", VEX_PD, OP2_PADDB_VdqWdq, src1, src0, dst); + } + void vpaddb_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vpaddb", VEX_PD, OP2_PADDB_VdqWdq, offset, base, src0, dst); + } + void vpaddb_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpaddb", VEX_PD, OP2_PADDB_VdqWdq, address, src0, dst); + } + + void vpaddsb_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpaddsb", VEX_PD, OP2_PADDSB_VdqWdq, src1, src0, dst); + } + void vpaddsb_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vpaddsb", VEX_PD, OP2_PADDSB_VdqWdq, offset, base, src0, + dst); + } + void vpaddsb_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpaddsb", VEX_PD, OP2_PADDSB_VdqWdq, address, src0, dst); + } + + void vpaddusb_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpaddusb", VEX_PD, OP2_PADDUSB_VdqWdq, src1, src0, dst); + } + void vpaddusb_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID 
dst) { + twoByteOpSimd("vpaddusb", VEX_PD, OP2_PADDUSB_VdqWdq, offset, base, src0, + dst); + } + void vpaddusb_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpaddusb", VEX_PD, OP2_PADDUSB_VdqWdq, address, src0, dst); + } + + void vpaddw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpaddw", VEX_PD, OP2_PADDW_VdqWdq, src1, src0, dst); + } + void vpaddw_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vpaddw", VEX_PD, OP2_PADDW_VdqWdq, offset, base, src0, dst); + } + void vpaddw_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpaddw", VEX_PD, OP2_PADDW_VdqWdq, address, src0, dst); + } + + void vpaddsw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpaddsw", VEX_PD, OP2_PADDSW_VdqWdq, src1, src0, dst); + } + void vpaddsw_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vpaddsw", VEX_PD, OP2_PADDSW_VdqWdq, offset, base, src0, + dst); + } + void vpaddsw_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpaddsw", VEX_PD, OP2_PADDSW_VdqWdq, address, src0, dst); + } + + void vpaddusw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpaddusw", VEX_PD, OP2_PADDUSW_VdqWdq, src1, src0, dst); + } + void vpaddusw_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vpaddusw", VEX_PD, OP2_PADDUSW_VdqWdq, offset, base, src0, + dst); + } + void vpaddusw_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpaddusw", VEX_PD, OP2_PADDUSW_VdqWdq, address, src0, dst); + } + + void vpaddd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpaddd", VEX_PD, OP2_PADDD_VdqWdq, src1, src0, dst); + } + void vpaddd_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vpaddd", VEX_PD, OP2_PADDD_VdqWdq, offset, base, src0, dst); + } + void vpaddd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpaddd", VEX_PD, OP2_PADDD_VdqWdq, address, src0, dst); + } + + void vpaddq_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpaddq", VEX_PD, OP2_PADDQ_VdqWdq, address, src0, dst); + } + + void vpsubb_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpsubb", VEX_PD, OP2_PSUBB_VdqWdq, src1, src0, dst); + } + void vpsubb_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vpsubb", VEX_PD, OP2_PSUBB_VdqWdq, offset, base, src0, dst); + } + void vpsubb_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpsubb", VEX_PD, OP2_PSUBB_VdqWdq, address, src0, dst); + } + + void vpsubsb_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpsubsb", VEX_PD, OP2_PSUBSB_VdqWdq, src1, src0, dst); + } + void vpsubsb_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vpsubsb", VEX_PD, OP2_PSUBSB_VdqWdq, offset, base, src0, + dst); + } + void vpsubsb_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpsubsb", VEX_PD, OP2_PSUBSB_VdqWdq, address, src0, dst); + } + + void vpsubusb_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpsubusb", VEX_PD, OP2_PSUBUSB_VdqWdq, src1, src0, dst); + } + void vpsubusb_mr(int32_t 
offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vpsubusb", VEX_PD, OP2_PSUBUSB_VdqWdq, offset, base, src0, + dst); + } + void vpsubusb_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpsubusb", VEX_PD, OP2_PSUBUSB_VdqWdq, address, src0, dst); + } + + void vpsubw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpsubw", VEX_PD, OP2_PSUBW_VdqWdq, src1, src0, dst); + } + void vpsubw_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vpsubw", VEX_PD, OP2_PSUBW_VdqWdq, offset, base, src0, dst); + } + void vpsubw_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpsubw", VEX_PD, OP2_PSUBW_VdqWdq, address, src0, dst); + } + + void vpsubsw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpsubsw", VEX_PD, OP2_PSUBSW_VdqWdq, src1, src0, dst); + } + void vpsubsw_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vpsubsw", VEX_PD, OP2_PSUBSW_VdqWdq, offset, base, src0, + dst); + } + void vpsubsw_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpsubsw", VEX_PD, OP2_PSUBSW_VdqWdq, address, src0, dst); + } + + void vpsubusw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpsubusw", VEX_PD, OP2_PSUBUSW_VdqWdq, src1, src0, dst); + } + void vpsubusw_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vpsubusw", VEX_PD, OP2_PSUBUSW_VdqWdq, offset, base, src0, + dst); + } + void vpsubusw_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpsubusw", VEX_PD, OP2_PSUBUSW_VdqWdq, address, src0, dst); + } + + void vpsubd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpsubd", VEX_PD, OP2_PSUBD_VdqWdq, src1, src0, dst); + } + void vpsubd_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vpsubd", VEX_PD, OP2_PSUBD_VdqWdq, offset, base, src0, dst); + } + void vpsubd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpsubd", VEX_PD, OP2_PSUBD_VdqWdq, address, src0, dst); + } + + void vpsubq_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpsubq", VEX_PD, OP2_PSUBQ_VdqWdq, address, src0, dst); + } + + void vpmuldq_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + threeByteOpSimd("vpmuldq", VEX_PD, OP3_PMULDQ_VdqWdq, ESCAPE_38, src1, src0, + dst); + } + + void vpmuludq_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpmuludq", VEX_PD, OP2_PMULUDQ_VdqWdq, src1, src0, dst); + } + void vpmuludq_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vpmuludq", VEX_PD, OP2_PMULUDQ_VdqWdq, offset, base, src0, + dst); + } + void vpmuludq_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpmuludq", VEX_PD, OP2_PMULUDQ_VdqWdq, address, src0, dst); + } + + void vpmaddwd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpmaddwd", VEX_PD, OP2_PMADDWD_VdqWdq, src1, src0, dst); + } + void vpmaddwd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpmaddwd", VEX_PD, OP2_PMADDWD_VdqWdq, address, src0, dst); + } + + void vpmullw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpmullw", 
VEX_PD, OP2_PMULLW_VdqWdq, src1, src0, dst); + } + void vpmulhw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpmulhw", VEX_PD, OP2_PMULHW_VdqWdq, src1, src0, dst); + } + void vpmulhuw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpmulhuw", VEX_PD, OP2_PMULHUW_VdqWdq, src1, src0, dst); + } + void vpmullw_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vpmullw", VEX_PD, OP2_PMULLW_VdqWdq, offset, base, src0, + dst); + } + void vpmulhw_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vpmulhw", VEX_PD, OP2_PMULHW_VdqWdq, offset, base, src0, + dst); + } + void vpmulhuw_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vpmulhuw", VEX_PD, OP2_PMULHUW_VdqWdq, offset, base, src0, + dst); + } + void vpmullw_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpmullw", VEX_PD, OP2_PMULLW_VdqWdq, address, src0, dst); + } + + void vpmulld_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + threeByteOpSimd("vpmulld", VEX_PD, OP3_PMULLD_VdqWdq, ESCAPE_38, src1, src0, + dst); + } + void vpmulld_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + threeByteOpSimd("vpmulld", VEX_PD, OP3_PMULLD_VdqWdq, ESCAPE_38, offset, + base, src0, dst); + } + void vpmulld_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + threeByteOpSimd("vpmulld", VEX_PD, OP3_PMULLD_VdqWdq, ESCAPE_38, address, + src0, dst); + } + void vpmulhrsw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + threeByteOpSimd("vpmulhrsw", VEX_PD, OP3_PMULHRSW_VdqWdq, ESCAPE_38, src1, + src0, dst); + } + void vpmulhrsw_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + threeByteOpSimd("vpmulhrsw", VEX_PD, OP3_PMULHRSW_VdqWdq, ESCAPE_38, offset, + base, src0, dst); + } + + void vaddps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vaddps", VEX_PS, OP2_ADDPS_VpsWps, src1, src0, dst); + } + void vaddps_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vaddps", VEX_PS, OP2_ADDPS_VpsWps, offset, base, src0, dst); + } + void vaddps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vaddps", VEX_PS, OP2_ADDPS_VpsWps, address, src0, dst); + } + + void vsubps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vsubps", VEX_PS, OP2_SUBPS_VpsWps, src1, src0, dst); + } + void vsubps_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vsubps", VEX_PS, OP2_SUBPS_VpsWps, offset, base, src0, dst); + } + void vsubps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vsubps", VEX_PS, OP2_SUBPS_VpsWps, address, src0, dst); + } + + void vmulps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vmulps", VEX_PS, OP2_MULPS_VpsWps, src1, src0, dst); + } + void vmulps_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vmulps", VEX_PS, OP2_MULPS_VpsWps, offset, base, src0, dst); + } + void vmulps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vmulps", VEX_PS, OP2_MULPS_VpsWps, address, src0, dst); + } + + void vdivps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vdivps", VEX_PS, 
OP2_DIVPS_VpsWps, src1, src0, dst); + } + void vdivps_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vdivps", VEX_PS, OP2_DIVPS_VpsWps, offset, base, src0, dst); + } + void vdivps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vdivps", VEX_PS, OP2_DIVPS_VpsWps, address, src0, dst); + } + + void vmaxps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vmaxps", VEX_PS, OP2_MAXPS_VpsWps, src1, src0, dst); + } + void vmaxps_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vmaxps", VEX_PS, OP2_MAXPS_VpsWps, offset, base, src0, dst); + } + void vmaxps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vmaxps", VEX_PS, OP2_MAXPS_VpsWps, address, src0, dst); + } + + void vmaxpd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vmaxpd", VEX_PD, OP2_MAXPD_VpdWpd, src1, src0, dst); + } + + void vminps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vminps", VEX_PS, OP2_MINPS_VpsWps, src1, src0, dst); + } + void vminps_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vminps", VEX_PS, OP2_MINPS_VpsWps, offset, base, src0, dst); + } + void vminps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vminps", VEX_PS, OP2_MINPS_VpsWps, address, src0, dst); + } + + void vminpd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vminpd", VEX_PD, OP2_MINPD_VpdWpd, src1, src0, dst); + } + void vminpd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vminpd", VEX_PD, OP2_MINPD_VpdWpd, address, src0, dst); + } + + void vaddpd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vaddpd", VEX_PD, OP2_ADDPD_VpdWpd, src1, src0, dst); + } + void vaddpd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vaddpd", VEX_PD, OP2_ADDPD_VpdWpd, address, src0, dst); + } + + void vsubpd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vsubpd", VEX_PD, OP2_SUBPD_VpdWpd, src1, src0, dst); + } + void vsubpd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vsubpd", VEX_PD, OP2_SUBPD_VpdWpd, address, src0, dst); + } + + void vmulpd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vmulpd", VEX_PD, OP2_MULPD_VpdWpd, src1, src0, dst); + } + void vmulpd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vmulpd", VEX_PD, OP2_MULPD_VpdWpd, address, src0, dst); + } + + void vdivpd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vdivpd", VEX_PD, OP2_DIVPD_VpdWpd, src1, src0, dst); + } + void vdivpd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vdivpd", VEX_PD, OP2_DIVPD_VpdWpd, address, src0, dst); + } + + void andl_rr(RegisterID src, RegisterID dst) { + spew("andl %s, %s", GPReg32Name(src), GPReg32Name(dst)); + m_formatter.oneByteOp(OP_AND_GvEv, src, dst); + } + + void andw_rr(RegisterID src, RegisterID dst) { + spew("andw %s, %s", GPReg16Name(src), GPReg16Name(dst)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_AND_GvEv, src, dst); + } + + void andl_mr(int32_t offset, RegisterID base, RegisterID dst) { + spew("andl " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(dst)); + 
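// GvEv direction: |dst| is both a source operand and the destination, and
// the memory operand is the other source; the *_rm forms below use the EvGv
// encoding to write the result back to memory instead.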
m_formatter.oneByteOp(OP_AND_GvEv, offset, base, dst); + } + + void andl_mr(int32_t offset, RegisterID base, RegisterID index, int scale, + RegisterID dst) { + spew("andl " MEM_obs ", %s", ADDR_obs(offset, base, index, scale), + GPReg32Name(dst)); + m_formatter.oneByteOp(OP_AND_GvEv, offset, base, index, scale, dst); + } + + void andl_rm(RegisterID src, int32_t offset, RegisterID base) { + spew("andl %s, " MEM_ob, GPReg32Name(src), ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_AND_EvGv, offset, base, src); + } + + void andw_rm(RegisterID src, int32_t offset, RegisterID base) { + spew("andw %s, " MEM_ob, GPReg16Name(src), ADDR_ob(offset, base)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_AND_EvGv, offset, base, src); + } + + void andl_rm(RegisterID src, int32_t offset, RegisterID base, + RegisterID index, int scale) { + spew("andl %s, " MEM_obs, GPReg32Name(src), + ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp(OP_AND_EvGv, offset, base, index, scale, src); + } + + void andw_rm(RegisterID src, int32_t offset, RegisterID base, + RegisterID index, int scale) { + spew("andw %s, " MEM_obs, GPReg16Name(src), + ADDR_obs(offset, base, index, scale)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_AND_EvGv, offset, base, index, scale, src); + } + + void andl_ir(int32_t imm, RegisterID dst) { + spew("andl $0x%x, %s", uint32_t(imm), GPReg32Name(dst)); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, dst, GROUP1_OP_AND); + m_formatter.immediate8s(imm); + } else { + if (dst == rax) { + m_formatter.oneByteOp(OP_AND_EAXIv); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, dst, GROUP1_OP_AND); + } + m_formatter.immediate32(imm); + } + } + + void andw_ir(int32_t imm, RegisterID dst) { + spew("andw $0x%x, %s", uint16_t(imm), GPReg16Name(dst)); + m_formatter.prefix(PRE_OPERAND_SIZE); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, dst, GROUP1_OP_AND); + m_formatter.immediate8s(imm); + } else { + if (dst == rax) { + m_formatter.oneByteOp(OP_AND_EAXIv); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, dst, GROUP1_OP_AND); + } + m_formatter.immediate16(imm); + } + } + + void andl_im(int32_t imm, int32_t offset, RegisterID base) { + spew("andl $0x%x, " MEM_ob, uint32_t(imm), ADDR_ob(offset, base)); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, GROUP1_OP_AND); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, GROUP1_OP_AND); + m_formatter.immediate32(imm); + } + } + + void andw_im(int32_t imm, int32_t offset, RegisterID base) { + spew("andw $0x%x, " MEM_ob, uint16_t(imm), ADDR_ob(offset, base)); + m_formatter.prefix(PRE_OPERAND_SIZE); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, GROUP1_OP_AND); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, GROUP1_OP_AND); + m_formatter.immediate16(imm); + } + } + + void andl_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index, + int scale) { + spew("andl $%d, " MEM_obs, imm, ADDR_obs(offset, base, index, scale)); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, index, scale, + GROUP1_OP_AND); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, index, scale, + GROUP1_OP_AND); + m_formatter.immediate32(imm); + } + } + + void andw_im(int32_t imm, int32_t offset, RegisterID base, 
RegisterID index, + int scale) { + spew("andw $%d, " MEM_obs, int16_t(imm), + ADDR_obs(offset, base, index, scale)); + m_formatter.prefix(PRE_OPERAND_SIZE); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, index, scale, + GROUP1_OP_AND); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, index, scale, + GROUP1_OP_AND); + m_formatter.immediate16(imm); + } + } + + void fld_m(int32_t offset, RegisterID base) { + spew("fld " MEM_ob, ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_FPU6, offset, base, FPU6_OP_FLD); + } + void fld32_m(int32_t offset, RegisterID base) { + spew("fld " MEM_ob, ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_FPU6_F32, offset, base, FPU6_OP_FLD); + } + void faddp() { + spew("addp "); + m_formatter.oneByteOp(OP_FPU6_ADDP); + m_formatter.oneByteOp(OP_ADDP_ST0_ST1); + } + void fisttp_m(int32_t offset, RegisterID base) { + spew("fisttp " MEM_ob, ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_FPU6, offset, base, FPU6_OP_FISTTP); + } + void fistp_m(int32_t offset, RegisterID base) { + spew("fistp " MEM_ob, ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_FILD, offset, base, FPU6_OP_FISTP); + } + void fstp_m(int32_t offset, RegisterID base) { + spew("fstp " MEM_ob, ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_FPU6, offset, base, FPU6_OP_FSTP); + } + void fstp32_m(int32_t offset, RegisterID base) { + spew("fstp32 " MEM_ob, ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_FPU6_F32, offset, base, FPU6_OP_FSTP); + } + void fnstcw_m(int32_t offset, RegisterID base) { + spew("fnstcw " MEM_ob, ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_FPU6_F32, offset, base, FPU6_OP_FISTP); + } + void fldcw_m(int32_t offset, RegisterID base) { + spew("fldcw " MEM_ob, ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_FPU6_F32, offset, base, FPU6_OP_FLDCW); + } + void fnstsw_m(int32_t offset, RegisterID base) { + spew("fnstsw " MEM_ob, ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_FPU6, offset, base, FPU6_OP_FISTP); + } + + void negl_r(RegisterID dst) { + spew("negl %s", GPReg32Name(dst)); + m_formatter.oneByteOp(OP_GROUP3_Ev, dst, GROUP3_OP_NEG); + } + + void negl_m(int32_t offset, RegisterID base) { + spew("negl " MEM_ob, ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_GROUP3_Ev, offset, base, GROUP3_OP_NEG); + } + + void notl_r(RegisterID dst) { + spew("notl %s", GPReg32Name(dst)); + m_formatter.oneByteOp(OP_GROUP3_Ev, dst, GROUP3_OP_NOT); + } + + void notl_m(int32_t offset, RegisterID base) { + spew("notl " MEM_ob, ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_GROUP3_Ev, offset, base, GROUP3_OP_NOT); + } + + void orl_rr(RegisterID src, RegisterID dst) { + spew("orl %s, %s", GPReg32Name(src), GPReg32Name(dst)); + m_formatter.oneByteOp(OP_OR_GvEv, src, dst); + } + + void orw_rr(RegisterID src, RegisterID dst) { + spew("orw %s, %s", GPReg16Name(src), GPReg16Name(dst)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_OR_GvEv, src, dst); + } + + void orl_mr(int32_t offset, RegisterID base, RegisterID dst) { + spew("orl " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(dst)); + m_formatter.oneByteOp(OP_OR_GvEv, offset, base, dst); + } + + void orl_rm(RegisterID src, int32_t offset, RegisterID base) { + spew("orl %s, " MEM_ob, GPReg32Name(src), ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_OR_EvGv, offset, base, src); + } + + void orw_rm(RegisterID src, int32_t offset, RegisterID base) { + spew("orw %s, " MEM_ob, GPReg16Name(src), ADDR_ob(offset, 
base)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_OR_EvGv, offset, base, src); + } + + void orl_rm(RegisterID src, int32_t offset, RegisterID base, RegisterID index, + int scale) { + spew("orl %s, " MEM_obs, GPReg32Name(src), + ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp(OP_OR_EvGv, offset, base, index, scale, src); + } + + void orw_rm(RegisterID src, int32_t offset, RegisterID base, RegisterID index, + int scale) { + spew("orw %s, " MEM_obs, GPReg16Name(src), + ADDR_obs(offset, base, index, scale)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_OR_EvGv, offset, base, index, scale, src); + } + + void orl_ir(int32_t imm, RegisterID dst) { + spew("orl $0x%x, %s", uint32_t(imm), GPReg32Name(dst)); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, dst, GROUP1_OP_OR); + m_formatter.immediate8s(imm); + } else { + if (dst == rax) { + m_formatter.oneByteOp(OP_OR_EAXIv); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, dst, GROUP1_OP_OR); + } + m_formatter.immediate32(imm); + } + } + + void orw_ir(int32_t imm, RegisterID dst) { + spew("orw $0x%x, %s", uint16_t(imm), GPReg16Name(dst)); + m_formatter.prefix(PRE_OPERAND_SIZE); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, dst, GROUP1_OP_OR); + m_formatter.immediate8s(imm); + } else { + if (dst == rax) { + m_formatter.oneByteOp(OP_OR_EAXIv); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, dst, GROUP1_OP_OR); + } + m_formatter.immediate16(imm); + } + } + + void orl_im(int32_t imm, int32_t offset, RegisterID base) { + spew("orl $0x%x, " MEM_ob, uint32_t(imm), ADDR_ob(offset, base)); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, GROUP1_OP_OR); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, GROUP1_OP_OR); + m_formatter.immediate32(imm); + } + } + + void orw_im(int32_t imm, int32_t offset, RegisterID base) { + spew("orw $0x%x, " MEM_ob, uint16_t(imm), ADDR_ob(offset, base)); + m_formatter.prefix(PRE_OPERAND_SIZE); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, GROUP1_OP_OR); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, GROUP1_OP_OR); + m_formatter.immediate16(imm); + } + } + + void orl_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index, + int scale) { + spew("orl $%d, " MEM_obs, imm, ADDR_obs(offset, base, index, scale)); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, index, scale, + GROUP1_OP_OR); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, index, scale, + GROUP1_OP_OR); + m_formatter.immediate32(imm); + } + } + + void orw_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index, + int scale) { + spew("orw $%d, " MEM_obs, int16_t(imm), + ADDR_obs(offset, base, index, scale)); + m_formatter.prefix(PRE_OPERAND_SIZE); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, index, scale, + GROUP1_OP_OR); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, index, scale, + GROUP1_OP_OR); + m_formatter.immediate16(imm); + } + } + + void sbbl_rr(RegisterID src, RegisterID dst) { + spew("sbbl %s, %s", GPReg32Name(src), GPReg32Name(dst)); + m_formatter.oneByteOp(OP_SBB_GvEv, src, dst); + } + + void subl_rr(RegisterID src, RegisterID dst) { + spew("subl %s, %s", 
GPReg32Name(src), GPReg32Name(dst)); + m_formatter.oneByteOp(OP_SUB_GvEv, src, dst); + } + + void subw_rr(RegisterID src, RegisterID dst) { + spew("subw %s, %s", GPReg16Name(src), GPReg16Name(dst)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_SUB_GvEv, src, dst); + } + + void subl_mr(int32_t offset, RegisterID base, RegisterID dst) { + spew("subl " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(dst)); + m_formatter.oneByteOp(OP_SUB_GvEv, offset, base, dst); + } + + void subl_rm(RegisterID src, int32_t offset, RegisterID base) { + spew("subl %s, " MEM_ob, GPReg32Name(src), ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_SUB_EvGv, offset, base, src); + } + + void subw_rm(RegisterID src, int32_t offset, RegisterID base) { + spew("subw %s, " MEM_ob, GPReg16Name(src), ADDR_ob(offset, base)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_SUB_EvGv, offset, base, src); + } + + void subl_rm(RegisterID src, int32_t offset, RegisterID base, + RegisterID index, int scale) { + spew("subl %s, " MEM_obs, GPReg32Name(src), + ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp(OP_SUB_EvGv, offset, base, index, scale, src); + } + + void subw_rm(RegisterID src, int32_t offset, RegisterID base, + RegisterID index, int scale) { + spew("subw %s, " MEM_obs, GPReg16Name(src), + ADDR_obs(offset, base, index, scale)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_SUB_EvGv, offset, base, index, scale, src); + } + + void subl_ir(int32_t imm, RegisterID dst) { + spew("subl $%d, %s", imm, GPReg32Name(dst)); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, dst, GROUP1_OP_SUB); + m_formatter.immediate8s(imm); + } else { + if (dst == rax) { + m_formatter.oneByteOp(OP_SUB_EAXIv); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, dst, GROUP1_OP_SUB); + } + m_formatter.immediate32(imm); + } + } + + void subw_ir(int32_t imm, RegisterID dst) { + spew("subw $%d, %s", int16_t(imm), GPReg16Name(dst)); + m_formatter.prefix(PRE_OPERAND_SIZE); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, dst, GROUP1_OP_SUB); + m_formatter.immediate8s(imm); + } else { + if (dst == rax) { + m_formatter.oneByteOp(OP_SUB_EAXIv); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, dst, GROUP1_OP_SUB); + } + m_formatter.immediate16(imm); + } + } + + void subl_im(int32_t imm, int32_t offset, RegisterID base) { + spew("subl $%d, " MEM_ob, imm, ADDR_ob(offset, base)); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, GROUP1_OP_SUB); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, GROUP1_OP_SUB); + m_formatter.immediate32(imm); + } + } + + void subw_im(int32_t imm, int32_t offset, RegisterID base) { + spew("subw $%d, " MEM_ob, int16_t(imm), ADDR_ob(offset, base)); + m_formatter.prefix(PRE_OPERAND_SIZE); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, GROUP1_OP_SUB); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, GROUP1_OP_SUB); + m_formatter.immediate16(imm); + } + } + + void subl_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index, + int scale) { + spew("subl $%d, " MEM_obs, imm, ADDR_obs(offset, base, index, scale)); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, index, scale, + GROUP1_OP_SUB); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, 
index, scale, + GROUP1_OP_SUB); + m_formatter.immediate32(imm); + } + } + + void subw_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index, + int scale) { + spew("subw $%d, " MEM_obs, int16_t(imm), + ADDR_obs(offset, base, index, scale)); + m_formatter.prefix(PRE_OPERAND_SIZE); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, index, scale, + GROUP1_OP_SUB); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, index, scale, + GROUP1_OP_SUB); + m_formatter.immediate16(imm); + } + } + + void xorl_rr(RegisterID src, RegisterID dst) { + spew("xorl %s, %s", GPReg32Name(src), GPReg32Name(dst)); + m_formatter.oneByteOp(OP_XOR_GvEv, src, dst); + } + + void xorw_rr(RegisterID src, RegisterID dst) { + spew("xorw %s, %s", GPReg16Name(src), GPReg16Name(dst)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_XOR_GvEv, src, dst); + } + + void xorl_mr(int32_t offset, RegisterID base, RegisterID dst) { + spew("xorl " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(dst)); + m_formatter.oneByteOp(OP_XOR_GvEv, offset, base, dst); + } + + void xorl_rm(RegisterID src, int32_t offset, RegisterID base) { + spew("xorl %s, " MEM_ob, GPReg32Name(src), ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_XOR_EvGv, offset, base, src); + } + + void xorw_rm(RegisterID src, int32_t offset, RegisterID base) { + spew("xorw %s, " MEM_ob, GPReg16Name(src), ADDR_ob(offset, base)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_XOR_EvGv, offset, base, src); + } + + void xorl_rm(RegisterID src, int32_t offset, RegisterID base, + RegisterID index, int scale) { + spew("xorl %s, " MEM_obs, GPReg32Name(src), + ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp(OP_XOR_EvGv, offset, base, index, scale, src); + } + + void xorw_rm(RegisterID src, int32_t offset, RegisterID base, + RegisterID index, int scale) { + spew("xorw %s, " MEM_obs, GPReg16Name(src), + ADDR_obs(offset, base, index, scale)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_XOR_EvGv, offset, base, index, scale, src); + } + + void xorl_im(int32_t imm, int32_t offset, RegisterID base) { + spew("xorl $0x%x, " MEM_ob, uint32_t(imm), ADDR_ob(offset, base)); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, GROUP1_OP_XOR); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, GROUP1_OP_XOR); + m_formatter.immediate32(imm); + } + } + + void xorw_im(int32_t imm, int32_t offset, RegisterID base) { + spew("xorw $0x%x, " MEM_ob, uint16_t(imm), ADDR_ob(offset, base)); + m_formatter.prefix(PRE_OPERAND_SIZE); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, GROUP1_OP_XOR); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, GROUP1_OP_XOR); + m_formatter.immediate16(imm); + } + } + + void xorl_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index, + int scale) { + spew("xorl $%d, " MEM_obs, imm, ADDR_obs(offset, base, index, scale)); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, index, scale, + GROUP1_OP_XOR); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, index, scale, + GROUP1_OP_XOR); + m_formatter.immediate32(imm); + } + } + + void xorw_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index, + int scale) { + spew("xorw $%d, " MEM_obs, 
int16_t(imm), + ADDR_obs(offset, base, index, scale)); + m_formatter.prefix(PRE_OPERAND_SIZE); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, index, scale, + GROUP1_OP_XOR); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, index, scale, + GROUP1_OP_XOR); + m_formatter.immediate16(imm); + } + } + + void xorl_ir(int32_t imm, RegisterID dst) { + spew("xorl $%d, %s", imm, GPReg32Name(dst)); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, dst, GROUP1_OP_XOR); + m_formatter.immediate8s(imm); + } else { + if (dst == rax) { + m_formatter.oneByteOp(OP_XOR_EAXIv); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, dst, GROUP1_OP_XOR); + } + m_formatter.immediate32(imm); + } + } + + void xorw_ir(int32_t imm, RegisterID dst) { + spew("xorw $%d, %s", int16_t(imm), GPReg16Name(dst)); + m_formatter.prefix(PRE_OPERAND_SIZE); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, dst, GROUP1_OP_XOR); + m_formatter.immediate8s(imm); + } else { + if (dst == rax) { + m_formatter.oneByteOp(OP_XOR_EAXIv); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, dst, GROUP1_OP_XOR); + } + m_formatter.immediate16(imm); + } + } + + void bswapl_r(RegisterID dst) { + spew("bswap %s", GPReg32Name(dst)); + m_formatter.twoByteOp(OP2_BSWAP, dst); + } + + void sarl_ir(int32_t imm, RegisterID dst) { + MOZ_ASSERT(imm < 32); + spew("sarl $%d, %s", imm, GPReg32Name(dst)); + if (imm == 1) { + m_formatter.oneByteOp(OP_GROUP2_Ev1, dst, GROUP2_OP_SAR); + } else { + m_formatter.oneByteOp(OP_GROUP2_EvIb, dst, GROUP2_OP_SAR); + m_formatter.immediate8u(imm); + } + } + + void sarl_CLr(RegisterID dst) { + spew("sarl %%cl, %s", GPReg32Name(dst)); + m_formatter.oneByteOp(OP_GROUP2_EvCL, dst, GROUP2_OP_SAR); + } + + void shrl_ir(int32_t imm, RegisterID dst) { + MOZ_ASSERT(imm < 32); + spew("shrl $%d, %s", imm, GPReg32Name(dst)); + if (imm == 1) { + m_formatter.oneByteOp(OP_GROUP2_Ev1, dst, GROUP2_OP_SHR); + } else { + m_formatter.oneByteOp(OP_GROUP2_EvIb, dst, GROUP2_OP_SHR); + m_formatter.immediate8u(imm); + } + } + + void shrl_CLr(RegisterID dst) { + spew("shrl %%cl, %s", GPReg32Name(dst)); + m_formatter.oneByteOp(OP_GROUP2_EvCL, dst, GROUP2_OP_SHR); + } + + void shrdl_CLr(RegisterID src, RegisterID dst) { + spew("shrdl %%cl, %s, %s", GPReg32Name(src), GPReg32Name(dst)); + m_formatter.twoByteOp(OP2_SHRD_GvEv, dst, src); + } + + void shldl_CLr(RegisterID src, RegisterID dst) { + spew("shldl %%cl, %s, %s", GPReg32Name(src), GPReg32Name(dst)); + m_formatter.twoByteOp(OP2_SHLD_GvEv, dst, src); + } + + void shll_ir(int32_t imm, RegisterID dst) { + MOZ_ASSERT(imm < 32); + spew("shll $%d, %s", imm, GPReg32Name(dst)); + if (imm == 1) { + m_formatter.oneByteOp(OP_GROUP2_Ev1, dst, GROUP2_OP_SHL); + } else { + m_formatter.oneByteOp(OP_GROUP2_EvIb, dst, GROUP2_OP_SHL); + m_formatter.immediate8u(imm); + } + } + + void shll_CLr(RegisterID dst) { + spew("shll %%cl, %s", GPReg32Name(dst)); + m_formatter.oneByteOp(OP_GROUP2_EvCL, dst, GROUP2_OP_SHL); + } + + void roll_ir(int32_t imm, RegisterID dst) { + MOZ_ASSERT(imm < 32); + spew("roll $%d, %s", imm, GPReg32Name(dst)); + if (imm == 1) { + m_formatter.oneByteOp(OP_GROUP2_Ev1, dst, GROUP2_OP_ROL); + } else { + m_formatter.oneByteOp(OP_GROUP2_EvIb, dst, GROUP2_OP_ROL); + m_formatter.immediate8u(imm); + } + } + void rolw_ir(int32_t imm, RegisterID dst) { + MOZ_ASSERT(imm < 32); + spew("roll $%d, %s", imm, GPReg16Name(dst)); + m_formatter.prefix(PRE_OPERAND_SIZE); + if (imm == 
1) { + m_formatter.oneByteOp(OP_GROUP2_Ev1, dst, GROUP2_OP_ROL); + } else { + m_formatter.oneByteOp(OP_GROUP2_EvIb, dst, GROUP2_OP_ROL); + m_formatter.immediate8u(imm); + } + } + void roll_CLr(RegisterID dst) { + spew("roll %%cl, %s", GPReg32Name(dst)); + m_formatter.oneByteOp(OP_GROUP2_EvCL, dst, GROUP2_OP_ROL); + } + + void rorl_ir(int32_t imm, RegisterID dst) { + MOZ_ASSERT(imm < 32); + spew("rorl $%d, %s", imm, GPReg32Name(dst)); + if (imm == 1) { + m_formatter.oneByteOp(OP_GROUP2_Ev1, dst, GROUP2_OP_ROR); + } else { + m_formatter.oneByteOp(OP_GROUP2_EvIb, dst, GROUP2_OP_ROR); + m_formatter.immediate8u(imm); + } + } + void rorl_CLr(RegisterID dst) { + spew("rorl %%cl, %s", GPReg32Name(dst)); + m_formatter.oneByteOp(OP_GROUP2_EvCL, dst, GROUP2_OP_ROR); + } + + void bsrl_rr(RegisterID src, RegisterID dst) { + spew("bsrl %s, %s", GPReg32Name(src), GPReg32Name(dst)); + m_formatter.twoByteOp(OP2_BSR_GvEv, src, dst); + } + + void bsfl_rr(RegisterID src, RegisterID dst) { + spew("bsfl %s, %s", GPReg32Name(src), GPReg32Name(dst)); + m_formatter.twoByteOp(OP2_BSF_GvEv, src, dst); + } + + void lzcntl_rr(RegisterID src, RegisterID dst) { + spew("lzcntl %s, %s", GPReg32Name(src), GPReg32Name(dst)); + m_formatter.legacySSEPrefix(VEX_SS); + m_formatter.twoByteOp(OP2_LZCNT_GvEv, src, dst); + } + + void tzcntl_rr(RegisterID src, RegisterID dst) { + spew("tzcntl %s, %s", GPReg32Name(src), GPReg32Name(dst)); + m_formatter.legacySSEPrefix(VEX_SS); + m_formatter.twoByteOp(OP2_TZCNT_GvEv, src, dst); + } + + void popcntl_rr(RegisterID src, RegisterID dst) { + spew("popcntl %s, %s", GPReg32Name(src), GPReg32Name(dst)); + m_formatter.legacySSEPrefix(VEX_SS); + m_formatter.twoByteOp(OP2_POPCNT_GvEv, src, dst); + } + + void imull_rr(RegisterID src, RegisterID dst) { + spew("imull %s, %s", GPReg32Name(src), GPReg32Name(dst)); + m_formatter.twoByteOp(OP2_IMUL_GvEv, src, dst); + } + + void imull_r(RegisterID multiplier) { + spew("imull %s", GPReg32Name(multiplier)); + m_formatter.oneByteOp(OP_GROUP3_Ev, multiplier, GROUP3_OP_IMUL); + } + + void imull_mr(int32_t offset, RegisterID base, RegisterID dst) { + spew("imull " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(dst)); + m_formatter.twoByteOp(OP2_IMUL_GvEv, offset, base, dst); + } + + void imull_ir(int32_t value, RegisterID src, RegisterID dst) { + spew("imull $%d, %s, %s", value, GPReg32Name(src), GPReg32Name(dst)); + if (CAN_SIGN_EXTEND_8_32(value)) { + m_formatter.oneByteOp(OP_IMUL_GvEvIb, src, dst); + m_formatter.immediate8s(value); + } else { + m_formatter.oneByteOp(OP_IMUL_GvEvIz, src, dst); + m_formatter.immediate32(value); + } + } + + void mull_r(RegisterID multiplier) { + spew("mull %s", GPReg32Name(multiplier)); + m_formatter.oneByteOp(OP_GROUP3_Ev, multiplier, GROUP3_OP_MUL); + } + + void idivl_r(RegisterID divisor) { + spew("idivl %s", GPReg32Name(divisor)); + m_formatter.oneByteOp(OP_GROUP3_Ev, divisor, GROUP3_OP_IDIV); + } + + void divl_r(RegisterID divisor) { + spew("div %s", GPReg32Name(divisor)); + m_formatter.oneByteOp(OP_GROUP3_Ev, divisor, GROUP3_OP_DIV); + } + + void prefix_lock() { + spew("lock"); + m_formatter.oneByteOp(PRE_LOCK); + } + + void prefix_16_for_32() { + spew("[16-bit operands next]"); + m_formatter.prefix(PRE_OPERAND_SIZE); + } + + void incl_m32(int32_t offset, RegisterID base) { + spew("incl " MEM_ob, ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_GROUP5_Ev, offset, base, GROUP5_OP_INC); + } + + void decl_m32(int32_t offset, RegisterID base) { + spew("decl " MEM_ob, ADDR_ob(offset, base)); + 
m_formatter.oneByteOp(OP_GROUP5_Ev, offset, base, GROUP5_OP_DEC); + } + + // Note that CMPXCHG performs comparison against REG = %al/%ax/%eax/%rax. + // If %REG == [%base+offset], then %src -> [%base+offset]. + // Otherwise, [%base+offset] -> %REG. + // For the 8-bit operations src must also be an 8-bit register. + + void cmpxchgb(RegisterID src, int32_t offset, RegisterID base) { + spew("cmpxchgb %s, " MEM_ob, GPReg8Name(src), ADDR_ob(offset, base)); + m_formatter.twoByteOp8(OP2_CMPXCHG_GvEb, offset, base, src); + } + void cmpxchgb(RegisterID src, int32_t offset, RegisterID base, + RegisterID index, int scale) { + spew("cmpxchgb %s, " MEM_obs, GPReg8Name(src), + ADDR_obs(offset, base, index, scale)); + m_formatter.twoByteOp8(OP2_CMPXCHG_GvEb, offset, base, index, scale, src); + } + void cmpxchgw(RegisterID src, int32_t offset, RegisterID base) { + spew("cmpxchgw %s, " MEM_ob, GPReg16Name(src), ADDR_ob(offset, base)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.twoByteOp(OP2_CMPXCHG_GvEw, offset, base, src); + } + void cmpxchgw(RegisterID src, int32_t offset, RegisterID base, + RegisterID index, int scale) { + spew("cmpxchgw %s, " MEM_obs, GPReg16Name(src), + ADDR_obs(offset, base, index, scale)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.twoByteOp(OP2_CMPXCHG_GvEw, offset, base, index, scale, src); + } + void cmpxchgl(RegisterID src, int32_t offset, RegisterID base) { + spew("cmpxchgl %s, " MEM_ob, GPReg32Name(src), ADDR_ob(offset, base)); + m_formatter.twoByteOp(OP2_CMPXCHG_GvEw, offset, base, src); + } + void cmpxchgl(RegisterID src, int32_t offset, RegisterID base, + RegisterID index, int scale) { + spew("cmpxchgl %s, " MEM_obs, GPReg32Name(src), + ADDR_obs(offset, base, index, scale)); + m_formatter.twoByteOp(OP2_CMPXCHG_GvEw, offset, base, index, scale, src); + } + + void cmpxchg8b(RegisterID srcHi, RegisterID srcLo, RegisterID newHi, + RegisterID newLo, int32_t offset, RegisterID base) { + MOZ_ASSERT(srcHi == edx.code() && srcLo == eax.code()); + MOZ_ASSERT(newHi == ecx.code() && newLo == ebx.code()); + spew("cmpxchg8b %s, " MEM_ob, "edx:eax", ADDR_ob(offset, base)); + m_formatter.twoByteOp(OP2_CMPXCHGNB, offset, base, 1); + } + void cmpxchg8b(RegisterID srcHi, RegisterID srcLo, RegisterID newHi, + RegisterID newLo, int32_t offset, RegisterID base, + RegisterID index, int scale) { + MOZ_ASSERT(srcHi == edx.code() && srcLo == eax.code()); + MOZ_ASSERT(newHi == ecx.code() && newLo == ebx.code()); + spew("cmpxchg8b %s, " MEM_obs, "edx:eax", + ADDR_obs(offset, base, index, scale)); + m_formatter.twoByteOp(OP2_CMPXCHGNB, offset, base, index, scale, 1); + } + + // Comparisons: + + void cmpl_rr(RegisterID rhs, RegisterID lhs) { + spew("cmpl %s, %s", GPReg32Name(rhs), GPReg32Name(lhs)); + m_formatter.oneByteOp(OP_CMP_GvEv, rhs, lhs); + } + + void cmpl_rm(RegisterID rhs, int32_t offset, RegisterID base) { + spew("cmpl %s, " MEM_ob, GPReg32Name(rhs), ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_CMP_EvGv, offset, base, rhs); + } + + void cmpl_rm(RegisterID rhs, int32_t offset, RegisterID base, + RegisterID index, int scale) { + spew("cmpl %s, " MEM_obs, GPReg32Name(rhs), + ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp(OP_CMP_EvGv, offset, base, index, scale, rhs); + } + + void cmpl_mr(int32_t offset, RegisterID base, RegisterID lhs) { + spew("cmpl " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(lhs)); + m_formatter.oneByteOp(OP_CMP_GvEv, offset, base, lhs); + } + + void cmpl_mr(const void* address, RegisterID lhs) { + spew("cmpl %p, %s", 
address, GPReg32Name(lhs)); + m_formatter.oneByteOp(OP_CMP_GvEv, address, lhs); + } + + void cmpl_ir(int32_t rhs, RegisterID lhs) { + if (rhs == 0) { + testl_rr(lhs, lhs); + return; + } + + spew("cmpl $0x%x, %s", uint32_t(rhs), GPReg32Name(lhs)); + if (CAN_SIGN_EXTEND_8_32(rhs)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, lhs, GROUP1_OP_CMP); + m_formatter.immediate8s(rhs); + } else { + if (lhs == rax) { + m_formatter.oneByteOp(OP_CMP_EAXIv); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, lhs, GROUP1_OP_CMP); + } + m_formatter.immediate32(rhs); + } + } + + void cmpl_i32r(int32_t rhs, RegisterID lhs) { + spew("cmpl $0x%04x, %s", uint32_t(rhs), GPReg32Name(lhs)); + if (lhs == rax) { + m_formatter.oneByteOp(OP_CMP_EAXIv); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, lhs, GROUP1_OP_CMP); + } + m_formatter.immediate32(rhs); + } + + void cmpl_im(int32_t rhs, int32_t offset, RegisterID base) { + spew("cmpl $0x%x, " MEM_ob, uint32_t(rhs), ADDR_ob(offset, base)); + if (CAN_SIGN_EXTEND_8_32(rhs)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, GROUP1_OP_CMP); + m_formatter.immediate8s(rhs); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, GROUP1_OP_CMP); + m_formatter.immediate32(rhs); + } + } + + void cmpb_rr(RegisterID rhs, RegisterID lhs) { + spew("cmpb %s, %s", GPReg8Name(rhs), GPReg8Name(lhs)); + m_formatter.oneByteOp(OP_CMP_GbEb, rhs, lhs); + } + + void cmpb_rm(RegisterID rhs, int32_t offset, RegisterID base) { + spew("cmpb %s, " MEM_ob, GPReg8Name(rhs), ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_CMP_EbGb, offset, base, rhs); + } + + void cmpb_rm(RegisterID rhs, int32_t offset, RegisterID base, + RegisterID index, int scale) { + spew("cmpb %s, " MEM_obs, GPReg8Name(rhs), + ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp(OP_CMP_EbGb, offset, base, index, scale, rhs); + } + + void cmpb_rm(RegisterID rhs, const void* addr) { + spew("cmpb %s, %p", GPReg8Name(rhs), addr); + m_formatter.oneByteOp(OP_CMP_EbGb, addr, rhs); + } + + void cmpb_ir(int32_t rhs, RegisterID lhs) { + if (rhs == 0) { + testb_rr(lhs, lhs); + return; + } + + spew("cmpb $0x%x, %s", uint32_t(rhs), GPReg8Name(lhs)); + if (lhs == rax) { + m_formatter.oneByteOp(OP_CMP_EAXIb); + } else { + m_formatter.oneByteOp(OP_GROUP1_EbIb, lhs, GROUP1_OP_CMP); + } + m_formatter.immediate8(rhs); + } + + void cmpb_im(int32_t rhs, int32_t offset, RegisterID base) { + spew("cmpb $0x%x, " MEM_ob, uint32_t(rhs), ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_GROUP1_EbIb, offset, base, GROUP1_OP_CMP); + m_formatter.immediate8(rhs); + } + + void cmpb_im(int32_t rhs, int32_t offset, RegisterID base, RegisterID index, + int scale) { + spew("cmpb $0x%x, " MEM_obs, uint32_t(rhs), + ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp(OP_GROUP1_EbIb, offset, base, index, scale, + GROUP1_OP_CMP); + m_formatter.immediate8(rhs); + } + + void cmpb_im(int32_t rhs, const void* addr) { + spew("cmpb $0x%x, %p", uint32_t(rhs), addr); + m_formatter.oneByteOp(OP_GROUP1_EbIb, addr, GROUP1_OP_CMP); + m_formatter.immediate8(rhs); + } + + void cmpl_im(int32_t rhs, int32_t offset, RegisterID base, RegisterID index, + int scale) { + spew("cmpl $0x%x, " MEM_obs, uint32_t(rhs), + ADDR_obs(offset, base, index, scale)); + if (CAN_SIGN_EXTEND_8_32(rhs)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, index, scale, + GROUP1_OP_CMP); + m_formatter.immediate8s(rhs); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, index, scale, + GROUP1_OP_CMP); + m_formatter.immediate32(rhs); + } + } + + 
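The CMPXCHG note earlier in this block describes the hardware protocol in prose. Below is a minimal C++ model of that same rule, useful for keeping the register and memory roles straight; cmpxchg32_model is an invented name used only for illustration and does not correspond to anything in this patch.

// Minimal model of the semantics spelled out in the CMPXCHG comment above:
// the accumulator (%al/%ax/%eax/%rax) is compared with the memory operand;
// on a match the source register is stored to memory, otherwise the memory
// value is loaded back into the accumulator. ZF reports which case happened.
#include <cstdint>
#include <cstdio>

// Hypothetical helper; models "lock cmpxchgl %src, (mem)".
static bool cmpxchg32_model(uint32_t& eax, uint32_t& mem, uint32_t src) {
  if (eax == mem) {  // comparison against the accumulator
    mem = src;       // %src -> [mem]
    return true;     // ZF = 1
  }
  eax = mem;         // [mem] -> %eax
  return false;      // ZF = 0
}

int main() {
  uint32_t mem = 5, eax = 5;
  bool zf = cmpxchg32_model(eax, mem, 9);
  std::printf("match: zf=%d mem=%u eax=%u\n", zf, mem, eax);  // zf=1 mem=9 eax=5
  eax = 7;
  zf = cmpxchg32_model(eax, mem, 3);
  std::printf("miss:  zf=%d mem=%u eax=%u\n", zf, mem, eax);  // zf=0 mem=9 eax=9
}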
[[nodiscard]] JmpSrc cmpl_im_disp32(int32_t rhs, int32_t offset, + RegisterID base) { + spew("cmpl $0x%x, " MEM_o32b, uint32_t(rhs), ADDR_o32b(offset, base)); + JmpSrc r; + if (CAN_SIGN_EXTEND_8_32(rhs)) { + m_formatter.oneByteOp_disp32(OP_GROUP1_EvIb, offset, base, GROUP1_OP_CMP); + r = JmpSrc(m_formatter.size()); + m_formatter.immediate8s(rhs); + } else { + m_formatter.oneByteOp_disp32(OP_GROUP1_EvIz, offset, base, GROUP1_OP_CMP); + r = JmpSrc(m_formatter.size()); + m_formatter.immediate32(rhs); + } + return r; + } + + [[nodiscard]] JmpSrc cmpl_im_disp32(int32_t rhs, const void* addr) { + spew("cmpl $0x%x, %p", uint32_t(rhs), addr); + JmpSrc r; + if (CAN_SIGN_EXTEND_8_32(rhs)) { + m_formatter.oneByteOp_disp32(OP_GROUP1_EvIb, addr, GROUP1_OP_CMP); + r = JmpSrc(m_formatter.size()); + m_formatter.immediate8s(rhs); + } else { + m_formatter.oneByteOp_disp32(OP_GROUP1_EvIz, addr, GROUP1_OP_CMP); + r = JmpSrc(m_formatter.size()); + m_formatter.immediate32(rhs); + } + return r; + } + + void cmpl_i32m(int32_t rhs, int32_t offset, RegisterID base) { + spew("cmpl $0x%04x, " MEM_ob, uint32_t(rhs), ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, GROUP1_OP_CMP); + m_formatter.immediate32(rhs); + } + + void cmpl_i32m(int32_t rhs, const void* addr) { + spew("cmpl $0x%04x, %p", uint32_t(rhs), addr); + m_formatter.oneByteOp(OP_GROUP1_EvIz, addr, GROUP1_OP_CMP); + m_formatter.immediate32(rhs); + } + + void cmpl_rm(RegisterID rhs, const void* addr) { + spew("cmpl %s, %p", GPReg32Name(rhs), addr); + m_formatter.oneByteOp(OP_CMP_EvGv, addr, rhs); + } + + void cmpl_rm_disp32(RegisterID rhs, const void* addr) { + spew("cmpl %s, %p", GPReg32Name(rhs), addr); + m_formatter.oneByteOp_disp32(OP_CMP_EvGv, addr, rhs); + } + + void cmpl_im(int32_t rhs, const void* addr) { + spew("cmpl $0x%x, %p", uint32_t(rhs), addr); + if (CAN_SIGN_EXTEND_8_32(rhs)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, addr, GROUP1_OP_CMP); + m_formatter.immediate8s(rhs); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, addr, GROUP1_OP_CMP); + m_formatter.immediate32(rhs); + } + } + + void cmpw_rr(RegisterID rhs, RegisterID lhs) { + spew("cmpw %s, %s", GPReg16Name(rhs), GPReg16Name(lhs)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_CMP_GvEv, rhs, lhs); + } + + void cmpw_rm(RegisterID rhs, int32_t offset, RegisterID base, + RegisterID index, int scale) { + spew("cmpw %s, " MEM_obs, GPReg16Name(rhs), + ADDR_obs(offset, base, index, scale)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_CMP_EvGv, offset, base, index, scale, rhs); + } + + void cmpw_ir(int32_t rhs, RegisterID lhs) { + if (rhs == 0) { + testw_rr(lhs, lhs); + return; + } + + spew("cmpw $0x%x, %s", uint32_t(rhs), GPReg16Name(lhs)); + if (CAN_SIGN_EXTEND_8_32(rhs)) { + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_GROUP1_EvIb, lhs, GROUP1_OP_CMP); + m_formatter.immediate8s(rhs); + } else { + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_GROUP1_EvIz, lhs, GROUP1_OP_CMP); + m_formatter.immediate16(rhs); + } + } + + void cmpw_im(int32_t rhs, int32_t offset, RegisterID base) { + spew("cmpw $0x%x, " MEM_ob, uint32_t(rhs), ADDR_ob(offset, base)); + if (CAN_SIGN_EXTEND_8_32(rhs)) { + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, GROUP1_OP_CMP); + m_formatter.immediate8s(rhs); + } else { + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, GROUP1_OP_CMP); + m_formatter.immediate16(rhs); + } + } + + 
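cmpl_im_disp32 above does two things worth calling out: it picks the sign-extended 8-bit immediate form when the value allows it (the /Ib versus /Iz split used throughout these emitters), and it records the buffer offset of the immediate as the returned JmpSrc so the caller can patch the value in place later. A small standalone sketch of the width selection, assuming a fits_in_signed8 helper that mirrors what the CAN_SIGN_EXTEND_8_32 check decides (the helper name is invented here):

// Which GROUP1 encoding a 32-bit compare-immediate takes: values that are
// representable as a sign-extended byte use the short 0x83 /Ib form, all
// other values use the 0x81 /Iz form with a full 32-bit immediate.
#include <cstdint>
#include <cstdio>

static bool fits_in_signed8(int32_t v) { return v == int8_t(v); }  // invented helper

int main() {
  const int32_t samples[] = {0, 1, 127, -128, 128, -129, 0x7fffffff};
  for (int32_t imm : samples) {
    std::printf("cmpl $%d -> %s\n", imm,
                fits_in_signed8(imm) ? "opcode 0x83 /7, imm8 (sign-extended)"
                                     : "opcode 0x81 /7, imm32");
  }
}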
void cmpw_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index, + int scale) { + spew("cmpw $%d, " MEM_obs, imm, ADDR_obs(offset, base, index, scale)); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, index, scale, + GROUP1_OP_CMP); + m_formatter.immediate8s(imm); + } else { + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, index, scale, + GROUP1_OP_CMP); + m_formatter.immediate16(imm); + } + } + + void cmpw_im(int32_t rhs, const void* addr) { + spew("cmpw $0x%x, %p", uint32_t(rhs), addr); + if (CAN_SIGN_EXTEND_8_32(rhs)) { + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_GROUP1_EvIb, addr, GROUP1_OP_CMP); + m_formatter.immediate8s(rhs); + } else { + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_GROUP1_EvIz, addr, GROUP1_OP_CMP); + m_formatter.immediate16(rhs); + } + } + + void testl_rr(RegisterID rhs, RegisterID lhs) { + spew("testl %s, %s", GPReg32Name(rhs), GPReg32Name(lhs)); + m_formatter.oneByteOp(OP_TEST_EvGv, lhs, rhs); + } + + void testb_rr(RegisterID rhs, RegisterID lhs) { + spew("testb %s, %s", GPReg8Name(rhs), GPReg8Name(lhs)); + m_formatter.oneByteOp(OP_TEST_EbGb, lhs, rhs); + } + + void testl_ir(int32_t rhs, RegisterID lhs) { + // If the mask fits in an 8-bit immediate, we can use testb with an + // 8-bit subreg. + if (CAN_ZERO_EXTEND_8_32(rhs) && HasSubregL(lhs)) { + testb_ir(rhs, lhs); + return; + } + // If the mask is a subset of 0xff00, we can use testb with an h reg, if + // one happens to be available. + if (CAN_ZERO_EXTEND_8H_32(rhs) && HasSubregH(lhs)) { + testb_ir_norex(rhs >> 8, GetSubregH(lhs)); + return; + } + spew("testl $0x%x, %s", uint32_t(rhs), GPReg32Name(lhs)); + if (lhs == rax) { + m_formatter.oneByteOp(OP_TEST_EAXIv); + } else { + m_formatter.oneByteOp(OP_GROUP3_EvIz, lhs, GROUP3_OP_TEST); + } + m_formatter.immediate32(rhs); + } + + void testl_i32m(int32_t rhs, int32_t offset, RegisterID base) { + spew("testl $0x%x, " MEM_ob, uint32_t(rhs), ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_GROUP3_EvIz, offset, base, GROUP3_OP_TEST); + m_formatter.immediate32(rhs); + } + + void testl_i32m(int32_t rhs, const void* addr) { + spew("testl $0x%x, %p", uint32_t(rhs), addr); + m_formatter.oneByteOp(OP_GROUP3_EvIz, addr, GROUP3_OP_TEST); + m_formatter.immediate32(rhs); + } + + void testb_im(int32_t rhs, int32_t offset, RegisterID base) { + spew("testb $0x%x, " MEM_ob, uint32_t(rhs), ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_GROUP3_EbIb, offset, base, GROUP3_OP_TEST); + m_formatter.immediate8(rhs); + } + + void testb_im(int32_t rhs, int32_t offset, RegisterID base, RegisterID index, + int scale) { + spew("testb $0x%x, " MEM_obs, uint32_t(rhs), + ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp(OP_GROUP3_EbIb, offset, base, index, scale, + GROUP3_OP_TEST); + m_formatter.immediate8(rhs); + } + + void testl_i32m(int32_t rhs, int32_t offset, RegisterID base, + RegisterID index, int scale) { + spew("testl $0x%4x, " MEM_obs, uint32_t(rhs), + ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp(OP_GROUP3_EvIz, offset, base, index, scale, + GROUP3_OP_TEST); + m_formatter.immediate32(rhs); + } + + void testw_rr(RegisterID rhs, RegisterID lhs) { + spew("testw %s, %s", GPReg16Name(rhs), GPReg16Name(lhs)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_TEST_EvGv, lhs, rhs); + } + + void testb_ir(int32_t rhs, RegisterID lhs) { + spew("testb $0x%x, %s", 
uint32_t(rhs), GPReg8Name(lhs)); + if (lhs == rax) { + m_formatter.oneByteOp8(OP_TEST_EAXIb); + } else { + m_formatter.oneByteOp8(OP_GROUP3_EbIb, lhs, GROUP3_OP_TEST); + } + m_formatter.immediate8(rhs); + } + + // Like testb_ir, but never emits a REX prefix. This may be used to + // reference ah..bh. + void testb_ir_norex(int32_t rhs, HRegisterID lhs) { + spew("testb $0x%x, %s", uint32_t(rhs), HRegName8(lhs)); + m_formatter.oneByteOp8_norex(OP_GROUP3_EbIb, lhs, GROUP3_OP_TEST); + m_formatter.immediate8(rhs); + } + + void setCC_r(Condition cond, RegisterID lhs) { + spew("set%s %s", CCName(cond), GPReg8Name(lhs)); + m_formatter.twoByteOp8(setccOpcode(cond), lhs, (GroupOpcodeID)0); + } + + void sete_r(RegisterID dst) { setCC_r(ConditionE, dst); } + + void setz_r(RegisterID dst) { sete_r(dst); } + + void setne_r(RegisterID dst) { setCC_r(ConditionNE, dst); } + + void setnz_r(RegisterID dst) { setne_r(dst); } + + // Various move ops: + + void cdq() { + spew("cdq "); + m_formatter.oneByteOp(OP_CDQ); + } + + void xchgb_rm(RegisterID src, int32_t offset, RegisterID base) { + spew("xchgb %s, " MEM_ob, GPReg8Name(src), ADDR_ob(offset, base)); + m_formatter.oneByteOp8(OP_XCHG_GbEb, offset, base, src); + } + void xchgb_rm(RegisterID src, int32_t offset, RegisterID base, + RegisterID index, int scale) { + spew("xchgb %s, " MEM_obs, GPReg8Name(src), + ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp8(OP_XCHG_GbEb, offset, base, index, scale, src); + } + + void xchgw_rm(RegisterID src, int32_t offset, RegisterID base) { + spew("xchgw %s, " MEM_ob, GPReg16Name(src), ADDR_ob(offset, base)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_XCHG_GvEv, offset, base, src); + } + void xchgw_rm(RegisterID src, int32_t offset, RegisterID base, + RegisterID index, int scale) { + spew("xchgw %s, " MEM_obs, GPReg16Name(src), + ADDR_obs(offset, base, index, scale)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_XCHG_GvEv, offset, base, index, scale, src); + } + + void xchgl_rr(RegisterID src, RegisterID dst) { + spew("xchgl %s, %s", GPReg32Name(src), GPReg32Name(dst)); + m_formatter.oneByteOp(OP_XCHG_GvEv, src, dst); + } + void xchgl_rm(RegisterID src, int32_t offset, RegisterID base) { + spew("xchgl %s, " MEM_ob, GPReg32Name(src), ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_XCHG_GvEv, offset, base, src); + } + void xchgl_rm(RegisterID src, int32_t offset, RegisterID base, + RegisterID index, int scale) { + spew("xchgl %s, " MEM_obs, GPReg32Name(src), + ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp(OP_XCHG_GvEv, offset, base, index, scale, src); + } + + void cmovCCl_rr(Condition cond, RegisterID src, RegisterID dst) { + spew("cmov%s %s, %s", CCName(cond), GPReg32Name(src), GPReg32Name(dst)); + m_formatter.twoByteOp(cmovccOpcode(cond), src, dst); + } + void cmovCCl_mr(Condition cond, int32_t offset, RegisterID base, + RegisterID dst) { + spew("cmov%s " MEM_ob ", %s", CCName(cond), ADDR_ob(offset, base), + GPReg32Name(dst)); + m_formatter.twoByteOp(cmovccOpcode(cond), offset, base, dst); + } + void cmovCCl_mr(Condition cond, int32_t offset, RegisterID base, + RegisterID index, int scale, RegisterID dst) { + spew("cmov%s " MEM_obs ", %s", CCName(cond), + ADDR_obs(offset, base, index, scale), GPReg32Name(dst)); + m_formatter.twoByteOp(cmovccOpcode(cond), offset, base, index, scale, dst); + } + + void movl_rr(RegisterID src, RegisterID dst) { + spew("movl %s, %s", GPReg32Name(src), GPReg32Name(dst)); + m_formatter.oneByteOp(OP_MOV_GvEv, src, 
dst); + } + + void movw_rm(RegisterID src, int32_t offset, RegisterID base) { + spew("movw %s, " MEM_ob, GPReg16Name(src), ADDR_ob(offset, base)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_MOV_EvGv, offset, base, src); + } + + void movw_rm_disp32(RegisterID src, int32_t offset, RegisterID base) { + spew("movw %s, " MEM_o32b, GPReg16Name(src), ADDR_o32b(offset, base)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp_disp32(OP_MOV_EvGv, offset, base, src); + } + + void movw_rm(RegisterID src, int32_t offset, RegisterID base, + RegisterID index, int scale) { + spew("movw %s, " MEM_obs, GPReg16Name(src), + ADDR_obs(offset, base, index, scale)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_MOV_EvGv, offset, base, index, scale, src); + } + + void movw_rm(RegisterID src, const void* addr) { + spew("movw %s, %p", GPReg16Name(src), addr); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp_disp32(OP_MOV_EvGv, addr, src); + } + + void movl_rm(RegisterID src, int32_t offset, RegisterID base) { + spew("movl %s, " MEM_ob, GPReg32Name(src), ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_MOV_EvGv, offset, base, src); + } + + void movl_rm_disp32(RegisterID src, int32_t offset, RegisterID base) { + spew("movl %s, " MEM_o32b, GPReg32Name(src), ADDR_o32b(offset, base)); + m_formatter.oneByteOp_disp32(OP_MOV_EvGv, offset, base, src); + } + + void movl_rm(RegisterID src, int32_t offset, RegisterID base, + RegisterID index, int scale) { + spew("movl %s, " MEM_obs, GPReg32Name(src), + ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp(OP_MOV_EvGv, offset, base, index, scale, src); + } + + void movl_mEAX(const void* addr) { +#ifdef JS_CODEGEN_X64 + if (IsAddressImmediate(addr)) { + movl_mr(addr, rax); + return; + } +#endif + +#ifdef JS_CODEGEN_X64 + spew("movabs %p, %%eax", addr); +#else + spew("movl %p, %%eax", addr); +#endif + m_formatter.oneByteOp(OP_MOV_EAXOv); +#ifdef JS_CODEGEN_X64 + m_formatter.immediate64(reinterpret_cast<int64_t>(addr)); +#else + m_formatter.immediate32(reinterpret_cast<int32_t>(addr)); +#endif + } + + void movl_mr(int32_t offset, RegisterID base, RegisterID dst) { + spew("movl " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(dst)); + m_formatter.oneByteOp(OP_MOV_GvEv, offset, base, dst); + } + + void movl_mr_disp32(int32_t offset, RegisterID base, RegisterID dst) { + spew("movl " MEM_o32b ", %s", ADDR_o32b(offset, base), + GPReg32Name(dst)); + m_formatter.oneByteOp_disp32(OP_MOV_GvEv, offset, base, dst); + } + + void movl_mr(const void* base, RegisterID index, int scale, RegisterID dst) { + int32_t disp = AddressImmediate(base); + + spew("movl " MEM_os ", %s", ADDR_os(disp, index, scale), + GPReg32Name(dst)); + m_formatter.oneByteOp_disp32(OP_MOV_GvEv, disp, index, scale, dst); + } + + void movl_mr(int32_t offset, RegisterID base, RegisterID index, int scale, + RegisterID dst) { + spew("movl " MEM_obs ", %s", ADDR_obs(offset, base, index, scale), + GPReg32Name(dst)); + m_formatter.oneByteOp(OP_MOV_GvEv, offset, base, index, scale, dst); + } + + void movl_mr(const void* addr, RegisterID dst) { + if (dst == rax +#ifdef JS_CODEGEN_X64 + && !IsAddressImmediate(addr) +#endif + ) { + movl_mEAX(addr); + return; + } + + spew("movl %p, %s", addr, GPReg32Name(dst)); + m_formatter.oneByteOp(OP_MOV_GvEv, addr, dst); + } + + void movl_i32r(int32_t imm, RegisterID dst) { + spew("movl $0x%x, %s", uint32_t(imm), GPReg32Name(dst)); + m_formatter.oneByteOp(OP_MOV_EAXIv, dst); + m_formatter.immediate32(imm); + } 
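movl_i32r just above uses the short register-immediate form: passing the destination to OP_MOV_EAXIv folds the register id into the opcode byte itself, so the instruction is a single opcode byte followed by the 32-bit immediate, with no ModRM byte. A rough illustration of that encoding rule, using ordinary x86 register numbering sketched here rather than taken from the patch:

// "mov r32, imm32" short form: opcode 0xB8 + register number, then imm32.
// Five bytes total when no REX prefix is needed (x64 adds a REX prefix for
// r8-r15 or 64-bit immediates; see movq_i64r on the x64 side).
#include <cstdint>
#include <cstdio>

int main() {
  const char* names[] = {"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi"};
  for (unsigned reg = 0; reg < 8; reg++) {
    uint8_t opcode = uint8_t(0xB8 + reg);  // OP_MOV_EAXIv + register id
    std::printf("movl $imm32, %%%s -> %02X id  (5 bytes)\n", names[reg], opcode);
  }
}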
+ + void movb_ir(int32_t imm, RegisterID reg) { + spew("movb $0x%x, %s", uint32_t(imm), GPReg8Name(reg)); + m_formatter.oneByteOp8(OP_MOV_EbIb, reg); + m_formatter.immediate8(imm); + } + + void movb_im(int32_t imm, int32_t offset, RegisterID base) { + spew("movb $0x%x, " MEM_ob, uint32_t(imm), ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_GROUP11_EvIb, offset, base, GROUP11_MOV); + m_formatter.immediate8(imm); + } + + void movb_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index, + int scale) { + spew("movb $0x%x, " MEM_obs, uint32_t(imm), + ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp(OP_GROUP11_EvIb, offset, base, index, scale, + GROUP11_MOV); + m_formatter.immediate8(imm); + } + + void movb_im(int32_t imm, const void* addr) { + spew("movb $%d, %p", imm, addr); + m_formatter.oneByteOp_disp32(OP_GROUP11_EvIb, addr, GROUP11_MOV); + m_formatter.immediate8(imm); + } + + void movw_im(int32_t imm, int32_t offset, RegisterID base) { + spew("movw $0x%x, " MEM_ob, uint32_t(imm), ADDR_ob(offset, base)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_GROUP11_EvIz, offset, base, GROUP11_MOV); + m_formatter.immediate16(imm); + } + + void movw_im(int32_t imm, const void* addr) { + spew("movw $%d, %p", imm, addr); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp_disp32(OP_GROUP11_EvIz, addr, GROUP11_MOV); + m_formatter.immediate16(imm); + } + + void movl_i32m(int32_t imm, int32_t offset, RegisterID base) { + spew("movl $0x%x, " MEM_ob, uint32_t(imm), ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_GROUP11_EvIz, offset, base, GROUP11_MOV); + m_formatter.immediate32(imm); + } + + void movw_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index, + int scale) { + spew("movw $0x%x, " MEM_obs, uint32_t(imm), + ADDR_obs(offset, base, index, scale)); + m_formatter.prefix(PRE_OPERAND_SIZE); + m_formatter.oneByteOp(OP_GROUP11_EvIz, offset, base, index, scale, + GROUP11_MOV); + m_formatter.immediate16(imm); + } + + void movl_i32m(int32_t imm, int32_t offset, RegisterID base, RegisterID index, + int scale) { + spew("movl $0x%x, " MEM_obs, uint32_t(imm), + ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp(OP_GROUP11_EvIz, offset, base, index, scale, + GROUP11_MOV); + m_formatter.immediate32(imm); + } + + void movl_EAXm(const void* addr) { +#ifdef JS_CODEGEN_X64 + if (IsAddressImmediate(addr)) { + movl_rm(rax, addr); + return; + } +#endif + + spew("movl %%eax, %p", addr); + m_formatter.oneByteOp(OP_MOV_OvEAX); +#ifdef JS_CODEGEN_X64 + m_formatter.immediate64(reinterpret_cast<int64_t>(addr)); +#else + m_formatter.immediate32(reinterpret_cast<int32_t>(addr)); +#endif + } + + void vmovq_rm(XMMRegisterID src, int32_t offset, RegisterID base) { + // vmovq_rm can be encoded either as a true vmovq or as a vmovd with a + // REX prefix modifying it to be 64-bit. We choose the vmovq encoding + // because it's smaller (when it doesn't need a REX prefix for other + // reasons) and because it works on 32-bit x86 too. 
+ twoByteOpSimd("vmovq", VEX_PD, OP2_MOVQ_WdVd, offset, base, invalid_xmm, + src); + } + + void vmovq_rm_disp32(XMMRegisterID src, int32_t offset, RegisterID base) { + twoByteOpSimd_disp32("vmovq", VEX_PD, OP2_MOVQ_WdVd, offset, base, + invalid_xmm, src); + } + + void vmovq_rm(XMMRegisterID src, int32_t offset, RegisterID base, + RegisterID index, int scale) { + twoByteOpSimd("vmovq", VEX_PD, OP2_MOVQ_WdVd, offset, base, index, scale, + invalid_xmm, src); + } + + void vmovq_rm(XMMRegisterID src, const void* addr) { + twoByteOpSimd("vmovq", VEX_PD, OP2_MOVQ_WdVd, addr, invalid_xmm, src); + } + + void vmovq_mr(int32_t offset, RegisterID base, XMMRegisterID dst) { + // vmovq_mr can be encoded either as a true vmovq or as a vmovd with a + // REX prefix modifying it to be 64-bit. We choose the vmovq encoding + // because it's smaller (when it doesn't need a REX prefix for other + // reasons) and because it works on 32-bit x86 too. + twoByteOpSimd("vmovq", VEX_SS, OP2_MOVQ_VdWd, offset, base, invalid_xmm, + dst); + } + + void vmovq_mr_disp32(int32_t offset, RegisterID base, XMMRegisterID dst) { + twoByteOpSimd_disp32("vmovq", VEX_SS, OP2_MOVQ_VdWd, offset, base, + invalid_xmm, dst); + } + + void vmovq_mr(int32_t offset, RegisterID base, RegisterID index, + int32_t scale, XMMRegisterID dst) { + twoByteOpSimd("vmovq", VEX_SS, OP2_MOVQ_VdWd, offset, base, index, scale, + invalid_xmm, dst); + } + + void vmovq_mr(const void* addr, XMMRegisterID dst) { + twoByteOpSimd("vmovq", VEX_SS, OP2_MOVQ_VdWd, addr, invalid_xmm, dst); + } + + void movl_rm(RegisterID src, const void* addr) { + if (src == rax +#ifdef JS_CODEGEN_X64 + && !IsAddressImmediate(addr) +#endif + ) { + movl_EAXm(addr); + return; + } + + spew("movl %s, %p", GPReg32Name(src), addr); + m_formatter.oneByteOp(OP_MOV_EvGv, addr, src); + } + + void movl_i32m(int32_t imm, const void* addr) { + spew("movl $%d, %p", imm, addr); + m_formatter.oneByteOp(OP_GROUP11_EvIz, addr, GROUP11_MOV); + m_formatter.immediate32(imm); + } + + void movb_rm(RegisterID src, int32_t offset, RegisterID base) { + spew("movb %s, " MEM_ob, GPReg8Name(src), ADDR_ob(offset, base)); + m_formatter.oneByteOp8(OP_MOV_EbGv, offset, base, src); + } + + void movb_rm_disp32(RegisterID src, int32_t offset, RegisterID base) { + spew("movb %s, " MEM_o32b, GPReg8Name(src), ADDR_o32b(offset, base)); + m_formatter.oneByteOp8_disp32(OP_MOV_EbGv, offset, base, src); + } + + void movb_rm(RegisterID src, int32_t offset, RegisterID base, + RegisterID index, int scale) { + spew("movb %s, " MEM_obs, GPReg8Name(src), + ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp8(OP_MOV_EbGv, offset, base, index, scale, src); + } + + void movb_rm(RegisterID src, const void* addr) { + spew("movb %s, %p", GPReg8Name(src), addr); + m_formatter.oneByteOp8(OP_MOV_EbGv, addr, src); + } + + void movb_mr(int32_t offset, RegisterID base, RegisterID dst) { + spew("movb " MEM_ob ", %s", ADDR_ob(offset, base), GPReg8Name(dst)); + m_formatter.oneByteOp(OP_MOV_GvEb, offset, base, dst); + } + + void movb_mr(int32_t offset, RegisterID base, RegisterID index, int scale, + RegisterID dst) { + spew("movb " MEM_obs ", %s", ADDR_obs(offset, base, index, scale), + GPReg8Name(dst)); + m_formatter.oneByteOp(OP_MOV_GvEb, offset, base, index, scale, dst); + } + + void movzbl_mr(int32_t offset, RegisterID base, RegisterID dst) { + spew("movzbl " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(dst)); + m_formatter.twoByteOp(OP2_MOVZX_GvEb, offset, base, dst); + } + + void movzbl_mr_disp32(int32_t offset, RegisterID 
base, RegisterID dst) { + spew("movzbl " MEM_o32b ", %s", ADDR_o32b(offset, base), + GPReg32Name(dst)); + m_formatter.twoByteOp_disp32(OP2_MOVZX_GvEb, offset, base, dst); + } + + void movzbl_mr(int32_t offset, RegisterID base, RegisterID index, int scale, + RegisterID dst) { + spew("movzbl " MEM_obs ", %s", ADDR_obs(offset, base, index, scale), + GPReg32Name(dst)); + m_formatter.twoByteOp(OP2_MOVZX_GvEb, offset, base, index, scale, dst); + } + + void movzbl_mr(const void* addr, RegisterID dst) { + spew("movzbl %p, %s", addr, GPReg32Name(dst)); + m_formatter.twoByteOp(OP2_MOVZX_GvEb, addr, dst); + } + + void movsbl_rr(RegisterID src, RegisterID dst) { + spew("movsbl %s, %s", GPReg8Name(src), GPReg32Name(dst)); + m_formatter.twoByteOp8_movx(OP2_MOVSX_GvEb, src, dst); + } + + void movsbl_mr(int32_t offset, RegisterID base, RegisterID dst) { + spew("movsbl " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(dst)); + m_formatter.twoByteOp(OP2_MOVSX_GvEb, offset, base, dst); + } + + void movsbl_mr_disp32(int32_t offset, RegisterID base, RegisterID dst) { + spew("movsbl " MEM_o32b ", %s", ADDR_o32b(offset, base), + GPReg32Name(dst)); + m_formatter.twoByteOp_disp32(OP2_MOVSX_GvEb, offset, base, dst); + } + + void movsbl_mr(int32_t offset, RegisterID base, RegisterID index, int scale, + RegisterID dst) { + spew("movsbl " MEM_obs ", %s", ADDR_obs(offset, base, index, scale), + GPReg32Name(dst)); + m_formatter.twoByteOp(OP2_MOVSX_GvEb, offset, base, index, scale, dst); + } + + void movsbl_mr(const void* addr, RegisterID dst) { + spew("movsbl %p, %s", addr, GPReg32Name(dst)); + m_formatter.twoByteOp(OP2_MOVSX_GvEb, addr, dst); + } + + void movzwl_rr(RegisterID src, RegisterID dst) { + spew("movzwl %s, %s", GPReg16Name(src), GPReg32Name(dst)); + m_formatter.twoByteOp(OP2_MOVZX_GvEw, src, dst); + } + + void movzwl_mr(int32_t offset, RegisterID base, RegisterID dst) { + spew("movzwl " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(dst)); + m_formatter.twoByteOp(OP2_MOVZX_GvEw, offset, base, dst); + } + + void movzwl_mr_disp32(int32_t offset, RegisterID base, RegisterID dst) { + spew("movzwl " MEM_o32b ", %s", ADDR_o32b(offset, base), + GPReg32Name(dst)); + m_formatter.twoByteOp_disp32(OP2_MOVZX_GvEw, offset, base, dst); + } + + void movzwl_mr(int32_t offset, RegisterID base, RegisterID index, int scale, + RegisterID dst) { + spew("movzwl " MEM_obs ", %s", ADDR_obs(offset, base, index, scale), + GPReg32Name(dst)); + m_formatter.twoByteOp(OP2_MOVZX_GvEw, offset, base, index, scale, dst); + } + + void movzwl_mr(const void* addr, RegisterID dst) { + spew("movzwl %p, %s", addr, GPReg32Name(dst)); + m_formatter.twoByteOp(OP2_MOVZX_GvEw, addr, dst); + } + + void movswl_rr(RegisterID src, RegisterID dst) { + spew("movswl %s, %s", GPReg16Name(src), GPReg32Name(dst)); + m_formatter.twoByteOp(OP2_MOVSX_GvEw, src, dst); + } + + void movswl_mr(int32_t offset, RegisterID base, RegisterID dst) { + spew("movswl " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(dst)); + m_formatter.twoByteOp(OP2_MOVSX_GvEw, offset, base, dst); + } + + void movswl_mr_disp32(int32_t offset, RegisterID base, RegisterID dst) { + spew("movswl " MEM_o32b ", %s", ADDR_o32b(offset, base), + GPReg32Name(dst)); + m_formatter.twoByteOp_disp32(OP2_MOVSX_GvEw, offset, base, dst); + } + + void movswl_mr(int32_t offset, RegisterID base, RegisterID index, int scale, + RegisterID dst) { + spew("movswl " MEM_obs ", %s", ADDR_obs(offset, base, index, scale), + GPReg32Name(dst)); + m_formatter.twoByteOp(OP2_MOVSX_GvEw, offset, base, index, scale, dst); 
+ } + + void movswl_mr(const void* addr, RegisterID dst) { + spew("movswl %p, %s", addr, GPReg32Name(dst)); + m_formatter.twoByteOp(OP2_MOVSX_GvEw, addr, dst); + } + + void movzbl_rr(RegisterID src, RegisterID dst) { + spew("movzbl %s, %s", GPReg8Name(src), GPReg32Name(dst)); + m_formatter.twoByteOp8_movx(OP2_MOVZX_GvEb, src, dst); + } + + void leal_mr(int32_t offset, RegisterID base, RegisterID index, int scale, + RegisterID dst) { + spew("leal " MEM_obs ", %s", ADDR_obs(offset, base, index, scale), + GPReg32Name(dst)); + m_formatter.oneByteOp(OP_LEA, offset, base, index, scale, dst); + } + + void leal_mr(int32_t offset, RegisterID base, RegisterID dst) { + spew("leal " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(dst)); + m_formatter.oneByteOp(OP_LEA, offset, base, dst); + } + + // Flow control: + + [[nodiscard]] JmpSrc call() { + m_formatter.oneByteOp(OP_CALL_rel32); + JmpSrc r = m_formatter.immediateRel32(); + spew("call .Lfrom%d", r.offset()); + return r; + } + + void call_r(RegisterID dst) { + m_formatter.oneByteOp(OP_GROUP5_Ev, dst, GROUP5_OP_CALLN); + spew("call *%s", GPRegName(dst)); + } + + void call_m(int32_t offset, RegisterID base) { + spew("call *" MEM_ob, ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_GROUP5_Ev, offset, base, GROUP5_OP_CALLN); + } + + // Comparison of EAX against a 32-bit immediate. The immediate is patched + // in as if it were a jump target. The intention is to toggle the first + // byte of the instruction between a CMP and a JMP to produce a pseudo-NOP. + [[nodiscard]] JmpSrc cmp_eax() { + m_formatter.oneByteOp(OP_CMP_EAXIv); + JmpSrc r = m_formatter.immediateRel32(); + spew("cmpl %%eax, .Lfrom%d", r.offset()); + return r; + } + + void jmp_i(JmpDst dst) { + int32_t diff = dst.offset() - m_formatter.size(); + spew("jmp .Llabel%d", dst.offset()); + + // The jump immediate is an offset from the end of the jump instruction. + // A jump instruction is either 1 byte opcode and 1 byte offset, or 1 + // byte opcode and 4 bytes offset. + if (CAN_SIGN_EXTEND_8_32(diff - 2)) { + m_formatter.oneByteOp(OP_JMP_rel8); + m_formatter.immediate8s(diff - 2); + } else { + m_formatter.oneByteOp(OP_JMP_rel32); + m_formatter.immediate32(diff - 5); + } + } + [[nodiscard]] JmpSrc jmp() { + m_formatter.oneByteOp(OP_JMP_rel32); + JmpSrc r = m_formatter.immediateRel32(); + spew("jmp .Lfrom%d", r.offset()); + return r; + } + + void jmp_r(RegisterID dst) { + spew("jmp *%s", GPRegName(dst)); + m_formatter.oneByteOp(OP_GROUP5_Ev, dst, GROUP5_OP_JMPN); + } + + void jmp_m(int32_t offset, RegisterID base) { + spew("jmp *" MEM_ob, ADDR_ob(offset, base)); + m_formatter.oneByteOp(OP_GROUP5_Ev, offset, base, GROUP5_OP_JMPN); + } + + void jmp_m(int32_t offset, RegisterID base, RegisterID index, int scale) { + spew("jmp *" MEM_obs, ADDR_obs(offset, base, index, scale)); + m_formatter.oneByteOp(OP_GROUP5_Ev, offset, base, index, scale, + GROUP5_OP_JMPN); + } + + void jCC_i(Condition cond, JmpDst dst) { + int32_t diff = dst.offset() - m_formatter.size(); + spew("j%s .Llabel%d", CCName(cond), dst.offset()); + + // The jump immediate is an offset from the end of the jump instruction. + // A conditional jump instruction is either 1 byte opcode and 1 byte + // offset, or 2 bytes opcode and 4 bytes offset. 
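+    // Spelling out the arithmetic: diff is measured from the start of this
+    // jcc, while the CPU-relative displacement is measured from its end, so
+    // the emitted value is diff minus the instruction length: 2 bytes for
+    // the rel8 form, 6 bytes (two opcode bytes plus rel32) for the long form.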
+ if (CAN_SIGN_EXTEND_8_32(diff - 2)) { + m_formatter.oneByteOp(jccRel8(cond)); + m_formatter.immediate8s(diff - 2); + } else { + m_formatter.twoByteOp(jccRel32(cond)); + m_formatter.immediate32(diff - 6); + } + } + + [[nodiscard]] JmpSrc jCC(Condition cond) { + m_formatter.twoByteOp(jccRel32(cond)); + JmpSrc r = m_formatter.immediateRel32(); + spew("j%s .Lfrom%d", CCName(cond), r.offset()); + return r; + } + + // SSE operations: + + void vpcmpeqb_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpcmpeqb", VEX_PD, OP2_PCMPEQB_VdqWdq, src1, src0, dst); + } + void vpcmpeqb_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vpcmpeqb", VEX_PD, OP2_PCMPEQB_VdqWdq, offset, base, src0, + dst); + } + void vpcmpeqb_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpcmpeqb", VEX_PD, OP2_PCMPEQB_VdqWdq, address, src0, dst); + } + + void vpcmpgtb_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpcmpgtb", VEX_PD, OP2_PCMPGTB_VdqWdq, src1, src0, dst); + } + void vpcmpgtb_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vpcmpgtb", VEX_PD, OP2_PCMPGTB_VdqWdq, offset, base, src0, + dst); + } + void vpcmpgtb_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpcmpgtb", VEX_PD, OP2_PCMPGTB_VdqWdq, address, src0, dst); + } + + void vpcmpeqw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpcmpeqw", VEX_PD, OP2_PCMPEQW_VdqWdq, src1, src0, dst); + } + void vpcmpeqw_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vpcmpeqw", VEX_PD, OP2_PCMPEQW_VdqWdq, offset, base, src0, + dst); + } + void vpcmpeqw_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpcmpeqw", VEX_PD, OP2_PCMPEQW_VdqWdq, address, src0, dst); + } + + void vpcmpgtw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpcmpgtw", VEX_PD, OP2_PCMPGTW_VdqWdq, src1, src0, dst); + } + void vpcmpgtw_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vpcmpgtw", VEX_PD, OP2_PCMPGTW_VdqWdq, offset, base, src0, + dst); + } + void vpcmpgtw_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpcmpgtw", VEX_PD, OP2_PCMPGTW_VdqWdq, address, src0, dst); + } + + void vpcmpeqd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpcmpeqd", VEX_PD, OP2_PCMPEQD_VdqWdq, src1, src0, dst); + } + void vpcmpeqd_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vpcmpeqd", VEX_PD, OP2_PCMPEQD_VdqWdq, offset, base, src0, + dst); + } + void vpcmpeqd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpcmpeqd", VEX_PD, OP2_PCMPEQD_VdqWdq, address, src0, dst); + } + + void vpcmpgtd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpcmpgtd", VEX_PD, OP2_PCMPGTD_VdqWdq, src1, src0, dst); + } + void vpcmpgtd_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vpcmpgtd", VEX_PD, OP2_PCMPGTD_VdqWdq, offset, base, src0, + dst); + } + void vpcmpgtd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpcmpgtd", VEX_PD, OP2_PCMPGTD_VdqWdq, address, src0, dst); + } + + void vpcmpgtq_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + 
threeByteOpSimd("vpcmpgtq", VEX_PD, OP3_PCMPGTQ_VdqWdq, ESCAPE_38, src1, + src0, dst); + } + + void vpcmpeqq_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + threeByteOpSimd("vpcmpeqq", VEX_PD, OP3_PCMPEQQ_VdqWdq, ESCAPE_38, src1, + src0, dst); + } + void vpcmpeqq_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + threeByteOpSimd("vpcmpeqq", VEX_PD, OP3_PCMPEQQ_VdqWdq, ESCAPE_38, offset, + base, src0, dst); + } + void vpcmpeqq_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + threeByteOpSimd("vpcmpeqq", VEX_PD, OP3_PCMPEQQ_VdqWdq, ESCAPE_38, address, + src0, dst); + } + + void vcmpps_rr(uint8_t order, XMMRegisterID src1, XMMRegisterID src0, + XMMRegisterID dst) { + MOZ_ASSERT_IF(!useVEX_, + order < uint8_t(X86Encoding::ConditionCmp_AVX_Enabled)); + twoByteOpImmSimd("vcmpps", VEX_PS, OP2_CMPPS_VpsWps, order, src1, src0, + dst); + } + void vcmpps_mr(uint8_t order, int32_t offset, RegisterID base, + XMMRegisterID src0, XMMRegisterID dst) { + MOZ_ASSERT_IF(!useVEX_, + order < uint8_t(X86Encoding::ConditionCmp_AVX_Enabled)); + twoByteOpImmSimd("vcmpps", VEX_PS, OP2_CMPPS_VpsWps, order, offset, base, + src0, dst); + } + void vcmpps_mr(uint8_t order, const void* address, XMMRegisterID src0, + XMMRegisterID dst) { + MOZ_ASSERT_IF(!useVEX_, + order < uint8_t(X86Encoding::ConditionCmp_AVX_Enabled)); + twoByteOpImmSimd("vcmpps", VEX_PS, OP2_CMPPS_VpsWps, order, address, src0, + dst); + } + + static constexpr size_t CMPPS_MR_PATCH_OFFSET = 1; + + size_t vcmpeqps_mr(const void* address, XMMRegisterID src0, + XMMRegisterID dst) { + vcmpps_mr(X86Encoding::ConditionCmp_EQ, address, src0, dst); + return CMPPS_MR_PATCH_OFFSET; + } + size_t vcmpneqps_mr(const void* address, XMMRegisterID src0, + XMMRegisterID dst) { + vcmpps_mr(X86Encoding::ConditionCmp_NEQ, address, src0, dst); + return CMPPS_MR_PATCH_OFFSET; + } + size_t vcmpltps_mr(const void* address, XMMRegisterID src0, + XMMRegisterID dst) { + vcmpps_mr(X86Encoding::ConditionCmp_LT, address, src0, dst); + return CMPPS_MR_PATCH_OFFSET; + } + size_t vcmpleps_mr(const void* address, XMMRegisterID src0, + XMMRegisterID dst) { + vcmpps_mr(X86Encoding::ConditionCmp_LE, address, src0, dst); + return CMPPS_MR_PATCH_OFFSET; + } + size_t vcmpgeps_mr(const void* address, XMMRegisterID src0, + XMMRegisterID dst) { + vcmpps_mr(X86Encoding::ConditionCmp_GE, address, src0, dst); + return CMPPS_MR_PATCH_OFFSET; + } + + void vcmppd_rr(uint8_t order, XMMRegisterID src1, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpImmSimd("vcmppd", VEX_PD, OP2_CMPPD_VpdWpd, order, src1, src0, + dst); + } + void vcmppd_mr(uint8_t order, const void* address, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpImmSimd("vcmppd", VEX_PD, OP2_CMPPD_VpdWpd, order, address, src0, + dst); + } + + static constexpr size_t CMPPD_MR_PATCH_OFFSET = 1; + + size_t vcmpeqpd_mr(const void* address, XMMRegisterID src0, + XMMRegisterID dst) { + vcmppd_mr(X86Encoding::ConditionCmp_EQ, address, src0, dst); + return CMPPD_MR_PATCH_OFFSET; + } + size_t vcmpneqpd_mr(const void* address, XMMRegisterID src0, + XMMRegisterID dst) { + vcmppd_mr(X86Encoding::ConditionCmp_NEQ, address, src0, dst); + return CMPPD_MR_PATCH_OFFSET; + } + size_t vcmpltpd_mr(const void* address, XMMRegisterID src0, + XMMRegisterID dst) { + vcmppd_mr(X86Encoding::ConditionCmp_LT, address, src0, dst); + return CMPPD_MR_PATCH_OFFSET; + } + size_t vcmplepd_mr(const void* address, XMMRegisterID src0, + XMMRegisterID dst) { + vcmppd_mr(X86Encoding::ConditionCmp_LE, 
address, src0, dst); + return CMPPD_MR_PATCH_OFFSET; + } + + void vrcpps_rr(XMMRegisterID src, XMMRegisterID dst) { + twoByteOpSimd("vrcpps", VEX_PS, OP2_RCPPS_VpsWps, src, invalid_xmm, dst); + } + void vrcpps_mr(int32_t offset, RegisterID base, XMMRegisterID dst) { + twoByteOpSimd("vrcpps", VEX_PS, OP2_RCPPS_VpsWps, offset, base, invalid_xmm, + dst); + } + void vrcpps_mr(const void* address, XMMRegisterID dst) { + twoByteOpSimd("vrcpps", VEX_PS, OP2_RCPPS_VpsWps, address, invalid_xmm, + dst); + } + + void vrsqrtps_rr(XMMRegisterID src, XMMRegisterID dst) { + twoByteOpSimd("vrsqrtps", VEX_PS, OP2_RSQRTPS_VpsWps, src, invalid_xmm, + dst); + } + void vrsqrtps_mr(int32_t offset, RegisterID base, XMMRegisterID dst) { + twoByteOpSimd("vrsqrtps", VEX_PS, OP2_RSQRTPS_VpsWps, offset, base, + invalid_xmm, dst); + } + void vrsqrtps_mr(const void* address, XMMRegisterID dst) { + twoByteOpSimd("vrsqrtps", VEX_PS, OP2_RSQRTPS_VpsWps, address, invalid_xmm, + dst); + } + + void vsqrtps_rr(XMMRegisterID src, XMMRegisterID dst) { + twoByteOpSimd("vsqrtps", VEX_PS, OP2_SQRTPS_VpsWps, src, invalid_xmm, dst); + } + void vsqrtps_mr(int32_t offset, RegisterID base, XMMRegisterID dst) { + twoByteOpSimd("vsqrtps", VEX_PS, OP2_SQRTPS_VpsWps, offset, base, + invalid_xmm, dst); + } + void vsqrtps_mr(const void* address, XMMRegisterID dst) { + twoByteOpSimd("vsqrtps", VEX_PS, OP2_SQRTPS_VpsWps, address, invalid_xmm, + dst); + } + void vsqrtpd_rr(XMMRegisterID src, XMMRegisterID dst) { + twoByteOpSimd("vsqrtpd", VEX_PD, OP2_SQRTPD_VpdWpd, src, invalid_xmm, dst); + } + + void vaddsd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vaddsd", VEX_SD, OP2_ADDSD_VsdWsd, src1, src0, dst); + } + + void vaddss_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vaddss", VEX_SS, OP2_ADDSD_VsdWsd, src1, src0, dst); + } + + void vaddsd_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vaddsd", VEX_SD, OP2_ADDSD_VsdWsd, offset, base, src0, dst); + } + + void vaddss_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vaddss", VEX_SS, OP2_ADDSD_VsdWsd, offset, base, src0, dst); + } + + void vaddsd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vaddsd", VEX_SD, OP2_ADDSD_VsdWsd, address, src0, dst); + } + void vaddss_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vaddss", VEX_SS, OP2_ADDSD_VsdWsd, address, src0, dst); + } + + void vcvtss2sd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vcvtss2sd", VEX_SS, OP2_CVTSS2SD_VsdEd, src1, src0, dst); + } + + void vcvtsd2ss_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vcvtsd2ss", VEX_SD, OP2_CVTSD2SS_VsdEd, src1, src0, dst); + } + + void vcvtsi2ss_rr(RegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpInt32Simd("vcvtsi2ss", VEX_SS, OP2_CVTSI2SD_VsdEd, src1, src0, + dst); + } + + void vcvtsi2sd_rr(RegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpInt32Simd("vcvtsi2sd", VEX_SD, OP2_CVTSI2SD_VsdEd, src1, src0, + dst); + } + + void vcvttps2dq_rr(XMMRegisterID src, XMMRegisterID dst) { + twoByteOpSimd("vcvttps2dq", VEX_SS, OP2_CVTTPS2DQ_VdqWps, src, invalid_xmm, + dst); + } + + void vcvttpd2dq_rr(XMMRegisterID src, XMMRegisterID dst) { + twoByteOpSimd("vcvttpd2dq", VEX_PD, OP2_CVTTPD2DQ_VdqWpd, src, invalid_xmm, + dst); + } + + void vcvtdq2ps_rr(XMMRegisterID src, 
XMMRegisterID dst) { + twoByteOpSimd("vcvtdq2ps", VEX_PS, OP2_CVTDQ2PS_VpsWdq, src, invalid_xmm, + dst); + } + + void vcvtdq2pd_rr(XMMRegisterID src, XMMRegisterID dst) { + twoByteOpSimd("vcvtdq2pd", VEX_SS, OP2_CVTDQ2PD_VpdWdq, src, invalid_xmm, + dst); + } + + void vcvtpd2ps_rr(XMMRegisterID src, XMMRegisterID dst) { + twoByteOpSimd("vcvtpd2ps", VEX_PD, OP2_CVTPD2PS_VpsWpd, src, invalid_xmm, + dst); + } + + void vcvtps2pd_rr(XMMRegisterID src, XMMRegisterID dst) { + twoByteOpSimd("vcvtps2pd", VEX_PS, OP2_CVTPS2PD_VpdWps, src, invalid_xmm, + dst); + } + + void vcvtsi2sd_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vcvtsi2sd", VEX_SD, OP2_CVTSI2SD_VsdEd, offset, base, src0, + dst); + } + + void vcvtsi2sd_mr(int32_t offset, RegisterID base, RegisterID index, + int scale, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vcvtsi2sd", VEX_SD, OP2_CVTSI2SD_VsdEd, offset, base, index, + scale, src0, dst); + } + + void vcvtsi2ss_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vcvtsi2ss", VEX_SS, OP2_CVTSI2SD_VsdEd, offset, base, src0, + dst); + } + + void vcvtsi2ss_mr(int32_t offset, RegisterID base, RegisterID index, + int scale, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vcvtsi2ss", VEX_SS, OP2_CVTSI2SD_VsdEd, offset, base, index, + scale, src0, dst); + } + + void vcvttsd2si_rr(XMMRegisterID src, RegisterID dst) { + twoByteOpSimdInt32("vcvttsd2si", VEX_SD, OP2_CVTTSD2SI_GdWsd, src, dst); + } + + void vcvttss2si_rr(XMMRegisterID src, RegisterID dst) { + twoByteOpSimdInt32("vcvttss2si", VEX_SS, OP2_CVTTSD2SI_GdWsd, src, dst); + } + + void vunpcklps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vunpcklps", VEX_PS, OP2_UNPCKLPS_VsdWsd, src1, src0, dst); + } + void vunpcklps_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vunpcklps", VEX_PS, OP2_UNPCKLPS_VsdWsd, offset, base, src0, + dst); + } + void vunpcklps_mr(const void* addr, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vunpcklps", VEX_PS, OP2_UNPCKLPS_VsdWsd, addr, src0, dst); + } + + void vunpckhps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vunpckhps", VEX_PS, OP2_UNPCKHPS_VsdWsd, src1, src0, dst); + } + void vunpckhps_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vunpckhps", VEX_PS, OP2_UNPCKHPS_VsdWsd, offset, base, src0, + dst); + } + void vunpckhps_mr(const void* addr, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vunpckhps", VEX_PS, OP2_UNPCKHPS_VsdWsd, addr, src0, dst); + } + + void vpand_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpand", VEX_PD, OP2_PANDDQ_VdqWdq, src1, src0, dst); + } + void vpand_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vpand", VEX_PD, OP2_PANDDQ_VdqWdq, offset, base, src0, dst); + } + void vpand_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpand", VEX_PD, OP2_PANDDQ_VdqWdq, address, src0, dst); + } + void vpor_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpor", VEX_PD, OP2_PORDQ_VdqWdq, src1, src0, dst); + } + void vpor_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vpor", VEX_PD, OP2_PORDQ_VdqWdq, offset, base, src0, dst); + } + void vpor_mr(const void* address, XMMRegisterID src0, 
XMMRegisterID dst) { + twoByteOpSimd("vpor", VEX_PD, OP2_PORDQ_VdqWdq, address, src0, dst); + } + void vpxor_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpxor", VEX_PD, OP2_PXORDQ_VdqWdq, src1, src0, dst); + } + void vpxor_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vpxor", VEX_PD, OP2_PXORDQ_VdqWdq, offset, base, src0, dst); + } + void vpxor_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpxor", VEX_PD, OP2_PXORDQ_VdqWdq, address, src0, dst); + } + void vpandn_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpandn", VEX_PD, OP2_PANDNDQ_VdqWdq, src1, src0, dst); + } + void vpandn_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vpandn", VEX_PD, OP2_PANDNDQ_VdqWdq, offset, base, src0, + dst); + } + void vpandn_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpandn", VEX_PD, OP2_PANDNDQ_VdqWdq, address, src0, dst); + } + void vptest_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + threeByteOpSimd("vptest", VEX_PD, OP3_PTEST_VdVd, ESCAPE_38, address, src0, + dst); + } + + void vpshufd_irr(uint32_t mask, XMMRegisterID src, XMMRegisterID dst) { + twoByteOpImmSimd("vpshufd", VEX_PD, OP2_PSHUFD_VdqWdqIb, mask, src, + invalid_xmm, dst); + } + void vpshufd_imr(uint32_t mask, int32_t offset, RegisterID base, + XMMRegisterID dst) { + twoByteOpImmSimd("vpshufd", VEX_PD, OP2_PSHUFD_VdqWdqIb, mask, offset, base, + invalid_xmm, dst); + } + void vpshufd_imr(uint32_t mask, const void* address, XMMRegisterID dst) { + twoByteOpImmSimd("vpshufd", VEX_PD, OP2_PSHUFD_VdqWdqIb, mask, address, + invalid_xmm, dst); + } + + void vpshuflw_irr(uint32_t mask, XMMRegisterID src, XMMRegisterID dst) { + twoByteOpImmSimd("vpshuflw", VEX_SD, OP2_PSHUFLW_VdqWdqIb, mask, src, + invalid_xmm, dst); + } + + void vpshufhw_irr(uint32_t mask, XMMRegisterID src, XMMRegisterID dst) { + twoByteOpImmSimd("vpshufhw", VEX_SS, OP2_PSHUFHW_VdqWdqIb, mask, src, + invalid_xmm, dst); + } + + void vpshufb_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + threeByteOpSimd("vpshufb", VEX_PD, OP3_PSHUFB_VdqWdq, ESCAPE_38, src1, src0, + dst); + } + void vpshufb_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + threeByteOpSimd("vpshufb", VEX_PD, OP3_PSHUFB_VdqWdq, ESCAPE_38, address, + src0, dst); + } + + void vshufps_irr(uint32_t mask, XMMRegisterID src1, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpImmSimd("vshufps", VEX_PS, OP2_SHUFPS_VpsWpsIb, mask, src1, src0, + dst); + } + void vshufps_imr(uint32_t mask, int32_t offset, RegisterID base, + XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpImmSimd("vshufps", VEX_PS, OP2_SHUFPS_VpsWpsIb, mask, offset, base, + src0, dst); + } + void vshufps_imr(uint32_t mask, const void* address, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpImmSimd("vshufps", VEX_PS, OP2_SHUFPS_VpsWpsIb, mask, address, + src0, dst); + } + void vshufpd_irr(uint32_t mask, XMMRegisterID src1, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpImmSimd("vshufpd", VEX_PD, OP2_SHUFPD_VpdWpdIb, mask, src1, src0, + dst); + } + + void vmovddup_rr(XMMRegisterID src, XMMRegisterID dst) { + twoByteOpSimd("vmovddup", VEX_SD, OP2_MOVDDUP_VqWq, src, invalid_xmm, dst); + } + void vmovddup_mr(int32_t offset, RegisterID base, XMMRegisterID dst) { + twoByteOpSimd("vmovddup", VEX_SD, OP2_MOVDDUP_VqWq, offset, base, + invalid_xmm, dst); + } + 
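// Encoding note: the VEX_SD and VEX_SS operand types passed for vpshuflw and + // vpshufhw above do not imply scalar semantics here; they simply select the + // F2 and F3 mandatory prefixes that, together with 0F 70, form the PSHUFLW + // and PSHUFHW opcodes (PSHUFD uses the 66 prefix, i.e. VEX_PD). +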
void vmovddup_mr(int32_t offset, RegisterID base, RegisterID index, + int32_t scale, XMMRegisterID dst) { + twoByteOpSimd("vmovddup", VEX_SD, OP2_MOVDDUP_VqWq, offset, base, index, + scale, invalid_xmm, dst); + } + + void vmovhlps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vmovhlps", VEX_PS, OP2_MOVHLPS_VqUq, src1, src0, dst); + } + + void vmovlhps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vmovlhps", VEX_PS, OP2_MOVLHPS_VqUq, src1, src0, dst); + } + + void vpsrldq_ir(uint32_t count, XMMRegisterID src, XMMRegisterID dst) { + MOZ_ASSERT(count < 16); + shiftOpImmSimd("vpsrldq", OP2_PSRLDQ_Vd, ShiftID::vpsrldq, count, src, dst); + } + + void vpslldq_ir(uint32_t count, XMMRegisterID src, XMMRegisterID dst) { + MOZ_ASSERT(count < 16); + shiftOpImmSimd("vpslldq", OP2_PSRLDQ_Vd, ShiftID::vpslldq, count, src, dst); + } + + void vpsllq_ir(uint32_t count, XMMRegisterID src, XMMRegisterID dst) { + MOZ_ASSERT(count < 64); + shiftOpImmSimd("vpsllq", OP2_PSRLDQ_Vd, ShiftID::vpsllx, count, src, dst); + } + + void vpsllq_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpsllq", VEX_PD, OP2_PSLLQ_VdqWdq, src1, src0, dst); + } + + void vpsrlq_ir(uint32_t count, XMMRegisterID src, XMMRegisterID dst) { + MOZ_ASSERT(count < 64); + shiftOpImmSimd("vpsrlq", OP2_PSRLDQ_Vd, ShiftID::vpsrlx, count, src, dst); + } + + void vpsrlq_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpsrlq", VEX_PD, OP2_PSRLQ_VdqWdq, src1, src0, dst); + } + + void vpslld_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpslld", VEX_PD, OP2_PSLLD_VdqWdq, src1, src0, dst); + } + + void vpslld_ir(uint32_t count, XMMRegisterID src, XMMRegisterID dst) { + MOZ_ASSERT(count < 32); + shiftOpImmSimd("vpslld", OP2_PSLLD_UdqIb, ShiftID::vpsllx, count, src, dst); + } + + void vpsrad_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpsrad", VEX_PD, OP2_PSRAD_VdqWdq, src1, src0, dst); + } + + void vpsrad_ir(int32_t count, XMMRegisterID src, XMMRegisterID dst) { + MOZ_ASSERT(count < 32); + shiftOpImmSimd("vpsrad", OP2_PSRAD_UdqIb, ShiftID::vpsrad, count, src, dst); + } + + void vpsrld_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpsrld", VEX_PD, OP2_PSRLD_VdqWdq, src1, src0, dst); + } + + void vpsrld_ir(uint32_t count, XMMRegisterID src, XMMRegisterID dst) { + MOZ_ASSERT(count < 32); + shiftOpImmSimd("vpsrld", OP2_PSRLD_UdqIb, ShiftID::vpsrlx, count, src, dst); + } + + void vpsllw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpsllw", VEX_PD, OP2_PSLLW_VdqWdq, src1, src0, dst); + } + + void vpsllw_ir(uint32_t count, XMMRegisterID src, XMMRegisterID dst) { + MOZ_ASSERT(count < 16); + shiftOpImmSimd("vpsllw", OP2_PSLLW_UdqIb, ShiftID::vpsllx, count, src, dst); + } + + void vpsraw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpsraw", VEX_PD, OP2_PSRAW_VdqWdq, src1, src0, dst); + } + + void vpsraw_ir(int32_t count, XMMRegisterID src, XMMRegisterID dst) { + MOZ_ASSERT(count < 16); + shiftOpImmSimd("vpsraw", OP2_PSRAW_UdqIb, ShiftID::vpsrad, count, src, dst); + } + + void vpsrlw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpsrlw", VEX_PD, OP2_PSRLW_VdqWdq, src1, src0, dst); + } + + void vpsrlw_ir(uint32_t count, XMMRegisterID src, XMMRegisterID dst) { + MOZ_ASSERT(count < 16); + shiftOpImmSimd("vpsrlw", 
OP2_PSRLW_UdqIb, ShiftID::vpsrlx, count, src, dst); + } + + void vmovmskpd_rr(XMMRegisterID src, RegisterID dst) { + twoByteOpSimdInt32("vmovmskpd", VEX_PD, OP2_MOVMSKPD_EdVd, src, dst); + } + + void vmovmskps_rr(XMMRegisterID src, RegisterID dst) { + twoByteOpSimdInt32("vmovmskps", VEX_PS, OP2_MOVMSKPD_EdVd, src, dst); + } + + void vpmovmskb_rr(XMMRegisterID src, RegisterID dst) { + twoByteOpSimdInt32("vpmovmskb", VEX_PD, OP2_PMOVMSKB_EdVd, src, dst); + } + + void vptest_rr(XMMRegisterID rhs, XMMRegisterID lhs) { + threeByteOpSimd("vptest", VEX_PD, OP3_PTEST_VdVd, ESCAPE_38, rhs, + invalid_xmm, lhs); + } + + void vmovd_rr(XMMRegisterID src, RegisterID dst) { + twoByteOpSimdInt32("vmovd", VEX_PD, OP2_MOVD_EdVd, (XMMRegisterID)dst, + (RegisterID)src); + } + + void vmovd_rr(RegisterID src, XMMRegisterID dst) { + twoByteOpInt32Simd("vmovd", VEX_PD, OP2_MOVD_VdEd, src, invalid_xmm, dst); + } + + void vmovd_mr(int32_t offset, RegisterID base, XMMRegisterID dst) { + twoByteOpSimd("vmovd", VEX_PD, OP2_MOVD_VdEd, offset, base, invalid_xmm, + dst); + } + + void vmovd_mr(int32_t offset, RegisterID base, RegisterID index, + int32_t scale, XMMRegisterID dst) { + twoByteOpSimd("vmovd", VEX_PD, OP2_MOVD_VdEd, offset, base, index, scale, + invalid_xmm, dst); + } + + void vmovd_mr_disp32(int32_t offset, RegisterID base, XMMRegisterID dst) { + twoByteOpSimd_disp32("vmovd", VEX_PD, OP2_MOVD_VdEd, offset, base, + invalid_xmm, dst); + } + + void vmovd_mr(const void* address, XMMRegisterID dst) { + twoByteOpSimd("vmovd", VEX_PD, OP2_MOVD_VdEd, address, invalid_xmm, dst); + } + + void vmovd_rm(XMMRegisterID src, int32_t offset, RegisterID base) { + twoByteOpSimd("vmovd", VEX_PD, OP2_MOVD_EdVd, offset, base, invalid_xmm, + src); + } + + void vmovd_rm(XMMRegisterID src, int32_t offset, RegisterID base, + RegisterID index, int scale) { + twoByteOpSimd("vmovd", VEX_PD, OP2_MOVD_EdVd, offset, base, index, scale, + invalid_xmm, src); + } + + void vmovd_rm_disp32(XMMRegisterID src, int32_t offset, RegisterID base) { + twoByteOpSimd_disp32("vmovd", VEX_PD, OP2_MOVD_EdVd, offset, base, + invalid_xmm, src); + } + + void vmovd_rm(XMMRegisterID src, const void* address) { + twoByteOpSimd("vmovd", VEX_PD, OP2_MOVD_EdVd, address, invalid_xmm, src); + } + + void vmovsd_rm(XMMRegisterID src, int32_t offset, RegisterID base) { + twoByteOpSimd("vmovsd", VEX_SD, OP2_MOVSD_WsdVsd, offset, base, invalid_xmm, + src); + } + + void vmovsd_rm_disp32(XMMRegisterID src, int32_t offset, RegisterID base) { + twoByteOpSimd_disp32("vmovsd", VEX_SD, OP2_MOVSD_WsdVsd, offset, base, + invalid_xmm, src); + } + + void vmovss_rm(XMMRegisterID src, int32_t offset, RegisterID base) { + twoByteOpSimd("vmovss", VEX_SS, OP2_MOVSD_WsdVsd, offset, base, invalid_xmm, + src); + } + + void vmovss_rm_disp32(XMMRegisterID src, int32_t offset, RegisterID base) { + twoByteOpSimd_disp32("vmovss", VEX_SS, OP2_MOVSD_WsdVsd, offset, base, + invalid_xmm, src); + } + + void vmovss_mr(int32_t offset, RegisterID base, XMMRegisterID dst) { + twoByteOpSimd("vmovss", VEX_SS, OP2_MOVSD_VsdWsd, offset, base, invalid_xmm, + dst); + } + + void vmovss_mr_disp32(int32_t offset, RegisterID base, XMMRegisterID dst) { + twoByteOpSimd_disp32("vmovss", VEX_SS, OP2_MOVSD_VsdWsd, offset, base, + invalid_xmm, dst); + } + + void vmovsd_rm(XMMRegisterID src, int32_t offset, RegisterID base, + RegisterID index, int scale) { + twoByteOpSimd("vmovsd", VEX_SD, OP2_MOVSD_WsdVsd, offset, base, index, + scale, invalid_xmm, src); + } + + void vmovss_rm(XMMRegisterID src, int32_t offset, 
RegisterID base, + RegisterID index, int scale) { + twoByteOpSimd("vmovss", VEX_SS, OP2_MOVSD_WsdVsd, offset, base, index, + scale, invalid_xmm, src); + } + + void vmovss_mr(int32_t offset, RegisterID base, RegisterID index, int scale, + XMMRegisterID dst) { + twoByteOpSimd("vmovss", VEX_SS, OP2_MOVSD_VsdWsd, offset, base, index, + scale, invalid_xmm, dst); + } + + void vmovsd_mr(int32_t offset, RegisterID base, XMMRegisterID dst) { + twoByteOpSimd("vmovsd", VEX_SD, OP2_MOVSD_VsdWsd, offset, base, invalid_xmm, + dst); + } + + void vmovsd_mr_disp32(int32_t offset, RegisterID base, XMMRegisterID dst) { + twoByteOpSimd_disp32("vmovsd", VEX_SD, OP2_MOVSD_VsdWsd, offset, base, + invalid_xmm, dst); + } + + void vmovsd_mr(int32_t offset, RegisterID base, RegisterID index, int scale, + XMMRegisterID dst) { + twoByteOpSimd("vmovsd", VEX_SD, OP2_MOVSD_VsdWsd, offset, base, index, + scale, invalid_xmm, dst); + } + + // Note that the register-to-register form of vmovsd does not write to the + // entire output register. For general-purpose register-to-register moves, + // use vmovapd instead. + void vmovsd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vmovsd", VEX_SD, OP2_MOVSD_VsdWsd, src1, src0, dst); + } + + // The register-to-register form of vmovss has the same problem as vmovsd + // above. Prefer vmovaps for register-to-register moves. + void vmovss_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vmovss", VEX_SS, OP2_MOVSD_VsdWsd, src1, src0, dst); + } + + void vmovsd_mr(const void* address, XMMRegisterID dst) { + twoByteOpSimd("vmovsd", VEX_SD, OP2_MOVSD_VsdWsd, address, invalid_xmm, + dst); + } + + void vmovss_mr(const void* address, XMMRegisterID dst) { + twoByteOpSimd("vmovss", VEX_SS, OP2_MOVSD_VsdWsd, address, invalid_xmm, + dst); + } + + void vmovups_mr(const void* address, XMMRegisterID dst) { + twoByteOpSimd("vmovups", VEX_PS, OP2_MOVPS_VpsWps, address, invalid_xmm, + dst); + } + + void vmovdqu_mr(const void* address, XMMRegisterID dst) { + twoByteOpSimd("vmovdqu", VEX_SS, OP2_MOVDQ_VdqWdq, address, invalid_xmm, + dst); + } + + void vmovsd_rm(XMMRegisterID src, const void* address) { + twoByteOpSimd("vmovsd", VEX_SD, OP2_MOVSD_WsdVsd, address, invalid_xmm, + src); + } + + void vmovss_rm(XMMRegisterID src, const void* address) { + twoByteOpSimd("vmovss", VEX_SS, OP2_MOVSD_WsdVsd, address, invalid_xmm, + src); + } + + void vmovdqa_rm(XMMRegisterID src, const void* address) { + twoByteOpSimd("vmovdqa", VEX_PD, OP2_MOVDQ_WdqVdq, address, invalid_xmm, + src); + } + + void vmovaps_rm(XMMRegisterID src, const void* address) { + twoByteOpSimd("vmovaps", VEX_PS, OP2_MOVAPS_WsdVsd, address, invalid_xmm, + src); + } + + void vmovdqu_rm(XMMRegisterID src, const void* address) { + twoByteOpSimd("vmovdqu", VEX_SS, OP2_MOVDQ_WdqVdq, address, invalid_xmm, + src); + } + + void vmovups_rm(XMMRegisterID src, const void* address) { + twoByteOpSimd("vmovups", VEX_PS, OP2_MOVPS_WpsVps, address, invalid_xmm, + src); + } + + void vmovaps_rr(XMMRegisterID src, XMMRegisterID dst) { +#ifdef JS_CODEGEN_X64 + // There are two opcodes that can encode this instruction. If we have + // one register in [xmm8,xmm15] and one in [xmm0,xmm7], use the + // opcode which swaps the operands, as that way we can get a two-byte + // VEX in that case. 
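+ // For example (illustrative): with vmovaps_rr(xmm9, xmm1), the load-form + // opcode would put xmm9 in the r/m field, which needs VEX.B and hence the + // three-byte 0xC4 prefix, while the store-form opcode chosen below puts + // xmm9 in the reg field, where VEX.R is still available in the shorter + // two-byte 0xC5 prefix.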
+ if (src >= xmm8 && dst < xmm8) { + twoByteOpSimd("vmovaps", VEX_PS, OP2_MOVAPS_WsdVsd, dst, invalid_xmm, + src); + return; + } +#endif + twoByteOpSimd("vmovaps", VEX_PS, OP2_MOVAPS_VsdWsd, src, invalid_xmm, dst); + } + void vmovaps_rm(XMMRegisterID src, int32_t offset, RegisterID base) { + twoByteOpSimd("vmovaps", VEX_PS, OP2_MOVAPS_WsdVsd, offset, base, + invalid_xmm, src); + } + void vmovaps_rm(XMMRegisterID src, int32_t offset, RegisterID base, + RegisterID index, int scale) { + twoByteOpSimd("vmovaps", VEX_PS, OP2_MOVAPS_WsdVsd, offset, base, index, + scale, invalid_xmm, src); + } + void vmovaps_mr(int32_t offset, RegisterID base, XMMRegisterID dst) { + twoByteOpSimd("vmovaps", VEX_PS, OP2_MOVAPS_VsdWsd, offset, base, + invalid_xmm, dst); + } + void vmovaps_mr(int32_t offset, RegisterID base, RegisterID index, int scale, + XMMRegisterID dst) { + twoByteOpSimd("vmovaps", VEX_PS, OP2_MOVAPS_VsdWsd, offset, base, index, + scale, invalid_xmm, dst); + } + + void vmovups_rm(XMMRegisterID src, int32_t offset, RegisterID base) { + twoByteOpSimd("vmovups", VEX_PS, OP2_MOVPS_WpsVps, offset, base, + invalid_xmm, src); + } + void vmovups_rm_disp32(XMMRegisterID src, int32_t offset, RegisterID base) { + twoByteOpSimd_disp32("vmovups", VEX_PS, OP2_MOVPS_WpsVps, offset, base, + invalid_xmm, src); + } + void vmovups_rm(XMMRegisterID src, int32_t offset, RegisterID base, + RegisterID index, int scale) { + twoByteOpSimd("vmovups", VEX_PS, OP2_MOVPS_WpsVps, offset, base, index, + scale, invalid_xmm, src); + } + void vmovups_mr(int32_t offset, RegisterID base, XMMRegisterID dst) { + twoByteOpSimd("vmovups", VEX_PS, OP2_MOVPS_VpsWps, offset, base, + invalid_xmm, dst); + } + void vmovups_mr_disp32(int32_t offset, RegisterID base, XMMRegisterID dst) { + twoByteOpSimd_disp32("vmovups", VEX_PS, OP2_MOVPS_VpsWps, offset, base, + invalid_xmm, dst); + } + void vmovups_mr(int32_t offset, RegisterID base, RegisterID index, int scale, + XMMRegisterID dst) { + twoByteOpSimd("vmovups", VEX_PS, OP2_MOVPS_VpsWps, offset, base, index, + scale, invalid_xmm, dst); + } + + void vmovapd_rr(XMMRegisterID src, XMMRegisterID dst) { +#ifdef JS_CODEGEN_X64 + // There are two opcodes that can encode this instruction. If we have + // one register in [xmm8,xmm15] and one in [xmm0,xmm7], use the + // opcode which swaps the operands, as that way we can get a two-byte + // VEX in that case. 
+ if (src >= xmm8 && dst < xmm8) { + twoByteOpSimd("vmovapd", VEX_PD, OP2_MOVAPS_WsdVsd, dst, invalid_xmm, + src); + return; + } +#endif + twoByteOpSimd("vmovapd", VEX_PD, OP2_MOVAPD_VsdWsd, src, invalid_xmm, dst); + } + + void vmovdqu_rm(XMMRegisterID src, int32_t offset, RegisterID base) { + twoByteOpSimd("vmovdqu", VEX_SS, OP2_MOVDQ_WdqVdq, offset, base, + invalid_xmm, src); + } + + void vmovdqu_rm_disp32(XMMRegisterID src, int32_t offset, RegisterID base) { + twoByteOpSimd_disp32("vmovdqu", VEX_SS, OP2_MOVDQ_WdqVdq, offset, base, + invalid_xmm, src); + } + + void vmovdqu_rm(XMMRegisterID src, int32_t offset, RegisterID base, + RegisterID index, int scale) { + twoByteOpSimd("vmovdqu", VEX_SS, OP2_MOVDQ_WdqVdq, offset, base, index, + scale, invalid_xmm, src); + } + + void vmovdqu_mr(int32_t offset, RegisterID base, XMMRegisterID dst) { + twoByteOpSimd("vmovdqu", VEX_SS, OP2_MOVDQ_VdqWdq, offset, base, + invalid_xmm, dst); + } + + void vmovdqu_mr_disp32(int32_t offset, RegisterID base, XMMRegisterID dst) { + twoByteOpSimd_disp32("vmovdqu", VEX_SS, OP2_MOVDQ_VdqWdq, offset, base, + invalid_xmm, dst); + } + + void vmovdqu_mr(int32_t offset, RegisterID base, RegisterID index, int scale, + XMMRegisterID dst) { + twoByteOpSimd("vmovdqu", VEX_SS, OP2_MOVDQ_VdqWdq, offset, base, index, + scale, invalid_xmm, dst); + } + + void vmovdqa_rr(XMMRegisterID src, XMMRegisterID dst) { +#ifdef JS_CODEGEN_X64 + // There are two opcodes that can encode this instruction. If we have + // one register in [xmm8,xmm15] and one in [xmm0,xmm7], use the + // opcode which swaps the operands, as that way we can get a two-byte + // VEX in that case. + if (src >= xmm8 && dst < xmm8) { + twoByteOpSimd("vmovdqa", VEX_PD, OP2_MOVDQ_WdqVdq, dst, invalid_xmm, src); + return; + } +#endif + twoByteOpSimd("vmovdqa", VEX_PD, OP2_MOVDQ_VdqWdq, src, invalid_xmm, dst); + } + + void vmovdqa_rm(XMMRegisterID src, int32_t offset, RegisterID base) { + twoByteOpSimd("vmovdqa", VEX_PD, OP2_MOVDQ_WdqVdq, offset, base, + invalid_xmm, src); + } + + void vmovdqa_rm(XMMRegisterID src, int32_t offset, RegisterID base, + RegisterID index, int scale) { + twoByteOpSimd("vmovdqa", VEX_PD, OP2_MOVDQ_WdqVdq, offset, base, index, + scale, invalid_xmm, src); + } + + void vmovdqa_mr(int32_t offset, RegisterID base, XMMRegisterID dst) { + twoByteOpSimd("vmovdqa", VEX_PD, OP2_MOVDQ_VdqWdq, offset, base, + invalid_xmm, dst); + } + + void vmovdqa_mr(int32_t offset, RegisterID base, RegisterID index, int scale, + XMMRegisterID dst) { + twoByteOpSimd("vmovdqa", VEX_PD, OP2_MOVDQ_VdqWdq, offset, base, index, + scale, invalid_xmm, dst); + } + + void vmulsd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vmulsd", VEX_SD, OP2_MULSD_VsdWsd, src1, src0, dst); + } + + void vmulss_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vmulss", VEX_SS, OP2_MULSD_VsdWsd, src1, src0, dst); + } + + void vmulsd_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vmulsd", VEX_SD, OP2_MULSD_VsdWsd, offset, base, src0, dst); + } + + void vmulss_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vmulss", VEX_SS, OP2_MULSD_VsdWsd, offset, base, src0, dst); + } + + void vpinsrw_irr(uint32_t whichWord, RegisterID src1, XMMRegisterID src0, + XMMRegisterID dst) { + MOZ_ASSERT(whichWord < 8); + twoByteOpImmInt32Simd("vpinsrw", VEX_PD, OP2_PINSRW, whichWord, src1, src0, + dst); + } + void vpinsrw_imr(unsigned lane, int32_t offset, 
RegisterID base, + XMMRegisterID src0, XMMRegisterID dst) { + MOZ_ASSERT(lane < 8); + twoByteOpImmInt32Simd("vpinsrw", VEX_PD, OP2_PINSRW, lane, offset, base, + src0, dst); + } + void vpinsrw_imr(unsigned lane, int32_t offset, RegisterID base, + RegisterID index, int32_t scale, XMMRegisterID src0, + XMMRegisterID dst) { + MOZ_ASSERT(lane < 8); + twoByteOpImmInt32Simd("vpinsrw", VEX_PD, OP2_PINSRW, lane, offset, base, + index, scale, src0, dst); + } + + void vpextrw_irr(uint32_t whichWord, XMMRegisterID src, RegisterID dst) { + MOZ_ASSERT(whichWord < 8); + twoByteOpImmSimdInt32("vpextrw", VEX_PD, OP2_PEXTRW_GdUdIb, whichWord, src, + dst); + } + + void vpextrw_irm(unsigned lane, XMMRegisterID src, int32_t offset, + RegisterID base) { + MOZ_ASSERT(lane < 8); + threeByteOpImmSimdInt32("vpextrw", VEX_PD, OP3_PEXTRW_EwVdqIb, ESCAPE_3A, + lane, offset, base, (RegisterID)src); + } + + void vpextrw_irm(unsigned lane, XMMRegisterID src, int32_t offset, + RegisterID base, RegisterID index, int scale) { + MOZ_ASSERT(lane < 8); + threeByteOpImmSimdInt32("vpextrw", VEX_PD, OP3_PEXTRW_EwVdqIb, ESCAPE_3A, + lane, offset, base, index, scale, (RegisterID)src); + } + + void vsubsd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vsubsd", VEX_SD, OP2_SUBSD_VsdWsd, src1, src0, dst); + } + + void vsubss_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vsubss", VEX_SS, OP2_SUBSD_VsdWsd, src1, src0, dst); + } + + void vsubsd_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vsubsd", VEX_SD, OP2_SUBSD_VsdWsd, offset, base, src0, dst); + } + + void vsubss_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vsubss", VEX_SS, OP2_SUBSD_VsdWsd, offset, base, src0, dst); + } + + void vucomiss_rr(XMMRegisterID rhs, XMMRegisterID lhs) { + twoByteOpSimdFlags("vucomiss", VEX_PS, OP2_UCOMISD_VsdWsd, rhs, lhs); + } + + void vucomisd_rr(XMMRegisterID rhs, XMMRegisterID lhs) { + twoByteOpSimdFlags("vucomisd", VEX_PD, OP2_UCOMISD_VsdWsd, rhs, lhs); + } + + void vucomisd_mr(int32_t offset, RegisterID base, XMMRegisterID lhs) { + twoByteOpSimdFlags("vucomisd", VEX_PD, OP2_UCOMISD_VsdWsd, offset, base, + lhs); + } + + void vdivsd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vdivsd", VEX_SD, OP2_DIVSD_VsdWsd, src1, src0, dst); + } + + void vdivss_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vdivss", VEX_SS, OP2_DIVSD_VsdWsd, src1, src0, dst); + } + + void vdivsd_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vdivsd", VEX_SD, OP2_DIVSD_VsdWsd, offset, base, src0, dst); + } + + void vdivss_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vdivss", VEX_SS, OP2_DIVSD_VsdWsd, offset, base, src0, dst); + } + + void vxorpd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vxorpd", VEX_PD, OP2_XORPD_VpdWpd, src1, src0, dst); + } + + void vorpd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vorpd", VEX_PD, OP2_ORPD_VpdWpd, src1, src0, dst); + } + + void vandpd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vandpd", VEX_PD, OP2_ANDPD_VpdWpd, src1, src0, dst); + } + void vandpd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vandpd", VEX_PD, OP2_ANDPD_VpdWpd, address, src0, dst); + } + + void 
vandps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vandps", VEX_PS, OP2_ANDPS_VpsWps, src1, src0, dst); + } + + void vandps_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vandps", VEX_PS, OP2_ANDPS_VpsWps, offset, base, src0, dst); + } + + void vandps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vandps", VEX_PS, OP2_ANDPS_VpsWps, address, src0, dst); + } + + void vandnps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vandnps", VEX_PS, OP2_ANDNPS_VpsWps, src1, src0, dst); + } + + void vandnps_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vandnps", VEX_PS, OP2_ANDNPS_VpsWps, offset, base, src0, + dst); + } + + void vandnps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vandnps", VEX_PS, OP2_ANDNPS_VpsWps, address, src0, dst); + } + + void vorps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vorps", VEX_PS, OP2_ORPS_VpsWps, src1, src0, dst); + } + + void vorps_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vorps", VEX_PS, OP2_ORPS_VpsWps, offset, base, src0, dst); + } + + void vorps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vorps", VEX_PS, OP2_ORPS_VpsWps, address, src0, dst); + } + + void vxorps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vxorps", VEX_PS, OP2_XORPS_VpsWps, src1, src0, dst); + } + + void vxorps_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vxorps", VEX_PS, OP2_XORPS_VpsWps, offset, base, src0, dst); + } + + void vxorps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vxorps", VEX_PS, OP2_XORPS_VpsWps, address, src0, dst); + } + + void vsqrtsd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vsqrtsd", VEX_SD, OP2_SQRTSD_VsdWsd, src1, src0, dst); + } + + void vsqrtss_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vsqrtss", VEX_SS, OP2_SQRTSS_VssWss, src1, src0, dst); + } + + void vroundsd_irr(RoundingMode mode, XMMRegisterID src, XMMRegisterID dst) { + threeByteOpImmSimd("vroundsd", VEX_PD, OP3_ROUNDSD_VsdWsd, ESCAPE_3A, mode, + src, invalid_xmm, dst); + } + + void vroundss_irr(RoundingMode mode, XMMRegisterID src, XMMRegisterID dst) { + threeByteOpImmSimd("vroundss", VEX_PD, OP3_ROUNDSS_VsdWsd, ESCAPE_3A, mode, + src, invalid_xmm, dst); + } + void vroundps_irr(SSERoundingMode mode, XMMRegisterID src, + XMMRegisterID dst) { + threeByteOpImmSimd("vroundps", VEX_PD, OP3_ROUNDPS_VpsWps, ESCAPE_3A, + int(mode), src, invalid_xmm, dst); + } + void vroundpd_irr(SSERoundingMode mode, XMMRegisterID src, + XMMRegisterID dst) { + threeByteOpImmSimd("vroundpd", VEX_PD, OP3_ROUNDPD_VpdWpd, ESCAPE_3A, + int(mode), src, invalid_xmm, dst); + } + + void vinsertps_irr(uint32_t mask, XMMRegisterID src1, XMMRegisterID src0, + XMMRegisterID dst) { + threeByteOpImmSimd("vinsertps", VEX_PD, OP3_INSERTPS_VpsUps, ESCAPE_3A, + mask, src1, src0, dst); + } + void vinsertps_imr(uint32_t mask, int32_t offset, RegisterID base, + XMMRegisterID src0, XMMRegisterID dst) { + threeByteOpImmSimd("vinsertps", VEX_PD, OP3_INSERTPS_VpsUps, ESCAPE_3A, + mask, offset, base, src0, dst); + } + void vinsertps_imr(uint32_t mask, int32_t offset, RegisterID base, + RegisterID index, 
int scale, XMMRegisterID src0, + XMMRegisterID dst) { + threeByteOpImmSimd("vinsertps", VEX_PD, OP3_INSERTPS_VpsUps, ESCAPE_3A, + mask, offset, base, index, scale, src0, dst); + } + + void vmovlps_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vmovlps", VEX_PS, OP2_MOVLPS_VqEq, offset, base, src0, dst); + } + void vmovlps_mr(int32_t offset, RegisterID base, RegisterID index, int scale, + XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vmovlps", VEX_PS, OP2_MOVLPS_VqEq, offset, base, index, + scale, src0, dst); + } + void vmovlps_rm(XMMRegisterID src, int32_t offset, RegisterID base) { + twoByteOpSimd("vmovlps", VEX_PS, OP2_MOVLPS_EqVq, offset, base, invalid_xmm, + src); + } + void vmovlps_rm(XMMRegisterID src, int32_t offset, RegisterID base, + RegisterID index, int scale) { + twoByteOpSimd("vmovlps", VEX_PS, OP2_MOVLPS_EqVq, offset, base, index, + scale, invalid_xmm, src); + } + + void vmovhps_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vmovhps", VEX_PS, OP2_MOVHPS_VqEq, offset, base, src0, dst); + } + void vmovhps_mr(int32_t offset, RegisterID base, RegisterID index, int scale, + XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vmovhps", VEX_PS, OP2_MOVHPS_VqEq, offset, base, index, + scale, src0, dst); + } + + void vmovhps_rm(XMMRegisterID src, int32_t offset, RegisterID base) { + twoByteOpSimd("vmovhps", VEX_PS, OP2_MOVHPS_EqVq, offset, base, invalid_xmm, + src); + } + void vmovhps_rm(XMMRegisterID src, int32_t offset, RegisterID base, + RegisterID index, int scale) { + twoByteOpSimd("vmovhps", VEX_PS, OP2_MOVHPS_EqVq, offset, base, index, + scale, invalid_xmm, src); + } + + void vextractps_rm(unsigned lane, XMMRegisterID src, int32_t offset, + RegisterID base) { + threeByteOpImmSimd("vextractps", VEX_PD, OP3_EXTRACTPS_EdVdqIb, ESCAPE_3A, + lane, offset, base, invalid_xmm, src); + } + void vextractps_rm(unsigned lane, XMMRegisterID src, int32_t offset, + RegisterID base, RegisterID index, int scale) { + threeByteOpImmSimd("vextractps", VEX_PD, OP3_EXTRACTPS_EdVdqIb, ESCAPE_3A, + lane, offset, base, index, scale, invalid_xmm, src); + } + + void vpblendw_irr(unsigned mask, XMMRegisterID src1, XMMRegisterID src0, + XMMRegisterID dst) { + MOZ_ASSERT(mask < 256); + threeByteOpImmSimd("vpblendw", VEX_PD, OP3_PBLENDW_VdqWdqIb, ESCAPE_3A, + mask, src1, src0, dst); + } + + void vpblendvb_rr(XMMRegisterID mask, XMMRegisterID src1, XMMRegisterID src0, + XMMRegisterID dst) { + vblendvOpSimd("vpblendvb", OP3_PBLENDVB_VdqWdq, OP3_VPBLENDVB_VdqWdq, mask, + src1, src0, dst); + } + + void vpinsrb_irr(unsigned lane, RegisterID src1, XMMRegisterID src0, + XMMRegisterID dst) { + MOZ_ASSERT(lane < 16); + threeByteOpImmInt32Simd("vpinsrb", VEX_PD, OP3_PINSRB_VdqEvIb, ESCAPE_3A, + lane, src1, src0, dst); + } + void vpinsrb_imr(unsigned lane, int32_t offset, RegisterID base, + XMMRegisterID src0, XMMRegisterID dst) { + MOZ_ASSERT(lane < 16); + threeByteOpImmInt32Simd("vpinsrb", VEX_PD, OP3_PINSRB_VdqEvIb, ESCAPE_3A, + lane, offset, base, src0, dst); + } + void vpinsrb_imr(unsigned lane, int32_t offset, RegisterID base, + RegisterID index, int32_t scale, XMMRegisterID src0, + XMMRegisterID dst) { + MOZ_ASSERT(lane < 16); + threeByteOpImmInt32Simd("vpinsrb", VEX_PD, OP3_PINSRB_VdqEvIb, ESCAPE_3A, + lane, offset, base, index, scale, src0, dst); + } + + void vpinsrd_irr(unsigned lane, RegisterID src1, XMMRegisterID src0, + XMMRegisterID dst) { + MOZ_ASSERT(lane < 4); + 
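// Note for clarity: the allowed lane range follows the element width + // (vpinsrb has 16 byte lanes, vpinsrw 8 word lanes, vpinsrd 4 dword lanes), + // because the imm8 selects which element of the 128-bit destination is + // replaced. +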
threeByteOpImmInt32Simd("vpinsrd", VEX_PD, OP3_PINSRD_VdqEvIb, ESCAPE_3A, + lane, src1, src0, dst); + } + + void vpextrb_irr(unsigned lane, XMMRegisterID src, RegisterID dst) { + MOZ_ASSERT(lane < 16); + threeByteOpImmSimdInt32("vpextrb", VEX_PD, OP3_PEXTRB_EvVdqIb, ESCAPE_3A, + lane, (XMMRegisterID)dst, (RegisterID)src); + } + + void vpextrb_irm(unsigned lane, XMMRegisterID src, int32_t offset, + RegisterID base) { + MOZ_ASSERT(lane < 16); + threeByteOpImmSimdInt32("vpextrb", VEX_PD, OP3_PEXTRB_EvVdqIb, ESCAPE_3A, + lane, offset, base, (RegisterID)src); + } + + void vpextrb_irm(unsigned lane, XMMRegisterID src, int32_t offset, + RegisterID base, RegisterID index, int scale) { + MOZ_ASSERT(lane < 16); + threeByteOpImmSimdInt32("vpextrb", VEX_PD, OP3_PEXTRB_EvVdqIb, ESCAPE_3A, + lane, offset, base, index, scale, (RegisterID)src); + } + + void vpextrd_irr(unsigned lane, XMMRegisterID src, RegisterID dst) { + MOZ_ASSERT(lane < 4); + threeByteOpImmSimdInt32("vpextrd", VEX_PD, OP3_PEXTRD_EvVdqIb, ESCAPE_3A, + lane, (XMMRegisterID)dst, (RegisterID)src); + } + + void vblendps_irr(unsigned imm, XMMRegisterID src1, XMMRegisterID src0, + XMMRegisterID dst) { + MOZ_ASSERT(imm < 16); + // Despite being a "ps" instruction, vblendps is encoded with the "pd" + // prefix. + threeByteOpImmSimd("vblendps", VEX_PD, OP3_BLENDPS_VpsWpsIb, ESCAPE_3A, imm, + src1, src0, dst); + } + + void vblendps_imr(unsigned imm, int32_t offset, RegisterID base, + XMMRegisterID src0, XMMRegisterID dst) { + MOZ_ASSERT(imm < 16); + // Despite being a "ps" instruction, vblendps is encoded with the "pd" + // prefix. + threeByteOpImmSimd("vblendps", VEX_PD, OP3_BLENDPS_VpsWpsIb, ESCAPE_3A, imm, + offset, base, src0, dst); + } + + void vblendvps_rr(XMMRegisterID mask, XMMRegisterID src1, XMMRegisterID src0, + XMMRegisterID dst) { + vblendvOpSimd("vblendvps", OP3_BLENDVPS_VdqWdq, OP3_VBLENDVPS_VdqWdq, mask, + src1, src0, dst); + } + void vblendvps_mr(XMMRegisterID mask, int32_t offset, RegisterID base, + XMMRegisterID src0, XMMRegisterID dst) { + vblendvOpSimd("vblendvps", OP3_BLENDVPS_VdqWdq, OP3_VBLENDVPS_VdqWdq, mask, + offset, base, src0, dst); + } + void vblendvpd_rr(XMMRegisterID mask, XMMRegisterID src1, XMMRegisterID src0, + XMMRegisterID dst) { + vblendvOpSimd("vblendvpd", OP3_BLENDVPD_VdqWdq, OP3_VBLENDVPD_VdqWdq, mask, + src1, src0, dst); + } + + void vmovsldup_rr(XMMRegisterID src, XMMRegisterID dst) { + twoByteOpSimd("vmovsldup", VEX_SS, OP2_MOVSLDUP_VpsWps, src, invalid_xmm, + dst); + } + void vmovsldup_mr(int32_t offset, RegisterID base, XMMRegisterID dst) { + twoByteOpSimd("vmovsldup", VEX_SS, OP2_MOVSLDUP_VpsWps, offset, base, + invalid_xmm, dst); + } + + void vmovshdup_rr(XMMRegisterID src, XMMRegisterID dst) { + twoByteOpSimd("vmovshdup", VEX_SS, OP2_MOVSHDUP_VpsWps, src, invalid_xmm, + dst); + } + void vmovshdup_mr(int32_t offset, RegisterID base, XMMRegisterID dst) { + twoByteOpSimd("vmovshdup", VEX_SS, OP2_MOVSHDUP_VpsWps, offset, base, + invalid_xmm, dst); + } + + void vminsd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vminsd", VEX_SD, OP2_MINSD_VsdWsd, src1, src0, dst); + } + void vminsd_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vminsd", VEX_SD, OP2_MINSD_VsdWsd, offset, base, src0, dst); + } + + void vminss_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vminss", VEX_SS, OP2_MINSS_VssWss, src1, src0, dst); + } + + void vmaxsd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) 
{ + twoByteOpSimd("vmaxsd", VEX_SD, OP2_MAXSD_VsdWsd, src1, src0, dst); + } + void vmaxsd_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vmaxsd", VEX_SD, OP2_MAXSD_VsdWsd, offset, base, src0, dst); + } + + void vmaxss_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vmaxss", VEX_SS, OP2_MAXSS_VssWss, src1, src0, dst); + } + + void vpavgb_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpavgb", VEX_PD, OP2_PAVGB_VdqWdq, src1, src0, dst); + } + + void vpavgw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpavgw", VEX_PD, OP2_PAVGW_VdqWdq, src1, src0, dst); + } + + void vpminsb_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + threeByteOpSimd("vpminsb", VEX_PD, OP3_PMINSB_VdqWdq, ESCAPE_38, src1, src0, + dst); + } + void vpminsb_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + threeByteOpSimd("vpminsb", VEX_PD, OP3_PMINSB_VdqWdq, ESCAPE_38, address, + src0, dst); + } + + void vpmaxsb_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + threeByteOpSimd("vpmaxsb", VEX_PD, OP3_PMAXSB_VdqWdq, ESCAPE_38, src1, src0, + dst); + } + void vpmaxsb_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + threeByteOpSimd("vpmaxsb", VEX_PD, OP3_PMAXSB_VdqWdq, ESCAPE_38, address, + src0, dst); + } + + void vpminub_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpminub", VEX_PD, OP2_PMINUB_VdqWdq, src1, src0, dst); + } + void vpminub_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpminub", VEX_PD, OP2_PMINUB_VdqWdq, address, src0, dst); + } + + void vpmaxub_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpmaxub", VEX_PD, OP2_PMAXUB_VdqWdq, src1, src0, dst); + } + void vpmaxub_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpmaxub", VEX_PD, OP2_PMAXUB_VdqWdq, address, src0, dst); + } + + void vpminsw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpminsw", VEX_PD, OP2_PMINSW_VdqWdq, src1, src0, dst); + } + void vpminsw_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpminsw", VEX_PD, OP2_PMINSW_VdqWdq, address, src0, dst); + } + + void vpmaxsw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpmaxsw", VEX_PD, OP2_PMAXSW_VdqWdq, src1, src0, dst); + } + void vpmaxsw_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpmaxsw", VEX_PD, OP2_PMAXSW_VdqWdq, address, src0, dst); + } + + void vpminuw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + threeByteOpSimd("vpminuw", VEX_PD, OP3_PMINUW_VdqWdq, ESCAPE_38, src1, src0, + dst); + } + void vpminuw_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + threeByteOpSimd("vpminuw", VEX_PD, OP3_PMINUW_VdqWdq, ESCAPE_38, address, + src0, dst); + } + + void vpmaxuw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + threeByteOpSimd("vpmaxuw", VEX_PD, OP3_PMAXUW_VdqWdq, ESCAPE_38, src1, src0, + dst); + } + void vpmaxuw_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + threeByteOpSimd("vpmaxuw", VEX_PD, OP3_PMAXUW_VdqWdq, ESCAPE_38, address, + src0, dst); + } + + void vpminsd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + threeByteOpSimd("vpminsd", VEX_PD, OP3_PMINSD_VdqWdq, ESCAPE_38, src1, src0, + dst); + } + void 
vpminsd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + threeByteOpSimd("vpminsd", VEX_PD, OP3_PMINSD_VdqWdq, ESCAPE_38, address, + src0, dst); + } + + void vpmaxsd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + threeByteOpSimd("vpmaxsd", VEX_PD, OP3_PMAXSD_VdqWdq, ESCAPE_38, src1, src0, + dst); + } + void vpmaxsd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + threeByteOpSimd("vpmaxsd", VEX_PD, OP3_PMAXSD_VdqWdq, ESCAPE_38, address, + src0, dst); + } + + void vpminud_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + threeByteOpSimd("vpminud", VEX_PD, OP3_PMINUD_VdqWdq, ESCAPE_38, src1, src0, + dst); + } + void vpminud_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + threeByteOpSimd("vpminud", VEX_PD, OP3_PMINUD_VdqWdq, ESCAPE_38, address, + src0, dst); + } + + void vpmaxud_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + threeByteOpSimd("vpmaxud", VEX_PD, OP3_PMAXUD_VdqWdq, ESCAPE_38, src1, src0, + dst); + } + void vpmaxud_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) { + threeByteOpSimd("vpmaxud", VEX_PD, OP3_PMAXUD_VdqWdq, ESCAPE_38, address, + src0, dst); + } + + void vpacksswb_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpacksswb", VEX_PD, OP2_PACKSSWB_VdqWdq, src1, src0, dst); + } + void vpacksswb_mr(const void* address, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vpacksswb", VEX_PD, OP2_PACKSSWB_VdqWdq, address, src0, dst); + } + + void vpackuswb_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpackuswb", VEX_PD, OP2_PACKUSWB_VdqWdq, src1, src0, dst); + } + void vpackuswb_mr(const void* address, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vpackuswb", VEX_PD, OP2_PACKUSWB_VdqWdq, address, src0, dst); + } + + void vpackssdw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpackssdw", VEX_PD, OP2_PACKSSDW_VdqWdq, src1, src0, dst); + } + void vpackssdw_mr(const void* address, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vpackssdw", VEX_PD, OP2_PACKSSDW_VdqWdq, address, src0, dst); + } + + void vpackusdw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + threeByteOpSimd("vpackusdw", VEX_PD, OP3_PACKUSDW_VdqWdq, ESCAPE_38, src1, + src0, dst); + } + void vpackusdw_mr(const void* address, XMMRegisterID src0, + XMMRegisterID dst) { + threeByteOpSimd("vpackusdw", VEX_PD, OP3_PACKUSDW_VdqWdq, ESCAPE_38, + address, src0, dst); + } + + void vpabsb_rr(XMMRegisterID src, XMMRegisterID dst) { + threeByteOpSimd("vpabsb", VEX_PD, OP3_PABSB_VdqWdq, ESCAPE_38, src, + invalid_xmm, dst); + } + + void vpabsw_rr(XMMRegisterID src, XMMRegisterID dst) { + threeByteOpSimd("vpabsw", VEX_PD, OP3_PABSW_VdqWdq, ESCAPE_38, src, + invalid_xmm, dst); + } + + void vpabsd_rr(XMMRegisterID src, XMMRegisterID dst) { + threeByteOpSimd("vpabsd", VEX_PD, OP3_PABSD_VdqWdq, ESCAPE_38, src, + invalid_xmm, dst); + } + + void vpmovsxbw_rr(XMMRegisterID src, XMMRegisterID dst) { + threeByteOpSimd("vpmovsxbw", VEX_PD, OP3_PMOVSXBW_VdqWdq, ESCAPE_38, src, + invalid_xmm, dst); + } + void vpmovsxbw_mr(int32_t offset, RegisterID base, XMMRegisterID dst) { + threeByteOpSimd("vpmovsxbw", VEX_PD, OP3_PMOVSXBW_VdqWdq, ESCAPE_38, offset, + base, invalid_xmm, dst); + } + void vpmovsxbw_mr(int32_t offset, RegisterID base, RegisterID index, + int32_t scale, XMMRegisterID dst) { + threeByteOpSimd("vpmovsxbw", VEX_PD, OP3_PMOVSXBW_VdqWdq, ESCAPE_38, offset, + 
base, index, scale, invalid_xmm, dst); + } + + void vpmovzxbw_rr(XMMRegisterID src, XMMRegisterID dst) { + threeByteOpSimd("vpmovzxbw", VEX_PD, OP3_PMOVZXBW_VdqWdq, ESCAPE_38, src, + invalid_xmm, dst); + } + void vpmovzxbw_mr(int32_t offset, RegisterID base, XMMRegisterID dst) { + threeByteOpSimd("vpmovzxbw", VEX_PD, OP3_PMOVZXBW_VdqWdq, ESCAPE_38, offset, + base, invalid_xmm, dst); + } + void vpmovzxbw_mr(int32_t offset, RegisterID base, RegisterID index, + int32_t scale, XMMRegisterID dst) { + threeByteOpSimd("vpmovzxbw", VEX_PD, OP3_PMOVZXBW_VdqWdq, ESCAPE_38, offset, + base, index, scale, invalid_xmm, dst); + } + + void vpmovsxwd_rr(XMMRegisterID src, XMMRegisterID dst) { + threeByteOpSimd("vpmovsxwd", VEX_PD, OP3_PMOVSXWD_VdqWdq, ESCAPE_38, src, + invalid_xmm, dst); + } + void vpmovsxwd_mr(int32_t offset, RegisterID base, XMMRegisterID dst) { + threeByteOpSimd("vpmovsxwd", VEX_PD, OP3_PMOVSXWD_VdqWdq, ESCAPE_38, offset, + base, invalid_xmm, dst); + } + void vpmovsxwd_mr(int32_t offset, RegisterID base, RegisterID index, + int32_t scale, XMMRegisterID dst) { + threeByteOpSimd("vpmovsxwd", VEX_PD, OP3_PMOVSXWD_VdqWdq, ESCAPE_38, offset, + base, index, scale, invalid_xmm, dst); + } + + void vpmovzxwd_rr(XMMRegisterID src, XMMRegisterID dst) { + threeByteOpSimd("vpmovzxwd", VEX_PD, OP3_PMOVZXWD_VdqWdq, ESCAPE_38, src, + invalid_xmm, dst); + } + void vpmovzxwd_mr(int32_t offset, RegisterID base, XMMRegisterID dst) { + threeByteOpSimd("vpmovzxwd", VEX_PD, OP3_PMOVZXWD_VdqWdq, ESCAPE_38, offset, + base, invalid_xmm, dst); + } + void vpmovzxwd_mr(int32_t offset, RegisterID base, RegisterID index, + int32_t scale, XMMRegisterID dst) { + threeByteOpSimd("vpmovzxwd", VEX_PD, OP3_PMOVZXWD_VdqWdq, ESCAPE_38, offset, + base, index, scale, invalid_xmm, dst); + } + + void vpmovsxdq_rr(XMMRegisterID src, XMMRegisterID dst) { + threeByteOpSimd("vpmovsxdq", VEX_PD, OP3_PMOVSXDQ_VdqWdq, ESCAPE_38, src, + invalid_xmm, dst); + } + void vpmovsxdq_mr(int32_t offset, RegisterID base, XMMRegisterID dst) { + threeByteOpSimd("vpmovsxdq", VEX_PD, OP3_PMOVSXDQ_VdqWdq, ESCAPE_38, offset, + base, invalid_xmm, dst); + } + void vpmovsxdq_mr(int32_t offset, RegisterID base, RegisterID index, + int32_t scale, XMMRegisterID dst) { + threeByteOpSimd("vpmovsxdq", VEX_PD, OP3_PMOVSXDQ_VdqWdq, ESCAPE_38, offset, + base, index, scale, invalid_xmm, dst); + } + + void vpmovzxdq_rr(XMMRegisterID src, XMMRegisterID dst) { + threeByteOpSimd("vpmovzxdq", VEX_PD, OP3_PMOVZXDQ_VdqWdq, ESCAPE_38, src, + invalid_xmm, dst); + } + void vpmovzxdq_mr(int32_t offset, RegisterID base, XMMRegisterID dst) { + threeByteOpSimd("vpmovzxdq", VEX_PD, OP3_PMOVZXDQ_VdqWdq, ESCAPE_38, offset, + base, invalid_xmm, dst); + } + void vpmovzxdq_mr(int32_t offset, RegisterID base, RegisterID index, + int32_t scale, XMMRegisterID dst) { + threeByteOpSimd("vpmovzxdq", VEX_PD, OP3_PMOVZXDQ_VdqWdq, ESCAPE_38, offset, + base, index, scale, invalid_xmm, dst); + } + + void vphaddd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + threeByteOpSimd("vphaddd", VEX_PD, OP3_PHADDD_VdqWdq, ESCAPE_38, src1, src0, + dst); + } + + void vpalignr_irr(unsigned imm, XMMRegisterID src1, XMMRegisterID src0, + XMMRegisterID dst) { + MOZ_ASSERT(imm < 32); + threeByteOpImmSimd("vpalignr", VEX_PD, OP3_PALIGNR_VdqWdqIb, ESCAPE_3A, imm, + src1, src0, dst); + } + + void vpunpcklbw_rr(XMMRegisterID src1, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vpunpcklbw", VEX_PD, OP2_PUNPCKLBW_VdqWdq, src1, src0, dst); + } + void vpunpckhbw_rr(XMMRegisterID src1, 
XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vpunpckhbw", VEX_PD, OP2_PUNPCKHBW_VdqWdq, src1, src0, dst); + } + + void vpunpckldq_rr(XMMRegisterID src1, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vpunpckldq", VEX_PD, OP2_PUNPCKLDQ_VdqWdq, src1, src0, dst); + } + void vpunpckldq_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vpunpckldq", VEX_PD, OP2_PUNPCKLDQ_VdqWdq, offset, base, + src0, dst); + } + void vpunpckldq_mr(const void* addr, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpunpckldq", VEX_PD, OP2_PUNPCKLDQ_VdqWdq, addr, src0, dst); + } + void vpunpcklqdq_rr(XMMRegisterID src1, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vpunpcklqdq", VEX_PD, OP2_PUNPCKLQDQ_VdqWdq, src1, src0, + dst); + } + void vpunpcklqdq_mr(int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vpunpcklqdq", VEX_PD, OP2_PUNPCKLQDQ_VdqWdq, offset, base, + src0, dst); + } + void vpunpcklqdq_mr(const void* addr, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpunpcklqdq", VEX_PD, OP2_PUNPCKLQDQ_VdqWdq, addr, src0, + dst); + } + void vpunpckhdq_rr(XMMRegisterID src1, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vpunpckhdq", VEX_PD, OP2_PUNPCKHDQ_VdqWdq, src1, src0, dst); + } + void vpunpckhqdq_rr(XMMRegisterID src1, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vpunpckhqdq", VEX_PD, OP2_PUNPCKHQDQ_VdqWdq, src1, src0, + dst); + } + void vpunpcklwd_rr(XMMRegisterID src1, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vpunpcklwd", VEX_PD, OP2_PUNPCKLWD_VdqWdq, src1, src0, dst); + } + void vpunpckhwd_rr(XMMRegisterID src1, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vpunpckhwd", VEX_PD, OP2_PUNPCKHWD_VdqWdq, src1, src0, dst); + } + + void vpaddq_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpaddq", VEX_PD, OP2_PADDQ_VdqWdq, src1, src0, dst); + } + void vpsubq_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vpsubq", VEX_PD, OP2_PSUBQ_VdqWdq, src1, src0, dst); + } + + void vbroadcastb_rr(XMMRegisterID src, XMMRegisterID dst) { + threeByteOpSimd("vbroadcastb", VEX_PD, OP3_VBROADCASTB_VxWx, ESCAPE_38, src, + invalid_xmm, dst); + } + void vbroadcastb_mr(int32_t offset, RegisterID base, XMMRegisterID dst) { + threeByteOpSimd("vbroadcastb", VEX_PD, OP3_VBROADCASTB_VxWx, ESCAPE_38, + offset, base, invalid_xmm, dst); + } + void vbroadcastb_mr(int32_t offset, RegisterID base, RegisterID index, + int32_t scale, XMMRegisterID dst) { + threeByteOpSimd("vbroadcastb", VEX_PD, OP3_VBROADCASTB_VxWx, ESCAPE_38, + offset, base, index, scale, invalid_xmm, dst); + } + void vbroadcastw_rr(XMMRegisterID src, XMMRegisterID dst) { + threeByteOpSimd("vbroadcastw", VEX_PD, OP3_VBROADCASTW_VxWx, ESCAPE_38, src, + invalid_xmm, dst); + } + void vbroadcastw_mr(int32_t offset, RegisterID base, XMMRegisterID dst) { + threeByteOpSimd("vbroadcastw", VEX_PD, OP3_VBROADCASTW_VxWx, ESCAPE_38, + offset, base, invalid_xmm, dst); + } + void vbroadcastw_mr(int32_t offset, RegisterID base, RegisterID index, + int32_t scale, XMMRegisterID dst) { + threeByteOpSimd("vbroadcastw", VEX_PD, OP3_VBROADCASTW_VxWx, ESCAPE_38, + offset, base, index, scale, invalid_xmm, dst); + } + void vbroadcastd_rr(XMMRegisterID src, XMMRegisterID dst) { + threeByteOpSimd("vbroadcastd", VEX_PD, OP3_VBROADCASTD_VxWx, ESCAPE_38, src, + invalid_xmm, dst); + } + void vbroadcastd_mr(int32_t offset, RegisterID 
base, XMMRegisterID dst) { + threeByteOpSimd("vbroadcastd", VEX_PD, OP3_VBROADCASTD_VxWx, ESCAPE_38, + offset, base, invalid_xmm, dst); + } + void vbroadcastd_mr(int32_t offset, RegisterID base, RegisterID index, + int32_t scale, XMMRegisterID dst) { + threeByteOpSimd("vbroadcastd", VEX_PD, OP3_VBROADCASTD_VxWx, ESCAPE_38, + offset, base, index, scale, invalid_xmm, dst); + } + void vbroadcastq_rr(XMMRegisterID src, XMMRegisterID dst) { + threeByteOpSimd("vbroadcastq", VEX_PD, OP3_VBROADCASTQ_VxWx, ESCAPE_38, src, + invalid_xmm, dst); + } + void vbroadcastq_mr(int32_t offset, RegisterID base, XMMRegisterID dst) { + threeByteOpSimd("vbroadcastq", VEX_PD, OP3_VBROADCASTQ_VxWx, ESCAPE_38, + offset, base, invalid_xmm, dst); + } + void vbroadcastq_mr(int32_t offset, RegisterID base, RegisterID index, + int32_t scale, XMMRegisterID dst) { + threeByteOpSimd("vbroadcastq", VEX_PD, OP3_VBROADCASTQ_VxWx, ESCAPE_38, + offset, base, index, scale, invalid_xmm, dst); + } + void vbroadcastss_rr(XMMRegisterID src, XMMRegisterID dst) { + threeByteOpSimd("vbroadcastss", VEX_PD, OP3_VBROADCASTSS_VxWd, ESCAPE_38, + src, invalid_xmm, dst); + } + void vbroadcastss_mr(int32_t offset, RegisterID base, XMMRegisterID dst) { + threeByteOpSimd("vbroadcastss", VEX_PD, OP3_VBROADCASTSS_VxWd, ESCAPE_38, + offset, base, invalid_xmm, dst); + } + void vbroadcastss_mr(int32_t offset, RegisterID base, RegisterID index, + int32_t scale, XMMRegisterID dst) { + threeByteOpSimd("vbroadcastss", VEX_PD, OP3_VBROADCASTSS_VxWd, ESCAPE_38, + offset, base, index, scale, invalid_xmm, dst); + } + + // BMI instructions: + + void sarxl_rrr(RegisterID src, RegisterID shift, RegisterID dst) { + spew("sarxl %s, %s, %s", GPReg32Name(src), GPReg32Name(shift), + GPReg32Name(dst)); + + RegisterID rm = src; + XMMRegisterID src0 = static_cast<XMMRegisterID>(shift); + int reg = dst; + m_formatter.threeByteOpVex(VEX_SS /* = F3 */, OP3_SARX_GyEyBy, ESCAPE_38, + rm, src0, reg); + } + + void shlxl_rrr(RegisterID src, RegisterID shift, RegisterID dst) { + spew("shlxl %s, %s, %s", GPReg32Name(src), GPReg32Name(shift), + GPReg32Name(dst)); + + RegisterID rm = src; + XMMRegisterID src0 = static_cast<XMMRegisterID>(shift); + int reg = dst; + m_formatter.threeByteOpVex(VEX_PD /* = 66 */, OP3_SHLX_GyEyBy, ESCAPE_38, + rm, src0, reg); + } + + void shrxl_rrr(RegisterID src, RegisterID shift, RegisterID dst) { + spew("shrxl %s, %s, %s", GPReg32Name(src), GPReg32Name(shift), + GPReg32Name(dst)); + + RegisterID rm = src; + XMMRegisterID src0 = static_cast<XMMRegisterID>(shift); + int reg = dst; + m_formatter.threeByteOpVex(VEX_SD /* = F2 */, OP3_SHRX_GyEyBy, ESCAPE_38, + rm, src0, reg); + } + + // FMA instructions: + + void vfmadd231ps_rrr(XMMRegisterID src1, XMMRegisterID src0, + XMMRegisterID dst) { + spew("vfmadd231ps %s, %s, %s", XMMRegName(src1), XMMRegName(src0), + XMMRegName(dst)); + + m_formatter.threeByteOpVex(VEX_PD, OP3_VFMADD231PS_VxHxWx, ESCAPE_38, + (RegisterID)src1, src0, (RegisterID)dst); + } + + void vfnmadd231ps_rrr(XMMRegisterID src1, XMMRegisterID src0, + XMMRegisterID dst) { + spew("vfnmadd231ps %s, %s, %s", XMMRegName(src1), XMMRegName(src0), + XMMRegName(dst)); + + m_formatter.threeByteOpVex(VEX_PD, OP3_VFNMADD231PS_VxHxWx, ESCAPE_38, + (RegisterID)src1, src0, (RegisterID)dst); + } + + void vfmadd231pd_rrr(XMMRegisterID src1, XMMRegisterID src0, + XMMRegisterID dst) { + spew("vfmadd231pd %s, %s, %s", XMMRegName(src1), XMMRegName(src0), + XMMRegName(dst)); + + m_formatter.threeByteOpVex64(VEX_PD, OP3_VFMADD231PD_VxHxWx, ESCAPE_38, + 
(RegisterID)src1, src0, (RegisterID)dst); + } + + void vfnmadd231pd_rrr(XMMRegisterID src1, XMMRegisterID src0, + XMMRegisterID dst) { + spew("vfnmadd231pd %s, %s, %s", XMMRegName(src1), XMMRegName(src0), + XMMRegName(dst)); + + m_formatter.threeByteOpVex64(VEX_PD, OP3_VFNMADD231PD_VxHxWx, ESCAPE_38, + (RegisterID)src1, src0, (RegisterID)dst); + } + + // Misc instructions: + + void int3() { + spew("int3"); + m_formatter.oneByteOp(OP_INT3); + } + + void ud2() { + spew("ud2"); + m_formatter.twoByteOp(OP2_UD2); + } + + void ret() { + spew("ret"); + m_formatter.oneByteOp(OP_RET); + } + + void ret_i(int32_t imm) { + spew("ret $%d", imm); + m_formatter.oneByteOp(OP_RET_Iz); + m_formatter.immediate16u(imm); + } + + void lfence() { + spew("lfence"); + m_formatter.twoByteOp(OP_FENCE, (RegisterID)0, 0b101); + } + void mfence() { + spew("mfence"); + m_formatter.twoByteOp(OP_FENCE, (RegisterID)0, 0b110); + } + + // Assembler admin methods: + + JmpDst label() { + JmpDst r = JmpDst(m_formatter.size()); + spew(".set .Llabel%d, .", r.offset()); + return r; + } + + size_t currentOffset() const { return m_formatter.size(); } + + static JmpDst labelFor(JmpSrc jump, intptr_t offset = 0) { + return JmpDst(jump.offset() + offset); + } + + void haltingAlign(int alignment) { + spew(".balign %d, 0x%x # hlt", alignment, unsigned(OP_HLT)); + while (!m_formatter.isAligned(alignment)) { + m_formatter.oneByteOp(OP_HLT); + } + } + + void nopAlign(int alignment) { + spew(".balign %d", alignment); + + int remainder = m_formatter.size() % alignment; + if (remainder > 0) { + insert_nop(alignment - remainder); + } + } + + void jumpTablePointer(uintptr_t ptr) { +#ifdef JS_CODEGEN_X64 + spew(".quad 0x%" PRIxPTR, ptr); +#else + spew(".int 0x%" PRIxPTR, ptr); +#endif + m_formatter.jumpTablePointer(ptr); + } + + void doubleConstant(double d) { + spew(".double %.16g", d); + m_formatter.doubleConstant(d); + } + void floatConstant(float f) { + spew(".float %.16g", f); + m_formatter.floatConstant(f); + } + + void simd128Constant(const void* data) { + const uint32_t* dw = reinterpret_cast<const uint32_t*>(data); + spew(".int 0x%08x,0x%08x,0x%08x,0x%08x", dw[0], dw[1], dw[2], dw[3]); + MOZ_ASSERT(m_formatter.isAligned(16)); + m_formatter.simd128Constant(data); + } + + void int32Constant(int32_t i) { + spew(".int %d", i); + m_formatter.int32Constant(i); + } + void int64Constant(int64_t i) { + spew(".quad %lld", (long long)i); + m_formatter.int64Constant(i); + } + + // Linking & patching: + + void assertValidJmpSrc(JmpSrc src) { + // The target offset is stored at offset - 4. + MOZ_RELEASE_ASSERT(src.offset() > int32_t(sizeof(int32_t))); + MOZ_RELEASE_ASSERT(size_t(src.offset()) <= size()); + } + + bool nextJump(const JmpSrc& from, JmpSrc* next) { + // Sanity check - if the assembler has OOM'd, it will start overwriting + // its internal buffer and thus our links could be garbage. + if (oom()) { + return false; + } + + assertValidJmpSrc(from); + MOZ_ASSERT(from.trailing() == 0); + + const unsigned char* code = m_formatter.data(); + int32_t offset = GetInt32(code + from.offset()); + if (offset == -1) { + return false; + } + + MOZ_RELEASE_ASSERT(size_t(offset) < size(), "nextJump bogus offset"); + + *next = JmpSrc(offset); + return true; + } + void setNextJump(const JmpSrc& from, const JmpSrc& to) { + // Sanity check - if the assembler has OOM'd, it will start overwriting + // its internal buffer and thus our links could be garbage. 
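+ // Overview (for illustration; inferred from nextJump/setNextJump/linkJump): + // while a jump's target is still unknown, the 32-bit slot that will later + // hold its rel32 displacement is reused as a link holding the offset of the + // next jump waiting on the same target, with -1 terminating the chain; + // nextJump() walks that chain, setNextJump() writes a new link, and + // linkJump() finally patches each slot with the real displacement.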
+ if (oom()) { + return; + } + + assertValidJmpSrc(from); + MOZ_ASSERT(from.trailing() == 0); + MOZ_RELEASE_ASSERT(to.offset() == -1 || size_t(to.offset()) <= size()); + + unsigned char* code = m_formatter.data(); + SetInt32(code + from.offset(), to.offset()); + } + + void linkJump(JmpSrc from, JmpDst to) { + MOZ_ASSERT(from.offset() != -1); + MOZ_ASSERT(to.offset() != -1); + + // Sanity check - if the assembler has OOM'd, it will start overwriting + // its internal buffer and thus our links could be garbage. + if (oom()) { + return; + } + + assertValidJmpSrc(from); + MOZ_RELEASE_ASSERT(size_t(to.offset()) <= size()); + + spew(".set .Lfrom%d, .Llabel%d", from.offset(), to.offset()); + unsigned char* code = m_formatter.data(); + SetRel32(code + from.offset(), code + to.offset(), from.trailing()); + } + + void executableCopy(void* dst) { + const unsigned char* src = m_formatter.buffer(); + memcpy(dst, src, size()); + } + [[nodiscard]] bool appendRawCode(const uint8_t* code, size_t numBytes) { + return m_formatter.append(code, numBytes); + } + + // `offset` is the instruction offset at the end of the instruction. + void addToPCRel4(uint32_t offset, int32_t bias) { + unsigned char* code = m_formatter.data(); + SetInt32(code + offset, GetInt32(code + offset) + bias); + } + + protected: + static bool CAN_SIGN_EXTEND_8_32(int32_t value) { + return value == (int32_t)(int8_t)value; + } + static bool CAN_SIGN_EXTEND_16_32(int32_t value) { + return value == (int32_t)(int16_t)value; + } + static bool CAN_ZERO_EXTEND_8_32(int32_t value) { + return value == (int32_t)(uint8_t)value; + } + static bool CAN_ZERO_EXTEND_8H_32(int32_t value) { + return value == (value & 0xff00); + } + static bool CAN_ZERO_EXTEND_16_32(int32_t value) { + return value == (int32_t)(uint16_t)value; + } + static bool CAN_ZERO_EXTEND_32_64(int32_t value) { return value >= 0; } + + // Methods for encoding SIMD instructions via either legacy SSE encoding or + // VEX encoding. + + bool useLegacySSEEncoding(XMMRegisterID src0, XMMRegisterID dst) { + // If we don't have AVX or it's disabled, use the legacy SSE encoding. + if (!useVEX_) { + MOZ_ASSERT( + src0 == invalid_xmm || src0 == dst, + "Legacy SSE (pre-AVX) encoding requires the output register to be " + "the same as the src0 input register"); + return true; + } + + // If src0 is the same as the output register, we might as well use + // the legacy SSE encoding, since it is smaller. However, this is only + // beneficial as long as we're not using ymm registers anywhere. + return src0 == dst; + } + + bool useLegacySSEEncodingForVblendv(XMMRegisterID mask, XMMRegisterID src0, + XMMRegisterID dst) { + // Similar to useLegacySSEEncoding, but for vblendv the Legacy SSE + // encoding also requires the mask to be in xmm0. 
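+ // For illustration: the SSE4.1 forms BLENDVPS/BLENDVPD/PBLENDVB take their + // mask implicitly in xmm0 and overwrite their first operand, so the non-VEX + // path is only usable when mask == xmm0 and src0 == dst; the four-operand + // VEX forms (vblendvps dst, src0, src1, mask) encode the mask explicitly and + // have no such restriction.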
+ + if (!useVEX_) { + MOZ_ASSERT( + src0 == dst, + "Legacy SSE (pre-AVX) encoding requires the output register to be " + "the same as the src0 input register"); + MOZ_ASSERT( + mask == xmm0, + "Legacy SSE (pre-AVX) encoding for blendv requires the mask to be " + "in xmm0"); + return true; + } + + return src0 == dst && mask == xmm0; + } + + bool useLegacySSEEncodingAlways() { return !useVEX_; } + + const char* legacySSEOpName(const char* name) { + MOZ_ASSERT(name[0] == 'v'); + return name + 1; + } + + void twoByteOpSimd(const char* name, VexOperandType ty, + TwoByteOpcodeID opcode, XMMRegisterID rm, + XMMRegisterID src0, XMMRegisterID dst) { + if (useLegacySSEEncoding(src0, dst)) { + if (IsXMMReversedOperands(opcode)) { + spew("%-11s%s, %s", legacySSEOpName(name), XMMRegName(dst), + XMMRegName(rm)); + } else { + spew("%-11s%s, %s", legacySSEOpName(name), XMMRegName(rm), + XMMRegName(dst)); + } + m_formatter.legacySSEPrefix(ty); + m_formatter.twoByteOp(opcode, (RegisterID)rm, dst); + return; + } + + if (src0 == invalid_xmm) { + if (IsXMMReversedOperands(opcode)) { + spew("%-11s%s, %s", name, XMMRegName(dst), XMMRegName(rm)); + } else { + spew("%-11s%s, %s", name, XMMRegName(rm), XMMRegName(dst)); + } + } else { + spew("%-11s%s, %s, %s", name, XMMRegName(rm), XMMRegName(src0), + XMMRegName(dst)); + } + m_formatter.twoByteOpVex(ty, opcode, (RegisterID)rm, src0, dst); + } + + void twoByteOpImmSimd(const char* name, VexOperandType ty, + TwoByteOpcodeID opcode, uint32_t imm, XMMRegisterID rm, + XMMRegisterID src0, XMMRegisterID dst) { + if (useLegacySSEEncoding(src0, dst)) { + spew("%-11s$0x%x, %s, %s", legacySSEOpName(name), imm, XMMRegName(rm), + XMMRegName(dst)); + m_formatter.legacySSEPrefix(ty); + m_formatter.twoByteOp(opcode, (RegisterID)rm, dst); + m_formatter.immediate8u(imm); + return; + } + + if (src0 == invalid_xmm) { + spew("%-11s$0x%x, %s, %s", name, imm, XMMRegName(rm), XMMRegName(dst)); + } else { + spew("%-11s$0x%x, %s, %s, %s", name, imm, XMMRegName(rm), + XMMRegName(src0), XMMRegName(dst)); + } + m_formatter.twoByteOpVex(ty, opcode, (RegisterID)rm, src0, dst); + m_formatter.immediate8u(imm); + } + + void twoByteOpSimd(const char* name, VexOperandType ty, + TwoByteOpcodeID opcode, int32_t offset, RegisterID base, + XMMRegisterID src0, XMMRegisterID dst) { + if (useLegacySSEEncoding(src0, dst)) { + if (IsXMMReversedOperands(opcode)) { + spew("%-11s%s, " MEM_ob, legacySSEOpName(name), XMMRegName(dst), + ADDR_ob(offset, base)); + } else { + spew("%-11s" MEM_ob ", %s", legacySSEOpName(name), + ADDR_ob(offset, base), XMMRegName(dst)); + } + m_formatter.legacySSEPrefix(ty); + m_formatter.twoByteOp(opcode, offset, base, dst); + return; + } + + if (src0 == invalid_xmm) { + if (IsXMMReversedOperands(opcode)) { + spew("%-11s%s, " MEM_ob, name, XMMRegName(dst), ADDR_ob(offset, base)); + } else { + spew("%-11s" MEM_ob ", %s", name, ADDR_ob(offset, base), + XMMRegName(dst)); + } + } else { + spew("%-11s" MEM_ob ", %s, %s", name, ADDR_ob(offset, base), + XMMRegName(src0), XMMRegName(dst)); + } + m_formatter.twoByteOpVex(ty, opcode, offset, base, src0, dst); + } + + void twoByteOpSimd_disp32(const char* name, VexOperandType ty, + TwoByteOpcodeID opcode, int32_t offset, + RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + if (useLegacySSEEncoding(src0, dst)) { + if (IsXMMReversedOperands(opcode)) { + spew("%-11s%s, " MEM_o32b, legacySSEOpName(name), XMMRegName(dst), + ADDR_o32b(offset, base)); + } else { + spew("%-11s" MEM_o32b ", %s", legacySSEOpName(name), + ADDR_o32b(offset, base), 
XMMRegName(dst)); + } + m_formatter.legacySSEPrefix(ty); + m_formatter.twoByteOp_disp32(opcode, offset, base, dst); + return; + } + + if (src0 == invalid_xmm) { + if (IsXMMReversedOperands(opcode)) { + spew("%-11s%s, " MEM_o32b, name, XMMRegName(dst), + ADDR_o32b(offset, base)); + } else { + spew("%-11s" MEM_o32b ", %s", name, ADDR_o32b(offset, base), + XMMRegName(dst)); + } + } else { + spew("%-11s" MEM_o32b ", %s, %s", name, ADDR_o32b(offset, base), + XMMRegName(src0), XMMRegName(dst)); + } + m_formatter.twoByteOpVex_disp32(ty, opcode, offset, base, src0, dst); + } + + void twoByteOpImmSimd(const char* name, VexOperandType ty, + TwoByteOpcodeID opcode, uint32_t imm, int32_t offset, + RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + if (useLegacySSEEncoding(src0, dst)) { + spew("%-11s$0x%x, " MEM_ob ", %s", legacySSEOpName(name), imm, + ADDR_ob(offset, base), XMMRegName(dst)); + m_formatter.legacySSEPrefix(ty); + m_formatter.twoByteOp(opcode, offset, base, dst); + m_formatter.immediate8u(imm); + return; + } + + spew("%-11s$0x%x, " MEM_ob ", %s, %s", name, imm, ADDR_ob(offset, base), + XMMRegName(src0), XMMRegName(dst)); + m_formatter.twoByteOpVex(ty, opcode, offset, base, src0, dst); + m_formatter.immediate8u(imm); + } + + void twoByteOpSimd(const char* name, VexOperandType ty, + TwoByteOpcodeID opcode, int32_t offset, RegisterID base, + RegisterID index, int scale, XMMRegisterID src0, + XMMRegisterID dst) { + if (useLegacySSEEncoding(src0, dst)) { + if (IsXMMReversedOperands(opcode)) { + spew("%-11s%s, " MEM_obs, legacySSEOpName(name), XMMRegName(dst), + ADDR_obs(offset, base, index, scale)); + } else { + spew("%-11s" MEM_obs ", %s", legacySSEOpName(name), + ADDR_obs(offset, base, index, scale), XMMRegName(dst)); + } + m_formatter.legacySSEPrefix(ty); + m_formatter.twoByteOp(opcode, offset, base, index, scale, dst); + return; + } + + if (src0 == invalid_xmm) { + if (IsXMMReversedOperands(opcode)) { + spew("%-11s%s, " MEM_obs, name, XMMRegName(dst), + ADDR_obs(offset, base, index, scale)); + } else { + spew("%-11s" MEM_obs ", %s", name, ADDR_obs(offset, base, index, scale), + XMMRegName(dst)); + } + } else { + spew("%-11s" MEM_obs ", %s, %s", name, + ADDR_obs(offset, base, index, scale), XMMRegName(src0), + XMMRegName(dst)); + } + m_formatter.twoByteOpVex(ty, opcode, offset, base, index, scale, src0, dst); + } + + void twoByteOpSimd(const char* name, VexOperandType ty, + TwoByteOpcodeID opcode, const void* address, + XMMRegisterID src0, XMMRegisterID dst) { + if (useLegacySSEEncoding(src0, dst)) { + if (IsXMMReversedOperands(opcode)) { + spew("%-11s%s, %p", legacySSEOpName(name), XMMRegName(dst), address); + } else { + spew("%-11s%p, %s", legacySSEOpName(name), address, XMMRegName(dst)); + } + m_formatter.legacySSEPrefix(ty); + m_formatter.twoByteOp(opcode, address, dst); + return; + } + + if (src0 == invalid_xmm) { + if (IsXMMReversedOperands(opcode)) { + spew("%-11s%s, %p", name, XMMRegName(dst), address); + } else { + spew("%-11s%p, %s", name, address, XMMRegName(dst)); + } + } else { + spew("%-11s%p, %s, %s", name, address, XMMRegName(src0), XMMRegName(dst)); + } + m_formatter.twoByteOpVex(ty, opcode, address, src0, dst); + } + + void twoByteOpImmSimd(const char* name, VexOperandType ty, + TwoByteOpcodeID opcode, uint32_t imm, + const void* address, XMMRegisterID src0, + XMMRegisterID dst) { + if (useLegacySSEEncoding(src0, dst)) { + spew("%-11s$0x%x, %p, %s", legacySSEOpName(name), imm, address, + XMMRegName(dst)); + m_formatter.legacySSEPrefix(ty); + 
m_formatter.twoByteOp(opcode, address, dst); + m_formatter.immediate8u(imm); + return; + } + + spew("%-11s$0x%x, %p, %s, %s", name, imm, address, XMMRegName(src0), + XMMRegName(dst)); + m_formatter.twoByteOpVex(ty, opcode, address, src0, dst); + m_formatter.immediate8u(imm); + } + + void twoByteOpInt32Simd(const char* name, VexOperandType ty, + TwoByteOpcodeID opcode, RegisterID rm, + XMMRegisterID src0, XMMRegisterID dst) { + if (useLegacySSEEncoding(src0, dst)) { + if (IsXMMReversedOperands(opcode)) { + spew("%-11s%s, %s", legacySSEOpName(name), XMMRegName(dst), + GPReg32Name(rm)); + } else { + spew("%-11s%s, %s", legacySSEOpName(name), GPReg32Name(rm), + XMMRegName(dst)); + } + m_formatter.legacySSEPrefix(ty); + m_formatter.twoByteOp(opcode, rm, dst); + return; + } + + if (src0 == invalid_xmm) { + if (IsXMMReversedOperands(opcode)) { + spew("%-11s%s, %s", name, XMMRegName(dst), GPReg32Name(rm)); + } else { + spew("%-11s%s, %s", name, GPReg32Name(rm), XMMRegName(dst)); + } + } else { + spew("%-11s%s, %s, %s", name, GPReg32Name(rm), XMMRegName(src0), + XMMRegName(dst)); + } + m_formatter.twoByteOpVex(ty, opcode, rm, src0, dst); + } + + void twoByteOpSimdInt32(const char* name, VexOperandType ty, + TwoByteOpcodeID opcode, XMMRegisterID rm, + RegisterID dst) { + if (useLegacySSEEncodingAlways()) { + if (IsXMMReversedOperands(opcode)) { + spew("%-11s%s, %s", legacySSEOpName(name), GPReg32Name(dst), + XMMRegName(rm)); + } else if (opcode == OP2_MOVD_EdVd) { + spew("%-11s%s, %s", legacySSEOpName(name), + XMMRegName((XMMRegisterID)dst), GPReg32Name((RegisterID)rm)); + } else { + spew("%-11s%s, %s", legacySSEOpName(name), XMMRegName(rm), + GPReg32Name(dst)); + } + m_formatter.legacySSEPrefix(ty); + m_formatter.twoByteOp(opcode, (RegisterID)rm, dst); + return; + } + + if (IsXMMReversedOperands(opcode)) { + spew("%-11s%s, %s", name, GPReg32Name(dst), XMMRegName(rm)); + } else if (opcode == OP2_MOVD_EdVd) { + spew("%-11s%s, %s", name, XMMRegName((XMMRegisterID)dst), + GPReg32Name((RegisterID)rm)); + } else { + spew("%-11s%s, %s", name, XMMRegName(rm), GPReg32Name(dst)); + } + m_formatter.twoByteOpVex(ty, opcode, (RegisterID)rm, invalid_xmm, dst); + } + + void twoByteOpImmSimdInt32(const char* name, VexOperandType ty, + TwoByteOpcodeID opcode, uint32_t imm, + XMMRegisterID rm, RegisterID dst) { + if (useLegacySSEEncodingAlways()) { + spew("%-11s$0x%x, %s, %s", legacySSEOpName(name), imm, XMMRegName(rm), + GPReg32Name(dst)); + m_formatter.legacySSEPrefix(ty); + m_formatter.twoByteOp(opcode, (RegisterID)rm, dst); + m_formatter.immediate8u(imm); + return; + } + + spew("%-11s$0x%x, %s, %s", name, imm, XMMRegName(rm), GPReg32Name(dst)); + m_formatter.twoByteOpVex(ty, opcode, (RegisterID)rm, invalid_xmm, dst); + m_formatter.immediate8u(imm); + } + + void twoByteOpImmInt32Simd(const char* name, VexOperandType ty, + TwoByteOpcodeID opcode, uint32_t imm, + RegisterID rm, XMMRegisterID src0, + XMMRegisterID dst) { + if (useLegacySSEEncodingAlways()) { + spew("%-11s$0x%x, %s, %s", legacySSEOpName(name), imm, GPReg32Name(rm), + XMMRegName(dst)); + m_formatter.legacySSEPrefix(ty); + m_formatter.twoByteOp(opcode, rm, dst); + m_formatter.immediate8u(imm); + return; + } + + spew("%-11s$0x%x, %s, %s", name, imm, GPReg32Name(rm), XMMRegName(dst)); + m_formatter.twoByteOpVex(ty, opcode, rm, src0, dst); + m_formatter.immediate8u(imm); + } + + void twoByteOpImmInt32Simd(const char* name, VexOperandType ty, + TwoByteOpcodeID opcode, uint32_t imm, + int32_t offset, RegisterID base, + XMMRegisterID src0, XMMRegisterID dst) 
{ + if (useLegacySSEEncodingAlways()) { + spew("%-11s$0x%x, " MEM_ob ", %s", legacySSEOpName(name), imm, + ADDR_ob(offset, base), XMMRegName(dst)); + m_formatter.legacySSEPrefix(ty); + m_formatter.twoByteOp(opcode, offset, base, dst); + m_formatter.immediate8u(imm); + return; + } + + spew("%-11s$0x%x, " MEM_ob ", %s, %s", name, imm, ADDR_ob(offset, base), + XMMRegName(src0), XMMRegName(dst)); + m_formatter.twoByteOpVex(ty, opcode, offset, base, src0, dst); + m_formatter.immediate8u(imm); + } + + void twoByteOpImmInt32Simd(const char* name, VexOperandType ty, + TwoByteOpcodeID opcode, uint32_t imm, + int32_t offset, RegisterID base, RegisterID index, + int scale, XMMRegisterID src0, XMMRegisterID dst) { + if (useLegacySSEEncodingAlways()) { + spew("%-11s$0x%x, " MEM_obs ", %s", legacySSEOpName(name), imm, + ADDR_obs(offset, base, index, scale), XMMRegName(dst)); + m_formatter.legacySSEPrefix(ty); + m_formatter.twoByteOp(opcode, offset, base, index, scale, dst); + m_formatter.immediate8u(imm); + return; + } + + spew("%-11s$0x%x, " MEM_obs ", %s, %s", name, imm, + ADDR_obs(offset, base, index, scale), XMMRegName(src0), + XMMRegName(dst)); + m_formatter.twoByteOpVex(ty, opcode, offset, base, index, scale, src0, dst); + m_formatter.immediate8u(imm); + } + + void twoByteOpSimdFlags(const char* name, VexOperandType ty, + TwoByteOpcodeID opcode, XMMRegisterID rm, + XMMRegisterID reg) { + if (useLegacySSEEncodingAlways()) { + spew("%-11s%s, %s", legacySSEOpName(name), XMMRegName(rm), + XMMRegName(reg)); + m_formatter.legacySSEPrefix(ty); + m_formatter.twoByteOp(opcode, (RegisterID)rm, reg); + return; + } + + spew("%-11s%s, %s", name, XMMRegName(rm), XMMRegName(reg)); + m_formatter.twoByteOpVex(ty, opcode, (RegisterID)rm, invalid_xmm, + (XMMRegisterID)reg); + } + + void twoByteOpSimdFlags(const char* name, VexOperandType ty, + TwoByteOpcodeID opcode, int32_t offset, + RegisterID base, XMMRegisterID reg) { + if (useLegacySSEEncodingAlways()) { + spew("%-11s" MEM_ob ", %s", legacySSEOpName(name), ADDR_ob(offset, base), + XMMRegName(reg)); + m_formatter.legacySSEPrefix(ty); + m_formatter.twoByteOp(opcode, offset, base, reg); + return; + } + + spew("%-11s" MEM_ob ", %s", name, ADDR_ob(offset, base), XMMRegName(reg)); + m_formatter.twoByteOpVex(ty, opcode, offset, base, invalid_xmm, + (XMMRegisterID)reg); + } + + void threeByteOpSimd(const char* name, VexOperandType ty, + ThreeByteOpcodeID opcode, ThreeByteEscape escape, + XMMRegisterID rm, XMMRegisterID src0, + XMMRegisterID dst) { + if (useLegacySSEEncoding(src0, dst)) { + spew("%-11s%s, %s", legacySSEOpName(name), XMMRegName(rm), + XMMRegName(dst)); + m_formatter.legacySSEPrefix(ty); + m_formatter.threeByteOp(opcode, escape, (RegisterID)rm, dst); + return; + } + + if (src0 == invalid_xmm) { + spew("%-11s%s, %s", name, XMMRegName(rm), XMMRegName(dst)); + } else { + spew("%-11s%s, %s, %s", name, XMMRegName(rm), XMMRegName(src0), + XMMRegName(dst)); + } + m_formatter.threeByteOpVex(ty, opcode, escape, (RegisterID)rm, src0, dst); + } + + void threeByteOpImmSimd(const char* name, VexOperandType ty, + ThreeByteOpcodeID opcode, ThreeByteEscape escape, + uint32_t imm, XMMRegisterID rm, XMMRegisterID src0, + XMMRegisterID dst) { + if (useLegacySSEEncoding(src0, dst)) { + spew("%-11s$0x%x, %s, %s", legacySSEOpName(name), imm, XMMRegName(rm), + XMMRegName(dst)); + m_formatter.legacySSEPrefix(ty); + m_formatter.threeByteOp(opcode, escape, (RegisterID)rm, dst); + m_formatter.immediate8u(imm); + return; + } + + if (src0 == invalid_xmm) { + spew("%-11s$0x%x, %s, 
%s", name, imm, XMMRegName(rm), XMMRegName(dst)); + } else { + spew("%-11s$0x%x, %s, %s, %s", name, imm, XMMRegName(rm), + XMMRegName(src0), XMMRegName(dst)); + } + m_formatter.threeByteOpVex(ty, opcode, escape, (RegisterID)rm, src0, dst); + m_formatter.immediate8u(imm); + } + + void threeByteOpSimd(const char* name, VexOperandType ty, + ThreeByteOpcodeID opcode, ThreeByteEscape escape, + int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + if (useLegacySSEEncoding(src0, dst)) { + spew("%-11s" MEM_ob ", %s", legacySSEOpName(name), ADDR_ob(offset, base), + XMMRegName(dst)); + m_formatter.legacySSEPrefix(ty); + m_formatter.threeByteOp(opcode, escape, offset, base, dst); + return; + } + + if (src0 == invalid_xmm) { + spew("%-11s" MEM_ob ", %s", name, ADDR_ob(offset, base), XMMRegName(dst)); + } else { + spew("%-11s" MEM_ob ", %s, %s", name, ADDR_ob(offset, base), + XMMRegName(src0), XMMRegName(dst)); + } + m_formatter.threeByteOpVex(ty, opcode, escape, offset, base, src0, dst); + } + + void threeByteOpSimd(const char* name, VexOperandType ty, + ThreeByteOpcodeID opcode, ThreeByteEscape escape, + int32_t offset, RegisterID base, RegisterID index, + int32_t scale, XMMRegisterID src0, XMMRegisterID dst) { + if (useLegacySSEEncoding(src0, dst)) { + spew("%-11s" MEM_obs ", %s", legacySSEOpName(name), + ADDR_obs(offset, base, index, scale), XMMRegName(dst)); + m_formatter.legacySSEPrefix(ty); + m_formatter.threeByteOp(opcode, escape, offset, base, index, scale, dst); + return; + } + + if (src0 == invalid_xmm) { + spew("%-11s" MEM_obs ", %s", name, ADDR_obs(offset, base, index, scale), + XMMRegName(dst)); + } else { + spew("%-11s" MEM_obs ", %s, %s", name, + ADDR_obs(offset, base, index, scale), XMMRegName(src0), + XMMRegName(dst)); + } + m_formatter.threeByteOpVex(ty, opcode, escape, offset, base, index, scale, + src0, dst); + } + + void threeByteOpImmSimd(const char* name, VexOperandType ty, + ThreeByteOpcodeID opcode, ThreeByteEscape escape, + uint32_t imm, int32_t offset, RegisterID base, + XMMRegisterID src0, XMMRegisterID dst) { + if (useLegacySSEEncoding(src0, dst)) { + spew("%-11s$0x%x, " MEM_ob ", %s", legacySSEOpName(name), imm, + ADDR_ob(offset, base), XMMRegName(dst)); + m_formatter.legacySSEPrefix(ty); + m_formatter.threeByteOp(opcode, escape, offset, base, dst); + m_formatter.immediate8u(imm); + return; + } + + if (src0 == invalid_xmm) { + spew("%-11s$0x%x, " MEM_ob ", %s", name, imm, ADDR_ob(offset, base), + XMMRegName(dst)); + } else { + spew("%-11s$0x%x, " MEM_ob ", %s, %s", name, imm, ADDR_ob(offset, base), + XMMRegName(src0), XMMRegName(dst)); + } + m_formatter.threeByteOpVex(ty, opcode, escape, offset, base, src0, dst); + m_formatter.immediate8u(imm); + } + + void threeByteOpImmSimd(const char* name, VexOperandType ty, + ThreeByteOpcodeID opcode, ThreeByteEscape escape, + uint32_t imm, int32_t offset, RegisterID base, + RegisterID index, int scale, XMMRegisterID src0, + XMMRegisterID dst) { + if (useLegacySSEEncoding(src0, dst)) { + spew("%-11s$0x%x, " MEM_obs ", %s", legacySSEOpName(name), imm, + ADDR_obs(offset, base, index, scale), XMMRegName(dst)); + m_formatter.legacySSEPrefix(ty); + m_formatter.threeByteOp(opcode, escape, offset, base, index, scale, dst); + m_formatter.immediate8u(imm); + return; + } + + if (src0 == invalid_xmm) { + spew("%-11s$0x%x, " MEM_obs ", %s", name, imm, + ADDR_obs(offset, base, index, scale), XMMRegName(dst)); + } else { + spew("%-11s$0x%x, " MEM_obs ", %s, %s", name, imm, + ADDR_obs(offset, base, index, scale), 
XMMRegName(src0), + XMMRegName(dst)); + } + m_formatter.threeByteOpVex(ty, opcode, escape, offset, base, index, scale, + src0, dst); + m_formatter.immediate8u(imm); + } + + void threeByteOpSimd(const char* name, VexOperandType ty, + ThreeByteOpcodeID opcode, ThreeByteEscape escape, + const void* address, XMMRegisterID src0, + XMMRegisterID dst) { + if (useLegacySSEEncoding(src0, dst)) { + spew("%-11s%p, %s", legacySSEOpName(name), address, XMMRegName(dst)); + m_formatter.legacySSEPrefix(ty); + m_formatter.threeByteOp(opcode, escape, address, dst); + return; + } + + if (src0 == invalid_xmm) { + spew("%-11s%p, %s", name, address, XMMRegName(dst)); + } else { + spew("%-11s%p, %s, %s", name, address, XMMRegName(src0), XMMRegName(dst)); + } + m_formatter.threeByteOpVex(ty, opcode, escape, address, src0, dst); + } + + void threeByteOpImmInt32Simd(const char* name, VexOperandType ty, + ThreeByteOpcodeID opcode, ThreeByteEscape escape, + uint32_t imm, RegisterID src1, + XMMRegisterID src0, XMMRegisterID dst) { + if (useLegacySSEEncoding(src0, dst)) { + spew("%-11s$0x%x, %s, %s", legacySSEOpName(name), imm, GPReg32Name(src1), + XMMRegName(dst)); + m_formatter.legacySSEPrefix(ty); + m_formatter.threeByteOp(opcode, escape, src1, dst); + m_formatter.immediate8u(imm); + return; + } + + spew("%-11s$0x%x, %s, %s, %s", name, imm, GPReg32Name(src1), + XMMRegName(src0), XMMRegName(dst)); + m_formatter.threeByteOpVex(ty, opcode, escape, src1, src0, dst); + m_formatter.immediate8u(imm); + } + + void threeByteOpImmInt32Simd(const char* name, VexOperandType ty, + ThreeByteOpcodeID opcode, ThreeByteEscape escape, + uint32_t imm, int32_t offset, RegisterID base, + XMMRegisterID src0, XMMRegisterID dst) { + if (useLegacySSEEncoding(src0, dst)) { + spew("%-11s$0x%x, " MEM_ob ", %s", legacySSEOpName(name), imm, + ADDR_ob(offset, base), XMMRegName(dst)); + m_formatter.legacySSEPrefix(ty); + m_formatter.threeByteOp(opcode, escape, offset, base, dst); + m_formatter.immediate8u(imm); + return; + } + + spew("%-11s$0x%x, " MEM_ob ", %s, %s", name, imm, ADDR_ob(offset, base), + XMMRegName(src0), XMMRegName(dst)); + m_formatter.threeByteOpVex(ty, opcode, escape, offset, base, src0, dst); + m_formatter.immediate8u(imm); + } + + void threeByteOpImmInt32Simd(const char* name, VexOperandType ty, + ThreeByteOpcodeID opcode, ThreeByteEscape escape, + uint32_t imm, int32_t offset, RegisterID base, + RegisterID index, int scale, XMMRegisterID src0, + XMMRegisterID dst) { + if (useLegacySSEEncoding(src0, dst)) { + spew("%-11s$0x%x, " MEM_obs ", %s", legacySSEOpName(name), imm, + ADDR_obs(offset, base, index, scale), XMMRegName(dst)); + m_formatter.legacySSEPrefix(ty); + m_formatter.threeByteOp(opcode, escape, offset, base, index, scale, dst); + m_formatter.immediate8u(imm); + return; + } + + spew("%-11s$0x%x, " MEM_obs ", %s, %s", name, imm, + ADDR_obs(offset, base, index, scale), XMMRegName(src0), + XMMRegName(dst)); + m_formatter.threeByteOpVex(ty, opcode, escape, offset, base, index, scale, + src0, dst); + m_formatter.immediate8u(imm); + } + + void threeByteOpImmSimdInt32(const char* name, VexOperandType ty, + ThreeByteOpcodeID opcode, ThreeByteEscape escape, + uint32_t imm, XMMRegisterID src, + RegisterID dst) { + if (useLegacySSEEncodingAlways()) { + spew("%-11s$0x%x, %s, %s", legacySSEOpName(name), imm, XMMRegName(src), + GPReg32Name(dst)); + m_formatter.legacySSEPrefix(ty); + m_formatter.threeByteOp(opcode, escape, (RegisterID)src, dst); + m_formatter.immediate8u(imm); + return; + } + + if (opcode == OP3_PEXTRD_EvVdqIb) { + 
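      // Per the Intel manual, pextrd puts its XMM source in the ModRM reg
      // field and its GPR destination in r/m, so callers pass the XMM
      // through |dst| and the GPR through |src| (with casts). Swap them
      // back here so the spew reads in the natural order.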
spew("%-11s$0x%x, %s, %s", name, imm, XMMRegName((XMMRegisterID)dst), + GPReg32Name((RegisterID)src)); + } else { + spew("%-11s$0x%x, %s, %s", name, imm, XMMRegName(src), GPReg32Name(dst)); + } + m_formatter.threeByteOpVex(ty, opcode, escape, (RegisterID)src, invalid_xmm, + dst); + m_formatter.immediate8u(imm); + } + + void threeByteOpImmSimdInt32(const char* name, VexOperandType ty, + ThreeByteOpcodeID opcode, ThreeByteEscape escape, + uint32_t imm, int32_t offset, RegisterID base, + RegisterID dst) { + if (useLegacySSEEncodingAlways()) { + spew("%-11s$0x%x, " MEM_ob ", %s", legacySSEOpName(name), imm, + ADDR_ob(offset, base), GPReg32Name(dst)); + m_formatter.legacySSEPrefix(ty); + m_formatter.threeByteOp(opcode, escape, offset, base, dst); + m_formatter.immediate8u(imm); + return; + } + + spew("%-11s$0x%x, " MEM_ob ", %s", name, imm, ADDR_ob(offset, base), + GPReg32Name(dst)); + m_formatter.threeByteOpVex(ty, opcode, escape, offset, base, invalid_xmm, + dst); + m_formatter.immediate8u(imm); + } + + void threeByteOpImmSimdInt32(const char* name, VexOperandType ty, + ThreeByteOpcodeID opcode, ThreeByteEscape escape, + uint32_t imm, int32_t offset, RegisterID base, + RegisterID index, int scale, RegisterID dst) { + if (useLegacySSEEncodingAlways()) { + spew("%-11s$0x%x, " MEM_obs ", %s", legacySSEOpName(name), imm, + ADDR_obs(offset, base, index, scale), GPReg32Name(dst)); + m_formatter.legacySSEPrefix(ty); + m_formatter.threeByteOp(opcode, escape, offset, base, index, scale, dst); + m_formatter.immediate8u(imm); + return; + } + + spew("%-11s$0x%x, " MEM_obs ", %s", name, imm, + ADDR_obs(offset, base, index, scale), GPReg32Name(dst)); + m_formatter.threeByteOpVex(ty, opcode, escape, offset, base, index, scale, + invalid_xmm, dst); + m_formatter.immediate8u(imm); + } + + // Blendv is a three-byte op, but the VEX encoding has a different opcode + // than the SSE encoding, so we handle it specially. + void vblendvOpSimd(const char* name, ThreeByteOpcodeID opcode, + ThreeByteOpcodeID vexOpcode, XMMRegisterID mask, + XMMRegisterID rm, XMMRegisterID src0, XMMRegisterID dst) { + if (useLegacySSEEncodingForVblendv(mask, src0, dst)) { + spew("%-11s%s, %s", legacySSEOpName(name), XMMRegName(rm), + XMMRegName(dst)); + // Even though a "ps" instruction, vblendv is encoded with the "pd" + // prefix. + m_formatter.legacySSEPrefix(VEX_PD); + m_formatter.threeByteOp(opcode, ESCAPE_38, (RegisterID)rm, dst); + return; + } + + spew("%-11s%s, %s, %s, %s", name, XMMRegName(mask), XMMRegName(rm), + XMMRegName(src0), XMMRegName(dst)); + // Even though a "ps" instruction, vblendv is encoded with the "pd" prefix. + m_formatter.vblendvOpVex(VEX_PD, vexOpcode, ESCAPE_3A, mask, (RegisterID)rm, + src0, dst); + } + + void vblendvOpSimd(const char* name, ThreeByteOpcodeID opcode, + ThreeByteOpcodeID vexOpcode, XMMRegisterID mask, + int32_t offset, RegisterID base, XMMRegisterID src0, + XMMRegisterID dst) { + if (useLegacySSEEncodingForVblendv(mask, src0, dst)) { + spew("%-11s" MEM_ob ", %s", legacySSEOpName(name), ADDR_ob(offset, base), + XMMRegName(dst)); + // Even though a "ps" instruction, vblendv is encoded with the "pd" + // prefix. + m_formatter.legacySSEPrefix(VEX_PD); + m_formatter.threeByteOp(opcode, ESCAPE_38, offset, base, dst); + return; + } + + spew("%-11s%s, " MEM_ob ", %s, %s", name, XMMRegName(mask), + ADDR_ob(offset, base), XMMRegName(src0), XMMRegName(dst)); + // Even though a "ps" instruction, vblendv is encoded with the "pd" prefix. 
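    // In the VEX encoding of blendv the mask register is not implicit: it is
    // carried in bits 7..4 of the trailing immediate byte (the "is4"
    // operand), which vblendvOpVex emits via immediate8u(mask << 4). The
    // SSE4.1 form has no such byte and always reads the mask from xmm0,
    // which is why the legacy path above requires mask == xmm0.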
+ m_formatter.vblendvOpVex(VEX_PD, vexOpcode, ESCAPE_3A, mask, offset, base, + src0, dst); + } + + void shiftOpImmSimd(const char* name, TwoByteOpcodeID opcode, + ShiftID shiftKind, uint32_t imm, XMMRegisterID src, + XMMRegisterID dst) { + if (useLegacySSEEncoding(src, dst)) { + spew("%-11s$%d, %s", legacySSEOpName(name), int32_t(imm), + XMMRegName(dst)); + m_formatter.legacySSEPrefix(VEX_PD); + m_formatter.twoByteOp(opcode, (RegisterID)dst, (int)shiftKind); + m_formatter.immediate8u(imm); + return; + } + + spew("%-11s$%d, %s, %s", name, int32_t(imm), XMMRegName(src), + XMMRegName(dst)); + // For shift instructions, destination is stored in vvvv field. + m_formatter.twoByteOpVex(VEX_PD, opcode, (RegisterID)src, dst, + (int)shiftKind); + m_formatter.immediate8u(imm); + } + + class X86InstructionFormatter { + public: + // Legacy prefix bytes: + // + // These are emmitted prior to the instruction. + + void prefix(OneByteOpcodeID pre) { m_buffer.putByte(pre); } + + void legacySSEPrefix(VexOperandType ty) { + switch (ty) { + case VEX_PS: + break; + case VEX_PD: + prefix(PRE_SSE_66); + break; + case VEX_SS: + prefix(PRE_SSE_F3); + break; + case VEX_SD: + prefix(PRE_SSE_F2); + break; + } + } + + /* clang-format off */ + // + // Word-sized operands / no operand instruction formatters. + // + // In addition to the opcode, the following operand permutations are supported: + // * None - instruction takes no operands. + // * One register - the low three bits of the RegisterID are added into the opcode. + // * Two registers - encode a register form ModRm (for all ModRm formats, the reg field is passed first, and a GroupOpcodeID may be passed in its place). + // * Three argument ModRM - a register, and a register and an offset describing a memory operand. + // * Five argument ModRM - a register, and a base register, an index, scale, and offset describing a memory operand. + // + // For 32-bit x86 targets, the address operand may also be provided as a + // void*. On 64-bit targets REX prefixes will be planted as necessary, + // where high numbered registers are used. + // + // The twoByteOp methods plant two-byte Intel instructions sequences + // (first opcode byte 0x0F). 
+ // + /* clang-format on */ + + void oneByteOp(OneByteOpcodeID opcode) { + m_buffer.ensureSpace(MaxInstructionSize); + m_buffer.putByteUnchecked(opcode); + } + + void oneByteOp(OneByteOpcodeID opcode, RegisterID reg) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIfNeeded(0, 0, reg); + m_buffer.putByteUnchecked(opcode + (reg & 7)); + } + + void oneByteOp(OneByteOpcodeID opcode, RegisterID rm, int reg) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIfNeeded(reg, 0, rm); + m_buffer.putByteUnchecked(opcode); + registerModRM(rm, reg); + } + + void oneByteOp(OneByteOpcodeID opcode, int32_t offset, RegisterID base, + int reg) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIfNeeded(reg, 0, base); + m_buffer.putByteUnchecked(opcode); + memoryModRM(offset, base, reg); + } + + void oneByteOp_disp32(OneByteOpcodeID opcode, int32_t offset, + RegisterID base, int reg) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIfNeeded(reg, 0, base); + m_buffer.putByteUnchecked(opcode); + memoryModRM_disp32(offset, base, reg); + } + + void oneByteOp(OneByteOpcodeID opcode, int32_t offset, RegisterID base, + RegisterID index, int scale, int reg) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIfNeeded(reg, index, base); + m_buffer.putByteUnchecked(opcode); + memoryModRM(offset, base, index, scale, reg); + } + + void oneByteOp_disp32(OneByteOpcodeID opcode, int32_t offset, + RegisterID index, int scale, int reg) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIfNeeded(reg, index, 0); + m_buffer.putByteUnchecked(opcode); + memoryModRM_disp32(offset, index, scale, reg); + } + + void oneByteOp(OneByteOpcodeID opcode, const void* address, int reg) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIfNeeded(reg, 0, 0); + m_buffer.putByteUnchecked(opcode); + memoryModRM_disp32(address, reg); + } + + void oneByteOp_disp32(OneByteOpcodeID opcode, const void* address, + int reg) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIfNeeded(reg, 0, 0); + m_buffer.putByteUnchecked(opcode); + memoryModRM_disp32(address, reg); + } +#ifdef JS_CODEGEN_X64 + void oneByteRipOp(OneByteOpcodeID opcode, int ripOffset, int reg) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIfNeeded(reg, 0, 0); + m_buffer.putByteUnchecked(opcode); + putModRm(ModRmMemoryNoDisp, noBase, reg); + m_buffer.putIntUnchecked(ripOffset); + } + + void oneByteRipOp64(OneByteOpcodeID opcode, int ripOffset, int reg) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexW(reg, 0, 0); + m_buffer.putByteUnchecked(opcode); + putModRm(ModRmMemoryNoDisp, noBase, reg); + m_buffer.putIntUnchecked(ripOffset); + } + + void twoByteRipOp(TwoByteOpcodeID opcode, int ripOffset, int reg) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIfNeeded(reg, 0, 0); + m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); + m_buffer.putByteUnchecked(opcode); + putModRm(ModRmMemoryNoDisp, noBase, reg); + m_buffer.putIntUnchecked(ripOffset); + } + + void twoByteRipOpVex(VexOperandType ty, TwoByteOpcodeID opcode, + int ripOffset, XMMRegisterID src0, XMMRegisterID reg) { + int r = (reg >> 3), x = 0, b = 0; + int m = 1; // 0x0F + int w = 0, v = src0, l = 0; + threeOpVex(ty, r, x, b, m, w, v, l, opcode); + putModRm(ModRmMemoryNoDisp, noBase, reg); + m_buffer.putIntUnchecked(ripOffset); + } +#endif + + void twoByteOp(TwoByteOpcodeID opcode) { + m_buffer.ensureSpace(MaxInstructionSize); + m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); + m_buffer.putByteUnchecked(opcode); + } + + void twoByteOp(TwoByteOpcodeID opcode, int reg) { + 
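      // This overload is for two-byte opcodes that encode their only
      // register operand in the low three bits of the opcode byte (with
      // REX.B supplying the fourth bit), such as bswap (0F C8+rd), rather
      // than in a ModRM byte.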
m_buffer.ensureSpace(MaxInstructionSize); + emitRexIfNeeded(0, 0, reg); + m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); + m_buffer.putByteUnchecked(opcode + (reg & 7)); + } + + void twoByteOp(TwoByteOpcodeID opcode, RegisterID rm, int reg) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIfNeeded(reg, 0, rm); + m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); + m_buffer.putByteUnchecked(opcode); + registerModRM(rm, reg); + } + + void twoByteOpVex(VexOperandType ty, TwoByteOpcodeID opcode, RegisterID rm, + XMMRegisterID src0, int reg) { + int r = (reg >> 3), x = 0, b = (rm >> 3); + int m = 1; // 0x0F + int w = 0, v = src0, l = 0; + threeOpVex(ty, r, x, b, m, w, v, l, opcode); + registerModRM(rm, reg); + } + + void twoByteOp(TwoByteOpcodeID opcode, int32_t offset, RegisterID base, + int reg) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIfNeeded(reg, 0, base); + m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); + m_buffer.putByteUnchecked(opcode); + memoryModRM(offset, base, reg); + } + + void twoByteOpVex(VexOperandType ty, TwoByteOpcodeID opcode, int32_t offset, + RegisterID base, XMMRegisterID src0, int reg) { + int r = (reg >> 3), x = 0, b = (base >> 3); + int m = 1; // 0x0F + int w = 0, v = src0, l = 0; + threeOpVex(ty, r, x, b, m, w, v, l, opcode); + memoryModRM(offset, base, reg); + } + + void twoByteOp_disp32(TwoByteOpcodeID opcode, int32_t offset, + RegisterID base, int reg) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIfNeeded(reg, 0, base); + m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); + m_buffer.putByteUnchecked(opcode); + memoryModRM_disp32(offset, base, reg); + } + + void twoByteOpVex_disp32(VexOperandType ty, TwoByteOpcodeID opcode, + int32_t offset, RegisterID base, + XMMRegisterID src0, int reg) { + int r = (reg >> 3), x = 0, b = (base >> 3); + int m = 1; // 0x0F + int w = 0, v = src0, l = 0; + threeOpVex(ty, r, x, b, m, w, v, l, opcode); + memoryModRM_disp32(offset, base, reg); + } + + void twoByteOp(TwoByteOpcodeID opcode, int32_t offset, RegisterID base, + RegisterID index, int scale, int reg) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIfNeeded(reg, index, base); + m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); + m_buffer.putByteUnchecked(opcode); + memoryModRM(offset, base, index, scale, reg); + } + + void twoByteOpVex(VexOperandType ty, TwoByteOpcodeID opcode, int32_t offset, + RegisterID base, RegisterID index, int scale, + XMMRegisterID src0, int reg) { + int r = (reg >> 3), x = (index >> 3), b = (base >> 3); + int m = 1; // 0x0F + int w = 0, v = src0, l = 0; + threeOpVex(ty, r, x, b, m, w, v, l, opcode); + memoryModRM(offset, base, index, scale, reg); + } + + void twoByteOp(TwoByteOpcodeID opcode, const void* address, int reg) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIfNeeded(reg, 0, 0); + m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); + m_buffer.putByteUnchecked(opcode); + memoryModRM(address, reg); + } + + void twoByteOpVex(VexOperandType ty, TwoByteOpcodeID opcode, + const void* address, XMMRegisterID src0, int reg) { + int r = (reg >> 3), x = 0, b = 0; + int m = 1; // 0x0F + int w = 0, v = src0, l = 0; + threeOpVex(ty, r, x, b, m, w, v, l, opcode); + memoryModRM(address, reg); + } + + void threeByteOp(ThreeByteOpcodeID opcode, ThreeByteEscape escape, + RegisterID rm, int reg) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIfNeeded(reg, 0, rm); + m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); + m_buffer.putByteUnchecked(escape); + m_buffer.putByteUnchecked(opcode); + registerModRM(rm, reg); + } + + void 
threeByteOpVex(VexOperandType ty, ThreeByteOpcodeID opcode, + ThreeByteEscape escape, RegisterID rm, + XMMRegisterID src0, int reg) { + int r = (reg >> 3), x = 0, b = (rm >> 3); + int m = 0, w = 0, v = src0, l = 0; + switch (escape) { + case ESCAPE_38: + m = 2; + break; + case ESCAPE_3A: + m = 3; + break; + default: + MOZ_CRASH("unexpected escape"); + } + threeOpVex(ty, r, x, b, m, w, v, l, opcode); + registerModRM(rm, reg); + } + + void threeByteOp(ThreeByteOpcodeID opcode, ThreeByteEscape escape, + int32_t offset, RegisterID base, int reg) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIfNeeded(reg, 0, base); + m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); + m_buffer.putByteUnchecked(escape); + m_buffer.putByteUnchecked(opcode); + memoryModRM(offset, base, reg); + } + + void threeByteOp(ThreeByteOpcodeID opcode, ThreeByteEscape escape, + int32_t offset, RegisterID base, RegisterID index, + int32_t scale, int reg) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIfNeeded(reg, index, base); + m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); + m_buffer.putByteUnchecked(escape); + m_buffer.putByteUnchecked(opcode); + memoryModRM(offset, base, index, scale, reg); + } + + void threeByteOpVex(VexOperandType ty, ThreeByteOpcodeID opcode, + ThreeByteEscape escape, int32_t offset, RegisterID base, + XMMRegisterID src0, int reg) { + int r = (reg >> 3), x = 0, b = (base >> 3); + int m = 0, w = 0, v = src0, l = 0; + switch (escape) { + case ESCAPE_38: + m = 2; + break; + case ESCAPE_3A: + m = 3; + break; + default: + MOZ_CRASH("unexpected escape"); + } + threeOpVex(ty, r, x, b, m, w, v, l, opcode); + memoryModRM(offset, base, reg); + } + + void threeByteOpVex(VexOperandType ty, ThreeByteOpcodeID opcode, + ThreeByteEscape escape, int32_t offset, RegisterID base, + RegisterID index, int scale, XMMRegisterID src0, + int reg) { + int r = (reg >> 3), x = (index >> 3), b = (base >> 3); + int m = 0, w = 0, v = src0, l = 0; + switch (escape) { + case ESCAPE_38: + m = 2; + break; + case ESCAPE_3A: + m = 3; + break; + default: + MOZ_CRASH("unexpected escape"); + } + threeOpVex(ty, r, x, b, m, w, v, l, opcode); + memoryModRM(offset, base, index, scale, reg); + } + + void threeByteOp(ThreeByteOpcodeID opcode, ThreeByteEscape escape, + const void* address, int reg) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIfNeeded(reg, 0, 0); + m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); + m_buffer.putByteUnchecked(escape); + m_buffer.putByteUnchecked(opcode); + memoryModRM(address, reg); + } + + void threeByteRipOp(ThreeByteOpcodeID opcode, ThreeByteEscape escape, + int ripOffset, int reg) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIfNeeded(reg, 0, 0); + m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); + m_buffer.putByteUnchecked(escape); + m_buffer.putByteUnchecked(opcode); + putModRm(ModRmMemoryNoDisp, noBase, reg); + m_buffer.putIntUnchecked(ripOffset); + } + + void threeByteOpVex(VexOperandType ty, ThreeByteOpcodeID opcode, + ThreeByteEscape escape, const void* address, + XMMRegisterID src0, int reg) { + int r = (reg >> 3), x = 0, b = 0; + int m = 0, w = 0, v = src0, l = 0; + switch (escape) { + case ESCAPE_38: + m = 2; + break; + case ESCAPE_3A: + m = 3; + break; + default: + MOZ_CRASH("unexpected escape"); + } + threeOpVex(ty, r, x, b, m, w, v, l, opcode); + memoryModRM(address, reg); + } + + void threeByteRipOpVex(VexOperandType ty, ThreeByteOpcodeID opcode, + ThreeByteEscape escape, int ripOffset, + XMMRegisterID src0, int reg) { + int r = (reg >> 3), x = 0, b = 0; + int m = 0; + switch 
(escape) { + case ESCAPE_38: + m = 2; + break; + case ESCAPE_3A: + m = 3; + break; + default: + MOZ_CRASH("unexpected escape"); + } + int w = 0, v = src0, l = 0; + threeOpVex(ty, r, x, b, m, w, v, l, opcode); + putModRm(ModRmMemoryNoDisp, noBase, reg); + m_buffer.putIntUnchecked(ripOffset); + } + + void vblendvOpVex(VexOperandType ty, ThreeByteOpcodeID opcode, + ThreeByteEscape escape, XMMRegisterID mask, RegisterID rm, + XMMRegisterID src0, int reg) { + int r = (reg >> 3), x = 0, b = (rm >> 3); + int m = 0, w = 0, v = src0, l = 0; + switch (escape) { + case ESCAPE_38: + m = 2; + break; + case ESCAPE_3A: + m = 3; + break; + default: + MOZ_CRASH("unexpected escape"); + } + threeOpVex(ty, r, x, b, m, w, v, l, opcode); + registerModRM(rm, reg); + immediate8u(mask << 4); + } + + void vblendvOpVex(VexOperandType ty, ThreeByteOpcodeID opcode, + ThreeByteEscape escape, XMMRegisterID mask, + int32_t offset, RegisterID base, XMMRegisterID src0, + int reg) { + int r = (reg >> 3), x = 0, b = (base >> 3); + int m = 0, w = 0, v = src0, l = 0; + switch (escape) { + case ESCAPE_38: + m = 2; + break; + case ESCAPE_3A: + m = 3; + break; + default: + MOZ_CRASH("unexpected escape"); + } + threeOpVex(ty, r, x, b, m, w, v, l, opcode); + memoryModRM(offset, base, reg); + immediate8u(mask << 4); + } + +#ifdef JS_CODEGEN_X64 + // Quad-word-sized operands: + // + // Used to format 64-bit operantions, planting a REX.w prefix. When + // planting d64 or f64 instructions, not requiring a REX.w prefix, the + // normal (non-'64'-postfixed) formatters should be used. + + void oneByteOp64(OneByteOpcodeID opcode) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexW(0, 0, 0); + m_buffer.putByteUnchecked(opcode); + } + + void oneByteOp64(OneByteOpcodeID opcode, RegisterID reg) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexW(0, 0, reg); + m_buffer.putByteUnchecked(opcode + (reg & 7)); + } + + void oneByteOp64(OneByteOpcodeID opcode, RegisterID rm, int reg) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexW(reg, 0, rm); + m_buffer.putByteUnchecked(opcode); + registerModRM(rm, reg); + } + + void oneByteOp64(OneByteOpcodeID opcode, int32_t offset, RegisterID base, + int reg) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexW(reg, 0, base); + m_buffer.putByteUnchecked(opcode); + memoryModRM(offset, base, reg); + } + + void oneByteOp64_disp32(OneByteOpcodeID opcode, int32_t offset, + RegisterID base, int reg) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexW(reg, 0, base); + m_buffer.putByteUnchecked(opcode); + memoryModRM_disp32(offset, base, reg); + } + + void oneByteOp64(OneByteOpcodeID opcode, int32_t offset, RegisterID base, + RegisterID index, int scale, int reg) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexW(reg, index, base); + m_buffer.putByteUnchecked(opcode); + memoryModRM(offset, base, index, scale, reg); + } + + void oneByteOp64(OneByteOpcodeID opcode, const void* address, int reg) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexW(reg, 0, 0); + m_buffer.putByteUnchecked(opcode); + memoryModRM(address, reg); + } + + void twoByteOp64(TwoByteOpcodeID opcode, int reg) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexW(0, 0, reg); + m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); + m_buffer.putByteUnchecked(opcode + (reg & 7)); + } + + void twoByteOp64(TwoByteOpcodeID opcode, RegisterID rm, int reg) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexW(reg, 0, rm); + m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); + m_buffer.putByteUnchecked(opcode); + registerModRM(rm, 
reg); + } + + void twoByteOp64(TwoByteOpcodeID opcode, int offset, RegisterID base, + int reg) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexW(reg, 0, base); + m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); + m_buffer.putByteUnchecked(opcode); + memoryModRM(offset, base, reg); + } + + void twoByteOp64(TwoByteOpcodeID opcode, int offset, RegisterID base, + RegisterID index, int scale, int reg) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexW(reg, index, base); + m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); + m_buffer.putByteUnchecked(opcode); + memoryModRM(offset, base, index, scale, reg); + } + + void twoByteOp64(TwoByteOpcodeID opcode, const void* address, int reg) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexW(reg, 0, 0); + m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); + m_buffer.putByteUnchecked(opcode); + memoryModRM(address, reg); + } + + void twoByteOpVex64(VexOperandType ty, TwoByteOpcodeID opcode, + RegisterID rm, XMMRegisterID src0, XMMRegisterID reg) { + int r = (reg >> 3), x = 0, b = (rm >> 3); + int m = 1; // 0x0F + int w = 1, v = src0, l = 0; + threeOpVex(ty, r, x, b, m, w, v, l, opcode); + registerModRM(rm, reg); + } + + void threeByteOp64(ThreeByteOpcodeID opcode, ThreeByteEscape escape, + RegisterID rm, int reg) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexW(reg, 0, rm); + m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); + m_buffer.putByteUnchecked(escape); + m_buffer.putByteUnchecked(opcode); + registerModRM(rm, reg); + } +#endif // JS_CODEGEN_X64 + + void threeByteOpVex64(VexOperandType ty, ThreeByteOpcodeID opcode, + ThreeByteEscape escape, RegisterID rm, + XMMRegisterID src0, int reg) { + int r = (reg >> 3), x = 0, b = (rm >> 3); + int m = 0, w = 1, v = src0, l = 0; + switch (escape) { + case ESCAPE_38: + m = 2; + break; + case ESCAPE_3A: + m = 3; + break; + default: + MOZ_CRASH("unexpected escape"); + } + threeOpVex(ty, r, x, b, m, w, v, l, opcode); + registerModRM(rm, reg); + } + + // Byte-operands: + // + // These methods format byte operations. Byte operations differ from + // the normal formatters in the circumstances under which they will + // decide to emit REX prefixes. These should be used where any register + // operand signifies a byte register. + // + // The disctinction is due to the handling of register numbers in the + // range 4..7 on x86-64. These register numbers may either represent + // the second byte of the first four registers (ah..bh) or the first + // byte of the second four registers (spl..dil). + // + // Address operands should still be checked using regRequiresRex(), + // while byteRegRequiresRex() is provided to check byte register + // operands. + + void oneByteOp8(OneByteOpcodeID opcode) { + m_buffer.ensureSpace(MaxInstructionSize); + m_buffer.putByteUnchecked(opcode); + } + + void oneByteOp8(OneByteOpcodeID opcode, RegisterID r) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIf(byteRegRequiresRex(r), 0, 0, r); + m_buffer.putByteUnchecked(opcode + (r & 7)); + } + + void oneByteOp8(OneByteOpcodeID opcode, RegisterID rm, + GroupOpcodeID groupOp) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIf(byteRegRequiresRex(rm), 0, 0, rm); + m_buffer.putByteUnchecked(opcode); + registerModRM(rm, groupOp); + } + + // Like oneByteOp8, but never emits a REX prefix. 
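    // Once any REX prefix is present, byte-operand encodings 4..7 name
    // spl..dil instead of ah..bh, so an operation that really wants ah..bh
    // must be emitted with no REX at all; this helper therefore never emits
    // one and asserts that its operand could not require it.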
+ void oneByteOp8_norex(OneByteOpcodeID opcode, HRegisterID rm, + GroupOpcodeID groupOp) { + MOZ_ASSERT(!regRequiresRex(RegisterID(rm))); + m_buffer.ensureSpace(MaxInstructionSize); + m_buffer.putByteUnchecked(opcode); + registerModRM(RegisterID(rm), groupOp); + } + + void oneByteOp8(OneByteOpcodeID opcode, int32_t offset, RegisterID base, + RegisterID reg) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIf(byteRegRequiresRex(reg), reg, 0, base); + m_buffer.putByteUnchecked(opcode); + memoryModRM(offset, base, reg); + } + + void oneByteOp8_disp32(OneByteOpcodeID opcode, int32_t offset, + RegisterID base, RegisterID reg) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIf(byteRegRequiresRex(reg), reg, 0, base); + m_buffer.putByteUnchecked(opcode); + memoryModRM_disp32(offset, base, reg); + } + + void oneByteOp8(OneByteOpcodeID opcode, int32_t offset, RegisterID base, + RegisterID index, int scale, RegisterID reg) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIf(byteRegRequiresRex(reg), reg, index, base); + m_buffer.putByteUnchecked(opcode); + memoryModRM(offset, base, index, scale, reg); + } + + void oneByteOp8(OneByteOpcodeID opcode, const void* address, + RegisterID reg) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIf(byteRegRequiresRex(reg), reg, 0, 0); + m_buffer.putByteUnchecked(opcode); + memoryModRM_disp32(address, reg); + } + + void twoByteOp8(TwoByteOpcodeID opcode, RegisterID rm, RegisterID reg) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIf(byteRegRequiresRex(reg) || byteRegRequiresRex(rm), reg, 0, rm); + m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); + m_buffer.putByteUnchecked(opcode); + registerModRM(rm, reg); + } + + void twoByteOp8(TwoByteOpcodeID opcode, int32_t offset, RegisterID base, + RegisterID reg) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIf(byteRegRequiresRex(reg) || regRequiresRex(base), reg, 0, base); + m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); + m_buffer.putByteUnchecked(opcode); + memoryModRM(offset, base, reg); + } + + void twoByteOp8(TwoByteOpcodeID opcode, int32_t offset, RegisterID base, + RegisterID index, int scale, RegisterID reg) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIf(byteRegRequiresRex(reg) || regRequiresRex(base) || + regRequiresRex(index), + reg, index, base); + m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); + m_buffer.putByteUnchecked(opcode); + memoryModRM(offset, base, index, scale, reg); + } + + // Like twoByteOp8 but doesn't add a REX prefix if the destination reg + // is in esp..edi. This may be used when the destination is not an 8-bit + // register (as in a movzbl instruction), so it doesn't need a REX + // prefix to disambiguate it from ah..bh. + void twoByteOp8_movx(TwoByteOpcodeID opcode, RegisterID rm, + RegisterID reg) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIf(regRequiresRex(reg) || byteRegRequiresRex(rm), reg, 0, rm); + m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); + m_buffer.putByteUnchecked(opcode); + registerModRM(rm, reg); + } + + void twoByteOp8(TwoByteOpcodeID opcode, RegisterID rm, + GroupOpcodeID groupOp) { + m_buffer.ensureSpace(MaxInstructionSize); + emitRexIf(byteRegRequiresRex(rm), 0, 0, rm); + m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); + m_buffer.putByteUnchecked(opcode); + registerModRM(rm, groupOp); + } + + // Immediates: + // + // An immedaite should be appended where appropriate after an op has + // been emitted. The writes are unchecked since the opcode formatters + // above will have ensured space. + + // A signed 8-bit immediate. 
+ MOZ_ALWAYS_INLINE void immediate8s(int32_t imm) { + MOZ_ASSERT(CAN_SIGN_EXTEND_8_32(imm)); + m_buffer.putByteUnchecked(imm); + } + + // An unsigned 8-bit immediate. + MOZ_ALWAYS_INLINE void immediate8u(uint32_t imm) { + MOZ_ASSERT(CAN_ZERO_EXTEND_8_32(imm)); + m_buffer.putByteUnchecked(int32_t(imm)); + } + + // An 8-bit immediate with is either signed or unsigned, for use in + // instructions which actually only operate on 8 bits. + MOZ_ALWAYS_INLINE void immediate8(int32_t imm) { + m_buffer.putByteUnchecked(imm); + } + + // A signed 16-bit immediate. + MOZ_ALWAYS_INLINE void immediate16s(int32_t imm) { + MOZ_ASSERT(CAN_SIGN_EXTEND_16_32(imm)); + m_buffer.putShortUnchecked(imm); + } + + // An unsigned 16-bit immediate. + MOZ_ALWAYS_INLINE void immediate16u(int32_t imm) { + MOZ_ASSERT(CAN_ZERO_EXTEND_16_32(imm)); + m_buffer.putShortUnchecked(imm); + } + + // A 16-bit immediate with is either signed or unsigned, for use in + // instructions which actually only operate on 16 bits. + MOZ_ALWAYS_INLINE void immediate16(int32_t imm) { + m_buffer.putShortUnchecked(imm); + } + + MOZ_ALWAYS_INLINE void immediate32(int32_t imm) { + m_buffer.putIntUnchecked(imm); + } + + MOZ_ALWAYS_INLINE void immediate64(int64_t imm) { + m_buffer.putInt64Unchecked(imm); + } + + [[nodiscard]] MOZ_ALWAYS_INLINE JmpSrc immediateRel32() { + m_buffer.putIntUnchecked(0); + return JmpSrc(m_buffer.size()); + } + + // Data: + + void jumpTablePointer(uintptr_t ptr) { + m_buffer.ensureSpace(sizeof(uintptr_t)); +#ifdef JS_CODEGEN_X64 + m_buffer.putInt64Unchecked(ptr); +#else + m_buffer.putIntUnchecked(ptr); +#endif + } + + void doubleConstant(double d) { + m_buffer.ensureSpace(sizeof(double)); + m_buffer.putInt64Unchecked(mozilla::BitwiseCast<uint64_t>(d)); + } + + void floatConstant(float f) { + m_buffer.ensureSpace(sizeof(float)); + m_buffer.putIntUnchecked(mozilla::BitwiseCast<uint32_t>(f)); + } + + void simd128Constant(const void* data) { + const uint8_t* bytes = reinterpret_cast<const uint8_t*>(data); + m_buffer.ensureSpace(16); + for (size_t i = 0; i < 16; ++i) { + m_buffer.putByteUnchecked(bytes[i]); + } + } + + void int64Constant(int64_t i) { + m_buffer.ensureSpace(sizeof(int64_t)); + m_buffer.putInt64Unchecked(i); + } + + void int32Constant(int32_t i) { + m_buffer.ensureSpace(sizeof(int32_t)); + m_buffer.putIntUnchecked(i); + } + + // Administrative methods: + + size_t size() const { return m_buffer.size(); } + const unsigned char* buffer() const { return m_buffer.buffer(); } + unsigned char* data() { return m_buffer.data(); } + bool oom() const { return m_buffer.oom(); } + bool reserve(size_t size) { return m_buffer.reserve(size); } + bool swapBuffer(wasm::Bytes& other) { return m_buffer.swap(other); } + bool isAligned(int alignment) const { + return m_buffer.isAligned(alignment); + } + + [[nodiscard]] bool append(const unsigned char* values, size_t size) { + return m_buffer.append(values, size); + } + + private: + // Internals; ModRm and REX formatters. + + // Byte operand register spl & above requir a REX prefix, which precludes + // use of the h registers in the same instruction. + static bool byteRegRequiresRex(RegisterID reg) { +#ifdef JS_CODEGEN_X64 + return reg >= rsp; +#else + return false; +#endif + } + + // For non-byte sizes, registers r8 & above always require a REX prefix. + static bool regRequiresRex(RegisterID reg) { +#ifdef JS_CODEGEN_X64 + return reg >= r8; +#else + return false; +#endif + } + +#ifdef JS_CODEGEN_X64 + // Format a REX prefix byte. 
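    // The REX prefix has the form 0100WRXB (0x40..0x4f): W selects 64-bit
    // operand size, and R, X and B extend the ModRM reg field, the SIB
    // index, and the ModRM r/m (or SIB base / opcode register) field to
    // four bits. For example, w=1, r=rax (0), x=0, b=r8 (8) yields
    // 0x40 | 1<<3 | 0<<2 | 0<<1 | 1 == 0x49, i.e. REX.WB.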
+ void emitRex(bool w, int r, int x, int b) { + m_buffer.putByteUnchecked(PRE_REX | ((int)w << 3) | ((r >> 3) << 2) | + ((x >> 3) << 1) | (b >> 3)); + } + + // Used to plant a REX byte with REX.w set (for 64-bit operations). + void emitRexW(int r, int x, int b) { emitRex(true, r, x, b); } + + // Used for operations with byte operands - use byteRegRequiresRex() to + // check register operands, regRequiresRex() to check other registers + // (i.e. address base & index). + // + // NB: WebKit's use of emitRexIf() is limited such that the + // reqRequiresRex() checks are not needed. SpiderMonkey extends + // oneByteOp8 and twoByteOp8 functionality such that r, x, and b + // can all be used. + void emitRexIf(bool condition, int r, int x, int b) { + if (condition || regRequiresRex(RegisterID(r)) || + regRequiresRex(RegisterID(x)) || regRequiresRex(RegisterID(b))) { + emitRex(false, r, x, b); + } + } + + // Used for word sized operations, will plant a REX prefix if necessary + // (if any register is r8 or above). + void emitRexIfNeeded(int r, int x, int b) { emitRexIf(false, r, x, b); } +#else + // No REX prefix bytes on 32-bit x86. + void emitRexIf(bool condition, int, int, int) { + MOZ_ASSERT(!condition, "32-bit x86 should never use a REX prefix"); + } + void emitRexIfNeeded(int, int, int) {} +#endif + + void putModRm(ModRmMode mode, RegisterID rm, int reg) { + m_buffer.putByteUnchecked((mode << 6) | ((reg & 7) << 3) | (rm & 7)); + } + + void putModRmSib(ModRmMode mode, RegisterID base, RegisterID index, + int scale, int reg) { + MOZ_ASSERT(mode != ModRmRegister); + + putModRm(mode, hasSib, reg); + m_buffer.putByteUnchecked((scale << 6) | ((index & 7) << 3) | (base & 7)); + } + + void registerModRM(RegisterID rm, int reg) { + putModRm(ModRmRegister, rm, reg); + } + + void memoryModRM(int32_t offset, RegisterID base, int reg) { + // A base of esp or r12 would be interpreted as a sib, so force a + // sib with no index & put the base in there. +#ifdef JS_CODEGEN_X64 + if ((base == hasSib) || (base == hasSib2)) { +#else + if (base == hasSib) { +#endif + if (!offset) { // No need to check if the base is noBase, since we know + // it is hasSib! + putModRmSib(ModRmMemoryNoDisp, base, noIndex, 0, reg); + } else if (CAN_SIGN_EXTEND_8_32(offset)) { + putModRmSib(ModRmMemoryDisp8, base, noIndex, 0, reg); + m_buffer.putByteUnchecked(offset); + } else { + putModRmSib(ModRmMemoryDisp32, base, noIndex, 0, reg); + m_buffer.putIntUnchecked(offset); + } + } else { +#ifdef JS_CODEGEN_X64 + if (!offset && (base != noBase) && (base != noBase2)) { +#else + if (!offset && (base != noBase)) { +#endif + putModRm(ModRmMemoryNoDisp, base, reg); + } else if (CAN_SIGN_EXTEND_8_32(offset)) { + putModRm(ModRmMemoryDisp8, base, reg); + m_buffer.putByteUnchecked(offset); + } else { + putModRm(ModRmMemoryDisp32, base, reg); + m_buffer.putIntUnchecked(offset); + } + } + } + + void memoryModRM_disp32(int32_t offset, RegisterID base, int reg) { + // A base of esp or r12 would be interpreted as a sib, so force a + // sib with no index & put the base in there. 
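      // Per the Intel ModRM/SIB rules: an r/m value of 4 (the low bits of
      // esp/r12) does not name a base register, it announces that a SIB
      // byte follows, and within the SIB an index of 4 (noIndex) means "no
      // index". Likewise mod=00 with a base of 5 (ebp/r13, i.e. noBase /
      // noBase2) means "disp32 with no base", which is why memoryModRM
      // above never uses the no-displacement form for those bases.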
+#ifdef JS_CODEGEN_X64 + if ((base == hasSib) || (base == hasSib2)) { +#else + if (base == hasSib) { +#endif + putModRmSib(ModRmMemoryDisp32, base, noIndex, 0, reg); + m_buffer.putIntUnchecked(offset); + } else { + putModRm(ModRmMemoryDisp32, base, reg); + m_buffer.putIntUnchecked(offset); + } + } + + void memoryModRM(int32_t offset, RegisterID base, RegisterID index, + int scale, int reg) { + MOZ_ASSERT(index != noIndex); + +#ifdef JS_CODEGEN_X64 + if (!offset && (base != noBase) && (base != noBase2)) { +#else + if (!offset && (base != noBase)) { +#endif + putModRmSib(ModRmMemoryNoDisp, base, index, scale, reg); + } else if (CAN_SIGN_EXTEND_8_32(offset)) { + putModRmSib(ModRmMemoryDisp8, base, index, scale, reg); + m_buffer.putByteUnchecked(offset); + } else { + putModRmSib(ModRmMemoryDisp32, base, index, scale, reg); + m_buffer.putIntUnchecked(offset); + } + } + + void memoryModRM_disp32(int32_t offset, RegisterID index, int scale, + int reg) { + MOZ_ASSERT(index != noIndex); + + // NB: the base-less memoryModRM overloads generate different code + // then the base-full memoryModRM overloads in the base == noBase + // case. The base-less overloads assume that the desired effective + // address is: + // + // reg := [scaled index] + disp32 + // + // which means the mod needs to be ModRmMemoryNoDisp. The base-full + // overloads pass ModRmMemoryDisp32 in all cases and thus, when + // base == noBase (== ebp), the effective address is: + // + // reg := [scaled index] + disp32 + [ebp] + // + // See Intel developer manual, Vol 2, 2.1.5, Table 2-3. + putModRmSib(ModRmMemoryNoDisp, noBase, index, scale, reg); + m_buffer.putIntUnchecked(offset); + } + + void memoryModRM_disp32(const void* address, int reg) { + int32_t disp = AddressImmediate(address); + +#ifdef JS_CODEGEN_X64 + // On x64-64, non-RIP-relative absolute mode requires a SIB. + putModRmSib(ModRmMemoryNoDisp, noBase, noIndex, 0, reg); +#else + // noBase + ModRmMemoryNoDisp means noBase + ModRmMemoryDisp32! + putModRm(ModRmMemoryNoDisp, noBase, reg); +#endif + m_buffer.putIntUnchecked(disp); + } + + void memoryModRM(const void* address, int reg) { + memoryModRM_disp32(address, reg); + } + + void threeOpVex(VexOperandType p, int r, int x, int b, int m, int w, int v, + int l, int opcode) { + m_buffer.ensureSpace(MaxInstructionSize); + + if (v == invalid_xmm) { + v = XMMRegisterID(0); + } + + if (x == 0 && b == 0 && m == 1 && w == 0) { + // Two byte VEX. + m_buffer.putByteUnchecked(PRE_VEX_C5); + m_buffer.putByteUnchecked(((r << 7) | (v << 3) | (l << 2) | p) ^ 0xf8); + } else { + // Three byte VEX. + m_buffer.putByteUnchecked(PRE_VEX_C4); + m_buffer.putByteUnchecked(((r << 7) | (x << 6) | (b << 5) | m) ^ 0xe0); + m_buffer.putByteUnchecked(((w << 7) | (v << 3) | (l << 2) | p) ^ 0x78); + } + + m_buffer.putByteUnchecked(opcode); + } + + AssemblerBuffer m_buffer; + } m_formatter; + + bool useVEX_; +}; + +} // namespace X86Encoding + +} // namespace jit +} // namespace js + +#endif /* jit_x86_shared_BaseAssembler_x86_shared_h */ diff --git a/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp b/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp new file mode 100644 index 0000000000..3b1730599f --- /dev/null +++ b/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp @@ -0,0 +1,3883 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "jit/x86-shared/CodeGenerator-x86-shared.h" + +#include "mozilla/DebugOnly.h" +#include "mozilla/MathAlgorithms.h" + +#include "jit/CodeGenerator.h" +#include "jit/InlineScriptTree.h" +#include "jit/JitRuntime.h" +#include "jit/RangeAnalysis.h" +#include "jit/ReciprocalMulConstants.h" +#include "js/ScalarType.h" // js::Scalar::Type +#include "util/DifferentialTesting.h" + +#include "jit/MacroAssembler-inl.h" +#include "jit/shared/CodeGenerator-shared-inl.h" + +using namespace js; +using namespace js::jit; + +using mozilla::Abs; +using mozilla::DebugOnly; +using mozilla::FloorLog2; +using mozilla::NegativeInfinity; + +using JS::GenericNaN; + +namespace js { +namespace jit { + +CodeGeneratorX86Shared::CodeGeneratorX86Shared(MIRGenerator* gen, + LIRGraph* graph, + MacroAssembler* masm) + : CodeGeneratorShared(gen, graph, masm) {} + +#ifdef JS_PUNBOX64 +Operand CodeGeneratorX86Shared::ToOperandOrRegister64( + const LInt64Allocation input) { + return ToOperand(input.value()); +} +#else +Register64 CodeGeneratorX86Shared::ToOperandOrRegister64( + const LInt64Allocation input) { + return ToRegister64(input); +} +#endif + +void OutOfLineBailout::accept(CodeGeneratorX86Shared* codegen) { + codegen->visitOutOfLineBailout(this); +} + +void CodeGeneratorX86Shared::emitBranch(Assembler::Condition cond, + MBasicBlock* mirTrue, + MBasicBlock* mirFalse, + Assembler::NaNCond ifNaN) { + if (ifNaN == Assembler::NaN_IsFalse) { + jumpToBlock(mirFalse, Assembler::Parity); + } else if (ifNaN == Assembler::NaN_IsTrue) { + jumpToBlock(mirTrue, Assembler::Parity); + } + + if (isNextBlock(mirFalse->lir())) { + jumpToBlock(mirTrue, cond); + } else { + jumpToBlock(mirFalse, Assembler::InvertCondition(cond)); + jumpToBlock(mirTrue); + } +} + +void CodeGenerator::visitDouble(LDouble* ins) { + const LDefinition* out = ins->getDef(0); + masm.loadConstantDouble(ins->value(), ToFloatRegister(out)); +} + +void CodeGenerator::visitFloat32(LFloat32* ins) { + const LDefinition* out = ins->getDef(0); + masm.loadConstantFloat32(ins->value(), ToFloatRegister(out)); +} + +void CodeGenerator::visitTestIAndBranch(LTestIAndBranch* test) { + Register input = ToRegister(test->input()); + masm.test32(input, input); + emitBranch(Assembler::NonZero, test->ifTrue(), test->ifFalse()); +} + +void CodeGenerator::visitTestDAndBranch(LTestDAndBranch* test) { + const LAllocation* opd = test->input(); + + // vucomisd flags: + // Z P C + // --------- + // NaN 1 1 1 + // > 0 0 0 + // < 0 0 1 + // = 1 0 0 + // + // NaN is falsey, so comparing against 0 and then using the Z flag is + // enough to determine which branch to take. 
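+  // Worked through the table above: opd == NaN and opd == 0.0 both leave
+  // ZF == 1, so the NotEqual test below fails and control reaches ifFalse
+  // (NaN and 0 are falsey); any other value leaves ZF == 0 and goes to
+  // ifTrue.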
+ ScratchDoubleScope scratch(masm); + masm.zeroDouble(scratch); + masm.vucomisd(scratch, ToFloatRegister(opd)); + emitBranch(Assembler::NotEqual, test->ifTrue(), test->ifFalse()); +} + +void CodeGenerator::visitTestFAndBranch(LTestFAndBranch* test) { + const LAllocation* opd = test->input(); + // vucomiss flags are the same as doubles; see comment above + { + ScratchFloat32Scope scratch(masm); + masm.zeroFloat32(scratch); + masm.vucomiss(scratch, ToFloatRegister(opd)); + } + emitBranch(Assembler::NotEqual, test->ifTrue(), test->ifFalse()); +} + +void CodeGeneratorX86Shared::emitCompare(MCompare::CompareType type, + const LAllocation* left, + const LAllocation* right) { +#ifdef JS_CODEGEN_X64 + if (type == MCompare::Compare_Object || type == MCompare::Compare_Symbol || + type == MCompare::Compare_UIntPtr || + type == MCompare::Compare_RefOrNull) { + if (right->isConstant()) { + MOZ_ASSERT(type == MCompare::Compare_UIntPtr); + masm.cmpPtr(ToRegister(left), Imm32(ToInt32(right))); + } else { + masm.cmpPtr(ToRegister(left), ToOperand(right)); + } + return; + } +#endif + + if (right->isConstant()) { + masm.cmp32(ToRegister(left), Imm32(ToInt32(right))); + } else { + masm.cmp32(ToRegister(left), ToOperand(right)); + } +} + +void CodeGenerator::visitCompare(LCompare* comp) { + MCompare* mir = comp->mir(); + emitCompare(mir->compareType(), comp->left(), comp->right()); + masm.emitSet(JSOpToCondition(mir->compareType(), comp->jsop()), + ToRegister(comp->output())); +} + +void CodeGenerator::visitCompareAndBranch(LCompareAndBranch* comp) { + MCompare* mir = comp->cmpMir(); + emitCompare(mir->compareType(), comp->left(), comp->right()); + Assembler::Condition cond = JSOpToCondition(mir->compareType(), comp->jsop()); + emitBranch(cond, comp->ifTrue(), comp->ifFalse()); +} + +void CodeGenerator::visitCompareD(LCompareD* comp) { + FloatRegister lhs = ToFloatRegister(comp->left()); + FloatRegister rhs = ToFloatRegister(comp->right()); + + Assembler::DoubleCondition cond = JSOpToDoubleCondition(comp->mir()->jsop()); + + Assembler::NaNCond nanCond = Assembler::NaNCondFromDoubleCondition(cond); + if (comp->mir()->operandsAreNeverNaN()) { + nanCond = Assembler::NaN_HandledByCond; + } + + masm.compareDouble(cond, lhs, rhs); + masm.emitSet(Assembler::ConditionFromDoubleCondition(cond), + ToRegister(comp->output()), nanCond); +} + +void CodeGenerator::visitCompareF(LCompareF* comp) { + FloatRegister lhs = ToFloatRegister(comp->left()); + FloatRegister rhs = ToFloatRegister(comp->right()); + + Assembler::DoubleCondition cond = JSOpToDoubleCondition(comp->mir()->jsop()); + + Assembler::NaNCond nanCond = Assembler::NaNCondFromDoubleCondition(cond); + if (comp->mir()->operandsAreNeverNaN()) { + nanCond = Assembler::NaN_HandledByCond; + } + + masm.compareFloat(cond, lhs, rhs); + masm.emitSet(Assembler::ConditionFromDoubleCondition(cond), + ToRegister(comp->output()), nanCond); +} + +void CodeGenerator::visitNotI(LNotI* ins) { + masm.cmp32(ToRegister(ins->input()), Imm32(0)); + masm.emitSet(Assembler::Equal, ToRegister(ins->output())); +} + +void CodeGenerator::visitNotD(LNotD* ins) { + FloatRegister opd = ToFloatRegister(ins->input()); + + // Not returns true if the input is a NaN. We don't have to worry about + // it if we know the input is never NaN though. 
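+  // E.g. !NaN is true in JS: the unordered compare against 0.0 below sets
+  // the parity flag for NaN, and nanCond == NaN_IsTrue makes emitSet treat
+  // that case as true, just like a genuine zero.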
+ Assembler::NaNCond nanCond = Assembler::NaN_IsTrue; + if (ins->mir()->operandIsNeverNaN()) { + nanCond = Assembler::NaN_HandledByCond; + } + + ScratchDoubleScope scratch(masm); + masm.zeroDouble(scratch); + masm.compareDouble(Assembler::DoubleEqualOrUnordered, opd, scratch); + masm.emitSet(Assembler::Equal, ToRegister(ins->output()), nanCond); +} + +void CodeGenerator::visitNotF(LNotF* ins) { + FloatRegister opd = ToFloatRegister(ins->input()); + + // Not returns true if the input is a NaN. We don't have to worry about + // it if we know the input is never NaN though. + Assembler::NaNCond nanCond = Assembler::NaN_IsTrue; + if (ins->mir()->operandIsNeverNaN()) { + nanCond = Assembler::NaN_HandledByCond; + } + + ScratchFloat32Scope scratch(masm); + masm.zeroFloat32(scratch); + masm.compareFloat(Assembler::DoubleEqualOrUnordered, opd, scratch); + masm.emitSet(Assembler::Equal, ToRegister(ins->output()), nanCond); +} + +void CodeGenerator::visitCompareDAndBranch(LCompareDAndBranch* comp) { + FloatRegister lhs = ToFloatRegister(comp->left()); + FloatRegister rhs = ToFloatRegister(comp->right()); + + Assembler::DoubleCondition cond = + JSOpToDoubleCondition(comp->cmpMir()->jsop()); + + Assembler::NaNCond nanCond = Assembler::NaNCondFromDoubleCondition(cond); + if (comp->cmpMir()->operandsAreNeverNaN()) { + nanCond = Assembler::NaN_HandledByCond; + } + + masm.compareDouble(cond, lhs, rhs); + emitBranch(Assembler::ConditionFromDoubleCondition(cond), comp->ifTrue(), + comp->ifFalse(), nanCond); +} + +void CodeGenerator::visitCompareFAndBranch(LCompareFAndBranch* comp) { + FloatRegister lhs = ToFloatRegister(comp->left()); + FloatRegister rhs = ToFloatRegister(comp->right()); + + Assembler::DoubleCondition cond = + JSOpToDoubleCondition(comp->cmpMir()->jsop()); + + Assembler::NaNCond nanCond = Assembler::NaNCondFromDoubleCondition(cond); + if (comp->cmpMir()->operandsAreNeverNaN()) { + nanCond = Assembler::NaN_HandledByCond; + } + + masm.compareFloat(cond, lhs, rhs); + emitBranch(Assembler::ConditionFromDoubleCondition(cond), comp->ifTrue(), + comp->ifFalse(), nanCond); +} + +void CodeGenerator::visitWasmStackArg(LWasmStackArg* ins) { + const MWasmStackArg* mir = ins->mir(); + Address dst(StackPointer, mir->spOffset()); + if (ins->arg()->isConstant()) { + masm.storePtr(ImmWord(ToInt32(ins->arg())), dst); + } else if (ins->arg()->isGeneralReg()) { + masm.storePtr(ToRegister(ins->arg()), dst); + } else { + switch (mir->input()->type()) { + case MIRType::Double: + masm.storeDouble(ToFloatRegister(ins->arg()), dst); + return; + case MIRType::Float32: + masm.storeFloat32(ToFloatRegister(ins->arg()), dst); + return; +#ifdef ENABLE_WASM_SIMD + case MIRType::Simd128: + masm.storeUnalignedSimd128(ToFloatRegister(ins->arg()), dst); + return; +#endif + default: + break; + } + MOZ_CRASH("unexpected mir type in WasmStackArg"); + } +} + +void CodeGenerator::visitWasmStackArgI64(LWasmStackArgI64* ins) { + const MWasmStackArg* mir = ins->mir(); + Address dst(StackPointer, mir->spOffset()); + if (IsConstant(ins->arg())) { + masm.store64(Imm64(ToInt64(ins->arg())), dst); + } else { + masm.store64(ToRegister64(ins->arg()), dst); + } +} + +void CodeGenerator::visitWasmSelect(LWasmSelect* ins) { + MIRType mirType = ins->mir()->type(); + + Register cond = ToRegister(ins->condExpr()); + Operand falseExpr = ToOperand(ins->falseExpr()); + + masm.test32(cond, cond); + + if (mirType == MIRType::Int32 || mirType == MIRType::RefOrNull) { + Register out = ToRegister(ins->output()); + MOZ_ASSERT(ToRegister(ins->trueExpr()) == 
out, + "true expr input is reused for output"); + if (mirType == MIRType::Int32) { + masm.cmovz32(falseExpr, out); + } else { + masm.cmovzPtr(falseExpr, out); + } + return; + } + + FloatRegister out = ToFloatRegister(ins->output()); + MOZ_ASSERT(ToFloatRegister(ins->trueExpr()) == out, + "true expr input is reused for output"); + + Label done; + masm.j(Assembler::NonZero, &done); + + if (mirType == MIRType::Float32) { + if (falseExpr.kind() == Operand::FPREG) { + masm.moveFloat32(ToFloatRegister(ins->falseExpr()), out); + } else { + masm.loadFloat32(falseExpr, out); + } + } else if (mirType == MIRType::Double) { + if (falseExpr.kind() == Operand::FPREG) { + masm.moveDouble(ToFloatRegister(ins->falseExpr()), out); + } else { + masm.loadDouble(falseExpr, out); + } + } else if (mirType == MIRType::Simd128) { + if (falseExpr.kind() == Operand::FPREG) { + masm.moveSimd128(ToFloatRegister(ins->falseExpr()), out); + } else { + masm.loadUnalignedSimd128(falseExpr, out); + } + } else { + MOZ_CRASH("unhandled type in visitWasmSelect!"); + } + + masm.bind(&done); +} + +void CodeGenerator::visitWasmReinterpret(LWasmReinterpret* lir) { + MOZ_ASSERT(gen->compilingWasm()); + MWasmReinterpret* ins = lir->mir(); + + MIRType to = ins->type(); +#ifdef DEBUG + MIRType from = ins->input()->type(); +#endif + + switch (to) { + case MIRType::Int32: + MOZ_ASSERT(from == MIRType::Float32); + masm.vmovd(ToFloatRegister(lir->input()), ToRegister(lir->output())); + break; + case MIRType::Float32: + MOZ_ASSERT(from == MIRType::Int32); + masm.vmovd(ToRegister(lir->input()), ToFloatRegister(lir->output())); + break; + case MIRType::Double: + case MIRType::Int64: + MOZ_CRASH("not handled by this LIR opcode"); + default: + MOZ_CRASH("unexpected WasmReinterpret"); + } +} + +void CodeGenerator::visitAsmJSLoadHeap(LAsmJSLoadHeap* ins) { + const MAsmJSLoadHeap* mir = ins->mir(); + MOZ_ASSERT(mir->access().offset() == 0); + + const LAllocation* ptr = ins->ptr(); + const LAllocation* boundsCheckLimit = ins->boundsCheckLimit(); + AnyRegister out = ToAnyRegister(ins->output()); + + Scalar::Type accessType = mir->accessType(); + + OutOfLineLoadTypedArrayOutOfBounds* ool = nullptr; + if (mir->needsBoundsCheck()) { + ool = new (alloc()) OutOfLineLoadTypedArrayOutOfBounds(out, accessType); + addOutOfLineCode(ool, mir); + + masm.wasmBoundsCheck32(Assembler::AboveOrEqual, ToRegister(ptr), + ToRegister(boundsCheckLimit), ool->entry()); + } + + Operand srcAddr = toMemoryAccessOperand(ins, 0); + masm.wasmLoad(mir->access(), srcAddr, out); + + if (ool) { + masm.bind(ool->rejoin()); + } +} + +void CodeGeneratorX86Shared::visitOutOfLineLoadTypedArrayOutOfBounds( + OutOfLineLoadTypedArrayOutOfBounds* ool) { + switch (ool->viewType()) { + case Scalar::Int64: + case Scalar::BigInt64: + case Scalar::BigUint64: + case Scalar::Simd128: + case Scalar::MaxTypedArrayViewType: + MOZ_CRASH("unexpected array type"); + case Scalar::Float32: + masm.loadConstantFloat32(float(GenericNaN()), ool->dest().fpu()); + break; + case Scalar::Float64: + masm.loadConstantDouble(GenericNaN(), ool->dest().fpu()); + break; + case Scalar::Int8: + case Scalar::Uint8: + case Scalar::Int16: + case Scalar::Uint16: + case Scalar::Int32: + case Scalar::Uint32: + case Scalar::Uint8Clamped: + Register destReg = ool->dest().gpr(); + masm.mov(ImmWord(0), destReg); + break; + } + masm.jmp(ool->rejoin()); +} + +void CodeGenerator::visitAsmJSStoreHeap(LAsmJSStoreHeap* ins) { + const MAsmJSStoreHeap* mir = ins->mir(); + + const LAllocation* ptr = ins->ptr(); + const LAllocation* value 
= ins->value(); + const LAllocation* boundsCheckLimit = ins->boundsCheckLimit(); + + Scalar::Type accessType = mir->accessType(); + canonicalizeIfDeterministic(accessType, value); + + Label rejoin; + if (mir->needsBoundsCheck()) { + masm.wasmBoundsCheck32(Assembler::AboveOrEqual, ToRegister(ptr), + ToRegister(boundsCheckLimit), &rejoin); + } + + Operand dstAddr = toMemoryAccessOperand(ins, 0); + masm.wasmStore(mir->access(), ToAnyRegister(value), dstAddr); + + if (rejoin.used()) { + masm.bind(&rejoin); + } +} + +void CodeGenerator::visitWasmAddOffset(LWasmAddOffset* lir) { + MWasmAddOffset* mir = lir->mir(); + Register base = ToRegister(lir->base()); + Register out = ToRegister(lir->output()); + + if (base != out) { + masm.move32(base, out); + } + masm.add32(Imm32(mir->offset()), out); + OutOfLineAbortingWasmTrap* ool = new (alloc()) + OutOfLineAbortingWasmTrap(mir->bytecodeOffset(), wasm::Trap::OutOfBounds); + addOutOfLineCode(ool, mir); + masm.j(Assembler::CarrySet, ool->entry()); +} + +void CodeGenerator::visitWasmAddOffset64(LWasmAddOffset64* lir) { + MWasmAddOffset* mir = lir->mir(); + Register64 base = ToRegister64(lir->base()); + Register64 out = ToOutRegister64(lir); + + if (base != out) { + masm.move64(base, out); + } + masm.add64(Imm64(mir->offset()), out); + OutOfLineAbortingWasmTrap* ool = new (alloc()) + OutOfLineAbortingWasmTrap(mir->bytecodeOffset(), wasm::Trap::OutOfBounds); + addOutOfLineCode(ool, mir); + masm.j(Assembler::CarrySet, ool->entry()); +} + +void CodeGenerator::visitWasmTruncateToInt32(LWasmTruncateToInt32* lir) { + FloatRegister input = ToFloatRegister(lir->input()); + Register output = ToRegister(lir->output()); + + MWasmTruncateToInt32* mir = lir->mir(); + MIRType inputType = mir->input()->type(); + + MOZ_ASSERT(inputType == MIRType::Double || inputType == MIRType::Float32); + + auto* ool = new (alloc()) OutOfLineWasmTruncateCheck(mir, input, output); + addOutOfLineCode(ool, mir); + + Label* oolEntry = ool->entry(); + if (mir->isUnsigned()) { + if (inputType == MIRType::Double) { + masm.wasmTruncateDoubleToUInt32(input, output, mir->isSaturating(), + oolEntry); + } else if (inputType == MIRType::Float32) { + masm.wasmTruncateFloat32ToUInt32(input, output, mir->isSaturating(), + oolEntry); + } else { + MOZ_CRASH("unexpected type"); + } + if (mir->isSaturating()) { + masm.bind(ool->rejoin()); + } + return; + } + + if (inputType == MIRType::Double) { + masm.wasmTruncateDoubleToInt32(input, output, mir->isSaturating(), + oolEntry); + } else if (inputType == MIRType::Float32) { + masm.wasmTruncateFloat32ToInt32(input, output, mir->isSaturating(), + oolEntry); + } else { + MOZ_CRASH("unexpected type"); + } + + masm.bind(ool->rejoin()); +} + +bool CodeGeneratorX86Shared::generateOutOfLineCode() { + if (!CodeGeneratorShared::generateOutOfLineCode()) { + return false; + } + + if (deoptLabel_.used()) { + // All non-table-based bailouts will go here. + masm.bind(&deoptLabel_); + + // Push the frame size, so the handler can recover the IonScript. 
+ masm.push(Imm32(frameSize())); + + TrampolinePtr handler = gen->jitRuntime()->getGenericBailoutHandler(); + masm.jump(handler); + } + + return !masm.oom(); +} + +class BailoutJump { + Assembler::Condition cond_; + + public: + explicit BailoutJump(Assembler::Condition cond) : cond_(cond) {} +#ifdef JS_CODEGEN_X86 + void operator()(MacroAssembler& masm, uint8_t* code) const { + masm.j(cond_, ImmPtr(code), RelocationKind::HARDCODED); + } +#endif + void operator()(MacroAssembler& masm, Label* label) const { + masm.j(cond_, label); + } +}; + +class BailoutLabel { + Label* label_; + + public: + explicit BailoutLabel(Label* label) : label_(label) {} +#ifdef JS_CODEGEN_X86 + void operator()(MacroAssembler& masm, uint8_t* code) const { + masm.retarget(label_, ImmPtr(code), RelocationKind::HARDCODED); + } +#endif + void operator()(MacroAssembler& masm, Label* label) const { + masm.retarget(label_, label); + } +}; + +template <typename T> +void CodeGeneratorX86Shared::bailout(const T& binder, LSnapshot* snapshot) { + encode(snapshot); + + // All bailout code is associated with the bytecodeSite of the block we are + // bailing out from. + InlineScriptTree* tree = snapshot->mir()->block()->trackedTree(); + OutOfLineBailout* ool = new (alloc()) OutOfLineBailout(snapshot); + addOutOfLineCode(ool, + new (alloc()) BytecodeSite(tree, tree->script()->code())); + + binder(masm, ool->entry()); +} + +void CodeGeneratorX86Shared::bailoutIf(Assembler::Condition condition, + LSnapshot* snapshot) { + bailout(BailoutJump(condition), snapshot); +} + +void CodeGeneratorX86Shared::bailoutIf(Assembler::DoubleCondition condition, + LSnapshot* snapshot) { + MOZ_ASSERT(Assembler::NaNCondFromDoubleCondition(condition) == + Assembler::NaN_HandledByCond); + bailoutIf(Assembler::ConditionFromDoubleCondition(condition), snapshot); +} + +void CodeGeneratorX86Shared::bailoutFrom(Label* label, LSnapshot* snapshot) { + MOZ_ASSERT_IF(!masm.oom(), label->used() && !label->bound()); + bailout(BailoutLabel(label), snapshot); +} + +void CodeGeneratorX86Shared::bailout(LSnapshot* snapshot) { + Label label; + masm.jump(&label); + bailoutFrom(&label, snapshot); +} + +void CodeGeneratorX86Shared::visitOutOfLineBailout(OutOfLineBailout* ool) { + masm.push(Imm32(ool->snapshot()->snapshotOffset())); + masm.jmp(&deoptLabel_); +} + +void CodeGenerator::visitMinMaxD(LMinMaxD* ins) { + FloatRegister first = ToFloatRegister(ins->first()); + FloatRegister second = ToFloatRegister(ins->second()); +#ifdef DEBUG + FloatRegister output = ToFloatRegister(ins->output()); + MOZ_ASSERT(first == output); +#endif + + bool handleNaN = !ins->mir()->range() || ins->mir()->range()->canBeNaN(); + + if (ins->mir()->isMax()) { + masm.maxDouble(second, first, handleNaN); + } else { + masm.minDouble(second, first, handleNaN); + } +} + +void CodeGenerator::visitMinMaxF(LMinMaxF* ins) { + FloatRegister first = ToFloatRegister(ins->first()); + FloatRegister second = ToFloatRegister(ins->second()); +#ifdef DEBUG + FloatRegister output = ToFloatRegister(ins->output()); + MOZ_ASSERT(first == output); +#endif + + bool handleNaN = !ins->mir()->range() || ins->mir()->range()->canBeNaN(); + + if (ins->mir()->isMax()) { + masm.maxFloat32(second, first, handleNaN); + } else { + masm.minFloat32(second, first, handleNaN); + } +} + +void CodeGenerator::visitClzI(LClzI* ins) { + Register input = ToRegister(ins->input()); + Register output = ToRegister(ins->output()); + bool knownNotZero = ins->mir()->operandIsNeverZero(); + + masm.clz32(input, output, knownNotZero); +} + +void 
CodeGenerator::visitCtzI(LCtzI* ins) { + Register input = ToRegister(ins->input()); + Register output = ToRegister(ins->output()); + bool knownNotZero = ins->mir()->operandIsNeverZero(); + + masm.ctz32(input, output, knownNotZero); +} + +void CodeGenerator::visitPopcntI(LPopcntI* ins) { + Register input = ToRegister(ins->input()); + Register output = ToRegister(ins->output()); + Register temp = + ins->temp0()->isBogusTemp() ? InvalidReg : ToRegister(ins->temp0()); + + masm.popcnt32(input, output, temp); +} + +void CodeGenerator::visitPowHalfD(LPowHalfD* ins) { + FloatRegister input = ToFloatRegister(ins->input()); + FloatRegister output = ToFloatRegister(ins->output()); + + ScratchDoubleScope scratch(masm); + + Label done, sqrt; + + if (!ins->mir()->operandIsNeverNegativeInfinity()) { + // Branch if not -Infinity. + masm.loadConstantDouble(NegativeInfinity<double>(), scratch); + + Assembler::DoubleCondition cond = Assembler::DoubleNotEqualOrUnordered; + if (ins->mir()->operandIsNeverNaN()) { + cond = Assembler::DoubleNotEqual; + } + masm.branchDouble(cond, input, scratch, &sqrt); + + // Math.pow(-Infinity, 0.5) == Infinity. + masm.zeroDouble(output); + masm.subDouble(scratch, output); + masm.jump(&done); + + masm.bind(&sqrt); + } + + if (!ins->mir()->operandIsNeverNegativeZero()) { + // Math.pow(-0, 0.5) == 0 == Math.pow(0, 0.5). + // Adding 0 converts any -0 to 0. + masm.zeroDouble(scratch); + masm.addDouble(input, scratch); + masm.vsqrtsd(scratch, output, output); + } else { + masm.vsqrtsd(input, output, output); + } + + masm.bind(&done); +} + +class OutOfLineUndoALUOperation + : public OutOfLineCodeBase<CodeGeneratorX86Shared> { + LInstruction* ins_; + + public: + explicit OutOfLineUndoALUOperation(LInstruction* ins) : ins_(ins) {} + + virtual void accept(CodeGeneratorX86Shared* codegen) override { + codegen->visitOutOfLineUndoALUOperation(this); + } + LInstruction* ins() const { return ins_; } +}; + +void CodeGenerator::visitAddI(LAddI* ins) { + if (ins->rhs()->isConstant()) { + masm.addl(Imm32(ToInt32(ins->rhs())), ToOperand(ins->lhs())); + } else { + masm.addl(ToOperand(ins->rhs()), ToRegister(ins->lhs())); + } + + if (ins->snapshot()) { + if (ins->recoversInput()) { + OutOfLineUndoALUOperation* ool = + new (alloc()) OutOfLineUndoALUOperation(ins); + addOutOfLineCode(ool, ins->mir()); + masm.j(Assembler::Overflow, ool->entry()); + } else { + bailoutIf(Assembler::Overflow, ins->snapshot()); + } + } +} + +void CodeGenerator::visitAddI64(LAddI64* lir) { + const LInt64Allocation lhs = lir->getInt64Operand(LAddI64::Lhs); + const LInt64Allocation rhs = lir->getInt64Operand(LAddI64::Rhs); + + MOZ_ASSERT(ToOutRegister64(lir) == ToRegister64(lhs)); + + if (IsConstant(rhs)) { + masm.add64(Imm64(ToInt64(rhs)), ToRegister64(lhs)); + return; + } + + masm.add64(ToOperandOrRegister64(rhs), ToRegister64(lhs)); +} + +void CodeGenerator::visitSubI(LSubI* ins) { + if (ins->rhs()->isConstant()) { + masm.subl(Imm32(ToInt32(ins->rhs())), ToOperand(ins->lhs())); + } else { + masm.subl(ToOperand(ins->rhs()), ToRegister(ins->lhs())); + } + + if (ins->snapshot()) { + if (ins->recoversInput()) { + OutOfLineUndoALUOperation* ool = + new (alloc()) OutOfLineUndoALUOperation(ins); + addOutOfLineCode(ool, ins->mir()); + masm.j(Assembler::Overflow, ool->entry()); + } else { + bailoutIf(Assembler::Overflow, ins->snapshot()); + } + } +} + +void CodeGenerator::visitSubI64(LSubI64* lir) { + const LInt64Allocation lhs = lir->getInt64Operand(LSubI64::Lhs); + const LInt64Allocation rhs = lir->getInt64Operand(LSubI64::Rhs); 
+ + MOZ_ASSERT(ToOutRegister64(lir) == ToRegister64(lhs)); + + if (IsConstant(rhs)) { + masm.sub64(Imm64(ToInt64(rhs)), ToRegister64(lhs)); + return; + } + + masm.sub64(ToOperandOrRegister64(rhs), ToRegister64(lhs)); +} + +void CodeGeneratorX86Shared::visitOutOfLineUndoALUOperation( + OutOfLineUndoALUOperation* ool) { + LInstruction* ins = ool->ins(); + Register reg = ToRegister(ins->getDef(0)); + + DebugOnly<LAllocation*> lhs = ins->getOperand(0); + LAllocation* rhs = ins->getOperand(1); + + MOZ_ASSERT(reg == ToRegister(lhs)); + MOZ_ASSERT_IF(rhs->isGeneralReg(), reg != ToRegister(rhs)); + + // Undo the effect of the ALU operation, which was performed on the output + // register and overflowed. Writing to the output register clobbered an + // input reg, and the original value of the input needs to be recovered + // to satisfy the constraint imposed by any RECOVERED_INPUT operands to + // the bailout snapshot. + + if (rhs->isConstant()) { + Imm32 constant(ToInt32(rhs)); + if (ins->isAddI()) { + masm.subl(constant, reg); + } else { + masm.addl(constant, reg); + } + } else { + if (ins->isAddI()) { + masm.subl(ToOperand(rhs), reg); + } else { + masm.addl(ToOperand(rhs), reg); + } + } + + bailout(ool->ins()->snapshot()); +} + +class MulNegativeZeroCheck : public OutOfLineCodeBase<CodeGeneratorX86Shared> { + LMulI* ins_; + + public: + explicit MulNegativeZeroCheck(LMulI* ins) : ins_(ins) {} + + virtual void accept(CodeGeneratorX86Shared* codegen) override { + codegen->visitMulNegativeZeroCheck(this); + } + LMulI* ins() const { return ins_; } +}; + +void CodeGenerator::visitMulI(LMulI* ins) { + const LAllocation* lhs = ins->lhs(); + const LAllocation* rhs = ins->rhs(); + MMul* mul = ins->mir(); + MOZ_ASSERT_IF(mul->mode() == MMul::Integer, + !mul->canBeNegativeZero() && !mul->canOverflow()); + + if (rhs->isConstant()) { + // Bailout on -0.0 + int32_t constant = ToInt32(rhs); + if (mul->canBeNegativeZero() && constant <= 0) { + Assembler::Condition bailoutCond = + (constant == 0) ? Assembler::Signed : Assembler::Equal; + masm.test32(ToRegister(lhs), ToRegister(lhs)); + bailoutIf(bailoutCond, ins->snapshot()); + } + + switch (constant) { + case -1: + masm.negl(ToOperand(lhs)); + break; + case 0: + masm.xorl(ToOperand(lhs), ToRegister(lhs)); + return; // escape overflow check; + case 1: + // nop + return; // escape overflow check; + case 2: + masm.addl(ToOperand(lhs), ToRegister(lhs)); + break; + default: + if (!mul->canOverflow() && constant > 0) { + // Use shift if cannot overflow and constant is power of 2 + int32_t shift = FloorLog2(constant); + if ((1 << shift) == constant) { + masm.shll(Imm32(shift), ToRegister(lhs)); + return; + } + } + masm.imull(Imm32(ToInt32(rhs)), ToRegister(lhs)); + } + + // Bailout on overflow + if (mul->canOverflow()) { + bailoutIf(Assembler::Overflow, ins->snapshot()); + } + } else { + masm.imull(ToOperand(rhs), ToRegister(lhs)); + + // Bailout on overflow + if (mul->canOverflow()) { + bailoutIf(Assembler::Overflow, ins->snapshot()); + } + + if (mul->canBeNegativeZero()) { + // Jump to an OOL path if the result is 0. 
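+      // Once overflow has been ruled out above, a zero product is the only
+      // case where the int32 result can differ from the JS value (which may
+      // be -0); visitMulNegativeZeroCheck below inspects the operand signs.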
+ MulNegativeZeroCheck* ool = new (alloc()) MulNegativeZeroCheck(ins); + addOutOfLineCode(ool, mul); + + masm.test32(ToRegister(lhs), ToRegister(lhs)); + masm.j(Assembler::Zero, ool->entry()); + masm.bind(ool->rejoin()); + } + } +} + +void CodeGenerator::visitMulI64(LMulI64* lir) { + const LInt64Allocation lhs = lir->getInt64Operand(LMulI64::Lhs); + const LInt64Allocation rhs = lir->getInt64Operand(LMulI64::Rhs); + + MOZ_ASSERT(ToRegister64(lhs) == ToOutRegister64(lir)); + + if (IsConstant(rhs)) { + int64_t constant = ToInt64(rhs); + switch (constant) { + case -1: + masm.neg64(ToRegister64(lhs)); + return; + case 0: + masm.xor64(ToRegister64(lhs), ToRegister64(lhs)); + return; + case 1: + // nop + return; + case 2: + masm.add64(ToRegister64(lhs), ToRegister64(lhs)); + return; + default: + if (constant > 0) { + // Use shift if constant is power of 2. + int32_t shift = mozilla::FloorLog2(constant); + if (int64_t(1) << shift == constant) { + masm.lshift64(Imm32(shift), ToRegister64(lhs)); + return; + } + } + Register temp = ToTempRegisterOrInvalid(lir->temp()); + masm.mul64(Imm64(constant), ToRegister64(lhs), temp); + } + } else { + Register temp = ToTempRegisterOrInvalid(lir->temp()); + masm.mul64(ToOperandOrRegister64(rhs), ToRegister64(lhs), temp); + } +} + +class ReturnZero : public OutOfLineCodeBase<CodeGeneratorX86Shared> { + Register reg_; + + public: + explicit ReturnZero(Register reg) : reg_(reg) {} + + virtual void accept(CodeGeneratorX86Shared* codegen) override { + codegen->visitReturnZero(this); + } + Register reg() const { return reg_; } +}; + +void CodeGeneratorX86Shared::visitReturnZero(ReturnZero* ool) { + masm.mov(ImmWord(0), ool->reg()); + masm.jmp(ool->rejoin()); +} + +void CodeGenerator::visitUDivOrMod(LUDivOrMod* ins) { + Register lhs = ToRegister(ins->lhs()); + Register rhs = ToRegister(ins->rhs()); + Register output = ToRegister(ins->output()); + + MOZ_ASSERT_IF(lhs != rhs, rhs != eax); + MOZ_ASSERT(rhs != edx); + MOZ_ASSERT_IF(output == eax, ToRegister(ins->remainder()) == edx); + + ReturnZero* ool = nullptr; + + // Put the lhs in eax. + if (lhs != eax) { + masm.mov(lhs, eax); + } + + // Prevent divide by zero. + if (ins->canBeDivideByZero()) { + masm.test32(rhs, rhs); + if (ins->mir()->isTruncated()) { + if (ins->trapOnError()) { + Label nonZero; + masm.j(Assembler::NonZero, &nonZero); + masm.wasmTrap(wasm::Trap::IntegerDivideByZero, ins->bytecodeOffset()); + masm.bind(&nonZero); + } else { + ool = new (alloc()) ReturnZero(output); + masm.j(Assembler::Zero, ool->entry()); + } + } else { + bailoutIf(Assembler::Zero, ins->snapshot()); + } + } + + // Zero extend the lhs into edx to make (edx:eax), since udiv is 64-bit. + masm.mov(ImmWord(0), edx); + masm.udiv(rhs); + + // If the remainder is > 0, bailout since this must be a double. + if (ins->mir()->isDiv() && !ins->mir()->toDiv()->canTruncateRemainder()) { + Register remainder = ToRegister(ins->remainder()); + masm.test32(remainder, remainder); + bailoutIf(Assembler::NonZero, ins->snapshot()); + } + + // Unsigned div or mod can return a value that's not a signed int32. + // If our users aren't expecting that, bail. 
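+  // Illustrative value: 0xFFFFFFF0 / 1 = 0xFFFFFFF0 = 4294967280, which has
+  // the sign bit set and so cannot be represented as an int32; the Signed
+  // bailout below catches exactly these results.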
+ if (!ins->mir()->isTruncated()) { + masm.test32(output, output); + bailoutIf(Assembler::Signed, ins->snapshot()); + } + + if (ool) { + addOutOfLineCode(ool, ins->mir()); + masm.bind(ool->rejoin()); + } +} + +void CodeGenerator::visitUDivOrModConstant(LUDivOrModConstant* ins) { + Register lhs = ToRegister(ins->numerator()); + Register output = ToRegister(ins->output()); + uint32_t d = ins->denominator(); + + // This emits the division answer into edx or the modulus answer into eax. + MOZ_ASSERT(output == eax || output == edx); + MOZ_ASSERT(lhs != eax && lhs != edx); + bool isDiv = (output == edx); + + if (d == 0) { + if (ins->mir()->isTruncated()) { + if (ins->trapOnError()) { + masm.wasmTrap(wasm::Trap::IntegerDivideByZero, ins->bytecodeOffset()); + } else { + masm.xorl(output, output); + } + } else { + bailout(ins->snapshot()); + } + return; + } + + // The denominator isn't a power of 2 (see LDivPowTwoI and LModPowTwoI). + MOZ_ASSERT((d & (d - 1)) != 0); + + auto rmc = ReciprocalMulConstants::computeUnsignedDivisionConstants(d); + + // We first compute (M * n) >> 32, where M = rmc.multiplier. + masm.movl(Imm32(rmc.multiplier), eax); + masm.umull(lhs); + if (rmc.multiplier > UINT32_MAX) { + // M >= 2^32 and shift == 0 is impossible, as d >= 2 implies that + // ((M * n) >> (32 + shift)) >= n > floor(n/d) whenever n >= d, + // contradicting the proof of correctness in computeDivisionConstants. + MOZ_ASSERT(rmc.shiftAmount > 0); + MOZ_ASSERT(rmc.multiplier < (int64_t(1) << 33)); + + // We actually computed edx = ((uint32_t(M) * n) >> 32) instead. Since + // (M * n) >> (32 + shift) is the same as (edx + n) >> shift, we can + // correct for the overflow. This case is a bit trickier than the signed + // case, though, as the (edx + n) addition itself can overflow; however, + // note that (edx + n) >> shift == (((n - edx) >> 1) + edx) >> (shift - 1), + // which is overflow-free. See Hacker's Delight, section 10-8 for details. + + // Compute (n - edx) >> 1 into eax. + masm.movl(lhs, eax); + masm.subl(edx, eax); + masm.shrl(Imm32(1), eax); + + // Finish the computation. + masm.addl(eax, edx); + masm.shrl(Imm32(rmc.shiftAmount - 1), edx); + } else { + masm.shrl(Imm32(rmc.shiftAmount), edx); + } + + // We now have the truncated division value in edx. If we're + // computing a modulus or checking whether the division resulted + // in an integer, we need to multiply the obtained value by d and + // finish the computation/check. + if (!isDiv) { + masm.imull(Imm32(d), edx, edx); + masm.movl(lhs, eax); + masm.subl(edx, eax); + + // The final result of the modulus op, just computed above by the + // sub instruction, can be a number in the range [2^31, 2^32). If + // this is the case and the modulus is not truncated, we must bail + // out. + if (!ins->mir()->isTruncated()) { + bailoutIf(Assembler::Signed, ins->snapshot()); + } + } else if (!ins->mir()->isTruncated()) { + masm.imull(Imm32(d), edx, eax); + masm.cmpl(lhs, eax); + bailoutIf(Assembler::NotEqual, ins->snapshot()); + } +} + +void CodeGeneratorX86Shared::visitMulNegativeZeroCheck( + MulNegativeZeroCheck* ool) { + LMulI* ins = ool->ins(); + Register result = ToRegister(ins->output()); + Operand lhsCopy = ToOperand(ins->lhsCopy()); + Operand rhs = ToOperand(ins->rhs()); + MOZ_ASSERT_IF(lhsCopy.kind() == Operand::REG, lhsCopy.reg() != result.code()); + + // Result is -0 if lhs or rhs is negative. 
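+  // Illustrative case: lhs = -3, rhs = 0. The machine product is 0, but the
+  // JS result of -3 * 0 is -0, so we must bail to produce a double. OR-ing
+  // the operands below sets the sign flag iff either one is negative, which
+  // is precisely when a zero product stands for -0.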
+ masm.movl(lhsCopy, result); + masm.orl(rhs, result); + bailoutIf(Assembler::Signed, ins->snapshot()); + + masm.mov(ImmWord(0), result); + masm.jmp(ool->rejoin()); +} + +void CodeGenerator::visitDivPowTwoI(LDivPowTwoI* ins) { + Register lhs = ToRegister(ins->numerator()); + DebugOnly<Register> output = ToRegister(ins->output()); + + int32_t shift = ins->shift(); + bool negativeDivisor = ins->negativeDivisor(); + MDiv* mir = ins->mir(); + + // We use defineReuseInput so these should always be the same, which is + // convenient since all of our instructions here are two-address. + MOZ_ASSERT(lhs == output); + + if (!mir->isTruncated() && negativeDivisor) { + // 0 divided by a negative number must return a double. + masm.test32(lhs, lhs); + bailoutIf(Assembler::Zero, ins->snapshot()); + } + + if (shift) { + if (!mir->isTruncated()) { + // If the remainder is != 0, bailout since this must be a double. + masm.test32(lhs, Imm32(UINT32_MAX >> (32 - shift))); + bailoutIf(Assembler::NonZero, ins->snapshot()); + } + + if (mir->isUnsigned()) { + masm.shrl(Imm32(shift), lhs); + } else { + // Adjust the value so that shifting produces a correctly + // rounded result when the numerator is negative. See 10-1 + // "Signed Division by a Known Power of 2" in Henry + // S. Warren, Jr.'s Hacker's Delight. + if (mir->canBeNegativeDividend() && mir->isTruncated()) { + // Note: There is no need to execute this code, which handles how to + // round the signed integer division towards 0, if we previously bailed + // due to a non-zero remainder. + Register lhsCopy = ToRegister(ins->numeratorCopy()); + MOZ_ASSERT(lhsCopy != lhs); + if (shift > 1) { + // Copy the sign bit of the numerator. (= (2^32 - 1) or 0) + masm.sarl(Imm32(31), lhs); + } + // Divide by 2^(32 - shift) + // i.e. (= (2^32 - 1) / 2^(32 - shift) or 0) + // i.e. (= (2^shift - 1) or 0) + masm.shrl(Imm32(32 - shift), lhs); + // If signed, make any 1 bit below the shifted bits to bubble up, such + // that once shifted the value would be rounded towards 0. + masm.addl(lhsCopy, lhs); + } + masm.sarl(Imm32(shift), lhs); + + if (negativeDivisor) { + masm.negl(lhs); + } + } + return; + } + + if (negativeDivisor) { + // INT32_MIN / -1 overflows. + masm.negl(lhs); + if (!mir->isTruncated()) { + bailoutIf(Assembler::Overflow, ins->snapshot()); + } else if (mir->trapOnError()) { + Label ok; + masm.j(Assembler::NoOverflow, &ok); + masm.wasmTrap(wasm::Trap::IntegerOverflow, mir->bytecodeOffset()); + masm.bind(&ok); + } + } else if (mir->isUnsigned() && !mir->isTruncated()) { + // Unsigned division by 1 can overflow if output is not + // truncated. + masm.test32(lhs, lhs); + bailoutIf(Assembler::Signed, ins->snapshot()); + } +} + +void CodeGenerator::visitDivOrModConstantI(LDivOrModConstantI* ins) { + Register lhs = ToRegister(ins->numerator()); + Register output = ToRegister(ins->output()); + int32_t d = ins->denominator(); + + // This emits the division answer into edx or the modulus answer into eax. + MOZ_ASSERT(output == eax || output == edx); + MOZ_ASSERT(lhs != eax && lhs != edx); + bool isDiv = (output == edx); + + // The absolute value of the denominator isn't a power of 2 (see LDivPowTwoI + // and LModPowTwoI). + MOZ_ASSERT((Abs(d) & (Abs(d) - 1)) != 0); + + // We will first divide by Abs(d), and negate the answer if d is negative. + // If desired, this can be avoided by generalizing computeDivisionConstants. + auto rmc = ReciprocalMulConstants::computeSignedDivisionConstants(Abs(d)); + + // We first compute (M * n) >> 32, where M = rmc.multiplier. 
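+  // For intuition only (constants shown are the classic Hacker's Delight
+  // values, not taken from this code): for d = 7, M = 0x92492493 and
+  // shift = 2. That M exceeds INT32_MAX, so the correction branch below
+  // adds n back into edx before the arithmetic shift.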
+ masm.movl(Imm32(rmc.multiplier), eax); + masm.imull(lhs); + if (rmc.multiplier > INT32_MAX) { + MOZ_ASSERT(rmc.multiplier < (int64_t(1) << 32)); + + // We actually computed edx = ((int32_t(M) * n) >> 32) instead. Since + // (M * n) >> 32 is the same as (edx + n), we can correct for the overflow. + // (edx + n) can't overflow, as n and edx have opposite signs because + // int32_t(M) is negative. + masm.addl(lhs, edx); + } + // (M * n) >> (32 + shift) is the truncated division answer if n is + // non-negative, as proved in the comments of computeDivisionConstants. We + // must add 1 later if n is negative to get the right answer in all cases. + masm.sarl(Imm32(rmc.shiftAmount), edx); + + // We'll subtract -1 instead of adding 1, because (n < 0 ? -1 : 0) can be + // computed with just a sign-extending shift of 31 bits. + if (ins->canBeNegativeDividend()) { + masm.movl(lhs, eax); + masm.sarl(Imm32(31), eax); + masm.subl(eax, edx); + } + + // After this, edx contains the correct truncated division result. + if (d < 0) { + masm.negl(edx); + } + + if (!isDiv) { + masm.imull(Imm32(-d), edx, eax); + masm.addl(lhs, eax); + } + + if (!ins->mir()->isTruncated()) { + if (isDiv) { + // This is a division op. Multiply the obtained value by d to check if + // the correct answer is an integer. This cannot overflow, since |d| > 1. + masm.imull(Imm32(d), edx, eax); + masm.cmp32(lhs, eax); + bailoutIf(Assembler::NotEqual, ins->snapshot()); + + // If lhs is zero and the divisor is negative, the answer should have + // been -0. + if (d < 0) { + masm.test32(lhs, lhs); + bailoutIf(Assembler::Zero, ins->snapshot()); + } + } else if (ins->canBeNegativeDividend()) { + // This is a mod op. If the computed value is zero and lhs + // is negative, the answer should have been -0. + Label done; + + masm.cmp32(lhs, Imm32(0)); + masm.j(Assembler::GreaterThanOrEqual, &done); + + masm.test32(eax, eax); + bailoutIf(Assembler::Zero, ins->snapshot()); + + masm.bind(&done); + } + } +} + +void CodeGenerator::visitDivI(LDivI* ins) { + Register remainder = ToRegister(ins->remainder()); + Register lhs = ToRegister(ins->lhs()); + Register rhs = ToRegister(ins->rhs()); + Register output = ToRegister(ins->output()); + + MDiv* mir = ins->mir(); + + MOZ_ASSERT_IF(lhs != rhs, rhs != eax); + MOZ_ASSERT(rhs != edx); + MOZ_ASSERT(remainder == edx); + MOZ_ASSERT(output == eax); + + Label done; + ReturnZero* ool = nullptr; + + // Put the lhs in eax, for either the negative overflow case or the regular + // divide case. + if (lhs != eax) { + masm.mov(lhs, eax); + } + + // Handle divide by zero. + if (mir->canBeDivideByZero()) { + masm.test32(rhs, rhs); + if (mir->trapOnError()) { + Label nonZero; + masm.j(Assembler::NonZero, &nonZero); + masm.wasmTrap(wasm::Trap::IntegerDivideByZero, mir->bytecodeOffset()); + masm.bind(&nonZero); + } else if (mir->canTruncateInfinities()) { + // Truncated division by zero is zero (Infinity|0 == 0) + if (!ool) { + ool = new (alloc()) ReturnZero(output); + } + masm.j(Assembler::Zero, ool->entry()); + } else { + MOZ_ASSERT(mir->fallible()); + bailoutIf(Assembler::Zero, ins->snapshot()); + } + } + + // Handle an integer overflow exception from -2147483648 / -1. 
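+  // INT32_MIN / -1 would be +2147483648, which does not fit in an int32;
+  // on x86, idiv raises a #DE fault for such a quotient, so that operand
+  // pair is filtered out below before the division is issued.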
+  if (mir->canBeNegativeOverflow()) {
+    Label notOverflow;
+    masm.cmp32(lhs, Imm32(INT32_MIN));
+    masm.j(Assembler::NotEqual, &notOverflow);
+    masm.cmp32(rhs, Imm32(-1));
+    if (mir->trapOnError()) {
+      masm.j(Assembler::NotEqual, &notOverflow);
+      masm.wasmTrap(wasm::Trap::IntegerOverflow, mir->bytecodeOffset());
+    } else if (mir->canTruncateOverflow()) {
+      // (-INT32_MIN)|0 == INT32_MIN and INT32_MIN is already in the
+      // output register (lhs == eax).
+      masm.j(Assembler::Equal, &done);
+    } else {
+      MOZ_ASSERT(mir->fallible());
+      bailoutIf(Assembler::Equal, ins->snapshot());
+    }
+    masm.bind(&notOverflow);
+  }
+
+  // Handle negative 0.
+  if (!mir->canTruncateNegativeZero() && mir->canBeNegativeZero()) {
+    Label nonzero;
+    masm.test32(lhs, lhs);
+    masm.j(Assembler::NonZero, &nonzero);
+    masm.cmp32(rhs, Imm32(0));
+    bailoutIf(Assembler::LessThan, ins->snapshot());
+    masm.bind(&nonzero);
+  }
+
+  // Sign extend the lhs into edx to make (edx:eax), since idiv is 64-bit.
+  if (lhs != eax) {
+    masm.mov(lhs, eax);
+  }
+  masm.cdq();
+  masm.idiv(rhs);
+
+  if (!mir->canTruncateRemainder()) {
+    // If the remainder is > 0, bailout since this must be a double.
+    masm.test32(remainder, remainder);
+    bailoutIf(Assembler::NonZero, ins->snapshot());
+  }
+
+  masm.bind(&done);
+
+  if (ool) {
+    addOutOfLineCode(ool, mir);
+    masm.bind(ool->rejoin());
+  }
+}
+
+void CodeGenerator::visitModPowTwoI(LModPowTwoI* ins) {
+  Register lhs = ToRegister(ins->getOperand(0));
+  int32_t shift = ins->shift();
+
+  Label negative;
+
+  if (!ins->mir()->isUnsigned() && ins->mir()->canBeNegativeDividend()) {
+    // Switch based on sign of the lhs.
+    // Positive numbers are just a bitmask
+    masm.branchTest32(Assembler::Signed, lhs, lhs, &negative);
+  }
+
+  masm.andl(Imm32((uint32_t(1) << shift) - 1), lhs);
+
+  if (!ins->mir()->isUnsigned() && ins->mir()->canBeNegativeDividend()) {
+    Label done;
+    masm.jump(&done);
+
+    // Negative numbers need a negate, bitmask, negate
+    masm.bind(&negative);
+
+    // Unlike in the visitModI case, we are not computing the mod by means of a
+    // division. Therefore, the divisor = -1 case isn't problematic (the andl
+    // always returns 0, which is what we expect).
+    //
+    // The negl instruction overflows if lhs == INT32_MIN, but this is also not
+    // a problem: shift is at most 31, and so the andl also always returns 0.
+    masm.negl(lhs);
+    masm.andl(Imm32((uint32_t(1) << shift) - 1), lhs);
+    masm.negl(lhs);
+
+    // Since a%b has the same sign as a, and a is negative in this branch,
+    // an answer of 0 means the correct result is actually -0. Bail out.
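+    // E.g. lhs = -8, shift = 2: negate -> 8, mask -> 0, negate -> 0, yet the
+    // JS value of -8 % 4 is -0, hence the Zero bailout just below when the
+    // result is not truncated.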
+    if (!ins->mir()->isTruncated()) {
+      bailoutIf(Assembler::Zero, ins->snapshot());
+    }
+    masm.bind(&done);
+  }
+}
+
+class ModOverflowCheck : public OutOfLineCodeBase<CodeGeneratorX86Shared> {
+  Label done_;
+  LModI* ins_;
+  Register rhs_;
+
+ public:
+  explicit ModOverflowCheck(LModI* ins, Register rhs) : ins_(ins), rhs_(rhs) {}
+
+  virtual void accept(CodeGeneratorX86Shared* codegen) override {
+    codegen->visitModOverflowCheck(this);
+  }
+  Label* done() { return &done_; }
+  LModI* ins() const { return ins_; }
+  Register rhs() const { return rhs_; }
+};
+
+void CodeGeneratorX86Shared::visitModOverflowCheck(ModOverflowCheck* ool) {
+  masm.cmp32(ool->rhs(), Imm32(-1));
+  if (ool->ins()->mir()->isTruncated()) {
+    masm.j(Assembler::NotEqual, ool->rejoin());
+    masm.mov(ImmWord(0), edx);
+    masm.jmp(ool->done());
+  } else {
+    bailoutIf(Assembler::Equal, ool->ins()->snapshot());
+    masm.jmp(ool->rejoin());
+  }
+}
+
+void CodeGenerator::visitModI(LModI* ins) {
+  Register remainder = ToRegister(ins->remainder());
+  Register lhs = ToRegister(ins->lhs());
+  Register rhs = ToRegister(ins->rhs());
+
+  // Required to use idiv.
+  MOZ_ASSERT_IF(lhs != rhs, rhs != eax);
+  MOZ_ASSERT(rhs != edx);
+  MOZ_ASSERT(remainder == edx);
+  MOZ_ASSERT(ToRegister(ins->getTemp(0)) == eax);
+
+  Label done;
+  ReturnZero* ool = nullptr;
+  ModOverflowCheck* overflow = nullptr;
+
+  // Set up eax in preparation for doing a div.
+  if (lhs != eax) {
+    masm.mov(lhs, eax);
+  }
+
+  MMod* mir = ins->mir();
+
+  // Prevent divide by zero.
+  if (mir->canBeDivideByZero()) {
+    masm.test32(rhs, rhs);
+    if (mir->isTruncated()) {
+      if (mir->trapOnError()) {
+        Label nonZero;
+        masm.j(Assembler::NonZero, &nonZero);
+        masm.wasmTrap(wasm::Trap::IntegerDivideByZero, mir->bytecodeOffset());
+        masm.bind(&nonZero);
+      } else {
+        if (!ool) {
+          ool = new (alloc()) ReturnZero(edx);
+        }
+        masm.j(Assembler::Zero, ool->entry());
+      }
+    } else {
+      bailoutIf(Assembler::Zero, ins->snapshot());
+    }
+  }
+
+  Label negative;
+
+  // Switch based on sign of the lhs.
+  if (mir->canBeNegativeDividend()) {
+    masm.branchTest32(Assembler::Signed, lhs, lhs, &negative);
+  }
+
+  // If lhs >= 0 then remainder = lhs % rhs. The remainder must be positive.
+  {
+    // Check if rhs is a power-of-two.
+    if (mir->canBePowerOfTwoDivisor()) {
+      MOZ_ASSERT(rhs != remainder);
+
+      // Rhs y is a power-of-two if (y & (y-1)) == 0. Note that if
+      // y is any negative number other than INT32_MIN, both y and
+      // y-1 will have the sign bit set so these are never optimized
+      // as powers-of-two. If y is INT32_MIN, y-1 will be INT32_MAX
+      // and because lhs >= 0 at this point, lhs & INT32_MAX returns
+      // the correct value.
+      Label notPowerOfTwo;
+      masm.mov(rhs, remainder);
+      masm.subl(Imm32(1), remainder);
+      masm.branchTest32(Assembler::NonZero, remainder, rhs, &notPowerOfTwo);
+      {
+        masm.andl(lhs, remainder);
+        masm.jmp(&done);
+      }
+      masm.bind(&notPowerOfTwo);
+    }
+
+    // Since lhs >= 0, the sign-extension will be 0
+    masm.mov(ImmWord(0), edx);
+    masm.idiv(rhs);
+  }
+
+  // Otherwise, we have to beware of two special cases:
+  if (mir->canBeNegativeDividend()) {
+    masm.jump(&done);
+
+    masm.bind(&negative);
+
+    // Prevent an integer overflow exception from -2147483648 % -1
+    masm.cmp32(lhs, Imm32(INT32_MIN));
+    overflow = new (alloc()) ModOverflowCheck(ins, rhs);
+    masm.j(Assembler::Equal, overflow->entry());
+    masm.bind(overflow->rejoin());
+    masm.cdq();
+    masm.idiv(rhs);
+
+    if (!mir->isTruncated()) {
+      // A remainder of 0 means that the rval must be -0, which is a double.
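+      // E.g. lhs = -4, rhs = 2: idiv leaves 0 in edx, but the JS value of
+      // -4 % 2 is -0, which is not representable as an int32.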
+ masm.test32(remainder, remainder); + bailoutIf(Assembler::Zero, ins->snapshot()); + } + } + + masm.bind(&done); + + if (overflow) { + addOutOfLineCode(overflow, mir); + masm.bind(overflow->done()); + } + + if (ool) { + addOutOfLineCode(ool, mir); + masm.bind(ool->rejoin()); + } +} + +void CodeGenerator::visitBitNotI(LBitNotI* ins) { + const LAllocation* input = ins->getOperand(0); + MOZ_ASSERT(!input->isConstant()); + + masm.notl(ToOperand(input)); +} + +void CodeGenerator::visitBitOpI(LBitOpI* ins) { + const LAllocation* lhs = ins->getOperand(0); + const LAllocation* rhs = ins->getOperand(1); + + switch (ins->bitop()) { + case JSOp::BitOr: + if (rhs->isConstant()) { + masm.orl(Imm32(ToInt32(rhs)), ToOperand(lhs)); + } else { + masm.orl(ToOperand(rhs), ToRegister(lhs)); + } + break; + case JSOp::BitXor: + if (rhs->isConstant()) { + masm.xorl(Imm32(ToInt32(rhs)), ToOperand(lhs)); + } else { + masm.xorl(ToOperand(rhs), ToRegister(lhs)); + } + break; + case JSOp::BitAnd: + if (rhs->isConstant()) { + masm.andl(Imm32(ToInt32(rhs)), ToOperand(lhs)); + } else { + masm.andl(ToOperand(rhs), ToRegister(lhs)); + } + break; + default: + MOZ_CRASH("unexpected binary opcode"); + } +} + +void CodeGenerator::visitBitOpI64(LBitOpI64* lir) { + const LInt64Allocation lhs = lir->getInt64Operand(LBitOpI64::Lhs); + const LInt64Allocation rhs = lir->getInt64Operand(LBitOpI64::Rhs); + + MOZ_ASSERT(ToOutRegister64(lir) == ToRegister64(lhs)); + + switch (lir->bitop()) { + case JSOp::BitOr: + if (IsConstant(rhs)) { + masm.or64(Imm64(ToInt64(rhs)), ToRegister64(lhs)); + } else { + masm.or64(ToOperandOrRegister64(rhs), ToRegister64(lhs)); + } + break; + case JSOp::BitXor: + if (IsConstant(rhs)) { + masm.xor64(Imm64(ToInt64(rhs)), ToRegister64(lhs)); + } else { + masm.xor64(ToOperandOrRegister64(rhs), ToRegister64(lhs)); + } + break; + case JSOp::BitAnd: + if (IsConstant(rhs)) { + masm.and64(Imm64(ToInt64(rhs)), ToRegister64(lhs)); + } else { + masm.and64(ToOperandOrRegister64(rhs), ToRegister64(lhs)); + } + break; + default: + MOZ_CRASH("unexpected binary opcode"); + } +} + +void CodeGenerator::visitShiftI(LShiftI* ins) { + Register lhs = ToRegister(ins->lhs()); + const LAllocation* rhs = ins->rhs(); + + if (rhs->isConstant()) { + int32_t shift = ToInt32(rhs) & 0x1F; + switch (ins->bitop()) { + case JSOp::Lsh: + if (shift) { + masm.lshift32(Imm32(shift), lhs); + } + break; + case JSOp::Rsh: + if (shift) { + masm.rshift32Arithmetic(Imm32(shift), lhs); + } + break; + case JSOp::Ursh: + if (shift) { + masm.rshift32(Imm32(shift), lhs); + } else if (ins->mir()->toUrsh()->fallible()) { + // x >>> 0 can overflow. + masm.test32(lhs, lhs); + bailoutIf(Assembler::Signed, ins->snapshot()); + } + break; + default: + MOZ_CRASH("Unexpected shift op"); + } + } else { + Register shift = ToRegister(rhs); + switch (ins->bitop()) { + case JSOp::Lsh: + masm.lshift32(shift, lhs); + break; + case JSOp::Rsh: + masm.rshift32Arithmetic(shift, lhs); + break; + case JSOp::Ursh: + masm.rshift32(shift, lhs); + if (ins->mir()->toUrsh()->fallible()) { + // x >>> 0 can overflow. 
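+        // The result of >>> is unsigned, so e.g. -1 >>> 0 is 4294967295,
+        // which does not fit in an int32; the sign test below bails in
+        // that case.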
+ masm.test32(lhs, lhs); + bailoutIf(Assembler::Signed, ins->snapshot()); + } + break; + default: + MOZ_CRASH("Unexpected shift op"); + } + } +} + +void CodeGenerator::visitShiftI64(LShiftI64* lir) { + const LInt64Allocation lhs = lir->getInt64Operand(LShiftI64::Lhs); + LAllocation* rhs = lir->getOperand(LShiftI64::Rhs); + + MOZ_ASSERT(ToOutRegister64(lir) == ToRegister64(lhs)); + + if (rhs->isConstant()) { + int32_t shift = int32_t(rhs->toConstant()->toInt64() & 0x3F); + switch (lir->bitop()) { + case JSOp::Lsh: + if (shift) { + masm.lshift64(Imm32(shift), ToRegister64(lhs)); + } + break; + case JSOp::Rsh: + if (shift) { + masm.rshift64Arithmetic(Imm32(shift), ToRegister64(lhs)); + } + break; + case JSOp::Ursh: + if (shift) { + masm.rshift64(Imm32(shift), ToRegister64(lhs)); + } + break; + default: + MOZ_CRASH("Unexpected shift op"); + } + return; + } + + Register shift = ToRegister(rhs); +#ifdef JS_CODEGEN_X86 + MOZ_ASSERT(shift == ecx); +#endif + switch (lir->bitop()) { + case JSOp::Lsh: + masm.lshift64(shift, ToRegister64(lhs)); + break; + case JSOp::Rsh: + masm.rshift64Arithmetic(shift, ToRegister64(lhs)); + break; + case JSOp::Ursh: + masm.rshift64(shift, ToRegister64(lhs)); + break; + default: + MOZ_CRASH("Unexpected shift op"); + } +} + +void CodeGenerator::visitUrshD(LUrshD* ins) { + Register lhs = ToRegister(ins->lhs()); + MOZ_ASSERT(ToRegister(ins->temp()) == lhs); + + const LAllocation* rhs = ins->rhs(); + FloatRegister out = ToFloatRegister(ins->output()); + + if (rhs->isConstant()) { + int32_t shift = ToInt32(rhs) & 0x1F; + if (shift) { + masm.shrl(Imm32(shift), lhs); + } + } else { + Register shift = ToRegister(rhs); + masm.rshift32(shift, lhs); + } + + masm.convertUInt32ToDouble(lhs, out); +} + +Operand CodeGeneratorX86Shared::ToOperand(const LAllocation& a) { + if (a.isGeneralReg()) { + return Operand(a.toGeneralReg()->reg()); + } + if (a.isFloatReg()) { + return Operand(a.toFloatReg()->reg()); + } + return Operand(ToAddress(a)); +} + +Operand CodeGeneratorX86Shared::ToOperand(const LAllocation* a) { + return ToOperand(*a); +} + +Operand CodeGeneratorX86Shared::ToOperand(const LDefinition* def) { + return ToOperand(def->output()); +} + +MoveOperand CodeGeneratorX86Shared::toMoveOperand(LAllocation a) const { + if (a.isGeneralReg()) { + return MoveOperand(ToRegister(a)); + } + if (a.isFloatReg()) { + return MoveOperand(ToFloatRegister(a)); + } + MoveOperand::Kind kind = a.isStackArea() ? MoveOperand::Kind::EffectiveAddress + : MoveOperand::Kind::Memory; + return MoveOperand(ToAddress(a), kind); +} + +class OutOfLineTableSwitch : public OutOfLineCodeBase<CodeGeneratorX86Shared> { + MTableSwitch* mir_; + CodeLabel jumpLabel_; + + void accept(CodeGeneratorX86Shared* codegen) override { + codegen->visitOutOfLineTableSwitch(this); + } + + public: + explicit OutOfLineTableSwitch(MTableSwitch* mir) : mir_(mir) {} + + MTableSwitch* mir() const { return mir_; } + + CodeLabel* jumpLabel() { return &jumpLabel_; } +}; + +void CodeGeneratorX86Shared::visitOutOfLineTableSwitch( + OutOfLineTableSwitch* ool) { + MTableSwitch* mir = ool->mir(); + + masm.haltingAlign(sizeof(void*)); + masm.bind(ool->jumpLabel()); + masm.addCodeLabel(*ool->jumpLabel()); + + for (size_t i = 0; i < mir->numCases(); i++) { + LBlock* caseblock = skipTrivialBlocks(mir->getCase(i))->lir(); + Label* caseheader = caseblock->label(); + uint32_t caseoffset = caseheader->offset(); + + // The entries of the jump table need to be absolute addresses and thus + // must be patched after codegen is finished. 
+ CodeLabel cl; + masm.writeCodePointer(&cl); + cl.target()->bind(caseoffset); + masm.addCodeLabel(cl); + } +} + +void CodeGeneratorX86Shared::emitTableSwitchDispatch(MTableSwitch* mir, + Register index, + Register base) { + Label* defaultcase = skipTrivialBlocks(mir->getDefault())->lir()->label(); + + // Lower value with low value + if (mir->low() != 0) { + masm.subl(Imm32(mir->low()), index); + } + + // Jump to default case if input is out of range + int32_t cases = mir->numCases(); + masm.cmp32(index, Imm32(cases)); + masm.j(AssemblerX86Shared::AboveOrEqual, defaultcase); + + // To fill in the CodeLabels for the case entries, we need to first + // generate the case entries (we don't yet know their offsets in the + // instruction stream). + OutOfLineTableSwitch* ool = new (alloc()) OutOfLineTableSwitch(mir); + addOutOfLineCode(ool, mir); + + // Compute the position where a pointer to the right case stands. + masm.mov(ool->jumpLabel(), base); + BaseIndex pointer(base, index, ScalePointer); + + // Jump to the right case + masm.branchToComputedAddress(pointer); +} + +void CodeGenerator::visitMathD(LMathD* math) { + FloatRegister lhs = ToFloatRegister(math->lhs()); + Operand rhs = ToOperand(math->rhs()); + FloatRegister output = ToFloatRegister(math->output()); + + switch (math->jsop()) { + case JSOp::Add: + masm.vaddsd(rhs, lhs, output); + break; + case JSOp::Sub: + masm.vsubsd(rhs, lhs, output); + break; + case JSOp::Mul: + masm.vmulsd(rhs, lhs, output); + break; + case JSOp::Div: + masm.vdivsd(rhs, lhs, output); + break; + default: + MOZ_CRASH("unexpected opcode"); + } +} + +void CodeGenerator::visitMathF(LMathF* math) { + FloatRegister lhs = ToFloatRegister(math->lhs()); + Operand rhs = ToOperand(math->rhs()); + FloatRegister output = ToFloatRegister(math->output()); + + switch (math->jsop()) { + case JSOp::Add: + masm.vaddss(rhs, lhs, output); + break; + case JSOp::Sub: + masm.vsubss(rhs, lhs, output); + break; + case JSOp::Mul: + masm.vmulss(rhs, lhs, output); + break; + case JSOp::Div: + masm.vdivss(rhs, lhs, output); + break; + default: + MOZ_CRASH("unexpected opcode"); + } +} + +void CodeGenerator::visitNearbyInt(LNearbyInt* lir) { + FloatRegister input = ToFloatRegister(lir->input()); + FloatRegister output = ToFloatRegister(lir->output()); + + RoundingMode roundingMode = lir->mir()->roundingMode(); + masm.nearbyIntDouble(roundingMode, input, output); +} + +void CodeGenerator::visitNearbyIntF(LNearbyIntF* lir) { + FloatRegister input = ToFloatRegister(lir->input()); + FloatRegister output = ToFloatRegister(lir->output()); + + RoundingMode roundingMode = lir->mir()->roundingMode(); + masm.nearbyIntFloat32(roundingMode, input, output); +} + +void CodeGenerator::visitEffectiveAddress(LEffectiveAddress* ins) { + const MEffectiveAddress* mir = ins->mir(); + Register base = ToRegister(ins->base()); + Register index = ToRegister(ins->index()); + Register output = ToRegister(ins->output()); + masm.leal(Operand(base, index, mir->scale(), mir->displacement()), output); +} + +void CodeGeneratorX86Shared::generateInvalidateEpilogue() { + // Ensure that there is enough space in the buffer for the OsiPoint + // patching to occur. Otherwise, we could overwrite the invalidation + // epilogue. + for (size_t i = 0; i < sizeof(void*); i += Assembler::NopSize()) { + masm.nop(); + } + + masm.bind(&invalidate_); + + // Push the Ion script onto the stack (when we determine what that pointer + // is). 
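+  // The -1 pushed below is only a placeholder; invalidateEpilogueData_
+  // records where that immediate lives so it can later be patched with the
+  // real IonScript pointer when this code is invalidated.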
+ invalidateEpilogueData_ = masm.pushWithPatch(ImmWord(uintptr_t(-1))); + + // Jump to the invalidator which will replace the current frame. + TrampolinePtr thunk = gen->jitRuntime()->getInvalidationThunk(); + masm.jump(thunk); +} + +void CodeGenerator::visitNegI(LNegI* ins) { + Register input = ToRegister(ins->input()); + MOZ_ASSERT(input == ToRegister(ins->output())); + + masm.neg32(input); +} + +void CodeGenerator::visitNegI64(LNegI64* ins) { + Register64 input = ToRegister64(ins->getInt64Operand(0)); + MOZ_ASSERT(input == ToOutRegister64(ins)); + masm.neg64(input); +} + +void CodeGenerator::visitNegD(LNegD* ins) { + FloatRegister input = ToFloatRegister(ins->input()); + MOZ_ASSERT(input == ToFloatRegister(ins->output())); + + masm.negateDouble(input); +} + +void CodeGenerator::visitNegF(LNegF* ins) { + FloatRegister input = ToFloatRegister(ins->input()); + MOZ_ASSERT(input == ToFloatRegister(ins->output())); + + masm.negateFloat(input); +} + +void CodeGenerator::visitCompareExchangeTypedArrayElement( + LCompareExchangeTypedArrayElement* lir) { + Register elements = ToRegister(lir->elements()); + AnyRegister output = ToAnyRegister(lir->output()); + Register temp = + lir->temp()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp()); + + Register oldval = ToRegister(lir->oldval()); + Register newval = ToRegister(lir->newval()); + + Scalar::Type arrayType = lir->mir()->arrayType(); + + if (lir->index()->isConstant()) { + Address dest = ToAddress(elements, lir->index(), arrayType); + masm.compareExchangeJS(arrayType, Synchronization::Full(), dest, oldval, + newval, temp, output); + } else { + BaseIndex dest(elements, ToRegister(lir->index()), + ScaleFromScalarType(arrayType)); + masm.compareExchangeJS(arrayType, Synchronization::Full(), dest, oldval, + newval, temp, output); + } +} + +void CodeGenerator::visitAtomicExchangeTypedArrayElement( + LAtomicExchangeTypedArrayElement* lir) { + Register elements = ToRegister(lir->elements()); + AnyRegister output = ToAnyRegister(lir->output()); + Register temp = + lir->temp()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp()); + + Register value = ToRegister(lir->value()); + + Scalar::Type arrayType = lir->mir()->arrayType(); + + if (lir->index()->isConstant()) { + Address dest = ToAddress(elements, lir->index(), arrayType); + masm.atomicExchangeJS(arrayType, Synchronization::Full(), dest, value, temp, + output); + } else { + BaseIndex dest(elements, ToRegister(lir->index()), + ScaleFromScalarType(arrayType)); + masm.atomicExchangeJS(arrayType, Synchronization::Full(), dest, value, temp, + output); + } +} + +template <typename T> +static inline void AtomicBinopToTypedArray(MacroAssembler& masm, AtomicOp op, + Scalar::Type arrayType, + const LAllocation* value, + const T& mem, Register temp1, + Register temp2, AnyRegister output) { + if (value->isConstant()) { + masm.atomicFetchOpJS(arrayType, Synchronization::Full(), op, + Imm32(ToInt32(value)), mem, temp1, temp2, output); + } else { + masm.atomicFetchOpJS(arrayType, Synchronization::Full(), op, + ToRegister(value), mem, temp1, temp2, output); + } +} + +void CodeGenerator::visitAtomicTypedArrayElementBinop( + LAtomicTypedArrayElementBinop* lir) { + MOZ_ASSERT(!lir->mir()->isForEffect()); + + AnyRegister output = ToAnyRegister(lir->output()); + Register elements = ToRegister(lir->elements()); + Register temp1 = + lir->temp1()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp1()); + Register temp2 = + lir->temp2()->isBogusTemp() ? 
InvalidReg : ToRegister(lir->temp2()); + const LAllocation* value = lir->value(); + + Scalar::Type arrayType = lir->mir()->arrayType(); + + if (lir->index()->isConstant()) { + Address mem = ToAddress(elements, lir->index(), arrayType); + AtomicBinopToTypedArray(masm, lir->mir()->operation(), arrayType, value, + mem, temp1, temp2, output); + } else { + BaseIndex mem(elements, ToRegister(lir->index()), + ScaleFromScalarType(arrayType)); + AtomicBinopToTypedArray(masm, lir->mir()->operation(), arrayType, value, + mem, temp1, temp2, output); + } +} + +template <typename T> +static inline void AtomicBinopToTypedArray(MacroAssembler& masm, + Scalar::Type arrayType, AtomicOp op, + const LAllocation* value, + const T& mem) { + if (value->isConstant()) { + masm.atomicEffectOpJS(arrayType, Synchronization::Full(), op, + Imm32(ToInt32(value)), mem, InvalidReg); + } else { + masm.atomicEffectOpJS(arrayType, Synchronization::Full(), op, + ToRegister(value), mem, InvalidReg); + } +} + +void CodeGenerator::visitAtomicTypedArrayElementBinopForEffect( + LAtomicTypedArrayElementBinopForEffect* lir) { + MOZ_ASSERT(lir->mir()->isForEffect()); + + Register elements = ToRegister(lir->elements()); + const LAllocation* value = lir->value(); + Scalar::Type arrayType = lir->mir()->arrayType(); + + if (lir->index()->isConstant()) { + Address mem = ToAddress(elements, lir->index(), arrayType); + AtomicBinopToTypedArray(masm, arrayType, lir->mir()->operation(), value, + mem); + } else { + BaseIndex mem(elements, ToRegister(lir->index()), + ScaleFromScalarType(arrayType)); + AtomicBinopToTypedArray(masm, arrayType, lir->mir()->operation(), value, + mem); + } +} + +void CodeGenerator::visitMemoryBarrier(LMemoryBarrier* ins) { + if (ins->type() & MembarStoreLoad) { + masm.storeLoadFence(); + } +} + +void CodeGeneratorX86Shared::visitOutOfLineWasmTruncateCheck( + OutOfLineWasmTruncateCheck* ool) { + FloatRegister input = ool->input(); + Register output = ool->output(); + Register64 output64 = ool->output64(); + MIRType fromType = ool->fromType(); + MIRType toType = ool->toType(); + Label* oolRejoin = ool->rejoin(); + TruncFlags flags = ool->flags(); + wasm::BytecodeOffset off = ool->bytecodeOffset(); + + if (fromType == MIRType::Float32) { + if (toType == MIRType::Int32) { + masm.oolWasmTruncateCheckF32ToI32(input, output, flags, off, oolRejoin); + } else if (toType == MIRType::Int64) { + masm.oolWasmTruncateCheckF32ToI64(input, output64, flags, off, oolRejoin); + } else { + MOZ_CRASH("unexpected type"); + } + } else if (fromType == MIRType::Double) { + if (toType == MIRType::Int32) { + masm.oolWasmTruncateCheckF64ToI32(input, output, flags, off, oolRejoin); + } else if (toType == MIRType::Int64) { + masm.oolWasmTruncateCheckF64ToI64(input, output64, flags, off, oolRejoin); + } else { + MOZ_CRASH("unexpected type"); + } + } else { + MOZ_CRASH("unexpected type"); + } +} + +void CodeGeneratorX86Shared::canonicalizeIfDeterministic( + Scalar::Type type, const LAllocation* value) { +#ifdef DEBUG + if (!js::SupportDifferentialTesting()) { + return; + } + + switch (type) { + case Scalar::Float32: { + FloatRegister in = ToFloatRegister(value); + masm.canonicalizeFloatIfDeterministic(in); + break; + } + case Scalar::Float64: { + FloatRegister in = ToFloatRegister(value); + masm.canonicalizeDoubleIfDeterministic(in); + break; + } + default: { + // Other types don't need canonicalization. 
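+      // Only Float32/Float64 can carry nondeterministic NaN payloads; integer
+      // values are already deterministic, so nothing needs to be done here.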
+ break; + } + } +#endif // DEBUG +} + +template <typename T> +Operand CodeGeneratorX86Shared::toMemoryAccessOperand(T* lir, int32_t disp) { + const LAllocation* ptr = lir->ptr(); +#ifdef JS_CODEGEN_X86 + const LAllocation* memoryBase = lir->memoryBase(); + Operand destAddr = ptr->isBogus() ? Operand(ToRegister(memoryBase), disp) + : Operand(ToRegister(memoryBase), + ToRegister(ptr), TimesOne, disp); +#else + Operand destAddr = ptr->isBogus() + ? Operand(HeapReg, disp) + : Operand(HeapReg, ToRegister(ptr), TimesOne, disp); +#endif + return destAddr; +} + +void CodeGenerator::visitCopySignF(LCopySignF* lir) { + FloatRegister lhs = ToFloatRegister(lir->getOperand(0)); + FloatRegister rhs = ToFloatRegister(lir->getOperand(1)); + + FloatRegister out = ToFloatRegister(lir->output()); + + if (lhs == rhs) { + if (lhs != out) { + masm.moveFloat32(lhs, out); + } + return; + } + + masm.copySignFloat32(lhs, rhs, out); +} + +void CodeGenerator::visitCopySignD(LCopySignD* lir) { + FloatRegister lhs = ToFloatRegister(lir->getOperand(0)); + FloatRegister rhs = ToFloatRegister(lir->getOperand(1)); + + FloatRegister out = ToFloatRegister(lir->output()); + + if (lhs == rhs) { + if (lhs != out) { + masm.moveDouble(lhs, out); + } + return; + } + + masm.copySignDouble(lhs, rhs, out); +} + +void CodeGenerator::visitRotateI64(LRotateI64* lir) { + MRotate* mir = lir->mir(); + LAllocation* count = lir->count(); + + Register64 input = ToRegister64(lir->input()); + Register64 output = ToOutRegister64(lir); + Register temp = ToTempRegisterOrInvalid(lir->temp()); + + MOZ_ASSERT(input == output); + + if (count->isConstant()) { + int32_t c = int32_t(count->toConstant()->toInt64() & 0x3F); + if (!c) { + return; + } + if (mir->isLeftRotate()) { + masm.rotateLeft64(Imm32(c), input, output, temp); + } else { + masm.rotateRight64(Imm32(c), input, output, temp); + } + } else { + if (mir->isLeftRotate()) { + masm.rotateLeft64(ToRegister(count), input, output, temp); + } else { + masm.rotateRight64(ToRegister(count), input, output, temp); + } + } +} + +void CodeGenerator::visitPopcntI64(LPopcntI64* lir) { + Register64 input = ToRegister64(lir->getInt64Operand(0)); + Register64 output = ToOutRegister64(lir); + Register temp = InvalidReg; + if (!AssemblerX86Shared::HasPOPCNT()) { + temp = ToRegister(lir->getTemp(0)); + } + + masm.popcnt64(input, output, temp); +} + +void CodeGenerator::visitSimd128(LSimd128* ins) { +#ifdef ENABLE_WASM_SIMD + const LDefinition* out = ins->getDef(0); + masm.loadConstantSimd128(ins->simd128(), ToFloatRegister(out)); +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void CodeGenerator::visitWasmTernarySimd128(LWasmTernarySimd128* ins) { +#ifdef ENABLE_WASM_SIMD + switch (ins->simdOp()) { + case wasm::SimdOp::V128Bitselect: { + FloatRegister lhsDest = ToFloatRegister(ins->v0()); + FloatRegister rhs = ToFloatRegister(ins->v1()); + FloatRegister control = ToFloatRegister(ins->v2()); + FloatRegister temp = ToFloatRegister(ins->temp()); + masm.bitwiseSelectSimd128(control, lhsDest, rhs, lhsDest, temp); + break; + } + case wasm::SimdOp::F32x4RelaxedFma: + masm.fmaFloat32x4(ToFloatRegister(ins->v0()), ToFloatRegister(ins->v1()), + ToFloatRegister(ins->v2())); + break; + case wasm::SimdOp::F32x4RelaxedFnma: + masm.fnmaFloat32x4(ToFloatRegister(ins->v0()), ToFloatRegister(ins->v1()), + ToFloatRegister(ins->v2())); + break; + case wasm::SimdOp::F64x2RelaxedFma: + masm.fmaFloat64x2(ToFloatRegister(ins->v0()), ToFloatRegister(ins->v1()), + ToFloatRegister(ins->v2())); + break; + case wasm::SimdOp::F64x2RelaxedFnma: + 
masm.fnmaFloat64x2(ToFloatRegister(ins->v0()), ToFloatRegister(ins->v1()), + ToFloatRegister(ins->v2())); + break; + case wasm::SimdOp::I8x16RelaxedLaneSelect: + case wasm::SimdOp::I16x8RelaxedLaneSelect: + case wasm::SimdOp::I32x4RelaxedLaneSelect: + case wasm::SimdOp::I64x2RelaxedLaneSelect: { + FloatRegister lhs = ToFloatRegister(ins->v0()); + FloatRegister rhs = ToFloatRegister(ins->v1()); + FloatRegister mask = ToFloatRegister(ins->v2()); + FloatRegister dest = ToFloatRegister(ins->output()); + masm.laneSelectSimd128(mask, lhs, rhs, dest); + break; + } + case wasm::SimdOp::I32x4DotI8x16I7x16AddS: + masm.dotInt8x16Int7x16ThenAdd(ToFloatRegister(ins->v0()), + ToFloatRegister(ins->v1()), + ToFloatRegister(ins->v2())); + break; + default: + MOZ_CRASH("NYI"); + } +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void CodeGenerator::visitWasmBinarySimd128(LWasmBinarySimd128* ins) { +#ifdef ENABLE_WASM_SIMD + FloatRegister lhs = ToFloatRegister(ins->lhsDest()); + FloatRegister rhs = ToFloatRegister(ins->rhs()); + FloatRegister temp1 = ToTempFloatRegisterOrInvalid(ins->getTemp(0)); + FloatRegister temp2 = ToTempFloatRegisterOrInvalid(ins->getTemp(1)); + FloatRegister dest = ToFloatRegister(ins->output()); + + switch (ins->simdOp()) { + case wasm::SimdOp::V128And: + masm.bitwiseAndSimd128(lhs, rhs, dest); + break; + case wasm::SimdOp::V128Or: + masm.bitwiseOrSimd128(lhs, rhs, dest); + break; + case wasm::SimdOp::V128Xor: + masm.bitwiseXorSimd128(lhs, rhs, dest); + break; + case wasm::SimdOp::V128AndNot: + // x86/x64 specific: The CPU provides ~A & B. The operands were swapped + // during lowering, and we'll compute A & ~B here as desired. + masm.bitwiseNotAndSimd128(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16AvgrU: + masm.unsignedAverageInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8AvgrU: + masm.unsignedAverageInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16Add: + masm.addInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16AddSatS: + masm.addSatInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16AddSatU: + masm.unsignedAddSatInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16Sub: + masm.subInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16SubSatS: + masm.subSatInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16SubSatU: + masm.unsignedSubSatInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16MinS: + masm.minInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16MinU: + masm.unsignedMinInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16MaxS: + masm.maxInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16MaxU: + masm.unsignedMaxInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8Add: + masm.addInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8AddSatS: + masm.addSatInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8AddSatU: + masm.unsignedAddSatInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8Sub: + masm.subInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8SubSatS: + masm.subSatInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8SubSatU: + masm.unsignedSubSatInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8Mul: + masm.mulInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8MinS: + masm.minInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8MinU: + masm.unsignedMinInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8MaxS: + masm.maxInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8MaxU: + 
masm.unsignedMaxInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4Add: + masm.addInt32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4Sub: + masm.subInt32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4Mul: + masm.mulInt32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4MinS: + masm.minInt32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4MinU: + masm.unsignedMinInt32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4MaxS: + masm.maxInt32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4MaxU: + masm.unsignedMaxInt32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::I64x2Add: + masm.addInt64x2(lhs, rhs, dest); + break; + case wasm::SimdOp::I64x2Sub: + masm.subInt64x2(lhs, rhs, dest); + break; + case wasm::SimdOp::I64x2Mul: + masm.mulInt64x2(lhs, rhs, dest, temp1); + break; + case wasm::SimdOp::F32x4Add: + masm.addFloat32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::F32x4Sub: + masm.subFloat32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::F32x4Mul: + masm.mulFloat32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::F32x4Div: + masm.divFloat32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::F32x4Min: + masm.minFloat32x4(lhs, rhs, dest, temp1, temp2); + break; + case wasm::SimdOp::F32x4Max: + masm.maxFloat32x4(lhs, rhs, dest, temp1, temp2); + break; + case wasm::SimdOp::F64x2Add: + masm.addFloat64x2(lhs, rhs, dest); + break; + case wasm::SimdOp::F64x2Sub: + masm.subFloat64x2(lhs, rhs, dest); + break; + case wasm::SimdOp::F64x2Mul: + masm.mulFloat64x2(lhs, rhs, dest); + break; + case wasm::SimdOp::F64x2Div: + masm.divFloat64x2(lhs, rhs, dest); + break; + case wasm::SimdOp::F64x2Min: + masm.minFloat64x2(lhs, rhs, dest, temp1, temp2); + break; + case wasm::SimdOp::F64x2Max: + masm.maxFloat64x2(lhs, rhs, dest, temp1, temp2); + break; + case wasm::SimdOp::I8x16Swizzle: + masm.swizzleInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16RelaxedSwizzle: + masm.swizzleInt8x16Relaxed(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16NarrowI16x8S: + masm.narrowInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16NarrowI16x8U: + masm.unsignedNarrowInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8NarrowI32x4S: + masm.narrowInt32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8NarrowI32x4U: + masm.unsignedNarrowInt32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16Eq: + masm.compareInt8x16(Assembler::Equal, lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16Ne: + masm.compareInt8x16(Assembler::NotEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16LtS: + masm.compareInt8x16(Assembler::LessThan, lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16GtS: + masm.compareInt8x16(Assembler::GreaterThan, lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16LeS: + masm.compareInt8x16(Assembler::LessThanOrEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16GeS: + masm.compareInt8x16(Assembler::GreaterThanOrEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16LtU: + masm.compareInt8x16(Assembler::Below, lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16GtU: + masm.compareInt8x16(Assembler::Above, lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16LeU: + masm.compareInt8x16(Assembler::BelowOrEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16GeU: + masm.compareInt8x16(Assembler::AboveOrEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8Eq: + masm.compareInt16x8(Assembler::Equal, lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8Ne: + masm.compareInt16x8(Assembler::NotEqual, lhs, rhs, dest); 
+ break; + case wasm::SimdOp::I16x8LtS: + masm.compareInt16x8(Assembler::LessThan, lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8GtS: + masm.compareInt16x8(Assembler::GreaterThan, lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8LeS: + masm.compareInt16x8(Assembler::LessThanOrEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8GeS: + masm.compareInt16x8(Assembler::GreaterThanOrEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8LtU: + masm.compareInt16x8(Assembler::Below, lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8GtU: + masm.compareInt16x8(Assembler::Above, lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8LeU: + masm.compareInt16x8(Assembler::BelowOrEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8GeU: + masm.compareInt16x8(Assembler::AboveOrEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4Eq: + masm.compareInt32x4(Assembler::Equal, lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4Ne: + masm.compareInt32x4(Assembler::NotEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4LtS: + masm.compareInt32x4(Assembler::LessThan, lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4GtS: + masm.compareInt32x4(Assembler::GreaterThan, lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4LeS: + masm.compareInt32x4(Assembler::LessThanOrEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4GeS: + masm.compareInt32x4(Assembler::GreaterThanOrEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4LtU: + masm.compareInt32x4(Assembler::Below, lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4GtU: + masm.compareInt32x4(Assembler::Above, lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4LeU: + masm.compareInt32x4(Assembler::BelowOrEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4GeU: + masm.compareInt32x4(Assembler::AboveOrEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::I64x2Eq: + masm.compareForEqualityInt64x2(Assembler::Equal, lhs, rhs, dest); + break; + case wasm::SimdOp::I64x2Ne: + masm.compareForEqualityInt64x2(Assembler::NotEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::I64x2LtS: + masm.compareForOrderingInt64x2(Assembler::LessThan, lhs, rhs, dest, temp1, + temp2); + break; + case wasm::SimdOp::I64x2GtS: + masm.compareForOrderingInt64x2(Assembler::GreaterThan, lhs, rhs, dest, + temp1, temp2); + break; + case wasm::SimdOp::I64x2LeS: + masm.compareForOrderingInt64x2(Assembler::LessThanOrEqual, lhs, rhs, dest, + temp1, temp2); + break; + case wasm::SimdOp::I64x2GeS: + masm.compareForOrderingInt64x2(Assembler::GreaterThanOrEqual, lhs, rhs, + dest, temp1, temp2); + break; + case wasm::SimdOp::F32x4Eq: + masm.compareFloat32x4(Assembler::Equal, lhs, rhs, dest); + break; + case wasm::SimdOp::F32x4Ne: + masm.compareFloat32x4(Assembler::NotEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::F32x4Lt: + masm.compareFloat32x4(Assembler::LessThan, lhs, rhs, dest); + break; + case wasm::SimdOp::F32x4Le: + masm.compareFloat32x4(Assembler::LessThanOrEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::F64x2Eq: + masm.compareFloat64x2(Assembler::Equal, lhs, rhs, dest); + break; + case wasm::SimdOp::F64x2Ne: + masm.compareFloat64x2(Assembler::NotEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::F64x2Lt: + masm.compareFloat64x2(Assembler::LessThan, lhs, rhs, dest); + break; + case wasm::SimdOp::F64x2Le: + masm.compareFloat64x2(Assembler::LessThanOrEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::F32x4PMax: + // `lhs` and `rhs` are swapped, for non-VEX platforms the output is rhs. 
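+      // Wasm pmin/pmax are plain "b < a" selects with no NaN canonicalization,
+      // so a single (v)maxps/(v)minps with the operands in this order matches
+      // their NaN and signed-zero behaviour without any extra fixup.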
+ masm.pseudoMaxFloat32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::F32x4PMin: + // `lhs` and `rhs` are swapped, for non-VEX platforms the output is rhs. + masm.pseudoMinFloat32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::F64x2PMax: + // `lhs` and `rhs` are swapped, for non-VEX platforms the output is rhs. + masm.pseudoMaxFloat64x2(lhs, rhs, dest); + break; + case wasm::SimdOp::F64x2PMin: + // `lhs` and `rhs` are swapped, for non-VEX platforms the output is rhs. + masm.pseudoMinFloat64x2(lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4DotI16x8S: + masm.widenDotInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8ExtmulLowI8x16S: + masm.extMulLowInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8ExtmulHighI8x16S: + masm.extMulHighInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8ExtmulLowI8x16U: + masm.unsignedExtMulLowInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8ExtmulHighI8x16U: + masm.unsignedExtMulHighInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4ExtmulLowI16x8S: + masm.extMulLowInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4ExtmulHighI16x8S: + masm.extMulHighInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4ExtmulLowI16x8U: + masm.unsignedExtMulLowInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4ExtmulHighI16x8U: + masm.unsignedExtMulHighInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I64x2ExtmulLowI32x4S: + masm.extMulLowInt32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::I64x2ExtmulHighI32x4S: + masm.extMulHighInt32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::I64x2ExtmulLowI32x4U: + masm.unsignedExtMulLowInt32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::I64x2ExtmulHighI32x4U: + masm.unsignedExtMulHighInt32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8Q15MulrSatS: + masm.q15MulrSatInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::F32x4RelaxedMin: + masm.minFloat32x4Relaxed(lhs, rhs, dest); + break; + case wasm::SimdOp::F32x4RelaxedMax: + masm.maxFloat32x4Relaxed(lhs, rhs, dest); + break; + case wasm::SimdOp::F64x2RelaxedMin: + masm.minFloat64x2Relaxed(lhs, rhs, dest); + break; + case wasm::SimdOp::F64x2RelaxedMax: + masm.maxFloat64x2Relaxed(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8RelaxedQ15MulrS: + masm.q15MulrInt16x8Relaxed(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8DotI8x16I7x16S: + masm.dotInt8x16Int7x16(lhs, rhs, dest); + break; + case wasm::SimdOp::MozPMADDUBSW: + masm.vpmaddubsw(rhs, lhs, dest); + break; + default: + MOZ_CRASH("Binary SimdOp not implemented"); + } +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void CodeGenerator::visitWasmBinarySimd128WithConstant( + LWasmBinarySimd128WithConstant* ins) { +#ifdef ENABLE_WASM_SIMD + FloatRegister lhs = ToFloatRegister(ins->lhsDest()); + const SimdConstant& rhs = ins->rhs(); + FloatRegister dest = ToFloatRegister(ins->output()); + FloatRegister temp = ToTempFloatRegisterOrInvalid(ins->getTemp(0)); + + switch (ins->simdOp()) { + case wasm::SimdOp::I8x16Add: + masm.addInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8Add: + masm.addInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4Add: + masm.addInt32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::I64x2Add: + masm.addInt64x2(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16Sub: + masm.subInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8Sub: + masm.subInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4Sub: + masm.subInt32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::I64x2Sub: + 
masm.subInt64x2(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8Mul: + masm.mulInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4Mul: + masm.mulInt32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16AddSatS: + masm.addSatInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16AddSatU: + masm.unsignedAddSatInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8AddSatS: + masm.addSatInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8AddSatU: + masm.unsignedAddSatInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16SubSatS: + masm.subSatInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16SubSatU: + masm.unsignedSubSatInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8SubSatS: + masm.subSatInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8SubSatU: + masm.unsignedSubSatInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16MinS: + masm.minInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16MinU: + masm.unsignedMinInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8MinS: + masm.minInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8MinU: + masm.unsignedMinInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4MinS: + masm.minInt32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4MinU: + masm.unsignedMinInt32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16MaxS: + masm.maxInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16MaxU: + masm.unsignedMaxInt8x16(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8MaxS: + masm.maxInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8MaxU: + masm.unsignedMaxInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4MaxS: + masm.maxInt32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4MaxU: + masm.unsignedMaxInt32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::V128And: + masm.bitwiseAndSimd128(lhs, rhs, dest); + break; + case wasm::SimdOp::V128Or: + masm.bitwiseOrSimd128(lhs, rhs, dest); + break; + case wasm::SimdOp::V128Xor: + masm.bitwiseXorSimd128(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16Eq: + masm.compareInt8x16(Assembler::Equal, lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16Ne: + masm.compareInt8x16(Assembler::NotEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16GtS: + masm.compareInt8x16(Assembler::GreaterThan, lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16LeS: + masm.compareInt8x16(Assembler::LessThanOrEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8Eq: + masm.compareInt16x8(Assembler::Equal, lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8Ne: + masm.compareInt16x8(Assembler::NotEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8GtS: + masm.compareInt16x8(Assembler::GreaterThan, lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8LeS: + masm.compareInt16x8(Assembler::LessThanOrEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4Eq: + masm.compareInt32x4(Assembler::Equal, lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4Ne: + masm.compareInt32x4(Assembler::NotEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4GtS: + masm.compareInt32x4(Assembler::GreaterThan, lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4LeS: + masm.compareInt32x4(Assembler::LessThanOrEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::I64x2Mul: + masm.mulInt64x2(lhs, rhs, dest, temp); + break; + case wasm::SimdOp::F32x4Eq: + masm.compareFloat32x4(Assembler::Equal, lhs, rhs, dest); + break; + case wasm::SimdOp::F32x4Ne: + masm.compareFloat32x4(Assembler::NotEqual, lhs, 
rhs, dest); + break; + case wasm::SimdOp::F32x4Lt: + masm.compareFloat32x4(Assembler::LessThan, lhs, rhs, dest); + break; + case wasm::SimdOp::F32x4Le: + masm.compareFloat32x4(Assembler::LessThanOrEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::F64x2Eq: + masm.compareFloat64x2(Assembler::Equal, lhs, rhs, dest); + break; + case wasm::SimdOp::F64x2Ne: + masm.compareFloat64x2(Assembler::NotEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::F64x2Lt: + masm.compareFloat64x2(Assembler::LessThan, lhs, rhs, dest); + break; + case wasm::SimdOp::F64x2Le: + masm.compareFloat64x2(Assembler::LessThanOrEqual, lhs, rhs, dest); + break; + case wasm::SimdOp::I32x4DotI16x8S: + masm.widenDotInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::F32x4Add: + masm.addFloat32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::F64x2Add: + masm.addFloat64x2(lhs, rhs, dest); + break; + case wasm::SimdOp::F32x4Sub: + masm.subFloat32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::F64x2Sub: + masm.subFloat64x2(lhs, rhs, dest); + break; + case wasm::SimdOp::F32x4Div: + masm.divFloat32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::F64x2Div: + masm.divFloat64x2(lhs, rhs, dest); + break; + case wasm::SimdOp::F32x4Mul: + masm.mulFloat32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::F64x2Mul: + masm.mulFloat64x2(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16NarrowI16x8S: + masm.narrowInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I8x16NarrowI16x8U: + masm.unsignedNarrowInt16x8(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8NarrowI32x4S: + masm.narrowInt32x4(lhs, rhs, dest); + break; + case wasm::SimdOp::I16x8NarrowI32x4U: + masm.unsignedNarrowInt32x4(lhs, rhs, dest); + break; + default: + MOZ_CRASH("Binary SimdOp with constant not implemented"); + } +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void CodeGenerator::visitWasmVariableShiftSimd128( + LWasmVariableShiftSimd128* ins) { +#ifdef ENABLE_WASM_SIMD + FloatRegister lhsDest = ToFloatRegister(ins->lhsDest()); + Register rhs = ToRegister(ins->rhs()); + FloatRegister temp = ToTempFloatRegisterOrInvalid(ins->getTemp(0)); + + MOZ_ASSERT(ToFloatRegister(ins->output()) == lhsDest); + + switch (ins->simdOp()) { + case wasm::SimdOp::I8x16Shl: + masm.leftShiftInt8x16(rhs, lhsDest, temp); + break; + case wasm::SimdOp::I8x16ShrS: + masm.rightShiftInt8x16(rhs, lhsDest, temp); + break; + case wasm::SimdOp::I8x16ShrU: + masm.unsignedRightShiftInt8x16(rhs, lhsDest, temp); + break; + case wasm::SimdOp::I16x8Shl: + masm.leftShiftInt16x8(rhs, lhsDest); + break; + case wasm::SimdOp::I16x8ShrS: + masm.rightShiftInt16x8(rhs, lhsDest); + break; + case wasm::SimdOp::I16x8ShrU: + masm.unsignedRightShiftInt16x8(rhs, lhsDest); + break; + case wasm::SimdOp::I32x4Shl: + masm.leftShiftInt32x4(rhs, lhsDest); + break; + case wasm::SimdOp::I32x4ShrS: + masm.rightShiftInt32x4(rhs, lhsDest); + break; + case wasm::SimdOp::I32x4ShrU: + masm.unsignedRightShiftInt32x4(rhs, lhsDest); + break; + case wasm::SimdOp::I64x2Shl: + masm.leftShiftInt64x2(rhs, lhsDest); + break; + case wasm::SimdOp::I64x2ShrS: + masm.rightShiftInt64x2(rhs, lhsDest, temp); + break; + case wasm::SimdOp::I64x2ShrU: + masm.unsignedRightShiftInt64x2(rhs, lhsDest); + break; + default: + MOZ_CRASH("Shift SimdOp not implemented"); + } +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void CodeGenerator::visitWasmConstantShiftSimd128( + LWasmConstantShiftSimd128* ins) { +#ifdef ENABLE_WASM_SIMD + FloatRegister src = ToFloatRegister(ins->src()); + FloatRegister dest = ToFloatRegister(ins->output()); + int32_t shift = 
ins->shift(); + + if (shift == 0) { + masm.moveSimd128(src, dest); + return; + } + + switch (ins->simdOp()) { + case wasm::SimdOp::I8x16Shl: + masm.leftShiftInt8x16(Imm32(shift), src, dest); + break; + case wasm::SimdOp::I8x16ShrS: + masm.rightShiftInt8x16(Imm32(shift), src, dest); + break; + case wasm::SimdOp::I8x16ShrU: + masm.unsignedRightShiftInt8x16(Imm32(shift), src, dest); + break; + case wasm::SimdOp::I16x8Shl: + masm.leftShiftInt16x8(Imm32(shift), src, dest); + break; + case wasm::SimdOp::I16x8ShrS: + masm.rightShiftInt16x8(Imm32(shift), src, dest); + break; + case wasm::SimdOp::I16x8ShrU: + masm.unsignedRightShiftInt16x8(Imm32(shift), src, dest); + break; + case wasm::SimdOp::I32x4Shl: + masm.leftShiftInt32x4(Imm32(shift), src, dest); + break; + case wasm::SimdOp::I32x4ShrS: + masm.rightShiftInt32x4(Imm32(shift), src, dest); + break; + case wasm::SimdOp::I32x4ShrU: + masm.unsignedRightShiftInt32x4(Imm32(shift), src, dest); + break; + case wasm::SimdOp::I64x2Shl: + masm.leftShiftInt64x2(Imm32(shift), src, dest); + break; + case wasm::SimdOp::I64x2ShrS: + masm.rightShiftInt64x2(Imm32(shift), src, dest); + break; + case wasm::SimdOp::I64x2ShrU: + masm.unsignedRightShiftInt64x2(Imm32(shift), src, dest); + break; + default: + MOZ_CRASH("Shift SimdOp not implemented"); + } +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void CodeGenerator::visitWasmSignReplicationSimd128( + LWasmSignReplicationSimd128* ins) { +#ifdef ENABLE_WASM_SIMD + FloatRegister src = ToFloatRegister(ins->src()); + FloatRegister dest = ToFloatRegister(ins->output()); + + switch (ins->simdOp()) { + case wasm::SimdOp::I8x16ShrS: + masm.signReplicationInt8x16(src, dest); + break; + case wasm::SimdOp::I16x8ShrS: + masm.signReplicationInt16x8(src, dest); + break; + case wasm::SimdOp::I32x4ShrS: + masm.signReplicationInt32x4(src, dest); + break; + case wasm::SimdOp::I64x2ShrS: + masm.signReplicationInt64x2(src, dest); + break; + default: + MOZ_CRASH("Shift SimdOp unsupported sign replication optimization"); + } +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void CodeGenerator::visitWasmShuffleSimd128(LWasmShuffleSimd128* ins) { +#ifdef ENABLE_WASM_SIMD + FloatRegister lhsDest = ToFloatRegister(ins->lhsDest()); + FloatRegister rhs = ToFloatRegister(ins->rhs()); + SimdConstant control = ins->control(); + FloatRegister output = ToFloatRegister(ins->output()); + switch (ins->op()) { + case SimdShuffleOp::BLEND_8x16: { + masm.blendInt8x16(reinterpret_cast<const uint8_t*>(control.asInt8x16()), + lhsDest, rhs, output, ToFloatRegister(ins->temp())); + break; + } + case SimdShuffleOp::BLEND_16x8: { + MOZ_ASSERT(ins->temp()->isBogusTemp()); + masm.blendInt16x8(reinterpret_cast<const uint16_t*>(control.asInt16x8()), + lhsDest, rhs, output); + break; + } + case SimdShuffleOp::CONCAT_RIGHT_SHIFT_8x16: { + MOZ_ASSERT(ins->temp()->isBogusTemp()); + int8_t count = 16 - control.asInt8x16()[0]; + MOZ_ASSERT(count > 0, "Should have been a MOVE operation"); + masm.concatAndRightShiftSimd128(lhsDest, rhs, output, count); + break; + } + case SimdShuffleOp::INTERLEAVE_HIGH_8x16: { + MOZ_ASSERT(ins->temp()->isBogusTemp()); + masm.interleaveHighInt8x16(lhsDest, rhs, output); + break; + } + case SimdShuffleOp::INTERLEAVE_HIGH_16x8: { + MOZ_ASSERT(ins->temp()->isBogusTemp()); + masm.interleaveHighInt16x8(lhsDest, rhs, output); + break; + } + case SimdShuffleOp::INTERLEAVE_HIGH_32x4: { + MOZ_ASSERT(ins->temp()->isBogusTemp()); + masm.interleaveHighInt32x4(lhsDest, rhs, output); + break; + } + case SimdShuffleOp::INTERLEAVE_HIGH_64x2: { + 
MOZ_ASSERT(ins->temp()->isBogusTemp()); + masm.interleaveHighInt64x2(lhsDest, rhs, output); + break; + } + case SimdShuffleOp::INTERLEAVE_LOW_8x16: { + MOZ_ASSERT(ins->temp()->isBogusTemp()); + masm.interleaveLowInt8x16(lhsDest, rhs, output); + break; + } + case SimdShuffleOp::INTERLEAVE_LOW_16x8: { + MOZ_ASSERT(ins->temp()->isBogusTemp()); + masm.interleaveLowInt16x8(lhsDest, rhs, output); + break; + } + case SimdShuffleOp::INTERLEAVE_LOW_32x4: { + MOZ_ASSERT(ins->temp()->isBogusTemp()); + masm.interleaveLowInt32x4(lhsDest, rhs, output); + break; + } + case SimdShuffleOp::INTERLEAVE_LOW_64x2: { + MOZ_ASSERT(ins->temp()->isBogusTemp()); + masm.interleaveLowInt64x2(lhsDest, rhs, output); + break; + } + case SimdShuffleOp::SHUFFLE_BLEND_8x16: { + masm.shuffleInt8x16(reinterpret_cast<const uint8_t*>(control.asInt8x16()), + lhsDest, rhs, output); + break; + } + default: { + MOZ_CRASH("Unsupported SIMD shuffle operation"); + } + } +#else + MOZ_CRASH("No SIMD"); +#endif +} + +#ifdef ENABLE_WASM_SIMD + +enum PermuteX64I16x8Action : uint16_t { + UNAVAILABLE = 0, + SWAP_QWORDS = 1, // Swap qwords first + PERM_LOW = 2, // Permute low qword by control_[0..3] + PERM_HIGH = 4 // Permute high qword by control_[4..7] +}; + +// Skip lanes that equal v starting at i, returning the index just beyond the +// last of those. There is no requirement that the initial lanes[i] == v. +template <typename T> +static int ScanConstant(const T* lanes, int v, int i) { + int len = int(16 / sizeof(T)); + MOZ_ASSERT(i <= len); + while (i < len && lanes[i] == v) { + i++; + } + return i; +} + +// Apply a transformation to each lane value. +template <typename T> +static void MapLanes(T* result, const T* input, int (*f)(int)) { + int len = int(16 / sizeof(T)); + for (int i = 0; i < len; i++) { + result[i] = f(input[i]); + } +} + +// Recognize part of an identity permutation starting at start, with +// the first value of the permutation expected to be bias. +template <typename T> +static bool IsIdentity(const T* lanes, int start, int len, int bias) { + if (lanes[start] != bias) { + return false; + } + for (int i = start + 1; i < start + len; i++) { + if (lanes[i] != lanes[i - 1] + 1) { + return false; + } + } + return true; +} + +// We can permute by words if the mask is reducible to a word mask, but the x64 +// lowering is only efficient if we can permute the high and low quadwords +// separately, possibly after swapping quadwords. +static PermuteX64I16x8Action CalculateX64Permute16x8(SimdConstant* control) { + const SimdConstant::I16x8& lanes = control->asInt16x8(); + SimdConstant::I16x8 mapped; + MapLanes(mapped, lanes, [](int x) -> int { return x < 4 ? 0 : 1; }); + int i = ScanConstant(mapped, mapped[0], 0); + if (i != 4) { + return PermuteX64I16x8Action::UNAVAILABLE; + } + i = ScanConstant(mapped, mapped[4], 4); + if (i != 8) { + return PermuteX64I16x8Action::UNAVAILABLE; + } + // Now compute the operation bits. `mapped` holds the adjusted lane mask. 
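+  // E.g. for lanes {4,5,6,7, 0,1,2,3}: mapped becomes {1,1,1,1, 0,0,0,0}, both
+  // scans succeed, lanes[0] > lanes[4] requests SWAP_QWORDS, and after masking
+  // with 3 each half is the identity {0,1,2,3}, so neither PERM_LOW nor
+  // PERM_HIGH is needed.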
+ memcpy(mapped, lanes, sizeof(mapped)); + uint16_t op = 0; + if (mapped[0] > mapped[4]) { + op |= PermuteX64I16x8Action::SWAP_QWORDS; + } + for (auto& m : mapped) { + m &= 3; + } + if (!IsIdentity(mapped, 0, 4, 0)) { + op |= PermuteX64I16x8Action::PERM_LOW; + } + if (!IsIdentity(mapped, 4, 4, 0)) { + op |= PermuteX64I16x8Action::PERM_HIGH; + } + MOZ_ASSERT(op != PermuteX64I16x8Action::UNAVAILABLE); + *control = SimdConstant::CreateX8(mapped); + return (PermuteX64I16x8Action)op; +} + +#endif + +void CodeGenerator::visitWasmPermuteSimd128(LWasmPermuteSimd128* ins) { +#ifdef ENABLE_WASM_SIMD + FloatRegister src = ToFloatRegister(ins->src()); + FloatRegister dest = ToFloatRegister(ins->output()); + SimdConstant control = ins->control(); + switch (ins->op()) { + // For broadcast, would MOVDDUP be better than PSHUFD for the last step? + case SimdPermuteOp::BROADCAST_8x16: { + const SimdConstant::I8x16& mask = control.asInt8x16(); + int8_t source = mask[0]; + if (source == 0 && Assembler::HasAVX2()) { + masm.vbroadcastb(Operand(src), dest); + break; + } + MOZ_ASSERT_IF(!Assembler::HasAVX(), src == dest); + if (source < 8) { + masm.interleaveLowInt8x16(src, src, dest); + } else { + masm.interleaveHighInt8x16(src, src, dest); + source -= 8; + } + uint16_t v = uint16_t(source & 3); + uint16_t wordMask[4] = {v, v, v, v}; + if (source < 4) { + masm.permuteLowInt16x8(wordMask, dest, dest); + uint32_t dwordMask[4] = {0, 0, 0, 0}; + masm.permuteInt32x4(dwordMask, dest, dest); + } else { + masm.permuteHighInt16x8(wordMask, dest, dest); + uint32_t dwordMask[4] = {2, 2, 2, 2}; + masm.permuteInt32x4(dwordMask, dest, dest); + } + break; + } + case SimdPermuteOp::BROADCAST_16x8: { + const SimdConstant::I16x8& mask = control.asInt16x8(); + int16_t source = mask[0]; + if (source == 0 && Assembler::HasAVX2()) { + masm.vbroadcastw(Operand(src), dest); + break; + } + uint16_t v = uint16_t(source & 3); + uint16_t wordMask[4] = {v, v, v, v}; + if (source < 4) { + masm.permuteLowInt16x8(wordMask, src, dest); + uint32_t dwordMask[4] = {0, 0, 0, 0}; + masm.permuteInt32x4(dwordMask, dest, dest); + } else { + masm.permuteHighInt16x8(wordMask, src, dest); + uint32_t dwordMask[4] = {2, 2, 2, 2}; + masm.permuteInt32x4(dwordMask, dest, dest); + } + break; + } + case SimdPermuteOp::MOVE: { + masm.moveSimd128(src, dest); + break; + } + case SimdPermuteOp::PERMUTE_8x16: { + const SimdConstant::I8x16& mask = control.asInt8x16(); +# ifdef DEBUG + DebugOnly<int> i; + for (i = 0; i < 16 && mask[i] == i; i++) { + } + MOZ_ASSERT(i < 16, "Should have been a MOVE operation"); +# endif + masm.permuteInt8x16(reinterpret_cast<const uint8_t*>(mask), src, dest); + break; + } + case SimdPermuteOp::PERMUTE_16x8: { +# ifdef DEBUG + const SimdConstant::I16x8& mask = control.asInt16x8(); + DebugOnly<int> i; + for (i = 0; i < 8 && mask[i] == i; i++) { + } + MOZ_ASSERT(i < 8, "Should have been a MOVE operation"); +# endif + PermuteX64I16x8Action op = CalculateX64Permute16x8(&control); + if (op != PermuteX64I16x8Action::UNAVAILABLE) { + const SimdConstant::I16x8& mask = control.asInt16x8(); + if (op & PermuteX64I16x8Action::SWAP_QWORDS) { + uint32_t dwordMask[4] = {2, 3, 0, 1}; + masm.permuteInt32x4(dwordMask, src, dest); + src = dest; + } + if (op & PermuteX64I16x8Action::PERM_LOW) { + masm.permuteLowInt16x8(reinterpret_cast<const uint16_t*>(mask) + 0, + src, dest); + src = dest; + } + if (op & PermuteX64I16x8Action::PERM_HIGH) { + masm.permuteHighInt16x8(reinterpret_cast<const uint16_t*>(mask) + 4, + src, dest); + src = dest; + } + } else { + 
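+        // The word mask cannot be split into independent low/high quadword
+        // permutes, so expand each word index w into byte indices 2*w and
+        // 2*w+1 and fall back to the general byte shuffle.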
const SimdConstant::I16x8& wmask = control.asInt16x8(); + uint8_t mask[16]; + for (unsigned i = 0; i < 16; i += 2) { + mask[i] = wmask[i / 2] * 2; + mask[i + 1] = wmask[i / 2] * 2 + 1; + } + masm.permuteInt8x16(mask, src, dest); + } + break; + } + case SimdPermuteOp::PERMUTE_32x4: { + const SimdConstant::I32x4& mask = control.asInt32x4(); + if (Assembler::HasAVX2() && mask[0] == 0 && mask[1] == 0 && + mask[2] == 0 && mask[3] == 0) { + masm.vbroadcastd(Operand(src), dest); + break; + } +# ifdef DEBUG + DebugOnly<int> i; + for (i = 0; i < 4 && mask[i] == i; i++) { + } + MOZ_ASSERT(i < 4, "Should have been a MOVE operation"); +# endif + masm.permuteInt32x4(reinterpret_cast<const uint32_t*>(mask), src, dest); + break; + } + case SimdPermuteOp::ROTATE_RIGHT_8x16: { + MOZ_ASSERT_IF(!Assembler::HasAVX(), src == dest); + int8_t count = control.asInt8x16()[0]; + MOZ_ASSERT(count > 0, "Should have been a MOVE operation"); + masm.concatAndRightShiftSimd128(src, src, dest, count); + break; + } + case SimdPermuteOp::SHIFT_LEFT_8x16: { + int8_t count = control.asInt8x16()[0]; + MOZ_ASSERT(count > 0, "Should have been a MOVE operation"); + masm.leftShiftSimd128(Imm32(count), src, dest); + break; + } + case SimdPermuteOp::SHIFT_RIGHT_8x16: { + int8_t count = control.asInt8x16()[0]; + MOZ_ASSERT(count > 0, "Should have been a MOVE operation"); + masm.rightShiftSimd128(Imm32(count), src, dest); + break; + } + case SimdPermuteOp::REVERSE_16x8: + masm.reverseInt16x8(src, dest); + break; + case SimdPermuteOp::REVERSE_32x4: + masm.reverseInt32x4(src, dest); + break; + case SimdPermuteOp::REVERSE_64x2: + masm.reverseInt64x2(src, dest); + break; + default: { + MOZ_CRASH("Unsupported SIMD permutation operation"); + } + } +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void CodeGenerator::visitWasmReplaceLaneSimd128(LWasmReplaceLaneSimd128* ins) { +#ifdef ENABLE_WASM_SIMD + FloatRegister lhs = ToFloatRegister(ins->lhsDest()); + FloatRegister dest = ToFloatRegister(ins->output()); + const LAllocation* rhs = ins->rhs(); + uint32_t laneIndex = ins->laneIndex(); + + switch (ins->simdOp()) { + case wasm::SimdOp::I8x16ReplaceLane: + masm.replaceLaneInt8x16(laneIndex, lhs, ToRegister(rhs), dest); + break; + case wasm::SimdOp::I16x8ReplaceLane: + masm.replaceLaneInt16x8(laneIndex, lhs, ToRegister(rhs), dest); + break; + case wasm::SimdOp::I32x4ReplaceLane: + masm.replaceLaneInt32x4(laneIndex, lhs, ToRegister(rhs), dest); + break; + case wasm::SimdOp::F32x4ReplaceLane: + masm.replaceLaneFloat32x4(laneIndex, lhs, ToFloatRegister(rhs), dest); + break; + case wasm::SimdOp::F64x2ReplaceLane: + masm.replaceLaneFloat64x2(laneIndex, lhs, ToFloatRegister(rhs), dest); + break; + default: + MOZ_CRASH("ReplaceLane SimdOp not implemented"); + } +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void CodeGenerator::visitWasmReplaceInt64LaneSimd128( + LWasmReplaceInt64LaneSimd128* ins) { +#ifdef ENABLE_WASM_SIMD + MOZ_RELEASE_ASSERT(ins->simdOp() == wasm::SimdOp::I64x2ReplaceLane); + masm.replaceLaneInt64x2(ins->laneIndex(), ToFloatRegister(ins->lhs()), + ToRegister64(ins->rhs()), + ToFloatRegister(ins->output())); +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void CodeGenerator::visitWasmScalarToSimd128(LWasmScalarToSimd128* ins) { +#ifdef ENABLE_WASM_SIMD + FloatRegister dest = ToFloatRegister(ins->output()); + + switch (ins->simdOp()) { + case wasm::SimdOp::I8x16Splat: + masm.splatX16(ToRegister(ins->src()), dest); + break; + case wasm::SimdOp::I16x8Splat: + masm.splatX8(ToRegister(ins->src()), dest); + break; + case wasm::SimdOp::I32x4Splat: + 
masm.splatX4(ToRegister(ins->src()), dest); + break; + case wasm::SimdOp::F32x4Splat: + masm.splatX4(ToFloatRegister(ins->src()), dest); + break; + case wasm::SimdOp::F64x2Splat: + masm.splatX2(ToFloatRegister(ins->src()), dest); + break; + default: + MOZ_CRASH("ScalarToSimd128 SimdOp not implemented"); + } +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void CodeGenerator::visitWasmInt64ToSimd128(LWasmInt64ToSimd128* ins) { +#ifdef ENABLE_WASM_SIMD + Register64 src = ToRegister64(ins->src()); + FloatRegister dest = ToFloatRegister(ins->output()); + + switch (ins->simdOp()) { + case wasm::SimdOp::I64x2Splat: + masm.splatX2(src, dest); + break; + case wasm::SimdOp::V128Load8x8S: + masm.moveGPR64ToDouble(src, dest); + masm.widenLowInt8x16(dest, dest); + break; + case wasm::SimdOp::V128Load8x8U: + masm.moveGPR64ToDouble(src, dest); + masm.unsignedWidenLowInt8x16(dest, dest); + break; + case wasm::SimdOp::V128Load16x4S: + masm.moveGPR64ToDouble(src, dest); + masm.widenLowInt16x8(dest, dest); + break; + case wasm::SimdOp::V128Load16x4U: + masm.moveGPR64ToDouble(src, dest); + masm.unsignedWidenLowInt16x8(dest, dest); + break; + case wasm::SimdOp::V128Load32x2S: + masm.moveGPR64ToDouble(src, dest); + masm.widenLowInt32x4(dest, dest); + break; + case wasm::SimdOp::V128Load32x2U: + masm.moveGPR64ToDouble(src, dest); + masm.unsignedWidenLowInt32x4(dest, dest); + break; + default: + MOZ_CRASH("Int64ToSimd128 SimdOp not implemented"); + } +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void CodeGenerator::visitWasmUnarySimd128(LWasmUnarySimd128* ins) { +#ifdef ENABLE_WASM_SIMD + FloatRegister src = ToFloatRegister(ins->src()); + FloatRegister dest = ToFloatRegister(ins->output()); + + switch (ins->simdOp()) { + case wasm::SimdOp::I8x16Neg: + masm.negInt8x16(src, dest); + break; + case wasm::SimdOp::I16x8Neg: + masm.negInt16x8(src, dest); + break; + case wasm::SimdOp::I16x8ExtendLowI8x16S: + masm.widenLowInt8x16(src, dest); + break; + case wasm::SimdOp::I16x8ExtendHighI8x16S: + masm.widenHighInt8x16(src, dest); + break; + case wasm::SimdOp::I16x8ExtendLowI8x16U: + masm.unsignedWidenLowInt8x16(src, dest); + break; + case wasm::SimdOp::I16x8ExtendHighI8x16U: + masm.unsignedWidenHighInt8x16(src, dest); + break; + case wasm::SimdOp::I32x4Neg: + masm.negInt32x4(src, dest); + break; + case wasm::SimdOp::I32x4ExtendLowI16x8S: + masm.widenLowInt16x8(src, dest); + break; + case wasm::SimdOp::I32x4ExtendHighI16x8S: + masm.widenHighInt16x8(src, dest); + break; + case wasm::SimdOp::I32x4ExtendLowI16x8U: + masm.unsignedWidenLowInt16x8(src, dest); + break; + case wasm::SimdOp::I32x4ExtendHighI16x8U: + masm.unsignedWidenHighInt16x8(src, dest); + break; + case wasm::SimdOp::I32x4TruncSatF32x4S: + masm.truncSatFloat32x4ToInt32x4(src, dest); + break; + case wasm::SimdOp::I32x4TruncSatF32x4U: + masm.unsignedTruncSatFloat32x4ToInt32x4(src, dest, + ToFloatRegister(ins->temp())); + break; + case wasm::SimdOp::I64x2Neg: + masm.negInt64x2(src, dest); + break; + case wasm::SimdOp::I64x2ExtendLowI32x4S: + masm.widenLowInt32x4(src, dest); + break; + case wasm::SimdOp::I64x2ExtendHighI32x4S: + masm.widenHighInt32x4(src, dest); + break; + case wasm::SimdOp::I64x2ExtendLowI32x4U: + masm.unsignedWidenLowInt32x4(src, dest); + break; + case wasm::SimdOp::I64x2ExtendHighI32x4U: + masm.unsignedWidenHighInt32x4(src, dest); + break; + case wasm::SimdOp::F32x4Abs: + masm.absFloat32x4(src, dest); + break; + case wasm::SimdOp::F32x4Neg: + masm.negFloat32x4(src, dest); + break; + case wasm::SimdOp::F32x4Sqrt: + masm.sqrtFloat32x4(src, dest); + 
break; + case wasm::SimdOp::F32x4ConvertI32x4S: + masm.convertInt32x4ToFloat32x4(src, dest); + break; + case wasm::SimdOp::F32x4ConvertI32x4U: + masm.unsignedConvertInt32x4ToFloat32x4(src, dest); + break; + case wasm::SimdOp::F64x2Abs: + masm.absFloat64x2(src, dest); + break; + case wasm::SimdOp::F64x2Neg: + masm.negFloat64x2(src, dest); + break; + case wasm::SimdOp::F64x2Sqrt: + masm.sqrtFloat64x2(src, dest); + break; + case wasm::SimdOp::V128Not: + masm.bitwiseNotSimd128(src, dest); + break; + case wasm::SimdOp::I8x16Popcnt: + masm.popcntInt8x16(src, dest, ToFloatRegister(ins->temp())); + break; + case wasm::SimdOp::I8x16Abs: + masm.absInt8x16(src, dest); + break; + case wasm::SimdOp::I16x8Abs: + masm.absInt16x8(src, dest); + break; + case wasm::SimdOp::I32x4Abs: + masm.absInt32x4(src, dest); + break; + case wasm::SimdOp::I64x2Abs: + masm.absInt64x2(src, dest); + break; + case wasm::SimdOp::F32x4Ceil: + masm.ceilFloat32x4(src, dest); + break; + case wasm::SimdOp::F32x4Floor: + masm.floorFloat32x4(src, dest); + break; + case wasm::SimdOp::F32x4Trunc: + masm.truncFloat32x4(src, dest); + break; + case wasm::SimdOp::F32x4Nearest: + masm.nearestFloat32x4(src, dest); + break; + case wasm::SimdOp::F64x2Ceil: + masm.ceilFloat64x2(src, dest); + break; + case wasm::SimdOp::F64x2Floor: + masm.floorFloat64x2(src, dest); + break; + case wasm::SimdOp::F64x2Trunc: + masm.truncFloat64x2(src, dest); + break; + case wasm::SimdOp::F64x2Nearest: + masm.nearestFloat64x2(src, dest); + break; + case wasm::SimdOp::F32x4DemoteF64x2Zero: + masm.convertFloat64x2ToFloat32x4(src, dest); + break; + case wasm::SimdOp::F64x2PromoteLowF32x4: + masm.convertFloat32x4ToFloat64x2(src, dest); + break; + case wasm::SimdOp::F64x2ConvertLowI32x4S: + masm.convertInt32x4ToFloat64x2(src, dest); + break; + case wasm::SimdOp::F64x2ConvertLowI32x4U: + masm.unsignedConvertInt32x4ToFloat64x2(src, dest); + break; + case wasm::SimdOp::I32x4TruncSatF64x2SZero: + masm.truncSatFloat64x2ToInt32x4(src, dest, ToFloatRegister(ins->temp())); + break; + case wasm::SimdOp::I32x4TruncSatF64x2UZero: + masm.unsignedTruncSatFloat64x2ToInt32x4(src, dest, + ToFloatRegister(ins->temp())); + break; + case wasm::SimdOp::I16x8ExtaddPairwiseI8x16S: + masm.extAddPairwiseInt8x16(src, dest); + break; + case wasm::SimdOp::I16x8ExtaddPairwiseI8x16U: + masm.unsignedExtAddPairwiseInt8x16(src, dest); + break; + case wasm::SimdOp::I32x4ExtaddPairwiseI16x8S: + masm.extAddPairwiseInt16x8(src, dest); + break; + case wasm::SimdOp::I32x4ExtaddPairwiseI16x8U: + masm.unsignedExtAddPairwiseInt16x8(src, dest); + break; + case wasm::SimdOp::I32x4RelaxedTruncF32x4S: + masm.truncFloat32x4ToInt32x4Relaxed(src, dest); + break; + case wasm::SimdOp::I32x4RelaxedTruncF32x4U: + masm.unsignedTruncFloat32x4ToInt32x4Relaxed(src, dest); + break; + case wasm::SimdOp::I32x4RelaxedTruncF64x2SZero: + masm.truncFloat64x2ToInt32x4Relaxed(src, dest); + break; + case wasm::SimdOp::I32x4RelaxedTruncF64x2UZero: + masm.unsignedTruncFloat64x2ToInt32x4Relaxed(src, dest); + break; + default: + MOZ_CRASH("Unary SimdOp not implemented"); + } +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void CodeGenerator::visitWasmReduceSimd128(LWasmReduceSimd128* ins) { +#ifdef ENABLE_WASM_SIMD + FloatRegister src = ToFloatRegister(ins->src()); + const LDefinition* dest = ins->output(); + uint32_t imm = ins->imm(); + + switch (ins->simdOp()) { + case wasm::SimdOp::V128AnyTrue: + masm.anyTrueSimd128(src, ToRegister(dest)); + break; + case wasm::SimdOp::I8x16AllTrue: + masm.allTrueInt8x16(src, ToRegister(dest)); + break; 
+ case wasm::SimdOp::I16x8AllTrue: + masm.allTrueInt16x8(src, ToRegister(dest)); + break; + case wasm::SimdOp::I32x4AllTrue: + masm.allTrueInt32x4(src, ToRegister(dest)); + break; + case wasm::SimdOp::I64x2AllTrue: + masm.allTrueInt64x2(src, ToRegister(dest)); + break; + case wasm::SimdOp::I8x16Bitmask: + masm.bitmaskInt8x16(src, ToRegister(dest)); + break; + case wasm::SimdOp::I16x8Bitmask: + masm.bitmaskInt16x8(src, ToRegister(dest)); + break; + case wasm::SimdOp::I32x4Bitmask: + masm.bitmaskInt32x4(src, ToRegister(dest)); + break; + case wasm::SimdOp::I64x2Bitmask: + masm.bitmaskInt64x2(src, ToRegister(dest)); + break; + case wasm::SimdOp::I8x16ExtractLaneS: + masm.extractLaneInt8x16(imm, src, ToRegister(dest)); + break; + case wasm::SimdOp::I8x16ExtractLaneU: + masm.unsignedExtractLaneInt8x16(imm, src, ToRegister(dest)); + break; + case wasm::SimdOp::I16x8ExtractLaneS: + masm.extractLaneInt16x8(imm, src, ToRegister(dest)); + break; + case wasm::SimdOp::I16x8ExtractLaneU: + masm.unsignedExtractLaneInt16x8(imm, src, ToRegister(dest)); + break; + case wasm::SimdOp::I32x4ExtractLane: + masm.extractLaneInt32x4(imm, src, ToRegister(dest)); + break; + case wasm::SimdOp::F32x4ExtractLane: + masm.extractLaneFloat32x4(imm, src, ToFloatRegister(dest)); + break; + case wasm::SimdOp::F64x2ExtractLane: + masm.extractLaneFloat64x2(imm, src, ToFloatRegister(dest)); + break; + default: + MOZ_CRASH("Reduce SimdOp not implemented"); + } +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void CodeGenerator::visitWasmReduceAndBranchSimd128( + LWasmReduceAndBranchSimd128* ins) { +#ifdef ENABLE_WASM_SIMD + FloatRegister src = ToFloatRegister(ins->src()); + + switch (ins->simdOp()) { + case wasm::SimdOp::V128AnyTrue: + // Set the zero flag if all of the lanes are zero, and branch on that. + masm.vptest(src, src); + emitBranch(Assembler::NotEqual, ins->ifTrue(), ins->ifFalse()); + break; + case wasm::SimdOp::I8x16AllTrue: + case wasm::SimdOp::I16x8AllTrue: + case wasm::SimdOp::I32x4AllTrue: + case wasm::SimdOp::I64x2AllTrue: { + // Compare all lanes to zero, set the zero flag if none of the lanes are + // zero, and branch on that. 
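+      // E.g. for I32x4AllTrue: PXOR zeroes the scratch, PCMPEQD sets a scratch
+      // lane to all-ones exactly where the corresponding source lane is zero,
+      // and PTEST sets ZF iff the scratch is still all zero, i.e. iff every
+      // source lane was nonzero.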
+ ScratchSimd128Scope tmp(masm); + masm.vpxor(tmp, tmp, tmp); + switch (ins->simdOp()) { + case wasm::SimdOp::I8x16AllTrue: + masm.vpcmpeqb(Operand(src), tmp, tmp); + break; + case wasm::SimdOp::I16x8AllTrue: + masm.vpcmpeqw(Operand(src), tmp, tmp); + break; + case wasm::SimdOp::I32x4AllTrue: + masm.vpcmpeqd(Operand(src), tmp, tmp); + break; + case wasm::SimdOp::I64x2AllTrue: + masm.vpcmpeqq(Operand(src), tmp, tmp); + break; + default: + MOZ_CRASH(); + } + masm.vptest(tmp, tmp); + emitBranch(Assembler::Equal, ins->ifTrue(), ins->ifFalse()); + break; + } + case wasm::SimdOp::I16x8Bitmask: { + masm.bitwiseTestSimd128(SimdConstant::SplatX8(0x8000), src); + emitBranch(Assembler::NotEqual, ins->ifTrue(), ins->ifFalse()); + break; + } + default: + MOZ_CRASH("Reduce-and-branch SimdOp not implemented"); + } +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void CodeGenerator::visitWasmReduceSimd128ToInt64( + LWasmReduceSimd128ToInt64* ins) { +#ifdef ENABLE_WASM_SIMD + FloatRegister src = ToFloatRegister(ins->src()); + Register64 dest = ToOutRegister64(ins); + uint32_t imm = ins->imm(); + + switch (ins->simdOp()) { + case wasm::SimdOp::I64x2ExtractLane: + masm.extractLaneInt64x2(imm, src, dest); + break; + default: + MOZ_CRASH("Reduce SimdOp not implemented"); + } +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void CodeGenerator::visitWasmLoadLaneSimd128(LWasmLoadLaneSimd128* ins) { +#ifdef ENABLE_WASM_SIMD + const MWasmLoadLaneSimd128* mir = ins->mir(); + const wasm::MemoryAccessDesc& access = mir->access(); + + uint32_t offset = access.offset(); + MOZ_ASSERT(offset < masm.wasmMaxOffsetGuardLimit()); + + const LAllocation* value = ins->src(); + Operand srcAddr = toMemoryAccessOperand(ins, offset); + + masm.append(access, masm.size()); + switch (ins->laneSize()) { + case 1: { + masm.vpinsrb(ins->laneIndex(), srcAddr, ToFloatRegister(value), + ToFloatRegister(value)); + break; + } + case 2: { + masm.vpinsrw(ins->laneIndex(), srcAddr, ToFloatRegister(value), + ToFloatRegister(value)); + break; + } + case 4: { + masm.vinsertps(ins->laneIndex() << 4, srcAddr, ToFloatRegister(value), + ToFloatRegister(value)); + break; + } + case 8: { + if (ins->laneIndex() == 0) { + masm.vmovlps(srcAddr, ToFloatRegister(value), ToFloatRegister(value)); + } else { + masm.vmovhps(srcAddr, ToFloatRegister(value), ToFloatRegister(value)); + } + break; + } + default: + MOZ_CRASH("Unsupported load lane size"); + } +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void CodeGenerator::visitWasmStoreLaneSimd128(LWasmStoreLaneSimd128* ins) { +#ifdef ENABLE_WASM_SIMD + const MWasmStoreLaneSimd128* mir = ins->mir(); + const wasm::MemoryAccessDesc& access = mir->access(); + + uint32_t offset = access.offset(); + MOZ_ASSERT(offset < masm.wasmMaxOffsetGuardLimit()); + + const LAllocation* src = ins->src(); + Operand destAddr = toMemoryAccessOperand(ins, offset); + + masm.append(access, masm.size()); + switch (ins->laneSize()) { + case 1: { + masm.vpextrb(ins->laneIndex(), ToFloatRegister(src), destAddr); + break; + } + case 2: { + masm.vpextrw(ins->laneIndex(), ToFloatRegister(src), destAddr); + break; + } + case 4: { + unsigned lane = ins->laneIndex(); + if (lane == 0) { + masm.vmovss(ToFloatRegister(src), destAddr); + } else { + masm.vextractps(lane, ToFloatRegister(src), destAddr); + } + break; + } + case 8: { + if (ins->laneIndex() == 0) { + masm.vmovlps(ToFloatRegister(src), destAddr); + } else { + masm.vmovhps(ToFloatRegister(src), destAddr); + } + break; + } + default: + MOZ_CRASH("Unsupported store lane size"); + } +#else + MOZ_CRASH("No 
SIMD"); +#endif +} + +} // namespace jit +} // namespace js diff --git a/js/src/jit/x86-shared/CodeGenerator-x86-shared.h b/js/src/jit/x86-shared/CodeGenerator-x86-shared.h new file mode 100644 index 0000000000..74bcb91149 --- /dev/null +++ b/js/src/jit/x86-shared/CodeGenerator-x86-shared.h @@ -0,0 +1,189 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef jit_x86_shared_CodeGenerator_x86_shared_h +#define jit_x86_shared_CodeGenerator_x86_shared_h + +#include "jit/shared/CodeGenerator-shared.h" +#include "js/ScalarType.h" // js::Scalar::Type + +namespace js { +namespace jit { + +class CodeGeneratorX86Shared; +class OutOfLineBailout; +class OutOfLineUndoALUOperation; +class OutOfLineLoadTypedArrayOutOfBounds; +class MulNegativeZeroCheck; +class ModOverflowCheck; +class ReturnZero; +class OutOfLineTableSwitch; + +using OutOfLineWasmTruncateCheck = + OutOfLineWasmTruncateCheckBase<CodeGeneratorX86Shared>; + +class CodeGeneratorX86Shared : public CodeGeneratorShared { + friend class MoveResolverX86; + + template <typename T> + void bailout(const T& t, LSnapshot* snapshot); + + protected: + CodeGeneratorX86Shared(MIRGenerator* gen, LIRGraph* graph, + MacroAssembler* masm); + + // Load a NaN or zero into a register for an out of bounds AsmJS or static + // typed array load. + class OutOfLineLoadTypedArrayOutOfBounds + : public OutOfLineCodeBase<CodeGeneratorX86Shared> { + AnyRegister dest_; + Scalar::Type viewType_; + + public: + OutOfLineLoadTypedArrayOutOfBounds(AnyRegister dest, Scalar::Type viewType) + : dest_(dest), viewType_(viewType) {} + + AnyRegister dest() const { return dest_; } + Scalar::Type viewType() const { return viewType_; } + void accept(CodeGeneratorX86Shared* codegen) override { + codegen->visitOutOfLineLoadTypedArrayOutOfBounds(this); + } + }; + + NonAssertingLabel deoptLabel_; + + Operand ToOperand(const LAllocation& a); + Operand ToOperand(const LAllocation* a); + Operand ToOperand(const LDefinition* def); + +#ifdef JS_PUNBOX64 + Operand ToOperandOrRegister64(const LInt64Allocation input); +#else + Register64 ToOperandOrRegister64(const LInt64Allocation input); +#endif + + MoveOperand toMoveOperand(LAllocation a) const; + + void bailoutIf(Assembler::Condition condition, LSnapshot* snapshot); + void bailoutIf(Assembler::DoubleCondition condition, LSnapshot* snapshot); + void bailoutFrom(Label* label, LSnapshot* snapshot); + void bailout(LSnapshot* snapshot); + + template <typename T1, typename T2> + void bailoutCmpPtr(Assembler::Condition c, T1 lhs, T2 rhs, + LSnapshot* snapshot) { + masm.cmpPtr(lhs, rhs); + bailoutIf(c, snapshot); + } + void bailoutTestPtr(Assembler::Condition c, Register lhs, Register rhs, + LSnapshot* snapshot) { + masm.testPtr(lhs, rhs); + bailoutIf(c, snapshot); + } + template <typename T1, typename T2> + void bailoutCmp32(Assembler::Condition c, T1 lhs, T2 rhs, + LSnapshot* snapshot) { + masm.cmp32(lhs, rhs); + bailoutIf(c, snapshot); + } + template <typename T1, typename T2> + void bailoutTest32(Assembler::Condition c, T1 lhs, T2 rhs, + LSnapshot* snapshot) { + masm.test32(lhs, rhs); + bailoutIf(c, snapshot); + } + void bailoutIfFalseBool(Register reg, LSnapshot* snapshot) { + masm.test32(reg, Imm32(0xFF)); + bailoutIf(Assembler::Zero, snapshot); + } + void 
bailoutCvttsd2si(FloatRegister src, Register dest, LSnapshot* snapshot) { + Label bail; + masm.truncateDoubleToInt32(src, dest, &bail); + bailoutFrom(&bail, snapshot); + } + void bailoutCvttss2si(FloatRegister src, Register dest, LSnapshot* snapshot) { + Label bail; + masm.truncateFloat32ToInt32(src, dest, &bail); + bailoutFrom(&bail, snapshot); + } + + bool generateOutOfLineCode(); + + void emitCompare(MCompare::CompareType type, const LAllocation* left, + const LAllocation* right); + + // Emits a branch that directs control flow to the true block if |cond| is + // true, and the false block if |cond| is false. + void emitBranch(Assembler::Condition cond, MBasicBlock* ifTrue, + MBasicBlock* ifFalse, + Assembler::NaNCond ifNaN = Assembler::NaN_HandledByCond); + void emitBranch(Assembler::DoubleCondition cond, MBasicBlock* ifTrue, + MBasicBlock* ifFalse); + + void testNullEmitBranch(Assembler::Condition cond, const ValueOperand& value, + MBasicBlock* ifTrue, MBasicBlock* ifFalse) { + cond = masm.testNull(cond, value); + emitBranch(cond, ifTrue, ifFalse); + } + void testUndefinedEmitBranch(Assembler::Condition cond, + const ValueOperand& value, MBasicBlock* ifTrue, + MBasicBlock* ifFalse) { + cond = masm.testUndefined(cond, value); + emitBranch(cond, ifTrue, ifFalse); + } + void testObjectEmitBranch(Assembler::Condition cond, + const ValueOperand& value, MBasicBlock* ifTrue, + MBasicBlock* ifFalse) { + cond = masm.testObject(cond, value); + emitBranch(cond, ifTrue, ifFalse); + } + + void testZeroEmitBranch(Assembler::Condition cond, Register reg, + MBasicBlock* ifTrue, MBasicBlock* ifFalse) { + MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual); + masm.cmpPtr(reg, ImmWord(0)); + emitBranch(cond, ifTrue, ifFalse); + } + + void emitTableSwitchDispatch(MTableSwitch* mir, Register index, + Register base); + + void generateInvalidateEpilogue(); + + void canonicalizeIfDeterministic(Scalar::Type type, const LAllocation* value); + + template <typename T> + Operand toMemoryAccessOperand(T* lir, int32_t disp); + + public: + // Out of line visitors. + void visitOutOfLineBailout(OutOfLineBailout* ool); + void visitOutOfLineUndoALUOperation(OutOfLineUndoALUOperation* ool); + void visitMulNegativeZeroCheck(MulNegativeZeroCheck* ool); + void visitModOverflowCheck(ModOverflowCheck* ool); + void visitReturnZero(ReturnZero* ool); + void visitOutOfLineTableSwitch(OutOfLineTableSwitch* ool); + void visitOutOfLineLoadTypedArrayOutOfBounds( + OutOfLineLoadTypedArrayOutOfBounds* ool); + void visitOutOfLineWasmTruncateCheck(OutOfLineWasmTruncateCheck* ool); +}; + +// An out-of-line bailout thunk. +class OutOfLineBailout : public OutOfLineCodeBase<CodeGeneratorX86Shared> { + LSnapshot* snapshot_; + + public: + explicit OutOfLineBailout(LSnapshot* snapshot) : snapshot_(snapshot) {} + + void accept(CodeGeneratorX86Shared* codegen) override; + + LSnapshot* snapshot() const { return snapshot_; } +}; + +} // namespace jit +} // namespace js + +#endif /* jit_x86_shared_CodeGenerator_x86_shared_h */ diff --git a/js/src/jit/x86-shared/Constants-x86-shared.h b/js/src/jit/x86-shared/Constants-x86-shared.h new file mode 100644 index 0000000000..6c59515b21 --- /dev/null +++ b/js/src/jit/x86-shared/Constants-x86-shared.h @@ -0,0 +1,326 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef jit_x86_shared_Constants_x86_shared_h +#define jit_x86_shared_Constants_x86_shared_h + +#include "mozilla/Assertions.h" + +#include <iterator> +#include <stddef.h> +#include <stdint.h> + +namespace js { +namespace jit { + +namespace X86Encoding { + +enum RegisterID : uint8_t { + rax, + rcx, + rdx, + rbx, + rsp, + rbp, + rsi, + rdi +#ifdef JS_CODEGEN_X64 + , + r8, + r9, + r10, + r11, + r12, + r13, + r14, + r15 +#endif + , + invalid_reg +}; + +enum HRegisterID { ah = rsp, ch = rbp, dh = rsi, bh = rdi }; + +enum XMMRegisterID +// GCC < 8.0 has a bug with bitfields of enums with an underlying type. +#if defined(__clang__) || __GNUC__ > 7 + : uint8_t +#endif +{ + xmm0 = 0, + xmm1, + xmm2, + xmm3, + xmm4, + xmm5, + xmm6, + xmm7 +#ifdef JS_CODEGEN_X64 + , + xmm8, + xmm9, + xmm10, + xmm11, + xmm12, + xmm13, + xmm14, + xmm15 +#endif + , + invalid_xmm +}; + +inline const char* XMMRegName(XMMRegisterID reg) { + static const char* const names[] = {"%xmm0", + "%xmm1", + "%xmm2", + "%xmm3", + "%xmm4", + "%xmm5", + "%xmm6", + "%xmm7" +#ifdef JS_CODEGEN_X64 + , + "%xmm8", + "%xmm9", + "%xmm10", + "%xmm11", + "%xmm12", + "%xmm13", + "%xmm14", + "%xmm15" +#endif + }; + MOZ_ASSERT(size_t(reg) < std::size(names)); + return names[reg]; +} + +#ifdef JS_CODEGEN_X64 +inline const char* GPReg64Name(RegisterID reg) { + static const char* const names[] = {"%rax", + "%rcx", + "%rdx", + "%rbx", + "%rsp", + "%rbp", + "%rsi", + "%rdi" +# ifdef JS_CODEGEN_X64 + , + "%r8", + "%r9", + "%r10", + "%r11", + "%r12", + "%r13", + "%r14", + "%r15" +# endif + }; + MOZ_ASSERT(size_t(reg) < std::size(names)); + return names[reg]; +} +#endif + +inline const char* GPReg32Name(RegisterID reg) { + static const char* const names[] = {"%eax", + "%ecx", + "%edx", + "%ebx", + "%esp", + "%ebp", + "%esi", + "%edi" +#ifdef JS_CODEGEN_X64 + , + "%r8d", + "%r9d", + "%r10d", + "%r11d", + "%r12d", + "%r13d", + "%r14d", + "%r15d" +#endif + }; + MOZ_ASSERT(size_t(reg) < std::size(names)); + return names[reg]; +} + +inline const char* GPReg16Name(RegisterID reg) { + static const char* const names[] = {"%ax", + "%cx", + "%dx", + "%bx", + "%sp", + "%bp", + "%si", + "%di" +#ifdef JS_CODEGEN_X64 + , + "%r8w", + "%r9w", + "%r10w", + "%r11w", + "%r12w", + "%r13w", + "%r14w", + "%r15w" +#endif + }; + MOZ_ASSERT(size_t(reg) < std::size(names)); + return names[reg]; +} + +inline const char* GPReg8Name(RegisterID reg) { + static const char* const names[] = {"%al", + "%cl", + "%dl", + "%bl" +#ifdef JS_CODEGEN_X64 + , + "%spl", + "%bpl", + "%sil", + "%dil", + "%r8b", + "%r9b", + "%r10b", + "%r11b", + "%r12b", + "%r13b", + "%r14b", + "%r15b" +#endif + }; + MOZ_ASSERT(size_t(reg) < std::size(names)); + return names[reg]; +} + +inline const char* GPRegName(RegisterID reg) { +#ifdef JS_CODEGEN_X64 + return GPReg64Name(reg); +#else + return GPReg32Name(reg); +#endif +} + +inline bool HasSubregL(RegisterID reg) { +#ifdef JS_CODEGEN_X64 + // In 64-bit mode, all registers have an 8-bit lo subreg. + return true; +#else + // In 32-bit mode, only the first four registers do. + return reg <= rbx; +#endif +} + +inline bool HasSubregH(RegisterID reg) { + // The first four registers always have h registers. However, note that + // on x64, h registers may not be used in instructions using REX + // prefixes. Also note that this may depend on what other registers are + // used! 
+ return reg <= rbx; +} + +inline HRegisterID GetSubregH(RegisterID reg) { + MOZ_ASSERT(HasSubregH(reg)); + return HRegisterID(reg + 4); +} + +inline const char* HRegName8(HRegisterID reg) { + static const char* const names[] = {"%ah", "%ch", "%dh", "%bh"}; + size_t index = reg - GetSubregH(rax); + MOZ_ASSERT(index < std::size(names)); + return names[index]; +} + +enum Condition { + ConditionO, + ConditionNO, + ConditionB, + ConditionAE, + ConditionE, + ConditionNE, + ConditionBE, + ConditionA, + ConditionS, + ConditionNS, + ConditionP, + ConditionNP, + ConditionL, + ConditionGE, + ConditionLE, + ConditionG, + + ConditionC = ConditionB, + ConditionNC = ConditionAE +}; + +inline const char* CCName(Condition cc) { + static const char* const names[] = {"o ", "no", "b ", "ae", "e ", "ne", + "be", "a ", "s ", "ns", "p ", "np", + "l ", "ge", "le", "g "}; + MOZ_ASSERT(size_t(cc) < std::size(names)); + return names[cc]; +} + +// Conditions for CMP instructions (CMPSS, CMPSD, CMPPS, CMPPD, etc). +enum ConditionCmp { + ConditionCmp_EQ = 0x0, + ConditionCmp_LT = 0x1, + ConditionCmp_LE = 0x2, + ConditionCmp_UNORD = 0x3, + ConditionCmp_NEQ = 0x4, + ConditionCmp_NLT = 0x5, + ConditionCmp_NLE = 0x6, + ConditionCmp_ORD = 0x7, + ConditionCmp_AVX_Enabled = 0x8, + ConditionCmp_GE = 0xD, +}; + +// Rounding modes for ROUNDSS / ROUNDSD. +enum RoundingMode { + RoundToNearest = 0x0, + RoundDown = 0x1, + RoundUp = 0x2, + RoundToZero = 0x3 +}; + +// Rounding modes for ROUNDPS / ROUNDPD. Note these are the same as for +// RoundingMode above but incorporate the 'inexact' bit which says not to signal +// exceptions for lost precision. It's not obvious that this bit is needed; it +// was however suggested in the wasm SIMD proposal that led to these encodings. +enum class SSERoundingMode { + RoundToNearest = 0x08, + RoundDown = 0x09, + RoundUp = 0x0A, + RoundToZero = 0x0B +}; + +// Test whether the given address will fit in an address immediate field. +// This is always true on x86, but on x64 it's only true for addreses which +// fit in the 32-bit immediate field. +inline bool IsAddressImmediate(const void* address) { + intptr_t value = reinterpret_cast<intptr_t>(address); + int32_t immediate = static_cast<int32_t>(value); + return value == immediate; +} + +// Convert the given address to a 32-bit immediate field value. This is a +// no-op on x86, but on x64 it asserts that the address is actually a valid +// address immediate. +inline int32_t AddressImmediate(const void* address) { + MOZ_ASSERT(IsAddressImmediate(address)); + return static_cast<int32_t>(reinterpret_cast<intptr_t>(address)); +} + +} // namespace X86Encoding + +} // namespace jit +} // namespace js + +#endif /* jit_x86_shared_Constants_x86_shared_h */ diff --git a/js/src/jit/x86-shared/Encoding-x86-shared.h b/js/src/jit/x86-shared/Encoding-x86-shared.h new file mode 100644 index 0000000000..d3d78f4e5e --- /dev/null +++ b/js/src/jit/x86-shared/Encoding-x86-shared.h @@ -0,0 +1,508 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef jit_x86_shared_Encoding_x86_shared_h +#define jit_x86_shared_Encoding_x86_shared_h + +#include <type_traits> + +#include "jit/x86-shared/Constants-x86-shared.h" + +namespace js { +namespace jit { + +namespace X86Encoding { + +static const size_t MaxInstructionSize = 16; + +// These enumerated values are following the Intel documentation Volume 2C [1], +// Appendix A.2 and Appendix A.3. +// +// Operand size/types as listed in the Appendix A.2. Tables of the instructions +// and their operands can be found in the Appendix A.3. +// +// B = reg (VEX.vvvv of VEX prefix) +// E = reg/mem +// G = reg (reg field of ModR/M) +// U = xmm (R/M field of ModR/M) +// V = xmm (reg field of ModR/M) +// W = xmm/mem64 +// I = immediate +// O = offset +// +// b = byte (8-bit) +// w = word (16-bit) +// v = register size +// d = double (32-bit) +// dq = double-quad (128-bit) (xmm) +// ss = scalar float 32 (xmm) +// ps = packed float 32 (xmm) +// sd = scalar double (xmm) +// pd = packed double (xmm) +// y = 32/64-bit +// z = 16/32/64-bit +// vqp = (*) +// +// (*) Some website [2] provides a convenient list of all instructions, but be +// aware that they do not follow the Intel documentation naming, as the +// following enumeration does. Do not use these names as a reference for adding +// new instructions. +// +// [1] +// http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-manual-325462.html +// [2] http://ref.x86asm.net/geek.html +// +// OPn_NAME_DstSrc +enum OneByteOpcodeID { + OP_NOP_00 = 0x00, + OP_ADD_EbGb = 0x00, + OP_ADD_EvGv = 0x01, + OP_ADD_GvEv = 0x03, + OP_ADD_EAXIv = 0x05, + OP_OR_EbGb = 0x08, + OP_OR_EvGv = 0x09, + OP_OR_GvEv = 0x0B, + OP_OR_EAXIv = 0x0D, + OP_2BYTE_ESCAPE = 0x0F, + OP_NOP_0F = 0x0F, + OP_ADC_GvEv = 0x13, + OP_SBB_GvEv = 0x1B, + OP_NOP_1F = 0x1F, + OP_AND_EbGb = 0x20, + OP_AND_EvGv = 0x21, + OP_AND_GvEv = 0x23, + OP_AND_EAXIv = 0x25, + OP_SUB_EbGb = 0x28, + OP_SUB_EvGv = 0x29, + OP_SUB_GvEv = 0x2B, + OP_SUB_EAXIv = 0x2D, + PRE_PREDICT_BRANCH_NOT_TAKEN = 0x2E, + OP_XOR_EbGb = 0x30, + OP_XOR_EvGv = 0x31, + OP_XOR_GvEv = 0x33, + OP_XOR_EAXIv = 0x35, + OP_CMP_EbGb = 0x38, + OP_CMP_EvGv = 0x39, + OP_CMP_GbEb = 0x3A, + OP_CMP_GvEv = 0x3B, + OP_CMP_EAXIb = 0x3C, + OP_CMP_EAXIv = 0x3D, +#ifdef JS_CODEGEN_X64 + PRE_REX = 0x40, +#endif + OP_NOP_40 = 0x40, + OP_NOP_44 = 0x44, + OP_PUSH_EAX = 0x50, + OP_POP_EAX = 0x58, +#ifdef JS_CODEGEN_X86 + OP_PUSHA = 0x60, + OP_POPA = 0x61, +#endif +#ifdef JS_CODEGEN_X64 + OP_MOVSXD_GvEv = 0x63, +#endif + PRE_OPERAND_SIZE = 0x66, + PRE_SSE_66 = 0x66, + OP_NOP_66 = 0x66, + OP_PUSH_Iz = 0x68, + OP_IMUL_GvEvIz = 0x69, + OP_PUSH_Ib = 0x6a, + OP_IMUL_GvEvIb = 0x6b, + OP_JCC_rel8 = 0x70, + OP_GROUP1_EbIb = 0x80, + OP_NOP_80 = 0x80, + OP_GROUP1_EvIz = 0x81, + OP_GROUP1_EvIb = 0x83, + OP_TEST_EbGb = 0x84, + OP_NOP_84 = 0x84, + OP_TEST_EvGv = 0x85, + OP_XCHG_GbEb = 0x86, + OP_XCHG_GvEv = 0x87, + OP_MOV_EbGv = 0x88, + OP_MOV_EvGv = 0x89, + OP_MOV_GvEb = 0x8A, + OP_MOV_GvEv = 0x8B, + OP_LEA = 0x8D, + OP_GROUP1A_Ev = 0x8F, + OP_NOP = 0x90, + OP_PUSHFLAGS = 0x9C, + OP_POPFLAGS = 0x9D, + OP_CDQ = 0x99, + OP_MOV_EAXOv = 0xA1, + OP_MOV_OvEAX = 0xA3, + OP_TEST_EAXIb = 0xA8, + OP_TEST_EAXIv = 0xA9, + OP_MOV_EbIb = 0xB0, + OP_MOV_EAXIv = 0xB8, + OP_GROUP2_EvIb = 0xC1, + OP_ADDP_ST0_ST1 = 0xC1, + OP_RET_Iz = 0xC2, + PRE_VEX_C4 = 0xC4, + PRE_VEX_C5 = 0xC5, + OP_RET = 0xC3, + OP_GROUP11_EvIb = 0xC6, + OP_GROUP11_EvIz = 0xC7, + OP_INT3 = 0xCC, + OP_GROUP2_Ev1 = 0xD1, + OP_GROUP2_EvCL = 0xD3, + OP_FPU6 = 0xDD, + 
OP_FPU6_F32 = 0xD9, + OP_FPU6_ADDP = 0xDE, + OP_FILD = 0xDF, + OP_CALL_rel32 = 0xE8, + OP_JMP_rel32 = 0xE9, + OP_JMP_rel8 = 0xEB, + PRE_LOCK = 0xF0, + PRE_SSE_F2 = 0xF2, + PRE_SSE_F3 = 0xF3, + OP_HLT = 0xF4, + OP_GROUP3_EbIb = 0xF6, + OP_GROUP3_Ev = 0xF7, + OP_GROUP3_EvIz = + 0xF7, // OP_GROUP3_Ev has an immediate, when instruction is a test. + OP_GROUP5_Ev = 0xFF +}; + +enum class ShiftID { + vpsrlx = 2, + vpsrldq = 3, + vpsrad = 4, + vpsllx = 6, + vpslldq = 7 +}; + +enum TwoByteOpcodeID { + OP2_UD2 = 0x0B, + OP2_MOVSD_VsdWsd = 0x10, + OP2_MOVPS_VpsWps = 0x10, + OP2_MOVSD_WsdVsd = 0x11, + OP2_MOVPS_WpsVps = 0x11, + OP2_MOVDDUP_VqWq = 0x12, + OP2_MOVHLPS_VqUq = 0x12, + OP2_MOVSLDUP_VpsWps = 0x12, + OP2_MOVLPS_VqEq = 0x12, + OP2_MOVLPS_EqVq = 0x13, + OP2_UNPCKLPS_VsdWsd = 0x14, + OP2_UNPCKHPS_VsdWsd = 0x15, + OP2_MOVLHPS_VqUq = 0x16, + OP2_MOVSHDUP_VpsWps = 0x16, + OP2_MOVHPS_VqEq = 0x16, + OP2_MOVHPS_EqVq = 0x17, + OP2_MOVAPD_VsdWsd = 0x28, + OP2_MOVAPS_VsdWsd = 0x28, + OP2_MOVAPS_WsdVsd = 0x29, + OP2_CVTSI2SD_VsdEd = 0x2A, + OP2_CVTTSD2SI_GdWsd = 0x2C, + OP2_UCOMISD_VsdWsd = 0x2E, + OP2_CMOVCC_GvEv = 0x40, + OP2_MOVMSKPD_EdVd = 0x50, + OP2_ANDPS_VpsWps = 0x54, + OP2_ANDNPS_VpsWps = 0x55, + OP2_ORPS_VpsWps = 0x56, + OP2_XORPS_VpsWps = 0x57, + OP2_ADDSD_VsdWsd = 0x58, + OP2_ADDPS_VpsWps = 0x58, + OP2_ADDPD_VpdWpd = 0x58, + OP2_MULSD_VsdWsd = 0x59, + OP2_MULPD_VpdWpd = 0x59, + OP2_MULPS_VpsWps = 0x59, + OP2_CVTSS2SD_VsdEd = 0x5A, + OP2_CVTSD2SS_VsdEd = 0x5A, + OP2_CVTPS2PD_VpdWps = 0x5A, + OP2_CVTPD2PS_VpsWpd = 0x5A, + OP2_CVTTPS2DQ_VdqWps = 0x5B, + OP2_CVTDQ2PS_VpsWdq = 0x5B, + OP2_SUBSD_VsdWsd = 0x5C, + OP2_SUBPS_VpsWps = 0x5C, + OP2_SUBPD_VpdWpd = 0x5C, + OP2_MINSD_VsdWsd = 0x5D, + OP2_MINSS_VssWss = 0x5D, + OP2_MINPS_VpsWps = 0x5D, + OP2_MINPD_VpdWpd = 0x5D, + OP2_DIVSD_VsdWsd = 0x5E, + OP2_DIVPS_VpsWps = 0x5E, + OP2_DIVPD_VpdWpd = 0x5E, + OP2_MAXSD_VsdWsd = 0x5F, + OP2_MAXSS_VssWss = 0x5F, + OP2_MAXPS_VpsWps = 0x5F, + OP2_MAXPD_VpdWpd = 0x5F, + OP2_SQRTSD_VsdWsd = 0x51, + OP2_SQRTSS_VssWss = 0x51, + OP2_SQRTPS_VpsWps = 0x51, + OP2_SQRTPD_VpdWpd = 0x51, + OP2_RSQRTPS_VpsWps = 0x52, + OP2_RCPPS_VpsWps = 0x53, + OP2_ANDPD_VpdWpd = 0x54, + OP2_ORPD_VpdWpd = 0x56, + OP2_XORPD_VpdWpd = 0x57, + OP2_PUNPCKLBW_VdqWdq = 0x60, + OP2_PUNPCKLWD_VdqWdq = 0x61, + OP2_PUNPCKLDQ_VdqWdq = 0x62, + OP2_PACKSSWB_VdqWdq = 0x63, + OP2_PCMPGTB_VdqWdq = 0x64, + OP2_PCMPGTW_VdqWdq = 0x65, + OP2_PCMPGTD_VdqWdq = 0x66, + OP2_PACKUSWB_VdqWdq = 0x67, + OP2_PUNPCKHBW_VdqWdq = 0x68, + OP2_PUNPCKHWD_VdqWdq = 0x69, + OP2_PUNPCKHDQ_VdqWdq = 0x6A, + OP2_PACKSSDW_VdqWdq = 0x6B, + OP2_PUNPCKLQDQ_VdqWdq = 0x6C, + OP2_PUNPCKHQDQ_VdqWdq = 0x6D, + OP2_MOVD_VdEd = 0x6E, + OP2_MOVDQ_VsdWsd = 0x6F, + OP2_MOVDQ_VdqWdq = 0x6F, + OP2_PSHUFD_VdqWdqIb = 0x70, + OP2_PSHUFLW_VdqWdqIb = 0x70, + OP2_PSHUFHW_VdqWdqIb = 0x70, + OP2_PSLLW_UdqIb = 0x71, + OP2_PSRAW_UdqIb = 0x71, + OP2_PSRLW_UdqIb = 0x71, + OP2_PSLLD_UdqIb = 0x72, + OP2_PSRAD_UdqIb = 0x72, + OP2_PSRLD_UdqIb = 0x72, + OP2_PSRLDQ_Vd = 0x73, + OP2_PCMPEQB_VdqWdq = 0x74, + OP2_PCMPEQW_VdqWdq = 0x75, + OP2_PCMPEQD_VdqWdq = 0x76, + OP2_HADDPD = 0x7C, + OP2_MOVD_EdVd = 0x7E, + OP2_MOVQ_VdWd = 0x7E, + OP2_MOVDQ_WdqVdq = 0x7F, + OP2_JCC_rel32 = 0x80, + OP_SETCC = 0x90, + OP2_SHLD = 0xA4, + OP2_SHLD_GvEv = 0xA5, + OP2_SHRD = 0xAC, + OP2_SHRD_GvEv = 0xAD, + OP_FENCE = 0xAE, + OP2_IMUL_GvEv = 0xAF, + OP2_CMPXCHG_GvEb = 0xB0, + OP2_CMPXCHG_GvEw = 0xB1, + OP2_POPCNT_GvEv = 0xB8, + OP2_BSF_GvEv = 0xBC, + OP2_TZCNT_GvEv = 0xBC, + OP2_BSR_GvEv = 0xBD, + OP2_LZCNT_GvEv = 0xBD, + OP2_MOVSX_GvEb = 
0xBE, + OP2_MOVSX_GvEw = 0xBF, + OP2_MOVZX_GvEb = 0xB6, + OP2_MOVZX_GvEw = 0xB7, + OP2_XADD_EbGb = 0xC0, + OP2_XADD_EvGv = 0xC1, + OP2_CMPPS_VpsWps = 0xC2, + OP2_CMPPD_VpdWpd = 0xC2, + OP2_PINSRW = 0xC4, + OP2_PEXTRW_GdUdIb = 0xC5, + OP2_SHUFPS_VpsWpsIb = 0xC6, + OP2_SHUFPD_VpdWpdIb = 0xC6, + OP2_CMPXCHGNB = 0xC7, // CMPXCHG8B; CMPXCHG16B with REX + OP2_BSWAP = 0xC8, + OP2_PSRLW_VdqWdq = 0xD1, + OP2_PSRLD_VdqWdq = 0xD2, + OP2_PSRLQ_VdqWdq = 0xD3, + OP2_PADDQ_VdqWdq = 0xD4, + OP2_PMULLW_VdqWdq = 0xD5, + OP2_MOVQ_WdVd = 0xD6, + OP2_PMOVMSKB_EdVd = 0xD7, + OP2_PSUBUSB_VdqWdq = 0xD8, + OP2_PSUBUSW_VdqWdq = 0xD9, + OP2_PMINUB_VdqWdq = 0xDA, + OP2_PANDDQ_VdqWdq = 0xDB, + OP2_PADDUSB_VdqWdq = 0xDC, + OP2_PADDUSW_VdqWdq = 0xDD, + OP2_PMAXUB_VdqWdq = 0xDE, + OP2_PANDNDQ_VdqWdq = 0xDF, + OP2_PAVGB_VdqWdq = 0xE0, + OP2_PSRAW_VdqWdq = 0xE1, + OP2_PSRAD_VdqWdq = 0xE2, + OP2_PAVGW_VdqWdq = 0xE3, + OP2_PMULHUW_VdqWdq = 0xE4, + OP2_PMULHW_VdqWdq = 0xE5, + OP2_CVTDQ2PD_VpdWdq = 0xE6, + OP2_CVTTPD2DQ_VdqWpd = 0xE6, + OP2_PSUBSB_VdqWdq = 0xE8, + OP2_PSUBSW_VdqWdq = 0xE9, + OP2_PMINSW_VdqWdq = 0xEA, + OP2_PORDQ_VdqWdq = 0xEB, + OP2_PADDSB_VdqWdq = 0xEC, + OP2_PADDSW_VdqWdq = 0xED, + OP2_PMAXSW_VdqWdq = 0xEE, + OP2_PXORDQ_VdqWdq = 0xEF, + OP2_PSLLW_VdqWdq = 0xF1, + OP2_PSLLD_VdqWdq = 0xF2, + OP2_PSLLQ_VdqWdq = 0xF3, + OP2_PMULUDQ_VdqWdq = 0xF4, + OP2_PMADDWD_VdqWdq = 0xF5, + OP2_PSUBB_VdqWdq = 0xF8, + OP2_PSUBW_VdqWdq = 0xF9, + OP2_PSUBD_VdqWdq = 0xFA, + OP2_PSUBQ_VdqWdq = 0xFB, + OP2_PADDB_VdqWdq = 0xFC, + OP2_PADDW_VdqWdq = 0xFD, + OP2_PADDD_VdqWdq = 0xFE +}; + +enum ThreeByteOpcodeID { + OP3_PSHUFB_VdqWdq = 0x00, + OP3_PHADDD_VdqWdq = 0x02, + OP3_PMADDUBSW_VdqWdq = 0x04, + OP3_ROUNDPS_VpsWps = 0x08, + OP3_ROUNDPD_VpdWpd = 0x09, + OP3_ROUNDSS_VsdWsd = 0x0A, + OP3_ROUNDSD_VsdWsd = 0x0B, + OP3_PMULHRSW_VdqWdq = 0x0B, + OP3_BLENDPS_VpsWpsIb = 0x0C, + OP3_PBLENDW_VdqWdqIb = 0x0E, + OP3_PALIGNR_VdqWdqIb = 0x0F, + OP3_PBLENDVB_VdqWdq = 0x10, + OP3_BLENDVPS_VdqWdq = 0x14, + OP3_PEXTRB_EvVdqIb = 0x14, + OP3_PEXTRW_EwVdqIb = 0x15, + OP3_BLENDVPD_VdqWdq = 0x15, + OP3_PEXTRD_EvVdqIb = 0x16, + OP3_PEXTRQ_EvVdqIb = 0x16, + OP3_PTEST_VdVd = 0x17, + OP3_EXTRACTPS_EdVdqIb = 0x17, + OP3_VBROADCASTSS_VxWd = 0x18, + OP3_PABSB_VdqWdq = 0x1C, + OP3_PABSW_VdqWdq = 0x1D, + OP3_PABSD_VdqWdq = 0x1E, + OP3_PINSRB_VdqEvIb = 0x20, + OP3_PMOVSXBW_VdqWdq = 0x20, + OP3_INSERTPS_VpsUps = 0x21, + OP3_PINSRD_VdqEvIb = 0x22, + OP3_PINSRQ_VdqEvIb = 0x22, + OP3_PMOVSXWD_VdqWdq = 0x23, + OP3_PMOVSXDQ_VdqWdq = 0x25, + OP3_PMULDQ_VdqWdq = 0x28, + OP3_PCMPEQQ_VdqWdq = 0x29, + OP3_PACKUSDW_VdqWdq = 0x2B, + OP3_PMOVZXBW_VdqWdq = 0x30, + OP3_PMOVZXWD_VdqWdq = 0x33, + OP3_PMOVZXDQ_VdqWdq = 0x35, + OP3_PCMPGTQ_VdqWdq = 0x37, + OP3_PMINSB_VdqWdq = 0x38, + OP3_PMINSD_VdqWdq = 0x39, + OP3_PMINUW_VdqWdq = 0x3A, + OP3_PMINUD_VdqWdq = 0x3B, + OP3_PMAXSB_VdqWdq = 0x3C, + OP3_PMAXSD_VdqWdq = 0x3D, + OP3_PMAXUW_VdqWdq = 0x3E, + OP3_PMAXUD_VdqWdq = 0x3F, + OP3_PMULLD_VdqWdq = 0x40, + OP3_VBLENDVPS_VdqWdq = 0x4A, + OP3_VBLENDVPD_VdqWdq = 0x4B, + OP3_VPBLENDVB_VdqWdq = 0x4C, + OP3_VBROADCASTD_VxWx = 0x58, + OP3_VBROADCASTQ_VxWx = 0x59, + OP3_VBROADCASTB_VxWx = 0x78, + OP3_VBROADCASTW_VxWx = 0x79, + OP3_VFMADD231PS_VxHxWx = 0xB8, + OP3_VFMADD231PD_VxHxWx = 0xB8, + OP3_VFNMADD231PS_VxHxWx = 0xBC, + OP3_VFNMADD231PD_VxHxWx = 0xBC, + OP3_SHLX_GyEyBy = 0xF7, + OP3_SARX_GyEyBy = 0xF7, + OP3_SHRX_GyEyBy = 0xF7, +}; + +// Test whether the given opcode should be printed with its operands reversed. 
+inline bool IsXMMReversedOperands(TwoByteOpcodeID opcode) { + switch (opcode) { + case OP2_MOVSD_WsdVsd: // also OP2_MOVPS_WpsVps + case OP2_MOVAPS_WsdVsd: + case OP2_MOVDQ_WdqVdq: + return true; + default: + break; + } + return false; +} + +enum ThreeByteEscape { ESCAPE_38 = 0x38, ESCAPE_3A = 0x3A }; + +enum VexOperandType { VEX_PS = 0, VEX_PD = 1, VEX_SS = 2, VEX_SD = 3 }; + +inline OneByteOpcodeID jccRel8(Condition cond) { + return OneByteOpcodeID(OP_JCC_rel8 + std::underlying_type_t<Condition>(cond)); +} +inline TwoByteOpcodeID jccRel32(Condition cond) { + return TwoByteOpcodeID(OP2_JCC_rel32 + + std::underlying_type_t<Condition>(cond)); +} +inline TwoByteOpcodeID setccOpcode(Condition cond) { + return TwoByteOpcodeID(OP_SETCC + std::underlying_type_t<Condition>(cond)); +} +inline TwoByteOpcodeID cmovccOpcode(Condition cond) { + return TwoByteOpcodeID(OP2_CMOVCC_GvEv + + std::underlying_type_t<Condition>(cond)); +} + +enum GroupOpcodeID { + GROUP1_OP_ADD = 0, + GROUP1_OP_OR = 1, + GROUP1_OP_ADC = 2, + GROUP1_OP_SBB = 3, + GROUP1_OP_AND = 4, + GROUP1_OP_SUB = 5, + GROUP1_OP_XOR = 6, + GROUP1_OP_CMP = 7, + + GROUP1A_OP_POP = 0, + + GROUP2_OP_ROL = 0, + GROUP2_OP_ROR = 1, + GROUP2_OP_SHL = 4, + GROUP2_OP_SHR = 5, + GROUP2_OP_SAR = 7, + + GROUP3_OP_TEST = 0, + GROUP3_OP_NOT = 2, + GROUP3_OP_NEG = 3, + GROUP3_OP_MUL = 4, + GROUP3_OP_IMUL = 5, + GROUP3_OP_DIV = 6, + GROUP3_OP_IDIV = 7, + + GROUP5_OP_INC = 0, + GROUP5_OP_DEC = 1, + GROUP5_OP_CALLN = 2, + GROUP5_OP_JMPN = 4, + GROUP5_OP_PUSH = 6, + + FILD_OP_64 = 5, + + FPU6_OP_FLD = 0, + FPU6_OP_FISTTP = 1, + FPU6_OP_FSTP = 3, + FPU6_OP_FLDCW = 5, + FPU6_OP_FISTP = 7, + + GROUP11_MOV = 0 +}; + +static const RegisterID noBase = rbp; +static const RegisterID hasSib = rsp; +static const RegisterID noIndex = rsp; +#ifdef JS_CODEGEN_X64 +static const RegisterID noBase2 = r13; +static const RegisterID hasSib2 = r12; +#endif + +enum ModRmMode { + ModRmMemoryNoDisp, + ModRmMemoryDisp8, + ModRmMemoryDisp32, + ModRmRegister +}; + +} // namespace X86Encoding + +} // namespace jit +} // namespace js + +#endif /* jit_x86_shared_Encoding_x86_shared_h */ diff --git a/js/src/jit/x86-shared/LIR-x86-shared.h b/js/src/jit/x86-shared/LIR-x86-shared.h new file mode 100644 index 0000000000..27f9f86468 --- /dev/null +++ b/js/src/jit/x86-shared/LIR-x86-shared.h @@ -0,0 +1,304 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef jit_x86_shared_LIR_x86_shared_h +#define jit_x86_shared_LIR_x86_shared_h + +namespace js { +namespace jit { + +class LDivI : public LBinaryMath<1> { + public: + LIR_HEADER(DivI) + + LDivI(const LAllocation& lhs, const LAllocation& rhs, const LDefinition& temp) + : LBinaryMath(classOpcode) { + setOperand(0, lhs); + setOperand(1, rhs); + setTemp(0, temp); + } + + const char* extraName() const { + if (mir()->isTruncated()) { + if (mir()->canBeNegativeZero()) { + return mir()->canBeNegativeOverflow() + ? "Truncate_NegativeZero_NegativeOverflow" + : "Truncate_NegativeZero"; + } + return mir()->canBeNegativeOverflow() ? "Truncate_NegativeOverflow" + : "Truncate"; + } + if (mir()->canBeNegativeZero()) { + return mir()->canBeNegativeOverflow() ? "NegativeZero_NegativeOverflow" + : "NegativeZero"; + } + return mir()->canBeNegativeOverflow() ? 
"NegativeOverflow" : nullptr; + } + + const LDefinition* remainder() { return getTemp(0); } + MDiv* mir() const { return mir_->toDiv(); } +}; + +// Signed division by a power-of-two constant. +class LDivPowTwoI : public LBinaryMath<0> { + const int32_t shift_; + const bool negativeDivisor_; + + public: + LIR_HEADER(DivPowTwoI) + + LDivPowTwoI(const LAllocation& lhs, const LAllocation& lhsCopy, int32_t shift, + bool negativeDivisor) + : LBinaryMath(classOpcode), + shift_(shift), + negativeDivisor_(negativeDivisor) { + setOperand(0, lhs); + setOperand(1, lhsCopy); + } + + const LAllocation* numerator() { return getOperand(0); } + const LAllocation* numeratorCopy() { return getOperand(1); } + int32_t shift() const { return shift_; } + bool negativeDivisor() const { return negativeDivisor_; } + MDiv* mir() const { return mir_->toDiv(); } +}; + +class LDivOrModConstantI : public LInstructionHelper<1, 1, 1> { + const int32_t denominator_; + + public: + LIR_HEADER(DivOrModConstantI) + + LDivOrModConstantI(const LAllocation& lhs, int32_t denominator, + const LDefinition& temp) + : LInstructionHelper(classOpcode), denominator_(denominator) { + setOperand(0, lhs); + setTemp(0, temp); + } + + const LAllocation* numerator() { return getOperand(0); } + int32_t denominator() const { return denominator_; } + MBinaryArithInstruction* mir() const { + MOZ_ASSERT(mir_->isDiv() || mir_->isMod()); + return static_cast<MBinaryArithInstruction*>(mir_); + } + bool canBeNegativeDividend() const { + if (mir_->isMod()) { + return mir_->toMod()->canBeNegativeDividend(); + } + return mir_->toDiv()->canBeNegativeDividend(); + } +}; + +class LModI : public LBinaryMath<1> { + public: + LIR_HEADER(ModI) + + LModI(const LAllocation& lhs, const LAllocation& rhs, const LDefinition& temp) + : LBinaryMath(classOpcode) { + setOperand(0, lhs); + setOperand(1, rhs); + setTemp(0, temp); + } + + const char* extraName() const { + return mir()->isTruncated() ? "Truncated" : nullptr; + } + + const LDefinition* remainder() { return getDef(0); } + MMod* mir() const { return mir_->toMod(); } +}; + +// This class performs a simple x86 'div', yielding either a quotient or +// remainder depending on whether this instruction is defined to output eax +// (quotient) or edx (remainder). +class LUDivOrMod : public LBinaryMath<1> { + public: + LIR_HEADER(UDivOrMod); + + LUDivOrMod(const LAllocation& lhs, const LAllocation& rhs, + const LDefinition& temp) + : LBinaryMath(classOpcode) { + setOperand(0, lhs); + setOperand(1, rhs); + setTemp(0, temp); + } + + const LDefinition* remainder() { return getTemp(0); } + + const char* extraName() const { + return mir()->isTruncated() ? 
"Truncated" : nullptr; + } + + MBinaryArithInstruction* mir() const { + MOZ_ASSERT(mir_->isDiv() || mir_->isMod()); + return static_cast<MBinaryArithInstruction*>(mir_); + } + + bool canBeDivideByZero() const { + if (mir_->isMod()) { + return mir_->toMod()->canBeDivideByZero(); + } + return mir_->toDiv()->canBeDivideByZero(); + } + + bool trapOnError() const { + if (mir_->isMod()) { + return mir_->toMod()->trapOnError(); + } + return mir_->toDiv()->trapOnError(); + } + + wasm::BytecodeOffset bytecodeOffset() const { + if (mir_->isMod()) { + return mir_->toMod()->bytecodeOffset(); + } + return mir_->toDiv()->bytecodeOffset(); + } +}; + +class LUDivOrModConstant : public LInstructionHelper<1, 1, 1> { + const uint32_t denominator_; + + public: + LIR_HEADER(UDivOrModConstant) + + LUDivOrModConstant(const LAllocation& lhs, uint32_t denominator, + const LDefinition& temp) + : LInstructionHelper(classOpcode), denominator_(denominator) { + setOperand(0, lhs); + setTemp(0, temp); + } + + const LAllocation* numerator() { return getOperand(0); } + uint32_t denominator() const { return denominator_; } + MBinaryArithInstruction* mir() const { + MOZ_ASSERT(mir_->isDiv() || mir_->isMod()); + return static_cast<MBinaryArithInstruction*>(mir_); + } + bool canBeNegativeDividend() const { + if (mir_->isMod()) { + return mir_->toMod()->canBeNegativeDividend(); + } + return mir_->toDiv()->canBeNegativeDividend(); + } + bool trapOnError() const { + if (mir_->isMod()) { + return mir_->toMod()->trapOnError(); + } + return mir_->toDiv()->trapOnError(); + } + wasm::BytecodeOffset bytecodeOffset() const { + if (mir_->isMod()) { + return mir_->toMod()->bytecodeOffset(); + } + return mir_->toDiv()->bytecodeOffset(); + } +}; + +class LModPowTwoI : public LInstructionHelper<1, 1, 0> { + const int32_t shift_; + + public: + LIR_HEADER(ModPowTwoI) + + LModPowTwoI(const LAllocation& lhs, int32_t shift) + : LInstructionHelper(classOpcode), shift_(shift) { + setOperand(0, lhs); + } + + int32_t shift() const { return shift_; } + const LDefinition* remainder() { return getDef(0); } + MMod* mir() const { return mir_->toMod(); } +}; + +// Takes a tableswitch with an integer to decide +class LTableSwitch : public LInstructionHelper<0, 1, 2> { + public: + LIR_HEADER(TableSwitch) + + LTableSwitch(const LAllocation& in, const LDefinition& inputCopy, + const LDefinition& jumpTablePointer, MTableSwitch* ins) + : LInstructionHelper(classOpcode) { + setOperand(0, in); + setTemp(0, inputCopy); + setTemp(1, jumpTablePointer); + setMir(ins); + } + + MTableSwitch* mir() const { return mir_->toTableSwitch(); } + + const LAllocation* index() { return getOperand(0); } + const LDefinition* tempInt() { return getTemp(0); } + const LDefinition* tempPointer() { return getTemp(1); } +}; + +// Takes a tableswitch with a value to decide +class LTableSwitchV : public LInstructionHelper<0, BOX_PIECES, 3> { + public: + LIR_HEADER(TableSwitchV) + + LTableSwitchV(const LBoxAllocation& input, const LDefinition& inputCopy, + const LDefinition& floatCopy, + const LDefinition& jumpTablePointer, MTableSwitch* ins) + : LInstructionHelper(classOpcode) { + setBoxOperand(InputValue, input); + setTemp(0, inputCopy); + setTemp(1, floatCopy); + setTemp(2, jumpTablePointer); + setMir(ins); + } + + MTableSwitch* mir() const { return mir_->toTableSwitch(); } + + static const size_t InputValue = 0; + + const LDefinition* tempInt() { return getTemp(0); } + const LDefinition* tempFloat() { return getTemp(1); } + const LDefinition* tempPointer() { return getTemp(2); } +}; + 
+class LMulI : public LBinaryMath<0, 1> { + public: + LIR_HEADER(MulI) + + LMulI(const LAllocation& lhs, const LAllocation& rhs, + const LAllocation& lhsCopy) + : LBinaryMath(classOpcode) { + setOperand(0, lhs); + setOperand(1, rhs); + setOperand(2, lhsCopy); + } + + const char* extraName() const { + return (mir()->mode() == MMul::Integer) + ? "Integer" + : (mir()->canBeNegativeZero() ? "CanBeNegativeZero" : nullptr); + } + + MMul* mir() const { return mir_->toMul(); } + const LAllocation* lhsCopy() { return this->getOperand(2); } +}; + +class LInt64ToFloatingPoint : public LInstructionHelper<1, INT64_PIECES, 1> { + public: + LIR_HEADER(Int64ToFloatingPoint); + + LInt64ToFloatingPoint(const LInt64Allocation& in, const LDefinition& temp) + : LInstructionHelper(classOpcode) { + setInt64Operand(0, in); + setTemp(0, temp); + } + + MInt64ToFloatingPoint* mir() const { return mir_->toInt64ToFloatingPoint(); } + + const LDefinition* temp() { return getTemp(0); } +}; + +} // namespace jit +} // namespace js + +#endif /* jit_x86_shared_LIR_x86_shared_h */ diff --git a/js/src/jit/x86-shared/Lowering-x86-shared.cpp b/js/src/jit/x86-shared/Lowering-x86-shared.cpp new file mode 100644 index 0000000000..bef178b2f5 --- /dev/null +++ b/js/src/jit/x86-shared/Lowering-x86-shared.cpp @@ -0,0 +1,1863 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "jit/x86-shared/Lowering-x86-shared.h" + +#include "mozilla/MathAlgorithms.h" + +#include "jit/Lowering.h" +#include "jit/MIR.h" + +#include "jit/shared/Lowering-shared-inl.h" + +using namespace js; +using namespace js::jit; + +using mozilla::Abs; +using mozilla::FloorLog2; +using mozilla::Maybe; +using mozilla::Nothing; +using mozilla::Some; + +LTableSwitch* LIRGeneratorX86Shared::newLTableSwitch( + const LAllocation& in, const LDefinition& inputCopy, + MTableSwitch* tableswitch) { + return new (alloc()) LTableSwitch(in, inputCopy, temp(), tableswitch); +} + +LTableSwitchV* LIRGeneratorX86Shared::newLTableSwitchV( + MTableSwitch* tableswitch) { + return new (alloc()) LTableSwitchV(useBox(tableswitch->getOperand(0)), temp(), + tempDouble(), temp(), tableswitch); +} + +void LIRGenerator::visitPowHalf(MPowHalf* ins) { + MDefinition* input = ins->input(); + MOZ_ASSERT(input->type() == MIRType::Double); + LPowHalfD* lir = new (alloc()) LPowHalfD(useRegisterAtStart(input)); + define(lir, ins); +} + +void LIRGeneratorX86Shared::lowerForShift(LInstructionHelper<1, 2, 0>* ins, + MDefinition* mir, MDefinition* lhs, + MDefinition* rhs) { + ins->setOperand(0, useRegisterAtStart(lhs)); + + // Shift operand should be constant or, unless BMI2 is available, in register + // ecx. x86 can't shift a non-ecx register. + if (rhs->isConstant()) { + ins->setOperand(1, useOrConstantAtStart(rhs)); + } else if (Assembler::HasBMI2() && !mir->isRotate()) { + ins->setOperand(1, willHaveDifferentLIRNodes(lhs, rhs) + ? useRegister(rhs) + : useRegisterAtStart(rhs)); + } else { + ins->setOperand(1, willHaveDifferentLIRNodes(lhs, rhs) + ? 
useFixed(rhs, ecx) + : useFixedAtStart(rhs, ecx)); + } + + defineReuseInput(ins, mir, 0); +} + +template <size_t Temps> +void LIRGeneratorX86Shared::lowerForShiftInt64( + LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, Temps>* ins, + MDefinition* mir, MDefinition* lhs, MDefinition* rhs) { + ins->setInt64Operand(0, useInt64RegisterAtStart(lhs)); +#if defined(JS_NUNBOX32) + if (mir->isRotate()) { + ins->setTemp(0, temp()); + } +#endif + + static_assert(LShiftI64::Rhs == INT64_PIECES, + "Assume Rhs is located at INT64_PIECES."); + static_assert(LRotateI64::Count == INT64_PIECES, + "Assume Count is located at INT64_PIECES."); + + // Shift operand should be constant or, unless BMI2 is available, in register + // ecx. x86 can't shift a non-ecx register. + if (rhs->isConstant()) { + ins->setOperand(INT64_PIECES, useOrConstantAtStart(rhs)); +#ifdef JS_CODEGEN_X64 + } else if (Assembler::HasBMI2() && !mir->isRotate()) { + ins->setOperand(INT64_PIECES, useRegister(rhs)); +#endif + } else { + // The operands are int64, but we only care about the lower 32 bits of + // the RHS. On 32-bit, the code below will load that part in ecx and + // will discard the upper half. + ensureDefined(rhs); + LUse use(ecx); + use.setVirtualRegister(rhs->virtualRegister()); + ins->setOperand(INT64_PIECES, use); + } + + defineInt64ReuseInput(ins, mir, 0); +} + +template void LIRGeneratorX86Shared::lowerForShiftInt64( + LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, 0>* ins, + MDefinition* mir, MDefinition* lhs, MDefinition* rhs); +template void LIRGeneratorX86Shared::lowerForShiftInt64( + LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, 1>* ins, + MDefinition* mir, MDefinition* lhs, MDefinition* rhs); + +void LIRGeneratorX86Shared::lowerForCompareI64AndBranch( + MTest* mir, MCompare* comp, JSOp op, MDefinition* left, MDefinition* right, + MBasicBlock* ifTrue, MBasicBlock* ifFalse) { + auto* lir = new (alloc()) + LCompareI64AndBranch(comp, op, useInt64Register(left), + useInt64OrConstant(right), ifTrue, ifFalse); + add(lir, mir); +} + +void LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 1, 0>* ins, + MDefinition* mir, MDefinition* input) { + ins->setOperand(0, useRegisterAtStart(input)); + defineReuseInput(ins, mir, 0); +} + +void LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 2, 0>* ins, + MDefinition* mir, MDefinition* lhs, + MDefinition* rhs) { + ins->setOperand(0, useRegisterAtStart(lhs)); + ins->setOperand(1, willHaveDifferentLIRNodes(lhs, rhs) + ? useOrConstant(rhs) + : useOrConstantAtStart(rhs)); + defineReuseInput(ins, mir, 0); +} + +template <size_t Temps> +void LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, Temps>* ins, + MDefinition* mir, MDefinition* lhs, + MDefinition* rhs) { + // Without AVX, we'll need to use the x86 encodings where one of the + // inputs must be the same location as the output. + if (!Assembler::HasAVX()) { + ins->setOperand(0, useRegisterAtStart(lhs)); + ins->setOperand( + 1, willHaveDifferentLIRNodes(lhs, rhs) ? 
use(rhs) : useAtStart(rhs)); + defineReuseInput(ins, mir, 0); + } else { + ins->setOperand(0, useRegisterAtStart(lhs)); + ins->setOperand(1, useAtStart(rhs)); + define(ins, mir); + } +} + +template void LIRGeneratorX86Shared::lowerForFPU( + LInstructionHelper<1, 2, 0>* ins, MDefinition* mir, MDefinition* lhs, + MDefinition* rhs); +template void LIRGeneratorX86Shared::lowerForFPU( + LInstructionHelper<1, 2, 1>* ins, MDefinition* mir, MDefinition* lhs, + MDefinition* rhs); + +void LIRGeneratorX86Shared::lowerForBitAndAndBranch(LBitAndAndBranch* baab, + MInstruction* mir, + MDefinition* lhs, + MDefinition* rhs) { + baab->setOperand(0, useRegisterAtStart(lhs)); + baab->setOperand(1, useRegisterOrConstantAtStart(rhs)); + add(baab, mir); +} + +void LIRGeneratorX86Shared::lowerNegI(MInstruction* ins, MDefinition* input) { + defineReuseInput(new (alloc()) LNegI(useRegisterAtStart(input)), ins, 0); +} + +void LIRGeneratorX86Shared::lowerNegI64(MInstruction* ins, MDefinition* input) { + defineInt64ReuseInput(new (alloc()) LNegI64(useInt64RegisterAtStart(input)), + ins, 0); +} + +void LIRGenerator::visitAbs(MAbs* ins) { + defineReuseInput(allocateAbs(ins, useRegisterAtStart(ins->input())), ins, 0); +} + +void LIRGeneratorX86Shared::lowerMulI(MMul* mul, MDefinition* lhs, + MDefinition* rhs) { + // Note: If we need a negative zero check, lhs is used twice. + LAllocation lhsCopy = mul->canBeNegativeZero() ? use(lhs) : LAllocation(); + LMulI* lir = new (alloc()) + LMulI(useRegisterAtStart(lhs), + willHaveDifferentLIRNodes(lhs, rhs) ? useOrConstant(rhs) + : useOrConstantAtStart(rhs), + lhsCopy); + if (mul->fallible()) { + assignSnapshot(lir, mul->bailoutKind()); + } + defineReuseInput(lir, mul, 0); +} + +void LIRGeneratorX86Shared::lowerDivI(MDiv* div) { + if (div->isUnsigned()) { + lowerUDiv(div); + return; + } + + // Division instructions are slow. Division by constant denominators can be + // rewritten to use other instructions. + if (div->rhs()->isConstant()) { + int32_t rhs = div->rhs()->toConstant()->toInt32(); + + // Division by powers of two can be done by shifting, and division by + // other numbers can be done by a reciprocal multiplication technique. + int32_t shift = FloorLog2(Abs(rhs)); + if (rhs != 0 && uint32_t(1) << shift == Abs(rhs)) { + LAllocation lhs = useRegisterAtStart(div->lhs()); + LDivPowTwoI* lir; + // When truncated with maybe a non-zero remainder, we have to round the + // result toward 0. This requires an extra register to round up/down + // whether the left-hand-side is signed. + bool needRoundNeg = div->canBeNegativeDividend() && div->isTruncated(); + if (!needRoundNeg) { + // Numerator is unsigned, so does not need adjusting. + lir = new (alloc()) LDivPowTwoI(lhs, lhs, shift, rhs < 0); + } else { + // Numerator might be signed, and needs adjusting, and an extra lhs copy + // is needed to round the result of the integer division towards zero. 
+ lir = new (alloc()) + LDivPowTwoI(lhs, useRegister(div->lhs()), shift, rhs < 0); + } + if (div->fallible()) { + assignSnapshot(lir, div->bailoutKind()); + } + defineReuseInput(lir, div, 0); + return; + } + if (rhs != 0) { + LDivOrModConstantI* lir; + lir = new (alloc()) + LDivOrModConstantI(useRegister(div->lhs()), rhs, tempFixed(eax)); + if (div->fallible()) { + assignSnapshot(lir, div->bailoutKind()); + } + defineFixed(lir, div, LAllocation(AnyRegister(edx))); + return; + } + } + + LDivI* lir = new (alloc()) + LDivI(useRegister(div->lhs()), useRegister(div->rhs()), tempFixed(edx)); + if (div->fallible()) { + assignSnapshot(lir, div->bailoutKind()); + } + defineFixed(lir, div, LAllocation(AnyRegister(eax))); +} + +void LIRGeneratorX86Shared::lowerModI(MMod* mod) { + if (mod->isUnsigned()) { + lowerUMod(mod); + return; + } + + if (mod->rhs()->isConstant()) { + int32_t rhs = mod->rhs()->toConstant()->toInt32(); + int32_t shift = FloorLog2(Abs(rhs)); + if (rhs != 0 && uint32_t(1) << shift == Abs(rhs)) { + LModPowTwoI* lir = + new (alloc()) LModPowTwoI(useRegisterAtStart(mod->lhs()), shift); + if (mod->fallible()) { + assignSnapshot(lir, mod->bailoutKind()); + } + defineReuseInput(lir, mod, 0); + return; + } + if (rhs != 0) { + LDivOrModConstantI* lir; + lir = new (alloc()) + LDivOrModConstantI(useRegister(mod->lhs()), rhs, tempFixed(edx)); + if (mod->fallible()) { + assignSnapshot(lir, mod->bailoutKind()); + } + defineFixed(lir, mod, LAllocation(AnyRegister(eax))); + return; + } + } + + LModI* lir = new (alloc()) + LModI(useRegister(mod->lhs()), useRegister(mod->rhs()), tempFixed(eax)); + if (mod->fallible()) { + assignSnapshot(lir, mod->bailoutKind()); + } + defineFixed(lir, mod, LAllocation(AnyRegister(edx))); +} + +void LIRGenerator::visitWasmNeg(MWasmNeg* ins) { + switch (ins->type()) { + case MIRType::Int32: + defineReuseInput(new (alloc()) LNegI(useRegisterAtStart(ins->input())), + ins, 0); + break; + case MIRType::Float32: + defineReuseInput(new (alloc()) LNegF(useRegisterAtStart(ins->input())), + ins, 0); + break; + case MIRType::Double: + defineReuseInput(new (alloc()) LNegD(useRegisterAtStart(ins->input())), + ins, 0); + break; + default: + MOZ_CRASH(); + } +} + +void LIRGeneratorX86Shared::lowerWasmSelectI(MWasmSelect* select) { + auto* lir = new (alloc()) + LWasmSelect(useRegisterAtStart(select->trueExpr()), + useAny(select->falseExpr()), useRegister(select->condExpr())); + defineReuseInput(lir, select, LWasmSelect::TrueExprIndex); +} + +void LIRGeneratorX86Shared::lowerWasmSelectI64(MWasmSelect* select) { + auto* lir = new (alloc()) LWasmSelectI64( + useInt64RegisterAtStart(select->trueExpr()), + useInt64(select->falseExpr()), useRegister(select->condExpr())); + defineInt64ReuseInput(lir, select, LWasmSelectI64::TrueExprIndex); +} + +void LIRGenerator::visitAsmJSLoadHeap(MAsmJSLoadHeap* ins) { + MDefinition* base = ins->base(); + MOZ_ASSERT(base->type() == MIRType::Int32); + + MDefinition* boundsCheckLimit = ins->boundsCheckLimit(); + MOZ_ASSERT_IF(ins->needsBoundsCheck(), + boundsCheckLimit->type() == MIRType::Int32); + + // For simplicity, require a register if we're going to emit a bounds-check + // branch, so that we don't have special cases for constants. This should + // only happen in rare constant-folding cases since asm.js sets the minimum + // heap size based when accessed via constant. + LAllocation baseAlloc = ins->needsBoundsCheck() + ? useRegisterAtStart(base) + : useRegisterOrZeroAtStart(base); + + LAllocation limitAlloc = ins->needsBoundsCheck() + ? 
useRegisterAtStart(boundsCheckLimit) + : LAllocation(); + LAllocation memoryBaseAlloc = ins->hasMemoryBase() + ? useRegisterAtStart(ins->memoryBase()) + : LAllocation(); + + auto* lir = + new (alloc()) LAsmJSLoadHeap(baseAlloc, limitAlloc, memoryBaseAlloc); + define(lir, ins); +} + +void LIRGenerator::visitAsmJSStoreHeap(MAsmJSStoreHeap* ins) { + MDefinition* base = ins->base(); + MOZ_ASSERT(base->type() == MIRType::Int32); + + MDefinition* boundsCheckLimit = ins->boundsCheckLimit(); + MOZ_ASSERT_IF(ins->needsBoundsCheck(), + boundsCheckLimit->type() == MIRType::Int32); + + // For simplicity, require a register if we're going to emit a bounds-check + // branch, so that we don't have special cases for constants. This should + // only happen in rare constant-folding cases since asm.js sets the minimum + // heap size based when accessed via constant. + LAllocation baseAlloc = ins->needsBoundsCheck() + ? useRegisterAtStart(base) + : useRegisterOrZeroAtStart(base); + + LAllocation limitAlloc = ins->needsBoundsCheck() + ? useRegisterAtStart(boundsCheckLimit) + : LAllocation(); + LAllocation memoryBaseAlloc = ins->hasMemoryBase() + ? useRegisterAtStart(ins->memoryBase()) + : LAllocation(); + + LAsmJSStoreHeap* lir = nullptr; + switch (ins->access().type()) { + case Scalar::Int8: + case Scalar::Uint8: +#ifdef JS_CODEGEN_X86 + // See comment for LIRGeneratorX86::useByteOpRegister. + lir = new (alloc()) LAsmJSStoreHeap( + baseAlloc, useFixed(ins->value(), eax), limitAlloc, memoryBaseAlloc); + break; +#endif + case Scalar::Int16: + case Scalar::Uint16: + case Scalar::Int32: + case Scalar::Uint32: + case Scalar::Float32: + case Scalar::Float64: + // For now, don't allow constant values. The immediate operand affects + // instruction layout which affects patching. + lir = new (alloc()) + LAsmJSStoreHeap(baseAlloc, useRegisterAtStart(ins->value()), + limitAlloc, memoryBaseAlloc); + break; + case Scalar::Int64: + case Scalar::Simd128: + MOZ_CRASH("NYI"); + case Scalar::Uint8Clamped: + case Scalar::BigInt64: + case Scalar::BigUint64: + case Scalar::MaxTypedArrayViewType: + MOZ_CRASH("unexpected array type"); + } + add(lir, ins); +} + +void LIRGeneratorX86Shared::lowerUDiv(MDiv* div) { + if (div->rhs()->isConstant()) { + // NOTE: the result of toInt32 is coerced to uint32_t. 
+ uint32_t rhs = div->rhs()->toConstant()->toInt32(); + int32_t shift = FloorLog2(rhs); + + LAllocation lhs = useRegisterAtStart(div->lhs()); + if (rhs != 0 && uint32_t(1) << shift == rhs) { + LDivPowTwoI* lir = new (alloc()) LDivPowTwoI(lhs, lhs, shift, false); + if (div->fallible()) { + assignSnapshot(lir, div->bailoutKind()); + } + defineReuseInput(lir, div, 0); + } else { + LUDivOrModConstant* lir = new (alloc()) + LUDivOrModConstant(useRegister(div->lhs()), rhs, tempFixed(eax)); + if (div->fallible()) { + assignSnapshot(lir, div->bailoutKind()); + } + defineFixed(lir, div, LAllocation(AnyRegister(edx))); + } + return; + } + + LUDivOrMod* lir = new (alloc()) LUDivOrMod( + useRegister(div->lhs()), useRegister(div->rhs()), tempFixed(edx)); + if (div->fallible()) { + assignSnapshot(lir, div->bailoutKind()); + } + defineFixed(lir, div, LAllocation(AnyRegister(eax))); +} + +void LIRGeneratorX86Shared::lowerUMod(MMod* mod) { + if (mod->rhs()->isConstant()) { + uint32_t rhs = mod->rhs()->toConstant()->toInt32(); + int32_t shift = FloorLog2(rhs); + + if (rhs != 0 && uint32_t(1) << shift == rhs) { + LModPowTwoI* lir = + new (alloc()) LModPowTwoI(useRegisterAtStart(mod->lhs()), shift); + if (mod->fallible()) { + assignSnapshot(lir, mod->bailoutKind()); + } + defineReuseInput(lir, mod, 0); + } else { + LUDivOrModConstant* lir = new (alloc()) + LUDivOrModConstant(useRegister(mod->lhs()), rhs, tempFixed(edx)); + if (mod->fallible()) { + assignSnapshot(lir, mod->bailoutKind()); + } + defineFixed(lir, mod, LAllocation(AnyRegister(eax))); + } + return; + } + + LUDivOrMod* lir = new (alloc()) LUDivOrMod( + useRegister(mod->lhs()), useRegister(mod->rhs()), tempFixed(eax)); + if (mod->fallible()) { + assignSnapshot(lir, mod->bailoutKind()); + } + defineFixed(lir, mod, LAllocation(AnyRegister(edx))); +} + +void LIRGeneratorX86Shared::lowerUrshD(MUrsh* mir) { + MDefinition* lhs = mir->lhs(); + MDefinition* rhs = mir->rhs(); + + MOZ_ASSERT(lhs->type() == MIRType::Int32); + MOZ_ASSERT(rhs->type() == MIRType::Int32); + MOZ_ASSERT(mir->type() == MIRType::Double); + +#ifdef JS_CODEGEN_X64 + static_assert(ecx == rcx); +#endif + + // Without BMI2, x86 can only shift by ecx. + LUse lhsUse = useRegisterAtStart(lhs); + LAllocation rhsAlloc; + if (rhs->isConstant()) { + rhsAlloc = useOrConstant(rhs); + } else if (Assembler::HasBMI2()) { + rhsAlloc = useRegister(rhs); + } else { + rhsAlloc = useFixed(rhs, ecx); + } + + LUrshD* lir = new (alloc()) LUrshD(lhsUse, rhsAlloc, tempCopy(lhs, 0)); + define(lir, mir); +} + +void LIRGeneratorX86Shared::lowerPowOfTwoI(MPow* mir) { + int32_t base = mir->input()->toConstant()->toInt32(); + MDefinition* power = mir->power(); + + // Shift operand should be in register ecx, unless BMI2 is available. + // x86 can't shift a non-ecx register. + LAllocation powerAlloc = + Assembler::HasBMI2() ? useRegister(power) : useFixed(power, ecx); + auto* lir = new (alloc()) LPowOfTwoI(powerAlloc, base); + assignSnapshot(lir, mir->bailoutKind()); + define(lir, mir); +} + +void LIRGeneratorX86Shared::lowerBigIntLsh(MBigIntLsh* ins) { + // Shift operand should be in register ecx, unless BMI2 is available. + // x86 can't shift a non-ecx register. + LDefinition shiftAlloc = Assembler::HasBMI2() ? 
temp() : tempFixed(ecx); + auto* lir = + new (alloc()) LBigIntLsh(useRegister(ins->lhs()), useRegister(ins->rhs()), + temp(), shiftAlloc, temp()); + define(lir, ins); + assignSafepoint(lir, ins); +} + +void LIRGeneratorX86Shared::lowerBigIntRsh(MBigIntRsh* ins) { + // Shift operand should be in register ecx, unless BMI2 is available. + // x86 can't shift a non-ecx register. + LDefinition shiftAlloc = Assembler::HasBMI2() ? temp() : tempFixed(ecx); + auto* lir = + new (alloc()) LBigIntRsh(useRegister(ins->lhs()), useRegister(ins->rhs()), + temp(), shiftAlloc, temp()); + define(lir, ins); + assignSafepoint(lir, ins); +} + +void LIRGeneratorX86Shared::lowerWasmBuiltinTruncateToInt32( + MWasmBuiltinTruncateToInt32* ins) { + MDefinition* opd = ins->input(); + MOZ_ASSERT(opd->type() == MIRType::Double || opd->type() == MIRType::Float32); + + LDefinition maybeTemp = + Assembler::HasSSE3() ? LDefinition::BogusTemp() : tempDouble(); + if (opd->type() == MIRType::Double) { + define(new (alloc()) LWasmBuiltinTruncateDToInt32( + useRegister(opd), useFixed(ins->instance(), InstanceReg), + maybeTemp), + ins); + return; + } + + define( + new (alloc()) LWasmBuiltinTruncateFToInt32( + useRegister(opd), useFixed(ins->instance(), InstanceReg), maybeTemp), + ins); +} + +void LIRGeneratorX86Shared::lowerTruncateDToInt32(MTruncateToInt32* ins) { + MDefinition* opd = ins->input(); + MOZ_ASSERT(opd->type() == MIRType::Double); + + LDefinition maybeTemp = + Assembler::HasSSE3() ? LDefinition::BogusTemp() : tempDouble(); + define(new (alloc()) LTruncateDToInt32(useRegister(opd), maybeTemp), ins); +} + +void LIRGeneratorX86Shared::lowerTruncateFToInt32(MTruncateToInt32* ins) { + MDefinition* opd = ins->input(); + MOZ_ASSERT(opd->type() == MIRType::Float32); + + LDefinition maybeTemp = + Assembler::HasSSE3() ? LDefinition::BogusTemp() : tempFloat32(); + define(new (alloc()) LTruncateFToInt32(useRegister(opd), maybeTemp), ins); +} + +void LIRGeneratorX86Shared::lowerCompareExchangeTypedArrayElement( + MCompareExchangeTypedArrayElement* ins, bool useI386ByteRegisters) { + MOZ_ASSERT(ins->arrayType() != Scalar::Float32); + MOZ_ASSERT(ins->arrayType() != Scalar::Float64); + + MOZ_ASSERT(ins->elements()->type() == MIRType::Elements); + MOZ_ASSERT(ins->index()->type() == MIRType::IntPtr); + + const LUse elements = useRegister(ins->elements()); + const LAllocation index = + useRegisterOrIndexConstant(ins->index(), ins->arrayType()); + + // If the target is a floating register then we need a temp at the + // lower level; that temp must be eax. + // + // Otherwise the target (if used) is an integer register, which + // must be eax. If the target is not used the machine code will + // still clobber eax, so just pretend it's used. + // + // oldval must be in a register. + // + // newval must be in a register. If the source is a byte array + // then newval must be a register that has a byte size: on x86 + // this must be ebx, ecx, or edx (eax is taken for the output). + // + // Bug #1077036 describes some further optimization opportunities. 
+ + bool fixedOutput = false; + LDefinition tempDef = LDefinition::BogusTemp(); + LAllocation newval; + if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) { + tempDef = tempFixed(eax); + newval = useRegister(ins->newval()); + } else { + fixedOutput = true; + if (useI386ByteRegisters && ins->isByteArray()) { + newval = useFixed(ins->newval(), ebx); + } else { + newval = useRegister(ins->newval()); + } + } + + const LAllocation oldval = useRegister(ins->oldval()); + + LCompareExchangeTypedArrayElement* lir = + new (alloc()) LCompareExchangeTypedArrayElement(elements, index, oldval, + newval, tempDef); + + if (fixedOutput) { + defineFixed(lir, ins, LAllocation(AnyRegister(eax))); + } else { + define(lir, ins); + } +} + +void LIRGeneratorX86Shared::lowerAtomicExchangeTypedArrayElement( + MAtomicExchangeTypedArrayElement* ins, bool useI386ByteRegisters) { + MOZ_ASSERT(ins->arrayType() <= Scalar::Uint32); + + MOZ_ASSERT(ins->elements()->type() == MIRType::Elements); + MOZ_ASSERT(ins->index()->type() == MIRType::IntPtr); + + const LUse elements = useRegister(ins->elements()); + const LAllocation index = + useRegisterOrIndexConstant(ins->index(), ins->arrayType()); + const LAllocation value = useRegister(ins->value()); + + // The underlying instruction is XCHG, which can operate on any + // register. + // + // If the target is a floating register (for Uint32) then we need + // a temp into which to exchange. + // + // If the source is a byte array then we need a register that has + // a byte size; in this case -- on x86 only -- pin the output to + // an appropriate register and use that as a temp in the back-end. + + LDefinition tempDef = LDefinition::BogusTemp(); + if (ins->arrayType() == Scalar::Uint32) { + MOZ_ASSERT(ins->type() == MIRType::Double); + tempDef = temp(); + } + + LAtomicExchangeTypedArrayElement* lir = new (alloc()) + LAtomicExchangeTypedArrayElement(elements, index, value, tempDef); + + if (useI386ByteRegisters && ins->isByteArray()) { + defineFixed(lir, ins, LAllocation(AnyRegister(eax))); + } else { + define(lir, ins); + } +} + +void LIRGeneratorX86Shared::lowerAtomicTypedArrayElementBinop( + MAtomicTypedArrayElementBinop* ins, bool useI386ByteRegisters) { + MOZ_ASSERT(ins->arrayType() != Scalar::Uint8Clamped); + MOZ_ASSERT(ins->arrayType() != Scalar::Float32); + MOZ_ASSERT(ins->arrayType() != Scalar::Float64); + + MOZ_ASSERT(ins->elements()->type() == MIRType::Elements); + MOZ_ASSERT(ins->index()->type() == MIRType::IntPtr); + + const LUse elements = useRegister(ins->elements()); + const LAllocation index = + useRegisterOrIndexConstant(ins->index(), ins->arrayType()); + + // Case 1: the result of the operation is not used. + // + // We'll emit a single instruction: LOCK ADD, LOCK SUB, LOCK AND, + // LOCK OR, or LOCK XOR. We can do this even for the Uint32 case. + + if (ins->isForEffect()) { + LAllocation value; + if (useI386ByteRegisters && ins->isByteArray() && + !ins->value()->isConstant()) { + value = useFixed(ins->value(), ebx); + } else { + value = useRegisterOrConstant(ins->value()); + } + + LAtomicTypedArrayElementBinopForEffect* lir = new (alloc()) + LAtomicTypedArrayElementBinopForEffect(elements, index, value); + + add(lir, ins); + return; + } + + // Case 2: the result of the operation is used. + // + // For ADD and SUB we'll use XADD: + // + // movl src, output + // lock xaddl output, mem + // + // For the 8-bit variants XADD needs a byte register for the output. 
+ // + // For AND/OR/XOR we need to use a CMPXCHG loop: + // + // movl *mem, eax + // L: mov eax, temp + // andl src, temp + // lock cmpxchg temp, mem ; reads eax also + // jnz L + // ; result in eax + // + // Note the placement of L, cmpxchg will update eax with *mem if + // *mem does not have the expected value, so reloading it at the + // top of the loop would be redundant. + // + // If the array is not a uint32 array then: + // - eax should be the output (one result of the cmpxchg) + // - there is a temp, which must have a byte register if + // the array has 1-byte elements elements + // + // If the array is a uint32 array then: + // - eax is the first temp + // - we also need a second temp + // + // There are optimization opportunities: + // - better register allocation in the x86 8-bit case, Bug #1077036. + + bool bitOp = !(ins->operation() == AtomicFetchAddOp || + ins->operation() == AtomicFetchSubOp); + bool fixedOutput = true; + bool reuseInput = false; + LDefinition tempDef1 = LDefinition::BogusTemp(); + LDefinition tempDef2 = LDefinition::BogusTemp(); + LAllocation value; + + if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) { + value = useRegisterOrConstant(ins->value()); + fixedOutput = false; + if (bitOp) { + tempDef1 = tempFixed(eax); + tempDef2 = temp(); + } else { + tempDef1 = temp(); + } + } else if (useI386ByteRegisters && ins->isByteArray()) { + if (ins->value()->isConstant()) { + value = useRegisterOrConstant(ins->value()); + } else { + value = useFixed(ins->value(), ebx); + } + if (bitOp) { + tempDef1 = tempFixed(ecx); + } + } else if (bitOp) { + value = useRegisterOrConstant(ins->value()); + tempDef1 = temp(); + } else if (ins->value()->isConstant()) { + fixedOutput = false; + value = useRegisterOrConstant(ins->value()); + } else { + fixedOutput = false; + reuseInput = true; + value = useRegisterAtStart(ins->value()); + } + + LAtomicTypedArrayElementBinop* lir = new (alloc()) + LAtomicTypedArrayElementBinop(elements, index, value, tempDef1, tempDef2); + + if (fixedOutput) { + defineFixed(lir, ins, LAllocation(AnyRegister(eax))); + } else if (reuseInput) { + defineReuseInput(lir, ins, LAtomicTypedArrayElementBinop::valueOp); + } else { + define(lir, ins); + } +} + +void LIRGenerator::visitCopySign(MCopySign* ins) { + MDefinition* lhs = ins->lhs(); + MDefinition* rhs = ins->rhs(); + + MOZ_ASSERT(IsFloatingPointType(lhs->type())); + MOZ_ASSERT(lhs->type() == rhs->type()); + MOZ_ASSERT(lhs->type() == ins->type()); + + LInstructionHelper<1, 2, 2>* lir; + if (lhs->type() == MIRType::Double) { + lir = new (alloc()) LCopySignD(); + } else { + lir = new (alloc()) LCopySignF(); + } + + // As lowerForFPU, but we want rhs to be in a FP register too. + lir->setOperand(0, useRegisterAtStart(lhs)); + if (!Assembler::HasAVX()) { + lir->setOperand(1, willHaveDifferentLIRNodes(lhs, rhs) + ? useRegister(rhs) + : useRegisterAtStart(rhs)); + defineReuseInput(lir, ins, 0); + } else { + lir->setOperand(1, useRegisterAtStart(rhs)); + define(lir, ins); + } +} + +// These lowerings are really x86-shared but some Masm APIs are not yet +// available on x86. + +// Ternary and binary operators require the dest register to be the same as +// their first input register, leading to a pattern of useRegisterAtStart + +// defineReuseInput. 
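Illustration only, not part of the patch: the useRegisterAtStart + defineReuseInput pairing mentioned above reduces to the following shape, which the SIMD lowerings below repeat for the destructive (non-AVX) encodings; ins, mir, lhs and rhs stand for the usual lowering inputs.

    ins->setOperand(0, useRegisterAtStart(lhs));  // lhs is only needed at the start of ins,
    ins->setOperand(1, useRegister(rhs));         // so its register is free afterwards...
    defineReuseInput(ins, mir, 0);                // ...and the output is allowed to overwrite it.
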
+ +void LIRGenerator::visitWasmTernarySimd128(MWasmTernarySimd128* ins) { +#ifdef ENABLE_WASM_SIMD + MOZ_ASSERT(ins->v0()->type() == MIRType::Simd128); + MOZ_ASSERT(ins->v1()->type() == MIRType::Simd128); + MOZ_ASSERT(ins->v2()->type() == MIRType::Simd128); + MOZ_ASSERT(ins->type() == MIRType::Simd128); + + switch (ins->simdOp()) { + case wasm::SimdOp::V128Bitselect: { + // Enforcing lhs == output avoids one setup move. We would like to also + // enforce merging the control with the temp (with + // usRegisterAtStart(control) and tempCopy()), but the register allocator + // ignores those constraints at present. + auto* lir = new (alloc()) LWasmTernarySimd128( + ins->simdOp(), useRegisterAtStart(ins->v0()), useRegister(ins->v1()), + useRegister(ins->v2()), tempSimd128()); + defineReuseInput(lir, ins, LWasmTernarySimd128::V0); + break; + } + case wasm::SimdOp::F32x4RelaxedFma: + case wasm::SimdOp::F32x4RelaxedFnma: + case wasm::SimdOp::F64x2RelaxedFma: + case wasm::SimdOp::F64x2RelaxedFnma: { + auto* lir = new (alloc()) LWasmTernarySimd128( + ins->simdOp(), useRegister(ins->v0()), useRegister(ins->v1()), + useRegisterAtStart(ins->v2())); + defineReuseInput(lir, ins, LWasmTernarySimd128::V2); + break; + } + case wasm::SimdOp::I32x4DotI8x16I7x16AddS: { + auto* lir = new (alloc()) LWasmTernarySimd128( + ins->simdOp(), useRegister(ins->v0()), useRegister(ins->v1()), + useRegisterAtStart(ins->v2())); + defineReuseInput(lir, ins, LWasmTernarySimd128::V2); + break; + } + case wasm::SimdOp::I8x16RelaxedLaneSelect: + case wasm::SimdOp::I16x8RelaxedLaneSelect: + case wasm::SimdOp::I32x4RelaxedLaneSelect: + case wasm::SimdOp::I64x2RelaxedLaneSelect: { + if (Assembler::HasAVX()) { + auto* lir = new (alloc()) LWasmTernarySimd128( + ins->simdOp(), useRegisterAtStart(ins->v0()), + useRegisterAtStart(ins->v1()), useRegisterAtStart(ins->v2())); + define(lir, ins); + } else { + auto* lir = new (alloc()) LWasmTernarySimd128( + ins->simdOp(), useRegister(ins->v0()), + useRegisterAtStart(ins->v1()), useFixed(ins->v2(), vmm0)); + defineReuseInput(lir, ins, LWasmTernarySimd128::V1); + } + break; + } + default: + MOZ_CRASH("NYI"); + } +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void LIRGenerator::visitWasmBinarySimd128(MWasmBinarySimd128* ins) { +#ifdef ENABLE_WASM_SIMD + MDefinition* lhs = ins->lhs(); + MDefinition* rhs = ins->rhs(); + wasm::SimdOp op = ins->simdOp(); + + MOZ_ASSERT(lhs->type() == MIRType::Simd128); + MOZ_ASSERT(rhs->type() == MIRType::Simd128); + MOZ_ASSERT(ins->type() == MIRType::Simd128); + + // Note MWasmBinarySimd128::foldsTo has already specialized operations that + // have a constant operand, so this takes care of more general cases of + // reordering, see ReorderCommutative. + if (ins->isCommutative()) { + ReorderCommutative(&lhs, &rhs, ins); + } + + // Swap operands and change operation if necessary, these are all x86/x64 + // dependent transformations. Except where noted, this is about avoiding + // unnecessary moves and fixups in the code generator macros. + bool swap = false; + switch (op) { + case wasm::SimdOp::V128AndNot: { + // Code generation requires the operands to be reversed. 
+ swap = true; + break; + } + case wasm::SimdOp::I8x16LtS: { + swap = true; + op = wasm::SimdOp::I8x16GtS; + break; + } + case wasm::SimdOp::I8x16GeS: { + swap = true; + op = wasm::SimdOp::I8x16LeS; + break; + } + case wasm::SimdOp::I16x8LtS: { + swap = true; + op = wasm::SimdOp::I16x8GtS; + break; + } + case wasm::SimdOp::I16x8GeS: { + swap = true; + op = wasm::SimdOp::I16x8LeS; + break; + } + case wasm::SimdOp::I32x4LtS: { + swap = true; + op = wasm::SimdOp::I32x4GtS; + break; + } + case wasm::SimdOp::I32x4GeS: { + swap = true; + op = wasm::SimdOp::I32x4LeS; + break; + } + case wasm::SimdOp::F32x4Gt: { + swap = true; + op = wasm::SimdOp::F32x4Lt; + break; + } + case wasm::SimdOp::F32x4Ge: { + swap = true; + op = wasm::SimdOp::F32x4Le; + break; + } + case wasm::SimdOp::F64x2Gt: { + swap = true; + op = wasm::SimdOp::F64x2Lt; + break; + } + case wasm::SimdOp::F64x2Ge: { + swap = true; + op = wasm::SimdOp::F64x2Le; + break; + } + case wasm::SimdOp::F32x4PMin: + case wasm::SimdOp::F32x4PMax: + case wasm::SimdOp::F64x2PMin: + case wasm::SimdOp::F64x2PMax: { + // Code generation requires the operations to be reversed (the rhs is the + // output register). + swap = true; + break; + } + default: + break; + } + if (swap) { + MDefinition* tmp = lhs; + lhs = rhs; + rhs = tmp; + } + + // Allocate temp registers + LDefinition tempReg0 = LDefinition::BogusTemp(); + LDefinition tempReg1 = LDefinition::BogusTemp(); + switch (op) { + case wasm::SimdOp::I64x2Mul: + tempReg0 = tempSimd128(); + break; + case wasm::SimdOp::F32x4Min: + case wasm::SimdOp::F32x4Max: + case wasm::SimdOp::F64x2Min: + case wasm::SimdOp::F64x2Max: + tempReg0 = tempSimd128(); + tempReg1 = tempSimd128(); + break; + case wasm::SimdOp::I64x2LtS: + case wasm::SimdOp::I64x2GtS: + case wasm::SimdOp::I64x2LeS: + case wasm::SimdOp::I64x2GeS: + // The compareForOrderingInt64x2AVX implementation does not require + // temps but needs SSE4.2 support. Checking if both AVX and SSE4.2 + // are enabled. + if (!(Assembler::HasAVX() && Assembler::HasSSE42())) { + tempReg0 = tempSimd128(); + tempReg1 = tempSimd128(); + } + break; + default: + break; + } + + // For binary ops, without AVX support, the Masm API always is usually + // (rhs, lhsDest) and requires AtStart+ReuseInput for the lhs. + // + // For a few ops, the API is actually (rhsDest, lhs) and the rules are the + // same but the reversed. We swapped operands above; they will be swapped + // again in the code generator to emit the right code. + // + // If AVX support is enabled, some binary ops can use output as destination, + // useRegisterAtStart is applied for both operands and no need for ReuseInput. 
+ + switch (op) { + case wasm::SimdOp::I8x16AvgrU: + case wasm::SimdOp::I16x8AvgrU: + case wasm::SimdOp::I8x16Add: + case wasm::SimdOp::I8x16AddSatS: + case wasm::SimdOp::I8x16AddSatU: + case wasm::SimdOp::I8x16Sub: + case wasm::SimdOp::I8x16SubSatS: + case wasm::SimdOp::I8x16SubSatU: + case wasm::SimdOp::I16x8Mul: + case wasm::SimdOp::I16x8MinS: + case wasm::SimdOp::I16x8MinU: + case wasm::SimdOp::I16x8MaxS: + case wasm::SimdOp::I16x8MaxU: + case wasm::SimdOp::I32x4Add: + case wasm::SimdOp::I32x4Sub: + case wasm::SimdOp::I32x4Mul: + case wasm::SimdOp::I32x4MinS: + case wasm::SimdOp::I32x4MinU: + case wasm::SimdOp::I32x4MaxS: + case wasm::SimdOp::I32x4MaxU: + case wasm::SimdOp::I64x2Add: + case wasm::SimdOp::I64x2Sub: + case wasm::SimdOp::I64x2Mul: + case wasm::SimdOp::F32x4Add: + case wasm::SimdOp::F32x4Sub: + case wasm::SimdOp::F32x4Mul: + case wasm::SimdOp::F32x4Div: + case wasm::SimdOp::F64x2Add: + case wasm::SimdOp::F64x2Sub: + case wasm::SimdOp::F64x2Mul: + case wasm::SimdOp::F64x2Div: + case wasm::SimdOp::F32x4Eq: + case wasm::SimdOp::F32x4Ne: + case wasm::SimdOp::F32x4Lt: + case wasm::SimdOp::F32x4Le: + case wasm::SimdOp::F64x2Eq: + case wasm::SimdOp::F64x2Ne: + case wasm::SimdOp::F64x2Lt: + case wasm::SimdOp::F64x2Le: + case wasm::SimdOp::F32x4PMin: + case wasm::SimdOp::F32x4PMax: + case wasm::SimdOp::F64x2PMin: + case wasm::SimdOp::F64x2PMax: + case wasm::SimdOp::I8x16Swizzle: + case wasm::SimdOp::I8x16RelaxedSwizzle: + case wasm::SimdOp::I8x16Eq: + case wasm::SimdOp::I8x16Ne: + case wasm::SimdOp::I8x16GtS: + case wasm::SimdOp::I8x16LeS: + case wasm::SimdOp::I8x16LtU: + case wasm::SimdOp::I8x16GtU: + case wasm::SimdOp::I8x16LeU: + case wasm::SimdOp::I8x16GeU: + case wasm::SimdOp::I16x8Eq: + case wasm::SimdOp::I16x8Ne: + case wasm::SimdOp::I16x8GtS: + case wasm::SimdOp::I16x8LeS: + case wasm::SimdOp::I16x8LtU: + case wasm::SimdOp::I16x8GtU: + case wasm::SimdOp::I16x8LeU: + case wasm::SimdOp::I16x8GeU: + case wasm::SimdOp::I32x4Eq: + case wasm::SimdOp::I32x4Ne: + case wasm::SimdOp::I32x4GtS: + case wasm::SimdOp::I32x4LeS: + case wasm::SimdOp::I32x4LtU: + case wasm::SimdOp::I32x4GtU: + case wasm::SimdOp::I32x4LeU: + case wasm::SimdOp::I32x4GeU: + case wasm::SimdOp::I64x2Eq: + case wasm::SimdOp::I64x2Ne: + case wasm::SimdOp::I64x2LtS: + case wasm::SimdOp::I64x2GtS: + case wasm::SimdOp::I64x2LeS: + case wasm::SimdOp::I64x2GeS: + case wasm::SimdOp::V128And: + case wasm::SimdOp::V128Or: + case wasm::SimdOp::V128Xor: + case wasm::SimdOp::V128AndNot: + case wasm::SimdOp::F32x4Min: + case wasm::SimdOp::F32x4Max: + case wasm::SimdOp::F64x2Min: + case wasm::SimdOp::F64x2Max: + case wasm::SimdOp::I8x16NarrowI16x8S: + case wasm::SimdOp::I8x16NarrowI16x8U: + case wasm::SimdOp::I16x8NarrowI32x4S: + case wasm::SimdOp::I16x8NarrowI32x4U: + case wasm::SimdOp::I32x4DotI16x8S: + case wasm::SimdOp::I16x8ExtmulLowI8x16S: + case wasm::SimdOp::I16x8ExtmulHighI8x16S: + case wasm::SimdOp::I16x8ExtmulLowI8x16U: + case wasm::SimdOp::I16x8ExtmulHighI8x16U: + case wasm::SimdOp::I32x4ExtmulLowI16x8S: + case wasm::SimdOp::I32x4ExtmulHighI16x8S: + case wasm::SimdOp::I32x4ExtmulLowI16x8U: + case wasm::SimdOp::I32x4ExtmulHighI16x8U: + case wasm::SimdOp::I64x2ExtmulLowI32x4S: + case wasm::SimdOp::I64x2ExtmulHighI32x4S: + case wasm::SimdOp::I64x2ExtmulLowI32x4U: + case wasm::SimdOp::I64x2ExtmulHighI32x4U: + case wasm::SimdOp::I16x8Q15MulrSatS: + case wasm::SimdOp::F32x4RelaxedMin: + case wasm::SimdOp::F32x4RelaxedMax: + case wasm::SimdOp::F64x2RelaxedMin: + case wasm::SimdOp::F64x2RelaxedMax: + case 
wasm::SimdOp::I16x8RelaxedQ15MulrS: + case wasm::SimdOp::I16x8DotI8x16I7x16S: + case wasm::SimdOp::MozPMADDUBSW: + if (isThreeOpAllowed()) { + auto* lir = new (alloc()) + LWasmBinarySimd128(op, useRegisterAtStart(lhs), + useRegisterAtStart(rhs), tempReg0, tempReg1); + define(lir, ins); + break; + } + [[fallthrough]]; + default: { + LAllocation lhsDestAlloc = useRegisterAtStart(lhs); + LAllocation rhsAlloc = willHaveDifferentLIRNodes(lhs, rhs) + ? useRegister(rhs) + : useRegisterAtStart(rhs); + auto* lir = new (alloc()) + LWasmBinarySimd128(op, lhsDestAlloc, rhsAlloc, tempReg0, tempReg1); + defineReuseInput(lir, ins, LWasmBinarySimd128::LhsDest); + break; + } + } +#else + MOZ_CRASH("No SIMD"); +#endif +} + +#ifdef ENABLE_WASM_SIMD +bool MWasmTernarySimd128::specializeBitselectConstantMaskAsShuffle( + int8_t shuffle[16]) { + if (simdOp() != wasm::SimdOp::V128Bitselect) { + return false; + } + + // Optimization when control vector is a mask with all 0 or all 1 per lane. + // On x86, there is no bitselect, blend operations will be a win, + // e.g. via PBLENDVB or PBLENDW. + SimdConstant constant = static_cast<MWasmFloatConstant*>(v2())->toSimd128(); + const SimdConstant::I8x16& bytes = constant.asInt8x16(); + for (int8_t i = 0; i < 16; i++) { + if (bytes[i] == -1) { + shuffle[i] = i + 16; + } else if (bytes[i] == 0) { + shuffle[i] = i; + } else { + return false; + } + } + return true; +} +bool MWasmTernarySimd128::canRelaxBitselect() { + wasm::SimdOp simdOp; + if (v2()->isWasmBinarySimd128()) { + simdOp = v2()->toWasmBinarySimd128()->simdOp(); + } else if (v2()->isWasmBinarySimd128WithConstant()) { + simdOp = v2()->toWasmBinarySimd128WithConstant()->simdOp(); + } else { + return false; + } + switch (simdOp) { + case wasm::SimdOp::I8x16Eq: + case wasm::SimdOp::I8x16Ne: + case wasm::SimdOp::I8x16GtS: + case wasm::SimdOp::I8x16GeS: + case wasm::SimdOp::I8x16LtS: + case wasm::SimdOp::I8x16LeS: + case wasm::SimdOp::I8x16GtU: + case wasm::SimdOp::I8x16GeU: + case wasm::SimdOp::I8x16LtU: + case wasm::SimdOp::I8x16LeU: + case wasm::SimdOp::I16x8Eq: + case wasm::SimdOp::I16x8Ne: + case wasm::SimdOp::I16x8GtS: + case wasm::SimdOp::I16x8GeS: + case wasm::SimdOp::I16x8LtS: + case wasm::SimdOp::I16x8LeS: + case wasm::SimdOp::I16x8GtU: + case wasm::SimdOp::I16x8GeU: + case wasm::SimdOp::I16x8LtU: + case wasm::SimdOp::I16x8LeU: + case wasm::SimdOp::I32x4Eq: + case wasm::SimdOp::I32x4Ne: + case wasm::SimdOp::I32x4GtS: + case wasm::SimdOp::I32x4GeS: + case wasm::SimdOp::I32x4LtS: + case wasm::SimdOp::I32x4LeS: + case wasm::SimdOp::I32x4GtU: + case wasm::SimdOp::I32x4GeU: + case wasm::SimdOp::I32x4LtU: + case wasm::SimdOp::I32x4LeU: + case wasm::SimdOp::I64x2Eq: + case wasm::SimdOp::I64x2Ne: + case wasm::SimdOp::I64x2GtS: + case wasm::SimdOp::I64x2GeS: + case wasm::SimdOp::I64x2LtS: + case wasm::SimdOp::I64x2LeS: + case wasm::SimdOp::F32x4Eq: + case wasm::SimdOp::F32x4Ne: + case wasm::SimdOp::F32x4Gt: + case wasm::SimdOp::F32x4Ge: + case wasm::SimdOp::F32x4Lt: + case wasm::SimdOp::F32x4Le: + case wasm::SimdOp::F64x2Eq: + case wasm::SimdOp::F64x2Ne: + case wasm::SimdOp::F64x2Gt: + case wasm::SimdOp::F64x2Ge: + case wasm::SimdOp::F64x2Lt: + case wasm::SimdOp::F64x2Le: + return true; + default: + break; + } + return false; +} + +bool MWasmBinarySimd128::canPmaddubsw() { + MOZ_ASSERT(Assembler::HasSSE3()); + return true; +} +#endif + +bool MWasmBinarySimd128::specializeForConstantRhs() { + // The order follows MacroAssembler.h, generally + switch (simdOp()) { + // Operations implemented by a single native 
instruction where it is + // plausible that the rhs (after commutation if available) could be a + // constant. + // + // Swizzle is not here because it was handled earlier in the pipeline. + // + // Integer compares >= and < are not here because they are not supported in + // the hardware. + // + // Floating compares are not here because our patching machinery can't + // handle them yet. + // + // Floating-point min and max (including pmin and pmax) are not here because + // they are not straightforward to implement. + case wasm::SimdOp::I8x16Add: + case wasm::SimdOp::I16x8Add: + case wasm::SimdOp::I32x4Add: + case wasm::SimdOp::I64x2Add: + case wasm::SimdOp::I8x16Sub: + case wasm::SimdOp::I16x8Sub: + case wasm::SimdOp::I32x4Sub: + case wasm::SimdOp::I64x2Sub: + case wasm::SimdOp::I16x8Mul: + case wasm::SimdOp::I32x4Mul: + case wasm::SimdOp::I8x16AddSatS: + case wasm::SimdOp::I8x16AddSatU: + case wasm::SimdOp::I16x8AddSatS: + case wasm::SimdOp::I16x8AddSatU: + case wasm::SimdOp::I8x16SubSatS: + case wasm::SimdOp::I8x16SubSatU: + case wasm::SimdOp::I16x8SubSatS: + case wasm::SimdOp::I16x8SubSatU: + case wasm::SimdOp::I8x16MinS: + case wasm::SimdOp::I8x16MinU: + case wasm::SimdOp::I16x8MinS: + case wasm::SimdOp::I16x8MinU: + case wasm::SimdOp::I32x4MinS: + case wasm::SimdOp::I32x4MinU: + case wasm::SimdOp::I8x16MaxS: + case wasm::SimdOp::I8x16MaxU: + case wasm::SimdOp::I16x8MaxS: + case wasm::SimdOp::I16x8MaxU: + case wasm::SimdOp::I32x4MaxS: + case wasm::SimdOp::I32x4MaxU: + case wasm::SimdOp::V128And: + case wasm::SimdOp::V128Or: + case wasm::SimdOp::V128Xor: + case wasm::SimdOp::I8x16Eq: + case wasm::SimdOp::I8x16Ne: + case wasm::SimdOp::I8x16GtS: + case wasm::SimdOp::I8x16LeS: + case wasm::SimdOp::I16x8Eq: + case wasm::SimdOp::I16x8Ne: + case wasm::SimdOp::I16x8GtS: + case wasm::SimdOp::I16x8LeS: + case wasm::SimdOp::I32x4Eq: + case wasm::SimdOp::I32x4Ne: + case wasm::SimdOp::I32x4GtS: + case wasm::SimdOp::I32x4LeS: + case wasm::SimdOp::I64x2Mul: + case wasm::SimdOp::F32x4Eq: + case wasm::SimdOp::F32x4Ne: + case wasm::SimdOp::F32x4Lt: + case wasm::SimdOp::F32x4Le: + case wasm::SimdOp::F64x2Eq: + case wasm::SimdOp::F64x2Ne: + case wasm::SimdOp::F64x2Lt: + case wasm::SimdOp::F64x2Le: + case wasm::SimdOp::I32x4DotI16x8S: + case wasm::SimdOp::F32x4Add: + case wasm::SimdOp::F64x2Add: + case wasm::SimdOp::F32x4Sub: + case wasm::SimdOp::F64x2Sub: + case wasm::SimdOp::F32x4Div: + case wasm::SimdOp::F64x2Div: + case wasm::SimdOp::F32x4Mul: + case wasm::SimdOp::F64x2Mul: + case wasm::SimdOp::I8x16NarrowI16x8S: + case wasm::SimdOp::I8x16NarrowI16x8U: + case wasm::SimdOp::I16x8NarrowI32x4S: + case wasm::SimdOp::I16x8NarrowI32x4U: + return true; + default: + return false; + } +} + +void LIRGenerator::visitWasmBinarySimd128WithConstant( + MWasmBinarySimd128WithConstant* ins) { +#ifdef ENABLE_WASM_SIMD + MDefinition* lhs = ins->lhs(); + + MOZ_ASSERT(lhs->type() == MIRType::Simd128); + MOZ_ASSERT(ins->type() == MIRType::Simd128); + + // Allocate temp registers + LDefinition tempReg = LDefinition::BogusTemp(); + switch (ins->simdOp()) { + case wasm::SimdOp::I64x2Mul: + tempReg = tempSimd128(); + break; + default: + break; + } + + if (isThreeOpAllowed()) { + // The non-destructive versions of instructions will be available + // when AVX is enabled. 
+ LAllocation lhsAlloc = useRegisterAtStart(lhs); + auto* lir = new (alloc()) + LWasmBinarySimd128WithConstant(lhsAlloc, ins->rhs(), tempReg); + define(lir, ins); + } else { + // Always beneficial to reuse the lhs register here, see discussion in + // visitWasmBinarySimd128() and also code in specializeForConstantRhs(). + LAllocation lhsDestAlloc = useRegisterAtStart(lhs); + auto* lir = new (alloc()) + LWasmBinarySimd128WithConstant(lhsDestAlloc, ins->rhs(), tempReg); + defineReuseInput(lir, ins, LWasmBinarySimd128WithConstant::LhsDest); + } +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void LIRGenerator::visitWasmShiftSimd128(MWasmShiftSimd128* ins) { +#ifdef ENABLE_WASM_SIMD + MDefinition* lhs = ins->lhs(); + MDefinition* rhs = ins->rhs(); + + MOZ_ASSERT(lhs->type() == MIRType::Simd128); + MOZ_ASSERT(rhs->type() == MIRType::Int32); + MOZ_ASSERT(ins->type() == MIRType::Simd128); + + if (rhs->isConstant()) { + int32_t shiftCountMask; + switch (ins->simdOp()) { + case wasm::SimdOp::I8x16Shl: + case wasm::SimdOp::I8x16ShrU: + case wasm::SimdOp::I8x16ShrS: + shiftCountMask = 7; + break; + case wasm::SimdOp::I16x8Shl: + case wasm::SimdOp::I16x8ShrU: + case wasm::SimdOp::I16x8ShrS: + shiftCountMask = 15; + break; + case wasm::SimdOp::I32x4Shl: + case wasm::SimdOp::I32x4ShrU: + case wasm::SimdOp::I32x4ShrS: + shiftCountMask = 31; + break; + case wasm::SimdOp::I64x2Shl: + case wasm::SimdOp::I64x2ShrU: + case wasm::SimdOp::I64x2ShrS: + shiftCountMask = 63; + break; + default: + MOZ_CRASH("Unexpected shift operation"); + } + + int32_t shiftCount = rhs->toConstant()->toInt32() & shiftCountMask; + if (shiftCount == shiftCountMask) { + // Check if possible to apply sign replication optimization. + // For some ops the input shall be reused. + switch (ins->simdOp()) { + case wasm::SimdOp::I8x16ShrS: { + auto* lir = + new (alloc()) LWasmSignReplicationSimd128(useRegister(lhs)); + define(lir, ins); + return; + } + case wasm::SimdOp::I16x8ShrS: + case wasm::SimdOp::I32x4ShrS: + case wasm::SimdOp::I64x2ShrS: { + auto* lir = new (alloc()) + LWasmSignReplicationSimd128(useRegisterAtStart(lhs)); + if (isThreeOpAllowed()) { + define(lir, ins); + } else { + // For non-AVX, it is always beneficial to reuse the input. + defineReuseInput(lir, ins, LWasmConstantShiftSimd128::Src); + } + return; + } + default: + break; + } + } + +# ifdef DEBUG + js::wasm::ReportSimdAnalysis("shift -> constant shift"); +# endif + auto* lir = new (alloc()) + LWasmConstantShiftSimd128(useRegisterAtStart(lhs), shiftCount); + if (isThreeOpAllowed()) { + define(lir, ins); + } else { + // For non-AVX, it is always beneficial to reuse the input. + defineReuseInput(lir, ins, LWasmConstantShiftSimd128::Src); + } + return; + } + +# ifdef DEBUG + js::wasm::ReportSimdAnalysis("shift -> variable shift"); +# endif + + LDefinition tempReg = LDefinition::BogusTemp(); + switch (ins->simdOp()) { + case wasm::SimdOp::I8x16Shl: + case wasm::SimdOp::I8x16ShrS: + case wasm::SimdOp::I8x16ShrU: + case wasm::SimdOp::I64x2ShrS: + tempReg = tempSimd128(); + break; + default: + break; + } + + // Reusing the input if possible is never detrimental. 
+ LAllocation lhsDestAlloc = useRegisterAtStart(lhs); + LAllocation rhsAlloc = useRegisterAtStart(rhs); + auto* lir = + new (alloc()) LWasmVariableShiftSimd128(lhsDestAlloc, rhsAlloc, tempReg); + defineReuseInput(lir, ins, LWasmVariableShiftSimd128::LhsDest); +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void LIRGenerator::visitWasmShuffleSimd128(MWasmShuffleSimd128* ins) { +#ifdef ENABLE_WASM_SIMD + MOZ_ASSERT(ins->lhs()->type() == MIRType::Simd128); + MOZ_ASSERT(ins->rhs()->type() == MIRType::Simd128); + MOZ_ASSERT(ins->type() == MIRType::Simd128); + + SimdShuffle s = ins->shuffle(); + switch (s.opd) { + case SimdShuffle::Operand::LEFT: + case SimdShuffle::Operand::RIGHT: { + LAllocation src; + bool reuse = false; + switch (*s.permuteOp) { + case SimdPermuteOp::MOVE: + reuse = true; + break; + case SimdPermuteOp::BROADCAST_8x16: + case SimdPermuteOp::BROADCAST_16x8: + case SimdPermuteOp::PERMUTE_8x16: + case SimdPermuteOp::PERMUTE_16x8: + case SimdPermuteOp::PERMUTE_32x4: + case SimdPermuteOp::ROTATE_RIGHT_8x16: + case SimdPermuteOp::SHIFT_LEFT_8x16: + case SimdPermuteOp::SHIFT_RIGHT_8x16: + case SimdPermuteOp::REVERSE_16x8: + case SimdPermuteOp::REVERSE_32x4: + case SimdPermuteOp::REVERSE_64x2: + // No need to reuse registers when VEX instructions are enabled. + reuse = !Assembler::HasAVX(); + break; + default: + MOZ_CRASH("Unexpected operator"); + } + if (s.opd == SimdShuffle::Operand::LEFT) { + src = useRegisterAtStart(ins->lhs()); + } else { + src = useRegisterAtStart(ins->rhs()); + } + auto* lir = + new (alloc()) LWasmPermuteSimd128(src, *s.permuteOp, s.control); + if (reuse) { + defineReuseInput(lir, ins, LWasmPermuteSimd128::Src); + } else { + define(lir, ins); + } + break; + } + case SimdShuffle::Operand::BOTH: + case SimdShuffle::Operand::BOTH_SWAPPED: { + LDefinition temp = LDefinition::BogusTemp(); + switch (*s.shuffleOp) { + case SimdShuffleOp::BLEND_8x16: + temp = Assembler::HasAVX() ? tempSimd128() : tempFixed(xmm0); + break; + default: + break; + } + if (isThreeOpAllowed()) { + LAllocation lhs; + LAllocation rhs; + if (s.opd == SimdShuffle::Operand::BOTH) { + lhs = useRegisterAtStart(ins->lhs()); + rhs = useRegisterAtStart(ins->rhs()); + } else { + lhs = useRegisterAtStart(ins->rhs()); + rhs = useRegisterAtStart(ins->lhs()); + } + auto* lir = new (alloc()) + LWasmShuffleSimd128(lhs, rhs, temp, *s.shuffleOp, s.control); + define(lir, ins); + } else { + LAllocation lhs; + LAllocation rhs; + if (s.opd == SimdShuffle::Operand::BOTH) { + lhs = useRegisterAtStart(ins->lhs()); + rhs = useRegister(ins->rhs()); + } else { + lhs = useRegisterAtStart(ins->rhs()); + rhs = useRegister(ins->lhs()); + } + auto* lir = new (alloc()) + LWasmShuffleSimd128(lhs, rhs, temp, *s.shuffleOp, s.control); + defineReuseInput(lir, ins, LWasmShuffleSimd128::LhsDest); + } + break; + } + } +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void LIRGenerator::visitWasmReplaceLaneSimd128(MWasmReplaceLaneSimd128* ins) { +#ifdef ENABLE_WASM_SIMD + MOZ_ASSERT(ins->lhs()->type() == MIRType::Simd128); + MOZ_ASSERT(ins->type() == MIRType::Simd128); + + // If AVX support is disabled, the Masm API is (rhs, lhsDest) and requires + // AtStart+ReuseInput for the lhs. For type reasons, the rhs will never be + // the same as the lhs and is therefore a plain Use. + // + // If AVX support is enabled, useRegisterAtStart is preferred. 
+ + if (ins->rhs()->type() == MIRType::Int64) { + if (isThreeOpAllowed()) { + auto* lir = new (alloc()) LWasmReplaceInt64LaneSimd128( + useRegisterAtStart(ins->lhs()), useInt64RegisterAtStart(ins->rhs())); + define(lir, ins); + } else { + auto* lir = new (alloc()) LWasmReplaceInt64LaneSimd128( + useRegisterAtStart(ins->lhs()), useInt64Register(ins->rhs())); + defineReuseInput(lir, ins, LWasmReplaceInt64LaneSimd128::LhsDest); + } + } else { + if (isThreeOpAllowed()) { + auto* lir = new (alloc()) LWasmReplaceLaneSimd128( + useRegisterAtStart(ins->lhs()), useRegisterAtStart(ins->rhs())); + define(lir, ins); + } else { + auto* lir = new (alloc()) LWasmReplaceLaneSimd128( + useRegisterAtStart(ins->lhs()), useRegister(ins->rhs())); + defineReuseInput(lir, ins, LWasmReplaceLaneSimd128::LhsDest); + } + } +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void LIRGenerator::visitWasmScalarToSimd128(MWasmScalarToSimd128* ins) { +#ifdef ENABLE_WASM_SIMD + MOZ_ASSERT(ins->type() == MIRType::Simd128); + + switch (ins->input()->type()) { + case MIRType::Int64: { + // 64-bit integer splats. + // Load-and-(sign|zero)extend. + auto* lir = new (alloc()) + LWasmInt64ToSimd128(useInt64RegisterAtStart(ins->input())); + define(lir, ins); + break; + } + case MIRType::Float32: + case MIRType::Double: { + // Floating-point splats. + // Ideally we save a move on SSE systems by reusing the input register, + // but since the input and output register types differ, we can't. + auto* lir = + new (alloc()) LWasmScalarToSimd128(useRegisterAtStart(ins->input())); + define(lir, ins); + break; + } + default: { + // 32-bit integer splats. + auto* lir = + new (alloc()) LWasmScalarToSimd128(useRegisterAtStart(ins->input())); + define(lir, ins); + break; + } + } +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void LIRGenerator::visitWasmUnarySimd128(MWasmUnarySimd128* ins) { +#ifdef ENABLE_WASM_SIMD + MOZ_ASSERT(ins->input()->type() == MIRType::Simd128); + MOZ_ASSERT(ins->type() == MIRType::Simd128); + + bool useAtStart = false; + bool reuseInput = false; + LDefinition tempReg = LDefinition::BogusTemp(); + switch (ins->simdOp()) { + case wasm::SimdOp::I8x16Neg: + case wasm::SimdOp::I16x8Neg: + case wasm::SimdOp::I32x4Neg: + case wasm::SimdOp::I64x2Neg: + case wasm::SimdOp::I16x8ExtaddPairwiseI8x16S: + // Prefer src != dest to avoid an unconditional src->temp move. + MOZ_ASSERT(!reuseInput); + // If AVX is enabled, we prefer useRegisterAtStart. + useAtStart = isThreeOpAllowed(); + break; + case wasm::SimdOp::F32x4Neg: + case wasm::SimdOp::F64x2Neg: + case wasm::SimdOp::F32x4Abs: + case wasm::SimdOp::F64x2Abs: + case wasm::SimdOp::V128Not: + case wasm::SimdOp::F32x4Sqrt: + case wasm::SimdOp::F64x2Sqrt: + case wasm::SimdOp::I8x16Abs: + case wasm::SimdOp::I16x8Abs: + case wasm::SimdOp::I32x4Abs: + case wasm::SimdOp::I64x2Abs: + case wasm::SimdOp::I32x4TruncSatF32x4S: + case wasm::SimdOp::F32x4ConvertI32x4U: + case wasm::SimdOp::I16x8ExtaddPairwiseI8x16U: + case wasm::SimdOp::I32x4ExtaddPairwiseI16x8S: + case wasm::SimdOp::I32x4ExtaddPairwiseI16x8U: + case wasm::SimdOp::I32x4RelaxedTruncF32x4S: + case wasm::SimdOp::I32x4RelaxedTruncF32x4U: + case wasm::SimdOp::I32x4RelaxedTruncF64x2SZero: + case wasm::SimdOp::I32x4RelaxedTruncF64x2UZero: + case wasm::SimdOp::I64x2ExtendHighI32x4S: + case wasm::SimdOp::I64x2ExtendHighI32x4U: + // Prefer src == dest to avoid an unconditional src->dest move + // for better performance in non-AVX mode (e.g. non-PSHUFD use). 
+ useAtStart = true; + reuseInput = !isThreeOpAllowed(); + break; + case wasm::SimdOp::I32x4TruncSatF32x4U: + case wasm::SimdOp::I32x4TruncSatF64x2SZero: + case wasm::SimdOp::I32x4TruncSatF64x2UZero: + case wasm::SimdOp::I8x16Popcnt: + tempReg = tempSimd128(); + // Prefer src == dest to avoid an unconditional src->dest move + // in non-AVX mode. + useAtStart = true; + reuseInput = !isThreeOpAllowed(); + break; + case wasm::SimdOp::I16x8ExtendLowI8x16S: + case wasm::SimdOp::I16x8ExtendHighI8x16S: + case wasm::SimdOp::I16x8ExtendLowI8x16U: + case wasm::SimdOp::I16x8ExtendHighI8x16U: + case wasm::SimdOp::I32x4ExtendLowI16x8S: + case wasm::SimdOp::I32x4ExtendHighI16x8S: + case wasm::SimdOp::I32x4ExtendLowI16x8U: + case wasm::SimdOp::I32x4ExtendHighI16x8U: + case wasm::SimdOp::I64x2ExtendLowI32x4S: + case wasm::SimdOp::I64x2ExtendLowI32x4U: + case wasm::SimdOp::F32x4ConvertI32x4S: + case wasm::SimdOp::F32x4Ceil: + case wasm::SimdOp::F32x4Floor: + case wasm::SimdOp::F32x4Trunc: + case wasm::SimdOp::F32x4Nearest: + case wasm::SimdOp::F64x2Ceil: + case wasm::SimdOp::F64x2Floor: + case wasm::SimdOp::F64x2Trunc: + case wasm::SimdOp::F64x2Nearest: + case wasm::SimdOp::F32x4DemoteF64x2Zero: + case wasm::SimdOp::F64x2PromoteLowF32x4: + case wasm::SimdOp::F64x2ConvertLowI32x4S: + case wasm::SimdOp::F64x2ConvertLowI32x4U: + // Prefer src == dest to exert the lowest register pressure on the + // surrounding code. + useAtStart = true; + MOZ_ASSERT(!reuseInput); + break; + default: + MOZ_CRASH("Unary SimdOp not implemented"); + } + + LUse inputUse = + useAtStart ? useRegisterAtStart(ins->input()) : useRegister(ins->input()); + LWasmUnarySimd128* lir = new (alloc()) LWasmUnarySimd128(inputUse, tempReg); + if (reuseInput) { + defineReuseInput(lir, ins, LWasmUnarySimd128::Src); + } else { + define(lir, ins); + } +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void LIRGenerator::visitWasmLoadLaneSimd128(MWasmLoadLaneSimd128* ins) { +#ifdef ENABLE_WASM_SIMD + // A trick: On 32-bit systems, the base pointer is 32 bits (it was bounds + // checked and then chopped). On 64-bit systems, it can be 32 bits or 64 + // bits. Either way, it fits in a GPR so we can ignore the + // Register/Register64 distinction here. +# ifndef JS_64BIT + MOZ_ASSERT(ins->base()->type() == MIRType::Int32); +# endif + LUse base = useRegisterAtStart(ins->base()); + LUse inputUse = useRegisterAtStart(ins->value()); + LAllocation memoryBase = ins->hasMemoryBase() + ? useRegisterAtStart(ins->memoryBase()) + : LAllocation(); + LWasmLoadLaneSimd128* lir = new (alloc()) LWasmLoadLaneSimd128( + base, inputUse, LDefinition::BogusTemp(), memoryBase); + defineReuseInput(lir, ins, LWasmLoadLaneSimd128::Src); +#else + MOZ_CRASH("No SIMD"); +#endif +} + +void LIRGenerator::visitWasmStoreLaneSimd128(MWasmStoreLaneSimd128* ins) { +#ifdef ENABLE_WASM_SIMD + // See comment above. +# ifndef JS_64BIT + MOZ_ASSERT(ins->base()->type() == MIRType::Int32); +# endif + LUse base = useRegisterAtStart(ins->base()); + LUse input = useRegisterAtStart(ins->value()); + LAllocation memoryBase = ins->hasMemoryBase() + ? 
useRegisterAtStart(ins->memoryBase()) + : LAllocation(); + LWasmStoreLaneSimd128* lir = new (alloc()) + LWasmStoreLaneSimd128(base, input, LDefinition::BogusTemp(), memoryBase); + add(lir, ins); +#else + MOZ_CRASH("No SIMD"); +#endif +} + +#ifdef ENABLE_WASM_SIMD + +bool LIRGeneratorX86Shared::canFoldReduceSimd128AndBranch(wasm::SimdOp op) { + switch (op) { + case wasm::SimdOp::V128AnyTrue: + case wasm::SimdOp::I8x16AllTrue: + case wasm::SimdOp::I16x8AllTrue: + case wasm::SimdOp::I32x4AllTrue: + case wasm::SimdOp::I64x2AllTrue: + case wasm::SimdOp::I16x8Bitmask: + return true; + default: + return false; + } +} + +bool LIRGeneratorX86Shared::canEmitWasmReduceSimd128AtUses( + MWasmReduceSimd128* ins) { + if (!ins->canEmitAtUses()) { + return false; + } + // Only specific ops generating int32. + if (ins->type() != MIRType::Int32) { + return false; + } + if (!canFoldReduceSimd128AndBranch(ins->simdOp())) { + return false; + } + // If never used then defer (it will be removed). + MUseIterator iter(ins->usesBegin()); + if (iter == ins->usesEnd()) { + return true; + } + // We require an MTest consumer. + MNode* node = iter->consumer(); + if (!node->isDefinition() || !node->toDefinition()->isTest()) { + return false; + } + // Defer only if there's only one use. + iter++; + return iter == ins->usesEnd(); +} + +#endif // ENABLE_WASM_SIMD + +void LIRGenerator::visitWasmReduceSimd128(MWasmReduceSimd128* ins) { +#ifdef ENABLE_WASM_SIMD + if (canEmitWasmReduceSimd128AtUses(ins)) { + emitAtUses(ins); + return; + } + + // Reductions (any_true, all_true, bitmask, extract_lane) uniformly prefer + // useRegisterAtStart: + // + // - In most cases, the input type differs from the output type, so there's no + // conflict and it doesn't really matter. + // + // - For extract_lane(0) on F32x4 and F64x2, input == output results in zero + // code being generated. + // + // - For extract_lane(k > 0) on F32x4 and F64x2, allowing the input register + // to be targeted lowers register pressure if it's the last use of the + // input. + + if (ins->type() == MIRType::Int64) { + auto* lir = new (alloc()) + LWasmReduceSimd128ToInt64(useRegisterAtStart(ins->input())); + defineInt64(lir, ins); + } else { + // Ideally we would reuse the input register for floating extract_lane if + // the lane is zero, but constraints in the register allocator require the + // input and output register types to be the same. + auto* lir = new (alloc()) LWasmReduceSimd128( + useRegisterAtStart(ins->input()), LDefinition::BogusTemp()); + define(lir, ins); + } +#else + MOZ_CRASH("No SIMD"); +#endif +} diff --git a/js/src/jit/x86-shared/Lowering-x86-shared.h b/js/src/jit/x86-shared/Lowering-x86-shared.h new file mode 100644 index 0000000000..69d367270a --- /dev/null +++ b/js/src/jit/x86-shared/Lowering-x86-shared.h @@ -0,0 +1,78 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef jit_x86_shared_Lowering_x86_shared_h +#define jit_x86_shared_Lowering_x86_shared_h + +#include "jit/shared/Lowering-shared.h" + +namespace js { +namespace jit { + +class LIRGeneratorX86Shared : public LIRGeneratorShared { + protected: + LIRGeneratorX86Shared(MIRGenerator* gen, MIRGraph& graph, LIRGraph& lirGraph) + : LIRGeneratorShared(gen, graph, lirGraph) {} + + LTableSwitch* newLTableSwitch(const LAllocation& in, + const LDefinition& inputCopy, + MTableSwitch* ins); + LTableSwitchV* newLTableSwitchV(MTableSwitch* ins); + + void lowerForShift(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir, + MDefinition* lhs, MDefinition* rhs); + void lowerForALU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir, + MDefinition* input); + void lowerForALU(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir, + MDefinition* lhs, MDefinition* rhs); + + template <size_t Temps> + void lowerForShiftInt64( + LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, Temps>* ins, + MDefinition* mir, MDefinition* lhs, MDefinition* rhs); + + void lowerForCompareI64AndBranch(MTest* mir, MCompare* comp, JSOp op, + MDefinition* left, MDefinition* right, + MBasicBlock* ifTrue, MBasicBlock* ifFalse); + template <size_t Temps> + void lowerForFPU(LInstructionHelper<1, 2, Temps>* ins, MDefinition* mir, + MDefinition* lhs, MDefinition* rhs); + void lowerForBitAndAndBranch(LBitAndAndBranch* baab, MInstruction* mir, + MDefinition* lhs, MDefinition* rhs); + void lowerNegI(MInstruction* ins, MDefinition* input); + void lowerNegI64(MInstruction* ins, MDefinition* input); + void lowerMulI(MMul* mul, MDefinition* lhs, MDefinition* rhs); + void lowerDivI(MDiv* div); + void lowerModI(MMod* mod); + void lowerUDiv(MDiv* div); + void lowerUMod(MMod* mod); + void lowerUrshD(MUrsh* mir); + void lowerPowOfTwoI(MPow* mir); + void lowerWasmSelectI(MWasmSelect* select); + void lowerWasmSelectI64(MWasmSelect* select); + void lowerBigIntLsh(MBigIntLsh* ins); + void lowerBigIntRsh(MBigIntRsh* ins); + void lowerWasmBuiltinTruncateToInt32(MWasmBuiltinTruncateToInt32* ins); + void lowerTruncateDToInt32(MTruncateToInt32* ins); + void lowerTruncateFToInt32(MTruncateToInt32* ins); + void lowerCompareExchangeTypedArrayElement( + MCompareExchangeTypedArrayElement* ins, bool useI386ByteRegisters); + void lowerAtomicExchangeTypedArrayElement( + MAtomicExchangeTypedArrayElement* ins, bool useI386ByteRegisters); + void lowerAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop* ins, + bool useI386ByteRegisters); + +#ifdef ENABLE_WASM_SIMD + bool isThreeOpAllowed() { return Assembler::HasAVX(); } + bool canFoldReduceSimd128AndBranch(wasm::SimdOp op); + bool canEmitWasmReduceSimd128AtUses(MWasmReduceSimd128* ins); +#endif +}; + +} // namespace jit +} // namespace js + +#endif /* jit_x86_shared_Lowering_x86_shared_h */ diff --git a/js/src/jit/x86-shared/MacroAssembler-x86-shared-SIMD.cpp b/js/src/jit/x86-shared/MacroAssembler-x86-shared-SIMD.cpp new file mode 100644 index 0000000000..51fa65e40d --- /dev/null +++ b/js/src/jit/x86-shared/MacroAssembler-x86-shared-SIMD.cpp @@ -0,0 +1,1484 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "jit/MacroAssembler.h" +#include "jit/x86-shared/MacroAssembler-x86-shared.h" + +#include "jit/MacroAssembler-inl.h" + +using namespace js; +using namespace js::jit; + +using mozilla::DebugOnly; +using mozilla::FloatingPoint; +using mozilla::Maybe; +using mozilla::SpecificNaN; + +void MacroAssemblerX86Shared::splatX16(Register input, FloatRegister output) { + ScratchSimd128Scope scratch(asMasm()); + + vmovd(input, output); + if (HasAVX2()) { + vbroadcastb(Operand(output), output); + return; + } + vpxor(scratch, scratch, scratch); + vpshufb(scratch, output, output); +} + +void MacroAssemblerX86Shared::splatX8(Register input, FloatRegister output) { + vmovd(input, output); + if (HasAVX2()) { + vbroadcastw(Operand(output), output); + return; + } + vpshuflw(0, output, output); + vpshufd(0, output, output); +} + +void MacroAssemblerX86Shared::splatX4(Register input, FloatRegister output) { + vmovd(input, output); + if (HasAVX2()) { + vbroadcastd(Operand(output), output); + return; + } + vpshufd(0, output, output); +} + +void MacroAssemblerX86Shared::splatX4(FloatRegister input, + FloatRegister output) { + MOZ_ASSERT(input.isSingle() && output.isSimd128()); + if (HasAVX2()) { + vbroadcastss(Operand(input), output); + return; + } + input = asMasm().moveSimd128FloatIfNotAVX(input.asSimd128(), output); + vshufps(0, input, input, output); +} + +void MacroAssemblerX86Shared::splatX2(FloatRegister input, + FloatRegister output) { + MOZ_ASSERT(input.isDouble() && output.isSimd128()); + vmovddup(Operand(input.asSimd128()), output); +} + +void MacroAssemblerX86Shared::extractLaneInt32x4(FloatRegister input, + Register output, + unsigned lane) { + if (lane == 0) { + // The value we want to extract is in the low double-word + moveLowInt32(input, output); + } else { + vpextrd(lane, input, output); + } +} + +void MacroAssemblerX86Shared::extractLaneFloat32x4(FloatRegister input, + FloatRegister output, + unsigned lane) { + MOZ_ASSERT(input.isSimd128() && output.isSingle()); + if (lane == 0) { + // The value we want to extract is in the low double-word + if (input.asSingle() != output) { + moveFloat32(input, output); + } + } else if (lane == 2) { + moveHighPairToLowPairFloat32(input, output); + } else { + uint32_t mask = MacroAssembler::ComputeShuffleMask(lane); + FloatRegister dest = output.asSimd128(); + input = moveSimd128FloatIfNotAVX(input, dest); + vshufps(mask, input, input, dest); + } +} + +void MacroAssemblerX86Shared::extractLaneFloat64x2(FloatRegister input, + FloatRegister output, + unsigned lane) { + MOZ_ASSERT(input.isSimd128() && output.isDouble()); + if (lane == 0) { + // The value we want to extract is in the low quadword + if (input.asDouble() != output) { + moveDouble(input, output); + } + } else { + vpalignr(Operand(input), output, output, 8); + } +} + +void MacroAssemblerX86Shared::extractLaneInt16x8(FloatRegister input, + Register output, unsigned lane, + SimdSign sign) { + vpextrw(lane, input, Operand(output)); + if (sign == SimdSign::Signed) { + movswl(output, output); + } +} + +void MacroAssemblerX86Shared::extractLaneInt8x16(FloatRegister input, + Register output, unsigned lane, + SimdSign sign) { + vpextrb(lane, input, Operand(output)); + if (sign == SimdSign::Signed) { + if (!AllocatableGeneralRegisterSet(Registers::SingleByteRegs).has(output)) { + xchgl(eax, output); + movsbl(eax, eax); + xchgl(eax, output); + } else { + movsbl(output, output); + } + } +} + +void MacroAssemblerX86Shared::replaceLaneFloat32x4(unsigned lane, + FloatRegister lhs, + FloatRegister rhs, + 
FloatRegister dest) { + MOZ_ASSERT(lhs.isSimd128() && rhs.isSingle()); + + if (lane == 0) { + if (rhs.asSimd128() == lhs) { + // no-op, although this should not normally happen for type checking + // reasons higher up in the stack. + moveSimd128Float(lhs, dest); + } else { + // move low dword of value into low dword of output + vmovss(rhs, lhs, dest); + } + } else { + vinsertps(vinsertpsMask(0, lane), rhs, lhs, dest); + } +} + +void MacroAssemblerX86Shared::replaceLaneFloat64x2(unsigned lane, + FloatRegister lhs, + FloatRegister rhs, + FloatRegister dest) { + MOZ_ASSERT(lhs.isSimd128() && rhs.isDouble()); + + if (lane == 0) { + if (rhs.asSimd128() == lhs) { + // no-op, although this should not normally happen for type checking + // reasons higher up in the stack. + moveSimd128Float(lhs, dest); + } else { + // move low qword of value into low qword of output + vmovsd(rhs, lhs, dest); + } + } else { + // move low qword of value into high qword of output + vshufpd(0, rhs, lhs, dest); + } +} + +void MacroAssemblerX86Shared::blendInt8x16(FloatRegister lhs, FloatRegister rhs, + FloatRegister output, + FloatRegister temp, + const uint8_t lanes[16]) { + asMasm().loadConstantSimd128Int( + SimdConstant::CreateX16(reinterpret_cast<const int8_t*>(lanes)), temp); + vpblendvb(temp, rhs, lhs, output); +} + +void MacroAssemblerX86Shared::blendInt16x8(FloatRegister lhs, FloatRegister rhs, + FloatRegister output, + const uint16_t lanes[8]) { + uint32_t mask = 0; + for (unsigned i = 0; i < 8; i++) { + if (lanes[i]) { + mask |= (1 << i); + } + } + vpblendw(mask, rhs, lhs, output); +} + +void MacroAssemblerX86Shared::laneSelectSimd128(FloatRegister mask, + FloatRegister lhs, + FloatRegister rhs, + FloatRegister output) { + vpblendvb(mask, lhs, rhs, output); +} + +void MacroAssemblerX86Shared::shuffleInt8x16(FloatRegister lhs, + FloatRegister rhs, + FloatRegister output, + const uint8_t lanes[16]) { + ScratchSimd128Scope scratch(asMasm()); + + // Use pshufb instructions to gather the lanes from each source vector. + // A negative index creates a zero lane, so the two vectors can be combined. + + // Set scratch = lanes from rhs. + int8_t idx[16]; + for (unsigned i = 0; i < 16; i++) { + idx[i] = lanes[i] >= 16 ? lanes[i] - 16 : -1; + } + rhs = moveSimd128IntIfNotAVX(rhs, scratch); + asMasm().vpshufbSimd128(SimdConstant::CreateX16(idx), rhs, scratch); + + // Set output = lanes from lhs. + for (unsigned i = 0; i < 16; i++) { + idx[i] = lanes[i] < 16 ? lanes[i] : -1; + } + lhs = moveSimd128IntIfNotAVX(lhs, output); + asMasm().vpshufbSimd128(SimdConstant::CreateX16(idx), lhs, output); + + // Combine. 
+ vpor(scratch, output, output); +} + +static inline FloatRegister ToSimdFloatRegister(const Operand& op) { + return FloatRegister(op.fpu(), FloatRegister::Codes::ContentType::Simd128); +} + +void MacroAssemblerX86Shared::compareInt8x16(FloatRegister lhs, Operand rhs, + Assembler::Condition cond, + FloatRegister output) { + switch (cond) { + case Assembler::Condition::GreaterThan: + vpcmpgtb(rhs, lhs, output); + break; + case Assembler::Condition::Equal: + vpcmpeqb(rhs, lhs, output); + break; + case Assembler::Condition::LessThan: { + ScratchSimd128Scope scratch(asMasm()); + if (lhs == output) { + moveSimd128Int(lhs, scratch); + lhs = scratch; + } + if (rhs.kind() == Operand::FPREG) { + moveSimd128Int(ToSimdFloatRegister(rhs), output); + } else { + loadAlignedSimd128Int(rhs, output); + } + vpcmpgtb(Operand(lhs), output, output); + break; + } + case Assembler::Condition::NotEqual: + vpcmpeqb(rhs, lhs, output); + asMasm().bitwiseNotSimd128(output, output); + break; + case Assembler::Condition::GreaterThanOrEqual: { + ScratchSimd128Scope scratch(asMasm()); + if (lhs == output) { + moveSimd128Int(lhs, scratch); + lhs = scratch; + } + if (rhs.kind() == Operand::FPREG) { + moveSimd128Int(ToSimdFloatRegister(rhs), output); + } else { + loadAlignedSimd128Int(rhs, output); + } + vpcmpgtb(Operand(lhs), output, output); + } + asMasm().bitwiseNotSimd128(output, output); + break; + case Assembler::Condition::LessThanOrEqual: + // lhs <= rhs is equivalent to !(rhs < lhs), which we compute here. + vpcmpgtb(rhs, lhs, output); + asMasm().bitwiseNotSimd128(output, output); + break; + case Assembler::Above: + if (rhs.kind() == Operand::FPREG && ToSimdFloatRegister(rhs) == output) { + vpminub(rhs, lhs, output); + vpcmpeqb(Operand(lhs), output, output); + } else { + vpmaxub(rhs, lhs, output); + vpcmpeqb(rhs, output, output); + } + asMasm().bitwiseNotSimd128(output, output); + break; + case Assembler::BelowOrEqual: + if (rhs.kind() == Operand::FPREG && ToSimdFloatRegister(rhs) == output) { + vpminub(rhs, lhs, output); + vpcmpeqb(Operand(lhs), output, output); + } else { + vpmaxub(rhs, lhs, output); + vpcmpeqb(rhs, output, output); + } + break; + case Assembler::Below: + if (rhs.kind() == Operand::FPREG && ToSimdFloatRegister(rhs) == output) { + vpmaxub(rhs, lhs, output); + vpcmpeqb(Operand(lhs), output, output); + } else { + vpminub(rhs, lhs, output); + vpcmpeqb(rhs, output, output); + } + asMasm().bitwiseNotSimd128(output, output); + break; + case Assembler::AboveOrEqual: + if (rhs.kind() == Operand::FPREG && ToSimdFloatRegister(rhs) == output) { + vpmaxub(rhs, lhs, output); + vpcmpeqb(Operand(lhs), output, output); + } else { + vpminub(rhs, lhs, output); + vpcmpeqb(rhs, output, output); + } + break; + default: + MOZ_CRASH("unexpected condition op"); + } +} + +void MacroAssemblerX86Shared::compareInt8x16(Assembler::Condition cond, + FloatRegister lhs, + const SimdConstant& rhs, + FloatRegister dest) { + bool complement = false; + switch (cond) { + case Assembler::Condition::NotEqual: + complement = true; + [[fallthrough]]; + case Assembler::Condition::Equal: + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpcmpeqb, + &MacroAssembler::vpcmpeqbSimd128); + break; + case Assembler::Condition::LessThanOrEqual: + complement = true; + [[fallthrough]]; + case Assembler::Condition::GreaterThan: + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpcmpgtb, + &MacroAssembler::vpcmpgtbSimd128); + break; + default: + MOZ_CRASH("unexpected condition op"); + } + if (complement) { + asMasm().bitwiseXorSimd128(dest, 
SimdConstant::SplatX16(-1), dest); + } +} + +void MacroAssemblerX86Shared::compareInt16x8(FloatRegister lhs, Operand rhs, + Assembler::Condition cond, + FloatRegister output) { + switch (cond) { + case Assembler::Condition::GreaterThan: + vpcmpgtw(rhs, lhs, output); + break; + case Assembler::Condition::Equal: + vpcmpeqw(rhs, lhs, output); + break; + case Assembler::Condition::LessThan: { + ScratchSimd128Scope scratch(asMasm()); + if (lhs == output) { + moveSimd128Int(lhs, scratch); + lhs = scratch; + } + if (rhs.kind() == Operand::FPREG) { + moveSimd128Int(ToSimdFloatRegister(rhs), output); + } else { + loadAlignedSimd128Int(rhs, output); + } + vpcmpgtw(Operand(lhs), output, output); + break; + } + case Assembler::Condition::NotEqual: + vpcmpeqw(rhs, lhs, output); + asMasm().bitwiseNotSimd128(output, output); + break; + case Assembler::Condition::GreaterThanOrEqual: { + ScratchSimd128Scope scratch(asMasm()); + if (lhs == output) { + moveSimd128Int(lhs, scratch); + lhs = scratch; + } + if (rhs.kind() == Operand::FPREG) { + moveSimd128Int(ToSimdFloatRegister(rhs), output); + } else { + loadAlignedSimd128Int(rhs, output); + } + vpcmpgtw(Operand(lhs), output, output); + } + asMasm().bitwiseNotSimd128(output, output); + break; + case Assembler::Condition::LessThanOrEqual: + // lhs <= rhs is equivalent to !(rhs < lhs), which we compute here. + vpcmpgtw(rhs, lhs, output); + asMasm().bitwiseNotSimd128(output, output); + break; + case Assembler::Above: + if (rhs.kind() == Operand::FPREG && ToSimdFloatRegister(rhs) == output) { + vpminuw(rhs, lhs, output); + vpcmpeqw(Operand(lhs), output, output); + } else { + vpmaxuw(rhs, lhs, output); + vpcmpeqw(rhs, output, output); + } + asMasm().bitwiseNotSimd128(output, output); + break; + case Assembler::BelowOrEqual: + if (rhs.kind() == Operand::FPREG && ToSimdFloatRegister(rhs) == output) { + vpminuw(rhs, lhs, output); + vpcmpeqw(Operand(lhs), output, output); + } else { + vpmaxuw(rhs, lhs, output); + vpcmpeqw(rhs, output, output); + } + break; + case Assembler::Below: + if (rhs.kind() == Operand::FPREG && ToSimdFloatRegister(rhs) == output) { + vpmaxuw(rhs, lhs, output); + vpcmpeqw(Operand(lhs), output, output); + } else { + vpminuw(rhs, lhs, output); + vpcmpeqw(rhs, output, output); + } + asMasm().bitwiseNotSimd128(output, output); + break; + case Assembler::AboveOrEqual: + if (rhs.kind() == Operand::FPREG && ToSimdFloatRegister(rhs) == output) { + vpmaxuw(rhs, lhs, output); + vpcmpeqw(Operand(lhs), output, output); + } else { + vpminuw(rhs, lhs, output); + vpcmpeqw(rhs, output, output); + } + break; + default: + MOZ_CRASH("unexpected condition op"); + } +} + +void MacroAssemblerX86Shared::compareInt16x8(Assembler::Condition cond, + FloatRegister lhs, + const SimdConstant& rhs, + FloatRegister dest) { + bool complement = false; + switch (cond) { + case Assembler::Condition::NotEqual: + complement = true; + [[fallthrough]]; + case Assembler::Condition::Equal: + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpcmpeqw, + &MacroAssembler::vpcmpeqwSimd128); + break; + case Assembler::Condition::LessThanOrEqual: + complement = true; + [[fallthrough]]; + case Assembler::Condition::GreaterThan: + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpcmpgtw, + &MacroAssembler::vpcmpgtwSimd128); + break; + default: + MOZ_CRASH("unexpected condition op"); + } + if (complement) { + asMasm().bitwiseXorSimd128(dest, SimdConstant::SplatX16(-1), dest); + } +} + +void MacroAssemblerX86Shared::compareInt32x4(FloatRegister lhs, Operand rhs, + Assembler::Condition cond, + 
FloatRegister output) { + switch (cond) { + case Assembler::Condition::GreaterThan: + vpcmpgtd(rhs, lhs, output); + break; + case Assembler::Condition::Equal: + vpcmpeqd(rhs, lhs, output); + break; + case Assembler::Condition::LessThan: { + ScratchSimd128Scope scratch(asMasm()); + if (lhs == output) { + moveSimd128Int(lhs, scratch); + lhs = scratch; + } + if (rhs.kind() == Operand::FPREG) { + moveSimd128Int(ToSimdFloatRegister(rhs), output); + } else { + loadAlignedSimd128Int(rhs, output); + } + vpcmpgtd(Operand(lhs), output, output); + break; + } + case Assembler::Condition::NotEqual: + vpcmpeqd(rhs, lhs, output); + asMasm().bitwiseNotSimd128(output, output); + break; + case Assembler::Condition::GreaterThanOrEqual: { + ScratchSimd128Scope scratch(asMasm()); + if (lhs == output) { + moveSimd128Int(lhs, scratch); + lhs = scratch; + } + if (rhs.kind() == Operand::FPREG) { + moveSimd128Int(ToSimdFloatRegister(rhs), output); + } else { + loadAlignedSimd128Int(rhs, output); + } + vpcmpgtd(Operand(lhs), output, output); + } + asMasm().bitwiseNotSimd128(output, output); + break; + case Assembler::Condition::LessThanOrEqual: + // lhs <= rhs is equivalent to !(rhs < lhs), which we compute here. + vpcmpgtd(rhs, lhs, output); + asMasm().bitwiseNotSimd128(output, output); + break; + case Assembler::Above: + if (rhs.kind() == Operand::FPREG && ToSimdFloatRegister(rhs) == output) { + vpminud(rhs, lhs, output); + vpcmpeqd(Operand(lhs), output, output); + } else { + vpmaxud(rhs, lhs, output); + vpcmpeqd(rhs, output, output); + } + asMasm().bitwiseNotSimd128(output, output); + break; + case Assembler::BelowOrEqual: + if (rhs.kind() == Operand::FPREG && ToSimdFloatRegister(rhs) == output) { + vpminud(rhs, lhs, output); + vpcmpeqd(Operand(lhs), output, output); + } else { + vpmaxud(rhs, lhs, output); + vpcmpeqd(rhs, output, output); + } + break; + case Assembler::Below: + if (rhs.kind() == Operand::FPREG && ToSimdFloatRegister(rhs) == output) { + vpmaxud(rhs, lhs, output); + vpcmpeqd(Operand(lhs), output, output); + } else { + vpminud(rhs, lhs, output); + vpcmpeqd(rhs, output, output); + } + asMasm().bitwiseNotSimd128(output, output); + break; + case Assembler::AboveOrEqual: + if (rhs.kind() == Operand::FPREG && ToSimdFloatRegister(rhs) == output) { + vpmaxud(rhs, lhs, output); + vpcmpeqd(Operand(lhs), output, output); + } else { + vpminud(rhs, lhs, output); + vpcmpeqd(rhs, output, output); + } + break; + default: + MOZ_CRASH("unexpected condition op"); + } +} + +void MacroAssemblerX86Shared::compareInt32x4(Assembler::Condition cond, + FloatRegister lhs, + const SimdConstant& rhs, + FloatRegister dest) { + bool complement = false; + switch (cond) { + case Assembler::Condition::NotEqual: + complement = true; + [[fallthrough]]; + case Assembler::Condition::Equal: + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpcmpeqd, + &MacroAssembler::vpcmpeqdSimd128); + break; + case Assembler::Condition::LessThanOrEqual: + complement = true; + [[fallthrough]]; + case Assembler::Condition::GreaterThan: + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpcmpgtd, + &MacroAssembler::vpcmpgtdSimd128); + break; + default: + MOZ_CRASH("unexpected condition op"); + } + if (complement) { + asMasm().bitwiseXorSimd128(dest, SimdConstant::SplatX16(-1), dest); + } +} + +void MacroAssemblerX86Shared::compareForEqualityInt64x2( + FloatRegister lhs, Operand rhs, Assembler::Condition cond, + FloatRegister output) { + static const SimdConstant allOnes = SimdConstant::SplatX4(-1); + switch (cond) { + case Assembler::Condition::Equal: + 
vpcmpeqq(rhs, lhs, output); + break; + case Assembler::Condition::NotEqual: + vpcmpeqq(rhs, lhs, output); + asMasm().bitwiseXorSimd128(output, allOnes, output); + break; + default: + MOZ_CRASH("unexpected condition op"); + } +} + +void MacroAssemblerX86Shared::compareForOrderingInt64x2( + FloatRegister lhs, Operand rhs, Assembler::Condition cond, + FloatRegister temp1, FloatRegister temp2, FloatRegister output) { + static const SimdConstant allOnes = SimdConstant::SplatX4(-1); + // The pseudo code is for (e.g. > comparison): + // __m128i pcmpgtq_sse2 (__m128i a, __m128i b) { + // __m128i r = _mm_and_si128(_mm_cmpeq_epi32(a, b), _mm_sub_epi64(b, a)); + // r = _mm_or_si128(r, _mm_cmpgt_epi32(a, b)); + // return _mm_shuffle_epi32(r, _MM_SHUFFLE(3,3,1,1)); + // } + // Credits to https://stackoverflow.com/a/65175746 + switch (cond) { + case Assembler::Condition::GreaterThan: + vmovdqa(rhs, temp1); + vmovdqa(Operand(lhs), temp2); + vpsubq(Operand(lhs), temp1, temp1); + vpcmpeqd(rhs, temp2, temp2); + vandpd(temp2, temp1, temp1); + lhs = asMasm().moveSimd128IntIfNotAVX(lhs, output); + vpcmpgtd(rhs, lhs, output); + vpor(Operand(temp1), output, output); + vpshufd(MacroAssembler::ComputeShuffleMask(1, 1, 3, 3), output, output); + break; + case Assembler::Condition::LessThan: + vmovdqa(rhs, temp1); + vmovdqa(Operand(lhs), temp2); + vpcmpgtd(Operand(lhs), temp1, temp1); + vpcmpeqd(Operand(rhs), temp2, temp2); + lhs = asMasm().moveSimd128IntIfNotAVX(lhs, output); + vpsubq(rhs, lhs, output); + vandpd(temp2, output, output); + vpor(Operand(temp1), output, output); + vpshufd(MacroAssembler::ComputeShuffleMask(1, 1, 3, 3), output, output); + break; + case Assembler::Condition::GreaterThanOrEqual: + vmovdqa(rhs, temp1); + vmovdqa(Operand(lhs), temp2); + vpcmpgtd(Operand(lhs), temp1, temp1); + vpcmpeqd(Operand(rhs), temp2, temp2); + lhs = asMasm().moveSimd128IntIfNotAVX(lhs, output); + vpsubq(rhs, lhs, output); + vandpd(temp2, output, output); + vpor(Operand(temp1), output, output); + vpshufd(MacroAssembler::ComputeShuffleMask(1, 1, 3, 3), output, output); + asMasm().bitwiseXorSimd128(output, allOnes, output); + break; + case Assembler::Condition::LessThanOrEqual: + vmovdqa(rhs, temp1); + vmovdqa(Operand(lhs), temp2); + vpsubq(Operand(lhs), temp1, temp1); + vpcmpeqd(rhs, temp2, temp2); + vandpd(temp2, temp1, temp1); + lhs = asMasm().moveSimd128IntIfNotAVX(lhs, output); + vpcmpgtd(rhs, lhs, output); + vpor(Operand(temp1), output, output); + vpshufd(MacroAssembler::ComputeShuffleMask(1, 1, 3, 3), output, output); + asMasm().bitwiseXorSimd128(output, allOnes, output); + break; + default: + MOZ_CRASH("unexpected condition op"); + } +} + +void MacroAssemblerX86Shared::compareForOrderingInt64x2AVX( + FloatRegister lhs, FloatRegister rhs, Assembler::Condition cond, + FloatRegister output) { + MOZ_ASSERT(HasSSE42()); + static const SimdConstant allOnes = SimdConstant::SplatX4(-1); + switch (cond) { + case Assembler::Condition::GreaterThan: + vpcmpgtq(Operand(rhs), lhs, output); + break; + case Assembler::Condition::LessThan: + vpcmpgtq(Operand(lhs), rhs, output); + break; + case Assembler::Condition::GreaterThanOrEqual: + vpcmpgtq(Operand(lhs), rhs, output); + asMasm().bitwiseXorSimd128(output, allOnes, output); + break; + case Assembler::Condition::LessThanOrEqual: + vpcmpgtq(Operand(rhs), lhs, output); + asMasm().bitwiseXorSimd128(output, allOnes, output); + break; + default: + MOZ_CRASH("unexpected condition op"); + } +} + +void MacroAssemblerX86Shared::compareFloat32x4(FloatRegister lhs, Operand rhs, + 
Assembler::Condition cond, + FloatRegister output) { + // TODO Can do better here with three-address compares + + // Move lhs to output if lhs!=output; move rhs out of the way if rhs==output. + // This is bad, but Ion does not need this fixup. + ScratchSimd128Scope scratch(asMasm()); + if (!HasAVX() && !lhs.aliases(output)) { + if (rhs.kind() == Operand::FPREG && + output.aliases(FloatRegister::FromCode(rhs.fpu()))) { + vmovaps(rhs, scratch); + rhs = Operand(scratch); + } + vmovaps(lhs, output); + lhs = output; + } + + switch (cond) { + case Assembler::Condition::Equal: + vcmpeqps(rhs, lhs, output); + break; + case Assembler::Condition::LessThan: + vcmpltps(rhs, lhs, output); + break; + case Assembler::Condition::LessThanOrEqual: + vcmpleps(rhs, lhs, output); + break; + case Assembler::Condition::NotEqual: + vcmpneqps(rhs, lhs, output); + break; + case Assembler::Condition::GreaterThanOrEqual: + case Assembler::Condition::GreaterThan: + // We reverse these operations in the -inl.h file so that we don't have to + // copy into and out of temporaries after codegen. + MOZ_CRASH("should have reversed this"); + default: + MOZ_CRASH("unexpected condition op"); + } +} + +void MacroAssemblerX86Shared::compareFloat32x4(Assembler::Condition cond, + FloatRegister lhs, + const SimdConstant& rhs, + FloatRegister dest) { + switch (cond) { + case Assembler::Condition::Equal: + binarySimd128(lhs, rhs, dest, &MacroAssembler::vcmpeqps, + &MacroAssembler::vcmpeqpsSimd128); + break; + case Assembler::Condition::LessThan: + binarySimd128(lhs, rhs, dest, &MacroAssembler::vcmpltps, + &MacroAssembler::vcmpltpsSimd128); + break; + case Assembler::Condition::LessThanOrEqual: + binarySimd128(lhs, rhs, dest, &MacroAssembler::vcmpleps, + &MacroAssembler::vcmplepsSimd128); + break; + case Assembler::Condition::NotEqual: + binarySimd128(lhs, rhs, dest, &MacroAssembler::vcmpneqps, + &MacroAssembler::vcmpneqpsSimd128); + break; + default: + MOZ_CRASH("unexpected condition op"); + } +} + +void MacroAssemblerX86Shared::compareFloat64x2(FloatRegister lhs, Operand rhs, + Assembler::Condition cond, + FloatRegister output) { + // TODO Can do better here with three-address compares + + // Move lhs to output if lhs!=output; move rhs out of the way if rhs==output. + // This is bad, but Ion does not need this fixup. + ScratchSimd128Scope scratch(asMasm()); + if (!HasAVX() && !lhs.aliases(output)) { + if (rhs.kind() == Operand::FPREG && + output.aliases(FloatRegister::FromCode(rhs.fpu()))) { + vmovapd(rhs, scratch); + rhs = Operand(scratch); + } + vmovapd(lhs, output); + lhs = output; + } + + switch (cond) { + case Assembler::Condition::Equal: + vcmpeqpd(rhs, lhs, output); + break; + case Assembler::Condition::LessThan: + vcmpltpd(rhs, lhs, output); + break; + case Assembler::Condition::LessThanOrEqual: + vcmplepd(rhs, lhs, output); + break; + case Assembler::Condition::NotEqual: + vcmpneqpd(rhs, lhs, output); + break; + case Assembler::Condition::GreaterThanOrEqual: + case Assembler::Condition::GreaterThan: + // We reverse these operations in the -inl.h file so that we don't have to + // copy into and out of temporaries after codegen. 
+ MOZ_CRASH("should have reversed this"); + default: + MOZ_CRASH("unexpected condition op"); + } +} + +void MacroAssemblerX86Shared::compareFloat64x2(Assembler::Condition cond, + FloatRegister lhs, + const SimdConstant& rhs, + FloatRegister dest) { + switch (cond) { + case Assembler::Condition::Equal: + binarySimd128(lhs, rhs, dest, &MacroAssembler::vcmpeqpd, + &MacroAssembler::vcmpeqpdSimd128); + break; + case Assembler::Condition::LessThan: + binarySimd128(lhs, rhs, dest, &MacroAssembler::vcmpltpd, + &MacroAssembler::vcmpltpdSimd128); + break; + case Assembler::Condition::LessThanOrEqual: + binarySimd128(lhs, rhs, dest, &MacroAssembler::vcmplepd, + &MacroAssembler::vcmplepdSimd128); + break; + case Assembler::Condition::NotEqual: + binarySimd128(lhs, rhs, dest, &MacroAssembler::vcmpneqpd, + &MacroAssembler::vcmpneqpdSimd128); + break; + default: + MOZ_CRASH("unexpected condition op"); + } +} + +// Semantics of wasm max and min. +// +// * -0 < 0 +// * If one input is NaN then that NaN is the output +// * If both inputs are NaN then the output is selected nondeterministically +// * Any returned NaN is always made quiet +// * The MVP spec 2.2.3 says "No distinction is made between signalling and +// quiet NaNs", suggesting SNaN inputs are allowed and should not fault +// +// Semantics of maxps/minps/maxpd/minpd: +// +// * If the values are both +/-0 the rhs is returned +// * If the rhs is SNaN then the rhs is returned +// * If either value is NaN then the rhs is returned +// * An SNaN operand does not appear to give rise to an exception, at least +// not in the JS shell on Linux, though the Intel spec lists Invalid +// as one of the possible exceptions + +// Various unaddressed considerations: +// +// It's pretty insane for this to take an Operand rhs - it really needs to be +// a register, given the number of times we access it. +// +// Constant load can be folded into the ANDPS. Do we care? It won't save us +// any registers, since output/temp1/temp2/scratch are all live at the same time +// after the first instruction of the slow path. +// +// Can we use blend for the NaN extraction/insertion? We'd need xmm0 for the +// mask, which is no fun. But it would be lhs UNORD lhs -> mask, blend; +// rhs UNORD rhs -> mask; blend. Better than the mess we have below. But +// we'd still need to setup the QNaN bits, unless we can blend those too +// with the lhs UNORD rhs mask? +// +// If we could determine that both input lanes are NaN then the result of the +// fast path should be fine modulo the QNaN bits, but it's not obvious this is +// much of an advantage. 
+ +void MacroAssemblerX86Shared::minMaxFloat32x4(bool isMin, FloatRegister lhs, + Operand rhs, FloatRegister temp1, + FloatRegister temp2, + FloatRegister output) { + ScratchSimd128Scope scratch(asMasm()); + Label l; + SimdConstant quietBits(SimdConstant::SplatX4(int32_t(0x00400000))); + + /* clang-format off */ /* leave my comments alone */ + lhs = moveSimd128FloatIfNotAVXOrOther(lhs, scratch, output); + if (isMin) { + vmovaps(lhs, output); // compute + vminps(rhs, output, output); // min lhs, rhs + vmovaps(rhs, temp1); // compute + vminps(Operand(lhs), temp1, temp1); // min rhs, lhs + vorps(temp1, output, output); // fix min(-0, 0) with OR + } else { + vmovaps(lhs, output); // compute + vmaxps(rhs, output, output); // max lhs, rhs + vmovaps(rhs, temp1); // compute + vmaxps(Operand(lhs), temp1, temp1); // max rhs, lhs + vandps(temp1, output, output); // fix max(-0, 0) with AND + } + vmovaps(lhs, temp1); // compute + vcmpunordps(rhs, temp1, temp1); // lhs UNORD rhs + vptest(temp1, temp1); // check if any unordered + j(Assembler::Equal, &l); // and exit if not + + // Slow path. + // output has result for non-NaN lanes, garbage in NaN lanes. + // temp1 has lhs UNORD rhs. + // temp2 is dead. + + vmovaps(temp1, temp2); // clear NaN lanes of result + vpandn(output, temp2, temp2); // result now in temp2 + asMasm().vpandSimd128(quietBits, temp1, temp1); // setup QNaN bits in NaN lanes + vorps(temp1, temp2, temp2); // and OR into result + vmovaps(lhs, temp1); // find NaN lanes + vcmpunordps(Operand(temp1), temp1, temp1); // in lhs + vmovaps(temp1, output); // (and save them for later) + vandps(lhs, temp1, temp1); // and extract the NaNs + vorps(temp1, temp2, temp2); // and add to the result + vmovaps(rhs, temp1); // find NaN lanes + vcmpunordps(Operand(temp1), temp1, temp1); // in rhs + vpandn(temp1, output, output); // except if they were in lhs + vandps(rhs, output, output); // and extract the NaNs + vorps(temp2, output, output); // and add to the result + + bind(&l); + /* clang-format on */ +} + +void MacroAssemblerX86Shared::minMaxFloat32x4AVX(bool isMin, FloatRegister lhs, + FloatRegister rhs, + FloatRegister temp1, + FloatRegister temp2, + FloatRegister output) { + ScratchSimd128Scope scratch(asMasm()); + Label l; + SimdConstant quietBits(SimdConstant::SplatX4(int32_t(0x00400000))); + + /* clang-format off */ /* leave my comments alone */ + FloatRegister lhsCopy = moveSimd128FloatIfEqual(lhs, scratch, output); + // Allow rhs be assigned to scratch when rhs == lhs and == output -- + // don't make a special case since the semantics require setup QNaN bits. + FloatRegister rhsCopy = moveSimd128FloatIfEqual(rhs, scratch, output); + if (isMin) { + vminps(Operand(rhs), lhs, temp2); // min lhs, rhs + vminps(Operand(lhs), rhs, temp1); // min rhs, lhs + vorps(temp1, temp2, output); // fix min(-0, 0) with OR + } else { + vmaxps(Operand(rhs), lhs, temp2); // max lhs, rhs + vmaxps(Operand(lhs), rhs, temp1); // max rhs, lhs + vandps(temp1, temp2, output); // fix max(-0, 0) with AND + } + vcmpunordps(Operand(rhsCopy), lhsCopy, temp1); // lhs UNORD rhs + vptest(temp1, temp1); // check if any unordered + j(Assembler::Equal, &l); // and exit if not + + // Slow path. + // output has result for non-NaN lanes, garbage in NaN lanes. + // temp1 has lhs UNORD rhs. + // temp2 is dead. 
+ vcmpunordps(Operand(lhsCopy), lhsCopy, temp2); // find NaN lanes in lhs + vblendvps(temp2, lhsCopy, rhsCopy, temp2); // add other lines from rhs + asMasm().vporSimd128(quietBits, temp2, temp2); // setup QNaN bits in NaN lanes + vblendvps(temp1, temp2, output, output); // replace NaN lines from temp2 + + bind(&l); + /* clang-format on */ +} + +// Exactly as above. +void MacroAssemblerX86Shared::minMaxFloat64x2(bool isMin, FloatRegister lhs, + Operand rhs, FloatRegister temp1, + FloatRegister temp2, + FloatRegister output) { + ScratchSimd128Scope scratch(asMasm()); + Label l; + SimdConstant quietBits(SimdConstant::SplatX2(int64_t(0x0008000000000000ull))); + + /* clang-format off */ /* leave my comments alone */ + lhs = moveSimd128FloatIfNotAVXOrOther(lhs, scratch, output); + if (isMin) { + vmovapd(lhs, output); // compute + vminpd(rhs, output, output); // min lhs, rhs + vmovapd(rhs, temp1); // compute + vminpd(Operand(lhs), temp1, temp1); // min rhs, lhs + vorpd(temp1, output, output); // fix min(-0, 0) with OR + } else { + vmovapd(lhs, output); // compute + vmaxpd(rhs, output, output); // max lhs, rhs + vmovapd(rhs, temp1); // compute + vmaxpd(Operand(lhs), temp1, temp1); // max rhs, lhs + vandpd(temp1, output, output); // fix max(-0, 0) with AND + } + vmovapd(lhs, temp1); // compute + vcmpunordpd(rhs, temp1, temp1); // lhs UNORD rhs + vptest(temp1, temp1); // check if any unordered + j(Assembler::Equal, &l); // and exit if not + + // Slow path. + // output has result for non-NaN lanes, garbage in NaN lanes. + // temp1 has lhs UNORD rhs. + // temp2 is dead. + + vmovapd(temp1, temp2); // clear NaN lanes of result + vpandn(output, temp2, temp2); // result now in temp2 + asMasm().vpandSimd128(quietBits, temp1, temp1); // setup QNaN bits in NaN lanes + vorpd(temp1, temp2, temp2); // and OR into result + vmovapd(lhs, temp1); // find NaN lanes + vcmpunordpd(Operand(temp1), temp1, temp1); // in lhs + vmovapd(temp1, output); // (and save them for later) + vandpd(lhs, temp1, temp1); // and extract the NaNs + vorpd(temp1, temp2, temp2); // and add to the result + vmovapd(rhs, temp1); // find NaN lanes + vcmpunordpd(Operand(temp1), temp1, temp1); // in rhs + vpandn(temp1, output, output); // except if they were in lhs + vandpd(rhs, output, output); // and extract the NaNs + vorpd(temp2, output, output); // and add to the result + + bind(&l); + /* clang-format on */ +} + +void MacroAssemblerX86Shared::minMaxFloat64x2AVX(bool isMin, FloatRegister lhs, + FloatRegister rhs, + FloatRegister temp1, + FloatRegister temp2, + FloatRegister output) { + ScratchSimd128Scope scratch(asMasm()); + Label l; + SimdConstant quietBits(SimdConstant::SplatX2(int64_t(0x0008000000000000ull))); + + /* clang-format off */ /* leave my comments alone */ + FloatRegister lhsCopy = moveSimd128FloatIfEqual(lhs, scratch, output); + // Allow rhs be assigned to scratch when rhs == lhs and == output -- + // don't make a special case since the semantics require setup QNaN bits. 
+ FloatRegister rhsCopy = moveSimd128FloatIfEqual(rhs, scratch, output); + if (isMin) { + vminpd(Operand(rhs), lhs, temp2); // min lhs, rhs + vminpd(Operand(lhs), rhs, temp1); // min rhs, lhs + vorpd(temp1, temp2, output); // fix min(-0, 0) with OR + } else { + vmaxpd(Operand(rhs), lhs, temp2); // max lhs, rhs + vmaxpd(Operand(lhs), rhs, temp1); // max rhs, lhs + vandpd(temp1, temp2, output); // fix max(-0, 0) with AND + } + vcmpunordpd(Operand(rhsCopy), lhsCopy, temp1); // lhs UNORD rhs + vptest(temp1, temp1); // check if any unordered + j(Assembler::Equal, &l); // and exit if not + + // Slow path. + // output has result for non-NaN lanes, garbage in NaN lanes. + // temp1 has lhs UNORD rhs. + // temp2 is dead. + vcmpunordpd(Operand(lhsCopy), lhsCopy, temp2); // find NaN lanes in lhs + vblendvpd(temp2, lhsCopy, rhsCopy, temp2); // add other lines from rhs + asMasm().vporSimd128(quietBits, temp2, temp2); // setup QNaN bits in NaN lanes + vblendvpd(temp1, temp2, output, output); // replace NaN lines from temp2 + + bind(&l); + /* clang-format on */ +} + +void MacroAssemblerX86Shared::minFloat32x4(FloatRegister lhs, FloatRegister rhs, + FloatRegister temp1, + FloatRegister temp2, + FloatRegister output) { + if (HasAVX()) { + minMaxFloat32x4AVX(/*isMin=*/true, lhs, rhs, temp1, temp2, output); + return; + } + minMaxFloat32x4(/*isMin=*/true, lhs, Operand(rhs), temp1, temp2, output); +} + +void MacroAssemblerX86Shared::maxFloat32x4(FloatRegister lhs, FloatRegister rhs, + FloatRegister temp1, + FloatRegister temp2, + FloatRegister output) { + if (HasAVX()) { + minMaxFloat32x4AVX(/*isMin=*/false, lhs, rhs, temp1, temp2, output); + return; + } + minMaxFloat32x4(/*isMin=*/false, lhs, Operand(rhs), temp1, temp2, output); +} + +void MacroAssemblerX86Shared::minFloat64x2(FloatRegister lhs, FloatRegister rhs, + FloatRegister temp1, + FloatRegister temp2, + FloatRegister output) { + if (HasAVX()) { + minMaxFloat64x2AVX(/*isMin=*/true, lhs, rhs, temp1, temp2, output); + return; + } + minMaxFloat64x2(/*isMin=*/true, lhs, Operand(rhs), temp1, temp2, output); +} + +void MacroAssemblerX86Shared::maxFloat64x2(FloatRegister lhs, FloatRegister rhs, + FloatRegister temp1, + FloatRegister temp2, + FloatRegister output) { + if (HasAVX()) { + minMaxFloat64x2AVX(/*isMin=*/false, lhs, rhs, temp1, temp2, output); + return; + } + minMaxFloat64x2(/*isMin=*/false, lhs, Operand(rhs), temp1, temp2, output); +} + +void MacroAssemblerX86Shared::packedShiftByScalarInt8x16( + FloatRegister in, Register count, FloatRegister xtmp, FloatRegister dest, + void (MacroAssemblerX86Shared::*shift)(FloatRegister, FloatRegister, + FloatRegister), + void (MacroAssemblerX86Shared::*extend)(const Operand&, FloatRegister)) { + ScratchSimd128Scope scratch(asMasm()); + vmovd(count, scratch); + + // High bytes + vpalignr(Operand(in), xtmp, xtmp, 8); + (this->*extend)(Operand(xtmp), xtmp); + (this->*shift)(scratch, xtmp, xtmp); + + // Low bytes + (this->*extend)(Operand(dest), dest); + (this->*shift)(scratch, dest, dest); + + // Mask off garbage to avoid saturation during packing + asMasm().loadConstantSimd128Int(SimdConstant::SplatX4(int32_t(0x00FF00FF)), + scratch); + vpand(Operand(scratch), xtmp, xtmp); + vpand(Operand(scratch), dest, dest); + + vpackuswb(Operand(xtmp), dest, dest); +} + +void MacroAssemblerX86Shared::packedLeftShiftByScalarInt8x16( + FloatRegister in, Register count, FloatRegister xtmp, FloatRegister dest) { + packedShiftByScalarInt8x16(in, count, xtmp, dest, + &MacroAssemblerX86Shared::vpsllw, + 
&MacroAssemblerX86Shared::vpmovzxbw); +} + +void MacroAssemblerX86Shared::packedLeftShiftByScalarInt8x16( + Imm32 count, FloatRegister src, FloatRegister dest) { + MOZ_ASSERT(count.value <= 7); + if (MOZ_UNLIKELY(count.value == 0)) { + moveSimd128Int(src, dest); + return; + } + src = asMasm().moveSimd128IntIfNotAVX(src, dest); + // Use the doubling trick for low shift counts, otherwise mask off the bits + // that are shifted out of the low byte of each word and use word shifts. The + // optimal cutoff remains to be explored. + if (count.value <= 3) { + vpaddb(Operand(src), src, dest); + for (int32_t shift = count.value - 1; shift > 0; --shift) { + vpaddb(Operand(dest), dest, dest); + } + } else { + asMasm().bitwiseAndSimd128(src, SimdConstant::SplatX16(0xFF >> count.value), + dest); + vpsllw(count, dest, dest); + } +} + +void MacroAssemblerX86Shared::packedRightShiftByScalarInt8x16( + FloatRegister in, Register count, FloatRegister xtmp, FloatRegister dest) { + packedShiftByScalarInt8x16(in, count, xtmp, dest, + &MacroAssemblerX86Shared::vpsraw, + &MacroAssemblerX86Shared::vpmovsxbw); +} + +void MacroAssemblerX86Shared::packedRightShiftByScalarInt8x16( + Imm32 count, FloatRegister src, FloatRegister dest) { + MOZ_ASSERT(count.value <= 7); + ScratchSimd128Scope scratch(asMasm()); + + vpunpckhbw(src, scratch, scratch); + vpunpcklbw(src, dest, dest); + vpsraw(Imm32(count.value + 8), scratch, scratch); + vpsraw(Imm32(count.value + 8), dest, dest); + vpacksswb(Operand(scratch), dest, dest); +} + +void MacroAssemblerX86Shared::packedUnsignedRightShiftByScalarInt8x16( + FloatRegister in, Register count, FloatRegister xtmp, FloatRegister dest) { + packedShiftByScalarInt8x16(in, count, xtmp, dest, + &MacroAssemblerX86Shared::vpsrlw, + &MacroAssemblerX86Shared::vpmovzxbw); +} + +void MacroAssemblerX86Shared::packedUnsignedRightShiftByScalarInt8x16( + Imm32 count, FloatRegister src, FloatRegister dest) { + MOZ_ASSERT(count.value <= 7); + src = asMasm().moveSimd128IntIfNotAVX(src, dest); + asMasm().bitwiseAndSimd128( + src, SimdConstant::SplatX16((0xFF << count.value) & 0xFF), dest); + vpsrlw(count, dest, dest); +} + +void MacroAssemblerX86Shared::packedLeftShiftByScalarInt16x8( + FloatRegister in, Register count, FloatRegister dest) { + ScratchSimd128Scope scratch(asMasm()); + vmovd(count, scratch); + vpsllw(scratch, in, dest); +} + +void MacroAssemblerX86Shared::packedRightShiftByScalarInt16x8( + FloatRegister in, Register count, FloatRegister dest) { + ScratchSimd128Scope scratch(asMasm()); + vmovd(count, scratch); + vpsraw(scratch, in, dest); +} + +void MacroAssemblerX86Shared::packedUnsignedRightShiftByScalarInt16x8( + FloatRegister in, Register count, FloatRegister dest) { + ScratchSimd128Scope scratch(asMasm()); + vmovd(count, scratch); + vpsrlw(scratch, in, dest); +} + +void MacroAssemblerX86Shared::packedLeftShiftByScalarInt32x4( + FloatRegister in, Register count, FloatRegister dest) { + ScratchSimd128Scope scratch(asMasm()); + vmovd(count, scratch); + vpslld(scratch, in, dest); +} + +void MacroAssemblerX86Shared::packedRightShiftByScalarInt32x4( + FloatRegister in, Register count, FloatRegister dest) { + ScratchSimd128Scope scratch(asMasm()); + vmovd(count, scratch); + vpsrad(scratch, in, dest); +} + +void MacroAssemblerX86Shared::packedUnsignedRightShiftByScalarInt32x4( + FloatRegister in, Register count, FloatRegister dest) { + ScratchSimd128Scope scratch(asMasm()); + vmovd(count, scratch); + vpsrld(scratch, in, dest); +} + +void MacroAssemblerX86Shared::packedLeftShiftByScalarInt64x2( + 
FloatRegister in, Register count, FloatRegister dest) { + ScratchSimd128Scope scratch(asMasm()); + vmovd(count, scratch); + vpsllq(scratch, in, dest); +} + +void MacroAssemblerX86Shared::packedRightShiftByScalarInt64x2( + FloatRegister in, Register count, FloatRegister temp, FloatRegister dest) { + ScratchSimd128Scope scratch(asMasm()); + vmovd(count, temp); + asMasm().signReplicationInt64x2(in, scratch); + in = asMasm().moveSimd128FloatIfNotAVX(in, dest); + // Invert if negative, shift all, invert back if negative. + vpxor(Operand(scratch), in, dest); + vpsrlq(temp, dest, dest); + vpxor(Operand(scratch), dest, dest); +} + +void MacroAssemblerX86Shared::packedUnsignedRightShiftByScalarInt64x2( + FloatRegister in, Register count, FloatRegister dest) { + ScratchSimd128Scope scratch(asMasm()); + vmovd(count, scratch); + vpsrlq(scratch, in, dest); +} + +void MacroAssemblerX86Shared::packedRightShiftByScalarInt64x2( + Imm32 count, FloatRegister src, FloatRegister dest) { + ScratchSimd128Scope scratch(asMasm()); + asMasm().signReplicationInt64x2(src, scratch); + // Invert if negative, shift all, invert back if negative. + src = asMasm().moveSimd128FloatIfNotAVX(src, dest); + vpxor(Operand(scratch), src, dest); + vpsrlq(Imm32(count.value & 63), dest, dest); + vpxor(Operand(scratch), dest, dest); +} + +void MacroAssemblerX86Shared::selectSimd128(FloatRegister mask, + FloatRegister onTrue, + FloatRegister onFalse, + FloatRegister temp, + FloatRegister output) { + // Normally the codegen will attempt to enforce these register assignments so + // that the moves are avoided. + + onTrue = asMasm().moveSimd128IntIfNotAVX(onTrue, output); + if (MOZ_UNLIKELY(mask == onTrue)) { + vpor(Operand(onFalse), onTrue, output); + return; + } + + mask = asMasm().moveSimd128IntIfNotAVX(mask, temp); + + vpand(Operand(mask), onTrue, output); + vpandn(Operand(onFalse), mask, temp); + vpor(Operand(temp), output, output); +} + +// Code sequences for int32x4<->float32x4 culled from v8; commentary added. + +void MacroAssemblerX86Shared::unsignedConvertInt32x4ToFloat32x4( + FloatRegister src, FloatRegister dest) { + ScratchSimd128Scope scratch(asMasm()); + src = asMasm().moveSimd128IntIfNotAVX(src, dest); + vpxor(Operand(scratch), scratch, scratch); // extract low bits + vpblendw(0x55, src, scratch, scratch); // into scratch + vpsubd(Operand(scratch), src, dest); // and high bits into dest + vcvtdq2ps(scratch, scratch); // convert low bits + vpsrld(Imm32(1), dest, dest); // get high into unsigned range + vcvtdq2ps(dest, dest); // convert + vaddps(Operand(dest), dest, dest); // and back into signed + vaddps(Operand(scratch), dest, dest); // combine high+low: may round +} + +void MacroAssemblerX86Shared::truncSatFloat32x4ToInt32x4(FloatRegister src, + FloatRegister dest) { + ScratchSimd128Scope scratch(asMasm()); + + // The cvttps2dq instruction is the workhorse but does not handle NaN or out + // of range values as we need it to. We want to saturate too-large positive + // values to 7FFFFFFFh and too-large negative values to 80000000h. NaN and -0 + // become 0. + + // Convert NaN to 0 by masking away values that compare unordered to itself. + if (HasAVX()) { + vcmpeqps(Operand(src), src, scratch); + vpand(Operand(scratch), src, dest); + } else { + vmovaps(src, scratch); + vcmpeqps(Operand(scratch), scratch, scratch); + moveSimd128Float(src, dest); + vpand(Operand(scratch), dest, dest); + } + + // Make lanes in scratch == 0xFFFFFFFFh, if dest overflows during cvttps2dq, + // otherwise 0. 
+ static const SimdConstant minOverflowedInt = + SimdConstant::SplatX4(2147483648.f); + if (HasAVX()) { + asMasm().vcmpgepsSimd128(minOverflowedInt, dest, scratch); + } else { + asMasm().loadConstantSimd128Float(minOverflowedInt, scratch); + vcmpleps(Operand(dest), scratch, scratch); + } + + // Convert. This will make the output 80000000h if the input is out of range. + vcvttps2dq(dest, dest); + + // Convert overflow lanes to 0x7FFFFFFF. + vpxor(Operand(scratch), dest, dest); +} + +void MacroAssemblerX86Shared::unsignedTruncSatFloat32x4ToInt32x4( + FloatRegister src, FloatRegister temp, FloatRegister dest) { + ScratchSimd128Scope scratch(asMasm()); + src = asMasm().moveSimd128FloatIfNotAVX(src, dest); + + // The cvttps2dq instruction is the workhorse but does not handle NaN or out + // of range values as we need it to. We want to saturate too-large positive + // values to FFFFFFFFh and negative values to zero. NaN and -0 become 0. + + // Convert NaN and negative values to zeroes in dest. + vxorps(Operand(scratch), scratch, scratch); + vmaxps(Operand(scratch), src, dest); + + // Place the largest positive signed integer in all lanes in scratch. + // We use it to bias the conversion to handle edge cases. + asMasm().loadConstantSimd128Float(SimdConstant::SplatX4(2147483647.f), + scratch); + + // temp = dest - 7FFFFFFFh (as floating), this brings integers in the unsigned + // range but above the signed range into the signed range; 0 => -7FFFFFFFh. + vmovaps(dest, temp); + vsubps(Operand(scratch), temp, temp); + + // scratch = mask of biased values that are greater than 7FFFFFFFh. + vcmpleps(Operand(temp), scratch, scratch); + + // Convert the biased values to integer. Positive values above 7FFFFFFFh will + // have been converted to 80000000h, all others become the expected integer. + vcvttps2dq(temp, temp); + + // As lanes of scratch are ~0 where the result overflows, this computes + // 7FFFFFFF in lanes of temp that are 80000000h, and leaves other lanes + // untouched as the biased integer. + vpxor(Operand(scratch), temp, temp); + + // Convert negative biased lanes in temp to zero. After this, temp will be + // zero where the result should be zero or is less than 80000000h, 7FFFFFFF + // where the result overflows, and will have the converted biased result in + // other lanes (for input values >= 80000000h). + vpxor(Operand(scratch), scratch, scratch); + vpmaxsd(Operand(scratch), temp, temp); + + // Convert. Overflow lanes above 7FFFFFFFh will be 80000000h, other lanes will + // be what they should be. + vcvttps2dq(dest, dest); + + // Add temp to the result. Overflow lanes with 80000000h becomes FFFFFFFFh, + // biased high-value unsigned lanes become unbiased, everything else is left + // unchanged. + vpaddd(Operand(temp), dest, dest); +} + +void MacroAssemblerX86Shared::unsignedTruncFloat32x4ToInt32x4Relaxed( + FloatRegister src, FloatRegister dest) { + ScratchSimd128Scope scratch(asMasm()); + src = asMasm().moveSimd128FloatIfNotAVX(src, dest); + + // Place lanes below 80000000h into dest, otherwise into scratch. + // Keep dest or scratch 0 as default. + asMasm().loadConstantSimd128Float(SimdConstant::SplatX4(0x4f000000), scratch); + vcmpltps(Operand(src), scratch, scratch); + vpand(Operand(src), scratch, scratch); + vpxor(Operand(scratch), src, dest); + + // Convert lanes below 80000000h into unsigned int without issues. 
+ vcvttps2dq(dest, dest); + // Knowing IEEE-754 number representation: convert lanes above 7FFFFFFFh, + // mutiply by 2 (to add 1 in exponent) and shift to the left by 8 bits. + vaddps(Operand(scratch), scratch, scratch); + vpslld(Imm32(8), scratch, scratch); + + // Combine the results. + vpaddd(Operand(scratch), dest, dest); +} + +void MacroAssemblerX86Shared::unsignedConvertInt32x4ToFloat64x2( + FloatRegister src, FloatRegister dest) { + src = asMasm().moveSimd128FloatIfNotAVX(src, dest); + asMasm().vunpcklpsSimd128(SimdConstant::SplatX4(0x43300000), src, dest); + asMasm().vsubpdSimd128(SimdConstant::SplatX2(4503599627370496.0), dest, dest); +} + +void MacroAssemblerX86Shared::truncSatFloat64x2ToInt32x4(FloatRegister src, + FloatRegister temp, + FloatRegister dest) { + FloatRegister srcForTemp = asMasm().moveSimd128FloatIfNotAVX(src, temp); + vcmpeqpd(Operand(srcForTemp), srcForTemp, temp); + + src = asMasm().moveSimd128FloatIfNotAVX(src, dest); + asMasm().vandpdSimd128(SimdConstant::SplatX2(2147483647.0), temp, temp); + vminpd(Operand(temp), src, dest); + vcvttpd2dq(dest, dest); +} + +void MacroAssemblerX86Shared::unsignedTruncSatFloat64x2ToInt32x4( + FloatRegister src, FloatRegister temp, FloatRegister dest) { + src = asMasm().moveSimd128FloatIfNotAVX(src, dest); + + vxorpd(temp, temp, temp); + vmaxpd(Operand(temp), src, dest); + + asMasm().vminpdSimd128(SimdConstant::SplatX2(4294967295.0), dest, dest); + vroundpd(SSERoundingMode::Trunc, Operand(dest), dest); + asMasm().vaddpdSimd128(SimdConstant::SplatX2(4503599627370496.0), dest, dest); + + // temp == 0 + vshufps(0x88, temp, dest, dest); +} + +void MacroAssemblerX86Shared::unsignedTruncFloat64x2ToInt32x4Relaxed( + FloatRegister src, FloatRegister dest) { + ScratchSimd128Scope scratch(asMasm()); + + // The same as unsignedConvertInt32x4ToFloat64x2, but without NaN + // and out-of-bounds checks. + vroundpd(SSERoundingMode::Trunc, Operand(src), dest); + asMasm().loadConstantSimd128Float(SimdConstant::SplatX2(4503599627370496.0), + scratch); + vaddpd(Operand(scratch), dest, dest); + // The scratch has zeros in f32x4 lanes with index 0 and 2. The in-memory + // representantation of the splatted double contantant contains zero in its + // low bits. + vshufps(0x88, scratch, dest, dest); +} + +void MacroAssemblerX86Shared::popcntInt8x16(FloatRegister src, + FloatRegister temp, + FloatRegister output) { + ScratchSimd128Scope scratch(asMasm()); + asMasm().loadConstantSimd128Int(SimdConstant::SplatX16(0x0f), scratch); + FloatRegister srcForTemp = asMasm().moveSimd128IntIfNotAVX(src, temp); + vpand(scratch, srcForTemp, temp); + vpandn(src, scratch, scratch); + int8_t counts[] = {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4}; + asMasm().loadConstantSimd128(SimdConstant::CreateX16(counts), output); + vpsrlw(Imm32(4), scratch, scratch); + vpshufb(temp, output, output); + asMasm().loadConstantSimd128(SimdConstant::CreateX16(counts), temp); + vpshufb(scratch, temp, temp); + vpaddb(Operand(temp), output, output); +} diff --git a/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h b/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h new file mode 100644 index 0000000000..4985e072d8 --- /dev/null +++ b/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h @@ -0,0 +1,3396 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef jit_x86_shared_MacroAssembler_x86_shared_inl_h +#define jit_x86_shared_MacroAssembler_x86_shared_inl_h + +#include "jit/x86-shared/MacroAssembler-x86-shared.h" + +#include "mozilla/MathAlgorithms.h" + +namespace js { +namespace jit { + +//{{{ check_macroassembler_style +// =============================================================== +// Move instructions + +void MacroAssembler::moveFloat32ToGPR(FloatRegister src, Register dest) { + vmovd(src, dest); +} + +void MacroAssembler::moveGPRToFloat32(Register src, FloatRegister dest) { + vmovd(src, dest); +} + +void MacroAssembler::move8SignExtend(Register src, Register dest) { + movsbl(src, dest); +} + +void MacroAssembler::move16SignExtend(Register src, Register dest) { + movswl(src, dest); +} + +void MacroAssembler::loadAbiReturnAddress(Register dest) { + loadPtr(Address(getStackPointer(), 0), dest); +} + +// =============================================================== +// Logical instructions + +void MacroAssembler::not32(Register reg) { notl(reg); } + +void MacroAssembler::and32(Register src, Register dest) { andl(src, dest); } + +void MacroAssembler::and32(Imm32 imm, Register dest) { andl(imm, dest); } + +void MacroAssembler::and32(Imm32 imm, const Address& dest) { + andl(imm, Operand(dest)); +} + +void MacroAssembler::and32(const Address& src, Register dest) { + andl(Operand(src), dest); +} + +void MacroAssembler::or32(Register src, Register dest) { orl(src, dest); } + +void MacroAssembler::or32(Imm32 imm, Register dest) { orl(imm, dest); } + +void MacroAssembler::or32(Imm32 imm, const Address& dest) { + orl(imm, Operand(dest)); +} + +void MacroAssembler::xor32(Register src, Register dest) { xorl(src, dest); } + +void MacroAssembler::xor32(Imm32 imm, Register dest) { xorl(imm, dest); } + +void MacroAssembler::xor32(Imm32 imm, const Address& dest) { + xorl(imm, Operand(dest)); +} + +void MacroAssembler::xor32(const Address& src, Register dest) { + xorl(Operand(src), dest); +} + +void MacroAssembler::clz32(Register src, Register dest, bool knownNotZero) { + if (AssemblerX86Shared::HasLZCNT()) { + lzcntl(src, dest); + return; + } + + bsrl(src, dest); + if (!knownNotZero) { + // If the source is zero then bsrl leaves garbage in the destination. 
+ Label nonzero; + j(Assembler::NonZero, &nonzero); + movl(Imm32(0x3F), dest); + bind(&nonzero); + } + xorl(Imm32(0x1F), dest); +} + +void MacroAssembler::ctz32(Register src, Register dest, bool knownNotZero) { + if (AssemblerX86Shared::HasBMI1()) { + tzcntl(src, dest); + return; + } + + bsfl(src, dest); + if (!knownNotZero) { + Label nonzero; + j(Assembler::NonZero, &nonzero); + movl(Imm32(32), dest); + bind(&nonzero); + } +} + +void MacroAssembler::popcnt32(Register input, Register output, Register tmp) { + if (AssemblerX86Shared::HasPOPCNT()) { + popcntl(input, output); + return; + } + + MOZ_ASSERT(tmp != InvalidReg); + + // Equivalent to mozilla::CountPopulation32() + + movl(input, tmp); + if (input != output) { + movl(input, output); + } + shrl(Imm32(1), output); + andl(Imm32(0x55555555), output); + subl(output, tmp); + movl(tmp, output); + andl(Imm32(0x33333333), output); + shrl(Imm32(2), tmp); + andl(Imm32(0x33333333), tmp); + addl(output, tmp); + movl(tmp, output); + shrl(Imm32(4), output); + addl(tmp, output); + andl(Imm32(0xF0F0F0F), output); + imull(Imm32(0x1010101), output, output); + shrl(Imm32(24), output); +} + +// =============================================================== +// Swap instructions + +void MacroAssembler::byteSwap16SignExtend(Register reg) { + rolw(Imm32(8), reg); + movswl(reg, reg); +} + +void MacroAssembler::byteSwap16ZeroExtend(Register reg) { + rolw(Imm32(8), reg); + movzwl(reg, reg); +} + +void MacroAssembler::byteSwap32(Register reg) { bswapl(reg); } + +// =============================================================== +// Arithmetic instructions + +void MacroAssembler::add32(Register src, Register dest) { addl(src, dest); } + +void MacroAssembler::add32(Imm32 imm, Register dest) { addl(imm, dest); } + +void MacroAssembler::add32(Imm32 imm, const Address& dest) { + addl(imm, Operand(dest)); +} + +void MacroAssembler::add32(Imm32 imm, const AbsoluteAddress& dest) { + addl(imm, Operand(dest)); +} + +void MacroAssembler::addFloat32(FloatRegister src, FloatRegister dest) { + vaddss(src, dest, dest); +} + +void MacroAssembler::addDouble(FloatRegister src, FloatRegister dest) { + vaddsd(src, dest, dest); +} + +void MacroAssembler::sub32(Register src, Register dest) { subl(src, dest); } + +void MacroAssembler::sub32(Imm32 imm, Register dest) { subl(imm, dest); } + +void MacroAssembler::sub32(const Address& src, Register dest) { + subl(Operand(src), dest); +} + +void MacroAssembler::subDouble(FloatRegister src, FloatRegister dest) { + vsubsd(src, dest, dest); +} + +void MacroAssembler::subFloat32(FloatRegister src, FloatRegister dest) { + vsubss(src, dest, dest); +} + +void MacroAssembler::mul32(Register rhs, Register srcDest) { + imull(rhs, srcDest); +} + +void MacroAssembler::mul32(Imm32 imm, Register srcDest) { imull(imm, srcDest); } + +void MacroAssembler::mulFloat32(FloatRegister src, FloatRegister dest) { + vmulss(src, dest, dest); +} + +void MacroAssembler::mulDouble(FloatRegister src, FloatRegister dest) { + vmulsd(src, dest, dest); +} + +void MacroAssembler::quotient32(Register rhs, Register srcDest, + Register tempEdx, bool isUnsigned) { + MOZ_ASSERT(srcDest == eax && tempEdx == edx); + + // Sign extend eax into edx to make (edx:eax): idiv/udiv are 64-bit. 
+ if (isUnsigned) { + mov(ImmWord(0), edx); + udiv(rhs); + } else { + cdq(); + idiv(rhs); + } +} + +void MacroAssembler::remainder32(Register rhs, Register srcDest, + Register tempEdx, bool isUnsigned) { + MOZ_ASSERT(srcDest == eax && tempEdx == edx); + + // Sign extend eax into edx to make (edx:eax): idiv/udiv are 64-bit. + if (isUnsigned) { + mov(ImmWord(0), edx); + udiv(rhs); + } else { + cdq(); + idiv(rhs); + } + mov(edx, eax); +} + +void MacroAssembler::divFloat32(FloatRegister src, FloatRegister dest) { + vdivss(src, dest, dest); +} + +void MacroAssembler::divDouble(FloatRegister src, FloatRegister dest) { + vdivsd(src, dest, dest); +} + +void MacroAssembler::neg32(Register reg) { negl(reg); } + +void MacroAssembler::negateFloat(FloatRegister reg) { + ScratchFloat32Scope scratch(*this); + vpcmpeqw(Operand(scratch), scratch, scratch); + vpsllq(Imm32(31), scratch, scratch); + + // XOR the float in a float register with -0.0. + vxorps(scratch, reg, reg); // s ^ 0x80000000 +} + +void MacroAssembler::negateDouble(FloatRegister reg) { + // From MacroAssemblerX86Shared::maybeInlineDouble + ScratchDoubleScope scratch(*this); + vpcmpeqw(Operand(scratch), scratch, scratch); + vpsllq(Imm32(63), scratch, scratch); + + // XOR the float in a float register with -0.0. + vxorpd(scratch, reg, reg); // s ^ 0x80000000000000 +} + +void MacroAssembler::abs32(Register src, Register dest) { + if (src != dest) { + move32(src, dest); + } + Label positive; + branchTest32(Assembler::NotSigned, dest, dest, &positive); + neg32(dest); + bind(&positive); +} + +void MacroAssembler::absFloat32(FloatRegister src, FloatRegister dest) { + ScratchFloat32Scope scratch(*this); + loadConstantFloat32(mozilla::SpecificNaN<float>( + 0, mozilla::FloatingPoint<float>::kSignificandBits), + scratch); + vandps(scratch, src, dest); +} + +void MacroAssembler::absDouble(FloatRegister src, FloatRegister dest) { + ScratchDoubleScope scratch(*this); + loadConstantDouble(mozilla::SpecificNaN<double>( + 0, mozilla::FloatingPoint<double>::kSignificandBits), + scratch); + vandpd(scratch, src, dest); +} + +void MacroAssembler::sqrtFloat32(FloatRegister src, FloatRegister dest) { + vsqrtss(src, dest, dest); +} + +void MacroAssembler::sqrtDouble(FloatRegister src, FloatRegister dest) { + vsqrtsd(src, dest, dest); +} + +void MacroAssembler::minFloat32(FloatRegister other, FloatRegister srcDest, + bool handleNaN) { + minMaxFloat32(srcDest, other, handleNaN, false); +} + +void MacroAssembler::minDouble(FloatRegister other, FloatRegister srcDest, + bool handleNaN) { + minMaxDouble(srcDest, other, handleNaN, false); +} + +void MacroAssembler::maxFloat32(FloatRegister other, FloatRegister srcDest, + bool handleNaN) { + minMaxFloat32(srcDest, other, handleNaN, true); +} + +void MacroAssembler::maxDouble(FloatRegister other, FloatRegister srcDest, + bool handleNaN) { + minMaxDouble(srcDest, other, handleNaN, true); +} + +// =============================================================== +// Rotation instructions +void MacroAssembler::rotateLeft(Imm32 count, Register input, Register dest) { + MOZ_ASSERT(input == dest, "defineReuseInput"); + count.value &= 0x1f; + if (count.value) { + roll(count, input); + } +} + +void MacroAssembler::rotateLeft(Register count, Register input, Register dest) { + MOZ_ASSERT(input == dest, "defineReuseInput"); + MOZ_ASSERT(count == ecx, "defineFixed(ecx)"); + roll_cl(input); +} + +void MacroAssembler::rotateRight(Imm32 count, Register input, Register dest) { + MOZ_ASSERT(input == dest, "defineReuseInput"); + count.value 
&= 0x1f; + if (count.value) { + rorl(count, input); + } +} + +void MacroAssembler::rotateRight(Register count, Register input, + Register dest) { + MOZ_ASSERT(input == dest, "defineReuseInput"); + MOZ_ASSERT(count == ecx, "defineFixed(ecx)"); + rorl_cl(input); +} + +// =============================================================== +// Shift instructions + +void MacroAssembler::lshift32(Register shift, Register srcDest) { + if (HasBMI2()) { + shlxl(srcDest, shift, srcDest); + return; + } + MOZ_ASSERT(shift == ecx); + shll_cl(srcDest); +} + +void MacroAssembler::flexibleLshift32(Register shift, Register srcDest) { + if (HasBMI2()) { + shlxl(srcDest, shift, srcDest); + return; + } + if (shift == ecx) { + shll_cl(srcDest); + } else { + // Shift amount must be in ecx. + xchg(shift, ecx); + shll_cl(shift == srcDest ? ecx : srcDest == ecx ? shift : srcDest); + xchg(shift, ecx); + } +} + +void MacroAssembler::rshift32(Register shift, Register srcDest) { + if (HasBMI2()) { + shrxl(srcDest, shift, srcDest); + return; + } + MOZ_ASSERT(shift == ecx); + shrl_cl(srcDest); +} + +void MacroAssembler::flexibleRshift32(Register shift, Register srcDest) { + if (HasBMI2()) { + shrxl(srcDest, shift, srcDest); + return; + } + if (shift == ecx) { + shrl_cl(srcDest); + } else { + // Shift amount must be in ecx. + xchg(shift, ecx); + shrl_cl(shift == srcDest ? ecx : srcDest == ecx ? shift : srcDest); + xchg(shift, ecx); + } +} + +void MacroAssembler::rshift32Arithmetic(Register shift, Register srcDest) { + if (HasBMI2()) { + sarxl(srcDest, shift, srcDest); + return; + } + MOZ_ASSERT(shift == ecx); + sarl_cl(srcDest); +} + +void MacroAssembler::flexibleRshift32Arithmetic(Register shift, + Register srcDest) { + if (HasBMI2()) { + sarxl(srcDest, shift, srcDest); + return; + } + if (shift == ecx) { + sarl_cl(srcDest); + } else { + // Shift amount must be in ecx. + xchg(shift, ecx); + sarl_cl(shift == srcDest ? ecx : srcDest == ecx ? 
shift : srcDest); + xchg(shift, ecx); + } +} + +void MacroAssembler::lshift32(Imm32 shift, Register srcDest) { + shll(shift, srcDest); +} + +void MacroAssembler::rshift32(Imm32 shift, Register srcDest) { + shrl(shift, srcDest); +} + +void MacroAssembler::rshift32Arithmetic(Imm32 shift, Register srcDest) { + sarl(shift, srcDest); +} + +// =============================================================== +// Condition functions + +void MacroAssembler::cmp8Set(Condition cond, Address lhs, Imm32 rhs, + Register dest) { + cmp8(lhs, rhs); + emitSet(cond, dest); +} + +void MacroAssembler::cmp16Set(Condition cond, Address lhs, Imm32 rhs, + Register dest) { + cmp16(lhs, rhs); + emitSet(cond, dest); +} + +template <typename T1, typename T2> +void MacroAssembler::cmp32Set(Condition cond, T1 lhs, T2 rhs, Register dest) { + cmp32(lhs, rhs); + emitSet(cond, dest); +} + +// =============================================================== +// Branch instructions + +void MacroAssembler::branch8(Condition cond, const Address& lhs, Imm32 rhs, + Label* label) { + cmp8(lhs, rhs); + j(cond, label); +} + +void MacroAssembler::branch8(Condition cond, const BaseIndex& lhs, Register rhs, + Label* label) { + cmp8(Operand(lhs), rhs); + j(cond, label); +} + +void MacroAssembler::branch16(Condition cond, const Address& lhs, Imm32 rhs, + Label* label) { + cmp16(lhs, rhs); + j(cond, label); +} + +template <class L> +void MacroAssembler::branch32(Condition cond, Register lhs, Register rhs, + L label) { + cmp32(lhs, rhs); + j(cond, label); +} + +template <class L> +void MacroAssembler::branch32(Condition cond, Register lhs, Imm32 rhs, + L label) { + cmp32(lhs, rhs); + j(cond, label); +} + +void MacroAssembler::branch32(Condition cond, const Address& lhs, Register rhs, + Label* label) { + cmp32(Operand(lhs), rhs); + j(cond, label); +} + +void MacroAssembler::branch32(Condition cond, const Address& lhs, Imm32 rhs, + Label* label) { + cmp32(Operand(lhs), rhs); + j(cond, label); +} + +void MacroAssembler::branch32(Condition cond, const BaseIndex& lhs, + Register rhs, Label* label) { + cmp32(Operand(lhs), rhs); + j(cond, label); +} + +void MacroAssembler::branch32(Condition cond, const BaseIndex& lhs, Imm32 rhs, + Label* label) { + cmp32(Operand(lhs), rhs); + j(cond, label); +} + +void MacroAssembler::branch32(Condition cond, const Operand& lhs, Register rhs, + Label* label) { + cmp32(lhs, rhs); + j(cond, label); +} + +void MacroAssembler::branch32(Condition cond, const Operand& lhs, Imm32 rhs, + Label* label) { + cmp32(lhs, rhs); + j(cond, label); +} + +template <class L> +void MacroAssembler::branchPtr(Condition cond, Register lhs, Register rhs, + L label) { + cmpPtr(lhs, rhs); + j(cond, label); +} + +void MacroAssembler::branchPtr(Condition cond, Register lhs, Imm32 rhs, + Label* label) { + branchPtrImpl(cond, lhs, rhs, label); +} + +void MacroAssembler::branchPtr(Condition cond, Register lhs, ImmPtr rhs, + Label* label) { + branchPtrImpl(cond, lhs, rhs, label); +} + +void MacroAssembler::branchPtr(Condition cond, Register lhs, ImmGCPtr rhs, + Label* label) { + branchPtrImpl(cond, lhs, rhs, label); +} + +void MacroAssembler::branchPtr(Condition cond, Register lhs, ImmWord rhs, + Label* label) { + branchPtrImpl(cond, lhs, rhs, label); +} + +template <class L> +void MacroAssembler::branchPtr(Condition cond, const Address& lhs, Register rhs, + L label) { + branchPtrImpl(cond, lhs, rhs, label); +} + +void MacroAssembler::branchPtr(Condition cond, const Address& lhs, ImmPtr rhs, + Label* label) { + branchPtrImpl(cond, lhs, rhs, 
label); +} + +void MacroAssembler::branchPtr(Condition cond, const Address& lhs, ImmGCPtr rhs, + Label* label) { + branchPtrImpl(cond, lhs, rhs, label); +} + +void MacroAssembler::branchPtr(Condition cond, const Address& lhs, ImmWord rhs, + Label* label) { + branchPtrImpl(cond, lhs, rhs, label); +} + +void MacroAssembler::branchPtr(Condition cond, const BaseIndex& lhs, + ImmWord rhs, Label* label) { + branchPtrImpl(cond, lhs, rhs, label); +} + +void MacroAssembler::branchPtr(Condition cond, const BaseIndex& lhs, + Register rhs, Label* label) { + branchPtrImpl(cond, lhs, rhs, label); +} + +template <typename T, typename S, typename L> +void MacroAssembler::branchPtrImpl(Condition cond, const T& lhs, const S& rhs, + L label) { + cmpPtr(Operand(lhs), rhs); + j(cond, label); +} + +void MacroAssembler::branchFloat(DoubleCondition cond, FloatRegister lhs, + FloatRegister rhs, Label* label) { + compareFloat(cond, lhs, rhs); + + if (cond == DoubleEqual) { + Label unordered; + j(Parity, &unordered); + j(Equal, label); + bind(&unordered); + return; + } + + if (cond == DoubleNotEqualOrUnordered) { + j(NotEqual, label); + j(Parity, label); + return; + } + + MOZ_ASSERT(!(cond & DoubleConditionBitSpecial)); + j(ConditionFromDoubleCondition(cond), label); +} + +void MacroAssembler::branchDouble(DoubleCondition cond, FloatRegister lhs, + FloatRegister rhs, Label* label) { + compareDouble(cond, lhs, rhs); + + if (cond == DoubleEqual) { + Label unordered; + j(Parity, &unordered); + j(Equal, label); + bind(&unordered); + return; + } + if (cond == DoubleNotEqualOrUnordered) { + j(NotEqual, label); + j(Parity, label); + return; + } + + MOZ_ASSERT(!(cond & DoubleConditionBitSpecial)); + j(ConditionFromDoubleCondition(cond), label); +} + +template <typename T> +void MacroAssembler::branchAdd32(Condition cond, T src, Register dest, + Label* label) { + addl(src, dest); + j(cond, label); +} + +template <typename T> +void MacroAssembler::branchSub32(Condition cond, T src, Register dest, + Label* label) { + subl(src, dest); + j(cond, label); +} + +template <typename T> +void MacroAssembler::branchMul32(Condition cond, T src, Register dest, + Label* label) { + mul32(src, dest); + j(cond, label); +} + +template <typename T> +void MacroAssembler::branchRshift32(Condition cond, T src, Register dest, + Label* label) { + MOZ_ASSERT(cond == Zero || cond == NonZero); + rshift32(src, dest); + j(cond, label); +} + +void MacroAssembler::branchNeg32(Condition cond, Register reg, Label* label) { + MOZ_ASSERT(cond == Overflow); + neg32(reg); + j(cond, label); +} + +template <typename T> +void MacroAssembler::branchAddPtr(Condition cond, T src, Register dest, + Label* label) { + addPtr(src, dest); + j(cond, label); +} + +template <typename T> +void MacroAssembler::branchSubPtr(Condition cond, T src, Register dest, + Label* label) { + subPtr(src, dest); + j(cond, label); +} + +void MacroAssembler::branchMulPtr(Condition cond, Register src, Register dest, + Label* label) { + mulPtr(src, dest); + j(cond, label); +} + +void MacroAssembler::decBranchPtr(Condition cond, Register lhs, Imm32 rhs, + Label* label) { + subPtr(rhs, lhs); + j(cond, label); +} + +template <class L> +void MacroAssembler::branchTest32(Condition cond, Register lhs, Register rhs, + L label) { + MOZ_ASSERT(cond == Zero || cond == NonZero || cond == Signed || + cond == NotSigned); + test32(lhs, rhs); + j(cond, label); +} + +template <class L> +void MacroAssembler::branchTest32(Condition cond, Register lhs, Imm32 rhs, + L label) { + MOZ_ASSERT(cond == Zero || cond == 
NonZero || cond == Signed || + cond == NotSigned); + test32(lhs, rhs); + j(cond, label); +} + +void MacroAssembler::branchTest32(Condition cond, const Address& lhs, Imm32 rhs, + Label* label) { + MOZ_ASSERT(cond == Zero || cond == NonZero || cond == Signed || + cond == NotSigned); + test32(Operand(lhs), rhs); + j(cond, label); +} + +template <class L> +void MacroAssembler::branchTestPtr(Condition cond, Register lhs, Register rhs, + L label) { + testPtr(lhs, rhs); + j(cond, label); +} + +void MacroAssembler::branchTestPtr(Condition cond, Register lhs, Imm32 rhs, + Label* label) { + testPtr(lhs, rhs); + j(cond, label); +} + +void MacroAssembler::branchTestPtr(Condition cond, const Address& lhs, + Imm32 rhs, Label* label) { + testPtr(Operand(lhs), rhs); + j(cond, label); +} + +void MacroAssembler::branchTestUndefined(Condition cond, Register tag, + Label* label) { + branchTestUndefinedImpl(cond, tag, label); +} + +void MacroAssembler::branchTestUndefined(Condition cond, const Address& address, + Label* label) { + branchTestUndefinedImpl(cond, address, label); +} + +void MacroAssembler::branchTestUndefined(Condition cond, + const BaseIndex& address, + Label* label) { + branchTestUndefinedImpl(cond, address, label); +} + +void MacroAssembler::branchTestUndefined(Condition cond, + const ValueOperand& value, + Label* label) { + branchTestUndefinedImpl(cond, value, label); +} + +template <typename T> +void MacroAssembler::branchTestUndefinedImpl(Condition cond, const T& t, + Label* label) { + cond = testUndefined(cond, t); + j(cond, label); +} + +void MacroAssembler::branchTestInt32(Condition cond, Register tag, + Label* label) { + branchTestInt32Impl(cond, tag, label); +} + +void MacroAssembler::branchTestInt32(Condition cond, const Address& address, + Label* label) { + branchTestInt32Impl(cond, address, label); +} + +void MacroAssembler::branchTestInt32(Condition cond, const BaseIndex& address, + Label* label) { + branchTestInt32Impl(cond, address, label); +} + +void MacroAssembler::branchTestInt32(Condition cond, const ValueOperand& value, + Label* label) { + branchTestInt32Impl(cond, value, label); +} + +template <typename T> +void MacroAssembler::branchTestInt32Impl(Condition cond, const T& t, + Label* label) { + cond = testInt32(cond, t); + j(cond, label); +} + +void MacroAssembler::branchTestInt32Truthy(bool truthy, + const ValueOperand& value, + Label* label) { + Condition cond = testInt32Truthy(truthy, value); + j(cond, label); +} + +void MacroAssembler::branchTestDouble(Condition cond, Register tag, + Label* label) { + branchTestDoubleImpl(cond, tag, label); +} + +void MacroAssembler::branchTestDouble(Condition cond, const Address& address, + Label* label) { + branchTestDoubleImpl(cond, address, label); +} + +void MacroAssembler::branchTestDouble(Condition cond, const BaseIndex& address, + Label* label) { + branchTestDoubleImpl(cond, address, label); +} + +void MacroAssembler::branchTestDouble(Condition cond, const ValueOperand& value, + Label* label) { + branchTestDoubleImpl(cond, value, label); +} + +template <typename T> +void MacroAssembler::branchTestDoubleImpl(Condition cond, const T& t, + Label* label) { + cond = testDouble(cond, t); + j(cond, label); +} + +void MacroAssembler::branchTestDoubleTruthy(bool truthy, FloatRegister reg, + Label* label) { + Condition cond = testDoubleTruthy(truthy, reg); + j(cond, label); +} + +void MacroAssembler::branchTestNumber(Condition cond, Register tag, + Label* label) { + branchTestNumberImpl(cond, tag, label); +} + +void 
MacroAssembler::branchTestNumber(Condition cond, const ValueOperand& value, + Label* label) { + branchTestNumberImpl(cond, value, label); +} + +template <typename T> +void MacroAssembler::branchTestNumberImpl(Condition cond, const T& t, + Label* label) { + cond = testNumber(cond, t); + j(cond, label); +} + +void MacroAssembler::branchTestBoolean(Condition cond, Register tag, + Label* label) { + branchTestBooleanImpl(cond, tag, label); +} + +void MacroAssembler::branchTestBoolean(Condition cond, const Address& address, + Label* label) { + branchTestBooleanImpl(cond, address, label); +} + +void MacroAssembler::branchTestBoolean(Condition cond, const BaseIndex& address, + Label* label) { + branchTestBooleanImpl(cond, address, label); +} + +void MacroAssembler::branchTestBoolean(Condition cond, + const ValueOperand& value, + Label* label) { + branchTestBooleanImpl(cond, value, label); +} + +template <typename T> +void MacroAssembler::branchTestBooleanImpl(Condition cond, const T& t, + Label* label) { + cond = testBoolean(cond, t); + j(cond, label); +} + +void MacroAssembler::branchTestString(Condition cond, Register tag, + Label* label) { + branchTestStringImpl(cond, tag, label); +} + +void MacroAssembler::branchTestString(Condition cond, const Address& address, + Label* label) { + branchTestStringImpl(cond, address, label); +} + +void MacroAssembler::branchTestString(Condition cond, const BaseIndex& address, + Label* label) { + branchTestStringImpl(cond, address, label); +} + +void MacroAssembler::branchTestString(Condition cond, const ValueOperand& value, + Label* label) { + branchTestStringImpl(cond, value, label); +} + +template <typename T> +void MacroAssembler::branchTestStringImpl(Condition cond, const T& t, + Label* label) { + cond = testString(cond, t); + j(cond, label); +} + +void MacroAssembler::branchTestStringTruthy(bool truthy, + const ValueOperand& value, + Label* label) { + Condition cond = testStringTruthy(truthy, value); + j(cond, label); +} + +void MacroAssembler::branchTestSymbol(Condition cond, Register tag, + Label* label) { + branchTestSymbolImpl(cond, tag, label); +} + +void MacroAssembler::branchTestSymbol(Condition cond, const Address& address, + Label* label) { + branchTestSymbolImpl(cond, address, label); +} + +void MacroAssembler::branchTestSymbol(Condition cond, const BaseIndex& address, + Label* label) { + branchTestSymbolImpl(cond, address, label); +} + +void MacroAssembler::branchTestSymbol(Condition cond, const ValueOperand& value, + Label* label) { + branchTestSymbolImpl(cond, value, label); +} + +template <typename T> +void MacroAssembler::branchTestSymbolImpl(Condition cond, const T& t, + Label* label) { + cond = testSymbol(cond, t); + j(cond, label); +} + +void MacroAssembler::branchTestBigInt(Condition cond, Register tag, + Label* label) { + branchTestBigIntImpl(cond, tag, label); +} + +void MacroAssembler::branchTestBigInt(Condition cond, const Address& address, + Label* label) { + branchTestBigIntImpl(cond, address, label); +} + +void MacroAssembler::branchTestBigInt(Condition cond, const BaseIndex& address, + Label* label) { + branchTestBigIntImpl(cond, address, label); +} + +void MacroAssembler::branchTestBigInt(Condition cond, const ValueOperand& value, + Label* label) { + branchTestBigIntImpl(cond, value, label); +} + +template <typename T> +void MacroAssembler::branchTestBigIntImpl(Condition cond, const T& t, + Label* label) { + cond = testBigInt(cond, t); + j(cond, label); +} + +void MacroAssembler::branchTestBigIntTruthy(bool truthy, + const 
ValueOperand& value, + Label* label) { + Condition cond = testBigIntTruthy(truthy, value); + j(cond, label); +} + +void MacroAssembler::branchTestNull(Condition cond, Register tag, + Label* label) { + branchTestNullImpl(cond, tag, label); +} + +void MacroAssembler::branchTestNull(Condition cond, const Address& address, + Label* label) { + branchTestNullImpl(cond, address, label); +} + +void MacroAssembler::branchTestNull(Condition cond, const BaseIndex& address, + Label* label) { + branchTestNullImpl(cond, address, label); +} + +void MacroAssembler::branchTestNull(Condition cond, const ValueOperand& value, + Label* label) { + branchTestNullImpl(cond, value, label); +} + +template <typename T> +void MacroAssembler::branchTestNullImpl(Condition cond, const T& t, + Label* label) { + cond = testNull(cond, t); + j(cond, label); +} + +void MacroAssembler::branchTestObject(Condition cond, Register tag, + Label* label) { + branchTestObjectImpl(cond, tag, label); +} + +void MacroAssembler::branchTestObject(Condition cond, const Address& address, + Label* label) { + branchTestObjectImpl(cond, address, label); +} + +void MacroAssembler::branchTestObject(Condition cond, const BaseIndex& address, + Label* label) { + branchTestObjectImpl(cond, address, label); +} + +void MacroAssembler::branchTestObject(Condition cond, const ValueOperand& value, + Label* label) { + branchTestObjectImpl(cond, value, label); +} + +template <typename T> +void MacroAssembler::branchTestObjectImpl(Condition cond, const T& t, + Label* label) { + cond = testObject(cond, t); + j(cond, label); +} + +void MacroAssembler::branchTestGCThing(Condition cond, const Address& address, + Label* label) { + branchTestGCThingImpl(cond, address, label); +} + +void MacroAssembler::branchTestGCThing(Condition cond, const BaseIndex& address, + Label* label) { + branchTestGCThingImpl(cond, address, label); +} + +void MacroAssembler::branchTestGCThing(Condition cond, + const ValueOperand& value, + Label* label) { + branchTestGCThingImpl(cond, value, label); +} + +template <typename T> +void MacroAssembler::branchTestGCThingImpl(Condition cond, const T& t, + Label* label) { + cond = testGCThing(cond, t); + j(cond, label); +} + +void MacroAssembler::branchTestPrimitive(Condition cond, Register tag, + Label* label) { + branchTestPrimitiveImpl(cond, tag, label); +} + +void MacroAssembler::branchTestPrimitive(Condition cond, + const ValueOperand& value, + Label* label) { + branchTestPrimitiveImpl(cond, value, label); +} + +template <typename T> +void MacroAssembler::branchTestPrimitiveImpl(Condition cond, const T& t, + Label* label) { + cond = testPrimitive(cond, t); + j(cond, label); +} + +void MacroAssembler::branchTestMagic(Condition cond, Register tag, + Label* label) { + branchTestMagicImpl(cond, tag, label); +} + +void MacroAssembler::branchTestMagic(Condition cond, const Address& address, + Label* label) { + branchTestMagicImpl(cond, address, label); +} + +void MacroAssembler::branchTestMagic(Condition cond, const BaseIndex& address, + Label* label) { + branchTestMagicImpl(cond, address, label); +} + +template <class L> +void MacroAssembler::branchTestMagic(Condition cond, const ValueOperand& value, + L label) { + branchTestMagicImpl(cond, value, label); +} + +template <typename T, class L> +void MacroAssembler::branchTestMagicImpl(Condition cond, const T& t, L label) { + cond = testMagic(cond, t); + j(cond, label); +} + +template <typename T> +void MacroAssembler::testNumberSet(Condition cond, const T& src, + Register dest) { + cond = 
testNumber(cond, src); + emitSet(cond, dest); +} + +template <typename T> +void MacroAssembler::testBooleanSet(Condition cond, const T& src, + Register dest) { + cond = testBoolean(cond, src); + emitSet(cond, dest); +} + +template <typename T> +void MacroAssembler::testStringSet(Condition cond, const T& src, + Register dest) { + cond = testString(cond, src); + emitSet(cond, dest); +} + +template <typename T> +void MacroAssembler::testSymbolSet(Condition cond, const T& src, + Register dest) { + cond = testSymbol(cond, src); + emitSet(cond, dest); +} + +template <typename T> +void MacroAssembler::testBigIntSet(Condition cond, const T& src, + Register dest) { + cond = testBigInt(cond, src); + emitSet(cond, dest); +} + +void MacroAssembler::cmp32Move32(Condition cond, Register lhs, Register rhs, + Register src, Register dest) { + cmp32(lhs, rhs); + cmovCCl(cond, src, dest); +} + +void MacroAssembler::cmp32Move32(Condition cond, Register lhs, + const Address& rhs, Register src, + Register dest) { + cmp32(lhs, Operand(rhs)); + cmovCCl(cond, src, dest); +} + +void MacroAssembler::cmp32Load32(Condition cond, Register lhs, + const Address& rhs, const Address& src, + Register dest) { + cmp32(lhs, Operand(rhs)); + cmovCCl(cond, Operand(src), dest); +} + +void MacroAssembler::cmp32Load32(Condition cond, Register lhs, Register rhs, + const Address& src, Register dest) { + cmp32(lhs, rhs); + cmovCCl(cond, Operand(src), dest); +} + +void MacroAssembler::spectreZeroRegister(Condition cond, Register scratch, + Register dest) { + // Note: use movl instead of move32/xorl to ensure flags are not clobbered. + movl(Imm32(0), scratch); + spectreMovePtr(cond, scratch, dest); +} + +// ======================================================================== +// Memory access primitives. 
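One note on the memory access primitives that follow, not part of the patch: memoryBarrier() emits a fence only when MembarStoreLoad is set. That is sufficient because x86/x64 has a TSO-like memory model in which Load-Load, Load-Store and Store-Store reordering are already forbidden, so only the Store-Load case needs an actual instruction. A standalone illustration of the same fact (the function names here are invented for the example):

#include <atomic>

std::atomic<int> flag{0};

// On x86-64 a release store compiles to a plain MOV: the hardware ordering is
// already strong enough for the Load-Load, Load-Store and Store-Store cases.
void releaseStore() { flag.store(1, std::memory_order_release); }

// A seq_cst store is where Store-Load ordering matters, so compilers typically
// emit XCHG or MOV plus MFENCE, the same kind of barrier storeLoadFence()
// provides.
void seqCstStore() { flag.store(1, std::memory_order_seq_cst); }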
+void MacroAssembler::storeUncanonicalizedDouble(FloatRegister src, + const Address& dest) { + vmovsd(src, dest); +} +void MacroAssembler::storeUncanonicalizedDouble(FloatRegister src, + const BaseIndex& dest) { + vmovsd(src, dest); +} +void MacroAssembler::storeUncanonicalizedDouble(FloatRegister src, + const Operand& dest) { + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + storeUncanonicalizedDouble(src, dest.toAddress()); + break; + case Operand::MEM_SCALE: + storeUncanonicalizedDouble(src, dest.toBaseIndex()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } +} + +template void MacroAssembler::storeDouble(FloatRegister src, + const Operand& dest); + +void MacroAssembler::storeUncanonicalizedFloat32(FloatRegister src, + const Address& dest) { + vmovss(src, dest); +} +void MacroAssembler::storeUncanonicalizedFloat32(FloatRegister src, + const BaseIndex& dest) { + vmovss(src, dest); +} +void MacroAssembler::storeUncanonicalizedFloat32(FloatRegister src, + const Operand& dest) { + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + storeUncanonicalizedFloat32(src, dest.toAddress()); + break; + case Operand::MEM_SCALE: + storeUncanonicalizedFloat32(src, dest.toBaseIndex()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } +} + +template void MacroAssembler::storeFloat32(FloatRegister src, + const Operand& dest); + +void MacroAssembler::memoryBarrier(MemoryBarrierBits barrier) { + if (barrier & MembarStoreLoad) { + storeLoadFence(); + } +} + +// ======================================================================== +// Wasm SIMD +// +// Some parts of the masm API are currently agnostic as to the data's +// interpretation as int or float, despite the Intel architecture having +// separate functional units and sometimes penalizing type-specific instructions +// that operate on data in the "wrong" unit. +// +// For the time being, we always choose the integer interpretation when we are +// forced to choose blind, but whether that is right or wrong depends on the +// application. This applies to moveSimd128, loadConstantSimd128, +// loadUnalignedSimd128, and storeUnalignedSimd128, at least. +// +// SSE4.1 or better is assumed. +// +// The order of operations here follows the header file. + +// Moves. See comments above regarding integer operation. + +void MacroAssembler::moveSimd128(FloatRegister src, FloatRegister dest) { + MacroAssemblerX86Shared::moveSimd128Int(src, dest); +} + +// Constants. See comments above regarding integer operation. 
+ +void MacroAssembler::loadConstantSimd128(const SimdConstant& v, + FloatRegister dest) { + if (v.isFloatingType()) { + loadConstantSimd128Float(v, dest); + } else { + loadConstantSimd128Int(v, dest); + } +} + +// Splat + +void MacroAssembler::splatX16(Register src, FloatRegister dest) { + MacroAssemblerX86Shared::splatX16(src, dest); +} + +void MacroAssembler::splatX8(Register src, FloatRegister dest) { + MacroAssemblerX86Shared::splatX8(src, dest); +} + +void MacroAssembler::splatX4(Register src, FloatRegister dest) { + MacroAssemblerX86Shared::splatX4(src, dest); +} + +void MacroAssembler::splatX4(FloatRegister src, FloatRegister dest) { + MacroAssemblerX86Shared::splatX4(src, dest); +} + +void MacroAssembler::splatX2(FloatRegister src, FloatRegister dest) { + MacroAssemblerX86Shared::splatX2(src, dest); +} + +// Extract lane as scalar + +void MacroAssembler::extractLaneInt8x16(uint32_t lane, FloatRegister src, + Register dest) { + MacroAssemblerX86Shared::extractLaneInt8x16(src, dest, lane, + SimdSign::Signed); +} + +void MacroAssembler::unsignedExtractLaneInt8x16(uint32_t lane, + FloatRegister src, + Register dest) { + MacroAssemblerX86Shared::extractLaneInt8x16(src, dest, lane, + SimdSign::Unsigned); +} + +void MacroAssembler::extractLaneInt16x8(uint32_t lane, FloatRegister src, + Register dest) { + MacroAssemblerX86Shared::extractLaneInt16x8(src, dest, lane, + SimdSign::Signed); +} + +void MacroAssembler::unsignedExtractLaneInt16x8(uint32_t lane, + FloatRegister src, + Register dest) { + MacroAssemblerX86Shared::extractLaneInt16x8(src, dest, lane, + SimdSign::Unsigned); +} + +void MacroAssembler::extractLaneInt32x4(uint32_t lane, FloatRegister src, + Register dest) { + MacroAssemblerX86Shared::extractLaneInt32x4(src, dest, lane); +} + +void MacroAssembler::extractLaneFloat32x4(uint32_t lane, FloatRegister src, + FloatRegister dest) { + MacroAssemblerX86Shared::extractLaneFloat32x4(src, dest, lane); +} + +void MacroAssembler::extractLaneFloat64x2(uint32_t lane, FloatRegister src, + FloatRegister dest) { + MacroAssemblerX86Shared::extractLaneFloat64x2(src, dest, lane); +} + +// Replace lane value + +void MacroAssembler::replaceLaneInt8x16(unsigned lane, FloatRegister lhs, + Register rhs, FloatRegister dest) { + vpinsrb(lane, Operand(rhs), lhs, dest); +} + +void MacroAssembler::replaceLaneInt8x16(unsigned lane, Register rhs, + FloatRegister lhsDest) { + vpinsrb(lane, Operand(rhs), lhsDest, lhsDest); +} + +void MacroAssembler::replaceLaneInt16x8(unsigned lane, FloatRegister lhs, + Register rhs, FloatRegister dest) { + vpinsrw(lane, Operand(rhs), lhs, dest); +} + +void MacroAssembler::replaceLaneInt16x8(unsigned lane, Register rhs, + FloatRegister lhsDest) { + vpinsrw(lane, Operand(rhs), lhsDest, lhsDest); +} + +void MacroAssembler::replaceLaneInt32x4(unsigned lane, FloatRegister lhs, + Register rhs, FloatRegister dest) { + vpinsrd(lane, rhs, lhs, dest); +} + +void MacroAssembler::replaceLaneInt32x4(unsigned lane, Register rhs, + FloatRegister lhsDest) { + vpinsrd(lane, rhs, lhsDest, lhsDest); +} + +void MacroAssembler::replaceLaneFloat32x4(unsigned lane, FloatRegister lhs, + FloatRegister rhs, + FloatRegister dest) { + MacroAssemblerX86Shared::replaceLaneFloat32x4(lane, lhs, rhs, dest); +} + +void MacroAssembler::replaceLaneFloat32x4(unsigned lane, FloatRegister rhs, + FloatRegister lhsDest) { + MacroAssemblerX86Shared::replaceLaneFloat32x4(lane, lhsDest, rhs, lhsDest); +} + +void MacroAssembler::replaceLaneFloat64x2(unsigned lane, FloatRegister lhs, + FloatRegister rhs, + 
FloatRegister dest) { + MacroAssemblerX86Shared::replaceLaneFloat64x2(lane, lhs, rhs, dest); +} + +void MacroAssembler::replaceLaneFloat64x2(unsigned lane, FloatRegister rhs, + FloatRegister lhsDest) { + MacroAssemblerX86Shared::replaceLaneFloat64x2(lane, lhsDest, rhs, lhsDest); +} + +// Shuffle - permute with immediate indices + +void MacroAssembler::shuffleInt8x16(const uint8_t lanes[16], FloatRegister rhs, + FloatRegister lhsDest) { + MacroAssemblerX86Shared::shuffleInt8x16(lhsDest, rhs, lhsDest, lanes); +} + +void MacroAssembler::shuffleInt8x16(const uint8_t lanes[16], FloatRegister lhs, + FloatRegister rhs, FloatRegister dest) { + MacroAssemblerX86Shared::shuffleInt8x16(lhs, rhs, dest, lanes); +} + +void MacroAssembler::blendInt8x16(const uint8_t lanes[16], FloatRegister lhs, + FloatRegister rhs, FloatRegister dest, + FloatRegister temp) { + MacroAssemblerX86Shared::blendInt8x16(lhs, rhs, dest, temp, lanes); +} + +void MacroAssembler::blendInt16x8(const uint16_t lanes[8], FloatRegister lhs, + FloatRegister rhs, FloatRegister dest) { + MacroAssemblerX86Shared::blendInt16x8(lhs, rhs, dest, lanes); +} + +void MacroAssembler::laneSelectSimd128(FloatRegister mask, FloatRegister lhs, + FloatRegister rhs, FloatRegister dest) { + MacroAssemblerX86Shared::laneSelectSimd128(mask, lhs, rhs, dest); +} + +void MacroAssembler::interleaveHighInt16x8(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpunpckhwd(rhs, lhs, dest); +} + +void MacroAssembler::interleaveHighInt32x4(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpunpckhdq(rhs, lhs, dest); +} + +void MacroAssembler::interleaveHighInt64x2(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpunpckhqdq(rhs, lhs, dest); +} + +void MacroAssembler::interleaveHighInt8x16(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpunpckhbw(rhs, lhs, dest); +} + +void MacroAssembler::interleaveLowInt16x8(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpunpcklwd(rhs, lhs, dest); +} + +void MacroAssembler::interleaveLowInt32x4(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpunpckldq(rhs, lhs, dest); +} + +void MacroAssembler::interleaveLowInt64x2(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpunpcklqdq(rhs, lhs, dest); +} + +void MacroAssembler::interleaveLowInt8x16(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpunpcklbw(rhs, lhs, dest); +} + +void MacroAssembler::permuteInt8x16(const uint8_t lanes[16], FloatRegister src, + FloatRegister dest) { + src = moveSimd128IntIfNotAVX(src, dest); + vpshufbSimd128(SimdConstant::CreateX16((const int8_t*)lanes), src, dest); +} + +void MacroAssembler::permuteLowInt16x8(const uint16_t lanes[4], + FloatRegister src, FloatRegister dest) { + MOZ_ASSERT(lanes[0] < 4 && lanes[1] < 4 && lanes[2] < 4 && lanes[3] < 4); + vpshuflw(ComputeShuffleMask(lanes[0], lanes[1], lanes[2], lanes[3]), src, + dest); +} + +void MacroAssembler::permuteHighInt16x8(const uint16_t lanes[4], + FloatRegister src, FloatRegister dest) { + MOZ_ASSERT(lanes[0] < 4 && lanes[1] < 4 && lanes[2] < 4 && lanes[3] < 4); + vpshufhw(ComputeShuffleMask(lanes[0], lanes[1], lanes[2], lanes[3]), src, + dest); +} + +void MacroAssembler::permuteInt32x4(const uint32_t lanes[4], FloatRegister src, + FloatRegister dest) { + vpshufd(ComputeShuffleMask(lanes[0], lanes[1], lanes[2], lanes[3]), src, + dest); +} + +void MacroAssembler::concatAndRightShiftSimd128(FloatRegister lhs, + FloatRegister rhs, + FloatRegister dest, + uint32_t shift) { + 
vpalignr(Operand(rhs), lhs, dest, shift); +} + +void MacroAssembler::leftShiftSimd128(Imm32 count, FloatRegister src, + FloatRegister dest) { + src = moveSimd128IntIfNotAVX(src, dest); + vpslldq(count, src, dest); +} + +void MacroAssembler::rightShiftSimd128(Imm32 count, FloatRegister src, + FloatRegister dest) { + src = moveSimd128IntIfNotAVX(src, dest); + vpsrldq(count, src, dest); +} + +// Reverse bytes in lanes. + +void MacroAssembler::reverseInt16x8(FloatRegister src, FloatRegister dest) { + // Byteswap is MOV + PSLLW + PSRLW + POR, a small win over PSHUFB. + ScratchSimd128Scope scratch(*this); + FloatRegister srcForScratch = moveSimd128IntIfNotAVX(src, scratch); + vpsrlw(Imm32(8), srcForScratch, scratch); + src = moveSimd128IntIfNotAVX(src, dest); + vpsllw(Imm32(8), src, dest); + vpor(scratch, dest, dest); +} + +void MacroAssembler::reverseInt32x4(FloatRegister src, FloatRegister dest) { + src = moveSimd128IntIfNotAVX(src, dest); + int8_t lanes[] = {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12}; + vpshufbSimd128(SimdConstant::CreateX16((const int8_t*)lanes), src, dest); +} + +void MacroAssembler::reverseInt64x2(FloatRegister src, FloatRegister dest) { + src = moveSimd128IntIfNotAVX(src, dest); + int8_t lanes[] = {7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8}; + vpshufbSimd128(SimdConstant::CreateX16((const int8_t*)lanes), src, dest); +} + +// Any lane true, ie any bit set + +void MacroAssembler::anyTrueSimd128(FloatRegister src, Register dest) { + vptest(src, src); + emitSetRegisterIf(Condition::NonZero, dest); +} + +// All lanes true + +void MacroAssembler::allTrueInt8x16(FloatRegister src, Register dest) { + ScratchSimd128Scope xtmp(*this); + // xtmp is all-00h + vpxor(xtmp, xtmp, xtmp); + // Set FFh if byte==0 otherwise 00h + // Operand ordering constraint: lhs==output + vpcmpeqb(Operand(src), xtmp, xtmp); + // Check if xtmp is 0. + vptest(xtmp, xtmp); + emitSetRegisterIf(Condition::Zero, dest); +} + +void MacroAssembler::allTrueInt16x8(FloatRegister src, Register dest) { + ScratchSimd128Scope xtmp(*this); + // xtmp is all-00h + vpxor(xtmp, xtmp, xtmp); + // Set FFFFh if word==0 otherwise 0000h + // Operand ordering constraint: lhs==output + vpcmpeqw(Operand(src), xtmp, xtmp); + // Check if xtmp is 0. + vptest(xtmp, xtmp); + emitSetRegisterIf(Condition::Zero, dest); +} + +void MacroAssembler::allTrueInt32x4(FloatRegister src, Register dest) { + ScratchSimd128Scope xtmp(*this); + // xtmp is all-00h + vpxor(xtmp, xtmp, xtmp); + // Set FFFFFFFFh if doubleword==0 otherwise 00000000h + // Operand ordering constraint: lhs==output + vpcmpeqd(Operand(src), xtmp, xtmp); + // Check if xtmp is 0. + vptest(xtmp, xtmp); + emitSetRegisterIf(Condition::Zero, dest); +} + +void MacroAssembler::allTrueInt64x2(FloatRegister src, Register dest) { + ScratchSimd128Scope xtmp(*this); + // xtmp is all-00h + vpxor(xtmp, xtmp, xtmp); + // Set FFFFFFFFFFFFFFFFh if quadword==0 otherwise 0000000000000000h + // Operand ordering constraint: lhs==output + vpcmpeqq(Operand(src), xtmp, xtmp); + // Check if xtmp is 0. 
+ vptest(xtmp, xtmp); + emitSetRegisterIf(Condition::Zero, dest); +} + +// Bitmask + +void MacroAssembler::bitmaskInt8x16(FloatRegister src, Register dest) { + vpmovmskb(src, dest); +} + +void MacroAssembler::bitmaskInt16x8(FloatRegister src, Register dest) { + ScratchSimd128Scope scratch(*this); + // A three-instruction sequence is possible by using scratch as a don't-care + // input and shifting rather than masking at the end, but creates a false + // dependency on the old value of scratch. The better fix is to allow src to + // be clobbered. + src = moveSimd128IntIfNotAVX(src, scratch); + vpacksswb(Operand(src), src, scratch); + vpmovmskb(scratch, dest); + andl(Imm32(0xFF), dest); +} + +void MacroAssembler::bitmaskInt32x4(FloatRegister src, Register dest) { + vmovmskps(src, dest); +} + +void MacroAssembler::bitmaskInt64x2(FloatRegister src, Register dest) { + vmovmskpd(src, dest); +} + +// Swizzle - permute with variable indices + +void MacroAssembler::swizzleInt8x16(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + ScratchSimd128Scope scratch(*this); + rhs = moveSimd128IntIfNotAVX(rhs, scratch); + // Set high bit to 1 for values > 15 via adding with saturation. + vpaddusbSimd128(SimdConstant::SplatX16(0x70), rhs, scratch); + vpshufb(scratch, lhs, dest); // permute +} + +void MacroAssembler::swizzleInt8x16Relaxed(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpshufb(rhs, lhs, dest); +} + +// Integer Add + +void MacroAssembler::addInt8x16(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpaddb(Operand(rhs), lhs, dest); +} + +void MacroAssembler::addInt8x16(FloatRegister lhs, const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpaddb, + &MacroAssembler::vpaddbSimd128); +} + +void MacroAssembler::addInt16x8(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpaddw(Operand(rhs), lhs, dest); +} + +void MacroAssembler::addInt16x8(FloatRegister lhs, const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpaddw, + &MacroAssembler::vpaddwSimd128); +} + +void MacroAssembler::addInt32x4(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpaddd(Operand(rhs), lhs, dest); +} + +void MacroAssembler::addInt32x4(FloatRegister lhs, const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpaddd, + &MacroAssembler::vpadddSimd128); +} + +void MacroAssembler::addInt64x2(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpaddq(Operand(rhs), lhs, dest); +} + +void MacroAssembler::addInt64x2(FloatRegister lhs, const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpaddq, + &MacroAssembler::vpaddqSimd128); +} + +// Integer subtract + +void MacroAssembler::subInt8x16(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpsubb(Operand(rhs), lhs, dest); +} + +void MacroAssembler::subInt8x16(FloatRegister lhs, const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpsubb, + &MacroAssembler::vpsubbSimd128); +} + +void MacroAssembler::subInt16x8(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpsubw(Operand(rhs), lhs, dest); +} + +void MacroAssembler::subInt16x8(FloatRegister lhs, const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpsubw, + &MacroAssembler::vpsubwSimd128); +} + +void MacroAssembler::subInt32x4(FloatRegister lhs, FloatRegister 
rhs, + FloatRegister dest) { + vpsubd(Operand(rhs), lhs, dest); +} + +void MacroAssembler::subInt32x4(FloatRegister lhs, const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpsubd, + &MacroAssembler::vpsubdSimd128); +} + +void MacroAssembler::subInt64x2(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpsubq(Operand(rhs), lhs, dest); +} + +void MacroAssembler::subInt64x2(FloatRegister lhs, const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpsubq, + &MacroAssembler::vpsubqSimd128); +} + +// Integer multiply + +void MacroAssembler::mulInt16x8(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpmullw(Operand(rhs), lhs, dest); +} + +void MacroAssembler::mulInt16x8(FloatRegister lhs, const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpmullw, + &MacroAssembler::vpmullwSimd128); +} + +void MacroAssembler::mulInt32x4(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpmulld(Operand(rhs), lhs, dest); +} + +void MacroAssembler::mulInt32x4(FloatRegister lhs, const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpmulld, + &MacroAssembler::vpmulldSimd128); +} + +void MacroAssembler::mulInt64x2(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest, FloatRegister temp) { + ScratchSimd128Scope temp2(*this); + // lhs = <D C> <B A> + // rhs = <H G> <F E> + // result = <(DG+CH)_low+CG_high CG_low> <(BE+AF)_low+AE_high AE_low> + FloatRegister lhsForTemp = + moveSimd128IntIfNotAVX(lhs, temp); // temp = <D C> <B A> + vpsrlq(Imm32(32), lhsForTemp, temp); // temp = <0 D> <0 B> + vpmuludq(rhs, temp, temp); // temp = <DG> <BE> + FloatRegister rhsForTemp = + moveSimd128IntIfNotAVX(rhs, temp2); // temp2 = <H G> <F E> + vpsrlq(Imm32(32), rhsForTemp, temp2); // temp2 = <0 H> <0 F> + vpmuludq(lhs, temp2, temp2); // temp2 = <CH> <AF> + vpaddq(Operand(temp), temp2, temp2); // temp2 = <DG+CH> <BE+AF> + vpsllq(Imm32(32), temp2, temp2); // temp2 = <(DG+CH)_low 0> + // <(BE+AF)_low 0> + vpmuludq(rhs, lhs, dest); // dest = <CG_high CG_low> + // <AE_high AE_low> + vpaddq(Operand(temp2), dest, dest); // dest = + // <(DG+CH)_low+CG_high CG_low> + // <(BE+AF)_low+AE_high AE_low> +} + +void MacroAssembler::mulInt64x2(FloatRegister lhs, const SimdConstant& rhs, + FloatRegister dest, FloatRegister temp) { + // Check if we can specialize that to less than eight instructions + // (in comparison with the above mulInt64x2 version). + const int64_t* c = static_cast<const int64_t*>(rhs.bytes()); + const int64_t val = c[0]; + if (val == c[1]) { + switch (mozilla::CountPopulation64(val)) { + case 0: // val == 0 + vpxor(Operand(dest), dest, dest); + return; + case 64: // val == -1 + negInt64x2(lhs, dest); + return; + case 1: // val == power of 2 + if (val == 1) { + moveSimd128Int(lhs, dest); + } else { + lhs = moveSimd128IntIfNotAVX(lhs, dest); + vpsllq(Imm32(mozilla::CountTrailingZeroes64(val)), lhs, dest); + } + return; + case 2: { + // Constants with 2 bits set, such as 3, 5, 10, etc. 
+        int i0 = mozilla::CountTrailingZeroes64(val);
+        int i1 = mozilla::CountTrailingZeroes64(val & (val - 1));
+        FloatRegister lhsForTemp = moveSimd128IntIfNotAVX(lhs, temp);
+        vpsllq(Imm32(i1), lhsForTemp, temp);
+        lhs = moveSimd128IntIfNotAVX(lhs, dest);
+        if (i0 > 0) {
+          vpsllq(Imm32(i0), lhs, dest);
+          lhs = dest;
+        }
+        vpaddq(Operand(temp), lhs, dest);
+        return;
+      }
+      case 63: {
+        // Some constants with 1 bit unset, such as -2, -3, -5, etc.
+        FloatRegister lhsForTemp = moveSimd128IntIfNotAVX(lhs, temp);
+        vpsllq(Imm32(mozilla::CountTrailingZeroes64(~val)), lhsForTemp, temp);
+        negInt64x2(lhs, dest);
+        vpsubq(Operand(temp), dest, dest);
+        return;
+      }
+    }
+  }
+
+  // lhs = <D C> <B A>
+  // rhs = <H G> <F E>
+  // result = <(DG+CH)_low+CG_high CG_low> <(BE+AF)_low+AE_high AE_low>
+
+  if ((c[0] >> 32) == 0 && (c[1] >> 32) == 0) {
+    // If H and F are both 0, the calculation simplifies to:
+    // result = <DG_low+CG_high CG_low> <BE_low+AE_high AE_low>
+    const int64_t rhsShifted[2] = {c[0] << 32, c[1] << 32};
+    FloatRegister lhsForTemp = moveSimd128IntIfNotAVX(lhs, temp);
+    vpmulldSimd128(SimdConstant::CreateSimd128(rhsShifted), lhsForTemp, temp);
+    vpmuludqSimd128(rhs, lhs, dest);
+    vpaddq(Operand(temp), dest, dest);
+    return;
+  }
+
+  const int64_t rhsSwapped[2] = {
+      static_cast<int64_t>(static_cast<uint64_t>(c[0]) >> 32) | (c[0] << 32),
+      static_cast<int64_t>(static_cast<uint64_t>(c[1]) >> 32) | (c[1] << 32),
+  };  // rhsSwapped = <G H> <E F>
+  FloatRegister lhsForTemp = moveSimd128IntIfNotAVX(lhs, temp);
+  vpmulldSimd128(SimdConstant::CreateSimd128(rhsSwapped), lhsForTemp,
+                 temp);                // temp = <DG CH> <BE AF>
+  vphaddd(Operand(temp), temp, temp);  // temp = <xx xx> <DG+CH BE+AF>
+  vpmovzxdq(Operand(temp), temp);      // temp = <0 DG+CH> <0 BE+AF>
+  vpmuludqSimd128(rhs, lhs, dest);     // dest = <CG_high CG_low>
+                                       //        <AE_high AE_low>
+  vpsllq(Imm32(32), temp, temp);       // temp = <(DG+CH)_low 0>
+                                       //        <(BE+AF)_low 0>
+  vpaddq(Operand(temp), dest, dest);
+}
+
+// Code generation from the PR: https://github.com/WebAssembly/simd/pull/376.
+// The double PSHUFD for the 32->64 case is not great, and there's some
+// discussion on the PR (scroll down far enough) on how to avoid one of them,
+// but we need benchmarking + correctness proofs.
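+
+// (Editorial sketch, not part of the original patch.) A scalar model of the
+// widening "extended multiply" operations implemented below, assuming Wasm's
+// extmul_low/extmul_high semantics: each output lane is the full-width
+// product of the corresponding input lanes taken from the low (or high) half
+// of the inputs. For the 32->64 case the PR discussion above concerns
+// (signed variant shown, helper name invented for illustration):
+//
+//   static void ScalarExtMulLowI32x4(const int32_t a[4], const int32_t b[4],
+//                                    int64_t out[2]) {
+//     out[0] = int64_t(a[0]) * int64_t(b[0]);  // low-half lane 0
+//     out[1] = int64_t(a[1]) * int64_t(b[1]);  // low-half lane 1
+//   }
+//
+// In the 32->64 versions below, the vpshufd+vpmuldq (or vpmuludq) pairs move
+// lanes 0/1 (or 2/3) into the even doubleword positions that the widening
+// multiply reads.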
+ +void MacroAssembler::extMulLowInt8x16(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + ScratchSimd128Scope scratch(*this); + widenLowInt8x16(rhs, scratch); + widenLowInt8x16(lhs, dest); + mulInt16x8(dest, scratch, dest); +} + +void MacroAssembler::extMulHighInt8x16(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + ScratchSimd128Scope scratch(*this); + widenHighInt8x16(rhs, scratch); + widenHighInt8x16(lhs, dest); + mulInt16x8(dest, scratch, dest); +} + +void MacroAssembler::unsignedExtMulLowInt8x16(FloatRegister lhs, + FloatRegister rhs, + FloatRegister dest) { + ScratchSimd128Scope scratch(*this); + unsignedWidenLowInt8x16(rhs, scratch); + unsignedWidenLowInt8x16(lhs, dest); + mulInt16x8(dest, scratch, dest); +} + +void MacroAssembler::unsignedExtMulHighInt8x16(FloatRegister lhs, + FloatRegister rhs, + FloatRegister dest) { + ScratchSimd128Scope scratch(*this); + unsignedWidenHighInt8x16(rhs, scratch); + unsignedWidenHighInt8x16(lhs, dest); + mulInt16x8(dest, scratch, dest); +} + +void MacroAssembler::extMulLowInt16x8(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + ScratchSimd128Scope scratch(*this); + FloatRegister lhsCopy = moveSimd128IntIfNotAVX(lhs, scratch); + vpmulhw(Operand(rhs), lhsCopy, scratch); + vpmullw(Operand(rhs), lhs, dest); + vpunpcklwd(scratch, dest, dest); +} + +void MacroAssembler::extMulHighInt16x8(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + ScratchSimd128Scope scratch(*this); + FloatRegister lhsCopy = moveSimd128IntIfNotAVX(lhs, scratch); + vpmulhw(Operand(rhs), lhsCopy, scratch); + vpmullw(Operand(rhs), lhs, dest); + vpunpckhwd(scratch, dest, dest); +} + +void MacroAssembler::unsignedExtMulLowInt16x8(FloatRegister lhs, + FloatRegister rhs, + FloatRegister dest) { + ScratchSimd128Scope scratch(*this); + FloatRegister lhsCopy = moveSimd128IntIfNotAVX(lhs, scratch); + vpmulhuw(Operand(rhs), lhsCopy, scratch); + vpmullw(Operand(rhs), lhs, dest); + vpunpcklwd(scratch, dest, dest); +} + +void MacroAssembler::unsignedExtMulHighInt16x8(FloatRegister lhs, + FloatRegister rhs, + FloatRegister dest) { + ScratchSimd128Scope scratch(*this); + FloatRegister lhsCopy = moveSimd128IntIfNotAVX(lhs, scratch); + vpmulhuw(Operand(rhs), lhsCopy, scratch); + vpmullw(Operand(rhs), lhs, dest); + vpunpckhwd(scratch, dest, dest); +} + +void MacroAssembler::extMulLowInt32x4(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + ScratchSimd128Scope scratch(*this); + vpshufd(ComputeShuffleMask(0, 0, 1, 0), lhs, scratch); + vpshufd(ComputeShuffleMask(0, 0, 1, 0), rhs, dest); + vpmuldq(scratch, dest, dest); +} + +void MacroAssembler::extMulHighInt32x4(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + ScratchSimd128Scope scratch(*this); + vpshufd(ComputeShuffleMask(2, 0, 3, 0), lhs, scratch); + vpshufd(ComputeShuffleMask(2, 0, 3, 0), rhs, dest); + vpmuldq(scratch, dest, dest); +} + +void MacroAssembler::unsignedExtMulLowInt32x4(FloatRegister lhs, + FloatRegister rhs, + FloatRegister dest) { + ScratchSimd128Scope scratch(*this); + vpshufd(ComputeShuffleMask(0, 0, 1, 0), lhs, scratch); + vpshufd(ComputeShuffleMask(0, 0, 1, 0), rhs, dest); + vpmuludq(Operand(scratch), dest, dest); +} + +void MacroAssembler::unsignedExtMulHighInt32x4(FloatRegister lhs, + FloatRegister rhs, + FloatRegister dest) { + ScratchSimd128Scope scratch(*this); + vpshufd(ComputeShuffleMask(2, 0, 3, 0), lhs, scratch); + vpshufd(ComputeShuffleMask(2, 0, 3, 0), rhs, dest); + vpmuludq(Operand(scratch), dest, dest); +} + +void 
MacroAssembler::q15MulrSatInt16x8(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + ScratchSimd128Scope scratch(*this); + vpmulhrsw(Operand(rhs), lhs, dest); + FloatRegister destCopy = moveSimd128IntIfNotAVX(dest, scratch); + vpcmpeqwSimd128(SimdConstant::SplatX8(0x8000), destCopy, scratch); + vpxor(scratch, dest, dest); +} + +void MacroAssembler::q15MulrInt16x8Relaxed(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpmulhrsw(Operand(rhs), lhs, dest); +} + +// Integer negate + +void MacroAssembler::negInt8x16(FloatRegister src, FloatRegister dest) { + ScratchSimd128Scope scratch(*this); + if (src == dest) { + moveSimd128Int(src, scratch); + src = scratch; + } + vpxor(Operand(dest), dest, dest); + vpsubb(Operand(src), dest, dest); +} + +void MacroAssembler::negInt16x8(FloatRegister src, FloatRegister dest) { + ScratchSimd128Scope scratch(*this); + if (src == dest) { + moveSimd128Int(src, scratch); + src = scratch; + } + vpxor(Operand(dest), dest, dest); + vpsubw(Operand(src), dest, dest); +} + +void MacroAssembler::negInt32x4(FloatRegister src, FloatRegister dest) { + ScratchSimd128Scope scratch(*this); + if (src == dest) { + moveSimd128Int(src, scratch); + src = scratch; + } + vpxor(Operand(dest), dest, dest); + vpsubd(Operand(src), dest, dest); +} + +void MacroAssembler::negInt64x2(FloatRegister src, FloatRegister dest) { + ScratchSimd128Scope scratch(*this); + if (src == dest) { + moveSimd128Int(src, scratch); + src = scratch; + } + vpxor(Operand(dest), dest, dest); + vpsubq(Operand(src), dest, dest); +} + +// Saturating integer add + +void MacroAssembler::addSatInt8x16(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpaddsb(Operand(rhs), lhs, dest); +} + +void MacroAssembler::addSatInt8x16(FloatRegister lhs, const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpaddsb, + &MacroAssembler::vpaddsbSimd128); +} + +void MacroAssembler::unsignedAddSatInt8x16(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpaddusb(Operand(rhs), lhs, dest); +} + +void MacroAssembler::unsignedAddSatInt8x16(FloatRegister lhs, + const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpaddusb, + &MacroAssembler::vpaddusbSimd128); +} + +void MacroAssembler::addSatInt16x8(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpaddsw(Operand(rhs), lhs, dest); +} + +void MacroAssembler::addSatInt16x8(FloatRegister lhs, const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpaddsw, + &MacroAssembler::vpaddswSimd128); +} + +void MacroAssembler::unsignedAddSatInt16x8(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpaddusw(Operand(rhs), lhs, dest); +} + +void MacroAssembler::unsignedAddSatInt16x8(FloatRegister lhs, + const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpaddusw, + &MacroAssembler::vpadduswSimd128); +} + +// Saturating integer subtract + +void MacroAssembler::subSatInt8x16(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpsubsb(Operand(rhs), lhs, dest); +} + +void MacroAssembler::subSatInt8x16(FloatRegister lhs, const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpsubsb, + &MacroAssembler::vpsubsbSimd128); +} + +void MacroAssembler::unsignedSubSatInt8x16(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpsubusb(Operand(rhs), lhs, dest); +} + +void 
MacroAssembler::unsignedSubSatInt8x16(FloatRegister lhs, + const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpsubusb, + &MacroAssembler::vpsubusbSimd128); +} + +void MacroAssembler::subSatInt16x8(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpsubsw(Operand(rhs), lhs, dest); +} + +void MacroAssembler::subSatInt16x8(FloatRegister lhs, const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpsubsw, + &MacroAssembler::vpsubswSimd128); +} + +void MacroAssembler::unsignedSubSatInt16x8(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpsubusw(Operand(rhs), lhs, dest); +} + +void MacroAssembler::unsignedSubSatInt16x8(FloatRegister lhs, + const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpsubusw, + &MacroAssembler::vpsubuswSimd128); +} + +// Lane-wise integer minimum + +void MacroAssembler::minInt8x16(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpminsb(Operand(rhs), lhs, dest); +} + +void MacroAssembler::minInt8x16(FloatRegister lhs, const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpminsb, + &MacroAssembler::vpminsbSimd128); +} + +void MacroAssembler::unsignedMinInt8x16(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpminub(Operand(rhs), lhs, dest); +} + +void MacroAssembler::unsignedMinInt8x16(FloatRegister lhs, + const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpminub, + &MacroAssembler::vpminubSimd128); +} + +void MacroAssembler::minInt16x8(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpminsw(Operand(rhs), lhs, dest); +} + +void MacroAssembler::minInt16x8(FloatRegister lhs, const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpminsw, + &MacroAssembler::vpminswSimd128); +} + +void MacroAssembler::unsignedMinInt16x8(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpminuw(Operand(rhs), lhs, dest); +} + +void MacroAssembler::unsignedMinInt16x8(FloatRegister lhs, + const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpminuw, + &MacroAssembler::vpminuwSimd128); +} + +void MacroAssembler::minInt32x4(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpminsd(Operand(rhs), lhs, dest); +} + +void MacroAssembler::minInt32x4(FloatRegister lhs, const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpminsd, + &MacroAssembler::vpminsdSimd128); +} + +void MacroAssembler::unsignedMinInt32x4(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpminud(Operand(rhs), lhs, dest); +} + +void MacroAssembler::unsignedMinInt32x4(FloatRegister lhs, + const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpminud, + &MacroAssembler::vpminudSimd128); +} + +// Lane-wise integer maximum + +void MacroAssembler::maxInt8x16(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpmaxsb(Operand(rhs), lhs, dest); +} + +void MacroAssembler::maxInt8x16(FloatRegister lhs, const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpmaxsb, + &MacroAssembler::vpmaxsbSimd128); +} + +void MacroAssembler::unsignedMaxInt8x16(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpmaxub(Operand(rhs), lhs, dest); +} + +void 
MacroAssembler::unsignedMaxInt8x16(FloatRegister lhs, + const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpmaxub, + &MacroAssembler::vpmaxubSimd128); +} + +void MacroAssembler::maxInt16x8(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpmaxsw(Operand(rhs), lhs, dest); +} + +void MacroAssembler::maxInt16x8(FloatRegister lhs, const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpmaxsw, + &MacroAssembler::vpmaxswSimd128); +} + +void MacroAssembler::unsignedMaxInt16x8(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpmaxuw(Operand(rhs), lhs, dest); +} + +void MacroAssembler::unsignedMaxInt16x8(FloatRegister lhs, + const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpmaxuw, + &MacroAssembler::vpmaxuwSimd128); +} + +void MacroAssembler::maxInt32x4(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpmaxsd(Operand(rhs), lhs, dest); +} + +void MacroAssembler::maxInt32x4(FloatRegister lhs, const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpmaxsd, + &MacroAssembler::vpmaxsdSimd128); +} + +void MacroAssembler::unsignedMaxInt32x4(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpmaxud(Operand(rhs), lhs, dest); +} + +void MacroAssembler::unsignedMaxInt32x4(FloatRegister lhs, + const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpmaxud, + &MacroAssembler::vpmaxudSimd128); +} + +// Lane-wise integer rounding average + +void MacroAssembler::unsignedAverageInt8x16(FloatRegister lhs, + FloatRegister rhs, + FloatRegister dest) { + vpavgb(Operand(rhs), lhs, dest); +} + +void MacroAssembler::unsignedAverageInt16x8(FloatRegister lhs, + FloatRegister rhs, + FloatRegister dest) { + vpavgw(Operand(rhs), lhs, dest); +} + +// Lane-wise integer absolute value + +void MacroAssembler::absInt8x16(FloatRegister src, FloatRegister dest) { + vpabsb(Operand(src), dest); +} + +void MacroAssembler::absInt16x8(FloatRegister src, FloatRegister dest) { + vpabsw(Operand(src), dest); +} + +void MacroAssembler::absInt32x4(FloatRegister src, FloatRegister dest) { + vpabsd(Operand(src), dest); +} + +void MacroAssembler::absInt64x2(FloatRegister src, FloatRegister dest) { + ScratchSimd128Scope scratch(*this); + signReplicationInt64x2(src, scratch); + src = moveSimd128IntIfNotAVX(src, dest); + vpxor(Operand(scratch), src, dest); + vpsubq(Operand(scratch), dest, dest); +} + +// Left shift by scalar + +void MacroAssembler::leftShiftInt8x16(Register rhs, FloatRegister lhsDest, + FloatRegister temp) { + MacroAssemblerX86Shared::packedLeftShiftByScalarInt8x16(lhsDest, rhs, temp, + lhsDest); +} + +void MacroAssembler::leftShiftInt8x16(Imm32 count, FloatRegister src, + FloatRegister dest) { + MacroAssemblerX86Shared::packedLeftShiftByScalarInt8x16(count, src, dest); +} + +void MacroAssembler::leftShiftInt16x8(Register rhs, FloatRegister lhsDest) { + MacroAssemblerX86Shared::packedLeftShiftByScalarInt16x8(lhsDest, rhs, + lhsDest); +} + +void MacroAssembler::leftShiftInt16x8(Imm32 count, FloatRegister src, + FloatRegister dest) { + src = moveSimd128IntIfNotAVX(src, dest); + vpsllw(count, src, dest); +} + +void MacroAssembler::leftShiftInt32x4(Register rhs, FloatRegister lhsDest) { + MacroAssemblerX86Shared::packedLeftShiftByScalarInt32x4(lhsDest, rhs, + lhsDest); +} + +void MacroAssembler::leftShiftInt32x4(Imm32 count, FloatRegister src, + 
FloatRegister dest) { + src = moveSimd128IntIfNotAVX(src, dest); + vpslld(count, src, dest); +} + +void MacroAssembler::leftShiftInt64x2(Register rhs, FloatRegister lhsDest) { + MacroAssemblerX86Shared::packedLeftShiftByScalarInt64x2(lhsDest, rhs, + lhsDest); +} + +void MacroAssembler::leftShiftInt64x2(Imm32 count, FloatRegister src, + FloatRegister dest) { + src = moveSimd128IntIfNotAVX(src, dest); + vpsllq(count, src, dest); +} + +// Right shift by scalar + +void MacroAssembler::rightShiftInt8x16(Register rhs, FloatRegister lhsDest, + FloatRegister temp) { + MacroAssemblerX86Shared::packedRightShiftByScalarInt8x16(lhsDest, rhs, temp, + lhsDest); +} + +void MacroAssembler::rightShiftInt8x16(Imm32 count, FloatRegister src, + FloatRegister dest) { + MacroAssemblerX86Shared::packedRightShiftByScalarInt8x16(count, src, dest); +} + +void MacroAssembler::unsignedRightShiftInt8x16(Register rhs, + FloatRegister lhsDest, + FloatRegister temp) { + MacroAssemblerX86Shared::packedUnsignedRightShiftByScalarInt8x16( + lhsDest, rhs, temp, lhsDest); +} + +void MacroAssembler::unsignedRightShiftInt8x16(Imm32 count, FloatRegister src, + FloatRegister dest) { + MacroAssemblerX86Shared::packedUnsignedRightShiftByScalarInt8x16(count, src, + dest); +} + +void MacroAssembler::rightShiftInt16x8(Register rhs, FloatRegister lhsDest) { + MacroAssemblerX86Shared::packedRightShiftByScalarInt16x8(lhsDest, rhs, + lhsDest); +} + +void MacroAssembler::rightShiftInt16x8(Imm32 count, FloatRegister src, + FloatRegister dest) { + src = moveSimd128IntIfNotAVX(src, dest); + vpsraw(count, src, dest); +} + +void MacroAssembler::unsignedRightShiftInt16x8(Register rhs, + FloatRegister lhsDest) { + MacroAssemblerX86Shared::packedUnsignedRightShiftByScalarInt16x8(lhsDest, rhs, + lhsDest); +} + +void MacroAssembler::unsignedRightShiftInt16x8(Imm32 count, FloatRegister src, + FloatRegister dest) { + src = moveSimd128IntIfNotAVX(src, dest); + vpsrlw(count, src, dest); +} + +void MacroAssembler::rightShiftInt32x4(Register rhs, FloatRegister lhsDest) { + MacroAssemblerX86Shared::packedRightShiftByScalarInt32x4(lhsDest, rhs, + lhsDest); +} + +void MacroAssembler::rightShiftInt32x4(Imm32 count, FloatRegister src, + FloatRegister dest) { + src = moveSimd128IntIfNotAVX(src, dest); + vpsrad(count, src, dest); +} + +void MacroAssembler::unsignedRightShiftInt32x4(Register rhs, + FloatRegister lhsDest) { + MacroAssemblerX86Shared::packedUnsignedRightShiftByScalarInt32x4(lhsDest, rhs, + lhsDest); +} + +void MacroAssembler::unsignedRightShiftInt32x4(Imm32 count, FloatRegister src, + FloatRegister dest) { + src = moveSimd128IntIfNotAVX(src, dest); + vpsrld(count, src, dest); +} + +void MacroAssembler::rightShiftInt64x2(Register rhs, FloatRegister lhsDest, + FloatRegister temp) { + MacroAssemblerX86Shared::packedRightShiftByScalarInt64x2(lhsDest, rhs, temp, + lhsDest); +} + +void MacroAssembler::rightShiftInt64x2(Imm32 count, FloatRegister src, + FloatRegister dest) { + MacroAssemblerX86Shared::packedRightShiftByScalarInt64x2(count, src, dest); +} + +void MacroAssembler::unsignedRightShiftInt64x2(Register rhs, + FloatRegister lhsDest) { + MacroAssemblerX86Shared::packedUnsignedRightShiftByScalarInt64x2(lhsDest, rhs, + lhsDest); +} + +void MacroAssembler::unsignedRightShiftInt64x2(Imm32 count, FloatRegister src, + FloatRegister dest) { + src = moveSimd128IntIfNotAVX(src, dest); + vpsrlq(count, src, dest); +} + +// Sign replication operation + +void MacroAssembler::signReplicationInt8x16(FloatRegister src, + FloatRegister dest) { + MOZ_ASSERT(src != 
dest); + vpxor(Operand(dest), dest, dest); + vpcmpgtb(Operand(src), dest, dest); +} + +void MacroAssembler::signReplicationInt16x8(FloatRegister src, + FloatRegister dest) { + src = moveSimd128IntIfNotAVX(src, dest); + vpsraw(Imm32(15), src, dest); +} + +void MacroAssembler::signReplicationInt32x4(FloatRegister src, + FloatRegister dest) { + src = moveSimd128IntIfNotAVX(src, dest); + vpsrad(Imm32(31), src, dest); +} + +void MacroAssembler::signReplicationInt64x2(FloatRegister src, + FloatRegister dest) { + vpshufd(ComputeShuffleMask(1, 1, 3, 3), src, dest); + vpsrad(Imm32(31), dest, dest); +} + +// Bitwise and, or, xor, not + +void MacroAssembler::bitwiseAndSimd128(FloatRegister rhs, + FloatRegister lhsDest) { + vpand(Operand(rhs), lhsDest, lhsDest); +} + +void MacroAssembler::bitwiseAndSimd128(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpand(Operand(rhs), lhs, dest); +} + +void MacroAssembler::bitwiseAndSimd128(FloatRegister lhs, + const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpand, + &MacroAssembler::vpandSimd128); +} + +void MacroAssembler::bitwiseOrSimd128(FloatRegister rhs, + FloatRegister lhsDest) { + vpor(Operand(rhs), lhsDest, lhsDest); +} + +void MacroAssembler::bitwiseOrSimd128(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpor(Operand(rhs), lhs, dest); +} + +void MacroAssembler::bitwiseOrSimd128(FloatRegister lhs, + const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpor, + &MacroAssembler::vporSimd128); +} + +void MacroAssembler::bitwiseXorSimd128(FloatRegister rhs, + FloatRegister lhsDest) { + vpxor(Operand(rhs), lhsDest, lhsDest); +} + +void MacroAssembler::bitwiseXorSimd128(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpxor(Operand(rhs), lhs, dest); +} + +void MacroAssembler::bitwiseXorSimd128(FloatRegister lhs, + const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpxor, + &MacroAssembler::vpxorSimd128); +} + +void MacroAssembler::bitwiseNotSimd128(FloatRegister src, FloatRegister dest) { + src = moveSimd128IntIfNotAVX(src, dest); + bitwiseXorSimd128(src, SimdConstant::SplatX16(-1), dest); +} + +// Bitwise and-not + +void MacroAssembler::bitwiseNotAndSimd128(FloatRegister rhs, + FloatRegister lhsDest) { + vpandn(Operand(rhs), lhsDest, lhsDest); +} + +void MacroAssembler::bitwiseNotAndSimd128(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpandn(Operand(rhs), lhs, dest); +} + +// Bitwise select + +void MacroAssembler::bitwiseSelectSimd128(FloatRegister mask, + FloatRegister onTrue, + FloatRegister onFalse, + FloatRegister dest, + FloatRegister temp) { + MacroAssemblerX86Shared::selectSimd128(mask, onTrue, onFalse, temp, dest); +} + +// Population count + +void MacroAssembler::popcntInt8x16(FloatRegister src, FloatRegister dest, + FloatRegister temp) { + MacroAssemblerX86Shared::popcntInt8x16(src, temp, dest); +} + +// Comparisons (integer and floating-point) + +void MacroAssembler::compareInt8x16(Assembler::Condition cond, + FloatRegister rhs, FloatRegister lhsDest) { + MacroAssemblerX86Shared::compareInt8x16(lhsDest, Operand(rhs), cond, lhsDest); +} + +void MacroAssembler::compareInt8x16(Assembler::Condition cond, + FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + MacroAssemblerX86Shared::compareInt8x16(lhs, Operand(rhs), cond, dest); +} + +void MacroAssembler::compareInt8x16(Assembler::Condition cond, + FloatRegister lhs, const SimdConstant& 
rhs, + FloatRegister dest) { + MOZ_ASSERT(cond != Assembler::Condition::LessThan && + cond != Assembler::Condition::GreaterThanOrEqual); + MacroAssemblerX86Shared::compareInt8x16(cond, lhs, rhs, dest); +} + +void MacroAssembler::compareInt16x8(Assembler::Condition cond, + FloatRegister rhs, FloatRegister lhsDest) { + MacroAssemblerX86Shared::compareInt16x8(lhsDest, Operand(rhs), cond, lhsDest); +} + +void MacroAssembler::compareInt16x8(Assembler::Condition cond, + FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + MacroAssemblerX86Shared::compareInt16x8(lhs, Operand(rhs), cond, dest); +} + +void MacroAssembler::compareInt16x8(Assembler::Condition cond, + FloatRegister lhs, const SimdConstant& rhs, + FloatRegister dest) { + MOZ_ASSERT(cond != Assembler::Condition::LessThan && + cond != Assembler::Condition::GreaterThanOrEqual); + MacroAssemblerX86Shared::compareInt16x8(cond, lhs, rhs, dest); +} + +void MacroAssembler::compareInt32x4(Assembler::Condition cond, + FloatRegister rhs, FloatRegister lhsDest) { + MacroAssemblerX86Shared::compareInt32x4(lhsDest, Operand(rhs), cond, lhsDest); +} + +void MacroAssembler::compareInt32x4(Assembler::Condition cond, + FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + MacroAssemblerX86Shared::compareInt32x4(lhs, Operand(rhs), cond, dest); +} + +void MacroAssembler::compareInt32x4(Assembler::Condition cond, + FloatRegister lhs, const SimdConstant& rhs, + FloatRegister dest) { + MOZ_ASSERT(cond != Assembler::Condition::LessThan && + cond != Assembler::Condition::GreaterThanOrEqual); + MacroAssemblerX86Shared::compareInt32x4(cond, lhs, rhs, dest); +} + +void MacroAssembler::compareForEqualityInt64x2(Assembler::Condition cond, + FloatRegister lhs, + FloatRegister rhs, + FloatRegister dest) { + MacroAssemblerX86Shared::compareForEqualityInt64x2(lhs, Operand(rhs), cond, + dest); +} + +void MacroAssembler::compareForOrderingInt64x2( + Assembler::Condition cond, FloatRegister lhs, FloatRegister rhs, + FloatRegister dest, FloatRegister temp1, FloatRegister temp2) { + if (HasAVX() && HasSSE42()) { + MacroAssemblerX86Shared::compareForOrderingInt64x2AVX(lhs, rhs, cond, dest); + } else { + MacroAssemblerX86Shared::compareForOrderingInt64x2(lhs, Operand(rhs), cond, + temp1, temp2, dest); + } +} + +void MacroAssembler::compareFloat32x4(Assembler::Condition cond, + FloatRegister rhs, + FloatRegister lhsDest) { + // Code in the SIMD implementation allows operands to be reversed like this, + // this benefits the baseline compiler. Ion takes care of the reversing + // itself and never generates GT/GE. 
+ if (cond == Assembler::GreaterThan) { + MacroAssemblerX86Shared::compareFloat32x4(rhs, Operand(lhsDest), + Assembler::LessThan, lhsDest); + } else if (cond == Assembler::GreaterThanOrEqual) { + MacroAssemblerX86Shared::compareFloat32x4( + rhs, Operand(lhsDest), Assembler::LessThanOrEqual, lhsDest); + } else { + MacroAssemblerX86Shared::compareFloat32x4(lhsDest, Operand(rhs), cond, + lhsDest); + } +} + +void MacroAssembler::compareFloat32x4(Assembler::Condition cond, + FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + MacroAssemblerX86Shared::compareFloat32x4(lhs, Operand(rhs), cond, dest); +} + +void MacroAssembler::compareFloat32x4(Assembler::Condition cond, + FloatRegister lhs, + const SimdConstant& rhs, + FloatRegister dest) { + MOZ_ASSERT(cond != Assembler::Condition::GreaterThan && + cond != Assembler::Condition::GreaterThanOrEqual); + MacroAssemblerX86Shared::compareFloat32x4(cond, lhs, rhs, dest); +} + +void MacroAssembler::compareFloat64x2(Assembler::Condition cond, + FloatRegister rhs, + FloatRegister lhsDest) { + compareFloat64x2(cond, lhsDest, rhs, lhsDest); +} + +void MacroAssembler::compareFloat64x2(Assembler::Condition cond, + FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + // Code in the SIMD implementation allows operands to be reversed like this, + // this benefits the baseline compiler. Ion takes care of the reversing + // itself and never generates GT/GE. + if (cond == Assembler::GreaterThan) { + MacroAssemblerX86Shared::compareFloat64x2(rhs, Operand(lhs), + Assembler::LessThan, dest); + } else if (cond == Assembler::GreaterThanOrEqual) { + MacroAssemblerX86Shared::compareFloat64x2(rhs, Operand(lhs), + Assembler::LessThanOrEqual, dest); + } else { + MacroAssemblerX86Shared::compareFloat64x2(lhs, Operand(rhs), cond, dest); + } +} + +void MacroAssembler::compareFloat64x2(Assembler::Condition cond, + FloatRegister lhs, + const SimdConstant& rhs, + FloatRegister dest) { + MOZ_ASSERT(cond != Assembler::Condition::GreaterThan && + cond != Assembler::Condition::GreaterThanOrEqual); + MacroAssemblerX86Shared::compareFloat64x2(cond, lhs, rhs, dest); +} + +// Load. See comments above regarding integer operation. + +void MacroAssembler::loadUnalignedSimd128(const Operand& src, + FloatRegister dest) { + loadUnalignedSimd128Int(src, dest); +} + +void MacroAssembler::loadUnalignedSimd128(const Address& src, + FloatRegister dest) { + loadUnalignedSimd128Int(src, dest); +} + +void MacroAssembler::loadUnalignedSimd128(const BaseIndex& src, + FloatRegister dest) { + loadUnalignedSimd128Int(src, dest); +} + +// Store. See comments above regarding integer operation. 
+ +void MacroAssembler::storeUnalignedSimd128(FloatRegister src, + const Address& dest) { + storeUnalignedSimd128Int(src, dest); +} + +void MacroAssembler::storeUnalignedSimd128(FloatRegister src, + const BaseIndex& dest) { + storeUnalignedSimd128Int(src, dest); +} + +// Floating point negation + +void MacroAssembler::negFloat32x4(FloatRegister src, FloatRegister dest) { + src = moveSimd128FloatIfNotAVX(src, dest); + bitwiseXorSimd128(src, SimdConstant::SplatX4(-0.f), dest); +} + +void MacroAssembler::negFloat64x2(FloatRegister src, FloatRegister dest) { + src = moveSimd128FloatIfNotAVX(src, dest); + bitwiseXorSimd128(src, SimdConstant::SplatX2(-0.0), dest); +} + +// Floating point absolute value + +void MacroAssembler::absFloat32x4(FloatRegister src, FloatRegister dest) { + src = moveSimd128FloatIfNotAVX(src, dest); + bitwiseAndSimd128(src, SimdConstant::SplatX4(0x7FFFFFFF), dest); +} + +void MacroAssembler::absFloat64x2(FloatRegister src, FloatRegister dest) { + src = moveSimd128FloatIfNotAVX(src, dest); + bitwiseAndSimd128(src, SimdConstant::SplatX2(int64_t(0x7FFFFFFFFFFFFFFFll)), + dest); +} + +// NaN-propagating minimum + +void MacroAssembler::minFloat32x4(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest, FloatRegister temp1, + FloatRegister temp2) { + MacroAssemblerX86Shared::minFloat32x4(lhs, rhs, temp1, temp2, dest); +} + +void MacroAssembler::minFloat64x2(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest, FloatRegister temp1, + FloatRegister temp2) { + MacroAssemblerX86Shared::minFloat64x2(lhs, rhs, temp1, temp2, dest); +} + +// NaN-propagating maximum + +void MacroAssembler::maxFloat32x4(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest, FloatRegister temp1, + FloatRegister temp2) { + MacroAssemblerX86Shared::maxFloat32x4(lhs, rhs, temp1, temp2, dest); +} + +void MacroAssembler::maxFloat64x2(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest, FloatRegister temp1, + FloatRegister temp2) { + MacroAssemblerX86Shared::maxFloat64x2(lhs, rhs, temp1, temp2, dest); +} + +// Compare-based minimum + +void MacroAssembler::pseudoMinFloat32x4(FloatRegister rhsOrRhsDest, + FloatRegister lhsOrLhsDest) { + // Shut up the linter by using the same names as in the declaration, then + // aliasing here. 
+ FloatRegister rhsDest = rhsOrRhsDest; + FloatRegister lhs = lhsOrLhsDest; + vminps(Operand(lhs), rhsDest, rhsDest); +} + +void MacroAssembler::pseudoMinFloat32x4(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vminps(Operand(rhs), lhs, dest); +} + +void MacroAssembler::pseudoMinFloat64x2(FloatRegister rhsOrRhsDest, + FloatRegister lhsOrLhsDest) { + FloatRegister rhsDest = rhsOrRhsDest; + FloatRegister lhs = lhsOrLhsDest; + vminpd(Operand(lhs), rhsDest, rhsDest); +} + +void MacroAssembler::pseudoMinFloat64x2(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vminpd(Operand(rhs), lhs, dest); +} + +// Compare-based maximum + +void MacroAssembler::pseudoMaxFloat32x4(FloatRegister rhsOrRhsDest, + FloatRegister lhsOrLhsDest) { + FloatRegister rhsDest = rhsOrRhsDest; + FloatRegister lhs = lhsOrLhsDest; + vmaxps(Operand(lhs), rhsDest, rhsDest); +} + +void MacroAssembler::pseudoMaxFloat32x4(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vmaxps(Operand(rhs), lhs, dest); +} + +void MacroAssembler::pseudoMaxFloat64x2(FloatRegister rhsOrRhsDest, + FloatRegister lhsOrLhsDest) { + FloatRegister rhsDest = rhsOrRhsDest; + FloatRegister lhs = lhsOrLhsDest; + vmaxpd(Operand(lhs), rhsDest, rhsDest); +} + +void MacroAssembler::pseudoMaxFloat64x2(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vmaxpd(Operand(rhs), lhs, dest); +} + +// Widening/pairwise integer dot product + +void MacroAssembler::widenDotInt16x8(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpmaddwd(Operand(rhs), lhs, dest); +} + +void MacroAssembler::widenDotInt16x8(FloatRegister lhs, const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpmaddwd, + &MacroAssembler::vpmaddwdSimd128); +} + +void MacroAssembler::dotInt8x16Int7x16(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + ScratchSimd128Scope scratch(*this); + if (lhs == dest && !HasAVX()) { + moveSimd128Int(lhs, scratch); + lhs = scratch; + } + rhs = moveSimd128IntIfNotAVX(rhs, dest); + vpmaddubsw(lhs, rhs, dest); +} + +void MacroAssembler::dotInt8x16Int7x16ThenAdd(FloatRegister lhs, + FloatRegister rhs, + FloatRegister dest) { + ScratchSimd128Scope scratch(*this); + rhs = moveSimd128IntIfNotAVX(rhs, scratch); + vpmaddubsw(lhs, rhs, scratch); + vpmaddwdSimd128(SimdConstant::SplatX8(1), scratch, scratch); + vpaddd(Operand(scratch), dest, dest); +} + +// Rounding + +void MacroAssembler::ceilFloat32x4(FloatRegister src, FloatRegister dest) { + vroundps(Assembler::SSERoundingMode::Ceil, Operand(src), dest); +} + +void MacroAssembler::ceilFloat64x2(FloatRegister src, FloatRegister dest) { + vroundpd(Assembler::SSERoundingMode::Ceil, Operand(src), dest); +} + +void MacroAssembler::floorFloat32x4(FloatRegister src, FloatRegister dest) { + vroundps(Assembler::SSERoundingMode::Floor, Operand(src), dest); +} + +void MacroAssembler::floorFloat64x2(FloatRegister src, FloatRegister dest) { + vroundpd(Assembler::SSERoundingMode::Floor, Operand(src), dest); +} + +void MacroAssembler::truncFloat32x4(FloatRegister src, FloatRegister dest) { + vroundps(Assembler::SSERoundingMode::Trunc, Operand(src), dest); +} + +void MacroAssembler::truncFloat64x2(FloatRegister src, FloatRegister dest) { + vroundpd(Assembler::SSERoundingMode::Trunc, Operand(src), dest); +} + +void MacroAssembler::nearestFloat32x4(FloatRegister src, FloatRegister dest) { + vroundps(Assembler::SSERoundingMode::Nearest, Operand(src), dest); +} + +void MacroAssembler::nearestFloat64x2(FloatRegister src, 
FloatRegister dest) { + vroundpd(Assembler::SSERoundingMode::Nearest, Operand(src), dest); +} + +// Floating add + +void MacroAssembler::addFloat32x4(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vaddps(Operand(rhs), lhs, dest); +} + +void MacroAssembler::addFloat32x4(FloatRegister lhs, const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vaddps, + &MacroAssembler::vaddpsSimd128); +} + +void MacroAssembler::addFloat64x2(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vaddpd(Operand(rhs), lhs, dest); +} + +void MacroAssembler::addFloat64x2(FloatRegister lhs, const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vaddpd, + &MacroAssembler::vaddpdSimd128); +} + +// Floating subtract + +void MacroAssembler::subFloat32x4(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vsubps(Operand(rhs), lhs, dest); +} + +void MacroAssembler::subFloat32x4(FloatRegister lhs, const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vsubps, + &MacroAssembler::vsubpsSimd128); +} + +void MacroAssembler::subFloat64x2(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + AssemblerX86Shared::vsubpd(Operand(rhs), lhs, dest); +} + +void MacroAssembler::subFloat64x2(FloatRegister lhs, const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vsubpd, + &MacroAssembler::vsubpdSimd128); +} + +// Floating division + +void MacroAssembler::divFloat32x4(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vdivps(Operand(rhs), lhs, dest); +} + +void MacroAssembler::divFloat32x4(FloatRegister lhs, const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vdivps, + &MacroAssembler::vdivpsSimd128); +} + +void MacroAssembler::divFloat64x2(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vdivpd(Operand(rhs), lhs, dest); +} + +void MacroAssembler::divFloat64x2(FloatRegister lhs, const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vdivpd, + &MacroAssembler::vdivpdSimd128); +} + +// Floating Multiply + +void MacroAssembler::mulFloat32x4(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vmulps(Operand(rhs), lhs, dest); +} + +void MacroAssembler::mulFloat32x4(FloatRegister lhs, const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vmulps, + &MacroAssembler::vmulpsSimd128); +} + +void MacroAssembler::mulFloat64x2(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vmulpd(Operand(rhs), lhs, dest); +} + +void MacroAssembler::mulFloat64x2(FloatRegister lhs, const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vmulpd, + &MacroAssembler::vmulpdSimd128); +} + +// Pairwise add + +void MacroAssembler::extAddPairwiseInt8x16(FloatRegister src, + FloatRegister dest) { + ScratchSimd128Scope scratch(*this); + if (dest == src) { + moveSimd128(src, scratch); + src = scratch; + } + loadConstantSimd128Int(SimdConstant::SplatX16(1), dest); + vpmaddubsw(src, dest, dest); +} + +void MacroAssembler::unsignedExtAddPairwiseInt8x16(FloatRegister src, + FloatRegister dest) { + src = moveSimd128IntIfNotAVX(src, dest); + vpmaddubswSimd128(SimdConstant::SplatX16(1), src, dest); +} + +void MacroAssembler::extAddPairwiseInt16x8(FloatRegister src, + FloatRegister dest) { + src = moveSimd128IntIfNotAVX(src, dest); + 
vpmaddwdSimd128(SimdConstant::SplatX8(1), src, dest); +} + +void MacroAssembler::unsignedExtAddPairwiseInt16x8(FloatRegister src, + FloatRegister dest) { + src = moveSimd128IntIfNotAVX(src, dest); + vpxorSimd128(SimdConstant::SplatX8(-0x8000), src, dest); + vpmaddwdSimd128(SimdConstant::SplatX8(1), dest, dest); + vpadddSimd128(SimdConstant::SplatX4(0x00010000), dest, dest); +} + +// Floating square root + +void MacroAssembler::sqrtFloat32x4(FloatRegister src, FloatRegister dest) { + vsqrtps(Operand(src), dest); +} + +void MacroAssembler::sqrtFloat64x2(FloatRegister src, FloatRegister dest) { + vsqrtpd(Operand(src), dest); +} + +// Integer to floating point with rounding + +void MacroAssembler::convertInt32x4ToFloat32x4(FloatRegister src, + FloatRegister dest) { + vcvtdq2ps(src, dest); +} + +void MacroAssembler::unsignedConvertInt32x4ToFloat32x4(FloatRegister src, + FloatRegister dest) { + MacroAssemblerX86Shared::unsignedConvertInt32x4ToFloat32x4(src, dest); +} + +void MacroAssembler::convertInt32x4ToFloat64x2(FloatRegister src, + FloatRegister dest) { + vcvtdq2pd(src, dest); +} + +void MacroAssembler::unsignedConvertInt32x4ToFloat64x2(FloatRegister src, + FloatRegister dest) { + MacroAssemblerX86Shared::unsignedConvertInt32x4ToFloat64x2(src, dest); +} + +// Floating point to integer with saturation + +void MacroAssembler::truncSatFloat32x4ToInt32x4(FloatRegister src, + FloatRegister dest) { + MacroAssemblerX86Shared::truncSatFloat32x4ToInt32x4(src, dest); +} + +void MacroAssembler::unsignedTruncSatFloat32x4ToInt32x4(FloatRegister src, + FloatRegister dest, + FloatRegister temp) { + MacroAssemblerX86Shared::unsignedTruncSatFloat32x4ToInt32x4(src, temp, dest); +} + +void MacroAssembler::truncSatFloat64x2ToInt32x4(FloatRegister src, + FloatRegister dest, + FloatRegister temp) { + MacroAssemblerX86Shared::truncSatFloat64x2ToInt32x4(src, temp, dest); +} + +void MacroAssembler::unsignedTruncSatFloat64x2ToInt32x4(FloatRegister src, + FloatRegister dest, + FloatRegister temp) { + MacroAssemblerX86Shared::unsignedTruncSatFloat64x2ToInt32x4(src, temp, dest); +} + +void MacroAssembler::truncFloat32x4ToInt32x4Relaxed(FloatRegister src, + FloatRegister dest) { + vcvttps2dq(src, dest); +} + +void MacroAssembler::unsignedTruncFloat32x4ToInt32x4Relaxed( + FloatRegister src, FloatRegister dest) { + MacroAssemblerX86Shared::unsignedTruncFloat32x4ToInt32x4Relaxed(src, dest); +} + +void MacroAssembler::truncFloat64x2ToInt32x4Relaxed(FloatRegister src, + FloatRegister dest) { + vcvttpd2dq(src, dest); +} + +void MacroAssembler::unsignedTruncFloat64x2ToInt32x4Relaxed( + FloatRegister src, FloatRegister dest) { + MacroAssemblerX86Shared::unsignedTruncFloat64x2ToInt32x4Relaxed(src, dest); +} + +// Floating point widening + +void MacroAssembler::convertFloat64x2ToFloat32x4(FloatRegister src, + FloatRegister dest) { + vcvtpd2ps(src, dest); +} + +void MacroAssembler::convertFloat32x4ToFloat64x2(FloatRegister src, + FloatRegister dest) { + vcvtps2pd(src, dest); +} + +// Integer to integer narrowing + +void MacroAssembler::narrowInt16x8(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpacksswb(Operand(rhs), lhs, dest); +} + +void MacroAssembler::narrowInt16x8(FloatRegister lhs, const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpacksswb, + &MacroAssembler::vpacksswbSimd128); +} + +void MacroAssembler::unsignedNarrowInt16x8(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpackuswb(Operand(rhs), lhs, dest); +} + +void 
MacroAssembler::unsignedNarrowInt16x8(FloatRegister lhs, + const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpackuswb, + &MacroAssembler::vpackuswbSimd128); +} + +void MacroAssembler::narrowInt32x4(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpackssdw(Operand(rhs), lhs, dest); +} + +void MacroAssembler::narrowInt32x4(FloatRegister lhs, const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpackssdw, + &MacroAssembler::vpackssdwSimd128); +} + +void MacroAssembler::unsignedNarrowInt32x4(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vpackusdw(Operand(rhs), lhs, dest); +} + +void MacroAssembler::unsignedNarrowInt32x4(FloatRegister lhs, + const SimdConstant& rhs, + FloatRegister dest) { + binarySimd128(lhs, rhs, dest, &MacroAssembler::vpackusdw, + &MacroAssembler::vpackusdwSimd128); +} + +// Integer to integer widening + +void MacroAssembler::widenLowInt8x16(FloatRegister src, FloatRegister dest) { + vpmovsxbw(Operand(src), dest); +} + +void MacroAssembler::widenHighInt8x16(FloatRegister src, FloatRegister dest) { + vpalignr(Operand(src), dest, dest, 8); + vpmovsxbw(Operand(dest), dest); +} + +void MacroAssembler::unsignedWidenLowInt8x16(FloatRegister src, + FloatRegister dest) { + vpmovzxbw(Operand(src), dest); +} + +void MacroAssembler::unsignedWidenHighInt8x16(FloatRegister src, + FloatRegister dest) { + vpalignr(Operand(src), dest, dest, 8); + vpmovzxbw(Operand(dest), dest); +} + +void MacroAssembler::widenLowInt16x8(FloatRegister src, FloatRegister dest) { + vpmovsxwd(Operand(src), dest); +} + +void MacroAssembler::widenHighInt16x8(FloatRegister src, FloatRegister dest) { + vpalignr(Operand(src), dest, dest, 8); + vpmovsxwd(Operand(dest), dest); +} + +void MacroAssembler::unsignedWidenLowInt16x8(FloatRegister src, + FloatRegister dest) { + vpmovzxwd(Operand(src), dest); +} + +void MacroAssembler::unsignedWidenHighInt16x8(FloatRegister src, + FloatRegister dest) { + vpalignr(Operand(src), dest, dest, 8); + vpmovzxwd(Operand(dest), dest); +} + +void MacroAssembler::widenLowInt32x4(FloatRegister src, FloatRegister dest) { + vpmovsxdq(Operand(src), dest); +} + +void MacroAssembler::unsignedWidenLowInt32x4(FloatRegister src, + FloatRegister dest) { + vpmovzxdq(Operand(src), dest); +} + +void MacroAssembler::widenHighInt32x4(FloatRegister src, FloatRegister dest) { + if (src == dest || HasAVX()) { + vmovhlps(src, src, dest); + } else { + vpshufd(ComputeShuffleMask(2, 3, 2, 3), src, dest); + } + vpmovsxdq(Operand(dest), dest); +} + +void MacroAssembler::unsignedWidenHighInt32x4(FloatRegister src, + FloatRegister dest) { + ScratchSimd128Scope scratch(*this); + src = moveSimd128IntIfNotAVX(src, dest); + vpxor(scratch, scratch, scratch); + vpunpckhdq(scratch, src, dest); +} + +// Floating multiply-accumulate: srcDest [+-]= src1 * src2 +// The Intel FMA feature is some AVX* special sauce, no support yet. 
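+// When HasFMA() reports support the fused vfmadd231/vfnmadd231 forms are
+// emitted directly; otherwise the helpers below emulate the operation with an
+// unfused multiply into a scratch register followed by an add/sub, so the
+// intermediate product is rounded once before it is accumulated.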
+ +void MacroAssembler::fmaFloat32x4(FloatRegister src1, FloatRegister src2, + FloatRegister srcDest) { + if (HasFMA()) { + vfmadd231ps(src2, src1, srcDest); + return; + } + ScratchSimd128Scope scratch(*this); + src1 = moveSimd128FloatIfNotAVX(src1, scratch); + mulFloat32x4(src1, src2, scratch); + addFloat32x4(srcDest, scratch, srcDest); +} + +void MacroAssembler::fnmaFloat32x4(FloatRegister src1, FloatRegister src2, + FloatRegister srcDest) { + if (HasFMA()) { + vfnmadd231ps(src2, src1, srcDest); + return; + } + ScratchSimd128Scope scratch(*this); + src1 = moveSimd128FloatIfNotAVX(src1, scratch); + mulFloat32x4(src1, src2, scratch); + subFloat32x4(srcDest, scratch, srcDest); +} + +void MacroAssembler::fmaFloat64x2(FloatRegister src1, FloatRegister src2, + FloatRegister srcDest) { + if (HasFMA()) { + vfmadd231pd(src2, src1, srcDest); + return; + } + ScratchSimd128Scope scratch(*this); + src1 = moveSimd128FloatIfNotAVX(src1, scratch); + mulFloat64x2(src1, src2, scratch); + addFloat64x2(srcDest, scratch, srcDest); +} + +void MacroAssembler::fnmaFloat64x2(FloatRegister src1, FloatRegister src2, + FloatRegister srcDest) { + if (HasFMA()) { + vfnmadd231pd(src2, src1, srcDest); + return; + } + ScratchSimd128Scope scratch(*this); + src1 = moveSimd128FloatIfNotAVX(src1, scratch); + mulFloat64x2(src1, src2, scratch); + subFloat64x2(srcDest, scratch, srcDest); +} + +void MacroAssembler::minFloat32x4Relaxed(FloatRegister src, + FloatRegister srcDest) { + vminps(Operand(src), srcDest, srcDest); +} + +void MacroAssembler::minFloat32x4Relaxed(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vminps(Operand(rhs), lhs, dest); +} + +void MacroAssembler::maxFloat32x4Relaxed(FloatRegister src, + FloatRegister srcDest) { + vmaxps(Operand(src), srcDest, srcDest); +} + +void MacroAssembler::maxFloat32x4Relaxed(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vmaxps(Operand(rhs), lhs, dest); +} + +void MacroAssembler::minFloat64x2Relaxed(FloatRegister src, + FloatRegister srcDest) { + vminpd(Operand(src), srcDest, srcDest); +} + +void MacroAssembler::minFloat64x2Relaxed(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vminpd(Operand(rhs), lhs, dest); +} + +void MacroAssembler::maxFloat64x2Relaxed(FloatRegister src, + FloatRegister srcDest) { + vmaxpd(Operand(src), srcDest, srcDest); +} + +void MacroAssembler::maxFloat64x2Relaxed(FloatRegister lhs, FloatRegister rhs, + FloatRegister dest) { + vmaxpd(Operand(rhs), lhs, dest); +} + +// ======================================================================== +// Truncate floating point. + +void MacroAssembler::truncateFloat32ToInt64(Address src, Address dest, + Register temp) { + if (Assembler::HasSSE3()) { + fld32(Operand(src)); + fisttp(Operand(dest)); + return; + } + + if (src.base == esp) { + src.offset += 2 * sizeof(int32_t); + } + if (dest.base == esp) { + dest.offset += 2 * sizeof(int32_t); + } + + reserveStack(2 * sizeof(int32_t)); + + // Set conversion to truncation. + fnstcw(Operand(esp, 0)); + load32(Operand(esp, 0), temp); + andl(Imm32(~0xFF00), temp); + orl(Imm32(0xCFF), temp); + store32(temp, Address(esp, sizeof(int32_t))); + fldcw(Operand(esp, sizeof(int32_t))); + + // Load double on fp stack, convert and load regular stack. + fld32(Operand(src)); + fistp(Operand(dest)); + + // Reset the conversion flag. 
+ fldcw(Operand(esp, 0)); + + freeStack(2 * sizeof(int32_t)); +} +void MacroAssembler::truncateDoubleToInt64(Address src, Address dest, + Register temp) { + if (Assembler::HasSSE3()) { + fld(Operand(src)); + fisttp(Operand(dest)); + return; + } + + if (src.base == esp) { + src.offset += 2 * sizeof(int32_t); + } + if (dest.base == esp) { + dest.offset += 2 * sizeof(int32_t); + } + + reserveStack(2 * sizeof(int32_t)); + + // Set conversion to truncation. + fnstcw(Operand(esp, 0)); + load32(Operand(esp, 0), temp); + andl(Imm32(~0xFF00), temp); + orl(Imm32(0xCFF), temp); + store32(temp, Address(esp, 1 * sizeof(int32_t))); + fldcw(Operand(esp, 1 * sizeof(int32_t))); + + // Load double on fp stack, convert and load regular stack. + fld(Operand(src)); + fistp(Operand(dest)); + + // Reset the conversion flag. + fldcw(Operand(esp, 0)); + + freeStack(2 * sizeof(int32_t)); +} + +// =============================================================== +// Clamping functions. + +void MacroAssembler::clampIntToUint8(Register reg) { + Label inRange; + branchTest32(Assembler::Zero, reg, Imm32(0xffffff00), &inRange); + { + sarl(Imm32(31), reg); + notl(reg); + andl(Imm32(255), reg); + } + bind(&inRange); +} + +//}}} check_macroassembler_style +// =============================================================== + +} // namespace jit +} // namespace js + +#endif /* jit_x86_shared_MacroAssembler_x86_shared_inl_h */ diff --git a/js/src/jit/x86-shared/MacroAssembler-x86-shared.cpp b/js/src/jit/x86-shared/MacroAssembler-x86-shared.cpp new file mode 100644 index 0000000000..185c555be0 --- /dev/null +++ b/js/src/jit/x86-shared/MacroAssembler-x86-shared.cpp @@ -0,0 +1,2132 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "jit/x86-shared/MacroAssembler-x86-shared.h" + +#include "mozilla/Casting.h" + +#include "jsmath.h" + +#include "jit/JitFrames.h" +#include "jit/MacroAssembler.h" +#include "js/ScalarType.h" // js::Scalar::Type + +#include "jit/MacroAssembler-inl.h" + +using namespace js; +using namespace js::jit; + +// Note: this function clobbers the input register. +void MacroAssembler::clampDoubleToUint8(FloatRegister input, Register output) { + ScratchDoubleScope scratch(*this); + MOZ_ASSERT(input != scratch); + Label positive, done; + + // <= 0 or NaN --> 0 + zeroDouble(scratch); + branchDouble(DoubleGreaterThan, input, scratch, &positive); + { + move32(Imm32(0), output); + jump(&done); + } + + bind(&positive); + + // Add 0.5 and truncate. + loadConstantDouble(0.5, scratch); + addDouble(scratch, input); + + Label outOfRange; + + // Truncate to int32 and ensure the result <= 255. This relies on the + // processor setting output to a value > 255 for doubles outside the int32 + // range (for instance 0x80000000). + vcvttsd2si(input, output); + branch32(Assembler::Above, output, Imm32(255), &outOfRange); + { + // Check if we had a tie. + convertInt32ToDouble(output, scratch); + branchDouble(DoubleNotEqual, input, scratch, &done); + + // It was a tie. Mask out the ones bit to get an even value. + // See also js_TypedArray_uint8_clamp_double. 
+ and32(Imm32(~1), output); + jump(&done); + } + + // > 255 --> 255 + bind(&outOfRange); + { move32(Imm32(255), output); } + + bind(&done); +} + +bool MacroAssemblerX86Shared::buildOOLFakeExitFrame(void* fakeReturnAddr) { + asMasm().PushFrameDescriptor(FrameType::IonJS); + asMasm().Push(ImmPtr(fakeReturnAddr)); + asMasm().Push(FramePointer); + return true; +} + +void MacroAssemblerX86Shared::branchNegativeZero(FloatRegister reg, + Register scratch, Label* label, + bool maybeNonZero) { + // Determines whether the low double contained in the XMM register reg + // is equal to -0.0. + +#if defined(JS_CODEGEN_X86) + Label nonZero; + + // if not already compared to zero + if (maybeNonZero) { + ScratchDoubleScope scratchDouble(asMasm()); + + // Compare to zero. Lets through {0, -0}. + zeroDouble(scratchDouble); + + // If reg is non-zero, jump to nonZero. + asMasm().branchDouble(DoubleNotEqual, reg, scratchDouble, &nonZero); + } + // Input register is either zero or negative zero. Retrieve sign of input. + vmovmskpd(reg, scratch); + + // If reg is 1 or 3, input is negative zero. + // If reg is 0 or 2, input is a normal zero. + asMasm().branchTest32(NonZero, scratch, Imm32(1), label); + + bind(&nonZero); +#elif defined(JS_CODEGEN_X64) + vmovq(reg, scratch); + cmpq(Imm32(1), scratch); + j(Overflow, label); +#endif +} + +void MacroAssemblerX86Shared::branchNegativeZeroFloat32(FloatRegister reg, + Register scratch, + Label* label) { + vmovd(reg, scratch); + cmp32(scratch, Imm32(1)); + j(Overflow, label); +} + +MacroAssembler& MacroAssemblerX86Shared::asMasm() { + return *static_cast<MacroAssembler*>(this); +} + +const MacroAssembler& MacroAssemblerX86Shared::asMasm() const { + return *static_cast<const MacroAssembler*>(this); +} + +template <class T, class Map> +T* MacroAssemblerX86Shared::getConstant(const typename T::Pod& value, Map& map, + Vector<T, 0, SystemAllocPolicy>& vec) { + using AddPtr = typename Map::AddPtr; + size_t index; + if (AddPtr p = map.lookupForAdd(value)) { + index = p->value(); + } else { + index = vec.length(); + enoughMemory_ &= vec.append(T(value)); + if (!enoughMemory_) { + return nullptr; + } + enoughMemory_ &= map.add(p, value, index); + if (!enoughMemory_) { + return nullptr; + } + } + return &vec[index]; +} + +MacroAssemblerX86Shared::Float* MacroAssemblerX86Shared::getFloat(float f) { + return getConstant<Float, FloatMap>(f, floatMap_, floats_); +} + +MacroAssemblerX86Shared::Double* MacroAssemblerX86Shared::getDouble(double d) { + return getConstant<Double, DoubleMap>(d, doubleMap_, doubles_); +} + +MacroAssemblerX86Shared::SimdData* MacroAssemblerX86Shared::getSimdData( + const SimdConstant& v) { + return getConstant<SimdData, SimdMap>(v, simdMap_, simds_); +} + +void MacroAssemblerX86Shared::binarySimd128( + const SimdConstant& rhs, FloatRegister lhsDest, + void (MacroAssembler::*regOp)(const Operand&, FloatRegister, FloatRegister), + void (MacroAssembler::*constOp)(const SimdConstant&, FloatRegister)) { + ScratchSimd128Scope scratch(asMasm()); + if (maybeInlineSimd128Int(rhs, scratch)) { + (asMasm().*regOp)(Operand(scratch), lhsDest, lhsDest); + } else { + (asMasm().*constOp)(rhs, lhsDest); + } +} + +void MacroAssemblerX86Shared::binarySimd128( + FloatRegister lhs, const SimdConstant& rhs, FloatRegister dest, + void (MacroAssembler::*regOp)(const Operand&, FloatRegister, FloatRegister), + void (MacroAssembler::*constOp)(const SimdConstant&, FloatRegister, + FloatRegister)) { + ScratchSimd128Scope scratch(asMasm()); + if (maybeInlineSimd128Int(rhs, scratch)) { + 
(asMasm().*regOp)(Operand(scratch), lhs, dest); + } else { + (asMasm().*constOp)(rhs, lhs, dest); + } +} + +void MacroAssemblerX86Shared::binarySimd128( + const SimdConstant& rhs, FloatRegister lhs, + void (MacroAssembler::*regOp)(const Operand&, FloatRegister), + void (MacroAssembler::*constOp)(const SimdConstant&, FloatRegister)) { + ScratchSimd128Scope scratch(asMasm()); + if (maybeInlineSimd128Int(rhs, scratch)) { + (asMasm().*regOp)(Operand(scratch), lhs); + } else { + (asMasm().*constOp)(rhs, lhs); + } +} + +void MacroAssemblerX86Shared::bitwiseTestSimd128(const SimdConstant& rhs, + FloatRegister lhs) { + ScratchSimd128Scope scratch(asMasm()); + if (maybeInlineSimd128Int(rhs, scratch)) { + vptest(scratch, lhs); + } else { + asMasm().vptestSimd128(rhs, lhs); + } +} + +void MacroAssemblerX86Shared::minMaxDouble(FloatRegister first, + FloatRegister second, bool canBeNaN, + bool isMax) { + Label done, nan, minMaxInst; + + // Do a vucomisd to catch equality and NaNs, which both require special + // handling. If the operands are ordered and inequal, we branch straight to + // the min/max instruction. If we wanted, we could also branch for less-than + // or greater-than here instead of using min/max, however these conditions + // will sometimes be hard on the branch predictor. + vucomisd(second, first); + j(Assembler::NotEqual, &minMaxInst); + if (canBeNaN) { + j(Assembler::Parity, &nan); + } + + // Ordered and equal. The operands are bit-identical unless they are zero + // and negative zero. These instructions merge the sign bits in that + // case, and are no-ops otherwise. + if (isMax) { + vandpd(second, first, first); + } else { + vorpd(second, first, first); + } + jump(&done); + + // x86's min/max are not symmetric; if either operand is a NaN, they return + // the read-only operand. We need to return a NaN if either operand is a + // NaN, so we explicitly check for a NaN in the read-write operand. + if (canBeNaN) { + bind(&nan); + vucomisd(first, first); + j(Assembler::Parity, &done); + } + + // When the values are inequal, or second is NaN, x86's min and max will + // return the value we need. + bind(&minMaxInst); + if (isMax) { + vmaxsd(second, first, first); + } else { + vminsd(second, first, first); + } + + bind(&done); +} + +void MacroAssemblerX86Shared::minMaxFloat32(FloatRegister first, + FloatRegister second, bool canBeNaN, + bool isMax) { + Label done, nan, minMaxInst; + + // Do a vucomiss to catch equality and NaNs, which both require special + // handling. If the operands are ordered and inequal, we branch straight to + // the min/max instruction. If we wanted, we could also branch for less-than + // or greater-than here instead of using min/max, however these conditions + // will sometimes be hard on the branch predictor. + vucomiss(second, first); + j(Assembler::NotEqual, &minMaxInst); + if (canBeNaN) { + j(Assembler::Parity, &nan); + } + + // Ordered and equal. The operands are bit-identical unless they are zero + // and negative zero. These instructions merge the sign bits in that + // case, and are no-ops otherwise. + if (isMax) { + vandps(second, first, first); + } else { + vorps(second, first, first); + } + jump(&done); + + // x86's min/max are not symmetric; if either operand is a NaN, they return + // the read-only operand. We need to return a NaN if either operand is a + // NaN, so we explicitly check for a NaN in the read-write operand. 
+ if (canBeNaN) { + bind(&nan); + vucomiss(first, first); + j(Assembler::Parity, &done); + } + + // When the values are inequal, or second is NaN, x86's min and max will + // return the value we need. + bind(&minMaxInst); + if (isMax) { + vmaxss(second, first, first); + } else { + vminss(second, first, first); + } + + bind(&done); +} + +#ifdef ENABLE_WASM_SIMD +bool MacroAssembler::MustMaskShiftCountSimd128(wasm::SimdOp op, int32_t* mask) { + switch (op) { + case wasm::SimdOp::I8x16Shl: + case wasm::SimdOp::I8x16ShrU: + case wasm::SimdOp::I8x16ShrS: + *mask = 7; + break; + case wasm::SimdOp::I16x8Shl: + case wasm::SimdOp::I16x8ShrU: + case wasm::SimdOp::I16x8ShrS: + *mask = 15; + break; + case wasm::SimdOp::I32x4Shl: + case wasm::SimdOp::I32x4ShrU: + case wasm::SimdOp::I32x4ShrS: + *mask = 31; + break; + case wasm::SimdOp::I64x2Shl: + case wasm::SimdOp::I64x2ShrU: + case wasm::SimdOp::I64x2ShrS: + *mask = 63; + break; + default: + MOZ_CRASH("Unexpected shift operation"); + } + return true; +} +#endif + +//{{{ check_macroassembler_style +// =============================================================== +// MacroAssembler high-level usage. + +void MacroAssembler::flush() {} + +void MacroAssembler::comment(const char* msg) { masm.comment(msg); } + +// This operation really consists of five phases, in order to enforce the +// restriction that on x86_shared, srcDest must be eax and edx will be +// clobbered. +// +// Input: { rhs, lhsOutput } +// +// [PUSH] Preserve registers +// [MOVE] Generate moves to specific registers +// +// [DIV] Input: { regForRhs, EAX } +// [DIV] extend EAX into EDX +// [DIV] x86 Division operator +// [DIV] Ouptut: { EAX, EDX } +// +// [MOVE] Move specific registers to outputs +// [POP] Restore registers +// +// Output: { lhsOutput, remainderOutput } +void MacroAssembler::flexibleDivMod32(Register rhs, Register lhsOutput, + Register remOutput, bool isUnsigned, + const LiveRegisterSet&) { + // Currently this helper can't handle this situation. + MOZ_ASSERT(lhsOutput != rhs); + MOZ_ASSERT(lhsOutput != remOutput); + + // Choose a register that is not edx, or eax to hold the rhs; + // ebx is chosen arbitrarily, and will be preserved if necessary. + Register regForRhs = (rhs == eax || rhs == edx) ? ebx : rhs; + + // Add registers we will be clobbering as live, but + // also remove the set we do not restore. + LiveRegisterSet preserve; + preserve.add(edx); + preserve.add(eax); + preserve.add(regForRhs); + + preserve.takeUnchecked(lhsOutput); + preserve.takeUnchecked(remOutput); + + PushRegsInMask(preserve); + + // Shuffle input into place. + moveRegPair(lhsOutput, rhs, eax, regForRhs); + if (oom()) { + return; + } + + // Sign extend eax into edx to make (edx:eax): idiv/udiv are 64-bit. 
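+  // For unsigned division edx is zeroed instead, so (edx:eax) is the
+  // zero-extension of eax; the signed path uses cdq to sign-extend.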
+ if (isUnsigned) { + mov(ImmWord(0), edx); + udiv(regForRhs); + } else { + cdq(); + idiv(regForRhs); + } + + moveRegPair(eax, edx, lhsOutput, remOutput); + if (oom()) { + return; + } + + PopRegsInMask(preserve); +} + +void MacroAssembler::flexibleQuotient32( + Register rhs, Register srcDest, bool isUnsigned, + const LiveRegisterSet& volatileLiveRegs) { + // Choose an arbitrary register that isn't eax, edx, rhs or srcDest; + AllocatableGeneralRegisterSet regs(GeneralRegisterSet::All()); + regs.takeUnchecked(eax); + regs.takeUnchecked(edx); + regs.takeUnchecked(rhs); + regs.takeUnchecked(srcDest); + + Register remOut = regs.takeAny(); + push(remOut); + flexibleDivMod32(rhs, srcDest, remOut, isUnsigned, volatileLiveRegs); + pop(remOut); +} + +void MacroAssembler::flexibleRemainder32( + Register rhs, Register srcDest, bool isUnsigned, + const LiveRegisterSet& volatileLiveRegs) { + // Choose an arbitrary register that isn't eax, edx, rhs or srcDest + AllocatableGeneralRegisterSet regs(GeneralRegisterSet::All()); + regs.takeUnchecked(eax); + regs.takeUnchecked(edx); + regs.takeUnchecked(rhs); + regs.takeUnchecked(srcDest); + + Register remOut = regs.takeAny(); + push(remOut); + flexibleDivMod32(rhs, srcDest, remOut, isUnsigned, volatileLiveRegs); + mov(remOut, srcDest); + pop(remOut); +} + +// =============================================================== +// Stack manipulation functions. + +size_t MacroAssembler::PushRegsInMaskSizeInBytes(LiveRegisterSet set) { + FloatRegisterSet fpuSet(set.fpus().reduceSetForPush()); + return set.gprs().size() * sizeof(intptr_t) + fpuSet.getPushSizeInBytes(); +} + +void MacroAssembler::PushRegsInMask(LiveRegisterSet set) { + mozilla::DebugOnly<size_t> framePushedInitial = framePushed(); + + FloatRegisterSet fpuSet(set.fpus().reduceSetForPush()); + unsigned numFpu = fpuSet.size(); + int32_t diffF = fpuSet.getPushSizeInBytes(); + int32_t diffG = set.gprs().size() * sizeof(intptr_t); + + // On x86, always use push to push the integer registers, as it's fast + // on modern hardware and it's a small instruction. + for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more(); ++iter) { + diffG -= sizeof(intptr_t); + Push(*iter); + } + MOZ_ASSERT(diffG == 0); + (void)diffG; + + reserveStack(diffF); + for (FloatRegisterBackwardIterator iter(fpuSet); iter.more(); ++iter) { + FloatRegister reg = *iter; + diffF -= reg.size(); + numFpu -= 1; + Address spillAddress(StackPointer, diffF); + if (reg.isDouble()) { + storeDouble(reg, spillAddress); + } else if (reg.isSingle()) { + storeFloat32(reg, spillAddress); + } else if (reg.isSimd128()) { + storeUnalignedSimd128(reg, spillAddress); + } else { + MOZ_CRASH("Unknown register type."); + } + } + MOZ_ASSERT(numFpu == 0); + (void)numFpu; + + // x64 padding to keep the stack aligned on uintptr_t. Keep in sync with + // GetPushSizeInBytes. + size_t alignExtra = ((size_t)diffF) % sizeof(uintptr_t); + MOZ_ASSERT_IF(sizeof(uintptr_t) == 8, alignExtra == 0 || alignExtra == 4); + MOZ_ASSERT_IF(sizeof(uintptr_t) == 4, alignExtra == 0); + diffF -= alignExtra; + MOZ_ASSERT(diffF == 0); + + // The macroassembler will keep the stack sizeof(uintptr_t)-aligned, so + // we don't need to take into account `alignExtra` here. 
+ MOZ_ASSERT(framePushed() - framePushedInitial == + PushRegsInMaskSizeInBytes(set)); +} + +void MacroAssembler::storeRegsInMask(LiveRegisterSet set, Address dest, + Register) { + mozilla::DebugOnly<size_t> offsetInitial = dest.offset; + + FloatRegisterSet fpuSet(set.fpus().reduceSetForPush()); + unsigned numFpu = fpuSet.size(); + int32_t diffF = fpuSet.getPushSizeInBytes(); + int32_t diffG = set.gprs().size() * sizeof(intptr_t); + + MOZ_ASSERT(dest.offset >= diffG + diffF); + + for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more(); ++iter) { + diffG -= sizeof(intptr_t); + dest.offset -= sizeof(intptr_t); + storePtr(*iter, dest); + } + MOZ_ASSERT(diffG == 0); + (void)diffG; + + for (FloatRegisterBackwardIterator iter(fpuSet); iter.more(); ++iter) { + FloatRegister reg = *iter; + diffF -= reg.size(); + numFpu -= 1; + dest.offset -= reg.size(); + if (reg.isDouble()) { + storeDouble(reg, dest); + } else if (reg.isSingle()) { + storeFloat32(reg, dest); + } else if (reg.isSimd128()) { + storeUnalignedSimd128(reg, dest); + } else { + MOZ_CRASH("Unknown register type."); + } + } + MOZ_ASSERT(numFpu == 0); + (void)numFpu; + + // x64 padding to keep the stack aligned on uintptr_t. Keep in sync with + // GetPushSizeInBytes. + size_t alignExtra = ((size_t)diffF) % sizeof(uintptr_t); + MOZ_ASSERT_IF(sizeof(uintptr_t) == 8, alignExtra == 0 || alignExtra == 4); + MOZ_ASSERT_IF(sizeof(uintptr_t) == 4, alignExtra == 0); + diffF -= alignExtra; + MOZ_ASSERT(diffF == 0); + + // What this means is: if `alignExtra` is nonzero, then the save area size + // actually used is `alignExtra` bytes smaller than what + // PushRegsInMaskSizeInBytes claims. Hence we need to compensate for that. + MOZ_ASSERT(alignExtra + offsetInitial - dest.offset == + PushRegsInMaskSizeInBytes(set)); +} + +void MacroAssembler::PopRegsInMaskIgnore(LiveRegisterSet set, + LiveRegisterSet ignore) { + mozilla::DebugOnly<size_t> framePushedInitial = framePushed(); + + FloatRegisterSet fpuSet(set.fpus().reduceSetForPush()); + unsigned numFpu = fpuSet.size(); + int32_t diffG = set.gprs().size() * sizeof(intptr_t); + int32_t diffF = fpuSet.getPushSizeInBytes(); + const int32_t reservedG = diffG; + const int32_t reservedF = diffF; + + for (FloatRegisterBackwardIterator iter(fpuSet); iter.more(); ++iter) { + FloatRegister reg = *iter; + diffF -= reg.size(); + numFpu -= 1; + if (ignore.has(reg)) { + continue; + } + + Address spillAddress(StackPointer, diffF); + if (reg.isDouble()) { + loadDouble(spillAddress, reg); + } else if (reg.isSingle()) { + loadFloat32(spillAddress, reg); + } else if (reg.isSimd128()) { + loadUnalignedSimd128(spillAddress, reg); + } else { + MOZ_CRASH("Unknown register type."); + } + } + freeStack(reservedF); + MOZ_ASSERT(numFpu == 0); + (void)numFpu; + // x64 padding to keep the stack aligned on uintptr_t. Keep in sync with + // GetPushBytesInSize. + diffF -= diffF % sizeof(uintptr_t); + MOZ_ASSERT(diffF == 0); + + // On x86, use pop to pop the integer registers, if we're not going to + // ignore any slots, as it's fast on modern hardware and it's a small + // instruction. 
+ if (ignore.emptyGeneral()) { + for (GeneralRegisterForwardIterator iter(set.gprs()); iter.more(); ++iter) { + diffG -= sizeof(intptr_t); + Pop(*iter); + } + } else { + for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more(); + ++iter) { + diffG -= sizeof(intptr_t); + if (!ignore.has(*iter)) { + loadPtr(Address(StackPointer, diffG), *iter); + } + } + freeStack(reservedG); + } + MOZ_ASSERT(diffG == 0); + + MOZ_ASSERT(framePushedInitial - framePushed() == + PushRegsInMaskSizeInBytes(set)); +} + +void MacroAssembler::Push(const Operand op) { + push(op); + adjustFrame(sizeof(intptr_t)); +} + +void MacroAssembler::Push(Register reg) { + push(reg); + adjustFrame(sizeof(intptr_t)); +} + +void MacroAssembler::Push(const Imm32 imm) { + push(imm); + adjustFrame(sizeof(intptr_t)); +} + +void MacroAssembler::Push(const ImmWord imm) { + push(imm); + adjustFrame(sizeof(intptr_t)); +} + +void MacroAssembler::Push(const ImmPtr imm) { + Push(ImmWord(uintptr_t(imm.value))); +} + +void MacroAssembler::Push(const ImmGCPtr ptr) { + push(ptr); + adjustFrame(sizeof(intptr_t)); +} + +void MacroAssembler::Push(FloatRegister t) { + push(t); + adjustFrame(sizeof(double)); +} + +void MacroAssembler::PushFlags() { + pushFlags(); + adjustFrame(sizeof(intptr_t)); +} + +void MacroAssembler::Pop(const Operand op) { + pop(op); + implicitPop(sizeof(intptr_t)); +} + +void MacroAssembler::Pop(Register reg) { + pop(reg); + implicitPop(sizeof(intptr_t)); +} + +void MacroAssembler::Pop(FloatRegister reg) { + pop(reg); + implicitPop(sizeof(double)); +} + +void MacroAssembler::Pop(const ValueOperand& val) { + popValue(val); + implicitPop(sizeof(Value)); +} + +void MacroAssembler::PopFlags() { + popFlags(); + implicitPop(sizeof(intptr_t)); +} + +void MacroAssembler::PopStackPtr() { Pop(StackPointer); } + +// =============================================================== +// Simple call functions. + +CodeOffset MacroAssembler::call(Register reg) { return Assembler::call(reg); } + +CodeOffset MacroAssembler::call(Label* label) { return Assembler::call(label); } + +void MacroAssembler::call(const Address& addr) { + Assembler::call(Operand(addr.base, addr.offset)); +} + +CodeOffset MacroAssembler::call(wasm::SymbolicAddress target) { + mov(target, eax); + return Assembler::call(eax); +} + +void MacroAssembler::call(ImmWord target) { Assembler::call(target); } + +void MacroAssembler::call(ImmPtr target) { Assembler::call(target); } + +void MacroAssembler::call(JitCode* target) { Assembler::call(target); } + +CodeOffset MacroAssembler::callWithPatch() { + return Assembler::callWithPatch(); +} +void MacroAssembler::patchCall(uint32_t callerOffset, uint32_t calleeOffset) { + Assembler::patchCall(callerOffset, calleeOffset); +} + +void MacroAssembler::callAndPushReturnAddress(Register reg) { call(reg); } + +void MacroAssembler::callAndPushReturnAddress(Label* label) { call(label); } + +// =============================================================== +// Patchable near/far jumps. 
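+// A patchable call site is emitted as a five-byte nop, the same size as a
+// rel32 call instruction, so patchNopToCall/patchCallToNop can rewrite the
+// site in place without moving any other code.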
+ +CodeOffset MacroAssembler::farJumpWithPatch() { + return Assembler::farJumpWithPatch(); +} + +void MacroAssembler::patchFarJump(CodeOffset farJump, uint32_t targetOffset) { + Assembler::patchFarJump(farJump, targetOffset); +} + +CodeOffset MacroAssembler::nopPatchableToCall() { + masm.nop_five(); + return CodeOffset(currentOffset()); +} + +void MacroAssembler::patchNopToCall(uint8_t* callsite, uint8_t* target) { + Assembler::patchFiveByteNopToCall(callsite, target); +} + +void MacroAssembler::patchCallToNop(uint8_t* callsite) { + Assembler::patchCallToFiveByteNop(callsite); +} + +// =============================================================== +// Jit Frames. + +uint32_t MacroAssembler::pushFakeReturnAddress(Register scratch) { + CodeLabel cl; + + mov(&cl, scratch); + Push(scratch); + bind(&cl); + uint32_t retAddr = currentOffset(); + + addCodeLabel(cl); + return retAddr; +} + +// =============================================================== +// WebAssembly + +CodeOffset MacroAssembler::wasmTrapInstruction() { return ud2(); } + +void MacroAssembler::wasmBoundsCheck32(Condition cond, Register index, + Register boundsCheckLimit, Label* ok) { + cmp32(index, boundsCheckLimit); + j(cond, ok); + if (JitOptions.spectreIndexMasking) { + cmovCCl(cond, Operand(boundsCheckLimit), index); + } +} + +void MacroAssembler::wasmBoundsCheck32(Condition cond, Register index, + Address boundsCheckLimit, Label* ok) { + cmp32(index, Operand(boundsCheckLimit)); + j(cond, ok); + if (JitOptions.spectreIndexMasking) { + cmovCCl(cond, Operand(boundsCheckLimit), index); + } +} + +// RAII class that generates the jumps to traps when it's destructed, to +// prevent some code duplication in the outOfLineWasmTruncateXtoY methods. +struct MOZ_RAII AutoHandleWasmTruncateToIntErrors { + MacroAssembler& masm; + Label inputIsNaN; + Label intOverflow; + wasm::BytecodeOffset off; + + explicit AutoHandleWasmTruncateToIntErrors(MacroAssembler& masm, + wasm::BytecodeOffset off) + : masm(masm), off(off) {} + + ~AutoHandleWasmTruncateToIntErrors() { + // Handle errors. These cases are not in arbitrary order: code will + // fall through to intOverflow. + masm.bind(&intOverflow); + masm.wasmTrap(wasm::Trap::IntegerOverflow, off); + + masm.bind(&inputIsNaN); + masm.wasmTrap(wasm::Trap::InvalidConversionToInteger, off); + } +}; + +void MacroAssembler::wasmTruncateDoubleToInt32(FloatRegister input, + Register output, + bool isSaturating, + Label* oolEntry) { + vcvttsd2si(input, output); + cmp32(output, Imm32(1)); + j(Assembler::Overflow, oolEntry); +} + +void MacroAssembler::wasmTruncateFloat32ToInt32(FloatRegister input, + Register output, + bool isSaturating, + Label* oolEntry) { + vcvttss2si(input, output); + cmp32(output, Imm32(1)); + j(Assembler::Overflow, oolEntry); +} + +void MacroAssembler::oolWasmTruncateCheckF64ToI32(FloatRegister input, + Register output, + TruncFlags flags, + wasm::BytecodeOffset off, + Label* rejoin) { + bool isUnsigned = flags & TRUNC_UNSIGNED; + bool isSaturating = flags & TRUNC_SATURATING; + + if (isSaturating) { + if (isUnsigned) { + // Negative overflow and NaN both are converted to 0, and the only + // other case is positive overflow which is converted to + // UINT32_MAX. 
+ Label nonNegative; + ScratchDoubleScope fpscratch(*this); + loadConstantDouble(0.0, fpscratch); + branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, + &nonNegative); + move32(Imm32(0), output); + jump(rejoin); + + bind(&nonNegative); + move32(Imm32(UINT32_MAX), output); + } else { + // Negative overflow is already saturated to INT32_MIN, so we only + // have to handle NaN and positive overflow here. + Label notNaN; + branchDouble(Assembler::DoubleOrdered, input, input, ¬NaN); + move32(Imm32(0), output); + jump(rejoin); + + bind(¬NaN); + ScratchDoubleScope fpscratch(*this); + loadConstantDouble(0.0, fpscratch); + branchDouble(Assembler::DoubleLessThan, input, fpscratch, rejoin); + sub32(Imm32(1), output); + } + jump(rejoin); + return; + } + + AutoHandleWasmTruncateToIntErrors traps(*this, off); + + // Eagerly take care of NaNs. + branchDouble(Assembler::DoubleUnordered, input, input, &traps.inputIsNaN); + + // For unsigned, fall through to intOverflow failure case. + if (isUnsigned) { + return; + } + + // Handle special values. + + // We've used vcvttsd2si. The only valid double values that can + // truncate to INT32_MIN are in ]INT32_MIN - 1; INT32_MIN]. + ScratchDoubleScope fpscratch(*this); + loadConstantDouble(double(INT32_MIN) - 1.0, fpscratch); + branchDouble(Assembler::DoubleLessThanOrEqual, input, fpscratch, + &traps.intOverflow); + + loadConstantDouble(0.0, fpscratch); + branchDouble(Assembler::DoubleGreaterThan, input, fpscratch, + &traps.intOverflow); + jump(rejoin); +} + +void MacroAssembler::oolWasmTruncateCheckF32ToI32(FloatRegister input, + Register output, + TruncFlags flags, + wasm::BytecodeOffset off, + Label* rejoin) { + bool isUnsigned = flags & TRUNC_UNSIGNED; + bool isSaturating = flags & TRUNC_SATURATING; + + if (isSaturating) { + if (isUnsigned) { + // Negative overflow and NaN both are converted to 0, and the only + // other case is positive overflow which is converted to + // UINT32_MAX. + Label nonNegative; + ScratchFloat32Scope fpscratch(*this); + loadConstantFloat32(0.0f, fpscratch); + branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, + &nonNegative); + move32(Imm32(0), output); + jump(rejoin); + + bind(&nonNegative); + move32(Imm32(UINT32_MAX), output); + } else { + // Negative overflow is already saturated to INT32_MIN, so we only + // have to handle NaN and positive overflow here. + Label notNaN; + branchFloat(Assembler::DoubleOrdered, input, input, ¬NaN); + move32(Imm32(0), output); + jump(rejoin); + + bind(¬NaN); + ScratchFloat32Scope fpscratch(*this); + loadConstantFloat32(0.0f, fpscratch); + branchFloat(Assembler::DoubleLessThan, input, fpscratch, rejoin); + sub32(Imm32(1), output); + } + jump(rejoin); + return; + } + + AutoHandleWasmTruncateToIntErrors traps(*this, off); + + // Eagerly take care of NaNs. + branchFloat(Assembler::DoubleUnordered, input, input, &traps.inputIsNaN); + + // For unsigned, fall through to intOverflow failure case. + if (isUnsigned) { + return; + } + + // Handle special values. + + // We've used vcvttss2si. Check that the input wasn't + // float(INT32_MIN), which is the only legimitate input that + // would truncate to INT32_MIN. 
+ ScratchFloat32Scope fpscratch(*this); + loadConstantFloat32(float(INT32_MIN), fpscratch); + branchFloat(Assembler::DoubleNotEqual, input, fpscratch, &traps.intOverflow); + jump(rejoin); +} + +void MacroAssembler::oolWasmTruncateCheckF64ToI64(FloatRegister input, + Register64 output, + TruncFlags flags, + wasm::BytecodeOffset off, + Label* rejoin) { + bool isUnsigned = flags & TRUNC_UNSIGNED; + bool isSaturating = flags & TRUNC_SATURATING; + + if (isSaturating) { + if (isUnsigned) { + // Negative overflow and NaN both are converted to 0, and the only + // other case is positive overflow which is converted to + // UINT64_MAX. + Label positive; + ScratchDoubleScope fpscratch(*this); + loadConstantDouble(0.0, fpscratch); + branchDouble(Assembler::DoubleGreaterThan, input, fpscratch, &positive); + move64(Imm64(0), output); + jump(rejoin); + + bind(&positive); + move64(Imm64(UINT64_MAX), output); + } else { + // Negative overflow is already saturated to INT64_MIN, so we only + // have to handle NaN and positive overflow here. + Label notNaN; + branchDouble(Assembler::DoubleOrdered, input, input, ¬NaN); + move64(Imm64(0), output); + jump(rejoin); + + bind(¬NaN); + ScratchDoubleScope fpscratch(*this); + loadConstantDouble(0.0, fpscratch); + branchDouble(Assembler::DoubleLessThan, input, fpscratch, rejoin); + sub64(Imm64(1), output); + } + jump(rejoin); + return; + } + + AutoHandleWasmTruncateToIntErrors traps(*this, off); + + // Eagerly take care of NaNs. + branchDouble(Assembler::DoubleUnordered, input, input, &traps.inputIsNaN); + + // Handle special values. + if (isUnsigned) { + ScratchDoubleScope fpscratch(*this); + loadConstantDouble(0.0, fpscratch); + branchDouble(Assembler::DoubleGreaterThan, input, fpscratch, + &traps.intOverflow); + loadConstantDouble(-1.0, fpscratch); + branchDouble(Assembler::DoubleLessThanOrEqual, input, fpscratch, + &traps.intOverflow); + jump(rejoin); + return; + } + + // We've used vcvtsd2sq. The only legit value whose i64 + // truncation is INT64_MIN is double(INT64_MIN): exponent is so + // high that the highest resolution around is much more than 1. + ScratchDoubleScope fpscratch(*this); + loadConstantDouble(double(int64_t(INT64_MIN)), fpscratch); + branchDouble(Assembler::DoubleNotEqual, input, fpscratch, &traps.intOverflow); + jump(rejoin); +} + +void MacroAssembler::oolWasmTruncateCheckF32ToI64(FloatRegister input, + Register64 output, + TruncFlags flags, + wasm::BytecodeOffset off, + Label* rejoin) { + bool isUnsigned = flags & TRUNC_UNSIGNED; + bool isSaturating = flags & TRUNC_SATURATING; + + if (isSaturating) { + if (isUnsigned) { + // Negative overflow and NaN both are converted to 0, and the only + // other case is positive overflow which is converted to + // UINT64_MAX. + Label positive; + ScratchFloat32Scope fpscratch(*this); + loadConstantFloat32(0.0f, fpscratch); + branchFloat(Assembler::DoubleGreaterThan, input, fpscratch, &positive); + move64(Imm64(0), output); + jump(rejoin); + + bind(&positive); + move64(Imm64(UINT64_MAX), output); + } else { + // Negative overflow is already saturated to INT64_MIN, so we only + // have to handle NaN and positive overflow here. 
+ Label notNaN; + branchFloat(Assembler::DoubleOrdered, input, input, ¬NaN); + move64(Imm64(0), output); + jump(rejoin); + + bind(¬NaN); + ScratchFloat32Scope fpscratch(*this); + loadConstantFloat32(0.0f, fpscratch); + branchFloat(Assembler::DoubleLessThan, input, fpscratch, rejoin); + sub64(Imm64(1), output); + } + jump(rejoin); + return; + } + + AutoHandleWasmTruncateToIntErrors traps(*this, off); + + // Eagerly take care of NaNs. + branchFloat(Assembler::DoubleUnordered, input, input, &traps.inputIsNaN); + + // Handle special values. + if (isUnsigned) { + ScratchFloat32Scope fpscratch(*this); + loadConstantFloat32(0.0f, fpscratch); + branchFloat(Assembler::DoubleGreaterThan, input, fpscratch, + &traps.intOverflow); + loadConstantFloat32(-1.0f, fpscratch); + branchFloat(Assembler::DoubleLessThanOrEqual, input, fpscratch, + &traps.intOverflow); + jump(rejoin); + return; + } + + // We've used vcvtss2sq. See comment in outOfLineWasmTruncateDoubleToInt64. + ScratchFloat32Scope fpscratch(*this); + loadConstantFloat32(float(int64_t(INT64_MIN)), fpscratch); + branchFloat(Assembler::DoubleNotEqual, input, fpscratch, &traps.intOverflow); + jump(rejoin); +} + +void MacroAssembler::enterFakeExitFrameForWasm(Register cxreg, Register scratch, + ExitFrameType type) { + enterFakeExitFrame(cxreg, scratch, type); +} + +// ======================================================================== +// Primitive atomic operations. + +static void ExtendTo32(MacroAssembler& masm, Scalar::Type type, Register r) { + switch (Scalar::byteSize(type)) { + case 1: + if (Scalar::isSignedIntType(type)) { + masm.movsbl(r, r); + } else { + masm.movzbl(r, r); + } + break; + case 2: + if (Scalar::isSignedIntType(type)) { + masm.movswl(r, r); + } else { + masm.movzwl(r, r); + } + break; + default: + break; + } +} + +static inline void CheckBytereg(Register r) { +#ifdef DEBUG + AllocatableGeneralRegisterSet byteRegs(Registers::SingleByteRegs); + MOZ_ASSERT(byteRegs.has(r)); +#endif +} + +static inline void CheckBytereg(Imm32 r) { + // Nothing +} + +template <typename T> +static void CompareExchange(MacroAssembler& masm, + const wasm::MemoryAccessDesc* access, + Scalar::Type type, const T& mem, Register oldval, + Register newval, Register output) { + MOZ_ASSERT(output == eax); + + if (oldval != output) { + masm.movl(oldval, output); + } + + if (access) { + masm.append(*access, masm.size()); + } + + // NOTE: the generated code must match the assembly code in gen_cmpxchg in + // GenerateAtomicOperations.py + switch (Scalar::byteSize(type)) { + case 1: + CheckBytereg(newval); + masm.lock_cmpxchgb(newval, Operand(mem)); + break; + case 2: + masm.lock_cmpxchgw(newval, Operand(mem)); + break; + case 4: + masm.lock_cmpxchgl(newval, Operand(mem)); + break; + } + + ExtendTo32(masm, type, output); +} + +void MacroAssembler::compareExchange(Scalar::Type type, const Synchronization&, + const Address& mem, Register oldval, + Register newval, Register output) { + CompareExchange(*this, nullptr, type, mem, oldval, newval, output); +} + +void MacroAssembler::compareExchange(Scalar::Type type, const Synchronization&, + const BaseIndex& mem, Register oldval, + Register newval, Register output) { + CompareExchange(*this, nullptr, type, mem, oldval, newval, output); +} + +void MacroAssembler::wasmCompareExchange(const wasm::MemoryAccessDesc& access, + const Address& mem, Register oldval, + Register newval, Register output) { + CompareExchange(*this, &access, access.type(), mem, oldval, newval, output); +} + +void 
MacroAssembler::wasmCompareExchange(const wasm::MemoryAccessDesc& access, + const BaseIndex& mem, Register oldval, + Register newval, Register output) { + CompareExchange(*this, &access, access.type(), mem, oldval, newval, output); +} + +template <typename T> +static void AtomicExchange(MacroAssembler& masm, + const wasm::MemoryAccessDesc* access, + Scalar::Type type, const T& mem, Register value, + Register output) +// NOTE: the generated code must match the assembly code in gen_exchange in +// GenerateAtomicOperations.py +{ + if (value != output) { + masm.movl(value, output); + } + + if (access) { + masm.append(*access, masm.size()); + } + + switch (Scalar::byteSize(type)) { + case 1: + CheckBytereg(output); + masm.xchgb(output, Operand(mem)); + break; + case 2: + masm.xchgw(output, Operand(mem)); + break; + case 4: + masm.xchgl(output, Operand(mem)); + break; + default: + MOZ_CRASH("Invalid"); + } + ExtendTo32(masm, type, output); +} + +void MacroAssembler::atomicExchange(Scalar::Type type, const Synchronization&, + const Address& mem, Register value, + Register output) { + AtomicExchange(*this, nullptr, type, mem, value, output); +} + +void MacroAssembler::atomicExchange(Scalar::Type type, const Synchronization&, + const BaseIndex& mem, Register value, + Register output) { + AtomicExchange(*this, nullptr, type, mem, value, output); +} + +void MacroAssembler::wasmAtomicExchange(const wasm::MemoryAccessDesc& access, + const Address& mem, Register value, + Register output) { + AtomicExchange(*this, &access, access.type(), mem, value, output); +} + +void MacroAssembler::wasmAtomicExchange(const wasm::MemoryAccessDesc& access, + const BaseIndex& mem, Register value, + Register output) { + AtomicExchange(*this, &access, access.type(), mem, value, output); +} + +static void SetupValue(MacroAssembler& masm, AtomicOp op, Imm32 src, + Register output) { + if (op == AtomicFetchSubOp) { + masm.movl(Imm32(-src.value), output); + } else { + masm.movl(src, output); + } +} + +static void SetupValue(MacroAssembler& masm, AtomicOp op, Register src, + Register output) { + if (src != output) { + masm.movl(src, output); + } + if (op == AtomicFetchSubOp) { + masm.negl(output); + } +} + +template <typename T, typename V> +static void AtomicFetchOp(MacroAssembler& masm, + const wasm::MemoryAccessDesc* access, + Scalar::Type arrayType, AtomicOp op, V value, + const T& mem, Register temp, Register output) { + // Note value can be an Imm or a Register. 
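+  // Add and sub are implemented with a single lock xadd (SetupValue negates
+  // the value for sub); and/or/xor have no fetching form, so they fall back
+  // to a mov/op/lock cmpxchg retry loop (ATOMIC_BITOP_BODY below).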
+ + // NOTE: the generated code must match the assembly code in gen_fetchop in + // GenerateAtomicOperations.py +#define ATOMIC_BITOP_BODY(LOAD, OP, LOCK_CMPXCHG) \ + do { \ + MOZ_ASSERT(output != temp); \ + MOZ_ASSERT(output == eax); \ + if (access) masm.append(*access, masm.size()); \ + masm.LOAD(Operand(mem), eax); \ + Label again; \ + masm.bind(&again); \ + masm.movl(eax, temp); \ + masm.OP(value, temp); \ + masm.LOCK_CMPXCHG(temp, Operand(mem)); \ + masm.j(MacroAssembler::NonZero, &again); \ + } while (0) + + MOZ_ASSERT_IF(op == AtomicFetchAddOp || op == AtomicFetchSubOp, + temp == InvalidReg); + + switch (Scalar::byteSize(arrayType)) { + case 1: + CheckBytereg(output); + switch (op) { + case AtomicFetchAddOp: + case AtomicFetchSubOp: + CheckBytereg(value); // But not for the bitwise ops + SetupValue(masm, op, value, output); + if (access) masm.append(*access, masm.size()); + masm.lock_xaddb(output, Operand(mem)); + break; + case AtomicFetchAndOp: + CheckBytereg(temp); + ATOMIC_BITOP_BODY(movb, andl, lock_cmpxchgb); + break; + case AtomicFetchOrOp: + CheckBytereg(temp); + ATOMIC_BITOP_BODY(movb, orl, lock_cmpxchgb); + break; + case AtomicFetchXorOp: + CheckBytereg(temp); + ATOMIC_BITOP_BODY(movb, xorl, lock_cmpxchgb); + break; + default: + MOZ_CRASH(); + } + break; + case 2: + switch (op) { + case AtomicFetchAddOp: + case AtomicFetchSubOp: + SetupValue(masm, op, value, output); + if (access) masm.append(*access, masm.size()); + masm.lock_xaddw(output, Operand(mem)); + break; + case AtomicFetchAndOp: + ATOMIC_BITOP_BODY(movw, andl, lock_cmpxchgw); + break; + case AtomicFetchOrOp: + ATOMIC_BITOP_BODY(movw, orl, lock_cmpxchgw); + break; + case AtomicFetchXorOp: + ATOMIC_BITOP_BODY(movw, xorl, lock_cmpxchgw); + break; + default: + MOZ_CRASH(); + } + break; + case 4: + switch (op) { + case AtomicFetchAddOp: + case AtomicFetchSubOp: + SetupValue(masm, op, value, output); + if (access) masm.append(*access, masm.size()); + masm.lock_xaddl(output, Operand(mem)); + break; + case AtomicFetchAndOp: + ATOMIC_BITOP_BODY(movl, andl, lock_cmpxchgl); + break; + case AtomicFetchOrOp: + ATOMIC_BITOP_BODY(movl, orl, lock_cmpxchgl); + break; + case AtomicFetchXorOp: + ATOMIC_BITOP_BODY(movl, xorl, lock_cmpxchgl); + break; + default: + MOZ_CRASH(); + } + break; + } + ExtendTo32(masm, arrayType, output); + +#undef ATOMIC_BITOP_BODY +} + +void MacroAssembler::atomicFetchOp(Scalar::Type arrayType, + const Synchronization&, AtomicOp op, + Register value, const BaseIndex& mem, + Register temp, Register output) { + AtomicFetchOp(*this, nullptr, arrayType, op, value, mem, temp, output); +} + +void MacroAssembler::atomicFetchOp(Scalar::Type arrayType, + const Synchronization&, AtomicOp op, + Register value, const Address& mem, + Register temp, Register output) { + AtomicFetchOp(*this, nullptr, arrayType, op, value, mem, temp, output); +} + +void MacroAssembler::atomicFetchOp(Scalar::Type arrayType, + const Synchronization&, AtomicOp op, + Imm32 value, const BaseIndex& mem, + Register temp, Register output) { + AtomicFetchOp(*this, nullptr, arrayType, op, value, mem, temp, output); +} + +void MacroAssembler::atomicFetchOp(Scalar::Type arrayType, + const Synchronization&, AtomicOp op, + Imm32 value, const Address& mem, + Register temp, Register output) { + AtomicFetchOp(*this, nullptr, arrayType, op, value, mem, temp, output); +} + +void MacroAssembler::wasmAtomicFetchOp(const wasm::MemoryAccessDesc& access, + AtomicOp op, Register value, + const Address& mem, Register temp, + Register output) { + 
AtomicFetchOp(*this, &access, access.type(), op, value, mem, temp, output); +} + +void MacroAssembler::wasmAtomicFetchOp(const wasm::MemoryAccessDesc& access, + AtomicOp op, Imm32 value, + const Address& mem, Register temp, + Register output) { + AtomicFetchOp(*this, &access, access.type(), op, value, mem, temp, output); +} + +void MacroAssembler::wasmAtomicFetchOp(const wasm::MemoryAccessDesc& access, + AtomicOp op, Register value, + const BaseIndex& mem, Register temp, + Register output) { + AtomicFetchOp(*this, &access, access.type(), op, value, mem, temp, output); +} + +void MacroAssembler::wasmAtomicFetchOp(const wasm::MemoryAccessDesc& access, + AtomicOp op, Imm32 value, + const BaseIndex& mem, Register temp, + Register output) { + AtomicFetchOp(*this, &access, access.type(), op, value, mem, temp, output); +} + +template <typename T, typename V> +static void AtomicEffectOp(MacroAssembler& masm, + const wasm::MemoryAccessDesc* access, + Scalar::Type arrayType, AtomicOp op, V value, + const T& mem) { + if (access) { + masm.append(*access, masm.size()); + } + + switch (Scalar::byteSize(arrayType)) { + case 1: + switch (op) { + case AtomicFetchAddOp: + masm.lock_addb(value, Operand(mem)); + break; + case AtomicFetchSubOp: + masm.lock_subb(value, Operand(mem)); + break; + case AtomicFetchAndOp: + masm.lock_andb(value, Operand(mem)); + break; + case AtomicFetchOrOp: + masm.lock_orb(value, Operand(mem)); + break; + case AtomicFetchXorOp: + masm.lock_xorb(value, Operand(mem)); + break; + default: + MOZ_CRASH(); + } + break; + case 2: + switch (op) { + case AtomicFetchAddOp: + masm.lock_addw(value, Operand(mem)); + break; + case AtomicFetchSubOp: + masm.lock_subw(value, Operand(mem)); + break; + case AtomicFetchAndOp: + masm.lock_andw(value, Operand(mem)); + break; + case AtomicFetchOrOp: + masm.lock_orw(value, Operand(mem)); + break; + case AtomicFetchXorOp: + masm.lock_xorw(value, Operand(mem)); + break; + default: + MOZ_CRASH(); + } + break; + case 4: + switch (op) { + case AtomicFetchAddOp: + masm.lock_addl(value, Operand(mem)); + break; + case AtomicFetchSubOp: + masm.lock_subl(value, Operand(mem)); + break; + case AtomicFetchAndOp: + masm.lock_andl(value, Operand(mem)); + break; + case AtomicFetchOrOp: + masm.lock_orl(value, Operand(mem)); + break; + case AtomicFetchXorOp: + masm.lock_xorl(value, Operand(mem)); + break; + default: + MOZ_CRASH(); + } + break; + default: + MOZ_CRASH(); + } +} + +void MacroAssembler::wasmAtomicEffectOp(const wasm::MemoryAccessDesc& access, + AtomicOp op, Register value, + const Address& mem, Register temp) { + MOZ_ASSERT(temp == InvalidReg); + AtomicEffectOp(*this, &access, access.type(), op, value, mem); +} + +void MacroAssembler::wasmAtomicEffectOp(const wasm::MemoryAccessDesc& access, + AtomicOp op, Imm32 value, + const Address& mem, Register temp) { + MOZ_ASSERT(temp == InvalidReg); + AtomicEffectOp(*this, &access, access.type(), op, value, mem); +} + +void MacroAssembler::wasmAtomicEffectOp(const wasm::MemoryAccessDesc& access, + AtomicOp op, Register value, + const BaseIndex& mem, Register temp) { + MOZ_ASSERT(temp == InvalidReg); + AtomicEffectOp(*this, &access, access.type(), op, value, mem); +} + +void MacroAssembler::wasmAtomicEffectOp(const wasm::MemoryAccessDesc& access, + AtomicOp op, Imm32 value, + const BaseIndex& mem, Register temp) { + MOZ_ASSERT(temp == InvalidReg); + AtomicEffectOp(*this, &access, access.type(), op, value, mem); +} + +// ======================================================================== +// JS atomic operations. 
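+// For Scalar::Uint32 arrays the result may not fit in an int32, so the
+// helpers below compute it into an integer temp and convert it with
+// convertUInt32ToDouble into the floating point output; every other element
+// type returns its result directly in the GPR output.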
+ +template <typename T> +static void CompareExchangeJS(MacroAssembler& masm, Scalar::Type arrayType, + const Synchronization& sync, const T& mem, + Register oldval, Register newval, Register temp, + AnyRegister output) { + if (arrayType == Scalar::Uint32) { + masm.compareExchange(arrayType, sync, mem, oldval, newval, temp); + masm.convertUInt32ToDouble(temp, output.fpu()); + } else { + masm.compareExchange(arrayType, sync, mem, oldval, newval, output.gpr()); + } +} + +void MacroAssembler::compareExchangeJS(Scalar::Type arrayType, + const Synchronization& sync, + const Address& mem, Register oldval, + Register newval, Register temp, + AnyRegister output) { + CompareExchangeJS(*this, arrayType, sync, mem, oldval, newval, temp, output); +} + +void MacroAssembler::compareExchangeJS(Scalar::Type arrayType, + const Synchronization& sync, + const BaseIndex& mem, Register oldval, + Register newval, Register temp, + AnyRegister output) { + CompareExchangeJS(*this, arrayType, sync, mem, oldval, newval, temp, output); +} + +template <typename T> +static void AtomicExchangeJS(MacroAssembler& masm, Scalar::Type arrayType, + const Synchronization& sync, const T& mem, + Register value, Register temp, + AnyRegister output) { + if (arrayType == Scalar::Uint32) { + masm.atomicExchange(arrayType, sync, mem, value, temp); + masm.convertUInt32ToDouble(temp, output.fpu()); + } else { + masm.atomicExchange(arrayType, sync, mem, value, output.gpr()); + } +} + +void MacroAssembler::atomicExchangeJS(Scalar::Type arrayType, + const Synchronization& sync, + const Address& mem, Register value, + Register temp, AnyRegister output) { + AtomicExchangeJS(*this, arrayType, sync, mem, value, temp, output); +} + +void MacroAssembler::atomicExchangeJS(Scalar::Type arrayType, + const Synchronization& sync, + const BaseIndex& mem, Register value, + Register temp, AnyRegister output) { + AtomicExchangeJS(*this, arrayType, sync, mem, value, temp, output); +} + +template <typename T> +static void AtomicFetchOpJS(MacroAssembler& masm, Scalar::Type arrayType, + const Synchronization& sync, AtomicOp op, + Register value, const T& mem, Register temp1, + Register temp2, AnyRegister output) { + if (arrayType == Scalar::Uint32) { + masm.atomicFetchOp(arrayType, sync, op, value, mem, temp2, temp1); + masm.convertUInt32ToDouble(temp1, output.fpu()); + } else { + masm.atomicFetchOp(arrayType, sync, op, value, mem, temp1, output.gpr()); + } +} + +void MacroAssembler::atomicFetchOpJS(Scalar::Type arrayType, + const Synchronization& sync, AtomicOp op, + Register value, const Address& mem, + Register temp1, Register temp2, + AnyRegister output) { + AtomicFetchOpJS(*this, arrayType, sync, op, value, mem, temp1, temp2, output); +} + +void MacroAssembler::atomicFetchOpJS(Scalar::Type arrayType, + const Synchronization& sync, AtomicOp op, + Register value, const BaseIndex& mem, + Register temp1, Register temp2, + AnyRegister output) { + AtomicFetchOpJS(*this, arrayType, sync, op, value, mem, temp1, temp2, output); +} + +void MacroAssembler::atomicEffectOpJS(Scalar::Type arrayType, + const Synchronization&, AtomicOp op, + Register value, const BaseIndex& mem, + Register temp) { + MOZ_ASSERT(temp == InvalidReg); + AtomicEffectOp(*this, nullptr, arrayType, op, value, mem); +} + +void MacroAssembler::atomicEffectOpJS(Scalar::Type arrayType, + const Synchronization&, AtomicOp op, + Register value, const Address& mem, + Register temp) { + MOZ_ASSERT(temp == InvalidReg); + AtomicEffectOp(*this, nullptr, arrayType, op, value, mem); +} + +void 
MacroAssembler::atomicEffectOpJS(Scalar::Type arrayType, + const Synchronization&, AtomicOp op, + Imm32 value, const Address& mem, + Register temp) { + MOZ_ASSERT(temp == InvalidReg); + AtomicEffectOp(*this, nullptr, arrayType, op, value, mem); +} + +void MacroAssembler::atomicEffectOpJS(Scalar::Type arrayType, + const Synchronization& sync, AtomicOp op, + Imm32 value, const BaseIndex& mem, + Register temp) { + MOZ_ASSERT(temp == InvalidReg); + AtomicEffectOp(*this, nullptr, arrayType, op, value, mem); +} + +template <typename T> +static void AtomicFetchOpJS(MacroAssembler& masm, Scalar::Type arrayType, + const Synchronization& sync, AtomicOp op, + Imm32 value, const T& mem, Register temp1, + Register temp2, AnyRegister output) { + if (arrayType == Scalar::Uint32) { + masm.atomicFetchOp(arrayType, sync, op, value, mem, temp2, temp1); + masm.convertUInt32ToDouble(temp1, output.fpu()); + } else { + masm.atomicFetchOp(arrayType, sync, op, value, mem, temp1, output.gpr()); + } +} + +void MacroAssembler::atomicFetchOpJS(Scalar::Type arrayType, + const Synchronization& sync, AtomicOp op, + Imm32 value, const Address& mem, + Register temp1, Register temp2, + AnyRegister output) { + AtomicFetchOpJS(*this, arrayType, sync, op, value, mem, temp1, temp2, output); +} + +void MacroAssembler::atomicFetchOpJS(Scalar::Type arrayType, + const Synchronization& sync, AtomicOp op, + Imm32 value, const BaseIndex& mem, + Register temp1, Register temp2, + AnyRegister output) { + AtomicFetchOpJS(*this, arrayType, sync, op, value, mem, temp1, temp2, output); +} + +// ======================================================================== +// Spectre Mitigations. + +void MacroAssembler::speculationBarrier() { + // Spectre mitigation recommended by Intel and AMD suggest to use lfence as + // a way to force all speculative execution of instructions to end. + MOZ_ASSERT(HasSSE2()); + masm.lfence(); +} + +void MacroAssembler::floorFloat32ToInt32(FloatRegister src, Register dest, + Label* fail) { + if (HasSSE41()) { + // Fail on negative-zero. + branchNegativeZeroFloat32(src, dest, fail); + + // Round toward -Infinity. + { + ScratchFloat32Scope scratch(*this); + vroundss(X86Encoding::RoundDown, src, scratch); + truncateFloat32ToInt32(scratch, dest, fail); + } + } else { + Label negative, end; + + // Branch to a slow path for negative inputs. Doesn't catch NaN or -0. + { + ScratchFloat32Scope scratch(*this); + zeroFloat32(scratch); + branchFloat(Assembler::DoubleLessThan, src, scratch, &negative); + } + + // Fail on negative-zero. + branchNegativeZeroFloat32(src, dest, fail); + + // Input is non-negative, so truncation correctly rounds. + truncateFloat32ToInt32(src, dest, fail); + jump(&end); + + // Input is negative, but isn't -0. + // Negative values go on a comparatively expensive path, since no + // native rounding mode matches JS semantics. Still better than callVM. + bind(&negative); + { + // Truncate and round toward zero. + // This is off-by-one for everything but integer-valued inputs. + truncateFloat32ToInt32(src, dest, fail); + + // Test whether the input double was integer-valued. + { + ScratchFloat32Scope scratch(*this); + convertInt32ToFloat32(dest, scratch); + branchFloat(Assembler::DoubleEqualOrUnordered, src, scratch, &end); + } + + // Input is not integer-valued, so we rounded off-by-one in the + // wrong direction. Correct by subtraction. + subl(Imm32(1), dest); + // Cannot overflow: output was already checked against INT_MIN. 
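+      // For example, floor(-2.5f): truncation gives -2 and -2.0f != -2.5f,
+      // so the subtraction above produces -3, matching Math.floor.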
+ } + + bind(&end); + } +} + +void MacroAssembler::floorDoubleToInt32(FloatRegister src, Register dest, + Label* fail) { + if (HasSSE41()) { + // Fail on negative-zero. + branchNegativeZero(src, dest, fail); + + // Round toward -Infinity. + { + ScratchDoubleScope scratch(*this); + vroundsd(X86Encoding::RoundDown, src, scratch); + truncateDoubleToInt32(scratch, dest, fail); + } + } else { + Label negative, end; + + // Branch to a slow path for negative inputs. Doesn't catch NaN or -0. + { + ScratchDoubleScope scratch(*this); + zeroDouble(scratch); + branchDouble(Assembler::DoubleLessThan, src, scratch, &negative); + } + + // Fail on negative-zero. + branchNegativeZero(src, dest, fail); + + // Input is non-negative, so truncation correctly rounds. + truncateDoubleToInt32(src, dest, fail); + jump(&end); + + // Input is negative, but isn't -0. + // Negative values go on a comparatively expensive path, since no + // native rounding mode matches JS semantics. Still better than callVM. + bind(&negative); + { + // Truncate and round toward zero. + // This is off-by-one for everything but integer-valued inputs. + truncateDoubleToInt32(src, dest, fail); + + // Test whether the input double was integer-valued. + { + ScratchDoubleScope scratch(*this); + convertInt32ToDouble(dest, scratch); + branchDouble(Assembler::DoubleEqualOrUnordered, src, scratch, &end); + } + + // Input is not integer-valued, so we rounded off-by-one in the + // wrong direction. Correct by subtraction. + subl(Imm32(1), dest); + // Cannot overflow: output was already checked against INT_MIN. + } + + bind(&end); + } +} + +void MacroAssembler::ceilFloat32ToInt32(FloatRegister src, Register dest, + Label* fail) { + ScratchFloat32Scope scratch(*this); + + Label lessThanOrEqualMinusOne; + + // If x is in ]-1,0], ceil(x) is -0, which cannot be represented as an int32. + // Fail if x > -1 and the sign bit is set. + loadConstantFloat32(-1.f, scratch); + branchFloat(Assembler::DoubleLessThanOrEqualOrUnordered, src, scratch, + &lessThanOrEqualMinusOne); + vmovmskps(src, dest); + branchTest32(Assembler::NonZero, dest, Imm32(1), fail); + + if (HasSSE41()) { + // x <= -1 or x > -0 + bind(&lessThanOrEqualMinusOne); + // Round toward +Infinity. + vroundss(X86Encoding::RoundUp, src, scratch); + truncateFloat32ToInt32(scratch, dest, fail); + return; + } + + // No SSE4.1 + Label end; + + // x >= 0 and x is not -0.0. We can truncate integer values, and truncate and + // add 1 to non-integer values. This will also work for values >= INT_MAX + 1, + // as the truncate operation will return INT_MIN and we'll fail. + truncateFloat32ToInt32(src, dest, fail); + convertInt32ToFloat32(dest, scratch); + branchFloat(Assembler::DoubleEqualOrUnordered, src, scratch, &end); + + // Input is not integer-valued, add 1 to obtain the ceiling value. + // If input > INT_MAX, output == INT_MAX so adding 1 will overflow. + branchAdd32(Assembler::Overflow, Imm32(1), dest, fail); + jump(&end); + + // x <= -1, truncation is the way to go. + bind(&lessThanOrEqualMinusOne); + truncateFloat32ToInt32(src, dest, fail); + + bind(&end); +} + +void MacroAssembler::ceilDoubleToInt32(FloatRegister src, Register dest, + Label* fail) { + ScratchDoubleScope scratch(*this); + + Label lessThanOrEqualMinusOne; + + // If x is in ]-1,0], ceil(x) is -0, which cannot be represented as an int32. + // Fail if x > -1 and the sign bit is set. 
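+  // (vmovmskpd copies the sign bits of the packed doubles into the low bits
+  // of dest, so testing bit 0 below tests the sign of the scalar input.)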
+ loadConstantDouble(-1.0, scratch); + branchDouble(Assembler::DoubleLessThanOrEqualOrUnordered, src, scratch, + &lessThanOrEqualMinusOne); + vmovmskpd(src, dest); + branchTest32(Assembler::NonZero, dest, Imm32(1), fail); + + if (HasSSE41()) { + // x <= -1 or x > -0 + bind(&lessThanOrEqualMinusOne); + // Round toward +Infinity. + vroundsd(X86Encoding::RoundUp, src, scratch); + truncateDoubleToInt32(scratch, dest, fail); + return; + } + + // No SSE4.1 + Label end; + + // x >= 0 and x is not -0.0. We can truncate integer values, and truncate and + // add 1 to non-integer values. This will also work for values >= INT_MAX + 1, + // as the truncate operation will return INT_MIN and we'll fail. + truncateDoubleToInt32(src, dest, fail); + convertInt32ToDouble(dest, scratch); + branchDouble(Assembler::DoubleEqualOrUnordered, src, scratch, &end); + + // Input is not integer-valued, add 1 to obtain the ceiling value. + // If input > INT_MAX, output == INT_MAX so adding 1 will overflow. + branchAdd32(Assembler::Overflow, Imm32(1), dest, fail); + jump(&end); + + // x <= -1, truncation is the way to go. + bind(&lessThanOrEqualMinusOne); + truncateDoubleToInt32(src, dest, fail); + + bind(&end); +} + +void MacroAssembler::truncDoubleToInt32(FloatRegister src, Register dest, + Label* fail) { + Label lessThanOrEqualMinusOne; + + // Bail on ]-1; -0] range + { + ScratchDoubleScope scratch(*this); + loadConstantDouble(-1, scratch); + branchDouble(Assembler::DoubleLessThanOrEqualOrUnordered, src, scratch, + &lessThanOrEqualMinusOne); + } + + // Test for remaining values with the sign bit set, i.e. ]-1; -0] + vmovmskpd(src, dest); + branchTest32(Assembler::NonZero, dest, Imm32(1), fail); + + // x <= -1 or x >= +0, truncation is the way to go. + bind(&lessThanOrEqualMinusOne); + truncateDoubleToInt32(src, dest, fail); +} + +void MacroAssembler::truncFloat32ToInt32(FloatRegister src, Register dest, + Label* fail) { + Label lessThanOrEqualMinusOne; + + // Bail on ]-1; -0] range + { + ScratchFloat32Scope scratch(*this); + loadConstantFloat32(-1.f, scratch); + branchFloat(Assembler::DoubleLessThanOrEqualOrUnordered, src, scratch, + &lessThanOrEqualMinusOne); + } + + // Test for remaining values with the sign bit set, i.e. ]-1; -0] + vmovmskps(src, dest); + branchTest32(Assembler::NonZero, dest, Imm32(1), fail); + + // x <= -1 or x >= +0, truncation is the way to go. + bind(&lessThanOrEqualMinusOne); + truncateFloat32ToInt32(src, dest, fail); +} + +void MacroAssembler::roundFloat32ToInt32(FloatRegister src, Register dest, + FloatRegister temp, Label* fail) { + ScratchFloat32Scope scratch(*this); + + Label negativeOrZero, negative, end; + + // Branch to a slow path for non-positive inputs. Doesn't catch NaN. + zeroFloat32(scratch); + loadConstantFloat32(GetBiggestNumberLessThan(0.5f), temp); + branchFloat(Assembler::DoubleLessThanOrEqual, src, scratch, &negativeOrZero); + { + // Input is strictly positive or NaN. Add the biggest float less than 0.5 + // and truncate, rounding down (because if the input is the biggest float + // less than 0.5, adding 0.5 would undesirably round up to 1). Note that we + // have to add the input to the temp register because we're not allowed to + // modify the input register. + addFloat32(src, temp); + truncateFloat32ToInt32(temp, dest, fail); + jump(&end); + } + + // Input is negative, +0 or -0. + bind(&negativeOrZero); + { + // Branch on negative input. + j(Assembler::NotEqual, &negative); + + // Fail on negative-zero. + branchNegativeZeroFloat32(src, dest, fail); + + // Input is +0. 
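+    // Math.round(+0) is +0, which as an int32 is simply 0, so clear dest and
+    // jump to the common exit.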
+ xor32(dest, dest); + jump(&end); + } + + // Input is negative. + bind(&negative); + { + // Inputs in [-0.5, 0) are rounded to -0. Fail. + loadConstantFloat32(-0.5f, scratch); + branchFloat(Assembler::DoubleGreaterThanOrEqual, src, scratch, fail); + + // Other negative inputs need the biggest float less than 0.5 added. + // + // The result is stored in the temp register (currently contains the biggest + // float less than 0.5). + addFloat32(src, temp); + + if (HasSSE41()) { + // Round toward -Infinity. + vroundss(X86Encoding::RoundDown, temp, scratch); + + // Truncate. + truncateFloat32ToInt32(scratch, dest, fail); + } else { + // Round toward -Infinity without the benefit of ROUNDSS. + + // Truncate and round toward zero. + // This is off-by-one for everything but integer-valued inputs. + truncateFloat32ToInt32(temp, dest, fail); + + // Test whether the truncated float was integer-valued. + convertInt32ToFloat32(dest, scratch); + branchFloat(Assembler::DoubleEqualOrUnordered, temp, scratch, &end); + + // Input is not integer-valued, so we rounded off-by-one in the + // wrong direction. Correct by subtraction. + subl(Imm32(1), dest); + // Cannot overflow: output was already checked against INT_MIN. + } + } + + bind(&end); +} + +void MacroAssembler::roundDoubleToInt32(FloatRegister src, Register dest, + FloatRegister temp, Label* fail) { + ScratchDoubleScope scratch(*this); + + Label negativeOrZero, negative, end; + + // Branch to a slow path for non-positive inputs. Doesn't catch NaN. + zeroDouble(scratch); + loadConstantDouble(GetBiggestNumberLessThan(0.5), temp); + branchDouble(Assembler::DoubleLessThanOrEqual, src, scratch, &negativeOrZero); + { + // Input is strictly positive or NaN. Add the biggest double less than 0.5 + // and truncate, rounding down (because if the input is the biggest double + // less than 0.5, adding 0.5 would undesirably round up to 1). Note that we + // have to add the input to the temp register because we're not allowed to + // modify the input register. + addDouble(src, temp); + truncateDoubleToInt32(temp, dest, fail); + jump(&end); + } + + // Input is negative, +0 or -0. + bind(&negativeOrZero); + { + // Branch on negative input. + j(Assembler::NotEqual, &negative); + + // Fail on negative-zero. + branchNegativeZero(src, dest, fail, /* maybeNonZero = */ false); + + // Input is +0 + xor32(dest, dest); + jump(&end); + } + + // Input is negative. + bind(&negative); + { + // Inputs in [-0.5, 0) are rounded to -0. Fail. + loadConstantDouble(-0.5, scratch); + branchDouble(Assembler::DoubleGreaterThanOrEqual, src, scratch, fail); + + // Other negative inputs need the biggest double less than 0.5 added. + // + // The result is stored in the temp register (currently contains the biggest + // double less than 0.5). + addDouble(src, temp); + + if (HasSSE41()) { + // Round toward -Infinity. + vroundsd(X86Encoding::RoundDown, temp, scratch); + + // Truncate. + truncateDoubleToInt32(scratch, dest, fail); + } else { + // Round toward -Infinity without the benefit of ROUNDSD. + + // Truncate and round toward zero. + // This is off-by-one for everything but integer-valued inputs. + truncateDoubleToInt32(temp, dest, fail); + + // Test whether the truncated double was integer-valued. + convertInt32ToDouble(dest, scratch); + branchDouble(Assembler::DoubleEqualOrUnordered, temp, scratch, &end); + + // Input is not integer-valued, so we rounded off-by-one in the + // wrong direction. Correct by subtraction. 
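+      // For example, src = -1.3 gives temp of roughly -0.8; truncation
+      // yields 0, which is not equal to temp, so the subtraction below
+      // produces -1, matching Math.round(-1.3).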
+ subl(Imm32(1), dest); + // Cannot overflow: output was already checked against INT_MIN. + } + } + + bind(&end); +} + +void MacroAssembler::nearbyIntDouble(RoundingMode mode, FloatRegister src, + FloatRegister dest) { + MOZ_ASSERT(HasRoundInstruction(mode)); + vroundsd(Assembler::ToX86RoundingMode(mode), src, dest); +} + +void MacroAssembler::nearbyIntFloat32(RoundingMode mode, FloatRegister src, + FloatRegister dest) { + MOZ_ASSERT(HasRoundInstruction(mode)); + vroundss(Assembler::ToX86RoundingMode(mode), src, dest); +} + +void MacroAssembler::copySignDouble(FloatRegister lhs, FloatRegister rhs, + FloatRegister output) { + ScratchDoubleScope scratch(*this); + + // TODO Support AVX2 + if (rhs == output) { + MOZ_ASSERT(lhs != rhs); + double keepSignMask = mozilla::BitwiseCast<double>(INT64_MIN); + loadConstantDouble(keepSignMask, scratch); + vandpd(scratch, rhs, output); + + double clearSignMask = mozilla::BitwiseCast<double>(INT64_MAX); + loadConstantDouble(clearSignMask, scratch); + vandpd(lhs, scratch, scratch); + } else { + double clearSignMask = mozilla::BitwiseCast<double>(INT64_MAX); + loadConstantDouble(clearSignMask, scratch); + vandpd(scratch, lhs, output); + + double keepSignMask = mozilla::BitwiseCast<double>(INT64_MIN); + loadConstantDouble(keepSignMask, scratch); + vandpd(rhs, scratch, scratch); + } + + vorpd(scratch, output, output); +} + +void MacroAssembler::copySignFloat32(FloatRegister lhs, FloatRegister rhs, + FloatRegister output) { + ScratchFloat32Scope scratch(*this); + + // TODO Support AVX2 + if (rhs == output) { + MOZ_ASSERT(lhs != rhs); + float keepSignMask = mozilla::BitwiseCast<float>(INT32_MIN); + loadConstantFloat32(keepSignMask, scratch); + vandps(scratch, output, output); + + float clearSignMask = mozilla::BitwiseCast<float>(INT32_MAX); + loadConstantFloat32(clearSignMask, scratch); + vandps(lhs, scratch, scratch); + } else { + float clearSignMask = mozilla::BitwiseCast<float>(INT32_MAX); + loadConstantFloat32(clearSignMask, scratch); + vandps(scratch, lhs, output); + + float keepSignMask = mozilla::BitwiseCast<float>(INT32_MIN); + loadConstantFloat32(keepSignMask, scratch); + vandps(rhs, scratch, scratch); + } + + vorps(scratch, output, output); +} + +void MacroAssembler::shiftIndex32AndAdd(Register indexTemp32, int shift, + Register pointer) { + if (IsShiftInScaleRange(shift)) { + computeEffectiveAddress( + BaseIndex(pointer, indexTemp32, ShiftToScale(shift)), pointer); + return; + } + lshift32(Imm32(shift), indexTemp32); + addPtr(indexTemp32, pointer); +} + +//}}} check_macroassembler_style diff --git a/js/src/jit/x86-shared/MacroAssembler-x86-shared.h b/js/src/jit/x86-shared/MacroAssembler-x86-shared.h new file mode 100644 index 0000000000..9185abd647 --- /dev/null +++ b/js/src/jit/x86-shared/MacroAssembler-x86-shared.h @@ -0,0 +1,998 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef jit_x86_shared_MacroAssembler_x86_shared_h +#define jit_x86_shared_MacroAssembler_x86_shared_h + +#if defined(JS_CODEGEN_X86) +# include "jit/x86/Assembler-x86.h" +#elif defined(JS_CODEGEN_X64) +# include "jit/x64/Assembler-x64.h" +#endif + +namespace js { +namespace jit { + +class MacroAssembler; + +class MacroAssemblerX86Shared : public Assembler { + private: + // Perform a downcast. 
Should be removed by Bug 996602. + MacroAssembler& asMasm(); + const MacroAssembler& asMasm() const; + + public: +#ifdef JS_CODEGEN_X64 + typedef X86Encoding::JmpSrc UsesItem; +#else + typedef CodeOffset UsesItem; +#endif + + typedef Vector<UsesItem, 0, SystemAllocPolicy> UsesVector; + static_assert(sizeof(UsesItem) == 4); + + protected: + // For Double, Float and SimdData, make the move ctors explicit so that MSVC + // knows what to use instead of copying these data structures. + template <class T> + struct Constant { + using Pod = T; + + T value; + UsesVector uses; + + explicit Constant(const T& value) : value(value) {} + Constant(Constant<T>&& other) + : value(other.value), uses(std::move(other.uses)) {} + explicit Constant(const Constant<T>&) = delete; + }; + + // Containers use SystemAllocPolicy since wasm releases memory after each + // function is compiled, and these need to live until after all functions + // are compiled. + using Double = Constant<double>; + Vector<Double, 0, SystemAllocPolicy> doubles_; + typedef HashMap<double, size_t, DefaultHasher<double>, SystemAllocPolicy> + DoubleMap; + DoubleMap doubleMap_; + + using Float = Constant<float>; + Vector<Float, 0, SystemAllocPolicy> floats_; + typedef HashMap<float, size_t, DefaultHasher<float>, SystemAllocPolicy> + FloatMap; + FloatMap floatMap_; + + struct SimdData : public Constant<SimdConstant> { + explicit SimdData(SimdConstant d) : Constant<SimdConstant>(d) {} + SimdData(SimdData&& d) : Constant<SimdConstant>(std::move(d)) {} + explicit SimdData(const SimdData&) = delete; + SimdConstant::Type type() const { return value.type(); } + }; + + Vector<SimdData, 0, SystemAllocPolicy> simds_; + typedef HashMap<SimdConstant, size_t, SimdConstant, SystemAllocPolicy> + SimdMap; + SimdMap simdMap_; + + template <class T, class Map> + T* getConstant(const typename T::Pod& value, Map& map, + Vector<T, 0, SystemAllocPolicy>& vec); + + Float* getFloat(float f); + Double* getDouble(double d); + SimdData* getSimdData(const SimdConstant& v); + + public: + using Assembler::call; + + MacroAssemblerX86Shared() = default; + + bool appendRawCode(const uint8_t* code, size_t numBytes) { + return masm.appendRawCode(code, numBytes); + } + + void addToPCRel4(uint32_t offset, int32_t bias) { + return masm.addToPCRel4(offset, bias); + } + + // Evaluate srcDest = minmax<isMax>{Float32,Double}(srcDest, second). + // Checks for NaN if canBeNaN is true. + void minMaxDouble(FloatRegister srcDest, FloatRegister second, bool canBeNaN, + bool isMax); + void minMaxFloat32(FloatRegister srcDest, FloatRegister second, bool canBeNaN, + bool isMax); + + void compareDouble(DoubleCondition cond, FloatRegister lhs, + FloatRegister rhs) { + if (cond & DoubleConditionBitInvert) { + vucomisd(lhs, rhs); + } else { + vucomisd(rhs, lhs); + } + } + + void compareFloat(DoubleCondition cond, FloatRegister lhs, + FloatRegister rhs) { + if (cond & DoubleConditionBitInvert) { + vucomiss(lhs, rhs); + } else { + vucomiss(rhs, lhs); + } + } + + void branchNegativeZero(FloatRegister reg, Register scratch, Label* label, + bool maybeNonZero = true); + void branchNegativeZeroFloat32(FloatRegister reg, Register scratch, + Label* label); + + void move32(Imm32 imm, Register dest) { + // Use the ImmWord version of mov to register, which has special + // optimizations. Casting to uint32_t here ensures that the value + // is zero-extended. 
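+    // (On x64 the ImmWord path can then pick a shorter encoding, e.g. an xor
+    // for zero or a 32-bit move, since writing a 32-bit register implicitly
+    // clears the upper half.)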
+ mov(ImmWord(uint32_t(imm.value)), dest); + } + void move32(Imm32 imm, const Operand& dest) { movl(imm, dest); } + void move32(Register src, Register dest) { movl(src, dest); } + void move32(Register src, const Operand& dest) { movl(src, dest); } + void test32(Register lhs, Register rhs) { testl(rhs, lhs); } + void test32(const Address& addr, Imm32 imm) { testl(imm, Operand(addr)); } + void test32(const Operand lhs, Imm32 imm) { testl(imm, lhs); } + void test32(Register lhs, Imm32 rhs) { testl(rhs, lhs); } + void cmp32(Register lhs, Imm32 rhs) { cmpl(rhs, lhs); } + void cmp32(Register lhs, Register rhs) { cmpl(rhs, lhs); } + void cmp32(const Address& lhs, Register rhs) { cmp32(Operand(lhs), rhs); } + void cmp32(const Address& lhs, Imm32 rhs) { cmp32(Operand(lhs), rhs); } + void cmp32(const Operand& lhs, Imm32 rhs) { cmpl(rhs, lhs); } + void cmp32(const Operand& lhs, Register rhs) { cmpl(rhs, lhs); } + void cmp32(Register lhs, const Operand& rhs) { cmpl(rhs, lhs); } + + void cmp16(const Address& lhs, Imm32 rhs) { cmp16(Operand(lhs), rhs); } + void cmp16(const Operand& lhs, Imm32 rhs) { cmpw(rhs, lhs); } + + void cmp8(const Address& lhs, Imm32 rhs) { cmp8(Operand(lhs), rhs); } + void cmp8(const Operand& lhs, Imm32 rhs) { cmpb(rhs, lhs); } + void cmp8(const Operand& lhs, Register rhs) { cmpb(rhs, lhs); } + + void atomic_inc32(const Operand& addr) { lock_incl(addr); } + void atomic_dec32(const Operand& addr) { lock_decl(addr); } + + void storeLoadFence() { + // This implementation follows Linux. + if (HasSSE2()) { + masm.mfence(); + } else { + lock_addl(Imm32(0), Operand(Address(esp, 0))); + } + } + + void branch16(Condition cond, Register lhs, Register rhs, Label* label) { + cmpw(rhs, lhs); + j(cond, label); + } + void branchTest16(Condition cond, Register lhs, Register rhs, Label* label) { + testw(rhs, lhs); + j(cond, label); + } + + void jump(Label* label) { jmp(label); } + void jump(JitCode* code) { jmp(code); } + void jump(TrampolinePtr code) { jmp(ImmPtr(code.value)); } + void jump(ImmPtr ptr) { jmp(ptr); } + void jump(Register reg) { jmp(Operand(reg)); } + void jump(const Address& addr) { jmp(Operand(addr)); } + + void convertInt32ToDouble(Register src, FloatRegister dest) { + // vcvtsi2sd and friends write only part of their output register, which + // causes slowdowns on out-of-order processors. Explicitly break + // dependencies with vxorpd (and vxorps elsewhere), which are handled + // specially in modern CPUs, for this purpose. See sections 8.14, 9.8, + // 10.8, 12.9, 13.16, 14.14, and 15.8 of Agner's Microarchitecture + // document. 
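+    // (Modern CPUs recognize the xor-zeroing idiom below and break the false
+    // dependency, so vcvtsi2sd need not wait for the previous producer of
+    // dest.)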
+ zeroDouble(dest); + vcvtsi2sd(src, dest, dest); + } + void convertInt32ToDouble(const Address& src, FloatRegister dest) { + convertInt32ToDouble(Operand(src), dest); + } + void convertInt32ToDouble(const BaseIndex& src, FloatRegister dest) { + convertInt32ToDouble(Operand(src), dest); + } + void convertInt32ToDouble(const Operand& src, FloatRegister dest) { + // Clear the output register first to break dependencies; see above; + zeroDouble(dest); + vcvtsi2sd(Operand(src), dest, dest); + } + void convertInt32ToFloat32(Register src, FloatRegister dest) { + // Clear the output register first to break dependencies; see above; + zeroFloat32(dest); + vcvtsi2ss(src, dest, dest); + } + void convertInt32ToFloat32(const Address& src, FloatRegister dest) { + convertInt32ToFloat32(Operand(src), dest); + } + void convertInt32ToFloat32(const Operand& src, FloatRegister dest) { + // Clear the output register first to break dependencies; see above; + zeroFloat32(dest); + vcvtsi2ss(src, dest, dest); + } + Condition testDoubleTruthy(bool truthy, FloatRegister reg) { + ScratchDoubleScope scratch(asMasm()); + zeroDouble(scratch); + vucomisd(reg, scratch); + return truthy ? NonZero : Zero; + } + + // Class which ensures that registers used in byte ops are compatible with + // such instructions, even if the original register passed in wasn't. This + // only applies to x86, as on x64 all registers are valid single byte regs. + // This doesn't lead to great code but helps to simplify code generation. + // + // Note that this can currently only be used in cases where the register is + // read from by the guarded instruction, not written to. + class AutoEnsureByteRegister { + MacroAssemblerX86Shared* masm; + Register original_; + Register substitute_; + + public: + template <typename T> + AutoEnsureByteRegister(MacroAssemblerX86Shared* masm, T address, + Register reg) + : masm(masm), original_(reg) { + AllocatableGeneralRegisterSet singleByteRegs(Registers::SingleByteRegs); + if (singleByteRegs.has(reg)) { + substitute_ = reg; + } else { + MOZ_ASSERT(address.base != StackPointer); + do { + substitute_ = singleByteRegs.takeAny(); + } while (Operand(address).containsReg(substitute_)); + + masm->push(substitute_); + masm->mov(reg, substitute_); + } + } + + ~AutoEnsureByteRegister() { + if (original_ != substitute_) { + masm->pop(substitute_); + } + } + + Register reg() { return substitute_; } + }; + + void load8ZeroExtend(const Operand& src, Register dest) { movzbl(src, dest); } + void load8ZeroExtend(const Address& src, Register dest) { + movzbl(Operand(src), dest); + } + void load8ZeroExtend(const BaseIndex& src, Register dest) { + movzbl(Operand(src), dest); + } + void load8SignExtend(const Operand& src, Register dest) { movsbl(src, dest); } + void load8SignExtend(const Address& src, Register dest) { + movsbl(Operand(src), dest); + } + void load8SignExtend(const BaseIndex& src, Register dest) { + movsbl(Operand(src), dest); + } + template <typename T> + void store8(Imm32 src, const T& dest) { + movb(src, Operand(dest)); + } + template <typename T> + void store8(Register src, const T& dest) { + AutoEnsureByteRegister ensure(this, dest, src); + movb(ensure.reg(), Operand(dest)); + } + void load16ZeroExtend(const Operand& src, Register dest) { + movzwl(src, dest); + } + void load16ZeroExtend(const Address& src, Register dest) { + movzwl(Operand(src), dest); + } + void load16ZeroExtend(const BaseIndex& src, Register dest) { + movzwl(Operand(src), dest); + } + template <typename S> + void 
load16UnalignedZeroExtend(const S& src, Register dest) { + load16ZeroExtend(src, dest); + } + template <typename S, typename T> + void store16(const S& src, const T& dest) { + movw(src, Operand(dest)); + } + template <typename S, typename T> + void store16Unaligned(const S& src, const T& dest) { + store16(src, dest); + } + void load16SignExtend(const Operand& src, Register dest) { + movswl(src, dest); + } + void load16SignExtend(const Address& src, Register dest) { + movswl(Operand(src), dest); + } + void load16SignExtend(const BaseIndex& src, Register dest) { + movswl(Operand(src), dest); + } + template <typename S> + void load16UnalignedSignExtend(const S& src, Register dest) { + load16SignExtend(src, dest); + } + void load32(const Address& address, Register dest) { + movl(Operand(address), dest); + } + void load32(const BaseIndex& src, Register dest) { movl(Operand(src), dest); } + void load32(const Operand& src, Register dest) { movl(src, dest); } + template <typename S> + void load32Unaligned(const S& src, Register dest) { + load32(src, dest); + } + template <typename S, typename T> + void store32(const S& src, const T& dest) { + movl(src, Operand(dest)); + } + template <typename S, typename T> + void store32Unaligned(const S& src, const T& dest) { + store32(src, dest); + } + void loadDouble(const Address& src, FloatRegister dest) { vmovsd(src, dest); } + void loadDouble(const BaseIndex& src, FloatRegister dest) { + vmovsd(src, dest); + } + void loadDouble(const Operand& src, FloatRegister dest) { + switch (src.kind()) { + case Operand::MEM_REG_DISP: + loadDouble(src.toAddress(), dest); + break; + case Operand::MEM_SCALE: + loadDouble(src.toBaseIndex(), dest); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void moveDouble(FloatRegister src, FloatRegister dest) { + // Use vmovapd instead of vmovsd to avoid dependencies. + vmovapd(src, dest); + } + void zeroDouble(FloatRegister reg) { vxorpd(reg, reg, reg); } + void zeroFloat32(FloatRegister reg) { vxorps(reg, reg, reg); } + void convertFloat32ToDouble(FloatRegister src, FloatRegister dest) { + vcvtss2sd(src, dest, dest); + } + void convertDoubleToFloat32(FloatRegister src, FloatRegister dest) { + vcvtsd2ss(src, dest, dest); + } + + void loadInt32x4(const Address& addr, FloatRegister dest) { + vmovdqa(Operand(addr), dest); + } + void loadFloat32x4(const Address& addr, FloatRegister dest) { + vmovaps(Operand(addr), dest); + } + void storeInt32x4(FloatRegister src, const Address& addr) { + vmovdqa(src, Operand(addr)); + } + void storeFloat32x4(FloatRegister src, const Address& addr) { + vmovaps(src, Operand(addr)); + } + + void convertFloat32x4ToInt32x4(FloatRegister src, FloatRegister dest) { + // Note that if the conversion failed (because the converted + // result is larger than the maximum signed int32, or less than the + // least signed int32, or NaN), this will return the undefined integer + // value (0x8000000). 
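+    // (That "integer indefinite" value is INT32_MIN, i.e. 0x80000000, written
+    // to every failing lane.)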
+ vcvttps2dq(src, dest); + } + void convertInt32x4ToFloat32x4(FloatRegister src, FloatRegister dest) { + vcvtdq2ps(src, dest); + } + + void binarySimd128(const SimdConstant& rhs, FloatRegister lhsDest, + void (MacroAssembler::*regOp)(const Operand&, + FloatRegister, + FloatRegister), + void (MacroAssembler::*constOp)(const SimdConstant&, + FloatRegister)); + void binarySimd128( + FloatRegister lhs, const SimdConstant& rhs, FloatRegister dest, + void (MacroAssembler::*regOp)(const Operand&, FloatRegister, + FloatRegister), + void (MacroAssembler::*constOp)(const SimdConstant&, FloatRegister, + FloatRegister)); + void binarySimd128(const SimdConstant& rhs, FloatRegister lhsDest, + void (MacroAssembler::*regOp)(const Operand&, + FloatRegister), + void (MacroAssembler::*constOp)(const SimdConstant&, + FloatRegister)); + + // SIMD methods, defined in MacroAssembler-x86-shared-SIMD.cpp. + + void unsignedConvertInt32x4ToFloat32x4(FloatRegister src, FloatRegister dest); + void unsignedConvertInt32x4ToFloat64x2(FloatRegister src, FloatRegister dest); + void bitwiseTestSimd128(const SimdConstant& rhs, FloatRegister lhs); + + void truncSatFloat32x4ToInt32x4(FloatRegister src, FloatRegister dest); + void unsignedTruncSatFloat32x4ToInt32x4(FloatRegister src, FloatRegister temp, + FloatRegister dest); + void unsignedTruncFloat32x4ToInt32x4Relaxed(FloatRegister src, + FloatRegister dest); + void truncSatFloat64x2ToInt32x4(FloatRegister src, FloatRegister temp, + FloatRegister dest); + void unsignedTruncSatFloat64x2ToInt32x4(FloatRegister src, FloatRegister temp, + FloatRegister dest); + void unsignedTruncFloat64x2ToInt32x4Relaxed(FloatRegister src, + FloatRegister dest); + + void splatX16(Register input, FloatRegister output); + void splatX8(Register input, FloatRegister output); + void splatX4(Register input, FloatRegister output); + void splatX4(FloatRegister input, FloatRegister output); + void splatX2(FloatRegister input, FloatRegister output); + + void extractLaneInt32x4(FloatRegister input, Register output, unsigned lane); + void extractLaneFloat32x4(FloatRegister input, FloatRegister output, + unsigned lane); + void extractLaneFloat64x2(FloatRegister input, FloatRegister output, + unsigned lane); + void extractLaneInt16x8(FloatRegister input, Register output, unsigned lane, + SimdSign sign); + void extractLaneInt8x16(FloatRegister input, Register output, unsigned lane, + SimdSign sign); + + void replaceLaneFloat32x4(unsigned lane, FloatRegister lhs, FloatRegister rhs, + FloatRegister dest); + void replaceLaneFloat64x2(unsigned lane, FloatRegister lhs, FloatRegister rhs, + FloatRegister dest); + + void shuffleInt8x16(FloatRegister lhs, FloatRegister rhs, + FloatRegister output, const uint8_t lanes[16]); + void blendInt8x16(FloatRegister lhs, FloatRegister rhs, FloatRegister output, + FloatRegister temp, const uint8_t lanes[16]); + void blendInt16x8(FloatRegister lhs, FloatRegister rhs, FloatRegister output, + const uint16_t lanes[8]); + void laneSelectSimd128(FloatRegister mask, FloatRegister lhs, + FloatRegister rhs, FloatRegister output); + + void compareInt8x16(FloatRegister lhs, Operand rhs, Assembler::Condition cond, + FloatRegister output); + void compareInt8x16(Assembler::Condition cond, FloatRegister lhs, + const SimdConstant& rhs, FloatRegister dest); + void compareInt16x8(FloatRegister lhs, Operand rhs, Assembler::Condition cond, + FloatRegister output); + void compareInt16x8(Assembler::Condition cond, FloatRegister lhs, + const SimdConstant& rhs, FloatRegister dest); + void 
compareInt32x4(FloatRegister lhs, Operand rhs, Assembler::Condition cond, + FloatRegister output); + void compareInt32x4(Assembler::Condition cond, FloatRegister lhs, + const SimdConstant& rhs, FloatRegister dest); + void compareForEqualityInt64x2(FloatRegister lhs, Operand rhs, + Assembler::Condition cond, + FloatRegister output); + void compareForOrderingInt64x2(FloatRegister lhs, Operand rhs, + Assembler::Condition cond, FloatRegister temp1, + FloatRegister temp2, FloatRegister output); + void compareForOrderingInt64x2AVX(FloatRegister lhs, FloatRegister rhs, + Assembler::Condition cond, + FloatRegister output); + void compareFloat32x4(FloatRegister lhs, Operand rhs, + Assembler::Condition cond, FloatRegister output); + void compareFloat32x4(Assembler::Condition cond, FloatRegister lhs, + const SimdConstant& rhs, FloatRegister dest); + void compareFloat64x2(FloatRegister lhs, Operand rhs, + Assembler::Condition cond, FloatRegister output); + void compareFloat64x2(Assembler::Condition cond, FloatRegister lhs, + const SimdConstant& rhs, FloatRegister dest); + + void minMaxFloat32x4(bool isMin, FloatRegister lhs, Operand rhs, + FloatRegister temp1, FloatRegister temp2, + FloatRegister output); + void minMaxFloat32x4AVX(bool isMin, FloatRegister lhs, FloatRegister rhs, + FloatRegister temp1, FloatRegister temp2, + FloatRegister output); + void minMaxFloat64x2(bool isMin, FloatRegister lhs, Operand rhs, + FloatRegister temp1, FloatRegister temp2, + FloatRegister output); + void minMaxFloat64x2AVX(bool isMin, FloatRegister lhs, FloatRegister rhs, + FloatRegister temp1, FloatRegister temp2, + FloatRegister output); + void minFloat32x4(FloatRegister lhs, FloatRegister rhs, FloatRegister temp1, + FloatRegister temp2, FloatRegister output); + void maxFloat32x4(FloatRegister lhs, FloatRegister rhs, FloatRegister temp1, + FloatRegister temp2, FloatRegister output); + + void minFloat64x2(FloatRegister lhs, FloatRegister rhs, FloatRegister temp1, + FloatRegister temp2, FloatRegister output); + void maxFloat64x2(FloatRegister lhs, FloatRegister rhs, FloatRegister temp1, + FloatRegister temp2, FloatRegister output); + + void packedShiftByScalarInt8x16( + FloatRegister in, Register count, FloatRegister xtmp, FloatRegister dest, + void (MacroAssemblerX86Shared::*shift)(FloatRegister, FloatRegister, + FloatRegister), + void (MacroAssemblerX86Shared::*extend)(const Operand&, FloatRegister)); + + void packedLeftShiftByScalarInt8x16(FloatRegister in, Register count, + FloatRegister xtmp, FloatRegister dest); + void packedLeftShiftByScalarInt8x16(Imm32 count, FloatRegister src, + FloatRegister dest); + void packedRightShiftByScalarInt8x16(FloatRegister in, Register count, + FloatRegister xtmp, FloatRegister dest); + void packedRightShiftByScalarInt8x16(Imm32 count, FloatRegister src, + FloatRegister dest); + void packedUnsignedRightShiftByScalarInt8x16(FloatRegister in, Register count, + FloatRegister xtmp, + FloatRegister dest); + void packedUnsignedRightShiftByScalarInt8x16(Imm32 count, FloatRegister src, + FloatRegister dest); + + void packedLeftShiftByScalarInt16x8(FloatRegister in, Register count, + FloatRegister dest); + void packedRightShiftByScalarInt16x8(FloatRegister in, Register count, + FloatRegister dest); + void packedUnsignedRightShiftByScalarInt16x8(FloatRegister in, Register count, + FloatRegister dest); + + void packedLeftShiftByScalarInt32x4(FloatRegister in, Register count, + FloatRegister dest); + void packedRightShiftByScalarInt32x4(FloatRegister in, Register count, + FloatRegister 
dest); + void packedUnsignedRightShiftByScalarInt32x4(FloatRegister in, Register count, + FloatRegister dest); + void packedLeftShiftByScalarInt64x2(FloatRegister in, Register count, + FloatRegister dest); + void packedRightShiftByScalarInt64x2(FloatRegister in, Register count, + FloatRegister temp, FloatRegister dest); + void packedRightShiftByScalarInt64x2(Imm32 count, FloatRegister src, + FloatRegister dest); + void packedUnsignedRightShiftByScalarInt64x2(FloatRegister in, Register count, + FloatRegister dest); + void selectSimd128(FloatRegister mask, FloatRegister onTrue, + FloatRegister onFalse, FloatRegister temp, + FloatRegister output); + void popcntInt8x16(FloatRegister src, FloatRegister temp, + FloatRegister output); + + // SIMD inline methods private to the implementation, that appear to be used. + + template <class T, class Reg> + inline void loadScalar(const Operand& src, Reg dest); + template <class T, class Reg> + inline void storeScalar(Reg src, const Address& dest); + template <class T> + inline void loadAlignedVector(const Address& src, FloatRegister dest); + template <class T> + inline void storeAlignedVector(FloatRegister src, const Address& dest); + + void loadAlignedSimd128Int(const Address& src, FloatRegister dest) { + vmovdqa(Operand(src), dest); + } + void loadAlignedSimd128Int(const Operand& src, FloatRegister dest) { + vmovdqa(src, dest); + } + void storeAlignedSimd128Int(FloatRegister src, const Address& dest) { + vmovdqa(src, Operand(dest)); + } + void moveSimd128Int(FloatRegister src, FloatRegister dest) { + if (src != dest) { + vmovdqa(src, dest); + } + } + FloatRegister moveSimd128IntIfNotAVX(FloatRegister src, FloatRegister dest) { + MOZ_ASSERT(src.isSimd128() && dest.isSimd128()); + if (HasAVX()) { + return src; + } + moveSimd128Int(src, dest); + return dest; + } + FloatRegister selectDestIfAVX(FloatRegister src, FloatRegister dest) { + MOZ_ASSERT(src.isSimd128() && dest.isSimd128()); + return HasAVX() ? 
dest : src; + } + void loadUnalignedSimd128Int(const Address& src, FloatRegister dest) { + vmovdqu(Operand(src), dest); + } + void loadUnalignedSimd128Int(const BaseIndex& src, FloatRegister dest) { + vmovdqu(Operand(src), dest); + } + void loadUnalignedSimd128Int(const Operand& src, FloatRegister dest) { + vmovdqu(src, dest); + } + void storeUnalignedSimd128Int(FloatRegister src, const Address& dest) { + vmovdqu(src, Operand(dest)); + } + void storeUnalignedSimd128Int(FloatRegister src, const BaseIndex& dest) { + vmovdqu(src, Operand(dest)); + } + void storeUnalignedSimd128Int(FloatRegister src, const Operand& dest) { + vmovdqu(src, dest); + } + void packedLeftShiftByScalarInt16x8(Imm32 count, FloatRegister dest) { + count.value &= 15; + vpsllw(count, dest, dest); + } + void packedRightShiftByScalarInt16x8(Imm32 count, FloatRegister dest) { + count.value &= 15; + vpsraw(count, dest, dest); + } + void packedUnsignedRightShiftByScalarInt16x8(Imm32 count, + FloatRegister dest) { + count.value &= 15; + vpsrlw(count, dest, dest); + } + void packedLeftShiftByScalarInt32x4(Imm32 count, FloatRegister dest) { + count.value &= 31; + vpslld(count, dest, dest); + } + void packedRightShiftByScalarInt32x4(Imm32 count, FloatRegister dest) { + count.value &= 31; + vpsrad(count, dest, dest); + } + void packedUnsignedRightShiftByScalarInt32x4(Imm32 count, + FloatRegister dest) { + count.value &= 31; + vpsrld(count, dest, dest); + } + void loadAlignedSimd128Float(const Address& src, FloatRegister dest) { + vmovaps(Operand(src), dest); + } + void loadAlignedSimd128Float(const Operand& src, FloatRegister dest) { + vmovaps(src, dest); + } + void storeAlignedSimd128Float(FloatRegister src, const Address& dest) { + vmovaps(src, Operand(dest)); + } + void moveSimd128Float(FloatRegister src, FloatRegister dest) { + if (src != dest) { + vmovaps(src, dest); + } + } + FloatRegister moveSimd128FloatIfNotAVX(FloatRegister src, + FloatRegister dest) { + MOZ_ASSERT(src.isSimd128() && dest.isSimd128()); + if (HasAVX()) { + return src; + } + moveSimd128Float(src, dest); + return dest; + } + FloatRegister moveSimd128FloatIfEqual(FloatRegister src, FloatRegister dest, + FloatRegister other) { + MOZ_ASSERT(src.isSimd128() && dest.isSimd128()); + if (src != other) { + return src; + } + moveSimd128Float(src, dest); + return dest; + } + FloatRegister moveSimd128FloatIfNotAVXOrOther(FloatRegister src, + FloatRegister dest, + FloatRegister other) { + MOZ_ASSERT(src.isSimd128() && dest.isSimd128()); + if (HasAVX() && src != other) { + return src; + } + moveSimd128Float(src, dest); + return dest; + } + + void loadUnalignedSimd128(const Operand& src, FloatRegister dest) { + vmovups(src, dest); + } + void storeUnalignedSimd128(FloatRegister src, const Operand& dest) { + vmovups(src, dest); + } + + static uint32_t ComputeShuffleMask(uint32_t x = 0, uint32_t y = 1, + uint32_t z = 2, uint32_t w = 3) { + MOZ_ASSERT(x < 4 && y < 4 && z < 4 && w < 4); + uint32_t r = (w << 6) | (z << 4) | (y << 2) | (x << 0); + MOZ_ASSERT(r < 256); + return r; + } + + void shuffleInt32(uint32_t mask, FloatRegister src, FloatRegister dest) { + vpshufd(mask, src, dest); + } + void moveLowInt32(FloatRegister src, Register dest) { vmovd(src, dest); } + + void moveHighPairToLowPairFloat32(FloatRegister src, FloatRegister dest) { + vmovhlps(src, dest, dest); + } + void moveFloatAsDouble(Register src, FloatRegister dest) { + vmovd(src, dest); + vcvtss2sd(dest, dest, dest); + } + void loadFloatAsDouble(const Address& src, FloatRegister dest) { + vmovss(src, dest); + 
vcvtss2sd(dest, dest, dest); + } + void loadFloatAsDouble(const BaseIndex& src, FloatRegister dest) { + vmovss(src, dest); + vcvtss2sd(dest, dest, dest); + } + void loadFloatAsDouble(const Operand& src, FloatRegister dest) { + loadFloat32(src, dest); + vcvtss2sd(dest, dest, dest); + } + void loadFloat32(const Address& src, FloatRegister dest) { + vmovss(src, dest); + } + void loadFloat32(const BaseIndex& src, FloatRegister dest) { + vmovss(src, dest); + } + void loadFloat32(const Operand& src, FloatRegister dest) { + switch (src.kind()) { + case Operand::MEM_REG_DISP: + loadFloat32(src.toAddress(), dest); + break; + case Operand::MEM_SCALE: + loadFloat32(src.toBaseIndex(), dest); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void moveFloat32(FloatRegister src, FloatRegister dest) { + // Use vmovaps instead of vmovss to avoid dependencies. + vmovaps(src, dest); + } + + // Checks whether a double is representable as a 32-bit integer. If so, the + // integer is written to the output register. Otherwise, a bailout is taken to + // the given snapshot. This function overwrites the scratch float register. + void convertDoubleToInt32(FloatRegister src, Register dest, Label* fail, + bool negativeZeroCheck = true) { + // Check for -0.0 + if (negativeZeroCheck) { + branchNegativeZero(src, dest, fail); + } + + ScratchDoubleScope scratch(asMasm()); + vcvttsd2si(src, dest); + convertInt32ToDouble(dest, scratch); + vucomisd(scratch, src); + j(Assembler::Parity, fail); + j(Assembler::NotEqual, fail); + } + + // Checks whether a float32 is representable as a 32-bit integer. If so, the + // integer is written to the output register. Otherwise, a bailout is taken to + // the given snapshot. This function overwrites the scratch float register. + void convertFloat32ToInt32(FloatRegister src, Register dest, Label* fail, + bool negativeZeroCheck = true) { + // Check for -0.0 + if (negativeZeroCheck) { + branchNegativeZeroFloat32(src, dest, fail); + } + + ScratchFloat32Scope scratch(asMasm()); + vcvttss2si(src, dest); + convertInt32ToFloat32(dest, scratch); + vucomiss(scratch, src); + j(Assembler::Parity, fail); + j(Assembler::NotEqual, fail); + } + + void truncateDoubleToInt32(FloatRegister src, Register dest, Label* fail) { + // vcvttsd2si returns 0x80000000 on failure. Test for it by + // subtracting 1 and testing overflow. The other possibility is to test + // equality for INT_MIN after a comparison, but 1 costs fewer bytes to + // materialize. + vcvttsd2si(src, dest); + cmp32(dest, Imm32(1)); + j(Assembler::Overflow, fail); + } + void truncateFloat32ToInt32(FloatRegister src, Register dest, Label* fail) { + // Same trick as explained in the above comment. + vcvttss2si(src, dest); + cmp32(dest, Imm32(1)); + j(Assembler::Overflow, fail); + } + + inline void clampIntToUint8(Register reg); + + bool maybeInlineDouble(double d, FloatRegister dest) { + // Loading zero with xor is specially optimized in hardware. + if (mozilla::IsPositiveZero(d)) { + zeroDouble(dest); + return true; + } + + // It is also possible to load several common constants using vpcmpeqw + // to get all ones and then vpsllq and vpsrlq to get zeros at the ends, + // as described in "13.4 Generating constants" of + // "2. Optimizing subroutines in assembly language" by Agner Fog, and as + // previously implemented here. However, with x86 and x64 both using + // constant pool loads for double constants, this is probably only + // worthwhile in cases where a load is likely to be delayed. 
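+    // (For example, 1.0 could be built with vpcmpeqw to get all ones, then
+    // vpsllq by 54 and vpsrlq by 2, leaving 0x3FF0000000000000 in each lane.)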
+ + return false; + } + + bool maybeInlineFloat(float f, FloatRegister dest) { + // See comment above + if (mozilla::IsPositiveZero(f)) { + zeroFloat32(dest); + return true; + } + return false; + } + + bool maybeInlineSimd128Int(const SimdConstant& v, const FloatRegister& dest) { + if (v.isZeroBits()) { + vpxor(dest, dest, dest); + return true; + } + if (v.isOneBits()) { + vpcmpeqw(Operand(dest), dest, dest); + return true; + } + return false; + } + bool maybeInlineSimd128Float(const SimdConstant& v, + const FloatRegister& dest) { + if (v.isZeroBits()) { + vxorps(dest, dest, dest); + return true; + } + return false; + } + + void convertBoolToInt32(Register source, Register dest) { + // Note that C++ bool is only 1 byte, so zero extend it to clear the + // higher-order bits. + movzbl(source, dest); + } + + void emitSet(Assembler::Condition cond, Register dest, + Assembler::NaNCond ifNaN = Assembler::NaN_HandledByCond) { + if (AllocatableGeneralRegisterSet(Registers::SingleByteRegs).has(dest)) { + // If the register we're defining is a single byte register, + // take advantage of the setCC instruction + setCC(cond, dest); + movzbl(dest, dest); + + if (ifNaN != Assembler::NaN_HandledByCond) { + Label noNaN; + j(Assembler::NoParity, &noNaN); + mov(ImmWord(ifNaN == Assembler::NaN_IsTrue), dest); + bind(&noNaN); + } + } else { + Label end; + Label ifFalse; + + if (ifNaN == Assembler::NaN_IsFalse) { + j(Assembler::Parity, &ifFalse); + } + // Note a subtlety here: FLAGS is live at this point, and the + // mov interface doesn't guarantee to preserve FLAGS. Use + // movl instead of mov, because the movl instruction + // preserves FLAGS. + movl(Imm32(1), dest); + j(cond, &end); + if (ifNaN == Assembler::NaN_IsTrue) { + j(Assembler::Parity, &end); + } + bind(&ifFalse); + mov(ImmWord(0), dest); + + bind(&end); + } + } + + void emitSetRegisterIf(AssemblerX86Shared::Condition cond, Register dest) { + if (AllocatableGeneralRegisterSet(Registers::SingleByteRegs).has(dest)) { + // If the register we're defining is a single byte register, + // take advantage of the setCC instruction + setCC(cond, dest); + movzbl(dest, dest); + } else { + Label end; + movl(Imm32(1), dest); + j(cond, &end); + mov(ImmWord(0), dest); + bind(&end); + } + } + + // Emit a JMP that can be toggled to a CMP. See ToggleToJmp(), ToggleToCmp(). + CodeOffset toggledJump(Label* label) { + CodeOffset offset(size()); + jump(label); + return offset; + } + + template <typename T> + void computeEffectiveAddress(const T& address, Register dest) { + lea(Operand(address), dest); + } + + void checkStackAlignment() { + // Exists for ARM compatibility. + } + + void abiret() { ret(); } + + protected: + bool buildOOLFakeExitFrame(void* fakeReturnAddr); +}; + +// Specialize for float to use movaps. Use movdqa for everything else. +template <> +inline void MacroAssemblerX86Shared::loadAlignedVector<float>( + const Address& src, FloatRegister dest) { + loadAlignedSimd128Float(src, dest); +} + +template <typename T> +inline void MacroAssemblerX86Shared::loadAlignedVector(const Address& src, + FloatRegister dest) { + loadAlignedSimd128Int(src, dest); +} + +// Specialize for float to use movaps. Use movdqa for everything else. 
+template <> +inline void MacroAssemblerX86Shared::storeAlignedVector<float>( + FloatRegister src, const Address& dest) { + storeAlignedSimd128Float(src, dest); +} + +template <typename T> +inline void MacroAssemblerX86Shared::storeAlignedVector(FloatRegister src, + const Address& dest) { + storeAlignedSimd128Int(src, dest); +} + +template <> +inline void MacroAssemblerX86Shared::loadScalar<int8_t>(const Operand& src, + Register dest) { + load8ZeroExtend(src, dest); +} +template <> +inline void MacroAssemblerX86Shared::loadScalar<int16_t>(const Operand& src, + Register dest) { + load16ZeroExtend(src, dest); +} +template <> +inline void MacroAssemblerX86Shared::loadScalar<int32_t>(const Operand& src, + Register dest) { + load32(src, dest); +} +template <> +inline void MacroAssemblerX86Shared::loadScalar<float>(const Operand& src, + FloatRegister dest) { + loadFloat32(src, dest); +} + +template <> +inline void MacroAssemblerX86Shared::storeScalar<int8_t>(Register src, + const Address& dest) { + store8(src, dest); +} +template <> +inline void MacroAssemblerX86Shared::storeScalar<int16_t>(Register src, + const Address& dest) { + store16(src, dest); +} +template <> +inline void MacroAssemblerX86Shared::storeScalar<int32_t>(Register src, + const Address& dest) { + store32(src, dest); +} +template <> +inline void MacroAssemblerX86Shared::storeScalar<float>(FloatRegister src, + const Address& dest) { + vmovss(src, dest); +} + +} // namespace jit +} // namespace js + +#endif /* jit_x86_shared_MacroAssembler_x86_shared_h */ diff --git a/js/src/jit/x86-shared/MoveEmitter-x86-shared.cpp b/js/src/jit/x86-shared/MoveEmitter-x86-shared.cpp new file mode 100644 index 0000000000..590bd90a37 --- /dev/null +++ b/js/src/jit/x86-shared/MoveEmitter-x86-shared.cpp @@ -0,0 +1,528 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "jit/x86-shared/MoveEmitter-x86-shared.h" + +#include "jit/MacroAssembler-inl.h" + +using namespace js; +using namespace js::jit; + +using mozilla::Maybe; + +MoveEmitterX86::MoveEmitterX86(MacroAssembler& masm) + : inCycle_(false), masm(masm), pushedAtCycle_(-1) { + pushedAtStart_ = masm.framePushed(); +} + +// Examine the cycle in moves starting at position i. Determine if it's a +// simple cycle consisting of all register-to-register moves in a single class, +// and whether it can be implemented entirely by swaps. +size_t MoveEmitterX86::characterizeCycle(const MoveResolver& moves, size_t i, + bool* allGeneralRegs, + bool* allFloatRegs) { + size_t swapCount = 0; + + for (size_t j = i;; j++) { + const MoveOp& move = moves.getMove(j); + + // If it isn't a cycle of registers of the same kind, we won't be able + // to optimize it. + if (!move.to().isGeneralReg()) { + *allGeneralRegs = false; + } + if (!move.to().isFloatReg()) { + *allFloatRegs = false; + } + if (!*allGeneralRegs && !*allFloatRegs) { + return -1; + } + + // Stop iterating when we see the last one. + if (j != i && move.isCycleEnd()) { + break; + } + + // Check that this move is actually part of the cycle. This is + // over-conservative when there are multiple reads from the same source, + // but that's expected to be rare. 
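+    // (E.g. the cycle (c -> a), (b -> c), (a -> b) passes this check: each
+    // source is the destination of the following move, and the last source
+    // wraps around to the first destination.)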
+ if (move.from() != moves.getMove(j + 1).to()) { + *allGeneralRegs = false; + *allFloatRegs = false; + return -1; + } + + swapCount++; + } + + // Check that the last move cycles back to the first move. + const MoveOp& move = moves.getMove(i + swapCount); + if (move.from() != moves.getMove(i).to()) { + *allGeneralRegs = false; + *allFloatRegs = false; + return -1; + } + + return swapCount; +} + +// If we can emit optimized code for the cycle in moves starting at position i, +// do so, and return true. +bool MoveEmitterX86::maybeEmitOptimizedCycle(const MoveResolver& moves, + size_t i, bool allGeneralRegs, + bool allFloatRegs, + size_t swapCount) { + if (allGeneralRegs && swapCount <= 2) { + // Use x86's swap-integer-registers instruction if we only have a few + // swaps. (x86 also has a swap between registers and memory but it's + // slow.) + for (size_t k = 0; k < swapCount; k++) { + masm.xchg(moves.getMove(i + k).to().reg(), + moves.getMove(i + k + 1).to().reg()); + } + return true; + } + + if (allFloatRegs && swapCount == 1) { + // There's no xchg for xmm registers, but if we only need a single swap, + // it's cheap to do an XOR swap. + FloatRegister a = moves.getMove(i).to().floatReg(); + FloatRegister b = moves.getMove(i + 1).to().floatReg(); + masm.vxorpd(a, b, b); + masm.vxorpd(b, a, a); + masm.vxorpd(a, b, b); + return true; + } + + return false; +} + +void MoveEmitterX86::emit(const MoveResolver& moves) { +#if defined(JS_CODEGEN_X86) && defined(DEBUG) + // Clobber any scratch register we have, to make regalloc bugs more visible. + if (scratchRegister_.isSome()) { + masm.mov(ImmWord(0xdeadbeef), scratchRegister_.value()); + } +#endif + + for (size_t i = 0; i < moves.numMoves(); i++) { +#if defined(JS_CODEGEN_X86) && defined(DEBUG) + if (!scratchRegister_.isSome()) { + Maybe<Register> reg = findScratchRegister(moves, i); + if (reg.isSome()) { + masm.mov(ImmWord(0xdeadbeef), reg.value()); + } + } +#endif + + const MoveOp& move = moves.getMove(i); + const MoveOperand& from = move.from(); + const MoveOperand& to = move.to(); + + if (move.isCycleEnd()) { + MOZ_ASSERT(inCycle_); + completeCycle(to, move.type()); + inCycle_ = false; + continue; + } + + if (move.isCycleBegin()) { + MOZ_ASSERT(!inCycle_); + + // Characterize the cycle. + bool allGeneralRegs = true, allFloatRegs = true; + size_t swapCount = + characterizeCycle(moves, i, &allGeneralRegs, &allFloatRegs); + + // Attempt to optimize it to avoid using the stack. + if (maybeEmitOptimizedCycle(moves, i, allGeneralRegs, allFloatRegs, + swapCount)) { + i += swapCount; + continue; + } + + // Otherwise use the stack. + breakCycle(to, move.endCycleType()); + inCycle_ = true; + } + + // A normal move which is not part of a cycle. 
+ switch (move.type()) { + case MoveOp::FLOAT32: + emitFloat32Move(from, to); + break; + case MoveOp::DOUBLE: + emitDoubleMove(from, to); + break; + case MoveOp::INT32: + emitInt32Move(from, to, moves, i); + break; + case MoveOp::GENERAL: + emitGeneralMove(from, to, moves, i); + break; + case MoveOp::SIMD128: + emitSimd128Move(from, to); + break; + default: + MOZ_CRASH("Unexpected move type"); + } + } +} + +MoveEmitterX86::~MoveEmitterX86() { assertDone(); } + +Address MoveEmitterX86::cycleSlot() { + if (pushedAtCycle_ == -1) { + // Reserve stack for cycle resolution + static_assert(SpillSlotSize == 16); + masm.reserveStack(SpillSlotSize); + pushedAtCycle_ = masm.framePushed(); + } + + return Address(StackPointer, masm.framePushed() - pushedAtCycle_); +} + +Address MoveEmitterX86::toAddress(const MoveOperand& operand) const { + if (operand.base() != StackPointer) { + return Address(operand.base(), operand.disp()); + } + + MOZ_ASSERT(operand.disp() >= 0); + + // Otherwise, the stack offset may need to be adjusted. + return Address(StackPointer, + operand.disp() + (masm.framePushed() - pushedAtStart_)); +} + +// Warning, do not use the resulting operand with pop instructions, since they +// compute the effective destination address after altering the stack pointer. +// Use toPopOperand if an Operand is needed for a pop. +Operand MoveEmitterX86::toOperand(const MoveOperand& operand) const { + if (operand.isMemoryOrEffectiveAddress()) { + return Operand(toAddress(operand)); + } + if (operand.isGeneralReg()) { + return Operand(operand.reg()); + } + + MOZ_ASSERT(operand.isFloatReg()); + return Operand(operand.floatReg()); +} + +// This is the same as toOperand except that it computes an Operand suitable for +// use in a pop. +Operand MoveEmitterX86::toPopOperand(const MoveOperand& operand) const { + if (operand.isMemory()) { + if (operand.base() != StackPointer) { + return Operand(operand.base(), operand.disp()); + } + + MOZ_ASSERT(operand.disp() >= 0); + + // Otherwise, the stack offset may need to be adjusted. + // Note the adjustment by the stack slot here, to offset for the fact that + // pop computes its effective address after incrementing the stack pointer. + return Operand( + StackPointer, + operand.disp() + (masm.framePushed() - sizeof(void*) - pushedAtStart_)); + } + if (operand.isGeneralReg()) { + return Operand(operand.reg()); + } + + MOZ_ASSERT(operand.isFloatReg()); + return Operand(operand.floatReg()); +} + +void MoveEmitterX86::breakCycle(const MoveOperand& to, MoveOp::Type type) { + // There is some pattern: + // (A -> B) + // (B -> A) + // + // This case handles (A -> B), which we reach first. We save B, then allow + // the original move to continue. 
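+  // (Depending on the type, B is saved either in cycleSlot(), a 16-byte spill
+  // slot reserved on first use, or with a push; completeCycle() later reloads
+  // it into A.)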
+ switch (type) { + case MoveOp::SIMD128: + if (to.isMemory()) { + ScratchSimd128Scope scratch(masm); + masm.loadUnalignedSimd128(toAddress(to), scratch); + masm.storeUnalignedSimd128(scratch, cycleSlot()); + } else { + masm.storeUnalignedSimd128(to.floatReg(), cycleSlot()); + } + break; + case MoveOp::FLOAT32: + if (to.isMemory()) { + ScratchFloat32Scope scratch(masm); + masm.loadFloat32(toAddress(to), scratch); + masm.storeFloat32(scratch, cycleSlot()); + } else { + masm.storeFloat32(to.floatReg(), cycleSlot()); + } + break; + case MoveOp::DOUBLE: + if (to.isMemory()) { + ScratchDoubleScope scratch(masm); + masm.loadDouble(toAddress(to), scratch); + masm.storeDouble(scratch, cycleSlot()); + } else { + masm.storeDouble(to.floatReg(), cycleSlot()); + } + break; + case MoveOp::INT32: +#ifdef JS_CODEGEN_X64 + // x64 can't pop to a 32-bit destination, so don't push. + if (to.isMemory()) { + masm.load32(toAddress(to), ScratchReg); + masm.store32(ScratchReg, cycleSlot()); + } else { + masm.store32(to.reg(), cycleSlot()); + } + break; +#endif + case MoveOp::GENERAL: + masm.Push(toOperand(to)); + break; + default: + MOZ_CRASH("Unexpected move type"); + } +} + +void MoveEmitterX86::completeCycle(const MoveOperand& to, MoveOp::Type type) { + // There is some pattern: + // (A -> B) + // (B -> A) + // + // This case handles (B -> A), which we reach last. We emit a move from the + // saved value of B, to A. + switch (type) { + case MoveOp::SIMD128: + MOZ_ASSERT(pushedAtCycle_ != -1); + MOZ_ASSERT(pushedAtCycle_ - pushedAtStart_ >= Simd128DataSize); + if (to.isMemory()) { + ScratchSimd128Scope scratch(masm); + masm.loadUnalignedSimd128(cycleSlot(), scratch); + masm.storeUnalignedSimd128(scratch, toAddress(to)); + } else { + masm.loadUnalignedSimd128(cycleSlot(), to.floatReg()); + } + break; + case MoveOp::FLOAT32: + MOZ_ASSERT(pushedAtCycle_ != -1); + MOZ_ASSERT(pushedAtCycle_ - pushedAtStart_ >= sizeof(float)); + if (to.isMemory()) { + ScratchFloat32Scope scratch(masm); + masm.loadFloat32(cycleSlot(), scratch); + masm.storeFloat32(scratch, toAddress(to)); + } else { + masm.loadFloat32(cycleSlot(), to.floatReg()); + } + break; + case MoveOp::DOUBLE: + MOZ_ASSERT(pushedAtCycle_ != -1); + MOZ_ASSERT(pushedAtCycle_ - pushedAtStart_ >= sizeof(double)); + if (to.isMemory()) { + ScratchDoubleScope scratch(masm); + masm.loadDouble(cycleSlot(), scratch); + masm.storeDouble(scratch, toAddress(to)); + } else { + masm.loadDouble(cycleSlot(), to.floatReg()); + } + break; + case MoveOp::INT32: +#ifdef JS_CODEGEN_X64 + MOZ_ASSERT(pushedAtCycle_ != -1); + MOZ_ASSERT(pushedAtCycle_ - pushedAtStart_ >= sizeof(int32_t)); + // x64 can't pop to a 32-bit destination. + if (to.isMemory()) { + masm.load32(cycleSlot(), ScratchReg); + masm.store32(ScratchReg, toAddress(to)); + } else { + masm.load32(cycleSlot(), to.reg()); + } + break; +#endif + case MoveOp::GENERAL: + MOZ_ASSERT(masm.framePushed() - pushedAtStart_ >= sizeof(intptr_t)); + masm.Pop(toPopOperand(to)); + break; + default: + MOZ_CRASH("Unexpected move type"); + } +} + +void MoveEmitterX86::emitInt32Move(const MoveOperand& from, + const MoveOperand& to, + const MoveResolver& moves, size_t i) { + if (from.isGeneralReg()) { + masm.move32(from.reg(), toOperand(to)); + } else if (to.isGeneralReg()) { + MOZ_ASSERT(from.isMemory()); + masm.load32(toAddress(from), to.reg()); + } else { + // Memory to memory gpr move. 
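+    // x86 has no memory-to-memory mov, so either stage the value in a dead
+    // register scavenged by findScratchRegister(), or bounce it through the
+    // stack with a push/pop pair.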
+ MOZ_ASSERT(from.isMemory()); + Maybe<Register> reg = findScratchRegister(moves, i); + if (reg.isSome()) { + masm.load32(toAddress(from), reg.value()); + masm.move32(reg.value(), toOperand(to)); + } else { + // No scratch register available; bounce it off the stack. + masm.Push(toOperand(from)); + masm.Pop(toPopOperand(to)); + } + } +} + +void MoveEmitterX86::emitGeneralMove(const MoveOperand& from, + const MoveOperand& to, + const MoveResolver& moves, size_t i) { + if (from.isGeneralReg()) { + masm.mov(from.reg(), toOperand(to)); + } else if (to.isGeneralReg()) { + MOZ_ASSERT(from.isMemoryOrEffectiveAddress()); + if (from.isMemory()) { + masm.loadPtr(toAddress(from), to.reg()); + } else { + masm.lea(toOperand(from), to.reg()); + } + } else if (from.isMemory()) { + // Memory to memory gpr move. + Maybe<Register> reg = findScratchRegister(moves, i); + if (reg.isSome()) { + masm.loadPtr(toAddress(from), reg.value()); + masm.mov(reg.value(), toOperand(to)); + } else { + // No scratch register available; bounce it off the stack. + masm.Push(toOperand(from)); + masm.Pop(toPopOperand(to)); + } + } else { + // Effective address to memory move. + MOZ_ASSERT(from.isEffectiveAddress()); + Maybe<Register> reg = findScratchRegister(moves, i); + if (reg.isSome()) { + masm.lea(toOperand(from), reg.value()); + masm.mov(reg.value(), toOperand(to)); + } else { + // This is tricky without a scratch reg. We can't do an lea. Bounce the + // base register off the stack, then add the offset in place. Note that + // this clobbers FLAGS! + masm.Push(from.base()); + masm.Pop(toPopOperand(to)); + MOZ_ASSERT(to.isMemoryOrEffectiveAddress()); + masm.addPtr(Imm32(from.disp()), toAddress(to)); + } + } +} + +void MoveEmitterX86::emitFloat32Move(const MoveOperand& from, + const MoveOperand& to) { + MOZ_ASSERT_IF(from.isFloatReg(), from.floatReg().isSingle()); + MOZ_ASSERT_IF(to.isFloatReg(), to.floatReg().isSingle()); + + if (from.isFloatReg()) { + if (to.isFloatReg()) { + masm.moveFloat32(from.floatReg(), to.floatReg()); + } else { + masm.storeFloat32(from.floatReg(), toAddress(to)); + } + } else if (to.isFloatReg()) { + masm.loadFloat32(toAddress(from), to.floatReg()); + } else { + // Memory to memory move. + MOZ_ASSERT(from.isMemory()); + ScratchFloat32Scope scratch(masm); + masm.loadFloat32(toAddress(from), scratch); + masm.storeFloat32(scratch, toAddress(to)); + } +} + +void MoveEmitterX86::emitDoubleMove(const MoveOperand& from, + const MoveOperand& to) { + MOZ_ASSERT_IF(from.isFloatReg(), from.floatReg().isDouble()); + MOZ_ASSERT_IF(to.isFloatReg(), to.floatReg().isDouble()); + + if (from.isFloatReg()) { + if (to.isFloatReg()) { + masm.moveDouble(from.floatReg(), to.floatReg()); + } else { + masm.storeDouble(from.floatReg(), toAddress(to)); + } + } else if (to.isFloatReg()) { + masm.loadDouble(toAddress(from), to.floatReg()); + } else { + // Memory to memory move. 
+ MOZ_ASSERT(from.isMemory()); + ScratchDoubleScope scratch(masm); + masm.loadDouble(toAddress(from), scratch); + masm.storeDouble(scratch, toAddress(to)); + } +} + +void MoveEmitterX86::emitSimd128Move(const MoveOperand& from, + const MoveOperand& to) { + MOZ_ASSERT_IF(from.isFloatReg(), from.floatReg().isSimd128()); + MOZ_ASSERT_IF(to.isFloatReg(), to.floatReg().isSimd128()); + + if (from.isFloatReg()) { + if (to.isFloatReg()) { + masm.moveSimd128(from.floatReg(), to.floatReg()); + } else { + masm.storeUnalignedSimd128(from.floatReg(), toAddress(to)); + } + } else if (to.isFloatReg()) { + masm.loadUnalignedSimd128(toAddress(from), to.floatReg()); + } else { + // Memory to memory move. + MOZ_ASSERT(from.isMemory()); + ScratchSimd128Scope scratch(masm); + masm.loadUnalignedSimd128(toAddress(from), scratch); + masm.storeUnalignedSimd128(scratch, toAddress(to)); + } +} + +void MoveEmitterX86::assertDone() { MOZ_ASSERT(!inCycle_); } + +void MoveEmitterX86::finish() { + assertDone(); + + masm.freeStack(masm.framePushed() - pushedAtStart_); +} + +Maybe<Register> MoveEmitterX86::findScratchRegister(const MoveResolver& moves, + size_t initial) { +#ifdef JS_CODEGEN_X86 + if (scratchRegister_.isSome()) { + return scratchRegister_; + } + + // All registers are either in use by this move group or are live + // afterwards. Look through the remaining moves for a register which is + // clobbered before it is used, and is thus dead at this point. + AllocatableGeneralRegisterSet regs(GeneralRegisterSet::All()); + for (size_t i = initial; i < moves.numMoves(); i++) { + const MoveOp& move = moves.getMove(i); + if (move.from().isGeneralReg()) { + regs.takeUnchecked(move.from().reg()); + } else if (move.from().isMemoryOrEffectiveAddress()) { + regs.takeUnchecked(move.from().base()); + } + if (move.to().isGeneralReg()) { + if (i != initial && !move.isCycleBegin() && regs.has(move.to().reg())) { + return mozilla::Some(move.to().reg()); + } + regs.takeUnchecked(move.to().reg()); + } else if (move.to().isMemoryOrEffectiveAddress()) { + regs.takeUnchecked(move.to().base()); + } + } + + return mozilla::Nothing(); +#else + return mozilla::Some(ScratchReg); +#endif +} diff --git a/js/src/jit/x86-shared/MoveEmitter-x86-shared.h b/js/src/jit/x86-shared/MoveEmitter-x86-shared.h new file mode 100644 index 0000000000..15a1680c9a --- /dev/null +++ b/js/src/jit/x86-shared/MoveEmitter-x86-shared.h @@ -0,0 +1,83 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef jit_MoveEmitter_x86_shared_h +#define jit_MoveEmitter_x86_shared_h + +#include "mozilla/Maybe.h" + +#include <stddef.h> +#include <stdint.h> + +#include "jit/MoveResolver.h" +#include "jit/Registers.h" + +namespace js { +namespace jit { + +struct Address; +class MacroAssembler; +class Operand; + +class MoveEmitterX86 { + bool inCycle_; + MacroAssembler& masm; + + // Original stack push value. + uint32_t pushedAtStart_; + + // This is a store stack offset for the cycle-break spill slot, snapshotting + // codegen->framePushed_ at the time it is allocated. -1 if not allocated. + int32_t pushedAtCycle_; + +#ifdef JS_CODEGEN_X86 + // Optional scratch register for performing moves. 
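+  // x86 reserves no spare GPR for the move emitter, so a caller may donate
+  // one via setScratchRegister(); otherwise findScratchRegister() tries to
+  // scavenge a register that is provably dead within the move group.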
+ mozilla::Maybe<Register> scratchRegister_; +#endif + + void assertDone(); + Address cycleSlot(); + Address toAddress(const MoveOperand& operand) const; + Operand toOperand(const MoveOperand& operand) const; + Operand toPopOperand(const MoveOperand& operand) const; + + size_t characterizeCycle(const MoveResolver& moves, size_t i, + bool* allGeneralRegs, bool* allFloatRegs); + bool maybeEmitOptimizedCycle(const MoveResolver& moves, size_t i, + bool allGeneralRegs, bool allFloatRegs, + size_t swapCount); + void emitInt32Move(const MoveOperand& from, const MoveOperand& to, + const MoveResolver& moves, size_t i); + void emitGeneralMove(const MoveOperand& from, const MoveOperand& to, + const MoveResolver& moves, size_t i); + void emitFloat32Move(const MoveOperand& from, const MoveOperand& to); + void emitDoubleMove(const MoveOperand& from, const MoveOperand& to); + void emitSimd128Move(const MoveOperand& from, const MoveOperand& to); + void breakCycle(const MoveOperand& to, MoveOp::Type type); + void completeCycle(const MoveOperand& to, MoveOp::Type type); + + public: + explicit MoveEmitterX86(MacroAssembler& masm); + ~MoveEmitterX86(); + void emit(const MoveResolver& moves); + void finish(); + + void setScratchRegister(Register reg) { +#ifdef JS_CODEGEN_X86 + scratchRegister_.emplace(reg); +#endif + } + + mozilla::Maybe<Register> findScratchRegister(const MoveResolver& moves, + size_t i); +}; + +using MoveEmitter = MoveEmitterX86; + +} // namespace jit +} // namespace js + +#endif /* jit_MoveEmitter_x86_shared_h */ diff --git a/js/src/jit/x86-shared/Patching-x86-shared.h b/js/src/jit/x86-shared/Patching-x86-shared.h new file mode 100644 index 0000000000..85c523cd15 --- /dev/null +++ b/js/src/jit/x86-shared/Patching-x86-shared.h @@ -0,0 +1,113 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef jit_x86_shared_Patching_x86_shared_h +#define jit_x86_shared_Patching_x86_shared_h + +namespace js { +namespace jit { + +namespace X86Encoding { + +inline void* GetPointer(const void* where) { + void* res; + memcpy(&res, (const char*)where - sizeof(void*), sizeof(void*)); + return res; +} + +inline void SetPointer(void* where, const void* value) { + memcpy((char*)where - sizeof(void*), &value, sizeof(void*)); +} + +inline int32_t GetInt32(const void* where) { + int32_t res; + memcpy(&res, (const char*)where - sizeof(int32_t), sizeof(int32_t)); + return res; +} + +inline void SetInt32(void* where, int32_t value, uint32_t trailing = 0) { + memcpy((char*)where - trailing - sizeof(int32_t), &value, sizeof(int32_t)); +} + +inline void SetRel32(void* from, void* to, uint32_t trailing = 0) { + intptr_t offset = + reinterpret_cast<intptr_t>(to) - reinterpret_cast<intptr_t>(from); + MOZ_ASSERT(offset == static_cast<int32_t>(offset), + "offset is too great for a 32-bit relocation"); + if (offset != static_cast<int32_t>(offset)) { + MOZ_CRASH("offset is too great for a 32-bit relocation"); + } + + SetInt32(from, offset, trailing); +} + +inline void* GetRel32Target(void* where) { + int32_t rel = GetInt32(where); + return (char*)where + rel; +} + +// JmpSrc represents a positive offset within a code buffer, or an uninitialized +// value. 
Lots of code depends on uninitialized JmpSrc holding the value -1, on +// -1 being a legal value of JmpSrc, and on being able to initialize a JmpSrc +// with the value -1. +// +// The value of the `offset` is always positive and <= MaxCodeBytesPerProcess, +// see ProcessExecutableMemory.h. The latter quantity in turn must fit in an +// i32. But we further require that the value is not precisely INT32_MAX, so as +// to allow the JmpSrc value -1 to mean "uninitialized" without ambiguity. +// +// The quantity `trailing` denotes the number of bytes of data that follow the +// patch field in the instruction. The offset points to the end of the +// instruction as per normal. The information about trailing bytes is needed +// separately from the offset to correctly patch instructions that have +// immediates trailing the patch field (eg CMPSS and CMPSD). Currently the only +// allowed values for `trailing` are 0 and 1. + +static_assert(MaxCodeBytesPerProcess < size_t(INT32_MAX), "Invariant"); + +class JmpSrc { + public: + JmpSrc() : offset_(INT32_MAX), trailing_(0) {} + explicit JmpSrc(int32_t offset) : offset_(offset), trailing_(0) { + // offset -1 is stored as INT32_MAX + MOZ_ASSERT(offset == -1 || (offset >= 0 && offset < INT32_MAX)); + } + JmpSrc(int32_t offset, uint32_t trailing) + : offset_(offset), trailing_(trailing) { + // Disallow offset -1 in this situation, it does not apply. + MOZ_ASSERT(offset >= 0 && offset < INT32_MAX); + MOZ_ASSERT(trailing <= 1); + } + int32_t offset() const { + return offset_ == INT32_MAX ? -1 : int32_t(offset_); + } + uint32_t trailing() const { return trailing_; } + + private: + uint32_t offset_ : 31; + uint32_t trailing_ : 1; +}; + +class JmpDst { + public: + explicit JmpDst(int32_t offset) : offset_(offset) {} + int32_t offset() const { return offset_; } + + private: + int32_t offset_; +}; + +inline bool CanRelinkJump(void* from, void* to) { + intptr_t offset = static_cast<char*>(to) - static_cast<char*>(from); + return (offset == static_cast<int32_t>(offset)); +} + +} // namespace X86Encoding + +} // namespace jit +} // namespace js + +#endif /* jit_x86_shared_Patching_x86_shared_h */ diff --git a/js/src/jit/x86/Assembler-x86.cpp b/js/src/jit/x86/Assembler-x86.cpp new file mode 100644 index 0000000000..7296f77291 --- /dev/null +++ b/js/src/jit/x86/Assembler-x86.cpp @@ -0,0 +1,85 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "jit/x86/Assembler-x86.h" + +#include "gc/Marking.h" +#include "util/Memory.h" + +using namespace js; +using namespace js::jit; + +ABIArgGenerator::ABIArgGenerator() : stackOffset_(0), current_() {} + +ABIArg ABIArgGenerator::next(MIRType type) { + switch (type) { + case MIRType::Int32: + case MIRType::Float32: + case MIRType::Pointer: + case MIRType::RefOrNull: + case MIRType::StackResults: + current_ = ABIArg(stackOffset_); + stackOffset_ += sizeof(uint32_t); + break; + case MIRType::Double: + case MIRType::Int64: + current_ = ABIArg(stackOffset_); + stackOffset_ += sizeof(uint64_t); + break; + case MIRType::Simd128: + // On Win64, >64 bit args need to be passed by reference. However, wasm + // doesn't allow passing SIMD values to JS, so the only way to reach this + // is wasm to wasm calls. 
Ergo we can break the native ABI here and use + // the Wasm ABI instead. + stackOffset_ = AlignBytes(stackOffset_, SimdMemoryAlignment); + current_ = ABIArg(stackOffset_); + stackOffset_ += Simd128DataSize; + break; + default: + MOZ_CRASH("Unexpected argument type"); + } + return current_; +} + +void Assembler::executableCopy(uint8_t* buffer) { + AssemblerX86Shared::executableCopy(buffer); + for (RelativePatch& rp : jumps_) { + X86Encoding::SetRel32(buffer + rp.offset, rp.target); + } +} + +class RelocationIterator { + CompactBufferReader reader_; + uint32_t offset_; + + public: + explicit RelocationIterator(CompactBufferReader& reader) : reader_(reader) {} + + bool read() { + if (!reader_.more()) { + return false; + } + offset_ = reader_.readUnsigned(); + return true; + } + + uint32_t offset() const { return offset_; } +}; + +static inline JitCode* CodeFromJump(uint8_t* jump) { + uint8_t* target = (uint8_t*)X86Encoding::GetRel32Target(jump); + return JitCode::FromExecutable(target); +} + +void Assembler::TraceJumpRelocations(JSTracer* trc, JitCode* code, + CompactBufferReader& reader) { + RelocationIterator iter(reader); + while (iter.read()) { + JitCode* child = CodeFromJump(code->raw() + iter.offset()); + TraceManuallyBarrieredEdge(trc, &child, "rel32"); + MOZ_ASSERT(child == CodeFromJump(code->raw() + iter.offset())); + } +} diff --git a/js/src/jit/x86/Assembler-x86.h b/js/src/jit/x86/Assembler-x86.h new file mode 100644 index 0000000000..12d790740e --- /dev/null +++ b/js/src/jit/x86/Assembler-x86.h @@ -0,0 +1,1079 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef jit_x86_Assembler_x86_h +#define jit_x86_Assembler_x86_h + +#include <iterator> + +#include "jit/CompactBuffer.h" +#include "jit/JitCode.h" +#include "jit/shared/Assembler-shared.h" +#include "jit/x86-shared/Constants-x86-shared.h" + +namespace js { +namespace jit { + +static constexpr Register eax{X86Encoding::rax}; +static constexpr Register ecx{X86Encoding::rcx}; +static constexpr Register edx{X86Encoding::rdx}; +static constexpr Register ebx{X86Encoding::rbx}; +static constexpr Register esp{X86Encoding::rsp}; +static constexpr Register ebp{X86Encoding::rbp}; +static constexpr Register esi{X86Encoding::rsi}; +static constexpr Register edi{X86Encoding::rdi}; + +static constexpr FloatRegister xmm0 = + FloatRegister(X86Encoding::xmm0, FloatRegisters::Double); +static constexpr FloatRegister xmm1 = + FloatRegister(X86Encoding::xmm1, FloatRegisters::Double); +static constexpr FloatRegister xmm2 = + FloatRegister(X86Encoding::xmm2, FloatRegisters::Double); +static constexpr FloatRegister xmm3 = + FloatRegister(X86Encoding::xmm3, FloatRegisters::Double); +static constexpr FloatRegister xmm4 = + FloatRegister(X86Encoding::xmm4, FloatRegisters::Double); +static constexpr FloatRegister xmm5 = + FloatRegister(X86Encoding::xmm5, FloatRegisters::Double); +static constexpr FloatRegister xmm6 = + FloatRegister(X86Encoding::xmm6, FloatRegisters::Double); +static constexpr FloatRegister xmm7 = + FloatRegister(X86Encoding::xmm7, FloatRegisters::Double); + +// Vector registers fixed for use with some instructions, e.g. PBLENDVB. 
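+// (The non-VEX forms of the variable-blend instructions read their mask
+// implicitly from xmm0, so a Simd128 view of that register is pinned here.)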
+static constexpr FloatRegister vmm0 = + FloatRegister(X86Encoding::xmm0, FloatRegisters::Simd128); + +static constexpr Register InvalidReg{X86Encoding::invalid_reg}; +static constexpr FloatRegister InvalidFloatReg = FloatRegister(); + +static constexpr Register JSReturnReg_Type = ecx; +static constexpr Register JSReturnReg_Data = edx; +static constexpr Register StackPointer = esp; +static constexpr Register FramePointer = ebp; +static constexpr Register ReturnReg = eax; +static constexpr FloatRegister ReturnFloat32Reg = + FloatRegister(X86Encoding::xmm0, FloatRegisters::Single); +static constexpr FloatRegister ReturnDoubleReg = + FloatRegister(X86Encoding::xmm0, FloatRegisters::Double); +static constexpr FloatRegister ReturnSimd128Reg = + FloatRegister(X86Encoding::xmm0, FloatRegisters::Simd128); +static constexpr FloatRegister ScratchFloat32Reg_ = + FloatRegister(X86Encoding::xmm7, FloatRegisters::Single); +static constexpr FloatRegister ScratchDoubleReg_ = + FloatRegister(X86Encoding::xmm7, FloatRegisters::Double); +static constexpr FloatRegister ScratchSimd128Reg = + FloatRegister(X86Encoding::xmm7, FloatRegisters::Simd128); + +// Note, EDX:EAX is the system ABI 64-bit return register, and it is to our +// advantage to keep the SpiderMonkey ABI in sync with the system ABI. +// +// However, using EDX here means that we have to use a register that does not +// have a word or byte part (eg DX/DH/DL) in some other places; notably, +// ABINonArgReturnReg1 is EDI. If this becomes a problem and ReturnReg64 has to +// be something other than EDX:EAX, then jitted code that calls directly to C++ +// will need to shuffle the return value from EDX:EAX into ReturnReg64 directly +// after the call. See bug 1730161 for discussion and a patch that does that. +static constexpr Register64 ReturnReg64(edx, eax); + +// Avoid ebp, which is the FramePointer, which is unavailable in some modes. +static constexpr Register CallTempReg0 = edi; +static constexpr Register CallTempReg1 = eax; +static constexpr Register CallTempReg2 = ebx; +static constexpr Register CallTempReg3 = ecx; +static constexpr Register CallTempReg4 = esi; +static constexpr Register CallTempReg5 = edx; + +// We have no arg regs, so our NonArgRegs are just our CallTempReg* +static constexpr Register CallTempNonArgRegs[] = {edi, eax, ebx, ecx, esi, edx}; +static constexpr uint32_t NumCallTempNonArgRegs = std::size(CallTempNonArgRegs); + +class ABIArgGenerator { + uint32_t stackOffset_; + ABIArg current_; + + public: + ABIArgGenerator(); + ABIArg next(MIRType argType); + ABIArg& current() { return current_; } + uint32_t stackBytesConsumedSoFar() const { return stackOffset_; } + void increaseStackOffset(uint32_t bytes) { stackOffset_ += bytes; } +}; + +// These registers may be volatile or nonvolatile. +static constexpr Register ABINonArgReg0 = eax; +static constexpr Register ABINonArgReg1 = ebx; +static constexpr Register ABINonArgReg2 = ecx; +static constexpr Register ABINonArgReg3 = edx; + +// This register may be volatile or nonvolatile. Avoid xmm7 which is the +// ScratchDoubleReg_. +static constexpr FloatRegister ABINonArgDoubleReg = + FloatRegister(X86Encoding::xmm0, FloatRegisters::Double); + +// These registers may be volatile or nonvolatile. 
+// Note: these three registers are all guaranteed to be different +static constexpr Register ABINonArgReturnReg0 = ecx; +static constexpr Register ABINonArgReturnReg1 = edi; +static constexpr Register ABINonVolatileReg = ebx; + +// This register is guaranteed to be clobberable during the prologue and +// epilogue of an ABI call which must preserve both ABI argument, return +// and non-volatile registers. +static constexpr Register ABINonArgReturnVolatileReg = ecx; + +// Instance pointer argument register for WebAssembly functions. This must not +// alias any other register used for passing function arguments or return +// values. Preserved by WebAssembly functions. +static constexpr Register InstanceReg = esi; + +// Registers used for asm.js/wasm table calls. These registers must be disjoint +// from the ABI argument registers, InstanceReg and each other. +static constexpr Register WasmTableCallScratchReg0 = ABINonArgReg0; +static constexpr Register WasmTableCallScratchReg1 = ABINonArgReg1; +static constexpr Register WasmTableCallSigReg = ABINonArgReg2; +static constexpr Register WasmTableCallIndexReg = ABINonArgReg3; + +// Registers used for ref calls. +static constexpr Register WasmCallRefCallScratchReg0 = ABINonArgReg0; +static constexpr Register WasmCallRefCallScratchReg1 = ABINonArgReg1; +static constexpr Register WasmCallRefReg = ABINonArgReg3; + +// Register used as a scratch along the return path in the fast js -> wasm stub +// code. This must not overlap ReturnReg, JSReturnOperand, or InstanceReg. +// It must be a volatile register. +static constexpr Register WasmJitEntryReturnScratch = ebx; + +static constexpr Register OsrFrameReg = edx; +static constexpr Register PreBarrierReg = edx; + +// Not enough registers for a PC register (R0-R2 use 2 registers each). +static constexpr Register InterpreterPCReg = InvalidReg; + +// Registers used by RegExpMatcher and RegExpExecMatch stubs (do not use +// JSReturnOperand). +static constexpr Register RegExpMatcherRegExpReg = CallTempReg0; +static constexpr Register RegExpMatcherStringReg = CallTempReg1; +static constexpr Register RegExpMatcherLastIndexReg = CallTempReg2; + +// Registers used by RegExpExecTest stub (do not use ReturnReg). +static constexpr Register RegExpExecTestRegExpReg = CallTempReg0; +static constexpr Register RegExpExecTestStringReg = CallTempReg2; + +// Registers used by RegExpSearcher stub (do not use ReturnReg). +static constexpr Register RegExpSearcherRegExpReg = CallTempReg0; +static constexpr Register RegExpSearcherStringReg = CallTempReg2; +static constexpr Register RegExpSearcherLastIndexReg = CallTempReg3; + +// GCC stack is aligned on 16 bytes. Ion does not maintain this for internal +// calls. wasm code does. +#if defined(__GNUC__) && !defined(__MINGW32__) +static constexpr uint32_t ABIStackAlignment = 16; +#else +static constexpr uint32_t ABIStackAlignment = 4; +#endif +static constexpr uint32_t CodeAlignment = 16; +static constexpr uint32_t JitStackAlignment = 16; + +static constexpr uint32_t JitStackValueAlignment = + JitStackAlignment / sizeof(Value); +static_assert(JitStackAlignment % sizeof(Value) == 0 && + JitStackValueAlignment >= 1, + "Stack alignment should be a non-zero multiple of sizeof(Value)"); + +static constexpr uint32_t SimdMemoryAlignment = 16; + +static_assert(CodeAlignment % SimdMemoryAlignment == 0, + "Code alignment should be larger than any of the alignments " + "which are used for " + "the constant sections of the code buffer. 
Thus it should be " + "larger than the " + "alignment for SIMD constants."); + +static_assert(JitStackAlignment % SimdMemoryAlignment == 0, + "Stack alignment should be larger than any of the alignments " + "which are used for " + "spilled values. Thus it should be larger than the alignment " + "for SIMD accesses."); + +static constexpr uint32_t WasmStackAlignment = SimdMemoryAlignment; +static constexpr uint32_t WasmTrapInstructionLength = 2; + +// See comments in wasm::GenerateFunctionPrologue. The difference between these +// is the size of the largest callable prologue on the platform. (We could make +// the tail offset 3, but I have opted for 4 as that results in a better-aligned +// branch target.) +static constexpr uint32_t WasmCheckedCallEntryOffset = 0u; + +struct ImmTag : public Imm32 { + explicit ImmTag(JSValueTag mask) : Imm32(int32_t(mask)) {} +}; + +struct ImmType : public ImmTag { + explicit ImmType(JSValueType type) : ImmTag(JSVAL_TYPE_TO_TAG(type)) {} +}; + +static constexpr Scale ScalePointer = TimesFour; + +} // namespace jit +} // namespace js + +#include "jit/x86-shared/Assembler-x86-shared.h" + +namespace js { +namespace jit { + +static inline Operand LowWord(const Operand& op) { + switch (op.kind()) { + case Operand::MEM_REG_DISP: + return Operand(LowWord(op.toAddress())); + case Operand::MEM_SCALE: + return Operand(LowWord(op.toBaseIndex())); + default: + MOZ_CRASH("Invalid operand type"); + } +} + +static inline Operand HighWord(const Operand& op) { + switch (op.kind()) { + case Operand::MEM_REG_DISP: + return Operand(HighWord(op.toAddress())); + case Operand::MEM_SCALE: + return Operand(HighWord(op.toBaseIndex())); + default: + MOZ_CRASH("Invalid operand type"); + } +} + +// Return operand from a JS -> JS call. +static constexpr ValueOperand JSReturnOperand{JSReturnReg_Type, + JSReturnReg_Data}; + +class Assembler : public AssemblerX86Shared { + Vector<RelativePatch, 8, SystemAllocPolicy> jumps_; + + void addPendingJump(JmpSrc src, ImmPtr target, RelocationKind kind) { + enoughMemory_ &= + jumps_.append(RelativePatch(src.offset(), target.value, kind)); + if (kind == RelocationKind::JITCODE) { + jumpRelocations_.writeUnsigned(src.offset()); + } + } + + public: + using AssemblerX86Shared::call; + using AssemblerX86Shared::cmpl; + using AssemblerX86Shared::j; + using AssemblerX86Shared::jmp; + using AssemblerX86Shared::movl; + using AssemblerX86Shared::pop; + using AssemblerX86Shared::push; + using AssemblerX86Shared::retarget; + using AssemblerX86Shared::vmovsd; + using AssemblerX86Shared::vmovss; + + static void TraceJumpRelocations(JSTracer* trc, JitCode* code, + CompactBufferReader& reader); + + // Copy the assembly code to the given buffer, and perform any pending + // relocations relying on the target address. + void executableCopy(uint8_t* buffer); + + void assertNoGCThings() const { +#ifdef DEBUG + MOZ_ASSERT(dataRelocations_.length() == 0); + for (auto& j : jumps_) { + MOZ_ASSERT(j.kind == RelocationKind::HARDCODED); + } +#endif + } + + // Actual assembly emitting functions. 
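+  // These wrap the raw BaseAssembler encodings; the ImmGCPtr variants also
+  // record a data relocation so that embedded GC pointers can be traced and
+  // updated if the referenced objects move.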
+ + void push(ImmGCPtr ptr) { + masm.push_i32(int32_t(ptr.value)); + writeDataRelocation(ptr); + } + void push(const ImmWord imm) { push(Imm32(imm.value)); } + void push(const ImmPtr imm) { push(ImmWord(uintptr_t(imm.value))); } + void push(FloatRegister src) { + subl(Imm32(sizeof(double)), StackPointer); + vmovsd(src, Address(StackPointer, 0)); + } + + CodeOffset pushWithPatch(ImmWord word) { + masm.push_i32(int32_t(word.value)); + return CodeOffset(masm.currentOffset()); + } + + void pop(FloatRegister src) { + vmovsd(Address(StackPointer, 0), src); + addl(Imm32(sizeof(double)), StackPointer); + } + + CodeOffset movWithPatch(ImmWord word, Register dest) { + movl(Imm32(word.value), dest); + return CodeOffset(masm.currentOffset()); + } + CodeOffset movWithPatch(ImmPtr imm, Register dest) { + return movWithPatch(ImmWord(uintptr_t(imm.value)), dest); + } + + void movl(ImmGCPtr ptr, Register dest) { + masm.movl_i32r(uintptr_t(ptr.value), dest.encoding()); + writeDataRelocation(ptr); + } + void movl(ImmGCPtr ptr, const Operand& dest) { + switch (dest.kind()) { + case Operand::REG: + masm.movl_i32r(uintptr_t(ptr.value), dest.reg()); + writeDataRelocation(ptr); + break; + case Operand::MEM_REG_DISP: + masm.movl_i32m(uintptr_t(ptr.value), dest.disp(), dest.base()); + writeDataRelocation(ptr); + break; + case Operand::MEM_SCALE: + masm.movl_i32m(uintptr_t(ptr.value), dest.disp(), dest.base(), + dest.index(), dest.scale()); + writeDataRelocation(ptr); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void movl(ImmWord imm, Register dest) { + masm.movl_i32r(imm.value, dest.encoding()); + } + void movl(ImmPtr imm, Register dest) { + movl(ImmWord(uintptr_t(imm.value)), dest); + } + void mov(ImmWord imm, Register dest) { + // Use xor for setting registers to zero, as it is specially optimized + // for this purpose on modern hardware. Note that it does clobber FLAGS + // though. + if (imm.value == 0) { + xorl(dest, dest); + } else { + movl(imm, dest); + } + } + void mov(ImmPtr imm, Register dest) { + mov(ImmWord(uintptr_t(imm.value)), dest); + } + void mov(wasm::SymbolicAddress imm, Register dest) { + masm.movl_i32r(-1, dest.encoding()); + append(wasm::SymbolicAccess(CodeOffset(masm.currentOffset()), imm)); + } + void mov(const Operand& src, Register dest) { movl(src, dest); } + void mov(Register src, const Operand& dest) { movl(src, dest); } + void mov(Imm32 imm, const Operand& dest) { movl(imm, dest); } + void mov(CodeLabel* label, Register dest) { + // Put a placeholder value in the instruction stream. 
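+    // The zero is patched later: patchAt() records the offset just past the
+    // 32-bit immediate, and binding the CodeLabel rewrites it with the final
+    // address.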
+ masm.movl_i32r(0, dest.encoding()); + label->patchAt()->bind(masm.size()); + } + void mov(Register src, Register dest) { movl(src, dest); } + void xchg(Register src, Register dest) { xchgl(src, dest); } + void lea(const Operand& src, Register dest) { return leal(src, dest); } + void cmovz32(const Operand& src, Register dest) { return cmovzl(src, dest); } + void cmovzPtr(const Operand& src, Register dest) { return cmovzl(src, dest); } + + void fstp32(const Operand& src) { + switch (src.kind()) { + case Operand::MEM_REG_DISP: + masm.fstp32_m(src.disp(), src.base()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void faddp() { masm.faddp(); } + + void cmpl(ImmWord rhs, Register lhs) { + masm.cmpl_ir(rhs.value, lhs.encoding()); + } + void cmpl(ImmPtr rhs, Register lhs) { + cmpl(ImmWord(uintptr_t(rhs.value)), lhs); + } + void cmpl(ImmGCPtr rhs, Register lhs) { + masm.cmpl_i32r(uintptr_t(rhs.value), lhs.encoding()); + writeDataRelocation(rhs); + } + void cmpl(Register rhs, Register lhs) { + masm.cmpl_rr(rhs.encoding(), lhs.encoding()); + } + void cmpl(ImmGCPtr rhs, const Operand& lhs) { + switch (lhs.kind()) { + case Operand::REG: + masm.cmpl_i32r(uintptr_t(rhs.value), lhs.reg()); + writeDataRelocation(rhs); + break; + case Operand::MEM_REG_DISP: + masm.cmpl_i32m(uintptr_t(rhs.value), lhs.disp(), lhs.base()); + writeDataRelocation(rhs); + break; + case Operand::MEM_ADDRESS32: + masm.cmpl_i32m(uintptr_t(rhs.value), lhs.address()); + writeDataRelocation(rhs); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + void cmpl(Register rhs, wasm::SymbolicAddress lhs) { + masm.cmpl_rm_disp32(rhs.encoding(), (void*)-1); + append(wasm::SymbolicAccess(CodeOffset(masm.currentOffset()), lhs)); + } + void cmpl(Imm32 rhs, wasm::SymbolicAddress lhs) { + JmpSrc src = masm.cmpl_im_disp32(rhs.value, (void*)-1); + append(wasm::SymbolicAccess(CodeOffset(src.offset()), lhs)); + } + + void adcl(Imm32 imm, Register dest) { + masm.adcl_ir(imm.value, dest.encoding()); + } + void adcl(Register src, Register dest) { + masm.adcl_rr(src.encoding(), dest.encoding()); + } + void adcl(Operand src, Register dest) { + switch (src.kind()) { + case Operand::MEM_REG_DISP: + masm.adcl_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.adcl_mr(src.disp(), src.base(), src.index(), src.scale(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + + void sbbl(Imm32 imm, Register dest) { + masm.sbbl_ir(imm.value, dest.encoding()); + } + void sbbl(Register src, Register dest) { + masm.sbbl_rr(src.encoding(), dest.encoding()); + } + void sbbl(Operand src, Register dest) { + switch (src.kind()) { + case Operand::MEM_REG_DISP: + masm.sbbl_mr(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.sbbl_mr(src.disp(), src.base(), src.index(), src.scale(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + + void mull(Register multiplier) { masm.mull_r(multiplier.encoding()); } + + void shldl(const Imm32 imm, Register src, Register dest) { + masm.shldl_irr(imm.value, src.encoding(), dest.encoding()); + } + void shrdl(const Imm32 imm, Register src, Register dest) { + masm.shrdl_irr(imm.value, src.encoding(), dest.encoding()); + } + + void vhaddpd(FloatRegister rhs, FloatRegister lhsDest) { + MOZ_ASSERT(HasSSE3()); + MOZ_ASSERT(rhs.size() == 16); + MOZ_ASSERT(lhsDest.size() == 16); + masm.vhaddpd_rr(rhs.encoding(), lhsDest.encoding(), lhsDest.encoding()); + } + + 
void fild(const Operand& src) { + switch (src.kind()) { + case Operand::MEM_REG_DISP: + masm.fild_m(src.disp(), src.base()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + + void jmp(ImmPtr target, RelocationKind reloc = RelocationKind::HARDCODED) { + JmpSrc src = masm.jmp(); + addPendingJump(src, target, reloc); + } + void j(Condition cond, ImmPtr target, + RelocationKind reloc = RelocationKind::HARDCODED) { + JmpSrc src = masm.jCC(static_cast<X86Encoding::Condition>(cond)); + addPendingJump(src, target, reloc); + } + + void jmp(JitCode* target) { + jmp(ImmPtr(target->raw()), RelocationKind::JITCODE); + } + void j(Condition cond, JitCode* target) { + j(cond, ImmPtr(target->raw()), RelocationKind::JITCODE); + } + void call(JitCode* target) { + JmpSrc src = masm.call(); + addPendingJump(src, ImmPtr(target->raw()), RelocationKind::JITCODE); + } + void call(ImmWord target) { call(ImmPtr((void*)target.value)); } + void call(ImmPtr target) { + JmpSrc src = masm.call(); + addPendingJump(src, target, RelocationKind::HARDCODED); + } + + // Emit a CALL or CMP (nop) instruction. ToggleCall can be used to patch + // this instruction. + CodeOffset toggledCall(JitCode* target, bool enabled) { + CodeOffset offset(size()); + JmpSrc src = enabled ? masm.call() : masm.cmp_eax(); + addPendingJump(src, ImmPtr(target->raw()), RelocationKind::JITCODE); + MOZ_ASSERT_IF(!oom(), size() - offset.offset() == ToggledCallSize(nullptr)); + return offset; + } + + static size_t ToggledCallSize(uint8_t* code) { + // Size of a call instruction. + return 5; + } + + // Re-routes pending jumps to an external target, flushing the label in the + // process. + void retarget(Label* label, ImmPtr target, RelocationKind reloc) { + if (label->used()) { + bool more; + X86Encoding::JmpSrc jmp(label->offset()); + do { + X86Encoding::JmpSrc next; + more = masm.nextJump(jmp, &next); + addPendingJump(jmp, target, reloc); + jmp = next; + } while (more); + } + label->reset(); + } + + // Move a 32-bit immediate into a register where the immediate can be + // patched. + CodeOffset movlWithPatch(Imm32 imm, Register dest) { + masm.movl_i32r(imm.value, dest.encoding()); + return CodeOffset(masm.currentOffset()); + } + + // Load from *(base + disp32) where disp32 can be patched. 
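+  // The *WithPatch forms always emit the 4-byte disp32 encoding, even for
+  // small displacements, and the returned CodeOffset marks the end of that
+  // field so it can be rewritten once the real displacement is known.
+  // Sketch of a typical use (`code` and `newDisp` are illustrative only):
+  //   CodeOffset off = masm.movlWithPatch(Operand(Address(edx, 0)), eax);
+  //   ...
+  //   X86Encoding::SetInt32(code + off.offset(), newDisp);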
+ CodeOffset movsblWithPatch(const Operand& src, Register dest) { + switch (src.kind()) { + case Operand::MEM_REG_DISP: + masm.movsbl_mr_disp32(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.movsbl_mr(src.address(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + return CodeOffset(masm.currentOffset()); + } + CodeOffset movzblWithPatch(const Operand& src, Register dest) { + switch (src.kind()) { + case Operand::MEM_REG_DISP: + masm.movzbl_mr_disp32(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.movzbl_mr(src.address(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + return CodeOffset(masm.currentOffset()); + } + CodeOffset movswlWithPatch(const Operand& src, Register dest) { + switch (src.kind()) { + case Operand::MEM_REG_DISP: + masm.movswl_mr_disp32(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.movswl_mr(src.address(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + return CodeOffset(masm.currentOffset()); + } + CodeOffset movzwlWithPatch(const Operand& src, Register dest) { + switch (src.kind()) { + case Operand::MEM_REG_DISP: + masm.movzwl_mr_disp32(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.movzwl_mr(src.address(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + return CodeOffset(masm.currentOffset()); + } + CodeOffset movlWithPatch(const Operand& src, Register dest) { + switch (src.kind()) { + case Operand::MEM_REG_DISP: + masm.movl_mr_disp32(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.movl_mr(src.address(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + return CodeOffset(masm.currentOffset()); + } + CodeOffset vmovssWithPatch(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src.kind()) { + case Operand::MEM_REG_DISP: + masm.vmovss_mr_disp32(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vmovss_mr(src.address(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + return CodeOffset(masm.currentOffset()); + } + void vmovss(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src.kind()) { + case Operand::MEM_REG_DISP: + masm.vmovss_mr_disp32(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vmovss_mr(src.address(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.vmovss_mr(src.disp(), src.base(), src.index(), src.scale(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + CodeOffset vmovdWithPatch(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src.kind()) { + case Operand::MEM_REG_DISP: + masm.vmovd_mr_disp32(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vmovd_mr(src.address(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + return CodeOffset(masm.currentOffset()); + } + CodeOffset vmovqWithPatch(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src.kind()) { + case Operand::MEM_REG_DISP: + masm.vmovq_mr_disp32(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vmovq_mr(src.address(), dest.encoding()); + break; 
+ default: + MOZ_CRASH("unexpected operand kind"); + } + return CodeOffset(masm.currentOffset()); + } + CodeOffset vmovsdWithPatch(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src.kind()) { + case Operand::MEM_REG_DISP: + masm.vmovsd_mr_disp32(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vmovsd_mr(src.address(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + return CodeOffset(masm.currentOffset()); + } + void vmovsd(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src.kind()) { + case Operand::MEM_REG_DISP: + masm.vmovsd_mr_disp32(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vmovsd_mr(src.address(), dest.encoding()); + break; + case Operand::MEM_SCALE: + masm.vmovsd_mr(src.disp(), src.base(), src.index(), src.scale(), + dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + CodeOffset vmovupsWithPatch(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src.kind()) { + case Operand::MEM_REG_DISP: + masm.vmovups_mr_disp32(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vmovups_mr(src.address(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + return CodeOffset(masm.currentOffset()); + } + CodeOffset vmovdquWithPatch(const Operand& src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + switch (src.kind()) { + case Operand::MEM_REG_DISP: + masm.vmovdqu_mr_disp32(src.disp(), src.base(), dest.encoding()); + break; + case Operand::MEM_ADDRESS32: + masm.vmovdqu_mr(src.address(), dest.encoding()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + return CodeOffset(masm.currentOffset()); + } + + // Store to *(base + disp32) where disp32 can be patched. 
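+  // Same convention as the patchable loads above: the disp32 form is always
+  // emitted and the returned CodeOffset marks the end of the displacement
+  // field.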
+ CodeOffset movbWithPatch(Register src, const Operand& dest) { + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.movb_rm_disp32(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_ADDRESS32: + masm.movb_rm(src.encoding(), dest.address()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + return CodeOffset(masm.currentOffset()); + } + CodeOffset movwWithPatch(Register src, const Operand& dest) { + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.movw_rm_disp32(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_ADDRESS32: + masm.movw_rm(src.encoding(), dest.address()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + return CodeOffset(masm.currentOffset()); + } + CodeOffset movlWithPatch(Register src, const Operand& dest) { + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.movl_rm_disp32(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_ADDRESS32: + masm.movl_rm(src.encoding(), dest.address()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + return CodeOffset(masm.currentOffset()); + } + CodeOffset movlWithPatchLow(Register regLow, const Operand& dest) { + switch (dest.kind()) { + case Operand::MEM_REG_DISP: { + return movlWithPatch(regLow, LowWord(dest)); + } + case Operand::MEM_ADDRESS32: { + Operand low( + PatchedAbsoluteAddress(uint32_t(dest.address()) + INT64LOW_OFFSET)); + return movlWithPatch(regLow, low); + } + default: + MOZ_CRASH("unexpected operand kind"); + } + } + CodeOffset movlWithPatchHigh(Register regHigh, const Operand& dest) { + switch (dest.kind()) { + case Operand::MEM_REG_DISP: { + return movlWithPatch(regHigh, HighWord(dest)); + } + case Operand::MEM_ADDRESS32: { + Operand high(PatchedAbsoluteAddress(uint32_t(dest.address()) + + INT64HIGH_OFFSET)); + return movlWithPatch(regHigh, high); + } + default: + MOZ_CRASH("unexpected operand kind"); + } + } + CodeOffset vmovdWithPatch(FloatRegister src, const Operand& dest) { + MOZ_ASSERT(HasSSE2()); + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.vmovd_rm_disp32(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_ADDRESS32: + masm.vmovd_rm(src.encoding(), dest.address()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + return CodeOffset(masm.currentOffset()); + } + CodeOffset vmovqWithPatch(FloatRegister src, const Operand& dest) { + MOZ_ASSERT(HasSSE2()); + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.vmovq_rm_disp32(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_ADDRESS32: + masm.vmovq_rm(src.encoding(), dest.address()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + return CodeOffset(masm.currentOffset()); + } + CodeOffset vmovssWithPatch(FloatRegister src, const Operand& dest) { + MOZ_ASSERT(HasSSE2()); + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.vmovss_rm_disp32(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_ADDRESS32: + masm.vmovss_rm(src.encoding(), dest.address()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + return CodeOffset(masm.currentOffset()); + } + void vmovss(FloatRegister src, const Operand& dest) { + MOZ_ASSERT(HasSSE2()); + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.vmovss_rm_disp32(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_ADDRESS32: + masm.vmovss_rm(src.encoding(), dest.address()); + break; + case Operand::MEM_SCALE: + 
masm.vmovss_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), + dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + CodeOffset vmovsdWithPatch(FloatRegister src, const Operand& dest) { + MOZ_ASSERT(HasSSE2()); + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.vmovsd_rm_disp32(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_ADDRESS32: + masm.vmovsd_rm(src.encoding(), dest.address()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + return CodeOffset(masm.currentOffset()); + } + void vmovsd(FloatRegister src, const Operand& dest) { + MOZ_ASSERT(HasSSE2()); + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.vmovsd_rm_disp32(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_ADDRESS32: + masm.vmovsd_rm(src.encoding(), dest.address()); + break; + case Operand::MEM_SCALE: + masm.vmovsd_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), + dest.scale()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + } + CodeOffset vmovupsWithPatch(FloatRegister src, const Operand& dest) { + MOZ_ASSERT(HasSSE2()); + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.vmovups_rm_disp32(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_ADDRESS32: + masm.vmovups_rm(src.encoding(), dest.address()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + return CodeOffset(masm.currentOffset()); + } + CodeOffset vmovdquWithPatch(FloatRegister src, const Operand& dest) { + MOZ_ASSERT(HasSSE2()); + switch (dest.kind()) { + case Operand::MEM_REG_DISP: + masm.vmovdqu_rm_disp32(src.encoding(), dest.disp(), dest.base()); + break; + case Operand::MEM_ADDRESS32: + masm.vmovdqu_rm(src.encoding(), dest.address()); + break; + default: + MOZ_CRASH("unexpected operand kind"); + } + return CodeOffset(masm.currentOffset()); + } + + // Load from *(addr + index*scale) where addr can be patched. + CodeOffset movlWithPatch(PatchedAbsoluteAddress addr, Register index, + Scale scale, Register dest) { + masm.movl_mr(addr.addr, index.encoding(), scale, dest.encoding()); + return CodeOffset(masm.currentOffset()); + } + + // Load from *src where src can be patched. 
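+  // PatchedAbsoluteAddress holds a placeholder absolute address; the 32-bit
+  // address immediate is rewritten through the returned CodeOffset once the
+  // real address is known. (Absolute 32-bit addressing like this only exists
+  // on x86.)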
+ CodeOffset movsblWithPatch(PatchedAbsoluteAddress src, Register dest) { + masm.movsbl_mr(src.addr, dest.encoding()); + return CodeOffset(masm.currentOffset()); + } + CodeOffset movzblWithPatch(PatchedAbsoluteAddress src, Register dest) { + masm.movzbl_mr(src.addr, dest.encoding()); + return CodeOffset(masm.currentOffset()); + } + CodeOffset movswlWithPatch(PatchedAbsoluteAddress src, Register dest) { + masm.movswl_mr(src.addr, dest.encoding()); + return CodeOffset(masm.currentOffset()); + } + CodeOffset movzwlWithPatch(PatchedAbsoluteAddress src, Register dest) { + masm.movzwl_mr(src.addr, dest.encoding()); + return CodeOffset(masm.currentOffset()); + } + CodeOffset movlWithPatch(PatchedAbsoluteAddress src, Register dest) { + masm.movl_mr(src.addr, dest.encoding()); + return CodeOffset(masm.currentOffset()); + } + CodeOffset vmovssWithPatch(PatchedAbsoluteAddress src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmovss_mr(src.addr, dest.encoding()); + return CodeOffset(masm.currentOffset()); + } + CodeOffset vmovdWithPatch(PatchedAbsoluteAddress src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmovd_mr(src.addr, dest.encoding()); + return CodeOffset(masm.currentOffset()); + } + CodeOffset vmovqWithPatch(PatchedAbsoluteAddress src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmovq_mr(src.addr, dest.encoding()); + return CodeOffset(masm.currentOffset()); + } + CodeOffset vmovsdWithPatch(PatchedAbsoluteAddress src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmovsd_mr(src.addr, dest.encoding()); + return CodeOffset(masm.currentOffset()); + } + CodeOffset vmovdqaWithPatch(PatchedAbsoluteAddress src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmovdqa_mr(src.addr, dest.encoding()); + return CodeOffset(masm.currentOffset()); + } + CodeOffset vmovdquWithPatch(PatchedAbsoluteAddress src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmovdqu_mr(src.addr, dest.encoding()); + return CodeOffset(masm.currentOffset()); + } + CodeOffset vmovapsWithPatch(PatchedAbsoluteAddress src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmovaps_mr(src.addr, dest.encoding()); + return CodeOffset(masm.currentOffset()); + } + CodeOffset vmovupsWithPatch(PatchedAbsoluteAddress src, FloatRegister dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmovups_mr(src.addr, dest.encoding()); + return CodeOffset(masm.currentOffset()); + } + + // Store to *dest where dest can be patched. 
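+  // Stores follow the same patched-absolute-address convention as the loads
+  // above.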
+ CodeOffset movbWithPatch(Register src, PatchedAbsoluteAddress dest) { + masm.movb_rm(src.encoding(), dest.addr); + return CodeOffset(masm.currentOffset()); + } + CodeOffset movwWithPatch(Register src, PatchedAbsoluteAddress dest) { + masm.movw_rm(src.encoding(), dest.addr); + return CodeOffset(masm.currentOffset()); + } + CodeOffset movlWithPatch(Register src, PatchedAbsoluteAddress dest) { + masm.movl_rm(src.encoding(), dest.addr); + return CodeOffset(masm.currentOffset()); + } + CodeOffset vmovssWithPatch(FloatRegister src, PatchedAbsoluteAddress dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmovss_rm(src.encoding(), dest.addr); + return CodeOffset(masm.currentOffset()); + } + CodeOffset vmovdWithPatch(FloatRegister src, PatchedAbsoluteAddress dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmovd_rm(src.encoding(), dest.addr); + return CodeOffset(masm.currentOffset()); + } + CodeOffset vmovqWithPatch(FloatRegister src, PatchedAbsoluteAddress dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmovq_rm(src.encoding(), dest.addr); + return CodeOffset(masm.currentOffset()); + } + CodeOffset vmovsdWithPatch(FloatRegister src, PatchedAbsoluteAddress dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmovsd_rm(src.encoding(), dest.addr); + return CodeOffset(masm.currentOffset()); + } + CodeOffset vmovdqaWithPatch(FloatRegister src, PatchedAbsoluteAddress dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmovdqa_rm(src.encoding(), dest.addr); + return CodeOffset(masm.currentOffset()); + } + CodeOffset vmovapsWithPatch(FloatRegister src, PatchedAbsoluteAddress dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmovaps_rm(src.encoding(), dest.addr); + return CodeOffset(masm.currentOffset()); + } + CodeOffset vmovdquWithPatch(FloatRegister src, PatchedAbsoluteAddress dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmovdqu_rm(src.encoding(), dest.addr); + return CodeOffset(masm.currentOffset()); + } + CodeOffset vmovupsWithPatch(FloatRegister src, PatchedAbsoluteAddress dest) { + MOZ_ASSERT(HasSSE2()); + masm.vmovups_rm(src.encoding(), dest.addr); + return CodeOffset(masm.currentOffset()); + } +}; + +// Get a register in which we plan to put a quantity that will be used as an +// integer argument. This differs from GetIntArgReg in that if we have no more +// actual argument registers to use we will fall back on using whatever +// CallTempReg* don't overlap the argument registers, and only fail once those +// run out too. +static inline bool GetTempRegForIntArg(uint32_t usedIntArgs, + uint32_t usedFloatArgs, Register* out) { + if (usedIntArgs >= NumCallTempNonArgRegs) { + return false; + } + *out = CallTempNonArgRegs[usedIntArgs]; + return true; +} + +} // namespace jit +} // namespace js + +#endif /* jit_x86_Assembler_x86_h */ diff --git a/js/src/jit/x86/BaseAssembler-x86.h b/js/src/jit/x86/BaseAssembler-x86.h new file mode 100644 index 0000000000..a5a5f67bf2 --- /dev/null +++ b/js/src/jit/x86/BaseAssembler-x86.h @@ -0,0 +1,190 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef jit_x86_BaseAssembler_x86_h +#define jit_x86_BaseAssembler_x86_h + +#include "jit/x86-shared/BaseAssembler-x86-shared.h" + +namespace js { +namespace jit { + +namespace X86Encoding { + +class BaseAssemblerX86 : public BaseAssembler { + public: + // Arithmetic operations: + + void adcl_ir(int32_t imm, RegisterID dst) { + spew("adcl $%d, %s", imm, GPReg32Name(dst)); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, dst, GROUP1_OP_ADC); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, dst, GROUP1_OP_ADC); + m_formatter.immediate32(imm); + } + } + + void adcl_im(int32_t imm, const void* addr) { + spew("adcl %d, %p", imm, addr); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, addr, GROUP1_OP_ADC); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, addr, GROUP1_OP_ADC); + m_formatter.immediate32(imm); + } + } + + void adcl_rr(RegisterID src, RegisterID dst) { + spew("adcl %s, %s", GPReg32Name(src), GPReg32Name(dst)); + m_formatter.oneByteOp(OP_ADC_GvEv, src, dst); + } + + void adcl_mr(int32_t offset, RegisterID base, RegisterID dst) { + spew("adcl " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(dst)); + m_formatter.oneByteOp(OP_ADC_GvEv, offset, base, dst); + } + + void adcl_mr(int32_t offset, RegisterID base, RegisterID index, int scale, + RegisterID dst) { + spew("adcl " MEM_obs ", %s", ADDR_obs(offset, base, index, scale), + GPReg32Name(dst)); + m_formatter.oneByteOp(OP_ADC_GvEv, offset, base, index, scale, dst); + } + + void sbbl_ir(int32_t imm, RegisterID dst) { + spew("sbbl $%d, %s", imm, GPReg32Name(dst)); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, dst, GROUP1_OP_SBB); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, dst, GROUP1_OP_SBB); + m_formatter.immediate32(imm); + } + } + + void sbbl_rr(RegisterID src, RegisterID dst) { + spew("sbbl %s, %s", GPReg32Name(src), GPReg32Name(dst)); + m_formatter.oneByteOp(OP_SBB_GvEv, src, dst); + } + + void sbbl_mr(int32_t offset, RegisterID base, RegisterID dst) { + spew("sbbl " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(dst)); + m_formatter.oneByteOp(OP_SBB_GvEv, offset, base, dst); + } + + void sbbl_mr(int32_t offset, RegisterID base, RegisterID index, int scale, + RegisterID dst) { + spew("sbbl " MEM_obs ", %s", ADDR_obs(offset, base, index, scale), + GPReg32Name(dst)); + m_formatter.oneByteOp(OP_SBB_GvEv, offset, base, index, scale, dst); + } + + using BaseAssembler::andl_im; + void andl_im(int32_t imm, const void* addr) { + spew("andl $0x%x, %p", imm, addr); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, addr, GROUP1_OP_AND); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, addr, GROUP1_OP_AND); + m_formatter.immediate32(imm); + } + } + + using BaseAssembler::orl_im; + void orl_im(int32_t imm, const void* addr) { + spew("orl $0x%x, %p", imm, addr); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, addr, GROUP1_OP_OR); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, addr, GROUP1_OP_OR); + m_formatter.immediate32(imm); + } + } + + using BaseAssembler::subl_im; + void subl_im(int32_t imm, const void* addr) { + spew("subl $%d, %p", imm, addr); + if (CAN_SIGN_EXTEND_8_32(imm)) { + m_formatter.oneByteOp(OP_GROUP1_EvIb, addr, GROUP1_OP_SUB); + m_formatter.immediate8s(imm); + } else { + m_formatter.oneByteOp(OP_GROUP1_EvIz, 
addr, GROUP1_OP_SUB); + m_formatter.immediate32(imm); + } + } + + void shldl_irr(int32_t imm, RegisterID src, RegisterID dst) { + MOZ_ASSERT(imm < 32); + spew("shldl $%d, %s, %s", imm, GPReg32Name(src), GPReg32Name(dst)); + m_formatter.twoByteOp8(OP2_SHLD, dst, src); + m_formatter.immediate8u(imm); + } + + void shrdl_irr(int32_t imm, RegisterID src, RegisterID dst) { + MOZ_ASSERT(imm < 32); + spew("shrdl $%d, %s, %s", imm, GPReg32Name(src), GPReg32Name(dst)); + m_formatter.twoByteOp8(OP2_SHRD, dst, src); + m_formatter.immediate8u(imm); + } + + // SSE operations: + + using BaseAssembler::vcvtsi2sd_mr; + void vcvtsi2sd_mr(const void* address, XMMRegisterID src0, + XMMRegisterID dst) { + twoByteOpSimd("vcvtsi2sd", VEX_SD, OP2_CVTSI2SD_VsdEd, address, src0, dst); + } + + using BaseAssembler::vmovaps_mr; + void vmovaps_mr(const void* address, XMMRegisterID dst) { + twoByteOpSimd("vmovaps", VEX_PS, OP2_MOVAPS_VsdWsd, address, invalid_xmm, + dst); + } + + using BaseAssembler::vmovdqa_mr; + void vmovdqa_mr(const void* address, XMMRegisterID dst) { + twoByteOpSimd("vmovdqa", VEX_PD, OP2_MOVDQ_VdqWdq, address, invalid_xmm, + dst); + } + + void vhaddpd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vhaddpd", VEX_PD, OP2_HADDPD, src1, src0, dst); + } + + void vsubpd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { + twoByteOpSimd("vsubpd", VEX_PD, OP2_SUBPS_VpsWps, src1, src0, dst); + } + + void fild_m(int32_t offset, RegisterID base) { + m_formatter.oneByteOp(OP_FILD, offset, base, FILD_OP_64); + } + + // Misc instructions: + + void pusha() { + spew("pusha"); + m_formatter.oneByteOp(OP_PUSHA); + } + + void popa() { + spew("popa"); + m_formatter.oneByteOp(OP_POPA); + } +}; + +typedef BaseAssemblerX86 BaseAssemblerSpecific; + +} // namespace X86Encoding + +} // namespace jit +} // namespace js + +#endif /* jit_x86_BaseAssembler_x86_h */ diff --git a/js/src/jit/x86/CodeGenerator-x86.cpp b/js/src/jit/x86/CodeGenerator-x86.cpp new file mode 100644 index 0000000000..ce0fb01d9e --- /dev/null +++ b/js/src/jit/x86/CodeGenerator-x86.cpp @@ -0,0 +1,1509 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "jit/x86/CodeGenerator-x86.h" + +#include "mozilla/Casting.h" +#include "mozilla/DebugOnly.h" + +#include <iterator> + +#include "jsnum.h" + +#include "jit/CodeGenerator.h" +#include "jit/MIR.h" +#include "jit/MIRGraph.h" +#include "js/Conversions.h" +#include "vm/Shape.h" +#include "wasm/WasmBuiltins.h" +#include "wasm/WasmCodegenTypes.h" +#include "wasm/WasmInstanceData.h" + +#include "jit/MacroAssembler-inl.h" +#include "jit/shared/CodeGenerator-shared-inl.h" +#include "vm/JSScript-inl.h" + +using namespace js; +using namespace js::jit; + +using JS::GenericNaN; +using mozilla::BitwiseCast; +using mozilla::DebugOnly; +using mozilla::FloatingPoint; + +CodeGeneratorX86::CodeGeneratorX86(MIRGenerator* gen, LIRGraph* graph, + MacroAssembler* masm) + : CodeGeneratorX86Shared(gen, graph, masm) {} + +ValueOperand CodeGeneratorX86::ToValue(LInstruction* ins, size_t pos) { + Register typeReg = ToRegister(ins->getOperand(pos + TYPE_INDEX)); + Register payloadReg = ToRegister(ins->getOperand(pos + PAYLOAD_INDEX)); + return ValueOperand(typeReg, payloadReg); +} + +ValueOperand CodeGeneratorX86::ToTempValue(LInstruction* ins, size_t pos) { + Register typeReg = ToRegister(ins->getTemp(pos + TYPE_INDEX)); + Register payloadReg = ToRegister(ins->getTemp(pos + PAYLOAD_INDEX)); + return ValueOperand(typeReg, payloadReg); +} + +void CodeGenerator::visitValue(LValue* value) { + const ValueOperand out = ToOutValue(value); + masm.moveValue(value->value(), out); +} + +void CodeGenerator::visitBox(LBox* box) { + const LDefinition* type = box->getDef(TYPE_INDEX); + + DebugOnly<const LAllocation*> a = box->getOperand(0); + MOZ_ASSERT(!a->isConstant()); + + // On x86, the input operand and the output payload have the same + // virtual register. All that needs to be written is the type tag for + // the type definition. + masm.mov(ImmWord(MIRTypeToTag(box->type())), ToRegister(type)); +} + +void CodeGenerator::visitBoxFloatingPoint(LBoxFloatingPoint* box) { + const AnyRegister in = ToAnyRegister(box->getOperand(0)); + const ValueOperand out = ToOutValue(box); + + masm.moveValue(TypedOrValueRegister(box->type(), in), out); + + if (JitOptions.spectreValueMasking) { + Register scratch = ToRegister(box->spectreTemp()); + masm.move32(Imm32(JSVAL_TAG_CLEAR), scratch); + masm.cmp32Move32(Assembler::Below, scratch, out.typeReg(), scratch, + out.typeReg()); + } +} + +void CodeGenerator::visitUnbox(LUnbox* unbox) { + // Note that for unbox, the type and payload indexes are switched on the + // inputs. + Operand type = ToOperand(unbox->type()); + Operand payload = ToOperand(unbox->payload()); + Register output = ToRegister(unbox->output()); + MUnbox* mir = unbox->mir(); + + JSValueTag tag = MIRTypeToTag(mir->type()); + if (mir->fallible()) { + masm.cmp32(type, Imm32(tag)); + bailoutIf(Assembler::NotEqual, unbox->snapshot()); + } else { +#ifdef DEBUG + Label ok; + masm.branch32(Assembler::Equal, type, Imm32(tag), &ok); + masm.assumeUnreachable("Infallible unbox type mismatch"); + masm.bind(&ok); +#endif + } + + // Note: If spectreValueMasking is disabled, then this instruction will + // default to a no-op as long as the lowering allocate the same register for + // the output and the payload. 
+ masm.unboxNonDouble(type, payload, output, ValueTypeFromMIRType(mir->type())); +} + +void CodeGenerator::visitAtomicLoad64(LAtomicLoad64* lir) { + Register elements = ToRegister(lir->elements()); + Register temp = ToRegister(lir->temp()); + Register64 temp64 = ToRegister64(lir->temp64()); + Register out = ToRegister(lir->output()); + + MOZ_ASSERT(out == ecx); + MOZ_ASSERT(temp == ebx); + MOZ_ASSERT(temp64 == Register64(edx, eax)); + + const MLoadUnboxedScalar* mir = lir->mir(); + + Scalar::Type storageType = mir->storageType(); + + if (lir->index()->isConstant()) { + Address source = + ToAddress(elements, lir->index(), storageType, mir->offsetAdjustment()); + masm.atomicLoad64(Synchronization::Load(), source, Register64(ecx, ebx), + Register64(edx, eax)); + } else { + BaseIndex source(elements, ToRegister(lir->index()), + ScaleFromScalarType(storageType), mir->offsetAdjustment()); + masm.atomicLoad64(Synchronization::Load(), source, Register64(ecx, ebx), + Register64(edx, eax)); + } + + emitCreateBigInt(lir, storageType, temp64, out, temp); +} + +void CodeGenerator::visitAtomicStore64(LAtomicStore64* lir) { + Register elements = ToRegister(lir->elements()); + Register value = ToRegister(lir->value()); + Register64 temp1 = ToRegister64(lir->temp1()); + Register64 temp2 = Register64(value, ToRegister(lir->tempLow())); + + MOZ_ASSERT(temp1 == Register64(ecx, ebx)); + MOZ_ASSERT(temp2 == Register64(edx, eax)); + + Scalar::Type writeType = lir->mir()->writeType(); + + masm.loadBigInt64(value, temp1); + + masm.push(value); + if (lir->index()->isConstant()) { + Address dest = ToAddress(elements, lir->index(), writeType); + masm.atomicStore64(Synchronization::Store(), dest, temp1, temp2); + } else { + BaseIndex dest(elements, ToRegister(lir->index()), + ScaleFromScalarType(writeType)); + masm.atomicStore64(Synchronization::Store(), dest, temp1, temp2); + } + masm.pop(value); +} + +void CodeGenerator::visitCompareExchangeTypedArrayElement64( + LCompareExchangeTypedArrayElement64* lir) { + Register elements = ToRegister(lir->elements()); + Register oldval = ToRegister(lir->oldval()); + DebugOnly<Register> newval = ToRegister(lir->newval()); + DebugOnly<Register> temp = ToRegister(lir->tempLow()); + Register out = ToRegister(lir->output()); + + MOZ_ASSERT(elements == esi); + MOZ_ASSERT(oldval == eax); + MOZ_ASSERT(newval.inspect() == edx); + MOZ_ASSERT(temp.inspect() == ebx); + MOZ_ASSERT(out == ecx); + + Scalar::Type arrayType = lir->mir()->arrayType(); + + DebugOnly<uint32_t> framePushed = masm.framePushed(); + + // Save eax and edx before they're clobbered below. + masm.push(eax); + masm.push(edx); + + auto restoreSavedRegisters = [&]() { + masm.pop(edx); + masm.pop(eax); + }; + + Register64 expected = Register64(edx, eax); + Register64 replacement = Register64(ecx, ebx); + + // Load |oldval| and |newval| into |expected| resp. |replacement|. + { + // Use `esi` as a temp register. + Register bigInt = esi; + masm.push(bigInt); + + masm.mov(oldval, bigInt); + masm.loadBigInt64(bigInt, expected); + + // |newval| is stored in `edx`, which is already pushed onto the stack. 
+ masm.loadPtr(Address(masm.getStackPointer(), sizeof(uintptr_t)), bigInt); + masm.loadBigInt64(bigInt, replacement); + + masm.pop(bigInt); + } + + if (lir->index()->isConstant()) { + Address dest = ToAddress(elements, lir->index(), arrayType); + masm.compareExchange64(Synchronization::Full(), dest, expected, replacement, + expected); + } else { + BaseIndex dest(elements, ToRegister(lir->index()), + ScaleFromScalarType(arrayType)); + masm.compareExchange64(Synchronization::Full(), dest, expected, replacement, + expected); + } + + // Move the result from `edx:eax` to `ecx:ebx`. + masm.move64(expected, replacement); + + // OutOfLineCallVM tracks the currently pushed stack entries as reported by + // |masm.framePushed()|. We mustn't have any additional entries on the stack + // which weren't previously recorded by the safepoint, otherwise the GC + // complains when tracing the Ion frames, because the stack frames don't + // have their expected layout. + MOZ_ASSERT(framePushed == masm.framePushed()); + + OutOfLineCode* ool = createBigIntOutOfLine(lir, arrayType, replacement, out); + + // Use `edx:eax`, which are both already on the stack, as temp registers. + Register bigInt = eax; + Register temp2 = edx; + + Label fail; + masm.newGCBigInt(bigInt, temp2, initialBigIntHeap(), &fail); + masm.initializeBigInt64(arrayType, bigInt, replacement); + masm.mov(bigInt, out); + restoreSavedRegisters(); + masm.jump(ool->rejoin()); + + // Couldn't create the BigInt. Restore `edx:eax` and call into the VM. + masm.bind(&fail); + restoreSavedRegisters(); + masm.jump(ool->entry()); + + // At this point `edx:eax` must have been restored to their original values. + masm.bind(ool->rejoin()); +} + +void CodeGenerator::visitAtomicExchangeTypedArrayElement64( + LAtomicExchangeTypedArrayElement64* lir) { + Register elements = ToRegister(lir->elements()); + Register value = ToRegister(lir->value()); + Register64 temp1 = ToRegister64(lir->temp1()); + Register out = ToRegister(lir->output()); + Register64 temp2 = Register64(value, out); + + MOZ_ASSERT(value == edx); + MOZ_ASSERT(temp1 == Register64(ecx, ebx)); + MOZ_ASSERT(temp2 == Register64(edx, eax)); + MOZ_ASSERT(out == eax); + + Scalar::Type arrayType = lir->mir()->arrayType(); + + DebugOnly<uint32_t> framePushed = masm.framePushed(); + + // Save edx before it's clobbered below. + masm.push(edx); + + auto restoreSavedRegisters = [&]() { masm.pop(edx); }; + + masm.loadBigInt64(value, temp1); + + if (lir->index()->isConstant()) { + Address dest = ToAddress(elements, lir->index(), arrayType); + masm.atomicExchange64(Synchronization::Full(), dest, temp1, temp2); + } else { + BaseIndex dest(elements, ToRegister(lir->index()), + ScaleFromScalarType(arrayType)); + masm.atomicExchange64(Synchronization::Full(), dest, temp1, temp2); + } + + // Move the result from `edx:eax` to `ecx:ebx`. + masm.move64(temp2, temp1); + + // OutOfLineCallVM tracks the currently pushed stack entries as reported by + // |masm.framePushed()|. We mustn't have any additional entries on the stack + // which weren't previously recorded by the safepoint, otherwise the GC + // complains when tracing the Ion frames, because the stack frames don't + // have their expected layout. + MOZ_ASSERT(framePushed == masm.framePushed()); + + OutOfLineCode* ool = createBigIntOutOfLine(lir, arrayType, temp1, out); + + // Use `edx`, which is already on the stack, as a temp register. 
+ Register temp = edx; + + Label fail; + masm.newGCBigInt(out, temp, initialBigIntHeap(), &fail); + masm.initializeBigInt64(arrayType, out, temp1); + restoreSavedRegisters(); + masm.jump(ool->rejoin()); + + // Couldn't create the BigInt. Restore `edx` and call into the VM. + masm.bind(&fail); + restoreSavedRegisters(); + masm.jump(ool->entry()); + + // At this point `edx` must have been restored to its original value. + masm.bind(ool->rejoin()); +} + +void CodeGenerator::visitAtomicTypedArrayElementBinop64( + LAtomicTypedArrayElementBinop64* lir) { + MOZ_ASSERT(!lir->mir()->isForEffect()); + + Register elements = ToRegister(lir->elements()); + Register value = ToRegister(lir->value()); + Register64 temp1 = ToRegister64(lir->temp1()); + Register out = ToRegister(lir->output()); + Register64 temp2 = Register64(value, out); + + MOZ_ASSERT(value == edx); + MOZ_ASSERT(temp1 == Register64(ecx, ebx)); + MOZ_ASSERT(temp2 == Register64(edx, eax)); + MOZ_ASSERT(out == eax); + + Scalar::Type arrayType = lir->mir()->arrayType(); + AtomicOp atomicOp = lir->mir()->operation(); + + DebugOnly<uint32_t> framePushed = masm.framePushed(); + + // Save edx before it's clobbered below. + masm.push(edx); + + auto restoreSavedRegisters = [&]() { masm.pop(edx); }; + + masm.loadBigInt64(value, temp1); + + masm.Push(temp1); + + Address addr(masm.getStackPointer(), 0); + + if (lir->index()->isConstant()) { + Address dest = ToAddress(elements, lir->index(), arrayType); + masm.atomicFetchOp64(Synchronization::Full(), atomicOp, addr, dest, temp1, + temp2); + } else { + BaseIndex dest(elements, ToRegister(lir->index()), + ScaleFromScalarType(arrayType)); + masm.atomicFetchOp64(Synchronization::Full(), atomicOp, addr, dest, temp1, + temp2); + } + + masm.freeStack(sizeof(uint64_t)); + + // Move the result from `edx:eax` to `ecx:ebx`. + masm.move64(temp2, temp1); + + // OutOfLineCallVM tracks the currently pushed stack entries as reported by + // |masm.framePushed()|. We mustn't have any additional entries on the stack + // which weren't previously recorded by the safepoint, otherwise the GC + // complains when tracing the Ion frames, because the stack frames don't + // have their expected layout. + MOZ_ASSERT(framePushed == masm.framePushed()); + + OutOfLineCode* ool = createBigIntOutOfLine(lir, arrayType, temp1, out); + + // Use `edx`, which is already on the stack, as a temp register. + Register temp = edx; + + Label fail; + masm.newGCBigInt(out, temp, initialBigIntHeap(), &fail); + masm.initializeBigInt64(arrayType, out, temp1); + restoreSavedRegisters(); + masm.jump(ool->rejoin()); + + // Couldn't create the BigInt. Restore `edx` and call into the VM. + masm.bind(&fail); + restoreSavedRegisters(); + masm.jump(ool->entry()); + + // At this point `edx` must have been restored to its original value. + masm.bind(ool->rejoin()); +} + +void CodeGenerator::visitAtomicTypedArrayElementBinopForEffect64( + LAtomicTypedArrayElementBinopForEffect64* lir) { + MOZ_ASSERT(lir->mir()->isForEffect()); + + Register elements = ToRegister(lir->elements()); + Register value = ToRegister(lir->value()); + Register64 temp1 = ToRegister64(lir->temp1()); + Register tempLow = ToRegister(lir->tempLow()); + Register64 temp2 = Register64(value, tempLow); + + MOZ_ASSERT(value == edx); + MOZ_ASSERT(temp1 == Register64(ecx, ebx)); + MOZ_ASSERT(temp2 == Register64(edx, eax)); + MOZ_ASSERT(tempLow == eax); + + Scalar::Type arrayType = lir->mir()->arrayType(); + AtomicOp atomicOp = lir->mir()->operation(); + + // Save edx before it's clobbered below. 
+ masm.push(edx); + + masm.loadBigInt64(value, temp1); + + masm.Push(temp1); + + Address addr(masm.getStackPointer(), 0); + + if (lir->index()->isConstant()) { + Address dest = ToAddress(elements, lir->index(), arrayType); + masm.atomicFetchOp64(Synchronization::Full(), atomicOp, addr, dest, temp1, + temp2); + } else { + BaseIndex dest(elements, ToRegister(lir->index()), + ScaleFromScalarType(arrayType)); + masm.atomicFetchOp64(Synchronization::Full(), atomicOp, addr, dest, temp1, + temp2); + } + + masm.freeStack(sizeof(uint64_t)); + + masm.pop(edx); +} + +void CodeGenerator::visitWasmUint32ToDouble(LWasmUint32ToDouble* lir) { + Register input = ToRegister(lir->input()); + Register temp = ToRegister(lir->temp()); + + if (input != temp) { + masm.mov(input, temp); + } + + // Beware: convertUInt32ToDouble clobbers input. + masm.convertUInt32ToDouble(temp, ToFloatRegister(lir->output())); +} + +void CodeGenerator::visitWasmUint32ToFloat32(LWasmUint32ToFloat32* lir) { + Register input = ToRegister(lir->input()); + Register temp = ToRegister(lir->temp()); + FloatRegister output = ToFloatRegister(lir->output()); + + if (input != temp) { + masm.mov(input, temp); + } + + // Beware: convertUInt32ToFloat32 clobbers input. + masm.convertUInt32ToFloat32(temp, output); +} + +void CodeGenerator::visitWasmHeapBase(LWasmHeapBase* ins) { + masm.loadPtr(Address(ToRegister(ins->instance()), + wasm::Instance::offsetOfMemoryBase()), + ToRegister(ins->output())); +} + +template <typename T> +void CodeGeneratorX86::emitWasmLoad(T* ins) { + const MWasmLoad* mir = ins->mir(); + + uint32_t offset = mir->access().offset(); + MOZ_ASSERT(offset < masm.wasmMaxOffsetGuardLimit()); + + const LAllocation* ptr = ins->ptr(); + const LAllocation* memoryBase = ins->memoryBase(); + + // Lowering has set things up so that we can use a BaseIndex form if the + // pointer is constant and the offset is zero, or if the pointer is zero. + + Operand srcAddr = + ptr->isBogus() + ? Operand(ToRegister(memoryBase), + offset ? offset : mir->base()->toConstant()->toInt32()) + : Operand(ToRegister(memoryBase), ToRegister(ptr), TimesOne, offset); + + if (mir->type() == MIRType::Int64) { + MOZ_ASSERT_IF(mir->access().isAtomic(), + mir->access().type() != Scalar::Int64); + masm.wasmLoadI64(mir->access(), srcAddr, ToOutRegister64(ins)); + } else { + masm.wasmLoad(mir->access(), srcAddr, ToAnyRegister(ins->output())); + } +} + +void CodeGenerator::visitWasmLoad(LWasmLoad* ins) { emitWasmLoad(ins); } + +void CodeGenerator::visitWasmLoadI64(LWasmLoadI64* ins) { emitWasmLoad(ins); } + +template <typename T> +void CodeGeneratorX86::emitWasmStore(T* ins) { + const MWasmStore* mir = ins->mir(); + + uint32_t offset = mir->access().offset(); + MOZ_ASSERT(offset < masm.wasmMaxOffsetGuardLimit()); + + const LAllocation* ptr = ins->ptr(); + const LAllocation* memoryBase = ins->memoryBase(); + + // Lowering has set things up so that we can use a BaseIndex form if the + // pointer is constant and the offset is zero, or if the pointer is zero. + + Operand dstAddr = + ptr->isBogus() + ? Operand(ToRegister(memoryBase), + offset ? 
offset : mir->base()->toConstant()->toInt32()) + : Operand(ToRegister(memoryBase), ToRegister(ptr), TimesOne, offset); + + if (mir->access().type() == Scalar::Int64) { + Register64 value = + ToRegister64(ins->getInt64Operand(LWasmStoreI64::ValueIndex)); + masm.wasmStoreI64(mir->access(), value, dstAddr); + } else { + AnyRegister value = ToAnyRegister(ins->getOperand(LWasmStore::ValueIndex)); + masm.wasmStore(mir->access(), value, dstAddr); + } +} + +void CodeGenerator::visitWasmStore(LWasmStore* ins) { emitWasmStore(ins); } + +void CodeGenerator::visitWasmStoreI64(LWasmStoreI64* ins) { + emitWasmStore(ins); +} + +void CodeGenerator::visitWasmCompareExchangeHeap( + LWasmCompareExchangeHeap* ins) { + MWasmCompareExchangeHeap* mir = ins->mir(); + + Register ptrReg = ToRegister(ins->ptr()); + Register oldval = ToRegister(ins->oldValue()); + Register newval = ToRegister(ins->newValue()); + Register addrTemp = ToRegister(ins->addrTemp()); + Register memoryBase = ToRegister(ins->memoryBase()); + Register output = ToRegister(ins->output()); + + masm.leal(Operand(memoryBase, ptrReg, TimesOne, mir->access().offset()), + addrTemp); + + Address memAddr(addrTemp, 0); + masm.wasmCompareExchange(mir->access(), memAddr, oldval, newval, output); +} + +void CodeGenerator::visitWasmAtomicExchangeHeap(LWasmAtomicExchangeHeap* ins) { + MWasmAtomicExchangeHeap* mir = ins->mir(); + + Register ptrReg = ToRegister(ins->ptr()); + Register value = ToRegister(ins->value()); + Register addrTemp = ToRegister(ins->addrTemp()); + Register memoryBase = ToRegister(ins->memoryBase()); + Register output = ToRegister(ins->output()); + + masm.leal(Operand(memoryBase, ptrReg, TimesOne, mir->access().offset()), + addrTemp); + + Address memAddr(addrTemp, 0); + masm.wasmAtomicExchange(mir->access(), memAddr, value, output); +} + +void CodeGenerator::visitWasmAtomicBinopHeap(LWasmAtomicBinopHeap* ins) { + MWasmAtomicBinopHeap* mir = ins->mir(); + + Register ptrReg = ToRegister(ins->ptr()); + Register temp = + ins->temp()->isBogusTemp() ? 
InvalidReg : ToRegister(ins->temp()); + Register addrTemp = ToRegister(ins->addrTemp()); + Register out = ToRegister(ins->output()); + const LAllocation* value = ins->value(); + AtomicOp op = mir->operation(); + Register memoryBase = ToRegister(ins->memoryBase()); + + masm.leal(Operand(memoryBase, ptrReg, TimesOne, mir->access().offset()), + addrTemp); + + Address memAddr(addrTemp, 0); + if (value->isConstant()) { + masm.wasmAtomicFetchOp(mir->access(), op, Imm32(ToInt32(value)), memAddr, + temp, out); + } else { + masm.wasmAtomicFetchOp(mir->access(), op, ToRegister(value), memAddr, temp, + out); + } +} + +void CodeGenerator::visitWasmAtomicBinopHeapForEffect( + LWasmAtomicBinopHeapForEffect* ins) { + MWasmAtomicBinopHeap* mir = ins->mir(); + MOZ_ASSERT(!mir->hasUses()); + + Register ptrReg = ToRegister(ins->ptr()); + Register addrTemp = ToRegister(ins->addrTemp()); + const LAllocation* value = ins->value(); + AtomicOp op = mir->operation(); + Register memoryBase = ToRegister(ins->memoryBase()); + + masm.leal(Operand(memoryBase, ptrReg, TimesOne, mir->access().offset()), + addrTemp); + + Address memAddr(addrTemp, 0); + if (value->isConstant()) { + masm.wasmAtomicEffectOp(mir->access(), op, Imm32(ToInt32(value)), memAddr, + InvalidReg); + } else { + masm.wasmAtomicEffectOp(mir->access(), op, ToRegister(value), memAddr, + InvalidReg); + } +} + +void CodeGenerator::visitWasmAtomicLoadI64(LWasmAtomicLoadI64* ins) { + uint32_t offset = ins->mir()->access().offset(); + MOZ_ASSERT(offset < masm.wasmMaxOffsetGuardLimit()); + + const LAllocation* memoryBase = ins->memoryBase(); + const LAllocation* ptr = ins->ptr(); + BaseIndex srcAddr(ToRegister(memoryBase), ToRegister(ptr), TimesOne, offset); + + MOZ_ASSERT(ToRegister(ins->t1()) == ecx); + MOZ_ASSERT(ToRegister(ins->t2()) == ebx); + MOZ_ASSERT(ToOutRegister64(ins).high == edx); + MOZ_ASSERT(ToOutRegister64(ins).low == eax); + + masm.wasmAtomicLoad64(ins->mir()->access(), srcAddr, Register64(ecx, ebx), + Register64(edx, eax)); +} + +void CodeGenerator::visitWasmCompareExchangeI64(LWasmCompareExchangeI64* ins) { + uint32_t offset = ins->mir()->access().offset(); + MOZ_ASSERT(offset < masm.wasmMaxOffsetGuardLimit()); + + const LAllocation* memoryBase = ins->memoryBase(); + const LAllocation* ptr = ins->ptr(); + Operand srcAddr(ToRegister(memoryBase), ToRegister(ptr), TimesOne, offset); + + MOZ_ASSERT(ToRegister64(ins->expected()).low == eax); + MOZ_ASSERT(ToRegister64(ins->expected()).high == edx); + MOZ_ASSERT(ToRegister64(ins->replacement()).low == ebx); + MOZ_ASSERT(ToRegister64(ins->replacement()).high == ecx); + MOZ_ASSERT(ToOutRegister64(ins).low == eax); + MOZ_ASSERT(ToOutRegister64(ins).high == edx); + + masm.append(ins->mir()->access(), masm.size()); + masm.lock_cmpxchg8b(edx, eax, ecx, ebx, srcAddr); +} + +template <typename T> +void CodeGeneratorX86::emitWasmStoreOrExchangeAtomicI64( + T* ins, const wasm::MemoryAccessDesc& access) { + MOZ_ASSERT(access.offset() < masm.wasmMaxOffsetGuardLimit()); + + const LAllocation* memoryBase = ins->memoryBase(); + const LAllocation* ptr = ins->ptr(); + Operand srcAddr(ToRegister(memoryBase), ToRegister(ptr), TimesOne, + access.offset()); + + DebugOnly<const LInt64Allocation> value = ins->value(); + MOZ_ASSERT(ToRegister64(value).low == ebx); + MOZ_ASSERT(ToRegister64(value).high == ecx); + + // eax and edx will be overwritten every time through the loop but + // memoryBase and ptr must remain live for a possible second iteration. 
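+  // (A failed LOCK CMPXCHG8B reloads the current memory value into edx:eax,
+  // and the NonZero branch below retries with that value as the new expected
+  // pair.)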
+ + MOZ_ASSERT(ToRegister(memoryBase) != edx && ToRegister(memoryBase) != eax); + MOZ_ASSERT(ToRegister(ptr) != edx && ToRegister(ptr) != eax); + + Label again; + masm.bind(&again); + masm.append(access, masm.size()); + masm.lock_cmpxchg8b(edx, eax, ecx, ebx, srcAddr); + masm.j(Assembler::Condition::NonZero, &again); +} + +void CodeGenerator::visitWasmAtomicStoreI64(LWasmAtomicStoreI64* ins) { + MOZ_ASSERT(ToRegister(ins->t1()) == edx); + MOZ_ASSERT(ToRegister(ins->t2()) == eax); + + emitWasmStoreOrExchangeAtomicI64(ins, ins->mir()->access()); +} + +void CodeGenerator::visitWasmAtomicExchangeI64(LWasmAtomicExchangeI64* ins) { + MOZ_ASSERT(ToOutRegister64(ins).high == edx); + MOZ_ASSERT(ToOutRegister64(ins).low == eax); + + emitWasmStoreOrExchangeAtomicI64(ins, ins->access()); +} + +void CodeGenerator::visitWasmAtomicBinopI64(LWasmAtomicBinopI64* ins) { + uint32_t offset = ins->access().offset(); + MOZ_ASSERT(offset < masm.wasmMaxOffsetGuardLimit()); + + const LAllocation* memoryBase = ins->memoryBase(); + const LAllocation* ptr = ins->ptr(); + + BaseIndex srcAddr(ToRegister(memoryBase), ToRegister(ptr), TimesOne, offset); + + MOZ_ASSERT(ToRegister(memoryBase) == esi || ToRegister(memoryBase) == edi); + MOZ_ASSERT(ToRegister(ptr) == esi || ToRegister(ptr) == edi); + + Register64 value = ToRegister64(ins->value()); + + MOZ_ASSERT(value.low == ebx); + MOZ_ASSERT(value.high == ecx); + + Register64 output = ToOutRegister64(ins); + + MOZ_ASSERT(output.low == eax); + MOZ_ASSERT(output.high == edx); + + masm.Push(ecx); + masm.Push(ebx); + + Address valueAddr(esp, 0); + + // Here the `value` register acts as a temp, we'll restore it below. + masm.wasmAtomicFetchOp64(ins->access(), ins->operation(), valueAddr, srcAddr, + value, output); + + masm.Pop(ebx); + masm.Pop(ecx); +} + +namespace js { +namespace jit { + +class OutOfLineTruncate : public OutOfLineCodeBase<CodeGeneratorX86> { + LInstruction* ins_; + + public: + explicit OutOfLineTruncate(LInstruction* ins) : ins_(ins) { + MOZ_ASSERT(ins_->isTruncateDToInt32() || + ins_->isWasmBuiltinTruncateDToInt32()); + } + + void accept(CodeGeneratorX86* codegen) override { + codegen->visitOutOfLineTruncate(this); + } + + LAllocation* input() { return ins_->getOperand(0); } + LDefinition* output() { return ins_->getDef(0); } + LDefinition* tempFloat() { return ins_->getTemp(0); } + + wasm::BytecodeOffset bytecodeOffset() const { + if (ins_->isTruncateDToInt32()) { + return ins_->toTruncateDToInt32()->mir()->bytecodeOffset(); + } + + return ins_->toWasmBuiltinTruncateDToInt32()->mir()->bytecodeOffset(); + } +}; + +class OutOfLineTruncateFloat32 : public OutOfLineCodeBase<CodeGeneratorX86> { + LInstruction* ins_; + + public: + explicit OutOfLineTruncateFloat32(LInstruction* ins) : ins_(ins) { + MOZ_ASSERT(ins_->isTruncateFToInt32() || + ins_->isWasmBuiltinTruncateFToInt32()); + } + + void accept(CodeGeneratorX86* codegen) override { + codegen->visitOutOfLineTruncateFloat32(this); + } + + LAllocation* input() { return ins_->getOperand(0); } + LDefinition* output() { return ins_->getDef(0); } + LDefinition* tempFloat() { return ins_->getTemp(0); } + + wasm::BytecodeOffset bytecodeOffset() const { + if (ins_->isTruncateFToInt32()) { + return ins_->toTruncateDToInt32()->mir()->bytecodeOffset(); + } + + return ins_->toWasmBuiltinTruncateFToInt32()->mir()->bytecodeOffset(); + } +}; + +} // namespace jit +} // namespace js + +void CodeGenerator::visitTruncateDToInt32(LTruncateDToInt32* ins) { + FloatRegister input = ToFloatRegister(ins->input()); + Register output = 
ToRegister(ins->output()); + + OutOfLineTruncate* ool = new (alloc()) OutOfLineTruncate(ins); + addOutOfLineCode(ool, ins->mir()); + + masm.branchTruncateDoubleMaybeModUint32(input, output, ool->entry()); + masm.bind(ool->rejoin()); +} + +void CodeGenerator::visitWasmBuiltinTruncateDToInt32( + LWasmBuiltinTruncateDToInt32* lir) { + FloatRegister input = ToFloatRegister(lir->getOperand(0)); + Register output = ToRegister(lir->getDef(0)); + + OutOfLineTruncate* ool = new (alloc()) OutOfLineTruncate(lir); + addOutOfLineCode(ool, lir->mir()); + + masm.branchTruncateDoubleMaybeModUint32(input, output, ool->entry()); + masm.bind(ool->rejoin()); +} + +void CodeGenerator::visitTruncateFToInt32(LTruncateFToInt32* ins) { + FloatRegister input = ToFloatRegister(ins->input()); + Register output = ToRegister(ins->output()); + + OutOfLineTruncateFloat32* ool = new (alloc()) OutOfLineTruncateFloat32(ins); + addOutOfLineCode(ool, ins->mir()); + + masm.branchTruncateFloat32MaybeModUint32(input, output, ool->entry()); + masm.bind(ool->rejoin()); +} + +void CodeGenerator::visitWasmBuiltinTruncateFToInt32( + LWasmBuiltinTruncateFToInt32* lir) { + FloatRegister input = ToFloatRegister(lir->getOperand(0)); + Register output = ToRegister(lir->getDef(0)); + + OutOfLineTruncateFloat32* ool = new (alloc()) OutOfLineTruncateFloat32(lir); + addOutOfLineCode(ool, lir->mir()); + + masm.branchTruncateFloat32MaybeModUint32(input, output, ool->entry()); + masm.bind(ool->rejoin()); +} + +void CodeGeneratorX86::visitOutOfLineTruncate(OutOfLineTruncate* ool) { + FloatRegister input = ToFloatRegister(ool->input()); + Register output = ToRegister(ool->output()); + + Label fail; + + if (Assembler::HasSSE3()) { + Label failPopDouble; + // Push double. + masm.subl(Imm32(sizeof(double)), esp); + masm.storeDouble(input, Operand(esp, 0)); + + // Check exponent to avoid fp exceptions. + masm.branchDoubleNotInInt64Range(Address(esp, 0), output, &failPopDouble); + + // Load double, perform 64-bit truncation. + masm.truncateDoubleToInt64(Address(esp, 0), Address(esp, 0), output); + + // Load low word, pop double and jump back. + masm.load32(Address(esp, 0), output); + masm.addl(Imm32(sizeof(double)), esp); + masm.jump(ool->rejoin()); + + masm.bind(&failPopDouble); + masm.addl(Imm32(sizeof(double)), esp); + masm.jump(&fail); + } else { + FloatRegister temp = ToFloatRegister(ool->tempFloat()); + + // Try to convert doubles representing integers within 2^32 of a signed + // integer, by adding/subtracting 2^32 and then trying to convert to int32. + // This has to be an exact conversion, as otherwise the truncation works + // incorrectly on the modified value. 
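+    // For example, 3000000000.0 is out of int32 range, but
+    // 3000000000.0 - 4294967296.0 = -1294967296.0 converts exactly, and
+    // -1294967296 reinterpreted as uint32 is again 3000000000, matching
+    // truncation modulo 2^32.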
+ { + ScratchDoubleScope fpscratch(masm); + masm.zeroDouble(fpscratch); + masm.vucomisd(fpscratch, input); + masm.j(Assembler::Parity, &fail); + } + + { + Label positive; + masm.j(Assembler::Above, &positive); + + masm.loadConstantDouble(4294967296.0, temp); + Label skip; + masm.jmp(&skip); + + masm.bind(&positive); + masm.loadConstantDouble(-4294967296.0, temp); + masm.bind(&skip); + } + + masm.addDouble(input, temp); + masm.vcvttsd2si(temp, output); + ScratchDoubleScope fpscratch(masm); + masm.vcvtsi2sd(output, fpscratch, fpscratch); + + masm.vucomisd(fpscratch, temp); + masm.j(Assembler::Parity, &fail); + masm.j(Assembler::Equal, ool->rejoin()); + } + + masm.bind(&fail); + { + if (gen->compilingWasm()) { + masm.Push(InstanceReg); + } + int32_t framePushedAfterInstance = masm.framePushed(); + + saveVolatile(output); + + if (gen->compilingWasm()) { + masm.setupWasmABICall(); + masm.passABIArg(input, MoveOp::DOUBLE); + + int32_t instanceOffset = masm.framePushed() - framePushedAfterInstance; + masm.callWithABI(ool->bytecodeOffset(), wasm::SymbolicAddress::ToInt32, + mozilla::Some(instanceOffset)); + } else { + using Fn = int32_t (*)(double); + masm.setupUnalignedABICall(output); + masm.passABIArg(input, MoveOp::DOUBLE); + masm.callWithABI<Fn, JS::ToInt32>(MoveOp::GENERAL, + CheckUnsafeCallWithABI::DontCheckOther); + } + masm.storeCallInt32Result(output); + + restoreVolatile(output); + + if (gen->compilingWasm()) { + masm.Pop(InstanceReg); + } + } + + masm.jump(ool->rejoin()); +} + +void CodeGeneratorX86::visitOutOfLineTruncateFloat32( + OutOfLineTruncateFloat32* ool) { + FloatRegister input = ToFloatRegister(ool->input()); + Register output = ToRegister(ool->output()); + + Label fail; + + if (Assembler::HasSSE3()) { + Label failPopFloat; + + // Push float32, but subtracts 64 bits so that the value popped by fisttp + // fits + masm.subl(Imm32(sizeof(uint64_t)), esp); + masm.storeFloat32(input, Operand(esp, 0)); + + // Check exponent to avoid fp exceptions. + masm.branchFloat32NotInInt64Range(Address(esp, 0), output, &failPopFloat); + + // Load float, perform 32-bit truncation. + masm.truncateFloat32ToInt64(Address(esp, 0), Address(esp, 0), output); + + // Load low word, pop 64bits and jump back. + masm.load32(Address(esp, 0), output); + masm.addl(Imm32(sizeof(uint64_t)), esp); + masm.jump(ool->rejoin()); + + masm.bind(&failPopFloat); + masm.addl(Imm32(sizeof(uint64_t)), esp); + masm.jump(&fail); + } else { + FloatRegister temp = ToFloatRegister(ool->tempFloat()); + + // Try to convert float32 representing integers within 2^32 of a signed + // integer, by adding/subtracting 2^32 and then trying to convert to int32. + // This has to be an exact conversion, as otherwise the truncation works + // incorrectly on the modified value. 
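+    // The compare against zero below also filters out NaN: ucomiss raises
+    // the parity flag for unordered operands, which the Parity branch sends
+    // to the failure path.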
+ { + ScratchFloat32Scope fpscratch(masm); + masm.zeroFloat32(fpscratch); + masm.vucomiss(fpscratch, input); + masm.j(Assembler::Parity, &fail); + } + + { + Label positive; + masm.j(Assembler::Above, &positive); + + masm.loadConstantFloat32(4294967296.f, temp); + Label skip; + masm.jmp(&skip); + + masm.bind(&positive); + masm.loadConstantFloat32(-4294967296.f, temp); + masm.bind(&skip); + } + + masm.addFloat32(input, temp); + masm.vcvttss2si(temp, output); + ScratchFloat32Scope fpscratch(masm); + masm.vcvtsi2ss(output, fpscratch, fpscratch); + + masm.vucomiss(fpscratch, temp); + masm.j(Assembler::Parity, &fail); + masm.j(Assembler::Equal, ool->rejoin()); + } + + masm.bind(&fail); + { + if (gen->compilingWasm()) { + masm.Push(InstanceReg); + } + int32_t framePushedAfterInstance = masm.framePushed(); + + saveVolatile(output); + + masm.Push(input); + + if (gen->compilingWasm()) { + masm.setupWasmABICall(); + } else { + masm.setupUnalignedABICall(output); + } + + masm.vcvtss2sd(input, input, input); + masm.passABIArg(input.asDouble(), MoveOp::DOUBLE); + + if (gen->compilingWasm()) { + int32_t instanceOffset = masm.framePushed() - framePushedAfterInstance; + masm.callWithABI(ool->bytecodeOffset(), wasm::SymbolicAddress::ToInt32, + mozilla::Some(instanceOffset)); + } else { + using Fn = int32_t (*)(double); + masm.callWithABI<Fn, JS::ToInt32>(MoveOp::GENERAL, + CheckUnsafeCallWithABI::DontCheckOther); + } + + masm.storeCallInt32Result(output); + masm.Pop(input); + + restoreVolatile(output); + + if (gen->compilingWasm()) { + masm.Pop(InstanceReg); + } + } + + masm.jump(ool->rejoin()); +} + +void CodeGenerator::visitCompareI64(LCompareI64* lir) { + MCompare* mir = lir->mir(); + MOZ_ASSERT(mir->compareType() == MCompare::Compare_Int64 || + mir->compareType() == MCompare::Compare_UInt64); + + const LInt64Allocation lhs = lir->getInt64Operand(LCompareI64::Lhs); + const LInt64Allocation rhs = lir->getInt64Operand(LCompareI64::Rhs); + Register64 lhsRegs = ToRegister64(lhs); + Register output = ToRegister(lir->output()); + + bool isSigned = mir->compareType() == MCompare::Compare_Int64; + Assembler::Condition condition = JSOpToCondition(lir->jsop(), isSigned); + Label done; + + masm.move32(Imm32(1), output); + + if (IsConstant(rhs)) { + Imm64 imm = Imm64(ToInt64(rhs)); + masm.branch64(condition, lhsRegs, imm, &done); + } else { + Register64 rhsRegs = ToRegister64(rhs); + masm.branch64(condition, lhsRegs, rhsRegs, &done); + } + + masm.xorl(output, output); + masm.bind(&done); +} + +void CodeGenerator::visitCompareI64AndBranch(LCompareI64AndBranch* lir) { + MCompare* mir = lir->cmpMir(); + MOZ_ASSERT(mir->compareType() == MCompare::Compare_Int64 || + mir->compareType() == MCompare::Compare_UInt64); + + const LInt64Allocation lhs = lir->getInt64Operand(LCompareI64::Lhs); + const LInt64Allocation rhs = lir->getInt64Operand(LCompareI64::Rhs); + Register64 lhsRegs = ToRegister64(lhs); + + bool isSigned = mir->compareType() == MCompare::Compare_Int64; + Assembler::Condition condition = JSOpToCondition(lir->jsop(), isSigned); + + Label* trueLabel = getJumpLabelForBranch(lir->ifTrue()); + Label* falseLabel = getJumpLabelForBranch(lir->ifFalse()); + + if (isNextBlock(lir->ifFalse()->lir())) { + falseLabel = nullptr; + } else if (isNextBlock(lir->ifTrue()->lir())) { + condition = Assembler::InvertCondition(condition); + trueLabel = falseLabel; + falseLabel = nullptr; + } + + if (IsConstant(rhs)) { + Imm64 imm = Imm64(ToInt64(rhs)); + masm.branch64(condition, lhsRegs, imm, trueLabel, falseLabel); + } else { + 
Register64 rhsRegs = ToRegister64(rhs); + masm.branch64(condition, lhsRegs, rhsRegs, trueLabel, falseLabel); + } +} + +void CodeGenerator::visitDivOrModI64(LDivOrModI64* lir) { + MOZ_ASSERT(gen->compilingWasm()); + MOZ_ASSERT(ToRegister(lir->getOperand(LDivOrModI64::Instance)) == + InstanceReg); + + masm.Push(InstanceReg); + int32_t framePushedAfterInstance = masm.framePushed(); + + Register64 lhs = ToRegister64(lir->getInt64Operand(LDivOrModI64::Lhs)); + Register64 rhs = ToRegister64(lir->getInt64Operand(LDivOrModI64::Rhs)); + Register64 output = ToOutRegister64(lir); + + MOZ_ASSERT(output == ReturnReg64); + + Label done; + + // Handle divide by zero. + if (lir->canBeDivideByZero()) { + Label nonZero; + // We can use InstanceReg as temp register because we preserved it + // before. + masm.branchTest64(Assembler::NonZero, rhs, rhs, InstanceReg, &nonZero); + masm.wasmTrap(wasm::Trap::IntegerDivideByZero, lir->bytecodeOffset()); + masm.bind(&nonZero); + } + + MDefinition* mir = lir->mir(); + + // Handle an integer overflow exception from INT64_MIN / -1. + if (lir->canBeNegativeOverflow()) { + Label notOverflow; + masm.branch64(Assembler::NotEqual, lhs, Imm64(INT64_MIN), ¬Overflow); + masm.branch64(Assembler::NotEqual, rhs, Imm64(-1), ¬Overflow); + if (mir->isWasmBuiltinModI64()) { + masm.xor64(output, output); + } else { + masm.wasmTrap(wasm::Trap::IntegerOverflow, lir->bytecodeOffset()); + } + masm.jump(&done); + masm.bind(¬Overflow); + } + + masm.setupWasmABICall(); + masm.passABIArg(lhs.high); + masm.passABIArg(lhs.low); + masm.passABIArg(rhs.high); + masm.passABIArg(rhs.low); + + int32_t instanceOffset = masm.framePushed() - framePushedAfterInstance; + if (mir->isWasmBuiltinModI64()) { + masm.callWithABI(lir->bytecodeOffset(), wasm::SymbolicAddress::ModI64, + mozilla::Some(instanceOffset)); + } else { + masm.callWithABI(lir->bytecodeOffset(), wasm::SymbolicAddress::DivI64, + mozilla::Some(instanceOffset)); + } + + // output in edx:eax, move to output register. + masm.movl(edx, output.high); + MOZ_ASSERT(eax == output.low); + + masm.bind(&done); + masm.Pop(InstanceReg); +} + +void CodeGenerator::visitUDivOrModI64(LUDivOrModI64* lir) { + MOZ_ASSERT(gen->compilingWasm()); + MOZ_ASSERT(ToRegister(lir->getOperand(LDivOrModI64::Instance)) == + InstanceReg); + + masm.Push(InstanceReg); + int32_t framePushedAfterInstance = masm.framePushed(); + + Register64 lhs = ToRegister64(lir->getInt64Operand(LDivOrModI64::Lhs)); + Register64 rhs = ToRegister64(lir->getInt64Operand(LDivOrModI64::Rhs)); + Register64 output = ToOutRegister64(lir); + + MOZ_ASSERT(output == ReturnReg64); + + // Prevent divide by zero. + if (lir->canBeDivideByZero()) { + Label nonZero; + // We can use InstanceReg as temp register because we preserved it + // before. + masm.branchTest64(Assembler::NonZero, rhs, rhs, InstanceReg, &nonZero); + masm.wasmTrap(wasm::Trap::IntegerDivideByZero, lir->bytecodeOffset()); + masm.bind(&nonZero); + } + + masm.setupWasmABICall(); + masm.passABIArg(lhs.high); + masm.passABIArg(lhs.low); + masm.passABIArg(rhs.high); + masm.passABIArg(rhs.low); + + MDefinition* mir = lir->mir(); + int32_t instanceOffset = masm.framePushed() - framePushedAfterInstance; + if (mir->isWasmBuiltinModI64()) { + masm.callWithABI(lir->bytecodeOffset(), wasm::SymbolicAddress::UModI64, + mozilla::Some(instanceOffset)); + } else { + masm.callWithABI(lir->bytecodeOffset(), wasm::SymbolicAddress::UDivI64, + mozilla::Some(instanceOffset)); + } + + // output in edx:eax, move to output register. 
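+  // eax already aliases output.low (asserted below), so only the high word
+  // needs an explicit move.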
+ masm.movl(edx, output.high); + MOZ_ASSERT(eax == output.low); + + masm.Pop(InstanceReg); +} + +void CodeGeneratorX86::emitBigIntDiv(LBigIntDiv* ins, Register dividend, + Register divisor, Register output, + Label* fail) { + // Callers handle division by zero and integer overflow. + + MOZ_ASSERT(dividend == eax); + MOZ_ASSERT(output == edx); + + // Sign extend the lhs into rdx to make rdx:rax. + masm.cdq(); + + masm.idiv(divisor); + + // Create and return the result. + masm.newGCBigInt(output, divisor, initialBigIntHeap(), fail); + masm.initializeBigInt(output, dividend); +} + +void CodeGeneratorX86::emitBigIntMod(LBigIntMod* ins, Register dividend, + Register divisor, Register output, + Label* fail) { + // Callers handle division by zero and integer overflow. + + MOZ_ASSERT(dividend == eax); + MOZ_ASSERT(output == edx); + + // Sign extend the lhs into rdx to make edx:eax. + masm.cdq(); + + masm.idiv(divisor); + + // Move the remainder from edx. + masm.movl(output, dividend); + + // Create and return the result. + masm.newGCBigInt(output, divisor, initialBigIntHeap(), fail); + masm.initializeBigInt(output, dividend); +} + +void CodeGenerator::visitWasmSelectI64(LWasmSelectI64* lir) { + MOZ_ASSERT(lir->mir()->type() == MIRType::Int64); + + Register cond = ToRegister(lir->condExpr()); + Register64 falseExpr = ToRegister64(lir->falseExpr()); + Register64 out = ToOutRegister64(lir); + + MOZ_ASSERT(ToRegister64(lir->trueExpr()) == out, + "true expr is reused for input"); + + Label done; + masm.branchTest32(Assembler::NonZero, cond, cond, &done); + masm.movl(falseExpr.low, out.low); + masm.movl(falseExpr.high, out.high); + masm.bind(&done); +} + +// We expect to handle only the case where compare is {U,}Int32 and select is +// {U,}Int32. Some values may be stack allocated, and the "true" input is +// reused for the output. 
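+// The body dispatches on whether the false expression is in a register
+// (cmp32Move32) or must be loaded from the stack (cmp32Load32); the
+// comparison's right-hand side may itself be a register or a stack slot.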
+void CodeGenerator::visitWasmCompareAndSelect(LWasmCompareAndSelect* ins) { + bool cmpIs32bit = ins->compareType() == MCompare::Compare_Int32 || + ins->compareType() == MCompare::Compare_UInt32; + bool selIs32bit = ins->mir()->type() == MIRType::Int32; + + MOZ_RELEASE_ASSERT( + cmpIs32bit && selIs32bit, + "CodeGenerator::visitWasmCompareAndSelect: unexpected types"); + + Register trueExprAndDest = ToRegister(ins->output()); + MOZ_ASSERT(ToRegister(ins->ifTrueExpr()) == trueExprAndDest, + "true expr input is reused for output"); + + Assembler::Condition cond = Assembler::InvertCondition( + JSOpToCondition(ins->compareType(), ins->jsop())); + const LAllocation* rhs = ins->rightExpr(); + const LAllocation* falseExpr = ins->ifFalseExpr(); + Register lhs = ToRegister(ins->leftExpr()); + + if (rhs->isRegister()) { + if (falseExpr->isRegister()) { + masm.cmp32Move32(cond, lhs, ToRegister(rhs), ToRegister(falseExpr), + trueExprAndDest); + } else { + masm.cmp32Load32(cond, lhs, ToRegister(rhs), ToAddress(falseExpr), + trueExprAndDest); + } + } else { + if (falseExpr->isRegister()) { + masm.cmp32Move32(cond, lhs, ToAddress(rhs), ToRegister(falseExpr), + trueExprAndDest); + } else { + masm.cmp32Load32(cond, lhs, ToAddress(rhs), ToAddress(falseExpr), + trueExprAndDest); + } + } +} + +void CodeGenerator::visitWasmReinterpretFromI64(LWasmReinterpretFromI64* lir) { + MOZ_ASSERT(lir->mir()->type() == MIRType::Double); + MOZ_ASSERT(lir->mir()->input()->type() == MIRType::Int64); + Register64 input = ToRegister64(lir->getInt64Operand(0)); + + masm.Push(input.high); + masm.Push(input.low); + masm.vmovq(Operand(esp, 0), ToFloatRegister(lir->output())); + masm.freeStack(sizeof(uint64_t)); +} + +void CodeGenerator::visitWasmReinterpretToI64(LWasmReinterpretToI64* lir) { + MOZ_ASSERT(lir->mir()->type() == MIRType::Int64); + MOZ_ASSERT(lir->mir()->input()->type() == MIRType::Double); + Register64 output = ToOutRegister64(lir); + + masm.reserveStack(sizeof(uint64_t)); + masm.vmovq(ToFloatRegister(lir->input()), Operand(esp, 0)); + masm.Pop(output.low); + masm.Pop(output.high); +} + +void CodeGenerator::visitExtendInt32ToInt64(LExtendInt32ToInt64* lir) { + Register64 output = ToOutRegister64(lir); + Register input = ToRegister(lir->input()); + + if (lir->mir()->isUnsigned()) { + if (output.low != input) { + masm.movl(input, output.low); + } + masm.xorl(output.high, output.high); + } else { + MOZ_ASSERT(output.low == input); + MOZ_ASSERT(output.low == eax); + MOZ_ASSERT(output.high == edx); + masm.cdq(); + } +} + +void CodeGenerator::visitSignExtendInt64(LSignExtendInt64* lir) { +#ifdef DEBUG + Register64 input = ToRegister64(lir->getInt64Operand(0)); + Register64 output = ToOutRegister64(lir); + MOZ_ASSERT(input.low == eax); + MOZ_ASSERT(output.low == eax); + MOZ_ASSERT(input.high == edx); + MOZ_ASSERT(output.high == edx); +#endif + switch (lir->mode()) { + case MSignExtendInt64::Byte: + masm.move8SignExtend(eax, eax); + break; + case MSignExtendInt64::Half: + masm.move16SignExtend(eax, eax); + break; + case MSignExtendInt64::Word: + break; + } + masm.cdq(); +} + +void CodeGenerator::visitWrapInt64ToInt32(LWrapInt64ToInt32* lir) { + const LInt64Allocation& input = lir->getInt64Operand(0); + Register output = ToRegister(lir->output()); + + if (lir->mir()->bottomHalf()) { + masm.movl(ToRegister(input.low()), output); + } else { + masm.movl(ToRegister(input.high()), output); + } +} + +void CodeGenerator::visitWasmExtendU32Index(LWasmExtendU32Index*) { + MOZ_CRASH("64-bit only"); +} + +void 
CodeGenerator::visitWasmWrapU32Index(LWasmWrapU32Index* lir) { + // Generates no code on this platform because we just return the low part of + // the input register pair. + MOZ_ASSERT(ToRegister(lir->input()) == ToRegister(lir->output())); +} + +void CodeGenerator::visitClzI64(LClzI64* lir) { + Register64 input = ToRegister64(lir->getInt64Operand(0)); + Register64 output = ToOutRegister64(lir); + + masm.clz64(input, output.low); + masm.xorl(output.high, output.high); +} + +void CodeGenerator::visitCtzI64(LCtzI64* lir) { + Register64 input = ToRegister64(lir->getInt64Operand(0)); + Register64 output = ToOutRegister64(lir); + + masm.ctz64(input, output.low); + masm.xorl(output.high, output.high); +} + +void CodeGenerator::visitNotI64(LNotI64* lir) { + Register64 input = ToRegister64(lir->getInt64Operand(0)); + Register output = ToRegister(lir->output()); + + if (input.high == output) { + masm.orl(input.low, output); + } else if (input.low == output) { + masm.orl(input.high, output); + } else { + masm.movl(input.high, output); + masm.orl(input.low, output); + } + + masm.cmpl(Imm32(0), output); + masm.emitSet(Assembler::Equal, output); +} + +void CodeGenerator::visitWasmTruncateToInt64(LWasmTruncateToInt64* lir) { + FloatRegister input = ToFloatRegister(lir->input()); + Register64 output = ToOutRegister64(lir); + + MWasmTruncateToInt64* mir = lir->mir(); + FloatRegister floatTemp = ToFloatRegister(lir->temp()); + + Label fail, convert; + + MOZ_ASSERT(mir->input()->type() == MIRType::Double || + mir->input()->type() == MIRType::Float32); + + auto* ool = new (alloc()) OutOfLineWasmTruncateCheck(mir, input, output); + addOutOfLineCode(ool, mir); + + bool isSaturating = mir->isSaturating(); + if (mir->input()->type() == MIRType::Float32) { + if (mir->isUnsigned()) { + masm.wasmTruncateFloat32ToUInt64(input, output, isSaturating, + ool->entry(), ool->rejoin(), floatTemp); + } else { + masm.wasmTruncateFloat32ToInt64(input, output, isSaturating, ool->entry(), + ool->rejoin(), floatTemp); + } + } else { + if (mir->isUnsigned()) { + masm.wasmTruncateDoubleToUInt64(input, output, isSaturating, ool->entry(), + ool->rejoin(), floatTemp); + } else { + masm.wasmTruncateDoubleToInt64(input, output, isSaturating, ool->entry(), + ool->rejoin(), floatTemp); + } + } +} + +void CodeGenerator::visitInt64ToFloatingPoint(LInt64ToFloatingPoint* lir) { + Register64 input = ToRegister64(lir->getInt64Operand(0)); + FloatRegister output = ToFloatRegister(lir->output()); + Register temp = + lir->temp()->isBogusTemp() ? 
InvalidReg : ToRegister(lir->temp()); + + MIRType outputType = lir->mir()->type(); + MOZ_ASSERT(outputType == MIRType::Double || outputType == MIRType::Float32); + + if (outputType == MIRType::Double) { + if (lir->mir()->isUnsigned()) { + masm.convertUInt64ToDouble(input, output, temp); + } else { + masm.convertInt64ToDouble(input, output); + } + } else { + if (lir->mir()->isUnsigned()) { + masm.convertUInt64ToFloat32(input, output, temp); + } else { + masm.convertInt64ToFloat32(input, output); + } + } +} + +void CodeGenerator::visitBitNotI64(LBitNotI64* ins) { + const LInt64Allocation input = ins->getInt64Operand(0); + Register64 inputR = ToRegister64(input); + MOZ_ASSERT(inputR == ToOutRegister64(ins)); + masm.notl(inputR.high); + masm.notl(inputR.low); +} + +void CodeGenerator::visitTestI64AndBranch(LTestI64AndBranch* lir) { + Register64 input = ToRegister64(lir->getInt64Operand(0)); + + masm.testl(input.high, input.high); + jumpToBlock(lir->ifTrue(), Assembler::NonZero); + masm.testl(input.low, input.low); + emitBranch(Assembler::NonZero, lir->ifTrue(), lir->ifFalse()); +} + +void CodeGenerator::visitBitAndAndBranch(LBitAndAndBranch* baab) { + // LBitAndAndBranch only represents single-word ANDs, hence it can't be + // 64-bit here. + MOZ_ASSERT(!baab->is64()); + Register regL = ToRegister(baab->left()); + if (baab->right()->isConstant()) { + masm.test32(regL, Imm32(ToInt32(baab->right()))); + } else { + masm.test32(regL, ToRegister(baab->right())); + } + emitBranch(baab->cond(), baab->ifTrue(), baab->ifFalse()); +} diff --git a/js/src/jit/x86/CodeGenerator-x86.h b/js/src/jit/x86/CodeGenerator-x86.h new file mode 100644 index 0000000000..4f92bf615f --- /dev/null +++ b/js/src/jit/x86/CodeGenerator-x86.h @@ -0,0 +1,49 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef jit_x86_CodeGenerator_x86_h +#define jit_x86_CodeGenerator_x86_h + +#include "jit/x86-shared/CodeGenerator-x86-shared.h" +#include "jit/x86/Assembler-x86.h" + +namespace js { +namespace jit { + +class OutOfLineTruncate; +class OutOfLineTruncateFloat32; + +class CodeGeneratorX86 : public CodeGeneratorX86Shared { + protected: + CodeGeneratorX86(MIRGenerator* gen, LIRGraph* graph, MacroAssembler* masm); + + ValueOperand ToValue(LInstruction* ins, size_t pos); + ValueOperand ToTempValue(LInstruction* ins, size_t pos); + + void emitBigIntDiv(LBigIntDiv* ins, Register dividend, Register divisor, + Register output, Label* fail); + void emitBigIntMod(LBigIntMod* ins, Register dividend, Register divisor, + Register output, Label* fail); + + template <typename T> + void emitWasmLoad(T* ins); + template <typename T> + void emitWasmStore(T* ins); + template <typename T> + void emitWasmStoreOrExchangeAtomicI64(T* ins, + const wasm::MemoryAccessDesc& access); + + public: + void visitOutOfLineTruncate(OutOfLineTruncate* ool); + void visitOutOfLineTruncateFloat32(OutOfLineTruncateFloat32* ool); +}; + +typedef CodeGeneratorX86 CodeGeneratorSpecific; + +} // namespace jit +} // namespace js + +#endif /* jit_x86_CodeGenerator_x86_h */ diff --git a/js/src/jit/x86/LIR-x86.h b/js/src/jit/x86/LIR-x86.h new file mode 100644 index 0000000000..c7c9587e20 --- /dev/null +++ b/js/src/jit/x86/LIR-x86.h @@ -0,0 +1,308 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef jit_x86_LIR_x86_h +#define jit_x86_LIR_x86_h + +namespace js { +namespace jit { + +class LBoxFloatingPoint : public LInstructionHelper<2, 1, 2> { + MIRType type_; + + public: + LIR_HEADER(BoxFloatingPoint); + + LBoxFloatingPoint(const LAllocation& in, const LDefinition& temp, + const LDefinition& spectreTemp, MIRType type) + : LInstructionHelper(classOpcode), type_(type) { + MOZ_ASSERT(IsFloatingPointType(type)); + setOperand(0, in); + setTemp(0, temp); + setTemp(1, spectreTemp); + } + + const LDefinition* spectreTemp() { return getTemp(1); } + + MIRType type() const { return type_; } + const char* extraName() const { return StringFromMIRType(type_); } +}; + +class LUnbox : public LInstructionHelper<1, 2, 0> { + public: + LIR_HEADER(Unbox); + + LUnbox() : LInstructionHelper(classOpcode) {} + + MUnbox* mir() const { return mir_->toUnbox(); } + const LAllocation* payload() { return getOperand(0); } + const LAllocation* type() { return getOperand(1); } + const char* extraName() const { return StringFromMIRType(mir()->type()); } +}; + +class LUnboxFloatingPoint : public LInstructionHelper<1, 2, 0> { + MIRType type_; + + public: + LIR_HEADER(UnboxFloatingPoint); + + static const size_t Input = 0; + + LUnboxFloatingPoint(const LBoxAllocation& input, MIRType type) + : LInstructionHelper(classOpcode), type_(type) { + setBoxOperand(Input, input); + } + + MUnbox* mir() const { return mir_->toUnbox(); } + + MIRType type() const { return type_; } + const char* extraName() const { return StringFromMIRType(type_); } +}; + +// Convert a 32-bit unsigned integer to a double. 
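+// The temp is needed because convertUInt32ToDouble clobbers its input
+// register (see visitWasmUint32ToDouble in CodeGenerator-x86.cpp).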
+class LWasmUint32ToDouble : public LInstructionHelper<1, 1, 1> { + public: + LIR_HEADER(WasmUint32ToDouble) + + LWasmUint32ToDouble(const LAllocation& input, const LDefinition& temp) + : LInstructionHelper(classOpcode) { + setOperand(0, input); + setTemp(0, temp); + } + const LDefinition* temp() { return getTemp(0); } +}; + +// Convert a 32-bit unsigned integer to a float32. +class LWasmUint32ToFloat32 : public LInstructionHelper<1, 1, 1> { + public: + LIR_HEADER(WasmUint32ToFloat32) + + LWasmUint32ToFloat32(const LAllocation& input, const LDefinition& temp) + : LInstructionHelper(classOpcode) { + setOperand(0, input); + setTemp(0, temp); + } + const LDefinition* temp() { return getTemp(0); } +}; + +class LDivOrModI64 + : public LCallInstructionHelper<INT64_PIECES, INT64_PIECES * 2 + 1, 0> { + public: + LIR_HEADER(DivOrModI64) + + static const size_t Lhs = 0; + static const size_t Rhs = INT64_PIECES; + static const size_t Instance = 2 * INT64_PIECES; + + LDivOrModI64(const LInt64Allocation& lhs, const LInt64Allocation& rhs, + const LAllocation& instance) + : LCallInstructionHelper(classOpcode) { + setInt64Operand(Lhs, lhs); + setInt64Operand(Rhs, rhs); + setOperand(Instance, instance); + } + + MDefinition* mir() const { + MOZ_ASSERT(mir_->isWasmBuiltinDivI64() || mir_->isWasmBuiltinModI64()); + return mir_; + } + bool canBeDivideByZero() const { + if (mir_->isWasmBuiltinModI64()) { + return mir_->toWasmBuiltinModI64()->canBeDivideByZero(); + } + return mir_->toWasmBuiltinDivI64()->canBeDivideByZero(); + } + bool canBeNegativeOverflow() const { + if (mir_->isWasmBuiltinModI64()) { + return mir_->toWasmBuiltinModI64()->canBeNegativeDividend(); + } + return mir_->toWasmBuiltinDivI64()->canBeNegativeOverflow(); + } + wasm::BytecodeOffset bytecodeOffset() const { + MOZ_ASSERT(mir_->isWasmBuiltinDivI64() || mir_->isWasmBuiltinModI64()); + if (mir_->isWasmBuiltinModI64()) { + return mir_->toWasmBuiltinModI64()->bytecodeOffset(); + } + return mir_->toWasmBuiltinDivI64()->bytecodeOffset(); + } +}; + +class LUDivOrModI64 + : public LCallInstructionHelper<INT64_PIECES, INT64_PIECES * 2 + 1, 0> { + public: + LIR_HEADER(UDivOrModI64) + + static const size_t Lhs = 0; + static const size_t Rhs = INT64_PIECES; + static const size_t Instance = 2 * INT64_PIECES; + + LUDivOrModI64(const LInt64Allocation& lhs, const LInt64Allocation& rhs, + const LAllocation& instance) + : LCallInstructionHelper(classOpcode) { + setInt64Operand(Lhs, lhs); + setInt64Operand(Rhs, rhs); + setOperand(Instance, instance); + } + + MDefinition* mir() const { + MOZ_ASSERT(mir_->isWasmBuiltinDivI64() || mir_->isWasmBuiltinModI64()); + return mir_; + } + bool canBeDivideByZero() const { + if (mir_->isWasmBuiltinModI64()) { + return mir_->toWasmBuiltinModI64()->canBeDivideByZero(); + } + return mir_->toWasmBuiltinDivI64()->canBeDivideByZero(); + } + bool canBeNegativeOverflow() const { + if (mir_->isWasmBuiltinModI64()) { + return mir_->toWasmBuiltinModI64()->canBeNegativeDividend(); + } + return mir_->toWasmBuiltinDivI64()->canBeNegativeOverflow(); + } + wasm::BytecodeOffset bytecodeOffset() const { + MOZ_ASSERT(mir_->isWasmBuiltinDivI64() || mir_->isWasmBuiltinModI64()); + if (mir_->isWasmBuiltinModI64()) { + return mir_->toWasmBuiltinModI64()->bytecodeOffset(); + } + return mir_->toWasmBuiltinDivI64()->bytecodeOffset(); + } +}; + +class LWasmTruncateToInt64 : public LInstructionHelper<INT64_PIECES, 1, 1> { + public: + LIR_HEADER(WasmTruncateToInt64); + + LWasmTruncateToInt64(const LAllocation& in, const LDefinition& temp) + : 
LInstructionHelper(classOpcode) { + setOperand(0, in); + setTemp(0, temp); + } + + MWasmTruncateToInt64* mir() const { return mir_->toWasmTruncateToInt64(); } + + const LDefinition* temp() { return getTemp(0); } +}; + +class LWasmAtomicLoadI64 : public LInstructionHelper<INT64_PIECES, 2, 2> { + public: + LIR_HEADER(WasmAtomicLoadI64); + + LWasmAtomicLoadI64(const LAllocation& memoryBase, const LAllocation& ptr, + const LDefinition& t1, const LDefinition& t2) + : LInstructionHelper(classOpcode) { + setOperand(0, memoryBase); + setOperand(1, ptr); + setTemp(0, t1); + setTemp(1, t2); + } + + MWasmLoad* mir() const { return mir_->toWasmLoad(); } + const LAllocation* memoryBase() { return getOperand(0); } + const LAllocation* ptr() { return getOperand(1); } + const LDefinition* t1() { return getTemp(0); } + const LDefinition* t2() { return getTemp(1); } +}; + +class LWasmAtomicStoreI64 : public LInstructionHelper<0, 2 + INT64_PIECES, 2> { + public: + LIR_HEADER(WasmAtomicStoreI64); + + LWasmAtomicStoreI64(const LAllocation& memoryBase, const LAllocation& ptr, + const LInt64Allocation& value, const LDefinition& t1, + const LDefinition& t2) + : LInstructionHelper(classOpcode) { + setOperand(0, memoryBase); + setOperand(1, ptr); + setInt64Operand(2, value); + setTemp(0, t1); + setTemp(1, t2); + } + + MWasmStore* mir() const { return mir_->toWasmStore(); } + const LAllocation* memoryBase() { return getOperand(0); } + const LAllocation* ptr() { return getOperand(1); } + const LInt64Allocation value() { return getInt64Operand(2); } + const LDefinition* t1() { return getTemp(0); } + const LDefinition* t2() { return getTemp(1); } +}; + +class LWasmCompareExchangeI64 + : public LInstructionHelper<INT64_PIECES, 2 + 2 * INT64_PIECES, 0> { + public: + LIR_HEADER(WasmCompareExchangeI64); + + LWasmCompareExchangeI64(const LAllocation& memoryBase, const LAllocation& ptr, + const LInt64Allocation& expected, + const LInt64Allocation& replacement) + : LInstructionHelper(classOpcode) { + setOperand(0, memoryBase); + setOperand(1, ptr); + setInt64Operand(2, expected); + setInt64Operand(2 + INT64_PIECES, replacement); + } + + MWasmCompareExchangeHeap* mir() const { + return mir_->toWasmCompareExchangeHeap(); + } + const LAllocation* memoryBase() { return getOperand(0); } + const LAllocation* ptr() { return getOperand(1); } + const LInt64Allocation expected() { return getInt64Operand(2); } + const LInt64Allocation replacement() { + return getInt64Operand(2 + INT64_PIECES); + } +}; + +class LWasmAtomicExchangeI64 + : public LInstructionHelper<INT64_PIECES, 2 + INT64_PIECES, 0> { + const wasm::MemoryAccessDesc& access_; + + public: + LIR_HEADER(WasmAtomicExchangeI64); + + LWasmAtomicExchangeI64(const LAllocation& memoryBase, const LAllocation& ptr, + const LInt64Allocation& value, + const wasm::MemoryAccessDesc& access) + : LInstructionHelper(classOpcode), access_(access) { + setOperand(0, memoryBase); + setOperand(1, ptr); + setInt64Operand(2, value); + } + + const LAllocation* memoryBase() { return getOperand(0); } + const LAllocation* ptr() { return getOperand(1); } + const LInt64Allocation value() { return getInt64Operand(2); } + const wasm::MemoryAccessDesc& access() { return access_; } +}; + +class LWasmAtomicBinopI64 + : public LInstructionHelper<INT64_PIECES, 2 + INT64_PIECES, 0> { + const wasm::MemoryAccessDesc& access_; + AtomicOp op_; + + public: + LIR_HEADER(WasmAtomicBinopI64); + + LWasmAtomicBinopI64(const LAllocation& memoryBase, const LAllocation& ptr, + const LInt64Allocation& value, + const 
wasm::MemoryAccessDesc& access, AtomicOp op) + : LInstructionHelper(classOpcode), access_(access), op_(op) { + setOperand(0, memoryBase); + setOperand(1, ptr); + setInt64Operand(2, value); + } + + const LAllocation* memoryBase() { return getOperand(0); } + const LAllocation* ptr() { return getOperand(1); } + const LInt64Allocation value() { return getInt64Operand(2); } + const wasm::MemoryAccessDesc& access() { return access_; } + AtomicOp operation() const { return op_; } +}; + +} // namespace jit +} // namespace js + +#endif /* jit_x86_LIR_x86_h */ diff --git a/js/src/jit/x86/Lowering-x86.cpp b/js/src/jit/x86/Lowering-x86.cpp new file mode 100644 index 0000000000..968b5baf14 --- /dev/null +++ b/js/src/jit/x86/Lowering-x86.cpp @@ -0,0 +1,840 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "jit/x86/Lowering-x86.h" + +#include "jit/Lowering.h" +#include "jit/MIR.h" +#include "jit/x86/Assembler-x86.h" + +#include "jit/shared/Lowering-shared-inl.h" + +using namespace js; +using namespace js::jit; + +LBoxAllocation LIRGeneratorX86::useBoxFixed(MDefinition* mir, Register reg1, + Register reg2, bool useAtStart) { + MOZ_ASSERT(mir->type() == MIRType::Value); + MOZ_ASSERT(reg1 != reg2); + + ensureDefined(mir); + return LBoxAllocation(LUse(reg1, mir->virtualRegister(), useAtStart), + LUse(reg2, VirtualRegisterOfPayload(mir), useAtStart)); +} + +LAllocation LIRGeneratorX86::useByteOpRegister(MDefinition* mir) { + return useFixed(mir, eax); +} + +LAllocation LIRGeneratorX86::useByteOpRegisterAtStart(MDefinition* mir) { + return useFixedAtStart(mir, eax); +} + +LAllocation LIRGeneratorX86::useByteOpRegisterOrNonDoubleConstant( + MDefinition* mir) { + return useFixed(mir, eax); +} + +LDefinition LIRGeneratorX86::tempByteOpRegister() { return tempFixed(eax); } + +void LIRGenerator::visitBox(MBox* box) { + MDefinition* inner = box->getOperand(0); + + // If the box wrapped a double, it needs a new register. + if (IsFloatingPointType(inner->type())) { + LDefinition spectreTemp = + JitOptions.spectreValueMasking ? temp() : LDefinition::BogusTemp(); + defineBox(new (alloc()) LBoxFloatingPoint(useRegisterAtStart(inner), + tempCopy(inner, 0), spectreTemp, + inner->type()), + box); + return; + } + + if (box->canEmitAtUses()) { + emitAtUses(box); + return; + } + + if (inner->isConstant()) { + defineBox(new (alloc()) LValue(inner->toConstant()->toJSValue()), box); + return; + } + + LBox* lir = new (alloc()) LBox(use(inner), inner->type()); + + // Otherwise, we should not define a new register for the payload portion + // of the output, so bypass defineBox(). + uint32_t vreg = getVirtualRegister(); + + // Note that because we're using BogusTemp(), we do not change the type of + // the definition. We also do not define the first output as "TYPE", + // because it has no corresponding payload at (vreg + 1). Also note that + // although we copy the input's original type for the payload half of the + // definition, this is only for clarity. BogusTemp() definitions are + // ignored. 
+ lir->setDef(0, LDefinition(vreg, LDefinition::GENERAL)); + lir->setDef(1, LDefinition::BogusTemp()); + box->setVirtualRegister(vreg); + add(lir); +} + +void LIRGenerator::visitUnbox(MUnbox* unbox) { + MDefinition* inner = unbox->getOperand(0); + + // An unbox on x86 reads in a type tag (either in memory or a register) and + // a payload. Unlike most instructions consuming a box, we ask for the type + // second, so that the result can re-use the first input. + MOZ_ASSERT(inner->type() == MIRType::Value); + + ensureDefined(inner); + + if (IsFloatingPointType(unbox->type())) { + LUnboxFloatingPoint* lir = + new (alloc()) LUnboxFloatingPoint(useBox(inner), unbox->type()); + if (unbox->fallible()) { + assignSnapshot(lir, unbox->bailoutKind()); + } + define(lir, unbox); + return; + } + + // Swap the order we use the box pieces so we can re-use the payload register. + LUnbox* lir = new (alloc()) LUnbox; + bool reusePayloadReg = !JitOptions.spectreValueMasking || + unbox->type() == MIRType::Int32 || + unbox->type() == MIRType::Boolean; + if (reusePayloadReg) { + lir->setOperand(0, usePayloadInRegisterAtStart(inner)); + lir->setOperand(1, useType(inner, LUse::ANY)); + } else { + lir->setOperand(0, usePayload(inner, LUse::REGISTER)); + lir->setOperand(1, useType(inner, LUse::ANY)); + } + + if (unbox->fallible()) { + assignSnapshot(lir, unbox->bailoutKind()); + } + + // Types and payloads form two separate intervals. If the type becomes dead + // before the payload, it could be used as a Value without the type being + // recoverable. Unbox's purpose is to eagerly kill the definition of a type + // tag, so keeping both alive (for the purpose of gcmaps) is unappealing. + // Instead, we create a new virtual register. + if (reusePayloadReg) { + defineReuseInput(lir, unbox, 0); + } else { + define(lir, unbox); + } +} + +void LIRGenerator::visitReturnImpl(MDefinition* opd, bool isGenerator) { + MOZ_ASSERT(opd->type() == MIRType::Value); + + LReturn* ins = new (alloc()) LReturn(isGenerator); + ins->setOperand(0, LUse(JSReturnReg_Type)); + ins->setOperand(1, LUse(JSReturnReg_Data)); + fillBoxUses(ins, 0, opd); + add(ins); +} + +void LIRGeneratorX86::lowerUntypedPhiInput(MPhi* phi, uint32_t inputPosition, + LBlock* block, size_t lirIndex) { + MDefinition* operand = phi->getOperand(inputPosition); + LPhi* type = block->getPhi(lirIndex + VREG_TYPE_OFFSET); + LPhi* payload = block->getPhi(lirIndex + VREG_DATA_OFFSET); + type->setOperand( + inputPosition, + LUse(operand->virtualRegister() + VREG_TYPE_OFFSET, LUse::ANY)); + payload->setOperand(inputPosition, + LUse(VirtualRegisterOfPayload(operand), LUse::ANY)); +} + +void LIRGeneratorX86::defineInt64Phi(MPhi* phi, size_t lirIndex) { + LPhi* low = current->getPhi(lirIndex + INT64LOW_INDEX); + LPhi* high = current->getPhi(lirIndex + INT64HIGH_INDEX); + + uint32_t lowVreg = getVirtualRegister(); + + phi->setVirtualRegister(lowVreg); + + uint32_t highVreg = getVirtualRegister(); + MOZ_ASSERT(lowVreg + INT64HIGH_INDEX == highVreg + INT64LOW_INDEX); + + low->setDef(0, LDefinition(lowVreg, LDefinition::INT32)); + high->setDef(0, LDefinition(highVreg, LDefinition::INT32)); + annotate(high); + annotate(low); +} + +void LIRGeneratorX86::lowerInt64PhiInput(MPhi* phi, uint32_t inputPosition, + LBlock* block, size_t lirIndex) { + MDefinition* operand = phi->getOperand(inputPosition); + LPhi* low = block->getPhi(lirIndex + INT64LOW_INDEX); + LPhi* high = block->getPhi(lirIndex + INT64HIGH_INDEX); + low->setOperand(inputPosition, + LUse(operand->virtualRegister() + 
INT64LOW_INDEX, LUse::ANY)); + high->setOperand( + inputPosition, + LUse(operand->virtualRegister() + INT64HIGH_INDEX, LUse::ANY)); +} + +void LIRGeneratorX86::lowerForALUInt64( + LInstructionHelper<INT64_PIECES, INT64_PIECES, 0>* ins, MDefinition* mir, + MDefinition* input) { + ins->setInt64Operand(0, useInt64RegisterAtStart(input)); + defineInt64ReuseInput(ins, mir, 0); +} + +void LIRGeneratorX86::lowerForALUInt64( + LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>* ins, + MDefinition* mir, MDefinition* lhs, MDefinition* rhs) { + ins->setInt64Operand(0, useInt64RegisterAtStart(lhs)); + ins->setInt64Operand(INT64_PIECES, useInt64OrConstant(rhs)); + defineInt64ReuseInput(ins, mir, 0); +} + +void LIRGeneratorX86::lowerForMulInt64(LMulI64* ins, MMul* mir, + MDefinition* lhs, MDefinition* rhs) { + bool needsTemp = true; + + if (rhs->isConstant()) { + int64_t constant = rhs->toConstant()->toInt64(); + int32_t shift = mozilla::FloorLog2(constant); + // See special cases in CodeGeneratorX86Shared::visitMulI64. + if (constant >= -1 && constant <= 2) { + needsTemp = false; + } + if (constant > 0 && int64_t(1) << shift == constant) { + needsTemp = false; + } + } + + // MulI64 on x86 needs output to be in edx, eax; + ins->setInt64Operand( + 0, useInt64Fixed(lhs, Register64(edx, eax), /*useAtStart = */ true)); + ins->setInt64Operand(INT64_PIECES, useInt64OrConstant(rhs)); + if (needsTemp) { + ins->setTemp(0, temp()); + } + + defineInt64Fixed(ins, mir, + LInt64Allocation(LAllocation(AnyRegister(edx)), + LAllocation(AnyRegister(eax)))); +} + +void LIRGenerator::visitCompareExchangeTypedArrayElement( + MCompareExchangeTypedArrayElement* ins) { + MOZ_ASSERT(ins->elements()->type() == MIRType::Elements); + MOZ_ASSERT(ins->index()->type() == MIRType::IntPtr); + + if (Scalar::isBigIntType(ins->arrayType())) { + LUse elements = useFixed(ins->elements(), esi); + LAllocation index = + useRegisterOrIndexConstant(ins->index(), ins->arrayType()); + LUse oldval = useFixed(ins->oldval(), eax); + LUse newval = useFixed(ins->newval(), edx); + LDefinition temp = tempFixed(ebx); + + auto* lir = new (alloc()) LCompareExchangeTypedArrayElement64( + elements, index, oldval, newval, temp); + defineFixed(lir, ins, LAllocation(AnyRegister(ecx))); + assignSafepoint(lir, ins); + return; + } + + lowerCompareExchangeTypedArrayElement(ins, /* useI386ByteRegisters = */ true); +} + +void LIRGenerator::visitAtomicExchangeTypedArrayElement( + MAtomicExchangeTypedArrayElement* ins) { + MOZ_ASSERT(ins->elements()->type() == MIRType::Elements); + MOZ_ASSERT(ins->index()->type() == MIRType::IntPtr); + + if (Scalar::isBigIntType(ins->arrayType())) { + LUse elements = useRegister(ins->elements()); + LAllocation index = + useRegisterOrIndexConstant(ins->index(), ins->arrayType()); + LAllocation value = useFixed(ins->value(), edx); + LInt64Definition temp = tempInt64Fixed(Register64(ecx, ebx)); + + auto* lir = new (alloc()) + LAtomicExchangeTypedArrayElement64(elements, index, value, temp); + defineFixed(lir, ins, LAllocation(AnyRegister(eax))); + assignSafepoint(lir, ins); + return; + } + + lowerAtomicExchangeTypedArrayElement(ins, /*useI386ByteRegisters=*/true); +} + +void LIRGenerator::visitAtomicTypedArrayElementBinop( + MAtomicTypedArrayElementBinop* ins) { + MOZ_ASSERT(ins->elements()->type() == MIRType::Elements); + MOZ_ASSERT(ins->index()->type() == MIRType::IntPtr); + + if (Scalar::isBigIntType(ins->arrayType())) { + LUse elements = useRegister(ins->elements()); + LAllocation index = + useRegisterOrIndexConstant(ins->index(), 
ins->arrayType()); + LAllocation value = useFixed(ins->value(), edx); + LInt64Definition temp = tempInt64Fixed(Register64(ecx, ebx)); + + // Case 1: the result of the operation is not used. + // + // We can omit allocating the result BigInt. + + if (ins->isForEffect()) { + LDefinition tempLow = tempFixed(eax); + + auto* lir = new (alloc()) LAtomicTypedArrayElementBinopForEffect64( + elements, index, value, temp, tempLow); + add(lir, ins); + return; + } + + // Case 2: the result of the operation is used. + + auto* lir = new (alloc()) + LAtomicTypedArrayElementBinop64(elements, index, value, temp); + defineFixed(lir, ins, LAllocation(AnyRegister(eax))); + assignSafepoint(lir, ins); + return; + } + + lowerAtomicTypedArrayElementBinop(ins, /* useI386ByteRegisters = */ true); +} + +void LIRGeneratorX86::lowerAtomicLoad64(MLoadUnboxedScalar* ins) { + const LUse elements = useRegister(ins->elements()); + const LAllocation index = + useRegisterOrIndexConstant(ins->index(), ins->storageType()); + + auto* lir = new (alloc()) LAtomicLoad64(elements, index, tempFixed(ebx), + tempInt64Fixed(Register64(edx, eax))); + defineFixed(lir, ins, LAllocation(AnyRegister(ecx))); + assignSafepoint(lir, ins); +} + +void LIRGeneratorX86::lowerAtomicStore64(MStoreUnboxedScalar* ins) { + LUse elements = useRegister(ins->elements()); + LAllocation index = + useRegisterOrIndexConstant(ins->index(), ins->writeType()); + LAllocation value = useFixed(ins->value(), edx); + LInt64Definition temp1 = tempInt64Fixed(Register64(ecx, ebx)); + LDefinition temp2 = tempFixed(eax); + + add(new (alloc()) LAtomicStore64(elements, index, value, temp1, temp2), ins); +} + +void LIRGenerator::visitWasmUnsignedToDouble(MWasmUnsignedToDouble* ins) { + MOZ_ASSERT(ins->input()->type() == MIRType::Int32); + LWasmUint32ToDouble* lir = new (alloc()) + LWasmUint32ToDouble(useRegisterAtStart(ins->input()), temp()); + define(lir, ins); +} + +void LIRGenerator::visitWasmUnsignedToFloat32(MWasmUnsignedToFloat32* ins) { + MOZ_ASSERT(ins->input()->type() == MIRType::Int32); + LWasmUint32ToFloat32* lir = new (alloc()) + LWasmUint32ToFloat32(useRegisterAtStart(ins->input()), temp()); + define(lir, ins); +} + +// If the base is a constant, and it is zero or its offset is zero, then +// code generation will fold the values into the access. Allocate the +// pointer to a register only if that can't happen. + +static bool OptimizableConstantAccess(MDefinition* base, + const wasm::MemoryAccessDesc& access) { + MOZ_ASSERT(base->isConstant()); + MOZ_ASSERT(base->type() == MIRType::Int32); + + if (!(base->toConstant()->isInt32(0) || access.offset() == 0)) { + return false; + } + if (access.type() == Scalar::Int64) { + // For int64 accesses on 32-bit systems we will need to add another offset + // of 4 to access the high part of the value; make sure this does not + // overflow the value. 
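+    // (Illustrative: with that extra offset of 4, a folded constant of
+    // 0x7ffffffc would place the high word at 0x80000000, past INT32_MAX,
+    // so such a base or offset must stay in a register instead.)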
+ int32_t v; + if (base->toConstant()->isInt32(0)) { + v = access.offset(); + } else { + v = base->toConstant()->toInt32(); + } + return v <= int32_t(INT32_MAX - INT64HIGH_OFFSET); + } + return true; +} + +void LIRGenerator::visitWasmHeapBase(MWasmHeapBase* ins) { + auto* lir = new (alloc()) LWasmHeapBase(useRegisterAtStart(ins->instance())); + define(lir, ins); +} + +void LIRGenerator::visitWasmLoad(MWasmLoad* ins) { + MDefinition* base = ins->base(); + MOZ_ASSERT(base->type() == MIRType::Int32); + + MDefinition* memoryBase = ins->memoryBase(); + MOZ_ASSERT(memoryBase->type() == MIRType::Pointer); + + if (ins->access().type() == Scalar::Int64 && ins->access().isAtomic()) { + auto* lir = new (alloc()) + LWasmAtomicLoadI64(useRegister(memoryBase), useRegister(base), + tempFixed(ecx), tempFixed(ebx)); + defineInt64Fixed(lir, ins, + LInt64Allocation(LAllocation(AnyRegister(edx)), + LAllocation(AnyRegister(eax)))); + return; + } + + LAllocation baseAlloc; + if (!base->isConstant() || !OptimizableConstantAccess(base, ins->access())) { + baseAlloc = ins->type() == MIRType::Int64 ? useRegister(base) + : useRegisterAtStart(base); + } + + if (ins->type() != MIRType::Int64) { + auto* lir = + new (alloc()) LWasmLoad(baseAlloc, useRegisterAtStart(memoryBase)); + define(lir, ins); + return; + } + + // "AtStart" register usage does not work for the 64-bit case because we + // clobber two registers for the result and may need two registers for a + // scaled address; we can't guarantee non-interference. + + auto* lir = new (alloc()) LWasmLoadI64(baseAlloc, useRegister(memoryBase)); + + Scalar::Type accessType = ins->access().type(); + if (accessType == Scalar::Int8 || accessType == Scalar::Int16 || + accessType == Scalar::Int32) { + // We use cdq to sign-extend the result and cdq demands these registers. + defineInt64Fixed(lir, ins, + LInt64Allocation(LAllocation(AnyRegister(edx)), + LAllocation(AnyRegister(eax)))); + return; + } + + defineInt64(lir, ins); +} + +void LIRGenerator::visitWasmStore(MWasmStore* ins) { + MDefinition* base = ins->base(); + MOZ_ASSERT(base->type() == MIRType::Int32); + + MDefinition* memoryBase = ins->memoryBase(); + MOZ_ASSERT(memoryBase->type() == MIRType::Pointer); + + if (ins->access().type() == Scalar::Int64 && ins->access().isAtomic()) { + auto* lir = new (alloc()) + LWasmAtomicStoreI64(useRegister(memoryBase), useRegister(base), + useInt64Fixed(ins->value(), Register64(ecx, ebx)), + tempFixed(edx), tempFixed(eax)); + add(lir, ins); + return; + } + + LAllocation baseAlloc; + if (!base->isConstant() || !OptimizableConstantAccess(base, ins->access())) { + baseAlloc = useRegisterAtStart(base); + } + + LAllocation valueAlloc; + switch (ins->access().type()) { + case Scalar::Int8: + case Scalar::Uint8: + // See comment for LIRGeneratorX86::useByteOpRegister. + valueAlloc = useFixed(ins->value(), eax); + break; + case Scalar::Int16: + case Scalar::Uint16: + case Scalar::Int32: + case Scalar::Uint32: + case Scalar::Float32: + case Scalar::Float64: + // For now, don't allow constant values. The immediate operand affects + // instruction layout which affects patching. 
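+      // (Using a register keeps the encoded length of the store fixed, which
+      // is what later patching of this access relies on.)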
+ valueAlloc = useRegisterAtStart(ins->value()); + break; + case Scalar::Simd128: +#ifdef ENABLE_WASM_SIMD + valueAlloc = useRegisterAtStart(ins->value()); + break; +#else + MOZ_CRASH("unexpected array type"); +#endif + case Scalar::Int64: { + LInt64Allocation valueAlloc = useInt64RegisterAtStart(ins->value()); + auto* lir = new (alloc()) + LWasmStoreI64(baseAlloc, valueAlloc, useRegisterAtStart(memoryBase)); + add(lir, ins); + return; + } + case Scalar::Uint8Clamped: + case Scalar::BigInt64: + case Scalar::BigUint64: + case Scalar::MaxTypedArrayViewType: + MOZ_CRASH("unexpected array type"); + } + + auto* lir = new (alloc()) + LWasmStore(baseAlloc, valueAlloc, useRegisterAtStart(memoryBase)); + add(lir, ins); +} + +void LIRGenerator::visitWasmCompareExchangeHeap(MWasmCompareExchangeHeap* ins) { + MDefinition* base = ins->base(); + MOZ_ASSERT(base->type() == MIRType::Int32); + + MDefinition* memoryBase = ins->memoryBase(); + MOZ_ASSERT(memoryBase->type() == MIRType::Pointer); + + if (ins->access().type() == Scalar::Int64) { + auto* lir = new (alloc()) LWasmCompareExchangeI64( + useRegisterAtStart(memoryBase), useRegisterAtStart(base), + useInt64FixedAtStart(ins->oldValue(), Register64(edx, eax)), + useInt64FixedAtStart(ins->newValue(), Register64(ecx, ebx))); + defineInt64Fixed(lir, ins, + LInt64Allocation(LAllocation(AnyRegister(edx)), + LAllocation(AnyRegister(eax)))); + return; + } + + MOZ_ASSERT(ins->access().type() < Scalar::Float32); + + bool byteArray = byteSize(ins->access().type()) == 1; + + // Register allocation: + // + // The output may not be used, but eax will be clobbered regardless + // so pin the output to eax. + // + // oldval must be in a register. + // + // newval must be in a register. If the source is a byte array + // then newval must be a register that has a byte size: this must + // be ebx, ecx, or edx (eax is taken). + // + // Bug #1077036 describes some optimization opportunities. + + const LAllocation oldval = useRegister(ins->oldValue()); + const LAllocation newval = + byteArray ? 
useFixed(ins->newValue(), ebx) : useRegister(ins->newValue()); + + LWasmCompareExchangeHeap* lir = new (alloc()) LWasmCompareExchangeHeap( + useRegister(base), oldval, newval, useRegister(memoryBase)); + + lir->setAddrTemp(temp()); + defineFixed(lir, ins, LAllocation(AnyRegister(eax))); +} + +void LIRGenerator::visitWasmAtomicExchangeHeap(MWasmAtomicExchangeHeap* ins) { + MDefinition* memoryBase = ins->memoryBase(); + MOZ_ASSERT(memoryBase->type() == MIRType::Pointer); + + if (ins->access().type() == Scalar::Int64) { + MDefinition* base = ins->base(); + auto* lir = new (alloc()) LWasmAtomicExchangeI64( + useRegister(memoryBase), useRegister(base), + useInt64Fixed(ins->value(), Register64(ecx, ebx)), ins->access()); + defineInt64Fixed(lir, ins, + LInt64Allocation(LAllocation(AnyRegister(edx)), + LAllocation(AnyRegister(eax)))); + return; + } + + const LAllocation base = useRegister(ins->base()); + const LAllocation value = useRegister(ins->value()); + + LWasmAtomicExchangeHeap* lir = new (alloc()) + LWasmAtomicExchangeHeap(base, value, useRegister(memoryBase)); + + lir->setAddrTemp(temp()); + if (byteSize(ins->access().type()) == 1) { + defineFixed(lir, ins, LAllocation(AnyRegister(eax))); + } else { + define(lir, ins); + } +} + +void LIRGenerator::visitWasmAtomicBinopHeap(MWasmAtomicBinopHeap* ins) { + MDefinition* base = ins->base(); + MOZ_ASSERT(base->type() == MIRType::Int32); + + MDefinition* memoryBase = ins->memoryBase(); + MOZ_ASSERT(memoryBase->type() == MIRType::Pointer); + + if (ins->access().type() == Scalar::Int64) { + auto* lir = new (alloc()) + LWasmAtomicBinopI64(useRegister(memoryBase), useRegister(base), + useInt64Fixed(ins->value(), Register64(ecx, ebx)), + ins->access(), ins->operation()); + defineInt64Fixed(lir, ins, + LInt64Allocation(LAllocation(AnyRegister(edx)), + LAllocation(AnyRegister(eax)))); + return; + } + + MOZ_ASSERT(ins->access().type() < Scalar::Float32); + + bool byteArray = byteSize(ins->access().type()) == 1; + + // Case 1: the result of the operation is not used. + // + // We'll emit a single instruction: LOCK ADD, LOCK SUB, LOCK AND, + // LOCK OR, or LOCK XOR. These can all take an immediate. + + if (!ins->hasUses()) { + LAllocation value; + if (byteArray && !ins->value()->isConstant()) { + value = useFixed(ins->value(), ebx); + } else { + value = useRegisterOrConstant(ins->value()); + } + LWasmAtomicBinopHeapForEffect* lir = + new (alloc()) LWasmAtomicBinopHeapForEffect(useRegister(base), value, + LDefinition::BogusTemp(), + useRegister(memoryBase)); + lir->setAddrTemp(temp()); + add(lir, ins); + return; + } + + // Case 2: the result of the operation is used. + // + // For ADD and SUB we'll use XADD: + // + // movl value, output + // lock xaddl output, mem + // + // For the 8-bit variants XADD needs a byte register for the + // output only, we can still set up with movl; just pin the output + // to eax (or ebx / ecx / edx). + // + // For AND/OR/XOR we need to use a CMPXCHG loop: + // + // movl *mem, eax + // L: mov eax, temp + // andl value, temp + // lock cmpxchg temp, mem ; reads eax also + // jnz L + // ; result in eax + // + // Note the placement of L, cmpxchg will update eax with *mem if + // *mem does not have the expected value, so reloading it at the + // top of the loop would be redundant. + // + // We want to fix eax as the output. We also need a temp for + // the intermediate value. + // + // For the 8-bit variants the temp must have a byte register. 
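+  //
+  // Either way the old memory value ends up in the output register, which is
+  // the fetch-and-op result wasm expects.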
+  //
+  // There are optimization opportunities:
+  //  - better 8-bit register allocation and instruction selection, Bug
+  //    #1077036.
+
+  bool bitOp = !(ins->operation() == AtomicFetchAddOp ||
+                 ins->operation() == AtomicFetchSubOp);
+  LDefinition tempDef = LDefinition::BogusTemp();
+  LAllocation value;
+
+  if (byteArray) {
+    value = useFixed(ins->value(), ebx);
+    if (bitOp) {
+      tempDef = tempFixed(ecx);
+    }
+  } else if (bitOp || ins->value()->isConstant()) {
+    value = useRegisterOrConstant(ins->value());
+    if (bitOp) {
+      tempDef = temp();
+    }
+  } else {
+    value = useRegisterAtStart(ins->value());
+  }
+
+  LWasmAtomicBinopHeap* lir = new (alloc())
+      LWasmAtomicBinopHeap(useRegister(base), value, tempDef,
+                           LDefinition::BogusTemp(), useRegister(memoryBase));
+
+  lir->setAddrTemp(temp());
+  if (byteArray || bitOp) {
+    defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
+  } else if (ins->value()->isConstant()) {
+    define(lir, ins);
+  } else {
+    defineReuseInput(lir, ins, LWasmAtomicBinopHeap::valueOp);
+  }
+}
+
+void LIRGeneratorX86::lowerDivI64(MDiv* div) {
+  MOZ_CRASH("We use MWasmBuiltinDivI64 instead.");
+}
+
+void LIRGeneratorX86::lowerWasmBuiltinDivI64(MWasmBuiltinDivI64* div) {
+  MOZ_ASSERT(div->lhs()->type() == div->rhs()->type());
+  MOZ_ASSERT(IsNumberType(div->type()));
+
+  MOZ_ASSERT(div->type() == MIRType::Int64);
+
+  if (div->isUnsigned()) {
+    LUDivOrModI64* lir = new (alloc())
+        LUDivOrModI64(useInt64FixedAtStart(div->lhs(), Register64(eax, ebx)),
+                      useInt64FixedAtStart(div->rhs(), Register64(ecx, edx)),
+                      useFixedAtStart(div->instance(), InstanceReg));
+    defineReturn(lir, div);
+    return;
+  }
+
+  LDivOrModI64* lir = new (alloc())
+      LDivOrModI64(useInt64FixedAtStart(div->lhs(), Register64(eax, ebx)),
+                   useInt64FixedAtStart(div->rhs(), Register64(ecx, edx)),
+                   useFixedAtStart(div->instance(), InstanceReg));
+  defineReturn(lir, div);
+}
+
+void LIRGeneratorX86::lowerModI64(MMod* mod) {
+  MOZ_CRASH("We use MWasmBuiltinModI64 instead.");
+}
+
+void LIRGeneratorX86::lowerWasmBuiltinModI64(MWasmBuiltinModI64* mod) {
+  MDefinition* lhs = mod->lhs();
+  MDefinition* rhs = mod->rhs();
+  MOZ_ASSERT(lhs->type() == rhs->type());
+  MOZ_ASSERT(IsNumberType(mod->type()));
+
+  MOZ_ASSERT(mod->type() == MIRType::Int64);
+
+  if (mod->isUnsigned()) {
+    LUDivOrModI64* lir = new (alloc())
+        LUDivOrModI64(useInt64FixedAtStart(lhs, Register64(eax, ebx)),
+                      useInt64FixedAtStart(rhs, Register64(ecx, edx)),
+                      useFixedAtStart(mod->instance(), InstanceReg));
+    defineReturn(lir, mod);
+    return;
+  }
+
+  LDivOrModI64* lir = new (alloc())
+      LDivOrModI64(useInt64FixedAtStart(lhs, Register64(eax, ebx)),
+                   useInt64FixedAtStart(rhs, Register64(ecx, edx)),
+                   useFixedAtStart(mod->instance(), InstanceReg));
+  defineReturn(lir, mod);
+}
+
+void LIRGeneratorX86::lowerUDivI64(MDiv* div) {
+  MOZ_CRASH("We use MWasmBuiltinDivI64 instead.");
+}
+
+void LIRGeneratorX86::lowerUModI64(MMod* mod) {
+  MOZ_CRASH("We use MWasmBuiltinModI64 instead.");
+}
+
+void LIRGeneratorX86::lowerBigIntDiv(MBigIntDiv* ins) {
+  auto* lir = new (alloc()) LBigIntDiv(
+      useRegister(ins->lhs()), useRegister(ins->rhs()), tempFixed(eax), temp());
+  defineFixed(lir, ins, LAllocation(AnyRegister(edx)));
+  assignSafepoint(lir, ins);
+}
+
+void LIRGeneratorX86::lowerBigIntMod(MBigIntMod* ins) {
+  auto* lir = new (alloc()) LBigIntMod(
+      useRegister(ins->lhs()), useRegister(ins->rhs()), tempFixed(eax), temp());
+  defineFixed(lir, ins, LAllocation(AnyRegister(edx)));
+  assignSafepoint(lir, ins);
+}
+
+void
LIRGenerator::visitSubstr(MSubstr* ins) { + // Due to lack of registers on x86, we reuse the string register as + // temporary. As a result we only need two temporary registers and take a + // bogus temporary as fifth argument. + LSubstr* lir = new (alloc()) + LSubstr(useRegister(ins->string()), useRegister(ins->begin()), + useRegister(ins->length()), temp(), LDefinition::BogusTemp(), + tempByteOpRegister()); + define(lir, ins); + assignSafepoint(lir, ins); +} + +void LIRGenerator::visitWasmTruncateToInt64(MWasmTruncateToInt64* ins) { + MDefinition* opd = ins->input(); + MOZ_ASSERT(opd->type() == MIRType::Double || opd->type() == MIRType::Float32); + + LDefinition temp = tempDouble(); + defineInt64(new (alloc()) LWasmTruncateToInt64(useRegister(opd), temp), ins); +} + +void LIRGeneratorX86::lowerWasmBuiltinTruncateToInt64( + MWasmBuiltinTruncateToInt64* ins) { + MOZ_CRASH("We don't use it for this architecture"); +} + +void LIRGenerator::visitInt64ToFloatingPoint(MInt64ToFloatingPoint* ins) { + MDefinition* opd = ins->input(); + MOZ_ASSERT(opd->type() == MIRType::Int64); + MOZ_ASSERT(IsFloatingPointType(ins->type())); + + LDefinition maybeTemp = + (ins->isUnsigned() && + ((ins->type() == MIRType::Double && AssemblerX86Shared::HasSSE3()) || + ins->type() == MIRType::Float32)) + ? temp() + : LDefinition::BogusTemp(); + + define(new (alloc()) LInt64ToFloatingPoint(useInt64Register(opd), maybeTemp), + ins); +} + +void LIRGeneratorX86::lowerBuiltinInt64ToFloatingPoint( + MBuiltinInt64ToFloatingPoint* ins) { + MOZ_CRASH("We don't use it for this architecture"); +} + +void LIRGenerator::visitExtendInt32ToInt64(MExtendInt32ToInt64* ins) { + if (ins->isUnsigned()) { + defineInt64(new (alloc()) + LExtendInt32ToInt64(useRegisterAtStart(ins->input())), + ins); + } else { + LExtendInt32ToInt64* lir = + new (alloc()) LExtendInt32ToInt64(useFixedAtStart(ins->input(), eax)); + defineInt64Fixed(lir, ins, + LInt64Allocation(LAllocation(AnyRegister(edx)), + LAllocation(AnyRegister(eax)))); + } +} + +void LIRGenerator::visitSignExtendInt64(MSignExtendInt64* ins) { + // Here we'll end up using cdq which requires input and output in (edx,eax). + LSignExtendInt64* lir = new (alloc()) LSignExtendInt64( + useInt64FixedAtStart(ins->input(), Register64(edx, eax))); + defineInt64Fixed(lir, ins, + LInt64Allocation(LAllocation(AnyRegister(edx)), + LAllocation(AnyRegister(eax)))); +} + +// On x86 we specialize the only cases where compare is {U,}Int32 and select +// is {U,}Int32. 
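+// (Presumably because wider integer or floating-point selects would need
+// conditional moves over register pairs or FP registers; those cases fall
+// back to the generic lowering path.)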
+bool LIRGeneratorShared::canSpecializeWasmCompareAndSelect( + MCompare::CompareType compTy, MIRType insTy) { + return insTy == MIRType::Int32 && (compTy == MCompare::Compare_Int32 || + compTy == MCompare::Compare_UInt32); +} + +void LIRGeneratorShared::lowerWasmCompareAndSelect(MWasmSelect* ins, + MDefinition* lhs, + MDefinition* rhs, + MCompare::CompareType compTy, + JSOp jsop) { + MOZ_ASSERT(canSpecializeWasmCompareAndSelect(compTy, ins->type())); + auto* lir = new (alloc()) LWasmCompareAndSelect( + useRegister(lhs), useAny(rhs), compTy, jsop, + useRegisterAtStart(ins->trueExpr()), useAny(ins->falseExpr())); + defineReuseInput(lir, ins, LWasmCompareAndSelect::IfTrueExprIndex); +} diff --git a/js/src/jit/x86/Lowering-x86.h b/js/src/jit/x86/Lowering-x86.h new file mode 100644 index 0000000000..b82109981e --- /dev/null +++ b/js/src/jit/x86/Lowering-x86.h @@ -0,0 +1,79 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef jit_x86_Lowering_x86_h +#define jit_x86_Lowering_x86_h + +#include "jit/x86-shared/Lowering-x86-shared.h" + +namespace js { +namespace jit { + +class LIRGeneratorX86 : public LIRGeneratorX86Shared { + protected: + LIRGeneratorX86(MIRGenerator* gen, MIRGraph& graph, LIRGraph& lirGraph) + : LIRGeneratorX86Shared(gen, graph, lirGraph) {} + + // Returns a box allocation with type set to reg1 and payload set to reg2. + LBoxAllocation useBoxFixed(MDefinition* mir, Register reg1, Register reg2, + bool useAtStart = false); + + // It's a trap! On x86, the 1-byte store can only use one of + // {al,bl,cl,dl,ah,bh,ch,dh}. That means if the register allocator + // gives us one of {edi,esi,ebp,esp}, we're out of luck. (The formatter + // will assert on us.) Ideally, we'd just ask the register allocator to + // give us one of {al,bl,cl,dl}. For now, just useFixed(al). 
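+  // (Concretely: 32-bit mode has no REX prefix, hence no %sil or %dil, so a
+  // byte store from esi or edi cannot even be encoded.)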
+ LAllocation useByteOpRegister(MDefinition* mir); + LAllocation useByteOpRegisterAtStart(MDefinition* mir); + LAllocation useByteOpRegisterOrNonDoubleConstant(MDefinition* mir); + LDefinition tempByteOpRegister(); + + inline LDefinition tempToUnbox() { return LDefinition::BogusTemp(); } + + bool needTempForPostBarrier() { return true; } + + void lowerUntypedPhiInput(MPhi* phi, uint32_t inputPosition, LBlock* block, + size_t lirIndex); + + void lowerInt64PhiInput(MPhi* phi, uint32_t inputPosition, LBlock* block, + size_t lirIndex); + void defineInt64Phi(MPhi* phi, size_t lirIndex); + + void lowerForALUInt64(LInstructionHelper<INT64_PIECES, INT64_PIECES, 0>* ins, + MDefinition* mir, MDefinition* input); + void lowerForALUInt64( + LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>* ins, + MDefinition* mir, MDefinition* lhs, MDefinition* rhs); + void lowerForMulInt64(LMulI64* ins, MMul* mir, MDefinition* lhs, + MDefinition* rhs); + + void lowerBuiltinInt64ToFloatingPoint(MBuiltinInt64ToFloatingPoint* ins); + void lowerWasmBuiltinTruncateToInt64(MWasmBuiltinTruncateToInt64* ins); + void lowerDivI64(MDiv* div); + void lowerWasmBuiltinDivI64(MWasmBuiltinDivI64* div); + void lowerModI64(MMod* mod); + void lowerWasmBuiltinModI64(MWasmBuiltinModI64* mod); + void lowerUDivI64(MDiv* div); + void lowerUModI64(MMod* mod); + + void lowerBigIntDiv(MBigIntDiv* ins); + void lowerBigIntMod(MBigIntMod* ins); + + void lowerAtomicLoad64(MLoadUnboxedScalar* ins); + void lowerAtomicStore64(MStoreUnboxedScalar* ins); + + void lowerPhi(MPhi* phi); + + public: + static bool allowTypedElementHoleCheck() { return true; } +}; + +typedef LIRGeneratorX86 LIRGeneratorSpecific; + +} // namespace jit +} // namespace js + +#endif /* jit_x86_Lowering_x86_h */ diff --git a/js/src/jit/x86/MacroAssembler-x86-inl.h b/js/src/jit/x86/MacroAssembler-x86-inl.h new file mode 100644 index 0000000000..66050cc1b5 --- /dev/null +++ b/js/src/jit/x86/MacroAssembler-x86-inl.h @@ -0,0 +1,1386 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef jit_x86_MacroAssembler_x86_inl_h +#define jit_x86_MacroAssembler_x86_inl_h + +#include "jit/x86/MacroAssembler-x86.h" + +#include "jit/x86-shared/MacroAssembler-x86-shared-inl.h" + +namespace js { +namespace jit { + +//{{{ check_macroassembler_style + +void MacroAssembler::move64(Imm64 imm, Register64 dest) { + move32(Imm32(imm.value & 0xFFFFFFFFL), dest.low); + move32(Imm32((imm.value >> 32) & 0xFFFFFFFFL), dest.high); +} + +void MacroAssembler::move64(Register64 src, Register64 dest) { + movl(src.low, dest.low); + movl(src.high, dest.high); +} + +void MacroAssembler::moveDoubleToGPR64(FloatRegister src, Register64 dest) { + ScratchDoubleScope scratch(*this); + + if (Assembler::HasSSE41()) { + vmovd(src, dest.low); + vpextrd(1, src, dest.high); + } else { + vmovd(src, dest.low); + moveDouble(src, scratch); + vpsrldq(Imm32(4), scratch, scratch); + vmovd(scratch, dest.high); + } +} + +void MacroAssembler::moveGPR64ToDouble(Register64 src, FloatRegister dest) { + if (Assembler::HasSSE41()) { + vmovd(src.low, dest); + vpinsrd(1, src.high, dest, dest); + } else { + ScratchDoubleScope fpscratch(*this); + vmovd(src.low, dest); + vmovd(src.high, fpscratch); + vunpcklps(fpscratch, dest, dest); + } +} + +void MacroAssembler::move64To32(Register64 src, Register dest) { + if (src.low != dest) { + movl(src.low, dest); + } +} + +void MacroAssembler::move32To64ZeroExtend(Register src, Register64 dest) { + if (src != dest.low) { + movl(src, dest.low); + } + movl(Imm32(0), dest.high); +} + +void MacroAssembler::move8To64SignExtend(Register src, Register64 dest) { + move8SignExtend(src, dest.low); + if (dest.low == eax && dest.high == edx) { + masm.cdq(); + } else { + movl(dest.low, dest.high); + sarl(Imm32(31), dest.high); + } +} + +void MacroAssembler::move16To64SignExtend(Register src, Register64 dest) { + move16SignExtend(src, dest.low); + if (dest.low == eax && dest.high == edx) { + masm.cdq(); + } else { + movl(dest.low, dest.high); + sarl(Imm32(31), dest.high); + } +} + +void MacroAssembler::move32To64SignExtend(Register src, Register64 dest) { + if (src != dest.low) { + movl(src, dest.low); + } + if (dest.low == eax && dest.high == edx) { + masm.cdq(); + } else { + movl(dest.low, dest.high); + sarl(Imm32(31), dest.high); + } +} + +void MacroAssembler::move32SignExtendToPtr(Register src, Register dest) { + movl(src, dest); +} + +void MacroAssembler::move32ZeroExtendToPtr(Register src, Register dest) { + movl(src, dest); +} + +// =============================================================== +// Load instructions + +void MacroAssembler::load32SignExtendToPtr(const Address& src, Register dest) { + load32(src, dest); +} + +// =============================================================== +// Logical functions + +void MacroAssembler::notPtr(Register reg) { notl(reg); } + +void MacroAssembler::andPtr(Register src, Register dest) { andl(src, dest); } + +void MacroAssembler::andPtr(Imm32 imm, Register dest) { andl(imm, dest); } + +void MacroAssembler::and64(Imm64 imm, Register64 dest) { + if (imm.low().value != int32_t(0xFFFFFFFF)) { + andl(imm.low(), dest.low); + } + if (imm.hi().value != int32_t(0xFFFFFFFF)) { + andl(imm.hi(), dest.high); + } +} + +void MacroAssembler::or64(Imm64 imm, Register64 dest) { + if (imm.low().value != 0) { + orl(imm.low(), dest.low); + } + if (imm.hi().value != 0) { + orl(imm.hi(), dest.high); + } +} + +void MacroAssembler::xor64(Imm64 imm, Register64 dest) { + if (imm.low().value != 0) { + xorl(imm.low(), dest.low); + } + if (imm.hi().value != 0) { + 
xorl(imm.hi(), dest.high); + } +} + +void MacroAssembler::orPtr(Register src, Register dest) { orl(src, dest); } + +void MacroAssembler::orPtr(Imm32 imm, Register dest) { orl(imm, dest); } + +void MacroAssembler::and64(Register64 src, Register64 dest) { + andl(src.low, dest.low); + andl(src.high, dest.high); +} + +void MacroAssembler::or64(Register64 src, Register64 dest) { + orl(src.low, dest.low); + orl(src.high, dest.high); +} + +void MacroAssembler::xor64(Register64 src, Register64 dest) { + xorl(src.low, dest.low); + xorl(src.high, dest.high); +} + +void MacroAssembler::xorPtr(Register src, Register dest) { xorl(src, dest); } + +void MacroAssembler::xorPtr(Imm32 imm, Register dest) { xorl(imm, dest); } + +// =============================================================== +// Swap instructions + +void MacroAssembler::byteSwap64(Register64 reg) { + bswapl(reg.low); + bswapl(reg.high); + xchgl(reg.low, reg.high); +} + +// =============================================================== +// Arithmetic functions + +void MacroAssembler::addPtr(Register src, Register dest) { addl(src, dest); } + +void MacroAssembler::addPtr(Imm32 imm, Register dest) { addl(imm, dest); } + +void MacroAssembler::addPtr(ImmWord imm, Register dest) { + addl(Imm32(imm.value), dest); +} + +void MacroAssembler::addPtr(Imm32 imm, const Address& dest) { + addl(imm, Operand(dest)); +} + +void MacroAssembler::addPtr(Imm32 imm, const AbsoluteAddress& dest) { + addl(imm, Operand(dest)); +} + +void MacroAssembler::addPtr(const Address& src, Register dest) { + addl(Operand(src), dest); +} + +void MacroAssembler::add64(Register64 src, Register64 dest) { + addl(src.low, dest.low); + adcl(src.high, dest.high); +} + +void MacroAssembler::add64(Imm32 imm, Register64 dest) { + addl(imm, dest.low); + adcl(Imm32(0), dest.high); +} + +void MacroAssembler::add64(Imm64 imm, Register64 dest) { + if (imm.low().value == 0) { + addl(imm.hi(), dest.high); + return; + } + addl(imm.low(), dest.low); + adcl(imm.hi(), dest.high); +} + +void MacroAssembler::addConstantDouble(double d, FloatRegister dest) { + Double* dbl = getDouble(d); + if (!dbl) { + return; + } + masm.vaddsd_mr(nullptr, dest.encoding(), dest.encoding()); + propagateOOM(dbl->uses.append(CodeOffset(masm.size()))); +} + +CodeOffset MacroAssembler::sub32FromStackPtrWithPatch(Register dest) { + moveStackPtrTo(dest); + addlWithPatch(Imm32(0), dest); + return CodeOffset(currentOffset()); +} + +void MacroAssembler::patchSub32FromStackPtr(CodeOffset offset, Imm32 imm) { + patchAddl(offset, -imm.value); +} + +void MacroAssembler::subPtr(Register src, Register dest) { subl(src, dest); } + +void MacroAssembler::subPtr(Register src, const Address& dest) { + subl(src, Operand(dest)); +} + +void MacroAssembler::subPtr(Imm32 imm, Register dest) { subl(imm, dest); } + +void MacroAssembler::subPtr(const Address& addr, Register dest) { + subl(Operand(addr), dest); +} + +void MacroAssembler::sub64(Register64 src, Register64 dest) { + subl(src.low, dest.low); + sbbl(src.high, dest.high); +} + +void MacroAssembler::sub64(Imm64 imm, Register64 dest) { + if (imm.low().value == 0) { + subl(imm.hi(), dest.high); + return; + } + subl(imm.low(), dest.low); + sbbl(imm.hi(), dest.high); +} + +void MacroAssembler::mulHighUnsigned32(Imm32 imm, Register src, Register dest) { + // Preserve edx:eax, unless they're the destination register. 
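+  // (mull below leaves the full 64-bit product of eax and its operand in
+  // edx:eax; only the high half is moved into |dest| afterwards.)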
+ if (edx != dest) { + push(edx); + } + if (eax != dest) { + push(eax); + } + + if (src != eax) { + // Compute edx:eax := eax ∗ src + movl(imm, eax); + mull(src); + } else { + // Compute edx:eax := eax ∗ edx + movl(imm, edx); + mull(edx); + } + + // Move the high word from edx into |dest|. + if (edx != dest) { + movl(edx, dest); + } + + // Restore edx:eax. + if (eax != dest) { + pop(eax); + } + if (edx != dest) { + pop(edx); + } +} + +void MacroAssembler::mulPtr(Register rhs, Register srcDest) { + imull(rhs, srcDest); +} + +// Note: this function clobbers eax and edx. +void MacroAssembler::mul64(Imm64 imm, const Register64& dest) { + // LOW32 = LOW(LOW(dest) * LOW(imm)); + // HIGH32 = LOW(HIGH(dest) * LOW(imm)) [multiply imm into upper bits] + // + LOW(LOW(dest) * HIGH(imm)) [multiply dest into upper bits] + // + HIGH(LOW(dest) * LOW(imm)) [carry] + + MOZ_ASSERT(dest.low != eax && dest.low != edx); + MOZ_ASSERT(dest.high != eax && dest.high != edx); + + // HIGH(dest) = LOW(HIGH(dest) * LOW(imm)); + movl(Imm32(imm.value & 0xFFFFFFFFL), edx); + imull(edx, dest.high); + + // edx:eax = LOW(dest) * LOW(imm); + movl(Imm32(imm.value & 0xFFFFFFFFL), edx); + movl(dest.low, eax); + mull(edx); + + // HIGH(dest) += edx; + addl(edx, dest.high); + + // HIGH(dest) += LOW(LOW(dest) * HIGH(imm)); + if (((imm.value >> 32) & 0xFFFFFFFFL) == 5) { + leal(Operand(dest.low, dest.low, TimesFour), edx); + } else { + MOZ_CRASH("Unsupported imm"); + } + addl(edx, dest.high); + + // LOW(dest) = eax; + movl(eax, dest.low); +} + +void MacroAssembler::mul64(Imm64 imm, const Register64& dest, + const Register temp) { + // LOW32 = LOW(LOW(dest) * LOW(src)); (1) + // HIGH32 = LOW(HIGH(dest) * LOW(src)) [multiply src into upper bits] (2) + // + LOW(LOW(dest) * HIGH(src)) [multiply dest into upper bits] (3) + // + HIGH(LOW(dest) * LOW(src)) [carry] (4) + + MOZ_ASSERT(dest == Register64(edx, eax)); + MOZ_ASSERT(temp != edx && temp != eax); + + movl(dest.low, temp); + + // Compute mul64 + imull(imm.low(), dest.high); // (2) + imull(imm.hi(), temp); // (3) + addl(dest.high, temp); + movl(imm.low(), dest.high); + mull(dest.high /*, dest.low*/); // (4) + (1) output in edx:eax + // (dest_hi:dest_lo) + addl(temp, dest.high); +} + +void MacroAssembler::mul64(const Register64& src, const Register64& dest, + const Register temp) { + // LOW32 = LOW(LOW(dest) * LOW(src)); (1) + // HIGH32 = LOW(HIGH(dest) * LOW(src)) [multiply src into upper bits] (2) + // + LOW(LOW(dest) * HIGH(src)) [multiply dest into upper bits] (3) + // + HIGH(LOW(dest) * LOW(src)) [carry] (4) + + MOZ_ASSERT(dest == Register64(edx, eax)); + MOZ_ASSERT(src != Register64(edx, eax) && src != Register64(eax, edx)); + + // Make sure the rhs.high isn't the dest.high register anymore. + // This saves us from doing other register moves. 
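+  // (temp gets a copy of LOW(dest) so that term (3) can be formed with imull
+  // without clobbering dest.low, which the final mull still needs.)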
+ movl(dest.low, temp); + + // Compute mul64 + imull(src.low, dest.high); // (2) + imull(src.high, temp); // (3) + addl(dest.high, temp); + movl(src.low, dest.high); + mull(dest.high /*, dest.low*/); // (4) + (1) output in edx:eax + // (dest_hi:dest_lo) + addl(temp, dest.high); +} + +void MacroAssembler::mulBy3(Register src, Register dest) { + lea(Operand(src, src, TimesTwo), dest); +} + +void MacroAssembler::mulDoublePtr(ImmPtr imm, Register temp, + FloatRegister dest) { + movl(imm, temp); + vmulsd(Operand(temp, 0), dest, dest); +} + +void MacroAssembler::inc64(AbsoluteAddress dest) { + addl(Imm32(1), Operand(dest)); + Label noOverflow; + j(NonZero, &noOverflow); + addl(Imm32(1), Operand(dest.offset(4))); + bind(&noOverflow); +} + +void MacroAssembler::neg64(Register64 reg) { + negl(reg.low); + adcl(Imm32(0), reg.high); + negl(reg.high); +} + +void MacroAssembler::negPtr(Register reg) { negl(reg); } + +// =============================================================== +// Shift functions + +void MacroAssembler::lshiftPtr(Imm32 imm, Register dest) { + MOZ_ASSERT(0 <= imm.value && imm.value < 32); + shll(imm, dest); +} + +void MacroAssembler::lshiftPtr(Register shift, Register srcDest) { + if (HasBMI2()) { + shlxl(srcDest, shift, srcDest); + return; + } + MOZ_ASSERT(shift == ecx); + shll_cl(srcDest); +} + +void MacroAssembler::lshift64(Imm32 imm, Register64 dest) { + MOZ_ASSERT(0 <= imm.value && imm.value < 64); + if (imm.value < 32) { + shldl(imm, dest.low, dest.high); + shll(imm, dest.low); + return; + } + + mov(dest.low, dest.high); + shll(Imm32(imm.value & 0x1f), dest.high); + xorl(dest.low, dest.low); +} + +void MacroAssembler::lshift64(Register shift, Register64 srcDest) { + MOZ_ASSERT(shift == ecx); + MOZ_ASSERT(srcDest.low != ecx && srcDest.high != ecx); + + Label done; + + shldl_cl(srcDest.low, srcDest.high); + shll_cl(srcDest.low); + + testl(Imm32(0x20), ecx); + j(Condition::Equal, &done); + + // 32 - 63 bit shift + movl(srcDest.low, srcDest.high); + xorl(srcDest.low, srcDest.low); + + bind(&done); +} + +void MacroAssembler::rshiftPtr(Imm32 imm, Register dest) { + MOZ_ASSERT(0 <= imm.value && imm.value < 32); + shrl(imm, dest); +} + +void MacroAssembler::rshiftPtr(Register shift, Register srcDest) { + if (HasBMI2()) { + shrxl(srcDest, shift, srcDest); + return; + } + MOZ_ASSERT(shift == ecx); + shrl_cl(srcDest); +} + +void MacroAssembler::rshift64(Imm32 imm, Register64 dest) { + MOZ_ASSERT(0 <= imm.value && imm.value < 64); + if (imm.value < 32) { + shrdl(imm, dest.high, dest.low); + shrl(imm, dest.high); + return; + } + + movl(dest.high, dest.low); + shrl(Imm32(imm.value & 0x1f), dest.low); + xorl(dest.high, dest.high); +} + +void MacroAssembler::rshift64(Register shift, Register64 srcDest) { + MOZ_ASSERT(shift == ecx); + MOZ_ASSERT(srcDest.low != ecx && srcDest.high != ecx); + + Label done; + + shrdl_cl(srcDest.high, srcDest.low); + shrl_cl(srcDest.high); + + testl(Imm32(0x20), ecx); + j(Condition::Equal, &done); + + // 32 - 63 bit shift + movl(srcDest.high, srcDest.low); + xorl(srcDest.high, srcDest.high); + + bind(&done); +} + +void MacroAssembler::rshiftPtrArithmetic(Imm32 imm, Register dest) { + MOZ_ASSERT(0 <= imm.value && imm.value < 32); + sarl(imm, dest); +} + +void MacroAssembler::rshift64Arithmetic(Imm32 imm, Register64 dest) { + MOZ_ASSERT(0 <= imm.value && imm.value < 64); + if (imm.value < 32) { + shrdl(imm, dest.high, dest.low); + sarl(imm, dest.high); + return; + } + + movl(dest.high, dest.low); + sarl(Imm32(imm.value & 0x1f), dest.low); + sarl(Imm32(0x1f), 
dest.high); +} + +void MacroAssembler::rshift64Arithmetic(Register shift, Register64 srcDest) { + MOZ_ASSERT(shift == ecx); + MOZ_ASSERT(srcDest.low != ecx && srcDest.high != ecx); + + Label done; + + shrdl_cl(srcDest.high, srcDest.low); + sarl_cl(srcDest.high); + + testl(Imm32(0x20), ecx); + j(Condition::Equal, &done); + + // 32 - 63 bit shift + movl(srcDest.high, srcDest.low); + sarl(Imm32(0x1f), srcDest.high); + + bind(&done); +} + +// =============================================================== +// Rotation functions + +void MacroAssembler::rotateLeft64(Register count, Register64 src, + Register64 dest, Register temp) { + MOZ_ASSERT(src == dest, "defineReuseInput"); + MOZ_ASSERT(count == ecx, "defineFixed(ecx)"); + + Label done; + + movl(dest.high, temp); + shldl_cl(dest.low, dest.high); + shldl_cl(temp, dest.low); + + testl(Imm32(0x20), count); + j(Condition::Equal, &done); + xchgl(dest.high, dest.low); + + bind(&done); +} + +void MacroAssembler::rotateRight64(Register count, Register64 src, + Register64 dest, Register temp) { + MOZ_ASSERT(src == dest, "defineReuseInput"); + MOZ_ASSERT(count == ecx, "defineFixed(ecx)"); + + Label done; + + movl(dest.high, temp); + shrdl_cl(dest.low, dest.high); + shrdl_cl(temp, dest.low); + + testl(Imm32(0x20), count); + j(Condition::Equal, &done); + xchgl(dest.high, dest.low); + + bind(&done); +} + +void MacroAssembler::rotateLeft64(Imm32 count, Register64 src, Register64 dest, + Register temp) { + MOZ_ASSERT(src == dest, "defineReuseInput"); + + int32_t amount = count.value & 0x3f; + if ((amount & 0x1f) != 0) { + movl(dest.high, temp); + shldl(Imm32(amount & 0x1f), dest.low, dest.high); + shldl(Imm32(amount & 0x1f), temp, dest.low); + } + + if (!!(amount & 0x20)) { + xchgl(dest.high, dest.low); + } +} + +void MacroAssembler::rotateRight64(Imm32 count, Register64 src, Register64 dest, + Register temp) { + MOZ_ASSERT(src == dest, "defineReuseInput"); + + int32_t amount = count.value & 0x3f; + if ((amount & 0x1f) != 0) { + movl(dest.high, temp); + shrdl(Imm32(amount & 0x1f), dest.low, dest.high); + shrdl(Imm32(amount & 0x1f), temp, dest.low); + } + + if (!!(amount & 0x20)) { + xchgl(dest.high, dest.low); + } +} + +// =============================================================== +// Bit counting functions + +void MacroAssembler::clz64(Register64 src, Register dest) { + if (AssemblerX86Shared::HasLZCNT()) { + Label nonzero, zero; + + testl(src.high, src.high); + j(Assembler::Zero, &zero); + + lzcntl(src.high, dest); + jump(&nonzero); + + bind(&zero); + lzcntl(src.low, dest); + addl(Imm32(32), dest); + + bind(&nonzero); + return; + } + + // Because |dest| may be equal to |src.low|, we rely on BSR not modifying its + // output when the input is zero. AMD ISA documents BSR not modifying the + // output and current Intel CPUs follow AMD. + + Label nonzero, zero; + + bsrl(src.high, dest); + j(Assembler::Zero, &zero); + orl(Imm32(32), dest); + jump(&nonzero); + + bind(&zero); + bsrl(src.low, dest); + j(Assembler::NonZero, &nonzero); + movl(Imm32(0x7F), dest); + + bind(&nonzero); + xorl(Imm32(0x3F), dest); +} + +void MacroAssembler::ctz64(Register64 src, Register dest) { + if (AssemblerX86Shared::HasBMI1()) { + Label nonzero, zero; + + testl(src.low, src.low); + j(Assembler::Zero, &zero); + + tzcntl(src.low, dest); + jump(&nonzero); + + bind(&zero); + tzcntl(src.high, dest); + addl(Imm32(32), dest); + + bind(&nonzero); + return; + } + + // Because |dest| may be equal to |src.low|, we rely on BSF not modifying its + // output when the input is zero. 
AMD ISA documents BSF not modifying the + // output and current Intel CPUs follow AMD. + + Label done, nonzero; + + bsfl(src.low, dest); + j(Assembler::NonZero, &done); + bsfl(src.high, dest); + j(Assembler::NonZero, &nonzero); + movl(Imm32(64), dest); + jump(&done); + + bind(&nonzero); + orl(Imm32(32), dest); + + bind(&done); +} + +void MacroAssembler::popcnt64(Register64 src, Register64 dest, Register tmp) { + // The tmp register is only needed if there is no native POPCNT. + + MOZ_ASSERT(src.low != tmp && src.high != tmp); + MOZ_ASSERT(dest.low != tmp && dest.high != tmp); + + if (dest.low != src.high) { + popcnt32(src.low, dest.low, tmp); + popcnt32(src.high, dest.high, tmp); + } else { + MOZ_ASSERT(dest.high != src.high); + popcnt32(src.low, dest.high, tmp); + popcnt32(src.high, dest.low, tmp); + } + addl(dest.high, dest.low); + xorl(dest.high, dest.high); +} + +// =============================================================== +// Condition functions + +void MacroAssembler::cmp64Set(Condition cond, Address lhs, Imm64 rhs, + Register dest) { + Label success, done; + + branch64(cond, lhs, rhs, &success); + move32(Imm32(0), dest); + jump(&done); + bind(&success); + move32(Imm32(1), dest); + bind(&done); +} + +template <typename T1, typename T2> +void MacroAssembler::cmpPtrSet(Condition cond, T1 lhs, T2 rhs, Register dest) { + cmpPtr(lhs, rhs); + emitSet(cond, dest); +} + +// =============================================================== +// Branch functions + +void MacroAssembler::branch32(Condition cond, const AbsoluteAddress& lhs, + Register rhs, Label* label) { + cmp32(Operand(lhs), rhs); + j(cond, label); +} + +void MacroAssembler::branch32(Condition cond, const AbsoluteAddress& lhs, + Imm32 rhs, Label* label) { + cmp32(Operand(lhs), rhs); + j(cond, label); +} + +void MacroAssembler::branch32(Condition cond, wasm::SymbolicAddress lhs, + Imm32 rhs, Label* label) { + cmpl(rhs, lhs); + j(cond, label); +} + +void MacroAssembler::branch64(Condition cond, Register64 lhs, Imm64 val, + Label* success, Label* fail) { + bool fallthrough = false; + Label fallthroughLabel; + + if (!fail) { + fail = &fallthroughLabel; + fallthrough = true; + } + + switch (cond) { + case Assembler::Equal: + branch32(Assembler::NotEqual, lhs.low, val.low(), fail); + branch32(Assembler::Equal, lhs.high, val.hi(), success); + if (!fallthrough) { + jump(fail); + } + break; + case Assembler::NotEqual: + branch32(Assembler::NotEqual, lhs.low, val.low(), success); + branch32(Assembler::NotEqual, lhs.high, val.hi(), success); + if (!fallthrough) { + jump(fail); + } + break; + case Assembler::LessThan: + case Assembler::LessThanOrEqual: + case Assembler::GreaterThan: + case Assembler::GreaterThanOrEqual: + case Assembler::Below: + case Assembler::BelowOrEqual: + case Assembler::Above: + case Assembler::AboveOrEqual: { + Assembler::Condition cond1 = Assembler::ConditionWithoutEqual(cond); + Assembler::Condition cond2 = + Assembler::ConditionWithoutEqual(Assembler::InvertCondition(cond)); + Assembler::Condition cond3 = Assembler::UnsignedCondition(cond); + + cmp32(lhs.high, val.hi()); + j(cond1, success); + j(cond2, fail); + cmp32(lhs.low, val.low()); + j(cond3, success); + if (!fallthrough) { + jump(fail); + } + break; + } + default: + MOZ_CRASH("Condition code not supported"); + break; + } + + if (fallthrough) { + bind(fail); + } +} + +void MacroAssembler::branch64(Condition cond, Register64 lhs, Register64 rhs, + Label* success, Label* fail) { + bool fallthrough = false; + Label fallthroughLabel; + + if (!fail) { + fail 
= &fallthroughLabel; + fallthrough = true; + } + + switch (cond) { + case Assembler::Equal: + branch32(Assembler::NotEqual, lhs.low, rhs.low, fail); + branch32(Assembler::Equal, lhs.high, rhs.high, success); + if (!fallthrough) { + jump(fail); + } + break; + case Assembler::NotEqual: + branch32(Assembler::NotEqual, lhs.low, rhs.low, success); + branch32(Assembler::NotEqual, lhs.high, rhs.high, success); + if (!fallthrough) { + jump(fail); + } + break; + case Assembler::LessThan: + case Assembler::LessThanOrEqual: + case Assembler::GreaterThan: + case Assembler::GreaterThanOrEqual: + case Assembler::Below: + case Assembler::BelowOrEqual: + case Assembler::Above: + case Assembler::AboveOrEqual: { + Assembler::Condition cond1 = Assembler::ConditionWithoutEqual(cond); + Assembler::Condition cond2 = + Assembler::ConditionWithoutEqual(Assembler::InvertCondition(cond)); + Assembler::Condition cond3 = Assembler::UnsignedCondition(cond); + + cmp32(lhs.high, rhs.high); + j(cond1, success); + j(cond2, fail); + cmp32(lhs.low, rhs.low); + j(cond3, success); + if (!fallthrough) { + jump(fail); + } + break; + } + default: + MOZ_CRASH("Condition code not supported"); + break; + } + + if (fallthrough) { + bind(fail); + } +} + +void MacroAssembler::branch64(Condition cond, const Address& lhs, Imm64 val, + Label* label) { + MOZ_ASSERT(cond == Assembler::NotEqual || cond == Assembler::Equal, + "other condition codes not supported"); + + Label done; + + if (cond == Assembler::Equal) { + branch32(Assembler::NotEqual, lhs, val.firstHalf(), &done); + } else { + branch32(Assembler::NotEqual, lhs, val.firstHalf(), label); + } + branch32(cond, Address(lhs.base, lhs.offset + sizeof(uint32_t)), + val.secondHalf(), label); + + bind(&done); +} + +void MacroAssembler::branch64(Condition cond, const Address& lhs, + Register64 rhs, Label* label) { + MOZ_ASSERT(cond == Assembler::NotEqual || cond == Assembler::Equal, + "other condition codes not supported"); + + Label done; + + if (cond == Assembler::Equal) { + branch32(Assembler::NotEqual, lhs, rhs.low, &done); + } else { + branch32(Assembler::NotEqual, lhs, rhs.low, label); + } + branch32(cond, Address(lhs.base, lhs.offset + sizeof(uint32_t)), rhs.high, + label); + + bind(&done); +} + +void MacroAssembler::branch64(Condition cond, const Address& lhs, + const Address& rhs, Register scratch, + Label* label) { + MOZ_ASSERT(cond == Assembler::NotEqual || cond == Assembler::Equal, + "other condition codes not supported"); + MOZ_ASSERT(lhs.base != scratch); + MOZ_ASSERT(rhs.base != scratch); + + Label done; + + load32(rhs, scratch); + if (cond == Assembler::Equal) { + branch32(Assembler::NotEqual, lhs, scratch, &done); + } else { + branch32(Assembler::NotEqual, lhs, scratch, label); + } + + load32(Address(rhs.base, rhs.offset + sizeof(uint32_t)), scratch); + branch32(cond, Address(lhs.base, lhs.offset + sizeof(uint32_t)), scratch, + label); + + bind(&done); +} + +void MacroAssembler::branchPtr(Condition cond, const AbsoluteAddress& lhs, + Register rhs, Label* label) { + branchPtrImpl(cond, lhs, rhs, label); +} + +void MacroAssembler::branchPtr(Condition cond, const AbsoluteAddress& lhs, + ImmWord rhs, Label* label) { + branchPtrImpl(cond, lhs, rhs, label); +} + +void MacroAssembler::branchPtr(Condition cond, wasm::SymbolicAddress lhs, + Register rhs, Label* label) { + cmpl(rhs, lhs); + j(cond, label); +} + +void MacroAssembler::branchPrivatePtr(Condition cond, const Address& lhs, + Register rhs, Label* label) { + branchPtr(cond, lhs, rhs, label); +} + +void 
MacroAssembler::branchTruncateFloat32ToPtr(FloatRegister src,
+                                                Register dest, Label* fail) {
+  branchTruncateFloat32ToInt32(src, dest, fail);
+}
+
+void MacroAssembler::branchTruncateFloat32MaybeModUint32(FloatRegister src,
+                                                         Register dest,
+                                                         Label* fail) {
+  branchTruncateFloat32ToInt32(src, dest, fail);
+}
+
+void MacroAssembler::branchTruncateFloat32ToInt32(FloatRegister src,
+                                                  Register dest, Label* fail) {
+  vcvttss2si(src, dest);
+
+  // vcvttss2si returns 0x80000000 on failure. Test for it by
+  // subtracting 1 and testing overflow (this permits the use of a
+  // smaller immediate field).
+  cmp32(dest, Imm32(1));
+  j(Assembler::Overflow, fail);
+}
+
+void MacroAssembler::branchTruncateDoubleToPtr(FloatRegister src, Register dest,
+                                               Label* fail) {
+  branchTruncateDoubleToInt32(src, dest, fail);
+}
+
+void MacroAssembler::branchTruncateDoubleMaybeModUint32(FloatRegister src,
+                                                        Register dest,
+                                                        Label* fail) {
+  // TODO: X64 supports integers up to 64 bits. Here we only support 32 bits
+  // before failing. Implementing this for x86 might give an x86 Kraken win.
+  branchTruncateDoubleToInt32(src, dest, fail);
+}
+
+void MacroAssembler::branchTruncateDoubleToInt32(FloatRegister src,
+                                                 Register dest, Label* fail) {
+  vcvttsd2si(src, dest);
+
+  // vcvttsd2si returns 0x80000000 on failure. Test for it by
+  // subtracting 1 and testing overflow (this permits the use of a
+  // smaller immediate field).
+  cmp32(dest, Imm32(1));
+  j(Assembler::Overflow, fail);
+}
+
+void MacroAssembler::branchAdd64(Condition cond, Imm64 imm, Register64 dest,
+                                 Label* label) {
+  add64(imm, dest);
+  j(cond, label);
+}
+
+void MacroAssembler::branchTest32(Condition cond, const AbsoluteAddress& lhs,
+                                  Imm32 rhs, Label* label) {
+  test32(Operand(lhs), rhs);
+  j(cond, label);
+}
+
+template <class L>
+void MacroAssembler::branchTest64(Condition cond, Register64 lhs,
+                                  Register64 rhs, Register temp, L label) {
+  if (cond == Assembler::Zero || cond == Assembler::NonZero) {
+    MOZ_ASSERT(lhs.low == rhs.low);
+    MOZ_ASSERT(lhs.high == rhs.high);
+    movl(lhs.low, temp);
+    orl(lhs.high, temp);
+    branchTestPtr(cond, temp, temp, label);
+  } else if (cond == Assembler::Signed || cond == Assembler::NotSigned) {
+    branchTest32(cond, lhs.high, rhs.high, label);
+  } else {
+    MOZ_CRASH("Unsupported condition");
+  }
+}
+
+void MacroAssembler::branchTestBooleanTruthy(bool truthy,
+                                             const ValueOperand& value,
+                                             Label* label) {
+  test32(value.payloadReg(), value.payloadReg());
+  j(truthy ?
NonZero : Zero, label); +} + +void MacroAssembler::branchTestMagic(Condition cond, const Address& valaddr, + JSWhyMagic why, Label* label) { + MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual); + + Label notMagic; + if (cond == Assembler::Equal) { + branchTestMagic(Assembler::NotEqual, valaddr, ¬Magic); + } else { + branchTestMagic(Assembler::NotEqual, valaddr, label); + } + + branch32(cond, ToPayload(valaddr), Imm32(why), label); + bind(¬Magic); +} + +void MacroAssembler::branchTestValue(Condition cond, const BaseIndex& lhs, + const ValueOperand& rhs, Label* label) { + MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual); + + Label notSameValue; + if (cond == Assembler::Equal) { + branch32(Assembler::NotEqual, ToType(lhs), rhs.typeReg(), ¬SameValue); + } else { + branch32(Assembler::NotEqual, ToType(lhs), rhs.typeReg(), label); + } + + branch32(cond, ToPayload(lhs), rhs.payloadReg(), label); + bind(¬SameValue); +} + +void MacroAssembler::branchToComputedAddress(const BaseIndex& addr) { + jmp(Operand(addr)); +} + +void MacroAssembler::cmp32MovePtr(Condition cond, Register lhs, Imm32 rhs, + Register src, Register dest) { + cmp32(lhs, rhs); + cmovCCl(cond, Operand(src), dest); +} + +void MacroAssembler::cmp32LoadPtr(Condition cond, const Address& lhs, Imm32 rhs, + const Address& src, Register dest) { + cmp32(lhs, rhs); + cmovCCl(cond, Operand(src), dest); +} + +void MacroAssembler::cmpPtrMovePtr(Condition cond, Register lhs, Register rhs, + Register src, Register dest) { + cmp32Move32(cond, lhs, rhs, src, dest); +} + +void MacroAssembler::cmpPtrMovePtr(Condition cond, Register lhs, + const Address& rhs, Register src, + Register dest) { + cmp32Move32(cond, lhs, rhs, src, dest); +} + +void MacroAssembler::test32LoadPtr(Condition cond, const Address& addr, + Imm32 mask, const Address& src, + Register dest) { + MOZ_ASSERT(cond == Assembler::Zero || cond == Assembler::NonZero); + test32(addr, mask); + cmovCCl(cond, Operand(src), dest); +} + +void MacroAssembler::test32MovePtr(Condition cond, const Address& addr, + Imm32 mask, Register src, Register dest) { + MOZ_ASSERT(cond == Assembler::Zero || cond == Assembler::NonZero); + test32(addr, mask); + cmovCCl(cond, Operand(src), dest); +} + +void MacroAssembler::spectreMovePtr(Condition cond, Register src, + Register dest) { + cmovCCl(cond, Operand(src), dest); +} + +void MacroAssembler::spectreBoundsCheck32(Register index, const Operand& length, + Register maybeScratch, + Label* failure) { + Label failurePopValue; + bool pushedValue = false; + if (JitOptions.spectreIndexMasking) { + if (maybeScratch == InvalidReg) { + push(Imm32(0)); + pushedValue = true; + } else { + move32(Imm32(0), maybeScratch); + } + } + + cmp32(index, length); + j(Assembler::AboveOrEqual, pushedValue ? 
&failurePopValue : failure); + + if (JitOptions.spectreIndexMasking) { + if (maybeScratch == InvalidReg) { + Label done; + cmovCCl(Assembler::AboveOrEqual, Operand(StackPointer, 0), index); + lea(Operand(StackPointer, sizeof(void*)), StackPointer); + jump(&done); + + bind(&failurePopValue); + lea(Operand(StackPointer, sizeof(void*)), StackPointer); + jump(failure); + + bind(&done); + } else { + cmovCCl(Assembler::AboveOrEqual, maybeScratch, index); + } + } +} + +void MacroAssembler::spectreBoundsCheck32(Register index, Register length, + Register maybeScratch, + Label* failure) { + MOZ_ASSERT(length != maybeScratch); + MOZ_ASSERT(index != maybeScratch); + + spectreBoundsCheck32(index, Operand(length), maybeScratch, failure); +} + +void MacroAssembler::spectreBoundsCheck32(Register index, const Address& length, + Register maybeScratch, + Label* failure) { + MOZ_ASSERT(index != length.base); + MOZ_ASSERT(length.base != maybeScratch); + MOZ_ASSERT(index != maybeScratch); + + spectreBoundsCheck32(index, Operand(length), maybeScratch, failure); +} + +void MacroAssembler::spectreBoundsCheckPtr(Register index, Register length, + Register maybeScratch, + Label* failure) { + spectreBoundsCheck32(index, length, maybeScratch, failure); +} + +void MacroAssembler::spectreBoundsCheckPtr(Register index, + const Address& length, + Register maybeScratch, + Label* failure) { + spectreBoundsCheck32(index, length, maybeScratch, failure); +} + +// ======================================================================== +// SIMD + +void MacroAssembler::extractLaneInt64x2(uint32_t lane, FloatRegister src, + Register64 dest) { + if (lane == 0) { + vmovd(src, dest.low); + } else { + vpextrd(2 * lane, src, dest.low); + } + vpextrd(2 * lane + 1, src, dest.high); +} + +void MacroAssembler::replaceLaneInt64x2(unsigned lane, Register64 rhs, + FloatRegister lhsDest) { + vpinsrd(2 * lane, rhs.low, lhsDest, lhsDest); + vpinsrd(2 * lane + 1, rhs.high, lhsDest, lhsDest); +} + +void MacroAssembler::replaceLaneInt64x2(unsigned lane, FloatRegister lhs, + Register64 rhs, FloatRegister dest) { + vpinsrd(2 * lane, rhs.low, lhs, dest); + vpinsrd(2 * lane + 1, rhs.high, dest, dest); +} + +void MacroAssembler::splatX2(Register64 src, FloatRegister dest) { + vmovd(src.low, dest); + vpinsrd(1, src.high, dest, dest); + vpunpcklqdq(dest, dest, dest); +} + +// ======================================================================== +// Truncate floating point. + +void MacroAssembler::truncateFloat32ToUInt64(Address src, Address dest, + Register temp, + FloatRegister floatTemp) { + Label done; + + loadFloat32(src, floatTemp); + + truncateFloat32ToInt64(src, dest, temp); + + // For unsigned conversion the case of [INT64, UINT64] needs to get handle + // seperately. + load32(HighWord(dest), temp); + branch32(Assembler::Condition::NotSigned, temp, Imm32(0), &done); + + // Move the value inside INT64 range. 
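+  // Reaching here means the signed truncation above left a negative high
+  // word, i.e. the input is at least 2^63 and cannot be converted directly.
+  // Subtract 2^63 in the float domain, truncate the now-representable
+  // value, and then set bit 63 of the result to add 2^63 back in.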
+ storeFloat32(floatTemp, dest); + loadConstantFloat32(double(int64_t(0x8000000000000000)), floatTemp); + vaddss(Operand(dest), floatTemp, floatTemp); + storeFloat32(floatTemp, dest); + truncateFloat32ToInt64(dest, dest, temp); + + load32(HighWord(dest), temp); + orl(Imm32(0x80000000), temp); + store32(temp, HighWord(dest)); + + bind(&done); +} + +void MacroAssembler::truncateDoubleToUInt64(Address src, Address dest, + Register temp, + FloatRegister floatTemp) { + Label done; + + loadDouble(src, floatTemp); + + truncateDoubleToInt64(src, dest, temp); + + // For unsigned conversion the case of [INT64, UINT64] needs to get handle + // seperately. + load32(HighWord(dest), temp); + branch32(Assembler::Condition::NotSigned, temp, Imm32(0), &done); + + // Move the value inside INT64 range. + storeDouble(floatTemp, dest); + loadConstantDouble(double(int64_t(0x8000000000000000)), floatTemp); + vaddsd(Operand(dest), floatTemp, floatTemp); + storeDouble(floatTemp, dest); + truncateDoubleToInt64(dest, dest, temp); + + load32(HighWord(dest), temp); + orl(Imm32(0x80000000), temp); + store32(temp, HighWord(dest)); + + bind(&done); +} + +template <typename T> +void MacroAssemblerX86::fallibleUnboxPtrImpl(const T& src, Register dest, + JSValueType type, Label* fail) { + switch (type) { + case JSVAL_TYPE_OBJECT: + asMasm().branchTestObject(Assembler::NotEqual, src, fail); + break; + case JSVAL_TYPE_STRING: + asMasm().branchTestString(Assembler::NotEqual, src, fail); + break; + case JSVAL_TYPE_SYMBOL: + asMasm().branchTestSymbol(Assembler::NotEqual, src, fail); + break; + case JSVAL_TYPE_BIGINT: + asMasm().branchTestBigInt(Assembler::NotEqual, src, fail); + break; + default: + MOZ_CRASH("Unexpected type"); + } + unboxNonDouble(src, dest, type); +} + +void MacroAssembler::fallibleUnboxPtr(const ValueOperand& src, Register dest, + JSValueType type, Label* fail) { + fallibleUnboxPtrImpl(src, dest, type, fail); +} + +void MacroAssembler::fallibleUnboxPtr(const Address& src, Register dest, + JSValueType type, Label* fail) { + fallibleUnboxPtrImpl(src, dest, type, fail); +} + +void MacroAssembler::fallibleUnboxPtr(const BaseIndex& src, Register dest, + JSValueType type, Label* fail) { + fallibleUnboxPtrImpl(src, dest, type, fail); +} + +//}}} check_macroassembler_style +// =============================================================== + +// Note: this function clobbers the source register. +void MacroAssemblerX86::convertUInt32ToDouble(Register src, + FloatRegister dest) { + // src is [0, 2^32-1] + subl(Imm32(0x80000000), src); + + // Now src is [-2^31, 2^31-1] - int range, but not the same value. + convertInt32ToDouble(src, dest); + + // dest is now a double with the int range. + // correct the double value by adding 0x80000000. + asMasm().addConstantDouble(2147483648.0, dest); +} + +// Note: this function clobbers the source register. 
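+// Going through double keeps the conversion exact: every uint32 is
+// representable as a double, so the only rounding happens in the final
+// double-to-float32 step.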
+void MacroAssemblerX86::convertUInt32ToFloat32(Register src, + FloatRegister dest) { + convertUInt32ToDouble(src, dest); + convertDoubleToFloat32(dest, dest); +} + +void MacroAssemblerX86::unboxValue(const ValueOperand& src, AnyRegister dest, + JSValueType) { + if (dest.isFloat()) { + Label notInt32, end; + asMasm().branchTestInt32(Assembler::NotEqual, src, ¬Int32); + convertInt32ToDouble(src.payloadReg(), dest.fpu()); + jump(&end); + bind(¬Int32); + unboxDouble(src, dest.fpu()); + bind(&end); + } else { + if (src.payloadReg() != dest.gpr()) { + movl(src.payloadReg(), dest.gpr()); + } + } +} + +template <typename T> +void MacroAssemblerX86::loadInt32OrDouble(const T& src, FloatRegister dest) { + Label notInt32, end; + asMasm().branchTestInt32(Assembler::NotEqual, src, ¬Int32); + convertInt32ToDouble(ToPayload(src), dest); + jump(&end); + bind(¬Int32); + loadDouble(src, dest); + bind(&end); +} + +template <typename T> +void MacroAssemblerX86::loadUnboxedValue(const T& src, MIRType type, + AnyRegister dest) { + if (dest.isFloat()) { + loadInt32OrDouble(src, dest.fpu()); + } else { + movl(Operand(src), dest.gpr()); + } +} + +// If source is a double, load it into dest. If source is int32, +// convert it to double. Else, branch to failure. +void MacroAssemblerX86::ensureDouble(const ValueOperand& source, + FloatRegister dest, Label* failure) { + Label isDouble, done; + asMasm().branchTestDouble(Assembler::Equal, source.typeReg(), &isDouble); + asMasm().branchTestInt32(Assembler::NotEqual, source.typeReg(), failure); + + convertInt32ToDouble(source.payloadReg(), dest); + jump(&done); + + bind(&isDouble); + unboxDouble(source, dest); + + bind(&done); +} + +} // namespace jit +} // namespace js + +#endif /* jit_x86_MacroAssembler_x86_inl_h */ diff --git a/js/src/jit/x86/MacroAssembler-x86.cpp b/js/src/jit/x86/MacroAssembler-x86.cpp new file mode 100644 index 0000000000..c9d8acbd4d --- /dev/null +++ b/js/src/jit/x86/MacroAssembler-x86.cpp @@ -0,0 +1,1829 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "jit/x86/MacroAssembler-x86.h" + +#include "mozilla/Alignment.h" +#include "mozilla/Casting.h" + +#include "jit/AtomicOp.h" +#include "jit/Bailouts.h" +#include "jit/BaselineFrame.h" +#include "jit/JitFrames.h" +#include "jit/JitRuntime.h" +#include "jit/MacroAssembler.h" +#include "jit/MoveEmitter.h" +#include "util/Memory.h" +#include "vm/BigIntType.h" +#include "vm/JitActivation.h" // js::jit::JitActivation +#include "vm/JSContext.h" +#include "vm/StringType.h" + +#include "jit/MacroAssembler-inl.h" +#include "vm/JSScript-inl.h" + +using namespace js; +using namespace js::jit; + +void MacroAssemblerX86::loadConstantDouble(double d, FloatRegister dest) { + if (maybeInlineDouble(d, dest)) { + return; + } + Double* dbl = getDouble(d); + if (!dbl) { + return; + } + masm.vmovsd_mr(nullptr, dest.encoding()); + propagateOOM(dbl->uses.append(CodeOffset(masm.size()))); +} + +void MacroAssemblerX86::loadConstantFloat32(float f, FloatRegister dest) { + if (maybeInlineFloat(f, dest)) { + return; + } + Float* flt = getFloat(f); + if (!flt) { + return; + } + masm.vmovss_mr(nullptr, dest.encoding()); + propagateOOM(flt->uses.append(CodeOffset(masm.size()))); +} + +void MacroAssemblerX86::loadConstantSimd128Int(const SimdConstant& v, + FloatRegister dest) { + if (maybeInlineSimd128Int(v, dest)) { + return; + } + SimdData* i4 = getSimdData(v); + if (!i4) { + return; + } + masm.vmovdqa_mr(nullptr, dest.encoding()); + propagateOOM(i4->uses.append(CodeOffset(masm.size()))); +} + +void MacroAssemblerX86::loadConstantSimd128Float(const SimdConstant& v, + FloatRegister dest) { + if (maybeInlineSimd128Float(v, dest)) { + return; + } + SimdData* f4 = getSimdData(v); + if (!f4) { + return; + } + masm.vmovaps_mr(nullptr, dest.encoding()); + propagateOOM(f4->uses.append(CodeOffset(masm.size()))); +} + +void MacroAssemblerX86::vpPatchOpSimd128( + const SimdConstant& v, FloatRegister src, FloatRegister dest, + void (X86Encoding::BaseAssemblerX86::*op)( + const void* address, X86Encoding::XMMRegisterID srcId, + X86Encoding::XMMRegisterID destId)) { + SimdData* val = getSimdData(v); + if (!val) { + return; + } + (masm.*op)(nullptr, src.encoding(), dest.encoding()); + propagateOOM(val->uses.append(CodeOffset(masm.size()))); +} + +void MacroAssemblerX86::vpPatchOpSimd128( + const SimdConstant& v, FloatRegister src, FloatRegister dest, + size_t (X86Encoding::BaseAssemblerX86::*op)( + const void* address, X86Encoding::XMMRegisterID srcId, + X86Encoding::XMMRegisterID destId)) { + SimdData* val = getSimdData(v); + if (!val) { + return; + } + size_t patchOffsetFromEnd = + (masm.*op)(nullptr, src.encoding(), dest.encoding()); + propagateOOM(val->uses.append(CodeOffset(masm.size() - patchOffsetFromEnd))); +} + +void MacroAssemblerX86::vpaddbSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpaddb_mr); +} + +void MacroAssemblerX86::vpaddwSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpaddw_mr); +} + +void MacroAssemblerX86::vpadddSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpaddd_mr); +} + +void MacroAssemblerX86::vpaddqSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpaddq_mr); +} + +void MacroAssemblerX86::vpsubbSimd128(const SimdConstant& v, 
FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpsubb_mr); +} + +void MacroAssemblerX86::vpsubwSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpsubw_mr); +} + +void MacroAssemblerX86::vpsubdSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpsubd_mr); +} + +void MacroAssemblerX86::vpsubqSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpsubq_mr); +} + +void MacroAssemblerX86::vpmullwSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpmullw_mr); +} + +void MacroAssemblerX86::vpmulldSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpmulld_mr); +} + +void MacroAssemblerX86::vpaddsbSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpaddsb_mr); +} + +void MacroAssemblerX86::vpaddusbSimd128(const SimdConstant& v, + FloatRegister lhs, FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpaddusb_mr); +} + +void MacroAssemblerX86::vpaddswSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpaddsw_mr); +} + +void MacroAssemblerX86::vpadduswSimd128(const SimdConstant& v, + FloatRegister lhs, FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpaddusw_mr); +} + +void MacroAssemblerX86::vpsubsbSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpsubsb_mr); +} + +void MacroAssemblerX86::vpsubusbSimd128(const SimdConstant& v, + FloatRegister lhs, FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpsubusb_mr); +} + +void MacroAssemblerX86::vpsubswSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpsubsw_mr); +} + +void MacroAssemblerX86::vpsubuswSimd128(const SimdConstant& v, + FloatRegister lhs, FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpsubusw_mr); +} + +void MacroAssemblerX86::vpminsbSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpminsb_mr); +} + +void MacroAssemblerX86::vpminubSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpminub_mr); +} + +void MacroAssemblerX86::vpminswSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpminsw_mr); +} + +void MacroAssemblerX86::vpminuwSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpminuw_mr); +} + +void MacroAssemblerX86::vpminsdSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpminsd_mr); +} + +void 
MacroAssemblerX86::vpminudSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpminud_mr); +} + +void MacroAssemblerX86::vpmaxsbSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpmaxsb_mr); +} + +void MacroAssemblerX86::vpmaxubSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpmaxub_mr); +} + +void MacroAssemblerX86::vpmaxswSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpmaxsw_mr); +} + +void MacroAssemblerX86::vpmaxuwSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpmaxuw_mr); +} + +void MacroAssemblerX86::vpmaxsdSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpmaxsd_mr); +} + +void MacroAssemblerX86::vpmaxudSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpmaxud_mr); +} + +void MacroAssemblerX86::vpandSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpand_mr); +} + +void MacroAssemblerX86::vpxorSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpxor_mr); +} + +void MacroAssemblerX86::vporSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpor_mr); +} + +void MacroAssemblerX86::vaddpsSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vaddps_mr); +} + +void MacroAssemblerX86::vaddpdSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vaddpd_mr); +} + +void MacroAssemblerX86::vsubpsSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vsubps_mr); +} + +void MacroAssemblerX86::vsubpdSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vsubpd_mr); +} + +void MacroAssemblerX86::vdivpsSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vdivps_mr); +} + +void MacroAssemblerX86::vdivpdSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vdivpd_mr); +} + +void MacroAssemblerX86::vmulpsSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vmulps_mr); +} + +void MacroAssemblerX86::vmulpdSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vmulpd_mr); +} + +void MacroAssemblerX86::vandpdSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vandpd_mr); +} + 
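+// Note on the *Simd128 helpers in this block: each wrapper emits its SSE/AVX
+// instruction with a placeholder memory operand and records the use via
+// getSimdData(), so finish() can later lay out the SIMD constant pool and
+// patch in the real address. A caller simply writes, e.g.,
+//   masm.vpandSimd128(constant, lhs, dest);
+// and never touches the pool directly.
+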
+void MacroAssemblerX86::vminpdSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vminpd_mr); +} + +void MacroAssemblerX86::vpacksswbSimd128(const SimdConstant& v, + FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpacksswb_mr); +} + +void MacroAssemblerX86::vpackuswbSimd128(const SimdConstant& v, + FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpackuswb_mr); +} + +void MacroAssemblerX86::vpackssdwSimd128(const SimdConstant& v, + FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpackssdw_mr); +} + +void MacroAssemblerX86::vpackusdwSimd128(const SimdConstant& v, + FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpackusdw_mr); +} + +void MacroAssemblerX86::vpunpckldqSimd128(const SimdConstant& v, + FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpunpckldq_mr); +} + +void MacroAssemblerX86::vunpcklpsSimd128(const SimdConstant& v, + FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vunpcklps_mr); +} + +void MacroAssemblerX86::vpshufbSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpshufb_mr); +} + +void MacroAssemblerX86::vptestSimd128(const SimdConstant& v, + FloatRegister lhs) { + vpPatchOpSimd128(v, lhs, &X86Encoding::BaseAssemblerX86::vptest_mr); +} + +void MacroAssemblerX86::vpmaddwdSimd128(const SimdConstant& v, + FloatRegister lhs, FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpmaddwd_mr); +} + +void MacroAssemblerX86::vpcmpeqbSimd128(const SimdConstant& v, + FloatRegister lhs, FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpcmpeqb_mr); +} + +void MacroAssemblerX86::vpcmpgtbSimd128(const SimdConstant& v, + FloatRegister lhs, FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpcmpgtb_mr); +} + +void MacroAssemblerX86::vpcmpeqwSimd128(const SimdConstant& v, + FloatRegister lhs, FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpcmpeqw_mr); +} + +void MacroAssemblerX86::vpcmpgtwSimd128(const SimdConstant& v, + FloatRegister lhs, FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpcmpgtw_mr); +} + +void MacroAssemblerX86::vpcmpeqdSimd128(const SimdConstant& v, + FloatRegister lhs, FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpcmpeqd_mr); +} + +void MacroAssemblerX86::vpcmpgtdSimd128(const SimdConstant& v, + FloatRegister lhs, FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpcmpgtd_mr); +} + +void MacroAssemblerX86::vcmpeqpsSimd128(const SimdConstant& v, + FloatRegister lhs, FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vcmpeqps_mr); +} + +void MacroAssemblerX86::vcmpneqpsSimd128(const SimdConstant& v, + FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vcmpneqps_mr); +} + +void MacroAssemblerX86::vcmpltpsSimd128(const SimdConstant& v, + FloatRegister lhs, FloatRegister dest) { + 
vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vcmpltps_mr); +} + +void MacroAssemblerX86::vcmplepsSimd128(const SimdConstant& v, + FloatRegister lhs, FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vcmpleps_mr); +} + +void MacroAssemblerX86::vcmpgepsSimd128(const SimdConstant& v, + FloatRegister lhs, FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vcmpgeps_mr); +} + +void MacroAssemblerX86::vcmpeqpdSimd128(const SimdConstant& v, + FloatRegister lhs, FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vcmpeqpd_mr); +} + +void MacroAssemblerX86::vcmpneqpdSimd128(const SimdConstant& v, + FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vcmpneqpd_mr); +} + +void MacroAssemblerX86::vcmpltpdSimd128(const SimdConstant& v, + FloatRegister lhs, FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vcmpltpd_mr); +} + +void MacroAssemblerX86::vcmplepdSimd128(const SimdConstant& v, + FloatRegister lhs, FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vcmplepd_mr); +} + +void MacroAssemblerX86::vpmaddubswSimd128(const SimdConstant& v, + FloatRegister lhs, + FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpmaddubsw_mr); +} + +void MacroAssemblerX86::vpmuludqSimd128(const SimdConstant& v, + FloatRegister lhs, FloatRegister dest) { + vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpmuludq_mr); +} + +void MacroAssemblerX86::finish() { + // Last instruction may be an indirect jump so eagerly insert an undefined + // instruction byte to prevent processors from decoding data values into + // their pipelines. See Intel performance guides. + masm.ud2(); + + if (!doubles_.empty()) { + masm.haltingAlign(sizeof(double)); + } + for (const Double& d : doubles_) { + CodeOffset cst(masm.currentOffset()); + for (CodeOffset use : d.uses) { + addCodeLabel(CodeLabel(use, cst)); + } + masm.doubleConstant(d.value); + if (!enoughMemory_) { + return; + } + } + + if (!floats_.empty()) { + masm.haltingAlign(sizeof(float)); + } + for (const Float& f : floats_) { + CodeOffset cst(masm.currentOffset()); + for (CodeOffset use : f.uses) { + addCodeLabel(CodeLabel(use, cst)); + } + masm.floatConstant(f.value); + if (!enoughMemory_) { + return; + } + } + + // SIMD memory values must be suitably aligned. + if (!simds_.empty()) { + masm.haltingAlign(SimdMemoryAlignment); + } + for (const SimdData& v : simds_) { + CodeOffset cst(masm.currentOffset()); + for (CodeOffset use : v.uses) { + addCodeLabel(CodeLabel(use, cst)); + } + masm.simd128Constant(v.value.bytes()); + if (!enoughMemory_) { + return; + } + } +} + +void MacroAssemblerX86::handleFailureWithHandlerTail(Label* profilerExitTail, + Label* bailoutTail) { + // Reserve space for exception information. + subl(Imm32(sizeof(ResumeFromException)), esp); + movl(esp, eax); + + // Call the handler. 
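+  // HandleException fills in the ResumeFromException record reserved above;
+  // the code below dispatches on its kind field.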
+ using Fn = void (*)(ResumeFromException * rfe); + asMasm().setupUnalignedABICall(ecx); + asMasm().passABIArg(eax); + asMasm().callWithABI<Fn, HandleException>( + MoveOp::GENERAL, CheckUnsafeCallWithABI::DontCheckHasExitFrame); + + Label entryFrame; + Label catch_; + Label finally; + Label returnBaseline; + Label returnIon; + Label bailout; + Label wasm; + Label wasmCatch; + + loadPtr(Address(esp, ResumeFromException::offsetOfKind()), eax); + asMasm().branch32(Assembler::Equal, eax, + Imm32(ExceptionResumeKind::EntryFrame), &entryFrame); + asMasm().branch32(Assembler::Equal, eax, Imm32(ExceptionResumeKind::Catch), + &catch_); + asMasm().branch32(Assembler::Equal, eax, Imm32(ExceptionResumeKind::Finally), + &finally); + asMasm().branch32(Assembler::Equal, eax, + Imm32(ExceptionResumeKind::ForcedReturnBaseline), + &returnBaseline); + asMasm().branch32(Assembler::Equal, eax, + Imm32(ExceptionResumeKind::ForcedReturnIon), &returnIon); + asMasm().branch32(Assembler::Equal, eax, Imm32(ExceptionResumeKind::Bailout), + &bailout); + asMasm().branch32(Assembler::Equal, eax, Imm32(ExceptionResumeKind::Wasm), + &wasm); + asMasm().branch32(Assembler::Equal, eax, + Imm32(ExceptionResumeKind::WasmCatch), &wasmCatch); + + breakpoint(); // Invalid kind. + + // No exception handler. Load the error value, restore state and return from + // the entry frame. + bind(&entryFrame); + asMasm().moveValue(MagicValue(JS_ION_ERROR), JSReturnOperand); + loadPtr(Address(esp, ResumeFromException::offsetOfFramePointer()), ebp); + loadPtr(Address(esp, ResumeFromException::offsetOfStackPointer()), esp); + ret(); + + // If we found a catch handler, this must be a baseline frame. Restore state + // and jump to the catch block. + bind(&catch_); + loadPtr(Address(esp, ResumeFromException::offsetOfTarget()), eax); + loadPtr(Address(esp, ResumeFromException::offsetOfFramePointer()), ebp); + loadPtr(Address(esp, ResumeFromException::offsetOfStackPointer()), esp); + jmp(Operand(eax)); + + // If we found a finally block, this must be a baseline frame. Push two + // values expected by the finally block: the exception and BooleanValue(true). + bind(&finally); + ValueOperand exception = ValueOperand(ecx, edx); + loadValue(Address(esp, ResumeFromException::offsetOfException()), exception); + + loadPtr(Address(esp, ResumeFromException::offsetOfTarget()), eax); + loadPtr(Address(esp, ResumeFromException::offsetOfFramePointer()), ebp); + loadPtr(Address(esp, ResumeFromException::offsetOfStackPointer()), esp); + + pushValue(exception); + pushValue(BooleanValue(true)); + jmp(Operand(eax)); + + // Return BaselineFrame->returnValue() to the caller. + // Used in debug mode and for GeneratorReturn. + Label profilingInstrumentation; + bind(&returnBaseline); + loadPtr(Address(esp, ResumeFromException::offsetOfFramePointer()), ebp); + loadPtr(Address(esp, ResumeFromException::offsetOfStackPointer()), esp); + loadValue(Address(ebp, BaselineFrame::reverseOffsetOfReturnValue()), + JSReturnOperand); + jump(&profilingInstrumentation); + + // Return the given value to the caller. + bind(&returnIon); + loadValue(Address(esp, ResumeFromException::offsetOfException()), + JSReturnOperand); + loadPtr(Address(esp, ResumeFromException::offsetOfFramePointer()), ebp); + loadPtr(Address(esp, ResumeFromException::offsetOfStackPointer()), esp); + + // If profiling is enabled, then update the lastProfilingFrame to refer to + // caller frame before returning. This code is shared by ForcedReturnIon + // and ForcedReturnBaseline. 
+ bind(&profilingInstrumentation); + { + Label skipProfilingInstrumentation; + // Test if profiler enabled. + AbsoluteAddress addressOfEnabled( + asMasm().runtime()->geckoProfiler().addressOfEnabled()); + asMasm().branch32(Assembler::Equal, addressOfEnabled, Imm32(0), + &skipProfilingInstrumentation); + jump(profilerExitTail); + bind(&skipProfilingInstrumentation); + } + + movl(ebp, esp); + pop(ebp); + ret(); + + // If we are bailing out to baseline to handle an exception, jump to the + // bailout tail stub. Load 1 (true) in ReturnReg to indicate success. + bind(&bailout); + loadPtr(Address(esp, ResumeFromException::offsetOfBailoutInfo()), ecx); + loadPtr(Address(esp, ResumeFromException::offsetOfStackPointer()), esp); + move32(Imm32(1), ReturnReg); + jump(bailoutTail); + + // If we are throwing and the innermost frame was a wasm frame, reset SP and + // FP; SP is pointing to the unwound return address to the wasm entry, so + // we can just ret(). + bind(&wasm); + loadPtr(Address(esp, ResumeFromException::offsetOfFramePointer()), ebp); + loadPtr(Address(esp, ResumeFromException::offsetOfStackPointer()), esp); + movePtr(ImmPtr((const void*)wasm::FailInstanceReg), InstanceReg); + masm.ret(); + + // Found a wasm catch handler, restore state and jump to it. + bind(&wasmCatch); + loadPtr(Address(esp, ResumeFromException::offsetOfTarget()), eax); + loadPtr(Address(esp, ResumeFromException::offsetOfFramePointer()), ebp); + loadPtr(Address(esp, ResumeFromException::offsetOfStackPointer()), esp); + jmp(Operand(eax)); +} + +void MacroAssemblerX86::profilerEnterFrame(Register framePtr, + Register scratch) { + asMasm().loadJSContext(scratch); + loadPtr(Address(scratch, offsetof(JSContext, profilingActivation_)), scratch); + storePtr(framePtr, + Address(scratch, JitActivation::offsetOfLastProfilingFrame())); + storePtr(ImmPtr(nullptr), + Address(scratch, JitActivation::offsetOfLastProfilingCallSite())); +} + +void MacroAssemblerX86::profilerExitFrame() { + jump(asMasm().runtime()->jitRuntime()->getProfilerExitFrameTail()); +} + +Assembler::Condition MacroAssemblerX86::testStringTruthy( + bool truthy, const ValueOperand& value) { + Register string = value.payloadReg(); + cmp32(Operand(string, JSString::offsetOfLength()), Imm32(0)); + return truthy ? Assembler::NotEqual : Assembler::Equal; +} + +Assembler::Condition MacroAssemblerX86::testBigIntTruthy( + bool truthy, const ValueOperand& value) { + Register bi = value.payloadReg(); + cmp32(Operand(bi, JS::BigInt::offsetOfDigitLength()), Imm32(0)); + return truthy ? Assembler::NotEqual : Assembler::Equal; +} + +MacroAssembler& MacroAssemblerX86::asMasm() { + return *static_cast<MacroAssembler*>(this); +} + +const MacroAssembler& MacroAssemblerX86::asMasm() const { + return *static_cast<const MacroAssembler*>(this); +} + +void MacroAssembler::subFromStackPtr(Imm32 imm32) { + if (imm32.value) { + // On windows, we cannot skip very far down the stack without touching the + // memory pages in-between. This is a corner-case code for situations where + // the Ion frame data for a piece of code is very large. To handle this + // special case, for frames over 4k in size we allocate memory on the stack + // incrementally, touching it as we go. + // + // When the amount is quite large, which it can be, we emit an actual loop, + // in order to keep the function prologue compact. Compactness is a + // requirement for eg Wasm's CodeRange data structure, which can encode only + // 8-bit offsets. 
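+  // Strategy: for at most 8 whole pages the probes are emitted unrolled;
+  // beyond that, eax is saved and reused as a page counter for a compact
+  // probe loop, and any sub-page remainder is subtracted afterwards.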
+ uint32_t amountLeft = imm32.value; + uint32_t fullPages = amountLeft / 4096; + if (fullPages <= 8) { + while (amountLeft > 4096) { + subl(Imm32(4096), StackPointer); + store32(Imm32(0), Address(StackPointer, 0)); + amountLeft -= 4096; + } + subl(Imm32(amountLeft), StackPointer); + } else { + // Save scratch register. + push(eax); + amountLeft -= 4; + fullPages = amountLeft / 4096; + + Label top; + move32(Imm32(fullPages), eax); + bind(&top); + subl(Imm32(4096), StackPointer); + store32(Imm32(0), Address(StackPointer, 0)); + subl(Imm32(1), eax); + j(Assembler::NonZero, &top); + amountLeft -= fullPages * 4096; + if (amountLeft) { + subl(Imm32(amountLeft), StackPointer); + } + + // Restore scratch register. + movl(Operand(StackPointer, uint32_t(imm32.value) - 4), eax); + } + } +} + +//{{{ check_macroassembler_style +// =============================================================== +// ABI function calls. + +void MacroAssembler::setupUnalignedABICall(Register scratch) { + setupNativeABICall(); + dynamicAlignment_ = true; + + movl(esp, scratch); + andl(Imm32(~(ABIStackAlignment - 1)), esp); + push(scratch); +} + +void MacroAssembler::callWithABIPre(uint32_t* stackAdjust, bool callFromWasm) { + MOZ_ASSERT(inCall_); + uint32_t stackForCall = abiArgs_.stackBytesConsumedSoFar(); + + if (dynamicAlignment_) { + // sizeof(intptr_t) accounts for the saved stack pointer pushed by + // setupUnalignedABICall. + stackForCall += ComputeByteAlignment(stackForCall + sizeof(intptr_t), + ABIStackAlignment); + } else { + uint32_t alignmentAtPrologue = callFromWasm ? sizeof(wasm::Frame) : 0; + stackForCall += ComputeByteAlignment( + stackForCall + framePushed() + alignmentAtPrologue, ABIStackAlignment); + } + + *stackAdjust = stackForCall; + reserveStack(stackForCall); + + // Position all arguments. + { + enoughMemory_ &= moveResolver_.resolve(); + if (!enoughMemory_) { + return; + } + + MoveEmitter emitter(*this); + emitter.emit(moveResolver_); + emitter.finish(); + } + + assertStackAlignment(ABIStackAlignment); +} + +void MacroAssembler::callWithABIPost(uint32_t stackAdjust, MoveOp::Type result, + bool callFromWasm) { + freeStack(stackAdjust); + + // Calls to native functions in wasm pass through a thunk which already + // fixes up the return value for us. 
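+  // On x86-32 the native ABI returns floating-point results in x87 st(0),
+  // so for non-wasm callees the value is spilled with fstp and reloaded
+  // into the SSE return register expected by JIT code.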
+ if (!callFromWasm) { + if (result == MoveOp::DOUBLE) { + reserveStack(sizeof(double)); + fstp(Operand(esp, 0)); + loadDouble(Operand(esp, 0), ReturnDoubleReg); + freeStack(sizeof(double)); + } else if (result == MoveOp::FLOAT32) { + reserveStack(sizeof(float)); + fstp32(Operand(esp, 0)); + loadFloat32(Operand(esp, 0), ReturnFloat32Reg); + freeStack(sizeof(float)); + } + } + + if (dynamicAlignment_) { + pop(esp); + } + +#ifdef DEBUG + MOZ_ASSERT(inCall_); + inCall_ = false; +#endif +} + +void MacroAssembler::callWithABINoProfiler(Register fun, MoveOp::Type result) { + uint32_t stackAdjust; + callWithABIPre(&stackAdjust); + call(fun); + callWithABIPost(stackAdjust, result); +} + +void MacroAssembler::callWithABINoProfiler(const Address& fun, + MoveOp::Type result) { + uint32_t stackAdjust; + callWithABIPre(&stackAdjust); + call(fun); + callWithABIPost(stackAdjust, result); +} + +// =============================================================== +// Move instructions + +void MacroAssembler::moveValue(const TypedOrValueRegister& src, + const ValueOperand& dest) { + if (src.hasValue()) { + moveValue(src.valueReg(), dest); + return; + } + + MIRType type = src.type(); + AnyRegister reg = src.typedReg(); + + if (!IsFloatingPointType(type)) { + if (reg.gpr() != dest.payloadReg()) { + movl(reg.gpr(), dest.payloadReg()); + } + mov(ImmWord(MIRTypeToTag(type)), dest.typeReg()); + return; + } + + ScratchDoubleScope scratch(*this); + FloatRegister freg = reg.fpu(); + if (type == MIRType::Float32) { + convertFloat32ToDouble(freg, scratch); + freg = scratch; + } + boxDouble(freg, dest, scratch); +} + +void MacroAssembler::moveValue(const ValueOperand& src, + const ValueOperand& dest) { + Register s0 = src.typeReg(); + Register s1 = src.payloadReg(); + Register d0 = dest.typeReg(); + Register d1 = dest.payloadReg(); + + // Either one or both of the source registers could be the same as a + // destination register. + if (s1 == d0) { + if (s0 == d1) { + // If both are, this is just a swap of two registers. + xchgl(d0, d1); + return; + } + // If only one is, copy that source first. + std::swap(s0, s1); + std::swap(d0, d1); + } + + if (s0 != d0) { + movl(s0, d0); + } + if (s1 != d1) { + movl(s1, d1); + } +} + +void MacroAssembler::moveValue(const Value& src, const ValueOperand& dest) { + movl(Imm32(src.toNunboxTag()), dest.typeReg()); + if (src.isGCThing()) { + movl(ImmGCPtr(src.toGCThing()), dest.payloadReg()); + } else { + movl(Imm32(src.toNunboxPayload()), dest.payloadReg()); + } +} + +// =============================================================== +// Branch functions + +void MacroAssembler::loadStoreBuffer(Register ptr, Register buffer) { + if (ptr != buffer) { + movePtr(ptr, buffer); + } + andPtr(Imm32(~gc::ChunkMask), buffer); + loadPtr(Address(buffer, gc::ChunkStoreBufferOffset), buffer); +} + +void MacroAssembler::branchPtrInNurseryChunk(Condition cond, Register ptr, + Register temp, Label* label) { + MOZ_ASSERT(temp != InvalidReg); // A temp register is required for x86. + MOZ_ASSERT(ptr != temp); + movePtr(ptr, temp); + branchPtrInNurseryChunkImpl(cond, temp, label); +} + +void MacroAssembler::branchPtrInNurseryChunk(Condition cond, + const Address& address, + Register temp, Label* label) { + MOZ_ASSERT(temp != InvalidReg); // A temp register is required for x86. 
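+  // Load the cell pointer and defer to the shared helper, which masks the
+  // pointer down to its chunk base and tests whether that chunk has a store
+  // buffer (only nursery chunks do).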
+ loadPtr(address, temp); + branchPtrInNurseryChunkImpl(cond, temp, label); +} + +void MacroAssembler::branchPtrInNurseryChunkImpl(Condition cond, Register ptr, + Label* label) { + MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual); + + andPtr(Imm32(~gc::ChunkMask), ptr); + branchPtr(InvertCondition(cond), Address(ptr, gc::ChunkStoreBufferOffset), + ImmWord(0), label); +} + +void MacroAssembler::branchValueIsNurseryCell(Condition cond, + const Address& address, + Register temp, Label* label) { + MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual); + + Label done; + + branchTestGCThing(Assembler::NotEqual, address, + cond == Assembler::Equal ? &done : label); + branchPtrInNurseryChunk(cond, ToPayload(address), temp, label); + + bind(&done); +} + +void MacroAssembler::branchValueIsNurseryCell(Condition cond, + ValueOperand value, Register temp, + Label* label) { + MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual); + + Label done; + + branchTestGCThing(Assembler::NotEqual, value, + cond == Assembler::Equal ? &done : label); + branchPtrInNurseryChunk(cond, value.payloadReg(), temp, label); + + bind(&done); +} + +void MacroAssembler::branchTestValue(Condition cond, const ValueOperand& lhs, + const Value& rhs, Label* label) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + if (rhs.isGCThing()) { + cmpPtr(lhs.payloadReg(), ImmGCPtr(rhs.toGCThing())); + } else { + cmpPtr(lhs.payloadReg(), ImmWord(rhs.toNunboxPayload())); + } + + if (cond == Equal) { + Label done; + j(NotEqual, &done); + { + cmp32(lhs.typeReg(), Imm32(rhs.toNunboxTag())); + j(Equal, label); + } + bind(&done); + } else { + j(NotEqual, label); + + cmp32(lhs.typeReg(), Imm32(rhs.toNunboxTag())); + j(NotEqual, label); + } +} + +// ======================================================================== +// Memory access primitives. +template <typename T> +void MacroAssembler::storeUnboxedValue(const ConstantOrRegister& value, + MIRType valueType, const T& dest) { + MOZ_ASSERT(valueType < MIRType::Value); + + if (valueType == MIRType::Double) { + storeDouble(value.reg().typedReg().fpu(), dest); + return; + } + + // Store the type tag. + storeTypeTag(ImmType(ValueTypeFromMIRType(valueType)), Operand(dest)); + + // Store the payload. + if (value.constant()) { + storePayload(value.value(), Operand(dest)); + } else { + storePayload(value.reg().typedReg().gpr(), Operand(dest)); + } +} + +template void MacroAssembler::storeUnboxedValue(const ConstantOrRegister& value, + MIRType valueType, + const Address& dest); +template void MacroAssembler::storeUnboxedValue( + const ConstantOrRegister& value, MIRType valueType, + const BaseObjectElementIndex& dest); + +// wasm specific methods, used in both the wasm baseline compiler and ion. 
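+// A pattern shared by the accessors below: each memory instruction is
+// preceded by append(access, size()), which records its code offset in the
+// wasm metadata so that a fault at that instruction can be identified and
+// handled, and atomic-capable accesses are bracketed by
+// memoryBarrierBefore/After according to their synchronization requirements.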
+ +void MacroAssembler::wasmLoad(const wasm::MemoryAccessDesc& access, + Operand srcAddr, AnyRegister out) { + MOZ_ASSERT(srcAddr.kind() == Operand::MEM_REG_DISP || + srcAddr.kind() == Operand::MEM_SCALE); + + MOZ_ASSERT_IF( + access.isZeroExtendSimd128Load(), + access.type() == Scalar::Float32 || access.type() == Scalar::Float64); + MOZ_ASSERT_IF( + access.isSplatSimd128Load(), + access.type() == Scalar::Uint8 || access.type() == Scalar::Uint16 || + access.type() == Scalar::Float32 || access.type() == Scalar::Float64); + MOZ_ASSERT_IF(access.isWidenSimd128Load(), access.type() == Scalar::Float64); + + // NOTE: the generated code must match the assembly code in gen_load in + // GenerateAtomicOperations.py + memoryBarrierBefore(access.sync()); + + append(access, size()); + switch (access.type()) { + case Scalar::Int8: + movsbl(srcAddr, out.gpr()); + break; + case Scalar::Uint8: + if (access.isSplatSimd128Load()) { + vbroadcastb(srcAddr, out.fpu()); + } else { + movzbl(srcAddr, out.gpr()); + } + break; + case Scalar::Int16: + movswl(srcAddr, out.gpr()); + break; + case Scalar::Uint16: + if (access.isSplatSimd128Load()) { + vbroadcastw(srcAddr, out.fpu()); + } else { + movzwl(srcAddr, out.gpr()); + } + break; + case Scalar::Int32: + case Scalar::Uint32: + movl(srcAddr, out.gpr()); + break; + case Scalar::Float32: + if (access.isSplatSimd128Load()) { + vbroadcastss(srcAddr, out.fpu()); + } else { + // vmovss does the right thing also for access.isZeroExtendSimd128Load() + vmovss(srcAddr, out.fpu()); + } + break; + case Scalar::Float64: + if (access.isSplatSimd128Load()) { + vmovddup(srcAddr, out.fpu()); + } else if (access.isWidenSimd128Load()) { + switch (access.widenSimdOp()) { + case wasm::SimdOp::V128Load8x8S: + vpmovsxbw(srcAddr, out.fpu()); + break; + case wasm::SimdOp::V128Load8x8U: + vpmovzxbw(srcAddr, out.fpu()); + break; + case wasm::SimdOp::V128Load16x4S: + vpmovsxwd(srcAddr, out.fpu()); + break; + case wasm::SimdOp::V128Load16x4U: + vpmovzxwd(srcAddr, out.fpu()); + break; + case wasm::SimdOp::V128Load32x2S: + vpmovsxdq(srcAddr, out.fpu()); + break; + case wasm::SimdOp::V128Load32x2U: + vpmovzxdq(srcAddr, out.fpu()); + break; + default: + MOZ_CRASH("Unexpected widening op for wasmLoad"); + } + } else { + // vmovsd does the right thing also for access.isZeroExtendSimd128Load() + vmovsd(srcAddr, out.fpu()); + } + break; + case Scalar::Simd128: + vmovups(srcAddr, out.fpu()); + break; + case Scalar::Int64: + case Scalar::Uint8Clamped: + case Scalar::BigInt64: + case Scalar::BigUint64: + case Scalar::MaxTypedArrayViewType: + MOZ_CRASH("unexpected type"); + } + + memoryBarrierAfter(access.sync()); +} + +void MacroAssembler::wasmLoadI64(const wasm::MemoryAccessDesc& access, + Operand srcAddr, Register64 out) { + // Atomic i64 load must use lock_cmpxchg8b. 
+ MOZ_ASSERT_IF(access.isAtomic(), access.byteSize() <= 4); + MOZ_ASSERT(srcAddr.kind() == Operand::MEM_REG_DISP || + srcAddr.kind() == Operand::MEM_SCALE); + MOZ_ASSERT(!access.isZeroExtendSimd128Load()); // Use wasmLoad() + MOZ_ASSERT(!access.isSplatSimd128Load()); // Use wasmLoad() + MOZ_ASSERT(!access.isWidenSimd128Load()); // Use wasmLoad() + + memoryBarrierBefore(access.sync()); + + append(access, size()); + switch (access.type()) { + case Scalar::Int8: + MOZ_ASSERT(out == Register64(edx, eax)); + movsbl(srcAddr, out.low); + + cdq(); + break; + case Scalar::Uint8: + movzbl(srcAddr, out.low); + + xorl(out.high, out.high); + break; + case Scalar::Int16: + MOZ_ASSERT(out == Register64(edx, eax)); + movswl(srcAddr, out.low); + + cdq(); + break; + case Scalar::Uint16: + movzwl(srcAddr, out.low); + + xorl(out.high, out.high); + break; + case Scalar::Int32: + MOZ_ASSERT(out == Register64(edx, eax)); + movl(srcAddr, out.low); + + cdq(); + break; + case Scalar::Uint32: + movl(srcAddr, out.low); + + xorl(out.high, out.high); + break; + case Scalar::Int64: { + if (srcAddr.kind() == Operand::MEM_SCALE) { + MOZ_RELEASE_ASSERT(srcAddr.toBaseIndex().base != out.low && + srcAddr.toBaseIndex().index != out.low); + } + if (srcAddr.kind() == Operand::MEM_REG_DISP) { + MOZ_RELEASE_ASSERT(srcAddr.toAddress().base != out.low); + } + + movl(LowWord(srcAddr), out.low); + + append(access, size()); + movl(HighWord(srcAddr), out.high); + + break; + } + case Scalar::Float32: + case Scalar::Float64: + MOZ_CRASH("non-int64 loads should use load()"); + case Scalar::Simd128: + case Scalar::Uint8Clamped: + case Scalar::BigInt64: + case Scalar::BigUint64: + case Scalar::MaxTypedArrayViewType: + MOZ_CRASH("unexpected array type"); + } + + memoryBarrierAfter(access.sync()); +} + +void MacroAssembler::wasmStore(const wasm::MemoryAccessDesc& access, + AnyRegister value, Operand dstAddr) { + MOZ_ASSERT(dstAddr.kind() == Operand::MEM_REG_DISP || + dstAddr.kind() == Operand::MEM_SCALE); + + // NOTE: the generated code must match the assembly code in gen_store in + // GenerateAtomicOperations.py + memoryBarrierBefore(access.sync()); + + append(access, size()); + switch (access.type()) { + case Scalar::Int8: + case Scalar::Uint8Clamped: + case Scalar::Uint8: + movb(value.gpr(), dstAddr); + break; + case Scalar::Int16: + case Scalar::Uint16: + movw(value.gpr(), dstAddr); + break; + case Scalar::Int32: + case Scalar::Uint32: + movl(value.gpr(), dstAddr); + break; + case Scalar::Float32: + vmovss(value.fpu(), dstAddr); + break; + case Scalar::Float64: + vmovsd(value.fpu(), dstAddr); + break; + case Scalar::Simd128: + vmovups(value.fpu(), dstAddr); + break; + case Scalar::Int64: + MOZ_CRASH("Should be handled in storeI64."); + case Scalar::MaxTypedArrayViewType: + case Scalar::BigInt64: + case Scalar::BigUint64: + MOZ_CRASH("unexpected type"); + } + + memoryBarrierAfter(access.sync()); +} + +void MacroAssembler::wasmStoreI64(const wasm::MemoryAccessDesc& access, + Register64 value, Operand dstAddr) { + // Atomic i64 store must use lock_cmpxchg8b. + MOZ_ASSERT(!access.isAtomic()); + MOZ_ASSERT(dstAddr.kind() == Operand::MEM_REG_DISP || + dstAddr.kind() == Operand::MEM_SCALE); + + // Store the high word first so as to hit guard-page-based OOB checks without + // writing partial data. 
+ append(access, size()); + movl(value.high, HighWord(dstAddr)); + + append(access, size()); + movl(value.low, LowWord(dstAddr)); +} + +template <typename T> +static void AtomicLoad64(MacroAssembler& masm, + const wasm::MemoryAccessDesc* access, const T& address, + Register64 temp, Register64 output) { + MOZ_ASSERT(temp.low == ebx); + MOZ_ASSERT(temp.high == ecx); + MOZ_ASSERT(output.high == edx); + MOZ_ASSERT(output.low == eax); + + // In the event edx:eax matches what's in memory, ecx:ebx will be + // stored. The two pairs must therefore have the same values. + masm.movl(edx, ecx); + masm.movl(eax, ebx); + + if (access) { + masm.append(*access, masm.size()); + } + masm.lock_cmpxchg8b(edx, eax, ecx, ebx, Operand(address)); +} + +void MacroAssembler::wasmAtomicLoad64(const wasm::MemoryAccessDesc& access, + const Address& mem, Register64 temp, + Register64 output) { + AtomicLoad64(*this, &access, mem, temp, output); +} + +void MacroAssembler::wasmAtomicLoad64(const wasm::MemoryAccessDesc& access, + const BaseIndex& mem, Register64 temp, + Register64 output) { + AtomicLoad64(*this, &access, mem, temp, output); +} + +template <typename T> +static void CompareExchange64(MacroAssembler& masm, + const wasm::MemoryAccessDesc* access, + const T& mem, Register64 expected, + Register64 replacement, Register64 output) { + MOZ_ASSERT(expected == output); + MOZ_ASSERT(expected.high == edx); + MOZ_ASSERT(expected.low == eax); + MOZ_ASSERT(replacement.high == ecx); + MOZ_ASSERT(replacement.low == ebx); + + // NOTE: the generated code must match the assembly code in gen_cmpxchg in + // GenerateAtomicOperations.py + if (access) { + masm.append(*access, masm.size()); + } + masm.lock_cmpxchg8b(edx, eax, ecx, ebx, Operand(mem)); +} + +void MacroAssembler::wasmCompareExchange64(const wasm::MemoryAccessDesc& access, + const Address& mem, + Register64 expected, + Register64 replacement, + Register64 output) { + CompareExchange64(*this, &access, mem, expected, replacement, output); +} + +void MacroAssembler::wasmCompareExchange64(const wasm::MemoryAccessDesc& access, + const BaseIndex& mem, + Register64 expected, + Register64 replacement, + Register64 output) { + CompareExchange64(*this, &access, mem, expected, replacement, output); +} + +template <typename T> +static void AtomicExchange64(MacroAssembler& masm, + const wasm::MemoryAccessDesc* access, const T& mem, + Register64 value, Register64 output) { + MOZ_ASSERT(value.low == ebx); + MOZ_ASSERT(value.high == ecx); + MOZ_ASSERT(output.high == edx); + MOZ_ASSERT(output.low == eax); + + // edx:eax has garbage initially, and that is the best we can do unless + // we can guess with high probability what's in memory. 
+ + MOZ_ASSERT(mem.base != edx && mem.base != eax); + if constexpr (std::is_same_v<T, BaseIndex>) { + MOZ_ASSERT(mem.index != edx && mem.index != eax); + } else { + static_assert(std::is_same_v<T, Address>); + } + + Label again; + masm.bind(&again); + if (access) { + masm.append(*access, masm.size()); + } + masm.lock_cmpxchg8b(edx, eax, ecx, ebx, Operand(mem)); + masm.j(MacroAssembler::NonZero, &again); +} + +void MacroAssembler::wasmAtomicExchange64(const wasm::MemoryAccessDesc& access, + const Address& mem, Register64 value, + Register64 output) { + AtomicExchange64(*this, &access, mem, value, output); +} + +void MacroAssembler::wasmAtomicExchange64(const wasm::MemoryAccessDesc& access, + const BaseIndex& mem, + Register64 value, Register64 output) { + AtomicExchange64(*this, &access, mem, value, output); +} + +template <typename T> +static void AtomicFetchOp64(MacroAssembler& masm, + const wasm::MemoryAccessDesc* access, AtomicOp op, + const Address& value, const T& mem, Register64 temp, + Register64 output) { + // We don't have enough registers for all the operands on x86, so the rhs + // operand is in memory. + +#define ATOMIC_OP_BODY(OPERATE) \ + do { \ + MOZ_ASSERT(output.low == eax); \ + MOZ_ASSERT(output.high == edx); \ + MOZ_ASSERT(temp.low == ebx); \ + MOZ_ASSERT(temp.high == ecx); \ + if (access) { \ + masm.append(*access, masm.size()); \ + } \ + masm.load64(mem, output); \ + Label again; \ + masm.bind(&again); \ + masm.move64(output, temp); \ + masm.OPERATE(Operand(value), temp); \ + masm.lock_cmpxchg8b(edx, eax, ecx, ebx, Operand(mem)); \ + masm.j(MacroAssembler::NonZero, &again); \ + } while (0) + + switch (op) { + case AtomicFetchAddOp: + ATOMIC_OP_BODY(add64FromMemory); + break; + case AtomicFetchSubOp: + ATOMIC_OP_BODY(sub64FromMemory); + break; + case AtomicFetchAndOp: + ATOMIC_OP_BODY(and64FromMemory); + break; + case AtomicFetchOrOp: + ATOMIC_OP_BODY(or64FromMemory); + break; + case AtomicFetchXorOp: + ATOMIC_OP_BODY(xor64FromMemory); + break; + default: + MOZ_CRASH(); + } + +#undef ATOMIC_OP_BODY +} + +void MacroAssembler::wasmAtomicFetchOp64(const wasm::MemoryAccessDesc& access, + AtomicOp op, const Address& value, + const Address& mem, Register64 temp, + Register64 output) { + AtomicFetchOp64(*this, &access, op, value, mem, temp, output); +} + +void MacroAssembler::wasmAtomicFetchOp64(const wasm::MemoryAccessDesc& access, + AtomicOp op, const Address& value, + const BaseIndex& mem, Register64 temp, + Register64 output) { + AtomicFetchOp64(*this, &access, op, value, mem, temp, output); +} + +void MacroAssembler::wasmTruncateDoubleToUInt32(FloatRegister input, + Register output, + bool isSaturating, + Label* oolEntry) { + Label done; + vcvttsd2si(input, output); + branch32(Assembler::Condition::NotSigned, output, Imm32(0), &done); + + ScratchDoubleScope fpscratch(*this); + loadConstantDouble(double(int32_t(0x80000000)), fpscratch); + addDouble(input, fpscratch); + vcvttsd2si(fpscratch, output); + + branch32(Assembler::Condition::Signed, output, Imm32(0), oolEntry); + or32(Imm32(0x80000000), output); + + bind(&done); +} + +void MacroAssembler::wasmTruncateFloat32ToUInt32(FloatRegister input, + Register output, + bool isSaturating, + Label* oolEntry) { + Label done; + vcvttss2si(input, output); + branch32(Assembler::Condition::NotSigned, output, Imm32(0), &done); + + ScratchFloat32Scope fpscratch(*this); + loadConstantFloat32(float(int32_t(0x80000000)), fpscratch); + addFloat32(input, fpscratch); + vcvttss2si(fpscratch, output); + + 
branch32(Assembler::Condition::Signed, output, Imm32(0), oolEntry); + or32(Imm32(0x80000000), output); + + bind(&done); +} + +void MacroAssembler::wasmTruncateDoubleToInt64( + FloatRegister input, Register64 output, bool isSaturating, Label* oolEntry, + Label* oolRejoin, FloatRegister tempReg) { + Label ok; + Register temp = output.high; + + reserveStack(2 * sizeof(int32_t)); + storeDouble(input, Operand(esp, 0)); + + truncateDoubleToInt64(Address(esp, 0), Address(esp, 0), temp); + load64(Address(esp, 0), output); + + cmpl(Imm32(0), Operand(esp, 0)); + j(Assembler::NotEqual, &ok); + + cmpl(Imm32(1), Operand(esp, 4)); + j(Assembler::Overflow, oolEntry); + + bind(&ok); + bind(oolRejoin); + + freeStack(2 * sizeof(int32_t)); +} + +void MacroAssembler::wasmTruncateFloat32ToInt64( + FloatRegister input, Register64 output, bool isSaturating, Label* oolEntry, + Label* oolRejoin, FloatRegister tempReg) { + Label ok; + Register temp = output.high; + + reserveStack(2 * sizeof(int32_t)); + storeFloat32(input, Operand(esp, 0)); + + truncateFloat32ToInt64(Address(esp, 0), Address(esp, 0), temp); + load64(Address(esp, 0), output); + + cmpl(Imm32(0), Operand(esp, 0)); + j(Assembler::NotEqual, &ok); + + cmpl(Imm32(1), Operand(esp, 4)); + j(Assembler::Overflow, oolEntry); + + bind(&ok); + bind(oolRejoin); + + freeStack(2 * sizeof(int32_t)); +} + +void MacroAssembler::wasmTruncateDoubleToUInt64( + FloatRegister input, Register64 output, bool isSaturating, Label* oolEntry, + Label* oolRejoin, FloatRegister tempReg) { + Label fail, convert; + Register temp = output.high; + + // Make sure input fits in uint64. + reserveStack(2 * sizeof(int32_t)); + storeDouble(input, Operand(esp, 0)); + branchDoubleNotInUInt64Range(Address(esp, 0), temp, &fail); + size_t stackBeforeBranch = framePushed(); + jump(&convert); + + bind(&fail); + freeStack(2 * sizeof(int32_t)); + jump(oolEntry); + if (isSaturating) { + // The OOL path computes the right values. + setFramePushed(stackBeforeBranch); + } else { + // The OOL path just checks the input values. + bind(oolRejoin); + reserveStack(2 * sizeof(int32_t)); + storeDouble(input, Operand(esp, 0)); + } + + // Convert the double/float to uint64. + bind(&convert); + truncateDoubleToUInt64(Address(esp, 0), Address(esp, 0), temp, tempReg); + + // Load value into int64 register. + load64(Address(esp, 0), output); + freeStack(2 * sizeof(int32_t)); + + if (isSaturating) { + bind(oolRejoin); + } +} + +void MacroAssembler::wasmTruncateFloat32ToUInt64( + FloatRegister input, Register64 output, bool isSaturating, Label* oolEntry, + Label* oolRejoin, FloatRegister tempReg) { + Label fail, convert; + Register temp = output.high; + + // Make sure input fits in uint64. + reserveStack(2 * sizeof(int32_t)); + storeFloat32(input, Operand(esp, 0)); + branchFloat32NotInUInt64Range(Address(esp, 0), temp, &fail); + size_t stackBeforeBranch = framePushed(); + jump(&convert); + + bind(&fail); + freeStack(2 * sizeof(int32_t)); + jump(oolEntry); + if (isSaturating) { + // The OOL path computes the right values. + setFramePushed(stackBeforeBranch); + } else { + // The OOL path just checks the input values. + bind(oolRejoin); + reserveStack(2 * sizeof(int32_t)); + storeFloat32(input, Operand(esp, 0)); + } + + // Convert the float to uint64. + bind(&convert); + truncateFloat32ToUInt64(Address(esp, 0), Address(esp, 0), temp, tempReg); + + // Load value into int64 register. 
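+  // (As in the double variant above, the converted bits now sit in the two
+  // stack words reserved on entry: low word at esp+0, high word at esp+4, so
+  // a register-pair load recovers the uint64 result.)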
+ load64(Address(esp, 0), output); + freeStack(2 * sizeof(int32_t)); + + if (isSaturating) { + bind(oolRejoin); + } +} + +// ======================================================================== +// Primitive atomic operations. + +void MacroAssembler::atomicLoad64(const Synchronization&, const Address& mem, + Register64 temp, Register64 output) { + AtomicLoad64(*this, nullptr, mem, temp, output); +} + +void MacroAssembler::atomicLoad64(const Synchronization&, const BaseIndex& mem, + Register64 temp, Register64 output) { + AtomicLoad64(*this, nullptr, mem, temp, output); +} + +void MacroAssembler::atomicStore64(const Synchronization&, const Address& mem, + Register64 value, Register64 temp) { + AtomicExchange64(*this, nullptr, mem, value, temp); +} + +void MacroAssembler::atomicStore64(const Synchronization&, const BaseIndex& mem, + Register64 value, Register64 temp) { + AtomicExchange64(*this, nullptr, mem, value, temp); +} + +void MacroAssembler::compareExchange64(const Synchronization&, + const Address& mem, Register64 expected, + Register64 replacement, + Register64 output) { + CompareExchange64(*this, nullptr, mem, expected, replacement, output); +} + +void MacroAssembler::compareExchange64(const Synchronization&, + const BaseIndex& mem, + Register64 expected, + Register64 replacement, + Register64 output) { + CompareExchange64(*this, nullptr, mem, expected, replacement, output); +} + +void MacroAssembler::atomicExchange64(const Synchronization&, + const Address& mem, Register64 value, + Register64 output) { + AtomicExchange64(*this, nullptr, mem, value, output); +} + +void MacroAssembler::atomicExchange64(const Synchronization&, + const BaseIndex& mem, Register64 value, + Register64 output) { + AtomicExchange64(*this, nullptr, mem, value, output); +} + +void MacroAssembler::atomicFetchOp64(const Synchronization&, AtomicOp op, + const Address& value, const Address& mem, + Register64 temp, Register64 output) { + AtomicFetchOp64(*this, nullptr, op, value, mem, temp, output); +} + +void MacroAssembler::atomicFetchOp64(const Synchronization&, AtomicOp op, + const Address& value, const BaseIndex& mem, + Register64 temp, Register64 output) { + AtomicFetchOp64(*this, nullptr, op, value, mem, temp, output); +} + +// ======================================================================== +// Convert floating point. + +bool MacroAssembler::convertUInt64ToDoubleNeedsTemp() { return HasSSE3(); } + +void MacroAssembler::convertUInt64ToDouble(Register64 src, FloatRegister dest, + Register temp) { + // SUBPD needs SSE2, HADDPD needs SSE3. + if (!HasSSE3()) { + MOZ_ASSERT(temp == Register::Invalid()); + + // Zero the dest register to break dependencies, see convertInt32ToDouble. + zeroDouble(dest); + + Push(src.high); + Push(src.low); + fild(Operand(esp, 0)); + + Label notNegative; + branch32(Assembler::NotSigned, src.high, Imm32(0), ¬Negative); + double add_constant = 18446744073709551616.0; // 2^64 + store64(Imm64(mozilla::BitwiseCast<uint64_t>(add_constant)), + Address(esp, 0)); + fld(Operand(esp, 0)); + faddp(); + bind(¬Negative); + + fstp(Operand(esp, 0)); + vmovsd(Address(esp, 0), dest); + freeStack(2 * sizeof(intptr_t)); + return; + } + + // Following operation uses entire 128-bit of dest XMM register. + // Currently higher 64-bit is free when we have access to lower 64-bit. 
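+  // The SSE sequence below is the usual "magic exponent" trick; in plain
+  // arithmetic (a sketch, not extra emitted code):
+  //
+  //   double(src) = ((2^84 + hi * 2^32) - 2^84) + ((2^52 + lo) - 2^52)
+  //
+  // Gluing the exponent words 0x45300000 / 0x43300000 in front of the two
+  // 32-bit halves constructs the biased doubles exactly, the packed subtract
+  // removes the bias without rounding, and the final horizontal add combines
+  // the halves (the only step that can round).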
+ MOZ_ASSERT(dest.size() == 8); + FloatRegister dest128 = + FloatRegister(dest.encoding(), FloatRegisters::Simd128); + + // Assume that src is represented as following: + // src = 0x HHHHHHHH LLLLLLLL + + { + // Move src to dest (=dest128) and ScratchInt32x4Reg (=scratch): + // dest = 0x 00000000 00000000 00000000 LLLLLLLL + // scratch = 0x 00000000 00000000 00000000 HHHHHHHH + ScratchSimd128Scope scratch(*this); + vmovd(src.low, dest128); + vmovd(src.high, scratch); + + // Unpack and interleave dest and scratch to dest: + // dest = 0x 00000000 00000000 HHHHHHHH LLLLLLLL + vpunpckldq(scratch, dest128, dest128); + } + + // Unpack and interleave dest and a constant C1 to dest: + // C1 = 0x 00000000 00000000 45300000 43300000 + // dest = 0x 45300000 HHHHHHHH 43300000 LLLLLLLL + // here, each 64-bit part of dest represents following double: + // HI(dest) = 0x 1.00000HHHHHHHH * 2**84 == 2**84 + 0x HHHHHHHH 00000000 + // LO(dest) = 0x 1.00000LLLLLLLL * 2**52 == 2**52 + 0x 00000000 LLLLLLLL + // See convertUInt64ToDouble for the details. + static const int32_t CST1[4] = { + 0x43300000, + 0x45300000, + 0x0, + 0x0, + }; + + vpunpckldqSimd128(SimdConstant::CreateX4(CST1), dest128, dest128); + + // Subtract a constant C2 from dest, for each 64-bit part: + // C2 = 0x 45300000 00000000 43300000 00000000 + // here, each 64-bit part of C2 represents following double: + // HI(C2) = 0x 1.0000000000000 * 2**84 == 2**84 + // LO(C2) = 0x 1.0000000000000 * 2**52 == 2**52 + // after the operation each 64-bit part of dest represents following: + // HI(dest) = double(0x HHHHHHHH 00000000) + // LO(dest) = double(0x 00000000 LLLLLLLL) + static const int32_t CST2[4] = { + 0x0, + 0x43300000, + 0x0, + 0x45300000, + }; + + vsubpdSimd128(SimdConstant::CreateX4(CST2), dest128, dest128); + + // Add HI(dest) and LO(dest) in double and store it into LO(dest), + // LO(dest) = double(0x HHHHHHHH 00000000) + double(0x 00000000 LLLLLLLL) + // = double(0x HHHHHHHH LLLLLLLL) + // = double(src) + vhaddpd(dest128, dest128); +} + +void MacroAssembler::convertInt64ToDouble(Register64 input, + FloatRegister output) { + // Zero the output register to break dependencies, see convertInt32ToDouble. + zeroDouble(output); + + Push(input.high); + Push(input.low); + fild(Operand(esp, 0)); + + fstp(Operand(esp, 0)); + vmovsd(Address(esp, 0), output); + freeStack(2 * sizeof(intptr_t)); +} + +void MacroAssembler::convertUInt64ToFloat32(Register64 input, + FloatRegister output, + Register temp) { + // Zero the dest register to break dependencies, see convertInt32ToDouble. + zeroDouble(output); + + // Set the FPU precision to 80 bits. + reserveStack(2 * sizeof(intptr_t)); + fnstcw(Operand(esp, 0)); + load32(Operand(esp, 0), temp); + orl(Imm32(0x300), temp); + store32(temp, Operand(esp, sizeof(intptr_t))); + fldcw(Operand(esp, sizeof(intptr_t))); + + Push(input.high); + Push(input.low); + fild(Operand(esp, 0)); + + Label notNegative; + branch32(Assembler::NotSigned, input.high, Imm32(0), ¬Negative); + double add_constant = 18446744073709551616.0; // 2^64 + uint64_t add_constant_u64 = mozilla::BitwiseCast<uint64_t>(add_constant); + store64(Imm64(add_constant_u64), Address(esp, 0)); + + fld(Operand(esp, 0)); + faddp(); + bind(¬Negative); + + fstp32(Operand(esp, 0)); + vmovss(Address(esp, 0), output); + freeStack(2 * sizeof(intptr_t)); + + // Restore FPU precision to the initial value. 
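+  // (The orl(0x300, temp) above set the x87 precision-control field to
+  // 64-bit significands, so the fild/fadd results are not rounded to double
+  // precision before the final fstp32. After the freeStack above, esp points
+  // back at the slot holding the saved control word, so reloading it undoes
+  // the change.)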
+ fldcw(Operand(esp, 0)); + freeStack(2 * sizeof(intptr_t)); +} + +void MacroAssembler::convertInt64ToFloat32(Register64 input, + FloatRegister output) { + // Zero the output register to break dependencies, see convertInt32ToDouble. + zeroDouble(output); + + Push(input.high); + Push(input.low); + fild(Operand(esp, 0)); + + fstp32(Operand(esp, 0)); + vmovss(Address(esp, 0), output); + freeStack(2 * sizeof(intptr_t)); +} + +void MacroAssembler::convertIntPtrToDouble(Register src, FloatRegister dest) { + convertInt32ToDouble(src, dest); +} + +void MacroAssembler::PushBoxed(FloatRegister reg) { Push(reg); } + +CodeOffset MacroAssembler::moveNearAddressWithPatch(Register dest) { + return movWithPatch(ImmPtr(nullptr), dest); +} + +void MacroAssembler::patchNearAddressMove(CodeLocationLabel loc, + CodeLocationLabel target) { + PatchDataWithValueCheck(loc, ImmPtr(target.raw()), ImmPtr(nullptr)); +} + +void MacroAssembler::wasmBoundsCheck64(Condition cond, Register64 index, + Register64 boundsCheckLimit, Label* ok) { + Label notOk; + cmp32(index.high, Imm32(0)); + j(Assembler::NonZero, ¬Ok); + wasmBoundsCheck32(cond, index.low, boundsCheckLimit.low, ok); + bind(¬Ok); +} + +void MacroAssembler::wasmBoundsCheck64(Condition cond, Register64 index, + Address boundsCheckLimit, Label* ok) { + Label notOk; + cmp32(index.high, Imm32(0)); + j(Assembler::NonZero, ¬Ok); + wasmBoundsCheck32(cond, index.low, boundsCheckLimit, ok); + bind(¬Ok); +} + +//}}} check_macroassembler_style diff --git a/js/src/jit/x86/MacroAssembler-x86.h b/js/src/jit/x86/MacroAssembler-x86.h new file mode 100644 index 0000000000..6a99a44d04 --- /dev/null +++ b/js/src/jit/x86/MacroAssembler-x86.h @@ -0,0 +1,1149 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef jit_x86_MacroAssembler_x86_h +#define jit_x86_MacroAssembler_x86_h + +#include "jit/JitOptions.h" +#include "jit/MoveResolver.h" +#include "jit/x86-shared/MacroAssembler-x86-shared.h" +#include "js/HeapAPI.h" +#include "wasm/WasmBuiltins.h" + +namespace js { +namespace jit { + +// See documentation for ScratchTagScope and ScratchTagScopeRelease in +// MacroAssembler-x64.h. + +class ScratchTagScope { + const ValueOperand& v_; + + public: + ScratchTagScope(MacroAssembler&, const ValueOperand& v) : v_(v) {} + operator Register() { return v_.typeReg(); } + void release() {} + void reacquire() {} +}; + +class ScratchTagScopeRelease { + public: + explicit ScratchTagScopeRelease(ScratchTagScope*) {} +}; + +class MacroAssemblerX86 : public MacroAssemblerX86Shared { + private: + // Perform a downcast. Should be removed by Bug 996602. + MacroAssembler& asMasm(); + const MacroAssembler& asMasm() const; + + protected: + MoveResolver moveResolver_; + + private: + Operand payloadOfAfterStackPush(const Address& address) { + // If we are basing off %esp, the address will be invalid after the + // first push. + if (address.base == StackPointer) { + return Operand(address.base, address.offset + 4); + } + return payloadOf(address); + } + Operand payloadOfAfterStackPush(const BaseIndex& address) { + // If we are basing off %esp, the address will be invalid after the + // first push. 
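+    // (pushValue() pushes the tag word first, which moves %esp down by
+    // sizeof(void*) before the payload operand is read; the +4 below keeps
+    // the payload address pointing at the same slot.)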
+ if (address.base == StackPointer) { + return Operand(address.base, address.index, address.scale, + address.offset + 4); + } + return payloadOf(address); + } + Operand payloadOf(const Address& address) { + return Operand(address.base, address.offset); + } + Operand payloadOf(const BaseIndex& address) { + return Operand(address.base, address.index, address.scale, address.offset); + } + Operand tagOf(const Address& address) { + return Operand(address.base, address.offset + 4); + } + Operand tagOf(const BaseIndex& address) { + return Operand(address.base, address.index, address.scale, + address.offset + 4); + } + + void setupABICall(uint32_t args); + + void vpPatchOpSimd128(const SimdConstant& v, FloatRegister reg, + void (X86Encoding::BaseAssemblerX86::*op)( + const void* address, + X86Encoding::XMMRegisterID srcId, + X86Encoding::XMMRegisterID destId)) { + vpPatchOpSimd128(v, reg, reg, op); + } + + void vpPatchOpSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest, + void (X86Encoding::BaseAssemblerX86::*op)( + const void* address, + X86Encoding::XMMRegisterID srcId, + X86Encoding::XMMRegisterID destId)); + + void vpPatchOpSimd128(const SimdConstant& v, FloatRegister reg, + size_t (X86Encoding::BaseAssemblerX86::*op)( + const void* address, + X86Encoding::XMMRegisterID srcId, + X86Encoding::XMMRegisterID destId)) { + vpPatchOpSimd128(v, reg, reg, op); + } + + void vpPatchOpSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest, + size_t (X86Encoding::BaseAssemblerX86::*op)( + const void* address, + X86Encoding::XMMRegisterID srcId, + X86Encoding::XMMRegisterID destId)); + + public: + using MacroAssemblerX86Shared::call; + using MacroAssemblerX86Shared::load32; + using MacroAssemblerX86Shared::store16; + using MacroAssemblerX86Shared::store32; + + MacroAssemblerX86() {} + + // The buffer is about to be linked, make sure any constant pools or excess + // bookkeeping has been flushed to the instruction stream. + void finish(); + + ///////////////////////////////////////////////////////////////// + // X86-specific interface. 
+ ///////////////////////////////////////////////////////////////// + + Operand ToPayload(Operand base) { return base; } + Address ToPayload(Address base) { return base; } + BaseIndex ToPayload(BaseIndex base) { return base; } + Operand ToType(Operand base) { + switch (base.kind()) { + case Operand::MEM_REG_DISP: + return Operand(Register::FromCode(base.base()), + base.disp() + sizeof(void*)); + + case Operand::MEM_SCALE: + return Operand(Register::FromCode(base.base()), + Register::FromCode(base.index()), base.scale(), + base.disp() + sizeof(void*)); + + default: + MOZ_CRASH("unexpected operand kind"); + } + } + Address ToType(Address base) { return ToType(Operand(base)).toAddress(); } + BaseIndex ToType(BaseIndex base) { + return ToType(Operand(base)).toBaseIndex(); + } + + template <typename T> + void add64FromMemory(const T& address, Register64 dest) { + addl(Operand(LowWord(address)), dest.low); + adcl(Operand(HighWord(address)), dest.high); + } + template <typename T> + void sub64FromMemory(const T& address, Register64 dest) { + subl(Operand(LowWord(address)), dest.low); + sbbl(Operand(HighWord(address)), dest.high); + } + template <typename T> + void and64FromMemory(const T& address, Register64 dest) { + andl(Operand(LowWord(address)), dest.low); + andl(Operand(HighWord(address)), dest.high); + } + template <typename T> + void or64FromMemory(const T& address, Register64 dest) { + orl(Operand(LowWord(address)), dest.low); + orl(Operand(HighWord(address)), dest.high); + } + template <typename T> + void xor64FromMemory(const T& address, Register64 dest) { + xorl(Operand(LowWord(address)), dest.low); + xorl(Operand(HighWord(address)), dest.high); + } + + ///////////////////////////////////////////////////////////////// + // X86/X64-common interface. + ///////////////////////////////////////////////////////////////// + void storeValue(ValueOperand val, Operand dest) { + movl(val.payloadReg(), ToPayload(dest)); + movl(val.typeReg(), ToType(dest)); + } + void storeValue(ValueOperand val, const Address& dest) { + storeValue(val, Operand(dest)); + } + template <typename T> + void storeValue(JSValueType type, Register reg, const T& dest) { + storeTypeTag(ImmTag(JSVAL_TYPE_TO_TAG(type)), Operand(dest)); + storePayload(reg, Operand(dest)); + } + template <typename T> + void storeValue(const Value& val, const T& dest) { + storeTypeTag(ImmTag(val.toNunboxTag()), Operand(dest)); + storePayload(val, Operand(dest)); + } + void storeValue(ValueOperand val, BaseIndex dest) { + storeValue(val, Operand(dest)); + } + void storeValue(const Address& src, const Address& dest, Register temp) { + MOZ_ASSERT(src.base != temp); + MOZ_ASSERT(dest.base != temp); + + load32(ToType(src), temp); + store32(temp, ToType(dest)); + + load32(ToPayload(src), temp); + store32(temp, ToPayload(dest)); + } + void storePrivateValue(Register src, const Address& dest) { + store32(Imm32(0), ToType(dest)); + store32(src, ToPayload(dest)); + } + void storePrivateValue(ImmGCPtr imm, const Address& dest) { + store32(Imm32(0), ToType(dest)); + movl(imm, Operand(ToPayload(dest))); + } + void loadValue(Operand src, ValueOperand val) { + Operand payload = ToPayload(src); + Operand type = ToType(src); + + // Ensure that loading the payload does not erase the pointer to the + // Value in memory or the index. + Register baseReg = Register::FromCode(src.base()); + Register indexReg = (src.kind() == Operand::MEM_SCALE) + ? 
Register::FromCode(src.index()) + : InvalidReg; + + // If we have a BaseIndex that uses both result registers, first compute + // the address and then load the Value from there. + if ((baseReg == val.payloadReg() && indexReg == val.typeReg()) || + (baseReg == val.typeReg() && indexReg == val.payloadReg())) { + computeEffectiveAddress(src, val.scratchReg()); + loadValue(Address(val.scratchReg(), 0), val); + return; + } + + if (baseReg == val.payloadReg() || indexReg == val.payloadReg()) { + MOZ_ASSERT(baseReg != val.typeReg()); + MOZ_ASSERT(indexReg != val.typeReg()); + + movl(type, val.typeReg()); + movl(payload, val.payloadReg()); + } else { + MOZ_ASSERT(baseReg != val.payloadReg()); + MOZ_ASSERT(indexReg != val.payloadReg()); + + movl(payload, val.payloadReg()); + movl(type, val.typeReg()); + } + } + void loadValue(Address src, ValueOperand val) { + loadValue(Operand(src), val); + } + void loadValue(const BaseIndex& src, ValueOperand val) { + loadValue(Operand(src), val); + } + void loadUnalignedValue(const Address& src, ValueOperand dest) { + loadValue(src, dest); + } + void tagValue(JSValueType type, Register payload, ValueOperand dest) { + MOZ_ASSERT(dest.typeReg() != dest.payloadReg()); + if (payload != dest.payloadReg()) { + movl(payload, dest.payloadReg()); + } + movl(ImmType(type), dest.typeReg()); + } + void pushValue(ValueOperand val) { + push(val.typeReg()); + push(val.payloadReg()); + } + void popValue(ValueOperand val) { + pop(val.payloadReg()); + pop(val.typeReg()); + } + void pushValue(const Value& val) { + push(Imm32(val.toNunboxTag())); + if (val.isGCThing()) { + push(ImmGCPtr(val.toGCThing())); + } else { + push(Imm32(val.toNunboxPayload())); + } + } + void pushValue(JSValueType type, Register reg) { + push(ImmTag(JSVAL_TYPE_TO_TAG(type))); + push(reg); + } + void pushValue(const Address& addr) { + push(tagOf(addr)); + push(payloadOfAfterStackPush(addr)); + } + void pushValue(const BaseIndex& addr, Register scratch) { + push(tagOf(addr)); + push(payloadOfAfterStackPush(addr)); + } + void push64(Register64 src) { + push(src.high); + push(src.low); + } + void pop64(Register64 dest) { + pop(dest.low); + pop(dest.high); + } + void storePayload(const Value& val, Operand dest) { + if (val.isGCThing()) { + movl(ImmGCPtr(val.toGCThing()), ToPayload(dest)); + } else { + movl(Imm32(val.toNunboxPayload()), ToPayload(dest)); + } + } + void storePayload(Register src, Operand dest) { movl(src, ToPayload(dest)); } + void storeTypeTag(ImmTag tag, Operand dest) { movl(tag, ToType(dest)); } + + void movePtr(Register src, Register dest) { movl(src, dest); } + void movePtr(Register src, const Operand& dest) { movl(src, dest); } + + void splitTagForTest(const ValueOperand& value, ScratchTagScope& tag) { + MOZ_ASSERT(value.typeReg() == tag); + } + + Condition testUndefined(Condition cond, Register tag) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmp32(tag, ImmTag(JSVAL_TAG_UNDEFINED)); + return cond; + } + Condition testBoolean(Condition cond, Register tag) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmp32(tag, ImmTag(JSVAL_TAG_BOOLEAN)); + return cond; + } + Condition testInt32(Condition cond, Register tag) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmp32(tag, ImmTag(JSVAL_TAG_INT32)); + return cond; + } + Condition testDouble(Condition cond, Register tag) { + MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual); + Condition actual = (cond == Equal) ? 
Below : AboveOrEqual; + cmp32(tag, ImmTag(JSVAL_TAG_CLEAR)); + return actual; + } + Condition testNull(Condition cond, Register tag) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmp32(tag, ImmTag(JSVAL_TAG_NULL)); + return cond; + } + Condition testString(Condition cond, Register tag) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmp32(tag, ImmTag(JSVAL_TAG_STRING)); + return cond; + } + Condition testSymbol(Condition cond, Register tag) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmp32(tag, ImmTag(JSVAL_TAG_SYMBOL)); + return cond; + } + Condition testBigInt(Condition cond, Register tag) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmp32(tag, ImmTag(JSVAL_TAG_BIGINT)); + return cond; + } + Condition testObject(Condition cond, Register tag) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmp32(tag, ImmTag(JSVAL_TAG_OBJECT)); + return cond; + } + Condition testNumber(Condition cond, Register tag) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmp32(tag, ImmTag(JS::detail::ValueUpperInclNumberTag)); + return cond == Equal ? BelowOrEqual : Above; + } + Condition testGCThing(Condition cond, Register tag) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmp32(tag, ImmTag(JS::detail::ValueLowerInclGCThingTag)); + return cond == Equal ? AboveOrEqual : Below; + } + Condition testGCThing(Condition cond, const Address& address) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmp32(tagOf(address), ImmTag(JS::detail::ValueLowerInclGCThingTag)); + return cond == Equal ? AboveOrEqual : Below; + } + Condition testMagic(Condition cond, const Address& address) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmp32(tagOf(address), ImmTag(JSVAL_TAG_MAGIC)); + return cond; + } + Condition testMagic(Condition cond, Register tag) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmp32(tag, ImmTag(JSVAL_TAG_MAGIC)); + return cond; + } + Condition testMagic(Condition cond, const Operand& operand) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmp32(ToType(operand), ImmTag(JSVAL_TAG_MAGIC)); + return cond; + } + Condition testPrimitive(Condition cond, Register tag) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmp32(tag, ImmTag(JS::detail::ValueUpperExclPrimitiveTag)); + return cond == Equal ? Below : AboveOrEqual; + } + Condition testError(Condition cond, Register tag) { + return testMagic(cond, tag); + } + Condition testBoolean(Condition cond, const Address& address) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmp32(Operand(ToType(address)), ImmTag(JSVAL_TAG_BOOLEAN)); + return cond; + } + Condition testInt32(Condition cond, const Operand& operand) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmp32(ToType(operand), ImmTag(JSVAL_TAG_INT32)); + return cond; + } + Condition testInt32(Condition cond, const Address& address) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + return testInt32(cond, Operand(address)); + } + Condition testObject(Condition cond, const Operand& operand) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmp32(ToType(operand), ImmTag(JSVAL_TAG_OBJECT)); + return cond; + } + Condition testObject(Condition cond, const Address& address) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + return testObject(cond, Operand(address)); + } + Condition testDouble(Condition cond, const Operand& operand) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + Condition actual = (cond == Equal) ? 
Below : AboveOrEqual; + cmp32(ToType(operand), ImmTag(JSVAL_TAG_CLEAR)); + return actual; + } + Condition testDouble(Condition cond, const Address& address) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + return testDouble(cond, Operand(address)); + } + + Condition testUndefined(Condition cond, const Operand& operand) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmp32(ToType(operand), ImmTag(JSVAL_TAG_UNDEFINED)); + return cond; + } + Condition testUndefined(Condition cond, const Address& addr) { + return testUndefined(cond, Operand(addr)); + } + Condition testNull(Condition cond, const Operand& operand) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmp32(ToType(operand), ImmTag(JSVAL_TAG_NULL)); + return cond; + } + Condition testNull(Condition cond, const Address& addr) { + return testNull(cond, Operand(addr)); + } + + Condition testUndefined(Condition cond, const ValueOperand& value) { + return testUndefined(cond, value.typeReg()); + } + Condition testBoolean(Condition cond, const ValueOperand& value) { + return testBoolean(cond, value.typeReg()); + } + Condition testInt32(Condition cond, const ValueOperand& value) { + return testInt32(cond, value.typeReg()); + } + Condition testDouble(Condition cond, const ValueOperand& value) { + return testDouble(cond, value.typeReg()); + } + Condition testNull(Condition cond, const ValueOperand& value) { + return testNull(cond, value.typeReg()); + } + Condition testString(Condition cond, const ValueOperand& value) { + return testString(cond, value.typeReg()); + } + Condition testSymbol(Condition cond, const ValueOperand& value) { + return testSymbol(cond, value.typeReg()); + } + Condition testBigInt(Condition cond, const ValueOperand& value) { + return testBigInt(cond, value.typeReg()); + } + Condition testObject(Condition cond, const ValueOperand& value) { + return testObject(cond, value.typeReg()); + } + Condition testMagic(Condition cond, const ValueOperand& value) { + return testMagic(cond, value.typeReg()); + } + Condition testError(Condition cond, const ValueOperand& value) { + return testMagic(cond, value); + } + Condition testNumber(Condition cond, const ValueOperand& value) { + return testNumber(cond, value.typeReg()); + } + Condition testGCThing(Condition cond, const ValueOperand& value) { + return testGCThing(cond, value.typeReg()); + } + Condition testPrimitive(Condition cond, const ValueOperand& value) { + return testPrimitive(cond, value.typeReg()); + } + + Condition testUndefined(Condition cond, const BaseIndex& address) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmp32(tagOf(address), ImmTag(JSVAL_TAG_UNDEFINED)); + return cond; + } + Condition testNull(Condition cond, const BaseIndex& address) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmp32(tagOf(address), ImmTag(JSVAL_TAG_NULL)); + return cond; + } + Condition testBoolean(Condition cond, const BaseIndex& address) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmp32(tagOf(address), ImmTag(JSVAL_TAG_BOOLEAN)); + return cond; + } + Condition testString(Condition cond, const Address& address) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmp32(tagOf(address), ImmTag(JSVAL_TAG_STRING)); + return cond; + } + Condition testString(Condition cond, const BaseIndex& address) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmp32(tagOf(address), ImmTag(JSVAL_TAG_STRING)); + return cond; + } + Condition testSymbol(Condition cond, const Address& address) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmp32(tagOf(address), 
ImmTag(JSVAL_TAG_SYMBOL)); + return cond; + } + Condition testSymbol(Condition cond, const BaseIndex& address) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmp32(tagOf(address), ImmTag(JSVAL_TAG_SYMBOL)); + return cond; + } + Condition testBigInt(Condition cond, const Address& address) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmp32(tagOf(address), ImmTag(JSVAL_TAG_BIGINT)); + return cond; + } + Condition testBigInt(Condition cond, const BaseIndex& address) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmp32(tagOf(address), ImmTag(JSVAL_TAG_BIGINT)); + return cond; + } + Condition testInt32(Condition cond, const BaseIndex& address) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmp32(tagOf(address), ImmTag(JSVAL_TAG_INT32)); + return cond; + } + Condition testObject(Condition cond, const BaseIndex& address) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmp32(tagOf(address), ImmTag(JSVAL_TAG_OBJECT)); + return cond; + } + Condition testDouble(Condition cond, const BaseIndex& address) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + Condition actual = (cond == Equal) ? Below : AboveOrEqual; + cmp32(tagOf(address), ImmTag(JSVAL_TAG_CLEAR)); + return actual; + } + Condition testMagic(Condition cond, const BaseIndex& address) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmp32(tagOf(address), ImmTag(JSVAL_TAG_MAGIC)); + return cond; + } + Condition testGCThing(Condition cond, const BaseIndex& address) { + MOZ_ASSERT(cond == Equal || cond == NotEqual); + cmp32(tagOf(address), ImmTag(JS::detail::ValueLowerInclGCThingTag)); + return cond == Equal ? AboveOrEqual : Below; + } + + void testNullSet(Condition cond, const ValueOperand& value, Register dest) { + cond = testNull(cond, value); + emitSet(cond, dest); + } + + void testObjectSet(Condition cond, const ValueOperand& value, Register dest) { + cond = testObject(cond, value); + emitSet(cond, dest); + } + + void testUndefinedSet(Condition cond, const ValueOperand& value, + Register dest) { + cond = testUndefined(cond, value); + emitSet(cond, dest); + } + + void cmpPtr(Register lhs, const ImmWord rhs) { cmpl(Imm32(rhs.value), lhs); } + void cmpPtr(Register lhs, const ImmPtr imm) { + cmpPtr(lhs, ImmWord(uintptr_t(imm.value))); + } + void cmpPtr(Register lhs, const ImmGCPtr rhs) { cmpl(rhs, lhs); } + void cmpPtr(const Operand& lhs, Imm32 rhs) { cmp32(lhs, rhs); } + void cmpPtr(const Operand& lhs, const ImmWord rhs) { + cmp32(lhs, Imm32(rhs.value)); + } + void cmpPtr(const Operand& lhs, const ImmPtr imm) { + cmpPtr(lhs, ImmWord(uintptr_t(imm.value))); + } + void cmpPtr(const Operand& lhs, const ImmGCPtr rhs) { cmpl(rhs, lhs); } + void cmpPtr(const Address& lhs, Register rhs) { cmpPtr(Operand(lhs), rhs); } + void cmpPtr(const Operand& lhs, Register rhs) { cmp32(lhs, rhs); } + void cmpPtr(const Address& lhs, const ImmWord rhs) { + cmpPtr(Operand(lhs), rhs); + } + void cmpPtr(const Address& lhs, const ImmPtr rhs) { + cmpPtr(lhs, ImmWord(uintptr_t(rhs.value))); + } + void cmpPtr(const Address& lhs, const ImmGCPtr rhs) { + cmpPtr(Operand(lhs), rhs); + } + void cmpPtr(Register lhs, Register rhs) { cmp32(lhs, rhs); } + void testPtr(Register lhs, Register rhs) { test32(lhs, rhs); } + void testPtr(Register lhs, Imm32 rhs) { test32(lhs, rhs); } + void testPtr(Register lhs, ImmWord rhs) { test32(lhs, Imm32(rhs.value)); } + void testPtr(const Operand& lhs, Imm32 rhs) { test32(lhs, rhs); } + void testPtr(const Operand& lhs, ImmWord rhs) { + test32(lhs, Imm32(rhs.value)); + } + + 
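+  // A note on the test*() helpers above: with nunbox32, every non-double tag
+  // is a value >= JSVAL_TAG_CLEAR, while a (canonicalized) double keeps its
+  // own high 32 bits in the tag word, which always compare below
+  // JSVAL_TAG_CLEAR. That is why testDouble() answers Equal with an unsigned
+  // Below, and why the range tests (testNumber(), testGCThing(),
+  // testPrimitive()) reduce to a single unsigned compare against the
+  // inclusive/exclusive boundary tag.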
///////////////////////////////////////////////////////////////// + // Common interface. + ///////////////////////////////////////////////////////////////// + + void movePtr(ImmWord imm, Register dest) { movl(Imm32(imm.value), dest); } + void movePtr(ImmPtr imm, Register dest) { movl(imm, dest); } + void movePtr(wasm::SymbolicAddress imm, Register dest) { mov(imm, dest); } + void movePtr(ImmGCPtr imm, Register dest) { movl(imm, dest); } + void loadPtr(const Address& address, Register dest) { + movl(Operand(address), dest); + } + void loadPtr(const Operand& src, Register dest) { movl(src, dest); } + void loadPtr(const BaseIndex& src, Register dest) { + movl(Operand(src), dest); + } + void loadPtr(AbsoluteAddress address, Register dest) { + movl(Operand(address), dest); + } + void loadPrivate(const Address& src, Register dest) { + movl(payloadOf(src), dest); + } + void load32(AbsoluteAddress address, Register dest) { + movl(Operand(address), dest); + } + void load64(const Address& address, Register64 dest) { + bool highBeforeLow = address.base == dest.low; + if (highBeforeLow) { + movl(Operand(HighWord(address)), dest.high); + movl(Operand(LowWord(address)), dest.low); + } else { + movl(Operand(LowWord(address)), dest.low); + movl(Operand(HighWord(address)), dest.high); + } + } + void load64(const BaseIndex& address, Register64 dest) { + // If you run into this, relax your register allocation constraints. + MOZ_RELEASE_ASSERT( + !((address.base == dest.low || address.base == dest.high) && + (address.index == dest.low || address.index == dest.high))); + bool highBeforeLow = address.base == dest.low || address.index == dest.low; + if (highBeforeLow) { + movl(Operand(HighWord(address)), dest.high); + movl(Operand(LowWord(address)), dest.low); + } else { + movl(Operand(LowWord(address)), dest.low); + movl(Operand(HighWord(address)), dest.high); + } + } + template <typename T> + void load64Unaligned(const T& address, Register64 dest) { + load64(address, dest); + } + template <typename T> + void storePtr(ImmWord imm, T address) { + movl(Imm32(imm.value), Operand(address)); + } + template <typename T> + void storePtr(ImmPtr imm, T address) { + storePtr(ImmWord(uintptr_t(imm.value)), address); + } + template <typename T> + void storePtr(ImmGCPtr imm, T address) { + movl(imm, Operand(address)); + } + void storePtr(Register src, const Address& address) { + movl(src, Operand(address)); + } + void storePtr(Register src, const BaseIndex& address) { + movl(src, Operand(address)); + } + void storePtr(Register src, const Operand& dest) { movl(src, dest); } + void storePtr(Register src, AbsoluteAddress address) { + movl(src, Operand(address)); + } + void store32(Register src, AbsoluteAddress address) { + movl(src, Operand(address)); + } + void store16(Register src, AbsoluteAddress address) { + movw(src, Operand(address)); + } + template <typename T> + void store64(Register64 src, const T& address) { + movl(src.low, Operand(LowWord(address))); + movl(src.high, Operand(HighWord(address))); + } + void store64(Imm64 imm, Address address) { + movl(imm.low(), Operand(LowWord(address))); + movl(imm.hi(), Operand(HighWord(address))); + } + template <typename S, typename T> + void store64Unaligned(const S& src, const T& dest) { + store64(src, dest); + } + + void setStackArg(Register reg, uint32_t arg) { + movl(reg, Operand(esp, arg * sizeof(intptr_t))); + } + + void boxDouble(FloatRegister src, const ValueOperand& dest, + FloatRegister temp) { + if (Assembler::HasSSE41()) { + vmovd(src, dest.payloadReg()); + 
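+      // (A double Value under nunbox32 is just its raw IEEE-754 bits split
+      // in two: low dword = payload, high dword = tag. vpextrd with index 1
+      // below pulls the high dword straight into the type register.)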
vpextrd(1, src, dest.typeReg()); + } else { + vmovd(src, dest.payloadReg()); + if (src != temp) { + moveDouble(src, temp); + } + vpsrldq(Imm32(4), temp, temp); + vmovd(temp, dest.typeReg()); + } + } + void boxNonDouble(JSValueType type, Register src, const ValueOperand& dest) { + if (src != dest.payloadReg()) { + movl(src, dest.payloadReg()); + } + movl(ImmType(type), dest.typeReg()); + } + + void unboxNonDouble(const ValueOperand& src, Register dest, JSValueType type, + Register scratch = InvalidReg) { + unboxNonDouble(Operand(src.typeReg()), Operand(src.payloadReg()), dest, + type, scratch); + } + void unboxNonDouble(const Operand& tag, const Operand& payload, Register dest, + JSValueType type, Register scratch = InvalidReg) { + auto movPayloadToDest = [&]() { + if (payload.kind() != Operand::REG || !payload.containsReg(dest)) { + movl(payload, dest); + } + }; + if (!JitOptions.spectreValueMasking) { + movPayloadToDest(); + return; + } + + // Spectre mitigation: We zero the payload if the tag does not match the + // expected type and if this is a pointer type. + if (type == JSVAL_TYPE_INT32 || type == JSVAL_TYPE_BOOLEAN) { + movPayloadToDest(); + return; + } + + if (!tag.containsReg(dest) && !payload.containsReg(dest)) { + // We zero the destination register and move the payload into it if + // the tag corresponds to the given type. + xorl(dest, dest); + cmpl(Imm32(JSVAL_TYPE_TO_TAG(type)), tag); + cmovCCl(Condition::Equal, payload, dest); + return; + } + + if (scratch == InvalidReg || scratch == dest || tag.containsReg(scratch) || + payload.containsReg(scratch)) { + // UnboxedLayout::makeConstructorCode calls extractObject with a + // scratch register which aliases the tag register, thus we cannot + // assert the above condition. + scratch = InvalidReg; + } + + // The destination register aliases one of the operands. We create a + // zero value either in a scratch register or on the stack and use it + // to reset the destination register after reading both the tag and the + // payload. 
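+    // In effect (a sketch of the cmov sequence below, not extra code):
+    //
+    //   dest = (tag == JSVAL_TYPE_TO_TAG(type)) ? payload : 0;
+    //
+    // done branchlessly with cmovCCl, so a mis-speculated type guard cannot
+    // leave an attacker-controlled payload in |dest|.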
+ Operand zero(Address(esp, 0)); + if (scratch == InvalidReg) { + push(Imm32(0)); + } else { + xorl(scratch, scratch); + zero = Operand(scratch); + } + cmpl(Imm32(JSVAL_TYPE_TO_TAG(type)), tag); + movPayloadToDest(); + cmovCCl(Condition::NotEqual, zero, dest); + if (scratch == InvalidReg) { + addl(Imm32(sizeof(void*)), esp); + } + } + void unboxNonDouble(const Address& src, Register dest, JSValueType type) { + unboxNonDouble(tagOf(src), payloadOf(src), dest, type); + } + void unboxNonDouble(const BaseIndex& src, Register dest, JSValueType type) { + unboxNonDouble(tagOf(src), payloadOf(src), dest, type); + } + void unboxInt32(const ValueOperand& src, Register dest) { + unboxNonDouble(src, dest, JSVAL_TYPE_INT32); + } + void unboxInt32(const Address& src, Register dest) { + unboxNonDouble(src, dest, JSVAL_TYPE_INT32); + } + void unboxInt32(const BaseIndex& src, Register dest) { + unboxNonDouble(src, dest, JSVAL_TYPE_INT32); + } + void unboxBoolean(const ValueOperand& src, Register dest) { + unboxNonDouble(src, dest, JSVAL_TYPE_BOOLEAN); + } + void unboxBoolean(const Address& src, Register dest) { + unboxNonDouble(src, dest, JSVAL_TYPE_BOOLEAN); + } + void unboxBoolean(const BaseIndex& src, Register dest) { + unboxNonDouble(src, dest, JSVAL_TYPE_BOOLEAN); + } + void unboxString(const ValueOperand& src, Register dest) { + unboxNonDouble(src, dest, JSVAL_TYPE_STRING); + } + void unboxString(const Address& src, Register dest) { + unboxNonDouble(src, dest, JSVAL_TYPE_STRING); + } + void unboxSymbol(const ValueOperand& src, Register dest) { + unboxNonDouble(src, dest, JSVAL_TYPE_SYMBOL); + } + void unboxSymbol(const Address& src, Register dest) { + unboxNonDouble(src, dest, JSVAL_TYPE_SYMBOL); + } + void unboxBigInt(const ValueOperand& src, Register dest) { + unboxNonDouble(src, dest, JSVAL_TYPE_BIGINT); + } + void unboxBigInt(const Address& src, Register dest) { + unboxNonDouble(src, dest, JSVAL_TYPE_BIGINT); + } + void unboxObject(const ValueOperand& src, Register dest) { + unboxNonDouble(src, dest, JSVAL_TYPE_OBJECT); + } + void unboxObject(const Address& src, Register dest) { + unboxNonDouble(src, dest, JSVAL_TYPE_OBJECT); + } + void unboxObject(const BaseIndex& src, Register dest) { + unboxNonDouble(src, dest, JSVAL_TYPE_OBJECT); + } + template <typename T> + void unboxObjectOrNull(const T& src, Register dest) { + // Due to Spectre mitigation logic (see Value.h), if the value is an Object + // then this yields the object; otherwise it yields zero (null), as desired. 
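+    // (This also holds without the masking: a null Value's payload word is
+    // zero under nunbox32, so moving the raw payload yields nullptr too.)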
+ unboxNonDouble(src, dest, JSVAL_TYPE_OBJECT); + } + template <typename T> + void unboxDouble(const T& src, FloatRegister dest) { + loadDouble(Operand(src), dest); + } + void unboxDouble(const ValueOperand& src, FloatRegister dest) { + if (Assembler::HasSSE41()) { + vmovd(src.payloadReg(), dest); + vpinsrd(1, src.typeReg(), dest, dest); + } else { + ScratchDoubleScope fpscratch(asMasm()); + vmovd(src.payloadReg(), dest); + vmovd(src.typeReg(), fpscratch); + vunpcklps(fpscratch, dest, dest); + } + } + void unboxDouble(const Operand& payload, const Operand& type, + Register scratch, FloatRegister dest) { + if (Assembler::HasSSE41()) { + movl(payload, scratch); + vmovd(scratch, dest); + movl(type, scratch); + vpinsrd(1, scratch, dest, dest); + } else { + ScratchDoubleScope fpscratch(asMasm()); + movl(payload, scratch); + vmovd(scratch, dest); + movl(type, scratch); + vmovd(scratch, fpscratch); + vunpcklps(fpscratch, dest, dest); + } + } + inline void unboxValue(const ValueOperand& src, AnyRegister dest, + JSValueType type); + + // See comment in MacroAssembler-x64.h. + void unboxGCThingForGCBarrier(const Address& src, Register dest) { + movl(payloadOf(src), dest); + } + + void notBoolean(const ValueOperand& val) { xorl(Imm32(1), val.payloadReg()); } + + template <typename T> + void fallibleUnboxPtrImpl(const T& src, Register dest, JSValueType type, + Label* fail); + + // Extended unboxing API. If the payload is already in a register, returns + // that register. Otherwise, provides a move to the given scratch register, + // and returns that. + [[nodiscard]] Register extractObject(const Address& address, Register dest) { + unboxObject(address, dest); + return dest; + } + [[nodiscard]] Register extractObject(const ValueOperand& value, + Register scratch) { + unboxNonDouble(value, value.payloadReg(), JSVAL_TYPE_OBJECT, scratch); + return value.payloadReg(); + } + [[nodiscard]] Register extractSymbol(const ValueOperand& value, + Register scratch) { + unboxNonDouble(value, value.payloadReg(), JSVAL_TYPE_SYMBOL, scratch); + return value.payloadReg(); + } + [[nodiscard]] Register extractInt32(const ValueOperand& value, + Register scratch) { + return value.payloadReg(); + } + [[nodiscard]] Register extractBoolean(const ValueOperand& value, + Register scratch) { + return value.payloadReg(); + } + [[nodiscard]] Register extractTag(const Address& address, Register scratch) { + movl(tagOf(address), scratch); + return scratch; + } + [[nodiscard]] Register extractTag(const ValueOperand& value, + Register scratch) { + return value.typeReg(); + } + + void convertDoubleToPtr(FloatRegister src, Register dest, Label* fail, + bool negativeZeroCheck = true) { + convertDoubleToInt32(src, dest, fail, negativeZeroCheck); + } + + void boolValueToDouble(const ValueOperand& operand, FloatRegister dest) { + convertInt32ToDouble(operand.payloadReg(), dest); + } + void boolValueToFloat32(const ValueOperand& operand, FloatRegister dest) { + convertInt32ToFloat32(operand.payloadReg(), dest); + } + void int32ValueToDouble(const ValueOperand& operand, FloatRegister dest) { + convertInt32ToDouble(operand.payloadReg(), dest); + } + void int32ValueToFloat32(const ValueOperand& operand, FloatRegister dest) { + convertInt32ToFloat32(operand.payloadReg(), dest); + } + + void loadConstantDouble(double d, FloatRegister dest); + void loadConstantFloat32(float f, FloatRegister dest); + + void loadConstantSimd128Int(const SimdConstant& v, FloatRegister dest); + void loadConstantSimd128Float(const SimdConstant& v, FloatRegister dest); + 
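+  // The helpers below are presumably thin wrappers over vpPatchOpSimd128()
+  // above: each emits the memory-operand form of its SSE instruction with the
+  // 16-byte constant |v| as the right-hand side, patching in the constant's
+  // address once it has been placed.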
void vpaddbSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpaddwSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpadddSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpaddqSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpsubbSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpsubwSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpsubdSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpsubqSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpmullwSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpmulldSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpaddsbSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpaddusbSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpaddswSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpadduswSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpsubsbSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpsubusbSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpsubswSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpsubuswSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpminsbSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpminubSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpminswSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpminuwSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpminsdSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpminudSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpmaxsbSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpmaxubSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpmaxswSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpmaxuwSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpmaxsdSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpmaxudSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpandSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpxorSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vporSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vaddpsSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vaddpdSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vsubpsSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vsubpdSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vdivpsSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vdivpdSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vmulpsSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vmulpdSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void 
vandpdSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vminpdSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpacksswbSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpackuswbSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpackssdwSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpackusdwSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpunpckldqSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vunpcklpsSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpshufbSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vptestSimd128(const SimdConstant& v, FloatRegister lhs); + void vpmaddwdSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpcmpeqbSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpcmpgtbSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpcmpeqwSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpcmpgtwSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpcmpeqdSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpcmpgtdSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vcmpeqpsSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vcmpneqpsSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vcmpltpsSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vcmplepsSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vcmpgepsSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vcmpeqpdSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vcmpneqpdSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vcmpltpdSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vcmplepdSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpmaddubswSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + void vpmuludqSimd128(const SimdConstant& v, FloatRegister lhs, + FloatRegister dest); + + Condition testInt32Truthy(bool truthy, const ValueOperand& operand) { + test32(operand.payloadReg(), operand.payloadReg()); + return truthy ? NonZero : Zero; + } + Condition testStringTruthy(bool truthy, const ValueOperand& value); + Condition testBigIntTruthy(bool truthy, const ValueOperand& value); + + template <typename T> + inline void loadInt32OrDouble(const T& src, FloatRegister dest); + + template <typename T> + inline void loadUnboxedValue(const T& src, MIRType type, AnyRegister dest); + + template <typename T> + void storeUnboxedPayload(ValueOperand value, T address, size_t nbytes, + JSValueType) { + switch (nbytes) { + case 4: + storePtr(value.payloadReg(), address); + return; + case 1: + store8(value.payloadReg(), address); + return; + default: + MOZ_CRASH("Bad payload width"); + } + } + + // Note: this function clobbers the source register. + inline void convertUInt32ToDouble(Register src, FloatRegister dest); + + // Note: this function clobbers the source register. 
+ inline void convertUInt32ToFloat32(Register src, FloatRegister dest); + + void incrementInt32Value(const Address& addr) { + addl(Imm32(1), payloadOf(addr)); + } + + inline void ensureDouble(const ValueOperand& source, FloatRegister dest, + Label* failure); + + public: + // Used from within an Exit frame to handle a pending exception. + void handleFailureWithHandlerTail(Label* profilerExitTail, + Label* bailoutTail); + + // Instrumentation for entering and leaving the profiler. + void profilerEnterFrame(Register framePtr, Register scratch); + void profilerExitFrame(); +}; + +typedef MacroAssemblerX86 MacroAssemblerSpecific; + +} // namespace jit +} // namespace js + +#endif /* jit_x86_MacroAssembler_x86_h */ diff --git a/js/src/jit/x86/SharedICHelpers-x86-inl.h b/js/src/jit/x86/SharedICHelpers-x86-inl.h new file mode 100644 index 0000000000..2ab41c287a --- /dev/null +++ b/js/src/jit/x86/SharedICHelpers-x86-inl.h @@ -0,0 +1,77 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef jit_x86_SharedICHelpers_x86_inl_h +#define jit_x86_SharedICHelpers_x86_inl_h + +#include "jit/BaselineFrame.h" +#include "jit/SharedICHelpers.h" + +#include "jit/MacroAssembler-inl.h" + +namespace js { +namespace jit { + +inline void EmitBaselineTailCallVM(TrampolinePtr target, MacroAssembler& masm, + uint32_t argSize) { +#ifdef DEBUG + // We assume during this that R0 and R1 have been pushed. + // Store frame size without VMFunction arguments for debug assertions. + masm.movl(FramePointer, eax); + masm.subl(StackPointer, eax); + masm.subl(Imm32(argSize), eax); + Address frameSizeAddr(FramePointer, + BaselineFrame::reverseOffsetOfDebugFrameSize()); + masm.store32(eax, frameSizeAddr); +#endif + + // Push frame descriptor and perform the tail call. + masm.pushFrameDescriptor(FrameType::BaselineJS); + masm.push(ICTailCallReg); + masm.jump(target); +} + +inline void EmitBaselineCallVM(TrampolinePtr target, MacroAssembler& masm) { + masm.pushFrameDescriptor(FrameType::BaselineStub); + masm.call(target); +} + +inline void EmitBaselineEnterStubFrame(MacroAssembler& masm, Register scratch) { +#ifdef DEBUG + // Compute frame size. Because the return address is still on the stack, + // this is: + // + // FramePointer + // - StackPointer + // - sizeof(return address) + + masm.movl(FramePointer, scratch); + masm.subl(StackPointer, scratch); + masm.subl(Imm32(sizeof(void*)), scratch); // Return address. + + Address frameSizeAddr(FramePointer, + BaselineFrame::reverseOffsetOfDebugFrameSize()); + masm.store32(scratch, frameSizeAddr); +#endif + + // Push the return address that's currently on top of the stack. + masm.Push(Operand(StackPointer, 0)); + + // Replace the original return address with the frame descriptor. + masm.storePtr(ImmWord(MakeFrameDescriptor(FrameType::BaselineJS)), + Address(StackPointer, sizeof(uintptr_t))); + + // Save old frame pointer, stack pointer and stub reg. 
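+  // Layout after the pushes below, relative to the new FramePointer
+  // (higher addresses first):
+  //
+  //   fp+8  frame descriptor (written over the caller's return-address slot)
+  //   fp+4  return address   (the copy pushed just above)
+  //   fp+0  saved FramePointer
+  //   fp-4  ICStubReg        (BaselineStubFrameLayout::ICStubOffsetFromFP)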
+ masm.Push(FramePointer); + masm.mov(StackPointer, FramePointer); + + masm.Push(ICStubReg); +} + +} // namespace jit +} // namespace js + +#endif /* jit_x86_SharedICHelpers_x86_inl_h */ diff --git a/js/src/jit/x86/SharedICHelpers-x86.h b/js/src/jit/x86/SharedICHelpers-x86.h new file mode 100644 index 0000000000..07d2ae8a6f --- /dev/null +++ b/js/src/jit/x86/SharedICHelpers-x86.h @@ -0,0 +1,70 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef jit_x86_SharedICHelpers_x86_h +#define jit_x86_SharedICHelpers_x86_h + +#include "jit/BaselineIC.h" +#include "jit/JitFrames.h" +#include "jit/MacroAssembler.h" +#include "jit/SharedICRegisters.h" + +namespace js { +namespace jit { + +// Distance from stack top to the top Value inside an IC stub (this is the +// return address). +static const size_t ICStackValueOffset = sizeof(void*); + +inline void EmitRestoreTailCallReg(MacroAssembler& masm) { + masm.Pop(ICTailCallReg); +} + +inline void EmitRepushTailCallReg(MacroAssembler& masm) { + masm.Push(ICTailCallReg); +} + +inline void EmitCallIC(MacroAssembler& masm, CodeOffset* callOffset) { + // The stub pointer must already be in ICStubReg. + // Call the stubcode. + masm.call(Address(ICStubReg, ICStub::offsetOfStubCode())); + *callOffset = CodeOffset(masm.currentOffset()); +} + +inline void EmitReturnFromIC(MacroAssembler& masm) { masm.ret(); } + +inline void EmitBaselineLeaveStubFrame(MacroAssembler& masm) { + Address stubAddr(FramePointer, BaselineStubFrameLayout::ICStubOffsetFromFP); + masm.loadPtr(stubAddr, ICStubReg); + + masm.mov(FramePointer, StackPointer); + masm.Pop(FramePointer); + + // The return address is on top of the stack, followed by the frame + // descriptor. Use a pop instruction to overwrite the frame descriptor + // with the return address. Note that pop increments the stack pointer + // before computing the address. + masm.Pop(Operand(StackPointer, 0)); +} + +template <typename AddrType> +inline void EmitPreBarrier(MacroAssembler& masm, const AddrType& addr, + MIRType type) { + masm.guardedCallPreBarrier(addr, type); +} + +inline void EmitStubGuardFailure(MacroAssembler& masm) { + // Load next stub into ICStubReg + masm.loadPtr(Address(ICStubReg, ICCacheIRStub::offsetOfNext()), ICStubReg); + + // Return address is already loaded, just jump to the next stubcode. + masm.jmp(Operand(ICStubReg, ICStub::offsetOfStubCode())); +} + +} // namespace jit +} // namespace js + +#endif /* jit_x86_SharedICHelpers_x86_h */ diff --git a/js/src/jit/x86/SharedICRegisters-x86.h b/js/src/jit/x86/SharedICRegisters-x86.h new file mode 100644 index 0000000000..44edb5288f --- /dev/null +++ b/js/src/jit/x86/SharedICRegisters-x86.h @@ -0,0 +1,36 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
 */
+
+#ifndef jit_x86_SharedICRegisters_x86_h
+#define jit_x86_SharedICRegisters_x86_h
+
+#include "jit/Registers.h"
+#include "jit/RegisterSets.h"
+#include "jit/x86/Assembler-x86.h"
+
+namespace js {
+namespace jit {
+
+// ValueOperands R0, R1, and R2
+static constexpr ValueOperand R0(ecx, edx);
+static constexpr ValueOperand R1(eax, ebx);
+static constexpr ValueOperand R2(esi, edi);
+
+// ICTailCallReg and ICStubReg reuse
+// registers from R2.
+static constexpr Register ICTailCallReg = esi;
+static constexpr Register ICStubReg = edi;
+
+// FloatReg0 must be equal to ReturnFloatReg.
+static constexpr FloatRegister FloatReg0 = xmm0;
+static constexpr FloatRegister FloatReg1 = xmm1;
+static constexpr FloatRegister FloatReg2 = xmm2;
+static constexpr FloatRegister FloatReg3 = xmm3;
+
+} // namespace jit
+} // namespace js
+
+#endif /* jit_x86_SharedICRegisters_x86_h */
diff --git a/js/src/jit/x86/Trampoline-x86.cpp b/js/src/jit/x86/Trampoline-x86.cpp
new file mode 100644
index 0000000000..da459cdd3a
--- /dev/null
+++ b/js/src/jit/x86/Trampoline-x86.cpp
@@ -0,0 +1,796 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/MathAlgorithms.h"
+
+#include "jit/Bailouts.h"
+#include "jit/BaselineFrame.h"
+#include "jit/BaselineJIT.h"
+#include "jit/CalleeToken.h"
+#include "jit/JitFrames.h"
+#include "jit/JitRuntime.h"
+#include "jit/JitSpewer.h"
+#include "jit/PerfSpewer.h"
+#include "jit/VMFunctions.h"
+#include "jit/x86/SharedICHelpers-x86.h"
+#include "vm/JitActivation.h" // js::jit::JitActivation
+#include "vm/JSContext.h"
+#include "vm/Realm.h"
+#ifdef MOZ_VTUNE
+# include "vtune/VTuneWrapper.h"
+#endif
+
+#include "jit/MacroAssembler-inl.h"
+#include "vm/JSScript-inl.h"
+
+using mozilla::IsPowerOfTwo;
+
+using namespace js;
+using namespace js::jit;
+
+// All registers to save and restore. This includes the stack pointer, since we
+// use the ability to reference register values on the stack by index.
+static const LiveRegisterSet AllRegs =
+    LiveRegisterSet(GeneralRegisterSet(Registers::AllMask),
+                    FloatRegisterSet(FloatRegisters::AllMask));
+
+enum EnterJitEbpArgumentOffset {
+  ARG_JITCODE = 2 * sizeof(void*),
+  ARG_ARGC = 3 * sizeof(void*),
+  ARG_ARGV = 4 * sizeof(void*),
+  ARG_STACKFRAME = 5 * sizeof(void*),
+  ARG_CALLEETOKEN = 6 * sizeof(void*),
+  ARG_SCOPECHAIN = 7 * sizeof(void*),
+  ARG_STACKVALUES = 8 * sizeof(void*),
+  ARG_RESULT = 9 * sizeof(void*)
+};
+
+// Generates a trampoline for calling Jit compiled code from a C++ function.
+// The trampoline uses the EnterJitCode signature, with the standard cdecl
+// calling convention.
+void JitRuntime::generateEnterJIT(JSContext* cx, MacroAssembler& masm) {
+  AutoCreatedBy acb(masm, "JitRuntime::generateEnterJIT");
+
+  enterJITOffset_ = startTrampolineCode(masm);
+
+  masm.assertStackAlignment(ABIStackAlignment,
+                            -int32_t(sizeof(uintptr_t)) /* return address */);
+
+  // Save old stack frame pointer, set new stack frame pointer.
+  masm.push(ebp);
+  masm.movl(esp, ebp);
+
+  // Save non-volatile registers. These must be saved by the trampoline,
+  // rather than the JIT'd code, because they are scanned by the conservative
+  // scanner.
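+  // Under the x86 cdecl convention ebx, esi and edi (like ebp, saved above)
+  // are callee-saved, so the trampoline preserves them here before they get
+  // clobbered. With ebp now holding the frame pointer, the incoming
+  // EnterJitCode arguments live at ebp + 2*sizeof(void*) and up, which is
+  // what the EnterJitEbpArgumentOffset values above encode (ebp + 0 is the
+  // saved ebp, ebp + 4 the return address).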
+  masm.push(ebx);
+  masm.push(esi);
+  masm.push(edi);
+
+  // Load the number of values to be copied (argc) into eax
+  masm.loadPtr(Address(ebp, ARG_ARGC), eax);
+
+  // If we are constructing, that also needs to include newTarget
+  {
+    Label noNewTarget;
+    masm.loadPtr(Address(ebp, ARG_CALLEETOKEN), edx);
+    masm.branchTest32(Assembler::Zero, edx,
+                      Imm32(CalleeToken_FunctionConstructing), &noNewTarget);
+
+    masm.addl(Imm32(1), eax);
+
+    masm.bind(&noNewTarget);
+  }
+
+  // eax <- 8*numValues, eax is now the offset between argv and the last value.
+  masm.shll(Imm32(3), eax);
+
+  // Guarantee stack alignment of Jit frames.
+  //
+  // This code compensates for the offset created by the copy of the vector of
+  // arguments, such that the jit frame will be aligned once the return
+  // address is pushed on the stack.
+  //
+  // In the computation of the offset, we omit the size of the JitFrameLayout
+  // which is pushed on the stack, as the JitFrameLayout size is a multiple of
+  // the JitStackAlignment.
+  masm.movl(esp, ecx);
+  masm.subl(eax, ecx);
+  static_assert(
+      sizeof(JitFrameLayout) % JitStackAlignment == 0,
+      "No need to consider the JitFrameLayout for aligning the stack");
+
+  // ecx = ecx & 15, holds alignment.
+  masm.andl(Imm32(JitStackAlignment - 1), ecx);
+  masm.subl(ecx, esp);
+
+  /***************************************************************
+  Loop over argv vector, push arguments onto stack in reverse order
+  ***************************************************************/
+
+  // ebx = argv --argv pointer is in ebp + 16
+  masm.loadPtr(Address(ebp, ARG_ARGV), ebx);
+
+  // eax = argv[8(argc)] --eax now points one value past the last argument
+  masm.addl(ebx, eax);
+
+  // while (eax > ebx) --while still looping through arguments
+  {
+    Label header, footer;
+    masm.bind(&header);
+
+    masm.cmp32(eax, ebx);
+    masm.j(Assembler::BelowOrEqual, &footer);
+
+    // eax -= 8 --move to previous argument
+    masm.subl(Imm32(8), eax);
+
+    // Push what eax points to on stack, a Value is 2 words
+    masm.push(Operand(eax, 4));
+    masm.push(Operand(eax, 0));
+
+    masm.jmp(&header);
+    masm.bind(&footer);
+  }
+
+  // Load the number of actual arguments. |result| is used to store the
+  // actual number of arguments without adding an extra argument to the enter
+  // JIT.
+  masm.mov(Operand(ebp, ARG_RESULT), eax);
+  masm.unboxInt32(Address(eax, 0x0), eax);
+
+  // Push the callee token.
+  masm.push(Operand(ebp, ARG_CALLEETOKEN));
+
+  // Load the InterpreterFrame address into the OsrFrameReg.
+  // This address is also used for setting the constructing bit on all paths.
+  masm.loadPtr(Address(ebp, ARG_STACKFRAME), OsrFrameReg);
+
+  // Push the descriptor.
+  masm.pushFrameDescriptorForJitCall(FrameType::CppToJSJit, eax, eax);
+
+  CodeLabel returnLabel;
+  Label oomReturnLabel;
+  {
+    // Handle Interpreter -> Baseline OSR.
+    AllocatableGeneralRegisterSet regs(GeneralRegisterSet::All());
+    MOZ_ASSERT(!regs.has(ebp));
+    regs.take(OsrFrameReg);
+    regs.take(ReturnReg);
+
+    Register scratch = regs.takeAny();
+
+    Label notOsr;
+    masm.branchTestPtr(Assembler::Zero, OsrFrameReg, OsrFrameReg, &notOsr);
+
+    Register numStackValues = regs.takeAny();
+    masm.loadPtr(Address(ebp, ARG_STACKVALUES), numStackValues);
+
+    Register jitcode = regs.takeAny();
+    masm.loadPtr(Address(ebp, ARG_JITCODE), jitcode);
+
+    // Push return address.
+    masm.mov(&returnLabel, scratch);
+    masm.push(scratch);
+
+    // Frame prologue.
+    masm.push(ebp);
+    masm.mov(esp, ebp);
+
+    // Reserve frame.
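+    // BaselineFrame::Size() bytes are carved out here for the BaselineFrame
+    // header itself; the code below then reserves another 8 bytes (one boxed
+    // Value) per entry in numStackValues for the frame's locals and
+    // expression stack, and framePtrScratch keeps the address of the
+    // reserved BaselineFrame so it can be handed to InitBaselineFrameForOsr.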
+    masm.subPtr(Imm32(BaselineFrame::Size()), esp);
+
+    Register framePtrScratch = regs.takeAny();
+    masm.touchFrameValues(numStackValues, scratch, framePtrScratch);
+    masm.mov(esp, framePtrScratch);
+
+    // Reserve space for locals and stack values.
+    masm.mov(numStackValues, scratch);
+    masm.shll(Imm32(3), scratch);
+    masm.subPtr(scratch, esp);
+
+    // Enter exit frame.
+    masm.pushFrameDescriptor(FrameType::BaselineJS);
+    masm.push(Imm32(0)); // Fake return address.
+    masm.push(FramePointer);
+    // No GC things to mark on the stack, push a bare token.
+    masm.loadJSContext(scratch);
+    masm.enterFakeExitFrame(scratch, scratch, ExitFrameType::Bare);
+
+    masm.push(jitcode);
+
+    using Fn = bool (*)(BaselineFrame * frame, InterpreterFrame * interpFrame,
+                        uint32_t numStackValues);
+    masm.setupUnalignedABICall(scratch);
+    masm.passABIArg(framePtrScratch); // BaselineFrame
+    masm.passABIArg(OsrFrameReg); // InterpreterFrame
+    masm.passABIArg(numStackValues);
+    masm.callWithABI<Fn, jit::InitBaselineFrameForOsr>(
+        MoveOp::GENERAL, CheckUnsafeCallWithABI::DontCheckHasExitFrame);
+
+    masm.pop(jitcode);
+
+    MOZ_ASSERT(jitcode != ReturnReg);
+
+    Label error;
+    masm.addPtr(Imm32(ExitFrameLayout::SizeWithFooter()), esp);
+    masm.branchIfFalseBool(ReturnReg, &error);
+
+    // If OSR-ing, then emit instrumentation for setting lastProfilerFrame
+    // if profiler instrumentation is enabled.
+    {
+      Label skipProfilingInstrumentation;
+      AbsoluteAddress addressOfEnabled(
+          cx->runtime()->geckoProfiler().addressOfEnabled());
+      masm.branch32(Assembler::Equal, addressOfEnabled, Imm32(0),
+                    &skipProfilingInstrumentation);
+      masm.profilerEnterFrame(ebp, scratch);
+      masm.bind(&skipProfilingInstrumentation);
+    }
+
+    masm.jump(jitcode);
+
+    // OOM: frame epilogue, load error value, discard return address and return.
+    masm.bind(&error);
+    masm.mov(ebp, esp);
+    masm.pop(ebp);
+    masm.addPtr(Imm32(sizeof(uintptr_t)), esp); // Return address.
+    masm.moveValue(MagicValue(JS_ION_ERROR), JSReturnOperand);
+    masm.jump(&oomReturnLabel);
+
+    masm.bind(&notOsr);
+    masm.loadPtr(Address(ebp, ARG_SCOPECHAIN), R1.scratchReg());
+  }
+
+  // The call will push the return address and frame pointer on the stack, thus
+  // we check that the stack would be aligned once the call is complete.
+  masm.assertStackAlignment(JitStackAlignment, 2 * sizeof(uintptr_t));
+
+  /***************************************************************
+      Call passed-in code, get return value and fill in the
+      passed in return value pointer
+  ***************************************************************/
+  masm.call(Address(ebp, ARG_JITCODE));
+
+  {
+    // Interpreter -> Baseline OSR will return here.
+    masm.bind(&returnLabel);
+    masm.addCodeLabel(returnLabel);
+    masm.bind(&oomReturnLabel);
+  }
+
+  // Restore the stack pointer so the stack looks like this:
+  //  +20 ... arguments ...
+  //  +16 <return>
+  //  +12 ebp <- %ebp pointing here.
+  //  +8 ebx
+  //  +4 esi
+  //  +0 edi <- %esp pointing here.
+  masm.lea(Operand(ebp, -int32_t(3 * sizeof(void*))), esp);
+
+  // Store the return value.
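+  // ARG_RESULT is a pointer to the caller-provided result slot; the boxed
+  // return value in JSReturnOperand (two 32-bit words on x86) is written
+  // through it below.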
+ masm.loadPtr(Address(ebp, ARG_RESULT), eax); + masm.storeValue(JSReturnOperand, Operand(eax, 0)); + + /************************************************************** + Return stack and registers to correct state + **************************************************************/ + + // Restore non-volatile registers + masm.pop(edi); + masm.pop(esi); + masm.pop(ebx); + + // Restore old stack frame pointer + masm.pop(ebp); + masm.ret(); +} + +// static +mozilla::Maybe<::JS::ProfilingFrameIterator::RegisterState> +JitRuntime::getCppEntryRegisters(JitFrameLayout* frameStackAddress) { + // Not supported, or not implemented yet. + // TODO: Implement along with the corresponding stack-walker changes, in + // coordination with the Gecko Profiler, see bug 1635987 and follow-ups. + return mozilla::Nothing{}; +} + +// Push AllRegs in a way that is compatible with RegisterDump, regardless of +// what PushRegsInMask might do to reduce the set size. +static void DumpAllRegs(MacroAssembler& masm) { +#ifdef ENABLE_WASM_SIMD + masm.PushRegsInMask(AllRegs); +#else + // When SIMD isn't supported, PushRegsInMask reduces the set of float + // registers to be double-sized, while the RegisterDump expects each of + // the float registers to have the maximal possible size + // (Simd128DataSize). To work around this, we just spill the double + // registers by hand here, using the register dump offset directly. + for (GeneralRegisterBackwardIterator iter(AllRegs.gprs()); iter.more(); + ++iter) { + masm.Push(*iter); + } + + masm.reserveStack(sizeof(RegisterDump::FPUArray)); + for (FloatRegisterBackwardIterator iter(AllRegs.fpus()); iter.more(); + ++iter) { + FloatRegister reg = *iter; + Address spillAddress(StackPointer, reg.getRegisterDumpOffsetInBytes()); + masm.storeDouble(reg, spillAddress); + } +#endif +} + +void JitRuntime::generateInvalidator(MacroAssembler& masm, Label* bailoutTail) { + AutoCreatedBy acb(masm, "JitRuntime::generateInvalidator"); + + invalidatorOffset_ = startTrampolineCode(masm); + + // We do the minimum amount of work in assembly and shunt the rest + // off to InvalidationBailout. Assembly does: + // + // - Push the machine state onto the stack. + // - Call the InvalidationBailout routine with the stack pointer. + // - Now that the frame has been bailed out, convert the invalidated + // frame into an exit frame. + // - Do the normal check-return-code-and-thunk-to-the-interpreter dance. + + // Push registers such that we can access them from [base + code]. + DumpAllRegs(masm); + + masm.movl(esp, eax); // Argument to jit::InvalidationBailout. + + // Make space for InvalidationBailout's bailoutInfo outparam. + masm.reserveStack(sizeof(void*)); + masm.movl(esp, ebx); + + using Fn = + bool (*)(InvalidationBailoutStack * sp, BaselineBailoutInfo * *info); + masm.setupUnalignedABICall(edx); + masm.passABIArg(eax); + masm.passABIArg(ebx); + masm.callWithABI<Fn, InvalidationBailout>( + MoveOp::GENERAL, CheckUnsafeCallWithABI::DontCheckOther); + + masm.pop(ecx); // Get bailoutInfo outparam. + + // Pop the machine state and the dead frame. + masm.moveToStackPtr(FramePointer); + + // Jump to shared bailout tail. The BailoutInfo pointer has to be in ecx. 
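+  // bailoutTail is bound by generateBailoutTailStub at the end of this file,
+  // which calls masm.generateBailoutTail(edx, ecx) and therefore expects the
+  // bailout info in ecx.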
+ masm.jmp(bailoutTail); +} + +void JitRuntime::generateArgumentsRectifier(MacroAssembler& masm, + ArgumentsRectifierKind kind) { + AutoCreatedBy acb(masm, "JitRuntime::generateArgumentsRectifier"); + + switch (kind) { + case ArgumentsRectifierKind::Normal: + argumentsRectifierOffset_ = startTrampolineCode(masm); + break; + case ArgumentsRectifierKind::TrialInlining: + trialInliningArgumentsRectifierOffset_ = startTrampolineCode(masm); + break; + } + + // Caller: + // [arg2] [arg1] [this] [ [argc] [callee] [descr] [raddr] ] <- esp + + // Frame prologue. + // + // NOTE: if this changes, fix the Baseline bailout code too! + // See BaselineStackBuilder::calculatePrevFramePtr and + // BaselineStackBuilder::buildRectifierFrame (in BaselineBailouts.cpp). + masm.push(FramePointer); + masm.movl(esp, FramePointer); // Save %esp. + + // Load argc. + masm.loadNumActualArgs(FramePointer, esi); + + // Load the number of |undefined|s to push into %ecx. + masm.loadPtr(Address(ebp, RectifierFrameLayout::offsetOfCalleeToken()), eax); + masm.mov(eax, ecx); + masm.andl(Imm32(CalleeTokenMask), ecx); + masm.loadFunctionArgCount(ecx, ecx); + + // The frame pointer and its padding are pushed on the stack. + // Including |this|, there are (|nformals| + 1) arguments to push to the + // stack. Then we push a JitFrameLayout. We compute the padding expressed + // in the number of extra |undefined| values to push on the stack. + static_assert( + sizeof(JitFrameLayout) % JitStackAlignment == 0, + "No need to consider the JitFrameLayout for aligning the stack"); + static_assert( + JitStackAlignment % sizeof(Value) == 0, + "Ensure that we can pad the stack by pushing extra UndefinedValue"); + static_assert(IsPowerOfTwo(JitStackValueAlignment), + "must have power of two for masm.andl to do its job"); + + masm.addl( + Imm32(JitStackValueAlignment - 1 /* for padding */ + 1 /* for |this| */), + ecx); + + // Account for newTarget, if necessary. + static_assert( + CalleeToken_FunctionConstructing == 1, + "Ensure that we can use the constructing bit to count an extra push"); + masm.mov(eax, edx); + masm.andl(Imm32(CalleeToken_FunctionConstructing), edx); + masm.addl(edx, ecx); + + masm.andl(Imm32(~(JitStackValueAlignment - 1)), ecx); + masm.subl(esi, ecx); + masm.subl(Imm32(1), ecx); // For |this|. + + // Copy the number of actual arguments into edx. + masm.mov(esi, edx); + + masm.moveValue(UndefinedValue(), ValueOperand(ebx, edi)); + + // Caller: + // [arg2] [arg1] [this] [ [argc] [callee] [descr] [raddr] ] + // '-- #esi ---' + // + // Rectifier frame: + // [ebp'] <- ebp [padding] <- esp [undef] [undef] [arg2] [arg1] [this] + // '--- #ecx ----' '-- #esi ---' + // + // [ [argc] [callee] [descr] [raddr] ] + + // Push undefined. + { + Label undefLoopTop; + masm.bind(&undefLoopTop); + + masm.push(ebx); // type(undefined); + masm.push(edi); // payload(undefined); + masm.subl(Imm32(1), ecx); + masm.j(Assembler::NonZero, &undefLoopTop); + } + + // Get the topmost argument. + BaseIndex b(FramePointer, esi, TimesEight, sizeof(RectifierFrameLayout)); + masm.lea(Operand(b), ecx); + + // Push arguments, |nargs| + 1 times (to include |this|). 
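+  // esi still holds the actual argc here; bumping it by one below makes the
+  // copy loop also transfer |this|. ecx was just set to the address of the
+  // topmost argument, and each iteration pushes the two 32-bit halves of one
+  // Value before stepping ecx down by sizeof(Value).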
+  masm.addl(Imm32(1), esi);
+  {
+    Label copyLoopTop;
+
+    masm.bind(&copyLoopTop);
+    masm.push(Operand(ecx, sizeof(Value) / 2));
+    masm.push(Operand(ecx, 0x0));
+    masm.subl(Imm32(sizeof(Value)), ecx);
+    masm.subl(Imm32(1), esi);
+    masm.j(Assembler::NonZero, &copyLoopTop);
+  }
+
+  {
+    Label notConstructing;
+
+    masm.mov(eax, ebx);
+    masm.branchTest32(Assembler::Zero, ebx,
+                      Imm32(CalleeToken_FunctionConstructing),
+                      &notConstructing);
+
+    BaseValueIndex src(FramePointer, edx,
+                       sizeof(RectifierFrameLayout) + sizeof(Value));
+
+    masm.andl(Imm32(CalleeTokenMask), ebx);
+    masm.loadFunctionArgCount(ebx, ebx);
+
+    BaseValueIndex dst(esp, ebx, sizeof(Value));
+
+    ValueOperand newTarget(ecx, edi);
+
+    masm.loadValue(src, newTarget);
+    masm.storeValue(newTarget, dst);
+
+    masm.bind(&notConstructing);
+  }
+
+  // Construct JitFrameLayout.
+  masm.push(eax); // callee token
+  masm.pushFrameDescriptorForJitCall(FrameType::Rectifier, edx, edx);
+
+  // Call the target function.
+  masm.andl(Imm32(CalleeTokenMask), eax);
+  switch (kind) {
+    case ArgumentsRectifierKind::Normal:
+      masm.loadJitCodeRaw(eax, eax);
+      argumentsRectifierReturnOffset_ = masm.callJitNoProfiler(eax);
+      break;
+    case ArgumentsRectifierKind::TrialInlining:
+      Label noBaselineScript, done;
+      masm.loadBaselineJitCodeRaw(eax, ebx, &noBaselineScript);
+      masm.callJitNoProfiler(ebx);
+      masm.jump(&done);
+
+      // See BaselineCacheIRCompiler::emitCallInlinedFunction.
+      masm.bind(&noBaselineScript);
+      masm.loadJitCodeRaw(eax, eax);
+      masm.callJitNoProfiler(eax);
+      masm.bind(&done);
+      break;
+  }
+
+  masm.mov(FramePointer, StackPointer);
+  masm.pop(FramePointer);
+  masm.ret();
+}
+
+static void PushBailoutFrame(MacroAssembler& masm, Register spArg) {
+  // Push registers such that we can access them from [base + code].
+  DumpAllRegs(masm);
+
+  // The current stack pointer is the first argument to jit::Bailout.
+  masm.movl(esp, spArg);
+}
+
+static void GenerateBailoutThunk(MacroAssembler& masm, Label* bailoutTail) {
+  PushBailoutFrame(masm, eax);
+
+  // Make space for Bailout's bailoutInfo outparam.
+  masm.reserveStack(sizeof(void*));
+  masm.movl(esp, ebx);
+
+  // Call the bailout function.
+  using Fn = bool (*)(BailoutStack * sp, BaselineBailoutInfo * *info);
+  masm.setupUnalignedABICall(ecx);
+  masm.passABIArg(eax);
+  masm.passABIArg(ebx);
+  masm.callWithABI<Fn, Bailout>(MoveOp::GENERAL,
+                                CheckUnsafeCallWithABI::DontCheckOther);
+
+  masm.pop(ecx); // Get the bailoutInfo outparam.
+
+  // Remove both the bailout frame and the topmost Ion frame's stack.
+  masm.moveToStackPtr(FramePointer);
+
+  // Jump to shared bailout tail. The BailoutInfo pointer has to be in ecx.
+  masm.jmp(bailoutTail);
+}
+
+void JitRuntime::generateBailoutHandler(MacroAssembler& masm,
+                                        Label* bailoutTail) {
+  AutoCreatedBy acb(masm, "JitRuntime::generateBailoutHandler");
+
+  bailoutHandlerOffset_ = startTrampolineCode(masm);
+
+  GenerateBailoutThunk(masm, bailoutTail);
+}
+
+bool JitRuntime::generateVMWrapper(JSContext* cx, MacroAssembler& masm,
+                                   const VMFunctionData& f, DynFn nativeFun,
+                                   uint32_t* wrapperOffset) {
+  AutoCreatedBy acb(masm, "JitRuntime::generateVMWrapper");
+
+  *wrapperOffset = startTrampolineCode(masm);
+
+  // Avoid conflicts with argument registers while discarding the result after
+  // the function call.
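+  // regs is only used to pick scratch registers for building the exit frame
+  // and staging the ABI call (the JSContext register, the argument base
+  // pointer and the out-parameter pointer); the static_assert below checks
+  // that this set covers every volatile register.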
+ AllocatableGeneralRegisterSet regs(Register::Codes::WrapperMask); + + static_assert( + (Register::Codes::VolatileMask & ~Register::Codes::WrapperMask) == 0, + "Wrapper register set must be a superset of Volatile register set."); + + // The context is the first argument. + Register cxreg = regs.takeAny(); + + // Stack is: + // ... frame ... + // +8 [args] + // +4 descriptor + // +0 returnAddress + // + // Push the frame pointer to finish the exit frame, then link it up. + masm.Push(FramePointer); + masm.moveStackPtrTo(FramePointer); + masm.loadJSContext(cxreg); + masm.enterExitFrame(cxreg, regs.getAny(), &f); + + // Save the current stack pointer as the base for copying arguments. + Register argsBase = InvalidReg; + if (f.explicitArgs) { + argsBase = regs.takeAny(); + masm.lea(Operand(esp, ExitFrameLayout::SizeWithFooter()), argsBase); + } + + // Reserve space for the outparameter. + Register outReg = InvalidReg; + switch (f.outParam) { + case Type_Value: + outReg = regs.takeAny(); + masm.Push(UndefinedValue()); + masm.movl(esp, outReg); + break; + + case Type_Handle: + outReg = regs.takeAny(); + masm.PushEmptyRooted(f.outParamRootType); + masm.movl(esp, outReg); + break; + + case Type_Int32: + case Type_Pointer: + case Type_Bool: + outReg = regs.takeAny(); + masm.reserveStack(sizeof(int32_t)); + masm.movl(esp, outReg); + break; + + case Type_Double: + outReg = regs.takeAny(); + masm.reserveStack(sizeof(double)); + masm.movl(esp, outReg); + break; + + default: + MOZ_ASSERT(f.outParam == Type_Void); + break; + } + + masm.setupUnalignedABICall(regs.getAny()); + masm.passABIArg(cxreg); + + size_t argDisp = 0; + + // Copy arguments. + for (uint32_t explicitArg = 0; explicitArg < f.explicitArgs; explicitArg++) { + switch (f.argProperties(explicitArg)) { + case VMFunctionData::WordByValue: + masm.passABIArg(MoveOperand(argsBase, argDisp), MoveOp::GENERAL); + argDisp += sizeof(void*); + break; + case VMFunctionData::DoubleByValue: + // We don't pass doubles in float registers on x86, so no need + // to check for argPassedInFloatReg. + masm.passABIArg(MoveOperand(argsBase, argDisp), MoveOp::GENERAL); + argDisp += sizeof(void*); + masm.passABIArg(MoveOperand(argsBase, argDisp), MoveOp::GENERAL); + argDisp += sizeof(void*); + break; + case VMFunctionData::WordByRef: + masm.passABIArg( + MoveOperand(argsBase, argDisp, MoveOperand::Kind::EffectiveAddress), + MoveOp::GENERAL); + argDisp += sizeof(void*); + break; + case VMFunctionData::DoubleByRef: + masm.passABIArg( + MoveOperand(argsBase, argDisp, MoveOperand::Kind::EffectiveAddress), + MoveOp::GENERAL); + argDisp += 2 * sizeof(void*); + break; + } + } + + // Copy the implicit outparam, if any. + if (outReg != InvalidReg) { + masm.passABIArg(outReg); + } + + masm.callWithABI(nativeFun, MoveOp::GENERAL, + CheckUnsafeCallWithABI::DontCheckHasExitFrame); + + // Test for failure. + switch (f.failType()) { + case Type_Cell: + masm.branchTestPtr(Assembler::Zero, eax, eax, masm.failureLabel()); + break; + case Type_Bool: + masm.testb(eax, eax); + masm.j(Assembler::Zero, masm.failureLabel()); + break; + case Type_Void: + break; + default: + MOZ_CRASH("unknown failure kind"); + } + + // Load the outparam and free any allocated stack. 
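+  // Each case below mirrors the reservation made before the call:
+  // Type_Handle pops the rooted slot pushed with PushEmptyRooted, Type_Value
+  // pops the Value directly into JSReturnOperand, and the 32-bit and double
+  // cases pop the slot allocated with reserveStack into the return register.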
+ switch (f.outParam) { + case Type_Handle: + masm.popRooted(f.outParamRootType, ReturnReg, JSReturnOperand); + break; + + case Type_Value: + masm.Pop(JSReturnOperand); + break; + + case Type_Int32: + case Type_Pointer: + masm.Pop(ReturnReg); + break; + + case Type_Bool: + masm.Pop(ReturnReg); + masm.movzbl(ReturnReg, ReturnReg); + break; + + case Type_Double: + masm.Pop(ReturnDoubleReg); + break; + + default: + MOZ_ASSERT(f.outParam == Type_Void); + break; + } + + // Until C++ code is instrumented against Spectre, prevent speculative + // execution from returning any private data. + if (f.returnsData() && JitOptions.spectreJitToCxxCalls) { + masm.speculationBarrier(); + } + + // Pop ExitFooterFrame and the frame pointer. + masm.leaveExitFrame(0); + masm.pop(FramePointer); + + // Return. Subtract sizeof(void*) for the frame pointer. + masm.retn(Imm32(sizeof(ExitFrameLayout) - sizeof(void*) + + f.explicitStackSlots() * sizeof(void*) + + f.extraValuesToPop * sizeof(Value))); + + return true; +} + +uint32_t JitRuntime::generatePreBarrier(JSContext* cx, MacroAssembler& masm, + MIRType type) { + AutoCreatedBy acb(masm, "JitRuntime::generatePreBarrier"); + + uint32_t offset = startTrampolineCode(masm); + + static_assert(PreBarrierReg == edx); + Register temp1 = eax; + Register temp2 = ebx; + Register temp3 = ecx; + masm.push(temp1); + masm.push(temp2); + masm.push(temp3); + + Label noBarrier; + masm.emitPreBarrierFastPath(cx->runtime(), type, temp1, temp2, temp3, + &noBarrier); + + // Call into C++ to mark this GC thing. + masm.pop(temp3); + masm.pop(temp2); + masm.pop(temp1); + + LiveRegisterSet save; + save.set() = RegisterSet(GeneralRegisterSet(Registers::VolatileMask), + FloatRegisterSet(FloatRegisters::VolatileMask)); + masm.PushRegsInMask(save); + + masm.movl(ImmPtr(cx->runtime()), ecx); + + masm.setupUnalignedABICall(eax); + masm.passABIArg(ecx); + masm.passABIArg(edx); + masm.callWithABI(JitPreWriteBarrier(type)); + + masm.PopRegsInMask(save); + masm.ret(); + + masm.bind(&noBarrier); + masm.pop(temp3); + masm.pop(temp2); + masm.pop(temp1); + masm.ret(); + + return offset; +} + +void JitRuntime::generateBailoutTailStub(MacroAssembler& masm, + Label* bailoutTail) { + AutoCreatedBy acb(masm, "JitRuntime::generateBailoutTailStub"); + + masm.bind(bailoutTail); + masm.generateBailoutTail(edx, ecx); +} |