summaryrefslogtreecommitdiffstats
path: root/js/src/jit/x86-shared/Lowering-x86-shared.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'js/src/jit/x86-shared/Lowering-x86-shared.cpp')
-rw-r--r--js/src/jit/x86-shared/Lowering-x86-shared.cpp1869
1 files changed, 1869 insertions, 0 deletions
diff --git a/js/src/jit/x86-shared/Lowering-x86-shared.cpp b/js/src/jit/x86-shared/Lowering-x86-shared.cpp
new file mode 100644
index 0000000000..3b69e06876
--- /dev/null
+++ b/js/src/jit/x86-shared/Lowering-x86-shared.cpp
@@ -0,0 +1,1869 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "jit/x86-shared/Lowering-x86-shared.h"
+
+#include "mozilla/MathAlgorithms.h"
+
+#include "jit/Lowering.h"
+#include "jit/MIR.h"
+
+#include "jit/shared/Lowering-shared-inl.h"
+
+using namespace js;
+using namespace js::jit;
+
+using mozilla::Abs;
+using mozilla::FloorLog2;
+using mozilla::Maybe;
+using mozilla::Nothing;
+using mozilla::Some;
+
+LTableSwitch* LIRGeneratorX86Shared::newLTableSwitch(
+ const LAllocation& in, const LDefinition& inputCopy,
+ MTableSwitch* tableswitch) {
+ return new (alloc()) LTableSwitch(in, inputCopy, temp(), tableswitch);
+}
+
+LTableSwitchV* LIRGeneratorX86Shared::newLTableSwitchV(
+ MTableSwitch* tableswitch) {
+ return new (alloc()) LTableSwitchV(useBox(tableswitch->getOperand(0)), temp(),
+ tempDouble(), temp(), tableswitch);
+}
+
+void LIRGenerator::visitPowHalf(MPowHalf* ins) {
+ MDefinition* input = ins->input();
+ MOZ_ASSERT(input->type() == MIRType::Double);
+ LPowHalfD* lir = new (alloc()) LPowHalfD(useRegisterAtStart(input));
+ define(lir, ins);
+}
+
+void LIRGeneratorX86Shared::lowerForShift(LInstructionHelper<1, 2, 0>* ins,
+ MDefinition* mir, MDefinition* lhs,
+ MDefinition* rhs) {
+ ins->setOperand(0, useRegisterAtStart(lhs));
+
+ // Shift operand should be constant or, unless BMI2 is available, in register
+ // ecx. x86 can't shift a non-ecx register.
+ if (rhs->isConstant()) {
+ ins->setOperand(1, useOrConstantAtStart(rhs));
+ } else if (Assembler::HasBMI2() && !mir->isRotate()) {
+ ins->setOperand(1, willHaveDifferentLIRNodes(lhs, rhs)
+ ? useRegister(rhs)
+ : useRegisterAtStart(rhs));
+ } else {
+ ins->setOperand(1, willHaveDifferentLIRNodes(lhs, rhs)
+ ? useFixed(rhs, ecx)
+ : useFixedAtStart(rhs, ecx));
+ }
+
+ defineReuseInput(ins, mir, 0);
+}
+
+template <size_t Temps>
+void LIRGeneratorX86Shared::lowerForShiftInt64(
+ LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, Temps>* ins,
+ MDefinition* mir, MDefinition* lhs, MDefinition* rhs) {
+ ins->setInt64Operand(0, useInt64RegisterAtStart(lhs));
+#if defined(JS_NUNBOX32)
+ if (mir->isRotate()) {
+ ins->setTemp(0, temp());
+ }
+#endif
+
+ static_assert(LShiftI64::Rhs == INT64_PIECES,
+ "Assume Rhs is located at INT64_PIECES.");
+ static_assert(LRotateI64::Count == INT64_PIECES,
+ "Assume Count is located at INT64_PIECES.");
+
+ // Shift operand should be constant or, unless BMI2 is available, in register
+ // ecx. x86 can't shift a non-ecx register.
+ if (rhs->isConstant()) {
+ ins->setOperand(INT64_PIECES, useOrConstantAtStart(rhs));
+#ifdef JS_CODEGEN_X64
+ } else if (Assembler::HasBMI2() && !mir->isRotate()) {
+ ins->setOperand(INT64_PIECES, useRegister(rhs));
+#endif
+ } else {
+ // The operands are int64, but we only care about the lower 32 bits of
+ // the RHS. On 32-bit, the code below will load that part in ecx and
+ // will discard the upper half.
+ ensureDefined(rhs);
+ LUse use(ecx);
+ use.setVirtualRegister(rhs->virtualRegister());
+ ins->setOperand(INT64_PIECES, use);
+ }
+
+ defineInt64ReuseInput(ins, mir, 0);
+}
+
+template void LIRGeneratorX86Shared::lowerForShiftInt64(
+ LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, 0>* ins,
+ MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
+template void LIRGeneratorX86Shared::lowerForShiftInt64(
+ LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, 1>* ins,
+ MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
+
+void LIRGeneratorX86Shared::lowerForCompareI64AndBranch(
+ MTest* mir, MCompare* comp, JSOp op, MDefinition* left, MDefinition* right,
+ MBasicBlock* ifTrue, MBasicBlock* ifFalse) {
+ auto* lir = new (alloc())
+ LCompareI64AndBranch(comp, op, useInt64Register(left),
+ useInt64OrConstant(right), ifTrue, ifFalse);
+ add(lir, mir);
+}
+
+void LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 1, 0>* ins,
+ MDefinition* mir, MDefinition* input) {
+ ins->setOperand(0, useRegisterAtStart(input));
+ defineReuseInput(ins, mir, 0);
+}
+
+void LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 2, 0>* ins,
+ MDefinition* mir, MDefinition* lhs,
+ MDefinition* rhs) {
+ ins->setOperand(0, useRegisterAtStart(lhs));
+ ins->setOperand(1, willHaveDifferentLIRNodes(lhs, rhs)
+ ? useOrConstant(rhs)
+ : useOrConstantAtStart(rhs));
+ defineReuseInput(ins, mir, 0);
+}
+
+template <size_t Temps>
+void LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, Temps>* ins,
+ MDefinition* mir, MDefinition* lhs,
+ MDefinition* rhs) {
+ // Without AVX, we'll need to use the x86 encodings where one of the
+ // inputs must be the same location as the output.
+ if (!Assembler::HasAVX()) {
+ ins->setOperand(0, useRegisterAtStart(lhs));
+ ins->setOperand(
+ 1, willHaveDifferentLIRNodes(lhs, rhs) ? use(rhs) : useAtStart(rhs));
+ defineReuseInput(ins, mir, 0);
+ } else {
+ ins->setOperand(0, useRegisterAtStart(lhs));
+ ins->setOperand(1, useAtStart(rhs));
+ define(ins, mir);
+ }
+}
+
+template void LIRGeneratorX86Shared::lowerForFPU(
+ LInstructionHelper<1, 2, 0>* ins, MDefinition* mir, MDefinition* lhs,
+ MDefinition* rhs);
+template void LIRGeneratorX86Shared::lowerForFPU(
+ LInstructionHelper<1, 2, 1>* ins, MDefinition* mir, MDefinition* lhs,
+ MDefinition* rhs);
+
+void LIRGeneratorX86Shared::lowerForBitAndAndBranch(LBitAndAndBranch* baab,
+ MInstruction* mir,
+ MDefinition* lhs,
+ MDefinition* rhs) {
+ baab->setOperand(0, useRegisterAtStart(lhs));
+ baab->setOperand(1, useRegisterOrConstantAtStart(rhs));
+ add(baab, mir);
+}
+
+void LIRGeneratorX86Shared::lowerNegI(MInstruction* ins, MDefinition* input) {
+ defineReuseInput(new (alloc()) LNegI(useRegisterAtStart(input)), ins, 0);
+}
+
+void LIRGeneratorX86Shared::lowerNegI64(MInstruction* ins, MDefinition* input) {
+ defineInt64ReuseInput(new (alloc()) LNegI64(useInt64RegisterAtStart(input)),
+ ins, 0);
+}
+
+void LIRGenerator::visitAbs(MAbs* ins) {
+ defineReuseInput(allocateAbs(ins, useRegisterAtStart(ins->input())), ins, 0);
+}
+
+void LIRGeneratorX86Shared::lowerMulI(MMul* mul, MDefinition* lhs,
+ MDefinition* rhs) {
+ // Note: If we need a negative zero check, lhs is used twice.
+ LAllocation lhsCopy = mul->canBeNegativeZero() ? use(lhs) : LAllocation();
+ LMulI* lir = new (alloc())
+ LMulI(useRegisterAtStart(lhs),
+ willHaveDifferentLIRNodes(lhs, rhs) ? useOrConstant(rhs)
+ : useOrConstantAtStart(rhs),
+ lhsCopy);
+ if (mul->fallible()) {
+ assignSnapshot(lir, mul->bailoutKind());
+ }
+ defineReuseInput(lir, mul, 0);
+}
+
+void LIRGeneratorX86Shared::lowerDivI(MDiv* div) {
+ if (div->isUnsigned()) {
+ lowerUDiv(div);
+ return;
+ }
+
+ // Division instructions are slow. Division by constant denominators can be
+ // rewritten to use other instructions.
+ if (div->rhs()->isConstant()) {
+ int32_t rhs = div->rhs()->toConstant()->toInt32();
+
+ // Division by powers of two can be done by shifting, and division by
+ // other numbers can be done by a reciprocal multiplication technique.
+ int32_t shift = FloorLog2(Abs(rhs));
+ if (rhs != 0 && uint32_t(1) << shift == Abs(rhs)) {
+ LAllocation lhs = useRegisterAtStart(div->lhs());
+ LDivPowTwoI* lir;
+ // When truncated with maybe a non-zero remainder, we have to round the
+ // result toward 0. This requires an extra register to round up/down
+ // whether the left-hand-side is signed.
+ bool needRoundNeg = div->canBeNegativeDividend() && div->isTruncated();
+ if (!needRoundNeg) {
+ // Numerator is unsigned, so does not need adjusting.
+ lir = new (alloc()) LDivPowTwoI(lhs, lhs, shift, rhs < 0);
+ } else {
+ // Numerator might be signed, and needs adjusting, and an extra lhs copy
+ // is needed to round the result of the integer division towards zero.
+ lir = new (alloc())
+ LDivPowTwoI(lhs, useRegister(div->lhs()), shift, rhs < 0);
+ }
+ if (div->fallible()) {
+ assignSnapshot(lir, div->bailoutKind());
+ }
+ defineReuseInput(lir, div, 0);
+ return;
+ }
+ if (rhs != 0) {
+ LDivOrModConstantI* lir;
+ lir = new (alloc())
+ LDivOrModConstantI(useRegister(div->lhs()), rhs, tempFixed(eax));
+ if (div->fallible()) {
+ assignSnapshot(lir, div->bailoutKind());
+ }
+ defineFixed(lir, div, LAllocation(AnyRegister(edx)));
+ return;
+ }
+ }
+
+ LDivI* lir = new (alloc())
+ LDivI(useRegister(div->lhs()), useRegister(div->rhs()), tempFixed(edx));
+ if (div->fallible()) {
+ assignSnapshot(lir, div->bailoutKind());
+ }
+ defineFixed(lir, div, LAllocation(AnyRegister(eax)));
+}
+
+void LIRGeneratorX86Shared::lowerModI(MMod* mod) {
+ if (mod->isUnsigned()) {
+ lowerUMod(mod);
+ return;
+ }
+
+ if (mod->rhs()->isConstant()) {
+ int32_t rhs = mod->rhs()->toConstant()->toInt32();
+ int32_t shift = FloorLog2(Abs(rhs));
+ if (rhs != 0 && uint32_t(1) << shift == Abs(rhs)) {
+ LModPowTwoI* lir =
+ new (alloc()) LModPowTwoI(useRegisterAtStart(mod->lhs()), shift);
+ if (mod->fallible()) {
+ assignSnapshot(lir, mod->bailoutKind());
+ }
+ defineReuseInput(lir, mod, 0);
+ return;
+ }
+ if (rhs != 0) {
+ LDivOrModConstantI* lir;
+ lir = new (alloc())
+ LDivOrModConstantI(useRegister(mod->lhs()), rhs, tempFixed(edx));
+ if (mod->fallible()) {
+ assignSnapshot(lir, mod->bailoutKind());
+ }
+ defineFixed(lir, mod, LAllocation(AnyRegister(eax)));
+ return;
+ }
+ }
+
+ LModI* lir = new (alloc())
+ LModI(useRegister(mod->lhs()), useRegister(mod->rhs()), tempFixed(eax));
+ if (mod->fallible()) {
+ assignSnapshot(lir, mod->bailoutKind());
+ }
+ defineFixed(lir, mod, LAllocation(AnyRegister(edx)));
+}
+
+void LIRGenerator::visitWasmNeg(MWasmNeg* ins) {
+ switch (ins->type()) {
+ case MIRType::Int32:
+ defineReuseInput(new (alloc()) LNegI(useRegisterAtStart(ins->input())),
+ ins, 0);
+ break;
+ case MIRType::Float32:
+ defineReuseInput(new (alloc()) LNegF(useRegisterAtStart(ins->input())),
+ ins, 0);
+ break;
+ case MIRType::Double:
+ defineReuseInput(new (alloc()) LNegD(useRegisterAtStart(ins->input())),
+ ins, 0);
+ break;
+ default:
+ MOZ_CRASH();
+ }
+}
+
+void LIRGeneratorX86Shared::lowerWasmSelectI(MWasmSelect* select) {
+ auto* lir = new (alloc())
+ LWasmSelect(useRegisterAtStart(select->trueExpr()),
+ useAny(select->falseExpr()), useRegister(select->condExpr()));
+ defineReuseInput(lir, select, LWasmSelect::TrueExprIndex);
+}
+
+void LIRGeneratorX86Shared::lowerWasmSelectI64(MWasmSelect* select) {
+ auto* lir = new (alloc()) LWasmSelectI64(
+ useInt64RegisterAtStart(select->trueExpr()),
+ useInt64(select->falseExpr()), useRegister(select->condExpr()));
+ defineInt64ReuseInput(lir, select, LWasmSelectI64::TrueExprIndex);
+}
+
+void LIRGenerator::visitAsmJSLoadHeap(MAsmJSLoadHeap* ins) {
+ MDefinition* base = ins->base();
+ MOZ_ASSERT(base->type() == MIRType::Int32);
+
+ MDefinition* boundsCheckLimit = ins->boundsCheckLimit();
+ MOZ_ASSERT_IF(ins->needsBoundsCheck(),
+ boundsCheckLimit->type() == MIRType::Int32);
+
+ // For simplicity, require a register if we're going to emit a bounds-check
+ // branch, so that we don't have special cases for constants. This should
+ // only happen in rare constant-folding cases since asm.js sets the minimum
+ // heap size based when accessed via constant.
+ LAllocation baseAlloc = ins->needsBoundsCheck()
+ ? useRegisterAtStart(base)
+ : useRegisterOrZeroAtStart(base);
+
+ LAllocation limitAlloc = ins->needsBoundsCheck()
+ ? useRegisterAtStart(boundsCheckLimit)
+ : LAllocation();
+ LAllocation memoryBaseAlloc = ins->hasMemoryBase()
+ ? useRegisterAtStart(ins->memoryBase())
+ : LAllocation();
+
+ auto* lir =
+ new (alloc()) LAsmJSLoadHeap(baseAlloc, limitAlloc, memoryBaseAlloc);
+ define(lir, ins);
+}
+
+void LIRGenerator::visitAsmJSStoreHeap(MAsmJSStoreHeap* ins) {
+ MDefinition* base = ins->base();
+ MOZ_ASSERT(base->type() == MIRType::Int32);
+
+ MDefinition* boundsCheckLimit = ins->boundsCheckLimit();
+ MOZ_ASSERT_IF(ins->needsBoundsCheck(),
+ boundsCheckLimit->type() == MIRType::Int32);
+
+ // For simplicity, require a register if we're going to emit a bounds-check
+ // branch, so that we don't have special cases for constants. This should
+ // only happen in rare constant-folding cases since asm.js sets the minimum
+ // heap size based when accessed via constant.
+ LAllocation baseAlloc = ins->needsBoundsCheck()
+ ? useRegisterAtStart(base)
+ : useRegisterOrZeroAtStart(base);
+
+ LAllocation limitAlloc = ins->needsBoundsCheck()
+ ? useRegisterAtStart(boundsCheckLimit)
+ : LAllocation();
+ LAllocation memoryBaseAlloc = ins->hasMemoryBase()
+ ? useRegisterAtStart(ins->memoryBase())
+ : LAllocation();
+
+ LAsmJSStoreHeap* lir = nullptr;
+ switch (ins->access().type()) {
+ case Scalar::Int8:
+ case Scalar::Uint8:
+#ifdef JS_CODEGEN_X86
+ // See comment for LIRGeneratorX86::useByteOpRegister.
+ lir = new (alloc()) LAsmJSStoreHeap(
+ baseAlloc, useFixed(ins->value(), eax), limitAlloc, memoryBaseAlloc);
+ break;
+#endif
+ case Scalar::Int16:
+ case Scalar::Uint16:
+ case Scalar::Int32:
+ case Scalar::Uint32:
+ case Scalar::Float32:
+ case Scalar::Float64:
+ // For now, don't allow constant values. The immediate operand affects
+ // instruction layout which affects patching.
+ lir = new (alloc())
+ LAsmJSStoreHeap(baseAlloc, useRegisterAtStart(ins->value()),
+ limitAlloc, memoryBaseAlloc);
+ break;
+ case Scalar::Int64:
+ case Scalar::Simd128:
+ MOZ_CRASH("NYI");
+ case Scalar::Uint8Clamped:
+ case Scalar::BigInt64:
+ case Scalar::BigUint64:
+ case Scalar::MaxTypedArrayViewType:
+ MOZ_CRASH("unexpected array type");
+ }
+ add(lir, ins);
+}
+
+void LIRGeneratorX86Shared::lowerUDiv(MDiv* div) {
+ if (div->rhs()->isConstant()) {
+ uint32_t rhs = div->rhs()->toConstant()->toInt32();
+ int32_t shift = FloorLog2(rhs);
+
+ LAllocation lhs = useRegisterAtStart(div->lhs());
+ if (rhs != 0 && uint32_t(1) << shift == rhs) {
+ LDivPowTwoI* lir = new (alloc()) LDivPowTwoI(lhs, lhs, shift, false);
+ if (div->fallible()) {
+ assignSnapshot(lir, div->bailoutKind());
+ }
+ defineReuseInput(lir, div, 0);
+ } else {
+ LUDivOrModConstant* lir = new (alloc())
+ LUDivOrModConstant(useRegister(div->lhs()), rhs, tempFixed(eax));
+ if (div->fallible()) {
+ assignSnapshot(lir, div->bailoutKind());
+ }
+ defineFixed(lir, div, LAllocation(AnyRegister(edx)));
+ }
+ return;
+ }
+
+ LUDivOrMod* lir = new (alloc()) LUDivOrMod(
+ useRegister(div->lhs()), useRegister(div->rhs()), tempFixed(edx));
+ if (div->fallible()) {
+ assignSnapshot(lir, div->bailoutKind());
+ }
+ defineFixed(lir, div, LAllocation(AnyRegister(eax)));
+}
+
+void LIRGeneratorX86Shared::lowerUMod(MMod* mod) {
+ if (mod->rhs()->isConstant()) {
+ uint32_t rhs = mod->rhs()->toConstant()->toInt32();
+ int32_t shift = FloorLog2(rhs);
+
+ if (rhs != 0 && uint32_t(1) << shift == rhs) {
+ LModPowTwoI* lir =
+ new (alloc()) LModPowTwoI(useRegisterAtStart(mod->lhs()), shift);
+ if (mod->fallible()) {
+ assignSnapshot(lir, mod->bailoutKind());
+ }
+ defineReuseInput(lir, mod, 0);
+ } else {
+ LUDivOrModConstant* lir = new (alloc())
+ LUDivOrModConstant(useRegister(mod->lhs()), rhs, tempFixed(edx));
+ if (mod->fallible()) {
+ assignSnapshot(lir, mod->bailoutKind());
+ }
+ defineFixed(lir, mod, LAllocation(AnyRegister(eax)));
+ }
+ return;
+ }
+
+ LUDivOrMod* lir = new (alloc()) LUDivOrMod(
+ useRegister(mod->lhs()), useRegister(mod->rhs()), tempFixed(eax));
+ if (mod->fallible()) {
+ assignSnapshot(lir, mod->bailoutKind());
+ }
+ defineFixed(lir, mod, LAllocation(AnyRegister(edx)));
+}
+
+void LIRGeneratorX86Shared::lowerUrshD(MUrsh* mir) {
+ MDefinition* lhs = mir->lhs();
+ MDefinition* rhs = mir->rhs();
+
+ MOZ_ASSERT(lhs->type() == MIRType::Int32);
+ MOZ_ASSERT(rhs->type() == MIRType::Int32);
+ MOZ_ASSERT(mir->type() == MIRType::Double);
+
+#ifdef JS_CODEGEN_X64
+ static_assert(ecx == rcx);
+#endif
+
+ // Without BMI2, x86 can only shift by ecx.
+ LUse lhsUse = useRegisterAtStart(lhs);
+ LAllocation rhsAlloc;
+ if (rhs->isConstant()) {
+ rhsAlloc = useOrConstant(rhs);
+ } else if (Assembler::HasBMI2()) {
+ rhsAlloc = useRegister(rhs);
+ } else {
+ rhsAlloc = useFixed(rhs, ecx);
+ }
+
+ LUrshD* lir = new (alloc()) LUrshD(lhsUse, rhsAlloc, tempCopy(lhs, 0));
+ define(lir, mir);
+}
+
+void LIRGeneratorX86Shared::lowerPowOfTwoI(MPow* mir) {
+ int32_t base = mir->input()->toConstant()->toInt32();
+ MDefinition* power = mir->power();
+
+ // Shift operand should be in register ecx, unless BMI2 is available.
+ // x86 can't shift a non-ecx register.
+ LAllocation powerAlloc =
+ Assembler::HasBMI2() ? useRegister(power) : useFixed(power, ecx);
+ auto* lir = new (alloc()) LPowOfTwoI(powerAlloc, base);
+ assignSnapshot(lir, mir->bailoutKind());
+ define(lir, mir);
+}
+
+void LIRGeneratorX86Shared::lowerBigIntLsh(MBigIntLsh* ins) {
+ // Shift operand should be in register ecx, unless BMI2 is available.
+ // x86 can't shift a non-ecx register.
+ LDefinition shiftAlloc = Assembler::HasBMI2() ? temp() : tempFixed(ecx);
+ auto* lir =
+ new (alloc()) LBigIntLsh(useRegister(ins->lhs()), useRegister(ins->rhs()),
+ temp(), shiftAlloc, temp());
+ define(lir, ins);
+ assignSafepoint(lir, ins);
+}
+
+void LIRGeneratorX86Shared::lowerBigIntRsh(MBigIntRsh* ins) {
+ // Shift operand should be in register ecx, unless BMI2 is available.
+ // x86 can't shift a non-ecx register.
+ LDefinition shiftAlloc = Assembler::HasBMI2() ? temp() : tempFixed(ecx);
+ auto* lir =
+ new (alloc()) LBigIntRsh(useRegister(ins->lhs()), useRegister(ins->rhs()),
+ temp(), shiftAlloc, temp());
+ define(lir, ins);
+ assignSafepoint(lir, ins);
+}
+
+void LIRGeneratorX86Shared::lowerWasmBuiltinTruncateToInt32(
+ MWasmBuiltinTruncateToInt32* ins) {
+ MDefinition* opd = ins->input();
+ MOZ_ASSERT(opd->type() == MIRType::Double || opd->type() == MIRType::Float32);
+
+ LDefinition maybeTemp =
+ Assembler::HasSSE3() ? LDefinition::BogusTemp() : tempDouble();
+ if (opd->type() == MIRType::Double) {
+ define(new (alloc()) LWasmBuiltinTruncateDToInt32(
+ useRegister(opd), useFixed(ins->instance(), InstanceReg),
+ maybeTemp),
+ ins);
+ return;
+ }
+
+ define(
+ new (alloc()) LWasmBuiltinTruncateFToInt32(
+ useRegister(opd), useFixed(ins->instance(), InstanceReg), maybeTemp),
+ ins);
+}
+
+void LIRGeneratorX86Shared::lowerTruncateDToInt32(MTruncateToInt32* ins) {
+ MDefinition* opd = ins->input();
+ MOZ_ASSERT(opd->type() == MIRType::Double);
+
+ LDefinition maybeTemp =
+ Assembler::HasSSE3() ? LDefinition::BogusTemp() : tempDouble();
+ define(new (alloc()) LTruncateDToInt32(useRegister(opd), maybeTemp), ins);
+}
+
+void LIRGeneratorX86Shared::lowerTruncateFToInt32(MTruncateToInt32* ins) {
+ MDefinition* opd = ins->input();
+ MOZ_ASSERT(opd->type() == MIRType::Float32);
+
+ LDefinition maybeTemp =
+ Assembler::HasSSE3() ? LDefinition::BogusTemp() : tempFloat32();
+ define(new (alloc()) LTruncateFToInt32(useRegister(opd), maybeTemp), ins);
+}
+
+void LIRGeneratorX86Shared::lowerCompareExchangeTypedArrayElement(
+ MCompareExchangeTypedArrayElement* ins, bool useI386ByteRegisters) {
+ MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
+ MOZ_ASSERT(ins->arrayType() != Scalar::Float64);
+
+ MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
+ MOZ_ASSERT(ins->index()->type() == MIRType::IntPtr);
+
+ const LUse elements = useRegister(ins->elements());
+ const LAllocation index =
+ useRegisterOrIndexConstant(ins->index(), ins->arrayType());
+
+ // If the target is a floating register then we need a temp at the
+ // lower level; that temp must be eax.
+ //
+ // Otherwise the target (if used) is an integer register, which
+ // must be eax. If the target is not used the machine code will
+ // still clobber eax, so just pretend it's used.
+ //
+ // oldval must be in a register.
+ //
+ // newval must be in a register. If the source is a byte array
+ // then newval must be a register that has a byte size: on x86
+ // this must be ebx, ecx, or edx (eax is taken for the output).
+ //
+ // Bug #1077036 describes some further optimization opportunities.
+
+ bool fixedOutput = false;
+ LDefinition tempDef = LDefinition::BogusTemp();
+ LAllocation newval;
+ if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) {
+ tempDef = tempFixed(eax);
+ newval = useRegister(ins->newval());
+ } else {
+ fixedOutput = true;
+ if (useI386ByteRegisters && ins->isByteArray()) {
+ newval = useFixed(ins->newval(), ebx);
+ } else {
+ newval = useRegister(ins->newval());
+ }
+ }
+
+ const LAllocation oldval = useRegister(ins->oldval());
+
+ LCompareExchangeTypedArrayElement* lir =
+ new (alloc()) LCompareExchangeTypedArrayElement(elements, index, oldval,
+ newval, tempDef);
+
+ if (fixedOutput) {
+ defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
+ } else {
+ define(lir, ins);
+ }
+}
+
+void LIRGeneratorX86Shared::lowerAtomicExchangeTypedArrayElement(
+ MAtomicExchangeTypedArrayElement* ins, bool useI386ByteRegisters) {
+ MOZ_ASSERT(ins->arrayType() <= Scalar::Uint32);
+
+ MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
+ MOZ_ASSERT(ins->index()->type() == MIRType::IntPtr);
+
+ const LUse elements = useRegister(ins->elements());
+ const LAllocation index =
+ useRegisterOrIndexConstant(ins->index(), ins->arrayType());
+ const LAllocation value = useRegister(ins->value());
+
+ // The underlying instruction is XCHG, which can operate on any
+ // register.
+ //
+ // If the target is a floating register (for Uint32) then we need
+ // a temp into which to exchange.
+ //
+ // If the source is a byte array then we need a register that has
+ // a byte size; in this case -- on x86 only -- pin the output to
+ // an appropriate register and use that as a temp in the back-end.
+
+ LDefinition tempDef = LDefinition::BogusTemp();
+ if (ins->arrayType() == Scalar::Uint32) {
+ MOZ_ASSERT(ins->type() == MIRType::Double);
+ tempDef = temp();
+ }
+
+ LAtomicExchangeTypedArrayElement* lir = new (alloc())
+ LAtomicExchangeTypedArrayElement(elements, index, value, tempDef);
+
+ if (useI386ByteRegisters && ins->isByteArray()) {
+ defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
+ } else {
+ define(lir, ins);
+ }
+}
+
+void LIRGeneratorX86Shared::lowerAtomicTypedArrayElementBinop(
+ MAtomicTypedArrayElementBinop* ins, bool useI386ByteRegisters) {
+ MOZ_ASSERT(ins->arrayType() != Scalar::Uint8Clamped);
+ MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
+ MOZ_ASSERT(ins->arrayType() != Scalar::Float64);
+
+ MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
+ MOZ_ASSERT(ins->index()->type() == MIRType::IntPtr);
+
+ const LUse elements = useRegister(ins->elements());
+ const LAllocation index =
+ useRegisterOrIndexConstant(ins->index(), ins->arrayType());
+
+ // Case 1: the result of the operation is not used.
+ //
+ // We'll emit a single instruction: LOCK ADD, LOCK SUB, LOCK AND,
+ // LOCK OR, or LOCK XOR. We can do this even for the Uint32 case.
+
+ if (ins->isForEffect()) {
+ LAllocation value;
+ if (useI386ByteRegisters && ins->isByteArray() &&
+ !ins->value()->isConstant()) {
+ value = useFixed(ins->value(), ebx);
+ } else {
+ value = useRegisterOrConstant(ins->value());
+ }
+
+ LAtomicTypedArrayElementBinopForEffect* lir = new (alloc())
+ LAtomicTypedArrayElementBinopForEffect(elements, index, value);
+
+ add(lir, ins);
+ return;
+ }
+
+ // Case 2: the result of the operation is used.
+ //
+ // For ADD and SUB we'll use XADD:
+ //
+ // movl src, output
+ // lock xaddl output, mem
+ //
+ // For the 8-bit variants XADD needs a byte register for the output.
+ //
+ // For AND/OR/XOR we need to use a CMPXCHG loop:
+ //
+ // movl *mem, eax
+ // L: mov eax, temp
+ // andl src, temp
+ // lock cmpxchg temp, mem ; reads eax also
+ // jnz L
+ // ; result in eax
+ //
+ // Note the placement of L, cmpxchg will update eax with *mem if
+ // *mem does not have the expected value, so reloading it at the
+ // top of the loop would be redundant.
+ //
+ // If the array is not a uint32 array then:
+ // - eax should be the output (one result of the cmpxchg)
+ // - there is a temp, which must have a byte register if
+ // the array has 1-byte elements elements
+ //
+ // If the array is a uint32 array then:
+ // - eax is the first temp
+ // - we also need a second temp
+ //
+ // There are optimization opportunities:
+ // - better register allocation in the x86 8-bit case, Bug #1077036.
+
+ bool bitOp = !(ins->operation() == AtomicFetchAddOp ||
+ ins->operation() == AtomicFetchSubOp);
+ bool fixedOutput = true;
+ bool reuseInput = false;
+ LDefinition tempDef1 = LDefinition::BogusTemp();
+ LDefinition tempDef2 = LDefinition::BogusTemp();
+ LAllocation value;
+
+ if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) {
+ value = useRegisterOrConstant(ins->value());
+ fixedOutput = false;
+ if (bitOp) {
+ tempDef1 = tempFixed(eax);
+ tempDef2 = temp();
+ } else {
+ tempDef1 = temp();
+ }
+ } else if (useI386ByteRegisters && ins->isByteArray()) {
+ if (ins->value()->isConstant()) {
+ value = useRegisterOrConstant(ins->value());
+ } else {
+ value = useFixed(ins->value(), ebx);
+ }
+ if (bitOp) {
+ tempDef1 = tempFixed(ecx);
+ }
+ } else if (bitOp) {
+ value = useRegisterOrConstant(ins->value());
+ tempDef1 = temp();
+ } else if (ins->value()->isConstant()) {
+ fixedOutput = false;
+ value = useRegisterOrConstant(ins->value());
+ } else {
+ fixedOutput = false;
+ reuseInput = true;
+ value = useRegisterAtStart(ins->value());
+ }
+
+ LAtomicTypedArrayElementBinop* lir = new (alloc())
+ LAtomicTypedArrayElementBinop(elements, index, value, tempDef1, tempDef2);
+
+ if (fixedOutput) {
+ defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
+ } else if (reuseInput) {
+ defineReuseInput(lir, ins, LAtomicTypedArrayElementBinop::valueOp);
+ } else {
+ define(lir, ins);
+ }
+}
+
+void LIRGenerator::visitCopySign(MCopySign* ins) {
+ MDefinition* lhs = ins->lhs();
+ MDefinition* rhs = ins->rhs();
+
+ MOZ_ASSERT(IsFloatingPointType(lhs->type()));
+ MOZ_ASSERT(lhs->type() == rhs->type());
+ MOZ_ASSERT(lhs->type() == ins->type());
+
+ LInstructionHelper<1, 2, 2>* lir;
+ if (lhs->type() == MIRType::Double) {
+ lir = new (alloc()) LCopySignD();
+ } else {
+ lir = new (alloc()) LCopySignF();
+ }
+
+ // As lowerForFPU, but we want rhs to be in a FP register too.
+ lir->setOperand(0, useRegisterAtStart(lhs));
+ if (!Assembler::HasAVX()) {
+ lir->setOperand(1, willHaveDifferentLIRNodes(lhs, rhs)
+ ? useRegister(rhs)
+ : useRegisterAtStart(rhs));
+ defineReuseInput(lir, ins, 0);
+ } else {
+ lir->setOperand(1, useRegisterAtStart(rhs));
+ define(lir, ins);
+ }
+}
+
+// These lowerings are really x86-shared but some Masm APIs are not yet
+// available on x86.
+
+// Ternary and binary operators require the dest register to be the same as
+// their first input register, leading to a pattern of useRegisterAtStart +
+// defineReuseInput.
+
+void LIRGenerator::visitWasmTernarySimd128(MWasmTernarySimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ MOZ_ASSERT(ins->v0()->type() == MIRType::Simd128);
+ MOZ_ASSERT(ins->v1()->type() == MIRType::Simd128);
+ MOZ_ASSERT(ins->v2()->type() == MIRType::Simd128);
+ MOZ_ASSERT(ins->type() == MIRType::Simd128);
+
+ switch (ins->simdOp()) {
+ case wasm::SimdOp::V128Bitselect: {
+ // Enforcing lhs == output avoids one setup move. We would like to also
+ // enforce merging the control with the temp (with
+ // usRegisterAtStart(control) and tempCopy()), but the register allocator
+ // ignores those constraints at present.
+ auto* lir = new (alloc()) LWasmTernarySimd128(
+ ins->simdOp(), useRegisterAtStart(ins->v0()), useRegister(ins->v1()),
+ useRegister(ins->v2()), tempSimd128());
+ defineReuseInput(lir, ins, LWasmTernarySimd128::V0);
+ break;
+ }
+ case wasm::SimdOp::F32x4RelaxedFma:
+ case wasm::SimdOp::F32x4RelaxedFnma:
+ case wasm::SimdOp::F64x2RelaxedFma:
+ case wasm::SimdOp::F64x2RelaxedFnma: {
+ auto* lir = new (alloc()) LWasmTernarySimd128(
+ ins->simdOp(), useRegister(ins->v0()), useRegister(ins->v1()),
+ useRegisterAtStart(ins->v2()));
+ defineReuseInput(lir, ins, LWasmTernarySimd128::V2);
+ break;
+ }
+ case wasm::SimdOp::I32x4DotI8x16I7x16AddS: {
+ auto* lir = new (alloc()) LWasmTernarySimd128(
+ ins->simdOp(), useRegister(ins->v0()), useRegister(ins->v1()),
+ useRegisterAtStart(ins->v2()));
+ defineReuseInput(lir, ins, LWasmTernarySimd128::V2);
+ break;
+ }
+ case wasm::SimdOp::I8x16RelaxedLaneSelect:
+ case wasm::SimdOp::I16x8RelaxedLaneSelect:
+ case wasm::SimdOp::I32x4RelaxedLaneSelect:
+ case wasm::SimdOp::I64x2RelaxedLaneSelect: {
+ if (Assembler::HasAVX()) {
+ auto* lir = new (alloc()) LWasmTernarySimd128(
+ ins->simdOp(), useRegisterAtStart(ins->v0()),
+ useRegisterAtStart(ins->v1()), useRegisterAtStart(ins->v2()));
+ define(lir, ins);
+ } else {
+ auto* lir = new (alloc()) LWasmTernarySimd128(
+ ins->simdOp(), useRegister(ins->v0()),
+ useRegisterAtStart(ins->v1()), useFixed(ins->v2(), vmm0));
+ defineReuseInput(lir, ins, LWasmTernarySimd128::V1);
+ }
+ break;
+ }
+ case wasm::SimdOp::F32x4RelaxedDotBF16x8AddF32x4: {
+ auto* lir = new (alloc()) LWasmTernarySimd128(
+ ins->simdOp(), useRegister(ins->v0()), useRegister(ins->v1()),
+ useRegisterAtStart(ins->v2()), tempSimd128());
+ defineReuseInput(lir, ins, LWasmTernarySimd128::V2);
+ break;
+ }
+ default:
+ MOZ_CRASH("NYI");
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void LIRGenerator::visitWasmBinarySimd128(MWasmBinarySimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ MDefinition* lhs = ins->lhs();
+ MDefinition* rhs = ins->rhs();
+ wasm::SimdOp op = ins->simdOp();
+
+ MOZ_ASSERT(lhs->type() == MIRType::Simd128);
+ MOZ_ASSERT(rhs->type() == MIRType::Simd128);
+ MOZ_ASSERT(ins->type() == MIRType::Simd128);
+
+ // Note MWasmBinarySimd128::foldsTo has already specialized operations that
+ // have a constant operand, so this takes care of more general cases of
+ // reordering, see ReorderCommutative.
+ if (ins->isCommutative()) {
+ ReorderCommutative(&lhs, &rhs, ins);
+ }
+
+ // Swap operands and change operation if necessary, these are all x86/x64
+ // dependent transformations. Except where noted, this is about avoiding
+ // unnecessary moves and fixups in the code generator macros.
+ bool swap = false;
+ switch (op) {
+ case wasm::SimdOp::V128AndNot: {
+ // Code generation requires the operands to be reversed.
+ swap = true;
+ break;
+ }
+ case wasm::SimdOp::I8x16LtS: {
+ swap = true;
+ op = wasm::SimdOp::I8x16GtS;
+ break;
+ }
+ case wasm::SimdOp::I8x16GeS: {
+ swap = true;
+ op = wasm::SimdOp::I8x16LeS;
+ break;
+ }
+ case wasm::SimdOp::I16x8LtS: {
+ swap = true;
+ op = wasm::SimdOp::I16x8GtS;
+ break;
+ }
+ case wasm::SimdOp::I16x8GeS: {
+ swap = true;
+ op = wasm::SimdOp::I16x8LeS;
+ break;
+ }
+ case wasm::SimdOp::I32x4LtS: {
+ swap = true;
+ op = wasm::SimdOp::I32x4GtS;
+ break;
+ }
+ case wasm::SimdOp::I32x4GeS: {
+ swap = true;
+ op = wasm::SimdOp::I32x4LeS;
+ break;
+ }
+ case wasm::SimdOp::F32x4Gt: {
+ swap = true;
+ op = wasm::SimdOp::F32x4Lt;
+ break;
+ }
+ case wasm::SimdOp::F32x4Ge: {
+ swap = true;
+ op = wasm::SimdOp::F32x4Le;
+ break;
+ }
+ case wasm::SimdOp::F64x2Gt: {
+ swap = true;
+ op = wasm::SimdOp::F64x2Lt;
+ break;
+ }
+ case wasm::SimdOp::F64x2Ge: {
+ swap = true;
+ op = wasm::SimdOp::F64x2Le;
+ break;
+ }
+ case wasm::SimdOp::F32x4PMin:
+ case wasm::SimdOp::F32x4PMax:
+ case wasm::SimdOp::F64x2PMin:
+ case wasm::SimdOp::F64x2PMax: {
+ // Code generation requires the operations to be reversed (the rhs is the
+ // output register).
+ swap = true;
+ break;
+ }
+ default:
+ break;
+ }
+ if (swap) {
+ MDefinition* tmp = lhs;
+ lhs = rhs;
+ rhs = tmp;
+ }
+
+ // Allocate temp registers
+ LDefinition tempReg0 = LDefinition::BogusTemp();
+ LDefinition tempReg1 = LDefinition::BogusTemp();
+ switch (op) {
+ case wasm::SimdOp::I64x2Mul:
+ tempReg0 = tempSimd128();
+ break;
+ case wasm::SimdOp::F32x4Min:
+ case wasm::SimdOp::F32x4Max:
+ case wasm::SimdOp::F64x2Min:
+ case wasm::SimdOp::F64x2Max:
+ tempReg0 = tempSimd128();
+ tempReg1 = tempSimd128();
+ break;
+ case wasm::SimdOp::I64x2LtS:
+ case wasm::SimdOp::I64x2GtS:
+ case wasm::SimdOp::I64x2LeS:
+ case wasm::SimdOp::I64x2GeS:
+ // The compareForOrderingInt64x2AVX implementation does not require
+ // temps but needs SSE4.2 support. Checking if both AVX and SSE4.2
+ // are enabled.
+ if (!(Assembler::HasAVX() && Assembler::HasSSE42())) {
+ tempReg0 = tempSimd128();
+ tempReg1 = tempSimd128();
+ }
+ break;
+ default:
+ break;
+ }
+
+ // For binary ops, without AVX support, the Masm API always is usually
+ // (rhs, lhsDest) and requires AtStart+ReuseInput for the lhs.
+ //
+ // For a few ops, the API is actually (rhsDest, lhs) and the rules are the
+ // same but the reversed. We swapped operands above; they will be swapped
+ // again in the code generator to emit the right code.
+ //
+ // If AVX support is enabled, some binary ops can use output as destination,
+ // useRegisterAtStart is applied for both operands and no need for ReuseInput.
+
+ switch (op) {
+ case wasm::SimdOp::I8x16AvgrU:
+ case wasm::SimdOp::I16x8AvgrU:
+ case wasm::SimdOp::I8x16Add:
+ case wasm::SimdOp::I8x16AddSatS:
+ case wasm::SimdOp::I8x16AddSatU:
+ case wasm::SimdOp::I8x16Sub:
+ case wasm::SimdOp::I8x16SubSatS:
+ case wasm::SimdOp::I8x16SubSatU:
+ case wasm::SimdOp::I16x8Mul:
+ case wasm::SimdOp::I16x8MinS:
+ case wasm::SimdOp::I16x8MinU:
+ case wasm::SimdOp::I16x8MaxS:
+ case wasm::SimdOp::I16x8MaxU:
+ case wasm::SimdOp::I32x4Add:
+ case wasm::SimdOp::I32x4Sub:
+ case wasm::SimdOp::I32x4Mul:
+ case wasm::SimdOp::I32x4MinS:
+ case wasm::SimdOp::I32x4MinU:
+ case wasm::SimdOp::I32x4MaxS:
+ case wasm::SimdOp::I32x4MaxU:
+ case wasm::SimdOp::I64x2Add:
+ case wasm::SimdOp::I64x2Sub:
+ case wasm::SimdOp::I64x2Mul:
+ case wasm::SimdOp::F32x4Add:
+ case wasm::SimdOp::F32x4Sub:
+ case wasm::SimdOp::F32x4Mul:
+ case wasm::SimdOp::F32x4Div:
+ case wasm::SimdOp::F64x2Add:
+ case wasm::SimdOp::F64x2Sub:
+ case wasm::SimdOp::F64x2Mul:
+ case wasm::SimdOp::F64x2Div:
+ case wasm::SimdOp::F32x4Eq:
+ case wasm::SimdOp::F32x4Ne:
+ case wasm::SimdOp::F32x4Lt:
+ case wasm::SimdOp::F32x4Le:
+ case wasm::SimdOp::F64x2Eq:
+ case wasm::SimdOp::F64x2Ne:
+ case wasm::SimdOp::F64x2Lt:
+ case wasm::SimdOp::F64x2Le:
+ case wasm::SimdOp::F32x4PMin:
+ case wasm::SimdOp::F32x4PMax:
+ case wasm::SimdOp::F64x2PMin:
+ case wasm::SimdOp::F64x2PMax:
+ case wasm::SimdOp::I8x16Swizzle:
+ case wasm::SimdOp::I8x16RelaxedSwizzle:
+ case wasm::SimdOp::I8x16Eq:
+ case wasm::SimdOp::I8x16Ne:
+ case wasm::SimdOp::I8x16GtS:
+ case wasm::SimdOp::I8x16LeS:
+ case wasm::SimdOp::I8x16LtU:
+ case wasm::SimdOp::I8x16GtU:
+ case wasm::SimdOp::I8x16LeU:
+ case wasm::SimdOp::I8x16GeU:
+ case wasm::SimdOp::I16x8Eq:
+ case wasm::SimdOp::I16x8Ne:
+ case wasm::SimdOp::I16x8GtS:
+ case wasm::SimdOp::I16x8LeS:
+ case wasm::SimdOp::I16x8LtU:
+ case wasm::SimdOp::I16x8GtU:
+ case wasm::SimdOp::I16x8LeU:
+ case wasm::SimdOp::I16x8GeU:
+ case wasm::SimdOp::I32x4Eq:
+ case wasm::SimdOp::I32x4Ne:
+ case wasm::SimdOp::I32x4GtS:
+ case wasm::SimdOp::I32x4LeS:
+ case wasm::SimdOp::I32x4LtU:
+ case wasm::SimdOp::I32x4GtU:
+ case wasm::SimdOp::I32x4LeU:
+ case wasm::SimdOp::I32x4GeU:
+ case wasm::SimdOp::I64x2Eq:
+ case wasm::SimdOp::I64x2Ne:
+ case wasm::SimdOp::I64x2LtS:
+ case wasm::SimdOp::I64x2GtS:
+ case wasm::SimdOp::I64x2LeS:
+ case wasm::SimdOp::I64x2GeS:
+ case wasm::SimdOp::V128And:
+ case wasm::SimdOp::V128Or:
+ case wasm::SimdOp::V128Xor:
+ case wasm::SimdOp::V128AndNot:
+ case wasm::SimdOp::F32x4Min:
+ case wasm::SimdOp::F32x4Max:
+ case wasm::SimdOp::F64x2Min:
+ case wasm::SimdOp::F64x2Max:
+ case wasm::SimdOp::I8x16NarrowI16x8S:
+ case wasm::SimdOp::I8x16NarrowI16x8U:
+ case wasm::SimdOp::I16x8NarrowI32x4S:
+ case wasm::SimdOp::I16x8NarrowI32x4U:
+ case wasm::SimdOp::I32x4DotI16x8S:
+ case wasm::SimdOp::I16x8ExtmulLowI8x16S:
+ case wasm::SimdOp::I16x8ExtmulHighI8x16S:
+ case wasm::SimdOp::I16x8ExtmulLowI8x16U:
+ case wasm::SimdOp::I16x8ExtmulHighI8x16U:
+ case wasm::SimdOp::I32x4ExtmulLowI16x8S:
+ case wasm::SimdOp::I32x4ExtmulHighI16x8S:
+ case wasm::SimdOp::I32x4ExtmulLowI16x8U:
+ case wasm::SimdOp::I32x4ExtmulHighI16x8U:
+ case wasm::SimdOp::I64x2ExtmulLowI32x4S:
+ case wasm::SimdOp::I64x2ExtmulHighI32x4S:
+ case wasm::SimdOp::I64x2ExtmulLowI32x4U:
+ case wasm::SimdOp::I64x2ExtmulHighI32x4U:
+ case wasm::SimdOp::I16x8Q15MulrSatS:
+ case wasm::SimdOp::F32x4RelaxedMin:
+ case wasm::SimdOp::F32x4RelaxedMax:
+ case wasm::SimdOp::F64x2RelaxedMin:
+ case wasm::SimdOp::F64x2RelaxedMax:
+ case wasm::SimdOp::I16x8RelaxedQ15MulrS:
+ case wasm::SimdOp::I16x8DotI8x16I7x16S:
+ case wasm::SimdOp::MozPMADDUBSW:
+ if (isThreeOpAllowed()) {
+ auto* lir = new (alloc())
+ LWasmBinarySimd128(op, useRegisterAtStart(lhs),
+ useRegisterAtStart(rhs), tempReg0, tempReg1);
+ define(lir, ins);
+ break;
+ }
+ [[fallthrough]];
+ default: {
+ LAllocation lhsDestAlloc = useRegisterAtStart(lhs);
+ LAllocation rhsAlloc = willHaveDifferentLIRNodes(lhs, rhs)
+ ? useRegister(rhs)
+ : useRegisterAtStart(rhs);
+ auto* lir = new (alloc())
+ LWasmBinarySimd128(op, lhsDestAlloc, rhsAlloc, tempReg0, tempReg1);
+ defineReuseInput(lir, ins, LWasmBinarySimd128::LhsDest);
+ break;
+ }
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+#ifdef ENABLE_WASM_SIMD
+bool MWasmTernarySimd128::specializeBitselectConstantMaskAsShuffle(
+ int8_t shuffle[16]) {
+ if (simdOp() != wasm::SimdOp::V128Bitselect) {
+ return false;
+ }
+
+ // Optimization when control vector is a mask with all 0 or all 1 per lane.
+ // On x86, there is no bitselect, blend operations will be a win,
+ // e.g. via PBLENDVB or PBLENDW.
+ SimdConstant constant = static_cast<MWasmFloatConstant*>(v2())->toSimd128();
+ const SimdConstant::I8x16& bytes = constant.asInt8x16();
+ for (int8_t i = 0; i < 16; i++) {
+ if (bytes[i] == -1) {
+ shuffle[i] = i + 16;
+ } else if (bytes[i] == 0) {
+ shuffle[i] = i;
+ } else {
+ return false;
+ }
+ }
+ return true;
+}
+bool MWasmTernarySimd128::canRelaxBitselect() {
+ wasm::SimdOp simdOp;
+ if (v2()->isWasmBinarySimd128()) {
+ simdOp = v2()->toWasmBinarySimd128()->simdOp();
+ } else if (v2()->isWasmBinarySimd128WithConstant()) {
+ simdOp = v2()->toWasmBinarySimd128WithConstant()->simdOp();
+ } else {
+ return false;
+ }
+ switch (simdOp) {
+ case wasm::SimdOp::I8x16Eq:
+ case wasm::SimdOp::I8x16Ne:
+ case wasm::SimdOp::I8x16GtS:
+ case wasm::SimdOp::I8x16GeS:
+ case wasm::SimdOp::I8x16LtS:
+ case wasm::SimdOp::I8x16LeS:
+ case wasm::SimdOp::I8x16GtU:
+ case wasm::SimdOp::I8x16GeU:
+ case wasm::SimdOp::I8x16LtU:
+ case wasm::SimdOp::I8x16LeU:
+ case wasm::SimdOp::I16x8Eq:
+ case wasm::SimdOp::I16x8Ne:
+ case wasm::SimdOp::I16x8GtS:
+ case wasm::SimdOp::I16x8GeS:
+ case wasm::SimdOp::I16x8LtS:
+ case wasm::SimdOp::I16x8LeS:
+ case wasm::SimdOp::I16x8GtU:
+ case wasm::SimdOp::I16x8GeU:
+ case wasm::SimdOp::I16x8LtU:
+ case wasm::SimdOp::I16x8LeU:
+ case wasm::SimdOp::I32x4Eq:
+ case wasm::SimdOp::I32x4Ne:
+ case wasm::SimdOp::I32x4GtS:
+ case wasm::SimdOp::I32x4GeS:
+ case wasm::SimdOp::I32x4LtS:
+ case wasm::SimdOp::I32x4LeS:
+ case wasm::SimdOp::I32x4GtU:
+ case wasm::SimdOp::I32x4GeU:
+ case wasm::SimdOp::I32x4LtU:
+ case wasm::SimdOp::I32x4LeU:
+ case wasm::SimdOp::I64x2Eq:
+ case wasm::SimdOp::I64x2Ne:
+ case wasm::SimdOp::I64x2GtS:
+ case wasm::SimdOp::I64x2GeS:
+ case wasm::SimdOp::I64x2LtS:
+ case wasm::SimdOp::I64x2LeS:
+ case wasm::SimdOp::F32x4Eq:
+ case wasm::SimdOp::F32x4Ne:
+ case wasm::SimdOp::F32x4Gt:
+ case wasm::SimdOp::F32x4Ge:
+ case wasm::SimdOp::F32x4Lt:
+ case wasm::SimdOp::F32x4Le:
+ case wasm::SimdOp::F64x2Eq:
+ case wasm::SimdOp::F64x2Ne:
+ case wasm::SimdOp::F64x2Gt:
+ case wasm::SimdOp::F64x2Ge:
+ case wasm::SimdOp::F64x2Lt:
+ case wasm::SimdOp::F64x2Le:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+bool MWasmBinarySimd128::canPmaddubsw() {
+ MOZ_ASSERT(Assembler::HasSSE3());
+ return true;
+}
+#endif
+
+bool MWasmBinarySimd128::specializeForConstantRhs() {
+ // The order follows MacroAssembler.h, generally
+ switch (simdOp()) {
+ // Operations implemented by a single native instruction where it is
+ // plausible that the rhs (after commutation if available) could be a
+ // constant.
+ //
+ // Swizzle is not here because it was handled earlier in the pipeline.
+ //
+ // Integer compares >= and < are not here because they are not supported in
+ // the hardware.
+ //
+ // Floating compares are not here because our patching machinery can't
+ // handle them yet.
+ //
+ // Floating-point min and max (including pmin and pmax) are not here because
+ // they are not straightforward to implement.
+ case wasm::SimdOp::I8x16Add:
+ case wasm::SimdOp::I16x8Add:
+ case wasm::SimdOp::I32x4Add:
+ case wasm::SimdOp::I64x2Add:
+ case wasm::SimdOp::I8x16Sub:
+ case wasm::SimdOp::I16x8Sub:
+ case wasm::SimdOp::I32x4Sub:
+ case wasm::SimdOp::I64x2Sub:
+ case wasm::SimdOp::I16x8Mul:
+ case wasm::SimdOp::I32x4Mul:
+ case wasm::SimdOp::I8x16AddSatS:
+ case wasm::SimdOp::I8x16AddSatU:
+ case wasm::SimdOp::I16x8AddSatS:
+ case wasm::SimdOp::I16x8AddSatU:
+ case wasm::SimdOp::I8x16SubSatS:
+ case wasm::SimdOp::I8x16SubSatU:
+ case wasm::SimdOp::I16x8SubSatS:
+ case wasm::SimdOp::I16x8SubSatU:
+ case wasm::SimdOp::I8x16MinS:
+ case wasm::SimdOp::I8x16MinU:
+ case wasm::SimdOp::I16x8MinS:
+ case wasm::SimdOp::I16x8MinU:
+ case wasm::SimdOp::I32x4MinS:
+ case wasm::SimdOp::I32x4MinU:
+ case wasm::SimdOp::I8x16MaxS:
+ case wasm::SimdOp::I8x16MaxU:
+ case wasm::SimdOp::I16x8MaxS:
+ case wasm::SimdOp::I16x8MaxU:
+ case wasm::SimdOp::I32x4MaxS:
+ case wasm::SimdOp::I32x4MaxU:
+ case wasm::SimdOp::V128And:
+ case wasm::SimdOp::V128Or:
+ case wasm::SimdOp::V128Xor:
+ case wasm::SimdOp::I8x16Eq:
+ case wasm::SimdOp::I8x16Ne:
+ case wasm::SimdOp::I8x16GtS:
+ case wasm::SimdOp::I8x16LeS:
+ case wasm::SimdOp::I16x8Eq:
+ case wasm::SimdOp::I16x8Ne:
+ case wasm::SimdOp::I16x8GtS:
+ case wasm::SimdOp::I16x8LeS:
+ case wasm::SimdOp::I32x4Eq:
+ case wasm::SimdOp::I32x4Ne:
+ case wasm::SimdOp::I32x4GtS:
+ case wasm::SimdOp::I32x4LeS:
+ case wasm::SimdOp::I64x2Mul:
+ case wasm::SimdOp::F32x4Eq:
+ case wasm::SimdOp::F32x4Ne:
+ case wasm::SimdOp::F32x4Lt:
+ case wasm::SimdOp::F32x4Le:
+ case wasm::SimdOp::F64x2Eq:
+ case wasm::SimdOp::F64x2Ne:
+ case wasm::SimdOp::F64x2Lt:
+ case wasm::SimdOp::F64x2Le:
+ case wasm::SimdOp::I32x4DotI16x8S:
+ case wasm::SimdOp::F32x4Add:
+ case wasm::SimdOp::F64x2Add:
+ case wasm::SimdOp::F32x4Sub:
+ case wasm::SimdOp::F64x2Sub:
+ case wasm::SimdOp::F32x4Div:
+ case wasm::SimdOp::F64x2Div:
+ case wasm::SimdOp::F32x4Mul:
+ case wasm::SimdOp::F64x2Mul:
+ case wasm::SimdOp::I8x16NarrowI16x8S:
+ case wasm::SimdOp::I8x16NarrowI16x8U:
+ case wasm::SimdOp::I16x8NarrowI32x4S:
+ case wasm::SimdOp::I16x8NarrowI32x4U:
+ return true;
+ default:
+ return false;
+ }
+}
+
+void LIRGenerator::visitWasmBinarySimd128WithConstant(
+ MWasmBinarySimd128WithConstant* ins) {
+#ifdef ENABLE_WASM_SIMD
+ MDefinition* lhs = ins->lhs();
+
+ MOZ_ASSERT(lhs->type() == MIRType::Simd128);
+ MOZ_ASSERT(ins->type() == MIRType::Simd128);
+
+ // Allocate temp registers
+ LDefinition tempReg = LDefinition::BogusTemp();
+ switch (ins->simdOp()) {
+ case wasm::SimdOp::I64x2Mul:
+ tempReg = tempSimd128();
+ break;
+ default:
+ break;
+ }
+
+ if (isThreeOpAllowed()) {
+ // The non-destructive versions of instructions will be available
+ // when AVX is enabled.
+ LAllocation lhsAlloc = useRegisterAtStart(lhs);
+ auto* lir = new (alloc())
+ LWasmBinarySimd128WithConstant(lhsAlloc, ins->rhs(), tempReg);
+ define(lir, ins);
+ } else {
+ // Always beneficial to reuse the lhs register here, see discussion in
+ // visitWasmBinarySimd128() and also code in specializeForConstantRhs().
+ LAllocation lhsDestAlloc = useRegisterAtStart(lhs);
+ auto* lir = new (alloc())
+ LWasmBinarySimd128WithConstant(lhsDestAlloc, ins->rhs(), tempReg);
+ defineReuseInput(lir, ins, LWasmBinarySimd128WithConstant::LhsDest);
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void LIRGenerator::visitWasmShiftSimd128(MWasmShiftSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ MDefinition* lhs = ins->lhs();
+ MDefinition* rhs = ins->rhs();
+
+ MOZ_ASSERT(lhs->type() == MIRType::Simd128);
+ MOZ_ASSERT(rhs->type() == MIRType::Int32);
+ MOZ_ASSERT(ins->type() == MIRType::Simd128);
+
+ if (rhs->isConstant()) {
+ int32_t shiftCountMask;
+ switch (ins->simdOp()) {
+ case wasm::SimdOp::I8x16Shl:
+ case wasm::SimdOp::I8x16ShrU:
+ case wasm::SimdOp::I8x16ShrS:
+ shiftCountMask = 7;
+ break;
+ case wasm::SimdOp::I16x8Shl:
+ case wasm::SimdOp::I16x8ShrU:
+ case wasm::SimdOp::I16x8ShrS:
+ shiftCountMask = 15;
+ break;
+ case wasm::SimdOp::I32x4Shl:
+ case wasm::SimdOp::I32x4ShrU:
+ case wasm::SimdOp::I32x4ShrS:
+ shiftCountMask = 31;
+ break;
+ case wasm::SimdOp::I64x2Shl:
+ case wasm::SimdOp::I64x2ShrU:
+ case wasm::SimdOp::I64x2ShrS:
+ shiftCountMask = 63;
+ break;
+ default:
+ MOZ_CRASH("Unexpected shift operation");
+ }
+
+ int32_t shiftCount = rhs->toConstant()->toInt32() & shiftCountMask;
+ if (shiftCount == shiftCountMask) {
+ // Check if possible to apply sign replication optimization.
+ // For some ops the input shall be reused.
+ switch (ins->simdOp()) {
+ case wasm::SimdOp::I8x16ShrS: {
+ auto* lir =
+ new (alloc()) LWasmSignReplicationSimd128(useRegister(lhs));
+ define(lir, ins);
+ return;
+ }
+ case wasm::SimdOp::I16x8ShrS:
+ case wasm::SimdOp::I32x4ShrS:
+ case wasm::SimdOp::I64x2ShrS: {
+ auto* lir = new (alloc())
+ LWasmSignReplicationSimd128(useRegisterAtStart(lhs));
+ if (isThreeOpAllowed()) {
+ define(lir, ins);
+ } else {
+ // For non-AVX, it is always beneficial to reuse the input.
+ defineReuseInput(lir, ins, LWasmConstantShiftSimd128::Src);
+ }
+ return;
+ }
+ default:
+ break;
+ }
+ }
+
+# ifdef DEBUG
+ js::wasm::ReportSimdAnalysis("shift -> constant shift");
+# endif
+ auto* lir = new (alloc())
+ LWasmConstantShiftSimd128(useRegisterAtStart(lhs), shiftCount);
+ if (isThreeOpAllowed()) {
+ define(lir, ins);
+ } else {
+ // For non-AVX, it is always beneficial to reuse the input.
+ defineReuseInput(lir, ins, LWasmConstantShiftSimd128::Src);
+ }
+ return;
+ }
+
+# ifdef DEBUG
+ js::wasm::ReportSimdAnalysis("shift -> variable shift");
+# endif
+
+ LDefinition tempReg = LDefinition::BogusTemp();
+ switch (ins->simdOp()) {
+ case wasm::SimdOp::I8x16Shl:
+ case wasm::SimdOp::I8x16ShrS:
+ case wasm::SimdOp::I8x16ShrU:
+ case wasm::SimdOp::I64x2ShrS:
+ tempReg = tempSimd128();
+ break;
+ default:
+ break;
+ }
+
+ // Reusing the input if possible is never detrimental.
+ LAllocation lhsDestAlloc = useRegisterAtStart(lhs);
+ LAllocation rhsAlloc = useRegisterAtStart(rhs);
+ auto* lir =
+ new (alloc()) LWasmVariableShiftSimd128(lhsDestAlloc, rhsAlloc, tempReg);
+ defineReuseInput(lir, ins, LWasmVariableShiftSimd128::LhsDest);
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void LIRGenerator::visitWasmShuffleSimd128(MWasmShuffleSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ MOZ_ASSERT(ins->lhs()->type() == MIRType::Simd128);
+ MOZ_ASSERT(ins->rhs()->type() == MIRType::Simd128);
+ MOZ_ASSERT(ins->type() == MIRType::Simd128);
+
+ SimdShuffle s = ins->shuffle();
+ switch (s.opd) {
+ case SimdShuffle::Operand::LEFT:
+ case SimdShuffle::Operand::RIGHT: {
+ LAllocation src;
+ bool reuse = false;
+ switch (*s.permuteOp) {
+ case SimdPermuteOp::MOVE:
+ reuse = true;
+ break;
+ case SimdPermuteOp::BROADCAST_8x16:
+ case SimdPermuteOp::BROADCAST_16x8:
+ case SimdPermuteOp::PERMUTE_8x16:
+ case SimdPermuteOp::PERMUTE_16x8:
+ case SimdPermuteOp::PERMUTE_32x4:
+ case SimdPermuteOp::ROTATE_RIGHT_8x16:
+ case SimdPermuteOp::SHIFT_LEFT_8x16:
+ case SimdPermuteOp::SHIFT_RIGHT_8x16:
+ case SimdPermuteOp::REVERSE_16x8:
+ case SimdPermuteOp::REVERSE_32x4:
+ case SimdPermuteOp::REVERSE_64x2:
+ // No need to reuse registers when VEX instructions are enabled.
+ reuse = !Assembler::HasAVX();
+ break;
+ default:
+ MOZ_CRASH("Unexpected operator");
+ }
+ if (s.opd == SimdShuffle::Operand::LEFT) {
+ src = useRegisterAtStart(ins->lhs());
+ } else {
+ src = useRegisterAtStart(ins->rhs());
+ }
+ auto* lir =
+ new (alloc()) LWasmPermuteSimd128(src, *s.permuteOp, s.control);
+ if (reuse) {
+ defineReuseInput(lir, ins, LWasmPermuteSimd128::Src);
+ } else {
+ define(lir, ins);
+ }
+ break;
+ }
+ case SimdShuffle::Operand::BOTH:
+ case SimdShuffle::Operand::BOTH_SWAPPED: {
+ LDefinition temp = LDefinition::BogusTemp();
+ switch (*s.shuffleOp) {
+ case SimdShuffleOp::BLEND_8x16:
+ temp = Assembler::HasAVX() ? tempSimd128() : tempFixed(xmm0);
+ break;
+ default:
+ break;
+ }
+ if (isThreeOpAllowed()) {
+ LAllocation lhs;
+ LAllocation rhs;
+ if (s.opd == SimdShuffle::Operand::BOTH) {
+ lhs = useRegisterAtStart(ins->lhs());
+ rhs = useRegisterAtStart(ins->rhs());
+ } else {
+ lhs = useRegisterAtStart(ins->rhs());
+ rhs = useRegisterAtStart(ins->lhs());
+ }
+ auto* lir = new (alloc())
+ LWasmShuffleSimd128(lhs, rhs, temp, *s.shuffleOp, s.control);
+ define(lir, ins);
+ } else {
+ LAllocation lhs;
+ LAllocation rhs;
+ if (s.opd == SimdShuffle::Operand::BOTH) {
+ lhs = useRegisterAtStart(ins->lhs());
+ rhs = useRegister(ins->rhs());
+ } else {
+ lhs = useRegisterAtStart(ins->rhs());
+ rhs = useRegister(ins->lhs());
+ }
+ auto* lir = new (alloc())
+ LWasmShuffleSimd128(lhs, rhs, temp, *s.shuffleOp, s.control);
+ defineReuseInput(lir, ins, LWasmShuffleSimd128::LhsDest);
+ }
+ break;
+ }
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void LIRGenerator::visitWasmReplaceLaneSimd128(MWasmReplaceLaneSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ MOZ_ASSERT(ins->lhs()->type() == MIRType::Simd128);
+ MOZ_ASSERT(ins->type() == MIRType::Simd128);
+
+ // If AVX support is disabled, the Masm API is (rhs, lhsDest) and requires
+ // AtStart+ReuseInput for the lhs. For type reasons, the rhs will never be
+ // the same as the lhs and is therefore a plain Use.
+ //
+ // If AVX support is enabled, useRegisterAtStart is preferred.
+
+ if (ins->rhs()->type() == MIRType::Int64) {
+ if (isThreeOpAllowed()) {
+ auto* lir = new (alloc()) LWasmReplaceInt64LaneSimd128(
+ useRegisterAtStart(ins->lhs()), useInt64RegisterAtStart(ins->rhs()));
+ define(lir, ins);
+ } else {
+ auto* lir = new (alloc()) LWasmReplaceInt64LaneSimd128(
+ useRegisterAtStart(ins->lhs()), useInt64Register(ins->rhs()));
+ defineReuseInput(lir, ins, LWasmReplaceInt64LaneSimd128::LhsDest);
+ }
+ } else {
+ if (isThreeOpAllowed()) {
+ auto* lir = new (alloc()) LWasmReplaceLaneSimd128(
+ useRegisterAtStart(ins->lhs()), useRegisterAtStart(ins->rhs()));
+ define(lir, ins);
+ } else {
+ auto* lir = new (alloc()) LWasmReplaceLaneSimd128(
+ useRegisterAtStart(ins->lhs()), useRegister(ins->rhs()));
+ defineReuseInput(lir, ins, LWasmReplaceLaneSimd128::LhsDest);
+ }
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void LIRGenerator::visitWasmScalarToSimd128(MWasmScalarToSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ MOZ_ASSERT(ins->type() == MIRType::Simd128);
+
+ switch (ins->input()->type()) {
+ case MIRType::Int64: {
+ // 64-bit integer splats.
+ // Load-and-(sign|zero)extend.
+ auto* lir = new (alloc())
+ LWasmInt64ToSimd128(useInt64RegisterAtStart(ins->input()));
+ define(lir, ins);
+ break;
+ }
+ case MIRType::Float32:
+ case MIRType::Double: {
+ // Floating-point splats.
+ // Ideally we save a move on SSE systems by reusing the input register,
+ // but since the input and output register types differ, we can't.
+ auto* lir =
+ new (alloc()) LWasmScalarToSimd128(useRegisterAtStart(ins->input()));
+ define(lir, ins);
+ break;
+ }
+ default: {
+ // 32-bit integer splats.
+ auto* lir =
+ new (alloc()) LWasmScalarToSimd128(useRegisterAtStart(ins->input()));
+ define(lir, ins);
+ break;
+ }
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void LIRGenerator::visitWasmUnarySimd128(MWasmUnarySimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ MOZ_ASSERT(ins->input()->type() == MIRType::Simd128);
+ MOZ_ASSERT(ins->type() == MIRType::Simd128);
+
+ bool useAtStart = false;
+ bool reuseInput = false;
+ LDefinition tempReg = LDefinition::BogusTemp();
+ switch (ins->simdOp()) {
+ case wasm::SimdOp::I8x16Neg:
+ case wasm::SimdOp::I16x8Neg:
+ case wasm::SimdOp::I32x4Neg:
+ case wasm::SimdOp::I64x2Neg:
+ case wasm::SimdOp::I16x8ExtaddPairwiseI8x16S:
+ // Prefer src != dest to avoid an unconditional src->temp move.
+ MOZ_ASSERT(!reuseInput);
+ // If AVX is enabled, we prefer useRegisterAtStart.
+ useAtStart = isThreeOpAllowed();
+ break;
+ case wasm::SimdOp::F32x4Neg:
+ case wasm::SimdOp::F64x2Neg:
+ case wasm::SimdOp::F32x4Abs:
+ case wasm::SimdOp::F64x2Abs:
+ case wasm::SimdOp::V128Not:
+ case wasm::SimdOp::F32x4Sqrt:
+ case wasm::SimdOp::F64x2Sqrt:
+ case wasm::SimdOp::I8x16Abs:
+ case wasm::SimdOp::I16x8Abs:
+ case wasm::SimdOp::I32x4Abs:
+ case wasm::SimdOp::I64x2Abs:
+ case wasm::SimdOp::I32x4TruncSatF32x4S:
+ case wasm::SimdOp::F32x4ConvertI32x4U:
+ case wasm::SimdOp::I16x8ExtaddPairwiseI8x16U:
+ case wasm::SimdOp::I32x4ExtaddPairwiseI16x8S:
+ case wasm::SimdOp::I32x4ExtaddPairwiseI16x8U:
+ case wasm::SimdOp::I32x4RelaxedTruncSSatF32x4:
+ case wasm::SimdOp::I32x4RelaxedTruncUSatF32x4:
+ case wasm::SimdOp::I32x4RelaxedTruncSatF64x2SZero:
+ case wasm::SimdOp::I32x4RelaxedTruncSatF64x2UZero:
+ case wasm::SimdOp::I64x2ExtendHighI32x4S:
+ case wasm::SimdOp::I64x2ExtendHighI32x4U:
+ // Prefer src == dest to avoid an unconditional src->dest move
+ // for better performance in non-AVX mode (e.g. non-PSHUFD use).
+ useAtStart = true;
+ reuseInput = !isThreeOpAllowed();
+ break;
+ case wasm::SimdOp::I32x4TruncSatF32x4U:
+ case wasm::SimdOp::I32x4TruncSatF64x2SZero:
+ case wasm::SimdOp::I32x4TruncSatF64x2UZero:
+ case wasm::SimdOp::I8x16Popcnt:
+ tempReg = tempSimd128();
+ // Prefer src == dest to avoid an unconditional src->dest move
+ // in non-AVX mode.
+ useAtStart = true;
+ reuseInput = !isThreeOpAllowed();
+ break;
+ case wasm::SimdOp::I16x8ExtendLowI8x16S:
+ case wasm::SimdOp::I16x8ExtendHighI8x16S:
+ case wasm::SimdOp::I16x8ExtendLowI8x16U:
+ case wasm::SimdOp::I16x8ExtendHighI8x16U:
+ case wasm::SimdOp::I32x4ExtendLowI16x8S:
+ case wasm::SimdOp::I32x4ExtendHighI16x8S:
+ case wasm::SimdOp::I32x4ExtendLowI16x8U:
+ case wasm::SimdOp::I32x4ExtendHighI16x8U:
+ case wasm::SimdOp::I64x2ExtendLowI32x4S:
+ case wasm::SimdOp::I64x2ExtendLowI32x4U:
+ case wasm::SimdOp::F32x4ConvertI32x4S:
+ case wasm::SimdOp::F32x4Ceil:
+ case wasm::SimdOp::F32x4Floor:
+ case wasm::SimdOp::F32x4Trunc:
+ case wasm::SimdOp::F32x4Nearest:
+ case wasm::SimdOp::F64x2Ceil:
+ case wasm::SimdOp::F64x2Floor:
+ case wasm::SimdOp::F64x2Trunc:
+ case wasm::SimdOp::F64x2Nearest:
+ case wasm::SimdOp::F32x4DemoteF64x2Zero:
+ case wasm::SimdOp::F64x2PromoteLowF32x4:
+ case wasm::SimdOp::F64x2ConvertLowI32x4S:
+ case wasm::SimdOp::F64x2ConvertLowI32x4U:
+ // Prefer src == dest to exert the lowest register pressure on the
+ // surrounding code.
+ useAtStart = true;
+ MOZ_ASSERT(!reuseInput);
+ break;
+ default:
+ MOZ_CRASH("Unary SimdOp not implemented");
+ }
+
+ LUse inputUse =
+ useAtStart ? useRegisterAtStart(ins->input()) : useRegister(ins->input());
+ LWasmUnarySimd128* lir = new (alloc()) LWasmUnarySimd128(inputUse, tempReg);
+ if (reuseInput) {
+ defineReuseInput(lir, ins, LWasmUnarySimd128::Src);
+ } else {
+ define(lir, ins);
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void LIRGenerator::visitWasmLoadLaneSimd128(MWasmLoadLaneSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ // A trick: On 32-bit systems, the base pointer is 32 bits (it was bounds
+ // checked and then chopped). On 64-bit systems, it can be 32 bits or 64
+ // bits. Either way, it fits in a GPR so we can ignore the
+ // Register/Register64 distinction here.
+# ifndef JS_64BIT
+ MOZ_ASSERT(ins->base()->type() == MIRType::Int32);
+# endif
+ LUse base = useRegisterAtStart(ins->base());
+ LUse inputUse = useRegisterAtStart(ins->value());
+ LAllocation memoryBase = ins->hasMemoryBase()
+ ? useRegisterAtStart(ins->memoryBase())
+ : LAllocation();
+ LWasmLoadLaneSimd128* lir = new (alloc()) LWasmLoadLaneSimd128(
+ base, inputUse, LDefinition::BogusTemp(), memoryBase);
+ defineReuseInput(lir, ins, LWasmLoadLaneSimd128::Src);
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+void LIRGenerator::visitWasmStoreLaneSimd128(MWasmStoreLaneSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ // See comment above.
+# ifndef JS_64BIT
+ MOZ_ASSERT(ins->base()->type() == MIRType::Int32);
+# endif
+ LUse base = useRegisterAtStart(ins->base());
+ LUse input = useRegisterAtStart(ins->value());
+ LAllocation memoryBase = ins->hasMemoryBase()
+ ? useRegisterAtStart(ins->memoryBase())
+ : LAllocation();
+ LWasmStoreLaneSimd128* lir = new (alloc())
+ LWasmStoreLaneSimd128(base, input, LDefinition::BogusTemp(), memoryBase);
+ add(lir, ins);
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}
+
+#ifdef ENABLE_WASM_SIMD
+
+bool LIRGeneratorX86Shared::canFoldReduceSimd128AndBranch(wasm::SimdOp op) {
+ switch (op) {
+ case wasm::SimdOp::V128AnyTrue:
+ case wasm::SimdOp::I8x16AllTrue:
+ case wasm::SimdOp::I16x8AllTrue:
+ case wasm::SimdOp::I32x4AllTrue:
+ case wasm::SimdOp::I64x2AllTrue:
+ case wasm::SimdOp::I16x8Bitmask:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool LIRGeneratorX86Shared::canEmitWasmReduceSimd128AtUses(
+ MWasmReduceSimd128* ins) {
+ if (!ins->canEmitAtUses()) {
+ return false;
+ }
+ // Only specific ops generating int32.
+ if (ins->type() != MIRType::Int32) {
+ return false;
+ }
+ if (!canFoldReduceSimd128AndBranch(ins->simdOp())) {
+ return false;
+ }
+ // If never used then defer (it will be removed).
+ MUseIterator iter(ins->usesBegin());
+ if (iter == ins->usesEnd()) {
+ return true;
+ }
+ // We require an MTest consumer.
+ MNode* node = iter->consumer();
+ if (!node->isDefinition() || !node->toDefinition()->isTest()) {
+ return false;
+ }
+ // Defer only if there's only one use.
+ iter++;
+ return iter == ins->usesEnd();
+}
+
+#endif // ENABLE_WASM_SIMD
+
+void LIRGenerator::visitWasmReduceSimd128(MWasmReduceSimd128* ins) {
+#ifdef ENABLE_WASM_SIMD
+ if (canEmitWasmReduceSimd128AtUses(ins)) {
+ emitAtUses(ins);
+ return;
+ }
+
+ // Reductions (any_true, all_true, bitmask, extract_lane) uniformly prefer
+ // useRegisterAtStart:
+ //
+ // - In most cases, the input type differs from the output type, so there's no
+ // conflict and it doesn't really matter.
+ //
+ // - For extract_lane(0) on F32x4 and F64x2, input == output results in zero
+ // code being generated.
+ //
+ // - For extract_lane(k > 0) on F32x4 and F64x2, allowing the input register
+ // to be targeted lowers register pressure if it's the last use of the
+ // input.
+
+ if (ins->type() == MIRType::Int64) {
+ auto* lir = new (alloc())
+ LWasmReduceSimd128ToInt64(useRegisterAtStart(ins->input()));
+ defineInt64(lir, ins);
+ } else {
+ // Ideally we would reuse the input register for floating extract_lane if
+ // the lane is zero, but constraints in the register allocator require the
+ // input and output register types to be the same.
+ auto* lir = new (alloc()) LWasmReduceSimd128(
+ useRegisterAtStart(ins->input()), LDefinition::BogusTemp());
+ define(lir, ins);
+ }
+#else
+ MOZ_CRASH("No SIMD");
+#endif
+}