Diffstat:
 js/src/jit/x86-shared/Lowering-x86-shared.cpp | 1393 +++++++++++++++++++++++++
 1 file changed, 1393 insertions(+), 0 deletions(-)
diff --git a/js/src/jit/x86-shared/Lowering-x86-shared.cpp b/js/src/jit/x86-shared/Lowering-x86-shared.cpp
new file mode 100644
index 0000000000..d0ce6b1496
--- /dev/null
+++ b/js/src/jit/x86-shared/Lowering-x86-shared.cpp
@@ -0,0 +1,1393 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "jit/x86-shared/Lowering-x86-shared.h"
+
+#include "mozilla/MathAlgorithms.h"
+
+#include "jit/Lowering.h"
+#include "jit/MIR.h"
+
+#include "jit/shared/Lowering-shared-inl.h"
+
+using namespace js;
+using namespace js::jit;
+
+using mozilla::Abs;
+using mozilla::FloorLog2;
+using mozilla::Maybe;
+using mozilla::Nothing;
+using mozilla::Some;
+
+LTableSwitch* LIRGeneratorX86Shared::newLTableSwitch(
+ const LAllocation& in, const LDefinition& inputCopy,
+ MTableSwitch* tableswitch) {
+ return new (alloc()) LTableSwitch(in, inputCopy, temp(), tableswitch);
+}
+
+LTableSwitchV* LIRGeneratorX86Shared::newLTableSwitchV(
+ MTableSwitch* tableswitch) {
+ return new (alloc()) LTableSwitchV(useBox(tableswitch->getOperand(0)), temp(),
+ tempDouble(), temp(), tableswitch);
+}
+
+void LIRGenerator::visitPowHalf(MPowHalf* ins) {
+ MDefinition* input = ins->input();
+ MOZ_ASSERT(input->type() == MIRType::Double);
+ LPowHalfD* lir = new (alloc()) LPowHalfD(useRegisterAtStart(input));
+ define(lir, ins);
+}
+
+void LIRGeneratorX86Shared::lowerForShift(LInstructionHelper<1, 2, 0>* ins,
+ MDefinition* mir, MDefinition* lhs,
+ MDefinition* rhs) {
+ ins->setOperand(0, useRegisterAtStart(lhs));
+
+  // The shift operand should be a constant or, unless BMI2 is available, in
+  // register ecx: x86 cannot take a variable shift count from any other
+  // register.
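+  // For illustration only (not the exact code emitted here): the legacy
+  // encoding takes its count in CL, as in
+  //   shl %cl, %eax
+  // whereas BMI2 SHLX can take the count from any register, e.g.
+  //   shlx %ebx, %esi, %eax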
+ if (rhs->isConstant()) {
+ ins->setOperand(1, useOrConstantAtStart(rhs));
+ } else if (Assembler::HasBMI2() && !mir->isRotate()) {
+ ins->setOperand(1, lhs != rhs ? useRegister(rhs) : useRegisterAtStart(rhs));
+ } else {
+ ins->setOperand(
+ 1, lhs != rhs ? useFixed(rhs, ecx) : useFixedAtStart(rhs, ecx));
+ }
+
+ defineReuseInput(ins, mir, 0);
+}
+
+template <size_t Temps>
+void LIRGeneratorX86Shared::lowerForShiftInt64(
+ LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, Temps>* ins,
+ MDefinition* mir, MDefinition* lhs, MDefinition* rhs) {
+ ins->setInt64Operand(0, useInt64RegisterAtStart(lhs));
+#if defined(JS_NUNBOX32)
+ if (mir->isRotate()) {
+ ins->setTemp(0, temp());
+ }
+#endif
+
+ static_assert(LShiftI64::Rhs == INT64_PIECES,
+ "Assume Rhs is located at INT64_PIECES.");
+ static_assert(LRotateI64::Count == INT64_PIECES,
+ "Assume Count is located at INT64_PIECES.");
+
+  // The shift operand should be a constant or, unless BMI2 is available, in
+  // register ecx: x86 cannot take a variable shift count from any other
+  // register.
+ if (rhs->isConstant()) {
+ ins->setOperand(INT64_PIECES, useOrConstantAtStart(rhs));
+#ifdef JS_CODEGEN_X64
+ } else if (Assembler::HasBMI2() && !mir->isRotate()) {
+ ins->setOperand(INT64_PIECES, useRegister(rhs));
+#endif
+ } else {
+ // The operands are int64, but we only care about the lower 32 bits of
+ // the RHS. On 32-bit, the code below will load that part in ecx and
+ // will discard the upper half.
+ ensureDefined(rhs);
+ LUse use(ecx);
+ use.setVirtualRegister(rhs->virtualRegister());
+ ins->setOperand(INT64_PIECES, use);
+ }
+
+ defineInt64ReuseInput(ins, mir, 0);
+}
+
+template void LIRGeneratorX86Shared::lowerForShiftInt64(
+ LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, 0>* ins,
+ MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
+template void LIRGeneratorX86Shared::lowerForShiftInt64(
+ LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, 1>* ins,
+ MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
+
+void LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 1, 0>* ins,
+ MDefinition* mir, MDefinition* input) {
+ ins->setOperand(0, useRegisterAtStart(input));
+ defineReuseInput(ins, mir, 0);
+}
+
+void LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 2, 0>* ins,
+ MDefinition* mir, MDefinition* lhs,
+ MDefinition* rhs) {
+ ins->setOperand(0, useRegisterAtStart(lhs));
+ ins->setOperand(1,
+ lhs != rhs ? useOrConstant(rhs) : useOrConstantAtStart(rhs));
+ defineReuseInput(ins, mir, 0);
+}
+
+template <size_t Temps>
+void LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, Temps>* ins,
+ MDefinition* mir, MDefinition* lhs,
+ MDefinition* rhs) {
+ // Without AVX, we'll need to use the x86 encodings where one of the
+ // inputs must be the same location as the output.
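+  // As an illustrative sketch (not necessarily the exact instructions chosen
+  // by the code generator): SSE `addsd %xmm1, %xmm0` overwrites its left
+  // input, while AVX `vaddsd %xmm2, %xmm1, %xmm0` writes to a separate
+  // destination register.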
+ if (!Assembler::HasAVX()) {
+ ins->setOperand(0, useRegisterAtStart(lhs));
+ ins->setOperand(1, lhs != rhs ? use(rhs) : useAtStart(rhs));
+ defineReuseInput(ins, mir, 0);
+ } else {
+ ins->setOperand(0, useRegisterAtStart(lhs));
+ ins->setOperand(1, useAtStart(rhs));
+ define(ins, mir);
+ }
+}
+
+template void LIRGeneratorX86Shared::lowerForFPU(
+ LInstructionHelper<1, 2, 0>* ins, MDefinition* mir, MDefinition* lhs,
+ MDefinition* rhs);
+template void LIRGeneratorX86Shared::lowerForFPU(
+ LInstructionHelper<1, 2, 1>* ins, MDefinition* mir, MDefinition* lhs,
+ MDefinition* rhs);
+
+void LIRGeneratorX86Shared::lowerForBitAndAndBranch(LBitAndAndBranch* baab,
+ MInstruction* mir,
+ MDefinition* lhs,
+ MDefinition* rhs) {
+ baab->setOperand(0, useRegisterAtStart(lhs));
+ baab->setOperand(1, useRegisterOrConstantAtStart(rhs));
+ add(baab, mir);
+}
+
+void LIRGeneratorX86Shared::lowerMulI(MMul* mul, MDefinition* lhs,
+ MDefinition* rhs) {
+ // Note: If we need a negative zero check, lhs is used twice.
+ LAllocation lhsCopy = mul->canBeNegativeZero() ? use(lhs) : LAllocation();
+ LMulI* lir = new (alloc()) LMulI(
+ useRegisterAtStart(lhs),
+ lhs != rhs ? useOrConstant(rhs) : useOrConstantAtStart(rhs), lhsCopy);
+ if (mul->fallible()) {
+ assignSnapshot(lir, mul->bailoutKind());
+ }
+ defineReuseInput(lir, mul, 0);
+}
+
+void LIRGeneratorX86Shared::lowerDivI(MDiv* div) {
+ if (div->isUnsigned()) {
+ lowerUDiv(div);
+ return;
+ }
+
+ // Division instructions are slow. Division by constant denominators can be
+ // rewritten to use other instructions.
+ if (div->rhs()->isConstant()) {
+ int32_t rhs = div->rhs()->toConstant()->toInt32();
+
+ // Division by powers of two can be done by shifting, and division by
+ // other numbers can be done by a reciprocal multiplication technique.
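+    // For example (unsigned case), x / 3 equals
+    // uint32_t((uint64_t(x) * 0xAAAAAAAB) >> 33) for every uint32_t x; the
+    // constant paths below rely on identities of this kind.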
+ int32_t shift = FloorLog2(Abs(rhs));
+ if (rhs != 0 && uint32_t(1) << shift == Abs(rhs)) {
+ LAllocation lhs = useRegisterAtStart(div->lhs());
+ LDivPowTwoI* lir;
+      // When the result is truncated and the remainder may be non-zero, we
+      // have to round the result toward 0. This requires an extra register
+      // to adjust the dividend depending on whether it is negative.
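+      // A common shape for round-toward-zero division by 2^k, shown only as
+      // a sketch (the actual sequence is chosen by the code generator):
+      //   mov   lhs, tmp
+      //   sar   $31, tmp         ; tmp = lhs < 0 ? -1 : 0
+      //   shr   $(32-k), tmp     ; tmp = lhs < 0 ? 2^k-1 : 0
+      //   add   lhs, tmp
+      //   sar   $k, tmp          ; truncated quotient (negate if rhs < 0)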
+ bool needRoundNeg = div->canBeNegativeDividend() && div->isTruncated();
+ if (!needRoundNeg) {
+ // Numerator is unsigned, so does not need adjusting.
+ lir = new (alloc()) LDivPowTwoI(lhs, lhs, shift, rhs < 0);
+ } else {
+ // Numerator might be signed, and needs adjusting, and an extra lhs copy
+ // is needed to round the result of the integer division towards zero.
+ lir = new (alloc())
+ LDivPowTwoI(lhs, useRegister(div->lhs()), shift, rhs < 0);
+ }
+ if (div->fallible()) {
+ assignSnapshot(lir, div->bailoutKind());
+ }
+ defineReuseInput(lir, div, 0);
+ return;
+ }
+ if (rhs != 0) {
+ LDivOrModConstantI* lir;
+ lir = new (alloc())
+ LDivOrModConstantI(useRegister(div->lhs()), rhs, tempFixed(eax));
+ if (div->fallible()) {
+ assignSnapshot(lir, div->bailoutKind());
+ }
+ defineFixed(lir, div, LAllocation(AnyRegister(edx)));
+ return;
+ }
+ }
+
+ LDivI* lir = new (alloc())
+ LDivI(useRegister(div->lhs()), useRegister(div->rhs()), tempFixed(edx));
+ if (div->fallible()) {
+ assignSnapshot(lir, div->bailoutKind());
+ }
+ defineFixed(lir, div, LAllocation(AnyRegister(eax)));
+}
+
+void LIRGeneratorX86Shared::lowerModI(MMod* mod) {
+ if (mod->isUnsigned()) {
+ lowerUMod(mod);
+ return;
+ }
+
+ if (mod->rhs()->isConstant()) {
+ int32_t rhs = mod->rhs()->toConstant()->toInt32();
+ int32_t shift = FloorLog2(Abs(rhs));
+ if (rhs != 0 && uint32_t(1) << shift == Abs(rhs)) {
+ LModPowTwoI* lir =
+ new (alloc()) LModPowTwoI(useRegisterAtStart(mod->lhs()), shift);
+ if (mod->fallible()) {
+ assignSnapshot(lir, mod->bailoutKind());
+ }
+ defineReuseInput(lir, mod, 0);
+ return;
+ }
+ if (rhs != 0) {
+ LDivOrModConstantI* lir;
+ lir = new (alloc())
+ LDivOrModConstantI(useRegister(mod->lhs()), rhs, tempFixed(edx));
+ if (mod->fallible()) {
+ assignSnapshot(lir, mod->bailoutKind());
+ }
+ defineFixed(lir, mod, LAllocation(AnyRegister(eax)));
+ return;
+ }
+ }
+
+ LModI* lir = new (alloc())
+ LModI(useRegister(mod->lhs()), useRegister(mod->rhs()), tempFixed(eax));
+ if (mod->fallible()) {
+ assignSnapshot(lir, mod->bailoutKind());
+ }
+ defineFixed(lir, mod, LAllocation(AnyRegister(edx)));
+}
+
+void LIRGenerator::visitWasmNeg(MWasmNeg* ins) {
+ switch (ins->type()) {
+ case MIRType::Int32:
+ defineReuseInput(new (alloc()) LNegI(useRegisterAtStart(ins->input())),
+ ins, 0);
+ break;
+ case MIRType::Float32:
+ defineReuseInput(new (alloc()) LNegF(useRegisterAtStart(ins->input())),
+ ins, 0);
+ break;
+ case MIRType::Double:
+ defineReuseInput(new (alloc()) LNegD(useRegisterAtStart(ins->input())),
+ ins, 0);
+ break;
+ default:
+ MOZ_CRASH();
+ }
+}
+
+void LIRGenerator::visitAsmJSLoadHeap(MAsmJSLoadHeap* ins) {
+ MDefinition* base = ins->base();
+ MOZ_ASSERT(base->type() == MIRType::Int32);
+
+ MDefinition* boundsCheckLimit = ins->boundsCheckLimit();
+ MOZ_ASSERT_IF(ins->needsBoundsCheck(),
+ boundsCheckLimit->type() == MIRType::Int32);
+
+  // For simplicity, require a register if we're going to emit a bounds-check
+  // branch, so that we don't have special cases for constants. This should
+  // only happen in rare constant-folding cases, since asm.js sets the minimum
+  // heap size based on the constant addresses it accesses.
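+  // Conceptually the bounds check compares the index against the limit and
+  // branches on unsigned >=, roughly:
+  //   cmp boundsCheckLimit, base
+  //   jae ->out-of-bounds path
+  // (sketch only; the actual branch is emitted by the code generator).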
+ LAllocation baseAlloc = ins->needsBoundsCheck()
+ ? useRegisterAtStart(base)
+ : useRegisterOrZeroAtStart(base);
+
+ LAllocation limitAlloc = ins->needsBoundsCheck()
+ ? useRegisterAtStart(boundsCheckLimit)
+ : LAllocation();
+ LAllocation memoryBaseAlloc = ins->hasMemoryBase()
+ ? useRegisterAtStart(ins->memoryBase())
+ : LAllocation();
+
+ auto* lir =
+ new (alloc()) LAsmJSLoadHeap(baseAlloc, limitAlloc, memoryBaseAlloc);
+ define(lir, ins);
+}
+
+void LIRGenerator::visitAsmJSStoreHeap(MAsmJSStoreHeap* ins) {
+ MDefinition* base = ins->base();
+ MOZ_ASSERT(base->type() == MIRType::Int32);
+
+ MDefinition* boundsCheckLimit = ins->boundsCheckLimit();
+ MOZ_ASSERT_IF(ins->needsBoundsCheck(),
+ boundsCheckLimit->type() == MIRType::Int32);
+
+  // For simplicity, require a register if we're going to emit a bounds-check
+  // branch, so that we don't have special cases for constants. This should
+  // only happen in rare constant-folding cases, since asm.js sets the minimum
+  // heap size based on the constant addresses it accesses.
+ LAllocation baseAlloc = ins->needsBoundsCheck()
+ ? useRegisterAtStart(base)
+ : useRegisterOrZeroAtStart(base);
+
+ LAllocation limitAlloc = ins->needsBoundsCheck()
+ ? useRegisterAtStart(boundsCheckLimit)
+ : LAllocation();
+ LAllocation memoryBaseAlloc = ins->hasMemoryBase()
+ ? useRegisterAtStart(ins->memoryBase())
+ : LAllocation();
+
+ LAsmJSStoreHeap* lir = nullptr;
+ switch (ins->access().type()) {
+ case Scalar::Int8:
+ case Scalar::Uint8:
+#ifdef JS_CODEGEN_X86
+ // See comment for LIRGeneratorX86::useByteOpRegister.
+ lir = new (alloc()) LAsmJSStoreHeap(
+ baseAlloc, useFixed(ins->value(), eax), limitAlloc, memoryBaseAlloc);
+ break;
+#endif
+ case Scalar::Int16:
+ case Scalar::Uint16:
+ case Scalar::Int32:
+ case Scalar::Uint32:
+ case Scalar::Float32:
+ case Scalar::Float64:
+ // For now, don't allow constant values. The immediate operand affects
+ // instruction layout which affects patching.
+ lir = new (alloc())
+ LAsmJSStoreHeap(baseAlloc, useRegisterAtStart(ins->value()),
+ limitAlloc, memoryBaseAlloc);
+ break;
+ case Scalar::Int64:
+ case Scalar::Simd128:
+ MOZ_CRASH("NYI");
+ case Scalar::Uint8Clamped:
+ case Scalar::BigInt64:
+ case Scalar::BigUint64:
+ case Scalar::MaxTypedArrayViewType:
+ MOZ_CRASH("unexpected array type");
+ }
+ add(lir, ins);
+}
+
+void LIRGeneratorX86Shared::lowerUDiv(MDiv* div) {
+ if (div->rhs()->isConstant()) {
+ uint32_t rhs = div->rhs()->toConstant()->toInt32();
+ int32_t shift = FloorLog2(rhs);
+
+ LAllocation lhs = useRegisterAtStart(div->lhs());
+ if (rhs != 0 && uint32_t(1) << shift == rhs) {
+ LDivPowTwoI* lir = new (alloc()) LDivPowTwoI(lhs, lhs, shift, false);
+ if (div->fallible()) {
+ assignSnapshot(lir, div->bailoutKind());
+ }
+ defineReuseInput(lir, div, 0);
+ } else {
+ LUDivOrModConstant* lir = new (alloc())
+ LUDivOrModConstant(useRegister(div->lhs()), rhs, tempFixed(eax));
+ if (div->fallible()) {
+ assignSnapshot(lir, div->bailoutKind());
+ }
+ defineFixed(lir, div, LAllocation(AnyRegister(edx)));
+ }
+ return;
+ }
+
+ LUDivOrMod* lir = new (alloc()) LUDivOrMod(
+ useRegister(div->lhs()), useRegister(div->rhs()), tempFixed(edx));
+ if (div->fallible()) {
+ assignSnapshot(lir, div->bailoutKind());
+ }
+ defineFixed(lir, div, LAllocation(AnyRegister(eax)));
+}
+
+void LIRGeneratorX86Shared::lowerUMod(MMod* mod) {
+ if (mod->rhs()->isConstant()) {
+ uint32_t rhs = mod->rhs()->toConstant()->toInt32();
+ int32_t shift = FloorLog2(rhs);
+
+ if (rhs != 0 && uint32_t(1) << shift == rhs) {
+ LModPowTwoI* lir =
+ new (alloc()) LModPowTwoI(useRegisterAtStart(mod->lhs()), shift);
+ if (mod->fallible()) {
+ assignSnapshot(lir, mod->bailoutKind());
+ }
+ defineReuseInput(lir, mod, 0);
+ } else {
+ LUDivOrModConstant* lir = new (alloc())
+ LUDivOrModConstant(useRegister(mod->lhs()), rhs, tempFixed(edx));
+ if (mod->fallible()) {
+ assignSnapshot(lir, mod->bailoutKind());
+ }
+ defineFixed(lir, mod, LAllocation(AnyRegister(eax)));
+ }
+ return;
+ }
+
+ LUDivOrMod* lir = new (alloc()) LUDivOrMod(
+ useRegister(mod->lhs()), useRegister(mod->rhs()), tempFixed(eax));
+ if (mod->fallible()) {
+ assignSnapshot(lir, mod->bailoutKind());
+ }
+ defineFixed(lir, mod, LAllocation(AnyRegister(edx)));
+}
+
+void LIRGeneratorX86Shared::lowerUrshD(MUrsh* mir) {
+ MDefinition* lhs = mir->lhs();
+ MDefinition* rhs = mir->rhs();
+
+ MOZ_ASSERT(lhs->type() == MIRType::Int32);
+ MOZ_ASSERT(rhs->type() == MIRType::Int32);
+ MOZ_ASSERT(mir->type() == MIRType::Double);
+
+#ifdef JS_CODEGEN_X64
+ static_assert(ecx == rcx);
+#endif
+
+ // Without BMI2, x86 can only shift by ecx.
+ LUse lhsUse = useRegisterAtStart(lhs);
+ LAllocation rhsAlloc;
+ if (rhs->isConstant()) {
+ rhsAlloc = useOrConstant(rhs);
+ } else if (Assembler::HasBMI2()) {
+ rhsAlloc = useRegister(rhs);
+ } else {
+ rhsAlloc = useFixed(rhs, ecx);
+ }
+
+ LUrshD* lir = new (alloc()) LUrshD(lhsUse, rhsAlloc, tempCopy(lhs, 0));
+ define(lir, mir);
+}
+
+void LIRGeneratorX86Shared::lowerPowOfTwoI(MPow* mir) {
+ int32_t base = mir->input()->toConstant()->toInt32();
+ MDefinition* power = mir->power();
+
+  // The shift operand should be in register ecx unless BMI2 is available:
+  // x86 cannot take a variable shift count from any other register.
+ LAllocation powerAlloc =
+ Assembler::HasBMI2() ? useRegister(power) : useFixed(power, ecx);
+ auto* lir = new (alloc()) LPowOfTwoI(base, powerAlloc);
+ assignSnapshot(lir, mir->bailoutKind());
+ define(lir, mir);
+}
+
+void LIRGeneratorX86Shared::lowerBigIntLsh(MBigIntLsh* ins) {
+  // The shift operand should be in register ecx unless BMI2 is available:
+  // x86 cannot take a variable shift count from any other register.
+ LDefinition shiftAlloc = Assembler::HasBMI2() ? temp() : tempFixed(ecx);
+ auto* lir =
+ new (alloc()) LBigIntLsh(useRegister(ins->lhs()), useRegister(ins->rhs()),
+ temp(), shiftAlloc, temp());
+ define(lir, ins);
+ assignSafepoint(lir, ins);
+}
+
+void LIRGeneratorX86Shared::lowerBigIntRsh(MBigIntRsh* ins) {
+  // The shift operand should be in register ecx unless BMI2 is available:
+  // x86 cannot take a variable shift count from any other register.
+ LDefinition shiftAlloc = Assembler::HasBMI2() ? temp() : tempFixed(ecx);
+ auto* lir =
+ new (alloc()) LBigIntRsh(useRegister(ins->lhs()), useRegister(ins->rhs()),
+ temp(), shiftAlloc, temp());
+ define(lir, ins);
+ assignSafepoint(lir, ins);
+}
+
+void LIRGeneratorX86Shared::lowerWasmBuiltinTruncateToInt32(
+ MWasmBuiltinTruncateToInt32* ins) {
+ MDefinition* opd = ins->input();
+ MOZ_ASSERT(opd->type() == MIRType::Double || opd->type() == MIRType::Float32);
+
+ LDefinition maybeTemp =
+ Assembler::HasSSE3() ? LDefinition::BogusTemp() : tempDouble();
+ if (opd->type() == MIRType::Double) {
+ define(new (alloc()) LWasmBuiltinTruncateDToInt32(
+ useRegister(opd), useFixed(ins->tls(), WasmTlsReg), maybeTemp),
+ ins);
+ return;
+ }
+
+ define(new (alloc()) LWasmBuiltinTruncateFToInt32(
+ useRegister(opd), useFixed(ins->tls(), WasmTlsReg), maybeTemp),
+ ins);
+}
+
+void LIRGeneratorX86Shared::lowerTruncateDToInt32(MTruncateToInt32* ins) {
+ MDefinition* opd = ins->input();
+ MOZ_ASSERT(opd->type() == MIRType::Double);
+
+ LDefinition maybeTemp =
+ Assembler::HasSSE3() ? LDefinition::BogusTemp() : tempDouble();
+ define(new (alloc()) LTruncateDToInt32(useRegister(opd), maybeTemp), ins);
+}
+
+void LIRGeneratorX86Shared::lowerTruncateFToInt32(MTruncateToInt32* ins) {
+ MDefinition* opd = ins->input();
+ MOZ_ASSERT(opd->type() == MIRType::Float32);
+
+ LDefinition maybeTemp =
+ Assembler::HasSSE3() ? LDefinition::BogusTemp() : tempFloat32();
+ define(new (alloc()) LTruncateFToInt32(useRegister(opd), maybeTemp), ins);
+}
+
+void LIRGeneratorX86Shared::lowerCompareExchangeTypedArrayElement(
+ MCompareExchangeTypedArrayElement* ins, bool useI386ByteRegisters) {
+ MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
+ MOZ_ASSERT(ins->arrayType() != Scalar::Float64);
+
+ MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
+ MOZ_ASSERT(ins->index()->type() == MIRType::Int32);
+
+ const LUse elements = useRegister(ins->elements());
+ const LAllocation index = useRegisterOrConstant(ins->index());
+
+ // If the target is a floating register then we need a temp at the
+ // lower level; that temp must be eax.
+ //
+ // Otherwise the target (if used) is an integer register, which
+ // must be eax. If the target is not used the machine code will
+ // still clobber eax, so just pretend it's used.
+ //
+ // oldval must be in a register.
+ //
+ // newval must be in a register. If the source is a byte array
+ // then newval must be a register that has a byte size: on x86
+ // this must be ebx, ecx, or edx (eax is taken for the output).
+ //
+ // Bug #1077036 describes some further optimization opportunities.
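+  //
+  // Sketch of the primitive the lowering ultimately targets (illustrative,
+  // not the verbatim output):
+  //   mov  oldval, %eax
+  //   lock cmpxchg newval, mem   ; if %eax == mem then mem <- newval
+  //   ; %eax ends up holding the value that was observed in memory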
+
+ bool fixedOutput = false;
+ LDefinition tempDef = LDefinition::BogusTemp();
+ LAllocation newval;
+ if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) {
+ tempDef = tempFixed(eax);
+ newval = useRegister(ins->newval());
+ } else {
+ fixedOutput = true;
+ if (useI386ByteRegisters && ins->isByteArray()) {
+ newval = useFixed(ins->newval(), ebx);
+ } else {
+ newval = useRegister(ins->newval());
+ }
+ }
+
+ const LAllocation oldval = useRegister(ins->oldval());
+
+ LCompareExchangeTypedArrayElement* lir =
+ new (alloc()) LCompareExchangeTypedArrayElement(elements, index, oldval,
+ newval, tempDef);
+
+ if (fixedOutput) {
+ defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
+ } else {
+ define(lir, ins);
+ }
+}
+
+void LIRGeneratorX86Shared::lowerAtomicExchangeTypedArrayElement(
+ MAtomicExchangeTypedArrayElement* ins, bool useI386ByteRegisters) {
+ MOZ_ASSERT(ins->arrayType() <= Scalar::Uint32);
+
+ MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
+ MOZ_ASSERT(ins->index()->type() == MIRType::Int32);
+
+ const LUse elements = useRegister(ins->elements());
+ const LAllocation index = useRegisterOrConstant(ins->index());
+ const LAllocation value = useRegister(ins->value());
+
+ // The underlying instruction is XCHG, which can operate on any
+ // register.
+ //
+ // If the target is a floating register (for Uint32) then we need
+ // a temp into which to exchange.
+ //
+ // If the source is a byte array then we need a register that has
+ // a byte size; in this case -- on x86 only -- pin the output to
+ // an appropriate register and use that as a temp in the back-end.
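+  //
+  // Roughly (illustrative only):
+  //   xchg value, mem   ; xchg with a memory operand is implicitly locked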
+
+ LDefinition tempDef = LDefinition::BogusTemp();
+ if (ins->arrayType() == Scalar::Uint32) {
+ MOZ_ASSERT(ins->type() == MIRType::Double);
+ tempDef = temp();
+ }
+
+ LAtomicExchangeTypedArrayElement* lir = new (alloc())
+ LAtomicExchangeTypedArrayElement(elements, index, value, tempDef);
+
+ if (useI386ByteRegisters && ins->isByteArray()) {
+ defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
+ } else {
+ define(lir, ins);
+ }
+}
+
+void LIRGeneratorX86Shared::lowerAtomicTypedArrayElementBinop(
+ MAtomicTypedArrayElementBinop* ins, bool useI386ByteRegisters) {
+ MOZ_ASSERT(ins->arrayType() != Scalar::Uint8Clamped);
+ MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
+ MOZ_ASSERT(ins->arrayType() != Scalar::Float64);
+
+ MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
+ MOZ_ASSERT(ins->index()->type() == MIRType::Int32);
+
+ const LUse elements = useRegister(ins->elements());
+ const LAllocation index = useRegisterOrConstant(ins->index());
+
+ // Case 1: the result of the operation is not used.
+ //
+ // We'll emit a single instruction: LOCK ADD, LOCK SUB, LOCK AND,
+ // LOCK OR, or LOCK XOR. We can do this even for the Uint32 case.
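+  //
+  // For example, a fetch-less add lowers to something like
+  //   lock addl value, mem
+  // (a sketch; the exact form depends on the element size).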
+
+ if (!ins->hasUses()) {
+ LAllocation value;
+ if (useI386ByteRegisters && ins->isByteArray() &&
+ !ins->value()->isConstant()) {
+ value = useFixed(ins->value(), ebx);
+ } else {
+ value = useRegisterOrConstant(ins->value());
+ }
+
+ LAtomicTypedArrayElementBinopForEffect* lir = new (alloc())
+ LAtomicTypedArrayElementBinopForEffect(elements, index, value);
+
+ add(lir, ins);
+ return;
+ }
+
+ // Case 2: the result of the operation is used.
+ //
+ // For ADD and SUB we'll use XADD:
+ //
+ // movl src, output
+ // lock xaddl output, mem
+ //
+ // For the 8-bit variants XADD needs a byte register for the output.
+ //
+ // For AND/OR/XOR we need to use a CMPXCHG loop:
+ //
+ // movl *mem, eax
+ // L: mov eax, temp
+ // andl src, temp
+ // lock cmpxchg temp, mem ; reads eax also
+ // jnz L
+ // ; result in eax
+ //
+  //    Note the placement of L: cmpxchg will update eax with *mem if
+  //    *mem does not have the expected value, so reloading it at the
+  //    top of the loop would be redundant.
+ //
+ // If the array is not a uint32 array then:
+ // - eax should be the output (one result of the cmpxchg)
+ // - there is a temp, which must have a byte register if
+  //      the array has 1-byte elements
+ //
+ // If the array is a uint32 array then:
+ // - eax is the first temp
+ // - we also need a second temp
+ //
+ // There are optimization opportunities:
+ // - better register allocation in the x86 8-bit case, Bug #1077036.
+
+ bool bitOp = !(ins->operation() == AtomicFetchAddOp ||
+ ins->operation() == AtomicFetchSubOp);
+ bool fixedOutput = true;
+ bool reuseInput = false;
+ LDefinition tempDef1 = LDefinition::BogusTemp();
+ LDefinition tempDef2 = LDefinition::BogusTemp();
+ LAllocation value;
+
+ if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) {
+ value = useRegisterOrConstant(ins->value());
+ fixedOutput = false;
+ if (bitOp) {
+ tempDef1 = tempFixed(eax);
+ tempDef2 = temp();
+ } else {
+ tempDef1 = temp();
+ }
+ } else if (useI386ByteRegisters && ins->isByteArray()) {
+ if (ins->value()->isConstant()) {
+ value = useRegisterOrConstant(ins->value());
+ } else {
+ value = useFixed(ins->value(), ebx);
+ }
+ if (bitOp) {
+ tempDef1 = tempFixed(ecx);
+ }
+ } else if (bitOp) {
+ value = useRegisterOrConstant(ins->value());
+ tempDef1 = temp();
+ } else if (ins->value()->isConstant()) {
+ fixedOutput = false;
+ value = useRegisterOrConstant(ins->value());
+ } else {
+ fixedOutput = false;
+ reuseInput = true;
+ value = useRegisterAtStart(ins->value());
+ }
+
+ LAtomicTypedArrayElementBinop* lir = new (alloc())
+ LAtomicTypedArrayElementBinop(elements, index, value, tempDef1, tempDef2);
+
+ if (fixedOutput) {
+ defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
+ } else if (reuseInput) {
+ defineReuseInput(lir, ins, LAtomicTypedArrayElementBinop::valueOp);
+ } else {
+ define(lir, ins);
+ }
+}
+
+void LIRGenerator::visitCopySign(MCopySign* ins) {
+ MDefinition* lhs = ins->lhs();
+ MDefinition* rhs = ins->rhs();
+
+ MOZ_ASSERT(IsFloatingPointType(lhs->type()));
+ MOZ_ASSERT(lhs->type() == rhs->type());
+ MOZ_ASSERT(lhs->type() == ins->type());
+
+ LInstructionHelper<1, 2, 2>* lir;
+ if (lhs->type() == MIRType::Double) {
+ lir = new (alloc()) LCopySignD();
+ } else {
+ lir = new (alloc()) LCopySignF();
+ }
+
+ // As lowerForFPU, but we want rhs to be in a FP register too.
+ lir->setOperand(0, useRegisterAtStart(lhs));
+ if (!Assembler::HasAVX()) {
+ lir->setOperand(1, lhs != rhs ? useRegister(rhs) : useRegisterAtStart(rhs));
+ defineReuseInput(lir, ins, 0);
+ } else {
+ lir->setOperand(1, useRegisterAtStart(rhs));
+ define(lir, ins);
+ }
+}
+
+#ifdef ENABLE_WASM_SIMD
+
+// These lowerings are really x86-shared but some Masm APIs are not yet
+// available on x86.
+
+// Ternary and binary operators require the dest register to be the same as
+// their first input register, leading to a pattern of useRegisterAtStart +
+// defineReuseInput.
+
+void LIRGenerator::visitWasmBitselectSimd128(MWasmBitselectSimd128* ins) {
+ MOZ_ASSERT(ins->lhs()->type() == MIRType::Simd128);
+ MOZ_ASSERT(ins->rhs()->type() == MIRType::Simd128);
+ MOZ_ASSERT(ins->control()->type() == MIRType::Simd128);
+ MOZ_ASSERT(ins->type() == MIRType::Simd128);
+
+  // Enforcing lhs == output avoids one setup move. We would also like to
+  // enforce merging the control with the temp (with
+  // useRegisterAtStart(control) and tempCopy()), but the register allocator
+  // currently ignores those constraints.
+
+ auto* lir = new (alloc()) LWasmBitselectSimd128(
+ useRegisterAtStart(ins->lhs()), useRegister(ins->rhs()),
+ useRegister(ins->control()), tempSimd128());
+ defineReuseInput(lir, ins, LWasmBitselectSimd128::LhsDest);
+}
+
+void LIRGenerator::visitWasmBinarySimd128(MWasmBinarySimd128* ins) {
+ MDefinition* lhs = ins->lhs();
+ MDefinition* rhs = ins->rhs();
+ wasm::SimdOp op = ins->simdOp();
+
+ MOZ_ASSERT(lhs->type() == MIRType::Simd128);
+ MOZ_ASSERT(rhs->type() == MIRType::Simd128);
+ MOZ_ASSERT(ins->type() == MIRType::Simd128);
+
+ if (ins->isCommutative()) {
+ ReorderCommutative(&lhs, &rhs, ins);
+ }
+
+  // Swap operands and change the operation if necessary; these are all
+  // x86/x64-dependent transformations. Except where noted, this is about
+  // avoiding unnecessary moves and fixups in the code generator macros.
+ bool swap = false;
+ switch (op) {
+ case wasm::SimdOp::V128AndNot: {
+ // Code generation requires the operands to be reversed.
+ swap = true;
+ break;
+ }
+ case wasm::SimdOp::I8x16LtS: {
+ swap = true;
+ op = wasm::SimdOp::I8x16GtS;
+ break;
+ }
+ case wasm::SimdOp::I8x16GeS: {
+ swap = true;
+ op = wasm::SimdOp::I8x16LeS;
+ break;
+ }
+ case wasm::SimdOp::I16x8LtS: {
+ swap = true;
+ op = wasm::SimdOp::I16x8GtS;
+ break;
+ }
+ case wasm::SimdOp::I16x8GeS: {
+ swap = true;
+ op = wasm::SimdOp::I16x8LeS;
+ break;
+ }
+ case wasm::SimdOp::I32x4LtS: {
+ swap = true;
+ op = wasm::SimdOp::I32x4GtS;
+ break;
+ }
+ case wasm::SimdOp::I32x4GeS: {
+ swap = true;
+ op = wasm::SimdOp::I32x4LeS;
+ break;
+ }
+ case wasm::SimdOp::F32x4Gt: {
+ swap = true;
+ op = wasm::SimdOp::F32x4Lt;
+ break;
+ }
+ case wasm::SimdOp::F32x4Ge: {
+ swap = true;
+ op = wasm::SimdOp::F32x4Le;
+ break;
+ }
+ case wasm::SimdOp::F64x2Gt: {
+ swap = true;
+ op = wasm::SimdOp::F64x2Lt;
+ break;
+ }
+ case wasm::SimdOp::F64x2Ge: {
+ swap = true;
+ op = wasm::SimdOp::F64x2Le;
+ break;
+ }
+ case wasm::SimdOp::F32x4PMin:
+ case wasm::SimdOp::F32x4PMax:
+ case wasm::SimdOp::F64x2PMin:
+ case wasm::SimdOp::F64x2PMax: {
+      // Code generation requires the operands to be reversed (the rhs is the
+      // output register).
+ swap = true;
+ break;
+ }
+ default:
+ break;
+ }
+ if (swap) {
+ MDefinition* tmp = lhs;
+ lhs = rhs;
+ rhs = tmp;
+ }
+
+ // Allocate temp registers
+ LDefinition tempReg0 = LDefinition::BogusTemp();
+ LDefinition tempReg1 = LDefinition::BogusTemp();
+ switch (op) {
+ case wasm::SimdOp::I64x2Mul:
+ case wasm::SimdOp::V8x16Swizzle:
+ tempReg0 = tempSimd128();
+ break;
+ case wasm::SimdOp::F32x4Min:
+ case wasm::SimdOp::F32x4Max:
+ case wasm::SimdOp::F64x2Min:
+ case wasm::SimdOp::F64x2Max:
+ case wasm::SimdOp::I8x16LtU:
+ case wasm::SimdOp::I8x16GtU:
+ case wasm::SimdOp::I8x16LeU:
+ case wasm::SimdOp::I8x16GeU:
+ case wasm::SimdOp::I16x8LtU:
+ case wasm::SimdOp::I16x8GtU:
+ case wasm::SimdOp::I16x8LeU:
+ case wasm::SimdOp::I16x8GeU:
+ case wasm::SimdOp::I32x4LtU:
+ case wasm::SimdOp::I32x4GtU:
+ case wasm::SimdOp::I32x4LeU:
+ case wasm::SimdOp::I32x4GeU:
+ tempReg0 = tempSimd128();
+ tempReg1 = tempSimd128();
+ break;
+ default:
+ break;
+ }
+
+  // For binary ops, the Masm API is usually (rhs, lhsDest) and requires
+  // AtStart+ReuseInput for the lhs.
+  //
+  // The rhs is tricky due to register allocator restrictions:
+  // - if lhs == rhs and lhs is AtStart then rhs must be AtStart too
+  // - if lhs != rhs and lhs is AtStart then rhs must not be AtStart;
+  //   this appears to have something to do with the risk of the rhs
+  //   being clobbered. Anyway it doesn't matter much, since the
+  //   liveness of rhs will not prevent the lhs register from being
+  //   reused for the output.
+  //
+  // For a few ops, the API is actually (rhsDest, lhs) and the rules are the
+  // same but reversed. We swapped operands above; they will be swapped
+  // again in the code generator to emit the right code.
+
+ LAllocation lhsDestAlloc = useRegisterAtStart(lhs);
+ LAllocation rhsAlloc =
+ lhs != rhs ? useRegister(rhs) : useRegisterAtStart(rhs);
+ auto* lir = new (alloc())
+ LWasmBinarySimd128(op, lhsDestAlloc, rhsAlloc, tempReg0, tempReg1);
+ defineReuseInput(lir, ins, LWasmBinarySimd128::LhsDest);
+}
+
+bool MWasmBinarySimd128::specializeForConstantRhs() {
+  // The order generally follows MacroAssembler.h.
+ switch (simdOp()) {
+ // Operations implemented by a single native instruction where it is
+ // plausible that the rhs (after commutation if available) could be a
+ // constant.
+ //
+ // Swizzle is not here because it was handled earlier in the pipeline.
+ //
+ // Integer compares >= and < are not here because they are not supported in
+ // the hardware.
+ //
+ // Floating compares are not here because our patching machinery can't
+ // handle them yet.
+ //
+ // Floating-point min and max (including pmin and pmax) are not here because
+ // they are not straightforward to implement.
+ case wasm::SimdOp::I8x16Add:
+ case wasm::SimdOp::I16x8Add:
+ case wasm::SimdOp::I32x4Add:
+ case wasm::SimdOp::I64x2Add:
+ case wasm::SimdOp::I8x16Sub:
+ case wasm::SimdOp::I16x8Sub:
+ case wasm::SimdOp::I32x4Sub:
+ case wasm::SimdOp::I64x2Sub:
+ case wasm::SimdOp::I16x8Mul:
+ case wasm::SimdOp::I32x4Mul:
+ case wasm::SimdOp::I8x16AddSaturateS:
+ case wasm::SimdOp::I8x16AddSaturateU:
+ case wasm::SimdOp::I16x8AddSaturateS:
+ case wasm::SimdOp::I16x8AddSaturateU:
+ case wasm::SimdOp::I8x16SubSaturateS:
+ case wasm::SimdOp::I8x16SubSaturateU:
+ case wasm::SimdOp::I16x8SubSaturateS:
+ case wasm::SimdOp::I16x8SubSaturateU:
+ case wasm::SimdOp::I8x16MinS:
+ case wasm::SimdOp::I8x16MinU:
+ case wasm::SimdOp::I16x8MinS:
+ case wasm::SimdOp::I16x8MinU:
+ case wasm::SimdOp::I32x4MinS:
+ case wasm::SimdOp::I32x4MinU:
+ case wasm::SimdOp::I8x16MaxS:
+ case wasm::SimdOp::I8x16MaxU:
+ case wasm::SimdOp::I16x8MaxS:
+ case wasm::SimdOp::I16x8MaxU:
+ case wasm::SimdOp::I32x4MaxS:
+ case wasm::SimdOp::I32x4MaxU:
+ case wasm::SimdOp::V128And:
+ case wasm::SimdOp::V128Or:
+ case wasm::SimdOp::V128Xor:
+ case wasm::SimdOp::I8x16Eq:
+ case wasm::SimdOp::I8x16Ne:
+ case wasm::SimdOp::I8x16GtS:
+ case wasm::SimdOp::I8x16LeS:
+ case wasm::SimdOp::I16x8Eq:
+ case wasm::SimdOp::I16x8Ne:
+ case wasm::SimdOp::I16x8GtS:
+ case wasm::SimdOp::I16x8LeS:
+ case wasm::SimdOp::I32x4Eq:
+ case wasm::SimdOp::I32x4Ne:
+ case wasm::SimdOp::I32x4GtS:
+ case wasm::SimdOp::I32x4LeS:
+ case wasm::SimdOp::F32x4Eq:
+ case wasm::SimdOp::F32x4Ne:
+ case wasm::SimdOp::F32x4Lt:
+ case wasm::SimdOp::F32x4Le:
+ case wasm::SimdOp::F64x2Eq:
+ case wasm::SimdOp::F64x2Ne:
+ case wasm::SimdOp::F64x2Lt:
+ case wasm::SimdOp::F64x2Le:
+ case wasm::SimdOp::I32x4DotSI16x8:
+ case wasm::SimdOp::F32x4Add:
+ case wasm::SimdOp::F64x2Add:
+ case wasm::SimdOp::F32x4Sub:
+ case wasm::SimdOp::F64x2Sub:
+ case wasm::SimdOp::F32x4Div:
+ case wasm::SimdOp::F64x2Div:
+ case wasm::SimdOp::F32x4Mul:
+ case wasm::SimdOp::F64x2Mul:
+ case wasm::SimdOp::I8x16NarrowSI16x8:
+ case wasm::SimdOp::I8x16NarrowUI16x8:
+ case wasm::SimdOp::I16x8NarrowSI32x4:
+ case wasm::SimdOp::I16x8NarrowUI32x4:
+ return true;
+ default:
+ return false;
+ }
+}
+
+void LIRGenerator::visitWasmBinarySimd128WithConstant(
+ MWasmBinarySimd128WithConstant* ins) {
+ MDefinition* lhs = ins->lhs();
+
+ MOZ_ASSERT(lhs->type() == MIRType::Simd128);
+ MOZ_ASSERT(ins->type() == MIRType::Simd128);
+
+  // It is always beneficial to reuse the lhs register here; see the
+  // discussion in visitWasmBinarySimd128() and the code in
+  // specializeForConstantRhs().
+
+ LAllocation lhsDestAlloc = useRegisterAtStart(lhs);
+ auto* lir =
+ new (alloc()) LWasmBinarySimd128WithConstant(lhsDestAlloc, ins->rhs());
+ defineReuseInput(lir, ins, LWasmBinarySimd128WithConstant::LhsDest);
+}
+
+void LIRGenerator::visitWasmShiftSimd128(MWasmShiftSimd128* ins) {
+ MDefinition* lhs = ins->lhs();
+ MDefinition* rhs = ins->rhs();
+
+ MOZ_ASSERT(lhs->type() == MIRType::Simd128);
+ MOZ_ASSERT(rhs->type() == MIRType::Int32);
+ MOZ_ASSERT(ins->type() == MIRType::Simd128);
+
+ if (rhs->isConstant()) {
+ LDefinition temp = LDefinition::BogusTemp();
+ int32_t shiftCount = rhs->toConstant()->toInt32();
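+    // Wasm defines shifts to use the count modulo the lane width, hence the
+    // masking below.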
+ switch (ins->simdOp()) {
+ case wasm::SimdOp::I8x16Shl:
+ case wasm::SimdOp::I8x16ShrU:
+ shiftCount &= 7;
+ break;
+ case wasm::SimdOp::I8x16ShrS:
+ shiftCount &= 7;
+ temp = tempSimd128();
+ break;
+ case wasm::SimdOp::I16x8Shl:
+ case wasm::SimdOp::I16x8ShrU:
+ case wasm::SimdOp::I16x8ShrS:
+ shiftCount &= 15;
+ break;
+ case wasm::SimdOp::I32x4Shl:
+ case wasm::SimdOp::I32x4ShrU:
+ case wasm::SimdOp::I32x4ShrS:
+ shiftCount &= 31;
+ break;
+ case wasm::SimdOp::I64x2Shl:
+ case wasm::SimdOp::I64x2ShrU:
+ case wasm::SimdOp::I64x2ShrS:
+ shiftCount &= 63;
+ break;
+ default:
+ MOZ_CRASH("Unexpected shift operation");
+ }
+# ifdef DEBUG
+ js::wasm::ReportSimdAnalysis("shift -> constant shift");
+# endif
+ // Almost always beneficial, and never detrimental, to reuse the input if
+ // possible.
+ auto* lir = new (alloc())
+ LWasmConstantShiftSimd128(useRegisterAtStart(lhs), temp, shiftCount);
+ defineReuseInput(lir, ins, LWasmConstantShiftSimd128::Src);
+ return;
+ }
+
+# ifdef DEBUG
+ js::wasm::ReportSimdAnalysis("shift -> variable shift");
+# endif
+
+ LDefinition tempReg0 = LDefinition::BogusTemp();
+ LDefinition tempReg1 = LDefinition::BogusTemp();
+ switch (ins->simdOp()) {
+ case wasm::SimdOp::I8x16Shl:
+ case wasm::SimdOp::I8x16ShrS:
+ case wasm::SimdOp::I8x16ShrU:
+ tempReg0 = temp();
+ tempReg1 = tempSimd128();
+ break;
+ default:
+ tempReg0 = temp();
+ break;
+ }
+
+ // Reusing the input if possible is never detrimental.
+ LAllocation lhsDestAlloc = useRegisterAtStart(lhs);
+ LAllocation rhsAlloc = useRegisterAtStart(rhs);
+ auto* lir = new (alloc())
+ LWasmVariableShiftSimd128(lhsDestAlloc, rhsAlloc, tempReg0, tempReg1);
+ defineReuseInput(lir, ins, LWasmVariableShiftSimd128::LhsDest);
+}
+
+void LIRGenerator::visitWasmShuffleSimd128(MWasmShuffleSimd128* ins) {
+ MOZ_ASSERT(ins->lhs()->type() == MIRType::Simd128);
+ MOZ_ASSERT(ins->rhs()->type() == MIRType::Simd128);
+ MOZ_ASSERT(ins->type() == MIRType::Simd128);
+
+ Shuffle s = AnalyzeShuffle(ins);
+# ifdef DEBUG
+ ReportShuffleSpecialization(s);
+# endif
+ switch (s.opd) {
+ case Shuffle::Operand::LEFT:
+ case Shuffle::Operand::RIGHT: {
+ LAllocation src;
+      // All permute operators currently favor reusing the input register, so
+      // we're not currently exercising the code paths below that do not
+      // reuse. Those paths have been exercised in the past, however, and are
+      // believed to be correct.
+ bool useAtStartAndReuse = false;
+ switch (*s.permuteOp) {
+ case LWasmPermuteSimd128::MOVE:
+ case LWasmPermuteSimd128::BROADCAST_8x16:
+ case LWasmPermuteSimd128::BROADCAST_16x8:
+ case LWasmPermuteSimd128::PERMUTE_8x16:
+ case LWasmPermuteSimd128::PERMUTE_16x8:
+ case LWasmPermuteSimd128::PERMUTE_32x4:
+ case LWasmPermuteSimd128::ROTATE_RIGHT_8x16:
+ case LWasmPermuteSimd128::SHIFT_LEFT_8x16:
+ case LWasmPermuteSimd128::SHIFT_RIGHT_8x16:
+ useAtStartAndReuse = true;
+ break;
+ default:
+ MOZ_CRASH("Unexpected operator");
+ }
+ if (s.opd == Shuffle::Operand::LEFT) {
+ if (useAtStartAndReuse) {
+ src = useRegisterAtStart(ins->lhs());
+ } else {
+ src = useRegister(ins->lhs());
+ }
+ } else {
+ if (useAtStartAndReuse) {
+ src = useRegisterAtStart(ins->rhs());
+ } else {
+ src = useRegister(ins->rhs());
+ }
+ }
+ auto* lir =
+ new (alloc()) LWasmPermuteSimd128(src, *s.permuteOp, s.control);
+ if (useAtStartAndReuse) {
+ defineReuseInput(lir, ins, LWasmPermuteSimd128::Src);
+ } else {
+ define(lir, ins);
+ }
+ break;
+ }
+ case Shuffle::Operand::BOTH:
+ case Shuffle::Operand::BOTH_SWAPPED: {
+ LDefinition temp = LDefinition::BogusTemp();
+ switch (*s.shuffleOp) {
+ case LWasmShuffleSimd128::BLEND_8x16:
+ temp = tempSimd128();
+ break;
+ default:
+ break;
+ }
+ LAllocation lhs;
+ LAllocation rhs;
+ if (s.opd == Shuffle::Operand::BOTH) {
+ lhs = useRegisterAtStart(ins->lhs());
+ rhs = useRegister(ins->rhs());
+ } else {
+ lhs = useRegisterAtStart(ins->rhs());
+ rhs = useRegister(ins->lhs());
+ }
+ auto* lir = new (alloc())
+ LWasmShuffleSimd128(lhs, rhs, temp, *s.shuffleOp, s.control);
+ defineReuseInput(lir, ins, LWasmShuffleSimd128::LhsDest);
+ break;
+ }
+ }
+}
+
+void LIRGenerator::visitWasmReplaceLaneSimd128(MWasmReplaceLaneSimd128* ins) {
+ MOZ_ASSERT(ins->lhs()->type() == MIRType::Simd128);
+ MOZ_ASSERT(ins->type() == MIRType::Simd128);
+
+ // The Masm API is (rhs, lhsDest) and requires AtStart+ReuseInput for the lhs.
+ // For type reasons, the rhs will never be the same as the lhs and is
+ // therefore a plain Use.
+
+ if (ins->rhs()->type() == MIRType::Int64) {
+ auto* lir = new (alloc()) LWasmReplaceInt64LaneSimd128(
+ useRegisterAtStart(ins->lhs()), useInt64Register(ins->rhs()));
+ defineReuseInput(lir, ins, LWasmReplaceInt64LaneSimd128::LhsDest);
+ } else {
+ auto* lir = new (alloc()) LWasmReplaceLaneSimd128(
+ useRegisterAtStart(ins->lhs()), useRegister(ins->rhs()));
+ defineReuseInput(lir, ins, LWasmReplaceLaneSimd128::LhsDest);
+ }
+}
+
+void LIRGenerator::visitWasmScalarToSimd128(MWasmScalarToSimd128* ins) {
+ MOZ_ASSERT(ins->type() == MIRType::Simd128);
+
+ switch (ins->input()->type()) {
+ case MIRType::Int64: {
+ // 64-bit integer splats.
+ // Load-and-(sign|zero)extend.
+ auto* lir = new (alloc())
+ LWasmInt64ToSimd128(useInt64RegisterAtStart(ins->input()));
+ define(lir, ins);
+ break;
+ }
+ case MIRType::Float32:
+ case MIRType::Double: {
+ // Floating-point splats.
+ // Ideally we save a move on SSE systems by reusing the input register,
+ // but since the input and output register types differ, we can't.
+ auto* lir =
+ new (alloc()) LWasmScalarToSimd128(useRegisterAtStart(ins->input()));
+ define(lir, ins);
+ break;
+ }
+ default: {
+ // 32-bit integer splats.
+ auto* lir =
+ new (alloc()) LWasmScalarToSimd128(useRegisterAtStart(ins->input()));
+ define(lir, ins);
+ break;
+ }
+ }
+}
+
+void LIRGenerator::visitWasmUnarySimd128(MWasmUnarySimd128* ins) {
+ MOZ_ASSERT(ins->input()->type() == MIRType::Simd128);
+ MOZ_ASSERT(ins->type() == MIRType::Simd128);
+
+ bool useAtStart = false;
+ bool reuseInput = false;
+ LDefinition tempReg = LDefinition::BogusTemp();
+ switch (ins->simdOp()) {
+ case wasm::SimdOp::I8x16Neg:
+ case wasm::SimdOp::I16x8Neg:
+ case wasm::SimdOp::I32x4Neg:
+ case wasm::SimdOp::I64x2Neg:
+ // Prefer src != dest to avoid an unconditional src->temp move.
+ MOZ_ASSERT(!useAtStart && !reuseInput);
+ break;
+ case wasm::SimdOp::F32x4Neg:
+ case wasm::SimdOp::F64x2Neg:
+ case wasm::SimdOp::F32x4Abs:
+ case wasm::SimdOp::F64x2Abs:
+ case wasm::SimdOp::V128Not:
+ case wasm::SimdOp::F32x4Sqrt:
+ case wasm::SimdOp::F64x2Sqrt:
+ case wasm::SimdOp::I8x16Abs:
+ case wasm::SimdOp::I16x8Abs:
+ case wasm::SimdOp::I32x4Abs:
+ case wasm::SimdOp::I32x4TruncSSatF32x4:
+ case wasm::SimdOp::F32x4ConvertUI32x4:
+ // Prefer src == dest to avoid an unconditional src->dest move.
+ useAtStart = true;
+ reuseInput = true;
+ break;
+ case wasm::SimdOp::I32x4TruncUSatF32x4:
+ tempReg = tempSimd128();
+ // Prefer src == dest to avoid an unconditional src->dest move.
+ useAtStart = true;
+ reuseInput = true;
+ break;
+ case wasm::SimdOp::I16x8WidenLowSI8x16:
+ case wasm::SimdOp::I16x8WidenHighSI8x16:
+ case wasm::SimdOp::I16x8WidenLowUI8x16:
+ case wasm::SimdOp::I16x8WidenHighUI8x16:
+ case wasm::SimdOp::I32x4WidenLowSI16x8:
+ case wasm::SimdOp::I32x4WidenHighSI16x8:
+ case wasm::SimdOp::I32x4WidenLowUI16x8:
+ case wasm::SimdOp::I32x4WidenHighUI16x8:
+ case wasm::SimdOp::F32x4ConvertSI32x4:
+ case wasm::SimdOp::F32x4Ceil:
+ case wasm::SimdOp::F32x4Floor:
+ case wasm::SimdOp::F32x4Trunc:
+ case wasm::SimdOp::F32x4Nearest:
+ case wasm::SimdOp::F64x2Ceil:
+ case wasm::SimdOp::F64x2Floor:
+ case wasm::SimdOp::F64x2Trunc:
+ case wasm::SimdOp::F64x2Nearest:
+ // Prefer src == dest to exert the lowest register pressure on the
+ // surrounding code.
+ useAtStart = true;
+ MOZ_ASSERT(!reuseInput);
+ break;
+ default:
+ MOZ_CRASH("Unary SimdOp not implemented");
+ }
+
+ LUse inputUse =
+ useAtStart ? useRegisterAtStart(ins->input()) : useRegister(ins->input());
+ LWasmUnarySimd128* lir = new (alloc()) LWasmUnarySimd128(inputUse, tempReg);
+ if (reuseInput) {
+ defineReuseInput(lir, ins, LWasmUnarySimd128::Src);
+ } else {
+ define(lir, ins);
+ }
+}
+
+bool LIRGeneratorX86Shared::canFoldReduceSimd128AndBranch(wasm::SimdOp op) {
+ switch (op) {
+ case wasm::SimdOp::I8x16AnyTrue:
+ case wasm::SimdOp::I16x8AnyTrue:
+ case wasm::SimdOp::I32x4AnyTrue:
+ case wasm::SimdOp::I8x16AllTrue:
+ case wasm::SimdOp::I16x8AllTrue:
+ case wasm::SimdOp::I32x4AllTrue:
+ case wasm::SimdOp::I16x8Bitmask:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool LIRGeneratorX86Shared::canEmitWasmReduceSimd128AtUses(
+ MWasmReduceSimd128* ins) {
+ if (!ins->canEmitAtUses()) {
+ return false;
+ }
+ // Only specific ops generating int32.
+ if (ins->type() != MIRType::Int32) {
+ return false;
+ }
+ if (!canFoldReduceSimd128AndBranch(ins->simdOp())) {
+ return false;
+ }
+ // If never used then defer (it will be removed).
+ MUseIterator iter(ins->usesBegin());
+ if (iter == ins->usesEnd()) {
+ return true;
+ }
+ // We require an MTest consumer.
+ MNode* node = iter->consumer();
+ if (!node->isDefinition() || !node->toDefinition()->isTest()) {
+ return false;
+ }
+ // Defer only if there's only one use.
+ iter++;
+ return iter == ins->usesEnd();
+}
+
+void LIRGenerator::visitWasmReduceSimd128(MWasmReduceSimd128* ins) {
+ if (canEmitWasmReduceSimd128AtUses(ins)) {
+ emitAtUses(ins);
+ return;
+ }
+
+ // Reductions (any_true, all_true, bitmask, extract_lane) uniformly prefer
+ // useRegisterAtStart:
+ //
+ // - In most cases, the input type differs from the output type, so there's no
+ // conflict and it doesn't really matter.
+ //
+ // - For extract_lane(0) on F32x4 and F64x2, input == output results in zero
+ // code being generated.
+ //
+ // - For extract_lane(k > 0) on F32x4 and F64x2, allowing the input register
+ // to be targeted lowers register pressure if it's the last use of the
+ // input.
+
+ if (ins->type() == MIRType::Int64) {
+ auto* lir = new (alloc())
+ LWasmReduceSimd128ToInt64(useRegisterAtStart(ins->input()));
+ defineInt64(lir, ins);
+ } else {
+ // Ideally we would reuse the input register for floating extract_lane if
+ // the lane is zero, but constraints in the register allocator require the
+ // input and output register types to be the same.
+ auto* lir =
+ new (alloc()) LWasmReduceSimd128(useRegisterAtStart(ins->input()));
+ define(lir, ins);
+ }
+}
+
+#endif // ENABLE_WASM_SIMD