Diffstat (limited to 'js/src/jit/shared/Lowering-shared.cpp')
-rw-r--r--  js/src/jit/shared/Lowering-shared.cpp | 1034
1 file changed, 1034 insertions(+), 0 deletions(-)
diff --git a/js/src/jit/shared/Lowering-shared.cpp b/js/src/jit/shared/Lowering-shared.cpp
new file mode 100644
index 0000000000..9d6faf4a8a
--- /dev/null
+++ b/js/src/jit/shared/Lowering-shared.cpp
@@ -0,0 +1,1034 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "jit/shared/Lowering-shared-inl.h"
+
+#include "jit/LIR.h"
+#include "jit/Lowering.h"
+#include "jit/MIR.h"
+
+#include "vm/SymbolType.h"
+
+using namespace js;
+using namespace jit;
+
+using mozilla::Maybe;
+using mozilla::Nothing;
+using mozilla::Some;
+
+bool LIRGeneratorShared::ShouldReorderCommutative(MDefinition* lhs,
+                                                  MDefinition* rhs,
+                                                  MInstruction* ins) {
+  // lhs and rhs are used by the commutative operator.
+  MOZ_ASSERT(lhs->hasDefUses());
+  MOZ_ASSERT(rhs->hasDefUses());
+
+  // Ensure that if there is a constant, then it is in rhs.
+  if (rhs->isConstant()) {
+    return false;
+  }
+  if (lhs->isConstant()) {
+    return true;
+  }
+
+  // Since clobbering binary operations clobber the left operand, prefer a
+  // non-constant lhs operand with no further uses. To be fully precise, we
+  // should check whether this is the *last* use, but checking hasOneDefUse()
+  // is a decent approximation which doesn't require any extra analysis.
+  bool rhsSingleUse = rhs->hasOneDefUse();
+  bool lhsSingleUse = lhs->hasOneDefUse();
+  if (rhsSingleUse) {
+    if (!lhsSingleUse) {
+      return true;
+    }
+  } else {
+    if (lhsSingleUse) {
+      return false;
+    }
+  }
+
+  // If this is a reduction-style computation, such as
+  //
+  //   sum = 0;
+  //   for (...)
+  //     sum += ...;
+  //
+  // put the phi on the left to promote coalescing. This is fairly specific.
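+  // In that example, rhs is the loop-header phi for `sum` and ins is the add
+  // that feeds the phi's backedge operand; returning true here moves the phi
+  // into the clobbered lhs slot so the register allocator can coalesce the
+  // phi with the result of the add.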
+  if (rhsSingleUse && rhs->isPhi() && rhs->block()->isLoopHeader() &&
+      ins == rhs->toPhi()->getLoopBackedgeOperand()) {
+    return true;
+  }
+
+  return false;
+}
+
+void LIRGeneratorShared::ReorderCommutative(MDefinition** lhsp,
+                                            MDefinition** rhsp,
+                                            MInstruction* ins) {
+  MDefinition* lhs = *lhsp;
+  MDefinition* rhs = *rhsp;
+
+  if (ShouldReorderCommutative(lhs, rhs, ins)) {
+    *rhsp = lhs;
+    *lhsp = rhs;
+  }
+}
+
+void LIRGeneratorShared::definePhiOneRegister(MPhi* phi, size_t lirIndex) {
+  LPhi* lir = current->getPhi(lirIndex);
+
+  uint32_t vreg = getVirtualRegister();
+
+  phi->setVirtualRegister(vreg);
+  lir->setDef(0, LDefinition(vreg, LDefinition::TypeFrom(phi->type())));
+  annotate(lir);
+}
+
+#ifdef JS_NUNBOX32
+void LIRGeneratorShared::definePhiTwoRegisters(MPhi* phi, size_t lirIndex) {
+  LPhi* type = current->getPhi(lirIndex + VREG_TYPE_OFFSET);
+  LPhi* payload = current->getPhi(lirIndex + VREG_DATA_OFFSET);
+
+  uint32_t typeVreg = getVirtualRegister();
+  phi->setVirtualRegister(typeVreg);
+
+  uint32_t payloadVreg = getVirtualRegister();
+  MOZ_ASSERT(typeVreg + 1 == payloadVreg);
+
+  type->setDef(0, LDefinition(typeVreg, LDefinition::TYPE));
+  payload->setDef(0, LDefinition(payloadVreg, LDefinition::PAYLOAD));
+  annotate(type);
+  annotate(payload);
+}
+#endif
+
+void LIRGeneratorShared::lowerTypedPhiInput(MPhi* phi, uint32_t inputPosition,
+                                            LBlock* block, size_t lirIndex) {
+  MDefinition* operand = phi->getOperand(inputPosition);
+  LPhi* lir = block->getPhi(lirIndex);
+  lir->setOperand(inputPosition, LUse(operand->virtualRegister(), LUse::ANY));
+}
+
+LRecoverInfo* LIRGeneratorShared::getRecoverInfo(MResumePoint* rp) {
+  if (cachedRecoverInfo_ && cachedRecoverInfo_->mir() == rp) {
+    return cachedRecoverInfo_;
+  }
+
+  LRecoverInfo* recoverInfo = LRecoverInfo::New(gen, rp);
+  if (!recoverInfo) {
+    return nullptr;
+  }
+
+  cachedRecoverInfo_ = recoverInfo;
+  return recoverInfo;
+}
+
+#ifdef DEBUG
+bool LRecoverInfo::OperandIter::canOptimizeOutIfUnused() {
+  MDefinition* ins = **this;
+
+  // We check ins->type() in addition to ins->isUnused() because
+  // EliminateDeadResumePointOperands may replace nodes with the constant
+  // MagicValue(JS_OPTIMIZED_OUT).
+  if ((ins->isUnused() || ins->type() == MIRType::MagicOptimizedOut) &&
+      (*it_)->isResumePoint()) {
+    return !(*it_)->toResumePoint()->isObservableOperand(op_);
+  }
+
+  return true;
+}
+#endif
+
+#ifdef JS_NUNBOX32
+LSnapshot* LIRGeneratorShared::buildSnapshot(MResumePoint* rp,
+                                             BailoutKind kind) {
+  LRecoverInfo* recoverInfo = getRecoverInfo(rp);
+  if (!recoverInfo) {
+    return nullptr;
+  }
+
+  LSnapshot* snapshot = LSnapshot::New(gen, recoverInfo, kind);
+  if (!snapshot) {
+    return nullptr;
+  }
+
+  size_t index = 0;
+  for (LRecoverInfo::OperandIter it(recoverInfo); !it; ++it) {
+    // Check that optimized out operands are in eliminable slots.
+    MOZ_ASSERT(it.canOptimizeOutIfUnused());
+
+    MDefinition* ins = *it;
+
+    if (ins->isRecoveredOnBailout()) {
+      continue;
+    }
+
+    LAllocation* type = snapshot->typeOfSlot(index);
+    LAllocation* payload = snapshot->payloadOfSlot(index);
+    ++index;
+
+    if (ins->isBox()) {
+      ins = ins->toBox()->getOperand(0);
+    }
+
+    // Guards should never be eliminated.
+    MOZ_ASSERT_IF(ins->isUnused(), !ins->isGuard());
+
+    // Snapshot operands other than constants should never be
+    // emitted-at-uses. Try-catch support depends on there being no
+    // code between an instruction and the LOsiPoint that follows it.
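+    // (Constants can be emitted at uses because the snapshot records only a
+    // dummy placeholder for them and recovers the value from MIR; see below.)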
+    MOZ_ASSERT_IF(!ins->isConstant(), !ins->isEmittedAtUses());
+
+    // The register allocation will fill these fields in with actual
+    // register/stack assignments. During code generation, we can restore
+    // interpreter state with the given information. Note that for
+    // constants, including known types, we record a dummy placeholder,
+    // since we can recover the same information, much cleaner, from MIR.
+    if (ins->isConstant() || ins->isUnused()) {
+      *type = LAllocation();
+      *payload = LAllocation();
+    } else if (ins->type() != MIRType::Value) {
+      *type = LAllocation();
+      *payload = use(ins, LUse(LUse::KEEPALIVE));
+    } else {
+      *type = useType(ins, LUse::KEEPALIVE);
+      *payload = usePayload(ins, LUse::KEEPALIVE);
+    }
+  }
+
+  return snapshot;
+}
+
+#elif JS_PUNBOX64
+
+LSnapshot* LIRGeneratorShared::buildSnapshot(MResumePoint* rp,
+                                             BailoutKind kind) {
+  LRecoverInfo* recoverInfo = getRecoverInfo(rp);
+  if (!recoverInfo) {
+    return nullptr;
+  }
+
+  LSnapshot* snapshot = LSnapshot::New(gen, recoverInfo, kind);
+  if (!snapshot) {
+    return nullptr;
+  }
+
+  size_t index = 0;
+  for (LRecoverInfo::OperandIter it(recoverInfo); !it; ++it) {
+    // Check that optimized out operands are in eliminable slots.
+    MOZ_ASSERT(it.canOptimizeOutIfUnused());
+
+    MDefinition* def = *it;
+
+    if (def->isRecoveredOnBailout()) {
+      continue;
+    }
+
+    if (def->isBox()) {
+      def = def->toBox()->getOperand(0);
+    }
+
+    // Guards should never be eliminated.
+    MOZ_ASSERT_IF(def->isUnused(), !def->isGuard());
+
+    // Snapshot operands other than constants should never be
+    // emitted-at-uses. Try-catch support depends on there being no
+    // code between an instruction and the LOsiPoint that follows it.
+    MOZ_ASSERT_IF(!def->isConstant(), !def->isEmittedAtUses());
+
+    LAllocation* a = snapshot->getEntry(index++);
+
+    if (def->isUnused()) {
+      *a = LAllocation();
+      continue;
+    }
+
+    *a = useKeepaliveOrConstant(def);
+  }
+
+  return snapshot;
+}
+#endif
+
+void LIRGeneratorShared::assignSnapshot(LInstruction* ins, BailoutKind kind) {
+  // assignSnapshot must be called before define/add, since
+  // it may add new instructions for emitted-at-use operands.
+  MOZ_ASSERT(ins->id() == 0);
+  MOZ_ASSERT(kind != BailoutKind::Unknown);
+
+  LSnapshot* snapshot = buildSnapshot(lastResumePoint_, kind);
+  if (!snapshot) {
+    abort(AbortReason::Alloc, "buildSnapshot failed");
+    return;
+  }
+
+  ins->assignSnapshot(snapshot);
+}
+
+void LIRGeneratorShared::assignSafepoint(LInstruction* ins, MInstruction* mir,
+                                         BailoutKind kind) {
+  MOZ_ASSERT(!osiPoint_);
+  MOZ_ASSERT(!ins->safepoint());
+
+  ins->initSafepoint(alloc());
+
+  MResumePoint* mrp =
+      mir->resumePoint() ? mir->resumePoint() : lastResumePoint_;
+  LSnapshot* postSnapshot = buildSnapshot(mrp, kind);
+  if (!postSnapshot) {
+    abort(AbortReason::Alloc, "buildSnapshot failed");
+    return;
+  }
+
+  osiPoint_ = new (alloc()) LOsiPoint(ins->safepoint(), postSnapshot);
+
+  if (!lirGraph_.noteNeedsSafepoint(ins)) {
+    abort(AbortReason::Alloc, "noteNeedsSafepoint failed");
+    return;
+  }
+}
+
+void LIRGeneratorShared::assignWasmSafepoint(LInstruction* ins,
+                                             MInstruction* mir) {
+  MOZ_ASSERT(!osiPoint_);
+  MOZ_ASSERT(!ins->safepoint());
+
+  ins->initSafepoint(alloc());
+
+  if (!lirGraph_.noteNeedsSafepoint(ins)) {
+    abort(AbortReason::Alloc, "noteNeedsSafepoint failed");
+    return;
+  }
+}
+
+#ifdef ENABLE_WASM_SIMD
+
+// Specialization analysis for SIMD operations. This is still x86-centric but
+// generalizes fairly easily to other architectures.
+
+// Optimization of v8x16.shuffle. The general byte shuffle+blend is very
+// expensive (equivalent to at least a dozen instructions), and we want to
+// avoid that if we can. So look for special cases; there are many.
+//
+// The strategy is to sort the operation into one of three buckets depending
+// on the shuffle pattern and inputs:
+//
+//  - single operand; shuffles on these values are rotations, reversals,
+//    transpositions, and general permutations
+//  - single-operand-with-interesting-constant (especially zero); shuffles on
+//    these values are often byte shift or scatter operations
+//  - dual operand; shuffles on these values are blends, catenated shifts,
+//    and (in the worst case) general shuffle+blends
+//
+// We're not trying to solve the general problem, only to lower reasonably
+// expressed patterns for common operations. Producers that produce dense and
+// convoluted patterns will end up with the general byte shuffle. Producers
+// that produce simpler patterns that easily map to hardware will get faster
+// code.
+//
+// In particular, these matchers do not try to combine transformations, so a
+// shuffle that optimally is lowered to rotate + permute32x4 + rotate, say, is
+// usually going to end up as a general byte shuffle.
+
+// Reduce a 0..31 byte mask to a 0..15 word mask if possible and if so return
+// true, updating *control.
+static bool ByteMaskToWordMask(SimdConstant* control) {
+  const SimdConstant::I8x16& lanes = control->asInt8x16();
+  int16_t controlWords[8];
+  for (int i = 0; i < 16; i += 2) {
+    if (!((lanes[i] & 1) == 0 && lanes[i + 1] == lanes[i] + 1)) {
+      return false;
+    }
+    controlWords[i / 2] = lanes[i] / 2;
+  }
+  *control = SimdConstant::CreateX8(controlWords);
+  return true;
+}
+
+// Reduce a 0..31 byte mask to a 0..7 dword mask if possible and if so return
+// true, updating *control.
+static bool ByteMaskToDWordMask(SimdConstant* control) {
+  const SimdConstant::I8x16& lanes = control->asInt8x16();
+  int32_t controlDWords[4];
+  for (int i = 0; i < 16; i += 4) {
+    if (!((lanes[i] & 3) == 0 && lanes[i + 1] == lanes[i] + 1 &&
+          lanes[i + 2] == lanes[i] + 2 && lanes[i + 3] == lanes[i] + 3)) {
+      return false;
+    }
+    controlDWords[i / 4] = lanes[i] / 4;
+  }
+  *control = SimdConstant::CreateX4(controlDWords);
+  return true;
+}
+
+// Reduce a 0..31 byte mask to a 0..3 qword mask if possible and if so return
+// true, updating *control.
+static bool ByteMaskToQWordMask(SimdConstant* control) {
+  const SimdConstant::I8x16& lanes = control->asInt8x16();
+  int64_t controlQWords[2];
+  for (int i = 0; i < 16; i += 8) {
+    if (!((lanes[i] & 7) == 0 && lanes[i + 1] == lanes[i] + 1 &&
+          lanes[i + 2] == lanes[i] + 2 && lanes[i + 3] == lanes[i] + 3 &&
+          lanes[i + 4] == lanes[i] + 4 && lanes[i + 5] == lanes[i] + 5 &&
+          lanes[i + 6] == lanes[i] + 6 && lanes[i + 7] == lanes[i] + 7)) {
+      return false;
+    }
+    controlQWords[i / 8] = lanes[i] / 8;
+  }
+  *control = SimdConstant::CreateX2(controlQWords);
+  return true;
+}
+
+// Skip across consecutive values in lanes starting at i, returning the index
+// after the last element. Lane values must be <= len-1 ("masked").
+//
+// Since every element forms a run of at least one, the return value is always
+// greater than the starting i.
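+//
+// For example, with byte lanes [4,5,6,7,0,...], ScanIncreasingMasked(lanes, 0)
+// returns 4: the run 4,5,6,7 stops at the non-consecutive 0.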
+template <typename T>
+static int ScanIncreasingMasked(const T* lanes, int i) {
+  int len = int(16 / sizeof(T));
+  MOZ_ASSERT(i < len);
+  MOZ_ASSERT(lanes[i] <= len - 1);
+  i++;
+  while (i < len && lanes[i] == lanes[i - 1] + 1) {
+    MOZ_ASSERT(lanes[i] <= len - 1);
+    i++;
+  }
+  return i;
+}
+
+// Skip across consecutive values in lanes starting at i, returning the index
+// after the last element. Lane values must be <= len*2-1 ("unmasked"); the
+// values len-1 and len are not considered consecutive.
+//
+// Since every element forms a run of at least one, the return value is always
+// greater than the starting i.
+template <typename T>
+static int ScanIncreasingUnmasked(const T* lanes, int i) {
+  int len = int(16 / sizeof(T));
+  MOZ_ASSERT(i < len);
+  if (lanes[i] < len) {
+    i++;
+    while (i < len && lanes[i] < len && lanes[i - 1] == lanes[i] - 1) {
+      i++;
+    }
+  } else {
+    i++;
+    while (i < len && lanes[i] >= len && lanes[i - 1] == lanes[i] - 1) {
+      i++;
+    }
+  }
+  return i;
+}
+
+// Skip lanes that equal v starting at i, returning the index just beyond the
+// last of those. There is no requirement that the initial lanes[i] == v.
+template <typename T>
+static int ScanConstant(const T* lanes, int v, int i) {
+  int len = int(16 / sizeof(T));
+  MOZ_ASSERT(i <= len);
+  while (i < len && lanes[i] == v) {
+    i++;
+  }
+  return i;
+}
+
+// Mask lane values denoting rhs elements into lhs elements.
+template <typename T>
+static void MaskLanes(T* result, const T* input) {
+  int len = int(16 / sizeof(T));
+  for (int i = 0; i < len; i++) {
+    result[i] = input[i] & (len - 1);
+  }
+}
+
+// Apply a transformation to each lane value.
+template <typename T>
+static void MapLanes(T* result, const T* input, int (*f)(int)) {
+  int len = int(16 / sizeof(T));
+  for (int i = 0; i < len; i++) {
+    result[i] = f(input[i]);
+  }
+}
+
+// Recognize an identity permutation, assuming lanes is masked.
+template <typename T>
+static bool IsIdentity(const T* lanes) {
+  return ScanIncreasingMasked(lanes, 0) == int(16 / sizeof(T));
+}
+
+// Recognize part of an identity permutation starting at start, with
+// the first value of the permutation expected to be bias.
+template <typename T>
+static bool IsIdentity(const T* lanes, int start, int len, int bias) {
+  if (lanes[start] != bias) {
+    return false;
+  }
+  for (int i = start + 1; i < start + len; i++) {
+    if (lanes[i] != lanes[i - 1] + 1) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// We can permute by dwords if the mask is reducible to a dword mask, and in
+// this case a single PSHUFD is enough.
+static bool TryPermute32x4(SimdConstant* control) {
+  SimdConstant tmp = *control;
+  if (!ByteMaskToDWordMask(&tmp)) {
+    return false;
+  }
+  *control = tmp;
+  return true;
+}
+
+// Can we perform a byte rotate right? We can use PALIGNR. The shift count is
+// just lanes[0], and *control is unchanged.
+static bool TryRotateRight8x16(SimdConstant* control) {
+  const SimdConstant::I8x16& lanes = control->asInt8x16();
+  // Look for the first run of consecutive bytes.
+  int i = ScanIncreasingMasked(lanes, 0);
+
+  // If we reach the end of the vector, the vector must start at 0.
+  if (i == 16) {
+    return lanes[0] == 0;
+  }
+
+  // The second run must start at source lane zero.
+  if (lanes[i] != 0) {
+    return false;
+  }
+
+  // The second run must end at the end of the lane vector.
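+  // For example, [13,14,15,0,1,...,12] is a rotate right by 13: the first
+  // run 13..15 ends at i == 3, and the run 0..12 fills out the vector.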
+  return ScanIncreasingMasked(lanes, i) == 16;
+}
+
+// We can permute by words if the mask is reducible to a word mask, but the
+// x64 lowering is only efficient if we can permute the high and low quadwords
+// separately, possibly after swapping quadwords.
+static bool TryPermute16x8(SimdConstant* control) {
+  SimdConstant tmp = *control;
+  if (!ByteMaskToWordMask(&tmp)) {
+    return false;
+  }
+  const SimdConstant::I16x8& lanes = tmp.asInt16x8();
+  SimdConstant::I16x8 mapped;
+  MapLanes(mapped, lanes, [](int x) -> int { return x < 4 ? 0 : 1; });
+  int i = ScanConstant(mapped, mapped[0], 0);
+  if (i != 4) {
+    return false;
+  }
+  i = ScanConstant(mapped, mapped[4], 4);
+  if (i != 8) {
+    return false;
+  }
+  // Now compute the operation bits. `mapped` holds the adjusted lane mask.
+  memcpy(mapped, lanes, sizeof(mapped));
+  int16_t op = 0;
+  if (mapped[0] > mapped[4]) {
+    op |= LWasmPermuteSimd128::SWAP_QWORDS;
+  }
+  for (int i = 0; i < 8; i++) {
+    mapped[i] &= 3;
+  }
+  if (!IsIdentity(mapped, 0, 4, 0)) {
+    op |= LWasmPermuteSimd128::PERM_LOW;
+  }
+  if (!IsIdentity(mapped, 4, 4, 0)) {
+    op |= LWasmPermuteSimd128::PERM_HIGH;
+  }
+  MOZ_ASSERT(op != 0);
+  mapped[0] |= op << 8;
+  *control = SimdConstant::CreateX8(mapped);
+  return true;
+}
+
+// A single word lane is copied into all the other lanes: PSHUF*W + PSHUFD.
+static bool TryBroadcast16x8(SimdConstant* control) {
+  SimdConstant tmp = *control;
+  if (!ByteMaskToWordMask(&tmp)) {
+    return false;
+  }
+  const SimdConstant::I16x8& lanes = tmp.asInt16x8();
+  if (ScanConstant(lanes, lanes[0], 0) < 8) {
+    return false;
+  }
+  *control = tmp;
+  return true;
+}
+
+// A single byte lane is copied into all the other lanes: PUNPCK*BW +
+// PSHUF*W + PSHUFD.
+static bool TryBroadcast8x16(SimdConstant* control) {
+  const SimdConstant::I8x16& lanes = control->asInt8x16();
+  if (ScanConstant(lanes, lanes[0], 0) < 16) {
+    return false;
+  }
+  return true;
+}
+
+// Look for permutations of a single operand.
+static LWasmPermuteSimd128::Op AnalyzePermute(SimdConstant* control) {
+  // Lane indices are input-agnostic for single-operand permutations.
+  SimdConstant::I8x16 controlBytes;
+  MaskLanes(controlBytes, control->asInt8x16());
+
+  // Get rid of no-ops immediately, so nobody else needs to check.
+  if (IsIdentity(controlBytes)) {
+    return LWasmPermuteSimd128::MOVE;
+  }
+
+  // Default control is the masked bytes.
+  *control = SimdConstant::CreateX16(controlBytes);
+
+  // Analysis order matters here and is architecture-dependent or even
+  // microarchitecture-dependent: ideally the cheapest implementation first.
+  // The Intel manual says that the cost of a PSHUFB is about five other
+  // operations, so make that our cutoff.
+  //
+  // Word, dword, and qword reversals are handled optimally by general
+  // permutes.
+  //
+  // Byte reversals are probably best left to PSHUFB; no alternative
+  // rendition seems to reliably go below five instructions. (Discuss.)
+  //
+  // Word swaps within doublewords and dword swaps within quadwords are
+  // handled optimally by general permutes.
+  //
+  // Dword and qword broadcasts are handled by dword permute.
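+  //
+  // For example, the dword reversal [12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3]
+  // reduces to the dword mask [3,2,1,0] and needs only a single PSHUFD.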
+
+  if (TryPermute32x4(control)) {
+    return LWasmPermuteSimd128::PERMUTE_32x4;
+  }
+  if (TryRotateRight8x16(control)) {
+    return LWasmPermuteSimd128::ROTATE_RIGHT_8x16;
+  }
+  if (TryPermute16x8(control)) {
+    return LWasmPermuteSimd128::PERMUTE_16x8;
+  }
+  if (TryBroadcast16x8(control)) {
+    return LWasmPermuteSimd128::BROADCAST_16x8;
+  }
+  if (TryBroadcast8x16(control)) {
+    return LWasmPermuteSimd128::BROADCAST_8x16;
+  }
+
+  // TODO: (From v8) Unzip and transpose generally have renditions that
+  // slightly beat a general permute (three or four instructions).
+  //
+  // TODO: (From MacroAssemblerX86Shared::ShuffleX4): MOVLHPS and MOVHLPS can
+  // be used when merging two values.
+  //
+  // TODO: Byteswap is MOV + PSLLW + PSRLW + POR, a small win over PSHUFB.
+
+  // The default operation is to permute bytes with the default control.
+  return LWasmPermuteSimd128::PERMUTE_8x16;
+}
+
+// Can we shift the bytes left or right by a constant? A shift is a run of
+// lanes from the rhs (which is zero) on one end and a run of values from the
+// lhs on the other end.
+static Maybe<LWasmPermuteSimd128::Op> TryShift8x16(SimdConstant* control) {
+  const SimdConstant::I8x16& lanes = control->asInt8x16();
+
+  // Represent all zero lanes by 16.
+  SimdConstant::I8x16 zeroesMasked;
+  MapLanes(zeroesMasked, lanes, [](int x) -> int { return x >= 16 ? 16 : x; });
+
+  int i = ScanConstant(zeroesMasked, 16, 0);
+  int shiftLeft = i;
+  if (shiftLeft > 0 && lanes[shiftLeft] != 0) {
+    return Nothing();
+  }
+
+  i = ScanIncreasingUnmasked(zeroesMasked, i);
+  int shiftRight = 16 - i;
+  if (shiftRight > 0 && lanes[i - 1] != 15) {
+    return Nothing();
+  }
+
+  i = ScanConstant(zeroesMasked, 16, i);
+  if (i < 16 || (shiftRight > 0 && shiftLeft > 0) ||
+      (shiftRight == 0 && shiftLeft == 0)) {
+    return Nothing();
+  }
+
+  if (shiftRight) {
+    *control = SimdConstant::SplatX16(shiftRight);
+    return Some(LWasmPermuteSimd128::SHIFT_RIGHT_8x16);
+  }
+  *control = SimdConstant::SplatX16(shiftLeft);
+  return Some(LWasmPermuteSimd128::SHIFT_LEFT_8x16);
+}
+
+static Maybe<LWasmPermuteSimd128::Op> AnalyzeShuffleWithZero(
+    SimdConstant* control) {
+  Maybe<LWasmPermuteSimd128::Op> op;
+  op = TryShift8x16(control);
+  if (op) {
+    return op;
+  }
+
+  // TODO: Optimization opportunity? A byte-blend-with-zero is just a CONST;
+  // PAND. This may beat the general byte blend code below.
+  return Nothing();
+}
+
+// Concat: if the result is the suffix (high bytes) of the rhs in front of a
+// prefix (low bytes) of the lhs then this is PALIGNR; ditto if the operands
+// are swapped.
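+//
+// For example, [30,31,0,1,...,13] (the two high bytes of the rhs followed by
+// the fourteen low bytes of the lhs) is accepted here with a suffix length
+// of 2.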
+static Maybe<LWasmShuffleSimd128::Op> TryConcatRightShift8x16(
+    SimdConstant* control, bool* swapOperands) {
+  const SimdConstant::I8x16& lanes = control->asInt8x16();
+  int i = ScanIncreasingUnmasked(lanes, 0);
+  MOZ_ASSERT(i < 16, "Single-operand run should have been handled elsewhere");
+  // The first run must end with 15 % 16.
+  if ((lanes[i - 1] & 15) != 15) {
+    return Nothing();
+  }
+  // The second run must start with 0 % 16.
+  if ((lanes[i] & 15) != 0) {
+    return Nothing();
+  }
+  // The two runs must come from different inputs.
+  if ((lanes[i] & 16) == (lanes[i - 1] & 16)) {
+    return Nothing();
+  }
+  int suffixLength = i;
+
+  i = ScanIncreasingUnmasked(lanes, i);
+  // The second run must end at the left end of the vector.
+  if (i != 16) {
+    return Nothing();
+  }
+
+  // If the suffix is from the lhs then swap the operands.
+  if (lanes[0] < 16) {
+    *swapOperands = !*swapOperands;
+  }
+  *control = SimdConstant::SplatX16(suffixLength);
+  return Some(LWasmShuffleSimd128::CONCAT_RIGHT_SHIFT_8x16);
+}
+
+// Blend words: if we pick words from both operands without a pattern but all
+// the input words stay in their position then this is PBLENDW (immediate
+// mask); this also handles all larger sizes on x64.
+static Maybe<LWasmShuffleSimd128::Op> TryBlendInt16x8(SimdConstant* control) {
+  SimdConstant tmp(*control);
+  if (!ByteMaskToWordMask(&tmp)) {
+    return Nothing();
+  }
+  SimdConstant::I16x8 masked;
+  MaskLanes(masked, tmp.asInt16x8());
+  if (!IsIdentity(masked)) {
+    return Nothing();
+  }
+  SimdConstant::I16x8 mapped;
+  MapLanes(mapped, tmp.asInt16x8(),
+           [](int x) -> int { return x < 8 ? 0 : -1; });
+  *control = SimdConstant::CreateX8(mapped);
+  return Some(LWasmShuffleSimd128::BLEND_16x8);
+}
+
+// Blend bytes: if we pick bytes ditto then this is a byte blend, which can be
+// handled with a CONST, PAND, PANDNOT, and POR.
+//
+// TODO: Optimization opportunity? If we pick all lanes but one from one
+// input, with at most one from the other, then it could be a MOV + PEXTRB +
+// PINSRB (also if this element is not in its source location).
+static Maybe<LWasmShuffleSimd128::Op> TryBlendInt8x16(SimdConstant* control) {
+  SimdConstant::I8x16 masked;
+  MaskLanes(masked, control->asInt8x16());
+  if (!IsIdentity(masked)) {
+    return Nothing();
+  }
+  SimdConstant::I8x16 mapped;
+  MapLanes(mapped, control->asInt8x16(),
+           [](int x) -> int { return x < 16 ? 0 : -1; });
+  *control = SimdConstant::CreateX16(mapped);
+  return Some(LWasmShuffleSimd128::BLEND_8x16);
+}
+
+template <typename T>
+static bool MatchInterleave(const T* lanes, int lhs, int rhs, int len) {
+  for (int i = 0; i < len; i++) {
+    if (lanes[i * 2] != lhs + i || lanes[i * 2 + 1] != rhs + i) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Unpack/interleave:
+//  - if we interleave the low (bytes/words/doublewords) of the inputs into
+//    the output then this is UNPCKL*W (possibly with a swap of operands).
+//  - if we interleave the high ditto then it is UNPCKH*W (ditto).
+template <typename T>
+static Maybe<LWasmShuffleSimd128::Op> TryInterleave(
+    const T* lanes, int lhs, int rhs, bool* swapOperands,
+    LWasmShuffleSimd128::Op lowOp, LWasmShuffleSimd128::Op highOp) {
+  // len is half the number of lanes; an interleave takes len elements from
+  // each input.
+  int len = int(32 / (sizeof(T) * 4));
+  if (MatchInterleave(lanes, lhs, rhs, len)) {
+    return Some(lowOp);
+  }
+  if (MatchInterleave(lanes, rhs, lhs, len)) {
+    *swapOperands = !*swapOperands;
+    return Some(lowOp);
+  }
+  if (MatchInterleave(lanes, lhs + len, rhs + len, len)) {
+    return Some(highOp);
+  }
+  if (MatchInterleave(lanes, rhs + len, lhs + len, len)) {
+    *swapOperands = !*swapOperands;
+    return Some(highOp);
+  }
+  return Nothing();
+}
+
+static Maybe<LWasmShuffleSimd128::Op> TryInterleave64x2(SimdConstant* control,
+                                                        bool* swapOperands) {
+  SimdConstant tmp = *control;
+  if (!ByteMaskToQWordMask(&tmp)) {
+    return Nothing();
+  }
+  const SimdConstant::I64x2& lanes = tmp.asInt64x2();
+  return TryInterleave(lanes, 0, 2, swapOperands,
+                       LWasmShuffleSimd128::INTERLEAVE_LOW_64x2,
+                       LWasmShuffleSimd128::INTERLEAVE_HIGH_64x2);
+}
+
+static Maybe<LWasmShuffleSimd128::Op> TryInterleave32x4(SimdConstant* control,
+                                                        bool* swapOperands) {
+  SimdConstant tmp = *control;
+  if (!ByteMaskToDWordMask(&tmp)) {
+    return Nothing();
+  }
+  const SimdConstant::I32x4& lanes = tmp.asInt32x4();
+  return TryInterleave(lanes, 0, 4, swapOperands,
+                       LWasmShuffleSimd128::INTERLEAVE_LOW_32x4,
+                       LWasmShuffleSimd128::INTERLEAVE_HIGH_32x4);
+}
+
+static Maybe<LWasmShuffleSimd128::Op> TryInterleave16x8(SimdConstant* control,
+                                                        bool* swapOperands) {
+  SimdConstant tmp = *control;
+  if (!ByteMaskToWordMask(&tmp)) {
+    return Nothing();
+  }
+  const SimdConstant::I16x8& lanes = tmp.asInt16x8();
+  return TryInterleave(lanes, 0, 8, swapOperands,
+                       LWasmShuffleSimd128::INTERLEAVE_LOW_16x8,
+                       LWasmShuffleSimd128::INTERLEAVE_HIGH_16x8);
+}
+
+static Maybe<LWasmShuffleSimd128::Op> TryInterleave8x16(SimdConstant* control,
+                                                        bool* swapOperands) {
+  const SimdConstant::I8x16& lanes = control->asInt8x16();
+  return TryInterleave(lanes, 0, 16, swapOperands,
+                       LWasmShuffleSimd128::INTERLEAVE_LOW_8x16,
+                       LWasmShuffleSimd128::INTERLEAVE_HIGH_8x16);
+}
+
+static LWasmShuffleSimd128::Op AnalyzeTwoArgShuffle(SimdConstant* control,
+                                                    bool* swapOperands) {
+  Maybe<LWasmShuffleSimd128::Op> op;
+  op = TryConcatRightShift8x16(control, swapOperands);
+  if (!op) {
+    op = TryBlendInt16x8(control);
+  }
+  if (!op) {
+    op = TryBlendInt8x16(control);
+  }
+  if (!op) {
+    op = TryInterleave64x2(control, swapOperands);
+  }
+  if (!op) {
+    op = TryInterleave32x4(control, swapOperands);
+  }
+  if (!op) {
+    op = TryInterleave16x8(control, swapOperands);
+  }
+  if (!op) {
+    op = TryInterleave8x16(control, swapOperands);
+  }
+  if (!op) {
+    op = Some(LWasmShuffleSimd128::SHUFFLE_BLEND_8x16);
+  }
+  return *op;
+}
+
+// Reorder the operands if that seems useful; notably, move a constant to the
+// right-hand side. Rewrites the control to account for any move.
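+//
+// Lane indices 0..15 select bytes from the lhs and 16..31 from the rhs, so
+// once the operands are swapped each index must name the other input;
+// XOR-ing every lane with 16 does exactly that.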
+static bool MaybeReorderShuffleOperands(MDefinition** lhs, MDefinition** rhs,
+                                        SimdConstant* control) {
+  if ((*lhs)->isWasmFloatConstant()) {
+    MDefinition* tmp = *lhs;
+    *lhs = *rhs;
+    *rhs = tmp;
+
+    int8_t controlBytes[16];
+    const SimdConstant::I8x16& lanes = control->asInt8x16();
+    for (unsigned i = 0; i < 16; i++) {
+      controlBytes[i] = lanes[i] ^ 16;
+    }
+    *control = SimdConstant::CreateX16(controlBytes);
+
+    return true;
+  }
+  return false;
+}
+
+Shuffle LIRGeneratorShared::AnalyzeShuffle(MWasmShuffleSimd128* ins) {
+  // Control may be updated, but only once we commit to an operation or when
+  // we swap operands.
+  SimdConstant control = ins->control();
+  MDefinition* lhs = ins->lhs();
+  MDefinition* rhs = ins->rhs();
+
+  // If only one of the inputs is used, determine which.
+  bool useLeft = true;
+  bool useRight = true;
+  if (lhs == rhs) {
+    useRight = false;
+  } else {
+    bool allAbove = true;
+    bool allBelow = true;
+    const SimdConstant::I8x16& lanes = control.asInt8x16();
+    for (unsigned i = 0; i < 16; i++) {
+      allAbove = allAbove && lanes[i] >= 16;
+      allBelow = allBelow && lanes[i] < 16;
+    }
+    if (allAbove) {
+      useLeft = false;
+    } else if (allBelow) {
+      useRight = false;
+    }
+  }
+
+  // Deal with one-ignored-input.
+  if (!(useLeft && useRight)) {
+    LWasmPermuteSimd128::Op op = AnalyzePermute(&control);
+    return Shuffle::permute(
+        useLeft ? Shuffle::Operand::LEFT : Shuffle::Operand::RIGHT, control,
+        op);
+  }
+
+  // Move constants to rhs.
+  bool swapOperands = MaybeReorderShuffleOperands(&lhs, &rhs, &control);
+
+  // Deal with constant rhs.
+  if (rhs->isWasmFloatConstant()) {
+    SimdConstant rhsConstant = rhs->toWasmFloatConstant()->toSimd128();
+    if (rhsConstant.isZeroBits()) {
+      Maybe<LWasmPermuteSimd128::Op> op = AnalyzeShuffleWithZero(&control);
+      if (op) {
+        return Shuffle::permute(
+            swapOperands ? Shuffle::Operand::RIGHT : Shuffle::Operand::LEFT,
+            control, *op);
+      }
+    }
+  }
+
+  // Two operands, both of which are used. If there's one constant operand it
+  // is now on the rhs.
+  LWasmShuffleSimd128::Op op = AnalyzeTwoArgShuffle(&control, &swapOperands);
+  return Shuffle::shuffle(
+      swapOperands ? Shuffle::Operand::BOTH_SWAPPED : Shuffle::Operand::BOTH,
+      control, op);
+}
+
+# ifdef DEBUG
+void LIRGeneratorShared::ReportShuffleSpecialization(const Shuffle& s) {
+  switch (s.opd) {
+    case Shuffle::Operand::BOTH:
+    case Shuffle::Operand::BOTH_SWAPPED:
+      switch (*s.shuffleOp) {
+        case LWasmShuffleSimd128::SHUFFLE_BLEND_8x16:
+          js::wasm::ReportSimdAnalysis("shuffle -> shuffle+blend 8x16");
+          break;
+        case LWasmShuffleSimd128::BLEND_8x16:
+          js::wasm::ReportSimdAnalysis("shuffle -> blend 8x16");
+          break;
+        case LWasmShuffleSimd128::BLEND_16x8:
+          js::wasm::ReportSimdAnalysis("shuffle -> blend 16x8");
+          break;
+        case LWasmShuffleSimd128::CONCAT_RIGHT_SHIFT_8x16:
+          js::wasm::ReportSimdAnalysis("shuffle -> concat+shift-right 8x16");
+          break;
+        case LWasmShuffleSimd128::INTERLEAVE_HIGH_8x16:
+          js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 8x16");
+          break;
+        case LWasmShuffleSimd128::INTERLEAVE_HIGH_16x8:
+          js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 16x8");
+          break;
+        case LWasmShuffleSimd128::INTERLEAVE_HIGH_32x4:
+          js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 32x4");
+          break;
+        case LWasmShuffleSimd128::INTERLEAVE_HIGH_64x2:
+          js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 64x2");
+          break;
+        case LWasmShuffleSimd128::INTERLEAVE_LOW_8x16:
+          js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 8x16");
+          break;
+        case LWasmShuffleSimd128::INTERLEAVE_LOW_16x8:
+          js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 16x8");
+          break;
+        case LWasmShuffleSimd128::INTERLEAVE_LOW_32x4:
+          js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 32x4");
+          break;
+        case LWasmShuffleSimd128::INTERLEAVE_LOW_64x2:
+          js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 64x2");
+          break;
+        default:
+          MOZ_CRASH("Unexpected shuffle op");
+      }
+      break;
+    case Shuffle::Operand::LEFT:
+    case Shuffle::Operand::RIGHT:
+      switch (*s.permuteOp) {
+        case LWasmPermuteSimd128::BROADCAST_8x16:
+          js::wasm::ReportSimdAnalysis("shuffle -> broadcast 8x16");
+          break;
+        case LWasmPermuteSimd128::BROADCAST_16x8:
+          js::wasm::ReportSimdAnalysis("shuffle -> broadcast 16x8");
+          break;
+        case LWasmPermuteSimd128::MOVE:
+          js::wasm::ReportSimdAnalysis("shuffle -> move");
+          break;
+        case LWasmPermuteSimd128::PERMUTE_8x16:
+          js::wasm::ReportSimdAnalysis("shuffle -> permute 8x16");
+          break;
+        case LWasmPermuteSimd128::PERMUTE_16x8: {
+          int op = s.control.asInt16x8()[0] >> 8;
+          char buf[256];
+          sprintf(buf, "shuffle -> permute 16x8%s%s%s",
+                  op & LWasmPermuteSimd128::SWAP_QWORDS ? " swap" : "",
+                  op & LWasmPermuteSimd128::PERM_HIGH ? " high" : "",
+                  op & LWasmPermuteSimd128::PERM_LOW ? " low" : "");
+          js::wasm::ReportSimdAnalysis(buf);
+          break;
+        }
+        case LWasmPermuteSimd128::PERMUTE_32x4:
+          js::wasm::ReportSimdAnalysis("shuffle -> permute 32x4");
+          break;
+        case LWasmPermuteSimd128::ROTATE_RIGHT_8x16:
+          js::wasm::ReportSimdAnalysis("shuffle -> rotate-right 8x16");
+          break;
+        case LWasmPermuteSimd128::SHIFT_LEFT_8x16:
+          js::wasm::ReportSimdAnalysis("shuffle -> shift-left 8x16");
+          break;
+        case LWasmPermuteSimd128::SHIFT_RIGHT_8x16:
+          js::wasm::ReportSimdAnalysis("shuffle -> shift-right 8x16");
+          break;
+        default:
+          MOZ_CRASH("Unexpected permute op");
+      }
+      break;
+  }
+}
+# endif // DEBUG
+
+#endif // ENABLE_WASM_SIMD