From 36d22d82aa202bb199967e9512281e9a53db42c9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 21:33:14 +0200 Subject: Adding upstream version 115.7.0esr. Signed-off-by: Daniel Baumann --- .../tests/wasm/simd/ad-hack-binop-preamble.js | 334 ++++ js/src/jit-test/tests/wasm/simd/ad-hack-extra.js | 697 ++++++++ .../jit-test/tests/wasm/simd/ad-hack-preamble.js | 211 +++ .../tests/wasm/simd/ad-hack-simple-binops0.js | 7 + .../tests/wasm/simd/ad-hack-simple-binops1.js | 7 + .../tests/wasm/simd/ad-hack-simple-binops2.js | 7 + .../tests/wasm/simd/ad-hack-simple-unops.js | 122 ++ js/src/jit-test/tests/wasm/simd/ad-hack.js | 1747 ++++++++++++++++++++ .../tests/wasm/simd/avx2-x64-ion-codegen.js | 584 +++++++ .../tests/wasm/simd/baseline-bug1636235.js | 111 ++ .../tests/wasm/simd/binop-x64-ion-codegen.js | 255 +++ .../tests/wasm/simd/binop-x86-ion-codegen.js | 20 + .../tests/wasm/simd/bitselect-x64-ion-codegen.js | 45 + js/src/jit-test/tests/wasm/simd/cmp-bitselect.js | 107 ++ .../tests/wasm/simd/cmp-x64-ion-codegen.js | 77 + .../tests/wasm/simd/const-arm64-vixl-codegen.js | 109 ++ .../tests/wasm/simd/const-x64-ion-codegen.js | 28 + .../tests/wasm/simd/cvt-x64-ion-codegen.js | 27 + .../jit-test/tests/wasm/simd/debug-bug1644759.js | 20 + js/src/jit-test/tests/wasm/simd/directives.txt | 1 + js/src/jit-test/tests/wasm/simd/disabled.js | 28 + js/src/jit-test/tests/wasm/simd/experimental.js | 411 +++++ js/src/jit-test/tests/wasm/simd/ion-analysis.js | 902 ++++++++++ js/src/jit-test/tests/wasm/simd/ion-bug1641973.js | 13 + js/src/jit-test/tests/wasm/simd/ion-bug1688262.js | 22 + js/src/jit-test/tests/wasm/simd/ion-bug1688713.js | 24 + js/src/jit-test/tests/wasm/simd/js-api.js | 130 ++ .../tests/wasm/simd/neg-abs-not-x64-ion-codegen.js | 34 + .../tests/wasm/simd/pairwise-x64-ion-codegen.js | 38 + .../tests/wasm/simd/pmaddubsw-x64-ion-codegen.js | 154 ++ .../tests/wasm/simd/reduce-x64-ion-codegen.js | 46 + js/src/jit-test/tests/wasm/simd/select.js | 33 + .../tests/wasm/simd/shift-x64-ion-codegen.js | 26 + .../tests/wasm/simd/shuffle-x86-ion-codegen.js | 88 + .../tests/wasm/simd/simd-partial-oob-store.js | 38 + .../tests/wasm/simd/splat-x64-ion-codegen.js | 29 + js/src/jit-test/tests/wasm/simd/validation.js | 368 +++++ 37 files changed, 6900 insertions(+) create mode 100644 js/src/jit-test/tests/wasm/simd/ad-hack-binop-preamble.js create mode 100644 js/src/jit-test/tests/wasm/simd/ad-hack-extra.js create mode 100644 js/src/jit-test/tests/wasm/simd/ad-hack-preamble.js create mode 100644 js/src/jit-test/tests/wasm/simd/ad-hack-simple-binops0.js create mode 100644 js/src/jit-test/tests/wasm/simd/ad-hack-simple-binops1.js create mode 100644 js/src/jit-test/tests/wasm/simd/ad-hack-simple-binops2.js create mode 100644 js/src/jit-test/tests/wasm/simd/ad-hack-simple-unops.js create mode 100644 js/src/jit-test/tests/wasm/simd/ad-hack.js create mode 100644 js/src/jit-test/tests/wasm/simd/avx2-x64-ion-codegen.js create mode 100644 js/src/jit-test/tests/wasm/simd/baseline-bug1636235.js create mode 100644 js/src/jit-test/tests/wasm/simd/binop-x64-ion-codegen.js create mode 100644 js/src/jit-test/tests/wasm/simd/binop-x86-ion-codegen.js create mode 100644 js/src/jit-test/tests/wasm/simd/bitselect-x64-ion-codegen.js create mode 100644 js/src/jit-test/tests/wasm/simd/cmp-bitselect.js create mode 100644 js/src/jit-test/tests/wasm/simd/cmp-x64-ion-codegen.js create mode 100644 js/src/jit-test/tests/wasm/simd/const-arm64-vixl-codegen.js create mode 100644 
js/src/jit-test/tests/wasm/simd/const-x64-ion-codegen.js create mode 100644 js/src/jit-test/tests/wasm/simd/cvt-x64-ion-codegen.js create mode 100644 js/src/jit-test/tests/wasm/simd/debug-bug1644759.js create mode 100644 js/src/jit-test/tests/wasm/simd/directives.txt create mode 100644 js/src/jit-test/tests/wasm/simd/disabled.js create mode 100644 js/src/jit-test/tests/wasm/simd/experimental.js create mode 100644 js/src/jit-test/tests/wasm/simd/ion-analysis.js create mode 100644 js/src/jit-test/tests/wasm/simd/ion-bug1641973.js create mode 100644 js/src/jit-test/tests/wasm/simd/ion-bug1688262.js create mode 100644 js/src/jit-test/tests/wasm/simd/ion-bug1688713.js create mode 100644 js/src/jit-test/tests/wasm/simd/js-api.js create mode 100644 js/src/jit-test/tests/wasm/simd/neg-abs-not-x64-ion-codegen.js create mode 100644 js/src/jit-test/tests/wasm/simd/pairwise-x64-ion-codegen.js create mode 100644 js/src/jit-test/tests/wasm/simd/pmaddubsw-x64-ion-codegen.js create mode 100644 js/src/jit-test/tests/wasm/simd/reduce-x64-ion-codegen.js create mode 100644 js/src/jit-test/tests/wasm/simd/select.js create mode 100644 js/src/jit-test/tests/wasm/simd/shift-x64-ion-codegen.js create mode 100644 js/src/jit-test/tests/wasm/simd/shuffle-x86-ion-codegen.js create mode 100644 js/src/jit-test/tests/wasm/simd/simd-partial-oob-store.js create mode 100644 js/src/jit-test/tests/wasm/simd/splat-x64-ion-codegen.js create mode 100644 js/src/jit-test/tests/wasm/simd/validation.js (limited to 'js/src/jit-test/tests/wasm/simd') diff --git a/js/src/jit-test/tests/wasm/simd/ad-hack-binop-preamble.js b/js/src/jit-test/tests/wasm/simd/ad-hack-binop-preamble.js new file mode 100644 index 0000000000..dd1443a1e7 --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/ad-hack-binop-preamble.js @@ -0,0 +1,334 @@ +// |jit-test| skip-if: true + +// Common code to test simple binary operators. See runSimpleBinopTest below. 
+ +function expandConstantBinopInputs(op, memtype, inputs) { + let s = ''; + let ident = 0; + for ( let [a, b] of inputs ) { + let constlhs = `${memtype.layoutName} ${a.map(jsValueToWasmName).join(' ')}`; + let constrhs = `${memtype.layoutName} ${b.map(jsValueToWasmName).join(' ')}`; + s += ` + ;; lhs is constant, rhs is variable + (func (export "run_constlhs${ident}") + (v128.store (i32.const 0) + (call $doit_constlhs${ident} (v128.const ${constrhs})))) + (func $doit_constlhs${ident} (param $b v128) (result v128) + (${op} (v128.const ${constlhs}) (local.get $b))) + + ;; rhs is constant, lhs is variable + (func (export "run_constrhs${ident}") + (v128.store (i32.const 0) + (call $doit_constrhs${ident} (v128.const ${constlhs})))) + (func $doit_constrhs${ident} (param $a v128) (result v128) + (${op} (local.get $a) (v128.const ${constrhs}))) + + ;; both operands are constant + (func (export "run_constboth${ident}") + (v128.store (i32.const 0) + (call $doit_constboth${ident}))) + (func $doit_constboth${ident} (result v128) + (${op} (v128.const ${constlhs}) (v128.const ${constrhs})))` + ident++; + } + return s; +} + +function insAndMemBinop(op, memtype, resultmemtype, inputs) { + var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + + ;; both arguments are variable + (func (export "run") + (v128.store (i32.const 0) + (call $doit (v128.load (i32.const 16)) (v128.load (i32.const 32))))) + (func $doit (param $a v128) (param $b v128) (result v128) + (${op} (local.get $a) (local.get $b))) + + ${expandConstantBinopInputs(op, memtype, inputs)})`); + var mem = new memtype(ins.exports.mem.buffer); + var resultmem = !resultmemtype || memtype == resultmemtype ? mem : new resultmemtype(ins.exports.mem.buffer); + return [ins, mem, resultmem]; +} + +function add(bits) { return (x, y) => sign_extend(x+y, bits) } +function add64(x, y) { return sign_extend(BigInt(x)+BigInt(y), 64) } +function sub(bits) { return (x, y) => sign_extend(x-y, bits) } +function sub64(x, y) { return sign_extend(BigInt(x)-BigInt(y), 64) } +// Even 32-bit multiply can overflow a Number, so always use BigInt +function mul(bits) { return (x, y) => sign_extend(BigInt(x)*BigInt(y), bits) } +function div(x, y) { return x/y } +function min(x, y) { return x < y ? x : y } +function max(x, y) { return x > y ? x : y } +function and(x, y) { return zero_extend(x&y, 8) } +function or(x, y) { return zero_extend(x|y, 8) } +function xor(x, y) { return zero_extend(x^y, 8) } +function andnot(x, y) { return zero_extend(x&~y, 8) } +function avgr(x, y) { return (x + y + 1) >> 1; } +function eq(truth) { return (x,y) => x==y ? truth : 0 } +function ne(truth) { return (x,y) => x!=y ? truth : 0 } +function lt(truth) { return (x, y) => x < y ? truth : 0 } +function gt(truth) { return (x, y) => x > y ? truth : 0 } +function le(truth) { return (x, y) => x <= y ? truth : 0 } +function ge(truth) { return (x, y) => x >= y ? 
truth : 0 } + +function fadd(x, y) { return Math.fround(x+y) } +function fsub(x, y) { return Math.fround(x-y) } +function fmul(x, y) { return Math.fround(x*y) } +function fdiv(x, y) { return Math.fround(x/y) } +function fmin(x, y) { + if (x == y) return x; + if (x < y) return x; + if (y < x) return y; + if (isNaN(x)) return x; + return y; +} +function fmax(x, y) { + if (x == y) return x; + if (x > y) return x; + if (y > x) return y; + if (isNaN(x)) return x; + return y; +} +function dadd(x, y) { return x+y } +function dsub(x, y) { return x-y } +function dmul(x, y) { return x*y } +function ddiv(x, y) { return x/y } +var dmax = fmax; +var dmin = fmin; + +function op_sat_s(bits, op) { + return (x, y) => { + return signed_saturate(op(sign_extend(x, bits), + sign_extend(y, bits)), + bits); + } +} + +function op_sat_u(bits, op) { + return (x, y) => { + return unsigned_saturate(op(zero_extend(x, bits), + zero_extend(y, bits)), + bits); + } +} + +function add_sat_s(bits) { + return op_sat_s(bits, (x,y) => x+y); +} +function sub_sat_s(bits) { + return op_sat_s(bits, (x,y) => x-y); +} +function add_sat_u(bits) { + return op_sat_u(bits, (x,y) => x+y); +} +function sub_sat_u(bits) { + return op_sat_u(bits, (x,y) => x-y); +} + +function max_s(bits) { + return (x, y) => { + return sign_extend(max(sign_extend(x, bits), + sign_extend(y, bits)), + bits); + } +} + +function min_s(bits) { + return (x, y) => { + return sign_extend(min(sign_extend(x, bits), + sign_extend(y, bits)), + bits); + } +} + +function max_u(bits) { + return (x, y) => { + return max(zero_extend(x, bits), + zero_extend(y, bits)); + } +} + +function min_u(bits) { + return (x, y) => { + return min(zero_extend(x, bits), + zero_extend(y, bits)); + } +} + +function pmin(x, y) { return y < x ? y : x } +function pmax(x, y) { return x < y ? 
y : x } + +assertEq(max_s(8)(1, 2), 2); +assertEq(max_s(8)(1, 128), 1); +assertEq(min_s(8)(1, 2), 1); +assertEq(min_s(8)(1, 128), -128); +assertEq(max_u(8)(1, 2), 2); +assertEq(max_u(8)(1, 128), 128); +assertEq(min_u(8)(1, 2), 1); +assertEq(min_u(8)(1, 128), 1); + +var binopTests = + [['i8x16.add', Int8Array, add(8)], + ['i16x8.add', Int16Array, add(16)], + ['i32x4.add', Int32Array, add(32)], + ['i64x2.add', BigInt64Array, add64], + ['i8x16.sub', Int8Array, sub(8)], + ['i16x8.sub', Int16Array, sub(16)], + ['i32x4.sub', Int32Array, sub(32)], + ['i64x2.sub', BigInt64Array, sub64], + ['i8x16.add_sat_s', Int8Array, add_sat_s(8)], + ['i8x16.add_sat_u', Uint8Array, add_sat_u(8)], + ['i16x8.add_sat_s', Int16Array, add_sat_s(16)], + ['i16x8.add_sat_u', Uint16Array, add_sat_u(16)], + ['i8x16.sub_sat_s', Int8Array, sub_sat_s(8)], + ['i8x16.sub_sat_u', Uint8Array, sub_sat_u(8)], + ['i16x8.sub_sat_s', Int16Array, sub_sat_s(16)], + ['i16x8.sub_sat_u', Uint16Array, sub_sat_u(16)], + ['i16x8.mul', Int16Array, mul(16)], + ['i32x4.mul', Int32Array, mul(32)], + ['i64x2.mul', BigInt64Array, mul(64)], + ['i8x16.avgr_u', Uint8Array, avgr], + ['i16x8.avgr_u', Uint16Array, avgr], + ['i8x16.max_s', Int8Array, max_s(8)], + ['i8x16.max_u', Uint8Array, max_u(8)], + ['i8x16.min_s', Int8Array, min_s(8)], + ['i8x16.min_u', Uint8Array, min_u(8)], + ['i16x8.max_s', Int16Array, max_s(16)], + ['i16x8.max_u', Uint16Array, max_u(16)], + ['i16x8.min_s', Int16Array, min_s(16)], + ['i16x8.min_u', Uint16Array, min_u(16)], + ['i32x4.max_s', Int32Array, max_s(32)], + ['i32x4.max_u', Uint32Array, max_u(32)], + ['i32x4.min_s', Int32Array, min_s(32)], + ['i32x4.min_u', Uint32Array, min_u(32)], + ['v128.and', Uint8Array, and], + ['v128.or', Uint8Array, or], + ['v128.xor', Uint8Array, xor], + ['v128.andnot', Uint8Array, andnot], + ['f32x4.add', Float32Array, fadd], + ['f32x4.sub', Float32Array, fsub], + ['f32x4.mul', Float32Array, fmul], + ['f32x4.div', Float32Array, fdiv], + ['f32x4.min', Float32Array, fmin], + ['f32x4.max', Float32Array, fmax], + ['f64x2.add', Float64Array, dadd], + ['f64x2.sub', Float64Array, dsub], + ['f64x2.mul', Float64Array, dmul], + ['f64x2.div', Float64Array, ddiv], + ['f64x2.min', Float64Array, dmin], + ['f64x2.max', Float64Array, dmax], + ['i8x16.eq', Int8Array, eq(-1)], + ['i8x16.ne', Int8Array, ne(-1)], + ['i8x16.lt_s', Int8Array, lt(-1)], + ['i8x16.gt_s', Int8Array, gt(-1)], + ['i8x16.le_s', Int8Array, le(-1)], + ['i8x16.ge_s', Int8Array, ge(-1)], + ['i8x16.gt_u', Uint8Array, gt(0xFF)], + ['i8x16.ge_u', Uint8Array, ge(0xFF)], + ['i8x16.lt_u', Uint8Array, lt(0xFF)], + ['i8x16.le_u', Uint8Array, le(0xFF)], + ['i16x8.eq', Int16Array, eq(-1)], + ['i16x8.ne', Int16Array, ne(-1)], + ['i16x8.lt_s', Int16Array, lt(-1)], + ['i16x8.gt_s', Int16Array, gt(-1)], + ['i16x8.le_s', Int16Array, le(-1)], + ['i16x8.ge_s', Int16Array, ge(-1)], + ['i16x8.gt_u', Uint16Array, gt(0xFFFF)], + ['i16x8.ge_u', Uint16Array, ge(0xFFFF)], + ['i16x8.lt_u', Uint16Array, lt(0xFFFF)], + ['i16x8.le_u', Uint16Array, le(0xFFFF)], + ['i32x4.eq', Int32Array, eq(-1)], + ['i32x4.ne', Int32Array, ne(-1)], + ['i32x4.lt_s', Int32Array, lt(-1)], + ['i32x4.gt_s', Int32Array, gt(-1)], + ['i32x4.le_s', Int32Array, le(-1)], + ['i32x4.ge_s', Int32Array, ge(-1)], + ['i32x4.gt_u', Uint32Array, gt(0xFFFFFFFF)], + ['i32x4.ge_u', Uint32Array, ge(0xFFFFFFFF)], + ['i32x4.lt_u', Uint32Array, lt(0xFFFFFFFF)], + ['i32x4.le_u', Uint32Array, le(0xFFFFFFFF)], + ['f32x4.eq', Float32Array, eq(-1), Int32Array], + ['f32x4.ne', Float32Array, ne(-1), Int32Array], + 
['f32x4.lt', Float32Array, lt(-1), Int32Array], + ['f32x4.gt', Float32Array, gt(-1), Int32Array], + ['f32x4.le', Float32Array, le(-1), Int32Array], + ['f32x4.ge', Float32Array, ge(-1), Int32Array], + ['f64x2.eq', Float64Array, eq(-1), BigInt64Array], + ['f64x2.ne', Float64Array, ne(-1), BigInt64Array], + ['f64x2.lt', Float64Array, lt(-1), BigInt64Array], + ['f64x2.gt', Float64Array, gt(-1), BigInt64Array], + ['f64x2.le', Float64Array, le(-1), BigInt64Array], + ['f64x2.ge', Float64Array, ge(-1), BigInt64Array], + ['f32x4.pmin', Float32Array, pmin], + ['f32x4.pmax', Float32Array, pmax], + ['f64x2.pmin', Float64Array, pmin], + ['f64x2.pmax', Float64Array, pmax]] + +// Run v128 x v128 -> v128 tests. Inputs are taken from the common input sets, +// placed in memory, the test is run, and the result is extracted and checked. +// +// Runs tests with both operands as variables, either as constant, or both as +// constant. Also checks NaN behavior when appropriate. +// +// All runners that call this should use the same value for `ofParts` and should +// pass different values for `part`, up to `ofParts` - 1. + +function runSimpleBinopTest(part, ofParts) { + let partSize = Math.ceil(binopTests.length / ofParts); + let start = part * partSize; + let end = Math.min((part + 1) * partSize, binopTests.length); + for ( let [op, memtype, rop, resultmemtype] of binopTests.slice(start, end) ) { + let inputs = cross(memtype.inputs); + let len = 16/memtype.BYTES_PER_ELEMENT; + let xs = iota(len); + let zero = xs.map(_ => 0); + let [ins, mem, resultmem] = insAndMemBinop(op, memtype, resultmemtype, inputs); + let bitsForF32 = memtype == Float32Array ? new Uint32Array(mem.buffer) : null; + let bitsForF64 = memtype == Float64Array ? new BigInt64Array(mem.buffer) : null; + + function testIt(a,b,r) { + set(mem, len, a); + set(mem, len*2, b); + ins.exports.run(); + assertSame(get(resultmem, 0, len), r); + + // Test signalling NaN superficially by replacing QNaN inputs with SNaN + if (bitsForF32 != null && (a.some(isNaN) || b.some(isNaN))) { + a.forEach((x, i) => { if (isNaN(x)) { bitsForF32[len+i] = 0x7FA0_0000; } }); + b.forEach((x, i) => { if (isNaN(x)) { bitsForF32[(len*2)+i] = 0x7FA0_0000; } }); + ins.exports.run(); + assertSame(get(resultmem, 0, len), r); + } + if (bitsForF64 != null && (a.some(isNaN) || b.some(isNaN))) { + a.forEach((x, i) => { if (isNaN(x)) { bitsForF64[len+i] = 0x7FF4_0000_0000_0000n; } }); + b.forEach((x, i) => { if (isNaN(x)) { bitsForF64[(len*2)+i] = 0x7FF4_0000_0000_0000n; } }); + ins.exports.run(); + assertSame(get(resultmem, 0, len), r); + } + } + + function testConstIt(i,r) { + set(resultmem, 0, zero); + ins.exports["run_constlhs" + i](); + assertSame(get(resultmem, 0, len), r); + + set(resultmem, 0, zero); + ins.exports["run_constrhs" + i](); + assertSame(get(resultmem, 0, len), r); + + set(resultmem, 0, zero); + ins.exports["run_constboth" + i](); + assertSame(get(resultmem, 0, len), r); + } + + let i = 0; + for (let [a,b] of inputs) { + let r = xs.map((i) => rop(a[i], b[i])); + testIt(a,b,r); + testConstIt(i,r); + i++; + } + } +} diff --git a/js/src/jit-test/tests/wasm/simd/ad-hack-extra.js b/js/src/jit-test/tests/wasm/simd/ad-hack-extra.js new file mode 100644 index 0000000000..ee770d16a9 --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/ad-hack-extra.js @@ -0,0 +1,697 @@ +// |jit-test| skip-if: !wasmSimdEnabled() + +// Do not include this in the preamble, it must be loaded after lib/wasm.js +load(scriptdir + "ad-hack-preamble.js") + +// Widening multiplication. 
+// This is to be moved into ad-hack.js +// +// (iMxN.extmul_{high,low}_iKxL_{s,u} A B) +// +// is equivalent to +// +// (iMxN.mul (iMxN.extend_{high,low}_iKxL_{s,u} A) +// (iMxN.extend_{high,low}_iKxL_{s,u} B)) +// +// It doesn't really matter what the inputs are, we can test this almost +// blindly. +// +// Unfortunately, we do not yet have i64x2.extend_* so we introduce a helper +// function to compute that. + +function makeExtMulTest(wide, narrow, part, signed) { + let widener = (wide == 'i64x2') ? + `call $${wide}_extend_${part}_${narrow}_${signed}` : + `${wide}.extend_${part}_${narrow}_${signed}`; + return ` + (func (export "${wide}_extmul_${part}_${narrow}_${signed}") + (v128.store (i32.const 0) + (${wide}.extmul_${part}_${narrow}_${signed} (v128.load (i32.const 16)) + (v128.load (i32.const 32)))) + (v128.store (i32.const 48) + (${wide}.mul (${widener} (v128.load (i32.const 16))) + (${widener} (v128.load (i32.const 32)))))) +`; +} + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func $i64x2_extend_low_i32x4_s (param v128) (result v128) + (i64x2.shr_s (i8x16.shuffle 16 16 16 16 0 1 2 3 16 16 16 16 4 5 6 7 + (local.get 0) + (v128.const i32x4 0 0 0 0)) + (i32.const 32))) + (func $i64x2_extend_high_i32x4_s (param v128) (result v128) + (i64x2.shr_s (i8x16.shuffle 16 16 16 16 8 9 10 11 16 16 16 16 12 13 14 15 + (local.get 0) + (v128.const i32x4 0 0 0 0)) + (i32.const 32))) + (func $i64x2_extend_low_i32x4_u (param v128) (result v128) + (i8x16.shuffle 0 1 2 3 16 16 16 16 4 5 6 7 16 16 16 16 + (local.get 0) + (v128.const i32x4 0 0 0 0))) + (func $i64x2_extend_high_i32x4_u (param v128) (result v128) + (i8x16.shuffle 8 9 10 11 16 16 16 16 12 13 14 15 16 16 16 16 + (local.get 0) + (v128.const i32x4 0 0 0 0))) + ${makeExtMulTest('i64x2','i32x4','low','s')} + ${makeExtMulTest('i64x2','i32x4','high','s')} + ${makeExtMulTest('i64x2','i32x4','low','u')} + ${makeExtMulTest('i64x2','i32x4','high','u')} + ${makeExtMulTest('i32x4','i16x8','low','s')} + ${makeExtMulTest('i32x4','i16x8','high','s')} + ${makeExtMulTest('i32x4','i16x8','low','u')} + ${makeExtMulTest('i32x4','i16x8','high','u')} + ${makeExtMulTest('i16x8','i8x16','low','s')} + ${makeExtMulTest('i16x8','i8x16','high','s')} + ${makeExtMulTest('i16x8','i8x16','low','u')} + ${makeExtMulTest('i16x8','i8x16','high','u')})`); + +for ( let [ WideArray, NarrowArray ] of + [ [ Int16Array, Int8Array ], + [ Int32Array, Int16Array ], + [ BigInt64Array, Int32Array ] ] ) { + let narrowMem = new NarrowArray(ins.exports.mem.buffer); + let narrowSrc0 = 16/NarrowArray.BYTES_PER_ELEMENT; + let narrowSrc1 = 32/NarrowArray.BYTES_PER_ELEMENT; + let wideMem = new WideArray(ins.exports.mem.buffer); + let wideElems = 16/WideArray.BYTES_PER_ELEMENT; + let wideRes0 = 0; + let wideRes1 = 48/WideArray.BYTES_PER_ELEMENT; + let zero = iota(wideElems).map(_ => 0); + for ( let part of [ 'low', 'high' ] ) { + for ( let signed of [ 's', 'u' ] ) { + for ( let [a, b] of cross(NarrowArray.inputs) ) { + set(wideMem, wideRes0, zero); + set(wideMem, wideRes1, zero); + set(narrowMem, narrowSrc0, a); + set(narrowMem, narrowSrc1, b); + let test = `${WideArray.layoutName}_extmul_${part}_${NarrowArray.layoutName}_${signed}`; + ins.exports[test](); + assertSame(get(wideMem, wideRes0, wideElems), + get(wideMem, wideRes1, wideElems)); + } + } + } +} + +// Bitmask. Ion constant folds, so test that too. +// This is to be merged into the existing bitmask tests in ad-hack.js. 
+ +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "bitmask_i64x2") (result i32) + (i64x2.bitmask (v128.load (i32.const 16)))) + (func (export "const_bitmask_i64x2") (result i32) + (i64x2.bitmask (v128.const i64x2 0xff337f8012345678 0x0001984212345678))))`); + +var mem8 = new Uint8Array(ins.exports.mem.buffer); +var mem64 = new BigUint64Array(ins.exports.mem.buffer); + +set(mem8, 16, iota(16).map((_) => 0)); +assertEq(ins.exports.bitmask_i64x2(), 0); + +set(mem64, 2, [0x8000000000000000n, 0x8000000000000000n]); +assertEq(ins.exports.bitmask_i64x2(), 3); + +set(mem64, 2, [0x7FFFFFFFFFFFFFFFn, 0x7FFFFFFFFFFFFFFFn]); +assertEq(ins.exports.bitmask_i64x2(), 0); + +set(mem64, 2, [0n, 0x8000000000000000n]); +assertEq(ins.exports.bitmask_i64x2(), 2); + +set(mem64, 2, [0x8000000000000000n, 0n]); +assertEq(ins.exports.bitmask_i64x2(), 1); + +assertEq(ins.exports.const_bitmask_i64x2(), 1); + +// Widen low/high. +// This is to be merged into the existing widening tests in ad-hack.js. + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "extend_low_i32x4_s") + (v128.store (i32.const 0) (i64x2.extend_low_i32x4_s (v128.load (i32.const 16))))) + (func (export "extend_high_i32x4_s") + (v128.store (i32.const 0) (i64x2.extend_high_i32x4_s (v128.load (i32.const 16))))) + (func (export "extend_low_i32x4_u") + (v128.store (i32.const 0) (i64x2.extend_low_i32x4_u (v128.load (i32.const 16))))) + (func (export "extend_high_i32x4_u") + (v128.store (i32.const 0) (i64x2.extend_high_i32x4_u (v128.load (i32.const 16))))))`); + +var mem32 = new Int32Array(ins.exports.mem.buffer); +var mem64 = new BigInt64Array(ins.exports.mem.buffer); +var mem64u = new BigUint64Array(ins.exports.mem.buffer); + +var as = [205, 1, 192, 3].map((x) => x << 24); +set(mem32, 4, as); + +ins.exports.extend_low_i32x4_s(); +assertSame(get(mem64, 0, 2), iota(2).map((n) => BigInt(as[n]))) + +ins.exports.extend_high_i32x4_s(); +assertSame(get(mem64, 0, 2), iota(2).map((n) => BigInt(as[n+2]))); + +ins.exports.extend_low_i32x4_u(); +assertSame(get(mem64u, 0, 2), iota(2).map((n) => BigInt(as[n] >>> 0))); + +ins.exports.extend_high_i32x4_u(); +assertSame(get(mem64u, 0, 2), iota(2).map((n) => BigInt(as[n+2] >>> 0))); + +// Saturating rounding q-format multiplication. 
+// This is to be moved into ad-hack.js + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "q15mulr_sat_s") + (v128.store (i32.const 0) (i16x8.q15mulr_sat_s (v128.load (i32.const 16)) (v128.load (i32.const 32))))))`); + +var mem16 = new Int16Array(ins.exports.mem.buffer); +for ( let [as, bs] of cross(Int16Array.inputs) ) { + set(mem16, 8, as); + set(mem16, 16, bs); + ins.exports.q15mulr_sat_s(); + assertSame(get(mem16, 0, 8), + iota(8).map((i) => signed_saturate((as[i] * bs[i] + 0x4000) >> 15, 16))); +} + + +// i64.all_true + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "i64_all_true") (result i32) + (i64x2.all_true (v128.load (i32.const 16)) ) ) )`); + +var mem32 = new Int32Array(ins.exports.mem.buffer); + +set(mem32, 4, [0, 0, 0, 0]); +assertEq(0, ins.exports.i64_all_true()); +set(mem32, 4, [1, 0, 0, 0]); +assertEq(0, ins.exports.i64_all_true()); +set(mem32, 4, [1, 0, 0, 1]); +assertEq(1, ins.exports.i64_all_true()); +set(mem32, 4, [0, 0, 10, 0]); +assertEq(0, ins.exports.i64_all_true()); +set(mem32, 4, [0, -250, 1, 0]); +assertEq(1, ins.exports.i64_all_true()); +set(mem32, 4, [-1, -1, -1, -1]); +assertEq(1, ins.exports.i64_all_true()); + +if (this.wasmSimdAnalysis && wasmCompileMode() == "ion") { + const positive = + wasmCompile( + `(module + (memory (export "mem") 1 1) + (func $f (param v128) (result i32) + (if (result i32) (i64x2.all_true (local.get 0)) + (i32.const 42) + (i32.const 37))) + (func (export "run") (result i32) + (call $f (v128.load (i32.const 16)))))`); + assertEq(wasmSimdAnalysis(), "simd128-to-scalar-and-branch -> folded"); + + const negative = + wasmCompile( + `(module + (memory (export "mem") 1 1) + (func $f (param v128) (result i32) + (if (result i32) (i32.eqz (i64x2.all_true (local.get 0))) + (i32.const 42) + (i32.const 37))) + (func (export "run") (result i32) + (call $f (v128.load (i32.const 16)))))`); + assertEq(wasmSimdAnalysis(), "simd128-to-scalar-and-branch -> folded"); + + for ( let inp of [[1n, 2n], [4n, 0n], [0n, 0n]]) { + const all_true = inp.every(v => v != 0n) + let mem = new BigInt64Array(positive.exports.mem.buffer); + set(mem, 2, inp); + assertEq(positive.exports.run(), all_true ? 42 : 37); + + mem = new BigInt64Array(negative.exports.mem.buffer); + set(mem, 2, inp); + assertEq(negative.exports.run(), all_true ? 
37 : 42); + } + + wasmCompile(`(module (func (result i32) (i64x2.all_true (v128.const i64x2 0 0))))`); + assertEq(wasmSimdAnalysis(), "simd128-to-scalar -> constant folded"); +} + + +// i64x2.eq and i64x2.ne + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "i64_eq") + (v128.store (i32.const 0) + (i64x2.eq (v128.load (i32.const 16)) (v128.load (i32.const 32))) )) + (func (export "i64_ne") + (v128.store (i32.const 0) + (i64x2.ne (v128.load (i32.const 16)) (v128.load (i32.const 32))) )) )`); + +var mem64 = new BigInt64Array(ins.exports.mem.buffer); + +set(mem64, 2, [0n, 1n, 0n, 1n]); +ins.exports.i64_eq(); +assertSame(get(mem64, 0, 2), [-1n, -1n]); +ins.exports.i64_ne(); +assertSame(get(mem64, 0, 2), [0n, 0n]); +set(mem64, 2, [0x0n, -1n, 0x100000000n, -1n]); +ins.exports.i64_eq(); +assertSame(get(mem64, 0, 2), [0n, -1n]); +set(mem64, 2, [-1n, 0x0n, -1n, 0x100000000n]); +ins.exports.i64_ne(); +assertSame(get(mem64, 0, 2), [0n, -1n]); + + +// i64x2.lt, i64x2.gt, i64x2.le, and i64.ge + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "i64_lt_s") + (v128.store (i32.const 0) + (i64x2.lt_s (v128.load (i32.const 16)) (v128.load (i32.const 32))) )) + (func (export "i64_gt_s") + (v128.store (i32.const 0) + (i64x2.gt_s (v128.load (i32.const 16)) (v128.load (i32.const 32))) )) + (func (export "i64_le_s") + (v128.store (i32.const 0) + (i64x2.le_s (v128.load (i32.const 16)) (v128.load (i32.const 32))) )) + (func (export "i64_ge_s") + (v128.store (i32.const 0) + (i64x2.ge_s (v128.load (i32.const 16)) (v128.load (i32.const 32))) )) )`); + +var mem64 = new BigInt64Array(ins.exports.mem.buffer); + +set(mem64, 2, [0n, 1n, 1n, 0n]); +ins.exports.i64_lt_s(); +assertSame(get(mem64, 0, 2), [-1n, 0n]); +ins.exports.i64_gt_s(); +assertSame(get(mem64, 0, 2), [0n, -1n]); +ins.exports.i64_le_s(); +assertSame(get(mem64, 0, 2), [-1n, 0n]); +ins.exports.i64_ge_s(); +assertSame(get(mem64, 0, 2), [0n, -1n]); + +set(mem64, 2, [0n, -1n, -1n, 0n]); +ins.exports.i64_lt_s(); +assertSame(get(mem64, 0, 2), [0n, -1n]); +ins.exports.i64_gt_s(); +assertSame(get(mem64, 0, 2), [-1n, 0n]); +ins.exports.i64_le_s(); +assertSame(get(mem64, 0, 2), [0n, -1n]); +ins.exports.i64_ge_s(); +assertSame(get(mem64, 0, 2), [-1n, 0n]); + +set(mem64, 2, [-2n, 2n, -1n, 1n]); +ins.exports.i64_lt_s(); +assertSame(get(mem64, 0, 2), [-1n, 0n]); +ins.exports.i64_gt_s(); +assertSame(get(mem64, 0, 2), [0n, -1n]); +ins.exports.i64_le_s(); +assertSame(get(mem64, 0, 2), [-1n, 0n]); +ins.exports.i64_ge_s(); +assertSame(get(mem64, 0, 2), [0n, -1n]); + +set(mem64, 2, [-2n, 1n, -2n, 1n]); +ins.exports.i64_lt_s(); +assertSame(get(mem64, 0, 2), [0n, 0n]); +ins.exports.i64_gt_s(); +assertSame(get(mem64, 0, 2), [0n, 0n]); +ins.exports.i64_le_s(); +assertSame(get(mem64, 0, 2), [-1n, -1n]); +ins.exports.i64_ge_s(); +assertSame(get(mem64, 0, 2), [-1n, -1n]); + + +function wasmCompile(text) { + return new WebAssembly.Instance(new WebAssembly.Module(wasmTextToBinary(text))) +} + + +// i64x2.abs + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "i64_abs") + (v128.store (i32.const 0) + (i64x2.abs (v128.load (i32.const 16))) )) )`); + +var mem64 = new BigInt64Array(ins.exports.mem.buffer); + +set(mem64, 2, [-3n, 42n]); +ins.exports.i64_abs(); +assertSame(get(mem64, 0, 2), [3n, 42n]); +set(mem64, 2, [0n, -0x8000000000000000n]); +ins.exports.i64_abs(); +assertSame(get(mem64, 0, 2), [0n, -0x8000000000000000n]); + + +// Load lane + +var ins = wasmEvalText(` + (module + (memory 
(export "mem") 1 1) + ${iota(16).map(i => `(func (export "load8_lane_${i}") (param i32) + (v128.store (i32.const 0) + (v128.load8_lane offset=0 ${i} (local.get 0) (v128.load (i32.const 16))))) + `).join('')} + ${iota(8).map(i => `(func (export "load16_lane_${i}") (param i32) + (v128.store (i32.const 0) + (v128.load16_lane offset=0 ${i} (local.get 0) (v128.load (i32.const 16))))) + `).join('')} + ${iota(4).map(i => `(func (export "load32_lane_${i}") (param i32) + (v128.store (i32.const 0) + (v128.load32_lane offset=0 ${i} (local.get 0) (v128.load (i32.const 16))))) + `).join('')} + ${iota(2).map(i => `(func (export "load64_lane_${i}") (param i32) + (v128.store (i32.const 0) + (v128.load64_lane offset=0 ${i} (local.get 0) (v128.load (i32.const 16))))) + `).join('')} + (func (export "load_lane_const_and_align") + (v128.store (i32.const 0) + (v128.load64_lane offset=32 1 (i32.const 1) + (v128.load32_lane offset=32 1 (i32.const 3) + (v128.load16_lane offset=32 0 (i32.const 5) + (v128.load (i32.const 16))))) + )) + )`); + +var mem8 = new Int8Array(ins.exports.mem.buffer); +var mem32 = new Int32Array(ins.exports.mem.buffer); +var mem64 = new BigInt64Array(ins.exports.mem.buffer); + +var as = [0x12345678, 0x23456789, 0x3456789A, 0x456789AB]; +set(mem32, 4, as); set(mem8, 32, [0xC2]); + +ins.exports["load8_lane_0"](32); +assertSame(get(mem32, 0, 4), [0x123456C2, 0x23456789, 0x3456789A, 0x456789AB]); +ins.exports["load8_lane_1"](32); +assertSame(get(mem32, 0, 4), [0x1234C278, 0x23456789, 0x3456789A, 0x456789AB]); +ins.exports["load8_lane_2"](32); +assertSame(get(mem32, 0, 4), [0x12C25678, 0x23456789, 0x3456789A, 0x456789AB]); +ins.exports["load8_lane_3"](32); +assertSame(get(mem32, 0, 4), [0xC2345678|0, 0x23456789, 0x3456789A, 0x456789AB]); +ins.exports["load8_lane_4"](32); +assertSame(get(mem32, 0, 4), [0x12345678, 0x234567C2, 0x3456789A, 0x456789AB]); +ins.exports["load8_lane_6"](32); +assertSame(get(mem32, 0, 4), [0x12345678, 0x23C26789, 0x3456789A, 0x456789AB]); +ins.exports["load8_lane_9"](32); +assertSame(get(mem32, 0, 4), [0x12345678, 0x23456789, 0x3456C29A, 0x456789AB]); +ins.exports["load8_lane_14"](32); +assertSame(get(mem32, 0, 4), [0x12345678, 0x23456789, 0x3456789A, 0x45C289AB]); + +set(mem8, 32, [0xC2, 0xD1]); + +ins.exports["load16_lane_0"](32); +assertSame(get(mem32, 0, 4), [0x1234D1C2, 0x23456789, 0x3456789A, 0x456789AB]); +ins.exports["load16_lane_1"](32); +assertSame(get(mem32, 0, 4), [0xD1C25678|0, 0x23456789, 0x3456789A, 0x456789AB]); +ins.exports["load16_lane_2"](32); +assertSame(get(mem32, 0, 4), [0x12345678, 0x2345D1C2, 0x3456789A, 0x456789AB]); +ins.exports["load16_lane_5"](32); +assertSame(get(mem32, 0, 4), [0x12345678, 0x23456789, 0xD1C2789A|0, 0x456789AB]); +ins.exports["load16_lane_7"](32); +assertSame(get(mem32, 0, 4), [0x12345678, 0x23456789, 0x3456789A, 0xD1C289AB|0]); + +set(mem32, 8, [0x16B5C3D0]); + +ins.exports["load32_lane_0"](32); +assertSame(get(mem32, 0, 4), [0x16B5C3D0, 0x23456789, 0x3456789A, 0x456789AB]); +ins.exports["load32_lane_1"](32); +assertSame(get(mem32, 0, 4), [0x12345678, 0x16B5C3D0, 0x3456789A, 0x456789AB]); +ins.exports["load32_lane_2"](32); +assertSame(get(mem32, 0, 4), [0x12345678, 0x23456789, 0x16B5C3D0, 0x456789AB]); +ins.exports["load32_lane_3"](32); +assertSame(get(mem32, 0, 4), [0x12345678, 0x23456789, 0x3456789A, 0x16B5C3D0]); + +set(mem64, 4, [0x3300AA4416B5C3D0n]); + +ins.exports["load64_lane_0"](32); +assertSame(get(mem64, 0, 2), [0x3300AA4416B5C3D0n, 0x456789AB3456789An]); +ins.exports["load64_lane_1"](32); +assertSame(get(mem64, 0, 
2), [0x2345678912345678n, 0x3300AA4416B5C3D0n]); + +// .. (mis)align load lane + +var as = [0x12345678, 0x23456789, 0x3456789A, 0x456789AB]; +set(mem32, 4, as); set(mem64, 4, [0x3300AA4416B5C3D0n, 0x300AA4416B5C3D03n]); + +ins.exports["load16_lane_5"](33); +assertSame(get(mem32, 0, 4), [0x12345678,0x23456789,0xb5c3789a|0,0x456789ab]); +ins.exports["load32_lane_1"](34); +assertSame(get(mem32, 0, 4), [0x12345678, 0xaa4416b5|0,0x3456789a,0x456789ab]); +ins.exports["load64_lane_0"](35); +assertSame(get(mem64, 0, 2), [0x5c3d033300aa4416n, 0x456789ab3456789an]); + +ins.exports["load_lane_const_and_align"](); +assertSame(get(mem32, 0, 4), [0x123400aa,0x00AA4416,0x4416b5c3,0x033300aa]); + +// Store lane + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + ${iota(16).map(i => `(func (export "store8_lane_${i}") (param i32) (param i32) + (v128.store8_lane ${i} (local.get 1) (v128.load (local.get 0)))) + `).join('')} + ${iota(8).map(i => `(func (export "store16_lane_${i}") (param i32) (param i32) + (v128.store16_lane ${i} (local.get 1) (v128.load (local.get 0)))) + `).join('')} + ${iota(4).map(i => `(func (export "store32_lane_${i}") (param i32) (param i32) + (v128.store32_lane ${i} (local.get 1) (v128.load (local.get 0)))) + `).join('')} + ${iota(2).map(i => `(func (export "store64_lane_${i}") (param i32) (param i32) + (v128.store64_lane ${i} (local.get 1) (v128.load (local.get 0)))) + `).join('')} + (func (export "store_lane_const_and_align") + (v128.store16_lane 1 (i32.const 33) (v128.load (i32.const 16))) + (v128.store32_lane 2 (i32.const 37) (v128.load (i32.const 16))) + (v128.store64_lane 0 (i32.const 47) (v128.load (i32.const 16))) + ))`); + + +var mem8 = new Int8Array(ins.exports.mem.buffer); +var mem32 = new Int32Array(ins.exports.mem.buffer); +var mem64 = new BigInt64Array(ins.exports.mem.buffer); + +var as = [0x12345678, 0x23456789, 0x3456789A, 0x456789AB]; +set(mem32, 4, as); set(mem32, 0, [0x7799AA00, 42, 3, 0]); + +ins.exports["store8_lane_0"](16, 0); assertSame(get(mem32, 0, 1), [0x7799AA78]); +ins.exports["store8_lane_1"](16, 0); assertSame(get(mem32, 0, 1), [0x7799AA56]); +ins.exports["store8_lane_2"](16, 0); assertSame(get(mem32, 0, 1), [0x7799AA34]); +ins.exports["store8_lane_3"](16, 0); assertSame(get(mem32, 0, 1), [0x7799AA12]); +ins.exports["store8_lane_5"](16, 0); assertSame(get(mem32, 0, 1), [0x7799AA67]); +ins.exports["store8_lane_7"](16, 0); assertSame(get(mem32, 0, 1), [0x7799AA23]); +ins.exports["store8_lane_8"](16, 0); assertSame(get(mem32, 0, 1), [0x7799AA9A]); +ins.exports["store8_lane_15"](16, 0); assertSame(get(mem32, 0, 1), [0x7799AA45]); + +ins.exports["store16_lane_0"](16, 0); assertSame(get(mem32, 0, 1), [0x77995678]); +ins.exports["store16_lane_1"](16, 0); assertSame(get(mem32, 0, 1), [0x77991234]); +ins.exports["store16_lane_2"](16, 0); assertSame(get(mem32, 0, 1), [0x77996789]); +ins.exports["store16_lane_5"](16, 0); assertSame(get(mem32, 0, 1), [0x77993456]); +ins.exports["store16_lane_7"](16, 0); assertSame(get(mem32, 0, 1), [0x77994567]); + +ins.exports["store32_lane_0"](16, 0); assertSame(get(mem32, 0, 2), [0x12345678, 42]); +ins.exports["store32_lane_1"](16, 0); assertSame(get(mem32, 0, 2), [0x23456789, 42]); +ins.exports["store32_lane_2"](16, 0); assertSame(get(mem32, 0, 2), [0x3456789A, 42]); +ins.exports["store32_lane_3"](16, 0); assertSame(get(mem32, 0, 2), [0x456789AB, 42]); + +ins.exports["store64_lane_0"](16, 0); assertSame(get(mem64, 0, 2), [0x2345678912345678n, 3]); +ins.exports["store64_lane_1"](16, 0); assertSame(get(mem64, 0, 
2), [0x456789AB3456789An, 3]); + +// .. (mis)align store lane + +var as = [0x12345678, 0x23456789, 0x3456789A, 0x456789AB]; +set(mem32, 4, as); set(mem32, 0, [0x7799AA01, 42, 3, 0]); +ins.exports["store16_lane_1"](16, 1); assertSame(get(mem32, 0, 2), [0x77123401, 42]); +set(mem32, 0, [0x7799AA01, 42, 3, 0]); +ins.exports["store32_lane_1"](16, 2); assertSame(get(mem32, 0, 2), [0x6789AA01, 0x2345]); +set(mem32, 0, [0x7799AA01, 42, 5, 3]); +ins.exports["store64_lane_0"](16, 1); +assertSame(get(mem64, 0, 2), [0x4567891234567801n, 0x0300000023]); + +set(mem32, 4, [ + 0x12345678, 0x23456789, 0x3456789A, 0x456789AB, + 0x55AA55AA, 0xCC44CC44, 0x55AA55AA, 0xCC44CC44, + 0x55AA55AA, 0xCC44CC44, 0x55AA55AA, 0xCC44CC44, +]); +ins.exports["store_lane_const_and_align"](); +assertSame(get(mem32, 8, 8), [ + 0x551234aa, 0x56789a44, 0x55aa5534, 0x7844cc44, + 0x89123456|0, 0xcc234567|0, 0x55aa55aa, 0xcc44cc44|0, +]); + + +// i8x16.popcnt + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "i8x16_popcnt") + (v128.store (i32.const 0) (i8x16.popcnt (v128.load (i32.const 16)) ))) + )`); + +var mem8 = new Int8Array(ins.exports.mem.buffer); + +set(mem8, 16, [0, 1, 2, 4, 8, 0x10, 0x20, 0x40, 0x80, 3, -1, 0xF0, 0x11, 0xFE, 0x0F, 0xE]); +ins.exports.i8x16_popcnt(); +assertSame(get(mem8, 0, 16), [0,1,1,1,1,1,1,1,1,2,8,4,2,7,4,3]); + + +/// Double-precision conversion instructions. +/// f64x2.convert_low_i32x4_{u,s} / i32x4.trunc_sat_f64x2_{u,s}_zero +/// f32x4.demote_f64x2_zero / f64x2.promote_low_f32x4 + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "f64x2_convert_low_i32x4_s") + (v128.store (i32.const 0) (f64x2.convert_low_i32x4_s (v128.load (i32.const 16)) ))) + (func (export "f64x2_convert_low_i32x4_u") + (v128.store (i32.const 0) (f64x2.convert_low_i32x4_u (v128.load (i32.const 16)) ))) + + (func (export "i32x4_trunc_sat_f64x2_s_zero") + (v128.store (i32.const 0) (i32x4.trunc_sat_f64x2_s_zero (v128.load (i32.const 16)) ))) + (func (export "i32x4_trunc_sat_f64x2_u_zero") + (v128.store (i32.const 0) (i32x4.trunc_sat_f64x2_u_zero (v128.load (i32.const 16)) ))) + + (func (export "f32x4_demote_f64x2") + (v128.store (i32.const 0) (f32x4.demote_f64x2_zero (v128.load (i32.const 16)) ))) + (func (export "f64x2_protomote_f32x4") + (v128.store (i32.const 0) (f64x2.promote_low_f32x4 (v128.load (i32.const 16)) ))) + )`); + +var mem32 = new Int32Array(ins.exports.mem.buffer); +var memU32 = new Uint32Array(ins.exports.mem.buffer); +var memF32 = new Float32Array(ins.exports.mem.buffer); +var memF64 = new Float64Array(ins.exports.mem.buffer); + +// f64x2.convert_low_i32x4_u / f64x2.convert_low_i32x4_s + +set(mem32, 4, [1, -2, 0, -2]); +ins.exports.f64x2_convert_low_i32x4_s(); +assertSame(get(memF64, 0, 2), [1, -2]); +set(mem32, 4, [-1, 0, 5, -212312312]); +ins.exports.f64x2_convert_low_i32x4_s(); +assertSame(get(memF64, 0, 2), [-1, 0]); + +set(memU32, 4, [1, 4045646797, 4, 0]); +ins.exports.f64x2_convert_low_i32x4_u(); +assertSame(get(memF64, 0, 2), [1, 4045646797]); +set(memU32, 4, [0, 2, 4, 3]); +ins.exports.f64x2_convert_low_i32x4_u(); +assertSame(get(memF64, 0, 2), [0, 2]); + +// i32x4.trunc_sat_f64x2_u_zero / i32x4.trunc_sat_f64x2_s_zero + +set(memF64, 2, [0,0]) +ins.exports.i32x4_trunc_sat_f64x2_s_zero(); +assertSame(get(mem32, 0, 4), [0,0,0,0]); +ins.exports.i32x4_trunc_sat_f64x2_u_zero(); +assertSame(get(memU32, 0, 4), [0,0,0,0]); + +set(memF64, 2, [-1.23,65535.12]) +ins.exports.i32x4_trunc_sat_f64x2_s_zero(); +assertSame(get(mem32, 0, 4), [-1,65535,0,0]); 
+set(memF64, 2, [1.99,65535.12]) +ins.exports.i32x4_trunc_sat_f64x2_u_zero(); +assertSame(get(memU32, 0, 4), [1,65535,0,0]); + +set(memF64, 2, [10e+100,-10e+100]) +ins.exports.i32x4_trunc_sat_f64x2_s_zero(); +assertSame(get(mem32, 0, 4), [0x7fffffff,-0x80000000,0,0]); +ins.exports.i32x4_trunc_sat_f64x2_u_zero(); +assertSame(get(memU32, 0, 4), [0xffffffff,0,0,0]); + +// f32x4.demote_f64x2_zero + +set(memF64, 2, [1, 2]) +ins.exports.f32x4_demote_f64x2(); +assertSame(get(memF32, 0, 4), [1,2,0,0]); + +set(memF64, 2, [-4e38, 4e38]) +ins.exports.f32x4_demote_f64x2(); +assertSame(get(memF32, 0, 4), [-Infinity,Infinity,0,0]); + +set(memF64, 2, [-1e-46, 1e-46]) +ins.exports.f32x4_demote_f64x2(); +assertSame(get(memF32, 0, 4), [1/-Infinity,0,0,0]); + +set(memF64, 2, [0, NaN]) +ins.exports.f32x4_demote_f64x2(); +assertSame(get(memF32, 0, 4), [0, NaN,0,0]); + +set(memF64, 2, [Infinity, -Infinity]) +ins.exports.f32x4_demote_f64x2(); +assertSame(get(memF32, 0, 4), [Infinity, -Infinity,0,0]); + +// f64x2.promote_low_f32x4 + +set(memF32, 4, [4, 3, 1, 2]) +ins.exports.f64x2_protomote_f32x4(); +assertSame(get(memF64, 0, 2), [4, 3]); + +set(memF32, 4, [NaN, 0, 0, 0]) +ins.exports.f64x2_protomote_f32x4(); +assertSame(get(memF64, 0, 2), [NaN, 0]); + +set(memF32, 4, [Infinity, -Infinity, 0, 0]) +ins.exports.f64x2_protomote_f32x4(); +assertSame(get(memF64, 0, 2), [Infinity, -Infinity]); + + +// i16x8.extadd_pairwise_i8x16_{s,u} / i32x4.extadd_pairwise_i16x8_{s,u} + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "i16x8_extadd_pairwise_i8x16_s") + (v128.store (i32.const 0) (i16x8.extadd_pairwise_i8x16_s (v128.load (i32.const 16)) ))) + (func (export "i16x8_extadd_pairwise_i8x16_u") + (v128.store (i32.const 0) (i16x8.extadd_pairwise_i8x16_u (v128.load (i32.const 16)) ))) + + (func (export "i32x4_extadd_pairwise_i16x8_s") + (v128.store (i32.const 0) (i32x4.extadd_pairwise_i16x8_s (v128.load (i32.const 16)) ))) + (func (export "i32x4_extadd_pairwise_i16x8_u") + (v128.store (i32.const 0) (i32x4.extadd_pairwise_i16x8_u (v128.load (i32.const 16)) ))) + )`); + +var mem8 = new Int8Array(ins.exports.mem.buffer); +var memU8 = new Uint8Array(ins.exports.mem.buffer); +var mem16 = new Int16Array(ins.exports.mem.buffer); +var memU16 = new Uint16Array(ins.exports.mem.buffer); +var mem32 = new Int32Array(ins.exports.mem.buffer); +var memU32 = new Uint32Array(ins.exports.mem.buffer); + +set(mem8, 16, [0, 0, 1, 1, 2, -2, 0, 42, 1, -101, 101, -1, 127, 125, -1, -2]); +ins.exports.i16x8_extadd_pairwise_i8x16_s(); +assertSame(get(mem16, 0, 8), [0, 2, 0, 42, -100, 100, 252, -3]); + +set(memU8, 16, [0, 0, 1, 1, 2, 255, 0, 42, 0, 255, 254, 0, 127, 125, 255, 255]); +ins.exports.i16x8_extadd_pairwise_i8x16_u(); +assertSame(get(memU16, 0, 8), [0, 2, 257, 42, 255, 254, 252, 510]); + +set(mem16, 8, [0, 0, 1, 1, 2, -2, -1, -2]); +ins.exports.i32x4_extadd_pairwise_i16x8_s(); +assertSame(get(mem32, 0, 4), [0, 2, 0, -3]); +set(mem16, 8, [0, 42, 1, -32760, 32766, -1, 32761, 32762]); +ins.exports.i32x4_extadd_pairwise_i16x8_s(); +assertSame(get(mem32, 0, 4), [42, -32759, 32765, 65523]); + +set(memU16, 8, [0, 0, 1, 1, 2, 65535, 65535, 65535]); +ins.exports.i32x4_extadd_pairwise_i16x8_u(); +assertSame(get(memU32, 0, 4), [0, 2, 65537, 131070]); +set(memU16, 8, [0, 42, 0, 65535, 65534, 0, 32768, 32765]); +ins.exports.i32x4_extadd_pairwise_i16x8_u(); +assertSame(get(memU32, 0, 4), [42, 65535, 65534, 65533]); diff --git a/js/src/jit-test/tests/wasm/simd/ad-hack-preamble.js 
b/js/src/jit-test/tests/wasm/simd/ad-hack-preamble.js new file mode 100644 index 0000000000..407b59476f --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/ad-hack-preamble.js @@ -0,0 +1,211 @@ +// |jit-test| skip-if: true + +// Common code for the ad-hack test cases. + +function get(arr, loc, len) { + let res = []; + for ( let i=0; i < len; i++ ) { + res.push(arr[loc+i]); + } + return res; +} + +function getUnaligned(arr, width, loc, len) { + assertEq(arr.constructor, Uint8Array); + assertEq(width <= 4, true); + let res = []; + for ( let i=0; i < len; i++ ) { + let x = 0; + for ( let j=width-1; j >=0; j-- ) + x = (x << 8) | arr[loc+i*width+j]; + res.push(x); + } + return res; +} + +function set(arr, loc, vals) { + for ( let i=0; i < vals.length; i++ ) { + if (arr instanceof BigInt64Array) { + arr[loc+i] = BigInt(vals[i]); + } else { + arr[loc+i] = vals[i]; + } + } +} + +function setUnaligned(arr, width, loc, vals) { + assertEq(arr.constructor, Uint8Array); + assertEq(width <= 4, true); + for ( let i=0; i < vals.length; i++ ) { + let x = vals[i]; + for ( let j=0 ; j < width ; j++ ) { + arr[loc+i*width + j] = x & 255; + x >>= 8; + } + } +} + +function equal(a, b) { + return a === b || isNaN(a) && isNaN(b); +} + +function upd(xs, at, val) { + let ys = Array.from(xs); + ys[at] = val; + return ys; +} + +// The following operations are not always generalized fully, they are just +// functional enough for the existing test cases to pass. + +function sign_extend(n, bits) { + if (bits < 32) { + n = Number(n); + return (n << (32 - bits)) >> (32 - bits); + } + if (typeof n == "bigint") { + if (bits == 32) + return Number(n & 0xFFFF_FFFFn) | 0; + assertEq(bits, 64); + n = (n & 0xFFFF_FFFF_FFFF_FFFFn) + if (n > 0x7FFF_FFFF_FFFF_FFFFn) + return n - 0x1_0000_0000_0000_0000n; + return n; + } + assertEq(bits, 32); + return n|0; +} + +function zero_extend(n, bits) { + if (bits < 32) { + return n & ((1 << bits) - 1); + } + if (n < 0) + n = 0x100000000 + n; + return n; +} + +function signed_saturate(z, bits) { + let min = -(1 << (bits-1)); + if (z <= min) { + return min; + } + let max = (1 << (bits-1)) - 1; + if (z > max) { + return max; + } + return z; +} + +function unsigned_saturate(z, bits) { + if (z <= 0) { + return 0; + } + let max = (1 << bits) - 1; + if (z > max) { + return max; + } + return z; +} + +function shl(count, width) { + if (width == 64) { + count = BigInt(count); + return (v) => { + v = BigInt(v); + if (v < 0) + v = (1n << 64n) + v; + let r = (v << count) & ((1n << 64n) - 1n); + if (r & (1n << 63n)) + r = -((1n << 64n) - r); + return r; + } + } else { + return (v) => { + let mask = (width == 32) ? -1 : ((1 << width) - 1); + return (v << count) & mask; + } + } +} + +function popcount(n) { + n = n - ((n >> 1) & 0x55555555) + n = (n & 0x33333333) + ((n >> 2) & 0x33333333) + return ((n + (n >> 4) & 0xF0F0F0F) * 0x1010101) >> 24 +} + +function jsValueToWasmName(x) { + if (typeof x == "number") { + if (x == 0) return 1 / x < 0 ? "-0" : "0"; + if (isNaN(x)) return "+nan"; + if (!isFinite(x)) return (x < 0 ? "-" : "+") + "inf"; + } + return x; +} + +// For each input array, a set of arrays of the proper length for v128, with +// values in range but possibly of the wrong signedness (eg, for Int8Array, 128 +// is in range but is really -128). Also a unary operator `rectify` that +// transforms the value to the proper sign and bitwidth. + +Int8Array.inputs = [iota(16).map((x) => (x+1) * (x % 3 == 0 ? -1 : 1)), + iota(16).map((x) => (x*2+3) * (x % 3 == 1 ? 
-1 : 1)), + [1,2,128,127,1,4,128,127,1,2,129,125,1,2,254,0], + [2,1,127,128,5,1,127,128,2,1,126,130,2,1,1,255], + iota(16).map((x) => ((x + 37) * 8 + 12) % 256), + iota(16).map((x) => ((x + 12) * 4 + 9) % 256)]; +Int8Array.rectify = (x) => sign_extend(x,8); +Int8Array.layoutName = 'i8x16'; + +Uint8Array.inputs = Int8Array.inputs; +Uint8Array.rectify = (x) => zero_extend(x,8); +Uint8Array.layoutName = 'i8x16'; + +Int16Array.inputs = [iota(8).map((x) => (x+1) * (x % 3 == 0 ? -1 : 1)), + iota(8).map((x) => (x*2+3) * (x % 3 == 1 ? -1 : 1)), + [1,2,32768,32767,1,4,32768,32767], + [2,1,32767,32768,5,1,32767,32768], + [1,2,128,127,1,4,128,127].map((x) => (x << 8) + x*2), + [2,1,127,128,1,1,128,128].map((x) => (x << 8) + x*3)]; +Int16Array.rectify = (x) => sign_extend(x,16); +Int16Array.layoutName = 'i16x8'; + +Uint16Array.inputs = Int16Array.inputs; +Uint16Array.rectify = (x) => zero_extend(x,16); +Uint16Array.layoutName = 'i16x8'; + +Int32Array.inputs = [iota(4).map((x) => (x+1) * (x % 3 == 0 ? -1 : 1)), + iota(4).map((x) => (x*2+3) * (x % 3 == 1 ? -1 : 1)), + [1,2,32768 << 16,32767 << 16], + [2,1,32767 << 16,32768 << 16], + [1,2,128,127].map((x) => (x << 24) + (x << 8) + x*3), + [2,1,127,128].map((x) => (x << 24) + (x << 8) + x*4)]; +Int32Array.rectify = (x) => sign_extend(x,32); +Int32Array.layoutName = 'i32x4'; + +Uint32Array.inputs = Int32Array.inputs; +Uint32Array.rectify = (x) => zero_extend(x,32); +Uint32Array.layoutName = 'i32x4'; + +BigInt64Array.inputs = [[1,2],[2,1],[-1,-2],[-2,-1],[2n ** 32n, 2n ** 32n - 5n], + [(2n ** 38n) / 5n, (2n ** 41n) / 7n], + [-((2n ** 38n) / 5n), (2n ** 41n) / 7n]]; +BigInt64Array.rectify = (x) => BigInt(x); +BigInt64Array.layoutName = 'i64x2'; + +Float32Array.inputs = [[1, -1, 1e10, -1e10], + [-1, -2, -1e10, 1e10], + [5.1, -1.1, -4.3, -0], + ...permute([1, -10, NaN, Infinity])]; +Float32Array.rectify = (x) => Math.fround(x); +Float32Array.layoutName = 'f32x4'; + +Float64Array.inputs = Float32Array.inputs.map((x) => x.slice(0, 2)) +Float64Array.rectify = (x) => x; +Float64Array.layoutName = 'f64x2'; + +// Tidy up all the inputs +for ( let A of [Int8Array, Uint8Array, Int16Array, Uint16Array, Int32Array, Uint32Array, BigInt64Array, + Float32Array, Float64Array]) { + A.inputs = A.inputs.map((xs) => xs.map(A.rectify)); +} diff --git a/js/src/jit-test/tests/wasm/simd/ad-hack-simple-binops0.js b/js/src/jit-test/tests/wasm/simd/ad-hack-simple-binops0.js new file mode 100644 index 0000000000..f3406ac44a --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/ad-hack-simple-binops0.js @@ -0,0 +1,7 @@ +// |jit-test| skip-if: !wasmSimdEnabled() + +// Do not include these in the preamble, they must be loaded after lib/wasm.js +load(scriptdir + "ad-hack-preamble.js") +load(scriptdir + "ad-hack-binop-preamble.js") + +runSimpleBinopTest(0, 3); diff --git a/js/src/jit-test/tests/wasm/simd/ad-hack-simple-binops1.js b/js/src/jit-test/tests/wasm/simd/ad-hack-simple-binops1.js new file mode 100644 index 0000000000..e6d6f7e2fc --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/ad-hack-simple-binops1.js @@ -0,0 +1,7 @@ +// |jit-test| skip-if: !wasmSimdEnabled() + +// Do not include these in the preamble, they must be loaded after lib/wasm.js +load(scriptdir + "ad-hack-preamble.js") +load(scriptdir + "ad-hack-binop-preamble.js") + +runSimpleBinopTest(1, 3); diff --git a/js/src/jit-test/tests/wasm/simd/ad-hack-simple-binops2.js b/js/src/jit-test/tests/wasm/simd/ad-hack-simple-binops2.js new file mode 100644 index 0000000000..a196aa28fc --- /dev/null +++ 
b/js/src/jit-test/tests/wasm/simd/ad-hack-simple-binops2.js @@ -0,0 +1,7 @@ +// |jit-test| skip-if: !wasmSimdEnabled() + +// Do not include these in the preamble, they must be loaded after lib/wasm.js +load(scriptdir + "ad-hack-preamble.js") +load(scriptdir + "ad-hack-binop-preamble.js") + +runSimpleBinopTest(2, 3); diff --git a/js/src/jit-test/tests/wasm/simd/ad-hack-simple-unops.js b/js/src/jit-test/tests/wasm/simd/ad-hack-simple-unops.js new file mode 100644 index 0000000000..6e562a97a1 --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/ad-hack-simple-unops.js @@ -0,0 +1,122 @@ +// |jit-test| skip-if: !wasmSimdEnabled() + +// Do not include this in the preamble, it must be loaded after lib/wasm.js +load(scriptdir + "ad-hack-preamble.js") + +// Simple unary operators. Place parameter in memory at offset 16, +// read the result at offset 0. + +function expandConstantUnopInputs(op, memtype, inputs) { + let s = ''; + let ident = 0; + for ( let a of inputs ) { + let constval = `${memtype.layoutName} ${a.map(jsValueToWasmName).join(' ')}`; + s += ` + (func (export "run_const${ident}") + (v128.store (i32.const 0) + (${op} (v128.const ${constval})))) +`; + ident++; + } + return s; +} + +function insAndMemUnop(op, memtype, resultmemtype, inputs) { + var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + + (func (export "run") + (v128.store (i32.const 0) + (call $doit (v128.load (i32.const 16))))) + + (func $doit (param $a v128) (result v128) + (${op} (local.get $a))) + + ${expandConstantUnopInputs(op, memtype, inputs)})`); + var mem = new memtype(ins.exports.mem.buffer); + var resultmem = !resultmemtype || memtype == resultmemtype ? mem : new resultmemtype(ins.exports.mem.buffer); + return [ins, mem, resultmem]; +} + +function ineg(bits) { return (a) => sign_extend(!a ? a : -a,bits) } +function iabs(bits) { return (a) => zero_extend(a < 0 ? 
-a : a, bits) } +function fneg(a) { return -a } +function fabs(a) { return Math.abs(a) } +function fsqrt(a) { return Math.fround(Math.sqrt(Math.fround(a))) } +function dsqrt(a) { return Math.sqrt(a) } +function bitnot(a) { return (~a) & 255 } +function ffloor(x) { return Math.fround(Math.floor(x)) } +function fceil(x) { return Math.fround(Math.ceil(x)) } +function ftrunc(x) { return Math.fround(Math.sign(x)*Math.floor(Math.abs(x))) } +function fnearest(x) { return Math.fround(Math.round(x)) } +function dfloor(x) { return Math.floor(x) } +function dceil(x) { return Math.ceil(x) } +function dtrunc(x) { return Math.sign(x)*Math.floor(Math.abs(x)) } +function dnearest(x) { return Math.round(x) } + +for ( let [op, memtype, rop, resultmemtype] of + [['i8x16.neg', Int8Array, ineg(8)], + ['i16x8.neg', Int16Array, ineg(16)], + ['i32x4.neg', Int32Array, ineg(32)], + ['i64x2.neg', BigInt64Array, ineg(64)], + ['i8x16.abs', Int8Array, iabs(8), Uint8Array], + ['i16x8.abs', Int16Array, iabs(16), Uint16Array], + ['i32x4.abs', Int32Array, iabs(32), Uint32Array], + ['f32x4.neg', Float32Array, fneg], + ['f64x2.neg', Float64Array, fneg], + ['f32x4.abs', Float32Array, fabs], + ['f64x2.abs', Float64Array, fabs], + ['f32x4.sqrt', Float32Array, fsqrt], + ['f64x2.sqrt', Float64Array, dsqrt], + ['f32x4.ceil', Float32Array, fceil], + ['f32x4.floor', Float32Array, ffloor], + ['f32x4.trunc', Float32Array, ftrunc], + ['f32x4.nearest', Float32Array, fnearest], + ['f64x2.ceil', Float64Array, dceil], + ['f64x2.floor', Float64Array, dfloor], + ['f64x2.trunc', Float64Array, dtrunc], + ['f64x2.nearest', Float64Array, dnearest], + ['v128.not', Uint8Array, bitnot], + ]) +{ + let [ins, mem, resultmem] = insAndMemUnop(op, memtype, resultmemtype, memtype.inputs); + let len = 16/memtype.BYTES_PER_ELEMENT; + let xs = iota(len); + let zero = xs.map(_ => 0); + let bitsForF32 = memtype == Float32Array ? new Uint32Array(mem.buffer) : null; + let bitsForF64 = memtype == Float64Array ? new BigInt64Array(mem.buffer) : null; + + function testIt(a, r) { + set(mem, len, a); + ins.exports.run(); + assertSame(get(resultmem, 0, len), r); + + // Test signalling NaN superficially by replacing QNaN inputs with SNaN + if (bitsForF32 != null && a.some(isNaN)) { + a.forEach((x, i) => { if (isNaN(x)) { bitsForF32[len+i] = 0x7FA0_0000; } }); + ins.exports.run(); + assertSame(get(resultmem, 0, len), r); + } + if (bitsForF64 != null && a.some(isNaN)) { + a.forEach((x, i) => { if (isNaN(x)) { bitsForF64[len+i] = 0x7FF4_0000_0000_0000n; } }); + ins.exports.run(); + assertSame(get(resultmem, 0, len), r); + } + } + + function testConstIt(i,r) { + set(resultmem, 0, zero); + ins.exports["run_const" + i](); + assertSame(get(resultmem, 0, len), r); + } + + let i = 0; + for (let a of memtype.inputs) { + let r = xs.map((i) => rop(a[i])); + testIt(a, r); + testConstIt(i, r); + i++; + } +} + diff --git a/js/src/jit-test/tests/wasm/simd/ad-hack.js b/js/src/jit-test/tests/wasm/simd/ad-hack.js new file mode 100644 index 0000000000..b64b11cf52 --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/ad-hack.js @@ -0,0 +1,1747 @@ +// |jit-test| skip-if: !wasmSimdEnabled() + +// Ad-hoc test cases used during development. Generally these are ordered from +// easier toward harder. +// +// The test cases here are usually those that require some special processing. +// Simple binary operators (v128 x v128 -> v128) and unary operators (v128 -> +// v128) are tested in ad-hack-simple-binops*.js and ad-hack-simple-unops.js. 
+ +// Do not include this in the preamble, it must be loaded after lib/wasm.js +load(scriptdir + "ad-hack-preamble.js") + +// v128.store +// oob store +// v128.const + +for ( let offset of [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) { + var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "f") (param $loc i32) + (v128.store offset=${offset} (local.get $loc) (v128.const i32x4 ${1+offset} 2 3 ${4+offset*2}))))`); + var mem8 = new Uint8Array(ins.exports.mem.buffer); + ins.exports.f(160); + assertSame(getUnaligned(mem8, 4, 160 + offset, 4), [1+offset, 2, 3, 4+offset*2]); + + // OOB write should trap + assertErrorMessage(() => ins.exports.f(65536-15), + WebAssembly.RuntimeError, + /index out of bounds/) + + // Ensure that OOB writes don't write anything: moved to simd-partial-oob-store.js +} + +// v128.load +// oob load +// v128.store +// temp register + +for ( let offset of [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) { + var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "copy") (param $dest i32) (param $src i32) + (v128.store (local.get $dest) (v128.load offset=${offset} (local.get $src)))))`); + var mem32 = new Uint32Array(ins.exports.mem.buffer); + var mem8 = new Uint8Array(ins.exports.mem.buffer); + setUnaligned(mem8, 4, 4*4 + offset, [8+offset, 10, 12, 14+offset*2]); + ins.exports.copy(40*4, 4*4); + assertSame(get(mem32, 40, 4), [8+offset, 10, 12, 14+offset*2]); + assertErrorMessage(() => ins.exports.copy(40*4, 65536-15), + WebAssembly.RuntimeError, + /index out of bounds/); +} + +// call [with register params] +// parameters [in registers] +// return [with register values] +// locals +// +// local.get +// local.set +// v128.const +// v128.store + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func $g (param $param v128) (result v128) + (local $tmp v128) + (local.set $tmp (local.get $param)) + (local.get $tmp)) + (func (export "f") + (v128.store (i32.const 160) (call $g (v128.const i32x4 1 2 3 4)))))`); +var mem = new Uint32Array(ins.exports.mem.buffer); +ins.exports.f(); +assertSame(get(mem, 40, 4), [1, 2, 3, 4]); + +// Same test but with local.tee + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func $g (param $param v128) (result v128) + (local $tmp v128) + (local.tee $tmp (local.get $param))) + (func (export "f") + (v128.store (i32.const 160) (call $g (v128.const i32x4 1 2 3 4)))))`); +var mem = new Uint32Array(ins.exports.mem.buffer); +ins.exports.f(); +assertSame(get(mem, 40, 4), [1, 2, 3, 4]); + +// Locals that end up on the stack. Try to create unaligned placement (in the +// baseline compiler anyway) by inserting i32 locals before or after and +// inbetween the v128 ones and by having so many locals that we run out of +// registers. 
+ +var nlocals = 64; +for ( let start of [0, 1]) { + let decl = ""; + let set = ""; + let sum = "(v128.const i32x4 0 0 0 0)"; + var res = [0,0,0,0]; + var locno = start; + for ( let i=start ; i < start + nlocals ; i++ ) { + decl += "(local v128) "; + set += `(local.set ${locno} (v128.const i32x4 ${i} ${i+1} ${i+2} ${i+3})) `; + sum = `(i32x4.add ${sum} (local.get ${locno}))`; + locno++; + res[0] += i; + res[1] += i+1; + res[2] += i+2; + res[3] += i+3; + if ((i % 5) == 3) { + decl += "(local i32) "; + locno++; + } + } + if (start) + decl = "(local i32) " + decl; + else + decl += "(local i32) "; + var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func $g (result v128) + ${decl} + ${set} + ${sum}) + (func (export "f") + (v128.store (i32.const 160) (call $g))))`); + + var mem = new Uint32Array(ins.exports.mem.buffer); + ins.exports.f(); + assertSame(get(mem, 40, 4), res); +} + +// Ditto parameters. This is like the case above but values are passed rather +// than set. +// +// call +// call_indirect + +var nlocals = 64; +for ( let start of [0, 1]) { + let decl = ""; + let pass = ""; + let sum = "(v128.const i32x4 0 0 0 0)"; + var res = [0,0,0,0]; + var locno = start; + for ( let i=start ; i < start + nlocals ; i++ ) { + decl += "(param v128) "; + pass += `(v128.const i32x4 ${i} ${i+1} ${i+2} ${i+3}) `; + sum = `(i32x4.add ${sum} (local.get ${locno}))`; + locno++; + res[0] += i; + res[1] += i+1; + res[2] += i+2; + res[3] += i+3; + if ((i % 5) == 3) { + decl += "(param i32) "; + pass += "(i32.const 0) "; + locno++; + } + } + if (start) { + decl = "(param i32) " + decl; + pass = "(i32.const 0) " + pass; + } else { + decl += "(param i32) "; + pass += "(i32.const 0) "; + } + var txt = ` + (module + (memory (export "mem") 1 1) + (type $t1 (func ${decl} (result v128))) + (table funcref (elem $h)) + (func $g ${decl} (result v128) + ${sum}) + (func (export "f1") + (v128.store (i32.const 160) (call $g ${pass}))) + (func $h ${decl} (result v128) + ${sum}) + (func (export "f2") + (v128.store (i32.const 512) (call_indirect (type $t1) ${pass} (i32.const 0)))))`; + var ins = wasmEvalText(txt); + + var mem = new Uint32Array(ins.exports.mem.buffer); + ins.exports.f1(); + assertSame(get(mem, 40, 4), res); + ins.exports.f2(); + assertSame(get(mem, 128, 4), res); +} + +// Widening integer dot product + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) + (i32x4.dot_i16x8_s (v128.load (i32.const 16)) (v128.load (i32.const 32))))))`); + +var xs = [5, 1, -4, 2, 20, -15, 12, 3]; +var ys = [6, 0, -7, 3, 8, -1, -3, 7]; +var ans = [xs[0]*ys[0] + xs[1]*ys[1], + xs[2]*ys[2] + xs[3]*ys[3], + xs[4]*ys[4] + xs[5]*ys[5], + xs[6]*ys[6] + xs[7]*ys[7]]; + +var mem16 = new Int16Array(ins.exports.mem.buffer); +var mem32 = new Int32Array(ins.exports.mem.buffer); +set(mem16, 8, xs); +set(mem16, 16, ys); +ins.exports.run(); +var result = get(mem32, 0, 4); +assertSame(result, ans); + +// Splat, with and without constants (different code paths in ion) + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "splat_i8x16") (param $src i32) + (v128.store (i32.const 0) (i8x16.splat (local.get $src)))) + (func (export "csplat_i8x16") + (v128.store (i32.const 0) (i8x16.splat (i32.const 37)))) + (func (export "splat_i16x8") (param $src i32) + (v128.store (i32.const 0) (i16x8.splat (local.get $src)))) + (func (export "csplat_i16x8") + (v128.store (i32.const 0) (i16x8.splat (i32.const 1175)))) + (func (export "splat_i32x4") (param $src 
i32) + (v128.store (i32.const 0) (i32x4.splat (local.get $src)))) + (func (export "csplat_i32x4") + (v128.store (i32.const 0) (i32x4.splat (i32.const 127639)))) + (func (export "splat_i64x2") (param $src i64) + (v128.store (i32.const 0) (i64x2.splat (local.get $src)))) + (func (export "csplat_i64x2") + (v128.store (i32.const 0) (i64x2.splat (i64.const 0x1234_5678_4365)))) + (func (export "splat_f32x4") (param $src f32) + (v128.store (i32.const 0) (f32x4.splat (local.get $src)))) + (func (export "csplat_f32x4") + (v128.store (i32.const 0) (f32x4.splat (f32.const 9121.25)))) + (func (export "splat_f64x2") (param $src f64) + (v128.store (i32.const 0) (f64x2.splat (local.get $src)))) + (func (export "csplat_f64x2") + (v128.store (i32.const 0) (f64x2.splat (f64.const 26789.125)))) +)`); + +var mem8 = new Uint8Array(ins.exports.mem.buffer); +ins.exports.splat_i8x16(3); +assertSame(get(mem8, 0, 16), iota(16).map(_=>3)); +ins.exports.csplat_i8x16(); +assertSame(get(mem8, 0, 16), iota(16).map(_=>37)); + +var mem16 = new Uint16Array(ins.exports.mem.buffer); +ins.exports.splat_i16x8(976); +assertSame(get(mem16, 0, 8), iota(8).map(_=>976)); +ins.exports.csplat_i16x8(); +assertSame(get(mem16, 0, 8), iota(8).map(_=>1175)); + +var mem32 = new Uint32Array(ins.exports.mem.buffer); +ins.exports.splat_i32x4(147812); +assertSame(get(mem32, 0, 4), [147812, 147812, 147812, 147812]); +ins.exports.csplat_i32x4(); +assertSame(get(mem32, 0, 4), [127639, 127639, 127639, 127639]); + +var mem64 = new BigInt64Array(ins.exports.mem.buffer); +ins.exports.splat_i64x2(147812n); +assertSame(get(mem64, 0, 2), [147812, 147812]); +ins.exports.csplat_i64x2(); +assertSame(get(mem64, 0, 2), [0x1234_5678_4365n, 0x1234_5678_4365n]); + +var memf32 = new Float32Array(ins.exports.mem.buffer); +ins.exports.splat_f32x4(147812.5); +assertSame(get(memf32, 0, 4), [147812.5, 147812.5, 147812.5, 147812.5]); +ins.exports.csplat_f32x4(); +assertSame(get(memf32, 0, 4), [9121.25, 9121.25, 9121.25, 9121.25]); + +var memf64 = new Float64Array(ins.exports.mem.buffer); +ins.exports.splat_f64x2(147812.5); +assertSame(get(memf64, 0, 2), [147812.5, 147812.5]); +ins.exports.csplat_f64x2(); +assertSame(get(memf64, 0, 2), [26789.125, 26789.125]); + +// AnyTrue. Ion constant folds, so test that too. + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "anytrue_i8x16") (result i32) + (v128.any_true (v128.load (i32.const 16)))) + (func (export "true_anytrue_i8x16") (result i32) + (v128.any_true (v128.const i8x16 0 0 8 0 0 0 0 0 0 0 0 0 0 0 0 0))) + (func (export "false_anytrue_i8x16") (result i32) + (v128.any_true (v128.const i8x16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0))))`); + +var mem = new Uint8Array(ins.exports.mem.buffer); +set(mem, 16, iota(16).map((_) => 0)); +assertEq(ins.exports.anytrue_i8x16(), 0); + +for ( let dope of [1, 7, 32, 195 ] ) { + set(mem, 16, iota(16).map((x) => x == 7 ? dope : 0)); + assertEq(ins.exports.anytrue_i8x16(), 1); +} + +assertEq(ins.exports.true_anytrue_i8x16(), 1); +assertEq(ins.exports.false_anytrue_i8x16(), 0); + +// AllTrue. Ion constant folds, so test that too. 
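+// As a scalar reference for these reductions (a sketch, not used by the
+// harness): any_true is 1 iff at least one lane is nonzero, and all_true is
+// 1 iff every lane is nonzero.
+function refAnyTrue(lanes) { return lanes.some((x) => x != 0) ? 1 : 0; }
+function refAllTrue(lanes) { return lanes.every((x) => x != 0) ? 1 : 0; }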
+ +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "alltrue_i8x16") (result i32) + (i8x16.all_true (v128.load (i32.const 16)))) + (func (export "true_alltrue_i8x16") (result i32) + (i8x16.all_true (v128.const i8x16 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16))) + (func (export "false_alltrue_i8x16") (result i32) + (i8x16.all_true (v128.const i8x16 1 2 3 4 5 6 0 8 9 10 11 12 13 14 15 16))) + (func (export "alltrue_i16x8") (result i32) + (i16x8.all_true (v128.load (i32.const 16)))) + (func (export "true_alltrue_i16x8") (result i32) + (i16x8.all_true (v128.const i16x8 1 2 3 4 5 6 7 8))) + (func (export "false_alltrue_i16x8") (result i32) + (i16x8.all_true (v128.const i16x8 1 2 3 4 5 0 7 8))) + (func (export "alltrue_i32x4") (result i32) + (i32x4.all_true (v128.load (i32.const 16)))) + (func (export "true_alltrue_i32x4") (result i32) + (i32x4.all_true (v128.const i32x4 1 2 3 4))) + (func (export "false_alltrue_i32x4") (result i32) + (i32x4.all_true (v128.const i32x4 1 2 3 0))))`); + +var mem8 = new Uint8Array(ins.exports.mem.buffer); +var mem16 = new Uint16Array(ins.exports.mem.buffer); +var mem32 = new Uint32Array(ins.exports.mem.buffer); + +set(mem8, 16, iota(16).map((_) => 0)); +assertEq(ins.exports.alltrue_i8x16(), 0); +assertEq(ins.exports.alltrue_i16x8(), 0); +assertEq(ins.exports.alltrue_i32x4(), 0); + +set(mem8, 16, iota(16).map((_) => 1)); +assertEq(ins.exports.alltrue_i8x16(), 1); + +set(mem16, 8, iota(8).map((_) => 1)); +assertEq(ins.exports.alltrue_i16x8(), 1); + +set(mem32, 4, iota(4).map((_) => 1)); +assertEq(ins.exports.alltrue_i32x4(), 1); + +for ( let dope of [1, 7, 32, 195 ] ) { + set(mem8, 16, iota(16).map((x) => x == 7 ? 0 : dope)); + assertEq(ins.exports.alltrue_i8x16(), 0); + + set(mem16, 8, iota(8).map((x) => x == 4 ? 0 : dope)); + assertEq(ins.exports.alltrue_i16x8(), 0); + + set(mem32, 4, iota(4).map((x) => x == 2 ? 0 : dope)); + assertEq(ins.exports.alltrue_i32x4(), 0); +} + +assertEq(ins.exports.true_alltrue_i8x16(), 1); +assertEq(ins.exports.false_alltrue_i8x16(), 0); +assertEq(ins.exports.true_alltrue_i16x8(), 1); +assertEq(ins.exports.false_alltrue_i16x8(), 0); +assertEq(ins.exports.true_alltrue_i32x4(), 1); +assertEq(ins.exports.false_alltrue_i32x4(), 0); + +// Bitmask. Ion constant folds, so test that too. 
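+// Scalar reference for bitmask (a sketch, not used by the harness): bit i of
+// the result is the sign bit of lane i, with lanes read as unsigned
+// 'width'-bit values. E.g. refBitmask([0x80, 0x7f], 8) == 1.
+function refBitmask(lanes, width) {
+    return lanes.reduce((acc, v, i) => acc | (((v >>> (width - 1)) & 1) << i), 0);
+}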
+ +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "bitmask_i8x16") (result i32) + (i8x16.bitmask (v128.load (i32.const 16)))) + (func (export "const_bitmask_i8x16") (result i32) + (i8x16.bitmask (v128.const i8x16 0x80 0x7f 0xff 0x33 0x42 0x98 0x01 0x00 + 0x31 0xcc 0xdd 0x12 0xf0 0x40 0x02 0xa0))) + (func (export "bitmask_i16x8") (result i32) + (i16x8.bitmask (v128.load (i32.const 16)))) + (func (export "const_bitmask_i16x8") (result i32) + (i16x8.bitmask (v128.const i16x8 0x7f80 0xff33 0x9842 0x0001 0xcc31 0x12dd 0x40f0 0xa002))) + (func (export "bitmask_i32x4") (result i32) + (i32x4.bitmask (v128.load (i32.const 16)))) + (func (export "const_bitmask_i32x4") (result i32) + (i32x4.bitmask (v128.const i32x4 0xff337f80 0x00019842 0xcc3112dd 0xa00240f0))))`); + +var mem8 = new Uint8Array(ins.exports.mem.buffer); +var mem16 = new Uint16Array(ins.exports.mem.buffer); +var mem32 = new Uint32Array(ins.exports.mem.buffer); + +set(mem8, 16, iota(16).map((_) => 0)); +assertEq(ins.exports.bitmask_i8x16(), 0); +assertEq(ins.exports.bitmask_i16x8(), 0); +assertEq(ins.exports.bitmask_i32x4(), 0); + +set(mem8, 16, iota(16).map((_) => 0x80)); +assertEq(ins.exports.bitmask_i8x16(), 0xFFFF); + +set(mem8, 16, iota(16).map((_) => 0x7F)); +assertEq(ins.exports.bitmask_i8x16(), 0); + +set(mem8, 16, iota(16).map((i) => popcount(i) == 1 ? 0x80 : 0)); +assertEq(ins.exports.bitmask_i8x16(), (1 << 1) | (1 << 2) | (1 << 4) | (1 << 8)); + +assertEq(ins.exports.const_bitmask_i8x16(), 0x9625); + +set(mem16, 8, iota(8).map((i) => 0x8000)) +assertEq(ins.exports.bitmask_i16x8(), 0xFF) + +set(mem16, 8, iota(8).map((i) => 0x7FFF)) +assertEq(ins.exports.bitmask_i16x8(), 0) + +set(mem16, 8, iota(8).map((i) => popcount(i) == 1 ? 0x8000 : 0)) +assertEq(ins.exports.bitmask_i16x8(), (1 << 1) | (1 << 2) | (1 << 4)); + +assertEq(ins.exports.const_bitmask_i16x8(), 0x96); + +set(mem32, 4, iota(4).map((_) => 0x80000000)) +assertEq(ins.exports.bitmask_i32x4(), 0xF); + +set(mem32, 4, iota(4).map((_) => 0x7FFFFFFF)) +assertEq(ins.exports.bitmask_i32x4(), 0); + +set(mem32, 4, iota(4).map((i) => popcount(i) == 1 ? 0x80000000 : 0)) +assertEq(ins.exports.bitmask_i32x4(), (1 << 1) | (1 << 2)); + +assertEq(ins.exports.const_bitmask_i32x4(), 0xd); + +// Shifts +// +// lhs is v128 in memory +// rhs is i32 (passed directly) +// result is v128 in memory + +function shr(count, width) { + return (v) => { + if (count == 0) + return v; + if (width == 64) { + if (v < 0) { + // This basically mirrors what the SIMD code does, so if there's + // a bug there then there's a bug here too. Seems OK though. + let s = 0x1_0000_0000_0000_0000n + BigInt(v); + let t = s / (1n << BigInt(count)); + let u = ((1n << BigInt(count)) - 1n) * (2n ** BigInt(64-count)); + let w = t + u; + return w - 0x1_0000_0000_0000_0000n; + } + return BigInt(v) / (1n << BigInt(count)); + } else { + let mask = (width == 32) ? -1 : ((1 << width) - 1); + return (sign_extend(v, width) >> count) & mask; + } + } +} + +function shru(count, width) { + if (width == 64) { + return (v) => { + if (count == 0) + return v; + if (v < 0) { + v = 0x1_0000_0000_0000_0000n + BigInt(v); + } + return BigInt(v) / (1n << BigInt(count)); + } + } else { + return (v) => { + let mask = (width == 32) ? 
-1 : ((1 << width) - 1); + return (v >>> count) & mask; + } + } +} + +var constantI8Shifts = ""; +for ( let i of iota(10).concat([-7]) ) { + constantI8Shifts += ` + (func (export "shl_i8x16_${i}") + (v128.store (i32.const 0) (i8x16.shl (v128.load (i32.const 16)) (i32.const ${i})))) + (func (export "shr_i8x16_${i}") + (v128.store (i32.const 0) (i8x16.shr_s (v128.load (i32.const 16)) (i32.const ${i})))) + (func (export "shr_u8x16_${i}") + (v128.store (i32.const 0) (i8x16.shr_u (v128.load (i32.const 16)) (i32.const ${i}))))`; +} + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "shl_i8x16") (param $count i32) + (v128.store (i32.const 0) (i8x16.shl (v128.load (i32.const 16)) (local.get $count)))) + (func (export "shr_i8x16") (param $count i32) + (v128.store (i32.const 0) (i8x16.shr_s (v128.load (i32.const 16)) (local.get $count)))) + (func (export "shr_u8x16") (param $count i32) + (v128.store (i32.const 0) (i8x16.shr_u (v128.load (i32.const 16)) (local.get $count)))) + ${constantI8Shifts} + (func (export "shl_i16x8") (param $count i32) + (v128.store (i32.const 0) (i16x8.shl (v128.load (i32.const 16)) (local.get $count)))) + (func (export "shl_i16x8_3") + (v128.store (i32.const 0) (i16x8.shl (v128.load (i32.const 16)) (i32.const 3)))) + (func (export "shl_i16x8_15") + (v128.store (i32.const 0) (i16x8.shl (v128.load (i32.const 16)) (i32.const 15)))) + (func (export "shl_i16x8_16") + (v128.store (i32.const 0) (i16x8.shl (v128.load (i32.const 16)) (i32.const 16)))) + (func (export "shl_i16x8_-15") + (v128.store (i32.const 0) (i16x8.shl (v128.load (i32.const 16)) (i32.const -15)))) + (func (export "shr_i16x8") (param $count i32) + (v128.store (i32.const 0) (i16x8.shr_s (v128.load (i32.const 16)) (local.get $count)))) + (func (export "shr_i16x8_3") + (v128.store (i32.const 0) (i16x8.shr_s (v128.load (i32.const 16)) (i32.const 3)))) + (func (export "shr_i16x8_15") + (v128.store (i32.const 0) (i16x8.shr_s (v128.load (i32.const 16)) (i32.const 15)))) + (func (export "shr_i16x8_16") + (v128.store (i32.const 0) (i16x8.shr_s (v128.load (i32.const 16)) (i32.const 16)))) + (func (export "shr_i16x8_-15") + (v128.store (i32.const 0) (i16x8.shr_s (v128.load (i32.const 16)) (i32.const -15)))) + (func (export "shr_u16x8") (param $count i32) + (v128.store (i32.const 0) (i16x8.shr_u (v128.load (i32.const 16)) (local.get $count)))) + (func (export "shr_u16x8_3") + (v128.store (i32.const 0) (i16x8.shr_u (v128.load (i32.const 16)) (i32.const 3)))) + (func (export "shr_u16x8_15") + (v128.store (i32.const 0) (i16x8.shr_u (v128.load (i32.const 16)) (i32.const 15)))) + (func (export "shr_u16x8_16") + (v128.store (i32.const 0) (i16x8.shr_u (v128.load (i32.const 16)) (i32.const 16)))) + (func (export "shr_u16x8_-15") + (v128.store (i32.const 0) (i16x8.shr_u (v128.load (i32.const 16)) (i32.const -15)))) + (func (export "shl_i32x4") (param $count i32) + (v128.store (i32.const 0) (i32x4.shl (v128.load (i32.const 16)) (local.get $count)))) + (func (export "shl_i32x4_12") + (v128.store (i32.const 0) (i32x4.shl (v128.load (i32.const 16)) (i32.const 12)))) + (func (export "shl_i32x4_31") + (v128.store (i32.const 0) (i32x4.shl (v128.load (i32.const 16)) (i32.const 31)))) + (func (export "shl_i32x4_32") + (v128.store (i32.const 0) (i32x4.shl (v128.load (i32.const 16)) (i32.const 32)))) + (func (export "shl_i32x4_-27") + (v128.store (i32.const 0) (i32x4.shl (v128.load (i32.const 16)) (i32.const -27)))) + (func (export "shr_i32x4") (param $count i32) + (v128.store (i32.const 0) (i32x4.shr_s 
(v128.load (i32.const 16)) (local.get $count)))) + (func (export "shr_i32x4_12") + (v128.store (i32.const 0) (i32x4.shr_s (v128.load (i32.const 16)) (i32.const 12)))) + (func (export "shr_i32x4_31") + (v128.store (i32.const 0) (i32x4.shr_s (v128.load (i32.const 16)) (i32.const 31)))) + (func (export "shr_i32x4_32") + (v128.store (i32.const 0) (i32x4.shr_s (v128.load (i32.const 16)) (i32.const 32)))) + (func (export "shr_i32x4_-27") + (v128.store (i32.const 0) (i32x4.shr_s (v128.load (i32.const 16)) (i32.const -27)))) + (func (export "shr_u32x4") (param $count i32) + (v128.store (i32.const 0) (i32x4.shr_u (v128.load (i32.const 16)) (local.get $count)))) + (func (export "shr_u32x4_12") + (v128.store (i32.const 0) (i32x4.shr_u (v128.load (i32.const 16)) (i32.const 12)))) + (func (export "shr_u32x4_31") + (v128.store (i32.const 0) (i32x4.shr_u (v128.load (i32.const 16)) (i32.const 31)))) + (func (export "shr_u32x4_32") + (v128.store (i32.const 0) (i32x4.shr_u (v128.load (i32.const 16)) (i32.const 32)))) + (func (export "shr_u32x4_-27") + (v128.store (i32.const 0) (i32x4.shr_u (v128.load (i32.const 16)) (i32.const -27)))) + (func (export "shl_i64x2") (param $count i32) + (v128.store (i32.const 0) (i64x2.shl (v128.load (i32.const 16)) (local.get $count)))) + (func (export "shl_i64x2_27") + (v128.store (i32.const 0) (i64x2.shl (v128.load (i32.const 16)) (i32.const 27)))) + (func (export "shl_i64x2_63") + (v128.store (i32.const 0) (i64x2.shl (v128.load (i32.const 16)) (i32.const 63)))) + (func (export "shl_i64x2_64") + (v128.store (i32.const 0) (i64x2.shl (v128.load (i32.const 16)) (i32.const 64)))) + (func (export "shl_i64x2_-231") + (v128.store (i32.const 0) (i64x2.shl (v128.load (i32.const 16)) (i32.const -231)))) + (func (export "shr_i64x2") (param $count i32) + (v128.store (i32.const 0) (i64x2.shr_s (v128.load (i32.const 16)) (local.get $count)))) + (func (export "shr_i64x2_27") + (v128.store (i32.const 0) (i64x2.shr_s (v128.load (i32.const 16)) (i32.const 27)))) + (func (export "shr_i64x2_45") + (v128.store (i32.const 0) (i64x2.shr_s (v128.load (i32.const 16)) (i32.const 45)))) + (func (export "shr_i64x2_63") + (v128.store (i32.const 0) (i64x2.shr_s (v128.load (i32.const 16)) (i32.const 63)))) + (func (export "shr_i64x2_64") + (v128.store (i32.const 0) (i64x2.shr_s (v128.load (i32.const 16)) (i32.const 64)))) + (func (export "shr_i64x2_-231") + (v128.store (i32.const 0) (i64x2.shr_s (v128.load (i32.const 16)) (i32.const -231)))) + (func (export "shr_i64x2_-1") + (v128.store (i32.const 0) (i64x2.shr_s (v128.load (i32.const 16)) (i32.const -1)))) + (func (export "shr_u64x2") (param $count i32) + (v128.store (i32.const 0) (i64x2.shr_u (v128.load (i32.const 16)) (local.get $count)))) + (func (export "shr_u64x2_27") + (v128.store (i32.const 0) (i64x2.shr_u (v128.load (i32.const 16)) (i32.const 27)))) + (func (export "shr_u64x2_63") + (v128.store (i32.const 0) (i64x2.shr_u (v128.load (i32.const 16)) (i32.const 63)))) + (func (export "shr_u64x2_64") + (v128.store (i32.const 0) (i64x2.shr_u (v128.load (i32.const 16)) (i32.const 64)))) + (func (export "shr_u64x2_-231") + (v128.store (i32.const 0) (i64x2.shr_u (v128.load (i32.const 16)) (i32.const -231)))))`); + +var mem8 = new Uint8Array(ins.exports.mem.buffer); +var as = [1, 2, 4, 8, 16, 32, 64, 128, 129, 130, 132, 136, 144, 160, 192, 255]; + +set(mem8, 16, as); + +for (let [meth,op] of [["shl_i8x16",shl], ["shr_i8x16",shr], ["shr_u8x16",shru]]) { + for ( let i=0 ; i < 8 ; i++ ) { + ins.exports[meth](i); + assertSame(get(mem8, 0, 16), as.map(op(i, 
8))) + ins.exports[meth + "_" + i](); + assertSame(get(mem8, 0, 16), as.map(op(i, 8))) + } + + ins.exports[meth](1); + let a = get(mem8, 0, 16); + ins.exports[meth](9); + let b = get(mem8, 0, 16); + assertSame(a, b); + ins.exports[meth](-7); + let c = get(mem8, 0, 16); + assertSame(a, c); + + ins.exports[meth + "_1"](); + let x = get(mem8, 0, 16); + ins.exports[meth + "_9"](); + let y = get(mem8, 0, 16); + ins.exports[meth + "_-7"](); + let z = get(mem8, 0, 16); + assertSame(x, y); + assertSame(x, z); +} + +var mem16 = new Uint16Array(ins.exports.mem.buffer); +var as = [1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000]; +set(mem16, 8, as) + +ins.exports.shl_i16x8(2); +var res = get(mem16, 0, 8); +assertSame(res, as.map(shl(2, 16))) + +ins.exports.shl_i16x8(18); // Masked count +assertSame(get(mem16, 0, 8), res); + +ins.exports.shl_i16x8(-14); // Masked count +assertSame(get(mem16, 0, 8), res); + +for ( let shift of [3, 15, 16, -15] ) { + ins.exports["shl_i16x8_" + shift](); + assertSame(get(mem16, 0, 8), as.map(shl(shift & 15, 16))) +} + +ins.exports.shr_i16x8(1); +var res = get(mem16, 0, 8); +assertSame(res, as.map(shr(1, 16))) + +ins.exports.shr_i16x8(17); // Masked count +assertSame(get(mem16, 0, 8), res); + +ins.exports.shr_i16x8(-15); // Masked count +assertSame(get(mem16, 0, 8), res); + +for ( let shift of [3, 15, 16, -15] ) { + ins.exports["shr_i16x8_" + shift](); + assertSame(get(mem16, 0, 8), as.map(shr(shift & 15, 16))) +} + +ins.exports.shr_u16x8(1); +var res = get(mem16, 0, 8); +assertSame(res, as.map(shru(1, 16))) + +ins.exports.shr_u16x8(17); // Masked count +assertSame(get(mem16, 0, 8), res); + +ins.exports.shr_u16x8(-15); // Masked count +assertSame(get(mem16, 0, 8), res); + +for ( let shift of [3, 15, 16, -15] ) { + ins.exports["shr_u16x8_" + shift](); + assertSame(get(mem16, 0, 8), as.map(shru(shift & 15, 16))) +} + +var mem32 = new Uint32Array(ins.exports.mem.buffer); +var as = [5152, 6768, 7074, 800811]; + +set(mem32, 4, as) +ins.exports.shl_i32x4(2); +var res = get(mem32, 0, 4); +assertSame(res, as.map(shl(2, 32))) + +ins.exports.shl_i32x4(34); // Masked count +assertSame(get(mem32, 0, 4), res); + +ins.exports.shl_i32x4(-30); // Masked count +assertSame(get(mem32, 0, 4), res); + +for ( let shift of [12, 31, 32, -27] ) { + ins.exports["shl_i32x4_" + shift](); + assertSame(get(mem32, 0, 4), as.map(shl(shift & 31, 32)).map(x => x>>>0)) +} + +ins.exports.shr_i32x4(1); +var res = get(mem32, 0, 4); +assertSame(res, as.map(shr(1, 32))) + +ins.exports.shr_i32x4(33); // Masked count +assertSame(get(mem32, 0, 4), res); + +ins.exports.shr_i32x4(-31); // Masked count +assertSame(get(mem32, 0, 4), res); + +for ( let shift of [12, 31, 32, -27] ) { + ins.exports["shr_i32x4_" + shift](); + assertSame(get(mem32, 0, 4), as.map(shr(shift & 31, 32))) +} + +ins.exports.shr_u32x4(1); +var res = get(mem32, 0, 4); +assertSame(res, as.map(shru(1, 32))) + +ins.exports.shr_u32x4(33); // Masked count +assertSame(get(mem32, 0, 4), res); + +ins.exports.shr_u32x4(-31); // Masked count +assertSame(get(mem32, 0, 4), res); + +for ( let shift of [12, 31, 32, -27] ) { + ins.exports["shr_u32x4_" + shift](); + assertSame(get(mem32, 0, 4), as.map(shru(shift & 31, 32))) +} + +var mem64 = new BigInt64Array(ins.exports.mem.buffer); +var as = [50515253, -616263]; + +set(mem64, 2, as) +ins.exports.shl_i64x2(2); +var res = get(mem64, 0, 2); +assertSame(res, as.map(shl(2, 64))) + +ins.exports.shl_i64x2(66); // Masked count +assertSame(get(mem64, 0, 2), res); + +ins.exports.shl_i64x2(-62); // Masked count 
+assertSame(get(mem64, 0, 2), res); + +for ( let shift of [27, 63, 64, -231] ) { + ins.exports["shl_i64x2_" + shift](); + assertSame(get(mem64, 0, 2), as.map(shl(shift & 63, 64))) +} + +ins.exports.shr_u64x2(1); +var res = get(mem64, 0, 2); +assertSame(res, as.map(shru(1, 64))) + +ins.exports.shr_u64x2(65); // Masked count +assertSame(get(mem64, 0, 2), res); + +ins.exports.shr_u64x2(-63); // Masked count +assertSame(get(mem64, 0, 2), res); + +for ( let shift of [27, 63, 64, -231] ) { + ins.exports["shr_u64x2_" + shift](); + assertSame(get(mem64, 0, 2), as.map(shru(shift & 63, 64))) +} + +ins.exports.shr_i64x2(2); +var res = get(mem64, 0, 2); +assertSame(res, as.map(shr(2, 64))) + +ins.exports.shr_i64x2(66); // Masked count +assertSame(get(mem64, 0, 2), res); + +ins.exports.shr_i64x2(-62); // Masked count +assertSame(get(mem64, 0, 2), res); + +// The ion code generator has multiple paths here, for < 32 and >= 32 +for ( let shift of [27, 45, 63, 64, -1, -231] ) { + ins.exports["shr_i64x2_" + shift](); + assertSame(get(mem64, 0, 2), as.map(shr(shift & 63, 64))) +} + +// Narrow + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "narrow_i16x8_s") + (v128.store (i32.const 0) (i8x16.narrow_i16x8_s (v128.load (i32.const 16)) (v128.load (i32.const 32))))) + (func (export "narrow_i16x8_u") + (v128.store (i32.const 0) (i8x16.narrow_i16x8_u (v128.load (i32.const 16)) (v128.load (i32.const 32))))) + (func (export "narrow_i32x4_s") + (v128.store (i32.const 0) (i16x8.narrow_i32x4_s (v128.load (i32.const 16)) (v128.load (i32.const 32))))) + (func (export "narrow_i32x4_u") + (v128.store (i32.const 0) (i16x8.narrow_i32x4_u (v128.load (i32.const 16)) (v128.load (i32.const 32))))))`); + +var mem8 = new Int8Array(ins.exports.mem.buffer); +var mem8u = new Uint8Array(ins.exports.mem.buffer); +var mem16 = new Int16Array(ins.exports.mem.buffer); +var mem16u = new Uint16Array(ins.exports.mem.buffer); +var mem32 = new Int32Array(ins.exports.mem.buffer); + +var as = [1, 267, 3987, 14523, 32768, 3, 312, 4876].map((x) => sign_extend(x, 16)); +var bs = [2, 312, 4876, 15987, 33777, 1, 267, 3987].map((x) => sign_extend(x, 16)); + +set(mem16, 8, as); +set(mem16, 16, bs); + +ins.exports.narrow_i16x8_s(); +var cs = as.concat(...bs).map((x) => signed_saturate(x, 8)); +assertSame(get(mem8, 0, 16), cs); + +ins.exports.narrow_i16x8_u(); +var cs = as.concat(...bs).map((x) => unsigned_saturate(x, 8)); +assertSame(get(mem8u, 0, 16), cs); + +var xs = [1, 3987, 14523, 32768].map((x) => x << 16).map((x) => sign_extend(x, 32)); +var ys = [2, 4876, 15987, 33777].map((x) => x << 16).map((x) => sign_extend(x, 32)); + +set(mem32, 4, xs); +set(mem32, 8, ys); + +ins.exports.narrow_i32x4_s(); +var cs = xs.concat(...ys).map((x) => signed_saturate(x, 16)); +assertSame(get(mem16, 0, 8), cs); + +ins.exports.narrow_i32x4_u(); +var cs = xs.concat(...ys).map((x) => unsigned_saturate(x, 16)); +assertSame(get(mem16u, 0, 8), cs); + +// Extend low/high + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "extend_low_i8x16_s") + (v128.store (i32.const 0) (i16x8.extend_low_i8x16_s (v128.load (i32.const 16))))) + (func (export "extend_high_i8x16_s") + (v128.store (i32.const 0) (i16x8.extend_high_i8x16_s (v128.load (i32.const 16))))) + (func (export "extend_low_i8x16_u") + (v128.store (i32.const 0) (i16x8.extend_low_i8x16_u (v128.load (i32.const 16))))) + (func (export "extend_high_i8x16_u") + (v128.store (i32.const 0) (i16x8.extend_high_i8x16_u (v128.load (i32.const 16))))) + (func (export 
"extend_low_i16x8_s") + (v128.store (i32.const 0) (i32x4.extend_low_i16x8_s (v128.load (i32.const 16))))) + (func (export "extend_high_i16x8_s") + (v128.store (i32.const 0) (i32x4.extend_high_i16x8_s (v128.load (i32.const 16))))) + (func (export "extend_low_i16x8_u") + (v128.store (i32.const 0) (i32x4.extend_low_i16x8_u (v128.load (i32.const 16))))) + (func (export "extend_high_i16x8_u") + (v128.store (i32.const 0) (i32x4.extend_high_i16x8_u (v128.load (i32.const 16))))))`); + +var mem16 = new Int16Array(ins.exports.mem.buffer); +var mem16u = new Uint16Array(ins.exports.mem.buffer); +var mem8 = new Int8Array(ins.exports.mem.buffer); +var as = [0, 1, 192, 3, 205, 5, 6, 133, 8, 9, 129, 11, 201, 13, 14, 255]; + +set(mem8, 16, as); + +ins.exports.extend_low_i8x16_s(); +assertSame(get(mem16, 0, 8), iota(8).map((n) => sign_extend(as[n], 8))); + +ins.exports.extend_high_i8x16_s(); +assertSame(get(mem16, 0, 8), iota(8).map((n) => sign_extend(as[n+8], 8))); + +ins.exports.extend_low_i8x16_u(); +assertSame(get(mem16u, 0, 8), iota(8).map((n) => zero_extend(as[n], 8))); + +ins.exports.extend_high_i8x16_u(); +assertSame(get(mem16u, 0, 8), iota(8).map((n) => zero_extend(as[n+8], 8))); + +var mem32 = new Int32Array(ins.exports.mem.buffer); +var mem32u = new Uint32Array(ins.exports.mem.buffer); + +var as = [0, 1, 192, 3, 205, 5, 6, 133].map((x) => x << 8); + +set(mem16, 8, as); + +ins.exports.extend_low_i16x8_s(); +assertSame(get(mem32, 0, 4), iota(4).map((n) => sign_extend(as[n], 16))); + +ins.exports.extend_high_i16x8_s(); +assertSame(get(mem32, 0, 4), iota(4).map((n) => sign_extend(as[n+4], 16))); + +ins.exports.extend_low_i16x8_u(); +assertSame(get(mem32u, 0, 4), iota(4).map((n) => zero_extend(as[n], 16))); + +ins.exports.extend_high_i16x8_u(); +assertSame(get(mem32u, 0, 4), iota(4).map((n) => zero_extend(as[n+4], 16))); + + +// Extract lane. Ion constant folds, so test that too. 
+// +// operand is v128 in memory (or constant) +// lane index is immediate so we're testing something randomish but not zero +// result is scalar (returned directly) + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "extract_i8x16_9") (result i32) + (i8x16.extract_lane_s 9 (v128.load (i32.const 16)))) + (func (export "const_extract_i8x16_9") (result i32) + (i8x16.extract_lane_s 9 (v128.const i8x16 -1 -2 -3 -4 -5 -6 -7 -8 -9 -10 -11 -12 -13 -14 -15 -16))) + (func (export "extract_u8x16_6") (result i32) + (i8x16.extract_lane_u 6 (v128.load (i32.const 16)))) + (func (export "const_extract_u8x16_9") (result i32) + (i8x16.extract_lane_u 9 (v128.const i8x16 -1 -2 -3 -4 -5 -6 -7 -8 -9 -10 -11 -12 -13 -14 -15 -16))) + (func (export "extract_i16x8_5") (result i32) + (i16x8.extract_lane_s 5 (v128.load (i32.const 16)))) + (func (export "const_extract_i16x8_5") (result i32) + (i16x8.extract_lane_s 5 (v128.const i16x8 -1 -2 -3 -4 -5 -6 -7 -8))) + (func (export "extract_u16x8_3") (result i32) + (i16x8.extract_lane_u 3 (v128.load (i32.const 16)))) + (func (export "const_extract_u16x8_3") (result i32) + (i16x8.extract_lane_u 3 (v128.const i16x8 -1 -2 -3 -4 -5 -6 -7 -8))) + (func (export "extract_i32x4_2") (result i32) + (i32x4.extract_lane 2 (v128.load (i32.const 16)))) + (func (export "const_extract_i32x4_2") (result i32) + (i32x4.extract_lane 2 (v128.const i32x4 -1 -2 -3 -4))) + (func (export "extract_i64x2_1") (result i64) + (i64x2.extract_lane 1 (v128.load (i32.const 16)))) + (func (export "const_extract_i64x2_1") (result i64) + (i64x2.extract_lane 1 (v128.const i64x2 -1 -2))) + (func (export "extract_f32x4_2") (result f32) + (f32x4.extract_lane 2 (v128.load (i32.const 16)))) + (func (export "const_extract_f32x4_2") (result f32) + (f32x4.extract_lane 2 (v128.const f32x4 -1 -2 -3 -4))) + (func (export "extract_f64x2_1") (result f64) + (f64x2.extract_lane 1 (v128.load (i32.const 16)))) + (func (export "const_extract_f64x2_1") (result f64) + (f64x2.extract_lane 1 (v128.const f64x2 -1 -2))))`); + +var mem8 = new Uint8Array(ins.exports.mem.buffer); +var as = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; +var bs = as.map((x) => -x); + +set(mem8, 16, as) +assertEq(ins.exports.extract_i8x16_9(), as[9]); + +set(mem8, 16, bs) +assertEq(ins.exports.extract_u8x16_6(), 256 - as[6]); + +assertEq(ins.exports.const_extract_i8x16_9(), -10); +assertEq(ins.exports.const_extract_u8x16_9(), 256-10); + +var mem16 = new Uint16Array(ins.exports.mem.buffer); +var as = [1, 2, 3, 4, 5, 6, 7, 8]; +var bs = as.map((x) => -x); + +set(mem16, 8, as) +assertEq(ins.exports.extract_i16x8_5(), as[5]); + +set(mem16, 8, bs) +assertEq(ins.exports.extract_u16x8_3(), 65536 - as[3]); + +assertEq(ins.exports.const_extract_i16x8_5(), -6); +assertEq(ins.exports.const_extract_u16x8_3(), 65536-4); + +var mem32 = new Uint32Array(ins.exports.mem.buffer); +var as = [1, 2, 3, 4]; + +set(mem32, 4, as) +assertEq(ins.exports.extract_i32x4_2(), as[2]); + +assertEq(ins.exports.const_extract_i32x4_2(), -3); + +var mem32 = new Float32Array(ins.exports.mem.buffer); +var as = [1.5, 2.5, 3.5, 4.5]; + +set(mem32, 4, as) +assertEq(ins.exports.extract_f32x4_2(), as[2]); + +assertEq(ins.exports.const_extract_f32x4_2(), -3); + +var mem64 = new Float64Array(ins.exports.mem.buffer); +var as = [1.5, 2.5]; + +set(mem64, 2, as) +assertEq(ins.exports.extract_f64x2_1(), as[1]); + +assertEq(ins.exports.const_extract_f64x2_1(), -2); + +var mem64 = new BigInt64Array(ins.exports.mem.buffer); +var as = [12345, 67890]; + +set(mem64, 
2, as) +assertSame(ins.exports.extract_i64x2_1(), as[1]); + +assertEq(ins.exports.const_extract_i64x2_1(), -2n); + +// Replace lane +// +// operand 1 is v128 in memory +// operand 2 is immediate scalar +// lane index is immediate so we're testing something randomish but not zero +// (note though that fp operations have special cases for zero) +// result is v128 in memory + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "replace_i8x16_9") (param $value i32) + (v128.store (i32.const 0) + (i8x16.replace_lane 9 (v128.load (i32.const 16)) (local.get $value)))) + (func (export "replace_i16x8_5") (param $value i32) + (v128.store (i32.const 0) + (i16x8.replace_lane 5 (v128.load (i32.const 16)) (local.get $value)))) + (func (export "replace_i32x4_3") (param $value i32) + (v128.store (i32.const 0) + (i32x4.replace_lane 3 (v128.load (i32.const 16)) (local.get $value)))) + (func (export "replace_i64x2_1") (param $value i64) + (v128.store (i32.const 0) + (i64x2.replace_lane 1 (v128.load (i32.const 16)) (local.get $value)))) + (func (export "replace_f32x4_0") (param $value f32) + (v128.store (i32.const 0) + (f32x4.replace_lane 0 (v128.load (i32.const 16)) (local.get $value)))) + (func (export "replace_f32x4_3") (param $value f32) + (v128.store (i32.const 0) + (f32x4.replace_lane 3 (v128.load (i32.const 16)) (local.get $value)))) + (func (export "replace_f64x2_0") (param $value f64) + (v128.store (i32.const 0) + (f64x2.replace_lane 0 (v128.load (i32.const 16)) (local.get $value)))) + (func (export "replace_f64x2_1") (param $value f64) + (v128.store (i32.const 0) + (f64x2.replace_lane 1 (v128.load (i32.const 16)) (local.get $value)))))`); + + +var mem8 = new Uint8Array(ins.exports.mem.buffer); +var as = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + +set(mem8, 16, as) +ins.exports.replace_i8x16_9(42); +assertSame(get(mem8, 0, 16), upd(as, 9, 42)); + +var mem16 = new Uint16Array(ins.exports.mem.buffer); +var as = [1, 2, 3, 4, 5, 6, 7, 8]; + +set(mem16, 8, as) +ins.exports.replace_i16x8_5(42); +assertSame(get(mem16, 0, 8), upd(as, 5, 42)); + +var mem32 = new Uint32Array(ins.exports.mem.buffer); +var as = [1, 2, 3, 4]; + +set(mem32, 4, as) +ins.exports.replace_i32x4_3(42); +assertSame(get(mem32, 0, 4), upd(as, 3, 42)); + +var mem64 = new BigInt64Array(ins.exports.mem.buffer); +var as = [1, 2]; + +set(mem64, 2, as) +ins.exports.replace_i64x2_1(42n); +assertSame(get(mem64, 0, 2), upd(as, 1, 42)); + +var mem32 = new Float32Array(ins.exports.mem.buffer); +var as = [1.5, 2.5, 3.5, 4.5]; + +set(mem32, 4, as) +ins.exports.replace_f32x4_0(42.5); +assertSame(get(mem32, 0, 4), upd(as, 0, 42.5)); + +set(mem32, 4, as) +ins.exports.replace_f32x4_3(42.5); +assertSame(get(mem32, 0, 4), upd(as, 3, 42.5)); + +var mem64 = new Float64Array(ins.exports.mem.buffer); +var as = [1.5, 2.5]; + +set(mem64, 2, as) +ins.exports.replace_f64x2_0(42.5); +assertSame(get(mem64, 0, 2), upd(as, 0, 42.5)); + +set(mem64, 2, as) +ins.exports.replace_f64x2_1(42.5); +assertSame(get(mem64, 0, 2), upd(as, 1, 42.5)); + +// Load and splat +// +// Operand is memory address of scalar +// Result is v128 in memory + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "load_splat_v8x16") (param $addr i32) + (v128.store (i32.const 0) (v128.load8_splat (local.get $addr)))) + (func (export "load_splat_v16x8") (param $addr i32) + (v128.store (i32.const 0) (v128.load16_splat (local.get $addr)))) + (func (export "load_splat_v32x4") (param $addr i32) + (v128.store (i32.const 0) 
(v128.load32_splat (local.get $addr)))) + (func (export "load_splat_v64x2") (param $addr i32) + (v128.store (i32.const 0) (v128.load64_splat (local.get $addr)))))`); + +var mem8 = new Uint8Array(ins.exports.mem.buffer); +mem8[37] = 42; +ins.exports.load_splat_v8x16(37); +assertSame(get(mem8, 0, 16), [42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42]); + +var mem16 = new Uint16Array(ins.exports.mem.buffer); +mem16[37] = 69; +ins.exports.load_splat_v16x8(37*2); +assertSame(get(mem16, 0, 8), [69, 69, 69, 69, 69, 69, 69, 69]); + +var mem32 = new Int32Array(ins.exports.mem.buffer); +mem32[37] = 83; +ins.exports.load_splat_v32x4(37*4); +assertSame(get(mem32, 0, 4), [83, 83, 83, 83]); + +var mem64 = new BigInt64Array(ins.exports.mem.buffer); +mem64[37] = 83n; +ins.exports.load_splat_v64x2(37*8); +assertSame(get(mem64, 0, 2), [83, 83]); + +// Load and zero +// +// Operand is memory address of scalar +// Result is v128 in memory + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "load32_zero") (param $addr i32) + (v128.store (i32.const 0) (v128.load32_zero (local.get $addr)))) + (func (export "load64_zero") (param $addr i32) + (v128.store (i32.const 0) (v128.load64_zero (local.get $addr)))))`); + +var mem32 = new Int32Array(ins.exports.mem.buffer); +mem32[37] = 0x12345678; +mem32[38] = 0xffffffff; +mem32[39] = 0xfffffffe; +mem32[40] = 0xfffffffd; +ins.exports.load32_zero(37*4); +assertSame(get(mem32, 0, 4), [0x12345678, 0, 0, 0]); + +var mem64 = new BigInt64Array(ins.exports.mem.buffer); +mem64[37] = 0x12345678abcdef01n; +mem64[38] = 0xffffffffffffffffn; +ins.exports.load64_zero(37*8); +assertSame(get(mem64, 0, 2), [0x12345678abcdef01n, 0n]); + +// Load and extend +// +// Operand is memory address of 64-bit scalar representing 8, 4, or 2 values +// Result is v128 in memory + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "load8x8_s") (param $addr i32) + (v128.store (i32.const 0) (v128.load8x8_s (local.get $addr)))) + (func (export "load8x8_u") (param $addr i32) + (v128.store (i32.const 0) (v128.load8x8_u (local.get $addr)))) + (func (export "load16x4_s") (param $addr i32) + (v128.store (i32.const 0) (v128.load16x4_s (local.get $addr)))) + (func (export "load16x4_u") (param $addr i32) + (v128.store (i32.const 0) (v128.load16x4_u (local.get $addr)))) + (func (export "load32x2_s") (param $addr i32) + (v128.store (i32.const 0) (v128.load32x2_s (local.get $addr)))) + (func (export "load32x2_u") (param $addr i32) + (v128.store (i32.const 0) (v128.load32x2_u (local.get $addr)))))`); + +var mem8 = new Uint8Array(ins.exports.mem.buffer); +var mem16s = new Int16Array(ins.exports.mem.buffer); +var mem16u = new Uint16Array(ins.exports.mem.buffer); +var mem32s = new Int32Array(ins.exports.mem.buffer); +var mem32u = new Uint32Array(ins.exports.mem.buffer); +var mem64s = new BigInt64Array(ins.exports.mem.buffer); +var mem64u = new BigUint64Array(ins.exports.mem.buffer); +var xs = [42, 129, 2, 212, 44, 27, 12, 199]; +set(mem8, 48, xs); + +ins.exports.load8x8_s(48); +assertSame(get(mem16s, 0, 8), xs.map((x) => sign_extend(x, 8))); + +ins.exports.load8x8_u(48); +assertSame(get(mem16u, 0, 8), xs.map((x) => zero_extend(x, 8))); + +var xs = [(42 << 8) | 129, (212 << 8) | 2, (44 << 8) | 27, (199 << 8) | 12]; +set(mem16u, 24, xs); + +ins.exports.load16x4_s(48); +assertSame(get(mem32s, 0, 4), xs.map((x) => sign_extend(x, 16))); + +ins.exports.load16x4_u(48); +assertSame(get(mem32u, 0, 4), xs.map((x) => zero_extend(x, 16))); + +var xs = [5, 
-8];
+set(mem32u, 12, xs);
+
+ins.exports.load32x2_s(48);
+assertSame(get(mem64s, 0, 2), xs.map((x) => sign_extend(x, 32)));
+
+ins.exports.load32x2_u(48);
+assertSame(get(mem64s, 0, 2), xs.map((x) => zero_extend(x, 32)));
+
+// Vector select
+//
+// Operands and results are all in memory
+
+var ins = wasmEvalText(`
+  (module
+    (memory (export "mem") 1 1)
+    (func (export "bitselect_v128")
+      (v128.store (i32.const 0)
+        (v128.bitselect (v128.load (i32.const 16))
+                        (v128.load (i32.const 32))
+                        (v128.load (i32.const 48))))))`);
+
+var mem8 = new Uint8Array(ins.exports.mem.buffer);
+set(mem8, 16, iota(16).map((_) => 0xAA));
+set(mem8, 32, iota(16).map((_) => 0x55));
+
+set(mem8, 48, iota(16).map((_) => 0x99));
+ins.exports.bitselect_v128();
+assertSame(get(mem8, 0, 16), iota(16).map((_) => 0xCC));
+
+set(mem8, 48, iota(16).map((_) => 0x77));
+ins.exports.bitselect_v128();
+assertSame(get(mem8, 0, 16), iota(16).map((_) => 0x22));
+
+// Vector shuffle
+//
+// Operands and results are all in memory
+
+var ins = wasmEvalText(`
+  (module
+    (memory (export "mem") 1 1)
+    ;; the result interleaves the low eight bytes of the inputs
+    (func (export "shuffle1")
+      (v128.store (i32.const 0)
+        (i8x16.shuffle 0 16 1 17 2 18 3 19 4 20 5 21 6 22 7 23
+          (v128.load (i32.const 16))
+          (v128.load (i32.const 32)))))
+    ;; ditto the high eight bytes
+    (func (export "shuffle2")
+      (v128.store (i32.const 0)
+        (i8x16.shuffle 8 24 9 25 10 26 11 27 12 28 13 29 14 30 15 31
+          (v128.load (i32.const 16))
+          (v128.load (i32.const 32))))))`);
+
+var mem8 = new Uint8Array(ins.exports.mem.buffer);
+var xs = iota(16).map((n) => 0xA0 + n);
+var ys = iota(16).map((n) => 0x50 + n);
+set(mem8, 16, xs);
+set(mem8, 32, ys);
+
+ins.exports.shuffle1();
+assertSame(get(mem8, 0, 16), iota(16).map((x) => ((x & 1) ? ys : xs)[x >>> 1]))
+
+ins.exports.shuffle2();
+assertSame(get(mem8, 0, 16), iota(32).map((x) => ((x & 1) ? ys : xs)[x >>> 1]).slice(16));
+
+// Vector swizzle (variable permute).
+//
+// Case 1: Operands and results are all in memory
+
+var ins = wasmEvalText(`
+  (module
+    (memory (export "mem") 1 1)
+    (func (export "swizzle")
+      (v128.store (i32.const 0)
+        (i8x16.swizzle (v128.load (i32.const 16)) (v128.load (i32.const 32))))))`);
+
+var mem8 = new Uint8Array(ins.exports.mem.buffer);
+
+var xs = [100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115];
+set(mem8, 16, xs);
+
+set(mem8, 32, [1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]);
+ins.exports.swizzle();
+assertSame(get(mem8, 0, 16), [101,100,103,102,105,104,107,106,109,108,111,110,113,112,115,114]);
+
+set(mem8, 32, [9,8,11,10,13,12,16,14,1,0,3,2,5,192,7,6]);
+ins.exports.swizzle();
+assertSame(get(mem8, 0, 16), [109,108,111,110,113,112,0,114,101,100,103,102,105,0,107,106]);
+
+// Case 2: The mask operand is a constant; the swizzle gets optimized into a
+// shuffle (also see ion-analysis.js).
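+// Either way the lane semantics are the same; as a scalar reference (a
+// sketch, not used by the harness): lane i of the result is src[mask[i]]
+// when mask byte i is in range, and 0 when it is 16 or greater.
+function refSwizzle(src, mask) {
+    return mask.map((m) => (m & 255) < 16 ? src[m & 255] : 0);
+}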
+ +for ( let [mask, expected] of [[[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14], + [101,100,103,102,105,104,107,106,109,108,111,110,113,112,115,114]], + [[9,8,11,10,13,12,16,14,1,0,3,2,5,192,7,6], + [109,108,111,110,113,112,0,114,101,100,103,102,105,0,107,106]]] ) { + + let ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "swizzle") + (v128.store (i32.const 0) + (i8x16.swizzle (v128.load (i32.const 16)) (v128.const i8x16 ${mask.join(' ')}))))) +`); + + let mem8 = new Uint8Array(ins.exports.mem.buffer); + set(mem8, 16, [100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115]); + ins.exports.swizzle(); + assertSame(get(mem8, 0, 16), expected); +} + +// Convert integer to floating point + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "convert_s") + (v128.store (i32.const 0) + (f32x4.convert_i32x4_s (v128.load (i32.const 16))))) + (func (export "convert_u") + (v128.store (i32.const 0) + (f32x4.convert_i32x4_u (v128.load (i32.const 16))))))`); + +var mem32s = new Int32Array(ins.exports.mem.buffer); +var mem32f = new Float32Array(ins.exports.mem.buffer); +var xs = [1, -9, 77987, -34512]; + +set(mem32s, 4, xs); +ins.exports.convert_s(); +assertSame(get(mem32f, 0, 4), xs); + +var mem32u = new Uint32Array(ins.exports.mem.buffer); +var ys = xs.map((x) => x>>>0); + +set(mem32u, 4, ys); +ins.exports.convert_u(); +assertSame(get(mem32f, 0, 4), ys.map(Math.fround)); + +// Convert floating point to integer with saturating truncation + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "trunc_sat_s") + (v128.store (i32.const 0) + (i32x4.trunc_sat_f32x4_s (v128.load (i32.const 16))))) + (func (export "trunc_sat_u") + (v128.store (i32.const 0) + (i32x4.trunc_sat_f32x4_u (v128.load (i32.const 16))))))`); + +var mem32s = new Int32Array(ins.exports.mem.buffer); +var mem32u = new Uint32Array(ins.exports.mem.buffer); +var mem32f = new Float32Array(ins.exports.mem.buffer); +var xs = [1.5, -9.5, 7.5e12, -8e13]; + +set(mem32f, 4, xs); +ins.exports.trunc_sat_s(); +assertSame(get(mem32s, 0, 4), [1, -9, 0x7FFFFFFF, -0x80000000]); + +var xs = [1.5, -9.5, 7.5e12, 812]; +set(mem32f, 4, xs); +ins.exports.trunc_sat_u(); +assertSame(get(mem32u, 0, 4), [1, 0, 0xFFFFFFFF, 812]); + +var xs = [0, -0, 0x80860000, 0x100000000]; +set(mem32f, 4, xs); +ins.exports.trunc_sat_u(); +assertSame(get(mem32u, 0, 4), [0, 0, 0x80860000, 0xFFFFFFFF]); + +// Loops and blocks. This should at least test "sync" in the baseline compiler. + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func $f (param $count i32) (param $v v128) (result v128) + (local $tmp v128) + (block $B1 + (loop $L1 + (br_if $B1 (i32.eqz (local.get $count))) + (local.set $tmp (i32x4.add (local.get $tmp) (local.get $v))) + (local.set $count (i32.sub (local.get $count) (i32.const 1))) + (br $L1))) + (local.get $tmp)) + (func (export "run") (param $count i32) + (v128.store (i32.const 0) + (call $f (local.get $count) (v128.load (i32.const 16))))))`); + +var mem32 = new Int32Array(ins.exports.mem.buffer); +set(mem32, 4, [1,2,3,4]); +ins.exports.run(7); +assertSame(get(mem32, 0, 4), [7,14,21,28]); + +// Lots of parameters, this should trigger stack parameter passing +// +// 10 parameters in memory, we load them and pass them and operate on them. 
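+//
+// With the values stored below, vector i holds [1,2,3,4] scaled by i for
+// i = 1..10, so lane j of the sum is (1+2+...+10) * [1,2,3,4][j] and the
+// expected result is [55, 110, 165, 220].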
+ +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func $f (param $v0 v128) (param $v1 v128) (param $v2 v128) (param $v3 v128) (param $v4 v128) + (param $v5 v128) (param $v6 v128) (param $v7 v128) (param $v8 v128) (param $v9 v128) + (result v128) + (i32x4.add (local.get $v0) + (i32x4.add (local.get $v1) + (i32x4.add (local.get $v2) + (i32x4.add (local.get $v3) + (i32x4.add (local.get $v4) + (i32x4.add (local.get $v5) + (i32x4.add (local.get $v6) + (i32x4.add (local.get $v7) + (i32x4.add (local.get $v8) (local.get $v9))))))))))) + (func (export "run") + (v128.store (i32.const 0) + (call $f (v128.load (i32.const ${16*1})) + (v128.load (i32.const ${16*2})) + (v128.load (i32.const ${16*3})) + (v128.load (i32.const ${16*4})) + (v128.load (i32.const ${16*5})) + (v128.load (i32.const ${16*6})) + (v128.load (i32.const ${16*7})) + (v128.load (i32.const ${16*8})) + (v128.load (i32.const ${16*9})) + (v128.load (i32.const ${16*10}))))))`); + + +var mem32 = new Int32Array(ins.exports.mem.buffer); +var sum = [0, 0, 0, 0]; +for ( let i=1; i <= 10; i++ ) { + let v = [1,2,3,4].map((x) => x*i); + set(mem32, 4*i, v); + for ( let j=0; j < 4; j++ ) + sum[j] += v[j]; +} + +ins.exports.run(); + +assertSame(get(mem32, 0, 4), sum); + +// Globals. +// +// We have a number of different code paths and representations and +// need to test them all. +// +// Cases: +// - private global, mutable / immutable, initialized from constant or imported immutable global +// - exported global, mutable / immutable, initialized from constant or imported immutable global +// - imported global, mutable / immutable +// - imported global that's re-exported, mutable / immutable + +// Global used for initialization below. + +var init = (function () { + var ins = wasmEvalText(` + (module + (global (export "init") v128 (v128.const i32x4 9 8 7 6)))`); + return ins.exports; +})(); + +for ( let exportspec of ['', '(export "g")'] ) { + + // Private/exported immutable initialized from constant + + let ins1 = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (global $g ${exportspec} v128 (v128.const i32x4 9 8 7 6)) + (func (export "get") (param $dest i32) + (v128.store (local.get $dest) (global.get $g))))`); + + let mem1 = new Int32Array(ins1.exports.mem.buffer); + ins1.exports.get(0); + assertSame(get(mem1, 0, 4), [9, 8, 7, 6]); + + // Private/exported mutable initialized from constant + + let ins2 = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (global $g ${exportspec} (mut v128) (v128.const i32x4 9 8 7 6)) + (func (export "put") (param $val i32) + (global.set $g (i32x4.splat (local.get $val)))) + (func (export "get") (param $dest i32) + (v128.store (local.get $dest) (global.get $g))))`); + + let mem2 = new Int32Array(ins2.exports.mem.buffer); + ins2.exports.get(0); + assertSame(get(mem2, 0, 4), [9, 8, 7, 6]); + ins2.exports.put(37); + ins2.exports.get(0); + assertSame(get(mem2, 0, 4), [37, 37, 37, 37]); + + // Private/exported immutable initialized from imported immutable global + + let ins3 = wasmEvalText(` + (module + (global $init (import "m" "init") v128) + (memory (export "mem") 1 1) + (global $g ${exportspec} v128 (global.get $init)) + (func (export "get") (param $dest i32) + (v128.store (local.get $dest) (global.get $g))))`, + {m:init}); + + let mem3 = new Int32Array(ins3.exports.mem.buffer); + ins3.exports.get(0); + assertSame(get(mem3, 0, 4), [9, 8, 7, 6]); + + // Private/exported mutable initialized from imported immutable global + + let ins4 = wasmEvalText(` + (module + (global $init (import 
"m" "init") v128) + (memory (export "mem") 1 1) + (global $g ${exportspec} (mut v128) (global.get $init)) + (func (export "put") (param $val i32) + (global.set $g (i32x4.splat (local.get $val)))) + (func (export "get") (param $dest i32) + (v128.store (local.get $dest) (global.get $g))))`, + {m:init}); + + let mem4 = new Int32Array(ins4.exports.mem.buffer); + ins4.exports.get(0); + assertSame(get(mem4, 0, 4), [9, 8, 7, 6]); + ins4.exports.put(37); + ins4.exports.get(0); + assertSame(get(mem4, 0, 4), [37, 37, 37, 37]); + + // Imported private/re-exported immutable + + let ins5 = wasmEvalText(` + (module + (global $g ${exportspec} (import "m" "init") v128) + (memory (export "mem") 1 1) + (func (export "get") (param $dest i32) + (v128.store (local.get $dest) (global.get $g))))`, + {m:init}); + + let mem5 = new Int32Array(ins5.exports.mem.buffer); + ins5.exports.get(0); + assertSame(get(mem5, 0, 4), [9, 8, 7, 6]); + + // Imported private/re-exported mutable + + let mutg = (function () { + var ins = wasmEvalText(` + (module + (global (export "mutg") (mut v128) (v128.const i32x4 19 18 17 16)))`); + return ins.exports; + })(); + + let ins6 = wasmEvalText(` + (module + (global $g ${exportspec} (import "m" "mutg") (mut v128)) + (memory (export "mem") 1 1) + (func (export "put") (param $val i32) + (global.set $g (i32x4.splat (local.get $val)))) + (func (export "get") (param $dest i32) + (v128.store (local.get $dest) (global.get $g))))`, + {m:mutg}); + + let mem6 = new Int32Array(ins6.exports.mem.buffer); + ins6.exports.get(0); + assertSame(get(mem6, 0, 4), [19, 18, 17, 16]); + ins6.exports.put(37); + ins6.exports.get(0); + assertSame(get(mem6, 0, 4), [37, 37, 37, 37]); +} + +// Imports and exports that pass and return v128 + +var insworker = wasmEvalText(` + (module + (func (export "worker") (param v128) (result v128) + (i8x16.add (local.get 0) (v128.const i8x16 ${iota(16).join(' ')}))))`); + +var insrun = wasmEvalText(` + (module + (import "" "worker" (func $worker (param v128) (result v128))) + (memory (export "mem") 1 1) + (func (export "run") (param $srcloc i32) (param $destloc i32) + (v128.store (local.get $destloc) + (call $worker (v128.load (local.get $srcloc))))))`, + {"":insworker.exports}); + +var mem = new Uint8Array(insrun.exports.mem.buffer); +var xs = iota(16).map((x) => x+5); +set(mem, 0, xs); +insrun.exports.run(0, 16); +assertSame(get(mem, 16, 16), xs.map((x,i) => x+i)) + +// Make sure JS<->wasm call guards are sensible. + +// Calling from JS to export that accepts v128. +assertErrorMessage(() => insworker.exports.worker(), + TypeError, + /cannot pass.*v128.*to or from JS/); + +// Calling from wasm with v128 to import that comes from JS. The instantiation +// will succeed even if the param type of the import is v128 (see "create a host +// function" in the Wasm JSAPI spec), it is the act of invoking it that checks +// that verboten types are not used (see "run a host function", ibid.). +var badImporter = wasmEvalText(` + (module + (import "" "worker" (func $worker (param v128) (result v128))) + (func (export "run") + (drop (call $worker (v128.const i32x4 0 1 2 3)))))`, + {"":{worker: function(a) { return a; }}}); + +assertErrorMessage(() => badImporter.exports.run(), + TypeError, + /cannot pass.*v128.*to or from JS/); + +// Imports and exports that pass and return v128 as stack (not register) args. 
+ +var exportWithStackArgs = wasmEvalText(` + (module + (func (export "worker") (param v128) (param v128) (param v128) (param v128) + (param v128) (param v128) (param v128) (param v128) + (param v128) (param v128) (param v128) (param v128) + (param v128) (param v128) + (result v128 v128) + (i8x16.add (local.get 3) (local.get 12)) + (local.get 7)))`); + +var importWithStackArgs = wasmEvalText(` + (module + (type $t1 (func (param v128) (param v128) (param v128) (param v128) + (param v128) (param v128) (param v128) (param v128) + (param v128) (param v128) (param v128) (param v128) + (param v128) (param v128) + (result v128 v128))) + (import "" "worker" (func $worker (type $t1))) + (memory (export "mem") 1 1) + (table funcref (elem $worker)) + (func (export "run") + (i32.const 16) + (call_indirect (type $t1) (v128.const i32x4 1 1 1 1) (v128.const i32x4 2 2 2 2) (v128.const i32x4 3 3 3 3) + (v128.const i32x4 4 4 4 4) (v128.const i32x4 5 5 5 5) (v128.const i32x4 6 6 6 6) + (v128.const i32x4 7 7 7 7) (v128.const i32x4 8 8 8 8) (v128.const i32x4 9 9 9 9) + (v128.const i32x4 10 10 10 10) (v128.const i32x4 11 11 11 11) (v128.const i32x4 12 12 12 12) + (v128.const i32x4 13 13 13 13) (v128.const i32x4 14 14 14 14) + (i32.const 0)) + drop + v128.store + (i32.const 0) + (call $worker (v128.const i32x4 1 1 1 1) (v128.const i32x4 2 2 2 2) (v128.const i32x4 3 3 3 3) + (v128.const i32x4 4 4 4 4) (v128.const i32x4 5 5 5 5) (v128.const i32x4 6 6 6 6) + (v128.const i32x4 7 7 7 7) (v128.const i32x4 8 8 8 8) (v128.const i32x4 9 9 9 9) + (v128.const i32x4 10 10 10 10) (v128.const i32x4 11 11 11 11) (v128.const i32x4 12 12 12 12) + (v128.const i32x4 13 13 13 13) (v128.const i32x4 14 14 14 14)) + drop + v128.store))`, + {"": exportWithStackArgs.exports}); + +var mem = new Int32Array(importWithStackArgs.exports.mem.buffer); +importWithStackArgs.exports.run(); +assertSame(get(mem, 0, 4), [17, 17, 17, 17]); +assertSame(get(mem, 4, 4), [17, 17, 17, 17]); + +// Imports and exports of v128 globals + +var insexporter = wasmEvalText(` + (module + (global (export "myglobal") (mut v128) (v128.const i8x16 ${iota(16).join(' ')})))`); + +var insimporter = wasmEvalText(` + (module + (import "m" "myglobal" (global $g (mut v128))) + (memory (export "mem") 1 1) + (func (export "run") (param $dest i32) + (v128.store (local.get $dest) (global.get $g))))`, + {m:insexporter.exports}); + +var mem = new Uint8Array(insimporter.exports.mem.buffer); +insimporter.exports.run(16); +assertSame(get(mem, 16, 16), iota(16)); + +// Guards on accessing v128 globals from JS + +assertErrorMessage(() => insexporter.exports.myglobal.value = 0, + TypeError, + /cannot pass.*v128.*to or from JS/); + +assertErrorMessage(function () { let v = insexporter.exports.myglobal.value }, + TypeError, + /cannot pass.*v128.*to or from JS/); + +// Multi-value cases + v128 parameters to if, block, loop + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func $mvreturn (result v128 v128 v128) + (v128.load (i32.const 16)) + (v128.load (i32.const 0)) + (v128.load (i32.const 32))) + (func (export "runreturn") + i32.const 48 + (call $mvreturn) + i32x4.sub ;; [-20, -20, -20, -20] + i32x4.sub ;; [31, 32, 33, 34] + v128.store) + (func (export "runif") (param $cond i32) + i32.const 48 + (v128.load (i32.const 0)) + (v128.load (i32.const 16)) + (if (param v128) (param v128) (result v128 v128) + (local.get $cond) + (then i32x4.add + (v128.load (i32.const 32))) + (else i32x4.sub + (v128.load (i32.const 0)))) + i32x4.add + v128.store) + (func (export "runblock") + 
i32.const 48
+      (v128.load (i32.const 0))
+      (v128.load (i32.const 16))
+      (block (param v128 v128) (result v128 v128)
+        i32x4.add
+        (v128.load (i32.const 32)))
+      i32x4.add
+      v128.store)
+    (func (export "runloop") (param $count i32)
+      i32.const 48
+      (v128.load (i32.const 0))
+      (v128.load (i32.const 16))
+      (block $B (param v128 v128) (result v128 v128)
+        (loop $L (param v128 v128) (result v128 v128)
+          i32x4.add
+          (v128.load (i32.const 32))
+          (local.set $count (i32.sub (local.get $count) (i32.const 1)))
+          (br_if $B (i32.eqz (local.get $count)))
+          (br $L)))
+      i32x4.add
+      v128.store))`);
+
+var mem = new Int32Array(ins.exports.mem.buffer);
+set(mem, 0, [1, 2, 3, 4]);
+set(mem, 4, [11, 12, 13, 14]);
+set(mem, 8, [21, 22, 23, 24]);
+
+// Multi-value returns
+
+ins.exports.runreturn();
+assertSame(get(mem, 12, 4), [31, 32, 33, 34]);
+
+// Multi-parameters to and multi-returns from "if"
+
+// This should be vector@0 + vector@16 + vector@32
+ins.exports.runif(1);
+assertSame(get(mem, 12, 4),
+           [33, 36, 39, 42]);
+
+// This should be vector@0 - vector@16 + vector@0
+ins.exports.runif(0);
+assertSame(get(mem, 12, 4),
+           [-9, -8, -7, -6]);
+
+// This should be vector@0 + vector@16 + vector@32
+ins.exports.runblock();
+assertSame(get(mem, 12, 4),
+           [33, 36, 39, 42]);
+
+// This should be vector@0 + vector@16 + N * vector@32 where
+// N is the parameter to runloop.
+ins.exports.runloop(3);
+assertSame(get(mem, 12, 4),
+           [12+3*21, 14+3*22, 16+3*23, 18+3*24]);
diff --git a/js/src/jit-test/tests/wasm/simd/avx2-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/avx2-x64-ion-codegen.js
new file mode 100644
index 0000000000..af8269e190
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/avx2-x64-ion-codegen.js
@@ -0,0 +1,584 @@
+// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x64 || getBuildConfiguration().simulator || !isAvxPresent(); include:codegen-x64-test.js
+
+// Test that there are no extraneous moves for various SIMD conversion
+// operations. See README-codegen.md for general information about this type of
+// test case.
+
+// Note that these tests check the beginning of the output but not the end.
+
+// Currently AVX2 exhibits a defect when a function uses its first v128 arg and
+// returns v128: the register allocator adds unneeded extra moves out of xmm0,
+// then into a different temporary, and that temporary is then used as the arg.
+// To simplify things, the tests below just ignore the first arg.
+// v128 OP v128 -> v128
+// inputs: [[complete-opname, expected-pattern], ...]
+function codegenTestX64_v128xv128_v128_avxhack(inputs, options = {}) {
+    for ( let [op, expected] of inputs ) {
+        codegenTestX64_adhoc(wrap(options, `
+    (func (export "f") (param v128 v128 v128) (result v128)
+      (${op} (local.get 1) (local.get 2)))`),
+                             'f',
+                             expected,
+                             options);
+    }
+}
+// (see codegenTestX64_v128xv128_v128_avxhack comment about AVX defect)
+// v128 OP const -> v128
+// inputs: [[complete-opname, const, expected-pattern], ...]
+function codegenTestX64_v128xLITERAL_v128_avxhack(inputs, options = {}) {
+    for ( let [op, const_, expected] of inputs ) {
+        codegenTestX64_adhoc(wrap(options, `
+    (func (export "f") (param v128 v128) (result v128)
+      (${op} (local.get 1) ${const_}))`),
+                             'f',
+                             expected,
+                             options);
+    }
+}
+// (see codegenTestX64_v128xv128_v128_avxhack comment about AVX defect)
+// const OP v128 -> v128
+// inputs: [[complete-opname, const, expected-pattern], ...]
+function codegenTestX64_LITERALxv128_v128_avxhack(inputs, options = {}) {
+    for ( let [op, const_, expected] of inputs ) {
+        codegenTestX64_adhoc(wrap(options, `
+    (func (export "f") (param v128 v128) (result v128)
+      (${op} ${const_} (local.get 1)))`),
+                             'f',
+                             expected,
+                             options);
+    }
+}
+
+// Utility function to test SIMD operation encodings, where the input argument
+// has the specified type (T).
+// inputs: [[type, complete-opname, expected-pattern], ...]
+function codegenTestX64_T_v128_avxhack(inputs, options = {}) {
+    for ( let [ty, op, expected] of inputs ) {
+        codegenTestX64_adhoc(wrap(options, `
+    (func (export "f") (param ${ty}) (result v128)
+      (${op} (local.get 0)))`),
+                             'f',
+                             expected,
+                             options);
+    }
+}
+
+// Matchers for any 64- and 32-bit registers.
+var GPR_I64 = "%r\\w+";
+var GPR_I32 = "%(?:e\\w+|r\\d+d)";
+
+// Simple binary ops: e.g. add, sub, mul
+codegenTestX64_v128xv128_v128_avxhack(
+    [['i8x16.avgr_u', `c5 f1 e0 c2 vpavgb %xmm2, %xmm1, %xmm0`],
+     ['i16x8.avgr_u', `c5 f1 e3 c2 vpavgw %xmm2, %xmm1, %xmm0`],
+     ['i8x16.add', `c5 f1 fc c2 vpaddb %xmm2, %xmm1, %xmm0`],
+     ['i8x16.add_sat_s', `c5 f1 ec c2 vpaddsb %xmm2, %xmm1, %xmm0`],
+     ['i8x16.add_sat_u', `c5 f1 dc c2 vpaddusb %xmm2, %xmm1, %xmm0`],
+     ['i8x16.sub', `c5 f1 f8 c2 vpsubb %xmm2, %xmm1, %xmm0`],
+     ['i8x16.sub_sat_s', `c5 f1 e8 c2 vpsubsb %xmm2, %xmm1, %xmm0`],
+     ['i8x16.sub_sat_u', `c5 f1 d8 c2 vpsubusb %xmm2, %xmm1, %xmm0`],
+     ['i16x8.mul', `c5 f1 d5 c2 vpmullw %xmm2, %xmm1, %xmm0`],
+     ['i16x8.min_s', `c5 f1 ea c2 vpminsw %xmm2, %xmm1, %xmm0`],
+     ['i16x8.min_u', `c4 e2 71 3a c2 vpminuw %xmm2, %xmm1, %xmm0`],
+     ['i16x8.max_s', `c5 f1 ee c2 vpmaxsw %xmm2, %xmm1, %xmm0`],
+     ['i16x8.max_u', `c4 e2 71 3e c2 vpmaxuw %xmm2, %xmm1, %xmm0`],
+     ['i32x4.add', `c5 f1 fe c2 vpaddd %xmm2, %xmm1, %xmm0`],
+     ['i32x4.sub', `c5 f1 fa c2 vpsubd %xmm2, %xmm1, %xmm0`],
+     ['i32x4.mul', `c4 e2 71 40 c2 vpmulld %xmm2, %xmm1, %xmm0`],
+     ['i32x4.min_s', `c4 e2 71 39 c2 vpminsd %xmm2, %xmm1, %xmm0`],
+     ['i32x4.min_u', `c4 e2 71 3b c2 vpminud %xmm2, %xmm1, %xmm0`],
+     ['i32x4.max_s', `c4 e2 71 3d c2 vpmaxsd %xmm2, %xmm1, %xmm0`],
+     ['i32x4.max_u', `c4 e2 71 3f c2 vpmaxud %xmm2, %xmm1, %xmm0`],
+     ['i64x2.add', `c5 f1 d4 c2 vpaddq %xmm2, %xmm1, %xmm0`],
+     ['i64x2.sub', `c5 f1 fb c2 vpsubq %xmm2, %xmm1, %xmm0`],
+     ['i64x2.mul', `
+c5 e1 73 d1 20 vpsrlq \\$0x20, %xmm1, %xmm3
+66 0f f4 da pmuludq %xmm2, %xmm3
+c5 81 73 d2 20 vpsrlq \\$0x20, %xmm2, %xmm15
+66 44 0f f4 f9 pmuludq %xmm1, %xmm15
+66 44 0f d4 fb paddq %xmm3, %xmm15
+66 41 0f 73 f7 20 psllq \\$0x20, %xmm15
+c5 f1 f4 c2 vpmuludq %xmm2, %xmm1, %xmm0
+66 41 0f d4 c7 paddq %xmm15, %xmm0`],
+     ['f32x4.add', `c5 f0 58 c2 vaddps %xmm2, %xmm1, %xmm0`],
+     ['f32x4.sub', `c5 f0 5c c2 vsubps %xmm2, %xmm1, %xmm0`],
+     ['f32x4.mul', `c5 f0 59 c2 vmulps %xmm2, %xmm1, %xmm0`],
+     ['f32x4.div', `c5 f0 5e c2 vdivps %xmm2, %xmm1, %xmm0`],
+     ['f64x2.add', `c5 f1 58 c2 vaddpd %xmm2, %xmm1, %xmm0`],
+     ['f64x2.sub', `c5 f1 5c c2 vsubpd %xmm2, %xmm1, %xmm0`],
+     ['f64x2.mul', `c5 f1 59 c2 vmulpd %xmm2, %xmm1, %xmm0`],
+     ['f64x2.div', `c5 f1 5e c2 vdivpd %xmm2, %xmm1, %xmm0`],
+     ['i8x16.narrow_i16x8_s', `c5 f1 63 c2 vpacksswb %xmm2, %xmm1, %xmm0`],
+     ['i8x16.narrow_i16x8_u', `c5 f1 67 c2 vpackuswb %xmm2, %xmm1, %xmm0`],
+     ['i16x8.narrow_i32x4_s', `c5 f1 6b c2 vpackssdw %xmm2, %xmm1, %xmm0`],
+     ['i16x8.narrow_i32x4_u', `c4 e2 71 2b c2 vpackusdw %xmm2, %xmm1, %xmm0`],
+     ['i32x4.dot_i16x8_s', `c5 f1 f5 c2 vpmaddwd %xmm2, %xmm1, %xmm0`]]);
+
+// Simple comparison ops
+codegenTestX64_v128xv128_v128_avxhack(
+    
[['i8x16.eq', `c5 f1 74 c2 vpcmpeqb %xmm2, %xmm1, %xmm0`], + ['i8x16.ne', ` +c5 f1 74 c2 vpcmpeqb %xmm2, %xmm1, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i8x16.lt_s', `c5 e9 64 c1 vpcmpgtb %xmm1, %xmm2, %xmm0`], + ['i8x16.gt_u', ` +c5 f1 de c2 vpmaxub %xmm2, %xmm1, %xmm0 +66 0f 74 c2 pcmpeqb %xmm2, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i16x8.eq', `c5 f1 75 c2 vpcmpeqw %xmm2, %xmm1, %xmm0`], + ['i16x8.ne', ` +c5 f1 75 c2 vpcmpeqw %xmm2, %xmm1, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i16x8.le_s', ` +c5 f1 65 c2 vpcmpgtw %xmm2, %xmm1, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i16x8.ge_u', ` +c4 e2 71 3a c2 vpminuw %xmm2, %xmm1, %xmm0 +66 0f 75 c2 pcmpeqw %xmm2, %xmm0`], + ['i32x4.eq', `c5 f1 76 c2 vpcmpeqd %xmm2, %xmm1, %xmm0`], + ['i32x4.ne', ` +c5 f1 76 c2 vpcmpeqd %xmm2, %xmm1, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i32x4.lt_s', `c5 e9 66 c1 vpcmpgtd %xmm1, %xmm2, %xmm0`], + ['i32x4.gt_u', ` +c4 e2 71 3f c2 vpmaxud %xmm2, %xmm1, %xmm0 +66 0f 76 c2 pcmpeqd %xmm2, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i64x2.eq', `c4 e2 71 29 c2 vpcmpeqq %xmm2, %xmm1, %xmm0`], + ['i64x2.ne', ` +c4 e2 71 29 c2 vpcmpeqq %xmm2, %xmm1, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i64x2.lt_s', `c4 e2 69 37 c1 vpcmpgtq %xmm1, %xmm2, %xmm0`], + ['i64x2.ge_s', ` +c4 e2 69 37 c1 vpcmpgtq %xmm1, %xmm2, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['f32x4.eq', `c5 f0 c2 c2 00 vcmpps \\$0x00, %xmm2, %xmm1, %xmm0`], + ['f32x4.lt', `c5 f0 c2 c2 01 vcmpps \\$0x01, %xmm2, %xmm1, %xmm0`], + ['f32x4.ge', `c5 e8 c2 c1 02 vcmpps \\$0x02, %xmm1, %xmm2, %xmm0`], + ['f64x2.eq', `c5 f1 c2 c2 00 vcmppd \\$0x00, %xmm2, %xmm1, %xmm0`], + ['f64x2.lt', `c5 f1 c2 c2 01 vcmppd \\$0x01, %xmm2, %xmm1, %xmm0`], + ['f64x2.ge', `c5 e9 c2 c1 02 vcmppd \\$0x02, %xmm1, %xmm2, %xmm0`], + ['f32x4.pmin', `c5 e8 5d c1 vminps %xmm1, %xmm2, %xmm0`], + ['f32x4.pmax', `c5 e8 5f c1 vmaxps %xmm1, %xmm2, %xmm0`], + ['f64x2.pmin', `c5 e9 5d c1 vminpd %xmm1, %xmm2, %xmm0`], + ['f64x2.pmax', `c5 e9 5f c1 vmaxpd %xmm1, %xmm2, %xmm0`], + ['i8x16.swizzle', ` +c5 69 dc 3d ${RIPRADDR} vpaddusbx ${RIPR}, %xmm2, %xmm15 +c4 c2 71 00 c7 vpshufb %xmm15, %xmm1, %xmm0`], + ['i16x8.extmul_high_i8x16_s', ` +66 44 0f 3a 0f fa 08 palignr \\$0x08, %xmm2, %xmm15 +c4 42 79 20 ff vpmovsxbw %xmm15, %xmm15 +66 0f 3a 0f c1 08 palignr \\$0x08, %xmm1, %xmm0 +c4 e2 79 20 c0 vpmovsxbw %xmm0, %xmm0 +66 41 0f d5 c7 pmullw %xmm15, %xmm0`], + ['i32x4.extmul_low_i16x8_u', ` +c5 71 e4 fa vpmulhuw %xmm2, %xmm1, %xmm15 +c5 f1 d5 c2 vpmullw %xmm2, %xmm1, %xmm0 +66 41 0f 61 c7 punpcklwd %xmm15, %xmm0`], + ['i64x2.extmul_low_i32x4_s', ` +c5 79 70 f9 10 vpshufd \\$0x10, %xmm1, %xmm15 +c5 f9 70 c2 10 vpshufd \\$0x10, %xmm2, %xmm0 +66 41 0f 38 28 c7 pmuldq %xmm15, %xmm0`], + ['i16x8.q15mulr_sat_s', ` +c4 e2 71 0b c2 vpmulhrsw %xmm2, %xmm1, %xmm0 +c5 79 75 3d ${RIPRADDR} vpcmpeqwx ${RIPR}, %xmm0, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`], +]); + +// Bitwise binary ops +codegenTestX64_v128xv128_v128_avxhack( + [['v128.and', `c5 f1 db c2 vpand %xmm2, %xmm1, %xmm0`], + ['v128.andnot', `c5 e9 df c1 vpandn %xmm1, %xmm2, %xmm0`], + ['v128.or', `c5 f1 eb c2 vpor %xmm2, %xmm1, %xmm0`], + ['v128.xor', `c5 f1 ef c2 vpxor %xmm2, %xmm1, %xmm0`]]); + + 
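+// For reference, a rough sketch of the kind of check the helpers above
+// perform (codegen-x64-test.js itself is not part of this patch, so this is
+// an illustration rather than its exact implementation): compile a
+// one-function module, disassemble the export, and look for the expected
+// mnemonic, in the style of the wasmDis checks in cmp-bitselect.js. The
+// names below are local to this sketch.
+{
+    let sketchIns = wasmEvalText(`
+    (module
+      (func (export "f") (param v128 v128 v128) (result v128)
+        (i8x16.add (local.get 1) (local.get 2))))`);
+    let sketchDis = wasmDis(sketchIns.exports.f, {asString: true});
+    assertEq(sketchDis.includes('vpaddb'), true);
+}
+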
+// Replace lane ops. +codegenTestX64_adhoc(`(module + (func (export "f") (param v128 v128 i32) (result v128) + (i8x16.replace_lane 7 (local.get 1) (local.get 2))))`, 'f', ` +c4 .. 71 20 .. 07 vpinsrb \\$0x07, ${GPR_I32}, %xmm1, %xmm0`); +codegenTestX64_adhoc(`(module + (func (export "f") (param v128 v128 i32) (result v128) + (i16x8.replace_lane 3 (local.get 1) (local.get 2))))`, 'f', ` +(?:c4 .. 71|c5 f1) c4 .. 03 vpinsrw \\$0x03, ${GPR_I32}, %xmm1, %xmm0`); +codegenTestX64_adhoc(`(module + (func (export "f") (param v128 v128 i32) (result v128) + (i32x4.replace_lane 2 (local.get 1) (local.get 2))))`, 'f', ` +c4 .. 71 22 .. 02 vpinsrd \\$0x02, ${GPR_I32}, %xmm1, %xmm0`); +codegenTestX64_adhoc(`(module + (func (export "f") (param v128 v128 i64) (result v128) + (i64x2.replace_lane 1 (local.get 1) (local.get 2))))`, 'f', ` +c4 .. f1 22 .. 01 vpinsrq \\$0x01, ${GPR_I64}, %xmm1, %xmm0`); + + +if (isAvxPresent(2)) { + codegenTestX64_T_v128_avxhack( + [['i32', 'i8x16.splat', ` +c5 f9 6e .. vmovd ${GPR_I32}, %xmm0 +c4 e2 79 78 c0 vpbroadcastb %xmm0, %xmm0`], + ['i32', 'i16x8.splat', ` +c5 f9 6e .. vmovd ${GPR_I32}, %xmm0 +c4 e2 79 79 c0 vpbroadcastw %xmm0, %xmm0`], + ['i32', 'i32x4.splat', ` +c5 f9 6e .. vmovd ${GPR_I32}, %xmm0 +c4 e2 79 58 c0 vpbroadcastd %xmm0, %xmm0`], + ['i64', 'i64x2.splat', ` +c4 e1 f9 6e .. vmovq ${GPR_I64}, %xmm0 +c4 e2 79 59 c0 vpbroadcastq %xmm0, %xmm0`], + ['f32', 'f32x4.splat', `c4 e2 79 18 c0 vbroadcastss %xmm0, %xmm0`]], {log:true}); + + codegenTestX64_T_v128_avxhack( + [['i32', 'v128.load8_splat', + 'c4 c2 79 78 04 .. vpbroadcastbb \\(%r15,%r\\w+,1\\), %xmm0'], + ['i32', 'v128.load16_splat', + 'c4 c2 79 79 04 .. vpbroadcastww \\(%r15,%r\\w+,1\\), %xmm0'], + ['i32', 'v128.load32_splat', + 'c4 c2 79 18 04 .. vbroadcastssl \\(%r15,%r\\w+,1\\), %xmm0']], {memory: 1}); +} + +// Using VEX during shuffle ops +codegenTestX64_v128xv128_v128_avxhack([ + // Identity op on second argument should generate a move + ['i8x16.shuffle 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15', + 'c5 f9 6f c1 vmovdqa %xmm1, %xmm0'], + + // Broadcast a byte from first argument + ['i8x16.shuffle 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5', + ` +c5 f1 60 c1 vpunpcklbw %xmm1, %xmm1, %xmm0 +c5 fa 70 c0 55 vpshufhw \\$0x55, %xmm0, %xmm0 +c5 f9 70 c0 aa vpshufd \\$0xAA, %xmm0, %xmm0`], + + // Broadcast a word from first argument + ['i8x16.shuffle 4 5 4 5 4 5 4 5 4 5 4 5 4 5 4 5', + ` +c5 fb 70 c1 aa vpshuflw \\$0xAA, %xmm1, %xmm0 +c5 f9 70 c0 00 vpshufd \\$0x00, %xmm0, %xmm0`], + + // Permute words + ['i8x16.shuffle 2 3 0 1 6 7 4 5 10 11 8 9 14 15 12 13', +` +c5 fb 70 c1 b1 vpshuflw \\$0xB1, %xmm1, %xmm0 +c5 fa 70 c0 b1 vpshufhw \\$0xB1, %xmm0, %xmm0`], + + // Permute doublewords + ['i8x16.shuffle 4 5 6 7 0 1 2 3 12 13 14 15 8 9 10 11', + 'c5 f9 70 c1 b1 vpshufd \\$0xB1, %xmm1, %xmm0'], + + // Interleave doublewords + ['i8x16.shuffle 0 1 2 3 16 17 18 19 4 5 6 7 20 21 22 23', + 'c5 f1 62 c2 vpunpckldq %xmm2, %xmm1, %xmm0'], + + // Interleave quadwords + ['i8x16.shuffle 24 25 26 27 28 29 30 31 8 9 10 11 12 13 14 15', + 'c5 e9 6d c1 vpunpckhqdq %xmm1, %xmm2, %xmm0'], + + // Rotate right + ['i8x16.shuffle 13 14 15 0 1 2 3 4 5 6 7 8 9 10 11 12', + `c4 e3 71 0f c1 0d vpalignr \\$0x0D, %xmm1, %xmm1, %xmm0`], + ['i8x16.shuffle 28 29 30 31 0 1 2 3 4 5 6 7 8 9 10 11', + `c4 e3 71 0f c2 0c vpalignr \\$0x0C, %xmm2, %xmm1, %xmm0`]]); + +if (isAvxPresent(2)) { + codegenTestX64_v128xv128_v128_avxhack([ + // Broadcast low byte from second argument + ['i8x16.shuffle 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0', + 'c4 e2 79 78 c1 vpbroadcastb %xmm1, 
%xmm0'], + + // Broadcast low word from third argument + ['i8x16.shuffle 16 17 16 17 16 17 16 17 16 17 16 17 16 17 16 17', + 'c4 e2 79 79 c2 vpbroadcastw %xmm2, %xmm0'], + + // Broadcast low doubleword from second argument + ['i8x16.shuffle 0 1 2 3 0 1 2 3 0 1 2 3 0 1 2 3', + 'c4 e2 79 58 c1 vpbroadcastd %xmm1, %xmm0']]); +} + +// Testing AVX optimization where VPBLENDVB accepts four XMM registers as args. +codegenTestX64_adhoc( + `(func (export "f") (param v128 v128 v128 v128) (result v128) + (i8x16.shuffle 0 17 2 3 4 5 6 7 24 25 26 11 12 13 30 15 + (local.get 2)(local.get 3)))`, + 'f', +` +66 0f 6f 0d ${RIPRADDR} movdqax ${RIPR}, %xmm1 +c4 e3 69 4c c3 10 vpblendvb %xmm1, %xmm3, %xmm2, %xmm0`); + +// Constant arguments that are folded into the instruction +codegenTestX64_v128xLITERAL_v128_avxhack( + [['i8x16.add', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 fc 05 ${RIPRADDR} vpaddbx ${RIPR}, %xmm1, %xmm0`], + ['i8x16.sub', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 f8 05 ${RIPRADDR} vpsubbx ${RIPR}, %xmm1, %xmm0`], + ['i8x16.add_sat_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 ec 05 ${RIPRADDR} vpaddsbx ${RIPR}, %xmm1, %xmm0`], + ['i8x16.add_sat_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 dc 05 ${RIPRADDR} vpaddusbx ${RIPR}, %xmm1, %xmm0`], + ['i8x16.sub_sat_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 e8 05 ${RIPRADDR} vpsubsbx ${RIPR}, %xmm1, %xmm0`], + ['i8x16.sub_sat_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 d8 05 ${RIPRADDR} vpsubusbx ${RIPR}, %xmm1, %xmm0`], + ['i8x16.min_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c4 e2 71 38 05 ${RIPRADDR} vpminsbx ${RIPR}, %xmm1, %xmm0`], + ['i8x16.min_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 da 05 ${RIPRADDR} vpminubx ${RIPR}, %xmm1, %xmm0`], + ['i8x16.max_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c4 e2 71 3c 05 ${RIPRADDR} vpmaxsbx ${RIPR}, %xmm1, %xmm0`], + ['i8x16.max_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 de 05 ${RIPRADDR} vpmaxubx ${RIPR}, %xmm1, %xmm0`], + ['i8x16.eq', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 74 05 ${RIPRADDR} vpcmpeqbx ${RIPR}, %xmm1, %xmm0`], + ['i8x16.ne', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', ` + c5 f1 74 05 ${RIPRADDR} vpcmpeqbx ${RIPR}, %xmm1, %xmm0 + 66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 + 66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i8x16.gt_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 64 05 ${RIPRADDR} vpcmpgtbx ${RIPR}, %xmm1, %xmm0`], + ['i8x16.le_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', ` + c5 f1 64 05 ${RIPRADDR} vpcmpgtbx ${RIPR}, %xmm1, %xmm0 + 66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 + 66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i8x16.narrow_i16x8_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 63 05 ${RIPRADDR} vpacksswbx ${RIPR}, %xmm1, %xmm0`], + ['i8x16.narrow_i16x8_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 67 05 ${RIPRADDR} vpackuswbx ${RIPR}, %xmm1, %xmm0`], + + ['i16x8.add', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c5 f1 fd 05 ${RIPRADDR} vpaddwx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.sub', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c5 f1 f9 05 ${RIPRADDR} vpsubwx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.mul', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c5 f1 d5 05 ${RIPRADDR} vpmullwx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.add_sat_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c5 f1 ed 05 
${RIPRADDR} vpaddswx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.add_sat_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c5 f1 dd 05 ${RIPRADDR} vpadduswx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.sub_sat_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c5 f1 e9 05 ${RIPRADDR} vpsubswx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.sub_sat_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c5 f1 d9 05 ${RIPRADDR} vpsubuswx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.min_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c5 f1 ea 05 ${RIPRADDR} vpminswx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.min_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c4 e2 71 3a 05 ${RIPRADDR} vpminuwx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.max_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c5 f1 ee 05 ${RIPRADDR} vpmaxswx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.max_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c4 e2 71 3e 05 ${RIPRADDR} vpmaxuwx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.eq', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c5 f1 75 05 ${RIPRADDR} vpcmpeqwx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.ne', '(v128.const i16x8 1 2 1 2 1 2 1 2)', ` + c5 f1 75 05 ${RIPRADDR} vpcmpeqwx ${RIPR}, %xmm1, %xmm0 + 66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 + 66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i16x8.gt_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c5 f1 65 05 ${RIPRADDR} vpcmpgtwx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.le_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', ` + c5 f1 65 05 ${RIPRADDR} vpcmpgtwx ${RIPR}, %xmm1, %xmm0 + 66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 + 66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i16x8.narrow_i32x4_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c5 f1 6b 05 ${RIPRADDR} vpackssdwx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.narrow_i32x4_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c4 e2 71 2b 05 ${RIPRADDR} vpackusdwx ${RIPR}, %xmm1, %xmm0`], + + ['i32x4.add', '(v128.const i32x4 1 2 1 2)', + `c5 f1 fe 05 ${RIPRADDR} vpadddx ${RIPR}, %xmm1, %xmm0`], + ['i32x4.sub', '(v128.const i32x4 1 2 1 2)', + `c5 f1 fa 05 ${RIPRADDR} vpsubdx ${RIPR}, %xmm1, %xmm0`], + ['i32x4.mul', '(v128.const i32x4 1 2 1 2)', + `c4 e2 71 40 05 ${RIPRADDR} vpmulldx ${RIPR}, %xmm1, %xmm0`], + ['i32x4.min_s', '(v128.const i32x4 1 2 1 2)', + `c4 e2 71 39 05 ${RIPRADDR} vpminsdx ${RIPR}, %xmm1, %xmm0`], + ['i32x4.min_u', '(v128.const i32x4 1 2 1 2)', + `c4 e2 71 3b 05 ${RIPRADDR} vpminudx ${RIPR}, %xmm1, %xmm0`], + ['i32x4.max_s', '(v128.const i32x4 1 2 1 2)', + `c4 e2 71 3d 05 ${RIPRADDR} vpmaxsdx ${RIPR}, %xmm1, %xmm0`], + ['i32x4.max_u', '(v128.const i32x4 1 2 1 2)', + `c4 e2 71 3f 05 ${RIPRADDR} vpmaxudx ${RIPR}, %xmm1, %xmm0`], + ['i32x4.eq', '(v128.const i32x4 1 2 1 2)', + `c5 f1 76 05 ${RIPRADDR} vpcmpeqdx ${RIPR}, %xmm1, %xmm0`], + ['i32x4.ne', '(v128.const i32x4 1 2 1 2)', ` + c5 f1 76 05 ${RIPRADDR} vpcmpeqdx ${RIPR}, %xmm1, %xmm0 + 66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 + 66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i32x4.gt_s', '(v128.const i32x4 1 2 1 2)', + `c5 f1 66 05 ${RIPRADDR} vpcmpgtdx ${RIPR}, %xmm1, %xmm0`], + ['i32x4.le_s', '(v128.const i32x4 1 2 1 2)', ` + c5 f1 66 05 ${RIPRADDR} vpcmpgtdx ${RIPR}, %xmm1, %xmm0 + 66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 + 66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i32x4.dot_i16x8_s', '(v128.const i32x4 1 2 1 2)', + `c5 f1 f5 05 ${RIPRADDR} vpmaddwdx ${RIPR}, %xmm1, %xmm0`], + + ['i64x2.add', '(v128.const i64x2 1 2)', + `c5 f1 d4 05 ${RIPRADDR} vpaddqx ${RIPR}, %xmm1, %xmm0`], + ['i64x2.sub', '(v128.const i64x2 1 2)', + `c5 f1 fb 05 ${RIPRADDR} vpsubqx ${RIPR}, %xmm1, %xmm0`], + + ['v128.and', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 db 05 ${RIPRADDR} vpandx 
${RIPR}, %xmm1, %xmm0`], + ['v128.or', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 eb 05 ${RIPRADDR} vporx ${RIPR}, %xmm1, %xmm0`], + ['v128.xor', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 ef 05 ${RIPRADDR} vpxorx ${RIPR}, %xmm1, %xmm0`], + + ['f32x4.add', '(v128.const f32x4 1 2 3 4)', + `c5 f0 58 05 ${RIPRADDR} vaddpsx ${RIPR}, %xmm1, %xmm0`], + ['f32x4.sub', '(v128.const f32x4 1 2 3 4)', + `c5 f0 5c 05 ${RIPRADDR} vsubpsx ${RIPR}, %xmm1, %xmm0`], + ['f32x4.mul', '(v128.const f32x4 1 2 3 4)', + `c5 f0 59 05 ${RIPRADDR} vmulpsx ${RIPR}, %xmm1, %xmm0`], + ['f32x4.div', '(v128.const f32x4 1 2 3 4)', + `c5 f0 5e 05 ${RIPRADDR} vdivpsx ${RIPR}, %xmm1, %xmm0`], + + ['f64x2.add', '(v128.const f64x2 1 2)', + `c5 f1 58 05 ${RIPRADDR} vaddpdx ${RIPR}, %xmm1, %xmm0`], + ['f64x2.sub', '(v128.const f64x2 1 2)', + `c5 f1 5c 05 ${RIPRADDR} vsubpdx ${RIPR}, %xmm1, %xmm0`], + ['f64x2.mul', '(v128.const f64x2 1 2)', + `c5 f1 59 05 ${RIPRADDR} vmulpdx ${RIPR}, %xmm1, %xmm0`], + ['f64x2.div', '(v128.const f64x2 1 2)', + `c5 f1 5e 05 ${RIPRADDR} vdivpdx ${RIPR}, %xmm1, %xmm0`], + + ['f32x4.eq', '(v128.const f32x4 1 2 3 4)', + `c5 f0 c2 05 ${RIPRADDR} 00 vcmppsx \\$0x00, ${RIPR}, %xmm1, %xmm0`], + ['f32x4.ne', '(v128.const f32x4 1 2 3 4)', + `c5 f0 c2 05 ${RIPRADDR} 04 vcmppsx \\$0x04, ${RIPR}, %xmm1, %xmm0`], + ['f32x4.lt', '(v128.const f32x4 1 2 3 4)', + `c5 f0 c2 05 ${RIPRADDR} 01 vcmppsx \\$0x01, ${RIPR}, %xmm1, %xmm0`], + ['f32x4.le', '(v128.const f32x4 1 2 3 4)', + `c5 f0 c2 05 ${RIPRADDR} 02 vcmppsx \\$0x02, ${RIPR}, %xmm1, %xmm0`], + + ['f64x2.eq', '(v128.const f64x2 1 2)', + `c5 f1 c2 05 ${RIPRADDR} 00 vcmppdx \\$0x00, ${RIPR}, %xmm1, %xmm0`], + ['f64x2.ne', '(v128.const f64x2 1 2)', + `c5 f1 c2 05 ${RIPRADDR} 04 vcmppdx \\$0x04, ${RIPR}, %xmm1, %xmm0`], + ['f64x2.lt', '(v128.const f64x2 1 2)', + `c5 f1 c2 05 ${RIPRADDR} 01 vcmppdx \\$0x01, ${RIPR}, %xmm1, %xmm0`], + ['f64x2.le', '(v128.const f64x2 1 2)', + `c5 f1 c2 05 ${RIPRADDR} 02 vcmppdx \\$0x02, ${RIPR}, %xmm1, %xmm0`]]); + + // Commutative operations with constants on the lhs should generate the same + // code as with the constant on the rhs. 
+ codegenTestX64_LITERALxv128_v128_avxhack( + [['i8x16.add', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 fc 05 ${RIPRADDR} vpaddbx ${RIPR}, %xmm1, %xmm0`], + ['i8x16.add_sat_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 ec 05 ${RIPRADDR} vpaddsbx ${RIPR}, %xmm1, %xmm0`], + ['i8x16.add_sat_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 dc 05 ${RIPRADDR} vpaddusbx ${RIPR}, %xmm1, %xmm0`], + ['i8x16.min_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c4 e2 71 38 05 ${RIPRADDR} vpminsbx ${RIPR}, %xmm1, %xmm0`], + ['i8x16.min_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 da 05 ${RIPRADDR} vpminubx ${RIPR}, %xmm1, %xmm0`], + ['i8x16.max_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c4 e2 71 3c 05 ${RIPRADDR} vpmaxsbx ${RIPR}, %xmm1, %xmm0`], + ['i8x16.max_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 de 05 ${RIPRADDR} vpmaxubx ${RIPR}, %xmm1, %xmm0`], + ['i8x16.eq', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 74 05 ${RIPRADDR} vpcmpeqbx ${RIPR}, %xmm1, %xmm0`], + ['i8x16.ne', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', ` + c5 f1 74 05 ${RIPRADDR} vpcmpeqbx ${RIPR}, %xmm1, %xmm0 + 66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 + 66 41 0f ef c7 pxor %xmm15, %xmm0`], + + ['i16x8.add', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c5 f1 fd 05 ${RIPRADDR} vpaddwx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.mul', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c5 f1 d5 05 ${RIPRADDR} vpmullwx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.add_sat_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c5 f1 ed 05 ${RIPRADDR} vpaddswx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.add_sat_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c5 f1 dd 05 ${RIPRADDR} vpadduswx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.min_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c5 f1 ea 05 ${RIPRADDR} vpminswx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.min_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c4 e2 71 3a 05 ${RIPRADDR} vpminuwx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.max_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c5 f1 ee 05 ${RIPRADDR} vpmaxswx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.max_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c4 e2 71 3e 05 ${RIPRADDR} vpmaxuwx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.eq', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c5 f1 75 05 ${RIPRADDR} vpcmpeqwx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.ne', '(v128.const i16x8 1 2 1 2 1 2 1 2)', ` + c5 f1 75 05 ${RIPRADDR} vpcmpeqwx ${RIPR}, %xmm1, %xmm0 + 66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 + 66 41 0f ef c7 pxor %xmm15, %xmm0`], + + ['i32x4.add', '(v128.const i32x4 1 2 1 2)', + `c5 f1 fe 05 ${RIPRADDR} vpadddx ${RIPR}, %xmm1, %xmm0`], + ['i32x4.mul', '(v128.const i32x4 1 2 1 2)', + `c4 e2 71 40 05 ${RIPRADDR} vpmulldx ${RIPR}, %xmm1, %xmm0`], + ['i32x4.min_s', '(v128.const i32x4 1 2 1 2)', + `c4 e2 71 39 05 ${RIPRADDR} vpminsdx ${RIPR}, %xmm1, %xmm0`], + ['i32x4.min_u', '(v128.const i32x4 1 2 1 2)', + `c4 e2 71 3b 05 ${RIPRADDR} vpminudx ${RIPR}, %xmm1, %xmm0`], + ['i32x4.max_s', '(v128.const i32x4 1 2 1 2)', + `c4 e2 71 3d 05 ${RIPRADDR} vpmaxsdx ${RIPR}, %xmm1, %xmm0`], + ['i32x4.max_u', '(v128.const i32x4 1 2 1 2)', + `c4 e2 71 3f 05 ${RIPRADDR} vpmaxudx ${RIPR}, %xmm1, %xmm0`], + ['i32x4.eq', '(v128.const i32x4 1 2 1 2)', + `c5 f1 76 05 ${RIPRADDR} vpcmpeqdx ${RIPR}, %xmm1, %xmm0`], + ['i32x4.ne', '(v128.const i32x4 1 2 1 2)', ` + c5 f1 76 05 ${RIPRADDR} vpcmpeqdx ${RIPR}, %xmm1, %xmm0 + 66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 + 66 41 0f ef c7 pxor %xmm15, %xmm0`], + 
['i32x4.dot_i16x8_s', '(v128.const i32x4 1 2 1 2)',
+      `c5 f1 f5 05 ${RIPRADDR} vpmaddwdx ${RIPR}, %xmm1, %xmm0`],
+
+     ['i64x2.add', '(v128.const i64x2 1 2)',
+      `c5 f1 d4 05 ${RIPRADDR} vpaddqx ${RIPR}, %xmm1, %xmm0`],
+
+     ['v128.and', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+      `c5 f1 db 05 ${RIPRADDR} vpandx ${RIPR}, %xmm1, %xmm0`],
+     ['v128.or', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+      `c5 f1 eb 05 ${RIPRADDR} vporx ${RIPR}, %xmm1, %xmm0`],
+     ['v128.xor', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+      `c5 f1 ef 05 ${RIPRADDR} vpxorx ${RIPR}, %xmm1, %xmm0`]]);
+
+// Shift by constant encodings
+codegenTestX64_v128xLITERAL_v128_avxhack(
+    [['i8x16.shl', '(i32.const 2)', `
+c5 f1 fc c1 vpaddb %xmm1, %xmm1, %xmm0
+66 0f fc c0 paddb %xmm0, %xmm0`],
+     ['i8x16.shl', '(i32.const 4)', `
+c5 f1 db 05 ${RIPRADDR} vpandx ${RIPR}, %xmm1, %xmm0
+66 0f 71 f0 04 psllw \\$0x04, %xmm0`],
+     ['i16x8.shl', '(i32.const 1)',
+      'c5 f9 71 f1 01 vpsllw \\$0x01, %xmm1, %xmm0'],
+     ['i16x8.shr_s', '(i32.const 3)',
+      'c5 f9 71 e1 03 vpsraw \\$0x03, %xmm1, %xmm0'],
+     ['i16x8.shr_u', '(i32.const 2)',
+      'c5 f9 71 d1 02 vpsrlw \\$0x02, %xmm1, %xmm0'],
+     ['i32x4.shl', '(i32.const 5)',
+      'c5 f9 72 f1 05 vpslld \\$0x05, %xmm1, %xmm0'],
+     ['i32x4.shr_s', '(i32.const 2)',
+      'c5 f9 72 e1 02 vpsrad \\$0x02, %xmm1, %xmm0'],
+     ['i32x4.shr_u', '(i32.const 5)',
+      'c5 f9 72 d1 05 vpsrld \\$0x05, %xmm1, %xmm0'],
+     ['i64x2.shr_s', '(i32.const 7)', `
+c5 79 70 f9 f5 vpshufd \\$0xF5, %xmm1, %xmm15
+66 41 0f 72 e7 1f psrad \\$0x1F, %xmm15
+c4 c1 71 ef c7 vpxor %xmm15, %xmm1, %xmm0
+66 0f 73 d0 07 psrlq \\$0x07, %xmm0
+66 41 0f ef c7 pxor %xmm15, %xmm0`]]);
+
+// vpblendvb optimization when bitselect follows comparison.
+codegenTestX64_adhoc(
+    `(module
+       (func (export "f") (param v128) (param v128) (param v128) (param v128) (result v128)
+         (v128.bitselect (local.get 2) (local.get 3)
+           (i32x4.eq (local.get 0) (local.get 1)))))`,
+    'f', `
+66 0f 76 c1 pcmpeqd %xmm1, %xmm0
+c4 e3 61 4c c2 10 vpblendvb %xmm1, %xmm3, %xmm2, %xmm0`);
diff --git a/js/src/jit-test/tests/wasm/simd/baseline-bug1636235.js b/js/src/jit-test/tests/wasm/simd/baseline-bug1636235.js
new file mode 100644
index 0000000000..da1fb68e6b
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/baseline-bug1636235.js
@@ -0,0 +1,111 @@
+// |jit-test| skip-if: !wasmSimdEnabled()
+
+// Bug 1636235: assorted corner case baseline SIMD bugs.
+
+function get(arr, loc, len) {
+    let res = [];
+    for ( let i=0; i < len; i++ ) {
+        res.push(arr[loc+i]);
+    }
+    return res;
+}
+
+// Pass v128 along a control flow edge in br_table
+
+var ins = wasmEvalText(`
+  (module
+    (memory (export "mem") 1 1)
+    (func (export "run") (param $k i32)
+      (v128.store (i32.const 0) (call $f (local.get $k))))
+    (func $f (param $k i32) (result v128)
+      (block $B2 (result v128)
+        (block $B1 (result v128)
+          (v128.const i32x4 1 2 3 4)
+          (br_table $B1 $B2 (local.get $k)))
+        (drop)
+        (v128.const i32x4 5 6 7 8))))`);
+
+var mem = new Int32Array(ins.exports.mem.buffer);
+ins.exports.run(0);
+assertDeepEq(get(mem, 0, 4), [5, 6, 7, 8]);
+
+ins.exports.run(1);
+assertDeepEq(get(mem, 0, 4), [1, 2, 3, 4]);
+
+// Materialize a ConstV128 off the value stack in popStackResults (also: check
+// that br passing v128 values works as it should).
+ +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + + (func (export "run") (param $k i32) + (local $t0 v128) (local $t1 v128) (local $t2 v128) + (call $f (local.get $k)) + (local.set $t2) + (local.set $t1) + (local.set $t0) + (v128.store (i32.const 32) (local.get $t2)) + (v128.store (i32.const 16) (local.get $t1)) + (v128.store (i32.const 0) (local.get $t0))) + + (func $f (param $k i32) (result v128 v128 v128) + (block $B2 (result v128 v128 v128) + (if (local.get $k) + (br $B2 (v128.const i32x4 5 6 7 8) + (v128.const i32x4 9 10 11 12) + (v128.const i32x4 13 14 15 16)) + (br $B2 (v128.const i32x4 -5 -6 -7 -8) + (v128.const i32x4 -9 -10 -11 -12) + (v128.const i32x4 -13 -14 -15 -16))) + (unreachable))))`); + +var mem = new Int32Array(ins.exports.mem.buffer); +ins.exports.run(0); +assertDeepEq(get(mem, 0, 4), [-5, -6, -7, -8]); +assertDeepEq(get(mem, 4, 4), [-9, -10, -11, -12]); +assertDeepEq(get(mem, 8, 4), [-13, -14, -15, -16]); + +ins.exports.run(1); +assertDeepEq(get(mem, 0, 4), [5, 6, 7, 8]); +assertDeepEq(get(mem, 4, 4), [9, 10, 11, 12]); +assertDeepEq(get(mem, 8, 4), [13, 14, 15, 16]); + +// Check that br_if passing v128 values works as it should. + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + + (func (export "run") (param $k i32) + (local $t0 v128) (local $t1 v128) (local $t2 v128) + (call $f (local.get $k)) + (local.set $t2) + (local.set $t1) + (local.set $t0) + (v128.store (i32.const 32) (local.get $t2)) + (v128.store (i32.const 16) (local.get $t1)) + (v128.store (i32.const 0) (local.get $t0))) + + (func $f (param $k i32) (result v128 v128 v128) + (block $B2 (result v128 v128 v128) + (v128.const i32x4 5 6 7 8) + (v128.const i32x4 9 10 11 12) + (v128.const i32x4 13 14 15 16) + (br_if $B2 (local.get $k)) + drop drop drop + (v128.const i32x4 -5 -6 -7 -8) + (v128.const i32x4 -9 -10 -11 -12) + (v128.const i32x4 -13 -14 -15 -16))))`); + +var mem = new Int32Array(ins.exports.mem.buffer); +ins.exports.run(0); +assertDeepEq(get(mem, 0, 4), [-5, -6, -7, -8]); +assertDeepEq(get(mem, 4, 4), [-9, -10, -11, -12]); +assertDeepEq(get(mem, 8, 4), [-13, -14, -15, -16]); + +ins.exports.run(1); +assertDeepEq(get(mem, 0, 4), [5, 6, 7, 8]); +assertDeepEq(get(mem, 4, 4), [9, 10, 11, 12]); +assertDeepEq(get(mem, 8, 4), [13, 14, 15, 16]); + diff --git a/js/src/jit-test/tests/wasm/simd/binop-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/binop-x64-ion-codegen.js new file mode 100644 index 0000000000..17c15e22d5 --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/binop-x64-ion-codegen.js @@ -0,0 +1,255 @@ +// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x64 || getBuildConfiguration().simulator; include:codegen-x64-test.js + +// Test that there are no extraneous moves or fixups for sundry SIMD binary +// operations. See README-codegen.md for general information about this type of +// test case. 
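+
+// In the constant-folding patterns later in this file, RIPRADDR and RIPR come
+// from codegen-x64-test.js (not part of this patch); they match the encoded
+// displacement and the printed operand of a RIP-relative address. A constant
+// operand that is folded into the instruction is therefore read directly from
+// a PC-relative constant-pool slot instead of first being materialized into a
+// register.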
+ +// Inputs (xmm0, xmm1) + +codegenTestX64_v128xPTYPE_v128( + [['f32x4.replace_lane 0', 'f32', `f3 0f 10 c1 movss %xmm1, %xmm0`], + ['f32x4.replace_lane 1', 'f32', `66 0f 3a 21 c1 10 insertps \\$0x10, %xmm1, %xmm0`], + ['f32x4.replace_lane 3', 'f32', `66 0f 3a 21 c1 30 insertps \\$0x30, %xmm1, %xmm0`], + ['f64x2.replace_lane 0', 'f64', `f2 0f 10 c1 movsd %xmm1, %xmm0`], + ['f64x2.replace_lane 1', 'f64', `66 0f c6 c1 00 shufpd \\$0x00, %xmm1, %xmm0`]] ); + +// Inputs (xmm1, xmm0) + +codegenTestX64_v128xv128_v128_reversed( + [['f32x4.pmin', `0f 5d c1 minps %xmm1, %xmm0`], + ['f32x4.pmax', `0f 5f c1 maxps %xmm1, %xmm0`], + ['f64x2.pmin', `66 0f 5d c1 minpd %xmm1, %xmm0`], + ['f64x2.pmax', `66 0f 5f c1 maxpd %xmm1, %xmm0`]] ); + +// Constant arguments that are folded into the instruction + +codegenTestX64_v128xLITERAL_v128( + [['i8x16.add', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f fc 05 ${RIPRADDR} paddbx ${RIPR}, %xmm0`], + ['i8x16.sub', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f f8 05 ${RIPRADDR} psubbx ${RIPR}, %xmm0`], + ['i8x16.add_sat_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f ec 05 ${RIPRADDR} paddsbx ${RIPR}, %xmm0`], + ['i8x16.add_sat_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f dc 05 ${RIPRADDR} paddusbx ${RIPR}, %xmm0`], + ['i8x16.sub_sat_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f e8 05 ${RIPRADDR} psubsbx ${RIPR}, %xmm0`], + ['i8x16.sub_sat_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f d8 05 ${RIPRADDR} psubusbx ${RIPR}, %xmm0`], + ['i8x16.min_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f 38 38 05 ${RIPRADDR} pminsbx ${RIPR}, %xmm0`], + ['i8x16.min_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f da 05 ${RIPRADDR} pminubx ${RIPR}, %xmm0`], + ['i8x16.max_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f 38 3c 05 ${RIPRADDR} pmaxsbx ${RIPR}, %xmm0`], + ['i8x16.max_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f de 05 ${RIPRADDR} pmaxubx ${RIPR}, %xmm0`], + ['i8x16.eq', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f 74 05 ${RIPRADDR} pcmpeqbx ${RIPR}, %xmm0`], + ['i8x16.ne', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', ` +66 0f 74 05 ${RIPRADDR} pcmpeqbx ${RIPR}, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i8x16.gt_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f 64 05 ${RIPRADDR} pcmpgtbx ${RIPR}, %xmm0`], + ['i8x16.le_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', ` +66 0f 64 05 ${RIPRADDR} pcmpgtbx ${RIPR}, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i8x16.narrow_i16x8_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f 63 05 ${RIPRADDR} packsswbx ${RIPR}, %xmm0`], + ['i8x16.narrow_i16x8_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f 67 05 ${RIPRADDR} packuswbx ${RIPR}, %xmm0`], + + ['i16x8.add', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f fd 05 ${RIPRADDR} paddwx ${RIPR}, %xmm0`], + ['i16x8.sub', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f f9 05 ${RIPRADDR} psubwx ${RIPR}, %xmm0`], + ['i16x8.mul', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f d5 05 ${RIPRADDR} pmullwx ${RIPR}, %xmm0`], + ['i16x8.add_sat_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f ed 05 ${RIPRADDR} paddswx ${RIPR}, %xmm0`], + ['i16x8.add_sat_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f dd 05 ${RIPRADDR} padduswx 
${RIPR}, %xmm0`], + ['i16x8.sub_sat_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f e9 05 ${RIPRADDR} psubswx ${RIPR}, %xmm0`], + ['i16x8.sub_sat_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f d9 05 ${RIPRADDR} psubuswx ${RIPR}, %xmm0`], + ['i16x8.min_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f ea 05 ${RIPRADDR} pminswx ${RIPR}, %xmm0`], + ['i16x8.min_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f 38 3a 05 ${RIPRADDR} pminuwx ${RIPR}, %xmm0`], + ['i16x8.max_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f ee 05 ${RIPRADDR} pmaxswx ${RIPR}, %xmm0`], + ['i16x8.max_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f 38 3e 05 ${RIPRADDR} pmaxuwx ${RIPR}, %xmm0`], + ['i16x8.eq', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f 75 05 ${RIPRADDR} pcmpeqwx ${RIPR}, %xmm0`], + ['i16x8.ne', '(v128.const i16x8 1 2 1 2 1 2 1 2)', ` +66 0f 75 05 ${RIPRADDR} pcmpeqwx ${RIPR}, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i16x8.gt_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f 65 05 ${RIPRADDR} pcmpgtwx ${RIPR}, %xmm0`], + ['i16x8.le_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', ` +66 0f 65 05 ${RIPRADDR} pcmpgtwx ${RIPR}, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i16x8.narrow_i32x4_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f 6b 05 ${RIPRADDR} packssdwx ${RIPR}, %xmm0`], + ['i16x8.narrow_i32x4_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f 38 2b 05 ${RIPRADDR} packusdwx ${RIPR}, %xmm0`], + + ['i32x4.add', '(v128.const i32x4 1 2 1 2)', + `66 0f fe 05 ${RIPRADDR} padddx ${RIPR}, %xmm0`], + ['i32x4.sub', '(v128.const i32x4 1 2 1 2)', + `66 0f fa 05 ${RIPRADDR} psubdx ${RIPR}, %xmm0`], + ['i32x4.mul', '(v128.const i32x4 1 2 1 2)', + `66 0f 38 40 05 ${RIPRADDR} pmulldx ${RIPR}, %xmm0`], + ['i32x4.min_s', '(v128.const i32x4 1 2 1 2)', + `66 0f 38 39 05 ${RIPRADDR} pminsdx ${RIPR}, %xmm0`], + ['i32x4.min_u', '(v128.const i32x4 1 2 1 2)', + `66 0f 38 3b 05 ${RIPRADDR} pminudx ${RIPR}, %xmm0`], + ['i32x4.max_s', '(v128.const i32x4 1 2 1 2)', + `66 0f 38 3d 05 ${RIPRADDR} pmaxsdx ${RIPR}, %xmm0`], + ['i32x4.max_u', '(v128.const i32x4 1 2 1 2)', + `66 0f 38 3f 05 ${RIPRADDR} pmaxudx ${RIPR}, %xmm0`], + ['i32x4.eq', '(v128.const i32x4 1 2 1 2)', + `66 0f 76 05 ${RIPRADDR} pcmpeqdx ${RIPR}, %xmm0`], + ['i32x4.ne', '(v128.const i32x4 1 2 1 2)', ` +66 0f 76 05 ${RIPRADDR} pcmpeqdx ${RIPR}, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i32x4.gt_s', '(v128.const i32x4 1 2 1 2)', + `66 0f 66 05 ${RIPRADDR} pcmpgtdx ${RIPR}, %xmm0`], + ['i32x4.le_s', '(v128.const i32x4 1 2 1 2)', ` +66 0f 66 05 ${RIPRADDR} pcmpgtdx ${RIPR}, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i32x4.dot_i16x8_s', '(v128.const i32x4 1 2 1 2)', + `66 0f f5 05 ${RIPRADDR} pmaddwdx ${RIPR}, %xmm0`], + + ['i64x2.add', '(v128.const i64x2 1 2)', + `66 0f d4 05 ${RIPRADDR} paddqx ${RIPR}, %xmm0`], + ['i64x2.sub', '(v128.const i64x2 1 2)', + `66 0f fb 05 ${RIPRADDR} psubqx ${RIPR}, %xmm0`], + + ['v128.and', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f db 05 ${RIPRADDR} pandx ${RIPR}, %xmm0`], + ['v128.or', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f eb 05 ${RIPRADDR} porx ${RIPR}, %xmm0`], + ['v128.xor', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f ef 05 ${RIPRADDR} pxorx ${RIPR}, %xmm0`], + + ['f32x4.add', '(v128.const f32x4 1 2 3 4)', + `0f 58 05 ${RIPRADDR} addpsx ${RIPR}, %xmm0`], + ['f32x4.sub', 
'(v128.const f32x4 1 2 3 4)', + `0f 5c 05 ${RIPRADDR} subpsx ${RIPR}, %xmm0`], + ['f32x4.mul', '(v128.const f32x4 1 2 3 4)', + `0f 59 05 ${RIPRADDR} mulpsx ${RIPR}, %xmm0`], + ['f32x4.div', '(v128.const f32x4 1 2 3 4)', + `0f 5e 05 ${RIPRADDR} divpsx ${RIPR}, %xmm0`], + ['f32x4.eq', '(v128.const f32x4 1 2 3 4)', + `0f c2 05 ${RIPRADDR} 00 cmppsx \\$0x00, ${RIPR}, %xmm0`], + ['f32x4.ne', '(v128.const f32x4 1 2 3 4)', + `0f c2 05 ${RIPRADDR} 04 cmppsx \\$0x04, ${RIPR}, %xmm0`], + ['f32x4.lt', '(v128.const f32x4 1 2 3 4)', + `0f c2 05 ${RIPRADDR} 01 cmppsx \\$0x01, ${RIPR}, %xmm0`], + ['f32x4.le', '(v128.const f32x4 1 2 3 4)', + `0f c2 05 ${RIPRADDR} 02 cmppsx \\$0x02, ${RIPR}, %xmm0`], + + ['f64x2.add', '(v128.const f64x2 1 2)', + `66 0f 58 05 ${RIPRADDR} addpdx ${RIPR}, %xmm0`], + ['f64x2.sub', '(v128.const f64x2 1 2)', + `66 0f 5c 05 ${RIPRADDR} subpdx ${RIPR}, %xmm0`], + ['f64x2.mul', '(v128.const f64x2 1 2)', + `66 0f 59 05 ${RIPRADDR} mulpdx ${RIPR}, %xmm0`], + ['f64x2.div', '(v128.const f64x2 1 2)', + `66 0f 5e 05 ${RIPRADDR} divpdx ${RIPR}, %xmm0`], + ['f64x2.eq', '(v128.const f64x2 1 2)', + `66 0f c2 05 ${RIPRADDR} 00 cmppdx \\$0x00, ${RIPR}, %xmm0`], + ['f64x2.ne', '(v128.const f64x2 1 2)', + `66 0f c2 05 ${RIPRADDR} 04 cmppdx \\$0x04, ${RIPR}, %xmm0`], + ['f64x2.lt', '(v128.const f64x2 1 2)', + `66 0f c2 05 ${RIPRADDR} 01 cmppdx \\$0x01, ${RIPR}, %xmm0`], + ['f64x2.le', '(v128.const f64x2 1 2)', + `66 0f c2 05 ${RIPRADDR} 02 cmppdx \\$0x02, ${RIPR}, %xmm0`]]); + +// Commutative operations with constants on the lhs should generate the same +// code as with the constant on the rhs. + +codegenTestX64_LITERALxv128_v128( + [['i8x16.add', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f fc 05 ${RIPRADDR} paddbx ${RIPR}, %xmm0`], + ['i8x16.add_sat_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f ec 05 ${RIPRADDR} paddsbx ${RIPR}, %xmm0`], + ['i8x16.add_sat_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f dc 05 ${RIPRADDR} paddusbx ${RIPR}, %xmm0`], + ['i8x16.min_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f 38 38 05 ${RIPRADDR} pminsbx ${RIPR}, %xmm0`], + ['i8x16.min_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f da 05 ${RIPRADDR} pminubx ${RIPR}, %xmm0`], + ['i8x16.max_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f 38 3c 05 ${RIPRADDR} pmaxsbx ${RIPR}, %xmm0`], + ['i8x16.max_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f de 05 ${RIPRADDR} pmaxubx ${RIPR}, %xmm0`], + ['i8x16.eq', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f 74 05 ${RIPRADDR} pcmpeqbx ${RIPR}, %xmm0`], + ['i8x16.ne', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', ` +66 0f 74 05 ${RIPRADDR} pcmpeqbx ${RIPR}, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`], + + ['i16x8.add', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f fd 05 ${RIPRADDR} paddwx ${RIPR}, %xmm0`], + ['i16x8.mul', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f d5 05 ${RIPRADDR} pmullwx ${RIPR}, %xmm0`], + ['i16x8.add_sat_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f ed 05 ${RIPRADDR} paddswx ${RIPR}, %xmm0`], + ['i16x8.add_sat_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f dd 05 ${RIPRADDR} padduswx ${RIPR}, %xmm0`], + ['i16x8.min_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f ea 05 ${RIPRADDR} pminswx ${RIPR}, %xmm0`], + ['i16x8.min_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f 38 3a 05 ${RIPRADDR} pminuwx ${RIPR}, %xmm0`], + ['i16x8.max_s', '(v128.const 
i16x8 1 2 1 2 1 2 1 2)', + `66 0f ee 05 ${RIPRADDR} pmaxswx ${RIPR}, %xmm0`], + ['i16x8.max_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f 38 3e 05 ${RIPRADDR} pmaxuwx ${RIPR}, %xmm0`], + ['i16x8.eq', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f 75 05 ${RIPRADDR} pcmpeqwx ${RIPR}, %xmm0`], + ['i16x8.ne', '(v128.const i16x8 1 2 1 2 1 2 1 2)', ` +66 0f 75 05 ${RIPRADDR} pcmpeqwx ${RIPR}, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`], + + ['i32x4.add', '(v128.const i32x4 1 2 1 2)', + `66 0f fe 05 ${RIPRADDR} padddx ${RIPR}, %xmm0`], + ['i32x4.mul', '(v128.const i32x4 1 2 1 2)', + `66 0f 38 40 05 ${RIPRADDR} pmulldx ${RIPR}, %xmm0`], + ['i32x4.min_s', '(v128.const i32x4 1 2 1 2)', + `66 0f 38 39 05 ${RIPRADDR} pminsdx ${RIPR}, %xmm0`], + ['i32x4.min_u', '(v128.const i32x4 1 2 1 2)', + `66 0f 38 3b 05 ${RIPRADDR} pminudx ${RIPR}, %xmm0`], + ['i32x4.max_s', '(v128.const i32x4 1 2 1 2)', + `66 0f 38 3d 05 ${RIPRADDR} pmaxsdx ${RIPR}, %xmm0`], + ['i32x4.max_u', '(v128.const i32x4 1 2 1 2)', + `66 0f 38 3f 05 ${RIPRADDR} pmaxudx ${RIPR}, %xmm0`], + ['i32x4.eq', '(v128.const i32x4 1 2 1 2)', + `66 0f 76 05 ${RIPRADDR} pcmpeqdx ${RIPR}, %xmm0`], + ['i32x4.ne', '(v128.const i32x4 1 2 1 2)', ` +66 0f 76 05 ${RIPRADDR} pcmpeqdx ${RIPR}, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i32x4.dot_i16x8_s', '(v128.const i32x4 1 2 1 2)', + `66 0f f5 05 ${RIPRADDR} pmaddwdx ${RIPR}, %xmm0`], + + ['i64x2.add', '(v128.const i64x2 1 2)', + `66 0f d4 05 ${RIPRADDR} paddqx ${RIPR}, %xmm0`], + + ['v128.and', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f db 05 ${RIPRADDR} pandx ${RIPR}, %xmm0`], + ['v128.or', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f eb 05 ${RIPRADDR} porx ${RIPR}, %xmm0`], + ['v128.xor', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f ef 05 ${RIPRADDR} pxorx ${RIPR}, %xmm0`]]); diff --git a/js/src/jit-test/tests/wasm/simd/binop-x86-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/binop-x86-ion-codegen.js new file mode 100644 index 0000000000..2cb5f2e969 --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/binop-x86-ion-codegen.js @@ -0,0 +1,20 @@ +// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x86 || getBuildConfiguration().simulator || isAvxPresent(); include:codegen-x86-test.js + +codegenTestX86_v128xLITERAL_v128( + [['f32x4.eq', '(v128.const f32x4 1 2 3 4)', + `0f c2 05 ${ABSADDR} 00 cmppsx \\$0x00, ${ABS}, %xmm0`], + ['f32x4.ne', '(v128.const f32x4 1 2 3 4)', + `0f c2 05 ${ABSADDR} 04 cmppsx \\$0x04, ${ABS}, %xmm0`], + ['f32x4.lt', '(v128.const f32x4 1 2 3 4)', + `0f c2 05 ${ABSADDR} 01 cmppsx \\$0x01, ${ABS}, %xmm0`], + ['f32x4.le', '(v128.const f32x4 1 2 3 4)', + `0f c2 05 ${ABSADDR} 02 cmppsx \\$0x02, ${ABS}, %xmm0`], + + ['f64x2.eq', '(v128.const f64x2 1 2)', + `66 0f c2 05 ${ABSADDR} 00 cmppdx \\$0x00, ${ABS}, %xmm0`], + ['f64x2.ne', '(v128.const f64x2 1 2)', + `66 0f c2 05 ${ABSADDR} 04 cmppdx \\$0x04, ${ABS}, %xmm0`], + ['f64x2.lt', '(v128.const f64x2 1 2)', + `66 0f c2 05 ${ABSADDR} 01 cmppdx \\$0x01, ${ABS}, %xmm0`], + ['f64x2.le', '(v128.const f64x2 1 2)', + `66 0f c2 05 ${ABSADDR} 02 cmppdx \\$0x02, ${ABS}, %xmm0`]]); diff --git a/js/src/jit-test/tests/wasm/simd/bitselect-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/bitselect-x64-ion-codegen.js new file mode 100644 index 0000000000..62951bce62 --- /dev/null +++ 
b/js/src/jit-test/tests/wasm/simd/bitselect-x64-ion-codegen.js
@@ -0,0 +1,45 @@
+// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x64 || getBuildConfiguration().simulator || isAvxPresent(); include:codegen-x64-test.js
+
+// Test that there are no extraneous moves or fixups for SIMD bitselect
+// operations. See README-codegen.md for general information about this type of
+// test case.
+
+// The codegen enforces onTrue == output, so we avoid a move to set that up.
+//
+// The remaining movdqa is currently unavoidable; it moves the control mask
+// into a temp. The temp should be identical to the mask, but the regalloc does
+// not currently allow this constraint to be enforced.
+
+// Inputs (xmm0, xmm1, xmm2)
+
+codegenTestX64_adhoc(
+`(module
+   (func (export "f") (param v128) (param v128) (param v128) (param v128) (result v128)
+     (v128.bitselect (local.get 0) (local.get 1) (local.get 2))))`,
+ 'f',
+`66 0f 6f da movdqa %xmm2, %xmm3
+66 0f db c3 pand %xmm3, %xmm0
+66 0f df d9 pandn %xmm1, %xmm3
+66 0f eb c3 por %xmm3, %xmm0`);
+
+// Blend constant optimizations
+
+codegenTestX64_adhoc(
+    `(module
+       (func (export "f") (param v128) (param v128) (param v128) (result v128)
+         (v128.bitselect (local.get 0) (local.get 1) (v128.const i32x4 -1 0 0 -1))))`,
+    'f',
+    `66 0f 3a 0e c1 c3 pblendw \\$0xC3, %xmm1, %xmm0`);
+
+// pblendvb optimization when bitselect follows comparison.
+// Non-AVX pblendvb uses xmm0 as an implicit read-only operand.
+codegenTestX64_adhoc(
+    `(module
+       (func (export "f") (param v128) (param v128) (param v128) (param v128) (result v128)
+         (v128.bitselect (local.get 2) (local.get 3)
+           (i32x4.eq (local.get 0) (local.get 1)))))`,
+    'f', `
+66 0f 76 c1 pcmpeqd %xmm1, %xmm0
+66 0f 6f cb movdqa %xmm3, %xmm1
+66 0f 38 10 ca pblendvb %xmm2, %xmm1
+66 0f 6f c1 movdqa %xmm1, %xmm0`);
diff --git a/js/src/jit-test/tests/wasm/simd/cmp-bitselect.js b/js/src/jit-test/tests/wasm/simd/cmp-bitselect.js
new file mode 100644
index 0000000000..0629455b71
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/cmp-bitselect.js
@@ -0,0 +1,107 @@
+// |jit-test| skip-if: !wasmSimdEnabled()
+// Tests that the combination of comparison and bitselect produces correct
+// results. On x86/64 platforms, the slow bitselect emulation is expected to
+// be replaced with its faster laneselect equivalent (pblendvb).
+// See bug 1751488 for more information.
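+
+// Why the rewrite is legal: a vector comparison yields all-zero or all-one
+// lanes, and for such masks a bitwise select and a per-lane blend pick the
+// same bytes. The scalar self-check below illustrates the identity;
+// refBitselect and refLaneselect are helpers local to this test, not part of
+// the harness.
+function refBitselect(a, b, mask) { return (a & mask) | (b & ~mask); }
+function refLaneselect(a, b, mask) { return mask ? a : b; }  // mask is 0 or -1
+for (let [a, b] of [[0x12, 0x34], [-1, 0], [0x7f, -0x80]]) {
+    for (let mask of [0, -1]) {
+        assertEq(refBitselect(a, b, mask), refLaneselect(a, b, mask));
+    }
+}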
+ +let verifyCodegen = _method => {}; +if (hasDisassembler() && wasmCompileMode() == "ion" && + getBuildConfiguration().x64 && !getBuildConfiguration().simulator) { + if (isAvxPresent()) { + verifyCodegen = method => { + assertEq(wasmDis(method, {asString: true}).includes('vpblendvb'), true); + }; + } else { + verifyCodegen = method => { + assertEq(wasmDis(method, {asString: true}).includes("pblendvb"), true); + }; + } +} + +const checkOps = { + eq(a, b) { return a == b; }, + ne(a, b) { return a != b; }, + lt(a, b) { return a < b; }, + le(a, b) { return a <= b; }, + gt(a, b) { return a > b; }, + ge(a, b) { return a >= b; }, +}; +const checkPattern = new Uint8Array(Array(32).fill(null).map((_, i) => i)); + +for (let [laneSize, aty_s, aty_u] of [ + [8, Int8Array, Uint8Array], [16, Int16Array, Uint16Array], + [32, Int32Array, Uint32Array], [64, BigInt64Array, BigUint64Array]]) { + const laneCount = 128 / laneSize; + const ty = `i${laneSize}x${laneCount}`; + for (let op of ['eq', 'ne', 'lt_s', 'le_s', 'gt_s', 'ge_s', 'lt_u', 'le_u', 'gt_u', 'ge_u']) { + if (laneSize == 64 && op.includes('_u')) continue; + const wrap = laneSize < 64 ? x => x : x => BigInt(x); + const aty = op.includes('_u') ? aty_u : aty_s; + const check = checkOps[op.replace(/_[us]$/, "")]; + // Items to test: 0, 1, all 1s, top half 1s, low half 1s, top bit 1 + const testData = new aty([wrap(0), wrap(1), ~wrap(0), ~wrap(0) << wrap(laneSize / 2), + ~((~wrap(0)) << wrap(laneSize / 2)), wrap(1) << wrap(laneSize - 1)]); + const ins = new WebAssembly.Instance(new WebAssembly.Module(wasmTextToBinary(`(module + (memory (export "memory") 1) + (func (export "run") + (v128.store (i32.const 32) + (v128.bitselect (v128.load (i32.const 64)) (v128.load (i32.const 80)) (${ty}.${op} (v128.load (i32.const 0)) (v128.load (i32.const 16))))) ))`))); + const mem = new aty(ins.exports.memory.buffer); + const memI8 = new Uint8Array(ins.exports.memory.buffer); + memI8.subarray(64, 96).set(checkPattern); + verifyCodegen(ins.exports.run); + for (let i = 0; i < testData.length; i++) { + for (let j = 0; j < testData.length; j++) { + for (let q = 0; q < laneCount; q++) { + mem[q] = testData[(i + q) % testData.length]; + mem[q + laneCount] = testData[(j + q) % testData.length]; + } + ins.exports.run(); + for (let q = 0; q < laneCount; q++) { + const val = check(mem[q], mem[q + laneCount]); + const n = laneSize >> 3; + for (let k = 0; k < n; k++) { + assertEq(checkPattern[q * n + k + (val ? 
0 : 16)], + memI8[32 + q * n + k]); + } + } + } + } + } +} + +for (let [laneSize, aty] of [[32, Float32Array], [64, Float64Array]]) { + const laneCount = 128 / laneSize; + const ty = `f${laneSize}x${laneCount}`; + for (let op of ['eq', 'ne', 'lt', 'le', 'gt', 'ge']) { + const check = checkOps[op]; + // Items to test: 0, 1, -1, PI, NaN, Inf, -0, -Inf + const testData = new aty([0, 1, -1, Math.PI, NaN, Infinity, 0/-Infinity, -Infinity]); + const ins = new WebAssembly.Instance(new WebAssembly.Module(wasmTextToBinary(`(module + (memory (export "memory") 1) + (func (export "run") + (v128.store (i32.const 32) + (v128.bitselect (v128.load (i32.const 64)) (v128.load (i32.const 80)) (${ty}.${op} (v128.load (i32.const 0)) (v128.load (i32.const 16))))) ))`))); + const mem = new aty(ins.exports.memory.buffer); + const memI8 = new Uint8Array(ins.exports.memory.buffer); + memI8.subarray(64, 96).set(checkPattern); + verifyCodegen(ins.exports.run); + for (let i = 0; i < testData.length; i++) { + for (let j = 0; j < testData.length; j++) { + for (let q = 0; q < laneCount; q++) { + mem[q] = testData[(i + q) % testData.length]; + mem[q + laneCount] = testData[(j + q) % testData.length]; + } + ins.exports.run(); + for (let q = 0; q < laneCount; q++) { + const val = check(mem[q], mem[q + laneCount]); + const n = laneSize >> 3; + for (let k = 0; k < n; k++) { + assertEq(checkPattern[q * n + k + (val ? 0 : 16)], + memI8[32 + q * n + k]); + } + } + } + } + } +} diff --git a/js/src/jit-test/tests/wasm/simd/cmp-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/cmp-x64-ion-codegen.js new file mode 100644 index 0000000000..b4fe1d0281 --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/cmp-x64-ion-codegen.js @@ -0,0 +1,77 @@ +// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x64 || getBuildConfiguration().simulator; include:codegen-x64-test.js + +// Test that there are no extraneous moves or fixups for various SIMD comparison +// operations. See README-codegen.md for general information about this type of +// test case. 
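+
+// SSE has no packed not-equal, less-or-equal, or greater-or-equal integer
+// compares, so in the patterns below those are synthesized from an eq/gt
+// compare followed by a complement: pcmpeqw %xmm15, %xmm15 creates an
+// all-ones mask and pxor with it inverts the comparison result.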
+ +// Inputs (xmm0, xmm1) + +codegenTestX64_v128xv128_v128( + [['i8x16.gt_s', `66 0f 64 c1 pcmpgtb %xmm1, %xmm0`], + ['i16x8.gt_s', `66 0f 65 c1 pcmpgtw %xmm1, %xmm0`], + ['i32x4.gt_s', `66 0f 66 c1 pcmpgtd %xmm1, %xmm0`], + ['i8x16.le_s', ` +66 0f 64 c1 pcmpgtb %xmm1, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0 +`], + ['i16x8.le_s', ` +66 0f 65 c1 pcmpgtw %xmm1, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0 +`], + ['i32x4.le_s', ` +66 0f 66 c1 pcmpgtd %xmm1, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0 +`], + ['i8x16.eq', `66 0f 74 c1 pcmpeqb %xmm1, %xmm0`], + ['i16x8.eq', `66 0f 75 c1 pcmpeqw %xmm1, %xmm0`], + ['i32x4.eq', `66 0f 76 c1 pcmpeqd %xmm1, %xmm0`], + ['i8x16.ne', ` +66 0f 74 c1 pcmpeqb %xmm1, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0 +`], + ['i16x8.ne', ` +66 0f 75 c1 pcmpeqw %xmm1, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0 +`], + ['i32x4.ne', ` +66 0f 76 c1 pcmpeqd %xmm1, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0 +`], + ['f32x4.eq', `0f c2 c1 00 cmpps \\$0x00, %xmm1, %xmm0`], + ['f32x4.ne', `0f c2 c1 04 cmpps \\$0x04, %xmm1, %xmm0`], + ['f32x4.lt', `0f c2 c1 01 cmpps \\$0x01, %xmm1, %xmm0`], + ['f32x4.le', `0f c2 c1 02 cmpps \\$0x02, %xmm1, %xmm0`], + ['f64x2.eq', `66 0f c2 c1 00 cmppd \\$0x00, %xmm1, %xmm0`], + ['f64x2.ne', `66 0f c2 c1 04 cmppd \\$0x04, %xmm1, %xmm0`], + ['f64x2.lt', `66 0f c2 c1 01 cmppd \\$0x01, %xmm1, %xmm0`], + ['f64x2.le', `66 0f c2 c1 02 cmppd \\$0x02, %xmm1, %xmm0`]] ); + +// Inputs (xmm1, xmm0) because the operation reverses its arguments. + +codegenTestX64_v128xv128_v128_reversed( + [['i8x16.ge_s', ` +66 0f 64 c1 pcmpgtb %xmm1, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i16x8.ge_s', +` +66 0f 65 c1 pcmpgtw %xmm1, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i32x4.ge_s', ` +66 0f 66 c1 pcmpgtd %xmm1, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i8x16.lt_s', `66 0f 64 c1 pcmpgtb %xmm1, %xmm0`], + ['i16x8.lt_s', `66 0f 65 c1 pcmpgtw %xmm1, %xmm0`], + ['i32x4.lt_s', `66 0f 66 c1 pcmpgtd %xmm1, %xmm0`], + ['f32x4.gt', `0f c2 c1 01 cmpps \\$0x01, %xmm1, %xmm0`], + ['f32x4.ge', `0f c2 c1 02 cmpps \\$0x02, %xmm1, %xmm0`], + ['f64x2.gt', `66 0f c2 c1 01 cmppd \\$0x01, %xmm1, %xmm0`], + ['f64x2.ge', `66 0f c2 c1 02 cmppd \\$0x02, %xmm1, %xmm0`]] ); diff --git a/js/src/jit-test/tests/wasm/simd/const-arm64-vixl-codegen.js b/js/src/jit-test/tests/wasm/simd/const-arm64-vixl-codegen.js new file mode 100644 index 0000000000..9dc08c6e6b --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/const-arm64-vixl-codegen.js @@ -0,0 +1,109 @@ +// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "baseline" || !getBuildConfiguration().arm64 + +// Test that the vixl logic for v128 constant loads is at least somewhat +// reasonable. + +var lead = `0x[0-9a-f]+ +[0-9a-f]{8} +`; + +var prefix = `${lead}sub sp, sp, #0x.. \\(..\\) +${lead}str x23, \\[sp, #..\\]`; + +var suffix = +`${lead}b #\\+0x8 \\(addr 0x.*\\) +${lead}brk #0x0`; + +for ( let [bits, expected, values] of [ + // If high == low and the byte is 0 or ff then a single movi is sufficient. 
+    ['i8x16 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00', `
+${prefix}
+${lead}movi v0\\.2d, #0x0
+${suffix}
+`,
+     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
+
+    ['i8x16 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0', `
+${prefix}
+${lead}movi v0\\.2d, #0xff00ff00ff00ff
+${suffix}
+`,
+     [-1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0]],
+
+    // Splattable small things (up to a byte, at a byte location)
+    // can also use just one instruction.
+    ['i32x4 1 1 1 1', `
+${prefix}
+${lead}movi v0\\.4s, #0x1, lsl #0
+${suffix}
+`,
+     [1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0]],
+
+    ['i32x4 0x300 0x300 0x300 0x300', `
+${prefix}
+${lead}movi v0\\.4s, #0x3, lsl #8
+${suffix}
+`,
+     [0, 3, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0]],
+
+    // If high == low but the value is more complex, then a constant load
+    // plus a dup is sufficient. x16 is the designated temp.
+    ['i32x4 1 2 1 2', `
+${prefix}
+${lead}mov x16, #0x1
+${lead}movk x16, #0x2, lsl #32
+${lead}dup v0\\.2d, x16
+${suffix}
+`,
+     [1, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0]],
+
+    // If high != low then we degenerate to a more complicated pattern: dup the low value
+    // and then overwrite the high part with the high value.
+    ['i32x4 1 2 2 1', `
+${prefix}
+${lead}mov x16, #0x1
+${lead}movk x16, #0x2, lsl #32
+${lead}dup v0\\.2d, x16
+${lead}mov x16, #0x2
+${lead}movk x16, #0x1, lsl #32
+${lead}mov v0\\.d\\[1\\], x16
+${suffix}
+`,
+     [1, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0]],
+
+    // Things are not always bleak, and vixl finds a way.
+    ['i32x4 1 1 2 2', `
+${prefix}
+${lead}movi v0\\.4s, #0x1, lsl #0
+${lead}mov x16, #0x200000002
+${lead}mov v0\\.d\\[1\\], x16
+${suffix}
+`,
+     [1, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0]],
+] ) {
+    let ins = wasmEvalText(`
+  (module
+    (memory (export "mem") 1)
+    (func (export "run")
+      (v128.store (i32.const 0) (call $f)))
+    (func $f (export "f") (result v128)
+      (v128.const ${bits})))`);
+    let output = wasmDis(ins.exports.f, {tier:"baseline", asString:true});
+    assertEq(output.match(new RegExp(expected)) != null, true);
+    let mem = new Int8Array(ins.exports.mem.buffer);
+    set(mem, 0, iota(16).map(x => -1-x));
+    ins.exports.run();
+    assertSame(get(mem, 0, 16), values);
+}
+
+function get(arr, loc, len) {
+    let res = [];
+    for ( let i=0; i < len; i++ ) {
+        res.push(arr[loc+i]);
+    }
+    return res;
+}
+
+function set(arr, loc, vals) {
+    for ( let i=0; i < vals.length; i++ ) {
+        arr[loc+i] = vals[i];
+    }
+}
diff --git a/js/src/jit-test/tests/wasm/simd/const-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/const-x64-ion-codegen.js
new file mode 100644
index 0000000000..04a00b538d
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/const-x64-ion-codegen.js
@@ -0,0 +1,28 @@
+// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x64 || getBuildConfiguration().simulator; include:codegen-x64-test.js
+
+// Test that constants that can be synthesized are synthesized. See README-codegen.md
+// for general information about this type of test case.
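+
+// The synthesized constants below use standard x86 idioms that avoid a
+// 16-byte constant-pool load: pxor of a register with itself yields all zero
+// bits, and pcmpeqw of a register with itself yields all one bits.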
+
+codegenTestX64_unit_v128(
+  [['v128.const i8x16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0',
+    `66 0f ef c0               pxor %xmm0, %xmm0`],
+   ['v128.const i8x16 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1',
+    `66 0f 75 c0               pcmpeqw %xmm0, %xmm0`],
+   ['v128.const i16x8 0 0 0 0 0 0 0 0',
+    `66 0f ef c0               pxor %xmm0, %xmm0`],
+   ['v128.const i16x8 -1 -1 -1 -1 -1 -1 -1 -1',
+    `66 0f 75 c0               pcmpeqw %xmm0, %xmm0`],
+   ['v128.const i32x4 0 0 0 0',
+    `66 0f ef c0               pxor %xmm0, %xmm0`],
+   ['v128.const i32x4 -1 -1 -1 -1',
+    `66 0f 75 c0               pcmpeqw %xmm0, %xmm0`],
+   ['v128.const i64x2 0 0',
+    `66 0f ef c0               pxor %xmm0, %xmm0`],
+   ['v128.const i64x2 -1 -1',
+    `66 0f 75 c0               pcmpeqw %xmm0, %xmm0`],
+   ['v128.const f32x4 0 0 0 0',
+    // Arguably this should be xorps, but that's for later.
+    `66 0f ef c0               pxor %xmm0, %xmm0`],
+   ['v128.const f64x2 0 0',
+    // Arguably this should be xorpd, but that's for later.
+    `66 0f ef c0               pxor %xmm0, %xmm0`]] );
diff --git a/js/src/jit-test/tests/wasm/simd/cvt-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/cvt-x64-ion-codegen.js
new file mode 100644
index 0000000000..1e2d613c1a
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/cvt-x64-ion-codegen.js
@@ -0,0 +1,27 @@
+// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x64 || getBuildConfiguration().simulator || isAvxPresent(); include:codegen-x64-test.js
+
+// Test that there are no extraneous moves for various SIMD conversion
+// operations. See README-codegen.md for general information about this type of
+// test case.
+
+// Note: these tests check the beginning of the output but not the end.
+
+codegenTestX64_v128_v128(
+  [['i32x4.trunc_sat_f32x4_s',
+    // The movaps is dest -> scratch and needs to be here. The test is
+    // asserting that there is not an additional (redundant) move here.
+` +44 0f 28 f8 movaps %xmm0, %xmm15 +45 0f c2 ff 00 cmpps \\$0x00, %xmm15, %xmm15 +66 41 0f db c7 pand %xmm15, %xmm0`], + ['i32x4.trunc_sat_f32x4_u', ` +45 0f 57 ff xorps %xmm15, %xmm15 +41 0f 5f c7 maxps %xmm15, %xmm0`], + ['f32x4.convert_i32x4_u', ` +66 45 0f ef ff pxor %xmm15, %xmm15 +66 44 0f 3a 0e f8 55 pblendw \\$0x55, %xmm0, %xmm15 +66 41 0f fa c7 psubd %xmm15, %xmm0 +45 0f 5b ff cvtdq2ps %xmm15, %xmm15`]], + {no_suffix:true}); + + diff --git a/js/src/jit-test/tests/wasm/simd/debug-bug1644759.js b/js/src/jit-test/tests/wasm/simd/debug-bug1644759.js new file mode 100644 index 0000000000..0de46e0f0c --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/debug-bug1644759.js @@ -0,0 +1,20 @@ +// |jit-test| skip-if: !wasmDebuggingEnabled() || !wasmSimdEnabled() + +var g7 = newGlobal({newCompartment: true}); +g7.parent = this; +g7.eval(` + Debugger(parent).onEnterFrame = function(frame) { }; +`); +var ins = wasmEvalText(` + (memory (export "mem") 1 1) + (func (export "run") + (param $k i32) + (v128.store (i32.const 0) (call $f (local.get $k))) + ) + (func $f + (param $k i32) + (result v128) + (v128.const i32x4 5 6 7 8) + ) +`); +ins.exports.run(0); diff --git a/js/src/jit-test/tests/wasm/simd/directives.txt b/js/src/jit-test/tests/wasm/simd/directives.txt new file mode 100644 index 0000000000..3e89e7550b --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/directives.txt @@ -0,0 +1 @@ +|jit-test| test-also=--wasm-compiler=baseline; test-also=--wasm-compiler=optimizing; test-also=--wasm-test-serialization; test-also=--wasm-compiler=optimizing --no-avx; skip-variant-if: --wasm-compiler=optimizing --no-avx, !getBuildConfiguration().x86 && !getBuildConfiguration().x64 || getBuildConfiguration().simulator; include:wasm.js diff --git a/js/src/jit-test/tests/wasm/simd/disabled.js b/js/src/jit-test/tests/wasm/simd/disabled.js new file mode 100644 index 0000000000..feae414697 --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/disabled.js @@ -0,0 +1,28 @@ +// |jit-test| skip-if: wasmSimdEnabled() + +// ../binary.js checks that all SIMD extended opcodes in the 0..255 range are +// rejected if !wasmSimdEnabled, so no need to check that here. + +// Non-opcode cases that should also be rejected, lest feature sniffing may +// erroneously conclude that simd is available when it's not. The error message +// may differ depending on ENABLE_WASM_SIMD: if SIMD is compiled in we usually +// get a sensible error about v128; if not, we get something generic. + +wasmFailValidateText(`(module (func (param v128)))`, + /(v128 not enabled)|(bad type)/); + +wasmFailValidateText(`(module (func (result v128)))`, + /(v128 not enabled)|(bad type)/); + +wasmFailValidateText(`(module (func (local v128)))`, + /(v128 not enabled)|(bad type)|(SIMD support is not enabled)/); + +wasmFailValidateText(`(module (global (import "m" "g") v128))`, + /expected global type/); + +wasmFailValidateText(`(module (global (import "m" "g") (mut v128)))`, + /expected global type/); + +wasmFailValidateText(`(module (global i32 (v128.const i32x4 0 0 0 0)))`, + /(v128 not enabled)|(unrecognized opcode)/); + diff --git a/js/src/jit-test/tests/wasm/simd/experimental.js b/js/src/jit-test/tests/wasm/simd/experimental.js new file mode 100644 index 0000000000..3f4a85ae75 --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/experimental.js @@ -0,0 +1,411 @@ +// |jit-test| --wasm-relaxed-simd; skip-if: !wasmRelaxedSimdEnabled() + +// Experimental opcodes. We have no text parsing support for these yet. 
The +// tests will be cleaned up and moved into ad-hack.js if the opcodes are +// adopted. + +load(libdir + "wasm-binary.js"); + +function wasmEval(bytes, imports) { + return new WebAssembly.Instance(new WebAssembly.Module(bytes), imports); +} + +function wasmValidateAndEval(bytes, imports) { + assertEq(WebAssembly.validate(bytes), true, "test of WasmValidate.cpp"); + return wasmEval(bytes, imports); +} + +function get(arr, loc, len) { + let res = []; + for ( let i=0; i < len; i++ ) { + res.push(arr[loc+i]); + } + return res; +} + +function set(arr, loc, vals) { + for ( let i=0; i < vals.length; i++ ) { + if (arr instanceof BigInt64Array) { + arr[loc+i] = BigInt(vals[i]); + } else { + arr[loc+i] = vals[i]; + } + } +} + +const v2vSig = {args:[], ret:VoidCode}; + +function V128Load(addr) { + return [I32ConstCode, varS32(addr), + SimdPrefix, V128LoadCode, 4, varU32(0)] +} + +function V128StoreExpr(addr, v) { + return [I32ConstCode, varS32(addr), + ...v, + SimdPrefix, V128StoreCode, 4, varU32(0)]; +} + +// FMA/FNMA, https://github.com/WebAssembly/relaxed-simd/issues/27 and +// https://github.com/WebAssembly/relaxed-simd/pull/81 + +function fma(x, y, a) { return (x * y) + a; } +function fnma(x, y, a) { return - (x * y) + a; } + +var fxs = [10, 20, 30, 40]; +var fys = [-2, -3, -4, -5]; +var fas = [0, 100, 500, 700]; +var dxs = [10, 20]; +var dys = [-2, -3]; +var das = [0, 100]; + +for ( let [opcode, xs, ys, as, operator] of [[F32x4RelaxedFmaCode, fxs, fys, fas, fma], + [F32x4RelaxedFnmaCode, fxs, fys, fas, fnma], + [F64x2RelaxedFmaCode, dxs, dys, das, fma], + [F64x2RelaxedFnmaCode, dxs, dys, das, fnma]] ) { + var k = xs.length; + var ans = iota(k).map((i) => operator(xs[i], ys[i], as[i])) + + var ins = wasmValidateAndEval(moduleWithSections([ + sigSection([v2vSig]), + declSection([0]), + memorySection(1), + exportSection([{funcIndex: 0, name: "run"}, + {memIndex: 0, name: "mem"}]), + bodySection([ + funcBody({locals:[], + body: [...V128StoreExpr(0, [...V128Load(16), + ...V128Load(32), + ...V128Load(48), + SimdPrefix, varU32(opcode)])]})])])); + + var mem = new (k == 4 ? 
Float32Array : Float64Array)(ins.exports.mem.buffer); + set(mem, k, xs); + set(mem, 2*k, ys); + set(mem, 3*k, as); + ins.exports.run(); + var result = get(mem, 0, k); + assertSame(result, ans); + + assertEq(false, WebAssembly.validate(moduleWithSections([ + sigSection([v2vSig]), + declSection([0]), + memorySection(1), + exportSection([{funcIndex: 0, name: "run"}, + {memIndex: 0, name: "mem"}]), + bodySection([ + funcBody({locals:[], + body: [...V128StoreExpr(0, [...V128Load(0), + ...V128Load(0), + SimdPrefix, varU32(opcode)])]})])]))); +} + +// Relaxed swizzle, https://github.com/WebAssembly/relaxed-simd/issues/22 + +var ins = wasmValidateAndEval(moduleWithSections([ + sigSection([v2vSig]), + declSection([0]), + memorySection(1), + exportSection([{funcIndex: 0, name: "run"}, + {memIndex: 0, name: "mem"}]), + bodySection([ + funcBody({locals:[], + body: [...V128StoreExpr(0, [...V128Load(16), + ...V128Load(32), + SimdPrefix, varU32(I8x16RelaxedSwizzleCode)])]})])])); +var mem = new Uint8Array(ins.exports.mem.buffer); +var test = [1, 4, 3, 7, 123, 0, 8, 222]; +set(mem, 16, test); +for (let [i, s] of [[0, 0], [0, 1], [1,1], [1, 3], [7,5]]) { + var ans = new Uint8Array(16); + for (let j = 0; j < 16; j++) { + mem[32 + j] = (j * s + i) & 15; + ans[j] = test[(j * s + i) & 15]; + } + ins.exports.run(); + var result = get(mem, 0, 16); + assertSame(result, ans); +} + +assertEq(false, WebAssembly.validate(moduleWithSections([ + sigSection([v2vSig]), + declSection([0]), + memorySection(1), + bodySection([ + funcBody({locals:[], + body: [...V128StoreExpr(0, [...V128Load(16), + SimdPrefix, varU32(I8x16RelaxedSwizzleCode)])]})])]))); + + +// Relaxed MIN/MAX, https://github.com/WebAssembly/relaxed-simd/issues/33 + +const Neg0 = -1/Infinity; +var minMaxTests = [ + {a: 0, b: 0, min: 0, max: 0, }, + {a: Neg0, b: Neg0, min: Neg0, max: Neg0, }, + {a: 1/3, b: 2/3, min: 1/3, max: 2/3, }, + {a: -1/3, b: -2/3, min: -2/3, max: -1/3, }, + {a: -1000, b: 1, min: -1000, max: 1, }, + {a: 10, b: -2, min: -2, max: 10, }, +]; + +for (let k of [4, 2]) { + const minOpcode = k == 4 ? F32x4RelaxedMinCode : F64x2RelaxedMinCode; + const maxOpcode = k == 4 ? F32x4RelaxedMaxCode : F64x2RelaxedMaxCode; + + var ins = wasmValidateAndEval(moduleWithSections([ + sigSection([v2vSig]), + declSection([0, 0]), + memorySection(1), + exportSection([{funcIndex: 0, name: "min"}, + {funcIndex: 1, name: "max"}, + {memIndex: 0, name: "mem"}]), + bodySection([ + funcBody({locals:[], + body: [...V128StoreExpr(0, [...V128Load(16), + ...V128Load(32), + SimdPrefix, varU32(minOpcode)])]}), + funcBody({locals:[], + body: [...V128StoreExpr(0, [...V128Load(16), + ...V128Load(32), + SimdPrefix, varU32(maxOpcode)])]})])])); + for (let i = 0; i < minMaxTests.length; i++) { + var Ty = k == 4 ? 
Float32Array : Float64Array; + var mem = new Ty(ins.exports.mem.buffer); + var minResult = new Ty(k); + var maxResult = new Ty(k); + for (let j = 0; j < k; j++) { + const {a, b, min, max } = minMaxTests[(j + i) % minMaxTests.length]; + mem[j + k] = a; + mem[j + k * 2] = b; + minResult[j] = min; + maxResult[j] = max; + } + ins.exports.min(); + var result = get(mem, 0, k); + assertSame(result, minResult); + ins.exports.max(); + var result = get(mem, 0, k); + assertSame(result, maxResult); + } + + for (let op of [minOpcode, maxOpcode]) { + assertEq(false, WebAssembly.validate(moduleWithSections([ + sigSection([v2vSig]), + declSection([0, 0]), + memorySection(1), + exportSection([]), + bodySection([ + funcBody({locals:[], + body: [...V128StoreExpr(0, [...V128Load(0), + SimdPrefix, varU32(op)])]})])]))); + } +} + +// Relaxed I32x4.TruncFXXX, https://github.com/WebAssembly/relaxed-simd/issues/21 + +var ins = wasmValidateAndEval(moduleWithSections([ + sigSection([v2vSig]), + declSection([0, 0, 0, 0]), + memorySection(1), + exportSection([{funcIndex: 0, name: "from32s"}, + {funcIndex: 1, name: "from32u"}, + {funcIndex: 2, name: "from64s"}, + {funcIndex: 3, name: "from64u"}, + {memIndex: 0, name: "mem"}]), + bodySection([ + funcBody({locals:[], + body: [...V128StoreExpr(0, [...V128Load(16), + SimdPrefix, varU32(I32x4RelaxedTruncSSatF32x4Code)])]}), + funcBody({locals:[], + body: [...V128StoreExpr(0, [...V128Load(16), + SimdPrefix, varU32(I32x4RelaxedTruncUSatF32x4Code)])]}), + funcBody({locals:[], + body: [...V128StoreExpr(0, [...V128Load(16), + SimdPrefix, varU32(I32x4RelaxedTruncSatF64x2SZeroCode)])]}), + funcBody({locals:[], + body: [...V128StoreExpr(0, [...V128Load(16), + SimdPrefix, varU32(I32x4RelaxedTruncSatF64x2UZeroCode)])]})])])); + +var mem = ins.exports.mem.buffer; +set(new Float32Array(mem), 4, [0, 2.3, -3.4, 100000]); +ins.exports.from32s(); +var result = get(new Int32Array(mem), 0, 4); +assertSame(result, [0, 2, -3, 100000]); + +set(new Float32Array(mem), 4, [0, 3.3, 0x80000000, 200000]); +ins.exports.from32u(); +var result = get(new Uint32Array(mem), 0, 4); +assertSame(result, [0, 3, 0x80000000, 200000]); +set(new Float32Array(mem), 4, [0, 0x80000100, 0x80000101, 0xFFFFFF00]); +ins.exports.from32u(); +var result = get(new Uint32Array(mem), 0, 4); +assertSame(result, [0, 0x80000100, 0x80000100, 0xFFFFFF00]); + +set(new Float64Array(mem), 2, [200000.3, -3.4]); +ins.exports.from64s(); +var result = get(new Int32Array(mem), 0, 4); +assertSame(result, [200000, -3, 0, 0]); +set(new Float64Array(mem), 2, [0x90000000 + 0.1, 0]); +ins.exports.from64u(); +var result = get(new Uint32Array(mem), 0, 4); +assertSame(result, [0x90000000, 0, 0, 0]); + +for (let op of [I32x4RelaxedTruncSSatF32x4Code, I32x4RelaxedTruncUSatF32x4Code, + I32x4RelaxedTruncSatF64x2SZeroCode, I32x4RelaxedTruncSatF64x2UZeroCode]) { + assertEq(false, WebAssembly.validate(moduleWithSections([ + sigSection([v2vSig]), + declSection([0]), + memorySection(1), + exportSection([]), + bodySection([ + funcBody({locals:[], + body: [...V128StoreExpr(0, [SimdPrefix, varU32(op)])]})])]))); +} + +// Relaxed blend / laneselect, https://github.com/WebAssembly/relaxed-simd/issues/17 + +for (let [k, opcode, AT] of [[1, I8x16RelaxedLaneSelectCode, Int8Array], + [2, I16x8RelaxedLaneSelectCode, Int16Array], + [4, I32x4RelaxedLaneSelectCode, Int32Array], + [8, I64x2RelaxedLaneSelectCode, BigInt64Array]]) { + + var ins = wasmValidateAndEval(moduleWithSections([ + sigSection([v2vSig]), + declSection([0]), + memorySection(1), + 
exportSection([{funcIndex: 0, name: "run"}, + {memIndex: 0, name: "mem"}]), + bodySection([ + funcBody({locals:[], + body: [...V128StoreExpr(0, [...V128Load(16), + ...V128Load(32), + ...V128Load(48), + SimdPrefix, varU32(opcode)])]})])])); + + var mem = ins.exports.mem.buffer; + var mem8 = new Uint8Array(mem); + set(mem8, 16, [1,2,3,4,0,0,0,0,100,0,102,0,0,250,251,252,253]); + set(mem8, 32, [0,0,0,0,5,6,7,8,0,101,0,103,0,254,255,0,1]); + var c = new AT(mem, 48, 16 / k); + for (let i = 0; i < c.length; i++) { + // Use popcnt to randomize 0 and ~0 + const popcnt_i = i.toString(2).replace(/0/g, "").length; + const v = popcnt_i & 1 ? -1 : 0 + c[i] = k == 8 ? BigInt(v) : v; + } + ins.exports.run(); + for (let i = 0; i < 16; i++) { + const r = c[(i / k) | 0] ? mem8[16 + i] : mem8[32 + i]; + assertEq(r, mem8[i]); + } + + assertEq(false, WebAssembly.validate(moduleWithSections([ + sigSection([v2vSig]), + declSection([0]), + memorySection(1), + exportSection([{funcIndex: 0, name: "run"}, + {memIndex: 0, name: "mem"}]), + bodySection([ + funcBody({locals:[], + body: [...V128StoreExpr(0, [...V128Load(0), + ...V128Load(0), + SimdPrefix, varU32(opcode)])]})])]))); +} + + +// Relaxed rounding q-format multiplication. +var ins = wasmValidateAndEval(moduleWithSections([ + sigSection([v2vSig]), + declSection([0]), + memorySection(1), + exportSection([{funcIndex: 0, name: "relaxed_q15mulr_s"}, + {memIndex: 0, name: "mem"}]), + bodySection([ + funcBody({locals:[], + body: [...V128StoreExpr(0, [...V128Load(16), + ...V128Load(32), + SimdPrefix, varU32(I16x8RelaxedQ15MulrS)])]})])])); + +var mem16 = new Int16Array(ins.exports.mem.buffer); +for (let [as, bs] of cross([ + [1, -3, 5, -7, 11, -13, -17, 19], + [-1, 0, 16, -32, 64, 128, -1024, 0, 1], + [1,2,-32768,32767,1,4,-32768,32767]]) ) { + set(mem16, 8, as); + set(mem16, 16, bs); + ins.exports.relaxed_q15mulr_s(); + const result = get(mem16, 0, 8); + for (let i = 0; i < 8; i++) { + const expected = (as[i] * bs[i] + 0x4000) >> 15; + if (as[i] == -32768 && bs[i] == -32768) continue; + assertEq(expected, result[i], `result of ${as[i]} * ${bs[i]}`); + } +} + + +// Check relaxed dot product results. 
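+// For reference, each i16 lane of the result is the sum of two adjacent
+// i8*i7 products, wrapped to int32; a minimal JS model of one output lane
+// (a hypothetical helper that mirrors the expected-value computation in the
+// loop below, not used by the test itself):
+function dotPairRef(xs, ys, i) {
+  // Lane i sums the products of elements 2*i and 2*i+1.
+  return ((xs[i * 2] * ys[i * 2]) + (xs[i * 2 + 1] * ys[i * 2 + 1])) | 0;
+}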
+var ins = wasmValidateAndEval(moduleWithSections([ + sigSection([v2vSig]), + declSection([0]), + memorySection(1), + exportSection([{funcIndex: 0, name: "dot_i8x16_i7x16_s"}, + {memIndex: 0, name: "mem"}]), + bodySection([ + funcBody({locals:[], + body: [...V128StoreExpr(0, [...V128Load(16), + ...V128Load(32), + SimdPrefix, varU32(I16x8DotI8x16I7x16S)])]})])])); +var mem8 = new Int8Array(ins.exports.mem.buffer); +var mem16 = new Int16Array(ins.exports.mem.buffer); +var test7bit = [1, 2, 3, 4, 5, 64, 65, 127, 127, 0, 0, + 1, 65, 64, 2, 3, 0, 0, 127, 127, 5, 4]; +var testNeg = test7bit.concat(test7bit.map(i => ~i)); +for (let ai = 0; ai < testNeg.length - 15; ai++) + for (let bi = 0; bi < test7bit.length - 15; bi++) { + set(mem8, 16, testNeg.slice(ai, ai + 16)); + set(mem8, 32, test7bit.slice(bi, bi + 16)); + ins.exports.dot_i8x16_i7x16_s(); + const result = get(mem16, 0, 8); + for (let i = 0; i < 8; i++) { + const expected = ((testNeg[ai + i * 2] * test7bit[bi + i * 2]) + + (testNeg[ai + i * 2 + 1] * test7bit[bi + i * 2 + 1])) | 0; + assertEq(expected, result[i]); + } + } + +var ins = wasmValidateAndEval(moduleWithSections([ + sigSection([v2vSig]), + declSection([0]), + memorySection(1), + exportSection([{funcIndex: 0, name: "dot_i8x16_i7x16_add_s"}, + {memIndex: 0, name: "mem"}]), + bodySection([ + funcBody({locals:[], + body: [...V128StoreExpr(0, [...V128Load(16), + ...V128Load(32), + ...V128Load(48), + SimdPrefix, varU32(I32x4DotI8x16I7x16AddS)])]})])])); +var mem8 = new Int8Array(ins.exports.mem.buffer); +var mem32 = new Int32Array(ins.exports.mem.buffer); +var test7bit = [1, 2, 3, 4, 5, 64, 65, 127, 127, 0, 0, + 1, 65, 64, 2, 3, 0, 0, 127, 127, 5, 4]; +var testNeg = test7bit.concat(test7bit.map(i => ~i)); +var testAcc = [0, 12, 65336, -1, 0x10000000, -0xffffff]; +for (let ai = 0; ai < testNeg.length - 15; ai++) + for (let bi = 0; bi < test7bit.length - 15; bi++) + for (let ci = 0; ci < testAcc.length - 3; ci++) { + set(mem8, 16, testNeg.slice(ai, ai + 16)); + set(mem8, 32, test7bit.slice(bi, bi + 16)); + set(mem32, 48/4, testAcc.slice(ci, ci + 4)); + ins.exports.dot_i8x16_i7x16_add_s(); + const result = get(mem32, 0, 4); + for (let i = 0; i < 4; i++) { + const a1 = (testNeg[ai + i * 4] * test7bit[bi + i * 4]) + + (testNeg[ai + i * 4 + 1] * test7bit[bi + i * 4 + 1]); + const a2 = (testNeg[ai + i * 4 + 2] * test7bit[bi + i * 4 + 2]) + + (testNeg[ai + i * 4 + 3] * test7bit[bi + i * 4 + 3]); + const expected = (testAcc[ci + i] + a1 + a2) | 0; + assertEq(expected, result[i]); + } + } diff --git a/js/src/jit-test/tests/wasm/simd/ion-analysis.js b/js/src/jit-test/tests/wasm/simd/ion-analysis.js new file mode 100644 index 0000000000..723b42b40b --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/ion-analysis.js @@ -0,0 +1,902 @@ +// |jit-test| skip-if: !wasmSimdEnabled() || wasmCompileMode() != "ion" || !this.wasmSimdAnalysis + +// White-box tests for SIMD optimizations. These are sensitive to internal +// details of the front-end and lowering logic, which is partly platform-dependent. +// +// In DEBUG builds, the testing function wasmSimdAnalysis() returns a string +// describing the last decision made by the SIMD lowering code: to perform an +// optimized lowering or the default byte shuffle+blend for i8x16.shuffle; to +// shift by a constant or a variable for the various shifts; and so on. +// +// We test that the expected transformation applies, and that the machine code +// generates the expected result. 
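+//
+// For instance, this is the shape of every check below (a minimal sketch;
+// wasmCompile and wasmSimdAnalysis are the helpers this file already uses,
+// and this particular case is exercised again by the shift loop further down):
+
+wasmCompile(`(module (func (param v128) (param i32) (result v128)
+  (i8x16.shl (local.get 0) (i32.const 5))))`);
+assertEq(wasmSimdAnalysis().match(/shift -> constant shift/).length, 1);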
+ +var isArm64 = getBuildConfiguration().arm64; + +// 32-bit permutation that is not a rotation. +let perm32x4_pattern = [4, 5, 6, 7, 12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3]; + +// Operands the same, dword permutation +{ + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16))))) + (func $f (param v128) (result v128) + (i8x16.shuffle ${perm32x4_pattern.join(' ')} (local.get 0) (local.get 0))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> permute 32x4"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), perm32x4_pattern); +} + +// Right operand ignored, dword permutation +{ + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)) (v128.load (i32.const 32))))) + (func $f (param v128) (param v128) (result v128) + (i8x16.shuffle ${perm32x4_pattern.join(' ')} (local.get 0) (local.get 1))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> permute 32x4"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + set(mem, 32, iota(16).map(x => x+16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), perm32x4_pattern); +} + +// Left operand ignored, dword permutation +{ + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)) (v128.load (i32.const 32))))) + (func $f (param v128) (param v128) (result v128) + (i8x16.shuffle ${perm32x4_pattern.map(x => x+16).join(' ')} (local.get 0) (local.get 1))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> permute 32x4"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16).map(x => x+16)); + set(mem, 32, iota(16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), perm32x4_pattern); +} + +// Operands the same, word permutation on both sides of the qword divide, with a qword swap +{ + let perm16x8_pattern = [12, 13, 14, 15, 10, 11, 8, 9, + 6, 7, 4, 5, 2, 3, 0, 1]; + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16))))) + (func $f (param v128) (result v128) + (i8x16.shuffle ${perm16x8_pattern.join(' ')} (local.get 0) (local.get 0))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> permute 16x8"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), perm16x8_pattern); +} + +// Operands the same, word permutation on both sides of the qword divide, no qword swap +{ + let perm16x8_pattern = [ 6, 7, 4, 5, 2, 3, 0, 1, + 12, 13, 14, 15, 10, 11, 8, 9]; + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16))))) + (func $f (param v128) (result v128) + (i8x16.shuffle ${perm16x8_pattern.join(' ')} (local.get 0) (local.get 0))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> permute 16x8"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), perm16x8_pattern); +} + +// Operands the same, word permutation on low side of the qword divide, no qword swap +{ + let perm16x8_pattern = [ 6, 7, 4, 5, 2, 3, 0, 1, + 8, 9, 10, 11, 12, 13, 14, 15]; + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export 
"run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16))))) + (func $f (param v128) (result v128) + (i8x16.shuffle ${perm16x8_pattern.join(' ')} (local.get 0) (local.get 0))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> permute 16x8"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), perm16x8_pattern); +} + +// Operands the same, word permutation on high side of the qword divide, no qword swap +{ + let perm16x8_pattern = [ 0, 1, 2, 3, 4, 5, 6, 7, + 12, 13, 14, 15, 10, 11, 8, 9]; + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16))))) + (func $f (param v128) (result v128) + (i8x16.shuffle ${perm16x8_pattern.join(' ')} (local.get 0) (local.get 0))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> permute 16x8"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), perm16x8_pattern); +} + +// Same operands, byte rotate +{ + // 8-bit permutation that is a rotation + let rot8x16_pattern = [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4]; + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16))))) + (func $f (param v128) (result v128) + (i8x16.shuffle ${rot8x16_pattern.join(' ')} (local.get 0) (local.get 0))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> rotate-right 8x16"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), rot8x16_pattern); +} + +// Operands the same, random jumble => byte permutation +{ + // 8-bit permutation that is not a rotation + let perm8x16_pattern = [5, 7, 6, 8, 9, 10, 11, 4, 13, 14, 15, 0, 1, 2, 3, 12]; + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16))))) + (func $f (param v128) (result v128) + (i8x16.shuffle ${perm8x16_pattern.join(' ')} (local.get 0) (local.get 0))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> permute 8x16"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), perm8x16_pattern); +} + +// Operands differ, both accessed, rhs is constant zero, left-shift pattern +{ + // 8-bit shift with zeroes shifted in at the right end + let shift8x16_pattern = [16, 16, 16, 16, 16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16))))) + (func $f (param v128) (result v128) + (i8x16.shuffle ${shift8x16_pattern.join(' ')} (local.get 0) (v128.const i32x4 0 0 0 0))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> shift-left 8x16"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), shift8x16_pattern.map(x => x >= 16 ? 0 : x)); +} + +// The same as above but the constant is lhs. 
+
+{
+  // 8-bit shift with zeroes shifted in at the right end
+  let shift8x16_pattern = [16, 16, 16, 16, 16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10].map(x => x ^ 16);
+  let ins = wasmCompile(`
+(module
+  (memory (export "mem") 1 1)
+  (func (export "run")
+    (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)))))
+  (func $f (param v128) (result v128)
+    (i8x16.shuffle ${shift8x16_pattern.join(' ')} (v128.const i32x4 0 0 0 0) (local.get 0))))`);
+
+  assertEq(wasmSimdAnalysis(), "shuffle -> shift-left 8x16");
+
+  let mem = new Int8Array(ins.exports.mem.buffer);
+  set(mem, 16, iota(16));
+  ins.exports.run();
+  assertSame(get(mem, 0, 16), shift8x16_pattern.map(x => x < 16 ? 0 : x - 16));
+}
+
+// Operands differ, both accessed, rhs is constant zero, left-shift pattern that
+// does not start properly.
+{
+  // 8-bit shift with zeroes shifted in at the right end
+  let shift8x16_pattern = [16, 16, 16, 16, 16, 16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+  let ins = wasmCompile(`
+(module
+  (memory (export "mem") 1 1)
+  (func (export "run")
+    (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)))))
+  (func $f (param v128) (result v128)
+    (i8x16.shuffle ${shift8x16_pattern.join(' ')} (local.get 0) (v128.const i32x4 0 0 0 0))))`);
+
+  assertEq(wasmSimdAnalysis(), "shuffle -> shuffle+blend 8x16");
+
+  let mem = new Int8Array(ins.exports.mem.buffer);
+  set(mem, 16, iota(16));
+  ins.exports.run();
+  assertSame(get(mem, 0, 16), shift8x16_pattern.map(x => x >= 16 ? 0 : x));
+}
+
+// Operands differ, both accessed, rhs is constant zero, right-shift pattern
+{
+  // 8-bit shift with zeroes shifted in at the left end
+  let shift8x16_pattern = [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 20, 20, 20, 20, 20, 20];
+  let ins = wasmCompile(`
+(module
+  (memory (export "mem") 1 1)
+  (func (export "run")
+    (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)))))
+  (func $f (param v128) (result v128)
+    (i8x16.shuffle ${shift8x16_pattern.join(' ')} (local.get 0) (v128.const i32x4 0 0 0 0))))`);
+
+  assertEq(wasmSimdAnalysis(), "shuffle -> shift-right 8x16");
+
+  let mem = new Int8Array(ins.exports.mem.buffer);
+  set(mem, 16, iota(16));
+  ins.exports.run();
+  assertSame(get(mem, 0, 16), shift8x16_pattern.map(x => x >= 16 ? 0 : x));
+}
+
+// Operands differ, both accessed, rhs is constant zero, right-shift pattern
+// that does not end properly.
+{
+  // 8-bit shift with zeroes shifted in at the left end
+  let shift8x16_pattern = [6, 7, 8, 9, 10, 11, 12, 13, 14, 20, 20, 20, 20, 20, 20, 20];
+  let ins = wasmCompile(`
+(module
+  (memory (export "mem") 1 1)
+  (func (export "run")
+    (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)))))
+  (func $f (param v128) (result v128)
+    (i8x16.shuffle ${shift8x16_pattern.join(' ')} (local.get 0) (v128.const i32x4 0 0 0 0))))`);
+
+  assertEq(wasmSimdAnalysis(), "shuffle -> shuffle+blend 8x16");
+
+  let mem = new Int8Array(ins.exports.mem.buffer);
+  set(mem, 16, iota(16));
+  ins.exports.run();
+  assertSame(get(mem, 0, 16), shift8x16_pattern.map(x => x >= 16 ?
0 : x)); +} + +// Operands differ and are variable, both accessed, (lhs ++ rhs) >> k +{ + let concat8x16_pattern = [27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)) (v128.load (i32.const 32))))) + (func $f (param v128) (param v128) (result v128) + (i8x16.shuffle ${concat8x16_pattern.join(' ')} (local.get 0) (local.get 1))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> concat+shift-right 8x16"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + set(mem, 32, iota(16).map(k => k+16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), concat8x16_pattern); +} + +// Operands differ and are variable, both accessed, (rhs ++ lhs) >> k +{ + let concat8x16_pattern = [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]; + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)) (v128.load (i32.const 32))))) + (func $f (param v128) (param v128) (result v128) + (i8x16.shuffle ${concat8x16_pattern.join(' ')} (local.get 0) (local.get 1))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> concat+shift-right 8x16"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + set(mem, 32, iota(16).map(k => k+16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), concat8x16_pattern); +} + +// Operands differ, both accessed, but inputs stay in their lanes => byte blend +{ + let blend8x16_pattern = iota(16).map(x => (x % 3 == 0) ? x + 16 : x); + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)) (v128.load (i32.const 32))))) + (func $f (param v128) (param v128) (result v128) + (i8x16.shuffle ${blend8x16_pattern.join(' ')} (local.get 0) (local.get 1))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> blend 8x16"); + + let mem = new Int8Array(ins.exports.mem.buffer); + let lhs = iota(16); + let rhs = iota(16).map(x => x+16); + set(mem, 16, lhs); + set(mem, 32, rhs); + ins.exports.run(); + assertSame(get(mem, 0, 16), blend8x16_pattern); +} + +// Operands differ, both accessed, but inputs stay in their lanes => word blend +{ + let blend16x8_pattern = iota(16).map(x => (x & 2) ? 
x + 16 : x); + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)) (v128.load (i32.const 32))))) + (func $f (param v128) (param v128) (result v128) + (i8x16.shuffle ${blend16x8_pattern.join(' ')} (local.get 0) (local.get 1))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> blend 16x8"); + + let mem = new Int8Array(ins.exports.mem.buffer); + let lhs = iota(16); + let rhs = iota(16).map(x => x+16); + set(mem, 16, lhs); + set(mem, 32, rhs); + ins.exports.run(); + assertSame(get(mem, 0, 16), blend16x8_pattern); +} + +// Interleave i32x4s +for ( let [lhs, rhs, expected] of + [[[0, 1], [4, 5], "shuffle -> interleave-low 32x4"], + [[2, 3], [6, 7], "shuffle -> interleave-high 32x4"]] ) { + for (let swap of [false, true]) { + if (swap) + [lhs, rhs] = [rhs, lhs]; + let interleave_pattern = i32ToI8(interleave(lhs, rhs)); + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)) (v128.load (i32.const 32))))) + (func $f (param v128) (param v128) (result v128) + (i8x16.shuffle ${interleave_pattern.join(' ')} (local.get 0) (local.get 1))))`); + + assertEq(wasmSimdAnalysis(), expected); + + let mem = new Int8Array(ins.exports.mem.buffer); + let lhsval = iota(16); + let rhsval = iota(16).map(x => x+16); + set(mem, 16, lhsval); + set(mem, 32, rhsval); + ins.exports.run(); + assertSame(get(mem, 0, 16), interleave_pattern); + } +} + +// Interleave i64x2s +for ( let [lhs, rhs, expected] of + [[[0], [2], "shuffle -> interleave-low 64x2"], + [[1], [3], "shuffle -> interleave-high 64x2"]] ) { + for (let swap of [false, true]) { + if (swap) + [lhs, rhs] = [rhs, lhs]; + let interleave_pattern = i64ToI2(interleave(lhs, rhs)); + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)) (v128.load (i32.const 32))))) + (func $f (param v128) (param v128) (result v128) + (i8x16.shuffle ${interleave_pattern.join(' ')} (local.get 0) (local.get 1))))`); + + assertEq(wasmSimdAnalysis(), expected); + + let mem = new Int8Array(ins.exports.mem.buffer); + let lhsval = iota(16); + let rhsval = iota(16).map(x => x+16); + set(mem, 16, lhsval); + set(mem, 32, rhsval); + ins.exports.run(); + assertSame(get(mem, 0, 16), interleave_pattern); + } +} + +// Interleave i16x8s +for ( let [lhs, rhs, expected] of + [[[0, 1, 2, 3], [8, 9, 10, 11], "shuffle -> interleave-low 16x8"], + [[4, 5, 6, 7], [12, 13, 14, 15], "shuffle -> interleave-high 16x8"]] ) { + for (let swap of [false, true]) { + if (swap) + [lhs, rhs] = [rhs, lhs]; + let interleave_pattern = i16ToI8(interleave(lhs, rhs)); + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)) (v128.load (i32.const 32))))) + (func $f (param v128) (param v128) (result v128) + (i8x16.shuffle ${interleave_pattern.join(' ')} (local.get 0) (local.get 1))))`); + + assertEq(wasmSimdAnalysis(), expected); + + let mem = new Int8Array(ins.exports.mem.buffer); + let lhsval = iota(16); + let rhsval = iota(16).map(x => x+16); + set(mem, 16, lhsval); + set(mem, 32, rhsval); + ins.exports.run(); + assertSame(get(mem, 0, 16), interleave_pattern); + } +} + +// Interleave i8x16s +for ( let [lhs, rhs, expected] of + [[[0, 1, 2, 3, 4, 5, 6, 7], [16, 17, 18, 19, 20, 21, 22, 23], "shuffle -> interleave-low 8x16"], + [[8, 9, 10, 11, 12, 13, 14, 
15],[24, 25, 26, 27, 28, 29, 30, 31], "shuffle -> interleave-high 8x16"]] ) { + for (let swap of [false, true]) { + if (swap) + [lhs, rhs] = [rhs, lhs]; + let interleave_pattern = interleave(lhs, rhs); + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)) (v128.load (i32.const 32))))) + (func $f (param v128) (param v128) (result v128) + (i8x16.shuffle ${interleave_pattern.join(' ')} (local.get 0) (local.get 1))))`); + + assertEq(wasmSimdAnalysis(), expected); + + let mem = new Int8Array(ins.exports.mem.buffer); + let lhsval = iota(16); + let rhsval = iota(16).map(x => x+16); + set(mem, 16, lhsval); + set(mem, 32, rhsval); + ins.exports.run(); + assertSame(get(mem, 0, 16), interleave_pattern); + } +} + +// Operands differ, both accessed, random jumble => byte shuffle+blend +{ + let blend_perm8x16_pattern = [5, 23, 6, 24, 9, 10, 11, 7, 7, 14, 15, 19, 1, 2, 3, 12]; + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)) (v128.load (i32.const 32))))) + (func $f (param v128) (param v128) (result v128) + (i8x16.shuffle ${blend_perm8x16_pattern.join(' ')} (local.get 0) (local.get 1))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> shuffle+blend 8x16"); + + let mem = new Int8Array(ins.exports.mem.buffer); + let lhs = iota(16).map(x => x+16); + let rhs = iota(16); + set(mem, 16, lhs); + set(mem, 32, rhs); + ins.exports.run(); + assertSame(get(mem, 0, 16), + blend_perm8x16_pattern.map(x => x < 16 ? lhs[x] : rhs[x-16])); +} + +// No-op, ignoring right operand, should turn into a move. +{ + let nop8x16_pattern = iota(16); + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)) (v128.load (i32.const 32))))) + (func $f (param v128) (param v128) (result v128) + (i8x16.shuffle ${nop8x16_pattern.join(' ')} (local.get 0) (local.get 1))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> move"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + set(mem, 32, iota(16).map(x => x+16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), nop8x16_pattern); +} + +// No-op, ignoring left operand, should turn into a move. 
+{ + let nop8x16_pattern = iota(16).map(x => x+16); + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)) (v128.load (i32.const 32))))) + (func $f (param v128) (param v128) (result v128) + (i8x16.shuffle ${nop8x16_pattern.join(' ')} (local.get 0) (local.get 1))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> move"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + set(mem, 32, iota(16).map(x => x+16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), nop8x16_pattern); +} + +// Broadcast byte +for ( let byte of [3, 11, 8, 2] ) { + let broadcast8x16_pattern = iota(16).map(_ => byte); + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16))))) + (func $f (param v128) (result v128) + (i8x16.shuffle ${broadcast8x16_pattern.join(' ')} (local.get 0) (local.get 0))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> broadcast 8x16"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), broadcast8x16_pattern); +} + +// Broadcast word from high quadword +{ + let broadcast16x8_pattern = [10, 11, 10, 11, 10, 11, 10, 11, 10, 11, 10, 11, 10, 11, 10, 11]; + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16))))) + (func $f (param v128) (result v128) + (i8x16.shuffle ${broadcast16x8_pattern.join(' ')} (local.get 0) (local.get 0))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> broadcast 16x8"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), broadcast16x8_pattern); +} + +// Broadcast word from low quadword +{ + let broadcast16x8_pattern = [4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5]; + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16))))) + (func $f (param v128) (result v128) + (i8x16.shuffle ${broadcast16x8_pattern.join(' ')} (local.get 0) (local.get 0))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> broadcast 16x8"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), broadcast16x8_pattern); +} + +// Broadcast dword from low quadword should turn into a dword permute +{ + let broadcast32x4_pattern = [4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7]; + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16))))) + (func $f (param v128) (result v128) + (i8x16.shuffle ${broadcast32x4_pattern.join(' ')} (local.get 0) (local.get 0))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> permute 32x4"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), broadcast32x4_pattern); +} + +// Broadcast high qword should turn into a dword permute +{ + let broadcast64x2_pattern = [8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15] + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16))))) + (func $f (param v128) (result v128) + (i8x16.shuffle ${broadcast64x2_pattern.join(' ')} (local.get 0) 
(local.get 0))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> permute 32x4"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), broadcast64x2_pattern); +} + +// Byte reversal should be a byte permute +{ + let rev8x16_pattern = iota(16).reverse(); + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16))))) + (func $f (param v128) (result v128) + (i8x16.shuffle ${rev8x16_pattern.join(' ')} (local.get 0) (local.get 0))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> permute 8x16"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), rev8x16_pattern); +} + +// Byteswap of half-word, word and quad-word groups should be +// reverse bytes analysis +for (let k of [2, 4, 8]) { + let rev8_pattern = iota(16).map(i => i ^ (k - 1)); + let ins = wasmCompile(` +(module +(memory (export "mem") 1 1) +(func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16))))) +(func $f (param v128) (result v128) + (i8x16.shuffle ${rev8_pattern.join(' ')} (local.get 0) (local.get 0))))`); + + assertEq(wasmSimdAnalysis(), `shuffle -> reverse bytes in ${8 * k}-bit lanes`); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), rev8_pattern); +} + +// Word reversal should be a word permute +{ + let rev16x8_pattern = i16ToI8(iota(8).reverse()); + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16))))) + (func $f (param v128) (result v128) + (i8x16.shuffle ${rev16x8_pattern.join(' ')} (local.get 0) (local.get 0))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> permute 16x8"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), rev16x8_pattern); +} + +// Dword reversal should be a dword permute +{ + let rev32x4_pattern = i32ToI8([3, 2, 1, 0]); + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16))))) + (func $f (param v128) (result v128) + (i8x16.shuffle ${rev32x4_pattern.join(' ')} (local.get 0) (local.get 0))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> permute 32x4"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), rev32x4_pattern); +} + +// Qword reversal should be a dword permute +{ + let rev64x2_pattern = i32ToI8([2, 3, 0, 1]); + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16))))) + (func $f (param v128) (result v128) + (i8x16.shuffle ${rev64x2_pattern.join(' ')} (local.get 0) (local.get 0))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> permute 32x4"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), rev64x2_pattern); +} + +// In the case of shifts, we have separate tests that constant shifts work +// correctly, so no such testing is done here. 
+ +for ( let lanes of ['i8x16', 'i16x8', 'i32x4', 'i64x2'] ) { + for ( let shift of ['shl', 'shr_s', 'shr_u'] ) { + for ( let [count, result] of [['(i32.const 5)', /shift -> constant shift/], + ['(local.get 1)', /shift -> variable(?: scalarized)? shift/]] ) { + wasmCompile(`(module (func (param v128) (param i32) (result v128) (${lanes}.${shift} (local.get 0) ${count})))`); + assertEq(wasmSimdAnalysis().match(result).length, 1); + } + } +} + +// Constant folding scalar->simd. There are functional tests for all these in +// ad-hack.js so here we only check that the transformation is triggered. + +for ( let [ty128, ty] of [['i8x16', 'i32'], ['i16x8', 'i32'], ['i32x4', 'i32'], + ['i64x2', 'i64'], ['f32x4', 'f32'], ['f64x2', 'f64']] ) +{ + wasmCompile(`(module (func (result v128) (${ty128}.splat (${ty}.const 37))))`); + assertEq(wasmSimdAnalysis(), "scalar-to-simd128 -> constant folded"); +} + +// Ditto simd->scalar. + +for ( let [ty128, suffix] of [['i8x16', '_s'], ['i8x16', '_u'], ['i16x8','_s'], ['i16x8','_u'], ['i32x4', '']] ) { + for ( let op of ['any_true', 'all_true', 'bitmask', `extract_lane${suffix} 0`] ) { + let operation = op == 'any_true' ? 'v128.any_true' : `${ty128}.${op}`; + wasmCompile(`(module (func (result i32) (${operation} (v128.const i64x2 0 0))))`); + assertEq(wasmSimdAnalysis(), "simd128-to-scalar -> constant folded"); + } +} + +for ( let ty128 of ['f32x4','f64x2','i64x2'] ) { + wasmCompile(`(module (func (result ${ty128.match(/(...)x.*/)[1]}) (${ty128}.extract_lane 0 (v128.const i64x2 0 0))))`); + assertEq(wasmSimdAnalysis(), "simd128-to-scalar -> constant folded"); +} + +// Optimizing all_true, any_true, and bitmask that are used for control flow, also when negated. + +for ( let [ty128,size] of [['i8x16',1], ['i16x8',2], ['i32x4',4]] ) { + let all = iota(16/size).map(n => n*n); + let some = iota(16/size).map(n => n*(n % 3)); + let none = iota(16/size).map(n => 0); + let inputs = [all, some, none]; + let ops = { all_true: allTrue, any_true: anyTrue, bitmask }; + + for ( let op of ['any_true', 'all_true', 'bitmask'] ) { + let folded = op != 'bitmask' || (size == 2 && !isArm64); + let operation = op == 'any_true' ? 'v128.any_true' : `${ty128}.${op}`; + let positive = + wasmCompile( + `(module + (memory (export "mem") 1 1) + (func $f (param v128) (result i32) + (if (result i32) (${operation} (local.get 0)) + (i32.const 42) + (i32.const 37))) + (func (export "run") (result i32) + (call $f (v128.load (i32.const 16)))))`); + assertEq(wasmSimdAnalysis(), folded ? "simd128-to-scalar-and-branch -> folded" : "none"); + + let negative = + wasmCompile( + `(module + (memory (export "mem") 1 1) + (func $f (param v128) (result i32) + (if (result i32) (i32.eqz (${operation} (local.get 0))) + (i32.const 42) + (i32.const 37))) + (func (export "run") (result i32) + (call $f (v128.load (i32.const 16)))))`); + assertEq(wasmSimdAnalysis(), folded ? "simd128-to-scalar-and-branch -> folded" : "none"); + + for ( let inp of inputs ) { + let mem = new this[`Int${8*size}Array`](positive.exports.mem.buffer); + set(mem, 16/size, inp); + assertEq(positive.exports.run(), ops[op](inp) ? 42 : 37); + + mem = new this[`Int${8*size}Array`](negative.exports.mem.buffer); + set(mem, 16/size, inp); + assertEq(negative.exports.run(), ops[op](inp) ? 37 : 42); + } + } +} + +// Constant folding + +{ + // Swizzle-with-constant rewritten as shuffle, and then further optimized + // into a dword permute. Correctness is tested in ad-hack.js. 
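+  // As a quick sanity check (hypothetical locals, not part of the original
+  // test): the swizzle constant selects bytes [4..7, 0..3, 12..15, 8..11],
+  // which is exactly dwords [1, 0, 3, 2], so a 32x4 permute suffices.
+  let swizzleIndices = [4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11];
+  assertSame(swizzleIndices, i32ToI8([1, 0, 3, 2]));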
+ wasmCompile(` +(module (func (param v128) (result v128) + (i8x16.swizzle (local.get 0) (v128.const i8x16 4 5 6 7 0 1 2 3 12 13 14 15 8 9 10 11)))) +`); + assertEq(wasmSimdAnalysis(), "shuffle -> permute 32x4"); +} + +// Bitselect with constant mask folded into shuffle operation + +if (!isArm64) { + wasmCompile(` + (module (func (param v128) (param v128) (result v128) + (v128.bitselect (local.get 0) (local.get 1) (v128.const i8x16 0 -1 -1 0 0 0 0 0 -1 -1 -1 -1 -1 -1 0 0)))) + `); + assertEq(wasmSimdAnalysis(), "shuffle -> blend 8x16"); +} + +// Library + +function wasmCompile(text) { + return new WebAssembly.Instance(new WebAssembly.Module(wasmTextToBinary(text))) +} + +function get(arr, loc, len) { + let res = []; + for ( let i=0; i < len; i++ ) { + res.push(arr[loc+i]); + } + return res; +} + +function set(arr, loc, vals) { + for ( let i=0; i < vals.length; i++ ) { + arr[loc+i] = vals[i]; + } +} + +function i32ToI8(xs) { + return xs.map(x => [x*4, x*4+1, x*4+2, x*4+3]).flat(); +} + +function i64ToI2(xs) { + return xs.map(x => [x*8, x*8+1, x*8+2, x*8+3, + x*8+4, x*8+5, x*8+6, x*8+7]).flat(); +} + +function i16ToI8(xs) { + return xs.map(x => [x*2, x*2+1]).flat(); +} + +function allTrue(xs) { + return xs.every(v => v != 0); +} + +function anyTrue(xs) { + return xs.some(v => v != 0); +} + +function bitmask(xs) { + let shift = 128/xs.length - 1; + let res = 0; + let k = 0; + xs.forEach(v => { res |= ((v >>> shift) & 1) << k; k++; }); + return res; +} diff --git a/js/src/jit-test/tests/wasm/simd/ion-bug1641973.js b/js/src/jit-test/tests/wasm/simd/ion-bug1641973.js new file mode 100644 index 0000000000..7dfdf3afad --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/ion-bug1641973.js @@ -0,0 +1,13 @@ +// |jit-test| skip-if: !wasmSimdEnabled() + +// Fuzz test case. The initial unreachable will result in the subsequent +// i8x16.shuffle popping null pointers off the value stack. Due to a missing +// isDeadCode() check in WasmIonCompile.cpp the compiler would dereference those +// null pointers. +new WebAssembly.Module(wasmTextToBinary(` +(module + (func (result v128) + (unreachable) + (i8x16.shuffle 0 0 23 0 4 4 4 4 4 16 1 0 4 4 4 4))) +`)) + diff --git a/js/src/jit-test/tests/wasm/simd/ion-bug1688262.js b/js/src/jit-test/tests/wasm/simd/ion-bug1688262.js new file mode 100644 index 0000000000..ebb2f72864 --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/ion-bug1688262.js @@ -0,0 +1,22 @@ +// |jit-test| skip-if: !wasmSimdEnabled() + +// Shuffle pattern incorrectly recognized as a rotate due to a missing guard in +// the optimizer. 
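+//
+// A genuine byte rotate satisfies pattern[i] == (i + pattern[0]) & 15 for
+// every output lane; the pattern below fails that at lane 9, so it is not a
+// rotation. A quick JS restatement of the condition (a hypothetical sanity
+// check, not part of the original test):
+{
+  let pattern = [0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 1, 2, 3, 4, 5, 6];
+  assertEq(pattern.every((v, i) => v == ((i + pattern[0]) & 15)), false);
+}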
+ +let ins = wasmEvalText(` + (module + (memory (export "mem") 1) + (func (export "test") + (v128.store (i32.const 0) + (i8x16.shuffle 0 1 2 3 4 5 6 7 8 0 1 2 3 4 5 6 + (v128.load (i32.const 16)) + (v128.const i32x4 0 0 0 0))))) +`); + +let mem = new Int8Array(ins.exports.mem.buffer); +let input = [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]; +let output = [10, 11, 12, 13, 14, 15, 16, 17, 18, 10, 11, 12, 13, 14, 15, 16]; +mem.set(input, 16); +ins.exports.test(); +let result = Array.from(mem.subarray(0, 16)); +assertDeepEq(output, result); diff --git a/js/src/jit-test/tests/wasm/simd/ion-bug1688713.js b/js/src/jit-test/tests/wasm/simd/ion-bug1688713.js new file mode 100644 index 0000000000..86a2ff0b3c --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/ion-bug1688713.js @@ -0,0 +1,24 @@ +// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x64 || getBuildConfiguration().simulator || isAvxPresent(); include:codegen-x64-test.js + +// This checks that we emit a REX prefix that includes the SIB index when +// appropriate. +// +// This test case is a little tricky. On Win64, the arg registers are rcx, rdx, +// r8, r9; so we want to use local 2 or 3 as the index. But on other x64 +// platforms, the arg registers are rdi, rsi, rdx, rcx, r8, r9; so we want to +// use local 4 or 5 as the index. This test uses both, and then looks for a hit +// on the REX byte which must be 0x43. Before the bugfix, since the index +// register was ignored, the byte would always be 0x41, as it will continue to +// be for the access that does not use an extended register. +// +// The test is brittle: the register allocator can easily make a mess of it. +// But for now it works. + +codegenTestX64_adhoc( +`(module + (memory 1) + (func $f (export "f") (param i32) (param i32) (param i32) (param i32) (param i32) (result v128) + (i32x4.add (v128.load8x8_s (local.get 4)) (v128.load8x8_s (local.get 2)))))`, + 'f', + `66 43 0f 38 20 .. .. pmovsxbwq \\(%r15,%r(8|9|10|11|12|13),1\\), %xmm[0-9]+`, + {no_prefix: true, no_suffix: true, log:true}); diff --git a/js/src/jit-test/tests/wasm/simd/js-api.js b/js/src/jit-test/tests/wasm/simd/js-api.js new file mode 100644 index 0000000000..ea7f3fb147 --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/js-api.js @@ -0,0 +1,130 @@ +// |jit-test| test-also=--no-threads; skip-if: !wasmSimdEnabled() + +// SIMD JS API +// +// As of 31 March 2020 the SIMD spec is very light on information about the JS +// API, and what it has is ridden with misspellings, grammatical errors, and +// apparent redundancies. The rules below represent my best effort at +// understanding the intent of the spec. As far as I can tell, the rules for +// v128 are intended to match the rules for i64 in the Wasm MVP. + +// Hopefully, these are enough to test that various JIT stubs are generated and +// used if we run the tests in a loop. + +setJitCompilerOption("baseline.warmup.trigger", 2); +setJitCompilerOption("ion.warmup.trigger", 4); + +// RULE: v128 cannot cross the JS/wasm boundary as a function parameter. +// +// A wasm function that: +// - takes or returns v128 +// - was imported into wasm +// - is ultimately a JS function +// should always throw TypeError when called from wasm. +// +// Note, JIT exit stubs should be generated here because settings above should +// cause the JIT to tier up. 
+
+var ins = wasmEvalText(`
+  (module
+    (import "m" "v128_param" (func $f (param v128)))
+    (import "m" "v128_return" (func $g (result v128)))
+    (func (export "v128_param")
+      (call $f (v128.const i32x4 0 0 0 0)))
+    (func (export "v128_result")
+      (drop (call $g))))`,
+                       {m:{v128_param: (x) => 0,
+                           v128_return: () => 0}});
+
+function call_v128_param() { ins.exports.v128_param(); }
+function call_v128_result() { ins.exports.v128_result(); }
+
+for ( let i = 0 ; i < 100; i++ ) {
+    assertErrorMessage(call_v128_param,
+                       TypeError,
+                       /cannot pass.*v128.*to or from JS/);
+    assertErrorMessage(call_v128_result,
+                       TypeError,
+                       /cannot pass.*v128.*to or from JS/);
+}
+
+// RULE: v128 cannot cross the JS/wasm boundary as a function parameter.
+//
+// A wasm function that:
+// - takes or returns v128
+// - is exported from wasm
+// - is ultimately a true wasm function
+// should always throw TypeError when called from JS.
+//
+// Note, JIT entry stubs should be generated here because settings above should
+// cause the JIT to tier up.
+
+var ins2 = wasmEvalText(`
+  (module
+    (func (export "v128_param") (param v128) (result i32)
+      (i32.const 0))
+    (func (export "v128_result") (result v128)
+      (v128.const i32x4 0 0 0 0)))`);
+
+function call_v128_param2() { ins2.exports.v128_param(); }
+function call_v128_result2() { ins2.exports.v128_result(); }
+
+for ( let i = 0 ; i < 100; i++ ) {
+    assertErrorMessage(call_v128_param2,
+                       TypeError,
+                       /cannot pass.*v128.*to or from JS/);
+    assertErrorMessage(call_v128_result2,
+                       TypeError,
+                       /cannot pass.*v128.*to or from JS/);
+}
+
+// RULE: The rules about v128 passing into or out of a function apply even when
+// an imported JS function is re-exported and is then called.
+
+var newfn = (x) => x;
+var ins = wasmEvalText(`
+  (module
+    (import "m" "fn" (func $f (param v128) (result v128)))
+    (export "newfn" (func $f)))`,
+                       {m:{fn: newfn}});
+assertErrorMessage(() => ins.exports.newfn(3),
+                   TypeError,
+                   /cannot pass.*v128.*to or from JS/);
+
+// RULE: A WebAssembly.Global of type v128 is not constructable from JS, with
+// or without a default value.
+
+assertErrorMessage(() => new WebAssembly.Global({value: "v128"}, 37),
+                   TypeError,
+                   /cannot pass.*v128.*to or from JS/);
+assertErrorMessage(() => new WebAssembly.Global({value: "v128"}),
+                   TypeError,
+                   /cannot pass.*v128.*to or from JS/);
+assertErrorMessage(() => new WebAssembly.Global({value: "v128", mutable: true}),
+                   TypeError,
+                   /cannot pass.*v128.*to or from JS/);
+
+// RULE: A WebAssembly.Global of type v128 has getters and setters that throw
+// TypeError when called from JS.
+ +let {gi, gm} = wasmEvalText(` + (module + (global (export "gi") v128 v128.const i64x2 0 0) + (global (export "gm") (mut v128) v128.const i64x2 0 0) + )`).exports; + +assertErrorMessage(() => gi.value, + TypeError, + /cannot pass.*v128.*to or from JS/); +assertErrorMessage(() => gi.valueOf(), + TypeError, + /cannot pass.*v128.*to or from JS/); +assertErrorMessage(() => gm.value = 0, + TypeError, + /cannot pass.*v128.*to or from JS/); + + diff --git a/js/src/jit-test/tests/wasm/simd/neg-abs-not-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/neg-abs-not-x64-ion-codegen.js new file mode 100644 index 0000000000..0ae75f38fb --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/neg-abs-not-x64-ion-codegen.js @@ -0,0 +1,34 @@ +// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x64 || getBuildConfiguration().simulator; include:codegen-x64-test.js + +// Test that there are no extraneous moves for variable SIMD negate, abs, and +// not instructions. See README-codegen.md for general information about this +// type of test case. + +// Integer negates don't have to reuse the input for the output, and prefer for +// the registers to be different. So use parameter 1 and ignore parameter 0. + +codegenTestX64_IGNOREDxv128_v128( + [['i8x16.neg', ` +66 0f ef c0 pxor %xmm0, %xmm0 +66 0f f8 c1 psubb %xmm1, %xmm0`], + ['i16x8.neg', ` +66 0f ef c0 pxor %xmm0, %xmm0 +66 0f f9 c1 psubw %xmm1, %xmm0`], + ['i32x4.neg', ` +66 0f ef c0 pxor %xmm0, %xmm0 +66 0f fa c1 psubd %xmm1, %xmm0`], + ['i64x2.neg', ` +66 0f ef c0 pxor %xmm0, %xmm0 +66 0f fb c1 psubq %xmm1, %xmm0`]] ); + +// Floating point negate and absolute value, and bitwise not, prefer for the +// registers to be the same and guarantee that no move is inserted if so. + +codegenTestX64_v128_v128( + [['f32x4.neg', `66 0f ef 05 ${RIPRADDR} pxorx ${RIPR}, %xmm0`], + ['f64x2.neg', `66 0f ef 05 ${RIPRADDR} pxorx ${RIPR}, %xmm0`], + ['f32x4.abs', `66 0f db 05 ${RIPRADDR} pandx ${RIPR}, %xmm0`], + ['f64x2.abs', `66 0f db 05 ${RIPRADDR} pandx ${RIPR}, %xmm0`], + ['v128.not', ` +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`]] ); diff --git a/js/src/jit-test/tests/wasm/simd/pairwise-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/pairwise-x64-ion-codegen.js new file mode 100644 index 0000000000..53ab47fdb8 --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/pairwise-x64-ion-codegen.js @@ -0,0 +1,38 @@ +// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x64 || getBuildConfiguration().simulator; include:codegen-x64-test.js + +// Tests for SIMD add pairwise instructions. 
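+//
+// For reference, extadd_pairwise sums adjacent lanes into lanes of twice the
+// width. A minimal JS model of the i16x8.extadd_pairwise_i8x16_* semantics
+// (a hypothetical helper, not used by the codegen checks below):
+
+function extaddPairwiseI8x16Ref(bytes, signed) {
+  // bytes: 16 integer byte values; returns the 8 pairwise i16 sums.
+  let norm = x => signed ? (x << 24 >> 24) : (x & 0xff);
+  return Array.from({length: 8}, (_, i) => norm(bytes[2 * i]) + norm(bytes[2 * i + 1]));
+}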
+
+if (!isAvxPresent()) {
+
+  codegenTestX64_IGNOREDxv128_v128(
+   [['i16x8.extadd_pairwise_i8x16_s', `
+66 0f 6f 05 ${RIPRADDR} movdqax ${RIPR}, %xmm0
+66 0f 38 04 c1 pmaddubsw %xmm1, %xmm0`],
+    ['i16x8.extadd_pairwise_i8x16_u', `
+66 0f 6f c1 movdqa %xmm1, %xmm0
+66 0f 38 04 05 ${RIPRADDR} pmaddubswx ${RIPR}, %xmm0`],
+    ['i32x4.extadd_pairwise_i16x8_s', `
+66 0f 6f c1 movdqa %xmm1, %xmm0
+66 0f f5 05 ${RIPRADDR} pmaddwdx ${RIPR}, %xmm0`],
+    ['i32x4.extadd_pairwise_i16x8_u', `
+66 0f 6f c1 movdqa %xmm1, %xmm0
+66 0f ef 05 ${RIPRADDR} pxorx ${RIPR}, %xmm0
+66 0f f5 05 ${RIPRADDR} pmaddwdx ${RIPR}, %xmm0
+66 0f fe 05 ${RIPRADDR} padddx ${RIPR}, %xmm0`]]);
+
+} else {
+
+  codegenTestX64_IGNOREDxv128_v128(
+   [['i16x8.extadd_pairwise_i8x16_s', `
+66 0f 6f 05 ${RIPRADDR} movdqax ${RIPR}, %xmm0
+66 0f 38 04 c1 pmaddubsw %xmm1, %xmm0`],
+    ['i16x8.extadd_pairwise_i8x16_u', `
+c4 e2 71 04 05 ${RIPRADDR} vpmaddubswx ${RIPR}, %xmm1, %xmm0`],
+    ['i32x4.extadd_pairwise_i16x8_s', `
+c5 f1 f5 05 ${RIPRADDR} vpmaddwdx ${RIPR}, %xmm1, %xmm0`],
+    ['i32x4.extadd_pairwise_i16x8_u', `
+c5 f1 ef 05 ${RIPRADDR} vpxorx ${RIPR}, %xmm1, %xmm0
+66 0f f5 05 ${RIPRADDR} pmaddwdx ${RIPR}, %xmm0
+66 0f fe 05 ${RIPRADDR} padddx ${RIPR}, %xmm0`]]);
+
+}
diff --git a/js/src/jit-test/tests/wasm/simd/pmaddubsw-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/pmaddubsw-x64-ion-codegen.js
new file mode 100644
index 0000000000..94abfd5c54
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/pmaddubsw-x64-ion-codegen.js
@@ -0,0 +1,154 @@
+// |jit-test| skip-if: !wasmSimdEnabled() || wasmCompileMode() != "ion"
+
+// Testing _mm_maddubs_epi16 / vpmaddubsw behavior for all platforms.
+//
+// Bug 1762413 adds a specialization for emscripten's pattern to directly
+// emit PMADDUBSW machine code.
+
+const isX64 = getBuildConfiguration().x64 && !getBuildConfiguration().simulator;
+
+// Simple test.
+const simple = wasmTextToBinary(`(module
+  (memory (export "memory") 1 1)
+  (func $_mm_maddubs_epi16 (export "t") (param v128 v128) (result v128)
+    local.get 1
+    i32.const 8
+    i16x8.shl
+    i32.const 8
+    i16x8.shr_s
+    local.get 0
+    v128.const i32x4 0x00ff00ff 0x00ff00ff 0x00ff00ff 0x00ff00ff
+    v128.and
+    i16x8.mul
+    local.get 1
+    i32.const 8
+    i16x8.shr_s
+    local.get 0
+    i32.const 8
+    i16x8.shr_u
+    i16x8.mul
+    i16x8.add_sat_s)
+  (func (export "run")
+    i32.const 0
+    v128.const i8x16 0 2 1 2 1 2 -1 1 255 255 255 255 0 0 255 255
+    v128.const i8x16 1 0 3 4 -3 -4 -128 127 127 127 -128 -128 0 0 -128 127
+    call $_mm_maddubs_epi16
+    v128.store
+  )
+)`);
+var ins = new WebAssembly.Instance(new WebAssembly.Module(simple));
+ins.exports.run();
+var mem16 = new Int16Array(ins.exports.memory.buffer, 0, 8);
+assertSame(mem16, [0, 11, -11, -32513, 32767, -32768, 0, -255]);
+
+if (hasDisassembler() && isX64) {
+  assertEq(wasmDis(ins.exports.t, {tier:"ion", asString:true}).includes('pmaddubsw'), true);
+}
+
+if (hasDisassembler() && isX64) {
+  // The two pmaddubsw chains share common operands, so the code can be
+  // optimized.
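+  // The wat below is modeled on emscripten's real-world output: two copies of
+  // the _mm_maddubs_epi16 pattern inside a loop, both multiplying against the
+  // same absolute-value vector, so the matcher should emit exactly two
+  // pmaddubsw instructions and no scalarized multiply/shift leftovers.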
+  const realWorldOutput = wasmTextToBinary(`(module
+    (memory 1 1)
+    (func (export "test")
+      (local i32 i32 i32 i32 v128 v128 v128 v128 v128 v128)
+      local.get 0
+      local.get 1
+      i32.add
+      local.set 2
+      local.get 0
+      i32.const 16
+      i32.add
+      local.set 0
+      local.get 3
+      local.set 1
+      loop
+        local.get 5
+        local.get 0
+        v128.load
+        local.tee 5
+        i32.const 7
+        i8x16.shr_s
+        local.tee 8
+        local.get 1
+        v128.load offset=240
+        local.get 5
+        v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000
+        i8x16.eq
+        local.tee 7
+        v128.andnot
+        i8x16.add
+        local.get 8
+        v128.xor
+        local.tee 4
+        i32.const 8
+        i16x8.shl
+        i32.const 8
+        i16x8.shr_s
+        local.get 5
+        i8x16.abs
+        local.tee 5
+        v128.const i32x4 0x00ff00ff 0x00ff00ff 0x00ff00ff 0x00ff00ff
+        v128.and
+        local.tee 9
+        i16x8.mul
+        local.get 4
+        i32.const 8
+        i16x8.shr_s
+        local.get 5
+        i32.const 8
+        i16x8.shr_u
+        local.tee 4
+        i16x8.mul
+        i16x8.add_sat_s
+        i16x8.add_sat_s
+        local.set 5
+
+        local.get 6
+        local.get 8
+        local.get 1
+        v128.load offset=224
+        local.get 7
+        v128.andnot
+        i8x16.add
+        local.get 8
+        v128.xor
+        local.tee 6
+        i32.const 8
+        i16x8.shl
+        i32.const 8
+        i16x8.shr_s
+        local.get 9
+        i16x8.mul
+        local.get 6
+        i32.const 8
+        i16x8.shr_s
+        local.get 4
+        i16x8.mul
+        i16x8.add_sat_s
+        i16x8.add_sat_s
+        local.set 6
+
+        local.get 1
+        i32.const 128
+        i32.add
+        local.set 1
+        local.get 0
+        i32.const 16
+        i32.add
+        local.tee 0
+        local.get 2
+        i32.ne
+        br_if 0
+      end
+))`);
+
+  var ins = new WebAssembly.Instance(new WebAssembly.Module(realWorldOutput));
+  const output = wasmDis(ins.exports.test, {tier:"ion", asString:true}).replace(/^[0-9a-f]{8} (?:[0-9a-f]{2} )+\n?\s+/gmi, "");
+  // Find exactly two pmaddubsw+paddsw pairs.
+  const re = /\bv?pmaddubsw[^\n]+\nv?paddsw /g;
+  assertEq(re.exec(output) != null, true);
+  assertEq(re.exec(output) != null, true);
+  assertEq(re.exec(output) == null, true);
+  // No leftover PMULLW, PSLLW, or PSRAW.
+  assertEq(/pmullw|psllw|psraw/.test(output), false);
+}
diff --git a/js/src/jit-test/tests/wasm/simd/reduce-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/reduce-x64-ion-codegen.js
new file mode 100644
index 0000000000..bd59f30632
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/reduce-x64-ion-codegen.js
@@ -0,0 +1,46 @@
+// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x64 || getBuildConfiguration().simulator || isAvxPresent(); include:codegen-x64-test.js
+
+// Test encoding of the all_true and any_true operations.
+
+codegenTestX64_v128_i32(
+ [['v128.any_true', `
+66 0f 38 17 c0 ptest %xmm0, %xmm0
+0f 95 c0 setnz %al
+0f b6 c0 movzx %al, %eax`],
+  ['i8x16.all_true', `
+66 45 0f ef ff pxor %xmm15, %xmm15
+66 44 0f 74 f8 pcmpeqb %xmm0, %xmm15
+66 45 0f 38 17 ff ptest %xmm15, %xmm15
+0f 94 c0 setz %al
+0f b6 c0 movzx %al, %eax`],
+  ['i16x8.all_true', `
+66 45 0f ef ff pxor %xmm15, %xmm15
+66 44 0f 75 f8 pcmpeqw %xmm0, %xmm15
+66 45 0f 38 17 ff ptest %xmm15, %xmm15
+0f 94 c0 setz %al
+0f b6 c0 movzx %al, %eax`],
+  ['i32x4.all_true', `
+66 45 0f ef ff pxor %xmm15, %xmm15
+66 44 0f 76 f8 pcmpeqd %xmm0, %xmm15
+66 45 0f 38 17 ff ptest %xmm15, %xmm15
+0f 94 c0 setz %al
+0f b6 c0 movzx %al, %eax`],
+  ['i64x2.all_true', `
+66 45 0f ef ff pxor %xmm15, %xmm15
+66 44 0f 38 29 f8 pcmpeqq %xmm0, %xmm15
+66 45 0f 38 17 ff ptest %xmm15, %xmm15
+0f 94 c0 setz %al
+0f b6 c0 movzx %al, %eax`]], {}
+)
+
+// Utils.
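+// codegenTestX64_adhoc and wrap are helpers from codegen-x64-test.js (pulled
+// in by the include: directive above); this function just wraps each
+// operation in a one-instruction v128 -> i32 function and checks Ion's code
+// for it against the expected disassembly pattern.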
+function codegenTestX64_v128_i32(inputs, options = {}) { + for ( let [op, expected] of inputs ) { + codegenTestX64_adhoc(wrap(options, ` + (func (export "f") (param v128) (result i32) + (${op} (local.get 0)))`), + 'f', + expected, + options); + } + } diff --git a/js/src/jit-test/tests/wasm/simd/select.js b/js/src/jit-test/tests/wasm/simd/select.js new file mode 100644 index 0000000000..b3535d3039 --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/select.js @@ -0,0 +1,33 @@ +// |jit-test| skip-if: !wasmSimdEnabled() + +wasmAssert(` +(module + (func $f (param i32) (result v128) + (select ;; no type + (v128.const i32x4 1 2 3 4) + (v128.const i32x4 4 3 2 1) + (local.get 0) + ) + ) + (export "" (func 0)) +)`, [ + { type: 'v128', func: '$f', args: ['i32.const 0'], expected: 'i32x4 4 3 2 1' }, + { type: 'v128', func: '$f', args: ['i32.const 1'], expected: 'i32x4 1 2 3 4' }, + { type: 'v128', func: '$f', args: ['i32.const -1'], expected: 'i32x4 1 2 3 4' }, +], {}); + +wasmAssert(` +(module + (func $f (param i32) (result v128) + (select (result v128) + (v128.const i32x4 1 2 3 4) + (v128.const i32x4 4 3 2 1) + (local.get 0) + ) + ) + (export "" (func 0)) +)`, [ + { type: 'v128', func: '$f', args: ['i32.const 0'], expected: 'i32x4 4 3 2 1' }, + { type: 'v128', func: '$f', args: ['i32.const 1'], expected: 'i32x4 1 2 3 4' }, + { type: 'v128', func: '$f', args: ['i32.const -1'], expected: 'i32x4 1 2 3 4' }, +], {}); diff --git a/js/src/jit-test/tests/wasm/simd/shift-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/shift-x64-ion-codegen.js new file mode 100644 index 0000000000..9c9f4871d2 --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/shift-x64-ion-codegen.js @@ -0,0 +1,26 @@ +// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x64 || getBuildConfiguration().simulator; include:codegen-x64-test.js + +// Test that there are no extraneous moves for a constant integer SIMD shift +// that can reuse its input for its output. See README-codegen.md for general +// information about this type of test case. +// +// There are test cases here for all codegen cases that include a potential move +// to set up the operation, but not for all shift operations in general. 
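+//
+// Note in particular that x64 has no 8-bit SIMD shifts: the constant
+// i8x16.shl below is expected to lower to two paddb instructions (each
+// addition doubles every lane), and i8x16.shr_u first masks off the bits that
+// would leak across byte lanes before using the 16-bit shift.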
+ +codegenTestX64_v128xLITERAL_v128( + [['i8x16.shl', '(i32.const 2)', ` +66 0f fc c0 paddb %xmm0, %xmm0 +66 0f fc c0 paddb %xmm0, %xmm0`], + ['i16x8.shl', '(i32.const 2)', `66 0f 71 f0 02 psllw \\$0x02, %xmm0`], + ['i32x4.shl', '(i32.const 2)', `66 0f 72 f0 02 pslld \\$0x02, %xmm0`], + ['i64x2.shl', '(i32.const 2)', `66 0f 73 f0 02 psllq \\$0x02, %xmm0`], + ['i8x16.shr_u', '(i32.const 2)', ` +66 0f db 05 ${RIPRADDR} pandx ${RIPR}, %xmm0 +66 0f 71 d0 02 psrlw \\$0x02, %xmm0`], + ['i16x8.shr_s', '(i32.const 2)', `66 0f 71 e0 02 psraw \\$0x02, %xmm0`], + ['i16x8.shr_u', '(i32.const 2)', `66 0f 71 d0 02 psrlw \\$0x02, %xmm0`], + ['i32x4.shr_s', '(i32.const 2)', `66 0f 72 e0 02 psrad \\$0x02, %xmm0`], + ['i32x4.shr_u', '(i32.const 2)', `66 0f 72 d0 02 psrld \\$0x02, %xmm0`], + ['i64x2.shr_u', '(i32.const 2)', `66 0f 73 d0 02 psrlq \\$0x02, %xmm0`]] ); + + diff --git a/js/src/jit-test/tests/wasm/simd/shuffle-x86-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/shuffle-x86-ion-codegen.js new file mode 100644 index 0000000000..b9d0cc0e82 --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/shuffle-x86-ion-codegen.js @@ -0,0 +1,88 @@ +// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x64 || getBuildConfiguration().simulator || isAvxPresent(); include:codegen-x64-test.js + +// Test that there are no extraneous moves or fixups for SIMD shuffle +// operations. See README-codegen.md for general information about this type of +// test case. + +codegenTestX64_v128xv128_v128([ + // Identity op on first argument should generate no code + ['i8x16.shuffle 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15', + ''], + + // Identity op on second argument should generate a move + ['i8x16.shuffle 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31', + `66 0f 6f c1 movdqa %xmm1, %xmm0`], + + // Broadcast a byte from first argument + ['i8x16.shuffle 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5', + ` +66 0f 60 c0 punpcklbw %xmm0, %xmm0 +f3 0f 70 c0 55 pshufhw \\$0x55, %xmm0, %xmm0 +66 0f 70 c0 aa pshufd \\$0xAA, %xmm0, %xmm0`], + + // Broadcast a word from first argument + ['i8x16.shuffle 4 5 4 5 4 5 4 5 4 5 4 5 4 5 4 5', + ` +f2 0f 70 c0 aa pshuflw \\$0xAA, %xmm0, %xmm0 +66 0f 70 c0 00 pshufd \\$0x00, %xmm0, %xmm0`], + + // Permute bytes + ['i8x16.shuffle 2 1 4 3 6 5 8 7 10 9 12 11 14 13 0 15', +` +66 0f 38 00 05 ${RIPRADDR} pshufbx ${RIPR}, %xmm0`], + + // Permute words + ['i8x16.shuffle 2 3 0 1 6 7 4 5 10 11 8 9 14 15 12 13', +` +f2 0f 70 c0 b1 pshuflw \\$0xB1, %xmm0, %xmm0 +f3 0f 70 c0 b1 pshufhw \\$0xB1, %xmm0, %xmm0`], + + // Permute doublewords + ['i8x16.shuffle 4 5 6 7 0 1 2 3 12 13 14 15 8 9 10 11', + `66 0f 70 c0 b1 pshufd \\$0xB1, %xmm0, %xmm0`], + + // Rotate right + ['i8x16.shuffle 13 14 15 0 1 2 3 4 5 6 7 8 9 10 11 12', + `66 0f 3a 0f c0 0d palignr \\$0x0D, %xmm0, %xmm0`], + + // General shuffle + blend. The initial movdqa to scratch is unavoidable + // unless we can convince the compiler that it's OK to destroy xmm1. + ['i8x16.shuffle 15 29 0 1 2 1 2 0 3 4 7 8 16 8 17 9', +` +66 44 0f 6f f9 movdqa %xmm1, %xmm15 +66 44 0f 38 00 3d ${RIPRADDR} pshufbx ${RIPR}, %xmm15 +66 0f 38 00 05 ${RIPRADDR} pshufbx ${RIPR}, %xmm0 +66 41 0f eb c7 por %xmm15, %xmm0`]]); + +codegenTestX64_v128xLITERAL_v128( + [// Shift left bytes, shifting in zeroes + // + // Remember the low-order bytes are at the "right" end + // + // The pxor is a code generation bug: the operand is unused, and no + // code should need to be generated for it, and no register should + // be allocated to it. 
The lowering does not use that operand, but
+  // code generation still touches it.
+  ['i8x16.shuffle 16 16 16 0 1 2 3 4 5 6 7 8 9 10 11 12',
+   '(v128.const i32x4 0 0 0 0)',
+`
+66 0f 73 f8 03 pslldq \\$0x03, %xmm0`],
+
+   // Shift right bytes, shifting in zeroes. See above.
+   ['i8x16.shuffle 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18',
+    '(v128.const i32x4 0 0 0 0)',
+`
+66 0f 73 d8 03 psrldq \\$0x03, %xmm0`]]);
+
+// The SSE4.1 PBLENDVB instruction implicitly uses XMM0, so check that the
+// blend operation is generated as expected around that constraint.
+codegenTestX64_adhoc(
+    `(func (export "f") (param v128 v128 v128 v128) (result v128)
+       (i8x16.shuffle 0 17 2 3 4 5 6 7 24 25 26 11 12 13 30 15
+         (local.get 2)(local.get 3)))`,
+    'f',
+`
+66 0f 6f ca movdqa %xmm2, %xmm1
+66 0f 6f 05 ${RIPRADDR} movdqax ${RIPR}, %xmm0
+66 0f 38 10 cb pblendvb %xmm3, %xmm1
+66 0f 6f c1 movdqa %xmm1, %xmm0`);
diff --git a/js/src/jit-test/tests/wasm/simd/simd-partial-oob-store.js b/js/src/jit-test/tests/wasm/simd/simd-partial-oob-store.js
new file mode 100644
index 0000000000..00daceb438
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/simd-partial-oob-store.js
@@ -0,0 +1,38 @@
+// |jit-test| skip-if: !wasmSimdEnabled()
+
+// Cloned from ad-hack.js but kept separate because it may have to be disabled
+// on some devices until bugs are fixed.
+
+// Bug 1666747 - partially OOB stores are not handled correctly on ARM and ARM64.
+// The simulators don't implement the correct semantics anyhow, so when the bug
+// is fixed in the code generator they must remain excluded here.
+var conf = getBuildConfiguration();
+if (conf.arm64 || conf["arm64-simulator"] || conf.arm || conf["arm-simulator"])
+    quit(0);
+
+function get(arr, loc, len) {
+    let res = [];
+    for ( let i=0; i < len; i++ ) {
+        res.push(arr[loc+i]);
+    }
+    return res;
+}
+
+for ( let offset of iota(16) ) {
+    var ins = wasmEvalText(`
+  (module
+    (memory (export "mem") 1 1)
+    (func (export "f") (param $loc i32)
+      (v128.store offset=${offset} (local.get $loc) (v128.const i32x4 ${1+offset} 2 3 ${4+offset*2}))))`);
+
+    // OOB write should trap.
+    assertErrorMessage(() => ins.exports.f(65536-15),
+                       WebAssembly.RuntimeError,
+                       /index out of bounds/)
+
+    // Ensure that OOB writes don't write anything.
+    let start = 65536 - 15 + offset;
+    let legalBytes = 65536 - start;
+    var mem8 = new Uint8Array(ins.exports.mem.buffer);
+    assertSame(get(mem8, start, legalBytes), iota(legalBytes).map((_) => 0));
+}
diff --git a/js/src/jit-test/tests/wasm/simd/splat-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/splat-x64-ion-codegen.js
new file mode 100644
index 0000000000..ce1d7adb12
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/splat-x64-ion-codegen.js
@@ -0,0 +1,29 @@
+// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x64 || getBuildConfiguration().simulator || isAvxPresent(); include:codegen-x64-test.js
+
+// Test that there are no extraneous moves or other instructions for splat and
+// other splat-like operations that can reuse their input for their output
+// and/or have a specializable code path. See README-codegen.md for general
+// information about this type of test case.
+
+codegenTestX64_PTYPE_v128(
+ [['f32x4.splat', 'f32', `0f c6 c0 00 shufps \\$0x00, %xmm0, %xmm0`],
+  ['f64x2.splat', 'f64', `f2 0f 12 c0 movddup %xmm0, %xmm0`]] , {log:true});
+
+// Skip these on Win64 because the ABI differs: the parameter register is
+// different, which changes not just the register name but also the binary
+// encoding in larger ways.
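+// (On Win64 the first integer argument arrives in %rcx rather than %rdi, so
+// the memory operands below would disassemble differently.)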
+
+if (!getBuildConfiguration().windows) {
+    codegenTestX64_PTYPE_v128(
+     [['v128.load32_splat', 'i32', `
+f3 41 0f 10 04 3f movssl \\(%r15,%rdi,1\\), %xmm0
+0f c6 c0 00 shufps \\$0x00, %xmm0, %xmm0`],
+      ['v128.load64_splat', 'i32', `f2 41 0f 12 04 3f movddupq \\(%r15,%rdi,1\\), %xmm0`],
+      ['v128.load8x8_s', 'i32', `66 41 0f 38 20 04 3f pmovsxbwq \\(%r15,%rdi,1\\), %xmm0`],
+      ['v128.load8x8_u', 'i32', `66 41 0f 38 30 04 3f pmovzxbwq \\(%r15,%rdi,1\\), %xmm0`],
+      ['v128.load16x4_s', 'i32', `66 41 0f 38 23 04 3f pmovsxwdq \\(%r15,%rdi,1\\), %xmm0`],
+      ['v128.load16x4_u', 'i32', `66 41 0f 38 33 04 3f pmovzxwdq \\(%r15,%rdi,1\\), %xmm0`],
+      ['v128.load32x2_s', 'i32', `66 41 0f 38 25 04 3f pmovsxdqq \\(%r15,%rdi,1\\), %xmm0`],
+      ['v128.load32x2_u', 'i32', `66 41 0f 38 35 04 3f pmovzxdqq \\(%r15,%rdi,1\\), %xmm0`]],
+     {memory:1});
+}
diff --git a/js/src/jit-test/tests/wasm/simd/validation.js b/js/src/jit-test/tests/wasm/simd/validation.js
new file mode 100644
index 0000000000..46b8df620f
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/validation.js
@@ -0,0 +1,368 @@
+// |jit-test| skip-if: !wasmSimdEnabled()
+
+function testValid(code) {
+    assertEq(WebAssembly.validate(wasmTextToBinary(code)), true);
+}
+
+function testInvalid(code) {
+    assertEq(WebAssembly.validate(wasmTextToBinary(code)), false);
+}
+
+// v128 -> v128
+
+for (let op of [
+    'i8x16.neg',
+    'i8x16.abs',
+    'i16x8.neg',
+    'i16x8.abs',
+    'i16x8.extend_low_i8x16_s',
+    'i16x8.extend_high_i8x16_s',
+    'i16x8.extend_low_i8x16_u',
+    'i16x8.extend_high_i8x16_u',
+    'i32x4.neg',
+    'i32x4.abs',
+    'i32x4.extend_low_i16x8_s',
+    'i32x4.extend_high_i16x8_s',
+    'i32x4.extend_low_i16x8_u',
+    'i32x4.extend_high_i16x8_u',
+    'i32x4.trunc_sat_f32x4_s',
+    'i32x4.trunc_sat_f32x4_u',
+    'i64x2.neg',
+    'f32x4.abs',
+    'f32x4.neg',
+    'f32x4.sqrt',
+    'f32x4.convert_i32x4_s',
+    'f32x4.convert_i32x4_u',
+    'f64x2.abs',
+    'f64x2.neg',
+    'f64x2.sqrt',
+    'v128.not'])
+{
+    testValid(`(module
+                 (func (param v128) (result v128)
+                   (${op} (local.get 0))))`);
+}
+
+for (let [prefix, result, suffix] of [['i8x16', 'i32', '_s'],
+                                      ['i8x16', 'i32', '_u'],
+                                      ['i16x8', 'i32', '_s'],
+                                      ['i16x8', 'i32', '_u'],
+                                      ['i32x4', 'i32', ''],
+                                      ['i64x2', 'i64', ''],
+                                      ['f32x4', 'f32', ''],
+                                      ['f64x2', 'f64', '']])
+{
+    testValid(`(module
+                 (func (param v128) (result ${result})
+                   (${prefix}.extract_lane${suffix} 1 (local.get 0))))`);
+}
+
+// The wat parser accepts small out-of-range lane indices, but they must be
+// caught in validation.
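+// ("Small" here means the index still fits in the instruction's u8 immediate,
+// so the text parses; 16 is the first out-of-range lane for i8x16 and is only
+// rejected at validation time, as below.)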
+ +testInvalid( + `(module + (func (param v128) (result i32) + (i8x16.extract_lane_u 16 (local.get 0))))`); + +// (v128, v128) -> v128 + +for (let op of [ + 'i8x16.eq', + 'i8x16.ne', + 'i8x16.lt_s', + 'i8x16.lt_u', + 'i8x16.gt_s', + 'i8x16.gt_u', + 'i8x16.le_s', + 'i8x16.le_u', + 'i8x16.ge_s', + 'i8x16.ge_u', + 'i16x8.eq', + 'i16x8.ne', + 'i16x8.lt_s', + 'i16x8.lt_u', + 'i16x8.gt_s', + 'i16x8.gt_u', + 'i16x8.le_s', + 'i16x8.le_u', + 'i16x8.ge_s', + 'i16x8.ge_u', + 'i32x4.eq', + 'i32x4.ne', + 'i32x4.lt_s', + 'i32x4.lt_u', + 'i32x4.gt_s', + 'i32x4.gt_u', + 'i32x4.le_s', + 'i32x4.le_u', + 'i32x4.ge_s', + 'i32x4.ge_u', + 'f32x4.eq', + 'f32x4.ne', + 'f32x4.lt', + 'f32x4.gt', + 'f32x4.le', + 'f32x4.ge', + 'f64x2.eq', + 'f64x2.ne', + 'f64x2.lt', + 'f64x2.gt', + 'f64x2.le', + 'f64x2.ge', + 'v128.and', + 'v128.or', + 'v128.xor', + 'v128.andnot', + 'i8x16.avgr_u', + 'i16x8.avgr_u', + 'i8x16.add', + 'i8x16.add_sat_s', + 'i8x16.add_sat_u', + 'i8x16.sub', + 'i8x16.sub_sat_s', + 'i8x16.sub_sat_u', + 'i8x16.min_s', + 'i8x16.max_s', + 'i8x16.min_u', + 'i8x16.max_u', + 'i16x8.add', + 'i16x8.add_sat_s', + 'i16x8.add_sat_u', + 'i16x8.sub', + 'i16x8.sub_sat_s', + 'i16x8.sub_sat_u', + 'i16x8.mul', + 'i16x8.min_s', + 'i16x8.max_s', + 'i16x8.min_u', + 'i16x8.max_u', + 'i32x4.add', + 'i32x4.sub', + 'i32x4.mul', + 'i32x4.min_s', + 'i32x4.max_s', + 'i32x4.min_u', + 'i32x4.max_u', + 'i64x2.add', + 'i64x2.sub', + 'i64x2.mul', + 'f32x4.add', + 'f32x4.sub', + 'f32x4.mul', + 'f32x4.div', + 'f32x4.min', + 'f32x4.max', + 'f64x2.add', + 'f64x2.sub', + 'f64x2.mul', + 'f64x2.div', + 'f64x2.min', + 'f64x2.max', + 'i8x16.narrow_i16x8_s', + 'i8x16.narrow_i16x8_u', + 'i16x8.narrow_i32x4_s', + 'i16x8.narrow_i32x4_u', + 'i8x16.swizzle']) +{ + testValid(`(module + (func (param v128) (param v128) (result v128) + (${op} (local.get 0) (local.get 1))))`); +} + +testValid(`(module + (func (param v128) (param v128) (result v128) + (i8x16.shuffle 0 16 1 17 2 18 3 19 4 20 5 21 6 22 7 23 (local.get 0) (local.get 1))))`); + +assertErrorMessage(() => testValid( + `(module + (func (param v128) (param v128) (result v128) + (i8x16.shuffle 0 16 1 17 2 18 3 19 4 20 5 21 6 22 7 (local.get 0) (local.get 1))))`), + SyntaxError, + /expected a u8/); + +// (v128, i32) -> v128 + +for (let op of [ + 'i8x16.shl', + 'i8x16.shr_s', + 'i8x16.shr_u', + 'i16x8.shl', + 'i16x8.shr_s', + 'i16x8.shr_u', + 'i32x4.shl', + 'i32x4.shr_s', + 'i32x4.shr_u', + 'i64x2.shl', + 'i64x2.shr_s', + 'i64x2.shr_u']) +{ + testValid(`(module + (func (param v128) (param i32) (result v128) + (${op} (local.get 0) (local.get 1))))`); +} + +// v128 -> i32 + +for (let op of [ + 'v128.any_true', + 'i8x16.all_true', + 'i16x8.all_true', + 'i32x4.all_true', + 'i8x16.bitmask', + 'i16x8.bitmask', + 'i32x4.bitmask']) +{ + testValid(`(module + (func (param v128) (result i32) + (${op} (local.get 0))))`); +} + +// T -> V128 + +for (let [op, input] of [ + ['i8x16.splat', 'i32'], + ['i16x8.splat', 'i32'], + ['i32x4.splat', 'i32'], + ['i64x2.splat', 'i64'], + ['f32x4.splat', 'f32'], + ['f64x2.splat', 'f64']]) +{ + testValid(`(module + (func (param ${input}) (result v128) + (${op} (local.get 0))))`); +} + +// i32 -> v128 + +for (let op of [ + 'v128.load', + 'v128.load8_splat', + 'v128.load16_splat', + 'v128.load32_splat', + 'v128.load64_splat', + 'v128.load8x8_s', + 'v128.load8x8_u', + 'v128.load16x4_s', + 'v128.load16x4_u', + 'v128.load32x2_s', + 'v128.load32x2_u']) +{ + testValid(`(module + (memory 1 1) + (func (param i32) (result v128) + (${op} (local.get 0))))`); +} + +testValid(`(module + (func 
(result v128) + (v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)) + (func (result v128) + (v128.const i16x8 0 1 2 3 4 5 6 7)) + (func (result v128) + (v128.const i32x4 0 1 2 3)) + (func (result v128) + (v128.const i64x2 0 1)) + (func (result v128) + (v128.const f32x4 0 1 2 3)) + (func (result v128) + (v128.const f32x4 0.5 1.5 2.5 3.5)) + (func (result v128) + (v128.const f64x2 0 1)) + (func (result v128) + (v128.const f64x2 0.5 1.5)))`); + +assertErrorMessage(() => testValid( + `(module + (func (result v128) + (v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14)))`), + SyntaxError, + /expected a i8/); + +assertErrorMessage(() => testValid( + `(module + (func (result v128) + (v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 256 15)))`), + SyntaxError, + /invalid i8 number/); + +assertErrorMessage(() => testValid( + `(module + (func (result v128) + (v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 3.14 15)))`), + SyntaxError, + /expected a i8/); + +assertErrorMessage(() => testValid( + `(module + (func (result v128) + (v128.const f32x4 0.5 1.5 2.5))`), + SyntaxError, + /expected a float/); + +assertErrorMessage(() => testValid( + `(module + (func (result v128) + (v128.const i8x8 0 1 2 3 4 5 6 7)))`), + SyntaxError, + /expected one of/); + +// v128 -> () + +testValid(`(module + (memory 1 1) + (func (param i32) (param v128) + (v128.store (local.get 0) (local.get 1))))`); + +// (v128, v128, v128) -> v128 + +testValid(`(module + (func (param v128) (param v128) (param v128) (result v128) + (v128.bitselect (local.get 0) (local.get 1) (local.get 2))))`); + +// (v128, t) -> v128 + +for (let [prefix, input] of [['i8x16', 'i32'], + ['i16x8', 'i32'], + ['i32x4', 'i32'], + ['i64x2', 'i64'], + ['f32x4', 'f32'], + ['f64x2', 'f64']]) +{ + testValid(`(module + (func (param v128) (param ${input}) (result v128) + (${prefix}.replace_lane 1 (local.get 0) (local.get 1))))`); +} + +testInvalid( + `(module + (func (param v128) (param i32) (result v128) + (i8x16.replace_lane 16 (local.get 0) (local.get 1))))`); + +// Global variables + +testValid(`(module + (global $g (mut v128) (v128.const f32x4 1 2 3 4)))`); + +testValid(`(module + (global $g (import "m" "g") v128) + (global $h (mut v128) (global.get $g)))`); + +testValid(`(module + (global $g (export "g") v128 (v128.const f32x4 1 2 3 4)))`); + +testValid(`(module + (global $g (export "g") (mut v128) (v128.const f32x4 1 2 3 4)))`); + +// Imports, exports, calls + +testValid(`(module + (import "m" "g" (func (param v128) (result v128))) + (func (export "f") (param v128) (result v128) + (f64x2.add (local.get 0) (v128.const f64x2 1 2))))`); + +testValid(`(module + (func $f (param v128) (result v128) + (i8x16.neg (local.get 0))) + (func $g (export "g") (param v128) (result v128) + (call $f (local.get 0))))`); -- cgit v1.2.3