summaryrefslogtreecommitdiffstats
path: root/js/src/jit-test/tests/wasm/simd
diff options
context:
space:
mode:
Diffstat (limited to 'js/src/jit-test/tests/wasm/simd')
-rw-r--r--js/src/jit-test/tests/wasm/simd/ad-hack-binop-preamble.js334
-rw-r--r--js/src/jit-test/tests/wasm/simd/ad-hack-extra.js697
-rw-r--r--js/src/jit-test/tests/wasm/simd/ad-hack-preamble.js211
-rw-r--r--js/src/jit-test/tests/wasm/simd/ad-hack-simple-binops0.js7
-rw-r--r--js/src/jit-test/tests/wasm/simd/ad-hack-simple-binops1.js7
-rw-r--r--js/src/jit-test/tests/wasm/simd/ad-hack-simple-binops2.js7
-rw-r--r--js/src/jit-test/tests/wasm/simd/ad-hack-simple-unops.js122
-rw-r--r--js/src/jit-test/tests/wasm/simd/ad-hack.js1747
-rw-r--r--js/src/jit-test/tests/wasm/simd/avx2-x64-ion-codegen.js584
-rw-r--r--js/src/jit-test/tests/wasm/simd/baseline-bug1636235.js111
-rw-r--r--js/src/jit-test/tests/wasm/simd/binop-x64-ion-codegen.js255
-rw-r--r--js/src/jit-test/tests/wasm/simd/binop-x86-ion-codegen.js20
-rw-r--r--js/src/jit-test/tests/wasm/simd/bitselect-x64-ion-codegen.js45
-rw-r--r--js/src/jit-test/tests/wasm/simd/cmp-bitselect.js107
-rw-r--r--js/src/jit-test/tests/wasm/simd/cmp-x64-ion-codegen.js77
-rw-r--r--js/src/jit-test/tests/wasm/simd/const-arm64-vixl-codegen.js109
-rw-r--r--js/src/jit-test/tests/wasm/simd/const-x64-ion-codegen.js28
-rw-r--r--js/src/jit-test/tests/wasm/simd/cvt-x64-ion-codegen.js27
-rw-r--r--js/src/jit-test/tests/wasm/simd/debug-bug1644759.js20
-rw-r--r--js/src/jit-test/tests/wasm/simd/directives.txt1
-rw-r--r--js/src/jit-test/tests/wasm/simd/disabled.js28
-rw-r--r--js/src/jit-test/tests/wasm/simd/experimental.js411
-rw-r--r--js/src/jit-test/tests/wasm/simd/ion-analysis.js902
-rw-r--r--js/src/jit-test/tests/wasm/simd/ion-bug1641973.js13
-rw-r--r--js/src/jit-test/tests/wasm/simd/ion-bug1688262.js22
-rw-r--r--js/src/jit-test/tests/wasm/simd/ion-bug1688713.js24
-rw-r--r--js/src/jit-test/tests/wasm/simd/js-api.js130
-rw-r--r--js/src/jit-test/tests/wasm/simd/neg-abs-not-x64-ion-codegen.js34
-rw-r--r--js/src/jit-test/tests/wasm/simd/pairwise-x64-ion-codegen.js38
-rw-r--r--js/src/jit-test/tests/wasm/simd/pmaddubsw-x64-ion-codegen.js154
-rw-r--r--js/src/jit-test/tests/wasm/simd/reduce-x64-ion-codegen.js46
-rw-r--r--js/src/jit-test/tests/wasm/simd/select.js33
-rw-r--r--js/src/jit-test/tests/wasm/simd/shift-x64-ion-codegen.js26
-rw-r--r--js/src/jit-test/tests/wasm/simd/shuffle-x86-ion-codegen.js88
-rw-r--r--js/src/jit-test/tests/wasm/simd/simd-partial-oob-store.js38
-rw-r--r--js/src/jit-test/tests/wasm/simd/splat-x64-ion-codegen.js29
-rw-r--r--js/src/jit-test/tests/wasm/simd/validation.js368
37 files changed, 6900 insertions, 0 deletions
diff --git a/js/src/jit-test/tests/wasm/simd/ad-hack-binop-preamble.js b/js/src/jit-test/tests/wasm/simd/ad-hack-binop-preamble.js
new file mode 100644
index 0000000000..dd1443a1e7
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/ad-hack-binop-preamble.js
@@ -0,0 +1,334 @@
+// |jit-test| skip-if: true
+
+// Common code to test simple binary operators. See runSimpleBinopTest below.
+
+// Produce the WAT text for the constant-operand variants of binary operator
+// `op`. For every [lhs, rhs] pair in `inputs` this emits three exported
+// runners -- `run_constlhs<N>`, `run_constrhs<N>` and `run_constboth<N>` --
+// each of which stores its v128 result at address 0. <N> is the zero-based
+// position of the pair within `inputs`. `memtype.layoutName` supplies the
+// shape prefix of the `v128.const` literals.
+function expandConstantBinopInputs(op, memtype, inputs) {
+    const variantsForPair = ([a, b], index) => {
+        const lhsText = `${memtype.layoutName} ${a.map(jsValueToWasmName).join(' ')}`;
+        const rhsText = `${memtype.layoutName} ${b.map(jsValueToWasmName).join(' ')}`;
+        return `
+ ;; lhs is constant, rhs is variable
+ (func (export "run_constlhs${index}")
+ (v128.store (i32.const 0)
+ (call $doit_constlhs${index} (v128.const ${rhsText}))))
+ (func $doit_constlhs${index} (param $b v128) (result v128)
+ (${op} (v128.const ${lhsText}) (local.get $b)))
+
+ ;; rhs is constant, lhs is variable
+ (func (export "run_constrhs${index}")
+ (v128.store (i32.const 0)
+ (call $doit_constrhs${index} (v128.const ${lhsText}))))
+ (func $doit_constrhs${index} (param $a v128) (result v128)
+ (${op} (local.get $a) (v128.const ${rhsText})))
+
+ ;; both operands are constant
+ (func (export "run_constboth${index}")
+ (v128.store (i32.const 0)
+ (call $doit_constboth${index})))
+ (func $doit_constboth${index} (result v128)
+ (${op} (v128.const ${lhsText}) (v128.const ${rhsText})))`;
+    };
+    // One text chunk per input pair, concatenated in input order.
+    return Array.from(inputs, variantsForPair).join('');
+}
+
+// Instantiate a one-page module that exports `run` (both operands loaded from
+// addresses 16 and 32, result stored at 0) plus the constant-operand runners
+// generated by expandConstantBinopInputs.
+//
+// Returns [instance, mem, resultmem]: `mem` is a `memtype` view of the
+// exported memory; `resultmem` is the same view unless a different
+// `resultmemtype` is given, in which case it is a view of that type over the
+// same buffer.
+function insAndMemBinop(op, memtype, resultmemtype, inputs) {
+    const instance = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+
+ ;; both arguments are variable
+ (func (export "run")
+ (v128.store (i32.const 0)
+ (call $doit (v128.load (i32.const 16)) (v128.load (i32.const 32)))))
+ (func $doit (param $a v128) (param $b v128) (result v128)
+ (${op} (local.get $a) (local.get $b)))
+
+ ${expandConstantBinopInputs(op, memtype, inputs)})`);
+    const buffer = instance.exports.mem.buffer;
+    const mem = new memtype(buffer);
+    let resultmem = mem;
+    if (resultmemtype && memtype != resultmemtype) {
+        // The result has a different lane type than the inputs.
+        resultmem = new resultmemtype(buffer);
+    }
+    return [instance, mem, resultmem];
+}
+
+// Scalar reference models for the integer, bitwise and comparison lane
+// operations. The factories taking `bits` return a two-argument function whose
+// result is wrapped to that lane width; the comparison factories take the
+// value to produce for "true" and produce 0 for "false".
+function add(bits) {
+    return (lhs, rhs) => sign_extend(lhs + rhs, bits);
+}
+function add64(x, y) {
+    const sum = BigInt(x) + BigInt(y);
+    return sign_extend(sum, 64);
+}
+function sub(bits) {
+    return (lhs, rhs) => sign_extend(lhs - rhs, bits);
+}
+function sub64(x, y) {
+    const difference = BigInt(x) - BigInt(y);
+    return sign_extend(difference, 64);
+}
+// Even 32-bit multiply can overflow a Number, so always use BigInt
+function mul(bits) {
+    return (lhs, rhs) => sign_extend(BigInt(lhs) * BigInt(rhs), bits);
+}
+function div(x, y) {
+    return x / y;
+}
+function min(x, y) {
+    if (x < y) {
+        return x;
+    }
+    return y;
+}
+function max(x, y) {
+    if (x > y) {
+        return x;
+    }
+    return y;
+}
+// The bitwise models work on one byte at a time.
+function and(x, y) {
+    return zero_extend(x & y, 8);
+}
+function or(x, y) {
+    return zero_extend(x | y, 8);
+}
+function xor(x, y) {
+    return zero_extend(x ^ y, 8);
+}
+function andnot(x, y) {
+    return zero_extend(x & ~y, 8);
+}
+// Rounding average: add one before halving.
+function avgr(x, y) {
+    return (x + y + 1) >> 1;
+}
+function eq(truth) {
+    return (lhs, rhs) => (lhs == rhs ? truth : 0);
+}
+function ne(truth) {
+    return (lhs, rhs) => (lhs != rhs ? truth : 0);
+}
+function lt(truth) {
+    return (lhs, rhs) => (lhs < rhs ? truth : 0);
+}
+function gt(truth) {
+    return (lhs, rhs) => (lhs > rhs ? truth : 0);
+}
+function le(truth) {
+    return (lhs, rhs) => (lhs <= rhs ? truth : 0);
+}
+function ge(truth) {
+    return (lhs, rhs) => (lhs >= rhs ? truth : 0);
+}
+
+// Single-precision reference arithmetic: the operation is carried out on JS
+// Numbers and the result is then rounded to float32 with Math.fround.
+function fadd(x, y) {
+    const sum = x + y;
+    return Math.fround(sum);
+}
+function fsub(x, y) {
+    const difference = x - y;
+    return Math.fround(difference);
+}
+function fmul(x, y) {
+    const product = x * y;
+    return Math.fround(product);
+}
+function fdiv(x, y) {
+    const quotient = x / y;
+    return Math.fround(quotient);
+}
+// Scalar reference for f32x4.min / f64x2.min (dmin aliases this function).
+//
+// A NaN operand propagates to the result; when both operands are NaN, x is
+// returned. Zeros of opposite sign compare equal under ==, so they are told
+// apart explicitly: WebAssembly's min yields -0 for (+0, -0) and (-0, +0).
+function fmin(x, y) {
+    if (x == y) {
+        // Only +0 and -0 can be == without being identical. If y is the
+        // negative zero return it; otherwise x is already the right answer.
+        return Object.is(y, -0) ? y : x;
+    }
+    if (x < y) return x;
+    if (y < x) return y;
+    // Unordered comparison: at least one operand is NaN.
+    if (isNaN(x)) return x;
+    return y;
+}
+// Scalar reference for f32x4.max / f64x2.max (dmax aliases this function).
+//
+// A NaN operand propagates to the result; when both operands are NaN, x is
+// returned. Zeros of opposite sign compare equal under ==, so they are told
+// apart explicitly: WebAssembly's max yields +0 for (+0, -0) and (-0, +0).
+function fmax(x, y) {
+    if (x == y) {
+        // Only +0 and -0 can be == without being identical. If y is the
+        // positive zero return it; otherwise x is already the right answer.
+        return Object.is(y, 0) ? y : x;
+    }
+    if (x > y) return x;
+    if (y > x) return y;
+    // Unordered comparison: at least one operand is NaN.
+    if (isNaN(x)) return x;
+    return y;
+}
+// Double-precision reference arithmetic: plain JS Number operations, with no
+// extra rounding step.
+function dadd(x, y) {
+    const sum = x + y;
+    return sum;
+}
+function dsub(x, y) {
+    const difference = x - y;
+    return difference;
+}
+function dmul(x, y) {
+    const product = x * y;
+    return product;
+}
+function ddiv(x, y) {
+    const quotient = x / y;
+    return quotient;
+}
+var dmax = fmax;
+var dmin = fmin;
+
+// Build a signed saturating version of the two-argument function `op`: both
+// operands are sign-extended from `bits` bits, `op` is applied, and the result
+// is passed through signed_saturate for the same width.
+function op_sat_s(bits, op) {
+    return (x, y) => {
+        const lhs = sign_extend(x, bits);
+        const rhs = sign_extend(y, bits);
+        return signed_saturate(op(lhs, rhs), bits);
+    };
+}
+
+// Build an unsigned saturating version of the two-argument function `op`: both
+// operands are zero-extended from `bits` bits, `op` is applied, and the result
+// is passed through unsigned_saturate for the same width.
+function op_sat_u(bits, op) {
+    return (x, y) => {
+        const lhs = zero_extend(x, bits);
+        const rhs = zero_extend(y, bits);
+        return unsigned_saturate(op(lhs, rhs), bits);
+    };
+}
+
+// Saturating add/subtract reference models for a lane width of `bits`, signed
+// (via op_sat_s) and unsigned (via op_sat_u).
+function add_sat_s(bits) {
+    const plus = (lhs, rhs) => lhs + rhs;
+    return op_sat_s(bits, plus);
+}
+function sub_sat_s(bits) {
+    const minus = (lhs, rhs) => lhs - rhs;
+    return op_sat_s(bits, minus);
+}
+function add_sat_u(bits) {
+    const plus = (lhs, rhs) => lhs + rhs;
+    return op_sat_u(bits, plus);
+}
+function sub_sat_u(bits) {
+    const minus = (lhs, rhs) => lhs - rhs;
+    return op_sat_u(bits, minus);
+}
+
+// Lane-wise integer min/max reference models for a lane width of `bits`. The
+// signed forms compare the sign-extended operands and sign-extend the winner
+// again; the unsigned forms compare the zero-extended operands.
+function max_s(bits) {
+    return (x, y) => {
+        const lhs = sign_extend(x, bits);
+        const rhs = sign_extend(y, bits);
+        return sign_extend(max(lhs, rhs), bits);
+    };
+}
+
+function min_s(bits) {
+    return (x, y) => {
+        const lhs = sign_extend(x, bits);
+        const rhs = sign_extend(y, bits);
+        return sign_extend(min(lhs, rhs), bits);
+    };
+}
+
+function max_u(bits) {
+    return (x, y) => {
+        const lhs = zero_extend(x, bits);
+        const rhs = zero_extend(y, bits);
+        return max(lhs, rhs);
+    };
+}
+
+function min_u(bits) {
+    return (x, y) => {
+        const lhs = zero_extend(x, bits);
+        const rhs = zero_extend(y, bits);
+        return min(lhs, rhs);
+    };
+}
+
+// Pseudo-min/max reference models: the second operand is chosen only when the
+// single `<` comparison is true, otherwise the first operand is returned
+// unchanged (this includes every comparison that involves NaN).
+function pmin(x, y) {
+    if (y < x) {
+        return y;
+    }
+    return x;
+}
+function pmax(x, y) {
+    if (x < y) {
+        return y;
+    }
+    return x;
+}
+
+// Sanity checks of the reference min/max models themselves, run at load time.
+// With 8-bit lanes the input 128 acts as -128 in the signed variants (so it
+// loses max_s and wins min_s) and stays 128 in the unsigned variants.
+assertEq(max_s(8)(1, 2), 2);
+assertEq(max_s(8)(1, 128), 1);
+assertEq(min_s(8)(1, 2), 1);
+assertEq(min_s(8)(1, 128), -128);
+assertEq(max_u(8)(1, 2), 2);
+assertEq(max_u(8)(1, 128), 128);
+assertEq(min_u(8)(1, 2), 1);
+assertEq(min_u(8)(1, 128), 1);
+
+// Table of the simple binary operators exercised by runSimpleBinopTest. Each
+// entry is [op, memtype, rop, resultmemtype]:
+//   op            - the wasm instruction name
+//   memtype       - typed-array constructor used to view the operands
+//   rop           - scalar reference function computing one result lane
+//   resultmemtype - optional typed-array constructor used to view the result;
+//                   when omitted the result is read through memtype (the
+//                   floating-point comparisons supply an integer array type)
+var binopTests =
+ [['i8x16.add', Int8Array, add(8)],
+ ['i16x8.add', Int16Array, add(16)],
+ ['i32x4.add', Int32Array, add(32)],
+ ['i64x2.add', BigInt64Array, add64],
+ ['i8x16.sub', Int8Array, sub(8)],
+ ['i16x8.sub', Int16Array, sub(16)],
+ ['i32x4.sub', Int32Array, sub(32)],
+ ['i64x2.sub', BigInt64Array, sub64],
+ ['i8x16.add_sat_s', Int8Array, add_sat_s(8)],
+ ['i8x16.add_sat_u', Uint8Array, add_sat_u(8)],
+ ['i16x8.add_sat_s', Int16Array, add_sat_s(16)],
+ ['i16x8.add_sat_u', Uint16Array, add_sat_u(16)],
+ ['i8x16.sub_sat_s', Int8Array, sub_sat_s(8)],
+ ['i8x16.sub_sat_u', Uint8Array, sub_sat_u(8)],
+ ['i16x8.sub_sat_s', Int16Array, sub_sat_s(16)],
+ ['i16x8.sub_sat_u', Uint16Array, sub_sat_u(16)],
+ ['i16x8.mul', Int16Array, mul(16)],
+ ['i32x4.mul', Int32Array, mul(32)],
+ ['i64x2.mul', BigInt64Array, mul(64)],
+ ['i8x16.avgr_u', Uint8Array, avgr],
+ ['i16x8.avgr_u', Uint16Array, avgr],
+ ['i8x16.max_s', Int8Array, max_s(8)],
+ ['i8x16.max_u', Uint8Array, max_u(8)],
+ ['i8x16.min_s', Int8Array, min_s(8)],
+ ['i8x16.min_u', Uint8Array, min_u(8)],
+ ['i16x8.max_s', Int16Array, max_s(16)],
+ ['i16x8.max_u', Uint16Array, max_u(16)],
+ ['i16x8.min_s', Int16Array, min_s(16)],
+ ['i16x8.min_u', Uint16Array, min_u(16)],
+ ['i32x4.max_s', Int32Array, max_s(32)],
+ ['i32x4.max_u', Uint32Array, max_u(32)],
+ ['i32x4.min_s', Int32Array, min_s(32)],
+ ['i32x4.min_u', Uint32Array, min_u(32)],
+ ['v128.and', Uint8Array, and],
+ ['v128.or', Uint8Array, or],
+ ['v128.xor', Uint8Array, xor],
+ ['v128.andnot', Uint8Array, andnot],
+ ['f32x4.add', Float32Array, fadd],
+ ['f32x4.sub', Float32Array, fsub],
+ ['f32x4.mul', Float32Array, fmul],
+ ['f32x4.div', Float32Array, fdiv],
+ ['f32x4.min', Float32Array, fmin],
+ ['f32x4.max', Float32Array, fmax],
+ ['f64x2.add', Float64Array, dadd],
+ ['f64x2.sub', Float64Array, dsub],
+ ['f64x2.mul', Float64Array, dmul],
+ ['f64x2.div', Float64Array, ddiv],
+ ['f64x2.min', Float64Array, dmin],
+ ['f64x2.max', Float64Array, dmax],
+ ['i8x16.eq', Int8Array, eq(-1)],
+ ['i8x16.ne', Int8Array, ne(-1)],
+ ['i8x16.lt_s', Int8Array, lt(-1)],
+ ['i8x16.gt_s', Int8Array, gt(-1)],
+ ['i8x16.le_s', Int8Array, le(-1)],
+ ['i8x16.ge_s', Int8Array, ge(-1)],
+ ['i8x16.gt_u', Uint8Array, gt(0xFF)],
+ ['i8x16.ge_u', Uint8Array, ge(0xFF)],
+ ['i8x16.lt_u', Uint8Array, lt(0xFF)],
+ ['i8x16.le_u', Uint8Array, le(0xFF)],
+ ['i16x8.eq', Int16Array, eq(-1)],
+ ['i16x8.ne', Int16Array, ne(-1)],
+ ['i16x8.lt_s', Int16Array, lt(-1)],
+ ['i16x8.gt_s', Int16Array, gt(-1)],
+ ['i16x8.le_s', Int16Array, le(-1)],
+ ['i16x8.ge_s', Int16Array, ge(-1)],
+ ['i16x8.gt_u', Uint16Array, gt(0xFFFF)],
+ ['i16x8.ge_u', Uint16Array, ge(0xFFFF)],
+ ['i16x8.lt_u', Uint16Array, lt(0xFFFF)],
+ ['i16x8.le_u', Uint16Array, le(0xFFFF)],
+ ['i32x4.eq', Int32Array, eq(-1)],
+ ['i32x4.ne', Int32Array, ne(-1)],
+ ['i32x4.lt_s', Int32Array, lt(-1)],
+ ['i32x4.gt_s', Int32Array, gt(-1)],
+ ['i32x4.le_s', Int32Array, le(-1)],
+ ['i32x4.ge_s', Int32Array, ge(-1)],
+ ['i32x4.gt_u', Uint32Array, gt(0xFFFFFFFF)],
+ ['i32x4.ge_u', Uint32Array, ge(0xFFFFFFFF)],
+ ['i32x4.lt_u', Uint32Array, lt(0xFFFFFFFF)],
+ ['i32x4.le_u', Uint32Array, le(0xFFFFFFFF)],
+ ['f32x4.eq', Float32Array, eq(-1), Int32Array],
+ ['f32x4.ne', Float32Array, ne(-1), Int32Array],
+ ['f32x4.lt', Float32Array, lt(-1), Int32Array],
+ ['f32x4.gt', Float32Array, gt(-1), Int32Array],
+ ['f32x4.le', Float32Array, le(-1), Int32Array],
+ ['f32x4.ge', Float32Array, ge(-1), Int32Array],
+ ['f64x2.eq', Float64Array, eq(-1), BigInt64Array],
+ ['f64x2.ne', Float64Array, ne(-1), BigInt64Array],
+ ['f64x2.lt', Float64Array, lt(-1), BigInt64Array],
+ ['f64x2.gt', Float64Array, gt(-1), BigInt64Array],
+ ['f64x2.le', Float64Array, le(-1), BigInt64Array],
+ ['f64x2.ge', Float64Array, ge(-1), BigInt64Array],
+ ['f32x4.pmin', Float32Array, pmin],
+ ['f32x4.pmax', Float32Array, pmax],
+ ['f64x2.pmin', Float64Array, pmin],
+ ['f64x2.pmax', Float64Array, pmax]]
+
+// Run v128 x v128 -> v128 tests. Inputs are taken from the common input sets,
+// placed in memory, the test is run, and the result is extracted and checked.
+//
+// Runs each test with both operands as variables, with either one operand as
+// a constant, and with both operands as constants. Also checks NaN behavior
+// when appropriate.
+//
+// All runners that call this should use the same value for `ofParts` and should
+// pass different values for `part`, up to `ofParts` - 1.
+
+// Run slice `part` (zero-based) of the `ofParts` contiguous slices into which
+// binopTests is divided. For every operator in the slice, each input pair is
+// checked through the all-variable `run` export and through the three
+// constant-operand exports, against the lane-wise reference function `rop`.
+function runSimpleBinopTest(part, ofParts) {
+ // The last slice may be shorter than partSize; `end` is clamped to the table.
+ let partSize = Math.ceil(binopTests.length / ofParts);
+ let start = part * partSize;
+ let end = Math.min((part + 1) * partSize, binopTests.length);
+ for ( let [op, memtype, rop, resultmemtype] of binopTests.slice(start, end) ) {
+ let inputs = cross(memtype.inputs);
+ // Number of lanes of this element type in a 16-byte vector.
+ let len = 16/memtype.BYTES_PER_ELEMENT;
+ let xs = iota(len);
+ let zero = xs.map(_ => 0);
+ let [ins, mem, resultmem] = insAndMemBinop(op, memtype, resultmemtype, inputs);
+ // Integer views of the same buffer, used only to plant NaN bit patterns.
+ let bitsForF32 = memtype == Float32Array ? new Uint32Array(mem.buffer) : null;
+ let bitsForF64 = memtype == Float64Array ? new BigInt64Array(mem.buffer) : null;
+
+ // All-variable case. Element indices len and len*2 are byte offsets 16 and
+ // 32, the addresses `run` loads its operands from; the result is read back
+ // from offset 0. (Assumes set/get take an element index -- TODO confirm
+ // against the preamble.)
+ function testIt(a,b,r) {
+ set(mem, len, a);
+ set(mem, len*2, b);
+ ins.exports.run();
+ assertSame(get(resultmem, 0, len), r);
+
+ // Test signalling NaN superficially by replacing QNaN inputs with SNaN
+ if (bitsForF32 != null && (a.some(isNaN) || b.some(isNaN))) {
+ a.forEach((x, i) => { if (isNaN(x)) { bitsForF32[len+i] = 0x7FA0_0000; } });
+ b.forEach((x, i) => { if (isNaN(x)) { bitsForF32[(len*2)+i] = 0x7FA0_0000; } });
+ ins.exports.run();
+ assertSame(get(resultmem, 0, len), r);
+ }
+ if (bitsForF64 != null && (a.some(isNaN) || b.some(isNaN))) {
+ a.forEach((x, i) => { if (isNaN(x)) { bitsForF64[len+i] = 0x7FF4_0000_0000_0000n; } });
+ b.forEach((x, i) => { if (isNaN(x)) { bitsForF64[(len*2)+i] = 0x7FF4_0000_0000_0000n; } });
+ ins.exports.run();
+ assertSame(get(resultmem, 0, len), r);
+ }
+ }
+
+ // Constant-operand cases for input pair number i. The result area is zeroed
+ // before each call, presumably so a stale result from the previous call
+ // cannot satisfy the assertion.
+ function testConstIt(i,r) {
+ set(resultmem, 0, zero);
+ ins.exports["run_constlhs" + i]();
+ assertSame(get(resultmem, 0, len), r);
+
+ set(resultmem, 0, zero);
+ ins.exports["run_constrhs" + i]();
+ assertSame(get(resultmem, 0, len), r);
+
+ set(resultmem, 0, zero);
+ ins.exports["run_constboth" + i]();
+ assertSame(get(resultmem, 0, len), r);
+ }
+
+ // `i` numbers the input pairs in the same order used when the constant
+ // runners were generated, so it selects the matching exports.
+ let i = 0;
+ for (let [a,b] of inputs) {
+ // Expected result: apply the reference function to each lane.
+ let r = xs.map((i) => rop(a[i], b[i]));
+ testIt(a,b,r);
+ testConstIt(i,r);
+ i++;
+ }
+ }
+}
diff --git a/js/src/jit-test/tests/wasm/simd/ad-hack-extra.js b/js/src/jit-test/tests/wasm/simd/ad-hack-extra.js
new file mode 100644
index 0000000000..ee770d16a9
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/ad-hack-extra.js
@@ -0,0 +1,697 @@
+// |jit-test| skip-if: !wasmSimdEnabled()
+
+// Do not include this in the preamble, it must be loaded after lib/wasm.js
+load(scriptdir + "ad-hack-preamble.js")
+
+// Widening multiplication.
+// This is to be moved into ad-hack.js
+//
+// (iMxN.extmul_{high,low}_iKxL_{s,u} A B)
+//
+// is equivalent to
+//
+// (iMxN.mul (iMxN.extend_{high,low}_iKxL_{s,u} A)
+// (iMxN.extend_{high,low}_iKxL_{s,u} B))
+//
+// It doesn't really matter what the inputs are, we can test this almost
+// blindly.
+//
+// Unfortunately, we do not yet have i64x2.extend_* so we introduce a helper
+// function to compute that.
+
+// Emit the WAT for one exported test function named
+// "<wide>_extmul_<part>_<narrow>_<signed>". The function stores the result of
+// the extmul instruction at address 0 and the result of the equivalent
+// extend-then-multiply sequence at address 48, both computed from the operands
+// at addresses 16 and 32. For 'i64x2' the extend step is a call to a helper
+// function instead of an extend instruction.
+function makeExtMulTest(wide, narrow, part, signed) {
+    const variant = `${part}_${narrow}_${signed}`;
+    let widener;
+    if (wide == 'i64x2') {
+        widener = `call $${wide}_extend_${variant}`;
+    } else {
+        widener = `${wide}.extend_${variant}`;
+    }
+    return `
+ (func (export "${wide}_extmul_${variant}")
+ (v128.store (i32.const 0)
+ (${wide}.extmul_${variant} (v128.load (i32.const 16))
+ (v128.load (i32.const 32))))
+ (v128.store (i32.const 48)
+ (${wide}.mul (${widener} (v128.load (i32.const 16)))
+ (${widener} (v128.load (i32.const 32))))))
+`;
+}
+
+var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ (func $i64x2_extend_low_i32x4_s (param v128) (result v128)
+ (i64x2.shr_s (i8x16.shuffle 16 16 16 16 0 1 2 3 16 16 16 16 4 5 6 7
+ (local.get 0)
+ (v128.const i32x4 0 0 0 0))
+ (i32.const 32)))
+ (func $i64x2_extend_high_i32x4_s (param v128) (result v128)
+ (i64x2.shr_s (i8x16.shuffle 16 16 16 16 8 9 10 11 16 16 16 16 12 13 14 15
+ (local.get 0)
+ (v128.const i32x4 0 0 0 0))
+ (i32.const 32)))
+ (func $i64x2_extend_low_i32x4_u (param v128) (result v128)
+ (i8x16.shuffle 0 1 2 3 16 16 16 16 4 5 6 7 16 16 16 16
+ (local.get 0)
+ (v128.const i32x4 0 0 0 0)))
+ (func $i64x2_extend_high_i32x4_u (param v128) (result v128)
+ (i8x16.shuffle 8 9 10 11 16 16 16 16 12 13 14 15 16 16 16 16
+ (local.get 0)
+ (v128.const i32x4 0 0 0 0)))
+ ${makeExtMulTest('i64x2','i32x4','low','s')}
+ ${makeExtMulTest('i64x2','i32x4','high','s')}
+ ${makeExtMulTest('i64x2','i32x4','low','u')}
+ ${makeExtMulTest('i64x2','i32x4','high','u')}
+ ${makeExtMulTest('i32x4','i16x8','low','s')}
+ ${makeExtMulTest('i32x4','i16x8','high','s')}
+ ${makeExtMulTest('i32x4','i16x8','low','u')}
+ ${makeExtMulTest('i32x4','i16x8','high','u')}
+ ${makeExtMulTest('i16x8','i8x16','low','s')}
+ ${makeExtMulTest('i16x8','i8x16','high','s')}
+ ${makeExtMulTest('i16x8','i8x16','low','u')}
+ ${makeExtMulTest('i16x8','i8x16','high','u')})`);
+
+// Drive every extmul export: for each (wide, narrow) lane-type pair, each
+// half ('low'/'high'), each signedness and each input pair, run the export and
+// require that the extmul result equals the extend-then-multiply result.
+for ( let [ WideArray, NarrowArray ] of
+ [ [ Int16Array, Int8Array ],
+ [ Int32Array, Int16Array ],
+ [ BigInt64Array, Int32Array ] ] ) {
+ let narrowMem = new NarrowArray(ins.exports.mem.buffer);
+ // Element indices of byte offsets 16 and 32, where the module loads operands.
+ let narrowSrc0 = 16/NarrowArray.BYTES_PER_ELEMENT;
+ let narrowSrc1 = 32/NarrowArray.BYTES_PER_ELEMENT;
+ let wideMem = new WideArray(ins.exports.mem.buffer);
+ let wideElems = 16/WideArray.BYTES_PER_ELEMENT;
+ // Element indices of byte offsets 0 and 48, where the module stores the
+ // extmul result and the reference result respectively.
+ let wideRes0 = 0;
+ let wideRes1 = 48/WideArray.BYTES_PER_ELEMENT;
+ let zero = iota(wideElems).map(_ => 0);
+ for ( let part of [ 'low', 'high' ] ) {
+ for ( let signed of [ 's', 'u' ] ) {
+ for ( let [a, b] of cross(NarrowArray.inputs) ) {
+ // Clear both result slots so results of an earlier iteration cannot leak.
+ set(wideMem, wideRes0, zero);
+ set(wideMem, wideRes1, zero);
+ set(narrowMem, narrowSrc0, a);
+ set(narrowMem, narrowSrc1, b);
+ // layoutName is presumably attached to the typed-array constructors by
+ // the preamble (e.g. 'i16x8' for Int16Array) -- TODO confirm.
+ let test = `${WideArray.layoutName}_extmul_${part}_${NarrowArray.layoutName}_${signed}`;
+ ins.exports[test]();
+ assertSame(get(wideMem, wideRes0, wideElems),
+ get(wideMem, wideRes1, wideElems));
+ }
+ }
+ }
+}
+
+// Bitmask. Ion constant folds, so test that too.
+// This is to be merged into the existing bitmask tests in ad-hack.js.
+
+// Two exports: one applies i64x2.bitmask to the vector loaded from address 16,
+// the other applies it to a constant vector.
+var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ (func (export "bitmask_i64x2") (result i32)
+ (i64x2.bitmask (v128.load (i32.const 16))))
+ (func (export "const_bitmask_i64x2") (result i32)
+ (i64x2.bitmask (v128.const i64x2 0xff337f8012345678 0x0001984212345678))))`);
+
+var mem8 = new Uint8Array(ins.exports.mem.buffer);
+var mem64 = new BigUint64Array(ins.exports.mem.buffer);
+
+// Byte index 16 in mem8 and element index 2 in mem64 both address byte offset
+// 16, the operand location. (Assumes set() takes an element index -- TODO
+// confirm against the preamble.)
+set(mem8, 16, iota(16).map((_) => 0));
+assertEq(ins.exports.bitmask_i64x2(), 0);
+
+// Top bit set in both lanes.
+set(mem64, 2, [0x8000000000000000n, 0x8000000000000000n]);
+assertEq(ins.exports.bitmask_i64x2(), 3);
+
+// Every bit except the top bit set in both lanes.
+set(mem64, 2, [0x7FFFFFFFFFFFFFFFn, 0x7FFFFFFFFFFFFFFFn]);
+assertEq(ins.exports.bitmask_i64x2(), 0);
+
+// Top bit set in the second lane only.
+set(mem64, 2, [0n, 0x8000000000000000n]);
+assertEq(ins.exports.bitmask_i64x2(), 2);
+
+// Top bit set in the first lane only.
+set(mem64, 2, [0x8000000000000000n, 0n]);
+assertEq(ins.exports.bitmask_i64x2(), 1);
+
+// In the constant only the first lane (0xff33...) has its top bit set.
+assertEq(ins.exports.const_bitmask_i64x2(), 1);
+
+// Widen low/high.
+// This is to be merged into the existing widening tests in ad-hack.js.
+
+var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ (func (export "extend_low_i32x4_s")
+ (v128.store (i32.const 0) (i64x2.extend_low_i32x4_s (v128.load (i32.const 16)))))
+ (func (export "extend_high_i32x4_s")
+ (v128.store (i32.const 0) (i64x2.extend_high_i32x4_s (v128.load (i32.const 16)))))
+ (func (export "extend_low_i32x4_u")
+ (v128.store (i32.const 0) (i64x2.extend_low_i32x4_u (v128.load (i32.const 16)))))
+ (func (export "extend_high_i32x4_u")
+ (v128.store (i32.const 0) (i64x2.extend_high_i32x4_u (v128.load (i32.const 16))))))`);
+
+var mem32 = new Int32Array(ins.exports.mem.buffer);
+var mem64 = new BigInt64Array(ins.exports.mem.buffer);
+var mem64u = new BigUint64Array(ins.exports.mem.buffer);
+
+var as = [205, 1, 192, 3].map((x) => x << 24);
+set(mem32, 4, as);
+
+ins.exports.extend_low_i32x4_s();
+assertSame(get(mem64, 0, 2), iota(2).map((n) => BigInt(as[n])))
+
+ins.exports.extend_high_i32x4_s();
+assertSame(get(mem64, 0, 2), iota(2).map((n) => BigInt(as[n+2])));
+
+ins.exports.extend_low_i32x4_u();
+assertSame(get(mem64u, 0, 2), iota(2).map((n) => BigInt(as[n] >>> 0)));
+
+ins.exports.extend_high_i32x4_u();
+assertSame(get(mem64u, 0, 2), iota(2).map((n) => BigInt(as[n+2] >>> 0)));
+
+// Saturating rounding q-format multiplication.
+// This is to be moved into ad-hack.js
+
+// One export that stores i16x8.q15mulr_sat_s of the vectors at addresses 16
+// and 32 to address 0.
+var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ (func (export "q15mulr_sat_s")
+ (v128.store (i32.const 0) (i16x8.q15mulr_sat_s (v128.load (i32.const 16)) (v128.load (i32.const 32))))))`);
+
+var mem16 = new Int16Array(ins.exports.mem.buffer);
+for ( let [as, bs] of cross(Int16Array.inputs) ) {
+ // Int16 element indices 8 and 16 are byte offsets 16 and 32.
+ set(mem16, 8, as);
+ set(mem16, 16, bs);
+ ins.exports.q15mulr_sat_s();
+ // Reference per lane: multiply, add 0x4000 (half of 1 << 15) so the shift
+ // rounds, shift right by 15, then saturate to 16 bits.
+ assertSame(get(mem16, 0, 8),
+ iota(8).map((i) => signed_saturate((as[i] * bs[i] + 0x4000) >> 15, 16)));
+}
+
+
+// i64x2.all_true
+
+var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ (func (export "i64_all_true") (result i32)
+ (i64x2.all_true (v128.load (i32.const 16)) ) ) )`);
+
+var mem32 = new Int32Array(ins.exports.mem.buffer);
+
+set(mem32, 4, [0, 0, 0, 0]);
+assertEq(0, ins.exports.i64_all_true());
+set(mem32, 4, [1, 0, 0, 0]);
+assertEq(0, ins.exports.i64_all_true());
+set(mem32, 4, [1, 0, 0, 1]);
+assertEq(1, ins.exports.i64_all_true());
+set(mem32, 4, [0, 0, 10, 0]);
+assertEq(0, ins.exports.i64_all_true());
+set(mem32, 4, [0, -250, 1, 0]);
+assertEq(1, ins.exports.i64_all_true());
+set(mem32, 4, [-1, -1, -1, -1]);
+assertEq(1, ins.exports.i64_all_true());
+
+// Ion-only checks, run when the shell exposes wasmSimdAnalysis and the compile
+// mode is "ion": after each compilation wasmSimdAnalysis() is expected to
+// report which SIMD optimization was applied.
+if (this.wasmSimdAnalysis && wasmCompileMode() == "ion") {
+ // i64x2.all_true used directly as a branch condition.
+ const positive =
+ wasmCompile(
+ `(module
+ (memory (export "mem") 1 1)
+ (func $f (param v128) (result i32)
+ (if (result i32) (i64x2.all_true (local.get 0))
+ (i32.const 42)
+ (i32.const 37)))
+ (func (export "run") (result i32)
+ (call $f (v128.load (i32.const 16)))))`);
+ assertEq(wasmSimdAnalysis(), "simd128-to-scalar-and-branch -> folded");
+
+ // Same, with the condition negated through i32.eqz.
+ const negative =
+ wasmCompile(
+ `(module
+ (memory (export "mem") 1 1)
+ (func $f (param v128) (result i32)
+ (if (result i32) (i32.eqz (i64x2.all_true (local.get 0)))
+ (i32.const 42)
+ (i32.const 37)))
+ (func (export "run") (result i32)
+ (call $f (v128.load (i32.const 16)))))`);
+ assertEq(wasmSimdAnalysis(), "simd128-to-scalar-and-branch -> folded");
+
+ // Both modules must still compute the right answer: 42/37 for the positive
+ // form and the swapped values for the negated form.
+ for ( let inp of [[1n, 2n], [4n, 0n], [0n, 0n]]) {
+ const all_true = inp.every(v => v != 0n)
+ let mem = new BigInt64Array(positive.exports.mem.buffer);
+ set(mem, 2, inp);
+ assertEq(positive.exports.run(), all_true ? 42 : 37);
+
+ mem = new BigInt64Array(negative.exports.mem.buffer);
+ set(mem, 2, inp);
+ assertEq(negative.exports.run(), all_true ? 37 : 42);
+ }
+
+ // all_true of a constant vector is expected to be folded at compile time.
+ wasmCompile(`(module (func (result i32) (i64x2.all_true (v128.const i64x2 0 0))))`);
+ assertEq(wasmSimdAnalysis(), "simd128-to-scalar -> constant folded");
+}
+
+
+// i64x2.eq and i64x2.ne
+
+var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ (func (export "i64_eq")
+ (v128.store (i32.const 0)
+ (i64x2.eq (v128.load (i32.const 16)) (v128.load (i32.const 32))) ))
+ (func (export "i64_ne")
+ (v128.store (i32.const 0)
+ (i64x2.ne (v128.load (i32.const 16)) (v128.load (i32.const 32))) )) )`);
+
+var mem64 = new BigInt64Array(ins.exports.mem.buffer);
+
+set(mem64, 2, [0n, 1n, 0n, 1n]);
+ins.exports.i64_eq();
+assertSame(get(mem64, 0, 2), [-1n, -1n]);
+ins.exports.i64_ne();
+assertSame(get(mem64, 0, 2), [0n, 0n]);
+set(mem64, 2, [0x0n, -1n, 0x100000000n, -1n]);
+ins.exports.i64_eq();
+assertSame(get(mem64, 0, 2), [0n, -1n]);
+set(mem64, 2, [-1n, 0x0n, -1n, 0x100000000n]);
+ins.exports.i64_ne();
+assertSame(get(mem64, 0, 2), [0n, -1n]);
+
+
+// i64x2.lt_s, i64x2.gt_s, i64x2.le_s, and i64x2.ge_s
+
+var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ (func (export "i64_lt_s")
+ (v128.store (i32.const 0)
+ (i64x2.lt_s (v128.load (i32.const 16)) (v128.load (i32.const 32))) ))
+ (func (export "i64_gt_s")
+ (v128.store (i32.const 0)
+ (i64x2.gt_s (v128.load (i32.const 16)) (v128.load (i32.const 32))) ))
+ (func (export "i64_le_s")
+ (v128.store (i32.const 0)
+ (i64x2.le_s (v128.load (i32.const 16)) (v128.load (i32.const 32))) ))
+ (func (export "i64_ge_s")
+ (v128.store (i32.const 0)
+ (i64x2.ge_s (v128.load (i32.const 16)) (v128.load (i32.const 32))) )) )`);
+
+var mem64 = new BigInt64Array(ins.exports.mem.buffer);
+
+set(mem64, 2, [0n, 1n, 1n, 0n]);
+ins.exports.i64_lt_s();
+assertSame(get(mem64, 0, 2), [-1n, 0n]);
+ins.exports.i64_gt_s();
+assertSame(get(mem64, 0, 2), [0n, -1n]);
+ins.exports.i64_le_s();
+assertSame(get(mem64, 0, 2), [-1n, 0n]);
+ins.exports.i64_ge_s();
+assertSame(get(mem64, 0, 2), [0n, -1n]);
+
+set(mem64, 2, [0n, -1n, -1n, 0n]);
+ins.exports.i64_lt_s();
+assertSame(get(mem64, 0, 2), [0n, -1n]);
+ins.exports.i64_gt_s();
+assertSame(get(mem64, 0, 2), [-1n, 0n]);
+ins.exports.i64_le_s();
+assertSame(get(mem64, 0, 2), [0n, -1n]);
+ins.exports.i64_ge_s();
+assertSame(get(mem64, 0, 2), [-1n, 0n]);
+
+set(mem64, 2, [-2n, 2n, -1n, 1n]);
+ins.exports.i64_lt_s();
+assertSame(get(mem64, 0, 2), [-1n, 0n]);
+ins.exports.i64_gt_s();
+assertSame(get(mem64, 0, 2), [0n, -1n]);
+ins.exports.i64_le_s();
+assertSame(get(mem64, 0, 2), [-1n, 0n]);
+ins.exports.i64_ge_s();
+assertSame(get(mem64, 0, 2), [0n, -1n]);
+
+set(mem64, 2, [-2n, 1n, -2n, 1n]);
+ins.exports.i64_lt_s();
+assertSame(get(mem64, 0, 2), [0n, 0n]);
+ins.exports.i64_gt_s();
+assertSame(get(mem64, 0, 2), [0n, 0n]);
+ins.exports.i64_le_s();
+assertSame(get(mem64, 0, 2), [-1n, -1n]);
+ins.exports.i64_ge_s();
+assertSame(get(mem64, 0, 2), [-1n, -1n]);
+
+
+// Assemble `text` (WebAssembly text format) with wasmTextToBinary, compile the
+// bytes and return a new instance of the module. No imports are supplied.
+function wasmCompile(text) {
+    const bytes = wasmTextToBinary(text);
+    const compiled = new WebAssembly.Module(bytes);
+    return new WebAssembly.Instance(compiled);
+}
+
+
+// i64x2.abs
+
+var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ (func (export "i64_abs")
+ (v128.store (i32.const 0)
+ (i64x2.abs (v128.load (i32.const 16))) )) )`);
+
+var mem64 = new BigInt64Array(ins.exports.mem.buffer);
+
+set(mem64, 2, [-3n, 42n]);
+ins.exports.i64_abs();
+assertSame(get(mem64, 0, 2), [3n, 42n]);
+set(mem64, 2, [0n, -0x8000000000000000n]);
+ins.exports.i64_abs();
+assertSame(get(mem64, 0, 2), [0n, -0x8000000000000000n]);
+
+
+// Load lane
+
+var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ ${iota(16).map(i => `(func (export "load8_lane_${i}") (param i32)
+ (v128.store (i32.const 0)
+ (v128.load8_lane offset=0 ${i} (local.get 0) (v128.load (i32.const 16)))))
+ `).join('')}
+ ${iota(8).map(i => `(func (export "load16_lane_${i}") (param i32)
+ (v128.store (i32.const 0)
+ (v128.load16_lane offset=0 ${i} (local.get 0) (v128.load (i32.const 16)))))
+ `).join('')}
+ ${iota(4).map(i => `(func (export "load32_lane_${i}") (param i32)
+ (v128.store (i32.const 0)
+ (v128.load32_lane offset=0 ${i} (local.get 0) (v128.load (i32.const 16)))))
+ `).join('')}
+ ${iota(2).map(i => `(func (export "load64_lane_${i}") (param i32)
+ (v128.store (i32.const 0)
+ (v128.load64_lane offset=0 ${i} (local.get 0) (v128.load (i32.const 16)))))
+ `).join('')}
+ (func (export "load_lane_const_and_align")
+ (v128.store (i32.const 0)
+ (v128.load64_lane offset=32 1 (i32.const 1)
+ (v128.load32_lane offset=32 1 (i32.const 3)
+ (v128.load16_lane offset=32 0 (i32.const 5)
+ (v128.load (i32.const 16)))))
+ ))
+ )`);
+
+var mem8 = new Int8Array(ins.exports.mem.buffer);
+var mem32 = new Int32Array(ins.exports.mem.buffer);
+var mem64 = new BigInt64Array(ins.exports.mem.buffer);
+
+var as = [0x12345678, 0x23456789, 0x3456789A, 0x456789AB];
+set(mem32, 4, as); set(mem8, 32, [0xC2]);
+
+ins.exports["load8_lane_0"](32);
+assertSame(get(mem32, 0, 4), [0x123456C2, 0x23456789, 0x3456789A, 0x456789AB]);
+ins.exports["load8_lane_1"](32);
+assertSame(get(mem32, 0, 4), [0x1234C278, 0x23456789, 0x3456789A, 0x456789AB]);
+ins.exports["load8_lane_2"](32);
+assertSame(get(mem32, 0, 4), [0x12C25678, 0x23456789, 0x3456789A, 0x456789AB]);
+ins.exports["load8_lane_3"](32);
+assertSame(get(mem32, 0, 4), [0xC2345678|0, 0x23456789, 0x3456789A, 0x456789AB]);
+ins.exports["load8_lane_4"](32);
+assertSame(get(mem32, 0, 4), [0x12345678, 0x234567C2, 0x3456789A, 0x456789AB]);
+ins.exports["load8_lane_6"](32);
+assertSame(get(mem32, 0, 4), [0x12345678, 0x23C26789, 0x3456789A, 0x456789AB]);
+ins.exports["load8_lane_9"](32);
+assertSame(get(mem32, 0, 4), [0x12345678, 0x23456789, 0x3456C29A, 0x456789AB]);
+ins.exports["load8_lane_14"](32);
+assertSame(get(mem32, 0, 4), [0x12345678, 0x23456789, 0x3456789A, 0x45C289AB]);
+
+set(mem8, 32, [0xC2, 0xD1]);
+
+ins.exports["load16_lane_0"](32);
+assertSame(get(mem32, 0, 4), [0x1234D1C2, 0x23456789, 0x3456789A, 0x456789AB]);
+ins.exports["load16_lane_1"](32);
+assertSame(get(mem32, 0, 4), [0xD1C25678|0, 0x23456789, 0x3456789A, 0x456789AB]);
+ins.exports["load16_lane_2"](32);
+assertSame(get(mem32, 0, 4), [0x12345678, 0x2345D1C2, 0x3456789A, 0x456789AB]);
+ins.exports["load16_lane_5"](32);
+assertSame(get(mem32, 0, 4), [0x12345678, 0x23456789, 0xD1C2789A|0, 0x456789AB]);
+ins.exports["load16_lane_7"](32);
+assertSame(get(mem32, 0, 4), [0x12345678, 0x23456789, 0x3456789A, 0xD1C289AB|0]);
+
+set(mem32, 8, [0x16B5C3D0]);
+
+ins.exports["load32_lane_0"](32);
+assertSame(get(mem32, 0, 4), [0x16B5C3D0, 0x23456789, 0x3456789A, 0x456789AB]);
+ins.exports["load32_lane_1"](32);
+assertSame(get(mem32, 0, 4), [0x12345678, 0x16B5C3D0, 0x3456789A, 0x456789AB]);
+ins.exports["load32_lane_2"](32);
+assertSame(get(mem32, 0, 4), [0x12345678, 0x23456789, 0x16B5C3D0, 0x456789AB]);
+ins.exports["load32_lane_3"](32);
+assertSame(get(mem32, 0, 4), [0x12345678, 0x23456789, 0x3456789A, 0x16B5C3D0]);
+
+set(mem64, 4, [0x3300AA4416B5C3D0n]);
+
+ins.exports["load64_lane_0"](32);
+assertSame(get(mem64, 0, 2), [0x3300AA4416B5C3D0n, 0x456789AB3456789An]);
+ins.exports["load64_lane_1"](32);
+assertSame(get(mem64, 0, 2), [0x2345678912345678n, 0x3300AA4416B5C3D0n]);
+
+// .. (mis)align load lane
+
+var as = [0x12345678, 0x23456789, 0x3456789A, 0x456789AB];
+set(mem32, 4, as); set(mem64, 4, [0x3300AA4416B5C3D0n, 0x300AA4416B5C3D03n]);
+
+ins.exports["load16_lane_5"](33);
+assertSame(get(mem32, 0, 4), [0x12345678,0x23456789,0xb5c3789a|0,0x456789ab]);
+ins.exports["load32_lane_1"](34);
+assertSame(get(mem32, 0, 4), [0x12345678, 0xaa4416b5|0,0x3456789a,0x456789ab]);
+ins.exports["load64_lane_0"](35);
+assertSame(get(mem64, 0, 2), [0x5c3d033300aa4416n, 0x456789ab3456789an]);
+
+ins.exports["load_lane_const_and_align"]();
+assertSame(get(mem32, 0, 4), [0x123400aa,0x00AA4416,0x4416b5c3,0x033300aa]);
+
+// Store lane.  Each storeN_lane_I(src, dst) export loads a v128 from byte address src and stores its lane I at byte address dst.
+
+var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ ${iota(16).map(i => `(func (export "store8_lane_${i}") (param i32) (param i32)
+ (v128.store8_lane ${i} (local.get 1) (v128.load (local.get 0))))
+ `).join('')}
+ ${iota(8).map(i => `(func (export "store16_lane_${i}") (param i32) (param i32)
+ (v128.store16_lane ${i} (local.get 1) (v128.load (local.get 0))))
+ `).join('')}
+ ${iota(4).map(i => `(func (export "store32_lane_${i}") (param i32) (param i32)
+ (v128.store32_lane ${i} (local.get 1) (v128.load (local.get 0))))
+ `).join('')}
+ ${iota(2).map(i => `(func (export "store64_lane_${i}") (param i32) (param i32)
+ (v128.store64_lane ${i} (local.get 1) (v128.load (local.get 0))))
+ `).join('')}
+ (func (export "store_lane_const_and_align")
+ (v128.store16_lane 1 (i32.const 33) (v128.load (i32.const 16)))
+ (v128.store32_lane 2 (i32.const 37) (v128.load (i32.const 16)))
+ (v128.store64_lane 0 (i32.const 47) (v128.load (i32.const 16)))
+ ))`);
+
+
+var mem8 = new Int8Array(ins.exports.mem.buffer);
+var mem32 = new Int32Array(ins.exports.mem.buffer);
+var mem64 = new BigInt64Array(ins.exports.mem.buffer);
+
+var as = [0x12345678, 0x23456789, 0x3456789A, 0x456789AB];
+set(mem32, 4, as); set(mem32, 0, [0x7799AA00, 42, 3, 0]); // source vector at byte 16, destination area at byte 0
+
+ins.exports["store8_lane_0"](16, 0); assertSame(get(mem32, 0, 1), [0x7799AA78]);
+ins.exports["store8_lane_1"](16, 0); assertSame(get(mem32, 0, 1), [0x7799AA56]);
+ins.exports["store8_lane_2"](16, 0); assertSame(get(mem32, 0, 1), [0x7799AA34]);
+ins.exports["store8_lane_3"](16, 0); assertSame(get(mem32, 0, 1), [0x7799AA12]);
+ins.exports["store8_lane_5"](16, 0); assertSame(get(mem32, 0, 1), [0x7799AA67]);
+ins.exports["store8_lane_7"](16, 0); assertSame(get(mem32, 0, 1), [0x7799AA23]);
+ins.exports["store8_lane_8"](16, 0); assertSame(get(mem32, 0, 1), [0x7799AA9A]);
+ins.exports["store8_lane_15"](16, 0); assertSame(get(mem32, 0, 1), [0x7799AA45]);
+
+ins.exports["store16_lane_0"](16, 0); assertSame(get(mem32, 0, 1), [0x77995678]);
+ins.exports["store16_lane_1"](16, 0); assertSame(get(mem32, 0, 1), [0x77991234]);
+ins.exports["store16_lane_2"](16, 0); assertSame(get(mem32, 0, 1), [0x77996789]);
+ins.exports["store16_lane_5"](16, 0); assertSame(get(mem32, 0, 1), [0x77993456]);
+ins.exports["store16_lane_7"](16, 0); assertSame(get(mem32, 0, 1), [0x77994567]);
+
+ins.exports["store32_lane_0"](16, 0); assertSame(get(mem32, 0, 2), [0x12345678, 42]);
+ins.exports["store32_lane_1"](16, 0); assertSame(get(mem32, 0, 2), [0x23456789, 42]);
+ins.exports["store32_lane_2"](16, 0); assertSame(get(mem32, 0, 2), [0x3456789A, 42]);
+ins.exports["store32_lane_3"](16, 0); assertSame(get(mem32, 0, 2), [0x456789AB, 42]);
+
+ins.exports["store64_lane_0"](16, 0); assertSame(get(mem64, 0, 2), [0x2345678912345678n, 3]);
+ins.exports["store64_lane_1"](16, 0); assertSame(get(mem64, 0, 2), [0x456789AB3456789An, 3]);
+
+// .. (mis)align store lane: the destination byte addresses used below (1, 2, 1) are not multiples of the lane size
+
+var as = [0x12345678, 0x23456789, 0x3456789A, 0x456789AB];
+set(mem32, 4, as); set(mem32, 0, [0x7799AA01, 42, 3, 0]);
+ins.exports["store16_lane_1"](16, 1); assertSame(get(mem32, 0, 2), [0x77123401, 42]);
+set(mem32, 0, [0x7799AA01, 42, 3, 0]); // reset the destination area before the next store
+ins.exports["store32_lane_1"](16, 2); assertSame(get(mem32, 0, 2), [0x6789AA01, 0x2345]);
+set(mem32, 0, [0x7799AA01, 42, 5, 3]);
+ins.exports["store64_lane_0"](16, 1);
+assertSame(get(mem64, 0, 2), [0x4567891234567801n, 0x0300000023]);
+
+set(mem32, 4, [
+ 0x12345678, 0x23456789, 0x3456789A, 0x456789AB,
+ 0x55AA55AA, 0xCC44CC44, 0x55AA55AA, 0xCC44CC44,
+ 0x55AA55AA, 0xCC44CC44, 0x55AA55AA, 0xCC44CC44,
+]);
+ins.exports["store_lane_const_and_align"](); // stores to the constant, unaligned byte addresses 33, 37 and 47
+assertSame(get(mem32, 8, 8), [
+ 0x551234aa, 0x56789a44, 0x55aa5534, 0x7844cc44,
+ 0x89123456|0, 0xcc234567|0, 0x55aa55aa, 0xcc44cc44|0,
+]);
+
+
+// i8x16.popcnt: the input vector is read from byte 16 and the per-byte bit counts are stored at byte 0
+
+var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ (func (export "i8x16_popcnt")
+ (v128.store (i32.const 0) (i8x16.popcnt (v128.load (i32.const 16)) )))
+ )`);
+
+var mem8 = new Int8Array(ins.exports.mem.buffer);
+
+set(mem8, 16, [0, 1, 2, 4, 8, 0x10, 0x20, 0x40, 0x80, 3, -1, 0xF0, 0x11, 0xFE, 0x0F, 0xE]); // one-bit, multi-bit and all-ones (-1) bytes
+ins.exports.i8x16_popcnt();
+assertSame(get(mem8, 0, 16), [0,1,1,1,1,1,1,1,1,2,8,4,2,7,4,3]);
+
+
+/// Double-precision conversion instructions.
+/// f64x2.convert_low_i32x4_{u,s} / i32x4.trunc_sat_f64x2_{u,s}_zero
+/// f32x4.demote_f64x2_zero / f64x2.promote_low_f32x4
+
+var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ (func (export "f64x2_convert_low_i32x4_s")
+ (v128.store (i32.const 0) (f64x2.convert_low_i32x4_s (v128.load (i32.const 16)) )))
+ (func (export "f64x2_convert_low_i32x4_u")
+ (v128.store (i32.const 0) (f64x2.convert_low_i32x4_u (v128.load (i32.const 16)) )))
+
+ (func (export "i32x4_trunc_sat_f64x2_s_zero")
+ (v128.store (i32.const 0) (i32x4.trunc_sat_f64x2_s_zero (v128.load (i32.const 16)) )))
+ (func (export "i32x4_trunc_sat_f64x2_u_zero")
+ (v128.store (i32.const 0) (i32x4.trunc_sat_f64x2_u_zero (v128.load (i32.const 16)) )))
+
+ (func (export "f32x4_demote_f64x2")
+ (v128.store (i32.const 0) (f32x4.demote_f64x2_zero (v128.load (i32.const 16)) )))
+ (func (export "f64x2_protomote_f32x4")
+ (v128.store (i32.const 0) (f64x2.promote_low_f32x4 (v128.load (i32.const 16)) )))
+ )`);
+
+var mem32 = new Int32Array(ins.exports.mem.buffer);
+var memU32 = new Uint32Array(ins.exports.mem.buffer);
+var memF32 = new Float32Array(ins.exports.mem.buffer);
+var memF64 = new Float64Array(ins.exports.mem.buffer);
+
+// f64x2.convert_low_i32x4_u / f64x2.convert_low_i32x4_s: only the two low i32 lanes of the input are converted
+
+set(mem32, 4, [1, -2, 0, -2]);
+ins.exports.f64x2_convert_low_i32x4_s();
+assertSame(get(memF64, 0, 2), [1, -2]);
+set(mem32, 4, [-1, 0, 5, -212312312]);
+ins.exports.f64x2_convert_low_i32x4_s();
+assertSame(get(memF64, 0, 2), [-1, 0]);
+
+set(memU32, 4, [1, 4045646797, 4, 0]); // second lane has its top bit set, so it must be read as unsigned
+ins.exports.f64x2_convert_low_i32x4_u();
+assertSame(get(memF64, 0, 2), [1, 4045646797]);
+set(memU32, 4, [0, 2, 4, 3]);
+ins.exports.f64x2_convert_low_i32x4_u();
+assertSame(get(memF64, 0, 2), [0, 2]);
+
+// i32x4.trunc_sat_f64x2_u_zero / i32x4.trunc_sat_f64x2_s_zero: the two high result lanes are zero, out-of-range inputs saturate
+
+set(memF64, 2, [0,0])
+ins.exports.i32x4_trunc_sat_f64x2_s_zero();
+assertSame(get(mem32, 0, 4), [0,0,0,0]);
+ins.exports.i32x4_trunc_sat_f64x2_u_zero();
+assertSame(get(memU32, 0, 4), [0,0,0,0]);
+
+set(memF64, 2, [-1.23,65535.12])
+ins.exports.i32x4_trunc_sat_f64x2_s_zero();
+assertSame(get(mem32, 0, 4), [-1,65535,0,0]);
+set(memF64, 2, [1.99,65535.12])
+ins.exports.i32x4_trunc_sat_f64x2_u_zero();
+assertSame(get(memU32, 0, 4), [1,65535,0,0]);
+
+set(memF64, 2, [10e+100,-10e+100]) // far outside the 32-bit integer range in both directions
+ins.exports.i32x4_trunc_sat_f64x2_s_zero();
+assertSame(get(mem32, 0, 4), [0x7fffffff,-0x80000000,0,0]);
+ins.exports.i32x4_trunc_sat_f64x2_u_zero();
+assertSame(get(memU32, 0, 4), [0xffffffff,0,0,0]);
+
+// f32x4.demote_f64x2_zero: results land in the two low f32 lanes, the two high lanes are zero
+
+set(memF64, 2, [1, 2])
+ins.exports.f32x4_demote_f64x2();
+assertSame(get(memF32, 0, 4), [1,2,0,0]);
+
+set(memF64, 2, [-4e38, 4e38]) // magnitude too large for f32: expected to become -/+Infinity
+ins.exports.f32x4_demote_f64x2();
+assertSame(get(memF32, 0, 4), [-Infinity,Infinity,0,0]);
+
+set(memF64, 2, [-1e-46, 1e-46]) // magnitude too small for f32: expected to become -0 / +0
+ins.exports.f32x4_demote_f64x2();
+assertSame(get(memF32, 0, 4), [1/-Infinity,0,0,0]);
+
+set(memF64, 2, [0, NaN])
+ins.exports.f32x4_demote_f64x2();
+assertSame(get(memF32, 0, 4), [0, NaN,0,0]);
+
+set(memF64, 2, [Infinity, -Infinity])
+ins.exports.f32x4_demote_f64x2();
+assertSame(get(memF32, 0, 4), [Infinity, -Infinity,0,0]);
+
+// f64x2.promote_low_f32x4 (note: the export is spelled "f64x2_protomote_f32x4" throughout this test)
+
+set(memF32, 4, [4, 3, 1, 2])
+ins.exports.f64x2_protomote_f32x4();
+assertSame(get(memF64, 0, 2), [4, 3]);
+
+set(memF32, 4, [NaN, 0, 0, 0])
+ins.exports.f64x2_protomote_f32x4();
+assertSame(get(memF64, 0, 2), [NaN, 0]);
+
+set(memF32, 4, [Infinity, -Infinity, 0, 0])
+ins.exports.f64x2_protomote_f32x4();
+assertSame(get(memF64, 0, 2), [Infinity, -Infinity]);
+
+
+// i16x8.extadd_pairwise_i8x16_{s,u} / i32x4.extadd_pairwise_i16x8_{s,u}: each result lane is the widened sum of two adjacent input lanes
+
+var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ (func (export "i16x8_extadd_pairwise_i8x16_s")
+ (v128.store (i32.const 0) (i16x8.extadd_pairwise_i8x16_s (v128.load (i32.const 16)) )))
+ (func (export "i16x8_extadd_pairwise_i8x16_u")
+ (v128.store (i32.const 0) (i16x8.extadd_pairwise_i8x16_u (v128.load (i32.const 16)) )))
+
+ (func (export "i32x4_extadd_pairwise_i16x8_s")
+ (v128.store (i32.const 0) (i32x4.extadd_pairwise_i16x8_s (v128.load (i32.const 16)) )))
+ (func (export "i32x4_extadd_pairwise_i16x8_u")
+ (v128.store (i32.const 0) (i32x4.extadd_pairwise_i16x8_u (v128.load (i32.const 16)) )))
+ )`);
+
+var mem8 = new Int8Array(ins.exports.mem.buffer);
+var memU8 = new Uint8Array(ins.exports.mem.buffer);
+var mem16 = new Int16Array(ins.exports.mem.buffer);
+var memU16 = new Uint16Array(ins.exports.mem.buffer);
+var mem32 = new Int32Array(ins.exports.mem.buffer);
+var memU32 = new Uint32Array(ins.exports.mem.buffer);
+
+set(mem8, 16, [0, 0, 1, 1, 2, -2, 0, 42, 1, -101, 101, -1, 127, 125, -1, -2]);
+ins.exports.i16x8_extadd_pairwise_i8x16_s();
+assertSame(get(mem16, 0, 8), [0, 2, 0, 42, -100, 100, 252, -3]); // 127+125 = 252 does not fit in i8 but fits in the i16 lane
+
+set(memU8, 16, [0, 0, 1, 1, 2, 255, 0, 42, 0, 255, 254, 0, 127, 125, 255, 255]);
+ins.exports.i16x8_extadd_pairwise_i8x16_u();
+assertSame(get(memU16, 0, 8), [0, 2, 257, 42, 255, 254, 252, 510]);
+
+set(mem16, 8, [0, 0, 1, 1, 2, -2, -1, -2]); // Int16Array index 8 is byte offset 16
+ins.exports.i32x4_extadd_pairwise_i16x8_s();
+assertSame(get(mem32, 0, 4), [0, 2, 0, -3]);
+set(mem16, 8, [0, 42, 1, -32760, 32766, -1, 32761, 32762]);
+ins.exports.i32x4_extadd_pairwise_i16x8_s();
+assertSame(get(mem32, 0, 4), [42, -32759, 32765, 65523]);
+
+set(memU16, 8, [0, 0, 1, 1, 2, 65535, 65535, 65535]);
+ins.exports.i32x4_extadd_pairwise_i16x8_u();
+assertSame(get(memU32, 0, 4), [0, 2, 65537, 131070]);
+set(memU16, 8, [0, 42, 0, 65535, 65534, 0, 32768, 32765]);
+ins.exports.i32x4_extadd_pairwise_i16x8_u();
+assertSame(get(memU32, 0, 4), [42, 65535, 65534, 65533]);
diff --git a/js/src/jit-test/tests/wasm/simd/ad-hack-preamble.js b/js/src/jit-test/tests/wasm/simd/ad-hack-preamble.js
new file mode 100644
index 0000000000..407b59476f
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/ad-hack-preamble.js
@@ -0,0 +1,211 @@
+// |jit-test| skip-if: true
+
+// Common code for the ad-hack test cases.
+
+function get(arr, loc, len) {
+ let res = [];
+ for ( let i=0; i < len; i++ ) {
+ res.push(arr[loc+i]);
+ }
+ return res;
+}
+
+function getUnaligned(arr, width, loc, len) {
+ assertEq(arr.constructor, Uint8Array);
+ assertEq(width <= 4, true);
+ let res = [];
+ for ( let i=0; i < len; i++ ) {
+ let x = 0;
+ for ( let j=width-1; j >=0; j-- )
+ x = (x << 8) | arr[loc+i*width+j];
+ res.push(x);
+ }
+ return res;
+}
+
+function set(arr, loc, vals) {
+ for ( let i=0; i < vals.length; i++ ) {
+ if (arr instanceof BigInt64Array) {
+ arr[loc+i] = BigInt(vals[i]);
+ } else {
+ arr[loc+i] = vals[i];
+ }
+ }
+}
+
+function setUnaligned(arr, width, loc, vals) {
+ assertEq(arr.constructor, Uint8Array);
+ assertEq(width <= 4, true);
+ for ( let i=0; i < vals.length; i++ ) {
+ let x = vals[i];
+ for ( let j=0 ; j < width ; j++ ) {
+ arr[loc+i*width + j] = x & 255;
+ x >>= 8;
+ }
+ }
+}
+
+function equal(a, b) {
+ return a === b || isNaN(a) && isNaN(b);
+}
+
+function upd(xs, at, val) {
+ let ys = Array.from(xs);
+ ys[at] = val;
+ return ys;
+}
+
+// The following operations are not always fully generalized; they are just
+// functional enough for the existing test cases to pass.
+
+function sign_extend(n, bits) {
+ if (bits < 32) {
+ n = Number(n);
+ return (n << (32 - bits)) >> (32 - bits);
+ }
+ if (typeof n == "bigint") {
+ if (bits == 32)
+ return Number(n & 0xFFFF_FFFFn) | 0;
+ assertEq(bits, 64);
+ n = (n & 0xFFFF_FFFF_FFFF_FFFFn)
+ if (n > 0x7FFF_FFFF_FFFF_FFFFn)
+ return n - 0x1_0000_0000_0000_0000n;
+ return n;
+ }
+ assertEq(bits, 32);
+ return n|0;
+}
+
+function zero_extend(n, bits) {
+ if (bits < 32) {
+ return n & ((1 << bits) - 1);
+ }
+ if (n < 0)
+ n = 0x100000000 + n;
+ return n;
+}
+
+function signed_saturate(z, bits) {
+ let min = -(1 << (bits-1));
+ if (z <= min) {
+ return min;
+ }
+ let max = (1 << (bits-1)) - 1;
+ if (z > max) {
+ return max;
+ }
+ return z;
+}
+
+function unsigned_saturate(z, bits) {
+ if (z <= 0) {
+ return 0;
+ }
+ let max = (1 << bits) - 1;
+ if (z > max) {
+ return max;
+ }
+ return z;
+}
+
+function shl(count, width) {
+ if (width == 64) {
+ count = BigInt(count);
+ return (v) => {
+ v = BigInt(v);
+ if (v < 0)
+ v = (1n << 64n) + v;
+ let r = (v << count) & ((1n << 64n) - 1n);
+ if (r & (1n << 63n))
+ r = -((1n << 64n) - r);
+ return r;
+ }
+ } else {
+ return (v) => {
+ let mask = (width == 32) ? -1 : ((1 << width) - 1);
+ return (v << count) & mask;
+ }
+ }
+}
+
+function popcount(n) {
+ n = n - ((n >> 1) & 0x55555555)
+ n = (n & 0x33333333) + ((n >> 2) & 0x33333333)
+ return ((n + (n >> 4) & 0xF0F0F0F) * 0x1010101) >> 24
+}
+
+function jsValueToWasmName(x) {
+ if (typeof x == "number") {
+ if (x == 0) return 1 / x < 0 ? "-0" : "0";
+ if (isNaN(x)) return "+nan";
+ if (!isFinite(x)) return (x < 0 ? "-" : "+") + "inf";
+ }
+ return x;
+}
+
+// For each typed-array constructor, `inputs` is a set of arrays of the proper
+// length for v128, with values in range but possibly of the wrong signedness
+// (eg, for Int8Array, 128 is in range but is really -128).  `rectify` is a unary
+// operator that transforms a value to the proper sign and bitwidth; `layoutName` is the wasm lane shape.
+
+Int8Array.inputs = [iota(16).map((x) => (x+1) * (x % 3 == 0 ? -1 : 1)),
+ iota(16).map((x) => (x*2+3) * (x % 3 == 1 ? -1 : 1)),
+ [1,2,128,127,1,4,128,127,1,2,129,125,1,2,254,0],
+ [2,1,127,128,5,1,127,128,2,1,126,130,2,1,1,255],
+ iota(16).map((x) => ((x + 37) * 8 + 12) % 256),
+ iota(16).map((x) => ((x + 12) * 4 + 9) % 256)];
+Int8Array.rectify = (x) => sign_extend(x,8);
+Int8Array.layoutName = 'i8x16';
+
+Uint8Array.inputs = Int8Array.inputs; // same raw values; only rectify differs from the signed type
+Uint8Array.rectify = (x) => zero_extend(x,8);
+Uint8Array.layoutName = 'i8x16';
+
+Int16Array.inputs = [iota(8).map((x) => (x+1) * (x % 3 == 0 ? -1 : 1)),
+ iota(8).map((x) => (x*2+3) * (x % 3 == 1 ? -1 : 1)),
+ [1,2,32768,32767,1,4,32768,32767],
+ [2,1,32767,32768,5,1,32767,32768],
+ [1,2,128,127,1,4,128,127].map((x) => (x << 8) + x*2),
+ [2,1,127,128,1,1,128,128].map((x) => (x << 8) + x*3)];
+Int16Array.rectify = (x) => sign_extend(x,16);
+Int16Array.layoutName = 'i16x8';
+
+Uint16Array.inputs = Int16Array.inputs;
+Uint16Array.rectify = (x) => zero_extend(x,16);
+Uint16Array.layoutName = 'i16x8';
+
+Int32Array.inputs = [iota(4).map((x) => (x+1) * (x % 3 == 0 ? -1 : 1)),
+ iota(4).map((x) => (x*2+3) * (x % 3 == 1 ? -1 : 1)),
+ [1,2,32768 << 16,32767 << 16],
+ [2,1,32767 << 16,32768 << 16],
+ [1,2,128,127].map((x) => (x << 24) + (x << 8) + x*3),
+ [2,1,127,128].map((x) => (x << 24) + (x << 8) + x*4)];
+Int32Array.rectify = (x) => sign_extend(x,32);
+Int32Array.layoutName = 'i32x4';
+
+Uint32Array.inputs = Int32Array.inputs;
+Uint32Array.rectify = (x) => zero_extend(x,32);
+Uint32Array.layoutName = 'i32x4';
+
+BigInt64Array.inputs = [[1,2],[2,1],[-1,-2],[-2,-1],[2n ** 32n, 2n ** 32n - 5n],
+ [(2n ** 38n) / 5n, (2n ** 41n) / 7n],
+ [-((2n ** 38n) / 5n), (2n ** 41n) / 7n]];
+BigInt64Array.rectify = (x) => BigInt(x); // turns the plain Number entries above into BigInts
+BigInt64Array.layoutName = 'i64x2';
+
+Float32Array.inputs = [[1, -1, 1e10, -1e10],
+ [-1, -2, -1e10, 1e10],
+ [5.1, -1.1, -4.3, -0],
+ ...permute([1, -10, NaN, Infinity])];
+Float32Array.rectify = (x) => Math.fround(x);
+Float32Array.layoutName = 'f32x4';
+
+Float64Array.inputs = Float32Array.inputs.map((x) => x.slice(0, 2)) // first two lanes of every f32 input
+Float64Array.rectify = (x) => x;
+Float64Array.layoutName = 'f64x2';
+
+// Tidy up all the inputs: replace every value by the result of its type's rectify function
+for ( let A of [Int8Array, Uint8Array, Int16Array, Uint16Array, Int32Array, Uint32Array, BigInt64Array,
+ Float32Array, Float64Array]) {
+ A.inputs = A.inputs.map((xs) => xs.map(A.rectify));
+}
diff --git a/js/src/jit-test/tests/wasm/simd/ad-hack-simple-binops0.js b/js/src/jit-test/tests/wasm/simd/ad-hack-simple-binops0.js
new file mode 100644
index 0000000000..f3406ac44a
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/ad-hack-simple-binops0.js
@@ -0,0 +1,7 @@
+// |jit-test| skip-if: !wasmSimdEnabled()
+
+// Do not include these in the preamble, they must be loaded after lib/wasm.js
+load(scriptdir + "ad-hack-preamble.js")
+load(scriptdir + "ad-hack-binop-preamble.js")
+
+runSimpleBinopTest(0, 3);
diff --git a/js/src/jit-test/tests/wasm/simd/ad-hack-simple-binops1.js b/js/src/jit-test/tests/wasm/simd/ad-hack-simple-binops1.js
new file mode 100644
index 0000000000..e6d6f7e2fc
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/ad-hack-simple-binops1.js
@@ -0,0 +1,7 @@
+// |jit-test| skip-if: !wasmSimdEnabled()
+
+// Do not include these in the preamble, they must be loaded after lib/wasm.js
+load(scriptdir + "ad-hack-preamble.js")
+load(scriptdir + "ad-hack-binop-preamble.js")
+
+runSimpleBinopTest(1, 3);
diff --git a/js/src/jit-test/tests/wasm/simd/ad-hack-simple-binops2.js b/js/src/jit-test/tests/wasm/simd/ad-hack-simple-binops2.js
new file mode 100644
index 0000000000..a196aa28fc
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/ad-hack-simple-binops2.js
@@ -0,0 +1,7 @@
+// |jit-test| skip-if: !wasmSimdEnabled()
+
+// Do not include these in the preamble, they must be loaded after lib/wasm.js
+load(scriptdir + "ad-hack-preamble.js")
+load(scriptdir + "ad-hack-binop-preamble.js")
+
+runSimpleBinopTest(2, 3);
diff --git a/js/src/jit-test/tests/wasm/simd/ad-hack-simple-unops.js b/js/src/jit-test/tests/wasm/simd/ad-hack-simple-unops.js
new file mode 100644
index 0000000000..6e562a97a1
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/ad-hack-simple-unops.js
@@ -0,0 +1,122 @@
+// |jit-test| skip-if: !wasmSimdEnabled()
+
+// Do not include this in the preamble, it must be loaded after lib/wasm.js
+load(scriptdir + "ad-hack-preamble.js")
+
+// Simple unary operators. Place parameter in memory at offset 16,
+// read the result at offset 0.
+
+function expandConstantUnopInputs(op, memtype, inputs) {
+ let s = '';
+ let ident = 0;
+ for ( let a of inputs ) {
+ let constval = `${memtype.layoutName} ${a.map(jsValueToWasmName).join(' ')}`;
+ s += `
+ (func (export "run_const${ident}")
+ (v128.store (i32.const 0)
+ (${op} (v128.const ${constval}))))
+`;
+ ident++;
+ }
+ return s;
+}
+
+function insAndMemUnop(op, memtype, resultmemtype, inputs) {
+ var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+
+ (func (export "run")
+ (v128.store (i32.const 0)
+ (call $doit (v128.load (i32.const 16)))))
+
+ (func $doit (param $a v128) (result v128)
+ (${op} (local.get $a)))
+
+ ${expandConstantUnopInputs(op, memtype, inputs)})`);
+ var mem = new memtype(ins.exports.mem.buffer);
+ var resultmem = !resultmemtype || memtype == resultmemtype ? mem : new resultmemtype(ins.exports.mem.buffer);
+ return [ins, mem, resultmem];
+}
+
+function ineg(bits) { return (a) => sign_extend(!a ? a : -a,bits) }
+function iabs(bits) { return (a) => zero_extend(a < 0 ? -a : a, bits) }
+function fneg(a) { return -a }
+function fabs(a) { return Math.abs(a) }
+function fsqrt(a) { return Math.fround(Math.sqrt(Math.fround(a))) }
+function dsqrt(a) { return Math.sqrt(a) }
+function bitnot(a) { return (~a) & 255 }
+function ffloor(x) { return Math.fround(Math.floor(x)) }
+function fceil(x) { return Math.fround(Math.ceil(x)) }
+function ftrunc(x) { return Math.fround(Math.sign(x)*Math.floor(Math.abs(x))) }
+function fnearest(x) { return Math.fround(Math.round(x)) }
+function dfloor(x) { return Math.floor(x) }
+function dceil(x) { return Math.ceil(x) }
+function dtrunc(x) { return Math.sign(x)*Math.floor(Math.abs(x)) }
+function dnearest(x) { return Math.round(x) }
+
+for ( let [op, memtype, rop, resultmemtype] of
+ [['i8x16.neg', Int8Array, ineg(8)],
+ ['i16x8.neg', Int16Array, ineg(16)],
+ ['i32x4.neg', Int32Array, ineg(32)],
+ ['i64x2.neg', BigInt64Array, ineg(64)],
+ ['i8x16.abs', Int8Array, iabs(8), Uint8Array],
+ ['i16x8.abs', Int16Array, iabs(16), Uint16Array],
+ ['i32x4.abs', Int32Array, iabs(32), Uint32Array],
+ ['f32x4.neg', Float32Array, fneg],
+ ['f64x2.neg', Float64Array, fneg],
+ ['f32x4.abs', Float32Array, fabs],
+ ['f64x2.abs', Float64Array, fabs],
+ ['f32x4.sqrt', Float32Array, fsqrt],
+ ['f64x2.sqrt', Float64Array, dsqrt],
+ ['f32x4.ceil', Float32Array, fceil],
+ ['f32x4.floor', Float32Array, ffloor],
+ ['f32x4.trunc', Float32Array, ftrunc],
+ ['f32x4.nearest', Float32Array, fnearest],
+ ['f64x2.ceil', Float64Array, dceil],
+ ['f64x2.floor', Float64Array, dfloor],
+ ['f64x2.trunc', Float64Array, dtrunc],
+ ['f64x2.nearest', Float64Array, dnearest],
+ ['v128.not', Uint8Array, bitnot],
+ ])
+{
+ let [ins, mem, resultmem] = insAndMemUnop(op, memtype, resultmemtype, memtype.inputs);
+ let len = 16/memtype.BYTES_PER_ELEMENT;
+ let xs = iota(len);
+ let zero = xs.map(_ => 0);
+ let bitsForF32 = memtype == Float32Array ? new Uint32Array(mem.buffer) : null;
+ let bitsForF64 = memtype == Float64Array ? new BigInt64Array(mem.buffer) : null;
+
+ function testIt(a, r) {
+ set(mem, len, a);
+ ins.exports.run();
+ assertSame(get(resultmem, 0, len), r);
+
+ // Test signalling NaN superficially by replacing QNaN inputs with SNaN
+ if (bitsForF32 != null && a.some(isNaN)) {
+ a.forEach((x, i) => { if (isNaN(x)) { bitsForF32[len+i] = 0x7FA0_0000; } });
+ ins.exports.run();
+ assertSame(get(resultmem, 0, len), r);
+ }
+ if (bitsForF64 != null && a.some(isNaN)) {
+ a.forEach((x, i) => { if (isNaN(x)) { bitsForF64[len+i] = 0x7FF4_0000_0000_0000n; } });
+ ins.exports.run();
+ assertSame(get(resultmem, 0, len), r);
+ }
+ }
+
+ function testConstIt(i,r) {
+ set(resultmem, 0, zero);
+ ins.exports["run_const" + i]();
+ assertSame(get(resultmem, 0, len), r);
+ }
+
+ let i = 0;
+ for (let a of memtype.inputs) {
+ let r = xs.map((i) => rop(a[i]));
+ testIt(a, r);
+ testConstIt(i, r);
+ i++;
+ }
+}
+
diff --git a/js/src/jit-test/tests/wasm/simd/ad-hack.js b/js/src/jit-test/tests/wasm/simd/ad-hack.js
new file mode 100644
index 0000000000..b64b11cf52
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/ad-hack.js
@@ -0,0 +1,1747 @@
+// |jit-test| skip-if: !wasmSimdEnabled()
+
+// Ad-hoc test cases used during development. Generally these are ordered from
+// easier toward harder.
+//
+// The test cases here are usually those that require some special processing.
+// Simple binary operators (v128 x v128 -> v128) and unary operators (v128 ->
+// v128) are tested in ad-hack-simple-binops*.js and ad-hack-simple-unops.js.
+
+// Do not include this in the preamble, it must be loaded after lib/wasm.js
+load(scriptdir + "ad-hack-preamble.js")
+
+// v128.store, with every static offset 0..15 so that unaligned effective addresses are covered
+// oob store
+// v128.const
+
+for ( let offset of [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) {
+ var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ (func (export "f") (param $loc i32)
+ (v128.store offset=${offset} (local.get $loc) (v128.const i32x4 ${1+offset} 2 3 ${4+offset*2}))))`);
+ var mem8 = new Uint8Array(ins.exports.mem.buffer);
+ ins.exports.f(160); // effective address is 160 + offset
+ assertSame(getUnaligned(mem8, 4, 160 + offset, 4), [1+offset, 2, 3, 4+offset*2]);
+
+ // OOB write should trap: a 16-byte store starting at 65536-15 or later runs past the end of the one-page memory
+ assertErrorMessage(() => ins.exports.f(65536-15),
+ WebAssembly.RuntimeError,
+ /index out of bounds/)
+
+ // Ensure that OOB writes don't write anything: moved to simd-partial-oob-store.js
+}
+
+// v128.load, with every static offset 0..15 so that unaligned effective addresses are covered
+// oob load
+// v128.store
+// temp register
+
+for ( let offset of [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) {
+ var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ (func (export "copy") (param $dest i32) (param $src i32)
+ (v128.store (local.get $dest) (v128.load offset=${offset} (local.get $src)))))`);
+ var mem32 = new Uint32Array(ins.exports.mem.buffer);
+ var mem8 = new Uint8Array(ins.exports.mem.buffer);
+ setUnaligned(mem8, 4, 4*4 + offset, [8+offset, 10, 12, 14+offset*2]); // source data at byte 16 + offset
+ ins.exports.copy(40*4, 4*4); // copies to byte 160, i.e. Uint32Array index 40
+ assertSame(get(mem32, 40, 4), [8+offset, 10, 12, 14+offset*2]);
+ assertErrorMessage(() => ins.exports.copy(40*4, 65536-15),
+ WebAssembly.RuntimeError,
+ /index out of bounds/);
+}
+
+// call [with register params]
+// parameters [in registers]
+// return [with register values]
+// locals
+//
+// local.get
+// local.set
+// v128.const
+// v128.store
+
+var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ (func $g (param $param v128) (result v128)
+ (local $tmp v128)
+ (local.set $tmp (local.get $param))
+ (local.get $tmp))
+ (func (export "f")
+ (v128.store (i32.const 160) (call $g (v128.const i32x4 1 2 3 4)))))`);
+var mem = new Uint32Array(ins.exports.mem.buffer);
+ins.exports.f(); // $g returns its v128 argument after a round trip through a local
+assertSame(get(mem, 40, 4), [1, 2, 3, 4]);
+
+// Same test but with local.tee: $g returns the value produced by the tee itself
+
+var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ (func $g (param $param v128) (result v128)
+ (local $tmp v128)
+ (local.tee $tmp (local.get $param)))
+ (func (export "f")
+ (v128.store (i32.const 160) (call $g (v128.const i32x4 1 2 3 4)))))`);
+var mem = new Uint32Array(ins.exports.mem.buffer);
+ins.exports.f();
+assertSame(get(mem, 40, 4), [1, 2, 3, 4]); // byte 160 is Uint32Array index 40
+
+// Locals that end up on the stack. Try to create unaligned placement (in the
+// baseline compiler anyway) by inserting i32 locals before or after and
+// in between the v128 ones and by having so many locals that we run out of
+// registers.
+
+var nlocals = 64;
+for ( let start of [0, 1]) {
+ let decl = ""; // wasm text declaring the locals
+ let set = ""; // wasm text initializing each v128 local with a distinct constant
+ let sum = "(v128.const i32x4 0 0 0 0)"; // nested i32x4.add expression over all v128 locals
+ var res = [0,0,0,0]; // expected lane-wise sum, accumulated on the JS side
+ var locno = start; // index of the next v128 local; start == 1 leaves index 0 for the leading i32 local
+ for ( let i=start ; i < start + nlocals ; i++ ) {
+ decl += "(local v128) ";
+ set += `(local.set ${locno} (v128.const i32x4 ${i} ${i+1} ${i+2} ${i+3})) `;
+ sum = `(i32x4.add ${sum} (local.get ${locno}))`;
+ locno++;
+ res[0] += i;
+ res[1] += i+1;
+ res[2] += i+2;
+ res[3] += i+3;
+ if ((i % 5) == 3) { // interleave an i32 local after some of the v128 locals
+ decl += "(local i32) ";
+ locno++;
+ }
+ }
+ if (start)
+ decl = "(local i32) " + decl;
+ else
+ decl += "(local i32) ";
+ var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ (func $g (result v128)
+ ${decl}
+ ${set}
+ ${sum})
+ (func (export "f")
+ (v128.store (i32.const 160) (call $g))))`);
+
+ var mem = new Uint32Array(ins.exports.mem.buffer);
+ ins.exports.f();
+ assertSame(get(mem, 40, 4), res);
+}
+
+// Ditto parameters. This is like the case above but values are passed rather
+// than set.
+//
+// call
+// call_indirect
+
+var nlocals = 64;
+for ( let start of [0, 1]) {
+ let decl = ""; // wasm text declaring the parameters
+ let pass = ""; // wasm text of the argument expressions, in parameter order
+ let sum = "(v128.const i32x4 0 0 0 0)"; // nested i32x4.add expression over all v128 parameters
+ var res = [0,0,0,0]; // expected lane-wise sum, accumulated on the JS side
+ var locno = start; // index of the next v128 parameter; start == 1 leaves index 0 for the leading i32
+ for ( let i=start ; i < start + nlocals ; i++ ) {
+ decl += "(param v128) ";
+ pass += `(v128.const i32x4 ${i} ${i+1} ${i+2} ${i+3}) `;
+ sum = `(i32x4.add ${sum} (local.get ${locno}))`;
+ locno++;
+ res[0] += i;
+ res[1] += i+1;
+ res[2] += i+2;
+ res[3] += i+3;
+ if ((i % 5) == 3) { // interleave an i32 parameter after some of the v128 parameters
+ decl += "(param i32) ";
+ pass += "(i32.const 0) ";
+ locno++;
+ }
+ }
+ if (start) {
+ decl = "(param i32) " + decl;
+ pass = "(i32.const 0) " + pass;
+ } else {
+ decl += "(param i32) ";
+ pass += "(i32.const 0) ";
+ }
+ var txt = `
+ (module
+ (memory (export "mem") 1 1)
+ (type $t1 (func ${decl} (result v128)))
+ (table funcref (elem $h))
+ (func $g ${decl} (result v128)
+ ${sum})
+ (func (export "f1")
+ (v128.store (i32.const 160) (call $g ${pass})))
+ (func $h ${decl} (result v128)
+ ${sum})
+ (func (export "f2")
+ (v128.store (i32.const 512) (call_indirect (type $t1) ${pass} (i32.const 0)))))`;
+ var ins = wasmEvalText(txt);
+
+ var mem = new Uint32Array(ins.exports.mem.buffer);
+ ins.exports.f1(); // direct call to $g, result stored at byte 160
+ assertSame(get(mem, 40, 4), res);
+ ins.exports.f2(); // indirect call to $h through table slot 0, result stored at byte 512
+ assertSame(get(mem, 128, 4), res);
+}
+
+// Widening integer dot product: operands are read from bytes 16 and 32, the i32x4 result is stored at byte 0
+
+var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ (func (export "run")
+ (v128.store (i32.const 0)
+ (i32x4.dot_i16x8_s (v128.load (i32.const 16)) (v128.load (i32.const 32))))))`);
+
+var xs = [5, 1, -4, 2, 20, -15, 12, 3];
+var ys = [6, 0, -7, 3, 8, -1, -3, 7];
+var ans = [xs[0]*ys[0] + xs[1]*ys[1], // each result lane sums the products of one adjacent pair of lanes
+ xs[2]*ys[2] + xs[3]*ys[3],
+ xs[4]*ys[4] + xs[5]*ys[5],
+ xs[6]*ys[6] + xs[7]*ys[7]];
+
+var mem16 = new Int16Array(ins.exports.mem.buffer);
+var mem32 = new Int32Array(ins.exports.mem.buffer);
+set(mem16, 8, xs); // Int16Array index 8 is byte 16
+set(mem16, 16, ys); // Int16Array index 16 is byte 32
+ins.exports.run();
+var result = get(mem32, 0, 4);
+assertSame(result, ans);
+
+// Splat, with and without constants (different code paths in ion): splat_* take the scalar as a parameter, csplat_* use a constant
+
+var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ (func (export "splat_i8x16") (param $src i32)
+ (v128.store (i32.const 0) (i8x16.splat (local.get $src))))
+ (func (export "csplat_i8x16")
+ (v128.store (i32.const 0) (i8x16.splat (i32.const 37))))
+ (func (export "splat_i16x8") (param $src i32)
+ (v128.store (i32.const 0) (i16x8.splat (local.get $src))))
+ (func (export "csplat_i16x8")
+ (v128.store (i32.const 0) (i16x8.splat (i32.const 1175))))
+ (func (export "splat_i32x4") (param $src i32)
+ (v128.store (i32.const 0) (i32x4.splat (local.get $src))))
+ (func (export "csplat_i32x4")
+ (v128.store (i32.const 0) (i32x4.splat (i32.const 127639))))
+ (func (export "splat_i64x2") (param $src i64)
+ (v128.store (i32.const 0) (i64x2.splat (local.get $src))))
+ (func (export "csplat_i64x2")
+ (v128.store (i32.const 0) (i64x2.splat (i64.const 0x1234_5678_4365))))
+ (func (export "splat_f32x4") (param $src f32)
+ (v128.store (i32.const 0) (f32x4.splat (local.get $src))))
+ (func (export "csplat_f32x4")
+ (v128.store (i32.const 0) (f32x4.splat (f32.const 9121.25))))
+ (func (export "splat_f64x2") (param $src f64)
+ (v128.store (i32.const 0) (f64x2.splat (local.get $src))))
+ (func (export "csplat_f64x2")
+ (v128.store (i32.const 0) (f64x2.splat (f64.const 26789.125))))
+)`);
+
+var mem8 = new Uint8Array(ins.exports.mem.buffer);
+ins.exports.splat_i8x16(3);
+assertSame(get(mem8, 0, 16), iota(16).map(_=>3));
+ins.exports.csplat_i8x16();
+assertSame(get(mem8, 0, 16), iota(16).map(_=>37));
+
+var mem16 = new Uint16Array(ins.exports.mem.buffer);
+ins.exports.splat_i16x8(976);
+assertSame(get(mem16, 0, 8), iota(8).map(_=>976));
+ins.exports.csplat_i16x8();
+assertSame(get(mem16, 0, 8), iota(8).map(_=>1175));
+
+var mem32 = new Uint32Array(ins.exports.mem.buffer);
+ins.exports.splat_i32x4(147812);
+assertSame(get(mem32, 0, 4), [147812, 147812, 147812, 147812]);
+ins.exports.csplat_i32x4();
+assertSame(get(mem32, 0, 4), [127639, 127639, 127639, 127639]);
+
+var mem64 = new BigInt64Array(ins.exports.mem.buffer);
+ins.exports.splat_i64x2(147812n); // the i64 parameter is passed as a BigInt
+assertSame(get(mem64, 0, 2), [147812, 147812]);
+ins.exports.csplat_i64x2();
+assertSame(get(mem64, 0, 2), [0x1234_5678_4365n, 0x1234_5678_4365n]);
+
+var memf32 = new Float32Array(ins.exports.mem.buffer);
+ins.exports.splat_f32x4(147812.5);
+assertSame(get(memf32, 0, 4), [147812.5, 147812.5, 147812.5, 147812.5]);
+ins.exports.csplat_f32x4();
+assertSame(get(memf32, 0, 4), [9121.25, 9121.25, 9121.25, 9121.25]);
+
+var memf64 = new Float64Array(ins.exports.mem.buffer);
+ins.exports.splat_f64x2(147812.5);
+assertSame(get(memf64, 0, 2), [147812.5, 147812.5]);
+ins.exports.csplat_f64x2();
+assertSame(get(memf64, 0, 2), [26789.125, 26789.125]);
+
+// AnyTrue. Ion constant folds, so test that too (the true_/false_ exports take v128.const operands).
+
+var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ (func (export "anytrue_i8x16") (result i32)
+ (v128.any_true (v128.load (i32.const 16))))
+ (func (export "true_anytrue_i8x16") (result i32)
+ (v128.any_true (v128.const i8x16 0 0 8 0 0 0 0 0 0 0 0 0 0 0 0 0)))
+ (func (export "false_anytrue_i8x16") (result i32)
+ (v128.any_true (v128.const i8x16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0))))`);
+
+var mem = new Uint8Array(ins.exports.mem.buffer);
+set(mem, 16, iota(16).map((_) => 0)); // all-zero vector: nothing is true
+assertEq(ins.exports.anytrue_i8x16(), 0);
+
+for ( let dope of [1, 7, 32, 195 ] ) {
+ set(mem, 16, iota(16).map((x) => x == 7 ? dope : 0)); // a single nonzero byte, in lane 7
+ assertEq(ins.exports.anytrue_i8x16(), 1);
+}
+
+assertEq(ins.exports.true_anytrue_i8x16(), 1);
+assertEq(ins.exports.false_anytrue_i8x16(), 0);
+
+// AllTrue. Ion constant folds, so test that too (the true_/false_ exports take v128.const operands).
+
+var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ (func (export "alltrue_i8x16") (result i32)
+ (i8x16.all_true (v128.load (i32.const 16))))
+ (func (export "true_alltrue_i8x16") (result i32)
+ (i8x16.all_true (v128.const i8x16 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16)))
+ (func (export "false_alltrue_i8x16") (result i32)
+ (i8x16.all_true (v128.const i8x16 1 2 3 4 5 6 0 8 9 10 11 12 13 14 15 16)))
+ (func (export "alltrue_i16x8") (result i32)
+ (i16x8.all_true (v128.load (i32.const 16))))
+ (func (export "true_alltrue_i16x8") (result i32)
+ (i16x8.all_true (v128.const i16x8 1 2 3 4 5 6 7 8)))
+ (func (export "false_alltrue_i16x8") (result i32)
+ (i16x8.all_true (v128.const i16x8 1 2 3 4 5 0 7 8)))
+ (func (export "alltrue_i32x4") (result i32)
+ (i32x4.all_true (v128.load (i32.const 16))))
+ (func (export "true_alltrue_i32x4") (result i32)
+ (i32x4.all_true (v128.const i32x4 1 2 3 4)))
+ (func (export "false_alltrue_i32x4") (result i32)
+ (i32x4.all_true (v128.const i32x4 1 2 3 0))))`);
+
+var mem8 = new Uint8Array(ins.exports.mem.buffer);
+var mem16 = new Uint16Array(ins.exports.mem.buffer);
+var mem32 = new Uint32Array(ins.exports.mem.buffer);
+
+set(mem8, 16, iota(16).map((_) => 0)); // all-zero vector: false for every lane width
+assertEq(ins.exports.alltrue_i8x16(), 0);
+assertEq(ins.exports.alltrue_i16x8(), 0);
+assertEq(ins.exports.alltrue_i32x4(), 0);
+
+set(mem8, 16, iota(16).map((_) => 1));
+assertEq(ins.exports.alltrue_i8x16(), 1);
+
+set(mem16, 8, iota(8).map((_) => 1));
+assertEq(ins.exports.alltrue_i16x8(), 1);
+
+set(mem32, 4, iota(4).map((_) => 1));
+assertEq(ins.exports.alltrue_i32x4(), 1);
+
+for ( let dope of [1, 7, 32, 195 ] ) {
+ set(mem8, 16, iota(16).map((x) => x == 7 ? 0 : dope)); // exactly one zero lane makes all_true false
+ assertEq(ins.exports.alltrue_i8x16(), 0);
+
+ set(mem16, 8, iota(8).map((x) => x == 4 ? 0 : dope));
+ assertEq(ins.exports.alltrue_i16x8(), 0);
+
+ set(mem32, 4, iota(4).map((x) => x == 2 ? 0 : dope));
+ assertEq(ins.exports.alltrue_i32x4(), 0);
+}
+
+assertEq(ins.exports.true_alltrue_i8x16(), 1);
+assertEq(ins.exports.false_alltrue_i8x16(), 0);
+assertEq(ins.exports.true_alltrue_i16x8(), 1);
+assertEq(ins.exports.false_alltrue_i16x8(), 0);
+assertEq(ins.exports.true_alltrue_i32x4(), 1);
+assertEq(ins.exports.false_alltrue_i32x4(), 0);
+
+// Bitmask. Ion constant folds, so test that too.
+
+var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ (func (export "bitmask_i8x16") (result i32)
+ (i8x16.bitmask (v128.load (i32.const 16))))
+ (func (export "const_bitmask_i8x16") (result i32)
+ (i8x16.bitmask (v128.const i8x16 0x80 0x7f 0xff 0x33 0x42 0x98 0x01 0x00
+ 0x31 0xcc 0xdd 0x12 0xf0 0x40 0x02 0xa0)))
+ (func (export "bitmask_i16x8") (result i32)
+ (i16x8.bitmask (v128.load (i32.const 16))))
+ (func (export "const_bitmask_i16x8") (result i32)
+ (i16x8.bitmask (v128.const i16x8 0x7f80 0xff33 0x9842 0x0001 0xcc31 0x12dd 0x40f0 0xa002)))
+ (func (export "bitmask_i32x4") (result i32)
+ (i32x4.bitmask (v128.load (i32.const 16))))
+ (func (export "const_bitmask_i32x4") (result i32)
+ (i32x4.bitmask (v128.const i32x4 0xff337f80 0x00019842 0xcc3112dd 0xa00240f0))))`);
+
+var mem8 = new Uint8Array(ins.exports.mem.buffer); // Byte, 16-bit and 32-bit views over the same exported wasm memory.
+var mem16 = new Uint16Array(ins.exports.mem.buffer);
+var mem32 = new Uint32Array(ins.exports.mem.buffer);
+
+set(mem8, 16, iota(16).map((_) => 0)); // All-zero vector at byte 16 (the address the module loads from): every bitmask must be 0.
+assertEq(ins.exports.bitmask_i8x16(), 0);
+assertEq(ins.exports.bitmask_i16x8(), 0);
+assertEq(ins.exports.bitmask_i32x4(), 0);
+
+set(mem8, 16, iota(16).map((_) => 0x80)); // Only the top bit of every byte set: all 16 mask bits expected.
+assertEq(ins.exports.bitmask_i8x16(), 0xFFFF);
+
+set(mem8, 16, iota(16).map((_) => 0x7F)); // Every bit except the top bit set: no mask bits expected.
+assertEq(ins.exports.bitmask_i8x16(), 0);
+
+set(mem8, 16, iota(16).map((i) => popcount(i) == 1 ? 0x80 : 0)); // Top bit set only in lanes 1, 2, 4 and 8, per the expected value below.
+assertEq(ins.exports.bitmask_i8x16(), (1 << 1) | (1 << 2) | (1 << 4) | (1 << 8));
+
+assertEq(ins.exports.const_bitmask_i8x16(), 0x9625); // Constant operand: lanes 0, 2, 5, 9, 10, 12 and 15 have their top bit set.
+
+set(mem16, 8, iota(8).map((i) => 0x8000)) // Presumably element 8 of the 16-bit view, i.e. byte 16; top bit of each 16-bit lane set.
+assertEq(ins.exports.bitmask_i16x8(), 0xFF)
+
+set(mem16, 8, iota(8).map((i) => 0x7FFF)) // Top bit clear in every 16-bit lane.
+assertEq(ins.exports.bitmask_i16x8(), 0)
+
+set(mem16, 8, iota(8).map((i) => popcount(i) == 1 ? 0x8000 : 0)) // Top bit set only in lanes 1, 2 and 4.
+assertEq(ins.exports.bitmask_i16x8(), (1 << 1) | (1 << 2) | (1 << 4));
+
+assertEq(ins.exports.const_bitmask_i16x8(), 0x96); // Constant operand: lanes 1, 2, 4 and 7 have their top bit set.
+
+set(mem32, 4, iota(4).map((_) => 0x80000000)) // Presumably element 4 of the 32-bit view, i.e. byte 16; top bit of each 32-bit lane set.
+assertEq(ins.exports.bitmask_i32x4(), 0xF);
+
+set(mem32, 4, iota(4).map((_) => 0x7FFFFFFF)) // Top bit clear in every 32-bit lane.
+assertEq(ins.exports.bitmask_i32x4(), 0);
+
+set(mem32, 4, iota(4).map((i) => popcount(i) == 1 ? 0x80000000 : 0)) // Top bit set only in lanes 1 and 2.
+assertEq(ins.exports.bitmask_i32x4(), (1 << 1) | (1 << 2));
+
+assertEq(ins.exports.const_bitmask_i32x4(), 0xd); // Constant operand: lanes 0, 2 and 3 have their top bit set.
+
+// Shifts
+//
+// lhs is v128 in memory
+// rhs is i32 (passed directly)
+// result is v128 in memory
+
+function shr(count, width) { // Reference model for a signed right shift by `count` on `width`-bit lanes; returns a per-lane function.
+ return (v) => { // v is one input lane value; the result is the expected shifted lane value.
+ if (count == 0) // A zero shift returns v untouched (no BigInt conversion, even for width 64).
+ return v;
+ if (width == 64) { // 64-bit lanes are computed with BigInt arithmetic.
+ if (v < 0) {
+ // This mirrors what the SIMD code does (a logical shift plus an explicit sign
+ // fill), so a bug there would be reproduced here too. Seems OK though.
+ let s = 0x1_0000_0000_0000_0000n + BigInt(v); // s = 2^64 + v: v as an unsigned 64-bit quantity.
+ let t = s / (1n << BigInt(count)); // Logical right shift of s by count.
+ let u = ((1n << BigInt(count)) - 1n) * (2n ** BigInt(64-count)); // `count` one-bits placed at the top of the 64-bit word (the sign fill).
+ let w = t + u;
+ return w - 0x1_0000_0000_0000_0000n; // Subtract 2^64 to get back to a negative (signed) value.
+ }
+ return BigInt(v) / (1n << BigInt(count)); // Non-negative input: plain truncating division by 2^count.
+ } else {
+ let mask = (width == 32) ? -1 : ((1 << width) - 1); // `1 << 32` wraps to 1 in JS, so width 32 uses -1 (all bits set) instead.
+ return (sign_extend(v, width) >> count) & mask; // sign_extend is defined elsewhere; presumably it reinterprets v as a signed `width`-bit value.
+ }
+ }
+}
+
+function shru(count, width) { // Reference model for an unsigned right shift by `count` on `width`-bit lanes; returns a per-lane function.
+ if (width == 64) { // 64-bit lanes are computed with BigInt arithmetic.
+ return (v) => {
+ if (count == 0) // A zero shift returns v untouched (no BigInt conversion).
+ return v;
+ if (v < 0) {
+ v = 0x1_0000_0000_0000_0000n + BigInt(v); // Add 2^64 so a negative v becomes its unsigned 64-bit value.
+ }
+ return BigInt(v) / (1n << BigInt(count)); // Truncating division by 2^count on the non-negative value.
+ }
+ } else {
+ return (v) => {
+ let mask = (width == 32) ? -1 : ((1 << width) - 1); // `1 << 32` wraps to 1 in JS, so width 32 uses -1 (all bits set) instead.
+ return (v >>> count) & mask; // Note: `&` produces a signed int32, so for width 32 the result is signed.
+ }
+ }
+}
+
+var constantI8Shifts = ""; // Accumulates wasm text for i8x16 shift functions whose shift count is a constant, spliced into the module built afterwards.
+for ( let i of iota(10).concat([-7]) ) { // One shl/shr_s/shr_u triple per count; iota(10) is presumably 0..9, and -7 adds a negative count.
+ constantI8Shifts += `
+ (func (export "shl_i8x16_${i}")
+ (v128.store (i32.const 0) (i8x16.shl (v128.load (i32.const 16)) (i32.const ${i}))))
+ (func (export "shr_i8x16_${i}")
+ (v128.store (i32.const 0) (i8x16.shr_s (v128.load (i32.const 16)) (i32.const ${i}))))
+ (func (export "shr_u8x16_${i}")
+ (v128.store (i32.const 0) (i8x16.shr_u (v128.load (i32.const 16)) (i32.const ${i}))))`;
+}
+
+var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ (func (export "shl_i8x16") (param $count i32)
+ (v128.store (i32.const 0) (i8x16.shl (v128.load (i32.const 16)) (local.get $count))))
+ (func (export "shr_i8x16") (param $count i32)
+ (v128.store (i32.const 0) (i8x16.shr_s (v128.load (i32.const 16)) (local.get $count))))
+ (func (export "shr_u8x16") (param $count i32)
+ (v128.store (i32.const 0) (i8x16.shr_u (v128.load (i32.const 16)) (local.get $count))))
+ ${constantI8Shifts}
+ (func (export "shl_i16x8") (param $count i32)
+ (v128.store (i32.const 0) (i16x8.shl (v128.load (i32.const 16)) (local.get $count))))
+ (func (export "shl_i16x8_3")
+ (v128.store (i32.const 0) (i16x8.shl (v128.load (i32.const 16)) (i32.const 3))))
+ (func (export "shl_i16x8_15")
+ (v128.store (i32.const 0) (i16x8.shl (v128.load (i32.const 16)) (i32.const 15))))
+ (func (export "shl_i16x8_16")
+ (v128.store (i32.const 0) (i16x8.shl (v128.load (i32.const 16)) (i32.const 16))))
+ (func (export "shl_i16x8_-15")
+ (v128.store (i32.const 0) (i16x8.shl (v128.load (i32.const 16)) (i32.const -15))))
+ (func (export "shr_i16x8") (param $count i32)
+ (v128.store (i32.const 0) (i16x8.shr_s (v128.load (i32.const 16)) (local.get $count))))
+ (func (export "shr_i16x8_3")
+ (v128.store (i32.const 0) (i16x8.shr_s (v128.load (i32.const 16)) (i32.const 3))))
+ (func (export "shr_i16x8_15")
+ (v128.store (i32.const 0) (i16x8.shr_s (v128.load (i32.const 16)) (i32.const 15))))
+ (func (export "shr_i16x8_16")
+ (v128.store (i32.const 0) (i16x8.shr_s (v128.load (i32.const 16)) (i32.const 16))))
+ (func (export "shr_i16x8_-15")
+ (v128.store (i32.const 0) (i16x8.shr_s (v128.load (i32.const 16)) (i32.const -15))))
+ (func (export "shr_u16x8") (param $count i32)
+ (v128.store (i32.const 0) (i16x8.shr_u (v128.load (i32.const 16)) (local.get $count))))
+ (func (export "shr_u16x8_3")
+ (v128.store (i32.const 0) (i16x8.shr_u (v128.load (i32.const 16)) (i32.const 3))))
+ (func (export "shr_u16x8_15")
+ (v128.store (i32.const 0) (i16x8.shr_u (v128.load (i32.const 16)) (i32.const 15))))
+ (func (export "shr_u16x8_16")
+ (v128.store (i32.const 0) (i16x8.shr_u (v128.load (i32.const 16)) (i32.const 16))))
+ (func (export "shr_u16x8_-15")
+ (v128.store (i32.const 0) (i16x8.shr_u (v128.load (i32.const 16)) (i32.const -15))))
+ (func (export "shl_i32x4") (param $count i32)
+ (v128.store (i32.const 0) (i32x4.shl (v128.load (i32.const 16)) (local.get $count))))
+ (func (export "shl_i32x4_12")
+ (v128.store (i32.const 0) (i32x4.shl (v128.load (i32.const 16)) (i32.const 12))))
+ (func (export "shl_i32x4_31")
+ (v128.store (i32.const 0) (i32x4.shl (v128.load (i32.const 16)) (i32.const 31))))
+ (func (export "shl_i32x4_32")
+ (v128.store (i32.const 0) (i32x4.shl (v128.load (i32.const 16)) (i32.const 32))))
+ (func (export "shl_i32x4_-27")
+ (v128.store (i32.const 0) (i32x4.shl (v128.load (i32.const 16)) (i32.const -27))))
+ (func (export "shr_i32x4") (param $count i32)
+ (v128.store (i32.const 0) (i32x4.shr_s (v128.load (i32.const 16)) (local.get $count))))
+ (func (export "shr_i32x4_12")
+ (v128.store (i32.const 0) (i32x4.shr_s (v128.load (i32.const 16)) (i32.const 12))))
+ (func (export "shr_i32x4_31")
+ (v128.store (i32.const 0) (i32x4.shr_s (v128.load (i32.const 16)) (i32.const 31))))
+ (func (export "shr_i32x4_32")
+ (v128.store (i32.const 0) (i32x4.shr_s (v128.load (i32.const 16)) (i32.const 32))))
+ (func (export "shr_i32x4_-27")
+ (v128.store (i32.const 0) (i32x4.shr_s (v128.load (i32.const 16)) (i32.const -27))))
+ (func (export "shr_u32x4") (param $count i32)
+ (v128.store (i32.const 0) (i32x4.shr_u (v128.load (i32.const 16)) (local.get $count))))
+ (func (export "shr_u32x4_12")
+ (v128.store (i32.const 0) (i32x4.shr_u (v128.load (i32.const 16)) (i32.const 12))))
+ (func (export "shr_u32x4_31")
+ (v128.store (i32.const 0) (i32x4.shr_u (v128.load (i32.const 16)) (i32.const 31))))
+ (func (export "shr_u32x4_32")
+ (v128.store (i32.const 0) (i32x4.shr_u (v128.load (i32.const 16)) (i32.const 32))))
+ (func (export "shr_u32x4_-27")
+ (v128.store (i32.const 0) (i32x4.shr_u (v128.load (i32.const 16)) (i32.const -27))))
+ (func (export "shl_i64x2") (param $count i32)
+ (v128.store (i32.const 0) (i64x2.shl (v128.load (i32.const 16)) (local.get $count))))
+ (func (export "shl_i64x2_27")
+ (v128.store (i32.const 0) (i64x2.shl (v128.load (i32.const 16)) (i32.const 27))))
+ (func (export "shl_i64x2_63")
+ (v128.store (i32.const 0) (i64x2.shl (v128.load (i32.const 16)) (i32.const 63))))
+ (func (export "shl_i64x2_64")
+ (v128.store (i32.const 0) (i64x2.shl (v128.load (i32.const 16)) (i32.const 64))))
+ (func (export "shl_i64x2_-231")
+ (v128.store (i32.const 0) (i64x2.shl (v128.load (i32.const 16)) (i32.const -231))))
+ (func (export "shr_i64x2") (param $count i32)
+ (v128.store (i32.const 0) (i64x2.shr_s (v128.load (i32.const 16)) (local.get $count))))
+ (func (export "shr_i64x2_27")
+ (v128.store (i32.const 0) (i64x2.shr_s (v128.load (i32.const 16)) (i32.const 27))))
+ (func (export "shr_i64x2_45")
+ (v128.store (i32.const 0) (i64x2.shr_s (v128.load (i32.const 16)) (i32.const 45))))
+ (func (export "shr_i64x2_63")
+ (v128.store (i32.const 0) (i64x2.shr_s (v128.load (i32.const 16)) (i32.const 63))))
+ (func (export "shr_i64x2_64")
+ (v128.store (i32.const 0) (i64x2.shr_s (v128.load (i32.const 16)) (i32.const 64))))
+ (func (export "shr_i64x2_-231")
+ (v128.store (i32.const 0) (i64x2.shr_s (v128.load (i32.const 16)) (i32.const -231))))
+ (func (export "shr_i64x2_-1")
+ (v128.store (i32.const 0) (i64x2.shr_s (v128.load (i32.const 16)) (i32.const -1))))
+ (func (export "shr_u64x2") (param $count i32)
+ (v128.store (i32.const 0) (i64x2.shr_u (v128.load (i32.const 16)) (local.get $count))))
+ (func (export "shr_u64x2_27")
+ (v128.store (i32.const 0) (i64x2.shr_u (v128.load (i32.const 16)) (i32.const 27))))
+ (func (export "shr_u64x2_63")
+ (v128.store (i32.const 0) (i64x2.shr_u (v128.load (i32.const 16)) (i32.const 63))))
+ (func (export "shr_u64x2_64")
+ (v128.store (i32.const 0) (i64x2.shr_u (v128.load (i32.const 16)) (i32.const 64))))
+ (func (export "shr_u64x2_-231")
+ (v128.store (i32.const 0) (i64x2.shr_u (v128.load (i32.const 16)) (i32.const -231)))))`);
+
+var mem8 = new Uint8Array(ins.exports.mem.buffer);
+var as = [1, 2, 4, 8, 16, 32, 64, 128, 129, 130, 132, 136, 144, 160, 192, 255];
+
+set(mem8, 16, as);
+
+for (let [meth,op] of [["shl_i8x16",shl], ["shr_i8x16",shr], ["shr_u8x16",shru]]) { // Pair each exported shift with its JS reference model.
+ for ( let i=0 ; i < 8 ; i++ ) { // Every count that fits in an 8-bit lane.
+ ins.exports[meth](i); // Variable-count form: the count is passed as a parameter.
+ assertSame(get(mem8, 0, 16), as.map(op(i, 8))) // The result vector is stored at byte 0.
+ ins.exports[meth + "_" + i](); // Constant-count form: the count is baked into the function.
+ assertSame(get(mem8, 0, 16), as.map(op(i, 8)))
+ }
+
+ ins.exports[meth](1); // Counts 9 and -7 must give the same result as 1 (9 & 7 == 1 and -7 & 7 == 1).
+ let a = get(mem8, 0, 16);
+ ins.exports[meth](9);
+ let b = get(mem8, 0, 16);
+ assertSame(a, b);
+ ins.exports[meth](-7);
+ let c = get(mem8, 0, 16);
+ assertSame(a, c);
+
+ ins.exports[meth + "_1"](); // Same equivalence check for the constant-count functions.
+ let x = get(mem8, 0, 16);
+ ins.exports[meth + "_9"]();
+ let y = get(mem8, 0, 16);
+ ins.exports[meth + "_-7"]();
+ let z = get(mem8, 0, 16);
+ assertSame(x, y);
+ assertSame(x, z);
+}
+
+var mem16 = new Uint16Array(ins.exports.mem.buffer);
+var as = [1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000];
+set(mem16, 8, as)
+
+ins.exports.shl_i16x8(2);
+var res = get(mem16, 0, 8);
+assertSame(res, as.map(shl(2, 16)))
+
+ins.exports.shl_i16x8(18); // Masked count
+assertSame(get(mem16, 0, 8), res);
+
+ins.exports.shl_i16x8(-14); // Masked count
+assertSame(get(mem16, 0, 8), res);
+
+for ( let shift of [3, 15, 16, -15] ) {
+ ins.exports["shl_i16x8_" + shift]();
+ assertSame(get(mem16, 0, 8), as.map(shl(shift & 15, 16)))
+}
+
+ins.exports.shr_i16x8(1);
+var res = get(mem16, 0, 8);
+assertSame(res, as.map(shr(1, 16)))
+
+ins.exports.shr_i16x8(17); // Masked count
+assertSame(get(mem16, 0, 8), res);
+
+ins.exports.shr_i16x8(-15); // Masked count
+assertSame(get(mem16, 0, 8), res);
+
+for ( let shift of [3, 15, 16, -15] ) {
+ ins.exports["shr_i16x8_" + shift]();
+ assertSame(get(mem16, 0, 8), as.map(shr(shift & 15, 16)))
+}
+
+ins.exports.shr_u16x8(1);
+var res = get(mem16, 0, 8);
+assertSame(res, as.map(shru(1, 16)))
+
+ins.exports.shr_u16x8(17); // Masked count
+assertSame(get(mem16, 0, 8), res);
+
+ins.exports.shr_u16x8(-15); // Masked count
+assertSame(get(mem16, 0, 8), res);
+
+for ( let shift of [3, 15, 16, -15] ) {
+ ins.exports["shr_u16x8_" + shift]();
+ assertSame(get(mem16, 0, 8), as.map(shru(shift & 15, 16)))
+}
+
+var mem32 = new Uint32Array(ins.exports.mem.buffer);
+var as = [5152, 6768, 7074, 800811];
+
+set(mem32, 4, as)
+ins.exports.shl_i32x4(2);
+var res = get(mem32, 0, 4);
+assertSame(res, as.map(shl(2, 32)))
+
+ins.exports.shl_i32x4(34); // Masked count
+assertSame(get(mem32, 0, 4), res);
+
+ins.exports.shl_i32x4(-30); // Masked count
+assertSame(get(mem32, 0, 4), res);
+
+for ( let shift of [12, 31, 32, -27] ) {
+ ins.exports["shl_i32x4_" + shift]();
+ assertSame(get(mem32, 0, 4), as.map(shl(shift & 31, 32)).map(x => x>>>0))
+}
+
+ins.exports.shr_i32x4(1);
+var res = get(mem32, 0, 4);
+assertSame(res, as.map(shr(1, 32)))
+
+ins.exports.shr_i32x4(33); // Masked count
+assertSame(get(mem32, 0, 4), res);
+
+ins.exports.shr_i32x4(-31); // Masked count
+assertSame(get(mem32, 0, 4), res);
+
+for ( let shift of [12, 31, 32, -27] ) {
+ ins.exports["shr_i32x4_" + shift]();
+ assertSame(get(mem32, 0, 4), as.map(shr(shift & 31, 32)))
+}
+
+ins.exports.shr_u32x4(1);
+var res = get(mem32, 0, 4);
+assertSame(res, as.map(shru(1, 32)))
+
+ins.exports.shr_u32x4(33); // Masked count
+assertSame(get(mem32, 0, 4), res);
+
+ins.exports.shr_u32x4(-31); // Masked count
+assertSame(get(mem32, 0, 4), res);
+
+for ( let shift of [12, 31, 32, -27] ) {
+ ins.exports["shr_u32x4_" + shift]();
+ assertSame(get(mem32, 0, 4), as.map(shru(shift & 31, 32)))
+}
+
+var mem64 = new BigInt64Array(ins.exports.mem.buffer);
+var as = [50515253, -616263];
+
+set(mem64, 2, as)
+ins.exports.shl_i64x2(2);
+var res = get(mem64, 0, 2);
+assertSame(res, as.map(shl(2, 64)))
+
+ins.exports.shl_i64x2(66); // Masked count
+assertSame(get(mem64, 0, 2), res);
+
+ins.exports.shl_i64x2(-62); // Masked count
+assertSame(get(mem64, 0, 2), res);
+
+for ( let shift of [27, 63, 64, -231] ) {
+ ins.exports["shl_i64x2_" + shift]();
+ assertSame(get(mem64, 0, 2), as.map(shl(shift & 63, 64)))
+}
+
+ins.exports.shr_u64x2(1);
+var res = get(mem64, 0, 2);
+assertSame(res, as.map(shru(1, 64)))
+
+ins.exports.shr_u64x2(65); // Masked count
+assertSame(get(mem64, 0, 2), res);
+
+ins.exports.shr_u64x2(-63); // Masked count
+assertSame(get(mem64, 0, 2), res);
+
+for ( let shift of [27, 63, 64, -231] ) {
+ ins.exports["shr_u64x2_" + shift]();
+ assertSame(get(mem64, 0, 2), as.map(shru(shift & 63, 64)))
+}
+
+ins.exports.shr_i64x2(2);
+var res = get(mem64, 0, 2);
+assertSame(res, as.map(shr(2, 64)))
+
+ins.exports.shr_i64x2(66); // Masked count
+assertSame(get(mem64, 0, 2), res);
+
+ins.exports.shr_i64x2(-62); // Masked count
+assertSame(get(mem64, 0, 2), res);
+
+// The Ion code generator has separate paths here for shift counts < 32 and >= 32, so test both.
+for ( let shift of [27, 45, 63, 64, -1, -231] ) {
+ ins.exports["shr_i64x2_" + shift]();
+ assertSame(get(mem64, 0, 2), as.map(shr(shift & 63, 64)))
+}
+
+// Narrow
+
+var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ (func (export "narrow_i16x8_s")
+ (v128.store (i32.const 0) (i8x16.narrow_i16x8_s (v128.load (i32.const 16)) (v128.load (i32.const 32)))))
+ (func (export "narrow_i16x8_u")
+ (v128.store (i32.const 0) (i8x16.narrow_i16x8_u (v128.load (i32.const 16)) (v128.load (i32.const 32)))))
+ (func (export "narrow_i32x4_s")
+ (v128.store (i32.const 0) (i16x8.narrow_i32x4_s (v128.load (i32.const 16)) (v128.load (i32.const 32)))))
+ (func (export "narrow_i32x4_u")
+ (v128.store (i32.const 0) (i16x8.narrow_i32x4_u (v128.load (i32.const 16)) (v128.load (i32.const 32))))))`);
+
+var mem8 = new Int8Array(ins.exports.mem.buffer);
+var mem8u = new Uint8Array(ins.exports.mem.buffer);
+var mem16 = new Int16Array(ins.exports.mem.buffer);
+var mem16u = new Uint16Array(ins.exports.mem.buffer);
+var mem32 = new Int32Array(ins.exports.mem.buffer);
+
+var as = [1, 267, 3987, 14523, 32768, 3, 312, 4876].map((x) => sign_extend(x, 16));
+var bs = [2, 312, 4876, 15987, 33777, 1, 267, 3987].map((x) => sign_extend(x, 16));
+
+set(mem16, 8, as);
+set(mem16, 16, bs);
+
+ins.exports.narrow_i16x8_s();
+var cs = as.concat(...bs).map((x) => signed_saturate(x, 8));
+assertSame(get(mem8, 0, 16), cs);
+
+ins.exports.narrow_i16x8_u();
+var cs = as.concat(...bs).map((x) => unsigned_saturate(x, 8));
+assertSame(get(mem8u, 0, 16), cs);
+
+var xs = [1, 3987, 14523, 32768].map((x) => x << 16).map((x) => sign_extend(x, 32));
+var ys = [2, 4876, 15987, 33777].map((x) => x << 16).map((x) => sign_extend(x, 32));
+
+set(mem32, 4, xs);
+set(mem32, 8, ys);
+
+ins.exports.narrow_i32x4_s();
+var cs = xs.concat(...ys).map((x) => signed_saturate(x, 16));
+assertSame(get(mem16, 0, 8), cs);
+
+ins.exports.narrow_i32x4_u();
+var cs = xs.concat(...ys).map((x) => unsigned_saturate(x, 16));
+assertSame(get(mem16u, 0, 8), cs);
+
+// Extend low/high
+
+var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ (func (export "extend_low_i8x16_s")
+ (v128.store (i32.const 0) (i16x8.extend_low_i8x16_s (v128.load (i32.const 16)))))
+ (func (export "extend_high_i8x16_s")
+ (v128.store (i32.const 0) (i16x8.extend_high_i8x16_s (v128.load (i32.const 16)))))
+ (func (export "extend_low_i8x16_u")
+ (v128.store (i32.const 0) (i16x8.extend_low_i8x16_u (v128.load (i32.const 16)))))
+ (func (export "extend_high_i8x16_u")
+ (v128.store (i32.const 0) (i16x8.extend_high_i8x16_u (v128.load (i32.const 16)))))
+ (func (export "extend_low_i16x8_s")
+ (v128.store (i32.const 0) (i32x4.extend_low_i16x8_s (v128.load (i32.const 16)))))
+ (func (export "extend_high_i16x8_s")
+ (v128.store (i32.const 0) (i32x4.extend_high_i16x8_s (v128.load (i32.const 16)))))
+ (func (export "extend_low_i16x8_u")
+ (v128.store (i32.const 0) (i32x4.extend_low_i16x8_u (v128.load (i32.const 16)))))
+ (func (export "extend_high_i16x8_u")
+ (v128.store (i32.const 0) (i32x4.extend_high_i16x8_u (v128.load (i32.const 16))))))`);
+
+var mem16 = new Int16Array(ins.exports.mem.buffer);
+var mem16u = new Uint16Array(ins.exports.mem.buffer);
+var mem8 = new Int8Array(ins.exports.mem.buffer);
+var as = [0, 1, 192, 3, 205, 5, 6, 133, 8, 9, 129, 11, 201, 13, 14, 255];
+
+set(mem8, 16, as);
+
+ins.exports.extend_low_i8x16_s();
+assertSame(get(mem16, 0, 8), iota(8).map((n) => sign_extend(as[n], 8)));
+
+ins.exports.extend_high_i8x16_s();
+assertSame(get(mem16, 0, 8), iota(8).map((n) => sign_extend(as[n+8], 8)));
+
+ins.exports.extend_low_i8x16_u();
+assertSame(get(mem16u, 0, 8), iota(8).map((n) => zero_extend(as[n], 8)));
+
+ins.exports.extend_high_i8x16_u();
+assertSame(get(mem16u, 0, 8), iota(8).map((n) => zero_extend(as[n+8], 8)));
+
+var mem32 = new Int32Array(ins.exports.mem.buffer);
+var mem32u = new Uint32Array(ins.exports.mem.buffer);
+
+var as = [0, 1, 192, 3, 205, 5, 6, 133].map((x) => x << 8);
+
+set(mem16, 8, as);
+
+ins.exports.extend_low_i16x8_s();
+assertSame(get(mem32, 0, 4), iota(4).map((n) => sign_extend(as[n], 16)));
+
+ins.exports.extend_high_i16x8_s();
+assertSame(get(mem32, 0, 4), iota(4).map((n) => sign_extend(as[n+4], 16)));
+
+ins.exports.extend_low_i16x8_u();
+assertSame(get(mem32u, 0, 4), iota(4).map((n) => zero_extend(as[n], 16)));
+
+ins.exports.extend_high_i16x8_u();
+assertSame(get(mem32u, 0, 4), iota(4).map((n) => zero_extend(as[n+4], 16)));
+
+
+// Extract lane. Ion constant folds, so test that too.
+//
+// operand is v128 in memory (or constant)
+// lane index is an immediate, so we test an arbitrary nonzero lane
+// result is scalar (returned directly)
+
+var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ (func (export "extract_i8x16_9") (result i32)
+ (i8x16.extract_lane_s 9 (v128.load (i32.const 16))))
+ (func (export "const_extract_i8x16_9") (result i32)
+ (i8x16.extract_lane_s 9 (v128.const i8x16 -1 -2 -3 -4 -5 -6 -7 -8 -9 -10 -11 -12 -13 -14 -15 -16)))
+ (func (export "extract_u8x16_6") (result i32)
+ (i8x16.extract_lane_u 6 (v128.load (i32.const 16))))
+ (func (export "const_extract_u8x16_9") (result i32)
+ (i8x16.extract_lane_u 9 (v128.const i8x16 -1 -2 -3 -4 -5 -6 -7 -8 -9 -10 -11 -12 -13 -14 -15 -16)))
+ (func (export "extract_i16x8_5") (result i32)
+ (i16x8.extract_lane_s 5 (v128.load (i32.const 16))))
+ (func (export "const_extract_i16x8_5") (result i32)
+ (i16x8.extract_lane_s 5 (v128.const i16x8 -1 -2 -3 -4 -5 -6 -7 -8)))
+ (func (export "extract_u16x8_3") (result i32)
+ (i16x8.extract_lane_u 3 (v128.load (i32.const 16))))
+ (func (export "const_extract_u16x8_3") (result i32)
+ (i16x8.extract_lane_u 3 (v128.const i16x8 -1 -2 -3 -4 -5 -6 -7 -8)))
+ (func (export "extract_i32x4_2") (result i32)
+ (i32x4.extract_lane 2 (v128.load (i32.const 16))))
+ (func (export "const_extract_i32x4_2") (result i32)
+ (i32x4.extract_lane 2 (v128.const i32x4 -1 -2 -3 -4)))
+ (func (export "extract_i64x2_1") (result i64)
+ (i64x2.extract_lane 1 (v128.load (i32.const 16))))
+ (func (export "const_extract_i64x2_1") (result i64)
+ (i64x2.extract_lane 1 (v128.const i64x2 -1 -2)))
+ (func (export "extract_f32x4_2") (result f32)
+ (f32x4.extract_lane 2 (v128.load (i32.const 16))))
+ (func (export "const_extract_f32x4_2") (result f32)
+ (f32x4.extract_lane 2 (v128.const f32x4 -1 -2 -3 -4)))
+ (func (export "extract_f64x2_1") (result f64)
+ (f64x2.extract_lane 1 (v128.load (i32.const 16))))
+ (func (export "const_extract_f64x2_1") (result f64)
+ (f64x2.extract_lane 1 (v128.const f64x2 -1 -2))))`);
+
+var mem8 = new Uint8Array(ins.exports.mem.buffer);
+var as = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
+var bs = as.map((x) => -x);
+
+set(mem8, 16, as)
+assertEq(ins.exports.extract_i8x16_9(), as[9]);
+
+set(mem8, 16, bs)
+assertEq(ins.exports.extract_u8x16_6(), 256 - as[6]);
+
+assertEq(ins.exports.const_extract_i8x16_9(), -10);
+assertEq(ins.exports.const_extract_u8x16_9(), 256-10);
+
+var mem16 = new Uint16Array(ins.exports.mem.buffer);
+var as = [1, 2, 3, 4, 5, 6, 7, 8];
+var bs = as.map((x) => -x);
+
+set(mem16, 8, as)
+assertEq(ins.exports.extract_i16x8_5(), as[5]);
+
+set(mem16, 8, bs)
+assertEq(ins.exports.extract_u16x8_3(), 65536 - as[3]);
+
+assertEq(ins.exports.const_extract_i16x8_5(), -6);
+assertEq(ins.exports.const_extract_u16x8_3(), 65536-4);
+
+var mem32 = new Uint32Array(ins.exports.mem.buffer);
+var as = [1, 2, 3, 4];
+
+set(mem32, 4, as)
+assertEq(ins.exports.extract_i32x4_2(), as[2]);
+
+assertEq(ins.exports.const_extract_i32x4_2(), -3);
+
+var mem32 = new Float32Array(ins.exports.mem.buffer);
+var as = [1.5, 2.5, 3.5, 4.5];
+
+set(mem32, 4, as)
+assertEq(ins.exports.extract_f32x4_2(), as[2]);
+
+assertEq(ins.exports.const_extract_f32x4_2(), -3);
+
+var mem64 = new Float64Array(ins.exports.mem.buffer);
+var as = [1.5, 2.5];
+
+set(mem64, 2, as)
+assertEq(ins.exports.extract_f64x2_1(), as[1]);
+
+assertEq(ins.exports.const_extract_f64x2_1(), -2);
+
+var mem64 = new BigInt64Array(ins.exports.mem.buffer);
+var as = [12345, 67890];
+
+set(mem64, 2, as)
+assertSame(ins.exports.extract_i64x2_1(), as[1]);
+
+assertEq(ins.exports.const_extract_i64x2_1(), -2n);
+
+// Replace lane
+//
+// operand 1 is v128 in memory
+// operand 2 is immediate scalar
+// lane index is an immediate, so we test an arbitrary nonzero lane
+// (note though that the fp operations have special cases for lane zero)
+// result is v128 in memory
+
+var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ (func (export "replace_i8x16_9") (param $value i32)
+ (v128.store (i32.const 0)
+ (i8x16.replace_lane 9 (v128.load (i32.const 16)) (local.get $value))))
+ (func (export "replace_i16x8_5") (param $value i32)
+ (v128.store (i32.const 0)
+ (i16x8.replace_lane 5 (v128.load (i32.const 16)) (local.get $value))))
+ (func (export "replace_i32x4_3") (param $value i32)
+ (v128.store (i32.const 0)
+ (i32x4.replace_lane 3 (v128.load (i32.const 16)) (local.get $value))))
+ (func (export "replace_i64x2_1") (param $value i64)
+ (v128.store (i32.const 0)
+ (i64x2.replace_lane 1 (v128.load (i32.const 16)) (local.get $value))))
+ (func (export "replace_f32x4_0") (param $value f32)
+ (v128.store (i32.const 0)
+ (f32x4.replace_lane 0 (v128.load (i32.const 16)) (local.get $value))))
+ (func (export "replace_f32x4_3") (param $value f32)
+ (v128.store (i32.const 0)
+ (f32x4.replace_lane 3 (v128.load (i32.const 16)) (local.get $value))))
+ (func (export "replace_f64x2_0") (param $value f64)
+ (v128.store (i32.const 0)
+ (f64x2.replace_lane 0 (v128.load (i32.const 16)) (local.get $value))))
+ (func (export "replace_f64x2_1") (param $value f64)
+ (v128.store (i32.const 0)
+ (f64x2.replace_lane 1 (v128.load (i32.const 16)) (local.get $value)))))`);
+
+
+var mem8 = new Uint8Array(ins.exports.mem.buffer);
+var as = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
+
+set(mem8, 16, as)
+ins.exports.replace_i8x16_9(42);
+assertSame(get(mem8, 0, 16), upd(as, 9, 42));
+
+var mem16 = new Uint16Array(ins.exports.mem.buffer);
+var as = [1, 2, 3, 4, 5, 6, 7, 8];
+
+set(mem16, 8, as)
+ins.exports.replace_i16x8_5(42);
+assertSame(get(mem16, 0, 8), upd(as, 5, 42));
+
+var mem32 = new Uint32Array(ins.exports.mem.buffer);
+var as = [1, 2, 3, 4];
+
+set(mem32, 4, as)
+ins.exports.replace_i32x4_3(42);
+assertSame(get(mem32, 0, 4), upd(as, 3, 42));
+
+var mem64 = new BigInt64Array(ins.exports.mem.buffer);
+var as = [1, 2];
+
+set(mem64, 2, as)
+ins.exports.replace_i64x2_1(42n);
+assertSame(get(mem64, 0, 2), upd(as, 1, 42));
+
+var mem32 = new Float32Array(ins.exports.mem.buffer);
+var as = [1.5, 2.5, 3.5, 4.5];
+
+set(mem32, 4, as)
+ins.exports.replace_f32x4_0(42.5);
+assertSame(get(mem32, 0, 4), upd(as, 0, 42.5));
+
+set(mem32, 4, as)
+ins.exports.replace_f32x4_3(42.5);
+assertSame(get(mem32, 0, 4), upd(as, 3, 42.5));
+
+var mem64 = new Float64Array(ins.exports.mem.buffer);
+var as = [1.5, 2.5];
+
+set(mem64, 2, as)
+ins.exports.replace_f64x2_0(42.5);
+assertSame(get(mem64, 0, 2), upd(as, 0, 42.5));
+
+set(mem64, 2, as)
+ins.exports.replace_f64x2_1(42.5);
+assertSame(get(mem64, 0, 2), upd(as, 1, 42.5));
+
+// Load and splat
+//
+// Operand is memory address of scalar
+// Result is v128 in memory
+
+var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ (func (export "load_splat_v8x16") (param $addr i32)
+ (v128.store (i32.const 0) (v128.load8_splat (local.get $addr))))
+ (func (export "load_splat_v16x8") (param $addr i32)
+ (v128.store (i32.const 0) (v128.load16_splat (local.get $addr))))
+ (func (export "load_splat_v32x4") (param $addr i32)
+ (v128.store (i32.const 0) (v128.load32_splat (local.get $addr))))
+ (func (export "load_splat_v64x2") (param $addr i32)
+ (v128.store (i32.const 0) (v128.load64_splat (local.get $addr)))))`);
+
+var mem8 = new Uint8Array(ins.exports.mem.buffer);
+mem8[37] = 42;
+ins.exports.load_splat_v8x16(37);
+assertSame(get(mem8, 0, 16), [42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42]);
+
+var mem16 = new Uint16Array(ins.exports.mem.buffer);
+mem16[37] = 69;
+ins.exports.load_splat_v16x8(37*2);
+assertSame(get(mem16, 0, 8), [69, 69, 69, 69, 69, 69, 69, 69]);
+
+var mem32 = new Int32Array(ins.exports.mem.buffer);
+mem32[37] = 83;
+ins.exports.load_splat_v32x4(37*4);
+assertSame(get(mem32, 0, 4), [83, 83, 83, 83]);
+
+var mem64 = new BigInt64Array(ins.exports.mem.buffer);
+mem64[37] = 83n;
+ins.exports.load_splat_v64x2(37*8);
+assertSame(get(mem64, 0, 2), [83, 83]);
+
+// Load and zero
+//
+// Operand is memory address of scalar
+// Result is v128 in memory
+
+var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ (func (export "load32_zero") (param $addr i32)
+ (v128.store (i32.const 0) (v128.load32_zero (local.get $addr))))
+ (func (export "load64_zero") (param $addr i32)
+ (v128.store (i32.const 0) (v128.load64_zero (local.get $addr)))))`);
+
+var mem32 = new Int32Array(ins.exports.mem.buffer);
+mem32[37] = 0x12345678;
+mem32[38] = 0xffffffff;
+mem32[39] = 0xfffffffe;
+mem32[40] = 0xfffffffd;
+ins.exports.load32_zero(37*4);
+assertSame(get(mem32, 0, 4), [0x12345678, 0, 0, 0]);
+
+var mem64 = new BigInt64Array(ins.exports.mem.buffer);
+mem64[37] = 0x12345678abcdef01n;
+mem64[38] = 0xffffffffffffffffn;
+ins.exports.load64_zero(37*8);
+assertSame(get(mem64, 0, 2), [0x12345678abcdef01n, 0n]);
+
+// Load and extend
+//
+// Operand is memory address of 64-bit scalar representing 8, 4, or 2 values
+// Result is v128 in memory
+
+var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ (func (export "load8x8_s") (param $addr i32)
+ (v128.store (i32.const 0) (v128.load8x8_s (local.get $addr))))
+ (func (export "load8x8_u") (param $addr i32)
+ (v128.store (i32.const 0) (v128.load8x8_u (local.get $addr))))
+ (func (export "load16x4_s") (param $addr i32)
+ (v128.store (i32.const 0) (v128.load16x4_s (local.get $addr))))
+ (func (export "load16x4_u") (param $addr i32)
+ (v128.store (i32.const 0) (v128.load16x4_u (local.get $addr))))
+ (func (export "load32x2_s") (param $addr i32)
+ (v128.store (i32.const 0) (v128.load32x2_s (local.get $addr))))
+ (func (export "load32x2_u") (param $addr i32)
+ (v128.store (i32.const 0) (v128.load32x2_u (local.get $addr)))))`);
+
+var mem8 = new Uint8Array(ins.exports.mem.buffer);
+var mem16s = new Int16Array(ins.exports.mem.buffer);
+var mem16u = new Uint16Array(ins.exports.mem.buffer);
+var mem32s = new Int32Array(ins.exports.mem.buffer);
+var mem32u = new Uint32Array(ins.exports.mem.buffer);
+var mem64s = new BigInt64Array(ins.exports.mem.buffer);
+var mem64u = new BigUint64Array(ins.exports.mem.buffer);
+var xs = [42, 129, 2, 212, 44, 27, 12, 199];
+set(mem8, 48, xs);
+
+ins.exports.load8x8_s(48);
+assertSame(get(mem16s, 0, 8), xs.map((x) => sign_extend(x, 8)));
+
+ins.exports.load8x8_u(48);
+assertSame(get(mem16u, 0, 8), xs.map((x) => zero_extend(x, 8)));
+
+var xs = [(42 << 8) | 129, (212 << 8) | 2, (44 << 8) | 27, (199 << 8) | 12];
+set(mem16u, 24, xs);
+
+ins.exports.load16x4_s(48);
+assertSame(get(mem32s, 0, 4), xs.map((x) => sign_extend(x, 16)));
+
+ins.exports.load16x4_u(48);
+assertSame(get(mem32u, 0, 4), xs.map((x) => zero_extend(x, 16)));
+
+var xs = [5, -8];
+set(mem32u, 12, xs);
+
+ins.exports.load32x2_s(48);
+assertSame(get(mem64s, 0, 2), xs.map((x) => sign_extend(x, 32)));
+
+ins.exports.load32x2_u(48);
+assertSame(get(mem64s, 0, 2), xs.map((x) => zero_extend(x, 32)));
+
+// Vector select
+//
+// Operands and results are all in memory
+
+var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ (func (export "bitselect_v128")
+ (v128.store (i32.const 0)
+ (v128.bitselect (v128.load (i32.const 16))
+ (v128.load (i32.const 32))
+ (v128.load (i32.const 48))))))`);
+
+var mem8 = new Uint8Array(ins.exports.mem.buffer); // Byte view over the exported wasm memory.
+set(mem8, 16, iota(16).map((_) => 0xAA)); // First operand of v128.bitselect (loaded from byte 16).
+set(mem8, 32, iota(16).map((_) => 0x55)); // Second operand (loaded from byte 32).
+
+set(mem8, 48, iota(16).map((_) => 0x99)); // Third operand (loaded from byte 48): the bit-selection mask.
+ins.exports.bitselect_v128();
+assertSame(get(mem8, 0, 16), iota(16).map((_) => 0xCC)); // (0xAA & 0x99) | (0x55 & ~0x99) = 0x88 | 0x44 = 0xCC
+
+set(mem8, 48, iota(16).map((_) => 0x77)); // A second mask over the same two operands.
+ins.exports.bitselect_v128();
+assertSame(get(mem8, 0, 16), iota(16).map((_) => 0x22)); // (0xAA & 0x77) | (0x55 & ~0x77) = 0x22 | 0x00 = 0x22
+
+// Vector shuffle
+//
+// Operands and results are all in memory
+
+var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ ;; the result interleaves the low eight bytes of the inputs
+ (func (export "shuffle1")
+ (v128.store (i32.const 0)
+ (i8x16.shuffle 0 16 1 17 2 18 3 19 4 20 5 21 6 22 7 23
+ (v128.load (i32.const 16))
+ (v128.load (i32.const 32)))))
+ ;; ditto the high eight bytes
+ (func (export "shuffle2")
+ (v128.store (i32.const 0)
+ (i8x16.shuffle 8 24 9 25 10 26 11 27 12 28 13 29 14 30 15 31
+ (v128.load (i32.const 16))
+ (v128.load (i32.const 32))))))`);
+
+var mem8 = new Uint8Array(ins.exports.mem.buffer);
+var xs = iota(16).map((n) => 0xA0 + n);
+var ys = iota(16).map((n) => 0x50 + n);
+set(mem8, 16, xs);
+set(mem8, 32, ys);
+
+ins.exports.shuffle1();
+assertSame(get(mem8, 0, 16), iota(16).map((x) => ((x & 1) ? ys : xs)[x >>> 1]))
+
+ins.exports.shuffle2();
+assertSame(get(mem8, 0, 16), iota(32).map((x) => ((x & 1) ? ys : xs)[x >>> 1]).slice(16));
+
+// Vector swizzle (variable permute).
+//
+// Case 1: Operands and results are all in memory (data at 16, lane indices at 32, result at 0)
+
+var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ (func (export "swizzle")
+ (v128.store (i32.const 0)
+ (i8x16.swizzle (v128.load (i32.const 16)) (v128.load (i32.const 32))))))`);
+
+var mem8 = new Uint8Array(ins.exports.mem.buffer);
+
+var xs = [100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115]; // data lane i holds 100 + i
+set(mem8, 16, xs);
+
+set(mem8, 32, [1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]); // swap adjacent lanes; all indices are in range
+ins.exports.swizzle();
+assertSame(get(mem8, 0, 16), [101,100,103,102,105,104,107,106,109,108,111,110,113,112,115,114]);
+
+set(mem8, 32, [9,8,11,10,13,12,16,14,1,0,3,2,5,192,7,6]); // indices 16 and 192 are out of range
+ins.exports.swizzle();
+assertSame(get(mem8, 0, 16), [109,108,111,110,113,112,0,114,101,100,103,102,105,0,107,106]); // out-of-range lanes are expected to be 0
+
+// Case 2: The mask operand is a constant; the swizzle gets optimized into a
+// shuffle (also see ion-analysis.js).
+
+for ( let [mask, expected] of [[[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14],
+ [101,100,103,102,105,104,107,106,109,108,111,110,113,112,115,114]],
+ [[9,8,11,10,13,12,16,14,1,0,3,2,5,192,7,6],
+ [109,108,111,110,113,112,0,114,101,100,103,102,105,0,107,106]]] ) {
+
+ let ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ (func (export "swizzle")
+ (v128.store (i32.const 0)
+ (i8x16.swizzle (v128.load (i32.const 16)) (v128.const i8x16 ${mask.join(' ')})))))
+`);
+
+ let mem8 = new Uint8Array(ins.exports.mem.buffer);
+ set(mem8, 16, [100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115]);
+ ins.exports.swizzle();
+ assertSame(get(mem8, 0, 16), expected);
+}
+
+// Convert integer to floating point (input vector at 16, result stored at 0)
+
+var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ (func (export "convert_s")
+ (v128.store (i32.const 0)
+ (f32x4.convert_i32x4_s (v128.load (i32.const 16)))))
+ (func (export "convert_u")
+ (v128.store (i32.const 0)
+ (f32x4.convert_i32x4_u (v128.load (i32.const 16))))))`);
+
+var mem32s = new Int32Array(ins.exports.mem.buffer);
+var mem32f = new Float32Array(ins.exports.mem.buffer);
+var xs = [1, -9, 77987, -34512]; // all below 2^24 in magnitude, so the float32 results are compared unrounded
+
+set(mem32s, 4, xs);
+ins.exports.convert_s();
+assertSame(get(mem32f, 0, 4), xs);
+
+var mem32u = new Uint32Array(ins.exports.mem.buffer);
+var ys = xs.map((x) => x>>>0); // same bit patterns read as unsigned; the negative inputs become large values
+
+set(mem32u, 4, ys);
+ins.exports.convert_u();
+assertSame(get(mem32f, 0, 4), ys.map(Math.fround)); // expected values are rounded to float32 first
+
+// Convert floating point to integer with saturating truncation (input at 16, result at 0)
+
+var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ (func (export "trunc_sat_s")
+ (v128.store (i32.const 0)
+ (i32x4.trunc_sat_f32x4_s (v128.load (i32.const 16)))))
+ (func (export "trunc_sat_u")
+ (v128.store (i32.const 0)
+ (i32x4.trunc_sat_f32x4_u (v128.load (i32.const 16))))))`);
+
+var mem32s = new Int32Array(ins.exports.mem.buffer);
+var mem32u = new Uint32Array(ins.exports.mem.buffer);
+var mem32f = new Float32Array(ins.exports.mem.buffer);
+var xs = [1.5, -9.5, 7.5e12, -8e13]; // two in-range values, then one beyond each end of the int32 range
+
+set(mem32f, 4, xs);
+ins.exports.trunc_sat_s();
+assertSame(get(mem32s, 0, 4), [1, -9, 0x7FFFFFFF, -0x80000000]); // fractions dropped; out-of-range lanes clamp to the int32 limits
+
+var xs = [1.5, -9.5, 7.5e12, 812];
+set(mem32f, 4, xs);
+ins.exports.trunc_sat_u();
+assertSame(get(mem32u, 0, 4), [1, 0, 0xFFFFFFFF, 812]); // negative input clamps to 0, too-large input to 0xFFFFFFFF
+
+var xs = [0, -0, 0x80860000, 0x100000000]; // zeroes, a value above 2^31, and 2^32 itself
+set(mem32f, 4, xs);
+ins.exports.trunc_sat_u();
+assertSame(get(mem32u, 0, 4), [0, 0, 0x80860000, 0xFFFFFFFF]); // 0x80860000 is returned unchanged; 2^32 clamps to 0xFFFFFFFF
+
+// Loops and blocks. This should at least test "sync" in the baseline compiler.
+
+var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ (func $f (param $count i32) (param $v v128) (result v128)
+ (local $tmp v128)
+ (block $B1
+ (loop $L1
+ (br_if $B1 (i32.eqz (local.get $count)))
+ (local.set $tmp (i32x4.add (local.get $tmp) (local.get $v)))
+ (local.set $count (i32.sub (local.get $count) (i32.const 1)))
+ (br $L1)))
+ (local.get $tmp))
+ (func (export "run") (param $count i32)
+ (v128.store (i32.const 0)
+ (call $f (local.get $count) (v128.load (i32.const 16))))))`);
+
+var mem32 = new Int32Array(ins.exports.mem.buffer);
+set(mem32, 4, [1,2,3,4]); // the vector the module loads from address 16
+ins.exports.run(7); // $f adds the vector into $tmp once per loop iteration, $count times
+assertSame(get(mem32, 0, 4), [7,14,21,28]); // 7 * [1,2,3,4]
+
+// Lots of parameters, this should trigger stack parameter passing
+//
+// 10 parameters in memory, we load them and pass them and operate on them.
+
+var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ (func $f (param $v0 v128) (param $v1 v128) (param $v2 v128) (param $v3 v128) (param $v4 v128)
+ (param $v5 v128) (param $v6 v128) (param $v7 v128) (param $v8 v128) (param $v9 v128)
+ (result v128)
+ (i32x4.add (local.get $v0)
+ (i32x4.add (local.get $v1)
+ (i32x4.add (local.get $v2)
+ (i32x4.add (local.get $v3)
+ (i32x4.add (local.get $v4)
+ (i32x4.add (local.get $v5)
+ (i32x4.add (local.get $v6)
+ (i32x4.add (local.get $v7)
+ (i32x4.add (local.get $v8) (local.get $v9)))))))))))
+ (func (export "run")
+ (v128.store (i32.const 0)
+ (call $f (v128.load (i32.const ${16*1}))
+ (v128.load (i32.const ${16*2}))
+ (v128.load (i32.const ${16*3}))
+ (v128.load (i32.const ${16*4}))
+ (v128.load (i32.const ${16*5}))
+ (v128.load (i32.const ${16*6}))
+ (v128.load (i32.const ${16*7}))
+ (v128.load (i32.const ${16*8}))
+ (v128.load (i32.const ${16*9}))
+ (v128.load (i32.const ${16*10}))))))`);
+
+
+var mem32 = new Int32Array(ins.exports.mem.buffer);
+var sum = [0, 0, 0, 0];
+for ( let i=1; i <= 10; i++ ) {
+ let v = [1,2,3,4].map((x) => x*i);
+ set(mem32, 4*i, v);
+ for ( let j=0; j < 4; j++ )
+ sum[j] += v[j];
+}
+
+ins.exports.run();
+
+assertSame(get(mem32, 0, 4), sum);
+
+// Globals.
+//
+// We have a number of different code paths and representations and
+// need to test them all.
+//
+// Cases:
+// - private global, mutable / immutable, initialized from constant or imported immutable global
+// - exported global, mutable / immutable, initialized from constant or imported immutable global
+// - imported global, mutable / immutable
+// - imported global that's re-exported, mutable / immutable
+
+// Global used for initialization below.
+
+var init = (function () {
+ var ins = wasmEvalText(`
+ (module
+ (global (export "init") v128 (v128.const i32x4 9 8 7 6)))`);
+ return ins.exports;
+})();
+
+for ( let exportspec of ['', '(export "g")'] ) { // '' keeps $g private; the other spec exports it as "g"
+
+ // Private/exported immutable initialized from constant
+
+ let ins1 = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ (global $g ${exportspec} v128 (v128.const i32x4 9 8 7 6))
+ (func (export "get") (param $dest i32)
+ (v128.store (local.get $dest) (global.get $g))))`);
+
+ let mem1 = new Int32Array(ins1.exports.mem.buffer);
+ ins1.exports.get(0); // copies $g into memory at address 0 so JS can inspect it
+ assertSame(get(mem1, 0, 4), [9, 8, 7, 6]);
+
+ // Private/exported mutable initialized from constant
+
+ let ins2 = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ (global $g ${exportspec} (mut v128) (v128.const i32x4 9 8 7 6))
+ (func (export "put") (param $val i32)
+ (global.set $g (i32x4.splat (local.get $val))))
+ (func (export "get") (param $dest i32)
+ (v128.store (local.get $dest) (global.get $g))))`);
+
+ let mem2 = new Int32Array(ins2.exports.mem.buffer);
+ ins2.exports.get(0);
+ assertSame(get(mem2, 0, 4), [9, 8, 7, 6]); // initial value
+ ins2.exports.put(37); // overwrite $g with a splat of 37
+ ins2.exports.get(0);
+ assertSame(get(mem2, 0, 4), [37, 37, 37, 37]);
+
+ // Private/exported immutable initialized from imported immutable global
+
+ let ins3 = wasmEvalText(`
+ (module
+ (global $init (import "m" "init") v128)
+ (memory (export "mem") 1 1)
+ (global $g ${exportspec} v128 (global.get $init))
+ (func (export "get") (param $dest i32)
+ (v128.store (local.get $dest) (global.get $g))))`,
+ {m:init});
+
+ let mem3 = new Int32Array(ins3.exports.mem.buffer);
+ ins3.exports.get(0);
+ assertSame(get(mem3, 0, 4), [9, 8, 7, 6]); // value copied from the imported "init" global
+
+ // Private/exported mutable initialized from imported immutable global
+
+ let ins4 = wasmEvalText(`
+ (module
+ (global $init (import "m" "init") v128)
+ (memory (export "mem") 1 1)
+ (global $g ${exportspec} (mut v128) (global.get $init))
+ (func (export "put") (param $val i32)
+ (global.set $g (i32x4.splat (local.get $val))))
+ (func (export "get") (param $dest i32)
+ (v128.store (local.get $dest) (global.get $g))))`,
+ {m:init});
+
+ let mem4 = new Int32Array(ins4.exports.mem.buffer);
+ ins4.exports.get(0);
+ assertSame(get(mem4, 0, 4), [9, 8, 7, 6]);
+ ins4.exports.put(37); // writes this module's own $g, not the imported immutable $init
+ ins4.exports.get(0);
+ assertSame(get(mem4, 0, 4), [37, 37, 37, 37]);
+
+ // Imported private/re-exported immutable
+
+ let ins5 = wasmEvalText(`
+ (module
+ (global $g ${exportspec} (import "m" "init") v128)
+ (memory (export "mem") 1 1)
+ (func (export "get") (param $dest i32)
+ (v128.store (local.get $dest) (global.get $g))))`,
+ {m:init});
+
+ let mem5 = new Int32Array(ins5.exports.mem.buffer);
+ ins5.exports.get(0);
+ assertSame(get(mem5, 0, 4), [9, 8, 7, 6]);
+
+ // Imported private/re-exported mutable
+
+ let mutg = (function () { // built inside the loop, so each iteration starts from 19 18 17 16 despite put(37) below
+ var ins = wasmEvalText(`
+ (module
+ (global (export "mutg") (mut v128) (v128.const i32x4 19 18 17 16)))`);
+ return ins.exports;
+ })();
+
+ let ins6 = wasmEvalText(`
+ (module
+ (global $g ${exportspec} (import "m" "mutg") (mut v128))
+ (memory (export "mem") 1 1)
+ (func (export "put") (param $val i32)
+ (global.set $g (i32x4.splat (local.get $val))))
+ (func (export "get") (param $dest i32)
+ (v128.store (local.get $dest) (global.get $g))))`,
+ {m:mutg});
+
+ let mem6 = new Int32Array(ins6.exports.mem.buffer);
+ ins6.exports.get(0);
+ assertSame(get(mem6, 0, 4), [19, 18, 17, 16]);
+ ins6.exports.put(37); // $g is the imported mutable global, so this writes through to mutg
+ ins6.exports.get(0);
+ assertSame(get(mem6, 0, 4), [37, 37, 37, 37]);
+}
+
+// Imports and exports that pass and return v128
+
+var insworker = wasmEvalText(`
+ (module
+ (func (export "worker") (param v128) (result v128)
+ (i8x16.add (local.get 0) (v128.const i8x16 ${iota(16).join(' ')}))))`);
+
+var insrun = wasmEvalText(`
+ (module
+ (import "" "worker" (func $worker (param v128) (result v128)))
+ (memory (export "mem") 1 1)
+ (func (export "run") (param $srcloc i32) (param $destloc i32)
+ (v128.store (local.get $destloc)
+ (call $worker (v128.load (local.get $srcloc))))))`,
+ {"":insworker.exports});
+
+var mem = new Uint8Array(insrun.exports.mem.buffer);
+var xs = iota(16).map((x) => x+5);
+set(mem, 0, xs);
+insrun.exports.run(0, 16);
+assertSame(get(mem, 16, 16), xs.map((x,i) => x+i))
+
+// Make sure JS<->wasm call guards are sensible.
+
+// Calling from JS to export that accepts v128 (must throw, whatever the arguments are).
+assertErrorMessage(() => insworker.exports.worker(),
+ TypeError,
+ /cannot pass.*v128.*to or from JS/);
+
+// Calling from wasm with v128 to import that comes from JS. The instantiation
+// will succeed even if the param type of the import is v128 (see "create a host
+// function" in the Wasm JSAPI spec), it is the act of invoking it that checks
+// that verboten types are not used (see "run a host function", ibid.).
+var badImporter = wasmEvalText(`
+ (module
+ (import "" "worker" (func $worker (param v128) (result v128)))
+ (func (export "run")
+ (drop (call $worker (v128.const i32x4 0 1 2 3)))))`,
+ {"":{worker: function(a) { return a; }}}); // a plain JS function stands in for the v128 -> v128 import
+
+assertErrorMessage(() => badImporter.exports.run(), // "run" itself takes no v128; the error comes from calling the JS import
+ TypeError,
+ /cannot pass.*v128.*to or from JS/);
+
+// Imports and exports that pass and return v128 as stack (not register) args.
+// The worker takes 14 v128 parameters and returns two v128 results.
+var exportWithStackArgs = wasmEvalText(`
+ (module
+ (func (export "worker") (param v128) (param v128) (param v128) (param v128)
+ (param v128) (param v128) (param v128) (param v128)
+ (param v128) (param v128) (param v128) (param v128)
+ (param v128) (param v128)
+ (result v128 v128)
+ (i8x16.add (local.get 3) (local.get 12))
+ (local.get 7)))`);
+
+var importWithStackArgs = wasmEvalText(`
+ (module
+ (type $t1 (func (param v128) (param v128) (param v128) (param v128)
+ (param v128) (param v128) (param v128) (param v128)
+ (param v128) (param v128) (param v128) (param v128)
+ (param v128) (param v128)
+ (result v128 v128)))
+ (import "" "worker" (func $worker (type $t1)))
+ (memory (export "mem") 1 1)
+ (table funcref (elem $worker))
+ (func (export "run")
+ (i32.const 16)
+ (call_indirect (type $t1) (v128.const i32x4 1 1 1 1) (v128.const i32x4 2 2 2 2) (v128.const i32x4 3 3 3 3)
+ (v128.const i32x4 4 4 4 4) (v128.const i32x4 5 5 5 5) (v128.const i32x4 6 6 6 6)
+ (v128.const i32x4 7 7 7 7) (v128.const i32x4 8 8 8 8) (v128.const i32x4 9 9 9 9)
+ (v128.const i32x4 10 10 10 10) (v128.const i32x4 11 11 11 11) (v128.const i32x4 12 12 12 12)
+ (v128.const i32x4 13 13 13 13) (v128.const i32x4 14 14 14 14)
+ (i32.const 0))
+ drop
+ v128.store
+ (i32.const 0)
+ (call $worker (v128.const i32x4 1 1 1 1) (v128.const i32x4 2 2 2 2) (v128.const i32x4 3 3 3 3)
+ (v128.const i32x4 4 4 4 4) (v128.const i32x4 5 5 5 5) (v128.const i32x4 6 6 6 6)
+ (v128.const i32x4 7 7 7 7) (v128.const i32x4 8 8 8 8) (v128.const i32x4 9 9 9 9)
+ (v128.const i32x4 10 10 10 10) (v128.const i32x4 11 11 11 11) (v128.const i32x4 12 12 12 12)
+ (v128.const i32x4 13 13 13 13) (v128.const i32x4 14 14 14 14))
+ drop
+ v128.store))`,
+ {"": exportWithStackArgs.exports});
+
+var mem = new Int32Array(importWithStackArgs.exports.mem.buffer);
+importWithStackArgs.exports.run(); // calls the worker indirectly, then directly; each call drops the second result and stores the first
+assertSame(get(mem, 0, 4), [17, 17, 17, 17]); // direct call, stored at address 0: local 3 (4s) + local 12 (13s)
+assertSame(get(mem, 4, 4), [17, 17, 17, 17]); // indirect call, stored at address 16
+
+// Imports and exports of v128 globals (one instance exports a mutable global, another imports it)
+
+var insexporter = wasmEvalText(`
+ (module
+ (global (export "myglobal") (mut v128) (v128.const i8x16 ${iota(16).join(' ')})))`);
+
+var insimporter = wasmEvalText(`
+ (module
+ (import "m" "myglobal" (global $g (mut v128)))
+ (memory (export "mem") 1 1)
+ (func (export "run") (param $dest i32)
+ (v128.store (local.get $dest) (global.get $g))))`,
+ {m:insexporter.exports});
+
+var mem = new Uint8Array(insimporter.exports.mem.buffer);
+insimporter.exports.run(16); // copy the imported global into memory at address 16
+assertSame(get(mem, 16, 16), iota(16)); // matches the constant the exporter initialized it with
+
+// Guards on accessing v128 globals from JS: both writing and reading .value must throw
+
+assertErrorMessage(() => insexporter.exports.myglobal.value = 0,
+ TypeError,
+ /cannot pass.*v128.*to or from JS/);
+
+assertErrorMessage(function () { let v = insexporter.exports.myglobal.value },
+ TypeError,
+ /cannot pass.*v128.*to or from JS/);
+
+// Multi-value cases + v128 parameters to if, block, loop. Every export stores its result at address 48.
+
+var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ (func $mvreturn (result v128 v128 v128)
+ (v128.load (i32.const 16))
+ (v128.load (i32.const 0))
+ (v128.load (i32.const 32)))
+ (func (export "runreturn")
+ i32.const 48
+ (call $mvreturn)
+ i32x4.sub ;; [-20, -20, -20, -20]
+ i32x4.sub ;; [31, 32, 33, 34]
+ v128.store)
+ (func (export "runif") (param $cond i32)
+ i32.const 48
+ (v128.load (i32.const 0))
+ (v128.load (i32.const 16))
+ (if (param v128) (param v128) (result v128 v128)
+ (local.get $cond)
+ (then i32x4.add
+ (v128.load (i32.const 32)))
+ (else i32x4.sub
+ (v128.load (i32.const 0))))
+ i32x4.add
+ v128.store)
+ (func (export "runblock")
+ i32.const 48
+ (v128.load (i32.const 0))
+ (v128.load (i32.const 16))
+ (block (param v128 v128) (result v128 v128)
+ i32x4.add
+ (v128.load (i32.const 32)))
+ i32x4.add
+ v128.store)
+ (func (export "runloop") (param $count i32)
+ i32.const 48
+ (v128.load (i32.const 0))
+ (v128.load (i32.const 16))
+ (block $B (param v128 v128) (result v128 v128)
+ (loop $L (param v128 v128) (result v128 v128)
+ i32x4.add
+ (v128.load (i32.const 32))
+ (local.set $count (i32.sub (local.get $count) (i32.const 1)))
+ (br_if $B (i32.eqz (local.get $count)))
+ (br $L)))
+ i32x4.add
+ v128.store))`);
+
+var mem = new Int32Array(ins.exports.mem.buffer);
+set(mem, 0, [1, 2, 3, 4]); // vector@0
+set(mem, 4, [11, 12, 13, 14]); // vector@16
+set(mem, 8, [21, 22, 23, 24]); // vector@32
+
+// Multi-value returns: vector@16 - (vector@0 - vector@32)
+
+ins.exports.runreturn();
+assertSame(get(mem, 12, 4), [31, 32, 33, 34]);
+
+// Multi-parameters to and multi-returns from "if"
+
+// This should be vector@0 + vector@16 + vector@32
+ins.exports.runif(1);
+assertSame(get(mem, 12, 4),
+ [33, 36, 39, 42]);
+
+// This should be vector@0 - vector@16 + vector@0
+ins.exports.runif(0);
+assertSame(get(mem, 12, 4),
+ [-9, -8, -7, -6]);
+
+// This should be vector@0 + vector@16 + vector@32
+ins.exports.runblock();
+assertSame(get(mem, 12, 4),
+ [33, 36, 39, 42]);
+
+// This should be vector@0 + vector@16 + N * vector@32 where
+// N is the parameter to runloop.
+ins.exports.runloop(3);
+assertSame(get(mem, 12, 4),
+ [12+3*21, 14+3*22, 16+3*23, 18+3*24]);
diff --git a/js/src/jit-test/tests/wasm/simd/avx2-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/avx2-x64-ion-codegen.js
new file mode 100644
index 0000000000..af8269e190
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/avx2-x64-ion-codegen.js
@@ -0,0 +1,584 @@
+// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x64 || getBuildConfiguration().simulator || !isAvxPresent(); include:codegen-x64-test.js
+
+// Test that there are no extraneous moves for various SIMD conversion
+// operations. See README-codegen.md for general information about this type of
+// test case.
+
+// Note, these tests test the beginning of the output but not the end.
+
+// Currently AVX2 exhibits a defect when function uses its first v128 arg and
+// returns v128: the register allocator adds unneeded extra moves from xmm0,
+// then into different temporary, and then the latter temporary is used as arg.
+// In the tests below, to simplify things, don't use/ignore the first arg.
+// v128 OP v128 -> v128
+// inputs: [[complete-opname, expected-pattern], ...]
+function codegenTestX64_v128xv128_v128_avxhack(inputs, options = {}) {
+ for ( let [op, expected] of inputs ) {
+ codegenTestX64_adhoc(wrap(options, `
+ (func (export "f") (param v128 v128 v128) (result v128)
+ (${op} (local.get 1) (local.get 2)))`),
+ 'f',
+ expected,
+ options);
+ }
+}
+// (see codegenTestX64_v128xv128_v128_avxhack comment about AVX defect)
+// v128 OP const -> v128
+// inputs: [[complete-opname, const, expected-pattern], ...]
+function codegenTestX64_v128xLITERAL_v128_avxhack(inputs, options = {}) {
+ for ( let [op, const_, expected] of inputs ) {
+ codegenTestX64_adhoc(wrap(options, `
+ (func (export "f") (param v128 v128) (result v128)
+ (${op} (local.get 1) ${const_}))`),
+ 'f',
+ expected,
+ options);
+ }
+}
+// (see codegenTestX64_v128xv128_v128_avxhack comment about AVX defect)
+// const OP v128 -> v128
+// inputs: [[complete-opname, const, expected-pattern], ...]
+function codegenTestX64_LITERALxv128_v128_avxhack(inputs, options = {}) {
+ for ( let [op, const_, expected] of inputs ) {
+ codegenTestX64_adhoc(wrap(options, `
+ (func (export "f") (param v128 v128) (result v128)
+ (${op} ${const_} (local.get 1)))`),
+ 'f',
+ expected,
+ options);
+ }
+}
+
+// Utility function to test SIMD operations encoding, where the input argument
+// has the specified type (T).
+// inputs: [[type, complete-opname, expected-pattern], ...]
+function codegenTestX64_T_v128_avxhack(inputs, options = {}) {
+ for ( let [ty, op, expected] of inputs ) {
+ codegenTestX64_adhoc(wrap(options, `
+ (func (export "f") (param ${ty}) (result v128)
+ (${op} (local.get 0)))`),
+ 'f',
+ expected,
+ options);
+ }
+}
+
+// Matchers for any 64- and 32-bit registers (regex source, interpolated into the expected patterns below).
+var GPR_I64 = "%r\\w+"; // "%r" followed by word characters
+var GPR_I32 = "%(?:e\\w+|r\\d+d)"; // either "%e..." or "%r<digits>d"
+
+// Simple binary ops: e.g. add, sub, mul; also avgr, min/max, narrow and dot. Entries are [opname, expected encoding].
+codegenTestX64_v128xv128_v128_avxhack( // operands are arguments 1 and 2, which show up as xmm1 and xmm2 below
+ [['i8x16.avgr_u', `c5 f1 e0 c2 vpavgb %xmm2, %xmm1, %xmm0`],
+ ['i16x8.avgr_u', `c5 f1 e3 c2 vpavgw %xmm2, %xmm1, %xmm0`],
+ ['i8x16.add', `c5 f1 fc c2 vpaddb %xmm2, %xmm1, %xmm0`],
+ ['i8x16.add_sat_s', `c5 f1 ec c2 vpaddsb %xmm2, %xmm1, %xmm0`],
+ ['i8x16.add_sat_u', `c5 f1 dc c2 vpaddusb %xmm2, %xmm1, %xmm0`],
+ ['i8x16.sub', `c5 f1 f8 c2 vpsubb %xmm2, %xmm1, %xmm0`],
+ ['i8x16.sub_sat_s', `c5 f1 e8 c2 vpsubsb %xmm2, %xmm1, %xmm0`],
+ ['i8x16.sub_sat_u', `c5 f1 d8 c2 vpsubusb %xmm2, %xmm1, %xmm0`],
+ ['i16x8.mul', `c5 f1 d5 c2 vpmullw %xmm2, %xmm1, %xmm0`],
+ ['i16x8.min_s', `c5 f1 ea c2 vpminsw %xmm2, %xmm1, %xmm0`],
+ ['i16x8.min_u', `c4 e2 71 3a c2 vpminuw %xmm2, %xmm1, %xmm0`],
+ ['i16x8.max_s', `c5 f1 ee c2 vpmaxsw %xmm2, %xmm1, %xmm0`],
+ ['i16x8.max_u', `c4 e2 71 3e c2 vpmaxuw %xmm2, %xmm1, %xmm0`],
+ ['i32x4.add', `c5 f1 fe c2 vpaddd %xmm2, %xmm1, %xmm0`],
+ ['i32x4.sub', `c5 f1 fa c2 vpsubd %xmm2, %xmm1, %xmm0`],
+ ['i32x4.mul', `c4 e2 71 40 c2 vpmulld %xmm2, %xmm1, %xmm0`],
+ ['i32x4.min_s', `c4 e2 71 39 c2 vpminsd %xmm2, %xmm1, %xmm0`],
+ ['i32x4.min_u', `c4 e2 71 3b c2 vpminud %xmm2, %xmm1, %xmm0`],
+ ['i32x4.max_s', `c4 e2 71 3d c2 vpmaxsd %xmm2, %xmm1, %xmm0`],
+ ['i32x4.max_u', `c4 e2 71 3f c2 vpmaxud %xmm2, %xmm1, %xmm0`],
+ ['i64x2.add', `c5 f1 d4 c2 vpaddq %xmm2, %xmm1, %xmm0`],
+ ['i64x2.sub', `c5 f1 fb c2 vpsubq %xmm2, %xmm1, %xmm0`],
+ ['i64x2.mul', `
+c5 e1 73 d1 20 vpsrlq \\$0x20, %xmm1, %xmm3
+66 0f f4 da pmuludq %xmm2, %xmm3
+c5 81 73 d2 20 vpsrlq \\$0x20, %xmm2, %xmm15
+66 44 0f f4 f9 pmuludq %xmm1, %xmm15
+66 44 0f d4 fb paddq %xmm3, %xmm15
+66 41 0f 73 f7 20 psllq \\$0x20, %xmm15
+c5 f1 f4 c2 vpmuludq %xmm2, %xmm1, %xmm0
+66 41 0f d4 c7 paddq %xmm15, %xmm0`],
+ ['f32x4.add', `c5 f0 58 c2 vaddps %xmm2, %xmm1, %xmm0`],
+ ['f32x4.sub', `c5 f0 5c c2 vsubps %xmm2, %xmm1, %xmm0`],
+ ['f32x4.mul', `c5 f0 59 c2 vmulps %xmm2, %xmm1, %xmm0`],
+ ['f32x4.div', `c5 f0 5e c2 vdivps %xmm2, %xmm1, %xmm0`],
+ ['f64x2.add', `c5 f1 58 c2 vaddpd %xmm2, %xmm1, %xmm0`],
+ ['f64x2.sub', `c5 f1 5c c2 vsubpd %xmm2, %xmm1, %xmm0`],
+ ['f64x2.mul', `c5 f1 59 c2 vmulpd %xmm2, %xmm1, %xmm0`],
+ ['f64x2.div', `c5 f1 5e c2 vdivpd %xmm2, %xmm1, %xmm0`],
+ ['i8x16.narrow_i16x8_s', `c5 f1 63 c2 vpacksswb %xmm2, %xmm1, %xmm0`],
+ ['i8x16.narrow_i16x8_u', `c5 f1 67 c2 vpackuswb %xmm2, %xmm1, %xmm0`],
+ ['i16x8.narrow_i32x4_s', `c5 f1 6b c2 vpackssdw %xmm2, %xmm1, %xmm0`],
+ ['i16x8.narrow_i32x4_u', `c4 e2 71 2b c2 vpackusdw %xmm2, %xmm1, %xmm0`],
+ ['i32x4.dot_i16x8_s', `c5 f1 f5 c2 vpmaddwd %xmm2, %xmm1, %xmm0`]]);
+
+// Simple comparison ops, followed by pmin/pmax, swizzle, extmul and q15mulr_sat_s
+codegenTestX64_v128xv128_v128_avxhack( // several expectations are multi-instruction sequences using xmm15 as scratch
+ [['i8x16.eq', `c5 f1 74 c2 vpcmpeqb %xmm2, %xmm1, %xmm0`],
+ ['i8x16.ne', `
+c5 f1 74 c2 vpcmpeqb %xmm2, %xmm1, %xmm0
+66 45 0f 75 ff pcmpeqw %xmm15, %xmm15
+66 41 0f ef c7 pxor %xmm15, %xmm0`],
+ ['i8x16.lt_s', `c5 e9 64 c1 vpcmpgtb %xmm1, %xmm2, %xmm0`],
+ ['i8x16.gt_u', `
+c5 f1 de c2 vpmaxub %xmm2, %xmm1, %xmm0
+66 0f 74 c2 pcmpeqb %xmm2, %xmm0
+66 45 0f 75 ff pcmpeqw %xmm15, %xmm15
+66 41 0f ef c7 pxor %xmm15, %xmm0`],
+ ['i16x8.eq', `c5 f1 75 c2 vpcmpeqw %xmm2, %xmm1, %xmm0`],
+ ['i16x8.ne', `
+c5 f1 75 c2 vpcmpeqw %xmm2, %xmm1, %xmm0
+66 45 0f 75 ff pcmpeqw %xmm15, %xmm15
+66 41 0f ef c7 pxor %xmm15, %xmm0`],
+ ['i16x8.le_s', `
+c5 f1 65 c2 vpcmpgtw %xmm2, %xmm1, %xmm0
+66 45 0f 75 ff pcmpeqw %xmm15, %xmm15
+66 41 0f ef c7 pxor %xmm15, %xmm0`],
+ ['i16x8.ge_u', `
+c4 e2 71 3a c2 vpminuw %xmm2, %xmm1, %xmm0
+66 0f 75 c2 pcmpeqw %xmm2, %xmm0`],
+ ['i32x4.eq', `c5 f1 76 c2 vpcmpeqd %xmm2, %xmm1, %xmm0`],
+ ['i32x4.ne', `
+c5 f1 76 c2 vpcmpeqd %xmm2, %xmm1, %xmm0
+66 45 0f 75 ff pcmpeqw %xmm15, %xmm15
+66 41 0f ef c7 pxor %xmm15, %xmm0`],
+ ['i32x4.lt_s', `c5 e9 66 c1 vpcmpgtd %xmm1, %xmm2, %xmm0`],
+ ['i32x4.gt_u', `
+c4 e2 71 3f c2 vpmaxud %xmm2, %xmm1, %xmm0
+66 0f 76 c2 pcmpeqd %xmm2, %xmm0
+66 45 0f 75 ff pcmpeqw %xmm15, %xmm15
+66 41 0f ef c7 pxor %xmm15, %xmm0`],
+ ['i64x2.eq', `c4 e2 71 29 c2 vpcmpeqq %xmm2, %xmm1, %xmm0`],
+ ['i64x2.ne', `
+c4 e2 71 29 c2 vpcmpeqq %xmm2, %xmm1, %xmm0
+66 45 0f 75 ff pcmpeqw %xmm15, %xmm15
+66 41 0f ef c7 pxor %xmm15, %xmm0`],
+ ['i64x2.lt_s', `c4 e2 69 37 c1 vpcmpgtq %xmm1, %xmm2, %xmm0`],
+ ['i64x2.ge_s', `
+c4 e2 69 37 c1 vpcmpgtq %xmm1, %xmm2, %xmm0
+66 45 0f 75 ff pcmpeqw %xmm15, %xmm15
+66 41 0f ef c7 pxor %xmm15, %xmm0`],
+ ['f32x4.eq', `c5 f0 c2 c2 00 vcmpps \\$0x00, %xmm2, %xmm1, %xmm0`],
+ ['f32x4.lt', `c5 f0 c2 c2 01 vcmpps \\$0x01, %xmm2, %xmm1, %xmm0`],
+ ['f32x4.ge', `c5 e8 c2 c1 02 vcmpps \\$0x02, %xmm1, %xmm2, %xmm0`],
+ ['f64x2.eq', `c5 f1 c2 c2 00 vcmppd \\$0x00, %xmm2, %xmm1, %xmm0`],
+ ['f64x2.lt', `c5 f1 c2 c2 01 vcmppd \\$0x01, %xmm2, %xmm1, %xmm0`],
+ ['f64x2.ge', `c5 e9 c2 c1 02 vcmppd \\$0x02, %xmm1, %xmm2, %xmm0`],
+ ['f32x4.pmin', `c5 e8 5d c1 vminps %xmm1, %xmm2, %xmm0`],
+ ['f32x4.pmax', `c5 e8 5f c1 vmaxps %xmm1, %xmm2, %xmm0`],
+ ['f64x2.pmin', `c5 e9 5d c1 vminpd %xmm1, %xmm2, %xmm0`],
+ ['f64x2.pmax', `c5 e9 5f c1 vmaxpd %xmm1, %xmm2, %xmm0`],
+ ['i8x16.swizzle', `
+c5 69 dc 3d ${RIPRADDR} vpaddusbx ${RIPR}, %xmm2, %xmm15
+c4 c2 71 00 c7 vpshufb %xmm15, %xmm1, %xmm0`],
+ ['i16x8.extmul_high_i8x16_s', `
+66 44 0f 3a 0f fa 08 palignr \\$0x08, %xmm2, %xmm15
+c4 42 79 20 ff vpmovsxbw %xmm15, %xmm15
+66 0f 3a 0f c1 08 palignr \\$0x08, %xmm1, %xmm0
+c4 e2 79 20 c0 vpmovsxbw %xmm0, %xmm0
+66 41 0f d5 c7 pmullw %xmm15, %xmm0`],
+ ['i32x4.extmul_low_i16x8_u', `
+c5 71 e4 fa vpmulhuw %xmm2, %xmm1, %xmm15
+c5 f1 d5 c2 vpmullw %xmm2, %xmm1, %xmm0
+66 41 0f 61 c7 punpcklwd %xmm15, %xmm0`],
+ ['i64x2.extmul_low_i32x4_s', `
+c5 79 70 f9 10 vpshufd \\$0x10, %xmm1, %xmm15
+c5 f9 70 c2 10 vpshufd \\$0x10, %xmm2, %xmm0
+66 41 0f 38 28 c7 pmuldq %xmm15, %xmm0`],
+ ['i16x8.q15mulr_sat_s', `
+c4 e2 71 0b c2 vpmulhrsw %xmm2, %xmm1, %xmm0
+c5 79 75 3d ${RIPRADDR} vpcmpeqwx ${RIPR}, %xmm0, %xmm15
+66 41 0f ef c7 pxor %xmm15, %xmm0`],
+]);
+
+// Bitwise binary ops (note that the expected vpandn lists its source registers in the opposite order to the others)
+codegenTestX64_v128xv128_v128_avxhack(
+ [['v128.and', `c5 f1 db c2 vpand %xmm2, %xmm1, %xmm0`],
+ ['v128.andnot', `c5 e9 df c1 vpandn %xmm1, %xmm2, %xmm0`],
+ ['v128.or', `c5 f1 eb c2 vpor %xmm2, %xmm1, %xmm0`],
+ ['v128.xor', `c5 f1 ef c2 vpxor %xmm2, %xmm1, %xmm0`]]);
+
+
+// Replace lane ops. The lane index is the immediate; ".." presumably matches register-dependent bytes -- confirm in codegen-x64-test.js.
+codegenTestX64_adhoc(`(module
+ (func (export "f") (param v128 v128 i32) (result v128)
+ (i8x16.replace_lane 7 (local.get 1) (local.get 2))))`, 'f', `
+c4 .. 71 20 .. 07 vpinsrb \\$0x07, ${GPR_I32}, %xmm1, %xmm0`);
+codegenTestX64_adhoc(`(module
+ (func (export "f") (param v128 v128 i32) (result v128)
+ (i16x8.replace_lane 3 (local.get 1) (local.get 2))))`, 'f', `
+(?:c4 .. 71|c5 f1) c4 .. 03 vpinsrw \\$0x03, ${GPR_I32}, %xmm1, %xmm0`);
+codegenTestX64_adhoc(`(module
+ (func (export "f") (param v128 v128 i32) (result v128)
+ (i32x4.replace_lane 2 (local.get 1) (local.get 2))))`, 'f', `
+c4 .. 71 22 .. 02 vpinsrd \\$0x02, ${GPR_I32}, %xmm1, %xmm0`);
+codegenTestX64_adhoc(`(module
+ (func (export "f") (param v128 v128 i64) (result v128)
+ (i64x2.replace_lane 1 (local.get 1) (local.get 2))))`, 'f', `
+c4 .. f1 22 .. 01 vpinsrq \\$0x01, ${GPR_I64}, %xmm1, %xmm0`);
+
+
+if (isAvxPresent(2)) { // presumably true only when AVX2 is available -- confirm against isAvxPresent()
+ codegenTestX64_T_v128_avxhack( // splat from a scalar argument: a move into xmm0 (integer types) plus a broadcast
+ [['i32', 'i8x16.splat', `
+c5 f9 6e .. vmovd ${GPR_I32}, %xmm0
+c4 e2 79 78 c0 vpbroadcastb %xmm0, %xmm0`],
+ ['i32', 'i16x8.splat', `
+c5 f9 6e .. vmovd ${GPR_I32}, %xmm0
+c4 e2 79 79 c0 vpbroadcastw %xmm0, %xmm0`],
+ ['i32', 'i32x4.splat', `
+c5 f9 6e .. vmovd ${GPR_I32}, %xmm0
+c4 e2 79 58 c0 vpbroadcastd %xmm0, %xmm0`],
+ ['i64', 'i64x2.splat', `
+c4 e1 f9 6e .. vmovq ${GPR_I64}, %xmm0
+c4 e2 79 59 c0 vpbroadcastq %xmm0, %xmm0`],
+ ['f32', 'f32x4.splat', `c4 e2 79 18 c0 vbroadcastss %xmm0, %xmm0`]], {log:true}); // NOTE(review): {log:true} looks like a debugging leftover -- confirm
+
+ codegenTestX64_T_v128_avxhack( // load-and-splat: the i32 argument is the address, hence the memory option
+ [['i32', 'v128.load8_splat',
+ 'c4 c2 79 78 04 .. vpbroadcastbb \\(%r15,%r\\w+,1\\), %xmm0'],
+ ['i32', 'v128.load16_splat',
+ 'c4 c2 79 79 04 .. vpbroadcastww \\(%r15,%r\\w+,1\\), %xmm0'],
+ ['i32', 'v128.load32_splat',
+ 'c4 c2 79 18 04 .. vbroadcastssl \\(%r15,%r\\w+,1\\), %xmm0']], {memory: 1});
+}
+
+// Using VEX during shuffle ops (the shuffle operands are function arguments 1 and 2, seen as xmm1 and xmm2)
+codegenTestX64_v128xv128_v128_avxhack([
+ // Identity op on second argument should generate a move
+ ['i8x16.shuffle 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15',
+ 'c5 f9 6f c1 vmovdqa %xmm1, %xmm0'],
+
+ // Broadcast a byte from the first shuffle operand (i.e. the second function argument, xmm1)
+ ['i8x16.shuffle 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5',
+ `
+c5 f1 60 c1 vpunpcklbw %xmm1, %xmm1, %xmm0
+c5 fa 70 c0 55 vpshufhw \\$0x55, %xmm0, %xmm0
+c5 f9 70 c0 aa vpshufd \\$0xAA, %xmm0, %xmm0`],
+
+ // Broadcast a word from the first shuffle operand (i.e. the second function argument, xmm1)
+ ['i8x16.shuffle 4 5 4 5 4 5 4 5 4 5 4 5 4 5 4 5',
+ `
+c5 fb 70 c1 aa vpshuflw \\$0xAA, %xmm1, %xmm0
+c5 f9 70 c0 00 vpshufd \\$0x00, %xmm0, %xmm0`],
+
+ // Permute words
+ ['i8x16.shuffle 2 3 0 1 6 7 4 5 10 11 8 9 14 15 12 13',
+`
+c5 fb 70 c1 b1 vpshuflw \\$0xB1, %xmm1, %xmm0
+c5 fa 70 c0 b1 vpshufhw \\$0xB1, %xmm0, %xmm0`],
+
+ // Permute doublewords
+ ['i8x16.shuffle 4 5 6 7 0 1 2 3 12 13 14 15 8 9 10 11',
+ 'c5 f9 70 c1 b1 vpshufd \\$0xB1, %xmm1, %xmm0'],
+
+ // Interleave doublewords
+ ['i8x16.shuffle 0 1 2 3 16 17 18 19 4 5 6 7 20 21 22 23',
+ 'c5 f1 62 c2 vpunpckldq %xmm2, %xmm1, %xmm0'],
+
+ // Interleave quadwords
+ ['i8x16.shuffle 24 25 26 27 28 29 30 31 8 9 10 11 12 13 14 15',
+ 'c5 e9 6d c1 vpunpckhqdq %xmm1, %xmm2, %xmm0'],
+
+ // Rotate right (first within one operand, then across both operands)
+ ['i8x16.shuffle 13 14 15 0 1 2 3 4 5 6 7 8 9 10 11 12',
+ `c4 e3 71 0f c1 0d vpalignr \\$0x0D, %xmm1, %xmm1, %xmm0`],
+ ['i8x16.shuffle 28 29 30 31 0 1 2 3 4 5 6 7 8 9 10 11',
+ `c4 e3 71 0f c2 0c vpalignr \\$0x0C, %xmm2, %xmm1, %xmm0`]]);
+
+if (isAvxPresent(2)) { // presumably true only when AVX2 is available -- confirm against isAvxPresent()
+ codegenTestX64_v128xv128_v128_avxhack([
+ // Broadcast low byte from second argument (shuffle indices 0..15 select from it)
+ ['i8x16.shuffle 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0',
+ 'c4 e2 79 78 c1 vpbroadcastb %xmm1, %xmm0'],
+
+ // Broadcast low word from third argument (shuffle indices 16..31 select from it)
+ ['i8x16.shuffle 16 17 16 17 16 17 16 17 16 17 16 17 16 17 16 17',
+ 'c4 e2 79 79 c2 vpbroadcastw %xmm2, %xmm0'],
+
+ // Broadcast low doubleword from second argument
+ ['i8x16.shuffle 0 1 2 3 0 1 2 3 0 1 2 3 0 1 2 3',
+ 'c4 e2 79 58 c1 vpbroadcastd %xmm1, %xmm0']]);
+}
+
+// Testing AVX optimization where VPBLENDVB accepts four XMM registers as args. Each result byte keeps its lane, taken from arg 2 or arg 3.
+codegenTestX64_adhoc( // called directly: this case needs four v128 parameters, which none of the helpers above declare
+ `(func (export "f") (param v128 v128 v128 v128) (result v128)
+ (i8x16.shuffle 0 17 2 3 4 5 6 7 24 25 26 11 12 13 30 15
+ (local.get 2)(local.get 3)))`,
+ 'f',
+`
+66 0f 6f 0d ${RIPRADDR} movdqax ${RIPR}, %xmm1
+c4 e3 69 4c c3 10 vpblendvb %xmm1, %xmm3, %xmm2, %xmm0`);
+
+// Constant arguments that are folded into the instruction
+codegenTestX64_v128xLITERAL_v128_avxhack(
+ [['i8x16.add', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `c5 f1 fc 05 ${RIPRADDR} vpaddbx ${RIPR}, %xmm1, %xmm0`],
+ ['i8x16.sub', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `c5 f1 f8 05 ${RIPRADDR} vpsubbx ${RIPR}, %xmm1, %xmm0`],
+ ['i8x16.add_sat_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `c5 f1 ec 05 ${RIPRADDR} vpaddsbx ${RIPR}, %xmm1, %xmm0`],
+ ['i8x16.add_sat_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `c5 f1 dc 05 ${RIPRADDR} vpaddusbx ${RIPR}, %xmm1, %xmm0`],
+ ['i8x16.sub_sat_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `c5 f1 e8 05 ${RIPRADDR} vpsubsbx ${RIPR}, %xmm1, %xmm0`],
+ ['i8x16.sub_sat_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `c5 f1 d8 05 ${RIPRADDR} vpsubusbx ${RIPR}, %xmm1, %xmm0`],
+ ['i8x16.min_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `c4 e2 71 38 05 ${RIPRADDR} vpminsbx ${RIPR}, %xmm1, %xmm0`],
+ ['i8x16.min_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `c5 f1 da 05 ${RIPRADDR} vpminubx ${RIPR}, %xmm1, %xmm0`],
+ ['i8x16.max_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `c4 e2 71 3c 05 ${RIPRADDR} vpmaxsbx ${RIPR}, %xmm1, %xmm0`],
+ ['i8x16.max_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `c5 f1 de 05 ${RIPRADDR} vpmaxubx ${RIPR}, %xmm1, %xmm0`],
+ ['i8x16.eq', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `c5 f1 74 05 ${RIPRADDR} vpcmpeqbx ${RIPR}, %xmm1, %xmm0`],
+ ['i8x16.ne', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', `
+ c5 f1 74 05 ${RIPRADDR} vpcmpeqbx ${RIPR}, %xmm1, %xmm0
+ 66 45 0f 75 ff pcmpeqw %xmm15, %xmm15
+ 66 41 0f ef c7 pxor %xmm15, %xmm0`],
+ ['i8x16.gt_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `c5 f1 64 05 ${RIPRADDR} vpcmpgtbx ${RIPR}, %xmm1, %xmm0`],
+ ['i8x16.le_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', `
+ c5 f1 64 05 ${RIPRADDR} vpcmpgtbx ${RIPR}, %xmm1, %xmm0
+ 66 45 0f 75 ff pcmpeqw %xmm15, %xmm15
+ 66 41 0f ef c7 pxor %xmm15, %xmm0`],
+ ['i8x16.narrow_i16x8_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `c5 f1 63 05 ${RIPRADDR} vpacksswbx ${RIPR}, %xmm1, %xmm0`],
+ ['i8x16.narrow_i16x8_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `c5 f1 67 05 ${RIPRADDR} vpackuswbx ${RIPR}, %xmm1, %xmm0`],
+
+ ['i16x8.add', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `c5 f1 fd 05 ${RIPRADDR} vpaddwx ${RIPR}, %xmm1, %xmm0`],
+ ['i16x8.sub', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `c5 f1 f9 05 ${RIPRADDR} vpsubwx ${RIPR}, %xmm1, %xmm0`],
+ ['i16x8.mul', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `c5 f1 d5 05 ${RIPRADDR} vpmullwx ${RIPR}, %xmm1, %xmm0`],
+ ['i16x8.add_sat_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `c5 f1 ed 05 ${RIPRADDR} vpaddswx ${RIPR}, %xmm1, %xmm0`],
+ ['i16x8.add_sat_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `c5 f1 dd 05 ${RIPRADDR} vpadduswx ${RIPR}, %xmm1, %xmm0`],
+ ['i16x8.sub_sat_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `c5 f1 e9 05 ${RIPRADDR} vpsubswx ${RIPR}, %xmm1, %xmm0`],
+ ['i16x8.sub_sat_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `c5 f1 d9 05 ${RIPRADDR} vpsubuswx ${RIPR}, %xmm1, %xmm0`],
+ ['i16x8.min_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `c5 f1 ea 05 ${RIPRADDR} vpminswx ${RIPR}, %xmm1, %xmm0`],
+ ['i16x8.min_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `c4 e2 71 3a 05 ${RIPRADDR} vpminuwx ${RIPR}, %xmm1, %xmm0`],
+ ['i16x8.max_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `c5 f1 ee 05 ${RIPRADDR} vpmaxswx ${RIPR}, %xmm1, %xmm0`],
+ ['i16x8.max_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `c4 e2 71 3e 05 ${RIPRADDR} vpmaxuwx ${RIPR}, %xmm1, %xmm0`],
+ ['i16x8.eq', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `c5 f1 75 05 ${RIPRADDR} vpcmpeqwx ${RIPR}, %xmm1, %xmm0`],
+ ['i16x8.ne', '(v128.const i16x8 1 2 1 2 1 2 1 2)', `
+ c5 f1 75 05 ${RIPRADDR} vpcmpeqwx ${RIPR}, %xmm1, %xmm0
+ 66 45 0f 75 ff pcmpeqw %xmm15, %xmm15
+ 66 41 0f ef c7 pxor %xmm15, %xmm0`],
+ ['i16x8.gt_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `c5 f1 65 05 ${RIPRADDR} vpcmpgtwx ${RIPR}, %xmm1, %xmm0`],
+ ['i16x8.le_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', `
+ c5 f1 65 05 ${RIPRADDR} vpcmpgtwx ${RIPR}, %xmm1, %xmm0
+ 66 45 0f 75 ff pcmpeqw %xmm15, %xmm15
+ 66 41 0f ef c7 pxor %xmm15, %xmm0`],
+ ['i16x8.narrow_i32x4_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `c5 f1 6b 05 ${RIPRADDR} vpackssdwx ${RIPR}, %xmm1, %xmm0`],
+ ['i16x8.narrow_i32x4_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `c4 e2 71 2b 05 ${RIPRADDR} vpackusdwx ${RIPR}, %xmm1, %xmm0`],
+
+ ['i32x4.add', '(v128.const i32x4 1 2 1 2)',
+ `c5 f1 fe 05 ${RIPRADDR} vpadddx ${RIPR}, %xmm1, %xmm0`],
+ ['i32x4.sub', '(v128.const i32x4 1 2 1 2)',
+ `c5 f1 fa 05 ${RIPRADDR} vpsubdx ${RIPR}, %xmm1, %xmm0`],
+ ['i32x4.mul', '(v128.const i32x4 1 2 1 2)',
+ `c4 e2 71 40 05 ${RIPRADDR} vpmulldx ${RIPR}, %xmm1, %xmm0`],
+ ['i32x4.min_s', '(v128.const i32x4 1 2 1 2)',
+ `c4 e2 71 39 05 ${RIPRADDR} vpminsdx ${RIPR}, %xmm1, %xmm0`],
+ ['i32x4.min_u', '(v128.const i32x4 1 2 1 2)',
+ `c4 e2 71 3b 05 ${RIPRADDR} vpminudx ${RIPR}, %xmm1, %xmm0`],
+ ['i32x4.max_s', '(v128.const i32x4 1 2 1 2)',
+ `c4 e2 71 3d 05 ${RIPRADDR} vpmaxsdx ${RIPR}, %xmm1, %xmm0`],
+ ['i32x4.max_u', '(v128.const i32x4 1 2 1 2)',
+ `c4 e2 71 3f 05 ${RIPRADDR} vpmaxudx ${RIPR}, %xmm1, %xmm0`],
+ ['i32x4.eq', '(v128.const i32x4 1 2 1 2)',
+ `c5 f1 76 05 ${RIPRADDR} vpcmpeqdx ${RIPR}, %xmm1, %xmm0`],
+ ['i32x4.ne', '(v128.const i32x4 1 2 1 2)', `
+ c5 f1 76 05 ${RIPRADDR} vpcmpeqdx ${RIPR}, %xmm1, %xmm0
+ 66 45 0f 75 ff pcmpeqw %xmm15, %xmm15
+ 66 41 0f ef c7 pxor %xmm15, %xmm0`],
+ ['i32x4.gt_s', '(v128.const i32x4 1 2 1 2)',
+ `c5 f1 66 05 ${RIPRADDR} vpcmpgtdx ${RIPR}, %xmm1, %xmm0`],
+ ['i32x4.le_s', '(v128.const i32x4 1 2 1 2)', `
+ c5 f1 66 05 ${RIPRADDR} vpcmpgtdx ${RIPR}, %xmm1, %xmm0
+ 66 45 0f 75 ff pcmpeqw %xmm15, %xmm15
+ 66 41 0f ef c7 pxor %xmm15, %xmm0`],
+ ['i32x4.dot_i16x8_s', '(v128.const i32x4 1 2 1 2)',
+ `c5 f1 f5 05 ${RIPRADDR} vpmaddwdx ${RIPR}, %xmm1, %xmm0`],
+
+ ['i64x2.add', '(v128.const i64x2 1 2)',
+ `c5 f1 d4 05 ${RIPRADDR} vpaddqx ${RIPR}, %xmm1, %xmm0`],
+ ['i64x2.sub', '(v128.const i64x2 1 2)',
+ `c5 f1 fb 05 ${RIPRADDR} vpsubqx ${RIPR}, %xmm1, %xmm0`],
+
+ ['v128.and', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `c5 f1 db 05 ${RIPRADDR} vpandx ${RIPR}, %xmm1, %xmm0`],
+ ['v128.or', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `c5 f1 eb 05 ${RIPRADDR} vporx ${RIPR}, %xmm1, %xmm0`],
+ ['v128.xor', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `c5 f1 ef 05 ${RIPRADDR} vpxorx ${RIPR}, %xmm1, %xmm0`],
+
+ ['f32x4.add', '(v128.const f32x4 1 2 3 4)',
+ `c5 f0 58 05 ${RIPRADDR} vaddpsx ${RIPR}, %xmm1, %xmm0`],
+ ['f32x4.sub', '(v128.const f32x4 1 2 3 4)',
+ `c5 f0 5c 05 ${RIPRADDR} vsubpsx ${RIPR}, %xmm1, %xmm0`],
+ ['f32x4.mul', '(v128.const f32x4 1 2 3 4)',
+ `c5 f0 59 05 ${RIPRADDR} vmulpsx ${RIPR}, %xmm1, %xmm0`],
+ ['f32x4.div', '(v128.const f32x4 1 2 3 4)',
+ `c5 f0 5e 05 ${RIPRADDR} vdivpsx ${RIPR}, %xmm1, %xmm0`],
+
+ ['f64x2.add', '(v128.const f64x2 1 2)',
+ `c5 f1 58 05 ${RIPRADDR} vaddpdx ${RIPR}, %xmm1, %xmm0`],
+ ['f64x2.sub', '(v128.const f64x2 1 2)',
+ `c5 f1 5c 05 ${RIPRADDR} vsubpdx ${RIPR}, %xmm1, %xmm0`],
+ ['f64x2.mul', '(v128.const f64x2 1 2)',
+ `c5 f1 59 05 ${RIPRADDR} vmulpdx ${RIPR}, %xmm1, %xmm0`],
+ ['f64x2.div', '(v128.const f64x2 1 2)',
+ `c5 f1 5e 05 ${RIPRADDR} vdivpdx ${RIPR}, %xmm1, %xmm0`],
+
+ ['f32x4.eq', '(v128.const f32x4 1 2 3 4)',
+ `c5 f0 c2 05 ${RIPRADDR} 00 vcmppsx \\$0x00, ${RIPR}, %xmm1, %xmm0`],
+ ['f32x4.ne', '(v128.const f32x4 1 2 3 4)',
+ `c5 f0 c2 05 ${RIPRADDR} 04 vcmppsx \\$0x04, ${RIPR}, %xmm1, %xmm0`],
+ ['f32x4.lt', '(v128.const f32x4 1 2 3 4)',
+ `c5 f0 c2 05 ${RIPRADDR} 01 vcmppsx \\$0x01, ${RIPR}, %xmm1, %xmm0`],
+ ['f32x4.le', '(v128.const f32x4 1 2 3 4)',
+ `c5 f0 c2 05 ${RIPRADDR} 02 vcmppsx \\$0x02, ${RIPR}, %xmm1, %xmm0`],
+
+ ['f64x2.eq', '(v128.const f64x2 1 2)',
+ `c5 f1 c2 05 ${RIPRADDR} 00 vcmppdx \\$0x00, ${RIPR}, %xmm1, %xmm0`],
+ ['f64x2.ne', '(v128.const f64x2 1 2)',
+ `c5 f1 c2 05 ${RIPRADDR} 04 vcmppdx \\$0x04, ${RIPR}, %xmm1, %xmm0`],
+ ['f64x2.lt', '(v128.const f64x2 1 2)',
+ `c5 f1 c2 05 ${RIPRADDR} 01 vcmppdx \\$0x01, ${RIPR}, %xmm1, %xmm0`],
+ ['f64x2.le', '(v128.const f64x2 1 2)',
+ `c5 f1 c2 05 ${RIPRADDR} 02 vcmppdx \\$0x02, ${RIPR}, %xmm1, %xmm0`]]);
+
+ // Commutative operations with constants on the lhs should generate the same
+ // code as with the constant on the rhs.
+ codegenTestX64_LITERALxv128_v128_avxhack(
+ [['i8x16.add', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `c5 f1 fc 05 ${RIPRADDR} vpaddbx ${RIPR}, %xmm1, %xmm0`],
+ ['i8x16.add_sat_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `c5 f1 ec 05 ${RIPRADDR} vpaddsbx ${RIPR}, %xmm1, %xmm0`],
+ ['i8x16.add_sat_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `c5 f1 dc 05 ${RIPRADDR} vpaddusbx ${RIPR}, %xmm1, %xmm0`],
+ ['i8x16.min_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `c4 e2 71 38 05 ${RIPRADDR} vpminsbx ${RIPR}, %xmm1, %xmm0`],
+ ['i8x16.min_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `c5 f1 da 05 ${RIPRADDR} vpminubx ${RIPR}, %xmm1, %xmm0`],
+ ['i8x16.max_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `c4 e2 71 3c 05 ${RIPRADDR} vpmaxsbx ${RIPR}, %xmm1, %xmm0`],
+ ['i8x16.max_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `c5 f1 de 05 ${RIPRADDR} vpmaxubx ${RIPR}, %xmm1, %xmm0`],
+ ['i8x16.eq', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `c5 f1 74 05 ${RIPRADDR} vpcmpeqbx ${RIPR}, %xmm1, %xmm0`],
+ ['i8x16.ne', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', `
+ c5 f1 74 05 ${RIPRADDR} vpcmpeqbx ${RIPR}, %xmm1, %xmm0
+ 66 45 0f 75 ff pcmpeqw %xmm15, %xmm15
+ 66 41 0f ef c7 pxor %xmm15, %xmm0`],
+
+ ['i16x8.add', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `c5 f1 fd 05 ${RIPRADDR} vpaddwx ${RIPR}, %xmm1, %xmm0`],
+ ['i16x8.mul', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `c5 f1 d5 05 ${RIPRADDR} vpmullwx ${RIPR}, %xmm1, %xmm0`],
+ ['i16x8.add_sat_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `c5 f1 ed 05 ${RIPRADDR} vpaddswx ${RIPR}, %xmm1, %xmm0`],
+ ['i16x8.add_sat_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `c5 f1 dd 05 ${RIPRADDR} vpadduswx ${RIPR}, %xmm1, %xmm0`],
+ ['i16x8.min_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `c5 f1 ea 05 ${RIPRADDR} vpminswx ${RIPR}, %xmm1, %xmm0`],
+ ['i16x8.min_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `c4 e2 71 3a 05 ${RIPRADDR} vpminuwx ${RIPR}, %xmm1, %xmm0`],
+ ['i16x8.max_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `c5 f1 ee 05 ${RIPRADDR} vpmaxswx ${RIPR}, %xmm1, %xmm0`],
+ ['i16x8.max_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `c4 e2 71 3e 05 ${RIPRADDR} vpmaxuwx ${RIPR}, %xmm1, %xmm0`],
+ ['i16x8.eq', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `c5 f1 75 05 ${RIPRADDR} vpcmpeqwx ${RIPR}, %xmm1, %xmm0`],
+ ['i16x8.ne', '(v128.const i16x8 1 2 1 2 1 2 1 2)', `
+ c5 f1 75 05 ${RIPRADDR} vpcmpeqwx ${RIPR}, %xmm1, %xmm0
+ 66 45 0f 75 ff pcmpeqw %xmm15, %xmm15
+ 66 41 0f ef c7 pxor %xmm15, %xmm0`],
+
+ ['i32x4.add', '(v128.const i32x4 1 2 1 2)',
+ `c5 f1 fe 05 ${RIPRADDR} vpadddx ${RIPR}, %xmm1, %xmm0`],
+ ['i32x4.mul', '(v128.const i32x4 1 2 1 2)',
+ `c4 e2 71 40 05 ${RIPRADDR} vpmulldx ${RIPR}, %xmm1, %xmm0`],
+ ['i32x4.min_s', '(v128.const i32x4 1 2 1 2)',
+ `c4 e2 71 39 05 ${RIPRADDR} vpminsdx ${RIPR}, %xmm1, %xmm0`],
+ ['i32x4.min_u', '(v128.const i32x4 1 2 1 2)',
+ `c4 e2 71 3b 05 ${RIPRADDR} vpminudx ${RIPR}, %xmm1, %xmm0`],
+ ['i32x4.max_s', '(v128.const i32x4 1 2 1 2)',
+ `c4 e2 71 3d 05 ${RIPRADDR} vpmaxsdx ${RIPR}, %xmm1, %xmm0`],
+ ['i32x4.max_u', '(v128.const i32x4 1 2 1 2)',
+ `c4 e2 71 3f 05 ${RIPRADDR} vpmaxudx ${RIPR}, %xmm1, %xmm0`],
+ ['i32x4.eq', '(v128.const i32x4 1 2 1 2)',
+ `c5 f1 76 05 ${RIPRADDR} vpcmpeqdx ${RIPR}, %xmm1, %xmm0`],
+ ['i32x4.ne', '(v128.const i32x4 1 2 1 2)', `
+ c5 f1 76 05 ${RIPRADDR} vpcmpeqdx ${RIPR}, %xmm1, %xmm0
+ 66 45 0f 75 ff pcmpeqw %xmm15, %xmm15
+ 66 41 0f ef c7 pxor %xmm15, %xmm0`],
+ ['i32x4.dot_i16x8_s', '(v128.const i32x4 1 2 1 2)',
+ `c5 f1 f5 05 ${RIPRADDR} vpmaddwdx ${RIPR}, %xmm1, %xmm0`],
+
+ ['i64x2.add', '(v128.const i64x2 1 2)',
+ `c5 f1 d4 05 ${RIPRADDR} vpaddqx ${RIPR}, %xmm1, %xmm0`],
+
+ ['v128.and', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `c5 f1 db 05 ${RIPRADDR} vpandx ${RIPR}, %xmm1, %xmm0`],
+ ['v128.or', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `c5 f1 eb 05 ${RIPRADDR} vporx ${RIPR}, %xmm1, %xmm0`],
+ ['v128.xor', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `c5 f1 ef 05 ${RIPRADDR} vpxorx ${RIPR}, %xmm1, %xmm0`]]);
+
+// Shift by constant encodings
+codegenTestX64_v128xLITERAL_v128_avxhack(
+ [['i8x16.shl', '(i32.const 2)', `
+c5 f1 fc c1 vpaddb %xmm1, %xmm1, %xmm0
+66 0f fc c0 paddb %xmm0, %xmm0`],
+ ['i8x16.shl', '(i32.const 4)', `
+c5 f1 db 05 ${RIPRADDR} vpandx ${RIPR}, %xmm1, %xmm0
+66 0f 71 f0 04 psllw \\$0x04, %xmm0`],
+ ['i16x8.shl', '(i32.const 1)',
+ 'c5 f9 71 f1 01 vpsllw \\$0x01, %xmm1, %xmm0'],
+ ['i16x8.shr_s', '(i32.const 3)',
+ 'c5 f9 71 e1 03 vpsraw \\$0x03, %xmm1, %xmm0'],
+ ['i16x8.shr_u', '(i32.const 2)',
+ 'c5 f9 71 d1 02 vpsrlw \\$0x02, %xmm1, %xmm0'],
+ ['i32x4.shl', '(i32.const 5)',
+ 'c5 f9 72 f1 05 vpslld \\$0x05, %xmm1, %xmm0'],
+ ['i32x4.shr_s', '(i32.const 2)',
+ 'c5 f9 72 e1 02 vpsrad \\$0x02, %xmm1, %xmm0'],
+ ['i32x4.shr_u', '(i32.const 5)',
+ 'c5 f9 72 d1 05 vpsrld \\$0x05, %xmm1, %xmm0'],
+ ['i64x2.shr_s', '(i32.const 7)', `
+c5 79 70 f9 f5 vpshufd \\$0xF5, %xmm1, %xmm15
+66 41 0f 72 e7 1f psrad \\$0x1F, %xmm15
+c4 c1 71 ef c7 vpxor %xmm15, %xmm1, %xmm0
+66 0f 73 d0 07 psrlq \\$0x07, %xmm0
+66 41 0f ef c7 pxor %xmm15, %xmm0`]]);
+
+// vpblendvb optimization when bitselect follows comparison.
+codegenTestX64_adhoc(
+ `(module
+ (func (export "f") (param v128) (param v128) (param v128) (param v128) (result v128)
+ (v128.bitselect (local.get 2) (local.get 3)
+ (i32x4.eq (local.get 0) (local.get 1)))))`,
+ 'f', `
+66 0f 76 c1 pcmpeqd %xmm1, %xmm0
+c4 e3 61 4c c2 00 vpblendvb %xmm0, %xmm2, %xmm3, %xmm0`);
diff --git a/js/src/jit-test/tests/wasm/simd/baseline-bug1636235.js b/js/src/jit-test/tests/wasm/simd/baseline-bug1636235.js
new file mode 100644
index 0000000000..da1fb68e6b
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/baseline-bug1636235.js
@@ -0,0 +1,111 @@
+// |jit-test| skip-if: !wasmSimdEnabled()
+
+// Bug 1636235: assorted corner case baseline SIMD bugs.
+
+function get(arr, loc, len) { // Returns a new plain Array holding arr[loc], ..., arr[loc+len-1].
+ let res = [];
+ for ( let i=0; i < len; i++ ) {
+ res.push(arr[loc+i]); // element-by-element copy; no bounds check on loc+i
+ }
+ return res; // a plain Array, so it can be compared against array literals with assertDeepEq
+}
+
+// Pass v128 along a control flow edge in br_table
+
+var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ (func (export "run") (param $k i32)
+ (v128.store (i32.const 0) (call $f (local.get $k))))
+ (func $f (param $k i32) (result v128)
+ (block $B2 (result v128)
+ (block $B1 (result v128)
+ (v128.const i32x4 1 2 3 4)
+ (br_table $B1 $B2 (local.get $k)))
+ (drop)
+ (v128.const i32x4 5 6 7 8))))`);
+
+var mem = new Int32Array(ins.exports.mem.buffer);
+ins.exports.run(0);
+assertDeepEq(get(mem, 0, 4), [5, 6, 7, 8]);
+
+ins.exports.run(1);
+assertDeepEq(get(mem, 0, 4), [1, 2, 3, 4]);
+
+// Materialize a ConstV128 off the value stack in popStackResults (also: check
+// that br passing v128 values works as it should).
+
+var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+
+ (func (export "run") (param $k i32)
+ (local $t0 v128) (local $t1 v128) (local $t2 v128)
+ (call $f (local.get $k))
+ (local.set $t2)
+ (local.set $t1)
+ (local.set $t0)
+ (v128.store (i32.const 32) (local.get $t2))
+ (v128.store (i32.const 16) (local.get $t1))
+ (v128.store (i32.const 0) (local.get $t0)))
+
+ (func $f (param $k i32) (result v128 v128 v128)
+ (block $B2 (result v128 v128 v128)
+ (if (local.get $k)
+ (br $B2 (v128.const i32x4 5 6 7 8)
+ (v128.const i32x4 9 10 11 12)
+ (v128.const i32x4 13 14 15 16))
+ (br $B2 (v128.const i32x4 -5 -6 -7 -8)
+ (v128.const i32x4 -9 -10 -11 -12)
+ (v128.const i32x4 -13 -14 -15 -16)))
+ (unreachable))))`);
+
+var mem = new Int32Array(ins.exports.mem.buffer);
+ins.exports.run(0);
+assertDeepEq(get(mem, 0, 4), [-5, -6, -7, -8]);
+assertDeepEq(get(mem, 4, 4), [-9, -10, -11, -12]);
+assertDeepEq(get(mem, 8, 4), [-13, -14, -15, -16]);
+
+ins.exports.run(1);
+assertDeepEq(get(mem, 0, 4), [5, 6, 7, 8]);
+assertDeepEq(get(mem, 4, 4), [9, 10, 11, 12]);
+assertDeepEq(get(mem, 8, 4), [13, 14, 15, 16]);
+
+// Check that br_if passing v128 values works as it should.
+
+var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+
+ (func (export "run") (param $k i32)
+ (local $t0 v128) (local $t1 v128) (local $t2 v128)
+ (call $f (local.get $k))
+ (local.set $t2)
+ (local.set $t1)
+ (local.set $t0)
+ (v128.store (i32.const 32) (local.get $t2))
+ (v128.store (i32.const 16) (local.get $t1))
+ (v128.store (i32.const 0) (local.get $t0)))
+
+ (func $f (param $k i32) (result v128 v128 v128)
+ (block $B2 (result v128 v128 v128)
+ (v128.const i32x4 5 6 7 8)
+ (v128.const i32x4 9 10 11 12)
+ (v128.const i32x4 13 14 15 16)
+ (br_if $B2 (local.get $k))
+ drop drop drop
+ (v128.const i32x4 -5 -6 -7 -8)
+ (v128.const i32x4 -9 -10 -11 -12)
+ (v128.const i32x4 -13 -14 -15 -16))))`);
+
+var mem = new Int32Array(ins.exports.mem.buffer);
+ins.exports.run(0);
+assertDeepEq(get(mem, 0, 4), [-5, -6, -7, -8]);
+assertDeepEq(get(mem, 4, 4), [-9, -10, -11, -12]);
+assertDeepEq(get(mem, 8, 4), [-13, -14, -15, -16]);
+
+ins.exports.run(1);
+assertDeepEq(get(mem, 0, 4), [5, 6, 7, 8]);
+assertDeepEq(get(mem, 4, 4), [9, 10, 11, 12]);
+assertDeepEq(get(mem, 8, 4), [13, 14, 15, 16]);
+
diff --git a/js/src/jit-test/tests/wasm/simd/binop-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/binop-x64-ion-codegen.js
new file mode 100644
index 0000000000..17c15e22d5
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/binop-x64-ion-codegen.js
@@ -0,0 +1,255 @@
+// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x64 || getBuildConfiguration().simulator; include:codegen-x64-test.js
+
+// Test that there are no extraneous moves or fixups for sundry SIMD binary
+// operations. See README-codegen.md for general information about this type of
+// test case.
+
+// Inputs (xmm0, xmm1)
+
+codegenTestX64_v128xPTYPE_v128(
+ [['f32x4.replace_lane 0', 'f32', `f3 0f 10 c1 movss %xmm1, %xmm0`],
+ ['f32x4.replace_lane 1', 'f32', `66 0f 3a 21 c1 10 insertps \\$0x10, %xmm1, %xmm0`],
+ ['f32x4.replace_lane 3', 'f32', `66 0f 3a 21 c1 30 insertps \\$0x30, %xmm1, %xmm0`],
+ ['f64x2.replace_lane 0', 'f64', `f2 0f 10 c1 movsd %xmm1, %xmm0`],
+ ['f64x2.replace_lane 1', 'f64', `66 0f c6 c1 00 shufpd \\$0x00, %xmm1, %xmm0`]] );
+
+// Inputs (xmm1, xmm0)
+
+codegenTestX64_v128xv128_v128_reversed(
+ [['f32x4.pmin', `0f 5d c1 minps %xmm1, %xmm0`],
+ ['f32x4.pmax', `0f 5f c1 maxps %xmm1, %xmm0`],
+ ['f64x2.pmin', `66 0f 5d c1 minpd %xmm1, %xmm0`],
+ ['f64x2.pmax', `66 0f 5f c1 maxpd %xmm1, %xmm0`]] );
+
+// Constant arguments that are folded into the instruction
+
+codegenTestX64_v128xLITERAL_v128(
+ [['i8x16.add', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `66 0f fc 05 ${RIPRADDR} paddbx ${RIPR}, %xmm0`],
+ ['i8x16.sub', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `66 0f f8 05 ${RIPRADDR} psubbx ${RIPR}, %xmm0`],
+ ['i8x16.add_sat_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `66 0f ec 05 ${RIPRADDR} paddsbx ${RIPR}, %xmm0`],
+ ['i8x16.add_sat_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `66 0f dc 05 ${RIPRADDR} paddusbx ${RIPR}, %xmm0`],
+ ['i8x16.sub_sat_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `66 0f e8 05 ${RIPRADDR} psubsbx ${RIPR}, %xmm0`],
+ ['i8x16.sub_sat_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `66 0f d8 05 ${RIPRADDR} psubusbx ${RIPR}, %xmm0`],
+ ['i8x16.min_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `66 0f 38 38 05 ${RIPRADDR} pminsbx ${RIPR}, %xmm0`],
+ ['i8x16.min_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `66 0f da 05 ${RIPRADDR} pminubx ${RIPR}, %xmm0`],
+ ['i8x16.max_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `66 0f 38 3c 05 ${RIPRADDR} pmaxsbx ${RIPR}, %xmm0`],
+ ['i8x16.max_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `66 0f de 05 ${RIPRADDR} pmaxubx ${RIPR}, %xmm0`],
+ ['i8x16.eq', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `66 0f 74 05 ${RIPRADDR} pcmpeqbx ${RIPR}, %xmm0`],
+ ['i8x16.ne', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', `
+66 0f 74 05 ${RIPRADDR} pcmpeqbx ${RIPR}, %xmm0
+66 45 0f 75 ff pcmpeqw %xmm15, %xmm15
+66 41 0f ef c7 pxor %xmm15, %xmm0`],
+ ['i8x16.gt_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `66 0f 64 05 ${RIPRADDR} pcmpgtbx ${RIPR}, %xmm0`],
+ ['i8x16.le_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', `
+66 0f 64 05 ${RIPRADDR} pcmpgtbx ${RIPR}, %xmm0
+66 45 0f 75 ff pcmpeqw %xmm15, %xmm15
+66 41 0f ef c7 pxor %xmm15, %xmm0`],
+ ['i8x16.narrow_i16x8_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `66 0f 63 05 ${RIPRADDR} packsswbx ${RIPR}, %xmm0`],
+ ['i8x16.narrow_i16x8_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `66 0f 67 05 ${RIPRADDR} packuswbx ${RIPR}, %xmm0`],
+
+ ['i16x8.add', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `66 0f fd 05 ${RIPRADDR} paddwx ${RIPR}, %xmm0`],
+ ['i16x8.sub', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `66 0f f9 05 ${RIPRADDR} psubwx ${RIPR}, %xmm0`],
+ ['i16x8.mul', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `66 0f d5 05 ${RIPRADDR} pmullwx ${RIPR}, %xmm0`],
+ ['i16x8.add_sat_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `66 0f ed 05 ${RIPRADDR} paddswx ${RIPR}, %xmm0`],
+ ['i16x8.add_sat_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `66 0f dd 05 ${RIPRADDR} padduswx ${RIPR}, %xmm0`],
+ ['i16x8.sub_sat_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `66 0f e9 05 ${RIPRADDR} psubswx ${RIPR}, %xmm0`],
+ ['i16x8.sub_sat_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `66 0f d9 05 ${RIPRADDR} psubuswx ${RIPR}, %xmm0`],
+ ['i16x8.min_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `66 0f ea 05 ${RIPRADDR} pminswx ${RIPR}, %xmm0`],
+ ['i16x8.min_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `66 0f 38 3a 05 ${RIPRADDR} pminuwx ${RIPR}, %xmm0`],
+ ['i16x8.max_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `66 0f ee 05 ${RIPRADDR} pmaxswx ${RIPR}, %xmm0`],
+ ['i16x8.max_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `66 0f 38 3e 05 ${RIPRADDR} pmaxuwx ${RIPR}, %xmm0`],
+ ['i16x8.eq', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `66 0f 75 05 ${RIPRADDR} pcmpeqwx ${RIPR}, %xmm0`],
+ ['i16x8.ne', '(v128.const i16x8 1 2 1 2 1 2 1 2)', `
+66 0f 75 05 ${RIPRADDR} pcmpeqwx ${RIPR}, %xmm0
+66 45 0f 75 ff pcmpeqw %xmm15, %xmm15
+66 41 0f ef c7 pxor %xmm15, %xmm0`],
+ ['i16x8.gt_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `66 0f 65 05 ${RIPRADDR} pcmpgtwx ${RIPR}, %xmm0`],
+ ['i16x8.le_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', `
+66 0f 65 05 ${RIPRADDR} pcmpgtwx ${RIPR}, %xmm0
+66 45 0f 75 ff pcmpeqw %xmm15, %xmm15
+66 41 0f ef c7 pxor %xmm15, %xmm0`],
+ ['i16x8.narrow_i32x4_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `66 0f 6b 05 ${RIPRADDR} packssdwx ${RIPR}, %xmm0`],
+ ['i16x8.narrow_i32x4_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `66 0f 38 2b 05 ${RIPRADDR} packusdwx ${RIPR}, %xmm0`],
+
+ ['i32x4.add', '(v128.const i32x4 1 2 1 2)',
+ `66 0f fe 05 ${RIPRADDR} padddx ${RIPR}, %xmm0`],
+ ['i32x4.sub', '(v128.const i32x4 1 2 1 2)',
+ `66 0f fa 05 ${RIPRADDR} psubdx ${RIPR}, %xmm0`],
+ ['i32x4.mul', '(v128.const i32x4 1 2 1 2)',
+ `66 0f 38 40 05 ${RIPRADDR} pmulldx ${RIPR}, %xmm0`],
+ ['i32x4.min_s', '(v128.const i32x4 1 2 1 2)',
+ `66 0f 38 39 05 ${RIPRADDR} pminsdx ${RIPR}, %xmm0`],
+ ['i32x4.min_u', '(v128.const i32x4 1 2 1 2)',
+ `66 0f 38 3b 05 ${RIPRADDR} pminudx ${RIPR}, %xmm0`],
+ ['i32x4.max_s', '(v128.const i32x4 1 2 1 2)',
+ `66 0f 38 3d 05 ${RIPRADDR} pmaxsdx ${RIPR}, %xmm0`],
+ ['i32x4.max_u', '(v128.const i32x4 1 2 1 2)',
+ `66 0f 38 3f 05 ${RIPRADDR} pmaxudx ${RIPR}, %xmm0`],
+ ['i32x4.eq', '(v128.const i32x4 1 2 1 2)',
+ `66 0f 76 05 ${RIPRADDR} pcmpeqdx ${RIPR}, %xmm0`],
+ ['i32x4.ne', '(v128.const i32x4 1 2 1 2)', `
+66 0f 76 05 ${RIPRADDR} pcmpeqdx ${RIPR}, %xmm0
+66 45 0f 75 ff pcmpeqw %xmm15, %xmm15
+66 41 0f ef c7 pxor %xmm15, %xmm0`],
+ ['i32x4.gt_s', '(v128.const i32x4 1 2 1 2)',
+ `66 0f 66 05 ${RIPRADDR} pcmpgtdx ${RIPR}, %xmm0`],
+ ['i32x4.le_s', '(v128.const i32x4 1 2 1 2)', `
+66 0f 66 05 ${RIPRADDR} pcmpgtdx ${RIPR}, %xmm0
+66 45 0f 75 ff pcmpeqw %xmm15, %xmm15
+66 41 0f ef c7 pxor %xmm15, %xmm0`],
+ ['i32x4.dot_i16x8_s', '(v128.const i32x4 1 2 1 2)',
+ `66 0f f5 05 ${RIPRADDR} pmaddwdx ${RIPR}, %xmm0`],
+
+ ['i64x2.add', '(v128.const i64x2 1 2)',
+ `66 0f d4 05 ${RIPRADDR} paddqx ${RIPR}, %xmm0`],
+ ['i64x2.sub', '(v128.const i64x2 1 2)',
+ `66 0f fb 05 ${RIPRADDR} psubqx ${RIPR}, %xmm0`],
+
+ ['v128.and', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `66 0f db 05 ${RIPRADDR} pandx ${RIPR}, %xmm0`],
+ ['v128.or', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `66 0f eb 05 ${RIPRADDR} porx ${RIPR}, %xmm0`],
+ ['v128.xor', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `66 0f ef 05 ${RIPRADDR} pxorx ${RIPR}, %xmm0`],
+
+ ['f32x4.add', '(v128.const f32x4 1 2 3 4)',
+ `0f 58 05 ${RIPRADDR} addpsx ${RIPR}, %xmm0`],
+ ['f32x4.sub', '(v128.const f32x4 1 2 3 4)',
+ `0f 5c 05 ${RIPRADDR} subpsx ${RIPR}, %xmm0`],
+ ['f32x4.mul', '(v128.const f32x4 1 2 3 4)',
+ `0f 59 05 ${RIPRADDR} mulpsx ${RIPR}, %xmm0`],
+ ['f32x4.div', '(v128.const f32x4 1 2 3 4)',
+ `0f 5e 05 ${RIPRADDR} divpsx ${RIPR}, %xmm0`],
+ ['f32x4.eq', '(v128.const f32x4 1 2 3 4)',
+ `0f c2 05 ${RIPRADDR} 00 cmppsx \\$0x00, ${RIPR}, %xmm0`],
+ ['f32x4.ne', '(v128.const f32x4 1 2 3 4)',
+ `0f c2 05 ${RIPRADDR} 04 cmppsx \\$0x04, ${RIPR}, %xmm0`],
+ ['f32x4.lt', '(v128.const f32x4 1 2 3 4)',
+ `0f c2 05 ${RIPRADDR} 01 cmppsx \\$0x01, ${RIPR}, %xmm0`],
+ ['f32x4.le', '(v128.const f32x4 1 2 3 4)',
+ `0f c2 05 ${RIPRADDR} 02 cmppsx \\$0x02, ${RIPR}, %xmm0`],
+
+ ['f64x2.add', '(v128.const f64x2 1 2)',
+ `66 0f 58 05 ${RIPRADDR} addpdx ${RIPR}, %xmm0`],
+ ['f64x2.sub', '(v128.const f64x2 1 2)',
+ `66 0f 5c 05 ${RIPRADDR} subpdx ${RIPR}, %xmm0`],
+ ['f64x2.mul', '(v128.const f64x2 1 2)',
+ `66 0f 59 05 ${RIPRADDR} mulpdx ${RIPR}, %xmm0`],
+ ['f64x2.div', '(v128.const f64x2 1 2)',
+ `66 0f 5e 05 ${RIPRADDR} divpdx ${RIPR}, %xmm0`],
+ ['f64x2.eq', '(v128.const f64x2 1 2)',
+ `66 0f c2 05 ${RIPRADDR} 00 cmppdx \\$0x00, ${RIPR}, %xmm0`],
+ ['f64x2.ne', '(v128.const f64x2 1 2)',
+ `66 0f c2 05 ${RIPRADDR} 04 cmppdx \\$0x04, ${RIPR}, %xmm0`],
+ ['f64x2.lt', '(v128.const f64x2 1 2)',
+ `66 0f c2 05 ${RIPRADDR} 01 cmppdx \\$0x01, ${RIPR}, %xmm0`],
+ ['f64x2.le', '(v128.const f64x2 1 2)',
+ `66 0f c2 05 ${RIPRADDR} 02 cmppdx \\$0x02, ${RIPR}, %xmm0`]]);
+
+// Commutative operations with constants on the lhs should generate the same
+// code as with the constant on the rhs.
+
+codegenTestX64_LITERALxv128_v128(
+ [['i8x16.add', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `66 0f fc 05 ${RIPRADDR} paddbx ${RIPR}, %xmm0`],
+ ['i8x16.add_sat_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `66 0f ec 05 ${RIPRADDR} paddsbx ${RIPR}, %xmm0`],
+ ['i8x16.add_sat_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `66 0f dc 05 ${RIPRADDR} paddusbx ${RIPR}, %xmm0`],
+ ['i8x16.min_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `66 0f 38 38 05 ${RIPRADDR} pminsbx ${RIPR}, %xmm0`],
+ ['i8x16.min_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `66 0f da 05 ${RIPRADDR} pminubx ${RIPR}, %xmm0`],
+ ['i8x16.max_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `66 0f 38 3c 05 ${RIPRADDR} pmaxsbx ${RIPR}, %xmm0`],
+ ['i8x16.max_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `66 0f de 05 ${RIPRADDR} pmaxubx ${RIPR}, %xmm0`],
+ ['i8x16.eq', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `66 0f 74 05 ${RIPRADDR} pcmpeqbx ${RIPR}, %xmm0`],
+ ['i8x16.ne', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', `
+66 0f 74 05 ${RIPRADDR} pcmpeqbx ${RIPR}, %xmm0
+66 45 0f 75 ff pcmpeqw %xmm15, %xmm15
+66 41 0f ef c7 pxor %xmm15, %xmm0`],
+
+ ['i16x8.add', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `66 0f fd 05 ${RIPRADDR} paddwx ${RIPR}, %xmm0`],
+ ['i16x8.mul', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `66 0f d5 05 ${RIPRADDR} pmullwx ${RIPR}, %xmm0`],
+ ['i16x8.add_sat_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `66 0f ed 05 ${RIPRADDR} paddswx ${RIPR}, %xmm0`],
+ ['i16x8.add_sat_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `66 0f dd 05 ${RIPRADDR} padduswx ${RIPR}, %xmm0`],
+ ['i16x8.min_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `66 0f ea 05 ${RIPRADDR} pminswx ${RIPR}, %xmm0`],
+ ['i16x8.min_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `66 0f 38 3a 05 ${RIPRADDR} pminuwx ${RIPR}, %xmm0`],
+ ['i16x8.max_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `66 0f ee 05 ${RIPRADDR} pmaxswx ${RIPR}, %xmm0`],
+ ['i16x8.max_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `66 0f 38 3e 05 ${RIPRADDR} pmaxuwx ${RIPR}, %xmm0`],
+ ['i16x8.eq', '(v128.const i16x8 1 2 1 2 1 2 1 2)',
+ `66 0f 75 05 ${RIPRADDR} pcmpeqwx ${RIPR}, %xmm0`],
+ ['i16x8.ne', '(v128.const i16x8 1 2 1 2 1 2 1 2)', `
+66 0f 75 05 ${RIPRADDR} pcmpeqwx ${RIPR}, %xmm0
+66 45 0f 75 ff pcmpeqw %xmm15, %xmm15
+66 41 0f ef c7 pxor %xmm15, %xmm0`],
+
+ ['i32x4.add', '(v128.const i32x4 1 2 1 2)',
+ `66 0f fe 05 ${RIPRADDR} padddx ${RIPR}, %xmm0`],
+ ['i32x4.mul', '(v128.const i32x4 1 2 1 2)',
+ `66 0f 38 40 05 ${RIPRADDR} pmulldx ${RIPR}, %xmm0`],
+ ['i32x4.min_s', '(v128.const i32x4 1 2 1 2)',
+ `66 0f 38 39 05 ${RIPRADDR} pminsdx ${RIPR}, %xmm0`],
+ ['i32x4.min_u', '(v128.const i32x4 1 2 1 2)',
+ `66 0f 38 3b 05 ${RIPRADDR} pminudx ${RIPR}, %xmm0`],
+ ['i32x4.max_s', '(v128.const i32x4 1 2 1 2)',
+ `66 0f 38 3d 05 ${RIPRADDR} pmaxsdx ${RIPR}, %xmm0`],
+ ['i32x4.max_u', '(v128.const i32x4 1 2 1 2)',
+ `66 0f 38 3f 05 ${RIPRADDR} pmaxudx ${RIPR}, %xmm0`],
+ ['i32x4.eq', '(v128.const i32x4 1 2 1 2)',
+ `66 0f 76 05 ${RIPRADDR} pcmpeqdx ${RIPR}, %xmm0`],
+ ['i32x4.ne', '(v128.const i32x4 1 2 1 2)', `
+66 0f 76 05 ${RIPRADDR} pcmpeqdx ${RIPR}, %xmm0
+66 45 0f 75 ff pcmpeqw %xmm15, %xmm15
+66 41 0f ef c7 pxor %xmm15, %xmm0`],
+ ['i32x4.dot_i16x8_s', '(v128.const i32x4 1 2 1 2)',
+ `66 0f f5 05 ${RIPRADDR} pmaddwdx ${RIPR}, %xmm0`],
+
+ ['i64x2.add', '(v128.const i64x2 1 2)',
+ `66 0f d4 05 ${RIPRADDR} paddqx ${RIPR}, %xmm0`],
+
+ ['v128.and', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `66 0f db 05 ${RIPRADDR} pandx ${RIPR}, %xmm0`],
+ ['v128.or', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `66 0f eb 05 ${RIPRADDR} porx ${RIPR}, %xmm0`],
+ ['v128.xor', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
+ `66 0f ef 05 ${RIPRADDR} pxorx ${RIPR}, %xmm0`]]);
diff --git a/js/src/jit-test/tests/wasm/simd/binop-x86-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/binop-x86-ion-codegen.js
new file mode 100644
index 0000000000..2cb5f2e969
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/binop-x86-ion-codegen.js
@@ -0,0 +1,20 @@
+// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x86 || getBuildConfiguration().simulator || isAvxPresent(); include:codegen-x86-test.js
+
+codegenTestX86_v128xLITERAL_v128(
+ [['f32x4.eq', '(v128.const f32x4 1 2 3 4)',
+ `0f c2 05 ${ABSADDR} 00 cmppsx \\$0x00, ${ABS}, %xmm0`],
+ ['f32x4.ne', '(v128.const f32x4 1 2 3 4)',
+ `0f c2 05 ${ABSADDR} 04 cmppsx \\$0x04, ${ABS}, %xmm0`],
+ ['f32x4.lt', '(v128.const f32x4 1 2 3 4)',
+ `0f c2 05 ${ABSADDR} 01 cmppsx \\$0x01, ${ABS}, %xmm0`],
+ ['f32x4.le', '(v128.const f32x4 1 2 3 4)',
+ `0f c2 05 ${ABSADDR} 02 cmppsx \\$0x02, ${ABS}, %xmm0`],
+
+ ['f64x2.eq', '(v128.const f64x2 1 2)',
+ `66 0f c2 05 ${ABSADDR} 00 cmppdx \\$0x00, ${ABS}, %xmm0`],
+ ['f64x2.ne', '(v128.const f64x2 1 2)',
+ `66 0f c2 05 ${ABSADDR} 04 cmppdx \\$0x04, ${ABS}, %xmm0`],
+ ['f64x2.lt', '(v128.const f64x2 1 2)',
+ `66 0f c2 05 ${ABSADDR} 01 cmppdx \\$0x01, ${ABS}, %xmm0`],
+ ['f64x2.le', '(v128.const f64x2 1 2)',
+ `66 0f c2 05 ${ABSADDR} 02 cmppdx \\$0x02, ${ABS}, %xmm0`]]);
diff --git a/js/src/jit-test/tests/wasm/simd/bitselect-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/bitselect-x64-ion-codegen.js
new file mode 100644
index 0000000000..62951bce62
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/bitselect-x64-ion-codegen.js
@@ -0,0 +1,45 @@
+// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x64 || getBuildConfiguration().simulator || isAvxPresent(); include:codegen-x64-test.js
+
+// Test that there are no extraneous moves or fixups for SIMD bitselect
+// operations. See README-codegen.md for general information about this type of
+// test case.
+
+// The codegen enforces onTrue == output so we avoid a move to set that up.
+//
+// The remaining movdqa is currently unavoidable; it moves the control mask into a temp.
+// The temp should be identical to the mask but the regalloc does not currently
+// allow this constraint to be enforced.
+
+// Inputs (xmm0, xmm1, xmm2)
+
+codegenTestX64_adhoc(
+`(module
+ (func (export "f") (param v128) (param v128) (param v128) (param v128) (result v128)
+ (v128.bitselect (local.get 0) (local.get 1) (local.get 2))))`,
+ 'f',
+`66 0f 6f da movdqa %xmm2, %xmm3
+66 0f db c3 pand %xmm3, %xmm0
+66 0f df d9 pandn %xmm1, %xmm3
+66 0f eb c3 por %xmm3, %xmm0`);
+
+// Blend constant optimizations
+
+codegenTestX64_adhoc(
+ `(module
+ (func (export "f") (param v128) (param v128) (param v128) (result v128)
+ (v128.bitselect (local.get 0) (local.get 1) (v128.const i32x4 -1 0 0 -1))))`,
+ 'f',
+ `66 0f 3a 0e c1 c3 pblendw \\$0xC3, %xmm1, %xmm0`);
+
+// pblendvb optimization when bitselect follows comparison.
+// Non-AVX pblendvb uses xmm0 as an implicit read-only operand.
+codegenTestX64_adhoc(
+ `(module
+ (func (export "f") (param v128) (param v128) (param v128) (param v128) (result v128)
+ (v128.bitselect (local.get 2) (local.get 3)
+ (i32x4.eq (local.get 0) (local.get 1)))))`,
+ 'f', `
+66 0f 76 c1 pcmpeqd %xmm1, %xmm0
+66 0f 6f cb movdqa %xmm3, %xmm1
+66 0f 38 10 ca pblendvb %xmm2, %xmm1
+66 0f 6f c1 movdqa %xmm1, %xmm0`);
diff --git a/js/src/jit-test/tests/wasm/simd/cmp-bitselect.js b/js/src/jit-test/tests/wasm/simd/cmp-bitselect.js
new file mode 100644
index 0000000000..0629455b71
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/cmp-bitselect.js
@@ -0,0 +1,107 @@
+// |jit-test| skip-if: !wasmSimdEnabled()
+// Tests that the combination of a comparison and bitselect produces the correct result.
+// On x86/64 platforms, the slow bitselect emulation is expected to be replaced
+// with its faster laneselect equivalent (pblendvb).
+// See bug 1751488 for more information.
+
+// Codegen check applied to each compiled test function. It stays a no-op
+// unless the shell has a disassembler, compiles with Ion, and targets x64
+// without a simulator.
+let verifyCodegen = _method => {};
+if (hasDisassembler() && wasmCompileMode() == "ion" &&
+    getBuildConfiguration().x64 && !getBuildConfiguration().simulator) {
+  // The disassembly must mention vpblendvb when AVX is present and pblendvb
+  // otherwise.
+  const blendMnemonic = isAvxPresent() ? 'vpblendvb' : "pblendvb";
+  verifyCodegen = method => {
+    const listing = wasmDis(method, {asString: true});
+    assertEq(listing.includes(blendMnemonic), true);
+  };
+}
+
+// JS reference implementation of each comparison, looked up by the wasm
+// operator name once any _s/_u suffix has been stripped.
+const checkOps = {
+  eq: (a, b) => a == b,
+  ne: (a, b) => a != b,
+  lt: (a, b) => a < b,
+  le: (a, b) => a <= b,
+  gt: (a, b) => a > b,
+  ge: (a, b) => a >= b,
+};
+// The bytes 0..31; copied into memory as the two 16-byte bitselect inputs.
+const checkPattern = Uint8Array.from({length: 32}, (_, i) => i);
+
+for (let [laneSize, aty_s, aty_u] of [
+ [8, Int8Array, Uint8Array], [16, Int16Array, Uint16Array],
+ [32, Int32Array, Uint32Array], [64, BigInt64Array, BigUint64Array]]) {
+ const laneCount = 128 / laneSize;
+ const ty = `i${laneSize}x${laneCount}`;
+ for (let op of ['eq', 'ne', 'lt_s', 'le_s', 'gt_s', 'ge_s', 'lt_u', 'le_u', 'gt_u', 'ge_u']) {
+ if (laneSize == 64 && op.includes('_u')) continue;
+ const wrap = laneSize < 64 ? x => x : x => BigInt(x);
+ const aty = op.includes('_u') ? aty_u : aty_s;
+ const check = checkOps[op.replace(/_[us]$/, "")];
+ // Items to test: 0, 1, all 1s, top half 1s, low half 1s, top bit 1
+ const testData = new aty([wrap(0), wrap(1), ~wrap(0), ~wrap(0) << wrap(laneSize / 2),
+ ~((~wrap(0)) << wrap(laneSize / 2)), wrap(1) << wrap(laneSize - 1)]);
+ const ins = new WebAssembly.Instance(new WebAssembly.Module(wasmTextToBinary(`(module
+ (memory (export "memory") 1)
+ (func (export "run")
+ (v128.store (i32.const 32)
+ (v128.bitselect (v128.load (i32.const 64)) (v128.load (i32.const 80)) (${ty}.${op} (v128.load (i32.const 0)) (v128.load (i32.const 16))))) ))`)));
+ const mem = new aty(ins.exports.memory.buffer);
+ const memI8 = new Uint8Array(ins.exports.memory.buffer);
+ memI8.subarray(64, 96).set(checkPattern);
+ verifyCodegen(ins.exports.run);
+ for (let i = 0; i < testData.length; i++) {
+ for (let j = 0; j < testData.length; j++) {
+ for (let q = 0; q < laneCount; q++) {
+ mem[q] = testData[(i + q) % testData.length];
+ mem[q + laneCount] = testData[(j + q) % testData.length];
+ }
+ ins.exports.run();
+ for (let q = 0; q < laneCount; q++) {
+ const val = check(mem[q], mem[q + laneCount]);
+ const n = laneSize >> 3;
+ for (let k = 0; k < n; k++) {
+ assertEq(checkPattern[q * n + k + (val ? 0 : 16)],
+ memI8[32 + q * n + k]);
+ }
+ }
+ }
+ }
+ }
+}
+
+for (let [laneSize, aty] of [[32, Float32Array], [64, Float64Array]]) {
+ const laneCount = 128 / laneSize;
+ const ty = `f${laneSize}x${laneCount}`;
+ for (let op of ['eq', 'ne', 'lt', 'le', 'gt', 'ge']) {
+ const check = checkOps[op];
+ // Items to test: 0, 1, -1, PI, NaN, Inf, -0, -Inf
+ const testData = new aty([0, 1, -1, Math.PI, NaN, Infinity, 0/-Infinity, -Infinity]);
+ const ins = new WebAssembly.Instance(new WebAssembly.Module(wasmTextToBinary(`(module
+ (memory (export "memory") 1)
+ (func (export "run")
+ (v128.store (i32.const 32)
+ (v128.bitselect (v128.load (i32.const 64)) (v128.load (i32.const 80)) (${ty}.${op} (v128.load (i32.const 0)) (v128.load (i32.const 16))))) ))`)));
+ const mem = new aty(ins.exports.memory.buffer);
+ const memI8 = new Uint8Array(ins.exports.memory.buffer);
+ memI8.subarray(64, 96).set(checkPattern);
+ verifyCodegen(ins.exports.run);
+ for (let i = 0; i < testData.length; i++) {
+ for (let j = 0; j < testData.length; j++) {
+ for (let q = 0; q < laneCount; q++) {
+ mem[q] = testData[(i + q) % testData.length];
+ mem[q + laneCount] = testData[(j + q) % testData.length];
+ }
+ ins.exports.run();
+ for (let q = 0; q < laneCount; q++) {
+ const val = check(mem[q], mem[q + laneCount]);
+ const n = laneSize >> 3;
+ for (let k = 0; k < n; k++) {
+ assertEq(checkPattern[q * n + k + (val ? 0 : 16)],
+ memI8[32 + q * n + k]);
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/js/src/jit-test/tests/wasm/simd/cmp-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/cmp-x64-ion-codegen.js
new file mode 100644
index 0000000000..b4fe1d0281
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/cmp-x64-ion-codegen.js
@@ -0,0 +1,77 @@
+// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x64 || getBuildConfiguration().simulator; include:codegen-x64-test.js
+
+// Test that there are no extraneous moves or fixups for various SIMD comparison
+// operations. See README-codegen.md for general information about this type of
+// test case.
+
+// Inputs (xmm0, xmm1)
+
+codegenTestX64_v128xv128_v128(
+ [['i8x16.gt_s', `66 0f 64 c1 pcmpgtb %xmm1, %xmm0`],
+ ['i16x8.gt_s', `66 0f 65 c1 pcmpgtw %xmm1, %xmm0`],
+ ['i32x4.gt_s', `66 0f 66 c1 pcmpgtd %xmm1, %xmm0`],
+ ['i8x16.le_s', `
+66 0f 64 c1 pcmpgtb %xmm1, %xmm0
+66 45 0f 75 ff pcmpeqw %xmm15, %xmm15
+66 41 0f ef c7 pxor %xmm15, %xmm0
+`],
+ ['i16x8.le_s', `
+66 0f 65 c1 pcmpgtw %xmm1, %xmm0
+66 45 0f 75 ff pcmpeqw %xmm15, %xmm15
+66 41 0f ef c7 pxor %xmm15, %xmm0
+`],
+ ['i32x4.le_s', `
+66 0f 66 c1 pcmpgtd %xmm1, %xmm0
+66 45 0f 75 ff pcmpeqw %xmm15, %xmm15
+66 41 0f ef c7 pxor %xmm15, %xmm0
+`],
+ ['i8x16.eq', `66 0f 74 c1 pcmpeqb %xmm1, %xmm0`],
+ ['i16x8.eq', `66 0f 75 c1 pcmpeqw %xmm1, %xmm0`],
+ ['i32x4.eq', `66 0f 76 c1 pcmpeqd %xmm1, %xmm0`],
+ ['i8x16.ne', `
+66 0f 74 c1 pcmpeqb %xmm1, %xmm0
+66 45 0f 75 ff pcmpeqw %xmm15, %xmm15
+66 41 0f ef c7 pxor %xmm15, %xmm0
+`],
+ ['i16x8.ne', `
+66 0f 75 c1 pcmpeqw %xmm1, %xmm0
+66 45 0f 75 ff pcmpeqw %xmm15, %xmm15
+66 41 0f ef c7 pxor %xmm15, %xmm0
+`],
+ ['i32x4.ne', `
+66 0f 76 c1 pcmpeqd %xmm1, %xmm0
+66 45 0f 75 ff pcmpeqw %xmm15, %xmm15
+66 41 0f ef c7 pxor %xmm15, %xmm0
+`],
+ ['f32x4.eq', `0f c2 c1 00 cmpps \\$0x00, %xmm1, %xmm0`],
+ ['f32x4.ne', `0f c2 c1 04 cmpps \\$0x04, %xmm1, %xmm0`],
+ ['f32x4.lt', `0f c2 c1 01 cmpps \\$0x01, %xmm1, %xmm0`],
+ ['f32x4.le', `0f c2 c1 02 cmpps \\$0x02, %xmm1, %xmm0`],
+ ['f64x2.eq', `66 0f c2 c1 00 cmppd \\$0x00, %xmm1, %xmm0`],
+ ['f64x2.ne', `66 0f c2 c1 04 cmppd \\$0x04, %xmm1, %xmm0`],
+ ['f64x2.lt', `66 0f c2 c1 01 cmppd \\$0x01, %xmm1, %xmm0`],
+ ['f64x2.le', `66 0f c2 c1 02 cmppd \\$0x02, %xmm1, %xmm0`]] );
+
+// Inputs (xmm1, xmm0) because the operation reverses its arguments.
+
+codegenTestX64_v128xv128_v128_reversed(
+ [['i8x16.ge_s', `
+66 0f 64 c1 pcmpgtb %xmm1, %xmm0
+66 45 0f 75 ff pcmpeqw %xmm15, %xmm15
+66 41 0f ef c7 pxor %xmm15, %xmm0`],
+ ['i16x8.ge_s',
+`
+66 0f 65 c1 pcmpgtw %xmm1, %xmm0
+66 45 0f 75 ff pcmpeqw %xmm15, %xmm15
+66 41 0f ef c7 pxor %xmm15, %xmm0`],
+ ['i32x4.ge_s', `
+66 0f 66 c1 pcmpgtd %xmm1, %xmm0
+66 45 0f 75 ff pcmpeqw %xmm15, %xmm15
+66 41 0f ef c7 pxor %xmm15, %xmm0`],
+ ['i8x16.lt_s', `66 0f 64 c1 pcmpgtb %xmm1, %xmm0`],
+ ['i16x8.lt_s', `66 0f 65 c1 pcmpgtw %xmm1, %xmm0`],
+ ['i32x4.lt_s', `66 0f 66 c1 pcmpgtd %xmm1, %xmm0`],
+ ['f32x4.gt', `0f c2 c1 01 cmpps \\$0x01, %xmm1, %xmm0`],
+ ['f32x4.ge', `0f c2 c1 02 cmpps \\$0x02, %xmm1, %xmm0`],
+ ['f64x2.gt', `66 0f c2 c1 01 cmppd \\$0x01, %xmm1, %xmm0`],
+ ['f64x2.ge', `66 0f c2 c1 02 cmppd \\$0x02, %xmm1, %xmm0`]] );
diff --git a/js/src/jit-test/tests/wasm/simd/const-arm64-vixl-codegen.js b/js/src/jit-test/tests/wasm/simd/const-arm64-vixl-codegen.js
new file mode 100644
index 0000000000..9dc08c6e6b
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/const-arm64-vixl-codegen.js
@@ -0,0 +1,109 @@
+// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "baseline" || !getBuildConfiguration().arm64
+
+// Test that the vixl logic for v128 constant loads is at least somewhat
+// reasonable.
+
+var lead = `0x[0-9a-f]+ +[0-9a-f]{8} +`;
+
+var prefix = `${lead}sub sp, sp, #0x.. \\(..\\)
+${lead}str x23, \\[sp, #..\\]`;
+
+var suffix =
+`${lead}b #\\+0x8 \\(addr 0x.*\\)
+${lead}brk #0x0`;
+
+for ( let [bits, expected, values] of [
+ // If high == low and the byte is 0 or ff then a single movi is sufficient.
+ ['i8x16 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00', `
+${prefix}
+${lead}movi v0\\.2d, #0x0
+${suffix}
+`,
+ [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
+
+ ['i8x16 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0', `
+${prefix}
+${lead}movi v0\\.2d, #0xff00ff00ff00ff
+${suffix}
+`,
+ [-1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0]],
+
+ // Splattable small things (up to a byte, at a byte location)
+ // can also use just one instruction
+ ['i32x4 1 1 1 1', `
+${prefix}
+${lead}movi v0\\.4s, #0x1, lsl #0
+${suffix}
+`,
+ [1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0]],
+
+ ['i32x4 0x300 0x300 0x300 0x300', `
+${prefix}
+${lead}movi v0\\.4s, #0x3, lsl #8
+${suffix}
+`,
+ [0, 3, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0]],
+
+ // If high == low but the value is more complex then a constant load
+ // plus a dup is sufficient. x16 is the designated temp.
+ ['i32x4 1 2 1 2', `
+${prefix}
+${lead}mov x16, #0x1
+${lead}movk x16, #0x2, lsl #32
+${lead}dup v0\\.2d, x16
+${suffix}
+`,
+ [1, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0]],
+
+ // If high != low then we degenerate to a more complicated pattern: dup the low value
+ // and then overwrite the high part with the high value.
+ ['i32x4 1 2 2 1', `
+${prefix}
+${lead}mov x16, #0x1
+${lead}movk x16, #0x2, lsl #32
+${lead}dup v0\\.2d, x16
+${lead}mov x16, #0x2
+${lead}movk x16, #0x1, lsl #32
+${lead}mov v0\\.d\\[1\\], x16
+${suffix}
+`,
+ [1, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0]],
+
+ // Things are not always bleak, and vixl finds a way.
+ ['i32x4 1 1 2 2', `
+${prefix}
+${lead}movi v0\\.4s, #0x1, lsl #0
+${lead}mov x16, #0x200000002
+${lead}mov v0\\.d\\[1\\], x16
+${suffix}
+`,
+ [1, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0]],
+] ) {
+ let ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1)
+ (func (export "run")
+ (v128.store (i32.const 0) (call $f)))
+ (func $f (export "f") (result v128)
+ (v128.const ${bits})))`);
+ let output = wasmDis(ins.exports.f, {tier:"baseline", asString:true});
+ assertEq(output.match(new RegExp(expected)) != null, true);
+ let mem = new Int8Array(ins.exports.mem.buffer);
+ set(mem, 0, iota(16).map(x => -1-x));
+ ins.exports.run();
+ assertSame(get(mem, 0, 16), values);
+}
+
+// Returns a plain array holding the `len` elements of `arr` that start at
+// index `loc`.
+function get(arr, loc, len) {
+  const out = [];
+  let offset = 0;
+  while (offset < len) {
+    out.push(arr[loc + offset]);
+    offset++;
+  }
+  return out;
+}
+
+// Stores the elements of `vals` into `arr` at consecutive indices, starting
+// at index `loc`.
+function set(arr, loc, vals) {
+  let i = 0;
+  while (i < vals.length) {
+    const dest = loc + i;
+    arr[dest] = vals[i];
+    i += 1;
+  }
+}
diff --git a/js/src/jit-test/tests/wasm/simd/const-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/const-x64-ion-codegen.js
new file mode 100644
index 0000000000..04a00b538d
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/const-x64-ion-codegen.js
@@ -0,0 +1,28 @@
+// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x64 || getBuildConfiguration().simulator; include:codegen-x64-test.js
+
+// Test that constants that can be synthesized are synthesized. See README-codegen.md
+// for general information about this type of test case.
+
+codegenTestX64_unit_v128(
+ [['v128.const i8x16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0',
+ `66 0f ef c0 pxor %xmm0, %xmm0`],
+ ['v128.const i8x16 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1',
+ `66 0f 75 c0 pcmpeqw %xmm0, %xmm0`],
+ ['v128.const i16x8 0 0 0 0 0 0 0 0',
+ `66 0f ef c0 pxor %xmm0, %xmm0`],
+ ['v128.const i16x8 -1 -1 -1 -1 -1 -1 -1 -1',
+ `66 0f 75 c0 pcmpeqw %xmm0, %xmm0`],
+ ['v128.const i32x4 0 0 0 0',
+ `66 0f ef c0 pxor %xmm0, %xmm0`],
+ ['v128.const i32x4 -1 -1 -1 -1',
+ `66 0f 75 c0 pcmpeqw %xmm0, %xmm0`],
+ ['v128.const i64x2 0 0',
+ `66 0f ef c0 pxor %xmm0, %xmm0`],
+ ['v128.const i64x2 -1 -1',
+ `66 0f 75 c0 pcmpeqw %xmm0, %xmm0`],
+ ['v128.const f32x4 0 0 0 0',
+ // Arguably this should be xorps but that's for later
+ `66 0f ef c0 pxor %xmm0, %xmm0`],
+ ['v128.const f64x2 0 0',
+ // Arguably this should be xorpd but that's for later
+ `66 0f ef c0 pxor %xmm0, %xmm0`]] );
diff --git a/js/src/jit-test/tests/wasm/simd/cvt-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/cvt-x64-ion-codegen.js
new file mode 100644
index 0000000000..1e2d613c1a
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/cvt-x64-ion-codegen.js
@@ -0,0 +1,27 @@
+// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x64 || getBuildConfiguration().simulator || isAvxPresent(); include:codegen-x64-test.js
+
+// Test that there are no extraneous moves for various SIMD conversion
+// operations. See README-codegen.md for general information about this type of
+// test case.
+
+// Note, these tests test the beginning of the output but not the end.
+
+codegenTestX64_v128_v128(
+ [['i32x4.trunc_sat_f32x4_s',
+ // The movaps is dest -> scratch and needs to be here. The test is
+ // asserting that there is not an additional (redundant) move here.
+`
+44 0f 28 f8 movaps %xmm0, %xmm15
+45 0f c2 ff 00 cmpps \\$0x00, %xmm15, %xmm15
+66 41 0f db c7 pand %xmm15, %xmm0`],
+ ['i32x4.trunc_sat_f32x4_u', `
+45 0f 57 ff xorps %xmm15, %xmm15
+41 0f 5f c7 maxps %xmm15, %xmm0`],
+ ['f32x4.convert_i32x4_u', `
+66 45 0f ef ff pxor %xmm15, %xmm15
+66 44 0f 3a 0e f8 55 pblendw \\$0x55, %xmm0, %xmm15
+66 41 0f fa c7 psubd %xmm15, %xmm0
+45 0f 5b ff cvtdq2ps %xmm15, %xmm15`]],
+ {no_suffix:true});
+
+
diff --git a/js/src/jit-test/tests/wasm/simd/debug-bug1644759.js b/js/src/jit-test/tests/wasm/simd/debug-bug1644759.js
new file mode 100644
index 0000000000..0de46e0f0c
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/debug-bug1644759.js
@@ -0,0 +1,20 @@
+// |jit-test| skip-if: !wasmDebuggingEnabled() || !wasmSimdEnabled()
+
+var g7 = newGlobal({newCompartment: true});
+g7.parent = this;
+g7.eval(`
+ Debugger(parent).onEnterFrame = function(frame) { };
+`);
+var ins = wasmEvalText(`
+ (memory (export "mem") 1 1)
+ (func (export "run")
+ (param $k i32)
+ (v128.store (i32.const 0) (call $f (local.get $k)))
+ )
+ (func $f
+ (param $k i32)
+ (result v128)
+ (v128.const i32x4 5 6 7 8)
+ )
+`);
+ins.exports.run(0);
diff --git a/js/src/jit-test/tests/wasm/simd/directives.txt b/js/src/jit-test/tests/wasm/simd/directives.txt
new file mode 100644
index 0000000000..3e89e7550b
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/directives.txt
@@ -0,0 +1 @@
+|jit-test| test-also=--wasm-compiler=baseline; test-also=--wasm-compiler=optimizing; test-also=--wasm-test-serialization; test-also=--wasm-compiler=optimizing --no-avx; skip-variant-if: --wasm-compiler=optimizing --no-avx, !getBuildConfiguration().x86 && !getBuildConfiguration().x64 || getBuildConfiguration().simulator; include:wasm.js
diff --git a/js/src/jit-test/tests/wasm/simd/disabled.js b/js/src/jit-test/tests/wasm/simd/disabled.js
new file mode 100644
index 0000000000..feae414697
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/disabled.js
@@ -0,0 +1,28 @@
+// |jit-test| skip-if: wasmSimdEnabled()
+
+// ../binary.js checks that all SIMD extended opcodes in the 0..255 range are
+// rejected if !wasmSimdEnabled, so no need to check that here.
+
+// Non-opcode cases that should also be rejected, lest feature sniffing may
+// erroneously conclude that simd is available when it's not. The error message
+// may differ depending on ENABLE_WASM_SIMD: if SIMD is compiled in we usually
+// get a sensible error about v128; if not, we get something generic.
+
+wasmFailValidateText(`(module (func (param v128)))`,
+ /(v128 not enabled)|(bad type)/);
+
+wasmFailValidateText(`(module (func (result v128)))`,
+ /(v128 not enabled)|(bad type)/);
+
+wasmFailValidateText(`(module (func (local v128)))`,
+ /(v128 not enabled)|(bad type)|(SIMD support is not enabled)/);
+
+wasmFailValidateText(`(module (global (import "m" "g") v128))`,
+ /expected global type/);
+
+wasmFailValidateText(`(module (global (import "m" "g") (mut v128)))`,
+ /expected global type/);
+
+wasmFailValidateText(`(module (global i32 (v128.const i32x4 0 0 0 0)))`,
+ /(v128 not enabled)|(unrecognized opcode)/);
+
diff --git a/js/src/jit-test/tests/wasm/simd/experimental.js b/js/src/jit-test/tests/wasm/simd/experimental.js
new file mode 100644
index 0000000000..3f4a85ae75
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/experimental.js
@@ -0,0 +1,411 @@
+// |jit-test| --wasm-relaxed-simd; skip-if: !wasmRelaxedSimdEnabled()
+
+// Experimental opcodes. We have no text parsing support for these yet. The
+// tests will be cleaned up and moved into ad-hack.js if the opcodes are
+// adopted.
+
+load(libdir + "wasm-binary.js");
+
+// Compiles the binary module in `bytes` and instantiates it with `imports`.
+function wasmEval(bytes, imports) {
+  const compiled = new WebAssembly.Module(bytes);
+  return new WebAssembly.Instance(compiled, imports);
+}
+
+// Asserts that WebAssembly.validate() accepts `bytes`, then compiles and
+// instantiates the module through wasmEval.
+function wasmValidateAndEval(bytes, imports) {
+  const isValid = WebAssembly.validate(bytes);
+  assertEq(isValid, true, "test of WasmValidate.cpp");
+  return wasmEval(bytes, imports);
+}
+
+// Collects `len` consecutive elements of `arr`, beginning at index `loc`,
+// into a new plain array.
+function get(arr, loc, len) {
+  const out = [];
+  let offset = 0;
+  while (offset < len) {
+    out.push(arr[loc + offset]);
+    offset++;
+  }
+  return out;
+}
+
+// Stores the elements of `vals` into `arr` at consecutive indices, starting
+// at index `loc`. For the 64-bit BigInt typed arrays each value is converted
+// with BigInt() first, because those arrays throw a TypeError when a Number
+// is assigned to an element.
+function set(arr, loc, vals) {
+  // The array kind cannot change while copying, so decide once up front.
+  const needsBigInt = arr instanceof BigInt64Array || arr instanceof BigUint64Array;
+  for (let i = 0; i < vals.length; i++) {
+    arr[loc + i] = needsBigInt ? BigInt(vals[i]) : vals[i];
+  }
+}
+
+const v2vSig = {args:[], ret:VoidCode};
+
+// Builds the bytecode for a v128 load from the constant address `addr`: an
+// i32.const of the address followed by the SIMD-prefixed load opcode and its
+// two immediates (4 and varU32(0); presumably alignment and offset).
+function V128Load(addr) {
+  const address = [I32ConstCode, varS32(addr)];
+  const load = [SimdPrefix, V128LoadCode, 4, varU32(0)];
+  return [...address, ...load];
+}
+
+// Builds the bytecode that stores a v128 at the constant address `addr`: an
+// i32.const of the address, then the bytecode `v` that produces the value,
+// then the SIMD-prefixed store opcode with the immediates 4 and varU32(0).
+function V128StoreExpr(addr, v) {
+  const code = [I32ConstCode, varS32(addr)];
+  code.push(...v);
+  code.push(SimdPrefix, V128StoreCode, 4, varU32(0));
+  return code;
+}
+
+// FMA/FNMA, https://github.com/WebAssembly/relaxed-simd/issues/27 and
+// https://github.com/WebAssembly/relaxed-simd/pull/81
+
+// Reference result for the multiply-add tests: x * y + a.
+function fma(x, y, a) {
+  const product = x * y;
+  return product + a;
+}
+// Reference result for the negated multiply-add tests: -(x * y) + a.
+function fnma(x, y, a) {
+  const negatedProduct = -(x * y);
+  return negatedProduct + a;
+}
+
+var fxs = [10, 20, 30, 40];
+var fys = [-2, -3, -4, -5];
+var fas = [0, 100, 500, 700];
+var dxs = [10, 20];
+var dys = [-2, -3];
+var das = [0, 100];
+
+for ( let [opcode, xs, ys, as, operator] of [[F32x4RelaxedFmaCode, fxs, fys, fas, fma],
+ [F32x4RelaxedFnmaCode, fxs, fys, fas, fnma],
+ [F64x2RelaxedFmaCode, dxs, dys, das, fma],
+ [F64x2RelaxedFnmaCode, dxs, dys, das, fnma]] ) {
+ var k = xs.length;
+ var ans = iota(k).map((i) => operator(xs[i], ys[i], as[i]))
+
+ var ins = wasmValidateAndEval(moduleWithSections([
+ sigSection([v2vSig]),
+ declSection([0]),
+ memorySection(1),
+ exportSection([{funcIndex: 0, name: "run"},
+ {memIndex: 0, name: "mem"}]),
+ bodySection([
+ funcBody({locals:[],
+ body: [...V128StoreExpr(0, [...V128Load(16),
+ ...V128Load(32),
+ ...V128Load(48),
+ SimdPrefix, varU32(opcode)])]})])]));
+
+ var mem = new (k == 4 ? Float32Array : Float64Array)(ins.exports.mem.buffer);
+ set(mem, k, xs);
+ set(mem, 2*k, ys);
+ set(mem, 3*k, as);
+ ins.exports.run();
+ var result = get(mem, 0, k);
+ assertSame(result, ans);
+
+ assertEq(false, WebAssembly.validate(moduleWithSections([
+ sigSection([v2vSig]),
+ declSection([0]),
+ memorySection(1),
+ exportSection([{funcIndex: 0, name: "run"},
+ {memIndex: 0, name: "mem"}]),
+ bodySection([
+ funcBody({locals:[],
+ body: [...V128StoreExpr(0, [...V128Load(0),
+ ...V128Load(0),
+ SimdPrefix, varU32(opcode)])]})])])));
+}
+
+// Relaxed swizzle, https://github.com/WebAssembly/relaxed-simd/issues/22
+
+var ins = wasmValidateAndEval(moduleWithSections([
+ sigSection([v2vSig]),
+ declSection([0]),
+ memorySection(1),
+ exportSection([{funcIndex: 0, name: "run"},
+ {memIndex: 0, name: "mem"}]),
+ bodySection([
+ funcBody({locals:[],
+ body: [...V128StoreExpr(0, [...V128Load(16),
+ ...V128Load(32),
+ SimdPrefix, varU32(I8x16RelaxedSwizzleCode)])]})])]));
+var mem = new Uint8Array(ins.exports.mem.buffer);
+var test = [1, 4, 3, 7, 123, 0, 8, 222];
+set(mem, 16, test);
+for (let [i, s] of [[0, 0], [0, 1], [1,1], [1, 3], [7,5]]) {
+ var ans = new Uint8Array(16);
+ for (let j = 0; j < 16; j++) {
+ mem[32 + j] = (j * s + i) & 15;
+ ans[j] = test[(j * s + i) & 15];
+ }
+ ins.exports.run();
+ var result = get(mem, 0, 16);
+ assertSame(result, ans);
+}
+
+assertEq(false, WebAssembly.validate(moduleWithSections([
+ sigSection([v2vSig]),
+ declSection([0]),
+ memorySection(1),
+ bodySection([
+ funcBody({locals:[],
+ body: [...V128StoreExpr(0, [...V128Load(16),
+ SimdPrefix, varU32(I8x16RelaxedSwizzleCode)])]})])])));
+
+
+// Relaxed MIN/MAX, https://github.com/WebAssembly/relaxed-simd/issues/33
+
+const Neg0 = -1/Infinity;
+var minMaxTests = [
+ {a: 0, b: 0, min: 0, max: 0, },
+ {a: Neg0, b: Neg0, min: Neg0, max: Neg0, },
+ {a: 1/3, b: 2/3, min: 1/3, max: 2/3, },
+ {a: -1/3, b: -2/3, min: -2/3, max: -1/3, },
+ {a: -1000, b: 1, min: -1000, max: 1, },
+ {a: 10, b: -2, min: -2, max: 10, },
+];
+
+// Exercise the relaxed min/max opcodes with k lanes per vector: the F32x4
+// opcodes when k == 4 and the F64x2 opcodes when k == 2.
+for (let k of [4, 2]) {
+  const minOpcode = k == 4 ? F32x4RelaxedMinCode : F64x2RelaxedMinCode;
+  const maxOpcode = k == 4 ? F32x4RelaxedMaxCode : F64x2RelaxedMaxCode;
+
+  // Function 0 ("min") and function 1 ("max") each load their two operands
+  // from addresses 16 and 32 and store the result at address 0.
+  var ins = wasmValidateAndEval(moduleWithSections([
+    sigSection([v2vSig]),
+    declSection([0, 0]),
+    memorySection(1),
+    exportSection([{funcIndex: 0, name: "min"},
+                   {funcIndex: 1, name: "max"},
+                   {memIndex: 0, name: "mem"}]),
+    bodySection([
+      funcBody({locals:[],
+                body: [...V128StoreExpr(0, [...V128Load(16),
+                                            ...V128Load(32),
+                                            SimdPrefix, varU32(minOpcode)])]}),
+      funcBody({locals:[],
+                body: [...V128StoreExpr(0, [...V128Load(16),
+                                            ...V128Load(32),
+                                            SimdPrefix, varU32(maxOpcode)])]})])]));
+  for (let i = 0; i < minMaxTests.length; i++) {
+    var Ty = k == 4 ? Float32Array : Float64Array;
+    var mem = new Ty(ins.exports.mem.buffer);
+    var minResult = new Ty(k);
+    var maxResult = new Ty(k);
+    // Fill the k lanes with consecutive test cases, starting at case i and
+    // wrapping around the end of the table.
+    for (let j = 0; j < k; j++) {
+      const {a, b, min, max } = minMaxTests[(j + i) % minMaxTests.length];
+      mem[j + k] = a;      // lane j of the operand at byte offset 16
+      mem[j + k * 2] = b;  // lane j of the operand at byte offset 32
+      minResult[j] = min;
+      maxResult[j] = max;
+    }
+    ins.exports.min();
+    var result = get(mem, 0, k);
+    assertSame(result, minResult);
+    ins.exports.max();
+    var result = get(mem, 0, k);
+    assertSame(result, maxResult);
+  }
+
+  // A body that supplies only one operand to a two-operand opcode must fail
+  // validation. Declare exactly one function to match the single body:
+  // declaring two would make the module invalid because of the function/code
+  // count mismatch alone, so the check would pass without testing the opcode.
+  for (let op of [minOpcode, maxOpcode]) {
+    assertEq(false, WebAssembly.validate(moduleWithSections([
+      sigSection([v2vSig]),
+      declSection([0]),
+      memorySection(1),
+      exportSection([]),
+      bodySection([
+        funcBody({locals:[],
+                  body: [...V128StoreExpr(0, [...V128Load(0),
+                                              SimdPrefix, varU32(op)])]})])])));
+  }
+}
+
+// Relaxed I32x4.TruncFXXX, https://github.com/WebAssembly/relaxed-simd/issues/21
+
+var ins = wasmValidateAndEval(moduleWithSections([
+ sigSection([v2vSig]),
+ declSection([0, 0, 0, 0]),
+ memorySection(1),
+ exportSection([{funcIndex: 0, name: "from32s"},
+ {funcIndex: 1, name: "from32u"},
+ {funcIndex: 2, name: "from64s"},
+ {funcIndex: 3, name: "from64u"},
+ {memIndex: 0, name: "mem"}]),
+ bodySection([
+ funcBody({locals:[],
+ body: [...V128StoreExpr(0, [...V128Load(16),
+ SimdPrefix, varU32(I32x4RelaxedTruncSSatF32x4Code)])]}),
+ funcBody({locals:[],
+ body: [...V128StoreExpr(0, [...V128Load(16),
+ SimdPrefix, varU32(I32x4RelaxedTruncUSatF32x4Code)])]}),
+ funcBody({locals:[],
+ body: [...V128StoreExpr(0, [...V128Load(16),
+ SimdPrefix, varU32(I32x4RelaxedTruncSatF64x2SZeroCode)])]}),
+ funcBody({locals:[],
+ body: [...V128StoreExpr(0, [...V128Load(16),
+ SimdPrefix, varU32(I32x4RelaxedTruncSatF64x2UZeroCode)])]})])]));
+
+var mem = ins.exports.mem.buffer;
+set(new Float32Array(mem), 4, [0, 2.3, -3.4, 100000]);
+ins.exports.from32s();
+var result = get(new Int32Array(mem), 0, 4);
+assertSame(result, [0, 2, -3, 100000]);
+
+set(new Float32Array(mem), 4, [0, 3.3, 0x80000000, 200000]);
+ins.exports.from32u();
+var result = get(new Uint32Array(mem), 0, 4);
+assertSame(result, [0, 3, 0x80000000, 200000]);
+set(new Float32Array(mem), 4, [0, 0x80000100, 0x80000101, 0xFFFFFF00]);
+ins.exports.from32u();
+var result = get(new Uint32Array(mem), 0, 4);
+assertSame(result, [0, 0x80000100, 0x80000100, 0xFFFFFF00]);
+
+set(new Float64Array(mem), 2, [200000.3, -3.4]);
+ins.exports.from64s();
+var result = get(new Int32Array(mem), 0, 4);
+assertSame(result, [200000, -3, 0, 0]);
+set(new Float64Array(mem), 2, [0x90000000 + 0.1, 0]);
+ins.exports.from64u();
+var result = get(new Uint32Array(mem), 0, 4);
+assertSame(result, [0x90000000, 0, 0, 0]);
+
+for (let op of [I32x4RelaxedTruncSSatF32x4Code, I32x4RelaxedTruncUSatF32x4Code,
+ I32x4RelaxedTruncSatF64x2SZeroCode, I32x4RelaxedTruncSatF64x2UZeroCode]) {
+ assertEq(false, WebAssembly.validate(moduleWithSections([
+ sigSection([v2vSig]),
+ declSection([0]),
+ memorySection(1),
+ exportSection([]),
+ bodySection([
+ funcBody({locals:[],
+ body: [...V128StoreExpr(0, [SimdPrefix, varU32(op)])]})])])));
+}
+
+// Relaxed blend / laneselect, https://github.com/WebAssembly/relaxed-simd/issues/17
+
+for (let [k, opcode, AT] of [[1, I8x16RelaxedLaneSelectCode, Int8Array],
+ [2, I16x8RelaxedLaneSelectCode, Int16Array],
+ [4, I32x4RelaxedLaneSelectCode, Int32Array],
+ [8, I64x2RelaxedLaneSelectCode, BigInt64Array]]) {
+
+ var ins = wasmValidateAndEval(moduleWithSections([
+ sigSection([v2vSig]),
+ declSection([0]),
+ memorySection(1),
+ exportSection([{funcIndex: 0, name: "run"},
+ {memIndex: 0, name: "mem"}]),
+ bodySection([
+ funcBody({locals:[],
+ body: [...V128StoreExpr(0, [...V128Load(16),
+ ...V128Load(32),
+ ...V128Load(48),
+ SimdPrefix, varU32(opcode)])]})])]));
+
+ var mem = ins.exports.mem.buffer;
+ var mem8 = new Uint8Array(mem);
+ set(mem8, 16, [1,2,3,4,0,0,0,0,100,0,102,0,0,250,251,252,253]);
+ set(mem8, 32, [0,0,0,0,5,6,7,8,0,101,0,103,0,254,255,0,1]);
+ var c = new AT(mem, 48, 16 / k);
+ for (let i = 0; i < c.length; i++) {
+ // Use popcnt to randomize 0 and ~0
+ const popcnt_i = i.toString(2).replace(/0/g, "").length;
+ const v = popcnt_i & 1 ? -1 : 0
+ c[i] = k == 8 ? BigInt(v) : v;
+ }
+ ins.exports.run();
+ for (let i = 0; i < 16; i++) {
+ const r = c[(i / k) | 0] ? mem8[16 + i] : mem8[32 + i];
+ assertEq(r, mem8[i]);
+ }
+
+ assertEq(false, WebAssembly.validate(moduleWithSections([
+ sigSection([v2vSig]),
+ declSection([0]),
+ memorySection(1),
+ exportSection([{funcIndex: 0, name: "run"},
+ {memIndex: 0, name: "mem"}]),
+ bodySection([
+ funcBody({locals:[],
+ body: [...V128StoreExpr(0, [...V128Load(0),
+ ...V128Load(0),
+ SimdPrefix, varU32(opcode)])]})])])));
+}
+
+
+// Relaxed rounding q-format multiplication.
+var ins = wasmValidateAndEval(moduleWithSections([
+ sigSection([v2vSig]),
+ declSection([0]),
+ memorySection(1),
+ exportSection([{funcIndex: 0, name: "relaxed_q15mulr_s"},
+ {memIndex: 0, name: "mem"}]),
+ bodySection([
+ funcBody({locals:[],
+ body: [...V128StoreExpr(0, [...V128Load(16),
+ ...V128Load(32),
+ SimdPrefix, varU32(I16x8RelaxedQ15MulrS)])]})])]));
+
+var mem16 = new Int16Array(ins.exports.mem.buffer);
+for (let [as, bs] of cross([
+ [1, -3, 5, -7, 11, -13, -17, 19],
+ [-1, 0, 16, -32, 64, 128, -1024, 0, 1],
+ [1,2,-32768,32767,1,4,-32768,32767]]) ) {
+ set(mem16, 8, as);
+ set(mem16, 16, bs);
+ ins.exports.relaxed_q15mulr_s();
+ const result = get(mem16, 0, 8);
+ for (let i = 0; i < 8; i++) {
+ const expected = (as[i] * bs[i] + 0x4000) >> 15;
+ if (as[i] == -32768 && bs[i] == -32768) continue;
+ assertEq(expected, result[i], `result of ${as[i]} * ${bs[i]}`);
+ }
+}
+
+
+// Check relaxed dot product results.
+var ins = wasmValidateAndEval(moduleWithSections([
+ sigSection([v2vSig]),
+ declSection([0]),
+ memorySection(1),
+ exportSection([{funcIndex: 0, name: "dot_i8x16_i7x16_s"},
+ {memIndex: 0, name: "mem"}]),
+ bodySection([
+ funcBody({locals:[],
+ body: [...V128StoreExpr(0, [...V128Load(16),
+ ...V128Load(32),
+ SimdPrefix, varU32(I16x8DotI8x16I7x16S)])]})])]));
+var mem8 = new Int8Array(ins.exports.mem.buffer);
+var mem16 = new Int16Array(ins.exports.mem.buffer);
+var test7bit = [1, 2, 3, 4, 5, 64, 65, 127, 127, 0, 0,
+ 1, 65, 64, 2, 3, 0, 0, 127, 127, 5, 4];
+var testNeg = test7bit.concat(test7bit.map(i => ~i));
+for (let ai = 0; ai < testNeg.length - 15; ai++)
+ for (let bi = 0; bi < test7bit.length - 15; bi++) {
+ set(mem8, 16, testNeg.slice(ai, ai + 16));
+ set(mem8, 32, test7bit.slice(bi, bi + 16));
+ ins.exports.dot_i8x16_i7x16_s();
+ const result = get(mem16, 0, 8);
+ for (let i = 0; i < 8; i++) {
+ const expected = ((testNeg[ai + i * 2] * test7bit[bi + i * 2]) +
+ (testNeg[ai + i * 2 + 1] * test7bit[bi + i * 2 + 1])) | 0;
+ assertEq(expected, result[i]);
+ }
+ }
+
+var ins = wasmValidateAndEval(moduleWithSections([
+ sigSection([v2vSig]),
+ declSection([0]),
+ memorySection(1),
+ exportSection([{funcIndex: 0, name: "dot_i8x16_i7x16_add_s"},
+ {memIndex: 0, name: "mem"}]),
+ bodySection([
+ funcBody({locals:[],
+ body: [...V128StoreExpr(0, [...V128Load(16),
+ ...V128Load(32),
+ ...V128Load(48),
+ SimdPrefix, varU32(I32x4DotI8x16I7x16AddS)])]})])]));
+var mem8 = new Int8Array(ins.exports.mem.buffer);
+var mem32 = new Int32Array(ins.exports.mem.buffer);
+var test7bit = [1, 2, 3, 4, 5, 64, 65, 127, 127, 0, 0,
+ 1, 65, 64, 2, 3, 0, 0, 127, 127, 5, 4];
+var testNeg = test7bit.concat(test7bit.map(i => ~i));
+var testAcc = [0, 12, 65336, -1, 0x10000000, -0xffffff];
+for (let ai = 0; ai < testNeg.length - 15; ai++)
+ for (let bi = 0; bi < test7bit.length - 15; bi++)
+ for (let ci = 0; ci < testAcc.length - 3; ci++) {
+ set(mem8, 16, testNeg.slice(ai, ai + 16));
+ set(mem8, 32, test7bit.slice(bi, bi + 16));
+ set(mem32, 48/4, testAcc.slice(ci, ci + 4));
+ ins.exports.dot_i8x16_i7x16_add_s();
+ const result = get(mem32, 0, 4);
+ for (let i = 0; i < 4; i++) {
+ const a1 = (testNeg[ai + i * 4] * test7bit[bi + i * 4]) +
+ (testNeg[ai + i * 4 + 1] * test7bit[bi + i * 4 + 1]);
+ const a2 = (testNeg[ai + i * 4 + 2] * test7bit[bi + i * 4 + 2]) +
+ (testNeg[ai + i * 4 + 3] * test7bit[bi + i * 4 + 3]);
+ const expected = (testAcc[ci + i] + a1 + a2) | 0;
+ assertEq(expected, result[i]);
+ }
+ }
diff --git a/js/src/jit-test/tests/wasm/simd/ion-analysis.js b/js/src/jit-test/tests/wasm/simd/ion-analysis.js
new file mode 100644
index 0000000000..723b42b40b
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/ion-analysis.js
@@ -0,0 +1,902 @@
+// |jit-test| skip-if: !wasmSimdEnabled() || wasmCompileMode() != "ion" || !this.wasmSimdAnalysis
+
+// White-box tests for SIMD optimizations. These are sensitive to internal
+// details of the front-end and lowering logic, which is partly platform-dependent.
+//
+// In DEBUG builds, the testing function wasmSimdAnalysis() returns a string
+// describing the last decision made by the SIMD lowering code: to perform an
+// optimized lowering or the default byte shuffle+blend for i8x16.shuffle; to
+// shift by a constant or a variable for the various shifts; and so on.
+//
+// We test that the expected transformation applies, and that the machine code
+// generates the expected result.
+
+var isArm64 = getBuildConfiguration().arm64;
+
+// 32-bit permutation that is not a rotation.
+let perm32x4_pattern = [4, 5, 6, 7, 12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3];
+
+// Operands the same, dword permutation
+{
+ let ins = wasmCompile(`
+(module
+ (memory (export "mem") 1 1)
+ (func (export "run")
+ (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)))))
+ (func $f (param v128) (result v128)
+ (i8x16.shuffle ${perm32x4_pattern.join(' ')} (local.get 0) (local.get 0))))`);
+
+ assertEq(wasmSimdAnalysis(), "shuffle -> permute 32x4");
+
+ let mem = new Int8Array(ins.exports.mem.buffer);
+ set(mem, 16, iota(16));
+ ins.exports.run();
+ assertSame(get(mem, 0, 16), perm32x4_pattern);
+}
+
+// Right operand ignored, dword permutation
+{
+ let ins = wasmCompile(`
+(module
+ (memory (export "mem") 1 1)
+ (func (export "run")
+ (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)) (v128.load (i32.const 32)))))
+ (func $f (param v128) (param v128) (result v128)
+ (i8x16.shuffle ${perm32x4_pattern.join(' ')} (local.get 0) (local.get 1))))`);
+
+ assertEq(wasmSimdAnalysis(), "shuffle -> permute 32x4");
+
+ let mem = new Int8Array(ins.exports.mem.buffer);
+ set(mem, 16, iota(16));
+ set(mem, 32, iota(16).map(x => x+16));
+ ins.exports.run();
+ assertSame(get(mem, 0, 16), perm32x4_pattern);
+}
+
+// Left operand ignored, dword permutation
+{
+ let ins = wasmCompile(`
+(module
+ (memory (export "mem") 1 1)
+ (func (export "run")
+ (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)) (v128.load (i32.const 32)))))
+ (func $f (param v128) (param v128) (result v128)
+ (i8x16.shuffle ${perm32x4_pattern.map(x => x+16).join(' ')} (local.get 0) (local.get 1))))`);
+
+ assertEq(wasmSimdAnalysis(), "shuffle -> permute 32x4");
+
+ let mem = new Int8Array(ins.exports.mem.buffer);
+ set(mem, 16, iota(16).map(x => x+16));
+ set(mem, 32, iota(16));
+ ins.exports.run();
+ assertSame(get(mem, 0, 16), perm32x4_pattern);
+}
+
+// Operands the same, word permutation on both sides of the qword divide, with a qword swap
+{
+ let perm16x8_pattern = [12, 13, 14, 15, 10, 11, 8, 9,
+ 6, 7, 4, 5, 2, 3, 0, 1];
+ let ins = wasmCompile(`
+(module
+ (memory (export "mem") 1 1)
+ (func (export "run")
+ (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)))))
+ (func $f (param v128) (result v128)
+ (i8x16.shuffle ${perm16x8_pattern.join(' ')} (local.get 0) (local.get 0))))`);
+
+ assertEq(wasmSimdAnalysis(), "shuffle -> permute 16x8");
+
+ let mem = new Int8Array(ins.exports.mem.buffer);
+ set(mem, 16, iota(16));
+ ins.exports.run();
+ assertSame(get(mem, 0, 16), perm16x8_pattern);
+}
+
+// Operands the same, word permutation on both sides of the qword divide, no qword swap
+{
+ let perm16x8_pattern = [ 6, 7, 4, 5, 2, 3, 0, 1,
+ 12, 13, 14, 15, 10, 11, 8, 9];
+ let ins = wasmCompile(`
+(module
+ (memory (export "mem") 1 1)
+ (func (export "run")
+ (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)))))
+ (func $f (param v128) (result v128)
+ (i8x16.shuffle ${perm16x8_pattern.join(' ')} (local.get 0) (local.get 0))))`);
+
+ assertEq(wasmSimdAnalysis(), "shuffle -> permute 16x8");
+
+ let mem = new Int8Array(ins.exports.mem.buffer);
+ set(mem, 16, iota(16));
+ ins.exports.run();
+ assertSame(get(mem, 0, 16), perm16x8_pattern);
+}
+
+// Operands the same, word permutation on low side of the qword divide, no qword swap
+{
+ let perm16x8_pattern = [ 6, 7, 4, 5, 2, 3, 0, 1,
+ 8, 9, 10, 11, 12, 13, 14, 15];
+ let ins = wasmCompile(`
+(module
+ (memory (export "mem") 1 1)
+ (func (export "run")
+ (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)))))
+ (func $f (param v128) (result v128)
+ (i8x16.shuffle ${perm16x8_pattern.join(' ')} (local.get 0) (local.get 0))))`);
+
+ assertEq(wasmSimdAnalysis(), "shuffle -> permute 16x8");
+
+ let mem = new Int8Array(ins.exports.mem.buffer);
+ set(mem, 16, iota(16));
+ ins.exports.run();
+ assertSame(get(mem, 0, 16), perm16x8_pattern);
+}
+
+// Operands the same, word permutation on high side of the qword divide, no qword swap
+{
+ let perm16x8_pattern = [ 0, 1, 2, 3, 4, 5, 6, 7,
+ 12, 13, 14, 15, 10, 11, 8, 9];
+ let ins = wasmCompile(`
+(module
+ (memory (export "mem") 1 1)
+ (func (export "run")
+ (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)))))
+ (func $f (param v128) (result v128)
+ (i8x16.shuffle ${perm16x8_pattern.join(' ')} (local.get 0) (local.get 0))))`);
+
+ assertEq(wasmSimdAnalysis(), "shuffle -> permute 16x8");
+
+ let mem = new Int8Array(ins.exports.mem.buffer);
+ set(mem, 16, iota(16));
+ ins.exports.run();
+ assertSame(get(mem, 0, 16), perm16x8_pattern);
+}
+
+// Same operands, byte rotate
+{
+ // 8-bit permutation that is a rotation
+ let rot8x16_pattern = [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4];
+ let ins = wasmCompile(`
+(module
+ (memory (export "mem") 1 1)
+ (func (export "run")
+ (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)))))
+ (func $f (param v128) (result v128)
+ (i8x16.shuffle ${rot8x16_pattern.join(' ')} (local.get 0) (local.get 0))))`);
+
+ assertEq(wasmSimdAnalysis(), "shuffle -> rotate-right 8x16");
+
+ let mem = new Int8Array(ins.exports.mem.buffer);
+ set(mem, 16, iota(16));
+ ins.exports.run();
+ assertSame(get(mem, 0, 16), rot8x16_pattern);
+}
+
+// Operands the same, random jumble => byte permutation
+{
+ // 8-bit permutation that is not a rotation
+ let perm8x16_pattern = [5, 7, 6, 8, 9, 10, 11, 4, 13, 14, 15, 0, 1, 2, 3, 12];
+ let ins = wasmCompile(`
+(module
+ (memory (export "mem") 1 1)
+ (func (export "run")
+ (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)))))
+ (func $f (param v128) (result v128)
+ (i8x16.shuffle ${perm8x16_pattern.join(' ')} (local.get 0) (local.get 0))))`);
+
+ assertEq(wasmSimdAnalysis(), "shuffle -> permute 8x16");
+
+ let mem = new Int8Array(ins.exports.mem.buffer);
+ set(mem, 16, iota(16));
+ ins.exports.run();
+ assertSame(get(mem, 0, 16), perm8x16_pattern);
+}
+
+// Operands differ, both accessed, rhs is constant zero, left-shift pattern
+{
+ // Byte-wise left shift: zeroes (indices >= 16 select the zero rhs) fill the low-order lanes
+ let shift8x16_pattern = [16, 16, 16, 16, 16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+ let ins = wasmCompile(`
+(module
+ (memory (export "mem") 1 1)
+ (func (export "run")
+ (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)))))
+ (func $f (param v128) (result v128)
+ (i8x16.shuffle ${shift8x16_pattern.join(' ')} (local.get 0) (v128.const i32x4 0 0 0 0))))`);
+
+ assertEq(wasmSimdAnalysis(), "shuffle -> shift-left 8x16");
+
+ let mem = new Int8Array(ins.exports.mem.buffer);
+ set(mem, 16, iota(16));
+ ins.exports.run();
+ assertSame(get(mem, 0, 16), shift8x16_pattern.map(x => x >= 16 ? 0 : x));
+}
+
+// The same as above but the constant is lhs.
+{
+ // Byte-wise left shift: zeroes (indices < 16 select the zero lhs) fill the low-order lanes
+ let shift8x16_pattern = [16, 16, 16, 16, 16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10].map(x => x ^ 16);
+ let ins = wasmCompile(`
+(module
+ (memory (export "mem") 1 1)
+ (func (export "run")
+ (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)))))
+ (func $f (param v128) (result v128)
+ (i8x16.shuffle ${shift8x16_pattern.join(' ')} (v128.const i32x4 0 0 0 0) (local.get 0))))`);
+
+ assertEq(wasmSimdAnalysis(), "shuffle -> shift-left 8x16");
+
+ let mem = new Int8Array(ins.exports.mem.buffer);
+ set(mem, 16, iota(16));
+ ins.exports.run();
+ assertSame(get(mem, 0, 16), shift8x16_pattern.map(x => x < 16 ? 0 : x - 16));
+}
+
+// Operands differ, both accessed, rhs is constant zero, left-shift pattern that
+// does not start properly.
+{
+ // Almost a byte-wise left shift, but lane 6 selects input byte 1 rather than byte 0, so it is not a pure shift
+ let shift8x16_pattern = [16, 16, 16, 16, 16, 16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+ let ins = wasmCompile(`
+(module
+ (memory (export "mem") 1 1)
+ (func (export "run")
+ (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)))))
+ (func $f (param v128) (result v128)
+ (i8x16.shuffle ${shift8x16_pattern.join(' ')} (local.get 0) (v128.const i32x4 0 0 0 0))))`);
+
+ assertEq(wasmSimdAnalysis(), "shuffle -> shuffle+blend 8x16");
+
+ let mem = new Int8Array(ins.exports.mem.buffer);
+ set(mem, 16, iota(16));
+ ins.exports.run();
+ assertSame(get(mem, 0, 16), shift8x16_pattern.map(x => x >= 16 ? 0 : x));
+}
+
+// Operands differ, both accessed, rhs is constant zero, right-shift pattern
+{
+ // Byte-wise right shift: zeroes (indices >= 16 select the zero rhs) fill the high-order lanes
+ let shift8x16_pattern = [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 20, 20, 20, 20, 20, 20];
+ let ins = wasmCompile(`
+(module
+ (memory (export "mem") 1 1)
+ (func (export "run")
+ (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)))))
+ (func $f (param v128) (result v128)
+ (i8x16.shuffle ${shift8x16_pattern.join(' ')} (local.get 0) (v128.const i32x4 0 0 0 0))))`);
+
+ assertEq(wasmSimdAnalysis(), "shuffle -> shift-right 8x16");
+
+ let mem = new Int8Array(ins.exports.mem.buffer);
+ set(mem, 16, iota(16));
+ ins.exports.run();
+ assertSame(get(mem, 0, 16), shift8x16_pattern.map(x => x >= 16 ? 0 : x));
+}
+
+// Operands differ, both accessed, rhs is constant zero, right-shift pattern
+// that does not end properly.
+{
+ // Almost a byte-wise right shift, but lane 9 is zero rather than input byte 15, so it is not a pure shift
+ let shift8x16_pattern = [6, 7, 8, 9, 10, 11, 12, 13, 14, 20, 20, 20, 20, 20, 20, 20];
+ let ins = wasmCompile(`
+(module
+ (memory (export "mem") 1 1)
+ (func (export "run")
+ (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)))))
+ (func $f (param v128) (result v128)
+ (i8x16.shuffle ${shift8x16_pattern.join(' ')} (local.get 0) (v128.const i32x4 0 0 0 0))))`);
+
+ assertEq(wasmSimdAnalysis(), "shuffle -> shuffle+blend 8x16");
+
+ let mem = new Int8Array(ins.exports.mem.buffer);
+ set(mem, 16, iota(16));
+ ins.exports.run();
+ assertSame(get(mem, 0, 16), shift8x16_pattern.map(x => x >= 16 ? 0 : x));
+}
+
+// Operands differ and are variable, both accessed, (lhs ++ rhs) >> k
+{
+ let concat8x16_pattern = [27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+ let ins = wasmCompile(`
+(module
+ (memory (export "mem") 1 1)
+ (func (export "run")
+ (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)) (v128.load (i32.const 32)))))
+ (func $f (param v128) (param v128) (result v128)
+ (i8x16.shuffle ${concat8x16_pattern.join(' ')} (local.get 0) (local.get 1))))`);
+
+ assertEq(wasmSimdAnalysis(), "shuffle -> concat+shift-right 8x16");
+
+ let mem = new Int8Array(ins.exports.mem.buffer);
+ set(mem, 16, iota(16));
+ set(mem, 32, iota(16).map(k => k+16));
+ ins.exports.run();
+ assertSame(get(mem, 0, 16), concat8x16_pattern);
+}
+
+// Operands differ and are variable, both accessed, (rhs ++ lhs) >> k
+{
+ let concat8x16_pattern = [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26];
+ let ins = wasmCompile(`
+(module
+ (memory (export "mem") 1 1)
+ (func (export "run")
+ (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)) (v128.load (i32.const 32)))))
+ (func $f (param v128) (param v128) (result v128)
+ (i8x16.shuffle ${concat8x16_pattern.join(' ')} (local.get 0) (local.get 1))))`);
+
+ assertEq(wasmSimdAnalysis(), "shuffle -> concat+shift-right 8x16");
+
+ let mem = new Int8Array(ins.exports.mem.buffer);
+ set(mem, 16, iota(16));
+ set(mem, 32, iota(16).map(k => k+16));
+ ins.exports.run();
+ assertSame(get(mem, 0, 16), concat8x16_pattern);
+}
+
+// Operands differ, both accessed, but inputs stay in their lanes => byte blend
+{
+ let blend8x16_pattern = iota(16).map(x => (x % 3 == 0) ? x + 16 : x);
+ let ins = wasmCompile(`
+(module
+ (memory (export "mem") 1 1)
+ (func (export "run")
+ (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)) (v128.load (i32.const 32)))))
+ (func $f (param v128) (param v128) (result v128)
+ (i8x16.shuffle ${blend8x16_pattern.join(' ')} (local.get 0) (local.get 1))))`);
+
+ assertEq(wasmSimdAnalysis(), "shuffle -> blend 8x16");
+
+ let mem = new Int8Array(ins.exports.mem.buffer);
+ let lhs = iota(16);
+ let rhs = iota(16).map(x => x+16);
+ set(mem, 16, lhs);
+ set(mem, 32, rhs);
+ ins.exports.run();
+ assertSame(get(mem, 0, 16), blend8x16_pattern);
+}
+
+// Operands differ, both accessed, but inputs stay in their lanes => word blend
+{
+ let blend16x8_pattern = iota(16).map(x => (x & 2) ? x + 16 : x);
+ let ins = wasmCompile(`
+(module
+ (memory (export "mem") 1 1)
+ (func (export "run")
+ (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)) (v128.load (i32.const 32)))))
+ (func $f (param v128) (param v128) (result v128)
+ (i8x16.shuffle ${blend16x8_pattern.join(' ')} (local.get 0) (local.get 1))))`);
+
+ assertEq(wasmSimdAnalysis(), "shuffle -> blend 16x8");
+
+ let mem = new Int8Array(ins.exports.mem.buffer);
+ let lhs = iota(16);
+ let rhs = iota(16).map(x => x+16);
+ set(mem, 16, lhs);
+ set(mem, 32, rhs);
+ ins.exports.run();
+ assertSame(get(mem, 0, 16), blend16x8_pattern);
+}
+
+// Interleave i32x4s
+for ( let [lhs, rhs, expected] of
+ [[[0, 1], [4, 5], "shuffle -> interleave-low 32x4"],
+ [[2, 3], [6, 7], "shuffle -> interleave-high 32x4"]] ) {
+ for (let swap of [false, true]) {
+ if (swap)
+ [lhs, rhs] = [rhs, lhs];
+ let interleave_pattern = i32ToI8(interleave(lhs, rhs));
+ let ins = wasmCompile(`
+(module
+ (memory (export "mem") 1 1)
+ (func (export "run")
+ (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)) (v128.load (i32.const 32)))))
+ (func $f (param v128) (param v128) (result v128)
+ (i8x16.shuffle ${interleave_pattern.join(' ')} (local.get 0) (local.get 1))))`);
+
+ assertEq(wasmSimdAnalysis(), expected);
+
+ let mem = new Int8Array(ins.exports.mem.buffer);
+ let lhsval = iota(16);
+ let rhsval = iota(16).map(x => x+16);
+ set(mem, 16, lhsval);
+ set(mem, 32, rhsval);
+ ins.exports.run();
+ assertSame(get(mem, 0, 16), interleave_pattern);
+ }
+}
+
+// Interleave i64x2s
+for ( let [lhs, rhs, expected] of
+ [[[0], [2], "shuffle -> interleave-low 64x2"],
+ [[1], [3], "shuffle -> interleave-high 64x2"]] ) {
+ for (let swap of [false, true]) {
+ if (swap)
+ [lhs, rhs] = [rhs, lhs];
+ let interleave_pattern = i64ToI2(interleave(lhs, rhs));
+ let ins = wasmCompile(`
+(module
+ (memory (export "mem") 1 1)
+ (func (export "run")
+ (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)) (v128.load (i32.const 32)))))
+ (func $f (param v128) (param v128) (result v128)
+ (i8x16.shuffle ${interleave_pattern.join(' ')} (local.get 0) (local.get 1))))`);
+
+ assertEq(wasmSimdAnalysis(), expected);
+
+ let mem = new Int8Array(ins.exports.mem.buffer);
+ let lhsval = iota(16);
+ let rhsval = iota(16).map(x => x+16);
+ set(mem, 16, lhsval);
+ set(mem, 32, rhsval);
+ ins.exports.run();
+ assertSame(get(mem, 0, 16), interleave_pattern);
+ }
+}
+
+// Interleave i16x8s
+for ( let [lhs, rhs, expected] of
+ [[[0, 1, 2, 3], [8, 9, 10, 11], "shuffle -> interleave-low 16x8"],
+ [[4, 5, 6, 7], [12, 13, 14, 15], "shuffle -> interleave-high 16x8"]] ) {
+ for (let swap of [false, true]) {
+ if (swap)
+ [lhs, rhs] = [rhs, lhs];
+ let interleave_pattern = i16ToI8(interleave(lhs, rhs));
+ let ins = wasmCompile(`
+(module
+ (memory (export "mem") 1 1)
+ (func (export "run")
+ (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)) (v128.load (i32.const 32)))))
+ (func $f (param v128) (param v128) (result v128)
+ (i8x16.shuffle ${interleave_pattern.join(' ')} (local.get 0) (local.get 1))))`);
+
+ assertEq(wasmSimdAnalysis(), expected);
+
+ let mem = new Int8Array(ins.exports.mem.buffer);
+ let lhsval = iota(16);
+ let rhsval = iota(16).map(x => x+16);
+ set(mem, 16, lhsval);
+ set(mem, 32, rhsval);
+ ins.exports.run();
+ assertSame(get(mem, 0, 16), interleave_pattern);
+ }
+}
+
+// Interleave i8x16s
+for ( let [lhs, rhs, expected] of
+ [[[0, 1, 2, 3, 4, 5, 6, 7], [16, 17, 18, 19, 20, 21, 22, 23], "shuffle -> interleave-low 8x16"],
+ [[8, 9, 10, 11, 12, 13, 14, 15],[24, 25, 26, 27, 28, 29, 30, 31], "shuffle -> interleave-high 8x16"]] ) {
+ for (let swap of [false, true]) {
+ if (swap)
+ [lhs, rhs] = [rhs, lhs];
+ let interleave_pattern = interleave(lhs, rhs);
+ let ins = wasmCompile(`
+(module
+ (memory (export "mem") 1 1)
+ (func (export "run")
+ (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)) (v128.load (i32.const 32)))))
+ (func $f (param v128) (param v128) (result v128)
+ (i8x16.shuffle ${interleave_pattern.join(' ')} (local.get 0) (local.get 1))))`);
+
+ assertEq(wasmSimdAnalysis(), expected);
+
+ let mem = new Int8Array(ins.exports.mem.buffer);
+ let lhsval = iota(16);
+ let rhsval = iota(16).map(x => x+16);
+ set(mem, 16, lhsval);
+ set(mem, 32, rhsval);
+ ins.exports.run();
+ assertSame(get(mem, 0, 16), interleave_pattern);
+ }
+}
+
+// Operands differ, both accessed, random jumble => byte shuffle+blend
+{
+ let blend_perm8x16_pattern = [5, 23, 6, 24, 9, 10, 11, 7, 7, 14, 15, 19, 1, 2, 3, 12];
+ let ins = wasmCompile(`
+(module
+ (memory (export "mem") 1 1)
+ (func (export "run")
+ (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)) (v128.load (i32.const 32)))))
+ (func $f (param v128) (param v128) (result v128)
+ (i8x16.shuffle ${blend_perm8x16_pattern.join(' ')} (local.get 0) (local.get 1))))`);
+
+ assertEq(wasmSimdAnalysis(), "shuffle -> shuffle+blend 8x16");
+
+ let mem = new Int8Array(ins.exports.mem.buffer);
+ let lhs = iota(16).map(x => x+16);
+ let rhs = iota(16);
+ set(mem, 16, lhs);
+ set(mem, 32, rhs);
+ ins.exports.run();
+ assertSame(get(mem, 0, 16),
+ blend_perm8x16_pattern.map(x => x < 16 ? lhs[x] : rhs[x-16]));
+}
+
+// No-op, ignoring right operand, should turn into a move.
+{
+ let nop8x16_pattern = iota(16);
+ let ins = wasmCompile(`
+(module
+ (memory (export "mem") 1 1)
+ (func (export "run")
+ (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)) (v128.load (i32.const 32)))))
+ (func $f (param v128) (param v128) (result v128)
+ (i8x16.shuffle ${nop8x16_pattern.join(' ')} (local.get 0) (local.get 1))))`);
+
+ assertEq(wasmSimdAnalysis(), "shuffle -> move");
+
+ let mem = new Int8Array(ins.exports.mem.buffer);
+ set(mem, 16, iota(16));
+ set(mem, 32, iota(16).map(x => x+16));
+ ins.exports.run();
+ assertSame(get(mem, 0, 16), nop8x16_pattern);
+}
+
+// No-op, ignoring left operand, should turn into a move.
+{
+ let nop8x16_pattern = iota(16).map(x => x+16);
+ let ins = wasmCompile(`
+(module
+ (memory (export "mem") 1 1)
+ (func (export "run")
+ (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)) (v128.load (i32.const 32)))))
+ (func $f (param v128) (param v128) (result v128)
+ (i8x16.shuffle ${nop8x16_pattern.join(' ')} (local.get 0) (local.get 1))))`);
+
+ assertEq(wasmSimdAnalysis(), "shuffle -> move");
+
+ let mem = new Int8Array(ins.exports.mem.buffer);
+ set(mem, 16, iota(16));
+ set(mem, 32, iota(16).map(x => x+16));
+ ins.exports.run();
+ assertSame(get(mem, 0, 16), nop8x16_pattern);
+}
+
+// Broadcast byte
+for ( let byte of [3, 11, 8, 2] ) {
+ let broadcast8x16_pattern = iota(16).map(_ => byte);
+ let ins = wasmCompile(`
+(module
+ (memory (export "mem") 1 1)
+ (func (export "run")
+ (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)))))
+ (func $f (param v128) (result v128)
+ (i8x16.shuffle ${broadcast8x16_pattern.join(' ')} (local.get 0) (local.get 0))))`);
+
+ assertEq(wasmSimdAnalysis(), "shuffle -> broadcast 8x16");
+
+ let mem = new Int8Array(ins.exports.mem.buffer);
+ set(mem, 16, iota(16));
+ ins.exports.run();
+ assertSame(get(mem, 0, 16), broadcast8x16_pattern);
+}
+
+// Broadcast word from high quadword
+{
+ let broadcast16x8_pattern = [10, 11, 10, 11, 10, 11, 10, 11, 10, 11, 10, 11, 10, 11, 10, 11];
+ let ins = wasmCompile(`
+(module
+ (memory (export "mem") 1 1)
+ (func (export "run")
+ (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)))))
+ (func $f (param v128) (result v128)
+ (i8x16.shuffle ${broadcast16x8_pattern.join(' ')} (local.get 0) (local.get 0))))`);
+
+ assertEq(wasmSimdAnalysis(), "shuffle -> broadcast 16x8");
+
+ let mem = new Int8Array(ins.exports.mem.buffer);
+ set(mem, 16, iota(16));
+ ins.exports.run();
+ assertSame(get(mem, 0, 16), broadcast16x8_pattern);
+}
+
+// Broadcast word from low quadword
+{
+ let broadcast16x8_pattern = [4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5];
+ let ins = wasmCompile(`
+(module
+ (memory (export "mem") 1 1)
+ (func (export "run")
+ (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)))))
+ (func $f (param v128) (result v128)
+ (i8x16.shuffle ${broadcast16x8_pattern.join(' ')} (local.get 0) (local.get 0))))`);
+
+ assertEq(wasmSimdAnalysis(), "shuffle -> broadcast 16x8");
+
+ let mem = new Int8Array(ins.exports.mem.buffer);
+ set(mem, 16, iota(16));
+ ins.exports.run();
+ assertSame(get(mem, 0, 16), broadcast16x8_pattern);
+}
+
+// Broadcast dword from low quadword should turn into a dword permute
+{
+ let broadcast32x4_pattern = [4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7];
+ let ins = wasmCompile(`
+(module
+ (memory (export "mem") 1 1)
+ (func (export "run")
+ (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)))))
+ (func $f (param v128) (result v128)
+ (i8x16.shuffle ${broadcast32x4_pattern.join(' ')} (local.get 0) (local.get 0))))`);
+
+ assertEq(wasmSimdAnalysis(), "shuffle -> permute 32x4");
+
+ let mem = new Int8Array(ins.exports.mem.buffer);
+ set(mem, 16, iota(16));
+ ins.exports.run();
+ assertSame(get(mem, 0, 16), broadcast32x4_pattern);
+}
+
+// Broadcast high qword should turn into a dword permute
+{
+    // Selecting bytes 8..15 twice copies the high qword into both halves of
+    // the result.
+    let broadcast64x2_pattern = [8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15];
+    // "run" loads the input from offset 16, shuffles it with itself through
+    // $f, and stores the result at offset 0.
+    let ins = wasmCompile(`
+(module
+ (memory (export "mem") 1 1)
+ (func (export "run")
+ (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)))))
+ (func $f (param v128) (result v128)
+ (i8x16.shuffle ${broadcast64x2_pattern.join(' ')} (local.get 0) (local.get 0))))`);
+
+    // The analysis must be queried right after compilation: it reports the
+    // last lowering decision made.
+    assertEq(wasmSimdAnalysis(), "shuffle -> permute 32x4");
+
+    // With the input bytes set to 0..15, each output byte equals the index
+    // that selected it, so the result must equal the pattern itself.
+    let mem = new Int8Array(ins.exports.mem.buffer);
+    set(mem, 16, iota(16));
+    ins.exports.run();
+    assertSame(get(mem, 0, 16), broadcast64x2_pattern);
+}
+
+// Byte reversal should be a byte permute
+{
+ let rev8x16_pattern = iota(16).reverse();
+ let ins = wasmCompile(`
+(module
+ (memory (export "mem") 1 1)
+ (func (export "run")
+ (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)))))
+ (func $f (param v128) (result v128)
+ (i8x16.shuffle ${rev8x16_pattern.join(' ')} (local.get 0) (local.get 0))))`);
+
+ assertEq(wasmSimdAnalysis(), "shuffle -> permute 8x16");
+
+ let mem = new Int8Array(ins.exports.mem.buffer);
+ set(mem, 16, iota(16));
+ ins.exports.run();
+ assertSame(get(mem, 0, 16), rev8x16_pattern);
+}
+
+// Byte swaps within 16-bit (word), 32-bit (dword) and 64-bit (qword) groups
+// should be recognized by the analysis as "reverse bytes in N-bit lanes".
+for (let k of [2, 4, 8]) {
+ let rev8_pattern = iota(16).map(i => i ^ (k - 1));
+ let ins = wasmCompile(`
+(module
+(memory (export "mem") 1 1)
+(func (export "run")
+ (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)))))
+(func $f (param v128) (result v128)
+ (i8x16.shuffle ${rev8_pattern.join(' ')} (local.get 0) (local.get 0))))`);
+
+ assertEq(wasmSimdAnalysis(), `shuffle -> reverse bytes in ${8 * k}-bit lanes`);
+
+ let mem = new Int8Array(ins.exports.mem.buffer);
+ set(mem, 16, iota(16));
+ ins.exports.run();
+ assertSame(get(mem, 0, 16), rev8_pattern);
+}
+
+// Word reversal should be a word permute
+{
+ let rev16x8_pattern = i16ToI8(iota(8).reverse());
+ let ins = wasmCompile(`
+(module
+ (memory (export "mem") 1 1)
+ (func (export "run")
+ (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)))))
+ (func $f (param v128) (result v128)
+ (i8x16.shuffle ${rev16x8_pattern.join(' ')} (local.get 0) (local.get 0))))`);
+
+ assertEq(wasmSimdAnalysis(), "shuffle -> permute 16x8");
+
+ let mem = new Int8Array(ins.exports.mem.buffer);
+ set(mem, 16, iota(16));
+ ins.exports.run();
+ assertSame(get(mem, 0, 16), rev16x8_pattern);
+}
+
+// Dword reversal should be a dword permute
+{
+ let rev32x4_pattern = i32ToI8([3, 2, 1, 0]);
+ let ins = wasmCompile(`
+(module
+ (memory (export "mem") 1 1)
+ (func (export "run")
+ (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)))))
+ (func $f (param v128) (result v128)
+ (i8x16.shuffle ${rev32x4_pattern.join(' ')} (local.get 0) (local.get 0))))`);
+
+ assertEq(wasmSimdAnalysis(), "shuffle -> permute 32x4");
+
+ let mem = new Int8Array(ins.exports.mem.buffer);
+ set(mem, 16, iota(16));
+ ins.exports.run();
+ assertSame(get(mem, 0, 16), rev32x4_pattern);
+}
+
+// Qword reversal should be a dword permute
+{
+ let rev64x2_pattern = i32ToI8([2, 3, 0, 1]);
+ let ins = wasmCompile(`
+(module
+ (memory (export "mem") 1 1)
+ (func (export "run")
+ (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)))))
+ (func $f (param v128) (result v128)
+ (i8x16.shuffle ${rev64x2_pattern.join(' ')} (local.get 0) (local.get 0))))`);
+
+ assertEq(wasmSimdAnalysis(), "shuffle -> permute 32x4");
+
+ let mem = new Int8Array(ins.exports.mem.buffer);
+ set(mem, 16, iota(16));
+ ins.exports.run();
+ assertSame(get(mem, 0, 16), rev64x2_pattern);
+}
+
+// In the case of shifts, we have separate tests that constant shifts work
+// correctly, so no such testing is done here.
+
+// For every integer lane shape and every shift kind, compile a function with
+// a single shift and check how the lowering classified it: a constant count
+// must be reported as a constant shift, a count taken from a local as a
+// variable (optionally "scalarized") shift.
+for ( let lanes of ['i8x16', 'i16x8', 'i32x4', 'i64x2'] ) {
+    for ( let shift of ['shl', 'shr_s', 'shr_u'] ) {
+        for ( let [count, expected] of [['(i32.const 5)', /shift -> constant shift/],
+                                        ['(local.get 1)', /shift -> variable(?: scalarized)? shift/]] ) {
+            wasmCompile(`(module (func (param v128) (param i32) (result v128) (${lanes}.${shift} (local.get 0) ${count})))`);
+            let analysis = wasmSimdAnalysis();
+            // Test the pattern directly.  String.prototype.match returns null
+            // when nothing matches, so reading `.length` from its result would
+            // raise a TypeError instead of a readable assertion failure.
+            assertEq(expected.test(analysis), true,
+                     `${lanes}.${shift} ${count}: unexpected analysis "${analysis}"`);
+        }
+    }
+}
+
+// Constant folding scalar->simd. There are functional tests for all these in
+// ad-hack.js so here we only check that the transformation is triggered.
+
+for ( let [ty128, ty] of [['i8x16', 'i32'], ['i16x8', 'i32'], ['i32x4', 'i32'],
+ ['i64x2', 'i64'], ['f32x4', 'f32'], ['f64x2', 'f64']] )
+{
+ wasmCompile(`(module (func (result v128) (${ty128}.splat (${ty}.const 37))))`);
+ assertEq(wasmSimdAnalysis(), "scalar-to-simd128 -> constant folded");
+}
+
+// Ditto simd->scalar.
+
+for ( let [ty128, suffix] of [['i8x16', '_s'], ['i8x16', '_u'], ['i16x8','_s'], ['i16x8','_u'], ['i32x4', '']] ) {
+ for ( let op of ['any_true', 'all_true', 'bitmask', `extract_lane${suffix} 0`] ) {
+ let operation = op == 'any_true' ? 'v128.any_true' : `${ty128}.${op}`;
+ wasmCompile(`(module (func (result i32) (${operation} (v128.const i64x2 0 0))))`);
+ assertEq(wasmSimdAnalysis(), "simd128-to-scalar -> constant folded");
+ }
+}
+
+for ( let ty128 of ['f32x4','f64x2','i64x2'] ) {
+ wasmCompile(`(module (func (result ${ty128.match(/(...)x.*/)[1]}) (${ty128}.extract_lane 0 (v128.const i64x2 0 0))))`);
+ assertEq(wasmSimdAnalysis(), "simd128-to-scalar -> constant folded");
+}
+
+// Optimizing all_true, any_true, and bitmask that are used for control flow, also when negated.
+
+for ( let [ty128,size] of [['i8x16',1], ['i16x8',2], ['i32x4',4]] ) {
+ let all = iota(16/size).map(n => n*n);
+ let some = iota(16/size).map(n => n*(n % 3));
+ let none = iota(16/size).map(n => 0);
+ let inputs = [all, some, none];
+ let ops = { all_true: allTrue, any_true: anyTrue, bitmask };
+
+ for ( let op of ['any_true', 'all_true', 'bitmask'] ) {
+ let folded = op != 'bitmask' || (size == 2 && !isArm64);
+ let operation = op == 'any_true' ? 'v128.any_true' : `${ty128}.${op}`;
+ let positive =
+ wasmCompile(
+ `(module
+ (memory (export "mem") 1 1)
+ (func $f (param v128) (result i32)
+ (if (result i32) (${operation} (local.get 0))
+ (i32.const 42)
+ (i32.const 37)))
+ (func (export "run") (result i32)
+ (call $f (v128.load (i32.const 16)))))`);
+ assertEq(wasmSimdAnalysis(), folded ? "simd128-to-scalar-and-branch -> folded" : "none");
+
+ let negative =
+ wasmCompile(
+ `(module
+ (memory (export "mem") 1 1)
+ (func $f (param v128) (result i32)
+ (if (result i32) (i32.eqz (${operation} (local.get 0)))
+ (i32.const 42)
+ (i32.const 37)))
+ (func (export "run") (result i32)
+ (call $f (v128.load (i32.const 16)))))`);
+ assertEq(wasmSimdAnalysis(), folded ? "simd128-to-scalar-and-branch -> folded" : "none");
+
+ for ( let inp of inputs ) {
+ let mem = new this[`Int${8*size}Array`](positive.exports.mem.buffer);
+ set(mem, 16/size, inp);
+ assertEq(positive.exports.run(), ops[op](inp) ? 42 : 37);
+
+ mem = new this[`Int${8*size}Array`](negative.exports.mem.buffer);
+ set(mem, 16/size, inp);
+ assertEq(negative.exports.run(), ops[op](inp) ? 37 : 42);
+ }
+ }
+}
+
+// Constant folding
+
+{
+ // Swizzle-with-constant rewritten as shuffle, and then further optimized
+ // into a dword permute. Correctness is tested in ad-hack.js.
+ wasmCompile(`
+(module (func (param v128) (result v128)
+ (i8x16.swizzle (local.get 0) (v128.const i8x16 4 5 6 7 0 1 2 3 12 13 14 15 8 9 10 11))))
+`);
+ assertEq(wasmSimdAnalysis(), "shuffle -> permute 32x4");
+}
+
+// Bitselect with constant mask folded into shuffle operation
+
+if (!isArm64) {
+ wasmCompile(`
+ (module (func (param v128) (param v128) (result v128)
+ (v128.bitselect (local.get 0) (local.get 1) (v128.const i8x16 0 -1 -1 0 0 0 0 0 -1 -1 -1 -1 -1 -1 0 0))))
+ `);
+ assertEq(wasmSimdAnalysis(), "shuffle -> blend 8x16");
+}
+
+// Library
+
+// Assemble the given WebAssembly text, compile the binary into a module, and
+// return a new instance of that module (no imports are supplied).
+function wasmCompile(text) {
+    let binary = wasmTextToBinary(text);
+    let module = new WebAssembly.Module(binary);
+    return new WebAssembly.Instance(module);
+}
+
+// Return a plain array holding the `len` consecutive elements of `arr` that
+// start at index `loc`.
+function get(arr, loc, len) {
+    let out = [];
+    let i = 0;
+    while (i < len) {
+        out.push(arr[loc + i]);
+        i++;
+    }
+    return out;
+}
+
+// Copy every element of `vals`, in order, into `arr` starting at index `loc`.
+function set(arr, loc, vals) {
+    let i = 0;
+    while (i < vals.length) {
+        arr[loc + i] = vals[i];
+        i++;
+    }
+}
+
+// Expand each 32-bit lane index into the four byte indices that the lane
+// covers, returning one flat array.
+function i32ToI8(xs) {
+    return xs.flatMap(lane => {
+        let base = lane*4;
+        return [base, base+1, base+2, base+3];
+    });
+}
+
+// Expand each 64-bit lane index into the eight byte indices that the lane
+// covers, returning one flat array.  Despite the "I2" in the name, the output
+// is byte indices, just as for i32ToI8 and i16ToI8.
+function i64ToI2(xs) {
+    return xs.flatMap(lane => {
+        let base = lane*8;
+        return [base, base+1, base+2, base+3,
+                base+4, base+5, base+6, base+7];
+    });
+}
+
+// Expand each 16-bit lane index into the two byte indices that the lane
+// covers, returning one flat array.
+function i16ToI8(xs) {
+    return xs.flatMap(lane => {
+        let base = lane*2;
+        return [base, base+1];
+    });
+}
+
+// Reference for all_true: true when no lane of `xs` equals zero.
+function allTrue(xs) {
+    let hasZeroLane = xs.some(lane => lane == 0);
+    return !hasZeroLane;
+}
+
+// Reference for any_true: true when at least one lane of `xs` is nonzero.
+function anyTrue(xs) {
+    let allZero = xs.every(lane => lane == 0);
+    return !allZero;
+}
+
+// Reference for bitmask: gather the top bit of every lane of `xs` into an
+// integer, with the first lane visited landing in bit 0.  The lane width is
+// derived from the lane count as 128/xs.length bits.
+function bitmask(xs) {
+    let topBit = 128/xs.length - 1;
+    let position = 0;
+    return xs.reduce((mask, lane) => {
+        let bit = (lane >>> topBit) & 1;
+        return mask | (bit << position++);
+    }, 0);
+}
diff --git a/js/src/jit-test/tests/wasm/simd/ion-bug1641973.js b/js/src/jit-test/tests/wasm/simd/ion-bug1641973.js
new file mode 100644
index 0000000000..7dfdf3afad
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/ion-bug1641973.js
@@ -0,0 +1,13 @@
+// |jit-test| skip-if: !wasmSimdEnabled()
+
+// Fuzz test case. The initial unreachable will result in the subsequent
+// i8x16.shuffle popping null pointers off the value stack. Due to a missing
+// isDeadCode() check in WasmIonCompile.cpp the compiler would dereference those
+// null pointers.
+new WebAssembly.Module(wasmTextToBinary(`
+(module
+ (func (result v128)
+ (unreachable)
+ (i8x16.shuffle 0 0 23 0 4 4 4 4 4 16 1 0 4 4 4 4)))
+`))
+
diff --git a/js/src/jit-test/tests/wasm/simd/ion-bug1688262.js b/js/src/jit-test/tests/wasm/simd/ion-bug1688262.js
new file mode 100644
index 0000000000..ebb2f72864
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/ion-bug1688262.js
@@ -0,0 +1,22 @@
+// |jit-test| skip-if: !wasmSimdEnabled()
+
+// Shuffle pattern incorrectly recognized as a rotate due to a missing guard in
+// the optimizer.
+
+let ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1)
+ (func (export "test")
+ (v128.store (i32.const 0)
+ (i8x16.shuffle 0 1 2 3 4 5 6 7 8 0 1 2 3 4 5 6
+ (v128.load (i32.const 16))
+ (v128.const i32x4 0 0 0 0)))))
+`);
+
+let mem = new Int8Array(ins.exports.mem.buffer);
+let input = [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25];
+let output = [10, 11, 12, 13, 14, 15, 16, 17, 18, 10, 11, 12, 13, 14, 15, 16];
+mem.set(input, 16);
+ins.exports.test();
+let result = Array.from(mem.subarray(0, 16));
+assertDeepEq(output, result);
diff --git a/js/src/jit-test/tests/wasm/simd/ion-bug1688713.js b/js/src/jit-test/tests/wasm/simd/ion-bug1688713.js
new file mode 100644
index 0000000000..86a2ff0b3c
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/ion-bug1688713.js
@@ -0,0 +1,24 @@
+// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x64 || getBuildConfiguration().simulator || isAvxPresent(); include:codegen-x64-test.js
+
+// This checks that we emit a REX prefix that includes the SIB index when
+// appropriate.
+//
+// This test case is a little tricky. On Win64, the arg registers are rcx, rdx,
+// r8, r9; so we want to use local 2 or 3 as the index. But on other x64
+// platforms, the arg registers are rdi, rsi, rdx, rcx, r8, r9; so we want to
+// use local 4 or 5 as the index. This test uses both, and then looks for a hit
+// on the REX byte which must be 0x43. Before the bugfix, since the index
+// register was ignored, the byte would always be 0x41, as it will continue to
+// be for the access that does not use an extended register.
+//
+// The test is brittle: the register allocator can easily make a mess of it.
+// But for now it works.
+
+codegenTestX64_adhoc(
+`(module
+ (memory 1)
+ (func $f (export "f") (param i32) (param i32) (param i32) (param i32) (param i32) (result v128)
+ (i32x4.add (v128.load8x8_s (local.get 4)) (v128.load8x8_s (local.get 2)))))`,
+ 'f',
+ `66 43 0f 38 20 .. .. pmovsxbwq \\(%r15,%r(8|9|10|11|12|13),1\\), %xmm[0-9]+`,
+ {no_prefix: true, no_suffix: true, log:true});
diff --git a/js/src/jit-test/tests/wasm/simd/js-api.js b/js/src/jit-test/tests/wasm/simd/js-api.js
new file mode 100644
index 0000000000..ea7f3fb147
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/js-api.js
@@ -0,0 +1,130 @@
+// |jit-test| test-also=--no-threads; skip-if: !wasmSimdEnabled()
+
+// SIMD JS API
+//
+// As of 31 March 2020 the SIMD spec is very light on information about the JS
+// API, and what it has is ridden with misspellings, grammatical errors, and
+// apparent redundancies. The rules below represent my best effort at
+// understanding the intent of the spec. As far as I can tell, the rules for
+// v128 are intended to match the rules for i64 in the Wasm MVP.
+
+// Hopefully, these are enough to test that various JIT stubs are generated and
+// used if we run the tests in a loop.
+
+setJitCompilerOption("baseline.warmup.trigger", 2);
+setJitCompilerOption("ion.warmup.trigger", 4);
+
+// RULE: v128 cannot cross the JS/wasm boundary as a function parameter.
+//
+// A wasm function that:
+// - takes or returns v128
+// - was imported into wasm
+// - is ultimately a JS function
+// should always throw TypeError when called from wasm.
+//
+// Note, JIT exit stubs should be generated here because settings above should
+// cause the JIT to tier up.
+
+var ins = wasmEvalText(`
+ (module
+ (import "m" "v128_param" (func $f (param v128)))
+ (import "m" "v128_return" (func $g (result v128)))
+ (func (export "v128_param")
+ (call $f (v128.const i32x4 0 0 0 0)))
+ (func (export "v128_result")
+ (drop (call $g))))`,
+ {m:{v128_param: (x) => 0,
+ v128_return: () => 0}});
+
+function call_v128_param() { ins.exports.v128_param(); }
+function call_v128_result() { ins.exports.v128_result(); }
+
+for ( let i = 0 ; i < 100; i++ ) {
+ assertErrorMessage(call_v128_param,
+ TypeError,
+ /cannot pass.*v128.*to or from JS/);
+ assertErrorMessage(call_v128_result,
+ TypeError,
+ /cannot pass.*v128.*to or from JS/);
+}
+
+// RULE: v128 cannot cross the JS/wasm boundary as a function parameter.
+//
+// A wasm function that:
+// - takes or returns v128
+// - is exported from wasm
+// - is ultimately a true wasm function
+// should always throw TypeError when called from JS.
+//
+// Note, JIT entry stubs should be generated here because settings above should
+// cause the JIT to tier up.
+
+var ins2 = wasmEvalText(`
+ (module
+ (func (export "v128_param") (param v128) (result i32)
+ (i32.const 0))
+ (func (export "v128_result") (result v128)
+ (v128.const i32x4 0 0 0 0)))`);
+
+function call_v128_param2() { ins2.exports.v128_param(); }
+function call_v128_result2() { ins2.exports.v128_result(); }
+
+for ( let i = 0 ; i < 100; i++ ) {
+ assertErrorMessage(call_v128_param2,
+ TypeError,
+ /cannot pass.*v128.*to or from JS/);
+ assertErrorMessage(call_v128_result2,
+ TypeError,
+ /cannot pass.*v128.*to or from JS/);
+}
+
+// RULE: The rules about v128 passing into or out of a function apply even when
+// an imported JS function is re-exported and is then called.
+
+var newfn = (x) => x;
+var ins = wasmEvalText(`
+ (module
+ (import "m" "fn" (func $f (param v128) (result v128)))
+ (export "newfn" (func $f)))`,
+ {m:{fn: newfn}});
+assertErrorMessage(() => ins.exports.newfn(3),
+ TypeError,
+ /cannot pass.*v128.*to or from JS/);
+
+// RULE (stale; contradicted by the next RULE and its tests): WebAssembly.Global
+// of type v128 is constructable from JS with a default value.
+
+
+// RULE: WebAssembly.Global constructor for type v128 is not constructable with
+// or without a default value.
+
+assertErrorMessage(() => new WebAssembly.Global({value: "v128"}, 37),
+ TypeError,
+ /cannot pass.*v128.*to or from JS/);
+assertErrorMessage(() => new WebAssembly.Global({value: "v128"}),
+ TypeError,
+ /cannot pass.*v128.*to or from JS/);
+assertErrorMessage(() => new WebAssembly.Global({value: "v128", mutable: true}),
+ TypeError,
+ /cannot pass.*v128.*to or from JS/);
+
+// RULE: WebAssembly.Global objects of type v128 have getters and setters that
+// throw TypeError when called from JS.
+
+let {gi, gm} = wasmEvalText(`
+ (module
+ (global (export "gi") v128 v128.const i64x2 0 0)
+ (global (export "gm") (mut v128) v128.const i64x2 0 0)
+ )`).exports;
+
+assertErrorMessage(() => gi.value,
+ TypeError,
+ /cannot pass.*v128.*to or from JS/);
+assertErrorMessage(() => gi.valueOf(),
+ TypeError,
+ /cannot pass.*v128.*to or from JS/);
+assertErrorMessage(() => gm.value = 0,
+ TypeError,
+ /cannot pass.*v128.*to or from JS/);
+
+
diff --git a/js/src/jit-test/tests/wasm/simd/neg-abs-not-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/neg-abs-not-x64-ion-codegen.js
new file mode 100644
index 0000000000..0ae75f38fb
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/neg-abs-not-x64-ion-codegen.js
@@ -0,0 +1,34 @@
+// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x64 || getBuildConfiguration().simulator; include:codegen-x64-test.js
+
+// Test that there are no extraneous moves for variable SIMD negate, abs, and
+// not instructions. See README-codegen.md for general information about this
+// type of test case.
+
+// Integer negates don't have to reuse the input for the output, and prefer for
+// the registers to be different. So use parameter 1 and ignore parameter 0.
+
+codegenTestX64_IGNOREDxv128_v128(
+ [['i8x16.neg', `
+66 0f ef c0 pxor %xmm0, %xmm0
+66 0f f8 c1 psubb %xmm1, %xmm0`],
+ ['i16x8.neg', `
+66 0f ef c0 pxor %xmm0, %xmm0
+66 0f f9 c1 psubw %xmm1, %xmm0`],
+ ['i32x4.neg', `
+66 0f ef c0 pxor %xmm0, %xmm0
+66 0f fa c1 psubd %xmm1, %xmm0`],
+ ['i64x2.neg', `
+66 0f ef c0 pxor %xmm0, %xmm0
+66 0f fb c1 psubq %xmm1, %xmm0`]] );
+
+// Floating point negate and absolute value, and bitwise not, prefer for the
+// registers to be the same and guarantee that no move is inserted if so.
+
+codegenTestX64_v128_v128(
+ [['f32x4.neg', `66 0f ef 05 ${RIPRADDR} pxorx ${RIPR}, %xmm0`],
+ ['f64x2.neg', `66 0f ef 05 ${RIPRADDR} pxorx ${RIPR}, %xmm0`],
+ ['f32x4.abs', `66 0f db 05 ${RIPRADDR} pandx ${RIPR}, %xmm0`],
+ ['f64x2.abs', `66 0f db 05 ${RIPRADDR} pandx ${RIPR}, %xmm0`],
+ ['v128.not', `
+66 45 0f 75 ff pcmpeqw %xmm15, %xmm15
+66 41 0f ef c7 pxor %xmm15, %xmm0`]] );
diff --git a/js/src/jit-test/tests/wasm/simd/pairwise-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/pairwise-x64-ion-codegen.js
new file mode 100644
index 0000000000..53ab47fdb8
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/pairwise-x64-ion-codegen.js
@@ -0,0 +1,38 @@
+// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x64 || getBuildConfiguration().simulator; include:codegen-x64-test.js
+
+// Tests for SIMD add pairwise instructions.
+
+if (!isAvxPresent()) {
+
+ codegenTestX64_IGNOREDxv128_v128(
+ [['i16x8.extadd_pairwise_i8x16_s', `
+66 0f 6f 05 ${RIPRADDR} movdqax ${RIPR}, %xmm0
+66 0f 38 04 c1 pmaddubsw %xmm1, %xmm0`],
+ ['i16x8.extadd_pairwise_i8x16_u', `
+66 0f 6f c1 movdqa %xmm1, %xmm0
+66 0f 38 04 05 ${RIPRADDR} pmaddubswx ${RIPR}, %xmm0`],
+ ['i32x4.extadd_pairwise_i16x8_s', `
+66 0f 6f c1 movdqa %xmm1, %xmm0
+66 0f f5 05 ${RIPRADDR} pmaddwdx ${RIPR}, %xmm0`],
+ ['i32x4.extadd_pairwise_i16x8_u', `
+66 0f 6f c1 movdqa %xmm1, %xmm0
+66 0f ef 05 ${RIPRADDR} pxorx ${RIPR}, %xmm0
+66 0f f5 05 ${RIPRADDR} pmaddwdx ${RIPR}, %xmm0
+66 0f fe 05 ${RIPRADDR} padddx ${RIPR}, %xmm0`]]);
+
+} else {
+
+ codegenTestX64_IGNOREDxv128_v128(
+ [['i16x8.extadd_pairwise_i8x16_s', `
+66 0f 6f 05 ${RIPRADDR} movdqax ${RIPR}, %xmm0
+66 0f 38 04 c1 pmaddubsw %xmm1, %xmm0`],
+ ['i16x8.extadd_pairwise_i8x16_u', `
+c4 e2 71 04 05 ${RIPRADDR} vpmaddubswx ${RIPR}, %xmm1, %xmm0`],
+ ['i32x4.extadd_pairwise_i16x8_s', `
+c5 f1 f5 05 ${RIPRADDR} vpmaddwdx ${RIPR}, %xmm1, %xmm0`],
+ ['i32x4.extadd_pairwise_i16x8_u', `
+c5 f1 ef 05 ${RIPRADDR} vpxorx ${RIPR}, %xmm1, %xmm0
+66 0f f5 05 ${RIPRADDR} pmaddwdx ${RIPR}, %xmm0
+66 0f fe 05 ${RIPRADDR} padddx ${RIPR}, %xmm0`]]);
+
+}
diff --git a/js/src/jit-test/tests/wasm/simd/pmaddubsw-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/pmaddubsw-x64-ion-codegen.js
new file mode 100644
index 0000000000..94abfd5c54
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/pmaddubsw-x64-ion-codegen.js
@@ -0,0 +1,154 @@
+// |jit-test| skip-if: !wasmSimdEnabled() || wasmCompileMode() != "ion"
+
+// Testing _mm_maddubs_epi16 / vpmaddubsw behavior for all platforms.
+//
+// Bug 1762413 adds specialization for emscripten's pattern to directly
+// emit PMADDUBSW machine code.
+
+const isX64 = getBuildConfiguration().x64 && !getBuildConfiguration().simulator;
+
+// Simple test.
+const simple = wasmTextToBinary(`(module
+ (memory (export "memory") 1 1)
+ (func $_mm_maddubs_epi16 (export "t") (param v128 v128) (result v128)
+ local.get 1
+ i32.const 8
+ i16x8.shl
+ i32.const 8
+ i16x8.shr_s
+ local.get 0
+ v128.const i32x4 0x00ff00ff 0x00ff00ff 0x00ff00ff 0x00ff00ff
+ v128.and
+ i16x8.mul
+ local.get 1
+ i32.const 8
+ i16x8.shr_s
+ local.get 0
+ i32.const 8
+ i16x8.shr_u
+ i16x8.mul
+ i16x8.add_sat_s)
+ (func (export "run")
+ i32.const 0
+ v128.const i8x16 0 2 1 2 1 2 -1 1 255 255 255 255 0 0 255 255
+ v128.const i8x16 1 0 3 4 -3 -4 -128 127 127 127 -128 -128 0 0 -128 127
+ call $_mm_maddubs_epi16
+ v128.store
+ )
+)`);
+var ins = new WebAssembly.Instance(new WebAssembly.Module(simple));
+ins.exports.run();
+var mem16 = new Int16Array(ins.exports.memory.buffer, 0, 8);
+assertSame(mem16, [0, 11, -11, -32513, 32767, -32768, 0, -255]);
+
+if (hasDisassembler() && isX64) {
+ assertEq(wasmDis(ins.exports.t, {tier:"ion", asString:true}).includes('pmaddubsw'), true);
+}
+
+if (hasDisassembler() && isX64) {
+ // The two pmaddubsw patterns share a common operand; both must still be optimized.
+ const realWorldOutput = wasmTextToBinary(`(module
+ (memory 1 1)
+ (func (export "test")
+ (local i32 i32 i32 i32 v128 v128 v128 v128 v128 v128)
+ local.get 0
+ local.get 1
+ i32.add
+ local.set 2
+ local.get 0
+ i32.const 16
+ i32.add
+ local.set 0
+ local.get 3
+ local.set 1
+ loop
+ local.get 5
+ local.get 0
+ v128.load
+ local.tee 5
+ i32.const 7
+ i8x16.shr_s
+ local.tee 8
+ local.get 1
+ v128.load offset=240
+ local.get 5
+ v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000
+ i8x16.eq
+ local.tee 7
+ v128.andnot
+ i8x16.add
+ local.get 8
+ v128.xor
+ local.tee 4
+ i32.const 8
+ i16x8.shl
+ i32.const 8
+ i16x8.shr_s
+ local.get 5
+ i8x16.abs
+ local.tee 5
+ v128.const i32x4 0x00ff00ff 0x00ff00ff 0x00ff00ff 0x00ff00ff
+ v128.and
+ local.tee 9
+ i16x8.mul
+ local.get 4
+ i32.const 8
+ i16x8.shr_s
+ local.get 5
+ i32.const 8
+ i16x8.shr_u
+ local.tee 4
+ i16x8.mul
+ i16x8.add_sat_s
+ i16x8.add_sat_s
+ local.set 5
+
+ local.get 6
+ local.get 8
+ local.get 1
+ v128.load offset=224
+ local.get 7
+ v128.andnot
+ i8x16.add
+ local.get 8
+ v128.xor
+ local.tee 6
+ i32.const 8
+ i16x8.shl
+ i32.const 8
+ i16x8.shr_s
+ local.get 9
+ i16x8.mul
+ local.get 6
+ i32.const 8
+ i16x8.shr_s
+ local.get 4
+ i16x8.mul
+ i16x8.add_sat_s
+ i16x8.add_sat_s
+ local.set 6
+
+ local.get 1
+ i32.const 128
+ i32.add
+ local.set 1
+ local.get 0
+ i32.const 16
+ i32.add
+ local.tee 0
+ local.get 2
+ i32.ne
+ br_if 0
+ end
+))`);
+
+ var ins = new WebAssembly.Instance(new WebAssembly.Module(realWorldOutput));
+ const output = wasmDis(ins.exports.test, {tier:"ion", asString:true}).replace(/^[0-9a-f]{8} (?:[0-9a-f]{2} )+\n?\s+/gmi, "");
+ // Find two pmaddubsw+paddsw.
+ const re = /\bv?pmaddubsw[^\n]+\nv?paddsw /g;
+ assertEq(re.exec(output) != null, true);
+ assertEq(re.exec(output) != null, true);
+ assertEq(re.exec(output) == null, true);
+ // No leftover PMULLW, PSLLW, or PSRAW.
+ assertEq(/pmullw|psllw|psraw/.test(output), false);
+}
diff --git a/js/src/jit-test/tests/wasm/simd/reduce-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/reduce-x64-ion-codegen.js
new file mode 100644
index 0000000000..bd59f30632
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/reduce-x64-ion-codegen.js
@@ -0,0 +1,46 @@
+// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x64 || getBuildConfiguration().simulator || isAvxPresent(); include:codegen-x64-test.js
+
+// Test encoding of the all_true, and any_true operations.
+
+codegenTestX64_v128_i32(
+ [['v128.any_true', `
+66 0f 38 17 c0 ptest %xmm0, %xmm0
+0f 95 c0 setnz %al
+0f b6 c0 movzx %al, %eax`],
+ ['i8x16.all_true', `
+66 45 0f ef ff pxor %xmm15, %xmm15
+66 44 0f 74 f8 pcmpeqb %xmm0, %xmm15
+66 45 0f 38 17 ff ptest %xmm15, %xmm15
+0f 94 c0 setz %al
+0f b6 c0 movzx %al, %eax`],
+ ['i16x8.all_true', `
+66 45 0f ef ff pxor %xmm15, %xmm15
+66 44 0f 75 f8 pcmpeqw %xmm0, %xmm15
+66 45 0f 38 17 ff ptest %xmm15, %xmm15
+0f 94 c0 setz %al
+0f b6 c0 movzx %al, %eax`],
+ ['i32x4.all_true', `
+66 45 0f ef ff pxor %xmm15, %xmm15
+66 44 0f 76 f8 pcmpeqd %xmm0, %xmm15
+66 45 0f 38 17 ff ptest %xmm15, %xmm15
+0f 94 c0 setz %al
+0f b6 c0 movzx %al, %eax`],
+ ['i64x2.all_true', `
+66 45 0f ef ff pxor %xmm15, %xmm15
+66 44 0f 38 29 f8 pcmpeqq %xmm0, %xmm15
+66 45 0f 38 17 ff ptest %xmm15, %xmm15
+0f 94 c0 setz %al
+0f b6 c0 movzx %al, %eax`]], {}
+)
+
+// Utils.
+function codegenTestX64_v128_i32(inputs, options = {}) {
+ for ( let [op, expected] of inputs ) {
+ codegenTestX64_adhoc(wrap(options, `
+ (func (export "f") (param v128) (result i32)
+ (${op} (local.get 0)))`),
+ 'f',
+ expected,
+ options);
+ }
+ }
diff --git a/js/src/jit-test/tests/wasm/simd/select.js b/js/src/jit-test/tests/wasm/simd/select.js
new file mode 100644
index 0000000000..b3535d3039
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/select.js
@@ -0,0 +1,33 @@
+// |jit-test| skip-if: !wasmSimdEnabled()
+
+wasmAssert(`
+(module
+ (func $f (param i32) (result v128)
+ (select ;; no type
+ (v128.const i32x4 1 2 3 4)
+ (v128.const i32x4 4 3 2 1)
+ (local.get 0)
+ )
+ )
+ (export "" (func 0))
+)`, [
+ { type: 'v128', func: '$f', args: ['i32.const 0'], expected: 'i32x4 4 3 2 1' },
+ { type: 'v128', func: '$f', args: ['i32.const 1'], expected: 'i32x4 1 2 3 4' },
+ { type: 'v128', func: '$f', args: ['i32.const -1'], expected: 'i32x4 1 2 3 4' },
+], {});
+
+wasmAssert(`
+(module
+ (func $f (param i32) (result v128)
+ (select (result v128)
+ (v128.const i32x4 1 2 3 4)
+ (v128.const i32x4 4 3 2 1)
+ (local.get 0)
+ )
+ )
+ (export "" (func 0))
+)`, [
+ { type: 'v128', func: '$f', args: ['i32.const 0'], expected: 'i32x4 4 3 2 1' },
+ { type: 'v128', func: '$f', args: ['i32.const 1'], expected: 'i32x4 1 2 3 4' },
+ { type: 'v128', func: '$f', args: ['i32.const -1'], expected: 'i32x4 1 2 3 4' },
+], {});
diff --git a/js/src/jit-test/tests/wasm/simd/shift-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/shift-x64-ion-codegen.js
new file mode 100644
index 0000000000..9c9f4871d2
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/shift-x64-ion-codegen.js
@@ -0,0 +1,26 @@
+// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x64 || getBuildConfiguration().simulator; include:codegen-x64-test.js
+
+// Test that there are no extraneous moves for a constant integer SIMD shift
+// that can reuse its input for its output. See README-codegen.md for general
+// information about this type of test case.
+//
+// There are test cases here for all codegen cases that include a potential move
+// to set up the operation, but not for all shift operations in general.
+
+codegenTestX64_v128xLITERAL_v128(
+ [['i8x16.shl', '(i32.const 2)', `
+66 0f fc c0 paddb %xmm0, %xmm0
+66 0f fc c0 paddb %xmm0, %xmm0`],
+ ['i16x8.shl', '(i32.const 2)', `66 0f 71 f0 02 psllw \\$0x02, %xmm0`],
+ ['i32x4.shl', '(i32.const 2)', `66 0f 72 f0 02 pslld \\$0x02, %xmm0`],
+ ['i64x2.shl', '(i32.const 2)', `66 0f 73 f0 02 psllq \\$0x02, %xmm0`],
+ ['i8x16.shr_u', '(i32.const 2)', `
+66 0f db 05 ${RIPRADDR} pandx ${RIPR}, %xmm0
+66 0f 71 d0 02 psrlw \\$0x02, %xmm0`],
+ ['i16x8.shr_s', '(i32.const 2)', `66 0f 71 e0 02 psraw \\$0x02, %xmm0`],
+ ['i16x8.shr_u', '(i32.const 2)', `66 0f 71 d0 02 psrlw \\$0x02, %xmm0`],
+ ['i32x4.shr_s', '(i32.const 2)', `66 0f 72 e0 02 psrad \\$0x02, %xmm0`],
+ ['i32x4.shr_u', '(i32.const 2)', `66 0f 72 d0 02 psrld \\$0x02, %xmm0`],
+ ['i64x2.shr_u', '(i32.const 2)', `66 0f 73 d0 02 psrlq \\$0x02, %xmm0`]] );
+
+
diff --git a/js/src/jit-test/tests/wasm/simd/shuffle-x86-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/shuffle-x86-ion-codegen.js
new file mode 100644
index 0000000000..b9d0cc0e82
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/shuffle-x86-ion-codegen.js
@@ -0,0 +1,88 @@
+// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x64 || getBuildConfiguration().simulator || isAvxPresent(); include:codegen-x64-test.js
+
+// Test that there are no extraneous moves or fixups for SIMD shuffle
+// operations. See README-codegen.md for general information about this type of
+// test case.
+
+codegenTestX64_v128xv128_v128([
+ // Identity op on first argument should generate no code
+ ['i8x16.shuffle 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15',
+ ''],
+
+ // Identity op on second argument should generate a move
+ ['i8x16.shuffle 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31',
+ `66 0f 6f c1 movdqa %xmm1, %xmm0`],
+
+ // Broadcast a byte from first argument
+ ['i8x16.shuffle 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5',
+ `
+66 0f 60 c0 punpcklbw %xmm0, %xmm0
+f3 0f 70 c0 55 pshufhw \\$0x55, %xmm0, %xmm0
+66 0f 70 c0 aa pshufd \\$0xAA, %xmm0, %xmm0`],
+
+ // Broadcast a word from first argument
+ ['i8x16.shuffle 4 5 4 5 4 5 4 5 4 5 4 5 4 5 4 5',
+ `
+f2 0f 70 c0 aa pshuflw \\$0xAA, %xmm0, %xmm0
+66 0f 70 c0 00 pshufd \\$0x00, %xmm0, %xmm0`],
+
+ // Permute bytes
+ ['i8x16.shuffle 2 1 4 3 6 5 8 7 10 9 12 11 14 13 0 15',
+`
+66 0f 38 00 05 ${RIPRADDR} pshufbx ${RIPR}, %xmm0`],
+
+ // Permute words
+ ['i8x16.shuffle 2 3 0 1 6 7 4 5 10 11 8 9 14 15 12 13',
+`
+f2 0f 70 c0 b1 pshuflw \\$0xB1, %xmm0, %xmm0
+f3 0f 70 c0 b1 pshufhw \\$0xB1, %xmm0, %xmm0`],
+
+ // Permute doublewords
+ ['i8x16.shuffle 4 5 6 7 0 1 2 3 12 13 14 15 8 9 10 11',
+ `66 0f 70 c0 b1 pshufd \\$0xB1, %xmm0, %xmm0`],
+
+ // Rotate right
+ ['i8x16.shuffle 13 14 15 0 1 2 3 4 5 6 7 8 9 10 11 12',
+ `66 0f 3a 0f c0 0d palignr \\$0x0D, %xmm0, %xmm0`],
+
+ // General shuffle + blend. The initial movdqa to scratch is unavoidable
+ // unless we can convince the compiler that it's OK to destroy xmm1.
+ ['i8x16.shuffle 15 29 0 1 2 1 2 0 3 4 7 8 16 8 17 9',
+`
+66 44 0f 6f f9 movdqa %xmm1, %xmm15
+66 44 0f 38 00 3d ${RIPRADDR} pshufbx ${RIPR}, %xmm15
+66 0f 38 00 05 ${RIPRADDR} pshufbx ${RIPR}, %xmm0
+66 41 0f eb c7 por %xmm15, %xmm0`]]);
+
+codegenTestX64_v128xLITERAL_v128(
+ [// Shift left bytes, shifting in zeroes
+ //
+ // Remember the low-order bytes are at the "right" end
+ //
+ // The pxor is a code generation bug: the operand is unused, and no
+ // code should need to be generated for it, and no register should
+ // be allocated to it. The lowering does not use that operand, but
+ // code generation still touches it.
+ ['i8x16.shuffle 16 16 16 0 1 2 3 4 5 6 7 8 9 10 11 12',
+ '(v128.const i32x4 0 0 0 0)',
+`
+66 0f 73 f8 03 pslldq \\$0x03, %xmm0`],
+
+ // Shift right bytes, shifting in zeroes. See above.
+ ['i8x16.shuffle 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18',
+ '(v128.const i32x4 0 0 0 0)',
+`
+66 0f 73 d8 03 psrldq \\$0x03, %xmm0`]]);
+
+// The SSE4.1 PBLENDVB instruction implicitly uses XMM0; check that the blend
+// operation is generated as expected.
+codegenTestX64_adhoc(
+ `(func (export "f") (param v128 v128 v128 v128) (result v128)
+ (i8x16.shuffle 0 17 2 3 4 5 6 7 24 25 26 11 12 13 30 15
+ (local.get 2)(local.get 3)))`,
+ 'f',
+`
+66 0f 6f ca movdqa %xmm2, %xmm1
+66 0f 6f 05 ${RIPRADDR} movdqax ${RIPR}, %xmm0
+66 0f 38 10 cb pblendvb %xmm3, %xmm1
+66 0f 6f c1 movdqa %xmm1, %xmm0`);
diff --git a/js/src/jit-test/tests/wasm/simd/simd-partial-oob-store.js b/js/src/jit-test/tests/wasm/simd/simd-partial-oob-store.js
new file mode 100644
index 0000000000..00daceb438
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/simd-partial-oob-store.js
@@ -0,0 +1,38 @@
+// |jit-test| skip-if: !wasmSimdEnabled()
+
+// Cloned from ad-hack.js but kept separate because it may have to be disabled
+// on some devices until bugs are fixed.
+
+// Bug 1666747 - partially OOB stores are not handled correctly on ARM and ARM64.
+// The simulators don't implement the correct semantics anyhow, so when the bug
+// is fixed in the code generator they must remain excluded here.
+var conf = getBuildConfiguration();
+if (conf.arm64 || conf["arm64-simulator"] || conf.arm || conf["arm-simulator"])
+ quit(0);
+
+function get(arr, loc, len) {
+ let res = [];
+ for ( let i=0; i < len; i++ ) {
+ res.push(arr[loc+i]);
+ }
+ return res;
+}
+
+for ( let offset of iota(16) ) {
+ var ins = wasmEvalText(`
+ (module
+ (memory (export "mem") 1 1)
+ (func (export "f") (param $loc i32)
+ (v128.store offset=${offset} (local.get $loc) (v128.const i32x4 ${1+offset} 2 3 ${4+offset*2}))))`);
+
+ // OOB write should trap
+ assertErrorMessage(() => ins.exports.f(65536-15),
+ WebAssembly.RuntimeError,
+ /index out of bounds/)
+
+ // Ensure that OOB writes don't write anything.
+ let start = 65536 - 15 + offset;
+ let legalBytes = 65536 - start;
+ var mem8 = new Uint8Array(ins.exports.mem.buffer);
+ assertSame(get(mem8, start, legalBytes), iota(legalBytes).map((_) => 0));
+}
diff --git a/js/src/jit-test/tests/wasm/simd/splat-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/splat-x64-ion-codegen.js
new file mode 100644
index 0000000000..ce1d7adb12
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/splat-x64-ion-codegen.js
@@ -0,0 +1,29 @@
+// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x64 || getBuildConfiguration().simulator || isAvxPresent(); include:codegen-x64-test.js
+
+// Test that there are no extraneous moves or other instructions for splat and
+// other splat-like operations that can reuse its input for its output and/or
+// has a specializable code path. See README-codegen.md for general information
+// about this type of test case.
+
+codegenTestX64_PTYPE_v128(
+ [['f32x4.splat', 'f32', `0f c6 c0 00 shufps \\$0x00, %xmm0, %xmm0`],
+ ['f64x2.splat', 'f64', `f2 0f 12 c0 movddup %xmm0, %xmm0`]] , {log:true});
+
+// Skip these on Win64 because the ABI differs and there's a different parameter
+// register, this changes not just the name slightly but the binary encoding in
+// larger ways.
+
+if (!getBuildConfiguration().windows) {
+ codegenTestX64_PTYPE_v128(
+ [['v128.load32_splat', 'i32', `
+f3 41 0f 10 04 3f movssl \\(%r15,%rdi,1\\), %xmm0
+0f c6 c0 00 shufps \\$0x00, %xmm0, %xmm0`],
+ ['v128.load64_splat', 'i32', `f2 41 0f 12 04 3f movddupq \\(%r15,%rdi,1\\), %xmm0`],
+ ['v128.load8x8_s', 'i32', `66 41 0f 38 20 04 3f pmovsxbwq \\(%r15,%rdi,1\\), %xmm0`],
+ ['v128.load8x8_u', 'i32', `66 41 0f 38 30 04 3f pmovzxbwq \\(%r15,%rdi,1\\), %xmm0`],
+ ['v128.load16x4_s', 'i32', `66 41 0f 38 23 04 3f pmovsxwdq \\(%r15,%rdi,1\\), %xmm0`],
+ ['v128.load16x4_u', 'i32', `66 41 0f 38 33 04 3f pmovzxwdq \\(%r15,%rdi,1\\), %xmm0`],
+ ['v128.load32x2_s', 'i32', `66 41 0f 38 25 04 3f pmovsxdqq \\(%r15,%rdi,1\\), %xmm0`],
+ ['v128.load32x2_u', 'i32', `66 41 0f 38 35 04 3f pmovzxdqq \\(%r15,%rdi,1\\), %xmm0`]],
+ {memory:1});
+}
diff --git a/js/src/jit-test/tests/wasm/simd/validation.js b/js/src/jit-test/tests/wasm/simd/validation.js
new file mode 100644
index 0000000000..46b8df620f
--- /dev/null
+++ b/js/src/jit-test/tests/wasm/simd/validation.js
@@ -0,0 +1,368 @@
+// |jit-test| skip-if: !wasmSimdEnabled()
+
+function testValid(code) {
+ assertEq(WebAssembly.validate(wasmTextToBinary(code)), true);
+}
+
+function testInvalid(code) {
+ assertEq(WebAssembly.validate(wasmTextToBinary(code)), false);
+}
+
+// v128 -> v128: each unary op must validate with one v128 operand and a v128 result.
+
+for (let op of [
+ 'i8x16.neg',
+ 'i8x16.abs',
+ 'i16x8.neg',
+ 'i16x8.abs',
+ 'i16x8.extend_low_i8x16_s',
+ 'i16x8.extend_high_i8x16_s',
+ 'i16x8.extend_low_i8x16_u',
+ 'i16x8.extend_high_i8x16_u',
+ 'i32x4.neg',
+ 'i32x4.abs',
+ 'i32x4.extend_low_i16x8_s',
+ 'i32x4.extend_high_i16x8_s',
+ 'i32x4.extend_low_i16x8_u',
+ 'i32x4.extend_high_i16x8_u',
+ 'i32x4.trunc_sat_f32x4_s',
+ 'i32x4.trunc_sat_f32x4_u',
+ 'i64x2.neg',
+ 'f32x4.abs',
+ 'f32x4.neg',
+ 'f32x4.sqrt',
+ 'f32x4.convert_i32x4_s',
+ 'f32x4.convert_i32x4_u',
+ 'f64x2.abs',
+ 'f64x2.neg',
+ 'f64x2.sqrt',
+ 'v128.not'])
+{
+ testValid(`(module
+ (func (param v128) (result v128)
+ (${op} (local.get 0))))`);
+}
+
+for (let [prefix, result, suffix] of [['i8x16', 'i32', '_s'],
+ ['i8x16', 'i32', '_u'],
+ ['i16x8', 'i32', '_s'],
+ ['i16x8', 'i32', '_u'],
+ ['i32x4', 'i32', ''],
+ ['i64x2', 'i64', ''],
+ ['f32x4', 'f32', ''],
+ ['f64x2', 'f64', '']])
+{
+ testValid(`(module
+ (func (param v128) (result ${result})
+ (${prefix}.extract_lane${suffix} 1 (local.get 0))))`);
+}
+
+// The wat parser accepts small out-of-range lane indices, but they must be
+// caught in validation.
+
+testInvalid(
+ `(module
+ (func (param v128) (result i32)
+ (i8x16.extract_lane_u 16 (local.get 0))))`);
+
+// (v128, v128) -> v128
+
+for (let op of [
+ 'i8x16.eq',
+ 'i8x16.ne',
+ 'i8x16.lt_s',
+ 'i8x16.lt_u',
+ 'i8x16.gt_s',
+ 'i8x16.gt_u',
+ 'i8x16.le_s',
+ 'i8x16.le_u',
+ 'i8x16.ge_s',
+ 'i8x16.ge_u',
+ 'i16x8.eq',
+ 'i16x8.ne',
+ 'i16x8.lt_s',
+ 'i16x8.lt_u',
+ 'i16x8.gt_s',
+ 'i16x8.gt_u',
+ 'i16x8.le_s',
+ 'i16x8.le_u',
+ 'i16x8.ge_s',
+ 'i16x8.ge_u',
+ 'i32x4.eq',
+ 'i32x4.ne',
+ 'i32x4.lt_s',
+ 'i32x4.lt_u',
+ 'i32x4.gt_s',
+ 'i32x4.gt_u',
+ 'i32x4.le_s',
+ 'i32x4.le_u',
+ 'i32x4.ge_s',
+ 'i32x4.ge_u',
+ 'f32x4.eq',
+ 'f32x4.ne',
+ 'f32x4.lt',
+ 'f32x4.gt',
+ 'f32x4.le',
+ 'f32x4.ge',
+ 'f64x2.eq',
+ 'f64x2.ne',
+ 'f64x2.lt',
+ 'f64x2.gt',
+ 'f64x2.le',
+ 'f64x2.ge',
+ 'v128.and',
+ 'v128.or',
+ 'v128.xor',
+ 'v128.andnot',
+ 'i8x16.avgr_u',
+ 'i16x8.avgr_u',
+ 'i8x16.add',
+ 'i8x16.add_sat_s',
+ 'i8x16.add_sat_u',
+ 'i8x16.sub',
+ 'i8x16.sub_sat_s',
+ 'i8x16.sub_sat_u',
+ 'i8x16.min_s',
+ 'i8x16.max_s',
+ 'i8x16.min_u',
+ 'i8x16.max_u',
+ 'i16x8.add',
+ 'i16x8.add_sat_s',
+ 'i16x8.add_sat_u',
+ 'i16x8.sub',
+ 'i16x8.sub_sat_s',
+ 'i16x8.sub_sat_u',
+ 'i16x8.mul',
+ 'i16x8.min_s',
+ 'i16x8.max_s',
+ 'i16x8.min_u',
+ 'i16x8.max_u',
+ 'i32x4.add',
+ 'i32x4.sub',
+ 'i32x4.mul',
+ 'i32x4.min_s',
+ 'i32x4.max_s',
+ 'i32x4.min_u',
+ 'i32x4.max_u',
+ 'i64x2.add',
+ 'i64x2.sub',
+ 'i64x2.mul',
+ 'f32x4.add',
+ 'f32x4.sub',
+ 'f32x4.mul',
+ 'f32x4.div',
+ 'f32x4.min',
+ 'f32x4.max',
+ 'f64x2.add',
+ 'f64x2.sub',
+ 'f64x2.mul',
+ 'f64x2.div',
+ 'f64x2.min',
+ 'f64x2.max',
+ 'i8x16.narrow_i16x8_s',
+ 'i8x16.narrow_i16x8_u',
+ 'i16x8.narrow_i32x4_s',
+ 'i16x8.narrow_i32x4_u',
+ 'i8x16.swizzle'])
+{
+ testValid(`(module
+ (func (param v128) (param v128) (result v128)
+ (${op} (local.get 0) (local.get 1))))`);
+}
+
+testValid(`(module
+ (func (param v128) (param v128) (result v128)
+ (i8x16.shuffle 0 16 1 17 2 18 3 19 4 20 5 21 6 22 7 23 (local.get 0) (local.get 1))))`);
+
+assertErrorMessage(() => testValid(
+ `(module
+ (func (param v128) (param v128) (result v128)
+ (i8x16.shuffle 0 16 1 17 2 18 3 19 4 20 5 21 6 22 7 (local.get 0) (local.get 1))))`),
+ SyntaxError,
+ /expected a u8/);
+
+// (v128, i32) -> v128
+
+for (let op of [
+ 'i8x16.shl',
+ 'i8x16.shr_s',
+ 'i8x16.shr_u',
+ 'i16x8.shl',
+ 'i16x8.shr_s',
+ 'i16x8.shr_u',
+ 'i32x4.shl',
+ 'i32x4.shr_s',
+ 'i32x4.shr_u',
+ 'i64x2.shl',
+ 'i64x2.shr_s',
+ 'i64x2.shr_u'])
+{
+ testValid(`(module
+ (func (param v128) (param i32) (result v128)
+ (${op} (local.get 0) (local.get 1))))`);
+}
+
+// v128 -> i32
+
+for (let op of [
+ 'v128.any_true',
+ 'i8x16.all_true',
+ 'i16x8.all_true',
+ 'i32x4.all_true',
+ 'i8x16.bitmask',
+ 'i16x8.bitmask',
+ 'i32x4.bitmask'])
+{
+ testValid(`(module
+ (func (param v128) (result i32)
+ (${op} (local.get 0))))`);
+}
+
+// T -> V128
+
+for (let [op, input] of [
+ ['i8x16.splat', 'i32'],
+ ['i16x8.splat', 'i32'],
+ ['i32x4.splat', 'i32'],
+ ['i64x2.splat', 'i64'],
+ ['f32x4.splat', 'f32'],
+ ['f64x2.splat', 'f64']])
+{
+ testValid(`(module
+ (func (param ${input}) (result v128)
+ (${op} (local.get 0))))`);
+}
+
+// i32 -> v128
+
+for (let op of [
+ 'v128.load',
+ 'v128.load8_splat',
+ 'v128.load16_splat',
+ 'v128.load32_splat',
+ 'v128.load64_splat',
+ 'v128.load8x8_s',
+ 'v128.load8x8_u',
+ 'v128.load16x4_s',
+ 'v128.load16x4_u',
+ 'v128.load32x2_s',
+ 'v128.load32x2_u'])
+{
+ testValid(`(module
+ (memory 1 1)
+ (func (param i32) (result v128)
+ (${op} (local.get 0))))`);
+}
+
+testValid(`(module
+ (func (result v128)
+ (v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15))
+ (func (result v128)
+ (v128.const i16x8 0 1 2 3 4 5 6 7))
+ (func (result v128)
+ (v128.const i32x4 0 1 2 3))
+ (func (result v128)
+ (v128.const i64x2 0 1))
+ (func (result v128)
+ (v128.const f32x4 0 1 2 3))
+ (func (result v128)
+ (v128.const f32x4 0.5 1.5 2.5 3.5))
+ (func (result v128)
+ (v128.const f64x2 0 1))
+ (func (result v128)
+ (v128.const f64x2 0.5 1.5)))`);
+
+assertErrorMessage(() => testValid(
+ `(module
+ (func (result v128)
+ (v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14)))`),
+ SyntaxError,
+ /expected a i8/);
+
+assertErrorMessage(() => testValid(
+ `(module
+ (func (result v128)
+ (v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 256 15)))`),
+ SyntaxError,
+ /invalid i8 number/);
+
+assertErrorMessage(() => testValid(
+ `(module
+ (func (result v128)
+ (v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 3.14 15)))`),
+ SyntaxError,
+ /expected a i8/);
+
+assertErrorMessage(() => testValid(
+ `(module
+ (func (result v128)
+ (v128.const f32x4 0.5 1.5 2.5)))`),
+ SyntaxError,
+ /expected a float/); // only three of the four f32x4 lanes are supplied
+
+assertErrorMessage(() => testValid(
+ `(module
+ (func (result v128)
+ (v128.const i8x8 0 1 2 3 4 5 6 7)))`),
+ SyntaxError,
+ /expected one of/);
+
+// v128 -> ()
+
+testValid(`(module
+ (memory 1 1)
+ (func (param i32) (param v128)
+ (v128.store (local.get 0) (local.get 1))))`);
+
+// (v128, v128, v128) -> v128
+
+testValid(`(module
+ (func (param v128) (param v128) (param v128) (result v128)
+ (v128.bitselect (local.get 0) (local.get 1) (local.get 2))))`);
+
+// (v128, t) -> v128
+
+for (let [prefix, input] of [['i8x16', 'i32'],
+ ['i16x8', 'i32'],
+ ['i32x4', 'i32'],
+ ['i64x2', 'i64'],
+ ['f32x4', 'f32'],
+ ['f64x2', 'f64']])
+{
+ testValid(`(module
+ (func (param v128) (param ${input}) (result v128)
+ (${prefix}.replace_lane 1 (local.get 0) (local.get 1))))`);
+}
+
+testInvalid(
+ `(module
+ (func (param v128) (param i32) (result v128)
+ (i8x16.replace_lane 16 (local.get 0) (local.get 1))))`);
+
+// Global variables
+
+testValid(`(module
+ (global $g (mut v128) (v128.const f32x4 1 2 3 4)))`);
+
+testValid(`(module
+ (global $g (import "m" "g") v128)
+ (global $h (mut v128) (global.get $g)))`);
+
+testValid(`(module
+ (global $g (export "g") v128 (v128.const f32x4 1 2 3 4)))`);
+
+testValid(`(module
+ (global $g (export "g") (mut v128) (v128.const f32x4 1 2 3 4)))`);
+
+// Imports, exports, calls
+
+testValid(`(module
+ (import "m" "g" (func (param v128) (result v128)))
+ (func (export "f") (param v128) (result v128)
+ (f64x2.add (local.get 0) (v128.const f64x2 1 2))))`);
+
+testValid(`(module
+ (func $f (param v128) (result v128)
+ (i8x16.neg (local.get 0)))
+ (func $g (export "g") (param v128) (result v128)
+ (call $f (local.get 0))))`);