diff options
Diffstat (limited to 'js/src/jit-test/tests/wasm/simd')
37 files changed, 6900 insertions, 0 deletions
// diff --git a/js/src/jit-test/tests/wasm/simd/ad-hack-binop-preamble.js b/js/src/jit-test/tests/wasm/simd/ad-hack-binop-preamble.js
// new file mode 100644
// index 0000000000..dd1443a1e7
// --- /dev/null
// +++ b/js/src/jit-test/tests/wasm/simd/ad-hack-binop-preamble.js
// @@ -0,0 +1,334 @@
// |jit-test| skip-if: true

// Common code to test simple binary operators.  See runSimpleBinopTest below.

// Build the wasm text for the "constant operand" variants of `op`.  For the
// input pair at index N this emits three exported functions:
//   run_constlhsN  - lhs is a v128.const, rhs is passed as a parameter
//   run_constrhsN  - rhs is a v128.const, lhs is passed as a parameter
//   run_constbothN - both operands are v128.const
// Each one stores its result at memory address 0.  `memtype.layoutName`
// supplies the v128.const shape prefix and `jsValueToWasmName` (defined
// elsewhere) renders each lane value.
function expandConstantBinopInputs(op, memtype, inputs) {
    let s = '';
    let ident = 0;
    for ( let [a, b] of inputs ) {
        let constlhs = `${memtype.layoutName} ${a.map(jsValueToWasmName).join(' ')}`;
        let constrhs = `${memtype.layoutName} ${b.map(jsValueToWasmName).join(' ')}`;
        s += `
        ;; lhs is constant, rhs is variable
        (func (export "run_constlhs${ident}")
          (v128.store (i32.const 0)
            (call $doit_constlhs${ident} (v128.const ${constrhs}))))
        (func $doit_constlhs${ident} (param $b v128) (result v128)
          (${op} (v128.const ${constlhs}) (local.get $b)))

        ;; rhs is constant, lhs is variable
        (func (export "run_constrhs${ident}")
          (v128.store (i32.const 0)
            (call $doit_constrhs${ident} (v128.const ${constlhs}))))
        (func $doit_constrhs${ident} (param $a v128) (result v128)
          (${op} (local.get $a) (v128.const ${constrhs})))

        ;; both operands are constant
        (func (export "run_constboth${ident}")
          (v128.store (i32.const 0)
            (call $doit_constboth${ident})))
        (func $doit_constboth${ident} (result v128)
          (${op} (v128.const ${constlhs}) (v128.const ${constrhs})))`;
        ident++;
    }
    return s;
}

// Instantiate a module that applies `op` to the vectors at byte offsets 16
// and 32 and stores the result at offset 0 (export "run"), together with the
// constant-operand variants generated by expandConstantBinopInputs.
//
// Returns [instance, input view, result view].  The result view is the input
// view itself unless a distinct `resultmemtype` is supplied.
function insAndMemBinop(op, memtype, resultmemtype, inputs) {
    var ins = wasmEvalText(`
  (module
    (memory (export "mem") 1 1)

    ;; both arguments are variable
    (func (export "run")
      (v128.store (i32.const 0)
        (call $doit (v128.load (i32.const 16)) (v128.load (i32.const 32)))))
    (func $doit (param $a v128) (param $b v128) (result v128)
      (${op} (local.get $a) (local.get $b)))

    ${expandConstantBinopInputs(op, memtype, inputs)})`);
    var mem = new memtype(ins.exports.mem.buffer);
    var resultmem = !resultmemtype || memtype == resultmemtype ? mem : new resultmemtype(ins.exports.mem.buffer);
    return [ins, mem, resultmem];
}

// Reference (scalar) implementations of the lane operations.  The factories
// taking `bits` return a two-argument function for that lane width; the
// factories taking `truth` return a comparison yielding `truth` or 0.

function add(bits) { return (x, y) => sign_extend(x+y, bits) }
function add64(x, y) { return sign_extend(BigInt(x)+BigInt(y), 64) }
function sub(bits) { return (x, y) => sign_extend(x-y, bits) }
function sub64(x, y) { return sign_extend(BigInt(x)-BigInt(y), 64) }
// Even 32-bit multiply can overflow a Number, so always use BigInt
function mul(bits) { return (x, y) => sign_extend(BigInt(x)*BigInt(y), bits) }
function div(x, y) { return x/y }
function min(x, y) { return x < y ? x : y }
function max(x, y) { return x > y ? x : y }
function and(x, y) { return zero_extend(x&y, 8) }
function or(x, y) { return zero_extend(x|y, 8) }
function xor(x, y) { return zero_extend(x^y, 8) }
function andnot(x, y) { return zero_extend(x&~y, 8) }
function avgr(x, y) { return (x + y + 1) >> 1; }
function eq(truth) { return (x,y) => x==y ? truth : 0 }
function ne(truth) { return (x,y) => x!=y ? truth : 0 }
function lt(truth) { return (x, y) => x < y ? truth : 0 }
function gt(truth) { return (x, y) => x > y ? truth : 0 }
function le(truth) { return (x, y) => x <= y ? truth : 0 }
function ge(truth) { return (x, y) => x >= y ? truth : 0 }

function fadd(x, y) { return Math.fround(x+y) }
function fsub(x, y) { return Math.fround(x-y) }
function fmul(x, y) { return Math.fround(x*y) }
function fdiv(x, y) { return Math.fround(x/y) }

// Floating-point minimum: NaN if either operand is NaN, otherwise the smaller
// operand.  Zeros of opposite sign compare equal with ==, so that case is
// handled explicitly: the minimum of +0 and -0 is -0.
function fmin(x, y) {
    if (x == y) {
        if (x === 0)
            return Object.is(x, -0) ? x : y;
        return x;
    }
    if (x < y) return x;
    if (y < x) return y;
    // Unordered: at least one operand is NaN; return whichever one it is.
    if (isNaN(x)) return x;
    return y;
}

// Floating-point maximum: NaN if either operand is NaN, otherwise the larger
// operand.  The maximum of +0 and -0 is +0.
function fmax(x, y) {
    if (x == y) {
        if (x === 0)
            return Object.is(x, -0) ? y : x;
        return x;
    }
    if (x > y) return x;
    if (y > x) return y;
    // Unordered: at least one operand is NaN; return whichever one it is.
    if (isNaN(x)) return x;
    return y;
}
function dadd(x, y) { return x+y }
function dsub(x, y) { return x-y }
function dmul(x, y) { return x*y }
function ddiv(x, y) { return x/y }
var dmax = fmax;
var dmin = fmin;

// Apply `op` to the sign-extended operands and clamp the result with
// signed_saturate (defined elsewhere) to `bits` bits.
function op_sat_s(bits, op) {
    return (x, y) => {
        return signed_saturate(op(sign_extend(x, bits),
                                  sign_extend(y, bits)),
                               bits);
    }
}

// Apply `op` to the zero-extended operands and clamp the result with
// unsigned_saturate (defined elsewhere) to `bits` bits.
function op_sat_u(bits, op) {
    return (x, y) => {
        return unsigned_saturate(op(zero_extend(x, bits),
                                    zero_extend(y, bits)),
                                 bits);
    }
}

function add_sat_s(bits) {
    return op_sat_s(bits, (x,y) => x+y);
}
function sub_sat_s(bits) {
    return op_sat_s(bits, (x,y) => x-y);
}
function add_sat_u(bits) {
    return op_sat_u(bits, (x,y) => x+y);
}
function sub_sat_u(bits) {
    return op_sat_u(bits, (x,y) => x-y);
}

// Signed max/min compare the sign-extended operands and sign-extend the
// winner again; unsigned max/min compare the zero-extended operands.

function max_s(bits) {
    return (x, y) => {
        return sign_extend(max(sign_extend(x, bits),
                               sign_extend(y, bits)),
                           bits);
    }
}

function min_s(bits) {
    return (x, y) => {
        return sign_extend(min(sign_extend(x, bits),
                               sign_extend(y, bits)),
                           bits);
    }
}

function max_u(bits) {
    return (x, y) => {
        return max(zero_extend(x, bits),
                   zero_extend(y, bits));
    }
}

function min_u(bits) {
    return (x, y) => {
        return min(zero_extend(x, bits),
                   zero_extend(y, bits));
    }
}

// Pseudo-min/max: a plain "less than" select, with no special handling of
// NaN or signed zeros.
function pmin(x, y) { return y < x ? y : x }
function pmax(x, y) { return x < y ? y : x }

// Sanity checks of the reference implementations themselves.
assertEq(max_s(8)(1, 2), 2);
assertEq(max_s(8)(1, 128), 1);
assertEq(min_s(8)(1, 2), 1);
assertEq(min_s(8)(1, 128), -128);
assertEq(max_u(8)(1, 2), 2);
assertEq(max_u(8)(1, 128), 128);
assertEq(min_u(8)(1, 2), 1);
assertEq(min_u(8)(1, 128), 1);

// Each entry is [wasm opcode, input array type, reference function] with an
// optional fourth element giving the array type used to read the result when
// it differs from the input type.
var binopTests =
    [['i8x16.add', Int8Array, add(8)],
     ['i16x8.add', Int16Array, add(16)],
     ['i32x4.add', Int32Array, add(32)],
     ['i64x2.add', BigInt64Array, add64],
     ['i8x16.sub', Int8Array, sub(8)],
     ['i16x8.sub', Int16Array, sub(16)],
     ['i32x4.sub', Int32Array, sub(32)],
     ['i64x2.sub', BigInt64Array, sub64],
     ['i8x16.add_sat_s', Int8Array, add_sat_s(8)],
     ['i8x16.add_sat_u', Uint8Array, add_sat_u(8)],
     ['i16x8.add_sat_s', Int16Array, add_sat_s(16)],
     ['i16x8.add_sat_u', Uint16Array, add_sat_u(16)],
     ['i8x16.sub_sat_s', Int8Array, sub_sat_s(8)],
     ['i8x16.sub_sat_u', Uint8Array, sub_sat_u(8)],
     ['i16x8.sub_sat_s', Int16Array, sub_sat_s(16)],
     ['i16x8.sub_sat_u', Uint16Array, sub_sat_u(16)],
     ['i16x8.mul', Int16Array, mul(16)],
     ['i32x4.mul', Int32Array, mul(32)],
     ['i64x2.mul', BigInt64Array, mul(64)],
     ['i8x16.avgr_u', Uint8Array, avgr],
     ['i16x8.avgr_u', Uint16Array, avgr],
     ['i8x16.max_s', Int8Array, max_s(8)],
     ['i8x16.max_u', Uint8Array, max_u(8)],
     ['i8x16.min_s', Int8Array, min_s(8)],
     ['i8x16.min_u', Uint8Array, min_u(8)],
     ['i16x8.max_s', Int16Array, max_s(16)],
     ['i16x8.max_u', Uint16Array, max_u(16)],
     ['i16x8.min_s', Int16Array, min_s(16)],
     ['i16x8.min_u', Uint16Array, min_u(16)],
     ['i32x4.max_s', Int32Array, max_s(32)],
     ['i32x4.max_u', Uint32Array, max_u(32)],
     ['i32x4.min_s', Int32Array, min_s(32)],
     ['i32x4.min_u', Uint32Array, min_u(32)],
     ['v128.and', Uint8Array, and],
     ['v128.or', Uint8Array, or],
     ['v128.xor', Uint8Array, xor],
     ['v128.andnot', Uint8Array, andnot],
     ['f32x4.add', Float32Array, fadd],
     ['f32x4.sub', Float32Array, fsub],
     ['f32x4.mul', Float32Array, fmul],
     ['f32x4.div', Float32Array, fdiv],
     ['f32x4.min', Float32Array, fmin],
     ['f32x4.max', Float32Array, fmax],
     ['f64x2.add', Float64Array, dadd],
     ['f64x2.sub', Float64Array, dsub],
     ['f64x2.mul', Float64Array, dmul],
     ['f64x2.div', Float64Array, ddiv],
     ['f64x2.min', Float64Array, dmin],
     ['f64x2.max', Float64Array, dmax],
     ['i8x16.eq', Int8Array, eq(-1)],
     ['i8x16.ne', Int8Array, ne(-1)],
     ['i8x16.lt_s', Int8Array, lt(-1)],
     ['i8x16.gt_s', Int8Array, gt(-1)],
     ['i8x16.le_s', Int8Array, le(-1)],
     ['i8x16.ge_s', Int8Array, ge(-1)],
     ['i8x16.gt_u', Uint8Array, gt(0xFF)],
     ['i8x16.ge_u', Uint8Array, ge(0xFF)],
     ['i8x16.lt_u', Uint8Array, lt(0xFF)],
     ['i8x16.le_u', Uint8Array, le(0xFF)],
     ['i16x8.eq', Int16Array, eq(-1)],
     ['i16x8.ne', Int16Array, ne(-1)],
     ['i16x8.lt_s', Int16Array, lt(-1)],
     ['i16x8.gt_s', Int16Array, gt(-1)],
     ['i16x8.le_s', Int16Array, le(-1)],
     ['i16x8.ge_s', Int16Array, ge(-1)],
     ['i16x8.gt_u', Uint16Array, gt(0xFFFF)],
     ['i16x8.ge_u', Uint16Array, ge(0xFFFF)],
     ['i16x8.lt_u', Uint16Array, lt(0xFFFF)],
     ['i16x8.le_u', Uint16Array, le(0xFFFF)],
     ['i32x4.eq', Int32Array, eq(-1)],
     ['i32x4.ne', Int32Array, ne(-1)],
     ['i32x4.lt_s', Int32Array, lt(-1)],
     ['i32x4.gt_s', Int32Array, gt(-1)],
     ['i32x4.le_s', Int32Array, le(-1)],
     ['i32x4.ge_s', Int32Array, ge(-1)],
     ['i32x4.gt_u', Uint32Array, gt(0xFFFFFFFF)],
     ['i32x4.ge_u', Uint32Array, ge(0xFFFFFFFF)],
     ['i32x4.lt_u', Uint32Array, lt(0xFFFFFFFF)],
     ['i32x4.le_u', Uint32Array, le(0xFFFFFFFF)],
     ['f32x4.eq', Float32Array, eq(-1), Int32Array],
     ['f32x4.ne', Float32Array, ne(-1), Int32Array],
     ['f32x4.lt', Float32Array, lt(-1), Int32Array],
     ['f32x4.gt', Float32Array, gt(-1), Int32Array],
     ['f32x4.le', Float32Array, le(-1), Int32Array],
     ['f32x4.ge', Float32Array, ge(-1), Int32Array],
     ['f64x2.eq', Float64Array, eq(-1), BigInt64Array],
     ['f64x2.ne', Float64Array, ne(-1), BigInt64Array],
     ['f64x2.lt', Float64Array, lt(-1), BigInt64Array],
     ['f64x2.gt', Float64Array, gt(-1), BigInt64Array],
     ['f64x2.le', Float64Array, le(-1), BigInt64Array],
     ['f64x2.ge', Float64Array, ge(-1), BigInt64Array],
     ['f32x4.pmin', Float32Array, pmin],
     ['f32x4.pmax', Float32Array, pmax],
     ['f64x2.pmin', Float64Array, pmin],
     ['f64x2.pmax', Float64Array, pmax]]

// Run v128 x v128 -> v128 tests.  Inputs are taken from the common input sets,
// placed in memory, the test is run, and the result is extracted and checked.
//
// Runs tests with both operands as variables, either as constant, or both as
// constant.  Also checks NaN behavior when appropriate.
//
// All runners that call this should use the same value for `ofParts` and should
// pass different values for `part`, up to `ofParts` - 1.

function runSimpleBinopTest(part, ofParts) {
    // Split binopTests into `ofParts` contiguous slices and run slice `part`.
    let partSize = Math.ceil(binopTests.length / ofParts);
    let start = part * partSize;
    let end = Math.min((part + 1) * partSize, binopTests.length);
    for ( let [op, memtype, rop, resultmemtype] of binopTests.slice(start, end) ) {
        let inputs = cross(memtype.inputs);
        // Number of lanes of this element type in one 16-byte vector.
        let len = 16/memtype.BYTES_PER_ELEMENT;
        let xs = iota(len);
        let zero = xs.map(_ => 0);
        let [ins, mem, resultmem] = insAndMemBinop(op, memtype, resultmemtype, inputs);
        // Integer views over the same buffer, used to write NaN bit patterns.
        let bitsForF32 = memtype == Float32Array ? new Uint32Array(mem.buffer) : null;
        let bitsForF64 = memtype == Float64Array ? new BigInt64Array(mem.buffer) : null;

        // Both operands variable: operands go to element offsets len and
        // len*2 (byte offsets 16 and 32), the result is read from offset 0.
        function testIt(a,b,r) {
            set(mem, len, a);
            set(mem, len*2, b);
            ins.exports.run();
            assertSame(get(resultmem, 0, len), r);

            // Test signalling NaN superficially by replacing QNaN inputs with SNaN
            if (bitsForF32 != null && (a.some(isNaN) || b.some(isNaN))) {
                a.forEach((x, i) => { if (isNaN(x)) { bitsForF32[len+i] = 0x7FA0_0000; } });
                b.forEach((x, i) => { if (isNaN(x)) { bitsForF32[(len*2)+i] = 0x7FA0_0000; } });
                ins.exports.run();
                assertSame(get(resultmem, 0, len), r);
            }
            if (bitsForF64 != null && (a.some(isNaN) || b.some(isNaN))) {
                a.forEach((x, i) => { if (isNaN(x)) { bitsForF64[len+i] = 0x7FF4_0000_0000_0000n; } });
                b.forEach((x, i) => { if (isNaN(x)) { bitsForF64[(len*2)+i] = 0x7FF4_0000_0000_0000n; } });
                ins.exports.run();
                assertSame(get(resultmem, 0, len), r);
            }
        }

        // Constant-operand variants for input pair `i`.  The result slot is
        // zeroed before each run so a stale result cannot satisfy the check.
        function testConstIt(i,r) {
            set(resultmem, 0, zero);
            ins.exports["run_constlhs" + i]();
            assertSame(get(resultmem, 0, len), r);

            set(resultmem, 0, zero);
            ins.exports["run_constrhs" + i]();
            assertSame(get(resultmem, 0, len), r);

            set(resultmem, 0, zero);
            ins.exports["run_constboth" + i]();
            assertSame(get(resultmem, 0, len), r);
        }

        let i = 0;
        for (let [a,b] of inputs) {
            // Expected result: the reference function applied lane by lane.
            let r = xs.map((i) => rop(a[i], b[i]));
            testIt(a,b,r);
            testConstIt(i,r);
            i++;
        }
    }
}

// diff --git a/js/src/jit-test/tests/wasm/simd/ad-hack-extra.js b/js/src/jit-test/tests/wasm/simd/ad-hack-extra.js
// new file mode 100644
// index 0000000000..ee770d16a9
// --- /dev/null
// +++ b/js/src/jit-test/tests/wasm/simd/ad-hack-extra.js
// @@ -0,0 +1,697 @@
// |jit-test| skip-if: !wasmSimdEnabled()

// Do not include this in the preamble, it must be loaded after lib/wasm.js
load(scriptdir + "ad-hack-preamble.js")

// Widening multiplication.
// This is to be moved into ad-hack.js
//
// (iMxN.extmul_{high,low}_iKxL_{s,u} A B)
//
// is equivalent to
//
// (iMxN.mul (iMxN.extend_{high,low}_iKxL_{s,u} A)
//           (iMxN.extend_{high,low}_iKxL_{s,u} B))
//
// It doesn't really matter what the inputs are, we can test this almost
// blindly.
//
// Unfortunately, we do not yet have i64x2.extend_* so we introduce a helper
// function to compute that.

// Emit one exported function that computes the extmul directly into memory at
// byte 0 and the equivalent widen-then-multiply sequence into memory at
// byte 48, both from the operands at bytes 16 and 32.
function makeExtMulTest(wide, narrow, part, signed) {
    const variant = `${part}_${narrow}_${signed}`;
    let widener;
    if (wide == 'i64x2') {
        // Use the helper functions declared in the module below.
        widener = `call $${wide}_extend_${variant}`;
    } else {
        widener = `${wide}.extend_${variant}`;
    }
    return `
    (func (export "${wide}_extmul_${variant}")
      (v128.store (i32.const 0)
        (${wide}.extmul_${variant} (v128.load (i32.const 16))
                                   (v128.load (i32.const 32))))
      (v128.store (i32.const 48)
        (${wide}.mul (${widener} (v128.load (i32.const 16)))
                     (${widener} (v128.load (i32.const 32))))))
`;
}

var ins = wasmEvalText(`
  (module
    (memory (export "mem") 1 1)
    (func $i64x2_extend_low_i32x4_s (param v128) (result v128)
      (i64x2.shr_s (i8x16.shuffle 16 16 16 16 0 1 2 3 16 16 16 16 4 5 6 7
                                  (local.get 0)
                                  (v128.const i32x4 0 0 0 0))
                   (i32.const 32)))
    (func $i64x2_extend_high_i32x4_s (param v128) (result v128)
      (i64x2.shr_s (i8x16.shuffle 16 16 16 16 8 9 10 11 16 16 16 16 12 13 14 15
                                  (local.get 0)
                                  (v128.const i32x4 0 0 0 0))
                   (i32.const 32)))
    (func $i64x2_extend_low_i32x4_u (param v128) (result v128)
      (i8x16.shuffle 0 1 2 3 16 16 16 16 4 5 6 7 16 16 16 16
                     (local.get 0)
                     (v128.const i32x4 0 0 0 0)))
    (func $i64x2_extend_high_i32x4_u (param v128) (result v128)
      (i8x16.shuffle 8 9 10 11 16 16 16 16 12 13 14 15 16 16 16 16
                     (local.get 0)
                     (v128.const i32x4 0 0 0 0)))
    ${makeExtMulTest('i64x2','i32x4','low','s')}
    ${makeExtMulTest('i64x2','i32x4','high','s')}
    ${makeExtMulTest('i64x2','i32x4','low','u')}
    ${makeExtMulTest('i64x2','i32x4','high','u')}
    ${makeExtMulTest('i32x4','i16x8','low','s')}
    ${makeExtMulTest('i32x4','i16x8','high','s')}
    ${makeExtMulTest('i32x4','i16x8','low','u')}
    ${makeExtMulTest('i32x4','i16x8','high','u')}
    ${makeExtMulTest('i16x8','i8x16','low','s')}
    ${makeExtMulTest('i16x8','i8x16','high','s')}
    ${makeExtMulTest('i16x8','i8x16','low','u')}
    ${makeExtMulTest('i16x8','i8x16','high','u')})`);

// For every (wide, narrow) layout pair and every part/signedness variant, run
// the exported function on each input pair and check that the two stored
// results agree.
for (const [WideArray, NarrowArray] of [
    [Int16Array, Int8Array],
    [Int32Array, Int16Array],
    [BigInt64Array, Int32Array],
]) {
    const narrowMem = new NarrowArray(ins.exports.mem.buffer);
    const wideMem = new WideArray(ins.exports.mem.buffer);

    // Element indices corresponding to byte offsets 16, 32 (inputs) and
    // 0, 48 (results).
    const narrowSrc0 = 16 / NarrowArray.BYTES_PER_ELEMENT;
    const narrowSrc1 = 32 / NarrowArray.BYTES_PER_ELEMENT;
    const wideElems = 16 / WideArray.BYTES_PER_ELEMENT;
    const wideRes0 = 0;
    const wideRes1 = 48 / WideArray.BYTES_PER_ELEMENT;
    const zero = iota(wideElems).map(_ => 0);

    for (const [part, signed] of [['low', 's'], ['low', 'u'], ['high', 's'], ['high', 'u']]) {
        const test = `${WideArray.layoutName}_extmul_${part}_${NarrowArray.layoutName}_${signed}`;
        for (const [a, b] of cross(NarrowArray.inputs)) {
            // Clear both result slots so stale data cannot make them match.
            set(wideMem, wideRes0, zero);
            set(wideMem, wideRes1, zero);
            set(narrowMem, narrowSrc0, a);
            set(narrowMem, narrowSrc1, b);
            ins.exports[test]();
            assertSame(get(wideMem, wideRes0, wideElems),
                       get(wideMem, wideRes1, wideElems));
        }
    }
}

// Bitmask.  Ion constant folds, so test that too.
// This is to be merged into the existing bitmask tests in ad-hack.js.

var ins = wasmEvalText(`
  (module
    (memory (export "mem") 1 1)
    (func (export "bitmask_i64x2") (result i32)
      (i64x2.bitmask (v128.load (i32.const 16))))
    (func (export "const_bitmask_i64x2") (result i32)
      (i64x2.bitmask (v128.const i64x2 0xff337f8012345678 0x0001984212345678))))`);

var mem8 = new Uint8Array(ins.exports.mem.buffer);
var mem64 = new BigUint64Array(ins.exports.mem.buffer);

// All sixteen input bytes zero: no lane has its top bit set.
set(mem8, 16, iota(16).map((_) => 0));
assertEq(ins.exports.bitmask_i64x2(), 0);

// [two 64-bit lanes written at byte offset 16, expected mask]
for (const [lanes, expectedMask] of [
    [[0x8000000000000000n, 0x8000000000000000n], 3],
    [[0x7FFFFFFFFFFFFFFFn, 0x7FFFFFFFFFFFFFFFn], 0],
    [[0n, 0x8000000000000000n], 2],
    [[0x8000000000000000n, 0n], 1],
]) {
    set(mem64, 2, lanes);
    assertEq(ins.exports.bitmask_i64x2(), expectedMask);
}

// Constant operand.
assertEq(ins.exports.const_bitmask_i64x2(), 1);

// Widen low/high.
// This is to be merged into the existing widening tests in ad-hack.js.

var ins = wasmEvalText(`
  (module
    (memory (export "mem") 1 1)
    (func (export "extend_low_i32x4_s")
      (v128.store (i32.const 0) (i64x2.extend_low_i32x4_s (v128.load (i32.const 16)))))
    (func (export "extend_high_i32x4_s")
      (v128.store (i32.const 0) (i64x2.extend_high_i32x4_s (v128.load (i32.const 16)))))
    (func (export "extend_low_i32x4_u")
      (v128.store (i32.const 0) (i64x2.extend_low_i32x4_u (v128.load (i32.const 16)))))
    (func (export "extend_high_i32x4_u")
      (v128.store (i32.const 0) (i64x2.extend_high_i32x4_u (v128.load (i32.const 16))))))`);

var mem32 = new Int32Array(ins.exports.mem.buffer);
var mem64 = new BigInt64Array(ins.exports.mem.buffer);
var mem64u = new BigUint64Array(ins.exports.mem.buffer);

// Shifting into the top byte makes two of the four inputs negative as int32.
var as = [205, 1, 192, 3].map((x) => x << 24);
set(mem32, 4, as);

// [export name, view used to read the result, index of the first source
//  lane, conversion applied to the int32 before widening to BigInt]
for (const [name, resultView, firstLane, toLaneValue] of [
    ["extend_low_i32x4_s", mem64, 0, (x) => x],
    ["extend_high_i32x4_s", mem64, 2, (x) => x],
    ["extend_low_i32x4_u", mem64u, 0, (x) => x >>> 0],
    ["extend_high_i32x4_u", mem64u, 2, (x) => x >>> 0],
]) {
    ins.exports[name]();
    assertSame(get(resultView, 0, 2),
               iota(2).map((n) => BigInt(toLaneValue(as[n + firstLane]))));
}

// Saturating rounding q-format multiplication.
// This is to be moved into ad-hack.js

var ins = wasmEvalText(`
  (module
    (memory (export "mem") 1 1)
    (func (export "q15mulr_sat_s")
      (v128.store (i32.const 0) (i16x8.q15mulr_sat_s (v128.load (i32.const 16)) (v128.load (i32.const 32))))))`);

var mem16 = new Int16Array(ins.exports.mem.buffer);
for ( let [as, bs] of cross(Int16Array.inputs) ) {
    set(mem16, 8, as);
    set(mem16, 16, bs);
    ins.exports.q15mulr_sat_s();
    // Reference: rounded product shifted down by 15, then saturated to 16 bits.
    assertSame(get(mem16, 0, 8),
               iota(8).map((i) => signed_saturate((as[i] * bs[i] + 0x4000) >> 15, 16)));
}


// i64x2.all_true

var ins = wasmEvalText(`
  (module
    (memory (export "mem") 1 1)
    (func (export "i64_all_true") (result i32)
      (i64x2.all_true (v128.load (i32.const 16)) ) ) )`);

var mem32 = new Int32Array(ins.exports.mem.buffer);

// [four 32-bit words forming the two 64-bit lanes, expected result].
// assertEq takes (actual, expected), as everywhere else in this file.
for (const [words, expected] of [
    [[0, 0, 0, 0], 0],
    [[1, 0, 0, 0], 0],
    [[1, 0, 0, 1], 1],
    [[0, 0, 10, 0], 0],
    [[0, -250, 1, 0], 1],
    [[-1, -1, -1, -1], 1],
]) {
    set(mem32, 4, words);
    assertEq(ins.exports.i64_all_true(), expected);
}

// Compile and instantiate a module from wasm text without going through
// wasmEvalText.
function wasmCompile(text) {
    return new WebAssembly.Instance(new WebAssembly.Module(wasmTextToBinary(text)))
}

// When the SIMD analysis hook is available and the compiler is Ion, check the
// optimization it reports for all_true used as a branch condition (directly
// and negated) and with a constant operand.
if (this.wasmSimdAnalysis && wasmCompileMode() == "ion") {
    const positive =
        wasmCompile(
            `(module
               (memory (export "mem") 1 1)
               (func $f (param v128) (result i32)
                   (if (result i32) (i64x2.all_true (local.get 0))
                       (i32.const 42)
                       (i32.const 37)))
               (func (export "run") (result i32)
                 (call $f (v128.load (i32.const 16)))))`);
    assertEq(wasmSimdAnalysis(), "simd128-to-scalar-and-branch -> folded");

    const negative =
        wasmCompile(
            `(module
               (memory (export "mem") 1 1)
               (func $f (param v128) (result i32)
                   (if (result i32) (i32.eqz (i64x2.all_true (local.get 0)))
                       (i32.const 42)
                       (i32.const 37)))
               (func (export "run") (result i32)
                 (call $f (v128.load (i32.const 16)))))`);
    assertEq(wasmSimdAnalysis(), "simd128-to-scalar-and-branch -> folded");

    for ( let inp of [[1n, 2n], [4n, 0n], [0n, 0n]]) {
        const all_true = inp.every(v => v != 0n)
        let mem = new BigInt64Array(positive.exports.mem.buffer);
        set(mem, 2, inp);
        assertEq(positive.exports.run(), all_true ? 42 : 37);

        mem = new BigInt64Array(negative.exports.mem.buffer);
        set(mem, 2, inp);
        assertEq(negative.exports.run(), all_true ? 37 : 42);
    }

    wasmCompile(`(module (func (result i32) (i64x2.all_true (v128.const i64x2 0 0))))`);
    assertEq(wasmSimdAnalysis(), "simd128-to-scalar -> constant folded");
}


// i64x2.eq and i64x2.ne

var ins = wasmEvalText(`
  (module
    (memory (export "mem") 1 1)
    (func (export "i64_eq")
      (v128.store (i32.const 0)
        (i64x2.eq (v128.load (i32.const 16)) (v128.load (i32.const 32))) ))
    (func (export "i64_ne")
      (v128.store (i32.const 0)
        (i64x2.ne (v128.load (i32.const 16)) (v128.load (i32.const 32))) )) )`);

var mem64 = new BigInt64Array(ins.exports.mem.buffer);

// Each set() writes four lanes: the first two are the lhs, the last two the rhs.
set(mem64, 2, [0n, 1n, 0n, 1n]);
ins.exports.i64_eq();
assertSame(get(mem64, 0, 2), [-1n, -1n]);
ins.exports.i64_ne();
assertSame(get(mem64, 0, 2), [0n, 0n]);
set(mem64, 2, [0x0n, -1n, 0x100000000n, -1n]);
ins.exports.i64_eq();
assertSame(get(mem64, 0, 2), [0n, -1n]);
set(mem64, 2, [-1n, 0x0n, -1n, 0x100000000n]);
ins.exports.i64_ne();
assertSame(get(mem64, 0, 2), [0n, -1n]);


// i64x2.lt_s, i64x2.gt_s, i64x2.le_s, and i64x2.ge_s

var ins = wasmEvalText(`
  (module
    (memory (export "mem") 1 1)
    (func (export "i64_lt_s")
      (v128.store (i32.const 0)
        (i64x2.lt_s (v128.load (i32.const 16)) (v128.load (i32.const 32))) ))
    (func (export "i64_gt_s")
      (v128.store (i32.const 0)
        (i64x2.gt_s (v128.load (i32.const 16)) (v128.load (i32.const 32))) ))
    (func (export "i64_le_s")
      (v128.store (i32.const 0)
        (i64x2.le_s (v128.load (i32.const 16)) (v128.load (i32.const 32))) ))
    (func (export "i64_ge_s")
      (v128.store (i32.const 0)
        (i64x2.ge_s (v128.load (i32.const 16)) (v128.load (i32.const 32))) )) )`);

var mem64 = new BigInt64Array(ins.exports.mem.buffer);

// [lhs lanes followed by rhs lanes, expected lt, gt, le, ge]
for (const [operands, lt_s, gt_s, le_s, ge_s] of [
    [[0n, 1n, 1n, 0n],   [-1n, 0n], [0n, -1n], [-1n, 0n],  [0n, -1n]],
    [[0n, -1n, -1n, 0n], [0n, -1n], [-1n, 0n], [0n, -1n],  [-1n, 0n]],
    [[-2n, 2n, -1n, 1n], [-1n, 0n], [0n, -1n], [-1n, 0n],  [0n, -1n]],
    [[-2n, 1n, -2n, 1n], [0n, 0n],  [0n, 0n],  [-1n, -1n], [-1n, -1n]],
]) {
    set(mem64, 2, operands);
    ins.exports.i64_lt_s();
    assertSame(get(mem64, 0, 2), lt_s);
    ins.exports.i64_gt_s();
    assertSame(get(mem64, 0, 2), gt_s);
    ins.exports.i64_le_s();
    assertSame(get(mem64, 0, 2), le_s);
    ins.exports.i64_ge_s();
    assertSame(get(mem64, 0, 2), ge_s);
}


// i64x2.abs

var ins = wasmEvalText(`
  (module
    (memory (export "mem") 1 1)
    (func (export "i64_abs")
      (v128.store (i32.const 0)
        (i64x2.abs (v128.load (i32.const 16))) )) )`);

var mem64 = new BigInt64Array(ins.exports.mem.buffer);

set(mem64, 2, [-3n, 42n]);
ins.exports.i64_abs();
assertSame(get(mem64, 0, 2), [3n, 42n]);
// The most negative value is expected to come back unchanged.
set(mem64, 2, [0n, -0x8000000000000000n]);
ins.exports.i64_abs();
assertSame(get(mem64, 0, 2), [0n, -0x8000000000000000n]);


// Load lane

var ins = wasmEvalText(`
  (module
    (memory (export "mem") 1 1)
    ${iota(16).map(i => `(func (export "load8_lane_${i}") (param i32)
      (v128.store (i32.const 0)
        (v128.load8_lane offset=0 ${i} (local.get 0) (v128.load (i32.const 16)))))
    `).join('')}
    ${iota(8).map(i => `(func (export "load16_lane_${i}") (param i32)
      (v128.store (i32.const 0)
        (v128.load16_lane offset=0 ${i} (local.get 0) (v128.load (i32.const 16)))))
    `).join('')}
    ${iota(4).map(i => `(func (export "load32_lane_${i}") (param i32)
      (v128.store (i32.const 0)
        (v128.load32_lane offset=0 ${i} (local.get 0) (v128.load (i32.const 16)))))
    `).join('')}
    ${iota(2).map(i => `(func (export "load64_lane_${i}") (param i32)
      (v128.store (i32.const 0)
        (v128.load64_lane offset=0 ${i} (local.get 0) (v128.load (i32.const 16)))))
    `).join('')}
    (func (export "load_lane_const_and_align")
      (v128.store (i32.const 0)
        (v128.load64_lane offset=32 1 (i32.const 1)
          (v128.load32_lane offset=32 1 (i32.const 3)
            (v128.load16_lane offset=32 0 (i32.const 5)
              (v128.load (i32.const 16)))))
      ))
  )`);

var mem8 = new Int8Array(ins.exports.mem.buffer);
var mem32 = new Int32Array(ins.exports.mem.buffer);
var mem64 = new BigInt64Array(ins.exports.mem.buffer);

// The base vector lives at byte 16; the value to be loaded into a lane lives
// at byte 32.  Every export stores its result at byte 0.
var as = [0x12345678, 0x23456789, 0x3456789A, 0x456789AB];
set(mem32, 4, as); set(mem8, 32, [0xC2]);

for (const [name, expected] of [
    ["load8_lane_0",  [0x123456C2, 0x23456789, 0x3456789A, 0x456789AB]],
    ["load8_lane_1",  [0x1234C278, 0x23456789, 0x3456789A, 0x456789AB]],
    ["load8_lane_2",  [0x12C25678, 0x23456789, 0x3456789A, 0x456789AB]],
    ["load8_lane_3",  [0xC2345678|0, 0x23456789, 0x3456789A, 0x456789AB]],
    ["load8_lane_4",  [0x12345678, 0x234567C2, 0x3456789A, 0x456789AB]],
    ["load8_lane_6",  [0x12345678, 0x23C26789, 0x3456789A, 0x456789AB]],
    ["load8_lane_9",  [0x12345678, 0x23456789, 0x3456C29A, 0x456789AB]],
    ["load8_lane_14", [0x12345678, 0x23456789, 0x3456789A, 0x45C289AB]],
]) {
    ins.exports[name](32);
    assertSame(get(mem32, 0, 4), expected);
}

set(mem8, 32, [0xC2, 0xD1]);

for (const [name, expected] of [
    ["load16_lane_0", [0x1234D1C2, 0x23456789, 0x3456789A, 0x456789AB]],
    ["load16_lane_1", [0xD1C25678|0, 0x23456789, 0x3456789A, 0x456789AB]],
    ["load16_lane_2", [0x12345678, 0x2345D1C2, 0x3456789A, 0x456789AB]],
    ["load16_lane_5", [0x12345678, 0x23456789, 0xD1C2789A|0, 0x456789AB]],
    ["load16_lane_7", [0x12345678, 0x23456789, 0x3456789A, 0xD1C289AB|0]],
]) {
    ins.exports[name](32);
    assertSame(get(mem32, 0, 4), expected);
}

set(mem32, 8, [0x16B5C3D0]);

for (const [name, expected] of [
    ["load32_lane_0", [0x16B5C3D0, 0x23456789, 0x3456789A, 0x456789AB]],
    ["load32_lane_1", [0x12345678, 0x16B5C3D0, 0x3456789A, 0x456789AB]],
    ["load32_lane_2", [0x12345678, 0x23456789, 0x16B5C3D0, 0x456789AB]],
    ["load32_lane_3", [0x12345678, 0x23456789, 0x3456789A, 0x16B5C3D0]],
]) {
    ins.exports[name](32);
    assertSame(get(mem32, 0, 4), expected);
}

set(mem64, 4, [0x3300AA4416B5C3D0n]);

ins.exports["load64_lane_0"](32);
assertSame(get(mem64, 0, 2), [0x3300AA4416B5C3D0n, 0x456789AB3456789An]);
ins.exports["load64_lane_1"](32);
assertSame(get(mem64, 0, 2), [0x2345678912345678n, 0x3300AA4416B5C3D0n]);

// .. (mis)align load lane

var as = [0x12345678, 0x23456789, 0x3456789A, 0x456789AB];
set(mem32, 4, as); set(mem64, 4, [0x3300AA4416B5C3D0n, 0x300AA4416B5C3D03n]);

ins.exports["load16_lane_5"](33);
assertSame(get(mem32, 0, 4), [0x12345678,0x23456789,0xb5c3789a|0,0x456789ab]);
ins.exports["load32_lane_1"](34);
assertSame(get(mem32, 0, 4), [0x12345678, 0xaa4416b5|0,0x3456789a,0x456789ab]);
ins.exports["load64_lane_0"](35);
assertSame(get(mem64, 0, 2), [0x5c3d033300aa4416n, 0x456789ab3456789an]);

ins.exports["load_lane_const_and_align"]();
assertSame(get(mem32, 0, 4), [0x123400aa,0x00AA4416,0x4416b5c3,0x033300aa]);

// Store lane

var ins = wasmEvalText(`
  (module
    (memory (export "mem") 1 1)
    ${iota(16).map(i => `(func (export "store8_lane_${i}") (param i32) (param i32)
      (v128.store8_lane ${i} (local.get 1) (v128.load (local.get 0))))
    `).join('')}
    ${iota(8).map(i => `(func (export "store16_lane_${i}") (param i32) (param i32)
      (v128.store16_lane ${i} (local.get 1) (v128.load (local.get 0))))
    `).join('')}
    ${iota(4).map(i => `(func (export "store32_lane_${i}") (param i32) (param i32)
      (v128.store32_lane ${i} (local.get 1) (v128.load (local.get 0))))
    `).join('')}
    ${iota(2).map(i => `(func (export "store64_lane_${i}") (param i32) (param i32)
      (v128.store64_lane ${i} (local.get 1) (v128.load (local.get 0))))
    `).join('')}
    (func (export "store_lane_const_and_align")
      (v128.store16_lane 1 (i32.const 33) (v128.load (i32.const 16)))
      (v128.store32_lane 2 (i32.const 37) (v128.load (i32.const 16)))
      (v128.store64_lane 0 (i32.const 47) (v128.load (i32.const 16)))
    ))`);


var mem8 = new Int8Array(ins.exports.mem.buffer);
var mem32 = new Int32Array(ins.exports.mem.buffer);
var mem64 = new BigInt64Array(ins.exports.mem.buffer);

// Each store export takes (source vector address, destination address).
var as = [0x12345678, 0x23456789, 0x3456789A, 0x456789AB];
set(mem32, 4, as); set(mem32, 0, [0x7799AA00, 42, 3, 0]);

ins.exports["store8_lane_0"](16, 0); assertSame(get(mem32, 0, 1), [0x7799AA78]);
// Remaining 8-bit lane stores: each overwrites only the low byte of word 0.
for (const [name, expectedWord] of [
    ["store8_lane_1", 0x7799AA56],
    ["store8_lane_2", 0x7799AA34],
    ["store8_lane_3", 0x7799AA12],
    ["store8_lane_5", 0x7799AA67],
    ["store8_lane_7", 0x7799AA23],
    ["store8_lane_8", 0x7799AA9A],
    ["store8_lane_15", 0x7799AA45],
]) {
    ins.exports[name](16, 0);
    assertSame(get(mem32, 0, 1), [expectedWord]);
}

// 16-bit lane stores overwrite the low half of word 0.
for (const [name, expectedWord] of [
    ["store16_lane_0", 0x77995678],
    ["store16_lane_1", 0x77991234],
    ["store16_lane_2", 0x77996789],
    ["store16_lane_5", 0x77993456],
    ["store16_lane_7", 0x77994567],
]) {
    ins.exports[name](16, 0);
    assertSame(get(mem32, 0, 1), [expectedWord]);
}

// 32-bit lane stores replace word 0 and must leave word 1 (42) alone.
for (const [name, expectedWord] of [
    ["store32_lane_0", 0x12345678],
    ["store32_lane_1", 0x23456789],
    ["store32_lane_2", 0x3456789A],
    ["store32_lane_3", 0x456789AB],
]) {
    ins.exports[name](16, 0);
    assertSame(get(mem32, 0, 2), [expectedWord, 42]);
}

// 64-bit lane stores replace the first 64-bit element only.
for (const [name, expectedLane] of [
    ["store64_lane_0", 0x2345678912345678n],
    ["store64_lane_1", 0x456789AB3456789An],
]) {
    ins.exports[name](16, 0);
    assertSame(get(mem64, 0, 2), [expectedLane, 3]);
}

// .. (mis)align store lane

var as = [0x12345678, 0x23456789, 0x3456789A, 0x456789AB];
set(mem32, 4, as);
set(mem32, 0, [0x7799AA01, 42, 3, 0]);
ins.exports["store16_lane_1"](16, 1);
assertSame(get(mem32, 0, 2), [0x77123401, 42]);

set(mem32, 0, [0x7799AA01, 42, 3, 0]);
ins.exports["store32_lane_1"](16, 2);
assertSame(get(mem32, 0, 2), [0x6789AA01, 0x2345]);

set(mem32, 0, [0x7799AA01, 42, 5, 3]);
ins.exports["store64_lane_0"](16, 1);
assertSame(get(mem64, 0, 2), [0x4567891234567801n, 0x0300000023]);

// Constant, unaligned destination addresses (33, 37 and 47).
set(mem32, 4, [
    0x12345678, 0x23456789, 0x3456789A, 0x456789AB,
    0x55AA55AA, 0xCC44CC44, 0x55AA55AA, 0xCC44CC44,
    0x55AA55AA, 0xCC44CC44, 0x55AA55AA, 0xCC44CC44,
]);
ins.exports["store_lane_const_and_align"]();
assertSame(get(mem32, 8, 8), [
    0x551234aa, 0x56789a44, 0x55aa5534, 0x7844cc44,
    0x89123456|0, 0xcc234567|0, 0x55aa55aa, 0xcc44cc44|0,
]);


// i8x16.popcnt

var ins = wasmEvalText(`
  (module
    (memory (export "mem") 1 1)
    (func (export "i8x16_popcnt")
      (v128.store (i32.const 0) (i8x16.popcnt (v128.load (i32.const 16)) )))
  )`);

var mem8 = new Int8Array(ins.exports.mem.buffer);

// Input bytes at offset 16, per-byte population counts read back from offset 0.
set(mem8, 16, [0, 1, 2, 4, 8, 0x10, 0x20, 0x40, 0x80, 3, -1, 0xF0, 0x11, 0xFE, 0x0F, 0xE]);
ins.exports.i8x16_popcnt();
assertSame(get(mem8, 0, 16), [0,1,1,1,1,1,1,1,1,2,8,4,2,7,4,3]);


/// Double-precision conversion instructions.
+/// f64x2.convert_low_i32x4_{u,s} / i32x4.trunc_sat_f64x2_{u,s}_zero +/// f32x4.demote_f64x2_zero / f64x2.promote_low_f32x4 + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "f64x2_convert_low_i32x4_s") + (v128.store (i32.const 0) (f64x2.convert_low_i32x4_s (v128.load (i32.const 16)) ))) + (func (export "f64x2_convert_low_i32x4_u") + (v128.store (i32.const 0) (f64x2.convert_low_i32x4_u (v128.load (i32.const 16)) ))) + + (func (export "i32x4_trunc_sat_f64x2_s_zero") + (v128.store (i32.const 0) (i32x4.trunc_sat_f64x2_s_zero (v128.load (i32.const 16)) ))) + (func (export "i32x4_trunc_sat_f64x2_u_zero") + (v128.store (i32.const 0) (i32x4.trunc_sat_f64x2_u_zero (v128.load (i32.const 16)) ))) + + (func (export "f32x4_demote_f64x2") + (v128.store (i32.const 0) (f32x4.demote_f64x2_zero (v128.load (i32.const 16)) ))) + (func (export "f64x2_protomote_f32x4") + (v128.store (i32.const 0) (f64x2.promote_low_f32x4 (v128.load (i32.const 16)) ))) + )`); + +var mem32 = new Int32Array(ins.exports.mem.buffer); +var memU32 = new Uint32Array(ins.exports.mem.buffer); +var memF32 = new Float32Array(ins.exports.mem.buffer); +var memF64 = new Float64Array(ins.exports.mem.buffer); + +// f64x2.convert_low_i32x4_u / f64x2.convert_low_i32x4_s + +set(mem32, 4, [1, -2, 0, -2]); +ins.exports.f64x2_convert_low_i32x4_s(); +assertSame(get(memF64, 0, 2), [1, -2]); +set(mem32, 4, [-1, 0, 5, -212312312]); +ins.exports.f64x2_convert_low_i32x4_s(); +assertSame(get(memF64, 0, 2), [-1, 0]); + +set(memU32, 4, [1, 4045646797, 4, 0]); +ins.exports.f64x2_convert_low_i32x4_u(); +assertSame(get(memF64, 0, 2), [1, 4045646797]); +set(memU32, 4, [0, 2, 4, 3]); +ins.exports.f64x2_convert_low_i32x4_u(); +assertSame(get(memF64, 0, 2), [0, 2]); + +// i32x4.trunc_sat_f64x2_u_zero / i32x4.trunc_sat_f64x2_s_zero + +set(memF64, 2, [0,0]) +ins.exports.i32x4_trunc_sat_f64x2_s_zero(); +assertSame(get(mem32, 0, 4), [0,0,0,0]); +ins.exports.i32x4_trunc_sat_f64x2_u_zero(); 
+assertSame(get(memU32, 0, 4), [0,0,0,0]); + +set(memF64, 2, [-1.23,65535.12]) +ins.exports.i32x4_trunc_sat_f64x2_s_zero(); +assertSame(get(mem32, 0, 4), [-1,65535,0,0]); +set(memF64, 2, [1.99,65535.12]) +ins.exports.i32x4_trunc_sat_f64x2_u_zero(); +assertSame(get(memU32, 0, 4), [1,65535,0,0]); + +set(memF64, 2, [10e+100,-10e+100]) +ins.exports.i32x4_trunc_sat_f64x2_s_zero(); +assertSame(get(mem32, 0, 4), [0x7fffffff,-0x80000000,0,0]); +ins.exports.i32x4_trunc_sat_f64x2_u_zero(); +assertSame(get(memU32, 0, 4), [0xffffffff,0,0,0]); + +// f32x4.demote_f64x2_zero + +set(memF64, 2, [1, 2]) +ins.exports.f32x4_demote_f64x2(); +assertSame(get(memF32, 0, 4), [1,2,0,0]); + +set(memF64, 2, [-4e38, 4e38]) +ins.exports.f32x4_demote_f64x2(); +assertSame(get(memF32, 0, 4), [-Infinity,Infinity,0,0]); + +set(memF64, 2, [-1e-46, 1e-46]) +ins.exports.f32x4_demote_f64x2(); +assertSame(get(memF32, 0, 4), [1/-Infinity,0,0,0]); + +set(memF64, 2, [0, NaN]) +ins.exports.f32x4_demote_f64x2(); +assertSame(get(memF32, 0, 4), [0, NaN,0,0]); + +set(memF64, 2, [Infinity, -Infinity]) +ins.exports.f32x4_demote_f64x2(); +assertSame(get(memF32, 0, 4), [Infinity, -Infinity,0,0]); + +// f64x2.promote_low_f32x4 + +set(memF32, 4, [4, 3, 1, 2]) +ins.exports.f64x2_protomote_f32x4(); +assertSame(get(memF64, 0, 2), [4, 3]); + +set(memF32, 4, [NaN, 0, 0, 0]) +ins.exports.f64x2_protomote_f32x4(); +assertSame(get(memF64, 0, 2), [NaN, 0]); + +set(memF32, 4, [Infinity, -Infinity, 0, 0]) +ins.exports.f64x2_protomote_f32x4(); +assertSame(get(memF64, 0, 2), [Infinity, -Infinity]); + + +// i16x8.extadd_pairwise_i8x16_{s,u} / i32x4.extadd_pairwise_i16x8_{s,u} + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "i16x8_extadd_pairwise_i8x16_s") + (v128.store (i32.const 0) (i16x8.extadd_pairwise_i8x16_s (v128.load (i32.const 16)) ))) + (func (export "i16x8_extadd_pairwise_i8x16_u") + (v128.store (i32.const 0) (i16x8.extadd_pairwise_i8x16_u (v128.load (i32.const 16)) ))) + + (func (export 
"i32x4_extadd_pairwise_i16x8_s") + (v128.store (i32.const 0) (i32x4.extadd_pairwise_i16x8_s (v128.load (i32.const 16)) ))) + (func (export "i32x4_extadd_pairwise_i16x8_u") + (v128.store (i32.const 0) (i32x4.extadd_pairwise_i16x8_u (v128.load (i32.const 16)) ))) + )`); + +var mem8 = new Int8Array(ins.exports.mem.buffer); +var memU8 = new Uint8Array(ins.exports.mem.buffer); +var mem16 = new Int16Array(ins.exports.mem.buffer); +var memU16 = new Uint16Array(ins.exports.mem.buffer); +var mem32 = new Int32Array(ins.exports.mem.buffer); +var memU32 = new Uint32Array(ins.exports.mem.buffer); + +set(mem8, 16, [0, 0, 1, 1, 2, -2, 0, 42, 1, -101, 101, -1, 127, 125, -1, -2]); +ins.exports.i16x8_extadd_pairwise_i8x16_s(); +assertSame(get(mem16, 0, 8), [0, 2, 0, 42, -100, 100, 252, -3]); + +set(memU8, 16, [0, 0, 1, 1, 2, 255, 0, 42, 0, 255, 254, 0, 127, 125, 255, 255]); +ins.exports.i16x8_extadd_pairwise_i8x16_u(); +assertSame(get(memU16, 0, 8), [0, 2, 257, 42, 255, 254, 252, 510]); + +set(mem16, 8, [0, 0, 1, 1, 2, -2, -1, -2]); +ins.exports.i32x4_extadd_pairwise_i16x8_s(); +assertSame(get(mem32, 0, 4), [0, 2, 0, -3]); +set(mem16, 8, [0, 42, 1, -32760, 32766, -1, 32761, 32762]); +ins.exports.i32x4_extadd_pairwise_i16x8_s(); +assertSame(get(mem32, 0, 4), [42, -32759, 32765, 65523]); + +set(memU16, 8, [0, 0, 1, 1, 2, 65535, 65535, 65535]); +ins.exports.i32x4_extadd_pairwise_i16x8_u(); +assertSame(get(memU32, 0, 4), [0, 2, 65537, 131070]); +set(memU16, 8, [0, 42, 0, 65535, 65534, 0, 32768, 32765]); +ins.exports.i32x4_extadd_pairwise_i16x8_u(); +assertSame(get(memU32, 0, 4), [42, 65535, 65534, 65533]); diff --git a/js/src/jit-test/tests/wasm/simd/ad-hack-preamble.js b/js/src/jit-test/tests/wasm/simd/ad-hack-preamble.js new file mode 100644 index 0000000000..407b59476f --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/ad-hack-preamble.js @@ -0,0 +1,211 @@ +// |jit-test| skip-if: true + +// Common code for the ad-hack test cases. 

// Read `len` consecutive elements of the typed array (or array) `arr`,
// starting at element index `loc`, and return them as a plain array.
function get(arr, loc, len) {
    let res = [];
    for ( let i=0; i < len; i++ ) {
        res.push(arr[loc+i]);
    }
    return res;
}

// Read `len` little-endian values of `width` bytes each (width <= 4) from the
// Uint8Array `arr`, starting at byte offset `loc`, which need not be aligned.
//
// The bytes are combined with 32-bit integer operations, so a 4-byte value with
// its top bit set comes back as a negative number.
function getUnaligned(arr, width, loc, len) {
    assertEq(arr.constructor, Uint8Array);
    assertEq(width <= 4, true);
    let res = [];
    for ( let i=0; i < len; i++ ) {
        let x = 0;
        // Most significant byte first, so that the byte at the lowest address
        // ends up in the low bits.
        for ( let j=width-1; j >= 0; j-- ) {
            x = (x << 8) | arr[loc+i*width+j];
        }
        res.push(x);
    }
    return res;
}

// Store the values `vals` into `arr` starting at element index `loc`.
//
// 64-bit integer arrays only accept BigInt elements, so for BigInt64Array and
// BigUint64Array every value is converted with BigInt() first; this lets test
// cases write plain numbers.
function set(arr, loc, vals) {
    const needsBigInt = arr instanceof BigInt64Array || arr instanceof BigUint64Array;
    for ( let i=0; i < vals.length; i++ ) {
        arr[loc+i] = needsBigInt ? BigInt(vals[i]) : vals[i];
    }
}

// Store `vals` as little-endian values of `width` bytes each (width <= 4) into
// the Uint8Array `arr`, starting at byte offset `loc`, which need not be
// aligned.
function setUnaligned(arr, width, loc, vals) {
    assertEq(arr.constructor, Uint8Array);
    assertEq(width <= 4, true);
    for ( let i=0; i < vals.length; i++ ) {
        let x = vals[i];
        for ( let j=0 ; j < width ; j++ ) {
            arr[loc+i*width + j] = x & 255;
            x >>= 8;
        }
    }
}

// Strict equality, except that NaN is considered equal to NaN.
//
// Number.isNaN is used rather than the global isNaN: the global coerces its
// argument, which throws for BigInt operands and reports true for arbitrary
// non-numeric values.
function equal(a, b) {
    return a === b || (Number.isNaN(a) && Number.isNaN(b));
}

// Return a copy of `xs` in which element `at` has been replaced by `val`; the
// input is not modified.
function upd(xs, at, val) {
    let ys = Array.from(xs);
    ys[at] = val;
    return ys;
}

// The following operations are not always generalized fully, they are just
// functional enough for the existing test cases to pass.
+ +function sign_extend(n, bits) { + if (bits < 32) { + n = Number(n); + return (n << (32 - bits)) >> (32 - bits); + } + if (typeof n == "bigint") { + if (bits == 32) + return Number(n & 0xFFFF_FFFFn) | 0; + assertEq(bits, 64); + n = (n & 0xFFFF_FFFF_FFFF_FFFFn) + if (n > 0x7FFF_FFFF_FFFF_FFFFn) + return n - 0x1_0000_0000_0000_0000n; + return n; + } + assertEq(bits, 32); + return n|0; +} + +function zero_extend(n, bits) { + if (bits < 32) { + return n & ((1 << bits) - 1); + } + if (n < 0) + n = 0x100000000 + n; + return n; +} + +function signed_saturate(z, bits) { + let min = -(1 << (bits-1)); + if (z <= min) { + return min; + } + let max = (1 << (bits-1)) - 1; + if (z > max) { + return max; + } + return z; +} + +function unsigned_saturate(z, bits) { + if (z <= 0) { + return 0; + } + let max = (1 << bits) - 1; + if (z > max) { + return max; + } + return z; +} + +function shl(count, width) { + if (width == 64) { + count = BigInt(count); + return (v) => { + v = BigInt(v); + if (v < 0) + v = (1n << 64n) + v; + let r = (v << count) & ((1n << 64n) - 1n); + if (r & (1n << 63n)) + r = -((1n << 64n) - r); + return r; + } + } else { + return (v) => { + let mask = (width == 32) ? -1 : ((1 << width) - 1); + return (v << count) & mask; + } + } +} + +function popcount(n) { + n = n - ((n >> 1) & 0x55555555) + n = (n & 0x33333333) + ((n >> 2) & 0x33333333) + return ((n + (n >> 4) & 0xF0F0F0F) * 0x1010101) >> 24 +} + +function jsValueToWasmName(x) { + if (typeof x == "number") { + if (x == 0) return 1 / x < 0 ? "-0" : "0"; + if (isNaN(x)) return "+nan"; + if (!isFinite(x)) return (x < 0 ? "-" : "+") + "inf"; + } + return x; +} + +// For each input array, a set of arrays of the proper length for v128, with +// values in range but possibly of the wrong signedness (eg, for Int8Array, 128 +// is in range but is really -128). Also a unary operator `rectify` that +// transforms the value to the proper sign and bitwidth. + +Int8Array.inputs = [iota(16).map((x) => (x+1) * (x % 3 == 0 ? 
-1 : 1)), + iota(16).map((x) => (x*2+3) * (x % 3 == 1 ? -1 : 1)), + [1,2,128,127,1,4,128,127,1,2,129,125,1,2,254,0], + [2,1,127,128,5,1,127,128,2,1,126,130,2,1,1,255], + iota(16).map((x) => ((x + 37) * 8 + 12) % 256), + iota(16).map((x) => ((x + 12) * 4 + 9) % 256)]; +Int8Array.rectify = (x) => sign_extend(x,8); +Int8Array.layoutName = 'i8x16'; + +Uint8Array.inputs = Int8Array.inputs; +Uint8Array.rectify = (x) => zero_extend(x,8); +Uint8Array.layoutName = 'i8x16'; + +Int16Array.inputs = [iota(8).map((x) => (x+1) * (x % 3 == 0 ? -1 : 1)), + iota(8).map((x) => (x*2+3) * (x % 3 == 1 ? -1 : 1)), + [1,2,32768,32767,1,4,32768,32767], + [2,1,32767,32768,5,1,32767,32768], + [1,2,128,127,1,4,128,127].map((x) => (x << 8) + x*2), + [2,1,127,128,1,1,128,128].map((x) => (x << 8) + x*3)]; +Int16Array.rectify = (x) => sign_extend(x,16); +Int16Array.layoutName = 'i16x8'; + +Uint16Array.inputs = Int16Array.inputs; +Uint16Array.rectify = (x) => zero_extend(x,16); +Uint16Array.layoutName = 'i16x8'; + +Int32Array.inputs = [iota(4).map((x) => (x+1) * (x % 3 == 0 ? -1 : 1)), + iota(4).map((x) => (x*2+3) * (x % 3 == 1 ? 
-1 : 1)), + [1,2,32768 << 16,32767 << 16], + [2,1,32767 << 16,32768 << 16], + [1,2,128,127].map((x) => (x << 24) + (x << 8) + x*3), + [2,1,127,128].map((x) => (x << 24) + (x << 8) + x*4)]; +Int32Array.rectify = (x) => sign_extend(x,32); +Int32Array.layoutName = 'i32x4'; + +Uint32Array.inputs = Int32Array.inputs; +Uint32Array.rectify = (x) => zero_extend(x,32); +Uint32Array.layoutName = 'i32x4'; + +BigInt64Array.inputs = [[1,2],[2,1],[-1,-2],[-2,-1],[2n ** 32n, 2n ** 32n - 5n], + [(2n ** 38n) / 5n, (2n ** 41n) / 7n], + [-((2n ** 38n) / 5n), (2n ** 41n) / 7n]]; +BigInt64Array.rectify = (x) => BigInt(x); +BigInt64Array.layoutName = 'i64x2'; + +Float32Array.inputs = [[1, -1, 1e10, -1e10], + [-1, -2, -1e10, 1e10], + [5.1, -1.1, -4.3, -0], + ...permute([1, -10, NaN, Infinity])]; +Float32Array.rectify = (x) => Math.fround(x); +Float32Array.layoutName = 'f32x4'; + +Float64Array.inputs = Float32Array.inputs.map((x) => x.slice(0, 2)) +Float64Array.rectify = (x) => x; +Float64Array.layoutName = 'f64x2'; + +// Tidy up all the inputs +for ( let A of [Int8Array, Uint8Array, Int16Array, Uint16Array, Int32Array, Uint32Array, BigInt64Array, + Float32Array, Float64Array]) { + A.inputs = A.inputs.map((xs) => xs.map(A.rectify)); +} diff --git a/js/src/jit-test/tests/wasm/simd/ad-hack-simple-binops0.js b/js/src/jit-test/tests/wasm/simd/ad-hack-simple-binops0.js new file mode 100644 index 0000000000..f3406ac44a --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/ad-hack-simple-binops0.js @@ -0,0 +1,7 @@ +// |jit-test| skip-if: !wasmSimdEnabled() + +// Do not include these in the preamble, they must be loaded after lib/wasm.js +load(scriptdir + "ad-hack-preamble.js") +load(scriptdir + "ad-hack-binop-preamble.js") + +runSimpleBinopTest(0, 3); diff --git a/js/src/jit-test/tests/wasm/simd/ad-hack-simple-binops1.js b/js/src/jit-test/tests/wasm/simd/ad-hack-simple-binops1.js new file mode 100644 index 0000000000..e6d6f7e2fc --- /dev/null +++ 
b/js/src/jit-test/tests/wasm/simd/ad-hack-simple-binops1.js @@ -0,0 +1,7 @@ +// |jit-test| skip-if: !wasmSimdEnabled() + +// Do not include these in the preamble, they must be loaded after lib/wasm.js +load(scriptdir + "ad-hack-preamble.js") +load(scriptdir + "ad-hack-binop-preamble.js") + +runSimpleBinopTest(1, 3); diff --git a/js/src/jit-test/tests/wasm/simd/ad-hack-simple-binops2.js b/js/src/jit-test/tests/wasm/simd/ad-hack-simple-binops2.js new file mode 100644 index 0000000000..a196aa28fc --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/ad-hack-simple-binops2.js @@ -0,0 +1,7 @@ +// |jit-test| skip-if: !wasmSimdEnabled() + +// Do not include these in the preamble, they must be loaded after lib/wasm.js +load(scriptdir + "ad-hack-preamble.js") +load(scriptdir + "ad-hack-binop-preamble.js") + +runSimpleBinopTest(2, 3); diff --git a/js/src/jit-test/tests/wasm/simd/ad-hack-simple-unops.js b/js/src/jit-test/tests/wasm/simd/ad-hack-simple-unops.js new file mode 100644 index 0000000000..6e562a97a1 --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/ad-hack-simple-unops.js @@ -0,0 +1,122 @@ +// |jit-test| skip-if: !wasmSimdEnabled() + +// Do not include this in the preamble, it must be loaded after lib/wasm.js +load(scriptdir + "ad-hack-preamble.js") + +// Simple unary operators. Place parameter in memory at offset 16, +// read the result at offset 0. 
+ +function expandConstantUnopInputs(op, memtype, inputs) { + let s = ''; + let ident = 0; + for ( let a of inputs ) { + let constval = `${memtype.layoutName} ${a.map(jsValueToWasmName).join(' ')}`; + s += ` + (func (export "run_const${ident}") + (v128.store (i32.const 0) + (${op} (v128.const ${constval})))) +`; + ident++; + } + return s; +} + +function insAndMemUnop(op, memtype, resultmemtype, inputs) { + var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + + (func (export "run") + (v128.store (i32.const 0) + (call $doit (v128.load (i32.const 16))))) + + (func $doit (param $a v128) (result v128) + (${op} (local.get $a))) + + ${expandConstantUnopInputs(op, memtype, inputs)})`); + var mem = new memtype(ins.exports.mem.buffer); + var resultmem = !resultmemtype || memtype == resultmemtype ? mem : new resultmemtype(ins.exports.mem.buffer); + return [ins, mem, resultmem]; +} + +function ineg(bits) { return (a) => sign_extend(!a ? a : -a,bits) } +function iabs(bits) { return (a) => zero_extend(a < 0 ? 
-a : a, bits) } +function fneg(a) { return -a } +function fabs(a) { return Math.abs(a) } +function fsqrt(a) { return Math.fround(Math.sqrt(Math.fround(a))) } +function dsqrt(a) { return Math.sqrt(a) } +function bitnot(a) { return (~a) & 255 } +function ffloor(x) { return Math.fround(Math.floor(x)) } +function fceil(x) { return Math.fround(Math.ceil(x)) } +function ftrunc(x) { return Math.fround(Math.sign(x)*Math.floor(Math.abs(x))) } +function fnearest(x) { return Math.fround(Math.round(x)) } +function dfloor(x) { return Math.floor(x) } +function dceil(x) { return Math.ceil(x) } +function dtrunc(x) { return Math.sign(x)*Math.floor(Math.abs(x)) } +function dnearest(x) { return Math.round(x) } + +for ( let [op, memtype, rop, resultmemtype] of + [['i8x16.neg', Int8Array, ineg(8)], + ['i16x8.neg', Int16Array, ineg(16)], + ['i32x4.neg', Int32Array, ineg(32)], + ['i64x2.neg', BigInt64Array, ineg(64)], + ['i8x16.abs', Int8Array, iabs(8), Uint8Array], + ['i16x8.abs', Int16Array, iabs(16), Uint16Array], + ['i32x4.abs', Int32Array, iabs(32), Uint32Array], + ['f32x4.neg', Float32Array, fneg], + ['f64x2.neg', Float64Array, fneg], + ['f32x4.abs', Float32Array, fabs], + ['f64x2.abs', Float64Array, fabs], + ['f32x4.sqrt', Float32Array, fsqrt], + ['f64x2.sqrt', Float64Array, dsqrt], + ['f32x4.ceil', Float32Array, fceil], + ['f32x4.floor', Float32Array, ffloor], + ['f32x4.trunc', Float32Array, ftrunc], + ['f32x4.nearest', Float32Array, fnearest], + ['f64x2.ceil', Float64Array, dceil], + ['f64x2.floor', Float64Array, dfloor], + ['f64x2.trunc', Float64Array, dtrunc], + ['f64x2.nearest', Float64Array, dnearest], + ['v128.not', Uint8Array, bitnot], + ]) +{ + let [ins, mem, resultmem] = insAndMemUnop(op, memtype, resultmemtype, memtype.inputs); + let len = 16/memtype.BYTES_PER_ELEMENT; + let xs = iota(len); + let zero = xs.map(_ => 0); + let bitsForF32 = memtype == Float32Array ? new Uint32Array(mem.buffer) : null; + let bitsForF64 = memtype == Float64Array ? 
new BigInt64Array(mem.buffer) : null; + + function testIt(a, r) { + set(mem, len, a); + ins.exports.run(); + assertSame(get(resultmem, 0, len), r); + + // Test signalling NaN superficially by replacing QNaN inputs with SNaN + if (bitsForF32 != null && a.some(isNaN)) { + a.forEach((x, i) => { if (isNaN(x)) { bitsForF32[len+i] = 0x7FA0_0000; } }); + ins.exports.run(); + assertSame(get(resultmem, 0, len), r); + } + if (bitsForF64 != null && a.some(isNaN)) { + a.forEach((x, i) => { if (isNaN(x)) { bitsForF64[len+i] = 0x7FF4_0000_0000_0000n; } }); + ins.exports.run(); + assertSame(get(resultmem, 0, len), r); + } + } + + function testConstIt(i,r) { + set(resultmem, 0, zero); + ins.exports["run_const" + i](); + assertSame(get(resultmem, 0, len), r); + } + + let i = 0; + for (let a of memtype.inputs) { + let r = xs.map((i) => rop(a[i])); + testIt(a, r); + testConstIt(i, r); + i++; + } +} + diff --git a/js/src/jit-test/tests/wasm/simd/ad-hack.js b/js/src/jit-test/tests/wasm/simd/ad-hack.js new file mode 100644 index 0000000000..b64b11cf52 --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/ad-hack.js @@ -0,0 +1,1747 @@ +// |jit-test| skip-if: !wasmSimdEnabled() + +// Ad-hoc test cases used during development. Generally these are ordered from +// easier toward harder. +// +// The test cases here are usually those that require some special processing. +// Simple binary operators (v128 x v128 -> v128) and unary operators (v128 -> +// v128) are tested in ad-hack-simple-binops*.js and ad-hack-simple-unops.js. 
+ +// Do not include this in the preamble, it must be loaded after lib/wasm.js +load(scriptdir + "ad-hack-preamble.js") + +// v128.store +// oob store +// v128.const + +for ( let offset of [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) { + var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "f") (param $loc i32) + (v128.store offset=${offset} (local.get $loc) (v128.const i32x4 ${1+offset} 2 3 ${4+offset*2}))))`); + var mem8 = new Uint8Array(ins.exports.mem.buffer); + ins.exports.f(160); + assertSame(getUnaligned(mem8, 4, 160 + offset, 4), [1+offset, 2, 3, 4+offset*2]); + + // OOB write should trap + assertErrorMessage(() => ins.exports.f(65536-15), + WebAssembly.RuntimeError, + /index out of bounds/) + + // Ensure that OOB writes don't write anything: moved to simd-partial-oob-store.js +} + +// v128.load +// oob load +// v128.store +// temp register + +for ( let offset of [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) { + var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "copy") (param $dest i32) (param $src i32) + (v128.store (local.get $dest) (v128.load offset=${offset} (local.get $src)))))`); + var mem32 = new Uint32Array(ins.exports.mem.buffer); + var mem8 = new Uint8Array(ins.exports.mem.buffer); + setUnaligned(mem8, 4, 4*4 + offset, [8+offset, 10, 12, 14+offset*2]); + ins.exports.copy(40*4, 4*4); + assertSame(get(mem32, 40, 4), [8+offset, 10, 12, 14+offset*2]); + assertErrorMessage(() => ins.exports.copy(40*4, 65536-15), + WebAssembly.RuntimeError, + /index out of bounds/); +} + +// call [with register params] +// parameters [in registers] +// return [with register values] +// locals +// +// local.get +// local.set +// v128.const +// v128.store + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func $g (param $param v128) (result v128) + (local $tmp v128) + (local.set $tmp (local.get $param)) + (local.get $tmp)) + (func (export "f") + (v128.store (i32.const 160) 
(call $g (v128.const i32x4 1 2 3 4)))))`); +var mem = new Uint32Array(ins.exports.mem.buffer); +ins.exports.f(); +assertSame(get(mem, 40, 4), [1, 2, 3, 4]); + +// Same test but with local.tee + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func $g (param $param v128) (result v128) + (local $tmp v128) + (local.tee $tmp (local.get $param))) + (func (export "f") + (v128.store (i32.const 160) (call $g (v128.const i32x4 1 2 3 4)))))`); +var mem = new Uint32Array(ins.exports.mem.buffer); +ins.exports.f(); +assertSame(get(mem, 40, 4), [1, 2, 3, 4]); + +// Locals that end up on the stack. Try to create unaligned placement (in the +// baseline compiler anyway) by inserting i32 locals before or after and +// inbetween the v128 ones and by having so many locals that we run out of +// registers. + +var nlocals = 64; +for ( let start of [0, 1]) { + let decl = ""; + let set = ""; + let sum = "(v128.const i32x4 0 0 0 0)"; + var res = [0,0,0,0]; + var locno = start; + for ( let i=start ; i < start + nlocals ; i++ ) { + decl += "(local v128) "; + set += `(local.set ${locno} (v128.const i32x4 ${i} ${i+1} ${i+2} ${i+3})) `; + sum = `(i32x4.add ${sum} (local.get ${locno}))`; + locno++; + res[0] += i; + res[1] += i+1; + res[2] += i+2; + res[3] += i+3; + if ((i % 5) == 3) { + decl += "(local i32) "; + locno++; + } + } + if (start) + decl = "(local i32) " + decl; + else + decl += "(local i32) "; + var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func $g (result v128) + ${decl} + ${set} + ${sum}) + (func (export "f") + (v128.store (i32.const 160) (call $g))))`); + + var mem = new Uint32Array(ins.exports.mem.buffer); + ins.exports.f(); + assertSame(get(mem, 40, 4), res); +} + +// Ditto parameters. This is like the case above but values are passed rather +// than set. 
+// +// call +// call_indirect + +var nlocals = 64; +for ( let start of [0, 1]) { + let decl = ""; + let pass = ""; + let sum = "(v128.const i32x4 0 0 0 0)"; + var res = [0,0,0,0]; + var locno = start; + for ( let i=start ; i < start + nlocals ; i++ ) { + decl += "(param v128) "; + pass += `(v128.const i32x4 ${i} ${i+1} ${i+2} ${i+3}) `; + sum = `(i32x4.add ${sum} (local.get ${locno}))`; + locno++; + res[0] += i; + res[1] += i+1; + res[2] += i+2; + res[3] += i+3; + if ((i % 5) == 3) { + decl += "(param i32) "; + pass += "(i32.const 0) "; + locno++; + } + } + if (start) { + decl = "(param i32) " + decl; + pass = "(i32.const 0) " + pass; + } else { + decl += "(param i32) "; + pass += "(i32.const 0) "; + } + var txt = ` + (module + (memory (export "mem") 1 1) + (type $t1 (func ${decl} (result v128))) + (table funcref (elem $h)) + (func $g ${decl} (result v128) + ${sum}) + (func (export "f1") + (v128.store (i32.const 160) (call $g ${pass}))) + (func $h ${decl} (result v128) + ${sum}) + (func (export "f2") + (v128.store (i32.const 512) (call_indirect (type $t1) ${pass} (i32.const 0)))))`; + var ins = wasmEvalText(txt); + + var mem = new Uint32Array(ins.exports.mem.buffer); + ins.exports.f1(); + assertSame(get(mem, 40, 4), res); + ins.exports.f2(); + assertSame(get(mem, 128, 4), res); +} + +// Widening integer dot product + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) + (i32x4.dot_i16x8_s (v128.load (i32.const 16)) (v128.load (i32.const 32))))))`); + +var xs = [5, 1, -4, 2, 20, -15, 12, 3]; +var ys = [6, 0, -7, 3, 8, -1, -3, 7]; +var ans = [xs[0]*ys[0] + xs[1]*ys[1], + xs[2]*ys[2] + xs[3]*ys[3], + xs[4]*ys[4] + xs[5]*ys[5], + xs[6]*ys[6] + xs[7]*ys[7]]; + +var mem16 = new Int16Array(ins.exports.mem.buffer); +var mem32 = new Int32Array(ins.exports.mem.buffer); +set(mem16, 8, xs); +set(mem16, 16, ys); +ins.exports.run(); +var result = get(mem32, 0, 4); +assertSame(result, ans); + +// Splat, with and 
without constants (different code paths in ion) + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "splat_i8x16") (param $src i32) + (v128.store (i32.const 0) (i8x16.splat (local.get $src)))) + (func (export "csplat_i8x16") + (v128.store (i32.const 0) (i8x16.splat (i32.const 37)))) + (func (export "splat_i16x8") (param $src i32) + (v128.store (i32.const 0) (i16x8.splat (local.get $src)))) + (func (export "csplat_i16x8") + (v128.store (i32.const 0) (i16x8.splat (i32.const 1175)))) + (func (export "splat_i32x4") (param $src i32) + (v128.store (i32.const 0) (i32x4.splat (local.get $src)))) + (func (export "csplat_i32x4") + (v128.store (i32.const 0) (i32x4.splat (i32.const 127639)))) + (func (export "splat_i64x2") (param $src i64) + (v128.store (i32.const 0) (i64x2.splat (local.get $src)))) + (func (export "csplat_i64x2") + (v128.store (i32.const 0) (i64x2.splat (i64.const 0x1234_5678_4365)))) + (func (export "splat_f32x4") (param $src f32) + (v128.store (i32.const 0) (f32x4.splat (local.get $src)))) + (func (export "csplat_f32x4") + (v128.store (i32.const 0) (f32x4.splat (f32.const 9121.25)))) + (func (export "splat_f64x2") (param $src f64) + (v128.store (i32.const 0) (f64x2.splat (local.get $src)))) + (func (export "csplat_f64x2") + (v128.store (i32.const 0) (f64x2.splat (f64.const 26789.125)))) +)`); + +var mem8 = new Uint8Array(ins.exports.mem.buffer); +ins.exports.splat_i8x16(3); +assertSame(get(mem8, 0, 16), iota(16).map(_=>3)); +ins.exports.csplat_i8x16(); +assertSame(get(mem8, 0, 16), iota(16).map(_=>37)); + +var mem16 = new Uint16Array(ins.exports.mem.buffer); +ins.exports.splat_i16x8(976); +assertSame(get(mem16, 0, 8), iota(8).map(_=>976)); +ins.exports.csplat_i16x8(); +assertSame(get(mem16, 0, 8), iota(8).map(_=>1175)); + +var mem32 = new Uint32Array(ins.exports.mem.buffer); +ins.exports.splat_i32x4(147812); +assertSame(get(mem32, 0, 4), [147812, 147812, 147812, 147812]); +ins.exports.csplat_i32x4(); +assertSame(get(mem32, 0, 
4), [127639, 127639, 127639, 127639]); + +var mem64 = new BigInt64Array(ins.exports.mem.buffer); +ins.exports.splat_i64x2(147812n); +assertSame(get(mem64, 0, 2), [147812, 147812]); +ins.exports.csplat_i64x2(); +assertSame(get(mem64, 0, 2), [0x1234_5678_4365n, 0x1234_5678_4365n]); + +var memf32 = new Float32Array(ins.exports.mem.buffer); +ins.exports.splat_f32x4(147812.5); +assertSame(get(memf32, 0, 4), [147812.5, 147812.5, 147812.5, 147812.5]); +ins.exports.csplat_f32x4(); +assertSame(get(memf32, 0, 4), [9121.25, 9121.25, 9121.25, 9121.25]); + +var memf64 = new Float64Array(ins.exports.mem.buffer); +ins.exports.splat_f64x2(147812.5); +assertSame(get(memf64, 0, 2), [147812.5, 147812.5]); +ins.exports.csplat_f64x2(); +assertSame(get(memf64, 0, 2), [26789.125, 26789.125]); + +// AnyTrue. Ion constant folds, so test that too. + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "anytrue_i8x16") (result i32) + (v128.any_true (v128.load (i32.const 16)))) + (func (export "true_anytrue_i8x16") (result i32) + (v128.any_true (v128.const i8x16 0 0 8 0 0 0 0 0 0 0 0 0 0 0 0 0))) + (func (export "false_anytrue_i8x16") (result i32) + (v128.any_true (v128.const i8x16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0))))`); + +var mem = new Uint8Array(ins.exports.mem.buffer); +set(mem, 16, iota(16).map((_) => 0)); +assertEq(ins.exports.anytrue_i8x16(), 0); + +for ( let dope of [1, 7, 32, 195 ] ) { + set(mem, 16, iota(16).map((x) => x == 7 ? dope : 0)); + assertEq(ins.exports.anytrue_i8x16(), 1); +} + +assertEq(ins.exports.true_anytrue_i8x16(), 1); +assertEq(ins.exports.false_anytrue_i8x16(), 0); + +// AllTrue. Ion constant folds, so test that too. 
+ +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "alltrue_i8x16") (result i32) + (i8x16.all_true (v128.load (i32.const 16)))) + (func (export "true_alltrue_i8x16") (result i32) + (i8x16.all_true (v128.const i8x16 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16))) + (func (export "false_alltrue_i8x16") (result i32) + (i8x16.all_true (v128.const i8x16 1 2 3 4 5 6 0 8 9 10 11 12 13 14 15 16))) + (func (export "alltrue_i16x8") (result i32) + (i16x8.all_true (v128.load (i32.const 16)))) + (func (export "true_alltrue_i16x8") (result i32) + (i16x8.all_true (v128.const i16x8 1 2 3 4 5 6 7 8))) + (func (export "false_alltrue_i16x8") (result i32) + (i16x8.all_true (v128.const i16x8 1 2 3 4 5 0 7 8))) + (func (export "alltrue_i32x4") (result i32) + (i32x4.all_true (v128.load (i32.const 16)))) + (func (export "true_alltrue_i32x4") (result i32) + (i32x4.all_true (v128.const i32x4 1 2 3 4))) + (func (export "false_alltrue_i32x4") (result i32) + (i32x4.all_true (v128.const i32x4 1 2 3 0))))`); + +var mem8 = new Uint8Array(ins.exports.mem.buffer); +var mem16 = new Uint16Array(ins.exports.mem.buffer); +var mem32 = new Uint32Array(ins.exports.mem.buffer); + +set(mem8, 16, iota(16).map((_) => 0)); +assertEq(ins.exports.alltrue_i8x16(), 0); +assertEq(ins.exports.alltrue_i16x8(), 0); +assertEq(ins.exports.alltrue_i32x4(), 0); + +set(mem8, 16, iota(16).map((_) => 1)); +assertEq(ins.exports.alltrue_i8x16(), 1); + +set(mem16, 8, iota(8).map((_) => 1)); +assertEq(ins.exports.alltrue_i16x8(), 1); + +set(mem32, 4, iota(4).map((_) => 1)); +assertEq(ins.exports.alltrue_i32x4(), 1); + +for ( let dope of [1, 7, 32, 195 ] ) { + set(mem8, 16, iota(16).map((x) => x == 7 ? 0 : dope)); + assertEq(ins.exports.alltrue_i8x16(), 0); + + set(mem16, 8, iota(8).map((x) => x == 4 ? 0 : dope)); + assertEq(ins.exports.alltrue_i16x8(), 0); + + set(mem32, 4, iota(4).map((x) => x == 2 ? 
0 : dope)); + assertEq(ins.exports.alltrue_i32x4(), 0); +} + +assertEq(ins.exports.true_alltrue_i8x16(), 1); +assertEq(ins.exports.false_alltrue_i8x16(), 0); +assertEq(ins.exports.true_alltrue_i16x8(), 1); +assertEq(ins.exports.false_alltrue_i16x8(), 0); +assertEq(ins.exports.true_alltrue_i32x4(), 1); +assertEq(ins.exports.false_alltrue_i32x4(), 0); + +// Bitmask. Ion constant folds, so test that too. + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "bitmask_i8x16") (result i32) + (i8x16.bitmask (v128.load (i32.const 16)))) + (func (export "const_bitmask_i8x16") (result i32) + (i8x16.bitmask (v128.const i8x16 0x80 0x7f 0xff 0x33 0x42 0x98 0x01 0x00 + 0x31 0xcc 0xdd 0x12 0xf0 0x40 0x02 0xa0))) + (func (export "bitmask_i16x8") (result i32) + (i16x8.bitmask (v128.load (i32.const 16)))) + (func (export "const_bitmask_i16x8") (result i32) + (i16x8.bitmask (v128.const i16x8 0x7f80 0xff33 0x9842 0x0001 0xcc31 0x12dd 0x40f0 0xa002))) + (func (export "bitmask_i32x4") (result i32) + (i32x4.bitmask (v128.load (i32.const 16)))) + (func (export "const_bitmask_i32x4") (result i32) + (i32x4.bitmask (v128.const i32x4 0xff337f80 0x00019842 0xcc3112dd 0xa00240f0))))`); + +var mem8 = new Uint8Array(ins.exports.mem.buffer); +var mem16 = new Uint16Array(ins.exports.mem.buffer); +var mem32 = new Uint32Array(ins.exports.mem.buffer); + +set(mem8, 16, iota(16).map((_) => 0)); +assertEq(ins.exports.bitmask_i8x16(), 0); +assertEq(ins.exports.bitmask_i16x8(), 0); +assertEq(ins.exports.bitmask_i32x4(), 0); + +set(mem8, 16, iota(16).map((_) => 0x80)); +assertEq(ins.exports.bitmask_i8x16(), 0xFFFF); + +set(mem8, 16, iota(16).map((_) => 0x7F)); +assertEq(ins.exports.bitmask_i8x16(), 0); + +set(mem8, 16, iota(16).map((i) => popcount(i) == 1 ? 
0x80 : 0)); +assertEq(ins.exports.bitmask_i8x16(), (1 << 1) | (1 << 2) | (1 << 4) | (1 << 8)); + +assertEq(ins.exports.const_bitmask_i8x16(), 0x9625); + +set(mem16, 8, iota(8).map((i) => 0x8000)) +assertEq(ins.exports.bitmask_i16x8(), 0xFF) + +set(mem16, 8, iota(8).map((i) => 0x7FFF)) +assertEq(ins.exports.bitmask_i16x8(), 0) + +set(mem16, 8, iota(8).map((i) => popcount(i) == 1 ? 0x8000 : 0)) +assertEq(ins.exports.bitmask_i16x8(), (1 << 1) | (1 << 2) | (1 << 4)); + +assertEq(ins.exports.const_bitmask_i16x8(), 0x96); + +set(mem32, 4, iota(4).map((_) => 0x80000000)) +assertEq(ins.exports.bitmask_i32x4(), 0xF); + +set(mem32, 4, iota(4).map((_) => 0x7FFFFFFF)) +assertEq(ins.exports.bitmask_i32x4(), 0); + +set(mem32, 4, iota(4).map((i) => popcount(i) == 1 ? 0x80000000 : 0)) +assertEq(ins.exports.bitmask_i32x4(), (1 << 1) | (1 << 2)); + +assertEq(ins.exports.const_bitmask_i32x4(), 0xd); + +// Shifts +// +// lhs is v128 in memory +// rhs is i32 (passed directly) +// result is v128 in memory + +function shr(count, width) { + return (v) => { + if (count == 0) + return v; + if (width == 64) { + if (v < 0) { + // This basically mirrors what the SIMD code does, so if there's + // a bug there then there's a bug here too. Seems OK though. + let s = 0x1_0000_0000_0000_0000n + BigInt(v); + let t = s / (1n << BigInt(count)); + let u = ((1n << BigInt(count)) - 1n) * (2n ** BigInt(64-count)); + let w = t + u; + return w - 0x1_0000_0000_0000_0000n; + } + return BigInt(v) / (1n << BigInt(count)); + } else { + let mask = (width == 32) ? -1 : ((1 << width) - 1); + return (sign_extend(v, width) >> count) & mask; + } + } +} + +function shru(count, width) { + if (width == 64) { + return (v) => { + if (count == 0) + return v; + if (v < 0) { + v = 0x1_0000_0000_0000_0000n + BigInt(v); + } + return BigInt(v) / (1n << BigInt(count)); + } + } else { + return (v) => { + let mask = (width == 32) ? 
-1 : ((1 << width) - 1); + return (v >>> count) & mask; + } + } +} + +var constantI8Shifts = ""; +for ( let i of iota(10).concat([-7]) ) { + constantI8Shifts += ` + (func (export "shl_i8x16_${i}") + (v128.store (i32.const 0) (i8x16.shl (v128.load (i32.const 16)) (i32.const ${i})))) + (func (export "shr_i8x16_${i}") + (v128.store (i32.const 0) (i8x16.shr_s (v128.load (i32.const 16)) (i32.const ${i})))) + (func (export "shr_u8x16_${i}") + (v128.store (i32.const 0) (i8x16.shr_u (v128.load (i32.const 16)) (i32.const ${i}))))`; +} + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "shl_i8x16") (param $count i32) + (v128.store (i32.const 0) (i8x16.shl (v128.load (i32.const 16)) (local.get $count)))) + (func (export "shr_i8x16") (param $count i32) + (v128.store (i32.const 0) (i8x16.shr_s (v128.load (i32.const 16)) (local.get $count)))) + (func (export "shr_u8x16") (param $count i32) + (v128.store (i32.const 0) (i8x16.shr_u (v128.load (i32.const 16)) (local.get $count)))) + ${constantI8Shifts} + (func (export "shl_i16x8") (param $count i32) + (v128.store (i32.const 0) (i16x8.shl (v128.load (i32.const 16)) (local.get $count)))) + (func (export "shl_i16x8_3") + (v128.store (i32.const 0) (i16x8.shl (v128.load (i32.const 16)) (i32.const 3)))) + (func (export "shl_i16x8_15") + (v128.store (i32.const 0) (i16x8.shl (v128.load (i32.const 16)) (i32.const 15)))) + (func (export "shl_i16x8_16") + (v128.store (i32.const 0) (i16x8.shl (v128.load (i32.const 16)) (i32.const 16)))) + (func (export "shl_i16x8_-15") + (v128.store (i32.const 0) (i16x8.shl (v128.load (i32.const 16)) (i32.const -15)))) + (func (export "shr_i16x8") (param $count i32) + (v128.store (i32.const 0) (i16x8.shr_s (v128.load (i32.const 16)) (local.get $count)))) + (func (export "shr_i16x8_3") + (v128.store (i32.const 0) (i16x8.shr_s (v128.load (i32.const 16)) (i32.const 3)))) + (func (export "shr_i16x8_15") + (v128.store (i32.const 0) (i16x8.shr_s (v128.load (i32.const 16)) (i32.const 
15)))) + (func (export "shr_i16x8_16") + (v128.store (i32.const 0) (i16x8.shr_s (v128.load (i32.const 16)) (i32.const 16)))) + (func (export "shr_i16x8_-15") + (v128.store (i32.const 0) (i16x8.shr_s (v128.load (i32.const 16)) (i32.const -15)))) + (func (export "shr_u16x8") (param $count i32) + (v128.store (i32.const 0) (i16x8.shr_u (v128.load (i32.const 16)) (local.get $count)))) + (func (export "shr_u16x8_3") + (v128.store (i32.const 0) (i16x8.shr_u (v128.load (i32.const 16)) (i32.const 3)))) + (func (export "shr_u16x8_15") + (v128.store (i32.const 0) (i16x8.shr_u (v128.load (i32.const 16)) (i32.const 15)))) + (func (export "shr_u16x8_16") + (v128.store (i32.const 0) (i16x8.shr_u (v128.load (i32.const 16)) (i32.const 16)))) + (func (export "shr_u16x8_-15") + (v128.store (i32.const 0) (i16x8.shr_u (v128.load (i32.const 16)) (i32.const -15)))) + (func (export "shl_i32x4") (param $count i32) + (v128.store (i32.const 0) (i32x4.shl (v128.load (i32.const 16)) (local.get $count)))) + (func (export "shl_i32x4_12") + (v128.store (i32.const 0) (i32x4.shl (v128.load (i32.const 16)) (i32.const 12)))) + (func (export "shl_i32x4_31") + (v128.store (i32.const 0) (i32x4.shl (v128.load (i32.const 16)) (i32.const 31)))) + (func (export "shl_i32x4_32") + (v128.store (i32.const 0) (i32x4.shl (v128.load (i32.const 16)) (i32.const 32)))) + (func (export "shl_i32x4_-27") + (v128.store (i32.const 0) (i32x4.shl (v128.load (i32.const 16)) (i32.const -27)))) + (func (export "shr_i32x4") (param $count i32) + (v128.store (i32.const 0) (i32x4.shr_s (v128.load (i32.const 16)) (local.get $count)))) + (func (export "shr_i32x4_12") + (v128.store (i32.const 0) (i32x4.shr_s (v128.load (i32.const 16)) (i32.const 12)))) + (func (export "shr_i32x4_31") + (v128.store (i32.const 0) (i32x4.shr_s (v128.load (i32.const 16)) (i32.const 31)))) + (func (export "shr_i32x4_32") + (v128.store (i32.const 0) (i32x4.shr_s (v128.load (i32.const 16)) (i32.const 32)))) + (func (export "shr_i32x4_-27") + (v128.store 
(i32.const 0) (i32x4.shr_s (v128.load (i32.const 16)) (i32.const -27)))) + (func (export "shr_u32x4") (param $count i32) + (v128.store (i32.const 0) (i32x4.shr_u (v128.load (i32.const 16)) (local.get $count)))) + (func (export "shr_u32x4_12") + (v128.store (i32.const 0) (i32x4.shr_u (v128.load (i32.const 16)) (i32.const 12)))) + (func (export "shr_u32x4_31") + (v128.store (i32.const 0) (i32x4.shr_u (v128.load (i32.const 16)) (i32.const 31)))) + (func (export "shr_u32x4_32") + (v128.store (i32.const 0) (i32x4.shr_u (v128.load (i32.const 16)) (i32.const 32)))) + (func (export "shr_u32x4_-27") + (v128.store (i32.const 0) (i32x4.shr_u (v128.load (i32.const 16)) (i32.const -27)))) + (func (export "shl_i64x2") (param $count i32) + (v128.store (i32.const 0) (i64x2.shl (v128.load (i32.const 16)) (local.get $count)))) + (func (export "shl_i64x2_27") + (v128.store (i32.const 0) (i64x2.shl (v128.load (i32.const 16)) (i32.const 27)))) + (func (export "shl_i64x2_63") + (v128.store (i32.const 0) (i64x2.shl (v128.load (i32.const 16)) (i32.const 63)))) + (func (export "shl_i64x2_64") + (v128.store (i32.const 0) (i64x2.shl (v128.load (i32.const 16)) (i32.const 64)))) + (func (export "shl_i64x2_-231") + (v128.store (i32.const 0) (i64x2.shl (v128.load (i32.const 16)) (i32.const -231)))) + (func (export "shr_i64x2") (param $count i32) + (v128.store (i32.const 0) (i64x2.shr_s (v128.load (i32.const 16)) (local.get $count)))) + (func (export "shr_i64x2_27") + (v128.store (i32.const 0) (i64x2.shr_s (v128.load (i32.const 16)) (i32.const 27)))) + (func (export "shr_i64x2_45") + (v128.store (i32.const 0) (i64x2.shr_s (v128.load (i32.const 16)) (i32.const 45)))) + (func (export "shr_i64x2_63") + (v128.store (i32.const 0) (i64x2.shr_s (v128.load (i32.const 16)) (i32.const 63)))) + (func (export "shr_i64x2_64") + (v128.store (i32.const 0) (i64x2.shr_s (v128.load (i32.const 16)) (i32.const 64)))) + (func (export "shr_i64x2_-231") + (v128.store (i32.const 0) (i64x2.shr_s (v128.load (i32.const 
16)) (i32.const -231)))) + (func (export "shr_i64x2_-1") + (v128.store (i32.const 0) (i64x2.shr_s (v128.load (i32.const 16)) (i32.const -1)))) + (func (export "shr_u64x2") (param $count i32) + (v128.store (i32.const 0) (i64x2.shr_u (v128.load (i32.const 16)) (local.get $count)))) + (func (export "shr_u64x2_27") + (v128.store (i32.const 0) (i64x2.shr_u (v128.load (i32.const 16)) (i32.const 27)))) + (func (export "shr_u64x2_63") + (v128.store (i32.const 0) (i64x2.shr_u (v128.load (i32.const 16)) (i32.const 63)))) + (func (export "shr_u64x2_64") + (v128.store (i32.const 0) (i64x2.shr_u (v128.load (i32.const 16)) (i32.const 64)))) + (func (export "shr_u64x2_-231") + (v128.store (i32.const 0) (i64x2.shr_u (v128.load (i32.const 16)) (i32.const -231)))))`); + +var mem8 = new Uint8Array(ins.exports.mem.buffer); +var as = [1, 2, 4, 8, 16, 32, 64, 128, 129, 130, 132, 136, 144, 160, 192, 255]; + +set(mem8, 16, as); + +for (let [meth,op] of [["shl_i8x16",shl], ["shr_i8x16",shr], ["shr_u8x16",shru]]) { + for ( let i=0 ; i < 8 ; i++ ) { + ins.exports[meth](i); + assertSame(get(mem8, 0, 16), as.map(op(i, 8))) + ins.exports[meth + "_" + i](); + assertSame(get(mem8, 0, 16), as.map(op(i, 8))) + } + + ins.exports[meth](1); + let a = get(mem8, 0, 16); + ins.exports[meth](9); + let b = get(mem8, 0, 16); + assertSame(a, b); + ins.exports[meth](-7); + let c = get(mem8, 0, 16); + assertSame(a, c); + + ins.exports[meth + "_1"](); + let x = get(mem8, 0, 16); + ins.exports[meth + "_9"](); + let y = get(mem8, 0, 16); + ins.exports[meth + "_-7"](); + let z = get(mem8, 0, 16); + assertSame(x, y); + assertSame(x, z); +} + +var mem16 = new Uint16Array(ins.exports.mem.buffer); +var as = [1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000]; +set(mem16, 8, as) + +ins.exports.shl_i16x8(2); +var res = get(mem16, 0, 8); +assertSame(res, as.map(shl(2, 16))) + +ins.exports.shl_i16x8(18); // Masked count +assertSame(get(mem16, 0, 8), res); + +ins.exports.shl_i16x8(-14); // Masked count +assertSame(get(mem16, 
0, 8), res); + +for ( let shift of [3, 15, 16, -15] ) { + ins.exports["shl_i16x8_" + shift](); + assertSame(get(mem16, 0, 8), as.map(shl(shift & 15, 16))) +} + +ins.exports.shr_i16x8(1); +var res = get(mem16, 0, 8); +assertSame(res, as.map(shr(1, 16))) + +ins.exports.shr_i16x8(17); // Masked count +assertSame(get(mem16, 0, 8), res); + +ins.exports.shr_i16x8(-15); // Masked count +assertSame(get(mem16, 0, 8), res); + +for ( let shift of [3, 15, 16, -15] ) { + ins.exports["shr_i16x8_" + shift](); + assertSame(get(mem16, 0, 8), as.map(shr(shift & 15, 16))) +} + +ins.exports.shr_u16x8(1); +var res = get(mem16, 0, 8); +assertSame(res, as.map(shru(1, 16))) + +ins.exports.shr_u16x8(17); // Masked count +assertSame(get(mem16, 0, 8), res); + +ins.exports.shr_u16x8(-15); // Masked count +assertSame(get(mem16, 0, 8), res); + +for ( let shift of [3, 15, 16, -15] ) { + ins.exports["shr_u16x8_" + shift](); + assertSame(get(mem16, 0, 8), as.map(shru(shift & 15, 16))) +} + +var mem32 = new Uint32Array(ins.exports.mem.buffer); +var as = [5152, 6768, 7074, 800811]; + +set(mem32, 4, as) +ins.exports.shl_i32x4(2); +var res = get(mem32, 0, 4); +assertSame(res, as.map(shl(2, 32))) + +ins.exports.shl_i32x4(34); // Masked count +assertSame(get(mem32, 0, 4), res); + +ins.exports.shl_i32x4(-30); // Masked count +assertSame(get(mem32, 0, 4), res); + +for ( let shift of [12, 31, 32, -27] ) { + ins.exports["shl_i32x4_" + shift](); + assertSame(get(mem32, 0, 4), as.map(shl(shift & 31, 32)).map(x => x>>>0)) +} + +ins.exports.shr_i32x4(1); +var res = get(mem32, 0, 4); +assertSame(res, as.map(shr(1, 32))) + +ins.exports.shr_i32x4(33); // Masked count +assertSame(get(mem32, 0, 4), res); + +ins.exports.shr_i32x4(-31); // Masked count +assertSame(get(mem32, 0, 4), res); + +for ( let shift of [12, 31, 32, -27] ) { + ins.exports["shr_i32x4_" + shift](); + assertSame(get(mem32, 0, 4), as.map(shr(shift & 31, 32))) +} + +ins.exports.shr_u32x4(1); +var res = get(mem32, 0, 4); +assertSame(res, 
as.map(shru(1, 32))) + +ins.exports.shr_u32x4(33); // Masked count +assertSame(get(mem32, 0, 4), res); + +ins.exports.shr_u32x4(-31); // Masked count +assertSame(get(mem32, 0, 4), res); + +for ( let shift of [12, 31, 32, -27] ) { + ins.exports["shr_u32x4_" + shift](); + assertSame(get(mem32, 0, 4), as.map(shru(shift & 31, 32))) +} + +var mem64 = new BigInt64Array(ins.exports.mem.buffer); +var as = [50515253, -616263]; + +set(mem64, 2, as) +ins.exports.shl_i64x2(2); +var res = get(mem64, 0, 2); +assertSame(res, as.map(shl(2, 64))) + +ins.exports.shl_i64x2(66); // Masked count +assertSame(get(mem64, 0, 2), res); + +ins.exports.shl_i64x2(-62); // Masked count +assertSame(get(mem64, 0, 2), res); + +for ( let shift of [27, 63, 64, -231] ) { + ins.exports["shl_i64x2_" + shift](); + assertSame(get(mem64, 0, 2), as.map(shl(shift & 63, 64))) +} + +ins.exports.shr_u64x2(1); +var res = get(mem64, 0, 2); +assertSame(res, as.map(shru(1, 64))) + +ins.exports.shr_u64x2(65); // Masked count +assertSame(get(mem64, 0, 2), res); + +ins.exports.shr_u64x2(-63); // Masked count +assertSame(get(mem64, 0, 2), res); + +for ( let shift of [27, 63, 64, -231] ) { + ins.exports["shr_u64x2_" + shift](); + assertSame(get(mem64, 0, 2), as.map(shru(shift & 63, 64))) +} + +ins.exports.shr_i64x2(2); +var res = get(mem64, 0, 2); +assertSame(res, as.map(shr(2, 64))) + +ins.exports.shr_i64x2(66); // Masked count +assertSame(get(mem64, 0, 2), res); + +ins.exports.shr_i64x2(-62); // Masked count +assertSame(get(mem64, 0, 2), res); + +// The ion code generator has multiple paths here, for < 32 and >= 32 +for ( let shift of [27, 45, 63, 64, -1, -231] ) { + ins.exports["shr_i64x2_" + shift](); + assertSame(get(mem64, 0, 2), as.map(shr(shift & 63, 64))) +} + +// Narrow + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "narrow_i16x8_s") + (v128.store (i32.const 0) (i8x16.narrow_i16x8_s (v128.load (i32.const 16)) (v128.load (i32.const 32))))) + (func (export "narrow_i16x8_u") 
+ (v128.store (i32.const 0) (i8x16.narrow_i16x8_u (v128.load (i32.const 16)) (v128.load (i32.const 32))))) + (func (export "narrow_i32x4_s") + (v128.store (i32.const 0) (i16x8.narrow_i32x4_s (v128.load (i32.const 16)) (v128.load (i32.const 32))))) + (func (export "narrow_i32x4_u") + (v128.store (i32.const 0) (i16x8.narrow_i32x4_u (v128.load (i32.const 16)) (v128.load (i32.const 32))))))`); + +var mem8 = new Int8Array(ins.exports.mem.buffer); +var mem8u = new Uint8Array(ins.exports.mem.buffer); +var mem16 = new Int16Array(ins.exports.mem.buffer); +var mem16u = new Uint16Array(ins.exports.mem.buffer); +var mem32 = new Int32Array(ins.exports.mem.buffer); + +var as = [1, 267, 3987, 14523, 32768, 3, 312, 4876].map((x) => sign_extend(x, 16)); +var bs = [2, 312, 4876, 15987, 33777, 1, 267, 3987].map((x) => sign_extend(x, 16)); + +set(mem16, 8, as); +set(mem16, 16, bs); + +ins.exports.narrow_i16x8_s(); +var cs = as.concat(...bs).map((x) => signed_saturate(x, 8)); +assertSame(get(mem8, 0, 16), cs); + +ins.exports.narrow_i16x8_u(); +var cs = as.concat(...bs).map((x) => unsigned_saturate(x, 8)); +assertSame(get(mem8u, 0, 16), cs); + +var xs = [1, 3987, 14523, 32768].map((x) => x << 16).map((x) => sign_extend(x, 32)); +var ys = [2, 4876, 15987, 33777].map((x) => x << 16).map((x) => sign_extend(x, 32)); + +set(mem32, 4, xs); +set(mem32, 8, ys); + +ins.exports.narrow_i32x4_s(); +var cs = xs.concat(...ys).map((x) => signed_saturate(x, 16)); +assertSame(get(mem16, 0, 8), cs); + +ins.exports.narrow_i32x4_u(); +var cs = xs.concat(...ys).map((x) => unsigned_saturate(x, 16)); +assertSame(get(mem16u, 0, 8), cs); + +// Extend low/high + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "extend_low_i8x16_s") + (v128.store (i32.const 0) (i16x8.extend_low_i8x16_s (v128.load (i32.const 16))))) + (func (export "extend_high_i8x16_s") + (v128.store (i32.const 0) (i16x8.extend_high_i8x16_s (v128.load (i32.const 16))))) + (func (export "extend_low_i8x16_u") + 
(v128.store (i32.const 0) (i16x8.extend_low_i8x16_u (v128.load (i32.const 16))))) + (func (export "extend_high_i8x16_u") + (v128.store (i32.const 0) (i16x8.extend_high_i8x16_u (v128.load (i32.const 16))))) + (func (export "extend_low_i16x8_s") + (v128.store (i32.const 0) (i32x4.extend_low_i16x8_s (v128.load (i32.const 16))))) + (func (export "extend_high_i16x8_s") + (v128.store (i32.const 0) (i32x4.extend_high_i16x8_s (v128.load (i32.const 16))))) + (func (export "extend_low_i16x8_u") + (v128.store (i32.const 0) (i32x4.extend_low_i16x8_u (v128.load (i32.const 16))))) + (func (export "extend_high_i16x8_u") + (v128.store (i32.const 0) (i32x4.extend_high_i16x8_u (v128.load (i32.const 16))))))`); + +var mem16 = new Int16Array(ins.exports.mem.buffer); +var mem16u = new Uint16Array(ins.exports.mem.buffer); +var mem8 = new Int8Array(ins.exports.mem.buffer); +var as = [0, 1, 192, 3, 205, 5, 6, 133, 8, 9, 129, 11, 201, 13, 14, 255]; + +set(mem8, 16, as); + +ins.exports.extend_low_i8x16_s(); +assertSame(get(mem16, 0, 8), iota(8).map((n) => sign_extend(as[n], 8))); + +ins.exports.extend_high_i8x16_s(); +assertSame(get(mem16, 0, 8), iota(8).map((n) => sign_extend(as[n+8], 8))); + +ins.exports.extend_low_i8x16_u(); +assertSame(get(mem16u, 0, 8), iota(8).map((n) => zero_extend(as[n], 8))); + +ins.exports.extend_high_i8x16_u(); +assertSame(get(mem16u, 0, 8), iota(8).map((n) => zero_extend(as[n+8], 8))); + +var mem32 = new Int32Array(ins.exports.mem.buffer); +var mem32u = new Uint32Array(ins.exports.mem.buffer); + +var as = [0, 1, 192, 3, 205, 5, 6, 133].map((x) => x << 8); + +set(mem16, 8, as); + +ins.exports.extend_low_i16x8_s(); +assertSame(get(mem32, 0, 4), iota(4).map((n) => sign_extend(as[n], 16))); + +ins.exports.extend_high_i16x8_s(); +assertSame(get(mem32, 0, 4), iota(4).map((n) => sign_extend(as[n+4], 16))); + +ins.exports.extend_low_i16x8_u(); +assertSame(get(mem32u, 0, 4), iota(4).map((n) => zero_extend(as[n], 16))); + +ins.exports.extend_high_i16x8_u(); 
+assertSame(get(mem32u, 0, 4), iota(4).map((n) => zero_extend(as[n+4], 16))); + + +// Extract lane. Ion constant folds, so test that too. +// +// operand is v128 in memory (or constant) +// lane index is immediate so we're testing something randomish but not zero +// result is scalar (returned directly) + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "extract_i8x16_9") (result i32) + (i8x16.extract_lane_s 9 (v128.load (i32.const 16)))) + (func (export "const_extract_i8x16_9") (result i32) + (i8x16.extract_lane_s 9 (v128.const i8x16 -1 -2 -3 -4 -5 -6 -7 -8 -9 -10 -11 -12 -13 -14 -15 -16))) + (func (export "extract_u8x16_6") (result i32) + (i8x16.extract_lane_u 6 (v128.load (i32.const 16)))) + (func (export "const_extract_u8x16_9") (result i32) + (i8x16.extract_lane_u 9 (v128.const i8x16 -1 -2 -3 -4 -5 -6 -7 -8 -9 -10 -11 -12 -13 -14 -15 -16))) + (func (export "extract_i16x8_5") (result i32) + (i16x8.extract_lane_s 5 (v128.load (i32.const 16)))) + (func (export "const_extract_i16x8_5") (result i32) + (i16x8.extract_lane_s 5 (v128.const i16x8 -1 -2 -3 -4 -5 -6 -7 -8))) + (func (export "extract_u16x8_3") (result i32) + (i16x8.extract_lane_u 3 (v128.load (i32.const 16)))) + (func (export "const_extract_u16x8_3") (result i32) + (i16x8.extract_lane_u 3 (v128.const i16x8 -1 -2 -3 -4 -5 -6 -7 -8))) + (func (export "extract_i32x4_2") (result i32) + (i32x4.extract_lane 2 (v128.load (i32.const 16)))) + (func (export "const_extract_i32x4_2") (result i32) + (i32x4.extract_lane 2 (v128.const i32x4 -1 -2 -3 -4))) + (func (export "extract_i64x2_1") (result i64) + (i64x2.extract_lane 1 (v128.load (i32.const 16)))) + (func (export "const_extract_i64x2_1") (result i64) + (i64x2.extract_lane 1 (v128.const i64x2 -1 -2))) + (func (export "extract_f32x4_2") (result f32) + (f32x4.extract_lane 2 (v128.load (i32.const 16)))) + (func (export "const_extract_f32x4_2") (result f32) + (f32x4.extract_lane 2 (v128.const f32x4 -1 -2 -3 -4))) + (func (export 
"extract_f64x2_1") (result f64) + (f64x2.extract_lane 1 (v128.load (i32.const 16)))) + (func (export "const_extract_f64x2_1") (result f64) + (f64x2.extract_lane 1 (v128.const f64x2 -1 -2))))`); + +var mem8 = new Uint8Array(ins.exports.mem.buffer); +var as = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; +var bs = as.map((x) => -x); + +set(mem8, 16, as) +assertEq(ins.exports.extract_i8x16_9(), as[9]); + +set(mem8, 16, bs) +assertEq(ins.exports.extract_u8x16_6(), 256 - as[6]); + +assertEq(ins.exports.const_extract_i8x16_9(), -10); +assertEq(ins.exports.const_extract_u8x16_9(), 256-10); + +var mem16 = new Uint16Array(ins.exports.mem.buffer); +var as = [1, 2, 3, 4, 5, 6, 7, 8]; +var bs = as.map((x) => -x); + +set(mem16, 8, as) +assertEq(ins.exports.extract_i16x8_5(), as[5]); + +set(mem16, 8, bs) +assertEq(ins.exports.extract_u16x8_3(), 65536 - as[3]); + +assertEq(ins.exports.const_extract_i16x8_5(), -6); +assertEq(ins.exports.const_extract_u16x8_3(), 65536-4); + +var mem32 = new Uint32Array(ins.exports.mem.buffer); +var as = [1, 2, 3, 4]; + +set(mem32, 4, as) +assertEq(ins.exports.extract_i32x4_2(), as[2]); + +assertEq(ins.exports.const_extract_i32x4_2(), -3); + +var mem32 = new Float32Array(ins.exports.mem.buffer); +var as = [1.5, 2.5, 3.5, 4.5]; + +set(mem32, 4, as) +assertEq(ins.exports.extract_f32x4_2(), as[2]); + +assertEq(ins.exports.const_extract_f32x4_2(), -3); + +var mem64 = new Float64Array(ins.exports.mem.buffer); +var as = [1.5, 2.5]; + +set(mem64, 2, as) +assertEq(ins.exports.extract_f64x2_1(), as[1]); + +assertEq(ins.exports.const_extract_f64x2_1(), -2); + +var mem64 = new BigInt64Array(ins.exports.mem.buffer); +var as = [12345, 67890]; + +set(mem64, 2, as) +assertSame(ins.exports.extract_i64x2_1(), as[1]); + +assertEq(ins.exports.const_extract_i64x2_1(), -2n); + +// Replace lane +// +// operand 1 is v128 in memory +// operand 2 is immediate scalar +// lane index is immediate so we're testing something randomish but not zero +// (note though 
that fp operations have special cases for zero) +// result is v128 in memory + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "replace_i8x16_9") (param $value i32) + (v128.store (i32.const 0) + (i8x16.replace_lane 9 (v128.load (i32.const 16)) (local.get $value)))) + (func (export "replace_i16x8_5") (param $value i32) + (v128.store (i32.const 0) + (i16x8.replace_lane 5 (v128.load (i32.const 16)) (local.get $value)))) + (func (export "replace_i32x4_3") (param $value i32) + (v128.store (i32.const 0) + (i32x4.replace_lane 3 (v128.load (i32.const 16)) (local.get $value)))) + (func (export "replace_i64x2_1") (param $value i64) + (v128.store (i32.const 0) + (i64x2.replace_lane 1 (v128.load (i32.const 16)) (local.get $value)))) + (func (export "replace_f32x4_0") (param $value f32) + (v128.store (i32.const 0) + (f32x4.replace_lane 0 (v128.load (i32.const 16)) (local.get $value)))) + (func (export "replace_f32x4_3") (param $value f32) + (v128.store (i32.const 0) + (f32x4.replace_lane 3 (v128.load (i32.const 16)) (local.get $value)))) + (func (export "replace_f64x2_0") (param $value f64) + (v128.store (i32.const 0) + (f64x2.replace_lane 0 (v128.load (i32.const 16)) (local.get $value)))) + (func (export "replace_f64x2_1") (param $value f64) + (v128.store (i32.const 0) + (f64x2.replace_lane 1 (v128.load (i32.const 16)) (local.get $value)))))`); + + +var mem8 = new Uint8Array(ins.exports.mem.buffer); +var as = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + +set(mem8, 16, as) +ins.exports.replace_i8x16_9(42); +assertSame(get(mem8, 0, 16), upd(as, 9, 42)); + +var mem16 = new Uint16Array(ins.exports.mem.buffer); +var as = [1, 2, 3, 4, 5, 6, 7, 8]; + +set(mem16, 8, as) +ins.exports.replace_i16x8_5(42); +assertSame(get(mem16, 0, 8), upd(as, 5, 42)); + +var mem32 = new Uint32Array(ins.exports.mem.buffer); +var as = [1, 2, 3, 4]; + +set(mem32, 4, as) +ins.exports.replace_i32x4_3(42); +assertSame(get(mem32, 0, 4), upd(as, 3, 42)); + +var 
mem64 = new BigInt64Array(ins.exports.mem.buffer); +var as = [1, 2]; + +set(mem64, 2, as) +ins.exports.replace_i64x2_1(42n); +assertSame(get(mem64, 0, 2), upd(as, 1, 42)); + +var mem32 = new Float32Array(ins.exports.mem.buffer); +var as = [1.5, 2.5, 3.5, 4.5]; + +set(mem32, 4, as) +ins.exports.replace_f32x4_0(42.5); +assertSame(get(mem32, 0, 4), upd(as, 0, 42.5)); + +set(mem32, 4, as) +ins.exports.replace_f32x4_3(42.5); +assertSame(get(mem32, 0, 4), upd(as, 3, 42.5)); + +var mem64 = new Float64Array(ins.exports.mem.buffer); +var as = [1.5, 2.5]; + +set(mem64, 2, as) +ins.exports.replace_f64x2_0(42.5); +assertSame(get(mem64, 0, 2), upd(as, 0, 42.5)); + +set(mem64, 2, as) +ins.exports.replace_f64x2_1(42.5); +assertSame(get(mem64, 0, 2), upd(as, 1, 42.5)); + +// Load and splat +// +// Operand is memory address of scalar +// Result is v128 in memory + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "load_splat_v8x16") (param $addr i32) + (v128.store (i32.const 0) (v128.load8_splat (local.get $addr)))) + (func (export "load_splat_v16x8") (param $addr i32) + (v128.store (i32.const 0) (v128.load16_splat (local.get $addr)))) + (func (export "load_splat_v32x4") (param $addr i32) + (v128.store (i32.const 0) (v128.load32_splat (local.get $addr)))) + (func (export "load_splat_v64x2") (param $addr i32) + (v128.store (i32.const 0) (v128.load64_splat (local.get $addr)))))`); + +var mem8 = new Uint8Array(ins.exports.mem.buffer); +mem8[37] = 42; +ins.exports.load_splat_v8x16(37); +assertSame(get(mem8, 0, 16), [42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42]); + +var mem16 = new Uint16Array(ins.exports.mem.buffer); +mem16[37] = 69; +ins.exports.load_splat_v16x8(37*2); +assertSame(get(mem16, 0, 8), [69, 69, 69, 69, 69, 69, 69, 69]); + +var mem32 = new Int32Array(ins.exports.mem.buffer); +mem32[37] = 83; +ins.exports.load_splat_v32x4(37*4); +assertSame(get(mem32, 0, 4), [83, 83, 83, 83]); + +var mem64 = new 
BigInt64Array(ins.exports.mem.buffer); +mem64[37] = 83n; +ins.exports.load_splat_v64x2(37*8); +assertSame(get(mem64, 0, 2), [83, 83]); + +// Load and zero +// +// Operand is memory address of scalar +// Result is v128 in memory + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "load32_zero") (param $addr i32) + (v128.store (i32.const 0) (v128.load32_zero (local.get $addr)))) + (func (export "load64_zero") (param $addr i32) + (v128.store (i32.const 0) (v128.load64_zero (local.get $addr)))))`); + +var mem32 = new Int32Array(ins.exports.mem.buffer); +mem32[37] = 0x12345678; +mem32[38] = 0xffffffff; +mem32[39] = 0xfffffffe; +mem32[40] = 0xfffffffd; +ins.exports.load32_zero(37*4); +assertSame(get(mem32, 0, 4), [0x12345678, 0, 0, 0]); + +var mem64 = new BigInt64Array(ins.exports.mem.buffer); +mem64[37] = 0x12345678abcdef01n; +mem64[38] = 0xffffffffffffffffn; +ins.exports.load64_zero(37*8); +assertSame(get(mem64, 0, 2), [0x12345678abcdef01n, 0n]); + +// Load and extend +// +// Operand is memory address of 64-bit scalar representing 8, 4, or 2 values +// Result is v128 in memory + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "load8x8_s") (param $addr i32) + (v128.store (i32.const 0) (v128.load8x8_s (local.get $addr)))) + (func (export "load8x8_u") (param $addr i32) + (v128.store (i32.const 0) (v128.load8x8_u (local.get $addr)))) + (func (export "load16x4_s") (param $addr i32) + (v128.store (i32.const 0) (v128.load16x4_s (local.get $addr)))) + (func (export "load16x4_u") (param $addr i32) + (v128.store (i32.const 0) (v128.load16x4_u (local.get $addr)))) + (func (export "load32x2_s") (param $addr i32) + (v128.store (i32.const 0) (v128.load32x2_s (local.get $addr)))) + (func (export "load32x2_u") (param $addr i32) + (v128.store (i32.const 0) (v128.load32x2_u (local.get $addr)))))`); + +var mem8 = new Uint8Array(ins.exports.mem.buffer); +var mem16s = new Int16Array(ins.exports.mem.buffer); +var mem16u = new 
Uint16Array(ins.exports.mem.buffer); +var mem32s = new Int32Array(ins.exports.mem.buffer); +var mem32u = new Uint32Array(ins.exports.mem.buffer); +var mem64s = new BigInt64Array(ins.exports.mem.buffer); +var mem64u = new BigUint64Array(ins.exports.mem.buffer); +var xs = [42, 129, 2, 212, 44, 27, 12, 199]; +set(mem8, 48, xs); + +ins.exports.load8x8_s(48); +assertSame(get(mem16s, 0, 8), xs.map((x) => sign_extend(x, 8))); + +ins.exports.load8x8_u(48); +assertSame(get(mem16u, 0, 8), xs.map((x) => zero_extend(x, 8))); + +var xs = [(42 << 8) | 129, (212 << 8) | 2, (44 << 8) | 27, (199 << 8) | 12]; +set(mem16u, 24, xs); + +ins.exports.load16x4_s(48); +assertSame(get(mem32s, 0, 4), xs.map((x) => sign_extend(x, 16))); + +ins.exports.load16x4_u(48); +assertSame(get(mem32u, 0, 4), xs.map((x) => zero_extend(x, 16))); + +var xs = [5, -8]; +set(mem32u, 12, xs); + +ins.exports.load32x2_s(48); +assertSame(get(mem64s, 0, 2), xs.map((x) => sign_extend(x, 32))); + +ins.exports.load32x2_u(48); +assertSame(get(mem64s, 0, 2), xs.map((x) => zero_extend(x, 32))); + +// Vector select +// +// Operands and results are all in memory + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "bitselect_v128") + (v128.store (i32.const 0) + (v128.bitselect (v128.load (i32.const 16)) + (v128.load (i32.const 32)) + (v128.load (i32.const 48))))))`); + +var mem8 = new Uint8Array(ins.exports.mem.buffer); +set(mem8, 16, iota(16).map((_) => 0xAA)); +set(mem8, 32, iota(16).map((_) => 0x55)); + +set(mem8, 48, iota(16).map((_) => 0x99)); +ins.exports.bitselect_v128(); +assertSame(get(mem8, 0, 16), iota(16).map((_) => 0xCC)); + +set(mem8, 48, iota(16).map((_) => 0x77)); +ins.exports.bitselect_v128(); +assertSame(get(mem8, 0, 16), iota(16).map((_) => 0x22)); + +// Vector shuffle +// +// Operands and results are all in memory + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + ;; the result interleaves the low eight bytes of the inputs + (func (export "shuffle1") + 
(v128.store (i32.const 0) + (i8x16.shuffle 0 16 1 17 2 18 3 19 4 20 5 21 6 22 7 23 + (v128.load (i32.const 16)) + (v128.load (i32.const 32))))) + ;; ditto the high eight bytes + (func (export "shuffle2") + (v128.store (i32.const 0) + (i8x16.shuffle 8 24 9 25 10 26 11 27 12 28 13 29 14 30 15 31 + (v128.load (i32.const 16)) + (v128.load (i32.const 32))))))`); + +var mem8 = new Uint8Array(ins.exports.mem.buffer); +var xs = iota(16).map((n) => 0xA0 + n); +var ys = iota(16).map((n) => 0x50 + n); +set(mem8, 16, xs); +set(mem8, 32, ys); + +ins.exports.shuffle1(); +assertSame(get(mem8, 0, 16), iota(16).map((x) => ((x & 1) ? ys : xs)[x >>> 1])) + +ins.exports.shuffle2(); +assertSame(get(mem8, 0, 16), iota(32).map((x) => ((x & 1) ? ys : xs)[x >>> 1]).slice(16)); + +// Vector swizzle (variable permute). +// +// Case 1: Operands and results are all in memory + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "swizzle") + (v128.store (i32.const 0) + (i8x16.swizzle (v128.load (i32.const 16)) (v128.load (i32.const 32))))))`); + +var mem8 = new Uint8Array(ins.exports.mem.buffer); + +var xs = [100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115]; +set(mem8, 16, xs); + +set(mem8, 32, [1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]); +ins.exports.swizzle(); +assertSame(get(mem8, 0, 16), [101,100,103,102,105,104,107,106,109,108,111,110,113,112,115,114]); + +set(mem8, 32, [9,8,11,10,13,12,16,14,1,0,3,2,5,192,7,6]); +ins.exports.swizzle(); +assertSame(get(mem8, 0, 16), [109,108,111,110,113,112,0,114,101,100,103,102,105,0,107,106]); + +// Case 2: The mask operand is a constant; the swizzle gets optimized into a +// shuffle (also see ion-analysis.js). 
+ +for ( let [mask, expected] of [[[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14], + [101,100,103,102,105,104,107,106,109,108,111,110,113,112,115,114]], + [[9,8,11,10,13,12,16,14,1,0,3,2,5,192,7,6], + [109,108,111,110,113,112,0,114,101,100,103,102,105,0,107,106]]] ) { + + let ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "swizzle") + (v128.store (i32.const 0) + (i8x16.swizzle (v128.load (i32.const 16)) (v128.const i8x16 ${mask.join(' ')}))))) +`); + + let mem8 = new Uint8Array(ins.exports.mem.buffer); + set(mem8, 16, [100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115]); + ins.exports.swizzle(); + assertSame(get(mem8, 0, 16), expected); +} + +// Convert integer to floating point + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "convert_s") + (v128.store (i32.const 0) + (f32x4.convert_i32x4_s (v128.load (i32.const 16))))) + (func (export "convert_u") + (v128.store (i32.const 0) + (f32x4.convert_i32x4_u (v128.load (i32.const 16))))))`); + +var mem32s = new Int32Array(ins.exports.mem.buffer); +var mem32f = new Float32Array(ins.exports.mem.buffer); +var xs = [1, -9, 77987, -34512]; + +set(mem32s, 4, xs); +ins.exports.convert_s(); +assertSame(get(mem32f, 0, 4), xs); + +var mem32u = new Uint32Array(ins.exports.mem.buffer); +var ys = xs.map((x) => x>>>0); + +set(mem32u, 4, ys); +ins.exports.convert_u(); +assertSame(get(mem32f, 0, 4), ys.map(Math.fround)); + +// Convert floating point to integer with saturating truncation + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "trunc_sat_s") + (v128.store (i32.const 0) + (i32x4.trunc_sat_f32x4_s (v128.load (i32.const 16))))) + (func (export "trunc_sat_u") + (v128.store (i32.const 0) + (i32x4.trunc_sat_f32x4_u (v128.load (i32.const 16))))))`); + +var mem32s = new Int32Array(ins.exports.mem.buffer); +var mem32u = new Uint32Array(ins.exports.mem.buffer); +var mem32f = new Float32Array(ins.exports.mem.buffer); +var xs = [1.5, 
-9.5, 7.5e12, -8e13]; + +set(mem32f, 4, xs); +ins.exports.trunc_sat_s(); +assertSame(get(mem32s, 0, 4), [1, -9, 0x7FFFFFFF, -0x80000000]); + +var xs = [1.5, -9.5, 7.5e12, 812]; +set(mem32f, 4, xs); +ins.exports.trunc_sat_u(); +assertSame(get(mem32u, 0, 4), [1, 0, 0xFFFFFFFF, 812]); + +var xs = [0, -0, 0x80860000, 0x100000000]; +set(mem32f, 4, xs); +ins.exports.trunc_sat_u(); +assertSame(get(mem32u, 0, 4), [0, 0, 0x80860000, 0xFFFFFFFF]); + +// Loops and blocks. This should at least test "sync" in the baseline compiler. + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func $f (param $count i32) (param $v v128) (result v128) + (local $tmp v128) + (block $B1 + (loop $L1 + (br_if $B1 (i32.eqz (local.get $count))) + (local.set $tmp (i32x4.add (local.get $tmp) (local.get $v))) + (local.set $count (i32.sub (local.get $count) (i32.const 1))) + (br $L1))) + (local.get $tmp)) + (func (export "run") (param $count i32) + (v128.store (i32.const 0) + (call $f (local.get $count) (v128.load (i32.const 16))))))`); + +var mem32 = new Int32Array(ins.exports.mem.buffer); +set(mem32, 4, [1,2,3,4]); +ins.exports.run(7); +assertSame(get(mem32, 0, 4), [7,14,21,28]); + +// Lots of parameters, this should trigger stack parameter passing +// +// 10 parameters in memory, we load them and pass them and operate on them. 
+ +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func $f (param $v0 v128) (param $v1 v128) (param $v2 v128) (param $v3 v128) (param $v4 v128) + (param $v5 v128) (param $v6 v128) (param $v7 v128) (param $v8 v128) (param $v9 v128) + (result v128) + (i32x4.add (local.get $v0) + (i32x4.add (local.get $v1) + (i32x4.add (local.get $v2) + (i32x4.add (local.get $v3) + (i32x4.add (local.get $v4) + (i32x4.add (local.get $v5) + (i32x4.add (local.get $v6) + (i32x4.add (local.get $v7) + (i32x4.add (local.get $v8) (local.get $v9))))))))))) + (func (export "run") + (v128.store (i32.const 0) + (call $f (v128.load (i32.const ${16*1})) + (v128.load (i32.const ${16*2})) + (v128.load (i32.const ${16*3})) + (v128.load (i32.const ${16*4})) + (v128.load (i32.const ${16*5})) + (v128.load (i32.const ${16*6})) + (v128.load (i32.const ${16*7})) + (v128.load (i32.const ${16*8})) + (v128.load (i32.const ${16*9})) + (v128.load (i32.const ${16*10}))))))`); + + +var mem32 = new Int32Array(ins.exports.mem.buffer); +var sum = [0, 0, 0, 0]; +for ( let i=1; i <= 10; i++ ) { + let v = [1,2,3,4].map((x) => x*i); + set(mem32, 4*i, v); + for ( let j=0; j < 4; j++ ) + sum[j] += v[j]; +} + +ins.exports.run(); + +assertSame(get(mem32, 0, 4), sum); + +// Globals. +// +// We have a number of different code paths and representations and +// need to test them all. +// +// Cases: +// - private global, mutable / immutable, initialized from constant or imported immutable global +// - exported global, mutable / immutable, initialized from constant or imported immutable global +// - imported global, mutable / immutable +// - imported global that's re-exported, mutable / immutable + +// Global used for initialization below. 
+ +var init = (function () { + var ins = wasmEvalText(` + (module + (global (export "init") v128 (v128.const i32x4 9 8 7 6)))`); + return ins.exports; +})(); + +for ( let exportspec of ['', '(export "g")'] ) { + + // Private/exported immutable initialized from constant + + let ins1 = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (global $g ${exportspec} v128 (v128.const i32x4 9 8 7 6)) + (func (export "get") (param $dest i32) + (v128.store (local.get $dest) (global.get $g))))`); + + let mem1 = new Int32Array(ins1.exports.mem.buffer); + ins1.exports.get(0); + assertSame(get(mem1, 0, 4), [9, 8, 7, 6]); + + // Private/exported mutable initialized from constant + + let ins2 = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (global $g ${exportspec} (mut v128) (v128.const i32x4 9 8 7 6)) + (func (export "put") (param $val i32) + (global.set $g (i32x4.splat (local.get $val)))) + (func (export "get") (param $dest i32) + (v128.store (local.get $dest) (global.get $g))))`); + + let mem2 = new Int32Array(ins2.exports.mem.buffer); + ins2.exports.get(0); + assertSame(get(mem2, 0, 4), [9, 8, 7, 6]); + ins2.exports.put(37); + ins2.exports.get(0); + assertSame(get(mem2, 0, 4), [37, 37, 37, 37]); + + // Private/exported immutable initialized from imported immutable global + + let ins3 = wasmEvalText(` + (module + (global $init (import "m" "init") v128) + (memory (export "mem") 1 1) + (global $g ${exportspec} v128 (global.get $init)) + (func (export "get") (param $dest i32) + (v128.store (local.get $dest) (global.get $g))))`, + {m:init}); + + let mem3 = new Int32Array(ins3.exports.mem.buffer); + ins3.exports.get(0); + assertSame(get(mem3, 0, 4), [9, 8, 7, 6]); + + // Private/exported mutable initialized from imported immutable global + + let ins4 = wasmEvalText(` + (module + (global $init (import "m" "init") v128) + (memory (export "mem") 1 1) + (global $g ${exportspec} (mut v128) (global.get $init)) + (func (export "put") (param $val i32) + (global.set $g 
(i32x4.splat (local.get $val)))) + (func (export "get") (param $dest i32) + (v128.store (local.get $dest) (global.get $g))))`, + {m:init}); + + let mem4 = new Int32Array(ins4.exports.mem.buffer); + ins4.exports.get(0); + assertSame(get(mem4, 0, 4), [9, 8, 7, 6]); + ins4.exports.put(37); + ins4.exports.get(0); + assertSame(get(mem4, 0, 4), [37, 37, 37, 37]); + + // Imported private/re-exported immutable + + let ins5 = wasmEvalText(` + (module + (global $g ${exportspec} (import "m" "init") v128) + (memory (export "mem") 1 1) + (func (export "get") (param $dest i32) + (v128.store (local.get $dest) (global.get $g))))`, + {m:init}); + + let mem5 = new Int32Array(ins5.exports.mem.buffer); + ins5.exports.get(0); + assertSame(get(mem5, 0, 4), [9, 8, 7, 6]); + + // Imported private/re-exported mutable + + let mutg = (function () { + var ins = wasmEvalText(` + (module + (global (export "mutg") (mut v128) (v128.const i32x4 19 18 17 16)))`); + return ins.exports; + })(); + + let ins6 = wasmEvalText(` + (module + (global $g ${exportspec} (import "m" "mutg") (mut v128)) + (memory (export "mem") 1 1) + (func (export "put") (param $val i32) + (global.set $g (i32x4.splat (local.get $val)))) + (func (export "get") (param $dest i32) + (v128.store (local.get $dest) (global.get $g))))`, + {m:mutg}); + + let mem6 = new Int32Array(ins6.exports.mem.buffer); + ins6.exports.get(0); + assertSame(get(mem6, 0, 4), [19, 18, 17, 16]); + ins6.exports.put(37); + ins6.exports.get(0); + assertSame(get(mem6, 0, 4), [37, 37, 37, 37]); +} + +// Imports and exports that pass and return v128 + +var insworker = wasmEvalText(` + (module + (func (export "worker") (param v128) (result v128) + (i8x16.add (local.get 0) (v128.const i8x16 ${iota(16).join(' ')}))))`); + +var insrun = wasmEvalText(` + (module + (import "" "worker" (func $worker (param v128) (result v128))) + (memory (export "mem") 1 1) + (func (export "run") (param $srcloc i32) (param $destloc i32) + (v128.store (local.get $destloc) + (call 
$worker (v128.load (local.get $srcloc))))))`, + {"":insworker.exports}); + +var mem = new Uint8Array(insrun.exports.mem.buffer); +var xs = iota(16).map((x) => x+5); +set(mem, 0, xs); +insrun.exports.run(0, 16); +assertSame(get(mem, 16, 16), xs.map((x,i) => x+i)) + +// Make sure JS<->wasm call guards are sensible. + +// Calling from JS to export that accepts v128. +assertErrorMessage(() => insworker.exports.worker(), + TypeError, + /cannot pass.*v128.*to or from JS/); + +// Calling from wasm with v128 to import that comes from JS. The instantiation +// will succeed even if the param type of the import is v128 (see "create a host +// function" in the Wasm JSAPI spec), it is the act of invoking it that checks +// that verboten types are not used (see "run a host function", ibid.). +var badImporter = wasmEvalText(` + (module + (import "" "worker" (func $worker (param v128) (result v128))) + (func (export "run") + (drop (call $worker (v128.const i32x4 0 1 2 3)))))`, + {"":{worker: function(a) { return a; }}}); + +assertErrorMessage(() => badImporter.exports.run(), + TypeError, + /cannot pass.*v128.*to or from JS/); + +// Imports and exports that pass and return v128 as stack (not register) args. 
+ +var exportWithStackArgs = wasmEvalText(` + (module + (func (export "worker") (param v128) (param v128) (param v128) (param v128) + (param v128) (param v128) (param v128) (param v128) + (param v128) (param v128) (param v128) (param v128) + (param v128) (param v128) + (result v128 v128) + (i8x16.add (local.get 3) (local.get 12)) + (local.get 7)))`); + +var importWithStackArgs = wasmEvalText(` + (module + (type $t1 (func (param v128) (param v128) (param v128) (param v128) + (param v128) (param v128) (param v128) (param v128) + (param v128) (param v128) (param v128) (param v128) + (param v128) (param v128) + (result v128 v128))) + (import "" "worker" (func $worker (type $t1))) + (memory (export "mem") 1 1) + (table funcref (elem $worker)) + (func (export "run") + (i32.const 16) + (call_indirect (type $t1) (v128.const i32x4 1 1 1 1) (v128.const i32x4 2 2 2 2) (v128.const i32x4 3 3 3 3) + (v128.const i32x4 4 4 4 4) (v128.const i32x4 5 5 5 5) (v128.const i32x4 6 6 6 6) + (v128.const i32x4 7 7 7 7) (v128.const i32x4 8 8 8 8) (v128.const i32x4 9 9 9 9) + (v128.const i32x4 10 10 10 10) (v128.const i32x4 11 11 11 11) (v128.const i32x4 12 12 12 12) + (v128.const i32x4 13 13 13 13) (v128.const i32x4 14 14 14 14) + (i32.const 0)) + drop + v128.store + (i32.const 0) + (call $worker (v128.const i32x4 1 1 1 1) (v128.const i32x4 2 2 2 2) (v128.const i32x4 3 3 3 3) + (v128.const i32x4 4 4 4 4) (v128.const i32x4 5 5 5 5) (v128.const i32x4 6 6 6 6) + (v128.const i32x4 7 7 7 7) (v128.const i32x4 8 8 8 8) (v128.const i32x4 9 9 9 9) + (v128.const i32x4 10 10 10 10) (v128.const i32x4 11 11 11 11) (v128.const i32x4 12 12 12 12) + (v128.const i32x4 13 13 13 13) (v128.const i32x4 14 14 14 14)) + drop + v128.store))`, + {"": exportWithStackArgs.exports}); + +var mem = new Int32Array(importWithStackArgs.exports.mem.buffer); +importWithStackArgs.exports.run(); +assertSame(get(mem, 0, 4), [17, 17, 17, 17]); +assertSame(get(mem, 4, 4), [17, 17, 17, 17]); + +// Imports and exports of v128 
globals + +var insexporter = wasmEvalText(` + (module + (global (export "myglobal") (mut v128) (v128.const i8x16 ${iota(16).join(' ')})))`); + +var insimporter = wasmEvalText(` + (module + (import "m" "myglobal" (global $g (mut v128))) + (memory (export "mem") 1 1) + (func (export "run") (param $dest i32) + (v128.store (local.get $dest) (global.get $g))))`, + {m:insexporter.exports}); + +var mem = new Uint8Array(insimporter.exports.mem.buffer); +insimporter.exports.run(16); +assertSame(get(mem, 16, 16), iota(16)); + +// Guards on accessing v128 globals from JS + +assertErrorMessage(() => insexporter.exports.myglobal.value = 0, + TypeError, + /cannot pass.*v128.*to or from JS/); + +assertErrorMessage(function () { let v = insexporter.exports.myglobal.value }, + TypeError, + /cannot pass.*v128.*to or from JS/); + +// Multi-value cases + v128 parameters to if, block, loop + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func $mvreturn (result v128 v128 v128) + (v128.load (i32.const 16)) + (v128.load (i32.const 0)) + (v128.load (i32.const 32))) + (func (export "runreturn") + i32.const 48 + (call $mvreturn) + i32x4.sub ;; [-20, -20, -20, -20] + i32x4.sub ;; [31, 32, 33, 34] + v128.store) + (func (export "runif") (param $cond i32) + i32.const 48 + (v128.load (i32.const 0)) + (v128.load (i32.const 16)) + (if (param v128) (param v128) (result v128 v128) + (local.get $cond) + (then i32x4.add + (v128.load (i32.const 32))) + (else i32x4.sub + (v128.load (i32.const 0)))) + i32x4.add + v128.store) + (func (export "runblock") + i32.const 48 + (v128.load (i32.const 0)) + (v128.load (i32.const 16)) + (block (param v128 v128) (result v128 v128) + i32x4.add + (v128.load (i32.const 32))) + i32x4.add + v128.store) + (func (export "runloop") (param $count i32) + i32.const 48 + (v128.load (i32.const 0)) + (v128.load (i32.const 16)) + (block $B (param v128 v128) (result v128 v128) + (loop $L (param v128 v128) (result v128 v128) + i32x4.add + (v128.load (i32.const 
32)) + (local.set $count (i32.sub (local.get $count) (i32.const 1))) + (br_if $B (i32.eqz (local.get $count))) + (br $L))) + i32x4.add + v128.store))`); + +var mem = new Int32Array(ins.exports.mem.buffer); +set(mem, 0, [1, 2, 3, 4]); +set(mem, 4, [11, 12, 13, 14]); +set(mem, 8, [21, 22, 23, 24]); + +// Multi-value returns + +ins.exports.runreturn(); +assertSame(get(mem, 12, 4), [31, 32, 33, 34]); + +// Multi-parameters to and multi-returns from "if" + +// This should be vector@0 + vector@16 + vector@32 +ins.exports.runif(1); +assertSame(get(mem, 12, 4), + [33, 36, 39, 42]); + +// This should be vector@0 - vector@16 + vector@0 +ins.exports.runif(0); +assertSame(get(mem, 12, 4), + [-9, -8, -7, -6]); + +// This should be vector@0 + vector@16 + vector@32 +ins.exports.runblock(); +assertSame(get(mem, 12, 4), + [33, 36, 39, 42]); + +// This should be vector@0 + vector@16 + N * vector@32 where +// N is the parameter to runloop. +ins.exports.runloop(3); +assertSame(get(mem, 12, 4), + [12+3*21, 14+3*22, 16+3*23, 18+3*24]); diff --git a/js/src/jit-test/tests/wasm/simd/avx2-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/avx2-x64-ion-codegen.js new file mode 100644 index 0000000000..af8269e190 --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/avx2-x64-ion-codegen.js @@ -0,0 +1,584 @@ +// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x64 || getBuildConfiguration().simulator || !isAvxPresent(); include:codegen-x64-test.js + +// Test that there are no extraneous moves for various SIMD conversion +// operations. See README-codegen.md for general information about this type of +// test case. + +// Note, these tests test the beginning of the output but not the end. + +// Currently AVX2 exhibits a defect when function uses its first v128 arg and +// returns v128: the register allocator adds unneeded extra moves from xmm0, +// then into different temporary, and then the latter temporary is used as arg. 
+// In the tests below, to simplify things, don't use/ignore the first arg. +// v128 OP v128 -> v128 +// inputs: [[complete-opname, expected-pattern], ...] +function codegenTestX64_v128xv128_v128_avxhack(inputs, options = {}) { + for ( let [op, expected] of inputs ) { + codegenTestX64_adhoc(wrap(options, ` + (func (export "f") (param v128 v128 v128) (result v128) + (${op} (local.get 1) (local.get 2)))`), + 'f', + expected, + options); + } +} +// (see codegenTestX64_v128xv128_v128_avxhack comment about AVX defect) +// v128 OP const -> v128 +// inputs: [[complete-opname, const, expected-pattern], ...] +function codegenTestX64_v128xLITERAL_v128_avxhack(inputs, options = {}) { + for ( let [op, const_, expected] of inputs ) { + codegenTestX64_adhoc(wrap(options, ` + (func (export "f") (param v128 v128) (result v128) + (${op} (local.get 1) ${const_}))`), + 'f', + expected, + options); + } +} +// (see codegenTestX64_v128xv128_v128_avxhack comment about AVX defect) +// const OP v128 -> v128 +// inputs: [[complete-opname, const, expected-pattern], ...] +function codegenTestX64_LITERALxv128_v128_avxhack(inputs, options = {}) { + for ( let [op, const_, expected] of inputs ) { + codegenTestX64_adhoc(wrap(options, ` + (func (export "f") (param v128 v128) (result v128) + (${op} ${const_} (local.get 1)))`), + 'f', + expected, + options); + } +} + +// Utility function to test SIMD operations encoding, where the input argument +// has the specified type (T). +// inputs: [[type, complete-opname, expected-pattern], ...] +function codegenTestX64_T_v128_avxhack(inputs, options = {}) { + for ( let [ty, op, expected] of inputs ) { + codegenTestX64_adhoc(wrap(options, ` + (func (export "f") (param ${ty}) (result v128) + (${op} (local.get 0)))`), + 'f', + expected, + options); + } +} + +// Machers for any 64- and 32-bit registers. +var GPR_I64 = "%r\\w+"; +var GPR_I32 = "%(?:e\\w+|r\\d+d)"; + +// Simple binary ops: e.g. 
add, sub, mul +codegenTestX64_v128xv128_v128_avxhack( + [['i8x16.avgr_u', `c5 f1 e0 c2 vpavgb %xmm2, %xmm1, %xmm0`], + ['i16x8.avgr_u', `c5 f1 e3 c2 vpavgw %xmm2, %xmm1, %xmm0`], + ['i8x16.add', `c5 f1 fc c2 vpaddb %xmm2, %xmm1, %xmm0`], + ['i8x16.add_sat_s', `c5 f1 ec c2 vpaddsb %xmm2, %xmm1, %xmm0`], + ['i8x16.add_sat_u', `c5 f1 dc c2 vpaddusb %xmm2, %xmm1, %xmm0`], + ['i8x16.sub', `c5 f1 f8 c2 vpsubb %xmm2, %xmm1, %xmm0`], + ['i8x16.sub_sat_s', `c5 f1 e8 c2 vpsubsb %xmm2, %xmm1, %xmm0`], + ['i8x16.sub_sat_u', `c5 f1 d8 c2 vpsubusb %xmm2, %xmm1, %xmm0`], + ['i16x8.mul', `c5 f1 d5 c2 vpmullw %xmm2, %xmm1, %xmm0`], + ['i16x8.min_s', `c5 f1 ea c2 vpminsw %xmm2, %xmm1, %xmm0`], + ['i16x8.min_u', `c4 e2 71 3a c2 vpminuw %xmm2, %xmm1, %xmm0`], + ['i16x8.max_s', `c5 f1 ee c2 vpmaxsw %xmm2, %xmm1, %xmm0`], + ['i16x8.max_u', `c4 e2 71 3e c2 vpmaxuw %xmm2, %xmm1, %xmm0`], + ['i32x4.add', `c5 f1 fe c2 vpaddd %xmm2, %xmm1, %xmm0`], + ['i32x4.sub', `c5 f1 fa c2 vpsubd %xmm2, %xmm1, %xmm0`], + ['i32x4.mul', `c4 e2 71 40 c2 vpmulld %xmm2, %xmm1, %xmm0`], + ['i32x4.min_s', `c4 e2 71 39 c2 vpminsd %xmm2, %xmm1, %xmm0`], + ['i32x4.min_u', `c4 e2 71 3b c2 vpminud %xmm2, %xmm1, %xmm0`], + ['i32x4.max_s', `c4 e2 71 3d c2 vpmaxsd %xmm2, %xmm1, %xmm0`], + ['i32x4.max_u', `c4 e2 71 3f c2 vpmaxud %xmm2, %xmm1, %xmm0`], + ['i64x2.add', `c5 f1 d4 c2 vpaddq %xmm2, %xmm1, %xmm0`], + ['i64x2.sub', `c5 f1 fb c2 vpsubq %xmm2, %xmm1, %xmm0`], + ['i64x2.mul', ` +c5 e1 73 d1 20 vpsrlq \\$0x20, %xmm1, %xmm3 +66 0f f4 da pmuludq %xmm2, %xmm3 +c5 81 73 d2 20 vpsrlq \\$0x20, %xmm2, %xmm15 +66 44 0f f4 f9 pmuludq %xmm1, %xmm15 +66 44 0f d4 fb paddq %xmm3, %xmm15 +66 41 0f 73 f7 20 psllq \\$0x20, %xmm15 +c5 f1 f4 c2 vpmuludq %xmm2, %xmm1, %xmm0 +66 41 0f d4 c7 paddq %xmm15, %xmm0`], + ['f32x4.add', `c5 f0 58 c2 vaddps %xmm2, %xmm1, %xmm0`], + ['f32x4.sub', `c5 f0 5c c2 vsubps %xmm2, %xmm1, %xmm0`], + ['f32x4.mul', `c5 f0 59 c2 vmulps %xmm2, %xmm1, %xmm0`], + ['f32x4.div', `c5 f0 5e c2 vdivps %xmm2, 
%xmm1, %xmm0`], + ['f64x2.add', `c5 f1 58 c2 vaddpd %xmm2, %xmm1, %xmm0`], + ['f64x2.sub', `c5 f1 5c c2 vsubpd %xmm2, %xmm1, %xmm0`], + ['f64x2.mul', `c5 f1 59 c2 vmulpd %xmm2, %xmm1, %xmm0`], + ['f64x2.div', `c5 f1 5e c2 vdivpd %xmm2, %xmm1, %xmm0`], + ['i8x16.narrow_i16x8_s', `c5 f1 63 c2 vpacksswb %xmm2, %xmm1, %xmm0`], + ['i8x16.narrow_i16x8_u', `c5 f1 67 c2 vpackuswb %xmm2, %xmm1, %xmm0`], + ['i16x8.narrow_i32x4_s', `c5 f1 6b c2 vpackssdw %xmm2, %xmm1, %xmm0`], + ['i16x8.narrow_i32x4_u', `c4 e2 71 2b c2 vpackusdw %xmm2, %xmm1, %xmm0`], + ['i32x4.dot_i16x8_s', `c5 f1 f5 c2 vpmaddwd %xmm2, %xmm1, %xmm0`]]); + +// Simple comparison ops +codegenTestX64_v128xv128_v128_avxhack( + [['i8x16.eq', `c5 f1 74 c2 vpcmpeqb %xmm2, %xmm1, %xmm0`], + ['i8x16.ne', ` +c5 f1 74 c2 vpcmpeqb %xmm2, %xmm1, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i8x16.lt_s', `c5 e9 64 c1 vpcmpgtb %xmm1, %xmm2, %xmm0`], + ['i8x16.gt_u', ` +c5 f1 de c2 vpmaxub %xmm2, %xmm1, %xmm0 +66 0f 74 c2 pcmpeqb %xmm2, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i16x8.eq', `c5 f1 75 c2 vpcmpeqw %xmm2, %xmm1, %xmm0`], + ['i16x8.ne', ` +c5 f1 75 c2 vpcmpeqw %xmm2, %xmm1, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i16x8.le_s', ` +c5 f1 65 c2 vpcmpgtw %xmm2, %xmm1, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i16x8.ge_u', ` +c4 e2 71 3a c2 vpminuw %xmm2, %xmm1, %xmm0 +66 0f 75 c2 pcmpeqw %xmm2, %xmm0`], + ['i32x4.eq', `c5 f1 76 c2 vpcmpeqd %xmm2, %xmm1, %xmm0`], + ['i32x4.ne', ` +c5 f1 76 c2 vpcmpeqd %xmm2, %xmm1, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i32x4.lt_s', `c5 e9 66 c1 vpcmpgtd %xmm1, %xmm2, %xmm0`], + ['i32x4.gt_u', ` +c4 e2 71 3f c2 vpmaxud %xmm2, %xmm1, %xmm0 +66 0f 76 c2 pcmpeqd %xmm2, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i64x2.eq', 
`c4 e2 71 29 c2 vpcmpeqq %xmm2, %xmm1, %xmm0`], + ['i64x2.ne', ` +c4 e2 71 29 c2 vpcmpeqq %xmm2, %xmm1, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i64x2.lt_s', `c4 e2 69 37 c1 vpcmpgtq %xmm1, %xmm2, %xmm0`], + ['i64x2.ge_s', ` +c4 e2 69 37 c1 vpcmpgtq %xmm1, %xmm2, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['f32x4.eq', `c5 f0 c2 c2 00 vcmpps \\$0x00, %xmm2, %xmm1, %xmm0`], + ['f32x4.lt', `c5 f0 c2 c2 01 vcmpps \\$0x01, %xmm2, %xmm1, %xmm0`], + ['f32x4.ge', `c5 e8 c2 c1 02 vcmpps \\$0x02, %xmm1, %xmm2, %xmm0`], + ['f64x2.eq', `c5 f1 c2 c2 00 vcmppd \\$0x00, %xmm2, %xmm1, %xmm0`], + ['f64x2.lt', `c5 f1 c2 c2 01 vcmppd \\$0x01, %xmm2, %xmm1, %xmm0`], + ['f64x2.ge', `c5 e9 c2 c1 02 vcmppd \\$0x02, %xmm1, %xmm2, %xmm0`], + ['f32x4.pmin', `c5 e8 5d c1 vminps %xmm1, %xmm2, %xmm0`], + ['f32x4.pmax', `c5 e8 5f c1 vmaxps %xmm1, %xmm2, %xmm0`], + ['f64x2.pmin', `c5 e9 5d c1 vminpd %xmm1, %xmm2, %xmm0`], + ['f64x2.pmax', `c5 e9 5f c1 vmaxpd %xmm1, %xmm2, %xmm0`], + ['i8x16.swizzle', ` +c5 69 dc 3d ${RIPRADDR} vpaddusbx ${RIPR}, %xmm2, %xmm15 +c4 c2 71 00 c7 vpshufb %xmm15, %xmm1, %xmm0`], + ['i16x8.extmul_high_i8x16_s', ` +66 44 0f 3a 0f fa 08 palignr \\$0x08, %xmm2, %xmm15 +c4 42 79 20 ff vpmovsxbw %xmm15, %xmm15 +66 0f 3a 0f c1 08 palignr \\$0x08, %xmm1, %xmm0 +c4 e2 79 20 c0 vpmovsxbw %xmm0, %xmm0 +66 41 0f d5 c7 pmullw %xmm15, %xmm0`], + ['i32x4.extmul_low_i16x8_u', ` +c5 71 e4 fa vpmulhuw %xmm2, %xmm1, %xmm15 +c5 f1 d5 c2 vpmullw %xmm2, %xmm1, %xmm0 +66 41 0f 61 c7 punpcklwd %xmm15, %xmm0`], + ['i64x2.extmul_low_i32x4_s', ` +c5 79 70 f9 10 vpshufd \\$0x10, %xmm1, %xmm15 +c5 f9 70 c2 10 vpshufd \\$0x10, %xmm2, %xmm0 +66 41 0f 38 28 c7 pmuldq %xmm15, %xmm0`], + ['i16x8.q15mulr_sat_s', ` +c4 e2 71 0b c2 vpmulhrsw %xmm2, %xmm1, %xmm0 +c5 79 75 3d ${RIPRADDR} vpcmpeqwx ${RIPR}, %xmm0, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`], +]); + +// Bitwise binary ops 
+codegenTestX64_v128xv128_v128_avxhack( + [['v128.and', `c5 f1 db c2 vpand %xmm2, %xmm1, %xmm0`], + ['v128.andnot', `c5 e9 df c1 vpandn %xmm1, %xmm2, %xmm0`], + ['v128.or', `c5 f1 eb c2 vpor %xmm2, %xmm1, %xmm0`], + ['v128.xor', `c5 f1 ef c2 vpxor %xmm2, %xmm1, %xmm0`]]); + + +// Replace lane ops. +codegenTestX64_adhoc(`(module + (func (export "f") (param v128 v128 i32) (result v128) + (i8x16.replace_lane 7 (local.get 1) (local.get 2))))`, 'f', ` +c4 .. 71 20 .. 07 vpinsrb \\$0x07, ${GPR_I32}, %xmm1, %xmm0`); +codegenTestX64_adhoc(`(module + (func (export "f") (param v128 v128 i32) (result v128) + (i16x8.replace_lane 3 (local.get 1) (local.get 2))))`, 'f', ` +(?:c4 .. 71|c5 f1) c4 .. 03 vpinsrw \\$0x03, ${GPR_I32}, %xmm1, %xmm0`); +codegenTestX64_adhoc(`(module + (func (export "f") (param v128 v128 i32) (result v128) + (i32x4.replace_lane 2 (local.get 1) (local.get 2))))`, 'f', ` +c4 .. 71 22 .. 02 vpinsrd \\$0x02, ${GPR_I32}, %xmm1, %xmm0`); +codegenTestX64_adhoc(`(module + (func (export "f") (param v128 v128 i64) (result v128) + (i64x2.replace_lane 1 (local.get 1) (local.get 2))))`, 'f', ` +c4 .. f1 22 .. 01 vpinsrq \\$0x01, ${GPR_I64}, %xmm1, %xmm0`); + + +if (isAvxPresent(2)) { + codegenTestX64_T_v128_avxhack( + [['i32', 'i8x16.splat', ` +c5 f9 6e .. vmovd ${GPR_I32}, %xmm0 +c4 e2 79 78 c0 vpbroadcastb %xmm0, %xmm0`], + ['i32', 'i16x8.splat', ` +c5 f9 6e .. vmovd ${GPR_I32}, %xmm0 +c4 e2 79 79 c0 vpbroadcastw %xmm0, %xmm0`], + ['i32', 'i32x4.splat', ` +c5 f9 6e .. vmovd ${GPR_I32}, %xmm0 +c4 e2 79 58 c0 vpbroadcastd %xmm0, %xmm0`], + ['i64', 'i64x2.splat', ` +c4 e1 f9 6e .. vmovq ${GPR_I64}, %xmm0 +c4 e2 79 59 c0 vpbroadcastq %xmm0, %xmm0`], + ['f32', 'f32x4.splat', `c4 e2 79 18 c0 vbroadcastss %xmm0, %xmm0`]], {log:true}); + + codegenTestX64_T_v128_avxhack( + [['i32', 'v128.load8_splat', + 'c4 c2 79 78 04 .. vpbroadcastbb \\(%r15,%r\\w+,1\\), %xmm0'], + ['i32', 'v128.load16_splat', + 'c4 c2 79 79 04 .. 
vpbroadcastww \\(%r15,%r\\w+,1\\), %xmm0'], + ['i32', 'v128.load32_splat', + 'c4 c2 79 18 04 .. vbroadcastssl \\(%r15,%r\\w+,1\\), %xmm0']], {memory: 1}); +} + +// Using VEX during shuffle ops +codegenTestX64_v128xv128_v128_avxhack([ + // Identity op on second argument should generate a move + ['i8x16.shuffle 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15', + 'c5 f9 6f c1 vmovdqa %xmm1, %xmm0'], + + // Broadcast a byte from first argument + ['i8x16.shuffle 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5', + ` +c5 f1 60 c1 vpunpcklbw %xmm1, %xmm1, %xmm0 +c5 fa 70 c0 55 vpshufhw \\$0x55, %xmm0, %xmm0 +c5 f9 70 c0 aa vpshufd \\$0xAA, %xmm0, %xmm0`], + + // Broadcast a word from first argument + ['i8x16.shuffle 4 5 4 5 4 5 4 5 4 5 4 5 4 5 4 5', + ` +c5 fb 70 c1 aa vpshuflw \\$0xAA, %xmm1, %xmm0 +c5 f9 70 c0 00 vpshufd \\$0x00, %xmm0, %xmm0`], + + // Permute words + ['i8x16.shuffle 2 3 0 1 6 7 4 5 10 11 8 9 14 15 12 13', +` +c5 fb 70 c1 b1 vpshuflw \\$0xB1, %xmm1, %xmm0 +c5 fa 70 c0 b1 vpshufhw \\$0xB1, %xmm0, %xmm0`], + + // Permute doublewords + ['i8x16.shuffle 4 5 6 7 0 1 2 3 12 13 14 15 8 9 10 11', + 'c5 f9 70 c1 b1 vpshufd \\$0xB1, %xmm1, %xmm0'], + + // Interleave doublewords + ['i8x16.shuffle 0 1 2 3 16 17 18 19 4 5 6 7 20 21 22 23', + 'c5 f1 62 c2 vpunpckldq %xmm2, %xmm1, %xmm0'], + + // Interleave quadwords + ['i8x16.shuffle 24 25 26 27 28 29 30 31 8 9 10 11 12 13 14 15', + 'c5 e9 6d c1 vpunpckhqdq %xmm1, %xmm2, %xmm0'], + + // Rotate right + ['i8x16.shuffle 13 14 15 0 1 2 3 4 5 6 7 8 9 10 11 12', + `c4 e3 71 0f c1 0d vpalignr \\$0x0D, %xmm1, %xmm1, %xmm0`], + ['i8x16.shuffle 28 29 30 31 0 1 2 3 4 5 6 7 8 9 10 11', + `c4 e3 71 0f c2 0c vpalignr \\$0x0C, %xmm2, %xmm1, %xmm0`]]); + +if (isAvxPresent(2)) { + codegenTestX64_v128xv128_v128_avxhack([ + // Broadcast low byte from second argument + ['i8x16.shuffle 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0', + 'c4 e2 79 78 c1 vpbroadcastb %xmm1, %xmm0'], + + // Broadcast low word from third argument + ['i8x16.shuffle 16 17 16 17 16 17 16 17 16 17 16 17 
16 17 16 17', + 'c4 e2 79 79 c2 vpbroadcastw %xmm2, %xmm0'], + + // Broadcast low doubleword from second argument + ['i8x16.shuffle 0 1 2 3 0 1 2 3 0 1 2 3 0 1 2 3', + 'c4 e2 79 58 c1 vpbroadcastd %xmm1, %xmm0']]); +} + +// Testing AVX optimization where VPBLENDVB accepts four XMM registers as args. +codegenTestX64_adhoc( + `(func (export "f") (param v128 v128 v128 v128) (result v128) + (i8x16.shuffle 0 17 2 3 4 5 6 7 24 25 26 11 12 13 30 15 + (local.get 2)(local.get 3)))`, + 'f', +` +66 0f 6f 0d ${RIPRADDR} movdqax ${RIPR}, %xmm1 +c4 e3 69 4c c3 10 vpblendvb %xmm1, %xmm3, %xmm2, %xmm0`); + +// Constant arguments that are folded into the instruction +codegenTestX64_v128xLITERAL_v128_avxhack( + [['i8x16.add', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 fc 05 ${RIPRADDR} vpaddbx ${RIPR}, %xmm1, %xmm0`], + ['i8x16.sub', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 f8 05 ${RIPRADDR} vpsubbx ${RIPR}, %xmm1, %xmm0`], + ['i8x16.add_sat_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 ec 05 ${RIPRADDR} vpaddsbx ${RIPR}, %xmm1, %xmm0`], + ['i8x16.add_sat_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 dc 05 ${RIPRADDR} vpaddusbx ${RIPR}, %xmm1, %xmm0`], + ['i8x16.sub_sat_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 e8 05 ${RIPRADDR} vpsubsbx ${RIPR}, %xmm1, %xmm0`], + ['i8x16.sub_sat_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 d8 05 ${RIPRADDR} vpsubusbx ${RIPR}, %xmm1, %xmm0`], + ['i8x16.min_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c4 e2 71 38 05 ${RIPRADDR} vpminsbx ${RIPR}, %xmm1, %xmm0`], + ['i8x16.min_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 da 05 ${RIPRADDR} vpminubx ${RIPR}, %xmm1, %xmm0`], + ['i8x16.max_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c4 e2 71 3c 05 ${RIPRADDR} vpmaxsbx ${RIPR}, %xmm1, %xmm0`], + ['i8x16.max_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 
de 05 ${RIPRADDR} vpmaxubx ${RIPR}, %xmm1, %xmm0`], + ['i8x16.eq', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 74 05 ${RIPRADDR} vpcmpeqbx ${RIPR}, %xmm1, %xmm0`], + ['i8x16.ne', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', ` + c5 f1 74 05 ${RIPRADDR} vpcmpeqbx ${RIPR}, %xmm1, %xmm0 + 66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 + 66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i8x16.gt_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 64 05 ${RIPRADDR} vpcmpgtbx ${RIPR}, %xmm1, %xmm0`], + ['i8x16.le_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', ` + c5 f1 64 05 ${RIPRADDR} vpcmpgtbx ${RIPR}, %xmm1, %xmm0 + 66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 + 66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i8x16.narrow_i16x8_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 63 05 ${RIPRADDR} vpacksswbx ${RIPR}, %xmm1, %xmm0`], + ['i8x16.narrow_i16x8_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 67 05 ${RIPRADDR} vpackuswbx ${RIPR}, %xmm1, %xmm0`], + + ['i16x8.add', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c5 f1 fd 05 ${RIPRADDR} vpaddwx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.sub', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c5 f1 f9 05 ${RIPRADDR} vpsubwx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.mul', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c5 f1 d5 05 ${RIPRADDR} vpmullwx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.add_sat_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c5 f1 ed 05 ${RIPRADDR} vpaddswx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.add_sat_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c5 f1 dd 05 ${RIPRADDR} vpadduswx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.sub_sat_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c5 f1 e9 05 ${RIPRADDR} vpsubswx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.sub_sat_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c5 f1 d9 05 ${RIPRADDR} vpsubuswx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.min_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c5 f1 ea 05 ${RIPRADDR} vpminswx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.min_u', 
'(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c4 e2 71 3a 05 ${RIPRADDR} vpminuwx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.max_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c5 f1 ee 05 ${RIPRADDR} vpmaxswx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.max_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c4 e2 71 3e 05 ${RIPRADDR} vpmaxuwx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.eq', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c5 f1 75 05 ${RIPRADDR} vpcmpeqwx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.ne', '(v128.const i16x8 1 2 1 2 1 2 1 2)', ` + c5 f1 75 05 ${RIPRADDR} vpcmpeqwx ${RIPR}, %xmm1, %xmm0 + 66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 + 66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i16x8.gt_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c5 f1 65 05 ${RIPRADDR} vpcmpgtwx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.le_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', ` + c5 f1 65 05 ${RIPRADDR} vpcmpgtwx ${RIPR}, %xmm1, %xmm0 + 66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 + 66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i16x8.narrow_i32x4_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c5 f1 6b 05 ${RIPRADDR} vpackssdwx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.narrow_i32x4_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c4 e2 71 2b 05 ${RIPRADDR} vpackusdwx ${RIPR}, %xmm1, %xmm0`], + + ['i32x4.add', '(v128.const i32x4 1 2 1 2)', + `c5 f1 fe 05 ${RIPRADDR} vpadddx ${RIPR}, %xmm1, %xmm0`], + ['i32x4.sub', '(v128.const i32x4 1 2 1 2)', + `c5 f1 fa 05 ${RIPRADDR} vpsubdx ${RIPR}, %xmm1, %xmm0`], + ['i32x4.mul', '(v128.const i32x4 1 2 1 2)', + `c4 e2 71 40 05 ${RIPRADDR} vpmulldx ${RIPR}, %xmm1, %xmm0`], + ['i32x4.min_s', '(v128.const i32x4 1 2 1 2)', + `c4 e2 71 39 05 ${RIPRADDR} vpminsdx ${RIPR}, %xmm1, %xmm0`], + ['i32x4.min_u', '(v128.const i32x4 1 2 1 2)', + `c4 e2 71 3b 05 ${RIPRADDR} vpminudx ${RIPR}, %xmm1, %xmm0`], + ['i32x4.max_s', '(v128.const i32x4 1 2 1 2)', + `c4 e2 71 3d 05 ${RIPRADDR} vpmaxsdx ${RIPR}, %xmm1, %xmm0`], + ['i32x4.max_u', '(v128.const i32x4 1 2 1 2)', + `c4 e2 71 3f 05 ${RIPRADDR} vpmaxudx ${RIPR}, %xmm1, %xmm0`], + 
['i32x4.eq', '(v128.const i32x4 1 2 1 2)', + `c5 f1 76 05 ${RIPRADDR} vpcmpeqdx ${RIPR}, %xmm1, %xmm0`], + ['i32x4.ne', '(v128.const i32x4 1 2 1 2)', ` + c5 f1 76 05 ${RIPRADDR} vpcmpeqdx ${RIPR}, %xmm1, %xmm0 + 66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 + 66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i32x4.gt_s', '(v128.const i32x4 1 2 1 2)', + `c5 f1 66 05 ${RIPRADDR} vpcmpgtdx ${RIPR}, %xmm1, %xmm0`], + ['i32x4.le_s', '(v128.const i32x4 1 2 1 2)', ` + c5 f1 66 05 ${RIPRADDR} vpcmpgtdx ${RIPR}, %xmm1, %xmm0 + 66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 + 66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i32x4.dot_i16x8_s', '(v128.const i32x4 1 2 1 2)', + `c5 f1 f5 05 ${RIPRADDR} vpmaddwdx ${RIPR}, %xmm1, %xmm0`], + + ['i64x2.add', '(v128.const i64x2 1 2)', + `c5 f1 d4 05 ${RIPRADDR} vpaddqx ${RIPR}, %xmm1, %xmm0`], + ['i64x2.sub', '(v128.const i64x2 1 2)', + `c5 f1 fb 05 ${RIPRADDR} vpsubqx ${RIPR}, %xmm1, %xmm0`], + + ['v128.and', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 db 05 ${RIPRADDR} vpandx ${RIPR}, %xmm1, %xmm0`], + ['v128.or', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 eb 05 ${RIPRADDR} vporx ${RIPR}, %xmm1, %xmm0`], + ['v128.xor', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 ef 05 ${RIPRADDR} vpxorx ${RIPR}, %xmm1, %xmm0`], + + ['f32x4.add', '(v128.const f32x4 1 2 3 4)', + `c5 f0 58 05 ${RIPRADDR} vaddpsx ${RIPR}, %xmm1, %xmm0`], + ['f32x4.sub', '(v128.const f32x4 1 2 3 4)', + `c5 f0 5c 05 ${RIPRADDR} vsubpsx ${RIPR}, %xmm1, %xmm0`], + ['f32x4.mul', '(v128.const f32x4 1 2 3 4)', + `c5 f0 59 05 ${RIPRADDR} vmulpsx ${RIPR}, %xmm1, %xmm0`], + ['f32x4.div', '(v128.const f32x4 1 2 3 4)', + `c5 f0 5e 05 ${RIPRADDR} vdivpsx ${RIPR}, %xmm1, %xmm0`], + + ['f64x2.add', '(v128.const f64x2 1 2)', + `c5 f1 58 05 ${RIPRADDR} vaddpdx ${RIPR}, %xmm1, %xmm0`], + ['f64x2.sub', '(v128.const f64x2 1 2)', + `c5 f1 5c 05 ${RIPRADDR} vsubpdx ${RIPR}, %xmm1, %xmm0`], + ['f64x2.mul', '(v128.const f64x2 1 2)', + `c5 f1 59 05 ${RIPRADDR} vmulpdx 
${RIPR}, %xmm1, %xmm0`], + ['f64x2.div', '(v128.const f64x2 1 2)', + `c5 f1 5e 05 ${RIPRADDR} vdivpdx ${RIPR}, %xmm1, %xmm0`], + + ['f32x4.eq', '(v128.const f32x4 1 2 3 4)', + `c5 f0 c2 05 ${RIPRADDR} 00 vcmppsx \\$0x00, ${RIPR}, %xmm1, %xmm0`], + ['f32x4.ne', '(v128.const f32x4 1 2 3 4)', + `c5 f0 c2 05 ${RIPRADDR} 04 vcmppsx \\$0x04, ${RIPR}, %xmm1, %xmm0`], + ['f32x4.lt', '(v128.const f32x4 1 2 3 4)', + `c5 f0 c2 05 ${RIPRADDR} 01 vcmppsx \\$0x01, ${RIPR}, %xmm1, %xmm0`], + ['f32x4.le', '(v128.const f32x4 1 2 3 4)', + `c5 f0 c2 05 ${RIPRADDR} 02 vcmppsx \\$0x02, ${RIPR}, %xmm1, %xmm0`], + + ['f64x2.eq', '(v128.const f64x2 1 2)', + `c5 f1 c2 05 ${RIPRADDR} 00 vcmppdx \\$0x00, ${RIPR}, %xmm1, %xmm0`], + ['f64x2.ne', '(v128.const f64x2 1 2)', + `c5 f1 c2 05 ${RIPRADDR} 04 vcmppdx \\$0x04, ${RIPR}, %xmm1, %xmm0`], + ['f64x2.lt', '(v128.const f64x2 1 2)', + `c5 f1 c2 05 ${RIPRADDR} 01 vcmppdx \\$0x01, ${RIPR}, %xmm1, %xmm0`], + ['f64x2.le', '(v128.const f64x2 1 2)', + `c5 f1 c2 05 ${RIPRADDR} 02 vcmppdx \\$0x02, ${RIPR}, %xmm1, %xmm0`]]); + + // Commutative operations with constants on the lhs should generate the same + // code as with the constant on the rhs. 
+ codegenTestX64_LITERALxv128_v128_avxhack( + [['i8x16.add', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 fc 05 ${RIPRADDR} vpaddbx ${RIPR}, %xmm1, %xmm0`], + ['i8x16.add_sat_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 ec 05 ${RIPRADDR} vpaddsbx ${RIPR}, %xmm1, %xmm0`], + ['i8x16.add_sat_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 dc 05 ${RIPRADDR} vpaddusbx ${RIPR}, %xmm1, %xmm0`], + ['i8x16.min_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c4 e2 71 38 05 ${RIPRADDR} vpminsbx ${RIPR}, %xmm1, %xmm0`], + ['i8x16.min_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 da 05 ${RIPRADDR} vpminubx ${RIPR}, %xmm1, %xmm0`], + ['i8x16.max_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c4 e2 71 3c 05 ${RIPRADDR} vpmaxsbx ${RIPR}, %xmm1, %xmm0`], + ['i8x16.max_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 de 05 ${RIPRADDR} vpmaxubx ${RIPR}, %xmm1, %xmm0`], + ['i8x16.eq', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 74 05 ${RIPRADDR} vpcmpeqbx ${RIPR}, %xmm1, %xmm0`], + ['i8x16.ne', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', ` + c5 f1 74 05 ${RIPRADDR} vpcmpeqbx ${RIPR}, %xmm1, %xmm0 + 66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 + 66 41 0f ef c7 pxor %xmm15, %xmm0`], + + ['i16x8.add', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c5 f1 fd 05 ${RIPRADDR} vpaddwx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.mul', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c5 f1 d5 05 ${RIPRADDR} vpmullwx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.add_sat_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c5 f1 ed 05 ${RIPRADDR} vpaddswx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.add_sat_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c5 f1 dd 05 ${RIPRADDR} vpadduswx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.min_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c5 f1 ea 05 ${RIPRADDR} vpminswx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.min_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c4 e2 71 3a 05 
${RIPRADDR} vpminuwx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.max_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c5 f1 ee 05 ${RIPRADDR} vpmaxswx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.max_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c4 e2 71 3e 05 ${RIPRADDR} vpmaxuwx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.eq', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `c5 f1 75 05 ${RIPRADDR} vpcmpeqwx ${RIPR}, %xmm1, %xmm0`], + ['i16x8.ne', '(v128.const i16x8 1 2 1 2 1 2 1 2)', ` + c5 f1 75 05 ${RIPRADDR} vpcmpeqwx ${RIPR}, %xmm1, %xmm0 + 66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 + 66 41 0f ef c7 pxor %xmm15, %xmm0`], + + ['i32x4.add', '(v128.const i32x4 1 2 1 2)', + `c5 f1 fe 05 ${RIPRADDR} vpadddx ${RIPR}, %xmm1, %xmm0`], + ['i32x4.mul', '(v128.const i32x4 1 2 1 2)', + `c4 e2 71 40 05 ${RIPRADDR} vpmulldx ${RIPR}, %xmm1, %xmm0`], + ['i32x4.min_s', '(v128.const i32x4 1 2 1 2)', + `c4 e2 71 39 05 ${RIPRADDR} vpminsdx ${RIPR}, %xmm1, %xmm0`], + ['i32x4.min_u', '(v128.const i32x4 1 2 1 2)', + `c4 e2 71 3b 05 ${RIPRADDR} vpminudx ${RIPR}, %xmm1, %xmm0`], + ['i32x4.max_s', '(v128.const i32x4 1 2 1 2)', + `c4 e2 71 3d 05 ${RIPRADDR} vpmaxsdx ${RIPR}, %xmm1, %xmm0`], + ['i32x4.max_u', '(v128.const i32x4 1 2 1 2)', + `c4 e2 71 3f 05 ${RIPRADDR} vpmaxudx ${RIPR}, %xmm1, %xmm0`], + ['i32x4.eq', '(v128.const i32x4 1 2 1 2)', + `c5 f1 76 05 ${RIPRADDR} vpcmpeqdx ${RIPR}, %xmm1, %xmm0`], + ['i32x4.ne', '(v128.const i32x4 1 2 1 2)', ` + c5 f1 76 05 ${RIPRADDR} vpcmpeqdx ${RIPR}, %xmm1, %xmm0 + 66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 + 66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i32x4.dot_i16x8_s', '(v128.const i32x4 1 2 1 2)', + `c5 f1 f5 05 ${RIPRADDR} vpmaddwdx ${RIPR}, %xmm1, %xmm0`], + + ['i64x2.add', '(v128.const i64x2 1 2)', + `c5 f1 d4 05 ${RIPRADDR} vpaddqx ${RIPR}, %xmm1, %xmm0`], + + ['v128.and', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 db 05 ${RIPRADDR} vpandx ${RIPR}, %xmm1, %xmm0`], + ['v128.or', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 eb 05 ${RIPRADDR} vporx 
${RIPR}, %xmm1, %xmm0`], + ['v128.xor', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `c5 f1 ef 05 ${RIPRADDR} vpxorx ${RIPR}, %xmm1, %xmm0`]]); + +// Shift by constant encodings +codegenTestX64_v128xLITERAL_v128_avxhack( + [['i8x16.shl', '(i32.const 2)', ` +c5 f1 fc c1 vpaddb %xmm1, %xmm1, %xmm0 +66 0f fc c0 paddb %xmm0, %xmm0`], + ['i8x16.shl', '(i32.const 4)', ` +c5 f1 db 05 ${RIPRADDR} vpandx ${RIPR}, %xmm1, %xmm0 +66 0f 71 f0 04 psllw \\$0x04, %xmm0`], + ['i16x8.shl', '(i32.const 1)', + 'c5 f9 71 f1 01 vpsllw \\$0x01, %xmm1, %xmm0'], + ['i16x8.shr_s', '(i32.const 3)', + 'c5 f9 71 e1 03 vpsraw \\$0x03, %xmm1, %xmm0'], + ['i16x8.shr_u', '(i32.const 2)', + 'c5 f9 71 d1 02 vpsrlw \\$0x02, %xmm1, %xmm0'], + ['i32x4.shl', '(i32.const 5)', + 'c5 f9 72 f1 05 vpslld \\$0x05, %xmm1, %xmm0'], + ['i32x4.shr_s', '(i32.const 2)', + 'c5 f9 72 e1 02 vpsrad \\$0x02, %xmm1, %xmm0'], + ['i32x4.shr_u', '(i32.const 5)', + 'c5 f9 72 d1 05 vpsrld \\$0x05, %xmm1, %xmm0'], + ['i64x2.shr_s', '(i32.const 7)', ` +c5 79 70 f9 f5 vpshufd \\$0xF5, %xmm1, %xmm15 +66 41 0f 72 e7 1f psrad \\$0x1F, %xmm15 +c4 c1 71 ef c7 vpxor %xmm15, %xmm1, %xmm0 +66 0f 73 d0 07 psrlq \\$0x07, %xmm0 +66 41 0f ef c7 pxor %xmm15, %xmm0`]]); + +// vpblendvb optimization when bitselect follows comparison. +codegenTestX64_adhoc( + `(module + (func (export "f") (param v128) (param v128) (param v128) (param v128) (result v128) + (v128.bitselect (local.get 2) (local.get 3) + (i32x4.eq (local.get 0) (local.get 1)))))`, + 'f', ` +66 0f 76 c1 pcmpeqd %xmm1, %xmm0 +c4 e3 61 4c c2 00 vpblendvb %xmm0, %xmm2, %xmm3, %xmm0`); diff --git a/js/src/jit-test/tests/wasm/simd/baseline-bug1636235.js b/js/src/jit-test/tests/wasm/simd/baseline-bug1636235.js new file mode 100644 index 0000000000..da1fb68e6b --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/baseline-bug1636235.js @@ -0,0 +1,111 @@ +// |jit-test| skip-if: !wasmSimdEnabled() + +// Bug 1636235: assorted corner case baseline SIMD bugs. 
+ +function get(arr, loc, len) { + let res = []; + for ( let i=0; i < len; i++ ) { + res.push(arr[loc+i]); + } + return res; +} + +// Pass v128 along a control flow edge in br_table + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "run") (param $k i32) + (v128.store (i32.const 0) (call $f (local.get $k)))) + (func $f (param $k i32) (result v128) + (block $B2 (result v128) + (block $B1 (result v128) + (v128.const i32x4 1 2 3 4) + (br_table $B1 $B2 (local.get $k))) + (drop) + (v128.const i32x4 5 6 7 8))))`); + +var mem = new Int32Array(ins.exports.mem.buffer); +ins.exports.run(0); +assertDeepEq(get(mem, 0, 4), [5, 6, 7, 8]); + +ins.exports.run(1); +assertDeepEq(get(mem, 0, 4), [1, 2, 3, 4]); + +// Materialize a ConstV128 off the value stack in popStackResults (also: check +// that br passing v128 values works as it should). + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + + (func (export "run") (param $k i32) + (local $t0 v128) (local $t1 v128) (local $t2 v128) + (call $f (local.get $k)) + (local.set $t2) + (local.set $t1) + (local.set $t0) + (v128.store (i32.const 32) (local.get $t2)) + (v128.store (i32.const 16) (local.get $t1)) + (v128.store (i32.const 0) (local.get $t0))) + + (func $f (param $k i32) (result v128 v128 v128) + (block $B2 (result v128 v128 v128) + (if (local.get $k) + (br $B2 (v128.const i32x4 5 6 7 8) + (v128.const i32x4 9 10 11 12) + (v128.const i32x4 13 14 15 16)) + (br $B2 (v128.const i32x4 -5 -6 -7 -8) + (v128.const i32x4 -9 -10 -11 -12) + (v128.const i32x4 -13 -14 -15 -16))) + (unreachable))))`); + +var mem = new Int32Array(ins.exports.mem.buffer); +ins.exports.run(0); +assertDeepEq(get(mem, 0, 4), [-5, -6, -7, -8]); +assertDeepEq(get(mem, 4, 4), [-9, -10, -11, -12]); +assertDeepEq(get(mem, 8, 4), [-13, -14, -15, -16]); + +ins.exports.run(1); +assertDeepEq(get(mem, 0, 4), [5, 6, 7, 8]); +assertDeepEq(get(mem, 4, 4), [9, 10, 11, 12]); +assertDeepEq(get(mem, 8, 4), [13, 14, 15, 16]); + +// 
Check that br_if passing v128 values works as it should. + +var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + + (func (export "run") (param $k i32) + (local $t0 v128) (local $t1 v128) (local $t2 v128) + (call $f (local.get $k)) + (local.set $t2) + (local.set $t1) + (local.set $t0) + (v128.store (i32.const 32) (local.get $t2)) + (v128.store (i32.const 16) (local.get $t1)) + (v128.store (i32.const 0) (local.get $t0))) + + (func $f (param $k i32) (result v128 v128 v128) + (block $B2 (result v128 v128 v128) + (v128.const i32x4 5 6 7 8) + (v128.const i32x4 9 10 11 12) + (v128.const i32x4 13 14 15 16) + (br_if $B2 (local.get $k)) + drop drop drop + (v128.const i32x4 -5 -6 -7 -8) + (v128.const i32x4 -9 -10 -11 -12) + (v128.const i32x4 -13 -14 -15 -16))))`); + +var mem = new Int32Array(ins.exports.mem.buffer); +ins.exports.run(0); +assertDeepEq(get(mem, 0, 4), [-5, -6, -7, -8]); +assertDeepEq(get(mem, 4, 4), [-9, -10, -11, -12]); +assertDeepEq(get(mem, 8, 4), [-13, -14, -15, -16]); + +ins.exports.run(1); +assertDeepEq(get(mem, 0, 4), [5, 6, 7, 8]); +assertDeepEq(get(mem, 4, 4), [9, 10, 11, 12]); +assertDeepEq(get(mem, 8, 4), [13, 14, 15, 16]); + diff --git a/js/src/jit-test/tests/wasm/simd/binop-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/binop-x64-ion-codegen.js new file mode 100644 index 0000000000..17c15e22d5 --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/binop-x64-ion-codegen.js @@ -0,0 +1,255 @@ +// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x64 || getBuildConfiguration().simulator; include:codegen-x64-test.js + +// Test that there are no extraneous moves or fixups for sundry SIMD binary +// operations. See README-codegen.md for general information about this type of +// test case. 
+ +// Inputs (xmm0, xmm1) + +codegenTestX64_v128xPTYPE_v128( + [['f32x4.replace_lane 0', 'f32', `f3 0f 10 c1 movss %xmm1, %xmm0`], + ['f32x4.replace_lane 1', 'f32', `66 0f 3a 21 c1 10 insertps \\$0x10, %xmm1, %xmm0`], + ['f32x4.replace_lane 3', 'f32', `66 0f 3a 21 c1 30 insertps \\$0x30, %xmm1, %xmm0`], + ['f64x2.replace_lane 0', 'f64', `f2 0f 10 c1 movsd %xmm1, %xmm0`], + ['f64x2.replace_lane 1', 'f64', `66 0f c6 c1 00 shufpd \\$0x00, %xmm1, %xmm0`]] ); + +// Inputs (xmm1, xmm0) + +codegenTestX64_v128xv128_v128_reversed( + [['f32x4.pmin', `0f 5d c1 minps %xmm1, %xmm0`], + ['f32x4.pmax', `0f 5f c1 maxps %xmm1, %xmm0`], + ['f64x2.pmin', `66 0f 5d c1 minpd %xmm1, %xmm0`], + ['f64x2.pmax', `66 0f 5f c1 maxpd %xmm1, %xmm0`]] ); + +// Constant arguments that are folded into the instruction + +codegenTestX64_v128xLITERAL_v128( + [['i8x16.add', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f fc 05 ${RIPRADDR} paddbx ${RIPR}, %xmm0`], + ['i8x16.sub', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f f8 05 ${RIPRADDR} psubbx ${RIPR}, %xmm0`], + ['i8x16.add_sat_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f ec 05 ${RIPRADDR} paddsbx ${RIPR}, %xmm0`], + ['i8x16.add_sat_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f dc 05 ${RIPRADDR} paddusbx ${RIPR}, %xmm0`], + ['i8x16.sub_sat_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f e8 05 ${RIPRADDR} psubsbx ${RIPR}, %xmm0`], + ['i8x16.sub_sat_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f d8 05 ${RIPRADDR} psubusbx ${RIPR}, %xmm0`], + ['i8x16.min_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f 38 38 05 ${RIPRADDR} pminsbx ${RIPR}, %xmm0`], + ['i8x16.min_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f da 05 ${RIPRADDR} pminubx ${RIPR}, %xmm0`], + ['i8x16.max_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f 38 3c 05 ${RIPRADDR} pmaxsbx ${RIPR}, %xmm0`], + ['i8x16.max_u', 
'(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f de 05 ${RIPRADDR} pmaxubx ${RIPR}, %xmm0`], + ['i8x16.eq', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f 74 05 ${RIPRADDR} pcmpeqbx ${RIPR}, %xmm0`], + ['i8x16.ne', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', ` +66 0f 74 05 ${RIPRADDR} pcmpeqbx ${RIPR}, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i8x16.gt_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f 64 05 ${RIPRADDR} pcmpgtbx ${RIPR}, %xmm0`], + ['i8x16.le_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', ` +66 0f 64 05 ${RIPRADDR} pcmpgtbx ${RIPR}, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i8x16.narrow_i16x8_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f 63 05 ${RIPRADDR} packsswbx ${RIPR}, %xmm0`], + ['i8x16.narrow_i16x8_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f 67 05 ${RIPRADDR} packuswbx ${RIPR}, %xmm0`], + + ['i16x8.add', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f fd 05 ${RIPRADDR} paddwx ${RIPR}, %xmm0`], + ['i16x8.sub', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f f9 05 ${RIPRADDR} psubwx ${RIPR}, %xmm0`], + ['i16x8.mul', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f d5 05 ${RIPRADDR} pmullwx ${RIPR}, %xmm0`], + ['i16x8.add_sat_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f ed 05 ${RIPRADDR} paddswx ${RIPR}, %xmm0`], + ['i16x8.add_sat_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f dd 05 ${RIPRADDR} padduswx ${RIPR}, %xmm0`], + ['i16x8.sub_sat_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f e9 05 ${RIPRADDR} psubswx ${RIPR}, %xmm0`], + ['i16x8.sub_sat_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f d9 05 ${RIPRADDR} psubuswx ${RIPR}, %xmm0`], + ['i16x8.min_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f ea 05 ${RIPRADDR} pminswx ${RIPR}, %xmm0`], + ['i16x8.min_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f 38 3a 05 ${RIPRADDR} 
pminuwx ${RIPR}, %xmm0`], + ['i16x8.max_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f ee 05 ${RIPRADDR} pmaxswx ${RIPR}, %xmm0`], + ['i16x8.max_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f 38 3e 05 ${RIPRADDR} pmaxuwx ${RIPR}, %xmm0`], + ['i16x8.eq', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f 75 05 ${RIPRADDR} pcmpeqwx ${RIPR}, %xmm0`], + ['i16x8.ne', '(v128.const i16x8 1 2 1 2 1 2 1 2)', ` +66 0f 75 05 ${RIPRADDR} pcmpeqwx ${RIPR}, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i16x8.gt_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f 65 05 ${RIPRADDR} pcmpgtwx ${RIPR}, %xmm0`], + ['i16x8.le_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', ` +66 0f 65 05 ${RIPRADDR} pcmpgtwx ${RIPR}, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i16x8.narrow_i32x4_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f 6b 05 ${RIPRADDR} packssdwx ${RIPR}, %xmm0`], + ['i16x8.narrow_i32x4_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f 38 2b 05 ${RIPRADDR} packusdwx ${RIPR}, %xmm0`], + + ['i32x4.add', '(v128.const i32x4 1 2 1 2)', + `66 0f fe 05 ${RIPRADDR} padddx ${RIPR}, %xmm0`], + ['i32x4.sub', '(v128.const i32x4 1 2 1 2)', + `66 0f fa 05 ${RIPRADDR} psubdx ${RIPR}, %xmm0`], + ['i32x4.mul', '(v128.const i32x4 1 2 1 2)', + `66 0f 38 40 05 ${RIPRADDR} pmulldx ${RIPR}, %xmm0`], + ['i32x4.min_s', '(v128.const i32x4 1 2 1 2)', + `66 0f 38 39 05 ${RIPRADDR} pminsdx ${RIPR}, %xmm0`], + ['i32x4.min_u', '(v128.const i32x4 1 2 1 2)', + `66 0f 38 3b 05 ${RIPRADDR} pminudx ${RIPR}, %xmm0`], + ['i32x4.max_s', '(v128.const i32x4 1 2 1 2)', + `66 0f 38 3d 05 ${RIPRADDR} pmaxsdx ${RIPR}, %xmm0`], + ['i32x4.max_u', '(v128.const i32x4 1 2 1 2)', + `66 0f 38 3f 05 ${RIPRADDR} pmaxudx ${RIPR}, %xmm0`], + ['i32x4.eq', '(v128.const i32x4 1 2 1 2)', + `66 0f 76 05 ${RIPRADDR} pcmpeqdx ${RIPR}, %xmm0`], + ['i32x4.ne', '(v128.const i32x4 1 2 1 2)', ` +66 0f 76 05 ${RIPRADDR} pcmpeqdx ${RIPR}, %xmm0 +66 45 0f 75 ff 
pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i32x4.gt_s', '(v128.const i32x4 1 2 1 2)', + `66 0f 66 05 ${RIPRADDR} pcmpgtdx ${RIPR}, %xmm0`], + ['i32x4.le_s', '(v128.const i32x4 1 2 1 2)', ` +66 0f 66 05 ${RIPRADDR} pcmpgtdx ${RIPR}, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i32x4.dot_i16x8_s', '(v128.const i32x4 1 2 1 2)', + `66 0f f5 05 ${RIPRADDR} pmaddwdx ${RIPR}, %xmm0`], + + ['i64x2.add', '(v128.const i64x2 1 2)', + `66 0f d4 05 ${RIPRADDR} paddqx ${RIPR}, %xmm0`], + ['i64x2.sub', '(v128.const i64x2 1 2)', + `66 0f fb 05 ${RIPRADDR} psubqx ${RIPR}, %xmm0`], + + ['v128.and', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f db 05 ${RIPRADDR} pandx ${RIPR}, %xmm0`], + ['v128.or', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f eb 05 ${RIPRADDR} porx ${RIPR}, %xmm0`], + ['v128.xor', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f ef 05 ${RIPRADDR} pxorx ${RIPR}, %xmm0`], + + ['f32x4.add', '(v128.const f32x4 1 2 3 4)', + `0f 58 05 ${RIPRADDR} addpsx ${RIPR}, %xmm0`], + ['f32x4.sub', '(v128.const f32x4 1 2 3 4)', + `0f 5c 05 ${RIPRADDR} subpsx ${RIPR}, %xmm0`], + ['f32x4.mul', '(v128.const f32x4 1 2 3 4)', + `0f 59 05 ${RIPRADDR} mulpsx ${RIPR}, %xmm0`], + ['f32x4.div', '(v128.const f32x4 1 2 3 4)', + `0f 5e 05 ${RIPRADDR} divpsx ${RIPR}, %xmm0`], + ['f32x4.eq', '(v128.const f32x4 1 2 3 4)', + `0f c2 05 ${RIPRADDR} 00 cmppsx \\$0x00, ${RIPR}, %xmm0`], + ['f32x4.ne', '(v128.const f32x4 1 2 3 4)', + `0f c2 05 ${RIPRADDR} 04 cmppsx \\$0x04, ${RIPR}, %xmm0`], + ['f32x4.lt', '(v128.const f32x4 1 2 3 4)', + `0f c2 05 ${RIPRADDR} 01 cmppsx \\$0x01, ${RIPR}, %xmm0`], + ['f32x4.le', '(v128.const f32x4 1 2 3 4)', + `0f c2 05 ${RIPRADDR} 02 cmppsx \\$0x02, ${RIPR}, %xmm0`], + + ['f64x2.add', '(v128.const f64x2 1 2)', + `66 0f 58 05 ${RIPRADDR} addpdx ${RIPR}, %xmm0`], + ['f64x2.sub', '(v128.const f64x2 1 2)', + `66 0f 5c 05 ${RIPRADDR} subpdx ${RIPR}, %xmm0`], + 
['f64x2.mul', '(v128.const f64x2 1 2)', + `66 0f 59 05 ${RIPRADDR} mulpdx ${RIPR}, %xmm0`], + ['f64x2.div', '(v128.const f64x2 1 2)', + `66 0f 5e 05 ${RIPRADDR} divpdx ${RIPR}, %xmm0`], + ['f64x2.eq', '(v128.const f64x2 1 2)', + `66 0f c2 05 ${RIPRADDR} 00 cmppdx \\$0x00, ${RIPR}, %xmm0`], + ['f64x2.ne', '(v128.const f64x2 1 2)', + `66 0f c2 05 ${RIPRADDR} 04 cmppdx \\$0x04, ${RIPR}, %xmm0`], + ['f64x2.lt', '(v128.const f64x2 1 2)', + `66 0f c2 05 ${RIPRADDR} 01 cmppdx \\$0x01, ${RIPR}, %xmm0`], + ['f64x2.le', '(v128.const f64x2 1 2)', + `66 0f c2 05 ${RIPRADDR} 02 cmppdx \\$0x02, ${RIPR}, %xmm0`]]); + +// Commutative operations with constants on the lhs should generate the same +// code as with the constant on the rhs. + +codegenTestX64_LITERALxv128_v128( + [['i8x16.add', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f fc 05 ${RIPRADDR} paddbx ${RIPR}, %xmm0`], + ['i8x16.add_sat_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f ec 05 ${RIPRADDR} paddsbx ${RIPR}, %xmm0`], + ['i8x16.add_sat_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f dc 05 ${RIPRADDR} paddusbx ${RIPR}, %xmm0`], + ['i8x16.min_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f 38 38 05 ${RIPRADDR} pminsbx ${RIPR}, %xmm0`], + ['i8x16.min_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f da 05 ${RIPRADDR} pminubx ${RIPR}, %xmm0`], + ['i8x16.max_s', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f 38 3c 05 ${RIPRADDR} pmaxsbx ${RIPR}, %xmm0`], + ['i8x16.max_u', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f de 05 ${RIPRADDR} pmaxubx ${RIPR}, %xmm0`], + ['i8x16.eq', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f 74 05 ${RIPRADDR} pcmpeqbx ${RIPR}, %xmm0`], + ['i8x16.ne', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', ` +66 0f 74 05 ${RIPRADDR} pcmpeqbx ${RIPR}, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`], + + ['i16x8.add', 
'(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f fd 05 ${RIPRADDR} paddwx ${RIPR}, %xmm0`], + ['i16x8.mul', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f d5 05 ${RIPRADDR} pmullwx ${RIPR}, %xmm0`], + ['i16x8.add_sat_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f ed 05 ${RIPRADDR} paddswx ${RIPR}, %xmm0`], + ['i16x8.add_sat_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f dd 05 ${RIPRADDR} padduswx ${RIPR}, %xmm0`], + ['i16x8.min_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f ea 05 ${RIPRADDR} pminswx ${RIPR}, %xmm0`], + ['i16x8.min_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f 38 3a 05 ${RIPRADDR} pminuwx ${RIPR}, %xmm0`], + ['i16x8.max_s', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f ee 05 ${RIPRADDR} pmaxswx ${RIPR}, %xmm0`], + ['i16x8.max_u', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f 38 3e 05 ${RIPRADDR} pmaxuwx ${RIPR}, %xmm0`], + ['i16x8.eq', '(v128.const i16x8 1 2 1 2 1 2 1 2)', + `66 0f 75 05 ${RIPRADDR} pcmpeqwx ${RIPR}, %xmm0`], + ['i16x8.ne', '(v128.const i16x8 1 2 1 2 1 2 1 2)', ` +66 0f 75 05 ${RIPRADDR} pcmpeqwx ${RIPR}, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`], + + ['i32x4.add', '(v128.const i32x4 1 2 1 2)', + `66 0f fe 05 ${RIPRADDR} padddx ${RIPR}, %xmm0`], + ['i32x4.mul', '(v128.const i32x4 1 2 1 2)', + `66 0f 38 40 05 ${RIPRADDR} pmulldx ${RIPR}, %xmm0`], + ['i32x4.min_s', '(v128.const i32x4 1 2 1 2)', + `66 0f 38 39 05 ${RIPRADDR} pminsdx ${RIPR}, %xmm0`], + ['i32x4.min_u', '(v128.const i32x4 1 2 1 2)', + `66 0f 38 3b 05 ${RIPRADDR} pminudx ${RIPR}, %xmm0`], + ['i32x4.max_s', '(v128.const i32x4 1 2 1 2)', + `66 0f 38 3d 05 ${RIPRADDR} pmaxsdx ${RIPR}, %xmm0`], + ['i32x4.max_u', '(v128.const i32x4 1 2 1 2)', + `66 0f 38 3f 05 ${RIPRADDR} pmaxudx ${RIPR}, %xmm0`], + ['i32x4.eq', '(v128.const i32x4 1 2 1 2)', + `66 0f 76 05 ${RIPRADDR} pcmpeqdx ${RIPR}, %xmm0`], + ['i32x4.ne', '(v128.const i32x4 1 2 1 2)', ` +66 0f 76 05 ${RIPRADDR} pcmpeqdx ${RIPR}, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, 
%xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i32x4.dot_i16x8_s', '(v128.const i32x4 1 2 1 2)', + `66 0f f5 05 ${RIPRADDR} pmaddwdx ${RIPR}, %xmm0`], + + ['i64x2.add', '(v128.const i64x2 1 2)', + `66 0f d4 05 ${RIPRADDR} paddqx ${RIPR}, %xmm0`], + + ['v128.and', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f db 05 ${RIPRADDR} pandx ${RIPR}, %xmm0`], + ['v128.or', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f eb 05 ${RIPRADDR} porx ${RIPR}, %xmm0`], + ['v128.xor', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)', + `66 0f ef 05 ${RIPRADDR} pxorx ${RIPR}, %xmm0`]]); diff --git a/js/src/jit-test/tests/wasm/simd/binop-x86-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/binop-x86-ion-codegen.js new file mode 100644 index 0000000000..2cb5f2e969 --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/binop-x86-ion-codegen.js @@ -0,0 +1,20 @@ +// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x86 || getBuildConfiguration().simulator || isAvxPresent(); include:codegen-x86-test.js + +codegenTestX86_v128xLITERAL_v128( + [['f32x4.eq', '(v128.const f32x4 1 2 3 4)', + `0f c2 05 ${ABSADDR} 00 cmppsx \\$0x00, ${ABS}, %xmm0`], + ['f32x4.ne', '(v128.const f32x4 1 2 3 4)', + `0f c2 05 ${ABSADDR} 04 cmppsx \\$0x04, ${ABS}, %xmm0`], + ['f32x4.lt', '(v128.const f32x4 1 2 3 4)', + `0f c2 05 ${ABSADDR} 01 cmppsx \\$0x01, ${ABS}, %xmm0`], + ['f32x4.le', '(v128.const f32x4 1 2 3 4)', + `0f c2 05 ${ABSADDR} 02 cmppsx \\$0x02, ${ABS}, %xmm0`], + + ['f64x2.eq', '(v128.const f64x2 1 2)', + `66 0f c2 05 ${ABSADDR} 00 cmppdx \\$0x00, ${ABS}, %xmm0`], + ['f64x2.ne', '(v128.const f64x2 1 2)', + `66 0f c2 05 ${ABSADDR} 04 cmppdx \\$0x04, ${ABS}, %xmm0`], + ['f64x2.lt', '(v128.const f64x2 1 2)', + `66 0f c2 05 ${ABSADDR} 01 cmppdx \\$0x01, ${ABS}, %xmm0`], + ['f64x2.le', '(v128.const f64x2 1 2)', + `66 0f c2 05 ${ABSADDR} 02 cmppdx \\$0x02, ${ABS}, %xmm0`]]); diff --git 
a/js/src/jit-test/tests/wasm/simd/bitselect-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/bitselect-x64-ion-codegen.js new file mode 100644 index 0000000000..62951bce62 --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/bitselect-x64-ion-codegen.js @@ -0,0 +1,45 @@ +// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x64 || getBuildConfiguration().simulator || isAvxPresent(); include:codegen-x64-test.js + +// Test that there are no extraneous moves or fixups for SIMD bitselect +// operations. See README-codegen.md for general information about this type of +// test case. + +// The codegen enforces onTrue == output so we avoid a move to set that up. +// +// The remaining movdqa is currently unavoidable; it moves the control mask into a temp. +// The temp should be identical to the mask but the regalloc does not currently +// allow this constraint to be enforced. + +// Inputs (xmm0, xmm1, xmm2) + +codegenTestX64_adhoc( +`(module + (func (export "f") (param v128) (param v128) (param v128) (param v128) (result v128) + (v128.bitselect (local.get 0) (local.get 1) (local.get 2))))`, + 'f', +`66 0f 6f da movdqa %xmm2, %xmm3 +66 0f db c3 pand %xmm3, %xmm0 +66 0f df d9 pandn %xmm1, %xmm3 +66 0f eb c3 por %xmm3, %xmm0`); + +// Blend constant optimizations + +codegenTestX64_adhoc( + `(module + (func (export "f") (param v128) (param v128) (param v128) (result v128) + (v128.bitselect (local.get 0) (local.get 1) (v128.const i32x4 -1 0 0 -1))))`, + 'f', + `66 0f 3a 0e c1 c3 pblendw \\$0xC3, %xmm1, %xmm0`); + +// pblendvb optimization when bitselect follows comparison. +// Non-AVX pblendvb uses xmm0 as an implicit read-only operand. 
+codegenTestX64_adhoc( + `(module + (func (export "f") (param v128) (param v128) (param v128) (param v128) (result v128) + (v128.bitselect (local.get 2) (local.get 3) + (i32x4.eq (local.get 0) (local.get 1)))))`, + 'f', ` +66 0f 76 c1 pcmpeqd %xmm1, %xmm0 +66 0f 6f cb movdqa %xmm3, %xmm1 +66 0f 38 10 ca pblendvb %xmm2, %xmm1 +66 0f 6f c1 movdqa %xmm1, %xmm0`); diff --git a/js/src/jit-test/tests/wasm/simd/cmp-bitselect.js b/js/src/jit-test/tests/wasm/simd/cmp-bitselect.js new file mode 100644 index 0000000000..0629455b71 --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/cmp-bitselect.js @@ -0,0 +1,107 @@ +// |jit-test| skip-if: !wasmSimdEnabled() +// Tests that the combination of comparison and bitselect produces the correct result. +// On x86/64 platforms, the slow bitselect emulation is expected to be replaced +// with its faster laneselect equivalent (pblendvb). +// See bug 1751488 for more information. + +let verifyCodegen = _method => {}; +if (hasDisassembler() && wasmCompileMode() == "ion" && + getBuildConfiguration().x64 && !getBuildConfiguration().simulator) { + if (isAvxPresent()) { + verifyCodegen = method => { + assertEq(wasmDis(method, {asString: true}).includes('vpblendvb'), true); + }; + } else { + verifyCodegen = method => { + assertEq(wasmDis(method, {asString: true}).includes("pblendvb"), true); + }; + } +} + +const checkOps = { + eq(a, b) { return a == b; }, + ne(a, b) { return a != b; }, + lt(a, b) { return a < b; }, + le(a, b) { return a <= b; }, + gt(a, b) { return a > b; }, + ge(a, b) { return a >= b; }, +}; +const checkPattern = new Uint8Array(Array(32).fill(null).map((_, i) => i)); + +for (let [laneSize, aty_s, aty_u] of [ + [8, Int8Array, Uint8Array], [16, Int16Array, Uint16Array], + [32, Int32Array, Uint32Array], [64, BigInt64Array, BigUint64Array]]) { + const laneCount = 128 / laneSize; + const ty = `i${laneSize}x${laneCount}`; + for (let op of ['eq', 'ne', 'lt_s', 'le_s', 'gt_s', 'ge_s', 'lt_u', 'le_u', 'gt_u', 'ge_u']) { + if (laneSize == 64 && 
op.includes('_u')) continue; + const wrap = laneSize < 64 ? x => x : x => BigInt(x); + const aty = op.includes('_u') ? aty_u : aty_s; + const check = checkOps[op.replace(/_[us]$/, "")]; + // Items to test: 0, 1, all 1s, top half 1s, low half 1s, top bit 1 + const testData = new aty([wrap(0), wrap(1), ~wrap(0), ~wrap(0) << wrap(laneSize / 2), + ~((~wrap(0)) << wrap(laneSize / 2)), wrap(1) << wrap(laneSize - 1)]); + const ins = new WebAssembly.Instance(new WebAssembly.Module(wasmTextToBinary(`(module + (memory (export "memory") 1) + (func (export "run") + (v128.store (i32.const 32) + (v128.bitselect (v128.load (i32.const 64)) (v128.load (i32.const 80)) (${ty}.${op} (v128.load (i32.const 0)) (v128.load (i32.const 16))))) ))`))); + const mem = new aty(ins.exports.memory.buffer); + const memI8 = new Uint8Array(ins.exports.memory.buffer); + memI8.subarray(64, 96).set(checkPattern); + verifyCodegen(ins.exports.run); + for (let i = 0; i < testData.length; i++) { + for (let j = 0; j < testData.length; j++) { + for (let q = 0; q < laneCount; q++) { + mem[q] = testData[(i + q) % testData.length]; + mem[q + laneCount] = testData[(j + q) % testData.length]; + } + ins.exports.run(); + for (let q = 0; q < laneCount; q++) { + const val = check(mem[q], mem[q + laneCount]); + const n = laneSize >> 3; + for (let k = 0; k < n; k++) { + assertEq(checkPattern[q * n + k + (val ? 
0 : 16)], + memI8[32 + q * n + k]); + } + } + } + } + } +} + +for (let [laneSize, aty] of [[32, Float32Array], [64, Float64Array]]) { + const laneCount = 128 / laneSize; + const ty = `f${laneSize}x${laneCount}`; + for (let op of ['eq', 'ne', 'lt', 'le', 'gt', 'ge']) { + const check = checkOps[op]; + // Items to test: 0, 1, -1, PI, NaN, Inf, -0, -Inf + const testData = new aty([0, 1, -1, Math.PI, NaN, Infinity, 0/-Infinity, -Infinity]); + const ins = new WebAssembly.Instance(new WebAssembly.Module(wasmTextToBinary(`(module + (memory (export "memory") 1) + (func (export "run") + (v128.store (i32.const 32) + (v128.bitselect (v128.load (i32.const 64)) (v128.load (i32.const 80)) (${ty}.${op} (v128.load (i32.const 0)) (v128.load (i32.const 16))))) ))`))); + const mem = new aty(ins.exports.memory.buffer); + const memI8 = new Uint8Array(ins.exports.memory.buffer); + memI8.subarray(64, 96).set(checkPattern); + verifyCodegen(ins.exports.run); + for (let i = 0; i < testData.length; i++) { + for (let j = 0; j < testData.length; j++) { + for (let q = 0; q < laneCount; q++) { + mem[q] = testData[(i + q) % testData.length]; + mem[q + laneCount] = testData[(j + q) % testData.length]; + } + ins.exports.run(); + for (let q = 0; q < laneCount; q++) { + const val = check(mem[q], mem[q + laneCount]); + const n = laneSize >> 3; + for (let k = 0; k < n; k++) { + assertEq(checkPattern[q * n + k + (val ? 
0 : 16)], + memI8[32 + q * n + k]); + } + } + } + } + } +} diff --git a/js/src/jit-test/tests/wasm/simd/cmp-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/cmp-x64-ion-codegen.js new file mode 100644 index 0000000000..b4fe1d0281 --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/cmp-x64-ion-codegen.js @@ -0,0 +1,77 @@ +// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x64 || getBuildConfiguration().simulator; include:codegen-x64-test.js + +// Test that there are no extraneous moves or fixups for various SIMD comparison +// operations. See README-codegen.md for general information about this type of +// test case. + +// Inputs (xmm0, xmm1) + +codegenTestX64_v128xv128_v128( + [['i8x16.gt_s', `66 0f 64 c1 pcmpgtb %xmm1, %xmm0`], + ['i16x8.gt_s', `66 0f 65 c1 pcmpgtw %xmm1, %xmm0`], + ['i32x4.gt_s', `66 0f 66 c1 pcmpgtd %xmm1, %xmm0`], + ['i8x16.le_s', ` +66 0f 64 c1 pcmpgtb %xmm1, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0 +`], + ['i16x8.le_s', ` +66 0f 65 c1 pcmpgtw %xmm1, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0 +`], + ['i32x4.le_s', ` +66 0f 66 c1 pcmpgtd %xmm1, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0 +`], + ['i8x16.eq', `66 0f 74 c1 pcmpeqb %xmm1, %xmm0`], + ['i16x8.eq', `66 0f 75 c1 pcmpeqw %xmm1, %xmm0`], + ['i32x4.eq', `66 0f 76 c1 pcmpeqd %xmm1, %xmm0`], + ['i8x16.ne', ` +66 0f 74 c1 pcmpeqb %xmm1, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0 +`], + ['i16x8.ne', ` +66 0f 75 c1 pcmpeqw %xmm1, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0 +`], + ['i32x4.ne', ` +66 0f 76 c1 pcmpeqd %xmm1, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0 +`], + ['f32x4.eq', `0f c2 c1 00 cmpps \\$0x00, %xmm1, %xmm0`], + ['f32x4.ne', `0f c2 c1 04 cmpps \\$0x04, %xmm1, %xmm0`], + ['f32x4.lt', 
`0f c2 c1 01 cmpps \\$0x01, %xmm1, %xmm0`], + ['f32x4.le', `0f c2 c1 02 cmpps \\$0x02, %xmm1, %xmm0`], + ['f64x2.eq', `66 0f c2 c1 00 cmppd \\$0x00, %xmm1, %xmm0`], + ['f64x2.ne', `66 0f c2 c1 04 cmppd \\$0x04, %xmm1, %xmm0`], + ['f64x2.lt', `66 0f c2 c1 01 cmppd \\$0x01, %xmm1, %xmm0`], + ['f64x2.le', `66 0f c2 c1 02 cmppd \\$0x02, %xmm1, %xmm0`]] ); + +// Inputs (xmm1, xmm0) because the operation reverses its arguments. + +codegenTestX64_v128xv128_v128_reversed( + [['i8x16.ge_s', ` +66 0f 64 c1 pcmpgtb %xmm1, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i16x8.ge_s', +` +66 0f 65 c1 pcmpgtw %xmm1, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i32x4.ge_s', ` +66 0f 66 c1 pcmpgtd %xmm1, %xmm0 +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`], + ['i8x16.lt_s', `66 0f 64 c1 pcmpgtb %xmm1, %xmm0`], + ['i16x8.lt_s', `66 0f 65 c1 pcmpgtw %xmm1, %xmm0`], + ['i32x4.lt_s', `66 0f 66 c1 pcmpgtd %xmm1, %xmm0`], + ['f32x4.gt', `0f c2 c1 01 cmpps \\$0x01, %xmm1, %xmm0`], + ['f32x4.ge', `0f c2 c1 02 cmpps \\$0x02, %xmm1, %xmm0`], + ['f64x2.gt', `66 0f c2 c1 01 cmppd \\$0x01, %xmm1, %xmm0`], + ['f64x2.ge', `66 0f c2 c1 02 cmppd \\$0x02, %xmm1, %xmm0`]] ); diff --git a/js/src/jit-test/tests/wasm/simd/const-arm64-vixl-codegen.js b/js/src/jit-test/tests/wasm/simd/const-arm64-vixl-codegen.js new file mode 100644 index 0000000000..9dc08c6e6b --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/const-arm64-vixl-codegen.js @@ -0,0 +1,109 @@ +// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "baseline" || !getBuildConfiguration().arm64 + +// Test that the vixl logic for v128 constant loads is at least somewhat +// reasonable. + +var lead = `0x[0-9a-f]+ +[0-9a-f]{8} +`; + +var prefix = `${lead}sub sp, sp, #0x.. 
\\(..\\) +${lead}str x23, \\[sp, #..\\]`; + +var suffix = +`${lead}b #\\+0x8 \\(addr 0x.*\\) +${lead}brk #0x0`; + +for ( let [bits, expected, values] of [ + // If high == low and the byte is 0 or ff then a single movi is sufficient. + ['i8x16 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00', ` +${prefix} +${lead}movi v0\\.2d, #0x0 +${suffix} +`, + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], + + ['i8x16 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0', ` +${prefix} +${lead}movi v0\\.2d, #0xff00ff00ff00ff +${suffix} +`, + [-1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0]], + + // Splattable small things (up to a byte, at a byte location) + // can also use just one instruction + ['i32x4 1 1 1 1', ` +${prefix} +${lead}movi v0\\.4s, #0x1, lsl #0 +${suffix} +`, + [1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0]], + + ['i32x4 0x300 0x300 0x300 0x300', ` +${prefix} +${lead}movi v0\\.4s, #0x3, lsl #8 +${suffix} +`, + [0, 3, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0]], + + // If high == low but the value is more complex then a constant load + // plus a dup is sufficient. x16 is the designated temp. + ['i32x4 1 2 1 2', ` +${prefix} +${lead}mov x16, #0x1 +${lead}movk x16, #0x2, lsl #32 +${lead}dup v0\\.2d, x16 +${suffix} +`, + [1, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0]], + + // If high != low then we degenerate to a more complicated pattern: dup the low value + // and then overwrite the high part with the high value. + ['i32x4 1 2 2 1', ` +${prefix} +${lead}mov x16, #0x1 +${lead}movk x16, #0x2, lsl #32 +${lead}dup v0\\.2d, x16 +${lead}mov x16, #0x2 +${lead}movk x16, #0x1, lsl #32 +${lead}mov v0\\.d\\[1\\], x16 +${suffix} +`, + [1, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0]], + + // Things are not always bleak, and vixl finds a way. 
+ ['i32x4 1 1 2 2', ` +${prefix} +${lead}movi v0\\.4s, #0x1, lsl #0 +${lead}mov x16, #0x200000002 +${lead}mov v0\\.d\\[1\\], x16 +${suffix} +`, + [1, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0]], +] ) { + let ins = wasmEvalText(` + (module + (memory (export "mem") 1) + (func (export "run") + (v128.store (i32.const 0) (call $f))) + (func $f (export "f") (result v128) + (v128.const ${bits})))`); + let output = wasmDis(ins.exports.f, {tier:"baseline", asString:true}); + assertEq(output.match(new RegExp(expected)) != null, true); + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 0, iota(16).map(x => -1-x)); + ins.exports.run(); + assertSame(get(mem, 0, 16), values); +} + +function get(arr, loc, len) { + let res = []; + for ( let i=0; i < len; i++ ) { + res.push(arr[loc+i]); + } + return res; +} + +function set(arr, loc, vals) { + for ( let i=0; i < vals.length; i++ ) { + arr[loc+i] = vals[i]; + } +} diff --git a/js/src/jit-test/tests/wasm/simd/const-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/const-x64-ion-codegen.js new file mode 100644 index 0000000000..04a00b538d --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/const-x64-ion-codegen.js @@ -0,0 +1,28 @@ +// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x64 || getBuildConfiguration().simulator; include:codegen-x64-test.js + +// Test that constants that can be synthesized are synthesized. See README-codegen.md +// for general information about this type of test case. 
+ +codegenTestX64_unit_v128( + [['v128.const i8x16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0', + `66 0f ef c0 pxor %xmm0, %xmm0`], + ['v128.const i8x16 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1', + `66 0f 75 c0 pcmpeqw %xmm0, %xmm0`], + ['v128.const i16x8 0 0 0 0 0 0 0 0', + `66 0f ef c0 pxor %xmm0, %xmm0`], + ['v128.const i16x8 -1 -1 -1 -1 -1 -1 -1 -1', + `66 0f 75 c0 pcmpeqw %xmm0, %xmm0`], + ['v128.const i32x4 0 0 0 0', + `66 0f ef c0 pxor %xmm0, %xmm0`], + ['v128.const i32x4 -1 -1 -1 -1', + `66 0f 75 c0 pcmpeqw %xmm0, %xmm0`], + ['v128.const i64x2 0 0', + `66 0f ef c0 pxor %xmm0, %xmm0`], + ['v128.const i64x2 -1 -1', + `66 0f 75 c0 pcmpeqw %xmm0, %xmm0`], + ['v128.const f32x4 0 0 0 0', + // Arguably this should be xorps but that's for later + `66 0f ef c0 pxor %xmm0, %xmm0`], + ['v128.const f64x2 0 0', + // Arguably this should be xorpd but that's for later + `66 0f ef c0 pxor %xmm0, %xmm0`]] ); diff --git a/js/src/jit-test/tests/wasm/simd/cvt-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/cvt-x64-ion-codegen.js new file mode 100644 index 0000000000..1e2d613c1a --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/cvt-x64-ion-codegen.js @@ -0,0 +1,27 @@ +// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x64 || getBuildConfiguration().simulator || isAvxPresent(); include:codegen-x64-test.js + +// Test that there are no extraneous moves for various SIMD conversion +// operations. See README-codegen.md for general information about this type of +// test case. + +// Note, these tests test the beginning of the output but not the end. + +codegenTestX64_v128_v128( + [['i32x4.trunc_sat_f32x4_s', + // The movaps is dest -> scratch and needs to be here. The test is + // asserting that there is not an additional (redundant) move here. 
+` +44 0f 28 f8 movaps %xmm0, %xmm15 +45 0f c2 ff 00 cmpps \\$0x00, %xmm15, %xmm15 +66 41 0f db c7 pand %xmm15, %xmm0`], + ['i32x4.trunc_sat_f32x4_u', ` +45 0f 57 ff xorps %xmm15, %xmm15 +41 0f 5f c7 maxps %xmm15, %xmm0`], + ['f32x4.convert_i32x4_u', ` +66 45 0f ef ff pxor %xmm15, %xmm15 +66 44 0f 3a 0e f8 55 pblendw \\$0x55, %xmm0, %xmm15 +66 41 0f fa c7 psubd %xmm15, %xmm0 +45 0f 5b ff cvtdq2ps %xmm15, %xmm15`]], + {no_suffix:true}); + + diff --git a/js/src/jit-test/tests/wasm/simd/debug-bug1644759.js b/js/src/jit-test/tests/wasm/simd/debug-bug1644759.js new file mode 100644 index 0000000000..0de46e0f0c --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/debug-bug1644759.js @@ -0,0 +1,20 @@ +// |jit-test| skip-if: !wasmDebuggingEnabled() || !wasmSimdEnabled() + +var g7 = newGlobal({newCompartment: true}); +g7.parent = this; +g7.eval(` + Debugger(parent).onEnterFrame = function(frame) { }; +`); +var ins = wasmEvalText(` + (memory (export "mem") 1 1) + (func (export "run") + (param $k i32) + (v128.store (i32.const 0) (call $f (local.get $k))) + ) + (func $f + (param $k i32) + (result v128) + (v128.const i32x4 5 6 7 8) + ) +`); +ins.exports.run(0); diff --git a/js/src/jit-test/tests/wasm/simd/directives.txt b/js/src/jit-test/tests/wasm/simd/directives.txt new file mode 100644 index 0000000000..3e89e7550b --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/directives.txt @@ -0,0 +1 @@ +|jit-test| test-also=--wasm-compiler=baseline; test-also=--wasm-compiler=optimizing; test-also=--wasm-test-serialization; test-also=--wasm-compiler=optimizing --no-avx; skip-variant-if: --wasm-compiler=optimizing --no-avx, !getBuildConfiguration().x86 && !getBuildConfiguration().x64 || getBuildConfiguration().simulator; include:wasm.js diff --git a/js/src/jit-test/tests/wasm/simd/disabled.js b/js/src/jit-test/tests/wasm/simd/disabled.js new file mode 100644 index 0000000000..feae414697 --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/disabled.js @@ -0,0 +1,28 @@ +// |jit-test| 
skip-if: wasmSimdEnabled() + +// ../binary.js checks that all SIMD extended opcodes in the 0..255 range are +// rejected if !wasmSimdEnabled, so no need to check that here. + +// Non-opcode cases that should also be rejected, lest feature sniffing may +// erroneously conclude that simd is available when it's not. The error message +// may differ depending on ENABLE_WASM_SIMD: if SIMD is compiled in we usually +// get a sensible error about v128; if not, we get something generic. + +wasmFailValidateText(`(module (func (param v128)))`, + /(v128 not enabled)|(bad type)/); + +wasmFailValidateText(`(module (func (result v128)))`, + /(v128 not enabled)|(bad type)/); + +wasmFailValidateText(`(module (func (local v128)))`, + /(v128 not enabled)|(bad type)|(SIMD support is not enabled)/); + +wasmFailValidateText(`(module (global (import "m" "g") v128))`, + /expected global type/); + +wasmFailValidateText(`(module (global (import "m" "g") (mut v128)))`, + /expected global type/); + +wasmFailValidateText(`(module (global i32 (v128.const i32x4 0 0 0 0)))`, + /(v128 not enabled)|(unrecognized opcode)/); + diff --git a/js/src/jit-test/tests/wasm/simd/experimental.js b/js/src/jit-test/tests/wasm/simd/experimental.js new file mode 100644 index 0000000000..3f4a85ae75 --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/experimental.js @@ -0,0 +1,411 @@ +// |jit-test| --wasm-relaxed-simd; skip-if: !wasmRelaxedSimdEnabled() + +// Experimental opcodes. We have no text parsing support for these yet. The +// tests will be cleaned up and moved into ad-hack.js if the opcodes are +// adopted. 
+ +load(libdir + "wasm-binary.js"); + +function wasmEval(bytes, imports) { + return new WebAssembly.Instance(new WebAssembly.Module(bytes), imports); +} + +function wasmValidateAndEval(bytes, imports) { + assertEq(WebAssembly.validate(bytes), true, "test of WasmValidate.cpp"); + return wasmEval(bytes, imports); +} + +function get(arr, loc, len) { + let res = []; + for ( let i=0; i < len; i++ ) { + res.push(arr[loc+i]); + } + return res; +} + +function set(arr, loc, vals) { + for ( let i=0; i < vals.length; i++ ) { + if (arr instanceof BigInt64Array) { + arr[loc+i] = BigInt(vals[i]); + } else { + arr[loc+i] = vals[i]; + } + } +} + +const v2vSig = {args:[], ret:VoidCode}; + +function V128Load(addr) { + return [I32ConstCode, varS32(addr), + SimdPrefix, V128LoadCode, 4, varU32(0)] +} + +function V128StoreExpr(addr, v) { + return [I32ConstCode, varS32(addr), + ...v, + SimdPrefix, V128StoreCode, 4, varU32(0)]; +} + +// FMA/FNMA, https://github.com/WebAssembly/relaxed-simd/issues/27 and +// https://github.com/WebAssembly/relaxed-simd/pull/81 + +function fma(x, y, a) { return (x * y) + a; } +function fnma(x, y, a) { return - (x * y) + a; } + +var fxs = [10, 20, 30, 40]; +var fys = [-2, -3, -4, -5]; +var fas = [0, 100, 500, 700]; +var dxs = [10, 20]; +var dys = [-2, -3]; +var das = [0, 100]; + +for ( let [opcode, xs, ys, as, operator] of [[F32x4RelaxedFmaCode, fxs, fys, fas, fma], + [F32x4RelaxedFnmaCode, fxs, fys, fas, fnma], + [F64x2RelaxedFmaCode, dxs, dys, das, fma], + [F64x2RelaxedFnmaCode, dxs, dys, das, fnma]] ) { + var k = xs.length; + var ans = iota(k).map((i) => operator(xs[i], ys[i], as[i])) + + var ins = wasmValidateAndEval(moduleWithSections([ + sigSection([v2vSig]), + declSection([0]), + memorySection(1), + exportSection([{funcIndex: 0, name: "run"}, + {memIndex: 0, name: "mem"}]), + bodySection([ + funcBody({locals:[], + body: [...V128StoreExpr(0, [...V128Load(16), + ...V128Load(32), + ...V128Load(48), + SimdPrefix, varU32(opcode)])]})])])); + + var mem = new 
(k == 4 ? Float32Array : Float64Array)(ins.exports.mem.buffer); + set(mem, k, xs); + set(mem, 2*k, ys); + set(mem, 3*k, as); + ins.exports.run(); + var result = get(mem, 0, k); + assertSame(result, ans); + + assertEq(false, WebAssembly.validate(moduleWithSections([ + sigSection([v2vSig]), + declSection([0]), + memorySection(1), + exportSection([{funcIndex: 0, name: "run"}, + {memIndex: 0, name: "mem"}]), + bodySection([ + funcBody({locals:[], + body: [...V128StoreExpr(0, [...V128Load(0), + ...V128Load(0), + SimdPrefix, varU32(opcode)])]})])]))); +} + +// Relaxed swizzle, https://github.com/WebAssembly/relaxed-simd/issues/22 + +var ins = wasmValidateAndEval(moduleWithSections([ + sigSection([v2vSig]), + declSection([0]), + memorySection(1), + exportSection([{funcIndex: 0, name: "run"}, + {memIndex: 0, name: "mem"}]), + bodySection([ + funcBody({locals:[], + body: [...V128StoreExpr(0, [...V128Load(16), + ...V128Load(32), + SimdPrefix, varU32(I8x16RelaxedSwizzleCode)])]})])])); +var mem = new Uint8Array(ins.exports.mem.buffer); +var test = [1, 4, 3, 7, 123, 0, 8, 222]; +set(mem, 16, test); +for (let [i, s] of [[0, 0], [0, 1], [1,1], [1, 3], [7,5]]) { + var ans = new Uint8Array(16); + for (let j = 0; j < 16; j++) { + mem[32 + j] = (j * s + i) & 15; + ans[j] = test[(j * s + i) & 15]; + } + ins.exports.run(); + var result = get(mem, 0, 16); + assertSame(result, ans); +} + +assertEq(false, WebAssembly.validate(moduleWithSections([ + sigSection([v2vSig]), + declSection([0]), + memorySection(1), + bodySection([ + funcBody({locals:[], + body: [...V128StoreExpr(0, [...V128Load(16), + SimdPrefix, varU32(I8x16RelaxedSwizzleCode)])]})])]))); + + +// Relaxed MIN/MAX, https://github.com/WebAssembly/relaxed-simd/issues/33 + +const Neg0 = -1/Infinity; +var minMaxTests = [ + {a: 0, b: 0, min: 0, max: 0, }, + {a: Neg0, b: Neg0, min: Neg0, max: Neg0, }, + {a: 1/3, b: 2/3, min: 1/3, max: 2/3, }, + {a: -1/3, b: -2/3, min: -2/3, max: -1/3, }, + {a: -1000, b: 1, min: -1000, max: 1, }, + 
{a: 10, b: -2, min: -2, max: 10, }, +]; + +for (let k of [4, 2]) { + const minOpcode = k == 4 ? F32x4RelaxedMinCode : F64x2RelaxedMinCode; + const maxOpcode = k == 4 ? F32x4RelaxedMaxCode : F64x2RelaxedMaxCode; + + var ins = wasmValidateAndEval(moduleWithSections([ + sigSection([v2vSig]), + declSection([0, 0]), + memorySection(1), + exportSection([{funcIndex: 0, name: "min"}, + {funcIndex: 1, name: "max"}, + {memIndex: 0, name: "mem"}]), + bodySection([ + funcBody({locals:[], + body: [...V128StoreExpr(0, [...V128Load(16), + ...V128Load(32), + SimdPrefix, varU32(minOpcode)])]}), + funcBody({locals:[], + body: [...V128StoreExpr(0, [...V128Load(16), + ...V128Load(32), + SimdPrefix, varU32(maxOpcode)])]})])])); + for (let i = 0; i < minMaxTests.length; i++) { + var Ty = k == 4 ? Float32Array : Float64Array; + var mem = new Ty(ins.exports.mem.buffer); + var minResult = new Ty(k); + var maxResult = new Ty(k); + for (let j = 0; j < k; j++) { + const {a, b, min, max } = minMaxTests[(j + i) % minMaxTests.length]; + mem[j + k] = a; + mem[j + k * 2] = b; + minResult[j] = min; + maxResult[j] = max; + } + ins.exports.min(); + var result = get(mem, 0, k); + assertSame(result, minResult); + ins.exports.max(); + var result = get(mem, 0, k); + assertSame(result, maxResult); + } + + for (let op of [minOpcode, maxOpcode]) { + assertEq(false, WebAssembly.validate(moduleWithSections([ + sigSection([v2vSig]), + declSection([0]), // one declaration for the single body below, so rejection is due to the missing operand, not a function/body count mismatch + memorySection(1), + exportSection([]), + bodySection([ + funcBody({locals:[], + body: [...V128StoreExpr(0, [...V128Load(0), + SimdPrefix, varU32(op)])]})])]))); + } +} + +// Relaxed I32x4.TruncFXXX, https://github.com/WebAssembly/relaxed-simd/issues/21 + +var ins = wasmValidateAndEval(moduleWithSections([ + sigSection([v2vSig]), + declSection([0, 0, 0, 0]), + memorySection(1), + exportSection([{funcIndex: 0, name: "from32s"}, + {funcIndex: 1, name: "from32u"}, + {funcIndex: 2, name: "from64s"}, + {funcIndex: 3, name: "from64u"}, + {memIndex: 0, name: 
"mem"}]), + bodySection([ + funcBody({locals:[], + body: [...V128StoreExpr(0, [...V128Load(16), + SimdPrefix, varU32(I32x4RelaxedTruncSSatF32x4Code)])]}), + funcBody({locals:[], + body: [...V128StoreExpr(0, [...V128Load(16), + SimdPrefix, varU32(I32x4RelaxedTruncUSatF32x4Code)])]}), + funcBody({locals:[], + body: [...V128StoreExpr(0, [...V128Load(16), + SimdPrefix, varU32(I32x4RelaxedTruncSatF64x2SZeroCode)])]}), + funcBody({locals:[], + body: [...V128StoreExpr(0, [...V128Load(16), + SimdPrefix, varU32(I32x4RelaxedTruncSatF64x2UZeroCode)])]})])])); + +var mem = ins.exports.mem.buffer; +set(new Float32Array(mem), 4, [0, 2.3, -3.4, 100000]); +ins.exports.from32s(); +var result = get(new Int32Array(mem), 0, 4); +assertSame(result, [0, 2, -3, 100000]); + +set(new Float32Array(mem), 4, [0, 3.3, 0x80000000, 200000]); +ins.exports.from32u(); +var result = get(new Uint32Array(mem), 0, 4); +assertSame(result, [0, 3, 0x80000000, 200000]); +set(new Float32Array(mem), 4, [0, 0x80000100, 0x80000101, 0xFFFFFF00]); +ins.exports.from32u(); +var result = get(new Uint32Array(mem), 0, 4); +assertSame(result, [0, 0x80000100, 0x80000100, 0xFFFFFF00]); + +set(new Float64Array(mem), 2, [200000.3, -3.4]); +ins.exports.from64s(); +var result = get(new Int32Array(mem), 0, 4); +assertSame(result, [200000, -3, 0, 0]); +set(new Float64Array(mem), 2, [0x90000000 + 0.1, 0]); +ins.exports.from64u(); +var result = get(new Uint32Array(mem), 0, 4); +assertSame(result, [0x90000000, 0, 0, 0]); + +for (let op of [I32x4RelaxedTruncSSatF32x4Code, I32x4RelaxedTruncUSatF32x4Code, + I32x4RelaxedTruncSatF64x2SZeroCode, I32x4RelaxedTruncSatF64x2UZeroCode]) { + assertEq(false, WebAssembly.validate(moduleWithSections([ + sigSection([v2vSig]), + declSection([0]), + memorySection(1), + exportSection([]), + bodySection([ + funcBody({locals:[], + body: [...V128StoreExpr(0, [SimdPrefix, varU32(op)])]})])]))); +} + +// Relaxed blend / laneselect, https://github.com/WebAssembly/relaxed-simd/issues/17 + +for (let [k, 
opcode, AT] of [[1, I8x16RelaxedLaneSelectCode, Int8Array], + [2, I16x8RelaxedLaneSelectCode, Int16Array], + [4, I32x4RelaxedLaneSelectCode, Int32Array], + [8, I64x2RelaxedLaneSelectCode, BigInt64Array]]) { + + var ins = wasmValidateAndEval(moduleWithSections([ + sigSection([v2vSig]), + declSection([0]), + memorySection(1), + exportSection([{funcIndex: 0, name: "run"}, + {memIndex: 0, name: "mem"}]), + bodySection([ + funcBody({locals:[], + body: [...V128StoreExpr(0, [...V128Load(16), + ...V128Load(32), + ...V128Load(48), + SimdPrefix, varU32(opcode)])]})])])); + + var mem = ins.exports.mem.buffer; + var mem8 = new Uint8Array(mem); + set(mem8, 16, [1,2,3,4,0,0,0,0,100,0,102,0,0,250,251,252]); // exactly 16 bytes: first v128 operand + set(mem8, 32, [0,0,0,0,5,6,7,8,0,101,0,103,0,254,255,0]); // exactly 16 bytes: second v128 operand + var c = new AT(mem, 48, 16 / k); + for (let i = 0; i < c.length; i++) { + // Use popcnt to randomize 0 and ~0 + const popcnt_i = i.toString(2).replace(/0/g, "").length; + const v = popcnt_i & 1 ? -1 : 0; + c[i] = k == 8 ? BigInt(v) : v; + } + ins.exports.run(); + for (let i = 0; i < 16; i++) { + const r = c[(i / k) | 0] ? mem8[16 + i] : mem8[32 + i]; + assertEq(r, mem8[i]); + } + + assertEq(false, WebAssembly.validate(moduleWithSections([ + sigSection([v2vSig]), + declSection([0]), + memorySection(1), + exportSection([{funcIndex: 0, name: "run"}, + {memIndex: 0, name: "mem"}]), + bodySection([ + funcBody({locals:[], + body: [...V128StoreExpr(0, [...V128Load(0), + ...V128Load(0), + SimdPrefix, varU32(opcode)])]})])]))); +} + + +// Relaxed rounding q-format multiplication. 
+var ins = wasmValidateAndEval(moduleWithSections([ + sigSection([v2vSig]), + declSection([0]), + memorySection(1), + exportSection([{funcIndex: 0, name: "relaxed_q15mulr_s"}, + {memIndex: 0, name: "mem"}]), + bodySection([ + funcBody({locals:[], + body: [...V128StoreExpr(0, [...V128Load(16), + ...V128Load(32), + SimdPrefix, varU32(I16x8RelaxedQ15MulrS)])]})])])); + +var mem16 = new Int16Array(ins.exports.mem.buffer); +for (let [as, bs] of cross([ + [1, -3, 5, -7, 11, -13, -17, 19], + [-1, 0, 16, -32, 64, 128, -1024, 0, 1], + [1,2,-32768,32767,1,4,-32768,32767]]) ) { + set(mem16, 8, as); + set(mem16, 16, bs); + ins.exports.relaxed_q15mulr_s(); + const result = get(mem16, 0, 8); + for (let i = 0; i < 8; i++) { + const expected = (as[i] * bs[i] + 0x4000) >> 15; + if (as[i] == -32768 && bs[i] == -32768) continue; + assertEq(expected, result[i], `result of ${as[i]} * ${bs[i]}`); + } +} + + +// Check relaxed dot product results. +var ins = wasmValidateAndEval(moduleWithSections([ + sigSection([v2vSig]), + declSection([0]), + memorySection(1), + exportSection([{funcIndex: 0, name: "dot_i8x16_i7x16_s"}, + {memIndex: 0, name: "mem"}]), + bodySection([ + funcBody({locals:[], + body: [...V128StoreExpr(0, [...V128Load(16), + ...V128Load(32), + SimdPrefix, varU32(I16x8DotI8x16I7x16S)])]})])])); +var mem8 = new Int8Array(ins.exports.mem.buffer); +var mem16 = new Int16Array(ins.exports.mem.buffer); +var test7bit = [1, 2, 3, 4, 5, 64, 65, 127, 127, 0, 0, + 1, 65, 64, 2, 3, 0, 0, 127, 127, 5, 4]; +var testNeg = test7bit.concat(test7bit.map(i => ~i)); +for (let ai = 0; ai < testNeg.length - 15; ai++) + for (let bi = 0; bi < test7bit.length - 15; bi++) { + set(mem8, 16, testNeg.slice(ai, ai + 16)); + set(mem8, 32, test7bit.slice(bi, bi + 16)); + ins.exports.dot_i8x16_i7x16_s(); + const result = get(mem16, 0, 8); + for (let i = 0; i < 8; i++) { + const expected = ((testNeg[ai + i * 2] * test7bit[bi + i * 2]) + + (testNeg[ai + i * 2 + 1] * test7bit[bi + i * 2 + 1])) | 0; + 
assertEq(expected, result[i]); + } + } + +var ins = wasmValidateAndEval(moduleWithSections([ + sigSection([v2vSig]), + declSection([0]), + memorySection(1), + exportSection([{funcIndex: 0, name: "dot_i8x16_i7x16_add_s"}, + {memIndex: 0, name: "mem"}]), + bodySection([ + funcBody({locals:[], + body: [...V128StoreExpr(0, [...V128Load(16), + ...V128Load(32), + ...V128Load(48), + SimdPrefix, varU32(I32x4DotI8x16I7x16AddS)])]})])])); +var mem8 = new Int8Array(ins.exports.mem.buffer); +var mem32 = new Int32Array(ins.exports.mem.buffer); +var test7bit = [1, 2, 3, 4, 5, 64, 65, 127, 127, 0, 0, + 1, 65, 64, 2, 3, 0, 0, 127, 127, 5, 4]; +var testNeg = test7bit.concat(test7bit.map(i => ~i)); +var testAcc = [0, 12, 65336, -1, 0x10000000, -0xffffff]; +for (let ai = 0; ai < testNeg.length - 15; ai++) + for (let bi = 0; bi < test7bit.length - 15; bi++) + for (let ci = 0; ci < testAcc.length - 3; ci++) { + set(mem8, 16, testNeg.slice(ai, ai + 16)); + set(mem8, 32, test7bit.slice(bi, bi + 16)); + set(mem32, 48/4, testAcc.slice(ci, ci + 4)); + ins.exports.dot_i8x16_i7x16_add_s(); + const result = get(mem32, 0, 4); + for (let i = 0; i < 4; i++) { + const a1 = (testNeg[ai + i * 4] * test7bit[bi + i * 4]) + + (testNeg[ai + i * 4 + 1] * test7bit[bi + i * 4 + 1]); + const a2 = (testNeg[ai + i * 4 + 2] * test7bit[bi + i * 4 + 2]) + + (testNeg[ai + i * 4 + 3] * test7bit[bi + i * 4 + 3]); + const expected = (testAcc[ci + i] + a1 + a2) | 0; + assertEq(expected, result[i]); + } + } diff --git a/js/src/jit-test/tests/wasm/simd/ion-analysis.js b/js/src/jit-test/tests/wasm/simd/ion-analysis.js new file mode 100644 index 0000000000..723b42b40b --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/ion-analysis.js @@ -0,0 +1,902 @@ +// |jit-test| skip-if: !wasmSimdEnabled() || wasmCompileMode() != "ion" || !this.wasmSimdAnalysis + +// White-box tests for SIMD optimizations. These are sensitive to internal +// details of the front-end and lowering logic, which is partly platform-dependent. 
+// +// In DEBUG builds, the testing function wasmSimdAnalysis() returns a string +// describing the last decision made by the SIMD lowering code: to perform an +// optimized lowering or the default byte shuffle+blend for i8x16.shuffle; to +// shift by a constant or a variable for the various shifts; and so on. +// +// We test that the expected transformation applies, and that the machine code +// generates the expected result. + +var isArm64 = getBuildConfiguration().arm64; + +// 32-bit permutation that is not a rotation. +let perm32x4_pattern = [4, 5, 6, 7, 12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3]; + +// Operands the same, dword permutation +{ + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16))))) + (func $f (param v128) (result v128) + (i8x16.shuffle ${perm32x4_pattern.join(' ')} (local.get 0) (local.get 0))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> permute 32x4"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), perm32x4_pattern); +} + +// Right operand ignored, dword permutation +{ + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)) (v128.load (i32.const 32))))) + (func $f (param v128) (param v128) (result v128) + (i8x16.shuffle ${perm32x4_pattern.join(' ')} (local.get 0) (local.get 1))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> permute 32x4"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + set(mem, 32, iota(16).map(x => x+16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), perm32x4_pattern); +} + +// Left operand ignored, dword permutation +{ + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)) (v128.load (i32.const 32))))) + (func 
$f (param v128) (param v128) (result v128) + (i8x16.shuffle ${perm32x4_pattern.map(x => x+16).join(' ')} (local.get 0) (local.get 1))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> permute 32x4"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16).map(x => x+16)); + set(mem, 32, iota(16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), perm32x4_pattern); +} + +// Operands the same, word permutation on both sides of the qword divide, with a qword swap +{ + let perm16x8_pattern = [12, 13, 14, 15, 10, 11, 8, 9, + 6, 7, 4, 5, 2, 3, 0, 1]; + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16))))) + (func $f (param v128) (result v128) + (i8x16.shuffle ${perm16x8_pattern.join(' ')} (local.get 0) (local.get 0))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> permute 16x8"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), perm16x8_pattern); +} + +// Operands the same, word permutation on both sides of the qword divide, no qword swap +{ + let perm16x8_pattern = [ 6, 7, 4, 5, 2, 3, 0, 1, + 12, 13, 14, 15, 10, 11, 8, 9]; + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16))))) + (func $f (param v128) (result v128) + (i8x16.shuffle ${perm16x8_pattern.join(' ')} (local.get 0) (local.get 0))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> permute 16x8"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), perm16x8_pattern); +} + +// Operands the same, word permutation on low side of the qword divide, no qword swap +{ + let perm16x8_pattern = [ 6, 7, 4, 5, 2, 3, 0, 1, + 8, 9, 10, 11, 12, 13, 14, 15]; + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + 
(v128.store (i32.const 0) (call $f (v128.load (i32.const 16))))) + (func $f (param v128) (result v128) + (i8x16.shuffle ${perm16x8_pattern.join(' ')} (local.get 0) (local.get 0))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> permute 16x8"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), perm16x8_pattern); +} + +// Operands the same, word permutation on high side of the qword divide, no qword swap +{ + let perm16x8_pattern = [ 0, 1, 2, 3, 4, 5, 6, 7, + 12, 13, 14, 15, 10, 11, 8, 9]; + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16))))) + (func $f (param v128) (result v128) + (i8x16.shuffle ${perm16x8_pattern.join(' ')} (local.get 0) (local.get 0))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> permute 16x8"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), perm16x8_pattern); +} + +// Same operands, byte rotate +{ + // 8-bit permutation that is a rotation + let rot8x16_pattern = [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4]; + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16))))) + (func $f (param v128) (result v128) + (i8x16.shuffle ${rot8x16_pattern.join(' ')} (local.get 0) (local.get 0))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> rotate-right 8x16"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), rot8x16_pattern); +} + +// Operands the same, random jumble => byte permutation +{ + // 8-bit permutation that is not a rotation + let perm8x16_pattern = [5, 7, 6, 8, 9, 10, 11, 4, 13, 14, 15, 0, 1, 2, 3, 12]; + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store 
(i32.const 0) (call $f (v128.load (i32.const 16))))) + (func $f (param v128) (result v128) + (i8x16.shuffle ${perm8x16_pattern.join(' ')} (local.get 0) (local.get 0))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> permute 8x16"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), perm8x16_pattern); +} + +// Operands differ, both accessed, rhs is constant zero, left-shift pattern +{ + // 8-bit shift with zeroes shifted in at the right end + let shift8x16_pattern = [16, 16, 16, 16, 16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16))))) + (func $f (param v128) (result v128) + (i8x16.shuffle ${shift8x16_pattern.join(' ')} (local.get 0) (v128.const i32x4 0 0 0 0))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> shift-left 8x16"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), shift8x16_pattern.map(x => x >= 16 ? 0 : x)); +} + +// The same as above but the constant is lhs. +{ + // 8-bit shift with zeroes shifted in at the right end + let shift8x16_pattern = [16, 16, 16, 16, 16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10].map(x => x ^ 16); + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16))))) + (func $f (param v128) (result v128) + (i8x16.shuffle ${shift8x16_pattern.join(' ')} (v128.const i32x4 0 0 0 0) (local.get 0))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> shift-left 8x16"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), shift8x16_pattern.map(x => x < 16 ? 0 : x - 16)); +} + +// Operands differ, both accessed, rhs is constant zero, left-shift pattern that +// does not start properly. 
+{ + // Almost an 8-bit left shift: lanes 0-5 are zero, but lane 6 picks byte 1, not byte 0 + let shift8x16_pattern = [16, 16, 16, 16, 16, 16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16))))) + (func $f (param v128) (result v128) + (i8x16.shuffle ${shift8x16_pattern.join(' ')} (local.get 0) (v128.const i32x4 0 0 0 0))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> shuffle+blend 8x16"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), shift8x16_pattern.map(x => x >= 16 ? 0 : x)); +} + +// Operands differ, both accessed, rhs is constant zero, right-shift pattern +{ + // 8-bit right shift by 6: zeroes fill the six high lanes (10-15) + let shift8x16_pattern = [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 20, 20, 20, 20, 20, 20]; + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16))))) + (func $f (param v128) (result v128) + (i8x16.shuffle ${shift8x16_pattern.join(' ')} (local.get 0) (v128.const i32x4 0 0 0 0))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> shift-right 8x16"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), shift8x16_pattern.map(x => x >= 16 ? 0 : x)); +} + +// Operands differ, both accessed, rhs is constant zero, right-shift pattern +// that does not end properly. 
+{ + // Almost an 8-bit right shift by 6: lane 9 is zero instead of byte 15 + let shift8x16_pattern = [6, 7, 8, 9, 10, 11, 12, 13, 14, 20, 20, 20, 20, 20, 20, 20]; + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16))))) + (func $f (param v128) (result v128) + (i8x16.shuffle ${shift8x16_pattern.join(' ')} (local.get 0) (v128.const i32x4 0 0 0 0))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> shuffle+blend 8x16"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), shift8x16_pattern.map(x => x >= 16 ? 0 : x)); +} + +// Operands differ and are variable, both accessed, (lhs ++ rhs) >> k +{ + let concat8x16_pattern = [27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)) (v128.load (i32.const 32))))) + (func $f (param v128) (param v128) (result v128) + (i8x16.shuffle ${concat8x16_pattern.join(' ')} (local.get 0) (local.get 1))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> concat+shift-right 8x16"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + set(mem, 32, iota(16).map(k => k+16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), concat8x16_pattern); +} + +// Operands differ and are variable, both accessed, (rhs ++ lhs) >> k +{ + let concat8x16_pattern = [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]; + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)) (v128.load (i32.const 32))))) + (func $f (param v128) (param v128) (result v128) + (i8x16.shuffle ${concat8x16_pattern.join(' ')} (local.get 0) (local.get 1))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> concat+shift-right 8x16"); + + let mem = new 
Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + set(mem, 32, iota(16).map(k => k+16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), concat8x16_pattern); +} + +// Operands differ, both accessed, but inputs stay in their lanes => byte blend +{ + let blend8x16_pattern = iota(16).map(x => (x % 3 == 0) ? x + 16 : x); + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)) (v128.load (i32.const 32))))) + (func $f (param v128) (param v128) (result v128) + (i8x16.shuffle ${blend8x16_pattern.join(' ')} (local.get 0) (local.get 1))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> blend 8x16"); + + let mem = new Int8Array(ins.exports.mem.buffer); + let lhs = iota(16); + let rhs = iota(16).map(x => x+16); + set(mem, 16, lhs); + set(mem, 32, rhs); + ins.exports.run(); + assertSame(get(mem, 0, 16), blend8x16_pattern); +} + +// Operands differ, both accessed, but inputs stay in their lanes => word blend +{ + let blend16x8_pattern = iota(16).map(x => (x & 2) ? 
x + 16 : x); + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)) (v128.load (i32.const 32))))) + (func $f (param v128) (param v128) (result v128) + (i8x16.shuffle ${blend16x8_pattern.join(' ')} (local.get 0) (local.get 1))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> blend 16x8"); + + let mem = new Int8Array(ins.exports.mem.buffer); + let lhs = iota(16); + let rhs = iota(16).map(x => x+16); + set(mem, 16, lhs); + set(mem, 32, rhs); + ins.exports.run(); + assertSame(get(mem, 0, 16), blend16x8_pattern); +} + +// Interleave i32x4s +for ( let [lhs, rhs, expected] of + [[[0, 1], [4, 5], "shuffle -> interleave-low 32x4"], + [[2, 3], [6, 7], "shuffle -> interleave-high 32x4"]] ) { + for (let swap of [false, true]) { + if (swap) + [lhs, rhs] = [rhs, lhs]; + let interleave_pattern = i32ToI8(interleave(lhs, rhs)); + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)) (v128.load (i32.const 32))))) + (func $f (param v128) (param v128) (result v128) + (i8x16.shuffle ${interleave_pattern.join(' ')} (local.get 0) (local.get 1))))`); + + assertEq(wasmSimdAnalysis(), expected); + + let mem = new Int8Array(ins.exports.mem.buffer); + let lhsval = iota(16); + let rhsval = iota(16).map(x => x+16); + set(mem, 16, lhsval); + set(mem, 32, rhsval); + ins.exports.run(); + assertSame(get(mem, 0, 16), interleave_pattern); + } +} + +// Interleave i64x2s +for ( let [lhs, rhs, expected] of + [[[0], [2], "shuffle -> interleave-low 64x2"], + [[1], [3], "shuffle -> interleave-high 64x2"]] ) { + for (let swap of [false, true]) { + if (swap) + [lhs, rhs] = [rhs, lhs]; + let interleave_pattern = i64ToI2(interleave(lhs, rhs)); + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)) (v128.load (i32.const 
32))))) + (func $f (param v128) (param v128) (result v128) + (i8x16.shuffle ${interleave_pattern.join(' ')} (local.get 0) (local.get 1))))`); + + assertEq(wasmSimdAnalysis(), expected); + + let mem = new Int8Array(ins.exports.mem.buffer); + let lhsval = iota(16); + let rhsval = iota(16).map(x => x+16); + set(mem, 16, lhsval); + set(mem, 32, rhsval); + ins.exports.run(); + assertSame(get(mem, 0, 16), interleave_pattern); + } +} + +// Interleave i16x8s +for ( let [lhs, rhs, expected] of + [[[0, 1, 2, 3], [8, 9, 10, 11], "shuffle -> interleave-low 16x8"], + [[4, 5, 6, 7], [12, 13, 14, 15], "shuffle -> interleave-high 16x8"]] ) { + for (let swap of [false, true]) { + if (swap) + [lhs, rhs] = [rhs, lhs]; + let interleave_pattern = i16ToI8(interleave(lhs, rhs)); + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)) (v128.load (i32.const 32))))) + (func $f (param v128) (param v128) (result v128) + (i8x16.shuffle ${interleave_pattern.join(' ')} (local.get 0) (local.get 1))))`); + + assertEq(wasmSimdAnalysis(), expected); + + let mem = new Int8Array(ins.exports.mem.buffer); + let lhsval = iota(16); + let rhsval = iota(16).map(x => x+16); + set(mem, 16, lhsval); + set(mem, 32, rhsval); + ins.exports.run(); + assertSame(get(mem, 0, 16), interleave_pattern); + } +} + +// Interleave i8x16s +for ( let [lhs, rhs, expected] of + [[[0, 1, 2, 3, 4, 5, 6, 7], [16, 17, 18, 19, 20, 21, 22, 23], "shuffle -> interleave-low 8x16"], + [[8, 9, 10, 11, 12, 13, 14, 15],[24, 25, 26, 27, 28, 29, 30, 31], "shuffle -> interleave-high 8x16"]] ) { + for (let swap of [false, true]) { + if (swap) + [lhs, rhs] = [rhs, lhs]; + let interleave_pattern = interleave(lhs, rhs); + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)) (v128.load (i32.const 32))))) + (func $f (param v128) (param v128) (result v128) + 
(i8x16.shuffle ${interleave_pattern.join(' ')} (local.get 0) (local.get 1))))`); + + assertEq(wasmSimdAnalysis(), expected); + + let mem = new Int8Array(ins.exports.mem.buffer); + let lhsval = iota(16); + let rhsval = iota(16).map(x => x+16); + set(mem, 16, lhsval); + set(mem, 32, rhsval); + ins.exports.run(); + assertSame(get(mem, 0, 16), interleave_pattern); + } +} + +// Operands differ, both accessed, random jumble => byte shuffle+blend +{ + let blend_perm8x16_pattern = [5, 23, 6, 24, 9, 10, 11, 7, 7, 14, 15, 19, 1, 2, 3, 12]; + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)) (v128.load (i32.const 32))))) + (func $f (param v128) (param v128) (result v128) + (i8x16.shuffle ${blend_perm8x16_pattern.join(' ')} (local.get 0) (local.get 1))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> shuffle+blend 8x16"); + + let mem = new Int8Array(ins.exports.mem.buffer); + let lhs = iota(16).map(x => x+16); + let rhs = iota(16); + set(mem, 16, lhs); + set(mem, 32, rhs); + ins.exports.run(); + assertSame(get(mem, 0, 16), + blend_perm8x16_pattern.map(x => x < 16 ? lhs[x] : rhs[x-16])); +} + +// No-op, ignoring right operand, should turn into a move. +{ + let nop8x16_pattern = iota(16); + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)) (v128.load (i32.const 32))))) + (func $f (param v128) (param v128) (result v128) + (i8x16.shuffle ${nop8x16_pattern.join(' ')} (local.get 0) (local.get 1))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> move"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + set(mem, 32, iota(16).map(x => x+16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), nop8x16_pattern); +} + +// No-op, ignoring left operand, should turn into a move. 
+{ + let nop8x16_pattern = iota(16).map(x => x+16); + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16)) (v128.load (i32.const 32))))) + (func $f (param v128) (param v128) (result v128) + (i8x16.shuffle ${nop8x16_pattern.join(' ')} (local.get 0) (local.get 1))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> move"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + set(mem, 32, iota(16).map(x => x+16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), nop8x16_pattern); +} + +// Broadcast byte +for ( let byte of [3, 11, 8, 2] ) { + let broadcast8x16_pattern = iota(16).map(_ => byte); + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16))))) + (func $f (param v128) (result v128) + (i8x16.shuffle ${broadcast8x16_pattern.join(' ')} (local.get 0) (local.get 0))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> broadcast 8x16"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), broadcast8x16_pattern); +} + +// Broadcast word from high quadword +{ + let broadcast16x8_pattern = [10, 11, 10, 11, 10, 11, 10, 11, 10, 11, 10, 11, 10, 11, 10, 11]; + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16))))) + (func $f (param v128) (result v128) + (i8x16.shuffle ${broadcast16x8_pattern.join(' ')} (local.get 0) (local.get 0))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> broadcast 16x8"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), broadcast16x8_pattern); +} + +// Broadcast word from low quadword +{ + let broadcast16x8_pattern = [4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5]; + let ins = 
wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16))))) + (func $f (param v128) (result v128) + (i8x16.shuffle ${broadcast16x8_pattern.join(' ')} (local.get 0) (local.get 0))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> broadcast 16x8"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), broadcast16x8_pattern); +} + +// Broadcast dword from low quadword should turn into a dword permute +{ + let broadcast32x4_pattern = [4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7]; + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16))))) + (func $f (param v128) (result v128) + (i8x16.shuffle ${broadcast32x4_pattern.join(' ')} (local.get 0) (local.get 0))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> permute 32x4"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), broadcast32x4_pattern); +} + +// Broadcast high qword should turn into a dword permute +{ + let broadcast64x2_pattern = [8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15] + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16))))) + (func $f (param v128) (result v128) + (i8x16.shuffle ${broadcast64x2_pattern.join(' ')} (local.get 0) (local.get 0))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> permute 32x4"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), broadcast64x2_pattern); +} + +// Byte reversal should be a byte permute +{ + let rev8x16_pattern = iota(16).reverse(); + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) 
(call $f (v128.load (i32.const 16))))) + (func $f (param v128) (result v128) + (i8x16.shuffle ${rev8x16_pattern.join(' ')} (local.get 0) (local.get 0))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> permute 8x16"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), rev8x16_pattern); +} + +// Byteswap of half-word, word and quad-word groups should be +// reverse bytes analysis +for (let k of [2, 4, 8]) { + let rev8_pattern = iota(16).map(i => i ^ (k - 1)); + let ins = wasmCompile(` +(module +(memory (export "mem") 1 1) +(func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16))))) +(func $f (param v128) (result v128) + (i8x16.shuffle ${rev8_pattern.join(' ')} (local.get 0) (local.get 0))))`); + + assertEq(wasmSimdAnalysis(), `shuffle -> reverse bytes in ${8 * k}-bit lanes`); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), rev8_pattern); +} + +// Word reversal should be a word permute +{ + let rev16x8_pattern = i16ToI8(iota(8).reverse()); + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16))))) + (func $f (param v128) (result v128) + (i8x16.shuffle ${rev16x8_pattern.join(' ')} (local.get 0) (local.get 0))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> permute 16x8"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), rev16x8_pattern); +} + +// Dword reversal should be a dword permute +{ + let rev32x4_pattern = i32ToI8([3, 2, 1, 0]); + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16))))) + (func $f (param v128) (result v128) + (i8x16.shuffle ${rev32x4_pattern.join(' ')} (local.get 0) (local.get 0))))`); + + 
assertEq(wasmSimdAnalysis(), "shuffle -> permute 32x4"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), rev32x4_pattern); +} + +// Qword reversal should be a dword permute +{ + let rev64x2_pattern = i32ToI8([2, 3, 0, 1]); + let ins = wasmCompile(` +(module + (memory (export "mem") 1 1) + (func (export "run") + (v128.store (i32.const 0) (call $f (v128.load (i32.const 16))))) + (func $f (param v128) (result v128) + (i8x16.shuffle ${rev64x2_pattern.join(' ')} (local.get 0) (local.get 0))))`); + + assertEq(wasmSimdAnalysis(), "shuffle -> permute 32x4"); + + let mem = new Int8Array(ins.exports.mem.buffer); + set(mem, 16, iota(16)); + ins.exports.run(); + assertSame(get(mem, 0, 16), rev64x2_pattern); +} + +// In the case of shifts, we have separate tests that constant shifts work +// correctly, so no such testing is done here. + +for ( let lanes of ['i8x16', 'i16x8', 'i32x4', 'i64x2'] ) { + for ( let shift of ['shl', 'shr_s', 'shr_u'] ) { + for ( let [count, result] of [['(i32.const 5)', /shift -> constant shift/], + ['(local.get 1)', /shift -> variable(?: scalarized)? shift/]] ) { + wasmCompile(`(module (func (param v128) (param i32) (result v128) (${lanes}.${shift} (local.get 0) ${count})))`); + assertEq(wasmSimdAnalysis().match(result).length, 1); + } + } +} + +// Constant folding scalar->simd. There are functional tests for all these in +// ad-hack.js so here we only check that the transformation is triggered. + +for ( let [ty128, ty] of [['i8x16', 'i32'], ['i16x8', 'i32'], ['i32x4', 'i32'], + ['i64x2', 'i64'], ['f32x4', 'f32'], ['f64x2', 'f64']] ) +{ + wasmCompile(`(module (func (result v128) (${ty128}.splat (${ty}.const 37))))`); + assertEq(wasmSimdAnalysis(), "scalar-to-simd128 -> constant folded"); +} + +// Ditto simd->scalar. 
+ +for ( let [ty128, suffix] of [['i8x16', '_s'], ['i8x16', '_u'], ['i16x8','_s'], ['i16x8','_u'], ['i32x4', '']] ) { + for ( let op of ['any_true', 'all_true', 'bitmask', `extract_lane${suffix} 0`] ) { + let operation = op == 'any_true' ? 'v128.any_true' : `${ty128}.${op}`; + wasmCompile(`(module (func (result i32) (${operation} (v128.const i64x2 0 0))))`); + assertEq(wasmSimdAnalysis(), "simd128-to-scalar -> constant folded"); + } +} + +for ( let ty128 of ['f32x4','f64x2','i64x2'] ) { + wasmCompile(`(module (func (result ${ty128.match(/(...)x.*/)[1]}) (${ty128}.extract_lane 0 (v128.const i64x2 0 0))))`); + assertEq(wasmSimdAnalysis(), "simd128-to-scalar -> constant folded"); +} + +// Optimizing all_true, any_true, and bitmask that are used for control flow, also when negated. + +for ( let [ty128,size] of [['i8x16',1], ['i16x8',2], ['i32x4',4]] ) { + let all = iota(16/size).map(n => n*n); + let some = iota(16/size).map(n => n*(n % 3)); + let none = iota(16/size).map(n => 0); + let inputs = [all, some, none]; + let ops = { all_true: allTrue, any_true: anyTrue, bitmask }; + + for ( let op of ['any_true', 'all_true', 'bitmask'] ) { + let folded = op != 'bitmask' || (size == 2 && !isArm64); + let operation = op == 'any_true' ? 'v128.any_true' : `${ty128}.${op}`; + let positive = + wasmCompile( + `(module + (memory (export "mem") 1 1) + (func $f (param v128) (result i32) + (if (result i32) (${operation} (local.get 0)) + (i32.const 42) + (i32.const 37))) + (func (export "run") (result i32) + (call $f (v128.load (i32.const 16)))))`); + assertEq(wasmSimdAnalysis(), folded ? "simd128-to-scalar-and-branch -> folded" : "none"); + + let negative = + wasmCompile( + `(module + (memory (export "mem") 1 1) + (func $f (param v128) (result i32) + (if (result i32) (i32.eqz (${operation} (local.get 0))) + (i32.const 42) + (i32.const 37))) + (func (export "run") (result i32) + (call $f (v128.load (i32.const 16)))))`); + assertEq(wasmSimdAnalysis(), folded ? 
"simd128-to-scalar-and-branch -> folded" : "none"); + + for ( let inp of inputs ) { + let mem = new this[`Int${8*size}Array`](positive.exports.mem.buffer); + set(mem, 16/size, inp); + assertEq(positive.exports.run(), ops[op](inp) ? 42 : 37); + + mem = new this[`Int${8*size}Array`](negative.exports.mem.buffer); + set(mem, 16/size, inp); + assertEq(negative.exports.run(), ops[op](inp) ? 37 : 42); + } + } +} + +// Constant folding + +{ + // Swizzle-with-constant rewritten as shuffle, and then further optimized + // into a dword permute. Correctness is tested in ad-hack.js. + wasmCompile(` +(module (func (param v128) (result v128) + (i8x16.swizzle (local.get 0) (v128.const i8x16 4 5 6 7 0 1 2 3 12 13 14 15 8 9 10 11)))) +`); + assertEq(wasmSimdAnalysis(), "shuffle -> permute 32x4"); +} + +// Bitselect with constant mask folded into shuffle operation + +if (!isArm64) { + wasmCompile(` + (module (func (param v128) (param v128) (result v128) + (v128.bitselect (local.get 0) (local.get 1) (v128.const i8x16 0 -1 -1 0 0 0 0 0 -1 -1 -1 -1 -1 -1 0 0)))) + `); + assertEq(wasmSimdAnalysis(), "shuffle -> blend 8x16"); +} + +// Library + +function wasmCompile(text) { + return new WebAssembly.Instance(new WebAssembly.Module(wasmTextToBinary(text))) +} + +function get(arr, loc, len) { + let res = []; + for ( let i=0; i < len; i++ ) { + res.push(arr[loc+i]); + } + return res; +} + +function set(arr, loc, vals) { + for ( let i=0; i < vals.length; i++ ) { + arr[loc+i] = vals[i]; + } +} + +function i32ToI8(xs) { + return xs.map(x => [x*4, x*4+1, x*4+2, x*4+3]).flat(); +} + +function i64ToI2(xs) { + return xs.map(x => [x*8, x*8+1, x*8+2, x*8+3, + x*8+4, x*8+5, x*8+6, x*8+7]).flat(); +} + +function i16ToI8(xs) { + return xs.map(x => [x*2, x*2+1]).flat(); +} + +function allTrue(xs) { + return xs.every(v => v != 0); +} + +function anyTrue(xs) { + return xs.some(v => v != 0); +} + +function bitmask(xs) { + let shift = 128/xs.length - 1; + let res = 0; + let k = 0; + xs.forEach(v => { res |= 
((v >>> shift) & 1) << k; k++; }); + return res; +} diff --git a/js/src/jit-test/tests/wasm/simd/ion-bug1641973.js b/js/src/jit-test/tests/wasm/simd/ion-bug1641973.js new file mode 100644 index 0000000000..7dfdf3afad --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/ion-bug1641973.js @@ -0,0 +1,13 @@ +// |jit-test| skip-if: !wasmSimdEnabled() + +// Fuzz test case. The initial unreachable will result in the subsequent +// i8x16.shuffle popping null pointers off the value stack. Due to a missing +// isDeadCode() check in WasmIonCompile.cpp the compiler would dereference those +// null pointers. +new WebAssembly.Module(wasmTextToBinary(` +(module + (func (result v128) + (unreachable) + (i8x16.shuffle 0 0 23 0 4 4 4 4 4 16 1 0 4 4 4 4))) +`)) + diff --git a/js/src/jit-test/tests/wasm/simd/ion-bug1688262.js b/js/src/jit-test/tests/wasm/simd/ion-bug1688262.js new file mode 100644 index 0000000000..ebb2f72864 --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/ion-bug1688262.js @@ -0,0 +1,22 @@ +// |jit-test| skip-if: !wasmSimdEnabled() + +// Shuffle pattern incorrectly recognized as a rotate due to a missing guard in +// the optimizer. 
+ +let ins = wasmEvalText(` + (module + (memory (export "mem") 1) + (func (export "test") + (v128.store (i32.const 0) + (i8x16.shuffle 0 1 2 3 4 5 6 7 8 0 1 2 3 4 5 6 + (v128.load (i32.const 16)) + (v128.const i32x4 0 0 0 0))))) +`); + +let mem = new Int8Array(ins.exports.mem.buffer); +let input = [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]; +let output = [10, 11, 12, 13, 14, 15, 16, 17, 18, 10, 11, 12, 13, 14, 15, 16]; +mem.set(input, 16); +ins.exports.test(); +let result = Array.from(mem.subarray(0, 16)); +assertDeepEq(output, result); diff --git a/js/src/jit-test/tests/wasm/simd/ion-bug1688713.js b/js/src/jit-test/tests/wasm/simd/ion-bug1688713.js new file mode 100644 index 0000000000..86a2ff0b3c --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/ion-bug1688713.js @@ -0,0 +1,24 @@ +// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x64 || getBuildConfiguration().simulator || isAvxPresent(); include:codegen-x64-test.js + +// This checks that we emit a REX prefix that includes the SIB index when +// appropriate. +// +// This test case is a little tricky. On Win64, the arg registers are rcx, rdx, +// r8, r9; so we want to use local 2 or 3 as the index. But on other x64 +// platforms, the arg registers are rdi, rsi, rdx, rcx, r8, r9; so we want to +// use local 4 or 5 as the index. This test uses both, and then looks for a hit +// on the REX byte which must be 0x43. Before the bugfix, since the index +// register was ignored, the byte would always be 0x41, as it will continue to +// be for the access that does not use an extended register. +// +// The test is brittle: the register allocator can easily make a mess of it. +// But for now it works. 
+ +codegenTestX64_adhoc( +`(module + (memory 1) + (func $f (export "f") (param i32) (param i32) (param i32) (param i32) (param i32) (result v128) + (i32x4.add (v128.load8x8_s (local.get 4)) (v128.load8x8_s (local.get 2)))))`, + 'f', + `66 43 0f 38 20 .. .. pmovsxbwq \\(%r15,%r(8|9|10|11|12|13),1\\), %xmm[0-9]+`, + {no_prefix: true, no_suffix: true, log:true}); diff --git a/js/src/jit-test/tests/wasm/simd/js-api.js b/js/src/jit-test/tests/wasm/simd/js-api.js new file mode 100644 index 0000000000..ea7f3fb147 --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/js-api.js @@ -0,0 +1,130 @@ +// |jit-test| test-also=--no-threads; skip-if: !wasmSimdEnabled() + +// SIMD JS API +// +// As of 31 March 2020 the SIMD spec is very light on information about the JS +// API, and what it has is ridden with misspellings, grammatical errors, and +// apparent redundancies. The rules below represent my best effort at +// understanding the intent of the spec. As far as I can tell, the rules for +// v128 are intended to match the rules for i64 in the Wasm MVP. + +// Hopefully, these are enough to test that various JIT stubs are generated and +// used if we run the tests in a loop. + +setJitCompilerOption("baseline.warmup.trigger", 2); +setJitCompilerOption("ion.warmup.trigger", 4); + +// RULE: v128 cannot cross the JS/wasm boundary as a function parameter. +// +// A wasm function that: +// - takes or returns v128 +// - was imported into wasm +// - is ultimately a JS function +// should always throw TypeError when called from wasm. +// +// Note, JIT exit stubs should be generated here because settings above should +// cause the JIT to tier up. 
+ +var ins = wasmEvalText(` + (module + (import "m" "v128_param" (func $f (param v128))) + (import "m" "v128_return" (func $g (result v128))) + (func (export "v128_param") + (call $f (v128.const i32x4 0 0 0 0))) + (func (export "v128_result") + (drop (call $g))))`, + {m:{v128_param: (x) => 0, + v128_return: () => 0}}); + +function call_v128_param() { ins.exports.v128_param(); } +function call_v128_result() { ins.exports.v128_result(); } + +for ( let i = 0 ; i < 100; i++ ) { + assertErrorMessage(call_v128_param, + TypeError, + /cannot pass.*v128.*to or from JS/); + assertErrorMessage(call_v128_result, + TypeError, + /cannot pass.*v128.*to or from JS/); +} + +// RULE: v128 cannot cross the JS/wasm boundary as a function parameter. +// +// A wasm function that: +// - takes or returns v128 +// - is exported from wasm +// - is ultimately a true wasm function +// should always throw TypeError when called from JS. +// +// Note, JIT entry stubs should be generated here because settings above should +// cause the JIT to tier up. + +var ins2 = wasmEvalText(` + (module + (func (export "v128_param") (param v128) (result i32) + (i32.const 0)) + (func (export "v128_result") (result v128) + (v128.const i32x4 0 0 0 0)))`); + +function call_v128_param2() { ins2.exports.v128_param(); } +function call_v128_result2() { ins2.exports.v128_result(); } + +for ( let i = 0 ; i < 100; i++ ) { + assertErrorMessage(call_v128_param2, + TypeError, + /cannot pass.*v128.*to or from JS/); + assertErrorMessage(call_v128_result2, + TypeError, + /cannot pass.*v128.*to or from JS/); +} + +// RULE: The rules about v128 passing into or out of a function apply even when +// an imported JS function is re-exported and is then called. 
+ +var newfn = (x) => x; +var ins = wasmEvalText(` + (module + (import "m" "fn" (func $f (param v128) (result v128))) + (export "newfn" (func $f)))`, + {m:{fn: newfn}}); +assertErrorMessage(() => ins.exports.newfn(3), + TypeError, + /cannot pass.*v128.*to or from JS/); + +// NOTE(review): this rule used to claim that a v128 WebAssembly.Global is constructable from +// JS with a default value; that contradicts the assertions below, which all expect TypeError. + + +// RULE: WebAssembly.Global of type v128 is not constructable from JS, with +// or without a default value. + +assertErrorMessage(() => new WebAssembly.Global({value: "v128"}, 37), + TypeError, + /cannot pass.*v128.*to or from JS/); +assertErrorMessage(() => new WebAssembly.Global({value: "v128"}), + TypeError, + /cannot pass.*v128.*to or from JS/); +assertErrorMessage(() => new WebAssembly.Global({value: "v128", mutable: true}), + TypeError, + /cannot pass.*v128.*to or from JS/); + +// RULE: WebAssembly.Global objects of type v128 have getters and setters that +// throw TypeError when called from JS. + +let {gi, gm} = wasmEvalText(` + (module + (global (export "gi") v128 v128.const i64x2 0 0) + (global (export "gm") (mut v128) v128.const i64x2 0 0) + )`).exports; + +assertErrorMessage(() => gi.value, + TypeError, + /cannot pass.*v128.*to or from JS/); +assertErrorMessage(() => gi.valueOf(), + TypeError, + /cannot pass.*v128.*to or from JS/); +assertErrorMessage(() => gm.value = 0, + TypeError, + /cannot pass.*v128.*to or from JS/); + + diff --git a/js/src/jit-test/tests/wasm/simd/neg-abs-not-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/neg-abs-not-x64-ion-codegen.js new file mode 100644 index 0000000000..0ae75f38fb --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/neg-abs-not-x64-ion-codegen.js @@ -0,0 +1,34 @@ +// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x64 || getBuildConfiguration().simulator; include:codegen-x64-test.js + +// Test that there are no extraneous moves for variable SIMD negate, abs, and +// not 
instructions. See README-codegen.md for general information about this +// type of test case. + +// Integer negates don't have to reuse the input for the output, and prefer for +// the registers to be different. So use parameter 1 and ignore parameter 0. + +codegenTestX64_IGNOREDxv128_v128( + [['i8x16.neg', ` +66 0f ef c0 pxor %xmm0, %xmm0 +66 0f f8 c1 psubb %xmm1, %xmm0`], + ['i16x8.neg', ` +66 0f ef c0 pxor %xmm0, %xmm0 +66 0f f9 c1 psubw %xmm1, %xmm0`], + ['i32x4.neg', ` +66 0f ef c0 pxor %xmm0, %xmm0 +66 0f fa c1 psubd %xmm1, %xmm0`], + ['i64x2.neg', ` +66 0f ef c0 pxor %xmm0, %xmm0 +66 0f fb c1 psubq %xmm1, %xmm0`]] ); + +// Floating point negate and absolute value, and bitwise not, prefer for the +// registers to be the same and guarantee that no move is inserted if so. + +codegenTestX64_v128_v128( + [['f32x4.neg', `66 0f ef 05 ${RIPRADDR} pxorx ${RIPR}, %xmm0`], + ['f64x2.neg', `66 0f ef 05 ${RIPRADDR} pxorx ${RIPR}, %xmm0`], + ['f32x4.abs', `66 0f db 05 ${RIPRADDR} pandx ${RIPR}, %xmm0`], + ['f64x2.abs', `66 0f db 05 ${RIPRADDR} pandx ${RIPR}, %xmm0`], + ['v128.not', ` +66 45 0f 75 ff pcmpeqw %xmm15, %xmm15 +66 41 0f ef c7 pxor %xmm15, %xmm0`]] ); diff --git a/js/src/jit-test/tests/wasm/simd/pairwise-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/pairwise-x64-ion-codegen.js new file mode 100644 index 0000000000..53ab47fdb8 --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/pairwise-x64-ion-codegen.js @@ -0,0 +1,38 @@ +// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x64 || getBuildConfiguration().simulator; include:codegen-x64-test.js + +// Tests for SIMD add pairwise instructions. 
+ +if (!isAvxPresent()) { + + codegenTestX64_IGNOREDxv128_v128( + [['i16x8.extadd_pairwise_i8x16_s', ` +66 0f 6f 05 ${RIPRADDR} movdqax ${RIPR}, %xmm0 +66 0f 38 04 c1 pmaddubsw %xmm1, %xmm0`], + ['i16x8.extadd_pairwise_i8x16_u', ` +66 0f 6f c1 movdqa %xmm1, %xmm0 +66 0f 38 04 05 ${RIPRADDR} pmaddubswx ${RIPR}, %xmm0`], + ['i32x4.extadd_pairwise_i16x8_s', ` +66 0f 6f c1 movdqa %xmm1, %xmm0 +66 0f f5 05 ${RIPRADDR} pmaddwdx ${RIPR}, %xmm0`], + ['i32x4.extadd_pairwise_i16x8_u', ` +66 0f 6f c1 movdqa %xmm1, %xmm0 +66 0f ef 05 ${RIPRADDR} pxorx ${RIPR}, %xmm0 +66 0f f5 05 ${RIPRADDR} pmaddwdx ${RIPR}, %xmm0 +66 0f fe 05 ${RIPRADDR} padddx ${RIPR}, %xmm0`]]); + +} else { + + codegenTestX64_IGNOREDxv128_v128( + [['i16x8.extadd_pairwise_i8x16_s', ` +66 0f 6f 05 ${RIPRADDR} movdqax ${RIPR}, %xmm0 +66 0f 38 04 c1 pmaddubsw %xmm1, %xmm0`], + ['i16x8.extadd_pairwise_i8x16_u', ` +c4 e2 71 04 05 ${RIPRADDR} vpmaddubswx ${RIPR}, %xmm1, %xmm0`], + ['i32x4.extadd_pairwise_i16x8_s', ` +c5 f1 f5 05 ${RIPRADDR} vpmaddwdx ${RIPR}, %xmm1, %xmm0`], + ['i32x4.extadd_pairwise_i16x8_u', ` +c5 f1 ef 05 ${RIPRADDR} vpxorx ${RIPR}, %xmm1, %xmm0 +66 0f f5 05 ${RIPRADDR} pmaddwdx ${RIPR}, %xmm0 +66 0f fe 05 ${RIPRADDR} padddx ${RIPR}, %xmm0`]]); + +} diff --git a/js/src/jit-test/tests/wasm/simd/pmaddubsw-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/pmaddubsw-x64-ion-codegen.js new file mode 100644 index 0000000000..94abfd5c54 --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/pmaddubsw-x64-ion-codegen.js @@ -0,0 +1,154 @@ +// |jit-test| skip-if: !wasmSimdEnabled() || wasmCompileMode() != "ion" + +// Testing _mm_maddubs_epi16 / vpmaddubsw behavior for all platforms. +// +// Bug 1762413 adds specialization for emscripten's pattern to directly +// emit PMADDUBSW machine code. + +const isX64 = getBuildConfiguration().x64 && !getBuildConfiguration().simulator; + +// Simple test. 
+const simple = wasmTextToBinary(`(module + (memory (export "memory") 1 1) + (func $_mm_maddubs_epi16 (export "t") (param v128 v128) (result v128) + local.get 1 + i32.const 8 + i16x8.shl + i32.const 8 + i16x8.shr_s + local.get 0 + v128.const i32x4 0x00ff00ff 0x00ff00ff 0x00ff00ff 0x00ff00ff + v128.and + i16x8.mul + local.get 1 + i32.const 8 + i16x8.shr_s + local.get 0 + i32.const 8 + i16x8.shr_u + i16x8.mul + i16x8.add_sat_s) + (func (export "run") + i32.const 0 + v128.const i8x16 0 2 1 2 1 2 -1 1 255 255 255 255 0 0 255 255 + v128.const i8x16 1 0 3 4 -3 -4 -128 127 127 127 -128 -128 0 0 -128 127 + call $_mm_maddubs_epi16 + v128.store + ) +)`); +var ins = new WebAssembly.Instance(new WebAssembly.Module(simple)); +ins.exports.run(); +var mem16 = new Int16Array(ins.exports.memory.buffer, 0, 8); +assertSame(mem16, [0, 11, -11, -32513, 32767, -32768, 0, -255]); + +if (hasDisassembler() && isX64) { + assertEq(wasmDis(ins.exports.t, {tier:"ion", asString:true}).includes('pmaddubsw'), true); +} + +if (hasDisassembler() && isX64) { + // Two pmaddubsw patterns share a common operand, and the code is optimized. 
+ const realWorldOutput = wasmTextToBinary(`(module + (memory 1 1) + (func (export "test") + (local i32 i32 i32 i32 v128 v128 v128 v128 v128 v128) + local.get 0 + local.get 1 + i32.add + local.set 2 + local.get 0 + i32.const 16 + i32.add + local.set 0 + local.get 3 + local.set 1 + loop + local.get 5 + local.get 0 + v128.load + local.tee 5 + i32.const 7 + i8x16.shr_s + local.tee 8 + local.get 1 + v128.load offset=240 + local.get 5 + v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000 + i8x16.eq + local.tee 7 + v128.andnot + i8x16.add + local.get 8 + v128.xor + local.tee 4 + i32.const 8 + i16x8.shl + i32.const 8 + i16x8.shr_s + local.get 5 + i8x16.abs + local.tee 5 + v128.const i32x4 0x00ff00ff 0x00ff00ff 0x00ff00ff 0x00ff00ff + v128.and + local.tee 9 + i16x8.mul + local.get 4 + i32.const 8 + i16x8.shr_s + local.get 5 + i32.const 8 + i16x8.shr_u + local.tee 4 + i16x8.mul + i16x8.add_sat_s + i16x8.add_sat_s + local.set 5 + + local.get 6 + local.get 8 + local.get 1 + v128.load offset=224 + local.get 7 + v128.andnot + i8x16.add + local.get 8 + v128.xor + local.tee 6 + i32.const 8 + i16x8.shl + i32.const 8 + i16x8.shr_s + local.get 9 + i16x8.mul + local.get 6 + i32.const 8 + i16x8.shr_s + local.get 4 + i16x8.mul + i16x8.add_sat_s + i16x8.add_sat_s + local.set 6 + + local.get 1 + i32.const 128 + i32.add + local.set 1 + local.get 0 + i32.const 16 + i32.add + local.tee 0 + local.get 2 + i32.ne + br_if 0 + end +))`); + + var ins = new WebAssembly.Instance(new WebAssembly.Module(realWorldOutput)); + const output = wasmDis(ins.exports.test, {tier:"ion", asString:true}).replace(/^[0-9a-f]{8} (?:[0-9a-f]{2} )+\n?\s+/gmi, ""); + // Find two pmaddubsw+paddsw. + const re = /\bv?pmaddubsw[^\n]+\nv?paddsw /g; + assertEq(re.exec(output) != null, true); + assertEq(re.exec(output) != null, true); + assertEq(re.exec(output) == null, true); + // No leftover PMULL, PSLLW, or PSRAW. 
+ assertEq(/pmullw|psllw|psraw/.test(output), false); +} diff --git a/js/src/jit-test/tests/wasm/simd/reduce-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/reduce-x64-ion-codegen.js new file mode 100644 index 0000000000..bd59f30632 --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/reduce-x64-ion-codegen.js @@ -0,0 +1,46 @@ +// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x64 || getBuildConfiguration().simulator || isAvxPresent(); include:codegen-x64-test.js + +// Test encoding of the all_true, and any_true operations. + +codegenTestX64_v128_i32( + [['v128.any_true', ` +66 0f 38 17 c0 ptest %xmm0, %xmm0 +0f 95 c0 setnz %al +0f b6 c0 movzx %al, %eax`], + ['i8x16.all_true', ` +66 45 0f ef ff pxor %xmm15, %xmm15 +66 44 0f 74 f8 pcmpeqb %xmm0, %xmm15 +66 45 0f 38 17 ff ptest %xmm15, %xmm15 +0f 94 c0 setz %al +0f b6 c0 movzx %al, %eax`], + ['i16x8.all_true', ` +66 45 0f ef ff pxor %xmm15, %xmm15 +66 44 0f 75 f8 pcmpeqw %xmm0, %xmm15 +66 45 0f 38 17 ff ptest %xmm15, %xmm15 +0f 94 c0 setz %al +0f b6 c0 movzx %al, %eax`], + ['i32x4.all_true', ` +66 45 0f ef ff pxor %xmm15, %xmm15 +66 44 0f 76 f8 pcmpeqd %xmm0, %xmm15 +66 45 0f 38 17 ff ptest %xmm15, %xmm15 +0f 94 c0 setz %al +0f b6 c0 movzx %al, %eax`], + ['i64x2.all_true', ` +66 45 0f ef ff pxor %xmm15, %xmm15 +66 44 0f 38 29 f8 pcmpeqq %xmm0, %xmm15 +66 45 0f 38 17 ff ptest %xmm15, %xmm15 +0f 94 c0 setz %al +0f b6 c0 movzx %al, %eax`]], {} +) + +// Utils. 
+function codegenTestX64_v128_i32(inputs, options = {}) { + for ( let [op, expected] of inputs ) { + codegenTestX64_adhoc(wrap(options, ` + (func (export "f") (param v128) (result i32) + (${op} (local.get 0)))`), + 'f', + expected, + options); + } + } diff --git a/js/src/jit-test/tests/wasm/simd/select.js b/js/src/jit-test/tests/wasm/simd/select.js new file mode 100644 index 0000000000..b3535d3039 --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/select.js @@ -0,0 +1,33 @@ +// |jit-test| skip-if: !wasmSimdEnabled() + +wasmAssert(` +(module + (func $f (param i32) (result v128) + (select ;; no type + (v128.const i32x4 1 2 3 4) + (v128.const i32x4 4 3 2 1) + (local.get 0) + ) + ) + (export "" (func 0)) +)`, [ + { type: 'v128', func: '$f', args: ['i32.const 0'], expected: 'i32x4 4 3 2 1' }, + { type: 'v128', func: '$f', args: ['i32.const 1'], expected: 'i32x4 1 2 3 4' }, + { type: 'v128', func: '$f', args: ['i32.const -1'], expected: 'i32x4 1 2 3 4' }, +], {}); + +wasmAssert(` +(module + (func $f (param i32) (result v128) + (select (result v128) + (v128.const i32x4 1 2 3 4) + (v128.const i32x4 4 3 2 1) + (local.get 0) + ) + ) + (export "" (func 0)) +)`, [ + { type: 'v128', func: '$f', args: ['i32.const 0'], expected: 'i32x4 4 3 2 1' }, + { type: 'v128', func: '$f', args: ['i32.const 1'], expected: 'i32x4 1 2 3 4' }, + { type: 'v128', func: '$f', args: ['i32.const -1'], expected: 'i32x4 1 2 3 4' }, +], {}); diff --git a/js/src/jit-test/tests/wasm/simd/shift-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/shift-x64-ion-codegen.js new file mode 100644 index 0000000000..9c9f4871d2 --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/shift-x64-ion-codegen.js @@ -0,0 +1,26 @@ +// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x64 || getBuildConfiguration().simulator; include:codegen-x64-test.js + +// Test that there are no extraneous moves for a constant integer SIMD shift +// that can reuse its 
input for its output. See README-codegen.md for general +// information about this type of test case. +// +// There are test cases here for all codegen cases that include a potential move +// to set up the operation, but not for all shift operations in general. + +codegenTestX64_v128xLITERAL_v128( + [['i8x16.shl', '(i32.const 2)', ` +66 0f fc c0 paddb %xmm0, %xmm0 +66 0f fc c0 paddb %xmm0, %xmm0`], + ['i16x8.shl', '(i32.const 2)', `66 0f 71 f0 02 psllw \\$0x02, %xmm0`], + ['i32x4.shl', '(i32.const 2)', `66 0f 72 f0 02 pslld \\$0x02, %xmm0`], + ['i64x2.shl', '(i32.const 2)', `66 0f 73 f0 02 psllq \\$0x02, %xmm0`], + ['i8x16.shr_u', '(i32.const 2)', ` +66 0f db 05 ${RIPRADDR} pandx ${RIPR}, %xmm0 +66 0f 71 d0 02 psrlw \\$0x02, %xmm0`], + ['i16x8.shr_s', '(i32.const 2)', `66 0f 71 e0 02 psraw \\$0x02, %xmm0`], + ['i16x8.shr_u', '(i32.const 2)', `66 0f 71 d0 02 psrlw \\$0x02, %xmm0`], + ['i32x4.shr_s', '(i32.const 2)', `66 0f 72 e0 02 psrad \\$0x02, %xmm0`], + ['i32x4.shr_u', '(i32.const 2)', `66 0f 72 d0 02 psrld \\$0x02, %xmm0`], + ['i64x2.shr_u', '(i32.const 2)', `66 0f 73 d0 02 psrlq \\$0x02, %xmm0`]] ); + + diff --git a/js/src/jit-test/tests/wasm/simd/shuffle-x86-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/shuffle-x86-ion-codegen.js new file mode 100644 index 0000000000..b9d0cc0e82 --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/shuffle-x86-ion-codegen.js @@ -0,0 +1,88 @@ +// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x64 || getBuildConfiguration().simulator || isAvxPresent(); include:codegen-x64-test.js + +// Test that there are no extraneous moves or fixups for SIMD shuffle +// operations. See README-codegen.md for general information about this type of +// test case. 
+ +codegenTestX64_v128xv128_v128([ + // Identity op on first argument should generate no code + ['i8x16.shuffle 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15', + ''], + + // Identity op on second argument should generate a move + ['i8x16.shuffle 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31', + `66 0f 6f c1 movdqa %xmm1, %xmm0`], + + // Broadcast a byte from first argument + ['i8x16.shuffle 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5', + ` +66 0f 60 c0 punpcklbw %xmm0, %xmm0 +f3 0f 70 c0 55 pshufhw \\$0x55, %xmm0, %xmm0 +66 0f 70 c0 aa pshufd \\$0xAA, %xmm0, %xmm0`], + + // Broadcast a word from first argument + ['i8x16.shuffle 4 5 4 5 4 5 4 5 4 5 4 5 4 5 4 5', + ` +f2 0f 70 c0 aa pshuflw \\$0xAA, %xmm0, %xmm0 +66 0f 70 c0 00 pshufd \\$0x00, %xmm0, %xmm0`], + + // Permute bytes + ['i8x16.shuffle 2 1 4 3 6 5 8 7 10 9 12 11 14 13 0 15', +` +66 0f 38 00 05 ${RIPRADDR} pshufbx ${RIPR}, %xmm0`], + + // Permute words + ['i8x16.shuffle 2 3 0 1 6 7 4 5 10 11 8 9 14 15 12 13', +` +f2 0f 70 c0 b1 pshuflw \\$0xB1, %xmm0, %xmm0 +f3 0f 70 c0 b1 pshufhw \\$0xB1, %xmm0, %xmm0`], + + // Permute doublewords + ['i8x16.shuffle 4 5 6 7 0 1 2 3 12 13 14 15 8 9 10 11', + `66 0f 70 c0 b1 pshufd \\$0xB1, %xmm0, %xmm0`], + + // Rotate right + ['i8x16.shuffle 13 14 15 0 1 2 3 4 5 6 7 8 9 10 11 12', + `66 0f 3a 0f c0 0d palignr \\$0x0D, %xmm0, %xmm0`], + + // General shuffle + blend. The initial movdqa to scratch is unavoidable + // unless we can convince the compiler that it's OK to destroy xmm1. 
+ ['i8x16.shuffle 15 29 0 1 2 1 2 0 3 4 7 8 16 8 17 9', +` +66 44 0f 6f f9 movdqa %xmm1, %xmm15 +66 44 0f 38 00 3d ${RIPRADDR} pshufbx ${RIPR}, %xmm15 +66 0f 38 00 05 ${RIPRADDR} pshufbx ${RIPR}, %xmm0 +66 41 0f eb c7 por %xmm15, %xmm0`]]); + +codegenTestX64_v128xLITERAL_v128( + [// Shift left bytes, shifting in zeroes + // + // Remember the low-order bytes are at the "right" end + // + // The pxor is a code generation bug: the operand is unused, and no + // code should need to be generated for it, and no register should + // be allocated to it. The lowering does not use that operand, but + // code generation still touches it. + ['i8x16.shuffle 16 16 16 0 1 2 3 4 5 6 7 8 9 10 11 12', + '(v128.const i32x4 0 0 0 0)', +` +66 0f 73 f8 03 pslldq \\$0x03, %xmm0`], + + // Shift right bytes, shifting in zeroes. See above. + ['i8x16.shuffle 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18', + '(v128.const i32x4 0 0 0 0)', +` +66 0f 73 d8 03 psrldq \\$0x03, %xmm0`]]); + +// SSE4.1 PBLENDVB instruction is using XMM0, checking if blend +// operation generated as expected. +codegenTestX64_adhoc( + `(func (export "f") (param v128 v128 v128 v128) (result v128) + (i8x16.shuffle 0 17 2 3 4 5 6 7 24 25 26 11 12 13 30 15 + (local.get 2)(local.get 3)))`, + 'f', +` +66 0f 6f ca movdqa %xmm2, %xmm1 +66 0f 6f 05 ${RIPRADDR} movdqax ${RIPR}, %xmm0 +66 0f 38 10 cb pblendvb %xmm3, %xmm1 +66 0f 6f c1 movdqa %xmm1, %xmm0`); diff --git a/js/src/jit-test/tests/wasm/simd/simd-partial-oob-store.js b/js/src/jit-test/tests/wasm/simd/simd-partial-oob-store.js new file mode 100644 index 0000000000..00daceb438 --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/simd-partial-oob-store.js @@ -0,0 +1,38 @@ +// |jit-test| skip-if: !wasmSimdEnabled() + +// Cloned from ad-hack.js but kept separate because it may have to be disabled +// on some devices until bugs are fixed. + +// Bug 1666747 - partially OOB stores are not handled correctly on ARM and ARM64. 
+// The simulators don't implement the correct semantics anyhow, so when the bug +// is fixed in the code generator they must remain excluded here. +var conf = getBuildConfiguration(); +if (conf.arm64 || conf["arm64-simulator"] || conf.arm || conf["arm-simulator"]) + quit(0); + +function get(arr, loc, len) { + let res = []; + for ( let i=0; i < len; i++ ) { + res.push(arr[loc+i]); + } + return res; +} + +for ( let offset of iota(16) ) { + var ins = wasmEvalText(` + (module + (memory (export "mem") 1 1) + (func (export "f") (param $loc i32) + (v128.store offset=${offset} (local.get $loc) (v128.const i32x4 ${1+offset} 2 3 ${4+offset*2}))))`); + + // OOB write should trap + assertErrorMessage(() => ins.exports.f(65536-15), + WebAssembly.RuntimeError, + /index out of bounds/) + + // Ensure that OOB writes don't write anything. + let start = 65536 - 15 + offset; + let legalBytes = 65536 - start; + var mem8 = new Uint8Array(ins.exports.mem.buffer); + assertSame(get(mem8, start, legalBytes), iota(legalBytes).map((_) => 0)); +} diff --git a/js/src/jit-test/tests/wasm/simd/splat-x64-ion-codegen.js b/js/src/jit-test/tests/wasm/simd/splat-x64-ion-codegen.js new file mode 100644 index 0000000000..ce1d7adb12 --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/splat-x64-ion-codegen.js @@ -0,0 +1,29 @@ +// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x64 || getBuildConfiguration().simulator || isAvxPresent(); include:codegen-x64-test.js + +// Test that there are no extraneous moves or other instructions for splat and +// other splat-like operations that can reuse its input for its output and/or +// has a specializable code path. See README-codegen.md for general information +// about this type of test case. 
+ +codegenTestX64_PTYPE_v128( + [['f32x4.splat', 'f32', `0f c6 c0 00 shufps \\$0x00, %xmm0, %xmm0`], + ['f64x2.splat', 'f64', `f2 0f 12 c0 movddup %xmm0, %xmm0`]] , {log:true}); + +// Skip these on Win64 because the ABI differs and there's a different parameter +// register, this changes not just the name slightly but the binary encoding in +// larger ways. + +if (!getBuildConfiguration().windows) { + codegenTestX64_PTYPE_v128( + [['v128.load32_splat', 'i32', ` +f3 41 0f 10 04 3f movssl \\(%r15,%rdi,1\\), %xmm0 +0f c6 c0 00 shufps \\$0x00, %xmm0, %xmm0`], + ['v128.load64_splat', 'i32', `f2 41 0f 12 04 3f movddupq \\(%r15,%rdi,1\\), %xmm0`], + ['v128.load8x8_s', 'i32', `66 41 0f 38 20 04 3f pmovsxbwq \\(%r15,%rdi,1\\), %xmm0`], + ['v128.load8x8_u', 'i32', `66 41 0f 38 30 04 3f pmovzxbwq \\(%r15,%rdi,1\\), %xmm0`], + ['v128.load16x4_s', 'i32', `66 41 0f 38 23 04 3f pmovsxwdq \\(%r15,%rdi,1\\), %xmm0`], + ['v128.load16x4_u', 'i32', `66 41 0f 38 33 04 3f pmovzxwdq \\(%r15,%rdi,1\\), %xmm0`], + ['v128.load32x2_s', 'i32', `66 41 0f 38 25 04 3f pmovsxdqq \\(%r15,%rdi,1\\), %xmm0`], + ['v128.load32x2_u', 'i32', `66 41 0f 38 35 04 3f pmovzxdqq \\(%r15,%rdi,1\\), %xmm0`]], + {memory:1}); +} diff --git a/js/src/jit-test/tests/wasm/simd/validation.js b/js/src/jit-test/tests/wasm/simd/validation.js new file mode 100644 index 0000000000..46b8df620f --- /dev/null +++ b/js/src/jit-test/tests/wasm/simd/validation.js @@ -0,0 +1,368 @@ +// |jit-test| skip-if: !wasmSimdEnabled() + +function testValid(code) { + assertEq(WebAssembly.validate(wasmTextToBinary(code)), true); +} + +function testInvalid(code) { + assertEq(WebAssembly.validate(wasmTextToBinary(code)), false); +} + +// v128 -> v128 + +for (let op of [ + 'i8x16.neg', + 'i8x16.abs', + 'i16x8.neg', + 'i16x8.abs', + 'i16x8.extend_low_i8x16_s', + 'i16x8.extend_high_i8x16_s', + 'i16x8.extend_low_i8x16_u', + 'i16x8.extend_high_i8x16_u', + 'i32x4.neg', + 'i32x4.abs', + 'i32x4.extend_low_i16x8_s', + 'i32x4.extend_high_i16x8_s', + 
'i32x4.extend_low_i16x8_u', + 'i32x4.extend_high_i16x8_u', + 'i32x4.trunc_sat_f32x4_s', + 'i32x4.trunc_sat_f32x4_u', + 'i64x2.neg', + 'f32x4.abs', + 'f32x4.neg', + 'f32x4.sqrt', + 'f32x4.convert_i32x4_s', + 'f32x4.convert_i32x4_s', + 'f64x2.abs', + 'f64x2.neg', + 'f64x2.sqrt', + 'v128.not']) +{ + testValid(`(module + (func (param v128) (result v128) + (${op} (local.get 0))))`); +} + +for (let [prefix, result, suffix] of [['i8x16', 'i32', '_s'], + ['i8x16', 'i32', '_u'], + ['i16x8', 'i32', '_s'], + ['i16x8', 'i32', '_u'], + ['i32x4', 'i32', ''], + ['i64x2', 'i64', ''], + ['f32x4', 'f32', ''], + ['f64x2', 'f64', '']]) +{ + testValid(`(module + (func (param v128) (result ${result}) + (${prefix}.extract_lane${suffix} 1 (local.get 0))))`); +} + +// The wat parser accepts small out-of-range lane indices, but they must be +// caught in validation. + +testInvalid( + `(module + (func (param v128) (result i32) + (i8x16.extract_lane_u 16 (local.get 0))))`); + +// (v128, v128) -> v128 + +for (let op of [ + 'i8x16.eq', + 'i8x16.ne', + 'i8x16.lt_s', + 'i8x16.lt_u', + 'i8x16.gt_s', + 'i8x16.gt_u', + 'i8x16.le_s', + 'i8x16.le_u', + 'i8x16.ge_s', + 'i8x16.ge_u', + 'i16x8.eq', + 'i16x8.ne', + 'i16x8.lt_s', + 'i16x8.lt_u', + 'i16x8.gt_s', + 'i16x8.gt_u', + 'i16x8.le_s', + 'i16x8.le_u', + 'i16x8.ge_s', + 'i16x8.ge_u', + 'i32x4.eq', + 'i32x4.ne', + 'i32x4.lt_s', + 'i32x4.lt_u', + 'i32x4.gt_s', + 'i32x4.gt_u', + 'i32x4.le_s', + 'i32x4.le_u', + 'i32x4.ge_s', + 'i32x4.ge_u', + 'f32x4.eq', + 'f32x4.ne', + 'f32x4.lt', + 'f32x4.gt', + 'f32x4.le', + 'f32x4.ge', + 'f64x2.eq', + 'f64x2.ne', + 'f64x2.lt', + 'f64x2.gt', + 'f64x2.le', + 'f64x2.ge', + 'v128.and', + 'v128.or', + 'v128.xor', + 'v128.andnot', + 'i8x16.avgr_u', + 'i16x8.avgr_u', + 'i8x16.add', + 'i8x16.add_sat_s', + 'i8x16.add_sat_u', + 'i8x16.sub', + 'i8x16.sub_sat_s', + 'i8x16.sub_sat_u', + 'i8x16.min_s', + 'i8x16.max_s', + 'i8x16.min_u', + 'i8x16.max_u', + 'i16x8.add', + 'i16x8.add_sat_s', + 'i16x8.add_sat_u', + 'i16x8.sub', + 
'i16x8.sub_sat_s', + 'i16x8.sub_sat_u', + 'i16x8.mul', + 'i16x8.min_s', + 'i16x8.max_s', + 'i16x8.min_u', + 'i16x8.max_u', + 'i32x4.add', + 'i32x4.sub', + 'i32x4.mul', + 'i32x4.min_s', + 'i32x4.max_s', + 'i32x4.min_u', + 'i32x4.max_u', + 'i64x2.add', + 'i64x2.sub', + 'i64x2.mul', + 'f32x4.add', + 'f32x4.sub', + 'f32x4.mul', + 'f32x4.div', + 'f32x4.min', + 'f32x4.max', + 'f64x2.add', + 'f64x2.sub', + 'f64x2.mul', + 'f64x2.div', + 'f64x2.min', + 'f64x2.max', + 'i8x16.narrow_i16x8_s', + 'i8x16.narrow_i16x8_u', + 'i16x8.narrow_i32x4_s', + 'i16x8.narrow_i32x4_u', + 'i8x16.swizzle']) +{ + testValid(`(module + (func (param v128) (param v128) (result v128) + (${op} (local.get 0) (local.get 1))))`); +} + +testValid(`(module + (func (param v128) (param v128) (result v128) + (i8x16.shuffle 0 16 1 17 2 18 3 19 4 20 5 21 6 22 7 23 (local.get 0) (local.get 1))))`); + +assertErrorMessage(() => testValid( + `(module + (func (param v128) (param v128) (result v128) + (i8x16.shuffle 0 16 1 17 2 18 3 19 4 20 5 21 6 22 7 (local.get 0) (local.get 1))))`), + SyntaxError, + /expected a u8/); + +// (v128, i32) -> v128 + +for (let op of [ + 'i8x16.shl', + 'i8x16.shr_s', + 'i8x16.shr_u', + 'i16x8.shl', + 'i16x8.shr_s', + 'i16x8.shr_u', + 'i32x4.shl', + 'i32x4.shr_s', + 'i32x4.shr_u', + 'i64x2.shl', + 'i64x2.shr_s', + 'i64x2.shr_u']) +{ + testValid(`(module + (func (param v128) (param i32) (result v128) + (${op} (local.get 0) (local.get 1))))`); +} + +// v128 -> i32 + +for (let op of [ + 'v128.any_true', + 'i8x16.all_true', + 'i16x8.all_true', + 'i32x4.all_true', + 'i8x16.bitmask', + 'i16x8.bitmask', + 'i32x4.bitmask']) +{ + testValid(`(module + (func (param v128) (result i32) + (${op} (local.get 0))))`); +} + +// T -> V128 + +for (let [op, input] of [ + ['i8x16.splat', 'i32'], + ['i16x8.splat', 'i32'], + ['i32x4.splat', 'i32'], + ['i64x2.splat', 'i64'], + ['f32x4.splat', 'f32'], + ['f64x2.splat', 'f64']]) +{ + testValid(`(module + (func (param ${input}) (result v128) + (${op} (local.get 
0))))`); +} + +// i32 -> v128 + +for (let op of [ + 'v128.load', + 'v128.load8_splat', + 'v128.load16_splat', + 'v128.load32_splat', + 'v128.load64_splat', + 'v128.load8x8_s', + 'v128.load8x8_u', + 'v128.load16x4_s', + 'v128.load16x4_u', + 'v128.load32x2_s', + 'v128.load32x2_u']) +{ + testValid(`(module + (memory 1 1) + (func (param i32) (result v128) + (${op} (local.get 0))))`); +} + +testValid(`(module + (func (result v128) + (v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)) + (func (result v128) + (v128.const i16x8 0 1 2 3 4 5 6 7)) + (func (result v128) + (v128.const i32x4 0 1 2 3)) + (func (result v128) + (v128.const i64x2 0 1)) + (func (result v128) + (v128.const f32x4 0 1 2 3)) + (func (result v128) + (v128.const f32x4 0.5 1.5 2.5 3.5)) + (func (result v128) + (v128.const f64x2 0 1)) + (func (result v128) + (v128.const f64x2 0.5 1.5)))`); + +assertErrorMessage(() => testValid( + `(module + (func (result v128) + (v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14)))`), + SyntaxError, + /expected a i8/); + +assertErrorMessage(() => testValid( + `(module + (func (result v128) + (v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 256 15)))`), + SyntaxError, + /invalid i8 number/); + +assertErrorMessage(() => testValid( + `(module + (func (result v128) + (v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 3.14 15)))`), + SyntaxError, + /expected a i8/); + +assertErrorMessage(() => testValid( + `(module + (func (result v128) + (v128.const f32x4 0.5 1.5 2.5))`), + SyntaxError, + /expected a float/); + +assertErrorMessage(() => testValid( + `(module + (func (result v128) + (v128.const i8x8 0 1 2 3 4 5 6 7)))`), + SyntaxError, + /expected one of/); + +// v128 -> () + +testValid(`(module + (memory 1 1) + (func (param i32) (param v128) + (v128.store (local.get 0) (local.get 1))))`); + +// (v128, v128, v128) -> v128 + +testValid(`(module + (func (param v128) (param v128) (param v128) (result v128) + (v128.bitselect (local.get 0) (local.get 1) (local.get 2))))`); + 
+// (v128, t) -> v128 + +for (let [prefix, input] of [['i8x16', 'i32'], + ['i16x8', 'i32'], + ['i32x4', 'i32'], + ['i64x2', 'i64'], + ['f32x4', 'f32'], + ['f64x2', 'f64']]) +{ + testValid(`(module + (func (param v128) (param ${input}) (result v128) + (${prefix}.replace_lane 1 (local.get 0) (local.get 1))))`); +} + +testInvalid( + `(module + (func (param v128) (param i32) (result v128) + (i8x16.replace_lane 16 (local.get 0) (local.get 1))))`); + +// Global variables + +testValid(`(module + (global $g (mut v128) (v128.const f32x4 1 2 3 4)))`); + +testValid(`(module + (global $g (import "m" "g") v128) + (global $h (mut v128) (global.get $g)))`); + +testValid(`(module + (global $g (export "g") v128 (v128.const f32x4 1 2 3 4)))`); + +testValid(`(module + (global $g (export "g") (mut v128) (v128.const f32x4 1 2 3 4)))`); + +// Imports, exports, calls + +testValid(`(module + (import "m" "g" (func (param v128) (result v128))) + (func (export "f") (param v128) (result v128) + (f64x2.add (local.get 0) (v128.const f64x2 1 2))))`); + +testValid(`(module + (func $f (param v128) (result v128) + (i8x16.neg (local.get 0))) + (func $g (export "g") (param v128) (result v128) + (call $f (local.get 0))))`); |