Diffstat (limited to 'library/stdarch/crates/core_arch/src/wasm32')
-rw-r--r-- library/stdarch/crates/core_arch/src/wasm32/atomic.rs   |   93
-rw-r--r-- library/stdarch/crates/core_arch/src/wasm32/memory.rs   |   58
-rw-r--r-- library/stdarch/crates/core_arch/src/wasm32/mod.rs      |   26
-rw-r--r-- library/stdarch/crates/core_arch/src/wasm32/simd128.rs  | 6136
4 files changed, 6313 insertions, 0 deletions
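The wait/notify intrinsics added in `atomic.rs` below follow a futex-style protocol: a thread parks on an address until another thread notifies it. A minimal usage sketch (the helper functions and the plain, non-atomic flag accesses are illustrative only, not part of this commit, and assume shared linear memory built with the `atomics` feature):

```rust
// Illustrative sketch only; real code would use AtomicI32 for the flag.
unsafe fn wait_for_flag(flag: *mut i32) {
    // Re-check in a loop: 0 means "woken by notify", 1 means the value
    // already differed, 2 means the timeout expired (cannot happen with a
    // negative, i.e. infinite, timeout).
    while *flag == 0 {
        memory_atomic_wait32(flag, 0, -1);
    }
}

unsafe fn set_flag(flag: *mut i32) {
    *flag = 1;
    memory_atomic_notify(flag, 1); // wake at most one parked waiter
}
```

The `memory.rs` intrinsics can be probed in the same spirit, e.g. growing the default memory by one 64 KiB page and recovering the old size:

```rust
let before = memory_size::<0>(); // current size, in 64 KiB pages
let prev = memory_grow::<0>(1);  // old size, or usize::MAX on failure
assert!(prev == before || prev == usize::MAX);
```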
diff --git a/library/stdarch/crates/core_arch/src/wasm32/atomic.rs b/library/stdarch/crates/core_arch/src/wasm32/atomic.rs
new file mode 100644
index 000000000..52d4bea87
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/wasm32/atomic.rs
@@ -0,0 +1,93 @@
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+extern "C" {
+    #[link_name = "llvm.wasm.memory.atomic.wait32"]
+    fn llvm_atomic_wait_i32(ptr: *mut i32, exp: i32, timeout: i64) -> i32;
+    #[link_name = "llvm.wasm.memory.atomic.wait64"]
+    fn llvm_atomic_wait_i64(ptr: *mut i64, exp: i64, timeout: i64) -> i32;
+    #[link_name = "llvm.wasm.memory.atomic.notify"]
+    fn llvm_atomic_notify(ptr: *mut i32, cnt: i32) -> i32;
+}
+
+/// Corresponding intrinsic to wasm's [`memory.atomic.wait32` instruction][instr]
+///
+/// This function, when called, will block the current thread if the memory
+/// pointed to by `ptr` is equal to `expression` (performing this action
+/// atomically).
+///
+/// The argument `timeout_ns` is the maximum number of nanoseconds the calling
+/// thread will be blocked for, if it blocks. If the timeout is negative then
+/// the calling thread will be blocked forever.
+///
+/// The calling thread can only be woken up with a call to the
+/// `memory_atomic_notify` intrinsic once it has been blocked. Changing the
+/// memory behind `ptr` will not wake the thread once it's blocked.
+///
+/// # Return value
+///
+/// * 0 - indicates that the thread blocked and then was woken up
+/// * 1 - the loaded value from `ptr` didn't match `expression`, the thread
+///   didn't block
+/// * 2 - the thread blocked, but the timeout expired
+///
+/// [instr]: https://webassembly.github.io/threads/core/syntax/instructions.html#syntax-instr-atomic-memory
+#[inline]
+#[cfg_attr(test, assert_instr(memory.atomic.wait32))]
+#[target_feature(enable = "atomics")]
+#[doc(alias("memory.atomic.wait32"))]
+pub unsafe fn memory_atomic_wait32(ptr: *mut i32, expression: i32, timeout_ns: i64) -> i32 {
+    llvm_atomic_wait_i32(ptr, expression, timeout_ns)
+}
+
+/// Corresponding intrinsic to wasm's [`memory.atomic.wait64` instruction][instr]
+///
+/// This function, when called, will block the current thread if the memory
+/// pointed to by `ptr` is equal to `expression` (performing this action
+/// atomically).
+///
+/// The argument `timeout_ns` is the maximum number of nanoseconds the calling
+/// thread will be blocked for, if it blocks. If the timeout is negative then
+/// the calling thread will be blocked forever.
+///
+/// The calling thread can only be woken up with a call to the
+/// `memory_atomic_notify` intrinsic once it has been blocked. Changing the
+/// memory behind `ptr` will not wake the thread once it's blocked.
+///
+/// # Return value
+///
+/// * 0 - indicates that the thread blocked and then was woken up
+/// * 1 - the loaded value from `ptr` didn't match `expression`, the thread
+///   didn't block
+/// * 2 - the thread blocked, but the timeout expired
+///
+/// [instr]: https://webassembly.github.io/threads/core/syntax/instructions.html#syntax-instr-atomic-memory
+#[inline]
+#[cfg_attr(test, assert_instr(memory.atomic.wait64))]
+#[target_feature(enable = "atomics")]
+#[doc(alias("memory.atomic.wait64"))]
+pub unsafe fn memory_atomic_wait64(ptr: *mut i64, expression: i64, timeout_ns: i64) -> i32 {
+    llvm_atomic_wait_i64(ptr, expression, timeout_ns)
+}
+
+/// Corresponding intrinsic to wasm's [`memory.atomic.notify` instruction][instr]
+///
+/// This function will notify a number of threads blocked on the address
+/// indicated by `ptr`.
+/// Threads previously blocked with the `memory_atomic_wait32`
+/// and `memory_atomic_wait64` functions above will be woken up.
+///
+/// The `waiters` argument indicates how many waiters should be woken up (a
+/// maximum). If the value is zero no waiters are woken up.
+///
+/// # Return value
+///
+/// Returns the number of waiters which were actually notified.
+///
+/// [instr]: https://webassembly.github.io/threads/core/syntax/instructions.html#syntax-instr-atomic-memory
+#[inline]
+#[cfg_attr(test, assert_instr(memory.atomic.notify))]
+#[target_feature(enable = "atomics")]
+#[doc(alias("memory.atomic.notify"))]
+pub unsafe fn memory_atomic_notify(ptr: *mut i32, waiters: u32) -> u32 {
+    llvm_atomic_notify(ptr, waiters as i32) as u32
+}
diff --git a/library/stdarch/crates/core_arch/src/wasm32/memory.rs b/library/stdarch/crates/core_arch/src/wasm32/memory.rs
new file mode 100644
index 000000000..b5cf13e98
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/wasm32/memory.rs
@@ -0,0 +1,58 @@
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+extern "C" {
+    #[link_name = "llvm.wasm.memory.grow"]
+    fn llvm_memory_grow(mem: u32, pages: usize) -> usize;
+    #[link_name = "llvm.wasm.memory.size"]
+    fn llvm_memory_size(mem: u32) -> usize;
+}
+
+/// Corresponding intrinsic to wasm's [`memory.size` instruction][instr]
+///
+/// This function, when called, will return the current memory size in units of
+/// pages. The current WebAssembly page size is 65536 bytes (64 KB).
+///
+/// The argument `MEM` is the numerical index of which memory to return the
+/// size of. Note that currently the WebAssembly specification only supports one
+/// memory, so it is required that zero is passed in. The argument is present to
+/// be forward-compatible with future WebAssembly revisions. If a nonzero
+/// argument is passed to this function it will currently unconditionally abort.
+///
+/// [instr]: http://webassembly.github.io/spec/core/exec/instructions.html#exec-memory-size
+#[inline]
+#[cfg_attr(test, assert_instr("memory.size", MEM = 0))]
+#[rustc_legacy_const_generics(0)]
+#[stable(feature = "simd_wasm32", since = "1.33.0")]
+#[doc(alias("memory.size"))]
+pub fn memory_size<const MEM: u32>() -> usize {
+    static_assert!(MEM: u32 where MEM == 0);
+    unsafe { llvm_memory_size(MEM) }
+}
+
+/// Corresponding intrinsic to wasm's [`memory.grow` instruction][instr]
+///
+/// This function, when called, will attempt to grow the default linear memory
+/// by the specified `delta` of pages. The current WebAssembly page size is
+/// 65536 bytes (64 KB). If memory is successfully grown then the previous size
+/// of memory, in pages, is returned. If memory cannot be grown then
+/// `usize::MAX` is returned.
+///
+/// The argument `MEM` is the numerical index of which memory to grow. Note
+/// that currently the WebAssembly specification only supports one memory, so
+/// it is required that zero is passed in. The argument is present to be
+/// forward-compatible with future WebAssembly revisions. If a nonzero
+/// argument is passed to this function it will currently unconditionally abort.
+/// +/// [instr]: http://webassembly.github.io/spec/core/exec/instructions.html#exec-memory-grow +#[inline] +#[cfg_attr(test, assert_instr("memory.grow", MEM = 0))] +#[rustc_legacy_const_generics(0)] +#[stable(feature = "simd_wasm32", since = "1.33.0")] +#[doc(alias("memory.grow"))] +pub fn memory_grow<const MEM: u32>(delta: usize) -> usize { + unsafe { + static_assert!(MEM: u32 where MEM == 0); + llvm_memory_grow(MEM, delta) + } +} diff --git a/library/stdarch/crates/core_arch/src/wasm32/mod.rs b/library/stdarch/crates/core_arch/src/wasm32/mod.rs new file mode 100644 index 000000000..2fbe80e99 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/wasm32/mod.rs @@ -0,0 +1,26 @@ +//! WASM32 intrinsics + +#[cfg(test)] +use stdarch_test::assert_instr; + +mod atomic; +pub use self::atomic::*; + +mod simd128; +pub use self::simd128::*; + +mod memory; +pub use self::memory::*; + +/// Generates the [`unreachable`] instruction, which causes an unconditional [trap]. +/// +/// This function is safe to call and immediately aborts the execution. +/// +/// [`unreachable`]: https://webassembly.github.io/spec/core/syntax/instructions.html#syntax-instr-control +/// [trap]: https://webassembly.github.io/spec/core/intro/overview.html#trap +#[cfg_attr(test, assert_instr(unreachable))] +#[inline] +#[stable(feature = "unreachable_wasm32", since = "1.37.0")] +pub fn unreachable() -> ! { + crate::intrinsics::abort() +} diff --git a/library/stdarch/crates/core_arch/src/wasm32/simd128.rs b/library/stdarch/crates/core_arch/src/wasm32/simd128.rs new file mode 100644 index 000000000..c0025696b --- /dev/null +++ b/library/stdarch/crates/core_arch/src/wasm32/simd128.rs @@ -0,0 +1,6136 @@ +//! This module implements the [WebAssembly `SIMD128` ISA]. +//! +//! [WebAssembly `SIMD128` ISA]: +//! https://github.com/WebAssembly/simd/blob/master/proposals/simd/SIMD.md + +#![allow(non_camel_case_types)] +#![allow(unused_imports)] + +use crate::{ + core_arch::{simd, simd_llvm::*}, + marker::Sized, + mem, ptr, +}; + +#[cfg(test)] +use stdarch_test::assert_instr; + +types! { + /// WASM-specific 128-bit wide SIMD vector type. + /// + /// This type corresponds to the `v128` type in the [WebAssembly SIMD + /// proposal](https://github.com/webassembly/simd). This type is 128-bits + /// large and the meaning of all the bits is defined within the context of + /// how this value is used. + /// + /// This same type is used simultaneously for all 128-bit-wide SIMD types, + /// for example: + /// + /// * sixteen 8-bit integers (both `i8` and `u8`) + /// * eight 16-bit integers (both `i16` and `u16`) + /// * four 32-bit integers (both `i32` and `u32`) + /// * two 64-bit integers (both `i64` and `u64`) + /// * four 32-bit floats (`f32`) + /// * two 64-bit floats (`f64`) + /// + /// The `v128` type in Rust is intended to be quite analogous to the `v128` + /// type in WebAssembly. Operations on `v128` can only be performed with the + /// functions in this module. + // N.B., internals here are arbitrary. + #[stable(feature = "wasm_simd", since = "1.54.0")] + pub struct v128(i32, i32, i32, i32); +} + +macro_rules! conversions { + ($(($name:ident = $ty:ty))*) => { + impl v128 { + $( + #[inline(always)] + fn $name(self) -> $ty { + unsafe { mem::transmute(self) } + } + )* + } + $( + impl $ty { + #[inline(always)] + #[rustc_const_stable(feature = "wasm_simd_const", since = "1.56.0")] + const fn v128(self) -> v128 { + unsafe { mem::transmute(self) } + } + } + )* + } +} + +conversions! 
{
+    (as_u8x16 = simd::u8x16)
+    (as_u16x8 = simd::u16x8)
+    (as_u32x4 = simd::u32x4)
+    (as_u64x2 = simd::u64x2)
+    (as_i8x16 = simd::i8x16)
+    (as_i16x8 = simd::i16x8)
+    (as_i32x4 = simd::i32x4)
+    (as_i64x2 = simd::i64x2)
+    (as_f32x4 = simd::f32x4)
+    (as_f64x2 = simd::f64x2)
+}
+
+#[allow(improper_ctypes)]
+extern "C" {
+    #[link_name = "llvm.wasm.swizzle"]
+    fn llvm_swizzle(a: simd::i8x16, b: simd::i8x16) -> simd::i8x16;
+
+    #[link_name = "llvm.wasm.bitselect.v16i8"]
+    fn llvm_bitselect(a: simd::i8x16, b: simd::i8x16, c: simd::i8x16) -> simd::i8x16;
+    #[link_name = "llvm.wasm.anytrue.v16i8"]
+    fn llvm_any_true_i8x16(x: simd::i8x16) -> i32;
+
+    #[link_name = "llvm.wasm.alltrue.v16i8"]
+    fn llvm_i8x16_all_true(x: simd::i8x16) -> i32;
+    #[link_name = "llvm.ctpop.v16i8"]
+    fn llvm_popcnt(a: simd::i8x16) -> simd::i8x16;
+    #[link_name = "llvm.wasm.bitmask.v16i8"]
+    fn llvm_bitmask_i8x16(a: simd::i8x16) -> i32;
+    #[link_name = "llvm.wasm.narrow.signed.v16i8.v8i16"]
+    fn llvm_narrow_i8x16_s(a: simd::i16x8, b: simd::i16x8) -> simd::i8x16;
+    #[link_name = "llvm.wasm.narrow.unsigned.v16i8.v8i16"]
+    fn llvm_narrow_i8x16_u(a: simd::i16x8, b: simd::i16x8) -> simd::i8x16;
+    #[link_name = "llvm.sadd.sat.v16i8"]
+    fn llvm_i8x16_add_sat_s(a: simd::i8x16, b: simd::i8x16) -> simd::i8x16;
+    #[link_name = "llvm.uadd.sat.v16i8"]
+    fn llvm_i8x16_add_sat_u(a: simd::i8x16, b: simd::i8x16) -> simd::i8x16;
+    #[link_name = "llvm.wasm.sub.sat.signed.v16i8"]
+    fn llvm_i8x16_sub_sat_s(a: simd::i8x16, b: simd::i8x16) -> simd::i8x16;
+    #[link_name = "llvm.wasm.sub.sat.unsigned.v16i8"]
+    fn llvm_i8x16_sub_sat_u(a: simd::i8x16, b: simd::i8x16) -> simd::i8x16;
+    #[link_name = "llvm.wasm.avgr.unsigned.v16i8"]
+    fn llvm_avgr_u_i8x16(a: simd::i8x16, b: simd::i8x16) -> simd::i8x16;
+
+    #[link_name = "llvm.wasm.extadd.pairwise.signed.v8i16"]
+    fn llvm_i16x8_extadd_pairwise_i8x16_s(x: simd::i8x16) -> simd::i16x8;
+    #[link_name = "llvm.wasm.extadd.pairwise.unsigned.v8i16"]
+    fn llvm_i16x8_extadd_pairwise_i8x16_u(x: simd::i8x16) -> simd::i16x8;
+    #[link_name = "llvm.wasm.q15mulr.sat.signed"]
+    fn llvm_q15mulr(a: simd::i16x8, b: simd::i16x8) -> simd::i16x8;
+    #[link_name = "llvm.wasm.alltrue.v8i16"]
+    fn llvm_i16x8_all_true(x: simd::i16x8) -> i32;
+    #[link_name = "llvm.wasm.bitmask.v8i16"]
+    fn llvm_bitmask_i16x8(a: simd::i16x8) -> i32;
+    #[link_name = "llvm.wasm.narrow.signed.v8i16.v4i32"]
+    fn llvm_narrow_i16x8_s(a: simd::i32x4, b: simd::i32x4) -> simd::i16x8;
+    #[link_name = "llvm.wasm.narrow.unsigned.v8i16.v4i32"]
+    fn llvm_narrow_i16x8_u(a: simd::i32x4, b: simd::i32x4) -> simd::i16x8;
+    #[link_name = "llvm.sadd.sat.v8i16"]
+    fn llvm_i16x8_add_sat_s(a: simd::i16x8, b: simd::i16x8) -> simd::i16x8;
+    #[link_name = "llvm.uadd.sat.v8i16"]
+    fn llvm_i16x8_add_sat_u(a: simd::i16x8, b: simd::i16x8) -> simd::i16x8;
+    #[link_name = "llvm.wasm.sub.sat.signed.v8i16"]
+    fn llvm_i16x8_sub_sat_s(a: simd::i16x8, b: simd::i16x8) -> simd::i16x8;
+    #[link_name = "llvm.wasm.sub.sat.unsigned.v8i16"]
+    fn llvm_i16x8_sub_sat_u(a: simd::i16x8, b: simd::i16x8) -> simd::i16x8;
+    #[link_name = "llvm.wasm.avgr.unsigned.v8i16"]
+    fn llvm_avgr_u_i16x8(a: simd::i16x8, b: simd::i16x8) -> simd::i16x8;
+
+    #[link_name = "llvm.wasm.extadd.pairwise.signed.v4i32"]
+    fn llvm_i32x4_extadd_pairwise_i16x8_s(x: simd::i16x8) -> simd::i32x4;
+    #[link_name = "llvm.wasm.extadd.pairwise.unsigned.v4i32"]
+    fn llvm_i32x4_extadd_pairwise_i16x8_u(x: simd::i16x8) -> simd::i32x4;
+    #[link_name = "llvm.wasm.alltrue.v4i32"]
+    fn llvm_i32x4_all_true(x: simd::i32x4) -> i32;
+    #[link_name =
"llvm.wasm.bitmask.v4i32"] + fn llvm_bitmask_i32x4(a: simd::i32x4) -> i32; + #[link_name = "llvm.wasm.dot"] + fn llvm_i32x4_dot_i16x8_s(a: simd::i16x8, b: simd::i16x8) -> simd::i32x4; + + #[link_name = "llvm.wasm.alltrue.v2i64"] + fn llvm_i64x2_all_true(x: simd::i64x2) -> i32; + #[link_name = "llvm.wasm.bitmask.v2i64"] + fn llvm_bitmask_i64x2(a: simd::i64x2) -> i32; + + #[link_name = "llvm.ceil.v4f32"] + fn llvm_f32x4_ceil(x: simd::f32x4) -> simd::f32x4; + #[link_name = "llvm.floor.v4f32"] + fn llvm_f32x4_floor(x: simd::f32x4) -> simd::f32x4; + #[link_name = "llvm.trunc.v4f32"] + fn llvm_f32x4_trunc(x: simd::f32x4) -> simd::f32x4; + #[link_name = "llvm.nearbyint.v4f32"] + fn llvm_f32x4_nearest(x: simd::f32x4) -> simd::f32x4; + #[link_name = "llvm.fabs.v4f32"] + fn llvm_f32x4_abs(x: simd::f32x4) -> simd::f32x4; + #[link_name = "llvm.sqrt.v4f32"] + fn llvm_f32x4_sqrt(x: simd::f32x4) -> simd::f32x4; + #[link_name = "llvm.minimum.v4f32"] + fn llvm_f32x4_min(x: simd::f32x4, y: simd::f32x4) -> simd::f32x4; + #[link_name = "llvm.maximum.v4f32"] + fn llvm_f32x4_max(x: simd::f32x4, y: simd::f32x4) -> simd::f32x4; + + #[link_name = "llvm.ceil.v2f64"] + fn llvm_f64x2_ceil(x: simd::f64x2) -> simd::f64x2; + #[link_name = "llvm.floor.v2f64"] + fn llvm_f64x2_floor(x: simd::f64x2) -> simd::f64x2; + #[link_name = "llvm.trunc.v2f64"] + fn llvm_f64x2_trunc(x: simd::f64x2) -> simd::f64x2; + #[link_name = "llvm.nearbyint.v2f64"] + fn llvm_f64x2_nearest(x: simd::f64x2) -> simd::f64x2; + #[link_name = "llvm.fabs.v2f64"] + fn llvm_f64x2_abs(x: simd::f64x2) -> simd::f64x2; + #[link_name = "llvm.sqrt.v2f64"] + fn llvm_f64x2_sqrt(x: simd::f64x2) -> simd::f64x2; + #[link_name = "llvm.minimum.v2f64"] + fn llvm_f64x2_min(x: simd::f64x2, y: simd::f64x2) -> simd::f64x2; + #[link_name = "llvm.maximum.v2f64"] + fn llvm_f64x2_max(x: simd::f64x2, y: simd::f64x2) -> simd::f64x2; + + #[link_name = "llvm.fptosi.sat.v4i32.v4f32"] + fn llvm_i32x4_trunc_sat_f32x4_s(x: simd::f32x4) -> simd::i32x4; + #[link_name = "llvm.fptoui.sat.v4i32.v4f32"] + fn llvm_i32x4_trunc_sat_f32x4_u(x: simd::f32x4) -> simd::i32x4; + #[link_name = "llvm.fptosi.sat.v2i32.v2f64"] + fn llvm_i32x2_trunc_sat_f64x2_s(x: simd::f64x2) -> simd::i32x2; + #[link_name = "llvm.fptoui.sat.v2i32.v2f64"] + fn llvm_i32x2_trunc_sat_f64x2_u(x: simd::f64x2) -> simd::i32x2; +} + +#[repr(packed)] +#[derive(Copy)] +struct Unaligned<T>(T); + +impl<T: Copy> Clone for Unaligned<T> { + fn clone(&self) -> Unaligned<T> { + *self + } +} + +/// Loads a `v128` vector from the given heap address. +/// +/// This intrinsic will emit a load with an alignment of 1. While this is +/// provided for completeness it is not strictly necessary, you can also load +/// the pointer directly: +/// +/// ```rust,ignore +/// let a: &v128 = ...; +/// let value = unsafe { v128_load(a) }; +/// // .. is the same as .. +/// let value = *a; +/// ``` +/// +/// The alignment of the load can be configured by doing a manual load without +/// this intrinsic. +/// +/// # Unsafety +/// +/// This intrinsic is unsafe because it takes a raw pointer as an argument, and +/// the pointer must be valid to load 16 bytes from. Note that there is no +/// alignment requirement on this pointer since this intrinsic performs a +/// 1-aligned load. 
+#[inline] +#[cfg_attr(test, assert_instr(v128.load))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.load"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub unsafe fn v128_load(m: *const v128) -> v128 { + (*(m as *const Unaligned<v128>)).0 +} + +/// Load eight 8-bit integers and sign extend each one to a 16-bit lane +/// +/// # Unsafety +/// +/// This intrinsic is unsafe because it takes a raw pointer as an argument, and +/// the pointer must be valid to load 8 bytes from. Note that there is no +/// alignment requirement on this pointer since this intrinsic performs a +/// 1-aligned load. +#[inline] +#[cfg_attr(test, assert_instr(v128.load8x8_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.load8x8_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub unsafe fn i16x8_load_extend_i8x8(m: *const i8) -> v128 { + let m = *(m as *const Unaligned<simd::i8x8>); + simd_cast::<_, simd::i16x8>(m.0).v128() +} + +/// Load eight 8-bit integers and zero extend each one to a 16-bit lane +/// +/// # Unsafety +/// +/// This intrinsic is unsafe because it takes a raw pointer as an argument, and +/// the pointer must be valid to load 8 bytes from. Note that there is no +/// alignment requirement on this pointer since this intrinsic performs a +/// 1-aligned load. +#[inline] +#[cfg_attr(test, assert_instr(v128.load8x8_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.load8x8_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub unsafe fn i16x8_load_extend_u8x8(m: *const u8) -> v128 { + let m = *(m as *const Unaligned<simd::u8x8>); + simd_cast::<_, simd::u16x8>(m.0).v128() +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i16x8_load_extend_u8x8 as u16x8_load_extend_u8x8; + +/// Load four 16-bit integers and sign extend each one to a 32-bit lane +/// +/// # Unsafety +/// +/// This intrinsic is unsafe because it takes a raw pointer as an argument, and +/// the pointer must be valid to load 8 bytes from. Note that there is no +/// alignment requirement on this pointer since this intrinsic performs a +/// 1-aligned load. +#[inline] +#[cfg_attr(test, assert_instr(v128.load16x4_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.load16x4_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub unsafe fn i32x4_load_extend_i16x4(m: *const i16) -> v128 { + let m = *(m as *const Unaligned<simd::i16x4>); + simd_cast::<_, simd::i32x4>(m.0).v128() +} + +/// Load four 16-bit integers and zero extend each one to a 32-bit lane +/// +/// # Unsafety +/// +/// This intrinsic is unsafe because it takes a raw pointer as an argument, and +/// the pointer must be valid to load 8 bytes from. Note that there is no +/// alignment requirement on this pointer since this intrinsic performs a +/// 1-aligned load. +#[inline] +#[cfg_attr(test, assert_instr(v128.load16x4_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.load16x4_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub unsafe fn i32x4_load_extend_u16x4(m: *const u16) -> v128 { + let m = *(m as *const Unaligned<simd::u16x4>); + simd_cast::<_, simd::u32x4>(m.0).v128() +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i32x4_load_extend_u16x4 as u32x4_load_extend_u16x4; + +/// Load two 32-bit integers and sign extend each one to a 64-bit lane +/// +/// # Unsafety +/// +/// This intrinsic is unsafe because it takes a raw pointer as an argument, and +/// the pointer must be valid to load 8 bytes from. 
Note that there is no +/// alignment requirement on this pointer since this intrinsic performs a +/// 1-aligned load. +#[inline] +#[cfg_attr(test, assert_instr(v128.load32x2_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.load32x2_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub unsafe fn i64x2_load_extend_i32x2(m: *const i32) -> v128 { + let m = *(m as *const Unaligned<simd::i32x2>); + simd_cast::<_, simd::i64x2>(m.0).v128() +} + +/// Load two 32-bit integers and zero extend each one to a 64-bit lane +/// +/// # Unsafety +/// +/// This intrinsic is unsafe because it takes a raw pointer as an argument, and +/// the pointer must be valid to load 8 bytes from. Note that there is no +/// alignment requirement on this pointer since this intrinsic performs a +/// 1-aligned load. +#[inline] +#[cfg_attr(test, assert_instr(v128.load32x2_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.load32x2_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub unsafe fn i64x2_load_extend_u32x2(m: *const u32) -> v128 { + let m = *(m as *const Unaligned<simd::u32x2>); + simd_cast::<_, simd::u64x2>(m.0).v128() +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i64x2_load_extend_u32x2 as u64x2_load_extend_u32x2; + +/// Load a single element and splat to all lanes of a v128 vector. +/// +/// While this intrinsic is provided for completeness it can also be replaced +/// with `u8x16_splat(*m)` and it should generate equivalent code (and also not +/// require `unsafe`). +/// +/// # Unsafety +/// +/// This intrinsic is unsafe because it takes a raw pointer as an argument, and +/// the pointer must be valid to load 1 byte from. Note that there is no +/// alignment requirement on this pointer since this intrinsic performs a +/// 1-aligned load. +#[inline] +#[cfg_attr(test, assert_instr(v128.load8_splat))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.load8_splat"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub unsafe fn v128_load8_splat(m: *const u8) -> v128 { + u8x16_splat(*m) +} + +/// Load a single element and splat to all lanes of a v128 vector. +/// +/// While this intrinsic is provided for completeness it can also be replaced +/// with `u16x8_splat(*m)` and it should generate equivalent code (and also not +/// require `unsafe`). +/// +/// # Unsafety +/// +/// This intrinsic is unsafe because it takes a raw pointer as an argument, and +/// the pointer must be valid to load 2 bytes from. Note that there is no +/// alignment requirement on this pointer since this intrinsic performs a +/// 1-aligned load. +#[inline] +#[cfg_attr(test, assert_instr(v128.load16_splat))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.load16_splat"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub unsafe fn v128_load16_splat(m: *const u16) -> v128 { + u16x8_splat(ptr::read_unaligned(m)) +} + +/// Load a single element and splat to all lanes of a v128 vector. +/// +/// While this intrinsic is provided for completeness it can also be replaced +/// with `u32x4_splat(*m)` and it should generate equivalent code (and also not +/// require `unsafe`). +/// +/// # Unsafety +/// +/// This intrinsic is unsafe because it takes a raw pointer as an argument, and +/// the pointer must be valid to load 4 bytes from. Note that there is no +/// alignment requirement on this pointer since this intrinsic performs a +/// 1-aligned load. 
+#[inline]
+#[cfg_attr(test, assert_instr(v128.load32_splat))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("v128.load32_splat"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub unsafe fn v128_load32_splat(m: *const u32) -> v128 {
+    u32x4_splat(ptr::read_unaligned(m))
+}
+
+/// Load a single element and splat to all lanes of a v128 vector.
+///
+/// While this intrinsic is provided for completeness it can also be replaced
+/// with `u64x2_splat(*m)` and it should generate equivalent code (and also not
+/// require `unsafe`).
+///
+/// # Unsafety
+///
+/// This intrinsic is unsafe because it takes a raw pointer as an argument, and
+/// the pointer must be valid to load 8 bytes from. Note that there is no
+/// alignment requirement on this pointer since this intrinsic performs a
+/// 1-aligned load.
+#[inline]
+#[cfg_attr(test, assert_instr(v128.load64_splat))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("v128.load64_splat"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub unsafe fn v128_load64_splat(m: *const u64) -> v128 {
+    u64x2_splat(ptr::read_unaligned(m))
+}
+
+/// Loads a 32-bit element into the low bits of the vector and sets all other
+/// bits to zero.
+///
+/// This intrinsic is provided for completeness and is equivalent to `u32x4(*m,
+/// 0, 0, 0)` (which doesn't require `unsafe`).
+///
+/// # Unsafety
+///
+/// This intrinsic is unsafe because it takes a raw pointer as an argument, and
+/// the pointer must be valid to load 4 bytes from. Note that there is no
+/// alignment requirement on this pointer since this intrinsic performs a
+/// 1-aligned load.
+#[inline]
+#[cfg_attr(test, assert_instr(v128.load32_zero))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("v128.load32_zero"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub unsafe fn v128_load32_zero(m: *const u32) -> v128 {
+    u32x4(ptr::read_unaligned(m), 0, 0, 0)
+}
+
+/// Loads a 64-bit element into the low bits of the vector and sets all other
+/// bits to zero.
+///
+/// This intrinsic is provided for completeness and is equivalent to
+/// `u64x2_replace_lane::<0>(u64x2(0, 0), *m)` (which doesn't require `unsafe`).
+///
+/// # Unsafety
+///
+/// This intrinsic is unsafe because it takes a raw pointer as an argument, and
+/// the pointer must be valid to load 8 bytes from. Note that there is no
+/// alignment requirement on this pointer since this intrinsic performs a
+/// 1-aligned load.
+#[inline]
+#[cfg_attr(test, assert_instr(v128.load64_zero))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("v128.load64_zero"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub unsafe fn v128_load64_zero(m: *const u64) -> v128 {
+    u64x2_replace_lane::<0>(u64x2(0, 0), ptr::read_unaligned(m))
+}
+
+/// Stores a `v128` vector to the given heap address.
+///
+/// This intrinsic will emit a store with an alignment of 1. While this is
+/// provided for completeness it is not strictly necessary, you can also store
+/// the pointer directly:
+///
+/// ```rust,ignore
+/// let a: &mut v128 = ...;
+/// unsafe { v128_store(a, value) };
+/// // .. is the same as ..
+/// *a = value;
+/// ```
+///
+/// The alignment of the store can be configured by doing a manual store without
+/// this intrinsic.
+///
+/// # Unsafety
+///
+/// This intrinsic is unsafe because it takes a raw pointer as an argument, and
+/// the pointer must be valid to store 16 bytes to. Note that there is no
+/// alignment requirement on this pointer since this intrinsic performs a
+/// 1-aligned store.
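+// Illustrative sketch (not part of this commit): `v128_store` plus
+// `v128_load` round-trips a vector through an unaligned byte buffer:
+//
+//     unsafe fn roundtrip(buf: &mut [u8; 16], v: v128) -> v128 {
+//         v128_store(buf.as_mut_ptr() as *mut v128, v);
+//         v128_load(buf.as_ptr() as *const v128)
+//     }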
+#[inline] +#[cfg_attr(test, assert_instr(v128.store))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.store"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub unsafe fn v128_store(m: *mut v128, a: v128) { + *(m as *mut Unaligned<v128>) = Unaligned(a); +} + +/// Loads an 8-bit value from `m` and sets lane `L` of `v` to that value. +/// +/// This intrinsic is provided for completeness and is equivalent to +/// `u8x16_replace_lane::<L>(v, *m)` (which doesn't require `unsafe`). +/// +/// # Unsafety +/// +/// This intrinsic is unsafe because it takes a raw pointer as an argument, and +/// the pointer must be valid to load 1 byte from. Note that there is no +/// alignment requirement on this pointer since this intrinsic performs a +/// 1-aligned load. +#[inline] +#[cfg_attr(test, assert_instr(v128.load8_lane, L = 0))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.load8_lane"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub unsafe fn v128_load8_lane<const L: usize>(v: v128, m: *const u8) -> v128 { + u8x16_replace_lane::<L>(v, *m) +} + +/// Loads a 16-bit value from `m` and sets lane `L` of `v` to that value. +/// +/// This intrinsic is provided for completeness and is equivalent to +/// `u16x8_replace_lane::<L>(v, *m)` (which doesn't require `unsafe`). +/// +/// # Unsafety +/// +/// This intrinsic is unsafe because it takes a raw pointer as an argument, and +/// the pointer must be valid to load 2 bytes from. Note that there is no +/// alignment requirement on this pointer since this intrinsic performs a +/// 1-aligned load. +#[inline] +#[cfg_attr(test, assert_instr(v128.load16_lane, L = 0))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.load16_lane"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub unsafe fn v128_load16_lane<const L: usize>(v: v128, m: *const u16) -> v128 { + u16x8_replace_lane::<L>(v, ptr::read_unaligned(m)) +} + +/// Loads a 32-bit value from `m` and sets lane `L` of `v` to that value. +/// +/// This intrinsic is provided for completeness and is equivalent to +/// `u32x4_replace_lane::<L>(v, *m)` (which doesn't require `unsafe`). +/// +/// # Unsafety +/// +/// This intrinsic is unsafe because it takes a raw pointer as an argument, and +/// the pointer must be valid to load 4 bytes from. Note that there is no +/// alignment requirement on this pointer since this intrinsic performs a +/// 1-aligned load. +#[inline] +#[cfg_attr(test, assert_instr(v128.load32_lane, L = 0))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.load32_lane"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub unsafe fn v128_load32_lane<const L: usize>(v: v128, m: *const u32) -> v128 { + u32x4_replace_lane::<L>(v, ptr::read_unaligned(m)) +} + +/// Loads a 64-bit value from `m` and sets lane `L` of `v` to that value. +/// +/// This intrinsic is provided for completeness and is equivalent to +/// `u64x2_replace_lane::<L>(v, *m)` (which doesn't require `unsafe`). +/// +/// # Unsafety +/// +/// This intrinsic is unsafe because it takes a raw pointer as an argument, and +/// the pointer must be valid to load 8 bytes from. Note that there is no +/// alignment requirement on this pointer since this intrinsic performs a +/// 1-aligned load. 
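+// Illustrative sketch (not part of this commit): the lane-wise loads can
+// gather two 64-bit values from unrelated addresses into one vector:
+//
+//     unsafe fn gather2(lo: *const u64, hi: *const u64) -> v128 {
+//         let v = v128_load64_lane::<0>(u64x2(0, 0), lo);
+//         v128_load64_lane::<1>(v, hi)
+//     }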
+#[inline] +#[cfg_attr(test, assert_instr(v128.load64_lane, L = 0))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.load64_lane"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub unsafe fn v128_load64_lane<const L: usize>(v: v128, m: *const u64) -> v128 { + u64x2_replace_lane::<L>(v, ptr::read_unaligned(m)) +} + +/// Stores the 8-bit value from lane `L` of `v` into `m` +/// +/// This intrinsic is provided for completeness and is equivalent to +/// `*m = u8x16_extract_lane::<L>(v)` (which doesn't require `unsafe`). +/// +/// # Unsafety +/// +/// This intrinsic is unsafe because it takes a raw pointer as an argument, and +/// the pointer must be valid to store 1 byte to. Note that there is no +/// alignment requirement on this pointer since this intrinsic performs a +/// 1-aligned store. +#[inline] +#[cfg_attr(test, assert_instr(v128.store8_lane, L = 0))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.store8_lane"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub unsafe fn v128_store8_lane<const L: usize>(v: v128, m: *mut u8) { + *m = u8x16_extract_lane::<L>(v); +} + +/// Stores the 16-bit value from lane `L` of `v` into `m` +/// +/// This intrinsic is provided for completeness and is equivalent to +/// `*m = u16x8_extract_lane::<L>(v)` (which doesn't require `unsafe`). +/// +/// # Unsafety +/// +/// This intrinsic is unsafe because it takes a raw pointer as an argument, and +/// the pointer must be valid to store 2 bytes to. Note that there is no +/// alignment requirement on this pointer since this intrinsic performs a +/// 1-aligned store. +#[inline] +#[cfg_attr(test, assert_instr(v128.store16_lane, L = 0))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.store16_lane"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub unsafe fn v128_store16_lane<const L: usize>(v: v128, m: *mut u16) { + ptr::write_unaligned(m, u16x8_extract_lane::<L>(v)) +} + +/// Stores the 32-bit value from lane `L` of `v` into `m` +/// +/// This intrinsic is provided for completeness and is equivalent to +/// `*m = u32x4_extract_lane::<L>(v)` (which doesn't require `unsafe`). +/// +/// # Unsafety +/// +/// This intrinsic is unsafe because it takes a raw pointer as an argument, and +/// the pointer must be valid to store 4 bytes to. Note that there is no +/// alignment requirement on this pointer since this intrinsic performs a +/// 1-aligned store. +#[inline] +#[cfg_attr(test, assert_instr(v128.store32_lane, L = 0))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.store32_lane"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub unsafe fn v128_store32_lane<const L: usize>(v: v128, m: *mut u32) { + ptr::write_unaligned(m, u32x4_extract_lane::<L>(v)) +} + +/// Stores the 64-bit value from lane `L` of `v` into `m` +/// +/// This intrinsic is provided for completeness and is equivalent to +/// `*m = u64x2_extract_lane::<L>(v)` (which doesn't require `unsafe`). +/// +/// # Unsafety +/// +/// This intrinsic is unsafe because it takes a raw pointer as an argument, and +/// the pointer must be valid to store 8 bytes to. Note that there is no +/// alignment requirement on this pointer since this intrinsic performs a +/// 1-aligned store. 
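+// Illustrative sketch (not part of this commit): the lane-wise stores
+// scatter a vector's halves to unrelated addresses, mirroring the gather
+// sketch further up:
+//
+//     unsafe fn scatter2(v: v128, lo: *mut u64, hi: *mut u64) {
+//         v128_store64_lane::<0>(v, lo);
+//         v128_store64_lane::<1>(v, hi);
+//     }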
+#[inline] +#[cfg_attr(test, assert_instr(v128.store64_lane, L = 0))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.store64_lane"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub unsafe fn v128_store64_lane<const L: usize>(v: v128, m: *mut u64) { + ptr::write_unaligned(m, u64x2_extract_lane::<L>(v)) +} + +/// Materializes a SIMD value from the provided operands. +/// +/// If possible this will generate a `v128.const` instruction, otherwise it may +/// be lowered to a sequence of instructions to materialize the vector value. +#[inline] +#[target_feature(enable = "simd128")] +#[cfg_attr( + test, + assert_instr( + v128.const, + a0 = 0, + a1 = 1, + a2 = 2, + a3 = 3, + a4 = 4, + a5 = 5, + a6 = 6, + a7 = 7, + a8 = 8, + a9 = 9, + a10 = 10, + a11 = 11, + a12 = 12, + a13 = 13, + a14 = 14, + a15 = 15, + ) +)] +#[doc(alias("v128.const"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +#[rustc_const_stable(feature = "wasm_simd", since = "1.54.0")] +pub const fn i8x16( + a0: i8, + a1: i8, + a2: i8, + a3: i8, + a4: i8, + a5: i8, + a6: i8, + a7: i8, + a8: i8, + a9: i8, + a10: i8, + a11: i8, + a12: i8, + a13: i8, + a14: i8, + a15: i8, +) -> v128 { + simd::i8x16( + a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, + ) + .v128() +} + +/// Materializes a SIMD value from the provided operands. +/// +/// If possible this will generate a `v128.const` instruction, otherwise it may +/// be lowered to a sequence of instructions to materialize the vector value. +#[inline] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.const"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +#[rustc_const_stable(feature = "wasm_simd", since = "1.54.0")] +pub const fn u8x16( + a0: u8, + a1: u8, + a2: u8, + a3: u8, + a4: u8, + a5: u8, + a6: u8, + a7: u8, + a8: u8, + a9: u8, + a10: u8, + a11: u8, + a12: u8, + a13: u8, + a14: u8, + a15: u8, +) -> v128 { + simd::u8x16( + a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, + ) + .v128() +} + +/// Materializes a SIMD value from the provided operands. +/// +/// If possible this will generate a `v128.const` instruction, otherwise it may +/// be lowered to a sequence of instructions to materialize the vector value. +#[inline] +#[target_feature(enable = "simd128")] +#[cfg_attr( + test, + assert_instr( + v128.const, + a0 = 0, + a1 = 1, + a2 = 2, + a3 = 3, + a4 = 4, + a5 = 5, + a6 = 6, + a7 = 7, + ) +)] +#[doc(alias("v128.const"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +#[rustc_const_stable(feature = "wasm_simd", since = "1.54.0")] +pub const fn i16x8(a0: i16, a1: i16, a2: i16, a3: i16, a4: i16, a5: i16, a6: i16, a7: i16) -> v128 { + simd::i16x8(a0, a1, a2, a3, a4, a5, a6, a7).v128() +} + +/// Materializes a SIMD value from the provided operands. +/// +/// If possible this will generate a `v128.const` instruction, otherwise it may +/// be lowered to a sequence of instructions to materialize the vector value. +#[inline] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.const"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +#[rustc_const_stable(feature = "wasm_simd", since = "1.54.0")] +pub const fn u16x8(a0: u16, a1: u16, a2: u16, a3: u16, a4: u16, a5: u16, a6: u16, a7: u16) -> v128 { + simd::u16x8(a0, a1, a2, a3, a4, a5, a6, a7).v128() +} + +/// Materializes a SIMD value from the provided operands. +/// +/// If possible this will generate a `v128.const` instruction, otherwise it may +/// be lowered to a sequence of instructions to materialize the vector value. 
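+// Illustrative sketch (not part of this commit): because these constructors
+// are `const fn`, fully-constant vectors can be built at compile time:
+//
+//     const ONES: v128 = i32x4(1, 1, 1, 1);
+//     const HALVES: v128 = f32x4(0.5, 0.5, 0.5, 0.5);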
+#[inline]
+#[target_feature(enable = "simd128")]
+#[cfg_attr(test, assert_instr(v128.const, a0 = 0, a1 = 1, a2 = 2, a3 = 3))]
+#[doc(alias("v128.const"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+#[rustc_const_stable(feature = "wasm_simd", since = "1.54.0")]
+pub const fn i32x4(a0: i32, a1: i32, a2: i32, a3: i32) -> v128 {
+    simd::i32x4(a0, a1, a2, a3).v128()
+}
+
+/// Materializes a SIMD value from the provided operands.
+///
+/// If possible this will generate a `v128.const` instruction, otherwise it may
+/// be lowered to a sequence of instructions to materialize the vector value.
+#[inline]
+#[target_feature(enable = "simd128")]
+#[doc(alias("v128.const"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+#[rustc_const_stable(feature = "wasm_simd", since = "1.54.0")]
+pub const fn u32x4(a0: u32, a1: u32, a2: u32, a3: u32) -> v128 {
+    simd::u32x4(a0, a1, a2, a3).v128()
+}
+
+/// Materializes a SIMD value from the provided operands.
+///
+/// If possible this will generate a `v128.const` instruction, otherwise it may
+/// be lowered to a sequence of instructions to materialize the vector value.
+#[inline]
+#[target_feature(enable = "simd128")]
+#[cfg_attr(test, assert_instr(v128.const, a0 = 1, a1 = 2))]
+#[doc(alias("v128.const"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+#[rustc_const_stable(feature = "wasm_simd", since = "1.54.0")]
+pub const fn i64x2(a0: i64, a1: i64) -> v128 {
+    simd::i64x2(a0, a1).v128()
+}
+
+/// Materializes a SIMD value from the provided operands.
+///
+/// If possible this will generate a `v128.const` instruction, otherwise it may
+/// be lowered to a sequence of instructions to materialize the vector value.
+#[inline]
+#[target_feature(enable = "simd128")]
+#[doc(alias("v128.const"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+#[rustc_const_stable(feature = "wasm_simd", since = "1.54.0")]
+pub const fn u64x2(a0: u64, a1: u64) -> v128 {
+    simd::u64x2(a0, a1).v128()
+}
+
+/// Materializes a SIMD value from the provided operands.
+///
+/// If possible this will generate a `v128.const` instruction, otherwise it may
+/// be lowered to a sequence of instructions to materialize the vector value.
+#[inline]
+#[target_feature(enable = "simd128")]
+#[cfg_attr(test, assert_instr(v128.const, a0 = 0.0, a1 = 1.0, a2 = 2.0, a3 = 3.0))]
+#[doc(alias("v128.const"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+#[rustc_const_stable(feature = "wasm_simd_const", since = "1.56.0")]
+pub const fn f32x4(a0: f32, a1: f32, a2: f32, a3: f32) -> v128 {
+    simd::f32x4(a0, a1, a2, a3).v128()
+}
+
+/// Materializes a SIMD value from the provided operands.
+///
+/// If possible this will generate a `v128.const` instruction, otherwise it may
+/// be lowered to a sequence of instructions to materialize the vector value.
+#[inline]
+#[target_feature(enable = "simd128")]
+#[cfg_attr(test, assert_instr(v128.const, a0 = 0.0, a1 = 1.0))]
+#[doc(alias("v128.const"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+#[rustc_const_stable(feature = "wasm_simd_const", since = "1.56.0")]
+pub const fn f64x2(a0: f64, a1: f64) -> v128 {
+    simd::f64x2(a0, a1).v128()
+}
+
+/// Returns a new vector with lanes selected from the lanes of the two input
+/// vectors `a` and `b` specified in the 16 immediate operands.
+///
+/// Both `a` and `b` must have type `v128`, and this function generates a wasm
+/// instruction that is encoded with 16 bytes providing the indices of the
+/// elements to return.
+/// The indices `i` in the range [0, 15] select the `i`-th element of `a`, and
+/// the indices in the range [16, 31] select the `i - 16`-th element of `b`.
+///
+/// Note that the indices are supplied as const generic parameters because the
+/// generated instruction requires them to be constants; a compile-time error
+/// is generated if any index is out of bounds.
+///
+/// All indices must have the type `usize` and be less than 32.
+#[inline]
+#[cfg_attr(test,
+    assert_instr(
+        i8x16.shuffle,
+        I0 = 0,
+        I1 = 2,
+        I2 = 4,
+        I3 = 6,
+        I4 = 8,
+        I5 = 10,
+        I6 = 12,
+        I7 = 14,
+        I8 = 16,
+        I9 = 18,
+        I10 = 20,
+        I11 = 22,
+        I12 = 24,
+        I13 = 26,
+        I14 = 28,
+        I15 = 30,
+    )
+)]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i8x16.shuffle"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i8x16_shuffle<
+    const I0: usize,
+    const I1: usize,
+    const I2: usize,
+    const I3: usize,
+    const I4: usize,
+    const I5: usize,
+    const I6: usize,
+    const I7: usize,
+    const I8: usize,
+    const I9: usize,
+    const I10: usize,
+    const I11: usize,
+    const I12: usize,
+    const I13: usize,
+    const I14: usize,
+    const I15: usize,
+>(
+    a: v128,
+    b: v128,
+) -> v128 {
+    static_assert!(I0: usize where I0 < 32);
+    static_assert!(I1: usize where I1 < 32);
+    static_assert!(I2: usize where I2 < 32);
+    static_assert!(I3: usize where I3 < 32);
+    static_assert!(I4: usize where I4 < 32);
+    static_assert!(I5: usize where I5 < 32);
+    static_assert!(I6: usize where I6 < 32);
+    static_assert!(I7: usize where I7 < 32);
+    static_assert!(I8: usize where I8 < 32);
+    static_assert!(I9: usize where I9 < 32);
+    static_assert!(I10: usize where I10 < 32);
+    static_assert!(I11: usize where I11 < 32);
+    static_assert!(I12: usize where I12 < 32);
+    static_assert!(I13: usize where I13 < 32);
+    static_assert!(I14: usize where I14 < 32);
+    static_assert!(I15: usize where I15 < 32);
+    let shuf: simd::u8x16 = unsafe {
+        simd_shuffle16!(
+            a.as_u8x16(),
+            b.as_u8x16(),
+            <
+                const I0: usize,
+                const I1: usize,
+                const I2: usize,
+                const I3: usize,
+                const I4: usize,
+                const I5: usize,
+                const I6: usize,
+                const I7: usize,
+                const I8: usize,
+                const I9: usize,
+                const I10: usize,
+                const I11: usize,
+                const I12: usize,
+                const I13: usize,
+                const I14: usize,
+                const I15: usize,
+            > [
+                I0 as u32, I1 as u32, I2 as u32, I3 as u32, I4 as u32, I5 as u32, I6 as u32, I7 as u32,
+                I8 as u32, I9 as u32, I10 as u32, I11 as u32, I12 as u32, I13 as u32, I14 as u32,
+                I15 as u32,
+            ],
+        )
+    };
+    shuf.v128()
+}
+
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub use i8x16_shuffle as u8x16_shuffle;
+
+/// Same as [`i8x16_shuffle`], except operates as if the inputs were eight
+/// 16-bit integers, only taking 8 indices to shuffle.
+///
+/// Indices in the range [0, 7] select from `a` while [8, 15] select from `b`.
+/// Note that this will generate the `i8x16.shuffle` instruction, since there
+/// is no native `i16x8.shuffle` instruction (there is no need for one since
+/// `i8x16.shuffle` suffices).
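+// Illustrative sketch (not part of this commit): interleaving the low lanes
+// of two vectors of 16-bit integers (indices 0-7 pick from `a`, 8-15 pick
+// from `b`):
+//
+//     fn interleave_low_u16(a: v128, b: v128) -> v128 {
+//         i16x8_shuffle::<0, 8, 1, 9, 2, 10, 3, 11>(a, b)
+//     }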
+#[inline]
+#[cfg_attr(test,
+    assert_instr(
+        i8x16.shuffle,
+        I0 = 0,
+        I1 = 2,
+        I2 = 4,
+        I3 = 6,
+        I4 = 8,
+        I5 = 10,
+        I6 = 12,
+        I7 = 14,
+    )
+)]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i8x16.shuffle"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i16x8_shuffle<
+    const I0: usize,
+    const I1: usize,
+    const I2: usize,
+    const I3: usize,
+    const I4: usize,
+    const I5: usize,
+    const I6: usize,
+    const I7: usize,
+>(
+    a: v128,
+    b: v128,
+) -> v128 {
+    static_assert!(I0: usize where I0 < 16);
+    static_assert!(I1: usize where I1 < 16);
+    static_assert!(I2: usize where I2 < 16);
+    static_assert!(I3: usize where I3 < 16);
+    static_assert!(I4: usize where I4 < 16);
+    static_assert!(I5: usize where I5 < 16);
+    static_assert!(I6: usize where I6 < 16);
+    static_assert!(I7: usize where I7 < 16);
+    let shuf: simd::u16x8 = unsafe {
+        simd_shuffle8!(
+            a.as_u16x8(),
+            b.as_u16x8(),
+            <
+                const I0: usize,
+                const I1: usize,
+                const I2: usize,
+                const I3: usize,
+                const I4: usize,
+                const I5: usize,
+                const I6: usize,
+                const I7: usize,
+            > [
+                I0 as u32, I1 as u32, I2 as u32, I3 as u32, I4 as u32, I5 as u32, I6 as u32, I7 as u32,
+            ],
+        )
+    };
+    shuf.v128()
+}
+
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub use i16x8_shuffle as u16x8_shuffle;
+
+/// Same as [`i8x16_shuffle`], except operates as if the inputs were four
+/// 32-bit integers, only taking 4 indices to shuffle.
+///
+/// Indices in the range [0, 3] select from `a` while [4, 7] select from `b`.
+/// Note that this will generate the `i8x16.shuffle` instruction, since there
+/// is no native `i32x4.shuffle` instruction (there is no need for one since
+/// `i8x16.shuffle` suffices).
+#[inline]
+#[cfg_attr(test, assert_instr(i8x16.shuffle, I0 = 0, I1 = 2, I2 = 4, I3 = 6))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i8x16.shuffle"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i32x4_shuffle<const I0: usize, const I1: usize, const I2: usize, const I3: usize>(
+    a: v128,
+    b: v128,
+) -> v128 {
+    static_assert!(I0: usize where I0 < 8);
+    static_assert!(I1: usize where I1 < 8);
+    static_assert!(I2: usize where I2 < 8);
+    static_assert!(I3: usize where I3 < 8);
+    let shuf: simd::u32x4 = unsafe {
+        simd_shuffle4!(
+            a.as_u32x4(),
+            b.as_u32x4(),
+            <const I0: usize, const I1: usize, const I2: usize, const I3: usize> [I0 as u32, I1 as u32, I2 as u32, I3 as u32],
+        )
+    };
+    shuf.v128()
+}
+
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub use i32x4_shuffle as u32x4_shuffle;
+
+/// Same as [`i8x16_shuffle`], except operates as if the inputs were two
+/// 64-bit integers, only taking 2 indices to shuffle.
+///
+/// Indices in the range [0, 1] select from `a` while [2, 3] select from `b`.
+/// Note that this will generate the `i8x16.shuffle` instruction, since there
+/// is no native `i64x2.shuffle` instruction (there is no need for one since
+/// `i8x16.shuffle` suffices).
+#[inline]
+#[cfg_attr(test, assert_instr(i8x16.shuffle, I0 = 0, I1 = 2))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i8x16.shuffle"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i64x2_shuffle<const I0: usize, const I1: usize>(a: v128, b: v128) -> v128 {
+    static_assert!(I0: usize where I0 < 4);
+    static_assert!(I1: usize where I1 < 4);
+    let shuf: simd::u64x2 = unsafe {
+        simd_shuffle2!(
+            a.as_u64x2(),
+            b.as_u64x2(),
+            <const I0: usize, const I1: usize> [I0 as u32, I1 as u32],
+        )
+    };
+    shuf.v128()
+}
+
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub use i64x2_shuffle as u64x2_shuffle;
+
+/// Extracts a lane from a 128-bit vector interpreted as 16 packed i8 numbers.
+///
+/// Extracts the scalar value of lane specified in the immediate mode operand
+/// `N` from `a`. If `N` is out of bounds then it is a compile time error.
+#[inline]
+#[cfg_attr(test, assert_instr(i8x16.extract_lane_s, N = 3))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i8x16.extract_lane_s"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i8x16_extract_lane<const N: usize>(a: v128) -> i8 {
+    static_assert!(N: usize where N < 16);
+    unsafe { simd_extract(a.as_i8x16(), N as u32) }
+}
+
+/// Extracts a lane from a 128-bit vector interpreted as 16 packed u8 numbers.
+///
+/// Extracts the scalar value of lane specified in the immediate mode operand
+/// `N` from `a`. If `N` is out of bounds then it is a compile time error.
+#[inline]
+#[cfg_attr(test, assert_instr(i8x16.extract_lane_u, N = 3))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i8x16.extract_lane_u"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn u8x16_extract_lane<const N: usize>(a: v128) -> u8 {
+    static_assert!(N: usize where N < 16);
+    unsafe { simd_extract(a.as_u8x16(), N as u32) }
+}
+
+/// Replaces a lane from a 128-bit vector interpreted as 16 packed i8 numbers.
+///
+/// Replaces the scalar value of lane specified in the immediate mode operand
+/// `N` from `a`. If `N` is out of bounds then it is a compile time error.
+#[inline]
+#[cfg_attr(test, assert_instr(i8x16.replace_lane, N = 2))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i8x16.replace_lane"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i8x16_replace_lane<const N: usize>(a: v128, val: i8) -> v128 {
+    static_assert!(N: usize where N < 16);
+    unsafe { simd_insert(a.as_i8x16(), N as u32, val).v128() }
+}
+
+/// Replaces a lane from a 128-bit vector interpreted as 16 packed u8 numbers.
+///
+/// Replaces the scalar value of lane specified in the immediate mode operand
+/// `N` from `a`. If `N` is out of bounds then it is a compile time error.
+#[inline]
+#[cfg_attr(test, assert_instr(i8x16.replace_lane, N = 2))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i8x16.replace_lane"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn u8x16_replace_lane<const N: usize>(a: v128, val: u8) -> v128 {
+    static_assert!(N: usize where N < 16);
+    unsafe { simd_insert(a.as_u8x16(), N as u32, val).v128() }
+}
+
+/// Extracts a lane from a 128-bit vector interpreted as 8 packed i16 numbers.
+///
+/// Extracts the scalar value of lane specified in the immediate mode operand
+/// `N` from `a`. If `N` is out of bounds then it is a compile time error.
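+// Illustrative sketch (not part of this commit): extract and replace compose
+// into a single-lane update:
+//
+//     fn double_lane_3(a: v128) -> v128 {
+//         let x = i16x8_extract_lane::<3>(a);
+//         i16x8_replace_lane::<3>(a, x.wrapping_mul(2))
+//     }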
+#[inline]
+#[cfg_attr(test, assert_instr(i16x8.extract_lane_s, N = 2))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i16x8.extract_lane_s"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i16x8_extract_lane<const N: usize>(a: v128) -> i16 {
+    static_assert!(N: usize where N < 8);
+    unsafe { simd_extract(a.as_i16x8(), N as u32) }
+}
+
+/// Extracts a lane from a 128-bit vector interpreted as 8 packed u16 numbers.
+///
+/// Extracts the scalar value of lane specified in the immediate mode operand
+/// `N` from `a`. If `N` is out of bounds then it is a compile time error.
+#[inline]
+#[cfg_attr(test, assert_instr(i16x8.extract_lane_u, N = 2))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i16x8.extract_lane_u"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn u16x8_extract_lane<const N: usize>(a: v128) -> u16 {
+    static_assert!(N: usize where N < 8);
+    unsafe { simd_extract(a.as_u16x8(), N as u32) }
+}
+
+/// Replaces a lane from a 128-bit vector interpreted as 8 packed i16 numbers.
+///
+/// Replaces the scalar value of lane specified in the immediate mode operand
+/// `N` from `a`. If `N` is out of bounds then it is a compile time error.
+#[inline]
+#[cfg_attr(test, assert_instr(i16x8.replace_lane, N = 2))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i16x8.replace_lane"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i16x8_replace_lane<const N: usize>(a: v128, val: i16) -> v128 {
+    static_assert!(N: usize where N < 8);
+    unsafe { simd_insert(a.as_i16x8(), N as u32, val).v128() }
+}
+
+/// Replaces a lane from a 128-bit vector interpreted as 8 packed u16 numbers.
+///
+/// Replaces the scalar value of lane specified in the immediate mode operand
+/// `N` from `a`. If `N` is out of bounds then it is a compile time error.
+#[inline]
+#[cfg_attr(test, assert_instr(i16x8.replace_lane, N = 2))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i16x8.replace_lane"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn u16x8_replace_lane<const N: usize>(a: v128, val: u16) -> v128 {
+    static_assert!(N: usize where N < 8);
+    unsafe { simd_insert(a.as_u16x8(), N as u32, val).v128() }
+}
+
+/// Extracts a lane from a 128-bit vector interpreted as 4 packed i32 numbers.
+///
+/// Extracts the scalar value of lane specified in the immediate mode operand
+/// `N` from `a`. If `N` is out of bounds then it is a compile time error.
+#[inline]
+#[cfg_attr(test, assert_instr(i32x4.extract_lane, N = 2))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i32x4.extract_lane"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i32x4_extract_lane<const N: usize>(a: v128) -> i32 {
+    static_assert!(N: usize where N < 4);
+    unsafe { simd_extract(a.as_i32x4(), N as u32) }
+}
+
+/// Extracts a lane from a 128-bit vector interpreted as 4 packed u32 numbers.
+///
+/// Extracts the scalar value of lane specified in the immediate mode operand
+/// `N` from `a`. If `N` is out of bounds then it is a compile time error.
+#[inline]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i32x4.extract_lane"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn u32x4_extract_lane<const N: usize>(a: v128) -> u32 {
+    i32x4_extract_lane::<N>(a) as u32
+}
+
+/// Replaces a lane from a 128-bit vector interpreted as 4 packed i32 numbers.
+///
+/// Replaces the scalar value of lane specified in the immediate mode operand
+/// `N` from `a`. If `N` is out of bounds then it is a compile time error.
+#[inline]
+#[cfg_attr(test, assert_instr(i32x4.replace_lane, N = 2))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i32x4.replace_lane"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i32x4_replace_lane<const N: usize>(a: v128, val: i32) -> v128 {
+    static_assert!(N: usize where N < 4);
+    unsafe { simd_insert(a.as_i32x4(), N as u32, val).v128() }
+}
+
+/// Replaces a lane from a 128-bit vector interpreted as 4 packed u32 numbers.
+///
+/// Replaces the scalar value of lane specified in the immediate mode operand
+/// `N` from `a`. If `N` is out of bounds then it is a compile time error.
+#[inline]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i32x4.replace_lane"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn u32x4_replace_lane<const N: usize>(a: v128, val: u32) -> v128 {
+    i32x4_replace_lane::<N>(a, val as i32)
+}
+
+/// Extracts a lane from a 128-bit vector interpreted as 2 packed i64 numbers.
+///
+/// Extracts the scalar value of lane specified in the immediate mode operand
+/// `N` from `a`. If `N` is out of bounds then it is a compile time error.
+#[inline]
+#[cfg_attr(test, assert_instr(i64x2.extract_lane, N = 1))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i64x2.extract_lane"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i64x2_extract_lane<const N: usize>(a: v128) -> i64 {
+    static_assert!(N: usize where N < 2);
+    unsafe { simd_extract(a.as_i64x2(), N as u32) }
+}
+
+/// Extracts a lane from a 128-bit vector interpreted as 2 packed u64 numbers.
+///
+/// Extracts the scalar value of lane specified in the immediate mode operand
+/// `N` from `a`. If `N` is out of bounds then it is a compile time error.
+#[inline]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i64x2.extract_lane"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn u64x2_extract_lane<const N: usize>(a: v128) -> u64 {
+    i64x2_extract_lane::<N>(a) as u64
+}
+
+/// Replaces a lane from a 128-bit vector interpreted as 2 packed i64 numbers.
+///
+/// Replaces the scalar value of lane specified in the immediate mode operand
+/// `N` from `a`. If `N` is out of bounds then it is a compile time error.
+#[inline]
+#[cfg_attr(test, assert_instr(i64x2.replace_lane, N = 0))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i64x2.replace_lane"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i64x2_replace_lane<const N: usize>(a: v128, val: i64) -> v128 {
+    static_assert!(N: usize where N < 2);
+    unsafe { simd_insert(a.as_i64x2(), N as u32, val).v128() }
+}
+
+/// Replaces a lane from a 128-bit vector interpreted as 2 packed u64 numbers.
+///
+/// Replaces the scalar value of lane specified in the immediate mode operand
+/// `N` from `a`. If `N` is out of bounds then it is a compile time error.
+#[inline]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i64x2.replace_lane"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn u64x2_replace_lane<const N: usize>(a: v128, val: u64) -> v128 {
+    i64x2_replace_lane::<N>(a, val as i64)
+}
+
+/// Extracts a lane from a 128-bit vector interpreted as 4 packed f32 numbers.
+///
+/// Extracts the scalar value of lane specified in the immediate mode operand
+/// `N` from `a`. If `N` is out of bounds then it is a compile time error.
+#[inline]
+#[cfg_attr(test, assert_instr(f32x4.extract_lane, N = 1))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f32x4.extract_lane"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f32x4_extract_lane<const N: usize>(a: v128) -> f32 {
+    static_assert!(N: usize where N < 4);
+    unsafe { simd_extract(a.as_f32x4(), N as u32) }
+}
+
+/// Replaces a lane from a 128-bit vector interpreted as 4 packed f32 numbers.
+///
+/// Replaces the scalar value of lane specified in the immediate mode operand
+/// `N` from `a`. If `N` is out of bounds then it is a compile time error.
+#[inline]
+#[cfg_attr(test, assert_instr(f32x4.replace_lane, N = 1))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f32x4.replace_lane"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f32x4_replace_lane<const N: usize>(a: v128, val: f32) -> v128 {
+    static_assert!(N: usize where N < 4);
+    unsafe { simd_insert(a.as_f32x4(), N as u32, val).v128() }
+}
+
+/// Extracts a lane from a 128-bit vector interpreted as 2 packed f64 numbers.
+///
+/// Extracts the scalar value of lane specified in the immediate mode operand
+/// `N` from `a`. If `N` is out of bounds then it is a compile time error.
+#[inline]
+#[cfg_attr(test, assert_instr(f64x2.extract_lane, N = 1))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f64x2.extract_lane"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f64x2_extract_lane<const N: usize>(a: v128) -> f64 {
+    static_assert!(N: usize where N < 2);
+    unsafe { simd_extract(a.as_f64x2(), N as u32) }
+}
+
+/// Replaces a lane from a 128-bit vector interpreted as 2 packed f64 numbers.
+///
+/// Replaces the scalar value of lane specified in the immediate mode operand
+/// `N` from `a`. If `N` is out of bounds then it is a compile time error.
+#[inline]
+#[cfg_attr(test, assert_instr(f64x2.replace_lane, N = 1))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f64x2.replace_lane"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f64x2_replace_lane<const N: usize>(a: v128, val: f64) -> v128 {
+    static_assert!(N: usize where N < 2);
+    unsafe { simd_insert(a.as_f64x2(), N as u32, val).v128() }
+}
+
+/// Returns a new vector with lanes selected from the lanes of the first input
+/// vector `a` specified in the second input vector `s`.
+///
+/// The indices `i` in range [0, 15] select the `i`-th element of `a`. For
+/// indices outside of the range the resulting lane is 0.
+#[inline]
+#[cfg_attr(test, assert_instr(i8x16.swizzle))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i8x16.swizzle"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i8x16_swizzle(a: v128, s: v128) -> v128 {
+    unsafe { llvm_swizzle(a.as_i8x16(), s.as_i8x16()).v128() }
+}
+
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub use i8x16_swizzle as u8x16_swizzle;
+
+/// Creates a vector with identical lanes.
+///
+/// Constructs a vector with `a` replicated to all 16 lanes.
+#[inline]
+#[cfg_attr(test, assert_instr(i8x16.splat))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i8x16.splat"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i8x16_splat(a: i8) -> v128 {
+    simd::i8x16::splat(a).v128()
+}
+
+/// Creates a vector with identical lanes.
+///
+/// Constructs a vector with `a` replicated to all 16 lanes.
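+///
+/// # Examples
+///
+/// An illustrative sketch (assumes a `wasm32` target with `simd128` enabled):
+///
+/// ```ignore
+/// use core::arch::wasm32::*;
+///
+/// let v = u8x16_splat(0xff);
+/// // Every one of the 16 lanes holds the same byte.
+/// assert_eq!(u8x16_extract_lane::<0>(v), 0xff);
+/// assert_eq!(u8x16_extract_lane::<15>(v), 0xff);
+/// ```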
+#[inline]
+#[cfg_attr(test, assert_instr(i8x16.splat))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i8x16.splat"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn u8x16_splat(a: u8) -> v128 {
+    simd::u8x16::splat(a).v128()
+}
+
+/// Creates a vector with identical lanes.
+///
+/// Constructs a vector with `a` replicated to all 8 lanes.
+#[inline]
+#[cfg_attr(test, assert_instr(i16x8.splat))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i16x8.splat"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i16x8_splat(a: i16) -> v128 {
+    simd::i16x8::splat(a).v128()
+}
+
+/// Creates a vector with identical lanes.
+///
+/// Constructs a vector with `a` replicated to all 8 lanes.
+#[inline]
+#[cfg_attr(test, assert_instr(i16x8.splat))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i16x8.splat"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn u16x8_splat(a: u16) -> v128 {
+    simd::u16x8::splat(a).v128()
+}
+
+/// Creates a vector with identical lanes.
+///
+/// Constructs a vector with `a` replicated to all 4 lanes.
+#[inline]
+#[cfg_attr(test, assert_instr(i32x4.splat))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i32x4.splat"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i32x4_splat(a: i32) -> v128 {
+    simd::i32x4::splat(a).v128()
+}
+
+/// Creates a vector with identical lanes.
+///
+/// Constructs a vector with `a` replicated to all 4 lanes.
+#[inline]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i32x4.splat"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn u32x4_splat(a: u32) -> v128 {
+    i32x4_splat(a as i32)
+}
+
+/// Creates a vector with identical lanes.
+///
+/// Constructs a vector with `a` replicated to all 2 lanes.
+#[inline]
+#[cfg_attr(test, assert_instr(i64x2.splat))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i64x2.splat"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i64x2_splat(a: i64) -> v128 {
+    simd::i64x2::splat(a).v128()
+}
+
+/// Creates a vector with identical lanes.
+///
+/// Constructs a vector with `a` replicated to all 2 lanes.
+#[inline]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i64x2.splat"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn u64x2_splat(a: u64) -> v128 {
+    i64x2_splat(a as i64)
+}
+
+/// Creates a vector with identical lanes.
+///
+/// Constructs a vector with `a` replicated to all 4 lanes.
+#[inline]
+#[cfg_attr(test, assert_instr(f32x4.splat))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f32x4.splat"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f32x4_splat(a: f32) -> v128 {
+    simd::f32x4::splat(a).v128()
+}
+
+/// Creates a vector with identical lanes.
+///
+/// Constructs a vector with `a` replicated to all 2 lanes.
+#[inline]
+#[cfg_attr(test, assert_instr(f64x2.splat))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f64x2.splat"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f64x2_splat(a: f64) -> v128 {
+    simd::f64x2::splat(a).v128()
+}
+
+/// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit
+/// integers.
+///
+/// Returns a new vector where each lane is all ones if the corresponding input elements
+/// were equal, or all zeros otherwise.
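+///
+/// # Examples
+///
+/// A sketch of the mask semantics (assumes a `wasm32` target with `simd128`
+/// enabled and the `i8x16` lane constructor from this module):
+///
+/// ```ignore
+/// use core::arch::wasm32::*;
+///
+/// let a = i8x16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+/// let b = i8x16_splat(3);
+/// let mask = i8x16_eq(a, b);
+/// // Lane 3 matched, so it is all ones (-1 as i8); lane 0 did not.
+/// assert_eq!(i8x16_extract_lane::<3>(mask), -1);
+/// assert_eq!(i8x16_extract_lane::<0>(mask), 0);
+/// ```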
+#[inline] +#[cfg_attr(test, assert_instr(i8x16.eq))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.eq"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i8x16_eq(a: v128, b: v128) -> v128 { + unsafe { simd_eq::<_, simd::i8x16>(a.as_i8x16(), b.as_i8x16()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit +/// integers. +/// +/// Returns a new vector where each lane is all ones if the corresponding input elements +/// were not equal, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i8x16.ne))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.ne"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i8x16_ne(a: v128, b: v128) -> v128 { + unsafe { simd_ne::<_, simd::i8x16>(a.as_i8x16(), b.as_i8x16()).v128() } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i8x16_eq as u8x16_eq; +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i8x16_ne as u8x16_ne; + +/// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit +/// signed integers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is less than the right element, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i8x16.lt_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.lt_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i8x16_lt(a: v128, b: v128) -> v128 { + unsafe { simd_lt::<_, simd::i8x16>(a.as_i8x16(), b.as_i8x16()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit +/// unsigned integers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is less than the right element, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i8x16.lt_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.lt_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u8x16_lt(a: v128, b: v128) -> v128 { + unsafe { simd_lt::<_, simd::i8x16>(a.as_u8x16(), b.as_u8x16()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit +/// signed integers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is greater than the right element, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i8x16.gt_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.gt_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i8x16_gt(a: v128, b: v128) -> v128 { + unsafe { simd_gt::<_, simd::i8x16>(a.as_i8x16(), b.as_i8x16()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit +/// unsigned integers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is greater than the right element, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i8x16.gt_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.gt_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u8x16_gt(a: v128, b: v128) -> v128 { + unsafe { simd_gt::<_, simd::i8x16>(a.as_u8x16(), b.as_u8x16()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit +/// signed integers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is less than the right element, or all zeros otherwise. 
+#[inline] +#[cfg_attr(test, assert_instr(i8x16.le_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.le_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i8x16_le(a: v128, b: v128) -> v128 { + unsafe { simd_le::<_, simd::i8x16>(a.as_i8x16(), b.as_i8x16()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit +/// unsigned integers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is less than the right element, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i8x16.le_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.le_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u8x16_le(a: v128, b: v128) -> v128 { + unsafe { simd_le::<_, simd::i8x16>(a.as_u8x16(), b.as_u8x16()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit +/// signed integers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is greater than the right element, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i8x16.ge_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.ge_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i8x16_ge(a: v128, b: v128) -> v128 { + unsafe { simd_ge::<_, simd::i8x16>(a.as_i8x16(), b.as_i8x16()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit +/// unsigned integers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is greater than the right element, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i8x16.ge_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.ge_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u8x16_ge(a: v128, b: v128) -> v128 { + unsafe { simd_ge::<_, simd::i8x16>(a.as_u8x16(), b.as_u8x16()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit +/// integers. +/// +/// Returns a new vector where each lane is all ones if the corresponding input elements +/// were equal, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.eq))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.eq"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_eq(a: v128, b: v128) -> v128 { + unsafe { simd_eq::<_, simd::i16x8>(a.as_i16x8(), b.as_i16x8()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit +/// integers. +/// +/// Returns a new vector where each lane is all ones if the corresponding input elements +/// were not equal, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.ne))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.ne"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_ne(a: v128, b: v128) -> v128 { + unsafe { simd_ne::<_, simd::i16x8>(a.as_i16x8(), b.as_i16x8()).v128() } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i16x8_eq as u16x8_eq; +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i16x8_ne as u16x8_ne; + +/// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit +/// signed integers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is less than the right element, or all zeros otherwise. 
+#[inline] +#[cfg_attr(test, assert_instr(i16x8.lt_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.lt_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_lt(a: v128, b: v128) -> v128 { + unsafe { simd_lt::<_, simd::i16x8>(a.as_i16x8(), b.as_i16x8()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit +/// unsigned integers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is less than the right element, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.lt_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.lt_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u16x8_lt(a: v128, b: v128) -> v128 { + unsafe { simd_lt::<_, simd::i16x8>(a.as_u16x8(), b.as_u16x8()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit +/// signed integers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is greater than the right element, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.gt_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.gt_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_gt(a: v128, b: v128) -> v128 { + unsafe { simd_gt::<_, simd::i16x8>(a.as_i16x8(), b.as_i16x8()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit +/// unsigned integers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is greater than the right element, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.gt_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.gt_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u16x8_gt(a: v128, b: v128) -> v128 { + unsafe { simd_gt::<_, simd::i16x8>(a.as_u16x8(), b.as_u16x8()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit +/// signed integers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is less than the right element, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.le_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.le_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_le(a: v128, b: v128) -> v128 { + unsafe { simd_le::<_, simd::i16x8>(a.as_i16x8(), b.as_i16x8()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit +/// unsigned integers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is less than the right element, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.le_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.le_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u16x8_le(a: v128, b: v128) -> v128 { + unsafe { simd_le::<_, simd::i16x8>(a.as_u16x8(), b.as_u16x8()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit +/// signed integers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is greater than the right element, or all zeros otherwise. 
+#[inline] +#[cfg_attr(test, assert_instr(i16x8.ge_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.ge_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_ge(a: v128, b: v128) -> v128 { + unsafe { simd_ge::<_, simd::i16x8>(a.as_i16x8(), b.as_i16x8()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit +/// unsigned integers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is greater than the right element, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.ge_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.ge_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u16x8_ge(a: v128, b: v128) -> v128 { + unsafe { simd_ge::<_, simd::i16x8>(a.as_u16x8(), b.as_u16x8()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit +/// integers. +/// +/// Returns a new vector where each lane is all ones if the corresponding input elements +/// were equal, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i32x4.eq))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.eq"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i32x4_eq(a: v128, b: v128) -> v128 { + unsafe { simd_eq::<_, simd::i32x4>(a.as_i32x4(), b.as_i32x4()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit +/// integers. +/// +/// Returns a new vector where each lane is all ones if the corresponding input elements +/// were not equal, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i32x4.ne))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.ne"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i32x4_ne(a: v128, b: v128) -> v128 { + unsafe { simd_ne::<_, simd::i32x4>(a.as_i32x4(), b.as_i32x4()).v128() } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i32x4_eq as u32x4_eq; +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i32x4_ne as u32x4_ne; + +/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit +/// signed integers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is less than the right element, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i32x4.lt_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.lt_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i32x4_lt(a: v128, b: v128) -> v128 { + unsafe { simd_lt::<_, simd::i32x4>(a.as_i32x4(), b.as_i32x4()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit +/// unsigned integers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is less than the right element, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i32x4.lt_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.lt_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u32x4_lt(a: v128, b: v128) -> v128 { + unsafe { simd_lt::<_, simd::i32x4>(a.as_u32x4(), b.as_u32x4()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit +/// signed integers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is greater than the right element, or all zeros otherwise. 
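+///
+/// # Examples
+///
+/// A sketch contrasting the signed and unsigned orderings (assumes a
+/// `wasm32` target with `simd128` enabled):
+///
+/// ```ignore
+/// use core::arch::wasm32::*;
+///
+/// let a = i32x4_splat(-1);
+/// let b = i32x4_splat(1);
+/// // Signed: -1 < 1, so the comparison is false (all zeros).
+/// assert_eq!(i32x4_extract_lane::<0>(i32x4_gt(a, b)), 0);
+/// // Unsigned: the same bits read as u32::MAX > 1, so it is true (all ones).
+/// assert_eq!(i32x4_extract_lane::<0>(u32x4_gt(a, b)), -1);
+/// ```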
+#[inline] +#[cfg_attr(test, assert_instr(i32x4.gt_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.gt_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i32x4_gt(a: v128, b: v128) -> v128 { + unsafe { simd_gt::<_, simd::i32x4>(a.as_i32x4(), b.as_i32x4()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit +/// unsigned integers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is greater than the right element, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i32x4.gt_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.gt_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u32x4_gt(a: v128, b: v128) -> v128 { + unsafe { simd_gt::<_, simd::i32x4>(a.as_u32x4(), b.as_u32x4()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit +/// signed integers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is less than the right element, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i32x4.le_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.le_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i32x4_le(a: v128, b: v128) -> v128 { + unsafe { simd_le::<_, simd::i32x4>(a.as_i32x4(), b.as_i32x4()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit +/// unsigned integers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is less than the right element, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i32x4.le_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.le_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u32x4_le(a: v128, b: v128) -> v128 { + unsafe { simd_le::<_, simd::i32x4>(a.as_u32x4(), b.as_u32x4()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit +/// signed integers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is greater than the right element, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i32x4.ge_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.ge_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i32x4_ge(a: v128, b: v128) -> v128 { + unsafe { simd_ge::<_, simd::i32x4>(a.as_i32x4(), b.as_i32x4()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit +/// unsigned integers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is greater than the right element, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i32x4.ge_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.ge_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u32x4_ge(a: v128, b: v128) -> v128 { + unsafe { simd_ge::<_, simd::i32x4>(a.as_u32x4(), b.as_u32x4()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 2 sixty-four-bit +/// integers. +/// +/// Returns a new vector where each lane is all ones if the corresponding input elements +/// were equal, or all zeros otherwise. 
+#[inline] +#[cfg_attr(test, assert_instr(i64x2.eq))] +#[target_feature(enable = "simd128")] +#[doc(alias("i64x2.eq"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i64x2_eq(a: v128, b: v128) -> v128 { + unsafe { simd_eq::<_, simd::i64x2>(a.as_i64x2(), b.as_i64x2()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 2 sixty-four-bit +/// integers. +/// +/// Returns a new vector where each lane is all ones if the corresponding input elements +/// were not equal, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i64x2.ne))] +#[target_feature(enable = "simd128")] +#[doc(alias("i64x2.ne"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i64x2_ne(a: v128, b: v128) -> v128 { + unsafe { simd_ne::<_, simd::i64x2>(a.as_i64x2(), b.as_i64x2()).v128() } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i64x2_eq as u64x2_eq; +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i64x2_ne as u64x2_ne; + +/// Compares two 128-bit vectors as if they were two vectors of 2 sixty-four-bit +/// signed integers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is less than the right element, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i64x2.lt_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i64x2.lt_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i64x2_lt(a: v128, b: v128) -> v128 { + unsafe { simd_lt::<_, simd::i64x2>(a.as_i64x2(), b.as_i64x2()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 2 sixty-four-bit +/// signed integers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is greater than the right element, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i64x2.gt_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i64x2.gt_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i64x2_gt(a: v128, b: v128) -> v128 { + unsafe { simd_gt::<_, simd::i64x2>(a.as_i64x2(), b.as_i64x2()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 2 sixty-four-bit +/// signed integers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is less than the right element, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i64x2.le_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i64x2.le_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i64x2_le(a: v128, b: v128) -> v128 { + unsafe { simd_le::<_, simd::i64x2>(a.as_i64x2(), b.as_i64x2()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 2 sixty-four-bit +/// signed integers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is greater than the right element, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(i64x2.ge_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i64x2.ge_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i64x2_ge(a: v128, b: v128) -> v128 { + unsafe { simd_ge::<_, simd::i64x2>(a.as_i64x2(), b.as_i64x2()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit +/// floating point numbers. +/// +/// Returns a new vector where each lane is all ones if the corresponding input elements +/// were equal, or all zeros otherwise. 
+#[inline] +#[cfg_attr(test, assert_instr(f32x4.eq))] +#[target_feature(enable = "simd128")] +#[doc(alias("f32x4.eq"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f32x4_eq(a: v128, b: v128) -> v128 { + unsafe { simd_eq::<_, simd::i32x4>(a.as_f32x4(), b.as_f32x4()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit +/// floating point numbers. +/// +/// Returns a new vector where each lane is all ones if the corresponding input elements +/// were not equal, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(f32x4.ne))] +#[target_feature(enable = "simd128")] +#[doc(alias("f32x4.ne"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f32x4_ne(a: v128, b: v128) -> v128 { + unsafe { simd_ne::<_, simd::i32x4>(a.as_f32x4(), b.as_f32x4()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit +/// floating point numbers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is less than the right element, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(f32x4.lt))] +#[target_feature(enable = "simd128")] +#[doc(alias("f32x4.lt"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f32x4_lt(a: v128, b: v128) -> v128 { + unsafe { simd_lt::<_, simd::i32x4>(a.as_f32x4(), b.as_f32x4()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit +/// floating point numbers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is greater than the right element, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(f32x4.gt))] +#[target_feature(enable = "simd128")] +#[doc(alias("f32x4.gt"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f32x4_gt(a: v128, b: v128) -> v128 { + unsafe { simd_gt::<_, simd::i32x4>(a.as_f32x4(), b.as_f32x4()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit +/// floating point numbers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is less than the right element, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(f32x4.le))] +#[target_feature(enable = "simd128")] +#[doc(alias("f32x4.le"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f32x4_le(a: v128, b: v128) -> v128 { + unsafe { simd_le::<_, simd::i32x4>(a.as_f32x4(), b.as_f32x4()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit +/// floating point numbers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is greater than the right element, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(f32x4.ge))] +#[target_feature(enable = "simd128")] +#[doc(alias("f32x4.ge"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f32x4_ge(a: v128, b: v128) -> v128 { + unsafe { simd_ge::<_, simd::i32x4>(a.as_f32x4(), b.as_f32x4()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 2 sixty-four-bit +/// floating point numbers. +/// +/// Returns a new vector where each lane is all ones if the corresponding input elements +/// were equal, or all zeros otherwise. 
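+///
+/// # Examples
+///
+/// A sketch of the IEEE 754 comparison semantics (assumes a `wasm32` target
+/// with `simd128` enabled):
+///
+/// ```ignore
+/// use core::arch::wasm32::*;
+///
+/// let x = f64x2_splat(1.5);
+/// assert_eq!(i64x2_extract_lane::<0>(f64x2_eq(x, x)), -1);
+///
+/// // NaN compares unequal to everything, including itself.
+/// let nan = f64x2_splat(f64::NAN);
+/// assert_eq!(i64x2_extract_lane::<0>(f64x2_eq(nan, nan)), 0);
+/// ```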
+#[inline] +#[cfg_attr(test, assert_instr(f64x2.eq))] +#[target_feature(enable = "simd128")] +#[doc(alias("f64x2.eq"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f64x2_eq(a: v128, b: v128) -> v128 { + unsafe { simd_eq::<_, simd::i64x2>(a.as_f64x2(), b.as_f64x2()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 2 sixty-four-bit +/// floating point numbers. +/// +/// Returns a new vector where each lane is all ones if the corresponding input elements +/// were not equal, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(f64x2.ne))] +#[target_feature(enable = "simd128")] +#[doc(alias("f64x2.ne"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f64x2_ne(a: v128, b: v128) -> v128 { + unsafe { simd_ne::<_, simd::i64x2>(a.as_f64x2(), b.as_f64x2()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 2 sixty-four-bit +/// floating point numbers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is less than the right element, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(f64x2.lt))] +#[target_feature(enable = "simd128")] +#[doc(alias("f64x2.lt"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f64x2_lt(a: v128, b: v128) -> v128 { + unsafe { simd_lt::<_, simd::i64x2>(a.as_f64x2(), b.as_f64x2()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 2 sixty-four-bit +/// floating point numbers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is greater than the right element, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(f64x2.gt))] +#[target_feature(enable = "simd128")] +#[doc(alias("f64x2.gt"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f64x2_gt(a: v128, b: v128) -> v128 { + unsafe { simd_gt::<_, simd::i64x2>(a.as_f64x2(), b.as_f64x2()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 2 sixty-four-bit +/// floating point numbers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is less than the right element, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(f64x2.le))] +#[target_feature(enable = "simd128")] +#[doc(alias("f64x2.le"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f64x2_le(a: v128, b: v128) -> v128 { + unsafe { simd_le::<_, simd::i64x2>(a.as_f64x2(), b.as_f64x2()).v128() } +} + +/// Compares two 128-bit vectors as if they were two vectors of 2 sixty-four-bit +/// floating point numbers. +/// +/// Returns a new vector where each lane is all ones if the lane-wise left +/// element is greater than the right element, or all zeros otherwise. +#[inline] +#[cfg_attr(test, assert_instr(f64x2.ge))] +#[target_feature(enable = "simd128")] +#[doc(alias("f64x2.ge"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f64x2_ge(a: v128, b: v128) -> v128 { + unsafe { simd_ge::<_, simd::i64x2>(a.as_f64x2(), b.as_f64x2()).v128() } +} + +/// Flips each bit of the 128-bit input vector. +#[inline] +#[cfg_attr(test, assert_instr(v128.not))] +#[target_feature(enable = "simd128")] +#[doc(alias("v128.not"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn v128_not(a: v128) -> v128 { + unsafe { simd_xor(a.as_i64x2(), simd::i64x2(!0, !0)).v128() } +} + +/// Performs a bitwise and of the two input 128-bit vectors, returning the +/// resulting vector. 
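+///
+/// # Examples
+///
+/// An illustrative sketch (assumes a `wasm32` target with `simd128` enabled):
+///
+/// ```ignore
+/// use core::arch::wasm32::*;
+///
+/// let a = u8x16_splat(0b1100);
+/// let b = u8x16_splat(0b1010);
+/// // The AND is applied bit by bit across the full 128 bits.
+/// assert_eq!(u8x16_extract_lane::<0>(v128_and(a, b)), 0b1000);
+/// ```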
+#[inline]
+#[cfg_attr(test, assert_instr(v128.and))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("v128.and"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn v128_and(a: v128, b: v128) -> v128 {
+    unsafe { simd_and(a.as_i64x2(), b.as_i64x2()).v128() }
+}
+
+/// Bitwise AND of bits of `a` and the logical inverse of bits of `b`.
+///
+/// This operation is equivalent to `v128.and(a, v128.not(b))`.
+#[inline]
+#[cfg_attr(test, assert_instr(v128.andnot))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("v128.andnot"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn v128_andnot(a: v128, b: v128) -> v128 {
+    unsafe { simd_and(a.as_i64x2(), simd_xor(b.as_i64x2(), simd::i64x2(-1, -1))).v128() }
+}
+
+/// Performs a bitwise or of the two input 128-bit vectors, returning the
+/// resulting vector.
+#[inline]
+#[cfg_attr(test, assert_instr(v128.or))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("v128.or"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn v128_or(a: v128, b: v128) -> v128 {
+    unsafe { simd_or(a.as_i64x2(), b.as_i64x2()).v128() }
+}
+
+/// Performs a bitwise xor of the two input 128-bit vectors, returning the
+/// resulting vector.
+#[inline]
+#[cfg_attr(test, assert_instr(v128.xor))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("v128.xor"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn v128_xor(a: v128, b: v128) -> v128 {
+    unsafe { simd_xor(a.as_i64x2(), b.as_i64x2()).v128() }
+}
+
+/// Uses the bitmask in `c` to select bits from `v1` when 1 and `v2` when 0.
+#[inline]
+#[cfg_attr(test, assert_instr(v128.bitselect))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("v128.bitselect"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn v128_bitselect(v1: v128, v2: v128, c: v128) -> v128 {
+    unsafe { llvm_bitselect(v1.as_i8x16(), v2.as_i8x16(), c.as_i8x16()).v128() }
+}
+
+/// Returns `true` if any bit in `a` is set, or `false` otherwise.
+#[inline]
+#[cfg_attr(test, assert_instr(v128.any_true))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("v128.any_true"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn v128_any_true(a: v128) -> bool {
+    unsafe { llvm_any_true_i8x16(a.as_i8x16()) != 0 }
+}
+
+/// Lane-wise wrapping absolute value.
+#[inline]
+#[cfg_attr(test, assert_instr(i8x16.abs))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i8x16.abs"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i8x16_abs(a: v128) -> v128 {
+    unsafe {
+        let a = a.as_i8x16();
+        let zero = simd::i8x16::splat(0);
+        simd_select::<simd::m8x16, simd::i8x16>(simd_lt(a, zero), simd_sub(zero, a), a).v128()
+    }
+}
+
+/// Negates a 128-bit vector interpreted as sixteen 8-bit signed integers.
+#[inline]
+#[cfg_attr(test, assert_instr(i8x16.neg))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i8x16.neg"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i8x16_neg(a: v128) -> v128 {
+    unsafe { simd_mul(a.as_i8x16(), simd::i8x16::splat(-1)).v128() }
+}
+
+/// Counts the number of bits set to one within each lane.
+#[inline]
+#[cfg_attr(test, assert_instr(i8x16.popcnt))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i8x16.popcnt"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i8x16_popcnt(v: v128) -> v128 {
+    unsafe { llvm_popcnt(v.as_i8x16()).v128() }
+}
+
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub use i8x16_popcnt as u8x16_popcnt;
+
+/// Returns true if all lanes are non-zero, false otherwise.
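+///
+/// # Examples
+///
+/// An illustrative sketch (assumes a `wasm32` target with `simd128` enabled):
+///
+/// ```ignore
+/// use core::arch::wasm32::*;
+///
+/// assert!(i8x16_all_true(i8x16_splat(7)));
+/// // A single zero lane is enough to make the result false.
+/// assert!(!i8x16_all_true(i8x16_replace_lane::<3>(i8x16_splat(7), 0)));
+/// ```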
+#[inline]
+#[cfg_attr(test, assert_instr(i8x16.all_true))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i8x16.all_true"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i8x16_all_true(a: v128) -> bool {
+    unsafe { llvm_i8x16_all_true(a.as_i8x16()) != 0 }
+}
+
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub use i8x16_all_true as u8x16_all_true;
+
+/// Extracts the high bit for each lane in `a` and produces a scalar mask with
+/// all bits concatenated.
+#[inline]
+#[cfg_attr(test, assert_instr(i8x16.bitmask))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i8x16.bitmask"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i8x16_bitmask(a: v128) -> u16 {
+    // FIXME(https://bugs.llvm.org/show_bug.cgi?id=50507) - this produces an
+    // extraneous `i32.and` instruction against a mask of 65535 when converting
+    // from the native intrinsic's i32 return value to our desired u16. This
+    // shouldn't be necessary, but removing it requires upstream LLVM changes.
+    unsafe { llvm_bitmask_i8x16(a.as_i8x16()) as u16 }
+}
+
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub use i8x16_bitmask as u8x16_bitmask;
+
+/// Converts two input vectors into a smaller lane vector by narrowing each
+/// lane.
+///
+/// Signed saturation to 0x7f or 0x80 is used and the input lanes are always
+/// interpreted as signed integers.
+#[inline]
+#[cfg_attr(test, assert_instr(i8x16.narrow_i16x8_s))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i8x16.narrow_i16x8_s"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i8x16_narrow_i16x8(a: v128, b: v128) -> v128 {
+    unsafe { llvm_narrow_i8x16_s(a.as_i16x8(), b.as_i16x8()).v128() }
+}
+
+/// Converts two input vectors into a smaller lane vector by narrowing each
+/// lane.
+///
+/// Signed saturation to 0x00 or 0xff is used and the input lanes are always
+/// interpreted as signed integers.
+#[inline]
+#[cfg_attr(test, assert_instr(i8x16.narrow_i16x8_u))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i8x16.narrow_i16x8_u"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn u8x16_narrow_i16x8(a: v128, b: v128) -> v128 {
+    unsafe { llvm_narrow_i8x16_u(a.as_i16x8(), b.as_i16x8()).v128() }
+}
+
+/// Shifts each lane to the left by the specified number of bits.
+///
+/// Only the low bits of the shift amount are used if the shift amount is
+/// greater than the lane width.
+#[inline]
+#[cfg_attr(test, assert_instr(i8x16.shl))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i8x16.shl"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i8x16_shl(a: v128, amt: u32) -> v128 {
+    unsafe { simd_shl(a.as_i8x16(), simd::i8x16::splat(amt as i8)).v128() }
+}
+
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub use i8x16_shl as u8x16_shl;
+
+/// Shifts each lane to the right by the specified number of bits, sign
+/// extending.
+///
+/// Only the low bits of the shift amount are used if the shift amount is
+/// greater than the lane width.
+#[inline]
+#[cfg_attr(test, assert_instr(i8x16.shr_s))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i8x16.shr_s"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i8x16_shr(a: v128, amt: u32) -> v128 {
+    unsafe { simd_shr(a.as_i8x16(), simd::i8x16::splat(amt as i8)).v128() }
+}
+
+/// Shifts each lane to the right by the specified number of bits, shifting in
+/// zeros.
+///
+/// Only the low bits of the shift amount are used if the shift amount is
+/// greater than the lane width.
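+///
+/// # Examples
+///
+/// A sketch contrasting this logical shift with the sign-extending
+/// `i8x16_shr` above (assumes a `wasm32` target with `simd128` enabled):
+///
+/// ```ignore
+/// use core::arch::wasm32::*;
+///
+/// let v = i8x16_splat(-64); // bit pattern 0b1100_0000
+/// // Arithmetic shift keeps the sign bit: -64 >> 2 == -16.
+/// assert_eq!(i8x16_extract_lane::<0>(i8x16_shr(v, 2)), -16);
+/// // Logical shift brings in zeros: 0b1100_0000 >> 2 == 0b0011_0000.
+/// assert_eq!(u8x16_extract_lane::<0>(u8x16_shr(v, 2)), 48);
+/// ```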
+#[inline] +#[cfg_attr(test, assert_instr(i8x16.shr_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.shr_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u8x16_shr(a: v128, amt: u32) -> v128 { + unsafe { simd_shr(a.as_u8x16(), simd::u8x16::splat(amt as u8)).v128() } +} + +/// Adds two 128-bit vectors as if they were two packed sixteen 8-bit integers. +#[inline] +#[cfg_attr(test, assert_instr(i8x16.add))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.add"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i8x16_add(a: v128, b: v128) -> v128 { + unsafe { simd_add(a.as_i8x16(), b.as_i8x16()).v128() } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i8x16_add as u8x16_add; + +/// Adds two 128-bit vectors as if they were two packed sixteen 8-bit signed +/// integers, saturating on overflow to `i8::MAX`. +#[inline] +#[cfg_attr(test, assert_instr(i8x16.add_sat_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.add_sat_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i8x16_add_sat(a: v128, b: v128) -> v128 { + unsafe { llvm_i8x16_add_sat_s(a.as_i8x16(), b.as_i8x16()).v128() } +} + +/// Adds two 128-bit vectors as if they were two packed sixteen 8-bit unsigned +/// integers, saturating on overflow to `u8::MAX`. +#[inline] +#[cfg_attr(test, assert_instr(i8x16.add_sat_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.add_sat_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u8x16_add_sat(a: v128, b: v128) -> v128 { + unsafe { llvm_i8x16_add_sat_u(a.as_i8x16(), b.as_i8x16()).v128() } +} + +/// Subtracts two 128-bit vectors as if they were two packed sixteen 8-bit integers. +#[inline] +#[cfg_attr(test, assert_instr(i8x16.sub))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.sub"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i8x16_sub(a: v128, b: v128) -> v128 { + unsafe { simd_sub(a.as_i8x16(), b.as_i8x16()).v128() } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i8x16_sub as u8x16_sub; + +/// Subtracts two 128-bit vectors as if they were two packed sixteen 8-bit +/// signed integers, saturating on overflow to `i8::MIN`. +#[inline] +#[cfg_attr(test, assert_instr(i8x16.sub_sat_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.sub_sat_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i8x16_sub_sat(a: v128, b: v128) -> v128 { + unsafe { llvm_i8x16_sub_sat_s(a.as_i8x16(), b.as_i8x16()).v128() } +} + +/// Subtracts two 128-bit vectors as if they were two packed sixteen 8-bit +/// unsigned integers, saturating on overflow to 0. +#[inline] +#[cfg_attr(test, assert_instr(i8x16.sub_sat_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.sub_sat_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u8x16_sub_sat(a: v128, b: v128) -> v128 { + unsafe { llvm_i8x16_sub_sat_u(a.as_i8x16(), b.as_i8x16()).v128() } +} + +/// Compares lane-wise signed integers, and returns the minimum of +/// each pair. +#[inline] +#[cfg_attr(test, assert_instr(i8x16.min_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i8x16.min_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i8x16_min(a: v128, b: v128) -> v128 { + let a = a.as_i8x16(); + let b = b.as_i8x16(); + unsafe { simd_select::<simd::i8x16, _>(simd_lt(a, b), a, b).v128() } +} + +/// Compares lane-wise unsigned integers, and returns the minimum of +/// each pair. 
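+///
+/// # Examples
+///
+/// A sketch of why the unsigned variant matters (assumes a `wasm32` target
+/// with `simd128` enabled):
+///
+/// ```ignore
+/// use core::arch::wasm32::*;
+///
+/// let a = u8x16_splat(200);
+/// let b = u8x16_splat(10);
+/// // Interpreted as signed bytes, 200 would be -56 and "win"; the unsigned
+/// // minimum compares the raw byte values instead.
+/// assert_eq!(u8x16_extract_lane::<0>(u8x16_min(a, b)), 10);
+/// ```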
+#[inline]
+#[cfg_attr(test, assert_instr(i8x16.min_u))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i8x16.min_u"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn u8x16_min(a: v128, b: v128) -> v128 {
+    let a = a.as_u8x16();
+    let b = b.as_u8x16();
+    unsafe { simd_select::<simd::i8x16, _>(simd_lt(a, b), a, b).v128() }
+}
+
+/// Compares lane-wise signed integers, and returns the maximum of
+/// each pair.
+#[inline]
+#[cfg_attr(test, assert_instr(i8x16.max_s))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i8x16.max_s"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i8x16_max(a: v128, b: v128) -> v128 {
+    let a = a.as_i8x16();
+    let b = b.as_i8x16();
+    unsafe { simd_select::<simd::i8x16, _>(simd_gt(a, b), a, b).v128() }
+}
+
+/// Compares lane-wise unsigned integers, and returns the maximum of
+/// each pair.
+#[inline]
+#[cfg_attr(test, assert_instr(i8x16.max_u))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i8x16.max_u"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn u8x16_max(a: v128, b: v128) -> v128 {
+    let a = a.as_u8x16();
+    let b = b.as_u8x16();
+    unsafe { simd_select::<simd::i8x16, _>(simd_gt(a, b), a, b).v128() }
+}
+
+/// Lane-wise rounding average.
+#[inline]
+#[cfg_attr(test, assert_instr(i8x16.avgr_u))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i8x16.avgr_u"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn u8x16_avgr(a: v128, b: v128) -> v128 {
+    unsafe { llvm_avgr_u_i8x16(a.as_i8x16(), b.as_i8x16()).v128() }
+}
+
+/// Integer extended pairwise addition producing extended results
+/// (twice wider results than the inputs).
+#[inline]
+#[cfg_attr(test, assert_instr(i16x8.extadd_pairwise_i8x16_s))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i16x8.extadd_pairwise_i8x16_s"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i16x8_extadd_pairwise_i8x16(a: v128) -> v128 {
+    unsafe { llvm_i16x8_extadd_pairwise_i8x16_s(a.as_i8x16()).v128() }
+}
+
+/// Integer extended pairwise addition producing extended results
+/// (twice wider results than the inputs).
+#[inline]
+#[cfg_attr(test, assert_instr(i16x8.extadd_pairwise_i8x16_u))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i16x8.extadd_pairwise_i8x16_u"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i16x8_extadd_pairwise_u8x16(a: v128) -> v128 {
+    unsafe { llvm_i16x8_extadd_pairwise_i8x16_u(a.as_i8x16()).v128() }
+}
+
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub use i16x8_extadd_pairwise_u8x16 as u16x8_extadd_pairwise_u8x16;
+
+/// Lane-wise wrapping absolute value.
+#[inline]
+#[cfg_attr(test, assert_instr(i16x8.abs))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i16x8.abs"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i16x8_abs(a: v128) -> v128 {
+    let a = a.as_i16x8();
+    let zero = simd::i16x8::splat(0);
+    unsafe {
+        simd_select::<simd::m16x8, simd::i16x8>(simd_lt(a, zero), simd_sub(zero, a), a).v128()
+    }
+}
+
+/// Negates a 128-bit vector interpreted as eight 16-bit signed integers.
+#[inline]
+#[cfg_attr(test, assert_instr(i16x8.neg))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i16x8.neg"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i16x8_neg(a: v128) -> v128 {
+    unsafe { simd_mul(a.as_i16x8(), simd::i16x8::splat(-1)).v128() }
+}
+
+/// Lane-wise saturating rounding multiplication in Q15 format.
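+///
+/// # Examples
+///
+/// A sketch treating each lane as a Q15 fixed-point fraction (assumes a
+/// `wasm32` target with `simd128` enabled and the rounding rule
+/// `(a * b + 0x4000) >> 15` from the instruction's specification):
+///
+/// ```ignore
+/// use core::arch::wasm32::*;
+///
+/// let half = i16x8_splat(0x4000); // 0.5 in Q15
+/// // 0.5 * 0.5 == 0.25, which is 0x2000 in Q15.
+/// assert_eq!(i16x8_extract_lane::<0>(i16x8_q15mulr_sat(half, half)), 0x2000);
+/// // i16::MIN * i16::MIN would round to +1.0, which saturates to i16::MAX.
+/// let min = i16x8_splat(i16::MIN);
+/// assert_eq!(i16x8_extract_lane::<0>(i16x8_q15mulr_sat(min, min)), i16::MAX);
+/// ```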
+#[inline]
+#[cfg_attr(test, assert_instr(i16x8.q15mulr_sat_s))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i16x8.q15mulr_sat_s"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i16x8_q15mulr_sat(a: v128, b: v128) -> v128 {
+    unsafe { llvm_q15mulr(a.as_i16x8(), b.as_i16x8()).v128() }
+}
+
+/// Returns true if all lanes are non-zero, false otherwise.
+#[inline]
+#[cfg_attr(test, assert_instr(i16x8.all_true))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i16x8.all_true"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i16x8_all_true(a: v128) -> bool {
+    unsafe { llvm_i16x8_all_true(a.as_i16x8()) != 0 }
+}
+
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub use i16x8_all_true as u16x8_all_true;
+
+/// Extracts the high bit for each lane in `a` and produces a scalar mask with
+/// all bits concatenated.
+#[inline]
+#[cfg_attr(test, assert_instr(i16x8.bitmask))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i16x8.bitmask"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i16x8_bitmask(a: v128) -> u8 {
+    unsafe { llvm_bitmask_i16x8(a.as_i16x8()) as u8 }
+}
+
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub use i16x8_bitmask as u16x8_bitmask;
+
+/// Converts two input vectors into a smaller lane vector by narrowing each
+/// lane.
+///
+/// Signed saturation to 0x7fff or 0x8000 is used and the input lanes are always
+/// interpreted as signed integers.
+#[inline]
+#[cfg_attr(test, assert_instr(i16x8.narrow_i32x4_s))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i16x8.narrow_i32x4_s"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i16x8_narrow_i32x4(a: v128, b: v128) -> v128 {
+    unsafe { llvm_narrow_i16x8_s(a.as_i32x4(), b.as_i32x4()).v128() }
+}
+
+/// Converts two input vectors into a smaller lane vector by narrowing each
+/// lane.
+///
+/// Signed saturation to 0x0000 or 0xffff is used and the input lanes are always
+/// interpreted as signed integers.
+#[inline]
+#[cfg_attr(test, assert_instr(i16x8.narrow_i32x4_u))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i16x8.narrow_i32x4_u"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn u16x8_narrow_i32x4(a: v128, b: v128) -> v128 {
+    unsafe { llvm_narrow_i16x8_u(a.as_i32x4(), b.as_i32x4()).v128() }
+}
+
+/// Converts low half of the smaller lane vector to a larger lane
+/// vector, sign extended.
+#[inline]
+#[cfg_attr(test, assert_instr(i16x8.extend_low_i8x16_s))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i16x8.extend_low_i8x16_s"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i16x8_extend_low_i8x16(a: v128) -> v128 {
+    unsafe {
+        simd_cast::<simd::i8x8, simd::i16x8>(simd_shuffle8!(
+            a.as_i8x16(),
+            a.as_i8x16(),
+            [0, 1, 2, 3, 4, 5, 6, 7],
+        ))
+        .v128()
+    }
+}
+
+/// Converts high half of the smaller lane vector to a larger lane
+/// vector, sign extended.
+#[inline]
+#[cfg_attr(test, assert_instr(i16x8.extend_high_i8x16_s))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i16x8.extend_high_i8x16_s"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i16x8_extend_high_i8x16(a: v128) -> v128 {
+    unsafe {
+        simd_cast::<simd::i8x8, simd::i16x8>(simd_shuffle8!(
+            a.as_i8x16(),
+            a.as_i8x16(),
+            [8, 9, 10, 11, 12, 13, 14, 15],
+        ))
+        .v128()
+    }
+}
+
+/// Converts low half of the smaller lane vector to a larger lane
+/// vector, zero extended.
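+///
+/// # Examples
+///
+/// A sketch of the zero extension (assumes a `wasm32` target with `simd128`
+/// enabled):
+///
+/// ```ignore
+/// use core::arch::wasm32::*;
+///
+/// let v = u8x16_splat(0xff);
+/// // The low eight lanes widen to 0x00ff rather than sign extending.
+/// assert_eq!(u16x8_extract_lane::<0>(i16x8_extend_low_u8x16(v)), 0x00ff);
+/// ```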
+#[inline] +#[cfg_attr(test, assert_instr(i16x8.extend_low_i8x16_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.extend_low_i8x16_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_extend_low_u8x16(a: v128) -> v128 { + unsafe { + simd_cast::<simd::u8x8, simd::u16x8>(simd_shuffle8!( + a.as_u8x16(), + a.as_u8x16(), + [0, 1, 2, 3, 4, 5, 6, 7], + )) + .v128() + } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i16x8_extend_low_u8x16 as u16x8_extend_low_u8x16; + +/// Converts high half of the smaller lane vector to a larger lane +/// vector, zero extended. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.extend_high_i8x16_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.extend_high_i8x16_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_extend_high_u8x16(a: v128) -> v128 { + unsafe { + simd_cast::<simd::u8x8, simd::u16x8>(simd_shuffle8!( + a.as_u8x16(), + a.as_u8x16(), + [8, 9, 10, 11, 12, 13, 14, 15], + )) + .v128() + } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i16x8_extend_high_u8x16 as u16x8_extend_high_u8x16; + +/// Shifts each lane to the left by the specified number of bits. +/// +/// Only the low bits of the shift amount are used if the shift amount is +/// greater than the lane width. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.shl))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.shl"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_shl(a: v128, amt: u32) -> v128 { + unsafe { simd_shl(a.as_i16x8(), simd::i16x8::splat(amt as i16)).v128() } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i16x8_shl as u16x8_shl; + +/// Shifts each lane to the right by the specified number of bits, sign +/// extending. +/// +/// Only the low bits of the shift amount are used if the shift amount is +/// greater than the lane width. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.shr_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.shr_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_shr(a: v128, amt: u32) -> v128 { + unsafe { simd_shr(a.as_i16x8(), simd::i16x8::splat(amt as i16)).v128() } +} + +/// Shifts each lane to the right by the specified number of bits, shifting in +/// zeros. +/// +/// Only the low bits of the shift amount are used if the shift amount is +/// greater than the lane width. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.shr_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.shr_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u16x8_shr(a: v128, amt: u32) -> v128 { + unsafe { simd_shr(a.as_u16x8(), simd::u16x8::splat(amt as u16)).v128() } +} + +/// Adds two 128-bit vectors as if they were two packed eight 16-bit integers. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.add))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.add"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_add(a: v128, b: v128) -> v128 { + unsafe { simd_add(a.as_i16x8(), b.as_i16x8()).v128() } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i16x8_add as u16x8_add; + +/// Adds two 128-bit vectors as if they were two packed eight 16-bit signed +/// integers, saturating on overflow to `i16::MAX`. 
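+///
+/// # Examples
+///
+/// A sketch of the saturating behavior (assumes a `wasm32` target with
+/// `simd128` enabled):
+///
+/// ```ignore
+/// use core::arch::wasm32::*;
+///
+/// let a = i16x8_splat(i16::MAX);
+/// let b = i16x8_splat(1);
+/// // The wrapping add overflows to i16::MIN; the saturating add clamps.
+/// assert_eq!(i16x8_extract_lane::<0>(i16x8_add(a, b)), i16::MIN);
+/// assert_eq!(i16x8_extract_lane::<0>(i16x8_add_sat(a, b)), i16::MAX);
+/// ```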
+#[inline] +#[cfg_attr(test, assert_instr(i16x8.add_sat_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.add_sat_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_add_sat(a: v128, b: v128) -> v128 { + unsafe { llvm_i16x8_add_sat_s(a.as_i16x8(), b.as_i16x8()).v128() } +} + +/// Adds two 128-bit vectors as if they were two packed eight 16-bit unsigned +/// integers, saturating on overflow to `u16::MAX`. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.add_sat_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.add_sat_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u16x8_add_sat(a: v128, b: v128) -> v128 { + unsafe { llvm_i16x8_add_sat_u(a.as_i16x8(), b.as_i16x8()).v128() } +} + +/// Subtracts two 128-bit vectors as if they were two packed eight 16-bit integers. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.sub))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.sub"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_sub(a: v128, b: v128) -> v128 { + unsafe { simd_sub(a.as_i16x8(), b.as_i16x8()).v128() } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i16x8_sub as u16x8_sub; + +/// Subtracts two 128-bit vectors as if they were two packed eight 16-bit +/// signed integers, saturating on overflow to `i16::MIN`. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.sub_sat_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.sub_sat_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_sub_sat(a: v128, b: v128) -> v128 { + unsafe { llvm_i16x8_sub_sat_s(a.as_i16x8(), b.as_i16x8()).v128() } +} + +/// Subtracts two 128-bit vectors as if they were two packed eight 16-bit +/// unsigned integers, saturating on overflow to 0. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.sub_sat_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.sub_sat_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u16x8_sub_sat(a: v128, b: v128) -> v128 { + unsafe { llvm_i16x8_sub_sat_u(a.as_i16x8(), b.as_i16x8()).v128() } +} + +/// Multiplies two 128-bit vectors as if they were two packed eight 16-bit +/// signed integers. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.mul))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.mul"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_mul(a: v128, b: v128) -> v128 { + unsafe { simd_mul(a.as_i16x8(), b.as_i16x8()).v128() } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i16x8_mul as u16x8_mul; + +/// Compares lane-wise signed integers, and returns the minimum of +/// each pair. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.min_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.min_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i16x8_min(a: v128, b: v128) -> v128 { + let a = a.as_i16x8(); + let b = b.as_i16x8(); + unsafe { simd_select::<simd::i16x8, _>(simd_lt(a, b), a, b).v128() } +} + +/// Compares lane-wise unsigned integers, and returns the minimum of +/// each pair. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.min_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i16x8.min_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u16x8_min(a: v128, b: v128) -> v128 { + let a = a.as_u16x8(); + let b = b.as_u16x8(); + unsafe { simd_select::<simd::i16x8, _>(simd_lt(a, b), a, b).v128() } +} + +/// Compares lane-wise signed integers, and returns the maximum of +/// each pair. 
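+///
+/// # Examples
+///
+/// An illustrative sketch (assumes a `wasm32` target with `simd128` enabled):
+///
+/// ```ignore
+/// use core::arch::wasm32::*;
+///
+/// let a = i16x8_splat(-5);
+/// let b = i16x8_splat(3);
+/// // The signed maximum picks 3; the unsigned variant below would pick the
+/// // bit pattern of -5 (0xfffb) instead.
+/// assert_eq!(i16x8_extract_lane::<0>(i16x8_max(a, b)), 3);
+/// ```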
+#[inline]
+#[cfg_attr(test, assert_instr(i16x8.max_s))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i16x8.max_s"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i16x8_max(a: v128, b: v128) -> v128 {
+    let a = a.as_i16x8();
+    let b = b.as_i16x8();
+    unsafe { simd_select::<simd::i16x8, _>(simd_gt(a, b), a, b).v128() }
+}
+
+/// Compares lane-wise unsigned integers, and returns the maximum of
+/// each pair.
+#[inline]
+#[cfg_attr(test, assert_instr(i16x8.max_u))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i16x8.max_u"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn u16x8_max(a: v128, b: v128) -> v128 {
+    let a = a.as_u16x8();
+    let b = b.as_u16x8();
+    unsafe { simd_select::<simd::i16x8, _>(simd_gt(a, b), a, b).v128() }
+}
+
+/// Lane-wise rounding average of unsigned 16-bit integers: `(a + b + 1) / 2`.
+#[inline]
+#[cfg_attr(test, assert_instr(i16x8.avgr_u))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i16x8.avgr_u"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn u16x8_avgr(a: v128, b: v128) -> v128 {
+    unsafe { llvm_avgr_u_i16x8(a.as_i16x8(), b.as_i16x8()).v128() }
+}
+
+/// Lane-wise integer extended multiplication producing twice wider result than
+/// the inputs.
+///
+/// Equivalent of `i16x8_mul(i16x8_extend_low_i8x16(a), i16x8_extend_low_i8x16(b))`
+#[inline]
+#[cfg_attr(test, assert_instr(i16x8.extmul_low_i8x16_s))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i16x8.extmul_low_i8x16_s"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i16x8_extmul_low_i8x16(a: v128, b: v128) -> v128 {
+    unsafe {
+        let lhs = simd_cast::<simd::i8x8, simd::i16x8>(simd_shuffle8!(
+            a.as_i8x16(),
+            a.as_i8x16(),
+            [0, 1, 2, 3, 4, 5, 6, 7],
+        ));
+        let rhs = simd_cast::<simd::i8x8, simd::i16x8>(simd_shuffle8!(
+            b.as_i8x16(),
+            b.as_i8x16(),
+            [0, 1, 2, 3, 4, 5, 6, 7],
+        ));
+        simd_mul(lhs, rhs).v128()
+    }
+}
+
+/// Lane-wise integer extended multiplication producing twice wider result than
+/// the inputs.
+///
+/// Equivalent of `i16x8_mul(i16x8_extend_high_i8x16(a), i16x8_extend_high_i8x16(b))`
+#[inline]
+#[cfg_attr(test, assert_instr(i16x8.extmul_high_i8x16_s))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i16x8.extmul_high_i8x16_s"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i16x8_extmul_high_i8x16(a: v128, b: v128) -> v128 {
+    unsafe {
+        let lhs = simd_cast::<simd::i8x8, simd::i16x8>(simd_shuffle8!(
+            a.as_i8x16(),
+            a.as_i8x16(),
+            [8, 9, 10, 11, 12, 13, 14, 15],
+        ));
+        let rhs = simd_cast::<simd::i8x8, simd::i16x8>(simd_shuffle8!(
+            b.as_i8x16(),
+            b.as_i8x16(),
+            [8, 9, 10, 11, 12, 13, 14, 15],
+        ));
+        simd_mul(lhs, rhs).v128()
+    }
+}
+
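A scalar model of the rounding average, shown only as a sketch; it mirrors the `Avgr` helper the test module defines later in this file. The sum is formed in a wider type, one is added, and the result is halved, so ties round up. `avgr_u16` is a hypothetical name.

// Scalar model of one lane of `u16x8_avgr`; illustration only.
fn avgr_u16(a: u16, b: u16) -> u16 {
    ((a as u32 + b as u32 + 1) / 2) as u16
}
// avgr_u16(1, 2) == 2 and avgr_u16(u16::MAX, u16::MAX) == u16::MAX.

+/// Lane-wise integer extended multiplication producing twice wider result than
+/// the inputs.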
+///
+/// Equivalent of `i16x8_mul(i16x8_extend_low_u8x16(a), i16x8_extend_low_u8x16(b))`
+#[inline]
+#[cfg_attr(test, assert_instr(i16x8.extmul_low_i8x16_u))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i16x8.extmul_low_i8x16_u"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i16x8_extmul_low_u8x16(a: v128, b: v128) -> v128 {
+    unsafe {
+        let lhs = simd_cast::<simd::u8x8, simd::u16x8>(simd_shuffle8!(
+            a.as_u8x16(),
+            a.as_u8x16(),
+            [0, 1, 2, 3, 4, 5, 6, 7],
+        ));
+        let rhs = simd_cast::<simd::u8x8, simd::u16x8>(simd_shuffle8!(
+            b.as_u8x16(),
+            b.as_u8x16(),
+            [0, 1, 2, 3, 4, 5, 6, 7],
+        ));
+        simd_mul(lhs, rhs).v128()
+    }
+}
+
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub use i16x8_extmul_low_u8x16 as u16x8_extmul_low_u8x16;
+
+/// Lane-wise integer extended multiplication producing twice wider result than
+/// the inputs.
+///
+/// Equivalent of `i16x8_mul(i16x8_extend_high_u8x16(a), i16x8_extend_high_u8x16(b))`
+#[inline]
+#[cfg_attr(test, assert_instr(i16x8.extmul_high_i8x16_u))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i16x8.extmul_high_i8x16_u"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i16x8_extmul_high_u8x16(a: v128, b: v128) -> v128 {
+    unsafe {
+        let lhs = simd_cast::<simd::u8x8, simd::u16x8>(simd_shuffle8!(
+            a.as_u8x16(),
+            a.as_u8x16(),
+            [8, 9, 10, 11, 12, 13, 14, 15],
+        ));
+        let rhs = simd_cast::<simd::u8x8, simd::u16x8>(simd_shuffle8!(
+            b.as_u8x16(),
+            b.as_u8x16(),
+            [8, 9, 10, 11, 12, 13, 14, 15],
+        ));
+        simd_mul(lhs, rhs).v128()
+    }
+}
+
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub use i16x8_extmul_high_u8x16 as u16x8_extmul_high_u8x16;
+
+/// Integer extended pairwise addition producing extended results
+/// (twice wider results than the inputs).
+#[inline]
+#[cfg_attr(test, assert_instr(i32x4.extadd_pairwise_i16x8_s))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i32x4.extadd_pairwise_i16x8_s"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i32x4_extadd_pairwise_i16x8(a: v128) -> v128 {
+    unsafe { llvm_i32x4_extadd_pairwise_i16x8_s(a.as_i16x8()).v128() }
+}
+
+/// Integer extended pairwise addition producing extended results
+/// (twice wider results than the inputs).
+#[inline]
+#[cfg_attr(test, assert_instr(i32x4.extadd_pairwise_i16x8_u))]
+#[doc(alias("i32x4.extadd_pairwise_i16x8_u"))]
+#[target_feature(enable = "simd128")]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i32x4_extadd_pairwise_u16x8(a: v128) -> v128 {
+    unsafe { llvm_i32x4_extadd_pairwise_i16x8_u(a.as_i16x8()).v128() }
+}
+
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub use i32x4_extadd_pairwise_u16x8 as u32x4_extadd_pairwise_u16x8;
+
+/// Lane-wise wrapping absolute value.
+#[inline]
+#[cfg_attr(test, assert_instr(i32x4.abs))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i32x4.abs"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i32x4_abs(a: v128) -> v128 {
+    let a = a.as_i32x4();
+    let zero = simd::i32x4::splat(0);
+    unsafe {
+        simd_select::<simd::m32x4, simd::i32x4>(simd_lt(a, zero), simd_sub(zero, a), a).v128()
+    }
+}
+
+/// Negates a 128-bit vector interpreted as four 32-bit signed integers.
+#[inline]
+#[cfg_attr(test, assert_instr(i32x4.neg))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i32x4.neg"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i32x4_neg(a: v128) -> v128 {
+    unsafe { simd_mul(a.as_i32x4(), simd::i32x4::splat(-1)).v128() }
+}
+
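"Wrapping" absolute value means the single overflowing input wraps rather than trapping: the minimum lane value has no positive counterpart at the same width, exactly as with scalar `wrapping_abs`. A sketch (`wrapping_abs_demo` is a hypothetical name):

// Scalar analogue of the lane-wise wrapping absolute value; sketch only.
fn wrapping_abs_demo() {
    assert_eq!((-5i32).wrapping_abs(), 5);
    // i32::MIN has no i32 negation, so it maps to itself.
    assert_eq!(i32::MIN.wrapping_abs(), i32::MIN);
}

+/// Returns true if all lanes are non-zero, false otherwise.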
+#[inline]
+#[cfg_attr(test, assert_instr(i32x4.all_true))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i32x4.all_true"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i32x4_all_true(a: v128) -> bool {
+    unsafe { llvm_i32x4_all_true(a.as_i32x4()) != 0 }
+}
+
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub use i32x4_all_true as u32x4_all_true;
+
+/// Extracts the high bit for each lane in `a` and produces a scalar mask with
+/// all bits concatenated.
+#[inline]
+#[cfg_attr(test, assert_instr(i32x4.bitmask))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i32x4.bitmask"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i32x4_bitmask(a: v128) -> u8 {
+    unsafe { llvm_bitmask_i32x4(a.as_i32x4()) as u8 }
+}
+
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub use i32x4_bitmask as u32x4_bitmask;
+
+/// Converts low half of the smaller lane vector to a larger lane
+/// vector, sign extended.
+#[inline]
+#[cfg_attr(test, assert_instr(i32x4.extend_low_i16x8_s))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i32x4.extend_low_i16x8_s"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i32x4_extend_low_i16x8(a: v128) -> v128 {
+    unsafe {
+        simd_cast::<simd::i16x4, simd::i32x4>(simd_shuffle4!(
+            a.as_i16x8(),
+            a.as_i16x8(),
+            [0, 1, 2, 3]
+        ))
+        .v128()
+    }
+}
+
+/// Converts high half of the smaller lane vector to a larger lane
+/// vector, sign extended.
+#[inline]
+#[cfg_attr(test, assert_instr(i32x4.extend_high_i16x8_s))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i32x4.extend_high_i16x8_s"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i32x4_extend_high_i16x8(a: v128) -> v128 {
+    unsafe {
+        simd_cast::<simd::i16x4, simd::i32x4>(simd_shuffle4!(
+            a.as_i16x8(),
+            a.as_i16x8(),
+            [4, 5, 6, 7]
+        ))
+        .v128()
+    }
+}
+
+/// Converts low half of the smaller lane vector to a larger lane
+/// vector, zero extended.
+#[inline]
+#[cfg_attr(test, assert_instr(i32x4.extend_low_i16x8_u))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i32x4.extend_low_i16x8_u"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i32x4_extend_low_u16x8(a: v128) -> v128 {
+    unsafe {
+        simd_cast::<simd::u16x4, simd::u32x4>(simd_shuffle4!(
+            a.as_u16x8(),
+            a.as_u16x8(),
+            [0, 1, 2, 3]
+        ))
+        .v128()
+    }
+}
+
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub use i32x4_extend_low_u16x8 as u32x4_extend_low_u16x8;
+
+/// Converts high half of the smaller lane vector to a larger lane
+/// vector, zero extended.
+#[inline]
+#[cfg_attr(test, assert_instr(i32x4.extend_high_i16x8_u))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i32x4.extend_high_i16x8_u"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i32x4_extend_high_u16x8(a: v128) -> v128 {
+    unsafe {
+        simd_cast::<simd::u16x4, simd::u32x4>(simd_shuffle4!(
+            a.as_u16x8(),
+            a.as_u16x8(),
+            [4, 5, 6, 7]
+        ))
+        .v128()
+    }
+}
+
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub use i32x4_extend_high_u16x8 as u32x4_extend_high_u16x8;
+
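For illustration only (not part of the patch): lane `i`'s sign bit becomes bit `i` of the mask, so four 32-bit lanes yield a 4-bit result. A sketch assuming a wasm32 target with `simd128`; `bitmask_demo` is a hypothetical name.

// Sketch only; assumes wasm32 with `simd128` enabled.
use core::arch::wasm32::*;

fn bitmask_demo() {
    // Sign bits per lane: 1, 0, 1, 0 -> mask 0b0101.
    let v = i32x4(-1, 2, -3, 4);
    assert_eq!(i32x4_bitmask(v), 0b0101);
}

+/// Shifts each lane to the left by the specified number of bits.
+///
+/// Only the low bits of the shift amount are used if the shift amount is
+/// greater than the lane width.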
+#[inline]
+#[cfg_attr(test, assert_instr(i32x4.shl))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i32x4.shl"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i32x4_shl(a: v128, amt: u32) -> v128 {
+    unsafe { simd_shl(a.as_i32x4(), simd::i32x4::splat(amt as i32)).v128() }
+}
+
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub use i32x4_shl as u32x4_shl;
+
+/// Shifts each lane to the right by the specified number of bits, sign
+/// extending.
+///
+/// Only the low bits of the shift amount are used if the shift amount is
+/// greater than the lane width.
+#[inline]
+#[cfg_attr(test, assert_instr(i32x4.shr_s))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i32x4.shr_s"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i32x4_shr(a: v128, amt: u32) -> v128 {
+    unsafe { simd_shr(a.as_i32x4(), simd::i32x4::splat(amt as i32)).v128() }
+}
+
+/// Shifts each lane to the right by the specified number of bits, shifting in
+/// zeros.
+///
+/// Only the low bits of the shift amount are used if the shift amount is
+/// greater than the lane width.
+#[inline]
+#[cfg_attr(test, assert_instr(i32x4.shr_u))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i32x4.shr_u"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn u32x4_shr(a: v128, amt: u32) -> v128 {
+    unsafe { simd_shr(a.as_u32x4(), simd::u32x4::splat(amt as u32)).v128() }
+}
+
+/// Adds two 128-bit vectors as if they were two packed vectors of four
+/// 32-bit integers.
+#[inline]
+#[cfg_attr(test, assert_instr(i32x4.add))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i32x4.add"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i32x4_add(a: v128, b: v128) -> v128 {
+    unsafe { simd_add(a.as_i32x4(), b.as_i32x4()).v128() }
+}
+
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub use i32x4_add as u32x4_add;
+
+/// Subtracts two 128-bit vectors as if they were two packed vectors of four
+/// 32-bit integers.
+#[inline]
+#[cfg_attr(test, assert_instr(i32x4.sub))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i32x4.sub"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i32x4_sub(a: v128, b: v128) -> v128 {
+    unsafe { simd_sub(a.as_i32x4(), b.as_i32x4()).v128() }
+}
+
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub use i32x4_sub as u32x4_sub;
+
+/// Multiplies two 128-bit vectors as if they were two packed vectors of four
+/// 32-bit signed integers.
+#[inline]
+#[cfg_attr(test, assert_instr(i32x4.mul))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i32x4.mul"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i32x4_mul(a: v128, b: v128) -> v128 {
+    unsafe { simd_mul(a.as_i32x4(), b.as_i32x4()).v128() }
+}
+
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub use i32x4_mul as u32x4_mul;
+
+/// Compares lane-wise signed integers, and returns the minimum of
+/// each pair.
+#[inline]
+#[cfg_attr(test, assert_instr(i32x4.min_s))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i32x4.min_s"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i32x4_min(a: v128, b: v128) -> v128 {
+    let a = a.as_i32x4();
+    let b = b.as_i32x4();
+    unsafe { simd_select::<simd::i32x4, _>(simd_lt(a, b), a, b).v128() }
+}
+
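In other words, the effective shift is the given amount reduced modulo the lane width. A scalar sketch of the documented rule, for illustration only (`effective_shift` is a hypothetical name):

// Scalar model of the documented masking of shift amounts.
fn effective_shift(amt: u32, lane_bits: u32) -> u32 {
    amt % lane_bits
}
// effective_shift(33, 32) == 1: shifting 32-bit lanes by 33 shifts by 1.

+/// Compares lane-wise unsigned integers, and returns the minimum of
+/// each pair.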
+#[inline]
+#[cfg_attr(test, assert_instr(i32x4.min_u))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i32x4.min_u"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn u32x4_min(a: v128, b: v128) -> v128 {
+    let a = a.as_u32x4();
+    let b = b.as_u32x4();
+    unsafe { simd_select::<simd::i32x4, _>(simd_lt(a, b), a, b).v128() }
+}
+
+/// Compares lane-wise signed integers, and returns the maximum of
+/// each pair.
+#[inline]
+#[cfg_attr(test, assert_instr(i32x4.max_s))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i32x4.max_s"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i32x4_max(a: v128, b: v128) -> v128 {
+    let a = a.as_i32x4();
+    let b = b.as_i32x4();
+    unsafe { simd_select::<simd::i32x4, _>(simd_gt(a, b), a, b).v128() }
+}
+
+/// Compares lane-wise unsigned integers, and returns the maximum of
+/// each pair.
+#[inline]
+#[cfg_attr(test, assert_instr(i32x4.max_u))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i32x4.max_u"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn u32x4_max(a: v128, b: v128) -> v128 {
+    let a = a.as_u32x4();
+    let b = b.as_u32x4();
+    unsafe { simd_select::<simd::i32x4, _>(simd_gt(a, b), a, b).v128() }
+}
+
+/// Multiplies signed 16-bit integers lane-wise in the two input vectors and
+/// adds adjacent pairs of the full 32-bit results.
+#[inline]
+#[cfg_attr(test, assert_instr(i32x4.dot_i16x8_s))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i32x4.dot_i16x8_s"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i32x4_dot_i16x8(a: v128, b: v128) -> v128 {
+    unsafe { llvm_i32x4_dot_i16x8_s(a.as_i16x8(), b.as_i16x8()).v128() }
+}
+
+/// Lane-wise integer extended multiplication producing twice wider result than
+/// the inputs.
+///
+/// Equivalent of `i32x4_mul(i32x4_extend_low_i16x8(a), i32x4_extend_low_i16x8(b))`
+#[inline]
+#[cfg_attr(test, assert_instr(i32x4.extmul_low_i16x8_s))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i32x4.extmul_low_i16x8_s"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i32x4_extmul_low_i16x8(a: v128, b: v128) -> v128 {
+    unsafe {
+        let lhs = simd_cast::<simd::i16x4, simd::i32x4>(simd_shuffle4!(
+            a.as_i16x8(),
+            a.as_i16x8(),
+            [0, 1, 2, 3]
+        ));
+        let rhs = simd_cast::<simd::i16x4, simd::i32x4>(simd_shuffle4!(
+            b.as_i16x8(),
+            b.as_i16x8(),
+            [0, 1, 2, 3]
+        ));
+        simd_mul(lhs, rhs).v128()
+    }
+}
+
+/// Lane-wise integer extended multiplication producing twice wider result than
+/// the inputs.
+///
+/// Equivalent of `i32x4_mul(i32x4_extend_high_i16x8(a), i32x4_extend_high_i16x8(b))`
+#[inline]
+#[cfg_attr(test, assert_instr(i32x4.extmul_high_i16x8_s))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i32x4.extmul_high_i16x8_s"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i32x4_extmul_high_i16x8(a: v128, b: v128) -> v128 {
+    unsafe {
+        let lhs = simd_cast::<simd::i16x4, simd::i32x4>(simd_shuffle4!(
+            a.as_i16x8(),
+            a.as_i16x8(),
+            [4, 5, 6, 7]
+        ));
+        let rhs = simd_cast::<simd::i16x4, simd::i32x4>(simd_shuffle4!(
+            b.as_i16x8(),
+            b.as_i16x8(),
+            [4, 5, 6, 7]
+        ));
+        simd_mul(lhs, rhs).v128()
+    }
+}
+
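A scalar model of one output lane of the dot product, as a sketch: adjacent 16-bit products are formed at full 32-bit width and then summed (the sum is modular, so the one pair that can overflow wraps). `dot_lane` is a hypothetical name.

// Scalar model of one lane of `i32x4_dot_i16x8`; illustration only.
fn dot_lane(a: [i16; 2], b: [i16; 2]) -> i32 {
    (a[0] as i32 * b[0] as i32).wrapping_add(a[1] as i32 * b[1] as i32)
}
// dot_lane([1, 2], [3, 4]) == 1 * 3 + 2 * 4 == 11

+/// Lane-wise integer extended multiplication producing twice wider result than
+/// the inputs.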
+///
+/// Equivalent of `i32x4_mul(i32x4_extend_low_u16x8(a), i32x4_extend_low_u16x8(b))`
+#[inline]
+#[cfg_attr(test, assert_instr(i32x4.extmul_low_i16x8_u))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i32x4.extmul_low_i16x8_u"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i32x4_extmul_low_u16x8(a: v128, b: v128) -> v128 {
+    unsafe {
+        let lhs = simd_cast::<simd::u16x4, simd::u32x4>(simd_shuffle4!(
+            a.as_u16x8(),
+            a.as_u16x8(),
+            [0, 1, 2, 3]
+        ));
+        let rhs = simd_cast::<simd::u16x4, simd::u32x4>(simd_shuffle4!(
+            b.as_u16x8(),
+            b.as_u16x8(),
+            [0, 1, 2, 3]
+        ));
+        simd_mul(lhs, rhs).v128()
+    }
+}
+
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub use i32x4_extmul_low_u16x8 as u32x4_extmul_low_u16x8;
+
+/// Lane-wise integer extended multiplication producing twice wider result than
+/// the inputs.
+///
+/// Equivalent of `i32x4_mul(i32x4_extend_high_u16x8(a), i32x4_extend_high_u16x8(b))`
+#[inline]
+#[cfg_attr(test, assert_instr(i32x4.extmul_high_i16x8_u))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i32x4.extmul_high_i16x8_u"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i32x4_extmul_high_u16x8(a: v128, b: v128) -> v128 {
+    unsafe {
+        let lhs = simd_cast::<simd::u16x4, simd::u32x4>(simd_shuffle4!(
+            a.as_u16x8(),
+            a.as_u16x8(),
+            [4, 5, 6, 7]
+        ));
+        let rhs = simd_cast::<simd::u16x4, simd::u32x4>(simd_shuffle4!(
+            b.as_u16x8(),
+            b.as_u16x8(),
+            [4, 5, 6, 7]
+        ));
+        simd_mul(lhs, rhs).v128()
+    }
+}
+
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub use i32x4_extmul_high_u16x8 as u32x4_extmul_high_u16x8;
+
+/// Lane-wise wrapping absolute value.
+#[inline]
+// #[cfg_attr(test, assert_instr(i64x2.abs))] // FIXME llvm
+#[target_feature(enable = "simd128")]
+#[doc(alias("i64x2.abs"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i64x2_abs(a: v128) -> v128 {
+    let a = a.as_i64x2();
+    let zero = simd::i64x2::splat(0);
+    unsafe {
+        simd_select::<simd::m64x2, simd::i64x2>(simd_lt(a, zero), simd_sub(zero, a), a).v128()
+    }
+}
+
+/// Negates a 128-bit vector interpreted as two 64-bit signed integers.
+#[inline]
+#[cfg_attr(test, assert_instr(i64x2.neg))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i64x2.neg"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i64x2_neg(a: v128) -> v128 {
+    unsafe { simd_mul(a.as_i64x2(), simd::i64x2::splat(-1)).v128() }
+}
+
+/// Returns true if all lanes are non-zero, false otherwise.
+#[inline]
+#[cfg_attr(test, assert_instr(i64x2.all_true))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i64x2.all_true"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i64x2_all_true(a: v128) -> bool {
+    unsafe { llvm_i64x2_all_true(a.as_i64x2()) != 0 }
+}
+
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub use i64x2_all_true as u64x2_all_true;
+
+/// Extracts the high bit for each lane in `a` and produces a scalar mask with
+/// all bits concatenated.
+#[inline]
+#[cfg_attr(test, assert_instr(i64x2.bitmask))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i64x2.bitmask"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i64x2_bitmask(a: v128) -> u8 {
+    unsafe { llvm_bitmask_i64x2(a.as_i64x2()) as u8 }
+}
+
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub use i64x2_bitmask as u64x2_bitmask;
+
+/// Converts low half of the smaller lane vector to a larger lane
+/// vector, sign extended.
+#[inline] +#[cfg_attr(test, assert_instr(i64x2.extend_low_i32x4_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i64x2.extend_low_i32x4_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i64x2_extend_low_i32x4(a: v128) -> v128 { + unsafe { + simd_cast::<simd::i32x2, simd::i64x2>(simd_shuffle2!(a.as_i32x4(), a.as_i32x4(), [0, 1])) + .v128() + } +} + +/// Converts high half of the smaller lane vector to a larger lane +/// vector, sign extended. +#[inline] +#[cfg_attr(test, assert_instr(i64x2.extend_high_i32x4_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i64x2.extend_high_i32x4_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i64x2_extend_high_i32x4(a: v128) -> v128 { + unsafe { + simd_cast::<simd::i32x2, simd::i64x2>(simd_shuffle2!(a.as_i32x4(), a.as_i32x4(), [2, 3])) + .v128() + } +} + +/// Converts low half of the smaller lane vector to a larger lane +/// vector, zero extended. +#[inline] +#[cfg_attr(test, assert_instr(i64x2.extend_low_i32x4_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i64x2.extend_low_i32x4_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i64x2_extend_low_u32x4(a: v128) -> v128 { + unsafe { + simd_cast::<simd::u32x2, simd::i64x2>(simd_shuffle2!(a.as_u32x4(), a.as_u32x4(), [0, 1])) + .v128() + } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i64x2_extend_low_u32x4 as u64x2_extend_low_u32x4; + +/// Converts high half of the smaller lane vector to a larger lane +/// vector, zero extended. +#[inline] +#[cfg_attr(test, assert_instr(i64x2.extend_high_i32x4_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("i64x2.extend_high_i32x4_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i64x2_extend_high_u32x4(a: v128) -> v128 { + unsafe { + simd_cast::<simd::u32x2, simd::i64x2>(simd_shuffle2!(a.as_u32x4(), a.as_u32x4(), [2, 3])) + .v128() + } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i64x2_extend_high_u32x4 as u64x2_extend_high_u32x4; + +/// Shifts each lane to the left by the specified number of bits. +/// +/// Only the low bits of the shift amount are used if the shift amount is +/// greater than the lane width. +#[inline] +#[cfg_attr(test, assert_instr(i64x2.shl))] +#[target_feature(enable = "simd128")] +#[doc(alias("i64x2.shl"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i64x2_shl(a: v128, amt: u32) -> v128 { + unsafe { simd_shl(a.as_i64x2(), simd::i64x2::splat(amt as i64)).v128() } +} + +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub use i64x2_shl as u64x2_shl; + +/// Shifts each lane to the right by the specified number of bits, sign +/// extending. +/// +/// Only the low bits of the shift amount are used if the shift amount is +/// greater than the lane width. +#[inline] +#[cfg_attr(test, assert_instr(i64x2.shr_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("i64x2.shr_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn i64x2_shr(a: v128, amt: u32) -> v128 { + unsafe { simd_shr(a.as_i64x2(), simd::i64x2::splat(amt as i64)).v128() } +} + +/// Shifts each lane to the right by the specified number of bits, shifting in +/// zeros. +/// +/// Only the low bits of the shift amount are used if the shift amount is +/// greater than the lane width. 
+#[inline]
+#[cfg_attr(test, assert_instr(i64x2.shr_u))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i64x2.shr_u"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn u64x2_shr(a: v128, amt: u32) -> v128 {
+    unsafe { simd_shr(a.as_u64x2(), simd::u64x2::splat(amt as u64)).v128() }
+}
+
+/// Adds two 128-bit vectors as if they were two packed vectors of two
+/// 64-bit integers.
+#[inline]
+#[cfg_attr(test, assert_instr(i64x2.add))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i64x2.add"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i64x2_add(a: v128, b: v128) -> v128 {
+    unsafe { simd_add(a.as_i64x2(), b.as_i64x2()).v128() }
+}
+
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub use i64x2_add as u64x2_add;
+
+/// Subtracts two 128-bit vectors as if they were two packed vectors of two
+/// 64-bit integers.
+#[inline]
+#[cfg_attr(test, assert_instr(i64x2.sub))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i64x2.sub"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i64x2_sub(a: v128, b: v128) -> v128 {
+    unsafe { simd_sub(a.as_i64x2(), b.as_i64x2()).v128() }
+}
+
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub use i64x2_sub as u64x2_sub;
+
+/// Multiplies two 128-bit vectors as if they were two packed vectors of two
+/// 64-bit integers.
+#[inline]
+#[cfg_attr(test, assert_instr(i64x2.mul))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i64x2.mul"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i64x2_mul(a: v128, b: v128) -> v128 {
+    unsafe { simd_mul(a.as_i64x2(), b.as_i64x2()).v128() }
+}
+
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub use i64x2_mul as u64x2_mul;
+
+/// Lane-wise integer extended multiplication producing twice wider result than
+/// the inputs.
+///
+/// Equivalent of `i64x2_mul(i64x2_extend_low_i32x4(a), i64x2_extend_low_i32x4(b))`
+#[inline]
+#[cfg_attr(test, assert_instr(i64x2.extmul_low_i32x4_s))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i64x2.extmul_low_i32x4_s"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i64x2_extmul_low_i32x4(a: v128, b: v128) -> v128 {
+    unsafe {
+        let lhs = simd_cast::<simd::i32x2, simd::i64x2>(simd_shuffle2!(
+            a.as_i32x4(),
+            a.as_i32x4(),
+            [0, 1]
+        ));
+        let rhs = simd_cast::<simd::i32x2, simd::i64x2>(simd_shuffle2!(
+            b.as_i32x4(),
+            b.as_i32x4(),
+            [0, 1]
+        ));
+        simd_mul(lhs, rhs).v128()
+    }
+}
+
+/// Lane-wise integer extended multiplication producing twice wider result than
+/// the inputs.
+///
+/// Equivalent of `i64x2_mul(i64x2_extend_high_i32x4(a), i64x2_extend_high_i32x4(b))`
+#[inline]
+#[cfg_attr(test, assert_instr(i64x2.extmul_high_i32x4_s))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i64x2.extmul_high_i32x4_s"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i64x2_extmul_high_i32x4(a: v128, b: v128) -> v128 {
+    unsafe {
+        let lhs = simd_cast::<simd::i32x2, simd::i64x2>(simd_shuffle2!(
+            a.as_i32x4(),
+            a.as_i32x4(),
+            [2, 3]
+        ));
+        let rhs = simd_cast::<simd::i32x2, simd::i64x2>(simd_shuffle2!(
+            b.as_i32x4(),
+            b.as_i32x4(),
+            [2, 3]
+        ));
+        simd_mul(lhs, rhs).v128()
+    }
+}
+
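The point of widening before multiplying, sketched below for illustration only: a product that wraps in the input lane type survives in the doubled-width result, which is what the `extmul` family computes per lane (`extmul_demo` is a hypothetical name).

// Illustration: widening first preserves a product that wraps in i32.
fn extmul_demo() {
    let x: i32 = 0x4000_0000;
    assert_eq!(x.wrapping_mul(x), 0); // 2^60 wraps to 0 in 32 bits
    assert_eq!(x as i64 * x as i64, 1i64 << 60); // survives in 64 bits
}

+/// Lane-wise integer extended multiplication producing twice wider result than
+/// the inputs.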
+///
+/// Equivalent of `i64x2_mul(i64x2_extend_low_u32x4(a), i64x2_extend_low_u32x4(b))`
+#[inline]
+#[cfg_attr(test, assert_instr(i64x2.extmul_low_i32x4_u))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i64x2.extmul_low_i32x4_u"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i64x2_extmul_low_u32x4(a: v128, b: v128) -> v128 {
+    unsafe {
+        let lhs = simd_cast::<simd::u32x2, simd::u64x2>(simd_shuffle2!(
+            a.as_u32x4(),
+            a.as_u32x4(),
+            [0, 1]
+        ));
+        let rhs = simd_cast::<simd::u32x2, simd::u64x2>(simd_shuffle2!(
+            b.as_u32x4(),
+            b.as_u32x4(),
+            [0, 1]
+        ));
+        simd_mul(lhs, rhs).v128()
+    }
+}
+
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub use i64x2_extmul_low_u32x4 as u64x2_extmul_low_u32x4;
+
+/// Lane-wise integer extended multiplication producing twice wider result than
+/// the inputs.
+///
+/// Equivalent of `i64x2_mul(i64x2_extend_high_u32x4(a), i64x2_extend_high_u32x4(b))`
+#[inline]
+#[cfg_attr(test, assert_instr(i64x2.extmul_high_i32x4_u))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i64x2.extmul_high_i32x4_u"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i64x2_extmul_high_u32x4(a: v128, b: v128) -> v128 {
+    unsafe {
+        let lhs = simd_cast::<simd::u32x2, simd::u64x2>(simd_shuffle2!(
+            a.as_u32x4(),
+            a.as_u32x4(),
+            [2, 3]
+        ));
+        let rhs = simd_cast::<simd::u32x2, simd::u64x2>(simd_shuffle2!(
+            b.as_u32x4(),
+            b.as_u32x4(),
+            [2, 3]
+        ));
+        simd_mul(lhs, rhs).v128()
+    }
+}
+
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub use i64x2_extmul_high_u32x4 as u64x2_extmul_high_u32x4;
+
+/// Lane-wise rounding to the nearest integral value not smaller than the input.
+#[inline]
+#[cfg_attr(test, assert_instr(f32x4.ceil))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f32x4.ceil"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f32x4_ceil(a: v128) -> v128 {
+    unsafe { llvm_f32x4_ceil(a.as_f32x4()).v128() }
+}
+
+/// Lane-wise rounding to the nearest integral value not greater than the input.
+#[inline]
+#[cfg_attr(test, assert_instr(f32x4.floor))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f32x4.floor"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f32x4_floor(a: v128) -> v128 {
+    unsafe { llvm_f32x4_floor(a.as_f32x4()).v128() }
+}
+
+/// Lane-wise rounding to the nearest integral value with the magnitude not
+/// larger than the input.
+#[inline]
+#[cfg_attr(test, assert_instr(f32x4.trunc))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f32x4.trunc"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f32x4_trunc(a: v128) -> v128 {
+    unsafe { llvm_f32x4_trunc(a.as_f32x4()).v128() }
+}
+
+/// Lane-wise rounding to the nearest integral value; if two values are equally
+/// near, rounds to the even one.
+#[inline]
+#[cfg_attr(test, assert_instr(f32x4.nearest))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f32x4.nearest"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f32x4_nearest(a: v128) -> v128 {
+    unsafe { llvm_f32x4_nearest(a.as_f32x4()).v128() }
+}
+
+/// Calculates the absolute value of each lane of a 128-bit vector interpreted
+/// as four 32-bit floating point numbers.
+#[inline]
+#[cfg_attr(test, assert_instr(f32x4.abs))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f32x4.abs"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f32x4_abs(a: v128) -> v128 {
+    unsafe { llvm_f32x4_abs(a.as_f32x4()).v128() }
+}
+
+/// Negates each lane of a 128-bit vector interpreted as four 32-bit floating
+/// point numbers.
+#[inline]
+#[cfg_attr(test, assert_instr(f32x4.neg))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f32x4.neg"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f32x4_neg(a: v128) -> v128 {
+    f32x4_mul(a, f32x4_splat(-1.))
+}
+
+/// Calculates the square root of each lane of a 128-bit vector interpreted as
+/// four 32-bit floating point numbers.
+#[inline]
+#[cfg_attr(test, assert_instr(f32x4.sqrt))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f32x4.sqrt"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f32x4_sqrt(a: v128) -> v128 {
+    unsafe { llvm_f32x4_sqrt(a.as_f32x4()).v128() }
+}
+
+/// Lane-wise addition of two 128-bit vectors interpreted as four 32-bit
+/// floating point numbers.
+#[inline]
+#[cfg_attr(test, assert_instr(f32x4.add))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f32x4.add"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f32x4_add(a: v128, b: v128) -> v128 {
+    unsafe { simd_add(a.as_f32x4(), b.as_f32x4()).v128() }
+}
+
+/// Lane-wise subtraction of two 128-bit vectors interpreted as four 32-bit
+/// floating point numbers.
+#[inline]
+#[cfg_attr(test, assert_instr(f32x4.sub))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f32x4.sub"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f32x4_sub(a: v128, b: v128) -> v128 {
+    unsafe { simd_sub(a.as_f32x4(), b.as_f32x4()).v128() }
+}
+
+/// Lane-wise multiplication of two 128-bit vectors interpreted as four 32-bit
+/// floating point numbers.
+#[inline]
+#[cfg_attr(test, assert_instr(f32x4.mul))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f32x4.mul"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f32x4_mul(a: v128, b: v128) -> v128 {
+    unsafe { simd_mul(a.as_f32x4(), b.as_f32x4()).v128() }
+}
+
+/// Lane-wise division of two 128-bit vectors interpreted as four 32-bit
+/// floating point numbers.
+#[inline]
+#[cfg_attr(test, assert_instr(f32x4.div))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f32x4.div"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f32x4_div(a: v128, b: v128) -> v128 {
+    unsafe { simd_div(a.as_f32x4(), b.as_f32x4()).v128() }
+}
+
+/// Calculates the lane-wise minimum of two 128-bit vectors interpreted
+/// as four 32-bit floating point numbers.
+#[inline]
+#[cfg_attr(test, assert_instr(f32x4.min))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f32x4.min"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f32x4_min(a: v128, b: v128) -> v128 {
+    unsafe { llvm_f32x4_min(a.as_f32x4(), b.as_f32x4()).v128() }
+}
+
+/// Calculates the lane-wise maximum of two 128-bit vectors interpreted
+/// as four 32-bit floating point numbers.
+#[inline]
+#[cfg_attr(test, assert_instr(f32x4.max))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f32x4.max"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f32x4_max(a: v128, b: v128) -> v128 {
+    unsafe { llvm_f32x4_max(a.as_f32x4(), b.as_f32x4()).v128() }
+}
+
+/// Lane-wise minimum value, defined as `b < a ? b : a`
+#[inline]
+#[cfg_attr(test, assert_instr(f32x4.pmin))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f32x4.pmin"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f32x4_pmin(a: v128, b: v128) -> v128 {
+    unsafe {
+        simd_select::<simd::m32x4, simd::f32x4>(
+            simd_lt(b.as_f32x4(), a.as_f32x4()),
+            b.as_f32x4(),
+            a.as_f32x4(),
+        )
+        .v128()
+    }
+}
+
+/// Lane-wise maximum value, defined as `a < b ? b : a`
+#[inline]
+#[cfg_attr(test, assert_instr(f32x4.pmax))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f32x4.pmax"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f32x4_pmax(a: v128, b: v128) -> v128 {
+    unsafe {
+        simd_select::<simd::m32x4, simd::f32x4>(
+            simd_lt(a.as_f32x4(), b.as_f32x4()),
+            b.as_f32x4(),
+            a.as_f32x4(),
+        )
+        .v128()
+    }
+}
+
+/// Lane-wise rounding to the nearest integral value not smaller than the input.
+#[inline]
+#[cfg_attr(test, assert_instr(f64x2.ceil))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f64x2.ceil"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f64x2_ceil(a: v128) -> v128 {
+    unsafe { llvm_f64x2_ceil(a.as_f64x2()).v128() }
+}
+
+/// Lane-wise rounding to the nearest integral value not greater than the input.
+#[inline]
+#[cfg_attr(test, assert_instr(f64x2.floor))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f64x2.floor"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f64x2_floor(a: v128) -> v128 {
+    unsafe { llvm_f64x2_floor(a.as_f64x2()).v128() }
+}
+
+/// Lane-wise rounding to the nearest integral value with the magnitude not
+/// larger than the input.
+#[inline]
+#[cfg_attr(test, assert_instr(f64x2.trunc))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f64x2.trunc"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f64x2_trunc(a: v128) -> v128 {
+    unsafe { llvm_f64x2_trunc(a.as_f64x2()).v128() }
+}
+
+/// Lane-wise rounding to the nearest integral value; if two values are equally
+/// near, rounds to the even one.
+#[inline]
+#[cfg_attr(test, assert_instr(f64x2.nearest))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f64x2.nearest"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f64x2_nearest(a: v128) -> v128 {
+    unsafe { llvm_f64x2_nearest(a.as_f64x2()).v128() }
+}
+
+/// Calculates the absolute value of each lane of a 128-bit vector interpreted
+/// as two 64-bit floating point numbers.
+#[inline]
+#[cfg_attr(test, assert_instr(f64x2.abs))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f64x2.abs"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f64x2_abs(a: v128) -> v128 {
+    unsafe { llvm_f64x2_abs(a.as_f64x2()).v128() }
+}
+
+/// Negates each lane of a 128-bit vector interpreted as two 64-bit floating
+/// point numbers.
+#[inline]
+#[cfg_attr(test, assert_instr(f64x2.neg))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f64x2.neg"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f64x2_neg(a: v128) -> v128 {
+    f64x2_mul(a, f64x2_splat(-1.0))
+}
+
+/// Calculates the square root of each lane of a 128-bit vector interpreted as
+/// two 64-bit floating point numbers.
+#[inline]
+#[cfg_attr(test, assert_instr(f64x2.sqrt))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f64x2.sqrt"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f64x2_sqrt(a: v128) -> v128 {
+    unsafe { llvm_f64x2_sqrt(a.as_f64x2()).v128() }
+}
+
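As the definitions above show, `pmin`/`pmax` are plain compare-and-select operations: when either operand is NaN the comparison is false and `a` is returned unchanged, unlike `f32x4_min`/`f64x2_min` and friends, which propagate NaN. A scalar sketch of the pseudo-minimum rule (`pmin` here is a hypothetical free function, not the intrinsic):

// Scalar model of one lane of `f64x2_pmin`: b < a ? b : a. Sketch only.
fn pmin(a: f64, b: f64) -> f64 {
    if b < a { b } else { a }
}
// pmin(f64::NAN, 1.0) is NaN (the comparison is false, so `a` wins),
// while pmin(1.0, f64::NAN) is 1.0.

+/// Lane-wise addition of two 128-bit vectors interpreted as two 64-bit
+/// floating point numbers.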
+#[inline]
+#[cfg_attr(test, assert_instr(f64x2.add))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f64x2.add"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f64x2_add(a: v128, b: v128) -> v128 {
+    unsafe { simd_add(a.as_f64x2(), b.as_f64x2()).v128() }
+}
+
+/// Lane-wise subtraction of two 128-bit vectors interpreted as two 64-bit
+/// floating point numbers.
+#[inline]
+#[cfg_attr(test, assert_instr(f64x2.sub))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f64x2.sub"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f64x2_sub(a: v128, b: v128) -> v128 {
+    unsafe { simd_sub(a.as_f64x2(), b.as_f64x2()).v128() }
+}
+
+/// Lane-wise multiplication of two 128-bit vectors interpreted as two 64-bit
+/// floating point numbers.
+#[inline]
+#[cfg_attr(test, assert_instr(f64x2.mul))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f64x2.mul"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f64x2_mul(a: v128, b: v128) -> v128 {
+    unsafe { simd_mul(a.as_f64x2(), b.as_f64x2()).v128() }
+}
+
+/// Lane-wise division of two 128-bit vectors interpreted as two 64-bit
+/// floating point numbers.
+#[inline]
+#[cfg_attr(test, assert_instr(f64x2.div))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f64x2.div"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f64x2_div(a: v128, b: v128) -> v128 {
+    unsafe { simd_div(a.as_f64x2(), b.as_f64x2()).v128() }
+}
+
+/// Calculates the lane-wise minimum of two 128-bit vectors interpreted
+/// as two 64-bit floating point numbers.
+#[inline]
+#[cfg_attr(test, assert_instr(f64x2.min))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f64x2.min"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f64x2_min(a: v128, b: v128) -> v128 {
+    unsafe { llvm_f64x2_min(a.as_f64x2(), b.as_f64x2()).v128() }
+}
+
+/// Calculates the lane-wise maximum of two 128-bit vectors interpreted
+/// as two 64-bit floating point numbers.
+#[inline]
+#[cfg_attr(test, assert_instr(f64x2.max))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f64x2.max"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f64x2_max(a: v128, b: v128) -> v128 {
+    unsafe { llvm_f64x2_max(a.as_f64x2(), b.as_f64x2()).v128() }
+}
+
+/// Lane-wise minimum value, defined as `b < a ? b : a`
+#[inline]
+#[cfg_attr(test, assert_instr(f64x2.pmin))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f64x2.pmin"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f64x2_pmin(a: v128, b: v128) -> v128 {
+    unsafe {
+        simd_select::<simd::m64x2, simd::f64x2>(
+            simd_lt(b.as_f64x2(), a.as_f64x2()),
+            b.as_f64x2(),
+            a.as_f64x2(),
+        )
+        .v128()
+    }
+}
+
+/// Lane-wise maximum value, defined as `a < b ? b : a`
+#[inline]
+#[cfg_attr(test, assert_instr(f64x2.pmax))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f64x2.pmax"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f64x2_pmax(a: v128, b: v128) -> v128 {
+    unsafe {
+        simd_select::<simd::m64x2, simd::f64x2>(
+            simd_lt(a.as_f64x2(), b.as_f64x2()),
+            b.as_f64x2(),
+            a.as_f64x2(),
+        )
+        .v128()
+    }
+}
+
+/// Converts a 128-bit vector interpreted as four 32-bit floating point numbers
+/// into a 128-bit vector of four 32-bit signed integers.
+///
+/// NaN is converted to 0, and values out of bounds are saturated to the
+/// nearest representable integer.
+#[inline]
+#[cfg_attr(test, assert_instr(i32x4.trunc_sat_f32x4_s))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i32x4.trunc_sat_f32x4_s"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i32x4_trunc_sat_f32x4(a: v128) -> v128 {
+    unsafe { llvm_i32x4_trunc_sat_f32x4_s(a.as_f32x4()).v128() }
+}
+
+/// Converts a 128-bit vector interpreted as four 32-bit floating point numbers
+/// into a 128-bit vector of four 32-bit unsigned integers.
+///
+/// NaN is converted to 0, and values out of bounds are saturated to the
+/// nearest representable integer.
+#[inline]
+#[cfg_attr(test, assert_instr(i32x4.trunc_sat_f32x4_u))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i32x4.trunc_sat_f32x4_u"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn u32x4_trunc_sat_f32x4(a: v128) -> v128 {
+    unsafe { llvm_i32x4_trunc_sat_f32x4_u(a.as_f32x4()).v128() }
+}
+
+/// Converts a 128-bit vector interpreted as four 32-bit signed integers into a
+/// 128-bit vector of four 32-bit floating point numbers.
+#[inline]
+#[cfg_attr(test, assert_instr(f32x4.convert_i32x4_s))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f32x4.convert_i32x4_s"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f32x4_convert_i32x4(a: v128) -> v128 {
+    unsafe { simd_cast::<_, simd::f32x4>(a.as_i32x4()).v128() }
+}
+
+/// Converts a 128-bit vector interpreted as four 32-bit unsigned integers into a
+/// 128-bit vector of four 32-bit floating point numbers.
+#[inline]
+#[cfg_attr(test, assert_instr(f32x4.convert_i32x4_u))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f32x4.convert_i32x4_u"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f32x4_convert_u32x4(a: v128) -> v128 {
+    unsafe { simd_cast::<_, simd::f32x4>(a.as_u32x4()).v128() }
+}
+
+/// Saturating conversion of the two double-precision floating point lanes to
+/// two lower integer lanes using the IEEE `convertToIntegerTowardZero`
+/// function.
+///
+/// The two higher lanes of the result are initialized to zero. If any input
+/// lane is a NaN, the resulting lane is 0. If the rounded integer value of a
+/// lane is outside the range of the destination type, the result is saturated
+/// to the nearest representable integer value.
+#[inline]
+#[cfg_attr(test, assert_instr(i32x4.trunc_sat_f64x2_s_zero))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("i32x4.trunc_sat_f64x2_s_zero"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn i32x4_trunc_sat_f64x2_zero(a: v128) -> v128 {
+    let ret: simd::i32x4 = unsafe {
+        simd_shuffle4!(
+            llvm_i32x2_trunc_sat_f64x2_s(a.as_f64x2()),
+            simd::i32x2::splat(0),
+            [0, 1, 2, 3],
+        )
+    };
+    ret.v128()
+}
+
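Illustration of the saturating rules just described, assuming a wasm32 target with `simd128`; values, lanes, and the function name `trunc_sat_demo` are chosen only for the example.

// Sketch only; assumes wasm32 with `simd128` enabled.
use core::arch::wasm32::*;

fn trunc_sat_demo() {
    let v = f32x4(1.9, -1.9, f32::NAN, 3.0e9);
    let t = i32x4_trunc_sat_f32x4(v);
    assert_eq!(i32x4_extract_lane::<0>(t), 1); // truncates toward zero
    assert_eq!(i32x4_extract_lane::<1>(t), -1);
    assert_eq!(i32x4_extract_lane::<2>(t), 0); // NaN -> 0
    assert_eq!(i32x4_extract_lane::<3>(t), i32::MAX); // saturates
}

+/// Saturating conversion of the two double-precision floating point lanes to
+/// two lower integer lanes using the IEEE `convertToIntegerTowardZero`
+/// function.
+///
+/// The two higher lanes of the result are initialized to zero. If any input
+/// lane is a NaN, the resulting lane is 0. If the rounded integer value of a
+/// lane is outside the range of the destination type, the result is saturated
+/// to the nearest representable integer value.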
+#[inline] +#[cfg_attr(test, assert_instr(i32x4.trunc_sat_f64x2_u_zero))] +#[target_feature(enable = "simd128")] +#[doc(alias("i32x4.trunc_sat_f64x2_u_zero"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn u32x4_trunc_sat_f64x2_zero(a: v128) -> v128 { + let ret: simd::i32x4 = unsafe { + simd_shuffle4!( + llvm_i32x2_trunc_sat_f64x2_u(a.as_f64x2()), + simd::i32x2::splat(0), + [0, 1, 2, 3], + ) + }; + ret.v128() +} + +/// Lane-wise conversion from integer to floating point. +#[inline] +#[cfg_attr(test, assert_instr(f64x2.convert_low_i32x4_s))] +#[target_feature(enable = "simd128")] +#[doc(alias("f64x2.convert_low_i32x4_s"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f64x2_convert_low_i32x4(a: v128) -> v128 { + unsafe { + simd_cast::<simd::i32x2, simd::f64x2>(simd_shuffle2!(a.as_i32x4(), a.as_i32x4(), [0, 1],)) + .v128() + } +} + +/// Lane-wise conversion from integer to floating point. +#[inline] +#[cfg_attr(test, assert_instr(f64x2.convert_low_i32x4_u))] +#[target_feature(enable = "simd128")] +#[doc(alias("f64x2.convert_low_i32x4_u"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f64x2_convert_low_u32x4(a: v128) -> v128 { + unsafe { + simd_cast::<simd::u32x2, simd::f64x2>(simd_shuffle2!(a.as_u32x4(), a.as_u32x4(), [0, 1],)) + .v128() + } +} + +/// Conversion of the two double-precision floating point lanes to two lower +/// single-precision lanes of the result. The two higher lanes of the result are +/// initialized to zero. If the conversion result is not representable as a +/// single-precision floating point number, it is rounded to the nearest-even +/// representable number. +#[inline] +#[cfg_attr(test, assert_instr(f32x4.demote_f64x2_zero))] +#[target_feature(enable = "simd128")] +#[doc(alias("f32x4.demote_f64x2_zero"))] +#[stable(feature = "wasm_simd", since = "1.54.0")] +pub fn f32x4_demote_f64x2_zero(a: v128) -> v128 { + unsafe { + simd_cast::<simd::f64x4, simd::f32x4>(simd_shuffle4!( + a.as_f64x2(), + simd::f64x2::splat(0.0), + [0, 1, 2, 3] + )) + .v128() + } +} + +/// Conversion of the two lower single-precision floating point lanes to the two +/// double-precision lanes of the result. 
+#[inline]
+#[cfg_attr(test, assert_instr(f64x2.promote_low_f32x4))]
+#[target_feature(enable = "simd128")]
+#[doc(alias("f64x2.promote_low_f32x4"))]
+#[stable(feature = "wasm_simd", since = "1.54.0")]
+pub fn f64x2_promote_low_f32x4(a: v128) -> v128 {
+    unsafe {
+        simd_cast::<simd::f32x2, simd::f64x2>(simd_shuffle2!(a.as_f32x4(), a.as_f32x4(), [0, 1]))
+            .v128()
+    }
+}
+
+#[cfg(test)]
+pub mod tests {
+    use super::*;
+    use core::ops::{Add, Div, Mul, Neg, Sub};
+    use std;
+    use std::fmt::Debug;
+    use std::mem::{self, transmute};
+    use std::num::Wrapping;
+    use std::prelude::v1::*;
+
+    fn compare_bytes(a: v128, b: v128) {
+        let a: [u8; 16] = unsafe { transmute(a) };
+        let b: [u8; 16] = unsafe { transmute(b) };
+        assert_eq!(a, b);
+    }
+
+    #[test]
+    fn test_load() {
+        unsafe {
+            let arr: [i32; 4] = [0, 1, 2, 3];
+            let vec = v128_load(arr.as_ptr() as *const v128);
+            compare_bytes(vec, i32x4(0, 1, 2, 3));
+        }
+    }
+
+    #[test]
+    fn test_load_extend() {
+        unsafe {
+            let arr: [i8; 8] = [-3, -2, -1, 0, 1, 2, 3, 4];
+            let vec = i16x8_load_extend_i8x8(arr.as_ptr());
+            compare_bytes(vec, i16x8(-3, -2, -1, 0, 1, 2, 3, 4));
+            let vec = i16x8_load_extend_u8x8(arr.as_ptr() as *const u8);
+            compare_bytes(vec, i16x8(253, 254, 255, 0, 1, 2, 3, 4));
+
+            let arr: [i16; 4] = [-1, 0, 1, 2];
+            let vec = i32x4_load_extend_i16x4(arr.as_ptr());
+            compare_bytes(vec, i32x4(-1, 0, 1, 2));
+            let vec = i32x4_load_extend_u16x4(arr.as_ptr() as *const u16);
+            compare_bytes(vec, i32x4(65535, 0, 1, 2));
+
+            let arr: [i32; 2] = [-1, 1];
+            let vec = i64x2_load_extend_i32x2(arr.as_ptr());
+            compare_bytes(vec, i64x2(-1, 1));
+            let vec = i64x2_load_extend_u32x2(arr.as_ptr() as *const u32);
+            compare_bytes(vec, i64x2(u32::max_value().into(), 1));
+        }
+    }
+
+    #[test]
+    fn test_load_splat() {
+        unsafe {
+            compare_bytes(v128_load8_splat(&8), i8x16_splat(8));
+            compare_bytes(v128_load16_splat(&9), i16x8_splat(9));
+            compare_bytes(v128_load32_splat(&10), i32x4_splat(10));
+            compare_bytes(v128_load64_splat(&11), i64x2_splat(11));
+        }
+    }
+
+    #[test]
+    fn test_load_zero() {
+        unsafe {
+            compare_bytes(v128_load32_zero(&10), i32x4(10, 0, 0, 0));
+            compare_bytes(v128_load64_zero(&11), i64x2(11, 0));
+        }
+    }
+
+    #[test]
+    fn test_store() {
+        unsafe {
+            let mut spot = i8x16_splat(0);
+            v128_store(&mut spot, i8x16_splat(1));
+            compare_bytes(spot, i8x16_splat(1));
+        }
+    }
+
+    #[test]
+    fn test_load_lane() {
+        unsafe {
+            let zero = i8x16_splat(0);
+            compare_bytes(
+                v128_load8_lane::<2>(zero, &1),
+                i8x16_replace_lane::<2>(zero, 1),
+            );
+
+            compare_bytes(
+                v128_load16_lane::<2>(zero, &1),
+                i16x8_replace_lane::<2>(zero, 1),
+            );
+
+            compare_bytes(
+                v128_load32_lane::<2>(zero, &1),
+                i32x4_replace_lane::<2>(zero, 1),
+            );
+
+            compare_bytes(
+                v128_load64_lane::<1>(zero, &1),
+                i64x2_replace_lane::<1>(zero, 1),
+            );
+        }
+    }
+
+    #[test]
+    fn test_store_lane() {
+        unsafe {
+            let mut spot = 0;
+            let zero = i8x16_splat(0);
+            v128_store8_lane::<5>(i8x16_replace_lane::<5>(zero, 7), &mut spot);
+            assert_eq!(spot, 7);
+
+            let mut spot = 0;
+            v128_store16_lane::<5>(i16x8_replace_lane::<5>(zero, 7), &mut spot);
+            assert_eq!(spot, 7);
+
+            let mut spot = 0;
+            v128_store32_lane::<3>(i32x4_replace_lane::<3>(zero, 7), &mut spot);
+            assert_eq!(spot, 7);
+
+            let mut spot = 0;
+            v128_store64_lane::<0>(i64x2_replace_lane::<0>(zero, 7), &mut spot);
+            assert_eq!(spot, 7);
+        }
+    }
+
+    #[test]
+    fn test_i8x16() {
+        const A: v128 = super::i8x16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        compare_bytes(A, A);
+
+        const _: v128 = i16x8(0, 1, 2, 3, 4, 5, 6, 7);
+
const _: v128 = i32x4(0, 1, 2, 3); + const _: v128 = i64x2(0, 1); + const _: v128 = f32x4(0., 1., 2., 3.); + const _: v128 = f64x2(0., 1.); + + let bytes: [i16; 8] = unsafe { mem::transmute(i16x8(-1, -2, -3, -4, -5, -6, -7, -8)) }; + assert_eq!(bytes, [-1, -2, -3, -4, -5, -6, -7, -8]); + let bytes: [i8; 16] = unsafe { + mem::transmute(i8x16( + -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, + )) + }; + assert_eq!( + bytes, + [-1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16] + ); + } + + #[test] + fn test_shuffle() { + let vec_a = i8x16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let vec_b = i8x16( + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + ); + + let vec_r = i8x16_shuffle::<0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30>( + vec_a, vec_b, + ); + let vec_e = i8x16(0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30); + compare_bytes(vec_r, vec_e); + + let vec_a = i16x8(0, 1, 2, 3, 4, 5, 6, 7); + let vec_b = i16x8(8, 9, 10, 11, 12, 13, 14, 15); + let vec_r = i16x8_shuffle::<0, 8, 2, 10, 4, 12, 6, 14>(vec_a, vec_b); + let vec_e = i16x8(0, 8, 2, 10, 4, 12, 6, 14); + compare_bytes(vec_r, vec_e); + + let vec_a = i32x4(0, 1, 2, 3); + let vec_b = i32x4(4, 5, 6, 7); + let vec_r = i32x4_shuffle::<0, 4, 2, 6>(vec_a, vec_b); + let vec_e = i32x4(0, 4, 2, 6); + compare_bytes(vec_r, vec_e); + + let vec_a = i64x2(0, 1); + let vec_b = i64x2(2, 3); + let vec_r = i64x2_shuffle::<0, 2>(vec_a, vec_b); + let vec_e = i64x2(0, 2); + compare_bytes(vec_r, vec_e); + } + + // tests extract and replace lanes + macro_rules! test_extract { + ( + name: $test_id:ident, + extract: $extract:ident, + replace: $replace:ident, + elem: $elem:ty, + count: $count:expr, + indices: [$($idx:expr),*], + ) => { + #[test] + fn $test_id() { + unsafe { + let arr: [$elem; $count] = [123 as $elem; $count]; + let vec: v128 = transmute(arr); + $( + assert_eq!($extract::<$idx>(vec), 123 as $elem); + )* + + // create a vector from array and check that the indices contain + // the same values as in the array: + let arr: [$elem; $count] = [$($idx as $elem),*]; + let vec: v128 = transmute(arr); + $( + assert_eq!($extract::<$idx>(vec), $idx as $elem); + + let tmp = $replace::<$idx>(vec, 124 as $elem); + assert_eq!($extract::<$idx>(tmp), 124 as $elem); + )* + } + } + } + } + + test_extract! { + name: test_i8x16_extract_replace, + extract: i8x16_extract_lane, + replace: i8x16_replace_lane, + elem: i8, + count: 16, + indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + } + test_extract! { + name: test_i16x8_extract_replace, + extract: i16x8_extract_lane, + replace: i16x8_replace_lane, + elem: i16, + count: 8, + indices: [0, 1, 2, 3, 4, 5, 6, 7], + } + test_extract! { + name: test_i32x4_extract_replace, + extract: i32x4_extract_lane, + replace: i32x4_replace_lane, + elem: i32, + count: 4, + indices: [0, 1, 2, 3], + } + test_extract! { + name: test_i64x2_extract_replace, + extract: i64x2_extract_lane, + replace: i64x2_replace_lane, + elem: i64, + count: 2, + indices: [0, 1], + } + test_extract! { + name: test_f32x4_extract_replace, + extract: f32x4_extract_lane, + replace: f32x4_replace_lane, + elem: f32, + count: 4, + indices: [0, 1, 2, 3], + } + test_extract! 
{ + name: test_f64x2_extract_replace, + extract: f64x2_extract_lane, + replace: f64x2_replace_lane, + elem: f64, + count: 2, + indices: [0, 1], + } + + #[test] + #[rustfmt::skip] + fn test_swizzle() { + compare_bytes( + i8x16_swizzle( + i32x4(1, 2, 3, 4), + i8x16( + 32, 31, 30, 29, + 0, 1, 2, 3, + 12, 13, 14, 15, + 0, 4, 8, 12), + ), + i32x4(0, 1, 4, 0x04030201), + ); + } + + macro_rules! test_splat { + ($test_id:ident: $val:expr => $($vals:expr),*) => { + #[test] + fn $test_id() { + let a = super::$test_id($val); + let b = u8x16($($vals as u8),*); + compare_bytes(a, b); + } + } + } + + mod splats { + use super::*; + test_splat!(i8x16_splat: 42 => 42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42); + test_splat!(i16x8_splat: 42 => 42, 0, 42, 0, 42, 0, 42, 0, 42, 0, 42, 0, 42, 0, 42, 0); + test_splat!(i32x4_splat: 42 => 42, 0, 0, 0, 42, 0, 0, 0, 42, 0, 0, 0, 42, 0, 0, 0); + test_splat!(i64x2_splat: 42 => 42, 0, 0, 0, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0); + test_splat!(f32x4_splat: 42. => 0, 0, 40, 66, 0, 0, 40, 66, 0, 0, 40, 66, 0, 0, 40, 66); + test_splat!(f64x2_splat: 42. => 0, 0, 0, 0, 0, 0, 69, 64, 0, 0, 0, 0, 0, 0, 69, 64); + } + + #[test] + fn test_bitmasks() { + let zero = i8x16_splat(0); + let ones = i8x16_splat(!0); + + assert_eq!(i8x16_bitmask(zero), 0); + assert_eq!(i8x16_bitmask(ones), 0xffff); + assert_eq!(i8x16_bitmask(i8x16_splat(i8::MAX)), 0); + assert_eq!(i8x16_bitmask(i8x16_splat(i8::MIN)), 0xffff); + assert_eq!(i8x16_bitmask(i8x16_replace_lane::<1>(zero, -1)), 0b10); + + assert_eq!(i16x8_bitmask(zero), 0); + assert_eq!(i16x8_bitmask(ones), 0xff); + assert_eq!(i16x8_bitmask(i16x8_splat(i16::MAX)), 0); + assert_eq!(i16x8_bitmask(i16x8_splat(i16::MIN)), 0xff); + assert_eq!(i16x8_bitmask(i16x8_replace_lane::<1>(zero, -1)), 0b10); + + assert_eq!(i32x4_bitmask(zero), 0); + assert_eq!(i32x4_bitmask(ones), 0b1111); + assert_eq!(i32x4_bitmask(i32x4_splat(i32::MAX)), 0); + assert_eq!(i32x4_bitmask(i32x4_splat(i32::MIN)), 0b1111); + assert_eq!(i32x4_bitmask(i32x4_replace_lane::<1>(zero, -1)), 0b10); + + assert_eq!(i64x2_bitmask(zero), 0); + assert_eq!(i64x2_bitmask(ones), 0b11); + assert_eq!(i64x2_bitmask(i64x2_splat(i64::MAX)), 0); + assert_eq!(i64x2_bitmask(i64x2_splat(i64::MIN)), 0b11); + assert_eq!(i64x2_bitmask(i64x2_replace_lane::<1>(zero, -1)), 0b10); + } + + #[test] + fn test_narrow() { + let zero = i8x16_splat(0); + let ones = i8x16_splat(!0); + + compare_bytes(i8x16_narrow_i16x8(zero, zero), zero); + compare_bytes(u8x16_narrow_i16x8(zero, zero), zero); + compare_bytes(i8x16_narrow_i16x8(ones, ones), ones); + compare_bytes(u8x16_narrow_i16x8(ones, ones), zero); + + compare_bytes( + i8x16_narrow_i16x8( + i16x8( + 0, + 1, + 2, + -1, + i8::MIN.into(), + i8::MAX.into(), + u8::MIN.into(), + u8::MAX.into(), + ), + i16x8( + i16::MIN.into(), + i16::MAX.into(), + u16::MIN as i16, + u16::MAX as i16, + 0, + 0, + 0, + 0, + ), + ), + i8x16(0, 1, 2, -1, -128, 127, 0, 127, -128, 127, 0, -1, 0, 0, 0, 0), + ); + + compare_bytes( + u8x16_narrow_i16x8( + i16x8( + 0, + 1, + 2, + -1, + i8::MIN.into(), + i8::MAX.into(), + u8::MIN.into(), + u8::MAX.into(), + ), + i16x8( + i16::MIN.into(), + i16::MAX.into(), + u16::MIN as i16, + u16::MAX as i16, + 0, + 0, + 0, + 0, + ), + ), + i8x16(0, 1, 2, 0, 0, 127, 0, -1, 0, -1, 0, 0, 0, 0, 0, 0), + ); + + compare_bytes(i16x8_narrow_i32x4(zero, zero), zero); + compare_bytes(u16x8_narrow_i32x4(zero, zero), zero); + compare_bytes(i16x8_narrow_i32x4(ones, ones), ones); + compare_bytes(u16x8_narrow_i32x4(ones, ones), zero); + + compare_bytes( + i16x8_narrow_i32x4( + 
i32x4(0, -1, i16::MIN.into(), i16::MAX.into()), + i32x4( + i32::MIN.into(), + i32::MAX.into(), + u32::MIN as i32, + u32::MAX as i32, + ), + ), + i16x8(0, -1, i16::MIN, i16::MAX, i16::MIN, i16::MAX, 0, -1), + ); + + compare_bytes( + u16x8_narrow_i32x4( + i32x4(u16::MAX.into(), -1, i16::MIN.into(), i16::MAX.into()), + i32x4( + i32::MIN.into(), + i32::MAX.into(), + u32::MIN as i32, + u32::MAX as i32, + ), + ), + i16x8(-1, 0, 0, i16::MAX, 0, -1, 0, 0), + ); + } + + #[test] + fn test_extend() { + let zero = i8x16_splat(0); + let ones = i8x16_splat(!0); + + compare_bytes(i16x8_extend_low_i8x16(zero), zero); + compare_bytes(i16x8_extend_high_i8x16(zero), zero); + compare_bytes(i16x8_extend_low_u8x16(zero), zero); + compare_bytes(i16x8_extend_high_u8x16(zero), zero); + compare_bytes(i16x8_extend_low_i8x16(ones), ones); + compare_bytes(i16x8_extend_high_i8x16(ones), ones); + let halves = u16x8_splat(u8::MAX.into()); + compare_bytes(i16x8_extend_low_u8x16(ones), halves); + compare_bytes(i16x8_extend_high_u8x16(ones), halves); + + compare_bytes(i32x4_extend_low_i16x8(zero), zero); + compare_bytes(i32x4_extend_high_i16x8(zero), zero); + compare_bytes(i32x4_extend_low_u16x8(zero), zero); + compare_bytes(i32x4_extend_high_u16x8(zero), zero); + compare_bytes(i32x4_extend_low_i16x8(ones), ones); + compare_bytes(i32x4_extend_high_i16x8(ones), ones); + let halves = u32x4_splat(u16::MAX.into()); + compare_bytes(i32x4_extend_low_u16x8(ones), halves); + compare_bytes(i32x4_extend_high_u16x8(ones), halves); + + compare_bytes(i64x2_extend_low_i32x4(zero), zero); + compare_bytes(i64x2_extend_high_i32x4(zero), zero); + compare_bytes(i64x2_extend_low_u32x4(zero), zero); + compare_bytes(i64x2_extend_high_u32x4(zero), zero); + compare_bytes(i64x2_extend_low_i32x4(ones), ones); + compare_bytes(i64x2_extend_high_i32x4(ones), ones); + let halves = i64x2_splat(u32::MAX.into()); + compare_bytes(u64x2_extend_low_u32x4(ones), halves); + compare_bytes(u64x2_extend_high_u32x4(ones), halves); + } + + #[test] + fn test_dot() { + let zero = i8x16_splat(0); + let ones = i8x16_splat(!0); + let two = i32x4_splat(2); + compare_bytes(i32x4_dot_i16x8(zero, zero), zero); + compare_bytes(i32x4_dot_i16x8(ones, ones), two); + } + + macro_rules! test_binop { + ( + $($name:ident => { + $([$($vec1:tt)*] ($op:ident | $f:ident) [$($vec2:tt)*],)* + })* + ) => ($( + #[test] + fn $name() { + unsafe { + $( + let v1 = [$($vec1)*]; + let v2 = [$($vec2)*]; + let v1_v128: v128 = mem::transmute(v1); + let v2_v128: v128 = mem::transmute(v2); + let v3_v128 = super::$f(v1_v128, v2_v128); + let mut v3 = [$($vec1)*]; + drop(v3); + v3 = mem::transmute(v3_v128); + + for (i, actual) in v3.iter().enumerate() { + let expected = v1[i].$op(v2[i]); + assert_eq!(*actual, expected); + } + )* + } + } + )*) + } + + macro_rules! test_unop { + ( + $($name:ident => { + $(($op:ident | $f:ident) [$($vec1:tt)*],)* + })* + ) => ($( + #[test] + fn $name() { + unsafe { + $( + let v1 = [$($vec1)*]; + let v1_v128: v128 = mem::transmute(v1); + let v2_v128 = super::$f(v1_v128); + let mut v2 = [$($vec1)*]; + drop(v2); + v2 = mem::transmute(v2_v128); + + for (i, actual) in v2.iter().enumerate() { + let expected = v1[i].$op(); + assert_eq!(*actual, expected); + } + )* + } + } + )*) + } + + trait Avgr: Sized { + fn avgr(self, other: Self) -> Self; + } + + macro_rules! impl_avgr { + ($($i:ident)*) => ($(impl Avgr for $i { + fn avgr(self, other: Self) -> Self { + ((self as u64 + other as u64 + 1) / 2) as $i + } + })*) + } + + impl_avgr!(u8 u16); + + test_binop! 
{ + test_i8x16_add => { + [0i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + (wrapping_add | i8x16_add) + [1i8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + + [1i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (wrapping_add | i8x16_add) + [-2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -18], + + [1i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (wrapping_add | i8x16_add) + [127, -44, 43, 126, 4, 2, 9, -3, -59, -43, 39, -69, 79, -3, 9, -24], + } + + test_i8x16_add_sat_s => { + [0i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + (saturating_add | i8x16_add_sat) + [1i8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + + [1i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (saturating_add | i8x16_add_sat) + [-2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -18], + + [1i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (saturating_add | i8x16_add_sat) + [127, -44, 43, 126, 4, 2, 9, -3, -59, -43, 39, -69, 79, -3, 9, -24], + } + + test_i8x16_add_sat_u => { + [0u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + (saturating_add | u8x16_add_sat) + [1u8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + + [1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (saturating_add | u8x16_add_sat) + [255, 254, 253, 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240], + + [1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (saturating_add | u8x16_add_sat) + [127, -44i8 as u8, 43, 126, 4, 2, 9, -3i8 as u8, -59i8 as u8, -43i8 as u8, 39, -69i8 as u8, 79, -3i8 as u8, 9, -24i8 as u8], + } + + test_i8x16_sub => { + [0i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + (wrapping_sub | i8x16_sub) + [1i8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + + [1i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (wrapping_sub | i8x16_sub) + [-2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -18], + + [1i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (wrapping_sub | i8x16_sub) + [-127, -44, 43, 126, 4, 2, 9, -3, -59, -43, 39, -69, 79, -3, 4, 8], + } + + test_i8x16_sub_sat_s => { + [0i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + (saturating_sub | i8x16_sub_sat) + [1i8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + + [1i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (saturating_sub | i8x16_sub_sat) + [-2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -18], + + [1i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (saturating_sub | i8x16_sub_sat) + [-127, -44, 43, 126, 4, 2, 9, -3, -59, -43, 39, -69, 79, -3, 4, 8], + } + + test_i8x16_sub_sat_u => { + [0u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + (saturating_sub | u8x16_sub_sat) + [1u8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + + [1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (saturating_sub | u8x16_sub_sat) + [255, 254, 253, 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240], + + [1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (saturating_sub | u8x16_sub_sat) + [127, -44i8 as u8, 43, 126, 4, 2, 9, -3i8 as u8, -59i8 as u8, -43i8 as u8, 39, -69i8 as u8, 79, -3i8 as u8, 9, -24i8 as u8], + } + + test_i8x16_min_s => { + [0i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + (min | i8x16_min) + [1i8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + + [1i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (min | i8x16_min) + [-2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -18], + + [1i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + 
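+            // The unsigned variants below reuse the same bit patterns but
+            // interpret them as unsigned, e.g. -44i8 participates as 212u8.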
(min | i8x16_min) + [-127, -44, 43, 126, 4, 2, 9, -3, -59, -43, 39, -69, 79, -3, 4, 8], + } + + test_i8x16_min_u => { + [0u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + (min | u8x16_min) + [1u8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + + [1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (min | u8x16_min) + [255, 254, 253, 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240], + + [1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (min | u8x16_min) + [127, -44i8 as u8, 43, 126, 4, 2, 9, -3i8 as u8, -59i8 as u8, -43i8 as u8, 39, -69i8 as u8, 79, -3i8 as u8, 9, -24i8 as u8], + } + + test_i8x16_max_s => { + [0i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + (max | i8x16_max) + [1i8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + + [1i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (max | i8x16_max) + [-2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -18], + + [1i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (max | i8x16_max) + [-127, -44, 43, 126, 4, 2, 9, -3, -59, -43, 39, -69, 79, -3, 4, 8], + } + + test_i8x16_max_u => { + [0u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + (max | u8x16_max) + [1u8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + + [1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (max | u8x16_max) + [255, 254, 253, 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240], + + [1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (max | u8x16_max) + [127, -44i8 as u8, 43, 126, 4, 2, 9, -3i8 as u8, -59i8 as u8, -43i8 as u8, 39, -69i8 as u8, 79, -3i8 as u8, 9, -24i8 as u8], + } + + test_i8x16_avgr_u => { + [0u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + (avgr | u8x16_avgr) + [1u8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + + [1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (avgr | u8x16_avgr) + [255, 254, 253, 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240], + + [1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (avgr | u8x16_avgr) + [127, -44i8 as u8, 43, 126, 4, 2, 9, -3i8 as u8, -59i8 as u8, -43i8 as u8, 39, -69i8 as u8, 79, -3i8 as u8, 9, -24i8 as u8], + } + + test_i16x8_add => { + [0i16, 0, 0, 0, 0, 0, 0, 0] + (wrapping_add | i16x8_add) + [1i16, 1, 1, 1, 1, 1, 1, 1], + + [1i16, 2, 3, 4, 5, 6, 7, 8] + (wrapping_add | i16x8_add) + [32767, 8, -2494,-4, 4882, -4, 848, 3830], + } + + test_i16x8_add_sat_s => { + [0i16, 0, 0, 0, 0, 0, 0, 0] + (saturating_add | i16x8_add_sat) + [1i16, 1, 1, 1, 1, 1, 1, 1], + + [1i16, 2, 3, 4, 5, 6, 7, 8] + (saturating_add | i16x8_add_sat) + [32767, 8, -2494,-4, 4882, -4, 848, 3830], + } + + test_i16x8_add_sat_u => { + [0u16, 0, 0, 0, 0, 0, 0, 0] + (saturating_add | u16x8_add_sat) + [1u16, 1, 1, 1, 1, 1, 1, 1], + + [1u16, 2, 3, 4, 5, 6, 7, 8] + (saturating_add | u16x8_add_sat) + [32767, 8, -2494i16 as u16,-4i16 as u16, 4882, -4i16 as u16, 848, 3830], + } + + test_i16x8_sub => { + [0i16, 0, 0, 0, 0, 0, 0, 0] + (wrapping_sub | i16x8_sub) + [1i16, 1, 1, 1, 1, 1, 1, 1], + + [1i16, 2, 3, 4, 5, 6, 7, 8] + (wrapping_sub | i16x8_sub) + [32767, 8, -2494,-4, 4882, -4, 848, 3830], + } + + test_i16x8_sub_sat_s => { + [0i16, 0, 0, 0, 0, 0, 0, 0] + (saturating_sub | i16x8_sub_sat) + [1i16, 1, 1, 1, 1, 1, 1, 1], + + [1i16, 2, 3, 4, 5, 6, 7, 8] + (saturating_sub | i16x8_sub_sat) + [32767, 8, -2494,-4, 4882, -4, 848, 3830], + } + + test_i16x8_sub_sat_u => { + [0u16, 0, 0, 0, 0, 0, 0, 0] + (saturating_sub | u16x8_sub_sat) + [1u16, 1, 1, 1, 1, 1, 1, 1], + + [1u16, 2, 3, 4, 5, 6, 7, 8] + (saturating_sub | 
u16x8_sub_sat) + [32767, 8, -2494i16 as u16,-4i16 as u16, 4882, -4i16 as u16, 848, 3830], + } + + test_i16x8_mul => { + [0i16, 0, 0, 0, 0, 0, 0, 0] + (wrapping_mul | i16x8_mul) + [1i16, 1, 1, 1, 1, 1, 1, 1], + + [1i16, 2, 3, 4, 5, 6, 7, 8] + (wrapping_mul | i16x8_mul) + [32767, 8, -2494,-4, 4882, -4, 848, 3830], + } + + test_i16x8_min_s => { + [0i16, 0, 0, 0, 0, 0, 0, 0] + (min | i16x8_min) + [1i16, 1, 1, 1, 1, 1, 1, 1], + + [1i16, 2, 3, 4, 5, 6, 7, 8] + (min | i16x8_min) + [32767, 8, -2494,-4, 4882, -4, 848, 3830], + } + + test_i16x8_min_u => { + [0u16, 0, 0, 0, 0, 0, 0, 0] + (min | u16x8_min) + [1u16, 1, 1, 1, 1, 1, 1, 1], + + [1u16, 2, 3, 4, 5, 6, 7, 8] + (min | u16x8_min) + [32767, 8, -2494i16 as u16,-4i16 as u16, 4882, -4i16 as u16, 848, 3830], + } + + test_i16x8_max_s => { + [0i16, 0, 0, 0, 0, 0, 0, 0] + (max | i16x8_max) + [1i16, 1, 1, 1, 1, 1, 1, 1], + + [1i16, 2, 3, 4, 5, 6, 7, 8] + (max | i16x8_max) + [32767, 8, -2494,-4, 4882, -4, 848, 3830], + } + + test_i16x8_max_u => { + [0u16, 0, 0, 0, 0, 0, 0, 0] + (max | u16x8_max) + [1u16, 1, 1, 1, 1, 1, 1, 1], + + [1u16, 2, 3, 4, 5, 6, 7, 8] + (max | u16x8_max) + [32767, 8, -2494i16 as u16,-4i16 as u16, 4882, -4i16 as u16, 848, 3830], + } + + test_i16x8_avgr_u => { + [0u16, 0, 0, 0, 0, 0, 0, 0] + (avgr | u16x8_avgr) + [1u16, 1, 1, 1, 1, 1, 1, 1], + + [1u16, 2, 3, 4, 5, 6, 7, 8] + (avgr | u16x8_avgr) + [32767, 8, -2494i16 as u16,-4i16 as u16, 4882, -4i16 as u16, 848, 3830], + } + + test_i32x4_add => { + [0i32, 0, 0, 0] (wrapping_add | i32x4_add) [1, 2, 3, 4], + [1i32, 1283, i32::MAX, i32::MIN] + (wrapping_add | i32x4_add) + [i32::MAX; 4], + } + + test_i32x4_sub => { + [0i32, 0, 0, 0] (wrapping_sub | i32x4_sub) [1, 2, 3, 4], + [1i32, 1283, i32::MAX, i32::MIN] + (wrapping_sub | i32x4_sub) + [i32::MAX; 4], + } + + test_i32x4_mul => { + [0i32, 0, 0, 0] (wrapping_mul | i32x4_mul) [1, 2, 3, 4], + [1i32, 1283, i32::MAX, i32::MIN] + (wrapping_mul | i32x4_mul) + [i32::MAX; 4], + } + + test_i32x4_min_s => { + [0i32, 0, 0, 0] (min | i32x4_min) [1, 2, 3, 4], + [1i32, 1283, i32::MAX, i32::MIN] + (min | i32x4_min) + [i32::MAX; 4], + } + + test_i32x4_min_u => { + [0u32, 0, 0, 0] (min | u32x4_min) [1, 2, 3, 4], + [1u32, 1283, i32::MAX as u32, i32::MIN as u32] + (min | u32x4_min) + [i32::MAX as u32; 4], + } + + test_i32x4_max_s => { + [0i32, 0, 0, 0] (max | i32x4_max) [1, 2, 3, 4], + [1i32, 1283, i32::MAX, i32::MIN] + (max | i32x4_max) + [i32::MAX; 4], + } + + test_i32x4_max_u => { + [0u32, 0, 0, 0] (max | u32x4_max) [1, 2, 3, 4], + [1u32, 1283, i32::MAX as u32, i32::MIN as u32] + (max | u32x4_max) + [i32::MAX as u32; 4], + } + + test_i64x2_add => { + [0i64, 0] (wrapping_add | i64x2_add) [1, 2], + [i64::MIN, i64::MAX] (wrapping_add | i64x2_add) [i64::MAX, i64::MIN], + [i64::MAX; 2] (wrapping_add | i64x2_add) [i64::MAX; 2], + [-4i64, -4] (wrapping_add | i64x2_add) [800, 939], + } + + test_i64x2_sub => { + [0i64, 0] (wrapping_sub | i64x2_sub) [1, 2], + [i64::MIN, i64::MAX] (wrapping_sub | i64x2_sub) [i64::MAX, i64::MIN], + [i64::MAX; 2] (wrapping_sub | i64x2_sub) [i64::MAX; 2], + [-4i64, -4] (wrapping_sub | i64x2_sub) [800, 939], + } + + test_i64x2_mul => { + [0i64, 0] (wrapping_mul | i64x2_mul) [1, 2], + [i64::MIN, i64::MAX] (wrapping_mul | i64x2_mul) [i64::MAX, i64::MIN], + [i64::MAX; 2] (wrapping_mul | i64x2_mul) [i64::MAX; 2], + [-4i64, -4] (wrapping_mul | i64x2_mul) [800, 939], + } + + test_f32x4_add => { + [-1.0f32, 2.0, 3.0, 4.0] (add | f32x4_add) [1., 2., 0., 0.], + [f32::INFINITY, -0.0, f32::NEG_INFINITY, 3.0] + (add | f32x4_add) + [1., 2., 0., 
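+            // Lanes in the floating-point entries are chosen so that no
+            // result is NaN: the harness compares with `assert_eq!`, and
+            // NaN != NaN would fail (INFINITY + 1.0 is fine, but
+            // INFINITY * 0.0 is not, hence the different right-hand vectors
+            // per operation).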
0.], + } + + test_f32x4_sub => { + [-1.0f32, 2.0, 3.0, 4.0] (sub | f32x4_sub) [1., 2., 0., 0.], + [f32::INFINITY, -0.0, f32::NEG_INFINITY, 3.0] + (sub | f32x4_sub) + [1., 2., 0., 0.], + } + + test_f32x4_mul => { + [-1.0f32, 2.0, 3.0, 4.0] (mul | f32x4_mul) [1., 2., 0., 0.], + [f32::INFINITY, -0.0, f32::NEG_INFINITY, 3.0] + (mul | f32x4_mul) + [1., 2., 1., 0.], + } + + test_f32x4_div => { + [-1.0f32, 2.0, 3.0, 4.0] (div | f32x4_div) [1., 2., 0., 0.], + [f32::INFINITY, -0.0, f32::NEG_INFINITY, 3.0] + (div | f32x4_div) + [1., 2., 0., 0.], + } + + test_f32x4_min => { + [-1.0f32, 2.0, 3.0, 4.0] (min | f32x4_min) [1., 2., 0., 0.], + [f32::INFINITY, -0.0, f32::NEG_INFINITY, 3.0] + (min | f32x4_min) + [1., 2., 0., 0.], + } + + test_f32x4_max => { + [-1.0f32, 2.0, 3.0, 4.0] (max | f32x4_max) [1., 2., 0., 0.], + [f32::INFINITY, -0.0, f32::NEG_INFINITY, 3.0] + (max | f32x4_max) + [1., 2., 0., 0.], + } + + test_f32x4_pmin => { + [-1.0f32, 2.0, 3.0, 4.0] (min | f32x4_pmin) [1., 2., 0., 0.], + [f32::INFINITY, -0.0, f32::NEG_INFINITY, 3.0] + (min | f32x4_pmin) + [1., 2., 0., 0.], + } + + test_f32x4_pmax => { + [-1.0f32, 2.0, 3.0, 4.0] (max | f32x4_pmax) [1., 2., 0., 0.], + [f32::INFINITY, -0.0, f32::NEG_INFINITY, 3.0] + (max | f32x4_pmax) + [1., 2., 0., 0.], + } + + test_f64x2_add => { + [-1.0f64, 2.0] (add | f64x2_add) [1., 2.], + [f64::INFINITY, f64::NEG_INFINITY] (add | f64x2_add) [1., 2.], + } + + test_f64x2_sub => { + [-1.0f64, 2.0] (sub | f64x2_sub) [1., 2.], + [f64::INFINITY, f64::NEG_INFINITY] (sub | f64x2_sub) [1., 2.], + } + + test_f64x2_mul => { + [-1.0f64, 2.0] (mul | f64x2_mul) [1., 2.], + [f64::INFINITY, f64::NEG_INFINITY] (mul | f64x2_mul) [1., 2.], + } + + test_f64x2_div => { + [-1.0f64, 2.0] (div | f64x2_div) [1., 2.], + [f64::INFINITY, f64::NEG_INFINITY] (div | f64x2_div) [1., 2.], + } + + test_f64x2_min => { + [-1.0f64, 2.0] (min | f64x2_min) [1., 2.], + [f64::INFINITY, f64::NEG_INFINITY] (min | f64x2_min) [1., 2.], + } + + test_f64x2_max => { + [-1.0f64, 2.0] (max | f64x2_max) [1., 2.], + [f64::INFINITY, f64::NEG_INFINITY] (max | f64x2_max) [1., 2.], + } + + test_f64x2_pmin => { + [-1.0f64, 2.0] (min | f64x2_pmin) [1., 2.], + [f64::INFINITY, f64::NEG_INFINITY] (min | f64x2_pmin) [1., 2.], + } + + test_f64x2_pmax => { + [-1.0f64, 2.0] (max | f64x2_pmax) [1., 2.], + [f64::INFINITY, f64::NEG_INFINITY] (max | f64x2_pmax) [1., 2.], + } + } + + test_unop! 
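+    // As above, but for unary operations: each lane of the intrinsic's
+    // output is checked against the scalar method named on the left.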
{ + test_i8x16_abs => { + (wrapping_abs | i8x16_abs) + [1i8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + + (wrapping_abs | i8x16_abs) + [-2i8, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -18], + + (wrapping_abs | i8x16_abs) + [-127i8, -44, 43, 126, 4, -128, 127, -59, -43, 39, -69, 79, -3, 35, 83, 13], + } + + test_i8x16_neg => { + (wrapping_neg | i8x16_neg) + [1i8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + + (wrapping_neg | i8x16_neg) + [-2i8, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -18], + + (wrapping_neg | i8x16_neg) + [-127i8, -44, 43, 126, 4, -128, 127, -59, -43, 39, -69, 79, -3, 35, 83, 13], + } + + test_i16x8_abs => { + (wrapping_abs | i16x8_abs) [1i16, 1, 1, 1, 1, 1, 1, 1], + (wrapping_abs | i16x8_abs) [2i16, 0x7fff, !0, 4, 42, -5, 33, -4847], + } + + test_i16x8_neg => { + (wrapping_neg | i16x8_neg) [1i16, 1, 1, 1, 1, 1, 1, 1], + (wrapping_neg | i16x8_neg) [2i16, 0x7fff, !0, 4, 42, -5, 33, -4847], + } + + test_i32x4_abs => { + (wrapping_abs | i32x4_abs) [1i32, 2, 3, 4], + (wrapping_abs | i32x4_abs) [i32::MIN, i32::MAX, 0, 4], + } + + test_i32x4_neg => { + (wrapping_neg | i32x4_neg) [1i32, 2, 3, 4], + (wrapping_neg | i32x4_neg) [i32::MIN, i32::MAX, 0, 4], + } + + test_i64x2_abs => { + (wrapping_abs | i64x2_abs) [1i64, 2], + (wrapping_abs | i64x2_abs) [i64::MIN, i64::MAX], + } + + test_i64x2_neg => { + (wrapping_neg | i64x2_neg) [1i64, 2], + (wrapping_neg | i64x2_neg) [i64::MIN, i64::MAX], + } + + test_f32x4_ceil => { + (ceil | f32x4_ceil) [1.0f32, 2., 2.5, 3.3], + (ceil | f32x4_ceil) [0.0, -0.3, f32::INFINITY, -0.0], + } + + test_f32x4_floor => { + (floor | f32x4_floor) [1.0f32, 2., 2.5, 3.3], + (floor | f32x4_floor) [0.0, -0.3, f32::INFINITY, -0.0], + } + + test_f32x4_trunc => { + (trunc | f32x4_trunc) [1.0f32, 2., 2.5, 3.3], + (trunc | f32x4_trunc) [0.0, -0.3, f32::INFINITY, -0.0], + } + + test_f32x4_nearest => { + (round | f32x4_nearest) [1.0f32, 2., 2.6, 3.3], + (round | f32x4_nearest) [0.0, -0.3, f32::INFINITY, -0.0], + } + + test_f32x4_abs => { + (abs | f32x4_abs) [1.0f32, 2., 2.6, 3.3], + (abs | f32x4_abs) [0.0, -0.3, f32::INFINITY, -0.0], + } + + test_f32x4_neg => { + (neg | f32x4_neg) [1.0f32, 2., 2.6, 3.3], + (neg | f32x4_neg) [0.0, -0.3, f32::INFINITY, -0.0], + } + + test_f32x4_sqrt => { + (sqrt | f32x4_sqrt) [1.0f32, 2., 2.6, 3.3], + (sqrt | f32x4_sqrt) [0.0, 0.3, f32::INFINITY, 0.1], + } + + test_f64x2_ceil => { + (ceil | f64x2_ceil) [1.0f64, 2.3], + (ceil | f64x2_ceil) [f64::INFINITY, -0.1], + } + + test_f64x2_floor => { + (floor | f64x2_floor) [1.0f64, 2.3], + (floor | f64x2_floor) [f64::INFINITY, -0.1], + } + + test_f64x2_trunc => { + (trunc | f64x2_trunc) [1.0f64, 2.3], + (trunc | f64x2_trunc) [f64::INFINITY, -0.1], + } + + test_f64x2_nearest => { + (round | f64x2_nearest) [1.0f64, 2.3], + (round | f64x2_nearest) [f64::INFINITY, -0.1], + } + + test_f64x2_abs => { + (abs | f64x2_abs) [1.0f64, 2.3], + (abs | f64x2_abs) [f64::INFINITY, -0.1], + } + + test_f64x2_neg => { + (neg | f64x2_neg) [1.0f64, 2.3], + (neg | f64x2_neg) [f64::INFINITY, -0.1], + } + + test_f64x2_sqrt => { + (sqrt | f64x2_sqrt) [1.0f64, 2.3], + (sqrt | f64x2_sqrt) [f64::INFINITY, 0.1], + } + } + + macro_rules! floating_point { + (f32) => { + true + }; + (f64) => { + true + }; + ($id:ident) => { + false + }; + } + + trait IsNan: Sized { + fn is_nan(self) -> bool { + false + } + } + impl IsNan for i8 {} + impl IsNan for i16 {} + impl IsNan for i32 {} + impl IsNan for i64 {} + + macro_rules! 
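+    // Unlike `test_binop`/`test_unop`, `test_bop` takes explicit expected
+    // lanes and, for the float element types recognized by `floating_point!`,
+    // compares lane NaN-ness before comparing values.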
test_bop {
+        ($id:ident[$ety:ident; $ecount:expr] |
+         $binary_op:ident [$op_test_id:ident]:
+         ([$($in_a:expr),*], [$($in_b:expr),*]) => [$($out:expr),*]) => {
+            test_bop!(
+                $id[$ety; $ecount] => $ety | $binary_op [ $op_test_id ]:
+                ([$($in_a),*], [$($in_b),*]) => [$($out),*]
+            );
+
+        };
+        ($id:ident[$ety:ident; $ecount:expr] => $oty:ident |
+         $binary_op:ident [$op_test_id:ident]:
+         ([$($in_a:expr),*], [$($in_b:expr),*]) => [$($out:expr),*]) => {
+            #[test]
+            fn $op_test_id() {
+                unsafe {
+                    let a_input: [$ety; $ecount] = [$($in_a),*];
+                    let b_input: [$ety; $ecount] = [$($in_b),*];
+                    let output: [$oty; $ecount] = [$($out),*];
+
+                    let a_vec_in: v128 = transmute(a_input);
+                    let b_vec_in: v128 = transmute(b_input);
+                    let vec_res: v128 = $binary_op(a_vec_in, b_vec_in);
+
+                    let res: [$oty; $ecount] = transmute(vec_res);
+
+                    if !floating_point!($ety) {
+                        assert_eq!(res, output);
+                    } else {
+                        for i in 0..$ecount {
+                            let r = res[i];
+                            let o = output[i];
+                            assert_eq!(r.is_nan(), o.is_nan());
+                            if !r.is_nan() {
+                                assert_eq!(r, o);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    macro_rules! test_bops {
+        ($id:ident[$ety:ident; $ecount:expr] |
+         $binary_op:ident [$op_test_id:ident]:
+         ([$($in_a:expr),*], $in_b:expr) => [$($out:expr),*]) => {
+            #[test]
+            fn $op_test_id() {
+                unsafe {
+                    let a_input: [$ety; $ecount] = [$($in_a),*];
+                    let output: [$ety; $ecount] = [$($out),*];
+
+                    let a_vec_in: v128 = transmute(a_input);
+                    let vec_res: v128 = $binary_op(a_vec_in, $in_b);
+
+                    let res: [$ety; $ecount] = transmute(vec_res);
+                    assert_eq!(res, output);
+                }
+            }
+        }
+    }
+
+    macro_rules! test_uop {
+        ($id:ident[$ety:ident; $ecount:expr] |
+         $unary_op:ident [$op_test_id:ident]: [$($in_a:expr),*] => [$($out:expr),*]) => {
+            #[test]
+            fn $op_test_id() {
+                unsafe {
+                    let a_input: [$ety; $ecount] = [$($in_a),*];
+                    let output: [$ety; $ecount] = [$($out),*];
+
+                    let a_vec_in: v128 = transmute(a_input);
+                    let vec_res: v128 = $unary_op(a_vec_in);
+
+                    let res: [$ety; $ecount] = transmute(vec_res);
+                    assert_eq!(res, output);
+                }
+            }
+        }
+    }
+
+    test_bops!(i8x16[i8; 16] | i8x16_shl[i8x16_shl_test]:
+               ([0, -1, 2, 3, 4, 5, 6, i8::MAX, 1, 1, 1, 1, 1, 1, 1, 1], 1) =>
+               [0, -2, 4, 6, 8, 10, 12, -2, 2, 2, 2, 2, 2, 2, 2, 2]);
+    test_bops!(i16x8[i16; 8] | i16x8_shl[i16x8_shl_test]:
+               ([0, -1, 2, 3, 4, 5, 6, i16::MAX], 1) =>
+               [0, -2, 4, 6, 8, 10, 12, -2]);
+    test_bops!(i32x4[i32; 4] | i32x4_shl[i32x4_shl_test]:
+               ([0, -1, 2, 3], 1) => [0, -2, 4, 6]);
+    test_bops!(i64x2[i64; 2] | i64x2_shl[i64x2_shl_test]:
+               ([0, -1], 1) => [0, -2]);
+
+    test_bops!(i8x16[i8; 16] | i8x16_shr[i8x16_shr_s_test]:
+               ([0, -1, 2, 3, 4, 5, 6, i8::MAX, 1, 1, 1, 1, 1, 1, 1, 1], 1) =>
+               [0, -1, 1, 1, 2, 2, 3, 63, 0, 0, 0, 0, 0, 0, 0, 0]);
+    test_bops!(i16x8[i16; 8] | i16x8_shr[i16x8_shr_s_test]:
+               ([0, -1, 2, 3, 4, 5, 6, i16::MAX], 1) =>
+               [0, -1, 1, 1, 2, 2, 3, i16::MAX / 2]);
+    test_bops!(i32x4[i32; 4] | i32x4_shr[i32x4_shr_s_test]:
+               ([0, -1, 2, 3], 1) => [0, -1, 1, 1]);
+    test_bops!(i64x2[i64; 2] | i64x2_shr[i64x2_shr_s_test]:
+               ([0, -1], 1) => [0, -1]);
+
+    test_bops!(i8x16[i8; 16] | u8x16_shr[i8x16_shr_u_test]:
+               ([0, -1, 2, 3, 4, 5, 6, i8::MAX, 1, 1, 1, 1, 1, 1, 1, 1], 1) =>
+               [0, i8::MAX, 1, 1, 2, 2, 3, 63, 0, 0, 0, 0, 0, 0, 0, 0]);
+    test_bops!(i16x8[i16; 8] | u16x8_shr[i16x8_shr_u_test]:
+               ([0, -1, 2, 3, 4, 5, 6, i16::MAX], 1) =>
+               [0, i16::MAX, 1, 1, 2, 2, 3, i16::MAX / 2]);
+    test_bops!(i32x4[i32; 4] | u32x4_shr[i32x4_shr_u_test]:
+               ([0, -1, 2, 3], 1) => [0, i32::MAX, 1, 1]);
+    test_bops!(i64x2[i64; 2] | u64x2_shr[i64x2_shr_u_test]:
+               ([0, -1], 1) => [0,
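+    // The `u*_shr` logical shifts above shift zeros into the sign bit, so
+    // -1 >> 1 produces the corresponding MAX bit pattern (here i64::MAX).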
i64::MAX]); + + #[test] + fn v128_bitwise_logical_ops() { + unsafe { + let a: [u32; 4] = [u32::MAX, 0, u32::MAX, 0]; + let b: [u32; 4] = [u32::MAX; 4]; + let c: [u32; 4] = [0; 4]; + + let vec_a: v128 = transmute(a); + let vec_b: v128 = transmute(b); + let vec_c: v128 = transmute(c); + + let r: v128 = v128_and(vec_a, vec_a); + compare_bytes(r, vec_a); + let r: v128 = v128_and(vec_a, vec_b); + compare_bytes(r, vec_a); + let r: v128 = v128_andnot(vec_a, vec_b); + compare_bytes(r, vec_c); + let r: v128 = v128_andnot(vec_a, vec_a); + compare_bytes(r, vec_c); + let r: v128 = v128_andnot(vec_a, vec_c); + compare_bytes(r, vec_a); + let r: v128 = v128_or(vec_a, vec_b); + compare_bytes(r, vec_b); + let r: v128 = v128_not(vec_b); + compare_bytes(r, vec_c); + let r: v128 = v128_xor(vec_a, vec_c); + compare_bytes(r, vec_a); + + let r: v128 = v128_bitselect(vec_b, vec_c, vec_b); + compare_bytes(r, vec_b); + let r: v128 = v128_bitselect(vec_b, vec_c, vec_c); + compare_bytes(r, vec_c); + let r: v128 = v128_bitselect(vec_b, vec_c, vec_a); + compare_bytes(r, vec_a); + } + } + + macro_rules! test_bool_red { + ([$test_id:ident, $any:ident, $all:ident] | [$($true:expr),*] | [$($false:expr),*] | [$($alt:expr),*]) => { + #[test] + fn $test_id() { + unsafe { + let vec_a: v128 = transmute([$($true),*]); // true + let vec_b: v128 = transmute([$($false),*]); // false + let vec_c: v128 = transmute([$($alt),*]); // alternating + + // TODO + // assert_eq!($any(vec_a), true); + // assert_eq!($any(vec_b), false); + // assert_eq!($any(vec_c), true); + + assert_eq!($all(vec_a), true); + assert_eq!($all(vec_b), false); + assert_eq!($all(vec_c), false); + } + } + } + } + + test_bool_red!( + [i8x16_boolean_reductions, v128_any_true, i8x16_all_true] + | [1_i8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] + | [0_i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + | [1_i8, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0] + ); + test_bool_red!( + [i16x8_boolean_reductions, v128_any_true, i16x8_all_true] + | [1_i16, 1, 1, 1, 1, 1, 1, 1] + | [0_i16, 0, 0, 0, 0, 0, 0, 0] + | [1_i16, 0, 1, 0, 1, 0, 1, 0] + ); + test_bool_red!( + [i32x4_boolean_reductions, v128_any_true, i32x4_all_true] + | [1_i32, 1, 1, 1] + | [0_i32, 0, 0, 0] + | [1_i32, 0, 1, 0] + ); + test_bool_red!( + [i64x2_boolean_reductions, v128_any_true, i64x2_all_true] + | [1_i64, 1] + | [0_i64, 0] + | [1_i64, 0] + ); + + test_bop!(i8x16[i8; 16] | i8x16_eq[i8x16_eq_test]: + ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) => + [-1, 0, -1, 0 ,-1, 0, -1, -1, -1, 0, -1, 0 ,-1, 0, -1, -1]); + test_bop!(i16x8[i16; 8] | i16x8_eq[i16x8_eq_test]: + ([0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 2, 4, 4, 6, 6, 7]) => + [-1, 0, -1, 0 ,-1, 0, -1, -1]); + test_bop!(i32x4[i32; 4] | i32x4_eq[i32x4_eq_test]: + ([0, 1, 2, 3], [0, 2, 2, 4]) => [-1, 0, -1, 0]); + test_bop!(i64x2[i64; 2] | i64x2_eq[i64x2_eq_test]: + ([0, 1], [0, 2]) => [-1, 0]); + test_bop!(f32x4[f32; 4] => i32 | f32x4_eq[f32x4_eq_test]: + ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [-1, 0, -1, 0]); + test_bop!(f64x2[f64; 2] => i64 | f64x2_eq[f64x2_eq_test]: ([0., 1.], [0., 2.]) => [-1, 0]); + + test_bop!(i8x16[i8; 16] | i8x16_ne[i8x16_ne_test]: + ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) => + [0, -1, 0, -1 ,0, -1, 0, 0, 0, -1, 0, -1 ,0, -1, 0, 0]); + test_bop!(i16x8[i16; 8] | i16x8_ne[i16x8_ne_test]: + ([0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 2, 4, 4, 6, 6, 7]) => + [0, -1, 0, -1 ,0, -1, 0, 0]); + 
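+    // All comparison tests encode each lane's result as a mask: -1 (all bits
+    // set) for true, 0 for false; the `_s`/`_u` test suffixes pick the signed
+    // or unsigned lane interpretation.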
test_bop!(i32x4[i32; 4] | i32x4_ne[i32x4_ne_test]: + ([0, 1, 2, 3], [0, 2, 2, 4]) => [0, -1, 0, -1]); + test_bop!(i64x2[i64; 2] | i64x2_ne[i64x2_ne_test]: + ([0, 1], [0, 2]) => [0, -1]); + test_bop!(f32x4[f32; 4] => i32 | f32x4_ne[f32x4_ne_test]: + ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [0, -1, 0, -1]); + test_bop!(f64x2[f64; 2] => i64 | f64x2_ne[f64x2_ne_test]: ([0., 1.], [0., 2.]) => [0, -1]); + + test_bop!(i8x16[i8; 16] | i8x16_lt[i8x16_lt_s_test]: + ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, -12, 13, 14, 15], + [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) => + [0, -1, 0, -1 ,0, -1, 0, 0, 0, -1, 0, -1, -1, -1, 0, 0]); + test_bop!(i8x16[i8; 16] | u8x16_lt[i8x16_lt_u_test]: + ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, -12, 13, 14, 15], + [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) => + [0, -1, 0, -1 ,0, -1, 0, 0, 0, -1, 0, -1 ,0, -1, 0, 0]); + test_bop!(i16x8[i16; 8] | i16x8_lt[i16x8_lt_s_test]: + ([0, 1, 2, 3, 4, 5, 6, -7], [0, 2, 2, 4, 4, 6, 6, 7]) => + [0, -1, 0, -1 ,0, -1, 0, -1]); + test_bop!(i16x8[i16; 8] | u16x8_lt[i16x8_lt_u_test]: + ([0, 1, 2, 3, 4, 5, 6, -7], [0, 2, 2, 4, 4, 6, 6, 7]) => + [0, -1, 0, -1 ,0, -1, 0, 0]); + test_bop!(i32x4[i32; 4] | i32x4_lt[i32x4_lt_s_test]: + ([-1, 1, 2, 3], [0, 2, 2, 4]) => [-1, -1, 0, -1]); + test_bop!(i32x4[i32; 4] | u32x4_lt[i32x4_lt_u_test]: + ([-1, 1, 2, 3], [0, 2, 2, 4]) => [0, -1, 0, -1]); + test_bop!(i64x2[i64; 2] | i64x2_lt[i64x2_lt_s_test]: + ([-1, 3], [0, 2]) => [-1, 0]); + test_bop!(f32x4[f32; 4] => i32 | f32x4_lt[f32x4_lt_test]: + ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [0, -1, 0, -1]); + test_bop!(f64x2[f64; 2] => i64 | f64x2_lt[f64x2_lt_test]: ([0., 1.], [0., 2.]) => [0, -1]); + + test_bop!(i8x16[i8; 16] | i8x16_gt[i8x16_gt_s_test]: + ([0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, -15], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) => + [0, -1, 0, -1 ,0, -1, 0, 0, 0, -1, 0, -1 ,0, -1, 0, 0]); + test_bop!(i8x16[i8; 16] | u8x16_gt[i8x16_gt_u_test]: + ([0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, -15], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) => + [0, -1, 0, -1 ,0, -1, 0, 0, 0, -1, 0, -1 ,0, -1, 0, -1]); + test_bop!(i16x8[i16; 8] | i16x8_gt[i16x8_gt_s_test]: + ([0, 2, 2, 4, 4, 6, 6, -7], [0, 1, 2, 3, 4, 5, 6, 7]) => + [0, -1, 0, -1 ,0, -1, 0, 0]); + test_bop!(i16x8[i16; 8] | u16x8_gt[i16x8_gt_u_test]: + ([0, 2, 2, 4, 4, 6, 6, -7], [0, 1, 2, 3, 4, 5, 6, 7]) => + [0, -1, 0, -1 ,0, -1, 0, -1]); + test_bop!(i32x4[i32; 4] | i32x4_gt[i32x4_gt_s_test]: + ([0, 2, 2, -4], [0, 1, 2, 3]) => [0, -1, 0, 0]); + test_bop!(i32x4[i32; 4] | u32x4_gt[i32x4_gt_u_test]: + ([0, 2, 2, -4], [0, 1, 2, 3]) => [0, -1, 0, -1]); + test_bop!(i64x2[i64; 2] | i64x2_gt[i64x2_gt_s_test]: + ([-1, 2], [0, 1]) => [0, -1]); + test_bop!(f32x4[f32; 4] => i32 | f32x4_gt[f32x4_gt_test]: + ([0., 2., 2., 4.], [0., 1., 2., 3.]) => [0, -1, 0, -1]); + test_bop!(f64x2[f64; 2] => i64 | f64x2_gt[f64x2_gt_test]: ([0., 2.], [0., 1.]) => [0, -1]); + + test_bop!(i8x16[i8; 16] | i8x16_ge[i8x16_ge_s_test]: + ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, -15], + [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) => + [-1, 0, -1, 0 ,-1, 0, -1, -1, -1, 0, -1, 0 ,-1, 0, -1, 0]); + test_bop!(i8x16[i8; 16] | u8x16_ge[i8x16_ge_u_test]: + ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, -15], + [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) => + [-1, 0, -1, 0 ,-1, 0, -1, -1, -1, 0, -1, 0 ,-1, 0, -1, -1]); + test_bop!(i16x8[i16; 8] | i16x8_ge[i16x8_ge_s_test]: + ([0, 1, 2, 3, 4, 5, 6, -7], [0, 2, 2, 4, 4, 6, 6, 
7]) => + [-1, 0, -1, 0 ,-1, 0, -1, 0]); + test_bop!(i16x8[i16; 8] | u16x8_ge[i16x8_ge_u_test]: + ([0, 1, 2, 3, 4, 5, 6, -7], [0, 2, 2, 4, 4, 6, 6, 7]) => + [-1, 0, -1, 0 ,-1, 0, -1, -1]); + test_bop!(i32x4[i32; 4] | i32x4_ge[i32x4_ge_s_test]: + ([0, 1, 2, -3], [0, 2, 2, 4]) => [-1, 0, -1, 0]); + test_bop!(i32x4[i32; 4] | u32x4_ge[i32x4_ge_u_test]: + ([0, 1, 2, -3], [0, 2, 2, 4]) => [-1, 0, -1, -1]); + test_bop!(i64x2[i64; 2] | i64x2_ge[i64x2_ge_s_test]: + ([0, 1], [-1, 2]) => [-1, 0]); + test_bop!(f32x4[f32; 4] => i32 | f32x4_ge[f32x4_ge_test]: + ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [-1, 0, -1, 0]); + test_bop!(f64x2[f64; 2] => i64 | f64x2_ge[f64x2_ge_test]: ([0., 1.], [0., 2.]) => [-1, 0]); + + test_bop!(i8x16[i8; 16] | i8x16_le[i8x16_le_s_test]: + ([0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, -15], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] + ) => + [-1, 0, -1, 0 ,-1, 0, -1, -1, -1, 0, -1, 0 ,-1, 0, -1, -1]); + test_bop!(i8x16[i8; 16] | u8x16_le[i8x16_le_u_test]: + ([0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, -15], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] + ) => + [-1, 0, -1, 0 ,-1, 0, -1, -1, -1, 0, -1, 0 ,-1, 0, -1, 0]); + test_bop!(i16x8[i16; 8] | i16x8_le[i16x8_le_s_test]: + ([0, 2, 2, 4, 4, 6, 6, -7], [0, 1, 2, 3, 4, 5, 6, 7]) => + [-1, 0, -1, 0 ,-1, 0, -1, -1]); + test_bop!(i16x8[i16; 8] | u16x8_le[i16x8_le_u_test]: + ([0, 2, 2, 4, 4, 6, 6, -7], [0, 1, 2, 3, 4, 5, 6, 7]) => + [-1, 0, -1, 0 ,-1, 0, -1, 0]); + test_bop!(i32x4[i32; 4] | i32x4_le[i32x4_le_s_test]: + ([0, 2, 2, -4], [0, 1, 2, 3]) => [-1, 0, -1, -1]); + test_bop!(i32x4[i32; 4] | u32x4_le[i32x4_le_u_test]: + ([0, 2, 2, -4], [0, 1, 2, 3]) => [-1, 0, -1, 0]); + test_bop!(i64x2[i64; 2] | i64x2_le[i64x2_le_s_test]: + ([0, 2], [0, 1]) => [-1, 0]); + test_bop!(f32x4[f32; 4] => i32 | f32x4_le[f32x4_le_test]: + ([0., 2., 2., 4.], [0., 1., 2., 3.]) => [-1, 0, -1, -0]); + test_bop!(f64x2[f64; 2] => i64 | f64x2_le[f64x2_le_test]: ([0., 2.], [0., 1.]) => [-1, 0]); + + test_uop!(f32x4[f32; 4] | f32x4_neg[f32x4_neg_test]: [0., 1., 2., 3.] => [ 0., -1., -2., -3.]); + test_uop!(f32x4[f32; 4] | f32x4_abs[f32x4_abs_test]: [0., -1., 2., -3.] => [ 0., 1., 2., 3.]); + test_bop!(f32x4[f32; 4] | f32x4_min[f32x4_min_test]: + ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [0., -3., -4., 8.]); + test_bop!(f32x4[f32; 4] | f32x4_min[f32x4_min_test_nan]: + ([0., -1., 7., 8.], [1., -3., -4., std::f32::NAN]) + => [0., -3., -4., std::f32::NAN]); + test_bop!(f32x4[f32; 4] | f32x4_max[f32x4_max_test]: + ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [1., -1., 7., 10.]); + test_bop!(f32x4[f32; 4] | f32x4_max[f32x4_max_test_nan]: + ([0., -1., 7., 8.], [1., -3., -4., std::f32::NAN]) + => [1., -1., 7., std::f32::NAN]); + test_bop!(f32x4[f32; 4] | f32x4_add[f32x4_add_test]: + ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [1., -4., 3., 18.]); + test_bop!(f32x4[f32; 4] | f32x4_sub[f32x4_sub_test]: + ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [-1., 2., 11., -2.]); + test_bop!(f32x4[f32; 4] | f32x4_mul[f32x4_mul_test]: + ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [0., 3., -28., 80.]); + test_bop!(f32x4[f32; 4] | f32x4_div[f32x4_div_test]: + ([0., -8., 70., 8.], [1., 4., 10., 2.]) => [0., -2., 7., 4.]); + + test_uop!(f64x2[f64; 2] | f64x2_neg[f64x2_neg_test]: [0., 1.] => [ 0., -1.]); + test_uop!(f64x2[f64; 2] | f64x2_abs[f64x2_abs_test]: [0., -1.] 
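+    // The `_test_nan` cases below go through test_bop's floating-point path,
+    // which first compares lane NaN-ness and only then compares values.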
=> [ 0., 1.]); + test_bop!(f64x2[f64; 2] | f64x2_min[f64x2_min_test]: + ([0., -1.], [1., -3.]) => [0., -3.]); + test_bop!(f64x2[f64; 2] | f64x2_min[f64x2_min_test_nan]: + ([7., 8.], [-4., std::f64::NAN]) + => [ -4., std::f64::NAN]); + test_bop!(f64x2[f64; 2] | f64x2_max[f64x2_max_test]: + ([0., -1.], [1., -3.]) => [1., -1.]); + test_bop!(f64x2[f64; 2] | f64x2_max[f64x2_max_test_nan]: + ([7., 8.], [ -4., std::f64::NAN]) + => [7., std::f64::NAN]); + test_bop!(f64x2[f64; 2] | f64x2_add[f64x2_add_test]: + ([0., -1.], [1., -3.]) => [1., -4.]); + test_bop!(f64x2[f64; 2] | f64x2_sub[f64x2_sub_test]: + ([0., -1.], [1., -3.]) => [-1., 2.]); + test_bop!(f64x2[f64; 2] | f64x2_mul[f64x2_mul_test]: + ([0., -1.], [1., -3.]) => [0., 3.]); + test_bop!(f64x2[f64; 2] | f64x2_div[f64x2_div_test]: + ([0., -8.], [1., 4.]) => [0., -2.]); + + macro_rules! test_conv { + ($test_id:ident | $conv_id:ident | $to_ty:ident | $from:expr, $to:expr) => { + #[test] + fn $test_id() { + unsafe { + let from: v128 = transmute($from); + let to: v128 = transmute($to); + + let r: v128 = $conv_id(from); + + compare_bytes(r, to); + } + } + }; + } + + test_conv!( + f32x4_convert_s_i32x4 | f32x4_convert_i32x4 | f32x4 | [1_i32, 2, 3, 4], + [1_f32, 2., 3., 4.] + ); + test_conv!( + f32x4_convert_u_i32x4 | f32x4_convert_u32x4 | f32x4 | [u32::MAX, 2, 3, 4], + [u32::MAX as f32, 2., 3., 4.] + ); + + #[test] + fn test_conversions() { + compare_bytes( + i32x4_trunc_sat_f32x4(f32x4(1., f32::NEG_INFINITY, f32::INFINITY, f32::NAN)), + i32x4(1, i32::MIN, i32::MAX, 0), + ); + compare_bytes( + u32x4_trunc_sat_f32x4(f32x4(1., f32::NEG_INFINITY, f32::INFINITY, f32::NAN)), + u32x4(1, 0, u32::MAX, 0), + ); + compare_bytes(f64x2_convert_low_i32x4(i32x4(1, 2, 3, 4)), f64x2(1., 2.)); + compare_bytes( + f64x2_convert_low_i32x4(i32x4(i32::MIN, i32::MAX, 3, 4)), + f64x2(f64::from(i32::MIN), f64::from(i32::MAX)), + ); + compare_bytes(f64x2_convert_low_u32x4(u32x4(1, 2, 3, 4)), f64x2(1., 2.)); + compare_bytes( + f64x2_convert_low_u32x4(u32x4(u32::MIN, u32::MAX, 3, 4)), + f64x2(f64::from(u32::MIN), f64::from(u32::MAX)), + ); + + compare_bytes( + i32x4_trunc_sat_f64x2_zero(f64x2(1., f64::NEG_INFINITY)), + i32x4(1, i32::MIN, 0, 0), + ); + compare_bytes( + i32x4_trunc_sat_f64x2_zero(f64x2(f64::NAN, f64::INFINITY)), + i32x4(0, i32::MAX, 0, 0), + ); + compare_bytes( + u32x4_trunc_sat_f64x2_zero(f64x2(1., f64::NEG_INFINITY)), + u32x4(1, 0, 0, 0), + ); + compare_bytes( + u32x4_trunc_sat_f64x2_zero(f64x2(f64::NAN, f64::INFINITY)), + u32x4(0, u32::MAX, 0, 0), + ); + } + + #[test] + fn test_popcnt() { + unsafe { + for i in 0..=255 { + compare_bytes( + i8x16_popcnt(u8x16_splat(i)), + u8x16_splat(i.count_ones() as u8), + ) + } + + let vectors = [ + [0u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + [ + 100, 200, 50, 0, 10, 7, 38, 185, 192, 3, 34, 85, 93, 7, 31, 99, + ], + ]; + + for vector in vectors.iter() { + compare_bytes( + i8x16_popcnt(transmute(*vector)), + i8x16( + vector[0].count_ones() as i8, + vector[1].count_ones() as i8, + vector[2].count_ones() as i8, + vector[3].count_ones() as i8, + vector[4].count_ones() as i8, + vector[5].count_ones() as i8, + vector[6].count_ones() as i8, + vector[7].count_ones() as i8, + vector[8].count_ones() as i8, + vector[9].count_ones() as i8, + vector[10].count_ones() as i8, + vector[11].count_ones() as i8, + vector[12].count_ones() as i8, + vector[13].count_ones() as i8, + vector[14].count_ones() as i8, + vector[15].count_ones() as i8, + ), + ) + } + } + } + + #[test] + fn test_promote_demote() { + let tests = [ + [1., 
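+            // `f32x4_demote_f64x2_zero` rounds two f64 lanes to f32 and
+            // zeroes the upper two result lanes; `f64x2_promote_low_f32x4`
+            // losslessly widens the two low f32 lanes back. Every value in
+            // this table is exactly representable as f32, so the round trip
+            // below is exact.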
2.], + [f64::NAN, f64::INFINITY], + [100., 201.], + [0., -0.], + [f64::NEG_INFINITY, 0.], + ]; + + for [a, b] in tests { + compare_bytes( + f32x4_demote_f64x2_zero(f64x2(a, b)), + f32x4(a as f32, b as f32, 0., 0.), + ); + compare_bytes( + f64x2_promote_low_f32x4(f32x4(a as f32, b as f32, 0., 0.)), + f64x2(a, b), + ); + } + } + + #[test] + fn test_extmul() { + macro_rules! test { + ($( + $ctor:ident { + from: $from:ident, + to: $to:ident, + low: $low:ident, + high: $high:ident, + } => { + $(([$($a:tt)*] * [$($b:tt)*]))* + } + )*) => ($( + $(unsafe { + let a: [$from; 16 / mem::size_of::<$from>()] = [$($a)*]; + let b: [$from; 16 / mem::size_of::<$from>()] = [$($b)*]; + let low = mem::transmute::<_, [$to; 16 / mem::size_of::<$to>()]>($low($ctor($($a)*), $ctor($($b)*))); + let high = mem::transmute::<_, [$to; 16 / mem::size_of::<$to>()]>($high($ctor($($a)*), $ctor($($b)*))); + + let half = a.len() / 2; + for i in 0..half { + assert_eq!( + (a[i] as $to).wrapping_mul((b[i] as $to)), + low[i], + "expected {} * {}", a[i] as $to, b[i] as $to, + ); + assert_eq!( + (a[half + i] as $to).wrapping_mul((b[half + i] as $to)), + high[i], + "expected {} * {}", a[half + i] as $to, b[half + i] as $to, + ); + } + })* + )*) + } + test! { + i8x16 { + from: i8, + to: i16, + low: i16x8_extmul_low_i8x16, + high: i16x8_extmul_high_i8x16, + } => { + ( + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + * + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ) + ( + [-1, -2, 3, 100, 124, -38, 33, 87, 92, 108, 22, 8, -43, -128, 22, 0] + * + [-5, -2, 6, 10, 45, -4, 4, -2, 0, 88, 92, -102, -98, 83, 73, 54] + ) + } + u8x16 { + from: u8, + to: u16, + low: u16x8_extmul_low_u8x16, + high: u16x8_extmul_high_u8x16, + } => { + ( + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + * + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ) + ( + [1, 2, 3, 100, 124, 38, 33, 87, 92, 198, 22, 8, 43, 128, 22, 0] + * + [5, 200, 6, 10, 45, 248, 4, 2, 0, 2, 92, 102, 234, 83, 73, 54] + ) + } + i16x8 { + from: i16, + to: i32, + low: i32x4_extmul_low_i16x8, + high: i32x4_extmul_high_i16x8, + } => { + ( + [0, 0, 0, 0, 0, 0, 0, 0] + * + [0, 0, 0, 0, 0, 0, 0, 0] + ) + ( + [-1, 0, i16::MAX, 19931, -2259, 64, 200, 87] + * + [1, 1, i16::MIN, 29391, 105, 2, 100, -2] + ) + } + u16x8 { + from: u16, + to: u32, + low: u32x4_extmul_low_u16x8, + high: u32x4_extmul_high_u16x8, + } => { + ( + [0, 0, 0, 0, 0, 0, 0, 0] + * + [0, 0, 0, 0, 0, 0, 0, 0] + ) + ( + [1, 0, u16::MAX, 19931, 2259, 64, 200, 87] + * + [1, 1, 3, 29391, 105, 2, 100, 2] + ) + } + i32x4 { + from: i32, + to: i64, + low: i64x2_extmul_low_i32x4, + high: i64x2_extmul_high_i32x4, + } => { + ( + [0, 0, 0, 0] + * + [0, 0, 0, 0] + ) + ( + [-1, 0, i32::MAX, 19931] + * + [1, 1, i32::MIN, 29391] + ) + ( + [i32::MAX, 3003183, 3 << 20, 0xffffff] + * + [i32::MAX, i32::MIN, -40042, 300] + ) + } + u32x4 { + from: u32, + to: u64, + low: u64x2_extmul_low_u32x4, + high: u64x2_extmul_high_u32x4, + } => { + ( + [0, 0, 0, 0] + * + [0, 0, 0, 0] + ) + ( + [1, 0, u32::MAX, 19931] + * + [1, 1, 3, 29391] + ) + ( + [u32::MAX, 3003183, 3 << 20, 0xffffff] + * + [u32::MAX, 3000, 40042, 300] + ) + } + } + } + + #[test] + fn test_q15mulr_sat_s() { + fn test(a: [i16; 8], b: [i16; 8]) { + let a_v = i16x8(a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7]); + let b_v = i16x8(b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7]); + let result = i16x8_q15mulr_sat(a_v, b_v); + let result = unsafe { mem::transmute::<v128, [i16; 8]>(result) }; + + for (i, (a, b)) in a.iter().zip(&b).enumerate() { + assert_eq!( + result[i], + (((*a as i32) 
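+                    // i16x8_q15mulr_sat is a rounding Q15 fixed-point
+                    // multiply: widen to i32, add 0x4000 (the rounding bias),
+                    // shift right by 15, then saturate. The inputs below
+                    // avoid the only saturating pair, i16::MIN * i16::MIN,
+                    // so this unsaturated reference formula matches.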
* (*b as i32) + 0x4000) >> 15) as i16
+                );
+            }
+        }
+
+        test([0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0]);
+        test([1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1]);
+        test(
+            [-1, 100, 2003, -29494, 12, 128, 994, 1],
+            [-4049, 8494, -10483, 0, 5, 2222, 883, -9],
+        );
+    }
+
+    #[test]
+    fn test_extadd() {
+        macro_rules! test {
+            ($(
+                $func:ident {
+                    from: $from:ident,
+                    to: $to:ident,
+                } => {
+                    $([$($a:tt)*])*
+                }
+            )*) => ($(
+                $(unsafe {
+                    let a: [$from; 16 / mem::size_of::<$from>()] = [$($a)*];
+                    let a_v = mem::transmute::<_, v128>(a);
+                    let r = mem::transmute::<v128, [$to; 16 / mem::size_of::<$to>()]>($func(a_v));
+
+                    let half = a.len() / 2;
+                    for i in 0..half {
+                        assert_eq!(
+                            (a[2 * i] as $to).wrapping_add((a[2 * i + 1] as $to)),
+                            r[i],
+                            "failed {} + {} != {}",
+                            a[2 * i] as $to,
+                            a[2 * i + 1] as $to,
+                            r[i],
+                        );
+                    }
+                })*
+            )*)
+        }
+        test! {
+            i16x8_extadd_pairwise_i8x16 {
+                from: i8,
+                to: i16,
+            } => {
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+                [-1, -2, 3, 100, 124, -38, 33, 87, 92, 108, 22, 8, -43, -128, 22, 0]
+                [-5, -2, 6, 10, 45, -4, 4, -2, 0, 88, 92, -102, -98, 83, 73, 54]
+            }
+            i16x8_extadd_pairwise_u8x16 {
+                from: u8,
+                to: i16,
+            } => {
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+                [1, 2, 3, 100, 124, 38, 33, 87, 92, 198, 22, 8, 43, 128, 22, 0]
+                [5, 200, 6, 10, 45, 248, 4, 2, 0, 2, 92, 102, 234, 83, 73, 54]
+            }
+            i32x4_extadd_pairwise_i16x8 {
+                from: i16,
+                to: i32,
+            } => {
+                [0, 0, 0, 0, 0, 0, 0, 0]
+                [-1, 0, i16::MAX, 19931, -2259, 64, 200, 87]
+                [1, 1, i16::MIN, 29391, 105, 2, 100, -2]
+            }
+            i32x4_extadd_pairwise_u16x8 {
+                from: u16,
+                to: i32,
+            } => {
+                [0, 0, 0, 0, 0, 0, 0, 0]
+                [1, 0, u16::MAX, 19931, 2259, 64, 200, 87]
+                [1, 1, 3, 29391, 105, 2, 100, 2]
+            }
+        }
+    }
+}