author    Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-17 12:02:58 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-17 12:02:58 +0000
commit    698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch)
tree      173a775858bd501c378080a10dca74132f05bc50 /library/stdarch/crates/core_arch/src/aarch64
parent    Initial commit. (diff)
Adding upstream version 1.64.0+dfsg1. (upstream/1.64.0+dfsg1)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'library/stdarch/crates/core_arch/src/aarch64')
-rw-r--r--  library/stdarch/crates/core_arch/src/aarch64/armclang.rs          23
-rw-r--r--  library/stdarch/crates/core_arch/src/aarch64/crc.rs               45
-rw-r--r--  library/stdarch/crates/core_arch/src/aarch64/mod.rs               41
-rw-r--r--  library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs    25758
-rw-r--r--  library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs          5440
-rw-r--r--  library/stdarch/crates/core_arch/src/aarch64/prefetch.rs          73
-rw-r--r--  library/stdarch/crates/core_arch/src/aarch64/test_support.rs      184
-rw-r--r--  library/stdarch/crates/core_arch/src/aarch64/tme.rs               179
-rw-r--r--  library/stdarch/crates/core_arch/src/aarch64/v8.rs                104
9 files changed, 31847 insertions, 0 deletions
diff --git a/library/stdarch/crates/core_arch/src/aarch64/armclang.rs b/library/stdarch/crates/core_arch/src/aarch64/armclang.rs
new file mode 100644
index 000000000..7ad6ae50c
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/aarch64/armclang.rs
@@ -0,0 +1,23 @@
+//! ARM compiler specific intrinsics
+//!
+//! # References
+//!
+//! - [ARM Compiler v 6.10 - armclang Reference Guide][arm_comp_ref]
+//!
+//! [arm_comp_ref]: https://developer.arm.com/docs/100067/0610
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+/// Inserts a breakpoint instruction.
+///
+/// `VAL` is a compile-time constant integer in range `[0, 65535]`.
+///
+/// The breakpoint instruction inserted is `BRK` on A64.
+#[cfg_attr(test, assert_instr(brk, VAL = 0))]
+#[inline(always)]
+#[rustc_legacy_const_generics(0)]
+pub unsafe fn __breakpoint<const VAL: i32>() {
+ static_assert_imm16!(VAL);
+ crate::arch::asm!("brk {}", const VAL);
+}
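For context, the generated `BRK` takes its 16-bit immediate from the const generic. A minimal sketch of the equivalent inline asm, assuming an AArch64 target; the immediate 42 and the helper name are arbitrary, and executing it raises a breakpoint exception:

#[cfg(target_arch = "aarch64")]
unsafe fn brk_42() {
    // Mirrors the expansion of `__breakpoint::<42>()`: a single BRK
    // instruction whose immediate is baked in at compile time.
    core::arch::asm!("brk {}", const 42);
}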
diff --git a/library/stdarch/crates/core_arch/src/aarch64/crc.rs b/library/stdarch/crates/core_arch/src/aarch64/crc.rs
new file mode 100644
index 000000000..6e8128534
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/aarch64/crc.rs
@@ -0,0 +1,45 @@
+extern "unadjusted" {
+ #[link_name = "llvm.aarch64.crc32x"]
+ fn crc32x_(crc: u32, data: u64) -> u32;
+
+ #[link_name = "llvm.aarch64.crc32cx"]
+ fn crc32cx_(crc: u32, data: u64) -> u32;
+}
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+/// CRC32 single round checksum for quad words (64 bits).
+#[inline]
+#[target_feature(enable = "crc")]
+#[cfg_attr(test, assert_instr(crc32x))]
+pub unsafe fn __crc32d(crc: u32, data: u64) -> u32 {
+ crc32x_(crc, data)
+}
+
+/// CRC32-C single round checksum for quad words (64 bits).
+#[inline]
+#[target_feature(enable = "crc")]
+#[cfg_attr(test, assert_instr(crc32cx))]
+pub unsafe fn __crc32cd(crc: u32, data: u64) -> u32 {
+ crc32cx_(crc, data)
+}
+
+#[cfg(test)]
+mod tests {
+ use crate::core_arch::{aarch64::*, simd::*};
+ use std::mem;
+ use stdarch_test::simd_test;
+
+ #[simd_test(enable = "crc")]
+ unsafe fn test_crc32d() {
+ assert_eq!(__crc32d(0, 0), 0);
+ assert_eq!(__crc32d(0, 18446744073709551615), 1147535477);
+ }
+
+ #[simd_test(enable = "crc")]
+ unsafe fn test_crc32cd() {
+ assert_eq!(__crc32cd(0, 0), 0);
+ assert_eq!(__crc32cd(0, 18446744073709551615), 3293575501);
+ }
+}
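These intrinsics are designed to be chained: each call feeds the running CRC back in as the `crc` operand. A hedged sketch over a byte buffer, assuming a toolchain where `__crc32d` is exposed under `std::arch::aarch64` (it is feature-gated in this snapshot); the helper name and the little-endian chunking are illustrative:

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "crc")]
unsafe fn crc32_of_words(bytes: &[u8]) -> u32 {
    use std::arch::aarch64::__crc32d;
    let mut crc = 0;
    for chunk in bytes.chunks_exact(8) {
        // Fold eight bytes at a time into the running checksum.
        crc = __crc32d(crc, u64::from_le_bytes(chunk.try_into().unwrap()));
    }
    // A complete routine would fold the trailing < 8 bytes with the
    // narrower __crc32b/__crc32h/__crc32w intrinsics (omitted here).
    crc
}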
diff --git a/library/stdarch/crates/core_arch/src/aarch64/mod.rs b/library/stdarch/crates/core_arch/src/aarch64/mod.rs
new file mode 100644
index 000000000..0411fc106
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/aarch64/mod.rs
@@ -0,0 +1,41 @@
+//! AArch64 intrinsics.
+//!
+//! The reference for NEON is [ARM's NEON Intrinsics Reference][arm_ref]. The
+//! [ARM's NEON Intrinsics Online Database][arm_dat] is also useful.
+//!
+//! [arm_ref]: http://infocenter.arm.com/help/topic/com.arm.doc.ihi0073a/IHI0073A_arm_neon_intrinsics_ref.pdf
+//! [arm_dat]: https://developer.arm.com/technologies/neon/intrinsics
+
+mod v8;
+pub use self::v8::*;
+
+mod neon;
+pub use self::neon::*;
+
+mod tme;
+pub use self::tme::*;
+
+mod crc;
+pub use self::crc::*;
+
+mod prefetch;
+pub use self::prefetch::*;
+
+pub use super::arm_shared::*;
+
+mod armclang;
+
+pub use self::armclang::*;
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+/// Generates the trap instruction `BRK 1`
+#[cfg_attr(test, assert_instr(brk))]
+#[inline]
+pub unsafe fn brk() -> ! {
+ crate::intrinsics::abort()
+}
+
+#[cfg(test)]
+pub(crate) mod test_support;
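Much of what this module re-exports depends on CPU features only known at runtime, so callers typically pair an intrinsic with `std::arch::is_aarch64_feature_detected!`. A minimal dispatch sketch using the stable `vabdd_f64` from the NEON module below; NEON is mandatory on AArch64, so the check always passes there, but the same pattern gates optional features such as "crc" or "sha3":

fn abs_diff_f64(a: f64, b: f64) -> f64 {
    #[cfg(target_arch = "aarch64")]
    {
        if std::arch::is_aarch64_feature_detected!("neon") {
            // Safety: the required feature was verified at runtime.
            return unsafe { std::arch::aarch64::vabdd_f64(a, b) };
        }
    }
    (a - b).abs() // portable fallback for other targets
}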
diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
new file mode 100644
index 000000000..74ea2963c
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@@ -0,0 +1,25758 @@
+// This code is automatically generated. DO NOT MODIFY.
+//
+// Instead, modify `crates/stdarch-gen/neon.spec` and run the following command to re-generate this file:
+//
+// ```
+// OUT_DIR=`pwd`/crates/core_arch cargo run -p stdarch-gen -- crates/stdarch-gen/neon.spec
+// ```
+use super::*;
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+/// Three-way exclusive OR
+#[inline]
+#[target_feature(enable = "neon,sha3")]
+#[cfg_attr(test, assert_instr(eor3))]
+pub unsafe fn veor3q_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crypto.eor3s.v16i8")]
+ fn veor3q_s8_(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t;
+ }
+ veor3q_s8_(a, b, c)
+}
+
+/// Three-way exclusive OR
+#[inline]
+#[target_feature(enable = "neon,sha3")]
+#[cfg_attr(test, assert_instr(eor3))]
+pub unsafe fn veor3q_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crypto.eor3s.v8i16")]
+ fn veor3q_s16_(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t;
+ }
+ veor3q_s16_(a, b, c)
+}
+
+/// Three-way exclusive OR
+#[inline]
+#[target_feature(enable = "neon,sha3")]
+#[cfg_attr(test, assert_instr(eor3))]
+pub unsafe fn veor3q_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crypto.eor3s.v4i32")]
+ fn veor3q_s32_(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t;
+ }
+ veor3q_s32_(a, b, c)
+}
+
+/// Three-way exclusive OR
+#[inline]
+#[target_feature(enable = "neon,sha3")]
+#[cfg_attr(test, assert_instr(eor3))]
+pub unsafe fn veor3q_s64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crypto.eor3s.v2i64")]
+ fn veor3q_s64_(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t;
+ }
+ veor3q_s64_(a, b, c)
+}
+
+/// Three-way exclusive OR
+#[inline]
+#[target_feature(enable = "neon,sha3")]
+#[cfg_attr(test, assert_instr(eor3))]
+pub unsafe fn veor3q_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crypto.eor3u.v16i8")]
+ fn veor3q_u8_(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t;
+ }
+ veor3q_u8_(a, b, c)
+}
+
+/// Three-way exclusive OR
+#[inline]
+#[target_feature(enable = "neon,sha3")]
+#[cfg_attr(test, assert_instr(eor3))]
+pub unsafe fn veor3q_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crypto.eor3u.v8i16")]
+ fn veor3q_u16_(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t;
+ }
+ veor3q_u16_(a, b, c)
+}
+
+/// Three-way exclusive OR
+#[inline]
+#[target_feature(enable = "neon,sha3")]
+#[cfg_attr(test, assert_instr(eor3))]
+pub unsafe fn veor3q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crypto.eor3u.v4i32")]
+ fn veor3q_u32_(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t;
+ }
+ veor3q_u32_(a, b, c)
+}
+
+/// Three-way exclusive OR
+#[inline]
+#[target_feature(enable = "neon,sha3")]
+#[cfg_attr(test, assert_instr(eor3))]
+pub unsafe fn veor3q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crypto.eor3u.v2i64")]
+ fn veor3q_u64_(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t;
+ }
+ veor3q_u64_(a, b, c)
+}
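Semantically, every lane of `veor3q_*` is just `a ^ b ^ c`; the value of the intrinsic is that SHA3-capable cores fuse the two XORs into a single EOR3. A portable reference, assuming core Rust only:

fn eor3_ref(a: [u8; 16], b: [u8; 16], c: [u8; 16]) -> [u8; 16] {
    let mut out = [0u8; 16];
    for i in 0..16 {
        out[i] = a[i] ^ b[i] ^ c[i]; // lane-wise three-way XOR
    }
    out
}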
+
+/// Floating-point absolute difference
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fabd))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vabd_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fabd.v1f64")]
+ fn vabd_f64_(a: float64x1_t, b: float64x1_t) -> float64x1_t;
+ }
+ vabd_f64_(a, b)
+}
+
+/// Floating-point absolute difference
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fabd))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vabdq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fabd.v2f64")]
+ fn vabdq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+ }
+ vabdq_f64_(a, b)
+}
+
+/// Floating-point absolute difference
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fabd))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vabds_f32(a: f32, b: f32) -> f32 {
+ simd_extract(vabd_f32(vdup_n_f32(a), vdup_n_f32(b)), 0)
+}
+
+/// Floating-point absolute difference
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fabd))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vabdd_f64(a: f64, b: f64) -> f64 {
+ simd_extract(vabd_f64(vdup_n_f64(a), vdup_n_f64(b)), 0)
+}
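The scalar forms above follow a recurring pattern in this file: splat each scalar into a vector with `vdup`, run the vector intrinsic, and pull out lane 0. A hedged sketch of the same pattern written against the public API (the helper name is illustrative):

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn abd_f32_via_vectors(a: f32, b: f32) -> f32 {
    use std::arch::aarch64::{vabd_f32, vdup_n_f32, vget_lane_f32};
    // Splat, operate on the two-lane vector, then extract lane 0.
    vget_lane_f32::<0>(vabd_f32(vdup_n_f32(a), vdup_n_f32(b)))
}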
+
+/// Unsigned Absolute difference Long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uabdl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vabdl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t {
+ let c: uint8x8_t = simd_shuffle8!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
+ let d: uint8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
+ simd_cast(vabd_u8(c, d))
+}
+
+/// Unsigned Absolute difference Long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uabdl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vabdl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
+ let c: uint16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]);
+ let d: uint16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]);
+ simd_cast(vabd_u16(c, d))
+}
+
+/// Unsigned Absolute difference Long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uabdl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vabdl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
+ let c: uint32x2_t = simd_shuffle2!(a, a, [2, 3]);
+ let d: uint32x2_t = simd_shuffle2!(b, b, [2, 3]);
+ simd_cast(vabd_u32(c, d))
+}
+
+/// Signed Absolute difference Long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sabdl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vabdl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t {
+ let c: int8x8_t = simd_shuffle8!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
+ let d: int8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
+ let e: uint8x8_t = simd_cast(vabd_s8(c, d));
+ simd_cast(e)
+}
+
+/// Signed Absolute difference Long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sabdl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vabdl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
+ let c: int16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]);
+ let d: int16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]);
+ let e: uint16x4_t = simd_cast(vabd_s16(c, d));
+ simd_cast(e)
+}
+
+/// Signed Absolute difference Long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sabdl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vabdl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
+ let c: int32x2_t = simd_shuffle2!(a, a, [2, 3]);
+ let d: int32x2_t = simd_shuffle2!(b, b, [2, 3]);
+ let e: uint32x2_t = simd_cast(vabd_s32(c, d));
+ simd_cast(e)
+}
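The `_high` variants operate on the upper half of each 128-bit input and widen the result. A portable reference for `vabdl_high_u8`, assuming core Rust only:

fn abdl_high_u8_ref(a: [u8; 16], b: [u8; 16]) -> [u16; 8] {
    let mut out = [0u16; 8];
    for i in 0..8 {
        // High 8 lanes only; the absolute difference is widened to 16 bits.
        out[i] = a[8 + i].abs_diff(b[8 + i]) as u16;
    }
    out
}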
+
+/// Compare bitwise Equal (vector)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmeq))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vceq_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+ simd_eq(a, b)
+}
+
+/// Compare bitwise Equal (vector)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmeq))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vceqq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+ simd_eq(a, b)
+}
+
+/// Compare bitwise Equal (vector)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmeq))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vceq_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t {
+ simd_eq(a, b)
+}
+
+/// Compare bitwise Equal (vector)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmeq))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vceqq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t {
+ simd_eq(a, b)
+}
+
+/// Compare bitwise Equal (vector)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmeq))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vceq_p64(a: poly64x1_t, b: poly64x1_t) -> uint64x1_t {
+ simd_eq(a, b)
+}
+
+/// Compare bitwise Equal (vector)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmeq))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vceqq_p64(a: poly64x2_t, b: poly64x2_t) -> uint64x2_t {
+ simd_eq(a, b)
+}
+
+/// Floating-point compare equal
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmeq))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vceq_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t {
+ simd_eq(a, b)
+}
+
+/// Floating-point compare equal
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmeq))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vceqq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t {
+ simd_eq(a, b)
+}
+
+/// Compare bitwise equal
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vceqd_s64(a: i64, b: i64) -> u64 {
+ transmute(vceq_s64(transmute(a), transmute(b)))
+}
+
+/// Compare bitwise equal
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vceqd_u64(a: u64, b: u64) -> u64 {
+ transmute(vceq_u64(transmute(a), transmute(b)))
+}
+
+/// Floating-point compare equal
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vceqs_f32(a: f32, b: f32) -> u32 {
+ simd_extract(vceq_f32(vdup_n_f32(a), vdup_n_f32(b)), 0)
+}
+
+/// Floating-point compare equal
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vceqd_f64(a: f64, b: f64) -> u64 {
+ simd_extract(vceq_f64(vdup_n_f64(a), vdup_n_f64(b)), 0)
+}
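Note that all of these comparisons return masks, not booleans: every bit of a lane is set on success and cleared on failure. A portable reference for the scalar `vceqd_u64` case, assuming core Rust only:

fn ceqd_u64_ref(a: u64, b: u64) -> u64 {
    if a == b { u64::MAX } else { 0 } // all-ones mask on equality
}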
+
+/// Signed compare bitwise equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmeq))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vceqz_s8(a: int8x8_t) -> uint8x8_t {
+ let b: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+ simd_eq(a, transmute(b))
+}
+
+/// Signed compare bitwise equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmeq))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vceqzq_s8(a: int8x16_t) -> uint8x16_t {
+ let b: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ simd_eq(a, transmute(b))
+}
+
+/// Signed compare bitwise equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmeq))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vceqz_s16(a: int16x4_t) -> uint16x4_t {
+ let b: i16x4 = i16x4::new(0, 0, 0, 0);
+ simd_eq(a, transmute(b))
+}
+
+/// Signed compare bitwise equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmeq))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vceqzq_s16(a: int16x8_t) -> uint16x8_t {
+ let b: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+ simd_eq(a, transmute(b))
+}
+
+/// Signed compare bitwise equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmeq))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vceqz_s32(a: int32x2_t) -> uint32x2_t {
+ let b: i32x2 = i32x2::new(0, 0);
+ simd_eq(a, transmute(b))
+}
+
+/// Signed compare bitwise equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmeq))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vceqzq_s32(a: int32x4_t) -> uint32x4_t {
+ let b: i32x4 = i32x4::new(0, 0, 0, 0);
+ simd_eq(a, transmute(b))
+}
+
+/// Signed compare bitwise equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmeq))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vceqz_s64(a: int64x1_t) -> uint64x1_t {
+ let b: i64x1 = i64x1::new(0);
+ simd_eq(a, transmute(b))
+}
+
+/// Signed compare bitwise equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmeq))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vceqzq_s64(a: int64x2_t) -> uint64x2_t {
+ let b: i64x2 = i64x2::new(0, 0);
+ simd_eq(a, transmute(b))
+}
+
+/// Compare bitwise equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmeq))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vceqz_p8(a: poly8x8_t) -> uint8x8_t {
+ let b: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+ simd_eq(a, transmute(b))
+}
+
+/// Compare bitwise equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmeq))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vceqzq_p8(a: poly8x16_t) -> uint8x16_t {
+ let b: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ simd_eq(a, transmute(b))
+}
+
+/// Compare bitwise equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmeq))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vceqz_p64(a: poly64x1_t) -> uint64x1_t {
+ let b: i64x1 = i64x1::new(0);
+ simd_eq(a, transmute(b))
+}
+
+/// Compare bitwise equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmeq))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vceqzq_p64(a: poly64x2_t) -> uint64x2_t {
+ let b: i64x2 = i64x2::new(0, 0);
+ simd_eq(a, transmute(b))
+}
+
+/// Unsigned compare bitwise equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmeq))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vceqz_u8(a: uint8x8_t) -> uint8x8_t {
+ let b: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+ simd_eq(a, transmute(b))
+}
+
+/// Unsigned compare bitwise equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmeq))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vceqzq_u8(a: uint8x16_t) -> uint8x16_t {
+ let b: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ simd_eq(a, transmute(b))
+}
+
+/// Unsigned compare bitwise equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmeq))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vceqz_u16(a: uint16x4_t) -> uint16x4_t {
+ let b: u16x4 = u16x4::new(0, 0, 0, 0);
+ simd_eq(a, transmute(b))
+}
+
+/// Unsigned compare bitwise equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmeq))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vceqzq_u16(a: uint16x8_t) -> uint16x8_t {
+ let b: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+ simd_eq(a, transmute(b))
+}
+
+/// Unsigned compare bitwise equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmeq))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vceqz_u32(a: uint32x2_t) -> uint32x2_t {
+ let b: u32x2 = u32x2::new(0, 0);
+ simd_eq(a, transmute(b))
+}
+
+/// Unsigned compare bitwise equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmeq))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vceqzq_u32(a: uint32x4_t) -> uint32x4_t {
+ let b: u32x4 = u32x4::new(0, 0, 0, 0);
+ simd_eq(a, transmute(b))
+}
+
+/// Unsigned compare bitwise equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmeq))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vceqz_u64(a: uint64x1_t) -> uint64x1_t {
+ let b: u64x1 = u64x1::new(0);
+ simd_eq(a, transmute(b))
+}
+
+/// Unsigned compare bitwise equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmeq))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vceqzq_u64(a: uint64x2_t) -> uint64x2_t {
+ let b: u64x2 = u64x2::new(0, 0);
+ simd_eq(a, transmute(b))
+}
+
+/// Floating-point compare bitwise equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmeq))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vceqz_f32(a: float32x2_t) -> uint32x2_t {
+ let b: f32x2 = f32x2::new(0.0, 0.0);
+ simd_eq(a, transmute(b))
+}
+
+/// Floating-point compare bitwise equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmeq))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vceqzq_f32(a: float32x4_t) -> uint32x4_t {
+ let b: f32x4 = f32x4::new(0.0, 0.0, 0.0, 0.0);
+ simd_eq(a, transmute(b))
+}
+
+/// Floating-point compare bitwise equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmeq))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vceqz_f64(a: float64x1_t) -> uint64x1_t {
+ let b: f64 = 0.0;
+ simd_eq(a, transmute(b))
+}
+
+/// Floating-point compare bitwise equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmeq))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vceqzq_f64(a: float64x2_t) -> uint64x2_t {
+ let b: f64x2 = f64x2::new(0.0, 0.0);
+ simd_eq(a, transmute(b))
+}
+
+/// Compare bitwise equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vceqzd_s64(a: i64) -> u64 {
+ transmute(vceqz_s64(transmute(a)))
+}
+
+/// Compare bitwise equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vceqzd_u64(a: u64) -> u64 {
+ transmute(vceqz_u64(transmute(a)))
+}
+
+/// Floating-point compare bitwise equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vceqzs_f32(a: f32) -> u32 {
+ simd_extract(vceqz_f32(vdup_n_f32(a)), 0)
+}
+
+/// Floating-point compare bitwise equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vceqzd_f64(a: f64) -> u64 {
+ simd_extract(vceqz_f64(vdup_n_f64(a)), 0)
+}
+
+/// Signed compare bitwise Test bits nonzero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmtst))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtst_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t {
+ let c: int64x1_t = simd_and(a, b);
+ let d: i64x1 = i64x1::new(0);
+ simd_ne(c, transmute(d))
+}
+
+/// Signed compare bitwise Test bits nonzero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmtst))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtstq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t {
+ let c: int64x2_t = simd_and(a, b);
+ let d: i64x2 = i64x2::new(0, 0);
+ simd_ne(c, transmute(d))
+}
+
+/// Compare bitwise Test bits nonzero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmtst))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtst_p64(a: poly64x1_t, b: poly64x1_t) -> uint64x1_t {
+ let c: poly64x1_t = simd_and(a, b);
+ let d: i64x1 = i64x1::new(0);
+ simd_ne(c, transmute(d))
+}
+
+/// Compare bitwise Test bits nonzero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmtst))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtstq_p64(a: poly64x2_t, b: poly64x2_t) -> uint64x2_t {
+ let c: poly64x2_t = simd_and(a, b);
+ let d: i64x2 = i64x2::new(0, 0);
+ simd_ne(c, transmute(d))
+}
+
+/// Unsigned compare bitwise Test bits nonzero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmtst))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtst_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+ let c: uint64x1_t = simd_and(a, b);
+ let d: u64x1 = u64x1::new(0);
+ simd_ne(c, transmute(d))
+}
+
+/// Unsigned compare bitwise Test bits nonzero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmtst))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtstq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+ let c: uint64x2_t = simd_and(a, b);
+ let d: u64x2 = u64x2::new(0, 0);
+ simd_ne(c, transmute(d))
+}
+
+/// Compare bitwise test bits nonzero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tst))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtstd_s64(a: i64, b: i64) -> u64 {
+ transmute(vtst_s64(transmute(a), transmute(b)))
+}
+
+/// Compare bitwise test bits nonzero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tst))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtstd_u64(a: u64, b: u64) -> u64 {
+ transmute(vtst_u64(transmute(a), transmute(b)))
+}
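CMTST sets a lane when the two inputs share any set bit, i.e. when `a & b` is nonzero. A portable reference for the scalar case, assuming core Rust only:

fn tstd_u64_ref(a: u64, b: u64) -> u64 {
    if a & b != 0 { u64::MAX } else { 0 } // mask form, like the compares above
}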
+
+/// Signed saturating accumulate of unsigned value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(suqadd))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuqadds_s32(a: i32, b: u32) -> i32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.suqadd.i32")]
+ fn vuqadds_s32_(a: i32, b: u32) -> i32;
+ }
+ vuqadds_s32_(a, b)
+}
+
+/// Signed saturating accumulate of unsigned value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(suqadd))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuqaddd_s64(a: i64, b: u64) -> i64 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.suqadd.i64")]
+ fn vuqaddd_s64_(a: i64, b: u64) -> i64;
+ }
+ vuqaddd_s64_(a, b)
+}
+
+/// Signed saturating accumulate of unsigned value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(suqadd))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuqaddb_s8(a: i8, b: u8) -> i8 {
+ simd_extract(vuqadd_s8(vdup_n_s8(a), vdup_n_u8(b)), 0)
+}
+
+/// Signed saturating accumulate of unsigned value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(suqadd))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuqaddh_s16(a: i16, b: u16) -> i16 {
+ simd_extract(vuqadd_s16(vdup_n_s16(a), vdup_n_u16(b)), 0)
+}
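SUQADD adds an unsigned value into a signed accumulator with signed saturation; since the addend is non-negative, only the upper bound can clip. A portable reference for `vuqadds_s32`, assuming core Rust only:

fn suqadds_s32_ref(a: i32, b: u32) -> i32 {
    let sum = i64::from(a) + i64::from(b); // widen so the add cannot wrap
    sum.min(i64::from(i32::MAX)) as i32    // saturate at i32::MAX only
}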
+
+/// Floating-point absolute value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fabs))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vabs_f64(a: float64x1_t) -> float64x1_t {
+ simd_fabs(a)
+}
+
+/// Floating-point absolute value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fabs))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vabsq_f64(a: float64x2_t) -> float64x2_t {
+ simd_fabs(a)
+}
+
+/// Compare signed greater than
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmgt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcgt_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t {
+ simd_gt(a, b)
+}
+
+/// Compare signed greater than
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmgt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcgtq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t {
+ simd_gt(a, b)
+}
+
+/// Compare unsigned higher
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmhi))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcgt_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+ simd_gt(a, b)
+}
+
+/// Compare unsigned higher
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmhi))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcgtq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+ simd_gt(a, b)
+}
+
+/// Floating-point compare greater than
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmgt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcgt_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t {
+ simd_gt(a, b)
+}
+
+/// Floating-point compare greater than
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmgt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcgtq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t {
+ simd_gt(a, b)
+}
+
+/// Compare greater than
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcgtd_s64(a: i64, b: i64) -> u64 {
+ transmute(vcgt_s64(transmute(a), transmute(b)))
+}
+
+/// Compare greater than
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcgtd_u64(a: u64, b: u64) -> u64 {
+ transmute(vcgt_u64(transmute(a), transmute(b)))
+}
+
+/// Floating-point compare greater than
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcgts_f32(a: f32, b: f32) -> u32 {
+ simd_extract(vcgt_f32(vdup_n_f32(a), vdup_n_f32(b)), 0)
+}
+
+/// Floating-point compare greater than
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcgtd_f64(a: f64, b: f64) -> u64 {
+ simd_extract(vcgt_f64(vdup_n_f64(a), vdup_n_f64(b)), 0)
+}
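CMHI ("higher") is the unsigned counterpart of CMGT, and the two can disagree on the same bit pattern once the sign bit is set. A portable illustration, assuming core Rust only:

fn higher_vs_greater(a: u64, b: u64) -> (u64, u64) {
    let unsigned_hi = if a > b { u64::MAX } else { 0 };
    let signed_gt = if (a as i64) > (b as i64) { u64::MAX } else { 0 };
    (unsigned_hi, signed_gt) // e.g. a = 1 << 63, b = 0: hi is set, gt is not
}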
+
+/// Compare signed less than
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmgt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vclt_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t {
+ simd_lt(a, b)
+}
+
+/// Compare signed less than
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmgt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcltq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t {
+ simd_lt(a, b)
+}
+
+/// Compare unsigned less than
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmhi))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vclt_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+ simd_lt(a, b)
+}
+
+/// Compare unsigned less than
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmhi))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcltq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+ simd_lt(a, b)
+}
+
+/// Floating-point compare less than
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmgt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vclt_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t {
+ simd_lt(a, b)
+}
+
+/// Floating-point compare less than
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmgt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcltq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t {
+ simd_lt(a, b)
+}
+
+/// Compare less than
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcltd_s64(a: i64, b: i64) -> u64 {
+ transmute(vclt_s64(transmute(a), transmute(b)))
+}
+
+/// Compare less than
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcltd_u64(a: u64, b: u64) -> u64 {
+ transmute(vclt_u64(transmute(a), transmute(b)))
+}
+
+/// Floating-point compare less than
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vclts_f32(a: f32, b: f32) -> u32 {
+ simd_extract(vclt_f32(vdup_n_f32(a), vdup_n_f32(b)), 0)
+}
+
+/// Floating-point compare less than
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcltd_f64(a: f64, b: f64) -> u64 {
+ simd_extract(vclt_f64(vdup_n_f64(a), vdup_n_f64(b)), 0)
+}
+
+/// Compare signed less than or equal
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmge))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcle_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t {
+ simd_le(a, b)
+}
+
+/// Compare signed less than or equal
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmge))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcleq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t {
+ simd_le(a, b)
+}
+
+/// Compare greater than or equal
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcged_s64(a: i64, b: i64) -> u64 {
+ transmute(vcge_s64(transmute(a), transmute(b)))
+}
+
+/// Compare greater than or equal
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcged_u64(a: u64, b: u64) -> u64 {
+ transmute(vcge_u64(transmute(a), transmute(b)))
+}
+
+/// Floating-point compare greater than or equal
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcges_f32(a: f32, b: f32) -> u32 {
+ simd_extract(vcge_f32(vdup_n_f32(a), vdup_n_f32(b)), 0)
+}
+
+/// Floating-point compare greater than or equal
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcged_f64(a: f64, b: f64) -> u64 {
+ simd_extract(vcge_f64(vdup_n_f64(a), vdup_n_f64(b)), 0)
+}
+
+/// Compare unsigned less than or equal
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmhs))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcle_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+ simd_le(a, b)
+}
+
+/// Compare unsigned less than or equal
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmhs))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcleq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+ simd_le(a, b)
+}
+
+/// Floating-point compare less than or equal
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmge))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcle_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t {
+ simd_le(a, b)
+}
+
+/// Floating-point compare less than or equal
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmge))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcleq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t {
+ simd_le(a, b)
+}
+
+/// Compare less than or equal
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcled_s64(a: i64, b: i64) -> u64 {
+ transmute(vcle_s64(transmute(a), transmute(b)))
+}
+
+/// Compare less than or equal
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcled_u64(a: u64, b: u64) -> u64 {
+ transmute(vcle_u64(transmute(a), transmute(b)))
+}
+
+/// Floating-point compare less than or equal
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcles_f32(a: f32, b: f32) -> u32 {
+ simd_extract(vcle_f32(vdup_n_f32(a), vdup_n_f32(b)), 0)
+}
+
+/// Floating-point compare less than or equal
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcled_f64(a: f64, b: f64) -> u64 {
+ simd_extract(vcle_f64(vdup_n_f64(a), vdup_n_f64(b)), 0)
+}
+
+/// Compare signed greater than or equal
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmge))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcge_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t {
+ simd_ge(a, b)
+}
+
+/// Compare signed greater than or equal
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmge))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcgeq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t {
+ simd_ge(a, b)
+}
+
+/// Compare unsigned greater than or equal
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmhs))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcge_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+ simd_ge(a, b)
+}
+
+/// Compare unsigned greater than or equal
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmhs))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcgeq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+ simd_ge(a, b)
+}
+
+/// Floating-point compare greater than or equal
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmge))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcge_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t {
+ simd_ge(a, b)
+}
+
+/// Floating-point compare greater than or equal
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmge))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcgeq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t {
+ simd_ge(a, b)
+}
+
+/// Compare signed greater than or equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmge))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcgez_s8(a: int8x8_t) -> uint8x8_t {
+ let b: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+ simd_ge(a, transmute(b))
+}
+
+/// Compare signed greater than or equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmge))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcgezq_s8(a: int8x16_t) -> uint8x16_t {
+ let b: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ simd_ge(a, transmute(b))
+}
+
+/// Compare signed greater than or equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmge))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcgez_s16(a: int16x4_t) -> uint16x4_t {
+ let b: i16x4 = i16x4::new(0, 0, 0, 0);
+ simd_ge(a, transmute(b))
+}
+
+/// Compare signed greater than or equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmge))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcgezq_s16(a: int16x8_t) -> uint16x8_t {
+ let b: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+ simd_ge(a, transmute(b))
+}
+
+/// Compare signed greater than or equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmge))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcgez_s32(a: int32x2_t) -> uint32x2_t {
+ let b: i32x2 = i32x2::new(0, 0);
+ simd_ge(a, transmute(b))
+}
+
+/// Compare signed greater than or equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmge))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcgezq_s32(a: int32x4_t) -> uint32x4_t {
+ let b: i32x4 = i32x4::new(0, 0, 0, 0);
+ simd_ge(a, transmute(b))
+}
+
+/// Compare signed greater than or equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmge))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcgez_s64(a: int64x1_t) -> uint64x1_t {
+ let b: i64x1 = i64x1::new(0);
+ simd_ge(a, transmute(b))
+}
+
+/// Compare signed greater than or equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmge))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcgezq_s64(a: int64x2_t) -> uint64x2_t {
+ let b: i64x2 = i64x2::new(0, 0);
+ simd_ge(a, transmute(b))
+}
+
+/// Floating-point compare greater than or equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmge))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcgez_f32(a: float32x2_t) -> uint32x2_t {
+ let b: f32x2 = f32x2::new(0.0, 0.0);
+ simd_ge(a, transmute(b))
+}
+
+/// Floating-point compare greater than or equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmge))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcgezq_f32(a: float32x4_t) -> uint32x4_t {
+ let b: f32x4 = f32x4::new(0.0, 0.0, 0.0, 0.0);
+ simd_ge(a, transmute(b))
+}
+
+/// Floating-point compare greater than or equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmge))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcgez_f64(a: float64x1_t) -> uint64x1_t {
+ let b: f64 = 0.0;
+ simd_ge(a, transmute(b))
+}
+
+/// Floating-point compare greater than or equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmge))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcgezq_f64(a: float64x2_t) -> uint64x2_t {
+ let b: f64x2 = f64x2::new(0.0, 0.0);
+ simd_ge(a, transmute(b))
+}
+
+/// Compare signed greater than or equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(eor))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcgezd_s64(a: i64) -> u64 {
+ transmute(vcgez_s64(transmute(a)))
+}
+
+/// Floating-point compare greater than or equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcgezs_f32(a: f32) -> u32 {
+ simd_extract(vcgez_f32(vdup_n_f32(a)), 0)
+}
+
+/// Floating-point compare greater than or equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcgezd_f64(a: f64) -> u64 {
+ simd_extract(vcgez_f64(vdup_n_f64(a)), 0)
+}
+
+/// Compare signed greater than zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmgt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcgtz_s8(a: int8x8_t) -> uint8x8_t {
+ let b: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+ simd_gt(a, transmute(b))
+}
+
+/// Compare signed greater than zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmgt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcgtzq_s8(a: int8x16_t) -> uint8x16_t {
+ let b: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ simd_gt(a, transmute(b))
+}
+
+/// Compare signed greater than zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmgt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcgtz_s16(a: int16x4_t) -> uint16x4_t {
+ let b: i16x4 = i16x4::new(0, 0, 0, 0);
+ simd_gt(a, transmute(b))
+}
+
+/// Compare signed greater than zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmgt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcgtzq_s16(a: int16x8_t) -> uint16x8_t {
+ let b: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+ simd_gt(a, transmute(b))
+}
+
+/// Compare signed greater than zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmgt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcgtz_s32(a: int32x2_t) -> uint32x2_t {
+ let b: i32x2 = i32x2::new(0, 0);
+ simd_gt(a, transmute(b))
+}
+
+/// Compare signed greater than zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmgt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcgtzq_s32(a: int32x4_t) -> uint32x4_t {
+ let b: i32x4 = i32x4::new(0, 0, 0, 0);
+ simd_gt(a, transmute(b))
+}
+
+/// Compare signed greater than zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmgt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcgtz_s64(a: int64x1_t) -> uint64x1_t {
+ let b: i64x1 = i64x1::new(0);
+ simd_gt(a, transmute(b))
+}
+
+/// Compare signed greater than zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmgt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcgtzq_s64(a: int64x2_t) -> uint64x2_t {
+ let b: i64x2 = i64x2::new(0, 0);
+ simd_gt(a, transmute(b))
+}
+
+/// Floating-point compare greater than zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmgt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcgtz_f32(a: float32x2_t) -> uint32x2_t {
+ let b: f32x2 = f32x2::new(0.0, 0.0);
+ simd_gt(a, transmute(b))
+}
+
+/// Floating-point compare greater than zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmgt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcgtzq_f32(a: float32x4_t) -> uint32x4_t {
+ let b: f32x4 = f32x4::new(0.0, 0.0, 0.0, 0.0);
+ simd_gt(a, transmute(b))
+}
+
+/// Floating-point compare greater than zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmgt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcgtz_f64(a: float64x1_t) -> uint64x1_t {
+ let b: f64 = 0.0;
+ simd_gt(a, transmute(b))
+}
+
+/// Floating-point compare greater than zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmgt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcgtzq_f64(a: float64x2_t) -> uint64x2_t {
+ let b: f64x2 = f64x2::new(0.0, 0.0);
+ simd_gt(a, transmute(b))
+}
+
+/// Compare signed greater than zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcgtzd_s64(a: i64) -> u64 {
+ transmute(vcgtz_s64(transmute(a)))
+}
+
+/// Floating-point compare greater than zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcgtzs_f32(a: f32) -> u32 {
+ simd_extract(vcgtz_f32(vdup_n_f32(a)), 0)
+}
+
+/// Floating-point compare greater than zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcgtzd_f64(a: f64) -> u64 {
+ simd_extract(vcgtz_f64(vdup_n_f64(a)), 0)
+}
+
+/// Compare signed less than or equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmgt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vclez_s8(a: int8x8_t) -> uint8x8_t {
+ let b: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+ simd_le(a, transmute(b))
+}
+
+/// Compare signed less than or equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmgt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vclezq_s8(a: int8x16_t) -> uint8x16_t {
+ let b: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ simd_le(a, transmute(b))
+}
+
+/// Compare signed less than or equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmgt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vclez_s16(a: int16x4_t) -> uint16x4_t {
+ let b: i16x4 = i16x4::new(0, 0, 0, 0);
+ simd_le(a, transmute(b))
+}
+
+/// Compare signed less than or equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmgt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vclezq_s16(a: int16x8_t) -> uint16x8_t {
+ let b: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+ simd_le(a, transmute(b))
+}
+
+/// Compare signed less than or equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmgt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vclez_s32(a: int32x2_t) -> uint32x2_t {
+ let b: i32x2 = i32x2::new(0, 0);
+ simd_le(a, transmute(b))
+}
+
+/// Compare signed less than or equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmgt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vclezq_s32(a: int32x4_t) -> uint32x4_t {
+ let b: i32x4 = i32x4::new(0, 0, 0, 0);
+ simd_le(a, transmute(b))
+}
+
+/// Compare signed less than or equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmgt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vclez_s64(a: int64x1_t) -> uint64x1_t {
+ let b: i64x1 = i64x1::new(0);
+ simd_le(a, transmute(b))
+}
+
+/// Compare signed less than or equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmgt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vclezq_s64(a: int64x2_t) -> uint64x2_t {
+ let b: i64x2 = i64x2::new(0, 0);
+ simd_le(a, transmute(b))
+}
+
+/// Floating-point compare less than or equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmle))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vclez_f32(a: float32x2_t) -> uint32x2_t {
+ let b: f32x2 = f32x2::new(0.0, 0.0);
+ simd_le(a, transmute(b))
+}
+
+/// Floating-point compare less than or equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmle))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vclezq_f32(a: float32x4_t) -> uint32x4_t {
+ let b: f32x4 = f32x4::new(0.0, 0.0, 0.0, 0.0);
+ simd_le(a, transmute(b))
+}
+
+/// Floating-point compare less than or equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmle))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vclez_f64(a: float64x1_t) -> uint64x1_t {
+ let b: f64 = 0.0;
+ simd_le(a, transmute(b))
+}
+
+/// Floating-point compare less than or equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmle))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vclezq_f64(a: float64x2_t) -> uint64x2_t {
+ let b: f64x2 = f64x2::new(0.0, 0.0);
+ simd_le(a, transmute(b))
+}
+
+/// Compare less than or equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vclezd_s64(a: i64) -> u64 {
+ transmute(vclez_s64(transmute(a)))
+}
+
+/// Floating-point compare less than or equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vclezs_f32(a: f32) -> u32 {
+ simd_extract(vclez_f32(vdup_n_f32(a)), 0)
+}
+
+/// Floating-point compare less than or equal to zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vclezd_f64(a: f64) -> u64 {
+ simd_extract(vclez_f64(vdup_n_f64(a)), 0)
+}
+
+/// Compare signed less than zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmlt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcltz_s8(a: int8x8_t) -> uint8x8_t {
+ let b: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+ simd_lt(a, transmute(b))
+}
+
+/// Compare signed less than zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmlt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcltzq_s8(a: int8x16_t) -> uint8x16_t {
+ let b: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ simd_lt(a, transmute(b))
+}
+
+/// Compare signed less than zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmlt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcltz_s16(a: int16x4_t) -> uint16x4_t {
+ let b: i16x4 = i16x4::new(0, 0, 0, 0);
+ simd_lt(a, transmute(b))
+}
+
+/// Compare signed less than zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmlt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcltzq_s16(a: int16x8_t) -> uint16x8_t {
+ let b: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+ simd_lt(a, transmute(b))
+}
+
+/// Compare signed less than zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmlt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcltz_s32(a: int32x2_t) -> uint32x2_t {
+ let b: i32x2 = i32x2::new(0, 0);
+ simd_lt(a, transmute(b))
+}
+
+/// Compare signed less than zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmlt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcltzq_s32(a: int32x4_t) -> uint32x4_t {
+ let b: i32x4 = i32x4::new(0, 0, 0, 0);
+ simd_lt(a, transmute(b))
+}
+
+/// Compare signed less than zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmlt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcltz_s64(a: int64x1_t) -> uint64x1_t {
+ let b: i64x1 = i64x1::new(0);
+ simd_lt(a, transmute(b))
+}
+
+/// Compare signed less than zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmlt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcltzq_s64(a: int64x2_t) -> uint64x2_t {
+ let b: i64x2 = i64x2::new(0, 0);
+ simd_lt(a, transmute(b))
+}
+
+/// Floating-point compare less than zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmlt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcltz_f32(a: float32x2_t) -> uint32x2_t {
+ let b: f32x2 = f32x2::new(0.0, 0.0);
+ simd_lt(a, transmute(b))
+}
+
+/// Floating-point compare less than zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmlt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcltzq_f32(a: float32x4_t) -> uint32x4_t {
+ let b: f32x4 = f32x4::new(0.0, 0.0, 0.0, 0.0);
+ simd_lt(a, transmute(b))
+}
+
+/// Floating-point compare less than zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmlt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcltz_f64(a: float64x1_t) -> uint64x1_t {
+ let b: f64 = 0.0;
+ simd_lt(a, transmute(b))
+}
+
+/// Floating-point compare less than zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmlt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcltzq_f64(a: float64x2_t) -> uint64x2_t {
+ let b: f64x2 = f64x2::new(0.0, 0.0);
+ simd_lt(a, transmute(b))
+}
+
+/// Compare less than zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(asr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcltzd_s64(a: i64) -> u64 {
+ transmute(vcltz_s64(transmute(a)))
+}
+
+/// Floating-point compare less than zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcltzs_f32(a: f32) -> u32 {
+ simd_extract(vcltz_f32(vdup_n_f32(a)), 0)
+}
+
+/// Floating-point compare less than zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcltzd_f64(a: f64) -> u64 {
+ simd_extract(vcltz_f64(vdup_n_f64(a)), 0)
+}
+
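Note the `asr` in `vcltzd_s64`'s `assert_instr`: a signed less-than-zero test is just the sign bit, so the compiler is expected to smear that bit across the register with an arithmetic shift rather than emit a compare. A portable sketch of the equivalence (helper name is my own, not from the diff):

```rust
// Arithmetic shift right by 63 copies the sign bit into every bit,
// yielding the same all-ones/all-zeros mask as `cmlt #0` would.
fn cltz_scalar(a: i64) -> u64 {
    (a >> 63) as u64
}

fn main() {
    assert_eq!(cltz_scalar(-5), u64::MAX);
    assert_eq!(cltz_scalar(0), 0);
    assert_eq!(cltz_scalar(7), 0);
}
```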
+/// Floating-point absolute compare greater than
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(facgt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcagt_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.facgt.v1i64.v1f64")]
+ fn vcagt_f64_(a: float64x1_t, b: float64x1_t) -> uint64x1_t;
+ }
+ vcagt_f64_(a, b)
+}
+
+/// Floating-point absolute compare greater than
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(facgt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcagtq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.facgt.v2i64.v2f64")]
+ fn vcagtq_f64_(a: float64x2_t, b: float64x2_t) -> uint64x2_t;
+ }
+ vcagtq_f64_(a, b)
+}
+
+/// Floating-point absolute compare greater than
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(facgt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcagts_f32(a: f32, b: f32) -> u32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.facgt.i32.f32")]
+ fn vcagts_f32_(a: f32, b: f32) -> u32;
+ }
+ vcagts_f32_(a, b)
+}
+
+/// Floating-point absolute compare greater than
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(facgt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcagtd_f64(a: f64, b: f64) -> u64 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.facgt.i64.f64")]
+ fn vcagtd_f64_(a: f64, b: f64) -> u64;
+ }
+ vcagtd_f64_(a, b)
+}
+
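`facgt` compares magnitudes: the predicate is `|a| > |b|`, not `a > b`. A usage sketch assuming an aarch64 target:

```rust
// Sketch only: vcagtd_f64 returns an all-ones u64 mask iff |a| > |b|.
#[cfg(target_arch = "aarch64")]
fn cagt_demo() {
    use core::arch::aarch64::*;
    unsafe {
        assert_eq!(vcagtd_f64(-3.0, 2.0), u64::MAX); // |-3.0| > |2.0|
        assert_eq!(vcagtd_f64(1.0, -2.0), 0);        // |1.0| <= |-2.0|
    }
}
```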
+/// Floating-point absolute compare greater than or equal
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(facge))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcage_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.facge.v1i64.v1f64")]
+ fn vcage_f64_(a: float64x1_t, b: float64x1_t) -> uint64x1_t;
+ }
+ vcage_f64_(a, b)
+}
+
+/// Floating-point absolute compare greater than or equal
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(facge))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcageq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.facge.v2i64.v2f64")]
+ fn vcageq_f64_(a: float64x2_t, b: float64x2_t) -> uint64x2_t;
+ }
+ vcageq_f64_(a, b)
+}
+
+/// Floating-point absolute compare greater than or equal
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(facge))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcages_f32(a: f32, b: f32) -> u32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.facge.i32.f32")]
+ fn vcages_f32_(a: f32, b: f32) -> u32;
+ }
+ vcages_f32_(a, b)
+}
+
+/// Floating-point absolute compare greater than or equal
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(facge))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcaged_f64(a: f64, b: f64) -> u64 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.facge.i64.f64")]
+ fn vcaged_f64_(a: f64, b: f64) -> u64;
+ }
+ vcaged_f64_(a, b)
+}
+
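The `facge` forms are the inclusive counterpart, `|a| >= |b|`. A portable reference model (my own sketch, not the library's implementation); as with the hardware compare, a NaN operand makes the predicate false:

```rust
// Reference model for vcages_f32: all-ones mask iff |a| >= |b|.
// NaN in either operand compares false, matching facge's behavior.
fn cage_ref(a: f32, b: f32) -> u32 {
    if a.abs() >= b.abs() { u32::MAX } else { 0 }
}

fn main() {
    assert_eq!(cage_ref(-2.0, 2.0), u32::MAX);
    assert_eq!(cage_ref(1.0, f32::NAN), 0);
}
```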
+/// Floating-point absolute compare less than
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(facgt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcalt_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t {
+ vcagt_f64(b, a)
+}
+
+/// Floating-point absolute compare less than
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(facgt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcaltq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t {
+ vcagtq_f64(b, a)
+}
+
+/// Floating-point absolute compare less than
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(facgt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcalts_f32(a: f32, b: f32) -> u32 {
+ vcagts_f32(b, a)
+}
+
+/// Floating-point absolute compare less than
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(facgt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcaltd_f64(a: f64, b: f64) -> u64 {
+ vcagtd_f64(b, a)
+}
+
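There is no dedicated "absolute compare less than" instruction; `|a| < |b|` is obtained by swapping the operands of `facgt`, which is why these wrappers still assert `facgt`. A quick sketch of the equivalence, assuming aarch64:

```rust
// Sketch only: vcaltd_f64(a, b) is vcagtd_f64(b, a).
#[cfg(target_arch = "aarch64")]
fn calt_demo() {
    use core::arch::aarch64::*;
    unsafe {
        assert_eq!(vcaltd_f64(1.0, -2.0), u64::MAX); // |1.0| < |-2.0|
        assert_eq!(vcaltd_f64(1.0, -2.0), vcagtd_f64(-2.0, 1.0));
    }
}
```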
+/// Floating-point absolute compare less than or equal
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(facge))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcale_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t {
+ vcage_f64(b, a)
+}
+
+/// Floating-point absolute compare less than or equal
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(facge))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcaleq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t {
+ vcageq_f64(b, a)
+}
+
+/// Floating-point absolute compare less than or equal
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(facge))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcales_f32(a: f32, b: f32) -> u32 {
+ vcages_f32(b, a)
+}
+
+/// Floating-point absolute compare less than or equal
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(facge))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcaled_f64(a: f64, b: f64) -> u64 {
+ vcaged_f64(b, a)
+}
+
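Likewise, `|a| <= |b|` reuses `facge` with swapped operands. Usage sketch, assuming aarch64:

```rust
// Sketch only: vcales_f32 computes |a| <= |b| via vcages_f32(b, a).
#[cfg(target_arch = "aarch64")]
fn cale_demo() {
    use core::arch::aarch64::*;
    unsafe {
        assert_eq!(vcales_f32(2.0, -2.0), u32::MAX); // |2.0| <= |-2.0|
        assert_eq!(vcales_f32(3.0, 2.0), 0);
    }
}
```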
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopy_lane_s8<const LANE1: i32, const LANE2: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+ static_assert_imm3!(LANE1);
+ static_assert_imm3!(LANE2);
+ match LANE1 & 0b111 {
+ 0 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
+ 1 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
+ 2 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]),
+ 3 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]),
+ 4 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]),
+ 5 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]),
+ 6 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]),
+ 7 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
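The shuffle encoding used throughout the `vcopy` family: in `simd_shuffle8!` over the pair `(a, b)`, indices `0..=7` address lanes of `a` and `8..=15` address lanes of `b`, so each match arm keeps `a` unchanged except at position `LANE1`, where index `8 + LANE2` pulls in lane `LANE2` of `b`. A usage sketch assuming an aarch64 target:

```rust
// Sketch only: copy lane 3 of `b` into lane 0 of `a`.
#[cfg(target_arch = "aarch64")]
fn copy_lane_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vdup_n_s8(1);
        let b = vdup_n_s8(9);
        let r = vcopy_lane_s8::<0, 3>(a, b);
        assert_eq!(vget_lane_s8::<0>(r), 9); // lane 0 now holds b[3]
        assert_eq!(vget_lane_s8::<1>(r), 1); // remaining lanes untouched
    }
}
```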
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopyq_laneq_s8<const LANE1: i32, const LANE2: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+ static_assert_imm4!(LANE1);
+ static_assert_imm4!(LANE2);
+ match LANE1 & 0b1111 {
+ 0 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 1 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 2 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 3 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 4 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 5 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 6 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 7 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 8 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]),
+ 9 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]),
+ 10 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]),
+ 11 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]),
+ 12 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]),
+ 13 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]),
+ 14 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]),
+ 15 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopy_lane_s16<const LANE1: i32, const LANE2: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+ static_assert_imm2!(LANE1);
+ static_assert_imm2!(LANE2);
+ match LANE1 & 0b11 {
+ 0 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [4 + LANE2 as u32, 1, 2, 3]),
+ 1 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [0, 4 + LANE2 as u32, 2, 3]),
+ 2 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 4 + LANE2 as u32, 3]),
+ 3 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 4 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopyq_laneq_s16<const LANE1: i32, const LANE2: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+ static_assert_imm3!(LANE1);
+ static_assert_imm3!(LANE2);
+ match LANE1 & 0b111 {
+ 0 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
+ 1 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
+ 2 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]),
+ 3 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]),
+ 4 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]),
+ 5 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]),
+ 6 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]),
+ 7 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopy_lane_s32<const LANE1: i32, const LANE2: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+ static_assert_imm1!(LANE1);
+ static_assert_imm1!(LANE2);
+ match LANE1 & 0b1 {
+ 0 => simd_shuffle2!(a, b, <const LANE1: i32, const LANE2: i32> [2 + LANE2 as u32, 1]),
+ 1 => simd_shuffle2!(a, b, <const LANE1: i32, const LANE2: i32> [0, 2 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopyq_laneq_s32<const LANE1: i32, const LANE2: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+ static_assert_imm2!(LANE1);
+ static_assert_imm2!(LANE2);
+ match LANE1 & 0b11 {
+ 0 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [4 + LANE2 as u32, 1, 2, 3]),
+ 1 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [0, 4 + LANE2 as u32, 2, 3]),
+ 2 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 4 + LANE2 as u32, 3]),
+ 3 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 4 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopyq_laneq_s64<const LANE1: i32, const LANE2: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+ static_assert_imm1!(LANE1);
+ static_assert_imm1!(LANE2);
+ match LANE1 & 0b1 {
+ 0 => simd_shuffle2!(a, b, <const LANE1: i32, const LANE2: i32> [2 + LANE2 as u32, 1]),
+ 1 => simd_shuffle2!(a, b, <const LANE1: i32, const LANE2: i32> [0, 2 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopy_lane_u8<const LANE1: i32, const LANE2: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+ static_assert_imm3!(LANE1);
+ static_assert_imm3!(LANE2);
+ match LANE1 & 0b111 {
+ 0 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
+ 1 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
+ 2 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]),
+ 3 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]),
+ 4 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]),
+ 5 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]),
+ 6 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]),
+ 7 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopyq_laneq_u8<const LANE1: i32, const LANE2: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+ static_assert_imm4!(LANE1);
+ static_assert_imm4!(LANE2);
+ match LANE1 & 0b1111 {
+ 0 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 1 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 2 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 3 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 4 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 5 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 6 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 7 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 8 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]),
+ 9 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]),
+ 10 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]),
+ 11 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]),
+ 12 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]),
+ 13 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]),
+ 14 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]),
+ 15 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopy_lane_u16<const LANE1: i32, const LANE2: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+ static_assert_imm2!(LANE1);
+ static_assert_imm2!(LANE2);
+ match LANE1 & 0b11 {
+ 0 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [4 + LANE2 as u32, 1, 2, 3]),
+ 1 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [0, 4 + LANE2 as u32, 2, 3]),
+ 2 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 4 + LANE2 as u32, 3]),
+ 3 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 4 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopyq_laneq_u16<const LANE1: i32, const LANE2: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+ static_assert_imm3!(LANE1);
+ static_assert_imm3!(LANE2);
+ match LANE1 & 0b111 {
+ 0 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
+ 1 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
+ 2 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]),
+ 3 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]),
+ 4 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]),
+ 5 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]),
+ 6 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]),
+ 7 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopy_lane_u32<const LANE1: i32, const LANE2: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+ static_assert_imm1!(LANE1);
+ static_assert_imm1!(LANE2);
+ match LANE1 & 0b1 {
+ 0 => simd_shuffle2!(a, b, <const LANE1: i32, const LANE2: i32> [2 + LANE2 as u32, 1]),
+ 1 => simd_shuffle2!(a, b, <const LANE1: i32, const LANE2: i32> [0, 2 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopyq_laneq_u32<const LANE1: i32, const LANE2: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+ static_assert_imm2!(LANE1);
+ static_assert_imm2!(LANE2);
+ match LANE1 & 0b11 {
+ 0 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [4 + LANE2 as u32, 1, 2, 3]),
+ 1 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [0, 4 + LANE2 as u32, 2, 3]),
+ 2 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 4 + LANE2 as u32, 3]),
+ 3 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 4 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopyq_laneq_u64<const LANE1: i32, const LANE2: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+ static_assert_imm1!(LANE1);
+ static_assert_imm1!(LANE2);
+ match LANE1 & 0b1 {
+ 0 => simd_shuffle2!(a, b, <const LANE1: i32, const LANE2: i32> [2 + LANE2 as u32, 1]),
+ 1 => simd_shuffle2!(a, b, <const LANE1: i32, const LANE2: i32> [0, 2 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopy_lane_p8<const LANE1: i32, const LANE2: i32>(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
+ static_assert_imm3!(LANE1);
+ static_assert_imm3!(LANE2);
+ match LANE1 & 0b111 {
+ 0 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
+ 1 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
+ 2 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]),
+ 3 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]),
+ 4 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]),
+ 5 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]),
+ 6 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]),
+ 7 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopyq_laneq_p8<const LANE1: i32, const LANE2: i32>(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
+ static_assert_imm4!(LANE1);
+ static_assert_imm4!(LANE2);
+ match LANE1 & 0b1111 {
+ 0 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 1 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 2 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 3 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 4 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 5 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 6 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 7 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 8 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]),
+ 9 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]),
+ 10 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]),
+ 11 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]),
+ 12 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]),
+ 13 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]),
+ 14 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]),
+ 15 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopy_lane_p16<const LANE1: i32, const LANE2: i32>(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
+ static_assert_imm2!(LANE1);
+ static_assert_imm2!(LANE2);
+ match LANE1 & 0b11 {
+ 0 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [4 + LANE2 as u32, 1, 2, 3]),
+ 1 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [0, 4 + LANE2 as u32, 2, 3]),
+ 2 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 4 + LANE2 as u32, 3]),
+ 3 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 4 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopyq_laneq_p16<const LANE1: i32, const LANE2: i32>(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
+ static_assert_imm3!(LANE1);
+ static_assert_imm3!(LANE2);
+ match LANE1 & 0b111 {
+ 0 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
+ 1 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
+ 2 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]),
+ 3 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]),
+ 4 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]),
+ 5 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]),
+ 6 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]),
+ 7 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopyq_laneq_p64<const LANE1: i32, const LANE2: i32>(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
+ static_assert_imm1!(LANE1);
+ static_assert_imm1!(LANE2);
+ match LANE1 & 0b1 {
+ 0 => simd_shuffle2!(a, b, <const LANE1: i32, const LANE2: i32> [2 + LANE2 as u32, 1]),
+ 1 => simd_shuffle2!(a, b, <const LANE1: i32, const LANE2: i32> [0, 2 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopy_lane_f32<const LANE1: i32, const LANE2: i32>(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+ static_assert_imm1!(LANE1);
+ static_assert_imm1!(LANE2);
+ match LANE1 & 0b1 {
+ 0 => simd_shuffle2!(a, b, <const LANE1: i32, const LANE2: i32> [2 + LANE2 as u32, 1]),
+ 1 => simd_shuffle2!(a, b, <const LANE1: i32, const LANE2: i32> [0, 2 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopyq_laneq_f32<const LANE1: i32, const LANE2: i32>(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+ static_assert_imm2!(LANE1);
+ static_assert_imm2!(LANE2);
+ match LANE1 & 0b11 {
+ 0 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [4 + LANE2 as u32, 1, 2, 3]),
+ 1 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [0, 4 + LANE2 as u32, 2, 3]),
+ 2 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 4 + LANE2 as u32, 3]),
+ 3 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 4 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopyq_laneq_f64<const LANE1: i32, const LANE2: i32>(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+ static_assert_imm1!(LANE1);
+ static_assert_imm1!(LANE2);
+ match LANE1 & 0b1 {
+ 0 => simd_shuffle2!(a, b, <const LANE1: i32, const LANE2: i32> [2 + LANE2 as u32, 1]),
+ 1 => simd_shuffle2!(a, b, <const LANE1: i32, const LANE2: i32> [0, 2 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopy_laneq_s8<const LANE1: i32, const LANE2: i32>(a: int8x8_t, b: int8x16_t) -> int8x8_t {
+ static_assert_imm3!(LANE1);
+ static_assert_imm4!(LANE2);
+ let a: int8x16_t = simd_shuffle16!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
+ match LANE1 & 0b111 {
+ 0 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
+ 1 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
+ 2 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]),
+ 3 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]),
+ 4 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]),
+ 5 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]),
+ 6 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]),
+ 7 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
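The `laneq` variants mix widths, and the shuffle needs both inputs the same length, so the 8-lane `a` is first widened to 16 lanes (its lanes duplicated); in the final 8-lane shuffle, indices `0..=15` then address the widened `a` and `16..=31` address `b`. Usage sketch, assuming aarch64:

```rust
// Sketch only: pull lane 12 of a 16-lane `b` into lane 2 of an
// 8-lane `a`.
#[cfg(target_arch = "aarch64")]
fn copy_laneq_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vdup_n_s8(0);
        let b = vdupq_n_s8(7);
        let r = vcopy_laneq_s8::<2, 12>(a, b);
        assert_eq!(vget_lane_s8::<2>(r), 7);
    }
}
```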
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopy_laneq_s16<const LANE1: i32, const LANE2: i32>(a: int16x4_t, b: int16x8_t) -> int16x4_t {
+ static_assert_imm2!(LANE1);
+ static_assert_imm3!(LANE2);
+ let a: int16x8_t = simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
+ match LANE1 & 0b11 {
+ 0 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [8 + LANE2 as u32, 1, 2, 3]),
+ 1 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [0, 8 + LANE2 as u32, 2, 3]),
+ 2 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 8 + LANE2 as u32, 3]),
+ 3 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 8 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopy_laneq_s32<const LANE1: i32, const LANE2: i32>(a: int32x2_t, b: int32x4_t) -> int32x2_t {
+ static_assert_imm1!(LANE1);
+ static_assert_imm2!(LANE2);
+ let a: int32x4_t = simd_shuffle4!(a, a, [0, 1, 2, 3]);
+ match LANE1 & 0b1 {
+ 0 => simd_shuffle2!(a, b, <const LANE1: i32, const LANE2: i32> [4 + LANE2 as u32, 1]),
+ 1 => simd_shuffle2!(a, b, <const LANE1: i32, const LANE2: i32> [0, 4 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopy_laneq_u8<const LANE1: i32, const LANE2: i32>(a: uint8x8_t, b: uint8x16_t) -> uint8x8_t {
+ static_assert_imm3!(LANE1);
+ static_assert_imm4!(LANE2);
+ let a: uint8x16_t = simd_shuffle16!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
+ match LANE1 & 0b111 {
+ 0 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
+ 1 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
+ 2 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]),
+ 3 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]),
+ 4 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]),
+ 5 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]),
+ 6 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]),
+ 7 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopy_laneq_u16<const LANE1: i32, const LANE2: i32>(a: uint16x4_t, b: uint16x8_t) -> uint16x4_t {
+ static_assert_imm2!(LANE1);
+ static_assert_imm3!(LANE2);
+ let a: uint16x8_t = simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
+ match LANE1 & 0b11 {
+ 0 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [8 + LANE2 as u32, 1, 2, 3]),
+ 1 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [0, 8 + LANE2 as u32, 2, 3]),
+ 2 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 8 + LANE2 as u32, 3]),
+ 3 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 8 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopy_laneq_u32<const LANE1: i32, const LANE2: i32>(a: uint32x2_t, b: uint32x4_t) -> uint32x2_t {
+ static_assert_imm1!(LANE1);
+ static_assert_imm2!(LANE2);
+ let a: uint32x4_t = simd_shuffle4!(a, a, [0, 1, 2, 3]);
+ match LANE1 & 0b1 {
+ 0 => simd_shuffle2!(a, b, <const LANE1: i32, const LANE2: i32> [4 + LANE2 as u32, 1]),
+ 1 => simd_shuffle2!(a, b, <const LANE1: i32, const LANE2: i32> [0, 4 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopy_laneq_p8<const LANE1: i32, const LANE2: i32>(a: poly8x8_t, b: poly8x16_t) -> poly8x8_t {
+ static_assert_imm3!(LANE1);
+ static_assert_imm4!(LANE2);
+ let a: poly8x16_t = simd_shuffle16!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
+ match LANE1 & 0b111 {
+ 0 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
+ 1 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
+ 2 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]),
+ 3 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]),
+ 4 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]),
+ 5 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]),
+ 6 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]),
+ 7 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopy_laneq_p16<const LANE1: i32, const LANE2: i32>(a: poly16x4_t, b: poly16x8_t) -> poly16x4_t {
+ static_assert_imm2!(LANE1);
+ static_assert_imm3!(LANE2);
+ let a: poly16x8_t = simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
+ match LANE1 & 0b11 {
+ 0 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [8 + LANE2 as u32, 1, 2, 3]),
+ 1 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [0, 8 + LANE2 as u32, 2, 3]),
+ 2 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 8 + LANE2 as u32, 3]),
+ 3 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 8 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopy_laneq_f32<const LANE1: i32, const LANE2: i32>(a: float32x2_t, b: float32x4_t) -> float32x2_t {
+ static_assert_imm1!(LANE1);
+ static_assert_imm2!(LANE2);
+ let a: float32x4_t = simd_shuffle4!(a, a, [0, 1, 2, 3]);
+ match LANE1 & 0b1 {
+ 0 => simd_shuffle2!(a, b, <const LANE1: i32, const LANE2: i32> [4 + LANE2 as u32, 1]),
+ 1 => simd_shuffle2!(a, b, <const LANE1: i32, const LANE2: i32> [0, 4 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopyq_lane_s8<const LANE1: i32, const LANE2: i32>(a: int8x16_t, b: int8x8_t) -> int8x16_t {
+ static_assert_imm4!(LANE1);
+ static_assert_imm3!(LANE2);
+ let b: int8x16_t = simd_shuffle16!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
+ match LANE1 & 0b1111 {
+ 0 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 1 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 2 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 3 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 4 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 5 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 6 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 7 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 8 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]),
+ 9 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]),
+ 10 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]),
+ 11 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]),
+ 12 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]),
+ 13 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]),
+ 14 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]),
+ 15 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
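The `q_lane` variants go the other way: the narrow `b` is widened to 16 lanes so that a single 16-lane shuffle can splice its lane into the wide `a`. Usage sketch, assuming aarch64:

```rust
// Sketch only: insert lane 5 of the 8-lane `b` into lane 15 of the
// 16-lane `a`.
#[cfg(target_arch = "aarch64")]
fn copyq_lane_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vdupq_n_u8(1);
        let b = vdup_n_u8(200);
        let r = vcopyq_lane_u8::<15, 5>(a, b);
        assert_eq!(vgetq_lane_u8::<15>(r), 200);
    }
}
```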
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopyq_lane_s16<const LANE1: i32, const LANE2: i32>(a: int16x8_t, b: int16x4_t) -> int16x8_t {
+ static_assert_imm3!(LANE1);
+ static_assert_imm2!(LANE2);
+ let b: int16x8_t = simd_shuffle8!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]);
+ match LANE1 & 0b111 {
+ 0 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
+ 1 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
+ 2 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]),
+ 3 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]),
+ 4 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]),
+ 5 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]),
+ 6 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]),
+ 7 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopyq_lane_s32<const LANE1: i32, const LANE2: i32>(a: int32x4_t, b: int32x2_t) -> int32x4_t {
+ static_assert_imm2!(LANE1);
+ static_assert_imm1!(LANE2);
+ let b: int32x4_t = simd_shuffle4!(b, b, [0, 1, 2, 3]);
+ match LANE1 & 0b11 {
+ 0 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [4 + LANE2 as u32, 1, 2, 3]),
+ 1 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [0, 4 + LANE2 as u32, 2, 3]),
+ 2 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 4 + LANE2 as u32, 3]),
+ 3 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 4 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopyq_lane_u8<const LANE1: i32, const LANE2: i32>(a: uint8x16_t, b: uint8x8_t) -> uint8x16_t {
+ static_assert_imm4!(LANE1);
+ static_assert_imm3!(LANE2);
+ let b: uint8x16_t = simd_shuffle16!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
+ match LANE1 & 0b1111 {
+ 0 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 1 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 2 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 3 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 4 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 5 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 6 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 7 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 8 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]),
+ 9 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]),
+ 10 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]),
+ 11 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]),
+ 12 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]),
+ 13 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]),
+ 14 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]),
+ 15 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopyq_lane_u16<const LANE1: i32, const LANE2: i32>(a: uint16x8_t, b: uint16x4_t) -> uint16x8_t {
+ static_assert_imm3!(LANE1);
+ static_assert_imm2!(LANE2);
+ let b: uint16x8_t = simd_shuffle8!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]);
+ match LANE1 & 0b111 {
+ 0 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
+ 1 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
+ 2 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]),
+ 3 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]),
+ 4 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]),
+ 5 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]),
+ 6 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]),
+ 7 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopyq_lane_u32<const LANE1: i32, const LANE2: i32>(a: uint32x4_t, b: uint32x2_t) -> uint32x4_t {
+ static_assert_imm2!(LANE1);
+ static_assert_imm1!(LANE2);
+ let b: uint32x4_t = simd_shuffle4!(b, b, [0, 1, 2, 3]);
+ match LANE1 & 0b11 {
+ 0 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [4 + LANE2 as u32, 1, 2, 3]),
+ 1 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [0, 4 + LANE2 as u32, 2, 3]),
+ 2 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 4 + LANE2 as u32, 3]),
+ 3 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 4 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopyq_lane_p8<const LANE1: i32, const LANE2: i32>(a: poly8x16_t, b: poly8x8_t) -> poly8x16_t {
+ static_assert_imm4!(LANE1);
+ static_assert_imm3!(LANE2);
+ let b: poly8x16_t = simd_shuffle16!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
+ match LANE1 & 0b1111 {
+ 0 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 1 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 2 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 3 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 4 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 5 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 6 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 7 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]),
+ 8 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]),
+ 9 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]),
+ 10 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]),
+ 11 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]),
+ 12 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]),
+ 13 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]),
+ 14 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]),
+ 15 => simd_shuffle16!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopyq_lane_p16<const LANE1: i32, const LANE2: i32>(a: poly16x8_t, b: poly16x4_t) -> poly16x8_t {
+ static_assert_imm3!(LANE1);
+ static_assert_imm2!(LANE2);
+ let b: poly16x8_t = simd_shuffle8!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]);
+ match LANE1 & 0b111 {
+ 0 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
+ 1 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
+ 2 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]),
+ 3 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]),
+ 4 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]),
+ 5 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]),
+ 6 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]),
+ 7 => simd_shuffle8!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopyq_lane_s64<const LANE1: i32, const LANE2: i32>(a: int64x2_t, b: int64x1_t) -> int64x2_t {
+ static_assert_imm1!(LANE1);
+ static_assert!(LANE2 : i32 where LANE2 == 0);
+ let b: int64x2_t = simd_shuffle2!(b, b, [0, 1]);
+ match LANE1 & 0b1 {
+ 0 => simd_shuffle2!(a, b, <const LANE1: i32, const LANE2: i32> [2 + LANE2 as u32, 1]),
+ 1 => simd_shuffle2!(a, b, <const LANE1: i32, const LANE2: i32> [0, 2 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopyq_lane_u64<const LANE1: i32, const LANE2: i32>(a: uint64x2_t, b: uint64x1_t) -> uint64x2_t {
+ static_assert_imm1!(LANE1);
+ static_assert!(LANE2 : i32 where LANE2 == 0);
+ let b: uint64x2_t = simd_shuffle2!(b, b, [0, 1]);
+ match LANE1 & 0b1 {
+ 0 => simd_shuffle2!(a, b, <const LANE1: i32, const LANE2: i32> [2 + LANE2 as u32, 1]),
+ 1 => simd_shuffle2!(a, b, <const LANE1: i32, const LANE2: i32> [0, 2 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopyq_lane_p64<const LANE1: i32, const LANE2: i32>(a: poly64x2_t, b: poly64x1_t) -> poly64x2_t {
+ static_assert_imm1!(LANE1);
+ static_assert!(LANE2 : i32 where LANE2 == 0);
+ let b: poly64x2_t = simd_shuffle2!(b, b, [0, 1]);
+ match LANE1 & 0b1 {
+ 0 => simd_shuffle2!(a, b, <const LANE1: i32, const LANE2: i32> [2 + LANE2 as u32, 1]),
+ 1 => simd_shuffle2!(a, b, <const LANE1: i32, const LANE2: i32> [0, 2 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopyq_lane_f32<const LANE1: i32, const LANE2: i32>(a: float32x4_t, b: float32x2_t) -> float32x4_t {
+ static_assert_imm2!(LANE1);
+ static_assert_imm1!(LANE2);
+ let b: float32x4_t = simd_shuffle4!(b, b, [0, 1, 2, 3]);
+ match LANE1 & 0b11 {
+ 0 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [4 + LANE2 as u32, 1, 2, 3]),
+ 1 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [0, 4 + LANE2 as u32, 2, 3]),
+ 2 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 4 + LANE2 as u32, 3]),
+ 3 => simd_shuffle4!(a, b, <const LANE1: i32, const LANE2: i32> [0, 1, 2, 4 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopyq_lane_f64<const LANE1: i32, const LANE2: i32>(a: float64x2_t, b: float64x1_t) -> float64x2_t {
+ static_assert_imm1!(LANE1);
+ static_assert!(LANE2 : i32 where LANE2 == 0);
+ let b: float64x2_t = simd_shuffle2!(b, b, [0, 1]);
+ match LANE1 & 0b1 {
+ 0 => simd_shuffle2!(a, b, <const LANE1: i32, const LANE2: i32> [2 + LANE2 as u32, 1]),
+ 1 => simd_shuffle2!(a, b, <const LANE1: i32, const LANE2: i32> [0, 2 + LANE2 as u32]),
+ _ => unreachable_unchecked(),
+ }
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcreate_f64(a: u64) -> float64x1_t {
+ transmute(a)
+}
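+
+// A hypothetical sketch (not generator output): vcreate_f64 reinterprets the
+// 64-bit pattern rather than converting numerically, so a float's raw bits
+// round-trip exactly.
+#[cfg(test)] #[allow(dead_code)]
+unsafe fn vcreate_f64_sketch() {
+ let v = vcreate_f64(1.5f64.to_bits());
+ assert_eq!(simd_extract::<_, f64>(v, 0), 1.5);
+}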
+
+/// Fixed-point convert to floating-point
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(scvtf))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvt_f64_s64(a: int64x1_t) -> float64x1_t {
+ simd_cast(a)
+}
+
+/// Fixed-point convert to floating-point
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(scvtf))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtq_f64_s64(a: int64x2_t) -> float64x2_t {
+ simd_cast(a)
+}
+
+/// Fixed-point convert to floating-point
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ucvtf))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvt_f64_u64(a: uint64x1_t) -> float64x1_t {
+ simd_cast(a)
+}
+
+/// Fixed-point convert to floating-point
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ucvtf))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtq_f64_u64(a: uint64x2_t) -> float64x2_t {
+ simd_cast(a)
+}
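+
+// A hypothetical sketch (not generator output): the vcvt*_f64_* conversions
+// above are plain per-lane integer-to-float casts.
+#[cfg(test)] #[allow(dead_code)]
+unsafe fn vcvt_f64_s64_sketch() {
+ let a: int64x1_t = transmute(-3i64);
+ assert_eq!(simd_extract::<_, f64>(vcvt_f64_s64(a), 0), -3.0);
+}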
+
+/// Floating-point convert to higher precision long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvt_f64_f32(a: float32x2_t) -> float64x2_t {
+ simd_cast(a)
+}
+
+/// Floating-point convert to higher precision long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvt_high_f64_f32(a: float32x4_t) -> float64x2_t {
+ let b: float32x2_t = simd_shuffle2!(a, a, [2, 3]);
+ simd_cast(b)
+}
+
+/// Floating-point convert to lower precision narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtn))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvt_f32_f64(a: float64x2_t) -> float32x2_t {
+ simd_cast(a)
+}
+
+/// Floating-point convert to lower precision narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtn))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvt_high_f32_f64(a: float32x2_t, b: float64x2_t) -> float32x4_t {
+ simd_shuffle4!(a, simd_cast(b), [0, 1, 2, 3])
+}
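+
+// A hypothetical sketch (not generator output) of the "high" pairing:
+// vcvt_high_f64_f32 widens the upper two f32 lanes, while vcvt_high_f32_f64
+// narrows `b` into the upper half of the result and keeps `a` as the lower
+// half.
+#[cfg(test)] #[allow(dead_code)]
+unsafe fn vcvt_high_sketch() {
+ let wide: float32x4_t = transmute([1.0f32, 2.0, 3.0, 4.0]);
+ let hi: float64x2_t = vcvt_high_f64_f32(wide);
+ // The upper lanes [3.0, 4.0] were widened to f64.
+ assert_eq!(simd_extract::<_, f64>(hi, 0), 3.0);
+ assert_eq!(simd_extract::<_, f64>(hi, 1), 4.0);
+}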
+
+/// Floating-point convert to lower precision narrow, rounding to odd
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtxn))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtx_f32_f64(a: float64x2_t) -> float32x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtxn.v2f32.v2f64")]
+ fn vcvtx_f32_f64_(a: float64x2_t) -> float32x2_t;
+ }
+ vcvtx_f32_f64_(a)
+}
+
+/// Floating-point convert to lower precision narrow, rounding to odd
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtxn))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtxd_f32_f64(a: f64) -> f32 {
+ simd_extract(vcvtx_f32_f64(vdupq_n_f64(a)), 0)
+}
+
+/// Floating-point convert to lower precision narrow, rounding to odd
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtxn))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtx_high_f32_f64(a: float32x2_t, b: float64x2_t) -> float32x4_t {
+ simd_shuffle4!(a, vcvtx_f32_f64(b), [0, 1, 2, 3])
+}
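+
+// Round-to-odd (FCVTXN) sets the lowest result bit whenever the conversion
+// is inexact, which makes a subsequent f32 -> f16 narrowing immune to double
+// rounding. A trivial hypothetical sketch: exact inputs pass through
+// unchanged.
+#[cfg(test)] #[allow(dead_code)]
+unsafe fn vcvtx_sketch() {
+ assert_eq!(vcvtxd_f32_f64(1.0), 1.0);
+}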
+
+/// Fixed-point convert to floating-point
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(scvtf, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvt_n_f64_s64<const N: i32>(a: int64x1_t) -> float64x1_t {
+ static_assert!(N : i32 where N >= 1 && N <= 64);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64")]
+ fn vcvt_n_f64_s64_(a: int64x1_t, n: i32) -> float64x1_t;
+ }
+ vcvt_n_f64_s64_(a, N)
+}
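+
+// A hypothetical sketch (not generator output): the `_n_` conversions treat
+// the integer input as fixed-point with N fractional bits, i.e. the result
+// is a / 2^N.
+#[cfg(test)] #[allow(dead_code)]
+unsafe fn vcvt_n_f64_s64_sketch() {
+ let a: int64x1_t = transmute(8i64);
+ // 8 with 2 fractional bits is 8 / 4 = 2.0.
+ assert_eq!(simd_extract::<_, f64>(vcvt_n_f64_s64::<2>(a), 0), 2.0);
+}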
+
+/// Fixed-point convert to floating-point
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(scvtf, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtq_n_f64_s64<const N: i32>(a: int64x2_t) -> float64x2_t {
+ static_assert!(N : i32 where N >= 1 && N <= 64);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64")]
+ fn vcvtq_n_f64_s64_(a: int64x2_t, n: i32) -> float64x2_t;
+ }
+ vcvtq_n_f64_s64_(a, N)
+}
+
+/// Fixed-point convert to floating-point
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(scvtf, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvts_n_f32_s32<const N: i32>(a: i32) -> f32 {
+ static_assert!(N : i32 where N >= 1 && N <= 32);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfxs2fp.f32.i32")]
+ fn vcvts_n_f32_s32_(a: i32, n: i32) -> f32;
+ }
+ vcvts_n_f32_s32_(a, N)
+}
+
+/// Fixed-point convert to floating-point
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(scvtf, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtd_n_f64_s64<const N: i32>(a: i64) -> f64 {
+ static_assert!(N : i32 where N >= 1 && N <= 64);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfxs2fp.f64.i64")]
+ fn vcvtd_n_f64_s64_(a: i64, n: i32) -> f64;
+ }
+ vcvtd_n_f64_s64_(a, N)
+}
+
+/// Fixed-point convert to floating-point
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ucvtf, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvt_n_f64_u64<const N: i32>(a: uint64x1_t) -> float64x1_t {
+ static_assert!(N : i32 where N >= 1 && N <= 64);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64")]
+ fn vcvt_n_f64_u64_(a: uint64x1_t, n: i32) -> float64x1_t;
+ }
+ vcvt_n_f64_u64_(a, N)
+}
+
+/// Fixed-point convert to floating-point
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ucvtf, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtq_n_f64_u64<const N: i32>(a: uint64x2_t) -> float64x2_t {
+ static_assert!(N : i32 where N >= 1 && N <= 64);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64")]
+ fn vcvtq_n_f64_u64_(a: uint64x2_t, n: i32) -> float64x2_t;
+ }
+ vcvtq_n_f64_u64_(a, N)
+}
+
+/// Fixed-point convert to floating-point
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ucvtf, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvts_n_f32_u32<const N: i32>(a: u32) -> f32 {
+ static_assert!(N : i32 where N >= 1 && N <= 32);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfxu2fp.f32.i32")]
+ fn vcvts_n_f32_u32_(a: u32, n: i32) -> f32;
+ }
+ vcvts_n_f32_u32_(a, N)
+}
+
+/// Fixed-point convert to floating-point
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ucvtf, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtd_n_f64_u64<const N: i32>(a: u64) -> f64 {
+ static_assert!(N : i32 where N >= 1 && N <= 64);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfxu2fp.f64.i64")]
+ fn vcvtd_n_f64_u64_(a: u64, n: i32) -> f64;
+ }
+ vcvtd_n_f64_u64_(a, N)
+}
+
+/// Floating-point convert to fixed-point, rounding toward zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtzs, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvt_n_s64_f64<const N: i32>(a: float64x1_t) -> int64x1_t {
+ static_assert!(N : i32 where N >= 1 && N <= 64);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64")]
+ fn vcvt_n_s64_f64_(a: float64x1_t, n: i32) -> int64x1_t;
+ }
+ vcvt_n_s64_f64_(a, N)
+}
+
+/// Floating-point convert to fixed-point, rounding toward zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtzs, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtq_n_s64_f64<const N: i32>(a: float64x2_t) -> int64x2_t {
+ static_assert!(N : i32 where N >= 1 && N <= 64);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfp2fxs.v2i64.v2f64")]
+ fn vcvtq_n_s64_f64_(a: float64x2_t, n: i32) -> int64x2_t;
+ }
+ vcvtq_n_s64_f64_(a, N)
+}
+
+/// Floating-point convert to fixed-point, rounding toward zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtzs, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvts_n_s32_f32<const N: i32>(a: f32) -> i32 {
+ static_assert!(N : i32 where N >= 1 && N <= 32);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfp2fxs.i32.f32")]
+ fn vcvts_n_s32_f32_(a: f32, n: i32) -> i32;
+ }
+ vcvts_n_s32_f32_(a, N)
+}
+
+/// Floating-point convert to fixed-point, rounding toward zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtzs, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtd_n_s64_f64<const N: i32>(a: f64) -> i64 {
+ static_assert!(N : i32 where N >= 1 && N <= 64);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfp2fxs.i64.f64")]
+ fn vcvtd_n_s64_f64_(a: f64, n: i32) -> i64;
+ }
+ vcvtd_n_s64_f64_(a, N)
+}
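+
+// A hypothetical sketch (not generator output) for the opposite direction:
+// converting to fixed-point scales by 2^N and then truncates toward zero,
+// so 2.75 with N = 2 becomes trunc(2.75 * 4) = 11.
+#[cfg(test)] #[allow(dead_code)]
+unsafe fn vcvtd_n_s64_f64_sketch() {
+ assert_eq!(vcvtd_n_s64_f64::<2>(2.75), 11);
+}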
+
+/// Floating-point convert to fixed-point, rounding toward zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtzu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvt_n_u64_f64<const N: i32>(a: float64x1_t) -> uint64x1_t {
+ static_assert!(N : i32 where N >= 1 && N <= 64);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64")]
+ fn vcvt_n_u64_f64_(a: float64x1_t, n: i32) -> uint64x1_t;
+ }
+ vcvt_n_u64_f64_(a, N)
+}
+
+/// Floating-point convert to fixed-point, rounding toward zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtzu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtq_n_u64_f64<const N: i32>(a: float64x2_t) -> uint64x2_t {
+ static_assert!(N : i32 where N >= 1 && N <= 64);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfp2fxu.v2i64.v2f64")]
+ fn vcvtq_n_u64_f64_(a: float64x2_t, n: i32) -> uint64x2_t;
+ }
+ vcvtq_n_u64_f64_(a, N)
+}
+
+/// Floating-point convert to fixed-point, rounding toward zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtzu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvts_n_u32_f32<const N: i32>(a: f32) -> u32 {
+ static_assert!(N : i32 where N >= 1 && N <= 32);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfp2fxu.i32.f32")]
+ fn vcvts_n_u32_f32_(a: f32, n: i32) -> u32;
+ }
+ vcvts_n_u32_f32_(a, N)
+}
+
+/// Floating-point convert to fixed-point, rounding toward zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtzu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtd_n_u64_f64<const N: i32>(a: f64) -> u64 {
+ static_assert!(N : i32 where N >= 1 && N <= 64);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfp2fxu.i64.f64")]
+ fn vcvtd_n_u64_f64_(a: f64, n: i32) -> u64;
+ }
+ vcvtd_n_u64_f64_(a, N)
+}
+
+/// Fixed-point convert to floating-point
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(scvtf))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvts_f32_s32(a: i32) -> f32 {
+ a as f32
+}
+
+/// Fixed-point convert to floating-point
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(scvtf))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtd_f64_s64(a: i64) -> f64 {
+ a as f64
+}
+
+/// Fixed-point convert to floating-point
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ucvtf))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvts_f32_u32(a: u32) -> f32 {
+ a as f32
+}
+
+/// Fixed-point convert to floating-point
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ucvtf))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtd_f64_u64(a: u64) -> f64 {
+ a as f64
+}
+
+/// Floating-point convert to signed fixed-point, rounding toward zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtzs))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvts_s32_f32(a: f32) -> i32 {
+ a as i32
+}
+
+/// Floating-point convert to signed fixed-point, rounding toward zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtzs))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtd_s64_f64(a: f64) -> i64 {
+ a as i64
+}
+
+/// Floating-point convert to unsigned fixed-point, rounding toward zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtzu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvts_u32_f32(a: f32) -> u32 {
+ a as u32
+}
+
+/// Floating-point convert to unsigned fixed-point, rounding toward zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtzu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtd_u64_f64(a: f64) -> u64 {
+ a as u64
+}
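+
+// These four scalar forms lower to Rust `as` casts. Since Rust 1.45 a
+// float-to-int `as` cast saturates and maps NaN to 0, which matches the
+// saturating behaviour of FCVTZS/FCVTZU. A hypothetical sketch:
+#[cfg(test)] #[allow(dead_code)]
+unsafe fn scalar_cast_sketch() {
+ assert_eq!(vcvts_s32_f32(f32::MAX), i32::MAX); // saturates at the top
+ assert_eq!(vcvts_u32_f32(-1.0), 0); // clamps below zero
+}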
+
+/// Floating-point convert to signed fixed-point, rounding toward zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtzs))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvt_s64_f64(a: float64x1_t) -> int64x1_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fptosi.sat.v1i64.v1f64")]
+ fn vcvt_s64_f64_(a: float64x1_t) -> int64x1_t;
+ }
+ vcvt_s64_f64_(a)
+}
+
+/// Floating-point convert to signed fixed-point, rounding toward zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtzs))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtq_s64_f64(a: float64x2_t) -> int64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fptosi.sat.v2i64.v2f64")]
+ fn vcvtq_s64_f64_(a: float64x2_t) -> int64x2_t;
+ }
+ vcvtq_s64_f64_(a)
+}
+
+/// Floating-point convert to unsigned fixed-point, rounding toward zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtzu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvt_u64_f64(a: float64x1_t) -> uint64x1_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fptoui.sat.v1i64.v1f64")]
+ fn vcvt_u64_f64_(a: float64x1_t) -> uint64x1_t;
+ }
+ vcvt_u64_f64_(a)
+}
+
+/// Floating-point convert to unsigned fixed-point, rounding toward zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtzu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtq_u64_f64(a: float64x2_t) -> uint64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fptoui.sat.v2i64.v2f64")]
+ fn vcvtq_u64_f64_(a: float64x2_t) -> uint64x2_t;
+ }
+ vcvtq_u64_f64_(a)
+}
+
+/// Floating-point convert to signed integer, rounding to nearest with ties to away
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtas))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvta_s32_f32(a: float32x2_t) -> int32x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtas.v2i32.v2f32")]
+ fn vcvta_s32_f32_(a: float32x2_t) -> int32x2_t;
+ }
+ vcvta_s32_f32_(a)
+}
+
+/// Floating-point convert to signed integer, rounding to nearest with ties to away
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtas))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtaq_s32_f32(a: float32x4_t) -> int32x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtas.v4i32.v4f32")]
+ fn vcvtaq_s32_f32_(a: float32x4_t) -> int32x4_t;
+ }
+ vcvtaq_s32_f32_(a)
+}
+
+/// Floating-point convert to signed integer, rounding to nearest with ties to away
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtas))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvta_s64_f64(a: float64x1_t) -> int64x1_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtas.v1i64.v1f64")]
+ fn vcvta_s64_f64_(a: float64x1_t) -> int64x1_t;
+ }
+ vcvta_s64_f64_(a)
+}
+
+/// Floating-point convert to signed integer, rounding to nearest with ties to away
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtas))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtaq_s64_f64(a: float64x2_t) -> int64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtas.v2i64.v2f64")]
+ fn vcvtaq_s64_f64_(a: float64x2_t) -> int64x2_t;
+ }
+ vcvtaq_s64_f64_(a)
+}
+
+/// Floating-point convert to signed integer, rounding to nearest with ties to away
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtas))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtas_s32_f32(a: f32) -> i32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtas.i32.f32")]
+ fn vcvtas_s32_f32_(a: f32) -> i32;
+ }
+ vcvtas_s32_f32_(a)
+}
+
+/// Floating-point convert to signed integer, rounding to nearest with ties to away
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtas))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtad_s64_f64(a: f64) -> i64 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtas.i64.f64")]
+ fn vcvtad_s64_f64_(a: f64) -> i64;
+ }
+ vcvtad_s64_f64_(a)
+}
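+
+// Rounding sketch (hypothetical, not generator output) for the FCVTAS
+// family: ties round away from zero.
+#[cfg(test)] #[allow(dead_code)]
+unsafe fn fcvtas_sketch() {
+ assert_eq!(vcvtas_s32_f32(2.5), 3);
+ assert_eq!(vcvtas_s32_f32(-2.5), -3);
+}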
+
+/// Floating-point convert to unsigned integer, rounding to nearest with ties to away
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtau))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtas_u32_f32(a: f32) -> u32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtau.i32.f32")]
+ fn vcvtas_u32_f32_(a: f32) -> u32;
+ }
+ vcvtas_u32_f32_(a)
+}
+
+/// Floating-point convert to unsigned integer, rounding to nearest with ties to away
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtau))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtad_u64_f64(a: f64) -> u64 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtau.i64.f64")]
+ fn vcvtad_u64_f64_(a: f64) -> u64;
+ }
+ vcvtad_u64_f64_(a)
+}
+
+/// Floating-point convert to signed integer, rounding to nearest with ties to even
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtns))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtn_s32_f32(a: float32x2_t) -> int32x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtns.v2i32.v2f32")]
+ fn vcvtn_s32_f32_(a: float32x2_t) -> int32x2_t;
+ }
+ vcvtn_s32_f32_(a)
+}
+
+/// Floating-point convert to signed integer, rounding to nearest with ties to even
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtns))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtnq_s32_f32(a: float32x4_t) -> int32x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtns.v4i32.v4f32")]
+ fn vcvtnq_s32_f32_(a: float32x4_t) -> int32x4_t;
+ }
+ vcvtnq_s32_f32_(a)
+}
+
+/// Floating-point convert to signed integer, rounding to nearest with ties to even
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtns))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtn_s64_f64(a: float64x1_t) -> int64x1_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtns.v1i64.v1f64")]
+ fn vcvtn_s64_f64_(a: float64x1_t) -> int64x1_t;
+ }
+ vcvtn_s64_f64_(a)
+}
+
+/// Floating-point convert to signed integer, rounding to nearest with ties to even
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtns))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtnq_s64_f64(a: float64x2_t) -> int64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtns.v2i64.v2f64")]
+ fn vcvtnq_s64_f64_(a: float64x2_t) -> int64x2_t;
+ }
+ vcvtnq_s64_f64_(a)
+}
+
+/// Floating-point convert to signed integer, rounding to nearest with ties to even
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtns))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtns_s32_f32(a: f32) -> i32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtns.i32.f32")]
+ fn vcvtns_s32_f32_(a: f32) -> i32;
+ }
+ vcvtns_s32_f32_(a)
+}
+
+/// Floating-point convert to signed integer, rounding to nearest with ties to even
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtns))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtnd_s64_f64(a: f64) -> i64 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtns.i64.f64")]
+ fn vcvtnd_s64_f64_(a: f64) -> i64;
+ }
+ vcvtnd_s64_f64_(a)
+}
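+
+// Rounding sketch (hypothetical) for the FCVTNS family: ties go to the even
+// integer, so both 1.5 and 2.5 round to 2.
+#[cfg(test)] #[allow(dead_code)]
+unsafe fn fcvtns_sketch() {
+ assert_eq!(vcvtns_s32_f32(1.5), 2);
+ assert_eq!(vcvtns_s32_f32(2.5), 2);
+}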
+
+/// Floating-point convert to signed integer, rounding toward minus infinity
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtms))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtm_s32_f32(a: float32x2_t) -> int32x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtms.v2i32.v2f32")]
+ fn vcvtm_s32_f32_(a: float32x2_t) -> int32x2_t;
+ }
+ vcvtm_s32_f32_(a)
+}
+
+/// Floating-point convert to signed integer, rounding toward minus infinity
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtms))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtmq_s32_f32(a: float32x4_t) -> int32x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtms.v4i32.v4f32")]
+ fn vcvtmq_s32_f32_(a: float32x4_t) -> int32x4_t;
+ }
+ vcvtmq_s32_f32_(a)
+}
+
+/// Floating-point convert to signed integer, rounding toward minus infinity
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtms))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtm_s64_f64(a: float64x1_t) -> int64x1_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtms.v1i64.v1f64")]
+ fn vcvtm_s64_f64_(a: float64x1_t) -> int64x1_t;
+ }
+ vcvtm_s64_f64_(a)
+}
+
+/// Floating-point convert to signed integer, rounding toward minus infinity
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtms))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtmq_s64_f64(a: float64x2_t) -> int64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtms.v2i64.v2f64")]
+ fn vcvtmq_s64_f64_(a: float64x2_t) -> int64x2_t;
+ }
+ vcvtmq_s64_f64_(a)
+}
+
+/// Floating-point convert to signed integer, rounding toward minus infinity
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtms))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtms_s32_f32(a: f32) -> i32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtms.i32.f32")]
+ fn vcvtms_s32_f32_(a: f32) -> i32;
+ }
+ vcvtms_s32_f32_(a)
+}
+
+/// Floating-point convert to signed integer, rounding toward minus infinity
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtms))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtmd_s64_f64(a: f64) -> i64 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtms.i64.f64")]
+ fn vcvtmd_s64_f64_(a: f64) -> i64;
+ }
+ vcvtmd_s64_f64_(a)
+}
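+
+// Rounding sketch (hypothetical) for the FCVTMS family: rounding toward
+// minus infinity is a floor.
+#[cfg(test)] #[allow(dead_code)]
+unsafe fn fcvtms_sketch() {
+ assert_eq!(vcvtms_s32_f32(1.5), 1);
+ assert_eq!(vcvtms_s32_f32(-1.5), -2);
+}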
+
+/// Floating-point convert to signed integer, rounding toward plus infinity
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtps))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtp_s32_f32(a: float32x2_t) -> int32x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtps.v2i32.v2f32")]
+ fn vcvtp_s32_f32_(a: float32x2_t) -> int32x2_t;
+ }
+ vcvtp_s32_f32_(a)
+}
+
+/// Floating-point convert to signed integer, rounding toward plus infinity
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtps))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtpq_s32_f32(a: float32x4_t) -> int32x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtps.v4i32.v4f32")]
+ fn vcvtpq_s32_f32_(a: float32x4_t) -> int32x4_t;
+ }
+ vcvtpq_s32_f32_(a)
+}
+
+/// Floating-point convert to signed integer, rounding toward plus infinity
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtps))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtp_s64_f64(a: float64x1_t) -> int64x1_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtps.v1i64.v1f64")]
+ fn vcvtp_s64_f64_(a: float64x1_t) -> int64x1_t;
+ }
+ vcvtp_s64_f64_(a)
+}
+
+/// Floating-point convert to signed integer, rounding toward plus infinity
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtps))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtpq_s64_f64(a: float64x2_t) -> int64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtps.v2i64.v2f64")]
+ fn vcvtpq_s64_f64_(a: float64x2_t) -> int64x2_t;
+ }
+ vcvtpq_s64_f64_(a)
+}
+
+/// Floating-point convert to signed integer, rounding toward plus infinity
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtps))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtps_s32_f32(a: f32) -> i32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtps.i32.f32")]
+ fn vcvtps_s32_f32_(a: f32) -> i32;
+ }
+ vcvtps_s32_f32_(a)
+}
+
+/// Floating-point convert to signed integer, rounding toward plus infinity
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtps))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtpd_s64_f64(a: f64) -> i64 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtps.i64.f64")]
+ fn vcvtpd_s64_f64_(a: f64) -> i64;
+ }
+ vcvtpd_s64_f64_(a)
+}
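+
+// Rounding sketch (hypothetical) for the FCVTPS family: rounding toward
+// plus infinity is a ceiling.
+#[cfg(test)] #[allow(dead_code)]
+unsafe fn fcvtps_sketch() {
+ assert_eq!(vcvtps_s32_f32(1.5), 2);
+ assert_eq!(vcvtps_s32_f32(-1.5), -1);
+}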
+
+/// Floating-point convert to unsigned integer, rounding to nearest with ties to away
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtau))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvta_u32_f32(a: float32x2_t) -> uint32x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtau.v2i32.v2f32")]
+ fn vcvta_u32_f32_(a: float32x2_t) -> uint32x2_t;
+ }
+ vcvta_u32_f32_(a)
+}
+
+/// Floating-point convert to unsigned integer, rounding to nearest with ties to away
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtau))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtaq_u32_f32(a: float32x4_t) -> uint32x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtau.v4i32.v4f32")]
+ fn vcvtaq_u32_f32_(a: float32x4_t) -> uint32x4_t;
+ }
+ vcvtaq_u32_f32_(a)
+}
+
+/// Floating-point convert to unsigned integer, rounding to nearest with ties to away
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtau))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvta_u64_f64(a: float64x1_t) -> uint64x1_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtau.v1i64.v1f64")]
+ fn vcvta_u64_f64_(a: float64x1_t) -> uint64x1_t;
+ }
+ vcvta_u64_f64_(a)
+}
+
+/// Floating-point convert to unsigned integer, rounding to nearest with ties to away
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtau))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtaq_u64_f64(a: float64x2_t) -> uint64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtau.v2i64.v2f64")]
+ fn vcvtaq_u64_f64_(a: float64x2_t) -> uint64x2_t;
+ }
+ vcvtaq_u64_f64_(a)
+}
+
+/// Floating-point convert to unsigned integer, rounding to nearest with ties to even
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtnu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtn_u32_f32(a: float32x2_t) -> uint32x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtnu.v2i32.v2f32")]
+ fn vcvtn_u32_f32_(a: float32x2_t) -> uint32x2_t;
+ }
+ vcvtn_u32_f32_(a)
+}
+
+/// Floating-point convert to unsigned integer, rounding to nearest with ties to even
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtnu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtnq_u32_f32(a: float32x4_t) -> uint32x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtnu.v4i32.v4f32")]
+ fn vcvtnq_u32_f32_(a: float32x4_t) -> uint32x4_t;
+ }
+ vcvtnq_u32_f32_(a)
+}
+
+/// Floating-point convert to unsigned integer, rounding to nearest with ties to even
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtnu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtn_u64_f64(a: float64x1_t) -> uint64x1_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtnu.v1i64.v1f64")]
+ fn vcvtn_u64_f64_(a: float64x1_t) -> uint64x1_t;
+ }
+ vcvtn_u64_f64_(a)
+}
+
+/// Floating-point convert to unsigned integer, rounding to nearest with ties to even
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtnu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtnq_u64_f64(a: float64x2_t) -> uint64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtnu.v2i64.v2f64")]
+ fn vcvtnq_u64_f64_(a: float64x2_t) -> uint64x2_t;
+ }
+ vcvtnq_u64_f64_(a)
+}
+
+/// Floating-point convert to unsigned integer, rounding to nearest with ties to even
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtnu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtns_u32_f32(a: f32) -> u32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtnu.i32.f32")]
+ fn vcvtns_u32_f32_(a: f32) -> u32;
+ }
+ vcvtns_u32_f32_(a)
+}
+
+/// Floating-point convert to unsigned integer, rounding to nearest with ties to even
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtnu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtnd_u64_f64(a: f64) -> u64 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtnu.i64.f64")]
+ fn vcvtnd_u64_f64_(a: f64) -> u64;
+ }
+ vcvtnd_u64_f64_(a)
+}
+
+/// Floating-point convert to unsigned integer, rounding toward minus infinity
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtmu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtm_u32_f32(a: float32x2_t) -> uint32x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtmu.v2i32.v2f32")]
+ fn vcvtm_u32_f32_(a: float32x2_t) -> uint32x2_t;
+ }
+ vcvtm_u32_f32_(a)
+}
+
+/// Floating-point convert to unsigned integer, rounding toward minus infinity
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtmu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtmq_u32_f32(a: float32x4_t) -> uint32x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtmu.v4i32.v4f32")]
+ fn vcvtmq_u32_f32_(a: float32x4_t) -> uint32x4_t;
+ }
+ vcvtmq_u32_f32_(a)
+}
+
+/// Floating-point convert to unsigned integer, rounding toward minus infinity
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtmu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtm_u64_f64(a: float64x1_t) -> uint64x1_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtmu.v1i64.v1f64")]
+ fn vcvtm_u64_f64_(a: float64x1_t) -> uint64x1_t;
+ }
+ vcvtm_u64_f64_(a)
+}
+
+/// Floating-point convert to unsigned integer, rounding toward minus infinity
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtmu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtmq_u64_f64(a: float64x2_t) -> uint64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtmu.v2i64.v2f64")]
+ fn vcvtmq_u64_f64_(a: float64x2_t) -> uint64x2_t;
+ }
+ vcvtmq_u64_f64_(a)
+}
+
+/// Floating-point convert to unsigned integer, rounding toward minus infinity
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtmu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtms_u32_f32(a: f32) -> u32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtmu.i32.f32")]
+ fn vcvtms_u32_f32_(a: f32) -> u32;
+ }
+ vcvtms_u32_f32_(a)
+}
+
+/// Floating-point convert to unsigned integer, rounding toward minus infinity
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtmu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtmd_u64_f64(a: f64) -> u64 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtmu.i64.f64")]
+ fn vcvtmd_u64_f64_(a: f64) -> u64;
+ }
+ vcvtmd_u64_f64_(a)
+}
+
+/// Floating-point convert to unsigned integer, rounding toward plus infinity
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtpu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtp_u32_f32(a: float32x2_t) -> uint32x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtpu.v2i32.v2f32")]
+ fn vcvtp_u32_f32_(a: float32x2_t) -> uint32x2_t;
+ }
+ vcvtp_u32_f32_(a)
+}
+
+/// Floating-point convert to unsigned integer, rounding toward plus infinity
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtpu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtpq_u32_f32(a: float32x4_t) -> uint32x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtpu.v4i32.v4f32")]
+ fn vcvtpq_u32_f32_(a: float32x4_t) -> uint32x4_t;
+ }
+ vcvtpq_u32_f32_(a)
+}
+
+/// Floating-point convert to unsigned integer, rounding toward plus infinity
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtpu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtp_u64_f64(a: float64x1_t) -> uint64x1_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtpu.v1i64.v1f64")]
+ fn vcvtp_u64_f64_(a: float64x1_t) -> uint64x1_t;
+ }
+ vcvtp_u64_f64_(a)
+}
+
+/// Floating-point convert to unsigned integer, rounding toward plus infinity
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtpu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtpq_u64_f64(a: float64x2_t) -> uint64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtpu.v2i64.v2f64")]
+ fn vcvtpq_u64_f64_(a: float64x2_t) -> uint64x2_t;
+ }
+ vcvtpq_u64_f64_(a)
+}
+
+/// Floating-point convert to unsigned integer, rounding toward plus infinity
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtpu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtps_u32_f32(a: f32) -> u32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtpu.i32.f32")]
+ fn vcvtps_u32_f32_(a: f32) -> u32;
+ }
+ vcvtps_u32_f32_(a)
+}
+
+/// Floating-point convert to unsigned integer, rounding toward plus infinity
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtpu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtpd_u64_f64(a: f64) -> u64 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtpu.i64.f64")]
+ fn vcvtpd_u64_f64_(a: f64) -> u64;
+ }
+ vcvtpd_u64_f64_(a)
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(dup, N = 1))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdupq_laneq_p64<const N: i32>(a: poly64x2_t) -> poly64x2_t {
+ static_assert_imm1!(N);
+ simd_shuffle2!(a, a, <const N: i32> [N as u32, N as u32])
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(dup, N = 0))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdupq_lane_p64<const N: i32>(a: poly64x1_t) -> poly64x2_t {
+ static_assert!(N : i32 where N == 0);
+ simd_shuffle2!(a, a, <const N: i32> [N as u32, N as u32])
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(dup, N = 1))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdupq_laneq_f64<const N: i32>(a: float64x2_t) -> float64x2_t {
+ static_assert_imm1!(N);
+ simd_shuffle2!(a, a, <const N: i32> [N as u32, N as u32])
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(dup, N = 0))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdupq_lane_f64<const N: i32>(a: float64x1_t) -> float64x2_t {
+ static_assert!(N : i32 where N == 0);
+ simd_shuffle2!(a, a, <const N: i32> [N as u32, N as u32])
+}
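+
+// A hypothetical sketch (not generator output): vdupq_laneq_f64::<N>
+// broadcasts lane N of `a` across both result lanes.
+#[cfg(test)] #[allow(dead_code)]
+unsafe fn vdup_broadcast_sketch() {
+ let a: float64x2_t = transmute([1.0f64, 2.0]);
+ let r = vdupq_laneq_f64::<1>(a);
+ assert_eq!(simd_extract::<_, f64>(r, 0), 2.0);
+ assert_eq!(simd_extract::<_, f64>(r, 1), 2.0);
+}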
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 0))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdup_lane_p64<const N: i32>(a: poly64x1_t) -> poly64x1_t {
+ static_assert!(N : i32 where N == 0);
+ a
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 0))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdup_lane_f64<const N: i32>(a: float64x1_t) -> float64x1_t {
+ static_assert!(N : i32 where N == 0);
+ a
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 1))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdup_laneq_p64<const N: i32>(a: poly64x2_t) -> poly64x1_t {
+ static_assert_imm1!(N);
+ transmute::<u64, _>(simd_extract(a, N as u32))
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 1))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdup_laneq_f64<const N: i32>(a: float64x2_t) -> float64x1_t {
+ static_assert_imm1!(N);
+ transmute::<f64, _>(simd_extract(a, N as u32))
+}
+
+/// Extract an element from a vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 4))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdupb_lane_s8<const N: i32>(a: int8x8_t) -> i8 {
+ static_assert_imm3!(N);
+ simd_extract(a, N as u32)
+}
+
+/// Extract an element from a vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 8))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdupb_laneq_s8<const N: i32>(a: int8x16_t) -> i8 {
+ static_assert_imm4!(N);
+ simd_extract(a, N as u32)
+}
+
+/// Extract an element from a vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vduph_lane_s16<const N: i32>(a: int16x4_t) -> i16 {
+ static_assert_imm2!(N);
+ simd_extract(a, N as u32)
+}
+
+/// Extract an element from a vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 4))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vduph_laneq_s16<const N: i32>(a: int16x8_t) -> i16 {
+ static_assert_imm3!(N);
+ simd_extract(a, N as u32)
+}
+
+/// Extract an element from a vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 1))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdups_lane_s32<const N: i32>(a: int32x2_t) -> i32 {
+ static_assert_imm1!(N);
+ simd_extract(a, N as u32)
+}
+
+/// Extract an element from a vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdups_laneq_s32<const N: i32>(a: int32x4_t) -> i32 {
+ static_assert_imm2!(N);
+ simd_extract(a, N as u32)
+}
+
+/// Extract an element from a vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 0))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdupd_lane_s64<const N: i32>(a: int64x1_t) -> i64 {
+ static_assert!(N : i32 where N == 0);
+ simd_extract(a, N as u32)
+}
+
+/// Extract an element from a vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 1))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdupd_laneq_s64<const N: i32>(a: int64x2_t) -> i64 {
+ static_assert_imm1!(N);
+ simd_extract(a, N as u32)
+}
+
+/// Extract an element from a vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 4))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdupb_lane_u8<const N: i32>(a: uint8x8_t) -> u8 {
+ static_assert_imm3!(N);
+ simd_extract(a, N as u32)
+}
+
+/// Extract an element from a vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 8))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdupb_laneq_u8<const N: i32>(a: uint8x16_t) -> u8 {
+ static_assert_imm4!(N);
+ simd_extract(a, N as u32)
+}
+
+/// Extract an element from a vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vduph_lane_u16<const N: i32>(a: uint16x4_t) -> u16 {
+ static_assert_imm2!(N);
+ simd_extract(a, N as u32)
+}
+
+/// Extract an element from a vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 4))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vduph_laneq_u16<const N: i32>(a: uint16x8_t) -> u16 {
+ static_assert_imm3!(N);
+ simd_extract(a, N as u32)
+}
+
+/// Extract an element from a vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 1))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdups_lane_u32<const N: i32>(a: uint32x2_t) -> u32 {
+ static_assert_imm1!(N);
+ simd_extract(a, N as u32)
+}
+
+/// Extract an element from a vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdups_laneq_u32<const N: i32>(a: uint32x4_t) -> u32 {
+ static_assert_imm2!(N);
+ simd_extract(a, N as u32)
+}
+
+/// Extract an element from a vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 0))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdupd_lane_u64<const N: i32>(a: uint64x1_t) -> u64 {
+ static_assert!(N : i32 where N == 0);
+ simd_extract(a, N as u32)
+}
+
+/// Extract an element from a vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 1))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdupd_laneq_u64<const N: i32>(a: uint64x2_t) -> u64 {
+ static_assert_imm1!(N);
+ simd_extract(a, N as u32)
+}
+
+/// Extract an element from a vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 4))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdupb_lane_p8<const N: i32>(a: poly8x8_t) -> p8 {
+ static_assert_imm3!(N);
+ simd_extract(a, N as u32)
+}
+
+/// Extract an element from a vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 8))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdupb_laneq_p8<const N: i32>(a: poly8x16_t) -> p8 {
+ static_assert_imm4!(N);
+ simd_extract(a, N as u32)
+}
+
+/// Extract an element from a vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vduph_lane_p16<const N: i32>(a: poly16x4_t) -> p16 {
+ static_assert_imm2!(N);
+ simd_extract(a, N as u32)
+}
+
+/// Extract an element from a vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 4))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vduph_laneq_p16<const N: i32>(a: poly16x8_t) -> p16 {
+ static_assert_imm3!(N);
+ simd_extract(a, N as u32)
+}
+
+/// Extract an element from a vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 1))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdups_lane_f32<const N: i32>(a: float32x2_t) -> f32 {
+ static_assert_imm1!(N);
+ simd_extract(a, N as u32)
+}
+
+/// Extract an element from a vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdups_laneq_f32<const N: i32>(a: float32x4_t) -> f32 {
+ static_assert_imm2!(N);
+ simd_extract(a, N as u32)
+}
+
+/// Extract an element from a vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 0))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdupd_lane_f64<const N: i32>(a: float64x1_t) -> f64 {
+ static_assert!(N : i32 where N == 0);
+ simd_extract(a, N as u32)
+}
+
+/// Extract an element from a vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 1))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdupd_laneq_f64<const N: i32>(a: float64x2_t) -> f64 {
+ static_assert_imm1!(N);
+ simd_extract(a, N as u32)
+}
+
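+// A minimal usage sketch (illustrative, not part of the generated bindings):
+// despite the `vdup` naming, the scalar `_lane`/`_laneq` variants above
+// return the value of lane `N` as a scalar. Written in the crate's own
+// `simd_test` style.
+#[cfg(test)]
+mod vdup_lane_scalar_sketch {
+    use crate::core_arch::{aarch64::*, simd::*};
+    use std::mem::transmute;
+    use stdarch_test::simd_test;
+
+    #[simd_test(enable = "neon")]
+    unsafe fn extracts_one_lane() {
+        let a: int64x2_t = transmute(i64x2::new(10, 20));
+        assert_eq!(vdupd_laneq_s64::<1>(a), 20); // lane 1, as a scalar
+    }
+}
+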
+/// Extract vector from pair of vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ext, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vextq_p64<const N: i32>(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
+ static_assert_imm1!(N);
+ match N & 0b1 {
+ 0 => simd_shuffle2!(a, b, [0, 1]),
+ 1 => simd_shuffle2!(a, b, [1, 2]),
+ _ => unreachable_unchecked(),
+ }
+}
+
+/// Extract vector from pair of vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ext, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vextq_f64<const N: i32>(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+ static_assert_imm1!(N);
+ match N & 0b1 {
+ 0 => simd_shuffle2!(a, b, [0, 1]),
+ 1 => simd_shuffle2!(a, b, [1, 2]),
+ _ => unreachable_unchecked(),
+ }
+}
+
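+// A usage sketch (illustrative): with `N = 1`, `vextq_f64` yields the last
+// `2 - N` lanes of `a` followed by the first `N` lanes of `b`.
+#[cfg(test)]
+mod vextq_f64_sketch {
+    use crate::core_arch::{aarch64::*, simd::*};
+    use std::mem::transmute;
+    use stdarch_test::simd_test;
+
+    #[simd_test(enable = "neon")]
+    unsafe fn ext_n_equals_1() {
+        let a: float64x2_t = transmute(f64x2::new(0.0, 1.0));
+        let b: float64x2_t = transmute(f64x2::new(2.0, 3.0));
+        let r: f64x2 = transmute(vextq_f64::<1>(a, b));
+        assert_eq!(r, f64x2::new(1.0, 2.0)); // [a[1], b[0]]
+    }
+}
+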
+/// Floating-point multiply-add to accumulator
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmul))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmla_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t {
+ simd_add(a, simd_mul(b, c))
+}
+
+/// Floating-point multiply-add to accumulator
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmul))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t {
+ simd_add(a, simd_mul(b, c))
+}
+
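+// A usage sketch (illustrative): `vmlaq_f64` is the unfused form, computing
+// `a + b * c` lane-wise (hence the `fmul` in the `assert_instr` above).
+#[cfg(test)]
+mod vmlaq_f64_sketch {
+    use crate::core_arch::{aarch64::*, simd::*};
+    use std::mem::transmute;
+    use stdarch_test::simd_test;
+
+    #[simd_test(enable = "neon")]
+    unsafe fn multiply_then_add() {
+        let a: float64x2_t = transmute(f64x2::new(1.0, 2.0));
+        let b: float64x2_t = transmute(f64x2::new(3.0, 4.0));
+        let c: float64x2_t = transmute(f64x2::new(5.0, 6.0));
+        let r: f64x2 = transmute(vmlaq_f64(a, b, c));
+        assert_eq!(r, f64x2::new(16.0, 26.0)); // a + b * c per lane
+    }
+}
+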
+/// Signed multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smlal2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlal_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t {
+ let b: int8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
+ let c: int8x8_t = simd_shuffle8!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]);
+ vmlal_s8(a, b, c)
+}
+
+/// Signed multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smlal2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
+ let b: int16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]);
+ let c: int16x4_t = simd_shuffle4!(c, c, [4, 5, 6, 7]);
+ vmlal_s16(a, b, c)
+}
+
+/// Signed multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smlal2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
+ let b: int32x2_t = simd_shuffle2!(b, b, [2, 3]);
+ let c: int32x2_t = simd_shuffle2!(c, c, [2, 3]);
+ vmlal_s32(a, b, c)
+}
+
+/// Unsigned multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umlal2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlal_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint16x8_t {
+ let b: uint8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
+ let c: uint8x8_t = simd_shuffle8!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]);
+ vmlal_u8(a, b, c)
+}
+
+/// Unsigned multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umlal2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlal_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t {
+ let b: uint16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]);
+ let c: uint16x4_t = simd_shuffle4!(c, c, [4, 5, 6, 7]);
+ vmlal_u16(a, b, c)
+}
+
+/// Unsigned multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umlal2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlal_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t {
+ let b: uint32x2_t = simd_shuffle2!(b, b, [2, 3]);
+ let c: uint32x2_t = simd_shuffle2!(c, c, [2, 3]);
+ vmlal_u32(a, b, c)
+}
+
+/// Signed multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smlal2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlal_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t {
+ vmlal_high_s16(a, b, vdupq_n_s16(c))
+}
+
+/// Signed multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smlal2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlal_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t {
+ vmlal_high_s32(a, b, vdupq_n_s32(c))
+}
+
+/// Unsigned multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umlal2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlal_high_n_u16(a: uint32x4_t, b: uint16x8_t, c: u16) -> uint32x4_t {
+ vmlal_high_u16(a, b, vdupq_n_u16(c))
+}
+
+/// Unsigned multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umlal2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlal_high_n_u32(a: uint64x2_t, b: uint32x4_t, c: u32) -> uint64x2_t {
+ vmlal_high_u32(a, b, vdupq_n_u32(c))
+}
+
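+// A usage sketch (illustrative): the `_high` forms widen only the upper half
+// of each 128-bit source, so the lower halves of `b` and `c` are ignored.
+#[cfg(test)]
+mod vmlal_high_sketch {
+    use crate::core_arch::{aarch64::*, simd::*};
+    use std::mem::transmute;
+    use stdarch_test::simd_test;
+
+    #[simd_test(enable = "neon")]
+    unsafe fn widens_upper_half_only() {
+        let a: int32x4_t = transmute(i32x4::new(0, 0, 0, 0));
+        let b: int16x8_t = transmute(i16x8::new(9, 9, 9, 9, 1, 2, 3, 4));
+        let c: int16x8_t = transmute(i16x8::new(9, 9, 9, 9, 10, 10, 10, 10));
+        let r: i32x4 = transmute(vmlal_high_s16(a, b, c));
+        assert_eq!(r, i32x4::new(10, 20, 30, 40)); // only lanes 4..8 used
+    }
+}
+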
+/// Signed multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smlal2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlal_high_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x8_t, c: int16x4_t) -> int32x4_t {
+ static_assert_imm2!(LANE);
+ vmlal_high_s16(a, b, simd_shuffle8!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Signed multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smlal2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlal_high_laneq_s16<const LANE: i32>(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
+ static_assert_imm3!(LANE);
+ vmlal_high_s16(a, b, simd_shuffle8!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Signed multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smlal2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlal_high_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x4_t, c: int32x2_t) -> int64x2_t {
+ static_assert_imm1!(LANE);
+ vmlal_high_s32(a, b, simd_shuffle4!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Signed multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smlal2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlal_high_laneq_s32<const LANE: i32>(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
+ static_assert_imm2!(LANE);
+ vmlal_high_s32(a, b, simd_shuffle4!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Unsigned multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umlal2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlal_high_lane_u16<const LANE: i32>(a: uint32x4_t, b: uint16x8_t, c: uint16x4_t) -> uint32x4_t {
+ static_assert_imm2!(LANE);
+ vmlal_high_u16(a, b, simd_shuffle8!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Unsigned multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umlal2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlal_high_laneq_u16<const LANE: i32>(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t {
+ static_assert_imm3!(LANE);
+ vmlal_high_u16(a, b, simd_shuffle8!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Unsigned multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umlal2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlal_high_lane_u32<const LANE: i32>(a: uint64x2_t, b: uint32x4_t, c: uint32x2_t) -> uint64x2_t {
+ static_assert_imm1!(LANE);
+ vmlal_high_u32(a, b, simd_shuffle4!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Unsigned multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umlal2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlal_high_laneq_u32<const LANE: i32>(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t {
+ static_assert_imm2!(LANE);
+ vmlal_high_u32(a, b, simd_shuffle4!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
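+// A usage sketch (illustrative): the `_lane`/`_laneq` forms broadcast lane
+// `LANE` of `c` before the widening multiply-accumulate.
+#[cfg(test)]
+mod vmlal_high_lane_sketch {
+    use crate::core_arch::{aarch64::*, simd::*};
+    use std::mem::transmute;
+    use stdarch_test::simd_test;
+
+    #[simd_test(enable = "neon")]
+    unsafe fn broadcasts_selected_lane() {
+        let a: int32x4_t = transmute(i32x4::new(0, 0, 0, 0));
+        let b: int16x8_t = transmute(i16x8::new(0, 0, 0, 0, 1, 2, 3, 4));
+        let c: int16x4_t = transmute(i16x4::new(7, 100, 7, 7));
+        let r: i32x4 = transmute(vmlal_high_lane_s16::<1>(a, b, c));
+        assert_eq!(r, i32x4::new(100, 200, 300, 400)); // c[1] broadcast
+    }
+}
+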
+/// Floating-point multiply-subtract from accumulator
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmul))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmls_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t {
+ simd_sub(a, simd_mul(b, c))
+}
+
+/// Floating-point multiply-subtract from accumulator
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmul))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlsq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t {
+ simd_sub(a, simd_mul(b, c))
+}
+
+/// Signed multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smlsl2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlsl_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t {
+ let b: int8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
+ let c: int8x8_t = simd_shuffle8!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]);
+ vmlsl_s8(a, b, c)
+}
+
+/// Signed multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smlsl2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlsl_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
+ let b: int16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]);
+ let c: int16x4_t = simd_shuffle4!(c, c, [4, 5, 6, 7]);
+ vmlsl_s16(a, b, c)
+}
+
+/// Signed multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smlsl2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlsl_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
+ let b: int32x2_t = simd_shuffle2!(b, b, [2, 3]);
+ let c: int32x2_t = simd_shuffle2!(c, c, [2, 3]);
+ vmlsl_s32(a, b, c)
+}
+
+/// Unsigned multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umlsl2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlsl_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint16x8_t {
+ let b: uint8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
+ let c: uint8x8_t = simd_shuffle8!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]);
+ vmlsl_u8(a, b, c)
+}
+
+/// Unsigned multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umlsl2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlsl_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t {
+ let b: uint16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]);
+ let c: uint16x4_t = simd_shuffle4!(c, c, [4, 5, 6, 7]);
+ vmlsl_u16(a, b, c)
+}
+
+/// Unsigned multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umlsl2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlsl_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t {
+ let b: uint32x2_t = simd_shuffle2!(b, b, [2, 3]);
+ let c: uint32x2_t = simd_shuffle2!(c, c, [2, 3]);
+ vmlsl_u32(a, b, c)
+}
+
+/// Signed multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smlsl2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlsl_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t {
+ vmlsl_high_s16(a, b, vdupq_n_s16(c))
+}
+
+/// Signed multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smlsl2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlsl_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t {
+ vmlsl_high_s32(a, b, vdupq_n_s32(c))
+}
+
+/// Unsigned multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umlsl2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlsl_high_n_u16(a: uint32x4_t, b: uint16x8_t, c: u16) -> uint32x4_t {
+ vmlsl_high_u16(a, b, vdupq_n_u16(c))
+}
+
+/// Unsigned multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umlsl2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlsl_high_n_u32(a: uint64x2_t, b: uint32x4_t, c: u32) -> uint64x2_t {
+ vmlsl_high_u32(a, b, vdupq_n_u32(c))
+}
+
+/// Signed multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlsl_high_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x8_t, c: int16x4_t) -> int32x4_t {
+ static_assert_imm2!(LANE);
+ vmlsl_high_s16(a, b, simd_shuffle8!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Signed multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlsl_high_laneq_s16<const LANE: i32>(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
+ static_assert_imm3!(LANE);
+ vmlsl_high_s16(a, b, simd_shuffle8!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Signed multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlsl_high_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x4_t, c: int32x2_t) -> int64x2_t {
+ static_assert_imm1!(LANE);
+ vmlsl_high_s32(a, b, simd_shuffle4!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Signed multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlsl_high_laneq_s32<const LANE: i32>(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
+ static_assert_imm2!(LANE);
+ vmlsl_high_s32(a, b, simd_shuffle4!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Unsigned multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlsl_high_lane_u16<const LANE: i32>(a: uint32x4_t, b: uint16x8_t, c: uint16x4_t) -> uint32x4_t {
+ static_assert_imm2!(LANE);
+ vmlsl_high_u16(a, b, simd_shuffle8!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Unsigned multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlsl_high_laneq_u16<const LANE: i32>(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t {
+ static_assert_imm3!(LANE);
+ vmlsl_high_u16(a, b, simd_shuffle8!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Unsigned multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlsl_high_lane_u32<const LANE: i32>(a: uint64x2_t, b: uint32x4_t, c: uint32x2_t) -> uint64x2_t {
+ static_assert_imm1!(LANE);
+ vmlsl_high_u32(a, b, simd_shuffle4!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Unsigned multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlsl_high_laneq_u32<const LANE: i32>(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t {
+ static_assert_imm2!(LANE);
+ vmlsl_high_u32(a, b, simd_shuffle4!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
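+// A usage sketch (illustrative): `vmlsl_high*` mirrors `vmlal_high*` above
+// but subtracts the widened product from the accumulator.
+#[cfg(test)]
+mod vmlsl_high_sketch {
+    use crate::core_arch::{aarch64::*, simd::*};
+    use std::mem::transmute;
+    use stdarch_test::simd_test;
+
+    #[simd_test(enable = "neon")]
+    unsafe fn subtracts_widened_product() {
+        let a: int32x4_t = transmute(i32x4::new(10, 10, 10, 10));
+        let b: int16x8_t = transmute(i16x8::new(0, 0, 0, 0, 1, 2, 3, 4));
+        let c: int16x8_t = transmute(i16x8::new(0, 0, 0, 0, 1, 1, 1, 1));
+        let r: i32x4 = transmute(vmlsl_high_s16(a, b, c));
+        assert_eq!(r, i32x4::new(9, 8, 7, 6)); // a - b_hi * c_hi
+    }
+}
+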
+/// Extract narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(xtn2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmovn_high_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t {
+ let c: int8x8_t = simd_cast(b);
+ simd_shuffle16!(a, c, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+}
+
+/// Extract narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(xtn2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmovn_high_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t {
+ let c: int16x4_t = simd_cast(b);
+ simd_shuffle8!(a, c, [0, 1, 2, 3, 4, 5, 6, 7])
+}
+
+/// Extract narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(xtn2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmovn_high_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t {
+ let c: int32x2_t = simd_cast(b);
+ simd_shuffle4!(a, c, [0, 1, 2, 3])
+}
+
+/// Extract narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(xtn2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmovn_high_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
+ let c: uint8x8_t = simd_cast(b);
+ simd_shuffle16!(a, c, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+}
+
+/// Extract narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(xtn2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmovn_high_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
+ let c: uint16x4_t = simd_cast(b);
+ simd_shuffle8!(a, c, [0, 1, 2, 3, 4, 5, 6, 7])
+}
+
+/// Extract narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(xtn2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmovn_high_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
+ let c: uint32x2_t = simd_cast(b);
+ simd_shuffle4!(a, c, [0, 1, 2, 3])
+}
+
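+// A usage sketch (illustrative): `vmovn_high_*` narrows `b` into the upper
+// half of the result while `a` becomes the lower half -- the usual pattern
+// for narrowing two 128-bit vectors into one.
+#[cfg(test)]
+mod vmovn_high_sketch {
+    use crate::core_arch::{aarch64::*, simd::*};
+    use std::mem::transmute;
+    use stdarch_test::simd_test;
+
+    #[simd_test(enable = "neon")]
+    unsafe fn narrows_into_upper_half() {
+        let a: int8x8_t = transmute(i8x8::new(0, 1, 2, 3, 4, 5, 6, 7));
+        let b: int16x8_t = transmute(i16x8::new(8, 9, 10, 11, 12, 13, 14, 15));
+        let r: i8x16 = transmute(vmovn_high_s16(a, b));
+        assert_eq!(r, i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15));
+    }
+}
+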
+/// Negate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(neg))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vneg_s64(a: int64x1_t) -> int64x1_t {
+ simd_neg(a)
+}
+
+/// Negate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(neg))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vnegq_s64(a: int64x2_t) -> int64x2_t {
+ simd_neg(a)
+}
+
+/// Negate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(neg))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vnegd_s64(a: i64) -> i64 {
+ a.wrapping_neg()
+}
+
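+// A usage sketch (illustrative): the scalar negate wraps rather than
+// saturates, so `i64::MIN` is its own negation; see `vqnegd_s64` below for
+// the saturating form.
+#[cfg(test)]
+mod vnegd_s64_sketch {
+    use crate::core_arch::aarch64::*;
+    use stdarch_test::simd_test;
+
+    #[simd_test(enable = "neon")]
+    unsafe fn wraps_at_min() {
+        assert_eq!(vnegd_s64(1), -1);
+        assert_eq!(vnegd_s64(i64::MIN), i64::MIN); // wrapping, not saturating
+    }
+}
+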
+/// Negate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fneg))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vneg_f64(a: float64x1_t) -> float64x1_t {
+ simd_neg(a)
+}
+
+/// Negate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fneg))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vnegq_f64(a: float64x2_t) -> float64x2_t {
+ simd_neg(a)
+}
+
+/// Signed saturating negate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqneg))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqneg_s64(a: int64x1_t) -> int64x1_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqneg.v1i64")]
+ fn vqneg_s64_(a: int64x1_t) -> int64x1_t;
+ }
+ vqneg_s64_(a)
+}
+
+/// Signed saturating negate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqneg))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqnegq_s64(a: int64x2_t) -> int64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqneg.v2i64")]
+ fn vqnegq_s64_(a: int64x2_t) -> int64x2_t;
+ }
+ vqnegq_s64_(a)
+}
+
+/// Signed saturating negate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqneg))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqnegb_s8(a: i8) -> i8 {
+ simd_extract(vqneg_s8(vdup_n_s8(a)), 0)
+}
+
+/// Signed saturating negate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqneg))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqnegh_s16(a: i16) -> i16 {
+ simd_extract(vqneg_s16(vdup_n_s16(a)), 0)
+}
+
+/// Signed saturating negate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqneg))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqnegs_s32(a: i32) -> i32 {
+ simd_extract(vqneg_s32(vdup_n_s32(a)), 0)
+}
+
+/// Signed saturating negate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqneg))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqnegd_s64(a: i64) -> i64 {
+ simd_extract(vqneg_s64(vdup_n_s64(a)), 0)
+}
+
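+// A usage sketch (illustrative): unlike the plain negate above, the
+// saturating forms clamp `MIN` to `MAX` instead of wrapping.
+#[cfg(test)]
+mod vqneg_sketch {
+    use crate::core_arch::aarch64::*;
+    use stdarch_test::simd_test;
+
+    #[simd_test(enable = "neon")]
+    unsafe fn clamps_at_min() {
+        assert_eq!(vqnegb_s8(i8::MIN), i8::MAX);
+        assert_eq!(vqnegd_s64(i64::MIN), i64::MAX);
+    }
+}
+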
+/// Signed saturating subtract
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqsub))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqsubb_s8(a: i8, b: i8) -> i8 {
+ let a: int8x8_t = vdup_n_s8(a);
+ let b: int8x8_t = vdup_n_s8(b);
+ simd_extract(vqsub_s8(a, b), 0)
+}
+
+/// Signed saturating subtract
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqsub))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqsubh_s16(a: i16, b: i16) -> i16 {
+ let a: int16x4_t = vdup_n_s16(a);
+ let b: int16x4_t = vdup_n_s16(b);
+ simd_extract(vqsub_s16(a, b), 0)
+}
+
+/// Unsigned saturating subtract
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqsub))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqsubb_u8(a: u8, b: u8) -> u8 {
+ let a: uint8x8_t = vdup_n_u8(a);
+ let b: uint8x8_t = vdup_n_u8(b);
+ simd_extract(vqsub_u8(a, b), 0)
+}
+
+/// Unsigned saturating subtract
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqsub))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqsubh_u16(a: u16, b: u16) -> u16 {
+ let a: uint16x4_t = vdup_n_u16(a);
+ let b: uint16x4_t = vdup_n_u16(b);
+ simd_extract(vqsub_u16(a, b), 0)
+}
+
+/// Unsigned saturating subtract
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqsub))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqsubs_u32(a: u32, b: u32) -> u32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.i32")]
+ fn vqsubs_u32_(a: u32, b: u32) -> u32;
+ }
+ vqsubs_u32_(a, b)
+}
+
+/// Unsigned saturating subtract
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqsub))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqsubd_u64(a: u64, b: u64) -> u64 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.i64")]
+ fn vqsubd_u64_(a: u64, b: u64) -> u64;
+ }
+ vqsubd_u64_(a, b)
+}
+
+/// Signed saturating subtract
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqsub))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqsubs_s32(a: i32, b: i32) -> i32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.i32")]
+ fn vqsubs_s32_(a: i32, b: i32) -> i32;
+ }
+ vqsubs_s32_(a, b)
+}
+
+/// Signed saturating subtract
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqsub))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqsubd_s64(a: i64, b: i64) -> i64 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.i64")]
+ fn vqsubd_s64_(a: i64, b: i64) -> i64;
+ }
+ vqsubd_s64_(a, b)
+}
+
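+// A usage sketch (illustrative): saturating subtraction clamps at the type
+// bounds instead of wrapping, e.g. unsigned underflow stops at zero.
+#[cfg(test)]
+mod vqsub_scalar_sketch {
+    use crate::core_arch::aarch64::*;
+    use stdarch_test::simd_test;
+
+    #[simd_test(enable = "neon")]
+    unsafe fn clamps_at_bounds() {
+        assert_eq!(vqsubb_u8(1, 2), 0);             // clamps at u8::MIN
+        assert_eq!(vqsubb_s8(i8::MIN, 1), i8::MIN); // clamps at i8::MIN
+    }
+}
+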
+/// Reverse bit order
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(rbit))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrbit_s8(a: int8x8_t) -> int8x8_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.rbit.v8i8")]
+ fn vrbit_s8_(a: int8x8_t) -> int8x8_t;
+ }
+ vrbit_s8_(a)
+}
+
+/// Reverse bit order
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(rbit))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrbitq_s8(a: int8x16_t) -> int8x16_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.rbit.v16i8")]
+ fn vrbitq_s8_(a: int8x16_t) -> int8x16_t;
+ }
+ vrbitq_s8_(a)
+}
+
+/// Reverse bit order
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(rbit))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrbit_u8(a: uint8x8_t) -> uint8x8_t {
+ transmute(vrbit_s8(transmute(a)))
+}
+
+/// Reverse bit order
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(rbit))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrbitq_u8(a: uint8x16_t) -> uint8x16_t {
+ transmute(vrbitq_s8(transmute(a)))
+}
+
+/// Reverse bit order
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(rbit))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrbit_p8(a: poly8x8_t) -> poly8x8_t {
+ transmute(vrbit_s8(transmute(a)))
+}
+
+/// Reverse bit order
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(rbit))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrbitq_p8(a: poly8x16_t) -> poly8x16_t {
+ transmute(vrbitq_s8(transmute(a)))
+}
+
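+// A usage sketch (illustrative): `rbit` reverses the bit order within each
+// 8-bit lane independently.
+#[cfg(test)]
+mod vrbit_sketch {
+    use crate::core_arch::{aarch64::*, simd::*};
+    use std::mem::transmute;
+    use stdarch_test::simd_test;
+
+    #[simd_test(enable = "neon")]
+    unsafe fn reverses_bits_per_byte() {
+        let a: uint8x8_t = transmute(u8x8::new(0b0000_0001, 0b1000_0000, 0b1100_0000, 0, 0, 0, 0, 0));
+        let r: u8x8 = transmute(vrbit_u8(a));
+        assert_eq!(r, u8x8::new(0b1000_0000, 0b0000_0001, 0b0000_0011, 0, 0, 0, 0, 0));
+    }
+}
+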
+/// Floating-point round to integral exact, using current rounding mode
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frintx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrndx_f32(a: float32x2_t) -> float32x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.rint.v2f32")]
+ fn vrndx_f32_(a: float32x2_t) -> float32x2_t;
+ }
+ vrndx_f32_(a)
+}
+
+/// Floating-point round to integral exact, using current rounding mode
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frintx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrndxq_f32(a: float32x4_t) -> float32x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.rint.v4f32")]
+ fn vrndxq_f32_(a: float32x4_t) -> float32x4_t;
+ }
+ vrndxq_f32_(a)
+}
+
+/// Floating-point round to integral exact, using current rounding mode
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frintx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrndx_f64(a: float64x1_t) -> float64x1_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.rint.v1f64")]
+ fn vrndx_f64_(a: float64x1_t) -> float64x1_t;
+ }
+ vrndx_f64_(a)
+}
+
+/// Floating-point round to integral exact, using current rounding mode
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frintx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrndxq_f64(a: float64x2_t) -> float64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.rint.v2f64")]
+ fn vrndxq_f64_(a: float64x2_t) -> float64x2_t;
+ }
+ vrndxq_f64_(a)
+}
+
+/// Floating-point round to integral, to nearest with ties to away
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frinta))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrnda_f32(a: float32x2_t) -> float32x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.round.v2f32")]
+ fn vrnda_f32_(a: float32x2_t) -> float32x2_t;
+ }
+ vrnda_f32_(a)
+}
+
+/// Floating-point round to integral, to nearest with ties to away
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frinta))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrndaq_f32(a: float32x4_t) -> float32x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.round.v4f32")]
+ fn vrndaq_f32_(a: float32x4_t) -> float32x4_t;
+ }
+ vrndaq_f32_(a)
+}
+
+/// Floating-point round to integral, to nearest with ties to away
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frinta))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrnda_f64(a: float64x1_t) -> float64x1_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.round.v1f64")]
+ fn vrnda_f64_(a: float64x1_t) -> float64x1_t;
+ }
+ vrnda_f64_(a)
+}
+
+/// Floating-point round to integral, to nearest with ties to away
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frinta))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrndaq_f64(a: float64x2_t) -> float64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.round.v2f64")]
+ fn vrndaq_f64_(a: float64x2_t) -> float64x2_t;
+ }
+ vrndaq_f64_(a)
+}
+
+/// Floating-point round to integral, to nearest with ties to even
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frintn))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrndn_f64(a: float64x1_t) -> float64x1_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v1f64")]
+ fn vrndn_f64_(a: float64x1_t) -> float64x1_t;
+ }
+ vrndn_f64_(a)
+}
+
+/// Floating-point round to integral, to nearest with ties to even
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frintn))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrndnq_f64(a: float64x2_t) -> float64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v2f64")]
+ fn vrndnq_f64_(a: float64x2_t) -> float64x2_t;
+ }
+ vrndnq_f64_(a)
+}
+
+/// Floating-point round to integral, to nearest with ties to even
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frintn))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrndns_f32(a: f32) -> f32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.roundeven.f32")]
+ fn vrndns_f32_(a: f32) -> f32;
+ }
+ vrndns_f32_(a)
+}
+
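+// A usage sketch (illustrative): the tie-breaking modes differ -- `frinta`
+// (`vrnda*`) rounds ties away from zero, while `frintn` (`vrndn*`) rounds
+// ties to the nearest even integer.
+#[cfg(test)]
+mod vrnd_ties_sketch {
+    use crate::core_arch::{aarch64::*, simd::*};
+    use std::mem::transmute;
+    use stdarch_test::simd_test;
+
+    #[simd_test(enable = "neon")]
+    unsafe fn tie_breaking() {
+        assert_eq!(vrndns_f32(0.5), 0.0); // ties to even
+        assert_eq!(vrndns_f32(1.5), 2.0);
+        let a: float32x2_t = transmute(f32x2::new(0.5, 1.5));
+        let r: f32x2 = transmute(vrnda_f32(a));
+        assert_eq!(r, f32x2::new(1.0, 2.0)); // ties away from zero
+    }
+}
+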
+/// Floating-point round to integral, toward minus infinity
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frintm))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrndm_f32(a: float32x2_t) -> float32x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.floor.v2f32")]
+ fn vrndm_f32_(a: float32x2_t) -> float32x2_t;
+ }
+ vrndm_f32_(a)
+}
+
+/// Floating-point round to integral, toward minus infinity
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frintm))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrndmq_f32(a: float32x4_t) -> float32x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.floor.v4f32")]
+ fn vrndmq_f32_(a: float32x4_t) -> float32x4_t;
+ }
+ vrndmq_f32_(a)
+}
+
+/// Floating-point round to integral, toward minus infinity
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frintm))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrndm_f64(a: float64x1_t) -> float64x1_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.floor.v1f64")]
+ fn vrndm_f64_(a: float64x1_t) -> float64x1_t;
+ }
+ vrndm_f64_(a)
+}
+
+/// Floating-point round to integral, toward minus infinity
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frintm))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrndmq_f64(a: float64x2_t) -> float64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.floor.v2f64")]
+ fn vrndmq_f64_(a: float64x2_t) -> float64x2_t;
+ }
+ vrndmq_f64_(a)
+}
+
+/// Floating-point round to integral, toward plus infinity
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frintp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrndp_f32(a: float32x2_t) -> float32x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.ceil.v2f32")]
+ fn vrndp_f32_(a: float32x2_t) -> float32x2_t;
+ }
+ vrndp_f32_(a)
+}
+
+/// Floating-point round to integral, toward plus infinity
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frintp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrndpq_f32(a: float32x4_t) -> float32x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.ceil.v4f32")]
+ fn vrndpq_f32_(a: float32x4_t) -> float32x4_t;
+ }
+ vrndpq_f32_(a)
+}
+
+/// Floating-point round to integral, toward plus infinity
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frintp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrndp_f64(a: float64x1_t) -> float64x1_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.ceil.v1f64")]
+ fn vrndp_f64_(a: float64x1_t) -> float64x1_t;
+ }
+ vrndp_f64_(a)
+}
+
+/// Floating-point round to integral, toward plus infinity
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frintp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrndpq_f64(a: float64x2_t) -> float64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.ceil.v2f64")]
+ fn vrndpq_f64_(a: float64x2_t) -> float64x2_t;
+ }
+ vrndpq_f64_(a)
+}
+
+/// Floating-point round to integral, toward zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frintz))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrnd_f32(a: float32x2_t) -> float32x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.trunc.v2f32")]
+ fn vrnd_f32_(a: float32x2_t) -> float32x2_t;
+ }
+ vrnd_f32_(a)
+}
+
+/// Floating-point round to integral, toward zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frintz))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrndq_f32(a: float32x4_t) -> float32x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.trunc.v4f32")]
+ fn vrndq_f32_(a: float32x4_t) -> float32x4_t;
+ }
+ vrndq_f32_(a)
+}
+
+/// Floating-point round to integral, toward zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frintz))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrnd_f64(a: float64x1_t) -> float64x1_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.trunc.v1f64")]
+ fn vrnd_f64_(a: float64x1_t) -> float64x1_t;
+ }
+ vrnd_f64_(a)
+}
+
+/// Floating-point round to integral, toward zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frintz))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrndq_f64(a: float64x2_t) -> float64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.trunc.v2f64")]
+ fn vrndq_f64_(a: float64x2_t) -> float64x2_t;
+ }
+ vrndq_f64_(a)
+}
+
+/// Floating-point round to integral, using current rounding mode
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frinti))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrndi_f32(a: float32x2_t) -> float32x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.nearbyint.v2f32")]
+ fn vrndi_f32_(a: float32x2_t) -> float32x2_t;
+ }
+ vrndi_f32_(a)
+}
+
+/// Floating-point round to integral, using current rounding mode
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frinti))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrndiq_f32(a: float32x4_t) -> float32x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.nearbyint.v4f32")]
+ fn vrndiq_f32_(a: float32x4_t) -> float32x4_t;
+ }
+ vrndiq_f32_(a)
+}
+
+/// Floating-point round to integral, using current rounding mode
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frinti))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrndi_f64(a: float64x1_t) -> float64x1_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.nearbyint.v1f64")]
+ fn vrndi_f64_(a: float64x1_t) -> float64x1_t;
+ }
+ vrndi_f64_(a)
+}
+
+/// Floating-point round to integral, using current rounding mode
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frinti))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrndiq_f64(a: float64x2_t) -> float64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.nearbyint.v2f64")]
+ fn vrndiq_f64_(a: float64x2_t) -> float64x2_t;
+ }
+ vrndiq_f64_(a)
+}
+
+/// Signed saturating add
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqadd))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqaddb_s8(a: i8, b: i8) -> i8 {
+ let a: int8x8_t = vdup_n_s8(a);
+ let b: int8x8_t = vdup_n_s8(b);
+ simd_extract(vqadd_s8(a, b), 0)
+}
+
+/// Signed saturating add
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqadd))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqaddh_s16(a: i16, b: i16) -> i16 {
+ let a: int16x4_t = vdup_n_s16(a);
+ let b: int16x4_t = vdup_n_s16(b);
+ simd_extract(vqadd_s16(a, b), 0)
+}
+
+/// Unsigned saturating add
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqadd))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqaddb_u8(a: u8, b: u8) -> u8 {
+ let a: uint8x8_t = vdup_n_u8(a);
+ let b: uint8x8_t = vdup_n_u8(b);
+ simd_extract(vqadd_u8(a, b), 0)
+}
+
+/// Unsigned saturating add
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqadd))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqaddh_u16(a: u16, b: u16) -> u16 {
+ let a: uint16x4_t = vdup_n_u16(a);
+ let b: uint16x4_t = vdup_n_u16(b);
+ simd_extract(vqadd_u16(a, b), 0)
+}
+
+/// Unsigned saturating add
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqadd))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqadds_u32(a: u32, b: u32) -> u32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.i32")]
+ fn vqadds_u32_(a: u32, b: u32) -> u32;
+ }
+ vqadds_u32_(a, b)
+}
+
+/// Unsigned saturating add
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqadd))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqaddd_u64(a: u64, b: u64) -> u64 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.i64")]
+ fn vqaddd_u64_(a: u64, b: u64) -> u64;
+ }
+ vqaddd_u64_(a, b)
+}
+
+/// Signed saturating add
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqadd))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqadds_s32(a: i32, b: i32) -> i32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.i32")]
+ fn vqadds_s32_(a: i32, b: i32) -> i32;
+ }
+ vqadds_s32_(a, b)
+}
+
+/// Signed saturating add
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqadd))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqaddd_s64(a: i64, b: i64) -> i64 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.i64")]
+ fn vqaddd_s64_(a: i64, b: i64) -> i64;
+ }
+ vqaddd_s64_(a, b)
+}
+
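+// A usage sketch (illustrative): saturating addition clamps at the type
+// bounds instead of wrapping.
+#[cfg(test)]
+mod vqadd_scalar_sketch {
+    use crate::core_arch::aarch64::*;
+    use stdarch_test::simd_test;
+
+    #[simd_test(enable = "neon")]
+    unsafe fn clamps_at_bounds() {
+        assert_eq!(vqaddb_u8(200, 100), u8::MAX);      // clamps at 255
+        assert_eq!(vqaddd_s64(i64::MAX, 1), i64::MAX); // clamps at i64::MAX
+    }
+}
+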
+/// Load multiple single-element structures to one, two, three, or four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1_f64_x2(a: *const f64) -> float64x1x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x2.v1f64.p0f64")]
+ fn vld1_f64_x2_(a: *const f64) -> float64x1x2_t;
+ }
+ vld1_f64_x2_(a)
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1q_f64_x2(a: *const f64) -> float64x2x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x2.v2f64.p0f64")]
+ fn vld1q_f64_x2_(a: *const f64) -> float64x2x2_t;
+ }
+ vld1q_f64_x2_(a)
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1_f64_x3(a: *const f64) -> float64x1x3_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x3.v1f64.p0f64")]
+ fn vld1_f64_x3_(a: *const f64) -> float64x1x3_t;
+ }
+ vld1_f64_x3_(a)
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1q_f64_x3(a: *const f64) -> float64x2x3_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x3.v2f64.p0f64")]
+ fn vld1q_f64_x3_(a: *const f64) -> float64x2x3_t;
+ }
+ vld1q_f64_x3_(a)
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1_f64_x4(a: *const f64) -> float64x1x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x4.v1f64.p0f64")]
+ fn vld1_f64_x4_(a: *const f64) -> float64x1x4_t;
+ }
+ vld1_f64_x4_(a)
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1q_f64_x4(a: *const f64) -> float64x2x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x4.v2f64.p0f64")]
+ fn vld1q_f64_x4_(a: *const f64) -> float64x2x4_t;
+ }
+ vld1q_f64_x4_(a)
+}
+
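+// A usage sketch (illustrative): the `_x2`/`_x3`/`_x4` loads read that many
+// consecutive vectors from memory without de-interleaving (contrast with
+// the `vld2*` loads below).
+#[cfg(test)]
+mod vld1_x2_sketch {
+    use crate::core_arch::{aarch64::*, simd::*};
+    use std::mem::transmute;
+    use stdarch_test::simd_test;
+
+    #[simd_test(enable = "neon")]
+    unsafe fn consecutive_vectors() {
+        let data: [f64; 4] = [1.0, 2.0, 3.0, 4.0];
+        let r = vld1q_f64_x2(data.as_ptr());
+        assert_eq!(transmute::<_, f64x2>(r.0), f64x2::new(1.0, 2.0));
+        assert_eq!(transmute::<_, f64x2>(r.1), f64x2::new(3.0, 4.0));
+    }
+}
+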
+/// Load multiple 2-element structures to two registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2q_s64(a: *const i64) -> int64x2x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2.v2i64.p0v2i64")]
+ fn vld2q_s64_(ptr: *const int64x2_t) -> int64x2x2_t;
+ }
+ vld2q_s64_(a as _)
+}
+
+/// Load multiple 2-element structures to two registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2q_u64(a: *const u64) -> uint64x2x2_t {
+ transmute(vld2q_s64(transmute(a)))
+}
+
+/// Load multiple 2-element structures to two registers
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(ld2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2q_p64(a: *const p64) -> poly64x2x2_t {
+ transmute(vld2q_s64(transmute(a)))
+}
+
+/// Load multiple 2-element structures to two registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2_f64(a: *const f64) -> float64x1x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2.v1f64.p0v1f64")]
+ fn vld2_f64_(ptr: *const float64x1_t) -> float64x1x2_t;
+ }
+ vld2_f64_(a as _)
+}
+
+/// Load multiple 2-element structures to two registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2q_f64(a: *const f64) -> float64x2x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2.v2f64.p0v2f64")]
+ fn vld2q_f64_(ptr: *const float64x2_t) -> float64x2x2_t;
+ }
+ vld2q_f64_(a as _)
+}
+
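+// A usage sketch (illustrative): unlike `vld1*_x2`, `vld2` de-interleaves --
+// element 0 of each pair lands in the first register, element 1 in the
+// second.
+#[cfg(test)]
+mod vld2_deinterleave_sketch {
+    use crate::core_arch::{aarch64::*, simd::*};
+    use std::mem::transmute;
+    use stdarch_test::simd_test;
+
+    #[simd_test(enable = "neon")]
+    unsafe fn deinterleaves_pairs() {
+        let data: [f64; 4] = [1.0, 2.0, 3.0, 4.0];
+        let r = vld2q_f64(data.as_ptr());
+        assert_eq!(transmute::<_, f64x2>(r.0), f64x2::new(1.0, 3.0));
+        assert_eq!(transmute::<_, f64x2>(r.1), f64x2::new(2.0, 4.0));
+    }
+}
+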
+/// Load single 2-element structure and replicate to all lanes of two registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld2r))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2q_dup_s64(a: *const i64) -> int64x2x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2r.v2i64.p0i64")]
+ fn vld2q_dup_s64_(ptr: *const i64) -> int64x2x2_t;
+ }
+ vld2q_dup_s64_(a as _)
+}
+
+/// Load single 2-element structure and replicate to all lanes of two registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld2r))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2q_dup_u64(a: *const u64) -> uint64x2x2_t {
+ transmute(vld2q_dup_s64(transmute(a)))
+}
+
+/// Load single 2-element structure and replicate to all lanes of two registers
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(ld2r))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2q_dup_p64(a: *const p64) -> poly64x2x2_t {
+ transmute(vld2q_dup_s64(transmute(a)))
+}
+
+/// Load single 2-element structure and replicate to all lanes of two registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld2r))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2_dup_f64(a: *const f64) -> float64x1x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2r.v1f64.p0f64")]
+ fn vld2_dup_f64_(ptr: *const f64) -> float64x1x2_t;
+ }
+ vld2_dup_f64_(a as _)
+}
+
+/// Load single 2-element structure and replicate to all lanes of two registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld2r))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2q_dup_f64(a: *const f64) -> float64x2x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2r.v2f64.p0f64")]
+ fn vld2q_dup_f64_(ptr: *const f64) -> float64x2x2_t;
+ }
+ vld2q_dup_f64_(a as _)
+}
+
+/// Load single 2-element structure to one lane of two registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2q_lane_s8<const LANE: i32>(a: *const i8, b: int8x16x2_t) -> int8x16x2_t {
+ static_assert_imm4!(LANE);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2lane.v16i8.p0i8")]
+ fn vld2q_lane_s8_(a: int8x16_t, b: int8x16_t, n: i64, ptr: *const i8) -> int8x16x2_t;
+ }
+ vld2q_lane_s8_(b.0, b.1, LANE as i64, a as _)
+}
+
+/// Load single 2-element structure to one lane of two registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2_lane_s64<const LANE: i32>(a: *const i64, b: int64x1x2_t) -> int64x1x2_t {
+ static_assert!(LANE : i32 where LANE == 0);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2lane.v1i64.p0i8")]
+ fn vld2_lane_s64_(a: int64x1_t, b: int64x1_t, n: i64, ptr: *const i8) -> int64x1x2_t;
+ }
+ vld2_lane_s64_(b.0, b.1, LANE as i64, a as _)
+}
+
+/// Load single 2-element structure to one lane of two registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2q_lane_s64<const LANE: i32>(a: *const i64, b: int64x2x2_t) -> int64x2x2_t {
+ static_assert_imm1!(LANE);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2lane.v2i64.p0i8")]
+ fn vld2q_lane_s64_(a: int64x2_t, b: int64x2_t, n: i64, ptr: *const i8) -> int64x2x2_t;
+ }
+ vld2q_lane_s64_(b.0, b.1, LANE as i64, a as _)
+}
+
+/// Load single 2-element structure to one lane of two registers
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2_lane_p64<const LANE: i32>(a: *const p64, b: poly64x1x2_t) -> poly64x1x2_t {
+ static_assert!(LANE : i32 where LANE == 0);
+ transmute(vld2_lane_s64::<LANE>(transmute(a), transmute(b)))
+}
+
+/// Load single 2-element structure to one lane of two registers
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2q_lane_p64<const LANE: i32>(a: *const p64, b: poly64x2x2_t) -> poly64x2x2_t {
+ static_assert_imm1!(LANE);
+ transmute(vld2q_lane_s64::<LANE>(transmute(a), transmute(b)))
+}
+
+/// Load single 2-element structure to one lane of two registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2q_lane_u8<const LANE: i32>(a: *const u8, b: uint8x16x2_t) -> uint8x16x2_t {
+ static_assert_imm4!(LANE);
+ transmute(vld2q_lane_s8::<LANE>(transmute(a), transmute(b)))
+}
+
+/// Load single 2-element structure to one lane of two registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2_lane_u64<const LANE: i32>(a: *const u64, b: uint64x1x2_t) -> uint64x1x2_t {
+ static_assert!(LANE : i32 where LANE == 0);
+ transmute(vld2_lane_s64::<LANE>(transmute(a), transmute(b)))
+}
+
+/// Load single 2-element structure to one lane of two registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2q_lane_u64<const LANE: i32>(a: *const u64, b: uint64x2x2_t) -> uint64x2x2_t {
+ static_assert_imm1!(LANE);
+ transmute(vld2q_lane_s64::<LANE>(transmute(a), transmute(b)))
+}
+
+/// Load multiple 2-element structures to two registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2q_lane_p8<const LANE: i32>(a: *const p8, b: poly8x16x2_t) -> poly8x16x2_t {
+ static_assert_imm4!(LANE);
+ transmute(vld2q_lane_s8::<LANE>(transmute(a), transmute(b)))
+}
+
+/// Load multiple 2-element structures to two registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2_lane_f64<const LANE: i32>(a: *const f64, b: float64x1x2_t) -> float64x1x2_t {
+ static_assert!(LANE : i32 where LANE == 0);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2lane.v1f64.p0i8")]
+ fn vld2_lane_f64_(a: float64x1_t, b: float64x1_t, n: i64, ptr: *const i8) -> float64x1x2_t;
+ }
+ vld2_lane_f64_(b.0, b.1, LANE as i64, a as _)
+}
+
+/// Load multiple 2-element structures to two registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2q_lane_f64<const LANE: i32>(a: *const f64, b: float64x2x2_t) -> float64x2x2_t {
+ static_assert_imm1!(LANE);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2lane.v2f64.p0i8")]
+ fn vld2q_lane_f64_(a: float64x2_t, b: float64x2_t, n: i64, ptr: *const i8) -> float64x2x2_t;
+ }
+ vld2q_lane_f64_(b.0, b.1, LANE as i64, a as _)
+}
+
+/// Load multiple 3-element structures to three registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld3))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3q_s64(a: *const i64) -> int64x2x3_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3.v2i64.p0v2i64")]
+ fn vld3q_s64_(ptr: *const int64x2_t) -> int64x2x3_t;
+ }
+ vld3q_s64_(a as _)
+}
+
+/// Load multiple 3-element structures to three registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld3))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3q_u64(a: *const u64) -> uint64x2x3_t {
+ transmute(vld3q_s64(transmute(a)))
+}
+
+/// Load multiple 3-element structures to three registers
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(ld3))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3q_p64(a: *const p64) -> poly64x2x3_t {
+ transmute(vld3q_s64(transmute(a)))
+}
+
+/// Load multiple 3-element structures to three registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3_f64(a: *const f64) -> float64x1x3_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3.v1f64.p0v1f64")]
+ fn vld3_f64_(ptr: *const float64x1_t) -> float64x1x3_t;
+ }
+ vld3_f64_(a as _)
+}
+
+/// Load multiple 3-element structures to three registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld3))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3q_f64(a: *const f64) -> float64x2x3_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3.v2f64.p0v2f64")]
+ fn vld3q_f64_(ptr: *const float64x2_t) -> float64x2x3_t;
+ }
+ vld3q_f64_(a as _)
+}
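+
+// Editor's illustration (a hedged sketch, not generator output): `ld3`
+// de-interleaves memory laid out as [x0, y0, z0, x1, y1, z1] into three
+// vectors of x, y and z components. In a NEON-enabled unsafe context:
+//
+// let xyz: [f64; 6] = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
+// let v = vld3q_f64(xyz.as_ptr());
+// assert_eq!(vgetq_lane_f64::<0>(v.0), 1.0); // x0
+// assert_eq!(vgetq_lane_f64::<1>(v.0), 4.0); // x1
+// assert_eq!(vgetq_lane_f64::<0>(v.2), 3.0); // z0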
+
+/// Load single 3-element structure and replicate to all lanes of three registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld3r))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3q_dup_s64(a: *const i64) -> int64x2x3_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3r.v2i64.p0i64")]
+ fn vld3q_dup_s64_(ptr: *const i64) -> int64x2x3_t;
+ }
+ vld3q_dup_s64_(a as _)
+}
+
+/// Load single 3-element structure and replicate to all lanes of three registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld3r))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3q_dup_u64(a: *const u64) -> uint64x2x3_t {
+ transmute(vld3q_dup_s64(transmute(a)))
+}
+
+/// Load single 3-element structure and replicate to all lanes of three registers
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(ld3r))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3q_dup_p64(a: *const p64) -> poly64x2x3_t {
+ transmute(vld3q_dup_s64(transmute(a)))
+}
+
+/// Load single 3-element structure and replicate to all lanes of three registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld3r))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3_dup_f64(a: *const f64) -> float64x1x3_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3r.v1f64.p0f64")]
+ fn vld3_dup_f64_(ptr: *const f64) -> float64x1x3_t;
+ }
+ vld3_dup_f64_(a as _)
+}
+
+/// Load single 3-element structure and replicate to all lanes of three registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld3r))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3q_dup_f64(a: *const f64) -> float64x2x3_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3r.v2f64.p0f64")]
+ fn vld3q_dup_f64_(ptr: *const f64) -> float64x2x3_t;
+ }
+ vld3q_dup_f64_(a as _)
+}
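+
+// Editor's illustration (a hedged sketch): the `ld3r` forms read a single
+// 3-element structure and broadcast each element across all lanes of its
+// register. In a NEON-enabled unsafe context:
+//
+// let abc: [f64; 3] = [1.5, 2.5, 3.5];
+// let v = vld3q_dup_f64(abc.as_ptr());
+// assert_eq!(vgetq_lane_f64::<0>(v.1), 2.5); // both lanes of v.1 hold 2.5
+// assert_eq!(vgetq_lane_f64::<1>(v.1), 2.5);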
+
+/// Load multiple 3-element structures to three registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3q_lane_s8<const LANE: i32>(a: *const i8, b: int8x16x3_t) -> int8x16x3_t {
+ static_assert_imm4!(LANE);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3lane.v16i8.p0i8")]
+ fn vld3q_lane_s8_(a: int8x16_t, b: int8x16_t, c: int8x16_t, n: i64, ptr: *const i8) -> int8x16x3_t;
+ }
+ vld3q_lane_s8_(b.0, b.1, b.2, LANE as i64, a as _)
+}
+
+/// Load multiple 3-element structures to three registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3_lane_s64<const LANE: i32>(a: *const i64, b: int64x1x3_t) -> int64x1x3_t {
+ static_assert!(LANE : i32 where LANE == 0);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3lane.v1i64.p0i8")]
+ fn vld3_lane_s64_(a: int64x1_t, b: int64x1_t, c: int64x1_t, n: i64, ptr: *const i8) -> int64x1x3_t;
+ }
+ vld3_lane_s64_(b.0, b.1, b.2, LANE as i64, a as _)
+}
+
+/// Load multiple 3-element structures to three registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3q_lane_s64<const LANE: i32>(a: *const i64, b: int64x2x3_t) -> int64x2x3_t {
+ static_assert_imm1!(LANE);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3lane.v2i64.p0i8")]
+ fn vld3q_lane_s64_(a: int64x2_t, b: int64x2_t, c: int64x2_t, n: i64, ptr: *const i8) -> int64x2x3_t;
+ }
+ vld3q_lane_s64_(b.0, b.1, b.2, LANE as i64, a as _)
+}
+
+/// Load multiple 3-element structures to three registers
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3_lane_p64<const LANE: i32>(a: *const p64, b: poly64x1x3_t) -> poly64x1x3_t {
+ static_assert!(LANE : i32 where LANE == 0);
+ transmute(vld3_lane_s64::<LANE>(transmute(a), transmute(b)))
+}
+
+/// Load multiple 3-element structures to three registers
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3q_lane_p64<const LANE: i32>(a: *const p64, b: poly64x2x3_t) -> poly64x2x3_t {
+ static_assert_imm1!(LANE);
+ transmute(vld3q_lane_s64::<LANE>(transmute(a), transmute(b)))
+}
+
+/// Load multiple 3-element structures to three registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3q_lane_p8<const LANE: i32>(a: *const p8, b: poly8x16x3_t) -> poly8x16x3_t {
+ static_assert_imm4!(LANE);
+ transmute(vld3q_lane_s8::<LANE>(transmute(a), transmute(b)))
+}
+
+/// Load multiple 3-element structures to three registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3q_lane_u8<const LANE: i32>(a: *const u8, b: uint8x16x3_t) -> uint8x16x3_t {
+ static_assert_imm4!(LANE);
+ transmute(vld3q_lane_s8::<LANE>(transmute(a), transmute(b)))
+}
+
+/// Load multiple 3-element structures to three registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3_lane_u64<const LANE: i32>(a: *const u64, b: uint64x1x3_t) -> uint64x1x3_t {
+ static_assert!(LANE : i32 where LANE == 0);
+ transmute(vld3_lane_s64::<LANE>(transmute(a), transmute(b)))
+}
+
+/// Load multiple 3-element structures to three registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3q_lane_u64<const LANE: i32>(a: *const u64, b: uint64x2x3_t) -> uint64x2x3_t {
+ static_assert_imm1!(LANE);
+ transmute(vld3q_lane_s64::<LANE>(transmute(a), transmute(b)))
+}
+
+/// Load multiple 3-element structures to three registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3_lane_f64<const LANE: i32>(a: *const f64, b: float64x1x3_t) -> float64x1x3_t {
+ static_assert!(LANE : i32 where LANE == 0);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3lane.v1f64.p0i8")]
+ fn vld3_lane_f64_(a: float64x1_t, b: float64x1_t, c: float64x1_t, n: i64, ptr: *const i8) -> float64x1x3_t;
+ }
+ vld3_lane_f64_(b.0, b.1, b.2, LANE as i64, a as _)
+}
+
+/// Load multiple 3-element structures to three registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3q_lane_f64<const LANE: i32>(a: *const f64, b: float64x2x3_t) -> float64x2x3_t {
+ static_assert_imm1!(LANE);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3lane.v2f64.p0i8")]
+ fn vld3q_lane_f64_(a: float64x2_t, b: float64x2_t, c: float64x2_t, n: i64, ptr: *const i8) -> float64x2x3_t;
+ }
+ vld3q_lane_f64_(b.0, b.1, b.2, LANE as i64, a as _)
+}
+
+/// Load multiple 4-element structures to four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld4))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_s64(a: *const i64) -> int64x2x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4.v2i64.p0v2i64")]
+ fn vld4q_s64_(ptr: *const int64x2_t) -> int64x2x4_t;
+ }
+ vld4q_s64_(a as _)
+}
+
+/// Load multiple 4-element structures to four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld4))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_u64(a: *const u64) -> uint64x2x4_t {
+ transmute(vld4q_s64(transmute(a)))
+}
+
+/// Load multiple 4-element structures to four registers
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(ld4))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_p64(a: *const p64) -> poly64x2x4_t {
+ transmute(vld4q_s64(transmute(a)))
+}
+
+/// Load multiple 4-element structures to four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4_f64(a: *const f64) -> float64x1x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4.v1f64.p0v1f64")]
+ fn vld4_f64_(ptr: *const float64x1_t) -> float64x1x4_t;
+ }
+ vld4_f64_(a as _)
+}
+
+/// Load multiple 4-element structures to four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld4))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_f64(a: *const f64) -> float64x2x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4.v2f64.p0v2f64")]
+ fn vld4q_f64_(ptr: *const float64x2_t) -> float64x2x4_t;
+ }
+ vld4q_f64_(a as _)
+}
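+
+// Editor's illustration (a hedged sketch): `ld4` extends the same
+// de-interleaving pattern to four components, so [r0, g0, b0, a0, r1, g1,
+// b1, a1] splits into r, g, b and a vectors. In a NEON-enabled unsafe
+// context:
+//
+// let rgba: [f64; 8] = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
+// let v = vld4q_f64(rgba.as_ptr());
+// assert_eq!(vgetq_lane_f64::<0>(v.3), 4.0); // a0
+// assert_eq!(vgetq_lane_f64::<1>(v.3), 8.0); // a1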
+
+/// Load single 4-element structure and replicate to all lanes of four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld4r))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_dup_s64(a: *const i64) -> int64x2x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4r.v2i64.p0i64")]
+ fn vld4q_dup_s64_(ptr: *const i64) -> int64x2x4_t;
+ }
+ vld4q_dup_s64_(a as _)
+}
+
+/// Load single 4-element structure and replicate to all lanes of four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld4r))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_dup_u64(a: *const u64) -> uint64x2x4_t {
+ transmute(vld4q_dup_s64(transmute(a)))
+}
+
+/// Load single 4-element structure and replicate to all lanes of four registers
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(ld4r))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_dup_p64(a: *const p64) -> poly64x2x4_t {
+ transmute(vld4q_dup_s64(transmute(a)))
+}
+
+/// Load single 4-element structure and replicate to all lanes of four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld4r))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4_dup_f64(a: *const f64) -> float64x1x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4r.v1f64.p0f64")]
+ fn vld4_dup_f64_(ptr: *const f64) -> float64x1x4_t;
+ }
+ vld4_dup_f64_(a as _)
+}
+
+/// Load single 4-element structure and replicate to all lanes of four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld4r))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_dup_f64(a: *const f64) -> float64x2x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4r.v2f64.p0f64")]
+ fn vld4q_dup_f64_(ptr: *const f64) -> float64x2x4_t;
+ }
+ vld4q_dup_f64_(a as _)
+}
+
+/// Load multiple 4-element structures to four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_lane_s8<const LANE: i32>(a: *const i8, b: int8x16x4_t) -> int8x16x4_t {
+ static_assert_imm4!(LANE);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4lane.v16i8.p0i8")]
+ fn vld4q_lane_s8_(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, n: i64, ptr: *const i8) -> int8x16x4_t;
+ }
+ vld4q_lane_s8_(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+}
+
+/// Load multiple 4-element structures to four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4_lane_s64<const LANE: i32>(a: *const i64, b: int64x1x4_t) -> int64x1x4_t {
+ static_assert!(LANE : i32 where LANE == 0);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4lane.v1i64.p0i8")]
+ fn vld4_lane_s64_(a: int64x1_t, b: int64x1_t, c: int64x1_t, d: int64x1_t, n: i64, ptr: *const i8) -> int64x1x4_t;
+ }
+ vld4_lane_s64_(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+}
+
+/// Load multiple 4-element structures to four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_lane_s64<const LANE: i32>(a: *const i64, b: int64x2x4_t) -> int64x2x4_t {
+ static_assert_imm1!(LANE);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4lane.v2i64.p0i8")]
+ fn vld4q_lane_s64_(a: int64x2_t, b: int64x2_t, c: int64x2_t, d: int64x2_t, n: i64, ptr: *const i8) -> int64x2x4_t;
+ }
+ vld4q_lane_s64_(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+}
+
+/// Load multiple 4-element structures to four registers
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4_lane_p64<const LANE: i32>(a: *const p64, b: poly64x1x4_t) -> poly64x1x4_t {
+ static_assert!(LANE : i32 where LANE == 0);
+ transmute(vld4_lane_s64::<LANE>(transmute(a), transmute(b)))
+}
+
+/// Load multiple 4-element structures to four registers
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_lane_p64<const LANE: i32>(a: *const p64, b: poly64x2x4_t) -> poly64x2x4_t {
+ static_assert_imm1!(LANE);
+ transmute(vld4q_lane_s64::<LANE>(transmute(a), transmute(b)))
+}
+
+/// Load multiple 4-element structures to four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_lane_p8<const LANE: i32>(a: *const p8, b: poly8x16x4_t) -> poly8x16x4_t {
+ static_assert_imm4!(LANE);
+ transmute(vld4q_lane_s8::<LANE>(transmute(a), transmute(b)))
+}
+
+/// Load multiple 4-element structures to four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_lane_u8<const LANE: i32>(a: *const u8, b: uint8x16x4_t) -> uint8x16x4_t {
+ static_assert_imm4!(LANE);
+ transmute(vld4q_lane_s8::<LANE>(transmute(a), transmute(b)))
+}
+
+/// Load multiple 4-element structures to four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4_lane_u64<const LANE: i32>(a: *const u64, b: uint64x1x4_t) -> uint64x1x4_t {
+ static_assert!(LANE : i32 where LANE == 0);
+ transmute(vld4_lane_s64::<LANE>(transmute(a), transmute(b)))
+}
+
+/// Load multiple 4-element structures to four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_lane_u64<const LANE: i32>(a: *const u64, b: uint64x2x4_t) -> uint64x2x4_t {
+ static_assert_imm1!(LANE);
+ transmute(vld4q_lane_s64::<LANE>(transmute(a), transmute(b)))
+}
+
+/// Load multiple 4-element structures to four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4_lane_f64<const LANE: i32>(a: *const f64, b: float64x1x4_t) -> float64x1x4_t {
+ static_assert!(LANE : i32 where LANE == 0);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4lane.v1f64.p0i8")]
+ fn vld4_lane_f64_(a: float64x1_t, b: float64x1_t, c: float64x1_t, d: float64x1_t, n: i64, ptr: *const i8) -> float64x1x4_t;
+ }
+ vld4_lane_f64_(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+}
+
+/// Load multiple 4-element structures to four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_lane_f64<const LANE: i32>(a: *const f64, b: float64x2x4_t) -> float64x2x4_t {
+ static_assert_imm1!(LANE);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4lane.v2f64.p0i8")]
+ fn vld4q_lane_f64_(a: float64x2_t, b: float64x2_t, c: float64x2_t, d: float64x2_t, n: i64, ptr: *const i8) -> float64x2x4_t;
+ }
+ vld4q_lane_f64_(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+}
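+
+// Editor's illustration (a hedged sketch): the `ld4` lane form loads one
+// 4-element structure into lane `LANE` of each of the four vectors. In a
+// NEON-enabled unsafe context:
+//
+// let quad: [f64; 4] = [1.0, 2.0, 3.0, 4.0];
+// let zero = vdupq_n_f64(0.0);
+// let init = float64x2x4_t(zero, zero, zero, zero);
+// let r = vld4q_lane_f64::<1>(quad.as_ptr(), init);
+// assert_eq!(vgetq_lane_f64::<1>(r.0), 1.0); // first element, lane 1
+// assert_eq!(vgetq_lane_f64::<1>(r.3), 4.0); // fourth element, lane 1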
+
+/// Store multiple single-element structures from one, two, three, or four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1_lane_f64<const LANE: i32>(a: *mut f64, b: float64x1_t) {
+ static_assert!(LANE : i32 where LANE == 0);
+ *a = simd_extract(b, LANE as u32);
+}
+
+/// Store multiple single-element structures from one, two, three, or four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1q_lane_f64<const LANE: i32>(a: *mut f64, b: float64x2_t) {
+ static_assert_imm1!(LANE);
+ *a = simd_extract(b, LANE as u32);
+}
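+
+// Editor's illustration (a hedged sketch): the `vst1*_lane` forms store a
+// single lane to memory; they compile down to an ordinary scalar store,
+// which is why the attributes above assert `nop` rather than a NEON
+// instruction. In a NEON-enabled unsafe context:
+//
+// let v = vsetq_lane_f64::<1>(42.0, vdupq_n_f64(0.0));
+// let mut out = 0.0f64;
+// vst1q_lane_f64::<1>(&mut out, v);
+// assert_eq!(out, 42.0);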
+
+/// Store multiple single-element structures from one, two, three, or four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1_f64_x2(a: *mut f64, b: float64x1x2_t) {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x2.v1f64.p0f64")]
+ fn vst1_f64_x2_(a: float64x1_t, b: float64x1_t, ptr: *mut f64);
+ }
+ vst1_f64_x2_(b.0, b.1, a)
+}
+
+/// Store multiple single-element structures from one, two, three, or four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1q_f64_x2(a: *mut f64, b: float64x2x2_t) {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x2.v2f64.p0f64")]
+ fn vst1q_f64_x2_(a: float64x2_t, b: float64x2_t, ptr: *mut f64);
+ }
+ vst1q_f64_x2_(b.0, b.1, a)
+}
+
+/// Store multiple single-element structures from one, two, three, or four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1_f64_x3(a: *mut f64, b: float64x1x3_t) {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x3.v1f64.p0f64")]
+ fn vst1_f64_x3_(a: float64x1_t, b: float64x1_t, c: float64x1_t, ptr: *mut f64);
+ }
+ vst1_f64_x3_(b.0, b.1, b.2, a)
+}
+
+/// Store multiple single-element structures from one, two, three, or four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1q_f64_x3(a: *mut f64, b: float64x2x3_t) {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x3.v2f64.p0f64")]
+ fn vst1q_f64_x3_(a: float64x2_t, b: float64x2_t, c: float64x2_t, ptr: *mut f64);
+ }
+ vst1q_f64_x3_(b.0, b.1, b.2, a)
+}
+
+/// Store multiple single-element structures from one, two, three, or four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1_f64_x4(a: *mut f64, b: float64x1x4_t) {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x4.v1f64.p0f64")]
+ fn vst1_f64_x4_(a: float64x1_t, b: float64x1_t, c: float64x1_t, d: float64x1_t, ptr: *mut f64);
+ }
+ vst1_f64_x4_(b.0, b.1, b.2, b.3, a)
+}
+
+/// Store multiple single-element structures from one, two, three, or four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1q_f64_x4(a: *mut f64, b: float64x2x4_t) {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x4.v2f64.p0f64")]
+ fn vst1q_f64_x4_(a: float64x2_t, b: float64x2_t, c: float64x2_t, d: float64x2_t, ptr: *mut f64);
+ }
+ vst1q_f64_x4_(b.0, b.1, b.2, b.3, a)
+}
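+
+// Editor's illustration (a hedged sketch): the `st1x2`/`st1x3`/`st1x4`
+// forms store two, three or four registers back-to-back without
+// interleaving. In a NEON-enabled unsafe context:
+//
+// let regs = float64x2x2_t(vdupq_n_f64(1.0), vdupq_n_f64(2.0));
+// let mut out = [0.0f64; 4];
+// vst1q_f64_x2(out.as_mut_ptr(), regs);
+// assert_eq!(out, [1.0, 1.0, 2.0, 2.0]); // contiguous, not interleaved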
+
+/// Store multiple 2-element structures from two registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst2q_s64(a: *mut i64, b: int64x2x2_t) {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st2.v2i64.p0i8")]
+ fn vst2q_s64_(a: int64x2_t, b: int64x2_t, ptr: *mut i8);
+ }
+ vst2q_s64_(b.0, b.1, a as _)
+}
+
+/// Store multiple 2-element structures from two registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst2q_u64(a: *mut u64, b: uint64x2x2_t) {
+ transmute(vst2q_s64(transmute(a), transmute(b)))
+}
+
+/// Store multiple 2-element structures from two registers
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(st2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst2q_p64(a: *mut p64, b: poly64x2x2_t) {
+ transmute(vst2q_s64(transmute(a), transmute(b)))
+}
+
+/// Store multiple 2-element structures from two registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst2_f64(a: *mut f64, b: float64x1x2_t) {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st2.v1f64.p0i8")]
+ fn vst2_f64_(a: float64x1_t, b: float64x1_t, ptr: *mut i8);
+ }
+ vst2_f64_(b.0, b.1, a as _)
+}
+
+/// Store multiple 2-element structures from two registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst2q_f64(a: *mut f64, b: float64x2x2_t) {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st2.v2f64.p0i8")]
+ fn vst2q_f64_(a: float64x2_t, b: float64x2_t, ptr: *mut i8);
+ }
+ vst2q_f64_(b.0, b.1, a as _)
+}
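+
+// Editor's illustration (a hedged sketch): `st2` is the inverse of `ld2`;
+// it interleaves the two registers on the way out. In a NEON-enabled unsafe
+// context:
+//
+// let pairs = float64x2x2_t(vdupq_n_f64(1.0), vdupq_n_f64(2.0));
+// let mut out = [0.0f64; 4];
+// vst2q_f64(out.as_mut_ptr(), pairs);
+// assert_eq!(out, [1.0, 2.0, 1.0, 2.0]); // a0, b0, a1, b1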
+
+/// Store multiple 2-element structures from two registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst2q_lane_s8<const LANE: i32>(a: *mut i8, b: int8x16x2_t) {
+ static_assert_imm4!(LANE);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st2lane.v16i8.p0i8")]
+ fn vst2q_lane_s8_(a: int8x16_t, b: int8x16_t, n: i64, ptr: *mut i8);
+ }
+ vst2q_lane_s8_(b.0, b.1, LANE as i64, a as _)
+}
+
+/// Store multiple 2-element structures from two registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst2_lane_s64<const LANE: i32>(a: *mut i64, b: int64x1x2_t) {
+ static_assert!(LANE : i32 where LANE == 0);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st2lane.v1i64.p0i8")]
+ fn vst2_lane_s64_(a: int64x1_t, b: int64x1_t, n: i64, ptr: *mut i8);
+ }
+ vst2_lane_s64_(b.0, b.1, LANE as i64, a as _)
+}
+
+/// Store multiple 2-element structures from two registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst2q_lane_s64<const LANE: i32>(a: *mut i64, b: int64x2x2_t) {
+ static_assert_imm1!(LANE);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st2lane.v2i64.p0i8")]
+ fn vst2q_lane_s64_(a: int64x2_t, b: int64x2_t, n: i64, ptr: *mut i8);
+ }
+ vst2q_lane_s64_(b.0, b.1, LANE as i64, a as _)
+}
+
+/// Store multiple 2-element structures from two registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst2q_lane_u8<const LANE: i32>(a: *mut u8, b: uint8x16x2_t) {
+ static_assert_imm4!(LANE);
+ transmute(vst2q_lane_s8::<LANE>(transmute(a), transmute(b)))
+}
+
+/// Store multiple 2-element structures from two registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst2_lane_u64<const LANE: i32>(a: *mut u64, b: uint64x1x2_t) {
+ static_assert!(LANE : i32 where LANE == 0);
+ transmute(vst2_lane_s64::<LANE>(transmute(a), transmute(b)))
+}
+
+/// Store multiple 2-element structures from two registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst2q_lane_u64<const LANE: i32>(a: *mut u64, b: uint64x2x2_t) {
+ static_assert_imm1!(LANE);
+ transmute(vst2q_lane_s64::<LANE>(transmute(a), transmute(b)))
+}
+
+/// Store multiple 2-element structures from two registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst2q_lane_p8<const LANE: i32>(a: *mut p8, b: poly8x16x2_t) {
+ static_assert_imm4!(LANE);
+ transmute(vst2q_lane_s8::<LANE>(transmute(a), transmute(b)))
+}
+
+/// Store multiple 2-element structures from two registers
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(st2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst2_lane_p64<const LANE: i32>(a: *mut p64, b: poly64x1x2_t) {
+ static_assert!(LANE : i32 where LANE == 0);
+ transmute(vst2_lane_s64::<LANE>(transmute(a), transmute(b)))
+}
+
+/// Store multiple 2-element structures from two registers
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(st2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst2q_lane_p64<const LANE: i32>(a: *mut p64, b: poly64x2x2_t) {
+ static_assert_imm1!(LANE);
+ transmute(vst2q_lane_s64::<LANE>(transmute(a), transmute(b)))
+}
+
+/// Store multiple 2-element structures from two registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst2_lane_f64<const LANE: i32>(a: *mut f64, b: float64x1x2_t) {
+ static_assert!(LANE : i32 where LANE == 0);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st2lane.v1f64.p0i8")]
+ fn vst2_lane_f64_(a: float64x1_t, b: float64x1_t, n: i64, ptr: *mut i8);
+ }
+ vst2_lane_f64_(b.0, b.1, LANE as i64, a as _)
+}
+
+/// Store multiple 2-element structures from two registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst2q_lane_f64<const LANE: i32>(a: *mut f64, b: float64x2x2_t) {
+ static_assert_imm1!(LANE);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st2lane.v2f64.p0i8")]
+ fn vst2q_lane_f64_(a: float64x2_t, b: float64x2_t, n: i64, ptr: *mut i8);
+ }
+ vst2q_lane_f64_(b.0, b.1, LANE as i64, a as _)
+}
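+
+// Editor's illustration (a hedged sketch): the `st2` lane forms write the
+// single pair taken from lane `LANE` of each register. In a NEON-enabled
+// unsafe context:
+//
+// let pairs = float64x2x2_t(vld1q_f64([1.0, 2.0].as_ptr()),
+//                           vld1q_f64([3.0, 4.0].as_ptr()));
+// let mut out = [0.0f64; 2];
+// vst2q_lane_f64::<1>(out.as_mut_ptr(), pairs);
+// assert_eq!(out, [2.0, 4.0]); // lane 1 of each register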
+
+/// Store multiple 3-element structures from three registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st3))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst3q_s64(a: *mut i64, b: int64x2x3_t) {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st3.v2i64.p0i8")]
+ fn vst3q_s64_(a: int64x2_t, b: int64x2_t, c: int64x2_t, ptr: *mut i8);
+ }
+ vst3q_s64_(b.0, b.1, b.2, a as _)
+}
+
+/// Store multiple 3-element structures from three registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st3))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst3q_u64(a: *mut u64, b: uint64x2x3_t) {
+ transmute(vst3q_s64(transmute(a), transmute(b)))
+}
+
+/// Store multiple 3-element structures from three registers
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(st3))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst3q_p64(a: *mut p64, b: poly64x2x3_t) {
+ transmute(vst3q_s64(transmute(a), transmute(b)))
+}
+
+/// Store multiple 3-element structures from three registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst3_f64(a: *mut f64, b: float64x1x3_t) {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st3.v1f64.p0i8")]
+ fn vst3_f64_(a: float64x1_t, b: float64x1_t, c: float64x1_t, ptr: *mut i8);
+ }
+ vst3_f64_(b.0, b.1, b.2, a as _)
+}
+
+/// Store multiple 3-element structures from three registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st3))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst3q_f64(a: *mut f64, b: float64x2x3_t) {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st3.v2f64.p0i8")]
+ fn vst3q_f64_(a: float64x2_t, b: float64x2_t, c: float64x2_t, ptr: *mut i8);
+ }
+ vst3q_f64_(b.0, b.1, b.2, a as _)
+}
+
+/// Store multiple 3-element structures from three registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst3q_lane_s8<const LANE: i32>(a: *mut i8, b: int8x16x3_t) {
+ static_assert_imm4!(LANE);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st3lane.v16i8.p0i8")]
+ fn vst3q_lane_s8_(a: int8x16_t, b: int8x16_t, c: int8x16_t, n: i64, ptr: *mut i8);
+ }
+ vst3q_lane_s8_(b.0, b.1, b.2, LANE as i64, a as _)
+}
+
+/// Store multiple 3-element structures from three registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst3_lane_s64<const LANE: i32>(a: *mut i64, b: int64x1x3_t) {
+ static_assert!(LANE : i32 where LANE == 0);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st3lane.v1i64.p0i8")]
+ fn vst3_lane_s64_(a: int64x1_t, b: int64x1_t, c: int64x1_t, n: i64, ptr: *mut i8);
+ }
+ vst3_lane_s64_(b.0, b.1, b.2, LANE as i64, a as _)
+}
+
+/// Store multiple 3-element structures from three registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst3q_lane_s64<const LANE: i32>(a: *mut i64, b: int64x2x3_t) {
+ static_assert_imm1!(LANE);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st3lane.v2i64.p0i8")]
+ fn vst3q_lane_s64_(a: int64x2_t, b: int64x2_t, c: int64x2_t, n: i64, ptr: *mut i8);
+ }
+ vst3q_lane_s64_(b.0, b.1, b.2, LANE as i64, a as _)
+}
+
+/// Store multiple 3-element structures from three registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst3q_lane_u8<const LANE: i32>(a: *mut u8, b: uint8x16x3_t) {
+ static_assert_imm4!(LANE);
+ transmute(vst3q_lane_s8::<LANE>(transmute(a), transmute(b)))
+}
+
+/// Store multiple 3-element structures from three registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst3_lane_u64<const LANE: i32>(a: *mut u64, b: uint64x1x3_t) {
+ static_assert!(LANE : i32 where LANE == 0);
+ transmute(vst3_lane_s64::<LANE>(transmute(a), transmute(b)))
+}
+
+/// Store multiple 3-element structures from three registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst3q_lane_u64<const LANE: i32>(a: *mut u64, b: uint64x2x3_t) {
+ static_assert_imm1!(LANE);
+ transmute(vst3q_lane_s64::<LANE>(transmute(a), transmute(b)))
+}
+
+/// Store multiple 3-element structures from three registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst3q_lane_p8<const LANE: i32>(a: *mut p8, b: poly8x16x3_t) {
+ static_assert_imm4!(LANE);
+ transmute(vst3q_lane_s8::<LANE>(transmute(a), transmute(b)))
+}
+
+/// Store multiple 3-element structures from three registers
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(st3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst3_lane_p64<const LANE: i32>(a: *mut p64, b: poly64x1x3_t) {
+ static_assert!(LANE : i32 where LANE == 0);
+ transmute(vst3_lane_s64::<LANE>(transmute(a), transmute(b)))
+}
+
+/// Store multiple 3-element structures from three registers
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(st3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst3q_lane_p64<const LANE: i32>(a: *mut p64, b: poly64x2x3_t) {
+ static_assert_imm1!(LANE);
+ transmute(vst3q_lane_s64::<LANE>(transmute(a), transmute(b)))
+}
+
+/// Store multiple 3-element structures from three registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst3_lane_f64<const LANE: i32>(a: *mut f64, b: float64x1x3_t) {
+ static_assert!(LANE : i32 where LANE == 0);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st3lane.v1f64.p0i8")]
+ fn vst3_lane_f64_(a: float64x1_t, b: float64x1_t, c: float64x1_t, n: i64, ptr: *mut i8);
+ }
+ vst3_lane_f64_(b.0, b.1, b.2, LANE as i64, a as _)
+}
+
+/// Store multiple 3-element structures from three registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst3q_lane_f64<const LANE: i32>(a: *mut f64, b: float64x2x3_t) {
+ static_assert_imm1!(LANE);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st3lane.v2f64.p0i8")]
+ fn vst3q_lane_f64_(a: float64x2_t, b: float64x2_t, c: float64x2_t, n: i64, ptr: *mut i8);
+ }
+ vst3q_lane_f64_(b.0, b.1, b.2, LANE as i64, a as _)
+}
+
+/// Store multiple 4-element structures from four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st4))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst4q_s64(a: *mut i64, b: int64x2x4_t) {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st4.v2i64.p0i8")]
+ fn vst4q_s64_(a: int64x2_t, b: int64x2_t, c: int64x2_t, d: int64x2_t, ptr: *mut i8);
+ }
+ vst4q_s64_(b.0, b.1, b.2, b.3, a as _)
+}
+
+/// Store multiple 4-element structures from four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st4))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst4q_u64(a: *mut u64, b: uint64x2x4_t) {
+ transmute(vst4q_s64(transmute(a), transmute(b)))
+}
+
+/// Store multiple 4-element structures from four registers
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(st4))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst4q_p64(a: *mut p64, b: poly64x2x4_t) {
+ transmute(vst4q_s64(transmute(a), transmute(b)))
+}
+
+/// Store multiple 4-element structures from four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst4_f64(a: *mut f64, b: float64x1x4_t) {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st4.v1f64.p0i8")]
+ fn vst4_f64_(a: float64x1_t, b: float64x1_t, c: float64x1_t, d: float64x1_t, ptr: *mut i8);
+ }
+ vst4_f64_(b.0, b.1, b.2, b.3, a as _)
+}
+
+/// Store multiple 4-element structures from four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st4))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst4q_f64(a: *mut f64, b: float64x2x4_t) {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st4.v2f64.p0i8")]
+ fn vst4q_f64_(a: float64x2_t, b: float64x2_t, c: float64x2_t, d: float64x2_t, ptr: *mut i8);
+ }
+ vst4q_f64_(b.0, b.1, b.2, b.3, a as _)
+}
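+
+// Editor's illustration (a hedged sketch): `st4` interleaves four
+// registers, writing element i of every register before element i + 1 of
+// any. In a NEON-enabled unsafe context:
+//
+// let quads = float64x2x4_t(vdupq_n_f64(1.0), vdupq_n_f64(2.0),
+//                           vdupq_n_f64(3.0), vdupq_n_f64(4.0));
+// let mut out = [0.0f64; 8];
+// vst4q_f64(out.as_mut_ptr(), quads);
+// assert_eq!(out, [1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0]);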
+
+/// Store multiple 4-element structures from four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst4q_lane_s8<const LANE: i32>(a: *mut i8, b: int8x16x4_t) {
+ static_assert_imm4!(LANE);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st4lane.v16i8.p0i8")]
+ fn vst4q_lane_s8_(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, n: i64, ptr: *mut i8);
+ }
+ vst4q_lane_s8_(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+}
+
+/// Store multiple 4-element structures from four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst4_lane_s64<const LANE: i32>(a: *mut i64, b: int64x1x4_t) {
+ static_assert!(LANE : i32 where LANE == 0);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st4lane.v1i64.p0i8")]
+ fn vst4_lane_s64_(a: int64x1_t, b: int64x1_t, c: int64x1_t, d: int64x1_t, n: i64, ptr: *mut i8);
+ }
+ vst4_lane_s64_(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+}
+
+/// Store multiple 4-element structures from four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst4q_lane_s64<const LANE: i32>(a: *mut i64, b: int64x2x4_t) {
+ static_assert_imm1!(LANE);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st4lane.v2i64.p0i8")]
+ fn vst4q_lane_s64_(a: int64x2_t, b: int64x2_t, c: int64x2_t, d: int64x2_t, n: i64, ptr: *mut i8);
+ }
+ vst4q_lane_s64_(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+}
+
+/// Store multiple 4-element structures from four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst4q_lane_u8<const LANE: i32>(a: *mut u8, b: uint8x16x4_t) {
+ static_assert_imm4!(LANE);
+ transmute(vst4q_lane_s8::<LANE>(transmute(a), transmute(b)))
+}
+
+/// Store multiple 4-element structures from four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst4_lane_u64<const LANE: i32>(a: *mut u64, b: uint64x1x4_t) {
+ static_assert!(LANE : i32 where LANE == 0);
+ transmute(vst4_lane_s64::<LANE>(transmute(a), transmute(b)))
+}
+
+/// Store multiple 4-element structures from four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst4q_lane_u64<const LANE: i32>(a: *mut u64, b: uint64x2x4_t) {
+ static_assert_imm1!(LANE);
+ transmute(vst4q_lane_s64::<LANE>(transmute(a), transmute(b)))
+}
+
+/// Store multiple 4-element structures from four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst4q_lane_p8<const LANE: i32>(a: *mut p8, b: poly8x16x4_t) {
+ static_assert_imm4!(LANE);
+ transmute(vst4q_lane_s8::<LANE>(transmute(a), transmute(b)))
+}
+
+/// Store multiple 4-element structures from four registers
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(st4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst4_lane_p64<const LANE: i32>(a: *mut p64, b: poly64x1x4_t) {
+ static_assert!(LANE : i32 where LANE == 0);
+ transmute(vst4_lane_s64::<LANE>(transmute(a), transmute(b)))
+}
+
+/// Store multiple 4-element structures from four registers
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(st4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst4q_lane_p64<const LANE: i32>(a: *mut p64, b: poly64x2x4_t) {
+ static_assert_imm1!(LANE);
+ transmute(vst4q_lane_s64::<LANE>(transmute(a), transmute(b)))
+}
+
+/// Store multiple 4-element structures from four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst4_lane_f64<const LANE: i32>(a: *mut f64, b: float64x1x4_t) {
+ static_assert!(LANE : i32 where LANE == 0);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st4lane.v1f64.p0i8")]
+ fn vst4_lane_f64_(a: float64x1_t, b: float64x1_t, c: float64x1_t, d: float64x1_t, n: i64, ptr: *mut i8);
+ }
+ vst4_lane_f64_(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+}
+
+/// Store multiple 4-element structures from four registers
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst4q_lane_f64<const LANE: i32>(a: *mut f64, b: float64x2x4_t) {
+ static_assert_imm1!(LANE);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st4lane.v2f64.p0i8")]
+ fn vst4q_lane_f64_(a: float64x2_t, b: float64x2_t, c: float64x2_t, d: float64x2_t, n: i64, ptr: *mut i8);
+ }
+ vst4q_lane_f64_(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+}
+
+/// Multiply
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmul))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmul_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
+ simd_mul(a, b)
+}
+
+/// Multiply
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmul))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmulq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+ simd_mul(a, b)
+}
+
+/// Vector multiply by scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmul))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmul_n_f64(a: float64x1_t, b: f64) -> float64x1_t {
+ simd_mul(a, vdup_n_f64(b))
+}
+
+/// Vector multiply by scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmul))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmulq_n_f64(a: float64x2_t, b: f64) -> float64x2_t {
+ simd_mul(a, vdupq_n_f64(b))
+}
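+
+// Editor's illustration (a hedged sketch): the `_n_` forms broadcast the
+// scalar operand and then multiply lane-wise. In a NEON-enabled unsafe
+// context:
+//
+// let v = vld1q_f64([1.0, 2.0].as_ptr());
+// let r = vmulq_n_f64(v, 3.0);
+// assert_eq!(vgetq_lane_f64::<0>(r), 3.0);
+// assert_eq!(vgetq_lane_f64::<1>(r), 6.0);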
+
+/// Floating-point multiply
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmul, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmul_lane_f64<const LANE: i32>(a: float64x1_t, b: float64x1_t) -> float64x1_t {
+ static_assert!(LANE : i32 where LANE == 0);
+ simd_mul(a, transmute::<f64, _>(simd_extract(b, LANE as u32)))
+}
+
+/// Floating-point multiply
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmul, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmul_laneq_f64<const LANE: i32>(a: float64x1_t, b: float64x2_t) -> float64x1_t {
+ static_assert_imm1!(LANE);
+ simd_mul(a, transmute::<f64, _>(simd_extract(b, LANE as u32)))
+}
+
+/// Floating-point multiply
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmul, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmulq_lane_f64<const LANE: i32>(a: float64x2_t, b: float64x1_t) -> float64x2_t {
+ static_assert!(LANE : i32 where LANE == 0);
+ simd_mul(a, simd_shuffle2!(b, b, <const LANE: i32> [LANE as u32, LANE as u32]))
+}
+
+/// Floating-point multiply
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmul, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmulq_laneq_f64<const LANE: i32>(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+ static_assert_imm1!(LANE);
+ simd_mul(a, simd_shuffle2!(b, b, <const LANE: i32> [LANE as u32, LANE as u32]))
+}
+
+/// Floating-point multiply
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmul, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmuls_lane_f32<const LANE: i32>(a: f32, b: float32x2_t) -> f32 {
+ static_assert_imm1!(LANE);
+ let b: f32 = simd_extract(b, LANE as u32);
+ a * b
+}
+
+/// Floating-point multiply
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmul, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmuls_laneq_f32<const LANE: i32>(a: f32, b: float32x4_t) -> f32 {
+ static_assert_imm2!(LANE);
+ let b: f32 = simd_extract(b, LANE as u32);
+ a * b
+}
+
+/// Floating-point multiply
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmul, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmuld_lane_f64<const LANE: i32>(a: f64, b: float64x1_t) -> f64 {
+ static_assert!(LANE : i32 where LANE == 0);
+ let b: f64 = simd_extract(b, LANE as u32);
+ a * b
+}
+
+/// Floating-point multiply
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmul, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmuld_laneq_f64<const LANE: i32>(a: f64, b: float64x2_t) -> f64 {
+ static_assert_imm1!(LANE);
+ let b: f64 = simd_extract(b, LANE as u32);
+ a * b
+}
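+
+// Editor's illustration (a hedged sketch): the scalar-by-lane forms
+// (`vmuls_*`, `vmuld_*`) extract one lane and multiply it with a scalar
+// operand, returning a scalar. In a NEON-enabled unsafe context:
+//
+// let v = vld1q_f64([10.0, 20.0].as_ptr());
+// assert_eq!(vmuld_laneq_f64::<1>(3.0, v), 60.0); // 3.0 * lane 1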
+
+/// Signed multiply long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smull2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmull_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t {
+ let a: int8x8_t = simd_shuffle8!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
+ let b: int8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
+ vmull_s8(a, b)
+}
+
+/// Signed multiply long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smull2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmull_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
+ let a: int16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]);
+ let b: int16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]);
+ vmull_s16(a, b)
+}
+
+/// Signed multiply long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smull2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmull_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
+ let a: int32x2_t = simd_shuffle2!(a, a, [2, 3]);
+ let b: int32x2_t = simd_shuffle2!(b, b, [2, 3]);
+ vmull_s32(a, b)
+}
+
+/// Unsigned multiply long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umull2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmull_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t {
+ let a: uint8x8_t = simd_shuffle8!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
+ let b: uint8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
+ vmull_u8(a, b)
+}
+
+/// Unsigned multiply long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umull2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmull_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
+ let a: uint16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]);
+ let b: uint16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]);
+ vmull_u16(a, b)
+}
+
+/// Unsigned multiply long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umull2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmull_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
+ let a: uint32x2_t = simd_shuffle2!(a, a, [2, 3]);
+ let b: uint32x2_t = simd_shuffle2!(b, b, [2, 3]);
+ vmull_u32(a, b)
+}
+
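+// The `_high` widening multiplies above operate on the upper half of each
+// input and widen the products, so no product can overflow the narrow element
+// type. A sketch (illustrative values only):
+//
+//     let a: int16x8_t = core::mem::transmute([0i16, 0, 0, 0, 1, 2, 3, 4]);
+//     let b: int16x8_t = core::mem::transmute([0i16, 0, 0, 0, 10, 20, 30, 40]);
+//     let r: int32x4_t = vmull_high_s16(a, b); // lanes: 10, 40, 90, 160
+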
+/// Polynomial multiply long
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(pmull))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmull_p64(a: p64, b: p64) -> p128 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.pmull64")]
+ fn vmull_p64_(a: p64, b: p64) -> int8x16_t;
+ }
+ transmute(vmull_p64_(a, b))
+}
+
+/// Polynomial multiply long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(pmull))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmull_high_p8(a: poly8x16_t, b: poly8x16_t) -> poly16x8_t {
+ let a: poly8x8_t = simd_shuffle8!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
+ let b: poly8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
+ vmull_p8(a, b)
+}
+
+/// Polynomial multiply long
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(pmull))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmull_high_p64(a: poly64x2_t, b: poly64x2_t) -> p128 {
+ vmull_p64(simd_extract(a, 1), simd_extract(b, 1))
+}
+
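+// `vmull_p64` is a carry-less (polynomial) multiply over GF(2), the building
+// block of GHASH-style hashing. A sketch, assuming the `aes` target feature
+// is enabled:
+//
+//     let r: p128 = vmull_p64(0b11, 0b101); // (x + 1)(x^2 + 1) == 0b1111
+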
+/// Multiply long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smull2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmull_high_n_s16(a: int16x8_t, b: i16) -> int32x4_t {
+ vmull_high_s16(a, vdupq_n_s16(b))
+}
+
+/// Multiply long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smull2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmull_high_n_s32(a: int32x4_t, b: i32) -> int64x2_t {
+ vmull_high_s32(a, vdupq_n_s32(b))
+}
+
+/// Multiply long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umull2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmull_high_n_u16(a: uint16x8_t, b: u16) -> uint32x4_t {
+ vmull_high_u16(a, vdupq_n_u16(b))
+}
+
+/// Multiply long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umull2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmull_high_n_u32(a: uint32x4_t, b: u32) -> uint64x2_t {
+ vmull_high_u32(a, vdupq_n_u32(b))
+}
+
+/// Multiply long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smull2, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmull_high_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int32x4_t {
+ static_assert_imm2!(LANE);
+ vmull_high_s16(a, simd_shuffle8!(b, b, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Multiply long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smull2, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmull_high_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int32x4_t {
+ static_assert_imm3!(LANE);
+ vmull_high_s16(a, simd_shuffle8!(b, b, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Multiply long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smull2, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmull_high_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int64x2_t {
+ static_assert_imm1!(LANE);
+ vmull_high_s32(a, simd_shuffle4!(b, b, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Multiply long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smull2, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmull_high_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int64x2_t {
+ static_assert_imm2!(LANE);
+ vmull_high_s32(a, simd_shuffle4!(b, b, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Multiply long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umull2, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmull_high_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x4_t) -> uint32x4_t {
+ static_assert_imm2!(LANE);
+ vmull_high_u16(a, simd_shuffle8!(b, b, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Multiply long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umull2, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmull_high_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
+ static_assert_imm3!(LANE);
+ vmull_high_u16(a, simd_shuffle8!(b, b, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Multiply long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umull2, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmull_high_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x2_t) -> uint64x2_t {
+ static_assert_imm1!(LANE);
+ vmull_high_u32(a, simd_shuffle4!(b, b, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Multiply long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umull2, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmull_high_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
+ static_assert_imm2!(LANE);
+ vmull_high_u32(a, simd_shuffle4!(b, b, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
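+// The `_n` and `_lane` forms above broadcast a single multiplier before the
+// widening multiply. A sketch (illustrative values only):
+//
+//     let a: int16x8_t = core::mem::transmute([0i16, 0, 0, 0, 1, 2, 3, 4]);
+//     let b4: int16x4_t = core::mem::transmute([10i16, 20, 30, 40]);
+//     let r = vmull_high_lane_s16::<1>(a, b4); // lanes: 20, 40, 60, 80
+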
+/// Floating-point multiply extended
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmulx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmulx_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmulx.v2f32")]
+ fn vmulx_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+ }
+ vmulx_f32_(a, b)
+}
+
+/// Floating-point multiply extended
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmulx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmulxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmulx.v4f32")]
+ fn vmulxq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+ }
+ vmulxq_f32_(a, b)
+}
+
+/// Floating-point multiply extended
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmulx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmulx_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmulx.v1f64")]
+ fn vmulx_f64_(a: float64x1_t, b: float64x1_t) -> float64x1_t;
+ }
+ vmulx_f64_(a, b)
+}
+
+/// Floating-point multiply extended
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmulx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmulxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmulx.v2f64")]
+ fn vmulxq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+ }
+ vmulxq_f64_(a, b)
+}
+
+/// Floating-point multiply extended
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmulx_lane_f64<const LANE: i32>(a: float64x1_t, b: float64x1_t) -> float64x1_t {
+ static_assert!(LANE : i32 where LANE == 0);
+ vmulx_f64(a, transmute::<f64, _>(simd_extract(b, LANE as u32)))
+}
+
+/// Floating-point multiply extended
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmulx_laneq_f64<const LANE: i32>(a: float64x1_t, b: float64x2_t) -> float64x1_t {
+ static_assert_imm1!(LANE);
+ vmulx_f64(a, transmute::<f64, _>(simd_extract(b, LANE as u32)))
+}
+
+/// Floating-point multiply extended
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmulx_lane_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+ static_assert_imm1!(LANE);
+ vmulx_f32(a, simd_shuffle2!(b, b, <const LANE: i32> [LANE as u32, LANE as u32]))
+}
+
+/// Floating-point multiply extended
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmulx_laneq_f32<const LANE: i32>(a: float32x2_t, b: float32x4_t) -> float32x2_t {
+ static_assert_imm2!(LANE);
+ vmulx_f32(a, simd_shuffle2!(b, b, <const LANE: i32> [LANE as u32, LANE as u32]))
+}
+
+/// Floating-point multiply extended
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmulxq_lane_f32<const LANE: i32>(a: float32x4_t, b: float32x2_t) -> float32x4_t {
+ static_assert_imm1!(LANE);
+ vmulxq_f32(a, simd_shuffle4!(b, b, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Floating-point multiply extended
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmulxq_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+ static_assert_imm2!(LANE);
+ vmulxq_f32(a, simd_shuffle4!(b, b, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Floating-point multiply extended
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmulxq_lane_f64<const LANE: i32>(a: float64x2_t, b: float64x1_t) -> float64x2_t {
+ static_assert!(LANE : i32 where LANE == 0);
+ vmulxq_f64(a, simd_shuffle2!(b, b, <const LANE: i32> [LANE as u32, LANE as u32]))
+}
+
+/// Floating-point multiply extended
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmulxq_laneq_f64<const LANE: i32>(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+ static_assert_imm1!(LANE);
+ vmulxq_f64(a, simd_shuffle2!(b, b, <const LANE: i32> [LANE as u32, LANE as u32]))
+}
+
+/// Floating-point multiply extended
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmulx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmulxs_f32(a: f32, b: f32) -> f32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmulx.f32")]
+ fn vmulxs_f32_(a: f32, b: f32) -> f32;
+ }
+ vmulxs_f32_(a, b)
+}
+
+/// Floating-point multiply extended
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmulx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmulxd_f64(a: f64, b: f64) -> f64 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmulx.f64")]
+ fn vmulxd_f64_(a: f64, b: f64) -> f64;
+ }
+ vmulxd_f64_(a, b)
+}
+
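+// `fmulx` matches an ordinary multiply except in one IEEE corner case: where
+// `fmul` of zero and infinity produces NaN, `fmulx` produces 2.0 with the
+// sign given by the XOR of the operand signs. A sketch:
+//
+//     assert_eq!(vmulxs_f32(0.0, f32::INFINITY), 2.0);
+//     assert_eq!(vmulxs_f32(-0.0, f32::INFINITY), -2.0);
+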
+/// Floating-point multiply extended
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmulxs_lane_f32<const LANE: i32>(a: f32, b: float32x2_t) -> f32 {
+ static_assert_imm1!(LANE);
+ vmulxs_f32(a, simd_extract(b, LANE as u32))
+}
+
+/// Floating-point multiply extended
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmulxs_laneq_f32<const LANE: i32>(a: f32, b: float32x4_t) -> f32 {
+ static_assert_imm2!(LANE);
+ vmulxs_f32(a, simd_extract(b, LANE as u32))
+}
+
+/// Floating-point multiply extended
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmulxd_lane_f64<const LANE: i32>(a: f64, b: float64x1_t) -> f64 {
+ static_assert!(LANE : i32 where LANE == 0);
+ vmulxd_f64(a, simd_extract(b, LANE as u32))
+}
+
+/// Floating-point multiply extended
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmulxd_laneq_f64<const LANE: i32>(a: f64, b: float64x2_t) -> f64 {
+ static_assert_imm1!(LANE);
+ vmulxd_f64(a, simd_extract(b, LANE as u32))
+}
+
+/// Floating-point fused multiply-add to accumulator (vector)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmadd))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfma_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fma.v1f64")]
+ fn vfma_f64_(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t;
+ }
+ vfma_f64_(b, c, a)
+}
+
+/// Floating-point fused multiply-add to accumulator (vector)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmla))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfmaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fma.v2f64")]
+ fn vfmaq_f64_(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t;
+ }
+ vfmaq_f64_(b, c, a)
+}
+
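+// Note the operand order: the accumulator comes first, so `vfma_f64(a, b, c)`
+// computes `a + b * c` with a single rounding (the wrappers pass `(b, c, a)`
+// to `llvm.fma`). A sketch:
+//
+//     let a: float64x1_t = core::mem::transmute([10.0f64]);
+//     let b: float64x1_t = core::mem::transmute([2.0f64]);
+//     let c: float64x1_t = core::mem::transmute([3.0f64]);
+//     let r = vfma_f64(a, b, c); // lane 0 == 10.0 + 2.0 * 3.0 == 16.0
+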
+/// Floating-point fused multiply-add to accumulator (vector)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmadd))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfma_n_f64(a: float64x1_t, b: float64x1_t, c: f64) -> float64x1_t {
+ vfma_f64(a, b, vdup_n_f64(c))
+}
+
+/// Floating-point fused multiply-add to accumulator (vector)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmla))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfmaq_n_f64(a: float64x2_t, b: float64x2_t, c: f64) -> float64x2_t {
+ vfmaq_f64(a, b, vdupq_n_f64(c))
+}
+
+/// Floating-point fused multiply-add to accumulator
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfma_lane_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
+ static_assert_imm1!(LANE);
+ vfma_f32(a, b, vdup_n_f32(simd_extract(c, LANE as u32)))
+}
+
+/// Floating-point fused multiply-add to accumulator
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfma_laneq_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t, c: float32x4_t) -> float32x2_t {
+ static_assert_imm2!(LANE);
+ vfma_f32(a, b, vdup_n_f32(simd_extract(c, LANE as u32)))
+}
+
+/// Floating-point fused multiply-add to accumulator
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfmaq_lane_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t, c: float32x2_t) -> float32x4_t {
+ static_assert_imm1!(LANE);
+ vfmaq_f32(a, b, vdupq_n_f32(simd_extract(c, LANE as u32)))
+}
+
+/// Floating-point fused multiply-add to accumulator
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfmaq_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
+ static_assert_imm2!(LANE);
+ vfmaq_f32(a, b, vdupq_n_f32(simd_extract(c, LANE as u32)))
+}
+
+/// Floating-point fused multiply-add to accumulator
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmadd, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfma_lane_f64<const LANE: i32>(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t {
+ static_assert!(LANE : i32 where LANE == 0);
+ vfma_f64(a, b, vdup_n_f64(simd_extract(c, LANE as u32)))
+}
+
+/// Floating-point fused multiply-add to accumulator
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfma_laneq_f64<const LANE: i32>(a: float64x1_t, b: float64x1_t, c: float64x2_t) -> float64x1_t {
+ static_assert_imm1!(LANE);
+ vfma_f64(a, b, vdup_n_f64(simd_extract(c, LANE as u32)))
+}
+
+/// Floating-point fused multiply-add to accumulator
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfmaq_lane_f64<const LANE: i32>(a: float64x2_t, b: float64x2_t, c: float64x1_t) -> float64x2_t {
+ static_assert!(LANE : i32 where LANE == 0);
+ vfmaq_f64(a, b, vdupq_n_f64(simd_extract(c, LANE as u32)))
+}
+
+/// Floating-point fused multiply-add to accumulator
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfmaq_laneq_f64<const LANE: i32>(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t {
+ static_assert_imm1!(LANE);
+ vfmaq_f64(a, b, vdupq_n_f64(simd_extract(c, LANE as u32)))
+}
+
+/// Floating-point fused multiply-add to accumulator
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfmas_lane_f32<const LANE: i32>(a: f32, b: f32, c: float32x2_t) -> f32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fma.f32")]
+ fn vfmas_lane_f32_(a: f32, b: f32, c: f32) -> f32;
+ }
+ static_assert_imm1!(LANE);
+ let c: f32 = simd_extract(c, LANE as u32);
+ vfmas_lane_f32_(b, c, a)
+}
+
+/// Floating-point fused multiply-add to accumulator
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfmas_laneq_f32<const LANE: i32>(a: f32, b: f32, c: float32x4_t) -> f32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fma.f32")]
+ fn vfmas_laneq_f32_(a: f32, b: f32, c: f32) -> f32;
+ }
+ static_assert_imm2!(LANE);
+ let c: f32 = simd_extract(c, LANE as u32);
+ vfmas_laneq_f32_(b, c, a)
+}
+
+/// Floating-point fused multiply-add to accumulator
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmadd, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfmad_lane_f64<const LANE: i32>(a: f64, b: f64, c: float64x1_t) -> f64 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fma.f64")]
+ fn vfmad_lane_f64_(a: f64, b: f64, c: f64) -> f64;
+ }
+ static_assert!(LANE : i32 where LANE == 0);
+ let c: f64 = simd_extract(c, LANE as u32);
+ vfmad_lane_f64_(b, c, a)
+}
+
+/// Floating-point fused multiply-add to accumulator
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfmad_laneq_f64<const LANE: i32>(a: f64, b: f64, c: float64x2_t) -> f64 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fma.f64")]
+ fn vfmad_laneq_f64_(a: f64, b: f64, c: f64) -> f64;
+ }
+ static_assert_imm1!(LANE);
+ let c: f64 = simd_extract(c, LANE as u32);
+ vfmad_laneq_f64_(b, c, a)
+}
+
+/// Floating-point fused multiply-subtract from accumulator
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmsub))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfms_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t {
+ let b: float64x1_t = simd_neg(b);
+ vfma_f64(a, b, c)
+}
+
+/// Floating-point fused multiply-subtract from accumulator
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmls))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfmsq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t {
+ let b: float64x2_t = simd_neg(b);
+ vfmaq_f64(a, b, c)
+}
+
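+// The fused subtracts negate `b` and reuse the fused add, so
+// `vfms_f64(a, b, c)` computes `a - b * c`, again with a single rounding.
+// A sketch:
+//
+//     let a: float64x1_t = core::mem::transmute([10.0f64]);
+//     let b: float64x1_t = core::mem::transmute([2.0f64]);
+//     let c: float64x1_t = core::mem::transmute([3.0f64]);
+//     let r = vfms_f64(a, b, c); // lane 0 == 10.0 - 2.0 * 3.0 == 4.0
+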
+/// Floating-point fused multiply-subtract from accumulator (vector)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmsub))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfms_n_f64(a: float64x1_t, b: float64x1_t, c: f64) -> float64x1_t {
+ vfms_f64(a, b, vdup_n_f64(c))
+}
+
+/// Floating-point fused multiply-subtract from accumulator (vector)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmls))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfmsq_n_f64(a: float64x2_t, b: float64x2_t, c: f64) -> float64x2_t {
+ vfmsq_f64(a, b, vdupq_n_f64(c))
+}
+
+/// Floating-point fused multiply-subtract from accumulator
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmls, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfms_lane_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
+ static_assert_imm1!(LANE);
+ vfms_f32(a, b, vdup_n_f32(simd_extract(c, LANE as u32)))
+}
+
+/// Floating-point fused multiply-subtract from accumulator
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmls, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfms_laneq_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t, c: float32x4_t) -> float32x2_t {
+ static_assert_imm2!(LANE);
+ vfms_f32(a, b, vdup_n_f32(simd_extract(c, LANE as u32)))
+}
+
+/// Floating-point fused multiply-subtract from accumulator
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmls, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfmsq_lane_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t, c: float32x2_t) -> float32x4_t {
+ static_assert_imm1!(LANE);
+ vfmsq_f32(a, b, vdupq_n_f32(simd_extract(c, LANE as u32)))
+}
+
+/// Floating-point fused multiply-subtract from accumulator
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmls, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfmsq_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
+ static_assert_imm2!(LANE);
+ vfmsq_f32(a, b, vdupq_n_f32(simd_extract(c, LANE as u32)))
+}
+
+/// Floating-point fused multiply-subtract from accumulator
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmsub, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfms_lane_f64<const LANE: i32>(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t {
+ static_assert!(LANE : i32 where LANE == 0);
+ vfms_f64(a, b, vdup_n_f64(simd_extract(c, LANE as u32)))
+}
+
+/// Floating-point fused multiply-subtract from accumulator
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmls, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfms_laneq_f64<const LANE: i32>(a: float64x1_t, b: float64x1_t, c: float64x2_t) -> float64x1_t {
+ static_assert_imm1!(LANE);
+ vfms_f64(a, b, vdup_n_f64(simd_extract(c, LANE as u32)))
+}
+
+/// Floating-point fused multiply-subtract from accumulator
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmls, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfmsq_lane_f64<const LANE: i32>(a: float64x2_t, b: float64x2_t, c: float64x1_t) -> float64x2_t {
+ static_assert!(LANE : i32 where LANE == 0);
+ vfmsq_f64(a, b, vdupq_n_f64(simd_extract(c, LANE as u32)))
+}
+
+/// Floating-point fused multiply-subtract from accumulator
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmls, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfmsq_laneq_f64<const LANE: i32>(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t {
+ static_assert_imm1!(LANE);
+ vfmsq_f64(a, b, vdupq_n_f64(simd_extract(c, LANE as u32)))
+}
+
+/// Floating-point fused multiply-subtract from accumulator
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmls, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfmss_lane_f32<const LANE: i32>(a: f32, b: f32, c: float32x2_t) -> f32 {
+ vfmas_lane_f32::<LANE>(a, -b, c)
+}
+
+/// Floating-point fused multiply-subtract from accumulator
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmls, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfmss_laneq_f32<const LANE: i32>(a: f32, b: f32, c: float32x4_t) -> f32 {
+ vfmas_laneq_f32::<LANE>(a, -b, c)
+}
+
+/// Floating-point fused multiply-subtract from accumulator
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmsub, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfmsd_lane_f64<const LANE: i32>(a: f64, b: f64, c: float64x1_t) -> f64 {
+ vfmad_lane_f64::<LANE>(a, -b, c)
+}
+
+/// Floating-point fused multiply-subtract from accumulator
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmls, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfmsd_laneq_f64<const LANE: i32>(a: f64, b: f64, c: float64x2_t) -> f64 {
+ vfmad_laneq_f64::<LANE>(a, -b, c)
+}
+
+/// Divide
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fdiv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdiv_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+ simd_div(a, b)
+}
+
+/// Divide
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fdiv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdivq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+ simd_div(a, b)
+}
+
+/// Divide
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fdiv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdiv_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
+ simd_div(a, b)
+}
+
+/// Divide
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fdiv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdivq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+ simd_div(a, b)
+}
+
+/// Subtract
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fsub))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsub_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
+ simd_sub(a, b)
+}
+
+/// Subtract
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fsub))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsubq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+ simd_sub(a, b)
+}
+
+/// Subtract
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsubd_s64(a: i64, b: i64) -> i64 {
+ a.wrapping_sub(b)
+}
+
+/// Subtract
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsubd_u64(a: u64, b: u64) -> u64 {
+ a.wrapping_sub(b)
+}
+
+/// Add
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vaddd_s64(a: i64, b: i64) -> i64 {
+ a.wrapping_add(b)
+}
+
+/// Add
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vaddd_u64(a: u64, b: u64) -> u64 {
+ a.wrapping_add(b)
+}
+
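+// The scalar `d`-suffixed add/subtract wrap on overflow, matching the ADD and
+// SUB instruction semantics rather than Rust's checked arithmetic. A sketch:
+//
+//     assert_eq!(vaddd_s64(i64::MAX, 1), i64::MIN); // wraps, no panic
+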
+/// Floating-point add across vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(faddp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vaddv_f32(a: float32x2_t) -> f32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.faddv.f32.v2f32")]
+ fn vaddv_f32_(a: float32x2_t) -> f32;
+ }
+ vaddv_f32_(a)
+}
+
+/// Floating-point add across vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(faddp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vaddvq_f32(a: float32x4_t) -> f32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.faddv.f32.v4f32")]
+ fn vaddvq_f32_(a: float32x4_t) -> f32;
+ }
+ vaddvq_f32_(a)
+}
+
+/// Floating-point add across vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(faddp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vaddvq_f64(a: float64x2_t) -> f64 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.faddv.f64.v2f64")]
+ fn vaddvq_f64_(a: float64x2_t) -> f64;
+ }
+ vaddvq_f64_(a)
+}
+
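+// A reduction sketch: `vaddv_f32` sums both lanes with a single pairwise add:
+//
+//     let v: float32x2_t = core::mem::transmute([1.5f32, 2.5]);
+//     assert_eq!(vaddv_f32(v), 4.0);
+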
+/// Signed Add Long across Vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(saddlv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vaddlv_s16(a: int16x4_t) -> i32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.saddlv.i32.v4i16")]
+ fn vaddlv_s16_(a: int16x4_t) -> i32;
+ }
+ vaddlv_s16_(a)
+}
+
+/// Signed Add Long across Vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(saddlv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vaddlvq_s16(a: int16x8_t) -> i32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.saddlv.i32.v8i16")]
+ fn vaddlvq_s16_(a: int16x8_t) -> i32;
+ }
+ vaddlvq_s16_(a)
+}
+
+/// Signed Add Long across Vector
+#[inline]
+#[target_feature(enable = "neon")]
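+// No `saddlv` encoding exists for a two-lane `.2s` source, so LLVM lowers this
+// reduction to `saddlp`; hence the asserted mnemonic below.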
+#[cfg_attr(test, assert_instr(saddlp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vaddlv_s32(a: int32x2_t) -> i64 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.saddlv.i64.v2i32")]
+ fn vaddlv_s32_(a: int32x2_t) -> i64;
+ }
+ vaddlv_s32_(a)
+}
+
+/// Signed Add Long across Vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(saddlv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vaddlvq_s32(a: int32x4_t) -> i64 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.saddlv.i64.v4i32")]
+ fn vaddlvq_s32_(a: int32x4_t) -> i64;
+ }
+ vaddlvq_s32_(a)
+}
+
+/// Unsigned Add Long across Vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uaddlv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vaddlv_u16(a: uint16x4_t) -> u32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uaddlv.i32.v4i16")]
+ fn vaddlv_u16_(a: uint16x4_t) -> u32;
+ }
+ vaddlv_u16_(a)
+}
+
+/// Unsigned Add Long across Vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uaddlv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vaddlvq_u16(a: uint16x8_t) -> u32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uaddlv.i32.v8i16")]
+ fn vaddlvq_u16_(a: uint16x8_t) -> u32;
+ }
+ vaddlvq_u16_(a)
+}
+
+/// Unsigned Add Long across Vector
+#[inline]
+#[target_feature(enable = "neon")]
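+// As with the signed variant, the two-lane reduction lowers to `uaddlp`.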
+#[cfg_attr(test, assert_instr(uaddlp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vaddlv_u32(a: uint32x2_t) -> u64 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uaddlv.i64.v2i32")]
+ fn vaddlv_u32_(a: uint32x2_t) -> u64;
+ }
+ vaddlv_u32_(a)
+}
+
+/// Unsigned Add Long across Vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uaddlv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vaddlvq_u32(a: uint32x4_t) -> u64 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uaddlv.i64.v4i32")]
+ fn vaddlvq_u32_(a: uint32x4_t) -> u64;
+ }
+ vaddlvq_u32_(a)
+}
+
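+// The long reductions widen before summing, so the reduction itself cannot
+// overflow the element type. A sketch: four `u16::MAX` lanes only fit once
+// widened to u32:
+//
+//     let v: uint16x4_t = core::mem::transmute([u16::MAX; 4]);
+//     assert_eq!(vaddlv_u16(v), 262_140); // 4 * 65_535
+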
+/// Signed Subtract Wide
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ssubw))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsubw_high_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t {
+ let c: int8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
+ simd_sub(a, simd_cast(c))
+}
+
+/// Signed Subtract Wide
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ssubw))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsubw_high_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t {
+ let c: int16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]);
+ simd_sub(a, simd_cast(c))
+}
+
+/// Signed Subtract Wide
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ssubw))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsubw_high_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t {
+ let c: int32x2_t = simd_shuffle2!(b, b, [2, 3]);
+ simd_sub(a, simd_cast(c))
+}
+
+/// Unsigned Subtract Wide
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(usubw))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsubw_high_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t {
+ let c: uint8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
+ simd_sub(a, simd_cast(c))
+}
+
+/// Unsigned Subtract Wide
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(usubw))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsubw_high_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t {
+ let c: uint16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]);
+ simd_sub(a, simd_cast(c))
+}
+
+/// Unsigned Subtract Wide
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(usubw))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsubw_high_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t {
+ let c: uint32x2_t = simd_shuffle2!(b, b, [2, 3]);
+ simd_sub(a, simd_cast(c))
+}
+
+/// Signed Subtract Long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ssubl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsubl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t {
+ let c: int8x8_t = simd_shuffle8!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
+ let d: int16x8_t = simd_cast(c);
+ let e: int8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
+ let f: int16x8_t = simd_cast(e);
+ simd_sub(d, f)
+}
+
+/// Signed Subtract Long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ssubl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsubl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
+ let c: int16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]);
+ let d: int32x4_t = simd_cast(c);
+ let e: int16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]);
+ let f: int32x4_t = simd_cast(e);
+ simd_sub(d, f)
+}
+
+/// Signed Subtract Long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ssubl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsubl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
+ let c: int32x2_t = simd_shuffle2!(a, a, [2, 3]);
+ let d: int64x2_t = simd_cast(c);
+ let e: int32x2_t = simd_shuffle2!(b, b, [2, 3]);
+ let f: int64x2_t = simd_cast(e);
+ simd_sub(d, f)
+}
+
+/// Unsigned Subtract Long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(usubl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsubl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t {
+ let c: uint8x8_t = simd_shuffle8!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
+ let d: uint16x8_t = simd_cast(c);
+ let e: uint8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
+ let f: uint16x8_t = simd_cast(e);
+ simd_sub(d, f)
+}
+
+/// Unsigned Subtract Long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(usubl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsubl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
+ let c: uint16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]);
+ let d: uint32x4_t = simd_cast(c);
+ let e: uint16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]);
+ let f: uint32x4_t = simd_cast(e);
+ simd_sub(d, f)
+}
+
+/// Unsigned Subtract Long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(usubl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsubl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
+ let c: uint32x2_t = simd_shuffle2!(a, a, [2, 3]);
+ let d: uint64x2_t = simd_cast(c);
+ let e: uint32x2_t = simd_shuffle2!(b, b, [2, 3]);
+ let f: uint64x2_t = simd_cast(e);
+ simd_sub(d, f)
+}
+
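+// The widening subtracts above cast the operands up before subtracting, so
+// any difference of two i8 values (range -255..=255) is exact in the i16
+// result. A sketch using the upper halves:
+//
+//     let a: int8x16_t = core::mem::transmute([[0i8; 8], [127i8; 8]]);
+//     let b: int8x16_t = core::mem::transmute([[0i8; 8], [-128i8; 8]]);
+//     let r: int16x8_t = vsubl_high_s8(a, b); // every lane == 255
+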
+/// Bit clear and exclusive OR
+#[inline]
+#[target_feature(enable = "neon,sha3")]
+#[cfg_attr(test, assert_instr(bcax))]
+pub unsafe fn vbcaxq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crypto.bcaxs.v16i8")]
+ fn vbcaxq_s8_(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t;
+ }
+ vbcaxq_s8_(a, b, c)
+}
+
+/// Bit clear and exclusive OR
+#[inline]
+#[target_feature(enable = "neon,sha3")]
+#[cfg_attr(test, assert_instr(bcax))]
+pub unsafe fn vbcaxq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crypto.bcaxs.v8i16")]
+ fn vbcaxq_s16_(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t;
+ }
+ vbcaxq_s16_(a, b, c)
+}
+
+/// Bit clear and exclusive OR
+#[inline]
+#[target_feature(enable = "neon,sha3")]
+#[cfg_attr(test, assert_instr(bcax))]
+pub unsafe fn vbcaxq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crypto.bcaxs.v4i32")]
+ fn vbcaxq_s32_(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t;
+ }
+ vbcaxq_s32_(a, b, c)
+}
+
+/// Bit clear and exclusive OR
+#[inline]
+#[target_feature(enable = "neon,sha3")]
+#[cfg_attr(test, assert_instr(bcax))]
+pub unsafe fn vbcaxq_s64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crypto.bcaxs.v2i64")]
+ fn vbcaxq_s64_(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t;
+ }
+ vbcaxq_s64_(a, b, c)
+}
+
+/// Bit clear and exclusive OR
+#[inline]
+#[target_feature(enable = "neon,sha3")]
+#[cfg_attr(test, assert_instr(bcax))]
+pub unsafe fn vbcaxq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crypto.bcaxu.v16i8")]
+ fn vbcaxq_u8_(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t;
+ }
+ vbcaxq_u8_(a, b, c)
+}
+
+/// Bit clear and exclusive OR
+#[inline]
+#[target_feature(enable = "neon,sha3")]
+#[cfg_attr(test, assert_instr(bcax))]
+pub unsafe fn vbcaxq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crypto.bcaxu.v8i16")]
+ fn vbcaxq_u16_(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t;
+ }
+ vbcaxq_u16_(a, b, c)
+}
+
+/// Bit clear and exclusive OR
+#[inline]
+#[target_feature(enable = "neon,sha3")]
+#[cfg_attr(test, assert_instr(bcax))]
+pub unsafe fn vbcaxq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crypto.bcaxu.v4i32")]
+ fn vbcaxq_u32_(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t;
+ }
+ vbcaxq_u32_(a, b, c)
+}
+
+/// Bit clear and exclusive OR
+#[inline]
+#[target_feature(enable = "neon,sha3")]
+#[cfg_attr(test, assert_instr(bcax))]
+pub unsafe fn vbcaxq_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crypto.bcaxu.v2i64")]
+ fn vbcaxq_u64_(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t;
+ }
+ vbcaxq_u64_(a, b, c)
+}
+
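+// Per the ACLE description, BCAX computes `a ^ (b & !c)` in one instruction
+// ("bit clear and exclusive OR"). A sketch with one bit pattern:
+//
+//     let a: uint8x16_t = core::mem::transmute([0b1100u8; 16]);
+//     let b: uint8x16_t = core::mem::transmute([0b1010u8; 16]);
+//     let c: uint8x16_t = core::mem::transmute([0b0110u8; 16]);
+//     let r = vbcaxq_u8(a, b, c); // every lane == 0b0100
+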
+/// Floating-point complex add
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcadd))]
+pub unsafe fn vcadd_rot270_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcadd.rot270.v2f32")]
+ fn vcadd_rot270_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+ }
+ vcadd_rot270_f32_(a, b)
+}
+
+/// Floating-point complex add
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcadd))]
+pub unsafe fn vcaddq_rot270_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcadd.rot270.v4f32")]
+ fn vcaddq_rot270_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+ }
+ vcaddq_rot270_f32_(a, b)
+}
+
+/// Floating-point complex add
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcadd))]
+pub unsafe fn vcaddq_rot270_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcadd.rot270.v2f64")]
+ fn vcaddq_rot270_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+ }
+ vcaddq_rot270_f64_(a, b)
+}
+
+/// Floating-point complex add
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcadd))]
+pub unsafe fn vcadd_rot90_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcadd.rot90.v2f32")]
+ fn vcadd_rot90_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+ }
+ vcadd_rot90_f32_(a, b)
+}
+
+/// Floating-point complex add
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcadd))]
+pub unsafe fn vcaddq_rot90_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcadd.rot90.v4f32")]
+ fn vcaddq_rot90_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+ }
+ vcaddq_rot90_f32_(a, b)
+}
+
+/// Floating-point complex add
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcadd))]
+pub unsafe fn vcaddq_rot90_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcadd.rot90.v2f64")]
+ fn vcaddq_rot90_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+ }
+ vcaddq_rot90_f64_(a, b)
+}
+
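+// The complex adds treat consecutive (even, odd) lane pairs as (real, imag)
+// and add `b` rotated in the complex plane: rotation 90 yields
+// (a_re - b_im, a_im + b_re), rotation 270 yields (a_re + b_im, a_im - b_re).
+// A sketch: (1 + 2i) + i * (3 + 4i) == -3 + 5i:
+//
+//     let a: float32x2_t = core::mem::transmute([1.0f32, 2.0]);
+//     let b: float32x2_t = core::mem::transmute([3.0f32, 4.0]);
+//     let r = vcadd_rot90_f32(a, b); // lanes: -3.0, 5.0
+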
+/// Floating-point complex multiply accumulate
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla))]
+pub unsafe fn vcmla_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcmla.rot0.v2f32")]
+ fn vcmla_f32_(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t;
+ }
+ vcmla_f32_(a, b, c)
+}
+
+/// Floating-point complex multiply accumulate
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla))]
+pub unsafe fn vcmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcmla.rot0.v4f32")]
+ fn vcmlaq_f32_(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t;
+ }
+ vcmlaq_f32_(a, b, c)
+}
+
+/// Floating-point complex multiply accumulate
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla))]
+pub unsafe fn vcmlaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcmla.rot0.v2f64")]
+ fn vcmlaq_f64_(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t;
+ }
+ vcmlaq_f64_(a, b, c)
+}
+
+/// Floating-point complex multiply accumulate
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla))]
+pub unsafe fn vcmla_rot90_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcmla.rot90.v2f32")]
+ fn vcmla_rot90_f32_(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t;
+ }
+ vcmla_rot90_f32_(a, b, c)
+}
+
+/// Floating-point complex multiply accumulate
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla))]
+pub unsafe fn vcmlaq_rot90_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcmla.rot90.v4f32")]
+ fn vcmlaq_rot90_f32_(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t;
+ }
+ vcmlaq_rot90_f32_(a, b, c)
+}
+
+/// Floating-point complex multiply accumulate
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla))]
+pub unsafe fn vcmlaq_rot90_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcmla.rot90.v2f64")]
+ fn vcmlaq_rot90_f64_(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t;
+ }
+ vcmlaq_rot90_f64_(a, b, c)
+}
+
+/// Floating-point complex multiply accumulate
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla))]
+pub unsafe fn vcmla_rot180_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcmla.rot180.v2f32")]
+ fn vcmla_rot180_f32_(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t;
+ }
+ vcmla_rot180_f32_(a, b, c)
+}
+
+/// Floating-point complex multiply accumulate
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla))]
+pub unsafe fn vcmlaq_rot180_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcmla.rot180.v4f32")]
+ fn vcmlaq_rot180_f32_(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t;
+ }
+ vcmlaq_rot180_f32_(a, b, c)
+}
+
+/// Floating-point complex multiply accumulate
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla))]
+pub unsafe fn vcmlaq_rot180_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcmla.rot180.v2f64")]
+ fn vcmlaq_rot180_f64_(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t;
+ }
+ vcmlaq_rot180_f64_(a, b, c)
+}
+
+/// Floating-point complex multiply accumulate
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla))]
+pub unsafe fn vcmla_rot270_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcmla.rot270.v2f32")]
+ fn vcmla_rot270_f32_(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t;
+ }
+ vcmla_rot270_f32_(a, b, c)
+}
+
+/// Floating-point complex multiply accumulate
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla))]
+pub unsafe fn vcmlaq_rot270_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcmla.rot270.v4f32")]
+ fn vcmlaq_rot270_f32_(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t;
+ }
+ vcmlaq_rot270_f32_(a, b, c)
+}
+
+/// Floating-point complex multiply accumulate
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla))]
+pub unsafe fn vcmlaq_rot270_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcmla.rot270.v2f64")]
+ fn vcmlaq_rot270_f64_(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t;
+ }
+ vcmlaq_rot270_f64_(a, b, c)
+}
+
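+// A full complex multiply-accumulate takes two FCMLA steps: rotation 0
+// contributes (b_re * c_re, b_re * c_im) and rotation 90 contributes
+// (-b_im * c_im, b_im * c_re), so chaining them computes `a + b * c` over
+// complex lane pairs. A sketch:
+//
+//     let t = vcmla_f32(a, b, c);
+//     let r = vcmla_rot90_f32(t, b, c); // a + b * c, complex per lane pair
+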
+/// Floating-point complex multiply accumulate
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vcmla_lane_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
+ static_assert!(LANE : i32 where LANE == 0);
+ let c: float32x2_t = simd_shuffle2!(c, c, <const LANE: i32> [2 * LANE as u32, 2 * LANE as u32 + 1]);
+ vcmla_f32(a, b, c)
+}
+
+/// Floating-point complex multiply accumulate
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vcmla_laneq_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t, c: float32x4_t) -> float32x2_t {
+ static_assert_imm1!(LANE);
+ let c: float32x2_t = simd_shuffle2!(c, c, <const LANE: i32> [2 * LANE as u32, 2 * LANE as u32 + 1]);
+ vcmla_f32(a, b, c)
+}
+
+/// Floating-point complex multiply accumulate
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vcmlaq_lane_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t, c: float32x2_t) -> float32x4_t {
+ static_assert!(LANE : i32 where LANE == 0);
+ let c: float32x4_t = simd_shuffle4!(c, c, <const LANE: i32> [2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]);
+ vcmlaq_f32(a, b, c)
+}
+
+/// Floating-point complex multiply accumulate
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vcmlaq_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
+ static_assert_imm1!(LANE);
+ let c: float32x4_t = simd_shuffle4!(c, c, <const LANE: i32> [2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]);
+ vcmlaq_f32(a, b, c)
+}
+
+/// Floating-point complex multiply accumulate
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vcmla_rot90_lane_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
+ static_assert!(LANE : i32 where LANE == 0);
+ let c: float32x2_t = simd_shuffle2!(c, c, <const LANE: i32> [2 * LANE as u32, 2 * LANE as u32 + 1]);
+ vcmla_rot90_f32(a, b, c)
+}
+
+/// Floating-point complex multiply accumulate
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vcmla_rot90_laneq_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t, c: float32x4_t) -> float32x2_t {
+ static_assert_imm1!(LANE);
+ let c: float32x2_t = simd_shuffle2!(c, c, <const LANE: i32> [2 * LANE as u32, 2 * LANE as u32 + 1]);
+ vcmla_rot90_f32(a, b, c)
+}
+
+/// Floating-point complex multiply accumulate
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vcmlaq_rot90_lane_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t, c: float32x2_t) -> float32x4_t {
+ static_assert!(LANE : i32 where LANE == 0);
+ let c: float32x4_t = simd_shuffle4!(c, c, <const LANE: i32> [2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]);
+ vcmlaq_rot90_f32(a, b, c)
+}
+
+/// Floating-point complex multiply accumulate
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vcmlaq_rot90_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
+ static_assert_imm1!(LANE);
+ let c: float32x4_t = simd_shuffle4!(c, c, <const LANE: i32> [2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]);
+ vcmlaq_rot90_f32(a, b, c)
+}
+
+/// Floating-point complex multiply accumulate
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vcmla_rot180_lane_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
+ static_assert!(LANE : i32 where LANE == 0);
+ let c: float32x2_t = simd_shuffle2!(c, c, <const LANE: i32> [2 * LANE as u32, 2 * LANE as u32 + 1]);
+ vcmla_rot180_f32(a, b, c)
+}
+
+/// Floating-point complex multiply accumulate
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vcmla_rot180_laneq_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t, c: float32x4_t) -> float32x2_t {
+ static_assert_imm1!(LANE);
+ let c: float32x2_t = simd_shuffle2!(c, c, <const LANE: i32> [2 * LANE as u32, 2 * LANE as u32 + 1]);
+ vcmla_rot180_f32(a, b, c)
+}
+
+/// Floating-point complex multiply accumulate
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vcmlaq_rot180_lane_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t, c: float32x2_t) -> float32x4_t {
+ static_assert!(LANE : i32 where LANE == 0);
+ let c: float32x4_t = simd_shuffle4!(c, c, <const LANE: i32> [2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]);
+ vcmlaq_rot180_f32(a, b, c)
+}
+
+/// Floating-point complex multiply accumulate
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vcmlaq_rot180_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
+ static_assert_imm1!(LANE);
+ let c: float32x4_t = simd_shuffle4!(c, c, <const LANE: i32> [2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]);
+ vcmlaq_rot180_f32(a, b, c)
+}
+
+/// Floating-point complex multiply accumulate
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vcmla_rot270_lane_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
+ static_assert!(LANE : i32 where LANE == 0);
+ let c: float32x2_t = simd_shuffle2!(c, c, <const LANE: i32> [2 * LANE as u32, 2 * LANE as u32 + 1]);
+ vcmla_rot270_f32(a, b, c)
+}
+
+/// Floating-point complex multiply accumulate
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vcmla_rot270_laneq_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t, c: float32x4_t) -> float32x2_t {
+ static_assert_imm1!(LANE);
+ let c: float32x2_t = simd_shuffle2!(c, c, <const LANE: i32> [2 * LANE as u32, 2 * LANE as u32 + 1]);
+ vcmla_rot270_f32(a, b, c)
+}
+
+/// Floating-point complex multiply accumulate
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vcmlaq_rot270_lane_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t, c: float32x2_t) -> float32x4_t {
+ static_assert!(LANE : i32 where LANE == 0);
+ let c: float32x4_t = simd_shuffle4!(c, c, <const LANE: i32> [2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]);
+ vcmlaq_rot270_f32(a, b, c)
+}
+
+/// Floating-point complex multiply accumulate
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vcmlaq_rot270_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
+ static_assert_imm1!(LANE);
+ let c: float32x4_t = simd_shuffle4!(c, c, <const LANE: i32> [2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]);
+ vcmlaq_rot270_f32(a, b, c)
+}
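+
+// The `lane` forms above broadcast one complex element (a real/imaginary
+// lane pair) from `c` before accumulating, which is why the shuffle
+// indices come in `2 * LANE, 2 * LANE + 1` pairs. A minimal usage sketch
+// with hypothetical values; issuing the rot0 form followed by the rot90
+// form composes a full complex multiply-add per complex lane:
+//
+// unsafe {
+//     let acc = vdupq_n_f32(0.0);
+//     let b = vld1q_f32([1.0, 2.0, 3.0, 4.0].as_ptr()); // (1+2i), (3+4i)
+//     let c = vld1q_f32([5.0, 6.0, 7.0, 8.0].as_ptr()); // (5+6i), (7+8i)
+//     let t = vcmlaq_f32(acc, b, c);
+//     let _full_mla = vcmlaq_rot90_f32(t, b, c); // acc + b * c
+// }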
+
+/// Dot product arithmetic
+#[inline]
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(test, assert_instr(sdot))]
+pub unsafe fn vdot_s32(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sdot.v2i32.v8i8")]
+ fn vdot_s32_(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t;
+ }
+ vdot_s32_(a, b, c)
+}
+
+/// Dot product arithmetic
+#[inline]
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(test, assert_instr(sdot))]
+pub unsafe fn vdotq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sdot.v4i32.v16i8")]
+ fn vdotq_s32_(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t;
+ }
+ vdotq_s32_(a, b, c)
+}
+
+/// Dot product arithmetic
+#[inline]
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(test, assert_instr(udot))]
+pub unsafe fn vdot_u32(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) -> uint32x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.udot.v2i32.v8i8")]
+ fn vdot_u32_(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) -> uint32x2_t;
+ }
+ vdot_u32_(a, b, c)
+}
+
+/// Dot product arithmetic
+#[inline]
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(test, assert_instr(udot))]
+pub unsafe fn vdotq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.udot.v4i32.v16i8")]
+ fn vdotq_u32_(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t;
+ }
+ vdotq_u32_(a, b, c)
+}
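+
+// Each 32-bit lane of the result accumulates the dot product of the
+// corresponding four byte lanes of `b` and `c`. A minimal sketch with
+// hypothetical values:
+//
+// unsafe {
+//     let a = vdup_n_s32(10);
+//     let b = vdup_n_s8(2);
+//     let c = vdup_n_s8(3);
+//     // every lane: 10 + 4 * (2 * 3) = 34
+//     let _ = vdot_s32(a, b, c);
+// }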
+
+/// Dot product arithmetic
+#[inline]
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(test, assert_instr(sdot, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vdot_lane_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t {
+ static_assert_imm1!(LANE);
+ let c: int8x8_t = simd_shuffle8!(c, c, <const LANE: i32> [4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3, 4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3]);
+ vdot_s32(a, b, c)
+}
+
+/// Dot product arithmetic
+#[inline]
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(test, assert_instr(sdot, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vdot_laneq_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: int8x16_t) -> int32x2_t {
+ static_assert_imm2!(LANE);
+ let c: int8x8_t = simd_shuffle8!(c, c, <const LANE: i32> [4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3, 4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3]);
+ vdot_s32(a, b, c)
+}
+
+/// Dot product arithmetic
+#[inline]
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(test, assert_instr(sdot, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vdotq_lane_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: int8x8_t) -> int32x4_t {
+ static_assert_imm1!(LANE);
+ let c: int8x16_t = simd_shuffle16!(c, c, <const LANE: i32> [4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3, 4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3, 4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3, 4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3]);
+ vdotq_s32(a, b, c)
+}
+
+/// Dot product arithmetic
+#[inline]
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(test, assert_instr(sdot, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vdotq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t {
+ static_assert_imm2!(LANE);
+ let c: int8x16_t = simd_shuffle16!(c, c, <const LANE: i32> [4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3, 4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3, 4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3, 4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3]);
+ vdotq_s32(a, b, c)
+}
+
+/// Dot product arithmetic
+#[inline]
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(test, assert_instr(udot, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vdot_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) -> uint32x2_t {
+ static_assert_imm1!(LANE);
+ let c: uint8x8_t = simd_shuffle8!(c, c, <const LANE: i32> [4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3, 4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3]);
+ vdot_u32(a, b, c)
+}
+
+/// Dot product arithmetic
+#[inline]
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(test, assert_instr(udot, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vdot_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint8x8_t, c: uint8x16_t) -> uint32x2_t {
+ static_assert_imm2!(LANE);
+ let c: uint8x8_t = simd_shuffle8!(c, c, <const LANE: i32> [4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3, 4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3]);
+ vdot_u32(a, b, c)
+}
+
+/// Dot product arithmetic
+#[inline]
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(test, assert_instr(udot, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vdotq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint8x16_t, c: uint8x8_t) -> uint32x4_t {
+ static_assert_imm1!(LANE);
+ let c: uint8x16_t = simd_shuffle16!(c, c, <const LANE: i32> [4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3, 4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3, 4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3, 4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3]);
+ vdotq_u32(a, b, c)
+}
+
+/// Dot product arithmetic
+#[inline]
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(test, assert_instr(udot, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vdotq_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t {
+ static_assert_imm2!(LANE);
+ let c: uint8x16_t = simd_shuffle16!(c, c, <const LANE: i32> [4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3, 4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3, 4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3, 4 * LANE as u32, 4 * LANE as u32 + 1, 4 * LANE as u32 + 2, 4 * LANE as u32 + 3]);
+ vdotq_u32(a, b, c)
+}
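+
+// In the `lane` forms, `LANE` indexes one 32-bit group of four bytes in
+// `c`; the shuffle repeats indices `4 * LANE ..= 4 * LANE + 3` to
+// broadcast that group before the ordinary dot product. Sketch with
+// hypothetical values:
+//
+// unsafe {
+//     let a = vdup_n_u32(0);
+//     let b = vdup_n_u8(1);
+//     let c = vcombine_u8(vdup_n_u8(2), vdup_n_u8(7));
+//     // LANE = 3 selects bytes 12..=15 (all 7): every lane = 4 * 7 = 28
+//     let _ = vdot_laneq_u32::<3>(a, b, c);
+// }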
+
+/// Maximum (vector)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmax))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmax_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmax.v1f64")]
+ fn vmax_f64_(a: float64x1_t, b: float64x1_t) -> float64x1_t;
+ }
+ vmax_f64_(a, b)
+}
+
+/// Maximum (vector)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmax))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmax.v2f64")]
+ fn vmaxq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+ }
+ vmaxq_f64_(a, b)
+}
+
+/// Floating-point Maximum Number (vector)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmaxnm))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmaxnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnm.v1f64")]
+ fn vmaxnm_f64_(a: float64x1_t, b: float64x1_t) -> float64x1_t;
+ }
+ vmaxnm_f64_(a, b)
+}
+
+/// Floating-point Maximum Number (vector)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmaxnm))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnm.v2f64")]
+ fn vmaxnmq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+ }
+ vmaxnmq_f64_(a, b)
+}
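+
+// `vmax*` follows FMAX semantics (a NaN operand produces NaN), while
+// `vmaxnm*` follows FMAXNM / IEEE 754-2008 maxNum (a single quiet NaN is
+// ignored in favour of the numeric operand). Sketch:
+//
+// unsafe {
+//     let a = vdupq_n_f64(f64::NAN);
+//     let b = vdupq_n_f64(1.0);
+//     let _nan = vmaxq_f64(a, b);   // both lanes NaN
+//     let _one = vmaxnmq_f64(a, b); // both lanes 1.0
+// }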
+
+/// Floating-point maximum number across vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmaxnmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmaxnmv_f32(a: float32x2_t) -> f32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnmv.f32.v2f32")]
+ fn vmaxnmv_f32_(a: float32x2_t) -> f32;
+ }
+ vmaxnmv_f32_(a)
+}
+
+/// Floating-point maximum number across vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmaxnmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmaxnmvq_f64(a: float64x2_t) -> f64 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnmv.f64.v2f64")]
+ fn vmaxnmvq_f64_(a: float64x2_t) -> f64;
+ }
+ vmaxnmvq_f64_(a)
+}
+
+/// Floating-point maximum number across vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmaxnmv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmaxnmvq_f32(a: float32x4_t) -> f32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnmv.f32.v4f32")]
+ fn vmaxnmvq_f32_(a: float32x4_t) -> f32;
+ }
+ vmaxnmvq_f32_(a)
+}
+
+/// Floating-point Maximum Number Pairwise (vector).
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmaxnmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnmp.v2f32")]
+ fn vpmaxnm_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+ }
+ vpmaxnm_f32_(a, b)
+}
+
+/// Floating-point Maximum Number Pairwise (vector).
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmaxnmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnmp.v2f64")]
+ fn vpmaxnmq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+ }
+ vpmaxnmq_f64_(a, b)
+}
+
+/// Floating-point Maximum Number Pairwise (vector).
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmaxnmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnmp.v4f32")]
+ fn vpmaxnmq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+ }
+ vpmaxnmq_f32_(a, b)
+}
+
+/// Floating-point maximum number pairwise
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmaxnmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpmaxnms_f32(a: float32x2_t) -> f32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnmv.f32.v2f32")]
+ fn vpmaxnms_f32_(a: float32x2_t) -> f32;
+ }
+ vpmaxnms_f32_(a)
+}
+
+/// Floating-point maximum number pairwise
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmaxnmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpmaxnmqd_f64(a: float64x2_t) -> f64 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnmv.f64.v2f64")]
+ fn vpmaxnmqd_f64_(a: float64x2_t) -> f64;
+ }
+ vpmaxnmqd_f64_(a)
+}
+
+/// Floating-point maximum pairwise
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmaxp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpmaxs_f32(a: float32x2_t) -> f32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxv.f32.v2f32")]
+ fn vpmaxs_f32_(a: float32x2_t) -> f32;
+ }
+ vpmaxs_f32_(a)
+}
+
+/// Floating-point maximum pairwise
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmaxp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpmaxqd_f64(a: float64x2_t) -> f64 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxv.f64.v2f64")]
+ fn vpmaxqd_f64_(a: float64x2_t) -> f64;
+ }
+ vpmaxqd_f64_(a)
+}
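+
+// The pairwise forms reduce adjacent lane pairs across the concatenation
+// of `a` and `b`, while the scalar forms reduce a single vector's own
+// pair. Sketch with hypothetical values:
+//
+// unsafe {
+//     let a = vld1q_f32([1.0, 4.0, 2.0, 8.0].as_ptr());
+//     let _pairs = vpmaxnmq_f32(a, a);        // [4.0, 8.0, 4.0, 8.0]
+//     let _one = vpmaxs_f32(vget_low_f32(a)); // max(1.0, 4.0) = 4.0
+// }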
+
+/// Minimum (vector)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmin))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmin_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmin.v1f64")]
+ fn vmin_f64_(a: float64x1_t, b: float64x1_t) -> float64x1_t;
+ }
+ vmin_f64_(a, b)
+}
+
+/// Minimum (vector)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmin))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vminq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmin.v2f64")]
+ fn vminq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+ }
+ vminq_f64_(a, b)
+}
+
+/// Floating-point Minimum Number (vector)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fminnm))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vminnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnm.v1f64")]
+ fn vminnm_f64_(a: float64x1_t, b: float64x1_t) -> float64x1_t;
+ }
+ vminnm_f64_(a, b)
+}
+
+/// Floating-point Minimum Number (vector)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fminnm))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnm.v2f64")]
+ fn vminnmq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+ }
+ vminnmq_f64_(a, b)
+}
+
+/// Floating-point minimum number across vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fminnmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vminnmv_f32(a: float32x2_t) -> f32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnmv.f32.v2f32")]
+ fn vminnmv_f32_(a: float32x2_t) -> f32;
+ }
+ vminnmv_f32_(a)
+}
+
+/// Floating-point minimum number across vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fminnmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vminnmvq_f64(a: float64x2_t) -> f64 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnmv.f64.v2f64")]
+ fn vminnmvq_f64_(a: float64x2_t) -> f64;
+ }
+ vminnmvq_f64_(a)
+}
+
+/// Floating-point minimum number across vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fminnmv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vminnmvq_f32(a: float32x4_t) -> f32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnmv.f32.v4f32")]
+ fn vminnmvq_f32_(a: float32x4_t) -> f32;
+ }
+ vminnmvq_f32_(a)
+}
+
+/// Vector move
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sxtl2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmovl_high_s8(a: int8x16_t) -> int16x8_t {
+ let a: int8x8_t = simd_shuffle8!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
+ vmovl_s8(a)
+}
+
+/// Vector move
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sxtl2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmovl_high_s16(a: int16x8_t) -> int32x4_t {
+ let a: int16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]);
+ vmovl_s16(a)
+}
+
+/// Vector move
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sxtl2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmovl_high_s32(a: int32x4_t) -> int64x2_t {
+ let a: int32x2_t = simd_shuffle2!(a, a, [2, 3]);
+ vmovl_s32(a)
+}
+
+/// Vector move
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uxtl2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmovl_high_u8(a: uint8x16_t) -> uint16x8_t {
+ let a: uint8x8_t = simd_shuffle8!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
+ vmovl_u8(a)
+}
+
+/// Vector move
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uxtl2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmovl_high_u16(a: uint16x8_t) -> uint32x4_t {
+ let a: uint16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]);
+ vmovl_u16(a)
+}
+
+/// Vector move
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uxtl2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmovl_high_u32(a: uint32x4_t) -> uint64x2_t {
+ let a: uint32x2_t = simd_shuffle2!(a, a, [2, 3]);
+ vmovl_u32(a)
+}
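+
+// Each `vmovl_high_*` widens the upper half of its input, equivalent to
+// `vmovl_*(vget_high_*(a))` but expected to assemble to a single
+// SXTL2/UXTL2. Sketch:
+//
+// unsafe {
+//     let a = vdupq_n_s16(-1);
+//     let wide: int32x4_t = vmovl_high_s16(a); // every lane -1i32
+//     let same = vmovl_s16(vget_high_s16(a));
+//     let _ = (wide, same);
+// }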
+
+/// Floating-point add pairwise
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(faddp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpaddq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.faddp.v4f32")]
+ fn vpaddq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+ }
+ vpaddq_f32_(a, b)
+}
+
+/// Floating-point add pairwise
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(faddp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpaddq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.faddp.v2f64")]
+ fn vpaddq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+ }
+ vpaddq_f64_(a, b)
+}
+
+/// Floating-point add pairwise
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpadds_f32(a: float32x2_t) -> f32 {
+ let a1: f32 = simd_extract(a, 0);
+ let a2: f32 = simd_extract(a, 1);
+ a1 + a2
+}
+
+/// Floating-point add pairwise
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpaddd_f64(a: float64x2_t) -> f64 {
+ let a1: f64 = simd_extract(a, 0);
+ let a2: f64 = simd_extract(a, 1);
+ a1 + a2
+}
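+
+// The scalar pairwise adds reduce a two-lane vector to the sum of its
+// lanes; they are open-coded with `simd_extract`, so the generated code
+// may be a plain scalar add rather than a dedicated FADDP (hence the
+// relaxed `assert_instr(nop)`). Sketch:
+//
+// unsafe {
+//     let v = vld1_f32([1.5, 2.5].as_ptr());
+//     let _sum = vpadds_f32(v); // 4.0
+// }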
+
+/// Floating-point Minimum Number Pairwise (vector).
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fminnmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnmp.v2f32")]
+ fn vpminnm_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+ }
+ vpminnm_f32_(a, b)
+}
+
+/// Floating-point Minimum Number Pairwise (vector).
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fminnmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnmp.v2f64")]
+ fn vpminnmq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+ }
+ vpminnmq_f64_(a, b)
+}
+
+/// Floating-point Minimum Number Pairwise (vector).
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fminnmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnmp.v4f32")]
+ fn vpminnmq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+ }
+ vpminnmq_f32_(a, b)
+}
+
+/// Floating-point minimum number pairwise
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fminnmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpminnms_f32(a: float32x2_t) -> f32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnmv.f32.v2f32")]
+ fn vpminnms_f32_(a: float32x2_t) -> f32;
+ }
+ vpminnms_f32_(a)
+}
+
+/// Floating-point minimum number pairwise
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fminnmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpminnmqd_f64(a: float64x2_t) -> f64 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnmv.f64.v2f64")]
+ fn vpminnmqd_f64_(a: float64x2_t) -> f64;
+ }
+ vpminnmqd_f64_(a)
+}
+
+/// Floating-point minimum pairwise
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fminp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpmins_f32(a: float32x2_t) -> f32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminv.f32.v2f32")]
+ fn vpmins_f32_(a: float32x2_t) -> f32;
+ }
+ vpmins_f32_(a)
+}
+
+/// Floating-point minimum pairwise
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fminp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpminqd_f64(a: float64x2_t) -> f64 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminv.f64.v2f64")]
+ fn vpminqd_f64_(a: float64x2_t) -> f64;
+ }
+ vpminqd_f64_(a)
+}
+
+/// Signed saturating doubling multiply long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmull))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmullh_s16(a: i16, b: i16) -> i32 {
+ let a: int16x4_t = vdup_n_s16(a);
+ let b: int16x4_t = vdup_n_s16(b);
+ simd_extract(vqdmull_s16(a, b), 0)
+}
+
+/// Signed saturating doubling multiply long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmull))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmulls_s32(a: i32, b: i32) -> i64 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmulls.scalar")]
+ fn vqdmulls_s32_(a: i32, b: i32) -> i64;
+ }
+ vqdmulls_s32_(a, b)
+}
+
+/// Signed saturating doubling multiply long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmull2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmull_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
+ let a: int16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]);
+ let b: int16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]);
+ vqdmull_s16(a, b)
+}
+
+/// Signed saturating doubling multiply long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmull2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmull_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
+ let a: int32x2_t = simd_shuffle2!(a, a, [2, 3]);
+ let b: int32x2_t = simd_shuffle2!(b, b, [2, 3]);
+ vqdmull_s32(a, b)
+}
+
+/// Signed saturating doubling multiply long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmull2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmull_high_n_s16(a: int16x8_t, b: i16) -> int32x4_t {
+ let a: int16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]);
+ let b: int16x4_t = vdup_n_s16(b);
+ vqdmull_s16(a, b)
+}
+
+/// Signed saturating doubling multiply long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmull2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmull_high_n_s32(a: int32x4_t, b: i32) -> int64x2_t {
+ let a: int32x2_t = simd_shuffle2!(a, a, [2, 3]);
+ let b: int32x2_t = vdup_n_s32(b);
+ vqdmull_s32(a, b)
+}
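+
+// Saturating doubling multiply long computes `2 * a * b` in the
+// double-width type, saturating on overflow; the only overflowing case
+// is `MIN * MIN`. The `high` forms apply this to the upper halves.
+// Sketch:
+//
+// unsafe {
+//     assert_eq!(vqdmullh_s16(2, 3), 12);
+//     assert_eq!(vqdmullh_s16(i16::MIN, i16::MIN), i32::MAX); // saturates
+// }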
+
+/// Vector saturating doubling long multiply by scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmull, N = 4))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmull_laneq_s16<const N: i32>(a: int16x4_t, b: int16x8_t) -> int32x4_t {
+ static_assert_imm3!(N);
+ let b: int16x4_t = simd_shuffle4!(b, b, <const N: i32> [N as u32, N as u32, N as u32, N as u32]);
+ vqdmull_s16(a, b)
+}
+
+/// Vector saturating doubling long multiply by scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmull, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmull_laneq_s32<const N: i32>(a: int32x2_t, b: int32x4_t) -> int64x2_t {
+ static_assert_imm2!(N);
+ let b: int32x2_t = simd_shuffle2!(b, b, <const N: i32> [N as u32, N as u32]);
+ vqdmull_s32(a, b)
+}
+
+/// Signed saturating doubling multiply long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmull, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmullh_lane_s16<const N: i32>(a: i16, b: int16x4_t) -> i32 {
+ static_assert_imm2!(N);
+ let b: i16 = simd_extract(b, N as u32);
+ vqdmullh_s16(a, b)
+}
+
+/// Signed saturating doubling multiply long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmull, N = 4))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmullh_laneq_s16<const N: i32>(a: i16, b: int16x8_t) -> i32 {
+ static_assert_imm3!(N);
+ let b: i16 = simd_extract(b, N as u32);
+ vqdmullh_s16(a, b)
+}
+
+/// Signed saturating doubling multiply long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmull, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmulls_lane_s32<const N: i32>(a: i32, b: int32x2_t) -> i64 {
+ static_assert_imm1!(N);
+ let b: i32 = simd_extract(b, N as u32);
+ vqdmulls_s32(a, b)
+}
+
+/// Signed saturating doubling multiply long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmull, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmulls_laneq_s32<const N: i32>(a: i32, b: int32x4_t) -> i64 {
+ static_assert_imm2!(N);
+ let b: i32 = simd_extract(b, N as u32);
+ vqdmulls_s32(a, b)
+}
+
+/// Signed saturating doubling multiply long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmull2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmull_high_lane_s16<const N: i32>(a: int16x8_t, b: int16x4_t) -> int32x4_t {
+ static_assert_imm2!(N);
+ let a: int16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]);
+ let b: int16x4_t = simd_shuffle4!(b, b, <const N: i32> [N as u32, N as u32, N as u32, N as u32]);
+ vqdmull_s16(a, b)
+}
+
+/// Signed saturating doubling multiply long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmull2, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmull_high_lane_s32<const N: i32>(a: int32x4_t, b: int32x2_t) -> int64x2_t {
+ static_assert_imm1!(N);
+ let a: int32x2_t = simd_shuffle2!(a, a, [2, 3]);
+ let b: int32x2_t = simd_shuffle2!(b, b, <const N: i32> [N as u32, N as u32]);
+ vqdmull_s32(a, b)
+}
+
+/// Signed saturating doubling multiply long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmull2, N = 4))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmull_high_laneq_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int32x4_t {
+ static_assert_imm3!(N);
+ let a: int16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]);
+ let b: int16x4_t = simd_shuffle4!(b, b, <const N: i32> [N as u32, N as u32, N as u32, N as u32]);
+ vqdmull_s16(a, b)
+}
+
+/// Signed saturating doubling multiply long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmull2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmull_high_laneq_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int64x2_t {
+ static_assert_imm2!(N);
+ let a: int32x2_t = simd_shuffle2!(a, a, [2, 3]);
+ let b: int32x2_t = simd_shuffle2!(b, b, <const N: i32> [N as u32, N as u32]);
+ vqdmull_s32(a, b)
+}
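+
+// The `lane`/`laneq` forms splat lane `N` of `b` before the multiply,
+// and the `high_*` forms additionally take the upper half of `a` first.
+// Sketch with hypothetical values:
+//
+// unsafe {
+//     let a = vdup_n_s16(3);
+//     let b = vld1q_s16([0, 1, 2, 3, 4, 5, 6, 7].as_ptr());
+//     // every lane: 2 * 3 * 5 = 30
+//     let _ = vqdmull_laneq_s16::<5>(a, b);
+// }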
+
+/// Signed saturating doubling multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmlal2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmlal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
+ vqaddq_s32(a, vqdmull_high_s16(b, c))
+}
+
+/// Signed saturating doubling multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmlal2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmlal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
+ vqaddq_s64(a, vqdmull_high_s32(b, c))
+}
+
+/// Signed saturating doubling multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmlal2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmlal_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t {
+ vqaddq_s32(a, vqdmull_high_n_s16(b, c))
+}
+
+/// Signed saturating doubling multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmlal2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmlal_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t {
+ vqaddq_s64(a, vqdmull_high_n_s32(b, c))
+}
+
+/// Vector widening saturating doubling multiply accumulate with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmlal, N = 2))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmlal_laneq_s16<const N: i32>(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t {
+ static_assert_imm3!(N);
+ vqaddq_s32(a, vqdmull_laneq_s16::<N>(b, c))
+}
+
+/// Vector widening saturating doubling multiply accumulate with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmlal, N = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmlal_laneq_s32<const N: i32>(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t {
+ static_assert_imm2!(N);
+ vqaddq_s64(a, vqdmull_laneq_s32::<N>(b, c))
+}
+
+/// Signed saturating doubling multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmlal2, N = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmlal_high_lane_s16<const N: i32>(a: int32x4_t, b: int16x8_t, c: int16x4_t) -> int32x4_t {
+ static_assert_imm2!(N);
+ vqaddq_s32(a, vqdmull_high_lane_s16::<N>(b, c))
+}
+
+/// Signed saturating doubling multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmlal2, N = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmlal_high_laneq_s16<const N: i32>(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
+ static_assert_imm3!(N);
+ vqaddq_s32(a, vqdmull_high_laneq_s16::<N>(b, c))
+}
+
+/// Signed saturating doubling multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmlal2, N = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmlal_high_lane_s32<const N: i32>(a: int64x2_t, b: int32x4_t, c: int32x2_t) -> int64x2_t {
+ static_assert_imm1!(N);
+ vqaddq_s64(a, vqdmull_high_lane_s32::<N>(b, c))
+}
+
+/// Signed saturating doubling multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmlal2, N = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmlal_high_laneq_s32<const N: i32>(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
+ static_assert_imm2!(N);
+ vqaddq_s64(a, vqdmull_high_laneq_s32::<N>(b, c))
+}
+
+/// Signed saturating doubling multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmull))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmlalh_s16(a: i32, b: i16, c: i16) -> i32 {
+ let x: int32x4_t = vqdmull_s16(vdup_n_s16(b), vdup_n_s16(c));
+ vqadds_s32(a, simd_extract(x, 0))
+}
+
+/// Signed saturating doubling multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmull))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmlals_s32(a: i64, b: i32, c: i32) -> i64 {
+ let x: int64x2_t = vqdmull_s32(vdup_n_s32(b), vdup_n_s32(c));
+ vqaddd_s64(a, simd_extract(x, 0))
+}
+
+/// Signed saturating doubling multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmlal, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmlalh_lane_s16<const LANE: i32>(a: i32, b: i16, c: int16x4_t) -> i32 {
+ static_assert_imm2!(LANE);
+ vqdmlalh_s16(a, b, simd_extract(c, LANE as u32))
+}
+
+/// Signed saturating doubling multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmlal, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmlalh_laneq_s16<const LANE: i32>(a: i32, b: i16, c: int16x8_t) -> i32 {
+ static_assert_imm3!(LANE);
+ vqdmlalh_s16(a, b, simd_extract(c, LANE as u32))
+}
+
+/// Signed saturating doubling multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmull, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmlals_lane_s32<const LANE: i32>(a: i64, b: i32, c: int32x2_t) -> i64 {
+ static_assert_imm1!(LANE);
+ vqdmlals_s32(a, b, simd_extract(c, LANE as u32))
+}
+
+/// Signed saturating doubling multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmull, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmlals_laneq_s32<const LANE: i32>(a: i64, b: i32, c: int32x4_t) -> i64 {
+ static_assert_imm2!(LANE);
+ vqdmlals_s32(a, b, simd_extract(c, LANE as u32))
+}
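+
+// `vqdmlal*` saturate twice: once in the doubling multiply and once in
+// the accumulate, as the scalar form spells out with
+// `vqadds_s32(a, vqdmull(...))`. Sketch:
+//
+// unsafe {
+//     assert_eq!(vqdmlalh_s16(1, 2, 3), 13); // 1 + 2 * 2 * 3
+//     assert_eq!(vqdmlalh_s16(i32::MAX, 1, 1), i32::MAX); // accumulate saturates
+// }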
+
+/// Signed saturating doubling multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmlsl2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmlsl_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
+ vqsubq_s32(a, vqdmull_high_s16(b, c))
+}
+
+/// Signed saturating doubling multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmlsl2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmlsl_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
+ vqsubq_s64(a, vqdmull_high_s32(b, c))
+}
+
+/// Signed saturating doubling multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmlsl2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmlsl_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t {
+ vqsubq_s32(a, vqdmull_high_n_s16(b, c))
+}
+
+/// Signed saturating doubling multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmlsl2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmlsl_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t {
+ vqsubq_s64(a, vqdmull_high_n_s32(b, c))
+}
+
+/// Vector widening saturating doubling multiply subtract with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmlsl, N = 2))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmlsl_laneq_s16<const N: i32>(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t {
+ static_assert_imm3!(N);
+ vqsubq_s32(a, vqdmull_laneq_s16::<N>(b, c))
+}
+
+/// Vector widening saturating doubling multiply subtract with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmlsl, N = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmlsl_laneq_s32<const N: i32>(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t {
+ static_assert_imm2!(N);
+ vqsubq_s64(a, vqdmull_laneq_s32::<N>(b, c))
+}
+
+/// Signed saturating doubling multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmlsl2, N = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmlsl_high_lane_s16<const N: i32>(a: int32x4_t, b: int16x8_t, c: int16x4_t) -> int32x4_t {
+ static_assert_imm2!(N);
+ vqsubq_s32(a, vqdmull_high_lane_s16::<N>(b, c))
+}
+
+/// Signed saturating doubling multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmlsl2, N = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmlsl_high_laneq_s16<const N: i32>(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
+ static_assert_imm3!(N);
+ vqsubq_s32(a, vqdmull_high_laneq_s16::<N>(b, c))
+}
+
+/// Signed saturating doubling multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmlsl2, N = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmlsl_high_lane_s32<const N: i32>(a: int64x2_t, b: int32x4_t, c: int32x2_t) -> int64x2_t {
+ static_assert_imm1!(N);
+ vqsubq_s64(a, vqdmull_high_lane_s32::<N>(b, c))
+}
+
+/// Signed saturating doubling multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmlsl2, N = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmlsl_high_laneq_s32<const N: i32>(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
+ static_assert_imm2!(N);
+ vqsubq_s64(a, vqdmull_high_laneq_s32::<N>(b, c))
+}
+
+/// Signed saturating doubling multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmull))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmlslh_s16(a: i32, b: i16, c: i16) -> i32 {
+ let x: int32x4_t = vqdmull_s16(vdup_n_s16(b), vdup_n_s16(c));
+ vqsubs_s32(a, simd_extract(x, 0))
+}
+
+/// Signed saturating doubling multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmull))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmlsls_s32(a: i64, b: i32, c: i32) -> i64 {
+ let x: int64x2_t = vqdmull_s32(vdup_n_s32(b), vdup_n_s32(c));
+ vqsubd_s64(a, simd_extract(x, 0))
+}
+
+/// Signed saturating doubling multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmlsl, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmlslh_lane_s16<const LANE: i32>(a: i32, b: i16, c: int16x4_t) -> i32 {
+ static_assert_imm2!(LANE);
+ vqdmlslh_s16(a, b, simd_extract(c, LANE as u32))
+}
+
+/// Signed saturating doubling multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmlsl, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmlslh_laneq_s16<const LANE: i32>(a: i32, b: i16, c: int16x8_t) -> i32 {
+ static_assert_imm3!(LANE);
+ vqdmlslh_s16(a, b, simd_extract(c, LANE as u32))
+}
+
+/// Signed saturating doubling multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmull, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmlsls_lane_s32<const LANE: i32>(a: i64, b: i32, c: int32x2_t) -> i64 {
+ static_assert_imm1!(LANE);
+ vqdmlsls_s32(a, b, simd_extract(c, LANE as u32))
+}
+
+/// Signed saturating doubling multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmull, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmlsls_laneq_s32<const LANE: i32>(a: i64, b: i32, c: int32x4_t) -> i64 {
+ static_assert_imm2!(LANE);
+ vqdmlsls_s32(a, b, simd_extract(c, LANE as u32))
+}
+
+/// Signed saturating doubling multiply returning high half
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmulh))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmulhh_s16(a: i16, b: i16) -> i16 {
+ let a: int16x4_t = vdup_n_s16(a);
+ let b: int16x4_t = vdup_n_s16(b);
+ simd_extract(vqdmulh_s16(a, b), 0)
+}
+
+/// Signed saturating doubling multiply returning high half
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmulh))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmulhs_s32(a: i32, b: i32) -> i32 {
+ let a: int32x2_t = vdup_n_s32(a);
+ let b: int32x2_t = vdup_n_s32(b);
+ simd_extract(vqdmulh_s32(a, b), 0)
+}
+
+/// Signed saturating doubling multiply returning high half
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmulh, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmulhh_lane_s16<const N: i32>(a: i16, b: int16x4_t) -> i16 {
+ static_assert_imm2!(N);
+ let b: i16 = simd_extract(b, N as u32);
+ vqdmulhh_s16(a, b)
+}
+
+/// Signed saturating doubling multiply returning high half
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmulh, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmulhh_laneq_s16<const N: i32>(a: i16, b: int16x8_t) -> i16 {
+ static_assert_imm3!(N);
+ let b: i16 = simd_extract(b, N as u32);
+ vqdmulhh_s16(a, b)
+}
+
+/// Signed saturating doubling multiply returning high half
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmulh, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmulhs_lane_s32<const N: i32>(a: i32, b: int32x2_t) -> i32 {
+ static_assert_imm1!(N);
+ let b: i32 = simd_extract(b, N as u32);
+ vqdmulhs_s32(a, b)
+}
+
+/// Signed saturating doubling multiply returning high half
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmulh, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmulhs_laneq_s32<const N: i32>(a: i32, b: int32x4_t) -> i32 {
+ static_assert_imm2!(N);
+ let b: i32 = simd_extract(b, N as u32);
+ vqdmulhs_s32(a, b)
+}
+
+/// Vector saturating doubling multiply high by scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmulh, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmulh_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+ static_assert_imm2!(LANE);
+ vqdmulh_s16(a, vdup_n_s16(simd_extract(b, LANE as u32)))
+}
+
+/// Vector saturating doubling multiply high by scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmulh, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmulhq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int16x8_t {
+ static_assert_imm2!(LANE);
+ vqdmulhq_s16(a, vdupq_n_s16(simd_extract(b, LANE as u32)))
+}
+
+/// Vector saturating doubling multiply high by scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmulh, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmulh_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+ static_assert_imm1!(LANE);
+ vqdmulh_s32(a, vdup_n_s32(simd_extract(b, LANE as u32)))
+}
+
+/// Vector saturating doubling multiply high by scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmulh, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmulhq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int32x4_t {
+ static_assert_imm1!(LANE);
+ vqdmulhq_s32(a, vdupq_n_s32(simd_extract(b, LANE as u32)))
+}
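+
+// "Returning high half" means the top half of the saturated doubled
+// product, i.e. `(2 * a * b) >> 16` for 16-bit lanes; only `MIN * MIN`
+// saturates. Sketch:
+//
+// unsafe {
+//     assert_eq!(vqdmulhh_s16(16384, 16384), 8192); // (2 * 2^14 * 2^14) >> 16
+//     assert_eq!(vqdmulhh_s16(i16::MIN, i16::MIN), i16::MAX);
+// }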
+
+/// Saturating extract narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqxtn))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqmovnh_s16(a: i16) -> i8 {
+ simd_extract(vqmovn_s16(vdupq_n_s16(a)), 0)
+}
+
+/// Saturating extract narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqxtn))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqmovns_s32(a: i32) -> i16 {
+ simd_extract(vqmovn_s32(vdupq_n_s32(a)), 0)
+}
+
+/// Saturating extract narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqxtn))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqmovnh_u16(a: u16) -> u8 {
+ simd_extract(vqmovn_u16(vdupq_n_u16(a)), 0)
+}
+
+/// Saturating extract narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqxtn))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqmovns_u32(a: u32) -> u16 {
+ simd_extract(vqmovn_u32(vdupq_n_u32(a)), 0)
+}
+
+/// Saturating extract narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqxtn))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqmovnd_s64(a: i64) -> i32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.scalar.sqxtn.i32.i64")]
+ fn vqmovnd_s64_(a: i64) -> i32;
+ }
+ vqmovnd_s64_(a)
+}
+
+/// Saturating extract narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqxtn))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqmovnd_u64(a: u64) -> u32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.scalar.uqxtn.i32.i64")]
+ fn vqmovnd_u64_(a: u64) -> u32;
+ }
+ vqmovnd_u64_(a)
+}
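+
+// Saturating extract narrow clamps the value to the narrower type's
+// range. Sketch:
+//
+// unsafe {
+//     assert_eq!(vqmovnh_s16(100), 100);
+//     assert_eq!(vqmovnh_s16(1000), i8::MAX); // clamps to 127
+//     assert_eq!(vqmovnd_u64(u64::MAX), u32::MAX);
+// }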
+
+/// Signed saturating extract narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqxtn2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqmovn_high_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t {
+ simd_shuffle16!(a, vqmovn_s16(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+}
+
+/// Signed saturating extract narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqxtn2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqmovn_high_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t {
+ simd_shuffle8!(a, vqmovn_s32(b), [0, 1, 2, 3, 4, 5, 6, 7])
+}
+
+/// Signed saturating extract narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqxtn2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqmovn_high_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t {
+ simd_shuffle4!(a, vqmovn_s64(b), [0, 1, 2, 3])
+}
+
+/// Unsigned saturating extract narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqxtn2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqmovn_high_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
+ simd_shuffle16!(a, vqmovn_u16(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+}
+
+/// Unsigned saturating extract narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqxtn2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqmovn_high_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
+ simd_shuffle8!(a, vqmovn_u32(b), [0, 1, 2, 3, 4, 5, 6, 7])
+}
+
+/// Unsigned saturating extract narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqxtn2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqmovn_high_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
+ simd_shuffle4!(a, vqmovn_u64(b), [0, 1, 2, 3])
+}
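+
+// The `high` forms keep `a` as the lower half of the result and append
+// the narrowed `b` as the upper half (SQXTN2/UQXTN2). Sketch with
+// hypothetical values:
+//
+// unsafe {
+//     let lo = vdup_n_s8(1);
+//     let b = vdupq_n_s16(1000); // each lane narrows to 127
+//     let _ = vqmovn_high_s16(lo, b); // eight 1s, then eight 127s
+// }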
+
+/// Signed saturating extract unsigned narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqxtun))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqmovunh_s16(a: i16) -> u8 {
+ simd_extract(vqmovun_s16(vdupq_n_s16(a)), 0)
+}
+
+/// Signed saturating extract unsigned narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqxtun))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqmovuns_s32(a: i32) -> u16 {
+ simd_extract(vqmovun_s32(vdupq_n_s32(a)), 0)
+}
+
+/// Signed saturating extract unsigned narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqxtun))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqmovund_s64(a: i64) -> u32 {
+ simd_extract(vqmovun_s64(vdupq_n_s64(a)), 0)
+}
+
+/// Signed saturating extract unsigned narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqxtun2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqmovun_high_s16(a: uint8x8_t, b: int16x8_t) -> uint8x16_t {
+ simd_shuffle16!(a, vqmovun_s16(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+}
+
+/// Signed saturating extract unsigned narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqxtun2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqmovun_high_s32(a: uint16x4_t, b: int32x4_t) -> uint16x8_t {
+ simd_shuffle8!(a, vqmovun_s32(b), [0, 1, 2, 3, 4, 5, 6, 7])
+}
+
+/// Signed saturating extract unsigned narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqxtun2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqmovun_high_s64(a: uint32x2_t, b: int64x2_t) -> uint32x4_t {
+ simd_shuffle4!(a, vqmovun_s64(b), [0, 1, 2, 3])
+}
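+
+// Usage sketch (illustrative only): the `un` variants take signed input and
+// saturate into the unsigned range, so negatives clamp to zero:
+//
+//     unsafe {
+//         assert_eq!(vqmovunh_s16(-5), 0);    // negative clamps to 0
+//         assert_eq!(vqmovunh_s16(300), 255); // overflow clamps to u8::MAX
+//     }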
+
+/// Signed saturating rounding doubling multiply returning high half
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrdmulh))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrdmulhh_s16(a: i16, b: i16) -> i16 {
+ simd_extract(vqrdmulh_s16(vdup_n_s16(a), vdup_n_s16(b)), 0)
+}
+
+/// Signed saturating rounding doubling multiply returning high half
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrdmulh))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrdmulhs_s32(a: i32, b: i32) -> i32 {
+ simd_extract(vqrdmulh_s32(vdup_n_s32(a), vdup_n_s32(b)), 0)
+}
+
+/// Signed saturating rounding doubling multiply returning high half
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrdmulhh_lane_s16<const LANE: i32>(a: i16, b: int16x4_t) -> i16 {
+ static_assert_imm2!(LANE);
+ vqrdmulhh_s16(a, simd_extract(b, LANE as u32))
+}
+
+/// Signed saturating rounding doubling multiply returning high half
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrdmulhh_laneq_s16<const LANE: i32>(a: i16, b: int16x8_t) -> i16 {
+ static_assert_imm3!(LANE);
+ vqrdmulhh_s16(a, simd_extract(b, LANE as u32))
+}
+
+/// Signed saturating rounding doubling multiply returning high half
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrdmulhs_lane_s32<const LANE: i32>(a: i32, b: int32x2_t) -> i32 {
+ static_assert_imm1!(LANE);
+ vqrdmulhs_s32(a, simd_extract(b, LANE as u32))
+}
+
+/// Signed saturating rounding doubling multiply returning high half
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrdmulhs_laneq_s32<const LANE: i32>(a: i32, b: int32x4_t) -> i32 {
+ static_assert_imm2!(LANE);
+ vqrdmulhs_s32(a, simd_extract(b, LANE as u32))
+}
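+
+// Usage sketch (illustrative only): `sqrdmulh` is a Q15/Q31 fixed-point
+// multiply, i.e. sat((2*a*b + (1 << 15)) >> 16) for 16-bit elements:
+//
+//     unsafe {
+//         // 0.5 * 0.5 = 0.25 in Q15: 0x4000 * 0x4000 -> 0x2000
+//         assert_eq!(vqrdmulhh_s16(0x4000, 0x4000), 0x2000);
+//         // the one overflow case, -1.0 * -1.0, saturates just below +1.0
+//         assert_eq!(vqrdmulhh_s16(i16::MIN, i16::MIN), i16::MAX);
+//     }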
+
+/// Signed saturating rounding doubling multiply accumulate returning high half
+#[inline]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlah))]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlah_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrdmlah.v4i16")]
+ fn vqrdmlah_s16_(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t;
+ }
+ vqrdmlah_s16_(a, b, c)
+}
+
+/// Signed saturating rounding doubling multiply accumulate returning high half
+#[inline]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlah))]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlahq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrdmlah.v8i16")]
+ fn vqrdmlahq_s16_(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t;
+ }
+ vqrdmlahq_s16_(a, b, c)
+}
+
+/// Signed saturating rounding doubling multiply accumulate returning high half
+#[inline]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlah))]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlah_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrdmlah.v2i32")]
+ fn vqrdmlah_s32_(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t;
+ }
+ vqrdmlah_s32_(a, b, c)
+}
+
+/// Signed saturating rounding doubling multiply accumulate returning high half
+#[inline]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlah))]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlahq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrdmlah.v4i32")]
+ fn vqrdmlahq_s32_(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t;
+ }
+ vqrdmlahq_s32_(a, b, c)
+}
+
+/// Signed saturating rounding doubling multiply accumulate returning high half
+#[inline]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlah))]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlahh_s16(a: i16, b: i16, c: i16) -> i16 {
+ let a: int16x4_t = vdup_n_s16(a);
+ let b: int16x4_t = vdup_n_s16(b);
+ let c: int16x4_t = vdup_n_s16(c);
+ simd_extract(vqrdmlah_s16(a, b, c), 0)
+}
+
+/// Signed saturating rounding doubling multiply accumulate returning high half
+#[inline]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlah))]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlahs_s32(a: i32, b: i32, c: i32) -> i32 {
+ let a: int32x2_t = vdup_n_s32(a);
+ let b: int32x2_t = vdup_n_s32(b);
+ let c: int32x2_t = vdup_n_s32(c);
+ simd_extract(vqrdmlah_s32(a, b, c), 0)
+}
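+
+// Usage sketch (illustrative only): `sqrdmlah` fuses the rounding doubling
+// high-half product into an accumulator, saturating the final sum:
+//
+//     unsafe {
+//         // 0.25 * 0.25 = 0.0625 in Q15 (0x0800), added to the accumulator
+//         assert_eq!(vqrdmlahh_s16(5, 0x2000, 0x2000), 5 + 0x0800);
+//     }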
+
+/// Signed saturating rounding doubling multiply accumulate returning high half
+#[inline]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlah_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
+ static_assert_imm2!(LANE);
+ let c: int16x4_t = simd_shuffle4!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+ vqrdmlah_s16(a, b, c)
+}
+
+/// Signed saturating rounding doubling multiply accumulate returning high half
+#[inline]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlah_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t {
+ static_assert_imm3!(LANE);
+ let c: int16x4_t = simd_shuffle4!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+ vqrdmlah_s16(a, b, c)
+}
+
+/// Signed saturating rounding doubling multiply accumulate returning high half
+#[inline]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlahq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t {
+ static_assert_imm2!(LANE);
+ let c: int16x8_t = simd_shuffle8!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+ vqrdmlahq_s16(a, b, c)
+}
+
+/// Signed saturating rounding doubling multiply accumulate returning high half
+#[inline]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlahq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
+ static_assert_imm3!(LANE);
+ let c: int16x8_t = simd_shuffle8!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+ vqrdmlahq_s16(a, b, c)
+}
+
+/// Signed saturating rounding doubling multiply accumulate returning high half
+#[inline]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlah_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
+ static_assert_imm1!(LANE);
+ let c: int32x2_t = simd_shuffle2!(c, c, <const LANE: i32> [LANE as u32, LANE as u32]);
+ vqrdmlah_s32(a, b, c)
+}
+
+/// Signed saturating rounding doubling multiply accumulate returning high half
+#[inline]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlah_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t {
+ static_assert_imm2!(LANE);
+ let c: int32x2_t = simd_shuffle2!(c, c, <const LANE: i32> [LANE as u32, LANE as u32]);
+ vqrdmlah_s32(a, b, c)
+}
+
+/// Signed saturating rounding doubling multiply accumulate returning high half
+#[inline]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlahq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t {
+ static_assert_imm1!(LANE);
+ let c: int32x4_t = simd_shuffle4!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+ vqrdmlahq_s32(a, b, c)
+}
+
+/// Signed saturating rounding doubling multiply accumulate returning high half
+#[inline]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlahq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
+ static_assert_imm2!(LANE);
+ let c: int32x4_t = simd_shuffle4!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+ vqrdmlahq_s32(a, b, c)
+}
+
+/// Signed saturating rounding doubling multiply accumulate returning high half
+#[inline]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlahh_lane_s16<const LANE: i32>(a: i16, b: i16, c: int16x4_t) -> i16 {
+ static_assert_imm2!(LANE);
+ vqrdmlahh_s16(a, b, simd_extract(c, LANE as u32))
+}
+
+/// Signed saturating rounding doubling multiply accumulate returning high half
+#[inline]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlahh_laneq_s16<const LANE: i32>(a: i16, b: i16, c: int16x8_t) -> i16 {
+ static_assert_imm3!(LANE);
+ vqrdmlahh_s16(a, b, simd_extract(c, LANE as u32))
+}
+
+/// Signed saturating rounding doubling multiply accumulate returning high half
+#[inline]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlahs_lane_s32<const LANE: i32>(a: i32, b: i32, c: int32x2_t) -> i32 {
+ static_assert_imm1!(LANE);
+ vqrdmlahs_s32(a, b, simd_extract(c, LANE as u32))
+}
+
+/// Signed saturating rounding doubling multiply accumulate returning high half
+#[inline]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlahs_laneq_s32<const LANE: i32>(a: i32, b: i32, c: int32x4_t) -> i32 {
+ static_assert_imm2!(LANE);
+ vqrdmlahs_s32(a, b, simd_extract(c, LANE as u32))
+}
+
+/// Signed saturating rounding doubling multiply subtract returning high half
+#[inline]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlsh))]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlsh_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrdmlsh.v4i16")]
+ fn vqrdmlsh_s16_(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t;
+ }
+ vqrdmlsh_s16_(a, b, c)
+}
+
+/// Signed saturating rounding doubling multiply subtract returning high half
+#[inline]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlsh))]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlshq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrdmlsh.v8i16")]
+ fn vqrdmlshq_s16_(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t;
+ }
+ vqrdmlshq_s16_(a, b, c)
+}
+
+/// Signed saturating rounding doubling multiply subtract returning high half
+#[inline]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlsh))]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlsh_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrdmlsh.v2i32")]
+ fn vqrdmlsh_s32_(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t;
+ }
+ vqrdmlsh_s32_(a, b, c)
+}
+
+/// Signed saturating rounding doubling multiply subtract returning high half
+#[inline]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlsh))]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlshq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrdmlsh.v4i32")]
+ fn vqrdmlshq_s32_(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t;
+ }
+ vqrdmlshq_s32_(a, b, c)
+}
+
+/// Signed saturating rounding doubling multiply subtract returning high half
+#[inline]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlsh))]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlshh_s16(a: i16, b: i16, c: i16) -> i16 {
+ let a: int16x4_t = vdup_n_s16(a);
+ let b: int16x4_t = vdup_n_s16(b);
+ let c: int16x4_t = vdup_n_s16(c);
+ simd_extract(vqrdmlsh_s16(a, b, c), 0)
+}
+
+/// Signed saturating rounding doubling multiply subtract returning high half
+#[inline]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlsh))]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlshs_s32(a: i32, b: i32, c: i32) -> i32 {
+ let a: int32x2_t = vdup_n_s32(a);
+ let b: int32x2_t = vdup_n_s32(b);
+ let c: int32x2_t = vdup_n_s32(c);
+ simd_extract(vqrdmlsh_s32(a, b, c), 0)
+}
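+
+// Usage sketch (illustrative only): `sqrdmlsh` is the subtracting
+// counterpart, removing the rounding doubling high-half product from the
+// accumulator:
+//
+//     unsafe {
+//         assert_eq!(vqrdmlshh_s16(5, 0x2000, 0x2000), 5 - 0x0800);
+//     }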
+
+/// Signed saturating rounding doubling multiply subtract returning high half
+#[inline]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlsh_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
+ static_assert_imm2!(LANE);
+ let c: int16x4_t = simd_shuffle4!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+ vqrdmlsh_s16(a, b, c)
+}
+
+/// Signed saturating rounding doubling multiply subtract returning high half
+#[inline]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlsh_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t {
+ static_assert_imm3!(LANE);
+ let c: int16x4_t = simd_shuffle4!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+ vqrdmlsh_s16(a, b, c)
+}
+
+/// Signed saturating rounding doubling multiply subtract returning high half
+#[inline]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlshq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t {
+ static_assert_imm2!(LANE);
+ let c: int16x8_t = simd_shuffle8!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+ vqrdmlshq_s16(a, b, c)
+}
+
+/// Signed saturating rounding doubling multiply subtract returning high half
+#[inline]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlshq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
+ static_assert_imm3!(LANE);
+ let c: int16x8_t = simd_shuffle8!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+ vqrdmlshq_s16(a, b, c)
+}
+
+/// Signed saturating rounding doubling multiply subtract returning high half
+#[inline]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlsh_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
+ static_assert_imm1!(LANE);
+ let c: int32x2_t = simd_shuffle2!(c, c, <const LANE: i32> [LANE as u32, LANE as u32]);
+ vqrdmlsh_s32(a, b, c)
+}
+
+/// Signed saturating rounding doubling multiply subtract returning high half
+#[inline]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlsh_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t {
+ static_assert_imm2!(LANE);
+ let c: int32x2_t = simd_shuffle2!(c, c, <const LANE: i32> [LANE as u32, LANE as u32]);
+ vqrdmlsh_s32(a, b, c)
+}
+
+/// Signed saturating rounding doubling multiply subtract returning high half
+#[inline]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlshq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t {
+ static_assert_imm1!(LANE);
+ let c: int32x4_t = simd_shuffle4!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+ vqrdmlshq_s32(a, b, c)
+}
+
+/// Signed saturating rounding doubling multiply subtract returning high half
+#[inline]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlshq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
+ static_assert_imm2!(LANE);
+ let c: int32x4_t = simd_shuffle4!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+ vqrdmlshq_s32(a, b, c)
+}
+
+/// Signed saturating rounding doubling multiply subtract returning high half
+#[inline]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlshh_lane_s16<const LANE: i32>(a: i16, b: i16, c: int16x4_t) -> i16 {
+ static_assert_imm2!(LANE);
+ vqrdmlshh_s16(a, b, simd_extract(c, LANE as u32))
+}
+
+/// Signed saturating rounding doubling multiply subtract returning high half
+#[inline]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlshh_laneq_s16<const LANE: i32>(a: i16, b: i16, c: int16x8_t) -> i16 {
+ static_assert_imm3!(LANE);
+ vqrdmlshh_s16(a, b, simd_extract(c, LANE as u32))
+}
+
+/// Signed saturating rounding doubling multiply subtract returning high half
+#[inline]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlshs_lane_s32<const LANE: i32>(a: i32, b: i32, c: int32x2_t) -> i32 {
+ static_assert_imm1!(LANE);
+ vqrdmlshs_s32(a, b, simd_extract(c, LANE as u32))
+}
+
+/// Signed saturating rounding doubling multiply subtract returning high half
+#[inline]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlshs_laneq_s32<const LANE: i32>(a: i32, b: i32, c: int32x4_t) -> i32 {
+ static_assert_imm2!(LANE);
+ vqrdmlshs_s32(a, b, simd_extract(c, LANE as u32))
+}
+
+/// Signed saturating rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrshl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshls_s32(a: i32, b: i32) -> i32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.i32")]
+ fn vqrshls_s32_(a: i32, b: i32) -> i32;
+ }
+ vqrshls_s32_(a, b)
+}
+
+/// Signed saturating rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrshl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshld_s64(a: i64, b: i64) -> i64 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.i64")]
+ fn vqrshld_s64_(a: i64, b: i64) -> i64;
+ }
+ vqrshld_s64_(a, b)
+}
+
+/// Signed saturating rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrshl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshlb_s8(a: i8, b: i8) -> i8 {
+ let a: int8x8_t = vdup_n_s8(a);
+ let b: int8x8_t = vdup_n_s8(b);
+ simd_extract(vqrshl_s8(a, b), 0)
+}
+
+/// Signed saturating rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrshl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshlh_s16(a: i16, b: i16) -> i16 {
+ let a: int16x4_t = vdup_n_s16(a);
+ let b: int16x4_t = vdup_n_s16(b);
+ simd_extract(vqrshl_s16(a, b), 0)
+}
+
+/// Unsigned saturating rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqrshl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshls_u32(a: u32, b: i32) -> u32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.i32")]
+ fn vqrshls_u32_(a: u32, b: i32) -> u32;
+ }
+ vqrshls_u32_(a, b)
+}
+
+/// Unsigned saturating rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqrshl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshld_u64(a: u64, b: i64) -> u64 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.i64")]
+ fn vqrshld_u64_(a: u64, b: i64) -> u64;
+ }
+ vqrshld_u64_(a, b)
+}
+
+/// Unsigned saturating rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqrshl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshlb_u8(a: u8, b: i8) -> u8 {
+ let a: uint8x8_t = vdup_n_u8(a);
+ let b: int8x8_t = vdup_n_s8(b);
+ simd_extract(vqrshl_u8(a, b), 0)
+}
+
+/// Unsigned saturating rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqrshl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshlh_u16(a: u16, b: i16) -> u16 {
+ let a: uint16x4_t = vdup_n_u16(a);
+ let b: int16x4_t = vdup_n_s16(b);
+ simd_extract(vqrshl_u16(a, b), 0)
+}
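+
+// Usage sketch (illustrative only): a negative shift count turns these into
+// *rounding* right shifts, while positive counts shift left with saturation:
+//
+//     unsafe {
+//         assert_eq!(vqrshld_s64(15, -2), 4);   // (15 + 2) >> 2, rounds up
+//         assert_eq!(vqrshlb_s8(100, 1), 127);  // 200 saturates to i8::MAX
+//         assert_eq!(vqrshlb_u8(100, 1), 200);  // fits in u8, no clamp
+//     }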
+
+/// Signed saturating rounded shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshrnh_n_s16<const N: i32>(a: i16) -> i8 {
+ static_assert!(N : i32 where N >= 1 && N <= 8);
+ let a: int16x8_t = vdupq_n_s16(a);
+ simd_extract(vqrshrn_n_s16::<N>(a), 0)
+}
+
+/// Signed saturating rounded shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshrns_n_s32<const N: i32>(a: i32) -> i16 {
+ static_assert!(N : i32 where N >= 1 && N <= 16);
+ let a: int32x4_t = vdupq_n_s32(a);
+ simd_extract(vqrshrn_n_s32::<N>(a), 0)
+}
+
+/// Signed saturating rounded shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshrnd_n_s64<const N: i32>(a: i64) -> i32 {
+ static_assert!(N : i32 where N >= 1 && N <= 32);
+ let a: int64x2_t = vdupq_n_s64(a);
+ simd_extract(vqrshrn_n_s64::<N>(a), 0)
+}
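+
+// Usage sketch (illustrative only): these combine a rounding right shift by
+// the immediate `N` with a saturating narrow:
+//
+//     unsafe {
+//         assert_eq!(vqrshrnh_n_s16::<2>(6), 2);              // (6 + 2) >> 2
+//         assert_eq!(vqrshrnh_n_s16::<1>(i16::MAX), i8::MAX); // saturates
+//     }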
+
+/// Signed saturating rounded shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
+ static_assert!(N : i32 where N >= 1 && N <= 8);
+ simd_shuffle16!(a, vqrshrn_n_s16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+}
+
+/// Signed saturating rounded shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
+ static_assert!(N : i32 where N >= 1 && N <= 16);
+ simd_shuffle8!(a, vqrshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
+}
+
+/// Signed saturating rounded shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
+ static_assert!(N : i32 where N >= 1 && N <= 32);
+ simd_shuffle4!(a, vqrshrn_n_s64::<N>(b), [0, 1, 2, 3])
+}
+
+/// Unsigned saturating rounded shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshrnh_n_u16<const N: i32>(a: u16) -> u8 {
+ static_assert!(N : i32 where N >= 1 && N <= 8);
+ let a: uint16x8_t = vdupq_n_u16(a);
+ simd_extract(vqrshrn_n_u16::<N>(a), 0)
+}
+
+/// Unsigned saturating rounded shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshrns_n_u32<const N: i32>(a: u32) -> u16 {
+ static_assert!(N : i32 where N >= 1 && N <= 16);
+ let a: uint32x4_t = vdupq_n_u32(a);
+ simd_extract(vqrshrn_n_u32::<N>(a), 0)
+}
+
+/// Unsigned saturating rounded shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshrnd_n_u64<const N: i32>(a: u64) -> u32 {
+ static_assert!(N : i32 where N >= 1 && N <= 32);
+ let a: uint64x2_t = vdupq_n_u64(a);
+ simd_extract(vqrshrn_n_u64::<N>(a), 0)
+}
+
+/// Unsigned saturating rounded shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqrshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
+ static_assert!(N : i32 where N >= 1 && N <= 8);
+ simd_shuffle16!(a, vqrshrn_n_u16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+}
+
+/// Unsigned saturating rounded shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqrshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
+ static_assert!(N : i32 where N >= 1 && N <= 16);
+ simd_shuffle8!(a, vqrshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
+}
+
+/// Unsigned saturating rounded shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqrshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
+ static_assert!(N : i32 where N >= 1 && N <= 32);
+ simd_shuffle4!(a, vqrshrn_n_u64::<N>(b), [0, 1, 2, 3])
+}
+
+/// Signed saturating rounded shift right unsigned narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshrunh_n_s16<const N: i32>(a: i16) -> u8 {
+ static_assert!(N : i32 where N >= 1 && N <= 8);
+ let a: int16x8_t = vdupq_n_s16(a);
+ simd_extract(vqrshrun_n_s16::<N>(a), 0)
+}
+
+/// Signed saturating rounded shift right unsigned narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshruns_n_s32<const N: i32>(a: i32) -> u16 {
+ static_assert!(N : i32 where N >= 1 && N <= 16);
+ let a: int32x4_t = vdupq_n_s32(a);
+ simd_extract(vqrshrun_n_s32::<N>(a), 0)
+}
+
+/// Signed saturating rounded shift right unsigned narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshrund_n_s64<const N: i32>(a: i64) -> u32 {
+ static_assert!(N : i32 where N >= 1 && N <= 32);
+ let a: int64x2_t = vdupq_n_s64(a);
+ simd_extract(vqrshrun_n_s64::<N>(a), 0)
+}
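+
+// Usage sketch (illustrative only): the `un` forms shift signed input right
+// with rounding and saturate into the unsigned result range:
+//
+//     unsafe {
+//         assert_eq!(vqrshrunh_n_s16::<2>(6), 2);   // (6 + 2) >> 2
+//         assert_eq!(vqrshrunh_n_s16::<2>(-6), 0);  // negatives clamp to 0
+//     }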
+
+/// Signed saturating rounded shift right unsigned narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrshrun2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshrun_high_n_s16<const N: i32>(a: uint8x8_t, b: int16x8_t) -> uint8x16_t {
+ static_assert!(N : i32 where N >= 1 && N <= 8);
+ simd_shuffle16!(a, vqrshrun_n_s16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+}
+
+/// Signed saturating rounded shift right unsigned narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrshrun2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshrun_high_n_s32<const N: i32>(a: uint16x4_t, b: int32x4_t) -> uint16x8_t {
+ static_assert!(N : i32 where N >= 1 && N <= 16);
+ simd_shuffle8!(a, vqrshrun_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
+}
+
+/// Signed saturating rounded shift right unsigned narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrshrun2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshrun_high_n_s64<const N: i32>(a: uint32x2_t, b: int64x2_t) -> uint32x4_t {
+ static_assert!(N : i32 where N >= 1 && N <= 32);
+ simd_shuffle4!(a, vqrshrun_n_s64::<N>(b), [0, 1, 2, 3])
+}
+
+/// Signed saturating shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshld_s64(a: i64, b: i64) -> i64 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.i64")]
+ fn vqshld_s64_(a: i64, b: i64) -> i64;
+ }
+ vqshld_s64_(a, b)
+}
+
+/// Signed saturating shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshlb_s8(a: i8, b: i8) -> i8 {
+ let c: int8x8_t = vqshl_s8(vdup_n_s8(a), vdup_n_s8(b));
+ simd_extract(c, 0)
+}
+
+/// Signed saturating shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshlh_s16(a: i16, b: i16) -> i16 {
+ let c: int16x4_t = vqshl_s16(vdup_n_s16(a), vdup_n_s16(b));
+ simd_extract(c, 0)
+}
+
+/// Signed saturating shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshls_s32(a: i32, b: i32) -> i32 {
+ let c: int32x2_t = vqshl_s32(vdup_n_s32(a), vdup_n_s32(b));
+ simd_extract(c, 0)
+}
+
+/// Unsigned saturating shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqshl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshld_u64(a: u64, b: i64) -> u64 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.i64")]
+ fn vqshld_u64_(a: u64, b: i64) -> u64;
+ }
+ vqshld_u64_(a, b)
+}
+
+/// Unsigned saturating shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqshl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshlb_u8(a: u8, b: i8) -> u8 {
+ let c: uint8x8_t = vqshl_u8(vdup_n_u8(a), vdup_n_s8(b));
+ simd_extract(c, 0)
+}
+
+/// Unsigned saturating shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqshl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshlh_u16(a: u16, b: i16) -> u16 {
+ let c: uint16x4_t = vqshl_u16(vdup_n_u16(a), vdup_n_s16(b));
+ simd_extract(c, 0)
+}
+
+/// Unsigned saturating shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqshl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshls_u32(a: u32, b: i32) -> u32 {
+ let c: uint32x2_t = vqshl_u32(vdup_n_u32(a), vdup_n_s32(b));
+ simd_extract(c, 0)
+}
+
+/// Signed saturating shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshlb_n_s8<const N: i32>(a: i8) -> i8 {
+ static_assert_imm3!(N);
+ simd_extract(vqshl_n_s8::<N>(vdup_n_s8(a)), 0)
+}
+
+/// Signed saturating shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshlh_n_s16<const N: i32>(a: i16) -> i16 {
+ static_assert_imm4!(N);
+ simd_extract(vqshl_n_s16::<N>(vdup_n_s16(a)), 0)
+}
+
+/// Signed saturating shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshls_n_s32<const N: i32>(a: i32) -> i32 {
+ static_assert_imm5!(N);
+ simd_extract(vqshl_n_s32::<N>(vdup_n_s32(a)), 0)
+}
+
+/// Signed saturating shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshld_n_s64<const N: i32>(a: i64) -> i64 {
+ static_assert_imm6!(N);
+ simd_extract(vqshl_n_s64::<N>(vdup_n_s64(a)), 0)
+}
+
+/// Unsigned saturating shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshlb_n_u8<const N: i32>(a: u8) -> u8 {
+ static_assert_imm3!(N);
+ simd_extract(vqshl_n_u8::<N>(vdup_n_u8(a)), 0)
+}
+
+/// Unsigned saturating shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshlh_n_u16<const N: i32>(a: u16) -> u16 {
+ static_assert_imm4!(N);
+ simd_extract(vqshl_n_u16::<N>(vdup_n_u16(a)), 0)
+}
+
+/// Unsigned saturating shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshls_n_u32<const N: i32>(a: u32) -> u32 {
+ static_assert_imm5!(N);
+ simd_extract(vqshl_n_u32::<N>(vdup_n_u32(a)), 0)
+}
+
+/// Unsigned saturating shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshld_n_u64<const N: i32>(a: u64) -> u64 {
+ static_assert_imm6!(N);
+ simd_extract(vqshl_n_u64::<N>(vdup_n_u64(a)), 0)
+}
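+
+// Usage sketch (illustrative only): the `_n` forms shift left by an immediate
+// and saturate on overflow:
+//
+//     unsafe {
+//         assert_eq!(vqshlb_n_s8::<1>(3), 6);          // in range
+//         assert_eq!(vqshlb_n_s8::<2>(100), i8::MAX);  // 400 saturates
+//         assert_eq!(vqshld_n_u64::<1>(u64::MAX), u64::MAX);
+//     }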
+
+/// Signed saturating shift left unsigned
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshlub_n_s8<const N: i32>(a: i8) -> u8 {
+ static_assert_imm3!(N);
+ simd_extract(vqshlu_n_s8::<N>(vdup_n_s8(a)), 0)
+}
+
+/// Signed saturating shift left unsigned
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshluh_n_s16<const N: i32>(a: i16) -> u16 {
+ static_assert_imm4!(N);
+ simd_extract(vqshlu_n_s16::<N>(vdup_n_s16(a)), 0)
+}
+
+/// Signed saturating shift left unsigned
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshlus_n_s32<const N: i32>(a: i32) -> u32 {
+ static_assert_imm5!(N);
+ simd_extract(vqshlu_n_s32::<N>(vdup_n_s32(a)), 0)
+}
+
+/// Signed saturating shift left unsigned
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshlud_n_s64<const N: i32>(a: i64) -> u64 {
+ static_assert_imm6!(N);
+ simd_extract(vqshlu_n_s64::<N>(vdup_n_s64(a)), 0)
+}
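+
+// Usage sketch (illustrative only): `sqshlu` takes a *signed* input, shifts
+// left, and saturates to the *unsigned* range, so negatives clamp to 0:
+//
+//     unsafe {
+//         assert_eq!(vqshlub_n_s8::<1>(100), 200); // fits in u8
+//         assert_eq!(vqshlub_n_s8::<1>(-1), 0);    // negative clamps to 0
+//     }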
+
+/// Signed saturating shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshrnd_n_s64<const N: i32>(a: i64) -> i32 {
+ static_assert!(N : i32 where N >= 1 && N <= 32);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrn.i32")]
+ fn vqshrnd_n_s64_(a: i64, n: i32) -> i32;
+ }
+ vqshrnd_n_s64_(a, N)
+}
+
+/// Signed saturating shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshrnh_n_s16<const N: i32>(a: i16) -> i8 {
+ static_assert!(N : i32 where N >= 1 && N <= 8);
+ simd_extract(vqshrn_n_s16::<N>(vdupq_n_s16(a)), 0)
+}
+
+/// Signed saturating shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshrns_n_s32<const N: i32>(a: i32) -> i16 {
+ static_assert!(N : i32 where N >= 1 && N <= 16);
+ simd_extract(vqshrn_n_s32::<N>(vdupq_n_s32(a)), 0)
+}
+
+/// Signed saturating shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
+ static_assert!(N : i32 where N >= 1 && N <= 8);
+ simd_shuffle16!(a, vqshrn_n_s16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+}
+
+/// Signed saturating shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
+ static_assert!(N : i32 where N >= 1 && N <= 16);
+ simd_shuffle8!(a, vqshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
+}
+
+/// Signed saturating shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
+ static_assert!(N : i32 where N >= 1 && N <= 32);
+ simd_shuffle4!(a, vqshrn_n_s64::<N>(b), [0, 1, 2, 3])
+}
+
+/// Unsigned saturating shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshrnd_n_u64<const N: i32>(a: u64) -> u32 {
+ static_assert!(N : i32 where N >= 1 && N <= 32);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshrn.i32")]
+ fn vqshrnd_n_u64_(a: u64, n: i32) -> u32;
+ }
+ vqshrnd_n_u64_(a, N)
+}
+
+/// Unsigned saturating shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshrnh_n_u16<const N: i32>(a: u16) -> u8 {
+ static_assert!(N : i32 where N >= 1 && N <= 8);
+ simd_extract(vqshrn_n_u16::<N>(vdupq_n_u16(a)), 0)
+}
+
+/// Unsigned saturating shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshrns_n_u32<const N: i32>(a: u32) -> u16 {
+ static_assert!(N : i32 where N >= 1 && N <= 16);
+ simd_extract(vqshrn_n_u32::<N>(vdupq_n_u32(a)), 0)
+}
+
+/// Unsigned saturating shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
+ static_assert!(N : i32 where N >= 1 && N <= 8);
+ simd_shuffle16!(a, vqshrn_n_u16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+}
+
+/// Unsigned saturating shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
+ static_assert!(N : i32 where N >= 1 && N <= 16);
+ simd_shuffle8!(a, vqshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
+}
+
+/// Unsigned saturating shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
+ static_assert!(N : i32 where N >= 1 && N <= 32);
+ simd_shuffle4!(a, vqshrn_n_u64::<N>(b), [0, 1, 2, 3])
+}
+
+/// Signed saturating shift right unsigned narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshrunh_n_s16<const N: i32>(a: i16) -> u8 {
+ static_assert!(N : i32 where N >= 1 && N <= 8);
+ simd_extract(vqshrun_n_s16::<N>(vdupq_n_s16(a)), 0)
+}
+
+/// Signed saturating shift right unsigned narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshruns_n_s32<const N: i32>(a: i32) -> u16 {
+ static_assert!(N : i32 where N >= 1 && N <= 16);
+ simd_extract(vqshrun_n_s32::<N>(vdupq_n_s32(a)), 0)
+}
+
+/// Signed saturating shift right unsigned narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshrund_n_s64<const N: i32>(a: i64) -> u32 {
+ static_assert!(N : i32 where N >= 1 && N <= 32);
+ simd_extract(vqshrun_n_s64::<N>(vdupq_n_s64(a)), 0)
+}
+
+/// Signed saturating shift right unsigned narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshrun2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshrun_high_n_s16<const N: i32>(a: uint8x8_t, b: int16x8_t) -> uint8x16_t {
+ static_assert!(N : i32 where N >= 1 && N <= 8);
+ simd_shuffle16!(a, vqshrun_n_s16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+}
+
+/// Signed saturating shift right unsigned narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshrun2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshrun_high_n_s32<const N: i32>(a: uint16x4_t, b: int32x4_t) -> uint16x8_t {
+ static_assert!(N : i32 where N >= 1 && N <= 16);
+ simd_shuffle8!(a, vqshrun_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
+}
+
+/// Signed saturating shift right unsigned narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshrun2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshrun_high_n_s64<const N: i32>(a: uint32x2_t, b: int64x2_t) -> uint32x4_t {
+ static_assert!(N : i32 where N >= 1 && N <= 32);
+ simd_shuffle4!(a, vqshrun_n_s64::<N>(b), [0, 1, 2, 3])
+}
+
+/// Unsigned saturating accumulate of signed value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(usqadd))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsqaddb_u8(a: u8, b: i8) -> u8 {
+ simd_extract(vsqadd_u8(vdup_n_u8(a), vdup_n_s8(b)), 0)
+}
+
+/// Unsigned saturating accumulate of signed value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(usqadd))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsqaddh_u16(a: u16, b: i16) -> u16 {
+ simd_extract(vsqadd_u16(vdup_n_u16(a), vdup_n_s16(b)), 0)
+}
+
+/// Unsigned saturating accumulate of signed value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(usqadd))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsqadds_u32(a: u32, b: i32) -> u32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.usqadd.i32")]
+ fn vsqadds_u32_(a: u32, b: i32) -> u32;
+ }
+ vsqadds_u32_(a, b)
+}
+
+/// Unsigned saturating accumulate of signed value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(usqadd))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsqaddd_u64(a: u64, b: i64) -> u64 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.usqadd.i64")]
+ fn vsqaddd_u64_(a: u64, b: i64) -> u64;
+ }
+ vsqaddd_u64_(a, b)
+}
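+
+// Usage sketch (illustrative only): `usqadd` adds a signed value to an
+// unsigned accumulator, saturating at both ends of the unsigned range:
+//
+//     unsafe {
+//         assert_eq!(vsqaddb_u8(10, -20), 0);   // would go negative, clamps
+//         assert_eq!(vsqaddb_u8(250, 10), 255); // overflow clamps to u8::MAX
+//     }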
+
+/// Calculates the square root of each lane.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fsqrt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsqrt_f32(a: float32x2_t) -> float32x2_t {
+ simd_fsqrt(a)
+}
+
+/// Calculates the square root of each lane.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fsqrt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsqrtq_f32(a: float32x4_t) -> float32x4_t {
+ simd_fsqrt(a)
+}
+
+/// Calculates the square root of each lane.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fsqrt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsqrt_f64(a: float64x1_t) -> float64x1_t {
+ simd_fsqrt(a)
+}
+
+/// Calculates the square root of each lane.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fsqrt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsqrtq_f64(a: float64x2_t) -> float64x2_t {
+ simd_fsqrt(a)
+}
+
+/// Reciprocal square-root estimate.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frsqrte))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrsqrte_f64(a: float64x1_t) -> float64x1_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrte.v1f64")]
+ fn vrsqrte_f64_(a: float64x1_t) -> float64x1_t;
+ }
+ vrsqrte_f64_(a)
+}
+
+/// Reciprocal square-root estimate.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frsqrte))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrsqrteq_f64(a: float64x2_t) -> float64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrte.v2f64")]
+ fn vrsqrteq_f64_(a: float64x2_t) -> float64x2_t;
+ }
+ vrsqrteq_f64_(a)
+}
+
+/// Reciprocal square-root estimate.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frsqrte))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrsqrtes_f32(a: f32) -> f32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrte.f32")]
+ fn vrsqrtes_f32_(a: f32) -> f32;
+ }
+ vrsqrtes_f32_(a)
+}
+
+/// Reciprocal square-root estimate.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frsqrte))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrsqrted_f64(a: f64) -> f64 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrte.f64")]
+ fn vrsqrted_f64_(a: f64) -> f64;
+ }
+ vrsqrted_f64_(a)
+}
+
+/// Floating-point reciprocal square root step
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frsqrts))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrsqrts_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrts.v1f64")]
+ fn vrsqrts_f64_(a: float64x1_t, b: float64x1_t) -> float64x1_t;
+ }
+ vrsqrts_f64_(a, b)
+}
+
+/// Floating-point reciprocal square root step
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frsqrts))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrsqrtsq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrts.v2f64")]
+ fn vrsqrtsq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+ }
+ vrsqrtsq_f64_(a, b)
+}
+
+/// Floating-point reciprocal square root step
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frsqrts))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrsqrtss_f32(a: f32, b: f32) -> f32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrts.f32")]
+ fn vrsqrtss_f32_(a: f32, b: f32) -> f32;
+ }
+ vrsqrtss_f32_(a, b)
+}
+
+/// Floating-point reciprocal square root step
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frsqrts))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrsqrtsd_f64(a: f64, b: f64) -> f64 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrts.f64")]
+ fn vrsqrtsd_f64_(a: f64, b: f64) -> f64;
+ }
+ vrsqrtsd_f64_(a, b)
+}
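+
+// Usage sketch (illustrative only): `frsqrts` computes (3 - a*b) / 2, the
+// Newton-Raphson step that refines the `frsqrte` estimate of 1/sqrt(x):
+//
+//     unsafe {
+//         let x = 2.0_f32;
+//         let mut y = vrsqrtes_f32(x);   // rough estimate of 1/sqrt(2)
+//         y *= vrsqrtss_f32(x, y * y);   // each step roughly doubles
+//         y *= vrsqrtss_f32(x, y * y);   // the number of correct bits
+//     }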
+
+/// Reciprocal estimate.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frecpe))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrecpe_f64(a: float64x1_t) -> float64x1_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecpe.v1f64")]
+ fn vrecpe_f64_(a: float64x1_t) -> float64x1_t;
+ }
+ vrecpe_f64_(a)
+}
+
+/// Reciprocal estimate.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frecpe))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrecpeq_f64(a: float64x2_t) -> float64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecpe.v2f64")]
+ fn vrecpeq_f64_(a: float64x2_t) -> float64x2_t;
+ }
+ vrecpeq_f64_(a)
+}
+
+/// Reciprocal estimate.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frecpe))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrecpes_f32(a: f32) -> f32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecpe.f32")]
+ fn vrecpes_f32_(a: f32) -> f32;
+ }
+ vrecpes_f32_(a)
+}
+
+/// Reciprocal estimate.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frecpe))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrecped_f64(a: f64) -> f64 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecpe.f64")]
+ fn vrecped_f64_(a: f64) -> f64;
+ }
+ vrecped_f64_(a)
+}
+
+/// Floating-point reciprocal step
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frecps))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrecps_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecps.v1f64")]
+ fn vrecps_f64_(a: float64x1_t, b: float64x1_t) -> float64x1_t;
+ }
+ vrecps_f64_(a, b)
+}
+
+/// Floating-point reciprocal step
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frecps))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrecpsq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecps.v2f64")]
+ fn vrecpsq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+ }
+ vrecpsq_f64_(a, b)
+}
+
+/// Floating-point reciprocal step
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frecps))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrecpss_f32(a: f32, b: f32) -> f32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecps.f32")]
+ fn vrecpss_f32_(a: f32, b: f32) -> f32;
+ }
+ vrecpss_f32_(a, b)
+}
+
+/// Floating-point reciprocal step
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frecps))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrecpsd_f64(a: f64, b: f64) -> f64 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecps.f64")]
+ fn vrecpsd_f64_(a: f64, b: f64) -> f64;
+ }
+ vrecpsd_f64_(a, b)
+}
+
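+// Editorial sketch (not part of the generated bindings): FRECPS computes
+// (2 - a*b), so pairing it with the FRECPE estimate gives one
+// Newton-Raphson refinement of 1/x (`example_recip_refined` is a
+// hypothetical helper):
+#[cfg(test)]
+#[target_feature(enable = "neon")]
+unsafe fn example_recip_refined(x: f64) -> f64 {
+    let e = vrecped_f64(x); // coarse estimate of 1/x
+    e * vrecpsd_f64(x, e)   // e * (2 - x*e)
+}
+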
+/// Floating-point reciprocal exponent
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frecpx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrecpxs_f32(a: f32) -> f32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecpx.f32")]
+ fn vrecpxs_f32_(a: f32) -> f32;
+ }
+ vrecpxs_f32_(a)
+}
+
+/// Floating-point reciprocal exponent
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frecpx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrecpxd_f64(a: f64) -> f64 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecpx.f64")]
+ fn vrecpxd_f64_(a: f64) -> f64;
+ }
+ vrecpxd_f64_(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpret_s64_p64(a: poly64x1_t) -> int64x1_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpret_u64_p64(a: poly64x1_t) -> uint64x1_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpret_p64_s64(a: int64x1_t) -> poly64x1_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpret_p64_u64(a: uint64x1_t) -> poly64x1_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpretq_s64_p64(a: poly64x2_t) -> int64x2_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpretq_u64_p64(a: poly64x2_t) -> uint64x2_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpretq_p64_s64(a: int64x2_t) -> poly64x2_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpretq_p64_u64(a: uint64x2_t) -> poly64x2_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpret_s8_f64(a: float64x1_t) -> int8x8_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpret_s16_f64(a: float64x1_t) -> int16x4_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpret_s32_f64(a: float64x1_t) -> int32x2_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpret_s64_f64(a: float64x1_t) -> int64x1_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpretq_s8_f64(a: float64x2_t) -> int8x16_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpretq_s16_f64(a: float64x2_t) -> int16x8_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpretq_s32_f64(a: float64x2_t) -> int32x4_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpretq_s64_f64(a: float64x2_t) -> int64x2_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpret_u8_f64(a: float64x1_t) -> uint8x8_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpret_u16_f64(a: float64x1_t) -> uint16x4_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpret_u32_f64(a: float64x1_t) -> uint32x2_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpret_u64_f64(a: float64x1_t) -> uint64x1_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpretq_u8_f64(a: float64x2_t) -> uint8x16_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpretq_u16_f64(a: float64x2_t) -> uint16x8_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpretq_u32_f64(a: float64x2_t) -> uint32x4_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpretq_u64_f64(a: float64x2_t) -> uint64x2_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpret_p8_f64(a: float64x1_t) -> poly8x8_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpret_p16_f64(a: float64x1_t) -> poly16x4_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpret_p64_f32(a: float32x2_t) -> poly64x1_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpret_p64_f64(a: float64x1_t) -> poly64x1_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpretq_p8_f64(a: float64x2_t) -> poly8x16_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpretq_p16_f64(a: float64x2_t) -> poly16x8_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpretq_p64_f32(a: float32x4_t) -> poly64x2_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpretq_p64_f64(a: float64x2_t) -> poly64x2_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpretq_p128_f64(a: float64x2_t) -> p128 {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpret_f64_s8(a: int8x8_t) -> float64x1_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpret_f64_s16(a: int16x4_t) -> float64x1_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpret_f64_s32(a: int32x2_t) -> float64x1_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpret_f64_s64(a: int64x1_t) -> float64x1_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpretq_f64_s8(a: int8x16_t) -> float64x2_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpretq_f64_s16(a: int16x8_t) -> float64x2_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpretq_f64_s32(a: int32x4_t) -> float64x2_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpretq_f64_s64(a: int64x2_t) -> float64x2_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpret_f64_p8(a: poly8x8_t) -> float64x1_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpret_f64_u16(a: uint16x4_t) -> float64x1_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpret_f64_u32(a: uint32x2_t) -> float64x1_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpret_f64_u64(a: uint64x1_t) -> float64x1_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpretq_f64_p8(a: poly8x16_t) -> float64x2_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpretq_f64_u16(a: uint16x8_t) -> float64x2_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpretq_f64_u32(a: uint32x4_t) -> float64x2_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpretq_f64_u64(a: uint64x2_t) -> float64x2_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpret_f64_u8(a: uint8x8_t) -> float64x1_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpret_f64_p16(a: poly16x4_t) -> float64x1_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpret_f64_p64(a: poly64x1_t) -> float64x1_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpret_f32_p64(a: poly64x1_t) -> float32x2_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpretq_f64_u8(a: uint8x16_t) -> float64x2_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpretq_f64_p16(a: poly16x8_t) -> float64x2_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpretq_f64_p64(a: poly64x2_t) -> float64x2_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpretq_f32_p64(a: poly64x2_t) -> float32x4_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpretq_f64_p128(a: p128) -> float64x2_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpret_f64_f32(a: float32x2_t) -> float64x1_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpret_f32_f64(a: float64x1_t) -> float32x2_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpretq_f64_f32(a: float32x4_t) -> float64x2_t {
+ transmute(a)
+}
+
+/// Vector reinterpret cast operation
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vreinterpretq_f32_f64(a: float64x2_t) -> float32x4_t {
+ transmute(a)
+}
+
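+// Editorial sketch (not part of the generated bindings): every
+// vreinterpret* above is a plain bit-pattern cast, so a round trip
+// through another element type is the identity:
+#[cfg(test)]
+#[target_feature(enable = "neon")]
+unsafe fn example_reinterpret_roundtrip() {
+    let a: float64x2_t = transmute([1.0f64, -2.0]);
+    let bits: uint64x2_t = vreinterpretq_u64_f64(a); // same 128 bits, viewed as u64 lanes
+    let b0: u64 = simd_extract(bits, 0);
+    assert_eq!(b0, 1.0f64.to_bits()); // IEEE-754 bit pattern of 1.0
+    let back: float64x2_t = vreinterpretq_f64_u64(bits);
+    let r0: f64 = simd_extract(back, 0);
+    assert_eq!(r0, 1.0);
+}
+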
+/// Signed rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(srshl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrshld_s64(a: i64, b: i64) -> i64 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.i64")]
+ fn vrshld_s64_(a: i64, b: i64) -> i64;
+ }
+ vrshld_s64_(a, b)
+}
+
+/// Unsigned rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(urshl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrshld_u64(a: u64, b: i64) -> u64 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.i64")]
+ fn vrshld_u64_(a: u64, b: i64) -> u64;
+ }
+ vrshld_u64_(a, b)
+}
+
+/// Signed rounding shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(srshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrshrd_n_s64<const N: i32>(a: i64) -> i64 {
+ static_assert!(N : i32 where N >= 1 && N <= 64);
+ vrshld_s64(a, -N as i64)
+}
+
+/// Unsigned rounding shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(urshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrshrd_n_u64<const N: i32>(a: u64) -> u64 {
+ static_assert!(N : i32 where N >= 1 && N <= 64);
+ vrshld_u64(a, -N as i64)
+}
+
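+// Editorial sketch (not part of the generated bindings): the rounding
+// shifts add 1 << (N - 1) before shifting, so halfway values round up
+// where a plain arithmetic shift truncates:
+#[cfg(test)]
+#[target_feature(enable = "neon")]
+unsafe fn example_rounding_shift() {
+    assert_eq!(vrshrd_n_s64::<2>(6), 2); // (6 + 2) >> 2, i.e. 6/4 rounded
+    assert_eq!(6i64 >> 2, 1);            // plain shift truncates
+}
+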
+/// Rounding shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
+ static_assert!(N : i32 where N >= 1 && N <= 8);
+ simd_shuffle16!(a, vrshrn_n_s16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+}
+
+/// Rounding shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
+ static_assert!(N : i32 where N >= 1 && N <= 16);
+ simd_shuffle8!(a, vrshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
+}
+
+/// Rounding shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
+ static_assert!(N : i32 where N >= 1 && N <= 32);
+ simd_shuffle4!(a, vrshrn_n_s64::<N>(b), [0, 1, 2, 3])
+}
+
+/// Rounding shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
+ static_assert!(N : i32 where N >= 1 && N <= 8);
+ simd_shuffle16!(a, vrshrn_n_u16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+}
+
+/// Rounding shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
+ static_assert!(N : i32 where N >= 1 && N <= 16);
+ simd_shuffle8!(a, vrshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
+}
+
+/// Rounding shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
+ static_assert!(N : i32 where N >= 1 && N <= 32);
+ simd_shuffle4!(a, vrshrn_n_u64::<N>(b), [0, 1, 2, 3])
+}
+
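+// Editorial sketch (not part of the generated bindings): the *_high_n
+// forms narrow `b` with rounding and append the result above `a`, giving
+// the layout [a0 .. a7, rshrn(b)0 .. rshrn(b)7]:
+#[cfg(test)]
+#[target_feature(enable = "neon")]
+unsafe fn example_rshrn_high() {
+    let lo: int8x8_t = transmute([1i8; 8]);
+    let wide: int16x8_t = transmute([6i16; 8]);
+    let r: int8x16_t = vrshrn_high_n_s16::<2>(lo, wide);
+    let r0: i8 = simd_extract(r, 0); // lower half: `lo` unchanged
+    let r8: i8 = simd_extract(r, 8); // upper half: (6 + 2) >> 2 == 2
+    assert_eq!((r0, r8), (1, 2));
+}
+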
+/// Signed rounding shift right and accumulate.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(srsra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrsrad_n_s64<const N: i32>(a: i64, b: i64) -> i64 {
+ static_assert!(N : i32 where N >= 1 && N <= 64);
+ let b: i64 = vrshrd_n_s64::<N>(b);
+ a.wrapping_add(b)
+}
+
+/// Unsigned rounding shift right and accumulate.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ursra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrsrad_n_u64<const N: i32>(a: u64, b: u64) -> u64 {
+ static_assert!(N : i32 where N >= 1 && N <= 64);
+ let b: u64 = vrshrd_n_u64::<N>(b);
+ a.wrapping_add(b)
+}
+
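+// Editorial sketch (not part of the generated bindings): the accumulate
+// forms are "rounding shift right, then wrapping add", exactly as the
+// scalar bodies above spell out:
+#[cfg(test)]
+#[target_feature(enable = "neon")]
+unsafe fn example_rsra() {
+    // 7 >> 1 with rounding is (7 + 1) >> 1 == 4; accumulated onto 10 gives 14.
+    assert_eq!(vrsrad_n_s64::<1>(10, 7), 14);
+}
+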
+/// Rounding subtract returning high narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(rsubhn2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
+ let x: int8x8_t = vrsubhn_s16(b, c);
+ simd_shuffle16!(a, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+}
+
+/// Rounding subtract returning high narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(rsubhn2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
+ let x: int16x4_t = vrsubhn_s32(b, c);
+ simd_shuffle8!(a, x, [0, 1, 2, 3, 4, 5, 6, 7])
+}
+
+/// Rounding subtract returning high narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(rsubhn2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
+ let x: int32x2_t = vrsubhn_s64(b, c);
+ simd_shuffle4!(a, x, [0, 1, 2, 3])
+}
+
+/// Rounding subtract returning high narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(rsubhn2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t {
+ let x: uint8x8_t = vrsubhn_u16(b, c);
+ simd_shuffle16!(a, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+}
+
+/// Rounding subtract returning high narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(rsubhn2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t {
+ let x: uint16x4_t = vrsubhn_u32(b, c);
+ simd_shuffle8!(a, x, [0, 1, 2, 3, 4, 5, 6, 7])
+}
+
+/// Rounding subtract returning high narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(rsubhn2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t {
+ let x: uint32x2_t = vrsubhn_u64(b, c);
+ simd_shuffle4!(a, x, [0, 1, 2, 3])
+}
+
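+// Editorial sketch (not part of the generated bindings): these place the
+// narrow result of vrsubhn (the rounded high half of each lane of b - c)
+// in the upper half of a 128-bit vector, keeping `a` as the lower half:
+#[cfg(test)]
+#[target_feature(enable = "neon")]
+unsafe fn example_rsubhn_high(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
+    vrsubhn_high_s16(a, b, c) // == concat(a, vrsubhn_s16(b, c))
+}
+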
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vset_lane_f64<const LANE: i32>(a: f64, b: float64x1_t) -> float64x1_t {
+ static_assert!(LANE : i32 where LANE == 0);
+ simd_insert(b, LANE as u32, a)
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsetq_lane_f64<const LANE: i32>(a: f64, b: float64x2_t) -> float64x2_t {
+ static_assert_imm1!(LANE);
+ simd_insert(b, LANE as u32, a)
+}
+
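+// Editorial sketch (not part of the generated bindings): set_lane writes
+// a single lane and leaves the rest of the vector untouched:
+#[cfg(test)]
+#[target_feature(enable = "neon")]
+unsafe fn example_set_lane() {
+    let v: float64x2_t = transmute([0.0f64, 0.0]);
+    let v = vsetq_lane_f64::<1>(42.0, v);
+    let lane0: f64 = simd_extract(v, 0);
+    let lane1: f64 = simd_extract(v, 1);
+    assert_eq!((lane0, lane1), (0.0, 42.0));
+}
+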
+/// Signed shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sshl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vshld_s64(a: i64, b: i64) -> i64 {
+ transmute(vshl_s64(transmute(a), transmute(b)))
+}
+
+/// Unsigned shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ushl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vshld_u64(a: u64, b: i64) -> u64 {
+ transmute(vshl_u64(transmute(a), transmute(b)))
+}
+
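+// Editorial sketch (not part of the generated bindings): as in the
+// vector SSHL/USHL forms, a negative shift count shifts right:
+#[cfg(test)]
+#[target_feature(enable = "neon")]
+unsafe fn example_shld() {
+    assert_eq!(vshld_s64(16, 2), 64); // shift left by 2
+    assert_eq!(vshld_s64(16, -2), 4); // negative count shifts right
+}
+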
+/// Signed shift left long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sshll2, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vshll_high_n_s8<const N: i32>(a: int8x16_t) -> int16x8_t {
+ static_assert!(N : i32 where N >= 0 && N <= 8);
+ let b: int8x8_t = simd_shuffle8!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
+ vshll_n_s8::<N>(b)
+}
+
+/// Signed shift left long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sshll2, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vshll_high_n_s16<const N: i32>(a: int16x8_t) -> int32x4_t {
+ static_assert!(N : i32 where N >= 0 && N <= 16);
+ let b: int16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]);
+ vshll_n_s16::<N>(b)
+}
+
+/// Signed shift left long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sshll2, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vshll_high_n_s32<const N: i32>(a: int32x4_t) -> int64x2_t {
+ static_assert!(N : i32 where N >= 0 && N <= 32);
+ let b: int32x2_t = simd_shuffle2!(a, a, [2, 3]);
+ vshll_n_s32::<N>(b)
+}
+
+/// Unsigned shift left long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ushll2, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vshll_high_n_u8<const N: i32>(a: uint8x16_t) -> uint16x8_t {
+ static_assert!(N : i32 where N >= 0 && N <= 8);
+ let b: uint8x8_t = simd_shuffle8!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
+ vshll_n_u8::<N>(b)
+}
+
+/// Unsigned shift left long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ushll2, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vshll_high_n_u16<const N: i32>(a: uint16x8_t) -> uint32x4_t {
+ static_assert!(N : i32 where N >= 0 && N <= 16);
+ let b: uint16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]);
+ vshll_n_u16::<N>(b)
+}
+
+/// Unsigned shift left long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ushll2, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vshll_high_n_u32<const N: i32>(a: uint32x4_t) -> uint64x2_t {
+ static_assert!(N : i32 where N >= 0 && N <= 32);
+ let b: uint32x2_t = simd_shuffle2!(a, a, [2, 3]);
+ vshll_n_u32::<N>(b)
+}
+
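+// Editorial sketch (not part of the generated bindings): the *_high
+// forms widen the upper half of the input, so this is equivalent to
+// vshll_n_s8 applied to vget_high_s8(a):
+#[cfg(test)]
+#[target_feature(enable = "neon")]
+unsafe fn example_shll_high(a: int8x16_t) -> int16x8_t {
+    vshll_high_n_s8::<2>(a) // widen lanes 8..=15 to i16, then shift left by 2
+}
+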
+/// Shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(shrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
+ static_assert!(N : i32 where N >= 1 && N <= 8);
+ simd_shuffle16!(a, vshrn_n_s16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+}
+
+/// Shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(shrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
+ static_assert!(N : i32 where N >= 1 && N <= 16);
+ simd_shuffle8!(a, vshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
+}
+
+/// Shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(shrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
+ static_assert!(N : i32 where N >= 1 && N <= 32);
+ simd_shuffle4!(a, vshrn_n_s64::<N>(b), [0, 1, 2, 3])
+}
+
+/// Shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(shrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
+ static_assert!(N : i32 where N >= 1 && N <= 8);
+ simd_shuffle16!(a, vshrn_n_u16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+}
+
+/// Shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(shrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
+ static_assert!(N : i32 where N >= 1 && N <= 16);
+ simd_shuffle8!(a, vshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
+}
+
+/// Shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(shrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
+ static_assert!(N : i32 where N >= 1 && N <= 32);
+ simd_shuffle4!(a, vshrn_n_u64::<N>(b), [0, 1, 2, 3])
+}
+
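+// Editorial sketch (not part of the generated bindings): unlike the
+// vrshrn_high_* family above, these truncate instead of rounding:
+#[cfg(test)]
+#[target_feature(enable = "neon")]
+unsafe fn example_shrn_vs_rshrn() {
+    let lo: int8x8_t = transmute([0i8; 8]);
+    let wide: int16x8_t = transmute([6i16; 8]);
+    let t: int8x16_t = vshrn_high_n_s16::<2>(lo, wide);  // 6 >> 2 == 1
+    let r: int8x16_t = vrshrn_high_n_s16::<2>(lo, wide); // (6 + 2) >> 2 == 2
+    let t8: i8 = simd_extract(t, 8);
+    let r8: i8 = simd_extract(r, 8);
+    assert_eq!((t8, r8), (1, 2));
+}
+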
+/// SM3PARTW1
+#[inline]
+#[target_feature(enable = "neon,sm4")]
+#[cfg_attr(test, assert_instr(sm3partw1))]
+pub unsafe fn vsm3partw1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crypto.sm3partw1")]
+ fn vsm3partw1q_u32_(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t;
+ }
+ vsm3partw1q_u32_(a, b, c)
+}
+
+/// SM3PARTW2
+#[inline]
+#[target_feature(enable = "neon,sm4")]
+#[cfg_attr(test, assert_instr(sm3partw2))]
+pub unsafe fn vsm3partw2q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crypto.sm3partw2")]
+ fn vsm3partw2q_u32_(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t;
+ }
+ vsm3partw2q_u32_(a, b, c)
+}
+
+/// SM3SS1
+#[inline]
+#[target_feature(enable = "neon,sm4")]
+#[cfg_attr(test, assert_instr(sm3ss1))]
+pub unsafe fn vsm3ss1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crypto.sm3ss1")]
+ fn vsm3ss1q_u32_(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t;
+ }
+ vsm3ss1q_u32_(a, b, c)
+}
+
+/// SM4 key
+#[inline]
+#[target_feature(enable = "neon,sm4")]
+#[cfg_attr(test, assert_instr(sm4ekey))]
+pub unsafe fn vsm4ekeyq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crypto.sm4ekey")]
+ fn vsm4ekeyq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t;
+ }
+ vsm4ekeyq_u32_(a, b)
+}
+
+/// SM4 encode
+#[inline]
+#[target_feature(enable = "neon,sm4")]
+#[cfg_attr(test, assert_instr(sm4e))]
+pub unsafe fn vsm4eq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crypto.sm4e")]
+ fn vsm4eq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t;
+ }
+ vsm4eq_u32_(a, b)
+}
+
+/// Rotate and exclusive OR
+#[inline]
+#[target_feature(enable = "neon,sha3")]
+#[cfg_attr(test, assert_instr(rax1))]
+pub unsafe fn vrax1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crypto.rax1")]
+ fn vrax1q_u64_(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t;
+ }
+ vrax1q_u64_(a, b)
+}
+
+/// SHA512 hash update part 1
+#[inline]
+#[target_feature(enable = "neon,sha3")]
+#[cfg_attr(test, assert_instr(sha512h))]
+pub unsafe fn vsha512hq_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crypto.sha512h")]
+ fn vsha512hq_u64_(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t;
+ }
+ vsha512hq_u64_(a, b, c)
+}
+
+/// SHA512 hash update part 2
+#[inline]
+#[target_feature(enable = "neon,sha3")]
+#[cfg_attr(test, assert_instr(sha512h2))]
+pub unsafe fn vsha512h2q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crypto.sha512h2")]
+ fn vsha512h2q_u64_(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t;
+ }
+ vsha512h2q_u64_(a, b, c)
+}
+
+/// SHA512 schedule update 0
+#[inline]
+#[target_feature(enable = "neon,sha3")]
+#[cfg_attr(test, assert_instr(sha512su0))]
+pub unsafe fn vsha512su0q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crypto.sha512su0")]
+ fn vsha512su0q_u64_(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t;
+ }
+ vsha512su0q_u64_(a, b)
+}
+
+/// SHA512 schedule update 1
+#[inline]
+#[target_feature(enable = "neon,sha3")]
+#[cfg_attr(test, assert_instr(sha512su1))]
+pub unsafe fn vsha512su1q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crypto.sha512su1")]
+ fn vsha512su1q_u64_(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t;
+ }
+ vsha512su1q_u64_(a, b, c)
+}
+
+/// Floating-point round to 32-bit integer, using current rounding mode
+#[inline]
+#[target_feature(enable = "neon,frintts")]
+#[cfg_attr(test, assert_instr(frint32x))]
+pub unsafe fn vrnd32x_f32(a: float32x2_t) -> float32x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frint32x.v2f32")]
+ fn vrnd32x_f32_(a: float32x2_t) -> float32x2_t;
+ }
+ vrnd32x_f32_(a)
+}
+
+/// Floating-point round to 32-bit integer, using current rounding mode
+#[inline]
+#[target_feature(enable = "neon,frintts")]
+#[cfg_attr(test, assert_instr(frint32x))]
+pub unsafe fn vrnd32xq_f32(a: float32x4_t) -> float32x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frint32x.v4f32")]
+ fn vrnd32xq_f32_(a: float32x4_t) -> float32x4_t;
+ }
+ vrnd32xq_f32_(a)
+}
+
+/// Floating-point round to 32-bit integer toward zero
+#[inline]
+#[target_feature(enable = "neon,frintts")]
+#[cfg_attr(test, assert_instr(frint32z))]
+pub unsafe fn vrnd32z_f32(a: float32x2_t) -> float32x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frint32z.v2f32")]
+ fn vrnd32z_f32_(a: float32x2_t) -> float32x2_t;
+ }
+ vrnd32z_f32_(a)
+}
+
+/// Floating-point round to 32-bit integer toward zero
+#[inline]
+#[target_feature(enable = "neon,frintts")]
+#[cfg_attr(test, assert_instr(frint32z))]
+pub unsafe fn vrnd32zq_f32(a: float32x4_t) -> float32x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frint32z.v4f32")]
+ fn vrnd32zq_f32_(a: float32x4_t) -> float32x4_t;
+ }
+ vrnd32zq_f32_(a)
+}
+
+/// Floating-point round to 64-bit integer, using current rounding mode
+#[inline]
+#[target_feature(enable = "neon,frintts")]
+#[cfg_attr(test, assert_instr(frint64x))]
+pub unsafe fn vrnd64x_f32(a: float32x2_t) -> float32x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frint64x.v2f32")]
+ fn vrnd64x_f32_(a: float32x2_t) -> float32x2_t;
+ }
+ vrnd64x_f32_(a)
+}
+
+/// Floating-point round to 64-bit integer, using current rounding mode
+#[inline]
+#[target_feature(enable = "neon,frintts")]
+#[cfg_attr(test, assert_instr(frint64x))]
+pub unsafe fn vrnd64xq_f32(a: float32x4_t) -> float32x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frint64x.v4f32")]
+ fn vrnd64xq_f32_(a: float32x4_t) -> float32x4_t;
+ }
+ vrnd64xq_f32_(a)
+}
+
+/// Floating-point round to 64-bit integer toward zero
+#[inline]
+#[target_feature(enable = "neon,frintts")]
+#[cfg_attr(test, assert_instr(frint64z))]
+pub unsafe fn vrnd64z_f32(a: float32x2_t) -> float32x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frint64z.v2f32")]
+ fn vrnd64z_f32_(a: float32x2_t) -> float32x2_t;
+ }
+ vrnd64z_f32_(a)
+}
+
+/// Floating-point round to 64-bit integer toward zero
+#[inline]
+#[target_feature(enable = "neon,frintts")]
+#[cfg_attr(test, assert_instr(frint64z))]
+pub unsafe fn vrnd64zq_f32(a: float32x4_t) -> float32x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frint64z.v4f32")]
+ fn vrnd64zq_f32_(a: float32x4_t) -> float32x4_t;
+ }
+ vrnd64zq_f32_(a)
+}
+
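+// Editorial sketch (not part of the generated bindings): assuming the
+// default round-to-nearest-even mode in FPCR, halfway cases go to the
+// even integer; inputs outside the 32-bit range raise Invalid Operation:
+#[cfg(test)]
+#[target_feature(enable = "neon,frintts")]
+unsafe fn example_rnd32x() {
+    let a: float32x2_t = transmute([2.5f32, 3.5]);
+    let r: float32x2_t = vrnd32x_f32(a);
+    let r0: f32 = simd_extract(r, 0); // 2.5 -> 2.0 (ties to even)
+    let r1: f32 = simd_extract(r, 1); // 3.5 -> 4.0
+    assert_eq!((r0, r1), (2.0, 4.0));
+}
+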
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(trn1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+ simd_shuffle8!(a, b, [0, 8, 2, 10, 4, 12, 6, 14])
+}
+
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(trn1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+ simd_shuffle16!(a, b, [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30])
+}
+
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(trn1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+ simd_shuffle4!(a, b, [0, 4, 2, 6])
+}
+
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(trn1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+ simd_shuffle8!(a, b, [0, 8, 2, 10, 4, 12, 6, 14])
+}
+
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(trn1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+ simd_shuffle4!(a, b, [0, 4, 2, 6])
+}
+
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(trn1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+ simd_shuffle8!(a, b, [0, 8, 2, 10, 4, 12, 6, 14])
+}
+
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(trn1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+ simd_shuffle16!(a, b, [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30])
+}
+
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(trn1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+ simd_shuffle4!(a, b, [0, 4, 2, 6])
+}
+
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(trn1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+ simd_shuffle8!(a, b, [0, 8, 2, 10, 4, 12, 6, 14])
+}
+
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(trn1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+ simd_shuffle4!(a, b, [0, 4, 2, 6])
+}
+
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(trn1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
+ simd_shuffle8!(a, b, [0, 8, 2, 10, 4, 12, 6, 14])
+}
+
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(trn1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
+ simd_shuffle16!(a, b, [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30])
+}
+
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(trn1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
+ simd_shuffle4!(a, b, [0, 4, 2, 6])
+}
+
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(trn1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
+ simd_shuffle8!(a, b, [0, 8, 2, 10, 4, 12, 6, 14])
+}
+
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+ simd_shuffle2!(a, b, [0, 2])
+}
+
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+ simd_shuffle2!(a, b, [0, 2])
+}
+
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+ simd_shuffle2!(a, b, [0, 2])
+}
+
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+ simd_shuffle2!(a, b, [0, 2])
+}
+
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
+ simd_shuffle2!(a, b, [0, 2])
+}
+
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(trn1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+ simd_shuffle4!(a, b, [0, 4, 2, 6])
+}
+
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+ simd_shuffle2!(a, b, [0, 2])
+}
+
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+ simd_shuffle2!(a, b, [0, 2])
+}
+
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(trn2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+ simd_shuffle8!(a, b, [1, 9, 3, 11, 5, 13, 7, 15])
+}
+
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(trn2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+ simd_shuffle16!(a, b, [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31])
+}
+
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(trn2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+ simd_shuffle4!(a, b, [1, 5, 3, 7])
+}
+
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(trn2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+ simd_shuffle8!(a, b, [1, 9, 3, 11, 5, 13, 7, 15])
+}
+
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(trn2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+ simd_shuffle4!(a, b, [1, 5, 3, 7])
+}
+
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(trn2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+ simd_shuffle8!(a, b, [1, 9, 3, 11, 5, 13, 7, 15])
+}
+
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(trn2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+ simd_shuffle16!(a, b, [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31])
+}
+
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(trn2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+ simd_shuffle4!(a, b, [1, 5, 3, 7])
+}
+
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(trn2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+ simd_shuffle8!(a, b, [1, 9, 3, 11, 5, 13, 7, 15])
+}
+
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(trn2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+ simd_shuffle4!(a, b, [1, 5, 3, 7])
+}
+
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(trn2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
+ simd_shuffle8!(a, b, [1, 9, 3, 11, 5, 13, 7, 15])
+}
+
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(trn2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
+ simd_shuffle16!(a, b, [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31])
+}
+
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(trn2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
+ simd_shuffle4!(a, b, [1, 5, 3, 7])
+}
+
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(trn2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
+ simd_shuffle8!(a, b, [1, 9, 3, 11, 5, 13, 7, 15])
+}
+
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+ simd_shuffle2!(a, b, [1, 3])
+}
+
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+ simd_shuffle2!(a, b, [1, 3])
+}
+
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+ simd_shuffle2!(a, b, [1, 3])
+}
+
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+ simd_shuffle2!(a, b, [1, 3])
+}
+
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
+ simd_shuffle2!(a, b, [1, 3])
+}
+
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(trn2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+ simd_shuffle4!(a, b, [1, 5, 3, 7])
+}
+
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+ simd_shuffle2!(a, b, [1, 3])
+}
+
+/// Transpose vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtrn2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+ simd_shuffle2!(a, b, [1, 3])
+}
+
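+// Editorial sketch (not part of the generated bindings): trn1 gathers
+// the even-indexed lanes of each input and trn2 the odd ones, so the
+// pair together transposes 2x2 blocks of lanes:
+#[cfg(test)]
+#[target_feature(enable = "neon")]
+unsafe fn example_trn_pair() {
+    let a: int16x4_t = transmute([0i16, 1, 2, 3]);
+    let b: int16x4_t = transmute([4i16, 5, 6, 7]);
+    let lo: [i16; 4] = transmute(vtrn1_s16(a, b));
+    let hi: [i16; 4] = transmute(vtrn2_s16(a, b));
+    assert_eq!(lo, [0, 4, 2, 6]); // [a0, b0, a2, b2]
+    assert_eq!(hi, [1, 5, 3, 7]); // [a1, b1, a3, b3]
+}
+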
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+ simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11])
+}
+
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+ simd_shuffle16!(a, b, [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23])
+}
+
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+ simd_shuffle4!(a, b, [0, 4, 1, 5])
+}
+
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+ simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11])
+}
+
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+ simd_shuffle2!(a, b, [0, 2])
+}
+
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+ simd_shuffle4!(a, b, [0, 4, 1, 5])
+}
+
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+ simd_shuffle2!(a, b, [0, 2])
+}
+
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+ simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11])
+}
+
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+ simd_shuffle16!(a, b, [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23])
+}
+
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+ simd_shuffle4!(a, b, [0, 4, 1, 5])
+}
+
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+ simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11])
+}
+
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+ simd_shuffle2!(a, b, [0, 2])
+}
+
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+ simd_shuffle4!(a, b, [0, 4, 1, 5])
+}
+
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+ simd_shuffle2!(a, b, [0, 2])
+}
+
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
+ simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11])
+}
+
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
+ simd_shuffle16!(a, b, [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23])
+}
+
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
+ simd_shuffle4!(a, b, [0, 4, 1, 5])
+}
+
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
+ simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11])
+}
+
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
+ simd_shuffle2!(a, b, [0, 2])
+}
+
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+ simd_shuffle2!(a, b, [0, 2])
+}
+
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+ simd_shuffle4!(a, b, [0, 4, 1, 5])
+}
+
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+ simd_shuffle2!(a, b, [0, 2])
+}
+
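+// The `vzip1*` intrinsics above interleave the low halves of the two inputs:
+// the shuffle mask indexes the virtual concatenation `[a, b]`, so with
+// `a = [a0, a1, a2, a3]` and `b = [b0, b1, b2, b3]` the mask `[0, 4, 1, 5]`
+// yields `[a0, b0, a1, b1]`.
+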
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+ simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15])
+}
+
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+ simd_shuffle16!(a, b, [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31])
+}
+
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+ simd_shuffle4!(a, b, [2, 6, 3, 7])
+}
+
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+ simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15])
+}
+
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+ simd_shuffle2!(a, b, [1, 3])
+}
+
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+ simd_shuffle4!(a, b, [2, 6, 3, 7])
+}
+
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+ simd_shuffle2!(a, b, [1, 3])
+}
+
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+ simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15])
+}
+
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+ simd_shuffle16!(a, b, [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31])
+}
+
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+ simd_shuffle4!(a, b, [2, 6, 3, 7])
+}
+
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+ simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15])
+}
+
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+ simd_shuffle2!(a, b, [1, 3])
+}
+
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+ simd_shuffle4!(a, b, [2, 6, 3, 7])
+}
+
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+ simd_shuffle2!(a, b, [1, 3])
+}
+
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
+ simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15])
+}
+
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
+ simd_shuffle16!(a, b, [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31])
+}
+
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
+ simd_shuffle4!(a, b, [2, 6, 3, 7])
+}
+
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
+ simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15])
+}
+
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
+ simd_shuffle2!(a, b, [1, 3])
+}
+
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+ simd_shuffle2!(a, b, [1, 3])
+}
+
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+ simd_shuffle4!(a, b, [2, 6, 3, 7])
+}
+
+/// Zip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vzip2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+ simd_shuffle2!(a, b, [1, 3])
+}
+
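+// The `vzip2*` intrinsics mirror `vzip1*` but interleave the high halves:
+// with `a = [a0, a1, a2, a3]` and `b = [b0, b1, b2, b3]` the mask
+// `[2, 6, 3, 7]` yields `[a2, b2, a3, b3]`.
+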
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uzp1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+ simd_shuffle8!(a, b, [0, 2, 4, 6, 8, 10, 12, 14])
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uzp1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+ simd_shuffle16!(a, b, [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30])
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uzp1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+ simd_shuffle4!(a, b, [0, 2, 4, 6])
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uzp1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+ simd_shuffle8!(a, b, [0, 2, 4, 6, 8, 10, 12, 14])
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uzp1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+ simd_shuffle4!(a, b, [0, 2, 4, 6])
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uzp1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+ simd_shuffle8!(a, b, [0, 2, 4, 6, 8, 10, 12, 14])
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uzp1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+ simd_shuffle16!(a, b, [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30])
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uzp1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+ simd_shuffle4!(a, b, [0, 2, 4, 6])
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uzp1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+ simd_shuffle8!(a, b, [0, 2, 4, 6, 8, 10, 12, 14])
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uzp1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+ simd_shuffle4!(a, b, [0, 2, 4, 6])
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uzp1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
+ simd_shuffle8!(a, b, [0, 2, 4, 6, 8, 10, 12, 14])
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uzp1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
+ simd_shuffle16!(a, b, [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30])
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uzp1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
+ simd_shuffle4!(a, b, [0, 2, 4, 6])
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uzp1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
+ simd_shuffle8!(a, b, [0, 2, 4, 6, 8, 10, 12, 14])
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+ simd_shuffle2!(a, b, [0, 2])
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+ simd_shuffle2!(a, b, [0, 2])
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+ simd_shuffle2!(a, b, [0, 2])
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+ simd_shuffle2!(a, b, [0, 2])
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
+ simd_shuffle2!(a, b, [0, 2])
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uzp1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+ simd_shuffle4!(a, b, [0, 2, 4, 6])
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+ simd_shuffle2!(a, b, [0, 2])
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+ simd_shuffle2!(a, b, [0, 2])
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uzp2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+ simd_shuffle8!(a, b, [1, 3, 5, 7, 9, 11, 13, 15])
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uzp2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+ simd_shuffle16!(a, b, [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31])
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uzp2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+ simd_shuffle4!(a, b, [1, 3, 5, 7])
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uzp2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+ simd_shuffle8!(a, b, [1, 3, 5, 7, 9, 11, 13, 15])
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uzp2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+ simd_shuffle4!(a, b, [1, 3, 5, 7])
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uzp2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+ simd_shuffle8!(a, b, [1, 3, 5, 7, 9, 11, 13, 15])
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uzp2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+ simd_shuffle16!(a, b, [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31])
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uzp2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+ simd_shuffle4!(a, b, [1, 3, 5, 7])
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uzp2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+ simd_shuffle8!(a, b, [1, 3, 5, 7, 9, 11, 13, 15])
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uzp2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+ simd_shuffle4!(a, b, [1, 3, 5, 7])
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uzp2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
+ simd_shuffle8!(a, b, [1, 3, 5, 7, 9, 11, 13, 15])
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uzp2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
+ simd_shuffle16!(a, b, [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31])
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uzp2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
+ simd_shuffle4!(a, b, [1, 3, 5, 7])
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uzp2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
+ simd_shuffle8!(a, b, [1, 3, 5, 7, 9, 11, 13, 15])
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+ simd_shuffle2!(a, b, [1, 3])
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+ simd_shuffle2!(a, b, [1, 3])
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+ simd_shuffle2!(a, b, [1, 3])
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+ simd_shuffle2!(a, b, [1, 3])
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
+ simd_shuffle2!(a, b, [1, 3])
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uzp2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+ simd_shuffle4!(a, b, [1, 3, 5, 7])
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+ simd_shuffle2!(a, b, [1, 3])
+}
+
+/// Unzip vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(zip2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuzp2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+ simd_shuffle2!(a, b, [1, 3])
+}
+
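+// The `vuzp1*`/`vuzp2*` intrinsics de-interleave: `uzp1` keeps the
+// even-indexed lanes of the concatenation `[a, b]`, `uzp2` the odd-indexed
+// ones. As with the transposes, the two-lane variants assert `zip1`/`zip2`
+// because UZP and ZIP coincide when each input has only two lanes.
+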
+/// Unsigned Absolute difference and Accumulate Long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uabal))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vabal_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint16x8_t {
+ let d: uint8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
+ let e: uint8x8_t = simd_shuffle8!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]);
+ let f: uint8x8_t = vabd_u8(d, e);
+ simd_add(a, simd_cast(f))
+}
+
+/// Unsigned Absolute difference and Accumulate Long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uabal))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vabal_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t {
+ let d: uint16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]);
+ let e: uint16x4_t = simd_shuffle4!(c, c, [4, 5, 6, 7]);
+ let f: uint16x4_t = vabd_u16(d, e);
+ simd_add(a, simd_cast(f))
+}
+
+/// Unsigned Absolute difference and Accumulate Long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uabal))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vabal_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t {
+ let d: uint32x2_t = simd_shuffle2!(b, b, [2, 3]);
+ let e: uint32x2_t = simd_shuffle2!(c, c, [2, 3]);
+ let f: uint32x2_t = vabd_u32(d, e);
+ simd_add(a, simd_cast(f))
+}
+
+/// Signed Absolute difference and Accumulate Long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sabal))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vabal_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t {
+ let d: int8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
+ let e: int8x8_t = simd_shuffle8!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]);
+ let f: int8x8_t = vabd_s8(d, e);
+ let f: uint8x8_t = simd_cast(f);
+ simd_add(a, simd_cast(f))
+}
+
+/// Signed Absolute difference and Accumulate Long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sabal))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vabal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
+ let d: int16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]);
+ let e: int16x4_t = simd_shuffle4!(c, c, [4, 5, 6, 7]);
+ let f: int16x4_t = vabd_s16(d, e);
+ let f: uint16x4_t = simd_cast(f);
+ simd_add(a, simd_cast(f))
+}
+
+/// Signed Absolute difference and Accumulate Long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sabal))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vabal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
+ let d: int32x2_t = simd_shuffle2!(b, b, [2, 3]);
+ let e: int32x2_t = simd_shuffle2!(c, c, [2, 3]);
+ let f: int32x2_t = vabd_s32(d, e);
+ let f: uint32x2_t = simd_cast(f);
+ simd_add(a, simd_cast(f))
+}
+
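+// The `vabal_high_*` intrinsics all follow the same shape: extract the high
+// half of `b` and `c` with a shuffle, take the lane-wise absolute difference,
+// widen it, and accumulate into `a`. The signed variants route the absolute
+// difference through the unsigned type before widening so that it is
+// zero-extended: |(-128) - 127| wraps to -1 as an i8, but reinterpreted as u8
+// it is 255, which widens to the correct i16 value 255.
+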
+/// Signed saturating absolute value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqabs))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqabs_s64(a: int64x1_t) -> int64x1_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v1i64")]
+ fn vqabs_s64_(a: int64x1_t) -> int64x1_t;
+ }
+ vqabs_s64_(a)
+}
+
+/// Signed saturating absolute value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqabs))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqabsq_s64(a: int64x2_t) -> int64x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v2i64")]
+ fn vqabsq_s64_(a: int64x2_t) -> int64x2_t;
+ }
+ vqabsq_s64_(a)
+}
+
+/// Signed saturating absolute value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqabs))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqabsb_s8(a: i8) -> i8 {
+ simd_extract(vqabs_s8(vdup_n_s8(a)), 0)
+}
+
+/// Signed saturating absolute value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqabs))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqabsh_s16(a: i16) -> i16 {
+ simd_extract(vqabs_s16(vdup_n_s16(a)), 0)
+}
+
+/// Signed saturating absolute value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqabs))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqabss_s32(a: i32) -> i32 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.i32")]
+ fn vqabss_s32_(a: i32) -> i32;
+ }
+ vqabss_s32_(a)
+}
+
+/// Signed saturating absolute value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqabs))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqabsd_s64(a: i64) -> i64 {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.i64")]
+ fn vqabsd_s64_(a: i64) -> i64;
+ }
+ vqabsd_s64_(a)
+}
+
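+// Unlike a plain absolute value, SQABS saturates on the most negative input:
+// `vqabsd_s64(i64::MIN)` returns `i64::MAX` instead of wrapping back to
+// `i64::MIN`.
+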
+/// Shift left and insert
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vslid_n_s64<const N: i32>(a: i64, b: i64) -> i64 {
+ static_assert!(N : i32 where N >= 0 && N <= 63);
+ transmute(vsli_n_s64::<N>(transmute(a), transmute(b)))
+}
+
+/// Shift left and insert
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vslid_n_u64<const N: i32>(a: u64, b: u64) -> u64 {
+ static_assert!(N : i32 where N >= 0 && N <= 63);
+ transmute(vsli_n_u64::<N>(transmute(a), transmute(b)))
+}
+
+/// Shift right and insert
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sri, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsrid_n_s64<const N: i32>(a: i64, b: i64) -> i64 {
+ static_assert!(N : i32 where N >= 1 && N <= 64);
+ transmute(vsri_n_s64::<N>(transmute(a), transmute(b)))
+}
+
+/// Shift right and insert
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sri, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsrid_n_u64<const N: i32>(a: u64, b: u64) -> u64 {
+ static_assert!(N : i32 where N >= 1 && N <= 64);
+ transmute(vsri_n_u64::<N>(transmute(a), transmute(b)))
+}
+
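+// Shift-and-insert semantics: `vslid_n_*::<N>(a, b)` computes
+// `(b << N) | (a & ((1 << N) - 1))`, i.e. the low N bits are kept from `a`;
+// `vsrid_n_*::<N>(a, b)` computes `b >> N` with the top N bits taken from
+// `a`. With N = 64, `vsrid_n_*` therefore returns `a` unchanged.
+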
+#[cfg(test)]
+mod test {
+ use super::*;
+ use crate::core_arch::simd::*;
+ use std::mem::transmute;
+ use stdarch_test::simd_test;
+
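+ // The tests below build inputs as portable `core_arch::simd` vectors and
+ // `transmute` them to the NEON types the intrinsics expect (and back for
+ // the result) so lane values can be compared with `assert_eq!`.
+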
+ #[simd_test(enable = "neon,sha3")]
+ unsafe fn test_veor3q_s8() {
+ let a: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F);
+ let b: i8x16 = i8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+ let c: i8x16 = i8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+ let e: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F);
+ let r: i8x16 = transmute(veor3q_s8(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon,sha3")]
+ unsafe fn test_veor3q_s16() {
+ let a: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07);
+ let b: i16x8 = i16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+ let c: i16x8 = i16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+ let e: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07);
+ let r: i16x8 = transmute(veor3q_s16(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon,sha3")]
+ unsafe fn test_veor3q_s32() {
+ let a: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03);
+ let b: i32x4 = i32x4::new(0x00, 0x00, 0x00, 0x00);
+ let c: i32x4 = i32x4::new(0x00, 0x00, 0x00, 0x00);
+ let e: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03);
+ let r: i32x4 = transmute(veor3q_s32(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon,sha3")]
+ unsafe fn test_veor3q_s64() {
+ let a: i64x2 = i64x2::new(0x00, 0x01);
+ let b: i64x2 = i64x2::new(0x00, 0x00);
+ let c: i64x2 = i64x2::new(0x00, 0x00);
+ let e: i64x2 = i64x2::new(0x00, 0x01);
+ let r: i64x2 = transmute(veor3q_s64(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon,sha3")]
+ unsafe fn test_veor3q_u8() {
+ let a: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F);
+ let b: u8x16 = u8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+ let c: u8x16 = u8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+ let e: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F);
+ let r: u8x16 = transmute(veor3q_u8(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon,sha3")]
+ unsafe fn test_veor3q_u16() {
+ let a: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07);
+ let b: u16x8 = u16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+ let c: u16x8 = u16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+ let e: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07);
+ let r: u16x8 = transmute(veor3q_u16(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon,sha3")]
+ unsafe fn test_veor3q_u32() {
+ let a: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03);
+ let b: u32x4 = u32x4::new(0x00, 0x00, 0x00, 0x00);
+ let c: u32x4 = u32x4::new(0x00, 0x00, 0x00, 0x00);
+ let e: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03);
+ let r: u32x4 = transmute(veor3q_u32(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon,sha3")]
+ unsafe fn test_veor3q_u64() {
+ let a: u64x2 = u64x2::new(0x00, 0x01);
+ let b: u64x2 = u64x2::new(0x00, 0x00);
+ let c: u64x2 = u64x2::new(0x00, 0x00);
+ let e: u64x2 = u64x2::new(0x00, 0x01);
+ let r: u64x2 = transmute(veor3q_u64(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vabd_f64() {
+ let a: f64 = 1.0;
+ let b: f64 = 9.0;
+ let e: f64 = 8.0;
+ let r: f64 = transmute(vabd_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vabdq_f64() {
+ let a: f64x2 = f64x2::new(1.0, 2.0);
+ let b: f64x2 = f64x2::new(9.0, 3.0);
+ let e: f64x2 = f64x2::new(8.0, 1.0);
+ let r: f64x2 = transmute(vabdq_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vabds_f32() {
+ let a: f32 = 1.0;
+ let b: f32 = 9.0;
+ let e: f32 = 8.0;
+ let r: f32 = transmute(vabds_f32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vabdd_f64() {
+ let a: f64 = 1.0;
+ let b: f64 = 9.0;
+ let e: f64 = 8.0;
+ let r: f64 = transmute(vabdd_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vabdl_high_u8() {
+ let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let b: u8x16 = u8x16::new(10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10);
+ let e: u16x8 = u16x8::new(1, 0, 1, 2, 3, 4, 5, 6);
+ let r: u16x8 = transmute(vabdl_high_u8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vabdl_high_u16() {
+ let a: u16x8 = u16x8::new(1, 2, 3, 4, 8, 9, 11, 12);
+ let b: u16x8 = u16x8::new(10, 10, 10, 10, 10, 10, 10, 10);
+ let e: u32x4 = u32x4::new(2, 1, 1, 2);
+ let r: u32x4 = transmute(vabdl_high_u16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vabdl_high_u32() {
+ let a: u32x4 = u32x4::new(1, 2, 3, 4);
+ let b: u32x4 = u32x4::new(10, 10, 10, 10);
+ let e: u64x2 = u64x2::new(7, 6);
+ let r: u64x2 = transmute(vabdl_high_u32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vabdl_high_s8() {
+ let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let b: i8x16 = i8x16::new(10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10);
+ let e: i16x8 = i16x8::new(1, 0, 1, 2, 3, 4, 5, 6);
+ let r: i16x8 = transmute(vabdl_high_s8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vabdl_high_s16() {
+ let a: i16x8 = i16x8::new(1, 2, 3, 4, 9, 10, 11, 12);
+ let b: i16x8 = i16x8::new(10, 10, 10, 10, 10, 10, 10, 10);
+ let e: i32x4 = i32x4::new(1, 0, 1, 2);
+ let r: i32x4 = transmute(vabdl_high_s16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vabdl_high_s32() {
+ let a: i32x4 = i32x4::new(1, 2, 3, 4);
+ let b: i32x4 = i32x4::new(10, 10, 10, 10);
+ let e: i64x2 = i64x2::new(7, 6);
+ let r: i64x2 = transmute(vabdl_high_s32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceq_u64() {
+ let a: u64x1 = u64x1::new(0);
+ let b: u64x1 = u64x1::new(0);
+ let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x1 = transmute(vceq_u64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+
+ let a: u64x1 = u64x1::new(0);
+ let b: u64x1 = u64x1::new(1);
+ let e: u64x1 = u64x1::new(0);
+ let r: u64x1 = transmute(vceq_u64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceqq_u64() {
+ let a: u64x2 = u64x2::new(0, 0x01);
+ let b: u64x2 = u64x2::new(0, 0x01);
+ let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x2 = transmute(vceqq_u64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+
+ let a: u64x2 = u64x2::new(0, 0);
+ let b: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0);
+ let r: u64x2 = transmute(vceqq_u64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceq_s64() {
+ let a: i64x1 = i64x1::new(-9223372036854775808);
+ let b: i64x1 = i64x1::new(-9223372036854775808);
+ let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x1 = transmute(vceq_s64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+
+ let a: i64x1 = i64x1::new(-9223372036854775808);
+ let b: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
+ let e: u64x1 = u64x1::new(0);
+ let r: u64x1 = transmute(vceq_s64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceqq_s64() {
+ let a: i64x2 = i64x2::new(-9223372036854775808, 0x01);
+ let b: i64x2 = i64x2::new(-9223372036854775808, 0x01);
+ let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x2 = transmute(vceqq_s64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+
+ let a: i64x2 = i64x2::new(-9223372036854775808, -9223372036854775808);
+ let b: i64x2 = i64x2::new(-9223372036854775808, 0x7F_FF_FF_FF_FF_FF_FF_FF);
+ let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0);
+ let r: u64x2 = transmute(vceqq_s64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceq_p64() {
+ let a: i64x1 = i64x1::new(-9223372036854775808);
+ let b: i64x1 = i64x1::new(-9223372036854775808);
+ let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x1 = transmute(vceq_p64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+
+ let a: i64x1 = i64x1::new(-9223372036854775808);
+ let b: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
+ let e: u64x1 = u64x1::new(0);
+ let r: u64x1 = transmute(vceq_p64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceqq_p64() {
+ let a: i64x2 = i64x2::new(-9223372036854775808, 0x01);
+ let b: i64x2 = i64x2::new(-9223372036854775808, 0x01);
+ let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x2 = transmute(vceqq_p64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+
+ let a: i64x2 = i64x2::new(-9223372036854775808, -9223372036854775808);
+ let b: i64x2 = i64x2::new(-9223372036854775808, 0x7F_FF_FF_FF_FF_FF_FF_FF);
+ let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0);
+ let r: u64x2 = transmute(vceqq_p64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceq_f64() {
+ let a: f64 = 1.2;
+ let b: f64 = 1.2;
+ let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x1 = transmute(vceq_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceqq_f64() {
+ let a: f64x2 = f64x2::new(1.2, 3.4);
+ let b: f64x2 = f64x2::new(1.2, 3.4);
+ let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x2 = transmute(vceqq_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceqd_s64() {
+ let a: i64 = 1;
+ let b: i64 = 2;
+ let e: u64 = 0;
+ let r: u64 = transmute(vceqd_s64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceqd_u64() {
+ let a: u64 = 1;
+ let b: u64 = 2;
+ let e: u64 = 0;
+ let r: u64 = transmute(vceqd_u64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceqs_f32() {
+ let a: f32 = 1.;
+ let b: f32 = 2.;
+ let e: u32 = 0;
+ let r: u32 = transmute(vceqs_f32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceqd_f64() {
+ let a: f64 = 1.;
+ let b: f64 = 2.;
+ let e: u64 = 0;
+ let r: u64 = transmute(vceqd_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceqz_s8() {
+ let a: i8x8 = i8x8::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06);
+ let e: u8x8 = u8x8::new(0, 0xFF, 0, 0, 0, 0, 0, 0);
+ let r: u8x8 = transmute(vceqz_s8(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceqzq_s8() {
+ let a: i8x16 = i8x16::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x7F);
+ let e: u8x16 = u8x16::new(0, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ let r: u8x16 = transmute(vceqzq_s8(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceqz_s16() {
+ let a: i16x4 = i16x4::new(-32768, 0x00, 0x01, 0x02);
+ let e: u16x4 = u16x4::new(0, 0xFF_FF, 0, 0);
+ let r: u16x4 = transmute(vceqz_s16(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceqzq_s16() {
+ let a: i16x8 = i16x8::new(-32768, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06);
+ let e: u16x8 = u16x8::new(0, 0xFF_FF, 0, 0, 0, 0, 0, 0);
+ let r: u16x8 = transmute(vceqzq_s16(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceqz_s32() {
+ let a: i32x2 = i32x2::new(-2147483648, 0x00);
+ let e: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF);
+ let r: u32x2 = transmute(vceqz_s32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceqzq_s32() {
+ let a: i32x4 = i32x4::new(-2147483648, 0x00, 0x01, 0x02);
+ let e: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 0, 0);
+ let r: u32x4 = transmute(vceqzq_s32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceqz_s64() {
+ let a: i64x1 = i64x1::new(-9223372036854775808);
+ let e: u64x1 = u64x1::new(0);
+ let r: u64x1 = transmute(vceqz_s64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceqzq_s64() {
+ let a: i64x2 = i64x2::new(-9223372036854775808, 0x00);
+ let e: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x2 = transmute(vceqzq_s64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceqz_p8() {
+ let a: i8x8 = i8x8::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06);
+ let e: u8x8 = u8x8::new(0, 0xFF, 0, 0, 0, 0, 0, 0);
+ let r: u8x8 = transmute(vceqz_p8(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceqzq_p8() {
+ let a: i8x16 = i8x16::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x7F);
+ let e: u8x16 = u8x16::new(0, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ let r: u8x16 = transmute(vceqzq_p8(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceqz_p64() {
+ let a: i64x1 = i64x1::new(-9223372036854775808);
+ let e: u64x1 = u64x1::new(0);
+ let r: u64x1 = transmute(vceqz_p64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceqzq_p64() {
+ let a: i64x2 = i64x2::new(-9223372036854775808, 0x00);
+ let e: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x2 = transmute(vceqzq_p64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceqz_u8() {
+ let a: u8x8 = u8x8::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06);
+ let e: u8x8 = u8x8::new(0xFF, 0xFF, 0, 0, 0, 0, 0, 0);
+ let r: u8x8 = transmute(vceqz_u8(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceqzq_u8() {
+ let a: u8x16 = u8x16::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0xFF);
+ let e: u8x16 = u8x16::new(0xFF, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ let r: u8x16 = transmute(vceqzq_u8(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceqz_u16() {
+ let a: u16x4 = u16x4::new(0, 0x00, 0x01, 0x02);
+ let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0, 0);
+ let r: u16x4 = transmute(vceqz_u16(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceqzq_u16() {
+ let a: u16x8 = u16x8::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06);
+ let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0, 0, 0, 0, 0, 0);
+ let r: u16x8 = transmute(vceqzq_u16(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceqz_u32() {
+ let a: u32x2 = u32x2::new(0, 0x00);
+ let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
+ let r: u32x2 = transmute(vceqz_u32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceqzq_u32() {
+ let a: u32x4 = u32x4::new(0, 0x00, 0x01, 0x02);
+ let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0, 0);
+ let r: u32x4 = transmute(vceqzq_u32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceqz_u64() {
+ let a: u64x1 = u64x1::new(0);
+ let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x1 = transmute(vceqz_u64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceqzq_u64() {
+ let a: u64x2 = u64x2::new(0, 0x00);
+ let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x2 = transmute(vceqzq_u64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceqz_f32() {
+ let a: f32x2 = f32x2::new(0.0, 1.2);
+ let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0);
+ let r: u32x2 = transmute(vceqz_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceqzq_f32() {
+ let a: f32x4 = f32x4::new(0.0, 1.2, 3.4, 5.6);
+ let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0, 0);
+ let r: u32x4 = transmute(vceqzq_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceqz_f64() {
+ let a: f64 = 0.0;
+ let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x1 = transmute(vceqz_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceqzq_f64() {
+ let a: f64x2 = f64x2::new(0.0, 1.2);
+ let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0);
+ let r: u64x2 = transmute(vceqzq_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceqzd_s64() {
+ let a: i64 = 1;
+ let e: u64 = 0;
+ let r: u64 = transmute(vceqzd_s64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceqzd_u64() {
+ let a: u64 = 1;
+ let e: u64 = 0;
+ let r: u64 = transmute(vceqzd_u64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceqzs_f32() {
+ let a: f32 = 1.;
+ let e: u32 = 0;
+ let r: u32 = transmute(vceqzs_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceqzd_f64() {
+ let a: f64 = 1.;
+ let e: u64 = 0;
+ let r: u64 = transmute(vceqzd_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtst_s64() {
+ let a: i64x1 = i64x1::new(-9223372036854775808);
+ let b: i64x1 = i64x1::new(-9223372036854775808);
+ let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x1 = transmute(vtst_s64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtstq_s64() {
+ let a: i64x2 = i64x2::new(-9223372036854775808, 0x00);
+ let b: i64x2 = i64x2::new(-9223372036854775808, 0x00);
+ let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0);
+ let r: u64x2 = transmute(vtstq_s64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtst_p64() {
+ let a: i64x1 = i64x1::new(-9223372036854775808);
+ let b: i64x1 = i64x1::new(-9223372036854775808);
+ let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x1 = transmute(vtst_p64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtstq_p64() {
+ let a: i64x2 = i64x2::new(-9223372036854775808, 0x00);
+ let b: i64x2 = i64x2::new(-9223372036854775808, 0x00);
+ let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0);
+ let r: u64x2 = transmute(vtstq_p64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtst_u64() {
+ let a: u64x1 = u64x1::new(0);
+ let b: u64x1 = u64x1::new(0);
+ let e: u64x1 = u64x1::new(0);
+ let r: u64x1 = transmute(vtst_u64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtstq_u64() {
+ let a: u64x2 = u64x2::new(0, 0x00);
+ let b: u64x2 = u64x2::new(0, 0x00);
+ let e: u64x2 = u64x2::new(0, 0);
+ let r: u64x2 = transmute(vtstq_u64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtstd_s64() {
+ let a: i64 = 0;
+ let b: i64 = 0;
+ let e: u64 = 0;
+ let r: u64 = transmute(vtstd_s64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtstd_u64() {
+ let a: u64 = 0;
+ let b: u64 = 0;
+ let e: u64 = 0;
+ let r: u64 = transmute(vtstd_u64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuqadds_s32() {
+ let a: i32 = 1;
+ let b: u32 = 1;
+ let e: i32 = 2;
+ let r: i32 = transmute(vuqadds_s32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuqaddd_s64() {
+ let a: i64 = 1;
+ let b: u64 = 1;
+ let e: i64 = 2;
+ let r: i64 = transmute(vuqaddd_s64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuqaddb_s8() {
+ let a: i8 = 1;
+ let b: u8 = 2;
+ let e: i8 = 3;
+ let r: i8 = transmute(vuqaddb_s8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuqaddh_s16() {
+ let a: i16 = 1;
+ let b: u16 = 2;
+ let e: i16 = 3;
+ let r: i16 = transmute(vuqaddh_s16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vabs_f64() {
+ let a: f64 = -0.1;
+ let e: f64 = 0.1;
+ let r: f64 = transmute(vabs_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vabsq_f64() {
+ let a: f64x2 = f64x2::new(-0.1, -2.2);
+ let e: f64x2 = f64x2::new(0.1, 2.2);
+ let r: f64x2 = transmute(vabsq_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgt_s64() {
+ let a: i64x1 = i64x1::new(1);
+ let b: i64x1 = i64x1::new(0);
+ let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x1 = transmute(vcgt_s64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgtq_s64() {
+ let a: i64x2 = i64x2::new(1, 2);
+ let b: i64x2 = i64x2::new(0, 1);
+ let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x2 = transmute(vcgtq_s64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgt_u64() {
+ let a: u64x1 = u64x1::new(1);
+ let b: u64x1 = u64x1::new(0);
+ let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x1 = transmute(vcgt_u64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgtq_u64() {
+ let a: u64x2 = u64x2::new(1, 2);
+ let b: u64x2 = u64x2::new(0, 1);
+ let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x2 = transmute(vcgtq_u64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgt_f64() {
+ let a: f64 = 1.2;
+ let b: f64 = 0.1;
+ let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x1 = transmute(vcgt_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgtq_f64() {
+ let a: f64x2 = f64x2::new(1.2, 2.3);
+ let b: f64x2 = f64x2::new(0.1, 1.2);
+ let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x2 = transmute(vcgtq_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgtd_s64() {
+ let a: i64 = 1;
+ let b: i64 = 2;
+ let e: u64 = 0;
+ let r: u64 = transmute(vcgtd_s64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgtd_u64() {
+ let a: u64 = 1;
+ let b: u64 = 2;
+ let e: u64 = 0;
+ let r: u64 = transmute(vcgtd_u64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgts_f32() {
+ let a: f32 = 1.;
+ let b: f32 = 2.;
+ let e: u32 = 0;
+ let r: u32 = transmute(vcgts_f32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgtd_f64() {
+ let a: f64 = 1.;
+ let b: f64 = 2.;
+ let e: u64 = 0;
+ let r: u64 = transmute(vcgtd_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vclt_s64() {
+ let a: i64x1 = i64x1::new(0);
+ let b: i64x1 = i64x1::new(1);
+ let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x1 = transmute(vclt_s64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcltq_s64() {
+ let a: i64x2 = i64x2::new(0, 1);
+ let b: i64x2 = i64x2::new(1, 2);
+ let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x2 = transmute(vcltq_s64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vclt_u64() {
+ let a: u64x1 = u64x1::new(0);
+ let b: u64x1 = u64x1::new(1);
+ let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x1 = transmute(vclt_u64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcltq_u64() {
+ let a: u64x2 = u64x2::new(0, 1);
+ let b: u64x2 = u64x2::new(1, 2);
+ let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x2 = transmute(vcltq_u64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vclt_f64() {
+ let a: f64 = 0.1;
+ let b: f64 = 1.2;
+ let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x1 = transmute(vclt_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcltq_f64() {
+ let a: f64x2 = f64x2::new(0.1, 1.2);
+ let b: f64x2 = f64x2::new(1.2, 2.3);
+ let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x2 = transmute(vcltq_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcltd_s64() {
+ let a: i64 = 2;
+ let b: i64 = 1;
+ let e: u64 = 0;
+ let r: u64 = transmute(vcltd_s64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcltd_u64() {
+ let a: u64 = 2;
+ let b: u64 = 1;
+ let e: u64 = 0;
+ let r: u64 = transmute(vcltd_u64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vclts_f32() {
+ let a: f32 = 2.;
+ let b: f32 = 1.;
+ let e: u32 = 0;
+ let r: u32 = transmute(vclts_f32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcltd_f64() {
+ let a: f64 = 2.;
+ let b: f64 = 1.;
+ let e: u64 = 0;
+ let r: u64 = transmute(vcltd_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcle_s64() {
+ let a: i64x1 = i64x1::new(0);
+ let b: i64x1 = i64x1::new(1);
+ let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x1 = transmute(vcle_s64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcleq_s64() {
+ let a: i64x2 = i64x2::new(0, 1);
+ let b: i64x2 = i64x2::new(1, 2);
+ let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x2 = transmute(vcleq_s64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcged_s64() {
+ let a: i64 = 1;
+ let b: i64 = 2;
+ let e: u64 = 0;
+ let r: u64 = transmute(vcged_s64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcged_u64() {
+ let a: u64 = 1;
+ let b: u64 = 2;
+ let e: u64 = 0;
+ let r: u64 = transmute(vcged_u64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcges_f32() {
+ let a: f32 = 1.;
+ let b: f32 = 2.;
+ let e: u32 = 0;
+ let r: u32 = transmute(vcges_f32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcged_f64() {
+ let a: f64 = 1.;
+ let b: f64 = 2.;
+ let e: u64 = 0;
+ let r: u64 = transmute(vcged_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcle_u64() {
+ let a: u64x1 = u64x1::new(0);
+ let b: u64x1 = u64x1::new(1);
+ let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x1 = transmute(vcle_u64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcleq_u64() {
+ let a: u64x2 = u64x2::new(0, 1);
+ let b: u64x2 = u64x2::new(1, 2);
+ let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x2 = transmute(vcleq_u64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcle_f64() {
+ let a: f64 = 0.1;
+ let b: f64 = 1.2;
+ let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x1 = transmute(vcle_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcleq_f64() {
+ let a: f64x2 = f64x2::new(0.1, 1.2);
+ let b: f64x2 = f64x2::new(1.2, 2.3);
+ let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x2 = transmute(vcleq_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcled_s64() {
+ let a: i64 = 2;
+ let b: i64 = 1;
+ let e: u64 = 0;
+ let r: u64 = transmute(vcled_s64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcled_u64() {
+ let a: u64 = 2;
+ let b: u64 = 1;
+ let e: u64 = 0;
+ let r: u64 = transmute(vcled_u64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcles_f32() {
+ let a: f32 = 2.;
+ let b: f32 = 1.;
+ let e: u32 = 0;
+ let r: u32 = transmute(vcles_f32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcled_f64() {
+ let a: f64 = 2.;
+ let b: f64 = 1.;
+ let e: u64 = 0;
+ let r: u64 = transmute(vcled_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcge_s64() {
+ let a: i64x1 = i64x1::new(1);
+ let b: i64x1 = i64x1::new(0);
+ let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x1 = transmute(vcge_s64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgeq_s64() {
+ let a: i64x2 = i64x2::new(1, 2);
+ let b: i64x2 = i64x2::new(0, 1);
+ let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x2 = transmute(vcgeq_s64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcge_u64() {
+ let a: u64x1 = u64x1::new(1);
+ let b: u64x1 = u64x1::new(0);
+ let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x1 = transmute(vcge_u64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgeq_u64() {
+ let a: u64x2 = u64x2::new(1, 2);
+ let b: u64x2 = u64x2::new(0, 1);
+ let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x2 = transmute(vcgeq_u64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcge_f64() {
+ let a: f64 = 1.2;
+ let b: f64 = 0.1;
+ let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x1 = transmute(vcge_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgeq_f64() {
+ let a: f64x2 = f64x2::new(1.2, 2.3);
+ let b: f64x2 = f64x2::new(0.1, 1.2);
+ let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x2 = transmute(vcgeq_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
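+ // The `vcgez`/`vcgtz`/`vclez`/`vcltz` tests compare each lane
+ // against zero (>= 0, > 0, <= 0 and < 0 respectively). The inputs
+ // deliberately include the most negative representable values
+ // (-128, -32768, ...) so the sign bit is exercised.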
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgez_s8() {
+ let a: i8x8 = i8x8::new(-128, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05);
+ let e: u8x8 = u8x8::new(0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
+ let r: u8x8 = transmute(vcgez_s8(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgezq_s8() {
+ let a: i8x16 = i8x16::new(-128, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x7F);
+ let e: u8x16 = u8x16::new(0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
+ let r: u8x16 = transmute(vcgezq_s8(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgez_s16() {
+ let a: i16x4 = i16x4::new(-32768, -1, 0x00, 0x01);
+ let e: u16x4 = u16x4::new(0, 0, 0xFF_FF, 0xFF_FF);
+ let r: u16x4 = transmute(vcgez_s16(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgezq_s16() {
+ let a: i16x8 = i16x8::new(-32768, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05);
+ let e: u16x8 = u16x8::new(0, 0, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF);
+ let r: u16x8 = transmute(vcgezq_s16(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgez_s32() {
+ let a: i32x2 = i32x2::new(-2147483648, -1);
+ let e: u32x2 = u32x2::new(0, 0);
+ let r: u32x2 = transmute(vcgez_s32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgezq_s32() {
+ let a: i32x4 = i32x4::new(-2147483648, -1, 0x00, 0x01);
+ let e: u32x4 = u32x4::new(0, 0, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
+ let r: u32x4 = transmute(vcgezq_s32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgez_s64() {
+ let a: i64x1 = i64x1::new(-9223372036854775808);
+ let e: u64x1 = u64x1::new(0);
+ let r: u64x1 = transmute(vcgez_s64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgezq_s64() {
+ let a: i64x2 = i64x2::new(-9223372036854775808, -1);
+ let e: u64x2 = u64x2::new(0, 0);
+ let r: u64x2 = transmute(vcgezq_s64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgez_f32() {
+ let a: f32x2 = f32x2::new(-1.2, 0.0);
+ let e: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF);
+ let r: u32x2 = transmute(vcgez_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgezq_f32() {
+ let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3);
+ let e: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
+ let r: u32x4 = transmute(vcgezq_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgez_f64() {
+ let a: f64 = -1.2;
+ let e: u64x1 = u64x1::new(0);
+ let r: u64x1 = transmute(vcgez_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgezq_f64() {
+ let a: f64x2 = f64x2::new(-1.2, 0.0);
+ let e: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x2 = transmute(vcgezq_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgezd_s64() {
+ let a: i64 = -1;
+ let e: u64 = 0;
+ let r: u64 = transmute(vcgezd_s64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgezs_f32() {
+ let a: f32 = -1.;
+ let e: u32 = 0;
+ let r: u32 = transmute(vcgezs_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgezd_f64() {
+ let a: f64 = -1.;
+ let e: u64 = 0;
+ let r: u64 = transmute(vcgezd_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgtz_s8() {
+ let a: i8x8 = i8x8::new(-128, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05);
+ let e: u8x8 = u8x8::new(0, 0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
+ let r: u8x8 = transmute(vcgtz_s8(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgtzq_s8() {
+ let a: i8x16 = i8x16::new(-128, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x7F);
+ let e: u8x16 = u8x16::new(0, 0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
+ let r: u8x16 = transmute(vcgtzq_s8(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgtz_s16() {
+ let a: i16x4 = i16x4::new(-32768, -1, 0x00, 0x01);
+ let e: u16x4 = u16x4::new(0, 0, 0, 0xFF_FF);
+ let r: u16x4 = transmute(vcgtz_s16(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgtzq_s16() {
+ let a: i16x8 = i16x8::new(-32768, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05);
+ let e: u16x8 = u16x8::new(0, 0, 0, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF);
+ let r: u16x8 = transmute(vcgtzq_s16(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgtz_s32() {
+ let a: i32x2 = i32x2::new(-2147483648, -1);
+ let e: u32x2 = u32x2::new(0, 0);
+ let r: u32x2 = transmute(vcgtz_s32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgtzq_s32() {
+ let a: i32x4 = i32x4::new(-2147483648, -1, 0x00, 0x01);
+ let e: u32x4 = u32x4::new(0, 0, 0, 0xFF_FF_FF_FF);
+ let r: u32x4 = transmute(vcgtzq_s32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgtz_s64() {
+ let a: i64x1 = i64x1::new(-9223372036854775808);
+ let e: u64x1 = u64x1::new(0);
+ let r: u64x1 = transmute(vcgtz_s64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgtzq_s64() {
+ let a: i64x2 = i64x2::new(-9223372036854775808, -1);
+ let e: u64x2 = u64x2::new(0, 0);
+ let r: u64x2 = transmute(vcgtzq_s64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgtz_f32() {
+ let a: f32x2 = f32x2::new(-1.2, 0.0);
+ let e: u32x2 = u32x2::new(0, 0);
+ let r: u32x2 = transmute(vcgtz_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgtzq_f32() {
+ let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3);
+ let e: u32x4 = u32x4::new(0, 0, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
+ let r: u32x4 = transmute(vcgtzq_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgtz_f64() {
+ let a: f64 = -1.2;
+ let e: u64x1 = u64x1::new(0);
+ let r: u64x1 = transmute(vcgtz_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgtzq_f64() {
+ let a: f64x2 = f64x2::new(-1.2, 0.0);
+ let e: u64x2 = u64x2::new(0, 0);
+ let r: u64x2 = transmute(vcgtzq_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgtzd_s64() {
+ let a: i64 = -1;
+ let e: u64 = 0;
+ let r: u64 = transmute(vcgtzd_s64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgtzs_f32() {
+ let a: f32 = -1.;
+ let e: u32 = 0;
+ let r: u32 = transmute(vcgtzs_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgtzd_f64() {
+ let a: f64 = -1.;
+ let e: u64 = 0;
+ let r: u64 = transmute(vcgtzd_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vclez_s8() {
+ let a: i8x8 = i8x8::new(-128, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05);
+ let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0, 0, 0, 0, 0);
+ let r: u8x8 = transmute(vclez_s8(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vclezq_s8() {
+ let a: i8x16 = i8x16::new(-128, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x7F);
+ let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ let r: u8x16 = transmute(vclezq_s8(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vclez_s16() {
+ let a: i16x4 = i16x4::new(-32768, -1, 0x00, 0x01);
+ let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0);
+ let r: u16x4 = transmute(vclez_s16(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vclezq_s16() {
+ let a: i16x8 = i16x8::new(-32768, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05);
+ let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0, 0, 0, 0, 0);
+ let r: u16x8 = transmute(vclezq_s16(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vclez_s32() {
+ let a: i32x2 = i32x2::new(-2147483648, -1);
+ let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
+ let r: u32x2 = transmute(vclez_s32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vclezq_s32() {
+ let a: i32x4 = i32x4::new(-2147483648, -1, 0x00, 0x01);
+ let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0);
+ let r: u32x4 = transmute(vclezq_s32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vclez_s64() {
+ let a: i64x1 = i64x1::new(-9223372036854775808);
+ let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x1 = transmute(vclez_s64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vclezq_s64() {
+ let a: i64x2 = i64x2::new(-9223372036854775808, -1);
+ let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x2 = transmute(vclezq_s64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vclez_f32() {
+ let a: f32x2 = f32x2::new(-1.2, 0.0);
+ let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
+ let r: u32x2 = transmute(vclez_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vclezq_f32() {
+ let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3);
+ let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0, 0);
+ let r: u32x4 = transmute(vclezq_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vclez_f64() {
+ let a: f64 = -1.2;
+ let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x1 = transmute(vclez_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vclezq_f64() {
+ let a: f64x2 = f64x2::new(-1.2, 0.0);
+ let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x2 = transmute(vclezq_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vclezd_s64() {
+ let a: i64 = 2;
+ let e: u64 = 0;
+ let r: u64 = transmute(vclezd_s64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vclezs_f32() {
+ let a: f32 = 2.;
+ let e: u32 = 0;
+ let r: u32 = transmute(vclezs_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vclezd_f64() {
+ let a: f64 = 2.;
+ let e: u64 = 0;
+ let r: u64 = transmute(vclezd_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcltz_s8() {
+ let a: i8x8 = i8x8::new(-128, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05);
+ let e: u8x8 = u8x8::new(0xFF, 0xFF, 0, 0, 0, 0, 0, 0);
+ let r: u8x8 = transmute(vcltz_s8(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcltzq_s8() {
+ let a: i8x16 = i8x16::new(-128, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x7F);
+ let e: u8x16 = u8x16::new(0xFF, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ let r: u8x16 = transmute(vcltzq_s8(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcltz_s16() {
+ let a: i16x4 = i16x4::new(-32768, -1, 0x00, 0x01);
+ let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0, 0);
+ let r: u16x4 = transmute(vcltz_s16(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcltzq_s16() {
+ let a: i16x8 = i16x8::new(-32768, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05);
+ let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0, 0, 0, 0, 0, 0);
+ let r: u16x8 = transmute(vcltzq_s16(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcltz_s32() {
+ let a: i32x2 = i32x2::new(-2147483648, -1);
+ let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
+ let r: u32x2 = transmute(vcltz_s32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcltzq_s32() {
+ let a: i32x4 = i32x4::new(-2147483648, -1, 0x00, 0x01);
+ let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0, 0);
+ let r: u32x4 = transmute(vcltzq_s32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcltz_s64() {
+ let a: i64x1 = i64x1::new(-9223372036854775808);
+ let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x1 = transmute(vcltz_s64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcltzq_s64() {
+ let a: i64x2 = i64x2::new(-9223372036854775808, -1);
+ let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x2 = transmute(vcltzq_s64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcltz_f32() {
+ let a: f32x2 = f32x2::new(-1.2, 0.0);
+ let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0);
+ let r: u32x2 = transmute(vcltz_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcltzq_f32() {
+ let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3);
+ let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0, 0);
+ let r: u32x4 = transmute(vcltzq_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcltz_f64() {
+ let a: f64 = -1.2;
+ let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x1 = transmute(vcltz_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcltzq_f64() {
+ let a: f64x2 = f64x2::new(-1.2, 0.0);
+ let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0);
+ let r: u64x2 = transmute(vcltzq_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcltzd_s64() {
+ let a: i64 = 2;
+ let e: u64 = 0;
+ let r: u64 = transmute(vcltzd_s64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcltzs_f32() {
+ let a: f32 = 2.;
+ let e: u32 = 0;
+ let r: u32 = transmute(vcltzs_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcltzd_f64() {
+ let a: f64 = 2.;
+ let e: u64 = 0;
+ let r: u64 = transmute(vcltzd_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
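+ // The `vcagt`/`vcage`/`vcalt`/`vcale` tests cover the absolute
+ // comparisons (FACGT/FACGE): operands are compared by magnitude,
+ // so with a = -1.2 and b = -1.1 we have |a| > |b| and `vcagt`
+ // returns an all-ones mask even though a < b.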
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcagt_f64() {
+ let a: f64 = -1.2;
+ let b: f64 = -1.1;
+ let e: u64x1 = u64x1::new(!0);
+ let r: u64x1 = transmute(vcagt_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcagtq_f64() {
+ let a: f64x2 = f64x2::new(-1.2, 0.0);
+ let b: f64x2 = f64x2::new(-1.1, 0.0);
+ let e: u64x2 = u64x2::new(!0, 0);
+ let r: u64x2 = transmute(vcagtq_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcagts_f32() {
+ let a: f32 = -1.2;
+ let b: f32 = -1.1;
+ let e: u32 = !0;
+ let r: u32 = transmute(vcagts_f32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcagtd_f64() {
+ let a: f64 = -1.2;
+ let b: f64 = -1.1;
+ let e: u64 = !0;
+ let r: u64 = transmute(vcagtd_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcage_f64() {
+ let a: f64 = -1.2;
+ let b: f64 = -1.1;
+ let e: u64x1 = u64x1::new(!0);
+ let r: u64x1 = transmute(vcage_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcageq_f64() {
+ let a: f64x2 = f64x2::new(-1.2, 0.0);
+ let b: f64x2 = f64x2::new(-1.1, 0.0);
+ let e: u64x2 = u64x2::new(!0, !0);
+ let r: u64x2 = transmute(vcageq_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcages_f32() {
+ let a: f32 = -1.2;
+ let b: f32 = -1.1;
+ let e: u32 = !0;
+ let r: u32 = transmute(vcages_f32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcaged_f64() {
+ let a: f64 = -1.2;
+ let b: f64 = -1.1;
+ let e: u64 = !0;
+ let r: u64 = transmute(vcaged_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcalt_f64() {
+ let a: f64 = -1.2;
+ let b: f64 = -1.1;
+ let e: u64x1 = u64x1::new(0);
+ let r: u64x1 = transmute(vcalt_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcaltq_f64() {
+ let a: f64x2 = f64x2::new(-1.2, 0.0);
+ let b: f64x2 = f64x2::new(-1.1, 0.0);
+ let e: u64x2 = u64x2::new(0, 0);
+ let r: u64x2 = transmute(vcaltq_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcalts_f32() {
+ let a: f32 = -1.2;
+ let b: f32 = -1.1;
+ let e: u32 = 0;
+ let r: u32 = transmute(vcalts_f32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcaltd_f64() {
+ let a: f64 = -1.2;
+ let b: f64 = -1.1;
+ let e: u64 = 0;
+ let r: u64 = transmute(vcaltd_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcale_f64() {
+ let a: f64 = -1.2;
+ let b: f64 = -1.1;
+ let e: u64x1 = u64x1::new(0);
+ let r: u64x1 = transmute(vcale_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcaleq_f64() {
+ let a: f64x2 = f64x2::new(-1.2, 0.0);
+ let b: f64x2 = f64x2::new(-1.1, 0.0);
+ let e: u64x2 = u64x2::new(0, !0);
+ let r: u64x2 = transmute(vcaleq_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcales_f32() {
+ let a: f32 = -1.2;
+ let b: f32 = -1.1;
+ let e: u32 = 0;
+ let r: u32 = transmute(vcales_f32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcaled_f64() {
+ let a: f64 = -1.2;
+ let b: f64 = -1.1;
+ let e: u64 = 0;
+ let r: u64 = transmute(vcaled_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
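+ // The `vcopy*_lane*` tests check lane insertion. The const generics
+ // `<LANE1, LANE2>` pick the destination lane in the first vector and
+ // the source lane in the second, so `vcopy_lane_s8::<0, 1>(a, b)`
+ // replaces lane 0 of `a` with lane 1 of `b` and leaves every other
+ // lane of `a` untouched.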
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopy_lane_s8() {
+ let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+ let b: i8x8 = i8x8::new(0, 0x7F, 0, 0, 0, 0, 0, 0);
+ let e: i8x8 = i8x8::new(0x7F, 2, 3, 4, 5, 6, 7, 8);
+ let r: i8x8 = transmute(vcopy_lane_s8::<0, 1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopyq_laneq_s8() {
+ let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let b: i8x16 = i8x16::new(0, 0x7F, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ let e: i8x16 = i8x16::new(0x7F, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let r: i8x16 = transmute(vcopyq_laneq_s8::<0, 1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopy_lane_s16() {
+ let a: i16x4 = i16x4::new(1, 2, 3, 4);
+ let b: i16x4 = i16x4::new(0, 0x7F_FF, 0, 0);
+ let e: i16x4 = i16x4::new(0x7F_FF, 2, 3, 4);
+ let r: i16x4 = transmute(vcopy_lane_s16::<0, 1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopyq_laneq_s16() {
+ let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+ let b: i16x8 = i16x8::new(0, 0x7F_FF, 0, 0, 0, 0, 0, 0);
+ let e: i16x8 = i16x8::new(0x7F_FF, 2, 3, 4, 5, 6, 7, 8);
+ let r: i16x8 = transmute(vcopyq_laneq_s16::<0, 1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopy_lane_s32() {
+ let a: i32x2 = i32x2::new(1, 2);
+ let b: i32x2 = i32x2::new(0, 0x7F_FF_FF_FF);
+ let e: i32x2 = i32x2::new(0x7F_FF_FF_FF, 2);
+ let r: i32x2 = transmute(vcopy_lane_s32::<0, 1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopyq_laneq_s32() {
+ let a: i32x4 = i32x4::new(1, 2, 3, 4);
+ let b: i32x4 = i32x4::new(0, 0x7F_FF_FF_FF, 0, 0);
+ let e: i32x4 = i32x4::new(0x7F_FF_FF_FF, 2, 3, 4);
+ let r: i32x4 = transmute(vcopyq_laneq_s32::<0, 1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopyq_laneq_s64() {
+ let a: i64x2 = i64x2::new(1, 2);
+ let b: i64x2 = i64x2::new(0, 0x7F_FF_FF_FF_FF_FF_FF_FF);
+ let e: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 2);
+ let r: i64x2 = transmute(vcopyq_laneq_s64::<0, 1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopy_lane_u8() {
+ let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+ let b: u8x8 = u8x8::new(0, 0xFF, 0, 0, 0, 0, 0, 0);
+ let e: u8x8 = u8x8::new(0xFF, 2, 3, 4, 5, 6, 7, 8);
+ let r: u8x8 = transmute(vcopy_lane_u8::<0, 1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopyq_laneq_u8() {
+ let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let b: u8x16 = u8x16::new(0, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ let e: u8x16 = u8x16::new(0xFF, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let r: u8x16 = transmute(vcopyq_laneq_u8::<0, 1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopy_lane_u16() {
+ let a: u16x4 = u16x4::new(1, 2, 3, 4);
+ let b: u16x4 = u16x4::new(0, 0xFF_FF, 0, 0);
+ let e: u16x4 = u16x4::new(0xFF_FF, 2, 3, 4);
+ let r: u16x4 = transmute(vcopy_lane_u16::<0, 1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopyq_laneq_u16() {
+ let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+ let b: u16x8 = u16x8::new(0, 0xFF_FF, 0, 0, 0, 0, 0, 0);
+ let e: u16x8 = u16x8::new(0xFF_FF, 2, 3, 4, 5, 6, 7, 8);
+ let r: u16x8 = transmute(vcopyq_laneq_u16::<0, 1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopy_lane_u32() {
+ let a: u32x2 = u32x2::new(1, 2);
+ let b: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF);
+ let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 2);
+ let r: u32x2 = transmute(vcopy_lane_u32::<0, 1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopyq_laneq_u32() {
+ let a: u32x4 = u32x4::new(1, 2, 3, 4);
+ let b: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 0, 0);
+ let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 2, 3, 4);
+ let r: u32x4 = transmute(vcopyq_laneq_u32::<0, 1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopyq_laneq_u64() {
+ let a: u64x2 = u64x2::new(1, 2);
+ let b: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 2);
+ let r: u64x2 = transmute(vcopyq_laneq_u64::<0, 1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopy_lane_p8() {
+ let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+ let b: i8x8 = i8x8::new(0, 0x7F, 0, 0, 0, 0, 0, 0);
+ let e: i8x8 = i8x8::new(0x7F, 2, 3, 4, 5, 6, 7, 8);
+ let r: i8x8 = transmute(vcopy_lane_p8::<0, 1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopyq_laneq_p8() {
+ let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let b: i8x16 = i8x16::new(0, 0x7F, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ let e: i8x16 = i8x16::new(0x7F, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let r: i8x16 = transmute(vcopyq_laneq_p8::<0, 1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopy_lane_p16() {
+ let a: i16x4 = i16x4::new(1, 2, 3, 4);
+ let b: i16x4 = i16x4::new(0, 0x7F_FF, 0, 0);
+ let e: i16x4 = i16x4::new(0x7F_FF, 2, 3, 4);
+ let r: i16x4 = transmute(vcopy_lane_p16::<0, 1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopyq_laneq_p16() {
+ let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+ let b: i16x8 = i16x8::new(0, 0x7F_FF, 0, 0, 0, 0, 0, 0);
+ let e: i16x8 = i16x8::new(0x7F_FF, 2, 3, 4, 5, 6, 7, 8);
+ let r: i16x8 = transmute(vcopyq_laneq_p16::<0, 1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopyq_laneq_p64() {
+ let a: i64x2 = i64x2::new(1, 2);
+ let b: i64x2 = i64x2::new(0, 0x7F_FF_FF_FF_FF_FF_FF_FF);
+ let e: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 2);
+ let r: i64x2 = transmute(vcopyq_laneq_p64::<0, 1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopy_lane_f32() {
+ let a: f32x2 = f32x2::new(1., 2.);
+ let b: f32x2 = f32x2::new(0., 0.5);
+ let e: f32x2 = f32x2::new(0.5, 2.);
+ let r: f32x2 = transmute(vcopy_lane_f32::<0, 1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopyq_laneq_f32() {
+ let a: f32x4 = f32x4::new(1., 2., 3., 4.);
+ let b: f32x4 = f32x4::new(0., 0.5, 0., 0.);
+ let e: f32x4 = f32x4::new(0.5, 2., 3., 4.);
+ let r: f32x4 = transmute(vcopyq_laneq_f32::<0, 1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopyq_laneq_f64() {
+ let a: f64x2 = f64x2::new(1., 2.);
+ let b: f64x2 = f64x2::new(0., 0.5);
+ let e: f64x2 = f64x2::new(0.5, 2.);
+ let r: f64x2 = transmute(vcopyq_laneq_f64::<0, 1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopy_laneq_s8() {
+ let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+ let b: i8x16 = i8x16::new(0, 0x7F, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ let e: i8x8 = i8x8::new(0x7F, 2, 3, 4, 5, 6, 7, 8);
+ let r: i8x8 = transmute(vcopy_laneq_s8::<0, 1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopy_laneq_s16() {
+ let a: i16x4 = i16x4::new(1, 2, 3, 4);
+ let b: i16x8 = i16x8::new(0, 0x7F_FF, 0, 0, 0, 0, 0, 0);
+ let e: i16x4 = i16x4::new(0x7F_FF, 2, 3, 4);
+ let r: i16x4 = transmute(vcopy_laneq_s16::<0, 1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopy_laneq_s32() {
+ let a: i32x2 = i32x2::new(1, 2);
+ let b: i32x4 = i32x4::new(0, 0x7F_FF_FF_FF, 0, 0);
+ let e: i32x2 = i32x2::new(0x7F_FF_FF_FF, 2);
+ let r: i32x2 = transmute(vcopy_laneq_s32::<0, 1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopy_laneq_u8() {
+ let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+ let b: u8x16 = u8x16::new(0, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ let e: u8x8 = u8x8::new(0xFF, 2, 3, 4, 5, 6, 7, 8);
+ let r: u8x8 = transmute(vcopy_laneq_u8::<0, 1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopy_laneq_u16() {
+ let a: u16x4 = u16x4::new(1, 2, 3, 4);
+ let b: u16x8 = u16x8::new(0, 0xFF_FF, 0, 0, 0, 0, 0, 0);
+ let e: u16x4 = u16x4::new(0xFF_FF, 2, 3, 4);
+ let r: u16x4 = transmute(vcopy_laneq_u16::<0, 1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopy_laneq_u32() {
+ let a: u32x2 = u32x2::new(1, 2);
+ let b: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 0, 0);
+ let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 2);
+ let r: u32x2 = transmute(vcopy_laneq_u32::<0, 1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopy_laneq_p8() {
+ let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+ let b: i8x16 = i8x16::new(0, 0x7F, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ let e: i8x8 = i8x8::new(0x7F, 2, 3, 4, 5, 6, 7, 8);
+ let r: i8x8 = transmute(vcopy_laneq_p8::<0, 1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopy_laneq_p16() {
+ let a: i16x4 = i16x4::new(1, 2, 3, 4);
+ let b: i16x8 = i16x8::new(0, 0x7F_FF, 0, 0, 0, 0, 0, 0);
+ let e: i16x4 = i16x4::new(0x7F_FF, 2, 3, 4);
+ let r: i16x4 = transmute(vcopy_laneq_p16::<0, 1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopy_laneq_f32() {
+ let a: f32x2 = f32x2::new(1., 2.);
+ let b: f32x4 = f32x4::new(0., 0.5, 0., 0.);
+ let e: f32x2 = f32x2::new(0.5, 2.);
+ let r: f32x2 = transmute(vcopy_laneq_f32::<0, 1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopyq_lane_s8() {
+ let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let b: i8x8 = i8x8::new(0, 0x7F, 0, 0, 0, 0, 0, 0);
+ let e: i8x16 = i8x16::new(0x7F, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let r: i8x16 = transmute(vcopyq_lane_s8::<0, 1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopyq_lane_s16() {
+ let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+ let b: i16x4 = i16x4::new(0, 0x7F_FF, 0, 0);
+ let e: i16x8 = i16x8::new(0x7F_FF, 2, 3, 4, 5, 6, 7, 8);
+ let r: i16x8 = transmute(vcopyq_lane_s16::<0, 1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopyq_lane_s32() {
+ let a: i32x4 = i32x4::new(1, 2, 3, 4);
+ let b: i32x2 = i32x2::new(0, 0x7F_FF_FF_FF);
+ let e: i32x4 = i32x4::new(0x7F_FF_FF_FF, 2, 3, 4);
+ let r: i32x4 = transmute(vcopyq_lane_s32::<0, 1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopyq_lane_u8() {
+ let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let b: u8x8 = u8x8::new(0, 0xFF, 0, 0, 0, 0, 0, 0);
+ let e: u8x16 = u8x16::new(0xFF, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let r: u8x16 = transmute(vcopyq_lane_u8::<0, 1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopyq_lane_u16() {
+ let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+ let b: u16x4 = u16x4::new(0, 0xFF_FF, 0, 0);
+ let e: u16x8 = u16x8::new(0xFF_FF, 2, 3, 4, 5, 6, 7, 8);
+ let r: u16x8 = transmute(vcopyq_lane_u16::<0, 1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopyq_lane_u32() {
+ let a: u32x4 = u32x4::new(1, 2, 3, 4);
+ let b: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF);
+ let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 2, 3, 4);
+ let r: u32x4 = transmute(vcopyq_lane_u32::<0, 1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopyq_lane_p8() {
+ let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let b: i8x8 = i8x8::new(0, 0x7F, 0, 0, 0, 0, 0, 0);
+ let e: i8x16 = i8x16::new(0x7F, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let r: i8x16 = transmute(vcopyq_lane_p8::<0, 1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopyq_lane_p16() {
+ let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+ let b: i16x4 = i16x4::new(0, 0x7F_FF, 0, 0);
+ let e: i16x8 = i16x8::new(0x7F_FF, 2, 3, 4, 5, 6, 7, 8);
+ let r: i16x8 = transmute(vcopyq_lane_p16::<0, 1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopyq_lane_s64() {
+ let a: i64x2 = i64x2::new(1, 2);
+ let b: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
+ let e: i64x2 = i64x2::new(1, 0x7F_FF_FF_FF_FF_FF_FF_FF);
+ let r: i64x2 = transmute(vcopyq_lane_s64::<1, 0>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopyq_lane_u64() {
+ let a: u64x2 = u64x2::new(1, 2);
+ let b: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let e: u64x2 = u64x2::new(1, 0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x2 = transmute(vcopyq_lane_u64::<1, 0>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopyq_lane_p64() {
+ let a: i64x2 = i64x2::new(1, 2);
+ let b: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
+ let e: i64x2 = i64x2::new(1, 0x7F_FF_FF_FF_FF_FF_FF_FF);
+ let r: i64x2 = transmute(vcopyq_lane_p64::<1, 0>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopyq_lane_f32() {
+ let a: f32x4 = f32x4::new(1., 2., 3., 4.);
+ let b: f32x2 = f32x2::new(0.5, 0.);
+ let e: f32x4 = f32x4::new(1., 0.5, 3., 4.);
+ let r: f32x4 = transmute(vcopyq_lane_f32::<1, 0>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopyq_lane_f64() {
+ let a: f64x2 = f64x2::new(1., 2.);
+ let b: f64 = 0.5;
+ let e: f64x2 = f64x2::new(1., 0.5);
+ let r: f64x2 = transmute(vcopyq_lane_f64::<1, 0>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
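+ // `vcreate_f64` reinterprets a `u64` bit pattern as a `float64x1_t`;
+ // the all-zero bit pattern is the representation of 0.0.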
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcreate_f64() {
+ let a: u64 = 0;
+ let e: f64 = 0.;
+ let r: f64 = transmute(vcreate_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
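+ // The plain `vcvt*` tests convert between integer and floating-point
+ // lanes and between f32 and f64. The `vcvt_high_*` variants read the
+ // upper half of a quad register, and `vcvtx*` narrows f64 to f32
+ // with round-to-odd, which helps avoid double rounding when the
+ // result is narrowed again later.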
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvt_f64_s64() {
+ let a: i64x1 = i64x1::new(1);
+ let e: f64 = 1.;
+ let r: f64 = transmute(vcvt_f64_s64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtq_f64_s64() {
+ let a: i64x2 = i64x2::new(1, 2);
+ let e: f64x2 = f64x2::new(1., 2.);
+ let r: f64x2 = transmute(vcvtq_f64_s64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvt_f64_u64() {
+ let a: u64x1 = u64x1::new(1);
+ let e: f64 = 1.;
+ let r: f64 = transmute(vcvt_f64_u64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtq_f64_u64() {
+ let a: u64x2 = u64x2::new(1, 2);
+ let e: f64x2 = f64x2::new(1., 2.);
+ let r: f64x2 = transmute(vcvtq_f64_u64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvt_f64_f32() {
+ let a: f32x2 = f32x2::new(-1.2, 1.2);
+ let e: f64x2 = f64x2::new(-1.2f32 as f64, 1.2f32 as f64);
+ let r: f64x2 = transmute(vcvt_f64_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvt_high_f64_f32() {
+ let a: f32x4 = f32x4::new(-1.2, 1.2, 2.3, 3.4);
+ let e: f64x2 = f64x2::new(2.3f32 as f64, 3.4f32 as f64);
+ let r: f64x2 = transmute(vcvt_high_f64_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvt_f32_f64() {
+ let a: f64x2 = f64x2::new(-1.2, 1.2);
+ let e: f32x2 = f32x2::new(-1.2f64 as f32, 1.2f64 as f32);
+ let r: f32x2 = transmute(vcvt_f32_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvt_high_f32_f64() {
+ let a: f32x2 = f32x2::new(-1.2, 1.2);
+ let b: f64x2 = f64x2::new(-2.3, 3.4);
+ let e: f32x4 = f32x4::new(-1.2, 1.2, -2.3f64 as f32, 3.4f64 as f32);
+ let r: f32x4 = transmute(vcvt_high_f32_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtx_f32_f64() {
+ let a: f64x2 = f64x2::new(-1.0, 2.0);
+ let e: f32x2 = f32x2::new(-1.0, 2.0);
+ let r: f32x2 = transmute(vcvtx_f32_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtxd_f32_f64() {
+ let a: f64 = -1.0;
+ let e: f32 = -1.0;
+ let r: f32 = transmute(vcvtxd_f32_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtx_high_f32_f64() {
+ let a: f32x2 = f32x2::new(-1.0, 2.0);
+ let b: f64x2 = f64x2::new(-3.0, 4.0);
+ let e: f32x4 = f32x4::new(-1.0, 2.0, -3.0, 4.0);
+ let r: f32x4 = transmute(vcvtx_high_f32_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
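+ // The `vcvt_n_*::<N>` tests cover fixed-point conversions with N
+ // fractional bits: int -> float divides by 2^N (so 1 with N = 2
+ // becomes 0.25) and float -> int multiplies by 2^N (so 0.25 with
+ // N = 2 becomes 1).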
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvt_n_f64_s64() {
+ let a: i64x1 = i64x1::new(1);
+ let e: f64 = 0.25;
+ let r: f64 = transmute(vcvt_n_f64_s64::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtq_n_f64_s64() {
+ let a: i64x2 = i64x2::new(1, 2);
+ let e: f64x2 = f64x2::new(0.25, 0.5);
+ let r: f64x2 = transmute(vcvtq_n_f64_s64::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvts_n_f32_s32() {
+ let a: i32 = 1;
+ let e: f32 = 0.25;
+ let r: f32 = transmute(vcvts_n_f32_s32::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtd_n_f64_s64() {
+ let a: i64 = 1;
+ let e: f64 = 0.25;
+ let r: f64 = transmute(vcvtd_n_f64_s64::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvt_n_f64_u64() {
+ let a: u64x1 = u64x1::new(1);
+ let e: f64 = 0.25;
+ let r: f64 = transmute(vcvt_n_f64_u64::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtq_n_f64_u64() {
+ let a: u64x2 = u64x2::new(1, 2);
+ let e: f64x2 = f64x2::new(0.25, 0.5);
+ let r: f64x2 = transmute(vcvtq_n_f64_u64::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvts_n_f32_u32() {
+ let a: u32 = 1;
+ let e: f32 = 0.25;
+ let r: f32 = transmute(vcvts_n_f32_u32::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtd_n_f64_u64() {
+ let a: u64 = 1;
+ let e: f64 = 0.25;
+ let r: f64 = transmute(vcvtd_n_f64_u64::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvt_n_s64_f64() {
+ let a: f64 = 0.25;
+ let e: i64x1 = i64x1::new(1);
+ let r: i64x1 = transmute(vcvt_n_s64_f64::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtq_n_s64_f64() {
+ let a: f64x2 = f64x2::new(0.25, 0.5);
+ let e: i64x2 = i64x2::new(1, 2);
+ let r: i64x2 = transmute(vcvtq_n_s64_f64::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvts_n_s32_f32() {
+ let a: f32 = 0.25;
+ let e: i32 = 1;
+ let r: i32 = transmute(vcvts_n_s32_f32::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtd_n_s64_f64() {
+ let a: f64 = 0.25;
+ let e: i64 = 1;
+ let r: i64 = transmute(vcvtd_n_s64_f64::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvt_n_u64_f64() {
+ let a: f64 = 0.25;
+ let e: u64x1 = u64x1::new(1);
+ let r: u64x1 = transmute(vcvt_n_u64_f64::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtq_n_u64_f64() {
+ let a: f64x2 = f64x2::new(0.25, 0.5);
+ let e: u64x2 = u64x2::new(1, 2);
+ let r: u64x2 = transmute(vcvtq_n_u64_f64::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvts_n_u32_f32() {
+ let a: f32 = 0.25;
+ let e: u32 = 1;
+ let r: u32 = transmute(vcvts_n_u32_f32::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtd_n_u64_f64() {
+ let a: f64 = 0.25;
+ let e: u64 = 1;
+ let r: u64 = transmute(vcvtd_n_u64_f64::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
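+ // The scalar `vcvts_*`/`vcvtd_*` round trips below use values that
+ // are exactly representable in both types, so the conversions are
+ // lossless in either direction.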
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvts_f32_s32() {
+ let a: i32 = 1;
+ let e: f32 = 1.;
+ let r: f32 = transmute(vcvts_f32_s32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtd_f64_s64() {
+ let a: i64 = 1;
+ let e: f64 = 1.;
+ let r: f64 = transmute(vcvtd_f64_s64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvts_f32_u32() {
+ let a: u32 = 1;
+ let e: f32 = 1.;
+ let r: f32 = transmute(vcvts_f32_u32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtd_f64_u64() {
+ let a: u64 = 1;
+ let e: f64 = 1.;
+ let r: f64 = transmute(vcvtd_f64_u64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvts_s32_f32() {
+ let a: f32 = 1.;
+ let e: i32 = 1;
+ let r: i32 = transmute(vcvts_s32_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtd_s64_f64() {
+ let a: f64 = 1.;
+ let e: i64 = 1;
+ let r: i64 = transmute(vcvtd_s64_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvts_u32_f32() {
+ let a: f32 = 1.;
+ let e: u32 = 1;
+ let r: u32 = transmute(vcvts_u32_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtd_u64_f64() {
+ let a: f64 = 1.;
+ let e: u64 = 1;
+ let r: u64 = transmute(vcvtd_u64_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
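+ // The remaining float -> int tests pin down the rounding mode:
+ // plain `vcvt` rounds toward zero (truncation), `vcvta` rounds to
+ // nearest with ties away from zero, `vcvtn` rounds to nearest with
+ // ties to even (-1.5 -> -2), `vcvtm` rounds toward minus infinity
+ // (floor) and `vcvtp` rounds toward plus infinity (ceiling).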
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvt_s64_f64() {
+ let a: f64 = -1.1;
+ let e: i64x1 = i64x1::new(-1);
+ let r: i64x1 = transmute(vcvt_s64_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtq_s64_f64() {
+ let a: f64x2 = f64x2::new(-1.1, 2.1);
+ let e: i64x2 = i64x2::new(-1, 2);
+ let r: i64x2 = transmute(vcvtq_s64_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvt_u64_f64() {
+ let a: f64 = 1.1;
+ let e: u64x1 = u64x1::new(1);
+ let r: u64x1 = transmute(vcvt_u64_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtq_u64_f64() {
+ let a: f64x2 = f64x2::new(1.1, 2.1);
+ let e: u64x2 = u64x2::new(1, 2);
+ let r: u64x2 = transmute(vcvtq_u64_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvta_s32_f32() {
+ let a: f32x2 = f32x2::new(-1.1, 2.1);
+ let e: i32x2 = i32x2::new(-1, 2);
+ let r: i32x2 = transmute(vcvta_s32_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtaq_s32_f32() {
+ let a: f32x4 = f32x4::new(-1.1, 2.1, -2.9, 3.9);
+ let e: i32x4 = i32x4::new(-1, 2, -3, 4);
+ let r: i32x4 = transmute(vcvtaq_s32_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvta_s64_f64() {
+ let a: f64 = -1.1;
+ let e: i64x1 = i64x1::new(-1);
+ let r: i64x1 = transmute(vcvta_s64_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtaq_s64_f64() {
+ let a: f64x2 = f64x2::new(-1.1, 2.1);
+ let e: i64x2 = i64x2::new(-1, 2);
+ let r: i64x2 = transmute(vcvtaq_s64_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtas_s32_f32() {
+ let a: f32 = 2.9;
+ let e: i32 = 3;
+ let r: i32 = transmute(vcvtas_s32_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtad_s64_f64() {
+ let a: f64 = 2.9;
+ let e: i64 = 3;
+ let r: i64 = transmute(vcvtad_s64_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtas_u32_f32() {
+ let a: f32 = 2.9;
+ let e: u32 = 3;
+ let r: u32 = transmute(vcvtas_u32_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtad_u64_f64() {
+ let a: f64 = 2.9;
+ let e: u64 = 3;
+ let r: u64 = transmute(vcvtad_u64_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtn_s32_f32() {
+ let a: f32x2 = f32x2::new(-1.5, 2.1);
+ let e: i32x2 = i32x2::new(-2, 2);
+ let r: i32x2 = transmute(vcvtn_s32_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtnq_s32_f32() {
+ let a: f32x4 = f32x4::new(-1.5, 2.1, -2.9, 3.9);
+ let e: i32x4 = i32x4::new(-2, 2, -3, 4);
+ let r: i32x4 = transmute(vcvtnq_s32_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtn_s64_f64() {
+ let a: f64 = -1.5;
+ let e: i64x1 = i64x1::new(-2);
+ let r: i64x1 = transmute(vcvtn_s64_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtnq_s64_f64() {
+ let a: f64x2 = f64x2::new(-1.5, 2.1);
+ let e: i64x2 = i64x2::new(-2, 2);
+ let r: i64x2 = transmute(vcvtnq_s64_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtns_s32_f32() {
+ let a: f32 = -1.5;
+ let e: i32 = -2;
+ let r: i32 = transmute(vcvtns_s32_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtnd_s64_f64() {
+ let a: f64 = -1.5;
+ let e: i64 = -2;
+ let r: i64 = transmute(vcvtnd_s64_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtm_s32_f32() {
+ let a: f32x2 = f32x2::new(-1.1, 2.1);
+ let e: i32x2 = i32x2::new(-2, 2);
+ let r: i32x2 = transmute(vcvtm_s32_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtmq_s32_f32() {
+ let a: f32x4 = f32x4::new(-1.1, 2.1, -2.9, 3.9);
+ let e: i32x4 = i32x4::new(-2, 2, -3, 3);
+ let r: i32x4 = transmute(vcvtmq_s32_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtm_s64_f64() {
+ let a: f64 = -1.1;
+ let e: i64x1 = i64x1::new(-2);
+ let r: i64x1 = transmute(vcvtm_s64_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtmq_s64_f64() {
+ let a: f64x2 = f64x2::new(-1.1, 2.1);
+ let e: i64x2 = i64x2::new(-2, 2);
+ let r: i64x2 = transmute(vcvtmq_s64_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtms_s32_f32() {
+ let a: f32 = -1.1;
+ let e: i32 = -2;
+ let r: i32 = transmute(vcvtms_s32_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtmd_s64_f64() {
+ let a: f64 = -1.1;
+ let e: i64 = -2;
+ let r: i64 = transmute(vcvtmd_s64_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtp_s32_f32() {
+ let a: f32x2 = f32x2::new(-1.1, 2.1);
+ let e: i32x2 = i32x2::new(-1, 3);
+ let r: i32x2 = transmute(vcvtp_s32_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtpq_s32_f32() {
+ let a: f32x4 = f32x4::new(-1.1, 2.1, -2.9, 3.9);
+ let e: i32x4 = i32x4::new(-1, 3, -2, 4);
+ let r: i32x4 = transmute(vcvtpq_s32_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtp_s64_f64() {
+ let a: f64 = -1.1;
+ let e: i64x1 = i64x1::new(-1);
+ let r: i64x1 = transmute(vcvtp_s64_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtpq_s64_f64() {
+ let a: f64x2 = f64x2::new(-1.1, 2.1);
+ let e: i64x2 = i64x2::new(-1, 3);
+ let r: i64x2 = transmute(vcvtpq_s64_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtps_s32_f32() {
+ let a: f32 = -1.1;
+ let e: i32 = -1;
+ let r: i32 = transmute(vcvtps_s32_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtpd_s64_f64() {
+ let a: f64 = -1.1;
+ let e: i64 = -1;
+ let r: i64 = transmute(vcvtpd_s64_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvta_u32_f32() {
+ let a: f32x2 = f32x2::new(1.1, 2.1);
+ let e: u32x2 = u32x2::new(1, 2);
+ let r: u32x2 = transmute(vcvta_u32_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtaq_u32_f32() {
+ let a: f32x4 = f32x4::new(1.1, 2.1, 2.9, 3.9);
+ let e: u32x4 = u32x4::new(1, 2, 3, 4);
+ let r: u32x4 = transmute(vcvtaq_u32_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvta_u64_f64() {
+ let a: f64 = 1.1;
+ let e: u64x1 = u64x1::new(1);
+ let r: u64x1 = transmute(vcvta_u64_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtaq_u64_f64() {
+ let a: f64x2 = f64x2::new(1.1, 2.1);
+ let e: u64x2 = u64x2::new(1, 2);
+ let r: u64x2 = transmute(vcvtaq_u64_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtn_u32_f32() {
+ let a: f32x2 = f32x2::new(1.5, 2.1);
+ let e: u32x2 = u32x2::new(2, 2);
+ let r: u32x2 = transmute(vcvtn_u32_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtnq_u32_f32() {
+ let a: f32x4 = f32x4::new(1.5, 2.1, 2.9, 3.9);
+ let e: u32x4 = u32x4::new(2, 2, 3, 4);
+ let r: u32x4 = transmute(vcvtnq_u32_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtn_u64_f64() {
+ let a: f64 = 1.5;
+ let e: u64x1 = u64x1::new(2);
+ let r: u64x1 = transmute(vcvtn_u64_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtnq_u64_f64() {
+ let a: f64x2 = f64x2::new(1.5, 2.1);
+ let e: u64x2 = u64x2::new(2, 2);
+ let r: u64x2 = transmute(vcvtnq_u64_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtns_u32_f32() {
+ let a: f32 = 1.5;
+ let e: u32 = 2;
+ let r: u32 = transmute(vcvtns_u32_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtnd_u64_f64() {
+ let a: f64 = 1.5;
+ let e: u64 = 2;
+ let r: u64 = transmute(vcvtnd_u64_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtm_u32_f32() {
+ let a: f32x2 = f32x2::new(1.1, 2.1);
+ let e: u32x2 = u32x2::new(1, 2);
+ let r: u32x2 = transmute(vcvtm_u32_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtmq_u32_f32() {
+ let a: f32x4 = f32x4::new(1.1, 2.1, 2.9, 3.9);
+ let e: u32x4 = u32x4::new(1, 2, 2, 3);
+ let r: u32x4 = transmute(vcvtmq_u32_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtm_u64_f64() {
+ let a: f64 = 1.1;
+ let e: u64x1 = u64x1::new(1);
+ let r: u64x1 = transmute(vcvtm_u64_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtmq_u64_f64() {
+ let a: f64x2 = f64x2::new(1.1, 2.1);
+ let e: u64x2 = u64x2::new(1, 2);
+ let r: u64x2 = transmute(vcvtmq_u64_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtms_u32_f32() {
+ let a: f32 = 1.1;
+ let e: u32 = 1;
+ let r: u32 = transmute(vcvtms_u32_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtmd_u64_f64() {
+ let a: f64 = 1.1;
+ let e: u64 = 1;
+ let r: u64 = transmute(vcvtmd_u64_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtp_u32_f32() {
+ let a: f32x2 = f32x2::new(1.1, 2.1);
+ let e: u32x2 = u32x2::new(2, 3);
+ let r: u32x2 = transmute(vcvtp_u32_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtpq_u32_f32() {
+ let a: f32x4 = f32x4::new(1.1, 2.1, 2.9, 3.9);
+ let e: u32x4 = u32x4::new(2, 3, 3, 4);
+ let r: u32x4 = transmute(vcvtpq_u32_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtp_u64_f64() {
+ let a: f64 = 1.1;
+ let e: u64x1 = u64x1::new(2);
+ let r: u64x1 = transmute(vcvtp_u64_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtpq_u64_f64() {
+ let a: f64x2 = f64x2::new(1.1, 2.1);
+ let e: u64x2 = u64x2::new(2, 3);
+ let r: u64x2 = transmute(vcvtpq_u64_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtps_u32_f32() {
+ let a: f32 = 1.1;
+ let e: u32 = 2;
+ let r: u32 = transmute(vcvtps_u32_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcvtpd_u64_f64() {
+ let a: f64 = 1.1;
+ let e: u64 = 2;
+ let r: u64 = transmute(vcvtpd_u64_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
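+ // The `vdup*_lane*` tests duplicate a single lane: the vector forms
+ // broadcast lane N across the whole result, while the scalar
+ // `vdupb`/`vduph`/`vdups`/`vdupd` forms extract lane N as a byte,
+ // halfword, single-word or doubleword scalar.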
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdupq_laneq_p64() {
+ let a: i64x2 = i64x2::new(1, 1);
+ let e: i64x2 = i64x2::new(1, 1);
+ let r: i64x2 = transmute(vdupq_laneq_p64::<1>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdupq_lane_p64() {
+ let a: i64x1 = i64x1::new(1);
+ let e: i64x2 = i64x2::new(1, 1);
+ let r: i64x2 = transmute(vdupq_lane_p64::<0>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdupq_laneq_f64() {
+ let a: f64x2 = f64x2::new(1., 1.);
+ let e: f64x2 = f64x2::new(1., 1.);
+ let r: f64x2 = transmute(vdupq_laneq_f64::<1>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdupq_lane_f64() {
+ let a: f64 = 1.;
+ let e: f64x2 = f64x2::new(1., 1.);
+ let r: f64x2 = transmute(vdupq_lane_f64::<0>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdup_lane_p64() {
+ let a: i64x1 = i64x1::new(0);
+ let e: i64x1 = i64x1::new(0);
+ let r: i64x1 = transmute(vdup_lane_p64::<0>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdup_lane_f64() {
+ let a: f64 = 0.;
+ let e: f64 = 0.;
+ let r: f64 = transmute(vdup_lane_f64::<0>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdup_laneq_p64() {
+ let a: i64x2 = i64x2::new(0, 1);
+ let e: i64x1 = i64x1::new(1);
+ let r: i64x1 = transmute(vdup_laneq_p64::<1>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdup_laneq_f64() {
+ let a: f64x2 = f64x2::new(0., 1.);
+ let e: f64 = 1.;
+ let r: f64 = transmute(vdup_laneq_f64::<1>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdupb_lane_s8() {
+ let a: i8x8 = i8x8::new(1, 1, 1, 4, 1, 6, 7, 8);
+ let e: i8 = 1;
+ let r: i8 = transmute(vdupb_lane_s8::<4>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdupb_laneq_s8() {
+ let a: i8x16 = i8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16);
+ let e: i8 = 1;
+ let r: i8 = transmute(vdupb_laneq_s8::<8>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vduph_lane_s16() {
+ let a: i16x4 = i16x4::new(1, 1, 1, 4);
+ let e: i16 = 1;
+ let r: i16 = transmute(vduph_lane_s16::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vduph_laneq_s16() {
+ let a: i16x8 = i16x8::new(1, 1, 1, 4, 1, 6, 7, 8);
+ let e: i16 = 1;
+ let r: i16 = transmute(vduph_laneq_s16::<4>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdups_lane_s32() {
+ let a: i32x2 = i32x2::new(1, 1);
+ let e: i32 = 1;
+ let r: i32 = transmute(vdups_lane_s32::<1>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdups_laneq_s32() {
+ let a: i32x4 = i32x4::new(1, 1, 1, 4);
+ let e: i32 = 1;
+ let r: i32 = transmute(vdups_laneq_s32::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdupd_lane_s64() {
+ let a: i64x1 = i64x1::new(1);
+ let e: i64 = 1;
+ let r: i64 = transmute(vdupd_lane_s64::<0>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdupd_laneq_s64() {
+ let a: i64x2 = i64x2::new(1, 1);
+ let e: i64 = 1;
+ let r: i64 = transmute(vdupd_laneq_s64::<1>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdupb_lane_u8() {
+ let a: u8x8 = u8x8::new(1, 1, 1, 4, 1, 6, 7, 8);
+ let e: u8 = 1;
+ let r: u8 = transmute(vdupb_lane_u8::<4>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdupb_laneq_u8() {
+ let a: u8x16 = u8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16);
+ let e: u8 = 1;
+ let r: u8 = transmute(vdupb_laneq_u8::<8>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vduph_lane_u16() {
+ let a: u16x4 = u16x4::new(1, 1, 1, 4);
+ let e: u16 = 1;
+ let r: u16 = transmute(vduph_lane_u16::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vduph_laneq_u16() {
+ let a: u16x8 = u16x8::new(1, 1, 1, 4, 1, 6, 7, 8);
+ let e: u16 = 1;
+ let r: u16 = transmute(vduph_laneq_u16::<4>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdups_lane_u32() {
+ let a: u32x2 = u32x2::new(1, 1);
+ let e: u32 = 1;
+ let r: u32 = transmute(vdups_lane_u32::<1>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdups_laneq_u32() {
+ let a: u32x4 = u32x4::new(1, 1, 1, 4);
+ let e: u32 = 1;
+ let r: u32 = transmute(vdups_laneq_u32::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdupd_lane_u64() {
+ let a: u64x1 = u64x1::new(1);
+ let e: u64 = 1;
+ let r: u64 = transmute(vdupd_lane_u64::<0>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdupd_laneq_u64() {
+ let a: u64x2 = u64x2::new(1, 1);
+ let e: u64 = 1;
+ let r: u64 = transmute(vdupd_laneq_u64::<1>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdupb_lane_p8() {
+ let a: i8x8 = i8x8::new(1, 1, 1, 4, 1, 6, 7, 8);
+ let e: p8 = 1;
+ let r: p8 = transmute(vdupb_lane_p8::<4>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdupb_laneq_p8() {
+ let a: i8x16 = i8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16);
+ let e: p8 = 1;
+ let r: p8 = transmute(vdupb_laneq_p8::<8>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vduph_lane_p16() {
+ let a: i16x4 = i16x4::new(1, 1, 1, 4);
+ let e: p16 = 1;
+ let r: p16 = transmute(vduph_lane_p16::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vduph_laneq_p16() {
+ let a: i16x8 = i16x8::new(1, 1, 1, 4, 1, 6, 7, 8);
+ let e: p16 = 1;
+ let r: p16 = transmute(vduph_laneq_p16::<4>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdups_lane_f32() {
+ let a: f32x2 = f32x2::new(1., 1.);
+ let e: f32 = 1.;
+ let r: f32 = transmute(vdups_lane_f32::<1>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdups_laneq_f32() {
+ let a: f32x4 = f32x4::new(1., 1., 1., 4.);
+ let e: f32 = 1.;
+ let r: f32 = transmute(vdups_laneq_f32::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdupd_lane_f64() {
+ let a: f64 = 1.;
+ let e: f64 = 1.;
+ let r: f64 = transmute(vdupd_lane_f64::<0>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdupd_laneq_f64() {
+ let a: f64x2 = f64x2::new(1., 1.);
+ let e: f64 = 1.;
+ let r: f64 = transmute(vdupd_laneq_f64::<1>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
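+ // vext*: extract a new vector from a pair of vectors, starting at a constant lane offset.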
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vextq_p64() {
+ let a: i64x2 = i64x2::new(0, 8);
+ let b: i64x2 = i64x2::new(9, 11);
+ let e: i64x2 = i64x2::new(8, 9);
+ let r: i64x2 = transmute(vextq_p64::<1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vextq_f64() {
+ let a: f64x2 = f64x2::new(0., 2.);
+ let b: f64x2 = f64x2::new(3., 4.);
+ let e: f64x2 = f64x2::new(2., 3.);
+ let r: f64x2 = transmute(vextq_f64::<1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
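+ // vmla*: multiply-accumulate, computing a + b * c lane-wise.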
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmla_f64() {
+ let a: f64 = 0.;
+ let b: f64 = 2.;
+ let c: f64 = 3.;
+ let e: f64 = 6.;
+ let r: f64 = transmute(vmla_f64(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmlaq_f64() {
+ let a: f64x2 = f64x2::new(0., 1.);
+ let b: f64x2 = f64x2::new(2., 2.);
+ let c: f64x2 = f64x2::new(3., 3.);
+ let e: f64x2 = f64x2::new(6., 7.);
+ let r: f64x2 = transmute(vmlaq_f64(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
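+ // vmlal_high*: widening multiply-accumulate using the high halves of b and c (including _n scalar and _lane/_laneq variants).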
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmlal_high_s8() {
+ let a: i16x8 = i16x8::new(8, 7, 6, 5, 4, 3, 2, 1);
+ let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
+ let c: i8x16 = i8x16::new(3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7);
+ let e: i16x8 = i16x8::new(8, 9, 10, 11, 12, 13, 14, 15);
+ let r: i16x8 = transmute(vmlal_high_s8(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmlal_high_s16() {
+ let a: i32x4 = i32x4::new(8, 7, 6, 5);
+ let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+ let c: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
+ let e: i32x4 = i32x4::new(8, 9, 10, 11);
+ let r: i32x4 = transmute(vmlal_high_s16(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmlal_high_s32() {
+ let a: i64x2 = i64x2::new(8, 7);
+ let b: i32x4 = i32x4::new(2, 2, 2, 2);
+ let c: i32x4 = i32x4::new(3, 3, 0, 1);
+ let e: i64x2 = i64x2::new(8, 9);
+ let r: i64x2 = transmute(vmlal_high_s32(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmlal_high_u8() {
+ let a: u16x8 = u16x8::new(8, 7, 6, 5, 4, 3, 2, 1);
+ let b: u8x16 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
+ let c: u8x16 = u8x16::new(3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7);
+ let e: u16x8 = u16x8::new(8, 9, 10, 11, 12, 13, 14, 15);
+ let r: u16x8 = transmute(vmlal_high_u8(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmlal_high_u16() {
+ let a: u32x4 = u32x4::new(8, 7, 6, 5);
+ let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+ let c: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
+ let e: u32x4 = u32x4::new(8, 9, 10, 11);
+ let r: u32x4 = transmute(vmlal_high_u16(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmlal_high_u32() {
+ let a: u64x2 = u64x2::new(8, 7);
+ let b: u32x4 = u32x4::new(2, 2, 2, 2);
+ let c: u32x4 = u32x4::new(3, 3, 0, 1);
+ let e: u64x2 = u64x2::new(8, 9);
+ let r: u64x2 = transmute(vmlal_high_u32(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmlal_high_n_s16() {
+ let a: i32x4 = i32x4::new(8, 7, 6, 5);
+ let b: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
+ let c: i16 = 2;
+ let e: i32x4 = i32x4::new(8, 9, 10, 11);
+ let r: i32x4 = transmute(vmlal_high_n_s16(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmlal_high_n_s32() {
+ let a: i64x2 = i64x2::new(8, 7);
+ let b: i32x4 = i32x4::new(3, 3, 0, 1);
+ let c: i32 = 2;
+ let e: i64x2 = i64x2::new(8, 9);
+ let r: i64x2 = transmute(vmlal_high_n_s32(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmlal_high_n_u16() {
+ let a: u32x4 = u32x4::new(8, 7, 6, 5);
+ let b: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
+ let c: u16 = 2;
+ let e: u32x4 = u32x4::new(8, 9, 10, 11);
+ let r: u32x4 = transmute(vmlal_high_n_u16(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmlal_high_n_u32() {
+ let a: u64x2 = u64x2::new(8, 7);
+ let b: u32x4 = u32x4::new(3, 3, 0, 1);
+ let c: u32 = 2;
+ let e: u64x2 = u64x2::new(8, 9);
+ let r: u64x2 = transmute(vmlal_high_n_u32(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmlal_high_lane_s16() {
+ let a: i32x4 = i32x4::new(8, 7, 6, 5);
+ let b: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
+ let c: i16x4 = i16x4::new(0, 2, 0, 0);
+ let e: i32x4 = i32x4::new(8, 9, 10, 11);
+ let r: i32x4 = transmute(vmlal_high_lane_s16::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmlal_high_laneq_s16() {
+ let a: i32x4 = i32x4::new(8, 7, 6, 5);
+ let b: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
+ let c: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
+ let e: i32x4 = i32x4::new(8, 9, 10, 11);
+ let r: i32x4 = transmute(vmlal_high_laneq_s16::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmlal_high_lane_s32() {
+ let a: i64x2 = i64x2::new(8, 7);
+ let b: i32x4 = i32x4::new(3, 3, 0, 1);
+ let c: i32x2 = i32x2::new(0, 2);
+ let e: i64x2 = i64x2::new(8, 9);
+ let r: i64x2 = transmute(vmlal_high_lane_s32::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmlal_high_laneq_s32() {
+ let a: i64x2 = i64x2::new(8, 7);
+ let b: i32x4 = i32x4::new(3, 3, 0, 1);
+ let c: i32x4 = i32x4::new(0, 2, 0, 0);
+ let e: i64x2 = i64x2::new(8, 9);
+ let r: i64x2 = transmute(vmlal_high_laneq_s32::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmlal_high_lane_u16() {
+ let a: u32x4 = u32x4::new(8, 7, 6, 5);
+ let b: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
+ let c: u16x4 = u16x4::new(0, 2, 0, 0);
+ let e: u32x4 = u32x4::new(8, 9, 10, 11);
+ let r: u32x4 = transmute(vmlal_high_lane_u16::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmlal_high_laneq_u16() {
+ let a: u32x4 = u32x4::new(8, 7, 6, 5);
+ let b: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
+ let c: u16x8 = u16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
+ let e: u32x4 = u32x4::new(8, 9, 10, 11);
+ let r: u32x4 = transmute(vmlal_high_laneq_u16::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmlal_high_lane_u32() {
+ let a: u64x2 = u64x2::new(8, 7);
+ let b: u32x4 = u32x4::new(3, 3, 0, 1);
+ let c: u32x2 = u32x2::new(0, 2);
+ let e: u64x2 = u64x2::new(8, 9);
+ let r: u64x2 = transmute(vmlal_high_lane_u32::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmlal_high_laneq_u32() {
+ let a: u64x2 = u64x2::new(8, 7);
+ let b: u32x4 = u32x4::new(3, 3, 0, 1);
+ let c: u32x4 = u32x4::new(0, 2, 0, 0);
+ let e: u64x2 = u64x2::new(8, 9);
+ let r: u64x2 = transmute(vmlal_high_laneq_u32::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
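+ // vmls*: multiply-subtract, computing a - b * c lane-wise.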
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmls_f64() {
+ let a: f64 = 6.;
+ let b: f64 = 2.;
+ let c: f64 = 3.;
+ let e: f64 = 0.;
+ let r: f64 = transmute(vmls_f64(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmlsq_f64() {
+ let a: f64x2 = f64x2::new(6., 7.);
+ let b: f64x2 = f64x2::new(2., 2.);
+ let c: f64x2 = f64x2::new(3., 3.);
+ let e: f64x2 = f64x2::new(0., 1.);
+ let r: f64x2 = transmute(vmlsq_f64(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
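+ // vmlsl_high*: widening multiply-subtract using the high halves of b and c.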
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmlsl_high_s8() {
+ let a: i16x8 = i16x8::new(14, 15, 16, 17, 18, 19, 20, 21);
+ let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
+ let c: i8x16 = i8x16::new(3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7);
+ let e: i16x8 = i16x8::new(14, 13, 12, 11, 10, 9, 8, 7);
+ let r: i16x8 = transmute(vmlsl_high_s8(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmlsl_high_s16() {
+ let a: i32x4 = i32x4::new(14, 15, 16, 17);
+ let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+ let c: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
+ let e: i32x4 = i32x4::new(14, 13, 12, 11);
+ let r: i32x4 = transmute(vmlsl_high_s16(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmlsl_high_s32() {
+ let a: i64x2 = i64x2::new(14, 15);
+ let b: i32x4 = i32x4::new(2, 2, 2, 2);
+ let c: i32x4 = i32x4::new(3, 3, 0, 1);
+ let e: i64x2 = i64x2::new(14, 13);
+ let r: i64x2 = transmute(vmlsl_high_s32(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmlsl_high_u8() {
+ let a: u16x8 = u16x8::new(14, 15, 16, 17, 18, 19, 20, 21);
+ let b: u8x16 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
+ let c: u8x16 = u8x16::new(3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7);
+ let e: u16x8 = u16x8::new(14, 13, 12, 11, 10, 9, 8, 7);
+ let r: u16x8 = transmute(vmlsl_high_u8(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmlsl_high_u16() {
+ let a: u32x4 = u32x4::new(14, 15, 16, 17);
+ let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+ let c: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
+ let e: u32x4 = u32x4::new(14, 13, 12, 11);
+ let r: u32x4 = transmute(vmlsl_high_u16(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmlsl_high_u32() {
+ let a: u64x2 = u64x2::new(14, 15);
+ let b: u32x4 = u32x4::new(2, 2, 2, 2);
+ let c: u32x4 = u32x4::new(3, 3, 0, 1);
+ let e: u64x2 = u64x2::new(14, 13);
+ let r: u64x2 = transmute(vmlsl_high_u32(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmlsl_high_n_s16() {
+ let a: i32x4 = i32x4::new(14, 15, 16, 17);
+ let b: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
+ let c: i16 = 2;
+ let e: i32x4 = i32x4::new(14, 13, 12, 11);
+ let r: i32x4 = transmute(vmlsl_high_n_s16(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmlsl_high_n_s32() {
+ let a: i64x2 = i64x2::new(14, 15);
+ let b: i32x4 = i32x4::new(3, 3, 0, 1);
+ let c: i32 = 2;
+ let e: i64x2 = i64x2::new(14, 13);
+ let r: i64x2 = transmute(vmlsl_high_n_s32(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmlsl_high_n_u16() {
+ let a: u32x4 = u32x4::new(14, 15, 16, 17);
+ let b: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
+ let c: u16 = 2;
+ let e: u32x4 = u32x4::new(14, 13, 12, 11);
+ let r: u32x4 = transmute(vmlsl_high_n_u16(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmlsl_high_n_u32() {
+ let a: u64x2 = u64x2::new(14, 15);
+ let b: u32x4 = u32x4::new(3, 3, 0, 1);
+ let c: u32 = 2;
+ let e: u64x2 = u64x2::new(14, 13);
+ let r: u64x2 = transmute(vmlsl_high_n_u32(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmlsl_high_lane_s16() {
+ let a: i32x4 = i32x4::new(14, 15, 16, 17);
+ let b: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
+ let c: i16x4 = i16x4::new(0, 2, 0, 0);
+ let e: i32x4 = i32x4::new(14, 13, 12, 11);
+ let r: i32x4 = transmute(vmlsl_high_lane_s16::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmlsl_high_laneq_s16() {
+ let a: i32x4 = i32x4::new(14, 15, 16, 17);
+ let b: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
+ let c: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
+ let e: i32x4 = i32x4::new(14, 13, 12, 11);
+ let r: i32x4 = transmute(vmlsl_high_laneq_s16::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmlsl_high_lane_s32() {
+ let a: i64x2 = i64x2::new(14, 15);
+ let b: i32x4 = i32x4::new(3, 3, 0, 1);
+ let c: i32x2 = i32x2::new(0, 2);
+ let e: i64x2 = i64x2::new(14, 13);
+ let r: i64x2 = transmute(vmlsl_high_lane_s32::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmlsl_high_laneq_s32() {
+ let a: i64x2 = i64x2::new(14, 15);
+ let b: i32x4 = i32x4::new(3, 3, 0, 1);
+ let c: i32x4 = i32x4::new(0, 2, 0, 0);
+ let e: i64x2 = i64x2::new(14, 13);
+ let r: i64x2 = transmute(vmlsl_high_laneq_s32::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmlsl_high_lane_u16() {
+ let a: u32x4 = u32x4::new(14, 15, 16, 17);
+ let b: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
+ let c: u16x4 = u16x4::new(0, 2, 0, 0);
+ let e: u32x4 = u32x4::new(14, 13, 12, 11);
+ let r: u32x4 = transmute(vmlsl_high_lane_u16::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmlsl_high_laneq_u16() {
+ let a: u32x4 = u32x4::new(14, 15, 16, 17);
+ let b: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
+ let c: u16x8 = u16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
+ let e: u32x4 = u32x4::new(14, 13, 12, 11);
+ let r: u32x4 = transmute(vmlsl_high_laneq_u16::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmlsl_high_lane_u32() {
+ let a: u64x2 = u64x2::new(14, 15);
+ let b: u32x4 = u32x4::new(3, 3, 0, 1);
+ let c: u32x2 = u32x2::new(0, 2);
+ let e: u64x2 = u64x2::new(14, 13);
+ let r: u64x2 = transmute(vmlsl_high_lane_u32::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmlsl_high_laneq_u32() {
+ let a: u64x2 = u64x2::new(14, 15);
+ let b: u32x4 = u32x4::new(3, 3, 0, 1);
+ let c: u32x4 = u32x4::new(0, 2, 0, 0);
+ let e: u64x2 = u64x2::new(14, 13);
+ let r: u64x2 = transmute(vmlsl_high_laneq_u32::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
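+ // vmovn_high*: narrow b to the next smaller element type and append it to the low half a.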
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmovn_high_s16() {
+ let a: i8x8 = i8x8::new(0, 1, 2, 3, 2, 3, 4, 5);
+ let b: i16x8 = i16x8::new(2, 3, 4, 5, 12, 13, 14, 15);
+ let e: i8x16 = i8x16::new(0, 1, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 12, 13, 14, 15);
+ let r: i8x16 = transmute(vmovn_high_s16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmovn_high_s32() {
+ let a: i16x4 = i16x4::new(0, 1, 2, 3);
+ let b: i32x4 = i32x4::new(2, 3, 4, 5);
+ let e: i16x8 = i16x8::new(0, 1, 2, 3, 2, 3, 4, 5);
+ let r: i16x8 = transmute(vmovn_high_s32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmovn_high_s64() {
+ let a: i32x2 = i32x2::new(0, 1);
+ let b: i64x2 = i64x2::new(2, 3);
+ let e: i32x4 = i32x4::new(0, 1, 2, 3);
+ let r: i32x4 = transmute(vmovn_high_s64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmovn_high_u16() {
+ let a: u8x8 = u8x8::new(0, 1, 2, 3, 2, 3, 4, 5);
+ let b: u16x8 = u16x8::new(2, 3, 4, 5, 12, 13, 14, 15);
+ let e: u8x16 = u8x16::new(0, 1, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 12, 13, 14, 15);
+ let r: u8x16 = transmute(vmovn_high_u16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmovn_high_u32() {
+ let a: u16x4 = u16x4::new(0, 1, 2, 3);
+ let b: u32x4 = u32x4::new(2, 3, 4, 5);
+ let e: u16x8 = u16x8::new(0, 1, 2, 3, 2, 3, 4, 5);
+ let r: u16x8 = transmute(vmovn_high_u32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmovn_high_u64() {
+ let a: u32x2 = u32x2::new(0, 1);
+ let b: u64x2 = u64x2::new(2, 3);
+ let e: u32x4 = u32x4::new(0, 1, 2, 3);
+ let r: u32x4 = transmute(vmovn_high_u64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
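+ // vneg*: lane-wise (or scalar, for vnegd) negation.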
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vneg_s64() {
+ let a: i64x1 = i64x1::new(0);
+ let e: i64x1 = i64x1::new(0);
+ let r: i64x1 = transmute(vneg_s64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vnegq_s64() {
+ let a: i64x2 = i64x2::new(0, 1);
+ let e: i64x2 = i64x2::new(0, -1);
+ let r: i64x2 = transmute(vnegq_s64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vnegd_s64() {
+ let a: i64 = 1;
+ let e: i64 = -1;
+ let r: i64 = transmute(vnegd_s64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vneg_f64() {
+ let a: f64 = 0.;
+ let e: f64 = 0.;
+ let r: f64 = transmute(vneg_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vnegq_f64() {
+ let a: f64x2 = f64x2::new(0., 1.);
+ let e: f64x2 = f64x2::new(0., -1.);
+ let r: f64x2 = transmute(vnegq_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
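+ // vqneg*: saturating negation; i64::MIN cannot be negated and saturates to i64::MAX.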
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqneg_s64() {
+ let a: i64x1 = i64x1::new(i64::MIN);
+ let e: i64x1 = i64x1::new(i64::MAX);
+ let r: i64x1 = transmute(vqneg_s64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqnegq_s64() {
+ let a: i64x2 = i64x2::new(i64::MIN, 0);
+ let e: i64x2 = i64x2::new(i64::MAX, 0);
+ let r: i64x2 = transmute(vqnegq_s64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqnegb_s8() {
+ let a: i8 = 1;
+ let e: i8 = -1;
+ let r: i8 = transmute(vqnegb_s8(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqnegh_s16() {
+ let a: i16 = 1;
+ let e: i16 = -1;
+ let r: i16 = transmute(vqnegh_s16(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqnegs_s32() {
+ let a: i32 = 1;
+ let e: i32 = -1;
+ let r: i32 = transmute(vqnegs_s32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqnegd_s64() {
+ let a: i64 = 1;
+ let e: i64 = -1;
+ let r: i64 = transmute(vqnegd_s64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
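+ // vqsub{b,h,s,d}_*: saturating subtraction on scalar values.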
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqsubb_s8() {
+ let a: i8 = 42;
+ let b: i8 = 1;
+ let e: i8 = 41;
+ let r: i8 = transmute(vqsubb_s8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqsubh_s16() {
+ let a: i16 = 42;
+ let b: i16 = 1;
+ let e: i16 = 41;
+ let r: i16 = transmute(vqsubh_s16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqsubb_u8() {
+ let a: u8 = 42;
+ let b: u8 = 1;
+ let e: u8 = 41;
+ let r: u8 = transmute(vqsubb_u8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqsubh_u16() {
+ let a: u16 = 42;
+ let b: u16 = 1;
+ let e: u16 = 41;
+ let r: u16 = transmute(vqsubh_u16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqsubs_u32() {
+ let a: u32 = 42;
+ let b: u32 = 1;
+ let e: u32 = 41;
+ let r: u32 = transmute(vqsubs_u32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqsubd_u64() {
+ let a: u64 = 42;
+ let b: u64 = 1;
+ let e: u64 = 41;
+ let r: u64 = transmute(vqsubd_u64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqsubs_s32() {
+ let a: i32 = 42;
+ let b: i32 = 1;
+ let e: i32 = 41;
+ let r: i32 = transmute(vqsubs_s32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqsubd_s64() {
+ let a: i64 = 42;
+ let b: i64 = 1;
+ let e: i64 = 41;
+ let r: i64 = transmute(vqsubd_s64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
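+ // vrbit*: reverse the bit order within each byte (e.g. 2 = 0b0000_0010 becomes 0b0100_0000 = 64).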
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrbit_s8() {
+ let a: i8x8 = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14);
+ let e: i8x8 = i8x8::new(0, 64, 32, 96, 16, 80, 48, 112);
+ let r: i8x8 = transmute(vrbit_s8(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrbitq_s8() {
+ let a: i8x16 = i8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
+ let e: i8x16 = i8x16::new(0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120);
+ let r: i8x16 = transmute(vrbitq_s8(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrbit_u8() {
+ let a: u8x8 = u8x8::new(0, 2, 4, 6, 8, 10, 12, 14);
+ let e: u8x8 = u8x8::new(0, 64, 32, 96, 16, 80, 48, 112);
+ let r: u8x8 = transmute(vrbit_u8(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrbitq_u8() {
+ let a: u8x16 = u8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
+ let e: u8x16 = u8x16::new(0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120);
+ let r: u8x16 = transmute(vrbitq_u8(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrbit_p8() {
+ let a: i8x8 = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14);
+ let e: i8x8 = i8x8::new(0, 64, 32, 96, 16, 80, 48, 112);
+ let r: i8x8 = transmute(vrbit_p8(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrbitq_p8() {
+ let a: i8x16 = i8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
+ let e: i8x16 = i8x16::new(0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120);
+ let r: i8x16 = transmute(vrbitq_p8(transmute(a)));
+ assert_eq!(r, e);
+ }
+
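+ // vrnd* family: round to integral. x and i use the current rounding mode (ties to even by default), a rounds ties away from zero, n rounds ties to even, m rounds toward minus infinity, p rounds toward plus infinity, and plain vrnd truncates toward zero.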
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrndx_f32() {
+ let a: f32x2 = f32x2::new(-1.5, 0.5);
+ let e: f32x2 = f32x2::new(-2.0, 0.0);
+ let r: f32x2 = transmute(vrndx_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrndxq_f32() {
+ let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
+ let e: f32x4 = f32x4::new(-2.0, 0.0, 2.0, 2.0);
+ let r: f32x4 = transmute(vrndxq_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrndx_f64() {
+ let a: f64 = -1.5;
+ let e: f64 = -2.0;
+ let r: f64 = transmute(vrndx_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrndxq_f64() {
+ let a: f64x2 = f64x2::new(-1.5, 0.5);
+ let e: f64x2 = f64x2::new(-2.0, 0.0);
+ let r: f64x2 = transmute(vrndxq_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrnda_f32() {
+ let a: f32x2 = f32x2::new(-1.5, 0.5);
+ let e: f32x2 = f32x2::new(-2.0, 1.0);
+ let r: f32x2 = transmute(vrnda_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrndaq_f32() {
+ let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
+ let e: f32x4 = f32x4::new(-2.0, 1.0, 2.0, 3.0);
+ let r: f32x4 = transmute(vrndaq_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrnda_f64() {
+ let a: f64 = -1.5;
+ let e: f64 = -2.0;
+ let r: f64 = transmute(vrnda_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrndaq_f64() {
+ let a: f64x2 = f64x2::new(-1.5, 0.5);
+ let e: f64x2 = f64x2::new(-2.0, 1.0);
+ let r: f64x2 = transmute(vrndaq_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrndn_f64() {
+ let a: f64 = -1.5;
+ let e: f64 = -2.0;
+ let r: f64 = transmute(vrndn_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrndnq_f64() {
+ let a: f64x2 = f64x2::new(-1.5, 0.5);
+ let e: f64x2 = f64x2::new(-2.0, 0.0);
+ let r: f64x2 = transmute(vrndnq_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrndns_f32() {
+ let a: f32 = -1.5;
+ let e: f32 = -2.0;
+ let r: f32 = transmute(vrndns_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrndm_f32() {
+ let a: f32x2 = f32x2::new(-1.5, 0.5);
+ let e: f32x2 = f32x2::new(-2.0, 0.0);
+ let r: f32x2 = transmute(vrndm_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrndmq_f32() {
+ let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
+ let e: f32x4 = f32x4::new(-2.0, 0.0, 1.0, 2.0);
+ let r: f32x4 = transmute(vrndmq_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrndm_f64() {
+ let a: f64 = -1.5;
+ let e: f64 = -2.0;
+ let r: f64 = transmute(vrndm_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrndmq_f64() {
+ let a: f64x2 = f64x2::new(-1.5, 0.5);
+ let e: f64x2 = f64x2::new(-2.0, 0.0);
+ let r: f64x2 = transmute(vrndmq_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrndp_f32() {
+ let a: f32x2 = f32x2::new(-1.5, 0.5);
+ let e: f32x2 = f32x2::new(-1.0, 1.0);
+ let r: f32x2 = transmute(vrndp_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrndpq_f32() {
+ let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
+ let e: f32x4 = f32x4::new(-1.0, 1.0, 2.0, 3.0);
+ let r: f32x4 = transmute(vrndpq_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrndp_f64() {
+ let a: f64 = -1.5;
+ let e: f64 = -1.0;
+ let r: f64 = transmute(vrndp_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrndpq_f64() {
+ let a: f64x2 = f64x2::new(-1.5, 0.5);
+ let e: f64x2 = f64x2::new(-1.0, 1.0);
+ let r: f64x2 = transmute(vrndpq_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrnd_f32() {
+ let a: f32x2 = f32x2::new(-1.5, 0.5);
+ let e: f32x2 = f32x2::new(-1.0, 0.0);
+ let r: f32x2 = transmute(vrnd_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrndq_f32() {
+ let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
+ let e: f32x4 = f32x4::new(-1.0, 0.0, 1.0, 2.0);
+ let r: f32x4 = transmute(vrndq_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrnd_f64() {
+ let a: f64 = -1.5;
+ let e: f64 = -1.0;
+ let r: f64 = transmute(vrnd_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrndq_f64() {
+ let a: f64x2 = f64x2::new(-1.5, 0.5);
+ let e: f64x2 = f64x2::new(-1.0, 0.0);
+ let r: f64x2 = transmute(vrndq_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrndi_f32() {
+ let a: f32x2 = f32x2::new(-1.5, 0.5);
+ let e: f32x2 = f32x2::new(-2.0, 0.0);
+ let r: f32x2 = transmute(vrndi_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrndiq_f32() {
+ let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
+ let e: f32x4 = f32x4::new(-2.0, 0.0, 2.0, 2.0);
+ let r: f32x4 = transmute(vrndiq_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrndi_f64() {
+ let a: f64 = -1.5;
+ let e: f64 = -2.0;
+ let r: f64 = transmute(vrndi_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrndiq_f64() {
+ let a: f64x2 = f64x2::new(-1.5, 0.5);
+ let e: f64x2 = f64x2::new(-2.0, 0.0);
+ let r: f64x2 = transmute(vrndiq_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
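+ // vqadd{b,h,s,d}_*: saturating addition on scalar values.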
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqaddb_s8() {
+ let a: i8 = 42;
+ let b: i8 = 1;
+ let e: i8 = 43;
+ let r: i8 = transmute(vqaddb_s8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqaddh_s16() {
+ let a: i16 = 42;
+ let b: i16 = 1;
+ let e: i16 = 43;
+ let r: i16 = transmute(vqaddh_s16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqaddb_u8() {
+ let a: u8 = 42;
+ let b: u8 = 1;
+ let e: u8 = 43;
+ let r: u8 = transmute(vqaddb_u8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqaddh_u16() {
+ let a: u16 = 42;
+ let b: u16 = 1;
+ let e: u16 = 43;
+ let r: u16 = transmute(vqaddh_u16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqadds_u32() {
+ let a: u32 = 42;
+ let b: u32 = 1;
+ let e: u32 = 43;
+ let r: u32 = transmute(vqadds_u32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqaddd_u64() {
+ let a: u64 = 42;
+ let b: u64 = 1;
+ let e: u64 = 43;
+ let r: u64 = transmute(vqaddd_u64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqadds_s32() {
+ let a: i32 = 42;
+ let b: i32 = 1;
+ let e: i32 = 43;
+ let r: i32 = transmute(vqadds_s32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqaddd_s64() {
+ let a: i64 = 42;
+ let b: i64 = 1;
+ let e: i64 = 43;
+ let r: i64 = transmute(vqaddd_s64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
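+ // vld1*_x{2,3,4}: load 2, 3 or 4 consecutive vectors from memory without de-interleaving.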
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld1_f64_x2() {
+ let a: [f64; 3] = [0., 1., 2.];
+ let e: [f64; 2] = [1., 2.];
+ let r: [f64; 2] = transmute(vld1_f64_x2(a[1..].as_ptr()));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld1q_f64_x2() {
+ let a: [f64; 5] = [0., 1., 2., 3., 4.];
+ let e: [f64x2; 2] = [f64x2::new(1., 2.), f64x2::new(3., 4.)];
+ let r: [f64x2; 2] = transmute(vld1q_f64_x2(a[1..].as_ptr()));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld1_f64_x3() {
+ let a: [f64; 4] = [0., 1., 2., 3.];
+ let e: [f64; 3] = [1., 2., 3.];
+ let r: [f64; 3] = transmute(vld1_f64_x3(a[1..].as_ptr()));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld1q_f64_x3() {
+ let a: [f64; 7] = [0., 1., 2., 3., 4., 5., 6.];
+ let e: [f64x2; 3] = [f64x2::new(1., 2.), f64x2::new(3., 4.), f64x2::new(5., 6.)];
+ let r: [f64x2; 3] = transmute(vld1q_f64_x3(a[1..].as_ptr()));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld1_f64_x4() {
+ let a: [f64; 5] = [0., 1., 2., 3., 4.];
+ let e: [f64; 4] = [1., 2., 3., 4.];
+ let r: [f64; 4] = transmute(vld1_f64_x4(a[1..].as_ptr()));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld1q_f64_x4() {
+ let a: [f64; 9] = [0., 1., 2., 3., 4., 5., 6., 7., 8.];
+ let e: [f64x2; 4] = [f64x2::new(1., 2.), f64x2::new(3., 4.), f64x2::new(5., 6.), f64x2::new(7., 8.)];
+ let r: [f64x2; 4] = transmute(vld1q_f64_x4(a[1..].as_ptr()));
+ assert_eq!(r, e);
+ }
+
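+ // vld2*: load and de-interleave 2-element structures; _dup replicates the first structure to all lanes, _lane loads into one lane only.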
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld2q_s64() {
+ let a: [i64; 5] = [0, 1, 2, 2, 3];
+ let e: [i64x2; 2] = [i64x2::new(1, 2), i64x2::new(2, 3)];
+ let r: [i64x2; 2] = transmute(vld2q_s64(a[1..].as_ptr()));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld2q_u64() {
+ let a: [u64; 5] = [0, 1, 2, 2, 3];
+ let e: [u64x2; 2] = [u64x2::new(1, 2), u64x2::new(2, 3)];
+ let r: [u64x2; 2] = transmute(vld2q_u64(a[1..].as_ptr()));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld2q_p64() {
+ let a: [u64; 5] = [0, 1, 2, 2, 3];
+ let e: [i64x2; 2] = [i64x2::new(1, 2), i64x2::new(2, 3)];
+ let r: [i64x2; 2] = transmute(vld2q_p64(a[1..].as_ptr()));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld2_f64() {
+ let a: [f64; 3] = [0., 1., 2.];
+ let e: [f64; 2] = [1., 2.];
+ let r: [f64; 2] = transmute(vld2_f64(a[1..].as_ptr()));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld2q_f64() {
+ let a: [f64; 5] = [0., 1., 2., 2., 3.];
+ let e: [f64x2; 2] = [f64x2::new(1., 2.), f64x2::new(2., 3.)];
+ let r: [f64x2; 2] = transmute(vld2q_f64(a[1..].as_ptr()));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld2q_dup_s64() {
+ let a: [i64; 5] = [0, 1, 1, 2, 3];
+ let e: [i64x2; 2] = [i64x2::new(1, 1), i64x2::new(1, 1)];
+ let r: [i64x2; 2] = transmute(vld2q_dup_s64(a[1..].as_ptr()));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld2q_dup_u64() {
+ let a: [u64; 5] = [0, 1, 1, 2, 3];
+ let e: [u64x2; 2] = [u64x2::new(1, 1), u64x2::new(1, 1)];
+ let r: [u64x2; 2] = transmute(vld2q_dup_u64(a[1..].as_ptr()));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld2q_dup_p64() {
+ let a: [u64; 5] = [0, 1, 1, 2, 3];
+ let e: [i64x2; 2] = [i64x2::new(1, 1), i64x2::new(1, 1)];
+ let r: [i64x2; 2] = transmute(vld2q_dup_p64(a[1..].as_ptr()));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld2_dup_f64() {
+ let a: [f64; 3] = [0., 1., 1.];
+ let e: [f64; 2] = [1., 1.];
+ let r: [f64; 2] = transmute(vld2_dup_f64(a[1..].as_ptr()));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld2q_dup_f64() {
+ let a: [f64; 5] = [0., 1., 1., 2., 3.];
+ let e: [f64x2; 2] = [f64x2::new(1., 1.), f64x2::new(1., 1.)];
+ let r: [f64x2; 2] = transmute(vld2q_dup_f64(a[1..].as_ptr()));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld2q_lane_s8() {
+ let a: [i8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8];
+ let b: [i8x16; 2] = [i8x16::new(0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26)];
+ let e: [i8x16; 2] = [i8x16::new(1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26)];
+ let r: [i8x16; 2] = transmute(vld2q_lane_s8::<0>(a[1..].as_ptr(), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld2_lane_s64() {
+ let a: [i64; 3] = [0, 1, 2];
+ let b: [i64x1; 2] = [i64x1::new(0), i64x1::new(2)];
+ let e: [i64x1; 2] = [i64x1::new(1), i64x1::new(2)];
+ let r: [i64x1; 2] = transmute(vld2_lane_s64::<0>(a[1..].as_ptr(), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld2q_lane_s64() {
+ let a: [i64; 5] = [0, 1, 2, 3, 4];
+ let b: [i64x2; 2] = [i64x2::new(0, 2), i64x2::new(2, 14)];
+ let e: [i64x2; 2] = [i64x2::new(1, 2), i64x2::new(2, 14)];
+ let r: [i64x2; 2] = transmute(vld2q_lane_s64::<0>(a[1..].as_ptr(), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld2_lane_p64() {
+ let a: [u64; 3] = [0, 1, 2];
+ let b: [i64x1; 2] = [i64x1::new(0), i64x1::new(2)];
+ let e: [i64x1; 2] = [i64x1::new(1), i64x1::new(2)];
+ let r: [i64x1; 2] = transmute(vld2_lane_p64::<0>(a[1..].as_ptr(), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld2q_lane_p64() {
+ let a: [u64; 5] = [0, 1, 2, 3, 4];
+ let b: [i64x2; 2] = [i64x2::new(0, 2), i64x2::new(2, 14)];
+ let e: [i64x2; 2] = [i64x2::new(1, 2), i64x2::new(2, 14)];
+ let r: [i64x2; 2] = transmute(vld2q_lane_p64::<0>(a[1..].as_ptr(), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld2q_lane_u8() {
+ let a: [u8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8];
+ let b: [u8x16; 2] = [u8x16::new(0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), u8x16::new(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26)];
+ let e: [u8x16; 2] = [u8x16::new(1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), u8x16::new(2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26)];
+ let r: [u8x16; 2] = transmute(vld2q_lane_u8::<0>(a[1..].as_ptr(), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld2_lane_u64() {
+ let a: [u64; 3] = [0, 1, 2];
+ let b: [u64x1; 2] = [u64x1::new(0), u64x1::new(2)];
+ let e: [u64x1; 2] = [u64x1::new(1), u64x1::new(2)];
+ let r: [u64x1; 2] = transmute(vld2_lane_u64::<0>(a[1..].as_ptr(), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld2q_lane_u64() {
+ let a: [u64; 5] = [0, 1, 2, 3, 4];
+ let b: [u64x2; 2] = [u64x2::new(0, 2), u64x2::new(2, 14)];
+ let e: [u64x2; 2] = [u64x2::new(1, 2), u64x2::new(2, 14)];
+ let r: [u64x2; 2] = transmute(vld2q_lane_u64::<0>(a[1..].as_ptr(), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld2q_lane_p8() {
+ let a: [u8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8];
+ let b: [i8x16; 2] = [i8x16::new(0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26)];
+ let e: [i8x16; 2] = [i8x16::new(1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26)];
+ let r: [i8x16; 2] = transmute(vld2q_lane_p8::<0>(a[1..].as_ptr(), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld2_lane_f64() {
+ let a: [f64; 3] = [0., 1., 2.];
+ let b: [f64; 2] = [0., 2.];
+ let e: [f64; 2] = [1., 2.];
+ let r: [f64; 2] = transmute(vld2_lane_f64::<0>(a[1..].as_ptr(), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld2q_lane_f64() {
+ let a: [f64; 5] = [0., 1., 2., 3., 4.];
+ let b: [f64x2; 2] = [f64x2::new(0., 2.), f64x2::new(2., 14.)];
+ let e: [f64x2; 2] = [f64x2::new(1., 2.), f64x2::new(2., 14.)];
+ let r: [f64x2; 2] = transmute(vld2q_lane_f64::<0>(a[1..].as_ptr(), transmute(b)));
+ assert_eq!(r, e);
+ }
+
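+ // vld3*: load and de-interleave 3-element structures (with _dup and _lane variants as above).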
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld3q_s64() {
+ let a: [i64; 7] = [0, 1, 2, 2, 2, 4, 4];
+ let e: [i64x2; 3] = [i64x2::new(1, 2), i64x2::new(2, 4), i64x2::new(2, 4)];
+ let r: [i64x2; 3] = transmute(vld3q_s64(a[1..].as_ptr()));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld3q_u64() {
+ let a: [u64; 7] = [0, 1, 2, 2, 2, 4, 4];
+ let e: [u64x2; 3] = [u64x2::new(1, 2), u64x2::new(2, 4), u64x2::new(2, 4)];
+ let r: [u64x2; 3] = transmute(vld3q_u64(a[1..].as_ptr()));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld3q_p64() {
+ let a: [u64; 7] = [0, 1, 2, 2, 2, 4, 4];
+ let e: [i64x2; 3] = [i64x2::new(1, 2), i64x2::new(2, 4), i64x2::new(2, 4)];
+ let r: [i64x2; 3] = transmute(vld3q_p64(a[1..].as_ptr()));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld3_f64() {
+ let a: [f64; 4] = [0., 1., 2., 2.];
+ let e: [f64; 3] = [1., 2., 2.];
+ let r: [f64; 3] = transmute(vld3_f64(a[1..].as_ptr()));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld3q_f64() {
+ let a: [f64; 7] = [0., 1., 2., 2., 2., 4., 4.];
+ let e: [f64x2; 3] = [f64x2::new(1., 2.), f64x2::new(2., 4.), f64x2::new(2., 4.)];
+ let r: [f64x2; 3] = transmute(vld3q_f64(a[1..].as_ptr()));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld3q_dup_s64() {
+ let a: [i64; 7] = [0, 1, 1, 1, 3, 1, 4];
+ let e: [i64x2; 3] = [i64x2::new(1, 1), i64x2::new(1, 1), i64x2::new(1, 1)];
+ let r: [i64x2; 3] = transmute(vld3q_dup_s64(a[1..].as_ptr()));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld3q_dup_u64() {
+ let a: [u64; 7] = [0, 1, 1, 1, 3, 1, 4];
+ let e: [u64x2; 3] = [u64x2::new(1, 1), u64x2::new(1, 1), u64x2::new(1, 1)];
+ let r: [u64x2; 3] = transmute(vld3q_dup_u64(a[1..].as_ptr()));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld3q_dup_p64() {
+ let a: [u64; 7] = [0, 1, 1, 1, 3, 1, 4];
+ let e: [i64x2; 3] = [i64x2::new(1, 1), i64x2::new(1, 1), i64x2::new(1, 1)];
+ let r: [i64x2; 3] = transmute(vld3q_dup_p64(a[1..].as_ptr()));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld3_dup_f64() {
+ let a: [f64; 4] = [0., 1., 1., 1.];
+ let e: [f64; 3] = [1., 1., 1.];
+ let r: [f64; 3] = transmute(vld3_dup_f64(a[1..].as_ptr()));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld3q_dup_f64() {
+ let a: [f64; 7] = [0., 1., 1., 1., 3., 1., 4.];
+ let e: [f64x2; 3] = [f64x2::new(1., 1.), f64x2::new(1., 1.), f64x2::new(1., 1.)];
+ let r: [f64x2; 3] = transmute(vld3q_dup_f64(a[1..].as_ptr()));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld3q_lane_s8() {
+ let a: [i8; 49] = [0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8];
+ let b: [i8x16; 3] = [i8x16::new(0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26), i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8)];
+ let e: [i8x16; 3] = [i8x16::new(1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26), i8x16::new(2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8)];
+ let r: [i8x16; 3] = transmute(vld3q_lane_s8::<0>(a[1..].as_ptr(), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld3_lane_s64() {
+ let a: [i64; 4] = [0, 1, 2, 2];
+ let b: [i64x1; 3] = [i64x1::new(0), i64x1::new(2), i64x1::new(2)];
+ let e: [i64x1; 3] = [i64x1::new(1), i64x1::new(2), i64x1::new(2)];
+ let r: [i64x1; 3] = transmute(vld3_lane_s64::<0>(a[1..].as_ptr(), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld3q_lane_s64() {
+ let a: [i64; 7] = [0, 1, 2, 2, 4, 5, 6];
+ let b: [i64x2; 3] = [i64x2::new(0, 2), i64x2::new(2, 14), i64x2::new(2, 16)];
+ let e: [i64x2; 3] = [i64x2::new(1, 2), i64x2::new(2, 14), i64x2::new(2, 16)];
+ let r: [i64x2; 3] = transmute(vld3q_lane_s64::<0>(a[1..].as_ptr(), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld3_lane_p64() {
+ let a: [u64; 4] = [0, 1, 2, 2];
+ let b: [i64x1; 3] = [i64x1::new(0), i64x1::new(2), i64x1::new(2)];
+ let e: [i64x1; 3] = [i64x1::new(1), i64x1::new(2), i64x1::new(2)];
+ let r: [i64x1; 3] = transmute(vld3_lane_p64::<0>(a[1..].as_ptr(), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld3q_lane_p64() {
+ let a: [u64; 7] = [0, 1, 2, 2, 4, 5, 6];
+ let b: [i64x2; 3] = [i64x2::new(0, 2), i64x2::new(2, 14), i64x2::new(2, 16)];
+ let e: [i64x2; 3] = [i64x2::new(1, 2), i64x2::new(2, 14), i64x2::new(2, 16)];
+ let r: [i64x2; 3] = transmute(vld3q_lane_p64::<0>(a[1..].as_ptr(), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld3q_lane_p8() {
+ let a: [u8; 49] = [0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8];
+ let b: [i8x16; 3] = [i8x16::new(0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26), i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8)];
+ let e: [i8x16; 3] = [i8x16::new(1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26), i8x16::new(2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8)];
+ let r: [i8x16; 3] = transmute(vld3q_lane_p8::<0>(a[1..].as_ptr(), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld3q_lane_u8() {
+ let a: [u8; 49] = [0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8];
+ let b: [u8x16; 3] = [u8x16::new(0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), u8x16::new(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26), u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8)];
+ let e: [u8x16; 3] = [u8x16::new(1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), u8x16::new(2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26), u8x16::new(2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8)];
+ let r: [u8x16; 3] = transmute(vld3q_lane_u8::<0>(a[1..].as_ptr(), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld3_lane_u64() {
+ let a: [u64; 4] = [0, 1, 2, 2];
+ let b: [u64x1; 3] = [u64x1::new(0), u64x1::new(2), u64x1::new(2)];
+ let e: [u64x1; 3] = [u64x1::new(1), u64x1::new(2), u64x1::new(2)];
+ let r: [u64x1; 3] = transmute(vld3_lane_u64::<0>(a[1..].as_ptr(), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld3q_lane_u64() {
+ let a: [u64; 7] = [0, 1, 2, 2, 4, 5, 6];
+ let b: [u64x2; 3] = [u64x2::new(0, 2), u64x2::new(2, 14), u64x2::new(2, 16)];
+ let e: [u64x2; 3] = [u64x2::new(1, 2), u64x2::new(2, 14), u64x2::new(2, 16)];
+ let r: [u64x2; 3] = transmute(vld3q_lane_u64::<0>(a[1..].as_ptr(), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld3_lane_f64() {
+ let a: [f64; 4] = [0., 1., 2., 2.];
+ let b: [f64; 3] = [0., 2., 2.];
+ let e: [f64; 3] = [1., 2., 2.];
+ let r: [f64; 3] = transmute(vld3_lane_f64::<0>(a[1..].as_ptr(), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld3q_lane_f64() {
+ let a: [f64; 7] = [0., 1., 2., 2., 4., 5., 6.];
+ let b: [f64x2; 3] = [f64x2::new(0., 2.), f64x2::new(2., 14.), f64x2::new(9., 16.)];
+ let e: [f64x2; 3] = [f64x2::new(1., 2.), f64x2::new(2., 14.), f64x2::new(2., 16.)];
+ let r: [f64x2; 3] = transmute(vld3q_lane_f64::<0>(a[1..].as_ptr(), transmute(b)));
+ assert_eq!(r, e);
+ }
+
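+ // vld4*: load and de-interleave 4-element structures (with _dup and _lane variants as above).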
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld4q_s64() {
+ let a: [i64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8];
+ let e: [i64x2; 4] = [i64x2::new(1, 2), i64x2::new(2, 6), i64x2::new(2, 6), i64x2::new(6, 8)];
+ let r: [i64x2; 4] = transmute(vld4q_s64(a[1..].as_ptr()));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld4q_u64() {
+ let a: [u64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8];
+ let e: [u64x2; 4] = [u64x2::new(1, 2), u64x2::new(2, 6), u64x2::new(2, 6), u64x2::new(6, 8)];
+ let r: [u64x2; 4] = transmute(vld4q_u64(a[1..].as_ptr()));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld4q_p64() {
+ let a: [u64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8];
+ let e: [i64x2; 4] = [i64x2::new(1, 2), i64x2::new(2, 6), i64x2::new(2, 6), i64x2::new(6, 8)];
+ let r: [i64x2; 4] = transmute(vld4q_p64(a[1..].as_ptr()));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld4_f64() {
+ let a: [f64; 5] = [0., 1., 2., 2., 6.];
+ let e: [f64; 4] = [1., 2., 2., 6.];
+ let r: [f64; 4] = transmute(vld4_f64(a[1..].as_ptr()));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld4q_f64() {
+ let a: [f64; 9] = [0., 1., 2., 2., 6., 2., 6., 6., 8.];
+ let e: [f64x2; 4] = [f64x2::new(1., 2.), f64x2::new(2., 6.), f64x2::new(2., 6.), f64x2::new(6., 8.)];
+ let r: [f64x2; 4] = transmute(vld4q_f64(a[1..].as_ptr()));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld4q_dup_s64() {
+ let a: [i64; 9] = [0, 1, 1, 1, 1, 2, 4, 3, 5];
+ let e: [i64x2; 4] = [i64x2::new(1, 1), i64x2::new(1, 1), i64x2::new(1, 1), i64x2::new(1, 1)];
+ let r: [i64x2; 4] = transmute(vld4q_dup_s64(a[1..].as_ptr()));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld4q_dup_u64() {
+ let a: [u64; 9] = [0, 1, 1, 1, 1, 2, 4, 3, 5];
+ let e: [u64x2; 4] = [u64x2::new(1, 1), u64x2::new(1, 1), u64x2::new(1, 1), u64x2::new(1, 1)];
+ let r: [u64x2; 4] = transmute(vld4q_dup_u64(a[1..].as_ptr()));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld4q_dup_p64() {
+ let a: [u64; 9] = [0, 1, 1, 1, 1, 2, 4, 3, 5];
+ let e: [i64x2; 4] = [i64x2::new(1, 1), i64x2::new(1, 1), i64x2::new(1, 1), i64x2::new(1, 1)];
+ let r: [i64x2; 4] = transmute(vld4q_dup_p64(a[1..].as_ptr()));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld4_dup_f64() {
+ let a: [f64; 5] = [0., 1., 1., 1., 1.];
+ let e: [f64; 4] = [1., 1., 1., 1.];
+ let r: [f64; 4] = transmute(vld4_dup_f64(a[1..].as_ptr()));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld4q_dup_f64() {
+ let a: [f64; 9] = [0., 1., 1., 1., 1., 6., 4., 3., 5.];
+ let e: [f64x2; 4] = [f64x2::new(1., 1.), f64x2::new(1., 1.), f64x2::new(1., 1.), f64x2::new(1., 1.)];
+ let r: [f64x2; 4] = transmute(vld4q_dup_f64(a[1..].as_ptr()));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld4q_lane_s8() {
+ let a: [i8; 65] = [0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16];
+ let b: [i8x16; 4] = [i8x16::new(0, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26), i8x16::new(11, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8), i8x16::new(1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16)];
+ let e: [i8x16; 4] = [i8x16::new(1, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26), i8x16::new(2, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8), i8x16::new(2, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16)];
+ let r: [i8x16; 4] = transmute(vld4q_lane_s8::<0>(a[1..].as_ptr(), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld4_lane_s64() {
+ let a: [i64; 5] = [0, 1, 2, 2, 2];
+ let b: [i64x1; 4] = [i64x1::new(0), i64x1::new(2), i64x1::new(2), i64x1::new(2)];
+ let e: [i64x1; 4] = [i64x1::new(1), i64x1::new(2), i64x1::new(2), i64x1::new(2)];
+ let r: [i64x1; 4] = transmute(vld4_lane_s64::<0>(a[1..].as_ptr(), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld4q_lane_s64() {
+ let a: [i64; 9] = [0, 1, 2, 2, 2, 5, 6, 7, 8];
+ let b: [i64x2; 4] = [i64x2::new(0, 2), i64x2::new(2, 2), i64x2::new(2, 16), i64x2::new(2, 18)];
+ let e: [i64x2; 4] = [i64x2::new(1, 2), i64x2::new(2, 2), i64x2::new(2, 16), i64x2::new(2, 18)];
+ let r: [i64x2; 4] = transmute(vld4q_lane_s64::<0>(a[1..].as_ptr(), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld4_lane_p64() {
+ let a: [u64; 5] = [0, 1, 2, 2, 2];
+ let b: [i64x1; 4] = [i64x1::new(0), i64x1::new(2), i64x1::new(2), i64x1::new(2)];
+ let e: [i64x1; 4] = [i64x1::new(1), i64x1::new(2), i64x1::new(2), i64x1::new(2)];
+ let r: [i64x1; 4] = transmute(vld4_lane_p64::<0>(a[1..].as_ptr(), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld4q_lane_p64() {
+ let a: [u64; 9] = [0, 1, 2, 2, 2, 5, 6, 7, 8];
+ let b: [i64x2; 4] = [i64x2::new(0, 2), i64x2::new(2, 2), i64x2::new(2, 16), i64x2::new(2, 18)];
+ let e: [i64x2; 4] = [i64x2::new(1, 2), i64x2::new(2, 2), i64x2::new(2, 16), i64x2::new(2, 18)];
+ let r: [i64x2; 4] = transmute(vld4q_lane_p64::<0>(a[1..].as_ptr(), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld4q_lane_p8() {
+ let a: [u8; 65] = [0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16];
+ let b: [i8x16; 4] = [i8x16::new(0, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26), i8x16::new(11, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8), i8x16::new(1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16)];
+ let e: [i8x16; 4] = [i8x16::new(1, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26), i8x16::new(2, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8), i8x16::new(2, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16)];
+ let r: [i8x16; 4] = transmute(vld4q_lane_p8::<0>(a[1..].as_ptr(), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld4q_lane_u8() {
+ let a: [u8; 65] = [0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16];
+ let b: [u8x16; 4] = [u8x16::new(0, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26), u8x16::new(11, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26), u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8), u8x16::new(1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16)];
+ let e: [u8x16; 4] = [u8x16::new(1, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26), u8x16::new(2, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26), u8x16::new(2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8), u8x16::new(2, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16)];
+ let r: [u8x16; 4] = transmute(vld4q_lane_u8::<0>(a[1..].as_ptr(), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld4_lane_u64() {
+ let a: [u64; 5] = [0, 1, 2, 2, 2];
+ let b: [u64x1; 4] = [u64x1::new(0), u64x1::new(2), u64x1::new(2), u64x1::new(2)];
+ let e: [u64x1; 4] = [u64x1::new(1), u64x1::new(2), u64x1::new(2), u64x1::new(2)];
+ let r: [u64x1; 4] = transmute(vld4_lane_u64::<0>(a[1..].as_ptr(), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld4q_lane_u64() {
+ let a: [u64; 9] = [0, 1, 2, 2, 2, 5, 6, 7, 8];
+ let b: [u64x2; 4] = [u64x2::new(0, 2), u64x2::new(2, 2), u64x2::new(2, 16), u64x2::new(2, 18)];
+ let e: [u64x2; 4] = [u64x2::new(1, 2), u64x2::new(2, 2), u64x2::new(2, 16), u64x2::new(2, 18)];
+ let r: [u64x2; 4] = transmute(vld4q_lane_u64::<0>(a[1..].as_ptr(), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld4_lane_f64() {
+ let a: [f64; 5] = [0., 1., 2., 2., 2.];
+ let b: [f64; 4] = [0., 2., 2., 2.];
+ let e: [f64; 4] = [1., 2., 2., 2.];
+ let r: [f64; 4] = transmute(vld4_lane_f64::<0>(a[1..].as_ptr(), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld4q_lane_f64() {
+ let a: [f64; 9] = [0., 1., 2., 2., 2., 5., 6., 7., 8.];
+ let b: [f64x2; 4] = [f64x2::new(0., 2.), f64x2::new(2., 2.), f64x2::new(2., 16.), f64x2::new(2., 18.)];
+ let e: [f64x2; 4] = [f64x2::new(1., 2.), f64x2::new(2., 2.), f64x2::new(2., 16.), f64x2::new(2., 18.)];
+ let r: [f64x2; 4] = transmute(vld4q_lane_f64::<0>(a[1..].as_ptr(), transmute(b)));
+ assert_eq!(r, e);
+ }
+
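+ // The store tests build their vector arguments with `read_unaligned` from
+ // the `a[1..]` slice; skipping the first element presumably keeps the
+ // source pointer misaligned so the intrinsics are also exercised on
+ // unaligned data.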
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst1_lane_f64() {
+ let a: [f64; 2] = [0., 1.];
+ let e: [f64; 1] = [1.];
+ let mut r: [f64; 1] = [0f64; 1];
+ vst1_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst1q_lane_f64() {
+ let a: [f64; 3] = [0., 1., 2.];
+ let e: [f64; 2] = [1., 0.];
+ let mut r: [f64; 2] = [0f64; 2];
+ vst1q_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
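+ // `vst1*_f64_x2/_x3/_x4` (ST1, multiple-register form) store two, three or
+ // four vectors to consecutive memory without any interleaving.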
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst1_f64_x2() {
+ let a: [f64; 3] = [0., 1., 2.];
+ let e: [f64; 2] = [1., 2.];
+ let mut r: [f64; 2] = [0f64; 2];
+ vst1_f64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst1q_f64_x2() {
+ let a: [f64; 5] = [0., 1., 2., 3., 4.];
+ let e: [f64; 4] = [1., 2., 3., 4.];
+ let mut r: [f64; 4] = [0f64; 4];
+ vst1q_f64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst1_f64_x3() {
+ let a: [f64; 4] = [0., 1., 2., 3.];
+ let e: [f64; 3] = [1., 2., 3.];
+ let mut r: [f64; 3] = [0f64; 3];
+ vst1_f64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst1q_f64_x3() {
+ let a: [f64; 7] = [0., 1., 2., 3., 4., 5., 6.];
+ let e: [f64; 6] = [1., 2., 3., 4., 5., 6.];
+ let mut r: [f64; 6] = [0f64; 6];
+ vst1q_f64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst1_f64_x4() {
+ let a: [f64; 5] = [0., 1., 2., 3., 4.];
+ let e: [f64; 4] = [1., 2., 3., 4.];
+ let mut r: [f64; 4] = [0f64; 4];
+ vst1_f64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst1q_f64_x4() {
+ let a: [f64; 9] = [0., 1., 2., 3., 4., 5., 6., 7., 8.];
+ let e: [f64; 8] = [1., 2., 3., 4., 5., 6., 7., 8.];
+ let mut r: [f64; 8] = [0f64; 8];
+ vst1q_f64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
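+ // `vst2*` (ST2) interleaves two vectors element-wise into memory:
+ // a0, b0, a1, b1, ...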
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst2q_s64() {
+ let a: [i64; 5] = [0, 1, 2, 2, 3];
+ let e: [i64; 4] = [1, 2, 2, 3];
+ let mut r: [i64; 4] = [0i64; 4];
+ vst2q_s64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst2q_u64() {
+ let a: [u64; 5] = [0, 1, 2, 2, 3];
+ let e: [u64; 4] = [1, 2, 2, 3];
+ let mut r: [u64; 4] = [0u64; 4];
+ vst2q_u64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst2q_p64() {
+ let a: [u64; 5] = [0, 1, 2, 2, 3];
+ let e: [u64; 4] = [1, 2, 2, 3];
+ let mut r: [u64; 4] = [0u64; 4];
+ vst2q_p64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst2_f64() {
+ let a: [f64; 3] = [0., 1., 2.];
+ let e: [f64; 2] = [1., 2.];
+ let mut r: [f64; 2] = [0f64; 2];
+ vst2_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst2q_f64() {
+ let a: [f64; 5] = [0., 1., 2., 2., 3.];
+ let e: [f64; 4] = [1., 2., 2., 3.];
+ let mut r: [f64; 4] = [0f64; 4];
+ vst2q_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
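+ // `vst2*_lane` (ST2, single-structure form) stores only the 2-element
+ // structure at lane `LANE`, so exactly two elements are written and the
+ // rest of the destination stays zeroed here.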
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst2q_lane_s8() {
+ let a: [i8; 33] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17];
+ let e: [i8; 32] = [1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
+ let mut r: [i8; 32] = [0i8; 32];
+ vst2q_lane_s8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst2_lane_s64() {
+ let a: [i64; 3] = [0, 1, 2];
+ let e: [i64; 2] = [1, 2];
+ let mut r: [i64; 2] = [0i64; 2];
+ vst2_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst2q_lane_s64() {
+ let a: [i64; 5] = [0, 1, 2, 2, 3];
+ let e: [i64; 4] = [1, 2, 0, 0];
+ let mut r: [i64; 4] = [0i64; 4];
+ vst2q_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst2q_lane_u8() {
+ let a: [u8; 33] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17];
+ let e: [u8; 32] = [1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
+ let mut r: [u8; 32] = [0u8; 32];
+ vst2q_lane_u8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst2_lane_u64() {
+ let a: [u64; 3] = [0, 1, 2];
+ let e: [u64; 2] = [1, 2];
+ let mut r: [u64; 2] = [0u64; 2];
+ vst2_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst2q_lane_u64() {
+ let a: [u64; 5] = [0, 1, 2, 2, 3];
+ let e: [u64; 4] = [1, 2, 0, 0];
+ let mut r: [u64; 4] = [0u64; 4];
+ vst2q_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst2q_lane_p8() {
+ let a: [u8; 33] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17];
+ let e: [u8; 32] = [1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
+ let mut r: [u8; 32] = [0u8; 32];
+ vst2q_lane_p8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst2_lane_p64() {
+ let a: [u64; 3] = [0, 1, 2];
+ let e: [u64; 2] = [1, 2];
+ let mut r: [u64; 2] = [0u64; 2];
+ vst2_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst2q_lane_p64() {
+ let a: [u64; 5] = [0, 1, 2, 2, 3];
+ let e: [u64; 4] = [1, 2, 0, 0];
+ let mut r: [u64; 4] = [0u64; 4];
+ vst2q_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst2_lane_f64() {
+ let a: [f64; 3] = [0., 1., 2.];
+ let e: [f64; 2] = [1., 2.];
+ let mut r: [f64; 2] = [0f64; 2];
+ vst2_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst2q_lane_f64() {
+ let a: [f64; 5] = [0., 1., 2., 2., 3.];
+ let e: [f64; 4] = [1., 2., 0., 0.];
+ let mut r: [f64; 4] = [0f64; 4];
+ vst2q_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
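+ // `vst3*` (ST3) interleaves three vectors into memory; the `_lane`
+ // variants below store only the 3-element structure at lane `LANE`.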
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst3q_s64() {
+ let a: [i64; 7] = [0, 1, 2, 2, 4, 2, 4];
+ let e: [i64; 6] = [1, 2, 2, 2, 4, 4];
+ let mut r: [i64; 6] = [0i64; 6];
+ vst3q_s64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst3q_u64() {
+ let a: [u64; 7] = [0, 1, 2, 2, 4, 2, 4];
+ let e: [u64; 6] = [1, 2, 2, 2, 4, 4];
+ let mut r: [u64; 6] = [0u64; 6];
+ vst3q_u64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst3q_p64() {
+ let a: [u64; 7] = [0, 1, 2, 2, 4, 2, 4];
+ let e: [u64; 6] = [1, 2, 2, 2, 4, 4];
+ let mut r: [u64; 6] = [0u64; 6];
+ vst3q_p64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst3_f64() {
+ let a: [f64; 4] = [0., 1., 2., 2.];
+ let e: [f64; 3] = [1., 2., 2.];
+ let mut r: [f64; 3] = [0f64; 3];
+ vst3_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst3q_f64() {
+ let a: [f64; 7] = [0., 1., 2., 2., 4., 2., 4.];
+ let e: [f64; 6] = [1., 2., 2., 2., 4., 4.];
+ let mut r: [f64; 6] = [0f64; 6];
+ vst3q_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst3q_lane_s8() {
+ let a: [i8; 49] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48];
+ let e: [i8; 48] = [1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
+ let mut r: [i8; 48] = [0i8; 48];
+ vst3q_lane_s8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst3_lane_s64() {
+ let a: [i64; 4] = [0, 1, 2, 2];
+ let e: [i64; 3] = [1, 2, 2];
+ let mut r: [i64; 3] = [0i64; 3];
+ vst3_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst3q_lane_s64() {
+ let a: [i64; 7] = [0, 1, 2, 2, 4, 2, 4];
+ let e: [i64; 6] = [1, 2, 2, 0, 0, 0];
+ let mut r: [i64; 6] = [0i64; 6];
+ vst3q_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst3q_lane_u8() {
+ let a: [u8; 49] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48];
+ let e: [u8; 48] = [1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
+ let mut r: [u8; 48] = [0u8; 48];
+ vst3q_lane_u8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst3_lane_u64() {
+ let a: [u64; 4] = [0, 1, 2, 2];
+ let e: [u64; 3] = [1, 2, 2];
+ let mut r: [u64; 3] = [0u64; 3];
+ vst3_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst3q_lane_u64() {
+ let a: [u64; 7] = [0, 1, 2, 2, 4, 2, 4];
+ let e: [u64; 6] = [1, 2, 2, 0, 0, 0];
+ let mut r: [u64; 6] = [0u64; 6];
+ vst3q_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst3q_lane_p8() {
+ let a: [u8; 49] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48];
+ let e: [u8; 48] = [1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
+ let mut r: [u8; 48] = [0u8; 48];
+ vst3q_lane_p8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst3_lane_p64() {
+ let a: [u64; 4] = [0, 1, 2, 2];
+ let e: [u64; 3] = [1, 2, 2];
+ let mut r: [u64; 3] = [0u64; 3];
+ vst3_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst3q_lane_p64() {
+ let a: [u64; 7] = [0, 1, 2, 2, 4, 2, 4];
+ let e: [u64; 6] = [1, 2, 2, 0, 0, 0];
+ let mut r: [u64; 6] = [0u64; 6];
+ vst3q_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst3_lane_f64() {
+ let a: [f64; 4] = [0., 1., 2., 2.];
+ let e: [f64; 3] = [1., 2., 2.];
+ let mut r: [f64; 3] = [0f64; 3];
+ vst3_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst3q_lane_f64() {
+ let a: [f64; 7] = [0., 1., 2., 2., 3., 2., 3.];
+ let e: [f64; 6] = [1., 2., 2., 0., 0., 0.];
+ let mut r: [f64; 6] = [0f64; 6];
+ vst3q_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
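+ // `vst4*` (ST4) interleaves four vectors into memory; the `_lane`
+ // variants store only the 4-element structure at lane `LANE`.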
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst4q_s64() {
+ let a: [i64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8];
+ let e: [i64; 8] = [1, 2, 2, 6, 2, 6, 6, 8];
+ let mut r: [i64; 8] = [0i64; 8];
+ vst4q_s64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst4q_u64() {
+ let a: [u64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8];
+ let e: [u64; 8] = [1, 2, 2, 6, 2, 6, 6, 8];
+ let mut r: [u64; 8] = [0u64; 8];
+ vst4q_u64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst4q_p64() {
+ let a: [u64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8];
+ let e: [u64; 8] = [1, 2, 2, 6, 2, 6, 6, 8];
+ let mut r: [u64; 8] = [0u64; 8];
+ vst4q_p64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst4_f64() {
+ let a: [f64; 5] = [0., 1., 2., 2., 6.];
+ let e: [f64; 4] = [1., 2., 2., 6.];
+ let mut r: [f64; 4] = [0f64; 4];
+ vst4_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst4q_f64() {
+ let a: [f64; 9] = [0., 1., 2., 2., 6., 2., 6., 6., 8.];
+ let e: [f64; 8] = [1., 2., 2., 6., 2., 6., 6., 8.];
+ let mut r: [f64; 8] = [0f64; 8];
+ vst4q_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst4q_lane_s8() {
+ let a: [i8; 65] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64];
+ let e: [i8; 64] = [1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
+ let mut r: [i8; 64] = [0i8; 64];
+ vst4q_lane_s8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst4_lane_s64() {
+ let a: [i64; 5] = [0, 1, 2, 2, 6];
+ let e: [i64; 4] = [1, 2, 2, 6];
+ let mut r: [i64; 4] = [0i64; 4];
+ vst4_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst4q_lane_s64() {
+ let a: [i64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8];
+ let e: [i64; 8] = [1, 2, 2, 6, 0, 0, 0, 0];
+ let mut r: [i64; 8] = [0i64; 8];
+ vst4q_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst4q_lane_u8() {
+ let a: [u8; 65] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64];
+ let e: [u8; 64] = [1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
+ let mut r: [u8; 64] = [0u8; 64];
+ vst4q_lane_u8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst4_lane_u64() {
+ let a: [u64; 5] = [0, 1, 2, 2, 6];
+ let e: [u64; 4] = [1, 2, 2, 6];
+ let mut r: [u64; 4] = [0u64; 4];
+ vst4_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst4q_lane_u64() {
+ let a: [u64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8];
+ let e: [u64; 8] = [1, 2, 2, 6, 0, 0, 0, 0];
+ let mut r: [u64; 8] = [0u64; 8];
+ vst4q_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst4q_lane_p8() {
+ let a: [u8; 65] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64];
+ let e: [u8; 64] = [1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
+ let mut r: [u8; 64] = [0u8; 64];
+ vst4q_lane_p8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst4_lane_p64() {
+ let a: [u64; 5] = [0, 1, 2, 2, 6];
+ let e: [u64; 4] = [1, 2, 2, 6];
+ let mut r: [u64; 4] = [0u64; 4];
+ vst4_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst4q_lane_p64() {
+ let a: [u64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8];
+ let e: [u64; 8] = [1, 2, 2, 6, 0, 0, 0, 0];
+ let mut r: [u64; 8] = [0u64; 8];
+ vst4q_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst4_lane_f64() {
+ let a: [f64; 5] = [0., 1., 2., 2., 6.];
+ let e: [f64; 4] = [1., 2., 2., 6.];
+ let mut r: [f64; 4] = [0f64; 4];
+ vst4_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst4q_lane_f64() {
+ let a: [f64; 9] = [0., 1., 2., 2., 6., 2., 6., 6., 8.];
+ let e: [f64; 8] = [1., 2., 2., 6., 0., 0., 0., 0.];
+ let mut r: [f64; 8] = [0f64; 8];
+ vst4q_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
+ assert_eq!(r, e);
+ }
+
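+ // `vmul*` element-wise multiply; `_n` multiplies by a broadcast scalar,
+ // `_lane`/`_laneq` by a single lane of the second operand, and the
+ // `s`/`d`-suffixed forms are scalar f32/f64 variants.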
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmul_f64() {
+ let a: f64 = 1.0;
+ let b: f64 = 2.0;
+ let e: f64 = 2.0;
+ let r: f64 = transmute(vmul_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmulq_f64() {
+ let a: f64x2 = f64x2::new(1.0, 2.0);
+ let b: f64x2 = f64x2::new(2.0, 3.0);
+ let e: f64x2 = f64x2::new(2.0, 6.0);
+ let r: f64x2 = transmute(vmulq_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmul_n_f64() {
+ let a: f64 = 1.;
+ let b: f64 = 2.;
+ let e: f64 = 2.;
+ let r: f64 = transmute(vmul_n_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmulq_n_f64() {
+ let a: f64x2 = f64x2::new(1., 2.);
+ let b: f64 = 2.;
+ let e: f64x2 = f64x2::new(2., 4.);
+ let r: f64x2 = transmute(vmulq_n_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmul_lane_f64() {
+ let a: f64 = 1.;
+ let b: f64 = 2.;
+ let e: f64 = 2.;
+ let r: f64 = transmute(vmul_lane_f64::<0>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmul_laneq_f64() {
+ let a: f64 = 1.;
+ let b: f64x2 = f64x2::new(2., 0.);
+ let e: f64 = 2.;
+ let r: f64 = transmute(vmul_laneq_f64::<0>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmulq_lane_f64() {
+ let a: f64x2 = f64x2::new(1., 2.);
+ let b: f64 = 2.;
+ let e: f64x2 = f64x2::new(2., 4.);
+ let r: f64x2 = transmute(vmulq_lane_f64::<0>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmulq_laneq_f64() {
+ let a: f64x2 = f64x2::new(1., 2.);
+ let b: f64x2 = f64x2::new(2., 0.);
+ let e: f64x2 = f64x2::new(2., 4.);
+ let r: f64x2 = transmute(vmulq_laneq_f64::<0>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmuls_lane_f32() {
+ let a: f32 = 1.;
+ let b: f32x2 = f32x2::new(2., 0.);
+ let e: f32 = 2.;
+ let r: f32 = transmute(vmuls_lane_f32::<0>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmuls_laneq_f32() {
+ let a: f32 = 1.;
+ let b: f32x4 = f32x4::new(2., 0., 0., 0.);
+ let e: f32 = 2.;
+ let r: f32 = transmute(vmuls_laneq_f32::<0>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmuld_lane_f64() {
+ let a: f64 = 1.;
+ let b: f64 = 2.;
+ let e: f64 = 2.;
+ let r: f64 = transmute(vmuld_lane_f64::<0>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmuld_laneq_f64() {
+ let a: f64 = 1.;
+ let b: f64x2 = f64x2::new(2., 0.);
+ let e: f64 = 2.;
+ let r: f64 = transmute(vmuld_laneq_f64::<0>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
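+ // `vmull_high_*` widening multiply of the upper halves of the two inputs,
+ // producing elements twice as wide (SMULL2/UMULL2/PMULL2).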
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmull_high_s8() {
+ let a: i8x16 = i8x16::new(1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16);
+ let b: i8x16 = i8x16::new(1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2);
+ let e: i16x8 = i16x8::new(9, 20, 11, 24, 13, 28, 15, 32);
+ let r: i16x8 = transmute(vmull_high_s8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmull_high_s16() {
+ let a: i16x8 = i16x8::new(1, 2, 9, 10, 9, 10, 11, 12);
+ let b: i16x8 = i16x8::new(1, 2, 1, 2, 1, 2, 1, 2);
+ let e: i32x4 = i32x4::new(9, 20, 11, 24);
+ let r: i32x4 = transmute(vmull_high_s16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmull_high_s32() {
+ let a: i32x4 = i32x4::new(1, 2, 9, 10);
+ let b: i32x4 = i32x4::new(1, 2, 1, 2);
+ let e: i64x2 = i64x2::new(9, 20);
+ let r: i64x2 = transmute(vmull_high_s32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmull_high_u8() {
+ let a: u8x16 = u8x16::new(1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16);
+ let b: u8x16 = u8x16::new(1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2);
+ let e: u16x8 = u16x8::new(9, 20, 11, 24, 13, 28, 15, 32);
+ let r: u16x8 = transmute(vmull_high_u8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmull_high_u16() {
+ let a: u16x8 = u16x8::new(1, 2, 9, 10, 9, 10, 11, 12);
+ let b: u16x8 = u16x8::new(1, 2, 1, 2, 1, 2, 1, 2);
+ let e: u32x4 = u32x4::new(9, 20, 11, 24);
+ let r: u32x4 = transmute(vmull_high_u16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmull_high_u32() {
+ let a: u32x4 = u32x4::new(1, 2, 9, 10);
+ let b: u32x4 = u32x4::new(1, 2, 1, 2);
+ let e: u64x2 = u64x2::new(9, 20);
+ let r: u64x2 = transmute(vmull_high_u32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
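+ // `vmull_p64` (PMULL) is a carry-less polynomial multiply over GF(2):
+ // 15 * 3 = 0b1111 ^ (0b1111 << 1) = 0b10001 = 17, with no carries
+ // between bit positions.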
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmull_p64() {
+ let a: p64 = 15;
+ let b: p64 = 3;
+ let e: p128 = 17;
+ let r: p128 = transmute(vmull_p64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmull_high_p8() {
+ let a: i8x16 = i8x16::new(1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16);
+ let b: i8x16 = i8x16::new(1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3);
+ let e: i16x8 = i16x8::new(9, 30, 11, 20, 13, 18, 15, 48);
+ let r: i16x8 = transmute(vmull_high_p8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmull_high_p64() {
+ let a: i64x2 = i64x2::new(1, 15);
+ let b: i64x2 = i64x2::new(1, 3);
+ let e: p128 = 17;
+ let r: p128 = transmute(vmull_high_p64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmull_high_n_s16() {
+ let a: i16x8 = i16x8::new(1, 2, 9, 10, 9, 10, 11, 12);
+ let b: i16 = 2;
+ let e: i32x4 = i32x4::new(18, 20, 22, 24);
+ let r: i32x4 = transmute(vmull_high_n_s16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmull_high_n_s32() {
+ let a: i32x4 = i32x4::new(1, 2, 9, 10);
+ let b: i32 = 2;
+ let e: i64x2 = i64x2::new(18, 20);
+ let r: i64x2 = transmute(vmull_high_n_s32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmull_high_n_u16() {
+ let a: u16x8 = u16x8::new(1, 2, 9, 10, 9, 10, 11, 12);
+ let b: u16 = 2;
+ let e: u32x4 = u32x4::new(18, 20, 22, 24);
+ let r: u32x4 = transmute(vmull_high_n_u16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmull_high_n_u32() {
+ let a: u32x4 = u32x4::new(1, 2, 9, 10);
+ let b: u32 = 2;
+ let e: u64x2 = u64x2::new(18, 20);
+ let r: u64x2 = transmute(vmull_high_n_u32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmull_high_lane_s16() {
+ let a: i16x8 = i16x8::new(1, 2, 9, 10, 9, 10, 11, 12);
+ let b: i16x4 = i16x4::new(0, 2, 0, 0);
+ let e: i32x4 = i32x4::new(18, 20, 22, 24);
+ let r: i32x4 = transmute(vmull_high_lane_s16::<1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmull_high_laneq_s16() {
+ let a: i16x8 = i16x8::new(1, 2, 9, 10, 9, 10, 11, 12);
+ let b: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
+ let e: i32x4 = i32x4::new(18, 20, 22, 24);
+ let r: i32x4 = transmute(vmull_high_laneq_s16::<1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmull_high_lane_s32() {
+ let a: i32x4 = i32x4::new(1, 2, 9, 10);
+ let b: i32x2 = i32x2::new(0, 2);
+ let e: i64x2 = i64x2::new(18, 20);
+ let r: i64x2 = transmute(vmull_high_lane_s32::<1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmull_high_laneq_s32() {
+ let a: i32x4 = i32x4::new(1, 2, 9, 10);
+ let b: i32x4 = i32x4::new(0, 2, 0, 0);
+ let e: i64x2 = i64x2::new(18, 20);
+ let r: i64x2 = transmute(vmull_high_laneq_s32::<1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmull_high_lane_u16() {
+ let a: u16x8 = u16x8::new(1, 2, 9, 10, 9, 10, 11, 12);
+ let b: u16x4 = u16x4::new(0, 2, 0, 0);
+ let e: u32x4 = u32x4::new(18, 20, 22, 24);
+ let r: u32x4 = transmute(vmull_high_lane_u16::<1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmull_high_laneq_u16() {
+ let a: u16x8 = u16x8::new(1, 2, 9, 10, 9, 10, 11, 12);
+ let b: u16x8 = u16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
+ let e: u32x4 = u32x4::new(18, 20, 22, 24);
+ let r: u32x4 = transmute(vmull_high_laneq_u16::<1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmull_high_lane_u32() {
+ let a: u32x4 = u32x4::new(1, 2, 9, 10);
+ let b: u32x2 = u32x2::new(0, 2);
+ let e: u64x2 = u64x2::new(18, 20);
+ let r: u64x2 = transmute(vmull_high_lane_u32::<1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmull_high_laneq_u32() {
+ let a: u32x4 = u32x4::new(1, 2, 9, 10);
+ let b: u32x4 = u32x4::new(0, 2, 0, 0);
+ let e: u64x2 = u64x2::new(18, 20);
+ let r: u64x2 = transmute(vmull_high_laneq_u32::<1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
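+ // `vmulx*` (FMULX) multiplies like FMUL except that 0.0 * +/-infinity
+ // returns +/-2.0 instead of NaN; for the finite inputs used here the
+ // results match an ordinary multiply.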
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmulx_f32() {
+ let a: f32x2 = f32x2::new(1., 2.);
+ let b: f32x2 = f32x2::new(2., 2.);
+ let e: f32x2 = f32x2::new(2., 4.);
+ let r: f32x2 = transmute(vmulx_f32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmulxq_f32() {
+ let a: f32x4 = f32x4::new(1., 2., 3., 4.);
+ let b: f32x4 = f32x4::new(2., 2., 2., 2.);
+ let e: f32x4 = f32x4::new(2., 4., 6., 8.);
+ let r: f32x4 = transmute(vmulxq_f32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmulx_f64() {
+ let a: f64 = 1.;
+ let b: f64 = 2.;
+ let e: f64 = 2.;
+ let r: f64 = transmute(vmulx_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmulxq_f64() {
+ let a: f64x2 = f64x2::new(1., 2.);
+ let b: f64x2 = f64x2::new(2., 2.);
+ let e: f64x2 = f64x2::new(2., 4.);
+ let r: f64x2 = transmute(vmulxq_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmulx_lane_f64() {
+ let a: f64 = 1.;
+ let b: f64 = 2.;
+ let e: f64 = 2.;
+ let r: f64 = transmute(vmulx_lane_f64::<0>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmulx_laneq_f64() {
+ let a: f64 = 1.;
+ let b: f64x2 = f64x2::new(2., 0.);
+ let e: f64 = 2.;
+ let r: f64 = transmute(vmulx_laneq_f64::<0>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmulx_lane_f32() {
+ let a: f32x2 = f32x2::new(1., 2.);
+ let b: f32x2 = f32x2::new(2., 0.);
+ let e: f32x2 = f32x2::new(2., 4.);
+ let r: f32x2 = transmute(vmulx_lane_f32::<0>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmulx_laneq_f32() {
+ let a: f32x2 = f32x2::new(1., 2.);
+ let b: f32x4 = f32x4::new(2., 0., 0., 0.);
+ let e: f32x2 = f32x2::new(2., 4.);
+ let r: f32x2 = transmute(vmulx_laneq_f32::<0>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmulxq_lane_f32() {
+ let a: f32x4 = f32x4::new(1., 2., 3., 4.);
+ let b: f32x2 = f32x2::new(2., 0.);
+ let e: f32x4 = f32x4::new(2., 4., 6., 8.);
+ let r: f32x4 = transmute(vmulxq_lane_f32::<0>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmulxq_laneq_f32() {
+ let a: f32x4 = f32x4::new(1., 2., 3., 4.);
+ let b: f32x4 = f32x4::new(2., 0., 0., 0.);
+ let e: f32x4 = f32x4::new(2., 4., 6., 8.);
+ let r: f32x4 = transmute(vmulxq_laneq_f32::<0>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmulxq_lane_f64() {
+ let a: f64x2 = f64x2::new(1., 2.);
+ let b: f64 = 2.;
+ let e: f64x2 = f64x2::new(2., 4.);
+ let r: f64x2 = transmute(vmulxq_lane_f64::<0>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmulxq_laneq_f64() {
+ let a: f64x2 = f64x2::new(1., 2.);
+ let b: f64x2 = f64x2::new(2., 0.);
+ let e: f64x2 = f64x2::new(2., 4.);
+ let r: f64x2 = transmute(vmulxq_laneq_f64::<0>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmulxs_f32() {
+ let a: f32 = 2.;
+ let b: f32 = 3.;
+ let e: f32 = 6.;
+ let r: f32 = transmute(vmulxs_f32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmulxd_f64() {
+ let a: f64 = 2.;
+ let b: f64 = 3.;
+ let e: f64 = 6.;
+ let r: f64 = transmute(vmulxd_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmulxs_lane_f32() {
+ let a: f32 = 2.;
+ let b: f32x2 = f32x2::new(3., 0.);
+ let e: f32 = 6.;
+ let r: f32 = transmute(vmulxs_lane_f32::<0>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmulxs_laneq_f32() {
+ let a: f32 = 2.;
+ let b: f32x4 = f32x4::new(3., 0., 0., 0.);
+ let e: f32 = 6.;
+ let r: f32 = transmute(vmulxs_laneq_f32::<0>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmulxd_lane_f64() {
+ let a: f64 = 2.;
+ let b: f64 = 3.;
+ let e: f64 = 6.;
+ let r: f64 = transmute(vmulxd_lane_f64::<0>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmulxd_laneq_f64() {
+ let a: f64 = 2.;
+ let b: f64x2 = f64x2::new(3., 0.);
+ let e: f64 = 6.;
+ let r: f64 = transmute(vmulxd_laneq_f64::<0>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
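+ // `vfma*` (FMLA) computes a + b * c with a single rounding step (fused
+ // multiply-add); `_n` broadcasts a scalar multiplier and `_lane`/`_laneq`
+ // select one lane of `c`.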
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vfma_f64() {
+ let a: f64 = 8.0;
+ let b: f64 = 6.0;
+ let c: f64 = 2.0;
+ let e: f64 = 20.0;
+ let r: f64 = transmute(vfma_f64(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vfmaq_f64() {
+ let a: f64x2 = f64x2::new(8.0, 18.0);
+ let b: f64x2 = f64x2::new(6.0, 4.0);
+ let c: f64x2 = f64x2::new(2.0, 3.0);
+ let e: f64x2 = f64x2::new(20.0, 30.0);
+ let r: f64x2 = transmute(vfmaq_f64(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vfma_n_f64() {
+ let a: f64 = 2.0;
+ let b: f64 = 6.0;
+ let c: f64 = 8.0;
+ let e: f64 = 50.0;
+ let r: f64 = transmute(vfma_n_f64(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vfmaq_n_f64() {
+ let a: f64x2 = f64x2::new(2.0, 3.0);
+ let b: f64x2 = f64x2::new(6.0, 4.0);
+ let c: f64 = 8.0;
+ let e: f64x2 = f64x2::new(50.0, 35.0);
+ let r: f64x2 = transmute(vfmaq_n_f64(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vfma_lane_f32() {
+ let a: f32x2 = f32x2::new(2., 3.);
+ let b: f32x2 = f32x2::new(6., 4.);
+ let c: f32x2 = f32x2::new(2., 0.);
+ let e: f32x2 = f32x2::new(14., 11.);
+ let r: f32x2 = transmute(vfma_lane_f32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vfma_laneq_f32() {
+ let a: f32x2 = f32x2::new(2., 3.);
+ let b: f32x2 = f32x2::new(6., 4.);
+ let c: f32x4 = f32x4::new(2., 0., 0., 0.);
+ let e: f32x2 = f32x2::new(14., 11.);
+ let r: f32x2 = transmute(vfma_laneq_f32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vfmaq_lane_f32() {
+ let a: f32x4 = f32x4::new(2., 3., 4., 5.);
+ let b: f32x4 = f32x4::new(6., 4., 7., 8.);
+ let c: f32x2 = f32x2::new(2., 0.);
+ let e: f32x4 = f32x4::new(14., 11., 18., 21.);
+ let r: f32x4 = transmute(vfmaq_lane_f32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vfmaq_laneq_f32() {
+ let a: f32x4 = f32x4::new(2., 3., 4., 5.);
+ let b: f32x4 = f32x4::new(6., 4., 7., 8.);
+ let c: f32x4 = f32x4::new(2., 0., 0., 0.);
+ let e: f32x4 = f32x4::new(14., 11., 18., 21.);
+ let r: f32x4 = transmute(vfmaq_laneq_f32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vfma_lane_f64() {
+ let a: f64 = 2.;
+ let b: f64 = 6.;
+ let c: f64 = 2.;
+ let e: f64 = 14.;
+ let r: f64 = transmute(vfma_lane_f64::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vfma_laneq_f64() {
+ let a: f64 = 2.;
+ let b: f64 = 6.;
+ let c: f64x2 = f64x2::new(2., 0.);
+ let e: f64 = 14.;
+ let r: f64 = transmute(vfma_laneq_f64::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vfmaq_lane_f64() {
+ let a: f64x2 = f64x2::new(2., 3.);
+ let b: f64x2 = f64x2::new(6., 4.);
+ let c: f64 = 2.;
+ let e: f64x2 = f64x2::new(14., 11.);
+ let r: f64x2 = transmute(vfmaq_lane_f64::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vfmaq_laneq_f64() {
+ let a: f64x2 = f64x2::new(2., 3.);
+ let b: f64x2 = f64x2::new(6., 4.);
+ let c: f64x2 = f64x2::new(2., 0.);
+ let e: f64x2 = f64x2::new(14., 11.);
+ let r: f64x2 = transmute(vfmaq_laneq_f64::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vfmas_lane_f32() {
+ let a: f32 = 2.;
+ let b: f32 = 6.;
+ let c: f32x2 = f32x2::new(3., 0.);
+ let e: f32 = 20.;
+ let r: f32 = transmute(vfmas_lane_f32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vfmas_laneq_f32() {
+ let a: f32 = 2.;
+ let b: f32 = 6.;
+ let c: f32x4 = f32x4::new(3., 0., 0., 0.);
+ let e: f32 = 20.;
+ let r: f32 = transmute(vfmas_laneq_f32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vfmad_lane_f64() {
+ let a: f64 = 2.;
+ let b: f64 = 6.;
+ let c: f64 = 3.;
+ let e: f64 = 20.;
+ let r: f64 = transmute(vfmad_lane_f64::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vfmad_laneq_f64() {
+ let a: f64 = 2.;
+ let b: f64 = 6.;
+ let c: f64x2 = f64x2::new(3., 0.);
+ let e: f64 = 20.;
+ let r: f64 = transmute(vfmad_laneq_f64::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
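+ // `vfms*` (FMLS) is the fused multiply-subtract counterpart: a - b * c.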
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vfms_f64() {
+ let a: f64 = 20.0;
+ let b: f64 = 6.0;
+ let c: f64 = 2.0;
+ let e: f64 = 8.0;
+ let r: f64 = transmute(vfms_f64(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vfmsq_f64() {
+ let a: f64x2 = f64x2::new(20.0, 30.0);
+ let b: f64x2 = f64x2::new(6.0, 4.0);
+ let c: f64x2 = f64x2::new(2.0, 3.0);
+ let e: f64x2 = f64x2::new(8.0, 18.0);
+ let r: f64x2 = transmute(vfmsq_f64(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vfms_n_f64() {
+ let a: f64 = 50.0;
+ let b: f64 = 6.0;
+ let c: f64 = 8.0;
+ let e: f64 = 2.0;
+ let r: f64 = transmute(vfms_n_f64(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vfmsq_n_f64() {
+ let a: f64x2 = f64x2::new(50.0, 35.0);
+ let b: f64x2 = f64x2::new(6.0, 4.0);
+ let c: f64 = 8.0;
+ let e: f64x2 = f64x2::new(2.0, 3.0);
+ let r: f64x2 = transmute(vfmsq_n_f64(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vfms_lane_f32() {
+ let a: f32x2 = f32x2::new(14., 11.);
+ let b: f32x2 = f32x2::new(6., 4.);
+ let c: f32x2 = f32x2::new(2., 0.);
+ let e: f32x2 = f32x2::new(2., 3.);
+ let r: f32x2 = transmute(vfms_lane_f32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vfms_laneq_f32() {
+ let a: f32x2 = f32x2::new(14., 11.);
+ let b: f32x2 = f32x2::new(6., 4.);
+ let c: f32x4 = f32x4::new(2., 0., 0., 0.);
+ let e: f32x2 = f32x2::new(2., 3.);
+ let r: f32x2 = transmute(vfms_laneq_f32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vfmsq_lane_f32() {
+ let a: f32x4 = f32x4::new(14., 11., 18., 21.);
+ let b: f32x4 = f32x4::new(6., 4., 7., 8.);
+ let c: f32x2 = f32x2::new(2., 0.);
+ let e: f32x4 = f32x4::new(2., 3., 4., 5.);
+ let r: f32x4 = transmute(vfmsq_lane_f32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vfmsq_laneq_f32() {
+ let a: f32x4 = f32x4::new(14., 11., 18., 21.);
+ let b: f32x4 = f32x4::new(6., 4., 7., 8.);
+ let c: f32x4 = f32x4::new(2., 0., 0., 0.);
+ let e: f32x4 = f32x4::new(2., 3., 4., 5.);
+ let r: f32x4 = transmute(vfmsq_laneq_f32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vfms_lane_f64() {
+ let a: f64 = 14.;
+ let b: f64 = 6.;
+ let c: f64 = 2.;
+ let e: f64 = 2.;
+ let r: f64 = transmute(vfms_lane_f64::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vfms_laneq_f64() {
+ let a: f64 = 14.;
+ let b: f64 = 6.;
+ let c: f64x2 = f64x2::new(2., 0.);
+ let e: f64 = 2.;
+ let r: f64 = transmute(vfms_laneq_f64::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vfmsq_lane_f64() {
+ let a: f64x2 = f64x2::new(14., 11.);
+ let b: f64x2 = f64x2::new(6., 4.);
+ let c: f64 = 2.;
+ let e: f64x2 = f64x2::new(2., 3.);
+ let r: f64x2 = transmute(vfmsq_lane_f64::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vfmsq_laneq_f64() {
+ let a: f64x2 = f64x2::new(14., 11.);
+ let b: f64x2 = f64x2::new(6., 4.);
+ let c: f64x2 = f64x2::new(2., 0.);
+ let e: f64x2 = f64x2::new(2., 3.);
+ let r: f64x2 = transmute(vfmsq_laneq_f64::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vfmss_lane_f32() {
+ let a: f32 = 14.;
+ let b: f32 = 6.;
+ let c: f32x2 = f32x2::new(2., 0.);
+ let e: f32 = 2.;
+ let r: f32 = transmute(vfmss_lane_f32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vfmss_laneq_f32() {
+ let a: f32 = 14.;
+ let b: f32 = 6.;
+ let c: f32x4 = f32x4::new(2., 0., 0., 0.);
+ let e: f32 = 2.;
+ let r: f32 = transmute(vfmss_laneq_f32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vfmsd_lane_f64() {
+ let a: f64 = 14.;
+ let b: f64 = 6.;
+ let c: f64 = 2.;
+ let e: f64 = 2.;
+ let r: f64 = transmute(vfmsd_lane_f64::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vfmsd_laneq_f64() {
+ let a: f64 = 14.;
+ let b: f64 = 6.;
+ let c: f64x2 = f64x2::new(2., 0.);
+ let e: f64 = 2.;
+ let r: f64 = transmute(vfmsd_laneq_f64::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
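+ // `vdiv*` (FDIV) element-wise floating-point division; unlike most NEON
+ // arithmetic, a vector divide exists only on AArch64.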
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdiv_f32() {
+ let a: f32x2 = f32x2::new(2.0, 6.0);
+ let b: f32x2 = f32x2::new(1.0, 2.0);
+ let e: f32x2 = f32x2::new(2.0, 3.0);
+ let r: f32x2 = transmute(vdiv_f32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdivq_f32() {
+ let a: f32x4 = f32x4::new(2.0, 6.0, 4.0, 10.0);
+ let b: f32x4 = f32x4::new(1.0, 2.0, 1.0, 2.0);
+ let e: f32x4 = f32x4::new(2.0, 3.0, 4.0, 5.0);
+ let r: f32x4 = transmute(vdivq_f32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdiv_f64() {
+ let a: f64 = 2.0;
+ let b: f64 = 1.0;
+ let e: f64 = 2.0;
+ let r: f64 = transmute(vdiv_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdivq_f64() {
+ let a: f64x2 = f64x2::new(2.0, 6.0);
+ let b: f64x2 = f64x2::new(1.0, 2.0);
+ let e: f64x2 = f64x2::new(2.0, 3.0);
+ let r: f64x2 = transmute(vdivq_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
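+ // `vsub*` element-wise subtract; the `d`-suffixed `vsubd_*`/`vaddd_*`
+ // forms below operate on single 64-bit scalars.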
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vsub_f64() {
+ let a: f64 = 1.0;
+ let b: f64 = 1.0;
+ let e: f64 = 0.0;
+ let r: f64 = transmute(vsub_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vsubq_f64() {
+ let a: f64x2 = f64x2::new(1.0, 4.0);
+ let b: f64x2 = f64x2::new(1.0, 2.0);
+ let e: f64x2 = f64x2::new(0.0, 2.0);
+ let r: f64x2 = transmute(vsubq_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vsubd_s64() {
+ let a: i64 = 3;
+ let b: i64 = 2;
+ let e: i64 = 1;
+ let r: i64 = transmute(vsubd_s64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vsubd_u64() {
+ let a: u64 = 3;
+ let b: u64 = 2;
+ let e: u64 = 1;
+ let r: u64 = transmute(vsubd_u64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vaddd_s64() {
+ let a: i64 = 1;
+ let b: i64 = 2;
+ let e: i64 = 3;
+ let r: i64 = transmute(vaddd_s64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vaddd_u64() {
+ let a: u64 = 1;
+ let b: u64 = 2;
+ let e: u64 = 3;
+ let r: u64 = transmute(vaddd_u64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
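+ // `vaddv*` reduces a vector by summing across all lanes; `vaddlv*` does
+ // the same but widens the result (e.g. i16x4 -> i32) so the sum cannot
+ // overflow the element type.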
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vaddv_f32() {
+ let a: f32x2 = f32x2::new(1., 2.);
+ let e: f32 = 3.;
+ let r: f32 = transmute(vaddv_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vaddvq_f32() {
+ let a: f32x4 = f32x4::new(1., 2., 0., 0.);
+ let e: f32 = 3.;
+ let r: f32 = transmute(vaddvq_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vaddvq_f64() {
+ let a: f64x2 = f64x2::new(1., 2.);
+ let e: f64 = 3.;
+ let r: f64 = transmute(vaddvq_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vaddlv_s16() {
+ let a: i16x4 = i16x4::new(1, 2, 3, 4);
+ let e: i32 = 10;
+ let r: i32 = transmute(vaddlv_s16(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vaddlvq_s16() {
+ let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+ let e: i32 = 36;
+ let r: i32 = transmute(vaddlvq_s16(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vaddlv_s32() {
+ let a: i32x2 = i32x2::new(1, 2);
+ let e: i64 = 3;
+ let r: i64 = transmute(vaddlv_s32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vaddlvq_s32() {
+ let a: i32x4 = i32x4::new(1, 2, 3, 4);
+ let e: i64 = 10;
+ let r: i64 = transmute(vaddlvq_s32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vaddlv_u16() {
+ let a: u16x4 = u16x4::new(1, 2, 3, 4);
+ let e: u32 = 10;
+ let r: u32 = transmute(vaddlv_u16(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vaddlvq_u16() {
+ let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+ let e: u32 = 36;
+ let r: u32 = transmute(vaddlvq_u16(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vaddlv_u32() {
+ let a: u32x2 = u32x2::new(1, 2);
+ let e: u64 = 3;
+ let r: u64 = transmute(vaddlv_u32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vaddlvq_u32() {
+ let a: u32x4 = u32x4::new(1, 2, 3, 4);
+ let e: u64 = 10;
+ let r: u64 = transmute(vaddlvq_u32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
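+ // `vsubw_high_*` subtracts the widened upper half of the narrow operand
+ // from an already-wide accumulator; `vsubl_high_*` widens the upper
+ // halves of both operands before subtracting.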
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vsubw_high_s8() {
+ let a: i16x8 = i16x8::new(8, 9, 10, 12, 13, 14, 15, 16);
+ let b: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16);
+ let e: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+ let r: i16x8 = transmute(vsubw_high_s8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vsubw_high_s16() {
+ let a: i32x4 = i32x4::new(8, 9, 10, 11);
+ let b: i16x8 = i16x8::new(0, 1, 2, 3, 8, 9, 10, 11);
+ let e: i32x4 = i32x4::new(0, 0, 0, 0);
+ let r: i32x4 = transmute(vsubw_high_s16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vsubw_high_s32() {
+ let a: i64x2 = i64x2::new(8, 9);
+ let b: i32x4 = i32x4::new(6, 7, 8, 9);
+ let e: i64x2 = i64x2::new(0, 0);
+ let r: i64x2 = transmute(vsubw_high_s32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vsubw_high_u8() {
+ let a: u16x8 = u16x8::new(8, 9, 10, 11, 12, 13, 14, 15);
+ let b: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let e: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+ let r: u16x8 = transmute(vsubw_high_u8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vsubw_high_u16() {
+ let a: u32x4 = u32x4::new(8, 9, 10, 11);
+ let b: u16x8 = u16x8::new(0, 1, 2, 3, 8, 9, 10, 11);
+ let e: u32x4 = u32x4::new(0, 0, 0, 0);
+ let r: u32x4 = transmute(vsubw_high_u16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vsubw_high_u32() {
+ let a: u64x2 = u64x2::new(8, 9);
+ let b: u32x4 = u32x4::new(6, 7, 8, 9);
+ let e: u64x2 = u64x2::new(0, 0);
+ let r: u64x2 = transmute(vsubw_high_u32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vsubl_high_s8() {
+ let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let b: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
+ let e: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13);
+ let r: i16x8 = transmute(vsubl_high_s8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vsubl_high_s16() {
+ let a: i16x8 = i16x8::new(8, 9, 10, 11, 12, 13, 14, 15);
+ let b: i16x8 = i16x8::new(6, 6, 6, 6, 8, 8, 8, 8);
+ let e: i32x4 = i32x4::new(4, 5, 6, 7);
+ let r: i32x4 = transmute(vsubl_high_s16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vsubl_high_s32() {
+ let a: i32x4 = i32x4::new(12, 13, 14, 15);
+ let b: i32x4 = i32x4::new(6, 6, 8, 8);
+ let e: i64x2 = i64x2::new(6, 7);
+ let r: i64x2 = transmute(vsubl_high_s32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vsubl_high_u8() {
+ let a: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let b: u8x16 = u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
+ let e: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13);
+ let r: u16x8 = transmute(vsubl_high_u8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vsubl_high_u16() {
+ let a: u16x8 = u16x8::new(8, 9, 10, 11, 12, 13, 14, 15);
+ let b: u16x8 = u16x8::new(6, 6, 6, 6, 8, 8, 8, 8);
+ let e: u32x4 = u32x4::new(4, 5, 6, 7);
+ let r: u32x4 = transmute(vsubl_high_u16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vsubl_high_u32() {
+ let a: u32x4 = u32x4::new(12, 13, 14, 15);
+ let b: u32x4 = u32x4::new(6, 6, 8, 8);
+ let e: u64x2 = u64x2::new(6, 7);
+ let r: u64x2 = transmute(vsubl_high_u32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
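+ // `vbcaxq_*` (BCAX, from the SHA-3 extension) computes a ^ (b & !c):
+ // bit-clear `b` by `c`, then exclusive-or with `a`.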
+ #[simd_test(enable = "neon,sha3")]
+ unsafe fn test_vbcaxq_s8() {
+ let a: i8x16 = i8x16::new(1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0);
+ let b: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let c: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ let e: i8x16 = i8x16::new(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
+ let r: i8x16 = transmute(vbcaxq_s8(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon,sha3")]
+ unsafe fn test_vbcaxq_s16() {
+ let a: i16x8 = i16x8::new(1, 0, 1, 0, 1, 0, 1, 0);
+ let b: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+ let c: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
+ let e: i16x8 = i16x8::new(1, 0, 3, 2, 5, 4, 7, 6);
+ let r: i16x8 = transmute(vbcaxq_s16(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon,sha3")]
+ unsafe fn test_vbcaxq_s32() {
+ let a: i32x4 = i32x4::new(1, 0, 1, 0);
+ let b: i32x4 = i32x4::new(0, 1, 2, 3);
+ let c: i32x4 = i32x4::new(1, 1, 1, 1);
+ let e: i32x4 = i32x4::new(1, 0, 3, 2);
+ let r: i32x4 = transmute(vbcaxq_s32(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon,sha3")]
+ unsafe fn test_vbcaxq_s64() {
+ let a: i64x2 = i64x2::new(1, 0);
+ let b: i64x2 = i64x2::new(0, 1);
+ let c: i64x2 = i64x2::new(1, 1);
+ let e: i64x2 = i64x2::new(1, 0);
+ let r: i64x2 = transmute(vbcaxq_s64(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon,sha3")]
+ unsafe fn test_vbcaxq_u8() {
+ let a: u8x16 = u8x16::new(1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0);
+ let b: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let c: u8x16 = u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+ let e: u8x16 = u8x16::new(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
+ let r: u8x16 = transmute(vbcaxq_u8(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon,sha3")]
+ unsafe fn test_vbcaxq_u16() {
+ let a: u16x8 = u16x8::new(1, 0, 1, 0, 1, 0, 1, 0);
+ let b: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+ let c: u16x8 = u16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
+ let e: u16x8 = u16x8::new(1, 0, 3, 2, 5, 4, 7, 6);
+ let r: u16x8 = transmute(vbcaxq_u16(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon,sha3")]
+ unsafe fn test_vbcaxq_u32() {
+ let a: u32x4 = u32x4::new(1, 0, 1, 0);
+ let b: u32x4 = u32x4::new(0, 1, 2, 3);
+ let c: u32x4 = u32x4::new(1, 1, 1, 1);
+ let e: u32x4 = u32x4::new(1, 0, 3, 2);
+ let r: u32x4 = transmute(vbcaxq_u32(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon,sha3")]
+ unsafe fn test_vbcaxq_u64() {
+ let a: u64x2 = u64x2::new(1, 0);
+ let b: u64x2 = u64x2::new(0, 1);
+ let c: u64x2 = u64x2::new(1, 1);
+ let e: u64x2 = u64x2::new(1, 0);
+ let r: u64x2 = transmute(vbcaxq_u64(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
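+ // vcadd_rot270/rot90 treat adjacent lane pairs as complex numbers
+ // (real, imaginary) and add b to a after rotating b by 270 or 90 degrees
+ // in the complex plane (multiplication by -i or i, respectively).
+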
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcadd_rot270_f32() {
+ let a: f32x2 = f32x2::new(1., -1.);
+ let b: f32x2 = f32x2::new(-1., 1.);
+ let e: f32x2 = f32x2::new(2., 0.);
+ let r: f32x2 = transmute(vcadd_rot270_f32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcaddq_rot270_f32() {
+ let a: f32x4 = f32x4::new(1., -1., 1., -1.);
+ let b: f32x4 = f32x4::new(-1., 1., -1., 1.);
+ let e: f32x4 = f32x4::new(2., 0., 2., 0.);
+ let r: f32x4 = transmute(vcaddq_rot270_f32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcaddq_rot270_f64() {
+ let a: f64x2 = f64x2::new(1., -1.);
+ let b: f64x2 = f64x2::new(-1., 1.);
+ let e: f64x2 = f64x2::new(2., 0.);
+ let r: f64x2 = transmute(vcaddq_rot270_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcadd_rot90_f32() {
+ let a: f32x2 = f32x2::new(1., -1.);
+ let b: f32x2 = f32x2::new(-1., 1.);
+ let e: f32x2 = f32x2::new(0., -2.);
+ let r: f32x2 = transmute(vcadd_rot90_f32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcaddq_rot90_f32() {
+ let a: f32x4 = f32x4::new(1., -1., 1., -1.);
+ let b: f32x4 = f32x4::new(-1., 1., -1., 1.);
+ let e: f32x4 = f32x4::new(0., -2., 0., -2.);
+ let r: f32x4 = transmute(vcaddq_rot90_f32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcaddq_rot90_f64() {
+ let a: f64x2 = f64x2::new(1., -1.);
+ let b: f64x2 = f64x2::new(-1., 1.);
+ let e: f64x2 = f64x2::new(0., -2.);
+ let r: f64x2 = transmute(vcaddq_rot90_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
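+ // vcmla_* perform one half of a complex multiply-accumulate: the rot0 form
+ // accumulates b.real * c into a, rot90 accumulates i * b.imag * c, and
+ // rot180/rot270 negate those partial products. Issuing the rot0 and rot90
+ // forms together yields a full complex multiply-add.
+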
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcmla_f32() {
+ let a: f32x2 = f32x2::new(1., -1.);
+ let b: f32x2 = f32x2::new(-1., 1.);
+ let c: f32x2 = f32x2::new(1., 1.);
+ let e: f32x2 = f32x2::new(0., -2.);
+ let r: f32x2 = transmute(vcmla_f32(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcmlaq_f32() {
+ let a: f32x4 = f32x4::new(1., -1., 1., -1.);
+ let b: f32x4 = f32x4::new(-1., 1., -1., 1.);
+ let c: f32x4 = f32x4::new(1., 1., -1., -1.);
+ let e: f32x4 = f32x4::new(0., -2., 2., 0.);
+ let r: f32x4 = transmute(vcmlaq_f32(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcmlaq_f64() {
+ let a: f64x2 = f64x2::new(1., -1.);
+ let b: f64x2 = f64x2::new(-1., 1.);
+ let c: f64x2 = f64x2::new(1., 1.);
+ let e: f64x2 = f64x2::new(0., -2.);
+ let r: f64x2 = transmute(vcmlaq_f64(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcmla_rot90_f32() {
+ let a: f32x2 = f32x2::new(1., 1.);
+ let b: f32x2 = f32x2::new(1., -1.);
+ let c: f32x2 = f32x2::new(1., 1.);
+ let e: f32x2 = f32x2::new(2., 0.);
+ let r: f32x2 = transmute(vcmla_rot90_f32(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcmlaq_rot90_f32() {
+ let a: f32x4 = f32x4::new(1., 1., 1., 1.);
+ let b: f32x4 = f32x4::new(1., -1., 1., -1.);
+ let c: f32x4 = f32x4::new(1., 1., 1., 1.);
+ let e: f32x4 = f32x4::new(2., 0., 2., 0.);
+ let r: f32x4 = transmute(vcmlaq_rot90_f32(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcmlaq_rot90_f64() {
+ let a: f64x2 = f64x2::new(1., 1.);
+ let b: f64x2 = f64x2::new(1., -1.);
+ let c: f64x2 = f64x2::new(1., 1.);
+ let e: f64x2 = f64x2::new(2., 0.);
+ let r: f64x2 = transmute(vcmlaq_rot90_f64(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcmla_rot180_f32() {
+ let a: f32x2 = f32x2::new(1., 1.);
+ let b: f32x2 = f32x2::new(1., -1.);
+ let c: f32x2 = f32x2::new(1., 1.);
+ let e: f32x2 = f32x2::new(0., 0.);
+ let r: f32x2 = transmute(vcmla_rot180_f32(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcmlaq_rot180_f32() {
+ let a: f32x4 = f32x4::new(1., 1., 1., 1.);
+ let b: f32x4 = f32x4::new(1., -1., 1., -1.);
+ let c: f32x4 = f32x4::new(1., 1., 1., 1.);
+ let e: f32x4 = f32x4::new(0., 0., 0., 0.);
+ let r: f32x4 = transmute(vcmlaq_rot180_f32(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcmlaq_rot180_f64() {
+ let a: f64x2 = f64x2::new(1., 1.);
+ let b: f64x2 = f64x2::new(1., -1.);
+ let c: f64x2 = f64x2::new(1., 1.);
+ let e: f64x2 = f64x2::new(0., 0.);
+ let r: f64x2 = transmute(vcmlaq_rot180_f64(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcmla_rot270_f32() {
+ let a: f32x2 = f32x2::new(1., 1.);
+ let b: f32x2 = f32x2::new(1., -1.);
+ let c: f32x2 = f32x2::new(1., 1.);
+ let e: f32x2 = f32x2::new(0., 2.);
+ let r: f32x2 = transmute(vcmla_rot270_f32(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcmlaq_rot270_f32() {
+ let a: f32x4 = f32x4::new(1., 1., 1., 1.);
+ let b: f32x4 = f32x4::new(1., -1., 1., -1.);
+ let c: f32x4 = f32x4::new(1., 1., 1., 1.);
+ let e: f32x4 = f32x4::new(0., 2., 0., 2.);
+ let r: f32x4 = transmute(vcmlaq_rot270_f32(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcmlaq_rot270_f64() {
+ let a: f64x2 = f64x2::new(1., 1.);
+ let b: f64x2 = f64x2::new(1., -1.);
+ let c: f64x2 = f64x2::new(1., 1.);
+ let e: f64x2 = f64x2::new(0., 2.);
+ let r: f64x2 = transmute(vcmlaq_rot270_f64(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
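+ // The _lane/_laneq variants of vcmla pick one complex pair from c
+ // (selected by the const generic) and use it against every pair of the
+ // accumulator.
+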
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcmla_lane_f32() {
+ let a: f32x2 = f32x2::new(1., -1.);
+ let b: f32x2 = f32x2::new(-1., 1.);
+ let c: f32x2 = f32x2::new(1., 1.);
+ let e: f32x2 = f32x2::new(0., -2.);
+ let r: f32x2 = transmute(vcmla_lane_f32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcmla_laneq_f32() {
+ let a: f32x2 = f32x2::new(1., -1.);
+ let b: f32x2 = f32x2::new(-1., 1.);
+ let c: f32x4 = f32x4::new(1., 1., -1., -1.);
+ let e: f32x2 = f32x2::new(0., -2.);
+ let r: f32x2 = transmute(vcmla_laneq_f32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcmlaq_lane_f32() {
+ let a: f32x4 = f32x4::new(1., -1., 1., -1.);
+ let b: f32x4 = f32x4::new(-1., 1., -1., 1.);
+ let c: f32x2 = f32x2::new(1., 1.);
+ let e: f32x4 = f32x4::new(0., -2., 0., -2.);
+ let r: f32x4 = transmute(vcmlaq_lane_f32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcmlaq_laneq_f32() {
+ let a: f32x4 = f32x4::new(1., -1., 1., -1.);
+ let b: f32x4 = f32x4::new(-1., 1., -1., 1.);
+ let c: f32x4 = f32x4::new(1., 1., -1., -1.);
+ let e: f32x4 = f32x4::new(0., -2., 0., -2.);
+ let r: f32x4 = transmute(vcmlaq_laneq_f32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcmla_rot90_lane_f32() {
+ let a: f32x2 = f32x2::new(1., -1.);
+ let b: f32x2 = f32x2::new(-1., 1.);
+ let c: f32x2 = f32x2::new(1., 1.);
+ let e: f32x2 = f32x2::new(0., 0.);
+ let r: f32x2 = transmute(vcmla_rot90_lane_f32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcmla_rot90_laneq_f32() {
+ let a: f32x2 = f32x2::new(1., -1.);
+ let b: f32x2 = f32x2::new(-1., 1.);
+ let c: f32x4 = f32x4::new(1., 1., -1., -1.);
+ let e: f32x2 = f32x2::new(0., 0.);
+ let r: f32x2 = transmute(vcmla_rot90_laneq_f32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcmlaq_rot90_lane_f32() {
+ let a: f32x4 = f32x4::new(1., -1., 1., -1.);
+ let b: f32x4 = f32x4::new(-1., 1., -1., 1.);
+ let c: f32x2 = f32x2::new(1., 1.);
+ let e: f32x4 = f32x4::new(0., 0., 0., 0.);
+ let r: f32x4 = transmute(vcmlaq_rot90_lane_f32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcmlaq_rot90_laneq_f32() {
+ let a: f32x4 = f32x4::new(1., -1., 1., -1.);
+ let b: f32x4 = f32x4::new(-1., 1., -1., 1.);
+ let c: f32x4 = f32x4::new(1., 1., -1., -1.);
+ let e: f32x4 = f32x4::new(0., 0., 0., 0.);
+ let r: f32x4 = transmute(vcmlaq_rot90_laneq_f32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcmla_rot180_lane_f32() {
+ let a: f32x2 = f32x2::new(1., -1.);
+ let b: f32x2 = f32x2::new(-1., 1.);
+ let c: f32x2 = f32x2::new(1., 1.);
+ let e: f32x2 = f32x2::new(2., 0.);
+ let r: f32x2 = transmute(vcmla_rot180_lane_f32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcmla_rot180_laneq_f32() {
+ let a: f32x2 = f32x2::new(1., -1.);
+ let b: f32x2 = f32x2::new(-1., 1.);
+ let c: f32x4 = f32x4::new(1., 1., -1., -1.);
+ let e: f32x2 = f32x2::new(2., 0.);
+ let r: f32x2 = transmute(vcmla_rot180_laneq_f32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcmlaq_rot180_lane_f32() {
+ let a: f32x4 = f32x4::new(1., -1., 1., -1.);
+ let b: f32x4 = f32x4::new(-1., 1., -1., 1.);
+ let c: f32x2 = f32x2::new(1., 1.);
+ let e: f32x4 = f32x4::new(2., 0., 2., 0.);
+ let r: f32x4 = transmute(vcmlaq_rot180_lane_f32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcmlaq_rot180_laneq_f32() {
+ let a: f32x4 = f32x4::new(1., -1., 1., -1.);
+ let b: f32x4 = f32x4::new(-1., 1., -1., 1.);
+ let c: f32x4 = f32x4::new(1., 1., -1., -1.);
+ let e: f32x4 = f32x4::new(2., 0., 2., 0.);
+ let r: f32x4 = transmute(vcmlaq_rot180_laneq_f32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcmla_rot270_lane_f32() {
+ let a: f32x2 = f32x2::new(1., -1.);
+ let b: f32x2 = f32x2::new(-1., 1.);
+ let c: f32x2 = f32x2::new(1., 1.);
+ let e: f32x2 = f32x2::new(2., -2.);
+ let r: f32x2 = transmute(vcmla_rot270_lane_f32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcmla_rot270_laneq_f32() {
+ let a: f32x2 = f32x2::new(1., -1.);
+ let b: f32x2 = f32x2::new(-1., 1.);
+ let c: f32x4 = f32x4::new(1., 1., -1., -1.);
+ let e: f32x2 = f32x2::new(2., -2.);
+ let r: f32x2 = transmute(vcmla_rot270_laneq_f32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcmlaq_rot270_lane_f32() {
+ let a: f32x4 = f32x4::new(1., -1., 1., -1.);
+ let b: f32x4 = f32x4::new(-1., 1., -1., 1.);
+ let c: f32x2 = f32x2::new(1., 1.);
+ let e: f32x4 = f32x4::new(2., -2., 2., -2.);
+ let r: f32x4 = transmute(vcmlaq_rot270_lane_f32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcmlaq_rot270_laneq_f32() {
+ let a: f32x4 = f32x4::new(1., -1., 1., -1.);
+ let b: f32x4 = f32x4::new(-1., 1., -1., 1.);
+ let c: f32x4 = f32x4::new(1., 1., -1., -1.);
+ let e: f32x4 = f32x4::new(2., -2., 2., -2.);
+ let r: f32x4 = transmute(vcmlaq_rot270_laneq_f32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
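+ // vdot_*/vdotq_* compute dot products: each 32-bit lane of a accumulates
+ // the sum of products of the corresponding group of four 8-bit elements
+ // from b and c (e.g. 1 + 1*1 + 2*2 + 3*3 + 4*4 = 31 below).
+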
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdot_s32() {
+ let a: i32x2 = i32x2::new(1, 2);
+ let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+ let c: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+ let e: i32x2 = i32x2::new(31, 176);
+ let r: i32x2 = transmute(vdot_s32(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdotq_s32() {
+ let a: i32x4 = i32x4::new(1, 2, 1, 2);
+ let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+ let c: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+ let e: i32x4 = i32x4::new(31, 176, 31, 176);
+ let r: i32x4 = transmute(vdotq_s32(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdot_u32() {
+ let a: u32x2 = u32x2::new(1, 2);
+ let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+ let c: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+ let e: u32x2 = u32x2::new(31, 176);
+ let r: u32x2 = transmute(vdot_u32(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdotq_u32() {
+ let a: u32x4 = u32x4::new(1, 2, 1, 2);
+ let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+ let c: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+ let e: u32x4 = u32x4::new(31, 176, 31, 176);
+ let r: u32x4 = transmute(vdotq_u32(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
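+ // The _lane/_laneq dot-product variants reuse a single four-element group
+ // of c, chosen by the const generic, for every output lane.
+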
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdot_lane_s32() {
+ let a: i32x2 = i32x2::new(1, 2);
+ let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+ let c: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+ let e: i32x2 = i32x2::new(31, 72);
+ let r: i32x2 = transmute(vdot_lane_s32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdot_laneq_s32() {
+ let a: i32x2 = i32x2::new(1, 2);
+ let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+ let c: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+ let e: i32x2 = i32x2::new(31, 72);
+ let r: i32x2 = transmute(vdot_laneq_s32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdotq_lane_s32() {
+ let a: i32x4 = i32x4::new(1, 2, 1, 2);
+ let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+ let c: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+ let e: i32x4 = i32x4::new(31, 72, 31, 72);
+ let r: i32x4 = transmute(vdotq_lane_s32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdotq_laneq_s32() {
+ let a: i32x4 = i32x4::new(1, 2, 1, 2);
+ let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+ let c: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+ let e: i32x4 = i32x4::new(31, 72, 31, 72);
+ let r: i32x4 = transmute(vdotq_laneq_s32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdot_lane_u32() {
+ let a: u32x2 = u32x2::new(1, 2);
+ let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+ let c: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+ let e: u32x2 = u32x2::new(31, 72);
+ let r: u32x2 = transmute(vdot_lane_u32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdot_laneq_u32() {
+ let a: u32x2 = u32x2::new(1, 2);
+ let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+ let c: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+ let e: u32x2 = u32x2::new(31, 72);
+ let r: u32x2 = transmute(vdot_laneq_u32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdotq_lane_u32() {
+ let a: u32x4 = u32x4::new(1, 2, 1, 2);
+ let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+ let c: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+ let e: u32x4 = u32x4::new(31, 72, 31, 72);
+ let r: u32x4 = transmute(vdotq_lane_u32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdotq_laneq_u32() {
+ let a: u32x4 = u32x4::new(1, 2, 1, 2);
+ let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+ let c: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+ let e: u32x4 = u32x4::new(31, 72, 31, 72);
+ let r: u32x4 = transmute(vdotq_laneq_u32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
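+ // vmax*/vmaxnm* select the per-lane maximum; the "nm" forms follow the
+ // IEEE 754-2008 maxNum rules, so a quiet NaN input yields the other
+ // (numeric) operand.
+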
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmax_f64() {
+ let a: f64 = 1.0;
+ let b: f64 = 0.0;
+ let e: f64 = 1.0;
+ let r: f64 = transmute(vmax_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmaxq_f64() {
+ let a: f64x2 = f64x2::new(1.0, -2.0);
+ let b: f64x2 = f64x2::new(0.0, 3.0);
+ let e: f64x2 = f64x2::new(1.0, 3.0);
+ let r: f64x2 = transmute(vmaxq_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmaxnm_f64() {
+ let a: f64 = 1.0;
+ let b: f64 = 8.0;
+ let e: f64 = 8.0;
+ let r: f64 = transmute(vmaxnm_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmaxnmq_f64() {
+ let a: f64x2 = f64x2::new(1.0, 2.0);
+ let b: f64x2 = f64x2::new(8.0, 16.0);
+ let e: f64x2 = f64x2::new(8.0, 16.0);
+ let r: f64x2 = transmute(vmaxnmq_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
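+ // The v...v forms (vmaxnmv*) are across-vector reductions, returning the
+ // maximum lane as a scalar.
+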
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmaxnmv_f32() {
+ let a: f32x2 = f32x2::new(1., 2.);
+ let e: f32 = 2.;
+ let r: f32 = transmute(vmaxnmv_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmaxnmvq_f64() {
+ let a: f64x2 = f64x2::new(1., 2.);
+ let e: f64 = 2.;
+ let r: f64 = transmute(vmaxnmvq_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmaxnmvq_f32() {
+ let a: f32x4 = f32x4::new(1., 2., 0., 1.);
+ let e: f32 = 2.;
+ let r: f32 = transmute(vmaxnmvq_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
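+ // vpmaxnm*/vpmax* are pairwise maxima: adjacent lanes of the concatenated
+ // inputs are compared, so each input contributes half of the result; the
+ // scalar forms (vpmaxnms_f32, vpmaxqd_f64) reduce a single two-lane vector.
+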
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpmaxnm_f32() {
+ let a: f32x2 = f32x2::new(1.0, 2.0);
+ let b: f32x2 = f32x2::new(6.0, -3.0);
+ let e: f32x2 = f32x2::new(2.0, 6.0);
+ let r: f32x2 = transmute(vpmaxnm_f32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpmaxnmq_f64() {
+ let a: f64x2 = f64x2::new(1.0, 2.0);
+ let b: f64x2 = f64x2::new(6.0, -3.0);
+ let e: f64x2 = f64x2::new(2.0, 6.0);
+ let r: f64x2 = transmute(vpmaxnmq_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpmaxnmq_f32() {
+ let a: f32x4 = f32x4::new(1.0, 2.0, 3.0, -4.0);
+ let b: f32x4 = f32x4::new(8.0, 16.0, -1.0, 6.0);
+ let e: f32x4 = f32x4::new(2.0, 3.0, 16.0, 6.0);
+ let r: f32x4 = transmute(vpmaxnmq_f32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpmaxnms_f32() {
+ let a: f32x2 = f32x2::new(1., 2.);
+ let e: f32 = 2.;
+ let r: f32 = transmute(vpmaxnms_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpmaxnmqd_f64() {
+ let a: f64x2 = f64x2::new(1., 2.);
+ let e: f64 = 2.;
+ let r: f64 = transmute(vpmaxnmqd_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpmaxs_f32() {
+ let a: f32x2 = f32x2::new(1., 2.);
+ let e: f32 = 2.;
+ let r: f32 = transmute(vpmaxs_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpmaxqd_f64() {
+ let a: f64x2 = f64x2::new(1., 2.);
+ let e: f64 = 2.;
+ let r: f64 = transmute(vpmaxqd_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
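+ // The minimum intrinsics below mirror the maxima above: vmin*/vminnm* per
+ // lane, vminnmv* across the vector, and vpminnm*/vpmin* pairwise.
+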
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmin_f64() {
+ let a: f64 = 1.0;
+ let b: f64 = 0.0;
+ let e: f64 = 0.0;
+ let r: f64 = transmute(vmin_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vminq_f64() {
+ let a: f64x2 = f64x2::new(1.0, -2.0);
+ let b: f64x2 = f64x2::new(0.0, 3.0);
+ let e: f64x2 = f64x2::new(0.0, -2.0);
+ let r: f64x2 = transmute(vminq_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vminnm_f64() {
+ let a: f64 = 1.0;
+ let b: f64 = 8.0;
+ let e: f64 = 1.0;
+ let r: f64 = transmute(vminnm_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vminnmq_f64() {
+ let a: f64x2 = f64x2::new(1.0, 2.0);
+ let b: f64x2 = f64x2::new(8.0, 16.0);
+ let e: f64x2 = f64x2::new(1.0, 2.0);
+ let r: f64x2 = transmute(vminnmq_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vminnmv_f32() {
+ let a: f32x2 = f32x2::new(1., 0.);
+ let e: f32 = 0.;
+ let r: f32 = transmute(vminnmv_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vminnmvq_f64() {
+ let a: f64x2 = f64x2::new(1., 0.);
+ let e: f64 = 0.;
+ let r: f64 = transmute(vminnmvq_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vminnmvq_f32() {
+ let a: f32x4 = f32x4::new(1., 0., 2., 3.);
+ let e: f32 = 0.;
+ let r: f32 = transmute(vminnmvq_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
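+ // vmovl_high_* widen the upper half of a vector, sign- or zero-extending
+ // each element to twice its width.
+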
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmovl_high_s8() {
+ let a: i8x16 = i8x16::new(1, 2, 3, 4, 3, 4, 5, 6, 3, 4, 5, 6, 7, 8, 9, 10);
+ let e: i16x8 = i16x8::new(3, 4, 5, 6, 7, 8, 9, 10);
+ let r: i16x8 = transmute(vmovl_high_s8(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmovl_high_s16() {
+ let a: i16x8 = i16x8::new(1, 2, 3, 4, 3, 4, 5, 6);
+ let e: i32x4 = i32x4::new(3, 4, 5, 6);
+ let r: i32x4 = transmute(vmovl_high_s16(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmovl_high_s32() {
+ let a: i32x4 = i32x4::new(1, 2, 3, 4);
+ let e: i64x2 = i64x2::new(3, 4);
+ let r: i64x2 = transmute(vmovl_high_s32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmovl_high_u8() {
+ let a: u8x16 = u8x16::new(1, 2, 3, 4, 3, 4, 5, 6, 3, 4, 5, 6, 7, 8, 9, 10);
+ let e: u16x8 = u16x8::new(3, 4, 5, 6, 7, 8, 9, 10);
+ let r: u16x8 = transmute(vmovl_high_u8(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmovl_high_u16() {
+ let a: u16x8 = u16x8::new(1, 2, 3, 4, 3, 4, 5, 6);
+ let e: u32x4 = u32x4::new(3, 4, 5, 6);
+ let r: u32x4 = transmute(vmovl_high_u16(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmovl_high_u32() {
+ let a: u32x4 = u32x4::new(1, 2, 3, 4);
+ let e: u64x2 = u64x2::new(3, 4);
+ let r: u64x2 = transmute(vmovl_high_u32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
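+ // vpaddq_* add adjacent lane pairs of the concatenated inputs; the scalar
+ // vpadds_f32/vpaddd_f64 forms sum the two lanes of one vector.
+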
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpaddq_f32() {
+ let a: f32x4 = f32x4::new(1., 2., 3., 4.);
+ let b: f32x4 = f32x4::new(3., 4., 5., 6.);
+ let e: f32x4 = f32x4::new(3., 7., 7., 11.);
+ let r: f32x4 = transmute(vpaddq_f32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpaddq_f64() {
+ let a: f64x2 = f64x2::new(1., 2.);
+ let b: f64x2 = f64x2::new(3., 4.);
+ let e: f64x2 = f64x2::new(3., 7.);
+ let r: f64x2 = transmute(vpaddq_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpadds_f32() {
+ let a: f32x2 = f32x2::new(1., 2.);
+ let e: f32 = 3.;
+ let r: f32 = transmute(vpadds_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpaddd_f64() {
+ let a: f64x2 = f64x2::new(1., 2.);
+ let e: f64 = 3.;
+ let r: f64 = transmute(vpaddd_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpminnm_f32() {
+ let a: f32x2 = f32x2::new(1.0, 2.0);
+ let b: f32x2 = f32x2::new(6.0, -3.0);
+ let e: f32x2 = f32x2::new(1.0, -3.0);
+ let r: f32x2 = transmute(vpminnm_f32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpminnmq_f64() {
+ let a: f64x2 = f64x2::new(1.0, 2.0);
+ let b: f64x2 = f64x2::new(6.0, -3.0);
+ let e: f64x2 = f64x2::new(1.0, -3.0);
+ let r: f64x2 = transmute(vpminnmq_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpminnmq_f32() {
+ let a: f32x4 = f32x4::new(1.0, 2.0, 3.0, -4.0);
+ let b: f32x4 = f32x4::new(8.0, 16.0, -1.0, 6.0);
+ let e: f32x4 = f32x4::new(1.0, -4.0, 8.0, -1.0);
+ let r: f32x4 = transmute(vpminnmq_f32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpminnms_f32() {
+ let a: f32x2 = f32x2::new(1., 2.);
+ let e: f32 = 1.;
+ let r: f32 = transmute(vpminnms_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpminnmqd_f64() {
+ let a: f64x2 = f64x2::new(1., 2.);
+ let e: f64 = 1.;
+ let r: f64 = transmute(vpminnmqd_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpmins_f32() {
+ let a: f32x2 = f32x2::new(1., 2.);
+ let e: f32 = 1.;
+ let r: f32 = transmute(vpmins_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpminqd_f64() {
+ let a: f64x2 = f64x2::new(1., 2.);
+ let e: f64 = 1.;
+ let r: f64 = transmute(vpminqd_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
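+ // vqdmull* compute a signed saturating doubling multiply long: 2 * a * b,
+ // widened to twice the element width (e.g. 2 * 2 * 3 = 12). The _high
+ // variants use the upper halves, and the _lane/_laneq variants broadcast
+ // one lane of b, selected by the const generic.
+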
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmullh_s16() {
+ let a: i16 = 2;
+ let b: i16 = 3;
+ let e: i32 = 12;
+ let r: i32 = transmute(vqdmullh_s16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmulls_s32() {
+ let a: i32 = 2;
+ let b: i32 = 3;
+ let e: i64 = 12;
+ let r: i64 = transmute(vqdmulls_s32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmull_high_s16() {
+ let a: i16x8 = i16x8::new(0, 1, 4, 5, 4, 5, 6, 7);
+ let b: i16x8 = i16x8::new(1, 2, 5, 6, 5, 6, 7, 8);
+ let e: i32x4 = i32x4::new(40, 60, 84, 112);
+ let r: i32x4 = transmute(vqdmull_high_s16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmull_high_s32() {
+ let a: i32x4 = i32x4::new(0, 1, 4, 5);
+ let b: i32x4 = i32x4::new(1, 2, 5, 6);
+ let e: i64x2 = i64x2::new(40, 60);
+ let r: i64x2 = transmute(vqdmull_high_s32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmull_high_n_s16() {
+ let a: i16x8 = i16x8::new(0, 2, 8, 10, 8, 10, 12, 14);
+ let b: i16 = 2;
+ let e: i32x4 = i32x4::new(32, 40, 48, 56);
+ let r: i32x4 = transmute(vqdmull_high_n_s16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmull_high_n_s32() {
+ let a: i32x4 = i32x4::new(0, 2, 8, 10);
+ let b: i32 = 2;
+ let e: i64x2 = i64x2::new(32, 40);
+ let r: i64x2 = transmute(vqdmull_high_n_s32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmull_laneq_s16() {
+ let a: i16x4 = i16x4::new(1, 2, 3, 4);
+ let b: i16x8 = i16x8::new(0, 2, 2, 0, 2, 0, 0, 0);
+ let e: i32x4 = i32x4::new(4, 8, 12, 16);
+ let r: i32x4 = transmute(vqdmull_laneq_s16::<4>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmull_laneq_s32() {
+ let a: i32x2 = i32x2::new(1, 2);
+ let b: i32x4 = i32x4::new(0, 2, 2, 0);
+ let e: i64x2 = i64x2::new(4, 8);
+ let r: i64x2 = transmute(vqdmull_laneq_s32::<2>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmullh_lane_s16() {
+ let a: i16 = 2;
+ let b: i16x4 = i16x4::new(0, 2, 2, 0);
+ let e: i32 = 8;
+ let r: i32 = transmute(vqdmullh_lane_s16::<2>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmullh_laneq_s16() {
+ let a: i16 = 2;
+ let b: i16x8 = i16x8::new(0, 2, 2, 0, 2, 0, 0, 0);
+ let e: i32 = 8;
+ let r: i32 = transmute(vqdmullh_laneq_s16::<4>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmulls_lane_s32() {
+ let a: i32 = 2;
+ let b: i32x2 = i32x2::new(0, 2);
+ let e: i64 = 8;
+ let r: i64 = transmute(vqdmulls_lane_s32::<1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmulls_laneq_s32() {
+ let a: i32 = 2;
+ let b: i32x4 = i32x4::new(0, 2, 2, 0);
+ let e: i64 = 8;
+ let r: i64 = transmute(vqdmulls_laneq_s32::<2>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmull_high_lane_s16() {
+ let a: i16x8 = i16x8::new(0, 1, 4, 5, 4, 5, 6, 7);
+ let b: i16x4 = i16x4::new(0, 2, 2, 0);
+ let e: i32x4 = i32x4::new(16, 20, 24, 28);
+ let r: i32x4 = transmute(vqdmull_high_lane_s16::<2>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmull_high_lane_s32() {
+ let a: i32x4 = i32x4::new(0, 1, 4, 5);
+ let b: i32x2 = i32x2::new(0, 2);
+ let e: i64x2 = i64x2::new(16, 20);
+ let r: i64x2 = transmute(vqdmull_high_lane_s32::<1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmull_high_laneq_s16() {
+ let a: i16x8 = i16x8::new(0, 1, 4, 5, 4, 5, 6, 7);
+ let b: i16x8 = i16x8::new(0, 2, 2, 0, 2, 0, 0, 0);
+ let e: i32x4 = i32x4::new(16, 20, 24, 28);
+ let r: i32x4 = transmute(vqdmull_high_laneq_s16::<4>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmull_high_laneq_s32() {
+ let a: i32x4 = i32x4::new(0, 1, 4, 5);
+ let b: i32x4 = i32x4::new(0, 2, 2, 0);
+ let e: i64x2 = i64x2::new(16, 20);
+ let r: i64x2 = transmute(vqdmull_high_laneq_s32::<2>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
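+ // vqdmlal* accumulate the doubling product into a widened accumulator,
+ // computing a + 2 * b * c with saturation (e.g. 1 + 2 * 1 * 2 = 5 in
+ // test_vqdmlalh_s16).
+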
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmlal_high_s16() {
+ let a: i32x4 = i32x4::new(1, 2, 3, 4);
+ let b: i16x8 = i16x8::new(0, 1, 4, 5, 4, 5, 6, 7);
+ let c: i16x8 = i16x8::new(1, 2, 5, 6, 5, 6, 7, 8);
+ let e: i32x4 = i32x4::new(41, 62, 87, 116);
+ let r: i32x4 = transmute(vqdmlal_high_s16(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmlal_high_s32() {
+ let a: i64x2 = i64x2::new(1, 2);
+ let b: i32x4 = i32x4::new(0, 1, 4, 5);
+ let c: i32x4 = i32x4::new(1, 2, 5, 6);
+ let e: i64x2 = i64x2::new(41, 62);
+ let r: i64x2 = transmute(vqdmlal_high_s32(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmlal_high_n_s16() {
+ let a: i32x4 = i32x4::new(1, 2, 3, 4);
+ let b: i16x8 = i16x8::new(0, 2, 8, 10, 8, 10, 12, 14);
+ let c: i16 = 2;
+ let e: i32x4 = i32x4::new(33, 42, 51, 60);
+ let r: i32x4 = transmute(vqdmlal_high_n_s16(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmlal_high_n_s32() {
+ let a: i64x2 = i64x2::new(1, 2);
+ let b: i32x4 = i32x4::new(0, 2, 8, 10);
+ let c: i32 = 2;
+ let e: i64x2 = i64x2::new(33, 42);
+ let r: i64x2 = transmute(vqdmlal_high_n_s32(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmlal_laneq_s16() {
+ let a: i32x4 = i32x4::new(1, 2, 3, 4);
+ let b: i16x4 = i16x4::new(1, 2, 3, 4);
+ let c: i16x8 = i16x8::new(0, 2, 2, 0, 2, 0, 0, 0);
+ let e: i32x4 = i32x4::new(5, 10, 15, 20);
+ let r: i32x4 = transmute(vqdmlal_laneq_s16::<2>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmlal_laneq_s32() {
+ let a: i64x2 = i64x2::new(1, 2);
+ let b: i32x2 = i32x2::new(1, 2);
+ let c: i32x4 = i32x4::new(0, 2, 2, 0);
+ let e: i64x2 = i64x2::new(5, 10);
+ let r: i64x2 = transmute(vqdmlal_laneq_s32::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmlal_high_lane_s16() {
+ let a: i32x4 = i32x4::new(1, 2, 3, 4);
+ let b: i16x8 = i16x8::new(0, 1, 4, 5, 4, 5, 6, 7);
+ let c: i16x4 = i16x4::new(0, 2, 0, 0);
+ let e: i32x4 = i32x4::new(17, 22, 27, 32);
+ let r: i32x4 = transmute(vqdmlal_high_lane_s16::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmlal_high_laneq_s16() {
+ let a: i32x4 = i32x4::new(1, 2, 3, 4);
+ let b: i16x8 = i16x8::new(0, 1, 4, 5, 4, 5, 6, 7);
+ let c: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
+ let e: i32x4 = i32x4::new(17, 22, 27, 32);
+ let r: i32x4 = transmute(vqdmlal_high_laneq_s16::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmlal_high_lane_s32() {
+ let a: i64x2 = i64x2::new(1, 2);
+ let b: i32x4 = i32x4::new(0, 1, 4, 5);
+ let c: i32x2 = i32x2::new(0, 2);
+ let e: i64x2 = i64x2::new(17, 22);
+ let r: i64x2 = transmute(vqdmlal_high_lane_s32::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmlal_high_laneq_s32() {
+ let a: i64x2 = i64x2::new(1, 2);
+ let b: i32x4 = i32x4::new(0, 1, 4, 5);
+ let c: i32x4 = i32x4::new(0, 2, 0, 0);
+ let e: i64x2 = i64x2::new(17, 22);
+ let r: i64x2 = transmute(vqdmlal_high_laneq_s32::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmlalh_s16() {
+ let a: i32 = 1;
+ let b: i16 = 1;
+ let c: i16 = 2;
+ let e: i32 = 5;
+ let r: i32 = transmute(vqdmlalh_s16(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmlals_s32() {
+ let a: i64 = 1;
+ let b: i32 = 1;
+ let c: i32 = 2;
+ let e: i64 = 5;
+ let r: i64 = transmute(vqdmlals_s32(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmlalh_lane_s16() {
+ let a: i32 = 1;
+ let b: i16 = 1;
+ let c: i16x4 = i16x4::new(2, 1, 1, 1);
+ let e: i32 = 5;
+ let r: i32 = transmute(vqdmlalh_lane_s16::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmlalh_laneq_s16() {
+ let a: i32 = 1;
+ let b: i16 = 1;
+ let c: i16x8 = i16x8::new(2, 1, 1, 1, 1, 1, 1, 1);
+ let e: i32 = 5;
+ let r: i32 = transmute(vqdmlalh_laneq_s16::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmlals_lane_s32() {
+ let a: i64 = 1;
+ let b: i32 = 1;
+ let c: i32x2 = i32x2::new(2, 1);
+ let e: i64 = 5;
+ let r: i64 = transmute(vqdmlals_lane_s32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmlals_laneq_s32() {
+ let a: i64 = 1;
+ let b: i32 = 1;
+ let c: i32x4 = i32x4::new(2, 1, 1, 1);
+ let e: i64 = 5;
+ let r: i64 = transmute(vqdmlals_laneq_s32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
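+ // vqdmlsl* are the subtracting counterparts: a - 2 * b * c, saturating
+ // (e.g. 10 - 2 * 1 * 2 = 6 in test_vqdmlslh_s16).
+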
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmlsl_high_s16() {
+ let a: i32x4 = i32x4::new(39, 58, 81, 108);
+ let b: i16x8 = i16x8::new(0, 1, 4, 5, 4, 5, 6, 7);
+ let c: i16x8 = i16x8::new(1, 2, 5, 6, 5, 6, 7, 8);
+ let e: i32x4 = i32x4::new(-1, -2, -3, -4);
+ let r: i32x4 = transmute(vqdmlsl_high_s16(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmlsl_high_s32() {
+ let a: i64x2 = i64x2::new(39, 58);
+ let b: i32x4 = i32x4::new(0, 1, 4, 5);
+ let c: i32x4 = i32x4::new(1, 2, 5, 6);
+ let e: i64x2 = i64x2::new(-1, -2);
+ let r: i64x2 = transmute(vqdmlsl_high_s32(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmlsl_high_n_s16() {
+ let a: i32x4 = i32x4::new(31, 38, 45, 52);
+ let b: i16x8 = i16x8::new(0, 2, 8, 10, 8, 10, 12, 14);
+ let c: i16 = 2;
+ let e: i32x4 = i32x4::new(-1, -2, -3, -4);
+ let r: i32x4 = transmute(vqdmlsl_high_n_s16(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmlsl_high_n_s32() {
+ let a: i64x2 = i64x2::new(31, 38);
+ let b: i32x4 = i32x4::new(0, 2, 8, 10);
+ let c: i32 = 2;
+ let e: i64x2 = i64x2::new(-1, -2);
+ let r: i64x2 = transmute(vqdmlsl_high_n_s32(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmlsl_laneq_s16() {
+ let a: i32x4 = i32x4::new(3, 6, 9, 12);
+ let b: i16x4 = i16x4::new(1, 2, 3, 4);
+ let c: i16x8 = i16x8::new(0, 2, 2, 0, 2, 0, 0, 0);
+ let e: i32x4 = i32x4::new(-1, -2, -3, -4);
+ let r: i32x4 = transmute(vqdmlsl_laneq_s16::<2>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmlsl_laneq_s32() {
+ let a: i64x2 = i64x2::new(3, 6);
+ let b: i32x2 = i32x2::new(1, 2);
+ let c: i32x4 = i32x4::new(0, 2, 2, 0);
+ let e: i64x2 = i64x2::new(-1, -2);
+ let r: i64x2 = transmute(vqdmlsl_laneq_s32::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmlsl_high_lane_s16() {
+ let a: i32x4 = i32x4::new(15, 18, 21, 24);
+ let b: i16x8 = i16x8::new(0, 1, 4, 5, 4, 5, 6, 7);
+ let c: i16x4 = i16x4::new(0, 2, 0, 0);
+ let e: i32x4 = i32x4::new(-1, -2, -3, -4);
+ let r: i32x4 = transmute(vqdmlsl_high_lane_s16::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmlsl_high_laneq_s16() {
+ let a: i32x4 = i32x4::new(15, 18, 21, 24);
+ let b: i16x8 = i16x8::new(0, 1, 4, 5, 4, 5, 6, 7);
+ let c: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
+ let e: i32x4 = i32x4::new(-1, -2, -3, -4);
+ let r: i32x4 = transmute(vqdmlsl_high_laneq_s16::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmlsl_high_lane_s32() {
+ let a: i64x2 = i64x2::new(15, 18);
+ let b: i32x4 = i32x4::new(0, 1, 4, 5);
+ let c: i32x2 = i32x2::new(0, 2);
+ let e: i64x2 = i64x2::new(-1, -2);
+ let r: i64x2 = transmute(vqdmlsl_high_lane_s32::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmlsl_high_laneq_s32() {
+ let a: i64x2 = i64x2::new(15, 18);
+ let b: i32x4 = i32x4::new(0, 1, 4, 5);
+ let c: i32x4 = i32x4::new(0, 2, 0, 0);
+ let e: i64x2 = i64x2::new(-1, -2);
+ let r: i64x2 = transmute(vqdmlsl_high_laneq_s32::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmlslh_s16() {
+ let a: i32 = 10;
+ let b: i16 = 1;
+ let c: i16 = 2;
+ let e: i32 = 6;
+ let r: i32 = transmute(vqdmlslh_s16(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmlsls_s32() {
+ let a: i64 = 10;
+ let b: i32 = 1;
+ let c: i32 = 2;
+ let e: i64 = 6;
+ let r: i64 = transmute(vqdmlsls_s32(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmlslh_lane_s16() {
+ let a: i32 = 10;
+ let b: i16 = 1;
+ let c: i16x4 = i16x4::new(2, 1, 1, 1);
+ let e: i32 = 6;
+ let r: i32 = transmute(vqdmlslh_lane_s16::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmlslh_laneq_s16() {
+ let a: i32 = 10;
+ let b: i16 = 1;
+ let c: i16x8 = i16x8::new(2, 1, 1, 1, 1, 1, 1, 1);
+ let e: i32 = 6;
+ let r: i32 = transmute(vqdmlslh_laneq_s16::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmlsls_lane_s32() {
+ let a: i64 = 10;
+ let b: i32 = 1;
+ let c: i32x2 = i32x2::new(2, 1);
+ let e: i64 = 6;
+ let r: i64 = transmute(vqdmlsls_lane_s32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmlsls_laneq_s32() {
+ let a: i64 = 10;
+ let b: i32 = 1;
+ let c: i32x4 = i32x4::new(2, 1, 1, 1);
+ let e: i64 = 6;
+ let r: i64 = transmute(vqdmlsls_laneq_s32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
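+ // vqdmulh* return only the high half of the doubling product, so small
+ // inputs truncate to 0, while 2 * 0x7F_FF * 2 = 0x1_FF_FC yields 1 after
+ // the >> 16.
+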
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmulhh_s16() {
+ let a: i16 = 1;
+ let b: i16 = 2;
+ let e: i16 = 0;
+ let r: i16 = transmute(vqdmulhh_s16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmulhs_s32() {
+ let a: i32 = 1;
+ let b: i32 = 2;
+ let e: i32 = 0;
+ let r: i32 = transmute(vqdmulhs_s32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmulhh_lane_s16() {
+ let a: i16 = 2;
+ let b: i16x4 = i16x4::new(0, 0, 0x7F_FF, 0);
+ let e: i16 = 1;
+ let r: i16 = transmute(vqdmulhh_lane_s16::<2>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmulhh_laneq_s16() {
+ let a: i16 = 2;
+ let b: i16x8 = i16x8::new(0, 0, 0x7F_FF, 0, 0, 0, 0, 0);
+ let e: i16 = 1;
+ let r: i16 = transmute(vqdmulhh_laneq_s16::<2>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmulhs_lane_s32() {
+ let a: i32 = 2;
+ let b: i32x2 = i32x2::new(0, 0x7F_FF_FF_FF);
+ let e: i32 = 1;
+ let r: i32 = transmute(vqdmulhs_lane_s32::<1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmulhs_laneq_s32() {
+ let a: i32 = 2;
+ let b: i32x4 = i32x4::new(0, 0x7F_FF_FF_FF, 0, 0);
+ let e: i32 = 1;
+ let r: i32 = transmute(vqdmulhs_laneq_s32::<1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmulh_lane_s16() {
+ let a: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
+ let b: i16x4 = i16x4::new(2, 1, 1, 1);
+ let e: i16x4 = i16x4::new(1, 1, 1, 1);
+ let r: i16x4 = transmute(vqdmulh_lane_s16::<0>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmulhq_lane_s16() {
+ let a: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
+ let b: i16x4 = i16x4::new(2, 1, 1, 1);
+ let e: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
+ let r: i16x8 = transmute(vqdmulhq_lane_s16::<0>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmulh_lane_s32() {
+ let a: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
+ let b: i32x2 = i32x2::new(2, 1);
+ let e: i32x2 = i32x2::new(1, 1);
+ let r: i32x2 = transmute(vqdmulh_lane_s32::<0>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqdmulhq_lane_s32() {
+ let a: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
+ let b: i32x2 = i32x2::new(2, 1);
+ let e: i32x4 = i32x4::new(1, 1, 1, 1);
+ let r: i32x4 = transmute(vqdmulhq_lane_s32::<0>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
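+ // vqmovn* narrow to half the element width with saturation: the scalar
+ // h/s/d forms narrow one element, and the _high forms pack the narrowed
+ // vector into the upper half of a (hence the all-saturated expectations).
+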
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqmovnh_s16() {
+ let a: i16 = 1;
+ let e: i8 = 1;
+ let r: i8 = transmute(vqmovnh_s16(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqmovns_s32() {
+ let a: i32 = 1;
+ let e: i16 = 1;
+ let r: i16 = transmute(vqmovns_s32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqmovnh_u16() {
+ let a: u16 = 1;
+ let e: u8 = 1;
+ let r: u8 = transmute(vqmovnh_u16(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqmovns_u32() {
+ let a: u32 = 1;
+ let e: u16 = 1;
+ let r: u16 = transmute(vqmovns_u32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqmovnd_s64() {
+ let a: i64 = 1;
+ let e: i32 = 1;
+ let r: i32 = transmute(vqmovnd_s64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqmovnd_u64() {
+ let a: u64 = 1;
+ let e: u32 = 1;
+ let r: u32 = transmute(vqmovnd_u64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqmovn_high_s16() {
+ let a: i8x8 = i8x8::new(0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F);
+ let b: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
+ let e: i8x16 = i8x16::new(0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F);
+ let r: i8x16 = transmute(vqmovn_high_s16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqmovn_high_s32() {
+ let a: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
+ let b: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
+ let e: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
+ let r: i16x8 = transmute(vqmovn_high_s32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqmovn_high_s64() {
+ let a: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
+ let b: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 0x7F_FF_FF_FF_FF_FF_FF_FF);
+ let e: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
+ let r: i32x4 = transmute(vqmovn_high_s64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqmovn_high_u16() {
+ let a: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
+ let b: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF);
+ let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
+ let r: u8x16 = transmute(vqmovn_high_u16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqmovn_high_u32() {
+ let a: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF);
+ let b: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
+ let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF);
+ let r: u16x8 = transmute(vqmovn_high_u32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqmovn_high_u64() {
+ let a: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
+ let b: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
+ let r: u32x4 = transmute(vqmovn_high_u64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
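+ // vqmovun* narrow signed sources to unsigned results, clamping negatives
+ // to zero, which is why the -1 inputs all produce 0.
+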
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqmovunh_s16() {
+ let a: i16 = 1;
+ let e: u8 = 1;
+ let r: u8 = transmute(vqmovunh_s16(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqmovuns_s32() {
+ let a: i32 = 1;
+ let e: u16 = 1;
+ let r: u16 = transmute(vqmovuns_s32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqmovund_s64() {
+ let a: i64 = 1;
+ let e: u32 = 1;
+ let r: u32 = transmute(vqmovund_s64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqmovun_high_s16() {
+ let a: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+ let b: i16x8 = i16x8::new(-1, -1, -1, -1, -1, -1, -1, -1);
+ let e: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ let r: u8x16 = transmute(vqmovun_high_s16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqmovun_high_s32() {
+ let a: u16x4 = u16x4::new(0, 0, 0, 0);
+ let b: i32x4 = i32x4::new(-1, -1, -1, -1);
+ let e: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+ let r: u16x8 = transmute(vqmovun_high_s32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqmovun_high_s64() {
+ let a: u32x2 = u32x2::new(0, 0);
+ let b: i64x2 = i64x2::new(-1, -1);
+ let e: u32x4 = u32x4::new(0, 0, 0, 0);
+ let r: u32x4 = transmute(vqmovun_high_s64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
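+ // vqrdmulh* are the rounding variant of vqdmulh: the doubled product is
+ // rounded (by adding 1 << (bits - 1)) before the high half is taken.
+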
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrdmulhh_s16() {
+ let a: i16 = 1;
+ let b: i16 = 2;
+ let e: i16 = 0;
+ let r: i16 = transmute(vqrdmulhh_s16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrdmulhs_s32() {
+ let a: i32 = 1;
+ let b: i32 = 2;
+ let e: i32 = 0;
+ let r: i32 = transmute(vqrdmulhs_s32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrdmulhh_lane_s16() {
+ let a: i16 = 1;
+ let b: i16x4 = i16x4::new(0, 2, 0, 0);
+ let e: i16 = 0;
+ let r: i16 = transmute(vqrdmulhh_lane_s16::<1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrdmulhh_laneq_s16() {
+ let a: i16 = 1;
+ let b: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
+ let e: i16 = 0;
+ let r: i16 = transmute(vqrdmulhh_laneq_s16::<1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrdmulhs_lane_s32() {
+ let a: i32 = 1;
+ let b: i32x2 = i32x2::new(0, 2);
+ let e: i32 = 0;
+ let r: i32 = transmute(vqrdmulhs_lane_s32::<1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrdmulhs_laneq_s32() {
+ let a: i32 = 1;
+ let b: i32x4 = i32x4::new(0, 2, 0, 0);
+ let e: i32 = 0;
+ let r: i32 = transmute(vqrdmulhs_laneq_s32::<1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
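+ // vqrdmlah* (ARMv8.1 RDM) accumulate the rounding doubling high-half
+ // product into a with saturation: 1 + ((2 * 0x7F_FF * 2 + 0x80_00) >> 16)
+ // = 1 + 2 = 3 in the vector tests below.
+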
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrdmlah_s16() {
+ let a: i16x4 = i16x4::new(1, 1, 1, 1);
+ let b: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
+ let c: i16x4 = i16x4::new(2, 2, 2, 2);
+ let e: i16x4 = i16x4::new(3, 3, 3, 3);
+ let r: i16x4 = transmute(vqrdmlah_s16(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrdmlahq_s16() {
+ let a: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
+ let b: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
+ let c: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+ let e: i16x8 = i16x8::new(3, 3, 3, 3, 3, 3, 3, 3);
+ let r: i16x8 = transmute(vqrdmlahq_s16(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrdmlah_s32() {
+ let a: i32x2 = i32x2::new(1, 1);
+ let b: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
+ let c: i32x2 = i32x2::new(2, 2);
+ let e: i32x2 = i32x2::new(3, 3);
+ let r: i32x2 = transmute(vqrdmlah_s32(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrdmlahq_s32() {
+ let a: i32x4 = i32x4::new(1, 1, 1, 1);
+ let b: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
+ let c: i32x4 = i32x4::new(2, 2, 2, 2);
+ let e: i32x4 = i32x4::new(3, 3, 3, 3);
+ let r: i32x4 = transmute(vqrdmlahq_s32(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrdmlahh_s16() {
+ let a: i16 = 1;
+ let b: i16 = 1;
+ let c: i16 = 2;
+ let e: i16 = 1;
+ let r: i16 = transmute(vqrdmlahh_s16(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrdmlahs_s32() {
+ let a: i32 = 1;
+ let b: i32 = 1;
+ let c: i32 = 2;
+ let e: i32 = 1;
+ let r: i32 = transmute(vqrdmlahs_s32(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrdmlah_lane_s16() {
+ let a: i16x4 = i16x4::new(1, 1, 1, 1);
+ let b: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
+ let c: i16x4 = i16x4::new(0, 2, 0, 0);
+ let e: i16x4 = i16x4::new(3, 3, 3, 3);
+ let r: i16x4 = transmute(vqrdmlah_lane_s16::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrdmlah_laneq_s16() {
+ let a: i16x4 = i16x4::new(1, 1, 1, 1);
+ let b: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
+ let c: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
+ let e: i16x4 = i16x4::new(3, 3, 3, 3);
+ let r: i16x4 = transmute(vqrdmlah_laneq_s16::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrdmlahq_lane_s16() {
+ let a: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
+ let b: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
+ let c: i16x4 = i16x4::new(0, 2, 0, 0);
+ let e: i16x8 = i16x8::new(3, 3, 3, 3, 3, 3, 3, 3);
+ let r: i16x8 = transmute(vqrdmlahq_lane_s16::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrdmlahq_laneq_s16() {
+ let a: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
+ let b: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
+ let c: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
+ let e: i16x8 = i16x8::new(3, 3, 3, 3, 3, 3, 3, 3);
+ let r: i16x8 = transmute(vqrdmlahq_laneq_s16::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrdmlah_lane_s32() {
+ let a: i32x2 = i32x2::new(1, 1);
+ let b: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
+ let c: i32x2 = i32x2::new(0, 2);
+ let e: i32x2 = i32x2::new(3, 3);
+ let r: i32x2 = transmute(vqrdmlah_lane_s32::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrdmlah_laneq_s32() {
+ let a: i32x2 = i32x2::new(1, 1);
+ let b: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
+ let c: i32x4 = i32x4::new(0, 2, 0, 0);
+ let e: i32x2 = i32x2::new(3, 3);
+ let r: i32x2 = transmute(vqrdmlah_laneq_s32::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrdmlahq_lane_s32() {
+ let a: i32x4 = i32x4::new(1, 1, 1, 1);
+ let b: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
+ let c: i32x2 = i32x2::new(0, 2);
+ let e: i32x4 = i32x4::new(3, 3, 3, 3);
+ let r: i32x4 = transmute(vqrdmlahq_lane_s32::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrdmlahq_laneq_s32() {
+ let a: i32x4 = i32x4::new(1, 1, 1, 1);
+ let b: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
+ let c: i32x4 = i32x4::new(0, 2, 0, 0);
+ let e: i32x4 = i32x4::new(3, 3, 3, 3);
+ let r: i32x4 = transmute(vqrdmlahq_laneq_s32::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrdmlahh_lane_s16() {
+ let a: i16 = 1;
+ let b: i16 = 1;
+ let c: i16x4 = i16x4::new(0, 2, 0, 0);
+ let e: i16 = 1;
+ let r: i16 = transmute(vqrdmlahh_lane_s16::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrdmlahh_laneq_s16() {
+ let a: i16 = 1;
+ let b: i16 = 1;
+ let c: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
+ let e: i16 = 1;
+ let r: i16 = transmute(vqrdmlahh_laneq_s16::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrdmlahs_lane_s32() {
+ let a: i32 = 1;
+ let b: i32 = 1;
+ let c: i32x2 = i32x2::new(0, 2);
+ let e: i32 = 1;
+ let r: i32 = transmute(vqrdmlahs_lane_s32::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrdmlahs_laneq_s32() {
+ let a: i32 = 1;
+ let b: i32 = 1;
+ let c: i32x4 = i32x4::new(0, 2, 0, 0);
+ let e: i32 = 1;
+ let r: i32 = transmute(vqrdmlahs_laneq_s32::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
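+ // vqrdmlsh*: signed saturating rounding doubling multiply-subtract returning
+ // the high half — informally, sat(a - ((2*b*c + (1 << (bits-1))) >> bits)).
+ // E.g. for i16: a=1, b=0x7F_FF, c=2 gives 1 - ((2*32767*2 + 0x80_00) >> 16)
+ // = 1 - 2 = -1, matching the expected vectors below.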
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrdmlsh_s16() {
+ let a: i16x4 = i16x4::new(1, 1, 1, 1);
+ let b: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
+ let c: i16x4 = i16x4::new(2, 2, 2, 2);
+ let e: i16x4 = i16x4::new(-1, -1, -1, -1);
+ let r: i16x4 = transmute(vqrdmlsh_s16(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrdmlshq_s16() {
+ let a: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
+ let b: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
+ let c: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+ let e: i16x8 = i16x8::new(-1, -1, -1, -1, -1, -1, -1, -1);
+ let r: i16x8 = transmute(vqrdmlshq_s16(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrdmlsh_s32() {
+ let a: i32x2 = i32x2::new(1, 1);
+ let b: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
+ let c: i32x2 = i32x2::new(2, 2);
+ let e: i32x2 = i32x2::new(-1, -1);
+ let r: i32x2 = transmute(vqrdmlsh_s32(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrdmlshq_s32() {
+ let a: i32x4 = i32x4::new(1, 1, 1, 1);
+ let b: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
+ let c: i32x4 = i32x4::new(2, 2, 2, 2);
+ let e: i32x4 = i32x4::new(-1, -1, -1, -1);
+ let r: i32x4 = transmute(vqrdmlshq_s32(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrdmlshh_s16() {
+ let a: i16 = 1;
+ let b: i16 = 1;
+ let c: i16 = 2;
+ let e: i16 = 1;
+ let r: i16 = transmute(vqrdmlshh_s16(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrdmlshs_s32() {
+ let a: i32 = 1;
+ let b: i32 = 1;
+ let c: i32 = 2;
+ let e: i32 = 1;
+ let r: i32 = transmute(vqrdmlshs_s32(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrdmlsh_lane_s16() {
+ let a: i16x4 = i16x4::new(1, 1, 1, 1);
+ let b: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
+ let c: i16x4 = i16x4::new(0, 2, 0, 0);
+ let e: i16x4 = i16x4::new(-1, -1, -1, -1);
+ let r: i16x4 = transmute(vqrdmlsh_lane_s16::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrdmlsh_laneq_s16() {
+ let a: i16x4 = i16x4::new(1, 1, 1, 1);
+ let b: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
+ let c: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
+ let e: i16x4 = i16x4::new(-1, -1, -1, -1);
+ let r: i16x4 = transmute(vqrdmlsh_laneq_s16::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrdmlshq_lane_s16() {
+ let a: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
+ let b: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
+ let c: i16x4 = i16x4::new(0, 2, 0, 0);
+ let e: i16x8 = i16x8::new(-1, -1, -1, -1, -1, -1, -1, -1);
+ let r: i16x8 = transmute(vqrdmlshq_lane_s16::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrdmlshq_laneq_s16() {
+ let a: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
+ let b: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
+ let c: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
+ let e: i16x8 = i16x8::new(-1, -1, -1, -1, -1, -1, -1, -1);
+ let r: i16x8 = transmute(vqrdmlshq_laneq_s16::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrdmlsh_lane_s32() {
+ let a: i32x2 = i32x2::new(1, 1);
+ let b: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
+ let c: i32x2 = i32x2::new(0, 2);
+ let e: i32x2 = i32x2::new(-1, -1);
+ let r: i32x2 = transmute(vqrdmlsh_lane_s32::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrdmlsh_laneq_s32() {
+ let a: i32x2 = i32x2::new(1, 1);
+ let b: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
+ let c: i32x4 = i32x4::new(0, 2, 0, 0);
+ let e: i32x2 = i32x2::new(-1, -1);
+ let r: i32x2 = transmute(vqrdmlsh_laneq_s32::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrdmlshq_lane_s32() {
+ let a: i32x4 = i32x4::new(1, 1, 1, 1);
+ let b: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
+ let c: i32x2 = i32x2::new(0, 2);
+ let e: i32x4 = i32x4::new(-1, -1, -1, -1);
+ let r: i32x4 = transmute(vqrdmlshq_lane_s32::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrdmlshq_laneq_s32() {
+ let a: i32x4 = i32x4::new(1, 1, 1, 1);
+ let b: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
+ let c: i32x4 = i32x4::new(0, 2, 0, 0);
+ let e: i32x4 = i32x4::new(-1, -1, -1, -1);
+ let r: i32x4 = transmute(vqrdmlshq_laneq_s32::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrdmlshh_lane_s16() {
+ let a: i16 = 1;
+ let b: i16 = 1;
+ let c: i16x4 = i16x4::new(0, 2, 0, 0);
+ let e: i16 = 1;
+ let r: i16 = transmute(vqrdmlshh_lane_s16::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrdmlshh_laneq_s16() {
+ let a: i16 = 1;
+ let b: i16 = 1;
+ let c: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
+ let e: i16 = 1;
+ let r: i16 = transmute(vqrdmlshh_laneq_s16::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrdmlshs_lane_s32() {
+ let a: i32 = 1;
+ let b: i32 = 1;
+ let c: i32x2 = i32x2::new(0, 2);
+ let e: i32 = 1;
+ let r: i32 = transmute(vqrdmlshs_lane_s32::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrdmlshs_laneq_s32() {
+ let a: i32 = 1;
+ let b: i32 = 1;
+ let c: i32x4 = i32x4::new(0, 2, 0, 0);
+ let e: i32 = 1;
+ let r: i32 = transmute(vqrdmlshs_laneq_s32::<1>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
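+ // vqrshl{b,h,s,d}_*: scalar saturating rounding shift left by a signed,
+ // runtime amount; a negative count shifts right with rounding, and the result
+ // saturates to the element range. The cases below only exercise small positive
+ // shifts (e.g. 2 << 2 == 8), so neither rounding nor saturation fires.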
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrshls_s32() {
+ let a: i32 = 2;
+ let b: i32 = 2;
+ let e: i32 = 8;
+ let r: i32 = transmute(vqrshls_s32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrshld_s64() {
+ let a: i64 = 2;
+ let b: i64 = 2;
+ let e: i64 = 8;
+ let r: i64 = transmute(vqrshld_s64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrshlb_s8() {
+ let a: i8 = 1;
+ let b: i8 = 2;
+ let e: i8 = 4;
+ let r: i8 = transmute(vqrshlb_s8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrshlh_s16() {
+ let a: i16 = 1;
+ let b: i16 = 2;
+ let e: i16 = 4;
+ let r: i16 = transmute(vqrshlh_s16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrshls_u32() {
+ let a: u32 = 2;
+ let b: i32 = 2;
+ let e: u32 = 8;
+ let r: u32 = transmute(vqrshls_u32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrshld_u64() {
+ let a: u64 = 2;
+ let b: i64 = 2;
+ let e: u64 = 8;
+ let r: u64 = transmute(vqrshld_u64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrshlb_u8() {
+ let a: u8 = 1;
+ let b: i8 = 2;
+ let e: u8 = 4;
+ let r: u8 = transmute(vqrshlb_u8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrshlh_u16() {
+ let a: u16 = 1;
+ let b: i16 = 2;
+ let e: u16 = 4;
+ let r: u16 = transmute(vqrshlh_u16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
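+ // vqrshrn{h,s,d}_n_*: saturating rounding shift right and narrow by an
+ // immediate N — roughly sat_narrow((a + (1 << (N-1))) >> N).
+ // E.g. (4 + 2) >> 2 == 1 for the N = 2 cases below.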
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrshrnh_n_s16() {
+ let a: i16 = 4;
+ let e: i8 = 1;
+ let r: i8 = transmute(vqrshrnh_n_s16::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrshrns_n_s32() {
+ let a: i32 = 4;
+ let e: i16 = 1;
+ let r: i16 = transmute(vqrshrns_n_s32::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrshrnd_n_s64() {
+ let a: i64 = 4;
+ let e: i32 = 1;
+ let r: i32 = transmute(vqrshrnd_n_s64::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
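+ // The *_high_n variants keep `a` as the low half of the result and append the
+ // rounded, saturated, narrowed `b` as the high half; e.g. (8 + 2) >> 2 == 2
+ // becomes the first appended lane below.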
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrshrn_high_n_s16() {
+ let a: i8x8 = i8x8::new(0, 1, 2, 3, 2, 3, 6, 7);
+ let b: i16x8 = i16x8::new(8, 12, 24, 28, 48, 52, 56, 60);
+ let e: i8x16 = i8x16::new(0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 12, 13, 14, 15);
+ let r: i8x16 = transmute(vqrshrn_high_n_s16::<2>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrshrn_high_n_s32() {
+ let a: i16x4 = i16x4::new(0, 1, 2, 3);
+ let b: i32x4 = i32x4::new(8, 12, 24, 28);
+ let e: i16x8 = i16x8::new(0, 1, 2, 3, 2, 3, 6, 7);
+ let r: i16x8 = transmute(vqrshrn_high_n_s32::<2>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrshrn_high_n_s64() {
+ let a: i32x2 = i32x2::new(0, 1);
+ let b: i64x2 = i64x2::new(8, 12);
+ let e: i32x4 = i32x4::new(0, 1, 2, 3);
+ let r: i32x4 = transmute(vqrshrn_high_n_s64::<2>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrshrnh_n_u16() {
+ let a: u16 = 4;
+ let e: u8 = 1;
+ let r: u8 = transmute(vqrshrnh_n_u16::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrshrns_n_u32() {
+ let a: u32 = 4;
+ let e: u16 = 1;
+ let r: u16 = transmute(vqrshrns_n_u32::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrshrnd_n_u64() {
+ let a: u64 = 4;
+ let e: u32 = 1;
+ let r: u32 = transmute(vqrshrnd_n_u64::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrshrn_high_n_u16() {
+ let a: u8x8 = u8x8::new(0, 1, 2, 3, 2, 3, 6, 7);
+ let b: u16x8 = u16x8::new(8, 12, 24, 28, 48, 52, 56, 60);
+ let e: u8x16 = u8x16::new(0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 12, 13, 14, 15);
+ let r: u8x16 = transmute(vqrshrn_high_n_u16::<2>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrshrn_high_n_u32() {
+ let a: u16x4 = u16x4::new(0, 1, 2, 3);
+ let b: u32x4 = u32x4::new(8, 12, 24, 28);
+ let e: u16x8 = u16x8::new(0, 1, 2, 3, 2, 3, 6, 7);
+ let r: u16x8 = transmute(vqrshrn_high_n_u32::<2>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrshrn_high_n_u64() {
+ let a: u32x2 = u32x2::new(0, 1);
+ let b: u64x2 = u64x2::new(8, 12);
+ let e: u32x4 = u32x4::new(0, 1, 2, 3);
+ let r: u32x4 = transmute(vqrshrn_high_n_u64::<2>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
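+ // vqrshrun*: like vqrshrn but takes signed input and saturates to the
+ // *unsigned* narrow range (a negative input would clamp to 0).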
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrshrunh_n_s16() {
+ let a: i16 = 4;
+ let e: u8 = 1;
+ let r: u8 = transmute(vqrshrunh_n_s16::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrshruns_n_s32() {
+ let a: i32 = 4;
+ let e: u16 = 1;
+ let r: u16 = transmute(vqrshruns_n_s32::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrshrund_n_s64() {
+ let a: i64 = 4;
+ let e: u32 = 1;
+ let r: u32 = transmute(vqrshrund_n_s64::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrshrun_high_n_s16() {
+ let a: u8x8 = u8x8::new(0, 1, 2, 3, 2, 3, 6, 7);
+ let b: i16x8 = i16x8::new(8, 12, 24, 28, 48, 52, 56, 60);
+ let e: u8x16 = u8x16::new(0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 12, 13, 14, 15);
+ let r: u8x16 = transmute(vqrshrun_high_n_s16::<2>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrshrun_high_n_s32() {
+ let a: u16x4 = u16x4::new(0, 1, 2, 3);
+ let b: i32x4 = i32x4::new(8, 12, 24, 28);
+ let e: u16x8 = u16x8::new(0, 1, 2, 3, 2, 3, 6, 7);
+ let r: u16x8 = transmute(vqrshrun_high_n_s32::<2>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqrshrun_high_n_s64() {
+ let a: u32x2 = u32x2::new(0, 1);
+ let b: i64x2 = i64x2::new(8, 12);
+ let e: u32x4 = u32x4::new(0, 1, 2, 3);
+ let r: u32x4 = transmute(vqrshrun_high_n_s64::<2>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
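+ // vqshl{b,h,s,d}_*: scalar saturating shift left by a signed, runtime amount,
+ // without the rounding of vqrshl; the vqshl*_n_* tests further below use a
+ // compile-time immediate shift instead.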
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqshld_s64() {
+ let a: i64 = 0;
+ let b: i64 = 2;
+ let e: i64 = 0;
+ let r: i64 = transmute(vqshld_s64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqshlb_s8() {
+ let a: i8 = 1;
+ let b: i8 = 2;
+ let e: i8 = 4;
+ let r: i8 = transmute(vqshlb_s8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqshlh_s16() {
+ let a: i16 = 1;
+ let b: i16 = 2;
+ let e: i16 = 4;
+ let r: i16 = transmute(vqshlh_s16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqshls_s32() {
+ let a: i32 = 1;
+ let b: i32 = 2;
+ let e: i32 = 4;
+ let r: i32 = transmute(vqshls_s32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqshld_u64() {
+ let a: u64 = 0;
+ let b: i64 = 2;
+ let e: u64 = 0;
+ let r: u64 = transmute(vqshld_u64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqshlb_u8() {
+ let a: u8 = 1;
+ let b: i8 = 2;
+ let e: u8 = 4;
+ let r: u8 = transmute(vqshlb_u8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqshlh_u16() {
+ let a: u16 = 1;
+ let b: i16 = 2;
+ let e: u16 = 4;
+ let r: u16 = transmute(vqshlh_u16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqshls_u32() {
+ let a: u32 = 1;
+ let b: i32 = 2;
+ let e: u32 = 4;
+ let r: u32 = transmute(vqshls_u32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqshlb_n_s8() {
+ let a: i8 = 1;
+ let e: i8 = 4;
+ let r: i8 = transmute(vqshlb_n_s8::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqshlh_n_s16() {
+ let a: i16 = 1;
+ let e: i16 = 4;
+ let r: i16 = transmute(vqshlh_n_s16::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqshls_n_s32() {
+ let a: i32 = 1;
+ let e: i32 = 4;
+ let r: i32 = transmute(vqshls_n_s32::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqshld_n_s64() {
+ let a: i64 = 1;
+ let e: i64 = 4;
+ let r: i64 = transmute(vqshld_n_s64::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqshlb_n_u8() {
+ let a: u8 = 1;
+ let e: u8 = 4;
+ let r: u8 = transmute(vqshlb_n_u8::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqshlh_n_u16() {
+ let a: u16 = 1;
+ let e: u16 = 4;
+ let r: u16 = transmute(vqshlh_n_u16::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqshls_n_u32() {
+ let a: u32 = 1;
+ let e: u32 = 4;
+ let r: u32 = transmute(vqshls_n_u32::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqshld_n_u64() {
+ let a: u64 = 1;
+ let e: u64 = 4;
+ let r: u64 = transmute(vqshld_n_u64::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
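+ // vqshlu*_n_*: signed input shifted left by an immediate, saturated to the
+ // unsigned range of the same width (a negative input would clamp to 0).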
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqshlub_n_s8() {
+ let a: i8 = 1;
+ let e: u8 = 4;
+ let r: u8 = transmute(vqshlub_n_s8::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqshluh_n_s16() {
+ let a: i16 = 1;
+ let e: u16 = 4;
+ let r: u16 = transmute(vqshluh_n_s16::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqshlus_n_s32() {
+ let a: i32 = 1;
+ let e: u32 = 4;
+ let r: u32 = transmute(vqshlus_n_s32::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqshlud_n_s64() {
+ let a: i64 = 1;
+ let e: u64 = 4;
+ let r: u64 = transmute(vqshlud_n_s64::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
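+ // vqshrn{h,s,d}_n_*: saturating shift right and narrow, truncating — no
+ // rounding term, so 4 >> 2 == 1 exactly in the cases below.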
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqshrnd_n_s64() {
+ let a: i64 = 0;
+ let e: i32 = 0;
+ let r: i32 = transmute(vqshrnd_n_s64::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqshrnh_n_s16() {
+ let a: i16 = 4;
+ let e: i8 = 1;
+ let r: i8 = transmute(vqshrnh_n_s16::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqshrns_n_s32() {
+ let a: i32 = 4;
+ let e: i16 = 1;
+ let r: i16 = transmute(vqshrns_n_s32::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqshrn_high_n_s16() {
+ let a: i8x8 = i8x8::new(0, 1, 8, 9, 8, 9, 10, 11);
+ let b: i16x8 = i16x8::new(32, 36, 40, 44, 48, 52, 56, 60);
+ let e: i8x16 = i8x16::new(0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15);
+ let r: i8x16 = transmute(vqshrn_high_n_s16::<2>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqshrn_high_n_s32() {
+ let a: i16x4 = i16x4::new(0, 1, 8, 9);
+ let b: i32x4 = i32x4::new(32, 36, 40, 44);
+ let e: i16x8 = i16x8::new(0, 1, 8, 9, 8, 9, 10, 11);
+ let r: i16x8 = transmute(vqshrn_high_n_s32::<2>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqshrn_high_n_s64() {
+ let a: i32x2 = i32x2::new(0, 1);
+ let b: i64x2 = i64x2::new(32, 36);
+ let e: i32x4 = i32x4::new(0, 1, 8, 9);
+ let r: i32x4 = transmute(vqshrn_high_n_s64::<2>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqshrnd_n_u64() {
+ let a: u64 = 0;
+ let e: u32 = 0;
+ let r: u32 = transmute(vqshrnd_n_u64::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqshrnh_n_u16() {
+ let a: u16 = 4;
+ let e: u8 = 1;
+ let r: u8 = transmute(vqshrnh_n_u16::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqshrns_n_u32() {
+ let a: u32 = 4;
+ let e: u16 = 1;
+ let r: u16 = transmute(vqshrns_n_u32::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqshrn_high_n_u16() {
+ let a: u8x8 = u8x8::new(0, 1, 8, 9, 8, 9, 10, 11);
+ let b: u16x8 = u16x8::new(32, 36, 40, 44, 48, 52, 56, 60);
+ let e: u8x16 = u8x16::new(0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15);
+ let r: u8x16 = transmute(vqshrn_high_n_u16::<2>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqshrn_high_n_u32() {
+ let a: u16x4 = u16x4::new(0, 1, 8, 9);
+ let b: u32x4 = u32x4::new(32, 36, 40, 44);
+ let e: u16x8 = u16x8::new(0, 1, 8, 9, 8, 9, 10, 11);
+ let r: u16x8 = transmute(vqshrn_high_n_u32::<2>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqshrn_high_n_u64() {
+ let a: u32x2 = u32x2::new(0, 1);
+ let b: u64x2 = u64x2::new(32, 36);
+ let e: u32x4 = u32x4::new(0, 1, 8, 9);
+ let r: u32x4 = transmute(vqshrn_high_n_u64::<2>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqshrunh_n_s16() {
+ let a: i16 = 4;
+ let e: u8 = 1;
+ let r: u8 = transmute(vqshrunh_n_s16::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqshruns_n_s32() {
+ let a: i32 = 4;
+ let e: u16 = 1;
+ let r: u16 = transmute(vqshruns_n_s32::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqshrund_n_s64() {
+ let a: i64 = 4;
+ let e: u32 = 1;
+ let r: u32 = transmute(vqshrund_n_s64::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqshrun_high_n_s16() {
+ let a: u8x8 = u8x8::new(0, 1, 8, 9, 8, 9, 10, 11);
+ let b: i16x8 = i16x8::new(32, 36, 40, 44, 48, 52, 56, 60);
+ let e: u8x16 = u8x16::new(0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15);
+ let r: u8x16 = transmute(vqshrun_high_n_s16::<2>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqshrun_high_n_s32() {
+ let a: u16x4 = u16x4::new(0, 1, 8, 9);
+ let b: i32x4 = i32x4::new(32, 36, 40, 44);
+ let e: u16x8 = u16x8::new(0, 1, 8, 9, 8, 9, 10, 11);
+ let r: u16x8 = transmute(vqshrun_high_n_s32::<2>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqshrun_high_n_s64() {
+ let a: u32x2 = u32x2::new(0, 1);
+ let b: i64x2 = i64x2::new(32, 36);
+ let e: u32x4 = u32x4::new(0, 1, 8, 9);
+ let r: u32x4 = transmute(vqshrun_high_n_s64::<2>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
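+ // vsqadd*: unsigned saturating add of a *signed* operand to an unsigned
+ // accumulator (USQADD); the small positive operands below stay well inside
+ // the unsigned range.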
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vsqaddb_u8() {
+ let a: u8 = 2;
+ let b: i8 = 2;
+ let e: u8 = 4;
+ let r: u8 = transmute(vsqaddb_u8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vsqaddh_u16() {
+ let a: u16 = 2;
+ let b: i16 = 2;
+ let e: u16 = 4;
+ let r: u16 = transmute(vsqaddh_u16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vsqadds_u32() {
+ let a: u32 = 2;
+ let b: i32 = 2;
+ let e: u32 = 4;
+ let r: u32 = transmute(vsqadds_u32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vsqaddd_u64() {
+ let a: u64 = 2;
+ let b: i64 = 2;
+ let e: u64 = 4;
+ let r: u64 = transmute(vsqaddd_u64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
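+ // vsqrt*: element-wise IEEE square root (FSQRT); exact for the perfect
+ // squares used below.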
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vsqrt_f32() {
+ let a: f32x2 = f32x2::new(4.0, 9.0);
+ let e: f32x2 = f32x2::new(2.0, 3.0);
+ let r: f32x2 = transmute(vsqrt_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vsqrtq_f32() {
+ let a: f32x4 = f32x4::new(4.0, 9.0, 16.0, 25.0);
+ let e: f32x4 = f32x4::new(2.0, 3.0, 4.0, 5.0);
+ let r: f32x4 = transmute(vsqrtq_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vsqrt_f64() {
+ let a: f64 = 4.0;
+ let e: f64 = 2.0;
+ let r: f64 = transmute(vsqrt_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vsqrtq_f64() {
+ let a: f64x2 = f64x2::new(4.0, 9.0);
+ let e: f64x2 = f64x2::new(2.0, 3.0);
+ let r: f64x2 = transmute(vsqrtq_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
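+ // vrsqrte*: reciprocal square-root *estimate* (FRSQRTE) — a table-based
+ // approximation, hence 1/sqrt(1.0) coming out as 0.998046875 rather than 1.0.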
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrsqrte_f64() {
+ let a: f64 = 1.0;
+ let e: f64 = 0.998046875;
+ let r: f64 = transmute(vrsqrte_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrsqrteq_f64() {
+ let a: f64x2 = f64x2::new(1.0, 2.0);
+ let e: f64x2 = f64x2::new(0.998046875, 0.705078125);
+ let r: f64x2 = transmute(vrsqrteq_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrsqrtes_f32() {
+ let a: f32 = 1.0;
+ let e: f32 = 0.998046875;
+ let r: f32 = transmute(vrsqrtes_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrsqrted_f64() {
+ let a: f64 = 1.0;
+ let e: f64 = 0.998046875;
+ let r: f64 = transmute(vrsqrted_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
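+ // vrsqrts*: Newton-Raphson step for the reciprocal square root (FRSQRTS),
+ // computing (3 - a*b) / 2; e.g. (3 - 1*1)/2 == 1 and (3 - 2*2)/2 == -0.5
+ // in the vectors below.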
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrsqrts_f64() {
+ let a: f64 = 1.0;
+ let b: f64 = 1.0;
+ let e: f64 = 1.;
+ let r: f64 = transmute(vrsqrts_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrsqrtsq_f64() {
+ let a: f64x2 = f64x2::new(1.0, 2.0);
+ let b: f64x2 = f64x2::new(1.0, 2.0);
+ let e: f64x2 = f64x2::new(1., -0.5);
+ let r: f64x2 = transmute(vrsqrtsq_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrsqrtss_f32() {
+ let a: f32 = 1.0;
+ let b: f32 = 1.0;
+ let e: f32 = 1.;
+ let r: f32 = transmute(vrsqrtss_f32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrsqrtsd_f64() {
+ let a: f64 = 1.0;
+ let b: f64 = 1.0;
+ let e: f64 = 1.;
+ let r: f64 = transmute(vrsqrtsd_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrecpe_f64() {
+ let a: f64 = 4.0;
+ let e: f64 = 0.24951171875;
+ let r: f64 = transmute(vrecpe_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrecpeq_f64() {
+ let a: f64x2 = f64x2::new(4.0, 3.0);
+ let e: f64x2 = f64x2::new(0.24951171875, 0.3330078125);
+ let r: f64x2 = transmute(vrecpeq_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrecpes_f32() {
+ let a: f32 = 4.0;
+ let e: f32 = 0.24951171875;
+ let r: f32 = transmute(vrecpes_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrecped_f64() {
+ let a: f64 = 4.0;
+ let e: f64 = 0.24951171875;
+ let r: f64 = transmute(vrecped_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
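+ // vrecps*: Newton-Raphson step for the reciprocal (FRECPS), computing
+ // 2 - a*b; e.g. 2 - 4*4 == -14 below. (vrecpe above is the matching
+ // table-based estimate.)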
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrecps_f64() {
+ let a: f64 = 4.0;
+ let b: f64 = 4.0;
+ let e: f64 = -14.;
+ let r: f64 = transmute(vrecps_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrecpsq_f64() {
+ let a: f64x2 = f64x2::new(4.0, 3.0);
+ let b: f64x2 = f64x2::new(4.0, 3.0);
+ let e: f64x2 = f64x2::new(-14., -7.);
+ let r: f64x2 = transmute(vrecpsq_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrecpss_f32() {
+ let a: f32 = 4.0;
+ let b: f32 = 4.0;
+ let e: f32 = -14.;
+ let r: f32 = transmute(vrecpss_f32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrecpsd_f64() {
+ let a: f64 = 4.0;
+ let b: f64 = 4.0;
+ let e: f64 = -14.;
+ let r: f64 = transmute(vrecpsd_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrecpxs_f32() {
+ let a: f32 = 4.0;
+ let e: f32 = 0.5;
+ let r: f32 = transmute(vrecpxs_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrecpxd_f64() {
+ let a: f64 = 4.0;
+ let e: f64 = 0.5;
+ let r: f64 = transmute(vrecpxd_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
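+ // The vreinterpret* family below is a pure bit-pattern cast between vector
+ // types of the same overall size — no conversion is performed, so an
+ // all-zero input stays all-zero in every target type.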
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpret_s64_p64() {
+ let a: i64x1 = i64x1::new(0);
+ let e: i64x1 = i64x1::new(0);
+ let r: i64x1 = transmute(vreinterpret_s64_p64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpret_u64_p64() {
+ let a: i64x1 = i64x1::new(0);
+ let e: u64x1 = u64x1::new(0);
+ let r: u64x1 = transmute(vreinterpret_u64_p64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpret_p64_s64() {
+ let a: i64x1 = i64x1::new(0);
+ let e: i64x1 = i64x1::new(0);
+ let r: i64x1 = transmute(vreinterpret_p64_s64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpret_p64_u64() {
+ let a: u64x1 = u64x1::new(0);
+ let e: i64x1 = i64x1::new(0);
+ let r: i64x1 = transmute(vreinterpret_p64_u64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpretq_s64_p64() {
+ let a: i64x2 = i64x2::new(0, 1);
+ let e: i64x2 = i64x2::new(0, 1);
+ let r: i64x2 = transmute(vreinterpretq_s64_p64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpretq_u64_p64() {
+ let a: i64x2 = i64x2::new(0, 1);
+ let e: u64x2 = u64x2::new(0, 1);
+ let r: u64x2 = transmute(vreinterpretq_u64_p64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpretq_p64_s64() {
+ let a: i64x2 = i64x2::new(0, 1);
+ let e: i64x2 = i64x2::new(0, 1);
+ let r: i64x2 = transmute(vreinterpretq_p64_s64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpretq_p64_u64() {
+ let a: u64x2 = u64x2::new(0, 1);
+ let e: i64x2 = i64x2::new(0, 1);
+ let r: i64x2 = transmute(vreinterpretq_p64_u64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpret_s8_f64() {
+ let a: f64 = 0.;
+ let e: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+ let r: i8x8 = transmute(vreinterpret_s8_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpret_s16_f64() {
+ let a: f64 = 0.;
+ let e: i16x4 = i16x4::new(0, 0, 0, 0);
+ let r: i16x4 = transmute(vreinterpret_s16_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpret_s32_f64() {
+ let a: f64 = 0.;
+ let e: i32x2 = i32x2::new(0, 0);
+ let r: i32x2 = transmute(vreinterpret_s32_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpret_s64_f64() {
+ let a: f64 = 0.;
+ let e: i64x1 = i64x1::new(0);
+ let r: i64x1 = transmute(vreinterpret_s64_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpretq_s8_f64() {
+ let a: f64x2 = f64x2::new(0., 0.);
+ let e: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ let r: i8x16 = transmute(vreinterpretq_s8_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpretq_s16_f64() {
+ let a: f64x2 = f64x2::new(0., 0.);
+ let e: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+ let r: i16x8 = transmute(vreinterpretq_s16_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpretq_s32_f64() {
+ let a: f64x2 = f64x2::new(0., 0.);
+ let e: i32x4 = i32x4::new(0, 0, 0, 0);
+ let r: i32x4 = transmute(vreinterpretq_s32_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpretq_s64_f64() {
+ let a: f64x2 = f64x2::new(0., 0.);
+ let e: i64x2 = i64x2::new(0, 0);
+ let r: i64x2 = transmute(vreinterpretq_s64_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpret_u8_f64() {
+ let a: f64 = 0.;
+ let e: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+ let r: u8x8 = transmute(vreinterpret_u8_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpret_u16_f64() {
+ let a: f64 = 0.;
+ let e: u16x4 = u16x4::new(0, 0, 0, 0);
+ let r: u16x4 = transmute(vreinterpret_u16_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpret_u32_f64() {
+ let a: f64 = 0.;
+ let e: u32x2 = u32x2::new(0, 0);
+ let r: u32x2 = transmute(vreinterpret_u32_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpret_u64_f64() {
+ let a: f64 = 0.;
+ let e: u64x1 = u64x1::new(0);
+ let r: u64x1 = transmute(vreinterpret_u64_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpretq_u8_f64() {
+ let a: f64x2 = f64x2::new(0., 0.);
+ let e: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ let r: u8x16 = transmute(vreinterpretq_u8_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpretq_u16_f64() {
+ let a: f64x2 = f64x2::new(0., 0.);
+ let e: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+ let r: u16x8 = transmute(vreinterpretq_u16_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpretq_u32_f64() {
+ let a: f64x2 = f64x2::new(0., 0.);
+ let e: u32x4 = u32x4::new(0, 0, 0, 0);
+ let r: u32x4 = transmute(vreinterpretq_u32_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpretq_u64_f64() {
+ let a: f64x2 = f64x2::new(0., 0.);
+ let e: u64x2 = u64x2::new(0, 0);
+ let r: u64x2 = transmute(vreinterpretq_u64_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpret_p8_f64() {
+ let a: f64 = 0.;
+ let e: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+ let r: i8x8 = transmute(vreinterpret_p8_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpret_p16_f64() {
+ let a: f64 = 0.;
+ let e: i16x4 = i16x4::new(0, 0, 0, 0);
+ let r: i16x4 = transmute(vreinterpret_p16_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpret_p64_f32() {
+ let a: f32x2 = f32x2::new(0., 0.);
+ let e: i64x1 = i64x1::new(0);
+ let r: i64x1 = transmute(vreinterpret_p64_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpret_p64_f64() {
+ let a: f64 = 0.;
+ let e: i64x1 = i64x1::new(0);
+ let r: i64x1 = transmute(vreinterpret_p64_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpretq_p8_f64() {
+ let a: f64x2 = f64x2::new(0., 0.);
+ let e: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ let r: i8x16 = transmute(vreinterpretq_p8_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpretq_p16_f64() {
+ let a: f64x2 = f64x2::new(0., 0.);
+ let e: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+ let r: i16x8 = transmute(vreinterpretq_p16_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpretq_p64_f32() {
+ let a: f32x4 = f32x4::new(0., 0., 0., 0.);
+ let e: i64x2 = i64x2::new(0, 0);
+ let r: i64x2 = transmute(vreinterpretq_p64_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpretq_p64_f64() {
+ let a: f64x2 = f64x2::new(0., 0.);
+ let e: i64x2 = i64x2::new(0, 0);
+ let r: i64x2 = transmute(vreinterpretq_p64_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpretq_p128_f64() {
+ let a: f64x2 = f64x2::new(0., 0.);
+ let e: p128 = 0;
+ let r: p128 = transmute(vreinterpretq_p128_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpret_f64_s8() {
+ let a: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+ let e: f64 = 0.;
+ let r: f64 = transmute(vreinterpret_f64_s8(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpret_f64_s16() {
+ let a: i16x4 = i16x4::new(0, 0, 0, 0);
+ let e: f64 = 0.;
+ let r: f64 = transmute(vreinterpret_f64_s16(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpret_f64_s32() {
+ let a: i32x2 = i32x2::new(0, 0);
+ let e: f64 = 0.;
+ let r: f64 = transmute(vreinterpret_f64_s32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpret_f64_s64() {
+ let a: i64x1 = i64x1::new(0);
+ let e: f64 = 0.;
+ let r: f64 = transmute(vreinterpret_f64_s64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpretq_f64_s8() {
+ let a: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ let e: f64x2 = f64x2::new(0., 0.);
+ let r: f64x2 = transmute(vreinterpretq_f64_s8(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpretq_f64_s16() {
+ let a: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+ let e: f64x2 = f64x2::new(0., 0.);
+ let r: f64x2 = transmute(vreinterpretq_f64_s16(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpretq_f64_s32() {
+ let a: i32x4 = i32x4::new(0, 0, 0, 0);
+ let e: f64x2 = f64x2::new(0., 0.);
+ let r: f64x2 = transmute(vreinterpretq_f64_s32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpretq_f64_s64() {
+ let a: i64x2 = i64x2::new(0, 0);
+ let e: f64x2 = f64x2::new(0., 0.);
+ let r: f64x2 = transmute(vreinterpretq_f64_s64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpret_f64_p8() {
+ let a: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+ let e: f64 = 0.;
+ let r: f64 = transmute(vreinterpret_f64_p8(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpret_f64_u16() {
+ let a: u16x4 = u16x4::new(0, 0, 0, 0);
+ let e: f64 = 0.;
+ let r: f64 = transmute(vreinterpret_f64_u16(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpret_f64_u32() {
+ let a: u32x2 = u32x2::new(0, 0);
+ let e: f64 = 0.;
+ let r: f64 = transmute(vreinterpret_f64_u32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpret_f64_u64() {
+ let a: u64x1 = u64x1::new(0);
+ let e: f64 = 0.;
+ let r: f64 = transmute(vreinterpret_f64_u64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpretq_f64_p8() {
+ let a: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ let e: f64x2 = f64x2::new(0., 0.);
+ let r: f64x2 = transmute(vreinterpretq_f64_p8(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpretq_f64_u16() {
+ let a: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+ let e: f64x2 = f64x2::new(0., 0.);
+ let r: f64x2 = transmute(vreinterpretq_f64_u16(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpretq_f64_u32() {
+ let a: u32x4 = u32x4::new(0, 0, 0, 0);
+ let e: f64x2 = f64x2::new(0., 0.);
+ let r: f64x2 = transmute(vreinterpretq_f64_u32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpretq_f64_u64() {
+ let a: u64x2 = u64x2::new(0, 0);
+ let e: f64x2 = f64x2::new(0., 0.);
+ let r: f64x2 = transmute(vreinterpretq_f64_u64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpret_f64_u8() {
+ let a: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+ let e: f64 = 0.;
+ let r: f64 = transmute(vreinterpret_f64_u8(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpret_f64_p16() {
+ let a: i16x4 = i16x4::new(0, 0, 0, 0);
+ let e: f64 = 0.;
+ let r: f64 = transmute(vreinterpret_f64_p16(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpret_f64_p64() {
+ let a: i64x1 = i64x1::new(0);
+ let e: f64 = 0.;
+ let r: f64 = transmute(vreinterpret_f64_p64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpret_f32_p64() {
+ let a: i64x1 = i64x1::new(0);
+ let e: f32x2 = f32x2::new(0., 0.);
+ let r: f32x2 = transmute(vreinterpret_f32_p64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpretq_f64_u8() {
+ let a: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ let e: f64x2 = f64x2::new(0., 0.);
+ let r: f64x2 = transmute(vreinterpretq_f64_u8(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpretq_f64_p16() {
+ let a: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+ let e: f64x2 = f64x2::new(0., 0.);
+ let r: f64x2 = transmute(vreinterpretq_f64_p16(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpretq_f64_p64() {
+ let a: i64x2 = i64x2::new(0, 0);
+ let e: f64x2 = f64x2::new(0., 0.);
+ let r: f64x2 = transmute(vreinterpretq_f64_p64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpretq_f32_p64() {
+ let a: i64x2 = i64x2::new(0, 0);
+ let e: f32x4 = f32x4::new(0., 0., 0., 0.);
+ let r: f32x4 = transmute(vreinterpretq_f32_p64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpretq_f64_p128() {
+ let a: p128 = 0;
+ let e: f64x2 = f64x2::new(0., 0.);
+ let r: f64x2 = transmute(vreinterpretq_f64_p128(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpret_f64_f32() {
+ let a: f32x2 = f32x2::new(0., 0.);
+ let e: f64 = 0.;
+ let r: f64 = transmute(vreinterpret_f64_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpret_f32_f64() {
+ let a: f64 = 0.;
+ let e: f32x2 = f32x2::new(0., 0.);
+ let r: f32x2 = transmute(vreinterpret_f32_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpretq_f64_f32() {
+ let a: f32x4 = f32x4::new(0., 0., 0., 0.);
+ let e: f64x2 = f64x2::new(0., 0.);
+ let r: f64x2 = transmute(vreinterpretq_f64_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vreinterpretq_f32_f64() {
+ let a: f64x2 = f64x2::new(0., 0.);
+ let e: f32x4 = f32x4::new(0., 0., 0., 0.);
+ let r: f32x4 = transmute(vreinterpretq_f32_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
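+ // vrshl/vrshr/vrsra: rounding (non-saturating) shifts. A right shift by N
+ // adds 1 << (N-1) before shifting, so vrshrd_n_s64::<2>(4) == (4 + 2) >> 2
+ // == 1, and vrsrad accumulates that rounded shift onto `a`.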
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrshld_s64() {
+ let a: i64 = 1;
+ let b: i64 = 2;
+ let e: i64 = 4;
+ let r: i64 = transmute(vrshld_s64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrshld_u64() {
+ let a: u64 = 1;
+ let b: i64 = 2;
+ let e: u64 = 4;
+ let r: u64 = transmute(vrshld_u64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrshrd_n_s64() {
+ let a: i64 = 4;
+ let e: i64 = 1;
+ let r: i64 = transmute(vrshrd_n_s64::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrshrd_n_u64() {
+ let a: u64 = 4;
+ let e: u64 = 1;
+ let r: u64 = transmute(vrshrd_n_u64::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrshrn_high_n_s16() {
+ let a: i8x8 = i8x8::new(0, 1, 8, 9, 8, 9, 10, 11);
+ let b: i16x8 = i16x8::new(32, 36, 40, 44, 48, 52, 56, 60);
+ let e: i8x16 = i8x16::new(0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15);
+ let r: i8x16 = transmute(vrshrn_high_n_s16::<2>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrshrn_high_n_s32() {
+ let a: i16x4 = i16x4::new(0, 1, 8, 9);
+ let b: i32x4 = i32x4::new(32, 36, 40, 44);
+ let e: i16x8 = i16x8::new(0, 1, 8, 9, 8, 9, 10, 11);
+ let r: i16x8 = transmute(vrshrn_high_n_s32::<2>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrshrn_high_n_s64() {
+ let a: i32x2 = i32x2::new(0, 1);
+ let b: i64x2 = i64x2::new(32, 36);
+ let e: i32x4 = i32x4::new(0, 1, 8, 9);
+ let r: i32x4 = transmute(vrshrn_high_n_s64::<2>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrshrn_high_n_u16() {
+ let a: u8x8 = u8x8::new(0, 1, 8, 9, 8, 9, 10, 11);
+ let b: u16x8 = u16x8::new(32, 36, 40, 44, 48, 52, 56, 60);
+ let e: u8x16 = u8x16::new(0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15);
+ let r: u8x16 = transmute(vrshrn_high_n_u16::<2>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrshrn_high_n_u32() {
+ let a: u16x4 = u16x4::new(0, 1, 8, 9);
+ let b: u32x4 = u32x4::new(32, 36, 40, 44);
+ let e: u16x8 = u16x8::new(0, 1, 8, 9, 8, 9, 10, 11);
+ let r: u16x8 = transmute(vrshrn_high_n_u32::<2>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrshrn_high_n_u64() {
+ let a: u32x2 = u32x2::new(0, 1);
+ let b: u64x2 = u64x2::new(32, 36);
+ let e: u32x4 = u32x4::new(0, 1, 8, 9);
+ let r: u32x4 = transmute(vrshrn_high_n_u64::<2>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrsrad_n_s64() {
+ let a: i64 = 1;
+ let b: i64 = 4;
+ let e: i64 = 2;
+ let r: i64 = transmute(vrsrad_n_s64::<2>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrsrad_n_u64() {
+ let a: u64 = 1;
+ let b: u64 = 4;
+ let e: u64 = 2;
+ let r: u64 = transmute(vrsrad_n_u64::<2>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
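+ // vrsubhn_high*: rounding subtract returning the narrowed high half of
+ // (b - c), appended to `a`; with b == c each difference rounds to 0, which is
+ // why the upper lanes below are zero.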
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrsubhn_high_s16() {
+ let a: i8x8 = i8x8::new(1, 2, 0, 0, 0, 0, 0, 0);
+ let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+ let c: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+ let e: i8x16 = i8x16::new(1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ let r: i8x16 = transmute(vrsubhn_high_s16(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrsubhn_high_s32() {
+ let a: i16x4 = i16x4::new(1, 2, 0, 0);
+ let b: i32x4 = i32x4::new(1, 2, 3, 4);
+ let c: i32x4 = i32x4::new(1, 2, 3, 4);
+ let e: i16x8 = i16x8::new(1, 2, 0, 0, 0, 0, 0, 0);
+ let r: i16x8 = transmute(vrsubhn_high_s32(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrsubhn_high_s64() {
+ let a: i32x2 = i32x2::new(1, 2);
+ let b: i64x2 = i64x2::new(1, 2);
+ let c: i64x2 = i64x2::new(1, 2);
+ let e: i32x4 = i32x4::new(1, 2, 0, 0);
+ let r: i32x4 = transmute(vrsubhn_high_s64(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrsubhn_high_u16() {
+ let a: u8x8 = u8x8::new(1, 2, 0, 0, 0, 0, 0, 0);
+ let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+ let c: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+ let e: u8x16 = u8x16::new(1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ let r: u8x16 = transmute(vrsubhn_high_u16(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrsubhn_high_u32() {
+ let a: u16x4 = u16x4::new(1, 2, 0, 0);
+ let b: u32x4 = u32x4::new(1, 2, 3, 4);
+ let c: u32x4 = u32x4::new(1, 2, 3, 4);
+ let e: u16x8 = u16x8::new(1, 2, 0, 0, 0, 0, 0, 0);
+ let r: u16x8 = transmute(vrsubhn_high_u32(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vrsubhn_high_u64() {
+ let a: u32x2 = u32x2::new(1, 2);
+ let b: u64x2 = u64x2::new(1, 2);
+ let c: u64x2 = u64x2::new(1, 2);
+ let e: u32x4 = u32x4::new(1, 2, 0, 0);
+ let r: u32x4 = transmute(vrsubhn_high_u64(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
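+ // vset_lane/vsetq_lane: insert a scalar into lane N of a vector, leaving the
+ // other lanes untouched.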
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vset_lane_f64() {
+ let a: f64 = 1.;
+ let b: f64 = 0.;
+ let e: f64 = 1.;
+ let r: f64 = transmute(vset_lane_f64::<0>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vsetq_lane_f64() {
+ let a: f64 = 1.;
+ let b: f64x2 = f64x2::new(0., 2.);
+ let e: f64x2 = f64x2::new(1., 2.);
+ let r: f64x2 = transmute(vsetq_lane_f64::<0>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vshld_s64() {
+ let a: i64 = 1;
+ let b: i64 = 2;
+ let e: i64 = 4;
+ let r: i64 = transmute(vshld_s64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vshld_u64() {
+ let a: u64 = 1;
+ let b: i64 = 2;
+ let e: u64 = 4;
+ let r: u64 = transmute(vshld_u64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
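+ // vshll_high_n*: widen the *upper* half of the input vector and shift left by
+ // an immediate; below, upper lanes 1..=8 shifted by 2 become 4, 8, ..., 32.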
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vshll_high_n_s8() {
+ let a: i8x16 = i8x16::new(0, 0, 1, 2, 1, 2, 3, 4, 1, 2, 3, 4, 5, 6, 7, 8);
+ let e: i16x8 = i16x8::new(4, 8, 12, 16, 20, 24, 28, 32);
+ let r: i16x8 = transmute(vshll_high_n_s8::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vshll_high_n_s16() {
+ let a: i16x8 = i16x8::new(0, 0, 1, 2, 1, 2, 3, 4);
+ let e: i32x4 = i32x4::new(4, 8, 12, 16);
+ let r: i32x4 = transmute(vshll_high_n_s16::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vshll_high_n_s32() {
+ let a: i32x4 = i32x4::new(0, 0, 1, 2);
+ let e: i64x2 = i64x2::new(4, 8);
+ let r: i64x2 = transmute(vshll_high_n_s32::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vshll_high_n_u8() {
+ let a: u8x16 = u8x16::new(0, 0, 1, 2, 1, 2, 3, 4, 1, 2, 3, 4, 5, 6, 7, 8);
+ let e: u16x8 = u16x8::new(4, 8, 12, 16, 20, 24, 28, 32);
+ let r: u16x8 = transmute(vshll_high_n_u8::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vshll_high_n_u16() {
+ let a: u16x8 = u16x8::new(0, 0, 1, 2, 1, 2, 3, 4);
+ let e: u32x4 = u32x4::new(4, 8, 12, 16);
+ let r: u32x4 = transmute(vshll_high_n_u16::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vshll_high_n_u32() {
+ let a: u32x4 = u32x4::new(0, 0, 1, 2);
+ let e: u64x2 = u64x2::new(4, 8);
+ let r: u64x2 = transmute(vshll_high_n_u32::<2>(transmute(a)));
+ assert_eq!(r, e);
+ }
+
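+ // vshrn_high_n*: truncating shift right, narrow, and append to `a`
+ // (no rounding, no saturation); e.g. 20 >> 2 == 5 below.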
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vshrn_high_n_s16() {
+ let a: i8x8 = i8x8::new(1, 2, 5, 6, 5, 6, 7, 8);
+ let b: i16x8 = i16x8::new(20, 24, 28, 32, 52, 56, 60, 64);
+ let e: i8x16 = i8x16::new(1, 2, 5, 6, 5, 6, 7, 8, 5, 6, 7, 8, 13, 14, 15, 16);
+ let r: i8x16 = transmute(vshrn_high_n_s16::<2>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vshrn_high_n_s32() {
+ let a: i16x4 = i16x4::new(1, 2, 5, 6);
+ let b: i32x4 = i32x4::new(20, 24, 28, 32);
+ let e: i16x8 = i16x8::new(1, 2, 5, 6, 5, 6, 7, 8);
+ let r: i16x8 = transmute(vshrn_high_n_s32::<2>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vshrn_high_n_s64() {
+ let a: i32x2 = i32x2::new(1, 2);
+ let b: i64x2 = i64x2::new(20, 24);
+ let e: i32x4 = i32x4::new(1, 2, 5, 6);
+ let r: i32x4 = transmute(vshrn_high_n_s64::<2>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vshrn_high_n_u16() {
+ let a: u8x8 = u8x8::new(1, 2, 5, 6, 5, 6, 7, 8);
+ let b: u16x8 = u16x8::new(20, 24, 28, 32, 52, 56, 60, 64);
+ let e: u8x16 = u8x16::new(1, 2, 5, 6, 5, 6, 7, 8, 5, 6, 7, 8, 13, 14, 15, 16);
+ let r: u8x16 = transmute(vshrn_high_n_u16::<2>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vshrn_high_n_u32() {
+ let a: u16x4 = u16x4::new(1, 2, 5, 6);
+ let b: u32x4 = u32x4::new(20, 24, 28, 32);
+ let e: u16x8 = u16x8::new(1, 2, 5, 6, 5, 6, 7, 8);
+ let r: u16x8 = transmute(vshrn_high_n_u32::<2>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vshrn_high_n_u64() {
+ let a: u32x2 = u32x2::new(1, 2);
+ let b: u64x2 = u64x2::new(20, 24);
+ let e: u32x4 = u32x4::new(1, 2, 5, 6);
+ let r: u32x4 = transmute(vshrn_high_n_u64::<2>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
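+ // The vsm3*/vsm4* tests below exercise the optional SM3/SM4 crypto extension
+ // (gated here behind the "sm4" target feature); the expected vectors are
+ // opaque instruction outputs rather than hand-derivable values.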
+ #[simd_test(enable = "neon,sm4")]
+ unsafe fn test_vsm3partw1q_u32() {
+ let a: u32x4 = u32x4::new(1, 2, 3, 4);
+ let b: u32x4 = u32x4::new(1, 2, 3, 4);
+ let c: u32x4 = u32x4::new(1, 2, 3, 4);
+ let e: u32x4 = u32x4::new(2147549312, 3221323968, 131329, 2684362752);
+ let r: u32x4 = transmute(vsm3partw1q_u32(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon,sm4")]
+ unsafe fn test_vsm3partw2q_u32() {
+ let a: u32x4 = u32x4::new(1, 2, 3, 4);
+ let b: u32x4 = u32x4::new(1, 2, 3, 4);
+ let c: u32x4 = u32x4::new(1, 2, 3, 4);
+ let e: u32x4 = u32x4::new(128, 256, 384, 1077977696);
+ let r: u32x4 = transmute(vsm3partw2q_u32(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon,sm4")]
+ unsafe fn test_vsm3ss1q_u32() {
+ let a: u32x4 = u32x4::new(1, 2, 3, 4);
+ let b: u32x4 = u32x4::new(1, 2, 3, 4);
+ let c: u32x4 = u32x4::new(1, 2, 3, 4);
+ let e: u32x4 = u32x4::new(0, 0, 0, 2098176);
+ let r: u32x4 = transmute(vsm3ss1q_u32(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon,sm4")]
+ unsafe fn test_vsm4ekeyq_u32() {
+ let a: u32x4 = u32x4::new(1, 2, 3, 4);
+ let b: u32x4 = u32x4::new(1, 2, 3, 4);
+ let e: u32x4 = u32x4::new(1784948604, 136020997, 2940231695, 3789947679);
+ let r: u32x4 = transmute(vsm4ekeyq_u32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon,sm4")]
+ unsafe fn test_vsm4eq_u32() {
+ let a: u32x4 = u32x4::new(1, 2, 3, 4);
+ let b: u32x4 = u32x4::new(1, 2, 3, 4);
+ let e: u32x4 = u32x4::new(1093874472, 3616769504, 3878330411, 2765298765);
+ let r: u32x4 = transmute(vsm4eq_u32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
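+ // The tests below need the "sha3" feature (SHA-512/SHA-3 helpers). vrax1q_u64
+ // is simple enough to check by hand: a ^ rotate_left(b, 1), e.g.
+ // 1 ^ rol(3, 1) == 1 ^ 6 == 7; the vsha512* expectations follow the
+ // instruction semantics directly.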
+ #[simd_test(enable = "neon,sha3")]
+ unsafe fn test_vrax1q_u64() {
+ let a: u64x2 = u64x2::new(1, 2);
+ let b: u64x2 = u64x2::new(3, 4);
+ let e: u64x2 = u64x2::new(7, 10);
+ let r: u64x2 = transmute(vrax1q_u64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon,sha3")]
+ unsafe fn test_vsha512hq_u64() {
+ let a: u64x2 = u64x2::new(1, 2);
+ let b: u64x2 = u64x2::new(3, 4);
+ let c: u64x2 = u64x2::new(5, 6);
+ let e: u64x2 = u64x2::new(11189044327219203, 7177611956453380);
+ let r: u64x2 = transmute(vsha512hq_u64(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon,sha3")]
+ unsafe fn test_vsha512h2q_u64() {
+ let a: u64x2 = u64x2::new(1, 2);
+ let b: u64x2 = u64x2::new(3, 4);
+ let c: u64x2 = u64x2::new(5, 6);
+ let e: u64x2 = u64x2::new(5770237651009406214, 349133864969);
+ let r: u64x2 = transmute(vsha512h2q_u64(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon,sha3")]
+ unsafe fn test_vsha512su0q_u64() {
+ let a: u64x2 = u64x2::new(1, 2);
+ let b: u64x2 = u64x2::new(3, 4);
+ let e: u64x2 = u64x2::new(144115188075855874, 9439544818968559619);
+ let r: u64x2 = transmute(vsha512su0q_u64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon,sha3")]
+ unsafe fn test_vsha512su1q_u64() {
+ let a: u64x2 = u64x2::new(1, 2);
+ let b: u64x2 = u64x2::new(3, 4);
+ let c: u64x2 = u64x2::new(5, 6);
+ let e: u64x2 = u64x2::new(105553116266526, 140737488355368);
+ let r: u64x2 = transmute(vsha512su1q_u64(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
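+ // vrnd32*/vrnd64* (FRINT32/FRINT64, feature "frintts"): round to an integral
+ // float representable as a 32-/64-bit integer; the `x` forms use the ambient
+ // rounding mode (ties-to-even by default, so 1.9 -> 2.0) and the `z` forms
+ // round toward zero (1.9 -> 1.0).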
+ #[simd_test(enable = "neon,frintts")]
+ unsafe fn test_vrnd32x_f32() {
+ let a: f32x2 = f32x2::new(1.1, 1.9);
+ let e: f32x2 = f32x2::new(1.0, 2.0);
+ let r: f32x2 = transmute(vrnd32x_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon,frintts")]
+ unsafe fn test_vrnd32xq_f32() {
+ let a: f32x4 = f32x4::new(1.1, 1.9, -1.7, -2.3);
+ let e: f32x4 = f32x4::new(1.0, 2.0, -2.0, -2.0);
+ let r: f32x4 = transmute(vrnd32xq_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon,frintts")]
+ unsafe fn test_vrnd32z_f32() {
+ let a: f32x2 = f32x2::new(1.1, 1.9);
+ let e: f32x2 = f32x2::new(1.0, 1.0);
+ let r: f32x2 = transmute(vrnd32z_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon,frintts")]
+ unsafe fn test_vrnd32zq_f32() {
+ let a: f32x4 = f32x4::new(1.1, 1.9, -1.7, -2.3);
+ let e: f32x4 = f32x4::new(1.0, 1.0, -1.0, -2.0);
+ let r: f32x4 = transmute(vrnd32zq_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon,frintts")]
+ unsafe fn test_vrnd64x_f32() {
+ let a: f32x2 = f32x2::new(1.1, 1.9);
+ let e: f32x2 = f32x2::new(1.0, 2.0);
+ let r: f32x2 = transmute(vrnd64x_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon,frintts")]
+ unsafe fn test_vrnd64xq_f32() {
+ let a: f32x4 = f32x4::new(1.1, 1.9, -1.7, -2.3);
+ let e: f32x4 = f32x4::new(1.0, 2.0, -2.0, -2.0);
+ let r: f32x4 = transmute(vrnd64xq_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon,frintts")]
+ unsafe fn test_vrnd64z_f32() {
+ let a: f32x2 = f32x2::new(1.1, 1.9);
+ let e: f32x2 = f32x2::new(1.0, 1.0);
+ let r: f32x2 = transmute(vrnd64z_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon,frintts")]
+ unsafe fn test_vrnd64zq_f32() {
+ let a: f32x4 = f32x4::new(1.1, 1.9, -1.7, -2.3);
+ let e: f32x4 = f32x4::new(1.0, 1.0, -1.0, -2.0);
+ let r: f32x4 = transmute(vrnd64zq_f32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
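+ // Transpose tests: TRN1 interleaves the even-indexed lanes of both inputs
+ // (r = [a0, b0, a2, b2, ...]) and TRN2 the odd-indexed lanes
+ // (r = [a1, b1, a3, b3, ...]); the inputs are chosen so a correct result
+ // simply counts upward.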
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn1_s8() {
+ let a: i8x8 = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14);
+ let b: i8x8 = i8x8::new(1, 3, 5, 7, 9, 11, 13, 15);
+ let e: i8x8 = i8x8::new(0, 1, 4, 5, 8, 9, 12, 13);
+ let r: i8x8 = transmute(vtrn1_s8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn1q_s8() {
+ let a: i8x16 = i8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
+ let b: i8x16 = i8x16::new(1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31);
+ let e: i8x16 = i8x16::new(0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29);
+ let r: i8x16 = transmute(vtrn1q_s8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn1_s16() {
+ let a: i16x4 = i16x4::new(0, 2, 4, 6);
+ let b: i16x4 = i16x4::new(1, 3, 5, 7);
+ let e: i16x4 = i16x4::new(0, 1, 4, 5);
+ let r: i16x4 = transmute(vtrn1_s16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn1q_s16() {
+ let a: i16x8 = i16x8::new(0, 2, 4, 6, 8, 10, 12, 14);
+ let b: i16x8 = i16x8::new(1, 3, 5, 7, 9, 11, 13, 15);
+ let e: i16x8 = i16x8::new(0, 1, 4, 5, 8, 9, 12, 13);
+ let r: i16x8 = transmute(vtrn1q_s16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn1q_s32() {
+ let a: i32x4 = i32x4::new(0, 2, 4, 6);
+ let b: i32x4 = i32x4::new(1, 3, 5, 7);
+ let e: i32x4 = i32x4::new(0, 1, 4, 5);
+ let r: i32x4 = transmute(vtrn1q_s32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn1_u8() {
+ let a: u8x8 = u8x8::new(0, 2, 4, 6, 8, 10, 12, 14);
+ let b: u8x8 = u8x8::new(1, 3, 5, 7, 9, 11, 13, 15);
+ let e: u8x8 = u8x8::new(0, 1, 4, 5, 8, 9, 12, 13);
+ let r: u8x8 = transmute(vtrn1_u8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn1q_u8() {
+ let a: u8x16 = u8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
+ let b: u8x16 = u8x16::new(1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31);
+ let e: u8x16 = u8x16::new(0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29);
+ let r: u8x16 = transmute(vtrn1q_u8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn1_u16() {
+ let a: u16x4 = u16x4::new(0, 2, 4, 6);
+ let b: u16x4 = u16x4::new(1, 3, 5, 7);
+ let e: u16x4 = u16x4::new(0, 1, 4, 5);
+ let r: u16x4 = transmute(vtrn1_u16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn1q_u16() {
+ let a: u16x8 = u16x8::new(0, 2, 4, 6, 8, 10, 12, 14);
+ let b: u16x8 = u16x8::new(1, 3, 5, 7, 9, 11, 13, 15);
+ let e: u16x8 = u16x8::new(0, 1, 4, 5, 8, 9, 12, 13);
+ let r: u16x8 = transmute(vtrn1q_u16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn1q_u32() {
+ let a: u32x4 = u32x4::new(0, 2, 4, 6);
+ let b: u32x4 = u32x4::new(1, 3, 5, 7);
+ let e: u32x4 = u32x4::new(0, 1, 4, 5);
+ let r: u32x4 = transmute(vtrn1q_u32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn1_p8() {
+ let a: i8x8 = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14);
+ let b: i8x8 = i8x8::new(1, 3, 5, 7, 9, 11, 13, 15);
+ let e: i8x8 = i8x8::new(0, 1, 4, 5, 8, 9, 12, 13);
+ let r: i8x8 = transmute(vtrn1_p8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn1q_p8() {
+ let a: i8x16 = i8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
+ let b: i8x16 = i8x16::new(1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31);
+ let e: i8x16 = i8x16::new(0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29);
+ let r: i8x16 = transmute(vtrn1q_p8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn1_p16() {
+ let a: i16x4 = i16x4::new(0, 2, 4, 6);
+ let b: i16x4 = i16x4::new(1, 3, 5, 7);
+ let e: i16x4 = i16x4::new(0, 1, 4, 5);
+ let r: i16x4 = transmute(vtrn1_p16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn1q_p16() {
+ let a: i16x8 = i16x8::new(0, 2, 4, 6, 8, 10, 12, 14);
+ let b: i16x8 = i16x8::new(1, 3, 5, 7, 9, 11, 13, 15);
+ let e: i16x8 = i16x8::new(0, 1, 4, 5, 8, 9, 12, 13);
+ let r: i16x8 = transmute(vtrn1q_p16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn1_s32() {
+ let a: i32x2 = i32x2::new(0, 2);
+ let b: i32x2 = i32x2::new(1, 3);
+ let e: i32x2 = i32x2::new(0, 1);
+ let r: i32x2 = transmute(vtrn1_s32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn1q_s64() {
+ let a: i64x2 = i64x2::new(0, 2);
+ let b: i64x2 = i64x2::new(1, 3);
+ let e: i64x2 = i64x2::new(0, 1);
+ let r: i64x2 = transmute(vtrn1q_s64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn1_u32() {
+ let a: u32x2 = u32x2::new(0, 2);
+ let b: u32x2 = u32x2::new(1, 3);
+ let e: u32x2 = u32x2::new(0, 1);
+ let r: u32x2 = transmute(vtrn1_u32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn1q_u64() {
+ let a: u64x2 = u64x2::new(0, 2);
+ let b: u64x2 = u64x2::new(1, 3);
+ let e: u64x2 = u64x2::new(0, 1);
+ let r: u64x2 = transmute(vtrn1q_u64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn1q_p64() {
+ let a: i64x2 = i64x2::new(0, 2);
+ let b: i64x2 = i64x2::new(1, 3);
+ let e: i64x2 = i64x2::new(0, 1);
+ let r: i64x2 = transmute(vtrn1q_p64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn1q_f32() {
+ let a: f32x4 = f32x4::new(0., 2., 4., 6.);
+ let b: f32x4 = f32x4::new(1., 3., 5., 7.);
+ let e: f32x4 = f32x4::new(0., 1., 4., 5.);
+ let r: f32x4 = transmute(vtrn1q_f32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn1_f32() {
+ let a: f32x2 = f32x2::new(0., 2.);
+ let b: f32x2 = f32x2::new(1., 3.);
+ let e: f32x2 = f32x2::new(0., 1.);
+ let r: f32x2 = transmute(vtrn1_f32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn1q_f64() {
+ let a: f64x2 = f64x2::new(0., 2.);
+ let b: f64x2 = f64x2::new(1., 3.);
+ let e: f64x2 = f64x2::new(0., 1.);
+ let r: f64x2 = transmute(vtrn1q_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn2_s8() {
+ let a: i8x8 = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14);
+ let b: i8x8 = i8x8::new(1, 3, 5, 7, 9, 11, 13, 15);
+ let e: i8x8 = i8x8::new(2, 3, 6, 7, 10, 11, 14, 15);
+ let r: i8x8 = transmute(vtrn2_s8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn2q_s8() {
+ let a: i8x16 = i8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
+ let b: i8x16 = i8x16::new(1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31);
+ let e: i8x16 = i8x16::new(2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31);
+ let r: i8x16 = transmute(vtrn2q_s8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn2_s16() {
+ let a: i16x4 = i16x4::new(0, 2, 4, 6);
+ let b: i16x4 = i16x4::new(1, 3, 5, 7);
+ let e: i16x4 = i16x4::new(2, 3, 6, 7);
+ let r: i16x4 = transmute(vtrn2_s16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn2q_s16() {
+ let a: i16x8 = i16x8::new(0, 2, 4, 6, 8, 10, 12, 14);
+ let b: i16x8 = i16x8::new(1, 3, 5, 7, 9, 11, 13, 15);
+ let e: i16x8 = i16x8::new(2, 3, 6, 7, 10, 11, 14, 15);
+ let r: i16x8 = transmute(vtrn2q_s16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn2q_s32() {
+ let a: i32x4 = i32x4::new(0, 2, 4, 6);
+ let b: i32x4 = i32x4::new(1, 3, 5, 7);
+ let e: i32x4 = i32x4::new(2, 3, 6, 7);
+ let r: i32x4 = transmute(vtrn2q_s32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn2_u8() {
+ let a: u8x8 = u8x8::new(0, 2, 4, 6, 8, 10, 12, 14);
+ let b: u8x8 = u8x8::new(1, 3, 5, 7, 9, 11, 13, 15);
+ let e: u8x8 = u8x8::new(2, 3, 6, 7, 10, 11, 14, 15);
+ let r: u8x8 = transmute(vtrn2_u8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn2q_u8() {
+ let a: u8x16 = u8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
+ let b: u8x16 = u8x16::new(1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31);
+ let e: u8x16 = u8x16::new(2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31);
+ let r: u8x16 = transmute(vtrn2q_u8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn2_u16() {
+ let a: u16x4 = u16x4::new(0, 2, 4, 6);
+ let b: u16x4 = u16x4::new(1, 3, 5, 7);
+ let e: u16x4 = u16x4::new(2, 3, 6, 7);
+ let r: u16x4 = transmute(vtrn2_u16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn2q_u16() {
+ let a: u16x8 = u16x8::new(0, 2, 4, 6, 8, 10, 12, 14);
+ let b: u16x8 = u16x8::new(1, 3, 5, 7, 9, 11, 13, 15);
+ let e: u16x8 = u16x8::new(2, 3, 6, 7, 10, 11, 14, 15);
+ let r: u16x8 = transmute(vtrn2q_u16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn2q_u32() {
+ let a: u32x4 = u32x4::new(0, 2, 4, 6);
+ let b: u32x4 = u32x4::new(1, 3, 5, 7);
+ let e: u32x4 = u32x4::new(2, 3, 6, 7);
+ let r: u32x4 = transmute(vtrn2q_u32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn2_p8() {
+ let a: i8x8 = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14);
+ let b: i8x8 = i8x8::new(1, 3, 5, 7, 9, 11, 13, 15);
+ let e: i8x8 = i8x8::new(2, 3, 6, 7, 10, 11, 14, 15);
+ let r: i8x8 = transmute(vtrn2_p8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn2q_p8() {
+ let a: i8x16 = i8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
+ let b: i8x16 = i8x16::new(1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31);
+ let e: i8x16 = i8x16::new(2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31);
+ let r: i8x16 = transmute(vtrn2q_p8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn2_p16() {
+ let a: i16x4 = i16x4::new(0, 2, 4, 6);
+ let b: i16x4 = i16x4::new(1, 3, 5, 7);
+ let e: i16x4 = i16x4::new(2, 3, 6, 7);
+ let r: i16x4 = transmute(vtrn2_p16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn2q_p16() {
+ let a: i16x8 = i16x8::new(0, 2, 4, 6, 8, 10, 12, 14);
+ let b: i16x8 = i16x8::new(1, 3, 5, 7, 9, 11, 13, 15);
+ let e: i16x8 = i16x8::new(2, 3, 6, 7, 10, 11, 14, 15);
+ let r: i16x8 = transmute(vtrn2q_p16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn2_s32() {
+ let a: i32x2 = i32x2::new(0, 2);
+ let b: i32x2 = i32x2::new(1, 3);
+ let e: i32x2 = i32x2::new(2, 3);
+ let r: i32x2 = transmute(vtrn2_s32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn2q_s64() {
+ let a: i64x2 = i64x2::new(0, 2);
+ let b: i64x2 = i64x2::new(1, 3);
+ let e: i64x2 = i64x2::new(2, 3);
+ let r: i64x2 = transmute(vtrn2q_s64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn2_u32() {
+ let a: u32x2 = u32x2::new(0, 2);
+ let b: u32x2 = u32x2::new(1, 3);
+ let e: u32x2 = u32x2::new(2, 3);
+ let r: u32x2 = transmute(vtrn2_u32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn2q_u64() {
+ let a: u64x2 = u64x2::new(0, 2);
+ let b: u64x2 = u64x2::new(1, 3);
+ let e: u64x2 = u64x2::new(2, 3);
+ let r: u64x2 = transmute(vtrn2q_u64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn2q_p64() {
+ let a: i64x2 = i64x2::new(0, 2);
+ let b: i64x2 = i64x2::new(1, 3);
+ let e: i64x2 = i64x2::new(2, 3);
+ let r: i64x2 = transmute(vtrn2q_p64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn2q_f32() {
+ let a: f32x4 = f32x4::new(0., 2., 4., 6.);
+ let b: f32x4 = f32x4::new(1., 3., 5., 7.);
+ let e: f32x4 = f32x4::new(2., 3., 6., 7.);
+ let r: f32x4 = transmute(vtrn2q_f32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn2_f32() {
+ let a: f32x2 = f32x2::new(0., 2.);
+ let b: f32x2 = f32x2::new(1., 3.);
+ let e: f32x2 = f32x2::new(2., 3.);
+ let r: f32x2 = transmute(vtrn2_f32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vtrn2q_f64() {
+ let a: f64x2 = f64x2::new(0., 2.);
+ let b: f64x2 = f64x2::new(1., 3.);
+ let e: f64x2 = f64x2::new(2., 3.);
+ let r: f64x2 = transmute(vtrn2q_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
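+ // Zip tests: ZIP1 interleaves the low halves of its inputs
+ // (r = [a0, b0, a1, b1, ...]) and ZIP2 interleaves the high halves.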
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip1_s8() {
+ let a: i8x8 = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14);
+ let b: i8x8 = i8x8::new(1, 3, 5, 7, 9, 11, 13, 15);
+ let e: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+ let r: i8x8 = transmute(vzip1_s8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip1q_s8() {
+ let a: i8x16 = i8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
+ let b: i8x16 = i8x16::new(1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31);
+ let e: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let r: i8x16 = transmute(vzip1q_s8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip1_s16() {
+ let a: i16x4 = i16x4::new(0, 2, 4, 6);
+ let b: i16x4 = i16x4::new(1, 3, 5, 7);
+ let e: i16x4 = i16x4::new(0, 1, 2, 3);
+ let r: i16x4 = transmute(vzip1_s16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip1q_s16() {
+ let a: i16x8 = i16x8::new(0, 2, 4, 6, 8, 10, 12, 14);
+ let b: i16x8 = i16x8::new(1, 3, 5, 7, 9, 11, 13, 15);
+ let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+ let r: i16x8 = transmute(vzip1q_s16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip1_s32() {
+ let a: i32x2 = i32x2::new(0, 2);
+ let b: i32x2 = i32x2::new(1, 3);
+ let e: i32x2 = i32x2::new(0, 1);
+ let r: i32x2 = transmute(vzip1_s32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip1q_s32() {
+ let a: i32x4 = i32x4::new(0, 2, 4, 6);
+ let b: i32x4 = i32x4::new(1, 3, 5, 7);
+ let e: i32x4 = i32x4::new(0, 1, 2, 3);
+ let r: i32x4 = transmute(vzip1q_s32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip1q_s64() {
+ let a: i64x2 = i64x2::new(0, 2);
+ let b: i64x2 = i64x2::new(1, 3);
+ let e: i64x2 = i64x2::new(0, 1);
+ let r: i64x2 = transmute(vzip1q_s64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip1_u8() {
+ let a: u8x8 = u8x8::new(0, 2, 4, 6, 8, 10, 12, 14);
+ let b: u8x8 = u8x8::new(1, 3, 5, 7, 9, 11, 13, 15);
+ let e: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+ let r: u8x8 = transmute(vzip1_u8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip1q_u8() {
+ let a: u8x16 = u8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
+ let b: u8x16 = u8x16::new(1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31);
+ let e: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let r: u8x16 = transmute(vzip1q_u8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip1_u16() {
+ let a: u16x4 = u16x4::new(0, 2, 4, 6);
+ let b: u16x4 = u16x4::new(1, 3, 5, 7);
+ let e: u16x4 = u16x4::new(0, 1, 2, 3);
+ let r: u16x4 = transmute(vzip1_u16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip1q_u16() {
+ let a: u16x8 = u16x8::new(0, 2, 4, 6, 8, 10, 12, 14);
+ let b: u16x8 = u16x8::new(1, 3, 5, 7, 9, 11, 13, 15);
+ let e: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+ let r: u16x8 = transmute(vzip1q_u16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip1_u32() {
+ let a: u32x2 = u32x2::new(0, 2);
+ let b: u32x2 = u32x2::new(1, 3);
+ let e: u32x2 = u32x2::new(0, 1);
+ let r: u32x2 = transmute(vzip1_u32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip1q_u32() {
+ let a: u32x4 = u32x4::new(0, 2, 4, 6);
+ let b: u32x4 = u32x4::new(1, 3, 5, 7);
+ let e: u32x4 = u32x4::new(0, 1, 2, 3);
+ let r: u32x4 = transmute(vzip1q_u32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip1q_u64() {
+ let a: u64x2 = u64x2::new(0, 2);
+ let b: u64x2 = u64x2::new(1, 3);
+ let e: u64x2 = u64x2::new(0, 1);
+ let r: u64x2 = transmute(vzip1q_u64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip1_p8() {
+ let a: i8x8 = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14);
+ let b: i8x8 = i8x8::new(1, 3, 5, 7, 9, 11, 13, 15);
+ let e: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+ let r: i8x8 = transmute(vzip1_p8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip1q_p8() {
+ let a: i8x16 = i8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
+ let b: i8x16 = i8x16::new(1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31);
+ let e: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let r: i8x16 = transmute(vzip1q_p8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip1_p16() {
+ let a: i16x4 = i16x4::new(0, 2, 4, 6);
+ let b: i16x4 = i16x4::new(1, 3, 5, 7);
+ let e: i16x4 = i16x4::new(0, 1, 2, 3);
+ let r: i16x4 = transmute(vzip1_p16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip1q_p16() {
+ let a: i16x8 = i16x8::new(0, 2, 4, 6, 8, 10, 12, 14);
+ let b: i16x8 = i16x8::new(1, 3, 5, 7, 9, 11, 13, 15);
+ let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+ let r: i16x8 = transmute(vzip1q_p16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip1q_p64() {
+ let a: i64x2 = i64x2::new(0, 2);
+ let b: i64x2 = i64x2::new(1, 3);
+ let e: i64x2 = i64x2::new(0, 1);
+ let r: i64x2 = transmute(vzip1q_p64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip1_f32() {
+ let a: f32x2 = f32x2::new(0., 2.);
+ let b: f32x2 = f32x2::new(1., 3.);
+ let e: f32x2 = f32x2::new(0., 1.);
+ let r: f32x2 = transmute(vzip1_f32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip1q_f32() {
+ let a: f32x4 = f32x4::new(0., 2., 4., 6.);
+ let b: f32x4 = f32x4::new(1., 3., 5., 7.);
+ let e: f32x4 = f32x4::new(0., 1., 2., 3.);
+ let r: f32x4 = transmute(vzip1q_f32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip1q_f64() {
+ let a: f64x2 = f64x2::new(0., 2.);
+ let b: f64x2 = f64x2::new(1., 3.);
+ let e: f64x2 = f64x2::new(0., 1.);
+ let r: f64x2 = transmute(vzip1q_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip2_s8() {
+ let a: i8x8 = i8x8::new(0, 16, 16, 18, 16, 18, 20, 22);
+ let b: i8x8 = i8x8::new(1, 17, 17, 19, 17, 19, 21, 23);
+ let e: i8x8 = i8x8::new(16, 17, 18, 19, 20, 21, 22, 23);
+ let r: i8x8 = transmute(vzip2_s8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip2q_s8() {
+ let a: i8x16 = i8x16::new(0, 16, 16, 18, 16, 18, 20, 22, 16, 18, 20, 22, 24, 26, 28, 30);
+ let b: i8x16 = i8x16::new(1, 17, 17, 19, 17, 19, 21, 23, 17, 19, 21, 23, 25, 27, 29, 31);
+ let e: i8x16 = i8x16::new(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
+ let r: i8x16 = transmute(vzip2q_s8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip2_s16() {
+ let a: i16x4 = i16x4::new(0, 16, 16, 18);
+ let b: i16x4 = i16x4::new(1, 17, 17, 19);
+ let e: i16x4 = i16x4::new(16, 17, 18, 19);
+ let r: i16x4 = transmute(vzip2_s16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip2q_s16() {
+ let a: i16x8 = i16x8::new(0, 16, 16, 18, 16, 18, 20, 22);
+ let b: i16x8 = i16x8::new(1, 17, 17, 19, 17, 19, 21, 23);
+ let e: i16x8 = i16x8::new(16, 17, 18, 19, 20, 21, 22, 23);
+ let r: i16x8 = transmute(vzip2q_s16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip2_s32() {
+ let a: i32x2 = i32x2::new(0, 16);
+ let b: i32x2 = i32x2::new(1, 17);
+ let e: i32x2 = i32x2::new(16, 17);
+ let r: i32x2 = transmute(vzip2_s32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip2q_s32() {
+ let a: i32x4 = i32x4::new(0, 16, 16, 18);
+ let b: i32x4 = i32x4::new(1, 17, 17, 19);
+ let e: i32x4 = i32x4::new(16, 17, 18, 19);
+ let r: i32x4 = transmute(vzip2q_s32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip2q_s64() {
+ let a: i64x2 = i64x2::new(0, 16);
+ let b: i64x2 = i64x2::new(1, 17);
+ let e: i64x2 = i64x2::new(16, 17);
+ let r: i64x2 = transmute(vzip2q_s64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip2_u8() {
+ let a: u8x8 = u8x8::new(0, 16, 16, 18, 16, 18, 20, 22);
+ let b: u8x8 = u8x8::new(1, 17, 17, 19, 17, 19, 21, 23);
+ let e: u8x8 = u8x8::new(16, 17, 18, 19, 20, 21, 22, 23);
+ let r: u8x8 = transmute(vzip2_u8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip2q_u8() {
+ let a: u8x16 = u8x16::new(0, 16, 16, 18, 16, 18, 20, 22, 16, 18, 20, 22, 24, 26, 28, 30);
+ let b: u8x16 = u8x16::new(1, 17, 17, 19, 17, 19, 21, 23, 17, 19, 21, 23, 25, 27, 29, 31);
+ let e: u8x16 = u8x16::new(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
+ let r: u8x16 = transmute(vzip2q_u8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip2_u16() {
+ let a: u16x4 = u16x4::new(0, 16, 16, 18);
+ let b: u16x4 = u16x4::new(1, 17, 17, 19);
+ let e: u16x4 = u16x4::new(16, 17, 18, 19);
+ let r: u16x4 = transmute(vzip2_u16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip2q_u16() {
+ let a: u16x8 = u16x8::new(0, 16, 16, 18, 16, 18, 20, 22);
+ let b: u16x8 = u16x8::new(1, 17, 17, 19, 17, 19, 21, 23);
+ let e: u16x8 = u16x8::new(16, 17, 18, 19, 20, 21, 22, 23);
+ let r: u16x8 = transmute(vzip2q_u16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip2_u32() {
+ let a: u32x2 = u32x2::new(0, 16);
+ let b: u32x2 = u32x2::new(1, 17);
+ let e: u32x2 = u32x2::new(16, 17);
+ let r: u32x2 = transmute(vzip2_u32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip2q_u32() {
+ let a: u32x4 = u32x4::new(0, 16, 16, 18);
+ let b: u32x4 = u32x4::new(1, 17, 17, 19);
+ let e: u32x4 = u32x4::new(16, 17, 18, 19);
+ let r: u32x4 = transmute(vzip2q_u32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip2q_u64() {
+ let a: u64x2 = u64x2::new(0, 16);
+ let b: u64x2 = u64x2::new(1, 17);
+ let e: u64x2 = u64x2::new(16, 17);
+ let r: u64x2 = transmute(vzip2q_u64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip2_p8() {
+ let a: i8x8 = i8x8::new(0, 16, 16, 18, 16, 18, 20, 22);
+ let b: i8x8 = i8x8::new(1, 17, 17, 19, 17, 19, 21, 23);
+ let e: i8x8 = i8x8::new(16, 17, 18, 19, 20, 21, 22, 23);
+ let r: i8x8 = transmute(vzip2_p8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip2q_p8() {
+ let a: i8x16 = i8x16::new(0, 16, 16, 18, 16, 18, 20, 22, 16, 18, 20, 22, 24, 26, 28, 30);
+ let b: i8x16 = i8x16::new(1, 17, 17, 19, 17, 19, 21, 23, 17, 19, 21, 23, 25, 27, 29, 31);
+ let e: i8x16 = i8x16::new(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
+ let r: i8x16 = transmute(vzip2q_p8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip2_p16() {
+ let a: i16x4 = i16x4::new(0, 16, 16, 18);
+ let b: i16x4 = i16x4::new(1, 17, 17, 19);
+ let e: i16x4 = i16x4::new(16, 17, 18, 19);
+ let r: i16x4 = transmute(vzip2_p16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip2q_p16() {
+ let a: i16x8 = i16x8::new(0, 16, 16, 18, 16, 18, 20, 22);
+ let b: i16x8 = i16x8::new(1, 17, 17, 19, 17, 19, 21, 23);
+ let e: i16x8 = i16x8::new(16, 17, 18, 19, 20, 21, 22, 23);
+ let r: i16x8 = transmute(vzip2q_p16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip2q_p64() {
+ let a: i64x2 = i64x2::new(0, 16);
+ let b: i64x2 = i64x2::new(1, 17);
+ let e: i64x2 = i64x2::new(16, 17);
+ let r: i64x2 = transmute(vzip2q_p64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip2_f32() {
+ let a: f32x2 = f32x2::new(0., 8.);
+ let b: f32x2 = f32x2::new(1., 9.);
+ let e: f32x2 = f32x2::new(8., 9.);
+ let r: f32x2 = transmute(vzip2_f32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip2q_f32() {
+ let a: f32x4 = f32x4::new(0., 8., 8., 10.);
+ let b: f32x4 = f32x4::new(1., 9., 9., 11.);
+ let e: f32x4 = f32x4::new(8., 9., 10., 11.);
+ let r: f32x4 = transmute(vzip2q_f32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzip2q_f64() {
+ let a: f64x2 = f64x2::new(0., 8.);
+ let b: f64x2 = f64x2::new(1., 9.);
+ let e: f64x2 = f64x2::new(8., 9.);
+ let r: f64x2 = transmute(vzip2q_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
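+ // Unzip tests: UZP1 concatenates the even-indexed lanes of `a` then `b`
+ // (r = [a0, a2, ..., b0, b2, ...]) and UZP2 the odd-indexed lanes.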
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp1_s8() {
+ let a: i8x8 = i8x8::new(1, 0, 2, 0, 2, 0, 3, 0);
+ let b: i8x8 = i8x8::new(2, 0, 3, 0, 7, 0, 8, 0);
+ let e: i8x8 = i8x8::new(1, 2, 2, 3, 2, 3, 7, 8);
+ let r: i8x8 = transmute(vuzp1_s8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp1q_s8() {
+ let a: i8x16 = i8x16::new(1, 0, 2, 0, 2, 0, 3, 0, 2, 0, 3, 0, 7, 0, 8, 0);
+ let b: i8x16 = i8x16::new(2, 0, 3, 0, 7, 0, 8, 0, 13, 0, 14, 0, 15, 0, 16, 0);
+ let e: i8x16 = i8x16::new(1, 2, 2, 3, 2, 3, 7, 8, 2, 3, 7, 8, 13, 14, 15, 16);
+ let r: i8x16 = transmute(vuzp1q_s8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp1_s16() {
+ let a: i16x4 = i16x4::new(1, 0, 2, 0);
+ let b: i16x4 = i16x4::new(2, 0, 3, 0);
+ let e: i16x4 = i16x4::new(1, 2, 2, 3);
+ let r: i16x4 = transmute(vuzp1_s16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp1q_s16() {
+ let a: i16x8 = i16x8::new(1, 0, 2, 0, 2, 0, 3, 0);
+ let b: i16x8 = i16x8::new(2, 0, 3, 0, 7, 0, 8, 0);
+ let e: i16x8 = i16x8::new(1, 2, 2, 3, 2, 3, 7, 8);
+ let r: i16x8 = transmute(vuzp1q_s16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp1q_s32() {
+ let a: i32x4 = i32x4::new(1, 0, 2, 0);
+ let b: i32x4 = i32x4::new(2, 0, 3, 0);
+ let e: i32x4 = i32x4::new(1, 2, 2, 3);
+ let r: i32x4 = transmute(vuzp1q_s32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp1_u8() {
+ let a: u8x8 = u8x8::new(1, 0, 2, 0, 2, 0, 3, 0);
+ let b: u8x8 = u8x8::new(2, 0, 3, 0, 7, 0, 8, 0);
+ let e: u8x8 = u8x8::new(1, 2, 2, 3, 2, 3, 7, 8);
+ let r: u8x8 = transmute(vuzp1_u8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp1q_u8() {
+ let a: u8x16 = u8x16::new(1, 0, 2, 0, 2, 0, 3, 0, 2, 0, 3, 0, 7, 0, 8, 0);
+ let b: u8x16 = u8x16::new(2, 0, 3, 0, 7, 0, 8, 0, 13, 0, 14, 0, 15, 0, 16, 0);
+ let e: u8x16 = u8x16::new(1, 2, 2, 3, 2, 3, 7, 8, 2, 3, 7, 8, 13, 14, 15, 16);
+ let r: u8x16 = transmute(vuzp1q_u8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp1_u16() {
+ let a: u16x4 = u16x4::new(1, 0, 2, 0);
+ let b: u16x4 = u16x4::new(2, 0, 3, 0);
+ let e: u16x4 = u16x4::new(1, 2, 2, 3);
+ let r: u16x4 = transmute(vuzp1_u16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp1q_u16() {
+ let a: u16x8 = u16x8::new(1, 0, 2, 0, 2, 0, 3, 0);
+ let b: u16x8 = u16x8::new(2, 0, 3, 0, 7, 0, 8, 0);
+ let e: u16x8 = u16x8::new(1, 2, 2, 3, 2, 3, 7, 8);
+ let r: u16x8 = transmute(vuzp1q_u16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp1q_u32() {
+ let a: u32x4 = u32x4::new(1, 0, 2, 0);
+ let b: u32x4 = u32x4::new(2, 0, 3, 0);
+ let e: u32x4 = u32x4::new(1, 2, 2, 3);
+ let r: u32x4 = transmute(vuzp1q_u32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp1_p8() {
+ let a: i8x8 = i8x8::new(1, 0, 2, 0, 2, 0, 3, 0);
+ let b: i8x8 = i8x8::new(2, 0, 3, 0, 7, 0, 8, 0);
+ let e: i8x8 = i8x8::new(1, 2, 2, 3, 2, 3, 7, 8);
+ let r: i8x8 = transmute(vuzp1_p8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp1q_p8() {
+ let a: i8x16 = i8x16::new(1, 0, 2, 0, 2, 0, 3, 0, 2, 0, 3, 0, 7, 0, 8, 0);
+ let b: i8x16 = i8x16::new(2, 0, 3, 0, 7, 0, 8, 0, 13, 0, 14, 0, 15, 0, 16, 0);
+ let e: i8x16 = i8x16::new(1, 2, 2, 3, 2, 3, 7, 8, 2, 3, 7, 8, 13, 14, 15, 16);
+ let r: i8x16 = transmute(vuzp1q_p8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp1_p16() {
+ let a: i16x4 = i16x4::new(1, 0, 2, 0);
+ let b: i16x4 = i16x4::new(2, 0, 3, 0);
+ let e: i16x4 = i16x4::new(1, 2, 2, 3);
+ let r: i16x4 = transmute(vuzp1_p16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp1q_p16() {
+ let a: i16x8 = i16x8::new(1, 0, 2, 0, 2, 0, 3, 0);
+ let b: i16x8 = i16x8::new(2, 0, 3, 0, 7, 0, 8, 0);
+ let e: i16x8 = i16x8::new(1, 2, 2, 3, 2, 3, 7, 8);
+ let r: i16x8 = transmute(vuzp1q_p16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp1_s32() {
+ let a: i32x2 = i32x2::new(1, 0);
+ let b: i32x2 = i32x2::new(2, 0);
+ let e: i32x2 = i32x2::new(1, 2);
+ let r: i32x2 = transmute(vuzp1_s32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp1q_s64() {
+ let a: i64x2 = i64x2::new(1, 0);
+ let b: i64x2 = i64x2::new(2, 0);
+ let e: i64x2 = i64x2::new(1, 2);
+ let r: i64x2 = transmute(vuzp1q_s64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp1_u32() {
+ let a: u32x2 = u32x2::new(1, 0);
+ let b: u32x2 = u32x2::new(2, 0);
+ let e: u32x2 = u32x2::new(1, 2);
+ let r: u32x2 = transmute(vuzp1_u32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp1q_u64() {
+ let a: u64x2 = u64x2::new(1, 0);
+ let b: u64x2 = u64x2::new(2, 0);
+ let e: u64x2 = u64x2::new(1, 2);
+ let r: u64x2 = transmute(vuzp1q_u64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp1q_p64() {
+ let a: i64x2 = i64x2::new(1, 0);
+ let b: i64x2 = i64x2::new(2, 0);
+ let e: i64x2 = i64x2::new(1, 2);
+ let r: i64x2 = transmute(vuzp1q_p64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp1q_f32() {
+ let a: f32x4 = f32x4::new(0., 8., 1., 9.);
+ let b: f32x4 = f32x4::new(1., 10., 3., 11.);
+ let e: f32x4 = f32x4::new(0., 1., 1., 3.);
+ let r: f32x4 = transmute(vuzp1q_f32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp1_f32() {
+ let a: f32x2 = f32x2::new(0., 8.);
+ let b: f32x2 = f32x2::new(1., 10.);
+ let e: f32x2 = f32x2::new(0., 1.);
+ let r: f32x2 = transmute(vuzp1_f32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp1q_f64() {
+ let a: f64x2 = f64x2::new(0., 8.);
+ let b: f64x2 = f64x2::new(1., 10.);
+ let e: f64x2 = f64x2::new(0., 1.);
+ let r: f64x2 = transmute(vuzp1q_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp2_s8() {
+ let a: i8x8 = i8x8::new(0, 17, 0, 18, 0, 18, 0, 19);
+ let b: i8x8 = i8x8::new(0, 18, 0, 19, 0, 23, 0, 24);
+ let e: i8x8 = i8x8::new(17, 18, 18, 19, 18, 19, 23, 24);
+ let r: i8x8 = transmute(vuzp2_s8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp2q_s8() {
+ let a: i8x16 = i8x16::new(0, 17, 0, 18, 0, 18, 0, 19, 0, 18, 0, 19, 0, 23, 0, 24);
+ let b: i8x16 = i8x16::new(0, 18, 0, 19, 0, 23, 0, 24, 0, 29, 0, 30, 0, 31, 0, 32);
+ let e: i8x16 = i8x16::new(17, 18, 18, 19, 18, 19, 23, 24, 18, 19, 23, 24, 29, 30, 31, 32);
+ let r: i8x16 = transmute(vuzp2q_s8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp2_s16() {
+ let a: i16x4 = i16x4::new(0, 17, 0, 18);
+ let b: i16x4 = i16x4::new(0, 18, 0, 19);
+ let e: i16x4 = i16x4::new(17, 18, 18, 19);
+ let r: i16x4 = transmute(vuzp2_s16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp2q_s16() {
+ let a: i16x8 = i16x8::new(0, 17, 0, 18, 0, 18, 0, 19);
+ let b: i16x8 = i16x8::new(0, 18, 0, 19, 0, 23, 0, 24);
+ let e: i16x8 = i16x8::new(17, 18, 18, 19, 18, 19, 23, 24);
+ let r: i16x8 = transmute(vuzp2q_s16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp2q_s32() {
+ let a: i32x4 = i32x4::new(0, 17, 0, 18);
+ let b: i32x4 = i32x4::new(0, 18, 0, 19);
+ let e: i32x4 = i32x4::new(17, 18, 18, 19);
+ let r: i32x4 = transmute(vuzp2q_s32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp2_u8() {
+ let a: u8x8 = u8x8::new(0, 17, 0, 18, 0, 18, 0, 19);
+ let b: u8x8 = u8x8::new(0, 18, 0, 19, 0, 23, 0, 24);
+ let e: u8x8 = u8x8::new(17, 18, 18, 19, 18, 19, 23, 24);
+ let r: u8x8 = transmute(vuzp2_u8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp2q_u8() {
+ let a: u8x16 = u8x16::new(0, 17, 0, 18, 0, 18, 0, 19, 0, 18, 0, 19, 0, 23, 0, 24);
+ let b: u8x16 = u8x16::new(0, 18, 0, 19, 0, 23, 0, 24, 0, 29, 0, 30, 0, 31, 0, 32);
+ let e: u8x16 = u8x16::new(17, 18, 18, 19, 18, 19, 23, 24, 18, 19, 23, 24, 29, 30, 31, 32);
+ let r: u8x16 = transmute(vuzp2q_u8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp2_u16() {
+ let a: u16x4 = u16x4::new(0, 17, 0, 18);
+ let b: u16x4 = u16x4::new(0, 18, 0, 19);
+ let e: u16x4 = u16x4::new(17, 18, 18, 19);
+ let r: u16x4 = transmute(vuzp2_u16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp2q_u16() {
+ let a: u16x8 = u16x8::new(0, 17, 0, 18, 0, 18, 0, 19);
+ let b: u16x8 = u16x8::new(0, 18, 0, 19, 0, 23, 0, 24);
+ let e: u16x8 = u16x8::new(17, 18, 18, 19, 18, 19, 23, 24);
+ let r: u16x8 = transmute(vuzp2q_u16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp2q_u32() {
+ let a: u32x4 = u32x4::new(0, 17, 0, 18);
+ let b: u32x4 = u32x4::new(0, 18, 0, 19);
+ let e: u32x4 = u32x4::new(17, 18, 18, 19);
+ let r: u32x4 = transmute(vuzp2q_u32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp2_p8() {
+ let a: i8x8 = i8x8::new(0, 17, 0, 18, 0, 18, 0, 19);
+ let b: i8x8 = i8x8::new(0, 18, 0, 19, 0, 23, 0, 24);
+ let e: i8x8 = i8x8::new(17, 18, 18, 19, 18, 19, 23, 24);
+ let r: i8x8 = transmute(vuzp2_p8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp2q_p8() {
+ let a: i8x16 = i8x16::new(0, 17, 0, 18, 0, 18, 0, 19, 0, 18, 0, 19, 0, 23, 0, 24);
+ let b: i8x16 = i8x16::new(0, 18, 0, 19, 0, 23, 0, 24, 0, 29, 0, 30, 0, 31, 0, 32);
+ let e: i8x16 = i8x16::new(17, 18, 18, 19, 18, 19, 23, 24, 18, 19, 23, 24, 29, 30, 31, 32);
+ let r: i8x16 = transmute(vuzp2q_p8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp2_p16() {
+ let a: i16x4 = i16x4::new(0, 17, 0, 18);
+ let b: i16x4 = i16x4::new(0, 18, 0, 19);
+ let e: i16x4 = i16x4::new(17, 18, 18, 19);
+ let r: i16x4 = transmute(vuzp2_p16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp2q_p16() {
+ let a: i16x8 = i16x8::new(0, 17, 0, 18, 0, 18, 0, 19);
+ let b: i16x8 = i16x8::new(0, 18, 0, 19, 0, 23, 0, 24);
+ let e: i16x8 = i16x8::new(17, 18, 18, 19, 18, 19, 23, 24);
+ let r: i16x8 = transmute(vuzp2q_p16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp2_s32() {
+ let a: i32x2 = i32x2::new(0, 17);
+ let b: i32x2 = i32x2::new(0, 18);
+ let e: i32x2 = i32x2::new(17, 18);
+ let r: i32x2 = transmute(vuzp2_s32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp2q_s64() {
+ let a: i64x2 = i64x2::new(0, 17);
+ let b: i64x2 = i64x2::new(0, 18);
+ let e: i64x2 = i64x2::new(17, 18);
+ let r: i64x2 = transmute(vuzp2q_s64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp2_u32() {
+ let a: u32x2 = u32x2::new(0, 17);
+ let b: u32x2 = u32x2::new(0, 18);
+ let e: u32x2 = u32x2::new(17, 18);
+ let r: u32x2 = transmute(vuzp2_u32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp2q_u64() {
+ let a: u64x2 = u64x2::new(0, 17);
+ let b: u64x2 = u64x2::new(0, 18);
+ let e: u64x2 = u64x2::new(17, 18);
+ let r: u64x2 = transmute(vuzp2q_u64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp2q_p64() {
+ let a: i64x2 = i64x2::new(0, 17);
+ let b: i64x2 = i64x2::new(0, 18);
+ let e: i64x2 = i64x2::new(17, 18);
+ let r: i64x2 = transmute(vuzp2q_p64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp2q_f32() {
+ let a: f32x4 = f32x4::new(0., 8., 1., 9.);
+ let b: f32x4 = f32x4::new(2., 9., 3., 11.);
+ let e: f32x4 = f32x4::new(8., 9., 9., 11.);
+ let r: f32x4 = transmute(vuzp2q_f32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp2_f32() {
+ let a: f32x2 = f32x2::new(0., 8.);
+ let b: f32x2 = f32x2::new(2., 9.);
+ let e: f32x2 = f32x2::new(8., 9.);
+ let r: f32x2 = transmute(vuzp2_f32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuzp2q_f64() {
+ let a: f64x2 = f64x2::new(0., 8.);
+ let b: f64x2 = f64x2::new(2., 9.);
+ let e: f64x2 = f64x2::new(8., 9.);
+ let r: f64x2 = transmute(vuzp2q_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
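+ // Widening absolute-difference-accumulate on the high halves: vabal_high
+ // computes a[i] + |b[h + i] - c[h + i]| (h = half the lane count) with the
+ // difference widened, so every lane below lands on 20 by construction.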
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vabal_high_u8() {
+ let a: u16x8 = u16x8::new(9, 10, 11, 12, 13, 14, 15, 16);
+ let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let c: u8x16 = u8x16::new(10, 10, 10, 10, 10, 10, 10, 10, 20, 0, 2, 4, 6, 8, 10, 12);
+ let e: u16x8 = u16x8::new(20, 20, 20, 20, 20, 20, 20, 20);
+ let r: u16x8 = transmute(vabal_high_u8(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vabal_high_u16() {
+ let a: u32x4 = u32x4::new(9, 10, 11, 12);
+ let b: u16x8 = u16x8::new(1, 2, 3, 4, 9, 10, 11, 12);
+ let c: u16x8 = u16x8::new(10, 10, 10, 10, 20, 0, 2, 4);
+ let e: u32x4 = u32x4::new(20, 20, 20, 20);
+ let r: u32x4 = transmute(vabal_high_u16(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vabal_high_u32() {
+ let a: u64x2 = u64x2::new(15, 16);
+ let b: u32x4 = u32x4::new(1, 2, 15, 16);
+ let c: u32x4 = u32x4::new(10, 10, 10, 12);
+ let e: u64x2 = u64x2::new(20, 20);
+ let r: u64x2 = transmute(vabal_high_u32(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vabal_high_s8() {
+ let a: i16x8 = i16x8::new(9, 10, 11, 12, 13, 14, 15, 16);
+ let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let c: i8x16 = i8x16::new(10, 10, 10, 10, 10, 10, 10, 10, 20, 0, 2, 4, 6, 8, 10, 12);
+ let e: i16x8 = i16x8::new(20, 20, 20, 20, 20, 20, 20, 20);
+ let r: i16x8 = transmute(vabal_high_s8(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vabal_high_s16() {
+ let a: i32x4 = i32x4::new(9, 10, 11, 12);
+ let b: i16x8 = i16x8::new(1, 2, 3, 4, 9, 10, 11, 12);
+ let c: i16x8 = i16x8::new(10, 10, 10, 10, 20, 0, 2, 4);
+ let e: i32x4 = i32x4::new(20, 20, 20, 20);
+ let r: i32x4 = transmute(vabal_high_s16(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vabal_high_s32() {
+ let a: i64x2 = i64x2::new(15, 16);
+ let b: i32x4 = i32x4::new(1, 2, 15, 16);
+ let c: i32x4 = i32x4::new(10, 10, 10, 12);
+ let e: i64x2 = i64x2::new(20, 20);
+ let r: i64x2 = transmute(vabal_high_s32(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
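+ // Saturating absolute value: unlike plain negation, vqabs clamps i64::MIN
+ // to i64::MAX instead of wrapping back to i64::MIN.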
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqabs_s64() {
+ let a: i64x1 = i64x1::new(-9223372036854775808);
+ let e: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
+ let r: i64x1 = transmute(vqabs_s64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqabsq_s64() {
+ let a: i64x2 = i64x2::new(-9223372036854775808, -7);
+ let e: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 7);
+ let r: i64x2 = transmute(vqabsq_s64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqabsb_s8() {
+ let a: i8 = -7;
+ let e: i8 = 7;
+ let r: i8 = transmute(vqabsb_s8(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqabsh_s16() {
+ let a: i16 = -7;
+ let e: i16 = 7;
+ let r: i16 = transmute(vqabsh_s16(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqabss_s32() {
+ let a: i32 = -7;
+ let e: i32 = 7;
+ let r: i32 = transmute(vqabss_s32(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vqabsd_s64() {
+ let a: i64 = -7;
+ let e: i64 = 7;
+ let r: i64 = transmute(vqabsd_s64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
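+ // Shift-and-insert scalars: SLI keeps the low N bits of `a` and inserts
+ // `b << N` above them ((2042 << 2) | (333 & 0b11) == 8169), while SRI keeps
+ // the high N bits of `a` and inserts `b >> N` below them (2042 >> 2 == 510,
+ // and the top two bits of 333 are zero).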
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vslid_n_s64() {
+ let a: i64 = 333;
+ let b: i64 = 2042;
+ let e: i64 = 8169;
+ let r: i64 = transmute(vslid_n_s64::<2>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vslid_n_u64() {
+ let a: u64 = 333;
+ let b: u64 = 2042;
+ let e: u64 = 8169;
+ let r: u64 = transmute(vslid_n_u64::<2>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vsrid_n_s64() {
+ let a: i64 = 333;
+ let b: i64 = 2042;
+ let e: i64 = 510;
+ let r: i64 = transmute(vsrid_n_s64::<2>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vsrid_n_u64() {
+ let a: u64 = 333;
+ let b: u64 = 2042;
+ let e: u64 = 510;
+ let r: u64 = transmute(vsrid_n_u64::<2>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+}
diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs
new file mode 100644
index 000000000..65ba527ee
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs
@@ -0,0 +1,5440 @@
+//! ARMv8 ASIMD intrinsics
+
+#![allow(non_camel_case_types)]
+
+#[rustfmt::skip]
+mod generated;
+#[rustfmt::skip]
+pub use self::generated::*;
+
+// FIXME: replace neon with asimd
+
+use crate::{
+ core_arch::{arm_shared::*, simd::*, simd_llvm::*},
+ hint::unreachable_unchecked,
+ mem::{transmute, zeroed},
+ ptr::{read_unaligned, write_unaligned},
+};
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+types! {
+ /// ARM-specific 64-bit wide vector of one packed `f64`.
+ #[stable(feature = "neon_intrinsics", since = "1.59.0")]
+ pub struct float64x1_t(f64); // FIXME: check this!
+ /// ARM-specific 128-bit wide vector of two packed `f64`.
+ #[stable(feature = "neon_intrinsics", since = "1.59.0")]
+ pub struct float64x2_t(f64, f64);
+}
+
+/// ARM-specific type containing two `float64x1_t` vectors.
+#[derive(Copy, Clone, Debug)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub struct float64x1x2_t(pub float64x1_t, pub float64x1_t);
+/// ARM-specific type containing three `float64x1_t` vectors.
+#[derive(Copy, Clone, Debug)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub struct float64x1x3_t(pub float64x1_t, pub float64x1_t, pub float64x1_t);
+/// ARM-specific type containing four `float64x1_t` vectors.
+#[derive(Copy, Clone, Debug)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub struct float64x1x4_t(
+ pub float64x1_t,
+ pub float64x1_t,
+ pub float64x1_t,
+ pub float64x1_t,
+);
+
+/// ARM-specific type containing two `float64x2_t` vectors.
+#[derive(Copy, Clone, Debug)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub struct float64x2x2_t(pub float64x2_t, pub float64x2_t);
+/// ARM-specific type containing three `float64x2_t` vectors.
+#[derive(Copy, Clone, Debug)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub struct float64x2x3_t(pub float64x2_t, pub float64x2_t, pub float64x2_t);
+/// ARM-specific type containing four `float64x2_t` vectors.
+#[derive(Copy, Clone, Debug)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub struct float64x2x4_t(
+ pub float64x2_t,
+ pub float64x2_t,
+ pub float64x2_t,
+ pub float64x2_t,
+);
+
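+// These tuple structs mirror the C `float64xNxM_t` aggregates and serve as
+// the argument and result types of the multi-register load/store intrinsics
+// (e.g. a `vld1_f64_x2` load yields a `float64x1x2_t`).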
+#[allow(improper_ctypes)]
+extern "unadjusted" {
+ // absolute value
+ #[link_name = "llvm.aarch64.neon.abs.i64"]
+ fn vabsd_s64_(a: i64) -> i64;
+ #[link_name = "llvm.aarch64.neon.abs.v1i64"]
+ fn vabs_s64_(a: int64x1_t) -> int64x1_t;
+ #[link_name = "llvm.aarch64.neon.abs.v2i64"]
+ fn vabsq_s64_(a: int64x2_t) -> int64x2_t;
+
+ #[link_name = "llvm.aarch64.neon.suqadd.v8i8"]
+ fn vuqadd_s8_(a: int8x8_t, b: uint8x8_t) -> int8x8_t;
+ #[link_name = "llvm.aarch64.neon.suqadd.v16i8"]
+ fn vuqaddq_s8_(a: int8x16_t, b: uint8x16_t) -> int8x16_t;
+ #[link_name = "llvm.aarch64.neon.suqadd.v4i16"]
+ fn vuqadd_s16_(a: int16x4_t, b: uint16x4_t) -> int16x4_t;
+ #[link_name = "llvm.aarch64.neon.suqadd.v8i16"]
+ fn vuqaddq_s16_(a: int16x8_t, b: uint16x8_t) -> int16x8_t;
+ #[link_name = "llvm.aarch64.neon.suqadd.v2i32"]
+ fn vuqadd_s32_(a: int32x2_t, b: uint32x2_t) -> int32x2_t;
+ #[link_name = "llvm.aarch64.neon.suqadd.v4i32"]
+ fn vuqaddq_s32_(a: int32x4_t, b: uint32x4_t) -> int32x4_t;
+ #[link_name = "llvm.aarch64.neon.suqadd.v1i64"]
+ fn vuqadd_s64_(a: int64x1_t, b: uint64x1_t) -> int64x1_t;
+ #[link_name = "llvm.aarch64.neon.suqadd.v2i64"]
+ fn vuqaddq_s64_(a: int64x2_t, b: uint64x2_t) -> int64x2_t;
+
+ #[link_name = "llvm.aarch64.neon.usqadd.v8i8"]
+ fn vsqadd_u8_(a: uint8x8_t, b: int8x8_t) -> uint8x8_t;
+ #[link_name = "llvm.aarch64.neon.usqadd.v16i8"]
+ fn vsqaddq_u8_(a: uint8x16_t, b: int8x16_t) -> uint8x16_t;
+ #[link_name = "llvm.aarch64.neon.usqadd.v4i16"]
+ fn vsqadd_u16_(a: uint16x4_t, b: int16x4_t) -> uint16x4_t;
+ #[link_name = "llvm.aarch64.neon.usqadd.v8i16"]
+ fn vsqaddq_u16_(a: uint16x8_t, b: int16x8_t) -> uint16x8_t;
+ #[link_name = "llvm.aarch64.neon.usqadd.v2i32"]
+ fn vsqadd_u32_(a: uint32x2_t, b: int32x2_t) -> uint32x2_t;
+ #[link_name = "llvm.aarch64.neon.usqadd.v4i32"]
+ fn vsqaddq_u32_(a: uint32x4_t, b: int32x4_t) -> uint32x4_t;
+ #[link_name = "llvm.aarch64.neon.usqadd.v1i64"]
+ fn vsqadd_u64_(a: uint64x1_t, b: int64x1_t) -> uint64x1_t;
+ #[link_name = "llvm.aarch64.neon.usqadd.v2i64"]
+ fn vsqaddq_u64_(a: uint64x2_t, b: int64x2_t) -> uint64x2_t;
+
+ #[link_name = "llvm.aarch64.neon.addp.v8i16"]
+ fn vpaddq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+ #[link_name = "llvm.aarch64.neon.addp.v4i32"]
+ fn vpaddq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+ #[link_name = "llvm.aarch64.neon.addp.v2i64"]
+ fn vpaddq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t;
+ #[link_name = "llvm.aarch64.neon.addp.v16i8"]
+ fn vpaddq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+
+ #[link_name = "llvm.aarch64.neon.saddv.i32.v4i16"]
+ fn vaddv_s16_(a: int16x4_t) -> i16;
+ #[link_name = "llvm.aarch64.neon.saddv.i32.v2i32"]
+ fn vaddv_s32_(a: int32x2_t) -> i32;
+ #[link_name = "llvm.aarch64.neon.saddv.i32.v8i8"]
+ fn vaddv_s8_(a: int8x8_t) -> i8;
+ #[link_name = "llvm.aarch64.neon.uaddv.i32.v4i16"]
+ fn vaddv_u16_(a: uint16x4_t) -> u16;
+ #[link_name = "llvm.aarch64.neon.uaddv.i32.v2i32"]
+ fn vaddv_u32_(a: uint32x2_t) -> u32;
+ #[link_name = "llvm.aarch64.neon.uaddv.i32.v8i8"]
+ fn vaddv_u8_(a: uint8x8_t) -> u8;
+ #[link_name = "llvm.aarch64.neon.saddv.i32.v8i16"]
+ fn vaddvq_s16_(a: int16x8_t) -> i16;
+ #[link_name = "llvm.aarch64.neon.saddv.i32.v4i32"]
+ fn vaddvq_s32_(a: int32x4_t) -> i32;
+ #[link_name = "llvm.aarch64.neon.saddv.i32.v16i8"]
+ fn vaddvq_s8_(a: int8x16_t) -> i8;
+ #[link_name = "llvm.aarch64.neon.uaddv.i32.v8i16"]
+ fn vaddvq_u16_(a: uint16x8_t) -> u16;
+ #[link_name = "llvm.aarch64.neon.uaddv.i32.v4i32"]
+ fn vaddvq_u32_(a: uint32x4_t) -> u32;
+ #[link_name = "llvm.aarch64.neon.uaddv.i32.v16i8"]
+ fn vaddvq_u8_(a: uint8x16_t) -> u8;
+ #[link_name = "llvm.aarch64.neon.saddv.i64.v2i64"]
+ fn vaddvq_s64_(a: int64x2_t) -> i64;
+ #[link_name = "llvm.aarch64.neon.uaddv.i64.v2i64"]
+ fn vaddvq_u64_(a: uint64x2_t) -> u64;
+
+ #[link_name = "llvm.aarch64.neon.saddlv.i32.v8i8"]
+ fn vaddlv_s8_(a: int8x8_t) -> i32;
+ #[link_name = "llvm.aarch64.neon.uaddlv.i32.v8i8"]
+ fn vaddlv_u8_(a: uint8x8_t) -> u32;
+ #[link_name = "llvm.aarch64.neon.saddlv.i32.v16i8"]
+ fn vaddlvq_s8_(a: int8x16_t) -> i32;
+ #[link_name = "llvm.aarch64.neon.uaddlv.i32.v16i8"]
+ fn vaddlvq_u8_(a: uint8x16_t) -> u32;
+
+ #[link_name = "llvm.aarch64.neon.smaxv.i8.v8i8"]
+ fn vmaxv_s8_(a: int8x8_t) -> i8;
+ #[link_name = "llvm.aarch64.neon.smaxv.i8.6i8"]
+ fn vmaxvq_s8_(a: int8x16_t) -> i8;
+ #[link_name = "llvm.aarch64.neon.smaxv.i16.v4i16"]
+ fn vmaxv_s16_(a: int16x4_t) -> i16;
+ #[link_name = "llvm.aarch64.neon.smaxv.i16.v8i16"]
+ fn vmaxvq_s16_(a: int16x8_t) -> i16;
+ #[link_name = "llvm.aarch64.neon.smaxv.i32.v2i32"]
+ fn vmaxv_s32_(a: int32x2_t) -> i32;
+ #[link_name = "llvm.aarch64.neon.smaxv.i32.v4i32"]
+ fn vmaxvq_s32_(a: int32x4_t) -> i32;
+
+ #[link_name = "llvm.aarch64.neon.umaxv.i8.v8i8"]
+ fn vmaxv_u8_(a: uint8x8_t) -> u8;
+ #[link_name = "llvm.aarch64.neon.umaxv.i8.6i8"]
+ fn vmaxvq_u8_(a: uint8x16_t) -> u8;
+ #[link_name = "llvm.aarch64.neon.umaxv.i16.v4i16"]
+ fn vmaxv_u16_(a: uint16x4_t) -> u16;
+ #[link_name = "llvm.aarch64.neon.umaxv.i16.v8i16"]
+ fn vmaxvq_u16_(a: uint16x8_t) -> u16;
+ #[link_name = "llvm.aarch64.neon.umaxv.i32.v2i32"]
+ fn vmaxv_u32_(a: uint32x2_t) -> u32;
+ #[link_name = "llvm.aarch64.neon.umaxv.i32.v4i32"]
+ fn vmaxvq_u32_(a: uint32x4_t) -> u32;
+
+ #[link_name = "llvm.aarch64.neon.fmaxv.f32.v2f32"]
+ fn vmaxv_f32_(a: float32x2_t) -> f32;
+ #[link_name = "llvm.aarch64.neon.fmaxv.f32.v4f32"]
+ fn vmaxvq_f32_(a: float32x4_t) -> f32;
+ #[link_name = "llvm.aarch64.neon.fmaxv.f64.v2f64"]
+ fn vmaxvq_f64_(a: float64x2_t) -> f64;
+
+ #[link_name = "llvm.aarch64.neon.sminv.i8.v8i8"]
+ fn vminv_s8_(a: int8x8_t) -> i8;
+ #[link_name = "llvm.aarch64.neon.sminv.i8.6i8"]
+ fn vminvq_s8_(a: int8x16_t) -> i8;
+ #[link_name = "llvm.aarch64.neon.sminv.i16.v4i16"]
+ fn vminv_s16_(a: int16x4_t) -> i16;
+ #[link_name = "llvm.aarch64.neon.sminv.i16.v8i16"]
+ fn vminvq_s16_(a: int16x8_t) -> i16;
+ #[link_name = "llvm.aarch64.neon.sminv.i32.v2i32"]
+ fn vminv_s32_(a: int32x2_t) -> i32;
+ #[link_name = "llvm.aarch64.neon.sminv.i32.v4i32"]
+ fn vminvq_s32_(a: int32x4_t) -> i32;
+
+ #[link_name = "llvm.aarch64.neon.uminv.i8.v8i8"]
+ fn vminv_u8_(a: uint8x8_t) -> u8;
+ #[link_name = "llvm.aarch64.neon.uminv.i8.6i8"]
+ fn vminvq_u8_(a: uint8x16_t) -> u8;
+ #[link_name = "llvm.aarch64.neon.uminv.i16.v4i16"]
+ fn vminv_u16_(a: uint16x4_t) -> u16;
+ #[link_name = "llvm.aarch64.neon.uminv.i16.v8i16"]
+ fn vminvq_u16_(a: uint16x8_t) -> u16;
+ #[link_name = "llvm.aarch64.neon.uminv.i32.v2i32"]
+ fn vminv_u32_(a: uint32x2_t) -> u32;
+ #[link_name = "llvm.aarch64.neon.uminv.i32.v4i32"]
+ fn vminvq_u32_(a: uint32x4_t) -> u32;
+
+ #[link_name = "llvm.aarch64.neon.fminv.f32.v2f32"]
+ fn vminv_f32_(a: float32x2_t) -> f32;
+ #[link_name = "llvm.aarch64.neon.fminv.f32.v4f32"]
+ fn vminvq_f32_(a: float32x4_t) -> f32;
+ #[link_name = "llvm.aarch64.neon.fminv.f64.v2f64"]
+ fn vminvq_f64_(a: float64x2_t) -> f64;
+
+ #[link_name = "llvm.aarch64.neon.sminp.v16i8"]
+ fn vpminq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+ #[link_name = "llvm.aarch64.neon.sminp.v8i16"]
+ fn vpminq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+ #[link_name = "llvm.aarch64.neon.sminp.v4i32"]
+ fn vpminq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+ #[link_name = "llvm.aarch64.neon.uminp.v16i8"]
+ fn vpminq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t;
+ #[link_name = "llvm.aarch64.neon.uminp.v8i16"]
+ fn vpminq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t;
+ #[link_name = "llvm.aarch64.neon.uminp.v4i32"]
+ fn vpminq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t;
+ #[link_name = "llvm.aarch64.neon.fminp.4f32"]
+ fn vpminq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+ #[link_name = "llvm.aarch64.neon.fminp.v2f64"]
+ fn vpminq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+
+ #[link_name = "llvm.aarch64.neon.smaxp.v16i8"]
+ fn vpmaxq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+ #[link_name = "llvm.aarch64.neon.smaxp.v8i16"]
+ fn vpmaxq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+ #[link_name = "llvm.aarch64.neon.smaxp.v4i32"]
+ fn vpmaxq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+ #[link_name = "llvm.aarch64.neon.umaxp.v16i8"]
+ fn vpmaxq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t;
+ #[link_name = "llvm.aarch64.neon.umaxp.v8i16"]
+ fn vpmaxq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t;
+ #[link_name = "llvm.aarch64.neon.umaxp.v4i32"]
+ fn vpmaxq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t;
+ #[link_name = "llvm.aarch64.neon.fmaxp.4f32"]
+ fn vpmaxq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+ #[link_name = "llvm.aarch64.neon.fmaxp.v2f64"]
+ fn vpmaxq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+
+ #[link_name = "llvm.aarch64.neon.tbl1.v8i8"]
+ fn vqtbl1(a: int8x16_t, b: uint8x8_t) -> int8x8_t;
+ #[link_name = "llvm.aarch64.neon.tbl1.v16i8"]
+ fn vqtbl1q(a: int8x16_t, b: uint8x16_t) -> int8x16_t;
+
+ #[link_name = "llvm.aarch64.neon.tbx1.v8i8"]
+ fn vqtbx1(a: int8x8_t, b: int8x16_t, c: uint8x8_t) -> int8x8_t;
+ #[link_name = "llvm.aarch64.neon.tbx1.v16i8"]
+ fn vqtbx1q(a: int8x16_t, b: int8x16_t, c: uint8x16_t) -> int8x16_t;
+
+ #[link_name = "llvm.aarch64.neon.tbl2.v8i8"]
+ fn vqtbl2(a0: int8x16_t, a1: int8x16_t, b: uint8x8_t) -> int8x8_t;
+ #[link_name = "llvm.aarch64.neon.tbl2.v16i8"]
+ fn vqtbl2q(a0: int8x16_t, a1: int8x16_t, b: uint8x16_t) -> int8x16_t;
+
+ #[link_name = "llvm.aarch64.neon.tbx2.v8i8"]
+ fn vqtbx2(a: int8x8_t, b0: int8x16_t, b1: int8x16_t, c: uint8x8_t) -> int8x8_t;
+ #[link_name = "llvm.aarch64.neon.tbx2.v16i8"]
+ fn vqtbx2q(a: int8x16_t, b0: int8x16_t, b1: int8x16_t, c: uint8x16_t) -> int8x16_t;
+
+ #[link_name = "llvm.aarch64.neon.tbl3.v8i8"]
+ fn vqtbl3(a0: int8x16_t, a1: int8x16_t, a2: int8x16_t, b: uint8x8_t) -> int8x8_t;
+ #[link_name = "llvm.aarch64.neon.tbl3.v16i8"]
+ fn vqtbl3q(a0: int8x16_t, a1: int8x16_t, a2: int8x16_t, b: uint8x16_t) -> int8x16_t;
+
+ #[link_name = "llvm.aarch64.neon.tbx3.v8i8"]
+ fn vqtbx3(a: int8x8_t, b0: int8x16_t, b1: int8x16_t, b2: int8x16_t, c: uint8x8_t) -> int8x8_t;
+ #[link_name = "llvm.aarch64.neon.tbx3.v16i8"]
+ fn vqtbx3q(
+ a: int8x16_t,
+ b0: int8x16_t,
+ b1: int8x16_t,
+ b2: int8x16_t,
+ c: uint8x16_t,
+ ) -> int8x16_t;
+
+ #[link_name = "llvm.aarch64.neon.tbl4.v8i8"]
+ fn vqtbl4(a0: int8x16_t, a1: int8x16_t, a2: int8x16_t, a3: int8x16_t, b: uint8x8_t)
+ -> int8x8_t;
+ #[link_name = "llvm.aarch64.neon.tbl4.v16i8"]
+ fn vqtbl4q(
+ a0: int8x16_t,
+ a1: int8x16_t,
+ a2: int8x16_t,
+ a3: int8x16_t,
+ b: uint8x16_t,
+ ) -> int8x16_t;
+
+ #[link_name = "llvm.aarch64.neon.tbx4.v8i8"]
+ fn vqtbx4(
+ a: int8x8_t,
+ b0: int8x16_t,
+ b1: int8x16_t,
+ b2: int8x16_t,
+ b3: int8x16_t,
+ c: uint8x8_t,
+ ) -> int8x8_t;
+
+ #[link_name = "llvm.aarch64.neon.tbx4.v16i8"]
+ fn vqtbx4q(
+ a: int8x16_t,
+ b0: int8x16_t,
+ b1: int8x16_t,
+ b2: int8x16_t,
+ b3: int8x16_t,
+ c: uint8x16_t,
+ ) -> int8x16_t;
+
+ #[link_name = "llvm.aarch64.neon.vsli.v8i8"]
+ fn vsli_n_s8_(a: int8x8_t, b: int8x8_t, n: i32) -> int8x8_t;
+ #[link_name = "llvm.aarch64.neon.vsli.v16i8"]
+ fn vsliq_n_s8_(a: int8x16_t, b: int8x16_t, n: i32) -> int8x16_t;
+ #[link_name = "llvm.aarch64.neon.vsli.v4i16"]
+ fn vsli_n_s16_(a: int16x4_t, b: int16x4_t, n: i32) -> int16x4_t;
+ #[link_name = "llvm.aarch64.neon.vsli.v8i16"]
+ fn vsliq_n_s16_(a: int16x8_t, b: int16x8_t, n: i32) -> int16x8_t;
+ #[link_name = "llvm.aarch64.neon.vsli.v2i32"]
+ fn vsli_n_s32_(a: int32x2_t, b: int32x2_t, n: i32) -> int32x2_t;
+ #[link_name = "llvm.aarch64.neon.vsli.v4i32"]
+ fn vsliq_n_s32_(a: int32x4_t, b: int32x4_t, n: i32) -> int32x4_t;
+ #[link_name = "llvm.aarch64.neon.vsli.v1i64"]
+ fn vsli_n_s64_(a: int64x1_t, b: int64x1_t, n: i32) -> int64x1_t;
+ #[link_name = "llvm.aarch64.neon.vsli.v2i64"]
+ fn vsliq_n_s64_(a: int64x2_t, b: int64x2_t, n: i32) -> int64x2_t;
+
+ #[link_name = "llvm.aarch64.neon.vsri.v8i8"]
+ fn vsri_n_s8_(a: int8x8_t, b: int8x8_t, n: i32) -> int8x8_t;
+ #[link_name = "llvm.aarch64.neon.vsri.v16i8"]
+ fn vsriq_n_s8_(a: int8x16_t, b: int8x16_t, n: i32) -> int8x16_t;
+ #[link_name = "llvm.aarch64.neon.vsri.v4i16"]
+ fn vsri_n_s16_(a: int16x4_t, b: int16x4_t, n: i32) -> int16x4_t;
+ #[link_name = "llvm.aarch64.neon.vsri.v8i16"]
+ fn vsriq_n_s16_(a: int16x8_t, b: int16x8_t, n: i32) -> int16x8_t;
+ #[link_name = "llvm.aarch64.neon.vsri.v2i32"]
+ fn vsri_n_s32_(a: int32x2_t, b: int32x2_t, n: i32) -> int32x2_t;
+ #[link_name = "llvm.aarch64.neon.vsri.v4i32"]
+ fn vsriq_n_s32_(a: int32x4_t, b: int32x4_t, n: i32) -> int32x4_t;
+ #[link_name = "llvm.aarch64.neon.vsri.v1i64"]
+ fn vsri_n_s64_(a: int64x1_t, b: int64x1_t, n: i32) -> int64x1_t;
+ #[link_name = "llvm.aarch64.neon.vsri.v2i64"]
+ fn vsriq_n_s64_(a: int64x2_t, b: int64x2_t, n: i32) -> int64x2_t;
+}
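+
+// Note on the `link_name` strings above (editorial gloss, not generated text):
+// the trailing suffix is LLVM's overload mangling for the vector type, e.g.
+// `v8i8` is 8 lanes of `i8`, `v16i8` is 16 lanes, and `v2f64` is 2 lanes of
+// `f64`.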
+
+/// Duplicate vector element to vector or scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N1 = 0, N2 = 0))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopy_lane_s64<const N1: i32, const N2: i32>(
+ _a: int64x1_t,
+ b: int64x1_t,
+) -> int64x1_t {
+ static_assert!(N1 : i32 where N1 == 0);
+ static_assert!(N2 : i32 where N2 == 0);
+ b
+}
+
+/// Duplicate vector element to vector or scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N1 = 0, N2 = 0))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopy_lane_u64<const N1: i32, const N2: i32>(
+ _a: uint64x1_t,
+ b: uint64x1_t,
+) -> uint64x1_t {
+ static_assert!(N1 : i32 where N1 == 0);
+ static_assert!(N2 : i32 where N2 == 0);
+ b
+}
+
+/// Duplicate vector element to vector or scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N1 = 0, N2 = 0))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopy_lane_p64<const N1: i32, const N2: i32>(
+ _a: poly64x1_t,
+ b: poly64x1_t,
+) -> poly64x1_t {
+ static_assert!(N1 : i32 where N1 == 0);
+ static_assert!(N2 : i32 where N2 == 0);
+ b
+}
+
+/// Duplicate vector element to vector or scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N1 = 0, N2 = 0))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopy_lane_f64<const N1: i32, const N2: i32>(
+ _a: float64x1_t,
+ b: float64x1_t,
+) -> float64x1_t {
+ static_assert!(N1 : i32 where N1 == 0);
+ static_assert!(N2 : i32 where N2 == 0);
+ b
+}
+
+/// Duplicate vector element to vector or scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopy_laneq_s64<const LANE1: i32, const LANE2: i32>(
+ _a: int64x1_t,
+ b: int64x2_t,
+) -> int64x1_t {
+ static_assert!(LANE1 : i32 where LANE1 == 0);
+ static_assert_imm1!(LANE2);
+ transmute::<i64, _>(simd_extract(b, LANE2 as u32))
+}
+
+/// Duplicate vector element to vector or scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopy_laneq_u64<const LANE1: i32, const LANE2: i32>(
+ _a: uint64x1_t,
+ b: uint64x2_t,
+) -> uint64x1_t {
+ static_assert!(LANE1 : i32 where LANE1 == 0);
+ static_assert_imm1!(LANE2);
+ transmute::<u64, _>(simd_extract(b, LANE2 as u32))
+}
+
+/// Duplicate vector element to vector or scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopy_laneq_p64<const LANE1: i32, const LANE2: i32>(
+ _a: poly64x1_t,
+ b: poly64x2_t,
+) -> poly64x1_t {
+ static_assert!(LANE1 : i32 where LANE1 == 0);
+ static_assert_imm1!(LANE2);
+ transmute::<u64, _>(simd_extract(b, LANE2 as u32))
+}
+
+/// Duplicate vector element to vector or scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopy_laneq_f64<const LANE1: i32, const LANE2: i32>(
+ _a: float64x1_t,
+ b: float64x2_t,
+) -> float64x1_t {
+ static_assert!(LANE1 : i32 where LANE1 == 0);
+ static_assert_imm1!(LANE2);
+ transmute::<f64, _>(simd_extract(b, LANE2 as u32))
+}
+
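+// A minimal usage sketch (hypothetical helper, not part of the API):
+// `vcopy_laneq_s64` writes lane `LANE2` of `b` into lane `LANE1` (necessarily
+// 0) of `a`, so for a one-lane destination the result is just the chosen lane.
+#[cfg(test)]
+#[allow(dead_code)]
+unsafe fn vcopy_laneq_s64_sketch() {
+ let a = vdup_n_s64(0);
+ let b = vcombine_s64(vdup_n_s64(10), vdup_n_s64(11));
+ let r = vcopy_laneq_s64::<0, 1>(a, b);
+ assert_eq!(simd_extract::<_, i64>(r, 0), 11); // lane 1 of `b`
+}
+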
+/// Load multiple single-element structures to one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1_s8(ptr: *const i8) -> int8x8_t {
+ read_unaligned(ptr.cast())
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1q_s8(ptr: *const i8) -> int8x16_t {
+ read_unaligned(ptr.cast())
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1_s16(ptr: *const i16) -> int16x4_t {
+ read_unaligned(ptr.cast())
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1q_s16(ptr: *const i16) -> int16x8_t {
+ read_unaligned(ptr.cast())
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1_s32(ptr: *const i32) -> int32x2_t {
+ read_unaligned(ptr.cast())
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1q_s32(ptr: *const i32) -> int32x4_t {
+ read_unaligned(ptr.cast())
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1_s64(ptr: *const i64) -> int64x1_t {
+ read_unaligned(ptr.cast())
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1q_s64(ptr: *const i64) -> int64x2_t {
+ read_unaligned(ptr.cast())
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1_u8(ptr: *const u8) -> uint8x8_t {
+ read_unaligned(ptr.cast())
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1q_u8(ptr: *const u8) -> uint8x16_t {
+ read_unaligned(ptr.cast())
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1_u16(ptr: *const u16) -> uint16x4_t {
+ read_unaligned(ptr.cast())
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1q_u16(ptr: *const u16) -> uint16x8_t {
+ read_unaligned(ptr.cast())
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1_u32(ptr: *const u32) -> uint32x2_t {
+ read_unaligned(ptr.cast())
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1q_u32(ptr: *const u32) -> uint32x4_t {
+ read_unaligned(ptr.cast())
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1_u64(ptr: *const u64) -> uint64x1_t {
+ read_unaligned(ptr.cast())
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1q_u64(ptr: *const u64) -> uint64x2_t {
+ read_unaligned(ptr.cast())
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1_p8(ptr: *const p8) -> poly8x8_t {
+ read_unaligned(ptr.cast())
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1q_p8(ptr: *const p8) -> poly8x16_t {
+ read_unaligned(ptr.cast())
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1_p16(ptr: *const p16) -> poly16x4_t {
+ read_unaligned(ptr.cast())
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t {
+ read_unaligned(ptr.cast())
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1_p64(ptr: *const p64) -> poly64x1_t {
+ read_unaligned(ptr.cast())
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1q_p64(ptr: *const p64) -> poly64x2_t {
+ read_unaligned(ptr.cast())
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1_f32(ptr: *const f32) -> float32x2_t {
+ read_unaligned(ptr.cast())
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t {
+ read_unaligned(ptr.cast())
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1_f64(ptr: *const f64) -> float64x1_t {
+ read_unaligned(ptr.cast())
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1q_f64(ptr: *const f64) -> float64x2_t {
+ read_unaligned(ptr.cast())
+}
+
+/// Load one single-element structure and replicate it to all lanes of one register.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1_dup_f64(ptr: *const f64) -> float64x1_t {
+ vld1_f64(ptr)
+}
+
+/// Load one single-element structure and replicate it to all lanes of one register.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld1r))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1q_dup_f64(ptr: *const f64) -> float64x2_t {
+ let x = vld1q_lane_f64::<0>(ptr, transmute(f64x2::splat(0.)));
+ simd_shuffle2!(x, x, [0, 0])
+}
+
+/// Load one single-element structure to one lane of one register.
+#[inline]
+#[target_feature(enable = "neon")]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(ldr, LANE = 0))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1_lane_f64<const LANE: i32>(ptr: *const f64, src: float64x1_t) -> float64x1_t {
+ static_assert!(LANE : i32 where LANE == 0);
+ simd_insert(src, LANE as u32, *ptr)
+}
+
+/// Load one single-element structure to one lane of one register.
+#[inline]
+#[target_feature(enable = "neon")]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(ld1, LANE = 1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1q_lane_f64<const LANE: i32>(ptr: *const f64, src: float64x2_t) -> float64x2_t {
+ static_assert_imm1!(LANE);
+ simd_insert(src, LANE as u32, *ptr)
+}
+
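+// A short sketch (hypothetical helper, not part of the API): `vld1q_lane_f64`
+// loads one `f64` into the chosen lane and leaves the other lane of `src`
+// untouched.
+#[cfg(test)]
+#[allow(dead_code)]
+unsafe fn vld1q_lane_f64_sketch() {
+ let src = vdupq_n_f64(1.0);
+ let x = 2.0_f64;
+ let r = vld1q_lane_f64::<1>(&x, src);
+ assert_eq!(simd_extract::<_, f64>(r, 0), 1.0); // kept from `src`
+ assert_eq!(simd_extract::<_, f64>(r, 1), 2.0); // freshly loaded
+}
+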
+/// Store multiple single-element structures from one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1_s8(ptr: *mut i8, a: int8x8_t) {
+ write_unaligned(ptr.cast(), a);
+}
+
+/// Store multiple single-element structures from one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1q_s8(ptr: *mut i8, a: int8x16_t) {
+ write_unaligned(ptr.cast(), a);
+}
+
+/// Store multiple single-element structures from one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1_s16(ptr: *mut i16, a: int16x4_t) {
+ write_unaligned(ptr.cast(), a);
+}
+
+/// Store multiple single-element structures from one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1q_s16(ptr: *mut i16, a: int16x8_t) {
+ write_unaligned(ptr.cast(), a);
+}
+
+/// Store multiple single-element structures from one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1_s32(ptr: *mut i32, a: int32x2_t) {
+ write_unaligned(ptr.cast(), a);
+}
+
+/// Store multiple single-element structures from one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1q_s32(ptr: *mut i32, a: int32x4_t) {
+ write_unaligned(ptr.cast(), a);
+}
+
+/// Store multiple single-element structures from one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1_s64(ptr: *mut i64, a: int64x1_t) {
+ write_unaligned(ptr.cast(), a);
+}
+
+/// Store multiple single-element structures from one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1q_s64(ptr: *mut i64, a: int64x2_t) {
+ write_unaligned(ptr.cast(), a);
+}
+
+/// Store multiple single-element structures from one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1_u8(ptr: *mut u8, a: uint8x8_t) {
+ write_unaligned(ptr.cast(), a);
+}
+
+/// Store multiple single-element structures from one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1q_u8(ptr: *mut u8, a: uint8x16_t) {
+ write_unaligned(ptr.cast(), a);
+}
+
+/// Store multiple single-element structures from one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1_u16(ptr: *mut u16, a: uint16x4_t) {
+ write_unaligned(ptr.cast(), a);
+}
+
+/// Store multiple single-element structures from one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1q_u16(ptr: *mut u16, a: uint16x8_t) {
+ write_unaligned(ptr.cast(), a);
+}
+
+/// Store multiple single-element structures from one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1_u32(ptr: *mut u32, a: uint32x2_t) {
+ write_unaligned(ptr.cast(), a);
+}
+
+/// Store multiple single-element structures from one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1q_u32(ptr: *mut u32, a: uint32x4_t) {
+ write_unaligned(ptr.cast(), a);
+}
+
+/// Store multiple single-element structures from one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1_u64(ptr: *mut u64, a: uint64x1_t) {
+ write_unaligned(ptr.cast(), a);
+}
+
+/// Store multiple single-element structures from one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1q_u64(ptr: *mut u64, a: uint64x2_t) {
+ write_unaligned(ptr.cast(), a);
+}
+
+/// Store multiple single-element structures from one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1_p8(ptr: *mut p8, a: poly8x8_t) {
+ write_unaligned(ptr.cast(), a);
+}
+
+/// Store multiple single-element structures from one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1q_p8(ptr: *mut p8, a: poly8x16_t) {
+ write_unaligned(ptr.cast(), a);
+}
+
+/// Store multiple single-element structures from one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1_p16(ptr: *mut p16, a: poly16x4_t) {
+ write_unaligned(ptr.cast(), a);
+}
+
+/// Store multiple single-element structures from one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t) {
+ write_unaligned(ptr.cast(), a);
+}
+
+/// Store multiple single-element structures from one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) {
+ write_unaligned(ptr.cast(), a);
+}
+
+/// Store multiple single-element structures from one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1q_p64(ptr: *mut p64, a: poly64x2_t) {
+ write_unaligned(ptr.cast(), a);
+}
+
+/// Store multiple single-element structures from one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1_f32(ptr: *mut f32, a: float32x2_t) {
+ write_unaligned(ptr.cast(), a);
+}
+
+/// Store multiple single-element structures from one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1q_f32(ptr: *mut f32, a: float32x4_t) {
+ write_unaligned(ptr.cast(), a);
+}
+
+/// Store multiple single-element structures from one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1_f64(ptr: *mut f64, a: float64x1_t) {
+ write_unaligned(ptr.cast(), a);
+}
+
+/// Store multiple single-element structures from one, two, three, or four registers.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1q_f64(ptr: *mut f64, a: float64x2_t) {
+ write_unaligned(ptr.cast(), a);
+}
+
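+// Roundtrip sketch (hypothetical helper, not part of the API): the
+// `vld1`/`vst1` family performs unaligned accesses, so any `[u16; 8]` buffer
+// is a valid source and destination.
+#[cfg(test)]
+#[allow(dead_code)]
+unsafe fn vst1q_u16_roundtrip_sketch() {
+ let src = [1_u16, 2, 3, 4, 5, 6, 7, 8];
+ let mut dst = [0_u16; 8];
+ vst1q_u16(dst.as_mut_ptr(), vld1q_u16(src.as_ptr()));
+ assert_eq!(dst, src);
+}
+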
+/// Absolute Value (wrapping).
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(abs))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vabsd_s64(a: i64) -> i64 {
+ vabsd_s64_(a)
+}
+/// Absolute Value (wrapping).
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(abs))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vabs_s64(a: int64x1_t) -> int64x1_t {
+ vabs_s64_(a)
+}
+/// Absolute Value (wrapping).
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(abs))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vabsq_s64(a: int64x2_t) -> int64x2_t {
+ vabsq_s64_(a)
+}
+
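+// Wrapping sketch (hypothetical helper, not part of the API): like
+// `i64::wrapping_abs`, the absolute value of `i64::MIN` wraps back to itself.
+#[cfg(test)]
+#[allow(dead_code)]
+unsafe fn vabsd_s64_sketch() {
+ assert_eq!(vabsd_s64(-5), 5);
+ assert_eq!(vabsd_s64(i64::MIN), i64::MIN); // wraps instead of overflowing
+}
+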
+/// Bitwise Select. Sets each bit in the destination SIMD&FP register to the
+/// corresponding bit of the first source SIMD&FP register when the matching bit
+/// of the mask (`a`) is 1, and to the bit of the second source register otherwise.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(bsl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vbsl_f64(a: uint64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t {
+ simd_select(transmute::<_, int64x1_t>(a), b, c)
+}
+/// Bitwise Select.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(bsl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vbsl_p64(a: poly64x1_t, b: poly64x1_t, c: poly64x1_t) -> poly64x1_t {
+ simd_select(transmute::<_, int64x1_t>(a), b, c)
+}
+/// Bitwise Select. (128-bit)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(bsl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vbslq_f64(a: uint64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t {
+ simd_select(transmute::<_, int64x2_t>(a), b, c)
+}
+/// Bitwise Select. (128-bit)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(bsl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vbslq_p64(a: poly64x2_t, b: poly64x2_t, c: poly64x2_t) -> poly64x2_t {
+ simd_select(transmute::<_, int64x2_t>(a), b, c)
+}
+
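+// Bit-select sketch (hypothetical helper, not part of the API): each result
+// bit is taken from `b` where the corresponding mask bit in `a` is 1, and
+// from `c` where it is 0.
+#[cfg(test)]
+#[allow(dead_code)]
+unsafe fn vbsl_f64_sketch() {
+ let all_b = vdup_n_u64(u64::MAX); // all-ones mask: every bit from `b`
+ let b = vdup_n_f64(1.0);
+ let c = vdup_n_f64(2.0);
+ assert_eq!(simd_extract::<_, f64>(vbsl_f64(all_b, b, c), 0), 1.0);
+}
+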
+/// Signed saturating Accumulate of Unsigned value.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(suqadd))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuqadd_s8(a: int8x8_t, b: uint8x8_t) -> int8x8_t {
+ vuqadd_s8_(a, b)
+}
+/// Signed saturating Accumulate of Unsigned value.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(suqadd))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuqaddq_s8(a: int8x16_t, b: uint8x16_t) -> int8x16_t {
+ vuqaddq_s8_(a, b)
+}
+/// Signed saturating Accumulate of Unsigned value.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(suqadd))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuqadd_s16(a: int16x4_t, b: uint16x4_t) -> int16x4_t {
+ vuqadd_s16_(a, b)
+}
+/// Signed saturating Accumulate of Unsigned value.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(suqadd))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuqaddq_s16(a: int16x8_t, b: uint16x8_t) -> int16x8_t {
+ vuqaddq_s16_(a, b)
+}
+/// Signed saturating Accumulate of Unsigned value.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(suqadd))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuqadd_s32(a: int32x2_t, b: uint32x2_t) -> int32x2_t {
+ vuqadd_s32_(a, b)
+}
+/// Signed saturating Accumulate of Unsigned value.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(suqadd))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuqaddq_s32(a: int32x4_t, b: uint32x4_t) -> int32x4_t {
+ vuqaddq_s32_(a, b)
+}
+/// Signed saturating Accumulate of Unsigned value.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(suqadd))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuqadd_s64(a: int64x1_t, b: uint64x1_t) -> int64x1_t {
+ vuqadd_s64_(a, b)
+}
+/// Signed saturating Accumulate of Unsigned value.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(suqadd))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vuqaddq_s64(a: int64x2_t, b: uint64x2_t) -> int64x2_t {
+ vuqaddq_s64_(a, b)
+}
+
+/// Unsigned saturating Accumulate of Signed value.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(usqadd))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsqadd_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t {
+ vsqadd_u8_(a, b)
+}
+/// Unsigned saturating Accumulate of Signed value.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(usqadd))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsqaddq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t {
+ vsqaddq_u8_(a, b)
+}
+/// Unsigned saturating Accumulate of Signed value.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(usqadd))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsqadd_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t {
+ vsqadd_u16_(a, b)
+}
+/// Unsigned saturating Accumulate of Signed value.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(usqadd))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsqaddq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t {
+ vsqaddq_u16_(a, b)
+}
+/// Unsigned saturating Accumulate of Signed value.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(usqadd))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsqadd_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t {
+ vsqadd_u32_(a, b)
+}
+/// Unsigned saturating Accumulate of Signed value.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(usqadd))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsqaddq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t {
+ vsqaddq_u32_(a, b)
+}
+/// Unsigned saturating Accumulate of Signed value.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(usqadd))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsqadd_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t {
+ vsqadd_u64_(a, b)
+}
+/// Unsigned saturating Accumulate of Signed value.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(usqadd))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsqaddq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t {
+ vsqaddq_u64_(a, b)
+}
+
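+// Saturation sketch (hypothetical helper, not part of the API): `vuqadd*`
+// adds an unsigned value into a signed accumulator and clamps at the signed
+// maximum; `vsqadd*` adds a signed value into an unsigned accumulator and
+// clamps at zero.
+#[cfg(test)]
+#[allow(dead_code)]
+unsafe fn saturating_accumulate_sketch() {
+ let s = vuqadd_s8(vdup_n_s8(100), vdup_n_u8(100));
+ assert_eq!(simd_extract::<_, i8>(s, 0), i8::MAX); // 200 clamps to 127
+ let u = vsqadd_u8(vdup_n_u8(1), vdup_n_s8(-5));
+ assert_eq!(simd_extract::<_, u8>(u, 0), 0); // 1 - 5 clamps to 0
+}
+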
+/// Add pairwise
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+ vpaddq_s16_(a, b)
+}
+/// Add pairwise
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+ transmute(vpaddq_s16_(transmute(a), transmute(b)))
+}
+/// Add pairwise
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+ vpaddq_s32_(a, b)
+}
+/// Add pairwise
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+ transmute(vpaddq_s32_(transmute(a), transmute(b)))
+}
+/// Add pairwise
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+ vpaddq_s64_(a, b)
+}
+/// Add pairwise
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+ transmute(vpaddq_s64_(transmute(a), transmute(b)))
+}
+/// Add pairwise
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+ vpaddq_s8_(a, b)
+}
+/// Add pairwise
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+ transmute(vpaddq_s8_(transmute(a), transmute(b)))
+}
+/// Add pairwise
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpaddd_s64(a: int64x2_t) -> i64 {
+ transmute(vaddvq_u64_(transmute(a)))
+}
+/// Add pairwise
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpaddd_u64(a: uint64x2_t) -> u64 {
+ transmute(vaddvq_u64_(transmute(a)))
+}
+
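+// Pairwise-add sketch (hypothetical helper, not part of the API): the result
+// holds the pair sums of `a` in its low half and the pair sums of `b` in its
+// high half; `vpaddd_*` reduces a single two-lane vector to a scalar.
+#[cfg(test)]
+#[allow(dead_code)]
+unsafe fn vpaddq_s64_sketch() {
+ let a = vcombine_s64(vdup_n_s64(1), vdup_n_s64(2));
+ let b = vcombine_s64(vdup_n_s64(10), vdup_n_s64(20));
+ let r = vpaddq_s64(a, b);
+ assert_eq!(simd_extract::<_, i64>(r, 0), 3); // 1 + 2
+ assert_eq!(simd_extract::<_, i64>(r, 1), 30); // 10 + 20
+ assert_eq!(vpaddd_s64(a), 3);
+}
+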
+/// Add across vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vaddv_s16(a: int16x4_t) -> i16 {
+ vaddv_s16_(a)
+}
+
+/// Add across vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vaddv_s32(a: int32x2_t) -> i32 {
+ vaddv_s32_(a)
+}
+
+/// Add across vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vaddv_s8(a: int8x8_t) -> i8 {
+ vaddv_s8_(a)
+}
+
+/// Add across vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vaddv_u16(a: uint16x4_t) -> u16 {
+ vaddv_u16_(a)
+}
+
+/// Add across vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vaddv_u32(a: uint32x2_t) -> u32 {
+ vaddv_u32_(a)
+}
+
+/// Add across vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vaddv_u8(a: uint8x8_t) -> u8 {
+ vaddv_u8_(a)
+}
+
+/// Add across vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vaddvq_s16(a: int16x8_t) -> i16 {
+ vaddvq_s16_(a)
+}
+
+/// Add across vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vaddvq_s32(a: int32x4_t) -> i32 {
+ vaddvq_s32_(a)
+}
+
+/// Add across vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vaddvq_s8(a: int8x16_t) -> i8 {
+ vaddvq_s8_(a)
+}
+
+/// Add across vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vaddvq_u16(a: uint16x8_t) -> u16 {
+ vaddvq_u16_(a)
+}
+
+/// Add across vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vaddvq_u32(a: uint32x4_t) -> u32 {
+ vaddvq_u32_(a)
+}
+
+/// Add across vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vaddvq_u8(a: uint8x16_t) -> u8 {
+ vaddvq_u8_(a)
+}
+
+/// Add across vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vaddvq_s64(a: int64x2_t) -> i64 {
+ vaddvq_s64_(a)
+}
+
+/// Add across vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(addp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vaddvq_u64(a: uint64x2_t) -> u64 {
+ vaddvq_u64_(a)
+}
+
+/// Signed Add Long across Vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(saddlv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vaddlv_s8(a: int8x8_t) -> i16 {
+ vaddlv_s8_(a) as i16
+}
+
+/// Signed Add Long across Vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(saddlv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vaddlvq_s8(a: int8x16_t) -> i16 {
+ vaddlvq_s8_(a) as i16
+}
+
+/// Unsigned Add Long across Vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uaddlv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vaddlv_u8(a: uint8x8_t) -> u16 {
+ vaddlv_u8_(a) as u16
+}
+
+/// Unsigned Add Long across Vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uaddlv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vaddlvq_u8(a: uint8x16_t) -> u16 {
+ vaddlvq_u8_(a) as u16
+}
+
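+// Reduction sketch (hypothetical helper, not part of the API): `vaddv*` sums
+// all lanes at lane width and wraps on overflow, while `vaddlv*` widens the
+// accumulator first, so a full `u8` vector cannot overflow.
+#[cfg(test)]
+#[allow(dead_code)]
+unsafe fn add_across_sketch() {
+ let v = vdup_n_u8(200);
+ assert_eq!(vaddv_u8(v), 64); // 8 * 200 = 1600 wraps modulo 256
+ assert_eq!(vaddlv_u8(v), 1600); // widened to u16: no wrap
+}
+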
+/// Vector add.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fadd))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vadd_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
+ simd_add(a, b)
+}
+
+/// Vector add.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fadd))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vaddq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+ simd_add(a, b)
+}
+
+/// Vector add.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(add))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vadd_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+ simd_add(a, b)
+}
+
+/// Vector add.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(add))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vadd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+ simd_add(a, b)
+}
+
+/// Scalar add (wrapping).
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(add))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vaddd_s64(a: i64, b: i64) -> i64 {
+ a.wrapping_add(b)
+}
+
+/// Scalar add (wrapping).
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(add))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vaddd_u64(a: u64, b: u64) -> u64 {
+ a.wrapping_add(b)
+}
+
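+// Wrapping sketch (hypothetical helper, not part of the API): the scalar
+// forms behave like `wrapping_add` rather than panicking on overflow.
+#[cfg(test)]
+#[allow(dead_code)]
+unsafe fn vaddd_u64_sketch() {
+ assert_eq!(vaddd_u64(u64::MAX, 1), 0); // wraps modulo 2^64
+}
+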
+/// Horizontal vector max.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smaxv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmaxv_s8(a: int8x8_t) -> i8 {
+ vmaxv_s8_(a)
+}
+
+/// Horizontal vector max.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smaxv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmaxvq_s8(a: int8x16_t) -> i8 {
+ vmaxvq_s8_(a)
+}
+
+/// Horizontal vector max.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smaxv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmaxv_s16(a: int16x4_t) -> i16 {
+ vmaxv_s16_(a)
+}
+
+/// Horizontal vector max.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smaxv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmaxvq_s16(a: int16x8_t) -> i16 {
+ vmaxvq_s16_(a)
+}
+
+/// Horizontal vector max.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smaxp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmaxv_s32(a: int32x2_t) -> i32 {
+ vmaxv_s32_(a)
+}
+
+/// Horizontal vector max.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smaxv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmaxvq_s32(a: int32x4_t) -> i32 {
+ vmaxvq_s32_(a)
+}
+
+/// Horizontal vector max.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umaxv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmaxv_u8(a: uint8x8_t) -> u8 {
+ vmaxv_u8_(a)
+}
+
+/// Horizontal vector max.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umaxv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmaxvq_u8(a: uint8x16_t) -> u8 {
+ vmaxvq_u8_(a)
+}
+
+/// Horizontal vector max.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umaxv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmaxv_u16(a: uint16x4_t) -> u16 {
+ vmaxv_u16_(a)
+}
+
+/// Horizontal vector max.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umaxv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmaxvq_u16(a: uint16x8_t) -> u16 {
+ vmaxvq_u16_(a)
+}
+
+/// Horizontal vector max.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umaxp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmaxv_u32(a: uint32x2_t) -> u32 {
+ vmaxv_u32_(a)
+}
+
+/// Horizontal vector max.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umaxv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmaxvq_u32(a: uint32x4_t) -> u32 {
+ vmaxvq_u32_(a)
+}
+
+/// Horizontal vector max.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmaxp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmaxv_f32(a: float32x2_t) -> f32 {
+ vmaxv_f32_(a)
+}
+
+/// Horizontal vector max.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmaxv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmaxvq_f32(a: float32x4_t) -> f32 {
+ vmaxvq_f32_(a)
+}
+
+/// Horizontal vector max.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmaxp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmaxvq_f64(a: float64x2_t) -> f64 {
+ vmaxvq_f64_(a)
+}
+
+/// Horizontal vector min.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sminv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vminv_s8(a: int8x8_t) -> i8 {
+ vminv_s8_(a)
+}
+
+/// Horizontal vector min.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sminv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vminvq_s8(a: int8x16_t) -> i8 {
+ vminvq_s8_(a)
+}
+
+/// Horizontal vector min.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sminv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vminv_s16(a: int16x4_t) -> i16 {
+ vminv_s16_(a)
+}
+
+/// Horizontal vector min.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sminv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vminvq_s16(a: int16x8_t) -> i16 {
+ vminvq_s16_(a)
+}
+
+/// Horizontal vector min.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sminp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vminv_s32(a: int32x2_t) -> i32 {
+ vminv_s32_(a)
+}
+
+/// Horizontal vector min.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sminv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vminvq_s32(a: int32x4_t) -> i32 {
+ vminvq_s32_(a)
+}
+
+/// Horizontal vector min.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uminv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vminv_u8(a: uint8x8_t) -> u8 {
+ vminv_u8_(a)
+}
+
+/// Horizontal vector min.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uminv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vminvq_u8(a: uint8x16_t) -> u8 {
+ vminvq_u8_(a)
+}
+
+/// Horizontal vector min.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uminv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vminv_u16(a: uint16x4_t) -> u16 {
+ vminv_u16_(a)
+}
+
+/// Horizontal vector min.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uminv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vminvq_u16(a: uint16x8_t) -> u16 {
+ vminvq_u16_(a)
+}
+
+/// Horizontal vector min.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uminp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vminv_u32(a: uint32x2_t) -> u32 {
+ vminv_u32_(a)
+}
+
+/// Horizontal vector min.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uminv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vminvq_u32(a: uint32x4_t) -> u32 {
+ vminvq_u32_(a)
+}
+
+/// Horizontal vector min.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fminp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vminv_f32(a: float32x2_t) -> f32 {
+ vminv_f32_(a)
+}
+
+/// Horizontal vector min.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fminv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vminvq_f32(a: float32x4_t) -> f32 {
+ vminvq_f32_(a)
+}
+
+/// Horizontal vector min.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fminp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vminvq_f64(a: float64x2_t) -> f64 {
+ vminvq_f64_(a)
+}
+
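+// Horizontal reduction sketch (hypothetical helper, not part of the API):
+// `vmaxv*`/`vminv*` collapse a whole vector into one scalar extreme.
+#[cfg(test)]
+#[allow(dead_code)]
+unsafe fn horizontal_minmax_sketch() {
+ let v = vld1q_s16([3_i16, -7, 0, 9, 5, -2, 8, 1].as_ptr());
+ assert_eq!(vmaxvq_s16(v), 9);
+ assert_eq!(vminvq_s16(v), -7);
+}
+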
+/// Folding minimum of adjacent pairs
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sminp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+ vpminq_s8_(a, b)
+}
+
+/// Folding minimum of adjacent pairs
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sminp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+ vpminq_s16_(a, b)
+}
+
+/// Folding minimum of adjacent pairs
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sminp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+ vpminq_s32_(a, b)
+}
+
+/// Folding minimum of adjacent pairs
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uminp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpminq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+ vpminq_u8_(a, b)
+}
+
+/// Folding minimum of adjacent pairs
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uminp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpminq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+ vpminq_u16_(a, b)
+}
+
+/// Folding minimum of adjacent pairs
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uminp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+ vpminq_u32_(a, b)
+}
+
+/// Folding minimum of adjacent pairs
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fminp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+ vpminq_f32_(a, b)
+}
+
+/// Folding minimum of adjacent pairs
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fminp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpminq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+ vpminq_f64_(a, b)
+}
+
+/// Folding maximum of adjacent pairs
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smaxp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+ vpmaxq_s8_(a, b)
+}
+
+/// Folding maximum of adjacent pairs
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smaxp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+ vpmaxq_s16_(a, b)
+}
+
+/// Folding maximum of adjacent pairs
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smaxp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpmaxq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+ vpmaxq_s32_(a, b)
+}
+
+/// Folding maximum of adjacent pairs
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umaxp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpmaxq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+ vpmaxq_u8_(a, b)
+}
+
+/// Folding maximum of adjacent pairs
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umaxp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpmaxq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+ vpmaxq_u16_(a, b)
+}
+
+/// Folding maximum of adjacent pairs
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umaxp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpmaxq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+ vpmaxq_u32_(a, b)
+}
+
+/// Folding maximum of adjacent pairs
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmaxp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+ vpmaxq_f32_(a, b)
+}
+
+/// Folding maximum of adjacent pairs
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmaxp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+ vpmaxq_f64_(a, b)
+}
+
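+// Folding sketch (hypothetical helper, not part of the API): unlike the
+// horizontal reductions above, `vpmaxq_*` keeps the vector shape: the low
+// half of the result holds the pairwise maxima of `a`, the high half those
+// of `b`.
+#[cfg(test)]
+#[allow(dead_code)]
+unsafe fn vpmaxq_s32_sketch() {
+ let a = vld1q_s32([1_i32, 4, 3, 2].as_ptr());
+ let b = vld1q_s32([9_i32, 5, 6, 7].as_ptr());
+ let r = vpmaxq_s32(a, b);
+ assert_eq!(simd_extract::<_, i32>(r, 0), 4); // max(1, 4)
+ assert_eq!(simd_extract::<_, i32>(r, 1), 3); // max(3, 2)
+ assert_eq!(simd_extract::<_, i32>(r, 2), 9); // max(9, 5)
+ assert_eq!(simd_extract::<_, i32>(r, 3), 7); // max(6, 7)
+}
+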
+/// Extract vector from pair of vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vext_p64<const N: i32>(a: poly64x1_t, _b: poly64x1_t) -> poly64x1_t {
+ static_assert!(N : i32 where N == 0);
+ a
+}
+
+/// Extract vector from pair of vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vext_f64<const N: i32>(a: float64x1_t, _b: float64x1_t) -> float64x1_t {
+ static_assert!(N : i32 where N == 0);
+ a
+}
+
+/// Vector combine
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcombine_s8(low: int8x8_t, high: int8x8_t) -> int8x16_t {
+ simd_shuffle16!(
+ low,
+ high,
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
+ )
+}
+
+/// Vector combine
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcombine_s16(low: int16x4_t, high: int16x4_t) -> int16x8_t {
+ simd_shuffle8!(low, high, [0, 1, 2, 3, 4, 5, 6, 7])
+}
+
+/// Vector combine
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcombine_s32(low: int32x2_t, high: int32x2_t) -> int32x4_t {
+ simd_shuffle4!(low, high, [0, 1, 2, 3])
+}
+
+/// Vector combine
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcombine_s64(low: int64x1_t, high: int64x1_t) -> int64x2_t {
+ simd_shuffle2!(low, high, [0, 1])
+}
+
+/// Vector combine
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcombine_u8(low: uint8x8_t, high: uint8x8_t) -> uint8x16_t {
+ simd_shuffle16!(
+ low,
+ high,
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
+ )
+}
+
+/// Vector combine
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcombine_u16(low: uint16x4_t, high: uint16x4_t) -> uint16x8_t {
+ simd_shuffle8!(low, high, [0, 1, 2, 3, 4, 5, 6, 7])
+}
+
+/// Vector combine
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcombine_u32(low: uint32x2_t, high: uint32x2_t) -> uint32x4_t {
+ simd_shuffle4!(low, high, [0, 1, 2, 3])
+}
+
+/// Vector combine
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcombine_u64(low: uint64x1_t, high: uint64x1_t) -> uint64x2_t {
+ simd_shuffle2!(low, high, [0, 1])
+}
+
+/// Vector combine
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcombine_p64(low: poly64x1_t, high: poly64x1_t) -> poly64x2_t {
+ simd_shuffle2!(low, high, [0, 1])
+}
+
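+// A short sketch of `vcombine_*`, assuming an AArch64 target with NEON: the
+// first argument becomes the low half of the result and the second the high
+// half. The `combine_examples` module name is illustrative.
+#[cfg(test)]
+mod combine_examples {
+ use crate::core_arch::{aarch64::*, simd::*};
+ use std::mem::transmute;
+ use stdarch_test::simd_test;
+
+ #[simd_test(enable = "neon")]
+ unsafe fn example_vcombine_u32() {
+ let low = u32x2::new(1, 2);
+ let high = u32x2::new(3, 4);
+ let r: u32x4 = transmute(vcombine_u32(transmute(low), transmute(high)));
+ assert_eq!(r, u32x4::new(1, 2, 3, 4));
+ }
+}
+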
+/// Duplicate vector element to vector or scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmov))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdup_n_p64(value: p64) -> poly64x1_t {
+ transmute(u64x1::new(value))
+}
+
+/// Duplicate vector element to vector or scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdup_n_f64(value: f64) -> float64x1_t {
+ float64x1_t(value)
+}
+
+/// Duplicate vector element to vector or scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(dup))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdupq_n_p64(value: p64) -> poly64x2_t {
+ transmute(u64x2::new(value, value))
+}
+
+/// Duplicate vector element to vector or scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(dup))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdupq_n_f64(value: f64) -> float64x2_t {
+ float64x2_t(value, value)
+}
+
+/// Duplicate vector element to vector or scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmov))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmov_n_p64(value: p64) -> poly64x1_t {
+ vdup_n_p64(value)
+}
+
+/// Duplicate vector element to vector or scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmov_n_f64(value: f64) -> float64x1_t {
+ vdup_n_f64(value)
+}
+
+/// Duplicate vector element to vector or scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(dup))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmovq_n_p64(value: p64) -> poly64x2_t {
+ vdupq_n_p64(value)
+}
+
+/// Duplicate vector element to vector or scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(dup))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmovq_n_f64(value: f64) -> float64x2_t {
+ vdupq_n_f64(value)
+}
+
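+// A brief sketch, assuming an AArch64 NEON target: `vmov_n_*`/`vmovq_n_*` are
+// thin aliases of the corresponding `vdup_n_*`/`vdupq_n_*` broadcasts, so both
+// spellings produce the same splat. The module name is illustrative.
+#[cfg(test)]
+mod dup_examples {
+ use crate::core_arch::{aarch64::*, simd::*};
+ use std::mem::transmute;
+ use stdarch_test::simd_test;
+
+ #[simd_test(enable = "neon")]
+ unsafe fn example_vdupq_n_f64() {
+ let r: f64x2 = transmute(vdupq_n_f64(1.5));
+ assert_eq!(r, f64x2::new(1.5, 1.5));
+ let r: f64x2 = transmute(vmovq_n_f64(1.5));
+ assert_eq!(r, f64x2::new(1.5, 1.5));
+ }
+}
+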
+/// Extract the high half of a vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vget_high_f64(a: float64x2_t) -> float64x1_t {
+ float64x1_t(simd_extract(a, 1))
+}
+
+/// Extract the high half of a vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ext))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vget_high_p64(a: poly64x2_t) -> poly64x1_t {
+ transmute(u64x1::new(simd_extract(a, 1)))
+}
+
+/// Extract the low half of a vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vget_low_f64(a: float64x2_t) -> float64x1_t {
+ float64x1_t(simd_extract(a, 0))
+}
+
+/// Extract the low half of a vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vget_low_p64(a: poly64x2_t) -> poly64x1_t {
+ transmute(u64x1::new(simd_extract(a, 0)))
+}
+
+/// Extract a single lane from a vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 0))]
+pub unsafe fn vget_lane_f64<const IMM5: i32>(v: float64x1_t) -> f64 {
+ static_assert!(IMM5 : i32 where IMM5 == 0);
+ simd_extract(v, IMM5 as u32)
+}
+
+/// Extract a single lane from a vector
+#[inline]
+#[target_feature(enable = "neon")]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, IMM5 = 0))]
+pub unsafe fn vgetq_lane_f64<const IMM5: i32>(v: float64x2_t) -> f64 {
+ static_assert_imm1!(IMM5);
+ simd_extract(v, IMM5 as u32)
+}
+
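+// A small sketch, assuming an AArch64 NEON target, of the half- and
+// lane-extraction intrinsics above; the `get_examples` module name is
+// illustrative.
+#[cfg(test)]
+mod get_examples {
+ use crate::core_arch::aarch64::*;
+ use stdarch_test::simd_test;
+
+ #[simd_test(enable = "neon")]
+ unsafe fn example_vgetq_lane_f64() {
+ let v = vcombine_f64(vdup_n_f64(1.0), vdup_n_f64(2.0));
+ assert_eq!(vgetq_lane_f64::<0>(v), 1.0);
+ assert_eq!(vgetq_lane_f64::<1>(v), 2.0);
+ assert_eq!(vget_lane_f64::<0>(vget_high_f64(v)), 2.0);
+ }
+}
+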
+/* FIXME: 16-bit float
+/// Vector combine
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov))]
+pub unsafe fn vcombine_f16 ( low: float16x4_t, high: float16x4_t) -> float16x8_t {
+ simd_shuffle8!(low, high, [0, 1, 2, 3, 4, 5, 6, 7])
+}
+*/
+
+/// Vector combine
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcombine_f32(low: float32x2_t, high: float32x2_t) -> float32x4_t {
+ simd_shuffle4!(low, high, [0, 1, 2, 3])
+}
+
+/// Vector combine
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcombine_p8(low: poly8x8_t, high: poly8x8_t) -> poly8x16_t {
+ simd_shuffle16!(
+ low,
+ high,
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
+ )
+}
+
+/// Vector combine
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcombine_p16(low: poly16x4_t, high: poly16x4_t) -> poly16x8_t {
+ simd_shuffle8!(low, high, [0, 1, 2, 3, 4, 5, 6, 7])
+}
+
+/// Vector combine
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcombine_f64(low: float64x1_t, high: float64x1_t) -> float64x2_t {
+ simd_shuffle2!(low, high, [0, 1])
+}
+
+/// Table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtbl1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+ vqtbl1_s8(vcombine_s8(a, zeroed()), transmute(b))
+}
+
+/// Table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtbl1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+ vqtbl1_u8(vcombine_u8(a, zeroed()), b)
+}
+
+/// Table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtbl1_p8(a: poly8x8_t, b: uint8x8_t) -> poly8x8_t {
+ vqtbl1_p8(vcombine_p8(a, zeroed()), b)
+}
+
+/// Table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtbl2_s8(a: int8x8x2_t, b: int8x8_t) -> int8x8_t {
+ vqtbl1_s8(vcombine_s8(a.0, a.1), transmute(b))
+}
+
+/// Table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtbl2_u8(a: uint8x8x2_t, b: uint8x8_t) -> uint8x8_t {
+ vqtbl1_u8(vcombine_u8(a.0, a.1), b)
+}
+
+/// Table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtbl2_p8(a: poly8x8x2_t, b: uint8x8_t) -> poly8x8_t {
+ vqtbl1_p8(vcombine_p8(a.0, a.1), b)
+}
+
+/// Table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtbl3_s8(a: int8x8x3_t, b: int8x8_t) -> int8x8_t {
+ vqtbl2_s8(
+ int8x16x2_t(vcombine_s8(a.0, a.1), vcombine_s8(a.2, zeroed())),
+ transmute(b),
+ )
+}
+
+/// Table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtbl3_u8(a: uint8x8x3_t, b: uint8x8_t) -> uint8x8_t {
+ vqtbl2_u8(
+ uint8x16x2_t(vcombine_u8(a.0, a.1), vcombine_u8(a.2, zeroed())),
+ b,
+ )
+}
+
+/// Table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t {
+ vqtbl2_p8(
+ poly8x16x2_t(vcombine_p8(a.0, a.1), vcombine_p8(a.2, zeroed())),
+ b,
+ )
+}
+
+/// Table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtbl4_s8(a: int8x8x4_t, b: int8x8_t) -> int8x8_t {
+ vqtbl2_s8(
+ int8x16x2_t(vcombine_s8(a.0, a.1), vcombine_s8(a.2, a.3)),
+ transmute(b),
+ )
+}
+
+/// Table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtbl4_u8(a: uint8x8x4_t, b: uint8x8_t) -> uint8x8_t {
+ vqtbl2_u8(
+ uint8x16x2_t(vcombine_u8(a.0, a.1), vcombine_u8(a.2, a.3)),
+ b,
+ )
+}
+
+/// Table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t {
+ vqtbl2_p8(
+ poly8x16x2_t(vcombine_p8(a.0, a.1), vcombine_p8(a.2, a.3)),
+ b,
+ )
+}
+
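+// A usage sketch of the `vtbl*` family, assuming a little-endian AArch64 NEON
+// target: each result byte is a table lookup, and out-of-range indices
+// produce zero. The `tbl_examples` module name is illustrative.
+#[cfg(test)]
+#[cfg(target_endian = "little")]
+mod tbl_examples {
+ use crate::core_arch::{aarch64::*, simd::*};
+ use std::mem::transmute;
+ use stdarch_test::simd_test;
+
+ #[simd_test(enable = "neon")]
+ unsafe fn example_vtbl1_u8() {
+ let table = u8x8::new(100, 101, 102, 103, 104, 105, 106, 107);
+ let idx = u8x8::new(0, 2, 4, 6, 8, 16, 255, 7);
+ // Indices 8, 16 and 255 fall outside the 8-byte table and yield 0.
+ let e = u8x8::new(100, 102, 104, 106, 0, 0, 0, 107);
+ let r: u8x8 = transmute(vtbl1_u8(transmute(table), transmute(idx)));
+ assert_eq!(r, e);
+ }
+}
+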
+/// Extended table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtbx1_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
+ let r = vqtbx1_s8(a, vcombine_s8(b, zeroed()), transmute(c));
+ let m: int8x8_t = simd_lt(c, transmute(i8x8::splat(8)));
+ simd_select(m, r, a)
+}
+
+/// Extended table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
+ let r = vqtbx1_u8(a, vcombine_u8(b, zeroed()), c);
+ let m: int8x8_t = simd_lt(c, transmute(u8x8::splat(8)));
+ simd_select(m, r, a)
+}
+
+/// Extended table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtbx1_p8(a: poly8x8_t, b: poly8x8_t, c: uint8x8_t) -> poly8x8_t {
+ let r = vqtbx1_p8(a, vcombine_p8(b, zeroed()), c);
+ let m: int8x8_t = simd_lt(c, transmute(u8x8::splat(8)));
+ simd_select(m, r, a)
+}
+
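+// A companion sketch for `vtbx1_*`, under the same assumptions as above:
+// out-of-range indices leave the corresponding byte of `a` unchanged instead
+// of producing zero. The `tbx_examples` module name is illustrative.
+#[cfg(test)]
+#[cfg(target_endian = "little")]
+mod tbx_examples {
+ use crate::core_arch::{aarch64::*, simd::*};
+ use std::mem::transmute;
+ use stdarch_test::simd_test;
+
+ #[simd_test(enable = "neon")]
+ unsafe fn example_vtbx1_u8() {
+ let a = u8x8::new(10, 11, 12, 13, 14, 15, 16, 17);
+ let table = u8x8::new(100, 101, 102, 103, 104, 105, 106, 107);
+ let idx = u8x8::new(0, 7, 8, 255, 1, 2, 3, 4);
+ // Lanes 2 and 3 have out-of-range indices and keep `a`'s bytes.
+ let e = u8x8::new(100, 107, 12, 13, 101, 102, 103, 104);
+ let r: u8x8 = transmute(vtbx1_u8(transmute(a), transmute(table), transmute(idx)));
+ assert_eq!(r, e);
+ }
+}
+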
+/// Extended table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtbx2_s8(a: int8x8_t, b: int8x8x2_t, c: int8x8_t) -> int8x8_t {
+ vqtbx1_s8(a, vcombine_s8(b.0, b.1), transmute(c))
+}
+
+/// Extended table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtbx2_u8(a: uint8x8_t, b: uint8x8x2_t, c: uint8x8_t) -> uint8x8_t {
+ vqtbx1_u8(a, vcombine_u8(b.0, b.1), c)
+}
+
+/// Extended table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtbx2_p8(a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t) -> poly8x8_t {
+ vqtbx1_p8(a, vcombine_p8(b.0, b.1), c)
+}
+
+/// Extended table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtbx3_s8(a: int8x8_t, b: int8x8x3_t, c: int8x8_t) -> int8x8_t {
+ let r = vqtbx2_s8(
+ a,
+ int8x16x2_t(vcombine_s8(b.0, b.1), vcombine_s8(b.2, zeroed())),
+ transmute(c),
+ );
+ let m: int8x8_t = simd_lt(c, transmute(i8x8::splat(24)));
+ simd_select(m, r, a)
+}
+
+/// Extended table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t {
+ let r = vqtbx2_u8(
+ a,
+ uint8x16x2_t(vcombine_u8(b.0, b.1), vcombine_u8(b.2, zeroed())),
+ c,
+ );
+ let m: int8x8_t = simd_lt(c, transmute(u8x8::splat(24)));
+ simd_select(m, r, a)
+}
+
+/// Extended table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t {
+ let r = vqtbx2_p8(
+ a,
+ poly8x16x2_t(vcombine_p8(b.0, b.1), vcombine_p8(b.2, zeroed())),
+ c,
+ );
+ let m: int8x8_t = simd_lt(c, transmute(u8x8::splat(24)));
+ simd_select(m, r, a)
+}
+
+/// Extended table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t {
+ vqtbx2_s8(
+ a,
+ int8x16x2_t(vcombine_s8(b.0, b.1), vcombine_s8(b.2, b.3)),
+ transmute(c),
+ )
+}
+
+/// Extended table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtbx4_u8(a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t) -> uint8x8_t {
+ vqtbx2_u8(
+ a,
+ uint8x16x2_t(vcombine_u8(b.0, b.1), vcombine_u8(b.2, b.3)),
+ c,
+ )
+}
+
+/// Extended table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t {
+ vqtbx2_p8(
+ a,
+ poly8x16x2_t(vcombine_p8(b.0, b.1), vcombine_p8(b.2, b.3)),
+ c,
+ )
+}
+
+/// Table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbl1_s8(t: int8x16_t, idx: uint8x8_t) -> int8x8_t {
+ vqtbl1(t, idx)
+}
+
+/// Table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbl1q_s8(t: int8x16_t, idx: uint8x16_t) -> int8x16_t {
+ vqtbl1q(t, idx)
+}
+
+/// Table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbl1_u8(t: uint8x16_t, idx: uint8x8_t) -> uint8x8_t {
+ transmute(vqtbl1(transmute(t), transmute(idx)))
+}
+
+/// Table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbl1q_u8(t: uint8x16_t, idx: uint8x16_t) -> uint8x16_t {
+ transmute(vqtbl1q(transmute(t), transmute(idx)))
+}
+
+/// Table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbl1_p8(t: poly8x16_t, idx: uint8x8_t) -> poly8x8_t {
+ transmute(vqtbl1(transmute(t), transmute(idx)))
+}
+
+/// Table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbl1q_p8(t: poly8x16_t, idx: uint8x16_t) -> poly8x16_t {
+ transmute(vqtbl1q(transmute(t), transmute(idx)))
+}
+
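+// A sketch of the full-width `vqtbl1q_*` lookup, same assumptions as the
+// `vtbl*` sketch above, but with a 16-byte table; the module name is
+// illustrative.
+#[cfg(test)]
+#[cfg(target_endian = "little")]
+mod qtbl_examples {
+ use crate::core_arch::{aarch64::*, simd::*};
+ use std::mem::transmute;
+ use stdarch_test::simd_test;
+
+ #[simd_test(enable = "neon")]
+ unsafe fn example_vqtbl1q_u8() {
+ let table = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let idx = u8x16::new(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 16);
+ // Index 16 is out of range for the 16-byte table and yields zero.
+ let e = u8x16::new(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ let r: u8x16 = transmute(vqtbl1q_u8(transmute(table), transmute(idx)));
+ assert_eq!(r, e);
+ }
+}
+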
+/// Extended table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbx1_s8(a: int8x8_t, t: int8x16_t, idx: uint8x8_t) -> int8x8_t {
+ vqtbx1(a, t, idx)
+}
+
+/// Extended table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbx1q_s8(a: int8x16_t, t: int8x16_t, idx: uint8x16_t) -> int8x16_t {
+ vqtbx1q(a, t, idx)
+}
+
+/// Extended table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbx1_u8(a: uint8x8_t, t: uint8x16_t, idx: uint8x8_t) -> uint8x8_t {
+ transmute(vqtbx1(transmute(a), transmute(t), transmute(idx)))
+}
+
+/// Extended table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbx1q_u8(a: uint8x16_t, t: uint8x16_t, idx: uint8x16_t) -> uint8x16_t {
+ transmute(vqtbx1q(transmute(a), transmute(t), transmute(idx)))
+}
+
+/// Extended table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbx1_p8(a: poly8x8_t, t: poly8x16_t, idx: uint8x8_t) -> poly8x8_t {
+ transmute(vqtbx1(transmute(a), transmute(t), transmute(idx)))
+}
+
+/// Extended table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbx1q_p8(a: poly8x16_t, t: poly8x16_t, idx: uint8x16_t) -> poly8x16_t {
+ transmute(vqtbx1q(transmute(a), transmute(t), transmute(idx)))
+}
+
+/// Table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbl2_s8(t: int8x16x2_t, idx: uint8x8_t) -> int8x8_t {
+ vqtbl2(t.0, t.1, idx)
+}
+
+/// Table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbl2q_s8(t: int8x16x2_t, idx: uint8x16_t) -> int8x16_t {
+ vqtbl2q(t.0, t.1, idx)
+}
+
+/// Table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbl2_u8(t: uint8x16x2_t, idx: uint8x8_t) -> uint8x8_t {
+ transmute(vqtbl2(transmute(t.0), transmute(t.1), transmute(idx)))
+}
+
+/// Table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbl2q_u8(t: uint8x16x2_t, idx: uint8x16_t) -> uint8x16_t {
+ transmute(vqtbl2q(transmute(t.0), transmute(t.1), transmute(idx)))
+}
+
+/// Table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbl2_p8(t: poly8x16x2_t, idx: uint8x8_t) -> poly8x8_t {
+ transmute(vqtbl2(transmute(t.0), transmute(t.1), transmute(idx)))
+}
+
+/// Table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbl2q_p8(t: poly8x16x2_t, idx: uint8x16_t) -> poly8x16_t {
+ transmute(vqtbl2q(transmute(t.0), transmute(t.1), transmute(idx)))
+}
+
+/// Extended table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbx2_s8(a: int8x8_t, t: int8x16x2_t, idx: uint8x8_t) -> int8x8_t {
+ vqtbx2(a, t.0, t.1, idx)
+}
+
+/// Extended table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbx2q_s8(a: int8x16_t, t: int8x16x2_t, idx: uint8x16_t) -> int8x16_t {
+ vqtbx2q(a, t.0, t.1, idx)
+}
+
+/// Extended table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbx2_u8(a: uint8x8_t, t: uint8x16x2_t, idx: uint8x8_t) -> uint8x8_t {
+ transmute(vqtbx2(
+ transmute(a),
+ transmute(t.0),
+ transmute(t.1),
+ transmute(idx),
+ ))
+}
+
+/// Extended table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbx2q_u8(a: uint8x16_t, t: uint8x16x2_t, idx: uint8x16_t) -> uint8x16_t {
+ transmute(vqtbx2q(
+ transmute(a),
+ transmute(t.0),
+ transmute(t.1),
+ transmute(idx),
+ ))
+}
+
+/// Extended table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbx2_p8(a: poly8x8_t, t: poly8x16x2_t, idx: uint8x8_t) -> poly8x8_t {
+ transmute(vqtbx2(
+ transmute(a),
+ transmute(t.0),
+ transmute(t.1),
+ transmute(idx),
+ ))
+}
+
+/// Extended table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbx2q_p8(a: poly8x16_t, t: poly8x16x2_t, idx: uint8x16_t) -> poly8x16_t {
+ transmute(vqtbx2q(
+ transmute(a),
+ transmute(t.0),
+ transmute(t.1),
+ transmute(idx),
+ ))
+}
+
+/// Table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbl3_s8(t: int8x16x3_t, idx: uint8x8_t) -> int8x8_t {
+ vqtbl3(t.0, t.1, t.2, idx)
+}
+
+/// Table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbl3q_s8(t: int8x16x3_t, idx: uint8x16_t) -> int8x16_t {
+ vqtbl3q(t.0, t.1, t.2, idx)
+}
+
+/// Table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbl3_u8(t: uint8x16x3_t, idx: uint8x8_t) -> uint8x8_t {
+ transmute(vqtbl3(
+ transmute(t.0),
+ transmute(t.1),
+ transmute(t.2),
+ transmute(idx),
+ ))
+}
+
+/// Table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbl3q_u8(t: uint8x16x3_t, idx: uint8x16_t) -> uint8x16_t {
+ transmute(vqtbl3q(
+ transmute(t.0),
+ transmute(t.1),
+ transmute(t.2),
+ transmute(idx),
+ ))
+}
+
+/// Table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbl3_p8(t: poly8x16x3_t, idx: uint8x8_t) -> poly8x8_t {
+ transmute(vqtbl3(
+ transmute(t.0),
+ transmute(t.1),
+ transmute(t.2),
+ transmute(idx),
+ ))
+}
+
+/// Table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbl3q_p8(t: poly8x16x3_t, idx: uint8x16_t) -> poly8x16_t {
+ transmute(vqtbl3q(
+ transmute(t.0),
+ transmute(t.1),
+ transmute(t.2),
+ transmute(idx),
+ ))
+}
+
+/// Extended table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbx3_s8(a: int8x8_t, t: int8x16x3_t, idx: uint8x8_t) -> int8x8_t {
+ vqtbx3(a, t.0, t.1, t.2, idx)
+}
+
+/// Extended table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbx3q_s8(a: int8x16_t, t: int8x16x3_t, idx: uint8x16_t) -> int8x16_t {
+ vqtbx3q(a, t.0, t.1, t.2, idx)
+}
+
+/// Extended table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbx3_u8(a: uint8x8_t, t: uint8x16x3_t, idx: uint8x8_t) -> uint8x8_t {
+ transmute(vqtbx3(
+ transmute(a),
+ transmute(t.0),
+ transmute(t.1),
+ transmute(t.2),
+ transmute(idx),
+ ))
+}
+
+/// Extended table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbx3q_u8(a: uint8x16_t, t: uint8x16x3_t, idx: uint8x16_t) -> uint8x16_t {
+ transmute(vqtbx3q(
+ transmute(a),
+ transmute(t.0),
+ transmute(t.1),
+ transmute(t.2),
+ transmute(idx),
+ ))
+}
+
+/// Extended table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbx3_p8(a: poly8x8_t, t: poly8x16x3_t, idx: uint8x8_t) -> poly8x8_t {
+ transmute(vqtbx3(
+ transmute(a),
+ transmute(t.0),
+ transmute(t.1),
+ transmute(t.2),
+ transmute(idx),
+ ))
+}
+
+/// Extended table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbx3q_p8(a: poly8x16_t, t: poly8x16x3_t, idx: uint8x16_t) -> poly8x16_t {
+ transmute(vqtbx3q(
+ transmute(a),
+ transmute(t.0),
+ transmute(t.1),
+ transmute(t.2),
+ transmute(idx),
+ ))
+}
+
+/// Table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbl4_s8(t: int8x16x4_t, idx: uint8x8_t) -> int8x8_t {
+ vqtbl4(t.0, t.1, t.2, t.3, idx)
+}
+
+/// Table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbl4q_s8(t: int8x16x4_t, idx: uint8x16_t) -> int8x16_t {
+ vqtbl4q(t.0, t.1, t.2, t.3, idx)
+}
+
+/// Table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbl4_u8(t: uint8x16x4_t, idx: uint8x8_t) -> uint8x8_t {
+ transmute(vqtbl4(
+ transmute(t.0),
+ transmute(t.1),
+ transmute(t.2),
+ transmute(t.3),
+ transmute(idx),
+ ))
+}
+
+/// Table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbl4q_u8(t: uint8x16x4_t, idx: uint8x16_t) -> uint8x16_t {
+ transmute(vqtbl4q(
+ transmute(t.0),
+ transmute(t.1),
+ transmute(t.2),
+ transmute(t.3),
+ transmute(idx),
+ ))
+}
+
+/// Table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbl4_p8(t: poly8x16x4_t, idx: uint8x8_t) -> poly8x8_t {
+ transmute(vqtbl4(
+ transmute(t.0),
+ transmute(t.1),
+ transmute(t.2),
+ transmute(t.3),
+ transmute(idx),
+ ))
+}
+
+/// Table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbl4q_p8(t: poly8x16x4_t, idx: uint8x16_t) -> poly8x16_t {
+ transmute(vqtbl4q(
+ transmute(t.0),
+ transmute(t.1),
+ transmute(t.2),
+ transmute(t.3),
+ transmute(idx),
+ ))
+}
+
+/// Extended table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbx4_s8(a: int8x8_t, t: int8x16x4_t, idx: uint8x8_t) -> int8x8_t {
+ vqtbx4(a, t.0, t.1, t.2, t.3, idx)
+}
+
+/// Extended table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbx4q_s8(a: int8x16_t, t: int8x16x4_t, idx: uint8x16_t) -> int8x16_t {
+ vqtbx4q(a, t.0, t.1, t.2, t.3, idx)
+}
+
+/// Extended table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbx4_u8(a: uint8x8_t, t: uint8x16x4_t, idx: uint8x8_t) -> uint8x8_t {
+ transmute(vqtbx4(
+ transmute(a),
+ transmute(t.0),
+ transmute(t.1),
+ transmute(t.2),
+ transmute(t.3),
+ transmute(idx),
+ ))
+}
+
+/// Extended table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbx4q_u8(a: uint8x16_t, t: uint8x16x4_t, idx: uint8x16_t) -> uint8x16_t {
+ transmute(vqtbx4q(
+ transmute(a),
+ transmute(t.0),
+ transmute(t.1),
+ transmute(t.2),
+ transmute(t.3),
+ transmute(idx),
+ ))
+}
+
+/// Extended table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbx4_p8(a: poly8x8_t, t: poly8x16x4_t, idx: uint8x8_t) -> poly8x8_t {
+ transmute(vqtbx4(
+ transmute(a),
+ transmute(t.0),
+ transmute(t.1),
+ transmute(t.2),
+ transmute(t.3),
+ transmute(idx),
+ ))
+}
+
+/// Extended table look-up
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbx4q_p8(a: poly8x16_t, t: poly8x16x4_t, idx: uint8x16_t) -> poly8x16_t {
+ transmute(vqtbx4q(
+ transmute(a),
+ transmute(t.0),
+ transmute(t.1),
+ transmute(t.2),
+ transmute(t.3),
+ transmute(idx),
+ ))
+}
+
+/// Shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vshld_n_s64<const N: i32>(a: i64) -> i64 {
+ static_assert_imm6!(N);
+ a << N
+}
+
+/// Shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vshld_n_u64<const N: i32>(a: u64) -> u64 {
+ static_assert_imm6!(N);
+ a << N
+}
+
+/// Signed shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vshrd_n_s64<const N: i32>(a: i64) -> i64 {
+ static_assert!(N : i32 where N >= 1 && N <= 64);
+ // Shifting an i64 right by 64 is undefined in Rust, so clamp to 63; for an
+ // arithmetic shift this yields the same all-sign-bits result.
+ let n: i32 = if N == 64 { 63 } else { N };
+ a >> n
+}
+
+/// Unsigned shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vshrd_n_u64<const N: i32>(a: u64) -> u64 {
+ static_assert!(N : i32 where N >= 1 && N <= 64);
+ // Shifting a u64 right by 64 is undefined in Rust but yields zero on the
+ // hardware, so return 0 directly in that case.
+ let n: i32 = if N == 64 {
+ return 0;
+ } else {
+ N
+ };
+ a >> n
+}
+
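+// A scalar-shift sketch, assuming an AArch64 NEON target, covering the
+// `N == 64` edge cases noted in the comments above; the module name is
+// illustrative.
+#[cfg(test)]
+mod scalar_shift_examples {
+ use crate::core_arch::aarch64::*;
+ use stdarch_test::simd_test;
+
+ #[simd_test(enable = "neon")]
+ unsafe fn example_scalar_shifts() {
+ assert_eq!(vshld_n_s64::<2>(1), 4);
+ // A signed shift by 64 clamps to 63 and so keeps the sign.
+ assert_eq!(vshrd_n_s64::<64>(-1), -1);
+ // An unsigned shift by 64 discards every bit.
+ assert_eq!(vshrd_n_u64::<64>(u64::MAX), 0);
+ }
+}
+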
+/// Signed shift right and accumulate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsrad_n_s64<const N: i32>(a: i64, b: i64) -> i64 {
+ static_assert!(N : i32 where N >= 1 && N <= 64);
+ a.wrapping_add(vshrd_n_s64::<N>(b))
+}
+
+/// Unsigned shift right and accumulate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsrad_n_u64<const N: i32>(a: u64, b: u64) -> u64 {
+ static_assert!(N : i32 where N >= 1 && N <= 64);
+ a.wrapping_add(vshrd_n_u64::<N>(b))
+}
+
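+// A one-line sketch of shift-right-and-accumulate, assuming an AArch64 NEON
+// target: the second operand is shifted right by `N` and added to the first.
+// The module name is illustrative.
+#[cfg(test)]
+mod srad_examples {
+ use crate::core_arch::aarch64::*;
+ use stdarch_test::simd_test;
+
+ #[simd_test(enable = "neon")]
+ unsafe fn example_vsrad_n_u64() {
+ // 1 + (8 >> 2) == 3
+ assert_eq!(vsrad_n_u64::<2>(1, 8), 3);
+ }
+}
+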
+/// Shift Left and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsli_n_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+ static_assert_imm3!(N);
+ vsli_n_s8_(a, b, N)
+}
+/// Shift Left and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsliq_n_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+ static_assert_imm3!(N);
+ vsliq_n_s8_(a, b, N)
+}
+/// Shift Left and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsli_n_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+ static_assert_imm4!(N);
+ vsli_n_s16_(a, b, N)
+}
+/// Shift Left and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsliq_n_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+ static_assert_imm4!(N);
+ vsliq_n_s16_(a, b, N)
+}
+/// Shift Left and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsli_n_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+ static_assert!(N: i32 where N >= 0 && N <= 31);
+ vsli_n_s32_(a, b, N)
+}
+/// Shift Left and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsliq_n_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+ static_assert!(N: i32 where N >= 0 && N <= 31);
+ vsliq_n_s32_(a, b, N)
+}
+/// Shift Left and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsli_n_s64<const N: i32>(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+ static_assert!(N: i32 where N >= 0 && N <= 63);
+ vsli_n_s64_(a, b, N)
+}
+/// Shift Left and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsliq_n_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+ static_assert!(N: i32 where N >= 0 && N <= 63);
+ vsliq_n_s64_(a, b, N)
+}
+/// Shift Left and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsli_n_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+ static_assert_imm3!(N);
+ transmute(vsli_n_s8_(transmute(a), transmute(b), N))
+}
+/// Shift Left and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsliq_n_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+ static_assert_imm3!(N);
+ transmute(vsliq_n_s8_(transmute(a), transmute(b), N))
+}
+/// Shift Left and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsli_n_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+ static_assert_imm4!(N);
+ transmute(vsli_n_s16_(transmute(a), transmute(b), N))
+}
+/// Shift Left and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsliq_n_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+ static_assert_imm4!(N);
+ transmute(vsliq_n_s16_(transmute(a), transmute(b), N))
+}
+/// Shift Left and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsli_n_u32<const N: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+ static_assert!(N: i32 where N >= 0 && N <= 31);
+ transmute(vsli_n_s32_(transmute(a), transmute(b), N))
+}
+/// Shift Left and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsliq_n_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+ static_assert!(N: i32 where N >= 0 && N <= 31);
+ transmute(vsliq_n_s32_(transmute(a), transmute(b), N))
+}
+/// Shift Left and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsli_n_u64<const N: i32>(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+ static_assert!(N: i32 where N >= 0 && N <= 63);
+ transmute(vsli_n_s64_(transmute(a), transmute(b), N))
+}
+/// Shift Left and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsliq_n_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+ static_assert!(N: i32 where N >= 0 && N <= 63);
+ transmute(vsliq_n_s64_(transmute(a), transmute(b), N))
+}
+/// Shift Left and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsli_n_p8<const N: i32>(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
+ static_assert_imm3!(N);
+ transmute(vsli_n_s8_(transmute(a), transmute(b), N))
+}
+/// Shift Left and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsliq_n_p8<const N: i32>(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
+ static_assert_imm3!(N);
+ transmute(vsliq_n_s8_(transmute(a), transmute(b), N))
+}
+/// Shift Left and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsli_n_p16<const N: i32>(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
+ static_assert_imm4!(N);
+ transmute(vsli_n_s16_(transmute(a), transmute(b), N))
+}
+/// Shift Left and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsliq_n_p16<const N: i32>(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
+ static_assert_imm4!(N);
+ transmute(vsliq_n_s16_(transmute(a), transmute(b), N))
+}
+/// Shift Left and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsli_n_p64<const N: i32>(a: poly64x1_t, b: poly64x1_t) -> poly64x1_t {
+ static_assert!(N: i32 where N >= 0 && N <= 63);
+ transmute(vsli_n_s64_(transmute(a), transmute(b), N))
+}
+/// Shift Left and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsliq_n_p64<const N: i32>(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
+ static_assert!(N: i32 where N >= 0 && N <= 63);
+ transmute(vsliq_n_s64_(transmute(a), transmute(b), N))
+}
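+
+// A bit-level sketch of SLI (shift left and insert), assuming an AArch64 NEON
+// target: each result element is `b << N` with the low `N` bits of `a`
+// preserved. The `sli_examples` module name is illustrative.
+#[cfg(test)]
+mod sli_examples {
+ use crate::core_arch::{aarch64::*, simd::*};
+ use std::mem::transmute;
+ use stdarch_test::simd_test;
+
+ #[simd_test(enable = "neon")]
+ unsafe fn example_vsli_n_u8() {
+ let a = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
+ let b = u8x8::new(0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01);
+ // (0x01 << 4) | (0xFF & 0x0F) == 0x1F in every lane.
+ let r: u8x8 = transmute(vsli_n_u8::<4>(transmute(a), transmute(b)));
+ assert_eq!(r, u8x8::new(0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F));
+ }
+}
+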
+/// Shift Right and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsri_n_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+ static_assert!(N: i32 where N >= 1 && N <= 8);
+ vsri_n_s8_(a, b, N)
+}
+/// Shift Right and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsriq_n_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+ static_assert!(N: i32 where N >= 1 && N <= 8);
+ vsriq_n_s8_(a, b, N)
+}
+/// Shift Right and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsri_n_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+ static_assert!(N: i32 where N >= 1 && N <= 16);
+ vsri_n_s16_(a, b, N)
+}
+/// Shift Right and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsriq_n_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+ static_assert!(N: i32 where N >= 1 && N <= 16);
+ vsriq_n_s16_(a, b, N)
+}
+/// Shift Right and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsri_n_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+ static_assert!(N: i32 where N >= 1 && N <= 32);
+ vsri_n_s32_(a, b, N)
+}
+/// Shift Right and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsriq_n_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+ static_assert!(N: i32 where N >= 1 && N <= 32);
+ vsriq_n_s32_(a, b, N)
+}
+/// Shift Right and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsri_n_s64<const N: i32>(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+ static_assert!(N: i32 where N >= 1 && N <= 64);
+ vsri_n_s64_(a, b, N)
+}
+/// Shift Right and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsriq_n_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+ static_assert!(N: i32 where N >= 1 && N <= 64);
+ vsriq_n_s64_(a, b, N)
+}
+/// Shift Right and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsri_n_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+ static_assert!(N: i32 where N >= 1 && N <= 8);
+ transmute(vsri_n_s8_(transmute(a), transmute(b), N))
+}
+/// Shift Right and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsriq_n_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+ static_assert!(N: i32 where N >= 1 && N <= 8);
+ transmute(vsriq_n_s8_(transmute(a), transmute(b), N))
+}
+/// Shift Right and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsri_n_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+ static_assert!(N: i32 where N >= 1 && N <= 16);
+ transmute(vsri_n_s16_(transmute(a), transmute(b), N))
+}
+/// Shift Right and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsriq_n_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+ static_assert!(N: i32 where N >= 1 && N <= 16);
+ transmute(vsriq_n_s16_(transmute(a), transmute(b), N))
+}
+/// Shift Right and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsri_n_u32<const N: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+ static_assert!(N: i32 where N >= 1 && N <= 32);
+ transmute(vsri_n_s32_(transmute(a), transmute(b), N))
+}
+/// Shift Right and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsriq_n_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+ static_assert!(N: i32 where N >= 1 && N <= 32);
+ transmute(vsriq_n_s32_(transmute(a), transmute(b), N))
+}
+/// Shift Right and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsri_n_u64<const N: i32>(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+ static_assert!(N: i32 where N >= 1 && N <= 64);
+ transmute(vsri_n_s64_(transmute(a), transmute(b), N))
+}
+/// Shift Right and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsriq_n_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+ static_assert!(N: i32 where N >= 1 && N <= 64);
+ transmute(vsriq_n_s64_(transmute(a), transmute(b), N))
+}
+/// Shift Right and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsri_n_p8<const N: i32>(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
+ static_assert!(N: i32 where N >= 1 && N <= 8);
+ transmute(vsri_n_s8_(transmute(a), transmute(b), N))
+}
+/// Shift Right and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsriq_n_p8<const N: i32>(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
+ static_assert!(N: i32 where N >= 1 && N <= 8);
+ transmute(vsriq_n_s8_(transmute(a), transmute(b), N))
+}
+/// Shift Right and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsri_n_p16<const N: i32>(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
+ static_assert!(N: i32 where N >= 1 && N <= 16);
+ transmute(vsri_n_s16_(transmute(a), transmute(b), N))
+}
+/// Shift Right and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsriq_n_p16<const N: i32>(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
+ static_assert!(N: i32 where N >= 1 && N <= 16);
+ transmute(vsriq_n_s16_(transmute(a), transmute(b), N))
+}
+/// Shift Right and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsri_n_p64<const N: i32>(a: poly64x1_t, b: poly64x1_t) -> poly64x1_t {
+ static_assert!(N: i32 where N >= 1 && N <= 64);
+ transmute(vsri_n_s64_(transmute(a), transmute(b), N))
+}
+/// Shift Right and Insert (immediate)
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsriq_n_p64<const N: i32>(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
+ static_assert!(N: i32 where N >= 1 && N <= 64);
+ transmute(vsriq_n_s64_(transmute(a), transmute(b), N))
+}
+
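+// The SRI counterpart of the sketch above, under the same assumptions: each
+// result element is `b >> N` with the high `N` bits of `a` preserved. The
+// module name is illustrative.
+#[cfg(test)]
+mod sri_examples {
+ use crate::core_arch::{aarch64::*, simd::*};
+ use std::mem::transmute;
+ use stdarch_test::simd_test;
+
+ #[simd_test(enable = "neon")]
+ unsafe fn example_vsri_n_u8() {
+ let a = u8x8::new(0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0);
+ let b = u8x8::new(0xA5, 0xA5, 0xA5, 0xA5, 0xA5, 0xA5, 0xA5, 0xA5);
+ // (0xA5 >> 4) | (0xF0 & 0xF0) == 0xFA in every lane.
+ let r: u8x8 = transmute(vsri_n_u8::<4>(transmute(a), transmute(b)));
+ assert_eq!(r, u8x8::new(0xFA, 0xFA, 0xFA, 0xFA, 0xFA, 0xFA, 0xFA, 0xFA));
+ }
+}
+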
+/// SM3TT1A
+#[inline]
+#[target_feature(enable = "neon,sm4")]
+#[cfg_attr(test, assert_instr(sm3tt1a, IMM2 = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vsm3tt1aq_u32<const IMM2: i32>(
+ a: uint32x4_t,
+ b: uint32x4_t,
+ c: uint32x4_t,
+) -> uint32x4_t {
+ static_assert_imm2!(IMM2);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crypto.sm3tt1a")]
+ fn vsm3tt1aq_u32_(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, imm2: i64) -> uint32x4_t;
+ }
+ vsm3tt1aq_u32_(a, b, c, IMM2 as i64)
+}
+
+/// SM3TT1B
+#[inline]
+#[target_feature(enable = "neon,sm4")]
+#[cfg_attr(test, assert_instr(sm3tt1b, IMM2 = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vsm3tt1bq_u32<const IMM2: i32>(
+ a: uint32x4_t,
+ b: uint32x4_t,
+ c: uint32x4_t,
+) -> uint32x4_t {
+ static_assert_imm2!(IMM2);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crypto.sm3tt1b")]
+ fn vsm3tt1bq_u32_(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, imm2: i64) -> uint32x4_t;
+ }
+ vsm3tt1bq_u32_(a, b, c, IMM2 as i64)
+}
+
+/// SM3TT2A
+#[inline]
+#[target_feature(enable = "neon,sm4")]
+#[cfg_attr(test, assert_instr(sm3tt2a, IMM2 = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vsm3tt2aq_u32<const IMM2: i32>(
+ a: uint32x4_t,
+ b: uint32x4_t,
+ c: uint32x4_t,
+) -> uint32x4_t {
+ static_assert_imm2!(IMM2);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crypto.sm3tt2a")]
+ fn vsm3tt2aq_u32_(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, imm2: i64) -> uint32x4_t;
+ }
+ vsm3tt2aq_u32_(a, b, c, IMM2 as i64)
+}
+
+/// SM3TT2B
+#[inline]
+#[target_feature(enable = "neon,sm4")]
+#[cfg_attr(test, assert_instr(sm3tt2b, IMM2 = 0))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vsm3tt2bq_u32<const IMM2: i32>(
+ a: uint32x4_t,
+ b: uint32x4_t,
+ c: uint32x4_t,
+) -> uint32x4_t {
+ static_assert_imm2!(IMM2);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crypto.sm3tt2b")]
+ fn vsm3tt2bq_u32_(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, imm2: i64) -> uint32x4_t;
+ }
+ vsm3tt2bq_u32_(a, b, c, IMM2 as i64)
+}
+
+/// Exclusive OR and rotate
+#[inline]
+#[target_feature(enable = "neon,sha3")]
+#[cfg_attr(test, assert_instr(xar, IMM6 = 0))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vxarq_u64<const IMM6: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+ static_assert_imm6!(IMM6);
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crypto.xar")]
+ fn vxarq_u64_(a: uint64x2_t, b: uint64x2_t, n: i64) -> uint64x2_t;
+ }
+ vxarq_u64_(a, b, IMM6 as i64)
+}
+
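+// A sketch of XAR, assuming an AArch64 target with the `sha3` extension: each
+// 64-bit element of the result is `a ^ b` rotated right by `IMM6`. The
+// module name is illustrative.
+#[cfg(test)]
+mod xar_examples {
+ use crate::core_arch::{aarch64::*, simd::*};
+ use std::mem::transmute;
+ use stdarch_test::simd_test;
+
+ #[simd_test(enable = "neon,sha3")]
+ unsafe fn example_vxarq_u64() {
+ let a = u64x2::new(1, 1);
+ let b = u64x2::new(0, 0);
+ // (1 ^ 0) rotated right by one places the bit at the top.
+ let r: u64x2 = transmute(vxarq_u64::<1>(transmute(a), transmute(b)));
+ assert_eq!(r, u64x2::new(1 << 63, 1 << 63));
+ }
+}
+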
+#[cfg(test)]
+mod tests {
+ use crate::core_arch::aarch64::test_support::*;
+ use crate::core_arch::arm_shared::test_support::*;
+ use crate::core_arch::{aarch64::neon::*, aarch64::*, simd::*};
+ use std::mem::transmute;
+ use stdarch_test::simd_test;
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuqadd_s8() {
+ let a = i8x8::new(i8::MIN, -3, -2, -1, 0, 1, 2, i8::MAX);
+ let b = u8x8::new(u8::MAX, 1, 2, 3, 4, 5, 6, 7);
+ let e = i8x8::new(i8::MAX, -2, 0, 2, 4, 6, 8, i8::MAX);
+ let r: i8x8 = transmute(vuqadd_s8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuqaddq_s8() {
+ let a = i8x16::new(
+ i8::MIN,
+ -7,
+ -6,
+ -5,
+ -4,
+ -3,
+ -2,
+ -1,
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ i8::MAX,
+ );
+ let b = u8x16::new(u8::MAX, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let e = i8x16::new(
+ i8::MAX,
+ -6,
+ -4,
+ -2,
+ 0,
+ 2,
+ 4,
+ 6,
+ 8,
+ 10,
+ 12,
+ 14,
+ 16,
+ 18,
+ 20,
+ i8::MAX,
+ );
+ let r: i8x16 = transmute(vuqaddq_s8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuqadd_s16() {
+ let a = i16x4::new(i16::MIN, -1, 0, i16::MAX);
+ let b = u16x4::new(u16::MAX, 1, 2, 3);
+ let e = i16x4::new(i16::MAX, 0, 2, i16::MAX);
+ let r: i16x4 = transmute(vuqadd_s16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuqaddq_s16() {
+ let a = i16x8::new(i16::MIN, -3, -2, -1, 0, 1, 2, i16::MAX);
+ let b = u16x8::new(u16::MAX, 1, 2, 3, 4, 5, 6, 7);
+ let e = i16x8::new(i16::MAX, -2, 0, 2, 4, 6, 8, i16::MAX);
+ let r: i16x8 = transmute(vuqaddq_s16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuqadd_s32() {
+ let a = i32x2::new(i32::MIN, i32::MAX);
+ let b = u32x2::new(u32::MAX, 1);
+ let e = i32x2::new(i32::MAX, i32::MAX);
+ let r: i32x2 = transmute(vuqadd_s32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuqaddq_s32() {
+ let a = i32x4::new(i32::MIN, -1, 0, i32::MAX);
+ let b = u32x4::new(u32::MAX, 1, 2, 3);
+ let e = i32x4::new(i32::MAX, 0, 2, i32::MAX);
+ let r: i32x4 = transmute(vuqaddq_s32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuqadd_s64() {
+ let a = i64x1::new(i64::MIN);
+ let b = u64x1::new(u64::MAX);
+ let e = i64x1::new(i64::MAX);
+ let r: i64x1 = transmute(vuqadd_s64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vuqaddq_s64() {
+ let a = i64x2::new(i64::MIN, i64::MAX);
+ let b = u64x2::new(u64::MAX, 1);
+ let e = i64x2::new(i64::MAX, i64::MAX);
+ let r: i64x2 = transmute(vuqaddq_s64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vsqadd_u8() {
+ let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, u8::MAX);
+ let b = i8x8::new(i8::MIN, -3, -2, -1, 0, 1, 2, 3);
+ let e = u8x8::new(0, 0, 0, 2, 4, 6, 8, u8::MAX);
+ let r: u8x8 = transmute(vsqadd_u8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vsqaddq_u8() {
+ let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, u8::MAX);
+ let b = i8x16::new(i8::MIN, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7);
+ let e = u8x16::new(0, 0, 0, 0, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, u8::MAX);
+ let r: u8x16 = transmute(vsqaddq_u8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vsqadd_u16() {
+ let a = u16x4::new(0, 1, 2, u16::MAX);
+ let b = i16x4::new(i16::MIN, -1, 0, 1);
+ let e = u16x4::new(0, 0, 2, u16::MAX);
+ let r: u16x4 = transmute(vsqadd_u16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vsqaddq_u16() {
+ let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, u16::MAX);
+ let b = i16x8::new(i16::MIN, -3, -2, -1, 0, 1, 2, 3);
+ let e = u16x8::new(0, 0, 0, 2, 4, 6, 8, u16::MAX);
+ let r: u16x8 = transmute(vsqaddq_u16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vsqadd_u32() {
+ let a = u32x2::new(0, u32::MAX);
+ let b = i32x2::new(i32::MIN, 1);
+ let e = u32x2::new(0, u32::MAX);
+ let r: u32x2 = transmute(vsqadd_u32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vsqaddq_u32() {
+ let a = u32x4::new(0, 1, 2, u32::MAX);
+ let b = i32x4::new(i32::MIN, -1, 0, 1);
+ let e = u32x4::new(0, 0, 2, u32::MAX);
+ let r: u32x4 = transmute(vsqaddq_u32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vsqadd_u64() {
+ let a = u64x1::new(0);
+ let b = i64x1::new(i64::MIN);
+ let e = u64x1::new(0);
+ let r: u64x1 = transmute(vsqadd_u64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vsqaddq_u64() {
+ let a = u64x2::new(0, u64::MAX);
+ let b = i64x2::new(i64::MIN, 1);
+ let e = u64x2::new(0, u64::MAX);
+ let r: u64x2 = transmute(vsqaddq_u64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpaddq_s16() {
+ let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+ let b = i16x8::new(0, -1, -2, -3, -4, -5, -6, -7);
+ let r: i16x8 = transmute(vpaddq_s16(transmute(a), transmute(b)));
+ let e = i16x8::new(3, 7, 11, 15, -1, -5, -9, -13);
+ assert_eq!(r, e);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpaddq_s32() {
+ let a = i32x4::new(1, 2, 3, 4);
+ let b = i32x4::new(0, -1, -2, -3);
+ let r: i32x4 = transmute(vpaddq_s32(transmute(a), transmute(b)));
+ let e = i32x4::new(3, 7, -1, -5);
+ assert_eq!(r, e);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpaddq_s64() {
+ let a = i64x2::new(1, 2);
+ let b = i64x2::new(0, -1);
+ let r: i64x2 = transmute(vpaddq_s64(transmute(a), transmute(b)));
+ let e = i64x2::new(3, -1);
+ assert_eq!(r, e);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpaddq_s8() {
+ let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let b = i8x16::new(
+ 0, -1, -2, -3, -4, -5, -6, -7, -8, -8, -10, -11, -12, -13, -14, -15,
+ );
+ let r: i8x16 = transmute(vpaddq_s8(transmute(a), transmute(b)));
+ let e = i8x16::new(
+ 3, 7, 11, 15, 19, 23, 27, 31, -1, -5, -9, -13, -16, -21, -25, -29,
+ );
+ assert_eq!(r, e);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpaddq_u16() {
+ let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+ let b = u16x8::new(17, 18, 19, 20, 20, 21, 22, 23);
+ let r: u16x8 = transmute(vpaddq_u16(transmute(a), transmute(b)));
+ let e = u16x8::new(1, 5, 9, 13, 35, 39, 41, 45);
+ assert_eq!(r, e);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpaddq_u32() {
+ let a = u32x4::new(0, 1, 2, 3);
+ let b = u32x4::new(17, 18, 19, 20);
+ let r: u32x4 = transmute(vpaddq_u32(transmute(a), transmute(b)));
+ let e = u32x4::new(1, 5, 35, 39);
+ assert_eq!(r, e);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpaddq_u64() {
+ let a = u64x2::new(0, 1);
+ let b = u64x2::new(17, 18);
+ let r: u64x2 = transmute(vpaddq_u64(transmute(a), transmute(b)));
+ let e = u64x2::new(1, 35);
+ assert_eq!(r, e);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpaddq_u8() {
+ let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let b = u8x16::new(
+ 17, 18, 19, 20, 20, 21, 22, 23, 24, 25, 26, 27, 29, 29, 30, 31,
+ );
+ let e = u8x16::new(1, 5, 9, 13, 17, 21, 25, 29, 35, 39, 41, 45, 49, 53, 58, 61);
+ let r: u8x16 = transmute(vpaddq_u8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpaddd_s64() {
+ let a = i64x2::new(2, -3);
+ let r: i64 = transmute(vpaddd_s64(transmute(a)));
+ let e = -1_i64;
+ assert_eq!(r, e);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpaddd_u64() {
+ let a = i64x2::new(2, 3);
+ let r: u64 = transmute(vpaddd_u64(transmute(a)));
+ let e = 5_u64;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vadd_f64() {
+ let a = 1.;
+ let b = 8.;
+ let e = 9.;
+ let r: f64 = transmute(vadd_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vaddq_f64() {
+ let a = f64x2::new(1., 2.);
+ let b = f64x2::new(8., 7.);
+ let e = f64x2::new(9., 9.);
+ let r: f64x2 = transmute(vaddq_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vadd_s64() {
+ let a = 1_i64;
+ let b = 8_i64;
+ let e = 9_i64;
+ let r: i64 = transmute(vadd_s64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vadd_u64() {
+ let a = 1_u64;
+ let b = 8_u64;
+ let e = 9_u64;
+ let r: u64 = transmute(vadd_u64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vaddd_s64() {
+ let a = 1_i64;
+ let b = 8_i64;
+ let e = 9_i64;
+ let r: i64 = transmute(vaddd_s64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vaddd_u64() {
+ let a = 1_u64;
+ let b = 8_u64;
+ let e = 9_u64;
+ let r: u64 = transmute(vaddd_u64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmaxv_s8() {
+ let r = vmaxv_s8(transmute(i8x8::new(1, 2, 3, 4, -8, 6, 7, 5)));
+ assert_eq!(r, 7_i8);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmaxvq_s8() {
+ #[rustfmt::skip]
+ let r = vmaxvq_s8(transmute(i8x16::new(
+ 1, 2, 3, 4,
+ -16, 6, 7, 5,
+ 8, 1, 1, 1,
+ 1, 1, 1, 1,
+ )));
+ assert_eq!(r, 8_i8);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmaxv_s16() {
+ let r = vmaxv_s16(transmute(i16x4::new(1, 2, -4, 3)));
+ assert_eq!(r, 3_i16);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmaxvq_s16() {
+ let r = vmaxvq_s16(transmute(i16x8::new(1, 2, 7, 4, -16, 6, 7, 5)));
+ assert_eq!(r, 7_i16);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmaxv_s32() {
+ let r = vmaxv_s32(transmute(i32x2::new(1, -4)));
+ assert_eq!(r, 1_i32);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmaxvq_s32() {
+ let r = vmaxvq_s32(transmute(i32x4::new(1, 2, -32, 4)));
+ assert_eq!(r, 4_i32);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmaxv_u8() {
+ let r = vmaxv_u8(transmute(u8x8::new(1, 2, 3, 4, 8, 6, 7, 5)));
+ assert_eq!(r, 8_u8);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmaxvq_u8() {
+ #[rustfmt::skip]
+ let r = vmaxvq_u8(transmute(u8x16::new(
+ 1, 2, 3, 4,
+ 16, 6, 7, 5,
+ 8, 1, 1, 1,
+ 1, 1, 1, 1,
+ )));
+ assert_eq!(r, 16_u8);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmaxv_u16() {
+ let r = vmaxv_u16(transmute(u16x4::new(1, 2, 4, 3)));
+ assert_eq!(r, 4_u16);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmaxvq_u16() {
+ let r = vmaxvq_u16(transmute(u16x8::new(1, 2, 7, 4, 16, 6, 7, 5)));
+ assert_eq!(r, 16_u16);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmaxv_u32() {
+ let r = vmaxv_u32(transmute(u32x2::new(1, 4)));
+ assert_eq!(r, 4_u32);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmaxvq_u32() {
+ let r = vmaxvq_u32(transmute(u32x4::new(1, 2, 32, 4)));
+ assert_eq!(r, 32_u32);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmaxv_f32() {
+ let r = vmaxv_f32(transmute(f32x2::new(1., 4.)));
+ assert_eq!(r, 4_f32);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmaxvq_f32() {
+ let r = vmaxvq_f32(transmute(f32x4::new(1., 2., 32., 4.)));
+ assert_eq!(r, 32_f32);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmaxvq_f64() {
+ let r = vmaxvq_f64(transmute(f64x2::new(1., 4.)));
+ assert_eq!(r, 4_f64);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vminv_s8() {
+ let r = vminv_s8(transmute(i8x8::new(1, 2, 3, 4, -8, 6, 7, 5)));
+ assert_eq!(r, -8_i8);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vminvq_s8() {
+ #[rustfmt::skip]
+ let r = vminvq_s8(transmute(i8x16::new(
+ 1, 2, 3, 4,
+ -16, 6, 7, 5,
+ 8, 1, 1, 1,
+ 1, 1, 1, 1,
+ )));
+ assert_eq!(r, -16_i8);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vminv_s16() {
+ let r = vminv_s16(transmute(i16x4::new(1, 2, -4, 3)));
+ assert_eq!(r, -4_i16);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vminvq_s16() {
+ let r = vminvq_s16(transmute(i16x8::new(1, 2, 7, 4, -16, 6, 7, 5)));
+ assert_eq!(r, -16_i16);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vminv_s32() {
+ let r = vminv_s32(transmute(i32x2::new(1, -4)));
+ assert_eq!(r, -4_i32);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vminvq_s32() {
+ let r = vminvq_s32(transmute(i32x4::new(1, 2, -32, 4)));
+ assert_eq!(r, -32_i32);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vminv_u8() {
+ let r = vminv_u8(transmute(u8x8::new(1, 2, 3, 4, 8, 6, 7, 5)));
+ assert_eq!(r, 1_u8);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vminvq_u8() {
+ #[rustfmt::skip]
+ let r = vminvq_u8(transmute(u8x16::new(
+ 1, 2, 3, 4,
+ 16, 6, 7, 5,
+ 8, 1, 1, 1,
+ 1, 1, 1, 1,
+ )));
+ assert_eq!(r, 1_u8);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vminv_u16() {
+ let r = vminv_u16(transmute(u16x4::new(1, 2, 4, 3)));
+ assert_eq!(r, 1_u16);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vminvq_u16() {
+ let r = vminvq_u16(transmute(u16x8::new(1, 2, 7, 4, 16, 6, 7, 5)));
+ assert_eq!(r, 1_u16);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vminv_u32() {
+ let r = vminv_u32(transmute(u32x2::new(1, 4)));
+ assert_eq!(r, 1_u32);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vminvq_u32() {
+ let r = vminvq_u32(transmute(u32x4::new(1, 2, 32, 4)));
+ assert_eq!(r, 1_u32);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vminv_f32() {
+ let r = vminv_f32(transmute(f32x2::new(1., 4.)));
+ assert_eq!(r, 1_f32);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vminvq_f32() {
+ let r = vminvq_f32(transmute(f32x4::new(1., 2., 32., 4.)));
+ assert_eq!(r, 1_f32);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vminvq_f64() {
+ let r = vminvq_f64(transmute(f64x2::new(1., 4.)));
+ assert_eq!(r, 1_f64);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpminq_s8() {
+ #[rustfmt::skip]
+ let a = i8x16::new(1, -2, 3, -4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+ #[rustfmt::skip]
+ let b = i8x16::new(0, 3, 2, 5, 4, 7, 6, 9, 0, 3, 2, 5, 4, 7, 6, 9);
+ #[rustfmt::skip]
+ let e = i8x16::new(-2, -4, 5, 7, 1, 3, 5, 7, 0, 2, 4, 6, 0, 2, 4, 6);
+ let r: i8x16 = transmute(vpminq_s8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpminq_s16() {
+ let a = i16x8::new(1, -2, 3, 4, 5, 6, 7, 8);
+ let b = i16x8::new(0, 3, 2, 5, 4, 7, 6, 9);
+ let e = i16x8::new(-2, 3, 5, 7, 0, 2, 4, 6);
+ let r: i16x8 = transmute(vpminq_s16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpminq_s32() {
+ let a = i32x4::new(1, -2, 3, 4);
+ let b = i32x4::new(0, 3, 2, 5);
+ let e = i32x4::new(-2, 3, 0, 2);
+ let r: i32x4 = transmute(vpminq_s32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpminq_u8() {
+ #[rustfmt::skip]
+ let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+ #[rustfmt::skip]
+ let b = u8x16::new(0, 3, 2, 5, 4, 7, 6, 9, 0, 3, 2, 5, 4, 7, 6, 9);
+ #[rustfmt::skip]
+ let e = u8x16::new(1, 3, 5, 7, 1, 3, 5, 7, 0, 2, 4, 6, 0, 2, 4, 6);
+ let r: u8x16 = transmute(vpminq_u8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpminq_u16() {
+ let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+ let b = u16x8::new(0, 3, 2, 5, 4, 7, 6, 9);
+ let e = u16x8::new(1, 3, 5, 7, 0, 2, 4, 6);
+ let r: u16x8 = transmute(vpminq_u16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpminq_u32() {
+ let a = u32x4::new(1, 2, 3, 4);
+ let b = u32x4::new(0, 3, 2, 5);
+ let e = u32x4::new(1, 3, 0, 2);
+ let r: u32x4 = transmute(vpminq_u32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpminq_f32() {
+ let a = f32x4::new(1., -2., 3., 4.);
+ let b = f32x4::new(0., 3., 2., 5.);
+ let e = f32x4::new(-2., 3., 0., 2.);
+ let r: f32x4 = transmute(vpminq_f32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpminq_f64() {
+ let a = f64x2::new(1., -2.);
+ let b = f64x2::new(0., 3.);
+ let e = f64x2::new(-2., 0.);
+ let r: f64x2 = transmute(vpminq_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpmaxq_s8() {
+ #[rustfmt::skip]
+ let a = i8x16::new(1, -2, 3, -4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+ #[rustfmt::skip]
+ let b = i8x16::new(0, 3, 2, 5, 4, 7, 6, 9, 0, 3, 2, 5, 4, 7, 6, 9);
+ #[rustfmt::skip]
+ let e = i8x16::new(1, 3, 6, 8, 2, 4, 6, 8, 3, 5, 7, 9, 3, 5, 7, 9);
+ let r: i8x16 = transmute(vpmaxq_s8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpmaxq_s16() {
+ let a = i16x8::new(1, -2, 3, 4, 5, 6, 7, 8);
+ let b = i16x8::new(0, 3, 2, 5, 4, 7, 6, 9);
+ let e = i16x8::new(1, 4, 6, 8, 3, 5, 7, 9);
+ let r: i16x8 = transmute(vpmaxq_s16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpmaxq_s32() {
+ let a = i32x4::new(1, -2, 3, 4);
+ let b = i32x4::new(0, 3, 2, 5);
+ let e = i32x4::new(1, 4, 3, 5);
+ let r: i32x4 = transmute(vpmaxq_s32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpmaxq_u8() {
+ #[rustfmt::skip]
+ let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+ #[rustfmt::skip]
+ let b = u8x16::new(0, 3, 2, 5, 4, 7, 6, 9, 0, 3, 2, 5, 4, 7, 6, 9);
+ #[rustfmt::skip]
+ let e = u8x16::new(2, 4, 6, 8, 2, 4, 6, 8, 3, 5, 7, 9, 3, 5, 7, 9);
+ let r: u8x16 = transmute(vpmaxq_u8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpmaxq_u16() {
+ let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+ let b = u16x8::new(0, 3, 2, 5, 4, 7, 6, 9);
+ let e = u16x8::new(2, 4, 6, 8, 3, 5, 7, 9);
+ let r: u16x8 = transmute(vpmaxq_u16(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpmaxq_u32() {
+ let a = u32x4::new(1, 2, 3, 4);
+ let b = u32x4::new(0, 3, 2, 5);
+ let e = u32x4::new(2, 4, 3, 5);
+ let r: u32x4 = transmute(vpmaxq_u32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpmaxq_f32() {
+ let a = f32x4::new(1., -2., 3., 4.);
+ let b = f32x4::new(0., 3., 2., 5.);
+ let e = f32x4::new(1., 4., 3., 5.);
+ let r: f32x4 = transmute(vpmaxq_f32(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vpmaxq_f64() {
+ let a = f64x2::new(1., -2.);
+ let b = f64x2::new(0., 3.);
+ let e = f64x2::new(1., 3.);
+ let r: f64x2 = transmute(vpmaxq_f64(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vext_p64() {
+ let a: i64x1 = i64x1::new(0);
+ let b: i64x1 = i64x1::new(1);
+ let e: i64x1 = i64x1::new(0);
+ let r: i64x1 = transmute(vext_p64::<0>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vext_f64() {
+ let a: f64x1 = f64x1::new(0.);
+ let b: f64x1 = f64x1::new(1.);
+ let e: f64x1 = f64x1::new(0.);
+ let r: f64x1 = transmute(vext_f64::<0>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vshld_n_s64() {
+ let a: i64 = 1;
+ let e: i64 = 4;
+ let r: i64 = vshld_n_s64::<2>(a);
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vshld_n_u64() {
+ let a: u64 = 1;
+ let e: u64 = 4;
+ let r: u64 = vshld_n_u64::<2>(a);
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vshrd_n_s64() {
+ let a: i64 = 4;
+ let e: i64 = 1;
+ let r: i64 = vshrd_n_s64::<2>(a);
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vshrd_n_u64() {
+ let a: u64 = 4;
+ let e: u64 = 1;
+ let r: u64 = vshrd_n_u64::<2>(a);
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vsrad_n_s64() {
+ let a: i64 = 1;
+ let b: i64 = 4;
+ let e: i64 = 2;
+ let r: i64 = vsrad_n_s64::<2>(a, b);
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vsrad_n_u64() {
+ let a: u64 = 1;
+ let b: u64 = 4;
+ let e: u64 = 2;
+ let r: u64 = vsrad_n_u64::<2>(a, b);
+ assert_eq!(r, e);
+ }
+
+ macro_rules! test_vcombine {
+ ($test_id:ident => $fn_id:ident ([$($a:expr),*], [$($b:expr),*])) => {
+ #[allow(unused_assignments)]
+ #[simd_test(enable = "neon")]
+ unsafe fn $test_id() {
+ let a = [$($a),*];
+ let b = [$($b),*];
+ let e = [$($a),* $(, $b)*];
+ let c = $fn_id(transmute(a), transmute(b));
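+ // `d` is initialized from `e` only to pin its array type; the next
+ // assignment overwrites it with the transmuted result (hence the
+ // `allow(unused_assignments)` on this function).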
+ let mut d = e;
+ d = transmute(c);
+ assert_eq!(d, e);
+ }
+ }
+ }
+
+ test_vcombine!(test_vcombine_s8 => vcombine_s8([3_i8, -4, 5, -6, 7, 8, 9, 10], [13_i8, -14, 15, -16, 17, 18, 19, 110]));
+ test_vcombine!(test_vcombine_u8 => vcombine_u8([3_u8, 4, 5, 6, 7, 8, 9, 10], [13_u8, 14, 15, 16, 17, 18, 19, 110]));
+ test_vcombine!(test_vcombine_p8 => vcombine_p8([3_u8, 4, 5, 6, 7, 8, 9, 10], [13_u8, 14, 15, 16, 17, 18, 19, 110]));
+
+ test_vcombine!(test_vcombine_s16 => vcombine_s16([3_i16, -4, 5, -6], [13_i16, -14, 15, -16]));
+ test_vcombine!(test_vcombine_u16 => vcombine_u16([3_u16, 4, 5, 6], [13_u16, 14, 15, 16]));
+ test_vcombine!(test_vcombine_p16 => vcombine_p16([3_u16, 4, 5, 6], [13_u16, 14, 15, 16]));
+ // FIXME: 16-bit floats
+ // test_vcombine!(test_vcombine_f16 => vcombine_f16([3_f16, 4., 5., 6.],
+ // [13_f16, 14., 15., 16.]));
+
+ test_vcombine!(test_vcombine_s32 => vcombine_s32([3_i32, -4], [13_i32, -14]));
+ test_vcombine!(test_vcombine_u32 => vcombine_u32([3_u32, 4], [13_u32, 14]));
+ // note: poly32x4 does not exist, and neither does vcombine_p32
+ test_vcombine!(test_vcombine_f32 => vcombine_f32([3_f32, -4.], [13_f32, -14.]));
+
+ test_vcombine!(test_vcombine_s64 => vcombine_s64([-3_i64], [13_i64]));
+ test_vcombine!(test_vcombine_u64 => vcombine_u64([3_u64], [13_u64]));
+ test_vcombine!(test_vcombine_p64 => vcombine_p64([3_u64], [13_u64]));
+ test_vcombine!(test_vcombine_f64 => vcombine_f64([-3_f64], [13_f64]));
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdup_n_f64() {
+ let a: f64 = 3.3;
+ let e = f64x1::new(3.3);
+ let r: f64x1 = transmute(vdup_n_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdup_n_p64() {
+ let a: u64 = 3;
+ let e = u64x1::new(3);
+ let r: u64x1 = transmute(vdup_n_p64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdupq_n_f64() {
+ let a: f64 = 3.3;
+ let e = f64x2::new(3.3, 3.3);
+ let r: f64x2 = transmute(vdupq_n_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vdupq_n_p64() {
+ let a: u64 = 3;
+ let e = u64x2::new(3, 3);
+ let r: u64x2 = transmute(vdupq_n_p64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmov_n_p64() {
+ let a: u64 = 3;
+ let e = u64x1::new(3);
+ let r: u64x1 = transmute(vmov_n_p64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmov_n_f64() {
+ let a: f64 = 3.3;
+ let e = f64x1::new(3.3);
+ let r: f64x1 = transmute(vmov_n_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmovq_n_p64() {
+ let a: u64 = 3;
+ let e = u64x2::new(3, 3);
+ let r: u64x2 = transmute(vmovq_n_p64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmovq_n_f64() {
+ let a: f64 = 3.3;
+ let e = f64x2::new(3.3, 3.3);
+ let r: f64x2 = transmute(vmovq_n_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vget_high_f64() {
+ let a = f64x2::new(1.0, 2.0);
+ let e = f64x1::new(2.0);
+ let r: f64x1 = transmute(vget_high_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vget_high_p64() {
+ let a = u64x2::new(1, 2);
+ let e = u64x1::new(2);
+ let r: u64x1 = transmute(vget_high_p64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vget_low_f64() {
+ let a = f64x2::new(1.0, 2.0);
+ let e = f64x1::new(1.0);
+ let r: f64x1 = transmute(vget_low_f64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vget_low_p64() {
+ let a = u64x2::new(1, 2);
+ let e = u64x1::new(1);
+ let r: u64x1 = transmute(vget_low_p64(transmute(a)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vget_lane_f64() {
+ let v = f64x1::new(1.0);
+ let r = vget_lane_f64::<0>(transmute(v));
+ assert_eq!(r, 1.0);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vgetq_lane_f64() {
+ let v = f64x2::new(0.0, 1.0);
+ let r = vgetq_lane_f64::<1>(transmute(v));
+ assert_eq!(r, 1.0);
+ let r = vgetq_lane_f64::<0>(transmute(v));
+ assert_eq!(r, 0.0);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopy_lane_s64() {
+ let a: i64x1 = i64x1::new(1);
+ let b: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
+ let e: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
+ let r: i64x1 = transmute(vcopy_lane_s64::<0, 0>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopy_lane_u64() {
+ let a: u64x1 = u64x1::new(1);
+ let b: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x1 = transmute(vcopy_lane_u64::<0, 0>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopy_lane_p64() {
+ let a: i64x1 = i64x1::new(1);
+ let b: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
+ let e: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
+ let r: i64x1 = transmute(vcopy_lane_p64::<0, 0>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopy_lane_f64() {
+ let a: f64 = 1.;
+ let b: f64 = 0.;
+ let e: f64 = 0.;
+ let r: f64 = transmute(vcopy_lane_f64::<0, 0>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopy_laneq_s64() {
+ let a: i64x1 = i64x1::new(1);
+ let b: i64x2 = i64x2::new(0, 0x7F_FF_FF_FF_FF_FF_FF_FF);
+ let e: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
+ let r: i64x1 = transmute(vcopy_laneq_s64::<0, 1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopy_laneq_u64() {
+ let a: u64x1 = u64x1::new(1);
+ let b: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
+ let r: u64x1 = transmute(vcopy_laneq_u64::<0, 1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopy_laneq_p64() {
+ let a: i64x1 = i64x1::new(1);
+ let b: i64x2 = i64x2::new(0, 0x7F_FF_FF_FF_FF_FF_FF_FF);
+ let e: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
+ let r: i64x1 = transmute(vcopy_laneq_p64::<0, 1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcopy_laneq_f64() {
+ let a: f64 = 1.;
+ let b: f64x2 = f64x2::new(0., 0.5);
+ let e: f64 = 0.5;
+ let r: f64 = transmute(vcopy_laneq_f64::<0, 1>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceq_u64() {
+ test_cmp_u64(
+ |i, j| vceq_u64(i, j),
+ |a: u64, b: u64| -> u64 {
+ if a == b {
+ 0xFFFFFFFFFFFFFFFF
+ } else {
+ 0
+ }
+ },
+ );
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceqq_u64() {
+ testq_cmp_u64(
+ |i, j| vceqq_u64(i, j),
+ |a: u64, b: u64| -> u64 {
+ if a == b {
+ 0xFFFFFFFFFFFFFFFF
+ } else {
+ 0
+ }
+ },
+ );
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceq_s64() {
+ test_cmp_s64(
+ |i, j| vceq_s64(i, j),
+ |a: i64, b: i64| -> u64 {
+ if a == b {
+ 0xFFFFFFFFFFFFFFFF
+ } else {
+ 0
+ }
+ },
+ );
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceqq_s64() {
+ testq_cmp_s64(
+ |i, j| vceqq_s64(i, j),
+ |a: i64, b: i64| -> u64 {
+ if a == b {
+ 0xFFFFFFFFFFFFFFFF
+ } else {
+ 0
+ }
+ },
+ );
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceq_p64() {
+ test_cmp_p64(
+ |i, j| vceq_p64(i, j),
+ |a: u64, b: u64| -> u64 {
+ if a == b {
+ 0xFFFFFFFFFFFFFFFF
+ } else {
+ 0
+ }
+ },
+ );
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceqq_p64() {
+ testq_cmp_p64(
+ |i, j| vceqq_p64(i, j),
+ |a: u64, b: u64| -> u64 {
+ if a == b {
+ 0xFFFFFFFFFFFFFFFF
+ } else {
+ 0
+ }
+ },
+ );
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceq_f64() {
+ test_cmp_f64(
+ |i, j| vceq_f64(i, j),
+ |a: f64, b: f64| -> u64 {
+ if a == b {
+ 0xFFFFFFFFFFFFFFFF
+ } else {
+ 0
+ }
+ },
+ );
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vceqq_f64() {
+ testq_cmp_f64(
+ |i, j| vceqq_f64(i, j),
+ |a: f64, b: f64| -> u64 {
+ if a == b {
+ 0xFFFFFFFFFFFFFFFF
+ } else {
+ 0
+ }
+ },
+ );
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgt_s64() {
+ test_cmp_s64(
+ |i, j| vcgt_s64(i, j),
+ |a: i64, b: i64| -> u64 {
+ if a > b {
+ 0xFFFFFFFFFFFFFFFF
+ } else {
+ 0
+ }
+ },
+ );
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgtq_s64() {
+ testq_cmp_s64(
+ |i, j| vcgtq_s64(i, j),
+ |a: i64, b: i64| -> u64 {
+ if a > b {
+ 0xFFFFFFFFFFFFFFFF
+ } else {
+ 0
+ }
+ },
+ );
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgt_u64() {
+ test_cmp_u64(
+ |i, j| vcgt_u64(i, j),
+ |a: u64, b: u64| -> u64 {
+ if a > b {
+ 0xFFFFFFFFFFFFFFFF
+ } else {
+ 0
+ }
+ },
+ );
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgtq_u64() {
+ testq_cmp_u64(
+ |i, j| vcgtq_u64(i, j),
+ |a: u64, b: u64| -> u64 {
+ if a > b {
+ 0xFFFFFFFFFFFFFFFF
+ } else {
+ 0
+ }
+ },
+ );
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgt_f64() {
+ test_cmp_f64(
+ |i, j| vcgt_f64(i, j),
+ |a: f64, b: f64| -> u64 {
+ if a > b {
+ 0xFFFFFFFFFFFFFFFF
+ } else {
+ 0
+ }
+ },
+ );
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgtq_f64() {
+ testq_cmp_f64(
+ |i, j| vcgtq_f64(i, j),
+ |a: f64, b: f64| -> u64 {
+ if a > b {
+ 0xFFFFFFFFFFFFFFFF
+ } else {
+ 0
+ }
+ },
+ );
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vclt_s64() {
+ test_cmp_s64(
+ |i, j| vclt_s64(i, j),
+ |a: i64, b: i64| -> u64 {
+ if a < b {
+ 0xFFFFFFFFFFFFFFFF
+ } else {
+ 0
+ }
+ },
+ );
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcltq_s64() {
+ testq_cmp_s64(
+ |i, j| vcltq_s64(i, j),
+ |a: i64, b: i64| -> u64 {
+ if a < b {
+ 0xFFFFFFFFFFFFFFFF
+ } else {
+ 0
+ }
+ },
+ );
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vclt_u64() {
+ test_cmp_u64(
+ |i, j| vclt_u64(i, j),
+ |a: u64, b: u64| -> u64 {
+ if a < b {
+ 0xFFFFFFFFFFFFFFFF
+ } else {
+ 0
+ }
+ },
+ );
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcltq_u64() {
+ testq_cmp_u64(
+ |i, j| vcltq_u64(i, j),
+ |a: u64, b: u64| -> u64 {
+ if a < b {
+ 0xFFFFFFFFFFFFFFFF
+ } else {
+ 0
+ }
+ },
+ );
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vclt_f64() {
+ test_cmp_f64(
+ |i, j| vclt_f64(i, j),
+ |a: f64, b: f64| -> u64 {
+ if a < b {
+ 0xFFFFFFFFFFFFFFFF
+ } else {
+ 0
+ }
+ },
+ );
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcltq_f64() {
+ testq_cmp_f64(
+ |i, j| vcltq_f64(i, j),
+ |a: f64, b: f64| -> u64 {
+ if a < b {
+ 0xFFFFFFFFFFFFFFFF
+ } else {
+ 0
+ }
+ },
+ );
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcle_s64() {
+ test_cmp_s64(
+ |i, j| vcle_s64(i, j),
+ |a: i64, b: i64| -> u64 {
+ if a <= b {
+ 0xFFFFFFFFFFFFFFFF
+ } else {
+ 0
+ }
+ },
+ );
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcleq_s64() {
+ testq_cmp_s64(
+ |i, j| vcleq_s64(i, j),
+ |a: i64, b: i64| -> u64 {
+ if a <= b {
+ 0xFFFFFFFFFFFFFFFF
+ } else {
+ 0
+ }
+ },
+ );
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcle_u64() {
+ test_cmp_u64(
+ |i, j| vcle_u64(i, j),
+ |a: u64, b: u64| -> u64 {
+ if a <= b {
+ 0xFFFFFFFFFFFFFFFF
+ } else {
+ 0
+ }
+ },
+ );
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcleq_u64() {
+ testq_cmp_u64(
+ |i, j| vcleq_u64(i, j),
+ |a: u64, b: u64| -> u64 {
+ if a <= b {
+ 0xFFFFFFFFFFFFFFFF
+ } else {
+ 0
+ }
+ },
+ );
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcle_f64() {
+ test_cmp_f64(
+ |i, j| vcle_f64(i, j),
+ |a: f64, b: f64| -> u64 {
+ if a <= b {
+ 0xFFFFFFFFFFFFFFFF
+ } else {
+ 0
+ }
+ },
+ );
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcleq_f64() {
+ testq_cmp_f64(
+ |i, j| vcleq_f64(i, j),
+ |a: f64, b: f64| -> u64 {
+ if a <= b {
+ 0xFFFFFFFFFFFFFFFF
+ } else {
+ 0
+ }
+ },
+ );
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcge_s64() {
+ test_cmp_s64(
+ |i, j| vcge_s64(i, j),
+ |a: i64, b: i64| -> u64 {
+ if a >= b {
+ 0xFFFFFFFFFFFFFFFF
+ } else {
+ 0
+ }
+ },
+ );
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgeq_s64() {
+ testq_cmp_s64(
+ |i, j| vcgeq_s64(i, j),
+ |a: i64, b: i64| -> u64 {
+ if a >= b {
+ 0xFFFFFFFFFFFFFFFF
+ } else {
+ 0
+ }
+ },
+ );
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcge_u64() {
+ test_cmp_u64(
+ |i, j| vcge_u64(i, j),
+ |a: u64, b: u64| -> u64 {
+ if a >= b {
+ 0xFFFFFFFFFFFFFFFF
+ } else {
+ 0
+ }
+ },
+ );
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgeq_u64() {
+ testq_cmp_u64(
+ |i, j| vcgeq_u64(i, j),
+ |a: u64, b: u64| -> u64 {
+ if a >= b {
+ 0xFFFFFFFFFFFFFFFF
+ } else {
+ 0
+ }
+ },
+ );
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcge_f64() {
+ test_cmp_f64(
+ |i, j| vcge_f64(i, j),
+ |a: f64, b: f64| -> u64 {
+ if a >= b {
+ 0xFFFFFFFFFFFFFFFF
+ } else {
+ 0
+ }
+ },
+ );
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vcgeq_f64() {
+ testq_cmp_f64(
+ |i, j| vcgeq_f64(i, j),
+ |a: f64, b: f64| -> u64 {
+ if a >= b {
+ 0xFFFFFFFFFFFFFFFF
+ } else {
+ 0
+ }
+ },
+ );
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmul_f64() {
+ test_ari_f64(|i, j| vmul_f64(i, j), |a: f64, b: f64| -> f64 { a * b });
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vmulq_f64() {
+ testq_ari_f64(|i, j| vmulq_f64(i, j), |a: f64, b: f64| -> f64 { a * b });
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vsub_f64() {
+ test_ari_f64(|i, j| vsub_f64(i, j), |a: f64, b: f64| -> f64 { a - b });
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vsubq_f64() {
+ testq_ari_f64(|i, j| vsubq_f64(i, j), |a: f64, b: f64| -> f64 { a - b });
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vabsd_s64() {
+ assert_eq!(vabsd_s64(-1), 1);
+ assert_eq!(vabsd_s64(0), 0);
+ assert_eq!(vabsd_s64(1), 1);
+ assert_eq!(vabsd_s64(i64::MIN), i64::MIN);
+ assert_eq!(vabsd_s64(i64::MIN + 1), i64::MAX);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vabs_s64() {
+ let a = i64x1::new(i64::MIN);
+ let r: i64x1 = transmute(vabs_s64(transmute(a)));
+ let e = i64x1::new(i64::MIN);
+ assert_eq!(r, e);
+ let a = i64x1::new(i64::MIN + 1);
+ let r: i64x1 = transmute(vabs_s64(transmute(a)));
+ let e = i64x1::new(i64::MAX);
+ assert_eq!(r, e);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vabsq_s64() {
+ let a = i64x2::new(i64::MIN, i64::MIN + 1);
+ let r: i64x2 = transmute(vabsq_s64(transmute(a)));
+ let e = i64x2::new(i64::MIN, i64::MAX);
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vbsl_f64() {
+ let a = u64x1::new(u64::MAX);
+ let b = f64x1::new(f64::MAX);
+ let c = f64x1::new(f64::MIN);
+ let e = f64x1::new(f64::MAX);
+ let r: f64x1 = transmute(vbsl_f64(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vbsl_p64() {
+ let a = u64x1::new(u64::MAX);
+ let b = u64x1::new(u64::MAX);
+ let c = u64x1::new(u64::MIN);
+ let e = u64x1::new(u64::MAX);
+ let r: u64x1 = transmute(vbsl_p64(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vbslq_f64() {
+ let a = u64x2::new(u64::MAX, 0);
+ let b = f64x2::new(f64::MAX, f64::MAX);
+ let c = f64x2::new(f64::MIN, f64::MIN);
+ let e = f64x2::new(f64::MAX, f64::MIN);
+ let r: f64x2 = transmute(vbslq_f64(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vbslq_p64() {
+ let a = u64x2::new(u64::MAX, 0);
+ let b = u64x2::new(u64::MAX, u64::MAX);
+ let c = u64x2::new(u64::MIN, u64::MIN);
+ let e = u64x2::new(u64::MAX, u64::MIN);
+ let r: u64x2 = transmute(vbslq_p64(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vaddv_s16() {
+ let a = i16x4::new(1, 2, 3, -4);
+ let r: i16 = transmute(vaddv_s16(transmute(a)));
+ let e = 2_i16;
+ assert_eq!(r, e);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vaddv_u16() {
+ let a = u16x4::new(1, 2, 3, 4);
+ let r: u16 = transmute(vaddv_u16(transmute(a)));
+ let e = 10_u16;
+ assert_eq!(r, e);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vaddv_s32() {
+ let a = i32x2::new(1, -2);
+ let r: i32 = transmute(vaddv_s32(transmute(a)));
+ let e = -1_i32;
+ assert_eq!(r, e);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vaddv_u32() {
+ let a = u32x2::new(1, 2);
+ let r: u32 = transmute(vaddv_u32(transmute(a)));
+ let e = 3_u32;
+ assert_eq!(r, e);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vaddv_s8() {
+ let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, -8);
+ let r: i8 = transmute(vaddv_s8(transmute(a)));
+ let e = 20_i8;
+ assert_eq!(r, e);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vaddv_u8() {
+ let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+ let r: u8 = transmute(vaddv_u8(transmute(a)));
+ let e = 36_u8;
+ assert_eq!(r, e);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vaddvq_s16() {
+ let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, -8);
+ let r: i16 = transmute(vaddvq_s16(transmute(a)));
+ let e = 20_i16;
+ assert_eq!(r, e);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vaddvq_u16() {
+ let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+ let r: u16 = transmute(vaddvq_u16(transmute(a)));
+ let e = 36_u16;
+ assert_eq!(r, e);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vaddvq_s32() {
+ let a = i32x4::new(1, 2, 3, -4);
+ let r: i32 = transmute(vaddvq_s32(transmute(a)));
+ let e = 2_i32;
+ assert_eq!(r, e);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vaddvq_u32() {
+ let a = u32x4::new(1, 2, 3, 4);
+ let r: u32 = transmute(vaddvq_u32(transmute(a)));
+ let e = 10_u32;
+ assert_eq!(r, e);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vaddvq_s8() {
+ let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, -16);
+ let r: i8 = transmute(vaddvq_s8(transmute(a)));
+ let e = 104_i8;
+ assert_eq!(r, e);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vaddvq_u8() {
+ let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let r: u8 = transmute(vaddvq_u8(transmute(a)));
+ let e = 136_u8;
+ assert_eq!(r, e);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vaddvq_s64() {
+ let a = i64x2::new(1, -2);
+ let r: i64 = transmute(vaddvq_s64(transmute(a)));
+ let e = -1_i64;
+ assert_eq!(r, e);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vaddvq_u64() {
+ let a = u64x2::new(1, 2);
+ let r: u64 = transmute(vaddvq_u64(transmute(a)));
+ let e = 3_u64;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vaddlv_s8() {
+ let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, -8);
+ let r: i16 = vaddlv_s8(transmute(a));
+ let e = 20_i16;
+ assert_eq!(r, e);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vaddlv_u8() {
+ let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+ let r: u16 = vaddlv_u8(transmute(a));
+ let e = 36_u16;
+ assert_eq!(r, e);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vaddlvq_s8() {
+ let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, -16);
+ let r: i16 = vaddlvq_s8(transmute(a));
+ let e = 104_i16;
+ assert_eq!(r, e);
+ }
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vaddlvq_u8() {
+ let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let r: u16 = vaddlvq_u8(transmute(a));
+ let e = 136_u16;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld1_f64() {
+ let a: [f64; 2] = [0., 1.];
+ let e = f64x1::new(1.);
+ let r: f64x1 = transmute(vld1_f64(a[1..].as_ptr()));
+ assert_eq!(r, e)
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld1q_f64() {
+ let a: [f64; 3] = [0., 1., 2.];
+ let e = f64x2::new(1., 2.);
+ let r: f64x2 = transmute(vld1q_f64(a[1..].as_ptr()));
+ assert_eq!(r, e)
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld1_dup_f64() {
+ let a: [f64; 2] = [1., 42.];
+ let e = f64x1::new(42.);
+ let r: f64x1 = transmute(vld1_dup_f64(a[1..].as_ptr()));
+ assert_eq!(r, e)
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld1q_dup_f64() {
+ let elem: f64 = 42.;
+ let e = f64x2::new(42., 42.);
+ let r: f64x2 = transmute(vld1q_dup_f64(&elem));
+ assert_eq!(r, e)
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld1_lane_f64() {
+ let a = f64x1::new(0.);
+ let elem: f64 = 42.;
+ let e = f64x1::new(42.);
+ let r: f64x1 = transmute(vld1_lane_f64::<0>(&elem, transmute(a)));
+ assert_eq!(r, e)
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vld1q_lane_f64() {
+ let a = f64x2::new(0., 1.);
+ let elem: f64 = 42.;
+ let e = f64x2::new(0., 42.);
+ let r: f64x2 = transmute(vld1q_lane_f64::<1>(&elem, transmute(a)));
+ assert_eq!(r, e)
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst1_f64() {
+ let mut vals = [0_f64; 2];
+ let a = f64x1::new(1.);
+
+ vst1_f64(vals[1..].as_mut_ptr(), transmute(a));
+
+ assert_eq!(vals[0], 0.);
+ assert_eq!(vals[1], 1.);
+ }
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vst1q_f64() {
+ let mut vals = [0_f64; 3];
+ let a = f64x2::new(1., 2.);
+
+ vst1q_f64(vals[1..].as_mut_ptr(), transmute(a));
+
+ assert_eq!(vals[0], 0.);
+ assert_eq!(vals[1], 1.);
+ assert_eq!(vals[2], 2.);
+ }
+
+ #[simd_test(enable = "neon,sm4")]
+ unsafe fn test_vsm3tt1aq_u32() {
+ let a: u32x4 = u32x4::new(1, 2, 3, 4);
+ let b: u32x4 = u32x4::new(1, 2, 3, 4);
+ let c: u32x4 = u32x4::new(1, 2, 3, 4);
+ let e: u32x4 = u32x4::new(2, 1536, 4, 16395);
+ let r: u32x4 = transmute(vsm3tt1aq_u32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon,sm4")]
+ unsafe fn test_vsm3tt1bq_u32() {
+ let a: u32x4 = u32x4::new(1, 2, 3, 4);
+ let b: u32x4 = u32x4::new(1, 2, 3, 4);
+ let c: u32x4 = u32x4::new(1, 2, 3, 4);
+ let e: u32x4 = u32x4::new(2, 1536, 4, 16392);
+ let r: u32x4 = transmute(vsm3tt1bq_u32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon,sm4")]
+ unsafe fn test_vsm3tt2aq_u32() {
+ let a: u32x4 = u32x4::new(1, 2, 3, 4);
+ let b: u32x4 = u32x4::new(1, 2, 3, 4);
+ let c: u32x4 = u32x4::new(1, 2, 3, 4);
+ let e: u32x4 = u32x4::new(2, 1572864, 4, 1447435);
+ let r: u32x4 = transmute(vsm3tt2aq_u32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon,sm4")]
+ unsafe fn test_vsm3tt2bq_u32() {
+ let a: u32x4 = u32x4::new(1, 2, 3, 4);
+ let b: u32x4 = u32x4::new(1, 2, 3, 4);
+ let c: u32x4 = u32x4::new(1, 2, 3, 4);
+ let e: u32x4 = u32x4::new(2, 1572864, 4, 1052680);
+ let r: u32x4 = transmute(vsm3tt2bq_u32::<0>(transmute(a), transmute(b), transmute(c)));
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "neon,sha3")]
+ unsafe fn test_vxarq_u64() {
+ let a: u64x2 = u64x2::new(1, 2);
+ let b: u64x2 = u64x2::new(3, 4);
+ let e: u64x2 = u64x2::new(2, 6);
+ let r: u64x2 = transmute(vxarq_u64::<0>(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+ }
+}
+
+#[cfg(test)]
+#[cfg(target_endian = "little")]
+#[path = "../../arm_shared/neon/table_lookup_tests.rs"]
+mod table_lookup_tests;
+
+#[cfg(test)]
+#[path = "../../arm_shared/neon/shift_and_insert_tests.rs"]
+mod shift_and_insert_tests;
+
+#[cfg(test)]
+#[path = "../../arm_shared/neon/load_tests.rs"]
+mod load_tests;
+
+#[cfg(test)]
+#[path = "../../arm_shared/neon/store_tests.rs"]
+mod store_tests;
diff --git a/library/stdarch/crates/core_arch/src/aarch64/prefetch.rs b/library/stdarch/crates/core_arch/src/aarch64/prefetch.rs
new file mode 100644
index 000000000..3ae0ef506
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/aarch64/prefetch.rs
@@ -0,0 +1,73 @@
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+extern "unadjusted" {
+ #[link_name = "llvm.prefetch"]
+ fn prefetch(p: *const i8, rw: i32, loc: i32, ty: i32);
+}
+
+/// See [`_prefetch`](fn._prefetch.html).
+pub const _PREFETCH_READ: i32 = 0;
+
+/// See [`_prefetch`](fn._prefetch.html).
+pub const _PREFETCH_WRITE: i32 = 1;
+
+/// See [`_prefetch`](fn._prefetch.html).
+pub const _PREFETCH_LOCALITY0: i32 = 0;
+
+/// See [`_prefetch`](fn._prefetch.html).
+pub const _PREFETCH_LOCALITY1: i32 = 1;
+
+/// See [`_prefetch`](fn._prefetch.html).
+pub const _PREFETCH_LOCALITY2: i32 = 2;
+
+/// See [`_prefetch`](fn._prefetch.html).
+pub const _PREFETCH_LOCALITY3: i32 = 3;
+
+/// Fetch the cache line that contains address `p` using the given `RW` and `LOCALITY`.
+///
+/// The `RW` must be one of:
+///
+/// * [`_PREFETCH_READ`](constant._PREFETCH_READ.html): the prefetch is preparing
+/// for a read.
+///
+/// * [`_PREFETCH_WRITE`](constant._PREFETCH_WRITE.html): the prefetch is preparing
+/// for a write.
+///
+/// The `LOCALITY` must be one of:
+///
+/// * [`_PREFETCH_LOCALITY0`](constant._PREFETCH_LOCALITY0.html): Streaming or
+/// non-temporal prefetch, for data that is used only once.
+///
+/// * [`_PREFETCH_LOCALITY1`](constant._PREFETCH_LOCALITY1.html): Fetch into level 3 cache.
+///
+/// * [`_PREFETCH_LOCALITY2`](constant._PREFETCH_LOCALITY2.html): Fetch into level 2 cache.
+///
+/// * [`_PREFETCH_LOCALITY3`](constant._PREFETCH_LOCALITY3.html): Fetch into level 1 cache.
+///
+/// The prefetch memory instructions signal to the memory system that memory accesses
+/// from a specified address are likely to occur in the near future. The memory system
+/// can respond by taking actions that are expected to speed up the memory access when
+/// they do occur, such as preloading the specified address into one or more caches.
+/// Because these signals are only hints, it is valid for a particular CPU to treat
+/// any or all prefetch instructions as a NOP.
+///
+/// [Arm's documentation](https://developer.arm.com/documentation/den0024/a/the-a64-instruction-set/memory-access-instructions/prefetching-memory?lang=en)
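+///
+/// # Examples
+///
+/// A minimal usage sketch (not part of the original documentation), prefetching the
+/// start of a buffer for a read that is expected to be reused soon:
+///
+/// ```ignore
+/// let buf = [0u8; 4096];
+/// unsafe {
+///     // Read prefetch into the L1 cache (temporal, "keep" hint).
+///     _prefetch::<_PREFETCH_READ, _PREFETCH_LOCALITY3>(buf.as_ptr() as *const i8);
+/// }
+/// ```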
+#[inline(always)]
+#[cfg_attr(test, assert_instr("prfm pldl1strm", RW = _PREFETCH_READ, LOCALITY = _PREFETCH_LOCALITY0))]
+#[cfg_attr(test, assert_instr("prfm pldl3keep", RW = _PREFETCH_READ, LOCALITY = _PREFETCH_LOCALITY1))]
+#[cfg_attr(test, assert_instr("prfm pldl2keep", RW = _PREFETCH_READ, LOCALITY = _PREFETCH_LOCALITY2))]
+#[cfg_attr(test, assert_instr("prfm pldl1keep", RW = _PREFETCH_READ, LOCALITY = _PREFETCH_LOCALITY3))]
+#[cfg_attr(test, assert_instr("prfm pstl1strm", RW = _PREFETCH_WRITE, LOCALITY = _PREFETCH_LOCALITY0))]
+#[cfg_attr(test, assert_instr("prfm pstl3keep", RW = _PREFETCH_WRITE, LOCALITY = _PREFETCH_LOCALITY1))]
+#[cfg_attr(test, assert_instr("prfm pstl2keep", RW = _PREFETCH_WRITE, LOCALITY = _PREFETCH_LOCALITY2))]
+#[cfg_attr(test, assert_instr("prfm pstl1keep", RW = _PREFETCH_WRITE, LOCALITY = _PREFETCH_LOCALITY3))]
+#[rustc_legacy_const_generics(1, 2)]
+// FIXME: Replace this with the standard ACLE __pld/__pldx/__pli/__plix intrinsics
+pub unsafe fn _prefetch<const RW: i32, const LOCALITY: i32>(p: *const i8) {
+ // We use the `llvm.prefetch` intrinsic with `cache type` = 1 (data cache).
+ static_assert_imm1!(RW);
+ static_assert_imm2!(LOCALITY);
+ prefetch(p, RW, LOCALITY, 1);
+}
diff --git a/library/stdarch/crates/core_arch/src/aarch64/test_support.rs b/library/stdarch/crates/core_arch/src/aarch64/test_support.rs
new file mode 100644
index 000000000..9c5994b15
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/aarch64/test_support.rs
@@ -0,0 +1,184 @@
+use crate::core_arch::{aarch64::neon::*, arm_shared::*, simd::*};
+use std::{i16, i32, i8, mem::transmute, u16, u32, u8, vec::Vec};
+
+macro_rules! V_u64 {
+ () => {
+ vec![
+ 0x0000000000000000u64,
+ 0x0101010101010101u64,
+ 0x0202020202020202u64,
+ 0x0F0F0F0F0F0F0F0Fu64,
+ 0x8080808080808080u64,
+ 0xF0F0F0F0F0F0F0F0u64,
+ 0xFFFFFFFFFFFFFFFFu64,
+ ]
+ };
+}
+
+macro_rules! V_f64 {
+ () => {
+ vec![
+ 0.0f64,
+ 1.0f64,
+ -1.0f64,
+ 1.2f64,
+ 2.4f64,
+ std::f64::MAX,
+ std::f64::MIN,
+ std::f64::INFINITY,
+ std::f64::NEG_INFINITY,
+ std::f64::NAN,
+ ]
+ };
+}
+
+macro_rules! to64 {
+ ($t : ident) => {
+ |v: $t| -> u64 { transmute(v) }
+ };
+}
+
+macro_rules! to128 {
+ ($t : ident) => {
+ |v: $t| -> u128 { transmute(v) }
+ };
+}
+
+pub(crate) fn test<T, U, V, W, X>(
+ vals: Vec<T>,
+ fill1: fn(T) -> V,
+ fill2: fn(U) -> W,
+ cast: fn(W) -> X,
+ test_fun: fn(V, V) -> W,
+ verify_fun: fn(T, T) -> U,
+) where
+ T: Copy + core::fmt::Debug,
+ U: Copy + core::fmt::Debug + std::cmp::PartialEq,
+ V: Copy + core::fmt::Debug,
+ W: Copy + core::fmt::Debug,
+ X: Copy + core::fmt::Debug + std::cmp::PartialEq,
+{
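+ // Note: zipping `vals` with itself pairs each element with itself, so every
+ // case exercises equal operands `f(v, v)` rather than a cross product.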
+ let pairs = vals.iter().zip(vals.iter());
+
+ for (i, j) in pairs {
+ let a: V = fill1(*i);
+ let b: V = fill1(*j);
+
+ let actual_pre: W = test_fun(a, b);
+ let expected_pre: W = fill2(verify_fun(*i, *j));
+
+ let actual: X = cast(actual_pre);
+ let expected: X = cast(expected_pre);
+
+ assert_eq!(
+ actual, expected,
+ "[{:?}:{:?}] :\nf({:?}, {:?}) = {:?}\ng({:?}, {:?}) = {:?}\n",
+ *i, *j, &a, &b, actual_pre, &a, &b, expected_pre
+ );
+ }
+}
+
+macro_rules! gen_test_fn {
+ ($n: ident, $t: ident, $u: ident, $v: ident, $w: ident, $x: ident, $vals: expr, $fill1: expr, $fill2: expr, $cast: expr) => {
+ pub(crate) fn $n(test_fun: fn($v, $v) -> $w, verify_fun: fn($t, $t) -> $u) {
+ unsafe {
+ test::<$t, $u, $v, $w, $x>($vals, $fill1, $fill2, $cast, test_fun, verify_fun)
+ };
+ }
+ };
+}
+
+macro_rules! gen_fill_fn {
+ ($id: ident, $el_width: expr, $num_els: expr, $in_t : ident, $out_t: ident, $cmp_t: ident) => {
+ pub(crate) fn $id(val: $in_t) -> $out_t {
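+ // Splat `val` into every lane, round-tripping through the comparison
+ // integer type so the transmute sizes stay in sync.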
+ let initial: [$in_t; $num_els] = [val; $num_els];
+ let result: $cmp_t = unsafe { transmute(initial) };
+ let result_out: $out_t = unsafe { transmute(result) };
+
+ // println!("FILL: {:016x} as {} x {}: {:016x}", val.reverse_bits(), $el_width, $num_els, (result as u64).reverse_bits());
+
+ result_out
+ }
+ };
+}
+
+gen_fill_fn!(fill_u64, 64, 1, u64, uint64x1_t, u64);
+gen_fill_fn!(fillq_u64, 64, 2, u64, uint64x2_t, u128);
+gen_fill_fn!(fill_f64, 64, 1, f64, float64x1_t, u64);
+gen_fill_fn!(fillq_f64, 64, 2, f64, float64x2_t, u128);
+gen_fill_fn!(fill_p64, 64, 1, u64, poly64x1_t, u64);
+gen_fill_fn!(fillq_p64, 64, 2, u64, poly64x2_t, u128);
+
+gen_test_fn!(
+ test_ari_f64,
+ f64,
+ f64,
+ float64x1_t,
+ float64x1_t,
+ u64,
+ V_f64!(),
+ fill_f64,
+ fill_f64,
+ to64!(float64x1_t)
+);
+gen_test_fn!(
+ test_cmp_f64,
+ f64,
+ u64,
+ float64x1_t,
+ uint64x1_t,
+ u64,
+ V_f64!(),
+ fill_f64,
+ fill_u64,
+ to64!(uint64x1_t)
+);
+gen_test_fn!(
+ testq_ari_f64,
+ f64,
+ f64,
+ float64x2_t,
+ float64x2_t,
+ u128,
+ V_f64!(),
+ fillq_f64,
+ fillq_f64,
+ to128!(float64x2_t)
+);
+gen_test_fn!(
+ testq_cmp_f64,
+ f64,
+ u64,
+ float64x2_t,
+ uint64x2_t,
+ u128,
+ V_f64!(),
+ fillq_f64,
+ fillq_u64,
+ to128!(uint64x2_t)
+);
+
+gen_test_fn!(
+ test_cmp_p64,
+ u64,
+ u64,
+ poly64x1_t,
+ uint64x1_t,
+ u64,
+ V_u64!(),
+ fill_p64,
+ fill_u64,
+ to64!(uint64x1_t)
+);
+gen_test_fn!(
+ testq_cmp_p64,
+ u64,
+ u64,
+ poly64x2_t,
+ uint64x2_t,
+ u128,
+ V_u64!(),
+ fillq_p64,
+ fillq_u64,
+ to128!(uint64x2_t)
+);
diff --git a/library/stdarch/crates/core_arch/src/aarch64/tme.rs b/library/stdarch/crates/core_arch/src/aarch64/tme.rs
new file mode 100644
index 000000000..d1b2cf334
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/aarch64/tme.rs
@@ -0,0 +1,179 @@
+//! ARM's Transactional Memory Extensions (TME).
+//!
+//! This CPU feature is available on AArch64, as part of the A architecture profile.
+//! The feature is outside the NEON feature set. TME-specific vendor documentation can
+//! be found in the [TME Intrinsics Introduction][tme_intrinsics_intro].
+//!
+//! The reference is [ACLE Q4 2019][acle_q4_2019_ref].
+//!
+//! ACLE has a section on the TME extensions and the state masks for aborts and failure codes.
+//! The [ARM A64 Architecture Register Datasheet][a_profile_future] also describes possible failure code scenarios.
+//!
+//! [acle_q4_2019_ref]: https://static.docs.arm.com/101028/0010/ACLE_2019Q4_release-0010.pdf
+//! [tme_intrinsics_intro]: https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics
+//! [llvm_aarch64_int]: https://github.com/llvm/llvm-project/commit/a36d31478c182903523e04eb271bbf102bfab2cc#diff-ff24e1c35f4d54f1110ce5d90c709319R626-R646
+//! [a_profile_future]: https://static.docs.arm.com/ddi0601/a/SysReg_xml_futureA-2019-04.pdf?_ga=2.116560387.441514988.1590524918-1110153136.1588469296
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+extern "unadjusted" {
+ #[link_name = "llvm.aarch64.tstart"]
+ fn aarch64_tstart() -> u64;
+ #[link_name = "llvm.aarch64.tcommit"]
+ fn aarch64_tcommit() -> ();
+ #[link_name = "llvm.aarch64.tcancel"]
+ fn aarch64_tcancel(imm0: u64) -> ();
+ #[link_name = "llvm.aarch64.ttest"]
+ fn aarch64_ttest() -> u64;
+}
+
+/// Transaction successfully started.
+pub const _TMSTART_SUCCESS: u64 = 0x00_u64;
+
+/// Extraction mask for the failure reason.
+pub const _TMFAILURE_REASON: u64 = 0x00007FFF_u64;
+
+/// Transaction retry is possible.
+pub const _TMFAILURE_RTRY: u64 = 1 << 15;
+
+/// Transaction executed a TCANCEL instruction.
+pub const _TMFAILURE_CNCL: u64 = 1 << 16;
+
+/// Transaction aborted because a conflict occurred.
+pub const _TMFAILURE_MEM: u64 = 1 << 17;
+
+/// Fallback error type for any other reason.
+pub const _TMFAILURE_IMP: u64 = 1 << 18;
+
+/// Transaction aborted because a non-permissible operation was attempted.
+pub const _TMFAILURE_ERR: u64 = 1 << 19;
+
+/// Transaction aborted because the read or write set limit was exceeded.
+pub const _TMFAILURE_SIZE: u64 = 1 << 20;
+
+/// Transaction aborted because the transactional nesting level was exceeded.
+pub const _TMFAILURE_NEST: u64 = 1 << 21;
+
+/// Transaction aborted due to a debug trap.
+pub const _TMFAILURE_DBG: u64 = 1 << 22;
+
+/// Transaction failed because of an interrupt.
+pub const _TMFAILURE_INT: u64 = 1 << 23;
+
+/// Indicates that a TRIVIAL version of TM is available.
+pub const _TMFAILURE_TRIVIAL: u64 = 1 << 24;
+
+/// Starts a new transaction. When the transaction starts successfully the return value is 0.
+/// If the transaction fails, all state modifications are discarded and a cause of the failure
+/// is encoded in the return value.
+///
+/// [ARM TME Intrinsics](https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics).
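+///
+/// # Examples
+///
+/// A minimal usage sketch (not from the original documentation), pairing `__tstart`
+/// with `__tcommit` and the failure masks defined in this module:
+///
+/// ```ignore
+/// unsafe {
+///     let code = __tstart();
+///     if code == _TMSTART_SUCCESS {
+///         // ... transactional reads and writes ...
+///         __tcommit();
+///     } else if code & _TMFAILURE_RTRY != 0 {
+///         // The transaction failed but may succeed if retried.
+///     }
+/// }
+/// ```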
+#[inline]
+#[target_feature(enable = "tme")]
+#[cfg_attr(test, assert_instr(tstart))]
+pub unsafe fn __tstart() -> u64 {
+ aarch64_tstart()
+}
+
+/// Commits the current transaction. For a nested transaction, the only effect is that the
+/// transactional nesting depth is decreased. For an outer transaction, the state modifications
+/// performed transactionally are committed to the architectural state.
+///
+/// [ARM TME Intrinsics](https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics).
+#[inline]
+#[target_feature(enable = "tme")]
+#[cfg_attr(test, assert_instr(tcommit))]
+pub unsafe fn __tcommit() {
+ aarch64_tcommit()
+}
+
+/// Cancels the current transaction and discards all state modifications that were performed transactionally.
+///
+/// [ARM TME Intrinsics](https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics).
+#[inline]
+#[target_feature(enable = "tme")]
+#[cfg_attr(test, assert_instr(tcancel, IMM16 = 0x0))]
+#[rustc_legacy_const_generics(0)]
+pub unsafe fn __tcancel<const IMM16: u64>() {
+ static_assert!(IMM16: u64 where IMM16 <= 65535);
+ aarch64_tcancel(IMM16);
+}
+
+/// Tests if executing inside a transaction. If no transaction is currently executing,
+/// the return value is 0. Otherwise, this intrinsic returns the depth of the transaction.
+///
+/// [ARM TME Intrinsics](https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics).
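+///
+/// A minimal sketch (not from the original documentation); inside a single,
+/// non-nested transaction the returned depth would be expected to be 1:
+///
+/// ```ignore
+/// unsafe {
+///     if __tstart() == _TMSTART_SUCCESS {
+///         let depth = __ttest(); // depth of the current transaction
+///         __tcommit();
+///     }
+/// }
+/// ```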
+#[inline]
+#[target_feature(enable = "tme")]
+#[cfg_attr(test, assert_instr(ttest))]
+pub unsafe fn __ttest() -> u64 {
+ aarch64_ttest()
+}
+
+#[cfg(test)]
+mod tests {
+ use stdarch_test::simd_test;
+
+ use crate::core_arch::aarch64::*;
+
+ const CANCEL_CODE: u64 = 0x123 & _TMFAILURE_REASON;
+
+ #[simd_test(enable = "tme")]
+ unsafe fn test_tstart() {
+ let mut x = 0;
+ for i in 0..10 {
+ let code = tme::__tstart();
+ if code == _TMSTART_SUCCESS {
+ x += 1;
+ assert_eq!(x, i + 1);
+ break;
+ }
+ assert_eq!(x, 0);
+ }
+ }
+
+ #[simd_test(enable = "tme")]
+ unsafe fn test_tcommit() {
+ let mut x = 0;
+ for i in 0..10 {
+ let code = tme::__tstart();
+ if code == _TMSTART_SUCCESS {
+ x += 1;
+ assert_eq!(x, i + 1);
+ tme::__tcommit();
+ }
+ assert_eq!(x, i + 1);
+ }
+ }
+
+ #[simd_test(enable = "tme")]
+ unsafe fn test_tcancel() {
+ let mut x = 0;
+
+ for i in 0..10 {
+ let code = tme::__tstart();
+ if code == _TMSTART_SUCCESS {
+ x += 1;
+ assert_eq!(x, i + 1);
+ tme::__tcancel::<CANCEL_CODE>();
+ break;
+ }
+ }
+
+ assert_eq!(x, 0);
+ }
+
+ #[simd_test(enable = "tme")]
+ unsafe fn test_ttest() {
+ for _ in 0..10 {
+ let code = tme::__tstart();
+ if code == _TMSTART_SUCCESS {
+ if tme::__ttest() == 2 {
+ tme::__tcancel::<CANCEL_CODE>();
+ break;
+ }
+ }
+ }
+ }
+}
diff --git a/library/stdarch/crates/core_arch/src/aarch64/v8.rs b/library/stdarch/crates/core_arch/src/aarch64/v8.rs
new file mode 100644
index 000000000..778721c68
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/aarch64/v8.rs
@@ -0,0 +1,104 @@
+//! ARMv8 intrinsics.
+//!
+//! The reference is [ARMv8-A Reference Manual][armv8].
+//!
+//! [armv8]: http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0487a.k_10775/index.html
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+/// Reverse the order of the bytes.
+#[inline]
+#[cfg_attr(test, assert_instr(rev))]
+pub unsafe fn _rev_u64(x: u64) -> u64 {
+ x.swap_bytes()
+}
+
+/// Count Leading Zeros.
+#[inline]
+#[cfg_attr(test, assert_instr(clz))]
+pub unsafe fn _clz_u64(x: u64) -> u64 {
+ x.leading_zeros() as u64
+}
+
+/// Reverse the bit order.
+#[inline]
+#[cfg_attr(test, assert_instr(rbit))]
+pub unsafe fn _rbit_u64(x: u64) -> u64 {
+ crate::intrinsics::bitreverse(x)
+}
+
+/// Counts the number of leading bits that match the most significant (sign) bit,
+/// not counting the sign bit itself.
+///
+/// When all bits of the operand are equal, it returns the size of the operand in
+/// bits minus one.
+#[inline]
+#[cfg_attr(test, assert_instr(cls))]
+pub unsafe fn _cls_u32(x: u32) -> u32 {
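+ // XOR with the replicated sign bit turns leading copies of the sign bit into
+ // leading zeros; the shift-and-OR drops the sign bit itself and keeps the
+ // value nonzero, so `leading_zeros` returns the leading-sign-bit count.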
+ u32::leading_zeros((((((x as i32) >> 31) as u32) ^ x) << 1) | 1) as u32
+}
+
+/// Counts the number of leading bits that match the most significant (sign) bit,
+/// not counting the sign bit itself.
+///
+/// When all bits of the operand are equal, it returns the size of the operand in
+/// bits minus one.
+#[inline]
+#[cfg_attr(test, assert_instr(cls))]
+pub unsafe fn _cls_u64(x: u64) -> u64 {
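+ // Same bit trick as `_cls_u32`, using a 63-bit arithmetic shift for 64 bits.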
+ u64::leading_zeros((((((x as i64) >> 63) as u64) ^ x) << 1) | 1) as u64
+}
+
+#[cfg(test)]
+mod tests {
+ use crate::core_arch::aarch64::v8;
+
+ #[test]
+ fn _rev_u64() {
+ unsafe {
+ assert_eq!(
+ v8::_rev_u64(0b0000_0000_1111_1111_0000_0000_1111_1111_u64),
+ 0b1111_1111_0000_0000_1111_1111_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_u64
+ );
+ }
+ }
+
+ #[test]
+ fn _clz_u64() {
+ unsafe {
+ assert_eq!(v8::_clz_u64(0b0000_1010u64), 60u64);
+ }
+ }
+
+ #[test]
+ fn _rbit_u64() {
+ unsafe {
+ assert_eq!(
+ v8::_rbit_u64(0b0000_0000_1111_1101_0000_0000_1111_1111_u64),
+ 0b1111_1111_0000_0000_1011_1111_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_u64
+ );
+ }
+ }
+
+ #[test]
+ fn _cls_u32() {
+ unsafe {
+ assert_eq!(
+ v8::_cls_u32(0b1111_1111_1111_1111_0000_0000_1111_1111_u32),
+ 15_u32
+ );
+ }
+ }
+
+ #[test]
+ fn _cls_u64() {
+ unsafe {
+ assert_eq!(
+ v8::_cls_u64(
+ 0b1111_1111_1111_1111_0000_0000_1111_1111_0000_0000_0000_0000_0000_0000_0000_0000_u64
+ ),
+ 15_u64
+ );
+ }
+ }
+}